Selaa lähdekoodia

Update PP-ChatOCRv4-doc config (#3325)

* update PP-ChatOCRv4-doc yaml

* fix bug
changdazhou 9 kuukautta sitten
vanhempi
commit
c9f58b8b9d

+ 0 - 1
paddlex/inference/common/batch_sampler/image_batch_sampler.py

@@ -80,7 +80,6 @@ class ImageBatchSampler(BaseBatchSampler):
         if not isinstance(inputs, list):
             inputs = [inputs]
 
-        batch = {"instances": [], "input_paths": [], "page_indexes": []}
         batch = ImgInstance()
         for input in inputs:
             if isinstance(input, np.ndarray):

+ 4 - 10
paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py

@@ -553,14 +553,9 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
         if isinstance(input, list):
             logging.error("Input is a list, but it's not supported here.")
             return {"mllm_res": "Error:Input is a list, but it's not supported here!"}
-        image_array_list = self.img_reader([input])
-        if (
-            isinstance(input, str)
-            and input.endswith(".pdf")
-            and len(image_array_list) > 1
-        ):
-            logging.error("The input with PDF should have only one page.")
-            return {"mllm_res": "Error:The input with PDF should have only one page!"}
+        if isinstance(input, str) and input.endswith(".pdf"):
+            logging.error("MLMM prediction does not support PDF currently!")
+            return {"mllm_res": "Error:MLMM prediction does not support PDF currently!"}
 
         if self.mllm_chat_bot is None:
             logging.warning(
@@ -575,9 +570,8 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
         else:
             mllm_chat_bot = self.mllm_chat_bot
 
-        for image_array in image_array_list:
+        for image_array in self.img_reader([input]):
 
-            assert len(image_array.shape) == 3
             image_string = cv2.imencode(".jpg", image_array)[1].tostring()
             image_base64 = base64.b64encode(image_string).decode("utf-8")
             result = {}