浏览代码

bugfix: process imgs in doc

gaotingquan 8 月之前
父节点
当前提交
c5df8c9c9c

+ 3 - 0
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -227,6 +227,7 @@ class LayoutParsingPipelineV2(BasePipeline):
         table_res_list: list,
         seal_res_list: list,
         formula_res_list: list,
+        imgs_in_doc: list,
         text_det_limit_side_len: Optional[int] = None,
         text_det_limit_type: Optional[str] = None,
         text_det_thresh: Optional[float] = None,
@@ -344,6 +345,7 @@ class LayoutParsingPipelineV2(BasePipeline):
             layout_det_res=layout_det_res,
             table_res_list=table_res_list,
             seal_res_list=seal_res_list,
+            imgs_in_doc=imgs_in_doc,
         )
 
         return parsing_res_list
@@ -625,6 +627,7 @@ class LayoutParsingPipelineV2(BasePipeline):
                 table_res_list=table_res_list,
                 seal_res_list=seal_res_list,
                 formula_res_list=formula_res_list,
+                imgs_in_doc=imgs_in_doc,
                 text_det_limit_side_len=text_det_limit_side_len,
                 text_det_limit_type=text_det_limit_type,
                 text_det_thresh=text_det_thresh,

+ 5 - 4
paddlex/inference/pipelines/layout_parsing/utils.py

@@ -542,6 +542,7 @@ def get_single_block_parsing_res(
     layout_det_res: DetResult,
     table_res_list: list,
     seal_res_list: list,
+    imgs_in_doc: list,
 ) -> OCRResult:
     """
     Extract structured information from OCR and layout detection results.
@@ -652,14 +653,14 @@ def get_single_block_parsing_res(
                     ]
 
             if label in ["chart", "image"]:
+                x_min, y_min, x_max, y_max = list(map(int, block_bbox))
+                img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
+                img = Image.fromarray(input_img[y_min:y_max, x_min:x_max, ::-1])
                 single_block_layout_parsing_res.append(
                     {
                         "block_label": label,
                         "block_content": _process_text("".join(rec_res["rec_texts"])),
-                        "block_image": input_img[
-                            int(block_bbox[1]) : int(block_bbox[3]),
-                            int(block_bbox[0]) : int(block_bbox[2]),
-                        ],
+                        "block_image": {img_path: img},
                         "block_bbox": block_bbox,
                         "seg_start_flag": seg_start_flag,
                         "seg_end_flag": seg_end_flag,