zhouchangda 6 meses atrás
pai
commit
2f048e6eae

+ 9 - 3
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -791,11 +791,17 @@ class _LayoutParsingPipelineV2(BasePipeline):
                     text_rec_score_thresh=text_rec_score_thresh,
                 )
 
-            if label in ["chart", "image", "seal", "table", "formula"]:
+            if (
+                label
+                in ["seal", "table", "formula", "chart"]
+                + BLOCK_LABEL_MAP["image_labels"]
+            ):
                 x_min, y_min, x_max, y_max = list(map(int, block_bbox))
-                img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
+                img_path = (
+                    f"imgs/img_in_{block.label}_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
+                )
                 img = Image.fromarray(image[y_min:y_max, x_min:x_max, ::-1])
-                block.image = {img_path: img}
+                block.image = {"path": img_path, "img": img}
 
             layout_parsing_blocks.append(block)
 

+ 14 - 11
paddlex/inference/pipelines/layout_parsing/result_v2.py

@@ -96,8 +96,8 @@ def format_text_plain_func(block):
 
 def format_image_scaled_by_html_func(block, original_image_width):
     img_tags = []
-    image_path = "".join(block.image.keys())
-    image_width = block.image[image_path].width
+    image_path = block.image["path"]
+    image_width = block.image["img"].width
     scale = int(image_width / original_image_width * 100)
     img_tags.append(
         '<img src="{}" alt="Image" width="{}%" />'.format(
@@ -109,7 +109,7 @@ def format_image_scaled_by_html_func(block, original_image_width):
 
 def format_image_plain_func(block):
     img_tags = []
-    image_path = "".join(block.image.keys())
+    image_path = block.image["path"]
     img_tags.append("![]({})".format(image_path.replace("-\n", "").replace("\n", " ")))
     return "\n".join(img_tags)
 
@@ -487,10 +487,16 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
         prev_block = None
         page_first_element_seg_start_flag = None
         page_last_element_seg_end_flag = None
+        markdown_info = {}
+        markdown_info["markdown_images"] = {}
         for block in self["parsing_res_list"]:
             seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
 
             label = block.label
+            if block.image is not None:
+                markdown_info["markdown_images"][block.image["path"]] = block.image[
+                    "img"
+                ]
             page_first_element_seg_start_flag = (
                 seg_start_flag
                 if (page_first_element_seg_start_flag is None)
@@ -511,14 +517,11 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
                 last_label = label
         page_last_element_seg_end_flag = seg_end_flag
 
-        markdown_info = {
-            "markdown_texts": markdown_content,
-            "page_continuation_flags": (
-                page_first_element_seg_start_flag,
-                page_last_element_seg_end_flag,
-            ),
-        }
-        markdown_info["markdown_images"] = {}
+        markdown_info["markdown_texts"] = markdown_content
+        markdown_info["page_continuation_flags"] = (
+            page_first_element_seg_start_flag,
+            page_last_element_seg_end_flag,
+        )
         for img in self["imgs_in_doc"]:
             markdown_info["markdown_images"][img["path"]] = img["img"]
 

+ 7 - 1
paddlex/inference/pipelines/layout_parsing/setting.py

@@ -55,7 +55,12 @@ BLOCK_LABEL_MAP = {
         "flowchart",
         "figure",
     ],  # 图、表、印章、图表、图
-    "vision_title_labels": ["table_title", "chart_title", "figure_title"],  # 图表标题
+    "vision_title_labels": [
+        "table_title",
+        "chart_title",
+        "figure_title",
+        "figure_table_chart_title",
+    ],  # 图表标题
     "unordered_labels": [
         "aside_text",
         "seal",
@@ -78,4 +83,5 @@ BLOCK_LABEL_MAP = {
         "refer_title",
         "content_title",
     ],
+    "image_labels": ["image", "figure"],
 }

+ 4 - 3
paddlex/inference/pipelines/layout_parsing/utils.py

@@ -27,7 +27,7 @@ from PIL import Image
 
 from ..components import convert_points_to_boxes
 from ..ocr.result import OCRResult
-from .setting import REGION_SETTINGS
+from .setting import BLOCK_LABEL_MAP, REGION_SETTINGS
 
 
 def get_overlap_boxes_idx(src_boxes: np.ndarray, ref_boxes: np.ndarray) -> List:
@@ -612,9 +612,10 @@ def remove_extra_space(input_text: str) -> str:
 def gather_imgs(original_img, layout_det_objs):
     imgs_in_doc = []
     for det_obj in layout_det_objs:
-        if det_obj["label"] in ("image", "chart", "seal", "formula", "table"):
+        if det_obj["label"] in BLOCK_LABEL_MAP["image_labels"]:
+            label = det_obj["label"]
             x_min, y_min, x_max, y_max = list(map(int, det_obj["coordinate"]))
-            img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
+            img_path = f"imgs/img_in_{label}_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
             img = Image.fromarray(original_img[y_min:y_max, x_min:x_max, ::-1])
             imgs_in_doc.append(
                 {

+ 1 - 1
paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py

@@ -134,7 +134,7 @@ def pre_process(
             gap_len = interval[0] - current_interval[1]
             if gap_len >= region.text_line_height * 3:
                 cut_coordinates.append(current_interval[1])
-            elif gap_len > region.text_line_height * 1.8:
+            elif gap_len > region.text_line_height * 1.2:
                 (pre_blocks, post_blocks) = get_cut_blocks(
                     list(block_map.values()), cut_direction, [current_interval[1]], []
                 )