瀏覽代碼

support saving block_id and block_order in parsing_res_list (#4463)

changdazhou 2 月之前
父節點
當前提交
11f89421a8

File diff suppressed because it is too large
+ 0 - 0
docs/pipeline_usage/tutorials/ocr_pipelines/PP-StructureV3.en.md


File diff suppressed because it is too large
+ 0 - 0
docs/pipeline_usage/tutorials/ocr_pipelines/PP-StructureV3.md


+ 5 - 4
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -830,11 +830,12 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
         parsing_res_list = self.sort_layout_parsing_blocks(layout_parsing_page)
 
-        index = 1
-        for block in parsing_res_list:
+        order_index = 1
+        for index, block in enumerate(parsing_res_list):
+            block.index = index
             if block.label in BLOCK_LABEL_MAP["visualize_index_labels"]:
-                block.order_index = index
-                index += 1
+                block.order_index = order_index
+                order_index += 1
 
         return parsing_res_list
 

+ 15 - 1
paddlex/inference/pipelines/layout_parsing/result_v2.py

@@ -230,6 +230,18 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
         data["page_index"] = self["page_index"]
         model_settings = self["model_settings"]
         data["model_settings"] = model_settings
+        parsing_res_list: List[LayoutBlock] = self["parsing_res_list"]
+        parsing_res_list = [
+            {
+                "block_label": parsing_res.label,
+                "block_content": parsing_res.content,
+                "block_bbox": parsing_res.bbox,
+                "block_id": parsing_res.index,
+                "block_order": parsing_res.order_index,
+            }
+            for parsing_res in parsing_res_list
+        ]
+        data["parsing_res_list"] = parsing_res_list
         if self["model_settings"]["use_doc_preprocessor"]:
             data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
         data["layout_det_res"] = self["layout_det_res"].str["res"]
@@ -271,12 +283,14 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
         data["page_index"] = self["page_index"]
         model_settings = self["model_settings"]
         data["model_settings"] = model_settings
-        parsing_res_list = self["parsing_res_list"]
+        parsing_res_list: List[LayoutBlock] = self["parsing_res_list"]
         parsing_res_list = [
             {
                 "block_label": parsing_res.label,
                 "block_content": parsing_res.content,
                 "block_bbox": parsing_res.bbox,
+                "block_id": parsing_res.index,
+                "block_order": parsing_res.order_index,
             }
             for parsing_res in parsing_res_list
         ]

Some files were not shown because too many files changed in this diff