пре 6 месеци · fdec0ca3c4
--- a/paddlex/configs/pipelines/PP-StructureV3.yaml
+++ b/paddlex/configs/pipelines/PP-StructureV3.yaml
@@ -8,12 +8,12 @@ use_seal_recognition: True
 
				 use_table_recognition: True
			
 
				 use_formula_recognition: True
			
 
				 use_chart_recognition: False
			
 
				-use_region_detection: False
			
 
				+use_region_detection: True
			
 
				 
			
 
				 SubModules:
			
 
				   LayoutDetection:
			
 
				     module_name: layout_detection
			
 
				-    model_name: PP-DocLayout-L
			
 
				+    model_name: PP-DocLayout_plus-L
			
 
				     model_dir: null
			
 
				     batch_size: 8
			
 
				     threshold: 
			
@@ -94,6 +94,12 @@ SubModules:
 
				     model_name: PP-Chart2Table
			
 
				     model_dir: null
			
 
				     batch_size: 1 
			
 
				+  RegionDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocBlockLayout
			
 
				+    model_dir: null
			
 
				+    layout_nms: True
			
 
				+    layout_merge_bboxes_mode: "small"
			
 
				 
			
 
				 SubPipelines:
			
 
				   DocPreprocessor:
			
@@ -121,7 +127,7 @@ SubPipelines:
 
				     SubModules:
			
 
				       TextDetection:
			
 
				         module_name: text_detection
			
 
				-        model_name: PP-OCRv4_server_det
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				         model_dir: null
			
 
				         limit_side_len: 736
			
 
				         limit_type: min
			
@@ -136,7 +142,7 @@ SubPipelines:
 
				         batch_size: 8
			
 
				       TextRecognition:
			
 
				         module_name: text_recognition
			
 
				-        model_name: PP-OCRv4_server_rec_doc
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				         model_dir: null
			
 
				         batch_size: 8
			
 
				         score_thresh: 0.0
			
@@ -172,6 +178,11 @@ SubPipelines:
 
				         module_name: table_cells_detection
			
 
				         model_name: RT-DETR-L_wireless_table_cell_det
			
 
				         model_dir: null
			
 
				+
			
 
				+      TableOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				     SubPipelines:
			
 
				       GeneralOCR:
			
 
				         pipeline_name: OCR
			
@@ -181,7 +192,7 @@ SubPipelines:
 
				         SubModules:
			
 
				           TextDetection:
			
 
				             module_name: text_detection
			
 
				-            model_name: PP-OCRv4_server_det
			
 
				+            model_name: PP-OCRv5_server_det
			
 
				             model_dir: null
			
 
				             limit_side_len: 736
			
 
				             limit_type: min
			
@@ -196,7 +207,7 @@ SubPipelines:
 
				             batch_size: 8
			
 
				           TextRecognition:
			
 
				             module_name: text_recognition
			
 
				-            model_name: PP-OCRv4_server_rec_doc
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				             model_dir: null
			
 
				             batch_size: 8
			
 
				         score_thresh: 0.0
			
@@ -226,7 +237,7 @@ SubPipelines:
 
				             unclip_ratio: 0.5
			
 
				           TextRecognition:
			
 
				             module_name: text_recognition
			
 
				-            model_name: PP-OCRv4_server_rec
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				             model_dir: null
			
 
				             batch_size: 8
			
 
				             score_thresh: 0
			
@@ -239,6 +250,6 @@ SubPipelines:
 
				     SubModules:
			
 
				       FormulaRecognition:
			
 
				         module_name: formula_recognition
			
 
				-        model_name: PP-FormulaNet-L
			
 
				+        model_name: PP-FormulaNet_plus-L
			
 
				         model_dir: null
			
 
				         batch_size: 8
			
--- a/paddlex/inference/models/formula_recognition/result.py
+++ b/paddlex/inference/models/formula_recognition/result.py
@@ -18,7 +18,6 @@ import os
 
				 import re
			
 
				 import subprocess
			
 
				 import tempfile
			
 
				-from pathlib import Path
			
 
				 from typing import List, Optional
			
 
				 
			
 
				 import numpy as np
			
@@ -38,14 +37,6 @@ if is_dep_available("pypdfium2"):
 
				 
			
 
				 
			
 
				 class FormulaRecResult(BaseCVResult):
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				 
			
 
				     def _to_str(self, *args, **kwargs):
			
 
				         data = copy.deepcopy(self)
			
--- a/paddlex/inference/models/object_detection/processors.py
+++ b/paddlex/inference/models/object_detection/processors.py
@@ -746,16 +746,14 @@ class DetPostProcess:
 
				         if layout_nms:
			
 
				             selected_indices = nms(boxes, iou_same=0.6, iou_diff=0.98)
			
 
				             boxes = np.array(boxes[selected_indices])
			
 
				-        
			
 
				+
			
 
				         filter_large_image = True
			
 
				         if filter_large_image and len(boxes) > 1:
			
 
				             if img_size[0] > img_size[1]:
			
 
				-                area_thres = 0.82 
			
 
				+                area_thres = 0.82
			
 
				             else:
			
 
				                 area_thres = 0.93
			
 
				-            image_index = (
			
 
				-                self.labels.index("image") if "image" in self.labels else None
			
 
				-            )
			
 
				+            image_index = self.labels.index("image") if "image" in self.labels else None
			
 
				             img_area = img_size[0] * img_size[1]
			
 
				             filtered_boxes = []
			
 
				             for box in boxes:
			
@@ -823,7 +821,7 @@ class DetPostProcess:
 
				                 boxes = boxes[keep_mask]
			
 
				 
			
 
				         if boxes.size == 0:
			
 
				-            return np.array([])
			
 
				+            return []
			
 
				 
			
 
				         if layout_unclip_ratio:
			
 
				             if isinstance(layout_unclip_ratio, float):
			
--- a/paddlex/inference/models/table_structure_recognition/result.py
+++ b/paddlex/inference/models/table_structure_recognition/result.py
@@ -13,7 +13,6 @@
 
				 # limitations under the License.
			
 
				 
			
 
				 import copy
			
 
				-from pathlib import Path
			
 
				 
			
 
				 import numpy as np
			
 
				 
			
@@ -28,15 +27,6 @@ if is_dep_available("opencv-contrib-python"):
 
				 class TableRecResult(BaseCVResult):
			
 
				     """SaveTableResults"""
			
 
				 
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				-
			
 
				     def _to_img(self):
			
 
				         image = self["input_img"]
			
 
				         bbox_res = self["bbox"]
			
--- a/paddlex/inference/models/text_detection/result.py
+++ b/paddlex/inference/models/text_detection/result.py
@@ -13,7 +13,6 @@
 
				 # limitations under the License.
			
 
				 
			
 
				 import copy
			
 
				-from pathlib import Path
			
 
				 
			
 
				 import numpy as np
			
 
				 
			
@@ -27,15 +26,6 @@ if is_dep_available("opencv-contrib-python"):
 
				 @class_requires_deps("opencv-contrib-python")
			
 
				 class TextDetResult(BaseCVResult):
			
 
				 
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				-
			
 
				     def _to_img(self):
			
 
				         """draw rectangle"""
			
 
				         boxes = self["dt_polys"]
			
--- a/paddlex/inference/pipelines/formula_recognition/result.py
+++ b/paddlex/inference/pipelines/formula_recognition/result.py
@@ -17,7 +17,6 @@ import os
 
				 import random
			
 
				 import subprocess
			
 
				 import tempfile
			
 
				-from pathlib import Path
			
 
				 from typing import Dict, Tuple
			
 
				 
			
 
				 import numpy as np
			
@@ -45,15 +44,6 @@ if is_dep_available("opencv-contrib-python"):
 
				 class FormulaRecognitionResult(BaseCVResult):
			
 
				     """Formula Recognition Result"""
			
 
				 
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				-
			
 
				     def _to_img(self) -> Dict[str, Image.Image]:
			
 
				         """
			
 
				         Converts the internal data to a PIL Image with detection and recognition results.
			
--- a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
@@ -100,7 +100,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				         self.use_seal_recognition = config.get("use_seal_recognition", True)
			
 
				         self.use_region_detection = config.get(
			
 
				             "use_region_detection",
			
 
				-            False,
			
 
				+            True,
			
 
				         )
			
 
				         self.use_formula_recognition = config.get(
			
 
				             "use_formula_recognition",
			
@@ -494,7 +494,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				             region_det_res["boxes"] = [
			
 
				                 {
			
 
				                     "coordinate": base_region_bbox,
			
 
				-                    "label": "SupplementaryBlock",
			
 
				+                    "label": "SupplementaryRegion",
			
 
				                     "score": 1,
			
 
				                 }
			
 
				             ]
			
@@ -521,7 +521,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				                     matched_bboxes = [block_bboxes[idx] for idx in matched_idxes]
			
 
				                     new_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
			
 
				                     region_det_res["boxes"][region_idx]["coordinate"] = new_region_bbox
			
 
				-            # Supplement region block when there is no matched block
			
 
				+            # Supplement region when there is no matched block
			
 
				             if len(block_idxes_set) > 0:
			
 
				                 while len(block_idxes_set) > 0:
			
 
				                     matched_idxes = []
			
@@ -555,7 +555,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				                     region_det_res["boxes"].append(
			
 
				                         {
			
 
				                             "coordinate": supplement_region_bbox,
			
 
				-                            "label": "SupplementaryBlock",
			
 
				+                            "label": "SupplementaryRegion",
			
 
				                             "score": 1,
			
 
				                         }
			
 
				                     )
			
@@ -950,7 +950,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				         use_seal_recognition: Union[bool, None] = None,
			
 
				         use_table_recognition: Union[bool, None] = None,
			
 
				         use_formula_recognition: Union[bool, None] = None,
			
 
				-        use_chart_recognition: Union[bool, None] = None,
			
 
				+        use_chart_recognition: Union[bool, None] = False,
			
 
				         use_region_detection: Union[bool, None] = None,
			
 
				         layout_threshold: Optional[Union[float, dict]] = None,
			
 
				         layout_nms: Optional[bool] = None,
			
@@ -1117,9 +1117,19 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				                 )
			
 
				 
			
 
				             if model_settings["use_table_recognition"]:
			
 
				-                table_contents = []
			
 
				-                for overall_ocr_res, formula_res_list, imgs_in_doc_for_img in zip(
			
 
				-                    overall_ocr_results, formula_res_lists, imgs_in_doc
			
 
				+                table_res_lists = []
			
 
				+                for (
			
 
				+                    layout_det_res,
			
 
				+                    doc_preprocessor_image,
			
 
				+                    overall_ocr_res,
			
 
				+                    formula_res_list,
			
 
				+                    imgs_in_doc_for_img,
			
 
				+                ) in zip(
			
 
				+                    layout_det_results,
			
 
				+                    doc_preprocessor_images,
			
 
				+                    overall_ocr_results,
			
 
				+                    formula_res_lists,
			
 
				+                    imgs_in_doc,
			
 
				                 ):
			
 
				                     table_contents_for_img = copy.deepcopy(overall_ocr_res)
			
 
				                     for formula_res in formula_res_list:
			
@@ -1174,27 +1184,28 @@ class _LayoutParsingPipelineV2(BasePipeline):
 
				                         table_contents_for_img["rec_polys"].append(poly_points)
			
 
				                         table_contents_for_img["rec_scores"].append(img["score"])
			
 
				 
			
 
				-                    table_contents.append(table_contents_for_img)
			
 
				-
			
 
				-                table_res_all = list(
			
 
				-                    self.table_recognition_pipeline(
			
 
				-                        doc_preprocessor_images,
			
 
				-                        use_doc_orientation_classify=False,
			
 
				-                        use_doc_unwarping=False,
			
 
				-                        use_layout_detection=False,
			
 
				-                        use_ocr_model=False,
			
 
				-                        overall_ocr_res=table_contents,
			
 
				-                        layout_det_res=layout_det_results,
			
 
				-                        cell_sort_by_y_projection=True,
			
 
				-                        use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html,
			
 
				-                        use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html,
			
 
				-                        use_table_orientation_classify=use_table_orientation_classify,
			
 
				-                        use_ocr_results_with_table_cells=use_ocr_results_with_table_cells,
			
 
				-                        use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
			
 
				-                        use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
			
 
				-                    ),
			
 
				-                )
			
 
				-                table_res_lists = [item["table_res_list"] for item in table_res_all]
			
 
				+                    table_res_all = list(
			
 
				+                        self.table_recognition_pipeline(
			
 
				+                            doc_preprocessor_image,
			
 
				+                            use_doc_orientation_classify=False,
			
 
				+                            use_doc_unwarping=False,
			
 
				+                            use_layout_detection=False,
			
 
				+                            use_ocr_model=False,
			
 
				+                            overall_ocr_res=table_contents_for_img,
			
 
				+                            layout_det_res=layout_det_res,
			
 
				+                            cell_sort_by_y_projection=True,
			
 
				+                            use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html,
			
 
				+                            use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html,
			
 
				+                            use_table_orientation_classify=use_table_orientation_classify,
			
 
				+                            use_ocr_results_with_table_cells=use_ocr_results_with_table_cells,
			
 
				+                            use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
			
 
				+                            use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
			
 
				+                        ),
			
 
				+                    )
			
 
				+                    single_table_res_lists = [
			
 
				+                        item["table_res_list"] for item in table_res_all
			
 
				+                    ]
			
 
				+                    table_res_lists.extend(single_table_res_lists)
			
 
				             else:
			
 
				                 table_res_lists = [[] for _ in doc_preprocessor_images]
			
 
				 
			
--- a/paddlex/inference/pipelines/layout_parsing/result.py
+++ b/paddlex/inference/pipelines/layout_parsing/result.py
@@ -13,7 +13,6 @@
 
				 # limitations under the License.
			
 
				 
			
 
				 import copy
			
 
				-from pathlib import Path
			
 
				 from typing import Dict
			
 
				 
			
 
				 import numpy as np
			
@@ -31,15 +30,6 @@ class LayoutParsingResult(BaseCVResult, HtmlMixin, XlsxMixin):
 
				         HtmlMixin.__init__(self)
			
 
				         XlsxMixin.__init__(self)
			
 
				 
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				-
			
 
				     def _to_img(self) -> Dict[str, np.ndarray]:
			
 
				         res_img_dict = {}
			
 
				         model_settings = self["model_settings"]
			
--- a/paddlex/inference/pipelines/layout_parsing/result_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/result_v2.py
@@ -17,7 +17,6 @@ import copy
 
				 import math
			
 
				 import re
			
 
				 from functools import partial
			
 
				-from pathlib import Path
			
 
				 from typing import List
			
 
				 
			
 
				 import numpy as np
			
@@ -204,15 +203,6 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				         MarkdownMixin.__init__(self)
			
 
				         JsonMixin.__init__(self)
			
 
				 
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				-
			
 
				     def _to_img(self) -> dict[str, np.ndarray]:
			
 
				         from .utils import get_show_color
			
 
				 
			
--- a/paddlex/inference/pipelines/layout_parsing/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/utils.py
@@ -390,6 +390,10 @@ def is_english_letter(char):
 
				     return bool(re.match(r"^[A-Za-z]$", char))
			
 
				 
			
 
				 
			
 
				+def is_numeric(char):  # True when `char` is a non-empty string made only of digits and/or "."
			
 
				+    return bool(re.match(r"^[\d.]+$", char))  # a lone "." matches too; the pattern does not limit the input to one character
			
 
				+
			
 
				+
			
 
				 def is_non_breaking_punctuation(char):
			
 
				     """
			
 
				     判断一个字符是否是不需要换行的标点符号，包括全角和半角的符号。
			
@@ -481,16 +485,17 @@ def format_line(
 
				         len(line_text) > 0 and is_english_letter(line_text[-1])
			
 
				     ) or line_text.endswith("$"):
			
 
				         line_text += " "
			
 
				-    else:
			
 
				-        if (
			
 
				-            block_stop_coordinate - last_span_box[text_stop_index] > block_width * 0.3
			
 
				-            and block_label != "formula"
			
 
				-        ):
			
 
				+    elif (
			
 
				+        len(line_text) > 0
			
 
				+        and not is_english_letter(line_text[-1])
			
 
				+        and not is_non_breaking_punctuation(line_text[-1])
			
 
				+        and not is_numeric(line_text[-1])
			
 
				+    ) or text_direction == "vertical":
			
 
				+        if block_stop_coordinate - last_span_box[text_stop_index] > block_width * 0.4:
			
 
				             line_text += "\n"
			
 
				         if (
			
 
				             first_span_box[text_start_index] - block_start_coordinate
			
 
				-            > block_width * 0.3
			
 
				-            and block_label != "formula"
			
 
				+            > block_width * 0.4
			
 
				         ):
			
 
				             line_text = "\n" + line_text
			
 
				 
			
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py
@@ -56,7 +56,7 @@ def get_nearest_edge_distance(
 
				     return min_x_distance + min_y_distance
			
 
				 
			
 
				 
			
 
				-def _projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
			
 
				+def projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
			
 
				     """
			
 
				     Generate a 1D projection histogram from bounding boxes along a specified axis.
			
 
				 
			
@@ -84,7 +84,7 @@ def _projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
 
				     return projection
			
 
				 
			
 
				 
			
 
				-def _split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
			
 
				+def split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
			
 
				     """
			
 
				     Split the projection profile into segments based on specified thresholds.
			
 
				 
			
@@ -144,8 +144,8 @@ def recursive_yx_cut(
 
				     y_sorted_indices = np.array(indices)[y_sorted_indices]
			
 
				 
			
 
				     # Perform Y-axis projection
			
 
				-    y_projection = _projection_by_bboxes(boxes=y_sorted_boxes, axis=1)
			
 
				-    y_intervals = _split_projection_profile(y_projection, 0, 1)
			
 
				+    y_projection = projection_by_bboxes(boxes=y_sorted_boxes, axis=1)
			
 
				+    y_intervals = split_projection_profile(y_projection, 0, 1)
			
 
				 
			
 
				     if not y_intervals:
			
 
				         return
			
@@ -165,8 +165,8 @@ def recursive_yx_cut(
 
				         x_sorted_indices_chunk = y_indices_chunk[x_sorted_indices]
			
 
				 
			
 
				         # Perform X-axis projection
			
 
				-        x_projection = _projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0)
			
 
				-        x_intervals = _split_projection_profile(x_projection, 0, min_gap)
			
 
				+        x_projection = projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0)
			
 
				+        x_intervals = split_projection_profile(x_projection, 0, min_gap)
			
 
				 
			
 
				         if not x_intervals:
			
 
				             continue
			
@@ -216,8 +216,8 @@ def recursive_xy_cut(
 
				     x_sorted_indices = np.array(indices)[x_sorted_indices]
			
 
				 
			
 
				     # Perform X-axis projection
			
 
				-    x_projection = _projection_by_bboxes(boxes=x_sorted_boxes, axis=0)
			
 
				-    x_intervals = _split_projection_profile(x_projection, 0, 1)
			
 
				+    x_projection = projection_by_bboxes(boxes=x_sorted_boxes, axis=0)
			
 
				+    x_intervals = split_projection_profile(x_projection, 0, 1)
			
 
				 
			
 
				     if not x_intervals:
			
 
				         return
			
@@ -239,8 +239,8 @@ def recursive_xy_cut(
 
				         y_sorted_indices_chunk = x_indices_chunk[y_sorted_indices]
			
 
				 
			
 
				         # Perform Y-axis projection
			
 
				-        y_projection = _projection_by_bboxes(boxes=y_sorted_boxes_chunk, axis=1)
			
 
				-        y_intervals = _split_projection_profile(y_projection, 0, min_gap)
			
 
				+        y_projection = projection_by_bboxes(boxes=y_sorted_boxes_chunk, axis=1)
			
 
				+        y_intervals = split_projection_profile(y_projection, 0, min_gap)
			
 
				 
			
 
				         if not y_intervals:
			
 
				             continue
			
@@ -543,9 +543,7 @@ def sort_normal_blocks(blocks, text_line_height, text_line_width, region_directi
 
				     return blocks
			
 
				 
			
 
				 
			
 
				-def get_cut_blocks(
			
 
				-    blocks, cut_direction, cut_coordinates, overall_region_box, mask_labels=[]
			
 
				-):
			
 
				+def get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels=[]):
			
 
				     """
			
 
				     Cut blocks based on the given cut direction and coordinates.
			
 
				 
			
@@ -553,7 +551,6 @@ def get_cut_blocks(
 
				         blocks (list): list of blocks to be cut.
			
 
				         cut_direction (str): cut direction, either "horizontal" or "vertical".
			
 
				         cut_coordinates (list): list of cut coordinates.
			
 
				-        overall_region_box (list): the overall region box that contains all blocks.
			
 
				 
			
 
				     Returns:
			
 
				         list: a list of tuples containing the cutted blocks and their corresponding mean width。
			
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
@@ -23,14 +23,15 @@ from ..utils import calculate_overlap_ratio, calculate_projection_overlap_ratio
 
				 from .utils import (
			
 
				     calculate_discontinuous_projection,
			
 
				     get_cut_blocks,
			
 
				-    get_nearest_edge_distance,
			
 
				     insert_child_blocks,
			
 
				     manhattan_insert,
			
 
				+    projection_by_bboxes,
			
 
				     recursive_xy_cut,
			
 
				     recursive_yx_cut,
			
 
				     reference_insert,
			
 
				     shrink_overlapping_boxes,
			
 
				     sort_normal_blocks,
			
 
				+    split_projection_profile,
			
 
				     update_doc_title_child_blocks,
			
 
				     update_paragraph_title_child_blocks,
			
 
				     update_vision_child_blocks,
			
@@ -131,27 +132,50 @@ def pre_process(
 
				         current_interval = discontinuous[0]
			
 
				         for interval in discontinuous[1:]:
			
 
				             gap_len = interval[0] - current_interval[1]
			
 
				-            if gap_len >= region.text_line_height * 5:
			
 
				+            if gap_len >= region.text_line_height * 3:
			
 
				                 cut_coordinates.append(current_interval[1])
			
 
				-            elif gap_len > region.text_line_height * 2:
			
 
				-                x1, _, x2, __ = region.bbox
			
 
				-                y1 = current_interval[1]
			
 
				-                y2 = interval[0]
			
 
				-                bbox = [x1, y1, x2, y2]
			
 
				-                ref_interval = interval[0] - current_interval[1]
			
 
				-                ref_bboxes = []
			
 
				-                for block in blocks:
			
 
				-                    if get_nearest_edge_distance(bbox, block.bbox) < ref_interval * 2:
			
 
				-                        ref_bboxes.append(block.bbox)
			
 
				-                discontinuous = calculate_discontinuous_projection(
			
 
				-                    ref_bboxes, direction=region.direction
			
 
				+            elif gap_len > region.text_line_height * 1.8:
			
 
				+                (pre_blocks, post_blocks) = get_cut_blocks(
			
 
				+                    list(block_map.values()), cut_direction, [current_interval[1]], []
			
 
				                 )
			
 
				-                if len(discontinuous) != 2:
			
 
				-                    cut_coordinates.append(current_interval[1])
			
 
				+                pre_bboxes = np.array([block.bbox for block in pre_blocks])
			
 
				+                post_bboxes = np.array([block.bbox for block in post_blocks])
			
 
				+                projection_index = 1 if cut_direction == "horizontal" else 0
			
 
				+                pre_projection = projection_by_bboxes(pre_bboxes, projection_index)
			
 
				+                post_projection = projection_by_bboxes(post_bboxes, projection_index)
			
 
				+                pre_projection_min = np.min(pre_projection)
			
 
				+                post_projection_min = np.min(post_projection)
			
 
				+                pre_projection_min += 5 if pre_projection_min != 0 else 0
			
 
				+                post_projection_min += 5 if post_projection_min != 0 else 0
			
 
				+                pre_intervals = split_projection_profile(
			
 
				+                    pre_projection, pre_projection_min, 1
			
 
				+                )
			
 
				+                post_intervals = split_projection_profile(
			
 
				+                    post_projection, post_projection_min, 1
			
 
				+                )
			
 
				+                pre_gap_boxes = []
			
 
				+                if pre_intervals is not None:
			
 
				+                    for start, end in zip(*pre_intervals):
			
 
				+                        bbox = [0] * 4
			
 
				+                        bbox[projection_index] = start
			
 
				+                        bbox[projection_index + 2] = end
			
 
				+                        pre_gap_boxes.append(bbox)
			
 
				+                post_gap_boxes = []
			
 
				+                if post_intervals is not None:
			
 
				+                    for start, end in zip(*post_intervals):
			
 
				+                        bbox = [0] * 4
			
 
				+                        bbox[projection_index] = start
			
 
				+                        bbox[projection_index + 2] = end
			
 
				+                        post_gap_boxes.append(bbox)
			
 
				+                max_gap_boxes_num = max(len(pre_gap_boxes), len(post_gap_boxes))
			
 
				+                if max_gap_boxes_num > 0:
			
 
				+                    discontinuous_intervals = calculate_discontinuous_projection(
			
 
				+                        pre_gap_boxes + post_gap_boxes, direction=region.direction
			
 
				+                    )
			
 
				+                    if len(discontinuous_intervals) != max_gap_boxes_num:
			
 
				+                        cut_coordinates.append(current_interval[1])
			
 
				             current_interval = interval
			
 
				-    cut_list = get_cut_blocks(
			
 
				-        blocks, cut_direction, cut_coordinates, region.bbox, mask_labels
			
 
				-    )
			
 
				+    cut_list = get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels)
			
 
				     pre_cut_list.extend(cut_list)
			
 
				     if region.direction == "vertical":
			
 
				         pre_cut_list = pre_cut_list[::-1]
			
--- a/paddlex/inference/pipelines/ocr/result.py
+++ b/paddlex/inference/pipelines/ocr/result.py
@@ -14,7 +14,6 @@
 
				 
			
 
				 import math
			
 
				 import random
			
 
				-from pathlib import Path
			
 
				 from typing import Dict
			
 
				 
			
 
				 import numpy as np
			
@@ -32,15 +31,6 @@ if is_dep_available("opencv-contrib-python"):
 
				 class OCRResult(BaseCVResult):
			
 
				     """OCR result"""
			
 
				 
			
 
				-    def _get_input_fn(self):
			
 
				-        fn = super()._get_input_fn()
			
 
				-        if (page_idx := self["page_index"]) is not None:
			
 
				-            fp = Path(fn)
			
 
				-            stem, suffix = fp.stem, fp.suffix
			
 
				-            return f"{stem}_{page_idx}{suffix}"
			
 
				-        else:
			
 
				-            return fn
			
 
				-
			
 
				     def get_minarea_rect(self, points: np.ndarray) -> np.ndarray:
			
 
				         """
			
 
				         Get the minimum area rectangle for the given points using OpenCV.