Răsfoiți Sursa

support sort by different text line

zhouchangda 6 luni în urmă
părinte
comite
e23a69cd66

+ 58 - 97
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -30,6 +30,7 @@ from ...utils.pp_option import PaddlePredictorOption
 from ..base import BasePipeline
 from ..base import BasePipeline
 from ..ocr.result import OCRResult
 from ..ocr.result import OCRResult
 from .result_v2 import LayoutParsingBlock, LayoutParsingRegion, LayoutParsingResultV2
 from .result_v2 import LayoutParsingBlock, LayoutParsingRegion, LayoutParsingResultV2
+from .setting import BLOCK_LABEL_MAP, BLOCK_SETTINGS, LINE_SETTINGS, REGION_SETTINGS
 from .utils import (
 from .utils import (
     caculate_bbox_area,
     caculate_bbox_area,
     calculate_minimum_enclosing_bbox,
     calculate_minimum_enclosing_bbox,
@@ -260,8 +261,6 @@ class LayoutParsingPipelineV2(BasePipeline):
     def standardized_data(
     def standardized_data(
         self,
         self,
         image: list,
         image: list,
-        parameters_config: dict,
-        block_label_mapping: dict,
         region_det_res: DetResult,
         region_det_res: DetResult,
         layout_det_res: DetResult,
         layout_det_res: DetResult,
         overall_ocr_res: OCRResult,
         overall_ocr_res: OCRResult,
@@ -360,7 +359,7 @@ class LayoutParsingPipelineV2(BasePipeline):
             paragraph_title_block_area = caculate_bbox_area(
             paragraph_title_block_area = caculate_bbox_area(
                 layout_det_res["boxes"][paragraph_title_list[0]]["coordinate"]
                 layout_det_res["boxes"][paragraph_title_list[0]]["coordinate"]
             )
             )
-            title_area_max_block_threshold = parameters_config["block"].get(
+            title_area_max_block_threshold = BLOCK_SETTINGS.get(
                 "title_conversion_area_ratio_threshold", 0.3
                 "title_conversion_area_ratio_threshold", 0.3
             )
             )
             if (
             if (
@@ -441,7 +440,7 @@ class LayoutParsingPipelineV2(BasePipeline):
                     break
                     break
             if not has_text and layout_det_res["boxes"][layout_box_idx][
             if not has_text and layout_det_res["boxes"][layout_box_idx][
                 "label"
                 "label"
-            ] not in block_label_mapping.get("vision_labels", []):
+            ] not in BLOCK_LABEL_MAP.get("vision_labels", []):
                 crop_box = layout_det_res["boxes"][layout_box_idx]["coordinate"]
                 crop_box = layout_det_res["boxes"][layout_box_idx]["coordinate"]
                 x1, y1, x2, y2 = [int(i) for i in crop_box]
                 x1, y1, x2, y2 = [int(i) for i in crop_box]
                 crop_img = np.array(image)[y1:y2, x1:x2]
                 crop_img = np.array(image)[y1:y2, x1:x2]
@@ -506,7 +505,7 @@ class LayoutParsingPipelineV2(BasePipeline):
                     overlap_ratio = calculate_overlap_ratio(
                     overlap_ratio = calculate_overlap_ratio(
                         region_bbox, block_bboxes[block_idx], mode="small"
                         region_bbox, block_bboxes[block_idx], mode="small"
                     )
                     )
-                    if overlap_ratio > parameters_config["region"].get(
+                    if overlap_ratio > REGION_SETTINGS.get(
                         "match_block_overlap_ratio_threshold", 0.8
                         "match_block_overlap_ratio_threshold", 0.8
                     ):
                     ):
                         region_to_block_map[region_idx].append(block_idx)
                         region_to_block_map[region_idx].append(block_idx)
@@ -540,7 +539,6 @@ class LayoutParsingPipelineV2(BasePipeline):
                                     image.shape[0],
                                     image.shape[0],
                                     block_idxes_set,
                                     block_idxes_set,
                                     block_bboxes,
                                     block_bboxes,
-                                    parameters_config,
                                 )
                                 )
                             )
                             )
                     if len(matched_idxes) == 0:
                     if len(matched_idxes) == 0:
@@ -570,7 +568,7 @@ class LayoutParsingPipelineV2(BasePipeline):
         input_img: np.ndarray,
         input_img: np.ndarray,
         text_rec_model: Any,
         text_rec_model: Any,
         text_rec_score_thresh: Union[float, None] = None,
         text_rec_score_thresh: Union[float, None] = None,
-        orientation: str = "vertical",
+        direction: str = "vertical",
     ) -> None:
     ) -> None:
         """
         """
         Sort a line of text spans based on their vertical position within the layout bounding box.
         Sort a line of text spans based on their vertical position within the layout bounding box.
@@ -583,8 +581,8 @@ class LayoutParsingPipelineV2(BasePipeline):
         Returns:
         Returns:
             list: The sorted line of text spans.
             list: The sorted line of text spans.
         """
         """
-        sort_index = 0 if orientation == "horizontal" else 1
-        splited_boxes = split_boxes_by_projection(line, orientation)
+        sort_index = 0 if direction == "horizontal" else 1
+        splited_boxes = split_boxes_by_projection(line, direction)
         splited_lines = []
         splited_lines = []
         if len(line) != len(splited_boxes):
         if len(line) != len(splited_boxes):
             splited_boxes.sort(key=lambda span: span[0][sort_index])
             splited_boxes.sort(key=lambda span: span[0][sort_index])
@@ -614,7 +612,6 @@ class LayoutParsingPipelineV2(BasePipeline):
     def get_block_rec_content(
     def get_block_rec_content(
         self,
         self,
         image: list,
         image: list,
-        line_parameters_config: dict,
         ocr_rec_res: dict,
         ocr_rec_res: dict,
         block: LayoutParsingBlock,
         block: LayoutParsingBlock,
         text_rec_model: Any,
         text_rec_model: Any,
@@ -625,37 +622,49 @@ class LayoutParsingPipelineV2(BasePipeline):
             block.content = ""
             block.content = ""
             return block
             return block
 
 
-        lines, text_orientation = group_boxes_into_lines(
+        lines, text_direction = group_boxes_into_lines(
             ocr_rec_res,
             ocr_rec_res,
-            line_parameters_config.get("line_height_iou_threshold", 0.8),
+            LINE_SETTINGS.get("line_height_iou_threshold", 0.8),
         )
         )
 
 
         if block.label == "reference":
         if block.label == "reference":
             rec_boxes = ocr_rec_res["boxes"]
             rec_boxes = ocr_rec_res["boxes"]
             block_right_coordinate = max([box[2] for box in rec_boxes])
             block_right_coordinate = max([box[2] for box in rec_boxes])
-            last_line_span_limit = 20
         else:
         else:
             block_right_coordinate = block.bbox[2]
             block_right_coordinate = block.bbox[2]
-            last_line_span_limit = 10
 
 
         # format line
         # format line
         text_lines = []
         text_lines = []
         need_new_line_num = 0
         need_new_line_num = 0
-        sort_index = 0 if text_orientation == "horizontal" else 1
+        start_index = 0 if text_direction == "horizontal" else 1
+        secondary_direction_start_index = 1 if text_direction == "horizontal" else 0
+        line_height_list, line_width_list = [], []
         for idx, line in enumerate(lines):
         for idx, line in enumerate(lines):
-            line.sort(key=lambda span: span[0][sort_index])
+            line.sort(key=lambda span: span[0][start_index])
 
 
+            text_bboxes_height = [
+                span[0][secondary_direction_start_index + 2]
+                - span[0][secondary_direction_start_index]
+                for span in line
+            ]
+            text_bboxes_width = [
+                span[0][start_index + 2] - span[0][start_index] for span in line
+            ]
+
+            line_height = np.mean(text_bboxes_height)
+            line_height_list.append(line_height)
+            line_width_list.append(np.mean(text_bboxes_width))
             # merge formula and text
             # merge formula and text
             ocr_labels = [span[2] for span in line]
             ocr_labels = [span[2] for span in line]
             if "formula" in ocr_labels:
             if "formula" in ocr_labels:
                 line = self.sort_line_by_projection(
                 line = self.sort_line_by_projection(
-                    line, image, text_rec_model, text_rec_score_thresh, text_orientation
+                    line, image, text_rec_model, text_rec_score_thresh, text_direction
                 )
                 )
 
 
             line_text, need_new_line = format_line(
             line_text, need_new_line = format_line(
                 line,
                 line,
                 block_right_coordinate,
                 block_right_coordinate,
-                last_line_span_limit=last_line_span_limit,
+                last_line_span_limit=line_height * 1.5,
                 block_label=block.label,
                 block_label=block.label,
             )
             )
             if need_new_line:
             if need_new_line:
@@ -668,21 +677,21 @@ class LayoutParsingPipelineV2(BasePipeline):
                 block.seg_end_coordinate = line_end_coordinate
                 block.seg_end_coordinate = line_end_coordinate
             text_lines.append(line_text)
             text_lines.append(line_text)
 
 
-        delim = line_parameters_config["delimiter_map"].get(block.label, "")
+        delim = LINE_SETTINGS["delimiter_map"].get(block.label, "")
         if need_new_line_num > len(text_lines) * 0.5 and delim == "":
         if need_new_line_num > len(text_lines) * 0.5 and delim == "":
             delim = "\n"
             delim = "\n"
         content = delim.join(text_lines)
         content = delim.join(text_lines)
         block.content = content
         block.content = content
         block.num_of_lines = len(text_lines)
         block.num_of_lines = len(text_lines)
-        block.orientation = text_orientation
+        block.direction = text_direction
+        block.text_line_height = np.mean(line_height_list)
+        block.text_line_width = np.mean(line_width_list)
 
 
         return block
         return block
 
 
     def get_layout_parsing_blocks(
     def get_layout_parsing_blocks(
         self,
         self,
         image: list,
         image: list,
-        parameters_config: dict,
-        block_label_mapping: dict,
         region_block_ocr_idx_map: dict,
         region_block_ocr_idx_map: dict,
         region_det_res: DetResult,
         region_det_res: DetResult,
         overall_ocr_res: OCRResult,
         overall_ocr_res: OCRResult,
@@ -759,7 +768,6 @@ class LayoutParsingPipelineV2(BasePipeline):
                 block = self.get_block_rec_content(
                 block = self.get_block_rec_content(
                     image=image,
                     image=image,
                     block=block,
                     block=block,
-                    line_parameters_config=parameters_config["line"],
                     ocr_rec_res=rec_res,
                     ocr_rec_res=rec_res,
                     text_rec_model=text_rec_model,
                     text_rec_model=text_rec_model,
                     text_rec_score_thresh=text_rec_score_thresh,
                     text_rec_score_thresh=text_rec_score_thresh,
@@ -781,9 +789,8 @@ class LayoutParsingPipelineV2(BasePipeline):
                 for idx in region_block_ocr_idx_map["region_to_block_map"][region_idx]
                 for idx in region_block_ocr_idx_map["region_to_block_map"][region_idx]
             ]
             ]
             region = LayoutParsingRegion(
             region = LayoutParsingRegion(
-                region_bbox=region_bbox,
+                bbox=region_bbox,
                 blocks=region_blocks,
                 blocks=region_blocks,
-                block_label_mapping=block_label_mapping,
             )
             )
             region_list.append(region)
             region_list.append(region)
 
 
@@ -818,14 +825,11 @@ class LayoutParsingPipelineV2(BasePipeline):
         Returns:
         Returns:
             list: A list of dictionaries representing the layout parsing result.
             list: A list of dictionaries representing the layout parsing result.
         """
         """
-        from .setting import block_label_mapping, parameters_config
 
 
         # Standardize data
         # Standardize data
         region_block_ocr_idx_map, region_det_res, layout_det_res = (
         region_block_ocr_idx_map, region_det_res, layout_det_res = (
             self.standardized_data(
             self.standardized_data(
                 image=image,
                 image=image,
-                parameters_config=parameters_config,
-                block_label_mapping=block_label_mapping,
                 region_det_res=region_det_res,
                 region_det_res=region_det_res,
                 layout_det_res=layout_det_res,
                 layout_det_res=layout_det_res,
                 overall_ocr_res=overall_ocr_res,
                 overall_ocr_res=overall_ocr_res,
@@ -838,8 +842,6 @@ class LayoutParsingPipelineV2(BasePipeline):
         # Format layout parsing block
         # Format layout parsing block
         region_list = self.get_layout_parsing_blocks(
         region_list = self.get_layout_parsing_blocks(
             image=image,
             image=image,
-            parameters_config=parameters_config,
-            block_label_mapping=block_label_mapping,
             region_block_ocr_idx_map=region_block_ocr_idx_map,
             region_block_ocr_idx_map=region_block_ocr_idx_map,
             region_det_res=region_det_res,
             region_det_res=region_det_res,
             overall_ocr_res=overall_ocr_res,
             overall_ocr_res=overall_ocr_res,
@@ -854,11 +856,10 @@ class LayoutParsingPipelineV2(BasePipeline):
         for region in region_list:
         for region in region_list:
             parsing_res_list.extend(region.sort())
             parsing_res_list.extend(region.sort())
 
 
-        visualize_index_labels = block_label_mapping["visualize_index_labels"]
         index = 1
         index = 1
         for block in parsing_res_list:
         for block in parsing_res_list:
-            if block.label in visualize_index_labels:
-                block.index = index
+            if block.label in BLOCK_LABEL_MAP["visualize_index_labels"]:
+                block.order_index = index
                 index += 1
                 index += 1
 
 
         return parsing_res_list
         return parsing_res_list
@@ -956,8 +957,6 @@ class LayoutParsingPipelineV2(BasePipeline):
         use_e2e_wired_table_rec_model: bool = False,
         use_e2e_wired_table_rec_model: bool = False,
         use_e2e_wireless_table_rec_model: bool = True,
         use_e2e_wireless_table_rec_model: bool = True,
         is_pretty_markdown: Union[bool, None] = None,
         is_pretty_markdown: Union[bool, None] = None,
-        use_layout_gt: bool = False,
-        layout_gt_dir: Union[str, None] = None,
         **kwargs,
         **kwargs,
     ) -> LayoutParsingResultV2:
     ) -> LayoutParsingResultV2:
         """
         """
@@ -1032,65 +1031,16 @@ class LayoutParsingPipelineV2(BasePipeline):
 
 
             doc_preprocessor_image = doc_preprocessor_res["output_img"]
             doc_preprocessor_image = doc_preprocessor_res["output_img"]
 
 
-            use_layout_gt = use_layout_gt
-            if not use_layout_gt:
-                layout_det_res = next(
-                    self.layout_det_model(
-                        doc_preprocessor_image,
-                        threshold=layout_threshold,
-                        layout_nms=layout_nms,
-                        layout_unclip_ratio=layout_unclip_ratio,
-                        layout_merge_bboxes_mode=layout_merge_bboxes_mode,
-                    )
+            layout_det_res = next(
+                self.layout_det_model(
+                    doc_preprocessor_image,
+                    threshold=layout_threshold,
+                    layout_nms=layout_nms,
+                    layout_unclip_ratio=layout_unclip_ratio,
+                    layout_merge_bboxes_mode=layout_merge_bboxes_mode,
                 )
                 )
-            else:
-                import json
-                import os
-
-                from ...models.object_detection.result import DetResult
-
-                label_dir = layout_gt_dir
-                notes_path = f"{label_dir}/notes.json"
-                labels = f"{label_dir}/labels"
-                gt_file = os.path.basename(input)[:-4] + ".txt"
-                gt_path = f"{labels}/{gt_file}"
-                with open(notes_path, "r") as f:
-                    notes = json.load(f)
-                categories_map = {}
-                for categories in notes["categories"]:
-                    id = int(categories["id"])
-                    name = categories["name"]
-                    categories_map[id] = name
-                with open(gt_path, "r") as f:
-                    lines = f.readlines()
-                layout_det_res_dic = {
-                    "input_img": doc_preprocessor_image,
-                    "page_index": None,
-                    "boxes": [],
-                }
-                for line in lines:
-                    line = line.strip().split(" ")
-                    category_id = int(line[0])
-                    label = categories_map[category_id]
-                    img_h, img_w = doc_preprocessor_image.shape[:2]
-                    center_x = float(line[1]) * img_w
-                    center_y = float(line[2]) * img_h
-                    w = float(line[3]) * img_w
-                    h = float(line[4]) * img_h
-                    x0 = center_x - w / 2
-                    y0 = center_y - h / 2
-                    x1 = center_x + w / 2
-                    y1 = center_y + h / 2
-                    box = [x0, y0, x1, y1]
-                    layout_det_res_dic["boxes"].append(
-                        {
-                            "cls_id": category_id,
-                            "label": label,
-                            "coordinate": box,
-                            "score": 1.0,
-                        }
-                    )
-                layout_det_res = DetResult(layout_det_res_dic)
+            )
+
             imgs_in_doc = gather_imgs(doc_preprocessor_image, layout_det_res["boxes"])
             imgs_in_doc = gather_imgs(doc_preprocessor_image, layout_det_res["boxes"])
 
 
             if model_settings["use_region_detection"]:
             if model_settings["use_region_detection"]:
@@ -1139,7 +1089,13 @@ class LayoutParsingPipelineV2(BasePipeline):
                     ),
                     ),
                 )
                 )
             else:
             else:
-                overall_ocr_res = {}
+                overall_ocr_res = {
+                    "dt_polys": [],
+                    "rec_texts": [],
+                    "rec_scores": [],
+                    "rec_polys": [],
+                    "rec_boxes": np.array([]),
+                }
 
 
             overall_ocr_res["rec_labels"] = ["text"] * len(overall_ocr_res["rec_texts"])
             overall_ocr_res["rec_labels"] = ["text"] * len(overall_ocr_res["rec_texts"])
 
 
@@ -1157,9 +1113,14 @@ class LayoutParsingPipelineV2(BasePipeline):
                     table_contents["rec_texts"].append(
                     table_contents["rec_texts"].append(
                         f"${formula_res['rec_formula']}$"
                         f"${formula_res['rec_formula']}$"
                     )
                     )
-                    table_contents["rec_boxes"] = np.vstack(
-                        (table_contents["rec_boxes"], [formula_res["dt_polys"]])
-                    )
+                    if table_contents["rec_boxes"].size == 0:
+                        table_contents["rec_boxes"] = np.array(
+                            [formula_res["dt_polys"]]
+                        )
+                    else:
+                        table_contents["rec_boxes"] = np.vstack(
+                            (table_contents["rec_boxes"], [formula_res["dt_polys"]])
+                        )
                     table_contents["rec_polys"].append(poly_points)
                     table_contents["rec_polys"].append(poly_points)
                     table_contents["rec_scores"].append(1)
                     table_contents["rec_scores"].append(1)
 
 

+ 129 - 95
paddlex/inference/pipelines/layout_parsing/result_v2.py

@@ -20,8 +20,9 @@ from pathlib import Path
 from typing import List
 from typing import List
 
 
 import numpy as np
 import numpy as np
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont
 
 
+from ....utils.fonts import PINGFANG_FONT_FILE_PATH
 from ...common.result import (
 from ...common.result import (
     BaseCVResult,
     BaseCVResult,
     HtmlMixin,
     HtmlMixin,
@@ -29,6 +30,7 @@ from ...common.result import (
     MarkdownMixin,
     MarkdownMixin,
     XlsxMixin,
     XlsxMixin,
 )
 )
+from .setting import BLOCK_LABEL_MAP
 
 
 
 
 class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
@@ -107,16 +109,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
         # for layout ordering image
         # for layout ordering image
         image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
         image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
         draw = ImageDraw.Draw(image, "RGBA")
         draw = ImageDraw.Draw(image, "RGBA")
+        font_size = int(0.018 * int(image.width)) + 2
+        font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
         parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
         parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
         for block in parsing_result:
         for block in parsing_result:
             bbox = block.bbox
             bbox = block.bbox
-            index = block.index
-            label = block.label
-            fill_color = get_show_color(label)
+            index = block.order_index
+            label = block.order_label
+            fill_color = get_show_color(label, True)
             draw.rectangle(bbox, fill=fill_color)
             draw.rectangle(bbox, fill=fill_color)
             if index is not None:
             if index is not None:
-                text_position = (bbox[2] + 2, bbox[1] - 10)
-                draw.text(text_position, str(index), fill="red")
+                text_position = (bbox[2] + 2, bbox[1] - font_size // 2)
+                if int(image.width) - bbox[2] < font_size:
+                    text_position = (
+                        int(bbox[2] - font_size * 1.1),
+                        bbox[1] - font_size // 2,
+                    )
+                draw.text(text_position, str(index), font=font, fill="red")
 
 
         res_img_dict["layout_order_res"] = image
         res_img_dict["layout_order_res"] = image
 
 
@@ -475,8 +484,8 @@ class LayoutParsingBlock:
 
 
     def __init__(self, label, bbox, content="") -> None:
     def __init__(self, label, bbox, content="") -> None:
         self.label = label
         self.label = label
-        self.order_label = "other"
-        self.bbox = [int(item) for item in bbox]
+        self.order_label = None
+        self.bbox = list(map(int, bbox))
         self.content = content
         self.content = content
         self.seg_start_coordinate = float("inf")
         self.seg_start_coordinate = float("inf")
         self.seg_end_coordinate = float("-inf")
         self.seg_end_coordinate = float("-inf")
@@ -486,40 +495,42 @@ class LayoutParsingBlock:
         self.num_of_lines = 1
         self.num_of_lines = 1
         self.image = None
         self.image = None
         self.index = None
         self.index = None
-        self.visual_index = None
-        self.orientation = self.get_bbox_orientation()
+        self.order_index = None
+        self.text_line_width = 1
+        self.text_line_height = 1
+        self.direction = self.get_bbox_direction()
         self.child_blocks = []
         self.child_blocks = []
-        self.update_orientation_info()
+        self.update_direction_info()
 
 
     def __str__(self) -> str:
     def __str__(self) -> str:
         return f"{self.__dict__}"
         return f"{self.__dict__}"
 
 
     def __repr__(self) -> str:
     def __repr__(self) -> str:
-        _str = f"\n\n#################\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
+        _str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
         return _str
         return _str
 
 
     def to_dict(self) -> dict:
     def to_dict(self) -> dict:
         return self.__dict__
         return self.__dict__
 
 
-    def update_orientation_info(self) -> None:
+    def update_direction_info(self) -> None:
         if self.order_label == "vision":
         if self.order_label == "vision":
-            self.orientation = "horizontal"
-        if self.orientation == "horizontal":
-            self.secondary_orientation = "vertical"
+            self.direction = "horizontal"
+        if self.direction == "horizontal":
+            self.secondary_direction = "vertical"
             self.short_side_length = self.height
             self.short_side_length = self.height
             self.long_side_length = self.width
             self.long_side_length = self.width
             self.start_coordinate = self.bbox[0]
             self.start_coordinate = self.bbox[0]
             self.end_coordinate = self.bbox[2]
             self.end_coordinate = self.bbox[2]
-            self.secondary_orientation_start_coordinate = self.bbox[1]
-            self.secondary_orientation_end_coordinate = self.bbox[3]
+            self.secondary_direction_start_coordinate = self.bbox[1]
+            self.secondary_direction_end_coordinate = self.bbox[3]
         else:
         else:
-            self.secondary_orientation = "horizontal"
+            self.secondary_direction = "horizontal"
             self.short_side_length = self.width
             self.short_side_length = self.width
             self.long_side_length = self.height
             self.long_side_length = self.height
             self.start_coordinate = self.bbox[1]
             self.start_coordinate = self.bbox[1]
             self.end_coordinate = self.bbox[3]
             self.end_coordinate = self.bbox[3]
-            self.secondary_orientation_start_coordinate = self.bbox[0]
-            self.secondary_orientation_end_coordinate = self.bbox[2]
+            self.secondary_direction_start_coordinate = self.bbox[0]
+            self.secondary_direction_end_coordinate = self.bbox[2]
 
 
     def append_child_block(self, child_block: LayoutParsingBlock) -> None:
     def append_child_block(self, child_block: LayoutParsingBlock) -> None:
         if not self.child_blocks:
         if not self.child_blocks:
@@ -533,7 +544,7 @@ class LayoutParsingBlock:
             max(y2, y2_child),
             max(y2, y2_child),
         )
         )
         self.bbox = union_bbox
         self.bbox = union_bbox
-        self.update_orientation_info()
+        self.update_direction_info()
         child_blocks = [child_block]
         child_blocks = [child_block]
         if child_block.child_blocks:
         if child_block.child_blocks:
             child_blocks.extend(child_block.get_child_blocks())
             child_blocks.extend(child_block.get_child_blocks())
@@ -550,107 +561,130 @@ class LayoutParsingBlock:
         centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
         centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
         return centroid
         return centroid
 
 
-    def get_bbox_orientation(self, orientation_ratio: float = 1.0) -> bool:
+    def get_bbox_direction(self, direction_ratio: float = 1.0) -> bool:
         """
         """
         Determine if a bounding box is horizontal or vertical.
         Determine if a bounding box is horizontal or vertical.
 
 
         Args:
         Args:
             bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
             bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
-            orientation_ratio (float): Ratio for determining orientation. Default is 1.0.
+            direction_ratio (float): Ratio for determining direction. Default is 1.0.
 
 
         Returns:
         Returns:
             str: "horizontal" or "vertical".
             str: "horizontal" or "vertical".
         """
         """
         return (
         return (
-            "horizontal"
-            if self.width * orientation_ratio >= self.height
-            else "vertical"
+            "horizontal" if self.width * direction_ratio >= self.height else "vertical"
         )
         )
 
 
 
 
 class LayoutParsingRegion:
 class LayoutParsingRegion:
 
 
-    def __init__(
-        self, region_bbox, blocks: List[LayoutParsingBlock] = [], block_label_mapping={}
-    ) -> None:
-        self.region_bbox = region_bbox
-        self.blocks = blocks
+    def __init__(self, bbox, blocks: List[LayoutParsingBlock] = []) -> None:
+        self.bbox = bbox
         self.block_map = {}
         self.block_map = {}
-        self.update_config(block_label_mapping)
-        self.orientation = None
+        self.direction = "horizontal"
         self.calculate_bbox_metrics()
         self.calculate_bbox_metrics()
-
-    def update_config(self, block_label_mapping):
-        self.block_map = {}
-        self.config = copy.deepcopy(block_label_mapping)
-        self.config["region_bbox"] = self.region_bbox
-        horizontal_text_block_num = 0
-        for idx, block in enumerate(self.blocks):
-            label = block.label
-            if (
-                block.order_label not in ["vision", "vision_title"]
-                and block.orientation == "horizontal"
-            ):
-                horizontal_text_block_num += 1
+        self.doc_title_block_idxes = []
+        self.paragraph_title_block_idxes = []
+        self.vision_block_idxes = []
+        self.unordered_block_idxes = []
+        self.vision_title_block_idxes = []
+        self.normal_text_block_idxes = []
+        self.header_block_idxes = []
+        self.footer_block_idxes = []
+        self.text_line_width = 20
+        self.text_line_height = 10
+        self.init_region_info_from_layout(blocks)
+        self.init_direction_info()
+
+    def init_region_info_from_layout(self, blocks: List[LayoutParsingBlock]):
+        horizontal_normal_text_block_num = 0
+        text_line_height_list = []
+        text_line_width_list = []
+        for idx, block in enumerate(blocks):
             self.block_map[idx] = block
             self.block_map[idx] = block
-            self.update_layout_order_config_block_index(label, idx)
-        text_block_num = (
-            len(self.blocks)
-            - len(self.config.get("vision_block_idxes", []))
-            - len(self.config.get("vision_title_block_idxes", []))
-        )
-        self.orientation = (
+            block.index = idx
+            if block.label in BLOCK_LABEL_MAP["header_labels"]:
+                self.header_block_idxes.append(idx)
+            elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
+                self.doc_title_block_idxes.append(idx)
+            elif block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]:
+                self.paragraph_title_block_idxes.append(idx)
+            elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
+                self.vision_block_idxes.append(idx)
+            elif block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
+                self.vision_title_block_idxes.append(idx)
+            elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
+                self.footer_block_idxes.append(idx)
+            elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
+                self.unordered_block_idxes.append(idx)
+            else:
+                self.normal_text_block_idxes.append(idx)
+                text_line_height_list.append(block.text_line_height)
+                text_line_width_list.append(block.text_line_width)
+                if block.direction == "horizontal":
+                    horizontal_normal_text_block_num += 1
+        self.direction = (
             "horizontal"
             "horizontal"
-            if horizontal_text_block_num >= text_block_num * 0.5
+            if horizontal_normal_text_block_num
+            >= len(self.normal_text_block_idxes) * 0.5
             else "vertical"
             else "vertical"
         )
         )
-        self.config["region_orientation"] = self.orientation
+        self.text_line_width = (
+            np.mean(text_line_width_list) if text_line_width_list else 20
+        )
+        self.text_line_height = (
+            np.mean(text_line_height_list) if text_line_height_list else 10
+        )
+
+    def init_direction_info(self):
+        if self.direction == "horizontal":
+            self.direction_start_index = 0
+            self.direction_end_index = 2
+            self.secondary_direction_start_index = 1
+            self.secondary_direction_end_index = 3
+            self.secondary_direction = "vertical"
+        else:
+            self.direction_start_index = 1
+            self.direction_end_index = 3
+            self.secondary_direction_start_index = 0
+            self.secondary_direction_end_index = 2
+            self.secondary_direction = "horizontal"
+
+        self.direction_center_coordinate = (
+            self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
+        ) / 2
+        self.secondary_direction_center_coordinate = (
+            self.bbox[self.secondary_direction_start_index]
+            + self.bbox[self.secondary_direction_end_index]
+        ) / 2
 
 
     def calculate_bbox_metrics(self):
     def calculate_bbox_metrics(self):
-        x1, y1, x2, y2 = self.region_bbox
+        x1, y1, x2, y2 = self.bbox
         x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
         x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
         self.euclidean_distance = math.sqrt(((x1) ** 2 + (y1) ** 2))
         self.euclidean_distance = math.sqrt(((x1) ** 2 + (y1) ** 2))
         self.center_euclidean_distance = math.sqrt(((x_center) ** 2 + (y_center) ** 2))
         self.center_euclidean_distance = math.sqrt(((x_center) ** 2 + (y_center) ** 2))
         self.angle_rad = math.atan2(y_center, x_center)
         self.angle_rad = math.atan2(y_center, x_center)
 
 
+    def sort_normal_blocks(self, blocks):
+        if self.direction == "horizontal":
+            blocks.sort(
+                key=lambda x: (
+                    x.bbox[1] // self.text_line_height,
+                    x.bbox[0] // self.text_line_width,
+                    x.bbox[1] ** 2 + x.bbox[0] ** 2,
+                ),
+            )
+        else:
+            blocks.sort(
+                key=lambda x: (
+                    -x.bbox[0] // self.text_line_width,
+                    x.bbox[1] // self.text_line_height,
+                    -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
+                ),
+            )
+
     def sort(self):
     def sort(self):
         from .xycut_enhanced import xycut_enhanced
         from .xycut_enhanced import xycut_enhanced
 
 
-        return xycut_enhanced(self.blocks, self.config)
-
-    def update_layout_order_config_block_index(
-        self, block_label: str, block_idx: int
-    ) -> None:
-        doc_title_labels = self.config["doc_title_labels"]
-        paragraph_title_labels = self.config["paragraph_title_labels"]
-        vision_labels = self.config["vision_labels"]
-        vision_title_labels = self.config["vision_title_labels"]
-        header_labels = self.config["header_labels"]
-        unordered_labels = self.config["unordered_labels"]
-        footer_labels = self.config["footer_labels"]
-        text_labels = self.config["text_labels"]
-        self.config.setdefault("doc_title_block_idxes", [])
-        self.config.setdefault("paragraph_title_block_idxes", [])
-        self.config.setdefault("vision_block_idxes", [])
-        self.config.setdefault("vision_title_block_idxes", [])
-        self.config.setdefault("unordered_block_idxes", [])
-        self.config.setdefault("text_block_idxes", [])
-        self.config.setdefault("header_block_idxes", [])
-        self.config.setdefault("footer_block_idxes", [])
-
-        if block_label in doc_title_labels:
-            self.config["doc_title_block_idxes"].append(block_idx)
-        if block_label in paragraph_title_labels:
-            self.config["paragraph_title_block_idxes"].append(block_idx)
-        if block_label in vision_labels:
-            self.config["vision_block_idxes"].append(block_idx)
-        if block_label in vision_title_labels:
-            self.config["vision_title_block_idxes"].append(block_idx)
-        if block_label in unordered_labels:
-            self.config["unordered_block_idxes"].append(block_idx)
-        if block_label in text_labels:
-            self.config["text_block_idxes"].append(block_idx)
-        if block_label in header_labels:
-            self.config["header_block_idxes"].append(block_idx)
-        if block_label in footer_labels:
-            self.config["footer_block_idxes"].append(block_idx)
+        return xycut_enhanced(self)

+ 25 - 22
paddlex/inference/pipelines/layout_parsing/setting.py

@@ -12,32 +12,35 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
-parameters_config = {
-    "page": {},
-    "region": {
-        "match_block_overlap_ratio_threshold": 0.8,
-        "split_block_overlap_ratio_threshold": 0.4,
-    },
-    "block": {
-        "title_conversion_area_ratio_threshold": 0.3,  # update paragraph_title -> doc_title
-    },
-    "line": {
-        "line_height_iou_threshold": 0.6,  # For line segmentation of OCR results
-        "delimiter_map": {
-            "doc_title": " ",
-            "content": "\n",
-        },
-    },
-    "word": {
-        "delimiter": " ",
+
+XYCUT_SETTINGS = {
+    "child_block_overlap_ratio_threshold": 0.1,
+    "edge_distance_compare_tolerance_len": 2,
+    "distance_weight_map": {
+        "edge_weight": 10**4,
+        "up_edge_weight": 1,
+        "down_edge_weight": 0.0001,
     },
     },
-    "order": {
-        "block_label_match_iou_threshold": 0.1,
-        "block_title_match_iou_threshold": 0.1,
+}
+
+REGION_SETTINGS = {
+    "match_block_overlap_ratio_threshold": 0.6,
+    "split_block_overlap_ratio_threshold": 0.4,
+}
+
+BLOCK_SETTINGS = {
+    "title_conversion_area_ratio_threshold": 0.3,  # update paragraph_title -> doc_title
+}
+
+LINE_SETTINGS = {
+    "line_height_iou_threshold": 0.6,  # For line segmentation of OCR results
+    "delimiter_map": {
+        "doc_title": " ",
+        "content": "\n",
     },
     },
 }
 }
 
 
-block_label_mapping = {
+BLOCK_LABEL_MAP = {
     "doc_title_labels": ["doc_title"],  # 文档标题
     "doc_title_labels": ["doc_title"],  # 文档标题
     "paragraph_title_labels": [
     "paragraph_title_labels": [
         "paragraph_title",
         "paragraph_title",

+ 98 - 63
paddlex/inference/pipelines/layout_parsing/utils.py

@@ -27,6 +27,7 @@ from PIL import Image
 
 
 from ..components import convert_points_to_boxes
 from ..components import convert_points_to_boxes
 from ..ocr.result import OCRResult
 from ..ocr.result import OCRResult
+from .setting import REGION_SETTINGS
 
 
 
 
 def get_overlap_boxes_idx(src_boxes: np.ndarray, ref_boxes: np.ndarray) -> List:
 def get_overlap_boxes_idx(src_boxes: np.ndarray, ref_boxes: np.ndarray) -> List:
@@ -173,7 +174,7 @@ def sorted_layout_boxes(res, w):
 def calculate_projection_overlap_ratio(
 def calculate_projection_overlap_ratio(
     bbox1: List[float],
     bbox1: List[float],
     bbox2: List[float],
     bbox2: List[float],
-    orientation: str = "horizontal",
+    direction: str = "horizontal",
     mode="union",
     mode="union",
 ) -> float:
 ) -> float:
     """
     """
@@ -182,13 +183,13 @@ def calculate_projection_overlap_ratio(
     Args:
     Args:
         bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
         bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
         bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
         bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
-        orientation (str): orientation of the projection, "horizontal" or "vertical".
+        direction (str): direction of the projection, "horizontal" or "vertical".
 
 
     Returns:
     Returns:
         float: Line overlap ratio. Returns 0 if there is no overlap.
         float: Line overlap ratio. Returns 0 if there is no overlap.
     """
     """
     start_index, end_index = 1, 3
     start_index, end_index = 1, 3
-    if orientation == "horizontal":
+    if direction == "horizontal":
         start_index, end_index = 0, 2
         start_index, end_index = 0, 2
 
 
     intersection_start = max(bbox1[start_index], bbox2[start_index])
     intersection_start = max(bbox1[start_index], bbox2[start_index])
@@ -241,8 +242,8 @@ def calculate_overlap_ratio(
 
 
     inter_area = inter_width * inter_height
     inter_area = inter_width * inter_height
 
 
-    bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
-    bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
+    bbox1_area = caculate_bbox_area(bbox1)
+    bbox2_area = caculate_bbox_area(bbox2)
 
 
     if mode == "union":
     if mode == "union":
         ref_area = bbox1_area + bbox2_area - inter_area
         ref_area = bbox1_area + bbox2_area - inter_area
@@ -271,7 +272,7 @@ def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
     ]
     ]
     text_orientation = calculate_text_orientation(text_boxes)
     text_orientation = calculate_text_orientation(text_boxes)
 
 
-    match_orientation = "vertical" if text_orientation == "horizontal" else "horizontal"
+    match_direction = "vertical" if text_orientation == "horizontal" else "horizontal"
 
 
     spans = list(zip(rec_boxes, rec_texts, rec_labels))
     spans = list(zip(rec_boxes, rec_texts, rec_labels))
     sort_index = 1
     sort_index = 1
@@ -284,14 +285,14 @@ def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
 
 
     lines = []
     lines = []
     line = [spans[0]]
     line = [spans[0]]
-    line_region_box = spans[0][0][:]
+    line_region_box = spans[0][0].copy()
 
 
     # merge line
     # merge line
     for span in spans[1:]:
     for span in spans[1:]:
         rec_bbox = span[0]
         rec_bbox = span[0]
         if (
         if (
             calculate_projection_overlap_ratio(
             calculate_projection_overlap_ratio(
-                line_region_box, rec_bbox, match_orientation, mode="small"
+                line_region_box, rec_bbox, match_direction, mode="small"
             )
             )
             >= line_height_iou_threshold
             >= line_height_iou_threshold
         ):
         ):
@@ -301,7 +302,7 @@ def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
         else:
         else:
             lines.append(line)
             lines.append(line)
             line = [span]
             line = [span]
-            line_region_box = rec_bbox[:]
+            line_region_box = rec_bbox.copy()
 
 
     lines.append(line)
     lines.append(line)
     return lines, text_orientation
     return lines, text_orientation
@@ -365,12 +366,31 @@ def is_english_letter(char):
     return bool(re.match(r"^[A-Za-z]$", char))
     return bool(re.match(r"^[A-Za-z]$", char))
 
 
 
 
+def is_non_breaking_punctuation(char):
+    """
+    判断一个字符是否是不需要换行的标点符号,包括全角和半角的符号。
+
+    :param char: str, 单个字符
+    :return: bool, 如果字符是不需要换行的标点符号,返回True,否则返回False
+    """
+    non_breaking_punctuations = {
+        ",",  # 半角逗号
+        ",",  # 全角逗号
+        "、",  # 顿号
+        ";",  # 半角分号
+        ";",  # 全角分号
+        ":",  # 半角冒号
+        ":",  # 全角冒号
+    }
+
+    return char in non_breaking_punctuations
+
+
 def format_line(
 def format_line(
     line: List[List[Union[List[int], str]]],
     line: List[List[Union[List[int], str]]],
     block_right_coordinate: int,
     block_right_coordinate: int,
     last_line_span_limit: int = 10,
     last_line_span_limit: int = 10,
     block_label: str = "text",
     block_label: str = "text",
-    # delimiter_map: Dict = {},
 ) -> None:
 ) -> None:
     """
     """
     Format a line of text spans based on layout constraints.
     Format a line of text spans based on layout constraints.
@@ -402,6 +422,7 @@ def format_line(
         and not line_text.endswith("-")
         and not line_text.endswith("-")
         and len(line_text) > 0
         and len(line_text) > 0
         and not is_english_letter(line_text[-1])
         and not is_english_letter(line_text[-1])
+        and not is_non_breaking_punctuation(line_text[-1])
     ):
     ):
         need_new_line = True
         need_new_line = True
 
 
@@ -415,37 +436,35 @@ def format_line(
     return line_text, need_new_line
     return line_text, need_new_line
 
 
 
 
-def split_boxes_by_projection(spans: List[List[int]], orientation, offset=1e-5):
+def split_boxes_by_projection(spans: List[List[int]], direction, offset=1e-5):
     """
     """
-    Check if there is any complete containment in the x-orientation
+    Check if there is any complete containment in the x-direction
     between the bounding boxes and split the containing box accordingly.
     between the bounding boxes and split the containing box accordingly.
 
 
     Args:
     Args:
         spans (list of lists): Each element is a list containing an ndarray of length 4, a text string, and a label.
         spans (list of lists): Each element is a list containing an ndarray of length 4, a text string, and a label.
-        orientation: 'horizontal' or 'vertical', indicating whether the spans are arranged horizontally or vertically.
+        direction: 'horizontal' or 'vertical', indicating whether the spans are arranged horizontally or vertically.
         offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
         offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
     Returns:
     Returns:
         A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
         A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
     """
     """
 
 
     def is_projection_contained(box_a, box_b, start_idx, end_idx):
     def is_projection_contained(box_a, box_b, start_idx, end_idx):
-        """Check if box_a completely contains box_b in the x-orientation."""
+        """Check if box_a completely contains box_b in the x-direction."""
         return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]
         return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]
 
 
     new_boxes = []
     new_boxes = []
-    if orientation == "horizontal":
+    if direction == "horizontal":
         projection_start_index, projection_end_index = 0, 2
         projection_start_index, projection_end_index = 0, 2
     else:
     else:
         projection_start_index, projection_end_index = 1, 3
         projection_start_index, projection_end_index = 1, 3
 
 
     for i in range(len(spans)):
     for i in range(len(spans)):
         span = spans[i]
         span = spans[i]
-        box_a, text, label = span
         is_split = False
         is_split = False
-        for j in range(len(spans)):
-            if i == j:
-                continue
+        for j in range(i, len(spans)):
             box_b = spans[j][0]
             box_b = spans[j][0]
+            box_a, text, label = span
             if is_projection_contained(
             if is_projection_contained(
                 box_a, box_b, projection_start_index, projection_end_index
                 box_a, box_b, projection_start_index, projection_end_index
             ):
             ):
@@ -458,12 +477,13 @@ def split_boxes_by_projection(spans: List[List[int]], orientation, offset=1e-5):
                         - box_a[projection_start_index]
                         - box_a[projection_start_index]
                     )
                     )
                     if w > 1:
                     if w > 1:
-                        box_a[projection_end_index] = (
+                        new_bbox = box_a.copy()
+                        new_bbox[projection_end_index] = (
                             box_b[projection_start_index] - offset
                             box_b[projection_start_index] - offset
                         )
                         )
                         new_boxes.append(
                         new_boxes.append(
                             [
                             [
-                                np.array(box_a),
+                                np.array(new_bbox),
                                 text,
                                 text,
                                 label,
                                 label,
                             ]
                             ]
@@ -562,8 +582,8 @@ def _get_minbox_if_overlap_by_ratio(
             The selected bounding box or None if the overlap ratio is not exceeded.
             The selected bounding box or None if the overlap ratio is not exceeded.
     """
     """
     # Calculate the areas of both bounding boxes
     # Calculate the areas of both bounding boxes
-    area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
-    area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
+    area1 = caculate_bbox_area(bbox1)
+    area2 = caculate_bbox_area(bbox2)
     # Calculate the overlap ratio using a helper function
     # Calculate the overlap ratio using a helper function
     overlap_ratio = calculate_overlap_ratio(bbox1, bbox2, mode="small")
     overlap_ratio = calculate_overlap_ratio(bbox1, bbox2, mode="small")
     # Check if the overlap ratio exceeds the threshold
     # Check if the overlap ratio exceeds the threshold
@@ -683,7 +703,6 @@ def shrink_supplement_region_bbox(
     image_height,
     image_height,
     block_idxes_set,
     block_idxes_set,
     block_bboxes,
     block_bboxes,
-    parameters_config,
 ) -> List:
 ) -> List:
     """
     """
     Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.
     Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.
@@ -695,7 +714,6 @@ def shrink_supplement_region_bbox(
         image_height (int): The height of the image.
         image_height (int): The height of the image.
         block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
         block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
         block_bboxes (dict): The dictionary of block bboxes.
         block_bboxes (dict): The dictionary of block bboxes.
-        parameters_config (dict): The configuration parameters.
 
 
     Returns:
     Returns:
         list: The new region bbox and the matched block idxes.
         list: The new region bbox and the matched block idxes.
@@ -723,11 +741,11 @@ def shrink_supplement_region_bbox(
             overlap_ratio = calculate_overlap_ratio(
             overlap_ratio = calculate_overlap_ratio(
                 tmp_region_bbox, block_bboxes[block_idx], mode="small"
                 tmp_region_bbox, block_bboxes[block_idx], mode="small"
             )
             )
-            if overlap_ratio > parameters_config["region"].get(
+            if overlap_ratio > REGION_SETTINGS.get(
                 "match_block_overlap_ratio_threshold", 0.8
                 "match_block_overlap_ratio_threshold", 0.8
             ):
             ):
                 iner_block_idxes.append(block_idx)
                 iner_block_idxes.append(block_idx)
-            elif overlap_ratio > parameters_config["region"].get(
+            elif overlap_ratio > REGION_SETTINGS.get(
                 "split_block_overlap_ratio_threshold", 0.4
                 "split_block_overlap_ratio_threshold", 0.4
             ):
             ):
                 split_block_idxes.append(block_idx)
                 split_block_idxes.append(block_idx)
@@ -755,7 +773,6 @@ def shrink_supplement_region_bbox(
                         image_height,
                         image_height,
                         iner_block_idxes,
                         iner_block_idxes,
                         block_bboxes,
                         block_bboxes,
-                        parameters_config,
                     )
                     )
                     if len(iner_idxes) == 0:
                     if len(iner_idxes) == 0:
                         continue
                         continue
@@ -799,50 +816,68 @@ def convert_formula_res_to_ocr_format(formula_res_list: List, ocr_res: dict):
         ]
         ]
         ocr_res["dt_polys"].append(poly_points)
         ocr_res["dt_polys"].append(poly_points)
         ocr_res["rec_texts"].append(f"{formula_res['rec_formula']}")
         ocr_res["rec_texts"].append(f"{formula_res['rec_formula']}")
-        ocr_res["rec_boxes"] = np.vstack(
-            (ocr_res["rec_boxes"], [formula_res["dt_polys"]])
-        )
+        if ocr_res["rec_boxes"].size == 0:
+            ocr_res["rec_boxes"] = np.array(formula_res["dt_polys"])
+        else:
+            ocr_res["rec_boxes"] = np.vstack(
+                (ocr_res["rec_boxes"], [formula_res["dt_polys"]])
+            )
         ocr_res["rec_labels"].append("formula")
         ocr_res["rec_labels"].append("formula")
         ocr_res["rec_polys"].append(poly_points)
         ocr_res["rec_polys"].append(poly_points)
         ocr_res["rec_scores"].append(1)
         ocr_res["rec_scores"].append(1)
 
 
 
 
 def caculate_bbox_area(bbox):
 def caculate_bbox_area(bbox):
-    x1, y1, x2, y2 = bbox
+    x1, y1, x2, y2 = map(float, bbox)
     area = abs((x2 - x1) * (y2 - y1))
     area = abs((x2 - x1) * (y2 - y1))
     return area
     return area
 
 
 
 
-def get_show_color(label: str) -> Tuple:
-    label_colors = {
-        # Medium Blue (from 'titles_list')
-        "paragraph_title": (102, 102, 255, 100),
-        "doc_title": (255, 248, 220, 100),  # Cornsilk
-        # Light Yellow (from 'tables_caption_list')
-        "table_title": (255, 255, 102, 100),
-        # Sky Blue (from 'imgs_caption_list')
-        "figure_title": (102, 178, 255, 100),
-        "chart_title": (221, 160, 221, 100),  # Plum
-        "vision_footnote": (144, 238, 144, 100),  # Light Green
-        # Deep Purple (from 'texts_list')
-        "text": (153, 0, 76, 100),
-        # Bright Green (from 'interequations_list')
-        "formula": (0, 255, 0, 100),
-        "abstract": (255, 239, 213, 100),  # Papaya Whip
-        # Medium Green (from 'lists_list' and 'indexs_list')
-        "content": (40, 169, 92, 100),
-        # Neutral Gray (from 'dropped_bbox_list')
-        "seal": (158, 158, 158, 100),
-        # Olive Yellow (from 'tables_body_list')
-        "table": (204, 204, 0, 100),
-        # Bright Green (from 'imgs_body_list')
-        "image": (153, 255, 51, 100),
-        # Bright Green (from 'imgs_body_list')
-        "figure": (153, 255, 51, 100),
-        "chart": (216, 191, 216, 100),  # Thistle
-        # Pale Yellow-Green (from 'tables_footnote_list')
-        "reference": (229, 255, 204, 100),
-        "algorithm": (255, 250, 240, 100),  # Floral White
-    }
+def get_show_color(label: str, order_label=False) -> Tuple:
+    if order_label:
+        label_colors = {
+            "doc_title": (255, 248, 220, 100),  # Cornsilk
+            "doc_title_text": (255, 239, 213, 100),
+            "paragraph_title": (102, 102, 255, 100),
+            "sub_paragraph_title": (102, 178, 255, 100),
+            "vision": (153, 255, 51, 100),
+            "vision_title": (144, 238, 144, 100),  # Light Green
+            "vision_footnote": (144, 238, 144, 100),  # Light Green
+            "normal_text": (153, 0, 76, 100),
+            "cross_layout": (53, 218, 207, 100),  # Thistle
+            "cross_reference": (221, 160, 221, 100),  # Floral White
+        }
+    else:
+        label_colors = {
+            # Medium Blue (from 'titles_list')
+            "paragraph_title": (102, 102, 255, 100),
+            "doc_title": (255, 248, 220, 100),  # Cornsilk
+            # Light Yellow (from 'tables_caption_list')
+            "table_title": (255, 255, 102, 100),
+            # Sky Blue (from 'imgs_caption_list')
+            "figure_title": (102, 178, 255, 100),
+            "chart_title": (221, 160, 221, 100),  # Plum
+            "vision_footnote": (144, 238, 144, 100),  # Light Green
+            # Deep Purple (from 'texts_list')
+            "text": (153, 0, 76, 100),
+            # Bright Green (from 'interequations_list')
+            "formula": (0, 255, 0, 100),
+            "abstract": (255, 239, 213, 100),  # Papaya Whip
+            # Medium Green (from 'lists_list' and 'indexs_list')
+            "content": (40, 169, 92, 100),
+            # Neutral Gray (from 'dropped_bbox_list')
+            "seal": (158, 158, 158, 100),
+            # Olive Yellow (from 'tables_body_list')
+            "table": (204, 204, 0, 100),
+            # Bright Green (from 'imgs_body_list')
+            "image": (153, 255, 51, 100),
+            # Bright Green (from 'imgs_body_list')
+            "figure": (153, 255, 51, 100),
+            "chart": (216, 191, 216, 100),  # Thistle
+            # Pale Yellow-Green (from 'tables_footnote_list')
+            "reference": (229, 255, 204, 100),
+            # "reference_content": (229, 255, 204, 100),
+            "algorithm": (255, 250, 240, 100),  # Floral White
+        }
     default_color = (158, 158, 158, 100)
     default_color = (158, 158, 158, 100)
     return label_colors.get(label, default_color)
     return label_colors.get(label, default_color)

+ 319 - 147
paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py

@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
-from typing import Dict, List, Tuple
+from typing import List, Tuple
 
 
 import numpy as np
 import numpy as np
 
 
-from ..result_v2 import LayoutParsingBlock
+from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
+from ..setting import BLOCK_LABEL_MAP, XYCUT_SETTINGS
 from ..utils import calculate_projection_overlap_ratio
 from ..utils import calculate_projection_overlap_ratio
 
 
 
 
@@ -26,12 +27,12 @@ def get_nearest_edge_distance(
     weight: List[float] = [1.0, 1.0, 1.0, 1.0],
     weight: List[float] = [1.0, 1.0, 1.0, 1.0],
 ) -> Tuple[float]:
 ) -> Tuple[float]:
     """
     """
-    Calculate the nearest edge distance between two bounding boxes, considering orientational weights.
+    Calculate the nearest edge distance between two bounding boxes, considering directional weights.
 
 
     Args:
     Args:
         bbox1 (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
         bbox1 (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
         bbox2 (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
         bbox2 (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
-        weight (list, optional): orientational weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
+        weight (list, optional): directional weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
 
 
     Returns:
     Returns:
         float: The calculated minimum edge distance between the bounding boxes.
         float: The calculated minimum edge distance between the bounding boxes.
@@ -254,8 +255,7 @@ def recursive_xy_cut(
 def reference_insert(
 def reference_insert(
     block: LayoutParsingBlock,
     block: LayoutParsingBlock,
     sorted_blocks: List[LayoutParsingBlock],
     sorted_blocks: List[LayoutParsingBlock],
-    config: Dict,
-    median_width: float = 0.0,
+    **kwargs,
 ):
 ):
     """
     """
     Insert reference block into sorted blocks based on the distance between the block and the nearest sorted block.
     Insert reference block into sorted blocks based on the distance between the block and the nearest sorted block.
@@ -285,8 +285,7 @@ def reference_insert(
 def manhattan_insert(
 def manhattan_insert(
     block: LayoutParsingBlock,
     block: LayoutParsingBlock,
     sorted_blocks: List[LayoutParsingBlock],
     sorted_blocks: List[LayoutParsingBlock],
-    config: Dict,
-    median_width: float = 0.0,
+    **kwargs,
 ):
 ):
     """
     """
     Insert a block into a sorted list of blocks based on the Manhattan distance between the block and the nearest sorted block.
     Insert a block into a sorted list of blocks based on the Manhattan distance between the block and the nearest sorted block.
@@ -315,8 +314,7 @@ def manhattan_insert(
 def weighted_distance_insert(
 def weighted_distance_insert(
     block: LayoutParsingBlock,
     block: LayoutParsingBlock,
     sorted_blocks: List[LayoutParsingBlock],
     sorted_blocks: List[LayoutParsingBlock],
-    config: Dict,
-    median_width: float = 0.0,
+    region: LayoutParsingRegion,
 ):
 ):
     """
     """
     Insert a block into a sorted list of blocks based on the weighted distance between the block and the nearest sorted block.
     Insert a block into a sorted list of blocks based on the weighted distance between the block and the nearest sorted block.
@@ -330,11 +328,8 @@ def weighted_distance_insert(
     Returns:
     Returns:
         sorted_blocks: The updated sorted blocks after insertion.
         sorted_blocks: The updated sorted blocks after insertion.
     """
     """
-    doc_title_labels = config.get("doc_title_labels", [])
-    paragraph_title_labels = config.get("paragraph_title_labels", [])
-    vision_labels = config.get("vision_labels", [])
-    xy_cut_block_labels = config.get("xy_cut_block_labels", [])
-    tolerance_len = config.get("tolerance_len", 2)
+
+    tolerance_len = XYCUT_SETTINGS["edge_distance_compare_tolerance_len"]
     x1, y1, x2, y2 = block.bbox
     x1, y1, x2, y2 = block.bbox
     min_weighted_distance, min_edge_distance, min_up_edge_distance = (
     min_weighted_distance, min_edge_distance, min_up_edge_distance = (
         float("inf"),
         float("inf"),
@@ -347,36 +342,43 @@ def weighted_distance_insert(
         x1_prime, y1_prime, x2_prime, y2_prime = sorted_block.bbox
         x1_prime, y1_prime, x2_prime, y2_prime = sorted_block.bbox
 
 
         # Calculate edge distance
         # Calculate edge distance
-        weight = _get_weights(block.order_label, block.orientation)
+        weight = _get_weights(block.order_label, block.direction)
         edge_distance = get_nearest_edge_distance(block.bbox, sorted_block.bbox, weight)
         edge_distance = get_nearest_edge_distance(block.bbox, sorted_block.bbox, weight)
 
 
-        if block.label in doc_title_labels:
-            disperse = max(1, median_width)
+        if block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
+            disperse = max(1, region.text_line_width)
             tolerance_len = max(tolerance_len, disperse)
             tolerance_len = max(tolerance_len, disperse)
         if block.label == "abstract":
         if block.label == "abstract":
             tolerance_len *= 2
             tolerance_len *= 2
             edge_distance = max(0.1, edge_distance) * 10
             edge_distance = max(0.1, edge_distance) * 10
 
 
         # Calculate up edge distances
         # Calculate up edge distances
-        up_edge_distance = y1_prime
-        left_edge_distance = x1_prime
+        up_edge_distance = y1_prime if region.direction == "horizontal" else -x2_prime
+        left_edge_distance = x1_prime if region.direction == "horizontal" else y1_prime
+        is_below_sorted_block = (
+            y2_prime < y1 if region.direction == "horizontal" else x1_prime > x2
+        )
+
         if (
         if (
-            block.label in xy_cut_block_labels
-            or block.label in doc_title_labels
-            or block.label in paragraph_title_labels
-            or block.label in vision_labels
-        ) and y1 > y2_prime:
-            up_edge_distance = -y2_prime
-            left_edge_distance = -x2_prime
+            block.label not in BLOCK_LABEL_MAP["unordered_labels"]
+            or block.label in BLOCK_LABEL_MAP["doc_title_labels"]
+            or block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]
+            or block.label in BLOCK_LABEL_MAP["vision_labels"]
+        ) and is_below_sorted_block:
+            up_edge_distance = -up_edge_distance
+            left_edge_distance = -left_edge_distance
 
 
         if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
         if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
             up_edge_distance = min_up_edge_distance
             up_edge_distance = min_up_edge_distance
 
 
         # Calculate weighted distance
         # Calculate weighted distance
         weighted_distance = (
         weighted_distance = (
-            +edge_distance * config.get("edge_weight", 10**4)
-            + up_edge_distance * config.get("up_edge_weight", 1)
-            + left_edge_distance * config.get("left_edge_weight", 0.0001)
+            +edge_distance
+            * XYCUT_SETTINGS["distance_weight_map"].get("edge_weight", 10**4)
+            + up_edge_distance
+            * XYCUT_SETTINGS["distance_weight_map"].get("up_edge_weight", 1)
+            + left_edge_distance
+            * XYCUT_SETTINGS["distance_weight_map"].get("left_edge_weight", 0.0001)
         )
         )
 
 
         min_edge_distance = min(edge_distance, min_edge_distance)
         min_edge_distance = min(edge_distance, min_edge_distance)
@@ -411,7 +413,7 @@ def insert_child_blocks(
     if block.child_blocks:
     if block.child_blocks:
         sub_blocks = block.get_child_blocks()
         sub_blocks = block.get_child_blocks()
         sub_blocks.append(block)
         sub_blocks.append(block)
-        sub_blocks = sort_child_blocks(sub_blocks, block.orientation)
+        sub_blocks = sort_child_blocks(sub_blocks, block.direction)
         sorted_blocks[block_idx] = sub_blocks[0]
         sorted_blocks[block_idx] = sub_blocks[0]
         for block in sub_blocks[1:]:
         for block in sub_blocks[1:]:
             block_idx += 1
             block_idx += 1
@@ -419,17 +421,17 @@ def insert_child_blocks(
     return sorted_blocks
     return sorted_blocks
 
 
 
 
-def sort_child_blocks(blocks, orientation="horizontal") -> List[LayoutParsingBlock]:
+def sort_child_blocks(blocks, direction="horizontal") -> List[LayoutParsingBlock]:
     """
     """
     Sort child blocks based on their bounding box coordinates.
     Sort child blocks based on their bounding box coordinates.
 
 
     Args:
     Args:
         blocks: A list of LayoutParsingBlock objects representing the child blocks.
         blocks: A list of LayoutParsingBlock objects representing the child blocks.
-        orientation: Orientation of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
+        direction: direction of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
     Returns:
     Returns:
         sorted_blocks: A sorted list of LayoutParsingBlock objects.
         sorted_blocks: A sorted list of LayoutParsingBlock objects.
     """
     """
-    if orientation == "horizontal":
+    if direction == "horizontal":
         # from top to bottom
         # from top to bottom
         blocks.sort(
         blocks.sort(
             key=lambda x: (
             key=lambda x: (
@@ -453,7 +455,7 @@ def sort_child_blocks(blocks, orientation="horizontal") -> List[LayoutParsingBlo
 
 
 
 
 def _get_weights(label, dircetion="horizontal"):
 def _get_weights(label, dircetion="horizontal"):
-    """Define weights based on the label and orientation."""
+    """Define weights based on the label and direction."""
     if label == "doc_title":
     if label == "doc_title":
         return (
         return (
             [1, 0.1, 0.1, 1] if dircetion == "horizontal" else [0.2, 0.1, 1, 1]
             [1, 0.1, 0.1, 1] if dircetion == "horizontal" else [0.2, 0.1, 1, 1]
@@ -518,15 +520,35 @@ def sort_blocks(blocks, median_width=None, reverse=False):
     return blocks
     return blocks
 
 
 
 
+def sort_normal_blocks(blocks, text_line_height, text_line_width, region_direction):
+    if region_direction == "horizontal":
+        blocks.sort(
+            key=lambda x: (
+                x.bbox[1] // text_line_height,
+                x.bbox[0] // text_line_width,
+                x.bbox[1] ** 2 + x.bbox[0] ** 2,
+            ),
+        )
+    else:
+        blocks.sort(
+            key=lambda x: (
+                -x.bbox[0] // text_line_width,
+                x.bbox[1] // text_line_height,
+                -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
+            ),
+        )
+    return blocks
+
+
 def get_cut_blocks(
 def get_cut_blocks(
-    blocks, cut_orientation, cut_coordinates, overall_region_box, mask_labels=[]
+    blocks, cut_direction, cut_coordinates, overall_region_box, mask_labels=[]
 ):
 ):
     """
     """
-    Cut blocks based on the given cut orientation and coordinates.
+    Cut blocks based on the given cut direction and coordinates.
 
 
     Args:
     Args:
         blocks (list): list of blocks to be cut.
         blocks (list): list of blocks to be cut.
-        cut_orientation (str): cut orientation, either "horizontal" or "vertical".
+        cut_direction (str): cut direction, either "horizontal" or "vertical".
         cut_coordinates (list): list of cut coordinates.
         cut_coordinates (list): list of cut coordinates.
         overall_region_box (list): the overall region box that contains all blocks.
         overall_region_box (list): the overall region box that contains all blocks.
 
 
@@ -537,7 +559,7 @@ def get_cut_blocks(
     # filter out mask blocks,including header, footer, unordered and child_blocks
     # filter out mask blocks,including header, footer, unordered and child_blocks
 
 
     # 0: horizontal, 1: vertical
     # 0: horizontal, 1: vertical
-    cut_aixis = 0 if cut_orientation == "horizontal" else 1
+    cut_aixis = 0 if cut_direction == "horizontal" else 1
     blocks.sort(key=lambda x: x.bbox[cut_aixis + 2])
     blocks.sort(key=lambda x: x.bbox[cut_aixis + 2])
     cut_coordinates.append(float("inf"))
     cut_coordinates.append(float("inf"))
 
 
@@ -567,7 +589,7 @@ def add_split_block(
 ) -> List[LayoutParsingBlock]:
 ) -> List[LayoutParsingBlock]:
     block_bboxes = np.array([block.bbox for block in blocks])
     block_bboxes = np.array([block.bbox for block in blocks])
     discontinuous = calculate_discontinuous_projection(
     discontinuous = calculate_discontinuous_projection(
-        block_bboxes, orientation="vertical"
+        block_bboxes, direction="vertical"
     )
     )
     current_interval = discontinuous[0]
     current_interval = discontinuous[0]
     for interval in discontinuous[1:]:
     for interval in discontinuous[1:]:
@@ -582,22 +604,62 @@ def add_split_block(
         current_interval = interval
         current_interval = interval
 
 
 
 
-def get_adjacent_blocks_by_orientation(
+def get_nearest_blocks(
+    block: LayoutParsingBlock,
+    ref_blocks: List[LayoutParsingBlock],
+    overlap_threshold,
+    direction="horizontal",
+) -> List:
+    """
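+    Collect the reference blocks whose projection overlaps the current block, split into those before and those after it.
+    Args:
+        block (LayoutParsingBlock): The current block.
+        ref_blocks (List[LayoutParsingBlock]): Candidate reference blocks; a candidate with the same index as `block` is skipped.
+        overlap_threshold (float): A candidate is kept only if its projection overlap ratio with `block` is greater than this value.
+        direction (str): Direction used for the projection overlap ('horizontal' or 'vertical'). Default is 'horizontal'.
+    Returns:
+        List[LayoutParsingBlock]: Kept blocks whose start coordinate (y1 for 'horizontal', x1 otherwise) is not greater than the current block's, sorted by descending start coordinate.
+        List[LayoutParsingBlock]: Kept blocks whose start coordinate is greater than the current block's, sorted by ascending start coordinate.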
+    """
+    prev_blocks: List[LayoutParsingBlock] = []
+    post_blocks: List[LayoutParsingBlock] = []
+    sort_index = 1 if direction == "horizontal" else 0
+    for ref_block in ref_blocks:
+        if ref_block.index == block.index:
+            continue
+        overlap_ratio = calculate_projection_overlap_ratio(
+            block.bbox, ref_block.bbox, direction, mode="small"
+        )
+        if overlap_ratio > overlap_threshold:
+            if ref_block.bbox[sort_index] <= block.bbox[sort_index]:
+                prev_blocks.append(ref_block)
+            else:
+                post_blocks.append(ref_block)
+
+    if prev_blocks:
+        prev_blocks.sort(key=lambda x: x.bbox[sort_index], reverse=True)
+    if post_blocks:
+        post_blocks.sort(key=lambda x: x.bbox[sort_index])
+
+    return prev_blocks, post_blocks
+
+
+def get_adjacent_blocks_by_direction(
     blocks: List[LayoutParsingBlock],
     blocks: List[LayoutParsingBlock],
     block_idx: int,
     block_idx: int,
     ref_block_idxes: List[int],
     ref_block_idxes: List[int],
     iou_threshold,
     iou_threshold,
 ) -> List:
 ) -> List:
     """
     """
-    Get the adjacent blocks with the same orientation as the current block.
+    Get the adjacent blocks with the same direction as the current block.
     Args:
     Args:
         block (LayoutParsingBlock): The current block.
         block (LayoutParsingBlock): The current block.
         blocks (List[LayoutParsingBlock]): A list of all blocks.
         blocks (List[LayoutParsingBlock]): A list of all blocks.
         ref_block_idxes (List[int]): A list of indices of reference blocks.
         ref_block_idxes (List[int]): A list of indices of reference blocks.
         iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
         iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
     Returns:
     Returns:
-        Int: The index of the previous block with same orientation.
-        Int: The index of the following block with same orientation.
+        Int: The index of the previous block with same direction.
+        Int: The index of the following block with same direction.
     """
     """
     min_prev_block_distance = float("inf")
     min_prev_block_distance = float("inf")
     prev_block_index = None
     prev_block_index = None
@@ -611,16 +673,16 @@ def get_adjacent_blocks_by_orientation(
         "vision_title",
         "vision_title",
     ]
     ]
 
 
-    # find the nearest text block with same orientation to the current block
+    # find the nearest text block with same direction to the current block
     for ref_block_idx in ref_block_idxes:
     for ref_block_idx in ref_block_idxes:
         ref_block = blocks[ref_block_idx]
         ref_block = blocks[ref_block_idx]
-        ref_block_orientation = ref_block.orientation
+        ref_block_direction = ref_block.direction
         if ref_block.order_label in child_labels:
         if ref_block.order_label in child_labels:
             continue
             continue
         match_block_iou = calculate_projection_overlap_ratio(
         match_block_iou = calculate_projection_overlap_ratio(
             block.bbox,
             block.bbox,
             ref_block.bbox,
             ref_block.bbox,
-            ref_block_orientation,
+            ref_block_direction,
         )
         )
 
 
         child_match_distance_tolerance_len = block.short_side_length / 10
         child_match_distance_tolerance_len = block.short_side_length / 10
@@ -635,38 +697,38 @@ def get_adjacent_blocks_by_orientation(
 
 
         if match_block_iou >= iou_threshold:
         if match_block_iou >= iou_threshold:
             prev_distance = (
             prev_distance = (
-                block.secondary_orientation_start_coordinate
-                - ref_block.secondary_orientation_end_coordinate
+                block.secondary_direction_start_coordinate
+                - ref_block.secondary_direction_end_coordinate
                 + child_match_distance_tolerance_len
                 + child_match_distance_tolerance_len
             ) // 5 + ref_block.start_coordinate / 5000
             ) // 5 + ref_block.start_coordinate / 5000
             next_distance = (
             next_distance = (
-                ref_block.secondary_orientation_start_coordinate
-                - block.secondary_orientation_end_coordinate
+                ref_block.secondary_direction_start_coordinate
+                - block.secondary_direction_end_coordinate
                 + child_match_distance_tolerance_len
                 + child_match_distance_tolerance_len
             ) // 5 + ref_block.start_coordinate / 5000
             ) // 5 + ref_block.start_coordinate / 5000
             if (
             if (
-                ref_block.secondary_orientation_end_coordinate
-                <= block.secondary_orientation_start_coordinate
+                ref_block.secondary_direction_end_coordinate
+                <= block.secondary_direction_start_coordinate
                 + child_match_distance_tolerance_len
                 + child_match_distance_tolerance_len
                 and prev_distance < min_prev_block_distance
                 and prev_distance < min_prev_block_distance
             ):
             ):
                 min_prev_block_distance = prev_distance
                 min_prev_block_distance = prev_distance
                 if (
                 if (
-                    block.secondary_orientation_start_coordinate
-                    - ref_block.secondary_orientation_end_coordinate
+                    block.secondary_direction_start_coordinate
+                    - ref_block.secondary_direction_end_coordinate
                     < gap_tolerance_len
                     < gap_tolerance_len
                 ):
                 ):
                     prev_block_index = ref_block_idx
                     prev_block_index = ref_block_idx
             elif (
             elif (
-                ref_block.secondary_orientation_start_coordinate
-                > block.secondary_orientation_end_coordinate
+                ref_block.secondary_direction_start_coordinate
+                > block.secondary_direction_end_coordinate
                 - child_match_distance_tolerance_len
                 - child_match_distance_tolerance_len
                 and next_distance < min_post_block_distance
                 and next_distance < min_post_block_distance
             ):
             ):
                 min_post_block_distance = next_distance
                 min_post_block_distance = next_distance
                 if (
                 if (
-                    ref_block.secondary_orientation_start_coordinate
-                    - block.secondary_orientation_end_coordinate
+                    ref_block.secondary_direction_start_coordinate
+                    - block.secondary_direction_end_coordinate
                     < gap_tolerance_len
                     < gap_tolerance_len
                 ):
                 ):
                     post_block_index = ref_block_idx
                     post_block_index = ref_block_idx
@@ -684,21 +746,19 @@ def get_adjacent_blocks_by_orientation(
 
 
 
 
 def update_doc_title_child_blocks(
 def update_doc_title_child_blocks(
-    blocks: List[LayoutParsingBlock],
     block: LayoutParsingBlock,
     block: LayoutParsingBlock,
-    prev_idx: int,
-    post_idx: int,
-    config: dict,
+    region: LayoutParsingRegion,
 ) -> None:
 ) -> None:
     """
     """
     Update the child blocks of a document title block.
     Update the child blocks of a document title block.
 
 
     The child blocks need to meet the following conditions:
     The child blocks need to meet the following conditions:
         1. They must be adjacent
         1. They must be adjacent
-        2. They must have the same orientation as the parent block.
+        2. They must have the same direction as the parent block.
         3. Their short side length should be less than 80% of the parent's short side length.
         3. Their short side length should be less than 80% of the parent's short side length.
         4. Their long side length should be less than 150% of the parent's long side length.
         4. Their long side length should be less than 150% of the parent's long side length.
         5. The child block must be text block.
         5. The child block must be text block.
+        6. The nearest edge distance to the parent block must be less than twice the child block's text line height.
 
 
     Args:
     Args:
         blocks (List[LayoutParsingBlock]): overall blocks.
         blocks (List[LayoutParsingBlock]): overall blocks.
@@ -711,11 +771,23 @@ def update_doc_title_child_blocks(
         None
         None
 
 
     """
     """
-    for idx in [prev_idx, post_idx]:
-        if idx is None:
+    ref_blocks = [region.block_map[idx] for idx in region.normal_text_block_idxes]
+    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
+    prev_blocks, post_blocks = get_nearest_blocks(
+        block, ref_blocks, overlap_threshold, block.direction
+    )
+    prev_block = None
+    post_block = None
+
+    if prev_blocks:
+        prev_block = prev_blocks[0]
+    if post_blocks:
+        post_block = post_blocks[0]
+
+    for ref_block in [prev_block, post_block]:
+        if ref_block is None:
             continue
             continue
-        ref_block = blocks[idx]
-        with_seem_orientation = ref_block.orientation == block.orientation
+        with_seem_direction = ref_block.direction == block.direction
 
 
         short_side_length_condition = (
         short_side_length_condition = (
             ref_block.short_side_length < block.short_side_length * 0.8
             ref_block.short_side_length < block.short_side_length * 0.8
@@ -726,30 +798,31 @@ def update_doc_title_child_blocks(
             or ref_block.long_side_length > 1.5 * block.long_side_length
             or ref_block.long_side_length > 1.5 * block.long_side_length
         )
         )
 
 
+        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
+
         if (
         if (
-            with_seem_orientation
+            with_seem_direction
+            and ref_block.label in BLOCK_LABEL_MAP["text_labels"]
             and short_side_length_condition
             and short_side_length_condition
             and long_side_length_condition
             and long_side_length_condition
             and ref_block.num_of_lines < 3
             and ref_block.num_of_lines < 3
+            and nearest_edge_distance < ref_block.text_line_height * 2
         ):
         ):
             ref_block.order_label = "doc_title_text"
             ref_block.order_label = "doc_title_text"
             block.append_child_block(ref_block)
             block.append_child_block(ref_block)
-            config["text_block_idxes"].remove(idx)
+            region.normal_text_block_idxes.remove(ref_block.index)
 
 
 
 
 def update_paragraph_title_child_blocks(
 def update_paragraph_title_child_blocks(
-    blocks: List[LayoutParsingBlock],
     block: LayoutParsingBlock,
     block: LayoutParsingBlock,
-    prev_idx: int,
-    post_idx: int,
-    config: dict,
+    region: LayoutParsingRegion,
 ) -> None:
 ) -> None:
     """
     """
     Update the child blocks of a paragraph title block.
     Update the child blocks of a paragraph title block.
 
 
     The child blocks need to meet the following conditions:
     The child blocks need to meet the following conditions:
         1. They must be adjacent
         1. They must be adjacent
-        2. They must have the same orientation as the parent block.
+        2. They must have the same direction as the parent block.
         3. The child block must be paragraph title block.
         3. The child block must be paragraph title block.
 
 
     Args:
     Args:
@@ -763,31 +836,39 @@ def update_paragraph_title_child_blocks(
         None
         None
 
 
     """
     """
-    paragraph_title_labels = config.get("paragraph_title_labels", [])
-    for idx in [prev_idx, post_idx]:
-        if idx is None:
-            continue
-        ref_block = blocks[idx]
-        min_height = min(block.height, ref_block.height)
-        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
-        with_seem_orientation = ref_block.orientation == block.orientation
-        if (
-            with_seem_orientation
-            and ref_block.label in paragraph_title_labels
-            and nearest_edge_distance <= min_height * 2
-        ):
-            ref_block.order_label = "sub_paragraph_title"
-            block.append_child_block(ref_block)
-            config["paragraph_title_block_idxes"].remove(idx)
+    if block.order_label == "sub_paragraph_title":
+        return
+    ref_blocks = [
+        region.block_map[idx]
+        for idx in region.paragraph_title_block_idxes + region.normal_text_block_idxes
+    ]
+    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
+    prev_blocks, post_blocks = get_nearest_blocks(
+        block, ref_blocks, overlap_threshold, block.direction
+    )
+    for ref_blocks in [prev_blocks, post_blocks]:
+        for ref_block in ref_blocks:
+            if ref_block.label not in BLOCK_LABEL_MAP["paragraph_title_labels"]:
+                break
+            min_text_line_height = min(
+                block.text_line_height, ref_block.text_line_height
+            )
+            nearest_edge_distance = get_nearest_edge_distance(
+                block.bbox, ref_block.bbox
+            )
+            with_seem_direction = ref_block.direction == block.direction
+            if (
+                with_seem_direction
+                and nearest_edge_distance <= min_text_line_height * 1.5
+            ):
+                ref_block.order_label = "sub_paragraph_title"
+                block.append_child_block(ref_block)
+                region.paragraph_title_block_idxes.remove(ref_block.index)
 
 
 
 
 def update_vision_child_blocks(
 def update_vision_child_blocks(
-    blocks: List[LayoutParsingBlock],
     block: LayoutParsingBlock,
     block: LayoutParsingBlock,
-    ref_block_idxes: List[int],
-    prev_idx: int,
-    post_idx: int,
-    config: dict,
+    region: LayoutParsingRegion,
 ) -> None:
 ) -> None:
     """
     """
     Update the child blocks of a vision block.
     Update the child blocks of a vision block.
@@ -816,69 +897,122 @@ def update_vision_child_blocks(
         None
         None
 
 
     """
     """
-    vision_title_labels = config.get("vision_title_labels", [])
-    text_labels = config.get("text_labels", [])
-    for idx in [prev_idx, post_idx]:
-        if idx is None:
-            continue
-        ref_block = blocks[idx]
-        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
-        block_center = block.get_centroid()
-        ref_block_center = ref_block.get_centroid()
-        if ref_block.label in vision_title_labels and nearest_edge_distance <= min(
-            block.height * 0.5, ref_block.height * 2
-        ):
-            ref_block.order_label = "vision_title"
-            block.append_child_block(ref_block)
-            config["vision_title_block_idxes"].remove(idx)
-        elif (
-            nearest_edge_distance <= 15
-            and ref_block.short_side_length < block.short_side_length
-            and ref_block.long_side_length < 0.5 * block.long_side_length
-            and ref_block.orientation == block.orientation
-            and (
-                abs(block_center[0] - ref_block_center[0]) < 10
-                or (
-                    block.bbox[0] - ref_block.bbox[0] < 10
-                    and ref_block.num_of_lines == 1
-                )
-                or (
-                    block.bbox[2] - ref_block.bbox[2] < 10
-                    and ref_block.num_of_lines == 1
-                )
+    ref_blocks = [
+        region.block_map[idx]
+        for idx in region.normal_text_block_idxes + region.vision_title_block_idxes
+    ]
+    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
+    has_vision_footnote = False
+    has_vision_title = False
+    for direction in [block.direction, block.secondary_direction]:
+        prev_blocks, post_blocks = get_nearest_blocks(
+            block, ref_blocks, overlap_threshold, direction
+        )
+        for ref_block in prev_blocks:
+            if (
+                ref_block.label
+                not in BLOCK_LABEL_MAP["text_labels"]
+                + BLOCK_LABEL_MAP["vision_title_labels"]
+            ):
+                break
+            nearest_edge_distance = get_nearest_edge_distance(
+                block.bbox, ref_block.bbox
             )
             )
-        ):
-            has_vision_footnote = False
-            if len(block.child_blocks) > 0:
-                for child_block in block.child_blocks:
-                    if child_block.label in text_labels:
-                        has_vision_footnote = True
-            if not has_vision_footnote:
-                ref_block.order_label = "vision_footnote"
+            block_center = block.get_centroid()
+            ref_block_center = ref_block.get_centroid()
+            if ref_block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
+                has_vision_title = True
+                ref_block.order_label = "vision_title"
                 block.append_child_block(ref_block)
                 block.append_child_block(ref_block)
-                config["text_block_idxes"].remove(idx)
+                region.vision_title_block_idxes.remove(ref_block.index)
+            if ref_block.label in BLOCK_LABEL_MAP["text_labels"]:
+                if (
+                    not has_vision_footnote
+                    and nearest_edge_distance <= block.text_line_height * 2
+                    and ref_block.short_side_length < block.short_side_length
+                    and ref_block.long_side_length < 0.5 * block.long_side_length
+                    and ref_block.direction == block.direction
+                    and (
+                        abs(block_center[0] - ref_block_center[0]) < 10
+                        or (
+                            block.bbox[0] - ref_block.bbox[0] < 10
+                            and ref_block.num_of_lines == 1
+                        )
+                        or (
+                            block.bbox[2] - ref_block.bbox[2] < 10
+                            and ref_block.num_of_lines == 1
+                        )
+                    )
+                ):
+                    has_vision_footnote = True
+                    ref_block.order_label = "vision_footnote"
+                    block.append_child_block(ref_block)
+                    region.normal_text_block_idxes.remove(ref_block.index)
+                break
+        for ref_block in post_blocks:
+            if (
+                has_vision_footnote
+                and ref_block.label in BLOCK_LABEL_MAP["text_labels"]
+            ):
+                break
+            nearest_edge_distance = get_nearest_edge_distance(
+                block.bbox, ref_block.bbox
+            )
+            block_center = block.get_centroid()
+            ref_block_center = ref_block.get_centroid()
+            if ref_block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
+                has_vision_title = True
+                ref_block.order_label = "vision_title"
+                block.append_child_block(ref_block)
+                region.vision_title_block_idxes.remove(ref_block.index)
+            if ref_block.label in BLOCK_LABEL_MAP["text_labels"]:
+                if (
+                    not has_vision_footnote
+                    and nearest_edge_distance <= block.text_line_height * 2
+                    and ref_block.short_side_length < block.short_side_length
+                    and ref_block.long_side_length < 0.5 * block.long_side_length
+                    and ref_block.direction == block.direction
+                    and (
+                        abs(block_center[0] - ref_block_center[0]) < 10
+                        or (
+                            block.bbox[0] - ref_block.bbox[0] < 10
+                            and ref_block.num_of_lines == 1
+                        )
+                        or (
+                            block.bbox[2] - ref_block.bbox[2] < 10
+                            and ref_block.num_of_lines == 1
+                        )
+                    )
+                ):
+                    has_vision_footnote = True
+                    ref_block.order_label = "vision_footnote"
+                    block.append_child_block(ref_block)
+                    region.normal_text_block_idxes.remove(ref_block.index)
+                break
+        if has_vision_title:
+            break
 
 
 
 
 def calculate_discontinuous_projection(
 def calculate_discontinuous_projection(
-    boxes, orientation="horizontal", return_num=False
+    boxes, direction="horizontal", return_num=False
 ) -> List:
 ) -> List:
     """
     """
-    Calculate the discontinuous projection of boxes along the specified orientation.
+    Calculate the discontinuous projection of boxes along the specified direction.
 
 
     Args:
     Args:
         boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
         boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
-        orientation (str): orientation along which to perform the projection ('horizontal' or 'vertical').
+        direction (str): direction along which to perform the projection ('horizontal' or 'vertical').
 
 
     Returns:
     Returns:
         list: List of tuples representing the merged intervals.
         list: List of tuples representing the merged intervals.
     """
     """
     boxes = np.array(boxes)
     boxes = np.array(boxes)
-    if orientation == "horizontal":
+    if direction == "horizontal":
         intervals = boxes[:, [0, 2]]
         intervals = boxes[:, [0, 2]]
-    elif orientation == "vertical":
+    elif direction == "vertical":
         intervals = boxes[:, [1, 3]]
         intervals = boxes[:, [1, 3]]
     else:
     else:
-        raise ValueError("orientation must be 'horizontal' or 'vertical'")
+        raise ValueError("direction must be 'horizontal' or 'vertical'")
 
 
     intervals = intervals[np.argsort(intervals[:, 0])]
     intervals = intervals[np.argsort(intervals[:, 0])]
 
 
@@ -904,15 +1038,53 @@ def calculate_discontinuous_projection(
     return merged_intervals
     return merged_intervals
 
 
 
 
+def is_projection_consistent(blocks, intervals, direction="horizontal"):
+
+    for interval in intervals:
+        if direction == "horizontal":
+            start_index, stop_index = 0, 2
+            interval_box = [interval[0], 0, interval[1], 1]
+        else:
+            start_index, stop_index = 1, 3
+            interval_box = [0, interval[0], 1, interval[1]]
+        same_interval_bboxes = []
+        for block in blocks:
+            overlap_ratio = calculate_projection_overlap_ratio(
+                interval_box, block.bbox, direction=direction
+            )
+            if overlap_ratio > 0 and block.label in BLOCK_LABEL_MAP["text_labels"]:
+                same_interval_bboxes.append(block.bbox)
+        start_coordinates = [bbox[start_index] for bbox in same_interval_bboxes]
+        if start_coordinates:
+            min_start_coordinate = min(start_coordinates)
+            max_start_coordinate = max(start_coordinates)
+            is_start_consistent = (
+                False
+                if max_start_coordinate - min_start_coordinate
+                >= abs(interval[0] - interval[1]) * 0.05
+                else True
+            )
+            stop_coordinates = [bbox[stop_index] for bbox in same_interval_bboxes]
+            min_stop_coordinate = min(stop_coordinates)
+            max_stop_coordinate = max(stop_coordinates)
+            if (
+                max_stop_coordinate - min_stop_coordinate
+                >= abs(interval[0] - interval[1]) * 0.05
+                and is_start_consistent
+            ):
+                return False
+    return True
+
+
 def shrink_overlapping_boxes(
 def shrink_overlapping_boxes(
-    boxes, orientation="horizontal", min_threshold=0, max_threshold=0.1
+    boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
 ) -> List:
 ) -> List:
     """
     """
-    Shrink overlapping boxes along the specified orientation.
+    Shrink overlapping boxes along the specified direction.
 
 
     Args:
     Args:
         boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
         boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
-        orientation (str): orientation along which to perform the shrinking ('horizontal' or 'vertical').
+        direction (str): direction along which to perform the shrinking ('horizontal' or 'vertical').
         min_threshold (float): Minimum threshold for shrinking. Default is 0.
         min_threshold (float): Minimum threshold for shrinking. Default is 0.
         max_threshold (float): Maximum threshold for shrinking. Default is 0.1.
         max_threshold (float): Maximum threshold for shrinking. Default is 0.1.
 
 
@@ -924,14 +1096,14 @@ def shrink_overlapping_boxes(
         x1, y1, x2, y2 = current_block.bbox
         x1, y1, x2, y2 = current_block.bbox
         x1_prime, y1_prime, x2_prime, y2_prime = block.bbox
         x1_prime, y1_prime, x2_prime, y2_prime = block.bbox
         cut_iou = calculate_projection_overlap_ratio(
         cut_iou = calculate_projection_overlap_ratio(
-            current_block.bbox, block.bbox, orientation=orientation
+            current_block.bbox, block.bbox, direction=direction
         )
         )
         match_iou = calculate_projection_overlap_ratio(
         match_iou = calculate_projection_overlap_ratio(
             current_block.bbox,
             current_block.bbox,
             block.bbox,
             block.bbox,
-            orientation="horizontal" if orientation == "vertical" else "vertical",
+            direction="horizontal" if direction == "vertical" else "vertical",
         )
         )
-        if orientation == "vertical":
+        if direction == "vertical":
             if (
             if (
                 (match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
                 (match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
                 or y2 == y1_prime
                 or y2 == y1_prime

+ 198 - 200
paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py

@@ -12,24 +12,25 @@
 # See the License for the specific language governing permissions and
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # limitations under the License.
 
 
-from typing import Any, Dict, List, Tuple
+from typing import Dict, List, Tuple
 
 
 import numpy as np
 import numpy as np
 
 
-from ..result_v2 import LayoutParsingBlock
+from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
+from ..setting import BLOCK_LABEL_MAP
 from ..utils import calculate_overlap_ratio, calculate_projection_overlap_ratio
 from ..utils import calculate_overlap_ratio, calculate_projection_overlap_ratio
 from .utils import (
 from .utils import (
     calculate_discontinuous_projection,
     calculate_discontinuous_projection,
-    get_adjacent_blocks_by_orientation,
     get_cut_blocks,
     get_cut_blocks,
     get_nearest_edge_distance,
     get_nearest_edge_distance,
     insert_child_blocks,
     insert_child_blocks,
+    is_projection_consistent,
     manhattan_insert,
     manhattan_insert,
     recursive_xy_cut,
     recursive_xy_cut,
     recursive_yx_cut,
     recursive_yx_cut,
     reference_insert,
     reference_insert,
     shrink_overlapping_boxes,
     shrink_overlapping_boxes,
-    sort_blocks,
+    sort_normal_blocks,
     update_doc_title_child_blocks,
     update_doc_title_child_blocks,
     update_paragraph_title_child_blocks,
     update_paragraph_title_child_blocks,
     update_vision_child_blocks,
     update_vision_child_blocks,
@@ -38,8 +39,7 @@ from .utils import (
 
 
 
 
 def pre_process(
 def pre_process(
-    blocks: List[LayoutParsingBlock],
-    config: Dict,
+    region: LayoutParsingRegion,
 ) -> List:
 ) -> List:
     """
     """
     Preprocess the layout for sorting purposes.
     Preprocess the layout for sorting purposes.
@@ -49,147 +49,116 @@ def pre_process(
     2. Match the blocks with their children.
     2. Match the blocks with their children.
 
 
     Args:
     Args:
-        blocks (List[LayoutParsingBlock]): A list of LayoutParsingBlock objects representing the layout.
-        config (Dict): Configuration parameters that include settings for pre-cutting and sorting.
+        region: LayoutParsingRegion, the layout region to be pre-processed.
 
 
     Returns:
     Returns:
         List: A list of pre-cut layout block lists.
         List: A list of pre-cut layout block lists.
     """
     """
-    region_bbox = config.get("region_bbox", None)
-    region_x_center = (region_bbox[0] + region_bbox[2]) / 2
-    region_y_center = (region_bbox[1] + region_bbox[3]) / 2
-
-    header_block_idxes = config.get("header_block_idxes", [])
-    header_blocks = []
-    for idx in header_block_idxes:
-        blocks[idx].order_label = "header"
-        header_blocks.append(blocks[idx])
-
-    unordered_block_idxes = config.get("unordered_block_idxes", [])
-    unordered_blocks = []
-    for idx in unordered_block_idxes:
-        blocks[idx].order_label = "unordered"
-        unordered_blocks.append(blocks[idx])
-
-    footer_block_idxes = config.get("footer_block_idxes", [])
-    footer_blocks = []
-    for idx in footer_block_idxes:
-        blocks[idx].order_label = "footer"
-        footer_blocks.append(blocks[idx])
-
-    mask_labels = ["header", "unordered", "footer"]
-    child_labels = [
+    mask_labels = [
+        "header",
+        "unordered",
+        "footer",
         "vision_footnote",
         "vision_footnote",
         "sub_paragraph_title",
         "sub_paragraph_title",
         "doc_title_text",
         "doc_title_text",
         "vision_title",
         "vision_title",
     ]
     ]
     pre_cut_block_idxes = []
     pre_cut_block_idxes = []
-    for block_idx, block in enumerate(blocks):
-        if block.label in mask_labels:
-            continue
-
-        if block.order_label not in child_labels:
-            update_region_label(blocks, config, block_idx)
-
-        block_orientation = block.orientation
-        if block_orientation == "horizontal":
-            region_bbox_center = region_x_center
+    block_map = region.block_map
+    blocks: List[LayoutParsingBlock] = list(block_map.values())
+    for block in blocks:
+        if block.order_label not in mask_labels:
+            update_region_label(block, region)
+
+        block_direction = block.direction
+        if block_direction == "horizontal":
             tolerance_len = block.long_side_length // 5
             tolerance_len = block.long_side_length // 5
         else:
         else:
-            region_bbox_center = region_y_center
             tolerance_len = block.short_side_length // 10
             tolerance_len = block.short_side_length // 10
 
 
         block_center = (block.start_coordinate + block.end_coordinate) / 2
         block_center = (block.start_coordinate + block.end_coordinate) / 2
-        center_offset = abs(block_center - region_bbox_center)
+        center_offset = abs(block_center - region.direction_center_coordinate)
         is_centered = center_offset <= tolerance_len
         is_centered = center_offset <= tolerance_len
 
 
         if is_centered:
         if is_centered:
-            pre_cut_block_idxes.append(block_idx)
+            pre_cut_block_idxes.append(block.index)
 
 
     pre_cut_list = []
     pre_cut_list = []
-    cut_orientation = "vertical"
+    cut_direction = region.secondary_direction
     cut_coordinates = []
     cut_coordinates = []
     discontinuous = []
     discontinuous = []
-    mask_labels = child_labels + mask_labels
     all_boxes = np.array(
     all_boxes = np.array(
         [block.bbox for block in blocks if block.order_label not in mask_labels]
         [block.bbox for block in blocks if block.order_label not in mask_labels]
     )
     )
     if len(all_boxes) == 0:
     if len(all_boxes) == 0:
-        return header_blocks, pre_cut_list, footer_blocks, unordered_blocks
+        return pre_cut_list
     if pre_cut_block_idxes:
     if pre_cut_block_idxes:
-        horizontal_cut_num = 0
-        for block_idx in pre_cut_block_idxes:
-            block = blocks[block_idx]
-            horizontal_cut_num += (
-                1 if block.secondary_orientation == "horizontal" else 0
-            )
-        cut_orientation = (
-            "horizontal"
-            if horizontal_cut_num > len(pre_cut_block_idxes) * 0.5
-            else "vertical"
-        )
         discontinuous, num_list = calculate_discontinuous_projection(
         discontinuous, num_list = calculate_discontinuous_projection(
-            all_boxes, orientation=cut_orientation, return_num=True
+            all_boxes, direction=cut_direction, return_num=True
         )
         )
         for idx in pre_cut_block_idxes:
         for idx in pre_cut_block_idxes:
-            block = blocks[idx]
+            block = block_map[idx]
             if (
             if (
                 block.order_label not in mask_labels
                 block.order_label not in mask_labels
-                and block.secondary_orientation == cut_orientation
+                and block.secondary_direction == cut_direction
             ):
             ):
                 if (
                 if (
-                    block.secondary_orientation_start_coordinate,
-                    block.secondary_orientation_end_coordinate,
+                    block.secondary_direction_start_coordinate,
+                    block.secondary_direction_end_coordinate,
                 ) in discontinuous:
                 ) in discontinuous:
                     idx = discontinuous.index(
                     idx = discontinuous.index(
                         (
                         (
-                            block.secondary_orientation_start_coordinate,
-                            block.secondary_orientation_end_coordinate,
+                            block.secondary_direction_start_coordinate,
+                            block.secondary_direction_end_coordinate,
                         )
                         )
                     )
                     )
                     if num_list[idx] == 1:
                     if num_list[idx] == 1:
                         cut_coordinates.append(
                         cut_coordinates.append(
-                            block.secondary_orientation_start_coordinate
+                            block.secondary_direction_start_coordinate
                         )
                         )
-                        cut_coordinates.append(
-                            block.secondary_orientation_end_coordinate
-                        )
-    if not discontinuous:
-        discontinuous = calculate_discontinuous_projection(
-            all_boxes, orientation=cut_orientation
-        )
-    current_interval = discontinuous[0]
-    for interval in discontinuous[1:]:
-        gap_len = interval[0] - current_interval[1]
-        if gap_len >= 60:
-            cut_coordinates.append(current_interval[1])
-        elif gap_len > 40:
-            x1, _, x2, __ = region_bbox
-            y1 = current_interval[1]
-            y2 = interval[0]
-            bbox = [x1, y1, x2, y2]
-            ref_interval = interval[0] - current_interval[1]
-            ref_bboxes = []
-            for block in blocks:
-                if get_nearest_edge_distance(bbox, block.bbox) < ref_interval * 2:
-                    ref_bboxes.append(block.bbox)
+                        cut_coordinates.append(block.secondary_direction_end_coordinate)
+    secondary_discontinuous = calculate_discontinuous_projection(
+        all_boxes, direction=region.direction
+    )
+    if len(secondary_discontinuous) == 1:
+        if not discontinuous:
             discontinuous = calculate_discontinuous_projection(
             discontinuous = calculate_discontinuous_projection(
-                ref_bboxes, orientation="horizontal"
+                all_boxes, direction=cut_direction
             )
             )
-            if len(discontinuous) != 2:
+        current_interval = discontinuous[0]
+        for interval in discontinuous[1:]:
+            gap_len = interval[0] - current_interval[1]
+            if gap_len >= region.text_line_height * 5:
                 cut_coordinates.append(current_interval[1])
                 cut_coordinates.append(current_interval[1])
-        current_interval = interval
+            elif gap_len > region.text_line_height * 2:
+                x1, _, x2, __ = region.bbox
+                y1 = current_interval[1]
+                y2 = interval[0]
+                bbox = [x1, y1, x2, y2]
+                ref_interval = interval[0] - current_interval[1]
+                ref_bboxes = []
+                for block in blocks:
+                    if get_nearest_edge_distance(bbox, block.bbox) < ref_interval * 2:
+                        ref_bboxes.append(block.bbox)
+                discontinuous = calculate_discontinuous_projection(
+                    ref_bboxes, direction=region.direction
+                )
+                if len(discontinuous) != 2:
+                    cut_coordinates.append(current_interval[1])
+            current_interval = interval
     cut_list = get_cut_blocks(
     cut_list = get_cut_blocks(
-        blocks, cut_orientation, cut_coordinates, region_bbox, mask_labels
+        blocks, cut_direction, cut_coordinates, region.bbox, mask_labels
     )
     )
     pre_cut_list.extend(cut_list)
     pre_cut_list.extend(cut_list)
+    if region.direction == "vertical":
+        pre_cut_list = pre_cut_list[::-1]
 
 
-    return header_blocks, pre_cut_list, footer_blocks, unordered_blocks
+    return pre_cut_list
 
 
 
 
 def update_region_label(
 def update_region_label(
-    blocks: List[LayoutParsingBlock], config: Dict[str, Any], block_idx: int
+    block: LayoutParsingBlock,
+    region: LayoutParsingRegion,
 ) -> None:
 ) -> None:
     """
     """
     Update the region label of a block based on its label and match the block with its children.
     Update the region label of a block based on its label and match the block with its children.
@@ -202,65 +171,45 @@ def update_region_label(
     Returns:
     Returns:
         None
         None
     """
     """
-
-    # special title block labels
-    doc_title_labels = config.get("doc_title_labels", [])
-    paragraph_title_labels = config.get("paragraph_title_labels", [])
-    vision_labels = config.get("vision_labels", [])
-
-    block = blocks[block_idx]
-    if block.label in doc_title_labels:
+    if block.label in BLOCK_LABEL_MAP["header_labels"]:
+        block.order_label = "header"
+    elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
         block.order_label = "doc_title"
         block.order_label = "doc_title"
-    # Force the orientation of vision type to be horizontal
-    if block.label in vision_labels:
+    elif (
+        block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]
+        and block.order_label is None
+    ):
+        block.order_label = "paragraph_title"
+    elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
         block.order_label = "vision"
         block.order_label = "vision"
         block.num_of_lines = 1
         block.num_of_lines = 1
-        block.update_orientation_info()
-    # some paragraph title block may be labeled as sub_title, so we need to check if block.order_label is "other"(default).
-    if block.label in paragraph_title_labels and block.order_label == "other":
-        block.order_label = "paragraph_title"
+        block.update_direction_info()
+    elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
+        block.order_label = "footer"
+    elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
+        block.order_label = "unordered"
+    else:
+        block.order_label = "normal_text"
 
 
     # only vision, doc title and paragraph title blocks can have child blocks
     # only vision, doc title and paragraph title blocks can have child blocks
     if block.order_label not in ["vision", "doc_title", "paragraph_title"]:
     if block.order_label not in ["vision", "doc_title", "paragraph_title"]:
         return
         return
 
 
-    iou_threshold = config.get("child_block_match_iou_threshold", 0.1)
     # match doc title text block
     # match doc title text block
     if block.order_label == "doc_title":
     if block.order_label == "doc_title":
-        text_block_idxes = config.get("text_block_idxes", [])
-        prev_idx, post_idx = get_adjacent_blocks_by_orientation(
-            blocks, block_idx, text_block_idxes, iou_threshold
-        )
-        update_doc_title_child_blocks(blocks, block, prev_idx, post_idx, config)
+        update_doc_title_child_blocks(block, region)
     # match sub title block
     # match sub title block
     elif block.order_label == "paragraph_title":
     elif block.order_label == "paragraph_title":
-        iou_threshold = config.get("sub_title_match_iou_threshold", 0.1)
-        paragraph_title_block_idxes = config.get("paragraph_title_block_idxes", [])
-        text_block_idxes = config.get("text_block_idxes", [])
-        megred_block_idxes = text_block_idxes + paragraph_title_block_idxes
-        prev_idx, post_idx = get_adjacent_blocks_by_orientation(
-            blocks, block_idx, megred_block_idxes, iou_threshold
-        )
-        update_paragraph_title_child_blocks(blocks, block, prev_idx, post_idx, config)
-    # match vision title block
+        update_paragraph_title_child_blocks(block, region)
+    # match vision title block and vision footnote block
     elif block.order_label == "vision":
     elif block.order_label == "vision":
-        # for matching vision title block
-        vision_title_block_idxes = config.get("vision_title_block_idxes", [])
-        # for matching vision footnote block
-        text_block_idxes = config.get("text_block_idxes", [])
-        megred_block_idxes = text_block_idxes + vision_title_block_idxes
-        # Some vision title block may be matched with multiple vision title block, so we need to try multiple times
-        for i in range(3):
-            prev_idx, post_idx = get_adjacent_blocks_by_orientation(
-                blocks, block_idx, megred_block_idxes, iou_threshold
-            )
-            update_vision_child_blocks(
-                blocks, block, megred_block_idxes, prev_idx, post_idx, config
-            )
+        update_vision_child_blocks(block, region)
 
 
 
 
 def get_layout_structure(
 def get_layout_structure(
     blocks: List[LayoutParsingBlock],
     blocks: List[LayoutParsingBlock],
+    region_direction: str,
+    region_secondary_direction: str,
 ) -> Tuple[List[Dict[str, any]], bool]:
 ) -> Tuple[List[Dict[str, any]], bool]:
     """
     """
     Determine the layout cross column of blocks.
     Determine the layout cross column of blocks.
@@ -276,7 +225,7 @@ def get_layout_structure(
         key=lambda x: (x.bbox[0], x.width),
         key=lambda x: (x.bbox[0], x.width),
     )
     )
 
 
-    mask_labels = ["doc_title", "cross_text", "cross_reference"]
+    mask_labels = ["doc_title", "cross_layout", "cross_reference"]
     for block_idx, block in enumerate(blocks):
     for block_idx, block in enumerate(blocks):
         if block.order_label in mask_labels:
         if block.order_label in mask_labels:
             continue
             continue
@@ -288,16 +237,16 @@ def get_layout_structure(
             bbox_iou = calculate_overlap_ratio(block.bbox, ref_block.bbox)
             bbox_iou = calculate_overlap_ratio(block.bbox, ref_block.bbox)
             if bbox_iou > 0:
             if bbox_iou > 0:
                 if ref_block.order_label == "vision":
                 if ref_block.order_label == "vision":
-                    ref_block.order_label = "cross_text"
+                    ref_block.order_label = "cross_layout"
                     break
                     break
                 if block.order_label == "vision" or block.area < ref_block.area:
                 if block.order_label == "vision" or block.area < ref_block.area:
-                    block.order_label = "cross_text"
+                    block.order_label = "cross_layout"
                     break
                     break
 
 
             match_projection_iou = calculate_projection_overlap_ratio(
             match_projection_iou = calculate_projection_overlap_ratio(
                 block.bbox,
                 block.bbox,
                 ref_block.bbox,
                 ref_block.bbox,
-                "horizontal",
+                region_direction,
             )
             )
             if match_projection_iou > 0:
             if match_projection_iou > 0:
                 for second_ref_idx, second_ref_block in enumerate(blocks):
                 for second_ref_idx, second_ref_block in enumerate(blocks):
@@ -312,57 +261,59 @@ def get_layout_structure(
                     )
                     )
                     if bbox_iou > 0.1:
                     if bbox_iou > 0.1:
                         if second_ref_block.order_label == "vision":
                         if second_ref_block.order_label == "vision":
-                            second_ref_block.order_label = "cross_text"
+                            second_ref_block.order_label = "cross_layout"
                             break
                             break
                         if (
                         if (
                             block.order_label == "vision"
                             block.order_label == "vision"
                             or block.area < second_ref_block.area
                             or block.area < second_ref_block.area
                         ):
                         ):
-                            block.order_label = "cross_text"
+                            block.order_label = "cross_layout"
                             break
                             break
 
 
                     second_match_projection_iou = calculate_projection_overlap_ratio(
                     second_match_projection_iou = calculate_projection_overlap_ratio(
                         block.bbox,
                         block.bbox,
                         second_ref_block.bbox,
                         second_ref_block.bbox,
-                        "horizontal",
+                        region_direction,
                     )
                     )
                     ref_match_projection_iou = calculate_projection_overlap_ratio(
                     ref_match_projection_iou = calculate_projection_overlap_ratio(
                         ref_block.bbox,
                         ref_block.bbox,
                         second_ref_block.bbox,
                         second_ref_block.bbox,
-                        "horizontal",
+                        region_direction,
                     )
                     )
                     ref_match_projection_iou_ = calculate_projection_overlap_ratio(
                     ref_match_projection_iou_ = calculate_projection_overlap_ratio(
                         ref_block.bbox,
                         ref_block.bbox,
                         second_ref_block.bbox,
                         second_ref_block.bbox,
-                        "vertical",
+                        region_secondary_direction,
                     )
                     )
                     if (
                     if (
                         second_match_projection_iou > 0
                         second_match_projection_iou > 0
                         and ref_match_projection_iou == 0
                         and ref_match_projection_iou == 0
                         and ref_match_projection_iou_ > 0
                         and ref_match_projection_iou_ > 0
-                        and "vision"
-                        not in [ref_block.order_label, second_ref_block.order_label]
                     ):
                     ):
-                        block.order_label = (
-                            "cross_reference"
-                            if block.label == "reference"
-                            else "cross_text"
-                        )
+                        if block.order_label == "vision" or (
+                            ref_block.order_label == "normal_text"
+                            and second_ref_block.order_label == "normal_text"
+                        ):
+                            block.order_label = (
+                                "cross_reference"
+                                if block.label == "reference"
+                                else "cross_layout"
+                            )
 
 
 
 
 def sort_by_xycut(
 def sort_by_xycut(
     block_bboxes: List,
     block_bboxes: List,
-    orientation: int = 0,
+    direction: str = "vertical",
     min_gap: int = 1,
     min_gap: int = 1,
 ) -> List[int]:
 ) -> List[int]:
     """
     """
-    Sort bounding boxes using recursive XY cut method based on the specified orientation.
+    Sort bounding boxes using recursive XY cut method based on the specified direction.
 
 
     Args:
     Args:
         block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
         block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
                                                            where each box is represented as
                                                            where each box is represented as
                                                            [x_min, y_min, x_max, y_max].
                                                            [x_min, y_min, x_max, y_max].
-        orientation (int): orientation for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
+        direction (str): Direction for the initial cut. Use "vertical" for Y-axis first and "horizontal" for X-axis first.
                          Defaults to "vertical".
                          Defaults to "vertical".
         min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
         min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
 
 
@@ -371,7 +322,7 @@ def sort_by_xycut(
     """
     """
     block_bboxes = np.asarray(block_bboxes).astype(int)
     block_bboxes = np.asarray(block_bboxes).astype(int)
     res = []
     res = []
-    if orientation == 1:
+    if direction == "vertical":
         recursive_yx_cut(
         recursive_yx_cut(
             block_bboxes,
             block_bboxes,
             np.arange(len(block_bboxes)).tolist(),
             np.arange(len(block_bboxes)).tolist(),
@@ -391,8 +342,7 @@ def sort_by_xycut(
 def match_unsorted_blocks(
 def match_unsorted_blocks(
     sorted_blocks: List[LayoutParsingBlock],
     sorted_blocks: List[LayoutParsingBlock],
     unsorted_blocks: List[LayoutParsingBlock],
     unsorted_blocks: List[LayoutParsingBlock],
-    config: Dict,
-    median_width: int,
+    region: LayoutParsingRegion,
 ) -> List[LayoutParsingBlock]:
 ) -> List[LayoutParsingBlock]:
     """
     """
     Match special blocks with the sorted blocks based on their region labels.
     Match special blocks with the sorted blocks based on their region labels.
@@ -406,7 +356,7 @@ def match_unsorted_blocks(
         List[LayoutParsingBlock]: The updated sorted blocks after matching special blocks.
         List[LayoutParsingBlock]: The updated sorted blocks after matching special blocks.
     """
     """
     distance_type_map = {
     distance_type_map = {
-        "cross_text": weighted_distance_insert,
+        "cross_layout": weighted_distance_insert,
         "paragraph_title": weighted_distance_insert,
         "paragraph_title": weighted_distance_insert,
         "doc_title": weighted_distance_insert,
         "doc_title": weighted_distance_insert,
         "vision_title": weighted_distance_insert,
         "vision_title": weighted_distance_insert,
@@ -416,21 +366,24 @@ def match_unsorted_blocks(
         "other": manhattan_insert,
         "other": manhattan_insert,
     }
     }
 
 
-    unsorted_blocks = sort_blocks(unsorted_blocks, median_width, reverse=False)
+    unsorted_blocks = sort_normal_blocks(
+        unsorted_blocks,
+        region.text_line_height,
+        region.text_line_width,
+        region.direction,
+    )
     for idx, block in enumerate(unsorted_blocks):
     for idx, block in enumerate(unsorted_blocks):
         order_label = block.order_label
         order_label = block.order_label
         if idx == 0 and order_label == "doc_title":
         if idx == 0 and order_label == "doc_title":
             sorted_blocks.insert(0, block)
             sorted_blocks.insert(0, block)
             continue
             continue
-        sorted_blocks = distance_type_map[order_label](
-            block, sorted_blocks, config, median_width
-        )
+        sorted_blocks = distance_type_map[order_label](block, sorted_blocks, region)
     return sorted_blocks
     return sorted_blocks
 
 
 
 
 def xycut_enhanced(
 def xycut_enhanced(
-    blocks: List[LayoutParsingBlock], config: Dict
-) -> List[LayoutParsingBlock]:
+    region: LayoutParsingRegion,
+) -> LayoutParsingRegion:
     """
     """
     xycut_enhanced function performs the following steps:
     xycut_enhanced function performs the following steps:
         1. Preprocess the input blocks by extracting headers, footers, and pre-cut blocks.
         1. Preprocess the input blocks by extracting headers, footers, and pre-cut blocks.
@@ -446,42 +399,51 @@ def xycut_enhanced(
     Returns:
     Returns:
         List[LayoutParsingBlock]: Ordered result list after processing.
         List[LayoutParsingBlock]: Ordered result list after processing.
     """
     """
-    if len(blocks) == 0:
-        return blocks
+    if len(region.block_map) == 0:
+        return []
 
 
-    text_labels = config.get("text_labels", [])
-    header_blocks, pre_cut_list, footer_blocks, unordered_blocks = pre_process(
-        blocks, config
-    )
+    pre_cut_list: List[List[LayoutParsingBlock]] = pre_process(region)
     final_order_res_list: List[LayoutParsingBlock] = []
     final_order_res_list: List[LayoutParsingBlock] = []
 
 
-    header_blocks = sort_blocks(header_blocks)
-    footer_blocks = sort_blocks(footer_blocks)
-    unordered_blocks = sort_blocks(unordered_blocks)
+    header_blocks: List[LayoutParsingBlock] = [
+        region.block_map[idx] for idx in region.header_block_idxes
+    ]
+    unordered_blocks: List[LayoutParsingBlock] = [
+        region.block_map[idx] for idx in region.unordered_block_idxes
+    ]
+    footer_blocks: List[LayoutParsingBlock] = [
+        region.block_map[idx] for idx in region.footer_block_idxes
+    ]
+
+    header_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
+        header_blocks, region.text_line_height, region.text_line_width, region.direction
+    )
+    footer_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
+        footer_blocks, region.text_line_height, region.text_line_width, region.direction
+    )
+    unordered_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
+        unordered_blocks,
+        region.text_line_height,
+        region.text_line_width,
+        region.direction,
+    )
     final_order_res_list.extend(header_blocks)
     final_order_res_list.extend(header_blocks)
 
 
     unsorted_blocks: List[LayoutParsingBlock] = []
     unsorted_blocks: List[LayoutParsingBlock] = []
-    sorted_blocks_by_pre_cuts = []
+    sorted_blocks_by_pre_cuts: List[LayoutParsingBlock] = []
     for pre_cut_blocks in pre_cut_list:
     for pre_cut_blocks in pre_cut_list:
         sorted_blocks: List[LayoutParsingBlock] = []
         sorted_blocks: List[LayoutParsingBlock] = []
         doc_title_blocks: List[LayoutParsingBlock] = []
         doc_title_blocks: List[LayoutParsingBlock] = []
         xy_cut_blocks: List[LayoutParsingBlock] = []
         xy_cut_blocks: List[LayoutParsingBlock] = []
-        pre_cut_blocks: List[LayoutParsingBlock]
-        median_width = 1
-        text_block_width = [
-            block.width for block in pre_cut_blocks if block.label in text_labels
-        ]
-        if len(text_block_width) > 0:
-            median_width = int(np.median(text_block_width))
 
 
         get_layout_structure(
         get_layout_structure(
-            pre_cut_blocks,
+            pre_cut_blocks, region.direction, region.secondary_direction
         )
         )
 
 
         # Get xy cut blocks and add other blocks in special_block_map
         # Get xy cut blocks and add other blocks in special_block_map
         for block in pre_cut_blocks:
         for block in pre_cut_blocks:
             if block.order_label not in [
             if block.order_label not in [
-                "cross_text",
+                "cross_layout",
                 "cross_reference",
                 "cross_reference",
                 "doc_title",
                 "doc_title",
                 "unordered",
                 "unordered",
@@ -496,41 +458,77 @@ def xycut_enhanced(
             block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
             block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
             block_text_lines = [block.num_of_lines for block in xy_cut_blocks]
             block_text_lines = [block.num_of_lines for block in xy_cut_blocks]
             discontinuous = calculate_discontinuous_projection(
             discontinuous = calculate_discontinuous_projection(
-                block_bboxes, orientation="horizontal"
+                block_bboxes, direction=region.direction
             )
             )
             if len(discontinuous) > 1:
             if len(discontinuous) > 1:
                 xy_cut_blocks = [block for block in xy_cut_blocks]
                 xy_cut_blocks = [block for block in xy_cut_blocks]
+            # if len(discontinuous) == 1 or max(block_text_lines) == 1 or (not is_projection_consistent(xy_cut_blocks, discontinuous, direction=region.direction) and len(discontinuous) > 2 and max(block_text_lines) - min(block_text_lines) < 3):
             if len(discontinuous) == 1 or max(block_text_lines) == 1:
             if len(discontinuous) == 1 or max(block_text_lines) == 1:
-                xy_cut_blocks.sort(key=lambda x: (x.bbox[1] // 5, x.bbox[0]))
-                xy_cut_blocks = shrink_overlapping_boxes(xy_cut_blocks, "vertical")
+                xy_cut_blocks.sort(
+                    key=lambda x: (
+                        x.bbox[region.secondary_direction_start_index]
+                        // (region.text_line_height // 2),
+                        x.bbox[region.direction_start_index],
+                    )
+                )
+                xy_cut_blocks = shrink_overlapping_boxes(
+                    xy_cut_blocks, region.secondary_direction
+                )
+            if (
+                len(discontinuous) == 1
+                or max(block_text_lines) == 1
+                or (
+                    not is_projection_consistent(
+                        xy_cut_blocks, discontinuous, direction=region.direction
+                    )
+                    and len(discontinuous) > 2
+                    and max(block_text_lines) - min(block_text_lines) < 3
+                )
+            ):
+                xy_cut_blocks.sort(
+                    key=lambda x: (
+                        x.bbox[region.secondary_direction_start_index]
+                        // (region.text_line_height // 2),
+                        x.bbox[region.direction_start_index],
+                    )
+                )
+                xy_cut_blocks = shrink_overlapping_boxes(
+                    xy_cut_blocks, region.secondary_direction
+                )
                 block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
                 block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
-                sorted_indexes = sort_by_xycut(block_bboxes, orientation=1, min_gap=1)
+                sorted_indexes = sort_by_xycut(
+                    block_bboxes, direction=region.secondary_direction, min_gap=1
+                )
             else:
             else:
-                xy_cut_blocks.sort(key=lambda x: (x.bbox[0] // 20, x.bbox[1]))
-                xy_cut_blocks = shrink_overlapping_boxes(xy_cut_blocks, "horizontal")
+                xy_cut_blocks.sort(
+                    key=lambda x: (
+                        x.bbox[region.direction_start_index]
+                        // (region.text_line_width // 2),
+                        x.bbox[region.secondary_direction_start_index],
+                    )
+                )
+                xy_cut_blocks = shrink_overlapping_boxes(
+                    xy_cut_blocks, region.direction
+                )
                 block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
                 block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
-                sorted_indexes = sort_by_xycut(block_bboxes, orientation=0, min_gap=20)
+                sorted_indexes = sort_by_xycut(
+                    block_bboxes, direction=region.direction, min_gap=1
+                )
 
 
             sorted_blocks = [xy_cut_blocks[i] for i in sorted_indexes]
             sorted_blocks = [xy_cut_blocks[i] for i in sorted_indexes]
 
 
         sorted_blocks = match_unsorted_blocks(
         sorted_blocks = match_unsorted_blocks(
             sorted_blocks,
             sorted_blocks,
             doc_title_blocks,
             doc_title_blocks,
-            config,
-            median_width,
+            region=region,
         )
         )
 
 
         sorted_blocks_by_pre_cuts.extend(sorted_blocks)
         sorted_blocks_by_pre_cuts.extend(sorted_blocks)
 
 
-    median_width = 1
-    text_block_width = [block.width for block in blocks if block.label in text_labels]
-    if len(text_block_width) > 0:
-        median_width = int(np.median(text_block_width))
     final_order_res_list = match_unsorted_blocks(
     final_order_res_list = match_unsorted_blocks(
         sorted_blocks_by_pre_cuts,
         sorted_blocks_by_pre_cuts,
         unsorted_blocks,
         unsorted_blocks,
-        config,
-        median_width,
+        region=region,
     )
     )
 
 
     final_order_res_list.extend(footer_blocks)
     final_order_res_list.extend(footer_blocks)