6 luni în urmă · e23a69cd66
--- a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
@@ -30,6 +30,7 @@ from ...utils.pp_option import PaddlePredictorOption
 
															 from ..base import BasePipeline
														
 
															 from ..ocr.result import OCRResult
														
 
															 from .result_v2 import LayoutParsingBlock, LayoutParsingRegion, LayoutParsingResultV2
														
 
															+from .setting import BLOCK_LABEL_MAP, BLOCK_SETTINGS, LINE_SETTINGS, REGION_SETTINGS
														
 
															 from .utils import (
														
 
															     caculate_bbox_area,
														
 
															     calculate_minimum_enclosing_bbox,
														
@@ -260,8 +261,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															     def standardized_data(
														
 
															         self,
														
 
															         image: list,
														
 
															-        parameters_config: dict,
														
 
															-        block_label_mapping: dict,
														
 
															         region_det_res: DetResult,
														
 
															         layout_det_res: DetResult,
														
 
															         overall_ocr_res: OCRResult,
														
@@ -360,7 +359,7 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															             paragraph_title_block_area = caculate_bbox_area(
														
 
															                 layout_det_res["boxes"][paragraph_title_list[0]]["coordinate"]
														
 
															             )
														
 
															-            title_area_max_block_threshold = parameters_config["block"].get(
														
 
															+            title_area_max_block_threshold = BLOCK_SETTINGS.get(
														
 
															                 "title_conversion_area_ratio_threshold", 0.3
														
 
															             )
														
 
															             if (
														
@@ -441,7 +440,7 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                     break
														
 
															             if not has_text and layout_det_res["boxes"][layout_box_idx][
														
 
															                 "label"
														
 
															-            ] not in block_label_mapping.get("vision_labels", []):
														
 
															+            ] not in BLOCK_LABEL_MAP.get("vision_labels", []):
														
 
															                 crop_box = layout_det_res["boxes"][layout_box_idx]["coordinate"]
														
 
															                 x1, y1, x2, y2 = [int(i) for i in crop_box]
														
 
															                 crop_img = np.array(image)[y1:y2, x1:x2]
														
@@ -506,7 +505,7 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                     overlap_ratio = calculate_overlap_ratio(
														
 
															                         region_bbox, block_bboxes[block_idx], mode="small"
														
 
															                     )
														
 
															-                    if overlap_ratio > parameters_config["region"].get(
														
 
															+                    if overlap_ratio > REGION_SETTINGS.get(
														
 
															                         "match_block_overlap_ratio_threshold", 0.8
														
 
															                     ):
														
 
															                         region_to_block_map[region_idx].append(block_idx)
														
@@ -540,7 +539,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                                     image.shape[0],
														
 
															                                     block_idxes_set,
														
 
															                                     block_bboxes,
														
 
															-                                    parameters_config,
														
 
															                                 )
														
 
															                             )
														
 
															                     if len(matched_idxes) == 0:
														
@@ -570,7 +568,7 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															         input_img: np.ndarray,
														
 
															         text_rec_model: Any,
														
 
															         text_rec_score_thresh: Union[float, None] = None,
														
 
															-        orientation: str = "vertical",
														
 
															+        direction: str = "vertical",
														
 
															     ) -> None:
														
 
															         """
														
 
															         Sort a line of text spans based on their vertical position within the layout bounding box.
														
@@ -583,8 +581,8 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															         Returns:
														
 
															             list: The sorted line of text spans.
														
 
															         """
														
 
															-        sort_index = 0 if orientation == "horizontal" else 1
														
 
															-        splited_boxes = split_boxes_by_projection(line, orientation)
														
 
															+        sort_index = 0 if direction == "horizontal" else 1
														
 
															+        splited_boxes = split_boxes_by_projection(line, direction)
														
 
															         splited_lines = []
														
 
															         if len(line) != len(splited_boxes):
														
 
															             splited_boxes.sort(key=lambda span: span[0][sort_index])
														
@@ -614,7 +612,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															     def get_block_rec_content(
														
 
															         self,
														
 
															         image: list,
														
 
															-        line_parameters_config: dict,
														
 
															         ocr_rec_res: dict,
														
 
															         block: LayoutParsingBlock,
														
 
															         text_rec_model: Any,
														
@@ -625,37 +622,49 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															             block.content = ""
														
 
															             return block
														
 
															-        lines, text_orientation = group_boxes_into_lines(
														
 
															+        lines, text_direction = group_boxes_into_lines(
														
 
															             ocr_rec_res,
														
 
															-            line_parameters_config.get("line_height_iou_threshold", 0.8),
														
 
															+            LINE_SETTINGS.get("line_height_iou_threshold", 0.8),
														
 
															         )
														
 
															         if block.label == "reference":
														
 
															             rec_boxes = ocr_rec_res["boxes"]
														
 
															             block_right_coordinate = max([box[2] for box in rec_boxes])
														
 
															-            last_line_span_limit = 20
														
 
															         else:
														
 
															             block_right_coordinate = block.bbox[2]
														
 
															-            last_line_span_limit = 10
														
 
															         # format line
														
 
															         text_lines = []
														
 
															         need_new_line_num = 0
														
 
															-        sort_index = 0 if text_orientation == "horizontal" else 1
														
 
															+        start_index = 0 if text_direction == "horizontal" else 1
														
 
															+        secondary_direction_start_index = 1 if text_direction == "horizontal" else 0
														
 
															+        line_height_list, line_width_list = [], []
														
 
															         for idx, line in enumerate(lines):
														
 
															-            line.sort(key=lambda span: span[0][sort_index])
														
 
															+            line.sort(key=lambda span: span[0][start_index])
														
 
															+            text_bboxes_height = [
														
 
															+                span[0][secondary_direction_start_index + 2]
														
 
															+                - span[0][secondary_direction_start_index]
														
 
															+                for span in line
														
 
															+            ]
														
 
															+            text_bboxes_width = [
														
 
															+                span[0][start_index + 2] - span[0][start_index] for span in line
														
 
															+            ]
														
 
															+
														
 
															+            line_height = np.mean(text_bboxes_height)
														
 
															+            line_height_list.append(line_height)
														
 
															+            line_width_list.append(np.mean(text_bboxes_width))
														
 
															             # merge formula and text
														
 
															             ocr_labels = [span[2] for span in line]
														
 
															             if "formula" in ocr_labels:
														
 
															                 line = self.sort_line_by_projection(
														
 
															-                    line, image, text_rec_model, text_rec_score_thresh, text_orientation
														
 
															+                    line, image, text_rec_model, text_rec_score_thresh, text_direction
														
 
															                 )
														
 
															             line_text, need_new_line = format_line(
														
 
															                 line,
														
 
															                 block_right_coordinate,
														
 
															-                last_line_span_limit=last_line_span_limit,
														
 
															+                last_line_span_limit=line_height * 1.5,
														
 
															                 block_label=block.label,
														
 
															             )
														
 
															             if need_new_line:
														
@@ -668,21 +677,21 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                 block.seg_end_coordinate = line_end_coordinate
														
 
															             text_lines.append(line_text)
														
 
															-        delim = line_parameters_config["delimiter_map"].get(block.label, "")
														
 
															+        delim = LINE_SETTINGS["delimiter_map"].get(block.label, "")
														
 
															         if need_new_line_num > len(text_lines) * 0.5 and delim == "":
														
 
															             delim = "\n"
														
 
															         content = delim.join(text_lines)
														
 
															         block.content = content
														
 
															         block.num_of_lines = len(text_lines)
														
 
															-        block.orientation = text_orientation
														
 
															+        block.direction = text_direction
														
 
															+        block.text_line_height = np.mean(line_height_list)
														
 
															+        block.text_line_width = np.mean(line_width_list)
														
 
															         return block
														
 
															     def get_layout_parsing_blocks(
														
 
															         self,
														
 
															         image: list,
														
 
															-        parameters_config: dict,
														
 
															-        block_label_mapping: dict,
														
 
															         region_block_ocr_idx_map: dict,
														
 
															         region_det_res: DetResult,
														
 
															         overall_ocr_res: OCRResult,
														
@@ -759,7 +768,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                 block = self.get_block_rec_content(
														
 
															                     image=image,
														
 
															                     block=block,
														
 
															-                    line_parameters_config=parameters_config["line"],
														
 
															                     ocr_rec_res=rec_res,
														
 
															                     text_rec_model=text_rec_model,
														
 
															                     text_rec_score_thresh=text_rec_score_thresh,
														
@@ -781,9 +789,8 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                 for idx in region_block_ocr_idx_map["region_to_block_map"][region_idx]
														
 
															             ]
														
 
															             region = LayoutParsingRegion(
														
 
															-                region_bbox=region_bbox,
														
 
															+                bbox=region_bbox,
														
 
															                 blocks=region_blocks,
														
 
															-                block_label_mapping=block_label_mapping,
														
 
															             )
														
 
															             region_list.append(region)
														
@@ -818,14 +825,11 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															         Returns:
														
 
															             list: A list of dictionaries representing the layout parsing result.
														
 
															         """
														
 
															-        from .setting import block_label_mapping, parameters_config
														
 
															         # Standardize data
														
 
															         region_block_ocr_idx_map, region_det_res, layout_det_res = (
														
 
															             self.standardized_data(
														
 
															                 image=image,
														
 
															-                parameters_config=parameters_config,
														
 
															-                block_label_mapping=block_label_mapping,
														
 
															                 region_det_res=region_det_res,
														
 
															                 layout_det_res=layout_det_res,
														
 
															                 overall_ocr_res=overall_ocr_res,
														
@@ -838,8 +842,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															         # Format layout parsing block
														
 
															         region_list = self.get_layout_parsing_blocks(
														
 
															             image=image,
														
 
															-            parameters_config=parameters_config,
														
 
															-            block_label_mapping=block_label_mapping,
														
 
															             region_block_ocr_idx_map=region_block_ocr_idx_map,
														
 
															             region_det_res=region_det_res,
														
 
															             overall_ocr_res=overall_ocr_res,
														
@@ -854,11 +856,10 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															         for region in region_list:
														
 
															             parsing_res_list.extend(region.sort())
														
 
															-        visualize_index_labels = block_label_mapping["visualize_index_labels"]
														
 
															         index = 1
														
 
															         for block in parsing_res_list:
														
 
															-            if block.label in visualize_index_labels:
														
 
															-                block.index = index
														
 
															+            if block.label in BLOCK_LABEL_MAP["visualize_index_labels"]:
														
 
															+                block.order_index = index
														
 
															                 index += 1
														
 
															         return parsing_res_list
														
@@ -956,8 +957,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															         use_e2e_wired_table_rec_model: bool = False,
														
 
															         use_e2e_wireless_table_rec_model: bool = True,
														
 
															         is_pretty_markdown: Union[bool, None] = None,
														
 
															-        use_layout_gt: bool = False,
														
 
															-        layout_gt_dir: Union[str, None] = None,
														
 
															         **kwargs,
														
 
															     ) -> LayoutParsingResultV2:
														
 
															         """
														
@@ -1032,65 +1031,16 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															             doc_preprocessor_image = doc_preprocessor_res["output_img"]
														
 
															-            use_layout_gt = use_layout_gt
														
 
															-            if not use_layout_gt:
														
 
															-                layout_det_res = next(
														
 
															-                    self.layout_det_model(
														
 
															-                        doc_preprocessor_image,
														
 
															-                        threshold=layout_threshold,
														
 
															-                        layout_nms=layout_nms,
														
 
															-                        layout_unclip_ratio=layout_unclip_ratio,
														
 
															-                        layout_merge_bboxes_mode=layout_merge_bboxes_mode,
														
 
															-                    )
														
 
															+            layout_det_res = next(
														
 
															+                self.layout_det_model(
														
 
															+                    doc_preprocessor_image,
														
 
															+                    threshold=layout_threshold,
														
 
															+                    layout_nms=layout_nms,
														
 
															+                    layout_unclip_ratio=layout_unclip_ratio,
														
 
															+                    layout_merge_bboxes_mode=layout_merge_bboxes_mode,
														
 
															                 )
														
 
															-            else:
														
 
															-                import json
														
 
															-                import os
														
 
															-
														
 
															-                from ...models.object_detection.result import DetResult
														
 
															-
														
 
															-                label_dir = layout_gt_dir
														
 
															-                notes_path = f"{label_dir}/notes.json"
														
 
															-                labels = f"{label_dir}/labels"
														
 
															-                gt_file = os.path.basename(input)[:-4] + ".txt"
														
 
															-                gt_path = f"{labels}/{gt_file}"
														
 
															-                with open(notes_path, "r") as f:
														
 
															-                    notes = json.load(f)
														
 
															-                categories_map = {}
														
 
															-                for categories in notes["categories"]:
														
 
															-                    id = int(categories["id"])
														
 
															-                    name = categories["name"]
														
 
															-                    categories_map[id] = name
														
 
															-                with open(gt_path, "r") as f:
														
 
															-                    lines = f.readlines()
														
 
															-                layout_det_res_dic = {
														
 
															-                    "input_img": doc_preprocessor_image,
														
 
															-                    "page_index": None,
														
 
															-                    "boxes": [],
														
 
															-                }
														
 
															-                for line in lines:
														
 
															-                    line = line.strip().split(" ")
														
 
															-                    category_id = int(line[0])
														
 
															-                    label = categories_map[category_id]
														
 
															-                    img_h, img_w = doc_preprocessor_image.shape[:2]
														
 
															-                    center_x = float(line[1]) * img_w
														
 
															-                    center_y = float(line[2]) * img_h
														
 
															-                    w = float(line[3]) * img_w
														
 
															-                    h = float(line[4]) * img_h
														
 
															-                    x0 = center_x - w / 2
														
 
															-                    y0 = center_y - h / 2
														
 
															-                    x1 = center_x + w / 2
														
 
															-                    y1 = center_y + h / 2
														
 
															-                    box = [x0, y0, x1, y1]
														
 
															-                    layout_det_res_dic["boxes"].append(
														
 
															-                        {
														
 
															-                            "cls_id": category_id,
														
 
															-                            "label": label,
														
 
															-                            "coordinate": box,
														
 
															-                            "score": 1.0,
														
 
															-                        }
														
 
															-                    )
														
 
															-                layout_det_res = DetResult(layout_det_res_dic)
														
 
															+            )
														
 
															+
														
 
															             imgs_in_doc = gather_imgs(doc_preprocessor_image, layout_det_res["boxes"])
														
 
															             if model_settings["use_region_detection"]:
														
@@ -1139,7 +1089,13 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                     ),
														
 
															                 )
														
 
															             else:
														
 
															-                overall_ocr_res = {}
														
 
															+                overall_ocr_res = {
														
 
															+                    "dt_polys": [],
														
 
															+                    "rec_texts": [],
														
 
															+                    "rec_scores": [],
														
 
															+                    "rec_polys": [],
														
 
															+                    "rec_boxes": np.array([]),
														
 
															+                }
														
 
															             overall_ocr_res["rec_labels"] = ["text"] * len(overall_ocr_res["rec_texts"])
														
@@ -1157,9 +1113,14 @@ class LayoutParsingPipelineV2(BasePipeline):
 
															                     table_contents["rec_texts"].append(
														
 
															                         f"${formula_res['rec_formula']}$"
														
 
															                     )
														
 
															-                    table_contents["rec_boxes"] = np.vstack(
														
 
															-                        (table_contents["rec_boxes"], [formula_res["dt_polys"]])
														
 
															-                    )
														
 
															+                    if table_contents["rec_boxes"].size == 0:
														
 
															+                        table_contents["rec_boxes"] = np.array(
														
 
															+                            [formula_res["dt_polys"]]
														
 
															+                        )
														
 
															+                    else:
														
 
															+                        table_contents["rec_boxes"] = np.vstack(
														
 
															+                            (table_contents["rec_boxes"], [formula_res["dt_polys"]])
														
 
															+                        )
														
 
															                     table_contents["rec_polys"].append(poly_points)
														
 
															                     table_contents["rec_scores"].append(1)
														
--- a/paddlex/inference/pipelines/layout_parsing/result_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/result_v2.py
@@ -20,8 +20,9 @@ from pathlib import Path
 
															 from typing import List
														
 
															 import numpy as np
														
 
															-from PIL import Image, ImageDraw
														
 
															+from PIL import Image, ImageDraw, ImageFont
														
 
															+from ....utils.fonts import PINGFANG_FONT_FILE_PATH
														
 
															 from ...common.result import (
														
 
															     BaseCVResult,
														
 
															     HtmlMixin,
														
@@ -29,6 +30,7 @@ from ...common.result import (
 
															     MarkdownMixin,
														
 
															     XlsxMixin,
														
 
															 )
														
 
															+from .setting import BLOCK_LABEL_MAP
														
 
															 class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
														
@@ -107,16 +109,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
															         # for layout ordering image
														
 
															         image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
														
 
															         draw = ImageDraw.Draw(image, "RGBA")
														
 
															+        font_size = int(0.018 * int(image.width)) + 2
														
 
															+        font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
														
 
															         parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
														
 
															         for block in parsing_result:
														
 
															             bbox = block.bbox
														
 
															-            index = block.index
														
 
															-            label = block.label
														
 
															-            fill_color = get_show_color(label)
														
 
															+            index = block.order_index
														
 
															+            label = block.order_label
														
 
															+            fill_color = get_show_color(label, True)
														
 
															             draw.rectangle(bbox, fill=fill_color)
														
 
															             if index is not None:
														
 
															-                text_position = (bbox[2] + 2, bbox[1] - 10)
														
 
															-                draw.text(text_position, str(index), fill="red")
														
 
															+                text_position = (bbox[2] + 2, bbox[1] - font_size // 2)
														
 
															+                if int(image.width) - bbox[2] < font_size:
														
 
															+                    text_position = (
														
 
															+                        int(bbox[2] - font_size * 1.1),
														
 
															+                        bbox[1] - font_size // 2,
														
 
															+                    )
														
 
															+                draw.text(text_position, str(index), font=font, fill="red")
														
 
															         res_img_dict["layout_order_res"] = image
														
@@ -475,8 +484,8 @@ class LayoutParsingBlock:
 
															     def __init__(self, label, bbox, content="") -> None:
														
 
															         self.label = label
														
 
															-        self.order_label = "other"
														
 
															-        self.bbox = [int(item) for item in bbox]
														
 
															+        self.order_label = None
														
 
															+        self.bbox = list(map(int, bbox))
														
 
															         self.content = content
														
 
															         self.seg_start_coordinate = float("inf")
														
 
															         self.seg_end_coordinate = float("-inf")
														
@@ -486,40 +495,42 @@ class LayoutParsingBlock:
 
															         self.num_of_lines = 1
														
 
															         self.image = None
														
 
															         self.index = None
														
 
															-        self.visual_index = None
														
 
															-        self.orientation = self.get_bbox_orientation()
														
 
															+        self.order_index = None
														
 
															+        self.text_line_width = 1
														
 
															+        self.text_line_height = 1
														
 
															+        self.direction = self.get_bbox_direction()
														
 
															         self.child_blocks = []
														
 
															-        self.update_orientation_info()
														
 
															+        self.update_direction_info()
														
 
															     def __str__(self) -> str:
														
 
															         return f"{self.__dict__}"
														
 
															     def __repr__(self) -> str:
														
 
															-        _str = f"\n\n#################\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
														
 
															+        _str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
														
 
															         return _str
														
 
															     def to_dict(self) -> dict:
														
 
															         return self.__dict__
														
 
															-    def update_orientation_info(self) -> None:
														
 
															+    def update_direction_info(self) -> None:
														
 
															         if self.order_label == "vision":
														
 
															-            self.orientation = "horizontal"
														
 
															-        if self.orientation == "horizontal":
														
 
															-            self.secondary_orientation = "vertical"
														
 
															+            self.direction = "horizontal"
														
 
															+        if self.direction == "horizontal":
														
 
															+            self.secondary_direction = "vertical"
														
 
															             self.short_side_length = self.height
														
 
															             self.long_side_length = self.width
														
 
															             self.start_coordinate = self.bbox[0]
														
 
															             self.end_coordinate = self.bbox[2]
														
 
															-            self.secondary_orientation_start_coordinate = self.bbox[1]
														
 
															-            self.secondary_orientation_end_coordinate = self.bbox[3]
														
 
															+            self.secondary_direction_start_coordinate = self.bbox[1]
														
 
															+            self.secondary_direction_end_coordinate = self.bbox[3]
														
 
															         else:
														
 
															-            self.secondary_orientation = "horizontal"
														
 
															+            self.secondary_direction = "horizontal"
														
 
															             self.short_side_length = self.width
														
 
															             self.long_side_length = self.height
														
 
															             self.start_coordinate = self.bbox[1]
														
 
															             self.end_coordinate = self.bbox[3]
														
 
															-            self.secondary_orientation_start_coordinate = self.bbox[0]
														
 
															-            self.secondary_orientation_end_coordinate = self.bbox[2]
														
 
															+            self.secondary_direction_start_coordinate = self.bbox[0]
														
 
															+            self.secondary_direction_end_coordinate = self.bbox[2]
														
 
															     def append_child_block(self, child_block: LayoutParsingBlock) -> None:
														
 
															         if not self.child_blocks:
														
@@ -533,7 +544,7 @@ class LayoutParsingBlock:
 
															             max(y2, y2_child),
														
 
															         )
														
 
															         self.bbox = union_bbox
														
 
															-        self.update_orientation_info()
														
 
															+        self.update_direction_info()
														
 
															         child_blocks = [child_block]
														
 
															         if child_block.child_blocks:
														
 
															             child_blocks.extend(child_block.get_child_blocks())
														
@@ -550,107 +561,130 @@ class LayoutParsingBlock:
 
															         centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
														
 
															         return centroid
														
 
															-    def get_bbox_orientation(self, orientation_ratio: float = 1.0) -> bool:
														
 
															+    def get_bbox_direction(self, direction_ratio: float = 1.0) -> bool:
														
 
															         """
														
 
															         Determine if a bounding box is horizontal or vertical.
														
 
															         Args:
														
 
															             bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
														
 
															-            orientation_ratio (float): Ratio for determining orientation. Default is 1.0.
														
 
															+            direction_ratio (float): Ratio for determining direction. Default is 1.0.
														
 
															         Returns:
														
 
															             str: "horizontal" or "vertical".
														
 
															         """
														
 
															         return (
														
 
															-            "horizontal"
														
 
															-            if self.width * orientation_ratio >= self.height
														
 
															-            else "vertical"
														
 
															+            "horizontal" if self.width * direction_ratio >= self.height else "vertical"
														
 
															         )
														
 
															 class LayoutParsingRegion:
														
 
															-    def __init__(
														
 
															-        self, region_bbox, blocks: List[LayoutParsingBlock] = [], block_label_mapping={}
														
 
															-    ) -> None:
														
 
															-        self.region_bbox = region_bbox
														
 
															-        self.blocks = blocks
														
 
															+    def __init__(self, bbox, blocks: List[LayoutParsingBlock] = []) -> None:
														
 
															+        self.bbox = bbox
														
 
															         self.block_map = {}
														
 
															-        self.update_config(block_label_mapping)
														
 
															-        self.orientation = None
														
 
															+        self.direction = "horizontal"
														
 
															         self.calculate_bbox_metrics()
														
 
															-
														
 
															-    def update_config(self, block_label_mapping):
														
 
															-        self.block_map = {}
														
 
															-        self.config = copy.deepcopy(block_label_mapping)
														
 
															-        self.config["region_bbox"] = self.region_bbox
														
 
															-        horizontal_text_block_num = 0
														
 
															-        for idx, block in enumerate(self.blocks):
														
 
															-            label = block.label
														
 
															-            if (
														
 
															-                block.order_label not in ["vision", "vision_title"]
														
 
															-                and block.orientation == "horizontal"
														
 
															-            ):
														
 
															-                horizontal_text_block_num += 1
														
 
															+        self.doc_title_block_idxes = []
														
 
															+        self.paragraph_title_block_idxes = []
														
 
															+        self.vision_block_idxes = []
														
 
															+        self.unordered_block_idxes = []
														
 
															+        self.vision_title_block_idxes = []
														
 
															+        self.normal_text_block_idxes = []
														
 
															+        self.header_block_idxes = []
														
 
															+        self.footer_block_idxes = []
														
 
															+        self.text_line_width = 20
														
 
															+        self.text_line_height = 10
														
 
															+        self.init_region_info_from_layout(blocks)
														
 
															+        self.init_direction_info()
														
 
															+
														
 
															+    def init_region_info_from_layout(self, blocks: List[LayoutParsingBlock]):
														
 
															+        horizontal_normal_text_block_num = 0
														
 
															+        text_line_height_list = []
														
 
															+        text_line_width_list = []
														
 
															+        for idx, block in enumerate(blocks):
														
 
															             self.block_map[idx] = block
														
 
															-            self.update_layout_order_config_block_index(label, idx)
														
 
															-        text_block_num = (
														
 
															-            len(self.blocks)
														
 
															-            - len(self.config.get("vision_block_idxes", []))
														
 
															-            - len(self.config.get("vision_title_block_idxes", []))
														
 
															-        )
														
 
															-        self.orientation = (
														
 
															+            block.index = idx
														
 
															+            if block.label in BLOCK_LABEL_MAP["header_labels"]:
														
 
															+                self.header_block_idxes.append(idx)
														
 
															+            elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
														
 
															+                self.doc_title_block_idxes.append(idx)
														
 
															+            elif block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]:
														
 
															+                self.paragraph_title_block_idxes.append(idx)
														
 
															+            elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
														
 
															+                self.vision_block_idxes.append(idx)
														
 
															+            elif block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
														
 
															+                self.vision_title_block_idxes.append(idx)
														
 
															+            elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
														
 
															+                self.footer_block_idxes.append(idx)
														
 
															+            elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
														
 
															+                self.unordered_block_idxes.append(idx)
														
 
															+            else:
														
 
															+                self.normal_text_block_idxes.append(idx)
														
 
															+                text_line_height_list.append(block.text_line_height)
														
 
															+                text_line_width_list.append(block.text_line_width)
														
 
															+                if block.direction == "horizontal":
														
 
															+                    horizontal_normal_text_block_num += 1
														
 
															+        self.direction = (
														
 
															             "horizontal"
														
 
															-            if horizontal_text_block_num >= text_block_num * 0.5
														
 
															+            if horizontal_normal_text_block_num
														
 
															+            >= len(self.normal_text_block_idxes) * 0.5
														
 
															             else "vertical"
														
 
															         )
														
 
															-        self.config["region_orientation"] = self.orientation
														
 
															+        self.text_line_width = (
														
 
															+            np.mean(text_line_width_list) if text_line_width_list else 20
														
 
															+        )
														
 
															+        self.text_line_height = (
														
 
															+            np.mean(text_line_height_list) if text_line_height_list else 10
														
 
															+        )
														
 
															+
														
 
															+    def init_direction_info(self):
														
 
															+        if self.direction == "horizontal":
														
 
															+            self.direction_start_index = 0
														
 
															+            self.direction_end_index = 2
														
 
															+            self.secondary_direction_start_index = 1
														
 
															+            self.secondary_direction_end_index = 3
														
 
															+            self.secondary_direction = "vertical"
														
 
															+        else:
														
 
															+            self.direction_start_index = 1
														
 
															+            self.direction_end_index = 3
														
 
															+            self.secondary_direction_start_index = 0
														
 
															+            self.secondary_direction_end_index = 2
														
 
															+            self.secondary_direction = "horizontal"
														
 
															+
														
 
															+        self.direction_center_coordinate = (
														
 
															+            self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
														
 
															+        ) / 2
														
 
															+        self.secondary_direction_center_coordinate = (
														
 
															+            self.bbox[self.secondary_direction_start_index]
														
 
															+            + self.bbox[self.secondary_direction_end_index]
														
 
															+        ) / 2
														
 
															     def calculate_bbox_metrics(self):
														
 
															-        x1, y1, x2, y2 = self.region_bbox
														
 
															+        x1, y1, x2, y2 = self.bbox
														
 
															         x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
														
 
															         self.euclidean_distance = math.sqrt(((x1) ** 2 + (y1) ** 2))
														
 
															         self.center_euclidean_distance = math.sqrt(((x_center) ** 2 + (y_center) ** 2))
														
 
															         self.angle_rad = math.atan2(y_center, x_center)
														
 
															+    def sort_normal_blocks(self, blocks):
														
 
															+        if self.direction == "horizontal":
														
 
															+            blocks.sort(
														
 
															+                key=lambda x: (
														
 
															+                    x.bbox[1] // self.text_line_height,
														
 
															+                    x.bbox[0] // self.text_line_width,
														
 
															+                    x.bbox[1] ** 2 + x.bbox[0] ** 2,
														
 
															+                ),
														
 
															+            )
														
 
															+        else:
														
 
															+            blocks.sort(
														
 
															+                key=lambda x: (
														
 
															+                    -x.bbox[0] // self.text_line_width,
														
 
															+                    x.bbox[1] // self.text_line_height,
														
 
															+                    -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
														
 
															+                ),
														
 
															+            )
														
 
															+
														
 
															     def sort(self):
														
 
															         from .xycut_enhanced import xycut_enhanced
														
 
															-        return xycut_enhanced(self.blocks, self.config)
														
 
															-
														
 
															-    def update_layout_order_config_block_index(
														
 
															-        self, block_label: str, block_idx: int
														
 
															-    ) -> None:
														
 
															-        doc_title_labels = self.config["doc_title_labels"]
														
 
															-        paragraph_title_labels = self.config["paragraph_title_labels"]
														
 
															-        vision_labels = self.config["vision_labels"]
														
 
															-        vision_title_labels = self.config["vision_title_labels"]
														
 
															-        header_labels = self.config["header_labels"]
														
 
															-        unordered_labels = self.config["unordered_labels"]
														
 
															-        footer_labels = self.config["footer_labels"]
														
 
															-        text_labels = self.config["text_labels"]
														
 
															-        self.config.setdefault("doc_title_block_idxes", [])
														
 
															-        self.config.setdefault("paragraph_title_block_idxes", [])
														
 
															-        self.config.setdefault("vision_block_idxes", [])
														
 
															-        self.config.setdefault("vision_title_block_idxes", [])
														
 
															-        self.config.setdefault("unordered_block_idxes", [])
														
 
															-        self.config.setdefault("text_block_idxes", [])
														
 
															-        self.config.setdefault("header_block_idxes", [])
														
 
															-        self.config.setdefault("footer_block_idxes", [])
														
 
															-
														
 
															-        if block_label in doc_title_labels:
														
 
															-            self.config["doc_title_block_idxes"].append(block_idx)
														
 
															-        if block_label in paragraph_title_labels:
														
 
															-            self.config["paragraph_title_block_idxes"].append(block_idx)
														
 
															-        if block_label in vision_labels:
														
 
															-            self.config["vision_block_idxes"].append(block_idx)
														
 
															-        if block_label in vision_title_labels:
														
 
															-            self.config["vision_title_block_idxes"].append(block_idx)
														
 
															-        if block_label in unordered_labels:
														
 
															-            self.config["unordered_block_idxes"].append(block_idx)
														
 
															-        if block_label in text_labels:
														
 
															-            self.config["text_block_idxes"].append(block_idx)
														
 
															-        if block_label in header_labels:
														
 
															-            self.config["header_block_idxes"].append(block_idx)
														
 
															-        if block_label in footer_labels:
														
 
															-            self.config["footer_block_idxes"].append(block_idx)
														
 
															+        return xycut_enhanced(self)
														
--- a/paddlex/inference/pipelines/layout_parsing/setting.py
+++ b/paddlex/inference/pipelines/layout_parsing/setting.py
@@ -12,32 +12,35 @@
 
															 # See the License for the specific language governing permissions and
														
 
															 # limitations under the License.
														
 
															-parameters_config = {
														
 
															-    "page": {},
														
 
															-    "region": {
														
 
															-        "match_block_overlap_ratio_threshold": 0.8,
														
 
															-        "split_block_overlap_ratio_threshold": 0.4,
														
 
															-    },
														
 
															-    "block": {
														
 
															-        "title_conversion_area_ratio_threshold": 0.3,  # update paragraph_title -> doc_title
														
 
															-    },
														
 
															-    "line": {
														
 
															-        "line_height_iou_threshold": 0.6,  # For line segmentation of OCR results
														
 
															-        "delimiter_map": {
														
 
															-            "doc_title": " ",
														
 
															-            "content": "\n",
														
 
															-        },
														
 
															-    },
														
 
															-    "word": {
														
 
															-        "delimiter": " ",
														
 
															+
														
 
															+XYCUT_SETTINGS = {
														
 
															+    "child_block_overlap_ratio_threshold": 0.1,
														
 
															+    "edge_distance_compare_tolerance_len": 2,
														
 
															+    "distance_weight_map": {
														
 
															+        "edge_weight": 10**4,
														
 
															+        "up_edge_weight": 1,
														
 
															+        "down_edge_weight": 0.0001,
														
 
															     },
														
 
															-    "order": {
														
 
															-        "block_label_match_iou_threshold": 0.1,
														
 
															-        "block_title_match_iou_threshold": 0.1,
														
 
															+}
														
 
															+
														
 
															+REGION_SETTINGS = {
														
 
															+    "match_block_overlap_ratio_threshold": 0.6,
														
 
															+    "split_block_overlap_ratio_threshold": 0.4,
														
 
															+}
														
 
															+
														
 
															+BLOCK_SETTINGS = {
														
 
															+    "title_conversion_area_ratio_threshold": 0.3,  # update paragraph_title -> doc_title
														
 
															+}
														
 
															+
														
 
															+LINE_SETTINGS = {
														
 
															+    "line_height_iou_threshold": 0.6,  # For line segmentation of OCR results
														
 
															+    "delimiter_map": {
														
 
															+        "doc_title": " ",
														
 
															+        "content": "\n",
														
 
															     },
														
 
															 }
														
 
															-block_label_mapping = {
														
 
															+BLOCK_LABEL_MAP = {
														
 
															     "doc_title_labels": ["doc_title"],  # 文档标题
														
 
															     "paragraph_title_labels": [
														
 
															         "paragraph_title",
														
--- a/paddlex/inference/pipelines/layout_parsing/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/utils.py
@@ -27,6 +27,7 @@ from PIL import Image
 
															 from ..components import convert_points_to_boxes
														
 
															 from ..ocr.result import OCRResult
														
 
															+from .setting import REGION_SETTINGS
														
 
															 def get_overlap_boxes_idx(src_boxes: np.ndarray, ref_boxes: np.ndarray) -> List:
														
@@ -173,7 +174,7 @@ def sorted_layout_boxes(res, w):
 
															 def calculate_projection_overlap_ratio(
														
 
															     bbox1: List[float],
														
 
															     bbox2: List[float],
														
 
															-    orientation: str = "horizontal",
														
 
															+    direction: str = "horizontal",
														
 
															     mode="union",
														
 
															 ) -> float:
														
 
															     """
														
@@ -182,13 +183,13 @@ def calculate_projection_overlap_ratio(
 
															     Args:
														
 
															         bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
														
 
															         bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
														
 
															-        orientation (str): orientation of the projection, "horizontal" or "vertical".
														
 
															+        direction (str): direction of the projection, "horizontal" or "vertical".
														
 
															     Returns:
														
 
															         float: Line overlap ratio. Returns 0 if there is no overlap.
														
 
															     """
														
 
															     start_index, end_index = 1, 3
														
 
															-    if orientation == "horizontal":
														
 
															+    if direction == "horizontal":
														
 
															         start_index, end_index = 0, 2
														
 
															     intersection_start = max(bbox1[start_index], bbox2[start_index])
														
@@ -241,8 +242,8 @@ def calculate_overlap_ratio(
 
															     inter_area = inter_width * inter_height
														
 
															-    bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
														
 
															-    bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
														
 
															+    bbox1_area = caculate_bbox_area(bbox1)
														
 
															+    bbox2_area = caculate_bbox_area(bbox2)
														
 
															     if mode == "union":
														
 
															         ref_area = bbox1_area + bbox2_area - inter_area
														
@@ -271,7 +272,7 @@ def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
 
															     ]
														
 
															     text_orientation = calculate_text_orientation(text_boxes)
														
 
															-    match_orientation = "vertical" if text_orientation == "horizontal" else "horizontal"
														
 
															+    match_direction = "vertical" if text_orientation == "horizontal" else "horizontal"
														
 
															     spans = list(zip(rec_boxes, rec_texts, rec_labels))
														
 
															     sort_index = 1
														
@@ -284,14 +285,14 @@ def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
 
															     lines = []
														
 
															     line = [spans[0]]
														
 
															-    line_region_box = spans[0][0][:]
														
 
															+    line_region_box = spans[0][0].copy()
														
 
															     # merge line
														
 
															     for span in spans[1:]:
														
 
															         rec_bbox = span[0]
														
 
															         if (
														
 
															             calculate_projection_overlap_ratio(
														
 
															-                line_region_box, rec_bbox, match_orientation, mode="small"
														
 
															+                line_region_box, rec_bbox, match_direction, mode="small"
														
 
															             )
														
 
															             >= line_height_iou_threshold
														
 
															         ):
														
@@ -301,7 +302,7 @@ def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
 
															         else:
														
 
															             lines.append(line)
														
 
															             line = [span]
														
 
															-            line_region_box = rec_bbox[:]
														
 
															+            line_region_box = rec_bbox.copy()
														
 
															     lines.append(line)
														
 
															     return lines, text_orientation
														
@@ -365,12 +366,31 @@ def is_english_letter(char):
 
															     return bool(re.match(r"^[A-Za-z]$", char))
														
 
															+def is_non_breaking_punctuation(char):
														
 
															+    """
														
 
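															+    Determine whether a character is a punctuation mark that does not require a line break, covering both full-width and half-width forms.
														
 
															+
														
 
															+    :param char: str, a single character
														
 
															+    :return: bool, True if the character is a punctuation mark that does not require a line break, otherwise False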
														
 
															+    """
														
 
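															+    non_breaking_punctuations = {
														
 
															+        ",",  # half-width comma
														
 
															+        "，",  # full-width comma
														
 
															+        "、",  # enumeration comma (ideographic comma)
														
 
															+        ";",  # half-width semicolon
														
 
															+        "；",  # full-width semicolon
														
 
															+        ":",  # half-width colon
														
 
															+        "：",  # full-width colon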
														
 
															+    }
														
 
															+
														
 
															+    return char in non_breaking_punctuations
														
 
															+
														
 
															+
														
 
															 def format_line(
														
 
															     line: List[List[Union[List[int], str]]],
														
 
															     block_right_coordinate: int,
														
 
															     last_line_span_limit: int = 10,
														
 
															     block_label: str = "text",
														
 
															-    # delimiter_map: Dict = {},
														
 
															 ) -> None:
														
 
															     """
														
 
															     Format a line of text spans based on layout constraints.
														
@@ -402,6 +422,7 @@ def format_line(
 
															         and not line_text.endswith("-")
														
 
															         and len(line_text) > 0
														
 
															         and not is_english_letter(line_text[-1])
														
 
															+        and not is_non_breaking_punctuation(line_text[-1])
														
 
															     ):
														
 
															         need_new_line = True
														
@@ -415,37 +436,35 @@ def format_line(
 
															     return line_text, need_new_line
														
 
															-def split_boxes_by_projection(spans: List[List[int]], orientation, offset=1e-5):
														
 
															+def split_boxes_by_projection(spans: List[List[int]], direction, offset=1e-5):
														
 
															     """
														
 
															-    Check if there is any complete containment in the x-orientation
														
 
															+    Check if there is any complete containment along the projection axis selected by `direction`
														
 
															     between the bounding boxes and split the containing box accordingly.
														
 
															     Args:
														
 
															         spans (list of lists): Each element is a list containing an ndarray of length 4, a text string, and a label.
														
 
															-        orientation: 'horizontal' or 'vertical', indicating whether the spans are arranged horizontally or vertically.
														
 
															+        direction: 'horizontal' or 'vertical', indicating whether the spans are arranged horizontally or vertically.
														
 
															         offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
														
 
															     Returns:
														
 
															         A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
														
 
															     """
														
 
															     def is_projection_contained(box_a, box_b, start_idx, end_idx):
														
 
															-        """Check if box_a completely contains box_b in the x-orientation."""
														
 
															+        """Check if box_a completely contains box_b along the axis given by start_idx/end_idx."""
														
 
															         return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]
														
 
															     new_boxes = []
														
 
															-    if orientation == "horizontal":
														
 
															+    if direction == "horizontal":
														
 
															         projection_start_index, projection_end_index = 0, 2
														
 
															     else:
														
 
															         projection_start_index, projection_end_index = 1, 3
														
 
															     for i in range(len(spans)):
														
 
															         span = spans[i]
														
 
															-        box_a, text, label = span
														
 
															         is_split = False
														
 
															-        for j in range(len(spans)):
														
 
															-            if i == j:
														
 
															-                continue
														
 
															+        for j in range(i, len(spans)):
														
 
															             box_b = spans[j][0]
														
 
															+            box_a, text, label = span
														
 
															             if is_projection_contained(
														
 
															                 box_a, box_b, projection_start_index, projection_end_index
														
 
															             ):
														
@@ -458,12 +477,13 @@ def split_boxes_by_projection(spans: List[List[int]], orientation, offset=1e-5):
 
															                         - box_a[projection_start_index]
														
 
															                     )
														
 
															                     if w > 1:
														
 
															-                        box_a[projection_end_index] = (
														
 
															+                        new_bbox = box_a.copy()
														
 
															+                        new_bbox[projection_end_index] = (
														
 
															                             box_b[projection_start_index] - offset
														
 
															                         )
														
 
															                         new_boxes.append(
														
 
															                             [
														
 
															-                                np.array(box_a),
														
 
															+                                np.array(new_bbox),
														
 
															                                 text,
														
 
															                                 label,
														
 
															                             ]
														
@@ -562,8 +582,8 @@ def _get_minbox_if_overlap_by_ratio(
 
															             The selected bounding box or None if the overlap ratio is not exceeded.
														
 
															     """
														
 
															     # Calculate the areas of both bounding boxes
														
 
															-    area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
														
 
															-    area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
														
 
															+    area1 = caculate_bbox_area(bbox1)
														
 
															+    area2 = caculate_bbox_area(bbox2)
														
 
															     # Calculate the overlap ratio using a helper function
														
 
															     overlap_ratio = calculate_overlap_ratio(bbox1, bbox2, mode="small")
														
 
															     # Check if the overlap ratio exceeds the threshold
														
@@ -683,7 +703,6 @@ def shrink_supplement_region_bbox(
 
															     image_height,
														
 
															     block_idxes_set,
														
 
															     block_bboxes,
														
 
															-    parameters_config,
														
 
															 ) -> List:
														
 
															     """
														
 
															     Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.
														
@@ -695,7 +714,6 @@ def shrink_supplement_region_bbox(
 
															         image_height (int): The height of the image.
														
 
															         block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
														
 
															         block_bboxes (dict): The dictionary of block bboxes.
														
 
															-        parameters_config (dict): The configuration parameters.
														
 
															     Returns:
														
 
															         list: The new region bbox and the matched block idxes.
														
@@ -723,11 +741,11 @@ def shrink_supplement_region_bbox(
 
															             overlap_ratio = calculate_overlap_ratio(
														
 
															                 tmp_region_bbox, block_bboxes[block_idx], mode="small"
														
 
															             )
														
 
															-            if overlap_ratio > parameters_config["region"].get(
														
 
															+            if overlap_ratio > REGION_SETTINGS.get(
														
 
															                 "match_block_overlap_ratio_threshold", 0.8
														
 
															             ):
														
 
															                 iner_block_idxes.append(block_idx)
														
 
															-            elif overlap_ratio > parameters_config["region"].get(
														
 
															+            elif overlap_ratio > REGION_SETTINGS.get(
														
 
															                 "split_block_overlap_ratio_threshold", 0.4
														
 
															             ):
														
 
															                 split_block_idxes.append(block_idx)
														
@@ -755,7 +773,6 @@ def shrink_supplement_region_bbox(
 
															                         image_height,
														
 
															                         iner_block_idxes,
														
 
															                         block_bboxes,
														
 
															-                        parameters_config,
														
 
															                     )
														
 
															                     if len(iner_idxes) == 0:
														
 
															                         continue
														
@@ -799,50 +816,68 @@ def convert_formula_res_to_ocr_format(formula_res_list: List, ocr_res: dict):
 
															         ]
														
 
															         ocr_res["dt_polys"].append(poly_points)
														
 
															         ocr_res["rec_texts"].append(f"{formula_res['rec_formula']}")
														
 
															-        ocr_res["rec_boxes"] = np.vstack(
														
 
															-            (ocr_res["rec_boxes"], [formula_res["dt_polys"]])
														
 
															-        )
														
 
															+        if ocr_res["rec_boxes"].size == 0:
														
 
															+            ocr_res["rec_boxes"] = np.array(formula_res["dt_polys"])
														
 
															+        else:
														
 
															+            ocr_res["rec_boxes"] = np.vstack(
														
 
															+                (ocr_res["rec_boxes"], [formula_res["dt_polys"]])
														
 
															+            )
														
 
															         ocr_res["rec_labels"].append("formula")
														
 
															         ocr_res["rec_polys"].append(poly_points)
														
 
															         ocr_res["rec_scores"].append(1)
														
 
															 def caculate_bbox_area(bbox):
														
 
															-    x1, y1, x2, y2 = bbox
														
 
															+    x1, y1, x2, y2 = map(float, bbox)
														
 
															     area = abs((x2 - x1) * (y2 - y1))
														
 
															     return area
														
 
															-def get_show_color(label: str) -> Tuple:
														
 
															-    label_colors = {
														
 
															-        # Medium Blue (from 'titles_list')
														
 
															-        "paragraph_title": (102, 102, 255, 100),
														
 
															-        "doc_title": (255, 248, 220, 100),  # Cornsilk
														
 
															-        # Light Yellow (from 'tables_caption_list')
														
 
															-        "table_title": (255, 255, 102, 100),
														
 
															-        # Sky Blue (from 'imgs_caption_list')
														
 
															-        "figure_title": (102, 178, 255, 100),
														
 
															-        "chart_title": (221, 160, 221, 100),  # Plum
														
 
															-        "vision_footnote": (144, 238, 144, 100),  # Light Green
														
 
															-        # Deep Purple (from 'texts_list')
														
 
															-        "text": (153, 0, 76, 100),
														
 
															-        # Bright Green (from 'interequations_list')
														
 
															-        "formula": (0, 255, 0, 100),
														
 
															-        "abstract": (255, 239, 213, 100),  # Papaya Whip
														
 
															-        # Medium Green (from 'lists_list' and 'indexs_list')
														
 
															-        "content": (40, 169, 92, 100),
														
 
															-        # Neutral Gray (from 'dropped_bbox_list')
														
 
															-        "seal": (158, 158, 158, 100),
														
 
															-        # Olive Yellow (from 'tables_body_list')
														
 
															-        "table": (204, 204, 0, 100),
														
 
															-        # Bright Green (from 'imgs_body_list')
														
 
															-        "image": (153, 255, 51, 100),
														
 
															-        # Bright Green (from 'imgs_body_list')
														
 
															-        "figure": (153, 255, 51, 100),
														
 
															-        "chart": (216, 191, 216, 100),  # Thistle
														
 
															-        # Pale Yellow-Green (from 'tables_footnote_list')
														
 
															-        "reference": (229, 255, 204, 100),
														
 
															-        "algorithm": (255, 250, 240, 100),  # Floral White
														
 
															-    }
														
 
															+def get_show_color(label: str, order_label=False) -> Tuple:
														
 
															+    if order_label:
														
 
															+        label_colors = {
														
 
															+            "doc_title": (255, 248, 220, 100),  # Cornsilk
														
 
															+            "doc_title_text": (255, 239, 213, 100),
														
 
															+            "paragraph_title": (102, 102, 255, 100),
														
 
															+            "sub_paragraph_title": (102, 178, 255, 100),
														
 
															+            "vision": (153, 255, 51, 100),
														
 
															+            "vision_title": (144, 238, 144, 100),  # Light Green
														
 
															+            "vision_footnote": (144, 238, 144, 100),  # Light Green
														
 
															+            "normal_text": (153, 0, 76, 100),
														
 
															+            "cross_layout": (53, 218, 207, 100),  # Thistle
														
 
															+            "cross_reference": (221, 160, 221, 100),  # Floral White
														
 
															+        }
														
 
															+    else:
														
 
															+        label_colors = {
														
 
															+            # Medium Blue (from 'titles_list')
														
 
															+            "paragraph_title": (102, 102, 255, 100),
														
 
															+            "doc_title": (255, 248, 220, 100),  # Cornsilk
														
 
															+            # Light Yellow (from 'tables_caption_list')
														
 
															+            "table_title": (255, 255, 102, 100),
														
 
															+            # Sky Blue (from 'imgs_caption_list')
														
 
															+            "figure_title": (102, 178, 255, 100),
														
 
															+            "chart_title": (221, 160, 221, 100),  # Plum
														
 
															+            "vision_footnote": (144, 238, 144, 100),  # Light Green
														
 
															+            # Deep Purple (from 'texts_list')
														
 
															+            "text": (153, 0, 76, 100),
														
 
															+            # Bright Green (from 'interequations_list')
														
 
															+            "formula": (0, 255, 0, 100),
														
 
															+            "abstract": (255, 239, 213, 100),  # Papaya Whip
														
 
															+            # Medium Green (from 'lists_list' and 'indexs_list')
														
 
															+            "content": (40, 169, 92, 100),
														
 
															+            # Neutral Gray (from 'dropped_bbox_list')
														
 
															+            "seal": (158, 158, 158, 100),
														
 
															+            # Olive Yellow (from 'tables_body_list')
														
 
															+            "table": (204, 204, 0, 100),
														
 
															+            # Bright Green (from 'imgs_body_list')
														
 
															+            "image": (153, 255, 51, 100),
														
 
															+            # Bright Green (from 'imgs_body_list')
														
 
															+            "figure": (153, 255, 51, 100),
														
 
															+            "chart": (216, 191, 216, 100),  # Thistle
														
 
															+            # Pale Yellow-Green (from 'tables_footnote_list')
														
 
															+            "reference": (229, 255, 204, 100),
														
 
															+            # "reference_content": (229, 255, 204, 100),
														
 
															+            "algorithm": (255, 250, 240, 100),  # Floral White
														
 
															+        }
														
 
															     default_color = (158, 158, 158, 100)
														
 
															     return label_colors.get(label, default_color)
														
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py
@@ -12,11 +12,12 @@
 
															 # See the License for the specific language governing permissions and
														
 
															 # limitations under the License.
														
 
															-from typing import Dict, List, Tuple
														
 
															+from typing import List, Tuple
														
 
															 import numpy as np
														
 
															-from ..result_v2 import LayoutParsingBlock
														
 
															+from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
														
 
															+from ..setting import BLOCK_LABEL_MAP, XYCUT_SETTINGS
														
 
															 from ..utils import calculate_projection_overlap_ratio
														
@@ -26,12 +27,12 @@ def get_nearest_edge_distance(
 
															     weight: List[float] = [1.0, 1.0, 1.0, 1.0],
														
 
															 ) -> Tuple[float]:
														
 
															     """
														
 
															-    Calculate the nearest edge distance between two bounding boxes, considering orientational weights.
														
 
															+    Calculate the nearest edge distance between two bounding boxes, considering directional weights.
														
 
															     Args:
														
 
															         bbox1 (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
														
 
															         bbox2 (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
														
 
															-        weight (list, optional): orientational weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
														
 
															+        weight (list, optional): directional weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
														
 
															     Returns:
														
 
															         float: The calculated minimum edge distance between the bounding boxes.
														
@@ -254,8 +255,7 @@ def recursive_xy_cut(
 
															 def reference_insert(
														
 
															     block: LayoutParsingBlock,
														
 
															     sorted_blocks: List[LayoutParsingBlock],
														
 
															-    config: Dict,
														
 
															-    median_width: float = 0.0,
														
 
															+    **kwargs,
														
 
															 ):
														
 
															     """
														
 
															     Insert reference block into sorted blocks based on the distance between the block and the nearest sorted block.
														
@@ -285,8 +285,7 @@ def reference_insert(
 
															 def manhattan_insert(
														
 
															     block: LayoutParsingBlock,
														
 
															     sorted_blocks: List[LayoutParsingBlock],
														
 
															-    config: Dict,
														
 
															-    median_width: float = 0.0,
														
 
															+    **kwargs,
														
 
															 ):
														
 
															     """
														
 
															     Insert a block into a sorted list of blocks based on the Manhattan distance between the block and the nearest sorted block.
														
@@ -315,8 +314,7 @@ def manhattan_insert(
 
															 def weighted_distance_insert(
														
 
															     block: LayoutParsingBlock,
														
 
															     sorted_blocks: List[LayoutParsingBlock],
														
 
															-    config: Dict,
														
 
															-    median_width: float = 0.0,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ):
														
 
															     """
														
 
															     Insert a block into a sorted list of blocks based on the weighted distance between the block and the nearest sorted block.
														
@@ -330,11 +328,8 @@ def weighted_distance_insert(
 
															     Returns:
														
 
															         sorted_blocks: The updated sorted blocks after insertion.
														
 
															     """
														
 
															-    doc_title_labels = config.get("doc_title_labels", [])
														
 
															-    paragraph_title_labels = config.get("paragraph_title_labels", [])
														
 
															-    vision_labels = config.get("vision_labels", [])
														
 
															-    xy_cut_block_labels = config.get("xy_cut_block_labels", [])
														
 
															-    tolerance_len = config.get("tolerance_len", 2)
														
 
															+
														
 
															+    tolerance_len = XYCUT_SETTINGS["edge_distance_compare_tolerance_len"]
														
 
															     x1, y1, x2, y2 = block.bbox
														
 
															     min_weighted_distance, min_edge_distance, min_up_edge_distance = (
														
 
															         float("inf"),
														
@@ -347,36 +342,43 @@ def weighted_distance_insert(
 
															         x1_prime, y1_prime, x2_prime, y2_prime = sorted_block.bbox
														
 
															         # Calculate edge distance
														
 
															-        weight = _get_weights(block.order_label, block.orientation)
														
 
															+        weight = _get_weights(block.order_label, block.direction)
														
 
															         edge_distance = get_nearest_edge_distance(block.bbox, sorted_block.bbox, weight)
														
 
															-        if block.label in doc_title_labels:
														
 
															-            disperse = max(1, median_width)
														
 
															+        if block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
														
 
															+            disperse = max(1, region.text_line_width)
														
 
															             tolerance_len = max(tolerance_len, disperse)
														
 
															         if block.label == "abstract":
														
 
															             tolerance_len *= 2
														
 
															             edge_distance = max(0.1, edge_distance) * 10
														
 
															         # Calculate up edge distances
														
 
															-        up_edge_distance = y1_prime
														
 
															-        left_edge_distance = x1_prime
														
 
															+        up_edge_distance = y1_prime if region.direction == "horizontal" else -x2_prime
														
 
															+        left_edge_distance = x1_prime if region.direction == "horizontal" else y1_prime
														
 
															+        is_below_sorted_block = (
														
 
															+            y2_prime < y1 if region.direction == "horizontal" else x1_prime > x2
														
 
															+        )
														
 
															+
														
 
															         if (
														
 
															-            block.label in xy_cut_block_labels
														
 
															-            or block.label in doc_title_labels
														
 
															-            or block.label in paragraph_title_labels
														
 
															-            or block.label in vision_labels
														
 
															-        ) and y1 > y2_prime:
														
 
															-            up_edge_distance = -y2_prime
														
 
															-            left_edge_distance = -x2_prime
														
 
															+            block.label not in BLOCK_LABEL_MAP["unordered_labels"]
														
 
															+            or block.label in BLOCK_LABEL_MAP["doc_title_labels"]
														
 
															+            or block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]
														
 
															+            or block.label in BLOCK_LABEL_MAP["vision_labels"]
														
 
															+        ) and is_below_sorted_block:
														
 
															+            up_edge_distance = -up_edge_distance
														
 
															+            left_edge_distance = -left_edge_distance
														
 
															         if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
														
 
															             up_edge_distance = min_up_edge_distance
														
 
															         # Calculate weighted distance
														
 
															         weighted_distance = (
														
 
															-            +edge_distance * config.get("edge_weight", 10**4)
														
 
															-            + up_edge_distance * config.get("up_edge_weight", 1)
														
 
															-            + left_edge_distance * config.get("left_edge_weight", 0.0001)
														
 
															+            +edge_distance
														
 
															+            * XYCUT_SETTINGS["distance_weight_map"].get("edge_weight", 10**4)
														
 
															+            + up_edge_distance
														
 
															+            * XYCUT_SETTINGS["distance_weight_map"].get("up_edge_weight", 1)
														
 
															+            + left_edge_distance
														
 
															+            * XYCUT_SETTINGS["distance_weight_map"].get("left_edge_weight", 0.0001)
														
 
															         )
														
 
															         min_edge_distance = min(edge_distance, min_edge_distance)
														
@@ -411,7 +413,7 @@ def insert_child_blocks(
 
															     if block.child_blocks:
														
 
															         sub_blocks = block.get_child_blocks()
														
 
															         sub_blocks.append(block)
														
 
															-        sub_blocks = sort_child_blocks(sub_blocks, block.orientation)
														
 
															+        sub_blocks = sort_child_blocks(sub_blocks, block.direction)
														
 
															         sorted_blocks[block_idx] = sub_blocks[0]
														
 
															         for block in sub_blocks[1:]:
														
 
															             block_idx += 1
														
@@ -419,17 +421,17 @@ def insert_child_blocks(
 
															     return sorted_blocks
														
 
															-def sort_child_blocks(blocks, orientation="horizontal") -> List[LayoutParsingBlock]:
														
 
															+def sort_child_blocks(blocks, direction="horizontal") -> List[LayoutParsingBlock]:
														
 
															     """
														
 
															     Sort child blocks based on their bounding box coordinates.
														
 
															     Args:
														
 
															         blocks: A list of LayoutParsingBlock objects representing the child blocks.
														
 
															-        orientation: Orientation of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
														
 
															+        direction: direction of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
														
 
															     Returns:
														
 
															         sorted_blocks: A sorted list of LayoutParsingBlock objects.
														
 
															     """
														
 
															-    if orientation == "horizontal":
														
 
															+    if direction == "horizontal":
														
 
															         # from top to bottom
														
 
															         blocks.sort(
														
 
															             key=lambda x: (
														
@@ -453,7 +455,7 @@ def sort_child_blocks(blocks, orientation="horizontal") -> List[LayoutParsingBlo
 
															 def _get_weights(label, dircetion="horizontal"):
														
 
															-    """Define weights based on the label and orientation."""
														
 
															+    """Define weights based on the label and direction."""
														
 
															     if label == "doc_title":
														
 
															         return (
														
 
															             [1, 0.1, 0.1, 1] if dircetion == "horizontal" else [0.2, 0.1, 1, 1]
														
@@ -518,15 +520,35 @@ def sort_blocks(blocks, median_width=None, reverse=False):
 
															     return blocks
														
 
															+def sort_normal_blocks(blocks, text_line_height, text_line_width, region_direction):
														
 
															+    if region_direction == "horizontal":
														
 
															+        blocks.sort(
														
 
															+            key=lambda x: (
														
 
															+                x.bbox[1] // text_line_height,
														
 
															+                x.bbox[0] // text_line_width,
														
 
															+                x.bbox[1] ** 2 + x.bbox[0] ** 2,
														
 
															+            ),
														
 
															+        )
														
 
															+    else:
														
 
															+        blocks.sort(
														
 
															+            key=lambda x: (
														
 
															+                -x.bbox[0] // text_line_width,
														
 
															+                x.bbox[1] // text_line_height,
														
 
															+                -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
														
 
															+            ),
														
 
															+        )
														
 
															+    return blocks
														
 
															+
														
 
															+
														
 
															 def get_cut_blocks(
														
 
															-    blocks, cut_orientation, cut_coordinates, overall_region_box, mask_labels=[]
														
 
															+    blocks, cut_direction, cut_coordinates, overall_region_box, mask_labels=[]
														
 
															 ):
														
 
															     """
														
 
															-    Cut blocks based on the given cut orientation and coordinates.
														
 
															+    Cut blocks based on the given cut direction and coordinates.
														
 
															     Args:
														
 
															         blocks (list): list of blocks to be cut.
														
 
															-        cut_orientation (str): cut orientation, either "horizontal" or "vertical".
														
 
															+        cut_direction (str): cut direction, either "horizontal" or "vertical".
														
 
															         cut_coordinates (list): list of cut coordinates.
														
 
															         overall_region_box (list): the overall region box that contains all blocks.
														
@@ -537,7 +559,7 @@ def get_cut_blocks(
 
															     # filter out mask blocks,including header, footer, unordered and child_blocks
														
 
															     # 0: horizontal, 1: vertical
														
 
															-    cut_aixis = 0 if cut_orientation == "horizontal" else 1
														
 
															+    cut_aixis = 0 if cut_direction == "horizontal" else 1
														
 
															     blocks.sort(key=lambda x: x.bbox[cut_aixis + 2])
														
 
															     cut_coordinates.append(float("inf"))
														
@@ -567,7 +589,7 @@ def add_split_block(
 
															 ) -> List[LayoutParsingBlock]:
														
 
															     block_bboxes = np.array([block.bbox for block in blocks])
														
 
															     discontinuous = calculate_discontinuous_projection(
														
 
															-        block_bboxes, orientation="vertical"
														
 
															+        block_bboxes, direction="vertical"
														
 
															     )
														
 
															     current_interval = discontinuous[0]
														
 
															     for interval in discontinuous[1:]:
														
@@ -582,22 +604,62 @@ def add_split_block(
 
															         current_interval = interval
														
 
															-def get_adjacent_blocks_by_orientation(
														
 
															+def get_nearest_blocks(
														
 
															+    block: LayoutParsingBlock,
														
 
															+    ref_blocks: List[LayoutParsingBlock],
														
 
															+    overlap_threshold,
														
 
															+    direction="horizontal",
														
 
															+) -> List:
														
 
															+    """
														
 
															+    Get the adjacent blocks with the same direction as the current block.
														
 
															+    Args:
														
 
															+        block (LayoutParsingBlock): The current block.
														
 
															+        blocks (List[LayoutParsingBlock]): A list of all blocks.
														
 
															+        ref_block_idxes (List[int]): A list of indices of reference blocks.
														
 
															+        iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
														
 
															+    Returns:
														
 
															+        Int: The index of the previous block with same direction.
														
 
															+        Int: The index of the following block with same direction.
														
 
															+    """
														
 
															+    prev_blocks: List[LayoutParsingBlock] = []
														
 
															+    post_blocks: List[LayoutParsingBlock] = []
														
 
															+    sort_index = 1 if direction == "horizontal" else 0
														
 
															+    for ref_block in ref_blocks:
														
 
															+        if ref_block.index == block.index:
														
 
															+            continue
														
 
															+        overlap_ratio = calculate_projection_overlap_ratio(
														
 
															+            block.bbox, ref_block.bbox, direction, mode="small"
														
 
															+        )
														
 
															+        if overlap_ratio > overlap_threshold:
														
 
															+            if ref_block.bbox[sort_index] <= block.bbox[sort_index]:
														
 
															+                prev_blocks.append(ref_block)
														
 
															+            else:
														
 
															+                post_blocks.append(ref_block)
														
 
															+
														
 
															+    if prev_blocks:
														
 
															+        prev_blocks.sort(key=lambda x: x.bbox[sort_index], reverse=True)
														
 
															+    if post_blocks:
														
 
															+        post_blocks.sort(key=lambda x: x.bbox[sort_index])
														
 
															+
														
 
															+    return prev_blocks, post_blocks
														
 
															+
														
 
															+
														
 
															+def get_adjacent_blocks_by_direction(
														
 
															     blocks: List[LayoutParsingBlock],
														
 
															     block_idx: int,
														
 
															     ref_block_idxes: List[int],
														
 
															     iou_threshold,
														
 
															 ) -> List:
														
 
															     """
														
 
															-    Get the adjacent blocks with the same orientation as the current block.
														
 
															+    Get the adjacent blocks with the same direction as the current block.
														
 
															     Args:
														
 
															         block (LayoutParsingBlock): The current block.
														
 
															         blocks (List[LayoutParsingBlock]): A list of all blocks.
														
 
															         ref_block_idxes (List[int]): A list of indices of reference blocks.
														
 
															         iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
														
 
															     Returns:
														
 
															-        Int: The index of the previous block with same orientation.
														
 
															-        Int: The index of the following block with same orientation.
														
 
															+        Int: The index of the previous block with same direction.
														
 
															+        Int: The index of the following block with same direction.
														
 
															     """
														
 
															     min_prev_block_distance = float("inf")
														
 
															     prev_block_index = None
														
@@ -611,16 +673,16 @@ def get_adjacent_blocks_by_orientation(
 
															         "vision_title",
														
 
															     ]
														
 
															-    # find the nearest text block with same orientation to the current block
														
 
															+    # find the nearest text block with same direction to the current block
														
 
															     for ref_block_idx in ref_block_idxes:
														
 
															         ref_block = blocks[ref_block_idx]
														
 
															-        ref_block_orientation = ref_block.orientation
														
 
															+        ref_block_direction = ref_block.direction
														
 
															         if ref_block.order_label in child_labels:
														
 
															             continue
														
 
															         match_block_iou = calculate_projection_overlap_ratio(
														
 
															             block.bbox,
														
 
															             ref_block.bbox,
														
 
															-            ref_block_orientation,
														
 
															+            ref_block_direction,
														
 
															         )
														
 
															         child_match_distance_tolerance_len = block.short_side_length / 10
														
@@ -635,38 +697,38 @@ def get_adjacent_blocks_by_orientation(
 
															         if match_block_iou >= iou_threshold:
														
 
															             prev_distance = (
														
 
															-                block.secondary_orientation_start_coordinate
														
 
															-                - ref_block.secondary_orientation_end_coordinate
														
 
															+                block.secondary_direction_start_coordinate
														
 
															+                - ref_block.secondary_direction_end_coordinate
														
 
															                 + child_match_distance_tolerance_len
														
 
															             ) // 5 + ref_block.start_coordinate / 5000
														
 
															             next_distance = (
														
 
															-                ref_block.secondary_orientation_start_coordinate
														
 
															-                - block.secondary_orientation_end_coordinate
														
 
															+                ref_block.secondary_direction_start_coordinate
														
 
															+                - block.secondary_direction_end_coordinate
														
 
															                 + child_match_distance_tolerance_len
														
 
															             ) // 5 + ref_block.start_coordinate / 5000
														
 
															             if (
														
 
															-                ref_block.secondary_orientation_end_coordinate
														
 
															-                <= block.secondary_orientation_start_coordinate
														
 
															+                ref_block.secondary_direction_end_coordinate
														
 
															+                <= block.secondary_direction_start_coordinate
														
 
															                 + child_match_distance_tolerance_len
														
 
															                 and prev_distance < min_prev_block_distance
														
 
															             ):
														
 
															                 min_prev_block_distance = prev_distance
														
 
															                 if (
														
 
															-                    block.secondary_orientation_start_coordinate
														
 
															-                    - ref_block.secondary_orientation_end_coordinate
														
 
															+                    block.secondary_direction_start_coordinate
														
 
															+                    - ref_block.secondary_direction_end_coordinate
														
 
															                     < gap_tolerance_len
														
 
															                 ):
														
 
															                     prev_block_index = ref_block_idx
														
 
															             elif (
														
 
															-                ref_block.secondary_orientation_start_coordinate
														
 
															-                > block.secondary_orientation_end_coordinate
														
 
															+                ref_block.secondary_direction_start_coordinate
														
 
															+                > block.secondary_direction_end_coordinate
														
 
															                 - child_match_distance_tolerance_len
														
 
															                 and next_distance < min_post_block_distance
														
 
															             ):
														
 
															                 min_post_block_distance = next_distance
														
 
															                 if (
														
 
															-                    ref_block.secondary_orientation_start_coordinate
														
 
															-                    - block.secondary_orientation_end_coordinate
														
 
															+                    ref_block.secondary_direction_start_coordinate
														
 
															+                    - block.secondary_direction_end_coordinate
														
 
															                     < gap_tolerance_len
														
 
															                 ):
														
 
															                     post_block_index = ref_block_idx
														
@@ -684,21 +746,19 @@ def get_adjacent_blocks_by_orientation(
 
															 def update_doc_title_child_blocks(
														
 
															-    blocks: List[LayoutParsingBlock],
														
 
															     block: LayoutParsingBlock,
														
 
															-    prev_idx: int,
														
 
															-    post_idx: int,
														
 
															-    config: dict,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ) -> None:
														
 
															     """
														
 
															     Update the child blocks of a document title block.
														
 
															     The child blocks need to meet the following conditions:
														
 
															         1. They must be adjacent
														
 
															-        2. They must have the same orientation as the parent block.
														
 
															+        2. They must have the same direction as the parent block.
														
 
															         3. Their short side length should be less than 80% of the parent's short side length.
														
 
															         4. Their long side length should be less than 150% of the parent's long side length.
														
 
															         5. The child block must be text block.
														
 
															+        6. The nearest edge distance should be less than 2 times the text line height.
														
 
															     Args:
														
 
															         blocks (List[LayoutParsingBlock]): overall blocks.
														
@@ -711,11 +771,23 @@ def update_doc_title_child_blocks(
 
															         None
														
 
															     """
														
 
															-    for idx in [prev_idx, post_idx]:
														
 
															-        if idx is None:
														
 
															+    ref_blocks = [region.block_map[idx] for idx in region.normal_text_block_idxes]
														
 
															+    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
														
 
															+    prev_blocks, post_blocks = get_nearest_blocks(
														
 
															+        block, ref_blocks, overlap_threshold, block.direction
														
 
															+    )
														
 
															+    prev_block = None
														
 
															+    post_block = None
														
 
															+
														
 
															+    if prev_blocks:
														
 
															+        prev_block = prev_blocks[0]
														
 
															+    if post_blocks:
														
 
															+        post_block = post_blocks[0]
														
 
															+
														
 
															+    for ref_block in [prev_block, post_block]:
														
 
															+        if ref_block is None:
														
 
															             continue
														
 
															-        ref_block = blocks[idx]
														
 
															-        with_seem_orientation = ref_block.orientation == block.orientation
														
 
															+        with_seem_direction = ref_block.direction == block.direction
														
 
															         short_side_length_condition = (
														
 
															             ref_block.short_side_length < block.short_side_length * 0.8
														
@@ -726,30 +798,31 @@ def update_doc_title_child_blocks(
 
															             or ref_block.long_side_length > 1.5 * block.long_side_length
														
 
															         )
														
 
															+        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
														
 
															+
														
 
															         if (
														
 
															-            with_seem_orientation
														
 
															+            with_seem_direction
														
 
															+            and ref_block.label in BLOCK_LABEL_MAP["text_labels"]
														
 
															             and short_side_length_condition
														
 
															             and long_side_length_condition
														
 
															             and ref_block.num_of_lines < 3
														
 
															+            and nearest_edge_distance < ref_block.text_line_height * 2
														
 
															         ):
														
 
															             ref_block.order_label = "doc_title_text"
														
 
															             block.append_child_block(ref_block)
														
 
															-            config["text_block_idxes"].remove(idx)
														
 
															+            region.normal_text_block_idxes.remove(ref_block.index)
														
 
															 def update_paragraph_title_child_blocks(
														
 
															-    blocks: List[LayoutParsingBlock],
														
 
															     block: LayoutParsingBlock,
														
 
															-    prev_idx: int,
														
 
															-    post_idx: int,
														
 
															-    config: dict,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ) -> None:
														
 
															     """
														
 
															     Update the child blocks of a paragraph title block.
														
 
															     The child blocks need to meet the following conditions:
														
 
															         1. They must be adjacent
														
 
															-        2. They must have the same orientation as the parent block.
														
 
															+        2. They must have the same direction as the parent block.
														
 
															         3. The child block must be paragraph title block.
														
 
															     Args:
														
@@ -763,31 +836,39 @@ def update_paragraph_title_child_blocks(
 
															         None
														
 
															     """
														
 
															-    paragraph_title_labels = config.get("paragraph_title_labels", [])
														
 
															-    for idx in [prev_idx, post_idx]:
														
 
															-        if idx is None:
														
 
															-            continue
														
 
															-        ref_block = blocks[idx]
														
 
															-        min_height = min(block.height, ref_block.height)
														
 
															-        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
														
 
															-        with_seem_orientation = ref_block.orientation == block.orientation
														
 
															-        if (
														
 
															-            with_seem_orientation
														
 
															-            and ref_block.label in paragraph_title_labels
														
 
															-            and nearest_edge_distance <= min_height * 2
														
 
															-        ):
														
 
															-            ref_block.order_label = "sub_paragraph_title"
														
 
															-            block.append_child_block(ref_block)
														
 
															-            config["paragraph_title_block_idxes"].remove(idx)
														
 
															+    if block.order_label == "sub_paragraph_title":
														
 
															+        return
														
 
															+    ref_blocks = [
														
 
															+        region.block_map[idx]
														
 
															+        for idx in region.paragraph_title_block_idxes + region.normal_text_block_idxes
														
 
															+    ]
														
 
															+    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
														
 
															+    prev_blocks, post_blocks = get_nearest_blocks(
														
 
															+        block, ref_blocks, overlap_threshold, block.direction
														
 
															+    )
														
 
															+    for ref_blocks in [prev_blocks, post_blocks]:
														
 
															+        for ref_block in ref_blocks:
														
 
															+            if ref_block.label not in BLOCK_LABEL_MAP["paragraph_title_labels"]:
														
 
															+                break
														
 
															+            min_text_line_height = min(
														
 
															+                block.text_line_height, ref_block.text_line_height
														
 
															+            )
														
 
															+            nearest_edge_distance = get_nearest_edge_distance(
														
 
															+                block.bbox, ref_block.bbox
														
 
															+            )
														
 
															+            with_seem_direction = ref_block.direction == block.direction
														
 
															+            if (
														
 
															+                with_seem_direction
														
 
															+                and nearest_edge_distance <= min_text_line_height * 1.5
														
 
															+            ):
														
 
															+                ref_block.order_label = "sub_paragraph_title"
														
 
															+                block.append_child_block(ref_block)
														
 
															+                region.paragraph_title_block_idxes.remove(ref_block.index)
														
 
															 def update_vision_child_blocks(
														
 
															-    blocks: List[LayoutParsingBlock],
														
 
															     block: LayoutParsingBlock,
														
 
															-    ref_block_idxes: List[int],
														
 
															-    prev_idx: int,
														
 
															-    post_idx: int,
														
 
															-    config: dict,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ) -> None:
														
 
															     """
														
 
															     Update the child blocks of a vision block.
														
@@ -816,69 +897,122 @@ def update_vision_child_blocks(
 
															         None
														
 
															     """
														
 
															-    vision_title_labels = config.get("vision_title_labels", [])
														
 
															-    text_labels = config.get("text_labels", [])
														
 
															-    for idx in [prev_idx, post_idx]:
														
 
															-        if idx is None:
														
 
															-            continue
														
 
															-        ref_block = blocks[idx]
														
 
															-        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
														
 
															-        block_center = block.get_centroid()
														
 
															-        ref_block_center = ref_block.get_centroid()
														
 
															-        if ref_block.label in vision_title_labels and nearest_edge_distance <= min(
														
 
															-            block.height * 0.5, ref_block.height * 2
														
 
															-        ):
														
 
															-            ref_block.order_label = "vision_title"
														
 
															-            block.append_child_block(ref_block)
														
 
															-            config["vision_title_block_idxes"].remove(idx)
														
 
															-        elif (
														
 
															-            nearest_edge_distance <= 15
														
 
															-            and ref_block.short_side_length < block.short_side_length
														
 
															-            and ref_block.long_side_length < 0.5 * block.long_side_length
														
 
															-            and ref_block.orientation == block.orientation
														
 
															-            and (
														
 
															-                abs(block_center[0] - ref_block_center[0]) < 10
														
 
															-                or (
														
 
															-                    block.bbox[0] - ref_block.bbox[0] < 10
														
 
															-                    and ref_block.num_of_lines == 1
														
 
															-                )
														
 
															-                or (
														
 
															-                    block.bbox[2] - ref_block.bbox[2] < 10
														
 
															-                    and ref_block.num_of_lines == 1
														
 
															-                )
														
 
															+    ref_blocks = [
														
 
															+        region.block_map[idx]
														
 
															+        for idx in region.normal_text_block_idxes + region.vision_title_block_idxes
														
 
															+    ]
														
 
															+    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
														
 
															+    has_vision_footnote = False
														
 
															+    has_vision_title = False
														
 
															+    for direction in [block.direction, block.secondary_direction]:
														
 
															+        prev_blocks, post_blocks = get_nearest_blocks(
														
 
															+            block, ref_blocks, overlap_threshold, direction
														
 
															+        )
														
 
															+        for ref_block in prev_blocks:
														
 
															+            if (
														
 
															+                ref_block.label
														
 
															+                not in BLOCK_LABEL_MAP["text_labels"]
														
 
															+                + BLOCK_LABEL_MAP["vision_title_labels"]
														
 
															+            ):
														
 
															+                break
														
 
															+            nearest_edge_distance = get_nearest_edge_distance(
														
 
															+                block.bbox, ref_block.bbox
														
 
															             )
														
 
															-        ):
														
 
															-            has_vision_footnote = False
														
 
															-            if len(block.child_blocks) > 0:
														
 
															-                for child_block in block.child_blocks:
														
 
															-                    if child_block.label in text_labels:
														
 
															-                        has_vision_footnote = True
														
 
															-            if not has_vision_footnote:
														
 
															-                ref_block.order_label = "vision_footnote"
														
 
															+            block_center = block.get_centroid()
														
 
															+            ref_block_center = ref_block.get_centroid()
														
 
															+            if ref_block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
														
 
															+                has_vision_title = True
														
 
															+                ref_block.order_label = "vision_title"
														
 
															                 block.append_child_block(ref_block)
														
 
															-                config["text_block_idxes"].remove(idx)
														
 
															+                region.vision_title_block_idxes.remove(ref_block.index)
														
 
															+            if ref_block.label in BLOCK_LABEL_MAP["text_labels"]:
														
 
															+                if (
														
 
															+                    not has_vision_footnote
														
 
															+                    and nearest_edge_distance <= block.text_line_height * 2
														
 
															+                    and ref_block.short_side_length < block.short_side_length
														
 
															+                    and ref_block.long_side_length < 0.5 * block.long_side_length
														
 
															+                    and ref_block.direction == block.direction
														
 
															+                    and (
														
 
															+                        abs(block_center[0] - ref_block_center[0]) < 10
														
 
															+                        or (
														
 
															+                            block.bbox[0] - ref_block.bbox[0] < 10
														
 
															+                            and ref_block.num_of_lines == 1
														
 
															+                        )
														
 
															+                        or (
														
 
															+                            block.bbox[2] - ref_block.bbox[2] < 10
														
 
															+                            and ref_block.num_of_lines == 1
														
 
															+                        )
														
 
															+                    )
														
 
															+                ):
														
 
															+                    has_vision_footnote = True
														
 
															+                    ref_block.order_label = "vision_footnote"
														
 
															+                    block.append_child_block(ref_block)
														
 
															+                    region.normal_text_block_idxes.remove(ref_block.index)
														
 
															+                break
														
 
															+        for ref_block in post_blocks:
														
 
															+            if (
														
 
															+                has_vision_footnote
														
 
															+                and ref_block.label in BLOCK_LABEL_MAP["text_labels"]
														
 
															+            ):
														
 
															+                break
														
 
															+            nearest_edge_distance = get_nearest_edge_distance(
														
 
															+                block.bbox, ref_block.bbox
														
 
															+            )
														
 
															+            block_center = block.get_centroid()
														
 
															+            ref_block_center = ref_block.get_centroid()
														
 
															+            if ref_block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
														
 
															+                has_vision_title = True
														
 
															+                ref_block.order_label = "vision_title"
														
 
															+                block.append_child_block(ref_block)
														
 
															+                region.vision_title_block_idxes.remove(ref_block.index)
														
 
															+            if ref_block.label in BLOCK_LABEL_MAP["text_labels"]:
														
 
															+                if (
														
 
															+                    not has_vision_footnote
														
 
															+                    and nearest_edge_distance <= block.text_line_height * 2
														
 
															+                    and ref_block.short_side_length < block.short_side_length
														
 
															+                    and ref_block.long_side_length < 0.5 * block.long_side_length
														
 
															+                    and ref_block.direction == block.direction
														
 
															+                    and (
														
 
															+                        abs(block_center[0] - ref_block_center[0]) < 10
														
 
															+                        or (
														
 
															+                            block.bbox[0] - ref_block.bbox[0] < 10
														
 
															+                            and ref_block.num_of_lines == 1
														
 
															+                        )
														
 
															+                        or (
														
 
															+                            block.bbox[2] - ref_block.bbox[2] < 10
														
 
															+                            and ref_block.num_of_lines == 1
														
 
															+                        )
														
 
															+                    )
														
 
															+                ):
														
 
															+                    has_vision_footnote = True
														
 
															+                    ref_block.order_label = "vision_footnote"
														
 
															+                    block.append_child_block(ref_block)
														
 
															+                    region.normal_text_block_idxes.remove(ref_block.index)
														
 
															+                break
														
 
															+        if has_vision_title:
														
 
															+            break
														
 
															 def calculate_discontinuous_projection(
														
 
															-    boxes, orientation="horizontal", return_num=False
														
 
															+    boxes, direction="horizontal", return_num=False
														
 
															 ) -> List:
														
 
															     """
														
 
															-    Calculate the discontinuous projection of boxes along the specified orientation.
														
 
															+    Calculate the discontinuous projection of boxes along the specified direction.
														
 
															     Args:
														
 
															         boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
														
 
															-        orientation (str): orientation along which to perform the projection ('horizontal' or 'vertical').
														
 
															+        direction (str): direction along which to perform the projection ('horizontal' or 'vertical').
														
 
															     Returns:
														
 
															         list: List of tuples representing the merged intervals.
														
 
															     """
														
 
															     boxes = np.array(boxes)
														
 
															-    if orientation == "horizontal":
														
 
															+    if direction == "horizontal":
														
 
															         intervals = boxes[:, [0, 2]]
														
 
															-    elif orientation == "vertical":
														
 
															+    elif direction == "vertical":
														
 
															         intervals = boxes[:, [1, 3]]
														
 
															     else:
														
 
															-        raise ValueError("orientation must be 'horizontal' or 'vertical'")
														
 
															+        raise ValueError("direction must be 'horizontal' or 'vertical'")
														
 
															     intervals = intervals[np.argsort(intervals[:, 0])]
														
@@ -904,15 +1038,53 @@ def calculate_discontinuous_projection(
 
															     return merged_intervals
														
 
															+def is_projection_consistent(blocks, intervals, direction="horizontal"):
														
 
															+
														
 
															+    for interval in intervals:
														
 
															+        if direction == "horizontal":
														
 
															+            start_index, stop_index = 0, 2
														
 
															+            interval_box = [interval[0], 0, interval[1], 1]
														
 
															+        else:
														
 
															+            start_index, stop_index = 1, 3
														
 
															+            interval_box = [0, interval[0], 1, interval[1]]
														
 
															+        same_interval_bboxes = []
														
 
															+        for block in blocks:
														
 
															+            overlap_ratio = calculate_projection_overlap_ratio(
														
 
															+                interval_box, block.bbox, direction=direction
														
 
															+            )
														
 
															+            if overlap_ratio > 0 and block.label in BLOCK_LABEL_MAP["text_labels"]:
														
 
															+                same_interval_bboxes.append(block.bbox)
														
 
															+        start_coordinates = [bbox[start_index] for bbox in same_interval_bboxes]
														
 
															+        if start_coordinates:
														
 
															+            min_start_coordinate = min(start_coordinates)
														
 
															+            max_start_coordinate = max(start_coordinates)
														
 
															+            is_start_consistent = (
														
 
															+                False
														
 
															+                if max_start_coordinate - min_start_coordinate
														
 
															+                >= abs(interval[0] - interval[1]) * 0.05
														
 
															+                else True
														
 
															+            )
														
 
															+            stop_coordinates = [bbox[stop_index] for bbox in same_interval_bboxes]
														
 
															+            min_stop_coordinate = min(stop_coordinates)
														
 
															+            max_stop_coordinate = max(stop_coordinates)
														
 
															+            if (
														
 
															+                max_stop_coordinate - min_stop_coordinate
														
 
															+                >= abs(interval[0] - interval[1]) * 0.05
														
 
															+                and is_start_consistent
														
 
															+            ):
														
 
															+                return False
														
 
															+    return True
														
 
															+
														
 
															+
														
 
															 def shrink_overlapping_boxes(
														
 
															-    boxes, orientation="horizontal", min_threshold=0, max_threshold=0.1
														
 
															+    boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
														
 
															 ) -> List:
														
 
															     """
														
 
															-    Shrink overlapping boxes along the specified orientation.
														
 
															+    Shrink overlapping boxes along the specified direction.
														
 
															     Args:
														
 
															         boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
														
 
															-        orientation (str): orientation along which to perform the shrinking ('horizontal' or 'vertical').
														
 
															+        direction (str): direction along which to perform the shrinking ('horizontal' or 'vertical').
														
 
															         min_threshold (float): Minimum threshold for shrinking. Default is 0.
														
 
															         max_threshold (float): Maximum threshold for shrinking. Default is 0.1.
														
@@ -924,14 +1096,14 @@ def shrink_overlapping_boxes(
 
															         x1, y1, x2, y2 = current_block.bbox
														
 
															         x1_prime, y1_prime, x2_prime, y2_prime = block.bbox
														
 
															         cut_iou = calculate_projection_overlap_ratio(
														
 
															-            current_block.bbox, block.bbox, orientation=orientation
														
 
															+            current_block.bbox, block.bbox, direction=direction
														
 
															         )
														
 
															         match_iou = calculate_projection_overlap_ratio(
														
 
															             current_block.bbox,
														
 
															             block.bbox,
														
 
															-            orientation="horizontal" if orientation == "vertical" else "vertical",
														
 
															+            direction="horizontal" if direction == "vertical" else "vertical",
														
 
															         )
														
 
															-        if orientation == "vertical":
														
 
															+        if direction == "vertical":
														
 
															             if (
														
 
															                 (match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
														
 
															                 or y2 == y1_prime
														
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
@@ -12,24 +12,25 @@
 
															 # See the License for the specific language governing permissions and
														
 
															 # limitations under the License.
														
 
															-from typing import Any, Dict, List, Tuple
														
 
															+from typing import Dict, List, Tuple
														
 
															 import numpy as np
														
 
															-from ..result_v2 import LayoutParsingBlock
														
 
															+from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
														
 
															+from ..setting import BLOCK_LABEL_MAP
														
 
															 from ..utils import calculate_overlap_ratio, calculate_projection_overlap_ratio
														
 
															 from .utils import (
														
 
															     calculate_discontinuous_projection,
														
 
															-    get_adjacent_blocks_by_orientation,
														
 
															     get_cut_blocks,
														
 
															     get_nearest_edge_distance,
														
 
															     insert_child_blocks,
														
 
															+    is_projection_consistent,
														
 
															     manhattan_insert,
														
 
															     recursive_xy_cut,
														
 
															     recursive_yx_cut,
														
 
															     reference_insert,
														
 
															     shrink_overlapping_boxes,
														
 
															-    sort_blocks,
														
 
															+    sort_normal_blocks,
														
 
															     update_doc_title_child_blocks,
														
 
															     update_paragraph_title_child_blocks,
														
 
															     update_vision_child_blocks,
														
@@ -38,8 +39,7 @@ from .utils import (
 
															 def pre_process(
														
 
															-    blocks: List[LayoutParsingBlock],
														
 
															-    config: Dict,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ) -> List:
														
 
															     """
														
 
															     Preprocess the layout for sorting purposes.
														
@@ -49,147 +49,116 @@ def pre_process(
 
															     2. Match the blocks with their children.
														
 
															     Args:
														
 
															-        blocks (List[LayoutParsingBlock]): A list of LayoutParsingBlock objects representing the layout.
														
 
															-        config (Dict): Configuration parameters that include settings for pre-cutting and sorting.
														
 
															+        region: LayoutParsingRegion, the layout region to be pre-processed.
														
 
															     Returns:
														
 
															         List: A list of pre-cutted layout blocks list.
														
 
															     """
														
 
															-    region_bbox = config.get("region_bbox", None)
														
 
															-    region_x_center = (region_bbox[0] + region_bbox[2]) / 2
														
 
															-    region_y_center = (region_bbox[1] + region_bbox[3]) / 2
														
 
															-
														
 
															-    header_block_idxes = config.get("header_block_idxes", [])
														
 
															-    header_blocks = []
														
 
															-    for idx in header_block_idxes:
														
 
															-        blocks[idx].order_label = "header"
														
 
															-        header_blocks.append(blocks[idx])
														
 
															-
														
 
															-    unordered_block_idxes = config.get("unordered_block_idxes", [])
														
 
															-    unordered_blocks = []
														
 
															-    for idx in unordered_block_idxes:
														
 
															-        blocks[idx].order_label = "unordered"
														
 
															-        unordered_blocks.append(blocks[idx])
														
 
															-
														
 
															-    footer_block_idxes = config.get("footer_block_idxes", [])
														
 
															-    footer_blocks = []
														
 
															-    for idx in footer_block_idxes:
														
 
															-        blocks[idx].order_label = "footer"
														
 
															-        footer_blocks.append(blocks[idx])
														
 
															-
														
 
															-    mask_labels = ["header", "unordered", "footer"]
														
 
															-    child_labels = [
														
 
															+    mask_labels = [
														
 
															+        "header",
														
 
															+        "unordered",
														
 
															+        "footer",
														
 
															         "vision_footnote",
														
 
															         "sub_paragraph_title",
														
 
															         "doc_title_text",
														
 
															         "vision_title",
														
 
															     ]
														
 
															     pre_cut_block_idxes = []
														
 
															-    for block_idx, block in enumerate(blocks):
														
 
															-        if block.label in mask_labels:
														
 
															-            continue
														
 
															-
														
 
															-        if block.order_label not in child_labels:
														
 
															-            update_region_label(blocks, config, block_idx)
														
 
															-
														
 
															-        block_orientation = block.orientation
														
 
															-        if block_orientation == "horizontal":
														
 
															-            region_bbox_center = region_x_center
														
 
															+    block_map = region.block_map
														
 
															+    blocks: List[LayoutParsingBlock] = list(block_map.values())
														
 
															+    for block in blocks:
														
 
															+        if block.order_label not in mask_labels:
														
 
															+            update_region_label(block, region)
														
 
															+
														
 
															+        block_direction = block.direction
														
 
															+        if block_direction == "horizontal":
														
 
															             tolerance_len = block.long_side_length // 5
														
 
															         else:
														
 
															-            region_bbox_center = region_y_center
														
 
															             tolerance_len = block.short_side_length // 10
														
 
															         block_center = (block.start_coordinate + block.end_coordinate) / 2
														
 
															-        center_offset = abs(block_center - region_bbox_center)
														
 
															+        center_offset = abs(block_center - region.direction_center_coordinate)
														
 
															         is_centered = center_offset <= tolerance_len
														
 
															         if is_centered:
														
 
															-            pre_cut_block_idxes.append(block_idx)
														
 
															+            pre_cut_block_idxes.append(block.index)
														
 
															     pre_cut_list = []
														
 
															-    cut_orientation = "vertical"
														
 
															+    cut_direction = region.secondary_direction
														
 
															     cut_coordinates = []
														
 
															     discontinuous = []
														
 
															-    mask_labels = child_labels + mask_labels
														
 
															     all_boxes = np.array(
														
 
															         [block.bbox for block in blocks if block.order_label not in mask_labels]
														
 
															     )
														
 
															     if len(all_boxes) == 0:
														
 
															-        return header_blocks, pre_cut_list, footer_blocks, unordered_blocks
														
 
															+        return pre_cut_list
														
 
															     if pre_cut_block_idxes:
														
 
															-        horizontal_cut_num = 0
														
 
															-        for block_idx in pre_cut_block_idxes:
														
 
															-            block = blocks[block_idx]
														
 
															-            horizontal_cut_num += (
														
 
															-                1 if block.secondary_orientation == "horizontal" else 0
														
 
															-            )
														
 
															-        cut_orientation = (
														
 
															-            "horizontal"
														
 
															-            if horizontal_cut_num > len(pre_cut_block_idxes) * 0.5
														
 
															-            else "vertical"
														
 
															-        )
														
 
															         discontinuous, num_list = calculate_discontinuous_projection(
														
 
															-            all_boxes, orientation=cut_orientation, return_num=True
														
 
															+            all_boxes, direction=cut_direction, return_num=True
														
 
															         )
														
 
															         for idx in pre_cut_block_idxes:
														
 
															-            block = blocks[idx]
														
 
															+            block = block_map[idx]
														
 
															             if (
														
 
															                 block.order_label not in mask_labels
														
 
															-                and block.secondary_orientation == cut_orientation
														
 
															+                and block.secondary_direction == cut_direction
														
 
															             ):
														
 
															                 if (
														
 
															-                    block.secondary_orientation_start_coordinate,
														
 
															-                    block.secondary_orientation_end_coordinate,
														
 
															+                    block.secondary_direction_start_coordinate,
														
 
															+                    block.secondary_direction_end_coordinate,
														
 
															                 ) in discontinuous:
														
 
															                     idx = discontinuous.index(
														
 
															                         (
														
 
															-                            block.secondary_orientation_start_coordinate,
														
 
															-                            block.secondary_orientation_end_coordinate,
														
 
															+                            block.secondary_direction_start_coordinate,
														
 
															+                            block.secondary_direction_end_coordinate,
														
 
															                         )
														
 
															                     )
														
 
															                     if num_list[idx] == 1:
														
 
															                         cut_coordinates.append(
														
 
															-                            block.secondary_orientation_start_coordinate
														
 
															+                            block.secondary_direction_start_coordinate
														
 
															                         )
														
 
															-                        cut_coordinates.append(
														
 
															-                            block.secondary_orientation_end_coordinate
														
 
															-                        )
														
 
															-    if not discontinuous:
														
 
															-        discontinuous = calculate_discontinuous_projection(
														
 
															-            all_boxes, orientation=cut_orientation
														
 
															-        )
														
 
															-    current_interval = discontinuous[0]
														
 
															-    for interval in discontinuous[1:]:
														
 
															-        gap_len = interval[0] - current_interval[1]
														
 
															-        if gap_len >= 60:
														
 
															-            cut_coordinates.append(current_interval[1])
														
 
															-        elif gap_len > 40:
														
 
															-            x1, _, x2, __ = region_bbox
														
 
															-            y1 = current_interval[1]
														
 
															-            y2 = interval[0]
														
 
															-            bbox = [x1, y1, x2, y2]
														
 
															-            ref_interval = interval[0] - current_interval[1]
														
 
															-            ref_bboxes = []
														
 
															-            for block in blocks:
														
 
															-                if get_nearest_edge_distance(bbox, block.bbox) < ref_interval * 2:
														
 
															-                    ref_bboxes.append(block.bbox)
														
 
															+                        cut_coordinates.append(block.secondary_direction_end_coordinate)
														
 
															+    secondary_discontinuous = calculate_discontinuous_projection(
														
 
															+        all_boxes, direction=region.direction
														
 
															+    )
														
 
															+    if len(secondary_discontinuous) == 1:
														
 
															+        if not discontinuous:
														
 
															             discontinuous = calculate_discontinuous_projection(
														
 
															-                ref_bboxes, orientation="horizontal"
														
 
															+                all_boxes, direction=cut_direction
														
 
															             )
														
 
															-            if len(discontinuous) != 2:
														
 
															+        current_interval = discontinuous[0]
														
 
															+        for interval in discontinuous[1:]:
														
 
															+            gap_len = interval[0] - current_interval[1]
														
 
															+            if gap_len >= region.text_line_height * 5:
														
 
															                 cut_coordinates.append(current_interval[1])
														
 
															-        current_interval = interval
														
 
															+            elif gap_len > region.text_line_height * 2:
														
 
															+                x1, _, x2, __ = region.bbox
														
 
															+                y1 = current_interval[1]
														
 
															+                y2 = interval[0]
														
 
															+                bbox = [x1, y1, x2, y2]
														
 
															+                ref_interval = interval[0] - current_interval[1]
														
 
															+                ref_bboxes = []
														
 
															+                for block in blocks:
														
 
															+                    if get_nearest_edge_distance(bbox, block.bbox) < ref_interval * 2:
														
 
															+                        ref_bboxes.append(block.bbox)
														
 
															+                discontinuous = calculate_discontinuous_projection(
														
 
															+                    ref_bboxes, direction=region.direction
														
 
															+                )
														
 
															+                if len(discontinuous) != 2:
														
 
															+                    cut_coordinates.append(current_interval[1])
														
 
															+            current_interval = interval
														
 
															     cut_list = get_cut_blocks(
														
 
															-        blocks, cut_orientation, cut_coordinates, region_bbox, mask_labels
														
 
															+        blocks, cut_direction, cut_coordinates, region.bbox, mask_labels
														
 
															     )
														
 
															     pre_cut_list.extend(cut_list)
														
 
															+    if region.direction == "vertical":
														
 
															+        pre_cut_list = pre_cut_list[::-1]
														
 
															-    return header_blocks, pre_cut_list, footer_blocks, unordered_blocks
														
 
															+    return pre_cut_list
														
 
															 def update_region_label(
														
 
															-    blocks: List[LayoutParsingBlock], config: Dict[str, Any], block_idx: int
														
 
															+    block: LayoutParsingBlock,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ) -> None:
														
 
															     """
														
 
															     Update the region label of a block based on its label and match the block with its children.
														
@@ -202,65 +171,45 @@ def update_region_label(
 
															     Returns:
														
 
															         None
														
 
															     """
														
 
															-
														
 
															-    # special title block labels
														
 
															-    doc_title_labels = config.get("doc_title_labels", [])
														
 
															-    paragraph_title_labels = config.get("paragraph_title_labels", [])
														
 
															-    vision_labels = config.get("vision_labels", [])
														
 
															-
														
 
															-    block = blocks[block_idx]
														
 
															-    if block.label in doc_title_labels:
														
 
															+    if block.label in BLOCK_LABEL_MAP["header_labels"]:
														
 
															+        block.order_label = "header"
														
 
															+    elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
														
 
															         block.order_label = "doc_title"
														
 
															-    # Force the orientation of vision type to be horizontal
														
 
															-    if block.label in vision_labels:
														
 
															+    elif (
														
 
															+        block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]
														
 
															+        and block.order_label is None
														
 
															+    ):
														
 
															+        block.order_label = "paragraph_title"
														
 
															+    elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
														
 
															         block.order_label = "vision"
														
 
															         block.num_of_lines = 1
														
 
															-        block.update_orientation_info()
														
 
															-    # some paragraph title block may be labeled as sub_title, so we need to check if block.order_label is "other"(default).
														
 
															-    if block.label in paragraph_title_labels and block.order_label == "other":
														
 
															-        block.order_label = "paragraph_title"
														
 
															+        block.update_direction_info()
														
 
															+    elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
														
 
															+        block.order_label = "footer"
														
 
															+    elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
														
 
															+        block.order_label = "unordered"
														
 
															+    else:
														
 
															+        block.order_label = "normal_text"
														
 
															     # only vision and doc title block can have child block
														
 
															     if block.order_label not in ["vision", "doc_title", "paragraph_title"]:
														
 
															         return
														
 
															-    iou_threshold = config.get("child_block_match_iou_threshold", 0.1)
														
 
															     # match doc title text block
														
 
															     if block.order_label == "doc_title":
														
 
															-        text_block_idxes = config.get("text_block_idxes", [])
														
 
															-        prev_idx, post_idx = get_adjacent_blocks_by_orientation(
														
 
															-            blocks, block_idx, text_block_idxes, iou_threshold
														
 
															-        )
														
 
															-        update_doc_title_child_blocks(blocks, block, prev_idx, post_idx, config)
														
 
															+        update_doc_title_child_blocks(block, region)
														
 
															     # match sub title block
														
 
															     elif block.order_label == "paragraph_title":
														
 
															-        iou_threshold = config.get("sub_title_match_iou_threshold", 0.1)
														
 
															-        paragraph_title_block_idxes = config.get("paragraph_title_block_idxes", [])
														
 
															-        text_block_idxes = config.get("text_block_idxes", [])
														
 
															-        megred_block_idxes = text_block_idxes + paragraph_title_block_idxes
														
 
															-        prev_idx, post_idx = get_adjacent_blocks_by_orientation(
														
 
															-            blocks, block_idx, megred_block_idxes, iou_threshold
														
 
															-        )
														
 
															-        update_paragraph_title_child_blocks(blocks, block, prev_idx, post_idx, config)
														
 
															-    # match vision title block
														
 
															+        update_paragraph_title_child_blocks(block, region)
														
 
															+    # match vision title block and vision footnote block
														
 
															     elif block.order_label == "vision":
														
 
															-        # for matching vision title block
														
 
															-        vision_title_block_idxes = config.get("vision_title_block_idxes", [])
														
 
															-        # for matching vision footnote block
														
 
															-        text_block_idxes = config.get("text_block_idxes", [])
														
 
															-        megred_block_idxes = text_block_idxes + vision_title_block_idxes
														
 
															-        # Some vision title block may be matched with multiple vision title block, so we need to try multiple times
														
 
															-        for i in range(3):
														
 
															-            prev_idx, post_idx = get_adjacent_blocks_by_orientation(
														
 
															-                blocks, block_idx, megred_block_idxes, iou_threshold
														
 
															-            )
														
 
															-            update_vision_child_blocks(
														
 
															-                blocks, block, megred_block_idxes, prev_idx, post_idx, config
														
 
															-            )
														
 
															+        update_vision_child_blocks(block, region)
														
 
															 def get_layout_structure(
														
 
															     blocks: List[LayoutParsingBlock],
														
 
															+    region_direction: str,
														
 
															+    region_secondary_direction: str,
														
 
															 ) -> Tuple[List[Dict[str, any]], bool]:
														
 
															     """
														
 
															     Determine the layout cross column of blocks.
														
@@ -276,7 +225,7 @@ def get_layout_structure(
 
															         key=lambda x: (x.bbox[0], x.width),
														
 
															     )
														
 
															-    mask_labels = ["doc_title", "cross_text", "cross_reference"]
														
 
															+    mask_labels = ["doc_title", "cross_layout", "cross_reference"]
														
 
															     for block_idx, block in enumerate(blocks):
														
 
															         if block.order_label in mask_labels:
														
 
															             continue
														
@@ -288,16 +237,16 @@ def get_layout_structure(
 
															             bbox_iou = calculate_overlap_ratio(block.bbox, ref_block.bbox)
														
 
															             if bbox_iou > 0:
														
 
															                 if ref_block.order_label == "vision":
														
 
															-                    ref_block.order_label = "cross_text"
														
 
															+                    ref_block.order_label = "cross_layout"
														
 
															                     break
														
 
															                 if block.order_label == "vision" or block.area < ref_block.area:
														
 
															-                    block.order_label = "cross_text"
														
 
															+                    block.order_label = "cross_layout"
														
 
															                     break
														
 
															             match_projection_iou = calculate_projection_overlap_ratio(
														
 
															                 block.bbox,
														
 
															                 ref_block.bbox,
														
 
															-                "horizontal",
														
 
															+                region_direction,
														
 
															             )
														
 
															             if match_projection_iou > 0:
														
 
															                 for second_ref_idx, second_ref_block in enumerate(blocks):
														
@@ -312,57 +261,59 @@ def get_layout_structure(
 
															                     )
														
 
															                     if bbox_iou > 0.1:
														
 
															                         if second_ref_block.order_label == "vision":
														
 
															-                            second_ref_block.order_label = "cross_text"
														
 
															+                            second_ref_block.order_label = "cross_layout"
														
 
															                             break
														
 
															                         if (
														
 
															                             block.order_label == "vision"
														
 
															                             or block.area < second_ref_block.area
														
 
															                         ):
														
 
															-                            block.order_label = "cross_text"
														
 
															+                            block.order_label = "cross_layout"
														
 
															                             break
														
 
															                     second_match_projection_iou = calculate_projection_overlap_ratio(
														
 
															                         block.bbox,
														
 
															                         second_ref_block.bbox,
														
 
															-                        "horizontal",
														
 
															+                        region_direction,
														
 
															                     )
														
 
															                     ref_match_projection_iou = calculate_projection_overlap_ratio(
														
 
															                         ref_block.bbox,
														
 
															                         second_ref_block.bbox,
														
 
															-                        "horizontal",
														
 
															+                        region_direction,
														
 
															                     )
														
 
															                     ref_match_projection_iou_ = calculate_projection_overlap_ratio(
														
 
															                         ref_block.bbox,
														
 
															                         second_ref_block.bbox,
														
 
															-                        "vertical",
														
 
															+                        region_secondary_direction,
														
 
															                     )
														
 
															                     if (
														
 
															                         second_match_projection_iou > 0
														
 
															                         and ref_match_projection_iou == 0
														
 
															                         and ref_match_projection_iou_ > 0
														
 
															-                        and "vision"
														
 
															-                        not in [ref_block.order_label, second_ref_block.order_label]
														
 
															                     ):
														
 
															-                        block.order_label = (
														
 
															-                            "cross_reference"
														
 
															-                            if block.label == "reference"
														
 
															-                            else "cross_text"
														
 
															-                        )
														
 
															+                        if block.order_label == "vision" or (
														
 
															+                            ref_block.order_label == "normal_text"
														
 
															+                            and second_ref_block.order_label == "normal_text"
														
 
															+                        ):
														
 
															+                            block.order_label = (
														
 
															+                                "cross_reference"
														
 
															+                                if block.label == "reference"
														
 
															+                                else "cross_layout"
														
 
															+                            )
														
 
															 def sort_by_xycut(
														
 
															     block_bboxes: List,
														
 
															-    orientation: int = 0,
														
 
															+    direction: str = "vertical",
														
 
															     min_gap: int = 1,
														
 
															 ) -> List[int]:
														
 
															     """
														
 
															-    Sort bounding boxes using recursive XY cut method based on the specified orientation.
														
 
															+    Sort bounding boxes using recursive XY cut method based on the specified direction.
														
 
															     Args:
														
 
															         block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
														
 
															                                                            where each box is represented as
														
 
															                                                            [x_min, y_min, x_max, y_max].
														
 
															-        orientation (int): orientation for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
														
 
															+        direction (int): direction for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
														
 
															                          Defaults to 0.
														
 
															         min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
														
@@ -371,7 +322,7 @@ def sort_by_xycut(
 
															     """
														
 
															     block_bboxes = np.asarray(block_bboxes).astype(int)
														
 
															     res = []
														
 
															-    if orientation == 1:
														
 
															+    if direction == "vertical":
														
 
															         recursive_yx_cut(
														
 
															             block_bboxes,
														
 
															             np.arange(len(block_bboxes)).tolist(),
														
@@ -391,8 +342,7 @@ def sort_by_xycut(
 
															 def match_unsorted_blocks(
														
 
															     sorted_blocks: List[LayoutParsingBlock],
														
 
															     unsorted_blocks: List[LayoutParsingBlock],
														
 
															-    config: Dict,
														
 
															-    median_width: int,
														
 
															+    region: LayoutParsingRegion,
														
 
															 ) -> List[LayoutParsingBlock]:
														
 
															     """
														
 
															     Match special blocks with the sorted blocks based on their region labels.
														
@@ -406,7 +356,7 @@ def match_unsorted_blocks(
 
															         List[LayoutParsingBlock]: The updated sorted blocks after matching special blocks.
														
 
															     """
														
 
															     distance_type_map = {
														
 
															-        "cross_text": weighted_distance_insert,
														
 
															+        "cross_layout": weighted_distance_insert,
														
 
															         "paragraph_title": weighted_distance_insert,
														
 
															         "doc_title": weighted_distance_insert,
														
 
															         "vision_title": weighted_distance_insert,
														
@@ -416,21 +366,24 @@ def match_unsorted_blocks(
 
															         "other": manhattan_insert,
														
 
															     }
														
 
															-    unsorted_blocks = sort_blocks(unsorted_blocks, median_width, reverse=False)
														
 
															+    unsorted_blocks = sort_normal_blocks(
														
 
															+        unsorted_blocks,
														
 
															+        region.text_line_height,
														
 
															+        region.text_line_width,
														
 
															+        region.direction,
														
 
															+    )
														
 
															     for idx, block in enumerate(unsorted_blocks):
														
 
															         order_label = block.order_label
														
 
															         if idx == 0 and order_label == "doc_title":
														
 
															             sorted_blocks.insert(0, block)
														
 
															             continue
														
 
															-        sorted_blocks = distance_type_map[order_label](
														
 
															-            block, sorted_blocks, config, median_width
														
 
															-        )
														
 
															+        sorted_blocks = distance_type_map[order_label](block, sorted_blocks, region)
														
 
															     return sorted_blocks
														
 
															 def xycut_enhanced(
														
 
															-    blocks: List[LayoutParsingBlock], config: Dict
														
 
															-) -> List[LayoutParsingBlock]:
														
 
															+    region: LayoutParsingRegion,
														
 
															+) -> LayoutParsingRegion:
														
 
															     """
														
 
															     xycut_enhanced function performs the following steps:
														
 
															         1. Preprocess the input blocks by extracting headers, footers, and pre-cut blocks.
														
@@ -446,42 +399,51 @@ def xycut_enhanced(
 
															     Returns:
														
 
															         List[LayoutParsingBlock]: Ordered result list after processing.
														
 
															     """
														
 
															-    if len(blocks) == 0:
														
 
															-        return blocks
														
 
															+    if len(region.block_map) == 0:
														
 
															+        return []
														
 
															-    text_labels = config.get("text_labels", [])
														
 
															-    header_blocks, pre_cut_list, footer_blocks, unordered_blocks = pre_process(
														
 
															-        blocks, config
														
 
															-    )
														
 
															+    pre_cut_list: List[List[LayoutParsingBlock]] = pre_process(region)
														
 
															     final_order_res_list: List[LayoutParsingBlock] = []
														
 
															-    header_blocks = sort_blocks(header_blocks)
														
 
															-    footer_blocks = sort_blocks(footer_blocks)
														
 
															-    unordered_blocks = sort_blocks(unordered_blocks)
														
 
															+    header_blocks: List[LayoutParsingBlock] = [
														
 
															+        region.block_map[idx] for idx in region.header_block_idxes
														
 
															+    ]
														
 
															+    unordered_blocks: List[LayoutParsingBlock] = [
														
 
															+        region.block_map[idx] for idx in region.unordered_block_idxes
														
 
															+    ]
														
 
															+    footer_blocks: List[LayoutParsingBlock] = [
														
 
															+        region.block_map[idx] for idx in region.footer_block_idxes
														
 
															+    ]
														
 
															+
														
 
															+    header_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
														
 
															+        header_blocks, region.text_line_height, region.text_line_width, region.direction
														
 
															+    )
														
 
															+    footer_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
														
 
															+        footer_blocks, region.text_line_height, region.text_line_width, region.direction
														
 
															+    )
														
 
															+    unordered_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
														
 
															+        unordered_blocks,
														
 
															+        region.text_line_height,
														
 
															+        region.text_line_width,
														
 
															+        region.direction,
														
 
															+    )
														
 
															     final_order_res_list.extend(header_blocks)
														
 
															     unsorted_blocks: List[LayoutParsingBlock] = []
														
 
															-    sorted_blocks_by_pre_cuts = []
														
 
															+    sorted_blocks_by_pre_cuts: List[LayoutParsingBlock] = []
														
 
															     for pre_cut_blocks in pre_cut_list:
														
 
															         sorted_blocks: List[LayoutParsingBlock] = []
														
 
															         doc_title_blocks: List[LayoutParsingBlock] = []
														
 
															         xy_cut_blocks: List[LayoutParsingBlock] = []
														
 
															-        pre_cut_blocks: List[LayoutParsingBlock]
														
 
															-        median_width = 1
														
 
															-        text_block_width = [
														
 
															-            block.width for block in pre_cut_blocks if block.label in text_labels
														
 
															-        ]
														
 
															-        if len(text_block_width) > 0:
														
 
															-            median_width = int(np.median(text_block_width))
														
 
															         get_layout_structure(
														
 
															-            pre_cut_blocks,
														
 
															+            pre_cut_blocks, region.direction, region.secondary_direction
														
 
															         )
														
 
															         # Get xy cut blocks and add other blocks in special_block_map
														
 
															         for block in pre_cut_blocks:
														
 
															             if block.order_label not in [
														
 
															-                "cross_text",
														
 
															+                "cross_layout",
														
 
															                 "cross_reference",
														
 
															                 "doc_title",
														
 
															                 "unordered",
														
@@ -496,41 +458,77 @@ def xycut_enhanced(
 
															             block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
														
 
															             block_text_lines = [block.num_of_lines for block in xy_cut_blocks]
														
 
															             discontinuous = calculate_discontinuous_projection(
														
 
															-                block_bboxes, orientation="horizontal"
														
 
															+                block_bboxes, direction=region.direction
														
 
															             )
														
 
															             if len(discontinuous) > 1:
														
 
															                 xy_cut_blocks = [block for block in xy_cut_blocks]
														
 
															+            # if len(discontinuous) == 1 or max(block_text_lines) == 1 or (not is_projection_consistent(xy_cut_blocks, discontinuous, direction=region.direction) and len(discontinuous) > 2 and max(block_text_lines) - min(block_text_lines) < 3):
														
 
															             if len(discontinuous) == 1 or max(block_text_lines) == 1:
														
 
															-                xy_cut_blocks.sort(key=lambda x: (x.bbox[1] // 5, x.bbox[0]))
														
 
															-                xy_cut_blocks = shrink_overlapping_boxes(xy_cut_blocks, "vertical")
														
 
															+                xy_cut_blocks.sort(
														
 
															+                    key=lambda x: (
														
 
															+                        x.bbox[region.secondary_direction_start_index]
														
 
															+                        // (region.text_line_height // 2),
														
 
															+                        x.bbox[region.direction_start_index],
														
 
															+                    )
														
 
															+                )
														
 
															+                xy_cut_blocks = shrink_overlapping_boxes(
														
 
															+                    xy_cut_blocks, region.secondary_direction
														
 
															+                )
														
 
															+            if (
														
 
															+                len(discontinuous) == 1
														
 
															+                or max(block_text_lines) == 1
														
 
															+                or (
														
 
															+                    not is_projection_consistent(
														
 
															+                        xy_cut_blocks, discontinuous, direction=region.direction
														
 
															+                    )
														
 
															+                    and len(discontinuous) > 2
														
 
															+                    and max(block_text_lines) - min(block_text_lines) < 3
														
 
															+                )
														
 
															+            ):
														
 
															+                xy_cut_blocks.sort(
														
 
															+                    key=lambda x: (
														
 
															+                        x.bbox[region.secondary_direction_start_index]
														
 
															+                        // (region.text_line_height // 2),
														
 
															+                        x.bbox[region.direction_start_index],
														
 
															+                    )
														
 
															+                )
														
 
															+                xy_cut_blocks = shrink_overlapping_boxes(
														
 
															+                    xy_cut_blocks, region.secondary_direction
														
 
															+                )
														
 
															                 block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
														
 
															-                sorted_indexes = sort_by_xycut(block_bboxes, orientation=1, min_gap=1)
														
 
															+                sorted_indexes = sort_by_xycut(
														
 
															+                    block_bboxes, direction=region.secondary_direction, min_gap=1
														
 
															+                )
														
 
															             else:
														
 
															-                xy_cut_blocks.sort(key=lambda x: (x.bbox[0] // 20, x.bbox[1]))
														
 
															-                xy_cut_blocks = shrink_overlapping_boxes(xy_cut_blocks, "horizontal")
														
 
															+                xy_cut_blocks.sort(
														
 
															+                    key=lambda x: (
														
 
															+                        x.bbox[region.direction_start_index]
														
 
															+                        // (region.text_line_width // 2),
														
 
															+                        x.bbox[region.secondary_direction_start_index],
														
 
															+                    )
														
 
															+                )
														
 
															+                xy_cut_blocks = shrink_overlapping_boxes(
														
 
															+                    xy_cut_blocks, region.direction
														
 
															+                )
														
 
															                 block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
														
 
															-                sorted_indexes = sort_by_xycut(block_bboxes, orientation=0, min_gap=20)
														
 
															+                sorted_indexes = sort_by_xycut(
														
 
															+                    block_bboxes, direction=region.direction, min_gap=1
														
 
															+                )
														
 
															             sorted_blocks = [xy_cut_blocks[i] for i in sorted_indexes]
														
 
															         sorted_blocks = match_unsorted_blocks(
														
 
															             sorted_blocks,
														
 
															             doc_title_blocks,
														
 
															-            config,
														
 
															-            median_width,
														
 
															+            region=region,
														
 
															         )
														
 
															         sorted_blocks_by_pre_cuts.extend(sorted_blocks)
														
 
															-    median_width = 1
														
 
															-    text_block_width = [block.width for block in blocks if block.label in text_labels]
														
 
															-    if len(text_block_width) > 0:
														
 
															-        median_width = int(np.median(text_block_width))
														
 
															     final_order_res_list = match_unsorted_blocks(
														
 
															         sorted_blocks_by_pre_cuts,
														
 
															         unsorted_blocks,
														
 
															-        config,
														
 
															-        median_width,
														
 
															+        region=region,
														
 
															     )
														
 
															     final_order_res_list.extend(footer_blocks)