пре 7 месеци · e243a7e536
--- a/paddlex/inference/models/formula_recognition/processors.py
+++ b/paddlex/inference/models/formula_recognition/processors.py
@@ -631,74 +631,80 @@ class UniMERNetDecode(object):
 
				         self.pad_token_type_id = 0
			
 
				         self.pad_to_multiple_of = None
			
 
				 
			
 
				-        temp_path = tempfile.gettempdir()
			
 
				-        fast_tokenizer_file = os.path.join(temp_path, "tokenizer.json")
			
 
				-        tokenizer_config_file = os.path.join(temp_path, "tokenizer_config.json")
			
 
				-        try:
			
 
				-            with open(fast_tokenizer_file, "w") as f:
			
 
				-                json.dump(character_list["fast_tokenizer_file"], f)
			
 
				-            with open(tokenizer_config_file, "w") as f:
			
 
				-                json.dump(character_list["tokenizer_config_file"], f)
			
 
				-        except Exception as e:
			
 
				-            print(
			
 
				-                f"创建 tokenizer.json 和 tokenizer_config.json 文件失败, 原因{str(e)}"
			
 
				-            )
			
 
				-
			
 
				-        self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
			
 
				-        added_tokens_decoder = {}
			
 
				-        added_tokens_map = {}
			
 
				-        if tokenizer_config_file is not None:
			
 
				-            with open(
			
 
				-                tokenizer_config_file, encoding="utf-8"
			
 
				-            ) as tokenizer_config_handle:
			
 
				-                init_kwargs = json.load(tokenizer_config_handle)
			
 
				-                if "added_tokens_decoder" in init_kwargs:
			
 
				-                    for idx, token in init_kwargs["added_tokens_decoder"].items():
			
 
				-                        if isinstance(token, dict):
			
 
				-                            token = AddedToken(**token)
			
 
				-                        if isinstance(token, AddedToken):
			
 
				-                            added_tokens_decoder[int(idx)] = token
			
 
				-                            added_tokens_map[str(token)] = token
			
 
				-                        else:
			
 
				-                            raise ValueError(
			
 
				-                                f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
			
 
				-                            )
			
 
				-                init_kwargs["added_tokens_decoder"] = added_tokens_decoder
			
 
				-                added_tokens_decoder = init_kwargs.pop("added_tokens_decoder", {})
			
 
				-                tokens_to_add = [
			
 
				-                    token
			
 
				-                    for index, token in sorted(
			
 
				-                        added_tokens_decoder.items(), key=lambda x: x[0]
			
 
				-                    )
			
 
				-                    if token not in added_tokens_decoder
			
 
				-                ]
			
 
				-                added_tokens_encoder = self.added_tokens_encoder(added_tokens_decoder)
			
 
				-                encoder = list(added_tokens_encoder.keys()) + [
			
 
				-                    str(token) for token in tokens_to_add
			
 
				-                ]
			
 
				-                tokens_to_add += [
			
 
				-                    token
			
 
				-                    for token in self.all_special_tokens_extended
			
 
				-                    if token not in encoder and token not in tokens_to_add
			
 
				-                ]
			
 
				-                if len(tokens_to_add) > 0:
			
 
				-                    is_last_special = None
			
 
				-                    tokens = []
			
 
				-                    special_tokens = self.all_special_tokens
			
 
				-                    for token in tokens_to_add:
			
 
				-                        is_special = (
			
 
				-                            (token.special or str(token) in special_tokens)
			
 
				-                            if isinstance(token, AddedToken)
			
 
				-                            else str(token) in special_tokens
			
 
				+        with tempfile.NamedTemporaryFile(
			
 
				+            mode="w", suffix=".json", delete=True
			
 
				+        ) as temp_file1, tempfile.NamedTemporaryFile(
			
 
				+            mode="w", suffix=".json", delete=True
			
 
				+        ) as temp_file2:
			
 
				+            fast_tokenizer_file = temp_file1.name
			
 
				+            tokenizer_config_file = temp_file2.name
			
 
				+            try:
			
 
				+                with open(fast_tokenizer_file, "w") as f:
			
 
				+                    json.dump(character_list["fast_tokenizer_file"], f)
			
 
				+                with open(tokenizer_config_file, "w") as f:
			
 
				+                    json.dump(character_list["tokenizer_config_file"], f)
			
 
				+            except Exception as e:
			
 
				+                print(
			
 
				+                    f"创建 tokenizer.json 和 tokenizer_config.json 文件失败, 原因{str(e)}"
			
 
				+                )
			
 
				+
			
 
				+            self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
			
 
				+            added_tokens_decoder = {}
			
 
				+            added_tokens_map = {}
			
 
				+            if tokenizer_config_file is not None:
			
 
				+                with open(
			
 
				+                    tokenizer_config_file, encoding="utf-8"
			
 
				+                ) as tokenizer_config_handle:
			
 
				+                    init_kwargs = json.load(tokenizer_config_handle)
			
 
				+                    if "added_tokens_decoder" in init_kwargs:
			
 
				+                        for idx, token in init_kwargs["added_tokens_decoder"].items():
			
 
				+                            if isinstance(token, dict):
			
 
				+                                token = AddedToken(**token)
			
 
				+                            if isinstance(token, AddedToken):
			
 
				+                                added_tokens_decoder[int(idx)] = token
			
 
				+                                added_tokens_map[str(token)] = token
			
 
				+                            else:
			
 
				+                                raise ValueError(
			
 
				+                                    f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
			
 
				+                                )
			
 
				+                    init_kwargs["added_tokens_decoder"] = added_tokens_decoder
			
 
				+                    added_tokens_decoder = init_kwargs.pop("added_tokens_decoder", {})
			
 
				+                    tokens_to_add = [
			
 
				+                        token
			
 
				+                        for index, token in sorted(
			
 
				+                            added_tokens_decoder.items(), key=lambda x: x[0]
			
 
				                         )
			
 
				-                        if is_last_special is None or is_last_special == is_special:
			
 
				-                            tokens.append(token)
			
 
				-                        else:
			
 
				+                        if token not in added_tokens_decoder
			
 
				+                    ]
			
 
				+                    added_tokens_encoder = self.added_tokens_encoder(
			
 
				+                        added_tokens_decoder
			
 
				+                    )
			
 
				+                    encoder = list(added_tokens_encoder.keys()) + [
			
 
				+                        str(token) for token in tokens_to_add
			
 
				+                    ]
			
 
				+                    tokens_to_add += [
			
 
				+                        token
			
 
				+                        for token in self.all_special_tokens_extended
			
 
				+                        if token not in encoder and token not in tokens_to_add
			
 
				+                    ]
			
 
				+                    if len(tokens_to_add) > 0:
			
 
				+                        is_last_special = None
			
 
				+                        tokens = []
			
 
				+                        special_tokens = self.all_special_tokens
			
 
				+                        for token in tokens_to_add:
			
 
				+                            is_special = (
			
 
				+                                (token.special or str(token) in special_tokens)
			
 
				+                                if isinstance(token, AddedToken)
			
 
				+                                else str(token) in special_tokens
			
 
				+                            )
			
 
				+                            if is_last_special is None or is_last_special == is_special:
			
 
				+                                tokens.append(token)
			
 
				+                            else:
			
 
				+                                self._add_tokens(tokens, special_tokens=is_last_special)
			
 
				+                                tokens = [token]
			
 
				+                            is_last_special = is_special
			
 
				+                        if tokens:
			
 
				                             self._add_tokens(tokens, special_tokens=is_last_special)
			
 
				-                            tokens = [token]
			
 
				-                        is_last_special = is_special
			
 
				-                    if tokens:
			
 
				-                        self._add_tokens(tokens, special_tokens=is_last_special)
			
 
				 
			
 
				     def _add_tokens(
			
 
				         self, new_tokens: "List[Union[AddedToken, str]]", special_tokens: bool = False
			
--- a/paddlex/inference/pipelines/layout_parsing/pipeline.py
+++ b/paddlex/inference/pipelines/layout_parsing/pipeline.py
@@ -240,10 +240,10 @@ class LayoutParsingPipeline(BasePipeline):
 
				                     )
			
 
				                     seal_index += 1
			
 
				             else:
			
 
				-                ocr_res_in_box, matched_idxs = get_sub_regions_ocr_res(
			
 
				+                ocr_res_in_box, matched_idxes = get_sub_regions_ocr_res(
			
 
				                     overall_ocr_res, [box], return_match_idx=True
			
 
				                 )
			
 
				-                for matched_idx in matched_idxs:
			
 
				+                for matched_idx in matched_idxes:
			
 
				                     if matched_ocr_dict.get(matched_idx, None) is None:
			
 
				                         matched_ocr_dict[matched_idx] = [object_box_idx]
			
 
				                     else:
			
--- a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py
@@ -15,9 +15,10 @@ from __future__ import annotations
 
				 
			
 
				 import copy
			
 
				 import re
			
 
				-from typing import Any, Dict, Optional, Tuple, Union
			
 
				+from typing import Any, Dict, List, Optional, Tuple, Union
			
 
				 
			
 
				 import numpy as np
			
 
				+from PIL import Image
			
 
				 
			
 
				 from ....utils import logging
			
 
				 from ....utils.deps import pipeline_requires_extra
			
@@ -28,8 +29,22 @@ from ...utils.hpi import HPIConfig
 
				 from ...utils.pp_option import PaddlePredictorOption
			
 
				 from ..base import BasePipeline
			
 
				 from ..ocr.result import OCRResult
			
 
				-from .result_v2 import LayoutParsingResultV2
			
 
				-from .utils import gather_imgs, get_single_block_parsing_res, get_sub_regions_ocr_res
			
 
				+from .result_v2 import LayoutParsingBlock, LayoutParsingResultV2
			
 
				+from .utils import (
			
 
				+    caculate_bbox_area,
			
 
				+    calculate_text_orientation,
			
 
				+    convert_formula_res_to_ocr_format,
			
 
				+    format_line,
			
 
				+    gather_imgs,
			
 
				+    get_bbox_intersection,
			
 
				+    get_sub_regions_ocr_res,
			
 
				+    group_boxes_into_lines,
			
 
				+    remove_overlap_blocks,
			
 
				+    split_boxes_if_x_contained,
			
 
				+    update_layout_order_config_block_index,
			
 
				+    update_region_box,
			
 
				+)
			
 
				+from .xycut_enhanced import xycut_enhanced
			
 
				 
			
 
				 
			
 
				 @pipeline_requires_extra("ocr")
			
@@ -67,7 +82,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
				         )
			
 
				 
			
 
				         self.inintial_predictor(config)
			
 
				-
			
 
				         self.batch_sampler = ImageBatchSampler(batch_size=1)
			
 
				 
			
 
				         self.img_reader = ReadImage(format="BGR")
			
@@ -229,147 +243,477 @@ class LayoutParsingPipelineV2(BasePipeline):
 
				 
			
 
				         return True
			
 
				 
			
 
				-    def get_layout_parsing_res(
			
 
				+    def standardized_data(
			
 
				         self,
			
 
				         image: list,
			
 
				+        layout_order_config: dict,
			
 
				         layout_det_res: DetResult,
			
 
				         overall_ocr_res: OCRResult,
			
 
				-        table_res_list: list,
			
 
				-        seal_res_list: list,
			
 
				         formula_res_list: list,
			
 
				-        imgs_in_doc: list,
			
 
				-        text_det_limit_side_len: Optional[int] = None,
			
 
				-        text_det_limit_type: Optional[str] = None,
			
 
				-        text_det_thresh: Optional[float] = None,
			
 
				-        text_det_box_thresh: Optional[float] = None,
			
 
				-        text_det_unclip_ratio: Optional[float] = None,
			
 
				-        text_rec_score_thresh: Optional[float] = None,
			
 
				+        text_rec_model: Any,
			
 
				+        text_rec_score_thresh: Union[float, None] = None,
			
 
				     ) -> list:
			
 
				         """
			
 
				         Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
			
 
				         Args:
			
 
				             image (list): The input image.
			
 
				-            layout_det_res (DetResult): The detection result containing the layout information of the document.
			
 
				-            overall_ocr_res (OCRResult): The overall OCR result containing text information.
			
 
				-            table_res_list (list): A list of table recognition results.
			
 
				-            seal_res_list (list): A list of seal recognition results.
			
 
				+            overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
			
 
				+                - "input_img": The image on which OCR was performed.
			
 
				+                - "dt_boxes": A list of detected text box coordinates.
			
 
				+                - "rec_texts": A list of recognized text corresponding to the detected boxes.
			
 
				+
			
 
				+            layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
			
 
				+                - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
			
 
				+
			
 
				+            table_res_list (list): A list of table detection results, where each item is a dictionary containing:
			
 
				+                - "block_bbox": The bounding box of the table layout.
			
 
				+                - "pred_html": The predicted HTML representation of the table.
			
 
				+
			
 
				             formula_res_list (list): A list of formula recognition results.
			
 
				-            text_det_limit_side_len (Optional[int], optional): The maximum side length of the text detection region. Defaults to None.
			
 
				-            text_det_limit_type (Optional[str], optional): The type of limit for the text detection region. Defaults to None.
			
 
				-            text_det_thresh (Optional[float], optional): The confidence threshold for text detection. Defaults to None.
			
 
				-            text_det_box_thresh (Optional[float], optional): The confidence threshold for text detection bounding boxes. Defaults to None
			
 
				-            text_det_unclip_ratio (Optional[float], optional): The unclip ratio for text detection. Defaults to None.
			
 
				+            text_rec_model (Any): The text recognition model.
			
 
				             text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to None.
			
 
				         Returns:
			
 
				             list: A list of dictionaries representing the layout parsing result.
			
 
				         """
			
 
				+
			
 
				         matched_ocr_dict = {}
			
 
				-        image = np.array(image)
			
 
				+        layout_to_ocr_mapping = {}
			
 
				         object_boxes = []
			
 
				         footnote_list = []
			
 
				-        max_bottom_text_coordinate = 0
			
 
				+        bottom_text_y_max = 0
			
 
				+        max_block_area = 0.0
			
 
				+
			
 
				+        region_box = [65535, 65535, 0, 0]
			
 
				+        layout_det_res = remove_overlap_blocks(
			
 
				+            layout_det_res,
			
 
				+            threshold=0.5,
			
 
				+            smaller=True,
			
 
				+        )
			
 
				+
			
 
				+        # convert formula_res_list to OCRResult format
			
 
				+        convert_formula_res_to_ocr_format(formula_res_list, overall_ocr_res)
			
 
				 
			
 
				-        for object_box_idx, box_info in enumerate(layout_det_res["boxes"]):
			
 
				+        # match layout boxes and ocr boxes and get some information for layout_order_config
			
 
				+        for box_idx, box_info in enumerate(layout_det_res["boxes"]):
			
 
				             box = box_info["coordinate"]
			
 
				             label = box_info["label"].lower()
			
 
				             object_boxes.append(box)
			
 
				+            _, _, _, y2 = box
			
 
				+
			
 
				+            # update the region box and max_block_area according to the layout boxes
			
 
				+            region_box = update_region_box(box, region_box)
			
 
				+            max_block_area = max(max_block_area, caculate_bbox_area(box))
			
 
				+
			
 
				+            update_layout_order_config_block_index(layout_order_config, label, box_idx)
			
 
				 
			
 
				             # set the label of footnote to text, when it is above the text boxes
			
 
				             if label == "footnote":
			
 
				-                footnote_list.append(object_box_idx)
			
 
				-            if label == "text" and box[3] > max_bottom_text_coordinate:
			
 
				-                max_bottom_text_coordinate = box[3]
			
 
				+                footnote_list.append(box_idx)
			
 
				+            if label == "text":
			
 
				+                bottom_text_y_max = max(y2, bottom_text_y_max)
			
 
				 
			
 
				             if label not in ["formula", "table", "seal"]:
			
 
				-                _, matched_idxs = get_sub_regions_ocr_res(
			
 
				+                _, matched_idxes = get_sub_regions_ocr_res(
			
 
				                     overall_ocr_res, [box], return_match_idx=True
			
 
				                 )
			
 
				-                for matched_idx in matched_idxs:
			
 
				+                layout_to_ocr_mapping[box_idx] = matched_idxes
			
 
				+                for matched_idx in matched_idxes:
			
 
				                     if matched_ocr_dict.get(matched_idx, None) is None:
			
 
				-                        matched_ocr_dict[matched_idx] = [object_box_idx]
			
 
				+                        matched_ocr_dict[matched_idx] = [box_idx]
			
 
				                     else:
			
 
				-                        matched_ocr_dict[matched_idx].append(object_box_idx)
			
 
				+                        matched_ocr_dict[matched_idx].append(box_idx)
			
 
				 
			
 
				+        # fix the footnote label
			
 
				         for footnote_idx in footnote_list:
			
 
				             if (
			
 
				                 layout_det_res["boxes"][footnote_idx]["coordinate"][3]
			
 
				-                < max_bottom_text_coordinate
			
 
				+                < bottom_text_y_max
			
 
				             ):
			
 
				                 layout_det_res["boxes"][footnote_idx]["label"] = "text"
			
 
				+                layout_order_config["text_block_idxes"].append(footnote_idx)
			
 
				+                layout_order_config["footer_block_idxes"].remove(footnote_idx)
			
 
				 
			
 
				-        already_processed = set()
			
 
				-        for matched_idx, layout_box_ids in matched_ocr_dict.items():
			
 
				-            if len(layout_box_ids) <= 1:
			
 
				-                continue
			
 
				-
			
 
				-            # one ocr is matched to multiple layout boxes, split the text into multiple lines
			
 
				-            for idx in layout_box_ids:
			
 
				-                if idx in already_processed:
			
 
				-                    continue
			
 
				-
			
 
				-                already_processed.add(idx)
			
 
				-                wht_im = np.ones(image.shape, dtype=image.dtype) * 255
			
 
				-                box = object_boxes[idx]
			
 
				-                x1, y1, x2, y2 = [int(i) for i in box]
			
 
				-                wht_im[y1:y2, x1:x2, :] = image[y1:y2, x1:x2, :]
			
 
				-                sub_ocr_res = next(
			
 
				-                    self.general_ocr_pipeline(
			
 
				-                        wht_im,
			
 
				-                        text_det_limit_side_len=text_det_limit_side_len,
			
 
				-                        text_det_limit_type=text_det_limit_type,
			
 
				-                        text_det_thresh=text_det_thresh,
			
 
				-                        text_det_box_thresh=text_det_box_thresh,
			
 
				-                        text_det_unclip_ratio=text_det_unclip_ratio,
			
 
				-                        text_rec_score_thresh=text_rec_score_thresh,
			
 
				-                    )
			
 
				+        # fix the doc_title label
			
 
				+        doc_title_idxes = layout_order_config.get("doc_title_block_idxes", [])
			
 
				+        paragraph_title_idxes = layout_order_config.get(
			
 
				+            "paragraph_title_block_idxes", []
			
 
				+        )
			
 
				+        # check if there is only one paragraph title and without doc_title
			
 
				+        only_one_paragraph_title = (
			
 
				+            len(paragraph_title_idxes) == 1 and len(doc_title_idxes) == 0
			
 
				+        )
			
 
				+        if only_one_paragraph_title:
			
 
				+            paragraph_title_block_area = caculate_bbox_area(
			
 
				+                layout_det_res["boxes"][paragraph_title_idxes[0]]["coordinate"]
			
 
				+            )
			
 
				+            title_area_max_block_threshold = layout_order_config.get(
			
 
				+                "title_area_max_block_threshold", 0.3
			
 
				+            )
			
 
				+            if (
			
 
				+                paragraph_title_block_area
			
 
				+                > max_block_area * title_area_max_block_threshold
			
 
				+            ):
			
 
				+                layout_det_res["boxes"][paragraph_title_idxes[0]]["label"] = "doc_title"
			
 
				+                layout_order_config["doc_title_block_idxes"].append(
			
 
				+                    paragraph_title_idxes[0]
			
 
				                 )
			
 
				-                _, matched_idxs = get_sub_regions_ocr_res(
			
 
				-                    overall_ocr_res, [box], return_match_idx=True
			
 
				+                layout_order_config["paragraph_title_block_idxes"].remove(
			
 
				+                    paragraph_title_idxes[0]
			
 
				                 )
			
 
				-                for matched_idx in sorted(matched_idxs, reverse=True):
			
 
				-                    del overall_ocr_res["dt_polys"][matched_idx]
			
 
				-                    del overall_ocr_res["rec_texts"][matched_idx]
			
 
				-                    overall_ocr_res["rec_boxes"] = np.delete(
			
 
				-                        overall_ocr_res["rec_boxes"], matched_idx, axis=0
			
 
				+
			
 
				+        # Re-recognize every OCR box matched to more than one layout box, one crop per layout box.
			
 
				+        for overall_ocr_idx, layout_box_ids in matched_ocr_dict.items():
			
 
				+            if len(layout_box_ids) > 1:
			
 
				+                matched_no = 0
			
 
				+                overall_ocr_box = copy.deepcopy(
			
 
				+                    overall_ocr_res["rec_boxes"][overall_ocr_idx]
			
 
				+                )
			
 
				+                overall_ocr_dt_poly = copy.deepcopy(
			
 
				+                    overall_ocr_res["dt_polys"][overall_ocr_idx]
			
 
				+                )
			
 
				+                for box_idx in layout_box_ids:
			
 
				+                    layout_box = layout_det_res["boxes"][box_idx]["coordinate"]
			
 
				+                    crop_box = get_bbox_intersection(overall_ocr_box, layout_box)
			
 
				+                    x1, y1, x2, y2 = [int(i) for i in crop_box]
			
 
				+                    crop_img = np.array(image)[y1:y2, x1:x2]
			
 
				+                    crop_img_rec_res = next(text_rec_model([crop_img]))
			
 
				+                    crop_img_dt_poly = get_bbox_intersection(
			
 
				+                        overall_ocr_dt_poly, layout_box, return_format="poly"
			
 
				                     )
			
 
				-                    del overall_ocr_res["rec_polys"][matched_idx]
			
 
				-                    del overall_ocr_res["rec_scores"][matched_idx]
			
 
				+                    crop_img_rec_score = crop_img_rec_res["rec_score"]
			
 
				+                    crop_img_rec_text = crop_img_rec_res["rec_text"]
			
 
				+                    text_rec_score_thresh = (
			
 
				+                        text_rec_score_thresh
			
 
				+                        if text_rec_score_thresh is not None
			
 
				+                        else (self.general_ocr_pipeline.text_rec_score_thresh)
			
 
				+                    )
			
 
				+                    if crop_img_rec_score >= text_rec_score_thresh:
			
 
				+                        matched_no += 1
			
 
				+                        if matched_no == 1:
			
 
				+                            # the first accepted crop overwrites the original OCR entry in place
			
 
				+                            overall_ocr_res["dt_polys"][
			
 
				+                                overall_ocr_idx
			
 
				+                            ] = crop_img_dt_poly
			
 
				+                            overall_ocr_res["rec_boxes"][overall_ocr_idx] = crop_box
			
 
				+                            overall_ocr_res["rec_polys"][
			
 
				+                                overall_ocr_idx
			
 
				+                            ] = crop_img_dt_poly
			
 
				+                            overall_ocr_res["rec_scores"][
			
 
				+                                overall_ocr_idx
			
 
				+                            ] = crop_img_rec_score
			
 
				+                            overall_ocr_res["rec_texts"][
			
 
				+                                overall_ocr_idx
			
 
				+                            ] = crop_img_rec_text
			
 
				+                        else:
			
 
				+                            # later accepted crops are appended to the overall OCR result as new entries
			
 
				+                            overall_ocr_res["dt_polys"].append(crop_img_dt_poly)
			
 
				+                            overall_ocr_res["rec_boxes"] = np.vstack(
			
 
				+                                (overall_ocr_res["rec_boxes"], crop_box)
			
 
				+                            )
			
 
				+                            overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
			
 
				+                            overall_ocr_res["rec_scores"].append(crop_img_rec_score)
			
 
				+                            overall_ocr_res["rec_texts"].append(crop_img_rec_text)
			
 
				+                            overall_ocr_res["rec_labels"].append("text")
			
 
				+                            layout_to_ocr_mapping[box_idx].remove(overall_ocr_idx)
			
 
				+                            layout_to_ocr_mapping[box_idx].append(
			
 
				+                                len(overall_ocr_res["rec_texts"]) - 1
			
 
				+                            )
			
 
				+
			
 
				+        layout_order_config["all_layout_region_box"] = region_box
			
 
				+        layout_order_config["layout_to_ocr_mapping"] = layout_to_ocr_mapping
			
 
				+        layout_order_config["matched_ocr_dict"] = matched_ocr_dict
			
 
				+
			
 
				+        return layout_order_config, layout_det_res
			
 
				+
			
 
				+    def sort_line_by_x_projection(
			
 
				+        self,
			
 
				+        line: List[List[Union[List[int], str]]],
			
 
				+        input_img: np.ndarray,
			
 
				+        text_rec_model: Any,
			
 
				+        text_rec_score_thresh: Union[float, None] = None,
			
 
				+    ) -> None:
			
 
				+        """
			
 
				+        Split a line's spans with `split_boxes_if_x_contained`; if the span count changed, sort them by left x-coordinate and re-recognize the "text" spans.
			
 
				 
			
 
				-                if sub_ocr_res["rec_boxes"].size > 0:
			
 
				-                    sub_ocr_res["rec_labels"] = ["text"] * len(sub_ocr_res["rec_texts"])
			
 
				+        Args:
			
 
				+            line (list): A list of spans, where each span is a list containing a bounding box and text.
			
 
				+            input_img (ndarray): The input image used for OCR.
			
 
				+            text_rec_model (Any): The text recognition model called on the crop of each "text" span.
			
 
				 
			
 
				-                    overall_ocr_res["dt_polys"].extend(sub_ocr_res["dt_polys"])
			
 
				-                    overall_ocr_res["rec_texts"].extend(sub_ocr_res["rec_texts"])
			
 
				-                    overall_ocr_res["rec_boxes"] = np.concatenate(
			
 
				-                        [overall_ocr_res["rec_boxes"], sub_ocr_res["rec_boxes"]], axis=0
			
 
				+        Returns:
			
 
				+            list: The sorted line of text spans.
			
 
				+        """
			
 
				+        splited_boxes = split_boxes_if_x_contained(line)
			
 
				+        splited_lines = []
			
 
				+        if len(line) != len(splited_boxes):
			
 
				+            splited_boxes.sort(key=lambda span: span[0][0])
			
 
				+            for span in splited_boxes:
			
 
				+                if span[2] == "text":
			
 
				+                    crop_img = input_img[
			
 
				+                        int(span[0][1]) : int(span[0][3]),
			
 
				+                        int(span[0][0]) : int(span[0][2]),
			
 
				+                    ]
			
 
				+                    crop_img_rec_res = next(text_rec_model([crop_img]))
			
 
				+                    crop_img_rec_score = crop_img_rec_res["rec_score"]
			
 
				+                    crop_img_rec_text = crop_img_rec_res["rec_text"]
			
 
				+                    span[1] = (
			
 
				+                        crop_img_rec_text
			
 
				+                        if crop_img_rec_score >= text_rec_score_thresh
			
 
				+                        else ""
			
 
				                     )
			
 
				-                    overall_ocr_res["rec_polys"].extend(sub_ocr_res["rec_polys"])
			
 
				-                    overall_ocr_res["rec_scores"].extend(sub_ocr_res["rec_scores"])
			
 
				-                    overall_ocr_res["rec_labels"].extend(sub_ocr_res["rec_labels"])
			
 
				-
			
 
				-        for formula_res in formula_res_list:
			
 
				-            x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
			
 
				-            poly_points = [
			
 
				-                (x_min, y_min),
			
 
				-                (x_max, y_min),
			
 
				-                (x_max, y_max),
			
 
				-                (x_min, y_max),
			
 
				+
			
 
				+                splited_lines.append(span)
			
 
				+        else:
			
 
				+            splited_lines = line
			
 
				+
			
 
				+        return splited_lines
			
 
				+
			
 
				+    def get_block_rec_content(
			
 
				+        self,
			
 
				+        image: list,
			
 
				+        layout_order_config: dict,
			
 
				+        ocr_rec_res: dict,
			
 
				+        block: LayoutParsingBlock,
			
 
				+        text_rec_model: Any,
			
 
				+        text_rec_score_thresh: Union[float, None] = None,
			
 
				+    ) -> str:
			
 
				+
			
 
				+        text_delimiter_map = {
			
 
				+            "content": "\n",
			
 
				+        }
			
 
				+        line_delimiter_map = {
			
 
				+            "doc_title": " ",
			
 
				+            "content": "\n",
			
 
				+        }
			
 
				+        if len(ocr_rec_res["rec_texts"]) == 0:
			
 
				+            block.content = ""
			
 
				+            return block
			
 
				+
			
 
				+        label = block.label
			
 
				+        if label == "reference":
			
 
				+            rec_boxes = ocr_rec_res["boxes"]
			
 
				+            block_left_coordinate = min([box[0] for box in rec_boxes])
			
 
				+            block_right_coordinate = max([box[2] for box in rec_boxes])
			
 
				+            first_line_span_limit = (5,)
			
 
				+            last_line_span_limit = (20,)
			
 
				+        else:
			
 
				+            block_left_coordinate, _, block_right_coordinate, _ = block.bbox
			
 
				+            first_line_span_limit = (10,)
			
 
				+            last_line_span_limit = (10,)
			
 
				+
			
 
				+        if label == "formula":
			
 
				+            ocr_rec_res["rec_texts"] = [
			
 
				+                rec_res_text.replace("$", "")
			
 
				+                for rec_res_text in ocr_rec_res["rec_texts"]
			
 
				             ]
			
 
				-            overall_ocr_res["dt_polys"].append(poly_points)
			
 
				-            overall_ocr_res["rec_texts"].append(f"${formula_res['rec_formula']}$")
			
 
				-            overall_ocr_res["rec_boxes"] = np.vstack(
			
 
				-                (overall_ocr_res["rec_boxes"], [formula_res["dt_polys"]])
			
 
				+        lines = group_boxes_into_lines(
			
 
				+            ocr_rec_res,
			
 
				+            block,
			
 
				+            layout_order_config.get("line_height_iou_threshold", 0.4),
			
 
				+        )
			
 
				+
			
 
				+        block.num_of_lines = len(lines)
			
 
				+
			
 
				+        # format line
			
 
				+        new_lines = []
			
 
				+        horizontal_text_line_num = 0
			
 
				+        for line in lines:
			
 
				+            line.sort(key=lambda span: span[0][0])
			
 
				+
			
 
				+            # merge formula and text
			
 
				+            ocr_labels = [span[2] for span in line]
			
 
				+            if "formula" in ocr_labels:
			
 
				+                line = self.sort_line_by_x_projection(
			
 
				+                    line, image, text_rec_model, text_rec_score_thresh
			
 
				+                )
			
 
				+
			
 
				+            text_orientation = calculate_text_orientation([span[0] for span in line])
			
 
				+            horizontal_text_line_num += 1 if text_orientation == "horizontal" else 0
			
 
				+
			
 
				+            line_text = format_line(
			
 
				+                line,
			
 
				+                block_left_coordinate,
			
 
				+                block_right_coordinate,
			
 
				+                first_line_span_limit=first_line_span_limit,
			
 
				+                last_line_span_limit=last_line_span_limit,
			
 
				+                block_label=block.label,
			
 
				+                delimiter_map=text_delimiter_map,
			
 
				             )
			
 
				-            overall_ocr_res["rec_labels"].append("formula")
			
 
				-            overall_ocr_res["rec_polys"].append(poly_points)
			
 
				-            overall_ocr_res["rec_scores"].append(1)
			
 
				+            new_lines.append(line_text)
			
 
				+
			
 
				+        delim = line_delimiter_map.get(label, "")
			
 
				+        content = delim.join(new_lines)
			
 
				+        block.content = content
			
 
				+        block.direction = (
			
 
				+            "horizontal"
			
 
				+            if horizontal_text_line_num > len(new_lines) * 0.5
			
 
				+            else "vertical"
			
 
				+        )
			
 
				 
			
 
				-        parsing_res_list = get_single_block_parsing_res(
			
 
				-            self.general_ocr_pipeline,
			
 
				+        return block
			
 
				+
			
 
				+    def get_layout_parsing_blocks(
			
 
				+        self,
			
 
				+        image: list,
			
 
				+        layout_order_config: dict,
			
 
				+        overall_ocr_res: OCRResult,
			
 
				+        layout_det_res: DetResult,
			
 
				+        table_res_list: list,
			
 
				+        seal_res_list: list,
			
 
				+        text_rec_model: Any,
			
 
				+        text_rec_score_thresh: Union[float, None] = None,
			
 
				+    ) -> list:
			
 
				+        """
			
 
				+        Extract structured information from OCR and layout detection results.
			
 
				+
			
 
				+        Args:
			
 
				+            image (list): The input image.
			
 
				+            overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
			
 
				+                - "input_img": The image on which OCR was performed.
			
 
				+                - "dt_boxes": A list of detected text box coordinates.
			
 
				+                - "rec_texts": A list of recognized text corresponding to the detected boxes.
			
 
				+
			
 
				+            layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
			
 
				+                - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
			
 
				+
			
 
				+            table_res_list (list): A list of table detection results, where each item is a dictionary containing:
			
 
				+                - "block_bbox": The bounding box of the table layout.
			
 
				+                - "pred_html": The predicted HTML representation of the table.
			
 
				+
			
 
				+            seal_res_list (List): A list of seal detection results. The details of each item depend on the specific application context.
			
 
				+            text_rec_model (Any): A model for text recognition.
			
 
				+            text_rec_score_thresh (Union[float, None]): The minimum score required for a recognized character to be considered valid. If None, use the default value specified during initialization. Default is None.
			
 
				+
			
 
				+        Returns:
			
 
				+            list: A list of structured boxes where each item is a dictionary containing:
			
 
				+                - "block_label": The label of the content (e.g., 'table', 'chart', 'image').
			
 
				+                - The label as a key with either table HTML or image data and text.
			
 
				+                - "block_bbox": The coordinates of the layout box.
			
 
				+        """
			
 
				+
			
 
				+        table_index = 0
			
 
				+        seal_index = 0
			
 
				+        layout_parsing_blocks: List[LayoutParsingBlock] = []
			
 
				+
			
 
				+        for box_idx, box_info in enumerate(layout_det_res["boxes"]):
			
 
				+
			
 
				+            label = box_info["label"]
			
 
				+            block_bbox = box_info["coordinate"]
			
 
				+            rec_res = {"boxes": [], "rec_texts": [], "rec_labels": []}
			
 
				+
			
 
				+            block = LayoutParsingBlock(label=label, bbox=block_bbox)
			
 
				+
			
 
				+            if label == "table" and len(table_res_list) > 0:
			
 
				+                block.content = table_res_list[table_index]["pred_html"]
			
 
				+                table_index += 1
			
 
				+            elif label == "seal" and len(seal_res_list) > 0:
			
 
				+                block.content = seal_res_list[seal_index]["rec_texts"]
			
 
				+                seal_index += 1
			
 
				+            else:
			
 
				+                if label == "formula":
			
 
				+                    _, ocr_idx_list = get_sub_regions_ocr_res(
			
 
				+                        overall_ocr_res, [block_bbox], return_match_idx=True
			
 
				+                    )
			
 
				+                    layout_order_config["layout_to_ocr_mapping"][box_idx] = ocr_idx_list
			
 
				+                else:
			
 
				+                    ocr_idx_list = layout_order_config["layout_to_ocr_mapping"].get(
			
 
				+                        box_idx, []
			
 
				+                    )
			
 
				+                for box_no in ocr_idx_list:
			
 
				+                    rec_res["boxes"].append(overall_ocr_res["rec_boxes"][box_no])
			
 
				+                    rec_res["rec_texts"].append(
			
 
				+                        overall_ocr_res["rec_texts"][box_no],
			
 
				+                    )
			
 
				+                    rec_res["rec_labels"].append(
			
 
				+                        overall_ocr_res["rec_labels"][box_no],
			
 
				+                    )
			
 
				+                block = self.get_block_rec_content(
			
 
				+                    image=image,
			
 
				+                    block=block,
			
 
				+                    layout_order_config=layout_order_config,
			
 
				+                    ocr_rec_res=rec_res,
			
 
				+                    text_rec_model=text_rec_model,
			
 
				+                    text_rec_score_thresh=text_rec_score_thresh,
			
 
				+                )
			
 
				+
			
 
				+            if label in ["chart", "image"]:
			
 
				+                x_min, y_min, x_max, y_max = list(map(int, block_bbox))
			
 
				+                img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
			
 
				+                img = Image.fromarray(image[y_min:y_max, x_min:x_max, ::-1])
			
 
				+                block.image = {img_path: img}
			
 
				+
			
 
				+            layout_parsing_blocks.append(block)
			
 
				+
			
 
				+        # when there is no layout detection result but there is ocr result, use ocr result
			
 
				+        if len(layout_det_res["boxes"]) == 0:
			
 
				+            region_box = [65535, 65535, 0, 0]
			
 
				+            for ocr_idx, (ocr_rec_box, ocr_rec_text) in enumerate(
			
 
				+                zip(overall_ocr_res["rec_boxes"], overall_ocr_res["rec_texts"])
			
 
				+            ):
			
 
				+                update_layout_order_config_block_index(
			
 
				+                    layout_order_config, "text", ocr_idx
			
 
				+                )
			
 
				+                region_box = update_region_box(ocr_rec_box, region_box)
			
 
				+                layout_parsing_blocks.append(
			
 
				+                    LayoutParsingBlock(
			
 
				+                        label="text", bbox=ocr_rec_box, content=ocr_rec_text
			
 
				+                    )
			
 
				+                )
			
 
				+            layout_order_config["all_layout_region_box"] = region_box
			
 
				+
			
 
				+        return layout_parsing_blocks, layout_order_config
			
 
				+
			
 
				+    def get_layout_parsing_res(
			
 
				+        self,
			
 
				+        image: list,
			
 
				+        layout_det_res: DetResult,
			
 
				+        overall_ocr_res: OCRResult,
			
 
				+        table_res_list: list,
			
 
				+        seal_res_list: list,
			
 
				+        formula_res_list: list,
			
 
				+        text_rec_score_thresh: Union[float, None] = None,
			
 
				+    ) -> list:
			
 
				+        """
			
 
				+        Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
			
 
				+        Args:
			
 
				+            image (list): The input image.
			
 
				+            layout_det_res (DetResult): The detection result containing the layout information of the document.
			
 
				+            overall_ocr_res (OCRResult): The overall OCR result containing text information.
			
 
				+            table_res_list (list): A list of table recognition results.
			
 
				+            seal_res_list (list): A list of seal recognition results.
			
 
				+            formula_res_list (list): A list of formula recognition results.
			
 
				+            text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to None.
			
 
				+        Returns:
			
 
				+            list: A list of dictionaries representing the layout parsing result.
			
 
				+        """
			
 
				+        from .setting import layout_order_config
			
 
				+
			
 
				+        # Standardize data
			
 
				+        layout_order_config, layout_det_res = self.standardized_data(
			
 
				+            image=image,
			
 
				+            layout_order_config=copy.deepcopy(layout_order_config),
			
 
				+            layout_det_res=layout_det_res,
			
 
				+            overall_ocr_res=overall_ocr_res,
			
 
				+            formula_res_list=formula_res_list,
			
 
				+            text_rec_model=self.general_ocr_pipeline.text_rec_model,
			
 
				+            text_rec_score_thresh=text_rec_score_thresh,
			
 
				+        )
			
 
				+
			
 
				+        # Format layout parsing block
			
 
				+        parsing_res_list, layout_order_config = self.get_layout_parsing_blocks(
			
 
				+            image=image,
			
 
				+            layout_order_config=layout_order_config,
			
 
				             overall_ocr_res=overall_ocr_res,
			
 
				             layout_det_res=layout_det_res,
			
 
				             table_res_list=table_res_list,
			
 
				             seal_res_list=seal_res_list,
			
 
				+            text_rec_model=self.general_ocr_pipeline.text_rec_model,
			
 
				+            text_rec_score_thresh=self.general_ocr_pipeline.text_rec_score_thresh,
			
 
				+        )
			
 
				+
			
 
				+        parsing_res_list = xycut_enhanced(
			
 
				+            parsing_res_list,
			
 
				+            layout_order_config,
			
 
				         )
			
 
				 
			
 
				         return parsing_res_list
			
@@ -663,12 +1007,6 @@ class LayoutParsingPipelineV2(BasePipeline):
 
				                 table_res_list=table_res_list,
			
 
				                 seal_res_list=seal_res_list,
			
 
				                 formula_res_list=formula_res_list,
			
 
				-                imgs_in_doc=imgs_in_doc,
			
 
				-                text_det_limit_side_len=text_det_limit_side_len,
			
 
				-                text_det_limit_type=text_det_limit_type,
			
 
				-                text_det_thresh=text_det_thresh,
			
 
				-                text_det_box_thresh=text_det_box_thresh,
			
 
				-                text_det_unclip_ratio=text_det_unclip_ratio,
			
 
				                 text_rec_score_thresh=text_rec_score_thresh,
			
 
				             )
			
 
				 
			
--- a/paddlex/inference/pipelines/layout_parsing/result_v2.py
+++ b/paddlex/inference/pipelines/layout_parsing/result_v2.py
@@ -16,6 +16,7 @@ from __future__ import annotations
 
				 import copy
			
 
				 import re
			
 
				 from pathlib import Path
			
 
				+from typing import List
			
 
				 
			
 
				 import numpy as np
			
 
				 from PIL import Image, ImageDraw
			
@@ -27,7 +28,6 @@ from ...common.result import (
 
				     MarkdownMixin,
			
 
				     XlsxMixin,
			
 
				 )
			
 
				-from .utils import get_show_color
			
 
				 
			
 
				 
			
 
				 class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
			
@@ -64,6 +64,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				             return fn
			
 
				 
			
 
				     def _to_img(self) -> dict[str, np.ndarray]:
			
 
				+        from .utils import get_show_color
			
 
				+
			
 
				         res_img_dict = {}
			
 
				         model_settings = self["model_settings"]
			
 
				         if model_settings["use_doc_preprocessor"]:
			
@@ -101,11 +103,11 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				         # for layout ordering image
			
 
				         image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
			
 
				         draw = ImageDraw.Draw(image, "RGBA")
			
 
				-        parsing_result = self["parsing_res_list"]
			
 
				+        parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
			
 
				         for block in parsing_result:
			
 
				-            bbox = block["block_bbox"]
			
 
				-            index = block.get("index", None)
			
 
				-            label = block["sub_label"]
			
 
				+            bbox = block.bbox
			
 
				+            index = block.index
			
 
				+            label = block.label
			
 
				             fill_color = get_show_color(label)
			
 
				             draw.rectangle(bbox, fill=fill_color)
			
 
				             if index is not None:
			
@@ -176,9 +178,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				         parsing_res_list = self["parsing_res_list"]
			
 
				         parsing_res_list = [
			
 
				             {
			
 
				-                "block_label": parsing_res["block_label"],
			
 
				-                "block_content": parsing_res["block_content"],
			
 
				-                "block_bbox": parsing_res["block_bbox"],
			
 
				+                "block_label": parsing_res.label,
			
 
				+                "block_content": parsing_res.content,
			
 
				+                "block_bbox": parsing_res.bbox,
			
 
				             }
			
 
				             for parsing_res in parsing_res_list
			
 
				         ]
			
@@ -281,18 +283,18 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				                     " ",
			
 
				                 )
			
 
				 
			
 
				-            def format_centered_text(key):
			
 
				+            def format_centered_text():
			
 
				                 return (
			
 
				-                    f'<div style="text-align: center;">{block[key]}</div>'.replace(
			
 
				+                    f'<div style="text-align: center;">{block.content}</div>'.replace(
			
 
				                         "-\n",
			
 
				                         "",
			
 
				                     ).replace("\n", " ")
			
 
				                     + "\n"
			
 
				                 )
			
 
				 
			
 
				-            def format_image(label):
			
 
				+            def format_image():
			
 
				                 img_tags = []
			
 
				-                image_path = "".join(block[label].keys())
			
 
				+                image_path = "".join(block.image.keys())
			
 
				                 img_tags.append(
			
 
				                     '<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
			
 
				                         image_path.replace("-\n", "").replace("\n", " "),
			
@@ -301,7 +303,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				                 return "\n".join(img_tags)
			
 
				 
			
 
				             def format_first_line(templates, format_func, spliter):
			
 
				-                lines = block["block_content"].split(spliter)
			
 
				+                lines = block.content.split(spliter)
			
 
				                 for idx in range(len(lines)):
			
 
				                     line = lines[idx]
			
 
				                     if line.strip() == "":
			
@@ -312,23 +314,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				                 return spliter.join(lines)
			
 
				 
			
 
				             def format_table():
			
 
				-                return "\n" + block["block_content"]
			
 
				+                return "\n" + block.content
			
 
				 
			
 
				-            def get_seg_flag(block, prev_block):
			
 
				+            def get_seg_flag(block: LayoutParsingBlock, prev_block: LayoutParsingBlock):
			
 
				 
			
 
				                 seg_start_flag = True
			
 
				                 seg_end_flag = True
			
 
				 
			
 
				-                block_box = block["block_bbox"]
			
 
				+                block_box = block.bbox
			
 
				                 context_left_coordinate = block_box[0]
			
 
				                 context_right_coordinate = block_box[2]
			
 
				-                seg_start_coordinate = block.get("seg_start_coordinate")
			
 
				-                seg_end_coordinate = block.get("seg_end_coordinate")
			
 
				+                seg_start_coordinate = block.seg_start_coordinate
			
 
				+                seg_end_coordinate = block.seg_end_coordinate
			
 
				 
			
 
				                 if prev_block is not None:
			
 
				-                    prev_block_bbox = prev_block["block_bbox"]
			
 
				-                    num_of_prev_lines = prev_block.get("num_of_lines")
			
 
				-                    pre_block_seg_end_coordinate = prev_block.get("seg_end_coordinate")
			
 
				+                    prev_block_bbox = prev_block.bbox
			
 
				+                    num_of_prev_lines = prev_block.num_of_lines
			
 
				+                    pre_block_seg_end_coordinate = prev_block.seg_end_coordinate
			
 
				                     prev_end_space_small = (
			
 
				                         context_right_coordinate - pre_block_seg_end_coordinate < 10
			
 
				                     )
			
@@ -368,32 +370,30 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				                 return seg_start_flag, seg_end_flag
			
 
				 
			
 
				             handlers = {
			
 
				-                "paragraph_title": lambda: format_title(block["block_content"]),
			
 
				-                "doc_title": lambda: f"# {block['block_content']}".replace(
			
 
				+                "paragraph_title": lambda: format_title(block.content),
			
 
				+                "doc_title": lambda: f"# {block.content}".replace(
			
 
				                     "-\n",
			
 
				                     "",
			
 
				                 ).replace("\n", " "),
			
 
				-                "table_title": lambda: format_centered_text("block_content"),
			
 
				-                "figure_title": lambda: format_centered_text("block_content"),
			
 
				-                "chart_title": lambda: format_centered_text("block_content"),
			
 
				-                "text": lambda: block["block_content"]
			
 
				-                .replace("-\n", " ")
			
 
				-                .replace("\n", " "),
			
 
				+                "table_title": lambda: format_centered_text(),
			
 
				+                "figure_title": lambda: format_centered_text(),
			
 
				+                "chart_title": lambda: format_centered_text(),
			
 
				+                "text": lambda: block.content.replace("-\n", " ").replace("\n", " "),
			
 
				                 "abstract": lambda: format_first_line(
			
 
				                     ["摘要", "abstract"], lambda l: f"## {l}\n", " "
			
 
				                 ),
			
 
				-                "content": lambda: block["block_content"]
			
 
				-                .replace("-\n", "  \n")
			
 
				-                .replace("\n", "  \n"),
			
 
				-                "image": lambda: format_image("block_image"),
			
 
				-                "chart": lambda: format_image("block_image"),
			
 
				-                "formula": lambda: f"$${block['block_content']}$$",
			
 
				+                "content": lambda: block.content.replace("-\n", "  \n").replace(
			
 
				+                    "\n", "  \n"
			
 
				+                ),
			
 
				+                "image": lambda: format_image(),
			
 
				+                "chart": lambda: format_image(),
			
 
				+                "formula": lambda: f"$${block.content}$$",
			
 
				                 "table": format_table,
			
 
				                 "reference": lambda: format_first_line(
			
 
				                     ["参考文献", "references"], lambda l: f"## {l}", "\n"
			
 
				                 ),
			
 
				-                "algorithm": lambda: block["block_content"].strip("\n"),
			
 
				-                "seal": lambda: f"Words of Seals:\n{block['block_content']}",
			
 
				+                "algorithm": lambda: block.content.strip("\n"),
			
 
				+                "seal": lambda: f"Words of Seals:\n{block.content}",
			
 
				             }
			
 
				             parsing_res_list = obj["parsing_res_list"]
			
 
				             markdown_content = ""
			
@@ -403,14 +403,10 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				             prev_block = None
			
 
				             page_first_element_seg_start_flag = None
			
 
				             page_last_element_seg_end_flag = None
			
 
				-            parsing_res_list = sorted(
			
 
				-                parsing_res_list,
			
 
				-                key=lambda x: x.get("sub_index", 999),
			
 
				-            )
			
 
				             for block in parsing_res_list:
			
 
				                 seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
			
 
				 
			
 
				-                label = block.get("block_label")
			
 
				+                label = block.label
			
 
				                 page_first_element_seg_start_flag = (
			
 
				                     seg_start_flag
			
 
				                     if (page_first_element_seg_start_flag is None)
			
@@ -465,3 +461,100 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
 
				             markdown_info["markdown_images"][img["path"]] = img["img"]
			
 
				 
			
 
				         return markdown_info
			
 
				+
			
 
				+
			
 
				+class LayoutParsingBlock:
			
 
				+
			
 
				+    def __init__(self, label, bbox, content="") -> None:
			
 
				+        self.label = label
			
 
				+        self.region_label = "other"
			
 
				+        self.bbox = [int(item) for item in bbox]
			
 
				+        self.content = content
			
 
				+        self.seg_start_coordinate = float("inf")
			
 
				+        self.seg_end_coordinate = float("-inf")
			
 
				+        self.width = bbox[2] - bbox[0]
			
 
				+        self.height = bbox[3] - bbox[1]
			
 
				+        self.area = self.width * self.height
			
 
				+        self.num_of_lines = 1
			
 
				+        self.image = None
			
 
				+        self.index = None
			
 
				+        self.visual_index = None
			
 
				+        self.direction = self.get_bbox_direction()
			
 
				+        self.child_blocks = []
			
 
				+        self.update_direction_info()
			
 
				+
			
 
				+    def __str__(self) -> str:
			
 
				+        return f"{self.__dict__}"
			
 
				+
			
 
				+    def __repr__(self) -> str:
			
 
				+        _str = f"\n\n#################\nlabel:\t{self.label}\nregion_label:\t{self.region_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
			
 
				+        return _str
			
 
				+
			
 
				+    def to_dict(self) -> dict:
			
 
				+        return self.__dict__
			
 
				+
			
 
				+    def update_direction_info(self) -> None:
			
 
				+        if self.region_label == "vision":
			
 
				+            self.direction = "horizontal"
			
 
				+        if self.direction == "horizontal":
			
 
				+            self.secondary_direction = "vertical"
			
 
				+            self.short_side_length = self.height
			
 
				+            self.long_side_length = self.width
			
 
				+            self.start_coordinate = self.bbox[0]
			
 
				+            self.end_coordinate = self.bbox[2]
			
 
				+            self.secondary_direction_start_coordinate = self.bbox[1]
			
 
				+            self.secondary_direction_end_coordinate = self.bbox[3]
			
 
				+        else:
			
 
				+            self.secondary_direction = "horizontal"
			
 
				+            self.short_side_length = self.width
			
 
				+            self.long_side_length = self.height
			
 
				+            self.start_coordinate = self.bbox[1]
			
 
				+            self.end_coordinate = self.bbox[3]
			
 
				+            self.secondary_direction_start_coordinate = self.bbox[0]
			
 
				+            self.secondary_direction_end_coordinate = self.bbox[2]
			
 
				+
			
 
				+    def append_child_block(self, child_block: LayoutParsingBlock) -> None:
			
 
				+        if not self.child_blocks:
			
 
				+            self.ori_bbox = self.bbox.copy()
			
 
				+        x1, y1, x2, y2 = self.bbox
			
 
				+        x1_child, y1_child, x2_child, y2_child = child_block.bbox
			
 
				+        union_bbox = (
			
 
				+            min(x1, x1_child),
			
 
				+            min(y1, y1_child),
			
 
				+            max(x2, x2_child),
			
 
				+            max(y2, y2_child),
			
 
				+        )
			
 
				+        self.bbox = union_bbox
			
 
				+        self.update_direction_info()
			
 
				+        child_blocks = [child_block]
			
 
				+        if child_block.child_blocks:
			
 
				+            child_blocks.extend(child_block.get_child_blocks())
			
 
				+        self.child_blocks.extend(child_blocks)
			
 
				+
			
 
				+    def get_child_blocks(self) -> list:
			
 
				+        self.bbox = self.ori_bbox
			
 
				+        child_blocks = self.child_blocks.copy()
			
 
				+        self.child_blocks = []
			
 
				+        return child_blocks
			
 
				+
			
 
				+    def get_centroid(self) -> tuple:
			
 
				+        x1, y1, x2, y2 = self.bbox
			
 
				+        centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
			
 
				+        return centroid
			
 
				+
			
 
				+    def get_bbox_direction(self, orientation_ratio: float = 1.0) -> bool:
			
 
				+        """
			
 
				+        Determine if a bounding box is horizontal or vertical.
			
 
				+
			
 
				+        Args:
			
 
				+            bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
			
 
				+            orientation_ratio (float): Ratio for determining orientation. Default is 1.0.
			
 
				+
			
 
				+        Returns:
			
 
				+            str: "horizontal" or "vertical".
			
 
				+        """
			
 
				+        return (
			
 
				+            "horizontal"
			
 
				+            if self.width * orientation_ratio >= self.height
			
 
				+            else "vertical"
			
 
				+        )
			
--- a/paddlex/inference/pipelines/layout_parsing/setting.py
+++ b/paddlex/inference/pipelines/layout_parsing/setting.py
@@ -0,0 +1,70 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+layout_order_config = {
			
 
				+    # 人工配置项
			
 
				+    "line_height_iou_threshold": 0.4,  # For line segmentation of OCR results
			
 
				+    "title_area_max_block_threshold": 0.3,  # update paragraph_title -> doc_title
			
 
				+    "block_label_match_iou_threshold": 0.1,
			
 
				+    "block_title_match_iou_threshold": 0.1,
			
 
				+    "doc_title_labels": ["doc_title"],  # 文档标题
			
 
				+    "paragraph_title_labels": ["paragraph_title"],  # 段落标题
			
 
				+    "vision_labels": [
			
 
				+        "image",
			
 
				+        "table",
			
 
				+        "chart",
			
 
				+        "figure",
			
 
				+    ],  # 图、表、印章、图表、图
			
 
				+    "vision_title_labels": ["table_title", "chart_title", "figure_title"],  # 图表标题
			
 
				+    "unordered_labels": [
			
 
				+        "aside_text",
			
 
				+        "seal",
			
 
				+        "number",
			
 
				+        "formula_number",
			
 
				+    ],
			
 
				+    "text_labels": ["text"],
			
 
				+    "header_labels": ["header", "header_image"],
			
 
				+    "footer_labels": ["footer", "footer_image", "footnote"],
			
 
				+    "visualize_index_labels": [
			
 
				+        "text",
			
 
				+        "formula",
			
 
				+        "algorithm",
			
 
				+        "reference",
			
 
				+        "content",
			
 
				+        "abstract",
			
 
				+        "paragraph_title",
			
 
				+        "doc_title",
			
 
				+        "table_title",
			
 
				+        "chart_title",
			
 
				+        "figure_title",
			
 
				+        "image",
			
 
				+        "table",
			
 
				+        "chart",
			
 
				+        "figure",
			
 
				+    ],
			
 
				+    # 自动补全配置项
			
 
				+    "layout_to_ocr_mapping": {},
			
 
				+    "all_layout_region_box": [],  # 区域box
			
 
				+    "doc_title_block_idxes": [],
			
 
				+    "paragraph_title_block_idxes": [],
			
 
				+    "text_title_labels": [],  # doc_title_labels+paragraph_title_labels
			
 
				+    "text_title_block_idxes": [],
			
 
				+    "vision_block_idxes": [],
			
 
				+    "vision_title_block_idxes": [],
			
 
				+    "vision_footnote_block_idxes": [],
			
 
				+    "text_block_idxes": [],
			
 
				+    "header_block_idxes": [],
			
 
				+    "footer_block_idxes": [],
			
 
				+    "unordered_block_idxes": [],
			
 
				+}
			
--- a/paddlex/inference/pipelines/layout_parsing/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/utils.py
@@ -14,21 +14,21 @@
 
				 
			
 
				 __all__ = [
			
 
				     "get_sub_regions_ocr_res",
			
 
				-    "get_layout_ordering",
			
 
				-    "get_single_block_parsing_res",
			
 
				     "get_show_color",
			
 
				     "sorted_layout_boxes",
			
 
				+    "update_layout_order_config_block_index",
			
 
				 ]
			
 
				 
			
 
				 import re
			
 
				 from copy import deepcopy
			
 
				-from typing import Any, Dict, List, Optional, Tuple, Union
			
 
				+from typing import Dict, List, Optional, Tuple, Union
			
 
				 
			
 
				 import numpy as np
			
 
				 from PIL import Image
			
 
				 
			
 
				-from ...models.object_detection.result import DetResult
			
 
				+from ..components import convert_points_to_boxes
			
 
				 from ..ocr.result import OCRResult
			
 
				+from .xycut_enhanced import calculate_projection_iou
			
 
				 
			
 
				 
			
 
				 def get_overlap_boxes_idx(src_boxes: np.ndarray, ref_boxes: np.ndarray) -> List:
			
@@ -209,88 +209,107 @@ def _calculate_overlap_area_div_minbox_area_ratio(
 
				     return intersection_area / min_box_area
			
 
				 
			
 
				 
			
 
				-def _whether_y_overlap_exceeds_threshold(
			
 
				-    bbox1: Union[list, tuple],
			
 
				-    bbox2: Union[list, tuple],
			
 
				-    overlap_ratio_threshold: float = 0.6,
			
 
				-) -> bool:
			
 
				-    """
			
 
				-    Determines whether the vertical overlap between two bounding boxes exceeds a given threshold.
			
 
				+def group_boxes_into_lines(ocr_rec_res, block_info, line_height_iou_threshold):
			
 
				+    rec_boxes = ocr_rec_res["boxes"]
			
 
				+    rec_texts = ocr_rec_res["rec_texts"]
			
 
				+    rec_labels = ocr_rec_res["rec_labels"]
			
 
				 
			
 
				-    Args:
			
 
				-        bbox1 (list or tuple): The first bounding box defined as (left, top, right, bottom).
			
 
				-        bbox2 (list or tuple): The second bounding box defined as (left, top, right, bottom).
			
 
				-        overlap_ratio_threshold (float): The threshold ratio to determine if the overlap is significant.
			
 
				-                                         Defaults to 0.6.
			
 
				+    spans = list(zip(rec_boxes, rec_texts, rec_labels))
			
 
				 
			
 
				-    Returns:
			
 
				-        bool: True if the vertical overlap divided by the minimum height of the two bounding boxes
			
 
				-              exceeds the overlap_ratio_threshold, otherwise False.
			
 
				-    """
			
 
				-    _, y1_0, _, y1_1 = bbox1
			
 
				-    _, y2_0, _, y2_1 = bbox2
			
 
				+    spans.sort(key=lambda span: span[0][1])
			
 
				+    spans = [list(span) for span in spans]
			
 
				 
			
 
				-    overlap = max(0, min(y1_1, y2_1) - max(y1_0, y2_0))
			
 
				-    min_height = min(y1_1 - y1_0, y2_1 - y2_0)
			
 
				+    lines = []
			
 
				+    line = [spans[0]]
			
 
				+    line_region_box = spans[0][0][:]
			
 
				+    block_info.seg_start_coordinate = spans[0][0][0]
			
 
				+    block_info.seg_end_coordinate = spans[-1][0][2]
			
 
				 
			
 
				-    return (overlap / min_height) > overlap_ratio_threshold
			
 
				+    # merge line
			
 
				+    for span in spans[1:]:
			
 
				+        rec_bbox = span[0]
			
 
				+        if (
			
 
				+            calculate_projection_iou(line_region_box, rec_bbox, "vertical")
			
 
				+            >= line_height_iou_threshold
			
 
				+        ):
			
 
				+            line.append(span)
			
 
				+            line_region_box[1] = min(line_region_box[1], rec_bbox[1])
			
 
				+            line_region_box[3] = max(line_region_box[3], rec_bbox[3])
			
 
				+        else:
			
 
				+            lines.append(line)
			
 
				+            line = [span]
			
 
				+            line_region_box = rec_bbox[:]
			
 
				 
			
 
				+    lines.append(line)
			
 
				+    return lines
			
 
				 
			
 
				-def _adjust_span_text(span: List[str], prepend: bool = False, append: bool = False):
			
 
				+
			
 
				+def calculate_text_orientation(
			
 
				+    bboxes: List[List[int]], orientation_ratio: float = 1.5
			
 
				+) -> bool:
			
 
				     """
			
 
				-    Adjust the text of a span by prepending or appending a newline.
			
 
				+    Calculate the orientation of the text based on the bounding boxes.
			
 
				 
			
 
				     Args:
			
 
				-        span (list): A list where the second element is the text of the span.
			
 
				-        prepend (bool): If True, prepend a newline to the text.
			
 
				-        append (bool): If True, append a newline to the text.
			
 
				+        bboxes (list): A list of bounding boxes.
			
 
				+        orientation_ratio (float): Ratio for determining orientation. Default is 1.5.
			
 
				 
			
 
				     Returns:
			
 
				-        None: The function modifies the span in place.
			
 
				+        str: "horizontal" or "vertical".
			
 
				     """
			
 
				-    if prepend:
			
 
				-        span[1] = "\n" + span[1]
			
 
				-    if append:
			
 
				-        span[1] = span[1] + "\n"
			
 
				-    return span
			
 
				 
			
 
				+    bboxes = np.array(bboxes)
			
 
				+    x_min = np.min(bboxes[:, 0])
			
 
				+    x_max = np.max(bboxes[:, 2])
			
 
				+    width = x_max - x_min
			
 
				+    y_min = np.min(bboxes[:, 1])
			
 
				+    y_max = np.max(bboxes[:, 3])
			
 
				+    height = y_max - y_min
			
 
				+    return "horizontal" if width * orientation_ratio >= height else "vertical"
			
 
				 
			
 
				-def _format_line(
			
 
				+
			
 
				+def format_line(
			
 
				     line: List[List[Union[List[int], str]]],
			
 
				-    layout_min: int,
			
 
				-    layout_max: int,
			
 
				-    is_reference: bool = False,
			
 
				+    block_left_coordinate: int,
			
 
				+    block_right_coordinate: int,
			
 
				+    first_line_span_limit: int = 10,
			
 
				+    last_line_span_limit: int = 10,
			
 
				+    block_label: str = "text",
			
 
				+    delimiter_map: Dict = {},
			
 
				 ) -> None:
			
 
				     """
			
 
				     Format a line of text spans based on layout constraints.
			
 
				 
			
 
				     Args:
			
 
				         line (list): A list of spans, where each span is a list containing a bounding box and text.
			
 
				-        layout_min (int): The minimum x-coordinate of the layout bounding box.
			
 
				-        layout_max (int): The maximum x-coordinate of the layout bounding box.
			
 
				-        is_reference (bool): A flag indicating whether the line is a reference line, which affects formatting rules.
			
 
				-
			
 
				+        block_left_coordinate (int): The minimum x-coordinate of the layout bounding box.
			
 
				+        block_right_coordinate (int): The maximum x-coordinate of the layout bounding box.
			
 
				+        first_line_span_limit (int): The limit for the number of pixels before the first span that should be considered part of the first line. Default is 10.
			
 
				+        last_line_span_limit (int): The limit for the number of pixels after the last span that should be considered part of the last line. Default is 10.
			
 
				+        block_label (str): The label associated with the entire block. Default is 'text'.
			
 
				     Returns:
			
 
				         None: The function modifies the line in place.
			
 
				     """
			
 
				     first_span = line[0]
			
 
				-    end_span = line[-1]
			
 
				+    last_span = line[-1]
			
 
				 
			
 
				-    if not is_reference:
			
 
				-        if first_span[0][0] - layout_min > 10:
			
 
				-            first_span = _adjust_span_text(first_span, prepend=True)
			
 
				-        if layout_max - end_span[0][2] > 10:
			
 
				-            end_span = _adjust_span_text(end_span, append=True)
			
 
				-    else:
			
 
				-        if first_span[0][0] - layout_min < 5:
			
 
				-            first_span = _adjust_span_text(first_span, prepend=True)
			
 
				-        if layout_max - end_span[0][2] > 20:
			
 
				-            end_span = _adjust_span_text(end_span, append=True)
			
 
				+    if first_span[0][0] - block_left_coordinate > first_line_span_limit:
			
 
				+        first_span[1] = "\n" + first_span[1]
			
 
				+    if block_right_coordinate - last_span[0][2] > last_line_span_limit:
			
 
				+        last_span[1] = last_span[1] + "\n"
			
 
				 
			
 
				     line[0] = first_span
			
 
				-    line[-1] = end_span
			
 
				+    line[-1] = last_span
			
 
				+
			
 
				+    delim = delimiter_map.get(block_label, " ")
			
 
				+    line_text = delim.join([span[1] for span in line])
			
 
				+
			
 
				+    if block_label != "reference":
			
 
				+        line_text = remove_extra_space(line_text)
			
 
				 
			
 
				-    return line
			
 
				+    if line_text.endswith("-"):
			
 
				+        line_text = line_text[:-1]
			
 
				+    return line_text
			
 
				 
			
 
				 
			
 
				 def split_boxes_if_x_contained(boxes, offset=1e-5):
			
@@ -361,132 +380,7 @@ def split_boxes_if_x_contained(boxes, offset=1e-5):
 
				     return new_boxes
			
 
				 
			
 
				 
			
 
				-def _sort_line_by_x_projection(
			
 
				-    input_img: np.ndarray,
			
 
				-    general_ocr_pipeline: Any,
			
 
				-    line: List[List[Union[List[int], str]]],
			
 
				-) -> None:
			
 
				-    """
			
 
				-    Sort a line of text spans based on their vertical position within the layout bounding box.
			
 
				-
			
 
				-    Args:
			
 
				-        input_img (ndarray): The input image used for OCR.
			
 
				-        general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
			
 
				-        line (list): A list of spans, where each span is a list containing a bounding box and text.
			
 
				-
			
 
				-    Returns:
			
 
				-        list: The sorted line of text spans.
			
 
				-    """
			
 
				-    splited_boxes = split_boxes_if_x_contained(line)
			
 
				-    splited_lines = []
			
 
				-    if len(line) != len(splited_boxes):
			
 
				-        splited_boxes.sort(key=lambda span: span[0][0])
			
 
				-        text_rec_model = general_ocr_pipeline.text_rec_model
			
 
				-        for span in splited_boxes:
			
 
				-            if span[2] == "text":
			
 
				-                crop_img = input_img[
			
 
				-                    int(span[0][1]) : int(span[0][3]),
			
 
				-                    int(span[0][0]) : int(span[0][2]),
			
 
				-                ]
			
 
				-                span[1] = next(text_rec_model([crop_img]))["rec_text"]
			
 
				-            splited_lines.append(span)
			
 
				-    else:
			
 
				-        splited_lines = line
			
 
				-
			
 
				-    return splited_lines
			
 
				-
			
 
				-
			
 
				-def _sort_ocr_res_by_y_projection(
			
 
				-    input_img: np.ndarray,
			
 
				-    general_ocr_pipeline: Any,
			
 
				-    label: Any,
			
 
				-    block_bbox: Tuple[int, int, int, int],
			
 
				-    ocr_res: Dict[str, List[Any]],
			
 
				-    line_height_iou_threshold: float = 0.7,
			
 
				-) -> Dict[str, List[Any]]:
			
 
				-    """
			
 
				-    Sorts OCR results based on their spatial arrangement, grouping them into lines and blocks.
			
 
				-
			
 
				-    Args:
			
 
				-        input_img (ndarray): The input image used for OCR.
			
 
				-        general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
			
 
				-        label (Any): The label associated with the OCR results. It's not used in the function but might be
			
 
				-                     relevant for other parts of the calling context.
			
 
				-        block_bbox (Tuple[int, int, int, int]): A tuple representing the layout bounding box, defined as
			
 
				-                                                 (left, top, right, bottom).
			
 
				-        ocr_res (Dict[str, List[Any]]): A dictionary containing OCR results with the following keys:
			
 
				-            - "boxes": A list of bounding boxes, each defined as [left, top, right, bottom].
			
 
				-            - "rec_texts": A corresponding list of recognized text strings for each box.
			
 
				-        line_height_iou_threshold (float): The threshold for determining whether two boxes belong to
			
 
				-                                           the same line based on their vertical overlap. Defaults to 0.7.
			
 
				-
			
 
				-    Returns:
			
 
				-        Dict[str, List[Any]]: A dictionary with the same structure as `ocr_res`, but with boxes and texts sorted
			
 
				-                              and grouped into lines and blocks.
			
 
				-    """
			
 
				-    assert (
			
 
				-        ocr_res["boxes"] and ocr_res["rec_texts"]
			
 
				-    ), "OCR results must contain 'boxes' and 'rec_texts'"
			
 
				-
			
 
				-    boxes = ocr_res["boxes"]
			
 
				-    rec_texts = ocr_res["rec_texts"]
			
 
				-    rec_labels = ocr_res["rec_labels"]
			
 
				-
			
 
				-    x_min, _, x_max, _ = block_bbox
			
 
				-    inline_x_min = min([box[0] for box in boxes])
			
 
				-    inline_x_max = max([box[2] for box in boxes])
			
 
				-
			
 
				-    spans = list(zip(boxes, rec_texts, rec_labels))
			
 
				-
			
 
				-    spans.sort(key=lambda span: span[0][1])
			
 
				-    spans = [list(span) for span in spans]
			
 
				-
			
 
				-    lines = []
			
 
				-    current_line = [spans[0]]
			
 
				-    current_y0, current_y1 = spans[0][0][1], spans[0][0][3]
			
 
				-
			
 
				-    for span in spans[1:]:
			
 
				-        y0, y1 = span[0][1], span[0][3]
			
 
				-        if _whether_y_overlap_exceeds_threshold(
			
 
				-            (0, current_y0, 0, current_y1),
			
 
				-            (0, y0, 0, y1),
			
 
				-            line_height_iou_threshold,
			
 
				-        ):
			
 
				-            current_line.append(span)
			
 
				-            current_y0 = min(current_y0, y0)
			
 
				-            current_y1 = max(current_y1, y1)
			
 
				-        else:
			
 
				-            lines.append(current_line)
			
 
				-            current_line = [span]
			
 
				-            current_y0, current_y1 = y0, y1
			
 
				-
			
 
				-    if current_line:
			
 
				-        lines.append(current_line)
			
 
				-
			
 
				-    new_lines = []
			
 
				-    for line in lines:
			
 
				-        line.sort(key=lambda span: span[0][0])
			
 
				-
			
 
				-        ocr_labels = [span[2] for span in line]
			
 
				-        if "formula" in ocr_labels:
			
 
				-            line = _sort_line_by_x_projection(input_img, general_ocr_pipeline, line)
			
 
				-        if label == "reference":
			
 
				-            line = _format_line(line, inline_x_min, inline_x_max, is_reference=True)
			
 
				-        elif label != "content":
			
 
				-            line = _format_line(line, x_min, x_max)
			
 
				-        new_lines.append(line)
			
 
				-
			
 
				-    ocr_res["boxes"] = [span[0] for line in new_lines for span in line]
			
 
				-    if label == "content":
			
 
				-        ocr_res["rec_texts"] = [
			
 
				-            "".join(f"{span[1]} " for span in line).rstrip() for line in new_lines
			
 
				-        ]
			
 
				-    else:
			
 
				-        ocr_res["rec_texts"] = [span[1] + " " for line in new_lines for span in line]
			
 
				-    return ocr_res, len(new_lines)
			
 
				-
			
 
				-
			
 
				-def _process_text(input_text: str) -> str:
			
 
				+def remove_extra_space(input_text: str) -> str:
			
 
				     """
			
 
				     Process the input text to handle spaces.
			
 
				 
			
@@ -500,472 +394,22 @@ def _process_text(input_text: str) -> str:
 
				         str: The processed text with properly formatted spaces.
			
 
				     """
			
 
				 
			
 
				-    def handle_spaces_(text: str) -> str:
			
 
				-        """
			
 
				-        Handle spaces in the text by removing multiple consecutive spaces and inserting a single space
			
 
				-        between Chinese and non-Chinese characters.
			
 
				-
			
 
				-        Args:
			
 
				-            text (str): The text to handle spaces for.
			
 
				-
			
 
				-        Returns:
			
 
				-            str: The text with properly formatted spaces.
			
 
				-        """
			
 
				-        spaces = re.finditer(r"\s+", text)
			
 
				-        processed_text = list(text)
			
 
				-
			
 
				-        for space in reversed(list(spaces)):
			
 
				-            start, end = space.span()
			
 
				-            prev_char = processed_text[start - 1] if start > 0 else ""
			
 
				-            next_char = processed_text[end] if end < len(processed_text) else ""
			
 
				-
			
 
				-            is_prev_chinese = (
			
 
				-                re.match(r"[\u4e00-\u9fff]", prev_char) if prev_char else False
			
 
				-            )
			
 
				-            is_next_chinese = (
			
 
				-                re.match(r"[\u4e00-\u9fff]", next_char) if next_char else False
			
 
				-            )
			
 
				-
			
 
				-            if is_prev_chinese and is_next_chinese:
			
 
				-                processed_text[start:end] = []
			
 
				-            else:
			
 
				-                processed_text[start:end] = [" "]
			
 
				-
			
 
				-        return "".join(processed_text)
			
 
				-
			
 
				-    text_without_spaces = handle_spaces_(input_text)
			
 
				-
			
 
				-    final_text = re.sub(r"\s+", " ", text_without_spaces).strip()
			
 
				-    return final_text
			
 
				-
			
 
				-
			
 
				-def get_single_block_parsing_res(
			
 
				-    general_ocr_pipeline: Any,
			
 
				-    overall_ocr_res: OCRResult,
			
 
				-    layout_det_res: DetResult,
			
 
				-    table_res_list: list,
			
 
				-    seal_res_list: list,
			
 
				-) -> OCRResult:
			
 
				-    """
			
 
				-    Extract structured information from OCR and layout detection results.
			
 
				-
			
 
				-    Args:
			
 
				-        overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
			
 
				-            - "input_img": The image on which OCR was performed.
			
 
				-            - "dt_boxes": A list of detected text box coordinates.
			
 
				-            - "rec_texts": A list of recognized text corresponding to the detected boxes.
			
 
				-
			
 
				-        layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
			
 
				-            - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
			
 
				-
			
 
				-        table_res_list (list): A list of table detection results, where each item is a dictionary containing:
			
 
				-            - "block_bbox": The bounding box of the table layout.
			
 
				-            - "pred_html": The predicted HTML representation of the table.
			
 
				-
			
 
				-        seal_res_list (List): A list of seal detection results. The details of each item depend on the specific application context.
			
 
				-
			
 
				-    Returns:
			
 
				-        list: A list of structured boxes where each item is a dictionary containing:
			
 
				-            - "block_label": The label of the content (e.g., 'table', 'chart', 'image').
			
 
				-            - The label as a key with either table HTML or image data and text.
			
 
				-            - "block_bbox": The coordinates of the layout box.
			
 
				-    """
			
 
				-
			
 
				-    single_block_layout_parsing_res = []
			
 
				-    input_img = overall_ocr_res["doc_preprocessor_res"]["output_img"]
			
 
				-    seal_index = 0
			
 
				-    with_doc_title = False
			
 
				-    max_block_area = 0.0
			
 
				-    paragraph_title_indexs = []
			
 
				-
			
 
				-    layout_det_res_list, _ = _remove_overlap_blocks(
			
 
				-        deepcopy(layout_det_res["boxes"]),
			
 
				-        threshold=0.5,
			
 
				-        smaller=True,
			
 
				+    # Remove spaces between Chinese characters
			
 
				+    text_without_spaces = re.sub(
			
 
				+        r"(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])", "", input_text
			
 
				     )
			
 
				 
			
 
				-    for box_idx, box_info in enumerate(layout_det_res_list):
			
 
				-        block_bbox = box_info["coordinate"]
			
 
				-        label = box_info["label"]
			
 
				-        rec_res = {"boxes": [], "rec_texts": [], "rec_labels": [], "flag": False}
			
 
				-        seg_start_coordinate = float("inf")
			
 
				-        seg_end_coordinate = float("-inf")
			
 
				-        num_of_lines = 1
			
 
				-
			
 
				-        if label == "doc_title":
			
 
				-            with_doc_title = True
			
 
				-        elif label == "paragraph_title":
			
 
				-            paragraph_title_indexs.append(box_idx)
			
 
				-
			
 
				-        block_area = (block_bbox[2] - block_bbox[0]) * (block_bbox[3] - block_bbox[1])
			
 
				-        max_block_area = max(max_block_area, block_area)
			
 
				-
			
 
				-        if label == "table":
			
 
				-            for table_res in table_res_list:
			
 
				-                if len(table_res["cell_box_list"]) == 0:
			
 
				-                    continue
			
 
				-                if (
			
 
				-                    _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                        block_bbox, table_res["cell_box_list"][0]
			
 
				-                    )
			
 
				-                    > 0.5
			
 
				-                ):
			
 
				-                    single_block_layout_parsing_res.append(
			
 
				-                        {
			
 
				-                            "block_label": label,
			
 
				-                            "block_content": table_res["pred_html"],
			
 
				-                            "block_bbox": block_bbox,
			
 
				-                        },
			
 
				-                    )
			
 
				-                    break
			
 
				-        elif label == "seal":
			
 
				-            if len(seal_res_list) > 0:
			
 
				-                single_block_layout_parsing_res.append(
			
 
				-                    {
			
 
				-                        "block_label": label,
			
 
				-                        "block_content": _process_text(
			
 
				-                            ", ".join(seal_res_list[seal_index]["rec_texts"])
			
 
				-                        ),
			
 
				-                        "block_bbox": block_bbox,
			
 
				-                    },
			
 
				-                )
			
 
				-                seal_index += 1
			
 
				-        else:
			
 
				-            overall_text_boxes = overall_ocr_res["rec_boxes"]
			
 
				-            for box_no in range(len(overall_text_boxes)):
			
 
				-                if (
			
 
				-                    _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                        block_bbox, overall_text_boxes[box_no]
			
 
				-                    )
			
 
				-                    > 0.5
			
 
				-                ):
			
 
				-                    rec_res["boxes"].append(overall_text_boxes[box_no])
			
 
				-                    rec_res["rec_texts"].append(
			
 
				-                        overall_ocr_res["rec_texts"][box_no],
			
 
				-                    )
			
 
				-                    rec_res["rec_labels"].append(
			
 
				-                        overall_ocr_res["rec_labels"][box_no],
			
 
				-                    )
			
 
				-                    rec_res["flag"] = True
			
 
				-
			
 
				-            if rec_res["flag"]:
			
 
				-                rec_res, num_of_lines = _sort_ocr_res_by_y_projection(
			
 
				-                    input_img, general_ocr_pipeline, label, block_bbox, rec_res, 0.7
			
 
				-                )
			
 
				-                seg_start_coordinate = rec_res["boxes"][0][0]
			
 
				-                seg_end_coordinate = rec_res["boxes"][-1][2]
			
 
				-                if label == "formula":
			
 
				-                    rec_res["rec_texts"] = [
			
 
				-                        rec_res_text.replace("$", "")
			
 
				-                        for rec_res_text in rec_res["rec_texts"]
			
 
				-                    ]
			
 
				-
			
 
				-            if label in ["chart", "image"]:
			
 
				-                x_min, y_min, x_max, y_max = list(map(int, block_bbox))
			
 
				-                img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
			
 
				-                img = Image.fromarray(input_img[y_min:y_max, x_min:x_max, ::-1])
			
 
				-                single_block_layout_parsing_res.append(
			
 
				-                    {
			
 
				-                        "block_label": label,
			
 
				-                        "block_content": _process_text("".join(rec_res["rec_texts"])),
			
 
				-                        "block_image": {img_path: img},
			
 
				-                        "block_bbox": block_bbox,
			
 
				-                    },
			
 
				-                )
			
 
				-            else:
			
 
				-                if label in ["doc_title"]:
			
 
				-                    content = " ".join(rec_res["rec_texts"])
			
 
				-                elif label in ["content"]:
			
 
				-                    content = "\n".join(rec_res["rec_texts"])
			
 
				-                else:
			
 
				-                    content = "".join(rec_res["rec_texts"])
			
 
				-                    if label != "reference":
			
 
				-                        content = _process_text(content)
			
 
				-                single_block_layout_parsing_res.append(
			
 
				-                    {
			
 
				-                        "block_label": label,
			
 
				-                        "block_content": content,
			
 
				-                        "block_bbox": block_bbox,
			
 
				-                        "seg_start_coordinate": seg_start_coordinate,
			
 
				-                        "seg_end_coordinate": seg_end_coordinate,
			
 
				-                        "num_of_lines": num_of_lines,
			
 
				-                        "block_area": block_area,
			
 
				-                    },
			
 
				-                )
			
 
				-
			
 
				-    if (
			
 
				-        not with_doc_title
			
 
				-        and len(paragraph_title_indexs) == 1
			
 
				-        and single_block_layout_parsing_res[paragraph_title_indexs[0]].get(
			
 
				-            "block_area", 0
			
 
				-        )
			
 
				-        > max_block_area * 0.3
			
 
				-    ):
			
 
				-        single_block_layout_parsing_res[paragraph_title_indexs[0]][
			
 
				-            "block_label"
			
 
				-        ] = "doc_title"
			
 
				-
			
 
				-    if len(layout_det_res_list) == 0:
			
 
				-        for ocr_rec_box, ocr_rec_text in zip(
			
 
				-            overall_ocr_res["rec_boxes"], overall_ocr_res["rec_texts"]
			
 
				-        ):
			
 
				-            single_block_layout_parsing_res.append(
			
 
				-                {
			
 
				-                    "block_label": "text",
			
 
				-                    "block_content": ocr_rec_text,
			
 
				-                    "block_bbox": ocr_rec_box,
			
 
				-                    "seg_start_coordinate": ocr_rec_box[0],
			
 
				-                    "seg_end_coordinate": ocr_rec_box[2],
			
 
				-                },
			
 
				-            )
			
 
				-
			
 
				-    single_block_layout_parsing_res = get_layout_ordering(
			
 
				-        single_block_layout_parsing_res,
			
 
				-        no_mask_labels=[
			
 
				-            "text",
			
 
				-            "formula",
			
 
				-            "algorithm",
			
 
				-            "reference",
			
 
				-            "content",
			
 
				-            "abstract",
			
 
				-        ],
			
 
				+    # Ensure single space between Chinese and non-Chinese characters
			
 
				+    text_with_single_spaces = re.sub(
			
 
				+        r"(?<=[\u4e00-\u9fff])\s+(?=[^\u4e00-\u9fff])|(?<=[^\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])",
			
 
				+        " ",
			
 
				+        text_without_spaces,
			
 
				     )
			
 
				 
			
 
				-    return single_block_layout_parsing_res
			
 
				-
			
 
				-
			
 
				-def _projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
			
 
				-    """
			
 
				-    Generate a 1D projection histogram from bounding boxes along a specified axis.
			
 
				-
			
 
				-    Args:
			
 
				-        boxes: A (N, 4) array of bounding boxes defined by [x_min, y_min, x_max, y_max].
			
 
				-        axis: Axis for projection; 0 for horizontal (x-axis), 1 for vertical (y-axis).
			
 
				-
			
 
				-    Returns:
			
 
				-        A 1D numpy array representing the projection histogram based on bounding box intervals.
			
 
				-    """
			
 
				-    assert axis in [0, 1]
			
 
				-    max_length = np.max(boxes[:, axis::2])
			
 
				-    projection = np.zeros(max_length, dtype=int)
			
 
				-
			
 
				-    # Increment projection histogram over the interval defined by each bounding box
			
 
				-    for start, end in boxes[:, axis::2]:
			
 
				-        projection[start:end] += 1
			
 
				-
			
 
				-    return projection
			
 
				-
			
 
				-
			
 
				-def _split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
			
 
				-    """
			
 
				-    Split the projection profile into segments based on specified thresholds.
			
 
				-
			
 
				-    Args:
			
 
				-        arr_values: 1D array representing the projection profile.
			
 
				-        min_value: Minimum value threshold to consider a profile segment significant.
			
 
				-        min_gap: Minimum gap width to consider a separation between segments.
			
 
				-
			
 
				-    Returns:
			
 
				-        A tuple of start and end indices for each segment that meets the criteria.
			
 
				-    """
			
 
				-    # Identify indices where the projection exceeds the minimum value
			
 
				-    significant_indices = np.where(arr_values > min_value)[0]
			
 
				-    if not len(significant_indices):
			
 
				-        return
			
 
				-
			
 
				-    # Calculate gaps between significant indices
			
 
				-    index_diffs = significant_indices[1:] - significant_indices[:-1]
			
 
				-    gap_indices = np.where(index_diffs > min_gap)[0]
			
 
				-
			
 
				-    # Determine start and end indices of segments
			
 
				-    segment_starts = np.insert(
			
 
				-        significant_indices[gap_indices + 1],
			
 
				-        0,
			
 
				-        significant_indices[0],
			
 
				-    )
			
 
				-    segment_ends = np.append(
			
 
				-        significant_indices[gap_indices],
			
 
				-        significant_indices[-1] + 1,
			
 
				-    )
			
 
				-
			
 
				-    return segment_starts, segment_ends
			
 
				-
			
 
				-
			
 
				-def _recursive_yx_cut(
			
 
				-    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
			
 
				-):
			
 
				-    """
			
 
				-    Recursively project and segment bounding boxes, starting with Y-axis and followed by X-axis.
			
 
				-
			
 
				-    Args:
			
 
				-        boxes: A (N, 4) array representing bounding boxes.
			
 
				-        indices: List of indices indicating the original position of boxes.
			
 
				-        res: List to store indices of the final segmented bounding boxes.
			
 
				-        min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
			
 
				-
			
 
				-    Returns:
			
 
				-        None: This function modifies the `res` list in place.
			
 
				-    """
			
 
				-    assert len(boxes) == len(
			
 
				-        indices
			
 
				-    ), "The length of boxes and indices must be the same."
			
 
				-
			
 
				-    # Sort by y_min for Y-axis projection
			
 
				-    y_sorted_indices = boxes[:, 1].argsort()
			
 
				-    y_sorted_boxes = boxes[y_sorted_indices]
			
 
				-    y_sorted_indices = np.array(indices)[y_sorted_indices]
			
 
				-
			
 
				-    # Perform Y-axis projection
			
 
				-    y_projection = _projection_by_bboxes(boxes=y_sorted_boxes, axis=1)
			
 
				-    y_intervals = _split_projection_profile(y_projection, 0, 1)
			
 
				-
			
 
				-    if not y_intervals:
			
 
				-        return
			
 
				-
			
 
				-    # Process each segment defined by Y-axis projection
			
 
				-    for y_start, y_end in zip(*y_intervals):
			
 
				-        # Select boxes within the current y interval
			
 
				-        y_interval_indices = (y_start <= y_sorted_boxes[:, 1]) & (
			
 
				-            y_sorted_boxes[:, 1] < y_end
			
 
				-        )
			
 
				-        y_boxes_chunk = y_sorted_boxes[y_interval_indices]
			
 
				-        y_indices_chunk = y_sorted_indices[y_interval_indices]
			
 
				-
			
 
				-        # Sort by x_min for X-axis projection
			
 
				-        x_sorted_indices = y_boxes_chunk[:, 0].argsort()
			
 
				-        x_sorted_boxes_chunk = y_boxes_chunk[x_sorted_indices]
			
 
				-        x_sorted_indices_chunk = y_indices_chunk[x_sorted_indices]
			
 
				-
			
 
				-        # Perform X-axis projection
			
 
				-        x_projection = _projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0)
			
 
				-        x_intervals = _split_projection_profile(x_projection, 0, min_gap)
			
 
				-
			
 
				-        if not x_intervals:
			
 
				-            continue
			
 
				-
			
 
				-        # If X-axis cannot be further segmented, add current indices to results
			
 
				-        if len(x_intervals[0]) == 1:
			
 
				-            res.extend(x_sorted_indices_chunk)
			
 
				-            continue
			
 
				-
			
 
				-        # Recursively process each segment defined by X-axis projection
			
 
				-        for x_start, x_end in zip(*x_intervals):
			
 
				-            x_interval_indices = (x_start <= x_sorted_boxes_chunk[:, 0]) & (
			
 
				-                x_sorted_boxes_chunk[:, 0] < x_end
			
 
				-            )
			
 
				-            _recursive_yx_cut(
			
 
				-                x_sorted_boxes_chunk[x_interval_indices],
			
 
				-                x_sorted_indices_chunk[x_interval_indices],
			
 
				-                res,
			
 
				-            )
			
 
				-
			
 
				-
			
 
				-def _recursive_xy_cut(
			
 
				-    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
			
 
				-):
			
 
				-    """
			
 
				-    Recursively performs X-axis projection followed by Y-axis projection to segment bounding boxes.
			
 
				-
			
 
				-    Args:
			
 
				-        boxes: A (N, 4) array representing bounding boxes with [x_min, y_min, x_max, y_max].
			
 
				-        indices: A list of indices representing the position of boxes in the original data.
			
 
				-        res: A list to store indices of bounding boxes that meet the criteria.
			
 
				-        min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
			
 
				+    # Reduce any remaining consecutive spaces to a single space
			
 
				+    final_text = re.sub(r"\s+", " ", text_with_single_spaces).strip()
			
 
				 
			
 
				-    Returns:
			
 
				-        None: This function modifies the `res` list in place.
			
 
				-    """
			
 
				-    # Ensure boxes and indices have the same length
			
 
				-    assert len(boxes) == len(
			
 
				-        indices
			
 
				-    ), "The length of boxes and indices must be the same."
			
 
				-
			
 
				-    # Sort by x_min to prepare for X-axis projection
			
 
				-    x_sorted_indices = boxes[:, 0].argsort()
			
 
				-    x_sorted_boxes = boxes[x_sorted_indices]
			
 
				-    x_sorted_indices = np.array(indices)[x_sorted_indices]
			
 
				-
			
 
				-    # Perform X-axis projection
			
 
				-    x_projection = _projection_by_bboxes(boxes=x_sorted_boxes, axis=0)
			
 
				-    x_intervals = _split_projection_profile(x_projection, 0, 1)
			
 
				-
			
 
				-    if not x_intervals:
			
 
				-        return
			
 
				-
			
 
				-    # Process each segment defined by X-axis projection
			
 
				-    for x_start, x_end in zip(*x_intervals):
			
 
				-        # Select boxes within the current x interval
			
 
				-        x_interval_indices = (x_start <= x_sorted_boxes[:, 0]) & (
			
 
				-            x_sorted_boxes[:, 0] < x_end
			
 
				-        )
			
 
				-        x_boxes_chunk = x_sorted_boxes[x_interval_indices]
			
 
				-        x_indices_chunk = x_sorted_indices[x_interval_indices]
			
 
				-
			
 
				-        # Sort selected boxes by y_min to prepare for Y-axis projection
			
 
				-        y_sorted_indices = x_boxes_chunk[:, 1].argsort()
			
 
				-        y_sorted_boxes_chunk = x_boxes_chunk[y_sorted_indices]
			
 
				-        y_sorted_indices_chunk = x_indices_chunk[y_sorted_indices]
			
 
				-
			
 
				-        # Perform Y-axis projection
			
 
				-        y_projection = _projection_by_bboxes(boxes=y_sorted_boxes_chunk, axis=1)
			
 
				-        y_intervals = _split_projection_profile(y_projection, 0, min_gap)
			
 
				-
			
 
				-        if not y_intervals:
			
 
				-            continue
			
 
				-
			
 
				-        # If Y-axis cannot be further segmented, add current indices to results
			
 
				-        if len(y_intervals[0]) == 1:
			
 
				-            res.extend(y_sorted_indices_chunk)
			
 
				-            continue
			
 
				-
			
 
				-        # Recursively process each segment defined by Y-axis projection
			
 
				-        for y_start, y_end in zip(*y_intervals):
			
 
				-            y_interval_indices = (y_start <= y_sorted_boxes_chunk[:, 1]) & (
			
 
				-                y_sorted_boxes_chunk[:, 1] < y_end
			
 
				-            )
			
 
				-            _recursive_xy_cut(
			
 
				-                y_sorted_boxes_chunk[y_interval_indices],
			
 
				-                y_sorted_indices_chunk[y_interval_indices],
			
 
				-                res,
			
 
				-            )
			
 
				-
			
 
				-
			
 
				-def sort_by_xycut(
			
 
				-    block_bboxes: Union[np.ndarray, List[List[int]]],
			
 
				-    direction: int = 0,
			
 
				-    min_gap: int = 1,
			
 
				-) -> List[int]:
			
 
				-    """
			
 
				-    Sort bounding boxes using recursive XY cut method based on the specified direction.
			
 
				-
			
 
				-    Args:
			
 
				-        block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
			
 
				-                                                           where each box is represented as
			
 
				-                                                           [x_min, y_min, x_max, y_max].
			
 
				-        direction (int): Direction for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
			
 
				-                         Defaults to 0.
			
 
				-        min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
			
 
				-
			
 
				-    Returns:
			
 
				-        List[int]: A list of indices representing the order of sorted bounding boxes.
			
 
				-    """
			
 
				-    block_bboxes = np.asarray(block_bboxes).astype(int)
			
 
				-    res = []
			
 
				-    if direction == 1:
			
 
				-        _recursive_yx_cut(
			
 
				-            block_bboxes,
			
 
				-            np.arange(len(block_bboxes)).tolist(),
			
 
				-            res,
			
 
				-            min_gap,
			
 
				-        )
			
 
				-    else:
			
 
				-        _recursive_xy_cut(
			
 
				-            block_bboxes,
			
 
				-            np.arange(len(block_bboxes)).tolist(),
			
 
				-            res,
			
 
				-            min_gap,
			
 
				-        )
			
 
				-    return res
			
 
				+    return final_text
			
 
				 
			
 
				 
			
 
				 def gather_imgs(original_img, layout_det_objs):
			
@@ -1020,7 +464,7 @@ def _get_minbox_if_overlap_by_ratio(
 
				     return None
			
 
				 
			
 
				 
			
 
				-def _remove_overlap_blocks(
			
 
				+def remove_overlap_blocks(
			
 
				     blocks: List[Dict[str, List[int]]], threshold: float = 0.65, smaller: bool = True
			
 
				 ) -> Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
			
 
				     """
			
@@ -1035,13 +479,12 @@ def _remove_overlap_blocks(
 
				         Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
			
 
				             A tuple containing the updated list of blocks and a list of dropped blocks.
			
 
				     """
			
 
				-    dropped_blocks = []
			
 
				     dropped_indexes = set()
			
 
				-
			
 
				+    blocks = deepcopy(blocks)
			
 
				     # Iterate over each pair of blocks to find overlaps
			
 
				-    for i, block1 in enumerate(blocks):
			
 
				-        for j in range(i + 1, len(blocks)):
			
 
				-            block2 = blocks[j]
			
 
				+    for i, block1 in enumerate(blocks["boxes"]):
			
 
				+        for j in range(i + 1, len(blocks["boxes"])):
			
 
				+            block2 = blocks["boxes"][j]
			
 
				             # Skip blocks that are already marked for removal
			
 
				             if i in dropped_indexes or j in dropped_indexes:
			
 
				                 continue
			
@@ -1062,1291 +505,132 @@ def _remove_overlap_blocks(
 
				 
			
 
				     # Remove marked blocks from the original list
			
 
				     for index in sorted(dropped_indexes, reverse=True):
			
 
				-        dropped_blocks.append(blocks[index])
			
 
				-        del blocks[index]
			
 
				+        del blocks["boxes"][index]
			
 
				 
			
 
				-    return blocks, dropped_blocks
			
 
				+    return blocks
			
 
				 
			
 
				 
			
 
				-def _get_text_median_width(blocks: List[Dict[str, any]]) -> float:
			
 
				+def get_bbox_intersection(bbox1, bbox2, return_format="bbox"):
			
 
				     """
			
 
				-    Calculate the median width of blocks labeled as "text".
			
 
				+    Compute the intersection of two bounding boxes, supporting both 4-coordinate and 8-coordinate formats.
			
 
				 
			
 
				     Args:
			
 
				-        blocks (List[Dict[str, any]]): List of block dictionaries, each containing a 'block_bbox' and 'label'.
			
 
				+        bbox1 (tuple): The first bounding box, either in 4-coordinate format (x_min, y_min, x_max, y_max)
			
 
				+                       or 8-coordinate format (x1, y1, x2, y2, x3, y3, x4, y4).
			
 
				+        bbox2 (tuple): The second bounding box in the same format as bbox1.
			
 
				+        return_format (str): The format of the output intersection, either 'bbox' or 'poly'.
			
 
				 
			
 
				     Returns:
			
 
				-        float: The median width of text blocks, or infinity if no text blocks are found.
			
 
				-    """
			
 
				-    widths = [
			
 
				-        block["block_bbox"][2] - block["block_bbox"][0]
			
 
				-        for block in blocks
			
 
				-        if block.get("block_label") == "text"
			
 
				-    ]
			
 
				-    return np.median(widths) if widths else float("inf")
			
 
				-
			
 
				-
			
 
				-def _get_layout_property(
			
 
				-    blocks: List[Dict[str, any]],
			
 
				-    median_width: float,
			
 
				-    no_mask_labels: List[str],
			
 
				-    threshold: float = 0.8,
			
 
				-) -> Tuple[List[Dict[str, any]], bool]:
			
 
				-    """
			
 
				-    Determine the layout (single or double column) of text blocks.
			
 
				-
			
 
				-    Args:
			
 
				-        blocks (List[Dict[str, any]]): List of block dictionaries containing 'label' and 'block_bbox'.
			
 
				-        median_width (float): Median width of text blocks.
			
 
				-        no_mask_labels (List[str]): Labels of blocks to be considered for layout analysis.
			
 
				-        threshold (float): Threshold for determining layout overlap.
			
 
				-
			
 
				-    Returns:
			
 
				-        Tuple[List[Dict[str, any]], bool]: Updated list of blocks with layout information and a boolean
			
 
				-        indicating if the double layout area is greater than the single layout area.
			
 
				-    """
			
 
				-    blocks.sort(
			
 
				-        key=lambda x: (
			
 
				-            x["block_bbox"][0],
			
 
				-            (x["block_bbox"][2] - x["block_bbox"][0]),
			
 
				-        ),
			
 
				-    )
			
 
				-    check_single_layout = {}
			
 
				-    page_min_x, page_max_x = float("inf"), 0
			
 
				-    double_label_area = 0
			
 
				-    single_label_area = 0
			
 
				-
			
 
				-    for i, block in enumerate(blocks):
			
 
				-        page_min_x = min(page_min_x, block["block_bbox"][0])
			
 
				-        page_max_x = max(page_max_x, block["block_bbox"][2])
			
 
				-    page_width = page_max_x - page_min_x
			
 
				-
			
 
				-    for i, block in enumerate(blocks):
			
 
				-        if block["block_label"] not in no_mask_labels:
			
 
				-            continue
			
 
				-
			
 
				-        x_min_i, _, x_max_i, _ = block["block_bbox"]
			
 
				-        layout_length = x_max_i - x_min_i
			
 
				-        cover_count, cover_with_threshold_count = 0, 0
			
 
				-        match_block_with_threshold_indexes = []
			
 
				-
			
 
				-        for j, other_block in enumerate(blocks):
			
 
				-            if i == j or other_block["block_label"] not in no_mask_labels:
			
 
				-                continue
			
 
				-
			
 
				-            x_min_j, _, x_max_j, _ = other_block["block_bbox"]
			
 
				-            x_match_min, x_match_max = max(
			
 
				-                x_min_i,
			
 
				-                x_min_j,
			
 
				-            ), min(x_max_i, x_max_j)
			
 
				-            match_block_iou = (x_match_max - x_match_min) / (x_max_j - x_min_j)
			
 
				-
			
 
				-            if match_block_iou > 0:
			
 
				-                cover_count += 1
			
 
				-                if match_block_iou > threshold:
			
 
				-                    cover_with_threshold_count += 1
			
 
				-                    match_block_with_threshold_indexes.append(
			
 
				-                        (j, match_block_iou),
			
 
				-                    )
			
 
				-                x_min_i = x_match_max
			
 
				-                if x_min_i >= x_max_i:
			
 
				-                    break
			
 
				-
			
 
				-        if (
			
 
				-            layout_length > median_width * 1.3
			
 
				-            and (cover_with_threshold_count >= 2 or cover_count >= 2)
			
 
				-        ) or layout_length > 0.6 * page_width:
			
 
				-            # if layout_length > median_width * 1.3 and (cover_with_threshold_count >= 2):
			
 
				-            block["layout"] = "double"
			
 
				-            double_label_area += (block["block_bbox"][2] - block["block_bbox"][0]) * (
			
 
				-                block["block_bbox"][3] - block["block_bbox"][1]
			
 
				-            )
			
 
				-        else:
			
 
				-            block["layout"] = "single"
			
 
				-            check_single_layout[i] = match_block_with_threshold_indexes
			
 
				-
			
 
				-    # Check single-layout block
			
 
				-    for i, single_layout in check_single_layout.items():
			
 
				-        if single_layout:
			
 
				-            index, match_iou = single_layout[-1]
			
 
				-            if match_iou > 0.9 and blocks[index]["layout"] == "double":
			
 
				-                blocks[i]["layout"] = "double"
			
 
				-                double_label_area += (
			
 
				-                    blocks[i]["block_bbox"][2] - blocks[i]["block_bbox"][0]
			
 
				-                ) * (blocks[i]["block_bbox"][3] - blocks[i]["block_bbox"][1])
			
 
				-            else:
			
 
				-                single_label_area += (
			
 
				-                    blocks[i]["block_bbox"][2] - blocks[i]["block_bbox"][0]
			
 
				-                ) * (blocks[i]["block_bbox"][3] - blocks[i]["block_bbox"][1])
			
 
				-
			
 
				-    return blocks, (double_label_area > single_label_area)
			
 
				-
			
 
				-
			
 
				-def _get_bbox_direction(input_bbox: List[float], ratio: float = 1.0) -> bool:
			
 
				-    """
			
 
				-    Determine if a bounding box is horizontal or vertical.
			
 
				-
			
 
				-    Args:
			
 
				-        input_bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
			
 
				-        ratio (float): Ratio for determining orientation. Default is 1.0.
			
 
				-
			
 
				-    Returns:
			
 
				-        bool: True if the bounding box is considered horizontal, False if vertical.
			
 
				-    """
			
 
				-    width = input_bbox[2] - input_bbox[0]
			
 
				-    height = input_bbox[3] - input_bbox[1]
			
 
				-    return width * ratio >= height
			
 
				-
			
 
				-
			
 
				-def _get_projection_iou(
			
 
				-    input_bbox: List[float], match_bbox: List[float], is_horizontal: bool = True
			
 
				-) -> float:
			
 
				-    """
			
 
				-    Calculate the IoU of lines between two bounding boxes.
			
 
				-
			
 
				-    Args:
			
 
				-        input_bbox (List[float]): First bounding box [x_min, y_min, x_max, y_max].
			
 
				-        match_bbox (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
			
 
				-        is_horizontal (bool): Whether to compare horizontally or vertically.
			
 
				-
			
 
				-    Returns:
			
 
				-        float: Line IoU. Returns 0 if there is no overlap.
			
 
				-    """
			
 
				-    if is_horizontal:
			
 
				-        x_match_min = max(input_bbox[0], match_bbox[0])
			
 
				-        x_match_max = min(input_bbox[2], match_bbox[2])
			
 
				-        overlap = max(0, x_match_max - x_match_min)
			
 
				-        input_width = min(input_bbox[2] - input_bbox[0], match_bbox[2] - match_bbox[0])
			
 
				-    else:
			
 
				-        y_match_min = max(input_bbox[1], match_bbox[1])
			
 
				-        y_match_max = min(input_bbox[3], match_bbox[3])
			
 
				-        overlap = max(0, y_match_max - y_match_min)
			
 
				-        input_width = min(input_bbox[3] - input_bbox[1], match_bbox[3] - match_bbox[1])
			
 
				-
			
 
				-    return overlap / input_width if input_width > 0 else 0.0
			
 
				-
			
 
				-
			
 
				-def _get_sub_category(
			
 
				-    blocks: List[Dict[str, Any]], title_labels: List[str]
			
 
				-) -> Tuple[List[Dict[str, Any]], List[float]]:
			
 
				-    """
			
 
				-    Determine the layout of title and text blocks and collect pre_cuts.
			
 
				-
			
 
				-    Args:
			
 
				-        blocks (List[Dict[str, Any]]): List of block dictionaries.
			
 
				-        title_labels (List[str]): List of labels considered as titles.
			
 
				-
			
 
				-    Returns:
			
 
				-        List[Dict[str, Any]]: Updated list of blocks with title-text layout information.
			
 
				-        Dict[float]: Dict of pre_cuts coordinates.
			
 
				-    """
			
 
				-
			
 
				-    sub_title_labels = ["paragraph_title"]
			
 
				-    vision_labels = ["image", "table", "chart", "figure"]
			
 
				-    vision_title_labels = ["figure_title", "chart_title", "table_title"]
			
 
				-    all_labels = title_labels + sub_title_labels + vision_labels + vision_title_labels
			
 
				-    special_pre_cut_labels = sub_title_labels
			
 
				-
			
 
				-    # single doc title is irregular,pre cut not applicable
			
 
				-    num_doc_title = 0
			
 
				-    for block in blocks:
			
 
				-        if block["block_label"] == "doc_title":
			
 
				-            num_doc_title += 1
			
 
				-            if num_doc_title == 2:
			
 
				-                special_pre_cut_labels = title_labels + sub_title_labels
			
 
				-                break
			
 
				-    if len(blocks) == 0:
			
 
				-        return blocks, {}
			
 
				-
			
 
				-    min_x = min(block["block_bbox"][0] for block in blocks)
			
 
				-    min_y = min(block["block_bbox"][1] for block in blocks)
			
 
				-    max_x = max(block["block_bbox"][2] for block in blocks)
			
 
				-    max_y = max(block["block_bbox"][3] for block in blocks)
			
 
				-    region_bbox = (min_x, min_y, max_x, max_y)
			
 
				-    region_x_center = (region_bbox[0] + region_bbox[2]) / 2
			
 
				-    region_y_center = (region_bbox[1] + region_bbox[3]) / 2
			
 
				-    region_width = region_bbox[2] - region_bbox[0]
			
 
				-    region_height = region_bbox[3] - region_bbox[1]
			
 
				-
			
 
				-    pre_cuts = {}
			
 
				-
			
 
				-    for i, block1 in enumerate(blocks):
			
 
				-        block1.setdefault("title_text", [])
			
 
				-        block1.setdefault("sub_title", [])
			
 
				-        block1.setdefault("vision_footnote", [])
			
 
				-        block1.setdefault("sub_label", block1["block_label"])
			
 
				-
			
 
				-        if block1["block_label"] not in all_labels:
			
 
				-            continue
			
 
				-
			
 
				-        bbox1 = block1["block_bbox"]
			
 
				-        x1, y1, x2, y2 = bbox1
			
 
				-        is_horizontal_1 = _get_bbox_direction(block1["block_bbox"])
			
 
				-        left_up_title_text_distance = float("inf")
			
 
				-        left_up_title_text_index = -1
			
 
				-        left_up_title_text_direction = None
			
 
				-        right_down_title_text_distance = float("inf")
			
 
				-        right_down_title_text_index = -1
			
 
				-        right_down_title_text_direction = None
			
 
				-
			
 
				-        # pre-cuts
			
 
				-        # Condition 1: Length is greater than half of the layout region
			
 
				-        if is_horizontal_1:
			
 
				-            block_length = x2 - x1
			
 
				-            required_length = region_width / 2
			
 
				-        else:
			
 
				-            block_length = y2 - y1
			
 
				-            required_length = region_height / 2
			
 
				-        if block1["block_label"] in special_pre_cut_labels:
			
 
				-            length_condition = True
			
 
				-        else:
			
 
				-            length_condition = block_length > required_length
			
 
				-
			
 
				-        # Condition 2: Centered check (must be within ±20 in both horizontal and vertical directions)
			
 
				-        block_x_center = (x1 + x2) / 2
			
 
				-        block_y_center = (y1 + y2) / 2
			
 
				-        tolerance_len = block_length // 5
			
 
				-        if block1["block_label"] in special_pre_cut_labels:
			
 
				-            tolerance_len = block_length // 10
			
 
				-        if is_horizontal_1:
			
 
				-            is_centered = abs(block_x_center - region_x_center) <= tolerance_len
			
 
				-        else:
			
 
				-            is_centered = abs(block_y_center - region_y_center) <= tolerance_len
			
 
				-
			
 
				-        # Condition 3: Check for surrounding text
			
 
				-        has_left_text = False
			
 
				-        has_right_text = False
			
 
				-        has_above_text = False
			
 
				-        has_below_text = False
			
 
				-        for block2 in blocks:
			
 
				-            if block2["block_label"] != "text":
			
 
				-                continue
			
 
				-            bbox2 = block2["block_bbox"]
			
 
				-            x1_2, y1_2, x2_2, y2_2 = bbox2
			
 
				-            if is_horizontal_1:
			
 
				-                if x2_2 <= x1 and not (y2_2 <= y1 or y1_2 >= y2):
			
 
				-                    has_left_text = True
			
 
				-                if x1_2 >= x2 and not (y2_2 <= y1 or y1_2 >= y2):
			
 
				-                    has_right_text = True
			
 
				-            else:
			
 
				-                if y2_2 <= y1 and not (x2_2 <= x1 or x1_2 >= x2):
			
 
				-                    has_above_text = True
			
 
				-                if y1_2 >= y2 and not (x2_2 <= x1 or x1_2 >= x2):
			
 
				-                    has_below_text = True
			
 
				-
			
 
				-            if (is_horizontal_1 and has_left_text and has_right_text) or (
			
 
				-                not is_horizontal_1 and has_above_text and has_below_text
			
 
				-            ):
			
 
				-                break
			
 
				-
			
 
				-        no_text_on_sides = (
			
 
				-            not (has_left_text or has_right_text)
			
 
				-            if is_horizontal_1
			
 
				-            else not (has_above_text or has_below_text)
			
 
				-        )
			
 
				-
			
 
				-        # Add coordinates if all conditions are met
			
 
				-        if is_centered and length_condition and no_text_on_sides:
			
 
				-            if is_horizontal_1:
			
 
				-                pre_cuts.setdefault("y", []).append(y1)
			
 
				-            else:
			
 
				-                pre_cuts.setdefault("x", []).append(x1)
			
 
				-
			
 
				-        for j, block2 in enumerate(blocks):
			
 
				-            if i == j:
			
 
				-                continue
			
 
				-
			
 
				-            bbox2 = block2["block_bbox"]
			
 
				-            x1_prime, y1_prime, x2_prime, y2_prime = bbox2
			
 
				-            is_horizontal_2 = _get_bbox_direction(bbox2)
			
 
				-            match_block_iou = _get_projection_iou(
			
 
				-                bbox2,
			
 
				-                bbox1,
			
 
				-                is_horizontal_1,
			
 
				-            )
			
 
				-
			
 
				-            def distance_(is_horizontal, is_left_up):
			
 
				-                if is_horizontal:
			
 
				-                    if is_left_up:
			
 
				-                        return (y1 - y2_prime + 2) // 5 + x1_prime / 5000
			
 
				-                    else:
			
 
				-                        return (y1_prime - y2 + 2) // 5 + x1_prime / 5000
			
 
				-
			
 
				-                else:
			
 
				-                    if is_left_up:
			
 
				-                        return (x1 - x2_prime + 2) // 5 + y1_prime / 5000
			
 
				-                    else:
			
 
				-                        return (x1_prime - x2 + 2) // 5 + y1_prime / 5000
			
 
				-
			
 
				-            block_iou_threshold = 0.1
			
 
				-            if block1["block_label"] in sub_title_labels:
			
 
				-                block_iou_threshold = 0.5
			
 
				-
			
 
				-            if is_horizontal_1:
			
 
				-                if match_block_iou >= block_iou_threshold:
			
 
				-                    left_up_distance = distance_(True, True)
			
 
				-                    right_down_distance = distance_(True, False)
			
 
				-                    if (
			
 
				-                        y2_prime <= y1
			
 
				-                        and left_up_distance <= left_up_title_text_distance
			
 
				-                    ):
			
 
				-                        left_up_title_text_distance = left_up_distance
			
 
				-                        left_up_title_text_index = j
			
 
				-                        left_up_title_text_direction = is_horizontal_2
			
 
				-                    elif (
			
 
				-                        y1_prime > y2
			
 
				-                        and right_down_distance < right_down_title_text_distance
			
 
				-                    ):
			
 
				-                        right_down_title_text_distance = right_down_distance
			
 
				-                        right_down_title_text_index = j
			
 
				-                        right_down_title_text_direction = is_horizontal_2
			
 
				-            else:
			
 
				-                if match_block_iou >= block_iou_threshold:
			
 
				-                    left_up_distance = distance_(False, True)
			
 
				-                    right_down_distance = distance_(False, False)
			
 
				-                    if (
			
 
				-                        x2_prime <= x1
			
 
				-                        and left_up_distance <= left_up_title_text_distance
			
 
				-                    ):
			
 
				-                        left_up_title_text_distance = left_up_distance
			
 
				-                        left_up_title_text_index = j
			
 
				-                        left_up_title_text_direction = is_horizontal_2
			
 
				-                    elif (
			
 
				-                        x1_prime > x2
			
 
				-                        and right_down_distance < right_down_title_text_distance
			
 
				-                    ):
			
 
				-                        right_down_title_text_distance = right_down_distance
			
 
				-                        right_down_title_text_index = j
			
 
				-                        right_down_title_text_direction = is_horizontal_2
			
 
				-
			
 
				-        height = bbox1[3] - bbox1[1]
			
 
				-        width = bbox1[2] - bbox1[0]
			
 
				-        title_text_weight = [0.8, 0.8]
			
 
				-
			
 
				-        title_text, sub_title, vision_footnote = [], [], []
			
 
				-
			
 
				-        def get_sub_category_(
			
 
				-            title_text_direction,
			
 
				-            title_text_index,
			
 
				-            label,
			
 
				-            is_left_up=True,
			
 
				-        ):
			
 
				-            direction_ = [1, 3] if is_left_up else [2, 4]
			
 
				-            if (
			
 
				-                title_text_direction == is_horizontal_1
			
 
				-                and title_text_index != -1
			
 
				-                and (label == "text" or label == "paragraph_title")
			
 
				-            ):
			
 
				-                bbox2 = blocks[title_text_index]["block_bbox"]
			
 
				-                if is_horizontal_1:
			
 
				-                    height1 = bbox2[3] - bbox2[1]
			
 
				-                    width1 = bbox2[2] - bbox2[0]
			
 
				-                    if label == "text":
			
 
				-                        if (
			
 
				-                            _nearest_edge_distance(bbox1, bbox2)[0] <= 15
			
 
				-                            and block1["block_label"] in vision_labels
			
 
				-                            and width1 < width
			
 
				-                            and height1 < 0.5 * height
			
 
				-                        ):
			
 
				-                            blocks[title_text_index]["sub_label"] = "vision_footnote"
			
 
				-                            vision_footnote.append(bbox2)
			
 
				-                        elif (
			
 
				-                            height1 < height * title_text_weight[0]
			
 
				-                            and (width1 < width or width1 > 1.5 * width)
			
 
				-                            and block1["block_label"] in title_labels
			
 
				-                        ):
			
 
				-                            blocks[title_text_index]["sub_label"] = "title_text"
			
 
				-                            title_text.append((direction_[0], bbox2))
			
 
				-                    elif (
			
 
				-                        label == "paragraph_title"
			
 
				-                        and block1["block_label"] in sub_title_labels
			
 
				-                    ):
			
 
				-                        sub_title.append(bbox2)
			
 
				-                else:
			
 
				-                    height1 = bbox2[3] - bbox2[1]
			
 
				-                    width1 = bbox2[2] - bbox2[0]
			
 
				-                    if label == "text":
			
 
				-                        if (
			
 
				-                            _nearest_edge_distance(bbox1, bbox2)[0] <= 15
			
 
				-                            and block1["block_label"] in vision_labels
			
 
				-                            and height1 < height
			
 
				-                            and width1 < 0.5 * width
			
 
				-                        ):
			
 
				-                            blocks[title_text_index]["sub_label"] = "vision_footnote"
			
 
				-                            vision_footnote.append(bbox2)
			
 
				-                        elif (
			
 
				-                            width1 < width * title_text_weight[1]
			
 
				-                            and block1["block_label"] in title_labels
			
 
				-                        ):
			
 
				-                            blocks[title_text_index]["sub_label"] = "title_text"
			
 
				-                            title_text.append((direction_[1], bbox2))
			
 
				-                    elif (
			
 
				-                        label == "paragraph_title"
			
 
				-                        and block1["block_label"] in sub_title_labels
			
 
				-                    ):
			
 
				-                        sub_title.append(bbox2)
			
 
				-
			
 
				-        if (
			
 
				-            is_horizontal_1
			
 
				-            and abs(left_up_title_text_distance - right_down_title_text_distance) * 5
			
 
				-            > height
			
 
				-        ) or (
			
 
				-            not is_horizontal_1
			
 
				-            and abs(left_up_title_text_distance - right_down_title_text_distance) * 5
			
 
				-            > width
			
 
				-        ):
			
 
				-            if left_up_title_text_distance < right_down_title_text_distance:
			
 
				-                get_sub_category_(
			
 
				-                    left_up_title_text_direction,
			
 
				-                    left_up_title_text_index,
			
 
				-                    blocks[left_up_title_text_index]["block_label"],
			
 
				-                    True,
			
 
				-                )
			
 
				-            else:
			
 
				-                get_sub_category_(
			
 
				-                    right_down_title_text_direction,
			
 
				-                    right_down_title_text_index,
			
 
				-                    blocks[right_down_title_text_index]["block_label"],
			
 
				-                    False,
			
 
				-                )
			
 
				-        else:
			
 
				-            get_sub_category_(
			
 
				-                left_up_title_text_direction,
			
 
				-                left_up_title_text_index,
			
 
				-                blocks[left_up_title_text_index]["block_label"],
			
 
				-                True,
			
 
				-            )
			
 
				-            get_sub_category_(
			
 
				-                right_down_title_text_direction,
			
 
				-                right_down_title_text_index,
			
 
				-                blocks[right_down_title_text_index]["block_label"],
			
 
				-                False,
			
 
				-            )
			
 
				-
			
 
				-        if block1["block_label"] in title_labels:
			
 
				-            if blocks[i].get("title_text") == []:
			
 
				-                blocks[i]["title_text"] = title_text
			
 
				-
			
 
				-        if block1["block_label"] in sub_title_labels:
			
 
				-            if blocks[i].get("sub_title") == []:
			
 
				-                blocks[i]["sub_title"] = sub_title
			
 
				-
			
 
				-        if block1["block_label"] in vision_labels:
			
 
				-            if blocks[i].get("vision_footnote") == []:
			
 
				-                blocks[i]["vision_footnote"] = vision_footnote
			
 
				-
			
 
				-    return blocks, pre_cuts
			
 
				-
			
 
				-
			
 
				-def get_layout_ordering(
			
 
				-    parsing_res_list: List[Dict[str, Any]],
			
 
				-    no_mask_labels: List[str] = [],
			
 
				-) -> None:
			
 
				-    """
			
 
				-    Process layout parsing results to remove overlapping bounding boxes
			
 
				-    and assign an ordering index based on their positions.
			
 
				-
			
 
				-    Modifies:
			
 
				-        The 'parsing_res_list' list by adding an 'index' to each block.
			
 
				-
			
 
				-    Args:
			
 
				-        parsing_res_list (List[Dict[str, Any]]): List of block dictionaries with 'block_bbox' and 'block_label'.
			
 
				-        no_mask_labels (List[str]): Labels for which overlapping removal is not performed.
			
 
				-    """
			
 
				-    title_text_labels = ["doc_title"]
			
 
				-    title_labels = ["doc_title", "paragraph_title"]
			
 
				-    vision_labels = ["image", "table", "seal", "chart", "figure"]
			
 
				-    vision_title_labels = ["table_title", "chart_title", "figure_title"]
			
 
				-
			
 
				-    parsing_res_list, pre_cuts = _get_sub_category(parsing_res_list, title_text_labels)
			
 
				-
			
 
				-    parsing_res_by_pre_cuts_list = []
			
 
				-    if len(pre_cuts) > 0:
			
 
				-        block_bboxes = [block["block_bbox"] for block in parsing_res_list]
			
 
				-        for axis, cuts in pre_cuts.items():
			
 
				-            axis_index = 1 if axis == "y" else 0
			
 
				-
			
 
				-            max_val = max(bbox[axis_index + 2] for bbox in block_bboxes)
			
 
				-
			
 
				-            intervals = []
			
 
				-            prev = 0
			
 
				-            for cut in sorted(cuts):
			
 
				-                intervals.append((prev, cut))
			
 
				-                prev = cut
			
 
				-            intervals.append((prev, max_val))
			
 
				-
			
 
				-            for start, end in intervals:
			
 
				-                mask = [
			
 
				-                    (bbox[axis_index] >= start) and (bbox[axis_index] < end)
			
 
				-                    for bbox in block_bboxes
			
 
				-                ]
			
 
				-                parsing_res_by_pre_cuts_list.append(
			
 
				-                    [parsing_res_list[i] for i, m in enumerate(mask) if m]
			
 
				-                )
			
 
				-    else:
			
 
				-        parsing_res_by_pre_cuts_list = [parsing_res_list]
			
 
				-
			
 
				-    final_parsing_res_list = []
			
 
				-    num_index = 0
			
 
				-    num_sub_index = 0
			
 
				-    for parsing_res_by_pre_cuts in parsing_res_by_pre_cuts_list:
			
 
				-
			
 
				-        doc_flag = False
			
 
				-        median_width = _get_text_median_width(parsing_res_by_pre_cuts)
			
 
				-        parsing_res_by_pre_cuts, projection_direction = _get_layout_property(
			
 
				-            parsing_res_by_pre_cuts,
			
 
				-            median_width,
			
 
				-            no_mask_labels=no_mask_labels,
			
 
				-            threshold=0.3,
			
 
				-        )
			
 
				-        # Convert bounding boxes to float and remove overlaps
			
 
				-        (
			
 
				-            double_text_blocks,
			
 
				-            title_text_blocks,
			
 
				-            title_blocks,
			
 
				-            vision_blocks,
			
 
				-            vision_title_blocks,
			
 
				-            vision_footnote_blocks,
			
 
				-            other_blocks,
			
 
				-        ) = ([], [], [], [], [], [], [])
			
 
				-
			
 
				-        drop_indexes = []
			
 
				-
			
 
				-        for index, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            label = block["sub_label"]
			
 
				-            block["block_bbox"] = list(map(int, block["block_bbox"]))
			
 
				-
			
 
				-            if label == "doc_title":
			
 
				-                doc_flag = True
			
 
				-
			
 
				-            if label in no_mask_labels:
			
 
				-                if block["layout"] == "double":
			
 
				-                    double_text_blocks.append(block)
			
 
				-                    drop_indexes.append(index)
			
 
				-            elif label == "title_text":
			
 
				-                title_text_blocks.append(block)
			
 
				-                drop_indexes.append(index)
			
 
				-            elif label == "vision_footnote":
			
 
				-                vision_footnote_blocks.append(block)
			
 
				-                drop_indexes.append(index)
			
 
				-            elif label in vision_title_labels:
			
 
				-                vision_title_blocks.append(block)
			
 
				-                drop_indexes.append(index)
			
 
				-            elif label in title_labels:
			
 
				-                title_blocks.append(block)
			
 
				-                drop_indexes.append(index)
			
 
				-            elif label in vision_labels:
			
 
				-                vision_blocks.append(block)
			
 
				-                drop_indexes.append(index)
			
 
				-            else:
			
 
				-                other_blocks.append(block)
			
 
				-                drop_indexes.append(index)
			
 
				-
			
 
				-        for index in sorted(drop_indexes, reverse=True):
			
 
				-            del parsing_res_by_pre_cuts[index]
			
 
				-
			
 
				-        if len(parsing_res_by_pre_cuts) > 0:
			
 
				-            # single text label
			
 
				-            if (
			
 
				-                len(double_text_blocks) > len(parsing_res_by_pre_cuts)
			
 
				-                or projection_direction
			
 
				-            ):
			
 
				-                parsing_res_by_pre_cuts.extend(title_blocks + double_text_blocks)
			
 
				-                title_blocks = []
			
 
				-                double_text_blocks = []
			
 
				-                block_bboxes = [
			
 
				-                    block["block_bbox"] for block in parsing_res_by_pre_cuts
			
 
				-                ]
			
 
				-                block_bboxes.sort(
			
 
				-                    key=lambda x: (
			
 
				-                        x[0] // max(20, median_width),
			
 
				-                        x[1],
			
 
				-                    ),
			
 
				-                )
			
 
				-                block_bboxes = np.array(block_bboxes)
			
 
				-                sorted_indices = sort_by_xycut(block_bboxes, direction=1, min_gap=1)
			
 
				-            else:
			
 
				-                block_bboxes = [
			
 
				-                    block["block_bbox"] for block in parsing_res_by_pre_cuts
			
 
				-                ]
			
 
				-                block_bboxes.sort(key=lambda x: (x[0] // 20, x[1]))
			
 
				-                block_bboxes = np.array(block_bboxes)
			
 
				-                sorted_indices = sort_by_xycut(block_bboxes, direction=0, min_gap=20)
			
 
				-
			
 
				-            sorted_boxes = block_bboxes[sorted_indices].tolist()
			
 
				-
			
 
				-            for block in parsing_res_by_pre_cuts:
			
 
				-                block["index"] = num_index + sorted_boxes.index(block["block_bbox"]) + 1
			
 
				-                block["sub_index"] = (
			
 
				-                    num_sub_index + sorted_boxes.index(block["block_bbox"]) + 1
			
 
				-                )
			
 
				-
			
 
				-        def nearest_match_(input_blocks, distance_type="manhattan", is_add_index=True):
			
 
				-            for block in input_blocks:
			
 
				-                bbox = block["block_bbox"]
			
 
				-                min_distance = float("inf")
			
 
				-                min_distance_config = [
			
 
				-                    [float("inf"), float("inf")],
			
 
				-                    float("inf"),
			
 
				-                    float("inf"),
			
 
				-                ]  # for double text
			
 
				-                nearest_gt_index = 0
			
 
				-                for match_block in parsing_res_by_pre_cuts:
			
 
				-                    match_bbox = match_block["block_bbox"]
			
 
				-                    if distance_type == "nearest_iou_edge_distance":
			
 
				-                        distance, min_distance_config = _nearest_iou_edge_distance(
			
 
				-                            bbox,
			
 
				-                            match_bbox,
			
 
				-                            block["sub_label"],
			
 
				-                            vision_labels=vision_labels,
			
 
				-                            no_mask_labels=no_mask_labels,
			
 
				-                            median_width=median_width,
			
 
				-                            title_labels=title_labels,
			
 
				-                            title_text=block["title_text"],
			
 
				-                            sub_title=block["sub_title"],
			
 
				-                            min_distance_config=min_distance_config,
			
 
				-                            tolerance_len=10,
			
 
				-                        )
			
 
				-                    elif distance_type == "title_text":
			
 
				-                        if (
			
 
				-                            match_block["block_label"] in title_labels + ["abstract"]
			
 
				-                            and match_block["title_text"] != []
			
 
				-                        ):
			
 
				-                            iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                                bbox,
			
 
				-                                match_block["title_text"][0][1],
			
 
				-                            )
			
 
				-                            iou_right_down = (
			
 
				-                                _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                                    bbox,
			
 
				-                                    match_block["title_text"][-1][1],
			
 
				-                                )
			
 
				-                            )
			
 
				-                            iou = 1 - max(iou_left_up, iou_right_down)
			
 
				-                            distance = _manhattan_distance(bbox, match_bbox) * iou
			
 
				-                        else:
			
 
				-                            distance = float("inf")
			
 
				-                    elif distance_type == "manhattan":
			
 
				-                        distance = _manhattan_distance(bbox, match_bbox)
			
 
				-                    elif distance_type == "vision_footnote":
			
 
				-                        if (
			
 
				-                            match_block["block_label"] in vision_labels
			
 
				-                            and match_block["vision_footnote"] != []
			
 
				-                        ):
			
 
				-                            iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                                bbox,
			
 
				-                                match_block["vision_footnote"][0],
			
 
				-                            )
			
 
				-                            iou_right_down = (
			
 
				-                                _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                                    bbox,
			
 
				-                                    match_block["vision_footnote"][-1],
			
 
				-                                )
			
 
				-                            )
			
 
				-                            iou = 1 - max(iou_left_up, iou_right_down)
			
 
				-                            distance = _manhattan_distance(bbox, match_bbox) * iou
			
 
				-                        else:
			
 
				-                            distance = float("inf")
			
 
				-                    elif distance_type == "vision_body":
			
 
				-                        if (
			
 
				-                            match_block["block_label"] in vision_title_labels
			
 
				-                            and block["vision_footnote"] != []
			
 
				-                        ):
			
 
				-                            iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                                match_bbox,
			
 
				-                                block["vision_footnote"][0],
			
 
				-                            )
			
 
				-                            iou_right_down = (
			
 
				-                                _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-                                    match_bbox,
			
 
				-                                    block["vision_footnote"][-1],
			
 
				-                                )
			
 
				-                            )
			
 
				-                            iou = 1 - max(iou_left_up, iou_right_down)
			
 
				-                            distance = _manhattan_distance(bbox, match_bbox) * iou
			
 
				-                        else:
			
 
				-                            distance = float("inf")
			
 
				-                    # when reference block cross mulitple columns, its order should be after the blocks above it.
			
 
				-                    elif distance_type == "append":
			
 
				-                        if match_bbox[3] <= bbox[1]:
			
 
				-                            distance = -(match_bbox[2] * 10 + match_bbox[3])
			
 
				-                        else:
			
 
				-                            distance = float("inf")
			
 
				-                    else:
			
 
				-                        raise NotImplementedError
			
 
				-
			
 
				-                    if distance < min_distance:
			
 
				-                        min_distance = distance
			
 
				-                        if is_add_index:
			
 
				-                            nearest_gt_index = match_block.get("index", 999)
			
 
				-                        else:
			
 
				-                            nearest_gt_index = match_block.get("sub_index", 999)
			
 
				-
			
 
				-                if is_add_index:
			
 
				-                    block["index"] = nearest_gt_index
			
 
				-                else:
			
 
				-                    block["sub_index"] = nearest_gt_index
			
 
				-
			
 
				-                parsing_res_by_pre_cuts.append(block)
			
 
				-
			
 
				-        # double text label
			
 
				-        double_text_blocks.sort(
			
 
				-            key=lambda x: (
			
 
				-                x["block_bbox"][1] // 10,
			
 
				-                x["block_bbox"][0] // median_width,
			
 
				-                x["block_bbox"][1] ** 2 + x["block_bbox"][0] ** 2,
			
 
				-            ),
			
 
				-        )
			
 
				-        # filter the reference blocks from all blocks that cross mulitple columns.
			
 
				-        # they should be ordered using "append".
			
 
				-        double_text_reference_blocks = []
			
 
				-        i = 0
			
 
				-        while i < len(double_text_blocks):
			
 
				-            if double_text_blocks[i]["block_label"] == "reference":
			
 
				-                double_text_reference_blocks.append(double_text_blocks.pop(i))
			
 
				-            else:
			
 
				-                i += 1
			
 
				-        nearest_match_(
			
 
				-            double_text_blocks,
			
 
				-            distance_type="nearest_iou_edge_distance",
			
 
				-        )
			
 
				-        nearest_match_(
			
 
				-            double_text_reference_blocks,
			
 
				-            distance_type="append",
			
 
				-        )
			
 
				-        parsing_res_by_pre_cuts.sort(
			
 
				-            key=lambda x: (x["index"], x["block_bbox"][1], x["block_bbox"][0]),
			
 
				-        )
			
 
				-
			
 
				-        for idx, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            block["index"] = num_index + idx + 1
			
 
				-            block["sub_index"] = num_sub_index + idx + 1
			
 
				-
			
 
				-        # title label
			
 
				-        title_blocks.sort(
			
 
				-            key=lambda x: (
			
 
				-                x["block_bbox"][1] // 10,
			
 
				-                x["block_bbox"][0] // median_width,
			
 
				-                x["block_bbox"][1] ** 2 + x["block_bbox"][0] ** 2,
			
 
				-            ),
			
 
				-        )
			
 
				-        nearest_match_(title_blocks, distance_type="nearest_iou_edge_distance")
			
 
				-
			
 
				-        if doc_flag:
			
 
				-            text_sort_labels = ["doc_title"]
			
 
				-            text_label_priority = {
			
 
				-                label: priority for priority, label in enumerate(text_sort_labels)
			
 
				-            }
			
 
				-            doc_titles = []
			
 
				-            for i, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-                if block["block_label"] == "doc_title":
			
 
				-                    doc_titles.append(
			
 
				-                        (i, block["block_bbox"][1], block["block_bbox"][0]),
			
 
				-                    )
			
 
				-            doc_titles.sort(key=lambda x: (x[1], x[2]))
			
 
				-            first_doc_title_index = doc_titles[0][0]
			
 
				-            parsing_res_by_pre_cuts[first_doc_title_index]["index"] = 1
			
 
				-            parsing_res_by_pre_cuts.sort(
			
 
				-                key=lambda x: (
			
 
				-                    x["index"],
			
 
				-                    text_label_priority.get(x["block_label"], 9999),
			
 
				-                    x["block_bbox"][1],
			
 
				-                    x["block_bbox"][0],
			
 
				-                ),
			
 
				-            )
			
 
				-        else:
			
 
				-            parsing_res_by_pre_cuts.sort(
			
 
				-                key=lambda x: (
			
 
				-                    x["index"],
			
 
				-                    x["block_bbox"][1],
			
 
				-                    x["block_bbox"][0],
			
 
				-                ),
			
 
				-            )
			
 
				-
			
 
				-        for idx, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            block["index"] = num_index + idx + 1
			
 
				-            block["sub_index"] = num_sub_index + idx + 1
			
 
				-
			
 
				-        # title-text label
			
 
				-        nearest_match_(title_text_blocks, distance_type="title_text")
			
 
				-
			
 
				-        def hor_tb_and_ver_lr(x):
			
 
				-            input_bbox = x["block_bbox"]
			
 
				-            is_horizontal = _get_bbox_direction(input_bbox)
			
 
				-            if is_horizontal:
			
 
				-                return input_bbox[1]
			
 
				-            else:
			
 
				-                return input_bbox[0]
			
 
				-
			
 
				-        parsing_res_by_pre_cuts.sort(
			
 
				-            key=lambda x: (x["index"], hor_tb_and_ver_lr(x)),
			
 
				-        )
			
 
				-
			
 
				-        for idx, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            block["index"] = num_index + idx + 1
			
 
				-            block["sub_index"] = num_sub_index + idx + 1
			
 
				-
			
 
				-        # image,figure,chart,seal label
			
 
				-        nearest_match_(
			
 
				-            vision_blocks,
			
 
				-            distance_type="nearest_iou_edge_distance",
			
 
				-            is_add_index=False,
			
 
				-        )
			
 
				-        parsing_res_by_pre_cuts.sort(
			
 
				-            key=lambda x: (
			
 
				-                x["sub_index"],
			
 
				-                x["block_bbox"][1],
			
 
				-                x["block_bbox"][0],
			
 
				-            ),
			
 
				-        )
			
 
				-
			
 
				-        for idx, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            block["sub_index"] = num_sub_index + idx + 1
			
 
				-
			
 
				-        # image,figure,chart,seal title label
			
 
				-        nearest_match_(
			
 
				-            vision_title_blocks,
			
 
				-            distance_type="nearest_iou_edge_distance",
			
 
				-            is_add_index=False,
			
 
				-        )
			
 
				-        parsing_res_by_pre_cuts.sort(
			
 
				-            key=lambda x: (
			
 
				-                x["sub_index"],
			
 
				-                x["block_bbox"][1],
			
 
				-                x["block_bbox"][0],
			
 
				-            ),
			
 
				+        tuple or None: The intersection bounding box in the specified format, or None if there is no intersection.
			
 
				+    """
			
 
				+    bbox1 = np.array(bbox1)
			
 
				+    bbox2 = np.array(bbox2)
			
 
				+    # Convert both bounding boxes to rectangles
			
 
				+    rect1 = bbox1 if len(bbox1.shape) == 1 else convert_points_to_boxes([bbox1])[0]
			
 
				+    rect2 = bbox2 if len(bbox2.shape) == 1 else convert_points_to_boxes([bbox2])[0]
			
 
				+
			
 
				+    # Calculate the intersection rectangle
			
 
				+
			
 
				+    x_min_inter = max(rect1[0], rect2[0])
			
 
				+    y_min_inter = max(rect1[1], rect2[1])
			
 
				+    x_max_inter = min(rect1[2], rect2[2])
			
 
				+    y_max_inter = min(rect1[3], rect2[3])
			
 
				+
			
 
				+    # Check if there is an intersection
			
 
				+    if x_min_inter >= x_max_inter or y_min_inter >= y_max_inter:
			
 
				+        return None
			
 
				+
			
 
				+    if return_format == "bbox":
			
 
				+        return np.array([x_min_inter, y_min_inter, x_max_inter, y_max_inter])
			
 
				+    elif return_format == "poly":
			
 
				+        return np.array(
			
 
				+            [
			
 
				+                [x_min_inter, y_min_inter],
			
 
				+                [x_max_inter, y_min_inter],
			
 
				+                [x_max_inter, y_max_inter],
			
 
				+                [x_min_inter, y_max_inter],
			
 
				+            ],
			
 
				+            dtype=np.int16,
			
 
				         )
			
 
				-
			
 
				-        for idx, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            block["sub_index"] = num_sub_index + idx + 1
			
 
				-
			
 
				-        # vision footnote label
			
 
				-        nearest_match_(
			
 
				-            vision_footnote_blocks,
			
 
				-            distance_type="vision_footnote",
			
 
				-            is_add_index=False,
			
 
				-        )
			
 
				-        text_label_priority = {"vision_footnote": 9999}
			
 
				-        parsing_res_by_pre_cuts.sort(
			
 
				-            key=lambda x: (
			
 
				-                x["sub_index"],
			
 
				-                text_label_priority.get(x["sub_label"], 0),
			
 
				-                x["block_bbox"][1],
			
 
				-                x["block_bbox"][0],
			
 
				-            ),
			
 
				-        )
			
 
				-
			
 
				-        for idx, block in enumerate(parsing_res_by_pre_cuts):
			
 
				-            block["sub_index"] = num_sub_index + idx + 1
			
 
				-
			
 
				-        # header、footnote、header_image... label
			
 
				-        nearest_match_(other_blocks, distance_type="manhattan", is_add_index=False)
			
 
				-
			
 
				-        # add all parsing result
			
 
				-        final_parsing_res_list.extend(parsing_res_by_pre_cuts)
			
 
				-
			
 
				-        # update num index
			
 
				-        num_sub_index += len(parsing_res_by_pre_cuts)
			
 
				-        for parsing_res in parsing_res_by_pre_cuts:
			
 
				-            if parsing_res.get("index"):
			
 
				-                num_index += 1
			
 
				-
			
 
				-    parsing_res_list = [
			
 
				-        {
			
 
				-            "block_label": parsing_res["block_label"],
			
 
				-            "block_content": parsing_res["block_content"],
			
 
				-            "block_bbox": parsing_res["block_bbox"],
			
 
				-            "block_image": parsing_res.get("block_image", None),
			
 
				-            "sub_label": parsing_res["sub_label"],
			
 
				-            "sub_index": parsing_res["sub_index"],
			
 
				-            "index": parsing_res.get("index", None),
			
 
				-            "seg_start_coordinate": parsing_res.get(
			
 
				-                "seg_start_coordinate", float("inf")
			
 
				-            ),
			
 
				-            "seg_end_coordinate": parsing_res.get("seg_end_coordinate", float("-inf")),
			
 
				-            "num_of_lines": parsing_res.get("num_of_lines", 1),
			
 
				-        }
			
 
				-        for parsing_res in final_parsing_res_list
			
 
				-    ]
			
 
				-
			
 
				-    return parsing_res_list
			
 
				-
			
 
				-
			
 
				-def _manhattan_distance(
			
 
				-    point1: Tuple[float, float],
			
 
				-    point2: Tuple[float, float],
			
 
				-    weight_x: float = 1.0,
			
 
				-    weight_y: float = 1.0,
			
 
				-) -> float:
			
 
				-    """
			
 
				-    Calculate the weighted Manhattan distance between two points.
			
 
				-
			
 
				-    Args:
			
 
				-        point1 (Tuple[float, float]): The first point as (x, y).
			
 
				-        point2 (Tuple[float, float]): The second point as (x, y).
			
 
				-        weight_x (float): The weight for the x-axis distance. Default is 1.0.
			
 
				-        weight_y (float): The weight for the y-axis distance. Default is 1.0.
			
 
				-
			
 
				-    Returns:
			
 
				-        float: The weighted Manhattan distance between the two points.
			
 
				-    """
			
 
				-    return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
			
 
				-
			
 
				-
			
 
				-def _calculate_horizontal_distance(
			
 
				-    input_bbox: List[int],
			
 
				-    match_bbox: List[int],
			
 
				-    height: int,
			
 
				-    disperse: int,
			
 
				-    title_text: List[Tuple[int, List[int]]],
			
 
				-) -> float:
			
 
				-    """
			
 
				-    Calculate the horizontal distance between two bounding boxes, considering title text adjustments.
			
 
				-
			
 
				-    Args:
			
 
				-        input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
			
 
				-        match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
			
 
				-        height (int): The height of the input bounding box used for normalization.
			
 
				-        disperse (int): The dispersion factor used to normalize the horizontal distance.
			
 
				-        title_text (List[Tuple[int, List[int]]]): A list of tuples containing title text information and their bounding box coordinates.
			
 
				-                                                  Format: [(position_indicator, [x1, y1, x2, y2]), ...].
			
 
				-
			
 
				-    Returns:
			
 
				-        float: The calculated horizontal distance taking into account the title text adjustments.
			
 
				-    """
			
 
				-    x1, y1, x2, y2 = input_bbox
			
 
				-    x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
			
 
				-
			
 
				-    # Determine vertical distance adjustment based on title text
			
 
				-    if y2 < y1_prime:
			
 
				-        if title_text and title_text[-1][0] == 2:
			
 
				-            y2 += title_text[-1][1][3] - title_text[-1][1][1]
			
 
				-        vertical_adjustment = (y1_prime - y2) * 0.5
			
 
				-    else:
			
 
				-        if title_text and title_text[0][0] == 1:
			
 
				-            y1 -= title_text[0][1][3] - title_text[0][1][1]
			
 
				-        vertical_adjustment = y1 - y2_prime
			
 
				-
			
 
				-    # Calculate horizontal distance with adjustments
			
 
				-    horizontal_distance = (
			
 
				-        abs(x2_prime - x1) // disperse
			
 
				-        + vertical_adjustment // height
			
 
				-        + vertical_adjustment / 5000
			
 
				-    )
			
 
				-
			
 
				-    return horizontal_distance
			
 
				-
			
 
				-
			
 
				-def _calculate_vertical_distance(
			
 
				-    input_bbox: List[int],
			
 
				-    match_bbox: List[int],
			
 
				-    width: int,
			
 
				-    disperse: int,
			
 
				-    title_text: List[Tuple[int, List[int]]],
			
 
				-) -> float:
			
 
				-    """
			
 
				-    Calculate the vertical distance between two bounding boxes, considering title text adjustments.
			
 
				-
			
 
				-    Args:
			
 
				-        input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
			
 
				-        match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
			
 
				-        width (int): The width of the input bounding box used for normalization.
			
 
				-        disperse (int): The dispersion factor used to normalize the vertical distance.
			
 
				-        title_text (List[Tuple[int, List[int]]]): A list of tuples containing title text information and their bounding box coordinates.
			
 
				-                                                  Format: [(position_indicator, [x1, y1, x2, y2]), ...].
			
 
				-
			
 
				-    Returns:
			
 
				-        float: The calculated vertical distance taking into account the title text adjustments.
			
 
				-    """
			
 
				-    x1, y1, x2, y2 = input_bbox
			
 
				-    x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
			
 
				-
			
 
				-    # Determine horizontal distance adjustment based on title text
			
 
				-    if x1 > x2_prime:
			
 
				-        if title_text and title_text[0][0] == 3:
			
 
				-            x1 -= title_text[0][1][2] - title_text[0][1][0]
			
 
				-        horizontal_adjustment = (x1 - x2_prime) * 0.5
			
 
				     else:
			
 
				-        if title_text and title_text[-1][0] == 4:
			
 
				-            x2 += title_text[-1][1][2] - title_text[-1][1][0]
			
 
				-        horizontal_adjustment = x1_prime - x2
			
 
				-
			
 
				-    # Calculate vertical distance with adjustments
			
 
				-    vertical_distance = (
			
 
				-        abs(y2_prime - y1) // disperse
			
 
				-        + horizontal_adjustment // width
			
 
				-        + horizontal_adjustment / 5000
			
 
				-    )
			
 
				-
			
 
				-    return vertical_distance
			
 
				+        raise ValueError("return_format must be either 'bbox' or 'poly'.")
			
 
				 
			
 
				 
			
 
				-def _nearest_edge_distance(
			
 
				-    input_bbox: List[int],
			
 
				-    match_bbox: List[int],
			
 
				-    weight: List[float] = [1.0, 1.0, 1.0, 1.0],
			
 
				-    label: str = "text",
			
 
				-    no_mask_labels: List[str] = [],
			
 
				-    min_edge_distance_config: List[float] = [],
			
 
				-    tolerance_len: float = 10.0,
			
 
				-) -> Tuple[float, List[float]]:
			
 
				-    """
			
 
				-    Calculate the nearest edge distance between two bounding boxes, considering directional weights.
			
 
				-
			
 
				-    Args:
			
 
				-        input_bbox (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
			
 
				-        match_bbox (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
			
 
				-        weight (list, optional): Directional weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
			
 
				-        label (str, optional): The label/type of the object in the bounding box (e.g., 'text'). Defaults to 'text'.
			
 
				-        no_mask_labels (list, optional): Labels for which no masking is applied when calculating edge distances. Defaults to an empty list.
			
 
				-        min_edge_distance_config (list, optional): Configuration for minimum edge distances [min_edge_distance_x, min_edge_distance_y].
			
 
				-        Defaults to [float('inf'), float('inf')].
			
 
				-        tolerance_len (float, optional): The tolerance length for adjusting edge distances. Defaults to 10.
			
 
				+def update_layout_order_config_block_index(
			
 
				+    config: dict, block_label: str, block_idx: int
			
 
				+) -> None:
			
 
				 
			
 
				-    Returns:
			
 
				-        Tuple[float, List[float]]: A tuple containing:
			
 
				-            - The calculated minimum edge distance between the bounding boxes.
			
 
				-            - A list with the minimum edge distances in the x and y directions.
			
 
				-    """
			
 
				-    match_bbox_iou = _calculate_overlap_area_div_minbox_area_ratio(
			
 
				-        input_bbox,
			
 
				-        match_bbox,
			
 
				-    )
			
 
				-    if match_bbox_iou > 0 and label not in no_mask_labels:
			
 
				-        return 0, [0, 0]
			
 
				-
			
 
				-    if not min_edge_distance_config:
			
 
				-        min_edge_distance_config = [float("inf"), float("inf")]
			
 
				-    min_edge_distance_x, min_edge_distance_y = min_edge_distance_config
			
 
				-
			
 
				-    x1, y1, x2, y2 = input_bbox
			
 
				-    x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
			
 
				-
			
 
				-    direction_num = 0
			
 
				-    distance_x = float("inf")
			
 
				-    distance_y = float("inf")
			
 
				-    distance = [float("inf")] * 4
			
 
				-
			
 
				-    # input_bbox is to the left of match_bbox
			
 
				-    if x2 < x1_prime:
			
 
				-        direction_num += 1
			
 
				-        distance[0] = x1_prime - x2
			
 
				-        if abs(distance[0] - min_edge_distance_x) <= tolerance_len:
			
 
				-            distance_x = min_edge_distance_x * weight[0]
			
 
				-        else:
			
 
				-            distance_x = distance[0] * weight[0]
			
 
				-    # input_bbox is to the right of match_bbox
			
 
				-    elif x1 > x2_prime:
			
 
				-        direction_num += 1
			
 
				-        distance[1] = x1 - x2_prime
			
 
				-        if abs(distance[1] - min_edge_distance_x) <= tolerance_len:
			
 
				-            distance_x = min_edge_distance_x * weight[1]
			
 
				-        else:
			
 
				-            distance_x = distance[1] * weight[1]
			
 
				-    elif match_bbox_iou > 0:
			
 
				-        distance[0] = 0
			
 
				-        distance_x = 0
			
 
				-
			
 
				-    # input_bbox is above match_bbox
			
 
				-    if y2 < y1_prime:
			
 
				-        direction_num += 1
			
 
				-        distance[2] = y1_prime - y2
			
 
				-        if abs(distance[2] - min_edge_distance_y) <= tolerance_len:
			
 
				-            distance_y = min_edge_distance_y * weight[2]
			
 
				-        else:
			
 
				-            distance_y = distance[2] * weight[2]
			
 
				-        if label in no_mask_labels:
			
 
				-            distance_y = max(0.1, distance_y) * 10  # for abstract
			
 
				-    # input_bbox is below match_bbox
			
 
				-    elif y1 > y2_prime:
			
 
				-        direction_num += 1
			
 
				-        distance[3] = y1 - y2_prime
			
 
				-        if abs(distance[3] - min_edge_distance_y) <= tolerance_len:
			
 
				-            distance_y = min_edge_distance_y * weight[3]
			
 
				-        else:
			
 
				-            distance_y = distance[3] * weight[3]
			
 
				-    elif match_bbox_iou > 0:
			
 
				-        distance[2] = 0
			
 
				-        distance_y = 0
			
 
				-
			
 
				-    if direction_num == 2:
			
 
				-        return (distance_x + distance_y), [
			
 
				-            min(distance[0], distance[1]),
			
 
				-            min(distance[2], distance[3]),
			
 
				+    doc_title_labels = config["doc_title_labels"]
			
 
				+    paragraph_title_labels = config["paragraph_title_labels"]
			
 
				+    vision_labels = config["vision_labels"]
			
 
				+    vision_title_labels = config["vision_title_labels"]
			
 
				+    header_labels = config["header_labels"]
			
 
				+    unordered_labels = config["unordered_labels"]
			
 
				+    footer_labels = config["footer_labels"]
			
 
				+    text_labels = config["text_labels"]
			
 
				+    text_title_labels = doc_title_labels + paragraph_title_labels
			
 
				+    config["text_title_labels"] = text_title_labels
			
 
				+
			
 
				+    if block_label in doc_title_labels:
			
 
				+        config["doc_title_block_idxes"].append(block_idx)
			
 
				+    if block_label in paragraph_title_labels:
			
 
				+        config["paragraph_title_block_idxes"].append(block_idx)
			
 
				+    if block_label in vision_labels:
			
 
				+        config["vision_block_idxes"].append(block_idx)
			
 
				+    if block_label in vision_title_labels:
			
 
				+        config["vision_title_block_idxes"].append(block_idx)
			
 
				+    if block_label in unordered_labels:
			
 
				+        config["unordered_block_idxes"].append(block_idx)
			
 
				+    if block_label in text_title_labels:
			
 
				+        config["text_title_block_idxes"].append(block_idx)
			
 
				+    if block_label in text_labels:
			
 
				+        config["text_block_idxes"].append(block_idx)
			
 
				+    if block_label in header_labels:
			
 
				+        config["header_block_idxes"].append(block_idx)
			
 
				+    if block_label in footer_labels:
			
 
				+        config["footer_block_idxes"].append(block_idx)
			
 
				+
			
 
				+
			
 
				+def update_region_box(bbox, region_box):
			
 
				+    if region_box is None:
			
 
				+        return bbox
			
 
				+
			
 
				+    x1, y1, x2, y2 = bbox
			
 
				+    x1_region, y1_region, x2_region, y2_region = region_box
			
 
				+
			
 
				+    x1_region = int(min(x1, x1_region))
			
 
				+    y1_region = int(min(y1, y1_region))
			
 
				+    x2_region = int(max(x2, x2_region))
			
 
				+    y2_region = int(max(y2, y2_region))
			
 
				+
			
 
				+    region_box = [x1_region, y1_region, x2_region, y2_region]
			
 
				+
			
 
				+    return region_box
			
 
				+
			
 
				+
			
 
				+def convert_formula_res_to_ocr_format(formula_res_list: List, ocr_res: dict):
			
 
				+    for formula_res in formula_res_list:
			
 
				+        x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
			
 
				+        poly_points = [
			
 
				+            (x_min, y_min),
			
 
				+            (x_max, y_min),
			
 
				+            (x_max, y_max),
			
 
				+            (x_min, y_max),
			
 
				         ]
			
 
				-    else:
			
 
				-        return min(distance_x, distance_y), [
			
 
				-            min(distance[0], distance[1]),
			
 
				-            min(distance[2], distance[3]),
			
 
				-        ]
			
 
				-
			
 
				-
			
 
				-def _get_weights(label, horizontal):
			
 
				-    """Define weights based on the label and orientation."""
			
 
				-    if label == "doc_title":
			
 
				-        return (
			
 
				-            [1, 0.1, 0.1, 1] if horizontal else [0.2, 0.1, 1, 1]
			
 
				-        )  # left-down ,  right-left
			
 
				-    elif label in [
			
 
				-        "paragraph_title",
			
 
				-        "table_title",
			
 
				-        "abstract",
			
 
				-        "image",
			
 
				-        "seal",
			
 
				-        "chart",
			
 
				-        "figure",
			
 
				-    ]:
			
 
				-        return [1, 1, 0.1, 1]  # down
			
 
				-    else:
			
 
				-        return [1, 1, 1, 0.1]  # up
			
 
				-
			
 
				-
			
 
				-def _nearest_iou_edge_distance(
			
 
				-    input_bbox: List[int],
			
 
				-    match_bbox: List[int],
			
 
				-    label: str,
			
 
				-    vision_labels: List[str],
			
 
				-    no_mask_labels: List[str],
			
 
				-    median_width: int = -1,
			
 
				-    title_labels: List[str] = [],
			
 
				-    title_text: List[Tuple[int, List[int]]] = [],
			
 
				-    sub_title: List[List[int]] = [],
			
 
				-    min_distance_config: List[float] = [],
			
 
				-    tolerance_len: float = 10.0,
			
 
				-) -> Tuple[float, List[float]]:
			
 
				-    """
			
 
				-    Calculate the nearest IOU edge distance between two bounding boxes, considering label types, title adjustments, and minimum distance configurations.
			
 
				-    This function computes the edge distance between two bounding boxes while considering their overlap (IOU) and various adjustments based on label types,
			
 
				-    title text, and subtitle information. It also applies minimum distance configurations and tolerance adjustments.
			
 
				-
			
 
				-    Args:
			
 
				-        input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
			
 
				-        match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
			
 
				-        label (str): The label/type of the object in the bounding box (e.g., 'image', 'text', etc.).
			
 
				-        vision_labels (List[str]): List of labels for vision-related objects (e.g., images, icons).
			
 
				-        no_mask_labels (List[str]): Labels for which no masking is applied when calculating edge distances.
			
 
				-        median_width (int, optional): The median width for title dispersion calculation. Defaults to -1.
			
 
				-        title_labels (List[str], optional): Labels that indicate the object is a title. Defaults to an empty list.
			
 
				-        title_text (List[Tuple[int, List[int]]], optional): Text content associated with title labels, in the format [(position_indicator, [x1, y1, x2, y2]), ...].
			
 
				-        sub_title (List[List[int]], optional): List of subtitle bounding boxes to adjust the input_bbox. Defaults to an empty list.
			
 
				-        min_distance_config (List[float], optional): Configuration for minimum distances [min_edge_distance_config, up_edge_distances_config, total_distance].
			
 
				-        tolerance_len (float, optional): The tolerance length for adjusting edge distances. Defaults to 10.0.
			
 
				-
			
 
				-    Returns:
			
 
				-        Tuple[float, List[float]]: A tuple containing:
			
 
				-            - The calculated distance considering IOU and adjustments.
			
 
				-            - The updated minimum distance configuration.
			
 
				-    """
			
 
				-
			
 
				-    x1, y1, x2, y2 = input_bbox
			
 
				-    x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
			
 
				-
			
 
				-    min_edge_distance_config, up_edge_distances_config, total_distance = (
			
 
				-        min_distance_config
			
 
				-    )
			
 
				-
			
 
				-    iou_distance = 0
			
 
				-
			
 
				-    if label in vision_labels:
			
 
				-        horizontal1 = horizontal2 = True
			
 
				-    else:
			
 
				-        horizontal1 = _get_bbox_direction(input_bbox)
			
 
				-        horizontal2 = _get_bbox_direction(match_bbox, 3)
			
 
				-
			
 
				-    if (
			
 
				-        horizontal1 != horizontal2
			
 
				-        or _get_projection_iou(input_bbox, match_bbox, horizontal1) < 0.01
			
 
				-    ):
			
 
				-        iou_distance = 1
			
 
				-
			
 
				-    if label == "doc_title":
			
 
				-        # Calculate distance for titles
			
 
				-        disperse = max(1, median_width)
			
 
				-        tolerance_len = max(tolerance_len, disperse)
			
 
				-
			
 
				-    # Adjust input_bbox based on sub_title
			
 
				-    if sub_title:
			
 
				-        for sub in sub_title:
			
 
				-            x1_, y1_, x2_, y2_ = sub
			
 
				-            x1, y1, x2, y2 = (
			
 
				-                min(x1, x1_),
			
 
				-                min(y1, y1_),
			
 
				-                min(x2, x2_),
			
 
				-                max(y2, y2_),
			
 
				-            )
			
 
				-        input_bbox = [x1, y1, x2, y2]
			
 
				-
			
 
				-    if title_text:
			
 
				-        for sub in title_text:
			
 
				-            x1_, y1_, x2_, y2_ = sub[1]
			
 
				-            if horizontal1:
			
 
				-                x1, y1, x2, y2 = (
			
 
				-                    min(x1, x1_),
			
 
				-                    min(y1, y1_),
			
 
				-                    min(x2, x2_),
			
 
				-                    max(y2, y2_),
			
 
				-                )
			
 
				-            else:
			
 
				-                x1, y1, x2, y2 = (
			
 
				-                    min(x1, x1_),
			
 
				-                    min(y1, y1_),
			
 
				-                    max(x2, x2_),
			
 
				-                    min(y2, y2_),
			
 
				-                )
			
 
				-        input_bbox = [x1, y1, x2, y2]
			
 
				-
			
 
				-    # Calculate edge distance
			
 
				-    weight = _get_weights(label, horizontal1)
			
 
				-    if label == "abstract":
			
 
				-        tolerance_len *= 2
			
 
				-
			
 
				-    edge_distance, edge_distance_config = _nearest_edge_distance(
			
 
				-        input_bbox,
			
 
				-        match_bbox,
			
 
				-        weight,
			
 
				-        label=label,
			
 
				-        no_mask_labels=no_mask_labels,
			
 
				-        min_edge_distance_config=min_edge_distance_config,
			
 
				-        tolerance_len=tolerance_len,
			
 
				-    )
			
 
				-
			
 
				-    # Weights for combining distances
			
 
				-    iou_edge_weight = [10**8, 10**4, 1, 0.0001]
			
 
				-
			
 
				-    # Calculate up and left edge distances
			
 
				-    up_edge_distance = y1_prime
			
 
				-    left_edge_distance = x1_prime
			
 
				-    if (
			
 
				-        label in no_mask_labels or label in title_labels or label in vision_labels
			
 
				-    ) and y1 > y2_prime:
			
 
				-        up_edge_distance = -y2_prime
			
 
				-        left_edge_distance = -x2_prime
			
 
				-
			
 
				-    min_up_edge_distance = up_edge_distances_config
			
 
				-    if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
			
 
				-        up_edge_distance = min_up_edge_distance
			
 
				-
			
 
				-    # Calculate total distance
			
 
				-    distance = (
			
 
				-        iou_distance * iou_edge_weight[0]
			
 
				-        + edge_distance * iou_edge_weight[1]
			
 
				-        + up_edge_distance * iou_edge_weight[2]
			
 
				-        + left_edge_distance * iou_edge_weight[3]
			
 
				-    )
			
 
				+        ocr_res["dt_polys"].append(poly_points)
			
 
				+        ocr_res["rec_texts"].append(f"${formula_res['rec_formula']}$")
			
 
				+        ocr_res["rec_boxes"] = np.vstack(
			
 
				+            (ocr_res["rec_boxes"], [formula_res["dt_polys"]])
			
 
				+        )
			
 
				+        ocr_res["rec_labels"].append("formula")
			
 
				+        ocr_res["rec_polys"].append(poly_points)
			
 
				+        ocr_res["rec_scores"].append(1)
			
 
				 
			
 
				-    # Update minimum distance configuration if a smaller distance is found
			
 
				-    if total_distance > distance:
			
 
				-        edge_distance_config = [
			
 
				-            edge_distance_config[0],
			
 
				-            edge_distance_config[1],
			
 
				-        ]
			
 
				-        min_distance_config = [
			
 
				-            edge_distance_config,
			
 
				-            up_edge_distance,
			
 
				-            distance,
			
 
				-        ]
			
 
				 
			
 
				-    return distance, min_distance_config
			
 
				+def caculate_bbox_area(bbox):
			
 
				+    x1, y1, x2, y2 = bbox
			
 
				+    area = abs((x2 - x1) * (y2 - y1))
			
 
				+    return area
			
 
				 
			
 
				 
			
 
				 def get_show_color(label: str) -> Tuple:
			
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py
@@ -0,0 +1,16 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from .utils import *
			
 
				+from .xycuts import *
			
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py
@@ -0,0 +1,1030 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from typing import Dict, List, Tuple, Union
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+from ..result_v2 import LayoutParsingBlock
			
 
				+
			
 
				+
			
 
				+def calculate_projection_iou(
			
 
				+    bbox1: List[float], bbox2: List[float], direction: str = "horizontal"
			
 
				+) -> float:
			
 
				+    """
			
 
				+    Calculate the IoU of lines between two bounding boxes.
			
 
				+
			
 
				+    Args:
			
 
				+        bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
			
 
				+        bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
			
 
				+        direction (str): direction of the projection, "horizontal" or "vertical".
			
 
				+
			
 
				+    Returns:
			
 
				+        float: Line IoU. Returns 0 if there is no overlap.
			
 
				+    """
			
 
				+    start_index, end_index = 1, 3
			
 
				+    if direction == "horizontal":
			
 
				+        start_index, end_index = 0, 2
			
 
				+
			
 
				+    intersection_start = max(bbox1[start_index], bbox2[start_index])
			
 
				+    intersection_end = min(bbox1[end_index], bbox2[end_index])
			
 
				+    overlap = intersection_end - intersection_start
			
 
				+    if overlap <= 0:
			
 
				+        return 0
			
 
				+    union_width = max(bbox1[end_index], bbox2[end_index]) - min(
			
 
				+        bbox1[start_index], bbox2[start_index]
			
 
				+    )
			
 
				+
			
 
				+    return overlap / union_width if union_width > 0 else 0.0
			
 
				+
			
 
				+
			
 
				+def calculate_iou(
			
 
				+    bbox1: Union[list, tuple],
			
 
				+    bbox2: Union[list, tuple],
			
 
				+) -> float:
			
 
				+    """
			
 
				+    Calculate the Intersection over Union (IoU) of two bounding boxes.
			
 
				+
			
 
				+    Parameters:
			
 
				+    bbox1 (list or tuple): The first bounding box, format [x_min, y_min, x_max, y_max]
			
 
				+    bbox2 (list or tuple): The second bounding box, format [x_min, y_min, x_max, y_max]
			
 
				+
			
 
				+    Returns:
			
 
				+    float: The IoU value between the two bounding boxes
			
 
				+    """
			
 
				+
			
 
				+    x_min_inter = max(bbox1[0], bbox2[0])
			
 
				+    y_min_inter = max(bbox1[1], bbox2[1])
			
 
				+    x_max_inter = min(bbox1[2], bbox2[2])
			
 
				+    y_max_inter = min(bbox1[3], bbox2[3])
			
 
				+
			
 
				+    inter_width = max(0, x_max_inter - x_min_inter)
			
 
				+    inter_height = max(0, y_max_inter - y_min_inter)
			
 
				+
			
 
				+    inter_area = inter_width * inter_height
			
 
				+
			
 
				+    bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
			
 
				+    bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
			
 
				+
			
 
				+    union_area = bbox1_area + bbox2_area - inter_area
			
 
				+
			
 
				+    if union_area == 0:
			
 
				+        return 0.0
			
 
				+
			
 
				+    return inter_area / union_area
			
 
				+
			
 
				+
			
 
				+def get_nearest_edge_distance(
			
 
				+    bbox1: List[int],
			
 
				+    bbox2: List[int],
			
 
				+    weight: List[float] = [1.0, 1.0, 1.0, 1.0],
			
 
				+) -> Tuple[float]:
			
 
				+    """
			
 
				+    Calculate the nearest edge distance between two bounding boxes, considering directional weights.
			
 
				+
			
 
				+    Args:
			
 
				+        bbox1 (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
			
 
				+        bbox2 (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
			
 
				+        weight (list, optional): Directional weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
			
 
				+
			
 
				+    Returns:
			
 
				+        float: The calculated minimum edge distance between the bounding boxes.
			
 
				+    """
			
 
				+    x1, y1, x2, y2 = bbox1
			
 
				+    x1_prime, y1_prime, x2_prime, y2_prime = bbox2
			
 
				+    min_x_distance, min_y_distance = 0, 0
			
 
				+    horizontal_iou = calculate_projection_iou(bbox1, bbox2, "horizontal")
			
 
				+    vertical_iou = calculate_projection_iou(bbox1, bbox2, "vertical")
			
 
				+    if horizontal_iou > 0 and vertical_iou > 0:
			
 
				+        return 0.0
			
 
				+    if horizontal_iou == 0:
			
 
				+        min_x_distance = min(abs(x1 - x2_prime), abs(x2 - x1_prime)) * (
			
 
				+            weight[0] if x2 < x1_prime else weight[1]
			
 
				+        )
			
 
				+    if vertical_iou == 0:
			
 
				+        min_y_distance = min(abs(y1 - y2_prime), abs(y2 - y1_prime)) * (
			
 
				+            weight[2] if y2 < y1_prime else weight[3]
			
 
				+        )
			
 
				+
			
 
				+    return min_x_distance + min_y_distance
			
 
				+
			
 
				+
			
 
				+def _projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
			
 
				+    """
			
 
				+    Generate a 1D projection histogram from bounding boxes along a specified axis.
			
 
				+
			
 
				+    Args:
			
 
				+        boxes: A (N, 4) array of bounding boxes defined by [x_min, y_min, x_max, y_max].
			
 
				+        axis: Axis for projection; 0 for horizontal (x-axis), 1 for vertical (y-axis).
			
 
				+
			
 
				+    Returns:
			
 
				+        A 1D numpy array representing the projection histogram based on bounding box intervals.
			
 
				+    """
			
 
				+    assert axis in [0, 1]
			
 
				+    max_length = np.max(boxes[:, axis::2])
			
 
				+    projection = np.zeros(max_length, dtype=int)
			
 
				+
			
 
				+    # Increment projection histogram over the interval defined by each bounding box
			
 
				+    for start, end in boxes[:, axis::2]:
			
 
				+        projection[start:end] += 1
			
 
				+
			
 
				+    return projection
			
 
				+
			
 
				+
			
 
				+def _split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
			
 
				+    """
			
 
				+    Split the projection profile into segments based on specified thresholds.
			
 
				+
			
 
				+    Args:
			
 
				+        arr_values: 1D array representing the projection profile.
			
 
				+        min_value: Minimum value threshold to consider a profile segment significant.
			
 
				+        min_gap: Minimum gap width to consider a separation between segments.
			
 
				+
			
 
				+    Returns:
			
 
				+        A tuple of start and end indices for each segment that meets the criteria.
			
 
				+    """
			
 
				+    # Identify indices where the projection exceeds the minimum value
			
 
				+    significant_indices = np.where(arr_values > min_value)[0]
			
 
				+    if not len(significant_indices):
			
 
				+        return
			
 
				+
			
 
				+    # Calculate gaps between significant indices
			
 
				+    index_diffs = significant_indices[1:] - significant_indices[:-1]
			
 
				+    gap_indices = np.where(index_diffs > min_gap)[0]
			
 
				+
			
 
				+    # Determine start and end indices of segments
			
 
				+    segment_starts = np.insert(
			
 
				+        significant_indices[gap_indices + 1],
			
 
				+        0,
			
 
				+        significant_indices[0],
			
 
				+    )
			
 
				+    segment_ends = np.append(
			
 
				+        significant_indices[gap_indices],
			
 
				+        significant_indices[-1] + 1,
			
 
				+    )
			
 
				+
			
 
				+    return segment_starts, segment_ends
			
 
				+
			
 
				+
			
 
				+def recursive_yx_cut(
			
 
				+    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
			
 
				+):
			
 
				+    """
			
 
				+    Recursively project and segment bounding boxes, starting with Y-axis and followed by X-axis.
			
 
				+
			
 
				+    Args:
			
 
				+        boxes: A (N, 4) array representing bounding boxes.
			
 
				+        indices: List of indices indicating the original position of boxes.
			
 
				+        res: List to store indices of the final segmented bounding boxes.
			
 
				+        min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
			
 
				+
			
 
				+    Returns:
			
 
				+        None: This function modifies the `res` list in place.
			
 
				+    """
			
 
				+    assert len(boxes) == len(
			
 
				+        indices
			
 
				+    ), "The length of boxes and indices must be the same."
			
 
				+
			
 
				+    # Sort by y_min for Y-axis projection
			
 
				+    y_sorted_indices = boxes[:, 1].argsort()
			
 
				+    y_sorted_boxes = boxes[y_sorted_indices]
			
 
				+    y_sorted_indices = np.array(indices)[y_sorted_indices]
			
 
				+
			
 
				+    # Perform Y-axis projection
			
 
				+    y_projection = _projection_by_bboxes(boxes=y_sorted_boxes, axis=1)
			
 
				+    y_intervals = _split_projection_profile(y_projection, 0, 1)
			
 
				+
			
 
				+    if not y_intervals:
			
 
				+        return
			
 
				+
			
 
				+    # Process each segment defined by Y-axis projection
			
 
				+    for y_start, y_end in zip(*y_intervals):
			
 
				+        # Select boxes within the current y interval
			
 
				+        y_interval_indices = (y_start <= y_sorted_boxes[:, 1]) & (
			
 
				+            y_sorted_boxes[:, 1] < y_end
			
 
				+        )
			
 
				+        y_boxes_chunk = y_sorted_boxes[y_interval_indices]
			
 
				+        y_indices_chunk = y_sorted_indices[y_interval_indices]
			
 
				+
			
 
				+        # Sort by x_min for X-axis projection
			
 
				+        x_sorted_indices = y_boxes_chunk[:, 0].argsort()
			
 
				+        x_sorted_boxes_chunk = y_boxes_chunk[x_sorted_indices]
			
 
				+        x_sorted_indices_chunk = y_indices_chunk[x_sorted_indices]
			
 
				+
			
 
				+        # Perform X-axis projection
			
 
				+        x_projection = _projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0)
			
 
				+        x_intervals = _split_projection_profile(x_projection, 0, min_gap)
			
 
				+
			
 
				+        if not x_intervals:
			
 
				+            continue
			
 
				+
			
 
				+        # If X-axis cannot be further segmented, add current indices to results
			
 
				+        if len(x_intervals[0]) == 1:
			
 
				+            res.extend(x_sorted_indices_chunk)
			
 
				+            continue
			
 
				+
			
 
				+        # Recursively process each segment defined by X-axis projection
			
 
				+        for x_start, x_end in zip(*x_intervals):
			
 
				+            x_interval_indices = (x_start <= x_sorted_boxes_chunk[:, 0]) & (
			
 
				+                x_sorted_boxes_chunk[:, 0] < x_end
			
 
				+            )
			
 
				+            recursive_yx_cut(
			
 
				+                x_sorted_boxes_chunk[x_interval_indices],
			
 
				+                x_sorted_indices_chunk[x_interval_indices],
			
 
				+                res,
			
 
				+            )
			
 
				+
			
 
				+
			
 
				+def recursive_xy_cut(
			
 
				+    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
			
 
				+):
			
 
				+    """
			
 
				+    Recursively performs X-axis projection followed by Y-axis projection to segment bounding boxes.
			
 
				+
			
 
				+    Args:
			
 
				+        boxes: A (N, 4) array representing bounding boxes with [x_min, y_min, x_max, y_max].
			
 
				+        indices: A list of indices representing the position of boxes in the original data.
			
 
				+        res: A list to store indices of bounding boxes that meet the criteria.
			
 
				+        min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
			
 
				+
			
 
				+    Returns:
			
 
				+        None: This function modifies the `res` list in place.
			
 
				+    """
			
 
				+    # Ensure boxes and indices have the same length
			
 
				+    assert len(boxes) == len(
			
 
				+        indices
			
 
				+    ), "The length of boxes and indices must be the same."
			
 
				+
			
 
				+    # Sort by x_min to prepare for X-axis projection
			
 
				+    x_sorted_indices = boxes[:, 0].argsort()
			
 
				+    x_sorted_boxes = boxes[x_sorted_indices]
			
 
				+    x_sorted_indices = np.array(indices)[x_sorted_indices]
			
 
				+
			
 
				+    # Perform X-axis projection
			
 
				+    x_projection = _projection_by_bboxes(boxes=x_sorted_boxes, axis=0)
			
 
				+    x_intervals = _split_projection_profile(x_projection, 0, 1)
			
 
				+
			
 
				+    if not x_intervals:
			
 
				+        return
			
 
				+
			
 
				+    # Process each segment defined by X-axis projection
			
 
				+    for x_start, x_end in zip(*x_intervals):
			
 
				+        # Select boxes within the current x interval
			
 
				+        x_interval_indices = (x_start <= x_sorted_boxes[:, 0]) & (
			
 
				+            x_sorted_boxes[:, 0] < x_end
			
 
				+        )
			
 
				+        x_boxes_chunk = x_sorted_boxes[x_interval_indices]
			
 
				+        x_indices_chunk = x_sorted_indices[x_interval_indices]
			
 
				+
			
 
				+        # Sort selected boxes by y_min to prepare for Y-axis projection
			
 
				+        y_sorted_indices = x_boxes_chunk[:, 1].argsort()
			
 
				+        y_sorted_boxes_chunk = x_boxes_chunk[y_sorted_indices]
			
 
				+        y_sorted_indices_chunk = x_indices_chunk[y_sorted_indices]
			
 
				+
			
 
				+        # Perform Y-axis projection
			
 
				+        y_projection = _projection_by_bboxes(boxes=y_sorted_boxes_chunk, axis=1)
			
 
				+        y_intervals = _split_projection_profile(y_projection, 0, min_gap)
			
 
				+
			
 
				+        if not y_intervals:
			
 
				+            continue
			
 
				+
			
 
				+        # If Y-axis cannot be further segmented, add current indices to results
			
 
				+        if len(y_intervals[0]) == 1:
			
 
				+            res.extend(y_sorted_indices_chunk)
			
 
				+            continue
			
 
				+
			
 
				+        # Recursively process each segment defined by Y-axis projection
			
 
				+        for y_start, y_end in zip(*y_intervals):
			
 
				+            y_interval_indices = (y_start <= y_sorted_boxes_chunk[:, 1]) & (
			
 
				+                y_sorted_boxes_chunk[:, 1] < y_end
			
 
				+            )
			
 
				+            recursive_xy_cut(
			
 
				+                y_sorted_boxes_chunk[y_interval_indices],
			
 
				+                y_sorted_indices_chunk[y_interval_indices],
			
 
				+                res,
			
 
				+            )
			
 
				+
			
 
				+
			
 
				+def reference_insert(
			
 
				+    block: LayoutParsingBlock,
			
 
				+    sorted_blocks: List[LayoutParsingBlock],
			
 
				+    config: Dict,
			
 
				+    median_width: float = 0.0,
			
 
				+):
			
 
				+    """
			
 
				+    Insert reference block into sorted blocks based on the distance between the block and the nearest sorted block.
			
 
				+
			
 
				+    Args:
			
 
				+        block: The block to insert into the sorted blocks.
			
 
				+        sorted_blocks: The sorted blocks where the new block will be inserted.
			
 
				+        config: Configuration dictionary containing parameters related to the layout parsing.
			
 
				+        median_width: Median width of the document. Defaults to 0.0.
			
 
				+
			
 
				+    Returns:
			
 
				+        sorted_blocks: The updated sorted blocks after insertion.
			
 
				+    """
			
 
				+    min_distance = float("inf")
			
 
				+    nearest_sorted_block_index = 0
			
 
				+    for sorted_block_idx, sorted_block in enumerate(sorted_blocks):
			
 
				+        if sorted_block.bbox[3] <= block.bbox[1]:
			
 
				+            distance = -(sorted_block.bbox[2] * 10 + sorted_block.bbox[3])
			
 
				+        if distance < min_distance:
			
 
				+            min_distance = distance
			
 
				+            nearest_sorted_block_index = sorted_block_idx
			
 
				+
			
 
				+    sorted_blocks.insert(nearest_sorted_block_index + 1, block)
			
 
				+    return sorted_blocks
			
 
				+
			
 
				+
			
 
				+def manhattan_insert(
			
 
				+    block: LayoutParsingBlock,
			
 
				+    sorted_blocks: List[LayoutParsingBlock],
			
 
				+    config: Dict,
			
 
				+    median_width: float = 0.0,
			
 
				+):
			
 
				+    """
			
 
				+    Insert a block into a sorted list of blocks based on the Manhattan distance between the block and the nearest sorted block.
			
 
				+
			
 
				+    Args:
			
 
				+        block: The block to insert into the sorted blocks.
			
 
				+        sorted_blocks: The sorted blocks where the new block will be inserted.
			
 
				+        config: Configuration dictionary containing parameters related to the layout parsing.
			
 
				+        median_width: Median width of the document. Defaults to 0.0.
			
 
				+
			
 
				+    Returns:
			
 
				+        sorted_blocks: The updated sorted blocks after insertion.
			
 
				+    """
			
 
				+    min_distance = float("inf")
			
 
				+    nearest_sorted_block_index = 0
			
 
				+    for sorted_block_idx, sorted_block in enumerate(sorted_blocks):
			
 
				+        distance = _manhattan_distance(block.bbox, sorted_block.bbox)
			
 
				+        if distance < min_distance:
			
 
				+            min_distance = distance
			
 
				+            nearest_sorted_block_index = sorted_block_idx
			
 
				+
			
 
				+    sorted_blocks.insert(nearest_sorted_block_index + 1, block)
			
 
				+    return sorted_blocks
			
 
				+
			
 
				+
			
 
def weighted_distance_insert(
    block: LayoutParsingBlock,
    sorted_blocks: List[LayoutParsingBlock],
    config: Dict,
    median_width: float = 0.0,
):
    """
    Insert a block next to the sorted block with the smallest weighted distance.

    For every sorted block a score is built from three terms: the nearest
    edge distance (weighted by ``config["edge_weight"]``, default 10**4), an
    "up edge" term (``config["up_edge_weight"]``, default 1) and a "left edge"
    term (``config["left_edge_weight"]``, default 0.0001). The block is
    inserted before the best-scoring sorted block, or after it when the block
    starts lower (or at the same ``y1`` but further right).

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks where the new block will be inserted.
            The list is modified in place.
        config: Configuration dictionary. Keys read here: ``doc_title_labels``,
            ``paragraph_title_labels``, ``vision_labels``,
            ``xy_cut_block_labels``, ``tolerance_len``, ``edge_weight``,
            ``up_edge_weight`` and ``left_edge_weight``.
        median_width: Median width of the document. Only used to widen the
            tolerance for doc-title blocks. Defaults to 0.0.

    Returns:
        sorted_blocks: The same list object, after insertion.
    """
    doc_title_labels = config.get("doc_title_labels", [])
    paragraph_title_labels = config.get("paragraph_title_labels", [])
    vision_labels = config.get("vision_labels", [])
    xy_cut_block_labels = config.get("xy_cut_block_labels", [])
    tolerance_len = config.get("tolerance_len", 2)
    x1, y1, x2, y2 = block.bbox
    # NOTE(review): min_edge_distance is updated in the loop but never read.
    min_weighted_distance, min_edge_distance, min_up_edge_distance = (
        float("inf"),
        float("inf"),
        float("inf"),
    )
    nearest_sorted_block_index = 0
    for sorted_block_idx, sorted_block in enumerate(sorted_blocks):

        x1_prime, y1_prime, x2_prime, y2_prime = sorted_block.bbox

        # Calculate edge distance
        # The weight depends only on `block`, so it is the same every iteration.
        weight = _get_weights(block.region_label, block.direction)
        edge_distance = get_nearest_edge_distance(block.bbox, sorted_block.bbox, weight)

        if block.label in doc_title_labels:
            # Doc titles tolerate at least one median width (never below 1).
            disperse = max(1, median_width)
            tolerance_len = max(tolerance_len, disperse)
        if block.label == "abstract":
            # NOTE(review): tolerance_len is not reset per iteration, so for an
            # abstract block it doubles again for every sorted block visited.
            # Confirm this compounding is intended.
            tolerance_len *= 2
            edge_distance = max(0.1, edge_distance) * 10

        # Calculate up edge distances
        up_edge_distance = y1_prime
        left_edge_distance = x1_prime
        if (
            block.label in xy_cut_block_labels
            or block.label in doc_title_labels
            or block.label in paragraph_title_labels
            or block.label in vision_labels
        ) and y1 > y2_prime:
            # For these labels, a sorted block that ends above `block` is scored
            # by its negated bottom/right edge instead of its top/left edge.
            up_edge_distance = -y2_prime
            left_edge_distance = -x2_prime

        # Snap to the running minimum when within tolerance so that nearly
        # equal up-edge values do not decide the ranking.
        if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
            up_edge_distance = min_up_edge_distance

        # Calculate weighted distance
        weighted_distance = (
            +edge_distance * config.get("edge_weight", 10**4)
            + up_edge_distance * config.get("up_edge_weight", 1)
            + left_edge_distance * config.get("left_edge_weight", 0.0001)
        )

        min_edge_distance = min(edge_distance, min_edge_distance)
        min_up_edge_distance = min(up_edge_distance, min_up_edge_distance)

        if weighted_distance < min_weighted_distance:
            nearest_sorted_block_index = sorted_block_idx
            min_weighted_distance = weighted_distance
            # Insert after the match when `block` starts below it, or at the
            # same y1 but further to the right; otherwise insert before it.
            if y1 > y1_prime or (y1 == y1_prime and x1 > x1_prime):
                nearest_sorted_block_index = sorted_block_idx + 1

    sorted_blocks.insert(nearest_sorted_block_index, block)
    return sorted_blocks
			
 
				+
			
 
				+
			
 
				+def insert_child_blocks(
			
 
				+    block: LayoutParsingBlock,
			
 
				+    block_idx: int,
			
 
				+    sorted_blocks: List[LayoutParsingBlock],
			
 
				+) -> List[LayoutParsingBlock]:
			
 
				+    """
			
 
				+    Insert child blocks of a block into the sorted blocks list.
			
 
				+
			
 
				+    Args:
			
 
				+        block: The parent block whose child blocks need to be inserted.
			
 
				+        block_idx: Index at which the parent block exists in the sorted blocks list.
			
 
				+        sorted_blocks: Sorted blocks list where the child blocks are to be inserted.
			
 
				+
			
 
				+    Returns:
			
 
				+        sorted_blocks: Updated sorted blocks list after inserting child blocks.
			
 
				+    """
			
 
				+    if block.child_blocks:
			
 
				+        sub_blocks = block.get_child_blocks()
			
 
				+        sub_blocks.append(block)
			
 
				+        sub_blocks = sort_child_blocks(sub_blocks, block.direction)
			
 
				+        sorted_blocks[block_idx] = sub_blocks[0]
			
 
				+        for block in sub_blocks[1:]:
			
 
				+            block_idx += 1
			
 
				+            sorted_blocks.insert(block_idx, block)
			
 
				+    return sorted_blocks
			
 
				+
			
 
				+
			
 
				+def sort_child_blocks(blocks, direction="horizontal") -> List[LayoutParsingBlock]:
			
 
				+    """
			
 
				+    Sort child blocks based on their bounding box coordinates.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks: A list of LayoutParsingBlock objects representing the child blocks.
			
 
				+        direction: Orientation of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
			
 
				+    Returns:
			
 
				+        sorted_blocks: A sorted list of LayoutParsingBlock objects.
			
 
				+    """
			
 
				+    if direction == "horizontal":
			
 
				+        # from top to bottom
			
 
				+        blocks.sort(
			
 
				+            key=lambda x: (
			
 
				+                x.bbox[1],  # y_min
			
 
				+                x.bbox[0],  # x_min
			
 
				+                x.bbox[1] ** 2 + x.bbox[0] ** 2,  # distance with (0,0)
			
 
				+            ),
			
 
				+            reverse=False,
			
 
				+        )
			
 
				+    else:
			
 
				+        # from right to left
			
 
				+        blocks.sort(
			
 
				+            key=lambda x: (
			
 
				+                x.bbox[0],  # x_min
			
 
				+                x.bbox[1],  # y_min
			
 
				+                x.bbox[1] ** 2 + x.bbox[0] ** 2,  # distance with (0,0)
			
 
				+            ),
			
 
				+            reverse=True,
			
 
				+        )
			
 
				+    return blocks
			
 
				+
			
 
				+
			
 
				+def _get_weights(label, dircetion="horizontal"):
			
 
				+    """Define weights based on the label and orientation."""
			
 
				+    if label == "doc_title":
			
 
				+        return (
			
 
				+            [1, 0.1, 0.1, 1] if dircetion == "horizontal" else [0.2, 0.1, 1, 1]
			
 
				+        )  # left-down ,  right-left
			
 
				+    elif label in [
			
 
				+        "paragraph_title",
			
 
				+        "table_title",
			
 
				+        "abstract",
			
 
				+        "image",
			
 
				+        "seal",
			
 
				+        "chart",
			
 
				+        "figure",
			
 
				+    ]:
			
 
				+        return [1, 1, 0.1, 1]  # down
			
 
				+    else:
			
 
				+        return [1, 1, 1, 0.1]  # up
			
 
				+
			
 
				+
			
 
				+def _manhattan_distance(
			
 
				+    point1: Tuple[float, float],
			
 
				+    point2: Tuple[float, float],
			
 
				+    weight_x: float = 1.0,
			
 
				+    weight_y: float = 1.0,
			
 
				+) -> float:
			
 
				+    """
			
 
				+    Calculate the weighted Manhattan distance between two points.
			
 
				+
			
 
				+    Args:
			
 
				+        point1 (Tuple[float, float]): The first point as (x, y).
			
 
				+        point2 (Tuple[float, float]): The second point as (x, y).
			
 
				+        weight_x (float): The weight for the x-axis distance. Default is 1.0.
			
 
				+        weight_y (float): The weight for the y-axis distance. Default is 1.0.
			
 
				+
			
 
				+    Returns:
			
 
				+        float: The weighted Manhattan distance between the two points.
			
 
				+    """
			
 
				+    return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
			
 
				+
			
 
				+
			
 
				+def sort_blocks(blocks, median_width=None, reverse=False):
			
 
				+    """
			
 
				+    Sort blocks based on their y_min, x_min and distance with (0,0).
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (list): list of blocks to be sorted.
			
 
				+        median_width (int): the median width of the text blocks.
			
 
				+        reverse (bool, optional): whether to sort in descending order. Default is False.
			
 
				+
			
 
				+    Returns:
			
 
				+        list: a list of sorted blocks.
			
 
				+    """
			
 
				+    if median_width is None:
			
 
				+        median_width = 1
			
 
				+    blocks.sort(
			
 
				+        key=lambda x: (
			
 
				+            x.bbox[1] // 10,  # y_min
			
 
				+            x.bbox[0] // median_width,  # x_min
			
 
				+            x.bbox[1] ** 2 + x.bbox[0] ** 2,  # distance with (0,0)
			
 
				+        ),
			
 
				+        reverse=reverse,
			
 
				+    )
			
 
				+    return blocks
			
 
				+
			
 
				+
			
 
				+def get_cut_blocks(
			
 
				+    blocks, cut_direction, cut_coordinates, overall_region_box, mask_labels=[]
			
 
				+):
			
 
				+    """
			
 
				+    Cut blocks based on the given cut direction and coordinates.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (list): list of blocks to be cut.
			
 
				+        cut_direction (str): cut direction, either "horizontal" or "vertical".
			
 
				+        cut_coordinates (list): list of cut coordinates.
			
 
				+        overall_region_box (list): the overall region box that contains all blocks.
			
 
				+
			
 
				+    Returns:
			
 
				+        list: a list of tuples containing the cutted blocks and their corresponding mean width。
			
 
				+    """
			
 
				+    cuted_list = []
			
 
				+    # filter out mask blocks,including header, footer, unordered and child_blocks
			
 
				+
			
 
				+    # 0: horizontal, 1: vertical
			
 
				+    cut_aixis = 0 if cut_direction == "horizontal" else 1
			
 
				+    blocks.sort(key=lambda x: x.bbox[cut_aixis + 2])
			
 
				+    overall_max_axis_coordinate = overall_region_box[cut_aixis + 2]
			
 
				+    cut_coordinates.append(overall_max_axis_coordinate)
			
 
				+
			
 
				+    cut_coordinates = list(set(cut_coordinates))
			
 
				+    cut_coordinates.sort()
			
 
				+
			
 
				+    cut_idx = 0
			
 
				+    for cut_coordinate in cut_coordinates:
			
 
				+        group_blocks = []
			
 
				+        block_idx = cut_idx
			
 
				+        while block_idx < len(blocks):
			
 
				+            block = blocks[block_idx]
			
 
				+            if block.bbox[cut_aixis + 2] > cut_coordinate:
			
 
				+                break
			
 
				+            elif block.region_label not in mask_labels:
			
 
				+                group_blocks.append(block)
			
 
				+            block_idx += 1
			
 
				+        cut_idx = block_idx
			
 
				+        if group_blocks:
			
 
				+            cuted_list.append(group_blocks)
			
 
				+
			
 
				+    return cuted_list
			
 
				+
			
 
				+
			
 
				+def split_sub_region_blocks(
			
 
				+    blocks: List[LayoutParsingBlock],
			
 
				+    config: Dict,
			
 
				+) -> List:
			
 
				+    """
			
 
				+    Split blocks into sub regions based on the all layout region bbox.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[LayoutParsingBlock]): A list of blocks.
			
 
				+        config (Dict): Configuration dictionary.
			
 
				+    Returns:
			
 
				+        List: A list of lists of blocks, each representing a sub region.
			
 
				+    """
			
 
				+
			
 
				+    region_bbox = config.get("all_layout_region_box", None)
			
 
				+    x1, y1, x2, y2 = region_bbox
			
 
				+    region_width = x2 - x1
			
 
				+    region_height = y2 - y1
			
 
				+
			
 
				+    if region_width < region_height:
			
 
				+        return [(blocks, region_bbox)]
			
 
				+
			
 
				+    all_boxes = np.array([block.bbox for block in blocks])
			
 
				+    discontinuous = calculate_discontinuous_projection(all_boxes, direction="vertical")
			
 
				+    if len(discontinuous) > 1:
			
 
				+        cut_coordinates = []
			
 
				+        region_boxes = []
			
 
				+        current_interval = discontinuous[0]
			
 
				+        for x1, x2 in discontinuous[1:]:
			
 
				+            if x1 - current_interval[1] > 100:
			
 
				+                cut_coordinates.extend([x1, x2])
			
 
				+                region_boxes.append([x1, y1, x2, y2])
			
 
				+            current_interval = [x1, x2]
			
 
				+        region_blocks = get_cut_blocks(blocks, "vertical", cut_coordinates, region_bbox)
			
 
				+
			
 
				+        return [region_info for region_info in zip(region_blocks, region_boxes)]
			
 
				+    else:
			
 
				+        return [(blocks, region_bbox)]
			
 
				+
			
 
				+
			
 
def get_adjacent_blocks_by_direction(
    blocks: List[LayoutParsingBlock],
    block_idx: int,
    ref_block_idxes: List[int],
    iou_threshold,
) -> List:
    """
    Find the nearest reference blocks before and after the current block.

    A reference block is considered only when it is not already a child block
    (see ``child_labels``) and its projection IoU with the current block,
    measured along the reference block's direction, reaches ``iou_threshold``.
    Candidates are ranked by their gap to the current block along the
    secondary direction; an index is reported only when that gap is below a
    label-dependent tolerance.

    Args:
        blocks (List[LayoutParsingBlock]): A list of all blocks.
        block_idx (int): Index of the current block in ``blocks``.
        ref_block_idxes (List[int]): A list of indices of reference blocks.
        iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
    Returns:
        A ``(prev_block_index, post_block_index)`` tuple. Each entry is the
        index of the matching reference block, or None when there is none.
    """
    min_prev_block_distance = float("inf")
    prev_block_index = None
    min_post_block_distance = float("inf")
    post_block_index = None
    block = blocks[block_idx]
    # Region labels assigned to blocks already attached to a parent.
    child_labels = [
        "vision_footnote",
        "sub_paragraph_title",
        "doc_title_text",
        "vision_title",
    ]

    # find the nearest text block with same direction to the current block
    for ref_block_idx in ref_block_idxes:
        ref_block = blocks[ref_block_idx]
        ref_block_direction = ref_block.direction
        if ref_block.region_label in child_labels:
            continue
        match_block_iou = calculate_projection_iou(
            block.bbox,
            ref_block.bbox,
            ref_block_direction,
        )

        # Slack that lets slightly overlapping blocks still count as before/after.
        child_match_distance_tolerance_len = block.short_side_length / 10

        # Largest gap at which a candidate is still reported as adjacent.
        if block.region_label == "vision":
            if ref_block.num_of_lines == 1:
                gap_tolerance_len = ref_block.short_side_length * 2
            else:
                gap_tolerance_len = block.short_side_length / 10
        else:
            gap_tolerance_len = block.short_side_length * 2

        if match_block_iou >= iou_threshold:
            # Gaps are floored into steps of 5; the start_coordinate / 5000
            # term presumably only breaks ties between equal steps - confirm.
            prev_distance = (
                block.secondary_direction_start_coordinate
                - ref_block.secondary_direction_end_coordinate
                + child_match_distance_tolerance_len
            ) // 5 + ref_block.start_coordinate / 5000
            next_distance = (
                ref_block.secondary_direction_start_coordinate
                - block.secondary_direction_end_coordinate
                + child_match_distance_tolerance_len
            ) // 5 + ref_block.start_coordinate / 5000
            if (
                ref_block.secondary_direction_end_coordinate
                <= block.secondary_direction_start_coordinate
                + child_match_distance_tolerance_len
                and prev_distance < min_prev_block_distance
            ):
                # NOTE(review): the minimum is updated even when the gap check
                # below fails, in which case prev_block_index keeps whatever an
                # earlier, farther candidate set.
                min_prev_block_distance = prev_distance
                if (
                    block.secondary_direction_start_coordinate
                    - ref_block.secondary_direction_end_coordinate
                    < gap_tolerance_len
                ):
                    prev_block_index = ref_block_idx
            elif (
                ref_block.secondary_direction_start_coordinate
                > block.secondary_direction_end_coordinate
                - child_match_distance_tolerance_len
                and next_distance < min_post_block_distance
            ):
                # Same pattern as above, for the block that follows.
                min_post_block_distance = next_distance
                if (
                    ref_block.secondary_direction_start_coordinate
                    - block.secondary_direction_end_coordinate
                    < gap_tolerance_len
                ):
                    post_block_index = ref_block_idx

    # When both minima are still inf this is nan, which makes the comparison
    # below False and leaves both indices untouched.
    diff_dist = abs(min_prev_block_distance - min_post_block_distance)

    # if the difference in distance is too large, only consider the nearest one
    if diff_dist * 5 > block.short_side_length:
        if min_prev_block_distance < min_post_block_distance:
            post_block_index = None
        else:
            prev_block_index = None

    return prev_block_index, post_block_index
			
 
				+
			
 
				+
			
 
				+def update_doc_title_child_blocks(
			
 
				+    blocks: List[LayoutParsingBlock],
			
 
				+    block: LayoutParsingBlock,
			
 
				+    prev_idx: int,
			
 
				+    post_idx: int,
			
 
				+    config: dict,
			
 
				+) -> None:
			
 
				+    """
			
 
				+    Update the child blocks of a document title block.
			
 
				+
			
 
				+    The child blocks need to meet the following conditions:
			
 
				+        1. They must be adjacent
			
 
				+        2. They must have the same direction as the parent block.
			
 
				+        3. Their short side length should be less than 80% of the parent's short side length.
			
 
				+        4. Their long side length should be less than 150% of the parent's long side length.
			
 
				+        5. The child block must be text block.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[LayoutParsingBlock]): overall blocks.
			
 
				+        block (LayoutParsingBlock): document title block.
			
 
				+        prev_idx (int): previous block index, None if not exist.
			
 
				+        post_idx (int): post block index, None if not exist.
			
 
				+        config (dict): configurations.
			
 
				+
			
 
				+    Returns:
			
 
				+        None
			
 
				+
			
 
				+    """
			
 
				+    for idx in [prev_idx, post_idx]:
			
 
				+        if idx is None:
			
 
				+            continue
			
 
				+        ref_block = blocks[idx]
			
 
				+        with_seem_direction = ref_block.direction == block.direction
			
 
				+
			
 
				+        short_side_length_condition = (
			
 
				+            ref_block.short_side_length < block.short_side_length * 0.8
			
 
				+        )
			
 
				+
			
 
				+        long_side_length_condition = (
			
 
				+            ref_block.long_side_length < block.long_side_length
			
 
				+            or ref_block.long_side_length > 1.5 * block.long_side_length
			
 
				+        )
			
 
				+
			
 
				+        if (
			
 
				+            with_seem_direction
			
 
				+            and short_side_length_condition
			
 
				+            and long_side_length_condition
			
 
				+            and ref_block.num_of_lines < 3
			
 
				+        ):
			
 
				+            ref_block.region_label = "doc_title_text"
			
 
				+            block.append_child_block(ref_block)
			
 
				+            config["text_block_idxes"].remove(idx)
			
 
				+
			
 
				+
			
 
				+def update_paragraph_title_child_blocks(
			
 
				+    blocks: List[LayoutParsingBlock],
			
 
				+    block: LayoutParsingBlock,
			
 
				+    prev_idx: int,
			
 
				+    post_idx: int,
			
 
				+    config: dict,
			
 
				+) -> None:
			
 
				+    """
			
 
				+    Update the child blocks of a paragraph title block.
			
 
				+
			
 
				+    The child blocks need to meet the following conditions:
			
 
				+        1. They must be adjacent
			
 
				+        2. They must have the same direction as the parent block.
			
 
				+        3. The child block must be paragraph title block.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[LayoutParsingBlock]): overall blocks.
			
 
				+        block (LayoutParsingBlock): document title block.
			
 
				+        prev_idx (int): previous block index, None if not exist.
			
 
				+        post_idx (int): post block index, None if not exist.
			
 
				+        config (dict): configurations.
			
 
				+
			
 
				+    Returns:
			
 
				+        None
			
 
				+
			
 
				+    """
			
 
				+    paragraph_title_labels = config.get("paragraph_title_labels", [])
			
 
				+    for idx in [prev_idx, post_idx]:
			
 
				+        if idx is None:
			
 
				+            continue
			
 
				+        ref_block = blocks[idx]
			
 
				+        with_seem_direction = ref_block.direction == block.direction
			
 
				+        if with_seem_direction and ref_block.label in paragraph_title_labels:
			
 
				+            ref_block.region_label = "sub_paragraph_title"
			
 
				+            block.append_child_block(ref_block)
			
 
				+            config["paragraph_title_block_idxes"].remove(idx)
			
 
				+
			
 
				+
			
 
def update_vision_child_blocks(
    blocks: List[LayoutParsingBlock],
    block: LayoutParsingBlock,
    ref_block_idxes: List[int],
    prev_idx: int,
    post_idx: int,
    config: dict,
) -> None:
    """
    Attach adjacent title or footnote blocks to a vision block as children.

    Each of the previous/next neighbours is tested in turn:
    - Vision title: its label is in ``config["vision_title_labels"]`` and its
      nearest edge distance to ``block`` is at most the smaller of half the
      block height and twice the neighbour height. It is relabelled
      ``"vision_title"`` and removed from ``config["vision_title_block_idxes"]``.
    - Footnote (checked only when the title rule did not match):
        1. The nearest edge distance is at most 15.
        2. Its short side is shorter than the block's short side.
        3. Its long side is shorter than 50% of the block's long side.
        4. It has the same direction as the block.
        5. Its centroid x is within 10 of the block's, or it has one line and
           its left or right edge satisfies the alignment check in the code.
      It is attached only when the block has no child whose label is in
      ``config["text_labels"]`` yet; it is then relabelled
      ``"vision_footnote"`` and removed from ``config["text_block_idxes"]``.

    Args:
        blocks (List[LayoutParsingBlock]): overall blocks.
        block (LayoutParsingBlock): vision block that receives the children.
        ref_block_idxes (List[int]): A list of indices of reference blocks.
            Not read by this function.
        prev_idx (int): previous block index, None if not exist.
        post_idx (int): post block index, None if not exist.
        config (dict): configurations.

    Returns:
        None

    """
    vision_title_labels = config.get("vision_title_labels", [])
    text_labels = config.get("text_labels", [])
    for idx in [prev_idx, post_idx]:
        if idx is None:
            continue
        ref_block = blocks[idx]
        nearest_edge_distance = get_nearest_edge_distance(block.bbox, ref_block.bbox)
        block_center = block.get_centroid()
        ref_block_center = ref_block.get_centroid()
        if ref_block.label in vision_title_labels and nearest_edge_distance <= min(
            block.height * 0.5, ref_block.height * 2
        ):
            ref_block.region_label = "vision_title"
            block.append_child_block(ref_block)
            config["vision_title_block_idxes"].remove(idx)
        # NOTE(review): a vision-title-labelled neighbour that fails the
        # distance test above also reaches this branch; if it matches, its
        # index is removed from "text_block_idxes" - confirm it is listed there.
        elif (
            nearest_edge_distance <= 15
            and ref_block.short_side_length < block.short_side_length
            and ref_block.long_side_length < 0.5 * block.long_side_length
            and ref_block.direction == block.direction
            and (
                abs(block_center[0] - ref_block_center[0]) < 10
                # NOTE(review): the two edge checks below use a signed
                # difference (no abs), so any neighbour whose edge lies to the
                # right of the block's edge passes - confirm this is intended.
                or (
                    block.bbox[0] - ref_block.bbox[0] < 10
                    and ref_block.num_of_lines == 1
                )
                or (
                    block.bbox[2] - ref_block.bbox[2] < 10
                    and ref_block.num_of_lines == 1
                )
            )
        ):
            # Allow at most one text-labelled child per vision block.
            has_vision_footnote = False
            if len(block.child_blocks) > 0:
                for child_block in block.child_blocks:
                    if child_block.label in text_labels:
                        has_vision_footnote = True
            if not has_vision_footnote:
                ref_block.region_label = "vision_footnote"
                block.append_child_block(ref_block)
                config["text_block_idxes"].remove(idx)
			
 
				+
			
 
				+
			
 
				+def calculate_discontinuous_projection(boxes, direction="horizontal") -> List:
			
 
				+    """
			
 
				+    Calculate the discontinuous projection of boxes along the specified direction.
			
 
				+
			
 
				+    Args:
			
 
				+        boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
			
 
				+        direction (str): Direction along which to perform the projection ('horizontal' or 'vertical').
			
 
				+
			
 
				+    Returns:
			
 
				+        list: List of tuples representing the merged intervals.
			
 
				+    """
			
 
				+    if direction == "horizontal":
			
 
				+        intervals = boxes[:, [0, 2]]
			
 
				+    elif direction == "vertical":
			
 
				+        intervals = boxes[:, [1, 3]]
			
 
				+    else:
			
 
				+        raise ValueError("Direction must be 'horizontal' or 'vertical'")
			
 
				+
			
 
				+    intervals = intervals[np.argsort(intervals[:, 0])]
			
 
				+
			
 
				+    merged_intervals = []
			
 
				+    current_start, current_end = intervals[0]
			
 
				+
			
 
				+    for start, end in intervals[1:]:
			
 
				+        if start <= current_end:
			
 
				+            current_end = max(current_end, end)
			
 
				+        else:
			
 
				+            merged_intervals.append((current_start, current_end))
			
 
				+            current_start, current_end = start, end
			
 
				+
			
 
				+    merged_intervals.append((current_start, current_end))
			
 
				+    return merged_intervals
			
 
				+
			
 
				+
			
 
				+def shrink_overlapping_boxes(
			
 
				+    boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
			
 
				+) -> List:
			
 
				+    """
			
 
				+    Shrink overlapping boxes along the specified direction.
			
 
				+
			
 
				+    Args:
			
 
				+        boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
			
 
				+        direction (str): Direction along which to perform the shrinking ('horizontal' or 'vertical').
			
 
				+        min_threshold (float): Minimum threshold for shrinking. Default is 0.
			
 
				+        max_threshold (float): Maximum threshold for shrinking. Default is 0.2.
			
 
				+
			
 
				+    Returns:
			
 
				+        list: List of tuples representing the merged intervals.
			
 
				+    """
			
 
				+    current_block = boxes[0]
			
 
				+    for block in boxes[1:]:
			
 
				+        x1, y1, x2, y2 = current_block.bbox
			
 
				+        x1_prime, y1_prime, x2_prime, y2_prime = block.bbox
			
 
				+        cut_iou = calculate_projection_iou(
			
 
				+            current_block.bbox, block.bbox, direction=direction
			
 
				+        )
			
 
				+        match_iou = calculate_projection_iou(
			
 
				+            current_block.bbox,
			
 
				+            block.bbox,
			
 
				+            direction="horizontal" if direction == "vertical" else "vertical",
			
 
				+        )
			
 
				+        if direction == "vertical":
			
 
				+            if (
			
 
				+                (match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
			
 
				+                or y2 == y1_prime
			
 
				+                or abs(y2 - y1_prime) <= 3
			
 
				+            ):
			
 
				+                overlap_y_min = max(y1, y1_prime)
			
 
				+                overlap_y_max = min(y2, y2_prime)
			
 
				+                split_y = int((overlap_y_min + overlap_y_max) / 2)
			
 
				+                overlap_y_min = split_y - 1
			
 
				+                overlap_y_max = split_y + 1
			
 
				+                current_block.bbox = [x1, y1, x2, overlap_y_min]
			
 
				+                block.bbox = [x1_prime, overlap_y_max, x2_prime, y2_prime]
			
 
				+        else:
			
 
				+            if (
			
 
				+                (match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
			
 
				+                or x2 == x1_prime
			
 
				+                or abs(x2 - x1_prime) <= 3
			
 
				+            ):
			
 
				+                overlap_x_min = max(x1, x1_prime)
			
 
				+                overlap_x_max = min(x2, x2_prime)
			
 
				+                split_x = int((overlap_x_min + overlap_x_max) / 2)
			
 
				+                overlap_x_min = split_x - 1
			
 
				+                overlap_x_max = split_x + 1
			
 
				+                current_block.bbox = [x1, y1, overlap_x_min, y2]
			
 
				+                block.bbox = [overlap_x_max, y1_prime, x2_prime, y2_prime]
			
 
				+        current_block = block
			
 
				+    return boxes
			
--- a/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
+++ b/paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
@@ -0,0 +1,512 @@
 
				+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
			
 
				+#
			
 
				+# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+# you may not use this file except in compliance with the License.
			
 
				+# You may obtain a copy of the License at
			
 
				+#
			
 
				+#    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+#
			
 
				+# Unless required by applicable law or agreed to in writing, software
			
 
				+# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+# See the License for the specific language governing permissions and
			
 
				+# limitations under the License.
			
 
				+
			
 
				+from typing import Any, Dict, List, Tuple
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+from ..result_v2 import LayoutParsingBlock
			
 
				+from .utils import (
			
 
				+    calculate_discontinuous_projection,
			
 
				+    calculate_iou,
			
 
				+    calculate_projection_iou,
			
 
				+    get_adjacent_blocks_by_direction,
			
 
				+    get_cut_blocks,
			
 
				+    insert_child_blocks,
			
 
				+    manhattan_insert,
			
 
				+    recursive_xy_cut,
			
 
				+    recursive_yx_cut,
			
 
				+    reference_insert,
			
 
				+    shrink_overlapping_boxes,
			
 
				+    sort_blocks,
			
 
				+    update_doc_title_child_blocks,
			
 
				+    update_paragraph_title_child_blocks,
			
 
				+    update_vision_child_blocks,
			
 
				+    weighted_distance_insert,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def pre_process(
			
 
				+    blocks: List[LayoutParsingBlock],
			
 
				+    config: Dict,
			
 
				+) -> List:
			
 
				+    """
			
 
				+    Preprocess the layout for sorting purposes.
			
 
				+
			
 
				+    This function performs two main tasks:
			
 
				+    1. Pre-cuts the layout to ensure the document is correctly partitioned and sorted.
			
 
				+    2. Match the blocks with their children.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[LayoutParsingBlock]): A list of LayoutParsingBlock objects representing the layout.
			
 
				+        config (Dict): Configuration parameters that include settings for pre-cutting and sorting.
			
 
				+
			
 
				+    Returns:
			
 
				+        List: A list of pre-cutted layout blocks list.
			
 
				+    """
			
 
				+    region_bbox = config.get("all_layout_region_box", None)
			
 
				+    region_x_center = (region_bbox[0] + region_bbox[2]) / 2
			
 
				+    region_y_center = (region_bbox[1] + region_bbox[3]) / 2
			
 
				+
			
 
				+    header_block_idxes = config.get("header_block_idxes", [])
			
 
				+    header_blocks = []
			
 
				+    for idx in header_block_idxes:
			
 
				+        blocks[idx].region_label = "header"
			
 
				+        header_blocks.append(blocks[idx])
			
 
				+
			
 
				+    unordered_block_idxes = config.get("unordered_block_idxes", [])
			
 
				+    unordered_blocks = []
			
 
				+    for idx in unordered_block_idxes:
			
 
				+        blocks[idx].region_label = "unordered"
			
 
				+        unordered_blocks.append(blocks[idx])
			
 
				+
			
 
				+    footer_block_idxes = config.get("footer_block_idxes", [])
			
 
				+    footer_blocks = []
			
 
				+    for idx in footer_block_idxes:
			
 
				+        blocks[idx].region_label = "footer"
			
 
				+        footer_blocks.append(blocks[idx])
			
 
				+
			
 
				+    mask_labels = ["header", "unordered", "footer"]
			
 
				+    child_labels = [
			
 
				+        "vision_footnote",
			
 
				+        "sub_paragraph_title",
			
 
				+        "doc_title_text",
			
 
				+        "vision_title",
			
 
				+    ]
			
 
				+    pre_cut_block_idxes = []
			
 
				+    for block_idx, block in enumerate(blocks):
			
 
				+        if block.label in mask_labels:
			
 
				+            continue
			
 
				+
			
 
				+        if block.region_label not in child_labels:
			
 
				+            update_region_label(blocks, config, block_idx)
			
 
				+
			
 
				+        block_direction = block.direction
			
 
				+        if block_direction == "horizontal":
			
 
				+            region_bbox_center = region_x_center
			
 
				+            tolerance_len = block.long_side_length // 5
			
 
				+        else:
			
 
				+            region_bbox_center = region_y_center
			
 
				+            tolerance_len = block.short_side_length // 10
			
 
				+
			
 
				+        block_center = (block.start_coordinate + block.end_coordinate) / 2
			
 
				+        center_offset = abs(block_center - region_bbox_center)
			
 
				+        is_centered = center_offset <= tolerance_len
			
 
				+
			
 
				+        if is_centered:
			
 
				+            pre_cut_block_idxes.append(block_idx)
			
 
				+
			
 
				+    pre_cut_list = []
			
 
				+    cut_direction = "vertical"
			
 
				+    cut_coordinates = []
			
 
				+    discontinuous = []
			
 
				+    mask_labels = child_labels + mask_labels
			
 
				+    all_boxes = np.array(
			
 
				+        [block.bbox for block in blocks if block.region_label not in mask_labels]
			
 
				+    )
			
 
				+    if pre_cut_block_idxes:
			
 
				+        horizontal_cut_num = 0
			
 
				+        for block_idx in pre_cut_block_idxes:
			
 
				+            block = blocks[block_idx]
			
 
				+            horizontal_cut_num += 1 if block.secondary_direction == "horizontal" else 0
			
 
				+        cut_direction = (
			
 
				+            "horizontal"
			
 
				+            if horizontal_cut_num > len(pre_cut_block_idxes) * 0.5
			
 
				+            else "vertical"
			
 
				+        )
			
 
				+        discontinuous = calculate_discontinuous_projection(
			
 
				+            all_boxes, direction=cut_direction
			
 
				+        )
			
 
				+        for idx in pre_cut_block_idxes:
			
 
				+            block = blocks[idx]
			
 
				+            if (
			
 
				+                block.region_label not in mask_labels
			
 
				+                and block.secondary_direction == cut_direction
			
 
				+            ):
			
 
				+                if (
			
 
				+                    block.secondary_direction_start_coordinate,
			
 
				+                    block.secondary_direction_end_coordinate,
			
 
				+                ) in discontinuous:
			
 
				+                    cut_coordinates.append(block.secondary_direction_start_coordinate)
			
 
				+                    cut_coordinates.append(block.secondary_direction_end_coordinate)
			
 
				+    if not discontinuous:
			
 
				+        discontinuous = calculate_discontinuous_projection(
			
 
				+            all_boxes, direction=cut_direction
			
 
				+        )
			
 
				+    current_interval = discontinuous[0]
			
 
				+    for interval in discontinuous[1:]:
			
 
				+        gap_len = interval[0] - current_interval[1]
			
 
				+        if gap_len > 40:
			
 
				+            cut_coordinates.append(current_interval[1])
			
 
				+        current_interval = interval
			
 
				+    overall_region_box = config.get("all_layout_region_box")
			
 
				+    cut_list = get_cut_blocks(
			
 
				+        blocks, cut_direction, cut_coordinates, overall_region_box, mask_labels
			
 
				+    )
			
 
				+    pre_cut_list.extend(cut_list)
			
 
				+
			
 
				+    return header_blocks, pre_cut_list, footer_blocks, unordered_blocks
			
 
				+
			
 
				+
			
 
				+def update_region_label(
			
 
				+    blocks: List[LayoutParsingBlock], config: Dict[str, Any], block_idx: int
			
 
				+) -> None:
			
 
				+    """
			
 
				+    Update the region label of a block based on its label and match the block with its children.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[LayoutParsingBlock]): The list of blocks to process.
			
 
				+        config (Dict[str, Any]): The configuration dictionary containing the necessary information.
			
 
				+        block_idx (int): The index of the current block being processed.
			
 
				+
			
 
				+    Returns:
			
 
				+        None
			
 
				+    """
			
 
				+
			
 
				+    # special title block labels
			
 
				+    doc_title_labels = config.get("doc_title_labels", [])
			
 
				+    paragraph_title_labels = config.get("paragraph_title_labels", [])
			
 
				+    vision_labels = config.get("vision_labels", [])
			
 
				+
			
 
				+    block = blocks[block_idx]
			
 
				+    if block.label in doc_title_labels:
			
 
				+        block.region_label = "doc_title"
			
 
				+    # Force the direction of vision type to be horizontal
			
 
				+    if block.label in vision_labels:
			
 
				+        block.region_label = "vision"
			
 
				+        block.update_direction_info()
			
 
				+    # some paragraph title block may be labeled as sub_title, so we need to check if block.region_label is "other"(default).
			
 
				+    if block.label in paragraph_title_labels and block.region_label == "other":
			
 
				+        block.region_label = "paragraph_title"
			
 
				+
			
 
				+    # only vision and doc title block can have child block
			
 
				+    if block.region_label not in ["vision", "doc_title", "paragraph_title"]:
			
 
				+        return
			
 
				+
			
 
				+    iou_threshold = config.get("child_block_match_iou_threshold", 0.1)
			
 
				+    # match doc title text block
			
 
				+    if block.region_label == "doc_title":
			
 
				+        text_block_idxes = config.get("text_block_idxes", [])
			
 
				+        prev_idx, post_idx = get_adjacent_blocks_by_direction(
			
 
				+            blocks, block_idx, text_block_idxes, iou_threshold
			
 
				+        )
			
 
				+        update_doc_title_child_blocks(blocks, block, prev_idx, post_idx, config)
			
 
				+    # match sub title block
			
 
				+    elif block.region_label == "paragraph_title":
			
 
				+        iou_threshold = config.get("sub_title_match_iou_threshold", 0.1)
			
 
				+        paragraph_title_block_idxes = config.get("paragraph_title_block_idxes", [])
			
 
				+        text_block_idxes = config.get("text_block_idxes", [])
			
 
				+        megred_block_idxes = text_block_idxes + paragraph_title_block_idxes
			
 
				+        prev_idx, post_idx = get_adjacent_blocks_by_direction(
			
 
				+            blocks, block_idx, megred_block_idxes, iou_threshold
			
 
				+        )
			
 
				+        update_paragraph_title_child_blocks(blocks, block, prev_idx, post_idx, config)
			
 
				+    # match vision title block
			
 
				+    elif block.region_label == "vision":
			
 
				+        # for matching vision title block
			
 
				+        vision_title_block_idxes = config.get("vision_title_block_idxes", [])
			
 
				+        # for matching vision footnote block
			
 
				+        text_block_idxes = config.get("text_block_idxes", [])
			
 
				+        megred_block_idxes = text_block_idxes + vision_title_block_idxes
			
 
				+        # Some vision title block may be matched with multiple vision title block, so we need to try multiple times
			
 
				+        for i in range(3):
			
 
				+            prev_idx, post_idx = get_adjacent_blocks_by_direction(
			
 
				+                blocks, block_idx, megred_block_idxes, iou_threshold
			
 
				+            )
			
 
				+            update_vision_child_blocks(
			
 
				+                blocks, block, megred_block_idxes, prev_idx, post_idx, config
			
 
				+            )
			
 
				+
			
 
				+
			
 
				+def get_layout_structure(
			
 
				+    blocks: List[LayoutParsingBlock],
			
 
				+    median_width: float,
			
 
				+    config: dict,
			
 
				+    threshold: float = 0.8,
			
 
				+) -> Tuple[List[Dict[str, any]], bool]:
			
 
				+    """
			
 
				+    Determine the layout cross column of blocks.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[Dict[str, any]]): List of block dictionaries containing 'label' and 'block_bbox'.
			
 
				+        median_width (float): Median width of text blocks.
			
 
				+        no_mask_labels (List[str]): Labels of blocks to be considered for layout analysis.
			
 
				+        threshold (float): Threshold for determining layout overlap.
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple[List[Dict[str, any]], bool]: Updated list of blocks with layout information and a boolean
			
 
				+        indicating if the cross layout area is greater than the single layout area.
			
 
				+    """
			
 
				+    blocks.sort(
			
 
				+        key=lambda x: (x.bbox[0], x.width),
			
 
				+    )
			
 
				+    check_single_layout = {}
			
 
				+
			
 
				+    doc_title_labels = config.get("doc_title_labels", [])
			
 
				+    region_box = config.get("all_layout_region_box", [0, 0, 0, 0])
			
 
				+    for block_idx, block in enumerate(blocks):
			
 
				+        cover_count = 0
			
 
				+        match_block_with_threshold_indexes = []
			
 
				+
			
 
				+        for ref_idx, ref_block in enumerate(blocks):
			
 
				+            if block_idx == ref_idx:
			
 
				+                continue
			
 
				+
			
 
				+            bbox_iou = calculate_iou(block.bbox, ref_block.bbox)
			
 
				+            if bbox_iou > 0:
			
 
				+                if block.region_label == "vision" or block.area < ref_block.area:
			
 
				+                    block.region_label = "cross_text"
			
 
				+                    break
			
 
				+
			
 
				+            match_projection_iou = calculate_projection_iou(
			
 
				+                block.bbox,
			
 
				+                ref_block.bbox,
			
 
				+                "horizontal",
			
 
				+            )
			
 
				+
			
 
				+            if match_projection_iou > 0:
			
 
				+                cover_count += 1
			
 
				+                if match_projection_iou > threshold:
			
 
				+                    match_block_with_threshold_indexes.append(
			
 
				+                        (ref_idx, match_projection_iou),
			
 
				+                    )
			
 
				+                if ref_block.bbox[2] >= block.bbox[2]:
			
 
				+                    break
			
 
				+
			
 
				+        block_center = (block.bbox[0] + block.bbox[2]) / 2
			
 
				+        region_bbox_center = (region_box[0] + region_box[2]) / 2
			
 
				+        center_offset = abs(block_center - region_bbox_center)
			
 
				+        is_centered = center_offset <= median_width * 0.05
			
 
				+        width_gather_than_median = block.width > median_width * 1.3
			
 
				+
			
 
				+        if (
			
 
				+            cover_count >= 2
			
 
				+            and block.label not in doc_title_labels
			
 
				+            and (width_gather_than_median != is_centered)
			
 
				+        ):
			
 
				+            block.region_label = (
			
 
				+                "cross_reference" if block.label == "reference" else "cross_text"
			
 
				+            )
			
 
				+        else:
			
 
				+            check_single_layout[block_idx] = match_block_with_threshold_indexes
			
 
				+
			
 
				+    # Check single-layout block
			
 
				+    for idx, single_layout in check_single_layout.items():
			
 
				+        if single_layout:
			
 
				+            index, match_iou = single_layout[-1]
			
 
				+            if match_iou > 0.9 and blocks[index].region_label == "cross_text":
			
 
				+                blocks[idx].region_label = (
			
 
				+                    "cross_reference" if block.label == "reference" else "cross_text"
			
 
				+                )
			
 
				+
			
 
				+
			
 
				+def sort_by_xycut(
			
 
				+    block_bboxes: List,
			
 
				+    direction: int = 0,
			
 
				+    min_gap: int = 1,
			
 
				+) -> List[int]:
			
 
				+    """
			
 
				+    Sort bounding boxes using recursive XY cut method based on the specified direction.
			
 
				+
			
 
				+    Args:
			
 
				+        block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
			
 
				+                                                           where each box is represented as
			
 
				+                                                           [x_min, y_min, x_max, y_max].
			
 
				+        direction (int): Direction for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
			
 
				+                         Defaults to 0.
			
 
				+        min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
			
 
				+
			
 
				+    Returns:
			
 
				+        List[int]: A list of indices representing the order of sorted bounding boxes.
			
 
				+    """
			
 
				+    block_bboxes = np.asarray(block_bboxes).astype(int)
			
 
				+    res = []
			
 
				+    if direction == 1:
			
 
				+        recursive_yx_cut(
			
 
				+            block_bboxes,
			
 
				+            np.arange(len(block_bboxes)).tolist(),
			
 
				+            res,
			
 
				+            min_gap,
			
 
				+        )
			
 
				+    else:
			
 
				+        recursive_xy_cut(
			
 
				+            block_bboxes,
			
 
				+            np.arange(len(block_bboxes)).tolist(),
			
 
				+            res,
			
 
				+            min_gap,
			
 
				+        )
			
 
				+    return res
			
 
				+
			
 
				+
			
 
				+def match_unsorted_blocks(
			
 
				+    sorted_blocks: List[LayoutParsingBlock],
			
 
				+    unsorted_blocks: List[LayoutParsingBlock],
			
 
				+    config: Dict,
			
 
				+    median_width: int,
			
 
				+) -> List[LayoutParsingBlock]:
			
 
				+    """
			
 
				+    Match special blocks with the sorted blocks based on their region labels.
			
 
				+    Args:
			
 
				+        sorted_blocks (List[LayoutParsingBlock]): Sorted blocks to be matched.
			
 
				+        unsorted_blocks (List[LayoutParsingBlock]): Unsorted blocks to be matched.
			
 
				+        config (Dict): Configuration dictionary containing various parameters.
			
 
				+        median_width (int): Median width value used for calculations.
			
 
				+
			
 
				+    Returns:
			
 
				+        List[LayoutParsingBlock]: The updated sorted blocks after matching special blocks.
			
 
				+    """
			
 
				+    distance_type_map = {
			
 
				+        "cross_text": weighted_distance_insert,
			
 
				+        "paragraph_title": weighted_distance_insert,
			
 
				+        "doc_title": weighted_distance_insert,
			
 
				+        "vision_title": weighted_distance_insert,
			
 
				+        "vision": weighted_distance_insert,
			
 
				+        "cross_reference": reference_insert,
			
 
				+        "unordered": manhattan_insert,
			
 
				+        "other": manhattan_insert,
			
 
				+    }
			
 
				+
			
 
				+    unsorted_blocks = sort_blocks(unsorted_blocks, median_width, reverse=False)
			
 
				+    for idx, block in enumerate(unsorted_blocks):
			
 
				+        region_label = block.region_label
			
 
				+        if idx == 0 and region_label == "doc_title":
			
 
				+            sorted_blocks.insert(0, block)
			
 
				+            continue
			
 
				+        sorted_blocks = distance_type_map[region_label](
			
 
				+            block, sorted_blocks, config, median_width
			
 
				+        )
			
 
				+    return sorted_blocks
			
 
				+
			
 
				+
			
 
				+def xycut_enhanced(
			
 
				+    blocks: List[LayoutParsingBlock], config: Dict
			
 
				+) -> List[LayoutParsingBlock]:
			
 
				+    """
			
 
				+    xycut_enhance function performs the following steps:
			
 
				+        1. Preprocess the input blocks by extracting headers, footers, and pre-cut blocks.
			
 
				+        2. Mask blocks that are crossing different blocks.
			
 
				+        3. Perform xycut_enhanced algorithm on the remaining blocks.
			
 
				+        4. Match special blocks with the sorted blocks based on their region labels.
			
 
				+        5. Update child blocks of the sorted blocks based on their parent blocks.
			
 
				+        6. Return the ordered result list.
			
 
				+
			
 
				+    Args:
			
 
				+        blocks (List[LayoutParsingBlock]): Input blocks to be processed.
			
 
				+
			
 
				+    Returns:
			
 
				+        List[LayoutParsingBlock]: Ordered result list after processing.
			
 
				+    """
			
 
				+    if len(blocks) == 0:
			
 
				+        return blocks
			
 
				+
			
 
				+    text_labels = config.get("text_labels", [])
			
 
				+    header_blocks, pre_cut_list, footer_blocks, unordered_blocks = pre_process(
			
 
				+        blocks, config
			
 
				+    )
			
 
				+    final_order_res_list: List[LayoutParsingBlock] = []
			
 
				+
			
 
				+    header_blocks = sort_blocks(header_blocks)
			
 
				+    footer_blocks = sort_blocks(footer_blocks)
			
 
				+    unordered_blocks = sort_blocks(unordered_blocks)
			
 
				+
			
 
				+    final_order_res_list.extend(header_blocks)
			
 
				+
			
 
				+    unsorted_blocks: List[LayoutParsingBlock] = []
			
 
				+    sorted_blocks_by_pre_cuts = []
			
 
				+    for pre_cut_blocks in pre_cut_list:
			
 
				+        sorted_blocks: List[LayoutParsingBlock] = []
			
 
				+        doc_title_blocks: List[LayoutParsingBlock] = []
			
 
				+        xy_cut_blocks: List[LayoutParsingBlock] = []
			
 
				+        pre_cut_blocks: List[LayoutParsingBlock]
			
 
				+        median_width = 1
			
 
				+        text_block_width = [
			
 
				+            block.width for block in pre_cut_blocks if block.label in text_labels
			
 
				+        ]
			
 
				+        if len(text_block_width) > 0:
			
 
				+            median_width = int(np.median(text_block_width))
			
 
				+
			
 
				+        get_layout_structure(
			
 
				+            pre_cut_blocks,
			
 
				+            median_width,
			
 
				+            config,
			
 
				+        )
			
 
				+
			
 
				+        # Get xy cut blocks and add other blocks in special_block_map
			
 
				+        for block in pre_cut_blocks:
			
 
				+            if block.region_label not in [
			
 
				+                "cross_text",
			
 
				+                "cross_reference",
			
 
				+                "doc_title",
			
 
				+                "unordered",
			
 
				+            ]:
			
 
				+                xy_cut_blocks.append(block)
			
 
				+            elif block.label == "doc_title":
			
 
				+                doc_title_blocks.append(block)
			
 
				+            else:
			
 
				+                unsorted_blocks.append(block)
			
 
				+
			
 
				+        if len(xy_cut_blocks) > 0:
			
 
				+            block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
			
 
				+            block_text_lines = [block.num_of_lines for block in xy_cut_blocks]
			
 
				+            discontinuous = calculate_discontinuous_projection(
			
 
				+                block_bboxes, direction="horizontal"
			
 
				+            )
			
 
				+            if len(discontinuous) == 1 or max(block_text_lines) == 1:
			
 
				+                xy_cut_blocks.sort(key=lambda x: (x.bbox[1] // 5, x.bbox[0]))
			
 
				+                xy_cut_blocks = shrink_overlapping_boxes(xy_cut_blocks, "vertical")
			
 
				+                block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
			
 
				+                sorted_indexes = sort_by_xycut(block_bboxes, direction=1, min_gap=1)
			
 
				+            else:
			
 
				+                xy_cut_blocks.sort(key=lambda x: (x.bbox[0] // 20, x.bbox[1]))
			
 
				+                xy_cut_blocks = shrink_overlapping_boxes(xy_cut_blocks, "horizontal")
			
 
				+                block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
			
 
				+                sorted_indexes = sort_by_xycut(block_bboxes, direction=0, min_gap=20)
			
 
				+
			
 
				+            sorted_blocks = [xy_cut_blocks[i] for i in sorted_indexes]
			
 
				+
			
 
				+        sorted_blocks = match_unsorted_blocks(
			
 
				+            sorted_blocks,
			
 
				+            doc_title_blocks,
			
 
				+            config,
			
 
				+            median_width,
			
 
				+        )
			
 
				+
			
 
				+        sorted_blocks_by_pre_cuts.extend(sorted_blocks)
			
 
				+
			
 
				+    median_width = 1
			
 
				+    text_block_width = [block.width for block in blocks if block.label in text_labels]
			
 
				+    if len(text_block_width) > 0:
			
 
				+        median_width = int(np.median(text_block_width))
			
 
				+    final_order_res_list = match_unsorted_blocks(
			
 
				+        sorted_blocks_by_pre_cuts,
			
 
				+        unsorted_blocks,
			
 
				+        config,
			
 
				+        median_width,
			
 
				+    )
			
 
				+
			
 
				+    final_order_res_list.extend(footer_blocks)
			
 
				+    final_order_res_list.extend(unordered_blocks)
			
 
				+
			
 
				+    index = 0
			
 
				+    visualize_index_labels = config.get("visualize_index_labels", [])
			
 
				+    for block_idx, block in enumerate(final_order_res_list):
			
 
				+        if block.label not in visualize_index_labels:
			
 
				+            continue
			
 
				+        final_order_res_list = insert_child_blocks(
			
 
				+            block, block_idx, final_order_res_list
			
 
				+        )
			
 
				+        block = final_order_res_list[block_idx]
			
 
				+        index += 1
			
 
				+        block.index = index
			
 
				+    return final_order_res_list