
add the new architecture of pipelines

dyning authored 11 months ago
parent commit 831b6d21a2
45 files changed, 4248 additions and 1 deletion
  1. api_examples/pipelines/test_doc_preprocessor.py (+15, -0)
  2. api_examples/pipelines/test_layout_parsing.py (+19, -0)
  3. api_examples/pipelines/test_ocr.py (+13, -0)
  4. api_examples/pipelines/test_pp_chatocrv3.py (+37, -0)
  5. api_examples/pipelines/test_table_recognition.py (+13, -0)
  6. paddlex/configs/pipelines/OCR.yaml (+36, -0)
  7. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml (+109, -0)
  8. paddlex/configs/pipelines/doc_preprocessor.yaml (+16, -0)
  9. paddlex/configs/pipelines/layout_parsing.yaml (+56, -0)
  10. paddlex/inference/__init__.py (+5, -1)
  11. paddlex/inference/pipelines_new/__init__.py (+95, -0)
  12. paddlex/inference/pipelines_new/base.py (+94, -0)
  13. paddlex/inference/pipelines_new/components/__init__.py (+19, -0)
  14. paddlex/inference/pipelines_new/components/base.py (+59, -0)
  15. paddlex/inference/pipelines_new/components/chat_server/__init__.py (+15, -0)
  16. paddlex/inference/pipelines_new/components/chat_server/base.py (+31, -0)
  17. paddlex/inference/pipelines_new/components/chat_server/ernie_bot_chat.py (+95, -0)
  18. paddlex/inference/pipelines_new/components/common/__init__.py (+17, -0)
  19. paddlex/inference/pipelines_new/components/common/crop_image_regions.py (+475, -0)
  20. paddlex/inference/pipelines_new/components/common/seal_det_warp.py (+939, -0)
  21. paddlex/inference/pipelines_new/components/common/sort_boxes.py (+48, -0)
  22. paddlex/inference/pipelines_new/components/prompt_engeering/__init__.py (+15, -0)
  23. paddlex/inference/pipelines_new/components/prompt_engeering/base.py (+31, -0)
  24. paddlex/inference/pipelines_new/components/prompt_engeering/generate_kie_prompt.py (+100, -0)
  25. paddlex/inference/pipelines_new/components/retriever/__init__.py (+15, -0)
  26. paddlex/inference/pipelines_new/components/retriever/base.py (+50, -0)
  27. paddlex/inference/pipelines_new/components/retriever/ernie_bot_retriever.py (+148, -0)
  28. paddlex/inference/pipelines_new/components/utils/__init__.py (+13, -0)
  29. paddlex/inference/pipelines_new/components/utils/mixin.py (+204, -0)
  30. paddlex/inference/pipelines_new/doc_preprocessor/__init__.py (+15, -0)
  31. paddlex/inference/pipelines_new/doc_preprocessor/pipeline.py (+117, -0)
  32. paddlex/inference/pipelines_new/doc_preprocessor/result.py (+51, -0)
  33. paddlex/inference/pipelines_new/layout_parsing/__init__.py (+15, -0)
  34. paddlex/inference/pipelines_new/layout_parsing/pipeline.py (+205, -0)
  35. paddlex/inference/pipelines_new/layout_parsing/result.py (+97, -0)
  36. paddlex/inference/pipelines_new/layout_parsing/table_recognition_post_processing.py (+203, -0)
  37. paddlex/inference/pipelines_new/layout_parsing/utils.py (+87, -0)
  38. paddlex/inference/pipelines_new/ocr/__init__.py (+15, -0)
  39. paddlex/inference/pipelines_new/ocr/pipeline.py (+96, -0)
  40. paddlex/inference/pipelines_new/ocr/result.py (+160, -0)
  41. paddlex/inference/pipelines_new/pp_chatocrv3_doc/__init__.py (+15, -0)
  42. paddlex/inference/pipelines_new/pp_chatocrv3_doc/pipeline.py (+329, -0)
  43. paddlex/inference/pipelines_new/pp_chatocrv3_doc/result.py (+44, -0)
  44. paddlex/utils/flags.py (+2, -0)
  45. paddlex/utils/fonts/__init__.py (+15, -0)

+ 15 - 0
api_examples/pipelines/test_doc_preprocessor.py

@@ -0,0 +1,15 @@
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="doc_preprocessor")
+
+test_img_path = "./test_imgs/img_rot180_demo.jpg"
+# test_img_path = "./test_imgs/doc_distort_test.jpg"
+
+output = pipeline.predict(test_img_path, 
+    use_doc_orientation_classify=True,
+    use_doc_unwarping=True)
+
+for res in output:
+    print(res)
+    res.save_to_img("./output")

+ 19 - 0
api_examples/pipelines/test_layout_parsing.py

@@ -0,0 +1,19 @@
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="layout_parsing")
+
+output = pipeline.predict("./test_imgs/test_layout_parsing.jpg",
+    use_doc_orientation_classify=True,
+    use_doc_unwarping=True,
+    use_common_ocr=True,
+    use_seal_recognition=True,
+    use_table_recognition=True)
+
+# output = pipeline("./test_imgs/demo_paper.png")
+# output = pipeline("./test_imgs/table_recognition.jpg")
+# output = pipeline.predict("./test_imgs/seal_text_det.png")
+# output = pipeline.predict("./test_imgs/img_rot180_demo.jpg")
+for res in output:
+    # print(res)
+    res.save_results("./output")

+ 13 - 0
api_examples/pipelines/test_ocr.py

@@ -0,0 +1,13 @@
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="OCR")
+
+# output = pipeline.predict("./test_imgs/general_ocr_002.png")
+
+output = pipeline.predict("./test_imgs/seal_text_det.png")
+for res in output:
+    print(res)
+    res.save_to_img("./output")
+
+

+ 37 - 0
api_examples/pipelines/test_pp_chatocrv3.py

@@ -0,0 +1,37 @@
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="PP-ChatOCRv3-doc")
+
+# img_path = "./test_demo_imgs/vehicle_certificate-1.png"
+# key_list = ['驾驶室准乘人数']
+
+# img_path = "./test_demo_imgs/test_layout_parsing.jpg"
+# key_list = ['3.2的标题']
+
+img_path = "./test_demo_imgs/seal_text_det.png"
+key_list = ['印章上公司']
+
+# visual_predict_res = pipeline.visual_predict(img_path, 
+#     use_doc_orientation_classify=True,
+#     use_doc_unwarping=True,
+#     use_common_ocr=True,
+#     use_seal_recognition=True,
+#     use_table_recognition=True)
+
+# ####[TODO] Add category information
+# visual_info_list = []
+# for res in visual_predict_res:
+#     visual_info_list.append(res["visual_info"])
+
+# pipeline.save_visual_info_list(visual_info_list, "./res_visual_info/visual_info3.json")
+
+visual_info_list = pipeline.load_visual_info_list("./res_visual_info/visual_info3.json")
+
+vector_info = pipeline.build_vector(visual_info_list)
+
+print(vector_info)
+
+final_results = pipeline.chat(visual_info_list, key_list, vector_info)
+
+print(final_results)
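
For reference, a minimal end-to-end sketch of the same flow with the visual stage enabled (the commented-out half above), using the same image and key list; the method names come from the pipeline added in this commit, while the file paths are illustrative:

from paddlex import create_pipeline

pipeline = create_pipeline(pipeline="PP-ChatOCRv3-doc")

img_path = "./test_demo_imgs/seal_text_det.png"  # illustrative path
key_list = ["印章上公司"]

# Run the visual analysis once and collect the per-page visual info.
visual_predict_res = pipeline.visual_predict(
    img_path,
    use_doc_orientation_classify=True,
    use_doc_unwarping=True,
    use_common_ocr=True,
    use_seal_recognition=True,
    use_table_recognition=True,
)
visual_info_list = [res["visual_info"] for res in visual_predict_res]

# Persist the visual info so later runs can skip the visual stage.
pipeline.save_visual_info_list(visual_info_list, "./res_visual_info/visual_info.json")

# Build the retrieval vectors and query the LLM for the requested keys.
vector_info = pipeline.build_vector(visual_info_list)
final_results = pipeline.chat(visual_info_list, key_list, vector_info)
print(final_results)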

+ 13 - 0
api_examples/pipelines/test_table_recognition.py

@@ -0,0 +1,13 @@
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="table_recognition")
+
+output = pipeline("./test_imgs/table_recognition.jpg")
+for res in output:
+    print(res)
+    res.save_to_img("./output/") ## save the result as an image
+    res.save_to_xlsx("./output/") ## save the table result as an xlsx file
+    res.save_to_html("./output/") ## save the result as HTML
+
+

+ 36 - 0
paddlex/configs/pipelines/OCR.yaml

@@ -0,0 +1,36 @@
+
+pipeline_name: OCR
+
+##############################################
+####### Config for Common OCR
+##############################################
+
+input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_001.png
+text_type: common
+
+SubModules:
+  TextDetection:
+    model_name: PP-OCRv4_mobile_det
+    model_dir: null
+    batch_size: 1    
+  TextRecognition:
+    model_name: PP-OCRv4_mobile_rec
+    model_dir: null
+    batch_size: 1
+
+##############################################
+####### Config for Seal OCR
+##############################################
+
+# input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/seal_text_det.png
+# text_type: seal
+
+# SubModules:
+#   TextDetection:
+#     model_name: PP-OCRv4_mobile_seal_det
+#     model_dir: null
+#     batch_size: 1    
+#   TextRecognition:
+#     model_name: PP-OCRv4_mobile_rec
+#     model_dir: null
+#     batch_size: 1

+ 109 - 0
paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml

@@ -0,0 +1,109 @@
+
+pipeline_name: PP-ChatOCRv3-doc
+input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/demo_paper.png
+
+use_vector_retrieval: True
+
+SubModules:
+  LLM_Chat:
+    model_name: ernie-3.5
+    api_type: qianfan
+    # ak: "api_key" # Set this to a real API key
+    # sk: "secret_key"  # Set this to a real secret key
+    ak: 4iiqB0QfvXTAENgzUwNeDjQ7
+    sk: sHQCw4l5A6jnzbHMa0ZvDi05GT9Qz8tZ
+
+  LLM_Retriever:
+    model_name: ernie-3.5
+    api_type: qianfan
+    # ak: "api_key" # Set this to a real API key
+    # sk: "secret_key"  # Set this to a real secret key
+    ak: 4iiqB0QfvXTAENgzUwNeDjQ7
+    sk: sHQCw4l5A6jnzbHMa0ZvDi05GT9Qz8tZ
+
+  PromptEngneering:
+    KIE_CommonText:
+      task_type: text_kie_prompt
+      task_description: '你现在的任务是从OCR文字识别的结果中提取关键词列表中每一项对应的关键信息。
+          OCR的文字识别结果使用```符号包围,包含所识别出来的文字,顺序在原始图片中从左至右、从上至下。
+          我指定的关键词列表使用[]符号包围。请注意OCR的文字识别结果可能存在长句子换行被切断、不合理的分词、
+          文字被错误合并等问题,你需要结合上下文语义进行综合判断,以抽取准确的关键信息。'
+      output_format: '在返回结果时使用JSON格式,包含多个key-value对,key值为我指定的关键词,value值为所抽取的结果。
+          如果认为OCR识别结果中没有关键词key对应的value,则将value赋值为"未知"。请只输出json格式的结果,
+          并做json格式校验后返回,不要包含其它多余文字!'
+      rules_str:
+      few_shot_demo_text_content:
+      few_shot_demo_key_value_list:
+          
+    KIE_Table:
+      task_type: table_kie_prompt
+      task_description: '你现在的任务是从输入的html格式的表格内容中提取关键词列表中每一项对应的关键信息,
+          表格内容用```符号包围,我指定的关键词列表使用[]符号包围。你需要结合上下文语义进行综合判断,以抽取准确的关键信息。
+          在返回结果时使用JSON格式,包含多个key-value对,key值为我指定的关键词,value值为所抽取的结果。
+          如果认为输入的表格内容中没有关键词key对应的value值,则将value赋值为"未知"。
+          请只输出json格式的结果,并做json格式校验后返回,不要包含其它多余文字!'
+      output_format: '在返回结果时使用JSON格式,包含多个key-value对,key值为我指定的关键词,value值为所抽取的结果。
+          如果认为表格识别结果中没有关键词key对应的value,则将value赋值为"未知"。请只输出json格式的结果,
+          并做json格式校验后返回,不要包含其它多余文字!'
+      rules_str:
+      few_shot_demo_text_content:
+      few_shot_demo_key_value_list:
+
+SubPipelines:
+  LayoutParser:
+    pipeline_name: layout_parsing
+    use_doc_preprocessor: True
+    use_common_ocr: True
+    use_seal_recognition: True
+    use_table_recognition: True
+
+    SubModules:
+      LayoutDetection:
+        model_name: RT-DETR-H_layout_3cls
+        model_dir: null
+        batch_size: 1
+      TableStructurePredictor:
+        model_name: SLANet_plus
+        model_dir: null
+        batch_size: 1
+
+    SubPipelines:
+      DocPreprocessor:
+        pipeline_name: doc_preprocessor
+        use_doc_orientation_classify: True
+        use_doc_unwarping: True
+        SubModules:
+          DocOrientationClassify:
+            model_name: PP-LCNet_x1_0_doc_ori
+            model_dir: null
+            batch_size: 1
+          DocUnwarping:
+            model_name: UVDoc
+            model_dir: null
+            batch_size: 1
+
+      CommonOCR:
+        pipeline_name: OCR
+        text_type: common
+        SubModules:
+          TextDetection:
+            model_name: PP-OCRv4_server_det
+            model_dir: null
+            batch_size: 1    
+          TextRecognition:
+            model_name: PP-OCRv4_server_rec
+            model_dir: null
+            batch_size: 1
+
+      SealOCR:
+        pipeline_name: OCR
+        text_type: seal
+        SubModules:
+          TextDetection:
+            model_name: PP-OCRv4_server_seal_det
+            model_dir: null
+            batch_size: 1    
+          TextRecognition:
+            model_name: PP-OCRv4_server_rec
+            model_dir: null
+            batch_size: 1  

+ 16 - 0
paddlex/configs/pipelines/doc_preprocessor.yaml

@@ -0,0 +1,16 @@
+
+pipeline_name: doc_preprocessor
+input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/img_rot180_demo.jpg
+#input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/doc_test.jpg
+use_doc_orientation_classify: True
+use_doc_unwarping: True
+
+SubModules:
+  DocOrientationClassify:
+    model_name: PP-LCNet_x1_0_doc_ori
+    model_dir: null
+    batch_size: 1
+  DocUnwarping:
+    model_name: UVDoc
+    model_dir: null
+    batch_size: 1

+ 56 - 0
paddlex/configs/pipelines/layout_parsing.yaml

@@ -0,0 +1,56 @@
+
+pipeline_name: layout_parsing
+input: https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/demo_paper.png
+use_doc_preprocessor: True
+use_common_ocr: True
+use_seal_recognition: True
+use_table_recognition: True
+
+SubModules:
+  LayoutDetection:
+    model_name: RT-DETR-H_layout_3cls
+    model_dir: null
+    batch_size: 1
+  TableStructurePredictor:
+    model_name: SLANet_plus
+    model_dir: null
+    batch_size: 1
+
+SubPipelines:
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    use_doc_orientation_classify: True
+    use_doc_unwarping: True
+    SubModules:
+      DocOrientationClassify:
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+        batch_size: 1
+      DocUnwarping:
+        model_name: UVDoc
+        model_dir: null
+        batch_size: 1
+  CommonOCR:
+    pipeline_name: OCR
+    text_type: common
+    SubModules:
+      TextDetection:
+        model_name: PP-OCRv4_server_det
+        model_dir: null
+        batch_size: 1    
+      TextRecognition:
+        model_name: PP-OCRv4_server_rec
+        model_dir: null
+        batch_size: 1
+  SealOCR:
+    pipeline_name: OCR
+    text_type: seal
+    SubModules:
+      TextDetection:
+        model_name: PP-OCRv4_server_seal_det
+        model_dir: null
+        batch_size: 1    
+      TextRecognition:
+        model_name: PP-OCRv4_server_rec
+        model_dir: null
+        batch_size: 1

+ 5 - 1
paddlex/inference/__init__.py

@@ -13,5 +13,9 @@
 # limitations under the License.
 
 from .models import create_predictor
-from .pipelines import create_pipeline
+from ..utils import flags
+if flags.USE_NEW_INFERENCE:
+    from .pipelines_new import create_pipeline
+else:
+    from .pipelines import create_pipeline
 from .utils.pp_option import PaddlePredictorOption
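
A small sketch of opting into the new architecture from user code. The switch itself is flags.USE_NEW_INFERENCE as shown above; how that flag gets populated lives in paddlex/utils/flags.py, whose two added lines are not shown here, so the environment-variable name below is an assumption:

import os

# Hypothetical variable name; the real mechanism is defined in paddlex/utils/flags.py.
os.environ["PADDLEX_USE_NEW_INFERENCE"] = "1"

# With the flag on, this import resolves to pipelines_new.create_pipeline.
from paddlex import create_pipeline

pipeline = create_pipeline(pipeline="OCR")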

+ 95 - 0
paddlex/inference/pipelines_new/__init__.py

@@ -0,0 +1,95 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+from typing import Any, Dict, Optional
+from .base import BasePipeline
+from ...utils.config import parse_config
+
+# from .single_model_pipeline import (
+#     _SingleModelPipeline,
+#     ImageClassification,
+#     ObjectDetection,
+#     InstanceSegmentation,
+#     SemanticSegmentation,
+#     TSFc,
+#     TSAd,
+#     TSCls,
+#     MultiLableImageClas,
+#     SmallObjDet,
+#     AnomalyDetection,
+# )
+# from .ocr import OCRPipeline
+# from .formula_recognition import FormulaRecognitionPipeline
+# from .table_recognition import TableRecPipeline
+# from .face_recognition import FaceRecPipeline
+# from .seal_recognition import SealOCRPipeline
+# from .ppchatocrv3 import PPChatOCRPipeline
+# from .layout_parsing import LayoutParsingPipeline
+# from .pp_shitu_v2 import ShiTuV2Pipeline
+# from .attribute_recognition import AttributeRecPipeline
+
+from .ocr import OCRPipeline
+from .doc_preprocessor import DocPreprocessorPipeline
+from .layout_parsing import LayoutParsingPipeline
+from .pp_chatocrv3_doc import PP_ChatOCRv3_doc_Pipeline
+
+def get_pipeline_path(pipeline_name):
+    pipeline_path = (
+        Path(__file__).parent.parent.parent / "configs/pipelines" / f"{pipeline_name}.yaml"
+    ).resolve()
+    if not Path(pipeline_path).exists():
+        return None
+    return pipeline_path
+
+def load_pipeline_config(pipeline_name: str) -> Dict[str, Any]:
+    if not Path(pipeline_name).exists():
+        pipeline_path = get_pipeline_path(pipeline_name)
+        if pipeline_path is None:
+            raise Exception(
+                f"The pipeline ({pipeline_name}) does not exist! Please use a pipeline name or a config file path!"
+            )
+    else:
+        pipeline_path = pipeline_name
+    config = parse_config(pipeline_path)
+    return config
+
+def create_pipeline(
+    pipeline: str,
+    device=None,
+    pp_option=None,
+    use_hpip: bool = False,
+    hpi_params: Optional[Dict[str, Any]] = None,
+    *args,
+    **kwargs,
+) -> BasePipeline:
+    """build model evaluater
+
+    Args:
+        pipeline (str): the pipeline name, that is name of pipeline class
+
+    Returns:
+        BasePipeline: the pipeline, which is subclass of BasePipeline.
+    """
+    pipeline_name = pipeline
+    config = load_pipeline_config(pipeline_name)
+    assert pipeline_name == config["pipeline_name"]
+    pipeline = BasePipeline.get(pipeline_name)(
+        config=config,
+        device=device,
+        pp_option=pp_option,
+        use_hpip=use_hpip,
+        hpi_params=hpi_params)
+    return pipeline
+    
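
A usage sketch of the factory above (illustrative, not part of the commit): the pipeline name is resolved to the bundled YAML under paddlex/configs/pipelines and the registered pipeline class is instantiated from it. This assumes the optional dependencies of the imported pipelines (e.g. erniebot) are installed, and the image path is made up:

from paddlex.inference.pipelines_new import load_pipeline_config, create_pipeline

config = load_pipeline_config("OCR")            # resolves paddlex/configs/pipelines/OCR.yaml
print(config["pipeline_name"])                  # "OCR"
print(config["SubModules"]["TextDetection"])    # per-module settings from the YAML

pipeline = create_pipeline(pipeline="OCR")      # instantiates the registered OCR pipeline
for res in pipeline.predict("./test_imgs/general_ocr_002.png"):  # illustrative path
    print(res)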

+ 94 - 0
paddlex/inference/pipelines_new/base.py

@@ -0,0 +1,94 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from ...utils.subclass_register import AutoRegisterABCMetaClass
+import yaml
+import codecs
+from pathlib import Path
+from typing import Any, Dict, Optional
+from ..models import create_predictor
+from .components.chat_server.base import BaseChat
+from .components.retriever.base import BaseRetriever
+from .components.prompt_engeering.base import BaseGeneratePrompt
+
+class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
+    """Base Pipeline"""
+
+    __is_base = True
+
+    def __init__(self,
+        device=None, 
+        pp_option=None, 
+        use_hpip: bool = False, 
+        hpi_params: Optional[Dict[str, Any]] = None) -> None:
+        super().__init__()
+        self.device = device
+        self.pp_option = pp_option
+        self.use_hpip = use_hpip
+        self.hpi_params = hpi_params
+
+    @abstractmethod
+    def predict(self, input, **kwargs):
+        raise NotImplementedError(
+            "The method `predict` has not been implemented yet."
+        )
+    
+    def create_model(self, config):
+
+        model_dir = config['model_dir']
+        if model_dir == None:
+            model_dir = config['model_name']
+
+        model = create_predictor(
+            model_dir,
+            device=self.device,
+            pp_option=self.pp_option,
+            use_hpip=self.use_hpip,
+            hpi_params=self.hpi_params)
+
+        ########### [TODO] Support passing parameters at initialization
+        if "batch_size" in config:
+            batch_size = config["batch_size"]
+            model.set_predictor(batch_size=batch_size)
+
+        return model
+
+    def create_pipeline(self, config):
+        pipeline_name = config['pipeline_name']
+        pipeline = BasePipeline.get(pipeline_name)(
+            config=config,
+            device=self.device,
+            pp_option=self.pp_option,
+            use_hpip=self.use_hpip,
+            hpi_params=self.hpi_params)
+        return pipeline 
+
+    def create_chat_bot(self, config):
+        model_name = config['model_name']
+        chat_bot = BaseChat.get(model_name)(config)
+        return chat_bot     
+
+    def create_retriever(self, config):
+        model_name = config['model_name']
+        retriever = BaseRetriever.get(model_name)(config)
+        return retriever    
+
+    def create_prompt_engeering(self, config):
+        task_type = config['task_type']
+        pe = BaseGeneratePrompt.get(task_type)(config)
+        return pe       
+
+    def __call__(self, input, **kwargs):
+        return self.predict(input, **kwargs)
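
To make the contract concrete, a minimal sketch of a pipeline built on these helpers (illustrative only; the concrete pipelines added by this commit live in the per-pipeline packages, and the entities registration attribute is assumed to work as it does for the components):

from paddlex.inference.pipelines_new.base import BasePipeline

class DemoPipeline(BasePipeline):
    entities = "demo_pipeline"  # assumed registration key, mirroring the components

    def __init__(self, config, device=None, pp_option=None, use_hpip=False, hpi_params=None):
        super().__init__(device=device, pp_option=pp_option,
                         use_hpip=use_hpip, hpi_params=hpi_params)
        # Each SubModules entry becomes a predictor, each SubPipelines entry a nested pipeline.
        self.layout_model = self.create_model(config["SubModules"]["LayoutDetection"])
        self.doc_preprocessor = self.create_pipeline(config["SubPipelines"]["DocPreprocessor"])

    def predict(self, input, **kwargs):
        # A real pipeline would feed the preprocessed pages into the detector here.
        yield from self.layout_model(input)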

+ 19 - 0
paddlex/inference/pipelines_new/components/__init__.py

@@ -0,0 +1,19 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import BaseComponent, CVResult, BaseResult
+from .common import SortQuadBoxes
+from .common import CropByPolys
+from .common import CropByBoxes
+from .utils.mixin import HtmlMixin, XlsxMixin

+ 59 - 0
paddlex/inference/pipelines_new/components/base.py

@@ -0,0 +1,59 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from ....utils.subclass_register import AutoRegisterABCMetaClass
+
+import inspect
+
+from ....utils.func_register import FuncRegister
+from ...utils.io import ImageReader, ImageWriter
+from .utils.mixin import JsonMixin, ImgMixin, StrMixin
+
+class BaseComponent(ABC, metaclass=AutoRegisterABCMetaClass):
+    """Base Component"""
+
+    __is_base = True
+
+    def __init__(self):
+        super().__init__()
+
+    @abstractmethod
+    def __call__(self):
+        raise NotImplementedError(
+            "The component method `__call__` has not been implemented yet.")
+
+
+class BaseResult(dict, StrMixin, JsonMixin):
+    def __init__(self, data):
+        super().__init__(data)
+        self._show_funcs = []
+        StrMixin.__init__(self)
+        JsonMixin.__init__(self)
+
+    def save_all(self, save_path):
+        for func in self._show_funcs:
+            signature = inspect.signature(func)
+            if "save_path" in signature.parameters:
+                func(save_path=save_path)
+            else:
+                func()
+
+class CVResult(BaseResult, ImgMixin):
+    def __init__(self, data):
+        super().__init__(data)
+        ImgMixin.__init__(self, "pillow")
+        self._img_reader = ImageReader(backend="pillow")
+        self._img_writer = ImageWriter(backend="pillow")
+
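
A small sketch of how save_all dispatches to registered writers (illustrative; _save_summary is a made-up writer, not an API of the mixins):

import os
from paddlex.inference.pipelines_new.components import BaseResult

class DemoResult(BaseResult):
    def __init__(self, data):
        super().__init__(data)
        self._show_funcs.append(self._save_summary)  # registered so save_all() will call it

    def _save_summary(self, save_path):
        os.makedirs(save_path, exist_ok=True)
        with open(os.path.join(save_path, "summary.txt"), "w") as f:
            f.write(str(self))

res = DemoResult({"text": "hello"})
res.save_all(save_path="./output")  # save_path is forwarded because _save_summary accepts it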

+ 15 - 0
paddlex/inference/pipelines_new/components/chat_server/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .ernie_bot_chat import ErnieBotChat

+ 31 - 0
paddlex/inference/pipelines_new/components/chat_server/base.py

@@ -0,0 +1,31 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from .....utils.subclass_register import AutoRegisterABCMetaClass
+
+import inspect
+
+class BaseChat(ABC, metaclass=AutoRegisterABCMetaClass):
+    """Base Chat"""
+
+    __is_base = True
+
+    def __init__(self):
+        super().__init__()
+
+    @abstractmethod
+    def generate_chat_results(self):
+        raise NotImplementedError(
+            "The method `generate_chat_results` has not been implemented yet.")

+ 95 - 0
paddlex/inference/pipelines_new/components/chat_server/ernie_bot_chat.py

@@ -0,0 +1,95 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .....utils import logging
+from .base import BaseChat
+import erniebot
+
+class ErnieBotChat(BaseChat):
+    """Ernie Bot Chat"""
+
+    entities = [
+        "ernie-4.0",
+        "ernie-3.5",
+        "ernie-3.5-8k",
+        "ernie-lite",
+        "ernie-tiny-8k",
+        "ernie-speed",
+        "ernie-speed-128k",
+        "ernie-char-8k",
+    ]
+
+    def __init__(self, config):
+        super().__init__()
+        model_name = config.get('model_name', None)
+        api_type = config.get('api_type', None)
+        ak = config.get('ak', None)
+        sk = config.get('sk', None)
+        access_token = config.get('access_token', None)
+
+        if model_name not in self.entities:
+            raise ValueError(f"model_name must be in {self.entities} of ErnieBotChat.")
+
+        if api_type not in ["aistudio", "qianfan"]:
+            raise ValueError("api_type must be one of ['aistudio', 'qianfan']")
+
+        if api_type == "aistudio" and access_token is None:
+            raise ValueError("access_token cannot be empty when api_type is aistudio.")
+            
+        if api_type == "qianfan" and (ak is None or sk is None):
+            raise ValueError("ak and sk cannot be empty when api_type is qianfan.")            
+
+        self.model_name = model_name
+        self.config = config
+        
+    def generate_chat_results(self, prompt, temperature=0.001, max_retries=1):
+        """
+        args:
+        return:
+        """
+        try:
+            cur_config = {
+                "api_type": self.config['api_type'],
+                "max_retries": max_retries
+            }
+            if self.config['api_type'] == "aistudio":
+                cur_config['access_token'] = self.config['access_token']
+            elif self.config['api_type'] == "qianfan":
+                cur_config['ak'] = self.config['ak']
+                cur_config['sk'] = self.config['sk']
+            chat_completion = erniebot.ChatCompletion.create(
+                _config_=cur_config,
+                model=self.model_name,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=float(temperature),
+            )
+            llm_result = chat_completion.get_result()
+            return llm_result
+        except Exception as e:
+            if len(e.args) < 1:
+                self.ERROR_MASSAGE = (
+                    "暂无权限访问ErnieBot服务,请检查访问令牌。"
+                )
+            elif (
+                e.args[-1]
+                == "暂无权限使用,请在 AI Studio 正确获取访问令牌(access token)使用"
+            ):
+                self.ERROR_MASSAGE = (
+                    "暂无权限访问ErnieBot服务,请检查访问令牌。"
+                )
+            else:
+                logging.error(e)
+                self.ERROR_MASSAGE = "大模型调用失败"
+        return None 
+        
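
A hedged usage sketch of this component on its own (not part of the commit); the config mirrors the LLM_Chat block of PP-ChatOCRv3-doc.yaml and the credentials are placeholders:

from paddlex.inference.pipelines_new.components.chat_server import ErnieBotChat

chat_bot = ErnieBotChat({
    "model_name": "ernie-3.5",
    "api_type": "qianfan",
    "ak": "your_api_key",      # placeholder, use a real Qianfan API key
    "sk": "your_secret_key",   # placeholder, use a real Qianfan secret key
})
answer = chat_bot.generate_chat_results("用一句话介绍OCR。")
print(answer)  # None is returned when the call fails (see the except branch above)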

+ 17 - 0
paddlex/inference/pipelines_new/components/common/__init__.py

@@ -0,0 +1,17 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .sort_boxes import SortQuadBoxes
+from .crop_image_regions import CropByPolys, CropByBoxes
+

+ 475 - 0
paddlex/inference/pipelines_new/components/common/crop_image_regions.py

@@ -0,0 +1,475 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..base import BaseComponent
+import numpy as np
+from ....utils.io import ImageReader
+import copy
+import cv2
+from .seal_det_warp import AutoRectifier
+from shapely.geometry import Polygon
+from numpy.linalg import norm
+
+class CropByPolys(BaseComponent):
+    """Crop Image by Polys"""
+
+    entities = "CropByPolys"
+
+    def __init__(self, det_box_type="quad"):
+        super().__init__()
+        self.det_box_type = det_box_type
+
+    def __call__(self, img, dt_polys):
+        """__call__"""
+
+        if self.det_box_type == "quad":
+            dt_boxes = np.array(dt_polys)
+            output_list = []
+            for bno in range(len(dt_boxes)):
+                tmp_box = copy.deepcopy(dt_boxes[bno])
+                img_crop = self.get_minarea_rect_crop(img, tmp_box)
+                output_list.append(
+                    {
+                        "img": img_crop,
+                        "img_size": [img_crop.shape[1], img_crop.shape[0]],
+                    }
+                )
+        elif self.det_box_type == "poly":
+            output_list = []
+            dt_boxes = dt_polys
+            for bno in range(len(dt_boxes)):
+                tmp_box = copy.deepcopy(dt_boxes[bno])
+                img_crop = self.get_poly_rect_crop(img.copy(), tmp_box)
+                output_list.append(
+                    {
+                        "img": img_crop,
+                        "img_size": [img_crop.shape[1], img_crop.shape[0]],
+                    }
+                )
+        else:
+            raise NotImplementedError
+
+        return output_list
+
+    def get_minarea_rect_crop(self, img, points):
+        """get_minarea_rect_crop"""
+        bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        index_a, index_b, index_c, index_d = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index_a = 0
+            index_d = 1
+        else:
+            index_a = 1
+            index_d = 0
+        if points[3][1] > points[2][1]:
+            index_b = 2
+            index_c = 3
+        else:
+            index_b = 3
+            index_c = 2
+
+        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
+        crop_img = self.get_rotate_crop_image(img, np.array(box))
+        return crop_img
+
+    def get_rotate_crop_image(self, img, points):
+        """
+        img_height, img_width = img.shape[0:2]
+        left = int(np.min(points[:, 0]))
+        right = int(np.max(points[:, 0]))
+        top = int(np.min(points[:, 1]))
+        bottom = int(np.max(points[:, 1]))
+        img_crop = img[top:bottom, left:right, :].copy()
+        points[:, 0] = points[:, 0] - left
+        points[:, 1] = points[:, 1] - top
+        """
+        assert len(points) == 4, "shape of points must be 4*2"
+        img_crop_width = int(
+            max(
+                np.linalg.norm(points[0] - points[1]),
+                np.linalg.norm(points[2] - points[3]),
+            )
+        )
+        img_crop_height = int(
+            max(
+                np.linalg.norm(points[0] - points[3]),
+                np.linalg.norm(points[1] - points[2]),
+            )
+        )
+        pts_std = np.float32(
+            [
+                [0, 0],
+                [img_crop_width, 0],
+                [img_crop_width, img_crop_height],
+                [0, img_crop_height],
+            ]
+        )
+        M = cv2.getPerspectiveTransform(points, pts_std)
+        dst_img = cv2.warpPerspective(
+            img,
+            M,
+            (img_crop_width, img_crop_height),
+            borderMode=cv2.BORDER_REPLICATE,
+            flags=cv2.INTER_CUBIC,
+        )
+        dst_img_height, dst_img_width = dst_img.shape[0:2]
+        if dst_img_height * 1.0 / dst_img_width >= 1.5:
+            dst_img = np.rot90(dst_img)
+        return dst_img
+
+    def reorder_poly_edge(self, points):
+        """Get the respective points composing head edge, tail edge, top
+        sideline and bottom sideline.
+
+        Args:
+            points (ndarray): The points composing a text polygon.
+
+        Returns:
+            head_edge (ndarray): The two points composing the head edge of text
+                polygon.
+            tail_edge (ndarray): The two points composing the tail edge of text
+                polygon.
+            top_sideline (ndarray): The points composing top curved sideline of
+                text polygon.
+            bot_sideline (ndarray): The points composing bottom curved sideline
+                of text polygon.
+        """
+
+        assert points.ndim == 2
+        assert points.shape[0] >= 4
+        assert points.shape[1] == 2
+
+        orientation_thr = 2.0  # an empirical hyperparameter
+
+        head_inds, tail_inds = self.find_head_tail(points, orientation_thr)
+        head_edge, tail_edge = points[head_inds], points[tail_inds]
+
+        pad_points = np.vstack([points, points])
+        if tail_inds[1] < 1:
+            tail_inds[1] = len(points)
+        sideline1 = pad_points[head_inds[1] : tail_inds[1]]
+        sideline2 = pad_points[tail_inds[1] : (head_inds[1] + len(points))]
+        return head_edge, tail_edge, sideline1, sideline2
+
+    def vector_slope(self, vec):
+        assert len(vec) == 2
+        return abs(vec[1] / (vec[0] + 1e-8))
+
+    def find_head_tail(self, points, orientation_thr):
+        """Find the head edge and tail edge of a text polygon.
+
+        Args:
+            points (ndarray): The points composing a text polygon.
+            orientation_thr (float): The threshold for distinguishing between
+                head edge and tail edge among the horizontal and vertical edges
+                of a quadrangle.
+
+        Returns:
+            head_inds (list): The indexes of two points composing head edge.
+            tail_inds (list): The indexes of two points composing tail edge.
+        """
+
+        assert points.ndim == 2
+        assert points.shape[0] >= 4
+        assert points.shape[1] == 2
+        assert isinstance(orientation_thr, float)
+
+        if len(points) > 4:
+            pad_points = np.vstack([points, points[0]])
+            edge_vec = pad_points[1:] - pad_points[:-1]
+
+            theta_sum = []
+            adjacent_vec_theta = []
+            for i, edge_vec1 in enumerate(edge_vec):
+                adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
+                adjacent_edge_vec = edge_vec[adjacent_ind]
+                temp_theta_sum = np.sum(self.vector_angle(edge_vec1, adjacent_edge_vec))
+                temp_adjacent_theta = self.vector_angle(
+                    adjacent_edge_vec[0], adjacent_edge_vec[1]
+                )
+                theta_sum.append(temp_theta_sum)
+                adjacent_vec_theta.append(temp_adjacent_theta)
+            theta_sum_score = np.array(theta_sum) / np.pi
+            adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
+            poly_center = np.mean(points, axis=0)
+            edge_dist = np.maximum(
+                norm(pad_points[1:] - poly_center, axis=-1),
+                norm(pad_points[:-1] - poly_center, axis=-1),
+            )
+            dist_score = edge_dist / np.max(edge_dist)
+            position_score = np.zeros(len(edge_vec))
+            score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
+            score += 0.35 * dist_score
+            if len(points) % 2 == 0:
+                position_score[(len(score) // 2 - 1)] += 1
+                position_score[-1] += 1
+            score += 0.1 * position_score
+            pad_score = np.concatenate([score, score])
+            score_matrix = np.zeros((len(score), len(score) - 3))
+            x = np.arange(len(score) - 3) / float(len(score) - 4)
+            gaussian = (
+                1.0
+                / (np.sqrt(2.0 * np.pi) * 0.5)
+                * np.exp(-np.power((x - 0.5) / 0.5, 2.0) / 2)
+            )
+            gaussian = gaussian / np.max(gaussian)
+            for i in range(len(score)):
+                score_matrix[i, :] = (
+                    score[i]
+                    + pad_score[(i + 2) : (i + len(score) - 1)] * gaussian * 0.3
+                )
+
+            head_start, tail_increment = np.unravel_index(
+                score_matrix.argmax(), score_matrix.shape
+            )
+            tail_start = (head_start + tail_increment + 2) % len(points)
+            head_end = (head_start + 1) % len(points)
+            tail_end = (tail_start + 1) % len(points)
+
+            if head_end > tail_end:
+                head_start, tail_start = tail_start, head_start
+                head_end, tail_end = tail_end, head_end
+            head_inds = [head_start, head_end]
+            tail_inds = [tail_start, tail_end]
+        else:
+            if self.vector_slope(points[1] - points[0]) + self.vector_slope(
+                points[3] - points[2]
+            ) < self.vector_slope(points[2] - points[1]) + self.vector_slope(
+                points[0] - points[3]
+            ):
+                horizontal_edge_inds = [[0, 1], [2, 3]]
+                vertical_edge_inds = [[3, 0], [1, 2]]
+            else:
+                horizontal_edge_inds = [[3, 0], [1, 2]]
+                vertical_edge_inds = [[0, 1], [2, 3]]
+
+            vertical_len_sum = norm(
+                points[vertical_edge_inds[0][0]] - points[vertical_edge_inds[0][1]]
+            ) + norm(
+                points[vertical_edge_inds[1][0]] - points[vertical_edge_inds[1][1]]
+            )
+            horizontal_len_sum = norm(
+                points[horizontal_edge_inds[0][0]] - points[horizontal_edge_inds[0][1]]
+            ) + norm(
+                points[horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1][1]]
+            )
+
+            if vertical_len_sum > horizontal_len_sum * orientation_thr:
+                head_inds = horizontal_edge_inds[0]
+                tail_inds = horizontal_edge_inds[1]
+            else:
+                head_inds = vertical_edge_inds[0]
+                tail_inds = vertical_edge_inds[1]
+
+        return head_inds, tail_inds
+
+    def vector_angle(self, vec1, vec2):
+        if vec1.ndim > 1:
+            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
+        else:
+            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
+        if vec2.ndim > 1:
+            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
+        else:
+            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
+        return np.arccos(np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))
+
+    def get_minarea_rect(self, img, points):
+        bounding_box = cv2.minAreaRect(points)
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        index_a, index_b, index_c, index_d = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index_a = 0
+            index_d = 1
+        else:
+            index_a = 1
+            index_d = 0
+        if points[3][1] > points[2][1]:
+            index_b = 2
+            index_c = 3
+        else:
+            index_b = 3
+            index_c = 2
+
+        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
+        crop_img = self.get_rotate_crop_image(img, np.array(box))
+        return crop_img, box
+
+    def sample_points_on_bbox_bp(self, line, n=50):
+        """Resample n points on a line.
+
+        Args:
+            line (ndarray): The points composing a line.
+            n (int): The resampled points number.
+
+        Returns:
+            resampled_line (ndarray): The points composing the resampled line.
+        """
+        from numpy.linalg import norm
+
+        # sanity-check the input arguments
+        assert line.ndim == 2
+        assert line.shape[0] >= 2
+        assert line.shape[1] == 2
+        assert isinstance(n, int)
+        assert n > 0
+
+        length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
+        total_length = sum(length_list)
+        length_cumsum = np.cumsum([0.0] + length_list)
+        delta_length = total_length / (float(n) + 1e-8)
+        current_edge_ind = 0
+        resampled_line = [line[0]]
+
+        for i in range(1, n):
+            current_line_len = i * delta_length
+            while (
+                current_edge_ind + 1 < len(length_cumsum)
+                and current_line_len >= length_cumsum[current_edge_ind + 1]
+            ):
+                current_edge_ind += 1
+            current_edge_end_shift = current_line_len - length_cumsum[current_edge_ind]
+            if current_edge_ind >= len(length_list):
+                break
+            end_shift_ratio = current_edge_end_shift / length_list[current_edge_ind]
+            current_point = (
+                line[current_edge_ind]
+                + (line[current_edge_ind + 1] - line[current_edge_ind])
+                * end_shift_ratio
+            )
+            resampled_line.append(current_point)
+        resampled_line.append(line[-1])
+        resampled_line = np.array(resampled_line)
+        return resampled_line
+
+    def sample_points_on_bbox(self, line, n=50):
+        """Resample n points on a line.
+
+        Args:
+            line (ndarray): The points composing a line.
+            n (int): The resampled points number.
+
+        Returns:
+            resampled_line (ndarray): The points composing the resampled line.
+        """
+        assert line.ndim == 2
+        assert line.shape[0] >= 2
+        assert line.shape[1] == 2
+        assert isinstance(n, int)
+        assert n > 0
+
+        length_list = [norm(line[i + 1] - line[i]) for i in range(len(line) - 1)]
+        total_length = sum(length_list)
+        mean_length = total_length / (len(length_list) + 1e-8)
+        group = [[0]]
+        for i in range(len(length_list)):
+            point_id = i + 1
+            if length_list[i] < 0.9 * mean_length:
+                for g in group:
+                    if i in g:
+                        g.append(point_id)
+                        break
+            else:
+                g = [point_id]
+                group.append(g)
+
+        top_tail_len = norm(line[0] - line[-1])
+        if top_tail_len < 0.9 * mean_length:
+            group[0].extend(g)
+            group.remove(g)
+        mean_positions = []
+        for indices in group:
+            x_sum = 0
+            y_sum = 0
+            for index in indices:
+                x, y = line[index]
+                x_sum += x
+                y_sum += y
+            num_points = len(indices)
+            mean_x = x_sum / num_points
+            mean_y = y_sum / num_points
+            mean_positions.append((mean_x, mean_y))
+        resampled_line = np.array(mean_positions)
+        return resampled_line
+
+    def get_poly_rect_crop(self, img, points):
+        """
+        修改该函数,实现使用polygon,对不规则、弯曲文本的矫正以及crop
+        args: img: 图片 ndarrary格式
+        points: polygon格式的多点坐标 N*2 shape, ndarray格式
+        return: 矫正后的图片 ndarray格式
+        """
+        points = np.array(points).astype(np.int32).reshape(-1, 2)
+        temp_crop_img, temp_box = self.get_minarea_rect(img, points)
+
+        # compute the IoU between the minimum-area rectangle and the polygon
+        def get_union(pD, pG):
+            return Polygon(pD).union(Polygon(pG)).area
+
+        def get_intersection_over_union(pD, pG):
+            return get_intersection(pD, pG) / (get_union(pD, pG) + 1e-10)
+
+        def get_intersection(pD, pG):
+            return Polygon(pD).intersection(Polygon(pG)).area
+
+        cal_IoU = get_intersection_over_union(points, temp_box)
+
+        if cal_IoU >= 0.7:
+            points = self.sample_points_on_bbox_bp(points, 31)
+            return temp_crop_img
+
+        points_sample = self.sample_points_on_bbox(points)
+        points_sample = points_sample.astype(np.int32)
+        head_edge, tail_edge, top_line, bot_line = self.reorder_poly_edge(points_sample)
+
+        resample_top_line = self.sample_points_on_bbox_bp(top_line, 15)
+        resample_bot_line = self.sample_points_on_bbox_bp(bot_line, 15)
+
+        sideline_mean_shift = np.mean(resample_top_line, axis=0) - np.mean(
+            resample_bot_line, axis=0
+        )
+        if sideline_mean_shift[1] > 0:
+            resample_bot_line, resample_top_line = resample_top_line, resample_bot_line
+        rectifier = AutoRectifier()
+        new_points = np.concatenate([resample_top_line, resample_bot_line])
+        new_points_list = list(new_points.astype(np.float32).reshape(1, -1).tolist())
+
+        if len(img.shape) == 2:
+            img = np.stack((img,) * 3, axis=-1)
+        img_crop, image = rectifier.run(img, new_points_list, mode="homography")
+        return np.array(img_crop[0], dtype=np.uint8)
+
+
+class CropByBoxes(BaseComponent):
+    """Crop Image by Box"""
+
+    entities = "CropByBoxes"
+
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, img, boxes):
+        """__call__"""
+        output_list = []
+        for bbox_info in boxes:
+            label_id = bbox_info["cls_id"]
+            box = bbox_info["coordinate"]
+            label = bbox_info.get("label", label_id)
+            xmin, ymin, xmax, ymax = [int(i) for i in box]
+            img_crop = img[ymin:ymax, xmin:xmax].copy()
+            output_list.append({"img": img_crop, "box": box, "label": label})
+        return output_list
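
A usage sketch of the two crop components (illustrative, not part of the commit); the image path and boxes are made up, and the inputs mirror what the detection models in this commit produce:

import cv2
from paddlex.inference.pipelines_new.components import CropByPolys, CropByBoxes

img = cv2.imread("./test_imgs/general_ocr_002.png")  # illustrative path

# Quadrilateral text boxes (4 x (x, y) points each), e.g. from text detection.
dt_polys = [[[60, 40], [220, 40], [220, 90], [60, 90]]]
crops = CropByPolys(det_box_type="quad")(img, dt_polys)
print(crops[0]["img_size"])  # [width, height] of the rectified crop

# Axis-aligned layout boxes, e.g. from layout detection.
boxes = [{"cls_id": 0, "label": "table", "coordinate": [50, 30, 400, 300]}]
regions = CropByBoxes()(img, boxes)
print(regions[0]["label"])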

+ 939 - 0
paddlex/inference/pipelines_new/components/common/seal_det_warp.py

@@ -0,0 +1,939 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, sys
+import numpy as np
+from numpy import cos, sin, arctan, sqrt
+import cv2
+import copy
+import time
+
+from .....utils import logging
+
+def Homography(
+    image,
+    img_points,
+    world_width,
+    world_height,
+    interpolation=cv2.INTER_CUBIC,
+    ratio_width=1.0,
+    ratio_height=1.0,
+):
+    _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
+
+    expand_x = int(0.5 * world_width * (ratio_width - 1))
+    expand_y = int(0.5 * world_height * (ratio_height - 1))
+
+    pt_lefttop = [expand_x, expand_y]
+    pt_righttop = [expand_x + world_width, expand_y]
+    pt_leftbottom = [expand_x + world_width, expand_y + world_height]
+    pt_rightbottom = [expand_x, expand_y + world_height]
+
+    pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
+
+    img_crop_width = int(world_width * ratio_width)
+    img_crop_height = int(world_height * ratio_height)
+
+    M = cv2.getPerspectiveTransform(_points, pts_std)
+
+    dst_img = cv2.warpPerspective(
+        image,
+        M,
+        (img_crop_width, img_crop_height),
+        borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
+        flags=interpolation,
+    )
+
+    return dst_img
+
+
+class PlanB:
+    def __call__(
+        self,
+        image,
+        points,
+        curveTextRectifier,
+        interpolation=cv2.INTER_LINEAR,
+        ratio_width=1.0,
+        ratio_height=1.0,
+        loss_thresh=5.0,
+        square=False,
+    ):
+        """
+        Plan B using sub-image when it failed in original image
+        :param image:
+        :param points:
+        :param curveTextRectifier: CurveTextRectifier
+        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
+        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
+        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
+        :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
+        :param square: crop square image or not. True or False. The default is False
+        :return:
+        """
+        h, w = image.shape[:2]
+        _points = np.array(points).reshape(-1, 2).astype(np.float32)
+        x_min = int(np.min(_points[:, 0]))
+        y_min = int(np.min(_points[:, 1]))
+        x_max = int(np.max(_points[:, 0]))
+        y_max = int(np.max(_points[:, 1]))
+        dx = x_max - x_min
+        dy = y_max - y_min
+        max_d = max(dx, dy)
+        mean_pt = np.mean(_points, 0)
+
+        expand_x = (ratio_width - 1.0) * 0.5 * max_d
+        expand_y = (ratio_height - 1.0) * 0.5 * max_d
+
+        if square:
+            x_min = np.clip(int(mean_pt[0] - max_d - expand_x), 0, w - 1)
+            y_min = np.clip(int(mean_pt[1] - max_d - expand_y), 0, h - 1)
+            x_max = np.clip(int(mean_pt[0] + max_d + expand_x), 0, w - 1)
+            y_max = np.clip(int(mean_pt[1] + max_d + expand_y), 0, h - 1)
+        else:
+            x_min = np.clip(int(x_min - expand_x), 0, w - 1)
+            y_min = np.clip(int(y_min - expand_y), 0, h - 1)
+            x_max = np.clip(int(x_max + expand_x), 0, w - 1)
+            y_max = np.clip(int(y_max + expand_y), 0, h - 1)
+
+        new_image = image[y_min:y_max, x_min:x_max, :].copy()
+        new_points = _points.copy()
+        new_points[:, 0] -= x_min
+        new_points[:, 1] -= y_min
+
+        dst_img, loss = curveTextRectifier(
+            new_image,
+            new_points,
+            interpolation,
+            ratio_width,
+            ratio_height,
+            mode="calibration",
+        )
+
+        return dst_img, loss
+
+
+class CurveTextRectifier:
+    """
+    spatial transformer via monocular vision
+    """
+
+    def __init__(self):
+        self.get_virtual_camera_parameter()
+
+    def get_virtual_camera_parameter(self):
+        vcam_thz = 0
+        vcam_thx1 = 180
+        vcam_thy = 180
+        vcam_thx2 = 0
+
+        vcam_x = 0
+        vcam_y = 0
+        vcam_z = 100
+
+        radian = np.pi / 180
+
+        angle_z = radian * vcam_thz
+        angle_x1 = radian * vcam_thx1
+        angle_y = radian * vcam_thy
+        angle_x2 = radian * vcam_thx2
+
+        optic_x = vcam_x
+        optic_y = vcam_y
+        optic_z = vcam_z
+
+        fu = 100
+        fv = 100
+
+        matT = np.zeros((4, 4))
+        matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(
+            angle_y
+        )
+        matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
+            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
+        )
+        matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
+            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
+        )
+        matT[0, 3] = optic_x
+        matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(
+            angle_y
+        )
+        matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
+            cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
+        )
+        matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
+            cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
+        )
+        matT[1, 3] = optic_y
+        matT[2, 0] = -cos(angle_x1) * sin(angle_y)
+        matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(
+            angle_x2
+        )
+        matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(
+            angle_x2
+        )
+        matT[2, 3] = optic_z
+        matT[3, 0] = 0
+        matT[3, 1] = 0
+        matT[3, 2] = 0
+        matT[3, 3] = 1
+
+        matS = np.zeros((4, 4))
+        matS[2, 3] = 0.5
+        matS[3, 2] = 0.5
+
+        self.ifu = 1 / fu
+        self.ifv = 1 / fv
+
+        self.matT = matT
+        self.matS = matS
+        self.K = np.dot(matT.T, matS)
+        self.K = np.dot(self.K, matT)
+
+    def vertical_text_process(self, points, org_size):
+        """
+        change sequence amd process
+        :param points:
+        :param org_size:
+        :return:
+        """
+        org_w, org_h = org_size
+        _points = np.array(points).reshape(-1).tolist()
+        _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
+
+        # convert to horizontal points
+        adjusted_points = np.zeros(_points.shape, dtype=np.float32)
+        adjusted_points[:, 0] = _points[:, 1]
+        adjusted_points[:, 1] = org_h - _points[:, 0] - 1
+
+        _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(
+            adjusted_points
+        )
+
+        # # convert to vertical points back
+        image_coord = _points.reshape(1, -1, 2)
+        world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
+        world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
+        world_coord[:, :, 1] = _world_coord[:, :, 0]
+        world_coord[:, :, 2] = _world_coord[:, :, 2]
+        new_image_size = (_new_image_size[1], _new_image_size[0])
+
+        return image_coord, world_coord, new_image_size
+
+    def horizontal_text_process(self, points):
+        """
+        get image coordinate and world coordinate
+        :param points:
+        :return:
+        """
+        poly = np.array(points).reshape(-1)
+
+        dx_list = []
+        dy_list = []
+        for i in range(1, len(poly) // 2):
+            xdx = poly[i * 2] - poly[(i - 1) * 2]
+            xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
+            d = sqrt(xdx**2 + xdy**2)
+            dx_list.append(d)
+
+        for i in range(0, len(poly) // 4):
+            ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
+            ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
+            d = sqrt(ydx**2 + ydy**2)
+            dy_list.append(d)
+
+        dx_list = [
+            (dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2
+            for i in range(len(dx_list) // 2)
+        ]
+
+        height = np.around(np.mean(dy_list))
+
+        rect_coord = [0, 0]
+        for i in range(0, len(poly) // 4 - 1):
+            x = rect_coord[-2]
+            x += dx_list[i]
+            y = 0
+            rect_coord.append(x)
+            rect_coord.append(y)
+
+        rect_coord_half = copy.deepcopy(rect_coord)
+        for i in range(0, len(poly) // 4):
+            x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
+            y = height
+            rect_coord.append(x)
+            rect_coord.append(y)
+
+        np_rect_coord = np.array(rect_coord).reshape(-1, 2)
+        x_min = np.min(np_rect_coord[:, 0])
+        y_min = np.min(np_rect_coord[:, 1])
+        x_max = np.max(np_rect_coord[:, 0])
+        y_max = np.max(np_rect_coord[:, 1])
+        new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
+        x_mean = (x_max - x_min) / 2
+        y_mean = (y_max - y_min) / 2
+        np_rect_coord[:, 0] -= x_mean
+        np_rect_coord[:, 1] -= y_mean
+        rect_coord = np_rect_coord.reshape(-1).tolist()
+
+        rect_coord = np.array(rect_coord).reshape(-1, 2)
+        world_coord = np.zeros((len(rect_coord), 3))
+
+        world_coord[:, :2] = rect_coord
+
+        image_coord = np.array(poly).reshape(1, -1, 2)
+        world_coord = world_coord.reshape(1, -1, 3)
+
+        return image_coord, world_coord, new_image_size
+
+    def horizontal_text_estimate(self, points):
+        """
+        horizontal or vertical text
+        :param points:
+        :return:
+        """
+        pts = np.array(points).reshape(-1, 2)
+        x_min = int(np.min(pts[:, 0]))
+        y_min = int(np.min(pts[:, 1]))
+        x_max = int(np.max(pts[:, 0]))
+        y_max = int(np.max(pts[:, 1]))
+        x = x_max - x_min
+        y = y_max - y_min
+        is_horizontal_text = True
+        if y / x > 1.5:  # vertical text condition
+            is_horizontal_text = False
+        return is_horizontal_text
+
+    def virtual_camera_to_world(self, size):
+        ifu, ifv = self.ifu, self.ifv
+        K, matT = self.K, self.matT
+
+        ppu = size[0] / 2 + 1e-6
+        ppv = size[1] / 2 + 1e-6
+
+        P = np.zeros((size[1], size[0], 3))
+
+        lu = np.array([i for i in range(size[0])])
+        lv = np.array([i for i in range(size[1])])
+        u, v = np.meshgrid(lu, lv)
+
+        yp = (v - ppv) * ifv
+        xp = (u - ppu) * ifu
+        angle_a = arctan(sqrt(xp * xp + yp * yp))
+        angle_b = arctan(yp / xp)
+
+        D0 = sin(angle_a) * cos(angle_b)
+        D1 = sin(angle_a) * sin(angle_b)
+        D2 = cos(angle_a)
+
+        D0[xp <= 0] = -D0[xp <= 0]
+        D1[xp <= 0] = -D1[xp <= 0]
+
+        ratio_a = (
+            K[0, 0] * D0 * D0
+            + K[1, 1] * D1 * D1
+            + K[2, 2] * D2 * D2
+            + (K[0, 1] + K[1, 0]) * D0 * D1
+            + (K[0, 2] + K[2, 0]) * D0 * D2
+            + (K[1, 2] + K[2, 1]) * D1 * D2
+        )
+        ratio_b = (
+            (K[0, 3] + K[3, 0]) * D0
+            + (K[1, 3] + K[3, 1]) * D1
+            + (K[2, 3] + K[3, 2]) * D2
+        )
+        ratio_c = K[3, 3] * np.ones(ratio_b.shape)
+
+        delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
+        t = np.zeros(delta.shape)
+        t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
+        t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (
+            2 * ratio_a[ratio_a != 0]
+        )
+        t[delta < 0] = 0
+
+        P[:, :, 0] = matT[0, 3] + t * (
+            matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2
+        )
+        P[:, :, 1] = matT[1, 3] + t * (
+            matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2
+        )
+        P[:, :, 2] = matT[2, 3] + t * (
+            matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2
+        )
+
+        return P
+
+    def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
+        r11 = rotation[0, 0]
+        r12 = rotation[0, 1]
+        r13 = rotation[0, 2]
+        r21 = rotation[1, 0]
+        r22 = rotation[1, 1]
+        r23 = rotation[1, 2]
+        r31 = rotation[2, 0]
+        r32 = rotation[2, 1]
+        r33 = rotation[2, 2]
+
+        t1 = tvec[0]
+        t2 = tvec[1]
+        t3 = tvec[2]
+
+        k1 = distCoeffs[0]
+        k2 = distCoeffs[1]
+        p1 = distCoeffs[2]
+        p2 = distCoeffs[3]
+        k3 = distCoeffs[4]
+        k4 = distCoeffs[5]
+        k5 = distCoeffs[6]
+        k6 = distCoeffs[7]
+
+        if len(distCoeffs) > 8:
+            s1 = distCoeffs[8]
+            s2 = distCoeffs[9]
+            s3 = distCoeffs[10]
+            s4 = distCoeffs[11]
+        else:
+            s1 = s2 = s3 = s4 = 0
+
+        if len(distCoeffs) > 12:
+            tx = distCoeffs[12]
+            ty = distCoeffs[13]
+        else:
+            tx = ty = 0
+
+        fu = intrinsic[0, 0]
+        fv = intrinsic[1, 1]
+        ppu = intrinsic[0, 2]
+        ppv = intrinsic[1, 2]
+
+        cos_tx = cos(tx)
+        cos_ty = cos(ty)
+        sin_tx = sin(tx)
+        sin_ty = sin(ty)
+
+        tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
+        tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
+        tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
+        tao21 = -sin_tx * sin_ty
+        tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
+        tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
+
+        P = np.zeros((image_size[1], image_size[0], 2))
+
+        c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
+        c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
+        c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
+
+        x1 = c1 / c3
+        y1 = c2 / c3
+        x12 = x1 * x1
+        y12 = y1 * y1
+        x1y1 = 2 * x1 * y1
+        r2 = x12 + y12
+        r4 = r2 * r2
+        r6 = r2 * r4
+
+        radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (
+            1 + k4 * r2 + k5 * r4 + k6 * r6
+        )
+        x2 = (
+            x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
+        )
+        y2 = (
+            y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
+        )
+
+        x3 = tao11 * x2 + tao12 * y2 + tao13
+        y3 = tao21 * x2 + tao22 * y2 + tao23
+
+        P[:, :, 0] = fu * x3 + ppu
+        P[:, :, 1] = fv * y3 + ppv
+        P[c3 <= 0] = 0
+
+        return P
+
+    def spatial_transform(
+        self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation
+    ):
+        rotation, _ = cv2.Rodrigues(rvecs)
+        world_map = self.virtual_camera_to_world(new_image_size)
+        image_map = self.world_to_image(
+            new_image_size, world_map, mtx, dist, rotation, tvecs
+        )
+        image_map = image_map.astype(np.float32)
+        dst = cv2.remap(
+            image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation
+        )
+        return dst
+
+    def calibrate(self, org_size, image_coord, world_coord):
+        """
+        calibration
+        :param org_size:
+        :param image_coord:
+        :param world_coord:
+        :return:
+        """
+        # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL  | cv2.CALIB_THIN_PRISM_MODEL
+        flag = cv2.CALIB_RATIONAL_MODEL
+        flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
+        flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
+        flag4 = (
+            cv2.CALIB_RATIONAL_MODEL
+            | cv2.CALIB_ZERO_TANGENT_DIST
+            | cv2.CALIB_FIX_ASPECT_RATIO
+        )
+        flag5 = (
+            cv2.CALIB_RATIONAL_MODEL
+            | cv2.CALIB_TILTED_MODEL
+            | cv2.CALIB_ZERO_TANGENT_DIST
+        )
+        flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
+        flag_list = [flag2, flag3, flag4, flag5, flag6]
+
+        ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
+            world_coord.astype(np.float32),
+            image_coord.astype(np.float32),
+            org_size,
+            None,
+            None,
+            flags=flag,
+        )
+        if ret > 2:
+            # strategies
+            min_ret = ret
+            for i, flag in enumerate(flag_list):
+                _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(
+                    world_coord.astype(np.float32),
+                    image_coord.astype(np.float32),
+                    org_size,
+                    None,
+                    None,
+                    flags=flag,
+                )
+                if _ret < min_ret:
+                    min_ret = _ret
+                    ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
+
+        return ret, mtx, dist, rvecs, tvecs
+
+    def dc_homo(
+        self,
+        img,
+        img_points,
+        obj_points,
+        is_horizontal_text,
+        interpolation=cv2.INTER_LINEAR,
+        ratio_width=1.0,
+        ratio_height=1.0,
+    ):
+        """
+        divide and conquer: homography
+        # ratio_width and ratio_height must be 1.0 here
+        """
+        _img_points = img_points.reshape(-1, 2)
+        _obj_points = obj_points.reshape(-1, 3)
+
+        homo_img_list = []
+        width_list = []
+        height_list = []
+        # divide and conquer
+        for i in range(len(_img_points) // 2 - 1):
+            new_img_points = np.zeros((4, 2)).astype(np.float32)
+            new_obj_points = np.zeros((4, 2)).astype(np.float32)
+
+            new_img_points[0:2, :] = _img_points[i : (i + 2), :2]
+            new_img_points[2:4, :] = _img_points[::-1, :][i : (i + 2), :2][::-1, :]
+
+            new_obj_points[0:2, :] = _obj_points[i : (i + 2), :2]
+            new_obj_points[2:4, :] = _obj_points[::-1, :][i : (i + 2), :2][::-1, :]
+
+            if is_horizontal_text:
+                world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
+                world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
+            else:
+                world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
+                world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
+
+            homo_img = Homography(
+                img,
+                new_img_points,
+                world_width,
+                world_height,
+                interpolation=interpolation,
+                ratio_width=ratio_width,
+                ratio_height=ratio_height,
+            )
+
+            homo_img_list.append(homo_img)
+            _h, _w = homo_img.shape[:2]
+            width_list.append(_w)
+            height_list.append(_h)
+
+        # stitching
+        rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(
+            np.uint8
+        )
+
+        st = 0
+        for homo_img, w, h in zip(homo_img_list, width_list, height_list):
+            rectified_image[:h, st : st + w, :] = homo_img
+            st += w
+
+        if not is_horizontal_text:
+            # vertical rotation
+            rectified_image = np.rot90(rectified_image, 3)
+
+        return rectified_image
+
+    def Homography(
+        self,
+        image,
+        img_points,
+        world_width,
+        world_height,
+        interpolation=cv2.INTER_CUBIC,
+        ratio_width=1.0,
+        ratio_height=1.0,
+    ):
+        _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
+
+        expand_x = int(0.5 * world_width * (ratio_width - 1))
+        expand_y = int(0.5 * world_height * (ratio_height - 1))
+
+        pt_lefttop = [expand_x, expand_y]
+        pt_righttop = [expand_x + world_width, expand_y]
+        pt_leftbottom = [expand_x + world_width, expand_y + world_height]
+        pt_rightbottom = [expand_x, expand_y + world_height]
+
+        pts_std = np.float32([pt_lefttop, pt_righttop, pt_leftbottom, pt_rightbottom])
+
+        img_crop_width = int(world_width * ratio_width)
+        img_crop_height = int(world_height * ratio_height)
+
+        M = cv2.getPerspectiveTransform(_points, pts_std)
+
+        dst_img = cv2.warpPerspective(
+            image,
+            M,
+            (img_crop_width, img_crop_height),
+            borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
+            flags=interpolation,
+        )
+
+        return dst_img
+
+    def __call__(
+        self,
+        image_data,
+        points,
+        interpolation=cv2.INTER_LINEAR,
+        ratio_width=1.0,
+        ratio_height=1.0,
+        mode="calibration",
+    ):
+        """
+        spatial transform for a poly text
+        :param image_data:
+        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
+        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
+        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
+        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
+        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
+        :return:
+        """
+        org_h, org_w = image_data.shape[:2]
+        org_size = (org_w, org_h)
+        self.image = image_data
+
+        is_horizontal_text = self.horizontal_text_estimate(points)
+        if is_horizontal_text:
+            image_coord, world_coord, new_image_size = self.horizontal_text_process(
+                points
+            )
+        else:
+            image_coord, world_coord, new_image_size = self.vertical_text_process(
+                points, org_size
+            )
+
+        if mode.lower() == "calibration":
+            ret, mtx, dist, rvecs, tvecs = self.calibrate(
+                org_size, image_coord, world_coord
+            )
+
+            st_size = (
+                int(new_image_size[0] * ratio_width),
+                int(new_image_size[1] * ratio_height),
+            )
+            dst = self.spatial_transform(
+                image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation
+            )
+        elif mode.lower() == "homography":
+            # ratio_width and ratio_height must be 1.0 here; ret is set to 0.01 manually since no calibration loss is computed
+            ret = 0.01
+            dst = self.dc_homo(
+                image_data,
+                image_coord,
+                world_coord,
+                is_horizontal_text,
+                interpolation=interpolation,
+                ratio_width=1.0,
+                ratio_height=1.0,
+            )
+        else:
+            raise ValueError(
+                'mode must be ["calibration", "homography"], but got {}'.format(mode)
+            )
+
+        return dst, ret
+
+
+class AutoRectifier:
+    def __init__(self):
+        self.npoints = 10
+        self.curveTextRectifier = CurveTextRectifier()
+
+    @staticmethod
+    def get_rotate_crop_image(
+        img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0
+    ):
+        """
+        crop or homography
+        :param img:
+        :param points:
+        :param interpolation:
+        :param ratio_width:
+        :param ratio_height:
+        :return:
+        """
+        h, w = img.shape[:2]
+        _points = np.array(points).reshape(-1, 2).astype(np.float32)
+
+        if len(_points) != 4:
+            x_min = int(np.min(_points[:, 0]))
+            y_min = int(np.min(_points[:, 1]))
+            x_max = int(np.max(_points[:, 0]))
+            y_max = int(np.max(_points[:, 1]))
+            dx = x_max - x_min
+            dy = y_max - y_min
+            expand_x = int(0.5 * dx * (ratio_width - 1))
+            expand_y = int(0.5 * dy * (ratio_height - 1))
+            x_min = np.clip(int(x_min - expand_x), 0, w - 1)
+            y_min = np.clip(int(y_min - expand_y), 0, h - 1)
+            x_max = np.clip(int(x_max + expand_x), 0, w - 1)
+            y_max = np.clip(int(y_max + expand_y), 0, h - 1)
+
+            dst_img = img[y_min:y_max, x_min:x_max, :].copy()
+        else:
+            img_crop_width = int(
+                max(
+                    np.linalg.norm(_points[0] - _points[1]),
+                    np.linalg.norm(_points[2] - _points[3]),
+                )
+            )
+            img_crop_height = int(
+                max(
+                    np.linalg.norm(_points[0] - _points[3]),
+                    np.linalg.norm(_points[1] - _points[2]),
+                )
+            )
+
+            dst_img = Homography(
+                img,
+                _points,
+                img_crop_width,
+                img_crop_height,
+                interpolation,
+                ratio_width,
+                ratio_height,
+            )
+
+        return dst_img
+
+    def visualize(self, image_data, points_list):
+        visualization = image_data.copy()
+
+        for box in points_list:
+            box = np.array(box).reshape(-1, 2).astype(np.int32)
+            cv2.drawContours(
+                visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2
+            )
+            for i, p in enumerate(box):
+                if i != 0:
+                    cv2.circle(
+                        visualization,
+                        tuple(p),
+                        radius=1,
+                        color=(255, 0, 0),
+                        thickness=2,
+                    )
+                else:
+                    cv2.circle(
+                        visualization,
+                        tuple(p),
+                        radius=1,
+                        color=(255, 255, 0),
+                        thickness=2,
+                    )
+        return visualization
+
+    def __call__(
+        self,
+        image_data,
+        points,
+        interpolation=cv2.INTER_LINEAR,
+        ratio_width=1.0,
+        ratio_height=1.0,
+        loss_thresh=5.0,
+        mode="calibration",
+    ):
+        """
+        rectify a polygon text region, choosing among several strategies
+        :param image_data:
+        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
+        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
+        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
+        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
+        :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
+        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
+        :return:
+        """
+        _points = np.array(points).reshape(-1, 2)
+        if len(_points) >= self.npoints and len(_points) % 2 == 0:
+            try:
+                curveTextRectifier = CurveTextRectifier()
+
+                dst_img, loss = curveTextRectifier(
+                    image_data, points, interpolation, ratio_width, ratio_height, mode
+                )
+                if loss >= 2:
+                    # for robustness
+                    # a large loss means the region cannot be reconstructed correctly, so try other strategies
+                    img_list, loss_list = [dst_img], [loss]
+                    _dst_img, _loss = PlanB()(
+                        image_data,
+                        points,
+                        curveTextRectifier,
+                        interpolation,
+                        ratio_width,
+                        ratio_height,
+                        loss_thresh=loss_thresh,
+                        square=True,
+                    )
+                    img_list += [_dst_img]
+                    loss_list += [_loss]
+
+                    _dst_img, _loss = PlanB()(
+                        image_data,
+                        points,
+                        curveTextRectifier,
+                        interpolation,
+                        ratio_width,
+                        ratio_height,
+                        loss_thresh=loss_thresh,
+                        square=False,
+                    )
+                    img_list += [_dst_img]
+                    loss_list += [_loss]
+
+                    min_loss = min(loss_list)
+                    dst_img = img_list[loss_list.index(min_loss)]
+
+                    if min_loss >= loss_thresh:
+                        logging.warning(
+                            "calibration loss: {} is too large for the spatial transformer, so it failed. Falling back to get_rotate_crop_image.".format(
+                                min_loss
+                            )
+                        )
+                        dst_img = self.get_rotate_crop_image(
+                            image_data, points, interpolation, ratio_width, ratio_height
+                        )
+            except Exception as e:
+                logging.warning(f"Exception caught: {e}")
+                dst_img = self.get_rotate_crop_image(
+                    image_data, points, interpolation, ratio_width, ratio_height
+                )
+        else:
+            dst_img = self.get_rotate_crop_image(
+                image_data, _points, interpolation, ratio_width, ratio_height
+            )
+
+        return dst_img
+
+    def run(
+        self,
+        image_data,
+        points_list,
+        interpolation=cv2.INTER_LINEAR,
+        ratio_width=1.0,
+        ratio_height=1.0,
+        loss_thresh=5.0,
+        mode="calibration",
+    ):
+        """
+        run for texts in an image
+        :param image_data: numpy.ndarray. The shape is [h, w, 3]
+        :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
+        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
+        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
+        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
+        :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
+        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
+        :return: res: roi-image list, visualized_image: draw polys in original image
+        """
+        if image_data is None:
+            raise ValueError("image_data cannot be None.")
+        if not isinstance(points_list, list):
+            raise ValueError("points_list must be a list.")
+        for points in points_list:
+            if not isinstance(points, list):
+                raise ValueError("each element of points_list must be a list.")
+
+        if ratio_width < 1.0 or ratio_height < 1.0:
+            raise ValueError(
+                "ratio_width and ratio_height cannot be smaller than 1, but got {}".format(
+                    (ratio_width, ratio_height)
+                )
+            )
+
+        if mode.lower() != "calibration" and mode.lower() != "homography":
+            raise ValueError(
+                'mode must be ["calibration", "homography"], but got {}'.format(mode)
+            )
+
+        if mode.lower() == "homography" and (ratio_width != 1.0 or ratio_height != 1.0):
+            raise ValueError(
+                "ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})".format(
+                    mode, ratio_width, ratio_height
+                )
+            )
+
+        res = []
+        for points in points_list:
+            rectified_img = self(
+                image_data,
+                points,
+                interpolation,
+                ratio_width,
+                ratio_height,
+                loss_thresh=loss_thresh,
+                mode=mode,
+            )
+            res.append(rectified_img)
+
+        # visualize
+        visualized_image = self.visualize(image_data, points_list)
+
+        return res, visualized_image
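
A minimal usage sketch of the AutoRectifier added above; the image file name and the detected polygon are illustrative placeholders (at least 10 points are needed for the calibration path), and the class falls back to get_rotate_crop_image when calibration fails:

import cv2

from paddlex.inference.pipelines_new.components.common.seal_det_warp import AutoRectifier

# illustrative inputs: a BGR crop containing curved/seal text and one detected polygon
# given clockwise, starting at the top-left of the first character
image = cv2.imread("seal_text_crop.jpg")
poly = [10, 40, 60, 20, 120, 15, 180, 20, 230, 40,
        230, 90, 180, 70, 120, 65, 60, 70, 10, 90]

rectifier = AutoRectifier()
# returns the rectified ROI images and a visualization with the polygons drawn
roi_imgs, vis_img = rectifier.run(image, [poly], mode="calibration")
cv2.imwrite("rectified_0.jpg", roi_imgs[0])
cv2.imwrite("visualized.jpg", vis_img)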

+ 48 - 0
paddlex/inference/pipelines_new/components/common/sort_boxes.py

@@ -0,0 +1,48 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..base import BaseComponent
+import numpy as np
+
+class SortQuadBoxes(BaseComponent):
+    """SortQuadBoxes Component"""
+
+    entities = "SortQuadBoxes"
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, dt_polys):
+        """
+        Sort quad boxes in order from top to bottom, left to right
+        args:
+            dt_polys(array): detected quad boxes with shape [N, 4, 2]
+        return:
+            sorted boxes (list of arrays), each with shape [4, 2]
+        """
+        dt_boxes = np.array(dt_polys)
+        num_boxes = dt_boxes.shape[0]
+        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
+        _boxes = list(sorted_boxes)
+
+        for i in range(num_boxes - 1):
+            for j in range(i, -1, -1):
+                if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and (
+                    _boxes[j + 1][0][0] < _boxes[j][0][0]
+                ):
+                    tmp = _boxes[j]
+                    _boxes[j] = _boxes[j + 1]
+                    _boxes[j + 1] = tmp
+                else:
+                    break
+        return _boxes
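
A self-contained sketch of how SortQuadBoxes orders detection results, assuming the component can be constructed with no arguments as its __init__ above suggests; the coordinates are made up:

import numpy as np

from paddlex.inference.pipelines_new.components.common.sort_boxes import SortQuadBoxes

# three quad boxes: the first two lie on the same text line (y difference < 10 px) but are
# listed right-to-left; the third box belongs to the next line
dt_polys = [
    [[200, 12], [300, 12], [300, 40], [200, 40]],
    [[20, 10], [120, 10], [120, 38], [20, 38]],
    [[20, 60], [120, 60], [120, 88], [20, 88]],
]
sorted_boxes = SortQuadBoxes()(np.array(dt_polys))
# expected reading order: left box of line 1, right box of line 1, then line 2
print([box[0].tolist() for box in sorted_boxes])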

+ 15 - 0
paddlex/inference/pipelines_new/components/prompt_engeering/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .generate_kie_prompt import GenerateKIEPrompt

+ 31 - 0
paddlex/inference/pipelines_new/components/prompt_engeering/base.py

@@ -0,0 +1,31 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from .....utils.subclass_register import AutoRegisterABCMetaClass
+
+import inspect
+
+class BaseGeneratePrompt(ABC, metaclass=AutoRegisterABCMetaClass):
+    """Base Generate Prompt"""
+
+    __is_base = True
+
+    def __init__(self):
+        super().__init__()
+
+    @abstractmethod
+    def generate_prompt(self):
+        raise NotImplementedError(
+            "The method `generate_prompt` has not been implemented yet.")

+ 100 - 0
paddlex/inference/pipelines_new/components/prompt_engeering/generate_kie_prompt.py

@@ -0,0 +1,100 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import BaseGeneratePrompt
+
+class GenerateKIEPrompt(BaseGeneratePrompt):
+    """Generate KIE Prompt"""
+
+    entities = [
+        "text_kie_prompt",
+        "table_kie_prompt"
+    ]
+
+    def __init__(self, config):
+        super().__init__()
+
+        task_type = config.get('task_type', "")
+        task_description = config.get('task_description', "")  
+        output_format = config.get('output_format', "")  
+        rules_str = config.get('rules_str', "")  
+        few_shot_demo_text_content = config.get('few_shot_demo_text_content', "")  
+        few_shot_demo_key_value_list = config.get('few_shot_demo_key_value_list', "")
+
+        if task_description is None:
+            task_description = ""
+        
+        if output_format is None:
+            output_format = ""
+        
+        if rules_str is None:
+            rules_str = ""
+        
+        if few_shot_demo_text_content is None:
+            few_shot_demo_text_content = ""
+        
+        if few_shot_demo_key_value_list is None:
+            few_shot_demo_key_value_list = ""
+
+        if task_type not in self.entities:
+            raise ValueError(f"task type must be in {self.entities} of GenerateKIEPrompt.")
+
+        self.task_type = task_type
+        self.task_description = task_description
+        self.output_format = output_format
+        self.rules_str = rules_str
+        self.few_shot_demo_text_content = few_shot_demo_text_content
+        self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
+        
+    def generate_prompt(self, text_content,
+        key_list,
+        task_description=None,
+        output_format=None,
+        rules_str=None,
+        few_shot_demo_text_content=None,
+        few_shot_demo_key_value_list=None):
+        """
+        args:
+            text_content (str): OCR text or table content to extract information from.
+            key_list (list): key names to extract.
+            other args: optional overrides for the prompt sections configured in __init__.
+        return:
+            prompt (str): the assembled KIE prompt.
+        """
+
+        if task_description is None:
+            task_description = self.task_description
+
+        if output_format is None:
+            output_format = self.output_format
+
+        if rules_str is None:
+            rules_str = self.rules_str
+
+        if few_shot_demo_text_content is None:
+            few_shot_demo_text_content = self.few_shot_demo_text_content
+            
+        if few_shot_demo_key_value_list is None:
+            few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
+
+        prompt = f"""{task_description}{output_format}{rules_str}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
+        if self.task_type == "table_kie_prompt":
+            prompt += f"""\n结合上面,下面正式开始:\
+                表格内容:```{text_content}```\
+                关键词列表:{key_list}。""".replace(
+                "    ", "")
+        elif self.task_type == "text_kie_prompt":
+            prompt += f"""\n结合上面的例子,下面正式开始:\
+                OCR文字:```{text_content}```\
+                关键词列表:{key_list}。""".replace(
+                "    ", "")
+        else:
+            raise ValueError(f"{self.task_type} is currently not supported.") 
+        return prompt
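
A hedged sketch of configuring and calling GenerateKIEPrompt; the description, format and rule strings below are placeholders, not the defaults shipped in the pipeline YAML:

from paddlex.inference.pipelines_new.components.prompt_engeering import GenerateKIEPrompt

config = {
    "task_type": "text_kie_prompt",
    "task_description": "Extract the requested fields from the OCR text. ",
    "output_format": "Answer with a JSON object mapping each key to its value. ",
    "rules_str": "Use null when a key cannot be found. ",
    "few_shot_demo_text_content": "",
    "few_shot_demo_key_value_list": "",
}

prompt_generator = GenerateKIEPrompt(config)
prompt = prompt_generator.generate_prompt(
    text_content="Invoice No: 12345  Date: 2024-05-01  Total: 99.00",
    key_list=["Invoice No", "Date"],
)
print(prompt)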

+ 15 - 0
paddlex/inference/pipelines_new/components/retriever/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .ernie_bot_retriever import ErnieBotRetriever

+ 50 - 0
paddlex/inference/pipelines_new/components/retriever/base.py

@@ -0,0 +1,50 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from .....utils.subclass_register import AutoRegisterABCMetaClass
+
+import inspect
+import base64
+
+class BaseRetriever(ABC, metaclass=AutoRegisterABCMetaClass):
+    """Base Retriever"""
+
+    __is_base = True
+
+    VECTOR_STORE_PREFIX = "PADDLEX_VECTOR_STORE"
+
+    def __init__(self):
+        super().__init__()
+
+    @abstractmethod
+    def generate_vector_database(self):
+        raise NotImplementedError(
+            "The method `generate_vector_database` has not been implemented yet.")
+
+    @abstractmethod
+    def similarity_retrieval(self):
+        raise NotImplementedError(
+            "The method `similarity_retrieval` has not been implemented yet.")
+
+    def is_vector_store(self, s):
+        return s.startswith(self.VECTOR_STORE_PREFIX)
+
+    def encode_vector_store(self, vector_store_bytes):
+        return self.VECTOR_STORE_PREFIX + base64.b64encode(vector_store_bytes).decode(
+            "ascii"
+        )
+
+    def decode_vector_store(self, vector_store_str):
+        return base64.b64decode(vector_store_str[len(self.VECTOR_STORE_PREFIX):])
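
The serialization helpers above can be exercised without any LLM call; a quick round trip, assuming `retriever` is an instance of any concrete BaseRetriever subclass and the byte payload stands in for FAISS.serialize_to_bytes():

raw_bytes = b"serialized-faiss-index"               # placeholder payload
encoded = retriever.encode_vector_store(raw_bytes)  # "PADDLEX_VECTOR_STORE" + base64 text
assert retriever.is_vector_store(encoded)
assert retriever.decode_vector_store(encoded) == raw_bytes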

+ 148 - 0
paddlex/inference/pipelines_new/components/retriever/ernie_bot_retriever.py

@@ -0,0 +1,148 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import BaseRetriever
+import os
+
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+from langchain_community.embeddings import QianfanEmbeddingsEndpoint
+from langchain_community.vectorstores import FAISS
+from langchain_community import vectorstores
+from erniebot_agent.extensions.langchain.embeddings import ErnieEmbeddings
+
+import time
+
+class ErnieBotRetriever(BaseRetriever):
+    """Ernie Bot Retriever"""
+
+    entities = [
+        "ernie-4.0",
+        "ernie-3.5",
+        "ernie-3.5-8k",
+        "ernie-lite",
+        "ernie-tiny-8k",
+        "ernie-speed",
+        "ernie-speed-128k",
+        "ernie-char-8k",
+    ]
+    
+    def __init__(self, config):
+
+        super().__init__()
+
+        model_name = config.get('model_name', None)
+        api_type = config.get('api_type', None)
+        ak = config.get('ak', None)
+        sk = config.get('sk', None)
+        access_token = config.get('access_token', None)
+        
+        if model_name not in self.entities:
+            raise ValueError(f"model_name must be in {self.entities} of ErnieBotRetriever.")
+
+        if api_type not in ["aistudio", "qianfan"]:
+            raise ValueError("api_type must be one of ['aistudio', 'qianfan']")
+
+        if api_type == "aistudio" and access_token is None:
+            raise ValueError("access_token cannot be empty when api_type is aistudio.")
+            
+        if api_type == "qianfan" and (ak is None or sk is None):
+            raise ValueError("ak and sk cannot be empty when api_type is qianfan.")            
+
+        self.model_name = model_name
+        self.config = config
+        
+    def generate_vector_database(self, text_list, 
+        block_size=300,
+        separators=["\t", "\n", "。", "\n\n", ""],
+        sleep_time=0.5):
+        """
+        args:
+            text_list (list[str]): texts to build the vector database from.
+            block_size (int): chunk size used when splitting the joined text.
+            separators (list[str]): separators used by the text splitter.
+            sleep_time (float): delay between embedding requests to avoid rate limits.
+        return:
+            vectorstore: a FAISS vector store built from the split texts.
+        """
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=block_size, chunk_overlap=20, separators=separators
+        )
+        texts = text_splitter.split_text("\t".join(text_list))
+        all_splits = [Document(page_content=text) for text in texts]
+
+        api_type = self.config["api_type"]
+        if api_type == "qianfan":
+            os.environ["QIANFAN_AK"] = os.environ.get("EB_AK", self.config["ak"])
+            os.environ["QIANFAN_SK"] = os.environ.get("EB_SK", self.config["sk"])
+            user_ak = os.environ.get("EB_AK", self.config["ak"])
+            user_id = hash(user_ak)
+            vectorstore = FAISS.from_documents(
+                documents=all_splits, embedding=QianfanEmbeddingsEndpoint()
+            )
+        elif api_type == "aistudio":
+            token = self.config["access_token"]
+            vectorstore = FAISS.from_documents(
+                documents=all_splits[0:1],
+                embedding=ErnieEmbeddings(aistudio_access_token=token),
+            )
+            #### ErnieEmbeddings.chunk_size = 16
+            step = min(16, len(all_splits) - 1)
+            for shot_splits in [
+                all_splits[i : i + step] for i in range(1, len(all_splits), step)
+            ]:
+                time.sleep(sleep_time)
+                vectorstore_slice = FAISS.from_documents(
+                    documents=shot_splits,
+                    embedding=ErnieEmbeddings(aistudio_access_token=token),
+                )
+                vectorstore.merge_from(vectorstore_slice)
+        else:
+            raise ValueError(f"Unsupported api_type: {api_type}")
+
+        return vectorstore
+
+    def encode_vector_store_to_bytes(self, vectorstore):
+        vectorstore = self.encode_vector_store(vectorstore.serialize_to_bytes())
+        return vectorstore
+    
+    def decode_vector_store_from_bytes(self, vectorstore):
+        if not self.is_vector_store(vectorstore):
+            raise ValueError("The retrieved vectorstore is not for PaddleX.")
+        api_type = self.config["api_type"]
+
+        if api_type == "aistudio":
+            access_token = self.config["access_token"]
+            embeddings = ErnieEmbeddings(aistudio_access_token=access_token)
+        elif api_type == "qianfan":
+            ak = self.config["ak"]
+            sk = self.config["sk"]
+            embeddings = QianfanEmbeddingsEndpoint(qianfan_ak=ak, qianfan_sk=sk)
+        else:
+            raise ValueError(f"Unsupported api_type: {api_type}")
+        vectorstore = vectorstores.FAISS.deserialize_from_bytes(
+            self.decode_vector_store(vectorstore), embeddings
+        )
+        return vectorstore
+
+    def similarity_retrieval(self, query_text_list, vectorstore, sleep_time=0.5):
+        # match context passages for each query
+        C = []
+        for query_text in query_text_list:
+            QUESTION = query_text
+            time.sleep(sleep_time)
+            docs = vectorstore.similarity_search_with_relevance_scores(QUESTION, k=2)
+            context = [(document.page_content, score) for document, score in docs]
+            context = sorted(context, key=lambda x: x[1])
+            C.extend([x[0] for x in context[::-1]])
+        C = list(set(C))
+        all_C = " ".join(C)
+        return all_C
+        
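
A usage sketch for ErnieBotRetriever, assuming valid Qianfan credentials and the langchain/erniebot-agent dependencies are installed; the texts, keys and credentials are placeholders:

from paddlex.inference.pipelines_new.components.retriever import ErnieBotRetriever

retriever_config = {
    "model_name": "ernie-3.5",
    "api_type": "qianfan",
    "ak": "<your-qianfan-ak>",   # placeholder credential
    "sk": "<your-qianfan-sk>",   # placeholder credential
    "access_token": None,
}
retriever = ErnieBotRetriever(retriever_config)

ocr_texts = ["Invoice No: 12345", "Date: 2024-05-01", "Total: 99.00"]
vectorstore = retriever.generate_vector_database(ocr_texts)

# optionally serialize and restore the index before retrieval
encoded = retriever.encode_vector_store_to_bytes(vectorstore)
vectorstore = retriever.decode_vector_store_from_bytes(encoded)

context = retriever.similarity_retrieval(["Invoice No"], vectorstore)
print(context)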

+ 13 - 0
paddlex/inference/pipelines_new/components/utils/__init__.py

@@ -0,0 +1,13 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 204 - 0
paddlex/inference/pipelines_new/components/utils/mixin.py

@@ -0,0 +1,204 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import abstractmethod
+import json
+from pathlib import Path
+import numpy as np
+from PIL import Image
+import pandas as pd
+
+from .....utils import logging
+from ....utils.io import (
+    JsonWriter,
+    ImageReader,
+    ImageWriter,
+    CSVWriter,
+    HtmlWriter,
+    XlsxWriter,
+    TextWriter,
+)
+
+
+def _save_list_data(save_func, save_path, data, *args, **kwargs):
+    save_path = Path(save_path)
+    if data is None:
+        return
+    if isinstance(data, list):
+        for idx, single in enumerate(data):
+            save_func(
+                (
+                    save_path.parent / f"{save_path.stem}_{idx}{save_path.suffix}"
+                ).as_posix(),
+                single,
+                *args,
+                **kwargs,
+            )
+        # each list element has been saved individually above
+        return
+    save_func(save_path.as_posix(), data, *args, **kwargs)
+    logging.info(f"The result has been saved in {save_path}.")
+
+
+class StrMixin:
+    @property
+    def str(self):
+        return self._to_str(self)
+
+    def _to_str(self, data, json_format=False, indent=4, ensure_ascii=False):
+        if json_format:
+            return json.dumps(data.json, indent=indent, ensure_ascii=ensure_ascii)
+        else:
+            return str(data)
+
+    def print(self, json_format=False, indent=4, ensure_ascii=False):
+        str_ = self._to_str(
+            self, json_format=json_format, indent=indent, ensure_ascii=ensure_ascii
+        )
+        logging.info(str_)
+
+
+class JsonMixin:
+    def __init__(self):
+        self._json_writer = JsonWriter()
+        self._show_funcs.append(self.save_to_json)
+
+    def _to_json(self):
+        def _format_data(obj):
+            if isinstance(obj, np.float32):
+                return float(obj)
+            elif isinstance(obj, np.ndarray):
+                return [_format_data(item) for item in obj.tolist()]
+            elif isinstance(obj, pd.DataFrame):
+                return obj.to_json(orient="records", force_ascii=False)
+            elif isinstance(obj, Path):
+                return obj.as_posix()
+            elif isinstance(obj, dict):
+                return type(obj)({k: _format_data(v) for k, v in obj.items()})
+            elif isinstance(obj, (list, tuple)):
+                return [_format_data(i) for i in obj]
+            else:
+                return obj
+
+        return _format_data(self)
+
+    @property
+    def json(self):
+        return self._to_json()
+
+    def save_to_json(self, save_path, indent=4, ensure_ascii=False, *args, **kwargs):
+        if not str(save_path).endswith(".json"):
+            save_path = Path(save_path) / f"{Path(self['input_path']).stem}.json"
+        _save_list_data(
+            self._json_writer.write,
+            save_path,
+            self.json,
+            indent=indent,
+            ensure_ascii=ensure_ascii,
+            *args,
+            **kwargs,
+        )
+
+
+class Base64Mixin:
+    def __init__(self, *args, **kwargs):
+        self._base64_writer = TextWriter(*args, **kwargs)
+        self._show_funcs.append(self.save_to_base64)
+
+    @abstractmethod
+    def _to_base64(self):
+        raise NotImplementedError
+
+    @property
+    def base64(self):
+        return self._to_base64()
+
+    def save_to_base64(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith((".b64")):
+            fp = Path(self["input_path"])
+            save_path = Path(save_path) / f"{fp.stem}{fp.suffix}"
+        _save_list_data(
+            self._base64_writer.write, save_path, self.base64, *args, **kwargs
+        )
+
+
+class ImgMixin:
+    def __init__(self, backend="pillow", *args, **kwargs):
+        self._img_writer = ImageWriter(backend=backend, *args, **kwargs)
+        self._show_funcs.append(self.save_to_img)
+
+    @abstractmethod
+    def _to_img(self):
+        raise NotImplementedError
+
+    @property
+    def img(self):
+        image = self._to_img()
+        # The img must be a PIL.Image obj
+        if isinstance(image, np.ndarray):
+            return Image.fromarray(image)
+        return image
+
+    def save_to_img(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith((".jpg", ".png")):
+            fp = Path(self["input_path"])
+            save_path = Path(save_path) / f"{fp.stem}{fp.suffix}"
+        _save_list_data(self._img_writer.write, save_path, self.img, *args, **kwargs)
+
+
+class CSVMixin:
+    def __init__(self, backend="pandas", *args, **kwargs):
+        self._csv_writer = CSVWriter(backend=backend, *args, **kwargs)
+        self._show_funcs.append(self.save_to_csv)
+
+    @abstractmethod
+    def _to_csv(self):
+        raise NotImplementedError
+
+    def save_to_csv(self, save_path, *args, **kwargs):
+        if not str(save_path).endswith(".csv"):
+            save_path = Path(save_path) / f"{Path(self['input_path']).stem}.csv"
+        _save_list_data(
+            self._csv_writer.write, save_path, self._to_csv(), *args, **kwargs
+        )
+
+
+class HtmlMixin:
+    def __init__(self, *args, **kwargs):
+        self._html_writer = HtmlWriter(*args, **kwargs)
+        self._show_funcs.append(self.save_to_html)
+
+    @property
+    def html(self):
+        return self._to_html()
+
+    def _to_html(self):
+        return self["html"]
+
+    def save_to_html(self, save_path, *args, **kwargs):
+        if not str(save_path).endswith(".html"):
+            save_path = Path(save_path) / f"{Path(self['input_path']).stem}.html"
+        _save_list_data(self._html_writer.write, save_path, self.html, *args, **kwargs)
+
+
+class XlsxMixin:
+    def __init__(self, *args, **kwargs):
+        self._xlsx_writer = XlsxWriter(*args, **kwargs)
+        self._show_funcs.append(self.save_to_xlsx)
+
+    def _to_xlsx(self):
+        return self["html"]
+
+    def save_to_xlsx(self, save_path, *args, **kwargs):
+        if not str(save_path).endswith(".xlsx"):
+            save_path = Path(save_path) / f"{Path(self['input_path']).stem}.xlsx"
+        _save_list_data(self._xlsx_writer.write, save_path, self.html, *args, **kwargs)
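
The mixins above expect the host result class to behave like a dict and to define _show_funcs before the mixin initializers run; a minimal hypothetical composition (not a class shipped in this change) showing that contract:

from paddlex.inference.pipelines_new.components.utils.mixin import JsonMixin, StrMixin

class DemoResult(dict, StrMixin, JsonMixin):
    """Hypothetical result type used only to illustrate the mixin contract."""

    def __init__(self, data):
        dict.__init__(self, data)
        self._show_funcs = []      # populated by the mixin __init__ methods
        JsonMixin.__init__(self)

res = DemoResult({"input_path": "demo.png", "score": 0.98})
res.print(json_format=True)        # StrMixin: logs the JSON-formatted result
res.save_to_json("./output")       # JsonMixin: writes ./output/demo.json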

+ 15 - 0
paddlex/inference/pipelines_new/doc_preprocessor/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline import DocPreprocessorPipeline

+ 117 - 0
paddlex/inference/pipelines_new/doc_preprocessor/pipeline.py

@@ -0,0 +1,117 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..base import BasePipeline
+from typing import Any, Dict, Optional
+from scipy.ndimage import rotate
+from .result import DocPreprocessorResult
+
+########## [TODO] The import path below needs to be updated later
+from ...components.transforms import ReadImage
+
+class DocPreprocessorPipeline(BasePipeline):
+    """Doc Preprocessor Pipeline"""
+
+    entities = "doc_preprocessor"
+    def __init__(self,
+        config,        
+        device=None,
+        pp_option=None, 
+        use_hpip: bool = False,
+        hpi_params: Optional[Dict[str, Any]] = None):
+        super().__init__(device=device, pp_option=pp_option, 
+            use_hpip=use_hpip, hpi_params=hpi_params)
+        
+        self.use_doc_orientation_classify = True
+        if 'use_doc_orientation_classify' in config:
+            self.use_doc_orientation_classify = config['use_doc_orientation_classify']
+
+        self.use_doc_unwarping = True
+        if 'use_doc_unwarping' in config:
+            self.use_doc_unwarping = config['use_doc_unwarping']
+        
+        if self.use_doc_orientation_classify:
+            doc_ori_classify_config = config['SubModules']["DocOrientationClassify"]
+            self.doc_ori_classify_model = self.create_model(doc_ori_classify_config)
+
+        if self.use_doc_unwarping:
+            doc_unwarping_config = config['SubModules']["DocUnwarping"]
+            self.doc_unwarping_model = self.create_model(doc_unwarping_config)
+        
+        self.img_reader = ReadImage(format="BGR")
+
+    def rotate_image(self, image_array, rotate_angle):
+        """rotate image"""
+        assert (
+            0 <= rotate_angle < 360
+        ), f"rotate_angle must be in [0, 360), but got {rotate_angle}."
+        return rotate(image_array, rotate_angle, reshape=True)
+
+    def check_input_params(self, input_params):
+        
+        if input_params['use_doc_orientation_classify'] and \
+            not self.use_doc_orientation_classify:
+            raise ValueError("The model for doc orientation classify is not initialized.")
+
+
+        if input_params['use_doc_unwarping'] and \
+            not self.use_doc_unwarping:
+            raise ValueError("The model for doc unwarping is not initialized.")
+            
+        return 
+
+    def predict(self, input, 
+        use_doc_orientation_classify=True,
+        use_doc_unwarping=False,
+        **kwargs):
+
+        if not isinstance(input, list):
+            input_list = [input]
+        else:
+            input_list = input
+
+        input_params = {"use_doc_orientation_classify":use_doc_orientation_classify,
+            "use_doc_unwarping":use_doc_unwarping}
+        self.check_input_params(input_params)
+
+        img_id = 1
+        for input in input_list:
+            if isinstance(input, str):
+                image_array = next(self.img_reader(input))[0]['img']
+            else:
+                image_array = input
+
+            assert len(image_array.shape) == 3
+
+            if input_params['use_doc_orientation_classify']:
+                pred = next(self.doc_ori_classify_model(image_array))
+                angle = int(pred["label_names"][0])
+                rot_img = self.rotate_image(image_array, angle)
+            else:
+                angle = -1
+                rot_img = image_array
+
+            if input_params['use_doc_unwarping']:
+                output_img = next(self.doc_unwarping_model(rot_img))['doctr_img']
+            else:
+                output_img = rot_img
+
+            single_img_res = {"input_image":image_array,
+                "input_params":input_params,
+                "angle":angle, 
+                "rot_img":rot_img, 
+                "output_img":output_img,
+                "img_id":img_id}
+            img_id += 1
+            yield DocPreprocessorResult(single_img_res)
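
Assuming doc_pipeline is an already-constructed DocPreprocessorPipeline whose config enables both sub-models, the generator-style predict can be consumed like this; the file names are illustrative:

# `doc_pipeline` is assumed to be a DocPreprocessorPipeline built from its YAML config
for res in doc_pipeline.predict(
    ["doc_page_1.jpg", "doc_page_2.jpg"],
    use_doc_orientation_classify=True,
    use_doc_unwarping=True,
):
    print(res["angle"])            # predicted rotation angle (-1 when classification is skipped)
    res.save_to_img("./output")    # writes ./output/res_doc_preprocess_<img_id>.jpg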

+ 51 - 0
paddlex/inference/pipelines_new/doc_preprocessor/result.py

@@ -0,0 +1,51 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import random
+import numpy as np
+import cv2
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+
+from ....utils.fonts import PINGFANG_FONT_FILE_PATH, create_font
+from ..components import CVResult
+
+class DocPreprocessorResult(CVResult):
+
+    def save_to_img(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith((".jpg", ".png")):
+            img_id = self["img_id"]
+            save_path = save_path + "/res_doc_preprocess_%d.jpg" % img_id
+        super().save_to_img(save_path, *args, **kwargs)
+
+    def _to_img(self):
+        """draw doc preprocess result"""
+        image = self["input_image"]
+        angle = self["angle"]
+        rot_img = self["rot_img"]
+        output_img = self["output_img"]
+        h, w = image.shape[0:2]
+        img_show = Image.new("RGB", (w * 3, h + 25), (255, 255, 255))
+        img_show.paste(Image.fromarray(image), (0, 0, w, h))
+        img_show.paste(Image.fromarray(rot_img), (w, 0, w * 2, h))
+        img_show.paste(Image.fromarray(output_img), (w * 2, 0, w * 3, h))
+
+        draw_text = ImageDraw.Draw(img_show)
+        txt_list = ["Original Image", "Rotated Image", "Unwarping Image"]
+        for tno in range(len(txt_list)):
+            txt = txt_list[tno]
+            font = create_font(txt, (w, 20), PINGFANG_FONT_FILE_PATH)
+            draw_text.text([10 + w * tno, h + 2], txt, fill=(0, 0, 0), font=font)
+        return img_show

+ 15 - 0
paddlex/inference/pipelines_new/layout_parsing/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline import LayoutParsingPipeline

+ 205 - 0
paddlex/inference/pipelines_new/layout_parsing/pipeline.py

@@ -0,0 +1,205 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..base import BasePipeline
+from typing import Any, Dict, Optional
+import numpy as np
+import cv2
+from ..components import CropByBoxes
+from .utils import convert_points_to_boxes, get_sub_regions_ocr_res
+from .table_recognition_post_processing import get_table_recognition_res
+
+from .result import LayoutParsingResult
+
+########## [TODO] The import path below needs to be updated later
+from ...components.transforms import ReadImage
+
+class LayoutParsingPipeline(BasePipeline):
+    """Layout Parsing Pipeline"""
+
+    entities = "layout_parsing"
+    def __init__(self,
+        config,        
+        device=None,
+        pp_option=None, 
+        use_hpip: bool = False,
+        hpi_params: Optional[Dict[str, Any]] = None):
+        super().__init__(device=device, pp_option=pp_option, 
+            use_hpip=use_hpip, hpi_params=hpi_params)
+        
+        self.initialize_predictor(config)
+
+        self.img_reader = ReadImage(format="BGR")
+
+        self._crop_by_boxes = CropByBoxes()
+        
+
+    def initialize_predictor(self, config):
+        """Create the sub-models and sub-pipelines declared in the config."""
+        layout_det_config = config['SubModules']["LayoutDetection"]
+        self.layout_det_model = self.create_model(layout_det_config)
+
+        self.use_doc_preprocessor = False
+        if 'use_doc_preprocessor' in config:
+            self.use_doc_preprocessor = config['use_doc_preprocessor']
+        if self.use_doc_preprocessor:
+            doc_preprocessor_config = config['SubPipelines']['DocPreprocessor']
+            self.doc_preprocessor_pipeline = self.create_pipeline(doc_preprocessor_config)
+        
+        self.use_common_ocr = False
+        if "use_common_ocr" in config:
+            self.use_common_ocr = config['use_common_ocr']
+        if self.use_common_ocr:
+            common_ocr_config = config['SubPipelines']['CommonOCR']
+            self.common_ocr_pipeline = self.create_pipeline(common_ocr_config)
+        
+        self.use_seal_recognition = False
+        if "use_seal_recognition" in config:
+            self.use_seal_recognition = config['use_seal_recognition']
+        if self.use_seal_recognition:
+            seal_ocr_config = config['SubPipelines']['SealOCR']
+            self.seal_ocr_pipeline = self.create_pipeline(seal_ocr_config)            
+        
+        self.use_table_recognition = False
+        if "use_table_recognition" in config:
+            self.use_table_recognition = config['use_table_recognition']
+        if self.use_table_recognition:
+            table_structure_config = config['SubModules']['TableStructurePredictor']
+            self.table_structure_model = self.create_model(table_structure_config)
+            if not self.use_common_ocr:
+                common_ocr_config = config['SubPipelines']['OCR']
+                self.common_ocr_pipeline = self.create_pipeline(common_ocr_config)
+        return 
+
+    def get_text_paragraphs_ocr_res(self, overall_ocr_res, layout_det_res):
+        '''get ocr res of the text paragraphs'''
+        object_boxes = []
+        for box_info in layout_det_res['boxes']:
+            if box_info['label'].lower() in ['image', 'formula', 'table', 'seal']:
+                object_boxes.append(box_info['coordinate'])
+        object_boxes = np.array(object_boxes)
+        return get_sub_regions_ocr_res(overall_ocr_res, object_boxes, flag_within=False)
+
+    def check_input_params(self, input_params):
+        """Check that every requested feature has its sub-models/sub-pipelines initialized."""
+
+        if input_params['use_doc_preprocessor'] and not self.use_doc_preprocessor:
+            raise ValueError("The models for doc preprocessor are not initialized.")
+
+        if input_params['use_common_ocr'] and not self.use_common_ocr:
+            raise ValueError("The models for common OCR are not initialized.")
+
+        if input_params['use_seal_recognition'] and not self.use_seal_recognition:
+            raise ValueError("The models for seal recognition are not initialized.")
+
+        if input_params['use_table_recognition'] and not self.use_table_recognition:
+            raise ValueError("The models for table recognition are not initialized.")
+
+        return
+
+    def predict(self, input, 
+        use_doc_orientation_classify=True,
+        use_doc_unwarping=True,
+        use_common_ocr=True,
+        use_seal_recognition=True,
+        use_table_recognition=True,
+        **kwargs):
+        """Run layout parsing on one image or a list of images and yield LayoutParsingResult objects."""
+
+        if not isinstance(input, list):
+            input_list = [input]
+        else:
+            input_list = input
+        
+        input_params = {"use_doc_preprocessor":self.use_doc_preprocessor,
+            "use_doc_orientation_classify":use_doc_orientation_classify,
+            "use_doc_unwarping":use_doc_unwarping,
+            "use_common_ocr":use_common_ocr,
+            "use_seal_recognition":use_seal_recognition,
+            "use_table_recognition":use_table_recognition}
+            
+        if use_doc_orientation_classify or use_doc_unwarping:
+            input_params['use_doc_preprocessor'] = True
+
+        self.check_input_params(input_params)
+
+        img_id = 1
+        for input in input_list:
+            if isinstance(input, str):
+                image_array = next(self.img_reader(input))[0]['img']
+            else:
+                image_array = input
+
+            assert len(image_array.shape) == 3
+
+            if input_params['use_doc_preprocessor']:
+                doc_preprocessor_res = next(self.doc_preprocessor_pipeline(
+                    image_array, 
+                    use_doc_orientation_classify=use_doc_orientation_classify,
+                    use_doc_unwarping=use_doc_unwarping))
+                doc_preprocessor_image = doc_preprocessor_res['output_img']
+                doc_preprocessor_res['img_id'] = img_id
+            else:
+                doc_preprocessor_res = {}
+                doc_preprocessor_image = image_array
+            
+
+            ########## [TODO] RT-DETR layout detection results may contain duplicate boxes.
+            layout_det_res = next(self.layout_det_model(doc_preprocessor_image))
+
+            if input_params['use_common_ocr'] or input_params['use_table_recognition']:
+                overall_ocr_res = next(self.common_ocr_pipeline(doc_preprocessor_image))
+                overall_ocr_res['img_id'] = img_id
+                dt_boxes = convert_points_to_boxes(overall_ocr_res['dt_polys'])
+                overall_ocr_res['dt_boxes'] = dt_boxes
+            else:
+                overall_ocr_res = {}
+            
+            text_paragraphs_ocr_res = {}
+            if input_params['use_common_ocr']:
+                text_paragraphs_ocr_res = self.get_text_paragraphs_ocr_res(
+                    overall_ocr_res, layout_det_res)
+                text_paragraphs_ocr_res['img_id'] = img_id
+            
+            table_res_list = []
+            if input_params['use_table_recognition']:
+                table_region_id = 1
+                for box_info in layout_det_res['boxes']:
+                    if box_info['label'].lower() in ['table']:
+                        crop_img_info = self._crop_by_boxes(doc_preprocessor_image, [box_info])
+                        crop_img_info = crop_img_info[0]
+                        table_structure_pred = next(self.table_structure_model(
+                            crop_img_info['img']))
+                        table_recognition_res = get_table_recognition_res(
+                            crop_img_info, table_structure_pred, overall_ocr_res)
+                        table_recognition_res['table_region_id'] = table_region_id
+                        table_region_id += 1
+                        table_res_list.append(table_recognition_res)
+            
+            seal_res_list = []
+            if input_params['use_seal_recognition']:
+                seal_region_id = 1
+                for box_info in layout_det_res['boxes']:
+                    if box_info['label'].lower() in ['seal']:
+                        crop_img_info = self._crop_by_boxes(doc_preprocessor_image, [box_info])
+                        crop_img_info = crop_img_info[0]
+                        seal_ocr_res = next(self.seal_ocr_pipeline(crop_img_info['img']))
+                        seal_ocr_res['seal_region_id'] = seal_region_id
+                        seal_region_id += 1
+                        seal_res_list.append(seal_ocr_res)
+            
+            single_img_res = {"layout_det_res":layout_det_res,
+                "doc_preprocessor_res":doc_preprocessor_res,
+                "text_paragraphs_ocr_res":text_paragraphs_ocr_res,
+                "table_res_list":table_res_list,
+                "seal_res_list":seal_res_list,
+                "input_params":input_params}
+            img_id += 1
+            yield LayoutParsingResult(single_img_res)
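
A minimal usage sketch of the new layout parsing pipeline. It assumes the pipeline is registered under the name used by paddlex/configs/pipelines/layout_parsing.yaml and that `create_pipeline` resolves it; the input file name and output directory are illustrative:

    from paddlex import create_pipeline

    pipeline = create_pipeline(pipeline="layout_parsing")
    # Each yielded LayoutParsingResult bundles layout detection, OCR, table and seal results.
    for res in pipeline.predict("demo_doc.jpg"):
        res.save_results("./output")  # save_results expects an existing directory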

+ 97 - 0
paddlex/inference/pipelines_new/layout_parsing/result.py

@@ -0,0 +1,97 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import random
+import numpy as np
+import cv2
+import PIL
+import os
+from PIL import Image, ImageDraw, ImageFont
+
+from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ..components import CVResult, HtmlMixin, XlsxMixin
+
+class TableRecognitionResult(CVResult, HtmlMixin, XlsxMixin):
+    """Single-table recognition result with image, HTML and XLSX export support."""
+
+    def __init__(self, data):
+        super().__init__(data)
+        HtmlMixin.__init__(self)
+        XlsxMixin.__init__(self)
+
+    def save_to_html(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith(".html"):
+            save_path = save_path + "/res_table_%d.html" % self['table_region_id']
+        super().save_to_html(save_path, *args, **kwargs)
+
+    def _to_html(self):
+        return self["pred_html"]
+
+    def save_to_xlsx(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith(".xlsx"):
+            save_path = save_path + "/res_table_%d.xlsx" % self['table_region_id']
+        super().save_to_xlsx(save_path, *args, **kwargs)
+
+    def _to_xlsx(self):
+        return self["pred_html"]
+
+    def save_to_img(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith((".jpg", ".png")):
+            ocr_save_path = save_path + "/res_table_ocr_%d.jpg" % self['table_region_id']
+            save_path = save_path + "/res_table_cell_%d.jpg" % self['table_region_id']
+            # Only save the table OCR visualization when a directory is given;
+            # otherwise ocr_save_path would be undefined.
+            self['table_ocr_pred'].save_to_img(ocr_save_path)
+        super().save_to_img(save_path, *args, **kwargs)
+
+    def _to_img(self):
+        input_img = self['table_ocr_pred']['input_img'].copy()
+        cell_box_list = self['cell_box_list']
+        for box in cell_box_list:
+            x1, y1, x2, y2 = [int(pos) for pos in box]
+            cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
+        return input_img
+
+class LayoutParsingResult(dict):
+    """Aggregated layout parsing result for a single image."""
+
+    def __init__(self, data):
+        super().__init__(data)
+    
+    def save_results(self, save_path):
+        if not os.path.isdir(save_path):
+            raise ValueError("The save path should be a dir.")
+
+        layout_det_res = self['layout_det_res']
+        save_img_path = save_path + "/layout_det_result.jpg"
+        layout_det_res.save_to_img(save_img_path)
+
+        input_params = self['input_params']
+        if input_params['use_doc_preprocessor']:
+            save_img_path = save_path + "/doc_preprocessor_result.jpg"
+            self['doc_preprocessor_res'].save_to_img(save_img_path)
+        
+        if input_params['use_common_ocr']:
+            save_img_path = save_path + "/text_paragraphs_ocr_result.jpg"
+            self['text_paragraphs_ocr_res'].save_to_img(save_img_path)
+
+        if input_params['use_table_recognition']:
+            for tno in range(len(self['table_res_list'])):
+                table_res = self['table_res_list'][tno]
+                table_res.save_to_img(save_path)
+                table_res.save_to_html(save_path)
+                table_res.save_to_xlsx(save_path)
+        
+        if input_params['use_seal_recognition']:
+            for sno in range(len(self['seal_res_list'])):
+                seal_res = self['seal_res_list'][sno]
+                save_img_path = save_path + "/seal_%d_recognition_result.jpg" % seal_res['seal_region_id']
+                seal_res.save_to_img(save_img_path)          
+        return
+

+ 203 - 0
paddlex/inference/pipelines_new/layout_parsing/table_recognition_post_processing.py

@@ -0,0 +1,203 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .utils import convert_points_to_boxes, get_sub_regions_ocr_res
+import numpy as np
+from .result import TableRecognitionResult
+
+def get_ori_image_coordinate(x, y, box_list):
+    """
+    get the original coordinate from Cropped image to Original image.
+    Args:
+        x (int): x coordinate of cropped image
+        y (int): y coordinate of cropped image
+        box_list (list): list of table bounding boxes, eg. [[x1, y1, x2, y2, x3, y3, x4, y4]]
+    Returns:
+        list: list of original coordinates, eg. [[x1, y1, x2, y2, x3, y3, x4, y4]]
+    """
+    if len(box_list) == 0:
+        return box_list
+    offset = np.array([x, y] * 4)
+    box_list = np.array(box_list)
+    if box_list.shape[-1] == 2:
+        offset = offset.reshape(4, 2)
+    ori_box_list = offset + box_list
+    return ori_box_list
+
+def convert_table_structure_pred_bbox(table_structure_pred, 
+    crop_start_point, img_shape):
+
+    cell_points_list = table_structure_pred['bbox']
+    ori_cell_points_list = get_ori_image_coordinate(crop_start_point[0], 
+        crop_start_point[1], cell_points_list)
+    ori_cell_points_list = np.reshape(ori_cell_points_list, (-1, 4, 2))
+    cell_box_list = convert_points_to_boxes(ori_cell_points_list)
+    img_height, img_width = img_shape
+    cell_box_list = np.clip(cell_box_list, 0, 
+        [img_width, img_height, img_width, img_height])
+    table_structure_pred['cell_box_list'] = cell_box_list
+    return 
+
+def distance(box_1, box_2):
+    """
+    compute the distance between two boxes
+
+    Args:
+        box_1 (list): first rectangle box, e.g. (x1, y1, x2, y2)
+        box_2 (list): second rectangle box, e.g. (x1, y1, x2, y2)
+
+    Returns:
+        float: the weighted L1 distance between the two boxes
+    """
+    x1, y1, x2, y2 = box_1
+    x3, y3, x4, y4 = box_2
+    dis = abs(x3 - x1) + abs(y3 - y1) + abs(x4 - x2) + abs(y4 - y2)
+    dis_2 = abs(x3 - x1) + abs(y3 - y1)
+    dis_3 = abs(x4 - x2) + abs(y4 - y2)
+    return dis + min(dis_2, dis_3)
+
+def compute_iou(rec1, rec2):
+    """
+    computing IoU
+    Args:
+        rec1 (list): (x1, y1, x2, y2)
+        rec2 (list): (x1, y1, x2, y2)
+    Returns:
+        float: Intersection over Union
+    """
+    # computing area of each rectangles
+    S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
+    S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
+
+    # computing the sum_area
+    sum_area = S_rec1 + S_rec2
+
+    # find the each edge of intersect rectangle
+    left_line = max(rec1[0], rec2[0])
+    right_line = min(rec1[2], rec2[2])
+    top_line = max(rec1[1], rec2[1])
+    bottom_line = min(rec1[3], rec2[3])
+
+    # judge if there is an intersect
+    if left_line >= right_line or top_line >= bottom_line:
+        return 0.0
+    else:
+        intersect = (right_line - left_line) * (bottom_line - top_line)
+        return (intersect / (sum_area - intersect)) * 1.0
+
+def match_table_and_ocr(cell_box_list, ocr_dt_boxes):
+    """
+    match table and ocr
+
+    Args:
+        cell_box_list (list): bbox for table cell, 2 points, [left, top, right, bottom]
+        ocr_dt_boxes (list): bbox for ocr, 2 points, [left, top, right, bottom]
+
+    Returns:
+        dict: matched dict, key is the table cell index, value is the list of matched OCR box indices
+    """
+    matched = {}
+    for i, ocr_box in enumerate(np.array(ocr_dt_boxes)):
+        ocr_box = ocr_box.astype(np.float32)
+        distances = []
+        for j, table_box in enumerate(cell_box_list):
+            distances.append((distance(table_box, ocr_box), 
+                1.0 - compute_iou(table_box, ocr_box)))  # compute iou and l1 distance
+        sorted_distances = distances.copy()
+        # select det box by iou and l1 distance
+        sorted_distances = sorted(
+            sorted_distances, key=lambda item: (item[1], item[0]))
+        if distances.index(sorted_distances[0]) not in matched.keys():
+            matched[distances.index(sorted_distances[0])] = [i]
+        else:
+            matched[distances.index(sorted_distances[0])].append(i)
+    return matched
+
+def get_html_result(matched_index, ocr_contents, pred_structures):
+    """Fill the matched OCR texts into the predicted table structure tags and return the HTML string."""
+    pred_html = []
+    td_index = 0
+    head_structure = pred_structures[0:3]
+    html = "".join(head_structure)
+    table_structure = pred_structures[3:-3]
+    for tag in table_structure:
+        if "</td>" in tag:
+            if "<td></td>" == tag:
+                pred_html.extend("<td>")
+            if td_index in matched_index.keys():
+                b_with = False
+                if (
+                    "<b>" in ocr_contents[matched_index[td_index][0]]
+                    and len(matched_index[td_index]) > 1
+                ):
+                    b_with = True
+                    pred_html.extend("<b>")
+                for i, td_index_index in enumerate(matched_index[td_index]):
+                    content = ocr_contents[td_index_index]
+                    if len(matched_index[td_index]) > 1:
+                        if len(content) == 0:
+                            continue
+                        if content[0] == " ":
+                            content = content[1:]
+                        if "<b>" in content:
+                            content = content[3:]
+                        if "</b>" in content:
+                            content = content[:-4]
+                        if len(content) == 0:
+                            continue
+                        if (
+                            i != len(matched_index[td_index]) - 1
+                            and " " != content[-1]
+                        ):
+                            content += " "
+                    pred_html.extend(content)
+                if b_with:
+                    pred_html.extend("</b>")
+            if "<td></td>" == tag:
+                pred_html.append("</td>")
+            else:
+                pred_html.append(tag)
+            td_index += 1
+        else:
+            pred_html.append(tag)
+    html += "".join(pred_html)
+    end_structure = pred_structures[-3:]
+    html += "".join(end_structure)
+    return html
+
+def get_table_recognition_res(crop_img_info, table_structure_pred, overall_ocr_res):
+    '''Match the OCR results into the predicted table structure and build a TableRecognitionResult.'''
+
+    table_box = np.array([crop_img_info['box']])
+    table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box)
+
+    crop_start_point = [table_box[0][0], table_box[0][1]]
+    img_shape = overall_ocr_res['input_img'].shape[0:2]
+
+    convert_table_structure_pred_bbox(table_structure_pred, 
+        crop_start_point, img_shape)
+    
+    structures = table_structure_pred["structure"]
+    cell_box_list = table_structure_pred["cell_box_list"]
+    ocr_dt_boxes = table_ocr_pred["dt_boxes"]
+    ocr_text_res = table_ocr_pred["rec_text"]
+
+    matched_index = match_table_and_ocr(cell_box_list, ocr_dt_boxes)
+    pred_html = get_html_result(matched_index, ocr_text_res, structures)
+
+    single_img_res = {"cell_box_list":cell_box_list, 
+        "table_ocr_pred":table_ocr_pred,
+        "pred_html":pred_html}
+    return TableRecognitionResult(single_img_res)
+
+
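
A small worked example of the cell/OCR matching above, using toy boxes (all numbers are illustrative; the import path is the new module path from this commit):

    from paddlex.inference.pipelines_new.layout_parsing.table_recognition_post_processing import (
        match_table_and_ocr,
    )

    cell_box_list = [[0, 0, 100, 30], [100, 0, 200, 30]]  # two table cells, [x1, y1, x2, y2]
    ocr_dt_boxes = [[5, 5, 95, 25], [110, 5, 190, 25]]    # two detected text boxes
    # Each OCR box is assigned to the cell with the largest IoU (smallest 1 - IoU),
    # ties broken by the weighted L1 distance, giving {cell index: [OCR indices]}.
    print(match_table_and_ocr(cell_box_list, ocr_dt_boxes))  # {0: [0], 1: [1]}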

+ 87 - 0
paddlex/inference/pipelines_new/layout_parsing/utils.py

@@ -0,0 +1,87 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = [
+    "convert_points_to_boxes",
+    "get_sub_regions_ocr_res"
+]
+
+import numpy as np
+import copy
+
+def convert_points_to_boxes(dt_polys):
+    """Convert 4-point text polygons into axis-aligned [x_min, y_min, x_max, y_max] boxes."""
+    if len(dt_polys) > 0:
+        dt_polys_tmp = dt_polys.copy()
+        dt_polys_tmp = np.array(dt_polys_tmp)
+        boxes_left = np.min(dt_polys_tmp[:, :, 0], axis=1)
+        boxes_right = np.max(dt_polys_tmp[:, :, 0], axis=1)
+        boxes_top = np.min(dt_polys_tmp[:, :, 1], axis=1)
+        boxes_bottom = np.max(dt_polys_tmp[:, :, 1], axis=1)
+        dt_boxes = np.array([boxes_left, boxes_top, boxes_right, boxes_bottom])
+        dt_boxes = dt_boxes.T
+    else:
+        dt_boxes = np.array([])
+    return dt_boxes
+
+def get_overlap_boxes_idx(src_boxes, ref_boxes):
+    '''get overlap boxes idx''' 
+    match_idx_list = []
+    src_boxes_num = len(src_boxes)
+    if src_boxes_num > 0 and len(ref_boxes) > 0:
+        for rno in range(len(ref_boxes)):
+            ref_box = ref_boxes[rno]
+            x1 = np.maximum(ref_box[0], src_boxes[:, 0])
+            y1 = np.maximum(ref_box[1], src_boxes[:, 1])
+            x2 = np.minimum(ref_box[2], src_boxes[:, 2])
+            y2 = np.minimum(ref_box[3], src_boxes[:, 3])
+            pub_w = x2 - x1
+            pub_h = y2 - y1
+            match_idx = np.where((pub_w > 3) & (pub_h > 3))[0]
+            match_idx_list.extend(match_idx)                                   
+    return match_idx_list
+
+def get_sub_regions_ocr_res(overall_ocr_res, object_boxes, flag_within=True):
+    """
+    :param flag_within: True (within the object regions), False (outside the object regions)
+    :return:
+    """
+
+    sub_regions_ocr_res = copy.deepcopy(overall_ocr_res)
+    sub_regions_ocr_res['input_img'] = overall_ocr_res['input_img']
+    sub_regions_ocr_res['img_id'] = -1
+    sub_regions_ocr_res['dt_polys'] = []
+    sub_regions_ocr_res['rec_text'] = []
+    sub_regions_ocr_res['rec_score'] = []
+    sub_regions_ocr_res['dt_boxes'] = []
+
+    overall_text_boxes = overall_ocr_res['dt_boxes']
+    match_idx_list = get_overlap_boxes_idx(overall_text_boxes, object_boxes)
+    match_idx_list = list(set(match_idx_list))
+    for box_no in range(len(overall_text_boxes)):
+        # Keep this box if its presence in match_idx_list agrees with flag_within.
+        flag_match = (box_no in match_idx_list) == flag_within
+        if flag_match:
+            sub_regions_ocr_res['dt_polys'].append(overall_ocr_res['dt_polys'][box_no])
+            sub_regions_ocr_res['rec_text'].append(overall_ocr_res['rec_text'][box_no])
+            sub_regions_ocr_res['rec_score'].append(overall_ocr_res['rec_score'][box_no])
+            sub_regions_ocr_res['dt_boxes'].append(overall_ocr_res['dt_boxes'][box_no])
+    return sub_regions_ocr_res
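
A quick sketch of convert_points_to_boxes on a toy quadrilateral (values are illustrative; the import path is the new module path from this commit):

    import numpy as np
    from paddlex.inference.pipelines_new.layout_parsing.utils import convert_points_to_boxes

    dt_polys = np.array([[[10, 20], [110, 18], [112, 58], [12, 60]]])  # one quad, 4 (x, y) points
    print(convert_points_to_boxes(dt_polys))  # [[ 10  18 112  60]]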

+ 15 - 0
paddlex/inference/pipelines_new/ocr/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline import OCRPipeline

+ 96 - 0
paddlex/inference/pipelines_new/ocr/pipeline.py

@@ -0,0 +1,96 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..base import BasePipeline
+from typing import Any, Dict, Optional
+from ..components import SortQuadBoxes, CropByPolys
+from .result import OCRResult
+
+########## [TODO] The import path below needs to be updated later.
+from ...components.transforms import ReadImage
+
+class OCRPipeline(BasePipeline):
+    """OCR Pipeline"""
+
+    entities = "OCR"
+    def __init__(self,
+        config,        
+        device=None,
+        pp_option=None, 
+        use_hpip: bool = False,
+        hpi_params: Optional[Dict[str, Any]] = None):
+        super().__init__(device=device, pp_option=pp_option, 
+            use_hpip=use_hpip, hpi_params=hpi_params)
+        
+        text_det_model_config = config['SubModules']["TextDetection"]
+        self.text_det_model = self.create_model(text_det_model_config)
+
+        text_rec_model_config = config['SubModules']["TextRecognition"]
+        self.text_rec_model = self.create_model(text_rec_model_config)
+
+        self.text_type = config['text_type']
+
+        self._sort_quad_boxes = SortQuadBoxes()
+
+        if self.text_type == "common":
+            self._crop_by_polys = CropByPolys(det_box_type="quad")
+        elif self.text_type == "seal":
+            self._crop_by_polys = CropByPolys(det_box_type="poly")
+        else:
+            raise ValueError("Unsupported text type {}".format(self.text_type))
+
+        self.img_reader = ReadImage(format="BGR")
+
+    def predict(self, input, **kwargs):
+        """Run text detection and recognition on one image or a list of images and yield OCRResult objects."""
+        if not isinstance(input, list):
+            input_list = [input]
+        else:
+            input_list = input
+        img_id = 1
+        for input in input_list:
+            if isinstance(input, str):
+                image_array = next(self.img_reader(input))[0]['img']
+            else:
+                image_array = input
+
+            assert len(image_array.shape) == 3
+
+            det_res = next(self.text_det_model(image_array))
+
+            dt_polys = det_res['dt_polys']
+            dt_scores = det_res['dt_scores']
+
+            ########## [TODO] Confirm the filtering thresholds of the detection and recognition modules.
+
+            if self.text_type == "common":
+                dt_polys = self._sort_quad_boxes(dt_polys)
+                
+            single_img_res = {'input_img':image_array, 'dt_polys':dt_polys, \
+                "img_id":img_id, "text_type":self.text_type}
+            img_id += 1
+            single_img_res["rec_text"] = []
+            single_img_res["rec_score"] = []
+            if len(dt_polys) > 0:
+                all_subs_of_img = list(self._crop_by_polys(image_array, dt_polys))
+
+                ########## [TODO] Update in the future.
+                for sub_img in all_subs_of_img:
+                    sub_img['input'] = sub_img['img']
+                ##########
+
+                for rec_res in self.text_rec_model(all_subs_of_img):
+                    single_img_res["rec_text"].append(rec_res["rec_text"])
+                    single_img_res["rec_score"].append(rec_res["rec_score"])
+
+            yield OCRResult(single_img_res)
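
A minimal usage sketch of the new OCR pipeline, assuming the pipeline name from paddlex/configs/pipelines/OCR.yaml; the input file name and output directory are illustrative:

    from paddlex import create_pipeline

    pipeline = create_pipeline(pipeline="OCR")
    for res in pipeline.predict("demo_text.png"):
        print(res["rec_text"], res["rec_score"])
        res.save_to_img("./output")  # writes ./output/res_ocr_<img_id>.jpg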

+ 160 - 0
paddlex/inference/pipelines_new/ocr/result.py

@@ -0,0 +1,160 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import random
+import numpy as np
+import cv2
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+
+from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ..components import CVResult
+
+class OCRResult(CVResult):
+    """OCR result of a single image with visualization support."""
+
+    def save_to_img(self, save_path, *args, **kwargs):
+        if not str(save_path).lower().endswith((".jpg", ".png")):
+            img_id = self["img_id"]
+            save_path = save_path + "/res_ocr_%d.jpg" % img_id
+        super().save_to_img(save_path, *args, **kwargs)
+
+    def get_minarea_rect(self, points):
+        """Return the minimum-area rectangle of the points as a 4-point quad ordered clockwise from the top-left corner."""
+        bounding_box = cv2.minAreaRect(points)
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+        index_a, index_b, index_c, index_d = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index_a = 0
+            index_d = 1
+        else:
+            index_a = 1
+            index_d = 0
+        if points[3][1] > points[2][1]:
+            index_b = 2
+            index_c = 3
+        else:
+            index_b = 3
+            index_c = 2
+
+        box = np.array(
+            [points[index_a], points[index_b], points[index_c], points[index_d]]
+        ).astype(np.int32)
+
+        return box
+
+    def _to_img(self):
+        """draw ocr result"""
+        # TODO(gaotingquan): mv to postprocess
+        drop_score = 0.5
+
+        boxes = self["dt_polys"]
+        txts = self["rec_text"]
+        scores = self["rec_score"]
+        image = self['input_img']
+        h, w = image.shape[0:2]
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        img_left = Image.fromarray(image_rgb)
+        img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
+        random.seed(0)
+        draw_left = ImageDraw.Draw(img_left)
+        if txts is None or len(txts) != len(boxes):
+            txts = [None] * len(boxes)
+        for idx, (box, txt) in enumerate(zip(boxes, txts)):
+            try:
+                if scores is not None and scores[idx] < drop_score:
+                    continue
+                color = (
+                    random.randint(0, 255),
+                    random.randint(0, 255),
+                    random.randint(0, 255),
+                )
+                box = np.array(box)
+                if len(box) > 4:
+                    pts = [(x, y) for x, y in box.tolist()]
+                    draw_left.polygon(pts, outline=color, width=8)
+                    box = self.get_minarea_rect(box)
+                    height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
+                    box[:2, 1] = np.mean(box[:, 1])
+                    box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
+                draw_left.polygon(box, fill=color)
+                img_right_text = draw_box_txt_fine(
+                    (w, h), box, txt, PINGFANG_FONT_FILE_PATH
+                )
+                pts = np.array(box, np.int32).reshape((-1, 1, 2))
+                cv2.polylines(img_right_text, [pts], True, color, 1)
+                img_right = cv2.bitwise_and(img_right, img_right_text)
+            except Exception:
+                continue
+
+        img_left = Image.blend(Image.fromarray(image_rgb), img_left, 0.5)
+        img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
+        img_show.paste(img_left, (0, 0, w, h))
+        img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
+        return img_show
+
+
+def draw_box_txt_fine(img_size, box, txt, font_path):
+    """draw box text"""
+    box_height = int(
+        math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
+    )
+    box_width = int(
+        math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
+    )
+
+    if box_height > 2 * box_width and box_height > 30:
+        img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
+        draw_text = ImageDraw.Draw(img_text)
+        if txt:
+            font = create_font(txt, (box_height, box_width), font_path)
+            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+        img_text = img_text.transpose(Image.ROTATE_270)
+    else:
+        img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
+        draw_text = ImageDraw.Draw(img_text)
+        if txt:
+            font = create_font(txt, (box_width, box_height), font_path)
+            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+
+    pts1 = np.float32(
+        [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
+    )
+    pts2 = np.array(box, dtype=np.float32)
+    M = cv2.getPerspectiveTransform(pts1, pts2)
+
+    img_text = np.array(img_text, dtype=np.uint8)
+    img_right_text = cv2.warpPerspective(
+        img_text,
+        M,
+        img_size,
+        flags=cv2.INTER_NEAREST,
+        borderMode=cv2.BORDER_CONSTANT,
+        borderValue=(255, 255, 255),
+    )
+    return img_right_text
+
+
+def create_font(txt, sz, font_path):
+    """create font"""
+    font_size = int(sz[1] * 0.8)
+    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+    if int(PIL.__version__.split(".")[0]) < 10:
+        length = font.getsize(txt)[0]
+    else:
+        length = font.getlength(txt)
+
+    if length > sz[0]:
+        font_size = int(font_size * sz[0] / length)
+        font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+    return font

+ 15 - 0
paddlex/inference/pipelines_new/pp_chatocrv3_doc/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline import PP_ChatOCRv3_doc_Pipeline

+ 329 - 0
paddlex/inference/pipelines_new/pp_chatocrv3_doc/pipeline.py

@@ -0,0 +1,329 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..base import BasePipeline
+
+from typing import Any, Dict, Optional
+
+# import numpy as np
+# import cv2
+from .result import VisualInfoResult
+import re
+import json
+import logging
+
+########## [TODO] The import path below needs to be updated later.
+from ...components.transforms import ReadImage
+
+class PP_ChatOCRv3_doc_Pipeline(BasePipeline):
+    """PP-ChatOCRv3-doc Pipeline"""
+
+    entities = "PP-ChatOCRv3-doc"
+    def __init__(self,
+        config,
+        device=None,
+        pp_option=None, 
+        use_hpip: bool = False,
+        hpi_params: Optional[Dict[str, Any]] = None):
+        super().__init__(device=device, pp_option=pp_option, 
+            use_hpip=use_hpip, hpi_params=hpi_params)
+        
+        self.initialize_predictor(config)
+
+        self.img_reader = ReadImage(format="BGR")
+        
+    def initialize_predictor(self, config):
+        """Create the sub-pipelines, LLM chat bot, retriever and prompt engineering modules from the config."""
+        layout_parsing_config = config['SubPipelines']["LayoutParser"]
+        self.layout_parsing_pipeline = self.create_pipeline(layout_parsing_config)
+
+        chat_bot_config = config['SubModules']['LLM_Chat']
+        self.chat_bot = self.create_chat_bot(chat_bot_config)
+
+        retriever_config = config['SubModules']['LLM_Retriever']
+        self.retriever = self.create_retriever(retriever_config)
+
+        text_pe_config = config['SubModules']['PromptEngneering']['KIE_CommonText']
+        self.text_pe = self.create_prompt_engeering(text_pe_config)
+        
+        table_pe_config = config['SubModules']['PromptEngneering']['KIE_Table']
+        self.table_pe = self.create_prompt_engeering(table_pe_config)
+
+        return 
+
+    def decode_visual_result(self, layout_parsing_result):
+        """Extract normal text, seal text and table text/HTML from a layout parsing result."""
+        text_paragraphs_ocr_res = layout_parsing_result['text_paragraphs_ocr_res']
+        seal_res_list = layout_parsing_result['seal_res_list']
+        normal_text_dict = {}
+        layout_type = "text"
+        for text in text_paragraphs_ocr_res['rec_text']:
+            if layout_type not in normal_text_dict:
+                normal_text_dict[layout_type] = text
+            else:
+                normal_text_dict[layout_type] += f"\n {text}"
+        
+        layout_type = "seal"
+        for seal_res in seal_res_list:
+            for text in seal_res['rec_text']:
+                if layout_type not in normal_text_dict:
+                    normal_text_dict[layout_type] = text
+                else:
+                    normal_text_dict[layout_type] += f"\n {text}"
+
+        table_res_list = layout_parsing_result['table_res_list']
+        table_text_list = []
+        table_html_list = []
+        for table_res in table_res_list:
+            table_html_list.append(table_res['pred_html'])
+            single_table_text = " ".join(table_res["table_ocr_pred"]['rec_text'])
+            table_text_list.append(single_table_text)
+
+        visual_info = {}
+        visual_info['normal_text_dict'] = normal_text_dict
+        visual_info['table_text_list'] = table_text_list
+        visual_info['table_html_list'] = table_html_list
+        return VisualInfoResult(visual_info)
+
+    def visual_predict(self, input,
+        use_doc_orientation_classify=True,
+        use_doc_unwarping=True,
+        use_common_ocr=True,
+        use_seal_recognition=True,
+        use_table_recognition=True,
+        **kwargs):
+        """Run the layout parsing pipeline on each input image and yield its result together with the decoded visual information."""
+
+        if not isinstance(input, list):
+            input_list = [input]
+        else:
+            input_list = input
+
+        img_id = 1
+        for input in input_list:
+            if isinstance(input, str):
+                image_array = next(self.img_reader(input))[0]['img']
+            else:
+                image_array = input
+
+            assert len(image_array.shape) == 3
+
+            layout_parsing_result = next(self.layout_parsing_pipeline.predict(
+                image_array,
+                use_doc_orientation_classify=use_doc_orientation_classify,
+                use_doc_unwarping=use_doc_unwarping,
+                use_common_ocr=use_common_ocr,
+                use_seal_recognition=use_seal_recognition,
+                use_table_recognition=use_table_recognition))
+            
+            visual_info = self.decode_visual_result(layout_parsing_result)
+
+            visual_predict_res = {"layout_parsing_result":layout_parsing_result,
+                "visual_info":visual_info}
+            yield visual_predict_res
+
+    def save_visual_info_list(self, visual_info, save_path):
+        if not isinstance(visual_info, list):
+            visual_info_list = [visual_info]
+        else:
+            visual_info_list = visual_info
+
+        with open(save_path, "w") as fout:
+            fout.write(json.dumps(visual_info_list, ensure_ascii=False) + "\n")
+        return
+    
+    def load_visual_info_list(self, data_path):
+        with open(data_path, "r") as fin:
+            data = fin.readline()
+            visual_info_list = json.loads(data)
+        return visual_info_list
+
+    def merge_visual_info_list(self, visual_info_list):
+        all_normal_text_list = []
+        all_table_text_list = []
+        all_table_html_list = []
+        for single_visual_info in visual_info_list:
+            normal_text_dict = single_visual_info['normal_text_dict']
+            table_text_list = single_visual_info['table_text_list']
+            table_html_list = single_visual_info['table_html_list']
+            all_normal_text_list.append(normal_text_dict)
+            all_table_text_list.extend(table_text_list)
+            all_table_html_list.extend(table_html_list)
+        return all_normal_text_list, all_table_text_list, all_table_html_list
+
+    def build_vector(self, visual_info, 
+        min_characters=3500,
+        llm_request_interval=1.0):
+        """Build a retrieval vector database from the visual information; when the merged text is shorter than min_characters, the raw items are kept instead."""
+
+        if not isinstance(visual_info, list):
+            visual_info_list = [visual_info]
+        else:
+            visual_info_list = visual_info
+        
+        all_visual_info = self.merge_visual_info_list(visual_info_list)
+        all_normal_text_list, all_table_text_list, all_table_html_list = all_visual_info
+
+        all_normal_text_str = "".join(["\n".join(e.values()) for e in all_normal_text_list])
+        vector_info = {}
+
+        all_items = []
+        for i, normal_text_dict in enumerate(all_normal_text_list):
+            for type, text in normal_text_dict.items():
+                all_items += [f"{type}:{text}"]
+
+        if len(all_normal_text_str) > min_characters:
+            vector_info['flag_too_short_text'] = False
+            vector_info['vector'] = self.retriever.generate_vector_database(
+                all_items)
+        else:
+            vector_info['flag_too_short_text'] = True  
+            vector_info['vector'] = all_items
+        return vector_info
+
+    def format_key(self, key_list):
+        """format key"""
+        if key_list == "":
+            return []
+
+        if isinstance(key_list, list):
+            return key_list
+
+        if isinstance(key_list, str):
+            key_list = re.sub(r"[\t\n\r\f\v]", "", key_list)
+            key_list = key_list.replace(",", ",").split(",")
+            return key_list
+
+        return []
+
+    def fix_llm_result_format(self, llm_result):
+        """Normalize the raw LLM output into a flat {key: value} dict, tolerating loosely formatted JSON."""
+        if not llm_result:
+            return {}
+
+        if "json" in llm_result or "```" in llm_result:
+            llm_result = (
+                llm_result.replace("```", "").replace("json", "").replace("/n", "")
+            )
+            llm_result = llm_result.replace("[", "").replace("]", "")
+
+        try:
+            llm_result = json.loads(llm_result)
+            llm_result_final = {}
+            for key in llm_result:
+                value = llm_result[key]
+                if isinstance(value, list):
+                    if len(value) > 0:
+                        llm_result_final[key] = value[0]
+                else:
+                    llm_result_final[key] = value
+            return llm_result_final
+
+        except Exception:
+            results = (
+                llm_result.replace("\n", "")
+                .replace("    ", "")
+                .replace("{", "")
+                .replace("}", "")
+            )
+            if not results.endswith('"'):
+                results = results + '"'
+            pattern = r'"(.*?)": "([^"]*)"'
+            matches = re.findall(pattern, str(results))
+            if len(matches) > 0:
+                llm_result = {k: v for k, v in matches}
+                return llm_result 
+            else:
+                return {}     
+
+    def generate_and_merge_chat_results(self, prompt, key_list,
+        final_results, failed_results):
+        """Query the LLM with the prompt and move successfully extracted keys from key_list into final_results."""
+
+        llm_result = self.chat_bot.generate_chat_results(prompt)
+        llm_result = self.fix_llm_result_format(llm_result)
+
+        for key, value in llm_result.items():
+            if value not in failed_results and key in key_list:
+                key_list.remove(key)
+                final_results[key] = value
+        return 
+        
+
+    def chat(self, visual_info, 
+        key_list, 
+        vector_info,
+        text_task_description=None,
+        text_output_format=None,
+        text_rules_str=None,
+        text_few_shot_demo_text_content=None,
+        text_few_shot_demo_key_value_list=None,        
+        table_task_description=None,
+        table_output_format=None,
+        table_rules_str=None,
+        table_few_shot_demo_text_content=None,
+        table_few_shot_demo_key_value_list=None):
+        """Extract the values of the requested keys, querying the LLM with table information first and then with retrieved text."""
+
+        key_list = self.format_key(key_list)
+        if len(key_list) == 0:
+            return {"chat_res": "输入的key_list无效!"}
+
+        if not isinstance(visual_info, list):
+            visual_info_list = [visual_info]
+        else:
+            visual_info_list = visual_info
+        
+        all_visual_info = self.merge_visual_info_list(visual_info_list)
+        all_normal_text_list, all_table_text_list, all_table_html_list = all_visual_info
+
+        final_results = {}
+        failed_results = ["大模型调用失败", "未知", "未找到关键信息", "None", ""]
+
+        for all_table_info in [all_table_html_list, all_table_text_list]:
+            for table_info in all_table_info:
+                if len(key_list) == 0:
+                    continue
+
+                prompt = self.table_pe.generate_prompt(table_info, 
+                    key_list, 
+                    task_description=table_task_description,
+                    output_format=table_output_format, 
+                    rules_str=table_rules_str, 
+                    few_shot_demo_text_content=table_few_shot_demo_text_content, 
+                    few_shot_demo_key_value_list=table_few_shot_demo_key_value_list)
+
+                self.generate_and_merge_chat_results(prompt, 
+                    key_list, final_results, failed_results)
+        
+        if len(key_list) > 0:
+            question_key_list = [f"抽取关键信息:{key}" for key in key_list]
+            vector = vector_info['vector']
+            if not vector_info['flag_too_short_text']:
+                related_text = self.retriever.similarity_retrieval(
+                    question_key_list, vector)
+            else:
+                related_text = " ".join(vector)
+            
+            prompt = self.text_pe.generate_prompt(related_text, 
+                key_list, 
+                task_description=text_task_description,
+                output_format=text_output_format, 
+                rules_str=text_rules_str, 
+                few_shot_demo_text_content=text_few_shot_demo_text_content, 
+                few_shot_demo_key_value_list=text_few_shot_demo_key_value_list)
+            
+            self.generate_and_merge_chat_results(prompt, 
+                key_list, final_results, failed_results)
+
+        return final_results
+
+    def predict(self, *args, **kwargs):
+        logging.error(
+            "The PP-ChatOCRv3-doc pipeline does not support calling `predict()` directly! "
+            "Please invoke `visual_predict`, `build_vector` and `chat` in sequence to obtain the result."
+        )
+        return
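
A minimal sketch of the intended calling sequence, assuming the pipeline name from paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml; the input image and the extracted key are illustrative:

    from paddlex import create_pipeline

    pipeline = create_pipeline(pipeline="PP-ChatOCRv3-doc")

    visual_info_list = []
    for res in pipeline.visual_predict("contract.jpg"):
        visual_info_list.append(res["visual_info"])

    vector_info = pipeline.build_vector(visual_info_list)
    chat_result = pipeline.chat(visual_info_list, "乙方", vector_info)  # key(s) to extract
    print(chat_result)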

+ 44 - 0
paddlex/inference/pipelines_new/pp_chatocrv3_doc/result.py

@@ -0,0 +1,44 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import random
+import numpy as np
+import cv2
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+
+from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ..components import BaseResult
+
+class VisualInfoResult(BaseResult):
+    """VisualInfoResult"""
+    
+    pass
+
+# class VectorResult(BaseResult, Base64Mixin):
+#     """VisualInfoResult"""
+
+#     def _to_base64(self):
+#         return self["vector"]
+
+
+# class RetrievalResult(BaseResult):
+#     """VisualInfoResult"""
+
+#     pass
+
+
+# class ChatResult(BaseResult):
+#     """VisualInfoResult"""

+ 2 - 0
paddlex/utils/flags.py

@@ -26,6 +26,7 @@ __all__ = [
     "INFER_BENCHMARK_OUTPUT",
     "INFER_BENCHMARK_DATA_SIZE",
     "FLAGS_json_format_model",
+    "USE_NEW_INFERENCE",
 ]
 
 
@@ -46,6 +47,7 @@ DRY_RUN = get_flag_from_env_var("PADDLE_PDX_DRY_RUN", False)
 CHECK_OPTS = get_flag_from_env_var("PADDLE_PDX_CHECK_OPTS", False)
 EAGER_INITIALIZATION = get_flag_from_env_var("PADDLE_PDX_EAGER_INIT", True)
 FLAGS_json_format_model = get_flag_from_env_var("FLAGS_json_format_model", None)
+USE_NEW_INFERENCE = get_flag_from_env_var("USE_NEW_INFERENCE", False)
 
 # Inference Benchmark
 INFER_BENCHMARK = get_flag_from_env_var("PADDLE_PDX_INFER_BENCHMARK", None)
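
The new flag is read from the environment when paddlex is imported; a hedged sketch of opting into the new pipeline architecture (the accepted truthy spelling depends on get_flag_from_env_var):

    import os

    os.environ["USE_NEW_INFERENCE"] = "True"  # must be set before importing paddlex

    from paddlex import create_pipeline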

+ 15 - 0
paddlex/utils/fonts/__init__.py

@@ -15,10 +15,25 @@
 
 from pathlib import Path
 
+import PIL
+from PIL import ImageFont
 
 def get_pingfang_file_path():
     """get pingfang font file path"""
     return (Path(__file__).parent / "PingFang-SC-Regular.ttf").resolve().as_posix()
 
+def create_font(txt, sz, font_path):
+    """create font"""
+    font_size = int(sz[1] * 0.8)
+    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+    if int(PIL.__version__.split(".")[0]) < 10:
+        length = font.getsize(txt)[0]
+    else:
+        length = font.getlength(txt)
+
+    if length > sz[0]:
+        font_size = int(font_size * sz[0] / length)
+        font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+    return font
 
 PINGFANG_FONT_FILE_PATH = get_pingfang_file_path()
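
A short sketch of the shared helper picking a font size that fits a target box (the text and box size are illustrative):

    from paddlex.utils.fonts import PINGFANG_FONT_FILE_PATH, create_font

    # Fit the text into a 200x40 pixel region; the size is reduced if the rendered text is wider than 200 px.
    font = create_font("layout parsing", (200, 40), PINGFANG_FONT_FILE_PATH)
    print(font.size)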