Sfoglia il codice sorgente

Merge pull request #1459 from myhloli/dev

Dev
Xiaomeng Zhao 10 mesi fa
parent
commit
2c1f06b3ab

+ 4 - 8
magic_pdf/model/sub_modules/language_detection/utils.py

@@ -24,11 +24,11 @@ def get_model_config():
     config_path = os.path.join(model_config_dir, 'model_configs.yaml')
     with open(config_path, 'r', encoding='utf-8') as f:
         configs = yaml.load(f, Loader=yaml.FullLoader)
-    return local_models_dir, device, configs
+    return root_dir, local_models_dir, device, configs
 
 
 def get_text_images(simple_images):
-    local_models_dir, device, configs = get_model_config()
+    _, local_models_dir, device, configs = get_model_config()
     atom_model_manager = AtomModelSingleton()
     temp_layout_model = atom_model_manager.get_atom_model(
         atom_model_name=AtomicModel.Layout,
@@ -69,15 +69,11 @@ def model_init(model_name: str):
     atom_model_manager = AtomModelSingleton()
 
     if model_name == MODEL_NAME.YOLO_V11_LangDetect:
-        local_models_dir, device, configs = get_model_config()
+        root_dir, _, device, _ = get_model_config()
         model = atom_model_manager.get_atom_model(
             atom_model_name=AtomicModel.LangDetect,
             langdetect_model_name=MODEL_NAME.YOLO_V11_LangDetect,
-            langdetect_model_weight=str(
-                os.path.join(
-                    local_models_dir, configs['weights'][MODEL_NAME.YOLO_V11_LangDetect]
-                )
-            ),
+            langdetect_model_weight=str(os.path.join(root_dir, 'resources', 'yolov11-langdetect', 'yolo_v11_ft.pt')),
             device=device,
         )
     else:

+ 5 - 0
magic_pdf/pdf_parse_union_core_v2.py

@@ -768,6 +768,11 @@ def parse_page_core(
     """重排block"""
     sorted_blocks = sorted(fix_blocks, key=lambda b: b['index'])
 
+    """block内重排(img和table的block内多个caption或footnote的排序)"""
+    for block in sorted_blocks:
+        if block['type'] in [BlockType.Image, BlockType.Table]:
+            block['blocks'] = sorted(block['blocks'], key=lambda b: b['index'])
+
     """获取QA需要外置的list"""
     images, tables, interline_equations = get_qa_need_list_v2(sorted_blocks)
 

+ 1 - 2
magic_pdf/resources/model_config/model_configs.yaml

@@ -5,5 +5,4 @@ weights:
   unimernet_small: MFR/unimernet_small
   struct_eqtable: TabRec/StructEqTable
   tablemaster: TabRec/TableMaster
-  rapid_table: TabRec/RapidTable
-  yolo_v11n_langdetect: LangDetect/YOLO/yolo_v11_ft.pt
+  rapid_table: TabRec/RapidTable

BIN
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt


BIN
projects/gradio_app/examples/complex_layout.pdf


+ 1 - 1
setup.py

@@ -51,7 +51,7 @@ if __name__ == '__main__':
                      "doclayout_yolo==0.0.2",  # doclayout_yolo
                      "rapidocr-paddle",  # rapidocr-paddle
                      "rapidocr_onnxruntime",
-                     "rapid_table",  # rapid_table
+                     "rapid_table==0.3.0",  # rapid_table
                      "PyYAML",  # yaml
                      "openai",  # openai SDK
                      "detectron2"