Selaa lähdekoodia

refactor(magic_pdf): remove unused threading lock and model initialization code

- Remove threading.Lock import and usage
- Delete unused model initialization comments and code
- Simplify OCR model initialization in both pdf_extract_kit.py and pdf_parse_union_core_v2.py
myhloli 11 kuukautta sitten
vanhempi
commit
a1744b770f
2 muutettua tiedostoa jossa 3 lisäystä ja 18 poistoa
  1. 1 10
      magic_pdf/model/pdf_extract_kit.py
  2. 2 8
      magic_pdf/pdf_parse_union_core_v2.py

+ 1 - 10
magic_pdf/model/pdf_extract_kit.py

@@ -28,8 +28,6 @@ from magic_pdf.model.sub_modules.model_utils import (
 from magic_pdf.model.sub_modules.ocr.paddleocr.ocr_utils import (
     get_adjusted_mfdetrec_res, get_ocr_result_list)
 
-from threading import Lock
-
 
 class CustomPEKModel:
 
@@ -37,7 +35,6 @@ class CustomPEKModel:
         """
         ======== model init ========
         """
-        self._lock = Lock()
         # 获取当前文件(即 pdf_extract_kit.py)的绝对路径
         current_file_path = os.path.abspath(__file__)
         # 获取当前文件所在的目录(model)
@@ -153,12 +150,6 @@ class CustomPEKModel:
                 device=self.device,
             )
         # 初始化ocr
-        # self.ocr_model = atom_model_manager.get_atom_model(
-        #     atom_model_name=AtomicModel.OCR,
-        #     ocr_show_log=show_log,
-        #     det_db_box_thresh=0.3,
-        #     lang=self.lang
-        # )
         self.ocr_model = ocr_model_init(
             show_log=show_log,
             det_db_box_thresh=0.3,
@@ -223,7 +214,7 @@ class CustomPEKModel:
 
             # OCR recognition
             new_image = cv2.cvtColor(np.asarray(new_image), cv2.COLOR_RGB2BGR)
-            # with self._lock:
+
             if self.apply_ocr:
                 ocr_res = self.ocr_model.ocr(new_image, mfd_res=adjusted_mfdetrec_res)[0]
             else:

+ 2 - 8
magic_pdf/pdf_parse_union_core_v2.py

@@ -31,7 +31,7 @@ try:
 except ImportError:
     pass
 
-from magic_pdf.model.sub_modules.model_init import AtomModelSingleton, ocr_model_init
+from magic_pdf.model.sub_modules.model_init import ocr_model_init
 from magic_pdf.para.para_split_v3 import para_split
 from magic_pdf.pre_proc.construct_page_dict import ocr_construct_page_component_v2
 from magic_pdf.pre_proc.cut_image import ocr_cut_image_and_table
@@ -231,13 +231,7 @@ def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang
     if len(empty_spans) > 0:
 
         # 初始化ocr模型
-        # atom_model_manager = AtomModelSingleton()
-        # ocr_model = atom_model_manager.get_atom_model(
-        #     atom_model_name="ocr",
-        #     ocr_show_log=False,
-        #     det_db_box_thresh=0.3,
-        #     lang=lang
-        # )
+
         ocr_model = ocr_model_init(
             show_log=False,
             det_db_box_thresh=0.3,