myhloli 1 ano atrás
pai
commit
14f45075d8

+ 2 - 3
magic_pdf/model/doc_analyze_by_custom_model.py

@@ -44,9 +44,8 @@ def load_images_from_pdf(pdf_bytes: bytes, dpi=200) -> list:
     return images
 
 
-def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False, model=MODEL.Paddle,
-                model_type=MODEL_TYPE.SINGLE_PAGE):
-    custom_model = None
+def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False, model=MODEL.PEK,
+                model_type=MODEL_TYPE.MULTI_PAGE):
     if model_config.__use_inside_model__:
         if model == MODEL.Paddle:
             from magic_pdf.model.pp_structure_v2 import CustomPaddleModel

+ 1 - 1
magic_pdf/model/pdf_extract_kit.py

@@ -152,8 +152,8 @@ class CustomPEKModel:
         b = time.time()
         logger.info(f"formula nums: {len(mf_image_list)}, mfr time: {round(b - a, 2)}")
 
+        # ocr识别
         if self.apply_ocr:
-            # ocr识别
             for idx, img_dict in enumerate(images):
                 image = img_dict["img"]
                 pil_img = Image.fromarray(image)