Pārlūkot izejas kodu

feat: remove pipe_auto_mode

xu rui 11 mēneši atpakaļ
vecāks
revīzija
302a695078
3 mainīti faili ar 10 papildinājumiem un 62 dzēšanām
  1. 0 25
      magic_pdf/model/__init__.py
  2. 0 34
      magic_pdf/model/operators.py
  3. 10 3
      magic_pdf/tools/common.py

+ 0 - 25
magic_pdf/model/__init__.py

@@ -66,31 +66,6 @@ class InferenceResultBase(ABC):
         pass
 
     @abstractmethod
-    def pipe_auto_mode(
-        self,
-        imageWriter: DataWriter,
-        start_page_id=0,
-        end_page_id=None,
-        debug_mode=False,
-        lang=None,
-    ) -> PipeResult:
-        """Post-proc the model inference result.
-            step1: classify the dataset type
-            step2: based the result of step1, using `pipe_txt_mode` or `pipe_ocr_mode`
-
-        Args:
-            imageWriter (DataWriter): the image writer handle
-            start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
-            end_page_id (int, optional):  Defaults to the last page index of dataset. Let user select some pages He/She want to process
-            debug_mode (bool, optional): Defaults to False. will dump more log if enabled
-            lang (str, optional): Defaults to None.
-
-        Returns:
-            PipeResult: the result
-        """
-        pass
-
-    @abstractmethod
     def pipe_txt_mode(
         self,
         imageWriter: DataWriter,

+ 0 - 34
magic_pdf/model/operators.py

@@ -71,40 +71,6 @@ class InferenceResult(InferenceResultBase):
         """
         return proc(copy.deepcopy(self._infer_res), *args, **kwargs)
 
-    def pipe_auto_mode(
-        self,
-        imageWriter: DataWriter,
-        start_page_id=0,
-        end_page_id=None,
-        debug_mode=False,
-        lang=None,
-    ) -> PipeResult:
-        """Post-proc the model inference result.
-            step1: classify the dataset type
-            step2: based the result of step1, using `pipe_txt_mode` or `pipe_ocr_mode`
-
-        Args:
-            imageWriter (DataWriter): the image writer handle
-            start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process
-            end_page_id (int, optional):  Defaults to the last page index of dataset. Let user select some pages He/She want to process
-            debug_mode (bool, optional): Defaults to False. will dump more log if enabled
-            lang (str, optional): Defaults to None.
-
-        Returns:
-            PipeResult: the result
-        """
-
-        pdf_proc_method = classify(self._dataset.data_bits())
-
-        if pdf_proc_method == SupportedPdfParseMethod.TXT:
-            return self.pipe_txt_mode(
-                imageWriter, start_page_id, end_page_id, debug_mode, lang
-            )
-        else:
-            return self.pipe_ocr_mode(
-                imageWriter, start_page_id, end_page_id, debug_mode, lang
-            )
-
     def pipe_txt_mode(
         self,
         imageWriter: DataWriter,

+ 10 - 3
magic_pdf/tools/common.py

@@ -170,6 +170,7 @@ def do_parse(
             logger.error('need model list input')
             exit(2)
     else:
+        
         infer_result = InferenceResult(model_list, ds)
         if parse_method == 'ocr':
             pipe_result = infer_result.pipe_ocr_mode(
@@ -180,9 +181,15 @@ def do_parse(
                 image_writer, debug_mode=True, lang=lang
             )
         else:
-            pipe_result = infer_result.pipe_auto_mode(
-                image_writer, debug_mode=True, lang=lang
-            )
+            if ds.classify() == SupportedPdfParseMethod.TXT:
+                pipe_result = infer_result.pipe_txt_mode(
+                        image_writer, debug_mode=True, lang=lang
+                    )
+            else:
+                pipe_result = infer_result.pipe_ocr_mode(
+                        image_writer, debug_mode=True, lang=lang
+                    )
+            
 
     if f_draw_model_bbox:
         infer_result.draw_model(