Browse the source code

refactor: increase YOLO layout base batch size and improve progress tracking in predictions

myhloli 4 months ago
parent
commit
4243b0eaed

+ 1 - 1
mineru/backend/pipeline/batch_analyze.py

@@ -9,7 +9,7 @@ from ...utils.config_reader import get_formula_enable, get_table_enable
 from ...utils.model_utils import crop_img, get_res_list_from_layout_res
 from ...utils.ocr_utils import get_adjusted_mfdetrec_res, get_ocr_result_list, OcrConfidence
 
-YOLO_LAYOUT_BASE_BATCH_SIZE = 1
+YOLO_LAYOUT_BASE_BATCH_SIZE = 8
 MFD_BASE_BATCH_SIZE = 1
 MFR_BASE_BATCH_SIZE = 16
 

+ 13 - 11
mineru/model/layout/doclayout_yolo.py

@@ -57,15 +57,17 @@ class DocLayoutYOLOModel:
         batch_size: int = 4
     ) -> List[List[Dict]]:
         results = []
-        for idx in tqdm(range(0, len(images), batch_size), desc="Layout Predict"):
-            batch = images[idx: idx + batch_size]
-            predictions = self.model.predict(
-                batch,
-                imgsz=self.imgsz,
-                conf=self.conf,
-                iou=self.iou,
-                verbose=False,
-            )
-            for pred in predictions:
-                results.append(self._parse_prediction(pred))
+        with tqdm(total=len(images), desc="Layout Predict") as pbar:
+            for idx in range(0, len(images), batch_size):
+                batch = images[idx: idx + batch_size]
+                predictions = self.model.predict(
+                    batch,
+                    imgsz=self.imgsz,
+                    conf=self.conf,
+                    iou=self.iou,
+                    verbose=False,
+                )
+                for pred in predictions:
+                    results.append(self._parse_prediction(pred))
+                pbar.update(len(batch))
         return results

+ 6 - 4
mineru/model/mfd/yolo_v8.py

@@ -44,8 +44,10 @@ class YOLOv8MFDModel:
         batch_size: int = 4
     ) -> List:
         results = []
-        for idx in tqdm(range(0, len(images), batch_size), desc="MFD Predict"):
-            batch = images[idx: idx + batch_size]
-            batch_preds = self._run_predict(batch, is_batch=True)
-            results.extend(batch_preds)
+        with tqdm(total=len(images), desc="MFD Predict") as pbar:
+            for idx in range(0, len(images), batch_size):
+                batch = images[idx: idx + batch_size]
+                batch_preds = self._run_predict(batch, is_batch=True)
+                results.extend(batch_preds)
+                pbar.update(len(batch))
         return results

+ 1 - 1
mineru/utils/pdf_reader.py

@@ -15,7 +15,7 @@ def page_to_image(
     scale = dpi / 72
 
     long_side_length = max(*page.get_size())
-    if long_side_length > max_width_or_height:
+    if (long_side_length*scale) > max_width_or_height:
         scale = max_width_or_height / long_side_length
 
     bitmap: PdfBitmap = page.render(scale=scale)  # type: ignore