浏览代码

fix: adjust OCR confidence threshold and refine category assignment logic

myhloli 4 月之前
父节点
当前提交
962a3453ca
共有 2 个文件被更改，包括 8 次插入和 1 次删除
  1. + 7 - 0
      mineru/backend/pipeline/batch_analyze.py
  2. + 1 - 1
      mineru/utils/ocr_utils.py

+ 7 - 0
mineru/backend/pipeline/batch_analyze.py

@@ -318,6 +318,13 @@ class BatchAnalyze:
                         layout_res_item['score'] = float(f"{ocr_score:.3f}")
                         if ocr_score < OcrConfidence.min_confidence:
                             layout_res_item['category_id'] = 16
+                        else:
+                            layout_res_bbox = [layout_res_item['poly'][0], layout_res_item['poly'][1],
+                                               layout_res_item['poly'][4], layout_res_item['poly'][5]]
+                            layout_res_width = layout_res_bbox[2] - layout_res_bbox[0]
+                            layout_res_height = layout_res_bbox[3] - layout_res_bbox[1]
+                            if ocr_text in ['(204号', '(20', '(2', '(2号'] and ocr_score < 0.8 and layout_res_width < layout_res_height:
+                                layout_res_item['category_id'] = 16
 
                     total_processed += len(img_crop_list)
 

+ 1 - 1
mineru/utils/ocr_utils.py

@@ -5,7 +5,7 @@ import numpy as np
 
 
 class OcrConfidence:
-    min_confidence = 0.68
+    min_confidence = 0.6
     min_width = 3