1 kuukausi sitten · a2311846f1
--- a/ocr_tools/universal_doc_parser/tests/test_second_pass_ocr_aggregate.py
+++ b/ocr_tools/universal_doc_parser/tests/test_second_pass_ocr_aggregate.py
@@ -72,7 +72,7 @@ class TestShouldRunWholeFallback:
 
				             config={
			
 
				                 "second_pass_ocr": {
			
 
				                     "whole_cell_fallback": True,
			
 
				-                    "enhance_retry": {"min_chars": 4},
			
 
				+                    "suspicious_short_min_chars": 4,
			
 
				                 }
			
 
				             },
			
 
				         )
			
@@ -94,6 +94,61 @@ class TestShouldRunWholeFallback:
 
				         assert f._should_run_whole_fallback("", 0.0, cell, [], 0.9)
			
 
				 
			
 
				 
			
 
				+class TestCellPreprocessConfig:
			
 
				+    def test_suspicious_short_from_top_level(self):
			
 
				+        f = TextFiller(
			
 
				+            ocr_engine=None,
			
 
				+            config={"second_pass_ocr": {"suspicious_short_min_chars": 6}},
			
 
				+        )
			
 
				+        assert f.second_pass_suspicious_short_min_chars == 6
			
 
				+
			
 
				+    def test_light_contrast_stage_when_enabled(self):
			
 
				+        import numpy as np
			
 
				+
			
 
				+        f = TextFiller(
			
 
				+            ocr_engine=None,
			
 
				+            config={
			
 
				+                "second_pass_ocr": {
			
 
				+                    "cell_preprocess": {
			
 
				+                        "watermark": {"enabled": True, "method": "threshold"},
			
 
				+                        "contrast": {
			
 
				+                            "enabled": True,
			
 
				+                            "method": "text_restore",
			
 
				+                            "text_black_target": 88,
			
 
				+                        },
			
 
				+                    }
			
 
				+                }
			
 
				+            },
			
 
				+        )
			
 
				+        cell = np.ones((40, 80, 3), dtype=np.uint8) * 200
			
 
				+        _, stages = f._preprocess_cell_for_ocr(cell, mode="light")
			
 
				+        assert "wm" in stages
			
 
				+        assert "contrast" in stages
			
 
				+
			
 
				+
			
 
				+class TestWholeCellParse:
			
 
				+    def test_parse_det_rec_item_uses_rec_not_box(self):
			
 
				+        item = [
			
 
				+            [[146.0, 15.0], [199.0, 15.0], [199.0, 85.0], [146.0, 85.0]],
			
 
				+            ("/", 0.9213118553161621),
			
 
				+        ]
			
 
				+        t, s = TextFiller._parse_det_rec_item(item)
			
 
				+        assert t == "/"
			
 
				+        assert abs(s - 0.9213118553161621) < 1e-6
			
 
				+
			
 
				+    def test_normalize_rec_score_percent(self):
			
 
				+        assert abs(TextFiller._normalize_rec_score(92.5) - 0.925) < 1e-6
			
 
				+        assert TextFiller._normalize_rec_score(0.921) == 0.921
			
 
				+        assert TextFiller._normalize_rec_score(999) == 0.0
			
 
				+
			
 
				+    def test_pick_line_when_whole_score_invalid(self):
			
 
				+        f = TextFiller(ocr_engine=None, config={"second_pass_ocr": {}})
			
 
				+        t, s, strat = f._pick_line_vs_whole("/", 0.92, "146.0199.0146.0/", 999.0)
			
 
				+        assert t == "/"
			
 
				+        assert strat == "lines"
			
 
				+        assert abs(s - 0.92) < 1e-6
			
 
				+
			
 
				+
			
 
				 class TestPickBetterOcrResult:
			
 
				     def test_reject_invalid_pass2_score(self):
			
 
				         pass1 = {"final_text": "取款", "final_score": 0.99, "accepted": True}