|
|
@@ -72,7 +72,7 @@ class TestShouldRunWholeFallback:
|
|
|
config={
|
|
|
"second_pass_ocr": {
|
|
|
"whole_cell_fallback": True,
|
|
|
- "enhance_retry": {"min_chars": 4},
|
|
|
+ "suspicious_short_min_chars": 4,
|
|
|
}
|
|
|
},
|
|
|
)
|
|
|
@@ -94,6 +94,61 @@ class TestShouldRunWholeFallback:
|
|
|
assert f._should_run_whole_fallback("", 0.0, cell, [], 0.9)
|
|
|
|
|
|
|
|
|
class TestCellPreprocessConfig:
    """Tests for how TextFiller reads the ``second_pass_ocr`` config section."""

    def test_suspicious_short_from_top_level(self):
        """``suspicious_short_min_chars`` set directly under ``second_pass_ocr`` is picked up."""
        second_pass_cfg = {"suspicious_short_min_chars": 6}
        filler = TextFiller(
            ocr_engine=None,
            config={"second_pass_ocr": second_pass_cfg},
        )
        assert filler.second_pass_suspicious_short_min_chars == 6

    def test_light_contrast_stage_when_enabled(self):
        """With watermark and contrast enabled, light mode is expected to report both stages."""
        import numpy as np

        watermark_cfg = {"enabled": True, "method": "threshold"}
        contrast_cfg = {
            "enabled": True,
            "method": "text_restore",
            "text_black_target": 88,
        }
        preprocess_cfg = {"watermark": watermark_cfg, "contrast": contrast_cfg}
        filler = TextFiller(
            ocr_engine=None,
            config={"second_pass_ocr": {"cell_preprocess": preprocess_cfg}},
        )

        # Constant-valued uint8 array of shape (40, 80, 3), every element 200.
        cell_image = np.full((40, 80, 3), 200, dtype=np.uint8)
        _, applied_stages = filler._preprocess_cell_for_ocr(cell_image, mode="light")

        assert "wm" in applied_stages
        assert "contrast" in applied_stages
|
|
|
+
|
|
|
+
|
|
|
class TestWholeCellParse:
    """Tests for TextFiller's det+rec item parsing, score normalization and line/whole choice."""

    def test_parse_det_rec_item_uses_rec_not_box(self):
        """The parsed text and score must come from the (text, score) pair, not the box points."""
        expected_score = 0.9213118553161621
        box_points = [[146.0, 15.0], [199.0, 15.0], [199.0, 85.0], [146.0, 85.0]]
        det_rec_item = [box_points, ("/", expected_score)]

        text, score = TextFiller._parse_det_rec_item(det_rec_item)

        assert text == "/"
        assert abs(score - expected_score) < 1e-6

    def test_normalize_rec_score_percent(self):
        """92.5 is expected to map to 0.925, 0.921 to stay as-is, and 999 to become 0.0."""
        normalize = TextFiller._normalize_rec_score
        assert abs(normalize(92.5) - 0.925) < 1e-6
        assert normalize(0.921) == 0.921
        assert normalize(999) == 0.0

    def test_pick_line_when_whole_score_invalid(self):
        """A whole-cell candidate scored 999.0 must lose to the per-line result."""
        filler = TextFiller(ocr_engine=None, config={"second_pass_ocr": {}})

        picked_text, picked_score, strategy = filler._pick_line_vs_whole(
            "/", 0.92, "146.0199.0146.0/", 999.0
        )

        assert picked_text == "/"
        assert strategy == "lines"
        assert abs(picked_score - 0.92) < 1e-6
|
|
|
+
|
|
|
+
|
|
|
class TestPickBetterOcrResult:
|
|
|
def test_reject_invalid_pass2_score(self):
|
|
|
pass1 = {"final_text": "取款", "final_score": 0.99, "accepted": True}
|