|
|
@@ -65,6 +65,43 @@ class TestPickLineVsWhole:
|
|
|
assert strat == "whole"
|
|
|
|
|
|
|
|
|
class TestShouldRunWholeFallback:
    """Checks for ``TextFiller._should_run_whole_fallback``.

    Every case uses a filler configured with ``whole_cell_fallback`` enabled
    and ``enhance_retry.min_chars`` set to 4, and expects a truthy verdict.
    """

    def _filler(self) -> TextFiller:
        """Build a ``TextFiller`` with no OCR engine and the fallback enabled."""
        second_pass = {
            "whole_cell_fallback": True,
            "enhance_retry": {"min_chars": 4},
        }
        return TextFiller(ocr_engine=None, config={"second_pass_ocr": second_pass})

    def test_high_score_short_text_triggers_whole(self):
        """A two-character text scored 0.99 still yields a truthy verdict."""
        filler = self._filler()
        import numpy as np

        # All-white 40x120 three-channel uint8 array used as the cell argument.
        white_cell = np.full((40, 120, 3), 255, dtype=np.uint8)
        text, score = "取款", 0.99
        verdict = filler._should_run_whole_fallback(
            text, score, white_cell, [(text, score)], 0.9
        )
        assert verdict

    def test_empty_line_triggers_whole(self):
        """An empty text with score 0.0 and an empty list yields a truthy verdict."""
        filler = self._filler()
        import numpy as np

        # All-white 40x80 three-channel uint8 array used as the cell argument.
        white_cell = np.full((40, 80, 3), 255, dtype=np.uint8)
        verdict = filler._should_run_whole_fallback("", 0.0, white_cell, [], 0.9)
        assert verdict
|
|
|
+
|
|
|
+
|
|
|
class TestPickBetterOcrResult:
    """Checks for ``TextFiller._pick_better_ocr_result``."""

    def test_reject_invalid_pass2_score(self):
        """The first-pass dict itself is returned when pass 2 scores 44.5.

        NOTE(review): 44.5 is presumably "invalid" because scores are expected
        to lie in [0, 1] -- confirm against ``_pick_better_ocr_result``.
        """
        first = dict(final_text="取款", final_score=0.99, accepted=True)
        second = dict(final_text="14.089", final_score=44.5, accepted=False)
        # Identity, not equality: the very same pass-1 object must come back.
        assert TextFiller._pick_better_ocr_result(first, second) is first
|
|
|
+
|
|
|
+
|
|
|
class TestSanitizeDebugFilename:
|
|
|
def test_illegal_chars(self):
|
|
|
assert TextFiller.sanitize_debug_filename("a/b:c") == "a_b_c"
|
|
|
@@ -128,6 +165,42 @@ class TestStripFallbackHeuristic:
|
|
|
assert score == 0.0
|
|
|
|
|
|
|
|
|
class TestBankStatementReocrTrigger:
    """Checks for ``TextFiller._should_second_pass_cell`` in ``bank_statement`` mode.

    The filler is configured with ``header_row`` 0 and
    ``row_peer_min_nonempty`` 3.
    """

    def _filler(self) -> TextFiller:
        """Build a ``TextFiller`` with no OCR engine in ``bank_statement`` mode."""
        second_pass = {
            "reocr_mode": "bank_statement",
            "header_row": 0,
            "row_peer_min_nonempty": 3,
        }
        return TextFiller(ocr_engine=None, config={"second_pass_ocr": second_pass})

    def test_body_row_empty_triggers(self):
        """An empty cell on row 1 is flagged with the ``body_row_empty`` reason."""
        filler = self._filler()
        header_cell = {"row": 0, "col": 0, "bbox": [0, 0, 10, 10]}
        body_cell = {"row": 1, "col": 0, "bbox": [0, 10, 10, 20]}
        verdict, reasons = filler._should_second_pass_cell(
            1,
            ["header", ""],
            [0.99, 0.0],
            [],
            [header_cell, body_cell],
            "ocr",
            False,
            0,
        )
        assert verdict is True
        assert "body_row_empty" in reasons

    def test_header_empty_not_body_row_forced(self):
        """An empty cell on row 0 does not receive the ``body_row_empty`` reason."""
        filler = self._filler()
        only_cell = {"row": 0, "col": 0, "bbox": [0, 0, 10, 10]}
        # Only the reasons are checked here; the boolean verdict is ignored.
        _, reasons = filler._should_second_pass_cell(
            0, [""], [0.99], [], [only_cell], "ocr", False, 0
        )
        assert "body_row_empty" not in reasons
|
|
|
+
|
|
|
+
|
|
|
class TestResolveCellMatchedBoxes:
|
|
|
"""空大框套小框:避免仅用碎片字触发高置信填格。"""
|
|
|
|