|
@@ -126,3 +126,72 @@ class TestStripFallbackHeuristic:
|
|
|
text, score = TextFiller._parse_single_rec_item(("", 1.0))
|
|
text, score = TextFiller._parse_single_rec_item(("", 1.0))
|
|
|
assert text == ""
|
|
assert text == ""
|
|
|
assert score == 0.0
|
|
assert score == 0.0
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestResolveCellMatchedBoxes:
    """Empty large box wrapping a small box.

    Guards against a lone fragment character triggering a
    high-confidence cell fill.
    """

    def _matched_entry(self, text, bbox, score=1.0):
        """Build one matched-entry tuple for the resolver tests.

        The tuple is ``(text, bbox[1], bbox[0], 1.0, score, box_dict)``
        where ``box_dict`` carries the same bbox under both ``"bbox"`` and
        ``"original_bbox"``, plus the text and confidence.
        """
        # NOTE(review): bbox[1]/bbox[0] look like y/x sort keys and the
        # literal 1.0 looks like an overlap ratio -- confirm against
        # TextFiller before relying on that reading.
        box_info = {
            "bbox": bbox,
            "original_bbox": bbox,
            "text": text,
            "confidence": score,
        }
        return (text, bbox[1], bbox[0], 1.0, score, box_info)

    def test_empty_outer_with_inner_char_forces_zero_score(self):
        """An empty outer box plus an inner single character keeps only the empty box."""
        # Modelled on cell 196: large empty box [877,1593,1084,1671]
        # plus a small "部" box [966,1644,998,1676].
        outer_box = [877.0, 1593.0, 1084.0, 1671.0]
        fragment_box = [966.0, 1644.0, 998.0, 1676.0]
        empty_entry = self._matched_entry("", outer_box, 1.0)
        fragment_entry = self._matched_entry("部", fragment_box, 0.994)
        candidates = [empty_entry, fragment_entry]

        kept, zero_flag = TextFiller._resolve_cell_matched_boxes(candidates)

        assert zero_flag is True
        assert len(kept) == 1
        assert kept[0][0] == ""
        # The concatenated text of whatever survived must also be empty.
        joined = "".join(first for first, *_ in kept)
        assert joined == ""

    def test_outer_with_text_drops_inner_fragment(self):
        """An outer box that already has text wins over the inner fragment."""
        outer_box = [100.0, 100.0, 400.0, 150.0]
        fragment_box = [350.0, 110.0, 380.0, 140.0]
        text_entry = self._matched_entry("广东兴宁农村商业银", outer_box, 0.99)
        fragment_entry = self._matched_entry("行", fragment_box, 0.95)
        candidates = [text_entry, fragment_entry]

        kept, zero_flag = TextFiller._resolve_cell_matched_boxes(candidates)

        assert zero_flag is False
        assert len(kept) == 1
        assert kept[0][0] == "广东兴宁农村商业银"

    def test_fill_by_center_point_empty_container(self):
        """Same empty-container scenario, driven through fill_text_by_center_point."""
        filler = TextFiller(ocr_engine=None, config={})
        cell = [900.0, 1580.0, 1100.0, 1680.0]
        empty_container = {
            "bbox": [877.0, 1593.0, 1084.0, 1671.0],
            "text": "",
            "confidence": 1.0,
        }
        fragment = {
            "bbox": [966.0, 1644.0, 998.0, 1676.0],
            "text": "部",
            "confidence": 0.994,
        }
        ocr_boxes = [empty_container, fragment]

        # The call returns four values; the last one is not checked here.
        outcome = filler.fill_text_by_center_point([cell], ocr_boxes)
        texts, scores, matched_boxes, _ = outcome

        assert texts[0] == ""
        assert scores[0] == 0.0
        assert len(matched_boxes[0]) == 1
        assert matched_boxes[0][0].get("text", "") == ""
|