|
|
@@ -181,6 +181,7 @@ class MinerUWiredTableRecognizer:
|
|
|
self,
|
|
|
table_image: np.ndarray,
|
|
|
ocr_boxes: List[Dict[str, Any]],
|
|
|
+ pdf_type: str = 'ocr', # 'ocr' 或 'txt'
|
|
|
debug_options: Optional[Dict[str, Any]] = None,
|
|
|
) -> Dict[str, Any]:
|
|
|
"""
|
|
|
@@ -387,6 +388,7 @@ class MinerUWiredTableRecognizer:
|
|
|
texts = self.text_filler.second_pass_ocr_fill(
|
|
|
table_image, bboxes_merged, texts, scores,
|
|
|
need_reocr_indices=need_reocr_indices,
|
|
|
+ pdf_type=pdf_type,
|
|
|
force_all=False, # Force Per-Cell OCR
|
|
|
output_dir=output_dir
|
|
|
)
|
|
|
@@ -454,6 +456,7 @@ class MinerUWiredTableRecognizer:
|
|
|
self,
|
|
|
table_image: np.ndarray,
|
|
|
ocr_boxes: List[Dict[str, Any]],
|
|
|
+ pdf_type: str = 'ocr',
|
|
|
debug_options: Optional[Dict[str, Any]] = None,
|
|
|
) -> Dict[str, Any]:
|
|
|
"""
|
|
|
@@ -470,7 +473,7 @@ class MinerUWiredTableRecognizer:
|
|
|
self.config,
|
|
|
override=debug_options or self.debug_options.__dict__
|
|
|
)
|
|
|
- return self.recognize_v4(table_image, ocr_boxes, debug_options=merged_debug_opts.__dict__)
|
|
|
+ return self.recognize_v4(table_image, ocr_boxes, pdf_type=pdf_type, debug_options=merged_debug_opts.__dict__)
|
|
|
except Exception:
|
|
|
# Fallback: on any exception, use the legacy recognizer
|
|
|
return self.recognize_legacy(table_image, ocr_boxes)
|