|
|
@@ -19,6 +19,17 @@ from PIL import Image
|
|
|
MODULE_DEBUG_ROOT = "debug"
|
|
|
|
|
|
|
|
|
def _json_default(o: Any) -> Any:
    """Fallback serializer for ``json.dumps(..., default=_json_default)``.

    The ``json`` encoder calls this hook only for objects it cannot encode
    natively, such as NumPy scalars/arrays (e.g. a ``float32`` OCR
    confidence value).

    Args:
        o: The object the encoder could not serialize.

    Returns:
        A built-in equivalent of ``o``: a Python scalar for a NumPy scalar,
        a (possibly nested) list for a NumPy array, or a list for a
        set, frozenset or tuple.

    Raises:
        TypeError: If ``o`` is none of the supported types. The message
            follows the wording the ``json`` module itself uses.
    """
    # NumPy scalars (np.float32, np.int64, ...) -> matching Python scalar.
    if isinstance(o, np.generic):
        return o.item()
    # NumPy arrays -> nested lists of Python scalars.
    if isinstance(o, np.ndarray):
        return o.tolist()
    # frozenset is handled alongside set so both kinds of set serialize the
    # same way; element order follows the set's own iteration order.
    if isinstance(o, (set, frozenset, tuple)):
        return list(o)
    raise TypeError(f"Object of type {o.__class__.__name__} is not JSON serializable")
|
|
|
+
|
|
|
+
|
|
|
def resolve_module_debug_dir(
|
|
|
output_dir: Union[str, Path],
|
|
|
subdir: str,
|
|
|
@@ -286,7 +297,7 @@ def save_layout_debug(
|
|
|
}
|
|
|
json_path = debug_dir / f'{page_name}_layout_{suffix}.json'
|
|
|
json_path.write_text(
|
|
|
- json.dumps(json_data, ensure_ascii=False, indent=2),
|
|
|
+ json.dumps(json_data, ensure_ascii=False, indent=2, default=_json_default),
|
|
|
encoding='utf-8',
|
|
|
)
|
|
|
paths['json'] = str(json_path)
|
|
|
@@ -334,7 +345,7 @@ def save_ocr_debug(
|
|
|
}
|
|
|
json_path = debug_dir / f'{page_name}_ocr_spans.json'
|
|
|
json_path.write_text(
|
|
|
- json.dumps(json_data, ensure_ascii=False, indent=2),
|
|
|
+ json.dumps(json_data, ensure_ascii=False, indent=2, default=_json_default),
|
|
|
encoding='utf-8',
|
|
|
)
|
|
|
paths['json'] = str(json_path)
|