|
|
@@ -71,9 +71,25 @@ class VisualizationUtils:
|
|
|
|
|
|
# 错误
|
|
|
'error': (255, 0, 0), # 红色
|
|
|
+
|
|
|
+ # --- 通用工具颜色(非元素类别,供 module_debug_viz / ocr_validator 引用) ---
|
|
|
+
|
|
|
+ # OCR 文字框:亮蓝(白底/浅灰上比黄/红色易辨认)
|
|
|
+ 'ocr_box': (0, 0, 255),
|
|
|
+ # 印章 OCR 框:亮橙(独立管线,与 layout seal 颜色一致,审计时区分)
|
|
|
+ 'seal_ocr_box': (255, 140, 0),
|
|
|
+ # 表格单元格框:与 ocr_box 同色
|
|
|
+ 'cell_box': (0, 0, 255),
|
|
|
+ # 丢弃/废弃元素框
|
|
|
+ 'discard': (128, 128, 128),
|
|
|
}
|
|
|
|
|
|
- # OCR 框颜色(与 module_debug_viz.OCR_BOX_COLOR_BGR 一致:亮蓝 BGR→RGB)
|
|
|
+ @staticmethod
|
|
|
+ def rgb_to_bgr(rgb: tuple) -> tuple:
|
|
|
+ """Convert an RGB color tuple to BGR channel order (for the OpenCV-based modules); an input with fewer than 3 items is returned unchanged."""
|
|
|
+ return tuple(rgb[i] for i in (2, 1, 0)) if len(rgb) >= 3 else rgb  # indices (2, 1, 0) reverse R/G/B; only the first three items are kept, so a 4th (alpha) item is not carried over
|
|
|
+
|
|
|
+ # --- Backward-compatible aliases (prefer COLOR_MAP['ocr_box'] and the other COLOR_MAP entries) ---
|
|
|
OCR_BOX_COLOR = (0, 0, 255)  # duplicated literal: same value as COLOR_MAP['ocr_box']; keep the two in sync
|
|
|
CELL_BOX_COLOR = (0, 0, 255)  # duplicated literal: same value as COLOR_MAP['cell_box']; keep the two in sync
|
|
|
DISCARD_COLOR = (128, 128, 128) # gray; duplicated literal with the same value as COLOR_MAP['discard']
|
|
|
@@ -242,18 +258,18 @@ class VisualizationUtils:
|
|
|
# 半透明填充
|
|
|
overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
|
|
|
overlay_draw = ImageDraw.Draw(overlay)
|
|
|
- overlay_draw.rectangle([x0, y0, x1, y1], fill=(*VisualizationUtils.DISCARD_COLOR, 30))
|
|
|
+ overlay_draw.rectangle([x0, y0, x1, y1], fill=(*VisualizationUtils.COLOR_MAP['discard'], 30))
|
|
|
image = Image.alpha_composite(image.convert('RGBA'), overlay).convert('RGB')
|
|
|
draw = ImageDraw.Draw(image)
|
|
|
|
|
|
# 灰色边框
|
|
|
- draw.rectangle([x0, y0, x1, y1], outline=VisualizationUtils.DISCARD_COLOR, width=1)
|
|
|
+ draw.rectangle([x0, y0, x1, y1], outline=VisualizationUtils.COLOR_MAP['discard'], width=1)
|
|
|
|
|
|
# 类型标签
|
|
|
if draw_type_label:
|
|
|
label = f"D:{original_category}"
|
|
|
bbox_label = draw.textbbox((x0 + 2, y0 + 2), label, font=font)
|
|
|
- draw.rectangle(bbox_label, fill=VisualizationUtils.DISCARD_COLOR)
|
|
|
+ draw.rectangle(bbox_label, fill=VisualizationUtils.COLOR_MAP['discard'])
|
|
|
draw.text((x0 + 2, y0 + 2), label, fill='white', font=font)
|
|
|
|
|
|
# 根据输入类型决定命名
|
|
|
@@ -276,117 +292,87 @@ class VisualizationUtils:
|
|
|
is_pdf: bool = True
|
|
|
) -> List[str]:
|
|
|
"""
|
|
|
- 保存 OCR 可视化图片
|
|
|
-
|
|
|
+ 保存 OCR 可视化图片(与 *_page_001.json 同源同构)。
|
|
|
+
|
|
|
+ 数据源为 JSONFormatters._element_to_cell_bbox_format 转换后的扁平格式
|
|
|
+ (与 save_page_jsons 输出的 JSON 一致);
|
|
|
+ 绘制样式与 debug/ocr_recognition 一致:亮蓝实线=有文字,虚线=仅框无字。
|
|
|
+
|
|
|
命名规则:
|
|
|
- PDF输入: 文件名_page_001_ocr.png
|
|
|
- 图片输入(单页): 文件名_ocr.png
|
|
|
-
|
|
|
- Args:
|
|
|
- results: 处理结果
|
|
|
- output_dir: 输出目录
|
|
|
- doc_name: 文档名称
|
|
|
- is_pdf: 是否为 PDF 输入
|
|
|
-
|
|
|
- Returns:
|
|
|
- 保存的图片路径列表
|
|
|
"""
|
|
|
+ from ocr_utils.json_formatters import JSONFormatters
|
|
|
+ from ocr_utils.module_debug_viz import draw_ocr_spans_cv2
|
|
|
+
|
|
|
ocr_paths = []
|
|
|
total_pages = len(results.get('pages', []))
|
|
|
-
|
|
|
+
|
|
|
for page in results.get('pages', []):
|
|
|
page_idx = page.get('page_idx', 0)
|
|
|
processed_image = page.get('original_image')
|
|
|
if processed_image is None:
|
|
|
processed_image = page.get('processed_image')
|
|
|
-
|
|
|
+
|
|
|
if processed_image is None:
|
|
|
logger.warning(f"Page {page_idx}: No image data found for OCR visualization")
|
|
|
continue
|
|
|
-
|
|
|
- if isinstance(processed_image, np.ndarray):
|
|
|
- image = Image.fromarray(processed_image).convert('RGB')
|
|
|
- elif isinstance(processed_image, Image.Image):
|
|
|
- image = processed_image.convert('RGB')
|
|
|
- else:
|
|
|
- continue
|
|
|
-
|
|
|
- draw = ImageDraw.Draw(image)
|
|
|
- font = VisualizationUtils._get_font(10)
|
|
|
-
|
|
|
- for element in page.get('elements', []):
|
|
|
- content = element.get('content', {})
|
|
|
-
|
|
|
- # OCR 文本框
|
|
|
- ocr_details = content.get('ocr_details', [])
|
|
|
- for ocr_item in ocr_details:
|
|
|
- ocr_bbox = ocr_item.get('bbox', [])
|
|
|
- if ocr_bbox:
|
|
|
- VisualizationUtils._draw_polygon(
|
|
|
- draw, ocr_bbox, VisualizationUtils.OCR_BOX_COLOR, width=1
|
|
|
- )
|
|
|
-
|
|
|
- # 表格单元格
|
|
|
- cells = content.get('cells', [])
|
|
|
- for cell in cells:
|
|
|
- cell_bbox = cell.get('bbox', [])
|
|
|
- if cell_bbox and len(cell_bbox) >= 4:
|
|
|
- x0, y0, x1, y1 = map(int, cell_bbox[:4])
|
|
|
- draw.rectangle(
|
|
|
- [x0, y0, x1, y1],
|
|
|
- outline=VisualizationUtils.CELL_BOX_COLOR,
|
|
|
- width=2
|
|
|
- )
|
|
|
-
|
|
|
- cell_text = cell.get('text', '')[:10]
|
|
|
- if cell_text:
|
|
|
- draw.text(
|
|
|
- (x0 + 2, y0 + 2),
|
|
|
- cell_text,
|
|
|
- fill=VisualizationUtils.CELL_BOX_COLOR,
|
|
|
- font=font
|
|
|
- )
|
|
|
-
|
|
|
- # OCR 框
|
|
|
- ocr_boxes = content.get('ocr_boxes', [])
|
|
|
- for ocr_box in ocr_boxes:
|
|
|
- bbox = ocr_box.get('bbox', [])
|
|
|
- if bbox:
|
|
|
- VisualizationUtils._draw_polygon(
|
|
|
- draw, bbox, VisualizationUtils.OCR_BOX_COLOR, width=1
|
|
|
- )
|
|
|
-
|
|
|
- # 绘制丢弃元素的 OCR 框
|
|
|
- for element in page.get('discarded_blocks', []):
|
|
|
- bbox = element.get('bbox', [0, 0, 0, 0])
|
|
|
- content = element.get('content', {})
|
|
|
-
|
|
|
- if len(bbox) >= 4:
|
|
|
- x0, y0, x1, y1 = map(int, bbox[:4])
|
|
|
- draw.rectangle(
|
|
|
- [x0, y0, x1, y1],
|
|
|
- outline=VisualizationUtils.DISCARD_COLOR,
|
|
|
- width=1
|
|
|
- )
|
|
|
-
|
|
|
- ocr_details = content.get('ocr_details', [])
|
|
|
- for ocr_item in ocr_details:
|
|
|
- ocr_bbox = ocr_item.get('bbox', [])
|
|
|
- if ocr_bbox:
|
|
|
- VisualizationUtils._draw_polygon(
|
|
|
- draw, ocr_bbox, VisualizationUtils.DISCARD_COLOR, width=1
|
|
|
- )
|
|
|
-
|
|
|
- # 根据输入类型决定命名
|
|
|
+
|
|
|
+ page_rotation_angle = float(page.get('angle', 0))
|
|
|
+
|
|
|
+ flat_elements = []
|
|
|
+ for element in (page.get('elements') or []):
|
|
|
+ converted = JSONFormatters._element_to_cell_bbox_format(
|
|
|
+ element, page_idx, page_rotation_angle
|
|
|
+ )
|
|
|
+ if converted:
|
|
|
+ flat_elements.append(converted)
|
|
|
+ for element in (page.get('discarded_blocks') or []):
|
|
|
+ converted = JSONFormatters._element_to_cell_bbox_format(
|
|
|
+ element, page_idx, page_rotation_angle
|
|
|
+ )
|
|
|
+ if converted:
|
|
|
+ flat_elements.append(converted)
|
|
|
+
|
|
|
+ spans = []
|
|
|
+ for elem in flat_elements:
|
|
|
+ bbox = elem.get('bbox', [])
|
|
|
+ if not bbox or len(bbox) < 4:
|
|
|
+ continue
|
|
|
+ elem_type = elem.get('type', '')
|
|
|
+ if 'table_cells' in elem:
|
|
|
+ for cell in elem['table_cells']:
|
|
|
+ cell_bbox = cell.get('bbox', [])
|
|
|
+ if cell_bbox and len(cell_bbox) >= 4:
|
|
|
+ spans.append({
|
|
|
+ 'bbox': cell_bbox[:4],
|
|
|
+ 'text': cell.get('text', '').strip(),
|
|
|
+ })
|
|
|
+ elif elem.get('text') is not None:
|
|
|
+ spans.append({
|
|
|
+ 'bbox': bbox[:4],
|
|
|
+ 'text': str(elem.get('text', '')).strip(),
|
|
|
+ 'category': 'seal' if elem_type == 'seal' else None,
|
|
|
+ })
|
|
|
+ else:
|
|
|
+ spans.append({
|
|
|
+ 'bbox': bbox[:4],
|
|
|
+ 'text': '',
|
|
|
+ })
|
|
|
+
|
|
|
+ vis_bgr = draw_ocr_spans_cv2(processed_image, spans)
|
|
|
+ vis_rgb = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)
|
|
|
+ image = Image.fromarray(vis_rgb)
|
|
|
+
|
|
|
if is_pdf or total_pages > 1:
|
|
|
ocr_path = output_dir / f"{doc_name}_page_{page_idx + 1:03d}_ocr.png"
|
|
|
else:
|
|
|
ocr_path = output_dir / f"{doc_name}_ocr.png"
|
|
|
-
|
|
|
+
|
|
|
image.save(ocr_path)
|
|
|
ocr_paths.append(str(ocr_path))
|
|
|
logger.info(f"🖼️ OCR image saved: {ocr_path}")
|
|
|
-
|
|
|
+
|
|
|
return ocr_paths
|
|
|
|
|
|
@staticmethod
|