Explorar o código

feat(markdown_generator): 添加印章类型支持，增强 Markdown 输出功能

zhch158_admin hai 1 semana
pai
achega
451b26652d
Modificáronse 2 ficheiros con 33 adicións e 0 borrados
  1. ocr_utils/json_formatters.py (+20 −0)
  2. ocr_utils/markdown_generator.py (+13 −0)

+ 20 - 0
ocr_utils/json_formatters.py

@@ -180,6 +180,20 @@ class JSONFormatters:
                     }]
                 }]
         
+        # 印章类型
+        elif elem_type == 'seal':
+            text = content.get('text', '') if isinstance(content, dict) else str(content)
+            confidence = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
+            block['lines'] = [{
+                'bbox': bbox,
+                'spans': [{
+                    'bbox': bbox,
+                    'type': 'seal',
+                    'content': text,
+                    'confidence': confidence
+                }]
+            }]
+        
         # 丢弃类型
         elif elem_type in ['abandon', 'discarded']:
             block['type'] = 'abandon'
@@ -361,6 +375,12 @@ class JSONFormatters:
             result['type'] = elem_type
             result['text'] = content.get('text', '') if isinstance(content, dict) else str(content)
         
+        # 印章类型
+        elif elem_type == 'seal':
+            result['type'] = 'seal'
+            result['text'] = content.get('text', '') if isinstance(content, dict) else str(content)
+            result['confidence'] = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
+        
         # 丢弃元素
         elif elem_type in ['discarded', 'abandon']:
             result['type'] = 'discarded'

+ 13 - 0
ocr_utils/markdown_generator.py

@@ -276,6 +276,12 @@ pages: {len(results.get('pages', []))}
                         else:
                             md_lines.append(f"*{text}*")
                         md_lines.append("")
+                
+                elif elem_type == 'seal':
+                    text = content.get('text', '') if isinstance(content, dict) else str(content)
+                    if text:
+                        md_lines.append(f"🔖 **[印章]** {text}")
+                        md_lines.append("")
         
         return '\n'.join(md_lines)
     
@@ -371,6 +377,13 @@ pages: {len(results.get('pages', []))}
                         md_lines.append(f"*{text}*")
                     md_lines.append("")
             
+            elif elem_type == 'seal':
+                text = content.get('text', '') if isinstance(content, dict) else str(content)
+                if text:
+                    confidence = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
+                    md_lines.append(f"🔖 **[印章]** {text} _(置信度: {confidence:.2f})_")
+                    md_lines.append("")
+            
             elif elem_type == 'discarded':
                 text = content.get('text', '') if isinstance(content, dict) else ''
                 if text: