Parcourir la source

feat: 优化API结果处理,添加标准化内容的调试信息,移除冗余打印

zhch158_admin il y a 1 mois
Parent
commit
e75951928f
1 fichier modifié avec 8 ajouts et 8 suppressions
  1. 8 8
      zhch/ppstructurev3_single_client.py

+ 8 - 8
zhch/ppstructurev3_single_client.py

@@ -201,7 +201,7 @@ def convert_api_result_to_json(api_result: Dict[str, Any],
         for item in converted_json.get('parsing_res_list', []):
             if 'block_content' in item:
                 original_content = item['block_content']
-                
+                normalized_content = original_content
                 # 根据block_label类型选择标准化方法
                 if item.get('block_label') == 'table':
                     normalized_content = normalize_markdown_table(original_content)
@@ -243,8 +243,8 @@ def convert_api_result_to_json(api_result: Dict[str, Any],
             "character_changes_count": changes_count
         }
         
-        if changes_count > 0:
-            print(f"🔧 已标准化 {changes_count} 个字符(全角→半角)")
+        # if changes_count > 0:
+        #     print(f"🔧 已标准化 {changes_count} 个字符(全角→半角)")
     else:
         converted_json['processing_info'] = {
             "normalize_numbers": False,
@@ -291,8 +291,8 @@ def save_markdown_content(api_result: Dict[str, Any], output_dir: str,
         markdown_text = normalize_markdown_table(markdown_text)
         
         changes_count = len([1 for o, n in zip(original_markdown_text, markdown_text) if o != n])
-        if changes_count > 0:
-            print(f"🔧 Markdown中已标准化 {changes_count} 个字符(全角→半角)")
+        # if changes_count > 0:
+        #     print(f"🔧 Markdown中已标准化 {changes_count} 个字符(全角→半角)")
     
     md_file_path = output_path / f"{filename}.md"
     with open(md_file_path, 'w', encoding='utf-8') as f:
@@ -366,9 +366,6 @@ def process_images_via_api(image_paths: List[str],
     output_path = Path(output_dir)
     output_path.mkdir(parents=True, exist_ok=True)
     
-    print(f"🚀 Using API: {api_url}")
-    print(f"🔧 数字标准化: {'启用' if normalize_numbers else '禁用'}")
-    
     all_results = []
     total_images = len(image_paths)
     
@@ -434,6 +431,8 @@ def process_images_via_api(image_paths: List[str],
                 
             except Exception as e:
                 print(f"Error processing {Path(img_path).name}: {e}", file=sys.stderr)
+                import traceback
+                traceback.print_exc()
                 
                 # 添加错误结果
                 all_results.append({
@@ -485,6 +484,7 @@ def main():
             print(f"Test mode: processing only {len(input_files)} images")
         
         print(f"🌐 Using API: {args.api_url}")
+        print(f"🔧 数字标准化: {'启用' if normalize_numbers else '禁用'}")
         print(f"⏱️ Timeout: {args.timeout} seconds")
         
         # 开始处理