|
|
@@ -201,7 +201,7 @@ def convert_api_result_to_json(api_result: Dict[str, Any],
|
|
|
for item in converted_json.get('parsing_res_list', []):
|
|
|
if 'block_content' in item:
|
|
|
original_content = item['block_content']
|
|
|
-
|
|
|
+ normalized_content = original_content
|
|
|
# 根据block_label类型选择标准化方法
|
|
|
if item.get('block_label') == 'table':
|
|
|
normalized_content = normalize_markdown_table(original_content)
|
|
|
@@ -243,8 +243,8 @@ def convert_api_result_to_json(api_result: Dict[str, Any],
|
|
|
"character_changes_count": changes_count
|
|
|
}
|
|
|
|
|
|
- if changes_count > 0:
|
|
|
- print(f"🔧 已标准化 {changes_count} 个字符(全角→半角)")
|
|
|
+ # if changes_count > 0:
|
|
|
+ # print(f"🔧 已标准化 {changes_count} 个字符(全角→半角)")
|
|
|
else:
|
|
|
converted_json['processing_info'] = {
|
|
|
"normalize_numbers": False,
|
|
|
@@ -291,8 +291,8 @@ def save_markdown_content(api_result: Dict[str, Any], output_dir: str,
|
|
|
markdown_text = normalize_markdown_table(markdown_text)
|
|
|
|
|
|
changes_count = len([1 for o, n in zip(original_markdown_text, markdown_text) if o != n])
|
|
|
- if changes_count > 0:
|
|
|
- print(f"🔧 Markdown中已标准化 {changes_count} 个字符(全角→半角)")
|
|
|
+ # if changes_count > 0:
|
|
|
+ # print(f"🔧 Markdown中已标准化 {changes_count} 个字符(全角→半角)")
|
|
|
|
|
|
md_file_path = output_path / f"{filename}.md"
|
|
|
with open(md_file_path, 'w', encoding='utf-8') as f:
|
|
|
@@ -366,9 +366,6 @@ def process_images_via_api(image_paths: List[str],
|
|
|
output_path = Path(output_dir)
|
|
|
output_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
- print(f"🚀 Using API: {api_url}")
|
|
|
- print(f"🔧 数字标准化: {'启用' if normalize_numbers else '禁用'}")
|
|
|
-
|
|
|
all_results = []
|
|
|
total_images = len(image_paths)
|
|
|
|
|
|
@@ -434,6 +431,8 @@ def process_images_via_api(image_paths: List[str],
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"Error processing {Path(img_path).name}: {e}", file=sys.stderr)
|
|
|
+ import traceback
|
|
|
+ traceback.print_exc()
|
|
|
|
|
|
# 添加错误结果
|
|
|
all_results.append({
|
|
|
@@ -485,6 +484,7 @@ def main():
|
|
|
print(f"Test mode: processing only {len(input_files)} images")
|
|
|
|
|
|
print(f"🌐 Using API: {args.api_url}")
|
|
|
+ print(f"🔧 数字标准化: {'启用' if normalize_numbers else '禁用'}")
|
|
|
print(f"⏱️ Timeout: {args.timeout} seconds")
|
|
|
|
|
|
# 开始处理
|