|
|
@@ -138,6 +138,9 @@ def ocr_with_vlm(image_path, output_dir="./output",
|
|
|
if not generated_text:
|
|
|
raise Exception("模型没有生成文本内容")
|
|
|
|
|
|
+ # Strip the opening ```markdown fence and the closing ``` fence from generated_text
|
|
|
+ generated_text = re.sub(r"^```markdown\s*", "", generated_text)
|
|
|
+ generated_text = re.sub(r"\s*```$", "", generated_text)
|
|
|
# 标准化数字格式(如果启用)
|
|
|
original_text = generated_text
|
|
|
if normalize_numbers:
|