| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- """
- 批量将表格结构应用到所有页
- """
- import json
- from pathlib import Path
- from table_line_generator import TableLineGenerator
- from PIL import Image
- from typing import List
- import argparse
def batch_apply_table_structure(
    source_json_path: str,
    target_image_dir: str,
    output_dir: str,
    structure_config_path: str = None
) -> List[dict]:
    """
    Apply one table structure to every image in a directory.

    The structure is loaded from ``structure_config_path`` when that file
    exists. Otherwise it is generated with ``TableLineGenerator`` from the OCR
    result at ``source_json_path`` (paired with a ``.jpg`` image of the same
    base name) and saved as ``table_structure.json`` inside ``output_dir``.

    Every ``*.jpg`` / ``*.png`` file directly inside ``target_image_dir`` is
    then passed to ``TableLineGenerator.apply_structure_to_image`` with the
    output path ``<output_dir>/<stem>_with_lines.jpg``. A failure on one image
    is recorded and does not stop the batch. The per-image report is written
    to ``<output_dir>/batch_results.json``.

    Args:
        source_json_path: Path to the source OCR result JSON (only read when
            no existing structure config is available).
        target_image_dir: Directory containing the target images.
        output_dir: Directory for generated files; created if missing.
        structure_config_path: Optional path to a saved table structure JSON.

    Returns:
        One dict per image, in sorted path order. Each has ``source`` and
        ``status`` (``'success'`` or ``'error'``); successes also carry
        ``output`` and failures carry ``error``.
    """
    # Create the output directory first: when the structure is generated it is
    # saved into this directory before any image is processed.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # 1. Load an existing table structure, or generate a new one.
    if structure_config_path and Path(structure_config_path).exists():
        # Explicit UTF-8 so the result does not depend on the platform locale.
        with open(structure_config_path, 'r', encoding='utf-8') as f:
            structure = json.load(f)
        print(f"📂 加载表格结构: {structure_config_path}")
    else:
        with open(source_json_path, 'r', encoding='utf-8') as f:
            ocr_data = json.load(f)

        # The source image is expected next to the JSON, with a .jpg suffix.
        source_image_path = Path(source_json_path).with_suffix('.jpg')
        generator = TableLineGenerator(str(source_image_path), ocr_data)

        # Return value is not needed here.
        # NOTE(review): presumably this populates the generator state that
        # save_table_structure persists - confirm against TableLineGenerator.
        generator.analyze_table_structure()
        # NOTE(review): assumes save_table_structure returns the structure
        # dict (it is used via .get() below) - confirm.
        structure = generator.save_table_structure(
            str(output_path / "table_structure.json")
        )
        print("✅ 生成表格结构配置")

    # 2. Collect all target images (non-recursive), in a stable sorted order.
    image_dir = Path(target_image_dir)
    target_images = sorted(
        path
        for pattern in ("*.jpg", "*.png")
        for path in image_dir.glob(pattern)
    )

    print(f"📁 找到 {len(target_images)} 个图片文件")

    # 3. Apply the structure to each image, recording the outcome per image.
    results = []
    for image_path in target_images:
        try:
            # Temporary generator with no OCR data; it only carries the
            # shared structure for this image.
            generator = TableLineGenerator(str(image_path), [])
            generator.rows = structure.get('rows', [])
            generator.columns = structure.get('columns', [])
            generator.row_height = structure.get('row_height', 30)

            output_file = output_path / f"{image_path.stem}_with_lines.jpg"
            generator.apply_structure_to_image(
                str(image_path),
                structure,
                str(output_file)
            )

            results.append({
                'source': str(image_path),
                'output': str(output_file),
                'status': 'success'
            })
            print(f"✅ {image_path.name} → {output_file.name}")

        except Exception as e:
            # Deliberate best-effort boundary: one bad image must not abort
            # the batch, so the error is recorded and the loop continues.
            results.append({
                'source': str(image_path),
                'status': 'error',
                'error': str(e)
            })
            print(f"❌ {image_path.name} 失败: {e}")

    # Save the report. UTF-8 is required because ensure_ascii=False writes
    # non-ASCII characters (paths, error messages) verbatim.
    with open(output_path / "batch_results.json", 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    success_count = sum(1 for r in results if r['status'] == 'success')
    print(f"\n🎉 完成!成功: {success_count}/{len(results)}")

    return results
if __name__ == "__main__":
    # Command-line entry point: collect the paths and run the batch job.
    cli = argparse.ArgumentParser(description="批量应用表格结构")

    # The three mandatory options share the same shape, so declare them as data.
    required_options = (
        ('-s', '--source', "源OCR结果JSON路径"),
        ('-t', '--target', "目标图片目录"),
        ('-o', '--output', "输出目录"),
    )
    for short_flag, long_flag, help_text in required_options:
        cli.add_argument(short_flag, long_flag, required=True, help=help_text)

    # Optional: path to a table structure config.
    cli.add_argument('-c', '--config', help="表格结构配置路径")

    options = cli.parse_args()

    batch_apply_table_structure(
        source_json_path=options.source,
        target_image_dir=options.target,
        output_dir=options.output,
        structure_config_path=options.config,
    )
|