""" 批量将表格结构应用到所有页 """ import json from pathlib import Path from table_line_generator import TableLineGenerator from PIL import Image from typing import List import argparse def batch_apply_table_structure( source_json_path: str, target_image_dir: str, output_dir: str, structure_config_path: str = None ): """ 批量应用表格结构 Args: source_json_path: 源OCR结果JSON路径(用于生成初始结构) target_image_dir: 目标图片目录 output_dir: 输出目录 structure_config_path: 表格结构配置路径(可选) """ # 1. 加载或生成表格结构 if structure_config_path and Path(structure_config_path).exists(): # 加载已有配置 with open(structure_config_path, 'r') as f: structure = json.load(f) print(f"📂 加载表格结构: {structure_config_path}") else: # 生成新配置 with open(source_json_path, 'r') as f: ocr_data = json.load(f) source_image_path = Path(source_json_path).with_suffix('.jpg') generator = TableLineGenerator(str(source_image_path), ocr_data) structure_info = generator.analyze_table_structure() structure = generator.save_table_structure( f"{output_dir}/table_structure.json" ) print(f"✅ 生成表格结构配置") # 2. 查找所有目标图片 target_images = list(Path(target_image_dir).glob("*.jpg")) target_images.extend(list(Path(target_image_dir).glob("*.png"))) target_images = sorted(target_images) print(f"📁 找到 {len(target_images)} 个图片文件") # 3. 批量应用 output_path = Path(output_dir) output_path.mkdir(parents=True, exist_ok=True) results = [] for image_path in target_images: try: # 创建临时生成器(用于应用结构) generator = TableLineGenerator(str(image_path), []) generator.rows = structure.get('rows', []) generator.columns = structure.get('columns', []) generator.row_height = structure.get('row_height', 30) # 应用结构 output_file = output_path / f"{image_path.stem}_with_lines.jpg" generator.apply_structure_to_image( str(image_path), structure, str(output_file) ) results.append({ 'source': str(image_path), 'output': str(output_file), 'status': 'success' }) print(f"✅ {image_path.name} → {output_file.name}") except Exception as e: results.append({ 'source': str(image_path), 'status': 'error', 'error': str(e) }) print(f"❌ {image_path.name} 失败: {e}") # 保存结果 with open(output_path / "batch_results.json", 'w') as f: json.dump(results, f, indent=2, ensure_ascii=False) success_count = sum(1 for r in results if r['status'] == 'success') print(f"\n🎉 完成!成功: {success_count}/{len(results)}") if __name__ == "__main__": parser = argparse.ArgumentParser(description="批量应用表格结构") parser.add_argument('-s', '--source', required=True, help="源OCR结果JSON路径") parser.add_argument('-t', '--target', required=True, help="目标图片目录") parser.add_argument('-o', '--output', required=True, help="输出目录") parser.add_argument('-c', '--config', help="表格结构配置路径") args = parser.parse_args() batch_apply_table_structure( args.source, args.target, args.output, args.config )