"""Load and save table-structure configuration files."""
import json
from pathlib import Path


def _boundary_lines(spans, start_key, end_key):
    """Derive boundary-line coordinates from a list of consecutive spans.

    Args:
        spans: List of dicts (rows or columns), or None.
        start_key: Key holding each span's start coordinate.
        end_key: Key holding each span's end coordinate.

    Returns:
        Every span's start coordinate followed by the last span's end
        coordinate; an empty list when there are no spans.
    """
    if not spans:
        return []
    coords = [span[start_key] for span in spans]
    coords.append(spans[-1][end_key])
    return coords


def _marker_list(markers):
    """Convert a collection of modification markers to a JSON-friendly list.

    The list is sorted so that saved files are deterministic; if the
    elements cannot be compared with each other, iteration order is kept.
    """
    try:
        return sorted(markers)
    except TypeError:
        return list(markers)


def load_structure_from_config(config_path: Path) -> dict:
    """Load a table structure from a JSON configuration file.

    Missing fields from older config versions are filled in:
    ``horizontal_lines`` / ``vertical_lines`` are rebuilt from ``rows`` /
    ``columns``, and the modification markers are converted from lists to
    sets (falling back to the legacy ``modified_rows`` / ``modified_cols``
    keys when the new ones are absent or empty).

    Args:
        config_path: Path of the configuration file.

    Returns:
        The table-structure dictionary.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        structure = json.load(f)

    # Backward compatibility: rebuild line coordinates from rows/columns.
    if 'horizontal_lines' not in structure:
        structure['horizontal_lines'] = _boundary_lines(
            structure.get('rows'), 'y_start', 'y_end')
    if 'vertical_lines' not in structure:
        structure['vertical_lines'] = _boundary_lines(
            structure.get('columns'), 'x_start', 'x_end')

    # JSON has no set type, so markers are stored as lists; convert back.
    # Legacy keys are only consulted when the new key yields nothing.
    for current_key, legacy_key in (
        ('modified_h_lines', 'modified_rows'),
        ('modified_v_lines', 'modified_cols'),
    ):
        markers = set(structure.get(current_key) or ())
        if not markers and legacy_key in structure:
            markers = set(structure.get(legacy_key) or ())
        structure[current_key] = markers

    return structure


def save_structure_to_config(structure: dict, output_path: Path):
    """Save a table structure to a JSON configuration file.

    Args:
        structure: The table-structure dictionary. Must contain ``rows``,
            ``columns``, ``row_height``, ``col_widths`` and ``table_bbox``.
        output_path: Path of the output file.

    Raises:
        KeyError: If a required key is missing from ``structure``.
        TypeError: If a value is not JSON-serializable. The output file is
            not touched in that case.
    """
    save_data = {
        'rows': structure['rows'],
        'columns': structure['columns'],
        'horizontal_lines': structure.get('horizontal_lines', []),
        'vertical_lines': structure.get('vertical_lines', []),
        'row_height': structure['row_height'],
        'col_widths': structure['col_widths'],
        'table_bbox': structure['table_bbox'],
        'modified_h_lines': _marker_list(
            structure.get('modified_h_lines', set())),
        'modified_v_lines': _marker_list(
            structure.get('modified_v_lines', set())),
    }

    # Serialize before opening the file: mode 'w' truncates immediately, so
    # a serialization error after opening would destroy an existing config.
    payload = json.dumps(save_data, indent=2, ensure_ascii=False)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(payload)