config_loader.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
"""
Load and save table-structure configuration files.
"""
  4. import json
  5. from pathlib import Path
  6. def load_structure_from_config(config_path: Path) -> dict:
  7. """
  8. 从配置文件加载表格结构
  9. Args:
  10. config_path: 配置文件路径
  11. Returns:
  12. 表格结构字典
  13. """
  14. with open(config_path, 'r', encoding='utf-8') as f:
  15. structure = json.load(f)
  16. # 兼容旧版配置(补充缺失字段)
  17. if 'horizontal_lines' not in structure:
  18. # 从 rows 生成横线坐标
  19. horizontal_lines = []
  20. for row in structure.get('rows', []):
  21. horizontal_lines.append(row['y_start'])
  22. if structure.get('rows'):
  23. horizontal_lines.append(structure['rows'][-1]['y_end'])
  24. structure['horizontal_lines'] = horizontal_lines
  25. if 'vertical_lines' not in structure:
  26. # 从 columns 生成竖线坐标
  27. vertical_lines = []
  28. for col in structure.get('columns', []):
  29. vertical_lines.append(col['x_start'])
  30. if structure.get('columns'):
  31. vertical_lines.append(structure['columns'][-1]['x_end'])
  32. structure['vertical_lines'] = vertical_lines
  33. # 转换修改标记(从列表转为集合)
  34. if 'modified_h_lines' in structure:
  35. structure['modified_h_lines'] = set(structure['modified_h_lines'])
  36. else:
  37. structure['modified_h_lines'] = set()
  38. if 'modified_v_lines' in structure:
  39. structure['modified_v_lines'] = set(structure['modified_v_lines'])
  40. else:
  41. structure['modified_v_lines'] = set()
  42. # 转换旧版的 modified_rows/modified_cols(如果存在)
  43. if 'modified_rows' in structure and not structure['modified_h_lines']:
  44. structure['modified_h_lines'] = set(structure.get('modified_rows', []))
  45. if 'modified_cols' in structure and not structure['modified_v_lines']:
  46. structure['modified_v_lines'] = set(structure.get('modified_cols', []))
  47. return structure
  48. def save_structure_to_config(structure: dict, output_path: Path):
  49. """
  50. 保存表格结构到配置文件
  51. Args:
  52. structure: 表格结构字典
  53. output_path: 输出文件路径
  54. """
  55. save_data = {
  56. 'rows': structure['rows'],
  57. 'columns': structure['columns'],
  58. 'horizontal_lines': structure.get('horizontal_lines', []),
  59. 'vertical_lines': structure.get('vertical_lines', []),
  60. 'row_height': structure['row_height'],
  61. 'col_widths': structure['col_widths'],
  62. 'table_bbox': structure['table_bbox'],
  63. 'modified_h_lines': list(structure.get('modified_h_lines', set())),
  64. 'modified_v_lines': list(structure.get('modified_v_lines', set()))
  65. }
  66. with open(output_path, 'w', encoding='utf-8') as f:
  67. json.dump(save_data, f, indent=2, ensure_ascii=False)