config_reader.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. # Copyright (c) Opendatalab. All rights reserved.
  2. import json
  3. import os
  4. from loguru import logger
# Name of the configuration file. It can be overridden through the
# MINERU_TOOLS_CONFIG_JSON environment variable and defaults to
# 'magic-pdf.json'.
CONFIG_FILE_NAME = os.getenv('MINERU_TOOLS_CONFIG_JSON', 'magic-pdf.json')
  7. def read_config():
  8. if os.path.isabs(CONFIG_FILE_NAME):
  9. config_file = CONFIG_FILE_NAME
  10. else:
  11. home_dir = os.path.expanduser('~')
  12. config_file = os.path.join(home_dir, CONFIG_FILE_NAME)
  13. if not os.path.exists(config_file):
  14. raise FileNotFoundError(f'{config_file} not found')
  15. with open(config_file, 'r', encoding='utf-8') as f:
  16. config = json.load(f)
  17. return config
  18. def get_s3_config(bucket_name: str):
  19. """~/magic-pdf.json 读出来."""
  20. config = read_config()
  21. bucket_info = config.get('bucket_info')
  22. if bucket_name not in bucket_info:
  23. access_key, secret_key, storage_endpoint = bucket_info['[default]']
  24. else:
  25. access_key, secret_key, storage_endpoint = bucket_info[bucket_name]
  26. if access_key is None or secret_key is None or storage_endpoint is None:
  27. raise Exception(f'ak, sk or endpoint not found in {CONFIG_FILE_NAME}')
  28. # logger.info(f"get_s3_config: ak={access_key}, sk={secret_key}, endpoint={storage_endpoint}")
  29. return access_key, secret_key, storage_endpoint
  30. def get_s3_config_dict(path: str):
  31. access_key, secret_key, storage_endpoint = get_s3_config(get_bucket_name(path))
  32. return {'ak': access_key, 'sk': secret_key, 'endpoint': storage_endpoint}
  33. def get_bucket_name(path):
  34. bucket, key = parse_bucket_key(path)
  35. return bucket
  36. def parse_bucket_key(s3_full_path: str):
  37. """
  38. 输入 s3://bucket/path/to/my/file.txt
  39. 输出 bucket, path/to/my/file.txt
  40. """
  41. s3_full_path = s3_full_path.strip()
  42. if s3_full_path.startswith("s3://"):
  43. s3_full_path = s3_full_path[5:]
  44. if s3_full_path.startswith("/"):
  45. s3_full_path = s3_full_path[1:]
  46. bucket, key = s3_full_path.split("/", 1)
  47. return bucket, key
  48. def get_local_models_dir():
  49. config = read_config()
  50. models_dir = config.get('models-dir')
  51. if models_dir is None:
  52. logger.warning(f"'models-dir' not found in {CONFIG_FILE_NAME}, use '/tmp/models' as default")
  53. return '/tmp/models'
  54. else:
  55. return models_dir
  56. def get_local_layoutreader_model_dir():
  57. config = read_config()
  58. layoutreader_model_dir = config.get('layoutreader-model-dir')
  59. if layoutreader_model_dir is None or not os.path.exists(layoutreader_model_dir):
  60. home_dir = os.path.expanduser('~')
  61. layoutreader_at_modelscope_dir_path = os.path.join(home_dir, '.cache/modelscope/hub/ppaanngggg/layoutreader')
  62. logger.warning(f"'layoutreader-model-dir' not exists, use {layoutreader_at_modelscope_dir_path} as default")
  63. return layoutreader_at_modelscope_dir_path
  64. else:
  65. return layoutreader_model_dir
  66. def get_device():
  67. config = read_config()
  68. device = config.get('device-mode')
  69. if device is None:
  70. logger.warning(f"'device-mode' not found in {CONFIG_FILE_NAME}, use 'cpu' as default")
  71. return 'cpu'
  72. else:
  73. return device
  74. def get_table_recog_config():
  75. config = read_config()
  76. table_config = config.get('table-config')
  77. if table_config is None:
  78. logger.warning(f"'table-config' not found in {CONFIG_FILE_NAME}, use 'False' as default")
  79. return json.loads(f'{{"enable": true}}')
  80. else:
  81. return table_config
  82. def get_formula_config():
  83. config = read_config()
  84. formula_config = config.get('formula-config')
  85. if formula_config is None:
  86. logger.warning(f"'formula-config' not found in {CONFIG_FILE_NAME}, use 'True' as default")
  87. return json.loads(f'{{"enable": true}}')
  88. else:
  89. return formula_config
  90. def get_latex_delimiter_config():
  91. config = read_config()
  92. latex_delimiter_config = config.get('latex-delimiter-config')
  93. if latex_delimiter_config is None:
  94. logger.warning(f"'latex-delimiter-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
  95. return None
  96. else:
  97. return latex_delimiter_config
  98. def get_llm_aided_config():
  99. config = read_config()
  100. llm_aided_config = config.get('llm-aided-config')
  101. if llm_aided_config is None:
  102. logger.warning(f"'llm-aided-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
  103. return None
  104. else:
  105. return llm_aided_config