Ver código fonte

refactor: improve configuration handling and enhance environment variable support

myhloli 5 meses atrás
pai
commit
f407079bc3
2 arquivos alterados com 22 adições e 53 exclusões
  1. +1 -30
      magic-pdf.template.json
  2. +21 -23
      mineru/backend/pipeline/config_reader.py

+ 1 - 30
magic-pdf.template.json

@@ -3,23 +3,6 @@
         "bucket-name-1":["ak", "sk", "endpoint"],
         "bucket-name-2":["ak", "sk", "endpoint"]
     },
-    "models-dir":"/tmp/models",
-    "layoutreader-model-dir":"/tmp/layoutreader",
-    "device-mode":"cpu",
-    "layout-config": {
-        "model": "doclayout_yolo"
-    },
-    "formula-config": {
-        "mfd_model": "yolo_v8_mfd",
-        "mfr_model": "unimernet_small",
-        "enable": true
-    },
-    "table-config": {
-        "model": "rapid_table",
-        "sub_model": "slanet_plus",
-        "enable": true,
-        "max_time": 400
-    },
     "latex-delimiter-config": {
         "display": {
             "left": "$$",
@@ -31,18 +14,6 @@
         }
     },
     "llm-aided-config": {
-        "formula_aided": {
-            "api_key": "your_api_key",
-            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
-            "model": "qwen2.5-7b-instruct",
-            "enable": false
-        },
-        "text_aided": {
-            "api_key": "your_api_key",
-            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
-            "model": "qwen2.5-7b-instruct",
-            "enable": false
-        },
         "title_aided": {
             "api_key": "your_api_key",
             "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
@@ -50,5 +21,5 @@
             "enable": false
         }
     },
-    "config_version": "1.2.1"
+    "config_version": "1.2.2"
 }

+ 21 - 23
mineru/backend/pipeline/config_reader.py

@@ -16,11 +16,12 @@ def read_config():
         config_file = os.path.join(home_dir, CONFIG_FILE_NAME)
 
     if not os.path.exists(config_file):
-        raise FileNotFoundError(f'{config_file} not found')
-
-    with open(config_file, 'r', encoding='utf-8') as f:
-        config = json.load(f)
-    return config
+        logger.warning(f'{config_file} not found, using default configuration')
+        return None
+    else:
+        with open(config_file, 'r', encoding='utf-8') as f:
+            config = json.load(f)
+        return config
 
 
 def get_s3_config(bucket_name: str):
@@ -88,33 +89,30 @@ def get_local_layoutreader_model_dir():
 
 
 def get_device():
-    config = read_config()
-    device = config.get('device-mode')
-    if device is None:
-        logger.warning(f"'device-mode' not found in {CONFIG_FILE_NAME}, use 'cpu' as default")
-        return 'cpu'
+    device_mode = os.getenv('MINERU_DEVICE_MODE', None)
+    if device_mode is not None:
+        return device_mode
     else:
-        return device
+        logger.warning(f"not found 'MINERU_DEVICE_MODE' in environment variable, use 'cpu' as default.")
+        return 'cpu'
 
 
 def get_table_recog_config():
-    config = read_config()
-    table_config = config.get('table-config')
-    if table_config is None:
-        logger.warning(f"'table-config' not found in {CONFIG_FILE_NAME}, use 'False' as default")
-        return json.loads(f'{{"enable": true}}')
+    table_enable = os.getenv('MINERU_TABLE_ENABLE', None)
+    if table_enable is not None:
+        return json.loads(f'{{"enable": {table_enable}}}')
     else:
-        return table_config
+        logger.warning(f"not found 'MINERU_TABLE_ENABLE' in environment variable, use 'true' as default.")
+        return json.loads(f'{{"enable": true}}')
 
 
 def get_formula_config():
-    config = read_config()
-    formula_config = config.get('formula-config')
-    if formula_config is None:
-        logger.warning(f"'formula-config' not found in {CONFIG_FILE_NAME}, use 'True' as default")
-        return json.loads(f'{{"enable": true}}')
+    formula_enable = os.getenv('MINERU_FORMULA_ENABLE', None)
+    if formula_enable is not None:
+        return json.loads(f'{{"enable": {formula_enable}}}')
     else:
-        return formula_config
+        logger.warning(f"not found 'MINERU_FORMULA_ENABLE' in environment variable, use 'true' as default.")
+        return json.loads(f'{{"enable": true}}')
 
 
 def get_latex_delimiter_config():