浏览代码

fix(magic-pdf): add default values and improve warning logs for config options

Ensure that 'temp-output-dir', 'models-dir', and 'device-mode' have sensible default values in case they are not specified in the config file.

myhloli 1 年之前
父节点
当前提交
30ac6f227c
共有 3 个文件被更改,包括 20 次插入和 4 次删除
  1. 1 1
      magic_pdf/cli/magicpdf.py
  2. 18 3
      magic_pdf/libs/config_reader.py
  3. 1 0
      magic_pdf/model/pdf_extract_kit.py

+ 1 - 1
magic_pdf/cli/magicpdf.py

@@ -89,7 +89,6 @@ def do_parse(
     orig_model_list = copy.deepcopy(model_list)
 
     local_image_dir, local_md_dir = prepare_env(pdf_file_name, parse_method)
-    logger.info(f"local output dir is {local_md_dir}")
     image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter(local_md_dir)
     image_dir = str(os.path.basename(local_image_dir))
 
@@ -163,6 +162,7 @@ def do_parse(
             path=f"{pdf_file_name}_content_list.json",
             mode=AbsReaderWriter.MODE_TXT,
         )
+    logger.info(f"local output dir is '{local_md_dir}', you can found the result in it.")
 
 
 @click.group()

+ 18 - 3
magic_pdf/libs/config_reader.py

@@ -56,17 +56,32 @@ def get_bucket_name(path):
 
 def get_local_dir():
     config = read_config()
-    return config.get("temp-output-dir", "/tmp")
+    local_dir = config.get("temp-output-dir")
+    if local_dir is None:
+        logger.warning("'temp-output-dir' not found in magic-pdf.json, use '/tmp' as default")
+        return "/tmp"
+    else:
+        return local_dir
 
 
 def get_local_models_dir():
     config = read_config()
-    return config.get("models-dir", "/tmp/models")
+    models_dir = config.get("models-dir")
+    if models_dir is None:
+        logger.warning("'models-dir' not found in magic-pdf.json, use '/tmp/models' as default")
+        return "/tmp/models"
+    else:
+        return models_dir
 
 
 def get_device():
     config = read_config()
-    return config.get("device-mode", "cpu")
+    device = config.get("device-mode")
+    if device is None:
+        logger.warning("'device-mode' not found in magic-pdf.json, use 'cpu' as default")
+        return "cpu"
+    else:
+        return device
 
 
 if __name__ == "__main__":

+ 1 - 0
magic_pdf/model/pdf_extract_kit.py

@@ -105,6 +105,7 @@ class CustomPEKModel:
         self.device = kwargs.get("device", self.configs["config"]["device"])
         logger.info("using device: {}".format(self.device))
         models_dir = kwargs.get("models_dir", os.path.join(root_dir, "resources", "models"))
+        logger.info("using models_dir: {}".format(models_dir))
 
         # 初始化公式识别
         if self.apply_formula: