浏览代码

feat(language): add FT LANG cache directory setup

myhloli 1 年之前
父节点
当前提交
57380cbed5
共有 2 个文件被更改,包括 12 次插入和 0 次删除
  1. 12 0
      magic_pdf/libs/language.py
  2. 二进制
      magic_pdf/resources/fasttext-langdetect/lid.176.ftz

+ 12 - 0
magic_pdf/libs/language.py

@@ -1,8 +1,19 @@
+import os
 import unicodedata
+
+if not os.getenv("FTLANG_CACHE"):
+    current_file_path = os.path.abspath(__file__)
+    current_dir = os.path.dirname(current_file_path)
+    root_dir = os.path.dirname(current_dir)
+    ftlang_cache_dir = os.path.join(root_dir, 'resources', 'fasttext-langdetect')
+    os.environ["FTLANG_CACHE"] = str(ftlang_cache_dir)
+    # print(os.getenv("FTLANG_CACHE"))
+
 from fast_langdetect import detect_language
 
 
 def detect_lang(text: str) -> str:
+
     if len(text) == 0:
         return ""
     try:
@@ -18,6 +29,7 @@ def detect_lang(text: str) -> str:
 
 
 if __name__ == '__main__':
+    print(os.getenv("FTLANG_CACHE"))
     print(detect_lang("This is a test."))
     print(detect_lang("<html>This is a test</html>"))
     print(detect_lang("这个是中文测试。"))

二进制
magic_pdf/resources/fasttext-langdetect/lid.176.ftz