Procházet zdrojové kódy

add version_name to middle json

赵小蒙 před 1 rokem
rodič
revize
bd1834284e
Změněny 3 soubory, 26 přidání a 15 odebrání
  1. 15 0
      magic_pdf/libs/commons.py
  2. 7 1
      magic_pdf/user_api.py
  3. 4 14
      setup.py

+ 15 - 0
magic_pdf/libs/commons.py

@@ -1,6 +1,7 @@
 import datetime
 import json
 import os, re, configparser
+import subprocess
 import time
 
 import boto3
@@ -11,6 +12,20 @@ from botocore.config import Config
 import fitz # 1.23.9中已经切换到rebase
 # import fitz_old as fitz  # 使用1.23.9之前的pymupdf库
 
+
+def get_version():
+    command = ["git", "describe", "--tags"]
+    try:
+        version = subprocess.check_output(command).decode().strip()
+        version_parts = version.split("-")
+        if len(version_parts) > 1 and version_parts[0].startswith("magic_pdf"):
+            return version_parts[1]
+        else:
+            raise ValueError(f"Invalid version tag {version}. Expected format is magic_pdf-<version>-released.")
+    except Exception as e:
+        print(e)
+        return "0.0.0"
+
 def get_delta_time(input_time):
     return round(time.time() - input_time, 2)
 

+ 7 - 1
magic_pdf/user_api.py

@@ -16,11 +16,11 @@ import re
 
 from loguru import logger
 
+from magic_pdf.libs.commons import get_version
 from magic_pdf.rw import AbsReaderWriter
 from magic_pdf.pdf_parse_by_ocr_v2 import parse_pdf_by_ocr
 from magic_pdf.pdf_parse_by_txt_v2 import parse_pdf_by_txt
 
-
 PARSE_TYPE_TXT = "txt"
 PARSE_TYPE_OCR = "ocr"
 
@@ -39,6 +39,8 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
 
     pdf_info_dict["_parse_type"] = PARSE_TYPE_TXT
 
+    pdf_info_dict["_version_name"] = get_version()
+
     return pdf_info_dict
 
 
@@ -57,6 +59,8 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
 
     pdf_info_dict["_parse_type"] = PARSE_TYPE_OCR
 
+    pdf_info_dict["_version_name"] = get_version()
+
     return pdf_info_dict
 
 
@@ -118,4 +122,6 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr
     else:
         pdf_info_dict["_parse_type"] = PARSE_TYPE_TXT
 
+    pdf_info_dict["_version_name"] = get_version()
+
     return pdf_info_dict

+ 4 - 14
setup.py

@@ -1,5 +1,8 @@
 from setuptools import setup, find_packages
-import subprocess
+
+from magic_pdf.libs.commons import get_version
+
+
 def parse_requirements(filename):
     with open(filename) as f:
         lines = f.read().splitlines()
@@ -15,23 +18,10 @@ def parse_requirements(filename):
 
     return requires
 
-def get_version():
-    command = ["git", "describe", "--tags"]
-    try:
-        version = subprocess.check_output(command).decode().strip()
-        version_parts = version.split("-")
-        if len(version_parts) > 1 and version_parts[0].startswith("magic_pdf"):
-            return version_parts[1]
-        else:
-            raise ValueError(f"Invalid version tag {version}. Expected format is magic_pdf-<version>-released.")
-    except Exception as e:
-        print(e)
-        return "0.0.0"
 
 
 setup(
     name="magic_pdf",  # 项目名
-    # version="0.1.3",  # 版本号
     version=get_version(),  # 自动从tag中获取版本号
     packages=find_packages(),  # 包含所有的包
     install_requires=parse_requirements('requirements.txt'),  # 项目依赖的第三方库