Просмотр исходного кода

refactor: improve file naming logic and enhance unique filename generation

myhloli 5 месяцев назад
Родитель
Коммит
7d27726eb2
1 изменённый файл: 13 добавлений и 5 удалений
  1. 13 5
      projects/gradio_app/app.py

+ 13 - 5
projects/gradio_app/app.py

@@ -20,7 +20,7 @@ def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, formula_enable, table_e
     os.makedirs(output_dir, exist_ok=True)
 
     try:
-        file_name = f'{str(Path(doc_path).stem)}_{time.time()}'
+        file_name = f'{str(Path(doc_path).stem)}_{time.strftime("%y%m%d_%H%M%S")}'
         pdf_data = read_fn(doc_path)
         if is_ocr:
             parse_method = 'ocr'
@@ -142,13 +142,21 @@ all_lang = []
 all_lang.extend([*other_lang, *add_lang])
 
 
+def safe_stem(file_path):
+    stem = Path(file_path).stem
+    # 只保留字母、数字、下划线和点,其他字符替换为下划线
+    return re.sub(r'[^\w.]', '_', stem)
+
+
 def to_pdf(file_path):
+
     if file_path is None:
         return None
+
     pdf_bytes = read_fn(file_path)
-    # 将pdfbytes 写入到uuid.pdf中
-    # 生成唯一的文件名
-    unique_filename = f'{uuid.uuid4()}.pdf'
+
+    # unique_filename = f'{uuid.uuid4()}.pdf'
+    unique_filename = f'{safe_stem(file_path)}.pdf'
 
     # 构建完整的文件路径
     tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
@@ -168,7 +176,7 @@ if __name__ == '__main__':
                 with gr.Row():
                     file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'])
                 with gr.Row(equal_height=True):
-                    with gr.Column(scale=3):
+                    with gr.Column(scale=4):
                         max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
                     with gr.Column(scale=1):
                         language = gr.Dropdown(all_lang, label='Language', value='ch')