Эх сурвалжийг харах

feat: update Gradio app to improve Markdown conversion and enhance HTML header

myhloli 4 сар өмнө
parent
commit
e853541093

+ 6 - 2
mineru/cli/gradio_app.py

@@ -90,7 +90,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
     return re.sub(pattern, replace, markdown_text)
 
 
-def to_markdown(file_path, end_pages, is_ocr, formula_enable, table_enable, language, backend, url):
+def to_markdown(file_path, end_pages=10, is_ocr=False, formula_enable=True, table_enable=True, language="ch", backend="pipeline", url=None):
     file_path = to_pdf(file_path)
     # 获取识别的md文件以及压缩包文件路径
     local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr, formula_enable, table_enable, language, backend, url)
@@ -172,7 +172,7 @@ def to_pdf(file_path):
     return tmp_file_path
 
 
-if __name__ == '__main__':
+def main():
     example_enable = False
 
     with gr.Blocks() as demo:
@@ -248,3 +248,7 @@ if __name__ == '__main__':
         clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr])
 
     demo.launch(server_name='localhost')
+
+
+if __name__ == '__main__':
+    main()

+ 2 - 3
mineru/resources/header.html

@@ -54,7 +54,7 @@
             font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
               'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
           ">
-          MinerU: PDF Extraction Demo
+          MinerU 2: PDF Extraction Demo
         </h1>
       </div>
     </div>
@@ -66,8 +66,7 @@
         color: #fafafa;
         opacity: 0.8;
       ">
-      A one-stop, open-source, high-quality data extraction tool, supports
-      PDF/webpage/e-book extraction.<br>
+      A one-stop, open-source, high-quality data extraction tool that supports converting PDF to Markdown and JSON.<br>
     </p>
     <style>
       .link-block {

+ 1 - 0
pyproject.toml

@@ -109,6 +109,7 @@ mineru = "mineru.cli:client.main"
 mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
 mineru-models-download = "mineru.cli.models_download:download_models"
 mineru-api = "mineru.cli.fast_api:main"
+mineru-gradio = "mineru.cli.gradio_app:main"
 
 [tool.setuptools.dynamic]
 version = {attr = "mineru.version.__version__"}