Selaa lähdekoodia

Merge remote-tracking branch 'origin/master'

myhloli 1 vuosi sitten
vanhempi
commit
ec96c4adf9

+ 0 - 2
.github/workflows/gpu-ci.yml

@@ -32,8 +32,6 @@ jobs:
     - name: check-requirements
       run: |
         source ~/.bashrc
-        conda init
-        conda activate QA
         pip install magic-pdf[full-cpu]==0.6.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
         pip install https://raw.githubusercontent.com/myhloli/wheels/main/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
 

+ 16 - 2
README.md

@@ -1,4 +1,11 @@
-<div id="top"></div>
+<div id="top">
+
+<p align="center">
+  <img src="docs/images/datalab_logo.png" width="160px" style="vertical-align:middle;">
+  <span style="font-size: 28px; vertical-align: middle;">MinerU</span>
+</p>
+
+</div>
 <div align="center">
 
 [![stars](https://img.shields.io/github/stars/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU)
@@ -17,7 +24,14 @@
 </div>
 
 <div align="center">
-
+<p align="center">
+<a href="https://github.com/opendatalab/MinerU">MinerU: An end-to-end PDF parsing tool based on PDF-Extract-Kit, supporting conversion from PDF to Markdown.</a>🚀🚀🚀<br>
+<a href="https://github.com/opendatalab/PDF-Extract-Kit">PDF-Extract-Kit: A Comprehensive Toolkit for High-Quality PDF Content Extraction</a>🔥🔥🔥
+</p>
+
+<p align="center">
+    👋 Join us on <a href="https://discord.gg/AsQMhuMN" target="_blank">Discord</a> and <a href="https://cdn.vansin.top/internlm/mineru.jpg" target="_blank">WeChat</a>
+</p>
 </div>
 
 # MinerU 

+ 14 - 2
README_zh-CN.md

@@ -1,4 +1,9 @@
-<div id="top"></div>
+<div id="top">
+<p align="center">
+  <img src="docs/images/datalab_logo.png" width="160px" style="vertical-align:middle;">
+  <span style="font-size: 28px; vertical-align: middle;">MinerU</span>
+</p>
+</div>
 <div align="center">
 
 [![stars](https://img.shields.io/github/stars/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU)
@@ -14,7 +19,14 @@
 </div>
 
 <div align="center">
-
+<p align="center">
+<a href="https://github.com/opendatalab/MinerU">MinerU: 端到端的PDF解析工具(基于PDF-Extract-Kit)支持PDF转Markdown</a>🚀🚀🚀<br>
+<a href="https://github.com/opendatalab/PDF-Extract-Kit">PDF-Extract-Kit: 高质量PDF解析工具箱</a>🔥🔥🔥
+</p>
+
+<p align="center">
+    👋 Join us on <a href="https://discord.gg/AsQMhuMN" target="_blank">Discord</a> and <a href="https://cdn.vansin.top/internlm/mineru.jpg" target="_blank">WeChat</a>
+</p>
 </div>
 
 # MinerU 

BIN
docs/images/datalab_logo.png


BIN
magic_pdf/libs/__pycache__/__init__.cpython-312.pyc


BIN
magic_pdf/libs/__pycache__/version.cpython-312.pyc


BIN
tests/__pycache__/test_unit.cpython-39-pytest-7.4.0.pyc


BIN
tests/test_cli/__pycache__/test_bench.cpython-39-pytest-7.4.0.pyc


BIN
tests/test_cli/__pycache__/test_cli.cpython-39-pytest-7.4.0.pyc


BIN
tests/test_cli/conf/__pycache__/conf.cpython-39.pyc


BIN
tests/test_cli/lib/__pycache__/__init__.cpython-39.pyc


BIN
tests/test_cli/lib/__pycache__/calculate_score.cpython-39.pyc


BIN
tests/test_cli/lib/__pycache__/scoring.cpython-39.pyc


+ 3 - 3
tests/test_cli/test_bench_gpu.py

@@ -6,7 +6,7 @@ import json
 from magic_pdf.pipe.UNIPipe import UNIPipe
 from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
 from lib import calculate_score
-
+import shutil
 pdf_res_path = conf.conf["pdf_res_path"]
 code_path = conf.conf["code_path"]
 pdf_dev_path = conf.conf["pdf_dev_path"]
@@ -58,8 +58,8 @@ def pdf_to_markdown():
         if not os.path.exists(dir_path):
             os.makedirs(dir_path, exist_ok=True)
         res_path = os.path.join(dir_path, f"{demo_name}.md")
-        #src_path = os.path.join(pdf_res_path, "pdf", f"{demo_name}.pdf") 
-        #shutil.copy(src_path, res_path)
+        src_path = os.path.join(pdf_res_path, demo_name, "auto", f"{demo_name}.md")
+        shutil.copy(src_path, res_path)