|
|
@@ -0,0 +1,96 @@
|
|
|
+# 环境选择
|
|
|
+```
|
|
|
+git clone https://github.com/PaddlePaddle/PaddleX.git
|
|
|
+
|
|
|
+git config --local user.name "zhch158_admin"
|
|
|
+git config --local user.email "zhch158@sina.com"
|
|
|
+# 自定义缓存时间(如7200秒)
|
|
|
+git config --global credential.helper 'cache --timeout=7200'
|
|
|
+
|
|
|
+cd PaddleX
|
|
|
+# 创建 Python 3.11 虚拟环境
|
|
|
+uv venv paddle_env --python 3.11
|
|
|
+# 激活虚拟环境
|
|
|
+source paddle_env/bin/activate
|
|
|
+```
|
|
|
+# 安装依赖 CPU
|
|
|
+```
|
|
|
+uv pip install paddlepaddle==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
|
|
|
+uv pip install -e .
|
|
|
+uv pip install paddleocr==3.0.2
|
|
|
+```
|
|
|
+
|
|
|
+# 安装依赖 GPU(Linux)
|
|
|
+```
|
|
|
+# GPU 版本,需显卡驱动程序版本 ≥550.54.14(Linux)或 ≥550.54.14(Windows)
|
|
|
+uv pip install paddlepaddle-gpu==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
|
|
|
+uv pip install -e .
|
|
|
+uv pip install paddleocr==3.0.2
|
|
|
+```
|
|
|
+
|
|
|
+# 设置 VS Code 的 launch.json
|
|
|
+```json
|
|
|
+ "configurations": [
|
|
|
+ {
|
|
|
+ "name": "Python Debugger: Current File",
|
|
|
+ "type": "debugpy",
|
|
|
+ "request": "launch",
|
|
|
+ "program": "${file}",
|
|
|
+ "console": "integratedTerminal",
|
|
|
+ "cwd": "${fileDirname}",
|
|
|
+ "env": {"PYTHONPATH":"${workspaceFolder}:${env:PYTHONPATH}"},
|
|
|
+ "envFile": "${workspaceFolder}/.env",
|
|
|
+ "justMyCode": false
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "pipeline ppstructurev3",
|
|
|
+ "type": "debugpy",
|
|
|
+ "request": "launch",
|
|
|
+ "module": "paddlex.__main__",
|
|
|
+ // "program": "${workspaceFolder}/paddlex/paddlex_cli.py",
|
|
|
+ "console": "integratedTerminal",
|
|
|
+ "cwd": "${workspaceFolder}/zhch",
|
|
|
+ "env": {"PYTHONPATH":"${workspaceFolder}:${env:PYTHONPATH}"},
|
|
|
+ "envFile": "${workspaceFolder}/.env",
|
|
|
+ "args": [
|
|
|
+ // "-m", "paddlex.paddlex_cli",
|
|
|
+ // "--get_pipeline_config", "${workspaceFolder}/zhch/PP-StructureV3-zhch.yaml"
|
|
|
+ "--pipeline", "PP-StructureV3",
|
|
|
+ "--use_doc_orientation_classify=True",
|
|
|
+ "--use_doc_unwarping=True",
|
|
|
+ "--input", "sample_data/300674-母公司现金流量表-扫描.png",
|
|
|
+ "--save_path", "sample_data/output",
|
|
|
+ "--device", "gpu",
|
|
|
+ ],
|
|
|
+ "justMyCode": false
|
|
|
+ }
|
|
|
+ ]
|
|
|
+```
|
|
|
+
|
|
|
+# 测试样例
|
|
|
+```
|
|
|
+paddleocr ocr -i zhch/sample_data/600916_中国黄金_2002年报_83_94_2.png --save_path zhch/sample_data/output --enable_mkldnn=False --device=cpu
|
|
|
+
|
|
|
+# 版面分析
|
|
|
+paddleocr layout_detection -i zhch/sample_data/600916_中国黄金_2002年报_83_94_2.png --save_path zhch/sample_data/output --model_name="PP-DocLayout_plus-L" --enable_mkldnn=False --device=cpu
|
|
|
+
|
|
|
+# 表格解析
|
|
|
+paddleocr table_recognition_v2 -i zhch/sample_data/600916_中国黄金_2002年报_83_94_2.png --save_path zhch/sample_data/output --device=cpu --enable_mkldnn=False
|
|
|
+
|
|
|
+paddleocr pp_structurev3 \
|
|
|
+ -i zhch/sample_data/600916_中国黄金_2002年报_83_94_2.png --save_path zhch/sample_data/output \
|
|
|
+ --enable_mkldnn=False --device=cpu
|
|
|
+
|
|
|
+paddlex --pipeline table_recognition_v2 --use_doc_orientation_classify=False --use_doc_unwarping=False --input zhch/sample_data/600916_中国黄金_2002年报_83_94_2.png --save_path zhch/sample_data/output --device cpu
|
|
|
+
|
|
|
+paddlex --pipeline table_recognition_v2 --use_doc_orientation_classify=True --use_doc_unwarping=True --input zhch/sample_data/600916_中国黄金_2002年报_83_94_2.png --save_path zhch/sample_data/output --device cpu
|
|
|
+
|
|
|
+# 输入为 PDF 文件
|
|
|
+paddlex --pipeline table_recognition_v2 --use_doc_orientation_classify=True --use_doc_unwarping=True --input zhch/sample_data/至远彩色印刷工业有限公司-2022年母公司.pdf --save_path zhch/sample_data/output --device cpu
|
|
|
+```
|
|
|
+
|
|
|
+# macOS 查看显卡信息
|
|
|
+```
|
|
|
+# 使用 system_profiler 命令查看 GPU 信息
|
|
|
+system_profiler SPDisplaysDataType
|
|
|
+```
|