1 ماه پیش · fe475b635a
--- a/zhch/README.md
+++ b/zhch/README.md
@@ -22,10 +22,10 @@ uv pip install pytest PyMuPDF
 
				 # 安装依赖 CPU
			
 
				 
			
 
				 ```
			
 
				-uv pip install paddlepaddle==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
			
 
				+uv pip install paddlepaddle==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
			
 
				 uv pip install -e .
			
 
				 paddlex --install PaddleOCR PaddleDetection PaddleClas  # 例如PaddleOCR
			
 
				-# uv pip install paddleocr==3.1.0
			
 
				+# uv pip install paddleocr==3.2.0
			
 
				 ```
			
 
				 
			
 
				 # linux GPU - 更新版本以匹配NVML 570.169
			
@@ -44,7 +44,7 @@ wget https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/
 
				 sudo sh cuda_12.6.0_560.28.03_linux.run
			
 
				 
			
 
				 # 安装PaddlePaddle GPU版本
			
 
				-uv pip install -U --reinstall paddlepaddle-gpu==3.1.1 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
			
 
				+uv pip install -U --reinstall paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
			
 
				 uv pip install -e .
			
 
				 paddlex --install PaddleOCR PaddleDetection PaddleClas
			
 
				 
			
@@ -53,6 +53,7 @@ python -c "import paddle; print(f'CUDA available: {paddle.device.is_compiled_wit
 
				 ```
			
 
				 
			
 
				 # 模型下载源
			
 
				+
			
 
				 PaddleX 支持通过环境变量 PADDLE_PDX_MODEL_SOURCE 来指定模型下载源。根据 official_models.py 中的实现，支持以下下载源：
			
 
				 
			
 
				 支持的下载源
			
@@ -60,6 +61,7 @@ huggingface - HuggingFace（默认优先级）
 
				 modelscope - ModelScope
			
 
				 bos - 百度云存储
			
 
				 aistudio - AI Studio
			
 
				+
			
 
				 ```bash
			
 
				 # 使用百度云存储源
			
 
				 export PADDLE_PDX_MODEL_SOURCE="bos"
			
@@ -142,6 +144,19 @@ system_profiler SPDisplaysDataType
 
				 
			
 
				 # 调试pipeline
			
 
				 
			
 
				+zhch/my_config/PP-StructureV3.yaml 中所有 OCR 相关模块的参数都需要设定 limit_type 和 limit_side_len，例如：
			
 
				+
			
 
				+```yaml
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: text_detection
			
 
				+            model_name: PP-OCRv5_server_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 2560
			
 
				+            limit_type: max
			
 
				+            max_side_limit: 4000
			
 
				+```
			
 
				+
			
 
				 ## 问题分析
			
 
				 
			
 
				 1. **配置文件中印章识别是关闭的**：
			
@@ -226,10 +241,13 @@ paddlex --serve --pipeline zhch/PP-StructureV3-zhch.yaml
 
				 
			
 
				 # linux 10.192.72.11
			
 
				 # nohup paddlex --serve --port 8111 --device "gpu:3" --pipeline zhch/my_config/PP-StructureV3.yaml &
			
 
				-python3 start_paddlex_with_adapter.py --serve \
			
 
				+# python3 start_paddlex_with_adapter.py --serve \
			
 
				 #     --port "8111" \
			
 
				 #     --device "gpu:3" \
			
 
				-    --pipeline "my_config/PP-StructureV3.yaml"       
			
 
				+#     --pipeline "my_config/PP-StructureV3.yaml"
			
 
				+
			
 
				+# 启动ppstructure_v3增强自定义adapter服务
			
 
				+zhch/ppstructure_v3_daemon.sh start
			
 
				 
			
 
				 # client
			
 
				 python ppstructurev3_single_client.py --input_file "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/2023年度报告母公司.pdf" --output_dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results" --api_url "http://10.192.72.11:8111/layout-parsing" --timeout 300
			
@@ -242,4 +260,9 @@ python ppstructurev3_single_client.py --input_file "/Users/zhch158/workspace/dat
 
				 python ppstructurev3_single_client.py --input_file "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水.pdf" --output_dir "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results" --api_url "http://10.192.72.11:8111/layout-parsing" --timeout 300
			
 
				 
			
 
				 python ppstructurev3_single_client.py --input_file "/Users/zhch158/workspace/data/流水分析/B用户_图片合成流水.pdf" --output_dir "/Users/zhch158/workspace/data/流水分析/B用户_图片合成流水/data_PPStructureV3_Results" --api_url "http://10.192.72.11:8111/layout-parsing" --timeout 300
			
 
				+
			
 
				+python ppstructurev3_single_client.py --input_file "/Users/zhch158/workspace/data/流水分析/对公_招商银行图.pdf" --output_dir "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results" --api_url "http://10.192.72.11:8111/layout-parsing" --timeout 300
			
 
				+
			
 
				+python ppstructurev3_single_client.py --input_file "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照.pdf" --output_dir "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results" --api_url "http://10.192.72.11:8111/layout-parsing" --timeout 300
			
 
				+
			
 
				 ```