| 名称 | 最后提交时间 | |
|---|---|---|
| .. | | |
| image | há 2 meses atrás | |
| utils | há 1 mês atrás | |
| OmniDocBench_DotsOCR_multthreads.py | há 1 mês atrás | |
| README.md | há 1 mês atrás | |
| convert_model_float16.py | há 3 meses atrás | |
| convert_model_macos_float32.py | há 3 meses atrás | |
| demo_1.jpg | há 3 meses atrás | |
| demo_gradio.py | há 3 meses atrás | |
| demo_hf.py | há 2 meses atrás | |
| demo_hf_macos_bfloat16.py | há 3 meses atrás | |
| demo_hf_macos_float16.py | há 3 meses atrás | |
| demo_hf_macos_float32.py | há 3 meses atrás | |
| demo_vllm.py | há 3 meses atrás | |
| get_image_by_fitz.md | há 3 meses atrás | |
| launch_model_vllm.sh | há 2 meses atrás | |
| launch_model_vllm_1gpu.sh | há 3 meses atrás | |
| launch_model_vllm_macos.sh | há 2 meses atrás | |
| requirements-macos.txt | há 3 meses atrás | |
| vllm_daemon.sh | há 1 mês atrás | |
# Clone the repo and configure repo-local git identity.
git clone https://gitee.com/zhch158_admin/dots.ocr.git
git config --local user.name "zhch158_admin"
git config --local user.email "zhch158@sina.com"
# Custom credential-cache timeout (e.g. 7200 seconds).
git config --global credential.helper 'cache --timeout=7200'
conda create -n dots.ocr python=3.12
# vLLM currently supports Python up to 3.12, hence the pin.
flash-attn 确实没有 macOS 的预编译包:flash-attn 是专门为 NVIDIA GPU(CUDA)设计的,而 Mac 使用 Apple Silicon 或 Intel CPU,不支持 CUDA,因此在 macOS 上无法安装 flash-attn。

# Linux (CUDA) install: pinned PyTorch + vLLM versions known to be compatible.
cd dots.ocr
conda activate dots.ocr
# CUDA 12.6 wheels, versions pinned to match vLLM 0.9.1 below.
uv pip install -U torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu126
# dots.ocr currently uses transformers==4.51.3.
# vLLM must therefore be downgraded — only 0.9.1 works here
# (vLLM 0.10.1.1 requires transformers>=4.55.0, which conflicts).
uv pip install -e .
# Install the pinned vLLM release (NOT the latest — see compatibility notes above).
uv pip install -U vllm==0.9.1
# Sanity-check the installed versions to diagnose compatibility problems.
pip list | grep -E "(vllm|transformers|torch|flash_attn|flashinfer)"
# Find which process occupies port 8101.
sudo lsof -i:8101
# List all listening TCP/UDP ports.
sudo ss -tuln
# Show entries for a specific port (text filter).
sudo ss -tulpn | grep :8101
# Show process info for the port using ss's own filter expression.
# The expression must be quoted: unquoted, the shell word-splits
# 'sport = :8101' and ss may silently match nothing.
sudo ss -tulpn '( sport = :8101 )'
# After updating, download the model weights (via ModelScope).
python3 tools/download_model.py --type modelscope
# Remove broken libcuda symlinks.
# CAUTION: destructive — this deletes driver-library links system-wide.
sudo rm -f /usr/lib/x86_64-linux-gnu/libcuda.so*
sudo rm -f /usr/local/cuda*/lib64/libcuda.so*
# Locate the real NVIDIA driver library.
find /usr -name "libcuda.so*" 2>/dev/null
# Recreate the symlinks (assuming /usr/lib/x86_64-linux-gnu/libcuda.so.570.169 was found;
# substitute the version the find above actually reports).
sudo ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so.570.169 /usr/lib/x86_64-linux-gnu/libcuda.so.1
sudo ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so.1 /usr/lib/x86_64-linux-gnu/libcuda.so
# Verify the driver and PyTorch can see the GPU.
nvidia-smi
python -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.device_count())"
# macOS install: default (non-CUDA) wheels, manual dependency list.
cd dots.ocr
conda activate py312
uv pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0
# Skip dependency resolution — some pins (e.g. flash-attn) are Linux/CUDA-only.
uv pip install -e . --no-deps
# Manually install the macOS-compatible dependency set instead.
uv pip install -r zhch/requirements-macos.txt
"configurations": [
  {
    // Debug whichever Python file is open in the editor.
    "name": "Python Debugger: Current File",
    "type": "debugpy",
    "request": "launch",
    "program": "${file}",
    "console": "integratedTerminal",
    "cwd": "${fileDirname}",
    "env": { "PYTHONPATH": "${workspaceFolder};${env:PYTHONPATH}" },
    "envFile": "${workspaceFolder}/.env",
    "justMyCode": false
  },
  {
    // Launch the PaddleX PP-StructureV3 pipeline through the local CLI wrapper.
    "name": "app",
    "type": "debugpy",
    "request": "launch",
    // "module": "paddlex.__main__",
    "program": "${workspaceFolder}/zhch/paddlex_cli.py",
    "console": "integratedTerminal",
    "cwd": "${workspaceFolder}/zhch",
    "env": { "PYTHONPATH": "${workspaceFolder};${env:PYTHONPATH}" },
    "envFile": "${workspaceFolder}/.env",
    "args": [
      // "-m", "paddlex.paddlex_cli",
      // "--get_pipeline_config", "${workspaceFolder}/zhch/PP-StructureV3-zhch.yaml"
      "--pipeline", "PP-StructureV3",
      "--use_doc_orientation_classify=True",
      "--use_doc_unwarping=True",
      "--input", "sample_data/300674-母公司现金流量表-扫描.png",
      "--save_path", "sample_data/output",
      "--device", "gpu"
    ],
    "justMyCode": false
  }
]
zhch/launch_model_vllm.sh 依赖下述修改:需要在 vLLM 的启动脚本 /home/ubuntu/anaconda3/envs/dots.ocr/bin/vllm 中加入 DotsOCR 的导入(完整脚本如下):
#!/home/ubuntu/anaconda3/envs/dots.ocr/bin/python3
# -*- coding: utf-8 -*-
"""Patched vLLM console entry point that registers the DotsOCR model.

Replaces the stock ``bin/vllm`` script in the conda env so that
``vllm serve`` can load the custom DotsOCR architecture.
(The pasted original had lost its indentation — the ``if`` body below
must be indented or Python raises IndentationError.)
"""
import sys

from vllm.entrypoints.cli.main import main
# Importing this module registers DotsOCR with vLLM's model registry;
# the imported name is intentionally unused.
from DotsOCR import modeling_dots_ocr_vllm  # noqa: F401

if __name__ == "__main__":
    # Strip setuptools/Windows launcher suffixes from argv[0]
    # (stock vllm wrapper behavior, preserved as-is).
    if sys.argv[0].endswith("-script.pyw"):
        sys.argv[0] = sys.argv[0][:-11]
    elif sys.argv[0].endswith(".exe"):
        sys.argv[0] = sys.argv[0][:-4]
    sys.exit(main())
# Monitor GPU memory usage (refresh every second).
watch -n 1 nvidia-smi
# Launch the Gradio demo app.
python demo_gradio.py
参考:https://dotsocr.xiaohongshu.com/

Flash Attention 是一种高效的注意力机制实现,通过重新设计内存访问模式来显著提升性能,核心技术包括:
- 分块计算 (Block-wise Computation)
- 内存层次优化 (Memory Hierarchy Optimization)
- 在线 Softmax (Online Softmax)
# demo/demo_hf.py — load the HF model with Flash Attention 2.
# NOTE(review): requires the flash-attn package (CUDA only, no macOS build —
# see the notes above); confirm it is installed before using this path.
model = AutoModelForCausalLM.from_pretrained(
model_path,
attn_implementation="flash_attention_2", # enable Flash Attention 2
torch_dtype=torch.bfloat16,
device_map="auto",
trust_remote_code=True # executes code shipped with the model repo — trust the source
)
flash-attn==2.8.0.post2 # CUDA 专用,不支持 macOS
# Run the OmniDocBench benchmark against a running vLLM server.
cd ~/zhch/dots.ocr/zhch
conda activate dots.ocr
# Start the vLLM daemon first (serves DotsOCR on port 8101).
./vllm_daemon.sh start
# Default run (uses the script's built-in defaults).
python OmniDocBench_DotsOCR_multthreads.py
# Linux run: local server at 127.0.0.1:8101.
python OmniDocBench_DotsOCR_multthreads.py --input_file "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/2023年度报告母公司.pdf" --output_dir "./data_DotsOCR_Results" --ip "127.0.0.1" --port "8101" --model_name "DotsOCR" --prompt_mode "prompt_layout_all_en" --batch_size "1" --max_workers "3" --dpi "200"
# macOS run: points at the remote GPU server 10.192.72.11.
python OmniDocBench_DotsOCR_multthreads.py --input_file "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/2023年度报告母公司.pdf" --output_dir "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results" --ip "10.192.72.11" --port "8101" --model_name "DotsOCR" --prompt_mode "prompt_layout_all_en" --batch_size "1" --max_workers "3" --dpi "200"