|
|
@@ -16,6 +16,7 @@ from paddlex import create_pipeline
|
|
|
from paddlex.utils.device import constr_device, parse_device
|
|
|
from tqdm import tqdm
|
|
|
import paddle
|
|
|
+from cuda_utils import detect_available_gpus, monitor_gpu_memory
|
|
|
|
|
|
from dotenv import load_dotenv
|
|
|
load_dotenv(override=True)
|
|
|
@@ -41,9 +42,26 @@ def worker(pipeline_name_or_config_path: str,
|
|
|
"""
|
|
|
try:
|
|
|
# 创建pipeline实例
|
|
|
+ from dotenv import load_dotenv
|
|
|
+ load_dotenv(override=True)
|
|
|
+ print(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
|
|
|
+ import paddle
|
|
|
+ paddle.set_device(device)
|
|
|
pipeline = create_pipeline(pipeline_name_or_config_path, device=device)
|
|
|
print(f"Worker {worker_id} initialized with device {device}")
|
|
|
+ except Exception as e:
|
|
|
+ print(f"Worker {worker_id} ({device}) initialization failed: {e}", file=sys.stderr)
|
|
|
+ traceback.print_exc()
|
|
|
+ # 发送错误信息到结果队列
|
|
|
+ result_queue.put([{
|
|
|
+ "error": f"Worker initialization failed: {str(e)}",
|
|
|
+ "worker_id": worker_id,
|
|
|
+ "device": device,
|
|
|
+ "success": False
|
|
|
+ }])
|
|
|
+ return
|
|
|
|
|
|
+ try:
|
|
|
should_end = False
|
|
|
batch = []
|
|
|
processed_count = 0
|
|
|
@@ -61,7 +79,7 @@ def worker(pipeline_name_or_config_path: str,
|
|
|
start_time = time.time()
|
|
|
|
|
|
# 使用pipeline预测
|
|
|
- results = list(pipeline.predict(
|
|
|
+ results = pipeline.predict(
|
|
|
batch,
|
|
|
use_doc_orientation_classify=True,
|
|
|
use_doc_unwarping=False,
|
|
|
@@ -69,7 +87,7 @@ def worker(pipeline_name_or_config_path: str,
|
|
|
use_chart_recognition=True,
|
|
|
use_table_recognition=True,
|
|
|
use_formula_recognition=True,
|
|
|
- ))
|
|
|
+ )
|
|
|
|
|
|
batch_processing_time = time.time() - start_time
|
|
|
batch_results = []
|
|
|
@@ -193,6 +211,14 @@ def parallel_process_with_official_approach(image_paths: List[str],
|
|
|
print(f" Batch size: {batch_size}")
|
|
|
print(f" Total images: {len(image_paths)}")
|
|
|
|
|
|
+ # 在主进程中初始化paddle,防止子进程CUDA初始化冲突
|
|
|
+ try:
|
|
|
+ import paddle
|
|
|
+ # 只在主进程中设置一个默认设备
|
|
|
+ paddle.set_device("cpu") # 主进程使用CPU
|
|
|
+ except Exception as e:
|
|
|
+ print(f"Warning: Failed to initialize paddle in main process: {e}")
|
|
|
+
|
|
|
# 使用Manager创建队列
|
|
|
with Manager() as manager:
|
|
|
task_queue = manager.Queue()
|
|
|
@@ -492,6 +518,12 @@ def main():
|
|
|
return 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
+ print(f"🚀 启动OCR程序...")
|
|
|
+ print(f"CUDA 版本: {paddle.device.cuda.get_device_name()}")
|
|
|
+ print(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
|
|
|
+ available_gpus = detect_available_gpus()
|
|
|
+ monitor_gpu_memory(available_gpus)
|
|
|
+
|
|
|
if len(sys.argv) == 1:
|
|
|
# 如果没有命令行参数,使用默认配置运行
|
|
|
print("No command line arguments provided. Running with default configuration...")
|
|
|
@@ -501,10 +533,10 @@ if __name__ == "__main__":
|
|
|
"input_dir": "../../OmniDocBench/OpenDataLab___OmniDocBench/images",
|
|
|
"output_dir": "./OmniDocBench_Results_Official",
|
|
|
"pipeline": "PP-StructureV3",
|
|
|
- "device": "gpu:0,1",
|
|
|
+ "device": "gpu:0",
|
|
|
"instances_per_device": 1,
|
|
|
- "batch_size": 1,
|
|
|
- "test_mode": False
|
|
|
+ "batch_size": 4,
|
|
|
+ # "test_mode": False
|
|
|
}
|
|
|
|
|
|
# 构造参数
|
|
|
@@ -513,6 +545,6 @@ if __name__ == "__main__":
|
|
|
sys.argv.extend([f"--{key}", str(value)])
|
|
|
|
|
|
# 测试模式
|
|
|
- # sys.argv.append("--test_mode")
|
|
|
+ sys.argv.append("--test_mode")
|
|
|
|
|
|
sys.exit(main())
|