|
|
@@ -1,6 +1,7 @@
|
|
|
import tempfile
|
|
|
from pathlib import Path
|
|
|
-from typing import List
|
|
|
+from typing import List, Tuple
|
|
|
+import json
|
|
|
|
|
|
def split_files(file_list: List[str], num_splits: int) -> List[List[str]]:
|
|
|
"""
|
|
|
@@ -130,4 +131,47 @@ def get_image_files_from_csv(csv_file: str, status_filter: str = "fail") -> List
|
|
|
if status.lower() == status_filter.lower():
|
|
|
image_files.append(image_file)
|
|
|
|
|
|
- return image_files
|
|
|
+ return image_files
|
|
|
+
|
|
|
+
|
|
|
def collect_pid_files(pid_output_file: str) -> List[Tuple[str, str]]:
    """
    Collect per-file processing outcomes from a single-process result file.

    The file is expected to hold a JSON object with a "results" list that
    contains one entry per processed image, for example::

        {
            "results": [
                {
                    "image_path": "some_document.pdf_140.jpg",
                    "processing_time": 2.0265579223632812e-06,
                    "success": true,
                    "device": "gpu:3",
                    "output_json": "/path/to/process_3/some_document.pdf_140.json",
                    "output_md": "/path/to/process_3/some_document.pdf_140.md"
                },
                ...
            ]
        }

    Args:
        pid_output_file: Path of the process output file.

    Returns:
        A list of ``(image_path, status)`` tuples. ``status`` is "success"
        when the entry's "success" value is truthy and "fail" otherwise; a
        missing "image_path" is reported as an empty string. Entries that
        are not JSON objects are skipped. An empty list is returned (after
        printing a warning) when the file does not exist, cannot be read or
        parsed, or does not have the expected structure.
    """
    if not Path(pid_output_file).exists():
        print(f"⚠️ Warning: PID output file not found: {pid_output_file}")
        return []

    try:
        with open(pid_output_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # e.g. a result file that was only partially written.
        print(f"⚠️ Warning: Failed to read PID output file: {pid_output_file} ({exc})")
        return []

    # "results" must be a list; anything else (missing key, null, object,
    # string) cannot be iterated as result entries.
    if not isinstance(data, dict) or not isinstance(data.get("results"), list):
        print(f"⚠️ Warning: Invalid PID output file format: {pid_output_file}")
        return []

    # Map every entry to (image path, status): "success" when the entry's
    # "success" flag is truthy, otherwise "fail".
    return [
        (
            file_result.get("image_path", ""),
            "success" if file_result.get("success", False) else "fail",
        )
        for file_result in data["results"]
        if isinstance(file_result, dict)
    ]
|