瀏覽代碼

feat: 添加从CSV文件获取图像文件列表的功能,支持状态过滤

zhch158_admin 3 月之前
父節點
當前提交
623eb0b3c4
共有 1 個文件被更改,包括 30 次插入2 次删除
  1. 30 2
      zhch/ppstructurev3_scheduler.py

+ 30 - 2
zhch/ppstructurev3_scheduler.py

@@ -118,6 +118,29 @@ def get_image_files_from_list(file_list_path: str) -> List[str]:
     print(f"✅ Found {len(valid_files)} valid files out of {len(image_files)} in list")
     return valid_files
 
+def get_image_files_from_csv(csv_file: str, status_filter: str = "fail") -> List[str]:
+    """
+    Read image file paths from a CSV file, keeping rows with a given status.
+
+    The file is read as UTF-8. Each data row holds an image path followed by
+    a status in its last column; a header row ``image_path,status`` is
+    skipped. Blank lines and rows with fewer than two columns are ignored
+    instead of raising ``ValueError``. A path that contains unquoted commas
+    is reassembled from every column before the status.
+
+    Args:
+        csv_file: Path of the CSV file to read.
+        status_filter: Status to keep; compared case-insensitively and
+            ignoring surrounding whitespace.
+
+    Returns:
+        The image paths of the matching rows, in file order.
+    """
+    # Imported locally so the function does not rely on the module's import block.
+    import csv
+
+    print(f"📄 Reading image files from CSV: {csv_file}")
+
+    wanted = status_filter.strip().lower()
+    image_files = []
+    # newline='' lets the csv module handle line endings itself.
+    with open(csv_file, 'r', encoding='utf-8', newline='') as f:
+        for row in csv.reader(f):
+            if len(row) < 2:
+                # Blank line or a row without a status column.
+                continue
+            image_file = ",".join(row[:-1]).strip()
+            status = row[-1].strip().lower()
+            # Drop the header so it is never returned as an image path.
+            if image_file.lower() == "image_path" and status == "status":
+                continue
+            if image_file and status == wanted:
+                image_files.append(image_file)
+
+    return image_files
 
 def collect_pid_files(pid_output_file: str) -> List[Tuple[str, str]]:
     """
@@ -332,6 +355,7 @@ def main():
     input_group = parser.add_mutually_exclusive_group(required=True)
     input_group.add_argument("--input_dir", type=str, help="Input directory")
     input_group.add_argument("--input_file_list", type=str, help="Input file list (one file per line)")
+    input_group.add_argument("--input_csv", type=str, help="Input CSV file with image_path and status columns")
 
     parser.add_argument("--output_dir", type=str, required=True, help="Output directory")
     parser.add_argument("--single_process_script", type=str, 
@@ -356,7 +380,11 @@ def main():
     
     try:
         # 获取图像文件列表
-        if args.input_file_list:
+        if args.input_csv:
+            # 从CSV文件读取
+            image_files = get_image_files_from_csv(args.input_csv, "fail")
+            print(f"📊 Loaded {len(image_files)} files from CSV with status filter: fail")
+        elif args.input_file_list:
             # 从文件列表读取
             image_files = get_image_files_from_list(args.input_file_list)
         else:
@@ -506,7 +534,7 @@ def main():
         # 收集文件处理结果
         processed_files = []
         processed_files = collect_processed_files(results)
-        output_file_processed = output_dir / f"processed_files_{args.num_processes}procs.csv"
+        output_file_processed = output_dir / f"processed_files_{args.num_processes}procs_{time.strftime('%Y%m%d_%H%M%S')}.csv"
         with open(output_file_processed, 'w', encoding='utf-8') as f:
             f.write("image_path,status\n")
             for file_path, status in processed_files: