import tempfile from pathlib import Path from typing import List def split_files(file_list: List[str], num_splits: int) -> List[List[str]]: """ 将文件列表分割成指定数量的子列表 Args: file_list: 文件路径列表 num_splits: 分割数量 Returns: 分割后的文件列表 """ if num_splits <= 0: return [file_list] chunk_size = len(file_list) // num_splits remainder = len(file_list) % num_splits chunks = [] start = 0 for i in range(num_splits): # 前remainder个chunk多分配一个文件 current_chunk_size = chunk_size + (1 if i < remainder else 0) if current_chunk_size > 0: chunks.append(file_list[start:start + current_chunk_size]) start += current_chunk_size return [chunk for chunk in chunks if chunk] # 过滤空列表 def create_temp_file_list(file_chunk: List[str]) -> str: """ 创建临时文件列表文件 Args: file_chunk: 文件路径列表 Returns: 临时文件路径 """ with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: for file_path in file_chunk: f.write(f"{file_path}\n") return f.name def get_image_files_from_dir(input_dir: Path, max_files: int = None) -> List[str]: """ 从目录获取图像文件列表 Args: input_dir: 输入目录 max_files: 最大文件数量限制 Returns: 图像文件路径列表 """ image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'] image_files = [] for ext in image_extensions: image_files.extend(list(input_dir.glob(f"*{ext}"))) image_files.extend(list(input_dir.glob(f"*{ext.upper()}"))) # 去重并排序 image_files = sorted(list(set(str(f) for f in image_files))) # 限制文件数量 if max_files: image_files = image_files[:max_files] return image_files def get_image_files_from_list(file_list_path: str) -> List[str]: """ 从文件列表获取图像文件列表 Args: file_list_path: 文件列表路径 Returns: 图像文件路径列表 """ print(f"📄 Reading file list from: {file_list_path}") with open(file_list_path, 'r', encoding='utf-8') as f: image_files = [line.strip() for line in f if line.strip()] # 验证文件存在性 valid_files = [] missing_files = [] for file_path in image_files: if Path(file_path).exists(): valid_files.append(file_path) else: missing_files.append(file_path) if missing_files: print(f"⚠️ Warning: {len(missing_files)} files not found:") for missing_file in missing_files[:5]: # 只显示前5个 print(f" - {missing_file}") if len(missing_files) > 5: print(f" ... and {len(missing_files) - 5} more") print(f"✅ Found {len(valid_files)} valid files out of {len(image_files)} in list") return valid_files def get_image_files_from_csv(csv_file: str, status_filter: str = "fail") -> List[str]: """ 从CSV文件获取图像文件列表 Args: csv_file: CSV文件路径 status_filter: 状态过滤器 Returns: 图像文件路径列表 """ print(f"📄 Reading image files from CSV: {csv_file}") # 读取CSV文件, 表头:image_path,status image_files = [] with open(csv_file, 'r', encoding='utf-8') as f: for line in f: # 需要去掉表头, 按“,”分割,读取文件名,状态 image_file, status = line.strip().split(",") if status.lower() == status_filter.lower(): image_files.append(image_file) return image_files