Parcourir la source

feat: 添加支持单个文件输入的功能,允许处理PDF和图像文件

zhch158_admin il y a 2 mois
Parent
commit
410bcfa650
1 fichier modifié avec 13 ajouts et 7 suppressions
  1. 13 7
      zhch/ppstructurev3_single_process.py

+ 13 - 7
zhch/ppstructurev3_single_process.py

@@ -95,6 +95,8 @@ def get_input_files(args) -> List[str]:
         raw_files = get_image_files_from_csv(args.input_csv, "fail")
     elif args.input_file_list:
         raw_files = get_image_files_from_list(args.input_file_list)
+    elif args.input_file:
+        raw_files = [Path(args.input_file).resolve()]
     else:
         input_dir = Path(args.input_dir).resolve()
         if not input_dir.exists():
@@ -267,6 +269,7 @@ def main():
     
     # 参数定义
     input_group = parser.add_mutually_exclusive_group(required=True)
+    input_group.add_argument("--input_file", type=str, help="Input file (supports both PDF and image file)")
     input_group.add_argument("--input_dir", type=str, help="Input directory (supports both PDF and image files)")
     input_group.add_argument("--input_file_list", type=str, help="Input file list (one file per line)")
     input_group.add_argument("--input_csv", type=str, help="Input CSV file with image_path and status columns")
@@ -356,14 +359,17 @@ def main():
         
         print(f"💾 Results saved to: {output_file}")
 
-        if args.collect_results:
-            processed_files = collect_pid_files(output_file)
+		# 如果没有收集结果的路径,使用缺省文件名,和output_dir同一路径
+        if not args.collect_results:
+            output_file_processed = Path(args.output_dir) / f"processed_files_{time.strftime('%Y%m%d_%H%M%S')}.csv"
+        else:
             output_file_processed = Path(args.collect_results).resolve()
-            with open(output_file_processed, 'w', encoding='utf-8') as f:
-                f.write("image_path,status\n")
-                for file_path, status in processed_files:
-                    f.write(f"{file_path},{status}\n")
-            print(f"💾 Processed files saved to: {output_file_processed}")
+        processed_files = collect_pid_files(output_file)
+        with open(output_file_processed, 'w', encoding='utf-8') as f:
+            f.write("image_path,status\n")
+            for file_path, status in processed_files:
+                f.write(f"{file_path},{status}\n")
+        print(f"💾 Processed files saved to: {output_file_processed}")
 
         return 0