Browse Source

doc: remove dummy log

icecraft 8 tháng trước cách đây
mục cha
commit
95f334fb7a
1 tập tin đã thay đổi với 1 bổ sung và 7 xóa
  1. 1 7
      magic_pdf/data/batch_build_dataset.py

+ 1 - 7
magic_pdf/data/batch_build_dataset.py

@@ -1,7 +1,4 @@
 import concurrent.futures
-import glob
-import os
-import threading
 
 import fitz
 
@@ -83,6 +80,7 @@ def process_pdf_batch(pdf_jobs, idx):
         images.append(tmp)
     return (idx, images)
 
+
 def batch_build_dataset(pdf_paths, k, lang=None):
     """Process multiple PDFs by partitioning them into k balanced parts and
     processing each part in parallel.
@@ -122,9 +120,6 @@ def batch_build_dataset(pdf_paths, k, lang=None):
     # Partition the jobs based on page count. Each job has 1 page.
     partitions = partition_array_greedy(pdf_info, k)
 
-    for i, partition in enumerate(partitions):
-        print(f'Partition {i+1}: {len(partition)} pdfs')
-
     # Process each partition in parallel
     all_images_h = {}
 
@@ -146,7 +141,6 @@ def batch_build_dataset(pdf_paths, k, lang=None):
         for i, future in enumerate(concurrent.futures.as_completed(futures)):
             try:
                 idx, images = future.result()
-                print(f'Partition {i+1} completed: processed {len(images)} images')
                 all_images_h[idx] = images
             except Exception as e:
                 print(f'Error processing partition: {e}')