Browse Source

Merge pull request #2166 from icecraft/fix/doc_analyze

fix: support page range
Xiaomeng Zhao 7 months ago
parent
commit
8aa61b0e9f
1 changed file with 3 additions and 2 deletions
  1. 3 2
      magic_pdf/model/doc_analyze_by_custom_model.py

+ 3 - 2
magic_pdf/model/doc_analyze_by_custom_model.py

@@ -146,10 +146,11 @@ def doc_analyze(
             img_dict = page_data.get_image()
             images.append(img_dict['img'])
             page_wh_list.append((img_dict['width'], img_dict['height']))
+    
     if lang is None or lang == 'auto':
-        images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(dataset))]
+        images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(images))]
     else:
-        images_with_extra_info = [(images[index], ocr, lang) for index in range(len(dataset))]
+        images_with_extra_info = [(images[index], ocr, lang) for index in range(len(images))]
 
     if len(images) >= MIN_BATCH_INFERENCE_SIZE:
         batch_size = MIN_BATCH_INFERENCE_SIZE