|
|
@@ -76,7 +76,11 @@ def doc_analyze(
|
|
|
formula_enable=True,
|
|
|
table_enable=True,
|
|
|
):
|
|
|
- MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
|
|
|
+ """
|
|
|
+ 适当调大MIN_BATCH_INFERENCE_SIZE可以提高性能,可能会增加显存使用量,
|
|
|
+ 可通过环境变量MINERU_MIN_BATCH_INFERENCE_SIZE设置,默认值为100。
|
|
|
+ """
|
|
|
+ min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
|
|
|
|
|
|
# 收集所有页面信息
|
|
|
all_pages_info = [] # 存储(dataset_index, page_index, img, ocr, lang, width, height)
|
|
|
@@ -109,7 +113,7 @@ def doc_analyze(
|
|
|
|
|
|
# 准备批处理
|
|
|
images_with_extra_info = [(info[2], info[3], info[4]) for info in all_pages_info]
|
|
|
- batch_size = MIN_BATCH_INFERENCE_SIZE
|
|
|
+ batch_size = min_batch_inference_size
|
|
|
batch_images = [
|
|
|
images_with_extra_info[i:i + batch_size]
|
|
|
for i in range(0, len(images_with_extra_info), batch_size)
|