|
|
@@ -225,6 +225,7 @@ async def _async_process_vlm(
|
|
|
f_dump_content_list,
|
|
|
f_make_md_mode,
|
|
|
server_url=None,
|
|
|
+ **kwargs,
|
|
|
):
|
|
|
"""异步处理VLM后端逻辑"""
|
|
|
parse_method = "vlm"
|
|
|
@@ -238,7 +239,7 @@ async def _async_process_vlm(
|
|
|
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
|
|
|
|
|
|
middle_json, infer_result = await aio_vlm_doc_analyze(
|
|
|
- pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url
|
|
|
+ pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url, **kwargs,
|
|
|
)
|
|
|
|
|
|
pdf_info = middle_json["pdf_info"]
|
|
|
@@ -265,6 +266,7 @@ def _process_vlm(
|
|
|
f_dump_content_list,
|
|
|
f_make_md_mode,
|
|
|
server_url=None,
|
|
|
+ **kwargs,
|
|
|
):
|
|
|
"""同步处理VLM后端逻辑"""
|
|
|
parse_method = "vlm"
|
|
|
@@ -278,7 +280,7 @@ def _process_vlm(
|
|
|
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
|
|
|
|
|
|
middle_json, infer_result = vlm_doc_analyze(
|
|
|
- pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url
|
|
|
+ pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url, **kwargs,
|
|
|
)
|
|
|
|
|
|
pdf_info = middle_json["pdf_info"]
|
|
|
@@ -311,6 +313,7 @@ def do_parse(
|
|
|
f_make_md_mode=MakeMode.MM_MD,
|
|
|
start_page_id=0,
|
|
|
end_page_id=None,
|
|
|
+ **kwargs,
|
|
|
):
|
|
|
# 预处理PDF字节数据
|
|
|
pdf_bytes_list = _prepare_pdf_bytes(pdf_bytes_list, start_page_id, end_page_id)
|
|
|
@@ -333,7 +336,7 @@ def do_parse(
|
|
|
output_dir, pdf_file_names, pdf_bytes_list, backend,
|
|
|
f_draw_layout_bbox, f_draw_span_bbox, f_dump_md, f_dump_middle_json,
|
|
|
f_dump_model_output, f_dump_orig_pdf, f_dump_content_list, f_make_md_mode,
|
|
|
- server_url
|
|
|
+ server_url, **kwargs,
|
|
|
)
|
|
|
|
|
|
|
|
|
@@ -357,6 +360,7 @@ async def aio_do_parse(
|
|
|
f_make_md_mode=MakeMode.MM_MD,
|
|
|
start_page_id=0,
|
|
|
end_page_id=None,
|
|
|
+ **kwargs,
|
|
|
):
|
|
|
# 预处理PDF字节数据
|
|
|
pdf_bytes_list = _prepare_pdf_bytes(pdf_bytes_list, start_page_id, end_page_id)
|
|
|
@@ -380,7 +384,7 @@ async def aio_do_parse(
|
|
|
output_dir, pdf_file_names, pdf_bytes_list, backend,
|
|
|
f_draw_layout_bbox, f_draw_span_bbox, f_dump_md, f_dump_middle_json,
|
|
|
f_dump_model_output, f_dump_orig_pdf, f_dump_content_list, f_make_md_mode,
|
|
|
- server_url
|
|
|
+ server_url, **kwargs,
|
|
|
)
|
|
|
|
|
|
|