Browse Source

refactor(tools): improve code readability and maintainability

- Remove unnecessary line breaks and adjust indentation
- Update function call to use named arguments for better readability
- Modify _do_parse function call to use MakeMode.MM_MD instead of the passed-in f_make_md_mode argument
myhloli 7 tháng trước
mục cha
commit
54ce594bf6
1 tập tin đã thay đổi với 21 bổ sung và 4 xóa
  1. 21 4
      magic_pdf/tools/common.py

+ 21 - 4
magic_pdf/tools/common.py

@@ -109,9 +109,7 @@ def _do_parse(
     pdf_bytes = ds._raw_data
     local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
 
-    image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
-        local_md_dir
-    )
+    image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
     image_dir = str(os.path.basename(local_image_dir))
 
     if len(model_list) == 0:
@@ -317,7 +315,26 @@ def batch_do_parse(
 
     infer_results = batch_doc_analyze(dss, parse_method, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable)
     for idx, infer_result in enumerate(infer_results):
-        _do_parse(output_dir, pdf_file_names[idx], dss[idx], infer_result.get_infer_res(), parse_method, debug_able, f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox, lang=lang)
+        _do_parse(
+            output_dir = output_dir,
+            pdf_file_name = pdf_file_names[idx],
+            pdf_bytes_or_dataset = dss[idx],
+            model_list = infer_result.get_infer_res(),
+            parse_method = parse_method,
+            debug_able = debug_able,
+            f_draw_span_bbox = f_draw_span_bbox,
+            f_draw_layout_bbox = f_draw_layout_bbox,
+            f_dump_md=f_dump_md,
+            f_dump_middle_json=f_dump_middle_json,
+            f_dump_model_json=f_dump_model_json,
+            f_dump_orig_pdf=f_dump_orig_pdf,
+            f_dump_content_list=f_dump_content_list,
+            f_make_md_mode=MakeMode.MM_MD,
+            f_draw_model_bbox=f_draw_model_bbox,
+            f_draw_line_sort_bbox=f_draw_line_sort_bbox,
+            f_draw_char_bbox=f_draw_char_bbox,
+            lang=lang,
+        )
 
 
 parse_pdf_methods = click.Choice(['ocr', 'txt', 'auto'])