瀏覽代碼

refactor(pdf_parse): disable performance measurement and remove page timing log

- Comment out the @measure_time decorators on the txt_spans_extract_v2 and sort_lines_by_model functions
- Remove the logger.info call that reported page_process_time
- Comment out the PerformanceStats.print_stats() call
myhloli 8 月之前
父節點
當前提交
6bfc17119d
共有 1 個文件被更改,包括 3 次插入和 8 次刪除
  1. +3 -8
      magic_pdf/pdf_parse_union_core_v2.py

+ 3 - 8
magic_pdf/pdf_parse_union_core_v2.py

@@ -218,7 +218,7 @@ def calculate_contrast(img, img_mode) -> float:
     # logger.info(f"contrast: {contrast}")
     return round(contrast, 2)
 
-@measure_time
+# @measure_time
 def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang):
     # cid用0xfffd表示,连字符拆开
     # text_blocks_raw = pdf_page.get_text('rawdict', flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP)['blocks']
@@ -492,7 +492,7 @@ def insert_lines_into_block(block_bbox, line_height, page_w, page_h):
     else:
         return [[x0, y0, x1, y1]]
 
-@measure_time
+# @measure_time
 def sort_lines_by_model(fix_blocks, page_w, page_h, line_height):
     page_line_list = []
 
@@ -962,12 +962,7 @@ def pdf_parse_union(
             )
         pdf_info_dict[f'page_{page_id}'] = page_info
 
-
-    logger.info(
-        f'page_process_time: {round(time.time() - start_time, 2)}'
-    )
-
-    PerformanceStats.print_stats()
+    # PerformanceStats.print_stats()
 
     """分段"""
     para_split(pdf_info_dict)