فهرست منبع

v2pipeline在分段区域增加异常捕获

赵小蒙 1 سال پیش
والد
کامیت
fa6e305c12
2 فایلهای تغییر یافته به همراه 10 افزوده شده و 2 حذف شده
  1. 5 1
      magic_pdf/pdf_parse_by_ocr_v2.py
  2. 5 1
      magic_pdf/pdf_parse_by_txt_v2.py

+ 5 - 1
magic_pdf/pdf_parse_by_ocr_v2.py

@@ -92,7 +92,11 @@ def parse_pdf_by_ocr(pdf_bytes,
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)

+ 5 - 1
magic_pdf/pdf_parse_by_txt_v2.py

@@ -171,7 +171,11 @@ def parse_pdf_by_txt(
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)