Эх сурвалжийг харах

v2pipeline在分段区域增加异常捕获

赵小蒙 1 жил өмнө
parent
commit
fa6e305c12

+ 5 - 1
magic_pdf/pdf_parse_by_ocr_v2.py

@@ -92,7 +92,11 @@ def parse_pdf_by_ocr(pdf_bytes,
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)

+ 5 - 1
magic_pdf/pdf_parse_by_txt_v2.py

@@ -171,7 +171,11 @@ def parse_pdf_by_txt(
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)