Explorar el Código

v2pipeline在分段区域增加异常捕获

赵小蒙 hace 1 año
padre
commit
fa6e305c12
Se han modificado 2 ficheros con 10 adiciones y 2 borrados
  1. +5 -1
      magic_pdf/pdf_parse_by_ocr_v2.py
  2. +5 -1
      magic_pdf/pdf_parse_by_txt_v2.py

+ 5 - 1
magic_pdf/pdf_parse_by_ocr_v2.py

@@ -92,7 +92,11 @@ def parse_pdf_by_ocr(pdf_bytes,
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)

+ 5 - 1
magic_pdf/pdf_parse_by_txt_v2.py

@@ -171,7 +171,11 @@ def parse_pdf_by_txt(
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)