소스 검색

v2pipeline在分段区域增加异常捕获

赵小蒙 1 년 전
부모
커밋
fa6e305c12
2개의 변경된 파일, 10개의 추가 및 2개의 삭제
  1. 5 1
      magic_pdf/pdf_parse_by_ocr_v2.py
  2. 5 1
      magic_pdf/pdf_parse_by_txt_v2.py

+ 5 - 1
magic_pdf/pdf_parse_by_ocr_v2.py

@@ -92,7 +92,11 @@ def parse_pdf_by_ocr(pdf_bytes,
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)

+ 5 - 1
magic_pdf/pdf_parse_by_txt_v2.py

@@ -171,7 +171,11 @@ def parse_pdf_by_txt(
         pdf_info_dict[f"page_{page_id}"] = page_info
 
     """分段"""
-    para_split(pdf_info_dict, debug_mode=debug_mode)
+    try:
+        para_split(pdf_info_dict, debug_mode=debug_mode)
+    except Exception as e:
+        logger.exception(e)
+        raise e
 
     """dict转list"""
     pdf_info_list = dict_to_list(pdf_info_dict)