Răsfoiți Sursa

Merge pull request #1553 from myhloli/dev

fix(magic_pdf): correct end page index and improve error handling
Xiaomeng Zhao 10 luni în urmă
părinte
comite
d08fe2714c

+ 1 - 1
magic_pdf/model/doc_analyze_by_custom_model.py

@@ -158,7 +158,7 @@ def doc_analyze(
     table_enable=None,
 ) -> InferenceResult:
 
-    end_page_id = end_page_id if end_page_id else len(dataset)
+    end_page_id = end_page_id if end_page_id else len(dataset) - 1
 
     model_manager = ModelSingleton()
     custom_model = model_manager.get_model(

+ 1 - 1
magic_pdf/post_proc/llm_aided.py

@@ -151,7 +151,7 @@ Corrected title list:
                 logger.warning("The number of titles in the optimized result is not equal to the number of titles in the input.")
                 retry_count += 1
         except Exception as e:
-            if e is json.JSONDecodeError:
+            if isinstance(e, json.decoder.JSONDecodeError):
                 logger.warning(f"JSON decode error on attempt {retry_count + 1}: {e}")
             else:
                 logger.exception(e)