Browse Source

fix(magic_pdf): correct end page index and improve error handling

- Default end_page_id to len(dataset) - 1 (the last valid page index) instead of len(dataset), preventing an IndexError when accessing pages
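- Fix JSONDecodeError detection in LLM post-processing: use isinstance() instead of the identity check `e is json.JSONDecodeError`, which compared an exception instance to the class and never matched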
myhloli 10 months ago
parent
commit
f209ddea62

+ 1 - 1
magic_pdf/model/doc_analyze_by_custom_model.py

@@ -158,7 +158,7 @@ def doc_analyze(
     table_enable=None,
     table_enable=None,
 ) -> InferenceResult:
 ) -> InferenceResult:
 
 
-    end_page_id = end_page_id if end_page_id else len(dataset)
+    end_page_id = end_page_id if end_page_id else len(dataset) - 1
 
 
     model_manager = ModelSingleton()
     model_manager = ModelSingleton()
     custom_model = model_manager.get_model(
     custom_model = model_manager.get_model(

+ 1 - 1
magic_pdf/post_proc/llm_aided.py

@@ -151,7 +151,7 @@ Corrected title list:
                 logger.warning("The number of titles in the optimized result is not equal to the number of titles in the input.")
                 logger.warning("The number of titles in the optimized result is not equal to the number of titles in the input.")
                 retry_count += 1
                 retry_count += 1
         except Exception as e:
         except Exception as e:
-            if e is json.JSONDecodeError:
+            if isinstance(e, json.decoder.JSONDecodeError):
                 logger.warning(f"JSON decode error on attempt {retry_count + 1}: {e}")
                 logger.warning(f"JSON decode error on attempt {retry_count + 1}: {e}")
             else:
             else:
                 logger.exception(e)
                 logger.exception(e)