6 месяцев назад · c11f2ea045
--- a/ocr_utils/pdf_utils.py
+++ b/ocr_utils/pdf_utils.py
@@ -41,6 +41,7 @@ from .pdf_text_extraction import (
 
				     extract_all_text_blocks,
			
 
				     extract_all_text_blocks_pypdfium2,
			
 
				     extract_all_text_blocks_fitz,
			
 
				+    detect_page_type,
			
 
				 )
			
 
				 
			
 
				 from .pdf_image_rendering import (
			
@@ -371,8 +372,20 @@ class PDFUtils:
 
				             pdf_bytes, dpi, start_page_id, end_page_id, image_type
			
 
				         )
			
 
				     
			
 
				-    # ========================================================================
			
 
				-    # 其他功能
			
 
				+    @staticmethod
			
 
				+    def detect_page_type(
			
 
				+        pdf_doc: Any, 
			
 
				+        page_idx: int,
			
 
				+        char_threshold: int = 50
			
 
				+    ) -> str:
			
 
				+        """
			
 
				+        Detect the page type (text-based PDF or scanned page requiring OCR).
			
 
				+        
			
 
				+        Returns:
			
 
				+            Page type: 'txt' or 'ocr'
			
 
				+        """
			
 
				+        return detect_page_type(pdf_doc, page_idx, char_threshold)
			
 
				+
			
 
				     # ========================================================================
			
 
				     # 其他功能
			
 
				     # ========================================================================