3 місяців тому · 6a18247ccf
--- a/zhch/demo_gradio.py
+++ b/zhch/demo_gradio.py
@@ -0,0 +1,950 @@
 
				+"""
			
 
				+Layout Inference Web Application with Gradio
			
 
				+
			
 
				+A Gradio-based layout inference tool that supports image uploads and multiple backend inference engines.
			
 
				+It adopts a reference-style interface design while preserving the original inference logic.
			
 
				+"""
			
 
				+
			
 
				+import gradio as gr
			
 
				+import json
			
 
				+import os
			
 
				+import io
			
 
				+import tempfile
			
 
				+import base64
			
 
				+import zipfile
			
 
				+import uuid
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from PIL import Image
			
 
				+import requests
			
 
				+
			
 
				+# Local tool imports
			
 
				+from dots_ocr.utils import dict_promptmode_to_prompt
			
 
				+from dots_ocr.utils.consts import MIN_PIXELS, MAX_PIXELS
			
 
				+from dots_ocr.utils.demo_utils.display import read_image
			
 
				+from dots_ocr.utils.doc_utils import load_images_from_pdf
			
 
				+
			
 
				+# Add DotsOCRParser import
			
 
				+from dots_ocr.parser import DotsOCRParser
			
 
				+
			
 
				+
			
 
				+# ==================== Configuration ====================
			
 
				+DEFAULT_CONFIG = {
			
 
				+    'ip': "127.0.0.1",
			
 
				+    'port_vllm': 8101,
			
 
				+    'min_pixels': MIN_PIXELS,
			
 
				+    'max_pixels': MAX_PIXELS,
			
 
				+    'test_images_dir': "../demo/assets/showcase_origin",
			
 
				+    'model_name': "DotsOCR",
			
 
				+}
			
 
				+
			
 
				+# ==================== Global Variables ====================
			
 
				+# Store current configuration
			
 
				+current_config = DEFAULT_CONFIG.copy()
			
 
				+
			
 
				+# Create DotsOCRParser instance
			
 
				+dots_parser = DotsOCRParser(
			
 
				+    ip=DEFAULT_CONFIG['ip'],
			
 
				+    port=DEFAULT_CONFIG['port_vllm'],
			
 
				+    dpi=200,
			
 
				+    min_pixels=DEFAULT_CONFIG['min_pixels'],
			
 
				+    max_pixels=DEFAULT_CONFIG['max_pixels'],
			
 
				+    model_name= DEFAULT_CONFIG['model_name']
			
 
				+)
			
 
				+
			
 
				+# Store processing results
			
 
				+processing_results = {
			
 
				+    'original_image': None,
			
 
				+    'processed_image': None,
			
 
				+    'layout_result': None,
			
 
				+    'markdown_content': None,
			
 
				+    'cells_data': None,
			
 
				+    'temp_dir': None,
			
 
				+    'session_id': None,
			
 
				+    'result_paths': None,
			
 
				+    'pdf_results': None  # Store multi-page PDF results
			
 
				+}
			
 
				+
			
 
				+# PDF caching mechanism
			
 
				+pdf_cache = {
			
 
				+    "images": [],
			
 
				+    "current_page": 0,
			
 
				+    "total_pages": 0,
			
 
				+    "file_type": None,  # 'image' or 'pdf'
			
 
				+    "is_parsed": False,  # Whether it has been parsed
			
 
				+    "results": []  # Store parsing results for each page
			
 
				+}
			
 
				+
			
 
				+def read_image_v2(img):
			
 
				+    """Reads an image, supports URLs and local paths"""
			
 
				+    if isinstance(img, str) and img.startswith(("http://", "https://")):
			
 
				+        with requests.get(img, stream=True) as response:
			
 
				+            response.raise_for_status()
			
 
				+            img = Image.open(io.BytesIO(response.content))
			
 
				+    elif isinstance(img, str):
			
 
				+        img, _, _ = read_image(img, use_native=True)
			
 
				+    elif isinstance(img, Image.Image):
			
 
				+        pass
			
 
				+    else:
			
 
				+        raise ValueError(f"Invalid image type: {type(img)}")
			
 
				+    return img
			
 
				+
			
 
				+def load_file_for_preview(file_path):
			
 
				+    """Loads a file for preview, supports PDF and image files"""
			
 
				+    global pdf_cache
			
 
				+    
			
 
				+    if not file_path or not os.path.exists(file_path):
			
 
				+        return None, "<div id='page_info_box'>0 / 0</div>"
			
 
				+    
			
 
				+    file_ext = os.path.splitext(file_path)[1].lower()
			
 
				+    
			
 
				+    if file_ext == '.pdf':
			
 
				+        try:
			
 
				+            # Read PDF and convert to images (one image per page)
			
 
				+            pages = load_images_from_pdf(file_path)
			
 
				+            pdf_cache["file_type"] = "pdf"
			
 
				+        except Exception as e:
			
 
				+            return None, f"<div id='page_info_box'>PDF loading failed: {str(e)}</div>"
			
 
				+    elif file_ext in ['.jpg', '.jpeg', '.png']:
			
 
				+        # For image files, read directly as a single-page image
			
 
				+        try:
			
 
				+            image = Image.open(file_path)
			
 
				+            pages = [image]
			
 
				+            pdf_cache["file_type"] = "image"
			
 
				+        except Exception as e:
			
 
				+            return None, f"<div id='page_info_box'>Image loading failed: {str(e)}</div>"
			
 
				+    else:
			
 
				+        return None, "<div id='page_info_box'>Unsupported file format</div>"
			
 
				+    
			
 
				+    pdf_cache["images"] = pages
			
 
				+    pdf_cache["current_page"] = 0
			
 
				+    pdf_cache["total_pages"] = len(pages)
			
 
				+    pdf_cache["is_parsed"] = False
			
 
				+    pdf_cache["results"] = []
			
 
				+    
			
 
				+    return pages[0], f"<div id='page_info_box'>1 / {len(pages)}</div>"
			
 
				+
			
 
				+def turn_page(direction):
			
 
				+    """Page turning function"""
			
 
				+    global pdf_cache
			
 
				+    
			
 
				+    if not pdf_cache["images"]:
			
 
				+        return None, "<div id='page_info_box'>0 / 0</div>", "", ""
			
 
				+
			
 
				+    if direction == "prev":
			
 
				+        pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
			
 
				+    elif direction == "next":
			
 
				+        pdf_cache["current_page"] = min(pdf_cache["total_pages"] - 1, pdf_cache["current_page"] + 1)
			
 
				+
			
 
				+    index = pdf_cache["current_page"]
			
 
				+    current_image = pdf_cache["images"][index]  # Use the original image by default
			
 
				+    page_info = f"<div id='page_info_box'>{index + 1} / {pdf_cache['total_pages']}</div>"
			
 
				+    
			
 
				+    # If parsed, display the results for the current page
			
 
				+    current_md = ""
			
 
				+    current_md_raw = ""
			
 
				+    current_json = ""
			
 
				+    if pdf_cache["is_parsed"] and index < len(pdf_cache["results"]):
			
 
				+        result = pdf_cache["results"][index]
			
 
				+        if 'md_content' in result:
			
 
				+            # Get the raw markdown content
			
 
				+            current_md_raw = result['md_content']
			
 
				+            # Process the content after LaTeX rendering
			
 
				+            current_md = result['md_content'] if result['md_content'] else ""
			
 
				+        if 'cells_data' in result:
			
 
				+            try:
			
 
				+                current_json = json.dumps(result['cells_data'], ensure_ascii=False, indent=2)
			
 
				+            except:
			
 
				+                current_json = str(result.get('cells_data', ''))
			
 
				+        # Use the image with layout boxes (if available)
			
 
				+        if 'layout_image' in result and result['layout_image']:
			
 
				+            current_image = result['layout_image']
			
 
				+    
			
 
				+    return current_image, page_info, current_json
			
 
				+
			
 
				+def get_test_images():
			
 
				+    """Gets the list of test images"""
			
 
				+    test_images = []
			
 
				+    test_dir = current_config['test_images_dir']
			
 
				+    if os.path.exists(test_dir):
			
 
				+        test_images = [os.path.join(test_dir, name) for name in os.listdir(test_dir) 
			
 
				+                      if name.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf'))]
			
 
				+    return test_images
			
 
				+
			
 
				+def convert_image_to_base64(image):
			
 
				+    """Converts a PIL image to base64 encoding"""
			
 
				+    buffered = io.BytesIO()
			
 
				+    image.save(buffered, format="PNG")
			
 
				+    img_str = base64.b64encode(buffered.getvalue()).decode()
			
 
				+    return f"data:image/png;base64,{img_str}"
			
 
				+
			
 
				+def create_temp_session_dir():
			
 
				+    """Creates a unique temporary directory for each processing request"""
			
 
				+    session_id = uuid.uuid4().hex[:8]
			
 
				+    temp_dir = os.path.join(tempfile.gettempdir(), f"dots_ocr_demo_{session_id}")
			
 
				+    os.makedirs(temp_dir, exist_ok=True)
			
 
				+    return temp_dir, session_id
			
 
				+
			
 
				+def parse_image_with_high_level_api(parser, image, prompt_mode, fitz_preprocess=False):
			
 
				+    """
			
 
				+    Processes using the high-level API parse_image from DotsOCRParser
			
 
				+    """
			
 
				+    # Create a temporary session directory
			
 
				+    temp_dir, session_id = create_temp_session_dir()
			
 
				+    
			
 
				+    try:
			
 
				+        # Save the PIL Image as a temporary file
			
 
				+        temp_image_path = os.path.join(temp_dir, f"input_{session_id}.png")
			
 
				+        image.save(temp_image_path, "PNG")
			
 
				+        
			
 
				+        # Use the high-level API parse_image
			
 
				+        filename = f"demo_{session_id}"
			
 
				+        results = parser.parse_image(
			
 
				+            # input_path=temp_image_path,
			
 
				+            input_path=image,
			
 
				+            filename=filename, 
			
 
				+            prompt_mode=prompt_mode,
			
 
				+            save_dir=temp_dir,
			
 
				+            fitz_preprocess=fitz_preprocess
			
 
				+        )
			
 
				+        
			
 
				+        # Parse the results
			
 
				+        if not results:
			
 
				+            raise ValueError("No results returned from parser")
			
 
				+        
			
 
				+        result = results[0]  # parse_image returns a list with a single result
			
 
				+        
			
 
				+        # Read the result files
			
 
				+        layout_image = None
			
 
				+        cells_data = None
			
 
				+        md_content = None
			
 
				+        raw_response = None
			
 
				+        filtered = False
			
 
				+        
			
 
				+        # Read the layout image
			
 
				+        if 'layout_image_path' in result and os.path.exists(result['layout_image_path']):
			
 
				+            layout_image = Image.open(result['layout_image_path'])
			
 
				+        
			
 
				+        # Read the JSON data
			
 
				+        if 'layout_info_path' in result and os.path.exists(result['layout_info_path']):
			
 
				+            with open(result['layout_info_path'], 'r', encoding='utf-8') as f:
			
 
				+                cells_data = json.load(f)
			
 
				+        
			
 
				+        # Read the Markdown content
			
 
				+        if 'md_content_path' in result and os.path.exists(result['md_content_path']):
			
 
				+            with open(result['md_content_path'], 'r', encoding='utf-8') as f:
			
 
				+                md_content = f.read()
			
 
				+        
			
 
				+        # Check for the raw response file (when JSON parsing fails)
			
 
				+        if 'filtered' in result:
			
 
				+            filtered = result['filtered']
			
 
				+        
			
 
				+        return {
			
 
				+            'layout_image': layout_image,
			
 
				+            'cells_data': cells_data,
			
 
				+            'md_content': md_content,
			
 
				+            'filtered': filtered,
			
 
				+            'temp_dir': temp_dir,
			
 
				+            'session_id': session_id,
			
 
				+            'result_paths': result,
			
 
				+            'input_width': result['input_width'],
			
 
				+            'input_height': result['input_height'],
			
 
				+        }
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        # Clean up the temporary directory on error
			
 
				+        import shutil
			
 
				+        if os.path.exists(temp_dir):
			
 
				+            shutil.rmtree(temp_dir, ignore_errors=True)
			
 
				+        raise e
			
 
				+
			
 
				+def parse_pdf_with_high_level_api(parser, pdf_path, prompt_mode):
			
 
				+    """
			
 
				+    Processes using the high-level API parse_pdf from DotsOCRParser
			
 
				+    """
			
 
				+    # Create a temporary session directory
			
 
				+    temp_dir, session_id = create_temp_session_dir()
			
 
				+    
			
 
				+    try:
			
 
				+        # Use the high-level API parse_pdf
			
 
				+        filename = f"demo_{session_id}"
			
 
				+        results = parser.parse_pdf(
			
 
				+            input_path=pdf_path,
			
 
				+            filename=filename,
			
 
				+            prompt_mode=prompt_mode,
			
 
				+            save_dir=temp_dir
			
 
				+        )
			
 
				+        
			
 
				+        # Parse the results
			
 
				+        if not results:
			
 
				+            raise ValueError("No results returned from parser")
			
 
				+        
			
 
				+        # Handle multi-page results
			
 
				+        parsed_results = []
			
 
				+        all_md_content = []
			
 
				+        all_cells_data = []
			
 
				+        
			
 
				+        for i, result in enumerate(results):
			
 
				+            page_result = {
			
 
				+                'page_no': result.get('page_no', i),
			
 
				+                'layout_image': None,
			
 
				+                'cells_data': None,
			
 
				+                'md_content': None,
			
 
				+                'filtered': False
			
 
				+            }
			
 
				+            
			
 
				+            # Read the layout image
			
 
				+            if 'layout_image_path' in result and os.path.exists(result['layout_image_path']):
			
 
				+                page_result['layout_image'] = Image.open(result['layout_image_path'])
			
 
				+            
			
 
				+            # Read the JSON data
			
 
				+            if 'layout_info_path' in result and os.path.exists(result['layout_info_path']):
			
 
				+                with open(result['layout_info_path'], 'r', encoding='utf-8') as f:
			
 
				+                    page_result['cells_data'] = json.load(f)
			
 
				+                    all_cells_data.extend(page_result['cells_data'])
			
 
				+            
			
 
				+            # Read the Markdown content
			
 
				+            if 'md_content_path' in result and os.path.exists(result['md_content_path']):
			
 
				+                with open(result['md_content_path'], 'r', encoding='utf-8') as f:
			
 
				+                    page_content = f.read()
			
 
				+                    page_result['md_content'] = page_content
			
 
				+                    all_md_content.append(page_content)
			
 
				+            
			
 
				+            # Check for the raw response file (when JSON parsing fails)
			
 
				+            page_result['filtered'] = False
			
 
				+            if 'filtered' in page_result:
			
 
				+                page_result['filtered'] = page_result['filtered']
			
 
				+
			
 
				+            parsed_results.append(page_result)
			
 
				+        
			
 
				+        # Merge the content of all pages
			
 
				+        combined_md = "\n\n---\n\n".join(all_md_content) if all_md_content else ""
			
 
				+        
			
 
				+        return {
			
 
				+            'parsed_results': parsed_results,
			
 
				+            'combined_md_content': combined_md,
			
 
				+            'combined_cells_data': all_cells_data,
			
 
				+            'temp_dir': temp_dir,
			
 
				+            'session_id': session_id,
			
 
				+            'total_pages': len(results)
			
 
				+        }
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        # Clean up the temporary directory on error
			
 
				+        import shutil
			
 
				+        if os.path.exists(temp_dir):
			
 
				+            shutil.rmtree(temp_dir, ignore_errors=True)
			
 
				+        raise e
			
 
				+
			
 
				+# ==================== Core Processing Function ====================
			
 
				+def process_image_inference(test_image_input, file_input,
			
 
				+                          prompt_mode, server_ip, server_port, min_pixels, max_pixels,
			
 
				+                          fitz_preprocess=False
			
 
				+                          ):
			
 
				+    """Core function to handle image/PDF inference"""
			
 
				+    global current_config, processing_results, dots_parser, pdf_cache
			
 
				+    
			
 
				+    # First, clean up previous processing results to avoid confusion with the download button
			
 
				+    if processing_results.get('temp_dir') and os.path.exists(processing_results['temp_dir']):
			
 
				+        import shutil
			
 
				+        try:
			
 
				+            shutil.rmtree(processing_results['temp_dir'], ignore_errors=True)
			
 
				+        except Exception as e:
			
 
				+            print(f"Failed to clean up previous temporary directory: {e}")
			
 
				+    
			
 
				+    # Reset processing results
			
 
				+    processing_results = {
			
 
				+        'original_image': None,
			
 
				+        'processed_image': None,
			
 
				+        'layout_result': None,
			
 
				+        'markdown_content': None,
			
 
				+        'cells_data': None,
			
 
				+        'temp_dir': None,
			
 
				+        'session_id': None,
			
 
				+        'result_paths': None,
			
 
				+        'pdf_results': None
			
 
				+    }
			
 
				+    
			
 
				+    # Update configuration
			
 
				+    current_config.update({
			
 
				+        'ip': server_ip,
			
 
				+        'port_vllm': server_port,
			
 
				+        'min_pixels': min_pixels,
			
 
				+        'max_pixels': max_pixels
			
 
				+    })
			
 
				+    
			
 
				+    # Update parser configuration
			
 
				+    dots_parser.ip = server_ip
			
 
				+    dots_parser.port = server_port
			
 
				+    dots_parser.min_pixels = min_pixels
			
 
				+    dots_parser.max_pixels = max_pixels
			
 
				+    
			
 
				+    # Determine the input source
			
 
				+    input_file_path = None
			
 
				+    image = None
			
 
				+    
			
 
				+    # Prioritize file input (supports PDF)
			
 
				+    if file_input is not None:
			
 
				+        input_file_path = file_input
			
 
				+        file_ext = os.path.splitext(input_file_path)[1].lower()
			
 
				+        
			
 
				+        if file_ext == '.pdf':
			
 
				+            # PDF file processing
			
 
				+            try:
			
 
				+                return process_pdf_file(input_file_path, prompt_mode)
			
 
				+            except Exception as e:
			
 
				+                return None, f"PDF processing failed: {e}", "", "", gr.update(value=None), None, ""
			
 
				+        elif file_ext in ['.jpg', '.jpeg', '.png']:
			
 
				+            # Image file processing
			
 
				+            try:
			
 
				+                image = Image.open(input_file_path)
			
 
				+            except Exception as e:
			
 
				+                return None, f"Failed to read image file: {e}", "", "", gr.update(value=None), None, ""
			
 
				+    
			
 
				+    # If no file input, check the test image input
			
 
				+    if image is None:
			
 
				+        if test_image_input and test_image_input != "":
			
 
				+            file_ext = os.path.splitext(test_image_input)[1].lower()
			
 
				+            if file_ext == '.pdf':
			
 
				+                return process_pdf_file(test_image_input, prompt_mode)
			
 
				+            else:
			
 
				+                try:
			
 
				+                    image = read_image_v2(test_image_input)
			
 
				+                except Exception as e:
			
 
				+                    return None, f"Failed to read test image: {e}", "", "", gr.update(value=None), gr.update(value=None), None, ""
			
 
				+    
			
 
				+    if image is None:
			
 
				+        return None, "Please upload image/PDF file or select test image", "", "", gr.update(value=None), None, ""
			
 
				+    
			
 
				+    try:
			
 
				+        # Clear PDF cache (for image processing)
			
 
				+        pdf_cache["images"] = []
			
 
				+        pdf_cache["current_page"] = 0
			
 
				+        pdf_cache["total_pages"] = 0
			
 
				+        pdf_cache["is_parsed"] = False
			
 
				+        pdf_cache["results"] = []
			
 
				+        
			
 
				+        # Process using the high-level API of DotsOCRParser
			
 
				+        original_image = image
			
 
				+        parse_result = parse_image_with_high_level_api(dots_parser, image, prompt_mode, fitz_preprocess)
			
 
				+        
			
 
				+        # Extract parsing results
			
 
				+        layout_image = parse_result['layout_image']
			
 
				+        cells_data = parse_result['cells_data']
			
 
				+        md_content = parse_result['md_content']
			
 
				+        filtered = parse_result['filtered']
			
 
				+        
			
 
				+        # Handle parsing failure case
			
 
				+        if filtered:
			
 
				+            # JSON parsing failed, only text content is available
			
 
				+            info_text = f"""
			
 
				+**Image Information:**
			
 
				+- Original Size: {original_image.width} x {original_image.height}
			
 
				+- Processing: JSON parsing failed, using cleaned text output
			
 
				+- Server: {current_config['ip']}:{current_config['port_vllm']}
			
 
				+- Session ID: {parse_result['session_id']}
			
 
				+            """
			
 
				+            
			
 
				+            # Store results
			
 
				+            processing_results.update({
			
 
				+                'original_image': original_image,
			
 
				+                'processed_image': None,
			
 
				+                'layout_result': None,
			
 
				+                'markdown_content': md_content,
			
 
				+                'cells_data': None,
			
 
				+                'temp_dir': parse_result['temp_dir'],
			
 
				+                'session_id': parse_result['session_id'],
			
 
				+                'result_paths': parse_result['result_paths']
			
 
				+            })
			
 
				+            
			
 
				+            return (
			
 
				+                original_image,  # No layout image
			
 
				+                info_text,
			
 
				+                md_content,
			
 
				+                md_content,  # Display raw markdown text
			
 
				+                gr.update(visible=False),  # Hide download button
			
 
				+                None,  # Page info
			
 
				+                ""     # Current page JSON output
			
 
				+            )
			
 
				+        
			
 
				+        # JSON parsing successful case
			
 
				+        # Save the raw markdown content (before LaTeX processing)
			
 
				+        md_content_raw = md_content or "No markdown content generated"
			
 
				+        
			
 
				+        # Store results
			
 
				+        processing_results.update({
			
 
				+            'original_image': original_image,
			
 
				+            'processed_image': None,  # High-level API does not return processed_image
			
 
				+            'layout_result': layout_image,
			
 
				+            'markdown_content': md_content,
			
 
				+            'cells_data': cells_data,
			
 
				+            'temp_dir': parse_result['temp_dir'],
			
 
				+            'session_id': parse_result['session_id'],
			
 
				+            'result_paths': parse_result['result_paths']
			
 
				+        })
			
 
				+        
			
 
				+        # Prepare display information
			
 
				+        num_elements = len(cells_data) if cells_data else 0
			
 
				+        info_text = f"""
			
 
				+**Image Information:**
			
 
				+- Original Size: {original_image.width} x {original_image.height}
			
 
				+- Model Input Size: {parse_result['input_width']} x {parse_result['input_height']}
			
 
				+- Server: {current_config['ip']}:{current_config['port_vllm']}
			
 
				+- Detected {num_elements} layout elements
			
 
				+- Session ID: {parse_result['session_id']}
			
 
				+        """
			
 
				+        
			
 
				+        # Current page JSON output
			
 
				+        current_json = ""
			
 
				+        if cells_data:
			
 
				+            try:
			
 
				+                current_json = json.dumps(cells_data, ensure_ascii=False, indent=2)
			
 
				+            except:
			
 
				+                current_json = str(cells_data)
			
 
				+        
			
 
				+        # Create the download ZIP file
			
 
				+        download_zip_path = None
			
 
				+        if parse_result['temp_dir']:
			
 
				+            download_zip_path = os.path.join(parse_result['temp_dir'], f"layout_results_{parse_result['session_id']}.zip")
			
 
				+            try:
			
 
				+                with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
			
 
				+                    for root, dirs, files in os.walk(parse_result['temp_dir']):
			
 
				+                        for file in files:
			
 
				+                            if file.endswith('.zip'):
			
 
				+                                continue
			
 
				+                            file_path = os.path.join(root, file)
			
 
				+                            arcname = os.path.relpath(file_path, parse_result['temp_dir'])
			
 
				+                            zipf.write(file_path, arcname)
			
 
				+            except Exception as e:
			
 
				+                print(f"Failed to create download ZIP: {e}")
			
 
				+                download_zip_path = None
			
 
				+        
			
 
				+        return (
			
 
				+            layout_image,
			
 
				+            info_text,
			
 
				+            md_content or "No markdown content generated",
			
 
				+            md_content_raw,  # Raw markdown text
			
 
				+            gr.update(value=download_zip_path, visible=True) if download_zip_path else gr.update(visible=False),  # Set the download file
			
 
				+            None,  # Page info (not displayed for image processing)
			
 
				+            current_json     # Current page JSON
			
 
				+        )
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        return None, f"Error during processing: {e}", "", "", gr.update(value=None), None, ""
			
 
				+
			
 
				+def process_pdf_file(pdf_path, prompt_mode):
			
 
				+    """Dedicated function for processing PDF files"""
			
 
				+    global pdf_cache, processing_results, dots_parser
			
 
				+    
			
 
				+    try:
			
 
				+        # First, load the PDF for preview
			
 
				+        preview_image, page_info = load_file_for_preview(pdf_path)
			
 
				+        
			
 
				+        # Parse the PDF using DotsOCRParser
			
 
				+        pdf_result = parse_pdf_with_high_level_api(dots_parser, pdf_path, prompt_mode)
			
 
				+        
			
 
				+        # Update the PDF cache
			
 
				+        pdf_cache["is_parsed"] = True
			
 
				+        pdf_cache["results"] = pdf_result['parsed_results']
			
 
				+        
			
 
				+        # Handle LaTeX table rendering
			
 
				+        combined_md = pdf_result['combined_md_content']
			
 
				+        combined_md_raw = combined_md or "No markdown content generated"  # Save the raw content
			
 
				+
			
 
				+        # Store results
			
 
				+        processing_results.update({
			
 
				+            'original_image': None,
			
 
				+            'processed_image': None,
			
 
				+            'layout_result': None,
			
 
				+            'markdown_content': combined_md,
			
 
				+            'cells_data': pdf_result['combined_cells_data'],
			
 
				+            'temp_dir': pdf_result['temp_dir'],
			
 
				+            'session_id': pdf_result['session_id'],
			
 
				+            'result_paths': None,
			
 
				+            'pdf_results': pdf_result['parsed_results']
			
 
				+        })
			
 
				+        
			
 
				+        # Prepare display information
			
 
				+        total_elements = len(pdf_result['combined_cells_data'])
			
 
				+        info_text = f"""
			
 
				+**PDF Information:**
			
 
				+- Total Pages: {pdf_result['total_pages']}
			
 
				+- Server: {current_config['ip']}:{current_config['port_vllm']}
			
 
				+- Total Detected Elements: {total_elements}
			
 
				+- Session ID: {pdf_result['session_id']}
			
 
				+        """
			
 
				+        
			
 
				+        # Content of the current page (first page)
			
 
				+        current_page_md = ""
			
 
				+        current_page_md_raw = ""
			
 
				+        current_page_json = ""
			
 
				+        current_page_layout_image = preview_image  # Use the original preview image by default
			
 
				+        
			
 
				+        if pdf_cache["results"] and len(pdf_cache["results"]) > 0:
			
 
				+            current_result = pdf_cache["results"][0]
			
 
				+            if current_result['md_content']:
			
 
				+                # Raw markdown content
			
 
				+                current_page_md_raw = current_result['md_content']
			
 
				+                # Process the content after LaTeX rendering
			
 
				+
			
 
				+                current_page_md = current_result['md_content']
			
 
				+            if current_result['cells_data']:
			
 
				+                try:
			
 
				+                    current_page_json = json.dumps(current_result['cells_data'], ensure_ascii=False, indent=2)
			
 
				+                except:
			
 
				+                    current_page_json = str(current_result['cells_data'])
			
 
				+            # Use the image with layout boxes (if available)
			
 
				+            if 'layout_image' in current_result and current_result['layout_image']:
			
 
				+                current_page_layout_image = current_result['layout_image']
			
 
				+        
			
 
				+        # Create the download ZIP file
			
 
				+        download_zip_path = None
			
 
				+        if pdf_result['temp_dir']:
			
 
				+            download_zip_path = os.path.join(pdf_result['temp_dir'], f"layout_results_{pdf_result['session_id']}.zip")
			
 
				+            try:
			
 
				+                with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
			
 
				+                    for root, dirs, files in os.walk(pdf_result['temp_dir']):
			
 
				+                        for file in files:
			
 
				+                            if file.endswith('.zip'):
			
 
				+                                continue
			
 
				+                            file_path = os.path.join(root, file)
			
 
				+                            arcname = os.path.relpath(file_path, pdf_result['temp_dir'])
			
 
				+                            zipf.write(file_path, arcname)
			
 
				+            except Exception as e:
			
 
				+                print(f"Failed to create download ZIP: {e}")
			
 
				+                download_zip_path = None
			
 
				+        
			
 
				+        return (
			
 
				+            current_page_layout_image,  # Use the image with layout boxes
			
 
				+            info_text,
			
 
				+            combined_md or "No markdown content generated",  # Display the markdown for the entire PDF
			
 
				+            combined_md_raw or "No markdown content generated",  # Display the raw markdown for the entire PDF
			
 
				+            gr.update(value=download_zip_path, visible=True) if download_zip_path else gr.update(visible=False),  # Set the download file
			
 
				+            page_info,
			
 
				+            current_page_json
			
 
				+        )
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        # Reset the PDF cache
			
 
				+        pdf_cache["images"] = []
			
 
				+        pdf_cache["current_page"] = 0
			
 
				+        pdf_cache["total_pages"] = 0
			
 
				+        pdf_cache["is_parsed"] = False
			
 
				+        pdf_cache["results"] = []
			
 
				+        raise e
			
 
				+
			
 
				+def clear_all_data():
			
 
				+    """Clears all data"""
			
 
				+    global processing_results, pdf_cache
			
 
				+    
			
 
				+    # Clean up the temporary directory
			
 
				+    if processing_results.get('temp_dir') and os.path.exists(processing_results['temp_dir']):
			
 
				+        import shutil
			
 
				+        try:
			
 
				+            shutil.rmtree(processing_results['temp_dir'], ignore_errors=True)
			
 
				+        except Exception as e:
			
 
				+            print(f"Failed to clean up temporary directory: {e}")
			
 
				+    
			
 
				+    # Reset processing results
			
 
				+    processing_results = {
			
 
				+        'original_image': None,
			
 
				+        'processed_image': None,
			
 
				+        'layout_result': None,
			
 
				+        'markdown_content': None,
			
 
				+        'cells_data': None,
			
 
				+        'temp_dir': None,
			
 
				+        'session_id': None,
			
 
				+        'result_paths': None,
			
 
				+        'pdf_results': None
			
 
				+    }
			
 
				+    
			
 
				+    # Reset the PDF cache
			
 
				+    pdf_cache = {
			
 
				+        "images": [],
			
 
				+        "current_page": 0,
			
 
				+        "total_pages": 0,
			
 
				+        "file_type": None,
			
 
				+        "is_parsed": False,
			
 
				+        "results": []
			
 
				+    }
			
 
				+    
			
 
				+    return (
			
 
				+        None,  # Clear file input
			
 
				+        "",    # Clear test image selection
			
 
				+        None,  # Clear result image
			
 
				+        "Waiting for processing results...",  # Reset info display
			
 
				+        "## Waiting for processing results...",  # Reset Markdown display
			
 
				+        "🕐 Waiting for parsing result...",    # Clear raw Markdown text
			
 
				+        gr.update(visible=False),  # Hide download button
			
 
				+        "<div id='page_info_box'>0 / 0</div>",  # Reset page info
			
 
				+        "🕐 Waiting for parsing result..."     # Clear current page JSON
			
 
				+    )
			
 
				+
			
 
				+def update_prompt_display(prompt_mode):
			
 
				+    """Updates the prompt display content"""
			
 
				+    return dict_promptmode_to_prompt[prompt_mode]
			
 
				+
			
 
				+# ==================== Gradio Interface ====================
			
 
				+def create_gradio_interface():
			
 
				+    """Creates the Gradio interface"""
			
 
				+    
			
 
				+    # CSS styles, matching the reference style
			
 
				+    css = """
			
 
				+
			
 
				+    #parse_button {
			
 
				+        background: #FF576D !important; /* !important 确保覆盖主题默认样式 */
			
 
				+        border-color: #FF576D !important;
			
 
				+    }
			
 
				+    /* 鼠标悬停时的颜色 */
			
 
				+    #parse_button:hover {
			
 
				+        background: #F72C49 !important;
			
 
				+        border-color: #F72C49 !important;
			
 
				+    }
			
 
				+    
			
 
				+    #page_info_html {
			
 
				+        display: flex;
			
 
				+        align-items: center;
			
 
				+        justify-content: center;
			
 
				+        height: 100%;
			
 
				+        margin: 0 12px;
			
 
				+    }
			
 
				+
			
 
				+    #page_info_box {
			
 
				+        padding: 8px 20px;
			
 
				+        font-size: 16px;
			
 
				+        border: 1px solid #bbb;
			
 
				+        border-radius: 8px;
			
 
				+        background-color: #f8f8f8;
			
 
				+        text-align: center;
			
 
				+        min-width: 80px;
			
 
				+        box-shadow: 0 1px 3px rgba(0,0,0,0.1);
			
 
				+    }
			
 
				+
			
 
				+    #markdown_output {
			
 
				+        min-height: 800px;
			
 
				+        overflow: auto;
			
 
				+    }
			
 
				+
			
 
				+    footer {
			
 
				+        visibility: hidden;
			
 
				+    }
			
 
				+    
			
 
				+    #info_box {
			
 
				+        padding: 10px;
			
 
				+        background-color: #f8f9fa;
			
 
				+        border-radius: 8px;
			
 
				+        border: 1px solid #dee2e6;
			
 
				+        margin: 10px 0;
			
 
				+        font-size: 14px;
			
 
				+    }
			
 
				+    
			
 
				+    #result_image {
			
 
				+        border-radius: 8px;
			
 
				+    }
			
 
				+    
			
 
				+    #markdown_tabs {
			
 
				+        height: 100%;
			
 
				+    }
			
 
				+    """
			
 
				+    
			
 
				+    with gr.Blocks(theme="ocean", css=css, title='dots.ocr') as demo:
			
 
				+        
			
 
				+        # Title
			
 
				+        gr.HTML("""
			
 
				+            <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 20px;">
			
 
				+                <h1 style="margin: 0; font-size: 2em;">🔍 dots.ocr</h1>
			
 
				+            </div>
			
 
				+            <div style="text-align: center; margin-bottom: 10px;">
			
 
				+                <em>Supports image/PDF layout analysis and structured output</em>
			
 
				+            </div>
			
 
				+        """)
			
 
				+        
			
 
				+        with gr.Row():
			
 
				+            # Left side: Input and Configuration
			
 
				+            with gr.Column(scale=1, elem_id="left-panel"):
			
 
				+                gr.Markdown("### 📥 Upload & Select")
			
 
				+                file_input = gr.File(
			
 
				+                    label="Upload PDF/Image", 
			
 
				+                    type="filepath", 
			
 
				+                    file_types=[".pdf", ".jpg", ".jpeg", ".png"],
			
 
				+                )
			
 
				+                
			
 
				+                test_images = get_test_images()
			
 
				+                test_image_input = gr.Dropdown(
			
 
				+                    label="Or Select an Example",
			
 
				+                    choices=[""] + test_images,
			
 
				+                    value="",
			
 
				+                )
			
 
				+
			
 
				+                gr.Markdown("### ⚙️ Prompt & Actions")
			
 
				+                prompt_mode = gr.Dropdown(
			
 
				+                    label="Select Prompt",
			
 
				+                    choices=["prompt_layout_all_en", "prompt_layout_only_en", "prompt_ocr"],
			
 
				+                    value="prompt_layout_all_en",
			
 
				+                    show_label=True
			
 
				+                )
			
 
				+                
			
 
				+                # Display current prompt content
			
 
				+                prompt_display = gr.Textbox(
			
 
				+                    label="Current Prompt Content",
			
 
				+                    value=dict_promptmode_to_prompt[list(dict_promptmode_to_prompt.keys())[0]],
			
 
				+                    lines=4,
			
 
				+                    max_lines=8,
			
 
				+                    interactive=False,
			
 
				+                    show_copy_button=True
			
 
				+                )
			
 
				+                
			
 
				+                with gr.Row():
			
 
				+                    process_btn = gr.Button("🔍 Parse", variant="primary", scale=2, elem_id="parse_button")
			
 
				+                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)
			
 
				+                
			
 
				+                with gr.Accordion("🛠️ Advanced Configuration", open=False):
			
 
				+                    fitz_preprocess = gr.Checkbox(
			
 
				+                        label="Enable fitz_preprocess for images", 
			
 
				+                        value=True,
			
 
				+                        info="Processes image via a PDF-like pipeline (image->pdf->200dpi image). Recommended if your image DPI is low."
			
 
				+                    )
			
 
				+                    with gr.Row():
			
 
				+                        server_ip = gr.Textbox(label="Server IP", value=DEFAULT_CONFIG['ip'])
			
 
				+                        server_port = gr.Number(label="Port", value=DEFAULT_CONFIG['port_vllm'], precision=0)
			
 
				+                    with gr.Row():
			
 
				+                        min_pixels = gr.Number(label="Min Pixels", value=DEFAULT_CONFIG['min_pixels'], precision=0)
			
 
				+                        max_pixels = gr.Number(label="Max Pixels", value=DEFAULT_CONFIG['max_pixels'], precision=0)
			
 
				+            # Right side: Result Display
			
 
				+            with gr.Column(scale=6, variant="compact"):
			
 
				+                with gr.Row():
			
 
				+                    # Result Image
			
 
				+                    with gr.Column(scale=3):
			
 
				+                        gr.Markdown("### 👁️ File Preview")
			
 
				+                        result_image = gr.Image(
			
 
				+                            label="Layout Preview",
			
 
				+                            visible=True,
			
 
				+                            height=800,
			
 
				+                            show_label=False
			
 
				+                        )
			
 
				+                        
			
 
				+                        # Page navigation (shown during PDF preview)
			
 
				+                        with gr.Row():
			
 
				+                            prev_btn = gr.Button("⬅ Previous", size="sm")
			
 
				+                            page_info = gr.HTML(
			
 
				+                                value="<div id='page_info_box'>0 / 0</div>", 
			
 
				+                                elem_id="page_info_html"
			
 
				+                            )
			
 
				+                            next_btn = gr.Button("Next ➡", size="sm")
			
 
				+                        
			
 
				+                        # Info Display
			
 
				+                        info_display = gr.Markdown(
			
 
				+                            "Waiting for processing results...",
			
 
				+                            elem_id="info_box"
			
 
				+                        )
			
 
				+                    
			
 
				+                    # Markdown Result
			
 
				+                    with gr.Column(scale=3):
			
 
				+                        gr.Markdown("### ✔️ Result Display")
			
 
				+                        
			
 
				+                        with gr.Tabs(elem_id="markdown_tabs"):
			
 
				+                            with gr.TabItem("Markdown Render Preview"):
			
 
				+                                md_output = gr.Markdown(
			
 
				+                                    "## Please click the parse button to parse or select for single-task recognition...",
			
 
				+                                    label="Markdown Preview",
			
 
				+                                    max_height=600,
			
 
				+                                    latex_delimiters=[
			
 
				+                                        {"left": "$$", "right": "$$", "display": True},
			
 
				+                                        {"left": "$", "right": "$", "display": False},
			
 
				+                                    ],
			
 
				+                                    show_copy_button=False,
			
 
				+                                    elem_id="markdown_output"
			
 
				+                                )
			
 
				+                            
			
 
				+                            with gr.TabItem("Markdown Raw Text"):
			
 
				+                                md_raw_output = gr.Textbox(
			
 
				+                                    value="🕐 Waiting for parsing result...",
			
 
				+                                    label="Markdown Raw Text",
			
 
				+                                    max_lines=100,
			
 
				+                                    lines=38,
			
 
				+                                    show_copy_button=True,
			
 
				+                                    elem_id="markdown_output",
			
 
				+                                    show_label=False
			
 
				+                                )
			
 
				+                            
			
 
				+                            with gr.TabItem("Current Page JSON"):
			
 
				+                                current_page_json = gr.Textbox(
			
 
				+                                    value="🕐 Waiting for parsing result...",
			
 
				+                                    label="Current Page JSON",
			
 
				+                                    max_lines=100,
			
 
				+                                    lines=38,
			
 
				+                                    show_copy_button=True,
			
 
				+                                    elem_id="markdown_output",
			
 
				+                                    show_label=False
			
 
				+                                )
			
 
				+                
			
 
				+                # Download Button
			
 
				+                with gr.Row():
			
 
				+                    download_btn = gr.DownloadButton(
			
 
				+                        "⬇️ Download Results",
			
 
				+                        visible=False
			
 
				+                    )
			
 
				+        
			
 
				+        # When the prompt mode changes, update the display content
			
 
				+        prompt_mode.change(
			
 
				+            fn=update_prompt_display,
			
 
				+            inputs=prompt_mode,
			
 
				+            outputs=prompt_display,
			
 
				+            show_progress=False
			
 
				+        )
			
 
				+        
			
 
				+        # Show preview on file upload
			
 
				+        file_input.upload(
			
 
				+            fn=load_file_for_preview,
			
 
				+            inputs=file_input,
			
 
				+            outputs=[result_image, page_info],
			
 
				+            show_progress=False
			
 
				+        )
			
 
				+        
			
 
				+        # Page navigation
			
 
				+        prev_btn.click(
			
 
				+            fn=lambda: turn_page("prev"), 
			
 
				+            outputs=[result_image, page_info, current_page_json],
			
 
				+            show_progress=False
			
 
				+        )
			
 
				+        
			
 
				+        next_btn.click(
			
 
				+            fn=lambda: turn_page("next"), 
			
 
				+            outputs=[result_image, page_info, current_page_json],
			
 
				+            show_progress=False
			
 
				+        )
			
 
				+        
			
 
				+        process_btn.click(
			
 
				+            fn=process_image_inference,
			
 
				+            inputs=[
			
 
				+                test_image_input, file_input,
			
 
				+                prompt_mode, server_ip, server_port, min_pixels, max_pixels, 
			
 
				+                fitz_preprocess
			
 
				+            ],
			
 
				+            outputs=[
			
 
				+                result_image, info_display, md_output, md_raw_output,
			
 
				+                download_btn, page_info, current_page_json
			
 
				+            ],
			
 
				+            show_progress=True
			
 
				+        )
			
 
				+        
			
 
				+        clear_btn.click(
			
 
				+            fn=clear_all_data,
			
 
				+            outputs=[
			
 
				+                file_input, test_image_input,
			
 
				+                result_image, info_display, md_output, md_raw_output,
			
 
				+                download_btn, page_info, current_page_json
			
 
				+            ],
			
 
				+            show_progress=False
			
 
				+        )
			
 
				+    
			
 
				+    return demo
			
 
				+
			
 
				+# ==================== Main Program ====================
			
 
				+if __name__ == "__main__":
			
 
				+    demo = create_gradio_interface()
			
 
				+    demo.queue().launch(
			
 
				+        server_name="0.0.0.0", 
			
 
				+        server_port=7860, 
			
 
				+        debug=True
			
 
				+    )