"""
Markdown 生成模块
负责将合并后的数据生成 Markdown 文件
"""
import shutil
from pathlib import Path
from typing import List, Dict, Optional


class MarkdownGenerator:
    """Markdown 生成器"""
    
    @staticmethod
    def generate_enhanced_markdown(merged_data: List[Dict], 
                                   output_path: Optional[str] = None,
                                   mineru_file: Optional[str] = None) -> str:
        """
        生成增强的 Markdown（包含 bbox 信息的注释）
        
        Args:
            merged_data: 合并后的数据
            output_path: 输出路径
            mineru_file: MinerU 源文件路径（用于复制图片）
        
        Returns:
            Markdown 内容
        """
        md_lines = []
        
        for item in merged_data:
            item_type = item.get('type', '')
            
            if item_type == 'title':
                md_lines.extend(MarkdownGenerator._format_title(item))
            elif item_type == 'text':
                md_lines.extend(MarkdownGenerator._format_text(item))
            elif item_type == 'list':
                md_lines.extend(MarkdownGenerator._format_list(item))
            elif item_type == 'table':
                md_lines.extend(MarkdownGenerator._format_table(item))
            elif item_type == 'image':
                md_lines.extend(MarkdownGenerator._format_image(
                    item, output_path, mineru_file
                ))
            elif item_type == 'equation':
                md_lines.extend(MarkdownGenerator._format_equation(item))
            elif item_type == 'inline_equation':
                md_lines.extend(MarkdownGenerator._format_inline_equation(item))
            elif item_type in ['page_number', 'header', 'footer']:
                md_lines.extend(MarkdownGenerator._format_metadata(item, item_type))
            elif item_type == 'reference':
                md_lines.extend(MarkdownGenerator._format_reference(item))
            else:
                md_lines.extend(MarkdownGenerator._format_unknown(item))
        
        markdown_content = '\n'.join(md_lines)
        
        if output_path:
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(markdown_content)
        
        return markdown_content
    
    @staticmethod
    def _add_bbox_comment(bbox: List) -> str:
        """添加 bbox 注释"""
        return f"<!-- bbox: {bbox} -->"
    
    @staticmethod
    def _format_title(item: Dict) -> List[str]:
        """格式化标题"""
        lines = []
        bbox = item.get('bbox', [])
        if bbox:
            lines.append(MarkdownGenerator._add_bbox_comment(bbox))
        
        text = item.get('text', '')
        text_level = item.get('text_level', 1)
        heading = '#' * min(text_level, 6)
        lines.append(f"{heading} {text}\n")
        
        return lines
    
    @staticmethod
    def _format_text(item: Dict) -> List[str]:
        """格式化文本"""
        lines = []
        bbox = item.get('bbox', [])
        if bbox:
            lines.append(MarkdownGenerator._add_bbox_comment(bbox))
        
        text = item.get('text', '')
        text_level = item.get('text_level', 0)
        
        if text_level > 0:
            heading = '#' * min(text_level, 6)
            lines.append(f"{heading} {text}\n")
        else:
            lines.append(f"{text}\n")
        
        return lines
    
    @staticmethod
    def _format_list(item: Dict) -> List[str]:
        """格式化列表"""
        lines = []
        bbox = item.get('bbox', [])
        if bbox:
            lines.append(MarkdownGenerator._add_bbox_comment(bbox))
        
        list_items = item.get('list_items', [])
        for list_item in list_items:
            lines.append(f"{list_item}\n")
        
        lines.append("")
        return lines
    
    @staticmethod
    def _format_table(item: Dict) -> List[str]:
        """格式化表格"""
        lines = []
        bbox = item.get('bbox', [])
        if bbox:
            lines.append(MarkdownGenerator._add_bbox_comment(bbox))
        
        # 表格标题
        table_caption = item.get('table_caption', [])
        for caption in table_caption:
            if caption:
                lines.append(f"**{caption}**\n")
        
        # 表格内容
        table_body = item.get('table_body_with_bbox', item.get('table_body', ''))
        if table_body:
            lines.append(table_body)
            lines.append("")
        
        # 表格脚注
        table_footnote = item.get('table_footnote', [])
        for footnote in table_footnote:
            if footnote:
                lines.append(f"*{footnote}*")
        if table_footnote:
            lines.append("")
        
        return lines
    
    @staticmethod
    def _format_image(item: Dict, output_path: Optional[str],
                     mineru_file: Optional[str]) -> List[str]:
        """格式化图片"""
        lines = []
        bbox = item.get('bbox', [])
        if bbox:
            lines.append(MarkdownGenerator._add_bbox_comment(bbox))
        
        img_path = item.get('img_path', '')
        
        # 复制图片
        if img_path and mineru_file and output_path:
            MarkdownGenerator._copy_image(img_path, mineru_file, output_path)
        
        # 图片标题
        image_caption = item.get('image_caption', [])
        for caption in image_caption:
            if caption:
                lines.append(f"**{caption}**\n")
        
        lines.append(f"![Image]({img_path})\n")
        
        # 图片脚注
        image_footnote = item.get('image_footnote', [])
        for footnote in image_footnote:
            if footnote:
                lines.append(f"*{footnote}*")
        if image_footnote:
            lines.append("")
        
        return lines
    
    @staticmethod
    def _copy_image(img_path: str, mineru_file: str, output_path: str):
        """复制图片到输出目录"""
        mineru_dir = Path(mineru_file).parent
        img_full_path = mineru_dir / img_path
        if img_full_path.exists():
            output_img_path = Path(output_path).parent / img_path
            output_img_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(img_full_path, output_img_path)
    
    @staticmethod
    def _format_equation(item: Dict) -> List[str]:
        """格式化公式"""
        latex = item.get('latex', '')
        if latex:
            return [f"$$\n{latex}\n$$\n"]
        return []
    
    @staticmethod
    def _format_inline_equation(item: Dict) -> List[str]:
        """格式化行内公式"""
        latex = item.get('latex', '')
        if latex:
            return [f"${latex}$\n"]
        return []
    
    @staticmethod
    def _format_metadata(item: Dict, item_type: str) -> List[str]:
        """格式化元数据（页码、页眉、页脚）"""
        text = item.get('text', '')
        type_map = {
            'page_number': '页码',
            'header': '页眉',
            'footer': '页脚'
        }
        if text:
            return [f"<!-- {type_map.get(item_type, item_type)}: {text} -->\n"]
        return []
    
    @staticmethod
    def _format_reference(item: Dict) -> List[str]:
        """格式化参考文献"""
        text = item.get('text', '')
        return [f"> {text}\n"]
    
    @staticmethod
    def _format_unknown(item: Dict) -> List[str]:
        """格式化未知类型"""
        text = item.get('text', '')
        if text:
            return [f"{text}\n"]
        return []