瀏覽代碼

bugfix: Chinese fonts were ignored when converting files to PDF.

Doge2077 7 月之前
父節點
當前提交
82a4376d8a
共有 3 個文件被更改,包括 81 次插入和 5 次删除
  1. 10 0
      docker/china/Dockerfile
  2. 10 0
      docker/global/Dockerfile
  3. 61 5
      magic_pdf/utils/office_to_pdf.py

+ 10 - 0
docker/china/Dockerfile

@@ -18,7 +18,17 @@ RUN apt-get update && \
         wget \
         git \
         libgl1 \
+        libreoffice \
+        fonts-noto-cjk \
+        fonts-wqy-zenhei \
+        fonts-wqy-microhei \
+        ttf-mscorefonts-installer \
+        fontconfig \
         libglib2.0-0 \
+        libxrender1 \
+        libsm6 \
+        libxext6 \
+        poppler-utils \
         && rm -rf /var/lib/apt/lists/*
 
 # Set Python 3.10 as the default python3

+ 10 - 0
docker/global/Dockerfile

@@ -18,7 +18,17 @@ RUN apt-get update && \
         wget \
         git \
         libgl1 \
+        libreoffice \
+        fonts-noto-cjk \
+        fonts-wqy-zenhei \
+        fonts-wqy-microhei \
+        ttf-mscorefonts-installer \
+        fontconfig \
         libglib2.0-0 \
+        libxrender1 \
+        libsm6 \
+        libxext6 \
+        poppler-utils \
         && rm -rf /var/lib/apt/lists/*
 
 # Set Python 3.10 as the default python3

+ 61 - 5
magic_pdf/utils/office_to_pdf.py

@@ -1,5 +1,6 @@
 import os
 import subprocess
+import platform
 from pathlib import Path
 
 
@@ -9,21 +10,76 @@ class ConvertToPdfError(Exception):
         super().__init__(self.msg)
 
 
+# Chinese font list
+REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
+
+
+def check_fonts_installed():
+    """Check if required Chinese fonts are installed."""
+    system_type = platform.system()
+
+    if system_type == 'Windows':
+        # Windows: check fonts via registry or system font folder
+        font_dir = Path("C:/Windows/Fonts")
+        installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
+        if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
+            return True
+        raise EnvironmentError(
+            f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
+        )
+    else:
+        # Linux/macOS: use fc-list
+        try:
+            output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
+            for font in REQUIRED_CHS_FONTS:
+                if font in output:
+                    return True
+            raise EnvironmentError(
+                f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
+            )
+        except Exception as e:
+            raise EnvironmentError(f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}")
+
+
+def get_soffice_command():
+    """Return the path to LibreOffice's soffice executable depending on the platform."""
+    if platform.system() == 'Windows':
+        possible_paths = [
+            Path("C:/Program Files/LibreOffice/program/soffice.exe"),
+            Path("C:/Program Files (x86)/LibreOffice/program/soffice.exe")
+        ]
+        for path in possible_paths:
+            if path.exists():
+                return str(path)
+        raise ConvertToPdfError(
+            "LibreOffice not found. Please install LibreOffice and ensure soffice.exe is located in a standard path."
+        )
+    else:
+        return 'soffice'  # Assume it's in PATH on Linux/macOS
+
+
 def convert_file_to_pdf(input_path, output_dir):
+    """Convert a single document (ppt, doc, etc.) to PDF."""
     if not os.path.isfile(input_path):
         raise FileNotFoundError(f"The input file {input_path} does not exist.")
 
     os.makedirs(output_dir, exist_ok=True)
-    
+
+    check_fonts_installed()
+
+    soffice_cmd = get_soffice_command()
+
     cmd = [
-        'soffice',
+        soffice_cmd,
         '--headless',
+        '--norestore',
+        '--invisible',
         '--convert-to', 'pdf',
         '--outdir', str(output_dir),
         str(input_path)
     ]
-    
+
     process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    
+
     if process.returncode != 0:
-        raise ConvertToPdfError(process.stderr.decode())
+        raise ConvertToPdfError(f"LibreOffice convert failed: {process.stderr.decode()}")