# office_to_pdf.py
# Convert office documents (ppt, doc, etc.) to PDF using a headless LibreOffice.
  1. import os
  2. import subprocess
  3. import platform
  4. from pathlib import Path
  5. import shutil
  6. class ConvertToPdfError(Exception):
  7. def __init__(self, msg):
  8. self.msg = msg
  9. super().__init__(self.msg)
  10. # Chinese font list
  11. REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
  12. def check_fonts_installed():
  13. """Check if required Chinese fonts are installed."""
  14. system_type = platform.system()
  15. if system_type == 'Windows':
  16. # Windows: check fonts via registry or system font folder
  17. font_dir = Path("C:/Windows/Fonts")
  18. installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
  19. if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
  20. return True
  21. raise EnvironmentError(
  22. f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
  23. )
  24. else:
  25. # Linux/macOS: use fc-list
  26. try:
  27. output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
  28. for font in REQUIRED_CHS_FONTS:
  29. if font in output:
  30. return True
  31. raise EnvironmentError(
  32. f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
  33. )
  34. except Exception as e:
  35. raise EnvironmentError(f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}")
  36. def get_soffice_command():
  37. """Return the path to LibreOffice's soffice executable depending on the platform."""
  38. system_type = platform.system()
  39. # First check if soffice is in PATH
  40. soffice_path = shutil.which('soffice')
  41. if soffice_path:
  42. return soffice_path
  43. if system_type == 'Windows':
  44. # Check common installation paths
  45. possible_paths = [
  46. Path(os.environ.get('PROGRAMFILES', 'C:/Program Files')) / 'LibreOffice/program/soffice.exe',
  47. Path(os.environ.get('PROGRAMFILES(X86)', 'C:/Program Files (x86)')) / 'LibreOffice/program/soffice.exe',
  48. Path('C:/Program Files/LibreOffice/program/soffice.exe'),
  49. Path('C:/Program Files (x86)/LibreOffice/program/soffice.exe')
  50. ]
  51. # Check other drives for windows
  52. for drive in ['C:', 'D:', 'E:', 'F:', 'G:', 'H:']:
  53. possible_paths.append(Path(f"{drive}/LibreOffice/program/soffice.exe"))
  54. for path in possible_paths:
  55. if path.exists():
  56. return str(path)
  57. raise ConvertToPdfError(
  58. "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/ "
  59. "or ensure soffice.exe is in your PATH environment variable."
  60. )
  61. else:
  62. # For Linux/macOS, provide installation instructions if not found
  63. try:
  64. # Try to find soffice in standard locations
  65. possible_paths = [
  66. '/usr/bin/soffice',
  67. '/usr/local/bin/soffice',
  68. '/opt/libreoffice/program/soffice',
  69. '/Applications/LibreOffice.app/Contents/MacOS/soffice'
  70. ]
  71. for path in possible_paths:
  72. if os.path.exists(path):
  73. return path
  74. raise ConvertToPdfError(
  75. "LibreOffice not found. Please install it:\n"
  76. " - Ubuntu/Debian: sudo apt-get install libreoffice\n"
  77. " - CentOS/RHEL: sudo yum install libreoffice\n"
  78. " - macOS: brew install libreoffice or download from https://www.libreoffice.org/\n"
  79. " - Or ensure soffice is in your PATH environment variable."
  80. )
  81. except Exception as e:
  82. raise ConvertToPdfError(f"Error locating LibreOffice: {str(e)}")
  83. def convert_file_to_pdf(input_path, output_dir):
  84. """Convert a single document (ppt, doc, etc.) to PDF."""
  85. if not os.path.isfile(input_path):
  86. raise FileNotFoundError(f"The input file {input_path} does not exist.")
  87. os.makedirs(output_dir, exist_ok=True)
  88. check_fonts_installed()
  89. soffice_cmd = get_soffice_command()
  90. cmd = [
  91. soffice_cmd,
  92. '--headless',
  93. '--norestore',
  94. '--invisible',
  95. '--convert-to', 'pdf',
  96. '--outdir', str(output_dir),
  97. str(input_path)
  98. ]
  99. process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  100. if process.returncode != 0:
  101. raise ConvertToPdfError(f"LibreOffice convert failed: {process.stderr.decode()}")