office_to_pdf.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. import os
  2. import subprocess
  3. import platform
  4. from pathlib import Path
  5. import shutil
  6. class ConvertToPdfError(Exception):
  7. def __init__(self, msg):
  8. self.msg = msg
  9. super().__init__(self.msg)
  10. # Chinese font list
  11. REQUIRED_CHS_FONTS = ['SimSun', 'Microsoft YaHei', 'Noto Sans CJK SC']
  12. def check_fonts_installed():
  13. """Check if required Chinese fonts are installed."""
  14. system_type = platform.system()
  15. if system_type == 'Windows':
  16. # Windows: check fonts via registry or system font folder
  17. # font_dir = Path("C:/Windows/Fonts")
  18. # installed_fonts = [f.name for f in font_dir.glob("*.ttf")]
  19. # if any(font for font in REQUIRED_CHS_FONTS if any(font in f for f in installed_fonts)):
  20. # return True
  21. # raise EnvironmentError(
  22. # f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
  23. # )
  24. pass
  25. else:
  26. # Linux/macOS: use fc-list
  27. try:
  28. output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
  29. for font in REQUIRED_CHS_FONTS:
  30. if font in output:
  31. return True
  32. raise EnvironmentError(
  33. f"Missing Chinese font. Please install at least one of: {', '.join(REQUIRED_CHS_FONTS)}"
  34. )
  35. except Exception as e:
  36. raise EnvironmentError(f"Font detection failed. Please install 'fontconfig' and fonts: {str(e)}")
  37. def get_soffice_command():
  38. """Return the path to LibreOffice's soffice executable depending on the platform."""
  39. system_type = platform.system()
  40. # First check if soffice is in PATH
  41. soffice_path = shutil.which('soffice')
  42. if soffice_path:
  43. return soffice_path
  44. if system_type == 'Windows':
  45. # Check common installation paths
  46. possible_paths = [
  47. Path(os.environ.get('PROGRAMFILES', 'C:/Program Files')) / 'LibreOffice/program/soffice.exe',
  48. Path(os.environ.get('PROGRAMFILES(X86)', 'C:/Program Files (x86)')) / 'LibreOffice/program/soffice.exe',
  49. Path('C:/Program Files/LibreOffice/program/soffice.exe'),
  50. Path('C:/Program Files (x86)/LibreOffice/program/soffice.exe')
  51. ]
  52. # Check other drives for windows
  53. for drive in ['C:', 'D:', 'E:', 'F:', 'G:', 'H:']:
  54. possible_paths.append(Path(f"{drive}/LibreOffice/program/soffice.exe"))
  55. for path in possible_paths:
  56. if path.exists():
  57. return str(path)
  58. raise ConvertToPdfError(
  59. "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/ "
  60. "or ensure soffice.exe is in your PATH environment variable."
  61. )
  62. else:
  63. # For Linux/macOS, provide installation instructions if not found
  64. try:
  65. # Try to find soffice in standard locations
  66. possible_paths = [
  67. '/usr/bin/soffice',
  68. '/usr/local/bin/soffice',
  69. '/opt/libreoffice/program/soffice',
  70. '/Applications/LibreOffice.app/Contents/MacOS/soffice'
  71. ]
  72. for path in possible_paths:
  73. if os.path.exists(path):
  74. return path
  75. raise ConvertToPdfError(
  76. "LibreOffice not found. Please install it:\n"
  77. " - Ubuntu/Debian: sudo apt-get install libreoffice\n"
  78. " - CentOS/RHEL: sudo yum install libreoffice\n"
  79. " - macOS: brew install libreoffice or download from https://www.libreoffice.org/\n"
  80. " - Or ensure soffice is in your PATH environment variable."
  81. )
  82. except Exception as e:
  83. raise ConvertToPdfError(f"Error locating LibreOffice: {str(e)}")
  84. def convert_file_to_pdf(input_path, output_dir):
  85. """Convert a single document (ppt, doc, etc.) to PDF."""
  86. if not os.path.isfile(input_path):
  87. raise FileNotFoundError(f"The input file {input_path} does not exist.")
  88. os.makedirs(output_dir, exist_ok=True)
  89. check_fonts_installed()
  90. soffice_cmd = get_soffice_command()
  91. cmd = [
  92. soffice_cmd,
  93. '--headless',
  94. '--norestore',
  95. '--invisible',
  96. '--convert-to', 'pdf',
  97. '--outdir', str(output_dir),
  98. str(input_path)
  99. ]
  100. process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  101. if process.returncode != 0:
  102. raise ConvertToPdfError(f"LibreOffice convert failed: {process.stderr.decode()}")