office_to_pdf.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. import os
  2. import subprocess
  3. import platform
  4. from pathlib import Path
  5. import shutil
  6. from loguru import logger
  7. class ConvertToPdfError(Exception):
  8. def __init__(self, msg):
  9. self.msg = msg
  10. super().__init__(self.msg)
  11. def check_fonts_installed():
  12. """Check if required Chinese fonts are installed."""
  13. system_type = platform.system()
  14. if system_type in ['Windows', 'Darwin']:
  15. pass
  16. else:
  17. # Linux: use fc-list
  18. try:
  19. output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
  20. if output.strip(): # 只要有任何输出(非空)
  21. return True
  22. else:
  23. logger.warning(
  24. f"No Chinese fonts were detected, the converted document may not display Chinese content properly."
  25. )
  26. except Exception:
  27. pass
  28. def get_soffice_command():
  29. """Return the path to LibreOffice's soffice executable depending on the platform."""
  30. system_type = platform.system()
  31. # First check if soffice is in PATH
  32. soffice_path = shutil.which('soffice')
  33. if soffice_path:
  34. return soffice_path
  35. if system_type == 'Windows':
  36. # Check common installation paths
  37. possible_paths = [
  38. Path(os.environ.get('PROGRAMFILES', 'C:/Program Files')) / 'LibreOffice/program/soffice.exe',
  39. Path(os.environ.get('PROGRAMFILES(X86)', 'C:/Program Files (x86)')) / 'LibreOffice/program/soffice.exe',
  40. Path('C:/Program Files/LibreOffice/program/soffice.exe'),
  41. Path('C:/Program Files (x86)/LibreOffice/program/soffice.exe')
  42. ]
  43. # Check other drives for windows
  44. for drive in ['C:', 'D:', 'E:', 'F:', 'G:', 'H:']:
  45. possible_paths.append(Path(f"{drive}/LibreOffice/program/soffice.exe"))
  46. for path in possible_paths:
  47. if path.exists():
  48. return str(path)
  49. raise ConvertToPdfError(
  50. "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/ "
  51. "or ensure soffice.exe is in your PATH environment variable."
  52. )
  53. else:
  54. # For Linux/macOS, provide installation instructions if not found
  55. try:
  56. # Try to find soffice in standard locations
  57. possible_paths = [
  58. '/usr/bin/soffice',
  59. '/usr/local/bin/soffice',
  60. '/opt/libreoffice/program/soffice',
  61. '/Applications/LibreOffice.app/Contents/MacOS/soffice'
  62. ]
  63. for path in possible_paths:
  64. if os.path.exists(path):
  65. return path
  66. raise ConvertToPdfError(
  67. "LibreOffice not found. Please install it:\n"
  68. " - Ubuntu/Debian: sudo apt-get install libreoffice\n"
  69. " - CentOS/RHEL: sudo yum install libreoffice\n"
  70. " - macOS: brew install libreoffice or download from https://www.libreoffice.org/\n"
  71. " - Or ensure soffice is in your PATH environment variable."
  72. )
  73. except Exception as e:
  74. raise ConvertToPdfError(f"Error locating LibreOffice: {str(e)}")
  75. def convert_file_to_pdf(input_path, output_dir):
  76. """Convert a single document (ppt, doc, etc.) to PDF."""
  77. if not os.path.isfile(input_path):
  78. raise FileNotFoundError(f"The input file {input_path} does not exist.")
  79. os.makedirs(output_dir, exist_ok=True)
  80. check_fonts_installed()
  81. soffice_cmd = get_soffice_command()
  82. cmd = [
  83. soffice_cmd,
  84. '--headless',
  85. '--norestore',
  86. '--invisible',
  87. '--convert-to', 'pdf',
  88. '--outdir', str(output_dir),
  89. str(input_path)
  90. ]
  91. process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  92. if process.returncode != 0:
  93. raise ConvertToPdfError(f"LibreOffice convert failed: {process.stderr.decode()}")