start_all.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. """
  2. MinerU Tianshu - Unified Startup Script
  3. 天枢统一启动脚本
  4. 一键启动所有服务:API Server + LitServe Workers + Task Scheduler
  5. """
  6. import subprocess
  7. import signal
  8. import sys
  9. import time
  10. from loguru import logger
  11. from pathlib import Path
  12. import argparse
  13. class TianshuLauncher:
  14. """天枢服务启动器"""
  15. def __init__(
  16. self,
  17. output_dir='/tmp/mineru_tianshu_output',
  18. api_port=8000,
  19. worker_port=9000,
  20. workers_per_device=1,
  21. devices='auto',
  22. accelerator='auto'
  23. ):
  24. self.output_dir = output_dir
  25. self.api_port = api_port
  26. self.worker_port = worker_port
  27. self.workers_per_device = workers_per_device
  28. self.devices = devices
  29. self.accelerator = accelerator
  30. self.processes = []
  31. def start_services(self):
  32. """启动所有服务"""
  33. logger.info("=" * 70)
  34. logger.info("🚀 MinerU Tianshu - Starting All Services")
  35. logger.info("=" * 70)
  36. logger.info("天枢 - 企业级多GPU文档解析服务")
  37. logger.info("")
  38. try:
  39. # 1. 启动 API Server
  40. logger.info("📡 [1/3] Starting API Server...")
  41. api_proc = subprocess.Popen(
  42. [sys.executable, 'api_server.py'],
  43. cwd=Path(__file__).parent
  44. )
  45. self.processes.append(('API Server', api_proc))
  46. time.sleep(3)
  47. if api_proc.poll() is not None:
  48. logger.error("❌ API Server failed to start!")
  49. return False
  50. logger.info(f" ✅ API Server started (PID: {api_proc.pid})")
  51. logger.info(f" 📖 API Docs: http://localhost:{self.api_port}/docs")
  52. logger.info("")
  53. # 2. 启动 LitServe Worker Pool
  54. logger.info("⚙️ [2/3] Starting LitServe Worker Pool...")
  55. worker_cmd = [
  56. sys.executable, 'litserve_worker.py',
  57. '--output-dir', self.output_dir,
  58. '--accelerator', self.accelerator,
  59. '--workers-per-device', str(self.workers_per_device),
  60. '--port', str(self.worker_port),
  61. '--devices', str(self.devices) if isinstance(self.devices, str) else ','.join(map(str, self.devices))
  62. ]
  63. worker_proc = subprocess.Popen(
  64. worker_cmd,
  65. cwd=Path(__file__).parent
  66. )
  67. self.processes.append(('LitServe Workers', worker_proc))
  68. time.sleep(5)
  69. if worker_proc.poll() is not None:
  70. logger.error("❌ LitServe Workers failed to start!")
  71. return False
  72. logger.info(f" ✅ LitServe Workers started (PID: {worker_proc.pid})")
  73. logger.info(f" 🔌 Worker Port: {self.worker_port}")
  74. logger.info(f" 👷 Workers per Device: {self.workers_per_device}")
  75. logger.info("")
  76. # 3. 启动 Task Scheduler
  77. logger.info("🔄 [3/3] Starting Task Scheduler...")
  78. scheduler_cmd = [
  79. sys.executable, 'task_scheduler.py',
  80. '--litserve-url', f'http://localhost:{self.worker_port}/predict',
  81. '--wait-for-workers'
  82. ]
  83. scheduler_proc = subprocess.Popen(
  84. scheduler_cmd,
  85. cwd=Path(__file__).parent
  86. )
  87. self.processes.append(('Task Scheduler', scheduler_proc))
  88. time.sleep(3)
  89. if scheduler_proc.poll() is not None:
  90. logger.error("❌ Task Scheduler failed to start!")
  91. return False
  92. logger.info(f" ✅ Task Scheduler started (PID: {scheduler_proc.pid})")
  93. logger.info("")
  94. # 启动成功
  95. logger.info("=" * 70)
  96. logger.info("✅ All Services Started Successfully!")
  97. logger.info("=" * 70)
  98. logger.info("")
  99. logger.info("📚 Quick Start:")
  100. logger.info(f" • API Documentation: http://localhost:{self.api_port}/docs")
  101. logger.info(f" • Submit Task: POST http://localhost:{self.api_port}/api/v1/tasks/submit")
  102. logger.info(f" • Query Status: GET http://localhost:{self.api_port}/api/v1/tasks/{{task_id}}")
  103. logger.info(f" • Queue Stats: GET http://localhost:{self.api_port}/api/v1/queue/stats")
  104. logger.info("")
  105. logger.info("🔧 Service Details:")
  106. for name, proc in self.processes:
  107. logger.info(f" • {name:20s} PID: {proc.pid}")
  108. logger.info("")
  109. logger.info("⚠️ Press Ctrl+C to stop all services")
  110. logger.info("=" * 70)
  111. return True
  112. except Exception as e:
  113. logger.error(f"❌ Failed to start services: {e}")
  114. self.stop_services()
  115. return False
  116. def stop_services(self, signum=None, frame=None):
  117. """停止所有服务"""
  118. logger.info("")
  119. logger.info("=" * 70)
  120. logger.info("⏹️ Stopping All Services...")
  121. logger.info("=" * 70)
  122. for name, proc in self.processes:
  123. if proc.poll() is None: # 进程仍在运行
  124. logger.info(f" Stopping {name} (PID: {proc.pid})...")
  125. proc.terminate()
  126. # 等待所有进程结束
  127. for name, proc in self.processes:
  128. try:
  129. proc.wait(timeout=10)
  130. logger.info(f" ✅ {name} stopped")
  131. except subprocess.TimeoutExpired:
  132. logger.warning(f" ⚠️ {name} did not stop gracefully, forcing...")
  133. proc.kill()
  134. proc.wait()
  135. logger.info("=" * 70)
  136. logger.info("✅ All Services Stopped")
  137. logger.info("=" * 70)
  138. sys.exit(0)
  139. def wait(self):
  140. """等待所有服务"""
  141. try:
  142. while True:
  143. time.sleep(1)
  144. # 检查进程状态
  145. for name, proc in self.processes:
  146. if proc.poll() is not None:
  147. logger.error(f"❌ {name} unexpectedly stopped!")
  148. self.stop_services()
  149. return
  150. except KeyboardInterrupt:
  151. self.stop_services()
  152. def main():
  153. """主函数"""
  154. parser = argparse.ArgumentParser(
  155. description='MinerU Tianshu - 统一启动脚本',
  156. formatter_class=argparse.RawDescriptionHelpFormatter,
  157. epilog="""
  158. 示例:
  159. # 使用默认配置启动(自动检测GPU)
  160. python start_all.py
  161. # 使用CPU模式
  162. python start_all.py --accelerator cpu
  163. # 指定输出目录和端口
  164. python start_all.py --output-dir /data/output --api-port 8080
  165. # 每个GPU启动2个worker
  166. python start_all.py --accelerator cuda --workers-per-device 2
  167. # 只使用指定的GPU
  168. python start_all.py --accelerator cuda --devices 0,1
  169. """
  170. )
  171. parser.add_argument('--output-dir', type=str, default='/tmp/mineru_tianshu_output',
  172. help='输出目录 (默认: /tmp/mineru_tianshu_output)')
  173. parser.add_argument('--api-port', type=int, default=8000,
  174. help='API服务器端口 (默认: 8000)')
  175. parser.add_argument('--worker-port', type=int, default=9000,
  176. help='Worker服务器端口 (默认: 9000)')
  177. parser.add_argument('--accelerator', type=str, default='auto',
  178. choices=['auto', 'cuda', 'cpu', 'mps'],
  179. help='加速器类型 (默认: auto,自动检测)')
  180. parser.add_argument('--workers-per-device', type=int, default=1,
  181. help='每个GPU的worker数量 (默认: 1)')
  182. parser.add_argument('--devices', type=str, default='auto',
  183. help='使用的GPU设备,逗号分隔 (默认: auto,使用所有GPU)')
  184. args = parser.parse_args()
  185. # 处理 devices 参数
  186. devices = args.devices
  187. if devices != 'auto':
  188. try:
  189. devices = [int(d) for d in devices.split(',')]
  190. except:
  191. logger.warning(f"Invalid devices format: {devices}, using 'auto'")
  192. devices = 'auto'
  193. # 创建启动器
  194. launcher = TianshuLauncher(
  195. output_dir=args.output_dir,
  196. api_port=args.api_port,
  197. worker_port=args.worker_port,
  198. workers_per_device=args.workers_per_device,
  199. devices=devices,
  200. accelerator=args.accelerator
  201. )
  202. # 设置信号处理
  203. signal.signal(signal.SIGINT, launcher.stop_services)
  204. signal.signal(signal.SIGTERM, launcher.stop_services)
  205. # 启动服务
  206. if launcher.start_services():
  207. launcher.wait()
  208. else:
  209. sys.exit(1)
  210. if __name__ == '__main__':
  211. main()