start_all.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. """
  2. MinerU Tianshu - Unified Startup Script
  3. 天枢统一启动脚本
  4. 一键启动所有服务:API Server + LitServe Workers + Task Scheduler
  5. """
  6. import subprocess
  7. import signal
  8. import sys
  9. import time
  10. import os
  11. from loguru import logger
  12. from pathlib import Path
  13. import argparse
  14. class TianshuLauncher:
  15. """天枢服务启动器"""
  16. def __init__(
  17. self,
  18. output_dir='/tmp/mineru_tianshu_output',
  19. api_port=8000,
  20. worker_port=9000,
  21. workers_per_device=1,
  22. devices='auto',
  23. accelerator='auto'
  24. ):
  25. self.output_dir = output_dir
  26. self.api_port = api_port
  27. self.worker_port = worker_port
  28. self.workers_per_device = workers_per_device
  29. self.devices = devices
  30. self.accelerator = accelerator
  31. self.processes = []
  32. def start_services(self):
  33. """启动所有服务"""
  34. logger.info("=" * 70)
  35. logger.info("🚀 MinerU Tianshu - Starting All Services")
  36. logger.info("=" * 70)
  37. logger.info("天枢 - 企业级多GPU文档解析服务")
  38. logger.info("")
  39. try:
  40. # 1. 启动 API Server
  41. logger.info("📡 [1/3] Starting API Server...")
  42. env = os.environ.copy()
  43. env['API_PORT'] = str(self.api_port)
  44. api_proc = subprocess.Popen(
  45. [sys.executable, 'api_server.py'],
  46. cwd=Path(__file__).parent,
  47. env=env
  48. )
  49. self.processes.append(('API Server', api_proc))
  50. time.sleep(3)
  51. if api_proc.poll() is not None:
  52. logger.error("❌ API Server failed to start!")
  53. return False
  54. logger.info(f" ✅ API Server started (PID: {api_proc.pid})")
  55. logger.info(f" 📖 API Docs: http://localhost:{self.api_port}/docs")
  56. logger.info("")
  57. # 2. 启动 LitServe Worker Pool
  58. logger.info("⚙️ [2/3] Starting LitServe Worker Pool...")
  59. worker_cmd = [
  60. sys.executable, 'litserve_worker.py',
  61. '--output-dir', self.output_dir,
  62. '--accelerator', self.accelerator,
  63. '--workers-per-device', str(self.workers_per_device),
  64. '--port', str(self.worker_port),
  65. '--devices', str(self.devices) if isinstance(self.devices, str) else ','.join(map(str, self.devices))
  66. ]
  67. worker_proc = subprocess.Popen(
  68. worker_cmd,
  69. cwd=Path(__file__).parent
  70. )
  71. self.processes.append(('LitServe Workers', worker_proc))
  72. time.sleep(5)
  73. if worker_proc.poll() is not None:
  74. logger.error("❌ LitServe Workers failed to start!")
  75. return False
  76. logger.info(f" ✅ LitServe Workers started (PID: {worker_proc.pid})")
  77. logger.info(f" 🔌 Worker Port: {self.worker_port}")
  78. logger.info(f" 👷 Workers per Device: {self.workers_per_device}")
  79. logger.info("")
  80. # 3. 启动 Task Scheduler
  81. logger.info("🔄 [3/3] Starting Task Scheduler...")
  82. scheduler_cmd = [
  83. sys.executable, 'task_scheduler.py',
  84. '--litserve-url', f'http://localhost:{self.worker_port}/predict',
  85. '--wait-for-workers'
  86. ]
  87. scheduler_proc = subprocess.Popen(
  88. scheduler_cmd,
  89. cwd=Path(__file__).parent
  90. )
  91. self.processes.append(('Task Scheduler', scheduler_proc))
  92. time.sleep(3)
  93. if scheduler_proc.poll() is not None:
  94. logger.error("❌ Task Scheduler failed to start!")
  95. return False
  96. logger.info(f" ✅ Task Scheduler started (PID: {scheduler_proc.pid})")
  97. logger.info("")
  98. # 启动成功
  99. logger.info("=" * 70)
  100. logger.info("✅ All Services Started Successfully!")
  101. logger.info("=" * 70)
  102. logger.info("")
  103. logger.info("📚 Quick Start:")
  104. logger.info(f" • API Documentation: http://localhost:{self.api_port}/docs")
  105. logger.info(f" • Submit Task: POST http://localhost:{self.api_port}/api/v1/tasks/submit")
  106. logger.info(f" • Query Status: GET http://localhost:{self.api_port}/api/v1/tasks/{{task_id}}")
  107. logger.info(f" • Queue Stats: GET http://localhost:{self.api_port}/api/v1/queue/stats")
  108. logger.info("")
  109. logger.info("🔧 Service Details:")
  110. for name, proc in self.processes:
  111. logger.info(f" • {name:20s} PID: {proc.pid}")
  112. logger.info("")
  113. logger.info("⚠️ Press Ctrl+C to stop all services")
  114. logger.info("=" * 70)
  115. return True
  116. except Exception as e:
  117. logger.error(f"❌ Failed to start services: {e}")
  118. self.stop_services()
  119. return False
  120. def stop_services(self, signum=None, frame=None):
  121. """停止所有服务"""
  122. logger.info("")
  123. logger.info("=" * 70)
  124. logger.info("⏹️ Stopping All Services...")
  125. logger.info("=" * 70)
  126. for name, proc in self.processes:
  127. if proc.poll() is None: # 进程仍在运行
  128. logger.info(f" Stopping {name} (PID: {proc.pid})...")
  129. proc.terminate()
  130. # 等待所有进程结束
  131. for name, proc in self.processes:
  132. try:
  133. proc.wait(timeout=10)
  134. logger.info(f" ✅ {name} stopped")
  135. except subprocess.TimeoutExpired:
  136. logger.warning(f" ⚠️ {name} did not stop gracefully, forcing...")
  137. proc.kill()
  138. proc.wait()
  139. logger.info("=" * 70)
  140. logger.info("✅ All Services Stopped")
  141. logger.info("=" * 70)
  142. sys.exit(0)
  143. def wait(self):
  144. """等待所有服务"""
  145. try:
  146. while True:
  147. time.sleep(1)
  148. # 检查进程状态
  149. for name, proc in self.processes:
  150. if proc.poll() is not None:
  151. logger.error(f"❌ {name} unexpectedly stopped!")
  152. self.stop_services()
  153. return
  154. except KeyboardInterrupt:
  155. self.stop_services()
  156. def main():
  157. """主函数"""
  158. parser = argparse.ArgumentParser(
  159. description='MinerU Tianshu - 统一启动脚本',
  160. formatter_class=argparse.RawDescriptionHelpFormatter,
  161. epilog="""
  162. 示例:
  163. # 使用默认配置启动(自动检测GPU)
  164. python start_all.py
  165. # 使用CPU模式
  166. python start_all.py --accelerator cpu
  167. # 指定输出目录和端口
  168. python start_all.py --output-dir /data/output --api-port 8080
  169. # 每个GPU启动2个worker
  170. python start_all.py --accelerator cuda --workers-per-device 2
  171. # 只使用指定的GPU
  172. python start_all.py --accelerator cuda --devices 0,1
  173. """
  174. )
  175. parser.add_argument('--output-dir', type=str, default='/tmp/mineru_tianshu_output',
  176. help='输出目录 (默认: /tmp/mineru_tianshu_output)')
  177. parser.add_argument('--api-port', type=int, default=8000,
  178. help='API服务器端口 (默认: 8000)')
  179. parser.add_argument('--worker-port', type=int, default=9000,
  180. help='Worker服务器端口 (默认: 9000)')
  181. parser.add_argument('--accelerator', type=str, default='auto',
  182. choices=['auto', 'cuda', 'cpu', 'mps'],
  183. help='加速器类型 (默认: auto,自动检测)')
  184. parser.add_argument('--workers-per-device', type=int, default=1,
  185. help='每个GPU的worker数量 (默认: 1)')
  186. parser.add_argument('--devices', type=str, default='auto',
  187. help='使用的GPU设备,逗号分隔 (默认: auto,使用所有GPU)')
  188. args = parser.parse_args()
  189. # 处理 devices 参数
  190. devices = args.devices
  191. if devices != 'auto':
  192. try:
  193. devices = [int(d) for d in devices.split(',')]
  194. except:
  195. logger.warning(f"Invalid devices format: {devices}, using 'auto'")
  196. devices = 'auto'
  197. # 创建启动器
  198. launcher = TianshuLauncher(
  199. output_dir=args.output_dir,
  200. api_port=args.api_port,
  201. worker_port=args.worker_port,
  202. workers_per_device=args.workers_per_device,
  203. devices=devices,
  204. accelerator=args.accelerator
  205. )
  206. # 设置信号处理
  207. signal.signal(signal.SIGINT, launcher.stop_services)
  208. signal.signal(signal.SIGTERM, launcher.stop_services)
  209. # 启动服务
  210. if launcher.start_services():
  211. launcher.wait()
  212. else:
  213. sys.exit(1)
  214. if __name__ == '__main__':
  215. main()