paddlex_cli.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import argparse
  15. import ast
  16. import importlib.resources
  17. import os
  18. import shutil
  19. import subprocess
  20. import sys
  21. from pathlib import Path
  22. from . import create_pipeline
  23. from .constants import MODEL_FILE_PREFIX
  24. from .inference.pipelines import load_pipeline_config
  25. from .inference.utils.model_paths import get_model_paths
  26. from .repo_manager import get_all_supported_repo_names, setup
  27. from .utils import logging
  28. from .utils.deps import (
  29. get_dep_version,
  30. get_genai_dep_specs,
  31. get_genai_fastdeploy_spec,
  32. get_paddle2onnx_dep_specs,
  33. get_serving_dep_specs,
  34. is_dep_available,
  35. is_paddle2onnx_plugin_available,
  36. )
  37. from .utils.env import get_paddle_cuda_version
  38. from .utils.install import install_packages, uninstall_packages
  39. from .utils.interactive_get_pipeline import interactive_get_pipeline
  40. from .utils.pipeline_arguments import PIPELINE_ARGUMENTS
  41. def args_cfg():
  42. """parse cli arguments"""
  43. def parse_str(s):
  44. """convert str type value
  45. to None type if it is "None",
  46. to bool type if it means True or False.
  47. """
  48. if s in ("None", "none", "NONE"):
  49. return None
  50. elif s in ("TRUE", "True", "true", "T", "t"):
  51. return True
  52. elif s in ("FALSE", "False", "false", "F", "f"):
  53. return False
  54. return s
  55. parser = argparse.ArgumentParser(
  56. "Command-line interface for PaddleX. Use the options below to install plugins, run pipeline predictions, or start the serving application."
  57. )
  58. install_group = parser.add_argument_group("Install PaddleX Options")
  59. pipeline_group = parser.add_argument_group("Pipeline Predict Options")
  60. serving_group = parser.add_argument_group("Serving Options")
  61. paddle2onnx_group = parser.add_argument_group("Paddle2ONNX Options")
  62. ################# install pdx #################
  63. install_group.add_argument(
  64. "--install",
  65. nargs="*",
  66. metavar="PLUGIN",
  67. help="Install specified PaddleX plugins.",
  68. )
  69. install_group.add_argument(
  70. "--no_deps",
  71. action="store_true",
  72. help="Install custom development plugins without their dependencies.",
  73. )
  74. install_group.add_argument(
  75. "--platform",
  76. type=str,
  77. choices=["github.com", "gitee.com"],
  78. default="github.com",
  79. help="Platform to use for installation (default: github.com).",
  80. )
  81. install_group.add_argument(
  82. "-y",
  83. "--yes",
  84. dest="update_repos",
  85. action="store_true",
  86. help="Automatically confirm prompts and update repositories.",
  87. )
  88. install_group.add_argument(
  89. "--use_local_repos",
  90. action="store_true",
  91. default=False,
  92. help="Use local repositories if they exist.",
  93. )
  94. install_group.add_argument(
  95. "--deps_to_replace",
  96. type=str,
  97. nargs="+",
  98. default=None,
  99. help="Replace dependency version when installing from repositories.",
  100. )
  101. ################# pipeline predict #################
  102. pipeline_group.add_argument(
  103. "--pipeline", type=str, help="Name of the pipeline to execute for prediction."
  104. )
  105. pipeline_group.add_argument(
  106. "--input",
  107. type=str,
  108. default=None,
  109. help="Input data or path for the pipeline, supports specific file and directory.",
  110. )
  111. pipeline_group.add_argument(
  112. "--save_path",
  113. type=str,
  114. default=None,
  115. help="Path to save the prediction results.",
  116. )
  117. pipeline_group.add_argument(
  118. "--device",
  119. type=str,
  120. default=None,
  121. help="Device to run the pipeline on (e.g., 'cpu', 'gpu:0').",
  122. )
  123. pipeline_group.add_argument(
  124. "--use_hpip",
  125. action="store_true",
  126. help="Use high-performance inference plugin.",
  127. )
  128. pipeline_group.add_argument(
  129. "--hpi_config",
  130. type=ast.literal_eval,
  131. help="High-performance inference configuration.",
  132. )
  133. pipeline_group.add_argument(
  134. "--get_pipeline_config",
  135. type=str,
  136. default=None,
  137. help="Retrieve the configuration for a specified pipeline.",
  138. )
  139. ################# serving #################
  140. serving_group.add_argument(
  141. "--serve",
  142. action="store_true",
  143. help="Start the serving application to handle requests.",
  144. )
  145. serving_group.add_argument(
  146. "--host",
  147. type=str,
  148. default="0.0.0.0",
  149. help="Host address to serve on (default: 0.0.0.0).",
  150. )
  151. serving_group.add_argument(
  152. "--port",
  153. type=int,
  154. default=8080,
  155. help="Port number to serve on (default: 8080).",
  156. )
  157. # Serving also uses `--pipeline`, `--device`, `--use_hpip`, and `--hpi_config`
  158. ################# paddle2onnx #################
  159. paddle2onnx_group.add_argument(
  160. "--paddle2onnx",
  161. action="store_true",
  162. help="Convert PaddlePaddle model to ONNX format.",
  163. )
  164. paddle2onnx_group.add_argument(
  165. "--paddle_model_dir",
  166. type=str,
  167. help="Directory containing the PaddlePaddle model.",
  168. )
  169. paddle2onnx_group.add_argument(
  170. "--onnx_model_dir",
  171. type=str,
  172. help="Output directory for the ONNX model.",
  173. )
  174. paddle2onnx_group.add_argument(
  175. "--opset_version", type=int, default=7, help="Version of the ONNX opset to use."
  176. )
  177. # Parse known arguments to get the pipeline name
  178. args, remaining_args = parser.parse_known_args()
  179. pipeline = args.pipeline
  180. pipeline_args = []
  181. if (
  182. not (args.install is not None or args.serve or args.paddle2onnx)
  183. and pipeline is not None
  184. ):
  185. if os.path.isfile(pipeline):
  186. pipeline_name = load_pipeline_config(pipeline)["pipeline_name"]
  187. else:
  188. pipeline_name = pipeline
  189. if pipeline_name not in PIPELINE_ARGUMENTS:
  190. support_pipelines = ", ".join(PIPELINE_ARGUMENTS.keys())
  191. logging.error(
  192. f"Unsupported pipeline: {pipeline_name}, CLI predict only supports these pipelines: {support_pipelines}\n"
  193. )
  194. sys.exit(1)
  195. pipeline_args = PIPELINE_ARGUMENTS[pipeline_name]
  196. if pipeline_args is None:
  197. pipeline_args = []
  198. pipeline_specific_group = parser.add_argument_group(
  199. f"{pipeline_name.capitalize()} Pipeline Options"
  200. )
  201. for arg in pipeline_args:
  202. pipeline_specific_group.add_argument(
  203. arg["name"],
  204. type=parse_str if arg["type"] is bool else arg["type"],
  205. help=arg.get("help", f"Argument for {pipeline_name} pipeline."),
  206. )
  207. return parser, pipeline_args
  208. def install(args):
  209. """install paddlex"""
  210. def _install_serving_deps():
  211. try:
  212. install_packages(get_serving_dep_specs())
  213. except Exception:
  214. logging.error("Installation failed", exc_info=True)
  215. sys.exit(1)
  216. logging.info("Successfully installed the serving plugin")
  217. def _install_paddle2onnx_deps():
  218. try:
  219. install_packages(get_paddle2onnx_dep_specs())
  220. except Exception:
  221. logging.error("Installation failed", exc_info=True)
  222. sys.exit(1)
  223. logging.info("Successfully installed the Paddle2ONNX plugin")
  224. def _install_hpi_deps(device_type):
  225. SUPPORTED_DEVICE_TYPES = ["cpu", "gpu", "npu"]
  226. if device_type not in SUPPORTED_DEVICE_TYPES:
  227. logging.error(
  228. "Failed to install the high-performance plugin.\n"
  229. "Supported device types: %s. Your input device type: %s.\n",
  230. SUPPORTED_DEVICE_TYPES,
  231. device_type,
  232. )
  233. sys.exit(2)
  234. hpip_links_file = "hpip_links.html"
  235. if device_type == "gpu":
  236. cuda_version = get_paddle_cuda_version()
  237. if not cuda_version:
  238. sys.exit(
  239. "No CUDA version found. Please make sure you have installed PaddlePaddle with CUDA enabled."
  240. )
  241. if cuda_version[0] == 12:
  242. hpip_links_file = "hpip_links_cu12.html"
  243. elif cuda_version[0] != 11:
  244. sys.exit(
  245. "Currently, only CUDA versions 11.x and 12.x are supported by the high-performance inference plugin."
  246. )
  247. package_mapping = {
  248. "cpu": "ultra-infer-python",
  249. "gpu": "ultra-infer-gpu-python",
  250. "npu": "ultra-infer-npu-python",
  251. }
  252. package = package_mapping[device_type]
  253. other_packages = set(package_mapping.values()) - {package}
  254. for other_package in other_packages:
  255. version = get_dep_version(other_package)
  256. if version is not None:
  257. logging.info(
  258. f"The high-performance inference plugin '{package}' is mutually exclusive with '{other_package}' (version {version} installed). Uninstalling '{other_package}'..."
  259. )
  260. try:
  261. uninstall_packages([other_package])
  262. except Exception:
  263. logging.error("Failed to uninstall packages", exc_info=True)
  264. sys.exit(1)
  265. with importlib.resources.path("paddlex", hpip_links_file) as f:
  266. version = get_dep_version(package)
  267. try:
  268. if version is None:
  269. install_packages(
  270. [package], pip_install_opts=["--find-links", str(f)]
  271. )
  272. else:
  273. response = input(
  274. f"The high-performance inference plugin is already installed (version {repr(version)}). Do you want to reinstall it? (y/n):"
  275. )
  276. if response.lower() in ["y", "yes"]:
  277. uninstall_packages([package])
  278. install_packages(
  279. [package],
  280. pip_install_opts=[
  281. "--find-links",
  282. str(f),
  283. ],
  284. )
  285. else:
  286. return
  287. except Exception:
  288. logging.error("Installation failed", exc_info=True)
  289. sys.exit(1)
  290. logging.info("Successfully installed the high-performance inference plugin")
  291. if not is_paddle2onnx_plugin_available():
  292. logging.info(
  293. "The Paddle2ONNX plugin is not available. It is recommended to run `paddlex --install paddle2onnx` to install the Paddle2ONNX plugin to use the full functionality of high-performance inference."
  294. )
  295. def _install_genai_deps(plugin_types):
  296. fd_plugin_types = []
  297. not_fd_plugin_types = []
  298. for plugin_type in plugin_types:
  299. if "fastdeploy" in plugin_type:
  300. fd_plugin_types.append(plugin_type)
  301. else:
  302. not_fd_plugin_types.append(plugin_type)
  303. if fd_plugin_types:
  304. if not is_dep_available("paddlepaddle"):
  305. sys.exit("Please install PaddlePaddle first.")
  306. import paddle.device
  307. if not paddle.device.is_compiled_with_cuda():
  308. sys.exit("Currently, only the GPU version of FastDeploy is supported.")
  309. cap = paddle.device.cuda.get_device_capability()
  310. if cap in ((8, 0), (9, 0)):
  311. index_url = "https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-80_90/"
  312. elif cap in ((8, 6), (8, 9)):
  313. index_url = "https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-86_89/"
  314. else:
  315. sys.exit(
  316. f"The compute capability of the GPU is {cap[0]}.{cap[1]}, which is not supported. The supported compute capabilities are 8.0, 8.6, 8.9, and 9.0."
  317. )
  318. try:
  319. install_packages(
  320. [get_genai_fastdeploy_spec("gpu")],
  321. pip_install_opts=["--extra-index-url", index_url],
  322. )
  323. except Exception:
  324. logging.error("Installation failed", exc_info=True)
  325. sys.exit(1)
  326. reqs = []
  327. for plugin_type in not_fd_plugin_types:
  328. try:
  329. r = get_genai_dep_specs(plugin_type)
  330. except ValueError:
  331. logging.error("Invalid generative AI plugin type: %s", plugin_type)
  332. sys.exit(2)
  333. reqs += r
  334. try:
  335. install_packages(reqs, constraints="required")
  336. except Exception:
  337. logging.error("Installation failed", exc_info=True)
  338. sys.exit(1)
  339. for plugin_type in plugin_types:
  340. if "vllm" in plugin_type or "sglang" in plugin_type:
  341. try:
  342. install_packages(["wheel"], constraints="required")
  343. install_packages(["flash-attn == 2.8.2"], constraints="required")
  344. except Exception:
  345. logging.error("Installation failed", exc_info=True)
  346. sys.exit(1)
  347. break
  348. logging.info(
  349. "Successfully installed the generative AI plugin"
  350. + ("s" if len(plugin_types) > 1 else "")
  351. )
  352. # Enable debug info
  353. os.environ["PADDLE_PDX_DEBUG"] = "True"
  354. # Disable eager initialization
  355. os.environ["PADDLE_PDX_EAGER_INIT"] = "False"
  356. plugins = args.install[:]
  357. if "serving" in plugins:
  358. plugins.remove("serving")
  359. if plugins:
  360. logging.error("`serving` cannot be used together with other plugins.")
  361. sys.exit(2)
  362. _install_serving_deps()
  363. return
  364. if "paddle2onnx" in plugins:
  365. plugins.remove("paddle2onnx")
  366. if plugins:
  367. logging.error("`paddle2onnx` cannot be used together with other plugins.")
  368. sys.exit(2)
  369. _install_paddle2onnx_deps()
  370. return
  371. hpi_plugins = list(filter(lambda name: name.startswith("hpi-"), plugins))
  372. if hpi_plugins:
  373. for p in hpi_plugins:
  374. plugins.remove(p)
  375. if plugins:
  376. logging.error("`hpi-xxx` cannot be used together with other plugins.")
  377. sys.exit(2)
  378. if len(hpi_plugins) > 1 or len(hpi_plugins[0].split("-")) != 2:
  379. logging.error(
  380. "Invalid HPI plugin installation format detected.\n"
  381. "Correct format: paddlex --install hpi-<device_type>\n"
  382. "Example: paddlex --install hpi-gpu"
  383. )
  384. sys.exit(2)
  385. device_type = hpi_plugins[0].split("-")[1]
  386. _install_hpi_deps(device_type=device_type)
  387. return
  388. genai_plugins = list(filter(lambda name: name.startswith("genai-"), plugins))
  389. if genai_plugins:
  390. for p in genai_plugins:
  391. plugins.remove(p)
  392. if plugins:
  393. logging.error("`genai-xxx` cannot be used together with other plugins.")
  394. sys.exit(2)
  395. genai_plugin_types = [p[len("genai-") :] for p in genai_plugins]
  396. _install_genai_deps(genai_plugin_types)
  397. return
  398. all_repo_names = get_all_supported_repo_names()
  399. unknown_plugins = []
  400. for p in plugins:
  401. if p not in all_repo_names:
  402. unknown_plugins.append(p)
  403. if unknown_plugins:
  404. logging.error("Unknown plugins: %s", unknown_plugins)
  405. sys.exit(2)
  406. if plugins:
  407. repo_names = plugins
  408. elif len(plugins) == 0:
  409. repo_names = all_repo_names
  410. setup(
  411. repo_names=repo_names,
  412. no_deps=args.no_deps,
  413. platform=args.platform,
  414. update_repos=args.update_repos,
  415. use_local_repos=args.use_local_repos,
  416. deps_to_replace=args.deps_to_replace,
  417. )
  418. return
  419. def pipeline_predict(
  420. pipeline,
  421. input,
  422. device,
  423. save_path,
  424. use_hpip,
  425. hpi_config,
  426. **pipeline_args,
  427. ):
  428. """pipeline predict"""
  429. pipeline = create_pipeline(
  430. pipeline, device=device, use_hpip=use_hpip, hpi_config=hpi_config
  431. )
  432. result = pipeline.predict(input, **pipeline_args)
  433. for res in result:
  434. res.print()
  435. if save_path:
  436. res.save_all(save_path=save_path)
  437. def serve(pipeline, *, device, use_hpip, hpi_config, host, port):
  438. try:
  439. from .inference.serving.basic_serving import create_pipeline_app, run_server
  440. except RuntimeError:
  441. logging.error("Failed to load the serving module", exc_info=True)
  442. sys.exit(1)
  443. pipeline_config = load_pipeline_config(pipeline)
  444. try:
  445. pipeline = create_pipeline(
  446. config=pipeline_config,
  447. device=device,
  448. use_hpip=use_hpip,
  449. hpi_config=hpi_config,
  450. )
  451. except Exception:
  452. logging.error("Failed to create the pipeline", exc_info=True)
  453. sys.exit(1)
  454. app = create_pipeline_app(pipeline, pipeline_config)
  455. run_server(app, host=host, port=port)
  456. # TODO: Move to another module
  457. def paddle_to_onnx(paddle_model_dir, onnx_model_dir, *, opset_version):
  458. if not is_paddle2onnx_plugin_available():
  459. sys.exit("Please install the Paddle2ONNX plugin first.")
  460. ONNX_MODEL_FILENAME = f"{MODEL_FILE_PREFIX}.onnx"
  461. CONFIG_FILENAME = f"{MODEL_FILE_PREFIX}.yml"
  462. ADDITIONAL_FILENAMES = ["scaler.pkl"]
  463. def _check_input_dir(input_dir):
  464. if input_dir is None:
  465. sys.exit("Input directory must be specified")
  466. if not input_dir.exists():
  467. sys.exit(f"{input_dir} does not exist")
  468. if not input_dir.is_dir():
  469. sys.exit(f"{input_dir} is not a directory")
  470. model_paths = get_model_paths(input_dir)
  471. if "paddle" not in model_paths:
  472. sys.exit("PaddlePaddle model does not exist")
  473. config_path = input_dir / CONFIG_FILENAME
  474. if not config_path.exists():
  475. sys.exit(f"{config_path} does not exist")
  476. def _check_paddle2onnx():
  477. if shutil.which("paddle2onnx") is None:
  478. sys.exit("Paddle2ONNX is not available. Please install the plugin first.")
  479. def _run_paddle2onnx(input_dir, output_dir, opset_version):
  480. model_paths = get_model_paths(input_dir)
  481. logging.info("Paddle2ONNX conversion starting...")
  482. # XXX: To circumvent Paddle2ONNX's bug
  483. cmd = [
  484. "paddle2onnx",
  485. "--model_dir",
  486. str(model_paths["paddle"][0].parent),
  487. "--model_filename",
  488. str(model_paths["paddle"][0].name),
  489. "--params_filename",
  490. str(model_paths["paddle"][1].name),
  491. "--save_file",
  492. str(output_dir / ONNX_MODEL_FILENAME),
  493. "--opset_version",
  494. str(opset_version),
  495. ]
  496. try:
  497. subprocess.check_call(cmd)
  498. except subprocess.CalledProcessError as e:
  499. sys.exit(f"Paddle2ONNX conversion failed with exit code {e.returncode}")
  500. logging.info("Paddle2ONNX conversion succeeded")
  501. def _copy_config_file(input_dir, output_dir):
  502. src_path = input_dir / CONFIG_FILENAME
  503. dst_path = output_dir / CONFIG_FILENAME
  504. shutil.copy(src_path, dst_path)
  505. logging.info(f"Copied {src_path} to {dst_path}")
  506. def _copy_additional_files(input_dir, output_dir):
  507. for filename in ADDITIONAL_FILENAMES:
  508. src_path = input_dir / filename
  509. if not src_path.exists():
  510. continue
  511. dst_path = output_dir / filename
  512. shutil.copy(src_path, dst_path)
  513. logging.info(f"Copied {src_path} to {dst_path}")
  514. if not paddle_model_dir:
  515. sys.exit("PaddlePaddle model directory must be specified")
  516. paddle_model_dir = Path(paddle_model_dir)
  517. if not onnx_model_dir:
  518. onnx_model_dir = paddle_model_dir
  519. onnx_model_dir = Path(onnx_model_dir)
  520. logging.info(f"Input dir: {paddle_model_dir}")
  521. logging.info(f"Output dir: {onnx_model_dir}")
  522. _check_input_dir(paddle_model_dir)
  523. _check_paddle2onnx()
  524. _run_paddle2onnx(paddle_model_dir, onnx_model_dir, opset_version)
  525. if not (onnx_model_dir.exists() and onnx_model_dir.samefile(paddle_model_dir)):
  526. _copy_config_file(paddle_model_dir, onnx_model_dir)
  527. _copy_additional_files(paddle_model_dir, onnx_model_dir)
  528. logging.info("Done")
  529. # for CLI
  530. def main():
  531. """API for command line"""
  532. parser, pipeline_args = args_cfg()
  533. args = parser.parse_args()
  534. if len(sys.argv) == 1:
  535. logging.warning("No arguments provided. Displaying help information:")
  536. parser.print_help()
  537. sys.exit(2)
  538. if args.install is not None:
  539. install(args)
  540. elif args.serve:
  541. serve(
  542. args.pipeline,
  543. device=args.device,
  544. use_hpip=args.use_hpip or None,
  545. hpi_config=args.hpi_config,
  546. host=args.host,
  547. port=args.port,
  548. )
  549. elif args.paddle2onnx:
  550. paddle_to_onnx(
  551. args.paddle_model_dir,
  552. args.onnx_model_dir,
  553. opset_version=args.opset_version,
  554. )
  555. else:
  556. if args.get_pipeline_config is not None:
  557. interactive_get_pipeline(args.get_pipeline_config, args.save_path)
  558. else:
  559. pipeline_args_dict = {}
  560. for arg in pipeline_args:
  561. arg_name = arg["name"].lstrip("-")
  562. if hasattr(args, arg_name):
  563. pipeline_args_dict[arg_name] = getattr(args, arg_name)
  564. else:
  565. logging.warning(f"Argument {arg_name} is missing in args")
  566. try:
  567. pipeline_predict(
  568. args.pipeline,
  569. args.input,
  570. args.device,
  571. args.save_path,
  572. use_hpip=args.use_hpip or None,
  573. hpi_config=args.hpi_config,
  574. **pipeline_args_dict,
  575. )
  576. except Exception:
  577. logging.error("Pipeline prediction failed", exc_info=True)
  578. sys.exit(1)