[Feat] Group dependencies and support dependency check at runtime (#3752)

* Fix style

* Fix comment

* Improve

* Align opencv variants

* Fix header comments

* Fix 3D op bugs

* Rename 3d_bev_detection to legal module name m_3d_bev_detection

* Fix bug

* Fix requirements

* Support extras

* Use importlib.metadata

* Remove quick_check

* Allow repos not installing packages

* Fix style rule

* Remove unused code

* Remove unused code

* Always no deps
Lin Manhui committed 7 months ago
Parent commit: c7f71b63a1

+ 2 - 0
.precommit/check_custom.py

@@ -41,6 +41,8 @@ def check(file_path):
     # Exclude shebang line
     if content.startswith("#!"):
         content = content[content.index("\n") + 1 :]
+        if content.startswith("\n"):
+            content = content[1:]
     if not re.match(LICENSE_TEXT, content):
         print(f"License header missing in {file_path}")
         return False
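
As a quick illustration (not part of the diff), the new branch lets a file that puts exactly one blank line between its shebang and the license header pass the check:

    # Hypothetical input, for illustration only.
    content = '#!/usr/bin/env python\n\n# Copyright (c) 2024 PaddlePaddle Authors. ...\n'
    content = content[content.index("\n") + 1 :]  # drop the shebang line
    if content.startswith("\n"):
        content = content[1:]                     # drop the single blank line after it
    # content now begins with the license header, so re.match(LICENSE_TEXT, content) can succeed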

+ 0 - 1
paddlex/paddle2onnx_requirements.txt

@@ -1 +0,0 @@
-paddle2onnx>=2

+ 12 - 23
paddlex/paddlex_cli.py

@@ -25,7 +25,9 @@ from .constants import MODEL_FILE_PREFIX
 from .inference.pipelines import load_pipeline_config
 from .repo_manager import get_all_supported_repo_names, setup
 from .utils import logging
+from .utils.deps import EXTRAS
 from .utils.flags import FLAGS_json_format_model
+from .utils.install import install_packages
 from .utils.interactive_get_pipeline import interactive_get_pipeline
 from .utils.pipeline_arguments import PIPELINE_ARGUMENTS
 
@@ -213,20 +215,17 @@ def install(args):
     """install paddlex"""
 
     def _install_serving_deps():
-        with importlib.resources.path(
-            "paddlex", "serving_requirements.txt"
-        ) as req_file:
-            return subprocess.check_call(
-                [sys.executable, "-m", "pip", "install", "-r", str(req_file)]
-            )
+        reqs = []
+        for dep_specs in EXTRAS["serving"].values():
+            reqs += dep_specs
+        # Should we sort the requirements?
+        install_packages(reqs)
 
     def _install_paddle2onnx_deps():
-        with importlib.resources.path(
-            "paddlex", "paddle2onnx_requirements.txt"
-        ) as req_file:
-            return subprocess.check_call(
-                [sys.executable, "-m", "pip", "install", "-r", str(req_file)]
-            )
+        reqs = []
+        for dep_specs in EXTRAS["paddle2onnx"].values():
+            reqs += dep_specs
+        install_packages(reqs)
 
     def _install_hpi_deps(device_type):
         supported_device_types = ["cpu", "gpu", "npu"]
@@ -248,17 +247,7 @@ def install(args):
             packages = ["ultra-infer-npu-python"]
 
         with importlib.resources.path("paddlex", "hpip_links.html") as f:
-            return subprocess.check_call(
-                [
-                    sys.executable,
-                    "-m",
-                    "pip",
-                    "install",
-                    "--find-links",
-                    str(f),
-                    *packages,
-                ]
-            )
+            install_packages(packages, ["--find-links", str(f)])
 
     # Enable debug info
     os.environ["PADDLE_PDX_DEBUG"] = "True"
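
A rough sketch of the runtime `EXTRAS` mapping these helpers consume (shape inferred from `paddlex/utils/deps.py` below; entries abbreviated and versions illustrative):

    # extra name -> package name -> list of dependency specifiers
    EXTRAS = {
        "serving": {
            "aiohttp": ["aiohttp >= 3.9"],
            "fastapi": ["fastapi >= 0.110"],
            # ...
        },
        "paddle2onnx": {
            "paddle2onnx": ["paddle2onnx >= 2"],
        },
    }

    reqs = []
    for dep_specs in EXTRAS["serving"].values():
        reqs += dep_specs
    # reqs == ["aiohttp >= 3.9", "fastapi >= 0.110", ...]; handed to install_packages()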

+ 1 - 8
paddlex/repo_manager/__init__.py

@@ -13,12 +13,5 @@
 # limitations under the License.
 
 
-from .core import (
-    get_versions,
-    initialize,
-    is_initialized,
-    set_parent_dirs,
-    setup,
-    wheel,
-)
+from .core import get_versions, initialize, is_initialized, set_parent_dirs, setup
 from .meta import get_all_repo_names as get_all_supported_repo_names

+ 0 - 22
paddlex/repo_manager/core.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import sys
 from collections import OrderedDict
 
@@ -27,7 +26,6 @@ from .repo import (
 __all__ = [
     "set_parent_dirs",
     "setup",
-    "wheel",
     "is_initialized",
     "initialize",
     "get_versions",
@@ -210,26 +208,6 @@ def setup(
     logging.info("All packages are installed.")
 
 
-def wheel(repo_names, dst_dir="./", fail_fast=False):
-    """wheel"""
-    for repo_name in repo_names:
-        repo = _GlobalContext.build_repo_instance(repo_name)
-        logging.info(f"Now building Wheel for {repo_name}...")
-        try:
-            tgt_dir = os.path.join(dst_dir, repo.pkg_name)
-            if os.path.exists(tgt_dir):
-                raise FileExistsError(f"{tgt_dir} already exists.")
-            repo.wheel(tgt_dir)
-        except Exception as e:
-            logging.warning(
-                f"Failed to build wheel for {repo_name}. We encountered the following error:\n  {str(e)}\n"
-            )
-            if fail_fast:
-                raise
-        else:
-            logging.info(f"Wheel for {repo_name} is built.\n")
-
-
 def initialize(repo_names=None):
     """initialize"""
     if _GlobalContext.is_initialized():

+ 25 - 19
paddlex/repo_manager/meta.py

@@ -34,19 +34,21 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleSeg.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddleseg",
         "lib_name": "paddleseg",
+        "install_pkg": True,
+        "pkg_name": "paddleseg",
         "pdx_pkg_name": "PaddleSeg_api",
         "editable": False,
-        "extra_req_files": ["Matting/requirements.txt"],
+        "extra_pkgs": ["Matting/requirements.txt"],
         "path_env": "PADDLE_PDX_PADDLESEG_PATH",
     },
     "PaddleClas": {
         "git_path": "/PaddlePaddle/PaddleClas.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddleclas",
         "lib_name": "paddleclas",
+        "install_pkg": True,
+        "pkg_name": "paddleclas",
         "pdx_pkg_name": "PaddleClas_api",
         # PaddleClas must be installed in non-editable mode, otherwise it throws
         # an Import error.
@@ -57,8 +59,9 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleDetection.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddledet",
         "lib_name": "ppdet",
+        "install_pkg": True,
+        "pkg_name": "paddledet",
         "pdx_pkg_name": "PaddleDetection_api",
         "editable": False,
         "path_env": "PADDLE_PDX_PADDLEDETECTION_PATH",
@@ -67,11 +70,10 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleOCR.git",
         "platform": "github",
         "branch": "main",
-        "pkg_name": "paddleocr",
         "lib_name": "paddleocr",
+        "install_pkg": False,
         "pdx_pkg_name": "PaddleOCR_api",
-        "editable": False,
-        "extra_req_files": [
+        "extra_pkgs": [
             "ppstructure/kie/requirements.txt",
             "docs/algorithm/formula_recognition/requirements.txt",
         ],
@@ -82,8 +84,9 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleTS.git",
         "platform": "github",
         "branch": "release_v1.1",
-        "pkg_name": "paddlets",
         "lib_name": "paddlets",
+        "install_pkg": True,
+        "pkg_name": "paddlets",
         "pdx_pkg_name": "PaddleTS_api",
         "editable": False,
         "path_env": "PADDLE_PDX_PADDLETS_PATH",
@@ -93,8 +96,9 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleNLP.git",
         "platform": "github",
         "branch": "release/2.9",
-        "pkg_name": "paddlenlp",
         "lib_name": "paddlenlp",
+        "install_pkg": True,
+        "pkg_name": "paddlenlp",
         "pdx_pkg_name": "PaddleNLP_api",
         "editable": False,
         "path_env": "PADDLE_PDX_PADDLENLP_PATH",
@@ -103,8 +107,9 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleSpeech.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddlespeech",
         "lib_name": "paddlespeech",
+        "install_pkg": True,
+        "pkg_name": "paddlespeech",
         "pdx_pkg_name": "PaddleSpeech_api",
         "editable": False,
         "path_env": "PADDLE_PDX_PADDLESPEECH_PATH",
@@ -114,8 +119,9 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PARL.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "parl",
         "lib_name": "parl",
+        "install_pkg": True,
+        "pkg_name": "parl",
         "pdx_pkg_name": "PARL_api",
         "editable": False,
         "path_env": "PADDLE_PDX_PARL_PATH",
@@ -124,11 +130,11 @@ REPO_META = {
         "git_path": "/PaddlePaddle/PaddleMIX.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddlemix",
         "lib_name": "paddlemix",
+        "pkg_name": "paddlemix",
         "pdx_pkg_name": "PaddleMIX_api",
         "editable": True,
-        "extra_editable": "ppdiffusers",
+        "extra_pkgs": [("ppdiffusers", "ppdiffusers", None, True)],
         "path_env": "PADDLE_PDX_PADDLEMIX_PATH",
         "requires": ["PaddleNLP"],
     },
@@ -136,25 +142,25 @@ REPO_META = {
         "git_path": "/PaddlePaddle/Paddle3D.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddle3d",
         "lib_name": "paddle3d",
+        "install_pkg": True,
+        "pkg_name": "paddle3d",
         "pdx_pkg_name": "Paddle3D_api",
         "editable": False,
         "path_env": "PADDLE_PDX_PADDLE3D_PATH",
         "requires": ["PaddleSeg", "PaddleDetection"],
-        "main_req_file": "requirements_pdx.txt",
+        "main_reqs_file": "requirements_pdx.txt",
     },
     "PaddleVideo": {
         "git_path": "/PaddlePaddle/PaddleVideo.git",
         "platform": "github",
         "branch": "develop",
-        "pkg_name": "paddlevideo",
         "lib_name": "ppvideo",
+        "install_pkg": True,
+        "pkg_name": "paddlevideo",
         "pdx_pkg_name": "PaddleVideo_api",
         "editable": False,
-        "extra_req_files": [
-            "requirements_paddlex.txt",
-        ],
+        "main_reqs_file": "requirements_paddlex.txt",
         "path_env": "PADDLE_PDX_PADDLEVIDEO_PATH",
     },
 }
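
For reference, the tuple form of `extra_pkgs` introduced for PaddleMIX packs four fields; the reading below is inferred from how `repo.py` indexes it:

    # (subdirectory inside the repo,     -> e[0], used as the working directory
    #  name of the package it installs,  -> e[1], used on uninstall
    #  requirements file or None,        -> e[2], None falls back to "<subdirectory>/requirements.txt"
    #  install in editable mode)         -> e[3], adds "-e" to the pip options
    ("ppdiffusers", "ppdiffusers", None, True)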

+ 102 - 143
paddlex/repo_manager/repo.py

@@ -18,23 +18,25 @@ import os.path as osp
 import shutil
 import tempfile
 
+from packaging.requirements import Requirement
+
 from ..utils import logging
 from ..utils.download import download_and_extract
 from ..utils.file_interface import custom_open
+from ..utils.install import (
+    install_packages,
+    install_packages_from_requirements_file,
+    uninstall_packages,
+)
 from .meta import REPO_DOWNLOAD_BASE, get_repo_meta
 from .utils import (
-    build_wheel_using_pip,
-    check_installation_using_pip,
-    env_marker_ast2expr,
+    check_package_installation,
     fetch_repo_using_git,
     install_external_deps,
-    install_packages_using_pip,
     mute,
     remove_repo_using_rm,
     reset_repo_using_git,
     switch_working_dir,
-    to_dep_spec_pep508,
-    uninstall_package_using_pip,
 )
 
 __all__ = ["build_repo_instance", "build_repo_group_installer"]
@@ -72,16 +74,16 @@ class PPRepository(object):
 
         self.meta = get_repo_meta(self.name)
         self.git_path = self.meta["git_path"]
-        self.pkg_name = self.meta["pkg_name"]
         self.lib_name = self.meta["lib_name"]
+        self.pkg_name = self.meta.get("pkg_name", None)
         self.pdx_mod_name = (
             pdx_collection_mod.__name__ + "." + self.meta["pdx_pkg_name"]
         )
-        self.main_req_file = self.meta.get("main_req_file", "requirements.txt")
+        self.main_reqs_file = self.meta.get("main_reqs_file", "requirements.txt")
 
     def initialize(self):
         """initialize"""
-        if not self.check_installation(quick_check=True):
+        if not self.check_installation():
             return False
         if "path_env" in self.meta:
             # Set env var
@@ -91,14 +93,9 @@ class PPRepository(object):
         self.get_pdx()
         return True
 
-    def check_installation(self, quick_check=False):
+    def check_installation(self):
         """check_installation"""
-        if quick_check:
-            lib = self._get_lib(load=False)
-            return lib is not None
-        else:
-            # TODO: Also check if correct dependencies are installed.
-            return check_installation_using_pip(self.pkg_name)
+        return osp.exists(osp.join(self.root_dir, ".installed"))
 
     def replace_repo_deps(self, deps_to_replace, src_requirements):
         """replace_repo_deps"""
@@ -122,58 +119,59 @@ class PPRepository(object):
         with open(src_requirements, "w") as file:
             file.writelines([l + "\n" for l in existing_deps])
 
-    def check_repo_exiting(self, quick_check=False):
+    def check_repo_exiting(self):
         """check_repo_exiting"""
-        return os.path.exists(os.path.join(self.root_dir, ".git"))
-
-    def install(self, *args, **kwargs):
-        """install"""
-        return RepositoryGroupInstaller([self]).install(*args, **kwargs)
-
-    def uninstall(self, *args, **kwargs):
-        """uninstall"""
-        return RepositoryGroupInstaller([self]).uninstall(*args, **kwargs)
-
-    def install_deps(self, *args, **kwargs):
-        """install_deps"""
-        return RepositoryGroupInstaller([self]).install_deps(*args, **kwargs)
-
-    def install_package(self, no_deps=False, clean=True, install_extra_only=False):
-        """install_package"""
-        editable = self.meta.get("editable", True)
-        extra_editable = self.meta.get("extra_editable", None)
-        if editable:
-            logging.warning(f"{self.pkg_name} will be installed in editable mode.")
-        with switch_working_dir(self.root_dir):
-            if install_extra_only:
-                src_requirements = os.path.join(self.root_dir, "requirements.txt")
-                paddlex_requirements = os.path.join(
-                    self.root_dir, "requirements_paddlex.txt"
-                )
-                shutil.copy(paddlex_requirements, src_requirements)
-            try:
-                install_packages_using_pip(["."], editable=editable, no_deps=no_deps)
-                install_external_deps(self.name, self.root_dir)
-            finally:
-                if clean:
-                    # Clean build artifacts
-                    tmp_build_dir = os.path.join(self.root_dir, "build")
-                    if os.path.exists(tmp_build_dir):
-                        shutil.rmtree(tmp_build_dir)
-        if extra_editable:
-            with switch_working_dir(os.path.join(self.root_dir, extra_editable)):
+        return osp.exists(osp.join(self.root_dir, ".git"))
+
+    def install_packages(self, clean=True):
+        """install_packages"""
+        if self.meta["install_pkg"]:
+            editable = self.meta.get("editable", True)
+            if editable:
+                logging.warning(f"{self.pkg_name} will be installed in editable mode.")
+            with switch_working_dir(self.root_dir):
                 try:
-                    install_packages_using_pip(["."], editable=True, no_deps=no_deps)
+                    pip_install_opts = ["--no-deps"]
+                    if editable:
+                        pip_install_opts.append("-e")
+                    install_packages(["."], pip_install_opts=pip_install_opts)
+                    install_external_deps(self.name, self.root_dir)
                 finally:
                     if clean:
                         # Clean build artifacts
-                        tmp_build_dir = os.path.join(self.root_dir, "build")
-                        if os.path.exists(tmp_build_dir):
+                        tmp_build_dir = "build"
+                        if osp.exists(tmp_build_dir):
                             shutil.rmtree(tmp_build_dir)
-
-    def uninstall_package(self):
-        """uninstall_package"""
-        uninstall_package_using_pip(self.pkg_name)
+        for e in self.meta.get("extra_pkgs", []):
+            if isinstance(e, tuple):
+                with switch_working_dir(osp.join(self.root_dir, e[0])):
+                    try:
+                        pip_install_opts = ["--no-deps"]
+                        if e[3]:
+                            pip_install_opts.append("-e")
+                        install_packages(["."], pip_install_opts=pip_install_opts)
+                    finally:
+                        if clean:
+                            tmp_build_dir = "build"
+                            if osp.exists(tmp_build_dir):
+                                shutil.rmtree(tmp_build_dir)
+
+    def uninstall_packages(self):
+        """uninstall_packages"""
+        pkgs = []
+        if self.meta["install_pkg"]:
+            pkgs.append(self.pkg_name)
+        for e in self.meta.get("extra_pkgs", []):
+            if isinstance(e, tuple):
+                pkgs.append(e[1])
+        uninstall_packages(pkgs)
+
+    def mark_installed(self):
+        with open(osp.join(self.root_dir, ".installed"), "wb"):
+            pass
+
+    def mark_uninstalled(self):
+        os.unlink(osp.join(self.root_dir, ".installed"))
 
     def download(self):
         """download from remote"""
@@ -200,61 +198,36 @@ class PPRepository(object):
                     f"Update {self.name} from {git_url} failed, check your network connection. Error:\n{e}"
                 )
 
-    def wheel(self, dst_dir):
-        """wheel"""
-        with tempfile.TemporaryDirectory() as td:
-            tmp_repo_dir = osp.join(td, self.name)
-            tmp_dst_dir = osp.join(td, "dist")
-            shutil.copytree(self.root_dir, tmp_repo_dir, symlinks=False)
-
-            # NOTE: Installation of the repo relies on `self.main_req_file` in root directory
-            # Thus, we overwrite the content of it.
-            main_req_file_path = osp.join(tmp_repo_dir, self.main_req_file)
-            deps_str = self.get_deps()
-            with open(main_req_file_path, "w", encoding="utf-8") as f:
-                f.write(deps_str)
-            install_packages_using_pip([], req_files=[main_req_file_path])
-            with switch_working_dir(tmp_repo_dir):
-                build_wheel_using_pip(".", tmp_dst_dir)
-            shutil.copytree(tmp_dst_dir, dst_dir)
-
-    def _get_lib(self, load=True):
+    def _get_lib(self):
         """_get_lib"""
         import importlib.util
 
         importlib.invalidate_caches()
-        if load:
-            try:
-                with mute():
-                    return importlib.import_module(self.lib_name)
-            except ImportError:
-                return None
-        else:
-            spec = importlib.util.find_spec(self.lib_name)
-            if spec is not None and not osp.exists(spec.origin):
-                return None
-            else:
-                return spec
+        try:
+            with mute():
+                return importlib.import_module(self.lib_name)
+        except ImportError:
+            return None
 
     def get_pdx(self):
         """get_pdx"""
         return importlib.import_module(self.pdx_mod_name)
 
-    def get_deps(self, install_extra_only=False, deps_to_replace=None):
+    def get_deps(self, deps_to_replace=None):
         """get_deps"""
         # Merge requirement files
-        if install_extra_only:
-            req_list = []
-        else:
-            req_list = [self.main_req_file]
-        req_list.extend(self.meta.get("extra_req_files", []))
+        req_list = [self.main_reqs_file]
+        for e in self.meta.get("extra_pkgs", []):
+            if isinstance(e, tuple):
+                e = e[2] or osp.join(e[0], "requirements.txt")
+            req_list.append(e)
         if deps_to_replace is not None:
             deps_dict = {}
             for dep in deps_to_replace:
                 part, version = dep.split("=")
                 repo_name, dep_name = part.split(".")
                 deps_dict[repo_name] = {dep_name: version}
-            src_requirements = os.path.join(self.root_dir, "requirements.txt")
+            src_requirements = osp.join(self.root_dir, "requirements.txt")
             if self.name in deps_dict:
                 self.replace_repo_deps(deps_dict[self.name], src_requirements)
         deps = []
@@ -317,13 +290,8 @@ class RepositoryGroupInstaller(object):
         # failure of one repo package aborts the entire installation process.
         for ins_flag, repo in zip(ins_flags, repos):
             if ins_flag:
-                if repo.name in ["PaddleVideo"]:
-                    repo.install_package(
-                        no_deps=True,
-                        install_extra_only=True,
-                    )
-                else:
-                    repo.install_package(no_deps=True)
+                repo.install_packages()
+                repo.mark_installed()
 
     def uninstall(self):
         """uninstall"""
@@ -332,19 +300,15 @@ class RepositoryGroupInstaller(object):
         for repo in repos:
             if repo.check_installation():
                 # NOTE: Dependencies are not uninstalled.
-                repo.uninstall_package()
+                repo.uninstall_packages()
+                repo.mark_uninstalled()
 
     def get_deps(self, deps_to_replace=None):
         """get_deps"""
         deps_list = []
         repos = self._sort_repos(self.repos, check_missing=True)
         for repo in repos:
-            if repo.name in ["PaddleVideo"]:
-                deps = repo.get_deps(
-                    install_extra_only=True, deps_to_replace=deps_to_replace
-                )
-            else:
-                deps = repo.get_deps(deps_to_replace=deps_to_replace)
+            deps = repo.get_deps(deps_to_replace=deps_to_replace)
             deps = self._normalize_deps(deps, headline=f"# {repo.name} dependencies")
             deps_list.append(deps)
         # Add an extra new line to separate dependencies of different repos.
@@ -354,17 +318,23 @@ class RepositoryGroupInstaller(object):
         """install_deps"""
         deps_str = self.get_deps(deps_to_replace=deps_to_replace)
         with tempfile.TemporaryDirectory() as td:
-            req_file = os.path.join(td, "requirements.txt")
+            req_file = osp.join(td, "requirements.txt")
             with open(req_file, "w", encoding="utf-8") as fr:
                 fr.write(deps_str)
             if constraints is not None:
-                cons_file = os.path.join(td, "constraints.txt")
+                cons_file = osp.join(td, "constraints.txt")
                 with open(cons_file, "w", encoding="utf-8") as fc:
                     fc.write(constraints)
                 cons_files = [cons_file]
             else:
                 cons_files = []
-            install_packages_using_pip([], req_files=[req_file], cons_files=cons_files)
+            pip_install_opts = []
+            for f in cons_files:
+                pip_install_opts.append("-c")
+                pip_install_opts.append(f)
+            install_packages_from_requirements_file(
+                req_file, pip_install_opts=pip_install_opts
+            )
 
     def _sort_repos(self, repos, check_missing=False):
         # We sort the repos to ensure that the dependencies precede the
@@ -400,41 +370,30 @@ class RepositoryGroupInstaller(object):
 
     def _normalize_deps(self, deps, headline=None):
         repo_pkgs = set(repo.pkg_name for repo in self.repos)
-        normed_lines = []
+        lines = []
         if headline is not None:
-            normed_lines.append(headline)
+            lines.append(headline)
         for line in deps.splitlines():
             line_s = line.strip()
-            if len(line_s) == 0 or line_s.startswith("#"):
+            if not line_s:
+                continue
+            pos = line_s.find("#")
+            if pos == 0:
                 continue
-            # If `line` is not a comment, it must be a requirement specifier.
+            elif pos > 0:
+                line_s = line_s[:pos]
+            # If `line` is not an empty line or a comment, it must be a requirement specifier.
             # Other forms may cause a parse error.
-            n, e, v, m = to_dep_spec_pep508(line_s)
-            if isinstance(v, str):
-                raise RuntimeError("Currently, URL based lookup is not supported.")
-            if n in repo_pkgs:
+            req = Requirement(line_s)
+            if req.name in repo_pkgs:
                 # Skip repo packages
                 continue
-            elif check_installation_using_pip(n):
+            elif check_package_installation(req.name):
                 continue
             else:
-                line_n = [n]
-                fe = f"[{','.join(e)}]" if e else ""
-                if fe:
-                    line_n.append(fe)
-                fv = []
-                for tup in v:
-                    fv.append(" ".join(tup))
-                fv = ", ".join(fv) if fv else ""
-                if fv:
-                    line_n.append(fv)
-                if m is not None:
-                    fm = f"; {env_marker_ast2expr(m)}"
-                    line_n.append(fm)
-                line_n = " ".join(line_n)
-                normed_lines.append(line_n)
-
-        return "\n".join(normed_lines)
+                lines.append(line_s)
+
+        return "\n".join(lines)
 
 
 class RepositoryGroupGetter(object):
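
The hand-rolled PEP 508 handling is gone; `_normalize_deps` now relies on `packaging.requirements.Requirement`. A minimal sketch with an illustrative specifier:

    from packaging.requirements import Requirement

    req = Requirement("opencv-contrib-python==4.10.0.84; sys_platform != 'darwin'")
    req.name       # 'opencv-contrib-python', compared against repo package names
    req.specifier  # SpecifierSet('==4.10.0.84')
    req.marker     # Marker('sys_platform != "darwin"')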

+ 7 - 198
paddlex/repo_manager/utils.py

@@ -13,14 +13,13 @@
 # limitations under the License.
 
 import contextlib
-import json
+import importlib.metadata
 import os
 import platform
 import subprocess
 import sys
 
 import lazy_paddle as paddle
-from parsley import makeGrammar
 
 from ..utils import logging
 from ..utils.env import get_device_type
@@ -32,10 +31,6 @@ def _check_call(*args, **kwargs):
     return subprocess.check_call(*args, **kwargs)
 
 
-def _check_output(*args, **kwargs):
-    return subprocess.check_output(*args, **kwargs)
-
-
 def _compare_version(version1, version2):
     import re
 
@@ -65,40 +60,12 @@ def _compare_version(version1, version2):
     return 0
 
 
-def check_installation_using_pip(pkg):
-    """check_installation_using_pip"""
-    out = _check_output(["pip", "list", "--format", "json"])
-    out = out.rstrip()
-    lst = json.loads(out)
-    return any(ele["name"] == pkg for ele in lst)
-
-
-def uninstall_package_using_pip(pkg):
-    """uninstall_package_using_pip"""
-    return _check_call([sys.executable, "-m", "pip", "uninstall", "-y", pkg])
-
-
-def install_packages_using_pip(
-    pkgs, editable=False, req_files=None, cons_files=None, no_deps=False, pip_flags=None
-):
-    """install_packages_using_pip"""
-    args = [sys.executable, "-m", "pip", "install"]
-    if editable:
-        args.append("-e")
-    if req_files is not None:
-        for req_file in req_files:
-            args.append("-r")
-            args.append(req_file)
-    if cons_files is not None:
-        for cons_file in cons_files:
-            args.append("-c")
-            args.append(cons_file)
-    if isinstance(pkgs, str):
-        pkgs = [pkgs]
-    args.extend(pkgs)
-    if pip_flags is not None:
-        args.extend(pip_flags)
-    return _check_call(args)
+def check_package_installation(package):
+    try:
+        importlib.metadata.distribution(package)
+    except importlib.metadata.PackageNotFoundError:
+        return False
+    return True
 
 
 def install_external_deps(repo_name, repo_root):
@@ -191,161 +158,3 @@ def switch_working_dir(new_wd):
         yield
     finally:
         os.chdir(cwd)
-
-
-def _build_dep_spec_pep508_grammar():
-    # Refer to https://peps.python.org/pep-0508/
-    grammar = """
-        wsp           = ' ' | '\t'
-        version_cmp   = wsp* <'<=' | '<' | '!=' | '==' | '>=' | '>' | '~=' | '==='>
-        version       = wsp* <(letterOrDigit | '-' | '_' | '.' | '*' | '+' | '!')+>
-        version_one   = version_cmp:op version:v wsp* -> (op, v)
-        version_many  = version_one:v1 (wsp* ',' version_one)*:v2 -> [v1] + v2
-        versionspec   = ('(' version_many:v ')' ->v) | version_many
-        urlspec       = '@' wsp* <uri_reference>
-        marker_op     = version_cmp | (wsp* 'in') | (wsp* 'not' wsp+ 'in')
-        python_str_c  = (wsp | letter | digit | '(' | ')' | '.' | '{' | '}' |
-                        '-' | '_' | '*' | '#' | ':' | ';' | ',' | '/' | '?' |
-                        '[' | ']' | '!' | '~' | '`' | '@' | '$' | '%' | '^' |
-                        '&' | '=' | '+' | '|' | '<' | '>' )
-        dquote        = '"'
-        squote        = '\\''
-        comment       = '#' <anything*>:s end -> s
-        python_str    = (squote <(python_str_c | dquote)*>:s squote |
-                        dquote <(python_str_c | squote)*>:s dquote) -> s
-        env_var       = ('python_version' | 'python_full_version' |
-                        'os_name' | 'sys_platform' | 'platform_release' |
-                        'platform_system' | 'platform_version' |
-                        'platform_machine' | 'platform_python_implementation' |
-                        'implementation_name' | 'implementation_version' |
-                        'extra' # ONLY when defined by a containing layer
-                        )
-        marker_var    = wsp* (env_var | python_str)
-        marker_expr   = marker_var:l marker_op:o marker_var:r -> (o, l, r)
-                    | wsp* '(' marker:m wsp* ')' -> m
-        marker_and    = marker_expr:l wsp* 'and' marker_expr:r -> ('and', l, r)
-                    | marker_expr:m -> m
-        marker_or     = marker_and:l wsp* 'or' marker_and:r -> ('or', l, r)
-                        | marker_and:m -> m
-        marker        = marker_or
-        quoted_marker = ';' wsp* marker
-        identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
-        identifier    = <letterOrDigit identifier_end* >
-        name          = identifier
-        extras_list   = identifier:i (wsp* ',' wsp* identifier)*:ids -> [i] + ids
-        extras        = '[' wsp* extras_list?:e wsp* ']' -> e
-        name_req      = (name:n wsp* extras?:e wsp* versionspec?:v wsp* quoted_marker?:m
-                        -> (n, e or [], v or [], m))
-        url_req       = (name:n wsp* extras?:e wsp* urlspec:v (wsp+ | end) quoted_marker?:m
-                        -> (n, e or [], v, m))
-        specification = wsp* (url_req | name_req):s wsp* comment? -> s
-        # The result is a tuple - name, list-of-extras,
-        # list-of-version-constraints-or-a-url, marker-ast or None
-
-
-        uri_reference = <uri | relative_ref>
-        uri           = scheme ':' hier_part ('?' query )? ('#' fragment)?
-        hier_part     = ('//' authority path_abempty) | path_absolute | path_rootless | path_empty
-        absolute_uri  = scheme ':' hier_part ('?' query )?
-        relative_ref  = relative_part ('?' query )? ('#' fragment )?
-        relative_part = '//' authority path_abempty | path_absolute | path_noscheme | path_empty
-        scheme        = letter (letter | digit | '+' | '-' | '.')*
-        authority     = (userinfo '@' )? host (':' port )?
-        userinfo      = (unreserved | pct_encoded | sub_delims | ':')*
-        host          = ip_literal | ipv4_address | reg_name
-        port          = digit*
-        ip_literal    = '[' (ipv6_address | ipvfuture) ']'
-        ipvfuture     = 'v' hexdig+ '.' (unreserved | sub_delims | ':')+
-        ipv6_address   = (
-                        (h16 ':'){6} ls32
-                        | '::' (h16 ':'){5} ls32
-                        | (h16 )?  '::' (h16 ':'){4} ls32
-                        | ((h16 ':')? h16 )? '::' (h16 ':'){3} ls32
-                        | ((h16 ':'){0,2} h16 )? '::' (h16 ':'){2} ls32
-                        | ((h16 ':'){0,3} h16 )? '::' h16 ':' ls32
-                        | ((h16 ':'){0,4} h16 )? '::' ls32
-                        | ((h16 ':'){0,5} h16 )? '::' h16
-                        | ((h16 ':'){0,6} h16 )? '::' )
-        h16           = hexdig{1,4}
-        ls32          = (h16 ':' h16) | ipv4_address
-        ipv4_address   = dec_octet '.' dec_octet '.' dec_octet '.' dec_octet
-        nz            = ~'0' digit
-        dec_octet     = (
-                        digit # 0-9
-                        | nz digit # 10-99
-                        | '1' digit{2} # 100-199
-                        | '2' ('0' | '1' | '2' | '3' | '4') digit # 200-249
-                        | '25' ('0' | '1' | '2' | '3' | '4' | '5') )# %250-255
-        reg_name = (unreserved | pct_encoded | sub_delims)*
-        path = (
-                path_abempty # begins with '/' or is empty
-                | path_absolute # begins with '/' but not '//'
-                | path_noscheme # begins with a non-colon segment
-                | path_rootless # begins with a segment
-                | path_empty ) # zero characters
-        path_abempty  = ('/' segment)*
-        path_absolute = '/' (segment_nz ('/' segment)* )?
-        path_noscheme = segment_nz_nc ('/' segment)*
-        path_rootless = segment_nz ('/' segment)*
-        path_empty    = pchar{0}
-        segment       = pchar*
-        segment_nz    = pchar+
-        segment_nz_nc = (unreserved | pct_encoded | sub_delims | '@')+
-                        # non-zero-length segment without any colon ':'
-        pchar         = unreserved | pct_encoded | sub_delims | ':' | '@'
-        query         = (pchar | '/' | '?')*
-        fragment      = (pchar | '/' | '?')*
-        pct_encoded   = '%' hexdig
-        unreserved    = letter | digit | '-' | '.' | '_' | '~'
-        reserved      = gen_delims | sub_delims
-        gen_delims    = ':' | '/' | '?' | '#' | '(' | ')?' | '@'
-        sub_delims    = '!' | '$' | '&' | '\\'' | '(' | ')' | '*' | '+' | ',' | ';' | '='
-        hexdig        = digit | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D' | 'e' | 'E' | 'f' | 'F'
-    """
-
-    compiled = makeGrammar(grammar, {})
-    return compiled
-
-
-_pep508_grammar = None
-
-
-def to_dep_spec_pep508(s):
-    """to_dep_spec_pep508"""
-    global _pep508_grammar
-    if _pep508_grammar is None:
-        _pep508_grammar = _build_dep_spec_pep508_grammar()
-    parsed = _pep508_grammar(s)
-    return parsed.specification()
-
-
-def env_marker_ast2expr(marker_ast):
-    """env_marker_ast2expr"""
-    MARKER_VARS = (
-        "python_version",
-        "python_full_version",
-        "os_name",
-        "sys_platform",
-        "platform_release",
-        "platform_system",
-        "platform_version",
-        "platform_machine",
-        "platform_python_implementation",
-        "implementation_name",
-        "implementation_version",
-        "extra",  # ONLY when defined by a containing layer
-    )
-    o, l, r = marker_ast
-    if isinstance(l, tuple):
-        l = env_marker_ast2expr(l)
-    else:
-        assert isinstance(l, str)
-        if l not in MARKER_VARS:
-            l = repr(l)
-    if isinstance(r, tuple):
-        r = env_marker_ast2expr(r)
-    else:
-        assert isinstance(r, str)
-        if r not in MARKER_VARS:
-            r = repr(r)
-    return f"{l} {o} {r}"
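
The pip-subprocess lookup above is replaced by `importlib.metadata`, which reads installed distribution metadata in-process. One point worth noting (illustrative calls):

    # The check is keyed on distribution names, not import names.
    check_package_installation("opencv-contrib-python")  # True if the wheel is installed
    check_package_installation("cv2")                    # False even when OpenCV is installed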

+ 0 - 7
paddlex/serving_requirements.txt

@@ -1,7 +0,0 @@
-aiohttp>=3.9
-bce-python-sdk>=0.9
-fastapi>=0.110
-filetype>=1.2
-starlette>=0.36
-uvicorn>=0.16
-yarl>=1.9

+ 60 - 0
paddlex/utils/deps.py

@@ -0,0 +1,60 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib.metadata
+import re
+from collections import defaultdict
+
+from packaging.requirements import Requirement
+
+_EXTRA_PATTERN = re.compile(
+    r"(?:;|and)*[ \t]*extra[ \t]*==[ \t]*['\"]([a-z0-9]+(?:-[a-z0-9]+)*)['\"]"
+)
+_EXTRA_NAMES_TO_EXCLUDE = {"base", "plugins"}
+
+
+def _get_extra_name_and_remove_extra_marker(dep_spec):
+    # XXX: Not sure if this is correct
+    m = _EXTRA_PATTERN.search(dep_spec)
+    if m:
+        return m.group(1), dep_spec[: m.start()] + dep_spec[m.end() :]
+    else:
+        return None, dep_spec
+
+
+def get_package_version(package_name):
+    try:
+        return importlib.metadata.version(package_name)
+    except importlib.metadata.PackageNotFoundError:
+        return None
+
+
+def get_extras():
+    metadata = importlib.metadata.metadata("paddlex")
+    extras = {}
+    # XXX: The `metadata.get_all` used here is not well documented.
+    for name in metadata.get_all("Provides-Extra", []):
+        if name not in _EXTRA_NAMES_TO_EXCLUDE:
+            extras[name] = defaultdict(list)
+    for dep_spec in importlib.metadata.requires("paddlex"):
+        extra_name, dep_spec = _get_extra_name_and_remove_extra_marker(dep_spec)
+        if extra_name is not None and extra_name not in _EXTRA_NAMES_TO_EXCLUDE:
+            dep_spec = dep_spec.rstrip()
+            req = Requirement(dep_spec)
+            assert extra_name in extras, extra_name
+            extras[extra_name][req.name].append(dep_spec)
+    return extras
+
+
+EXTRAS = get_extras()
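
A hedged illustration of the strings `get_extras` receives from importlib.metadata.requires("paddlex") and how the helper splits off the extra marker (example inputs are hypothetical):

    _get_extra_name_and_remove_extra_marker('fastapi>=0.110; extra == "serving"')
    # -> ("serving", "fastapi>=0.110")
    _get_extra_name_and_remove_extra_marker('numpy==1.24.4; python_version < "3.12"')
    # -> (None, 'numpy==1.24.4; python_version < "3.12"'), left untouched since no extra marker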

+ 65 - 0
paddlex/utils/install.py

@@ -0,0 +1,65 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+import tempfile
+
+
+def install_packages_from_requirements_file(
+    requirements_file_path, pip_install_opts=None
+):
+    # TODO: Constraints can be applied here to ensure a safe installation.
+    # For example, it is best to prevent installing a different version of a
+    # distribution for an already loaded package, as that could lead to
+    # problems.
+    return subprocess.check_call(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            *(pip_install_opts or []),
+            "-r",
+            requirements_file_path,
+        ]
+    )
+
+
+def install_packages(requirements, pip_install_opts=None):
+    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
+        for req in requirements:
+            f.write(req + "\n")
+        reqs_file_path = f.name
+    try:
+        return install_packages_from_requirements_file(
+            reqs_file_path, pip_install_opts=pip_install_opts
+        )
+    finally:
+        os.unlink(reqs_file_path)
+
+
+def uninstall_packages(pkgs, pip_uninstall_opts=None):
+    return subprocess.check_call(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "uninstall",
+            "-y",
+            *(pip_uninstall_opts or []),
+            *pkgs,
+        ]
+    )
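
A minimal usage sketch of the two helpers, mirroring how the CLI calls them (package and file names taken from the CLI changes above):

    from paddlex.utils.install import install_packages, uninstall_packages

    # Writes the specs to a temporary requirements file and runs
    # `python -m pip install --find-links <links file> -r <temp file>`.
    install_packages(
        ["ultra-infer-npu-python"],
        pip_install_opts=["--find-links", "hpip_links.html"],
    )

    uninstall_packages(["ultra-infer-npu-python"])  # python -m pip uninstall -y ...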

+ 0 - 54
requirements.txt

@@ -1,54 +0,0 @@
-prettytable # only for benchmark
-py-cpuinfo # LaTeX_OCR_rec only support MKLDNN on CPU
-imagesize
-colorlog
-PyYAML==6.0.2
-filelock
-ftfy
-ruamel.yaml
-chardet
-numpy==1.24.4; python_version<"3.12"
-numpy==1.26.4; python_version>="3.12"
-matplotlib
-opencv-python==4.5.5.64; platform_system == "Windows"
-opencv-python-headless==4.10.0.84; platform_system == "Windows"
-opencv-contrib-python==4.10.0.84
-chinese_calendar
-scikit-learn
-pycocotools
-tqdm
-pyclipper
-shapely
-pandas
-parsley
-requests
-tokenizers==0.19.1
-GPUtil>=1.4.0
-scikit-image
-lxml
-openpyxl
-premailer
-PyMuPDF
-ujson
-Pillow
-pydantic>=2
-typing_extensions>=4.11
-######## For Chatocrv3 #######
-langchain==0.2.17
-langchain-openai==0.1.25
-langchain-community==0.2.17
-langchain-text-splitters==0.2.4
-transformers==4.40.0
-openai==1.63.2
-unstructured
-networkx
-faiss-cpu
-######## For Video #######
-decord==0.6.0; (platform_machine == 'x86_64' or platform_machine == 'AMD64') and sys_platform != 'darwin'
-######## For NLP Tokenizer #######
-jieba
-sentencepiece
-jinja2
-regex
-######## For Speech #######
-soundfile

+ 193 - 17
setup.py

@@ -20,6 +20,183 @@ from pathlib import Path
 
 from setuptools import find_packages, setup
 
+DEP_SPECS = {
+    "aiohttp": ">= 3.9",
+    "bce-python-sdk": ">= 0.9",
+    "chardet": "",
+    "chinese-calendar": "",
+    "colorlog": "",
+    "decord": "== 0.6.0; (platform_machine == 'x86_64' or platform_machine == 'AMD64') and sys_platform != 'darwin'",
+    "faiss-cpu": "",
+    "fastapi": ">= 0.110",
+    "filelock": "",
+    "filetype": ">= 1.2",
+    "ftfy": "",
+    "GPUtil": ">= 1.4",
+    "imagesize": "",
+    "Jinja2": "",
+    "joblib": "",
+    "langchain": "== 0.2.17",
+    "langchain-community": "== 0.2.17",
+    "langchain-core": "",
+    "langchain-openai": "== 0.1.25",
+    "lxml": "",
+    "matplotlib": "",
+    "numpy": [
+        "== 1.24.4; python_version < '3.12'",
+        "== 1.26.4; python_version >= '3.12'",
+    ],
+    "openai": "== 1.63.2",
+    "opencv-contrib-python": "== 4.10.0.84",
+    "openpyxl": "",
+    "packaging": "",
+    "paddle2onnx": ">= 2",
+    "pandas": "",
+    "pillow": "",
+    "premailer": "",
+    "prettytable": "",
+    "py-cpuinfo": "",
+    "pyclipper": "",
+    "pycocotools": "",
+    "pydantic": ">= 2",
+    "PyMuPDF": "",
+    "PyYAML": "== 6.0.2",
+    "regex": "",
+    "requests": "",
+    "ruamel.yaml": "",
+    "scikit-image": "",
+    "scikit-learn": "",
+    "shapely": "",
+    "six": "",
+    "soundfile": "",
+    "starlette": ">= 0.36",
+    "tokenizers": "== 0.19.1",
+    "tqdm": "",
+    "typing-extensions": "",
+    "ujson": "",
+    "uvicorn": ">= 0.16",
+    "yarl": ">= 1.9",
+}
+
+REQUIRED_DEPS = [
+    "chardet",
+    "colorlog",
+    "filelock",
+    "GPUtil",
+    "numpy",
+    "packaging",
+    "pillow",
+    "py-cpuinfo",
+    "pydantic",
+    "PyYAML",
+    "requests",
+    "ruamel.yaml",
+    "typing-extensions",
+    "ujson",
+]
+
+EXTRAS = {
+    "base": {
+        "cv": [
+            "faiss-cpu",
+            "matplotlib",
+            "opencv-contrib-python",
+            "pycocotools",
+            "scikit-image",
+        ],
+        "multimodal": [
+            "ftfy",
+            "Jinja2",
+            "regex",
+            "six",
+        ],
+        "ie": [
+            "ftfy",
+            "imagesize",
+            "langchain",
+            "langchain-community",
+            "langchain-core",
+            "langchain-openai",
+            "lxml",
+            "openai",
+            "opencv-contrib-python",
+            "openpyxl",
+            "premailer",
+            "prettytable",
+            "pyclipper",
+            "PyMuPDF",
+            "scikit-learn",
+            "shapely",
+            "tokenizers",
+        ],
+        "ocr": [
+            "ftfy",
+            "imagesize",
+            "lxml",
+            "opencv-contrib-python",
+            "openpyxl",
+            "premailer",
+            "prettytable",
+            "pyclipper",
+            "PyMuPDF",
+            "scikit-learn",
+            "shapely",
+            "tokenizers",
+        ],
+        "speech": [
+            "ftfy",
+            "Jinja2",
+            "regex",
+            "six",
+            "soundfile",
+            "tqdm",
+        ],
+        "ts": [
+            "chinese-calendar",
+            "joblib",
+            "matplotlib",
+            "pandas",
+            "scikit-learn",
+        ],
+        "video": [
+            "decord",
+            "opencv-contrib-python",
+        ],
+    },
+    "plugins": {
+        "serving": [
+            "aiohttp",
+            "bce-python-sdk",
+            "fastapi",
+            "filetype",
+            "starlette",
+            "uvicorn",
+            "yarl",
+        ],
+        "paddle2onnx": [
+            "paddle2onnx",
+        ],
+    },
+}
+
+
+def _get_dep_specs(deps):
+    dep_specs = []
+    for dep in deps:
+        val = DEP_SPECS[dep]
+        if not isinstance(val, list):
+            val = [val]
+        for v in val:
+            if not v:
+                dep_specs.append(dep)
+            else:
+                dep_specs.append(dep + " " + v)
+    return dep_specs
+
+
+def _sort_dep_specs(dep_specs):
+    return sorted(dep_specs, key=str.lower)
+
 
 def readme():
     """get readme"""
@@ -28,19 +205,23 @@ def readme():
 
 
 def dependencies():
-    """get dependencies"""
-    with open("requirements.txt", "r") as file:
-        return file.read()
+    dep_specs = _get_dep_specs(REQUIRED_DEPS)
+    return _sort_dep_specs(dep_specs)
 
 
-def serving_dependencies():
-    with open(os.path.join("paddlex", "serving_requirements.txt"), "r") as file:
-        return file.read()
-
-
-def paddle2onnx_dependencies():
-    with open(os.path.join("paddlex", "paddle2onnx_requirements.txt"), "r") as file:
-        return file.read()
+def extras():
+    dic = {}
+    all_dep_specs = set()
+    for group_name, group in EXTRAS.items():
+        group_dep_specs = set()
+        for extra_name, extra_deps in group.items():
+            extra_dep_specs = _get_dep_specs(extra_deps)
+            dic[extra_name] = _sort_dep_specs(extra_dep_specs)
+            group_dep_specs.update(extra_dep_specs)
+        dic[group_name] = _sort_dep_specs(group_dep_specs)
+        all_dep_specs.update(group_dep_specs)
+    dic["all"] = _sort_dep_specs(all_dep_specs)
+    return dic
 
 
 def version():
@@ -92,8 +273,6 @@ def packages_and_package_data():
     pkg_data.append("inference/pipelines/ppchatocrv3/ch_prompt.yaml")
     pkg_data.extend(pipeline_config)
     pkg_data.append(".version")
-    pkg_data.append("serving_requirements.txt")
-    pkg_data.append("paddle2onnx_requirements.txt")
     pkg_data.append("hpip_links.html")
     pkg_data.append("inference/utils/hpi_model_info_collection.json")
     ops_file_dir = "paddlex/ops"
@@ -116,10 +295,7 @@ if __name__ == "__main__":
         author="PaddlePaddle Authors",
         author_email="",
         install_requires=dependencies(),
-        extras_require={
-            "serving": serving_dependencies(),
-            "paddle2onnx": paddle2onnx_dependencies(),
-        },
+        extras_require=extras(),
         packages=pkgs,
         package_data=pkg_data,
         entry_points={
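
For context (not part of the diff), `extras()` now surfaces one key per extra, one per group, and an aggregate key, which is what `extras_require` ends up exposing:

    sorted(extras().keys())
    # ['all', 'base', 'cv', 'ie', 'multimodal', 'ocr', 'paddle2onnx', 'plugins',
    #  'serving', 'speech', 'ts', 'video']
    # Per-extra keys install one feature set, "base"/"plugins" install a whole group,
    # and "all" installs everything, e.g. pip install "paddlex[ocr]".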