Эх сурвалжийг харах

optimize the installation

gaotingquan 1 жил өмнө
parent
commit
79a8d80099

+ 4 - 8
paddlex/paddlex_cli.py

@@ -47,19 +47,17 @@ def args_cfg():
     parser.add_argument('devkits', nargs='*', default=[])
     parser.add_argument('--no_deps', action='store_true')
     parser.add_argument('--platform', type=str, default='github.com')
-    parser.add_argument('--update_repos', action='store_true')
     parser.add_argument(
         '-y',
         '--yes',
-        dest='reinstall',
+        dest='update_repos',
         action='store_true',
-        help="Whether to reinstall all packages.")
+        help="Whether to update_repos all packages.")
     parser.add_argument(
         '--use_local_repos',
         action='store_true',
         default=False,
-        help="Use local repos when installing.")
-    parser.add_argument('--force_clone', action='store_true', default=False)
+        help="Use local repos when existing.")
 
     ################# pipeline predict #################
     parser.add_argument('--predict', action='store_true', default=True, help="")
@@ -86,12 +84,10 @@ def install(args):
         repo_names = get_all_supported_repo_names()
     setup(
         repo_names=repo_names,
-        reinstall=args.reinstall or None,
         no_deps=args.no_deps,
         platform=args.platform,
         update_repos=args.update_repos,
-        use_local_repos=args.use_local_repos,
-        force_clone=args.force_clone)
+        use_local_repos=args.use_local_repos)
     return
 
 

+ 41 - 25
paddlex/repo_manager/core.py

@@ -19,7 +19,7 @@ from collections import OrderedDict
 from ..utils import logging
 from .utils import install_deps_using_pip
 from .meta import get_all_repo_names, get_repo_meta
-from .repo import build_repo_instance, build_repo_group_cloner, build_repo_group_installer
+from .repo import build_repo_instance, build_repo_group_getter, build_repo_group_installer
 
 __all__ = [
     'set_parent_dirs', 'setup', 'wheel', 'is_initialized', 'initialize',
@@ -84,14 +84,18 @@ is_initialized = _GlobalContext.is_initialized
 
 
 def setup(repo_names,
-          reinstall=None,
           no_deps=False,
           constraints=None,
           platform=None,
           update_repos=False,
-          use_local_repos=False,
-          force_clone=False):
+          use_local_repos=False):
     """ setup """
+    if update_repos and use_local_repos:
+        logging.error(
+            f"The `--update_repos` and `--use_local_repos` should not be True at the same time. They are global setting for all repos. `--update_repos` means that update all repos to sync with remote, and `--use_local_repos` means that don't update when local repo is exsting."
+        )
+        raise Exception()
+
     repo_names = list(set(_parse_repo_deps(repo_names)))
 
     repos = []
@@ -99,44 +103,56 @@ def setup(repo_names,
         repo = _GlobalContext.build_repo_instance(repo_name)
         repos.append(repo)
 
-    repos_to_clone = []
+    changed_repos = []
+    repos_to_get = []
     for repo in repos:
         repo_name = repo.name
         if repo.check_repo_exiting():
             if use_local_repos:
-                reinstall = True
+                # When use_local_repos is set, we can only assume that the local repo has changed (otherwise there would be no need to specify it).
+                changed_repos.append(repo_name)
                 logging.warning(
-                    f"We will use the existing repo of {repo.name}.")
+                    f"We will use the existing repo of {repo.name} and the repo will be reinstall."
+                )
                 continue
+
             logging.warning(f"Existing of {repo.name} repo.")
-            if reinstall is None:
+            if update_repos:
+                remove_existing = True
+            else:
                 if sys.stdin.isatty():
                     logging.warning("Should we remove it (y/n)?")
                 try:
                     remove_existing = input()
                 except EOFError:
                     logging.warning(
-                        "Unable to read from stdin. Please set `reinstall` to \
-                        True or False to apply a global setting for reclone repos."
+                        "Unable to read from stdin. Please set `--use_local_repos` to \
+                        True or False to apply a global setting for using exsting or re-getting repos."
                     )
                     raise
                 remove_existing = remove_existing.lower() in ('y', 'yes')
-            else:
-                remove_existing = reinstall
+
             if remove_existing:
+                changed_repos.append(repo_name)
                 repo.remove()
-                repos_to_clone.append(repo)
+                logging.warning(f"Existing {repo.name} repo has been removed.")
+                repos_to_get.append(repo)
             else:
                 logging.warning(
                     f"We will use the existing repo of {repo.name}.")
         else:
-            repos_to_clone.append(repo)
+            changed_repos.append(repo)
+            repos_to_get.append(repo)
+
     repos_to_install = []
     for repo in repos:
         repo_name = repo.name
         if repo.check_installation():
             logging.warning(f"Existing installation of {repo.name} detected.")
-            if reinstall is None and not update_repos:
+            reinstall = repo_name in changed_repos
+            if reinstall:
+                uninstall_existing = True
+            else:
                 if sys.stdin.isatty():
                     logging.warning("Should we uninstall it (y/n)?")
                 try:
@@ -148,9 +164,7 @@ def setup(repo_names,
                     )
                     raise
                 uninstall_existing = uninstall_existing.lower() in ('y', 'yes')
-            else:
-                if reinstall or update_repos:
-                    uninstall_existing = True
+
             if uninstall_existing:
                 repo.uninstall()
                 repos_to_install.append(repo)
@@ -159,12 +173,17 @@ def setup(repo_names,
                     f"We will use the existing installation of {repo.name}.")
         else:
             repos_to_install.append(repo)
-    cloner = build_repo_group_cloner(*repos_to_clone)
+    getter = build_repo_group_getter(*repos_to_get)
     installer = build_repo_group_installer(*repos_to_install)
 
-    logging.info("Now cloning the repos...")
-    cloner.clone(force_clone=force_clone, platform=platform)
-    logging.info("All repos are existing.")
+    if len(repos_to_get) > 0:
+        logging.info(
+            f"Now download and update the repos: {list(repo.name for repo in repos_to_get)}."
+        )
+        getter.get(force=True, platform=platform)
+        logging.info("All repos are existing.")
+    else:
+        logging.info("No repo need to download or update.")
 
     if not no_deps:
         logging.info("Dependencies are listed below:")
@@ -172,9 +191,6 @@ def setup(repo_names,
 
     logging.info("Now installing the packages...")
     install_deps_using_pip()
-    if update_repos:
-        installer.update()
-        logging.info("All repos are updated.")
     installer.install(
         force_reinstall=False, no_deps=no_deps, constraints=constraints)
     logging.info("All packages are installed.")

+ 9 - 9
paddlex/repo_manager/meta.py

@@ -23,7 +23,7 @@ REPO_NAMES = [
 
 REPO_META = {
     'PaddleSeg': {
-        'git_url': '/PaddlePaddle/PaddleSeg.git',
+        'git_path': '/PaddlePaddle/PaddleSeg.git',
         'platform': 'github',
         'branch': 'develop',
         'pkg_name': 'paddleseg',
@@ -34,7 +34,7 @@ REPO_META = {
         'path_env': 'PADDLE_PDX_PADDLESEG_PATH',
     },
     'PaddleClas': {
-        'git_url': '/PaddlePaddle/PaddleClas.git',
+        'git_path': '/PaddlePaddle/PaddleClas.git',
         'platform': 'github',
         'branch': 'develop',
         'pkg_name': 'paddleclas',
@@ -46,7 +46,7 @@ REPO_META = {
         'path_env': 'PADDLE_PDX_PADDLECLAS_PATH',
     },
     'PaddleDetection': {
-        'git_url': '/PaddlePaddle/PaddleDetection.git',
+        'git_path': '/PaddlePaddle/PaddleDetection.git',
         'platform': 'github',
         'branch': 'develop',
         'pkg_name': 'paddledet',
@@ -56,7 +56,7 @@ REPO_META = {
         'path_env': 'PADDLE_PDX_PADDLEDETECTION_PATH',
     },
     'PaddleOCR': {
-        'git_url': '/PaddlePaddle/PaddleOCR.git',
+        'git_path': '/PaddlePaddle/PaddleOCR.git',
         'platform': 'github',
         'branch': 'main',
         'pkg_name': 'paddleocr',
@@ -68,7 +68,7 @@ REPO_META = {
         'requires': ['PaddleNLP'],
     },
     'PaddleTS': {
-        'git_url': '/PaddlePaddle/PaddleTS.git',
+        'git_path': '/PaddlePaddle/PaddleTS.git',
         'platform': 'github',
         'branch': 'release_v1.1',
         'pkg_name': 'paddlets',
@@ -79,7 +79,7 @@ REPO_META = {
         'pdx_pkg_deps': ['pandas', 'ruamel.yaml'],
     },
     'PaddleNLP': {
-        'git_url': '/PaddlePaddle/PaddleNLP.git',
+        'git_path': '/PaddlePaddle/PaddleNLP.git',
         'platform': 'github',
         'branch': 'release/2.9',
         'pkg_name': 'paddlenlp',
@@ -89,7 +89,7 @@ REPO_META = {
         'path_env': 'PADDLE_PDX_PADDLENLP_PATH',
     },
     'PaddleSpeech': {
-        'git_url': '/PaddlePaddle/PaddleSpeech.git',
+        'git_path': '/PaddlePaddle/PaddleSpeech.git',
         'platform': 'github',
         'branch': 'develop',
         'pkg_name': 'paddlespeech',
@@ -100,7 +100,7 @@ REPO_META = {
         'requires': ['PaddleNLP'],
     },
     'PARL': {
-        'git_url': '/PaddlePaddle/PARL.git',
+        'git_path': '/PaddlePaddle/PARL.git',
         'platform': 'github',
         'branch': 'develop',
         'pkg_name': 'parl',
@@ -110,7 +110,7 @@ REPO_META = {
         'path_env': 'PADDLE_PDX_PARL_PATH',
     },
     'PaddleMIX': {
-        'git_url': '/PaddlePaddle/PaddleMIX.git',
+        'git_path': '/PaddlePaddle/PaddleMIX.git',
         'platform': 'github',
         'branch': 'develop',
         'pkg_name': 'paddlemix',

+ 29 - 54
paddlex/repo_manager/repo.py

@@ -21,9 +21,9 @@ import shutil
 from ..utils import logging
 from ..utils.download import download_and_extract
 from .meta import get_repo_meta, REPO_DOWNLOAD_BASE
-from .utils import (install_packages_using_pip, clone_repos_using_git,
-                    update_repos_using_git, uninstall_package_using_pip,
-                    remove_repos_using_rm, check_installation_using_pip,
+from .utils import (install_packages_using_pip, fetch_repo_using_git,
+                    reset_repo_using_git, uninstall_package_using_pip,
+                    remove_repo_using_rm, check_installation_using_pip,
                     build_wheel_using_pip, mute, switch_working_dir,
                     to_dep_spec_pep508, env_marker_ast2expr)
 
@@ -43,9 +43,9 @@ def build_repo_group_installer(*repos):
     return RepositoryGroupInstaller(list(repos))
 
 
-def build_repo_group_cloner(*repos):
-    """ build_repo_group_cloner """
-    return RepositoryGroupCloner(list(repos))
+def build_repo_group_getter(*repos):
+    """ build_repo_group_getter """
+    return RepositoryGroupGetter(list(repos))
 
 
 class PPRepository(object):
@@ -61,7 +61,7 @@ class PPRepository(object):
         self.root_dir = osp.join(repo_parent_dir, self.name)
 
         self.meta = get_repo_meta(self.name)
-        self.git_url = self.meta['git_url']
+        self.git_path = self.meta['git_path']
         self.pkg_name = self.meta['pkg_name']
         self.lib_name = self.meta['lib_name']
         self.pdx_mod_name = pdx_collection_mod.__name__ + '.' + self.meta[
@@ -139,48 +139,31 @@ class PPRepository(object):
         """ uninstall_package """
         uninstall_package_using_pip(self.pkg_name)
 
-    def clone(self, *args, **kwargs):
-        """ clone """
-        return RepositoryGroupCloner([self]).clone(*args, **kwargs)
-
-    def remove(self, *args, **kwargs):
-        """ remove """
-        return RepositoryGroupCloner([self]).remove(*args, **kwargs)
-
-    def clone_repos(self, platform=None):
-        """ clone_repos """
-        branch = self.meta.get('branch', None)
-        git_url = f'https://{platform}{self.git_url}'
-        # uncomment this if you prefer using ssh connection (requires additional setup)
-        # if platform == 'github.com':
-        #    git_url = f'git@github.com:{self.git_url}'
-        os.makedirs(self.repo_parent_dir, exist_ok=True)
-        with switch_working_dir(self.repo_parent_dir):
-            clone_repos_using_git(git_url, branch=branch)
-
-    def download_repos(self):
-        """ download and pull repos """
+    def download(self):
+        """ download from remote """
         download_url = f'{REPO_DOWNLOAD_BASE}{self.name}.tar'
         os.makedirs(self.repo_parent_dir, exist_ok=True)
         download_and_extract(download_url, self.repo_parent_dir, self.name)
+        # reset_repo_using_git('FETCH_HEAD')
+
+    def remove(self):
+        """ remove """
+        with switch_working_dir(self.repo_parent_dir):
+            remove_repo_using_rm(self.name)
 
-    def update_repos(self, platform=None):
-        """ update_repos """
+    def update(self, platform=None):
+        """ update """
         branch = self.meta.get('branch', None)
-        git_url = f'https://{platform}{self.git_url}'
+        git_url = f'https://{platform}{self.git_path}'
         with switch_working_dir(self.root_dir):
             try:
-                update_repos_using_git(branch=branch, url=git_url)
+                fetch_repo_using_git(branch=branch, url=git_url)
+                reset_repo_using_git('FETCH_HEAD')
             except Exception as e:
                 logging.warning(
-                    f"Pull {self.name} from {self.git_url} failed, check your network connection."
+                    f"Update {self.name} from {git_url} failed, check your network connection. Error:\n{e}"
                 )
 
-    def remove_repos(self):
-        """ remove_repos """
-        with switch_working_dir(self.repo_parent_dir):
-            remove_repos_using_rm(self.name)
-
     def wheel(self, dst_dir):
         """ wheel """
         with tempfile.TemporaryDirectory() as td:
@@ -290,11 +273,6 @@ class RepositoryGroupInstaller(object):
                 # NOTE: Dependencies are not uninstalled.
                 repo.uninstall_package()
 
-    def update(self):
-        """ update """
-        for repo in self.repos:
-            repo.update_repos()
-
     def get_deps(self):
         """ get_deps """
         deps_list = []
@@ -395,25 +373,22 @@ class RepositoryGroupInstaller(object):
         return '\n'.join(normed_lines)
 
 
-class RepositoryGroupCloner(object):
-    """ RepositoryGroupCloner """
+class RepositoryGroupGetter(object):
+    """ RepositoryGroupGetter """
 
     def __init__(self, repos):
         super().__init__()
         self.repos = repos
 
-    def clone(self, force_clone=False, platform=None):
+    def get(self, force=False, platform=None):
         """ clone """
-        if force_clone:
+        if force:
             self.remove()
-            for repo in self.repos:
-                repo.clone_repos(platform=platform)
-        else:
-            for repo in self.repos:
-                repo.download_repos()
-                repo.update_repos(platform=platform)
+        for repo in self.repos:
+            repo.download()
+            repo.update(platform=platform)
 
     def remove(self):
         """ remove """
         for repo in self.repos:
-            repo.remove_repos()
+            repo.remove()

+ 14 - 14
paddlex/repo_manager/utils.py

@@ -76,8 +76,8 @@ def install_deps_using_pip():
     return _check_call(args)
 
 
-def clone_repos_using_git(url, branch=None):
-    """ clone_repos_using_git """
+def clone_repo_using_git(url, branch=None):
+    """ clone_repo_using_git """
     args = ['git', 'clone', '--depth', '1']
     if isinstance(url, str):
         url = [url]
@@ -87,20 +87,20 @@ def clone_repos_using_git(url, branch=None):
     return _check_call(args)
 
 
-def update_repos_using_git(branch=None, url=None):
-    """ update_repos_using_git """
-    if url:
-        args = ['git', 'fetch', url, branch]
-        _check_call(args)
-        args = ['git', 'merge', 'FETCH_HEAD']
-        return _check_call(args)
-    else:
-        args = ['git', 'pull']
-        return _check_call(args)
+def fetch_repo_using_git(branch, url, depth=1):
+    """ fetch_repo_using_git """
+    args = ['git', 'fetch', url, branch, '--depth', str(depth)]
+    _check_call(args)
+
+
+def reset_repo_using_git(pointer, hard=True):
+    """ reset_repo_using_git """
+    args = ['git', 'reset', '--hard', pointer]
+    return _check_call(args)
 
 
-def remove_repos_using_rm(name):
-    """ remove_repos_using_rm """
+def remove_repo_using_rm(name):
+    """ remove_repo_using_rm """
     return _check_call(['rm', '-rf', name])