From 7673441ec04cba3123aceeb69c03e001a6af888c Mon Sep 17 00:00:00 2001
From: Egor Tensin <Egor.Tensin@gmail.com>
Date: Sat, 31 Jul 2021 15:27:46 +0300
Subject: factor various "repository" classes into Repo

Some other refactoring efforts are included.
---
 .ci/docker/client/etc/cgitize.toml |   2 +-
 .ci/local/test.sh                  |   2 +-
 cgitize/bitbucket.py               |  24 ++++
 cgitize/config.py                  |  97 +++++++++-------
 cgitize/github.py                  |  23 ++++
 cgitize/main.py                    |   4 +-
 cgitize/repo.py                    | 219 +++++++++++--------------------------
 cgitize/utils.py                   |  20 ++++
 examples/cgitize.toml              |   6 +-
 9 files changed, 201 insertions(+), 196 deletions(-)
 create mode 100644 cgitize/bitbucket.py
 create mode 100644 cgitize/github.py

diff --git a/.ci/docker/client/etc/cgitize.toml b/.ci/docker/client/etc/cgitize.toml
index bc46aca..4e8fda7 100644
--- a/.ci/docker/client/etc/cgitize.toml
+++ b/.ci/docker/client/etc/cgitize.toml
@@ -1,4 +1,4 @@
 [repositories.test_repo]
 
-id = "test_repo"
+name = "test_repo"
 clone_url = "root@server:~/test_repo"
diff --git a/.ci/local/test.sh b/.ci/local/test.sh
index 8826ff4..e78a4be 100755
--- a/.ci/local/test.sh
+++ b/.ci/local/test.sh
@@ -74,7 +74,7 @@ setup_cgitize_toml() {
 output = "$output_dir"
 
 [repositories.test_repo]
-id = "test_repo"
+name = "test_repo"
 clone_url = "$upstream_repo_dir"
 EOF
 }
diff --git a/cgitize/bitbucket.py b/cgitize/bitbucket.py
new file mode 100644
index 0000000..16f6d41
--- /dev/null
+++ b/cgitize/bitbucket.py
@@ -0,0 +1,24 @@
+# Copyright (c) 2021 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is part of the "cgitize" project.
+# For details, see https://github.com/egor-tensin/cgitize.
+# Distributed under the MIT License.
+
+import logging
+
+from atlassian.bitbucket.cloud import Cloud
+from requests.exceptions import HTTPError
+
+
+class Bitbucket:
+    def __init__(self, username=None, password=None):
+        self._impl = Cloud(username=username, password=password, cloud=True)
+
+    def get_repo(self, repo):
+        if 'id' not in repo:
+            raise ValueError('every Bitbucket repository must have an ID')
+        repo_id = repo['id']
+        try:
+            return self._impl.repositories.get(repo_id)
+        except HTTPError:
+            logging.error("Couldn't fetch repository: %s", repo_id)
+            raise
diff --git a/cgitize/config.py b/cgitize/config.py
index 223dcd2..857d42a 100644
--- a/cgitize/config.py
+++ b/cgitize/config.py
@@ -9,7 +9,9 @@ import logging
 import os
 import sys
 
-from cgitize.repo import Repo, GitHub as GitHubRepo, Bitbucket as BitbucketRepo
+from cgitize.bitbucket import Bitbucket
+from cgitize.github import GitHub
+from cgitize.repo import Repo
 from cgitize.utils import chdir
 
 import tomli
@@ -28,13 +30,21 @@ class Section:
     def _get_config_path(self, *args, **kwargs):
         return os.path.abspath(self._get_config_value(*args, **kwargs))
 
+    def _get_config_or_env(self, key, env_name):
+        val = self._get_config_value(key, required=False)
+        if val is not None:
+            return val
+        if env_name in os.environ:
+            return os.environ[env_name]
+        return None
+
 
-class Main(Section):
+class MainSection(Section):
     DEFAULT_OUTPUT_DIR = '/var/tmp/cgitize/output'
 
     @property
     def output(self):
-        return self._get_config_path('output', default=Main.DEFAULT_OUTPUT_DIR)
+        return self._get_config_path('output', default=MainSection.DEFAULT_OUTPUT_DIR)
 
     @property
     def clone_url(self):
@@ -49,52 +59,51 @@ class Main(Section):
         return self._get_config_value('ssh', default=True)
 
 
-class GitHub(Section):
+class GitHubSection(Section):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.repositories = Repositories(self.impl.get('repositories', {}), GitHubRepo)
+        self.repositories = RepositoriesSection(self.impl.get('repositories', {}))
 
     @property
     def access_token(self):
-        access_token = self._get_config_value('access_token', required=False)
-        if access_token is not None:
-            return access_token
-        env_var = 'CGITIZE_GITHUB_ACCESS_TOKEN'
-        if env_var in os.environ:
-            return os.environ[env_var]
-        return None
+        return self._get_config_or_env('access_token', 'CGITIZE_GITHUB_ACCESS_TOKEN')
+
+    @property
+    def url_auth(self):
+        return self.access_token
 
     def enum_repositories(self):
         return self.repositories.enum_repositories()
 
 
-class Bitbucket(Section):
+class BitbucketSection(Section):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.repositories = Repositories(self.impl.get('repositories', {}), BitbucketRepo)
+        self.repositories = RepositoriesSection(self.impl.get('repositories', {}))
 
     @property
     def app_password(self):
-        app_password = self._get_config_value('app_password', required=False)
-        if app_password is not None:
-            return app_password
-        env_var = 'CGITIZE_BITBUCKET_APP_PASSWORD'
-        if env_var in os.environ:
-            return os.environ[env_var]
-        return None
+        return self._get_config_or_env('app_password', 'CGITIZE_BITBUCKET_APP_PASSWORD')
+
+    @property
+    def username(self):
+        return self._get_config_or_env('username', 'CGITIZE_BITBUCKET_USERNAME')
+
+    @property
+    def url_auth(self):
+        username = self.username
+        password = self.app_password
+        if username is None or password is None:
+            return None
+        return f'{username}:{password}'
 
     def enum_repositories(self):
         return self.repositories.enum_repositories()
 
 
-class Repositories(Section):
-    def __init__(self, impl, repo_cls=Repo):
-        super().__init__(impl)
-        self.repo_cls = repo_cls
-
+class RepositoriesSection(Section):
     def enum_repositories(self):
-        for k, v in self.impl.items():
-            yield self.repo_cls.from_config(v)
+        return self.impl.values()
 
 
 class Config:
@@ -108,12 +117,26 @@ class Config:
         self.path = os.path.abspath(path)
         with open(self.path, 'rb') as f:
             self.impl = tomli.load(f)
-        self.main = Main(self.impl)
-        self.repositories = Repositories(self.impl.get('repositories', {}))
-        self.github = GitHub(self.impl.get('github', {}))
-        self.bitbucket = Bitbucket(self.impl.get('bitbucket', {}))
-
-    def enum_repositories(self):
-        yield from self.repositories.enum_repositories()
-        yield from self.github.enum_repositories()
-        yield from self.bitbucket.enum_repositories()
+        self.main = MainSection(self.impl)
+        self.repositories = RepositoriesSection(self.impl.get('repositories', {}))
+        self.github = GitHubSection(self.impl.get('github', {}))
+        self.bitbucket = BitbucketSection(self.impl.get('bitbucket', {}))
+
+    def _parse_explicit_repositories(self):
+        for r in self.repositories.enum_repositories():
+            yield Repo.from_config(r, self)
+
+    def _parse_github_repositories(self):
+        github = GitHub(self.github.access_token)
+        for r in self.github.repositories.enum_repositories():
+            yield Repo.from_github(github.get_repo(r), self)
+
+    def _parse_bitbucket_repositories(self):
+        bitbucket = Bitbucket(self.bitbucket.username, self.bitbucket.app_password)
+        for r in self.bitbucket.repositories.enum_repositories():
+            yield Repo.from_bitbucket(bitbucket.get_repo(r), self)
+
+    def parse_repositories(self):
+        yield from self._parse_explicit_repositories()
+        yield from self._parse_github_repositories()
+        yield from self._parse_bitbucket_repositories()
diff --git a/cgitize/github.py b/cgitize/github.py
new file mode 100644
index 0000000..2c88426
--- /dev/null
+++ b/cgitize/github.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2021 Egor Tensin <Egor.Tensin@gmail.com>
+# This file is part of the "cgitize" project.
+# For details, see https://github.com/egor-tensin/cgitize.
+# Distributed under the MIT License.
+
+import logging
+
+from github import Github, GithubException
+
+
+class GitHub:
+    def __init__(self, access_token):
+        self._impl = Github(access_token)
+
+    def get_repo(self, repo):
+        if 'id' not in repo:
+            raise ValueError('every GitHub repository must have an ID')
+        repo_id = repo['id']
+        try:
+            return self._impl.get_repo(repo_id)
+        except GithubException:
+            logging.error("Couldn't fetch repository: %s", repo_id)
+            raise
diff --git a/cgitize/main.py b/cgitize/main.py
index 6b4705f..e7d2dfd 100644
--- a/cgitize/main.py
+++ b/cgitize/main.py
@@ -36,10 +36,8 @@ def main(args=None):
         cgit_server = CGitServer(config.main.clone_url)
         output = CGitRepositories(config.main.output, cgit_server, force=args.force)
         success = True
-        for repo in config.enum_repositories():
+        for repo in config.parse_repositories():
             if args.repos is None or repo.repo_id in args.repos:
-                repo.fill_defaults(config)
-                repo.validate()
                 if not output.update(repo):
                     success = False
         if success:
diff --git a/cgitize/repo.py b/cgitize/repo.py
index 01b7430..4c91968 100644
--- a/cgitize/repo.py
+++ b/cgitize/repo.py
@@ -5,46 +5,75 @@
 
 import abc
 import os.path
-from urllib.parse import urlsplit, urlunsplit
+
+from cgitize.utils import url_remove_auth, url_replace_auth
 
 
 class Repo:
-    @classmethod
-    def from_config(cls, cfg):
-        if 'id' not in cfg:
-            raise ValueError('every repository must have its id defined')
-        return cls(cfg['id'], clone_url=cfg.get('clone_url'),
-                   owner=cfg.get('owner'), desc=cfg.get('desc'),
-                   homepage=cfg.get('homepage'))
-
-    def __init__(self, name, clone_url=None, owner=None, desc=None,
-                 homepage=None):
+    @staticmethod
+    def from_config(src, config):
+        if 'name' not in src:
+            raise ValueError('every repository must have a name')
+        name = src['name']
+        desc = src.get('desc')
+        homepage = src.get('homepage')
+        owner = src.get('owner', config.main.default_owner)
+        if 'clone_url' not in src:
+            raise ValueError('every repository must have a clone URL')
+        clone_url = src['clone_url']
+        return Repo(name, clone_url, owner=owner, desc=desc, homepage=homepage)
+
+    @staticmethod
+    def from_github(src, config):
+        name = src.name
+        desc = src.description
+        homepage = src.html_url
+        owner = src.owner.name
+
+        https_url = src.clone_url
+        ssh_url = src.ssh_url
+        clone_url = ssh_url if config.main.via_ssh else https_url
+        url_auth = None if config.main.via_ssh else config.github.url_auth
+
+        return Repo(name, clone_url, owner=owner, desc=desc, homepage=homepage,
+                    url_auth=url_auth)
+
+    @staticmethod
+    def from_bitbucket(src, config):
+        name = src['name']
+        desc = src['description']
+        homepage = src['links']['html']['href']
+        owner = src['owner']['display_name']
+
+        https_urls = [link for link in src['links']['clone'] if link['name'] == 'https']
+        if len(https_urls) != 1:
+            raise RuntimeError(f"no https:// clone URL for repository '{name}'?!")
+        # Bitbucket leaves the username in the URL... Sigh.
+        https_url = url_remove_auth(https_urls[0]['href'])
+
+        ssh_urls = [link for link in src['links']['clone'] if link ['name'] == 'ssh']
+        if len(ssh_urls) != 1: raise RuntimeError(f"no ssh:// clone URL for repository '{name}'?!")
+        ssh_url = ssh_urls[0]['href']
+
+        clone_url = ssh_url if config.main.via_ssh else https_url
+        url_auth = None if config.main.via_ssh else config.bitbucket.url_auth
+
+        return Repo(name, clone_url, owner=owner, desc=desc, homepage=homepage,
+                    url_auth=url_auth)
+
+    def __init__(self, name, clone_url, owner=None, desc=None, homepage=None,
+                 url_auth=None):
         self._name = name
-        self._clone_url = clone_url
-        self._owner = owner
         self._desc = desc
         self._homepage = homepage
-
-    def fill_defaults(self, config):
-        if self._owner is None:
-            self._owner = config.main.default_owner
-
-    def validate(self):
-        if self.clone_url is None:
-            raise RuntimeError('upstream repository URL must be specified')
+        self._owner = owner
+        self._clone_url = clone_url
+        self._url_auth = url_auth
 
     @property
     def name(self):
         return self._name
 
-    @property
-    def clone_url(self):
-        return self._clone_url
-
-    @property
-    def owner(self):
-        return self._owner
-
     @property
     def desc(self):
         if self._desc is not None:
@@ -60,135 +89,19 @@ class Repo:
         return self._homepage
 
     @property
-    def url_auth(self):
-        return False
-
-
-class HostedRepo(Repo, abc.ABC):
-    @classmethod
-    def from_config(cls, cfg):
-        if 'id' not in cfg:
-            raise ValueError('every repository must have its id defined')
-        return cls(cfg['id'], owner=cfg.get('owner'), desc=cfg.get('desc'),
-                   homepage=cfg.get('homepage'))
-
-    @staticmethod
-    def split_repo_id(repo_id):
-        components = repo_id.split('/')
-        if len(components) != 2:
-            raise ValueError(f'repository ID must be in the USER/NAME format: {repo_id}')
-        user, name = components
-        return user, name
-
-    def __init__(self, repo_id, owner=None, desc=None, homepage=None,
-                 via_ssh=True):
-        user, name = self.split_repo_id(repo_id)
-        super().__init__(name, clone_url=None, owner=owner, desc=desc,
-                         homepage=homepage)
-        self._user = user
-        self._via_ssh = via_ssh
-
-    def fill_defaults(self, config):
-        super().fill_defaults(config)
-        self._via_ssh = config.main.via_ssh
-
-    @property
-    def user(self):
-        return self._user
-
-    @property
-    @abc.abstractmethod
-    def provider_name(self):
-        pass
-
-    @property
-    @abc.abstractmethod
-    def clone_url_ssh(self):
-        pass
-
-    @property
-    @abc.abstractmethod
-    def clone_url_https(self):
-        pass
+    def owner(self):
+        return self._owner
 
     @property
     def clone_url(self):
-        if self._via_ssh:
-            return self.clone_url_ssh
-        return self.clone_url_https
-
-    @property
-    def clone_url_with_auth(self):
-        if self._via_ssh:
-            return self.clone_url_ssh
-        auth = self.url_auth
-        clone_url = self.clone_url_https
-        if not auth:
-            return clone_url
-        clone_url = urlsplit(clone_url)
-        clone_url = clone_url._replace(netloc=f'{auth}@{clone_url.netloc}')
-        return urlunsplit(clone_url)
-
-
-class GitHub(HostedRepo):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self._access_token = None
-
-    def fill_defaults(self, config):
-        super().fill_defaults(config)
-        self._access_token = config.github.access_token
-
-    @property
-    def provider_name(self):
-        return 'GitHub'
-
-    @property
-    def homepage(self):
-        return f'https://github.com/{self.user}/{self.name}'
-
-    @property
-    def url_auth(self):
-        if self._access_token is None:
-            return ''
-        return f'{self._access_token}'
-
-    @property
-    def clone_url_ssh(self):
-        return f'ssh://git@github.com/{self.user}/{self.name}.git'
-
-    @property
-    def clone_url_https(self):
-        return f'https://github.com/{self.user}/{self.name}.git'
-
-
-class Bitbucket(HostedRepo):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self._app_password = None
-
-    def fill_defaults(self, config):
-        super().fill_defaults(config)
-        self._app_password = config.bitbucket.app_password
-
-    @property
-    def provider_name(self):
-        return 'Bitbucket'
-
-    @property
-    def homepage(self):
-        return f'https://bitbucket.org/{self.user}/{self.name.lower()}'
+        return self._clone_url
 
     @property
     def url_auth(self):
-        if self._app_password is None:
-            return ''
-        return f'{self.user}:{self._app_password}'
+        return self._url_auth
 
     @property
-    def clone_url_ssh(self):
-        return f'ssh://git@bitbucket.org/{self.user}/{self.name}.git'
-
-    @property
-    def clone_url_https(self):
-        return f'https://bitbucket.org/{self.user}/{self.name}.git'
+    def clone_url_with_auth(self):
+        if not self.url_auth:
+            return self.clone_url
+        return url_replace_auth(self.clone_url, self.url_auth)
diff --git a/cgitize/utils.py b/cgitize/utils.py
index 2f91939..64b419a 100644
--- a/cgitize/utils.py
+++ b/cgitize/utils.py
@@ -9,6 +9,7 @@ import os
 import stat
 import subprocess
 import sys
+from urllib.parse import urlsplit, urlunsplit
 
 
 @contextmanager
@@ -92,3 +93,22 @@ def protected_file(path):
             yield
         finally:
             os.unlink(path)
+
+
+def url_replace_auth(url, username, password=None):
+    parts = urlsplit(url)
+    netloc = username
+    if password is not None:
+        netloc += f':{password}'
+    netloc += f'@{parts.hostname}'
+    if parts.port is not None:
+        netloc += f':{parts.port}'
+    parts = parts._replace(netloc=netloc)
+    return urlunsplit(parts)
+
+
+def url_remove_auth(url):
+    parts = urlsplit(url)
+    netloc = parts.hostname
+    parts = parts._replace(netloc=netloc)
+    return urlunsplit(parts)
diff --git a/examples/cgitize.toml b/examples/cgitize.toml
index 6046e36..fcf6b92 100644
--- a/examples/cgitize.toml
+++ b/examples/cgitize.toml
@@ -31,6 +31,10 @@ id = "bobfang1992/pytomlpp"
 # passwords" to access them. Also can be provided using the
 # CGITIZE_BITBUCKET_APP_PASSWORD environment variable.
 #app_password = "XXX"
+# Unlike GitHub access tokens, Bitbucket app passwords can't be used
+# without a username, so it must be specified as well (or provided using
+# the CGITIZE_BITBUCKET_USERNAME environment variable).
+#username = "your-username"
 
 # Some random repositories hosted on Bitbucket:
 [bitbucket.repositories.cef]
@@ -42,7 +46,7 @@ id = "berkeleylab/upc-runtime"
 
 # Some random repositories hosted on the web:
 [repositories.wintun]
-id = "wintun"
+name = "wintun"
 clone_url = "https://git.zx2c4.com/wintun"
 owner = "Jason A. Donenfeld"
 desc = "Layer 3 TUN Driver for Windows"
-- 
cgit v1.2.3