Module ti.kit.repo

Expand source code Browse git
import sys
import os
import io
import re
from zipfile import ZipFile
from shutil import rmtree

from gitlab import Gitlab, GitlabGetError

# Captures the leading run of decimal digits at the start of a string.
# Not referenced by any code visible in this module.
VERSION_DIGIT_RE = re.compile(r"^([0-9]+).*")
# Matches every character that is not an uppercase ASCII letter, a digit or
# an underscore; GLPERS replaces each such character by "_".
SHELL_VAR_RE = re.compile(r"[^A-Z0-9_]")
# Name of the file in which readSha/writeSha keep the commit hash of a folder.
# NOTE(review): the leading slash doubles up with the "/" that readSha and
# writeSha insert when building the path (folder//__checkout__.txt);
# presumably harmless on POSIX, confirm before changing.
CHECK_FILE = "/__checkout__.txt"


def initTree(path, fresh=False, gentle=False):
    """Make sure a directory exists, optionally starting from scratch.

    Parameters
    ----------
    path: string
        The directory to initialize.
    fresh: boolean, optional False
        If true and `path` already exists, it is removed first and then
        created anew as an empty directory.
    gentle: boolean, optional False
        Accepted but not used by this function.
    """
    if os.path.exists(path):
        if not fresh:
            # existing material is left untouched
            return

        rmtree(path)

    os.makedirs(path, exist_ok=True)


def GLPERS(backend):
    """Give the name of the environment variable for a backend's token.

    The backend name is upper-cased, every character matched by
    `SHELL_VAR_RE` is replaced by an underscore, and the result is wrapped
    as `GL_..._PERS`.

    Parameters
    ----------
    backend: string
        The name of a GitLab server, e.g. gitlab.huc.knaw.nl

    Returns
    -------
    string
        The environment variable name.
    """
    shellSafe = SHELL_VAR_RE.sub("_", backend.upper())
    return f"GL_{shellSafe}_PERS"


def readSha(folder):
    """Read the commit hash that is stored in a folder.

    Parameters
    ----------
    folder: string
        Directory that may contain the file named by `CHECK_FILE`.

    Returns
    -------
    string | None
        The first non-blank line of that file, stripped of surrounding
        white space; `None` if the file does not exist or has only blank lines.
    """
    checkPath = f"{folder}/{CHECK_FILE}"

    if not os.path.isfile(checkPath):
        return None

    with open(checkPath) as fh:
        stripped = (line.strip() for line in fh)
        return next((text for text in stripped if text), None)


def writeSha(folder, commit):
    """Store a commit hash in a folder.

    The folder is created if it is not yet a directory; the file named by
    `CHECK_FILE` inside it is overwritten with the hash plus a newline.

    Parameters
    ----------
    folder: string
        Directory in which the hash file is written.
    commit: string
        The commit hash to store.
    """
    folderMissing = not os.path.isdir(folder)

    if folderMissing:
        os.makedirs(folder, exist_ok=True)

    with open(f"{folder}/{CHECK_FILE}", mode="w") as fh:
        fh.write(f"{commit}\n")


def fetchRepo(
    backend, org, repo, folder, destDir, force=False, verbose=False, indent=""
):
    """Get latest version of a subfolder of a GitLab repo.

    Before downloading the data, the commit hash of the online data and the
    local copy will be compared. If they are equal, the download will
    not happen, except when the `force` parameter is true.

    After download, the commit hash will be written to the downloaded folder.

    If the folder is deep into the repo, say `/a/b/c/folder`,
    the zip file returned by GitLab will have paths `/a/b/c/folder/`... .

    The extraction will remove `/a/b/c` from these paths.
    After the extraction, a file `__checkout__.txt` containing the commit hash
    will be placed in the `destDir/folder` directory.

    If the environment variable named by `GLPERS(backend)` has a value, that
    value is passed to GitLab as private token; otherwise the connection is
    made without a token.

    Parameters
    ----------
    backend: string
        The name of an on-premise GitLab server, e.g. gitlab.huc.knaw.nl
    org: string
        Organization or group on GitLab
    repo: string
        Repository within organization or group
    folder: string
        Subdirectory within repo
    destDir: string
        Local directory where the downloaded data should land;
        a leading `~` is expanded.
    force: boolean, optional False
        Whether to force downloading data if the local copy matches the online copy
        by sha-hash.

        If `False`, an up-to-date local copy is left alone and nothing is
        downloaded.

        If `True`, the folder will be downloaded again, but the local copy will
        not be wiped beforehand, so new files will overwrite existing files,
        but if the local copy contains additional material, it will be left in place.

        A local copy whose hash differs from the online one is always wiped
        and downloaded afresh, regardless of `force`.
    verbose: boolean, optional False
        If True, informational messages will be issued, otherwise only error messages
        will be issued.
    indent: string, optional ""
        Precede each console message with this string (usually a bunch of spaces)

    Returns
    -------
    bool
        Whether the operation was successful
    """

    def console(*msg, error=False, newline=True):
        # Join the parts (non-strings by their repr), drop one leading and one
        # trailing newline, and write to stderr (errors) or stdout.
        msg = " ".join(m if type(m) is str else repr(m) for m in msg)
        msg = msg.removeprefix("\n").removesuffix("\n")
        target = sys.stderr if error else sys.stdout
        nl = "\n" if newline else ""

        if indent:
            # every line of a multi-line message gets the indent
            msg = indent + msg.replace("\n", f"\n{indent}")

        target.write(f"{msg}{nl}")
        target.flush()

    bUrl = f"https://{backend}"
    onlineSrc = f"{bUrl}/{org}/{repo}"
    person = os.environ.get(GLPERS(backend), None)

    if person:
        conn = Gitlab(bUrl, private_token=person, keep_base_url=True)
    else:
        conn = Gitlab(bUrl)

    backendVersion = conn.version()

    # An "unknown" version is taken as the sign that the server is unreachable.
    if (
        not backendVersion
        or backendVersion[0] == "unknown"
        or backendVersion[-1] == "unknown"
    ):
        console(f"cannot connect to GitLab instance {bUrl}\n", error=True)
        return False

    if verbose:
        console(f"connected to {bUrl}")

    try:
        repoOnline = conn.projects.get(f"{org}/{repo}")
    except Exception as e:
        console(f"connecting failed to online {onlineSrc}", error=True)

        if isinstance(e, GitlabGetError):
            console(f"{bUrl} says: {e}", error=True)
        else:
            console(f"error with {bUrl}: {e}", error=True)

        return False

    if verbose:
        console(f"connected to {onlineSrc}")

    commit = None

    try:
        # NOTE(review): how `all=True` is interpreted (pagination switch versus
        # API query parameter) may depend on the python-gitlab version — confirm.
        cs = repoOnline.commits.list(all=True)

        if cs:
            # The latest commit is the one with the greatest `created_at` value.
            commit = sorted(cs, key=lambda x: x.created_at)[-1]
        else:
            console(f"no commit in {onlineSrc}", error=True)
    except Exception as e:
        console(str(e), error=True)

    if commit is None:
        console(f"cannot find commits in {onlineSrc}", error=True)
        return False

    sha = commit.id

    if verbose:
        console(f"{sha} = latest commit online")

    destDir = os.path.expanduser(destDir)
    (folderHead, folderTail) = folder.rsplit("/", 1) if "/" in folder else ("", folder)

    # Only the last component of `folder` is kept below `destDir`.
    folderLocal = f"{destDir}/{folderTail}"
    existingSha = readSha(folderLocal)

    if verbose:
        console(f"{existingSha} = commit of local copy")

    localOk = existingSha == sha
    removeLocal = False

    if localOk:
        if verbose:
            console("Offline copy already up to date")

        if not force:
            return True

        # Informational only, so it is suppressed unless verbose.
        if verbose:
            console("Will download again over local copy")
    else:
        if verbose:
            console("Offline copy not up to date, will download new copy")

        removeLocal = True

    try:
        if verbose:
            console(f"Downloading {onlineSrc}/{folder} ... ", newline=False)

        response = conn.http_get(
            f"/projects/{repoOnline.id}/repository/archive.zip",
            query_data=dict(path=folder),
            raw=True,
        )
        zf = response.content
    except Exception as e:
        if verbose:
            console("failed", error=True)

        console(str(e), error=True)
        return False

    if verbose:
        console("done")

    if len(zf) == 0:
        # This is a failure, so it is reported as an error.
        console("Download is empty", error=True)
        return False

    if verbose:
        console(f"Extracting data to {folderLocal} ... ", newline=False)

    folderHeadSlash = f"{folderHead}/" if folderHead else ""
    # The archive wraps everything in one top-level directory whose name
    # starts with the repo name; the repo name is escaped so that characters
    # such as "." are matched literally.
    gitlabSlugRe = re.compile(f"^{re.escape(repo)}(?:-(?:master|main))?-[^/]*/")

    try:
        with ZipFile(io.BytesIO(zf)) as z:
            # Wipe the outdated local copy only after the archive has been
            # opened successfully, so a corrupt download does not destroy it.
            initTree(folderLocal, fresh=removeLocal)

            for zInfo in z.infolist():
                fileName = zInfo.filename

                if fileName.endswith("/"):
                    # directory entries are skipped
                    continue

                fileName = gitlabSlugRe.sub("", fileName) or "/"
                fileName = fileName.removeprefix(folderHeadSlash)
                zInfo.filename = fileName
                z.extract(zInfo, path=destDir)

        writeSha(folderLocal, sha)

    except Exception as e:
        if verbose:
            console("failed", error=True)

        console(str(e), error=True)
        return False

    if verbose:
        console("done")

    return True

Functions

def GLPERS(backend)
Expand source code Browse git
def GLPERS(backend):
    """Give the name of the environment variable for a backend's token.

    The backend name is upper-cased, every character matched by
    `SHELL_VAR_RE` is replaced by an underscore, and the result is wrapped
    as `GL_..._PERS`.

    Parameters
    ----------
    backend: string
        The name of a GitLab server, e.g. gitlab.huc.knaw.nl

    Returns
    -------
    string
        The environment variable name.
    """
    shellSafe = SHELL_VAR_RE.sub("_", backend.upper())
    return f"GL_{shellSafe}_PERS"
def fetchRepo(backend, org, repo, folder, destDir, force=False, verbose=False, indent='')

Get latest version of a subfolder of a GitLab repo.

Before downloading the data, the commit hash of the online data and the local copy will be compared. If they are equal, the download will not happen, except when the force parameter is nonzero.

After download, the commit hash will be written to the downloaded folder.

If the folder is deep into the repo, say /a/b/c/folder, the zip file returned by GitLab will have paths /a/b/c/folder/… .

The extraction will remove /a/b/c from these paths. After the extraction, a file __checkout__.txt containing the commit hash will be placed in the destDir/folder directory.

Parameters

backend : string
The name of an on-premise GitLab server, e.g. gitlab.huc.knaw.nl
org : string
Organization or group on gitlab
repo : string
Repository within organization or group
folder : string
Subdirectory within repo
destDir : string
Local directory where the downloaded data should land
force : boolean, optional False

Whether to force downloading data if the local copy matches the online copy by sha-hash.

If False, no re-download will take place when the local copy is already up to date.

If True, the folder will be downloaded again, but the local copy will not be wiped beforehand: new files will overwrite existing files, and any additional material in the local copy will be left in place.

verbose : boolean, optional False
If True, informational messages will be issued, otherwise only error messages will be issued.
indent : string, optional ""
Precede each console message with this string (usually a bunch of spaces)

Returns

bool
Whether the operation was successful
Expand source code Browse git
def fetchRepo(
    backend, org, repo, folder, destDir, force=False, verbose=False, indent=""
):
    """Get latest version of a subfolder of a GitLab repo.

    Before downloading the data, the commit hash of the online data and the
    local copy will be compared. If they are equal, the download will
    not happen, except when the `force` parameter is true.

    After download, the commit hash will be written to the downloaded folder.

    If the folder is deep into the repo, say `/a/b/c/folder`,
    the zip file returned by GitLab will have paths `/a/b/c/folder/`... .

    The extraction will remove `/a/b/c` from these paths.
    After the extraction, a file `__checkout__.txt` containing the commit hash
    will be placed in the `destDir/folder` directory.

    If the environment variable named by `GLPERS(backend)` has a value, that
    value is passed to GitLab as private token; otherwise the connection is
    made without a token.

    Parameters
    ----------
    backend: string
        The name of an on-premise GitLab server, e.g. gitlab.huc.knaw.nl
    org: string
        Organization or group on GitLab
    repo: string
        Repository within organization or group
    folder: string
        Subdirectory within repo
    destDir: string
        Local directory where the downloaded data should land;
        a leading `~` is expanded.
    force: boolean, optional False
        Whether to force downloading data if the local copy matches the online copy
        by sha-hash.

        If `False`, an up-to-date local copy is left alone and nothing is
        downloaded.

        If `True`, the folder will be downloaded again, but the local copy will
        not be wiped beforehand, so new files will overwrite existing files,
        but if the local copy contains additional material, it will be left in place.

        A local copy whose hash differs from the online one is always wiped
        and downloaded afresh, regardless of `force`.
    verbose: boolean, optional False
        If True, informational messages will be issued, otherwise only error messages
        will be issued.
    indent: string, optional ""
        Precede each console message with this string (usually a bunch of spaces)

    Returns
    -------
    bool
        Whether the operation was successful
    """

    def console(*msg, error=False, newline=True):
        # Join the parts (non-strings by their repr), drop one leading and one
        # trailing newline, and write to stderr (errors) or stdout.
        msg = " ".join(m if type(m) is str else repr(m) for m in msg)
        msg = msg.removeprefix("\n").removesuffix("\n")
        target = sys.stderr if error else sys.stdout
        nl = "\n" if newline else ""

        if indent:
            # every line of a multi-line message gets the indent
            msg = indent + msg.replace("\n", f"\n{indent}")

        target.write(f"{msg}{nl}")
        target.flush()

    bUrl = f"https://{backend}"
    onlineSrc = f"{bUrl}/{org}/{repo}"
    person = os.environ.get(GLPERS(backend), None)

    if person:
        conn = Gitlab(bUrl, private_token=person, keep_base_url=True)
    else:
        conn = Gitlab(bUrl)

    backendVersion = conn.version()

    # An "unknown" version is taken as the sign that the server is unreachable.
    if (
        not backendVersion
        or backendVersion[0] == "unknown"
        or backendVersion[-1] == "unknown"
    ):
        console(f"cannot connect to GitLab instance {bUrl}\n", error=True)
        return False

    if verbose:
        console(f"connected to {bUrl}")

    try:
        repoOnline = conn.projects.get(f"{org}/{repo}")
    except Exception as e:
        console(f"connecting failed to online {onlineSrc}", error=True)

        if isinstance(e, GitlabGetError):
            console(f"{bUrl} says: {e}", error=True)
        else:
            console(f"error with {bUrl}: {e}", error=True)

        return False

    if verbose:
        console(f"connected to {onlineSrc}")

    commit = None

    try:
        # NOTE(review): how `all=True` is interpreted (pagination switch versus
        # API query parameter) may depend on the python-gitlab version — confirm.
        cs = repoOnline.commits.list(all=True)

        if cs:
            # The latest commit is the one with the greatest `created_at` value.
            commit = sorted(cs, key=lambda x: x.created_at)[-1]
        else:
            console(f"no commit in {onlineSrc}", error=True)
    except Exception as e:
        console(str(e), error=True)

    if commit is None:
        console(f"cannot find commits in {onlineSrc}", error=True)
        return False

    sha = commit.id

    if verbose:
        console(f"{sha} = latest commit online")

    destDir = os.path.expanduser(destDir)
    (folderHead, folderTail) = folder.rsplit("/", 1) if "/" in folder else ("", folder)

    # Only the last component of `folder` is kept below `destDir`.
    folderLocal = f"{destDir}/{folderTail}"
    existingSha = readSha(folderLocal)

    if verbose:
        console(f"{existingSha} = commit of local copy")

    localOk = existingSha == sha
    removeLocal = False

    if localOk:
        if verbose:
            console("Offline copy already up to date")

        if not force:
            return True

        # Informational only, so it is suppressed unless verbose.
        if verbose:
            console("Will download again over local copy")
    else:
        if verbose:
            console("Offline copy not up to date, will download new copy")

        removeLocal = True

    try:
        if verbose:
            console(f"Downloading {onlineSrc}/{folder} ... ", newline=False)

        response = conn.http_get(
            f"/projects/{repoOnline.id}/repository/archive.zip",
            query_data=dict(path=folder),
            raw=True,
        )
        zf = response.content
    except Exception as e:
        if verbose:
            console("failed", error=True)

        console(str(e), error=True)
        return False

    if verbose:
        console("done")

    if len(zf) == 0:
        # This is a failure, so it is reported as an error.
        console("Download is empty", error=True)
        return False

    if verbose:
        console(f"Extracting data to {folderLocal} ... ", newline=False)

    folderHeadSlash = f"{folderHead}/" if folderHead else ""
    # The archive wraps everything in one top-level directory whose name
    # starts with the repo name; the repo name is escaped so that characters
    # such as "." are matched literally.
    gitlabSlugRe = re.compile(f"^{re.escape(repo)}(?:-(?:master|main))?-[^/]*/")

    try:
        with ZipFile(io.BytesIO(zf)) as z:
            # Wipe the outdated local copy only after the archive has been
            # opened successfully, so a corrupt download does not destroy it.
            initTree(folderLocal, fresh=removeLocal)

            for zInfo in z.infolist():
                fileName = zInfo.filename

                if fileName.endswith("/"):
                    # directory entries are skipped
                    continue

                fileName = gitlabSlugRe.sub("", fileName) or "/"
                fileName = fileName.removeprefix(folderHeadSlash)
                zInfo.filename = fileName
                z.extract(zInfo, path=destDir)

        writeSha(folderLocal, sha)

    except Exception as e:
        if verbose:
            console("failed", error=True)

        console(str(e), error=True)
        return False

    if verbose:
        console("done")

    return True
def initTree(path, fresh=False, gentle=False)
Expand source code Browse git
def initTree(path, fresh=False, gentle=False):
    """Make sure a directory exists, optionally starting from scratch.

    Parameters
    ----------
    path: string
        The directory to initialize.
    fresh: boolean, optional False
        If true and `path` already exists, it is removed first and then
        created anew as an empty directory.
    gentle: boolean, optional False
        Accepted but not used by this function.
    """
    if os.path.exists(path):
        if not fresh:
            # existing material is left untouched
            return

        rmtree(path)

    os.makedirs(path, exist_ok=True)
def readSha(folder)
Expand source code Browse git
def readSha(folder):
    """Read the commit hash that is stored in a folder.

    Parameters
    ----------
    folder: string
        Directory that may contain the file named by `CHECK_FILE`.

    Returns
    -------
    string | None
        The first non-blank line of that file, stripped of surrounding
        white space; `None` if the file does not exist or has only blank lines.
    """
    checkPath = f"{folder}/{CHECK_FILE}"

    if not os.path.isfile(checkPath):
        return None

    with open(checkPath) as fh:
        stripped = (line.strip() for line in fh)
        return next((text for text in stripped if text), None)
def writeSha(folder, commit)
Expand source code Browse git
def writeSha(folder, commit):
    """Store a commit hash in a folder.

    The folder is created if it is not yet a directory; the file named by
    `CHECK_FILE` inside it is overwritten with the hash plus a newline.

    Parameters
    ----------
    folder: string
        Directory in which the hash file is written.
    commit: string
        The commit hash to store.
    """
    folderMissing = not os.path.isdir(folder)

    if folderMissing:
        os.makedirs(folder, exist_ok=True)

    with open(f"{folder}/{CHECK_FILE}", mode="w") as fh:
        fh.write(f"{commit}\n")