git.scottworley.com Git - git-cache/commitdiff
Initial version v1.0.0
author Scott Worley <scottworley@scottworley.com>
Fri, 10 Jul 2020 19:37:44 +0000 (12:37 -0700)
committer Scott Worley <scottworley@scottworley.com>
Fri, 10 Jul 2020 19:37:44 +0000 (12:37 -0700)
Changelog [new file with mode: 0644]
README [new file with mode: 0644]
default.nix [new file with mode: 0644]
git-pre-commit-hook [new file with mode: 0755]
git_cache.py [new file with mode: 0644]
setup.py [new file with mode: 0644]
test.sh [new file with mode: 0755]
test_git_cache.py [new file with mode: 0644]

diff --git a/Changelog b/Changelog
new file mode 100644 (file)
index 0000000..bfacb4c
--- /dev/null
+++ b/Changelog
@@ -0,0 +1,4 @@
+## [Unreleased]
+
+## [1.0.0] - 2020-07-10
+Initial release
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..bb0ce9e
--- /dev/null
+++ b/README
@@ -0,0 +1,3 @@
+# git-cache
+
+Cache git repos, much like nix's `builtins.fetchGit`
diff --git a/default.nix b/default.nix
new file mode 100644 (file)
index 0000000..c351571
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,11 @@
+{ pkgs ? import <nixpkgs> { }, lint ? false }:
+pkgs.python3Packages.callPackage
+({ lib, buildPythonPackage, nix, git, autopep8, mypy, pylint, }:
+  buildPythonPackage rec {
+    pname = "git-cache";
+    version = "1.0.0";
+    src = lib.cleanSource ./.;
+    checkInputs = [ nix git mypy ] ++ lib.optionals lint [ autopep8 pylint ];
+    doCheck = true;
+    checkPhase = "./test.sh";
+  }) { }
diff --git a/git-pre-commit-hook b/git-pre-commit-hook
new file mode 100755 (executable)
index 0000000..85b4445
--- /dev/null
+++ b/git-pre-commit-hook
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+# Copy me to .git/hooks/pre-commit
+
+set -e
+
+cleanup() {
+  if [[ "$D" && -e "$D" ]];then
+    rm -rf "$D"
+  fi
+}
+trap cleanup EXIT
+
+D=$(mktemp -d)
+[[ "$D" && -d "$D" ]]
+
+git checkout-index --prefix="$D/" -a
+pushd "$D"
+
+nix-shell --arg lint true --run './test.sh lint'
+
+popd
diff --git a/git_cache.py b/git_cache.py
new file mode 100644 (file)
index 0000000..d1923cd
--- /dev/null
+++ b/git_cache.py
@@ -0,0 +1,108 @@
+# It would be nice if we could share the nix git cache, but as of the
+# time of writing it is transitioning from gitv2 (deprecated) to gitv3
+# (not ready yet), and trying to straddle them both is too far into nix
+# implementation details for my comfort.  So we re-implement here half of
+# nix's builtins.fetchGit.  :(
+
+import hashlib
+import logging
+import os
+import subprocess
+
+from typing import Tuple
+
+Path = str  # eg: "/home/user/.cache/git-cache/v1"
+Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
+Ref = str   # eg: "master" or "v1.0.0"
+Rev = str   # eg: "53a27350551844e1ed1a9257690294767389ef0d"
+
+
+def git_cachedir(repo: Repo) -> Path:
+    """Return the cache directory path used for `repo`.
+
+    The path is <cache home>/git-cache/v1/<sha256 hex digest of repo>.
+    Nothing is created on disk here.
+    """
+    # Use xdg module when it's less painful to have as a dependency
+    # Per the XDG spec an empty XDG_CACHE_HOME means "use the default";
+    # without the `or`, an empty value would yield a cwd-relative path.
+    XDG_CACHE_HOME = Path(
+        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))
+
+    return Path(os.path.join(
+        XDG_CACHE_HOME,
+        'git-cache/v1',
+        hashlib.sha256(repo.encode()).hexdigest()))
+
+
+def verify_ancestry(repo: Repo, ref: Ref, rev: Rev) -> None:
+    """Check, in the cache for `repo`, that `rev` is an ancestor of `ref`.
+
+    Raises subprocess.CalledProcessError if git exits non-zero.
+    """
+    cachedir = git_cachedir(repo)
+    logging.debug('Verifying rev %s is an ancestor of ref "%s"', rev, ref)
+    subprocess.run(['git', '-C', cachedir, 'merge-base', '--is-ancestor',
+                    rev, ref], check=True)
+
+
+def fetch(repo: Repo, ref: Ref) -> Tuple[Path, Rev]:
+    """Fetch `ref` from `repo` into the cache.
+
+    Initializes a bare cache repository on first use.  Returns the cache
+    directory and the rev that `ref` points at after the fetch.  Raises
+    subprocess.CalledProcessError if any git command fails, which includes
+    a fetch that is not a fast-forward.
+    """
+    cachedir = git_cachedir(repo)
+    if not os.path.exists(cachedir):
+        logging.debug("Initializing git repo")
+        subprocess.run(['git', 'init', '--bare', cachedir], check=True)
+
+    logging.debug('Fetching ref "%s" from %s', ref, repo)
+    # We don't use --force here because we want to abort and freak out if forced
+    # updates are happening.
+    subprocess.run(['git', '-C', cachedir, 'fetch', repo,
+                    '%s:%s' % (ref, ref)], check=True)
+
+    # Ask git for the rev instead of reading refs/heads/<ref> from disk:
+    # the ref may live in packed-refs rather than in a loose file.
+    rev_parse = subprocess.run(
+        ['git', '-C', cachedir, 'rev-parse', '--verify',
+         'refs/heads/%s' % ref],
+        stdout=subprocess.PIPE, check=True)
+    rev = Rev(rev_parse.stdout.decode().strip())
+    verify_ancestry(repo, ref, rev)
+
+    return cachedir, rev
+
+
+def ensure_rev_available(repo: Repo, ref: Ref, rev: Rev) -> Path:
+    """Make sure `rev` is in the cache for `repo`, fetching `ref` if needed.
+
+    If the rev is already cached, only its ancestry under `ref` is
+    verified and nothing is fetched.  Returns the cache directory.
+    Raises if the rev cannot be obtained or verified.
+    """
+    cachedir = git_cachedir(repo)
+    if os.path.exists(cachedir):
+        logging.debug('Checking if we already have rev %s', rev)
+        process = subprocess.run(
+            ['git', '-C', cachedir, 'cat-file', '-e', rev], check=False)
+        if process.returncode == 0:
+            logging.debug('We already have rev %s', rev)
+            verify_ancestry(repo, ref, rev)
+            return cachedir
+        if process.returncode != 1:
+            raise Exception(
+                'Could not test for presence of rev %s.  Is cache dir "%s" messed up?' %
+                (rev, cachedir))
+
+    logging.debug(
+        'We do not have rev %s.  We will fetch ref "%s" and hope it appears.',
+        rev, ref)
+    fetch(repo, ref)
+    logging.debug('Verifying that fetch retrieved rev %s', rev)
+    subprocess.run(['git', '-C', cachedir, 'cat-file', '-e', rev], check=True)
+
+    return cachedir
diff --git a/setup.py b/setup.py
new file mode 100644 (file)
index 0000000..b7bbbf1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,7 @@
+from setuptools import setup
+
+setup(
+    name="git_cache",
+    version="1.0.0",  # keep in sync with default.nix and Changelog
+    py_modules=['git_cache'],
+)
diff --git a/test.sh b/test.sh
new file mode 100755 (executable)
index 0000000..a0e3f5b
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# Usage: ./test.sh [lint]
+# Always runs mypy and the unit tests; with "lint" also runs pylint and
+# fails if autopep8 would change any file.
+
+set -e
+
+PARALLELISM=4
+
+find . -name build -prune -o -name dist -prune -o -name '*.py' -print0 |
+  xargs -0 mypy --strict --ignore-missing-imports --no-warn-unused-ignores
+
+python3 -m unittest
+
+if [ "$1" = lint ];then
+
+  find . -name '*.py' -print0 | xargs -0 pylint --reports=n --persistent=n --ignore-imports=y -d invalid-name,missing-docstring
+
+  formatting_needs_fixing=$(
+    find . -name '*.py' -print0 |
+      xargs -P "$PARALLELISM" -0 -n1 autopep8 --diff -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  )
+  # POSIX test, not [[ ]]: this script runs under /bin/sh, where the
+  # bash-only [[ ]] is not guaranteed to exist.
+  if [ -n "$formatting_needs_fixing" ];then
+    echo "Formatting needs fixing:"
+    echo "$formatting_needs_fixing"
+    exit 1
+  fi
+
+fi
diff --git a/test_git_cache.py b/test_git_cache.py
new file mode 100644 (file)
index 0000000..7408cad
--- /dev/null
+++ b/test_git_cache.py
@@ -0,0 +1,196 @@
+import os.path
+import tempfile
+import shutil
+import subprocess
+import unittest
+
+import git_cache
+
+
+def _git(directory: str, *args: str) -> bytes:
+    """Run git in `directory` and return its stdout, raising on failure."""
+    p = subprocess.run(['git', '-C', directory] + list(args),
+                       stdout=subprocess.PIPE, check=True)
+    return p.stdout
+
+
+def _commit_file(
+        directory: str,
+        filename: str,
+        contents: str,
+        commit_message: str) -> None:
+    """Write `contents` to `filename` in `directory` and commit it."""
+    with open(os.path.join(directory, filename), 'w') as f:
+        f.write(contents)
+    _git(directory, 'add', filename)
+    _git(directory, 'commit', '-m', commit_message)
+
+
+# pylint: disable=too-many-public-methods
+class TestGitCache(unittest.TestCase):
+    """Exercise git_cache against a throwaway upstream repo and cache dir."""
+
+    def setUp(self) -> None:
+        # Snapshot the environment so tearDown can undo every change below.
+        self.saved_environ = dict(os.environ)
+
+        self.xdgcache = tempfile.TemporaryDirectory(prefix='git_cache_test-')
+        os.environ['XDG_CACHE_HOME'] = self.xdgcache.name
+
+        os.environ['GIT_AUTHOR_NAME'] = 'test_git_cache'
+        os.environ['GIT_COMMITTER_NAME'] = 'test_git_cache'
+        os.environ['GIT_AUTHOR_EMAIL'] = 'test_git_cache@example.com'
+        os.environ['GIT_COMMITTER_EMAIL'] = 'test_git_cache@example.com'
+
+        self.tempdir = tempfile.TemporaryDirectory(prefix='git_cache_test-')
+        self.upstream = os.path.join(self.tempdir.name, 'upstream')
+        subprocess.run(['git', 'init', self.upstream], check=True)
+        # The tests fetch 'master'; don't rely on git's default branch name.
+        _git(self.upstream, 'symbolic-ref', 'HEAD', 'refs/heads/master')
+        _commit_file(self.upstream, 'file', 'Contents', 'First commit')
+
+    def tearDown(self) -> None:
+        os.environ.clear()
+        os.environ.update(self.saved_environ)
+
+        self.tempdir.cleanup()
+        self.xdgcache.cleanup()
+
+    def test_fetch(self) -> None:
+        d, rev = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(_git(d, 'show', '%s:file' % rev), b'Contents')
+
+    def test_fetch_twice(self) -> None:
+        d1, rev1 = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(_git(d1, 'show', '%s:file' % rev1), b'Contents')
+        d2, rev2 = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(d1, d2)
+        self.assertEqual(rev1, rev2)
+        self.assertEqual(_git(d2, 'show', '%s:file' % rev2), b'Contents')
+
+    def test_fetch_then_ensure(self) -> None:
+        d1, rev = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(_git(d1, 'show', '%s:file' % rev), b'Contents')
+        d2 = git_cache.ensure_rev_available(self.upstream, 'master', rev)
+        self.assertEqual(d1, d2)
+        self.assertEqual(_git(d2, 'show', '%s:file' % rev), b'Contents')
+
+    def test_ensure_then_fetch(self) -> None:
+        rev1 = _git(
+            self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        d1 = git_cache.ensure_rev_available(self.upstream, 'master', rev1)
+        self.assertEqual(_git(d1, 'show', '%s:file' % rev1), b'Contents')
+        d2, rev2 = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(d1, d2)
+        self.assertEqual(rev1, rev2)
+        self.assertEqual(_git(d2, 'show', '%s:file' % rev2), b'Contents')
+
+    def test_fetch_new_file(self) -> None:
+        d1, rev1 = git_cache.fetch(self.upstream, 'master')
+        _commit_file(self.upstream, 'foofile', 'foo', 'Foo')
+        d2, rev2 = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(d1, d2)
+        self.assertNotEqual(rev1, rev2)
+        self.assertEqual(_git(d2, 'show', '%s:foofile' % rev2), b'foo')
+
+    def test_ensure_doesnt_fetch_new_file(self) -> None:
+        d1, rev1 = git_cache.fetch(self.upstream, 'master')
+        _commit_file(self.upstream, 'foofile', 'foo', 'Foo')
+        rev2 = _git(
+            self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        self.assertNotEqual(rev1, rev2)
+        d2 = git_cache.ensure_rev_available(self.upstream, 'master', rev1)
+        self.assertEqual(d1, d2)
+        p = subprocess.run(
+            ['git', '-C', d2, 'show', '%s:foofile' % rev2], check=False)
+        self.assertNotEqual(p.returncode, 0)
+
+    def test_ensure_doesnt_fetch_from_deleted_upstream(self) -> None:
+        d1, rev = git_cache.fetch(self.upstream, 'master')
+        self.tempdir.cleanup()
+        d2 = git_cache.ensure_rev_available(self.upstream, 'master', rev)
+        self.assertEqual(d1, d2)
+
+    def test_ensure_fetches_new_file(self) -> None:
+        d1, rev1 = git_cache.fetch(self.upstream, 'master')
+        _commit_file(self.upstream, 'foofile', 'foo', 'Foo')
+        rev2 = _git(
+            self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        self.assertNotEqual(rev1, rev2)
+        d2 = git_cache.ensure_rev_available(self.upstream, 'master', rev2)
+        self.assertEqual(d1, d2)
+        self.assertEqual(_git(d2, 'show', '%s:foofile' % rev2), b'foo')
+
+    def test_fetch_raises_on_invalid_repo(self) -> None:
+        self.tempdir.cleanup()
+        with self.assertRaises(Exception):
+            git_cache.fetch(self.upstream, 'master')
+
+    def test_ensure_raises_on_invalid_repo(self) -> None:
+        rev = _git(self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        self.tempdir.cleanup()
+        with self.assertRaises(Exception):
+            git_cache.ensure_rev_available(self.upstream, 'master', rev)
+
+    def test_fetch_raises_on_invalid_ref(self) -> None:
+        with self.assertRaises(Exception):
+            git_cache.fetch(self.upstream, 'nobranch')
+
+    def test_ensure_raises_on_invalid_ref(self) -> None:
+        rev = _git(self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        with self.assertRaises(Exception):
+            git_cache.ensure_rev_available(self.upstream, 'nobranch', rev)
+
+    def test_ensure_raises_on_invalid_rev(self) -> None:
+        # Use a ref that exists so the failure is caused by the rev alone.
+        with self.assertRaises(Exception):
+            git_cache.ensure_rev_available(
+                self.upstream,
+                'master',
+                '1234567890abcdef01234567890abcdef1234567')
+
+    def test_ensure_raises_on_rev_from_other_branch(self) -> None:
+        _git(self.upstream, 'checkout', '-b', 'otherbranch')
+        _commit_file(self.upstream, 'foofile', 'foo', 'Foo')
+        rev = _git(self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        with self.assertRaises(Exception):
+            git_cache.ensure_rev_available(self.upstream, 'master', rev)
+
+    def test_ensure_other_branch(self) -> None:
+        _git(self.upstream, 'checkout', '-b', 'otherbranch')
+        _commit_file(self.upstream, 'foofile', 'foo', 'Foo')
+        rev = _git(self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        d = git_cache.ensure_rev_available(self.upstream, 'otherbranch', rev)
+        self.assertEqual(_git(d, 'show', '%s:foofile' % rev), b'foo')
+
+    def test_fetch_after_cache_deleted(self) -> None:
+        d1, rev1 = git_cache.fetch(self.upstream, 'master')
+        shutil.rmtree(d1)
+        d2, rev2 = git_cache.fetch(self.upstream, 'master')
+        self.assertEqual(d1, d2)
+        self.assertEqual(rev1, rev2)
+        self.assertEqual(_git(d2, 'show', '%s:file' % rev2), b'Contents')
+
+    def test_ensure_after_cache_deleted(self) -> None:
+        d1, rev = git_cache.fetch(self.upstream, 'master')
+        shutil.rmtree(d1)
+        d2 = git_cache.ensure_rev_available(self.upstream, 'master', rev)
+        self.assertEqual(d1, d2)
+        self.assertEqual(_git(d2, 'show', '%s:file' % rev), b'Contents')
+
+    def test_fetch_raises_on_amend(self) -> None:
+        git_cache.fetch(self.upstream, 'master')
+        _git(self.upstream, 'commit', '--amend', '-m', 'Amended')
+        with self.assertRaises(Exception):
+            git_cache.fetch(self.upstream, 'master')
+
+    def test_ensure_raises_on_amend(self) -> None:
+        git_cache.fetch(self.upstream, 'master')
+        _git(self.upstream, 'commit', '--amend', '-m', 'Amended')
+        rev = _git(self.upstream, 'log', '--format=%H', '-n1').strip().decode()
+        with self.assertRaises(Exception):
+            git_cache.ensure_rev_available(self.upstream, 'master', rev)
+
+
+if __name__ == '__main__':
+    unittest.main()