]> git.scottworley.com Git - git-cache/blame - git_cache.py
Parse command line arguments more extensibly
[git-cache] / git_cache.py
CommitLineData
bef7ce53
SW
1# It would be nice if we could share the nix git cache, but as of the
2# time of writing it is transitioning from gitv2 (deprecated) to gitv3
3# (not ready yet), and trying to straddle them both is too far into nix
4# implementation details for my comfort. So we re-implement here half of
5# nix's builtins.fetchGit. :(
6
d1ab0853 7import argparse
35000f72 8import functools
bef7ce53
SW
9import hashlib
10import logging
11import os
12import subprocess
347be7cf 13import sys
35000f72 14import time
bef7ce53 15
35000f72 16from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
bef7ce53 17
f36d5c6f
SW
18import backoff
19
35000f72
SW
# Plain-string type aliases, used purely to make signatures self-describing.

# Filesystem path, eg: "/home/user/.cache/git-cache/v1"
Path = str
# Repository URL, eg: "https://github.com/NixOS/nixpkgs.git"
Repo = str
# Branch or tag name, eg: "master" or "v1.0.0"
Ref = str
# Full commit hash, eg: "53a27350551844e1ed1a9257690294767389ef0d"
Rev = str
# Anything accepted where either a ref name or a commit hash will do.
RefOrRev = Union[Ref, Rev]
bef7ce53
SW
25
26
35000f72
SW
27class _LogEntry(NamedTuple):
28 ref: Ref
29 rev: Rev
30
31
# Generic element type used by the iterator helpers in this module.
T = TypeVar("T")
33
34
35def _repo_hashname(repo: Repo) -> str:
36 return hashlib.sha256(repo.encode()).hexdigest()
37
38
bef7ce53
SW
def git_cachedir(repo: Repo) -> Path:
    """Return the directory that holds the local cache for `repo`.

    The location is `<cache home>/git-cache/v1/<sha256 of repo URL>`, where
    the cache home is $XDG_CACHE_HOME, falling back to `~/.cache`.

    Args:
        repo: URL of the remote repository.

    Returns:
        The cache directory path. The directory is not created here.
    """
    # Use xdg module when it's less painful to have as a dependency
    #
    # The XDG Base Directory spec says an *empty* variable must be treated
    # like an unset one. Using `or` (rather than a .get() default) keeps an
    # empty XDG_CACHE_HOME from producing a path relative to the cwd.
    cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
48
49
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log file for `repo`.

    The location is `<data home>/git-cache/v1/<sha256 of repo URL>`, where
    the data home is $XDG_DATA_HOME, falling back to `~/.local/share`.

    Args:
        repo: URL of the remote repository.

    Returns:
        The log file path. Neither the file nor its directory is created here.
    """
    # Use xdg module when it's less painful to have as a dependency
    #
    # The XDG Base Directory spec says an *empty* variable must be treated
    # like an unset one. Using `or` (rather than a .get() default) keeps an
    # empty XDG_DATA_HOME from producing a path relative to the cwd.
    data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
bef7ce53
SW
59
60
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether `ancestor` is an ancestor of `descendant` in the cache.

    Runs `git merge-base --is-ancestor` inside the cache directory for
    `repo`. Only a zero exit status counts as success; every non-zero
    status (git failing for any reason included) is reported as False
    rather than raised.
    """
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cachedir,
        'merge-base', '--is-ancestor',
        ancestor, descendant,
    ]
    result = subprocess.run(command, check=False)
    return result.returncode == 0
73
74
a5d42d8d
SW
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev) -> None:
    """Raise unless `ancestor` is an ancestor of `descendant`.

    Raises:
        Exception: if the ancestry check performed by is_ancestor() fails.
    """
    if is_ancestor(repo, descendant, ancestor):
        return
    raise Exception(f'{ancestor} is not an ancestor of {descendant}')
bef7ce53
SW
81
82
35000f72
SW
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield the entries of the fetch log for `repo`, oldest first.

    Each log line has four whitespace-separated fields; the last two are
    the revision and the ref, the first two are ignored here. A missing
    log file yields nothing. Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line does not have four fields.
    """
    filename = _log_filename(repo)
    # Open first and handle absence afterwards: checking for existence and
    # then opening would race with anything removing the file in between.
    try:
        f = open(filename, 'r')
    except FileNotFoundError:
        return
    with f:
        for lineno, line in enumerate(f, start=1):
            fields = line.strip().split(maxsplit=3)
            if not fields:
                # Tolerate stray blank lines (eg: from hand edits).
                continue
            if len(fields) != 4:
                raise ValueError(
                    'Malformed fetch log entry at %s:%d: %r' %
                    (filename, lineno, line))
            _, _, rev, ref = fields
            yield _LogEntry(ref, rev)
91
92
93def _last(it: Iterator[T]) -> Optional[T]:
94 return functools.reduce(lambda a, b: b, it, None)
95
96
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the most recently logged revision for `ref`, if any.

    Scans the whole fetch log for `repo` and keeps the revision of the last
    entry whose ref matches; returns None when no entry matches.
    """
    latest = None
    for entry in _read_fetch_log(repo):
        if entry.ref == ref:
            latest = entry.rev
    return latest
100
101
def _log_fetch(repo: Repo, ref: Ref, rev: Rev) -> None:
    """Append a record of fetching `ref` at `rev` to the fetch log.

    Before writing, the previously logged revision for the same ref (if
    there is one) must be an ancestor of `rev`; otherwise verify_ancestry()
    raises and nothing is written.

    Each record is one line: `<timestamp> fetch <rev> <ref>`.
    """
    prev_rev = _previous_fetched_rev(repo, ref)
    if prev_rev is not None:
        verify_ancestry(repo, rev, prev_rev)
    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    # The timestamp must not contain whitespace: the reader splits each
    # line on whitespace. The original format string was missing the dash
    # between month and day ('%Y-%m%d'); the reader ignores this field, so
    # old and new entries can coexist in one log.
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S%z')
    with open(filename, 'a') as f:
        f.write('%s fetch %s %s\n' % (timestamp, rev, ref))
111
112
f36d5c6f
SW
def _backoff_max_time() -> int:
    """Return the value handed to backoff as max_time.

    Read from $BACKOFF_MAX_TIME on each call, defaulting to 30.
    """
    return int(os.environ.get('BACKOFF_MAX_TIME', '30'))


@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=_backoff_max_time)
def _git_fetch(cachedir: Path, repo: Repo, ref: Ref) -> None:
    """Fetch `ref` from `repo` into the same-named ref in `cachedir`.

    A failing `git fetch` raises CalledProcessError, which the backoff
    decorator retries with exponential delays.
    """
    # We don't use --force here because we want to abort and freak out if forced
    # updates are happening.
    refspec = '%s:%s' % (ref, ref)
    command = ['git', '-C', cachedir, 'fetch', repo, refspec]
    subprocess.run(command, check=True)
122
123
bef7ce53
SW
def fetch(repo: Repo, ref: Ref) -> Tuple[Path, Rev]:
    """Fetch `ref` from `repo` into the local cache and log the result.

    Creates the bare cache repository on first use, fetches the ref,
    resolves the revision it now points at, checks ancestry and appends
    an entry to the fetch log.

    Args:
        repo: URL of the remote repository.
        ref: Name of the ref to fetch.

    Returns:
        A `(cachedir, rev)` tuple: the cache directory and the revision
        that `refs/heads/<ref>` points at after the fetch.

    Raises:
        subprocess.CalledProcessError: if a git command fails.
        Exception: if an ancestry check fails (see verify_ancestry()).
    """
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        # git's chatter goes to stderr so stdout carries only our result.
        subprocess.run(['git', 'init', '--bare', cachedir],
                       check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref)

    # Ask git for the ref's value instead of reading refs/heads/<ref> from
    # disk: once refs are packed (git pack-refs / git gc) the loose file no
    # longer exists even though the ref does.
    resolved = subprocess.run(
        ['git', '-C', cachedir, 'rev-parse', '--verify', 'refs/heads/' + ref],
        check=True, stdout=subprocess.PIPE, universal_newlines=True)
    rev = Rev(resolved.stdout.strip())
    verify_ancestry(repo, ref, rev)
    _log_fetch(repo, ref, rev)

    return cachedir, rev
140
141
def ensure_rev_available(repo: Repo, ref: Ref, rev: Rev) -> Path:
    """Make sure `rev` is present in the cache for `repo`; return the cache dir.

    If the cache directory exists and `rev` is already an ancestor of
    `ref` there, nothing is fetched. Otherwise `ref` is fetched, and the
    call fails unless `rev` then exists and is an ancestor of `ref`.

    Raises:
        subprocess.CalledProcessError: if a git command fails, including
            the check that `rev` exists after the fetch.
        Exception: if an ancestry check fails (see verify_ancestry()).
    """
    cachedir = git_cachedir(repo)
    already_cached = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)

    if not already_cached:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        exists_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(exists_check, check=True)
        verify_ancestry(repo, ref, rev)

    return cachedir
347be7cf
SW
156
157
def _main() -> None:
    """Command-line entry point.

    With only a repo and a ref, fetches the ref and prints
    `<rev> <cachedir>`. With a rev as well, ensures that revision is
    available and prints the cache directory.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref', metavar='Ref', type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev', metavar='Rev', type=Rev, nargs='?',
        help='Ensure that this revision is present. '
             'If this revision is already present locally, no network operations are performed.')
    options = parser.parse_args()

    if options.rev is not None:
        print(ensure_rev_available(options.repo, options.ref, options.rev))
        return

    cachedir, rev = fetch(options.repo, options.ref)
    print('{1} {0}'.format(cachedir, rev))