]> git.scottworley.com Git - git-cache/blame - git_cache.py
Release 1.4.0
[git-cache] / git_cache.py
CommitLineData
bef7ce53
SW
1# It would be nice if we could share the nix git cache, but as of the
2# time of writing it is transitioning from gitv2 (deprecated) to gitv3
3# (not ready yet), and trying to straddle them both is too far into nix
4# implementation details for my comfort. So we re-implement here half of
5# nix's builtins.fetchGit. :(
6
d1ab0853 7import argparse
35000f72 8import functools
bef7ce53
SW
9import hashlib
10import logging
11import os
12import subprocess
347be7cf 13import sys
35000f72 14import time
bef7ce53 15
35000f72 16from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
bef7ce53 17
f36d5c6f
SW
18import backoff
19
35000f72
SW
# Plain-string aliases: they add no runtime checking, they only label what a
# given str argument or return value is meant to hold.
Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str  # eg: "master" or "v1.0.0"
Rev = str  # eg: "53a27350551844e1ed1a9257690294767389ef0d"
# Used where either a ref name or a revision hash is acceptable.
RefOrRev = Union[Ref, Rev]
bef7ce53
SW
25
26
35000f72
SW
class _LogEntry(NamedTuple):
    """A (ref, rev) pair, as yielded for each line of a repo's fetch log."""
    # Name of the ref that was fetched.
    ref: Ref
    # Revision recorded for that ref on the log line.
    rev: Rev
30
31
# Element type variable for the generic iterator helper _last().
T = TypeVar('T')
33
34
def _repo_hashname(repo: Repo) -> str:
    """Return the hexadecimal SHA-256 digest of the repo string.

    The digest is used as the per-repo path component by the cache-directory
    and fetch-log path helpers.
    """
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
37
38
bef7ce53
SW
def git_cachedir(repo: Repo) -> Path:
    """Return the directory that holds the local bare-repo cache for `repo`.

    The location is ``<cache home>/git-cache/v1/<sha256 of repo>``, where the
    cache home is ``$XDG_CACHE_HOME`` or, when that variable is unset or
    empty, ``~/.cache``.  Nothing is created on disk by this function.
    """
    # Use xdg module when it's less painful to have as a dependency
    #
    # `or` rather than a .get() default: the XDG Base Directory spec treats a
    # set-but-empty variable like an unset one.  With a plain default, an
    # empty value would make os.path.join() produce a path relative to the
    # current working directory.
    cache_home = os.environ.get('XDG_CACHE_HOME') or \
        os.path.expanduser('~/.cache')

    return Path(os.path.join(
        cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
48
49
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log for `repo`.

    The location is ``<data home>/git-cache/v1/<sha256 of repo>``, where the
    data home is ``$XDG_DATA_HOME`` or, when that variable is unset or empty,
    ``~/.local/share``.  Nothing is created on disk by this function.
    """
    # Use xdg module when it's less painful to have as a dependency
    #
    # `or` rather than a .get() default: the XDG Base Directory spec treats a
    # set-but-empty variable like an unset one.  With a plain default, an
    # empty value would make os.path.join() produce a path relative to the
    # current working directory.
    data_home = os.environ.get('XDG_DATA_HOME') or \
        os.path.expanduser('~/.local/share')

    return Path(os.path.join(
        data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
bef7ce53
SW
59
60
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether `ancestor` is an ancestor of `descendant` in the cache.

    Runs ``git merge-base --is-ancestor`` inside the cache directory of
    `repo`.  Only an exit status of 0 counts as True; every other exit status
    is reported as False and no exception is raised for it.
    """
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cachedir,
        'merge-base', '--is-ancestor',
        ancestor, descendant,
    ]
    return subprocess.run(command, check=False).returncode == 0
73
74
a5d42d8d
SW
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless `ancestor` is an ancestor of `descendant`.

    With `force` set the check is skipped entirely and git is not consulted.

    Raises:
        Exception: if the check runs and is_ancestor() returns False.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise Exception('%s is not an ancestor of %s' % (ancestor, descendant))
bef7ce53
SW
82
83
35000f72
SW
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield one _LogEntry per record in the fetch log of `repo`, in file order.

    Each record is four whitespace-separated fields; the first two are
    discarded and the last two are taken as rev and ref.  A log file that
    does not exist yields nothing.  Blank lines are skipped.

    Raises:
        OSError: if the log exists but cannot be opened (anything other than
            FileNotFoundError is propagated).
        ValueError: if a non-blank line does not have four fields.
    """
    filename = _log_filename(repo)
    # Open first and handle absence afterwards, instead of testing
    # os.path.exists() beforehand: that avoids the check-then-open race, and
    # it stops a log that merely cannot be stat'ed (e.g. permission denied)
    # from being silently treated as "no previous fetches".
    try:
        log = open(filename, 'r')
    except FileNotFoundError:
        return
    with log:
        for line in log:
            record = line.strip()
            if not record:
                # A stray empty line carries no entry; don't fail on it.
                continue
            _, _, rev, ref = record.split(maxsplit=3)
            yield _LogEntry(ref, rev)
92
93
def _last(it: Iterator[T]) -> Optional[T]:
    """Consume `it` and return its final item, or None if it yields nothing."""
    final: Optional[T] = None
    for final in it:
        pass
    return final
96
97
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev of the last fetch-log entry for `ref`, if there is one.

    The whole log of `repo` is scanned; later entries for the same ref
    override earlier ones.  Returns None when no entry mentions `ref`.
    """
    latest: Optional[Rev] = None
    for entry in _read_fetch_log(repo):
        if entry.ref == ref:
            latest = entry.rev
    return latest
101
102
083b90e7
SW
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of fetching `ref` at `rev` to the fetch log of `repo`.

    Unless `force` is set, the previously logged rev for `ref` (if any) must
    be an ancestor of `rev`; otherwise verify_ancestry() raises and nothing
    is written.  The log's directory is created when missing.

    Each record is ``<timestamp> <action> <rev> <ref>``, where action is
    ``FORCEDFETCH`` for forced fetches and ``fetch`` otherwise.
    """
    if not force:
        prev_rev = _previous_fetched_rev(repo, ref)
        if prev_rev is not None:
            verify_ancestry(repo, rev, prev_rev)

    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    action = 'FORCEDFETCH' if force else 'fetch'
    # The format previously read '%Y-%m%d-...', which ran month and day
    # together (e.g. 2020-0715).  The log reader discards this field, so
    # records written with either format remain readable.
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S%z')
    with open(filename, 'a') as f:
        f.write('%s %s %s %s\n' % (timestamp, action, rev, ref))
114
115
def _show_force_warning() -> None:
    """Print the --force warning box to stderr and count down before returning.

    The countdown lasts 15 seconds, sleeping one second per step, unless the
    FORCE_WARNING_TIME environment variable supplies a different integer.
    When that override is present an extra notice is printed inside the box.

    Every row is produced with the same 74-column frame as the countdown rows
    and the closing rule, so the box edges always line up.

    Raises:
        ValueError: if FORCE_WARNING_TIME is set but is not an integer.
    """
    def framed(text: str = '') -> str:
        # 2 + 70 + 2 columns: matches the '*' * 74 rules.
        return '* %-70s *' % text

    banner = [
        'WARNING: git-cache INVOKED WITH --force!',
        '',
        'This mode allows previously-fetched refs to be overwritten to point to',
        "non-descendants -- commits that don't have the previous version of the",
        'ref in their history!',
        '',
        "This should only be invoked by a human operator who knows what they're",
        'doing to correct a specific, known, problem. Care should be taken to',
        'prevent recurrence.',
        '',
        'Press ^C to abort.',
        '',
    ]
    # Leading blank line, then the top rule, then the framed text.
    print('\n'.join([''] + ['*' * 74] + [framed(text) for text in banner]),
          file=sys.stderr)

    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    warn_time: int
    if warn_time_override is None:
        warn_time = 15
    else:
        warn_time = int(warn_time_override)
        notice = [
            'WARNING DISPLAY TIME OVERRIDDEN'.center(58),
            '',
            'This message is intended to be displayed long enough for a',
            'human operator to read it and have a chance to abort. An',
            'override for the delay time is provided FOR THE UNIT TESTS',
            'to avoid delaying software builds unnecessarily. This is',
            'INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD',
            'NEVER BE SEEN OUTSIDE BUILD LOGS!',
        ]
        bang_rule = '!' * 66
        inner = [bang_rule]
        inner.extend('!!  %-58s  !!' % text for text in notice)
        inner.append(bang_rule)
        rows = [framed(row.center(70)) for row in inner]
        rows.append(framed())
        print('\n'.join(rows), file=sys.stderr)

    for i in range(warn_time, 0, -1):
        print(framed("Continuing in %d seconds..." % i), file=sys.stderr)
        time.sleep(1)
    print('*' * 74, file=sys.stderr)
157
35000f72
SW
158
159
f36d5c6f
SW
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Run ``git fetch`` of `ref` from `repo` into the repository at `cachedir`.

    The refspec is ``<ref>:<ref>``; ``--force`` is passed through when `force`
    is set.  A failing git invocation raises CalledProcessError, which the
    decorator retries using backoff.expo with max_time taken from the
    BACKOFF_MAX_TIME environment variable (default '30').
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    command.extend([repo, '%s:%s' % (ref, ref)])
    subprocess.run(command, check=True)
f36d5c6f
SW
172
173
083b90e7
SW
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch `ref` from `repo` into the local cache and record the fetch.

    The bare cache repository is created on first use.  After fetching, the
    rev that `ref` now points to is resolved, checked with verify_ancestry()
    and appended to the fetch log.  With `force` set, the force warning is
    shown first and `force` is passed on to the fetch, the ancestry check and
    the log entry.

    Returns:
        (cache directory, rev the ref points to after the fetch).

    Raises:
        subprocess.CalledProcessError: if a git command fails.
        Exception: from the ancestry checks when not forced.
    """
    if force:
        _show_force_warning()
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        # git's stdout goes to our stderr so it is not mixed into our output.
        subprocess.run(['git', 'init', '--bare', cachedir],
                       check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # Ask git for the ref's value instead of reading refs/heads/<ref> from
    # disk: once refs have been packed (git gc / git pack-refs) the loose
    # file no longer exists, and a fetch that brings nothing new does not
    # recreate it.
    rev = Rev(subprocess.check_output(
        ['git', '-C', cachedir, 'rev-parse', '--verify', 'refs/heads/' + ref],
        universal_newlines=True).strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
192
193
083b90e7
SW
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure `rev` is in the local cache of `repo`, fetching `ref` if needed.

    When the cache directory exists and `rev` is already an ancestor of `ref`
    there, nothing is fetched.  Otherwise `ref` is fetched, the presence of
    `rev` is confirmed with ``git cat-file -e`` and its ancestry relative to
    `ref` is verified.

    Returns:
        The cache directory.

    Raises:
        subprocess.CalledProcessError: if a git command fails, including the
            case where `rev` is still absent after the fetch.
        Exception: from the ancestry checks when not forced.
    """
    cachedir = git_cachedir(repo)
    already_present = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_present:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)

        logging.debug('Verifying that fetch retrieved rev %s', rev)
        presence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(presence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)

    return cachedir
347be7cf
SW
212
213
def _main() -> None:
    """Command-line entry point.

    Parses ``[--force] Repo Ref [Rev]``.  Without Rev, fetches Ref and prints
    "<rev> <cachedir>".  With Rev, ensures that revision is available and
    prints the cache directory.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        '--force', action='store_true',
        help='Recover from a force-push in the remote repo')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref', metavar='Ref', type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev', metavar='Rev', type=Rev, nargs='?',
        help=('Ensure that this revision is present. '
              'If this revision is already present locally, no network operations are performed.'))
    args = parser.parse_args()

    if args.rev is not None:
        print(ensure_rev_available(
            args.repo, args.ref, args.rev, force=args.force))
        return

    cachedir, rev = fetch(args.repo, args.ref, force=args.force)
    print('{1} {0}'.format(cachedir, rev))