]> git.scottworley.com Git - git-cache/blame - git_cache.py
Release 1.5.0
[git-cache] / git_cache.py
CommitLineData
50685beb
SW
1# git-cache: Cache git content locally
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
bef7ce53
SW
8# It would be nice if we could share the nix git cache, but as of the
9# time of writing it is transitioning from gitv2 (deprecated) to gitv3
10# (not ready yet), and trying to straddle them both is too far into nix
11# implementation details for my comfort. So we re-implement here half of
12# nix's builtins.fetchGit. :(
13
d1ab0853 14import argparse
35000f72 15import functools
bef7ce53
SW
16import hashlib
17import logging
18import os
19import subprocess
347be7cf 20import sys
35000f72 21import time
bef7ce53 22
35000f72 23from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
bef7ce53 24
f36d5c6f
SW
25import backoff
26
a3835f50
SW
27
class GitCacheError(Exception):
    """Error raised by git-cache, e.g. when an ancestry check fails."""
30
31
35000f72
SW
# Plain-string aliases; they exist to document what each str argument means.

# Filesystem path, eg: "/home/user/.cache/git-cache/v1"
Path = str
# Git repository URL, eg: "https://github.com/NixOS/nixpkgs.git"
Repo = str
# Ref name, eg: "master" or "v1.0.0"
Ref = str
# Commit hash, eg: "53a27350551844e1ed1a9257690294767389ef0d"
Rev = str
# Either a ref or a rev, for arguments that accept both.
RefOrRev = Union[Ref, Rev]
bef7ce53
SW
37
38
35000f72
SW
class _LogEntry(NamedTuple):
    """One record of the fetch log: a ref and the rev recorded for it."""

    # The ref that was fetched.
    ref: Ref
    # The revision logged for that ref.
    rev: Rev
42
43
# Generic element type for the iterator helper _last().
T = TypeVar('T')
45
46
def _repo_hashname(repo: Repo) -> str:
    """Return the hex SHA-256 digest of ``repo``, used as its on-disk name."""
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
49
50
bef7ce53
SW
def git_cachedir(repo: Repo) -> Path:
    """Return the cache directory used for ``repo``.

    The result is ``<cache home>/git-cache/v1/<sha256 of repo>``, where the
    cache home is ``$XDG_CACHE_HOME`` or, when that variable is unset or
    empty, ``~/.cache``.
    """
    # Use xdg module when it's less painful to have as a dependency
    # The XDG spec treats an empty variable like an unset one; using the
    # empty string as-is would silently produce a relative path.
    cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
60
61
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log kept for ``repo``.

    The result is ``<data home>/git-cache/v1/<sha256 of repo>``, where the
    data home is ``$XDG_DATA_HOME`` or, when that variable is unset or
    empty, ``~/.local/share``.
    """
    # Use xdg module when it's less painful to have as a dependency
    # The XDG spec treats an empty variable like an unset one; using the
    # empty string as-is would silently produce a relative path.
    data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
bef7ce53
SW
71
72
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether ``ancestor`` is an ancestor of ``descendant``.

    Runs ``git merge-base --is-ancestor`` inside the cache directory for
    ``repo``.  A leading ``'tag '`` on ``descendant`` is dropped before it
    is handed to git.  Any non-zero exit status from git is reported as
    False.
    """
    repo_dir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', repo_dir,
        'merge-base', '--is-ancestor',
        ancestor,
        descendant.removeprefix('tag '),
    ]
    finished = subprocess.run(command, check=False)
    return finished.returncode == 0
85
86
a5d42d8d
SW
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless ``ancestor`` is an ancestor of ``descendant``.

    With ``force`` set the check is skipped entirely.

    Raises:
        GitCacheError: if the ancestry check fails and ``force`` is False.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise GitCacheError(f'{ancestor} is not an ancestor of {descendant}')
bef7ce53
SW
94
95
35000f72
SW
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield the entries of the fetch log for ``repo`` in file order.

    Each log line holds four whitespace-separated fields; the first two are
    ignored here, the third is the rev and the remainder of the line is the
    ref (which may itself contain a space, eg: "tag v1.0.0").  A missing
    log file yields nothing, and blank lines are skipped.
    """
    try:
        # Open directly instead of testing for existence first, so a file
        # that disappears in between cannot cause a spurious failure.
        log = open(_log_filename(repo), 'r', encoding='utf-8')
    except FileNotFoundError:
        return
    with log:
        for line in log:
            record = line.strip()
            if not record:
                continue
            _, _, rev, ref = record.split(maxsplit=3)
            yield _LogEntry(ref, rev)
104
105
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by ``it``, or None if it is empty."""
    final: Optional[T] = None
    for final in it:
        pass
    return final
108
109
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev most recently logged for ``ref``, or None if none is."""
    latest: Optional[Rev] = None
    for entry in _read_fetch_log(repo):
        if entry.ref == ref:
            # Later log entries supersede earlier ones.
            latest = entry.rev
    return latest
113
114
083b90e7
SW
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of fetching ``ref`` at ``rev`` to the repo's fetch log.

    Unless ``force`` is set, the rev previously logged for ``ref`` (if any)
    must be an ancestor of ``rev``; otherwise verify_ancestry raises
    GitCacheError and nothing is written.
    """
    prev_rev = None if force else _previous_fetched_rev(repo, ref)
    if prev_rev is not None:
        verify_ancestry(repo, rev, prev_rev)

    log_path = _log_filename(repo)
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    timestamp = time.strftime("%Y-%m%d-%H:%M:%S%z")
    action = "FORCEDFETCH" if force else "fetch"
    with open(log_path, 'a', encoding='utf-8') as log:
        log.write(f'{timestamp} {action} {rev} {ref}\n')
083b90e7
SW
127
128
129def _show_force_warning() -> None:
130 print('''
131**************************************************************************
132* WARNING: git-cache INVOKED WITH --force! *
133* *
134* This mode allows previously-fetched refs to be overwritten to point to *
135* non-descendants -- commits that don't have the previous version of the *
136* the ref in their history! *
137* *
138* This should only be invoked by a human operator who knows what they're *
139* doing to correct a specific, known, problem. Care should be taken to *
140* prevent recurrence. *
141* *
142* Press ^C to abort. *
143* *
144''', end='', file=sys.stderr)
145 warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
146 warn_time: int
147 if warn_time_override is None:
148 warn_time = 15
149 else:
150 warn_time = int(warn_time_override)
151 print(
152 '''* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
153* !! WARNING DISPLAY TIME OVERRIDDEN !! *
154* !! !! *
155* !! This message is intended to be displayed long enough for a !! *
156* !! human operator to read it and have a chance to abort. An !! *
157* !! override for the delay time is provided FOR THE UNIT TESTS !! *
158* !! to avoid delaying software builds unnecessarily. This is !! *
159* !! INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD !! *
160* !! NEVER BE SEEN OUTSIDE BUILD LOGS! !! *
161* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
162* *
163''', end='', file=sys.stderr)
164
165 for i in range(warn_time, 0, -1):
f580771a 166 msg = f'* {f"Continuing in {i} seconds...":-70s} *'
083b90e7
SW
167 print(msg, file=sys.stderr)
168 time.sleep(1)
169 print('*' * 74, file=sys.stderr)
170
35000f72 171
f36d5c6f
SW
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Run ``git fetch`` for ``ref`` from ``repo`` into ``cachedir``.

    A ref written as ``'tag NAME'`` is fetched with git's ``tag NAME``
    argument form; any other ref is fetched with the refspec ``ref:ref``.
    ``--force`` is passed through when ``force`` is set.  A failing git
    command raises CalledProcessError, which the decorator retries with
    exponential backoff for up to BACKOFF_MAX_TIME seconds (default 30).
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    command.append(repo)
    if ref.startswith('tag '):
        command += ['tag', ref.removeprefix('tag ')]
    else:
        command.append(f'{ref}:{ref}')
    subprocess.run(command, check=True)
f36d5c6f
SW
187
188
083b90e7
SW
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch ``ref`` from ``repo`` into the local cache.

    Creates the bare cache repository on first use, fetches the ref, checks
    the result with verify_ancestry and records it in the fetch log.  With
    ``force`` set, the force warning is shown first and the ancestry checks
    are skipped.

    Returns:
        The cache directory and the rev that ``ref`` now points to.

    Raises:
        GitCacheError: if an ancestry check fails and ``force`` is False.
        subprocess.CalledProcessError: if a git command fails.
    """
    if force:
        _show_force_warning()
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        # git's own chatter goes to stderr so stdout carries only our output.
        subprocess.run(['git',
                        '-c',
                        'init.defaultBranch=git-cache--no-default-branch',
                        'init',
                        '--bare',
                        cachedir],
                       check=True,
                       stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    full_ref = (f'refs/tags/{ref.removeprefix("tag ")}'
                if ref.startswith('tag ')
                else f'refs/heads/{ref}')
    # Ask git for the value instead of reading the file under refs/: once
    # refs have been packed (eg: by gc) that loose file no longer exists.
    resolved = subprocess.run(
        ['git', '-C', cachedir, 'show-ref', '--verify', '--hash', full_ref],
        check=True,
        stdout=subprocess.PIPE,
        encoding='utf-8')
    rev = Rev(resolved.stdout.strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
216
217
083b90e7
SW
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure ``rev`` is present in the cache for ``repo``.

    If the cache directory exists and ``rev`` is already an ancestor of
    ``ref`` there, nothing is fetched.  Otherwise ``ref`` is fetched, the
    presence of ``rev`` is checked with ``git cat-file -e`` and its ancestry
    is verified (skipped when ``force`` is set).

    Returns:
        The cache directory.
    """
    cachedir = git_cachedir(repo)
    already_cached = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_cached:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        presence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(presence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)

    return cachedir
347be7cf
SW
236
237
def _main() -> None:
    """Command-line entry point: parse arguments, then fetch or ensure a rev.

    Without a Rev argument the ref is fetched and ``<rev> <cachedir>`` is
    printed; with one, the rev is ensured to be available and the cache
    directory is printed.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo, help='Git repository URL')
    parser.add_argument(
        'ref',
        metavar='Ref',
        type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev',
        metavar='Rev',
        type=Rev,
        nargs='?',
        help='Ensure that this revision is present. ' +
        'If this revision is already present locally, no network operations are performed.')
    args = parser.parse_args()

    if args.rev is not None:
        location = ensure_rev_available(
            args.repo, args.ref, args.rev, force=args.force)
        print(location)
        return

    cachedir, rev = fetch(args.repo, args.ref, force=args.force)
    print(f'{rev} {cachedir}')