# git.scottworley.com Git - git-cache/blob - git_cache.py
# Rely on nixpkgs' python3Packages.backoff
# [git-cache] / git_cache.py
1 # It would be nice if we could share the nix git cache, but as of the
2 # time of writing it is transitioning from gitv2 (deprecated) to gitv3
3 # (not ready yet), and trying to straddle them both is too far into nix
4 # implementation details for my comfort. So we re-implement here half of
5 # nix's builtins.fetchGit. :(
6
7 import argparse
8 import functools
9 import hashlib
10 import logging
11 import os
12 import subprocess
13 import sys
14 import time
15
16 from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
17
18 import backoff
19
# Plain-string aliases; they only make the signatures below easier to read.

# eg: "/home/user/.cache/git-cache/v1"
Path = str
# eg: "https://github.com/NixOS/nixpkgs.git"
Repo = str
# eg: "master" or "v1.0.0"
Ref = str
# eg: "53a27350551844e1ed1a9257690294767389ef0d"
Rev = str
# Either a Ref or a Rev
RefOrRev = Union[Ref, Rev]
25
26
class _LogEntry(NamedTuple):
    """One record from a fetch log: a ref and the rev recorded for it."""

    # The ref that was fetched
    ref: Ref
    # The rev that was recorded for that ref
    rev: Rev
30
31
# Generic element type for the iterator helpers in this module.
T = TypeVar("T")
33
34
def _repo_hashname(repo: Repo) -> str:
    """Return the hexadecimal SHA-256 digest of the encoded repo string."""
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
37
38
def git_cachedir(repo: Repo) -> Path:
    """Return the cache directory used for *repo*.

    The result is ``<cache home>/git-cache/v1/<hash of repo>``, where the
    cache home is $XDG_CACHE_HOME, or ``~/.cache`` when that variable is
    unset or empty (the XDG Base Directory Specification treats both cases
    the same way).
    """
    # Use xdg module when it's less painful to have as a dependency
    # `or` instead of a .get() default: an empty variable must fall back too,
    # otherwise os.path.join would produce a path relative to the current
    # working directory.
    xdg_cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        xdg_cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
48
49
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log kept for *repo*.

    The result is ``<data home>/git-cache/v1/<hash of repo>``, where the
    data home is $XDG_DATA_HOME, or ``~/.local/share`` when that variable is
    unset or empty (the XDG Base Directory Specification treats both cases
    the same way).
    """
    # Use xdg module when it's less painful to have as a dependency
    # `or` instead of a .get() default: an empty variable must fall back too,
    # otherwise os.path.join would produce a path relative to the current
    # working directory.
    xdg_data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        xdg_data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
59
60
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Ask git whether *ancestor* is an ancestor of *descendant*.

    Runs ``git merge-base --is-ancestor`` inside the cache directory for
    *repo*.  Returns True only when git exits with status 0; every other
    exit status gives False rather than raising.
    """
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git',
        '-C', git_cachedir(repo),
        'merge-base', '--is-ancestor',
        ancestor,
        descendant,
    ]
    # check=False: a non-zero exit status is the answer, not a failure.
    exit_status = subprocess.run(command, check=False).returncode
    return exit_status == 0
73
74
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless *ancestor* is an ancestor of *descendant*.

    When *force* is true the check is skipped entirely and nothing is run.

    Raises:
        Exception: if the ancestry check is performed and fails.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise Exception(f'{ancestor} is not an ancestor of {descendant}')
82
83
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield a _LogEntry for each line of the fetch log of *repo*, in file order.

    Yields nothing when the log file does not exist.
    """
    log_path = _log_filename(repo)
    if os.path.exists(log_path):
        with open(log_path, 'r', encoding='utf-8') as log_file:
            for record in log_file:
                # Each record has four whitespace-separated fields; the
                # first two are not needed here.
                fields = record.strip().split(maxsplit=3)
                _, _, rev, ref = fields
                yield _LogEntry(ref, rev)
92
93
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by *it*, or None when it produces nothing."""
    final = None
    for final in it:
        pass
    return final
96
97
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev from the last fetch-log entry for *ref*, or None if there is none."""
    revs_for_ref = (
        entry.rev
        for entry in _read_fetch_log(repo)
        if entry.ref == ref
    )
    return _last(revs_for_ref)
101
102
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of fetching *rev* for *ref* to the fetch log of *repo*.

    Unless *force* is true, the rev previously logged for *ref* (if any) must
    be an ancestor of *rev*; otherwise verify_ancestry raises and nothing is
    written.  Forced fetches are recorded with the action "FORCEDFETCH",
    ordinary ones with "fetch".
    """
    if not force:
        prev_rev = _previous_fetched_rev(repo, ref)
        if prev_rev is not None:
            verify_ancestry(repo, rev, prev_rev)
    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    # Record layout: "<timestamp> <action> <rev> <ref>".  The timestamp must
    # not contain whitespace because the log is read back by splitting each
    # line on whitespace.
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S%z')
    action = 'FORCEDFETCH' if force else 'fetch'
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(f'{timestamp} {action} {rev} {ref}\n')
115
116
def _show_force_warning() -> None:
    """Print the --force warning box to stderr, then count down before returning.

    The countdown lasts 15 seconds unless the FORCE_WARNING_TIME environment
    variable gives a different whole number of seconds; in that case an extra
    notice about the override is printed inside the box as well.  Every line
    of the box is padded to the same width as its closing rule.

    Raises:
        ValueError: if FORCE_WARNING_TIME is set but is not an integer.
    """
    box_width = 74
    text_width = box_width - 4  # space between the '* ' and ' *' borders

    def boxed(text: str = '') -> str:
        # Pad on the right so the closing border always lands in the same
        # column.  '<' is the alignment flag; a sign flag is not valid for
        # strings and makes str.format raise ValueError.
        return f'* {text:<{text_width}s} *'

    def flagged(text: str = '') -> str:
        # One line of the inner '!!' frame, centred inside the outer box.
        return boxed(f'!! {text:<58s} !!'.center(text_width))

    banner = [
        '',
        '*' * box_width,
        boxed('WARNING: git-cache INVOKED WITH --force!'),
        boxed(),
        boxed('This mode allows previously-fetched refs to be overwritten to point to'),
        boxed("non-descendants -- commits that don't have the previous version of the"),
        boxed('ref in their history!'),
        boxed(),
        boxed("This should only be invoked by a human operator who knows what they're"),
        boxed('doing to correct a specific, known, problem. Care should be taken to'),
        boxed('prevent recurrence.'),
        boxed(),
        boxed('Press ^C to abort.'),
        boxed(),
    ]
    print('\n'.join(banner), file=sys.stderr)

    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    warn_time: int
    if warn_time_override is None:
        warn_time = 15
    else:
        warn_time = int(warn_time_override)
        bang_rule = boxed(('!' * 64).center(text_width))
        override_notice = [
            bang_rule,
            flagged('WARNING DISPLAY TIME OVERRIDDEN'),
            flagged(),
            flagged('This message is intended to be displayed long enough for a'),
            flagged('human operator to read it and have a chance to abort. An'),
            flagged('override for the delay time is provided FOR THE UNIT TESTS'),
            flagged('to avoid delaying software builds unnecessarily. This is'),
            flagged('INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD'),
            flagged('NEVER BE SEEN OUTSIDE BUILD LOGS!'),
            bang_rule,
            boxed(),
        ]
        print('\n'.join(override_notice), file=sys.stderr)

    for i in range(warn_time, 0, -1):
        print(boxed(f'Continuing in {i} seconds...'), file=sys.stderr)
        time.sleep(1)
    print('*' * box_width, file=sys.stderr)
158
159
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Run ``git fetch`` of *ref* from *repo* into the repository at *cachedir*.

    The ref is fetched into a local ref of the same name; ``--force`` is
    passed to git when *force* is true.  A failing git command raises
    subprocess.CalledProcessError, which the backoff decorator above is
    configured to retry on, with its time limit taken from the
    BACKOFF_MAX_TIME environment variable (default "30").
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    command.extend([repo, f'{ref}:{ref}'])
    subprocess.run(command, check=True)
172
173
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch *ref* from *repo* into the local cache and log the result.

    Creates the bare cache repository first if its directory does not exist.
    When *force* is true, the force warning is shown before anything else
    and the ancestry checks are skipped.

    Returns:
        A ``(cachedir, rev)`` pair: the cache directory and the rev that
        *ref* points to after the fetch.
    """
    if force:
        _show_force_warning()

    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        init_command = [
            'git',
            '-c', 'init.defaultBranch=git-cache--no-default-branch',
            'init',
            '--bare',
            cachedir,
        ]
        # git's stdout is sent to our stderr.
        subprocess.run(init_command, check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # NOTE(review): the rev is read straight from the loose ref file under
    # refs/heads; presumably tags and packed refs are not expected here --
    # confirm.
    ref_path = os.path.join(cachedir, 'refs', 'heads', ref)
    with open(ref_path, encoding='utf-8') as rev_file:
        rev = Rev(rev_file.read(999).strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
198
199
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure *rev* is in the local cache of *repo*, fetching *ref* if needed.

    If the cache directory exists and *rev* is already an ancestor of *ref*
    there, nothing is fetched.  Otherwise *ref* is fetched, after which *rev*
    must exist in the cache (subprocess.CalledProcessError is raised if it
    does not) and must pass verify_ancestry against *ref*.

    Returns:
        The cache directory for *repo*.
    """
    cachedir = git_cachedir(repo)
    already_present = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_present:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)

    return cachedir
218
219
def _main() -> None:
    """Command-line entry point: parse the arguments, then fetch or ensure a rev.

    Without a Rev argument the ref is fetched and "<rev> <cachedir>" is
    printed; with one, the rev is made available and the cache directory is
    printed.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref', metavar='Ref', type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev', metavar='Rev', type=Rev, nargs='?',
        help=('Ensure that this revision is present. '
              'If this revision is already present locally, no network operations are performed.'))
    args = parser.parse_args()

    if args.rev is not None:
        location = ensure_rev_available(
            args.repo, args.ref, args.rev, force=args.force)
        print(location)
        return

    cachedir, rev = fetch(args.repo, args.ref, force=args.force)
    print(f'{rev} {cachedir}')