]> git.scottworley.com Git - git-cache/blob - git_cache.py
8171b48f3a06e8b4921bbcce5dcb028980c4d8d0
[git-cache] / git_cache.py
1 # git-cache: Cache git content locally
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 # It would be nice if we could share the nix git cache, but as of the
9 # time of writing it is transitioning from gitv2 (deprecated) to gitv3
10 # (not ready yet), and trying to straddle them both is too far into nix
11 # implementation details for my comfort. So we re-implement here half of
12 # nix's builtins.fetchGit. :(
13
14 import argparse
15 import functools
16 import hashlib
17 import logging
18 import os
19 import subprocess
20 import sys
21 import time
22
23 from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
24
25 import backoff
26
27
class GitCacheError(Exception):
    """Error raised by git-cache itself, e.g. when an ancestry check fails."""
30
31
# Plain-string aliases; they exist to make the annotations in this module
# self-describing.
Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str  # eg: "master" or "v1.0.0"
Rev = str  # eg: "53a27350551844e1ed1a9257690294767389ef0d"
# Either form; used for arguments that are passed to git unchanged.
RefOrRev = Union[Ref, Rev]
37
38
class _LogEntry(NamedTuple):
    """One ref/rev pair parsed from a line of the fetch log."""
    # Name of the ref the log line refers to.
    ref: Ref
    # Revision recorded for that ref on the log line.
    rev: Rev
42
43
# Generic element type used by the _last() helper's signature.
T = TypeVar('T')
45
46
def _repo_hashname(repo: Repo) -> str:
    """Return the hex SHA-256 digest of the repo string (default-encoded)."""
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
49
50
def git_cachedir(repo: Repo) -> Path:
    """Return the directory used as the local bare cache for ``repo``.

    The result is ``<cache home>/git-cache/v1/<sha256 of repo>``.  The cache
    home is ``$XDG_CACHE_HOME``, falling back to ``~/.cache`` when that
    variable is unset *or empty* (the XDG Base Directory Specification treats
    both cases the same).  The directory is not created here.
    """
    # Use xdg module when it's less painful to have as a dependency
    # `or` instead of a .get() default: an empty value must also fall back,
    # otherwise the cache would land in a path relative to the current dir.
    xdg_cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        xdg_cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
60
61
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch-log file for ``repo``.

    The result is ``<data home>/git-cache/v1/<sha256 of repo>``.  The data
    home is ``$XDG_DATA_HOME``, falling back to ``~/.local/share`` when that
    variable is unset *or empty* (the XDG Base Directory Specification treats
    both cases the same).  Neither the file nor its directory is created here.
    """
    # Use xdg module when it's less painful to have as a dependency
    # `or` instead of a .get() default: an empty value must also fall back,
    # otherwise the log would land in a path relative to the current dir.
    xdg_data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        xdg_data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
71
72
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether ``ancestor`` is an ancestor of ``descendant``.

    Runs ``git merge-base --is-ancestor`` inside the cache directory for
    ``repo``.  Only exit status 0 counts as "yes"; every other status,
    whatever its cause, is reported as False.
    """
    cache = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cache,
        'merge-base', '--is-ancestor', ancestor, descendant,
    ]
    result = subprocess.run(command, check=False)
    return result.returncode == 0
85
86
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise GitCacheError unless ``ancestor`` is an ancestor of ``descendant``.

    With ``force`` set the check is skipped entirely and nothing is raised.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise GitCacheError(f'{ancestor} is not an ancestor of {descendant}')
94
95
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield the entries of the fetch log for ``repo`` in file order.

    Each log line holds four whitespace-separated fields, of which the third
    (rev) and fourth (ref) are returned; the first two are ignored.  Yields
    nothing when the log file does not exist.  Blank lines are skipped; a
    non-blank line with fewer than four fields raises ValueError.
    """
    # Open first and handle absence, rather than exists()-then-open(), so a
    # file that disappears in between cannot cause a crash.
    try:
        log = open(_log_filename(repo), 'r', encoding='utf-8')
    except FileNotFoundError:
        return
    with log:
        for line in log:
            # strip() before splitting so the last field (everything after
            # the third separator) carries no trailing newline.
            stripped = line.strip()
            if not stripped:
                continue
            _, _, rev, ref = stripped.split(maxsplit=3)
            yield _LogEntry(ref, rev)
104
105
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by ``it``, or None if it yields nothing."""
    final = None
    # The loop variable is left holding the last element once the loop ends.
    for final in it:
        pass
    return final
108
109
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev of the last fetch-log entry for ``ref``, or None.

    None means the log for ``repo`` has no entry whose ref equals ``ref``.
    """
    latest: Optional[Rev] = None
    for logged in _read_fetch_log(repo):
        if logged.ref == ref:
            # Later lines overwrite earlier ones, leaving the final match.
            latest = logged.rev
    return latest
113
114
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of fetching ``ref`` at ``rev`` to the fetch log.

    Unless ``force`` is set, the rev previously logged for ``ref`` (if any)
    must be an ancestor of ``rev``; otherwise verify_ancestry raises
    GitCacheError and nothing is written.

    The line written is ``<timestamp> <fetch|FORCEDFETCH> <rev> <ref>``.
    The log's directory is created if it does not exist.
    """
    if not force:
        prev_rev = _previous_fetched_rev(repo, ref)
        if prev_rev is not None:
            verify_ancestry(repo, rev, prev_rev)
    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    # Dash between month and day so the date reads YYYY-MM-DD.  The reader
    # ignores this field, so lines written with the older "%Y-%m%d" layout
    # still parse.
    timestamp = time.strftime("%Y-%m-%d-%H:%M:%S%z")
    action = "FORCEDFETCH" if force else "fetch"
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(f'{timestamp} {action} {rev} {ref}\n')
127
128
def _force_banner_row(text: str = '') -> str:
    """Return one 74-column row of the warning box with ``text`` left-justified."""
    # '<' is left-justification in Python's format mini-language.  A
    # printf-style '-' is parsed as a numeric sign and raises ValueError when
    # applied to a string.
    return f'* {text:<70s} *'


def _show_force_warning() -> None:
    """Print the --force warning box to stderr, then count down before returning.

    The countdown lasts 15 seconds unless the FORCE_WARNING_TIME environment
    variable supplies another whole number of seconds, in which case an extra
    notice about the override is printed as well.  One row is printed per
    remaining second, with a one-second sleep after each.

    Raises:
        ValueError: FORCE_WARNING_TIME is set but is not an integer.
    """
    rule = '*' * 74
    warning_rows = [
        '',  # the box is preceded by an empty line
        rule,
        _force_banner_row('WARNING: git-cache INVOKED WITH --force!'),
        _force_banner_row(),
        _force_banner_row(
            'This mode allows previously-fetched refs to be overwritten to point to'),
        _force_banner_row(
            "non-descendants -- commits that don't have the previous version of the"),
        _force_banner_row('the ref in their history!'),
        _force_banner_row(),
        _force_banner_row(
            "This should only be invoked by a human operator who knows what they're"),
        _force_banner_row(
            'doing to correct a specific, known, problem. Care should be taken to'),
        _force_banner_row('prevent recurrence.'),
        _force_banner_row(),
        _force_banner_row('Press ^C to abort.'),
        _force_banner_row(),
    ]
    print('\n'.join(warning_rows), file=sys.stderr)

    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    warn_time: int
    if warn_time_override is None:
        warn_time = 15
    else:
        warn_time = int(warn_time_override)
        # Nested "!!" box, centred inside the outer frame.
        inner_rule = '!' * 66
        inner_texts = [
            'WARNING DISPLAY TIME OVERRIDDEN',
            '',
            'This message is intended to be displayed long enough for a',
            'human operator to read it and have a chance to abort. An',
            'override for the delay time is provided FOR THE UNIT TESTS',
            'to avoid delaying software builds unnecessarily. This is',
            'INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD',
            'NEVER BE SEEN OUTSIDE BUILD LOGS!',
        ]
        inner_rows = (
            [inner_rule]
            + [f'!! {text:<60s} !!' for text in inner_texts]
            + [inner_rule])
        override_rows = [_force_banner_row(row.center(70)) for row in inner_rows]
        override_rows.append(_force_banner_row())
        print('\n'.join(override_rows), file=sys.stderr)

    for i in range(warn_time, 0, -1):
        print(_force_banner_row(f'Continuing in {i} seconds...'), file=sys.stderr)
        time.sleep(1)
    print(rule, file=sys.stderr)
170
171
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Run ``git fetch`` of ``ref`` from ``repo`` into ``cachedir``.

    The refspec is ``<ref>:<ref>``; ``--force`` is added to the command when
    ``force`` is set.  A failing git command raises CalledProcessError, which
    the backoff decorator retries on, bounded by BACKOFF_MAX_TIME (default
    '30').
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    command.extend([repo, f'{ref}:{ref}'])
    subprocess.run(command, check=True)
184
185
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch ``ref`` from ``repo`` into the local cache and log the result.

    The cache directory is initialised as a bare repository on first use.
    After fetching, the rev that ``refs/heads/<ref>`` resolves to is checked
    with verify_ancestry and appended to the fetch log.

    Args:
        repo: URL of the remote repository.
        ref: Ref to fetch.
        force: Show the --force warning first, pass ``--force`` to the fetch
            and skip the ancestry checks.

    Returns:
        ``(cachedir, rev)``: the cache directory and the fetched revision.

    Raises:
        subprocess.CalledProcessError: a git command failed.
        GitCacheError: an ancestry check failed.
    """
    if force:
        _show_force_warning()
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        subprocess.run(['git',
                        '-c',
                        'init.defaultBranch=git-cache--no-default-branch',
                        'init',
                        '--bare',
                        cachedir],
                       check=True,
                       stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # Ask git for the ref's value instead of reading the loose file under
    # refs/heads/: that file is absent once refs have been packed (e.g. by
    # gc) or when the repository uses another ref storage backend.
    resolved = subprocess.run(
        ['git', '-C', cachedir, 'rev-parse', '--verify', f'refs/heads/{ref}'],
        check=True,
        stdout=subprocess.PIPE,
        encoding='utf-8')
    rev = Rev(resolved.stdout.strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
210
211
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure ``rev`` is in the local cache for ``repo`` and return its path.

    If the cache directory exists and ``rev`` is already an ancestor of
    ``ref`` there, nothing is fetched.  Otherwise ``ref`` is fetched, and
    afterwards ``rev`` must exist (``git cat-file -e``) and pass
    verify_ancestry against ``ref``.

    Raises:
        subprocess.CalledProcessError: a git command failed, including the
            check that ``rev`` exists after the fetch.
        GitCacheError: an ancestry check failed.
    """
    cachedir = git_cachedir(repo)
    already_present = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_present:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)
    return cachedir
230
231
def _main() -> None:
    """Command-line entry point: parse arguments, then fetch or ensure a rev.

    Without a Rev argument the ref is fetched and ``<rev> <cachedir>`` is
    printed.  With a Rev argument, ensure_rev_available is called and the
    cache directory it returns is printed.
    """
    cli = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    cli.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')
    cli.add_argument(
        'repo', metavar='Repo', type=Repo, help='Git repository URL')
    cli.add_argument(
        'ref', metavar='Ref', type=Ref,
        help='Ref (branch or tag) in the git repo')
    cli.add_argument(
        'rev', metavar='Rev', type=Rev, nargs='?',
        help=('Ensure that this revision is present. '
              'If this revision is already present locally, '
              'no network operations are performed.'))
    opts = cli.parse_args()

    if opts.rev is not None:
        location = ensure_rev_available(
            opts.repo, opts.ref, opts.rev, force=opts.force)
        print(location)
        return

    cachedir, rev = fetch(opts.repo, opts.ref, force=opts.force)
    print(f'{rev} {cachedir}')