]> git.scottworley.com Git - git-cache/blob - git_cache.py
1ed26017ceb18f0831d8b5566068c6eb11fdcf7a
[git-cache] / git_cache.py
1 # It would be nice if we could share the nix git cache, but as of the
2 # time of writing it is transitioning from gitv2 (deprecated) to gitv3
3 # (not ready yet), and trying to straddle them both is too far into nix
4 # implementation details for my comfort. So we re-implement here half of
5 # nix's builtins.fetchGit. :(
6
7 import argparse
8 import functools
9 import hashlib
10 import logging
11 import os
12 import subprocess
13 import sys
14 import time
15
16 from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
17
18 import backoff
19
# Type aliases.  All of them are plain ``str`` at runtime; the distinct
# names only document which kind of string a value is expected to hold.
Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str  # eg: "master" or "v1.0.0"
Rev = str  # eg: "53a27350551844e1ed1a9257690294767389ef0d"
# Either a ref name or a revision hash.
RefOrRev = Union[Ref, Rev]
25
26
class _LogEntry(NamedTuple):
    """One record read back from the fetch log: a ref and its rev."""
    # Name of the ref that was fetched.
    ref: Ref
    # Revision that was recorded for that ref.
    rev: Rev
30
31
# Generic element type used in the signature of _last().
T = TypeVar('T')
33
34
35 def _repo_hashname(repo: Repo) -> str:
36 return hashlib.sha256(repo.encode()).hexdigest()
37
38
def git_cachedir(repo: Repo) -> Path:
    """Return the path of the local cache directory for ``repo``.

    The directory is ``<cache home>/git-cache/v1/<hash of repo URL>``,
    where the cache home is ``$XDG_CACHE_HOME`` or ``~/.cache`` when that
    variable is unset or empty.  This function only computes the path; it
    does not create anything on disk.
    """
    # Use xdg module when it's less painful to have as a dependency
    # The XDG spec requires an empty variable to be treated like an unset
    # one; using '' verbatim would yield a path relative to the cwd.
    xdg_cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        xdg_cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
48
49
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log file for ``repo``.

    The file is ``<data home>/git-cache/v1/<hash of repo URL>``, where the
    data home is ``$XDG_DATA_HOME`` or ``~/.local/share`` when that
    variable is unset or empty.  This function only computes the path; it
    does not create anything on disk.
    """
    # Use xdg module when it's less painful to have as a dependency
    # The XDG spec requires an empty variable to be treated like an unset
    # one; using '' verbatim would yield a path relative to the cwd.
    xdg_data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        xdg_data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
59
60
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether ``ancestor`` is an ancestor of ``descendant``.

    Runs ``git merge-base --is-ancestor`` inside the cache directory of
    ``repo``.  Any non-zero exit status from git is reported as ``False``.
    """
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cachedir,
        'merge-base', '--is-ancestor', ancestor, descendant,
    ]
    # check=False: the exit status is the answer, not an error condition.
    result = subprocess.run(command, check=False)
    return result.returncode == 0
73
74
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless ``ancestor`` is an ancestor of ``descendant``.

    With ``force`` set the check is skipped entirely and nothing is raised.

    Raises:
        Exception: if the ancestry check fails and ``force`` is false.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise Exception('%s is not an ancestor of %s' % (ancestor, descendant))
82
83
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield the entries of the fetch log for ``repo`` in file order.

    Each log line holds four whitespace-separated fields: timestamp,
    action, rev and ref.  Only the rev and ref are returned.  Nothing is
    yielded when the log file does not exist.  Blank lines are skipped;
    any other line without four fields raises ``ValueError``.
    """
    # Open first and handle absence, rather than checking for existence
    # beforehand, so there is no window between the check and the open.
    try:
        log = open(_log_filename(repo), 'r')
    except FileNotFoundError:
        return
    with log:
        for line in log:
            fields = line.strip().split(maxsplit=3)
            if not fields:
                # A stray blank line carries no information; ignore it
                # instead of failing the tuple unpacking below.
                continue
            _, _, rev, ref = fields
            yield _LogEntry(ref, rev)
92
93
94 def _last(it: Iterator[T]) -> Optional[T]:
95 return functools.reduce(lambda a, b: b, it, None)
96
97
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the most recently logged rev for ``ref``, if any.

    Scans the whole fetch log of ``repo`` and returns the rev of the last
    entry whose ref equals ``ref``, or None when there is no such entry.
    """
    latest: Optional[Rev] = None
    for entry in _read_fetch_log(repo):
        if entry.ref == ref:
            latest = entry.rev
    return latest
101
102
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of fetching ``ref`` at ``rev`` to the fetch log.

    Unless ``force`` is set, the previously logged rev for ``ref`` (if any)
    must be an ancestor of ``rev``; otherwise verify_ancestry() raises and
    nothing is written.

    The appended line has four space-separated fields: a timestamp, the
    action (``fetch`` or ``FORCEDFETCH``), the rev and the ref.  The log
    directory is created if it does not exist.
    """
    if not force:
        prev_rev = _previous_fetched_rev(repo, ref)
        if prev_rev is not None:
            verify_ancestry(repo, rev, prev_rev)
    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    # The date previously lacked the dash between month and day
    # ('%Y-%m%d').  The reader ignores this field, so old and new lines
    # can coexist in the same log.
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S%z')
    action = 'FORCEDFETCH' if force else 'fetch'
    with open(filename, 'a') as f:
        f.write('%s %s %s %s\n' % (timestamp, action, rev, ref))
114
115
def _show_force_warning() -> None:
    """Print the --force warning banner to stderr and count down.

    After the banner, one "Continuing in N seconds..." line is printed per
    second, giving the operator time to abort.  The countdown lasts 15
    seconds unless the FORCE_WARNING_TIME environment variable is set, in
    which case its integer value is used and an additional notice about
    the override is printed.  A closing row of asterisks ends the banner.
    """
    print('''
**************************************************************************
* WARNING: git-cache INVOKED WITH --force! *
* *
* This mode allows previously-fetched refs to be overwritten to point to *
* non-descendants -- commits that don't have the previous version of the *
* the ref in their history! *
* *
* This should only be invoked by a human operator who knows what they're *
* doing to correct a specific, known, problem. Care should be taken to *
* prevent recurrence. *
* *
* Press ^C to abort. *
* *
''', end='', file=sys.stderr)
    # The countdown length can be overridden through the environment.
    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    warn_time: int
    if warn_time_override is None:
        warn_time = 15
    else:
        warn_time = int(warn_time_override)
        # Make the override itself conspicuous in the output.
        print(
            '''* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
* !! WARNING DISPLAY TIME OVERRIDDEN !! *
* !! !! *
* !! This message is intended to be displayed long enough for a !! *
* !! human operator to read it and have a chance to abort. An !! *
* !! override for the delay time is provided FOR THE UNIT TESTS !! *
* !! to avoid delaying software builds unnecessarily. This is !! *
* !! INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD !! *
* !! NEVER BE SEEN OUTSIDE BUILD LOGS! !! *
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
* *
''', end='', file=sys.stderr)

    # Count down from warn_time to 1, one line and one second per step.
    for i in range(warn_time, 0, -1):
        # Pad the message to 70 columns so the line is 74 characters wide,
        # the same width as the closing row of asterisks.
        msg = '* %-70s *' % ("Continuing in %d seconds..." % i)
        print(msg, file=sys.stderr)
        time.sleep(1)
    print('*' * 74, file=sys.stderr)
157
158
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Fetch ``ref`` from ``repo`` into the same-named ref in ``cachedir``.

    ``--force`` is passed to git when ``force`` is set.  A failing git
    command raises ``subprocess.CalledProcessError``, which the decorator
    retries with exponential backoff; the retry time limit is read from
    the BACKOFF_MAX_TIME environment variable (default 30).
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    # Refspec "<ref>:<ref>": store the remote ref under the same name.
    command.extend([repo, '%s:%s' % (ref, ref)])
    subprocess.run(command, check=True)
171
172
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch ``ref`` from ``repo`` into the local cache and log the result.

    The cache directory is initialized as a bare git repository on first
    use.  After fetching, the rev the ref now points at is resolved,
    checked, and appended to the fetch log.  Unless ``force`` is set,
    logging fails if the previously logged rev for this ref is not an
    ancestor of the new one.

    With ``force`` set, the warning banner is shown first, git is allowed
    to overwrite the ref, and the ancestry checks are skipped.

    Returns:
        A ``(cachedir, rev)`` tuple.

    Raises:
        subprocess.CalledProcessError: if a git command fails.
        Exception: if an ancestry check fails.
    """
    if force:
        _show_force_warning()
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        subprocess.run(['git',
                        '-c',
                        'init.defaultBranch=git-cache--no-default-branch',
                        'init',
                        '--bare',
                        cachedir],
                       check=True,
                       stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # Ask git for the rev instead of reading the file refs/heads/<ref>:
    # once refs have been packed (e.g. by git gc) that loose file no
    # longer exists even though the ref does.
    rev_parse = subprocess.run(['git',
                                '-C',
                                cachedir,
                                'rev-parse',
                                '--verify',
                                'refs/heads/' + ref],
                               check=True,
                               stdout=subprocess.PIPE)
    rev = Rev(rev_parse.stdout.decode().strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
197
198
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure ``rev`` is present in the local cache of ``repo``.

    If the cache directory exists and ``rev`` is already an ancestor of
    ``ref`` there, nothing is fetched.  Otherwise ``ref`` is fetched, and
    afterwards ``rev`` must exist in the cache and (unless ``force`` is
    set) be an ancestor of ``ref``.

    Returns:
        The cache directory path.

    Raises:
        subprocess.CalledProcessError: if a git command fails, including
            the check that ``rev`` exists after the fetch.
        Exception: if an ancestry check fails.
    """
    cachedir = git_cachedir(repo)
    already_present = os.path.exists(
        cachedir) and is_ancestor(repo, ref, rev)

    if not already_present:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        # cat-file -e exits non-zero when the object does not exist.
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)

    return cachedir
217
218
def _main() -> None:
    """Command-line entry point.

    Parses ``[--force] Repo Ref [Rev]``.  Without a rev, the ref is fetched
    and "<rev> <cachedir>" is printed.  With a rev, the rev is made
    available locally and the cache directory is printed.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo, help='Git repository URL')
    parser.add_argument(
        'ref',
        metavar='Ref',
        type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev',
        metavar='Rev',
        type=Rev,
        nargs='?',
        help='Ensure that this revision is present. '
        'If this revision is already present locally, no network operations are performed.')
    args = parser.parse_args()

    if args.rev is not None:
        cachedir = ensure_rev_available(
            args.repo, args.ref, args.rev, force=args.force)
        print(cachedir)
        return

    # Output order is rev first, then the cache directory.
    cachedir, rev = fetch(args.repo, args.ref, force=args.force)
    print('{1} {0}'.format(cachedir, rev))