]> git.scottworley.com Git - git-cache/blob - git_cache.py
Specify license
[git-cache] / git_cache.py
1 # git-cache: Cache git content locally
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 # It would be nice if we could share the nix git cache, but as of the
9 # time of writing it is transitioning from gitv2 (deprecated) to gitv3
10 # (not ready yet), and trying to straddle them both is too far into nix
11 # implementation details for my comfort. So we re-implement here half of
12 # nix's builtins.fetchGit. :(
13
14 import argparse
15 import functools
16 import hashlib
17 import logging
18 import os
19 import subprocess
20 import sys
21 import time
22
23 from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
24
25 import backoff
26
# Type aliases.  Each of these is a plain ``str`` at runtime; the separate
# names only document what kind of string a parameter or return value holds.
Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str  # eg: "master" or "v1.0.0"
Rev = str  # eg: "53a27350551844e1ed1a9257690294767389ef0d"
RefOrRev = Union[Ref, Rev]
32
33
class _LogEntry(NamedTuple):
    """One record of the per-repo fetch log: a ref and the rev recorded for it."""
    ref: Ref
    rev: Rev
37
38
# Generic element type for helpers that work on iterators of any item type.
T = TypeVar('T')
40
41
def _repo_hashname(repo: Repo) -> str:
    """Return the hexadecimal SHA-256 digest of the repo URL.

    The digest is used as the final path component of the repo's cache
    directory and of its fetch log.
    """
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
44
45
def git_cachedir(repo: Repo) -> Path:
    """Return the directory holding the local bare repository for ``repo``.

    The directory is ``<cache home>/git-cache/v1/<sha256 of repo URL>``,
    where the cache home is ``$XDG_CACHE_HOME``.  Following the XDG Base
    Directory Specification, an unset *or empty* ``XDG_CACHE_HOME`` falls
    back to ``~/.cache``; an empty value must not be used as-is because it
    would produce a path relative to the current working directory.

    This function only computes the path; it does not create anything.
    """
    # Use xdg module when it's less painful to have as a dependency
    xdg_cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        xdg_cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
55
56
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log file for ``repo``.

    The file is ``<data home>/git-cache/v1/<sha256 of repo URL>``, where the
    data home is ``$XDG_DATA_HOME``.  Following the XDG Base Directory
    Specification, an unset *or empty* ``XDG_DATA_HOME`` falls back to
    ``~/.local/share``; an empty value must not be used as-is because it
    would produce a path relative to the current working directory.

    This function only computes the path; it does not create anything.
    """
    # Use xdg module when it's less painful to have as a dependency
    xdg_data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        xdg_data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
66
67
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether ``ancestor`` is an ancestor of ``descendant``.

    Runs ``git merge-base --is-ancestor`` inside the repo's cache directory.
    Only an exit status of zero counts as "yes"; every other exit status
    yields False rather than raising.
    """
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', git_cachedir(repo),
        'merge-base', '--is-ancestor',
        ancestor, descendant,
    ]
    # check=False: a non-zero exit is an answer here, not a failure.
    result = subprocess.run(command, check=False)
    return result.returncode == 0
80
81
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless ``ancestor`` is an ancestor of ``descendant``.

    With ``force`` set the check is skipped entirely (git is not invoked)
    and the function returns silently.

    Raises:
        Exception: if ``force`` is false and the ancestry check fails.
    """
    if force or is_ancestor(repo, descendant, ancestor):
        return
    raise Exception(f'{ancestor} is not an ancestor of {descendant}')
89
90
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield a _LogEntry for every line of the repo's fetch log, in file order.

    Yields nothing when the log file does not exist.  Each line is expected
    to hold four whitespace-separated fields; the first two are ignored and
    the remaining two are the rev and the ref.
    """
    log_path = _log_filename(repo)
    if os.path.exists(log_path):
        with open(log_path, 'r', encoding='utf-8') as log:
            for record in log:
                # maxsplit=3 keeps everything after the third separator
                # together as the ref field.
                _stamp, _action, rev, ref = record.strip().split(maxsplit=3)
                yield _LogEntry(ref=ref, rev=rev)
99
100
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by ``it``, or None if it produces none.

    The iterator is consumed completely.
    """
    final: Optional[T] = None
    for final in it:
        pass
    return final
103
104
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev of the latest fetch-log entry for ``ref``.

    Returns None when the log holds no entry for that ref.
    """
    latest: Optional[Rev] = None
    for entry in _read_fetch_log(repo):
        if entry.ref == ref:
            # Later log lines override earlier ones.
            latest = entry.rev
    return latest
108
109
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of this fetch to the repo's fetch log.

    Unless ``force`` is set, the previously logged rev for ``ref`` (if any)
    must be an ancestor of ``rev``; otherwise verify_ancestry raises and
    nothing is written.  Forced fetches are logged with the action
    ``FORCEDFETCH`` instead of ``fetch``.
    """
    if force:
        action = 'FORCEDFETCH'
    else:
        action = 'fetch'
        previous = _previous_fetched_rev(repo, ref)
        if previous is not None:
            verify_ancestry(repo, rev, previous)

    log_path = _log_filename(repo)
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    with open(log_path, 'a', encoding='utf-8') as log:
        # NOTE(review): the timestamp format has no separator between month
        # and day ("%m%d"); kept unchanged so new lines match existing logs.
        stamp = time.strftime("%Y-%m%d-%H:%M:%S%z")
        log.write(f'{stamp} {action} {rev} {ref}\n')
122
123
def _show_force_warning() -> None:
    """Print the ``--force`` warning banner to stderr, then count down.

    The banner is a 74-column box.  After it is shown, one "Continuing in N
    seconds..." row is printed per second so a human operator has a chance
    to abort with ^C.  The countdown lasts 15 seconds unless the environment
    variable ``FORCE_WARNING_TIME`` holds an integer number of seconds, in
    which case an additional notice about the override is printed.

    Raises:
        ValueError: if ``FORCE_WARNING_TIME`` is set but is not an integer.
    """
    def boxed(text: str = '', align: str = '<') -> str:
        # One banner row: "* " + 70 columns of text + " *" (74 columns).
        return f'* {text:{align}70s} *'

    def alarm(text: str = '', align: str = '<') -> str:
        # One row of the inner "!!" box (66 columns), centred in the banner.
        return boxed(f'!!  {text:{align}58s}  !!', '^')

    def emit(*rows: str) -> None:
        print('\n'.join(rows), file=sys.stderr)

    emit(
        '',
        '*' * 74,
        boxed('WARNING: git-cache INVOKED WITH --force!', '^'),
        boxed(),
        boxed('This mode allows previously-fetched refs to be overwritten to point to'),
        boxed("non-descendants -- commits that don't have the previous version of the"),
        boxed('the ref in their history!'),
        boxed(),
        boxed("This should only be invoked by a human operator who knows what they're"),
        boxed('doing to correct a specific, known, problem. Care should be taken to'),
        boxed('prevent recurrence.'),
        boxed(),
        boxed('Press ^C to abort.'),
        boxed(),
    )

    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    warn_time: int
    if warn_time_override is None:
        warn_time = 15
    else:
        warn_time = int(warn_time_override)
        emit(
            boxed('!' * 66, '^'),
            alarm('WARNING DISPLAY TIME OVERRIDDEN', '^'),
            alarm(),
            alarm('This message is intended to be displayed long enough for a'),
            alarm('human operator to read it and have a chance to abort. An'),
            alarm('override for the delay time is provided FOR THE UNIT TESTS'),
            alarm('to avoid delaying software builds unnecessarily. This is'),
            alarm('INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD'),
            alarm('NEVER BE SEEN OUTSIDE BUILD LOGS!'),
            boxed('!' * 66, '^'),
            boxed(),
        )

    for remaining in range(warn_time, 0, -1):
        # Left-justify with "<": a "-" in a format spec is a sign flag, which
        # is rejected for strings and would raise ValueError here.
        emit(boxed(f'Continuing in {remaining} seconds...'))
        time.sleep(1)
    emit('*' * 74)
165
166
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Fetch ``ref`` from ``repo`` into the same-named ref in ``cachedir``.

    ``--force`` is passed to git only when ``force`` is set.  A failing git
    command raises subprocess.CalledProcessError, which the decorator
    retries with exponential backoff; the retry time limit is read from the
    ``BACKOFF_MAX_TIME`` environment variable (default 30).
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    command.extend([repo, f'{ref}:{ref}'])
    subprocess.run(command, check=True)
179
180
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch ``ref`` from ``repo`` into the local cache and log the result.

    Creates the bare cache repository on first use, fetches the ref, resolves
    the rev it now points to, verifies ancestry against the fetch log and
    appends a new log entry.

    Args:
        repo: URL of the remote repository.
        ref: Branch name to fetch.
        force: Show the force warning, pass ``--force`` to git and skip the
            ancestry checks.

    Returns:
        A ``(cachedir, rev)`` tuple: the cache directory and the rev that
        ``ref`` points to after the fetch.

    Raises:
        subprocess.CalledProcessError: if a git command fails.
        Exception: if an ancestry check fails and ``force`` is not set.
    """
    if force:
        _show_force_warning()
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        # git's stdout goes to our stderr so that our own stdout carries
        # only the result printed by the command-line entry point.
        subprocess.run(['git',
                        '-c',
                        'init.defaultBranch=git-cache--no-default-branch',
                        'init',
                        '--bare',
                        cachedir],
                       check=True,
                       stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # Ask git for the rev instead of reading the loose file refs/heads/<ref>:
    # once refs have been packed (git pack-refs / gc) that file is absent
    # until the ref changes again, and opening it would fail.
    resolved = subprocess.run(['git',
                               '-C',
                               cachedir,
                               'rev-parse',
                               '--verify',
                               f'refs/heads/{ref}'],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8')
    rev = Rev(resolved.stdout.strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
205
206
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure ``rev`` is present in the cache as an ancestor of ``ref``.

    If the cache directory exists and ``rev`` is already an ancestor of
    ``ref`` there, nothing is fetched.  Otherwise ``ref`` is fetched, and
    afterwards ``rev`` must exist in the cache and (unless ``force`` is set)
    be an ancestor of ``ref``.

    Returns:
        The cache directory for ``repo``.

    Raises:
        subprocess.CalledProcessError: if a git command fails, including
            the check that ``rev`` exists after fetching.
        Exception: if an ancestry check fails and ``force`` is not set.
    """
    cachedir = git_cachedir(repo)
    already_present = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)

    if not already_present:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        # "cat-file -e" exits non-zero when the object does not exist.
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)

    return cachedir
225
226
def _main() -> None:
    """Command-line entry point: parse arguments and run the requested action.

    Without a Rev argument the ref is fetched and ``<rev> <cachedir>`` is
    printed.  With a Rev argument the rev is made available (fetching only
    if needed) and just the cache directory is printed.
    """
    cli = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    cli.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')
    cli.add_argument(
        'repo', metavar='Repo', type=Repo, help='Git repository URL')
    cli.add_argument(
        'ref',
        metavar='Ref',
        type=Ref,
        help='Ref (branch or tag) in the git repo')
    cli.add_argument(
        'rev',
        metavar='Rev',
        type=Rev,
        nargs='?',
        help=('Ensure that this revision is present. '
              'If this revision is already present locally, '
              'no network operations are performed.'))
    options = cli.parse_args()

    if options.rev is not None:
        location = ensure_rev_available(
            options.repo, options.ref, options.rev, force=options.force)
        print(location)
    else:
        location, fetched_rev = fetch(
            options.repo, options.ref, force=options.force)
        print(f'{fetched_rev} {location}')