]>
Commit | Line | Data |
---|---|---|
50685beb SW |
1 | # git-cache: Cache git content locally |
2 | # | |
3 | # This program is free software: you can redistribute it and/or modify it | |
4 | # under the terms of the GNU General Public License as published by the | |
5 | # Free Software Foundation, version 3. | |
6 | ||
7 | ||
bef7ce53 SW |
8 | # It would be nice if we could share the nix git cache, but as of the |
9 | # time of writing it is transitioning from gitv2 (deprecated) to gitv3 | |
10 | # (not ready yet), and trying to straddle them both is too far into nix | |
11 | # implementation details for my comfort. So we re-implement here half of | |
12 | # nix's builtins.fetchGit. :( | |
13 | ||
d1ab0853 | 14 | import argparse |
35000f72 | 15 | import functools |
bef7ce53 SW |
16 | import hashlib |
17 | import logging | |
18 | import os | |
19 | import subprocess | |
347be7cf | 20 | import sys |
35000f72 | 21 | import time |
bef7ce53 | 22 | |
35000f72 | 23 | from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union |
bef7ce53 | 24 | |
f36d5c6f SW |
25 | import backoff |
26 | ||
a3835f50 SW |
27 | |
class GitCacheError(Exception):
    """Error raised by git-cache itself, e.g. when an ancestry check fails."""
30 | ||
31 | ||
35000f72 SW |
# Type aliases.  Each one is plain ``str`` at runtime; the distinct names only
# document which kind of string a signature expects.
Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str  # eg: "master" or "v1.0.0"
Rev = str  # eg: "53a27350551844e1ed1a9257690294767389ef0d"
RefOrRev = Union[Ref, Rev]  # accepted where either a ref name or a rev works
bef7ce53 SW |
37 | |
38 | ||
35000f72 SW |
class _LogEntry(NamedTuple):
    """One record of the fetch log: a ref and the rev that was fetched for it."""

    # Name of the ref that was fetched.
    ref: Ref
    # Revision recorded for that ref by the fetch.
    rev: Rev


# Generic element type used by the iterator helpers in this module.
T = TypeVar('T')
45 | ||
46 | ||
def _repo_hashname(repo: Repo) -> str:
    """Return the hex SHA-256 digest of *repo* (encoded with str.encode()).

    The digest serves as a filesystem-safe, per-repository name.
    """
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
49 | ||
50 | ||
bef7ce53 SW |
def git_cachedir(repo: Repo) -> Path:
    """Return the directory used as the local bare-repository cache for *repo*.

    The location is ``$XDG_CACHE_HOME/git-cache/v1/<hash of repo>``.  When
    XDG_CACHE_HOME is unset *or empty* the default ``~/.cache`` is used, as
    the XDG Base Directory specification requires; previously an empty value
    produced a path relative to the current working directory.

    This function only computes the path; it does not create anything.
    """
    # Use xdg module when it's less painful to have as a dependency
    xdg_cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        xdg_cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
60 | ||
61 | ||
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch-log file for *repo*.

    The location is ``$XDG_DATA_HOME/git-cache/v1/<hash of repo>``.  When
    XDG_DATA_HOME is unset *or empty* the default ``~/.local/share`` is used,
    as the XDG Base Directory specification requires; previously an empty
    value produced a path relative to the current working directory.

    This function only computes the path; it does not create anything.
    """
    # Use xdg module when it's less painful to have as a dependency
    xdg_data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        xdg_data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
bef7ce53 SW |
71 | |
72 | ||
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether *ancestor* is an ancestor of *descendant* in the cache.

    Runs ``git merge-base --is-ancestor`` inside the cache directory of
    *repo*.  Returns True only when git exits with status 0; every non-zero
    exit status is reported as False rather than raised.
    """
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cachedir,
        'merge-base', '--is-ancestor',
        ancestor, descendant,
    ]
    # check=False: a non-zero exit is an answer here, not an exception.
    return subprocess.run(command, check=False).returncode == 0
85 | ||
86 | ||
a5d42d8d SW |
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless *ancestor* is an ancestor of *descendant*.

    With ``force=True`` the check is skipped entirely and nothing is raised.

    Raises:
        GitCacheError: if the check runs and is_ancestor() reports False.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise GitCacheError(f'{ancestor} is not an ancestor of {descendant}')
bef7ce53 SW |
94 | |
95 | ||
35000f72 SW |
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield one _LogEntry per line of the fetch log of *repo*, in file order.

    Yields nothing when the log file does not exist.  Each line is split on
    whitespace into four fields; the first two are ignored, the third is the
    rev and the fourth is the ref.  Because at most three splits are made,
    any further whitespace stays inside the ref field.  A line with fewer
    than four fields makes the unpacking raise ValueError.
    """
    log_path = _log_filename(repo)
    if os.path.exists(log_path):
        with open(log_path, 'r', encoding='utf-8') as log:
            for record in log:
                fields = record.strip().split(maxsplit=3)
                _first, _second, rev, ref = fields
                yield _LogEntry(ref, rev)
104 | ||
105 | ||
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by *it*, or None if it produces nothing.

    The iterator is consumed completely.
    """
    final: Optional[T] = None
    for final in it:
        pass
    return final
108 | ||
109 | ||
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev of the last fetch-log entry for *ref*, or None.

    None means the fetch log of *repo* has no entry whose ref equals *ref*.
    """
    latest: Optional[Rev] = None
    for entry in _read_fetch_log(repo):
        if entry.ref == ref:
            # Later entries override earlier ones; the log is read in order.
            latest = entry.rev
    return latest
113 | ||
114 | ||
083b90e7 SW |
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of fetching *rev* for *ref* to the fetch log of *repo*.

    Unless *force* is set, the previously logged rev for *ref* (if any) must
    be an ancestor of *rev*; otherwise verify_ancestry() raises and nothing is
    written.  The log's parent directory is created when missing.

    The record is ``<timestamp> <action> <rev> <ref>``, where the action is
    ``FORCEDFETCH`` for forced fetches and ``fetch`` otherwise.
    """
    previous = None if force else _previous_fetched_rev(repo, ref)
    if previous is not None:
        verify_ancestry(repo, rev, previous)

    log_path = _log_filename(repo)
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    action = "FORCEDFETCH" if force else "fetch"
    with open(log_path, 'a', encoding='utf-8') as log:
        # NOTE(review): the format has no '-' between %m and %d; this looks
        # unintentional, but it is kept as-is to leave the log format alone.
        timestamp = time.strftime("%Y-%m%d-%H:%M:%S%z")
        log.write(f'{timestamp} {action} {rev} {ref}\n')
083b90e7 SW |
127 | |
128 | ||
def _box_line(text: str = '') -> str:
    """Return *text* left-justified inside one 74-column ``* ... *`` row."""
    return f'* {text:<70s} *'


def _bang_line(text: str = '') -> str:
    """Return a ``!! ... !!`` row of the inner box, framed by the outer box."""
    return _box_line(f'!! {text:<60s} !!'.center(70))


def _show_force_warning() -> None:
    """Print the ``--force`` warning banner to stderr, then count down.

    The countdown lasts 15 seconds, one line per second, giving an operator
    time to press ^C.  The FORCE_WARNING_TIME environment variable overrides
    the duration (parsed with int()); when it is set, an additional notice is
    printed explaining that the override exists for unit tests only.

    Every row is padded by _box_line(), so the right-hand border always lines
    up.  The countdown row previously used the format spec ``-70s``, which
    raises ValueError for strings (a sign is not allowed there); ``<70s`` is
    the left-justifying equivalent.
    """
    print('\n'.join([
        '',
        '*' * 74,
        _box_line('WARNING: git-cache INVOKED WITH --force!'),
        _box_line(),
        _box_line('This mode allows previously-fetched refs to be overwritten to point to'),
        _box_line("non-descendants -- commits that don't have the previous version of the"),
        _box_line('the ref in their history!'),
        _box_line(),
        _box_line("This should only be invoked by a human operator who knows what they're"),
        _box_line('doing to correct a specific, known, problem. Care should be taken to'),
        _box_line('prevent recurrence.'),
        _box_line(),
        _box_line('Press ^C to abort.'),
        _box_line(),
    ]), file=sys.stderr)

    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    warn_time: int
    if warn_time_override is None:
        warn_time = 15
    else:
        warn_time = int(warn_time_override)
        bang_rule = _box_line(('!' * 66).center(70))
        print('\n'.join([
            bang_rule,
            _bang_line('WARNING DISPLAY TIME OVERRIDDEN'),
            _bang_line(),
            _bang_line('This message is intended to be displayed long enough for a'),
            _bang_line('human operator to read it and have a chance to abort. An'),
            _bang_line('override for the delay time is provided FOR THE UNIT TESTS'),
            _bang_line('to avoid delaying software builds unnecessarily. This is'),
            _bang_line('INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD'),
            _bang_line('NEVER BE SEEN OUTSIDE BUILD LOGS!'),
            bang_rule,
            _box_line(),
        ]), file=sys.stderr)

    # A zero or negative warn_time skips the countdown entirely.
    for i in range(warn_time, 0, -1):
        print(_box_line(f'Continuing in {i} seconds...'), file=sys.stderr)
        time.sleep(1)
    print('*' * 74, file=sys.stderr)
170 | ||
35000f72 | 171 | |
f36d5c6f SW |
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Fetch *ref* from *repo* into the same-named ref of the repo at *cachedir*.

    ``--force`` is passed to ``git fetch`` only when *force* is set.  A failing
    git invocation raises subprocess.CalledProcessError, which the decorator
    retries with exponential backoff; the retry budget comes from the
    BACKOFF_MAX_TIME environment variable (default ``30``).
    """
    force_flag = ['--force'] if force else []
    refspec = f'{ref}:{ref}'
    command = ['git', '-C', cachedir, 'fetch', *force_flag, repo, refspec]
    subprocess.run(command, check=True)
f36d5c6f SW |
184 | |
185 | ||
083b90e7 SW |
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch *ref* of *repo* into the local cache and log the result.

    Steps, in order: show the force warning (only with *force*), create the
    bare cache repository if its directory is missing, fetch the ref, read
    the fetched rev from ``refs/heads/<ref>`` in the cache, verify ancestry
    and append to the fetch log.

    Returns:
        ``(cachedir, rev)``: the cache directory and the rev the ref now
        points to.

    Raises:
        subprocess.CalledProcessError: if ``git init`` fails, or the fetch
            still fails after its retries.
        GitCacheError: if an ancestry check fails (never with *force*).
    """
    if force:
        _show_force_warning()

    cachedir = git_cachedir(repo)
    cache_missing = not os.path.exists(cachedir)
    if cache_missing:
        logging.debug("Initializing git repo")
        init_command = [
            'git',
            '-c', 'init.defaultBranch=git-cache--no-default-branch',
            'init', '--bare', cachedir,
        ]
        # git's stdout is sent to our stderr, keeping our own stdout clean.
        subprocess.run(init_command, check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # NOTE(review): this reads the loose ref file under refs/heads directly;
    # presumably refs stored elsewhere (tags, packed refs) are not expected.
    ref_path = os.path.join(cachedir, 'refs', 'heads', ref)
    with open(ref_path, encoding='utf-8') as ref_file:
        rev = Rev(ref_file.read(999).strip())

    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
210 | ||
211 | ||
083b90e7 SW |
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure *rev* is in the local cache of *repo*, reachable from *ref*.

    When the cache directory exists and *rev* is already an ancestor of
    *ref*, nothing is fetched.  Otherwise *ref* is fetched, after which *rev*
    must exist in the cache and (unless *force*) be an ancestor of *ref*.

    Returns:
        The cache directory for *repo*.

    Raises:
        subprocess.CalledProcessError: if *rev* is still absent after the
            fetch (``git cat-file -e`` fails), or the fetch itself fails.
        GitCacheError: if an ancestry check fails (never with *force*).
    """
    cachedir = git_cachedir(repo)
    already_cached = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_cached:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)

        logging.debug('Verifying that fetch retrieved rev %s', rev)
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)

    return cachedir
347be7cf SW |
230 | |
231 | ||
def _main() -> None:
    """Command-line entry point.

    With only ``Repo Ref``, fetches the ref and prints ``<rev> <cachedir>``.
    With an additional ``Rev``, ensures that revision is available and prints
    the cache directory.  ``--force`` is passed through in both modes.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')

    # The two mandatory positionals differ only in name, type and help text.
    mandatory = (
        ('repo', 'Repo', Repo, 'Git repository URL'),
        ('ref', 'Ref', Ref, 'Ref (branch or tag) in the git repo'),
    )
    for dest, metavar, arg_type, help_text in mandatory:
        parser.add_argument(dest, metavar=metavar, type=arg_type, help=help_text)

    parser.add_argument(
        'rev',
        metavar='Rev',
        type=Rev,
        nargs='?',
        help='Ensure that this revision is present. '
             'If this revision is already present locally, no network operations are performed.')
    args = parser.parse_args()

    if args.rev is not None:
        cachedir = ensure_rev_available(
            args.repo, args.ref, args.rev, force=args.force)
        print(cachedir)
    else:
        cachedir, rev = fetch(args.repo, args.ref, force=args.force)
        print(f'{rev} {cachedir}')