]> git.scottworley.com Git - git-cache/blame - git_cache.py
Specify license
[git-cache] / git_cache.py
CommitLineData
50685beb
SW
# git-cache: Cache git content locally
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, version 3.
6
7
bef7ce53
SW
# It would be nice if we could share the nix git cache, but as of the
# time of writing it is transitioning from gitv2 (deprecated) to gitv3
# (not ready yet), and trying to straddle them both is too far into nix
# implementation details for my comfort. So we re-implement here half of
# nix's builtins.fetchGit. :(
13
d1ab0853 14import argparse
35000f72 15import functools
bef7ce53
SW
16import hashlib
17import logging
18import os
19import subprocess
347be7cf 20import sys
35000f72 21import time
bef7ce53 22
35000f72 23from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union
bef7ce53 24
f36d5c6f
SW
25import backoff
26
35000f72
SW
# Descriptive aliases used in signatures throughout this module.  Every one
# of them is a plain `str` at runtime.

# eg: "/home/user/.cache/git-cache/v1"
Path = str
# eg: "https://github.com/NixOS/nixpkgs.git"
Repo = str
# eg: "master" or "v1.0.0"
Ref = str
# eg: "53a27350551844e1ed1a9257690294767389ef0d"
Rev = str
# Accepted where either a ref name or a revision hash may be given.
RefOrRev = Union[Ref, Rev]
bef7ce53
SW
32
33
35000f72
SW
class _LogEntry(NamedTuple):
    """A (ref, rev) pair as parsed from one line of the fetch log."""

    # Field order matters: entries are constructed positionally as
    # _LogEntry(ref, rev).
    ref: Ref
    rev: Rev
37
38
# Generic element type for the iterator helpers below.
T = TypeVar('T')
40
41
def _repo_hashname(repo: Repo) -> str:
    """Return the hex SHA-256 digest of the repo string.

    The digest is used as the per-repository path component under the
    cache and log directories.
    """
    digest = hashlib.sha256()
    digest.update(repo.encode())
    return digest.hexdigest()
44
45
bef7ce53
SW
def git_cachedir(repo: Repo) -> Path:
    """Return the directory that holds the local bare clone of `repo`.

    The location is <cache home>/git-cache/v1/<sha256 of repo>, where the
    cache home is $XDG_CACHE_HOME, falling back to ~/.cache when that
    variable is unset or empty.  Nothing is created on disk here.
    """
    # Use xdg module when it's less painful to have as a dependency
    #
    # `or` rather than a .get() default: the XDG spec treats an empty
    # variable the same as an unset one.  Using '' verbatim would make the
    # cache path relative to the current working directory.
    xdg_cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        xdg_cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
55
56
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log file for `repo`.

    The location is <data home>/git-cache/v1/<sha256 of repo>, where the
    data home is $XDG_DATA_HOME, falling back to ~/.local/share when that
    variable is unset or empty.  Nothing is created on disk here.
    """
    # Use xdg module when it's less painful to have as a dependency
    #
    # `or` rather than a .get() default: the XDG spec treats an empty
    # variable the same as an unset one.  Using '' verbatim would make the
    # log path relative to the current working directory.
    xdg_data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        xdg_data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
bef7ce53
SW
66
67
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether `ancestor` is an ancestor of `descendant`.

    Runs `git merge-base --is-ancestor` inside the cache directory for
    `repo`.  Only exit status 0 counts as True; every other exit status
    (including git errors) yields False.
    """
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cachedir,
        'merge-base', '--is-ancestor',
        ancestor, descendant,
    ]
    # check=False: a non-zero exit is an answer here, not an error.
    exit_status = subprocess.run(command, check=False).returncode
    return exit_status == 0
80
81
a5d42d8d
SW
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev,
        force: bool = False) -> None:
    """Raise unless `ancestor` is an ancestor of `descendant`.

    With `force` set the check is skipped entirely and git is not invoked.

    Raises:
        Exception: if the ancestry check fails and `force` is false.
    """
    if force:
        return
    if is_ancestor(repo, descendant, ancestor):
        return
    raise Exception(f'{ancestor} is not an ancestor of {descendant}')
bef7ce53
SW
89
90
35000f72
SW
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield the entries of the fetch log for `repo`, in file order.

    Yields nothing when the log file does not exist.  Each line is split
    on whitespace into four fields; the first two are ignored and the last
    two are taken as rev and ref.  A line with fewer than four fields
    raises ValueError from the unpacking.
    """
    log_path = _log_filename(repo)
    if os.path.exists(log_path):
        with open(log_path, 'r', encoding='utf-8') as log:
            for record in log:
                fields = record.strip().split(maxsplit=3)
                _when, _action, rev, ref = fields
                yield _LogEntry(ref, rev)
99
100
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by `it`, or None if it is empty."""
    final: Optional[T] = None
    for final in it:
        pass
    return final
103
104
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev of the last fetch-log entry for `ref`.

    Returns None when the log holds no entry for that ref.
    """
    revs_for_ref = (
        entry.rev
        for entry in _read_fetch_log(repo)
        if entry.ref == ref
    )
    return _last(revs_for_ref)
108
109
083b90e7
SW
def _log_fetch(repo: Repo, ref: Ref, rev: Rev, force: bool = False) -> None:
    """Append a record of this fetch to the fetch log for `repo`.

    Unless `force` is set, the previously logged rev for `ref` (if any)
    must be an ancestor of `rev`; otherwise verify_ancestry raises and
    nothing is written.  The record's action field is "FORCEDFETCH" for
    forced fetches and "fetch" otherwise.
    """
    previous = None if force else _previous_fetched_rev(repo, ref)
    if previous is not None:
        verify_ancestry(repo, rev, previous)

    log_path = _log_filename(repo)
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    with open(log_path, 'a', encoding='utf-8') as log:
        # NOTE(review): the format has no dash between month and day
        # ("%m%d") -- confirm whether that is intended before changing it.
        stamp = time.strftime("%Y-%m%d-%H:%M:%S%z")
        action = "FORCEDFETCH" if force else "fetch"
        log.write(f'{stamp} {action} {rev} {ref}\n')
083b90e7
SW
122
123
def _force_warning_row(text: str = '', centered: bool = False) -> str:
    """Return `text` as one 74-column row of the --force warning box."""
    # 70 columns of content plus the '* ' and ' *' borders.
    body = text.center(70) if centered else text.ljust(70)
    return f'* {body} *'


def _show_force_warning() -> None:
    """Print the --force warning box to stderr and count down before returning.

    The countdown lasts 15 seconds, sleeping one second per line.  If the
    FORCE_WARNING_TIME environment variable is set, its integer value is
    used instead and an additional notice about the override is printed.

    Raises:
        ValueError: if FORCE_WARNING_TIME is set but is not an integer.
    """
    warning_text = (
        '',
        'This mode allows previously-fetched refs to be overwritten to point to',
        "non-descendants -- commits that don't have the previous version of the",
        'ref in their history!',
        '',
        "This should only be invoked by a human operator who knows what they're",
        'doing to correct a specific, known, problem. Care should be taken to',
        'prevent recurrence.',
        '',
        'Press ^C to abort.',
        '',
    )
    rows = [
        '',  # blank line ahead of the box
        '*' * 74,
        _force_warning_row(
            'WARNING: git-cache INVOKED WITH --force!', centered=True),
    ]
    rows.extend(_force_warning_row(line) for line in warning_text)
    print('\n'.join(rows), file=sys.stderr)

    warn_time_override = os.environ.get('FORCE_WARNING_TIME', None)
    # Convert before printing anything further so a bad value fails early.
    warn_time = 15 if warn_time_override is None else int(warn_time_override)
    if warn_time_override is not None:
        override_text = (
            'This message is intended to be displayed long enough for a',
            'human operator to read it and have a chance to abort. An',
            'override for the delay time is provided FOR THE UNIT TESTS',
            'to avoid delaying software builds unnecessarily. This is',
            'INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD',
            'NEVER BE SEEN OUTSIDE BUILD LOGS!',
        )
        heading = 'WARNING DISPLAY TIME OVERRIDDEN'
        # The inner "!!" box is 66 columns wide, centred in the outer box.
        bang_row = _force_warning_row('!' * 66, centered=True)
        notice = [
            bang_row,
            _force_warning_row(f'!!  {heading:^58s}  !!', centered=True),
            _force_warning_row(f'!!  {"":58s}  !!', centered=True),
        ]
        notice.extend(
            _force_warning_row(f'!!  {line:<58s}  !!', centered=True)
            for line in override_text)
        notice.append(bang_row)
        notice.append(_force_warning_row())
        print('\n'.join(notice), file=sys.stderr)

    for i in range(warn_time, 0, -1):
        # Left-justify via the row helper.  The previous format spec
        # '-70s' is rejected for strings ("Sign not allowed in string
        # format specifier"), so the countdown raised ValueError.
        print(_force_warning_row(f'Continuing in {i} seconds...'),
              file=sys.stderr)
        time.sleep(1)
    print('*' * 74, file=sys.stderr)
165
35000f72 166
f36d5c6f
SW
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(
        cachedir: Path,
        repo: Repo,
        ref: Ref,
        force: bool = False) -> None:
    """Run `git fetch` of `ref` from `repo` into the repository at `cachedir`.

    The refspec is `<ref>:<ref>`; `--force` is passed to git when `force`
    is set.  A failing git command raises CalledProcessError, which the
    backoff decorator retries with exponential delays, bounded by
    BACKOFF_MAX_TIME (default '30').
    """
    command = ['git', '-C', cachedir, 'fetch']
    if force:
        command.append('--force')
    command.extend([repo, f'{ref}:{ref}'])
    subprocess.run(command, check=True)
f36d5c6f
SW
179
180
083b90e7
SW
def fetch(repo: Repo, ref: Ref, force: bool = False) -> Tuple[Path, Rev]:
    """Fetch `ref` from `repo` into the local cache and log the result.

    Creates the bare cache repository on first use, fetches the ref,
    resolves the rev it now points to, checks ancestry, and appends a
    record to the fetch log.

    Args:
        repo: URL of the remote repository.
        ref: Ref to fetch; it is stored locally under refs/heads/.
        force: Show the --force warning first, pass --force to git, and
            skip the ancestry checks.

    Returns:
        A (cachedir, rev) tuple: the cache repository path and the rev
        that `ref` resolves to after the fetch.

    Raises:
        subprocess.CalledProcessError: if a git command fails.
        Exception: if an ancestry check fails (see verify_ancestry).
    """
    if force:
        _show_force_warning()
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        # stdout is redirected so that git's chatter stays off our stdout.
        subprocess.run(['git',
                        '-c',
                        'init.defaultBranch=git-cache--no-default-branch',
                        'init',
                        '--bare',
                        cachedir],
                       check=True,
                       stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref, force=force)

    # Ask git for the ref's value instead of reading the loose file
    # refs/heads/<ref>: once refs have been packed (git gc / pack-refs)
    # that file no longer exists and opening it would fail.
    resolved = subprocess.run(['git',
                               '-C',
                               cachedir,
                               'rev-parse',
                               '--verify',
                               f'refs/heads/{ref}'],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8')
    rev = Rev(resolved.stdout.strip())
    verify_ancestry(repo, ref, rev, force=force)
    _log_fetch(repo, ref, rev, force=force)

    return cachedir, rev
205
206
083b90e7
SW
def ensure_rev_available(
        repo: Repo,
        ref: Ref,
        rev: Rev,
        force: bool = False) -> Path:
    """Make sure `rev` is present in the local cache of `repo`.

    If the cache directory exists and `rev` is already an ancestor of
    `ref` there, nothing is fetched.  Otherwise `ref` is fetched, and the
    rev must then exist in the cache and pass the ancestry check.

    Returns:
        The cache repository path.

    Raises:
        subprocess.CalledProcessError: if `rev` is still missing after the
            fetch, or a git command fails.
        Exception: if the ancestry check fails (see verify_ancestry).
    """
    cachedir = git_cachedir(repo)
    already_cached = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_cached:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref, force=force)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        # `cat-file -e` exits non-zero when the object does not exist.
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev, force=force)
    return cachedir
347be7cf
SW
225
226
def _main() -> None:
    """Command-line entry point.

    Parses `[--force] Repo Ref [Rev]`.  Without Rev, fetches the ref and
    prints "<rev> <cachedir>".  With Rev, ensures that revision is
    available and prints the cache directory.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        '--force',
        action='store_true',
        help='Recover from a force-push in the remote repo')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref', metavar='Ref', type=Ref,
        help='Ref (branch or tag) in the git repo')
    rev_help = (
        'Ensure that this revision is present. '
        'If this revision is already present locally, no network operations are performed.')
    parser.add_argument(
        'rev', metavar='Rev', type=Rev, nargs='?',
        help=rev_help)
    options = parser.parse_args()

    if options.rev is not None:
        location = ensure_rev_available(
            options.repo,
            options.ref,
            options.rev,
            force=options.force)
        print(location)
        return

    cachedir, rev = fetch(options.repo, options.ref, force=options.force)
    print(f'{rev} {cachedir}')