]>
Commit | Line | Data |
---|---|---|
bef7ce53 SW |
1 | # It would be nice if we could share the nix git cache, but as of the |
2 | # time of writing it is transitioning from gitv2 (deprecated) to gitv3 | |
3 | # (not ready yet), and trying to straddle them both is too far into nix | |
4 | # implementation details for my comfort. So we re-implement here half of | |
5 | # nix's builtins.fetchGit. :( | |
6 | ||
d1ab0853 | 7 | import argparse |
35000f72 | 8 | import functools |
bef7ce53 SW |
9 | import hashlib |
10 | import logging | |
11 | import os | |
12 | import subprocess | |
347be7cf | 13 | import sys |
35000f72 | 14 | import time |
bef7ce53 | 15 | |
35000f72 | 16 | from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union |
bef7ce53 | 17 | |
f36d5c6f SW |
18 | import backoff |
19 | ||
35000f72 SW |
# Plain-string aliases. They exist to make signatures self-describing; at
# runtime every one of them is just `str`.

# Filesystem path, eg: "/home/user/.cache/git-cache/v1"
Path = str
# Remote repository location, eg: "https://github.com/NixOS/nixpkgs.git"
Repo = str
# Symbolic ref name, eg: "master" or "v1.0.0"
Ref = str
# Commit hash, eg: "53a27350551844e1ed1a9257690294767389ef0d"
Rev = str
# For parameters where either a ref or a rev may be passed.
RefOrRev = Union[Ref, Rev]
bef7ce53 SW |
25 | |
26 | ||
35000f72 SW |
27 | class _LogEntry(NamedTuple): |
28 | ref: Ref | |
29 | rev: Rev | |
30 | ||
31 | ||
# Generic element type; lets _last() return the same type its iterator yields.
T = TypeVar("T")
33 | ||
34 | ||
35 | def _repo_hashname(repo: Repo) -> str: | |
36 | return hashlib.sha256(repo.encode()).hexdigest() | |
37 | ||
38 | ||
bef7ce53 SW |
def git_cachedir(repo: Repo) -> Path:
    """Return the path of the local bare-repository cache for repo.

    The path is <cache home>/git-cache/v1/<sha256 of repo>, where the cache
    home is $XDG_CACHE_HOME, or ~/.cache when that variable is unset or empty.
    Nothing is created on disk here.
    """
    # Use xdg module when it's less painful to have as a dependency

    # `or` instead of a .get() default: the XDG spec treats an empty variable
    # like an unset one.  Using '' verbatim would silently produce a path
    # relative to the current working directory.
    cache_home = Path(
        os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'))

    return Path(os.path.join(
        cache_home,
        'git-cache/v1',
        _repo_hashname(repo)))
48 | ||
49 | ||
def _log_filename(repo: Repo) -> Path:
    """Return the path of the fetch log file for repo.

    The path is <data home>/git-cache/v1/<sha256 of repo>, where the data
    home is $XDG_DATA_HOME, or ~/.local/share when that variable is unset or
    empty.  Nothing is created on disk here.
    """
    # Use xdg module when it's less painful to have as a dependency

    # `or` instead of a .get() default: the XDG spec treats an empty variable
    # like an unset one.  Using '' verbatim would silently produce a path
    # relative to the current working directory.
    data_home = Path(
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share'))

    return Path(os.path.join(
        data_home,
        'git-cache/v1',
        _repo_hashname(repo)))
bef7ce53 SW |
59 | |
60 | ||
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    """Report whether ancestor is reachable from descendant in repo's cache.

    Runs `git merge-base --is-ancestor` inside the cache directory and
    returns True only when git exits with status 0.  Every other exit status
    yields False, which includes the case where git could not answer at all
    (for instance because a revision is not present locally).
    """
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    command = [
        'git', '-C', cachedir,
        'merge-base', '--is-ancestor',
        ancestor, descendant,
    ]
    # check=False: a nonzero exit is the answer "no", not an error to raise.
    exit_status = subprocess.run(command, check=False).returncode
    return exit_status == 0
73 | ||
74 | ||
a5d42d8d SW |
def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev) -> None:
    """Raise unless ancestor is an ancestor of descendant in repo's cache.

    Raises:
        Exception: when is_ancestor() reports False for the pair.
    """
    if is_ancestor(repo, descendant, ancestor):
        return
    raise Exception('%s is not an ancestor of %s' % (ancestor, descendant))
bef7ce53 SW |
81 | |
82 | ||
35000f72 SW |
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    """Yield the entries of repo's fetch log, oldest first.

    Each log line holds four whitespace-separated fields; the first two are
    ignored here and the last two are the rev and the ref.  A missing log
    file yields nothing.  Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line has fewer than four fields.
    """
    filename = _log_filename(repo)
    # Open first and handle absence afterwards: checking for existence and
    # then opening leaves a window in which the file can disappear.
    try:
        log_file = open(filename, 'r')
    except FileNotFoundError:
        return
    with log_file:
        for line in log_file:
            stripped = line.strip()
            if not stripped:
                # A stray empty line is not a corrupt record; do not crash
                # on it in the unpacking below.
                continue
            _, _, rev, ref = stripped.split(maxsplit=3)
            yield _LogEntry(ref, rev)
91 | ||
92 | ||
93 | def _last(it: Iterator[T]) -> Optional[T]: | |
94 | return functools.reduce(lambda a, b: b, it, None) | |
95 | ||
96 | ||
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the most recently logged rev for ref, or None if none is logged."""
    revs_for_ref = (
        entry.rev
        for entry in _read_fetch_log(repo)
        if entry.ref == ref
    )
    return _last(revs_for_ref)
100 | ||
101 | ||
def _log_fetch(repo: Repo, ref: Ref, rev: Rev) -> None:
    """Append a "fetch" record for (ref, rev) to repo's fetch log.

    If the log already holds a rev for this ref, the new rev must descend
    from it; otherwise verify_ancestry() raises and nothing is written.
    The log's parent directory is created when missing.
    """
    previous = _previous_fetched_rev(repo, ref)
    if previous is not None:
        verify_ancestry(repo, rev, previous)

    filename = _log_filename(repo)
    log_dir = os.path.dirname(filename)
    os.makedirs(log_dir, exist_ok=True)
    with open(filename, 'a') as log_file:
        # NOTE(review): '%m%d' has no separator, which looks like a typo for
        # '%m-%d'.  Left as is because existing logs use this format;
        # confirm before changing.
        timestamp = time.strftime('%Y-%m%d-%H:%M:%S%z')
        log_file.write('%s fetch %s %s\n' % (timestamp, rev, ref))
111 | ||
112 | ||
f36d5c6f SW |
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(cachedir: Path, repo: Repo, ref: Ref) -> None:
    """Fetch ref from repo into the same-named ref of the cache at cachedir.

    A failing git invocation raises subprocess.CalledProcessError, which the
    decorator retries with exponential backoff.  The retry budget is read
    from $BACKOFF_MAX_TIME (default '30') each time the limit is evaluated.
    """
    # We don't use --force here because we want to abort and freak out if forced
    # updates are happening.
    refspec = '%s:%s' % (ref, ref)
    command = ['git', '-C', cachedir, 'fetch', repo, refspec]
    subprocess.run(command, check=True)
122 | ||
123 | ||
bef7ce53 SW |
def fetch(repo: Repo, ref: Ref) -> Tuple[Path, Rev]:
    """Fetch ref from repo into the local cache and log the resulting rev.

    The bare cache repository is created on first use.  After the fetch, the
    rev that ref resolves to is checked with verify_ancestry() and handed to
    _log_fetch(), which also checks it against the previously logged rev.

    Returns:
        (cachedir, rev): the cache repository path and the rev ref resolves to.

    Raises:
        subprocess.CalledProcessError: if a git command fails (the fetch
            itself is retried by _git_fetch first).
        Exception: if an ancestry check fails.
    """
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        # Send git's own stdout to stderr; presumably so that stdout carries
        # only the result that _main prints.
        subprocess.run(['git', 'init', '--bare', cachedir],
                       check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref)

    # Let git resolve the ref instead of reading refs/heads/<ref> from disk:
    # that loose file is absent once refs have been packed, and tags are not
    # stored under refs/heads at all.
    resolved = subprocess.run(
        ['git', '-C', cachedir, 'rev-parse', '--verify', ref],
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=True)
    rev = Rev(resolved.stdout.strip())
    verify_ancestry(repo, ref, rev)
    _log_fetch(repo, ref, rev)

    return cachedir, rev
140 | ||
141 | ||
def ensure_rev_available(repo: Repo, ref: Ref, rev: Rev) -> Path:
    """Make sure rev is present in repo's cache as an ancestor of ref.

    When the cache already exists and rev is an ancestor of ref, the cache
    path is returned without fetching.  Otherwise ref is fetched, after
    which rev must exist in the cache and be an ancestor of ref.

    Returns:
        The cache repository path.

    Raises:
        subprocess.CalledProcessError: if fetching fails or rev is still
            missing after the fetch.
        Exception: if rev is present but is not an ancestor of ref.
    """
    cachedir = git_cachedir(repo)
    already_cached = os.path.exists(cachedir) and is_ancestor(repo, ref, rev)
    if not already_cached:
        logging.debug(
            'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
            rev, ref)
        fetch(repo, ref)
        logging.debug('Verifying that fetch retrieved rev %s', rev)
        # `cat-file -e` exits nonzero when the object does not exist.
        existence_check = ['git', '-C', cachedir, 'cat-file', '-e', rev]
        subprocess.run(existence_check, check=True)
        verify_ancestry(repo, ref, rev)

    return cachedir
347be7cf SW |
156 | |
157 | ||
def _main() -> None:
    """Command-line entry point.

    With only Repo and Ref, fetches the ref and prints "<rev> <cachedir>".
    With a Rev as well, ensures that rev is available and prints the cache
    directory.
    """
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        'repo', metavar='Repo', type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref', metavar='Ref', type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev', metavar='Rev', type=Rev,
        nargs='?',  # optional positional; None when omitted
        help=('Ensure that this revision is present. '
              'If this revision is already present locally, no network operations are performed.'))
    args = parser.parse_args()

    if args.rev is not None:
        print(ensure_rev_available(args.repo, args.ref, args.rev))
        return

    cachedir, rev = fetch(args.repo, args.ref)
    # The format string swaps the order: the rev is printed first.
    print('{1} {0}'.format(cachedir, rev))