# It would be nice if we could share the nix git cache, but as of the
# time of writing it is transitioning from gitv2 (deprecated) to gitv3
# (not ready yet), and trying to straddle them both is too far into nix
# implementation details for my comfort. So we re-implement here half of
# nix's builtins.fetchGit. :(
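#
# Rough layout of the on-disk state this module manages (paths shown with
# the XDG defaults used below):
#   ~/.cache/git-cache/v1/<sha256 of repo URL>        bare repo holding fetched refs
#   ~/.local/share/git-cache/v1/<sha256 of repo URL>  append-only log of successful fetches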

import argparse
import functools
import hashlib
import logging
import os
import subprocess
import sys
import time

from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union

import backoff

Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str   # eg: "master" or "v1.0.0"
Rev = str   # eg: "53a27350551844e1ed1a9257690294767389ef0d"
RefOrRev = Union[Ref, Rev]


class _LogEntry(NamedTuple):
    ref: Ref
    rev: Rev


T = TypeVar('T')


def _repo_hashname(repo: Repo) -> str:
    return hashlib.sha256(repo.encode()).hexdigest()


def git_cachedir(repo: Repo) -> Path:
    # Use xdg module when it's less painful to have as a dependency
    XDG_CACHE_HOME = Path(
        os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache')))

    return Path(os.path.join(
        XDG_CACHE_HOME,
        'git-cache/v1',
        _repo_hashname(repo)))


def _log_filename(repo: Repo) -> Path:
    # Use xdg module when it's less painful to have as a dependency
    XDG_DATA_HOME = Path(
        os.environ.get('XDG_DATA_HOME', os.path.expanduser('~/.local/share')))

    return Path(os.path.join(
        XDG_DATA_HOME,
        'git-cache/v1',
        _repo_hashname(repo)))


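# Ancestry checks use `git merge-base --is-ancestor`, which exits 0 when
# `ancestor` is reachable from `descendant`. This is the mechanism the rest
# of the module relies on to notice forced updates / rewritten history.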
def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    process = subprocess.run(['git',
                              '-C',
                              cachedir,
                              'merge-base',
                              '--is-ancestor',
                              ancestor,
                              descendant],
                             check=False)
    return process.returncode == 0


def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev) -> None:
    if not is_ancestor(repo, descendant, ancestor):
        raise Exception('%s is not an ancestor of %s' % (ancestor, descendant))


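# Each successful fetch is recorded in the per-repo log file as one line of
# the form "<timestamp> fetch <rev> <ref>"; _read_fetch_log parses those
# lines back into _LogEntry tuples.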
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    filename = _log_filename(repo)
    if not os.path.exists(filename):
        return
    with open(filename, 'r') as f:
        for line in f:
            _, _, rev, ref = line.strip().split(maxsplit=3)
            yield _LogEntry(ref, rev)


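# _last exhausts an iterator and returns its final element, or None if the
# iterator was empty; e.g. _last(iter('abc')) == 'c', _last(iter([])) is None.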
def _last(it: Iterator[T]) -> Optional[T]:
    return functools.reduce(lambda a, b: b, it, None)


def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    return _last(entry.rev for entry in _read_fetch_log(
        repo) if entry.ref == ref)


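# Before appending a new log entry, _log_fetch checks that the newly fetched
# rev descends from the rev previously recorded for the same ref, so an
# upstream history rewrite fails loudly instead of being silently accepted.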
def _log_fetch(repo: Repo, ref: Ref, rev: Rev) -> None:
    prev_rev = _previous_fetched_rev(repo, ref)
    if prev_rev is not None:
        verify_ancestry(repo, rev, prev_rev)
    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'a') as f:
        f.write('%s fetch %s %s\n' %
                (time.strftime('%Y-%m%d-%H:%M:%S%z'), rev, ref))


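# Network fetches are retried with exponential backoff; the total retry
# budget is capped by the BACKOFF_MAX_TIME environment variable (seconds,
# default 30).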
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(cachedir: Path, repo: Repo, ref: Ref) -> None:
    # We don't use --force here because we want to abort and freak out if forced
    # updates are happening.
    subprocess.run(['git', '-C', cachedir, 'fetch', repo,
                    '%s:%s' % (ref, ref)], check=True)


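# fetch() is the entry point for updating the cache. A hypothetical caller
# would do something like
#     cachedir, rev = fetch('https://github.com/NixOS/nixpkgs.git', 'master')
# and then read objects out of the bare repo at `cachedir`.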
def fetch(repo: Repo, ref: Ref) -> Tuple[Path, Rev]:
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        subprocess.run(['git', 'init', '--bare', cachedir],
                       check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref)

    with open(os.path.join(cachedir, 'refs', 'heads', ref)) as rev_file:
        rev = Rev(rev_file.read(999).strip())
    verify_ancestry(repo, ref, rev)
    _log_fetch(repo, ref, rev)

    return cachedir, rev


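# ensure_rev_available() only touches the network when the requested rev is
# not already present in the local cache and reachable from `ref`; it
# returns the cache directory either way.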
def ensure_rev_available(repo: Repo, ref: Ref, rev: Rev) -> Path:
    cachedir = git_cachedir(repo)
    if os.path.exists(cachedir) and is_ancestor(repo, ref, rev):
        return cachedir

    logging.debug(
        'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
        rev, ref)
    fetch(repo, ref)
    logging.debug('Verifying that fetch retrieved rev %s', rev)
    subprocess.run(['git', '-C', cachedir, 'cat-file', '-e', rev], check=True)
    verify_ancestry(repo, ref, rev)

    return cachedir


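# Command-line interface. With just a repo and ref it fetches and prints
# "<rev> <cachedir>"; with a rev as well it prints only the cache directory,
# fetching only when the rev is not already available locally.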
def _main() -> None:
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        'repo',
        metavar='Repo',
        type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref',
        metavar='Ref',
        type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev',
        metavar='Rev',
        type=Rev,
        nargs='?',
        help='Ensure that this revision is present. ' +
        'If this revision is already present locally, no network operations are performed.')
    args = parser.parse_args()

    if args.rev is None:
        print('{1} {0}'.format(*fetch(args.repo, args.ref)))
    else:
        print(ensure_rev_available(args.repo, args.ref, args.rev))
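

# Script entry point (a minimal addition so _main() is actually invoked when
# the module is run directly).
if __name__ == '__main__':
    _main()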