# It would be nice if we could share the nix git cache, but as of the
# time of writing it is transitioning from gitv2 (deprecated) to gitv3
# (not ready yet), and trying to straddle them both is too far into nix
# implementation details for my comfort.  So we re-implement here half of
# nix's builtins.fetchGit.  :(
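#
# Typical library use (illustrative values; the CLI in _main wraps the
# same calls):
#   cachedir, rev = fetch('https://github.com/NixOS/nixpkgs.git', 'master')
#   cachedir = ensure_rev_available(repo, ref, rev)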

import argparse
import functools
import hashlib
import logging
import os
import subprocess
import sys
import time

from typing import Iterator, NamedTuple, Optional, TypeVar, Tuple, Union

import backoff

Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str   # eg: "master" or "v1.0.0"
Rev = str   # eg: "53a27350551844e1ed1a9257690294767389ef0d"
RefOrRev = Union[Ref, Rev]


class _LogEntry(NamedTuple):
    ref: Ref
    rev: Rev


T = TypeVar('T')


def _repo_hashname(repo: Repo) -> str:
    return hashlib.sha256(repo.encode()).hexdigest()


def git_cachedir(repo: Repo) -> Path:
    # Use xdg module when it's less painful to have as a dependency
    XDG_CACHE_HOME = Path(
        os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache')))

    return Path(os.path.join(
        XDG_CACHE_HOME,
        'git-cache/v1',
        _repo_hashname(repo)))


def _log_filename(repo: Repo) -> Path:
    # Use xdg module when it's less painful to have as a dependency
    XDG_DATA_HOME = Path(
        os.environ.get('XDG_DATA_HOME', os.path.expanduser('~/.local/share')))

    return Path(os.path.join(
        XDG_DATA_HOME,
        'git-cache/v1',
        _repo_hashname(repo)))


def is_ancestor(repo: Repo, descendant: RefOrRev, ancestor: RefOrRev) -> bool:
    cachedir = git_cachedir(repo)
    logging.debug('Checking if %s is an ancestor of %s', ancestor, descendant)
    process = subprocess.run(
        ['git', '-C', cachedir, 'merge-base', '--is-ancestor',
         ancestor, descendant],
        check=False)
    return process.returncode == 0


def verify_ancestry(
        repo: Repo,
        descendant: RefOrRev,
        ancestor: RefOrRev) -> None:
    if not is_ancestor(repo, descendant, ancestor):
        raise Exception('%s is not an ancestor of %s' % (ancestor, descendant))


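# Each line of the fetch log has the form "<timestamp> fetch <rev> <ref>"
# (see _log_fetch below), for example (illustrative values):
#   2024-0101-12:00:00+0000 fetch 53a27350551844e1ed1a9257690294767389ef0d master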
def _read_fetch_log(repo: Repo) -> Iterator[_LogEntry]:
    filename = _log_filename(repo)
    if not os.path.exists(filename):
        return
    with open(filename, 'r') as f:
        for line in f:
            _, _, rev, ref = line.strip().split(maxsplit=3)
            yield _LogEntry(ref, rev)


def _last(it: Iterator[T]) -> Optional[T]:
    return functools.reduce(lambda a, b: b, it, None)


def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    return _last(entry.rev for entry in _read_fetch_log(repo)
                 if entry.ref == ref)


def _log_fetch(repo: Repo, ref: Ref, rev: Rev) -> None:
    prev_rev = _previous_fetched_rev(repo, ref)
    if prev_rev is not None:
        verify_ancestry(repo, rev, prev_rev)
    filename = _log_filename(repo)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'a') as f:
        f.write('%s fetch %s %s\n' %
                (time.strftime('%Y-%m%d-%H:%M:%S%z'), rev, ref))


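# _git_fetch retries failed fetches with exponential backoff; the retry
# window defaults to 30 seconds and can be widened via the BACKOFF_MAX_TIME
# environment variable, e.g.:
#   BACKOFF_MAX_TIME=120 git-cache https://github.com/NixOS/nixpkgs.git master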
@backoff.on_exception(
    backoff.expo,
    subprocess.CalledProcessError,
    max_time=lambda: int(os.environ.get('BACKOFF_MAX_TIME', '30')))
def _git_fetch(cachedir: Path, repo: Repo, ref: Ref) -> None:
    # We don't use --force here because we want to abort and freak out if forced
    # updates are happening.
    subprocess.run(['git', '-C', cachedir, 'fetch', repo,
                    '%s:%s' % (ref, ref)], check=True)


def fetch(repo: Repo, ref: Ref) -> Tuple[Path, Rev]:
    cachedir = git_cachedir(repo)
    if not os.path.exists(cachedir):
        logging.debug("Initializing git repo")
        subprocess.run(['git', 'init', '--bare', cachedir],
                       check=True, stdout=sys.stderr)

    logging.debug('Fetching ref "%s" from %s', ref, repo)
    _git_fetch(cachedir, repo, ref)

    with open(os.path.join(cachedir, 'refs', 'heads', ref)) as rev_file:
        rev = Rev(rev_file.read(999).strip())
    verify_ancestry(repo, ref, rev)
    _log_fetch(repo, ref, rev)

    return cachedir, rev


def ensure_rev_available(repo: Repo, ref: Ref, rev: Rev) -> Path:
    cachedir = git_cachedir(repo)
    if os.path.exists(cachedir) and is_ancestor(repo, ref, rev):
        return cachedir

    logging.debug(
        'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
        rev, ref)
    fetch(repo, ref)
    logging.debug('Verifying that fetch retrieved rev %s', rev)
    subprocess.run(['git', '-C', cachedir, 'cat-file', '-e', rev], check=True)
    verify_ancestry(repo, ref, rev)

    return cachedir


def _main() -> None:
    parser = argparse.ArgumentParser(
        description='Cache remote git repositories locally.',
        epilog='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
    parser.add_argument(
        'repo',
        metavar='Repo',
        type=Repo,
        help='Git repository URL')
    parser.add_argument(
        'ref',
        metavar='Ref',
        type=Ref,
        help='Ref (branch or tag) in the git repo')
    parser.add_argument(
        'rev',
        metavar='Rev',
        type=Rev,
        nargs='?',
        help='Ensure that this revision is present. ' +
        'If this revision is already present locally, no network operations are performed.')
    args = parser.parse_args()

    if args.rev is None:
        print('{1} {0}'.format(*fetch(args.repo, args.ref)))
    else:
        print(ensure_rev_available(args.repo, args.ref, args.rev))
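

# Invoke the command-line interface when run as a script
# (e.g. `python git_cache.py <repo> <ref> [rev]`).
if __name__ == '__main__':
    _main()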