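"""pinch: pin Nix channels to specific, verified git revisions.

Reads a channels config file, verifies each channel's tarball against the
corresponding git checkout, and writes the pinned name, tarball URL, digest,
and git revision back into the config file.
"""
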
import argparse
import configparser
import filecmp
import functools
import hashlib
import operator
import os
import os.path
import shutil
import subprocess
import tempfile
import types
import urllib.parse
import urllib.request
import xml.dom.minidom

from typing import (
    Dict,
    Iterable,
    List,
    NewType,
    Tuple,
)

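# sha256 digests appear in two encodings: hex ("base16") and Nix's own
# base32.  Distinct types keep the two from being mixed up.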
Digest16 = NewType('Digest16', str)
Digest32 = NewType('Digest32', str)


class ChannelTableEntry(types.SimpleNamespace):
    absolute_url: str
    digest: Digest16
    file: str
    size: int
    url: str


class Channel(types.SimpleNamespace):
    channel_html: bytes
    channel_url: str
    forwarded_url: str
    git_ref: str
    git_repo: str
    git_revision: str
    old_git_revision: str
    release_name: str
    table: Dict[str, ChannelTableEntry]


class VerificationError(Exception):
    pass


class Verification:
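    """Prints progress messages with right-aligned OK/FAIL results.

    result(False) raises VerificationError, aborting the run.
    """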

    def __init__(self) -> None:
        self.line_length = 0

    def status(self, s: str) -> None:
        print(s, end=' ', flush=True)
        self.line_length += 1 + len(s)  # Unicode??

    @staticmethod
    def _color(s: str, c: int) -> str:
        return '\033[%2dm%s\033[00m' % (c, s)

    def result(self, r: bool) -> None:
        message, color = {True: ('OK ', 92), False: ('FAIL', 91)}[r]
        length = len(message)
        cols = shutil.get_terminal_size().columns
        pad = (cols - (self.line_length + length)) % cols
        print(' ' * pad + self._color(message, color))
        self.line_length = 0
        if not r:
            raise VerificationError()

    def check(self, s: str, r: bool) -> None:
        self.status(s)
        self.result(r)

    def ok(self) -> None:
        self.result(True)


def compare(a: str, b: str) -> Tuple[List[str], List[str], List[str]]:
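    """Recursively compare directory trees a and b, ignoring .git/.

    Returns filecmp.cmpfiles' (match, mismatch, errors) lists over the union
    of the files found in either tree.
    """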

    def throw(error: OSError) -> None:
        raise error

    def join(x: str, y: str) -> str:
        return y if x == '.' else os.path.join(x, y)

    def recursive_files(d: str) -> Iterable[str]:
        all_files: List[str] = []
        for path, dirs, files in os.walk(d, onerror=throw):
            rel = os.path.relpath(path, start=d)
            all_files.extend(join(rel, f) for f in files)
            for dir_or_link in dirs:
                if os.path.islink(join(path, dir_or_link)):
                    all_files.append(join(rel, dir_or_link))
        return all_files

    def exclude_dot_git(files: Iterable[str]) -> Iterable[str]:
        return (f for f in files if not f.startswith('.git/'))

    files = functools.reduce(
        operator.or_, (set(
            exclude_dot_git(
                recursive_files(x))) for x in [a, b]))
    return filecmp.cmpfiles(a, b, files, shallow=False)


def fetch(v: Verification, channel: Channel) -> None:
    v.status('Fetching channel')
    request = urllib.request.urlopen(channel.channel_url, timeout=10)
    channel.channel_html = request.read()
    channel.forwarded_url = request.geturl()
    v.result(request.status == 200)
    v.check('Got forwarded', channel.channel_url != channel.forwarded_url)


def parse_channel(v: Verification, channel: Channel) -> None:
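    """Extract the release name, git revision, and file table from the
    channel's HTML description, populating the corresponding Channel fields.
    """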
    v.status('Parsing channel description as XML')
    d = xml.dom.minidom.parseString(channel.channel_html)
    v.ok()

    v.status('Extracting release name:')
    title_name = d.getElementsByTagName(
        'title')[0].firstChild.nodeValue.split()[2]
    h1_name = d.getElementsByTagName('h1')[0].firstChild.nodeValue.split()[2]
    v.status(title_name)
    v.result(title_name == h1_name)
    channel.release_name = title_name

    v.status('Extracting git commit:')
    git_commit_node = d.getElementsByTagName('tt')[0]
    channel.git_revision = git_commit_node.firstChild.nodeValue
    v.status(channel.git_revision)
    v.ok()
    v.status('Verifying git commit label')
    v.result(git_commit_node.previousSibling.nodeValue == 'Git commit ')

    v.status('Parsing table')
    channel.table = {}
    for row in d.getElementsByTagName('tr')[1:]:
        name = row.childNodes[0].firstChild.firstChild.nodeValue
        url = row.childNodes[0].firstChild.getAttribute('href')
        size = int(row.childNodes[1].firstChild.nodeValue)
        digest = Digest16(row.childNodes[2].firstChild.firstChild.nodeValue)
        channel.table[name] = ChannelTableEntry(
            url=url, digest=digest, size=size)
    v.ok()


def digest_string(s: bytes) -> Digest16:
    return Digest16(hashlib.sha256(s).hexdigest())


def digest_file(filename: str) -> Digest16:
    hasher = hashlib.sha256()
    with open(filename, 'rb') as f:
        # pylint: disable=cell-var-from-loop
        for block in iter(lambda: f.read(4096), b''):
            hasher.update(block)
    return Digest16(hasher.hexdigest())


def to_Digest16(v: Verification, digest32: Digest32) -> Digest16:
    v.status('Converting digest to base16')
    process = subprocess.run(
        ['nix', 'to-base16', '--type', 'sha256', digest32], capture_output=True)
    v.result(process.returncode == 0)
    return Digest16(process.stdout.decode().strip())


def to_Digest32(v: Verification, digest16: Digest16) -> Digest32:
    v.status('Converting digest to base32')
    process = subprocess.run(
        ['nix', 'to-base32', '--type', 'sha256', digest16], capture_output=True)
    v.result(process.returncode == 0)
    return Digest32(process.stdout.decode().strip())


def fetch_with_nix_prefetch_url(
        v: Verification,
        url: str,
        digest: Digest16) -> str:
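    """Download url into the nix store via nix-prefetch-url, pinned to the
    expected sha256 digest, then independently re-hash the downloaded file.

    Returns the store path of the downloaded file.
    """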
    v.status('Fetching %s' % url)
    process = subprocess.run(
        ['nix-prefetch-url', '--print-path', url, digest], capture_output=True)
    v.result(process.returncode == 0)
    prefetch_digest, path, empty = process.stdout.decode().split('\n')
    assert empty == ''
    v.check("Verifying nix-prefetch-url's digest",
            to_Digest16(v, Digest32(prefetch_digest)) == digest)
    v.status("Verifying file digest")
    file_digest = digest_file(path)
    v.result(file_digest == digest)
    return path


def fetch_resources(v: Verification, channel: Channel) -> None:
    for resource in ['git-revision', 'nixexprs.tar.xz']:
        fields = channel.table[resource]
        fields.absolute_url = urllib.parse.urljoin(
            channel.forwarded_url, fields.url)
        fields.file = fetch_with_nix_prefetch_url(
            v, fields.absolute_url, fields.digest)
    v.status('Verifying git commit on main page matches git commit in table')
    v.result(
        open(
            channel.table['git-revision'].file).read(999) == channel.git_revision)


def git_cachedir(git_repo: str) -> str:
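    """Path of the local bare-git cache directory for git_repo."""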
    # TODO: Consider using pyxdg to find this path.
    return os.path.expanduser('~/.cache/nix-pin-channel/git/%s' % digest_string(git_repo.encode()))


def git_fetch(v: Verification, channel: Channel) -> None:
    # It would be nice if we could share the nix git cache, but as of the time
    # of writing it is transitioning from gitv2 (deprecated) to gitv3 (not ready
    # yet), and trying to straddle them both is too far into nix implementation
    # details for my comfort.  So we re-implement here half of nix.fetchGit.
    # :(

    cachedir = git_cachedir(channel.git_repo)
    if not os.path.exists(cachedir):
        v.status("Initializing git repo")
        process = subprocess.run(
            ['git', 'init', '--bare', cachedir])
        v.result(process.returncode == 0)

    have_rev = False
    if hasattr(channel, 'git_revision'):
        v.status('Checking if we already have this rev:')
        process = subprocess.run(
            ['git', '-C', cachedir, 'cat-file', '-e', channel.git_revision])
        if process.returncode == 0:
            v.status('yes')
        if process.returncode == 1:
            v.status('no')
        v.result(process.returncode == 0 or process.returncode == 1)
        have_rev = process.returncode == 0

    if not have_rev:
        v.status(
            'Fetching ref "%s" from %s' %
            (channel.git_ref, channel.git_repo))
        # We don't use --force here because we want to abort and freak out if forced
        # updates are happening.
        process = subprocess.run(['git',
                                  '-C',
                                  cachedir,
                                  'fetch',
                                  channel.git_repo,
                                  '%s:%s' % (channel.git_ref,
                                             channel.git_ref)])
        v.result(process.returncode == 0)
        if hasattr(channel, 'git_revision'):
            v.status('Verifying that fetch retrieved this rev')
            process = subprocess.run(
                ['git', '-C', cachedir, 'cat-file', '-e', channel.git_revision])
            v.result(process.returncode == 0)

    if not hasattr(channel, 'git_revision'):
        channel.git_revision = open(
            os.path.join(
                cachedir,
                'refs',
                'heads',
                channel.git_ref)).read(999).strip()

    v.status('Verifying rev is an ancestor of ref')
    process = subprocess.run(['git',
                              '-C',
                              cachedir,
                              'merge-base',
                              '--is-ancestor',
                              channel.git_revision,
                              channel.git_ref])
    v.result(process.returncode == 0)

    if hasattr(channel, 'old_git_revision'):
        v.status(
            'Verifying rev is an ancestor of previous rev %s' %
            channel.old_git_revision)
        process = subprocess.run(['git',
                                  '-C',
                                  cachedir,
                                  'merge-base',
                                  '--is-ancestor',
                                  channel.old_git_revision,
                                  channel.git_revision])
        v.result(process.returncode == 0)


def compare_tarball_and_git(
        v: Verification,
        channel: Channel,
        channel_contents: str,
        git_contents: str) -> None:
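    """Compare the extracted channel tarball against the git checkout.

    All files must match; only a small, fixed list of files (e.g.
    .git-revision, programs.sqlite) may be incomparable between the two trees.
    """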
    v.status('Comparing channel tarball with git checkout')
    match, mismatch, errors = compare(os.path.join(
        channel_contents, channel.release_name), git_contents)
    v.ok()
    v.check('%d files match' % len(match), len(match) > 0)
    v.check('%d files differ' % len(mismatch), len(mismatch) == 0)
    expected_errors = [
        '.git-revision',
        '.version-suffix',
        'nixpkgs',
        'programs.sqlite',
        'svn-revision']
    benign_errors = []
    for ee in expected_errors:
        if ee in errors:
            errors.remove(ee)
            benign_errors.append(ee)
    v.check(
        '%d unexpected incomparable files' %
        len(errors),
        len(errors) == 0)
    v.check(
        '(%d of %d expected incomparable files)' %
        (len(benign_errors),
         len(expected_errors)),
        len(benign_errors) == len(expected_errors))


def extract_tarball(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Extracting tarball %s' %
             channel.table['nixexprs.tar.xz'].file)
    shutil.unpack_archive(
        channel.table['nixexprs.tar.xz'].file,
        dest)
    v.ok()


def git_checkout(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Checking out corresponding git revision')
    git = subprocess.Popen(['git',
                            '-C',
                            git_cachedir(channel.git_repo),
                            'archive',
                            channel.git_revision],
                           stdout=subprocess.PIPE)
    tar = subprocess.Popen(
        ['tar', 'x', '-C', dest, '-f', '-'], stdin=git.stdout)
    git.stdout.close()
    tar.wait()
    git.wait()
    v.result(git.returncode == 0 and tar.returncode == 0)


def check_channel_metadata(
        v: Verification,
        channel: Channel,
        channel_contents: str) -> None:
    v.status('Verifying git commit in channel tarball')
    v.result(
        open(
            os.path.join(
                channel_contents,
                channel.release_name,
                '.git-revision')).read(999) == channel.git_revision)

    v.status(
        'Verifying version-suffix is a suffix of release name %s:' %
        channel.release_name)
    version_suffix = open(
        os.path.join(
            channel_contents,
            channel.release_name,
            '.version-suffix')).read(999)
    v.status(version_suffix)
    v.result(channel.release_name.endswith(version_suffix))


def check_channel_contents(v: Verification, channel: Channel) -> None:
    with tempfile.TemporaryDirectory() as channel_contents, \
            tempfile.TemporaryDirectory() as git_contents:

        extract_tarball(v, channel, channel_contents)
        check_channel_metadata(v, channel, channel_contents)

        git_checkout(v, channel, git_contents)

        compare_tarball_and_git(v, channel, channel_contents, git_contents)

        v.status('Removing temporary directories')
    v.ok()


def pin_channel(v: Verification, channel: Channel) -> None:
    fetch(v, channel)
    parse_channel(v, channel)
    fetch_resources(v, channel)
    git_fetch(v, channel)
    check_channel_contents(v, channel)


def git_revision_name(v: Verification, channel: Channel) -> str:
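    """Name the pinned revision as '<repo basename>-<commit time>-<short hash>'."""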
    v.status('Getting commit date')
    process = subprocess.run(['git',
                              '-C',
                              git_cachedir(channel.git_repo),
                              'log',
                              '-n1',
                              '--format=%ct-%h',
                              '--abbrev=11',
                              channel.git_revision],
                             capture_output=True)
    v.result(process.returncode == 0 and process.stdout != b'')
    return '%s-%s' % (os.path.basename(channel.git_repo),
                      process.stdout.decode().strip())


def make_channel(conf: configparser.SectionProxy) -> Channel:
    channel = Channel(**dict(conf.items()))
    if hasattr(channel, 'git_revision'):
        channel.old_git_revision = channel.git_revision
        del channel.git_revision
    return channel


def pin(args: argparse.Namespace) -> None:
    v = Verification()
    config = configparser.ConfigParser()
    config.read_file(open(args.channels_file), args.channels_file)
    for section in config.sections():
        channel = make_channel(config[section])
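        # Sections with a channel_url are full Nix channels: pin and verify
        # the channel tarball against git.  Otherwise the section just names
        # a git repository and ref to pin.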
        if 'channel_url' in config[section]:
            pin_channel(v, channel)
            config[section]['name'] = channel.release_name
            config[section]['tarball_url'] = channel.table['nixexprs.tar.xz'].absolute_url
            config[section]['tarball_sha256'] = channel.table['nixexprs.tar.xz'].digest
        else:
            git_fetch(v, channel)
            config[section]['name'] = git_revision_name(v, channel)
            config[section]['git_revision'] = channel.git_revision

    with open(args.channels_file, 'w') as configfile:
        config.write(configfile)


def main() -> None:
    parser = argparse.ArgumentParser(prog='pinch')
    subparsers = parser.add_subparsers(dest='mode', required=True)
    parser_pin = subparsers.add_parser('pin')
    parser_pin.add_argument('channels_file', type=str)
    parser_pin.set_defaults(func=pin)
    args = parser.parse_args()
    args.func(args)


main()