import configparser
import filecmp
import functools
import hashlib
import operator
import os
import os.path
import shutil
import subprocess
import sys
import tempfile
import types
import urllib.parse
import urllib.request
import xml.dom.minidom

from typing import (
    Dict,
    Iterable,
    List,
    NewType,
    Tuple,
)

Digest16 = NewType('Digest16', str)
Digest32 = NewType('Digest32', str)


class ChannelTableEntry(types.SimpleNamespace):
    absolute_url: str
    digest: Digest16
    file: str
    size: int
    url: str


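# One channel to pin: the settings from one config-file section plus
# everything learned about the channel while pinning it.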
class Channel(types.SimpleNamespace):
    channel_html: bytes
    channel_url: str
    forwarded_url: str
    git_cachedir: str
    git_ref: str
    git_repo: str
    git_revision: str
    old_git_revision: str
    release_name: str
    table: Dict[str, ChannelTableEntry]


class VerificationError(Exception):
    pass


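# Progress reporting: prints status messages and their outcomes ("OK"/"FAIL"),
# raising VerificationError on any failed check.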
class Verification:

    def __init__(self) -> None:
        self.line_length = 0

    def status(self, s: str) -> None:
        print(s, end=' ', flush=True)
        self.line_length += 1 + len(s)  # Unicode??

    @staticmethod
    def _color(s: str, c: int) -> str:
        return '\033[%2dm%s\033[00m' % (c, s)

    def result(self, r: bool) -> None:
        message, color = {True: ('OK ', 92), False: ('FAIL', 91)}[r]
        length = len(message)
        cols = shutil.get_terminal_size().columns
        pad = (cols - (self.line_length + length)) % cols
        print(' ' * pad + self._color(message, color))
        self.line_length = 0
        if not r:
            raise VerificationError()

    def check(self, s: str, r: bool) -> None:
        self.status(s)
        self.result(r)

    def ok(self) -> None:
        self.result(True)


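# Compare the union of files under two directory trees byte-for-byte
# (.git/ is excluded; symlinked directories are compared as entries),
# returning filecmp.cmpfiles' (match, mismatch, errors) lists.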
def compare(a: str, b: str) -> Tuple[List[str], List[str], List[str]]:

    def throw(error: OSError) -> None:
        raise error

    def join(x: str, y: str) -> str:
        return y if x == '.' else os.path.join(x, y)

    def recursive_files(d: str) -> Iterable[str]:
        all_files: List[str] = []
        for path, dirs, files in os.walk(d, onerror=throw):
            rel = os.path.relpath(path, start=d)
            all_files.extend(join(rel, f) for f in files)
            for dir_or_link in dirs:
                if os.path.islink(join(path, dir_or_link)):
                    all_files.append(join(rel, dir_or_link))
        return all_files

    def exclude_dot_git(files: Iterable[str]) -> Iterable[str]:
        return (f for f in files if not f.startswith('.git/'))

    files = functools.reduce(
        operator.or_, (set(
            exclude_dot_git(
                recursive_files(x))) for x in [a, b]))
    return filecmp.cmpfiles(a, b, files, shallow=False)


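# Download the channel page and remember where its redirect landed.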
def fetch(v: Verification, channel: Channel) -> None:
    v.status('Fetching channel')
    request = urllib.request.urlopen(channel.channel_url, timeout=10)
    channel.channel_html = request.read()
    channel.forwarded_url = request.geturl()
    v.result(request.status == 200)
    v.check('Got forwarded', channel.channel_url != channel.forwarded_url)


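# Extract the release name, git commit, and resource table from the channel
# page HTML.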
def parse_channel(v: Verification, channel: Channel) -> None:
    v.status('Parsing channel description as XML')
    d = xml.dom.minidom.parseString(channel.channel_html)
    v.ok()

    v.status('Extracting release name:')
    title_name = d.getElementsByTagName(
        'title')[0].firstChild.nodeValue.split()[2]
    h1_name = d.getElementsByTagName('h1')[0].firstChild.nodeValue.split()[2]
    v.status(title_name)
    v.result(title_name == h1_name)
    channel.release_name = title_name

    v.status('Extracting git commit:')
    git_commit_node = d.getElementsByTagName('tt')[0]
    channel.git_revision = git_commit_node.firstChild.nodeValue
    v.status(channel.git_revision)
    v.ok()
    v.status('Verifying git commit label')
    v.result(git_commit_node.previousSibling.nodeValue == 'Git commit ')

    v.status('Parsing table')
    channel.table = {}
    for row in d.getElementsByTagName('tr')[1:]:
        name = row.childNodes[0].firstChild.firstChild.nodeValue
        url = row.childNodes[0].firstChild.getAttribute('href')
        size = int(row.childNodes[1].firstChild.nodeValue)
        digest = Digest16(row.childNodes[2].firstChild.firstChild.nodeValue)
        channel.table[name] = ChannelTableEntry(
            url=url, digest=digest, size=size)
    v.ok()


def digest_string(s: bytes) -> Digest16:
    return Digest16(hashlib.sha256(s).hexdigest())


def digest_file(filename: str) -> Digest16:
    hasher = hashlib.sha256()
    with open(filename, 'rb') as f:
        # pylint: disable=cell-var-from-loop
        for block in iter(lambda: f.read(4096), b''):
            hasher.update(block)
    return Digest16(hasher.hexdigest())


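# Convert a base-32 sha256 digest to base-16 by shelling out to
# `nix to-base16` (to_Digest32 below does the reverse).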
def to_Digest16(v: Verification, digest32: Digest32) -> Digest16:
    v.status('Converting digest to base16')
    process = subprocess.run(
        ['nix', 'to-base16', '--type', 'sha256', digest32], capture_output=True)
    v.result(process.returncode == 0)
    return Digest16(process.stdout.decode().strip())


def to_Digest32(v: Verification, digest16: Digest16) -> Digest32:
    v.status('Converting digest to base32')
    process = subprocess.run(
        ['nix', 'to-base32', '--type', 'sha256', digest16], capture_output=True)
    v.result(process.returncode == 0)
    return Digest32(process.stdout.decode().strip())


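# Fetch url into the nix store with nix-prefetch-url, then re-verify its
# digest before trusting the returned store path.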
def fetch_with_nix_prefetch_url(
        v: Verification,
        url: str,
        digest: Digest16) -> str:
    v.status('Fetching %s' % url)
    process = subprocess.run(
        ['nix-prefetch-url', '--print-path', url, digest], capture_output=True)
    v.result(process.returncode == 0)
    prefetch_digest, path, empty = process.stdout.decode().split('\n')
    assert empty == ''
    v.check("Verifying nix-prefetch-url's digest",
            to_Digest16(v, Digest32(prefetch_digest)) == digest)
    v.status("Verifying file digest")
    file_digest = digest_file(path)
    v.result(file_digest == digest)
    return path


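# Download the git-revision file and nixexprs tarball listed in the channel's
# table, and check that the former matches the commit on the main page.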
def fetch_resources(v: Verification, channel: Channel) -> None:
    for resource in ['git-revision', 'nixexprs.tar.xz']:
        fields = channel.table[resource]
        fields.absolute_url = urllib.parse.urljoin(
            channel.forwarded_url, fields.url)
        fields.file = fetch_with_nix_prefetch_url(
            v, fields.absolute_url, fields.digest)
    v.status('Verifying git commit on main page matches git commit in table')
    v.result(
        open(
            channel.table['git-revision'].file).read(999) == channel.git_revision)


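# Make sure the channel's git revision is present in a local bare cache clone
# of channel.git_repo (fetching channel.git_ref if necessary), and verify the
# ancestry relationships between the rev, the ref, and the previously pinned
# rev.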
def git_fetch(v: Verification, channel: Channel) -> None:
    # It would be nice if we could share the nix git cache, but as of the time
    # of writing it is transitioning from gitv2 (deprecated) to gitv3 (not ready
    # yet), and trying to straddle them both is too far into nix implementation
    # details for my comfort. So we re-implement here half of nix.fetchGit.
    # :(

    # TODO: Consider using pyxdg to find this path.
    channel.git_cachedir = os.path.expanduser(
        '~/.cache/nix-pin-channel/git/%s' %
        digest_string(
            channel.git_repo.encode()))
    if not os.path.exists(channel.git_cachedir):
        v.status("Initializing git repo")
        process = subprocess.run(
            ['git', 'init', '--bare', channel.git_cachedir])
        v.result(process.returncode == 0)

    have_rev = False
    if hasattr(channel, 'git_revision'):
        v.status('Checking if we already have this rev:')
        process = subprocess.run(
            ['git', '-C', channel.git_cachedir, 'cat-file', '-e', channel.git_revision])
        if process.returncode == 0:
            v.status('yes')
        if process.returncode == 1:
            v.status('no')
        v.result(process.returncode == 0 or process.returncode == 1)
        have_rev = process.returncode == 0

    if not have_rev:
        v.status('Fetching ref "%s" from %s' % (channel.git_ref, channel.git_repo))
        # We don't use --force here because we want to abort and freak out if forced
        # updates are happening.
        process = subprocess.run(['git',
                                  '-C',
                                  channel.git_cachedir,
                                  'fetch',
                                  channel.git_repo,
                                  '%s:%s' % (channel.git_ref,
                                             channel.git_ref)])
        v.result(process.returncode == 0)
        if hasattr(channel, 'git_revision'):
            v.status('Verifying that fetch retrieved this rev')
            process = subprocess.run(
                ['git', '-C', channel.git_cachedir, 'cat-file', '-e', channel.git_revision])
            v.result(process.returncode == 0)

    if not hasattr(channel, 'git_revision'):
        channel.git_revision = open(
            os.path.join(
                channel.git_cachedir,
                'refs',
                'heads',
                channel.git_ref)).read(999).strip()

    v.status('Verifying rev is an ancestor of ref')
    process = subprocess.run(['git',
                              '-C',
                              channel.git_cachedir,
                              'merge-base',
                              '--is-ancestor',
                              channel.git_revision,
                              channel.git_ref])
    v.result(process.returncode == 0)

    if hasattr(channel, 'old_git_revision'):
        v.status(
            'Verifying previous rev %s is an ancestor of this rev' %
            channel.old_git_revision)
        process = subprocess.run(['git',
                                  '-C',
                                  channel.git_cachedir,
                                  'merge-base',
                                  '--is-ancestor',
                                  channel.old_git_revision,
                                  channel.git_revision])
        v.result(process.returncode == 0)


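# The tarball and the git checkout should differ only in a small set of files
# generated by the channel build.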
def compare_tarball_and_git(
        v: Verification,
        channel: Channel,
        channel_contents: str,
        git_contents: str) -> None:
    v.status('Comparing channel tarball with git checkout')
    match, mismatch, errors = compare(os.path.join(
        channel_contents, channel.release_name), git_contents)
    v.ok()
    v.check('%d files match' % len(match), len(match) > 0)
    v.check('%d files differ' % len(mismatch), len(mismatch) == 0)
    expected_errors = [
        '.git-revision',
        '.version-suffix',
        'nixpkgs',
        'programs.sqlite',
        'svn-revision']
    benign_errors = []
    for ee in expected_errors:
        if ee in errors:
            errors.remove(ee)
            benign_errors.append(ee)
    v.check(
        '%d unexpected incomparable files' %
        len(errors),
        len(errors) == 0)
    v.check(
        '(%d of %d expected incomparable files)' %
        (len(benign_errors),
         len(expected_errors)),
        len(benign_errors) == len(expected_errors))


def extract_tarball(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Extracting tarball %s' %
             channel.table['nixexprs.tar.xz'].file)
    shutil.unpack_archive(
        channel.table['nixexprs.tar.xz'].file,
        dest)
    v.ok()


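# Extract the pinned git revision into dest by piping `git archive` into tar.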
def git_checkout(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Checking out corresponding git revision')
    git = subprocess.Popen(['git',
                            '-C',
                            channel.git_cachedir,
                            'archive',
                            channel.git_revision],
                           stdout=subprocess.PIPE)
    tar = subprocess.Popen(
        ['tar', 'x', '-C', dest, '-f', '-'], stdin=git.stdout)
    git.stdout.close()
    tar.wait()
    git.wait()
    v.result(git.returncode == 0 and tar.returncode == 0)


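# The tarball's .git-revision must match the channel page's commit, and its
# .version-suffix must be a suffix of the release name.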
def check_channel_metadata(
        v: Verification,
        channel: Channel,
        channel_contents: str) -> None:
    v.status('Verifying git commit in channel tarball')
    v.result(
        open(
            os.path.join(
                channel_contents,
                channel.release_name,
                '.git-revision')).read(999) == channel.git_revision)

    v.status(
        'Verifying version-suffix is a suffix of release name %s:' %
        channel.release_name)
    version_suffix = open(
        os.path.join(
            channel_contents,
            channel.release_name,
            '.version-suffix')).read(999)
    v.status(version_suffix)
    v.result(channel.release_name.endswith(version_suffix))


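# Unpack the tarball and a git checkout of the same revision into temporary
# directories and confirm they agree.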
def check_channel_contents(v: Verification, channel: Channel) -> None:
    with tempfile.TemporaryDirectory() as channel_contents, \
            tempfile.TemporaryDirectory() as git_contents:

        extract_tarball(v, channel, channel_contents)
        check_channel_metadata(v, channel, channel_contents)

        git_checkout(v, channel, git_contents)

        compare_tarball_and_git(v, channel, channel_contents, git_contents)

        v.status('Removing temporary directories')
    v.ok()


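# The full pinning pipeline for one channel: fetch and parse the channel page,
# fetch its resources and git history, then cross-check everything.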
def pin_channel(v: Verification, channel: Channel) -> None:
    fetch(v, channel)
    parse_channel(v, channel)
    fetch_resources(v, channel)
    git_fetch(v, channel)
    check_channel_contents(v, channel)


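# Build a Channel from a config section; a git_revision already present in the
# config becomes old_git_revision, the previously pinned revision.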
def make_channel(conf: configparser.SectionProxy) -> Channel:
    channel = Channel(**dict(conf.items()))
    if hasattr(channel, 'git_revision'):
        channel.old_git_revision = channel.git_revision
        del channel.git_revision
    return channel


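# Pin every section of the config file named on the command line, writing the
# discovered revisions and tarball details back into the same file.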
def main(argv: List[str]) -> None:
    v = Verification()
    config = configparser.ConfigParser()
    config.read_file(open(argv[1]), argv[1])
    for section in config.sections():
        channel = make_channel(config[section])
        if 'channel_url' in config[section]:
            pin_channel(v, channel)
            config[section]['tarball_url'] = channel.table['nixexprs.tar.xz'].absolute_url
            config[section]['tarball_sha256'] = channel.table['nixexprs.tar.xz'].digest
        else:
            git_fetch(v, channel)
        config[section]['git_revision'] = channel.git_revision

    with open(argv[1], 'w') as configfile:
        config.write(configfile)


main(sys.argv)