import configparser
import filecmp
import functools
import hashlib
import operator
import os
import os.path
import shutil
import subprocess
import sys
import tempfile
import types
import urllib.parse
import urllib.request
import xml.dom.minidom

from typing import (
    Dict,
    Iterable,
    List,
    NewType,
    Tuple,
)

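# Two spellings of a sha256 digest: Digest16 is the hexadecimal (base16)
# form; Digest32 is Nix's base32 form (see to_Digest16 / to_Digest32 below,
# which shell out to `nix to-base16` / `nix to-base32`).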
Digest16 = NewType('Digest16', str)
Digest32 = NewType('Digest32', str)


class ChannelTableEntry(types.SimpleNamespace):
    absolute_url: str
    digest: Digest16
    file: str
    size: int
    url: str


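# Per-channel state.  url, git_repo, and git_ref are read from the
# configuration file in main(); the remaining fields are filled in while the
# channel is fetched and verified.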
class Channel(types.SimpleNamespace):
    channel_html: bytes
    forwarded_url: str
    git_cachedir: str
    git_ref: str
    git_repo: str
    git_revision: str
    release_name: str
    table: Dict[str, ChannelTableEntry]
    url: str


class VerificationError(Exception):
    pass


class Verification:
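    """Print progress messages with right-aligned OK/FAIL results.

    A failed check (result(False)) raises VerificationError, aborting the run.
    """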

    def __init__(self) -> None:
        self.line_length = 0

    def status(self, s: str) -> None:
        print(s, end=' ', flush=True)
        self.line_length += 1 + len(s)  # Unicode??

    @staticmethod
    def _color(s: str, c: int) -> str:
        return '\033[%2dm%s\033[00m' % (c, s)

    def result(self, r: bool) -> None:
        message, color = {True: ('OK ', 92), False: ('FAIL', 91)}[r]
        length = len(message)
        cols = shutil.get_terminal_size().columns
        pad = (cols - (self.line_length + length)) % cols
        print(' ' * pad + self._color(message, color))
        self.line_length = 0
        if not r:
            raise VerificationError()

    def check(self, s: str, r: bool) -> None:
        self.status(s)
        self.result(r)

    def ok(self) -> None:
        self.result(True)


def compare(a: str, b: str) -> Tuple[List[str], List[str], List[str]]:
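    """Recursively compare two directory trees.

    Returns filecmp.cmpfiles-style (match, mismatch, errors) lists computed
    over the union of the files found in both trees, ignoring anything under
    .git/.
    """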

    def throw(error: OSError) -> None:
        raise error

    def join(x: str, y: str) -> str:
        return y if x == '.' else os.path.join(x, y)

    def recursive_files(d: str) -> Iterable[str]:
        all_files: List[str] = []
        for path, dirs, files in os.walk(d, onerror=throw):
            rel = os.path.relpath(path, start=d)
            all_files.extend(join(rel, f) for f in files)
            for dir_or_link in dirs:
                if os.path.islink(join(path, dir_or_link)):
                    all_files.append(join(rel, dir_or_link))
        return all_files

    def exclude_dot_git(files: Iterable[str]) -> Iterable[str]:
        return (f for f in files if not f.startswith('.git/'))

    files = functools.reduce(
        operator.or_, (set(
            exclude_dot_git(
                recursive_files(x))) for x in [a, b]))
    return filecmp.cmpfiles(a, b, files, shallow=False)


def fetch(v: Verification, channel: Channel) -> None:
    v.status('Fetching channel')
    request = urllib.request.urlopen(channel.url, timeout=10)
    channel.channel_html = request.read()
    channel.forwarded_url = request.geturl()
    v.result(request.status == 200)
    v.check('Got forwarded', channel.url != channel.forwarded_url)


def parse_channel(v: Verification, channel: Channel) -> None:
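    """Extract the release name, git revision, and file table from the channel page."""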
    v.status('Parsing channel description as XML')
    d = xml.dom.minidom.parseString(channel.channel_html)
    v.ok()

    v.status('Extracting release name:')
    title_name = d.getElementsByTagName(
        'title')[0].firstChild.nodeValue.split()[2]
    h1_name = d.getElementsByTagName('h1')[0].firstChild.nodeValue.split()[2]
    v.status(title_name)
    v.result(title_name == h1_name)
    channel.release_name = title_name

    v.status('Extracting git commit:')
    git_commit_node = d.getElementsByTagName('tt')[0]
    channel.git_revision = git_commit_node.firstChild.nodeValue
    v.status(channel.git_revision)
    v.ok()
    v.status('Verifying git commit label')
    v.result(git_commit_node.previousSibling.nodeValue == 'Git commit ')

    v.status('Parsing table')
    channel.table = {}
    for row in d.getElementsByTagName('tr')[1:]:
        name = row.childNodes[0].firstChild.firstChild.nodeValue
        url = row.childNodes[0].firstChild.getAttribute('href')
        size = int(row.childNodes[1].firstChild.nodeValue)
        digest = Digest16(row.childNodes[2].firstChild.firstChild.nodeValue)
        channel.table[name] = ChannelTableEntry(
            url=url, digest=digest, size=size)
    v.ok()


def digest_string(s: bytes) -> Digest16:
    return Digest16(hashlib.sha256(s).hexdigest())


def digest_file(filename: str) -> Digest16:
    hasher = hashlib.sha256()
    with open(filename, 'rb') as f:
        # pylint: disable=cell-var-from-loop
        for block in iter(lambda: f.read(4096), b''):
            hasher.update(block)
    return Digest16(hasher.hexdigest())


def to_Digest16(v: Verification, digest32: Digest32) -> Digest16:
    v.status('Converting digest to base16')
    process = subprocess.run(
        ['nix', 'to-base16', '--type', 'sha256', digest32], capture_output=True)
    v.result(process.returncode == 0)
    return Digest16(process.stdout.decode().strip())


def to_Digest32(v: Verification, digest16: Digest16) -> Digest32:
    v.status('Converting digest to base32')
    process = subprocess.run(
        ['nix', 'to-base32', '--type', 'sha256', digest16], capture_output=True)
    v.result(process.returncode == 0)
    return Digest32(process.stdout.decode().strip())


def fetch_with_nix_prefetch_url(
        v: Verification,
        url: str,
        digest: Digest16) -> str:
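    """Fetch url into the nix store and return its store path.

    The download is checked against the expected digest twice: once via the
    hash that nix-prefetch-url reports, and again by re-hashing the file on
    disk.
    """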
    v.status('Fetching %s' % url)
    process = subprocess.run(
        ['nix-prefetch-url', '--print-path', url, digest], capture_output=True)
    v.result(process.returncode == 0)
    prefetch_digest, path, empty = process.stdout.decode().split('\n')
    assert empty == ''
    v.check("Verifying nix-prefetch-url's digest",
            to_Digest16(v, Digest32(prefetch_digest)) == digest)
    v.status("Verifying file digest")
    file_digest = digest_file(path)
    v.result(file_digest == digest)
    return path


def fetch_resources(v: Verification, channel: Channel) -> None:
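    """Fetch the git-revision file and nixexprs tarball named in the table.

    The fetched git-revision must match the git commit advertised on the
    channel page itself.
    """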
    for resource in ['git-revision', 'nixexprs.tar.xz']:
        fields = channel.table[resource]
        fields.absolute_url = urllib.parse.urljoin(
            channel.forwarded_url, fields.url)
        fields.file = fetch_with_nix_prefetch_url(
            v, fields.absolute_url, fields.digest)
    v.status('Verifying git commit on main page matches git commit in table')
    v.result(
        open(
            channel.table['git-revision'].file).read(999) == channel.git_revision)


def git_fetch(v: Verification, channel: Channel) -> None:
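    """Ensure channel.git_revision is present in a local bare git cache.

    The cache repository lives under ~/.cache/nix-pin-channel/ and is keyed
    by a digest of the channel URL.  If the revision is missing, fetch
    channel.git_ref from channel.git_repo; either way, confirm the revision
    is an ancestor of that ref.
    """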
    # It would be nice if we could share the nix git cache, but as of the time
    # of writing it is transitioning from gitv2 (deprecated) to gitv3 (not ready
    # yet), and trying to straddle them both is too far into nix implementation
    # details for my comfort. So we re-implement here half of nix.fetchGit.
    # :(

    # TODO: Consider using pyxdg to find this path.
    channel.git_cachedir = os.path.expanduser(
        '~/.cache/nix-pin-channel/git/%s' %
        digest_string(
            channel.url.encode()))
    if not os.path.exists(channel.git_cachedir):
        v.status("Initializing git repo")
        process = subprocess.run(
            ['git', 'init', '--bare', channel.git_cachedir])
        v.result(process.returncode == 0)

    v.status('Checking if we already have this rev:')
    process = subprocess.run(
        ['git', '-C', channel.git_cachedir, 'cat-file', '-e', channel.git_revision])
    if process.returncode == 0:
        v.status('yes')
    if process.returncode == 1:
        v.status('no')
    v.result(process.returncode == 0 or process.returncode == 1)
    if process.returncode == 1:
        v.status('Fetching ref "%s"' % channel.git_ref)
        # We don't use --force here because we want to abort and freak out if forced
        # updates are happening.
        process = subprocess.run(['git',
                                  '-C',
                                  channel.git_cachedir,
                                  'fetch',
                                  channel.git_repo,
                                  '%s:%s' % (channel.git_ref,
                                             channel.git_ref)])
        v.result(process.returncode == 0)
        v.status('Verifying that fetch retrieved this rev')
        process = subprocess.run(
            ['git', '-C', channel.git_cachedir, 'cat-file', '-e', channel.git_revision])
        v.result(process.returncode == 0)

    v.status('Verifying rev is an ancestor of ref')
    process = subprocess.run(['git',
                              '-C',
                              channel.git_cachedir,
                              'merge-base',
                              '--is-ancestor',
                              channel.git_revision,
                              channel.git_ref])
    v.result(process.returncode == 0)


def compare_tarball_and_git(
        v: Verification,
        channel: Channel,
        channel_contents: str,
        git_contents: str) -> None:
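    """Compare the unpacked channel tarball against the git checkout.

    Every comparable file must match; the only incomparable files allowed are
    the channel-generated ones listed in expected_errors.
    """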
    v.status('Comparing channel tarball with git checkout')
    match, mismatch, errors = compare(os.path.join(
        channel_contents, channel.release_name), git_contents)
    v.ok()
    v.check('%d files match' % len(match), len(match) > 0)
    v.check('%d files differ' % len(mismatch), len(mismatch) == 0)
    expected_errors = [
        '.git-revision',
        '.version-suffix',
        'nixpkgs',
        'programs.sqlite',
        'svn-revision']
    benign_errors = []
    for ee in expected_errors:
        if ee in errors:
            errors.remove(ee)
            benign_errors.append(ee)
    v.check(
        '%d unexpected incomparable files' %
        len(errors),
        len(errors) == 0)
    v.check(
        '(%d of %d expected incomparable files)' %
        (len(benign_errors),
         len(expected_errors)),
        len(benign_errors) == len(expected_errors))


def extract_tarball(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Extracting tarball %s' %
             channel.table['nixexprs.tar.xz'].file)
    shutil.unpack_archive(
        channel.table['nixexprs.tar.xz'].file,
        dest)
    v.ok()


def git_checkout(v: Verification, channel: Channel, dest: str) -> None:
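    """Extract channel.git_revision into dest by piping git archive into tar."""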
    v.status('Checking out corresponding git revision')
    git = subprocess.Popen(['git',
                            '-C',
                            channel.git_cachedir,
                            'archive',
                            channel.git_revision],
                           stdout=subprocess.PIPE)
    tar = subprocess.Popen(
        ['tar', 'x', '-C', dest, '-f', '-'], stdin=git.stdout)
    git.stdout.close()
    tar.wait()
    git.wait()
    v.result(git.returncode == 0 and tar.returncode == 0)


def check_channel_metadata(
        v: Verification,
        channel: Channel,
        channel_contents: str) -> None:
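    """Check the .git-revision and .version-suffix files inside the tarball.

    .git-revision must match the revision from the channel page, and
    .version-suffix must be a suffix of the release name.
    """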
    v.status('Verifying git commit in channel tarball')
    v.result(
        open(
            os.path.join(
                channel_contents,
                channel.release_name,
                '.git-revision')).read(999) == channel.git_revision)

    v.status(
        'Verifying version-suffix is a suffix of release name %s:' %
        channel.release_name)
    version_suffix = open(
        os.path.join(
            channel_contents,
            channel.release_name,
            '.version-suffix')).read(999)
    v.status(version_suffix)
    v.result(channel.release_name.endswith(version_suffix))


def check_channel_contents(v: Verification, channel: Channel) -> None:
    with tempfile.TemporaryDirectory() as channel_contents, \
            tempfile.TemporaryDirectory() as git_contents:

        extract_tarball(v, channel, channel_contents)
        check_channel_metadata(v, channel, channel_contents)

        git_checkout(v, channel, git_contents)

        compare_tarball_and_git(v, channel, channel_contents, git_contents)

        v.status('Removing temporary directories')
        v.ok()


def main(argv: List[str]) -> None:
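    """Verify and pin each channel listed in the config file.

    Reads the INI-style config named by argv[1], runs the verification steps
    for each section, then writes git_rev, tarball_url, and tarball_sha256
    back into the same file.

    The section keys below are inferred from the attributes this script
    reads; the values shown are purely hypothetical:

        [nixos]
        url = https://channels.nixos.org/nixos-unstable
        git_repo = https://github.com/NixOS/nixpkgs.git
        git_ref = nixos-unstable
    """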
    v = Verification()
    config = configparser.ConfigParser()
    config.read_file(open(argv[1]), argv[1])
    for section in config.sections():
        channel = Channel(**dict(config[section].items()))
        fetch(v, channel)
        parse_channel(v, channel)
        fetch_resources(v, channel)
        git_fetch(v, channel)
        check_channel_contents(v, channel)
        config[section]['git_rev'] = channel.git_revision
        config[section]['tarball_url'] = channel.table['nixexprs.tar.xz'].absolute_url
        config[section]['tarball_sha256'] = channel.table['nixexprs.tar.xz'].digest
    with open(argv[1], 'w') as configfile:
        config.write(configfile)


main(sys.argv)