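"""Verify that a Nix channel tarball matches the git revision it claims to be.

For each channel listed in the config file given on the command line, this
fetches the channel's HTML description page, downloads the advertised
git-revision file and nixexprs.tar.xz via nix-prefetch-url, fetches the
corresponding ref into a local bare git cache, and compares the tarball's
contents against a git checkout of that revision.
"""
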
import configparser
import filecmp
import functools
import hashlib
import operator
import os
import os.path
import shutil
import subprocess
import sys
import tempfile
import types
import urllib.parse
import urllib.request
import xml.dom.minidom

from typing import (
    Dict,
    Iterable,
    List,
    NewType,
    Tuple,
)

Digest16 = NewType('Digest16', str)
Digest32 = NewType('Digest32', str)


class ChannelTableEntry(types.SimpleNamespace):
    digest: Digest16
    file: str
    size: int
    url: str


class Channel(types.SimpleNamespace):
    channel_html: bytes
    forwarded_url: str
    git_cachedir: str
    git_ref: str
    git_repo: str
    git_revision: str
    release_name: str
    table: Dict[str, ChannelTableEntry]
    url: str


class VerificationError(Exception):
    pass


class Verification:

    def __init__(self) -> None:
        self.line_length = 0

    def status(self, s: str) -> None:
        print(s, end=' ', flush=True)
        self.line_length += 1 + len(s)  # Unicode??

    @staticmethod
    def _color(s: str, c: int) -> str:
        return '\033[%2dm%s\033[00m' % (c, s)

    def result(self, r: bool) -> None:
        message, color = {True: ('OK ', 92), False: ('FAIL', 91)}[r]
        length = len(message)
        cols = shutil.get_terminal_size().columns
        pad = (cols - (self.line_length + length)) % cols
        print(' ' * pad + self._color(message, color))
        self.line_length = 0
        if not r:
            raise VerificationError()

    def check(self, s: str, r: bool) -> None:
        self.status(s)
        self.result(r)

    def ok(self) -> None:
        self.result(True)


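# Example of the Verification reporting pattern used throughout this file
# (an illustrative sketch only; 'condition' is a placeholder):
#
#     v = Verification()
#     v.status('Checking something')  # announce a step
#     v.result(condition)             # prints OK/FAIL; raises VerificationError on failure
#     v.check('status + result in one call', 2 + 2 == 4)
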
def compare(a: str, b: str) -> Tuple[List[str], List[str], List[str]]:

    def throw(error: OSError) -> None:
        raise error

    def join(x: str, y: str) -> str:
        return y if x == '.' else os.path.join(x, y)

    def recursive_files(d: str) -> Iterable[str]:
        all_files: List[str] = []
        for path, dirs, files in os.walk(d, onerror=throw):
            rel = os.path.relpath(path, start=d)
            all_files.extend(join(rel, f) for f in files)
            for dir_or_link in dirs:
                if os.path.islink(join(path, dir_or_link)):
                    all_files.append(join(rel, dir_or_link))
        return all_files

    def exclude_dot_git(files: Iterable[str]) -> Iterable[str]:
        return (f for f in files if not f.startswith('.git/'))

    files = functools.reduce(
        operator.or_, (set(
            exclude_dot_git(
                recursive_files(x))) for x in [a, b]))
    return filecmp.cmpfiles(a, b, files, shallow=False)


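# Note: compare() above returns the same triple as filecmp.cmpfiles: files
# whose contents match, files that differ, and files that could not be
# compared at all (e.g. present on only one side or unreadable).
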
def fetch(v: Verification, channel: Channel) -> None:
    v.status('Fetching channel')
    request = urllib.request.urlopen(channel.url, timeout=10)
    channel.channel_html = request.read()
    channel.forwarded_url = request.geturl()
    v.result(request.status == 200)
    v.check('Got forwarded', channel.url != channel.forwarded_url)


def parse_channel(v: Verification, channel: Channel) -> None:
    v.status('Parsing channel description as XML')
    d = xml.dom.minidom.parseString(channel.channel_html)
    v.ok()

    v.status('Extracting release name:')
    title_name = d.getElementsByTagName(
        'title')[0].firstChild.nodeValue.split()[2]
    h1_name = d.getElementsByTagName('h1')[0].firstChild.nodeValue.split()[2]
    v.status(title_name)
    v.result(title_name == h1_name)
    channel.release_name = title_name

    v.status('Extracting git commit:')
    git_commit_node = d.getElementsByTagName('tt')[0]
    channel.git_revision = git_commit_node.firstChild.nodeValue
    v.status(channel.git_revision)
    v.ok()
    v.status('Verifying git commit label')
    v.result(git_commit_node.previousSibling.nodeValue == 'Git commit ')

    v.status('Parsing table')
    channel.table = {}
    for row in d.getElementsByTagName('tr')[1:]:
        name = row.childNodes[0].firstChild.firstChild.nodeValue
        url = row.childNodes[0].firstChild.getAttribute('href')
        size = int(row.childNodes[1].firstChild.nodeValue)
        digest = Digest16(row.childNodes[2].firstChild.firstChild.nodeValue)
        channel.table[name] = ChannelTableEntry(
            url=url, digest=digest, size=size)
    v.ok()


def digest_string(s: bytes) -> Digest16:
    return Digest16(hashlib.sha256(s).hexdigest())


def digest_file(filename: str) -> Digest16:
    hasher = hashlib.sha256()
    with open(filename, 'rb') as f:
        # pylint: disable=cell-var-from-loop
        for block in iter(lambda: f.read(4096), b''):
            hasher.update(block)
    return Digest16(hasher.hexdigest())


def to_Digest16(v: Verification, digest32: Digest32) -> Digest16:
    v.status('Converting digest to base16')
    process = subprocess.run(
        ['nix', 'to-base16', '--type', 'sha256', digest32], capture_output=True)
    v.result(process.returncode == 0)
    return Digest16(process.stdout.decode().strip())


def to_Digest32(v: Verification, digest16: Digest16) -> Digest32:
    v.status('Converting digest to base32')
    process = subprocess.run(
        ['nix', 'to-base32', '--type', 'sha256', digest16], capture_output=True)
    v.result(process.returncode == 0)
    return Digest32(process.stdout.decode().strip())


def fetch_with_nix_prefetch_url(
        v: Verification,
        url: str,
        digest: Digest16) -> str:
    v.status('Fetching %s' % url)
    process = subprocess.run(
        ['nix-prefetch-url', '--print-path', url, digest], capture_output=True)
    v.result(process.returncode == 0)
    prefetch_digest, path, empty = process.stdout.decode().split('\n')
    assert empty == ''
    v.check("Verifying nix-prefetch-url's digest",
            to_Digest16(v, Digest32(prefetch_digest)) == digest)
    v.status("Verifying file digest")
    file_digest = digest_file(path)
    v.result(file_digest == digest)
    return path


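# Note: with --print-path, nix-prefetch-url writes the hash on one line and
# the store path on the next, which is what the three-way split of its stdout
# in fetch_with_nix_prefetch_url above relies on.
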
def fetch_resources(v: Verification, channel: Channel) -> None:
    for resource in ['git-revision', 'nixexprs.tar.xz']:
        fields = channel.table[resource]
        url = urllib.parse.urljoin(channel.forwarded_url, fields.url)
        fields.file = fetch_with_nix_prefetch_url(v, url, fields.digest)
    v.status('Verifying git commit on main page matches git commit in table')
    v.result(
        open(
            channel.table['git-revision'].file).read(999) == channel.git_revision)


def git_fetch(v: Verification, channel: Channel) -> None:
    # It would be nice if we could share the nix git cache, but as of the time
    # of writing it is transitioning from gitv2 (deprecated) to gitv3 (not ready
    # yet), and trying to straddle them both is too far into nix implementation
    # details for my comfort.  So we re-implement here half of nix.fetchGit.
    # :(

    # TODO: Consider using pyxdg to find this path.
    channel.git_cachedir = os.path.expanduser(
        '~/.cache/nix-pin-channel/git/%s' %
        digest_string(
            channel.url.encode()))
    if not os.path.exists(channel.git_cachedir):
        v.status("Initializing git repo")
        process = subprocess.run(
            ['git', 'init', '--bare', channel.git_cachedir])
        v.result(process.returncode == 0)

    v.status('Checking if we already have this rev:')
    process = subprocess.run(
        ['git', '-C', channel.git_cachedir, 'cat-file', '-e', channel.git_revision])
    if process.returncode == 0:
        v.status('yes')
    if process.returncode == 1:
        v.status('no')
    v.result(process.returncode == 0 or process.returncode == 1)
    if process.returncode == 1:
        v.status('Fetching ref "%s"' % channel.git_ref)
        # We don't use --force here because we want to abort and freak out if
        # forced updates are happening.
        process = subprocess.run(['git',
                                  '-C',
                                  channel.git_cachedir,
                                  'fetch',
                                  channel.git_repo,
                                  '%s:%s' % (channel.git_ref,
                                             channel.git_ref)])
        v.result(process.returncode == 0)
        v.status('Verifying that fetch retrieved this rev')
        process = subprocess.run(
            ['git', '-C', channel.git_cachedir, 'cat-file', '-e', channel.git_revision])
        v.result(process.returncode == 0)

    v.status('Verifying rev is an ancestor of ref')
    process = subprocess.run(['git',
                              '-C',
                              channel.git_cachedir,
                              'merge-base',
                              '--is-ancestor',
                              channel.git_revision,
                              channel.git_ref])
    v.result(process.returncode == 0)


def compare_tarball_and_git(
        v: Verification,
        channel: Channel,
        channel_contents: str,
        git_contents: str) -> None:
    v.status('Comparing channel tarball with git checkout')
    match, mismatch, errors = compare(os.path.join(
        channel_contents, channel.release_name), git_contents)
    v.ok()
    v.check('%d files match' % len(match), len(match) > 0)
    v.check('%d files differ' % len(mismatch), len(mismatch) == 0)
    expected_errors = [
        '.git-revision',
        '.version-suffix',
        'nixpkgs',
        'programs.sqlite',
        'svn-revision']
    benign_errors = []
    for ee in expected_errors:
        if ee in errors:
            errors.remove(ee)
            benign_errors.append(ee)
    v.check(
        '%d unexpected incomparable files' %
        len(errors),
        len(errors) == 0)
    v.check(
        '(%d of %d expected incomparable files)' %
        (len(benign_errors),
         len(expected_errors)),
        len(benign_errors) == len(expected_errors))


def extract_tarball(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Extracting tarball %s' %
             channel.table['nixexprs.tar.xz'].file)
    shutil.unpack_archive(
        channel.table['nixexprs.tar.xz'].file,
        dest)
    v.ok()


def git_checkout(v: Verification, channel: Channel, dest: str) -> None:
    v.status('Checking out corresponding git revision')
    git = subprocess.Popen(['git',
                            '-C',
                            channel.git_cachedir,
                            'archive',
                            channel.git_revision],
                           stdout=subprocess.PIPE)
    tar = subprocess.Popen(
        ['tar', 'x', '-C', dest, '-f', '-'], stdin=git.stdout)
    git.stdout.close()
    tar.wait()
    git.wait()
    v.result(git.returncode == 0 and tar.returncode == 0)


def check_channel_metadata(
        v: Verification,
        channel: Channel,
        channel_contents: str) -> None:
    v.status('Verifying git commit in channel tarball')
    v.result(
        open(
            os.path.join(
                channel_contents,
                channel.release_name,
                '.git-revision')).read(999) == channel.git_revision)

    v.status(
        'Verifying version-suffix is a suffix of release name %s:' %
        channel.release_name)
    version_suffix = open(
        os.path.join(
            channel_contents,
            channel.release_name,
            '.version-suffix')).read(999)
    v.status(version_suffix)
    v.result(channel.release_name.endswith(version_suffix))


def check_channel_contents(v: Verification, channel: Channel) -> None:
    with tempfile.TemporaryDirectory() as channel_contents, \
            tempfile.TemporaryDirectory() as git_contents:

        extract_tarball(v, channel, channel_contents)
        check_channel_metadata(v, channel, channel_contents)

        git_checkout(v, channel, git_contents)

        compare_tarball_and_git(v, channel, channel_contents, git_contents)

        v.status('Removing temporary directories')
        v.ok()


def main(argv: List[str]) -> None:
    v = Verification()
    config = configparser.ConfigParser()
    config.read_file(open(argv[1]), argv[1])
    for section in config.sections():
        channel = Channel(**dict(config[section].items()))
        fetch(v, channel)
        parse_channel(v, channel)
        fetch_resources(v, channel)
        git_fetch(v, channel)
        check_channel_contents(v, channel)
        print(channel)


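# The config file passed as argv[1] is INI-style, one section per channel;
# each key/value pair becomes an attribute of that Channel. The code above
# reads 'url', 'git_repo', and 'git_ref' from it. A hypothetical example
# (section name and values are illustrative only):
#
#     [nixos-unstable]
#     url = https://nixos.org/channels/nixos-unstable
#     git_repo = https://github.com/NixOS/nixpkgs.git
#     git_ref = nixos-unstable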
main(sys.argv)