]> git.scottworley.com Git - pinch/blame_incremental - pinch.py
Verify channel tarball metadata
[pinch] / pinch.py
... / ...
CommitLineData
1import filecmp
2import functools
3import hashlib
4import operator
5import os
6import os.path
7import shutil
8import subprocess
9import tempfile
10import types
11import urllib.parse
12import urllib.request
13import xml.dom.minidom
14
15from typing import (
16 Dict,
17 Iterable,
18 List,
19 NewType,
20 Tuple,
21)
22
# Distinct string types for the two textual encodings of a SHA-256 digest
# used in this file, so a type checker can catch accidental mix-ups.
# Digest16: hexadecimal form, as returned by hashlib's hexdigest().
Digest16 = NewType('Digest16', str)
# Digest32: base32 form, as produced by `nix to-base32`.
Digest32 = NewType('Digest32', str)
25
26
class ChannelTableEntry(types.SimpleNamespace):
    """One row of the file table found on a channel page.

    ``url``, ``size`` and ``digest`` are supplied when the table is parsed;
    ``file`` is attached afterwards, once the resource has been downloaded.
    """
    # Populated from the HTML table row.
    url: str          # value of the row link's href attribute
    size: int         # integer parsed from the row's second cell
    digest: Digest16  # digest text taken from the row's third cell
    # Populated after download: local path of the fetched resource.
    file: str
32
33
class Channel(types.SimpleNamespace):
    """Mutable record describing one channel as verification progresses.

    ``url``, ``git_repo`` and ``git_ref`` are passed to the constructor; the
    remaining attributes are assigned step by step by the functions in this
    file that receive the channel.
    """
    channel_html: bytes   # raw body of the channel page
    forwarded_url: str    # final URL reported by the HTTP response
    git_cachedir: str     # local bare repository used as a fetch cache
    # Commit id scraped from the channel page. This is the attribute the rest
    # of the file actually reads and writes, so it is declared here.
    git_commit: str
    git_ref: str          # ref that is fetched from git_repo
    git_repo: str         # URL of the upstream git repository
    # Declared previously and kept for compatibility; nothing in the code
    # visible here assigns it.
    git_revision: str
    release_name: str     # release name extracted from the page title
    table: Dict[str, ChannelTableEntry]  # file table, keyed by file name
    url: str              # channel URL to start from
44
45
class VerificationError(Exception):
    """Signals that a verification step reported failure."""
48
49
class Verification:
    """Console reporter for a sequence of named checks.

    A check is announced with ``status`` (possibly several times on the same
    line) and closed with ``result``, which prints a coloured verdict and
    raises ``VerificationError`` when the check failed.
    """

    def __init__(self) -> None:
        # Number of characters already written on the current output line.
        self.line_length = 0

    def status(self, s: str) -> None:
        """Print ``s`` followed by a space, leaving the line open."""
        print(s, end=' ', flush=True)
        # len() counts code points rather than terminal cells, so text with
        # wide or combining characters may be measured incorrectly.
        self.line_length += len(s) + 1

    @staticmethod
    def _color(s: str, c: int) -> str:
        """Return ``s`` wrapped in the ANSI escape sequence for colour ``c``."""
        start = '\033[%2dm' % c
        return '%s%s\033[00m' % (start, s)

    def result(self, r: bool) -> None:
        """Finish the current line with a verdict for outcome ``r``.

        Raises:
            VerificationError: if ``r`` is false.
        """
        verdicts = {True: ('OK ', 92), False: ('FAIL', 91)}
        message, color = verdicts[r]
        width = shutil.get_terminal_size().columns
        used = self.line_length + len(message)
        # Pad so the verdict ends at the right-hand edge of the terminal; the
        # modulo keeps the padding non-negative when the text already spans
        # more than one terminal line.
        padding = ' ' * ((width - used) % width)
        print(padding + self._color(message, color))
        self.line_length = 0
        if r:
            return
        raise VerificationError()

    def check(self, s: str, r: bool) -> None:
        """Announce ``s`` and immediately report outcome ``r`` for it."""
        self.status(s)
        self.result(r)

    def ok(self) -> None:
        """Report success for the step announced so far."""
        self.result(True)
79
80
def compare(a: str, b: str) -> Tuple[List[str], List[str], List[str]]:
    """Compare the contents of the directory trees rooted at ``a`` and ``b``.

    Every file found under either root (as a path relative to that root) is
    compared byte for byte, except paths starting with ``.git/``. Symbolic
    links that ``os.walk`` reports among the directories are included as
    entries of their own.

    Returns:
        The ``(match, mismatch, errors)`` lists of ``filecmp.cmpfiles``.

    Raises:
        OSError: if walking either tree fails.
    """

    def reraise(error: OSError) -> None:
        # os.walk ignores listing errors unless the handler raises them.
        raise error

    def relative(parent: str, name: str) -> str:
        # Avoid a leading './' for entries directly under the root.
        if parent == '.':
            return name
        return os.path.join(parent, name)

    def walk_tree(root: str) -> Iterable[str]:
        for path, dirs, files in os.walk(root, onerror=reraise):
            rel = os.path.relpath(path, start=root)
            for name in files:
                yield relative(rel, name)
            for name in dirs:
                # Symlinks listed as directories are not descended into, so
                # record the link itself.
                if os.path.islink(relative(path, name)):
                    yield relative(rel, name)

    def tracked(root: str) -> set:
        return {p for p in walk_tree(root) if not p.startswith('.git/')}

    candidates = tracked(a) | tracked(b)
    return filecmp.cmpfiles(a, b, candidates, shallow=False)
107
108
def fetch(v: Verification, channel: Channel) -> None:
    """Download the channel page and record where the request ended up.

    Stores the response body in ``channel.channel_html`` and the final URL in
    ``channel.forwarded_url``. The step fails unless the HTTP status is 200,
    and a second check fails unless the final URL differs from
    ``channel.url``.

    Raises:
        VerificationError: via ``v`` when either check fails.
    """
    v.status('Fetching channel')
    # Use the response as a context manager so the connection is closed even
    # if reading the body raises.
    with urllib.request.urlopen(channel.url, timeout=10) as response:
        channel.channel_html = response.read()
        channel.forwarded_url = response.geturl()
        http_status = response.status
    v.result(http_status == 200)
    v.check('Got forwarded', channel.url != channel.forwarded_url)
116
117
def parse_channel(v: Verification, channel: Channel) -> None:
    """Parse ``channel.channel_html`` and record what it describes.

    Sets ``channel.release_name``, ``channel.git_commit`` and
    ``channel.table`` (file name -> ``ChannelTableEntry``), reporting each
    step through ``v``.

    Raises:
        VerificationError: via ``v`` when the title and heading disagree or
            the commit is not labelled as expected.
    """
    v.status('Parsing channel description as XML')
    document = xml.dom.minidom.parseString(channel.channel_html)
    v.ok()

    def third_word(tag: str) -> str:
        # Third whitespace-separated word of the first <tag> element's text.
        element = document.getElementsByTagName(tag)[0]
        return element.firstChild.nodeValue.split()[2]

    v.status('Extracting release name:')
    title_name = third_word('title')
    h1_name = third_word('h1')
    v.status(title_name)
    v.result(title_name == h1_name)
    channel.release_name = title_name

    v.status('Extracting git commit:')
    commit_element = document.getElementsByTagName('tt')[0]
    channel.git_commit = commit_element.firstChild.nodeValue
    v.status(channel.git_commit)
    v.ok()
    v.status('Verifying git commit label')
    # The text node right before the <tt> element must be the label.
    label = commit_element.previousSibling.nodeValue
    v.result(label == 'Git commit ')

    v.status('Parsing table')
    channel.table = {}
    rows = document.getElementsByTagName('tr')
    # The first row is skipped (presumably the table header).
    for row in rows[1:]:
        cells = row.childNodes
        link = cells[0].firstChild
        name = link.firstChild.nodeValue
        url = link.getAttribute('href')
        size = int(cells[1].firstChild.nodeValue)
        digest = Digest16(cells[2].firstChild.firstChild.nodeValue)
        channel.table[name] = ChannelTableEntry(
            url=url, digest=digest, size=size)
    v.ok()
149
150
def digest_string(s: bytes) -> Digest16:
    """Return the hexadecimal SHA-256 digest of the bytes ``s``."""
    hasher = hashlib.sha256()
    hasher.update(s)
    return Digest16(hasher.hexdigest())
153
154
def digest_file(filename: str) -> Digest16:
    """Return the hexadecimal SHA-256 digest of the file ``filename``.

    The file is read in 4096-byte blocks, so it is never held in memory as a
    whole.
    """
    hasher = hashlib.sha256()
    with open(filename, 'rb') as f:
        while True:
            block = f.read(4096)
            if block == b'':
                break
            hasher.update(block)
    return Digest16(hasher.hexdigest())
162
163
def to_Digest16(v: Verification, digest32: Digest32) -> Digest16:
    """Convert a base32 SHA-256 digest to base16 by running ``nix to-base16``.

    Raises:
        VerificationError: via ``v`` when the command exits non-zero.
    """
    v.status('Converting digest to base16')
    command = ['nix', 'to-base16', '--type', 'sha256', digest32]
    completed = subprocess.run(command, capture_output=True)
    v.result(completed.returncode == 0)
    converted = completed.stdout.decode().strip()
    return Digest16(converted)
170
171
def to_Digest32(v: Verification, digest16: Digest16) -> Digest32:
    """Convert a base16 SHA-256 digest to base32 by running ``nix to-base32``.

    Raises:
        VerificationError: via ``v`` when the command exits non-zero.
    """
    v.status('Converting digest to base32')
    command = ['nix', 'to-base32', '--type', 'sha256', digest16]
    completed = subprocess.run(command, capture_output=True)
    v.result(completed.returncode == 0)
    converted = completed.stdout.decode().strip()
    return Digest32(converted)
178
179
def fetch_with_nix_prefetch_url(
        v: Verification,
        url: str,
        digest: Digest16) -> str:
    """Download ``url`` with ``nix-prefetch-url`` and verify its digest.

    The digest is checked twice: once against the one the tool prints
    (converted to base16) and once by hashing the downloaded file directly.

    Returns:
        The path of the downloaded file as printed by the tool.

    Raises:
        VerificationError: via ``v`` when the tool fails or a digest differs.
    """
    v.status('Fetching %s' % url)
    command = ['nix-prefetch-url', '--print-path', url, digest]
    completed = subprocess.run(command, capture_output=True)
    v.result(completed.returncode == 0)
    # Expected output: digest line, path line, and a final newline, which
    # leaves an empty string as the third element after splitting.
    output_lines = completed.stdout.decode().split('\n')
    prefetch_digest, path, empty = output_lines
    assert empty == ''
    # The conversion reports its own status line before this check does.
    converted = to_Digest16(v, Digest32(prefetch_digest))
    v.check("Verifying nix-prefetch-url's digest", converted == digest)
    v.status("Verifying file digest")
    v.result(digest_file(path) == digest)
    return path
196
197
def fetch_resources(v: Verification, channel: Channel) -> None:
    """Download the channel's ``git-revision`` and ``nixexprs.tar.xz`` files.

    Each entry's URL is resolved against ``channel.forwarded_url`` and the
    downloaded path is stored in the entry's ``file`` attribute. Afterwards
    the content of the ``git-revision`` file is checked against
    ``channel.git_commit``.

    Raises:
        VerificationError: via ``v`` when a download or comparison fails.
    """
    for resource in ['git-revision', 'nixexprs.tar.xz']:
        fields = channel.table[resource]
        url = urllib.parse.urljoin(channel.forwarded_url, fields.url)
        fields.file = fetch_with_nix_prefetch_url(v, url, fields.digest)
    v.status('Verifying git commit on main page matches git commit in table')
    # Read inside a context manager so the file handle is closed promptly;
    # at most 999 characters are read.
    with open(channel.table['git-revision'].file) as revision_file:
        table_commit = revision_file.read(999)
    v.result(table_commit == channel.git_commit)
207
208
def git_fetch(v: Verification, channel: Channel) -> None:
    """Make sure ``channel.git_commit`` is available in a local git cache.

    Sets ``channel.git_cachedir`` to a bare repository under
    ``~/.cache/nix-pin-channel/git`` named after the digest of the channel
    URL, creating it if it does not exist. If the commit is not already
    present, ``channel.git_ref`` is fetched from ``channel.git_repo``.
    Finally the commit is verified to be an ancestor of the ref.

    Raises:
        VerificationError: via ``v`` when any git command fails.
    """
    # Sharing nix's own git cache would be nicer, but at the time of writing
    # it is moving from gitv2 (deprecated) to gitv3 (not ready yet), and
    # straddling both reaches too far into nix implementation details. So
    # half of nix.fetchGit is re-implemented here. :(

    # TODO: Consider using pyxdg to find this path.
    cache_key = digest_string(channel.url.encode())
    channel.git_cachedir = os.path.expanduser(
        '~/.cache/nix-pin-channel/git/%s' % cache_key)
    git = ['git', '-C', channel.git_cachedir]
    have_commit = git + ['cat-file', '-e', channel.git_commit]

    if not os.path.exists(channel.git_cachedir):
        v.status("Initializing git repo")
        init = subprocess.run(['git', 'init', '--bare', channel.git_cachedir])
        v.result(init.returncode == 0)

    v.status('Checking if we already have this rev:')
    probe = subprocess.run(have_commit)
    if probe.returncode == 0:
        v.status('yes')
    elif probe.returncode == 1:
        v.status('no')
    # Any exit code other than 0 or 1 is treated as a failure.
    v.result(probe.returncode in (0, 1))
    if probe.returncode == 1:
        v.status('Fetching ref "%s"' % channel.git_ref)
        refspec = '%s:%s' % (channel.git_ref, channel.git_ref)
        # No --force here: we want to abort and freak out if forced updates
        # are happening.
        fetched = subprocess.run(git + ['fetch', channel.git_repo, refspec])
        v.result(fetched.returncode == 0)
        v.status('Verifying that fetch retrieved this rev')
        recheck = subprocess.run(have_commit)
        v.result(recheck.returncode == 0)

    v.status('Verifying rev is an ancestor of ref')
    ancestry = subprocess.run(
        git + ['merge-base', '--is-ancestor',
               channel.git_commit, channel.git_ref])
    v.result(ancestry.returncode == 0)
261
262
def compare_tarball_and_git(
        v: Verification,
        channel: Channel,
        channel_contents: str,
        git_contents: str) -> None:
    """Check that the extracted tarball and the git checkout agree.

    The release directory inside ``channel_contents`` is compared with
    ``git_contents``. At least one file must match, none may differ, and the
    set of files that could not be compared must be exactly the known list
    below.

    Raises:
        VerificationError: via ``v`` when any of these checks fails.
    """
    v.status('Comparing channel tarball with git checkout')
    release_dir = os.path.join(channel_contents, channel.release_name)
    match, mismatch, errors = compare(release_dir, git_contents)
    v.ok()
    v.check('%d files match' % len(match), len(match) > 0)
    v.check('%d files differ' % len(mismatch), len(mismatch) == 0)
    # Entries that are expected to be incomparable between the two trees.
    expected_errors = [
        '.git-revision',
        '.version-suffix',
        'nixpkgs',
        'programs.sqlite',
        'svn-revision']
    # compare() builds its candidate list from a set, so each name occurs at
    # most once and plain filtering is enough to split the errors.
    benign_errors = [name for name in expected_errors if name in errors]
    unexpected = [name for name in errors if name not in expected_errors]
    v.check(
        '%d unexpected incomparable files' % len(unexpected),
        len(unexpected) == 0)
    v.check(
        '(%d of %d expected incomparable files)' %
        (len(benign_errors), len(expected_errors)),
        len(benign_errors) == len(expected_errors))
294
295
def extract_tarball(v: Verification, channel: Channel, dest: str) -> None:
    """Unpack the downloaded ``nixexprs.tar.xz`` into directory ``dest``."""
    tarball = channel.table['nixexprs.tar.xz'].file
    v.status('Extracting tarball %s' % tarball)
    shutil.unpack_archive(tarball, dest)
    v.ok()
303
304
def git_checkout(v: Verification, channel: Channel, dest: str) -> None:
    """Export ``channel.git_commit`` from the git cache into ``dest``.

    Pipes the output of ``git archive`` into ``tar x``.

    Raises:
        VerificationError: via ``v`` when either process exits non-zero.
    """
    v.status('Checking out corresponding git revision')
    archive_command = [
        'git', '-C', channel.git_cachedir, 'archive', channel.git_commit]
    unpack_command = ['tar', 'x', '-C', dest, '-f', '-']
    git = subprocess.Popen(archive_command, stdout=subprocess.PIPE)
    tar = subprocess.Popen(unpack_command, stdin=git.stdout)
    # Drop our copy of the pipe's read end; tar holds its own.
    git.stdout.close()
    tar_exit = tar.wait()
    git_exit = git.wait()
    v.result(git_exit == 0 and tar_exit == 0)
319
320
def check_channel_metadata(
        v: Verification,
        channel: Channel,
        channel_contents: str) -> None:
    """Verify the metadata files shipped inside the extracted tarball.

    ``.git-revision`` in the release directory must equal
    ``channel.git_commit``, and the content of ``.version-suffix`` must be a
    suffix of ``channel.release_name``.

    Raises:
        VerificationError: via ``v`` when either check fails.
    """
    v.status('Verifying git commit in channel tarball')
    release_dir = os.path.join(channel_contents, channel.release_name)
    # Files are read inside context managers so their handles are closed
    # promptly; at most 999 characters are read from each.
    with open(os.path.join(release_dir, '.git-revision')) as revision_file:
        tarball_commit = revision_file.read(999)
    v.result(tarball_commit == channel.git_commit)

    v.status(
        'Verifying version-suffix is a suffix of release name %s:' %
        channel.release_name)
    with open(os.path.join(release_dir, '.version-suffix')) as suffix_file:
        version_suffix = suffix_file.read(999)
    v.status(version_suffix)
    v.result(channel.release_name.endswith(version_suffix))
343
344
def check_channel_contents(v: Verification, channel: Channel) -> None:
    """Unpack tarball and git checkout into temp dirs and compare them.

    Both temporary directories are removed when the ``with`` block ends.

    Raises:
        VerificationError: via ``v`` when any step fails.
    """
    tarball_dir = tempfile.TemporaryDirectory()
    checkout_dir = tempfile.TemporaryDirectory()
    with tarball_dir as channel_contents, checkout_dir as git_contents:
        # Tarball side first: unpack it and check its own metadata.
        extract_tarball(v, channel, channel_contents)
        check_channel_metadata(v, channel, channel_contents)

        # Then the git side, followed by the comparison of both trees.
        git_checkout(v, channel, git_contents)
        compare_tarball_and_git(v, channel, channel_contents, git_contents)

        v.status('Removing temporary directories')
    # Reported only after the with block has cleaned up.
    v.ok()
358
359
def main() -> None:
    """Run the full verification pipeline for the nixos-20.03 channel.

    Prints the resulting channel record when every step succeeded.
    """
    v = Verification()
    channel = Channel(
        url='https://channels.nixos.org/nixos-20.03',
        git_repo='https://github.com/NixOS/nixpkgs.git',
        git_ref='nixos-20.03')
    # Each step takes (v, channel) and relies on what earlier steps stored on
    # the channel, so the order matters.
    steps = (
        fetch,
        parse_channel,
        fetch_resources,
        git_fetch,
        check_channel_contents,
    )
    for step in steps:
        step(v, channel)
    print(channel)
371
372
# Run the verification only when this file is executed as a script, so that
# importing the module does not trigger network access and git commands.
if __name__ == '__main__':
    main()