]> git.scottworley.com Git - pinch/blame - pinch.py
Verify channel tarball metadata
[pinch] / pinch.py
CommitLineData
2f96f32a
SW
1import filecmp
2import functools
3import hashlib
4import operator
5import os
6import os.path
7import shutil
73bec7e8 8import subprocess
2f96f32a 9import tempfile
89e79125 10import types
2f96f32a
SW
11import urllib.parse
12import urllib.request
13import xml.dom.minidom
14
15from typing import (
2f96f32a
SW
16 Dict,
17 Iterable,
18 List,
73bec7e8 19 NewType,
2f96f32a
SW
20 Tuple,
21)
22
73bec7e8
SW
# Distinct static types for the two textual encodings of a sha256 digest, so
# that the type checker rejects passing one where the other is expected.
# At runtime both are plain ``str``.
Digest16 = NewType('Digest16', str)  # base16 (hex) text, e.g. from hexdigest()
Digest32 = NewType('Digest32', str)  # base32 text, as handled by `nix to-base32`
25
2f96f32a 26
class ChannelTableEntry(types.SimpleNamespace):
    """One row of the file table found on a channel page.

    This is a plain namespace: the annotations below declare the attributes
    for the type checker but do not create or validate them at runtime.
    """
    # sha256 of the file in base16, as listed in the table row.
    digest: Digest16
    # Local path of the fetched file; assigned by fetch_resources, not at
    # construction time.
    file: str
    # Size as listed in the table row, parsed with int().
    size: int
    # The row link's href; joined against the channel's forwarded URL
    # before fetching.
    url: str
32
33
class Channel(types.SimpleNamespace):
    """Mutable record that accumulates what is learned about one channel.

    This is a plain namespace: attributes are supplied as keyword arguments
    or assigned later by the verification steps.  The annotations only
    declare them for the type checker.
    """
    # Raw body of the channel page, stored by fetch().
    channel_html: bytes
    # Final URL reported by the HTTP response, stored by fetch().
    forwarded_url: str
    # Path of the bare git repository used as a cache, stored by git_fetch().
    git_cachedir: str
    # Commit id read from the channel page by parse_channel().  Every
    # verification step reads this attribute, so it is declared here.
    git_commit: str
    # Ref to fetch from git_repo; supplied by the caller.
    git_ref: str
    # URL of the git repository to fetch from; supplied by the caller.
    git_repo: str
    # NOTE(review): declared but not assigned anywhere in this file; kept so
    # that existing users of the attribute keep type-checking.
    git_revision: str
    # Release name taken from the page <title>, stored by parse_channel().
    release_name: str
    # File table of the channel page, keyed by file name.
    table: Dict[str, 'ChannelTableEntry']
    # URL of the channel page; supplied by the caller.
    url: str
44
45
2f96f32a
SW
class VerificationError(Exception):
    """Raised by Verification.result when a check does not pass."""
48
49
class Verification:
    """Progress reporter that aborts on the first failed check.

    Status fragments are printed on one line; ``result`` then finishes the
    line with a coloured marker padded towards the right edge of the
    terminal and raises if the check failed.
    """

    def __init__(self) -> None:
        # Number of characters already printed on the current line.
        self.line_length = 0

    def status(self, s: str) -> None:
        """Print *s* followed by a space, leaving the line open."""
        print(s, end=' ', flush=True)
        # len() counts code points, which may not equal the display width.
        self.line_length += len(s) + 1

    @staticmethod
    def _color(s: str, c: int) -> str:
        """Return *s* wrapped in the escape sequence for colour code *c*."""
        return '\033[%2dm%s\033[00m' % (c, s)

    def result(self, r: bool) -> None:
        """Finish the current line with the marker for outcome *r*.

        Raises:
            VerificationError: if *r* is false.
        """
        outcomes = {True: ('OK ', 92), False: ('FAIL', 91)}
        message, color = outcomes[r]
        cols = shutil.get_terminal_size().columns
        # The modulo keeps the padding non-negative when the status text has
        # already run past the terminal width.
        pad = (cols - self.line_length - len(message)) % cols
        marker = self._color(message, color)
        print(' ' * pad + marker)
        self.line_length = 0
        if not r:
            raise VerificationError()

    def check(self, s: str, r: bool) -> None:
        """Print status *s* and immediately report outcome *r*."""
        self.status(s)
        self.result(r)

    def ok(self) -> None:
        """Report success for the status line currently open."""
        self.result(True)
79
80
def compare(a: str, b: str) -> Tuple[List[str], List[str], List[str]]:
    """Compare the file trees rooted at *a* and *b* by content.

    Every file (and every symlink to a directory) found under either root
    is compared, except entries below a top-level ``.git/`` directory.

    Returns:
        The ``(match, mismatch, errors)`` lists of relative paths produced
        by ``filecmp.cmpfiles`` with ``shallow=False``.

    Raises:
        OSError: if a directory cannot be listed while walking a tree.
    """

    def reraise(error: OSError) -> None:
        # os.walk ignores listing errors unless its onerror callback raises.
        raise error

    def relative(parent: str, name: str) -> str:
        # Entries directly under the root get no './' prefix.
        if parent == '.':
            return name
        return os.path.join(parent, name)

    def tree_entries(root: str) -> Iterable[str]:
        found: List[str] = []
        for here, subdirs, filenames in os.walk(root, onerror=reraise):
            prefix = os.path.relpath(here, start=root)
            for filename in filenames:
                found.append(relative(prefix, filename))
            # Symlinks to directories are reported among the directories;
            # record them as entries of their own.
            for subdir in subdirs:
                if os.path.islink(relative(here, subdir)):
                    found.append(relative(prefix, subdir))
        return found

    def outside_dot_git(names: Iterable[str]) -> Iterable[str]:
        return (name for name in names if not name.startswith('.git/'))

    in_a = set(outside_dot_git(tree_entries(a)))
    in_b = set(outside_dot_git(tree_entries(b)))
    return filecmp.cmpfiles(a, b, in_a | in_b, shallow=False)
107
108
def fetch(v: Verification, channel: Channel) -> None:
    """Download the channel page and record its body and final URL.

    Stores the response body in ``channel.channel_html`` and the URL the
    response reports in ``channel.forwarded_url``, then verifies that the
    status was 200 and that the final URL differs from ``channel.url``.

    Raises:
        VerificationError: if either check fails.
    """
    v.status('Fetching channel')
    # The context manager closes the HTTP response even if reading fails;
    # previously the response object was never closed.
    with urllib.request.urlopen(channel.url, timeout=10) as request:
        channel.channel_html = request.read()
        channel.forwarded_url = request.geturl()
        status = request.status
    v.result(status == 200)
    v.check('Got forwarded', channel.url != channel.forwarded_url)
2f96f32a
SW
116
117
def parse_channel(v: Verification, channel: Channel) -> None:
    """Extract the release name, git commit and file table from the page.

    Parses ``channel.channel_html`` as XML and stores the results in
    ``channel.release_name``, ``channel.git_commit`` and ``channel.table``.

    Raises:
        VerificationError: if the title and heading disagree on the release
            name, or the git commit is not preceded by the expected label.
    """
    v.status('Parsing channel description as XML')
    doc = xml.dom.minidom.parseString(channel.channel_html)
    v.ok()

    def third_word(tag: str) -> str:
        # The release name is the third whitespace-separated word of the
        # first <tag> element's text.
        return doc.getElementsByTagName(tag)[0].firstChild.nodeValue.split()[2]

    v.status('Extracting release name:')
    name_in_title = third_word('title')
    name_in_h1 = third_word('h1')
    v.status(name_in_title)
    v.result(name_in_title == name_in_h1)
    channel.release_name = name_in_title

    v.status('Extracting git commit:')
    commit_node = doc.getElementsByTagName('tt')[0]
    channel.git_commit = commit_node.firstChild.nodeValue
    v.status(channel.git_commit)
    v.ok()
    v.status('Verifying git commit label')
    label = commit_node.previousSibling.nodeValue
    v.result(label == 'Git commit ')

    v.status('Parsing table')
    channel.table = {}
    # The first <tr> is skipped (presumably the header row).
    for row in doc.getElementsByTagName('tr')[1:]:
        link = row.childNodes[0].firstChild
        name = link.firstChild.nodeValue
        url = link.getAttribute('href')
        size = int(row.childNodes[1].firstChild.nodeValue)
        digest = Digest16(row.childNodes[2].firstChild.firstChild.nodeValue)
        channel.table[name] = ChannelTableEntry(
            url=url, digest=digest, size=size)
    v.ok()
149
150
dc038df0
SW
def digest_string(s: bytes) -> Digest16:
    """Return the sha256 digest of *s* as base16 text."""
    hasher = hashlib.sha256()
    hasher.update(s)
    return Digest16(hasher.hexdigest())
153
154
73bec7e8
SW
def digest_file(filename: str) -> Digest16:
    """Return the sha256 digest of the file's contents as base16 text.

    The file is read in 4096-byte blocks so it never has to fit in memory.
    """
    hasher = hashlib.sha256()
    with open(filename, 'rb') as f:
        while True:
            block = f.read(4096)
            if block == b'':  # end of file
                break
            hasher.update(block)
    return Digest16(hasher.hexdigest())
162
163
def to_Digest16(v: Verification, digest32: Digest32) -> Digest16:
    """Convert a base32 sha256 digest to base16 by running ``nix to-base16``.

    Raises:
        VerificationError: if the nix command exits with a non-zero status.
    """
    v.status('Converting digest to base16')
    command = ['nix', 'to-base16', '--type', 'sha256', digest32]
    process = subprocess.run(command, capture_output=True)
    v.result(process.returncode == 0)
    converted = process.stdout.decode().strip()
    return Digest16(converted)
170
171
def to_Digest32(v: Verification, digest16: Digest16) -> Digest32:
    """Convert a base16 sha256 digest to base32 by running ``nix to-base32``.

    Raises:
        VerificationError: if the nix command exits with a non-zero status.
    """
    v.status('Converting digest to base32')
    command = ['nix', 'to-base32', '--type', 'sha256', digest16]
    process = subprocess.run(command, capture_output=True)
    v.result(process.returncode == 0)
    converted = process.stdout.decode().strip()
    return Digest32(converted)
178
179
def fetch_with_nix_prefetch_url(
        v: Verification,
        url: str,
        digest: Digest16) -> str:
    """Fetch *url* with ``nix-prefetch-url`` and verify it against *digest*.

    The digest reported by nix-prefetch-url and a digest computed locally
    from the downloaded file must both equal *digest*.

    Returns:
        The path of the fetched file as printed by nix-prefetch-url.

    Raises:
        VerificationError: if the command fails or a digest does not match.
    """
    v.status('Fetching %s' % url)
    command = ['nix-prefetch-url', '--print-path', url, digest]
    process = subprocess.run(command, capture_output=True)
    v.result(process.returncode == 0)

    # The output is unpacked as exactly two lines (digest, then path), each
    # terminated by a newline, which leaves one empty trailing field.
    output_fields = process.stdout.decode().split('\n')
    prefetch_digest, path, empty = output_fields
    # NOTE(review): this assert is skipped when Python runs with -O.
    assert empty == ''

    reported = to_Digest16(v, Digest32(prefetch_digest))
    v.check("Verifying nix-prefetch-url's digest", reported == digest)

    v.status("Verifying file digest")
    v.result(digest_file(path) == digest)
    return path
2f96f32a 196
73bec7e8 197
def fetch_resources(v: Verification, channel: Channel) -> None:
    """Fetch the channel's git-revision file and nixexprs tarball.

    Each resource is downloaded and digest-checked, and its local path is
    stored in the ``file`` attribute of its ``channel.table`` entry.  The
    contents of the fetched git-revision file must then equal the commit
    that was read from the channel page.

    Raises:
        VerificationError: if a fetch, a digest check or the commit
            comparison fails.
    """
    for resource in ['git-revision', 'nixexprs.tar.xz']:
        fields = channel.table[resource]
        # Table links may be relative; resolve against the forwarded URL.
        url = urllib.parse.urljoin(channel.forwarded_url, fields.url)
        fields.file = fetch_with_nix_prefetch_url(v, url, fields.digest)

    v.status('Verifying git commit on main page matches git commit in table')
    # Read through a context manager so the file handle is closed promptly;
    # previously it was left open.
    with open(channel.table['git-revision'].file) as revision_file:
        revision_in_table = revision_file.read(999)
    v.result(revision_in_table == channel.git_commit)
2f96f32a
SW
207
208
dc038df0
SW
def git_fetch(v: Verification, channel: Channel) -> None:
    """Make sure the channel's commit is in the local git cache and on its ref.

    Sets ``channel.git_cachedir`` to a bare repository under ``~/.cache``
    named after the digest of the channel URL, creating it if needed.  The
    ref is fetched only when the commit is not already present.  Finally the
    commit is verified to be an ancestor of the ref.

    Raises:
        VerificationError: if any git command reports failure.
    """
    # It would be nice if we could share the nix git cache, but as of the time
    # of writing it is transitioning from gitv2 (deprecated) to gitv3 (not ready
    # yet), and trying to straddle them both is too far into nix implementation
    # details for my comfort.  So we re-implement here half of nix.fetchGit.
    # :(

    # TODO: Consider using pyxdg to find this path.
    cache_name = digest_string(channel.url.encode())
    channel.git_cachedir = os.path.expanduser(
        '~/.cache/nix-pin-channel/git/%s' % cache_name)
    if not os.path.exists(channel.git_cachedir):
        v.status("Initializing git repo")
        init = subprocess.run(
            ['git', 'init', '--bare', channel.git_cachedir])
        v.result(init.returncode == 0)

    git_in_cache = ['git', '-C', channel.git_cachedir]
    has_rev_command = git_in_cache + ['cat-file', '-e', channel.git_commit]

    v.status('Checking if we already have this rev:')
    lookup = subprocess.run(has_rev_command)
    if lookup.returncode == 0:
        v.status('yes')
    if lookup.returncode == 1:
        v.status('no')
    # Any exit status other than 0 or 1 is treated as a failure.
    v.result(lookup.returncode in (0, 1))

    if lookup.returncode == 1:
        v.status('Fetching ref "%s"' % channel.git_ref)
        # We don't use --force here because we want to abort and freak out if
        # forced updates are happening.
        refspec = '%s:%s' % (channel.git_ref, channel.git_ref)
        fetched = subprocess.run(
            git_in_cache + ['fetch', channel.git_repo, refspec])
        v.result(fetched.returncode == 0)
        v.status('Verifying that fetch retrieved this rev')
        recheck = subprocess.run(has_rev_command)
        v.result(recheck.returncode == 0)

    v.status('Verifying rev is an ancestor of ref')
    ancestry = subprocess.run(
        git_in_cache + ['merge-base', '--is-ancestor',
                        channel.git_commit, channel.git_ref])
    v.result(ancestry.returncode == 0)
261
262
925c801b
SW
def compare_tarball_and_git(
        v: Verification,
        channel: Channel,
        channel_contents: str,
        git_contents: str) -> None:
    """Verify that the unpacked tarball and the git checkout agree.

    At least one file must match, none may differ, and the only files that
    could not be compared must be exactly the expected set listed below.

    Raises:
        VerificationError: if any of those conditions does not hold.
    """
    v.status('Comparing channel tarball with git checkout')
    tarball_root = os.path.join(channel_contents, channel.release_name)
    match, mismatch, errors = compare(tarball_root, git_contents)
    v.ok()
    v.check('%d files match' % len(match), bool(match))
    v.check('%d files differ' % len(mismatch), not mismatch)

    # Files expected to be incomparable between the two trees.
    expected_errors = [
        '.git-revision',
        '.version-suffix',
        'nixpkgs',
        'programs.sqlite',
        'svn-revision']
    benign_errors = [name for name in expected_errors if name in errors]
    # Whatever remains in errors afterwards is unexpected.
    for name in benign_errors:
        errors.remove(name)

    v.check('%d unexpected incomparable files' % len(errors), not errors)
    v.check(
        '(%d of %d expected incomparable files)' %
        (len(benign_errors), len(expected_errors)),
        len(benign_errors) == len(expected_errors))
294
295
def extract_tarball(v: Verification, channel: Channel, dest: str) -> None:
    """Unpack the fetched nixexprs tarball into directory *dest*."""
    tarball = channel.table['nixexprs.tar.xz'].file
    v.status('Extracting tarball %s' % tarball)
    shutil.unpack_archive(tarball, dest)
    v.ok()
303
304
def git_checkout(v: Verification, channel: Channel, dest: str) -> None:
    """Export the channel's git commit into directory *dest*.

    Pipes ``git archive`` of the commit from the cache repository into
    ``tar x``.

    Raises:
        VerificationError: if either process exits with a non-zero status.
    """
    v.status('Checking out corresponding git revision')
    archive_command = [
        'git', '-C', channel.git_cachedir, 'archive', channel.git_commit]
    unpack_command = ['tar', 'x', '-C', dest, '-f', '-']

    git = subprocess.Popen(archive_command, stdout=subprocess.PIPE)
    tar = subprocess.Popen(unpack_command, stdin=git.stdout)
    # tar now holds the read end of the pipe; drop this process's copy.
    git.stdout.close()
    tar_status = tar.wait()
    git_status = git.wait()
    v.result(git_status == 0 and tar_status == 0)
319
320
f9cd7bdc
SW
def check_channel_metadata(
        v: Verification,
        channel: Channel,
        channel_contents: str) -> None:
    """Check the metadata files shipped inside the unpacked channel tarball.

    ``.git-revision`` must equal ``channel.git_commit`` and the contents of
    ``.version-suffix`` must be a suffix of ``channel.release_name``.  Both
    files are looked up in ``<channel_contents>/<release_name>/``.

    Raises:
        VerificationError: if either check fails.
    """

    def read_metadata(name: str) -> str:
        # Read through a context manager so the handle is closed promptly;
        # previously these files were left open.
        path = os.path.join(channel_contents, channel.release_name, name)
        with open(path) as metadata_file:
            return metadata_file.read(999)

    v.status('Verifying git commit in channel tarball')
    v.result(read_metadata('.git-revision') == channel.git_commit)

    v.status(
        'Verifying version-suffix is a suffix of release name %s:' %
        channel.release_name)
    version_suffix = read_metadata('.version-suffix')
    v.status(version_suffix)
    v.result(channel.release_name.endswith(version_suffix))
343
344
def check_channel_contents(v: Verification, channel: Channel) -> None:
    """Unpack the tarball and the git commit side by side and compare them.

    Both trees live in temporary directories that are removed before the
    final status line is completed.

    Raises:
        VerificationError: if any of the contained checks fails.
    """
    with tempfile.TemporaryDirectory() as channel_contents:
        with tempfile.TemporaryDirectory() as git_contents:
            extract_tarball(v, channel, channel_contents)
            check_channel_metadata(v, channel, channel_contents)

            git_checkout(v, channel, git_contents)

            compare_tarball_and_git(
                v, channel, channel_contents, git_contents)

            # Announced here; the removal happens when the with blocks exit.
            v.status('Removing temporary directories')
    v.ok()
358
359
def main() -> None:
    """Run every verification step for the nixos-20.03 channel, in order.

    Prints the accumulated channel record once all steps have passed.  A
    failing step raises VerificationError, which is not caught here.
    """
    v = Verification()
    channel = Channel(url='https://channels.nixos.org/nixos-20.03',
                      git_repo='https://github.com/NixOS/nixpkgs.git',
                      git_ref='nixos-20.03')
    # Each step relies on attributes stored on the channel by earlier steps.
    steps = (
        fetch,
        parse_channel,
        fetch_resources,
        git_fetch,
        check_channel_contents,
    )
    for step in steps:
        step(v, channel)
    print(channel)
2f96f32a
SW
371
372
# Only run the verification when executed as a script, so that importing
# this module does not trigger network access and subprocess calls.
if __name__ == '__main__':
    main()