]>
git.scottworley.com Git - git-cache/blob - git_cache.py
1 # git-cache: Cache git content locally
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
8 # It would be nice if we could share the nix git cache, but as of the
9 # time of writing it is transitioning from gitv2 (deprecated) to gitv3
10 # (not ready yet), and trying to straddle them both is too far into nix
11 # implementation details for my comfort. So we re-implement here half of
12 # nix's builtins.fetchGit. :(
23 from typing
import Iterator
, NamedTuple
, Optional
, TypeVar
, Tuple
, Union
28 class GitCacheError(Exception):
# Plain-string aliases: they make signatures self-describing but add no
# runtime checking, since every one of them is just `str`.
Path = str  # eg: "/home/user/.cache/git-cache/v1"
Repo = str  # eg: "https://github.com/NixOS/nixpkgs.git"
Ref = str  # eg: "master" or "v1.0.0"
Rev = str  # eg: "53a27350551844e1ed1a9257690294767389ef0d"
RefOrRev = Union[Ref, Rev]  # accepted wherever either a Ref or a Rev will do
39 class _LogEntry(NamedTuple
):
def _repo_hashname(repo: Repo) -> str:
    """Return the hexadecimal SHA-256 digest of the encoded repo string.

    The same repo string always maps to the same 64-character name.
    """
    hasher = hashlib.sha256()
    hasher.update(repo.encode())
    return hasher.hexdigest()
51 def git_cachedir(repo
: Repo
) -> Path
:
52 # Use xdg module when it's less painful to have as a dependency
53 XDG_CACHE_HOME
= Path(
54 os
.environ
.get('XDG_CACHE_HOME', os
.path
.expanduser('~/.cache')))
56 return Path(os
.path
.join(
59 _repo_hashname(repo
)))
62 def _log_filename(repo
: Repo
) -> Path
:
63 # Use xdg module when it's less painful to have as a dependency
65 os
.environ
.get('XDG_DATA_HOME', os
.path
.expanduser('~/.local/share')))
67 return Path(os
.path
.join(
70 _repo_hashname(repo
)))
73 def is_ancestor(repo
: Repo
, descendant
: RefOrRev
, ancestor
: RefOrRev
) -> bool:
74 cachedir
= git_cachedir(repo
)
75 logging
.debug('Checking if %s is an ancestor of %s', ancestor
, descendant
)
76 process
= subprocess
.run(['git',
84 return process
.returncode
== 0
91 force
: bool = False) -> None:
92 if not force
and not is_ancestor(repo
, descendant
, ancestor
):
93 raise GitCacheError(f
'{ancestor} is not an ancestor of {descendant}')
96 def _read_fetch_log(repo
: Repo
) -> Iterator
[_LogEntry
]:
97 filename
= _log_filename(repo
)
98 if not os
.path
.exists(filename
):
100 with open(filename
, 'r', encoding
='utf-8') as f
:
102 _
, _
, rev
, ref
= line
.strip().split(maxsplit
=3)
103 yield _LogEntry(ref
, rev
)
def _last(it: Iterator[T]) -> Optional[T]:
    """Return the final item produced by `it`, or None if it produces nothing.

    The iterator is consumed completely.
    """
    final: Optional[T] = None
    for final in it:
        # Nothing to do per item; the loop variable keeps the latest one.
        pass
    return final
def _previous_fetched_rev(repo: Repo, ref: Ref) -> Optional[Rev]:
    """Return the rev of the last fetch-log entry for `ref` in `repo`.

    Returns None when the log yields no entry whose ref equals `ref`.
    """
    revs_for_ref = (
        logged.rev
        for logged in _read_fetch_log(repo)
        if logged.ref == ref
    )
    return _last(revs_for_ref)
115 def _log_fetch(repo
: Repo
, ref
: Ref
, rev
: Rev
, force
: bool = False) -> None:
117 prev_rev
= _previous_fetched_rev(repo
, ref
)
118 if prev_rev
is not None:
119 verify_ancestry(repo
, rev
, prev_rev
)
120 filename
= _log_filename(repo
)
121 os
.makedirs(os
.path
.dirname(filename
), exist_ok
=True)
122 with open(filename
, 'a', encoding
='utf-8') as f
:
124 f
'{time.strftime("%Y-%m%d-%H:%M:%S%z")} '
125 f
'{"FORCEDFETCH" if force else "fetch"} {rev} {ref}\n'
129 def _show_force_warning() -> None:
131 **************************************************************************
132 * WARNING: git-cache INVOKED WITH --force! *
134 * This mode allows previously-fetched refs to be overwritten to point to *
135 * non-descendants -- commits that don't have the previous version of the *
136 * the ref in their history! *
138 * This should only be invoked by a human operator who knows what they're *
139 * doing to correct a specific, known, problem. Care should be taken to *
140 * prevent recurrence. *
142 * Press ^C to abort. *
144 ''', end
='', file=sys
.stderr
)
145 warn_time_override
= os
.environ
.get('FORCE_WARNING_TIME', None)
147 if warn_time_override
is None:
150 warn_time
= int(warn_time_override
)
152 '''* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
153 * !! WARNING DISPLAY TIME OVERRIDDEN !! *
155 * !! This message is intended to be displayed long enough for a !! *
156 * !! human operator to read it and have a chance to abort. An !! *
157 * !! override for the delay time is provided FOR THE UNIT TESTS !! *
158 * !! to avoid delaying software builds unnecessarily. This is !! *
159 * !! INTENDED FOR USE IN UNIT TESTS ONLY; THIS MESSAGE SHOULD !! *
160 * !! NEVER BE SEEN OUTSIDE BUILD LOGS! !! *
161 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *
163 ''', end
='', file=sys
.stderr
)
165 for i
in range(warn_time
, 0, -1):
166 msg
= f
'* {f"Continuing in {i} seconds...":-70s} *'
167 print(msg
, file=sys
.stderr
)
169 print('*' * 74, file=sys
.stderr
)
172 @backoff.on_exception(
174 subprocess
.CalledProcessError
,
175 max_time
=lambda: int(os
.environ
.get('BACKOFF_MAX_TIME', '30')))
180 force
: bool = False) -> None:
181 subprocess
.run(['git', '-C', cachedir
, 'fetch'] +
182 (['--force'] if force
else []) +
183 [repo
, f
'{ref}:{ref}'], check
=True)
186 def fetch(repo
: Repo
, ref
: Ref
, force
: bool = False) -> Tuple
[Path
, Rev
]:
188 _show_force_warning()
189 cachedir
= git_cachedir(repo
)
190 if not os
.path
.exists(cachedir
):
191 logging
.debug("Initializing git repo")
192 subprocess
.run(['git',
194 'init.defaultBranch=git-cache--no-default-branch',
201 logging
.debug('Fetching ref "%s" from %s', ref
, repo
)
202 _git_fetch(cachedir
, repo
, ref
, force
=force
)
204 with open(os
.path
.join(cachedir
, 'refs', 'heads', ref
), encoding
='utf-8') as rev_file
:
205 rev
= Rev(rev_file
.read(999).strip())
206 verify_ancestry(repo
, ref
, rev
, force
=force
)
207 _log_fetch(repo
, ref
, rev
, force
=force
)
212 def ensure_rev_available(
216 force
: bool = False) -> Path
:
217 cachedir
= git_cachedir(repo
)
218 if os
.path
.exists(cachedir
) and is_ancestor(repo
, ref
, rev
):
222 'We do not have rev %s. We will fetch ref "%s" and hope it appears.',
224 fetch(repo
, ref
, force
=force
)
225 logging
.debug('Verifying that fetch retrieved rev %s', rev
)
226 subprocess
.run(['git', '-C', cachedir
, 'cat-file', '-e', rev
], check
=True)
227 verify_ancestry(repo
, ref
, rev
, force
=force
)
233 parser
= argparse
.ArgumentParser(
234 description
='Cache remote git repositories locally.',
235 epilog
='example usage: git-cache https://github.com/NixOS/nixpkgs.git master')
239 help='Recover from a force-push in the remote repo')
244 help='Git repository URL')
249 help='Ref (branch or tag) in the git repo')
255 help='Ensure that this revision is present. ' +
256 'If this revision is already present locally, no network operations are performed.')
257 args
= parser
.parse_args()
260 cachedir
, rev
= fetch(args
.repo
, args
.ref
, force
=args
.force
)
261 print(f
'{rev} {cachedir}')
264 ensure_rev_available(