]>
Commit | Line | Data |
---|---|---|
1 | # nix-profile-gc: More gently remove old profiles | |
2 | # Copyright (C) 2022 Scott Worley <scottworley@scottworley.com> | |
3 | # | |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU General Public License as published by | |
6 | # the Free Software Foundation, version 3. | |
7 | # | |
8 | # This program is distributed in the hope that it will be useful, | |
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 | # GNU General Public License for more details. | |
12 | # | |
13 | # You should have received a copy of the GNU General Public License | |
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>. | |
15 | ||
16 | { lib, config, pkgs, ... }: | |
17 | let | |
18 | inherit (lib) escapeShellArg; | |
19 | cfg = config.nix.profile-gc; | |
# Convert a systemd.time duration string into microseconds at build time.
# Returns a derivation whose output file contains the number printed on the
# "μs:" line of `systemd-analyze timespan`.  The build fails if that value
# is not a plain non-negative integer, so a bad duration (or an unexpected
# output format) is caught when the system is built instead of surfacing
# later as a shell arithmetic error in the script that reads the file.
parse-duration = duration: pkgs.runCommand "duration" { buildInputs = with pkgs; [ systemd ]; } ''
  set -euo pipefail
  parsed=$(systemd-analyze timespan ${escapeShellArg duration} | awk '$1 == "μs:" { print $2 }')
  if [[ ! "$parsed" =~ ^[0-9]+$ ]];then
    echo "nix-profile-gc: could not parse duration:" ${escapeShellArg duration} >&2
    exit 1
  fi
  echo "$parsed" > "$out"
'';
25 | in { | |
# Option declarations for nix.profile-gc.  All duration-valued options are
# strings in systemd.time format; they are converted to microseconds by
# parse-duration when the GC script is built.
options = {
  nix.profile-gc = {
    enable = lib.mkEnableOption "Automatic profile garbage collection";
    dryRun = lib.mkOption {
      description = "Say what would have been deleted rather than actually deleting profiles";
      type = lib.types.bool;
      default = false;
    };
    keepLast = lib.mkOption {
      description = ''
        Number of recent profiles to keep.
        This control is similar to the +5 syntax of nix-env --delete-generations.
      '';
      type = lib.types.ints.unsigned;
      default = 5;
    };
    keepLastActive = lib.mkOption {
      description = "Number of recent active profiles to keep";
      type = lib.types.ints.unsigned;
      default = 5;
    };
    keepLastActiveSystem = lib.mkOption {
      description = "Number of recent active system profiles to keep";
      type = lib.types.ints.unsigned;
      default = 5;
    };
    keepLastActiveBoot = lib.mkOption {
      description = "Number of recent active boot profiles to keep";
      type = lib.types.ints.unsigned;
      default = 3;
    };
    activeThreshold = lib.mkOption {
      description = ''
        A system profile that is active (or is either /run/current-system or /run/booted-system)
        for at least this long (of powered-on machine time) is considered 'active' for
        the purpose of evaluating the keepLastActive number of profiles.  This mechanism is
        intended to preserve profiles that are in some sense stable, that have served us well,
        so they don't immediately become gc-eligible when a system that hasn't been updated in
        a while (so keepLatest won't protect them) generates a bunch of broken profiles (so
        keepLast won't protect them) while trying to get up to date.

        This threshold is approximate, see activeMeasurementGranularity.
        Do not set less than activeMeasurementGranularity!
      '';
      # We admonish the user "Do not set less than activeMeasurementGranularity!" and check
      # it at runtime rather than verifying this with an assertion at evaluation time because
      # parsing these durations at evaluation-time requires import-from-derivation, which we
      # want to avoid.  :(
      type = lib.types.str;
      default = "5 days";
    };
    activeMeasurementGranularity = lib.mkOption {
      description = ''
        How often to make a note of the currently-active profiles.  This is the useful
        granularity and minimum value of activeThreshold.
      '';
      # Declared as a string like the other duration options (activeThreshold,
      # keepLatest); the value is used both as a systemd timer interval and as
      # an argument to systemd-analyze timespan.
      type = lib.types.str;
      default = "1 hour";
    };
    keepLatest = lib.mkOption {
      description = ''
        Keep all profiles younger than this duration (systemd.time format).
        This control is similar to nix-collect-garbage's --delete-older-than.
      '';
      type = lib.types.str;
      default = "6 months";
    };
    keepFuture = lib.mkOption {
      description = "Keep profiles 'ahead' of the current profile (happens after rollback)";
      type = lib.types.bool;
      default = true;
    };
    logdir = lib.mkOption {
      description = "Where to keep liveness logs";
      type = lib.types.str;
      default = "/var/log/profile-gc";
    };
  };
};
104 | config = lib.mkIf cfg.enable { | |
# Profile GC runs as a pre-start step of the nix-gc unit, so enabling it
# without automatic Nix garbage collection is rejected at evaluation time.
assertions = [
  {
    message = "nix.profile-gc.enable requires nix.gc.automatic";
    assertion = !cfg.enable || config.nix.gc.automatic;
  }
];
# Pre-start hook for the nix-gc unit: removes old profile generation links
# so that the following garbage collection can reclaim them.  A generation
# is kept when it is the current one, among the keepLast newest, recently
# active according to the liveness logs, younger than keepLatest, or (with
# keepFuture) numbered higher than the current generation.
systemd.services.nix-gc.serviceConfig.ExecStartPre = pkgs.writeShellScript "nix-profile-gc" ''
  set -euo pipefail

  # Escaped once here so the path is safe to use even if it contains
  # whitespace or shell metacharacters.
  logdir=${escapeShellArg cfg.logdir}

  if [[ ! -e "$logdir/active-system"
  || ! -e "$logdir/active-boot"
  || ! -e "$logdir/active-profiles" ]]
  then
    echo "Liveness logs not found. Not doing any profile garbage collection." >&2
    exit 0
  fi

  alive_threshold="$(< ${parse-duration cfg.activeThreshold})"
  alive_loginterval="$(< ${parse-duration cfg.activeMeasurementGranularity})"
  if (( alive_threshold < alive_loginterval ));then
    echo "Liveness threshold is too low. Not doing any profile garbage collection." >&2
    exit 0
  fi

  # topn FILE KEY N
  # Read FILE newest-entry-first and print up to N distinct values that
  # occur at least threshold/loginterval times.  With an empty KEY the
  # value is the first field of every line.  Otherwise only lines whose
  # first field equals KEY are considered and the value is the second field.
  topn() {
    # N of zero means keep nothing.  Without this guard the found counter
    # would never equal n and every qualifying value would be printed.
    if (( $3 <= 0 ));then
      return 0
    fi
    # tac feeds awk through a process substitution rather than a pipeline
    # so that tac being killed by SIGPIPE after awk exits early is not
    # reported as a failure of topn under pipefail.
    ${pkgs.gawk}/bin/awk \
      --assign key="$2" \
      --assign n="$3" \
      --assign threshold="$alive_threshold" \
      --assign loginterval="$alive_loginterval" \
      '
        !key || $1 == key {
          val = key ? $2 : $1
          if (++count[val] == int(threshold/loginterval)) {
            print val
            if (++found == n) {
              exit 0
            }
          }
        }
      ' < <(${pkgs.coreutils}/bin/tac "$1")
  }

  # Same as topn, but also reports the selected entries on stderr.
  verbose_topn() {
    topn "$@" | ${pkgs.coreutils}/bin/tee >(
      echo "Keeping the last $3 $2 entries from $1:" >&2
      ${pkgs.gawk}/bin/gawk '{ print " " $0 }' >&2 )
  }

  # Targets that were recently the running or booted system.
  declare -A active_targets
  while read -r target;do
    active_targets[$target]=1
  done < <(
    verbose_topn "$logdir/active-system" "" ${escapeShellArg cfg.keepLastActiveSystem}
    verbose_topn "$logdir/active-boot" "" ${escapeShellArg cfg.keepLastActiveBoot}
  )

  now=$(${pkgs.coreutils}/bin/date +%s)
  age_threshold="$(< ${parse-duration cfg.keepLatest})"
  while read -r profile;do
    echo "Contemplating profiles for $profile:" >&2

    # Generation links that were recently the target of this profile.
    unset active
    declare -A active
    while read -r p;do
      active[$p]=1
    done < <(verbose_topn "$logdir/active-profiles" "$profile" ${escapeShellArg cfg.keepLastActive})

    # Generation numbers protected by keepLast: the highest-numbered
    # generations of this profile.
    unset recent
    declare -A recent
    while read -r gen;do
      recent[$gen]=1
    done < <(
      for cand in "$profile"-*-link;do
        gen=''${cand%-link}
        gen=''${gen##*-}
        if [[ "$cand" == "$profile-$gen-link" && "$gen" =~ ^[0-9]+$ ]];then
          echo "$gen"
        fi
      done |
        ${pkgs.coreutils}/bin/sort -rn |
        ${pkgs.coreutils}/bin/head -n ${escapeShellArg cfg.keepLast}
    )

    current=$(${pkgs.coreutils}/bin/readlink "$profile")
    currentgen=''${current%-link}
    currentgen=''${currentgen##*-}
    for p in "$profile"-*-link;do
      # An unmatched glob is left as the literal pattern.  Skip it instead
      # of trying to inspect and remove a file that does not exist.
      if [[ ! -e "$p" && ! -L "$p" ]];then
        continue
      fi
      pgen=''${p%-link}
      pgen=''${pgen##*-}
      # The glob also matches links of other profiles that share this
      # prefix.  Only PROFILE-NUMBER-link belongs to this profile.
      if [[ "$p" != "$profile-$pgen-link" || ! "$pgen" =~ ^[0-9]+$ ]];then
        echo "(Disregarding unrelated profile $p)" >&2
        continue
      fi
      pname=$(${pkgs.coreutils}/bin/basename "$p")
      if [[ "$pname" == "$current" ]];then
        echo "Keeeping current profile $p" >&2
        continue
      fi
      if [[ "''${recent[$pgen]:-}" ]];then
        echo "Keeping recent profile $p" >&2
        continue
      fi
      if [[ "''${active_targets[$(${pkgs.coreutils}/bin/readlink "$p")]:-}" ]];then
        echo "Keeeping active system/boot profile $p" >&2
        continue
      fi
      if [[ "''${active[$pname]:-}" ]];then
        echo "Keeeping active profile $p" >&2
        continue
      fi
      # age_threshold is in microseconds and link timestamps are in seconds.
      if (( (now - "$(${pkgs.findutils}/bin/find "$p" -printf %Ts)") < age_threshold/1000000 ));then
        echo "Keeeping young profile $p" >&2
        continue
      fi
      ${lib.optionalString cfg.keepFuture ''
        if (( pgen > currentgen ));then
          echo "Keeeping future profile $p" >&2
          continue
        fi
      ''}
      ${if cfg.dryRun then ''
        echo "Would remove profile $p" >&2
      '' else ''
        echo "Removing profile $p" >&2
        ${pkgs.coreutils}/bin/rm -- "$p"
      ''}
    done
  done < <(${pkgs.findutils}/bin/find ''${NIX_STATE_DIR:-/nix/var/nix}/profiles/ -type l -not -name '*[0-9]-link')
'';
# Timer for the liveness logger: fires once activeMeasurementGranularity
# after the timer is activated and then again at that interval after each
# run of the unit.
systemd.timers.profile-gc-log-active = {
  timerConfig = {
    OnActiveSec = cfg.activeMeasurementGranularity;
    OnUnitActiveSec = cfg.activeMeasurementGranularity;
  };
  wantedBy = [ "timers.target" ];
};
# Liveness logger: appends the targets of /run/current-system and
# /run/booted-system, and one "PROFILE TARGET" line per profile symlink,
# to the log files that the profile GC script reads.
systemd.services.profile-gc-log-active = {
  description =
    "Log the active profiles for gc collection policy evaluation";
  serviceConfig.Type = "oneshot";
  script = ''
    # Escaped once here so the path is safe to use even if it contains
    # whitespace or shell metacharacters.
    logdir=${escapeShellArg cfg.logdir}
    ${pkgs.coreutils}/bin/mkdir -p -- "$logdir"
    ${pkgs.coreutils}/bin/readlink /run/current-system >> "$logdir/active-system"
    ${pkgs.coreutils}/bin/readlink /run/booted-system >> "$logdir/active-boot"
    ${pkgs.findutils}/bin/find ''${NIX_STATE_DIR:-/nix/var/nix}/profiles/ \
      -type l -not -name '*[0-9]-link' \
      -exec ${pkgs.stdenv.shell} -c '
        for f;do
          echo -n "$f "
          ${pkgs.coreutils}/bin/readlink "$f"
        done' - {} + \
      >> "$logdir/active-profiles"
  '';
};
237 | }; | |
238 | } |