]> git.scottworley.com Git - annex-ec/blob - annex-ec
660291936563690b10e82123ccf7c78371b65c1f
[annex-ec] / annex-ec
1 #!/usr/bin/env bash
2
3 # annex-ec: Use erasure codes for more efficient storage use in git-annex
4 # Copyright (C) 2026 Scott Worley
5
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as
8 # published by the Free Software Foundation, either version 3 of the
9 # License, or (at your option) any later version.
10
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
12
# Print all arguments, joined by spaces, as one line on stderr, then terminate
# the script with exit status 1.
die() {
  # printf rather than echo: a message such as "-n" or "-e" must be printed
  # verbatim instead of being taken as an echo option.
  printf '%s\n' "$*" >&2
  exit 1
}
17
18
# Parse a comma-separated list of volume names into the global array `volumes`.
# Globals:   volumes (written; replaced, not appended to)
# Arguments: $1 - comma-separated names; empty entries are skipped
# A name equal to the second field of this repository's line in
# git-annex:uuid.log (looked up by annex.uuid) is stored as "here".
parse_volume_list() {
  local this_volume_name entry
  local -a tmp=()

  # Join on the uuid to pick this repository's uuid.log line; field 2 of that
  # line is taken as this repository's name.
  this_volume_name=$(join -j1 <(git config get annex.uuid) <(git cat-file -p git-annex:uuid.log | sort) | cut -d' ' -f2)

  # -t drops the comma delimiter from every element.
  mapfile -d , -t tmp <<< "$1"

  volumes=()
  for entry in "${tmp[@]}"; do
    # The here-string appends a newline, which ends up on the last element.
    entry=${entry%$'\n'}
    if [[ -n "$entry" ]]; then
      if [[ "$entry" == "$this_volume_name" ]]; then
        volumes+=( "here" )
      else
        volumes+=( "$entry" )
      fi
    fi
  done
}
36
# Build the base name for this group's par2 files and store it in the global
# `name`.
# Globals:   N (read)       - number of data files in the group
#            name (written) - dash-joined, truncated key fragments
# Arguments: the files of the group (expected to be annex symlinks)
# Each file contributes the part of its symlink target that follows the last
# "SHA256E-s<digits>--" and precedes the first "." after it, truncated so that
# the joined name plus the example prefix/suffix lengths below stays within
# FILENAME_MAX characters.
make_name() {
  local -r FILENAME_MAX=255
  local -r EXAMPLE_SUFFIX='.vol0000+9999.par2'
  # Presumably the decorations git-annex adds to temporary file names while
  # ingesting; only their lengths are used here.
  local -r EXAMPLE_ANNEX_INTERNAL_PREFIX='ingest-'
  local -r EXAMPLE_ANNEX_INTERNAL_SUFFIX='-1-1173fd7'
  local num_separating_dashes overhead available len

  num_separating_dashes=$((N - 1))
  overhead=$(( ${#EXAMPLE_SUFFIX} + ${#EXAMPLE_ANNEX_INTERNAL_PREFIX} + ${#EXAMPLE_ANNEX_INTERNAL_SUFFIX} + num_separating_dashes ))
  available=$((FILENAME_MAX - overhead))
  len=$((available / N))

  # %l prints each symlink's target; one fragment per line, then newlines
  # become dashes and the trailing dash is removed.
  name=$(find "$@" -printf '%l\n' \
    | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' \
    | cut -c-"$len" \
    | tr '\n' -)
  name=${name%-}
}
49
# Defaults; each can be overridden by the command-line option noted.
volumes=()                          # -v remote1,remote2,...
redundancy=1                        # -r
max_block_size=$((128*1024*1024))   # -x
block_size_is_a_multiple_of=4       # -m; par2 requires that this be at least 4
blocks_per_file=10                  # -b
while getopts b:m:r:v:x: opt; do
  case $opt in
    b) blocks_per_file=$OPTARG ;;
    m) block_size_is_a_multiple_of=$OPTARG ;;
    r) redundancy=$OPTARG ;;
    v) parse_volume_list "$OPTARG" ;;
    x) max_block_size=$OPTARG ;;
    *) echo 'usage: annex-ec [-v remote1,remote2,...] [-r N] [-b N] [-x N] [-m N] file file...' >&2; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

# Without -v, use this repository plus every configured git remote.
if (( ${#volumes[@]} == 0 )); then
  parse_volume_list "here,$(git remote | tr \\n ,)"
fi

# Number of data files in a group: one volume per data file, and the
# remaining $redundancy volumes receive the par2 recovery files.
N=$((${#volumes[@]} - redundancy))

# N == 0 with no file arguments would otherwise pass the count check below and
# carry on with an empty file list; a negative N can never be satisfied.
(( N >= 1 )) || die "Need more volumes than redundancy: ${#volumes[@]} volumes - $redundancy redundancy leaves $N data files per group"

(( $# == N )) || die "Expected $N files in this group ($N + $redundancy = ${#volumes[@]}), but got $#"
74
75
# Make sure the content of every file in the group is present locally.
git annex get -- "$@"

# Size in bytes of the largest file; -L makes find report the size of what the
# symlinks point to rather than of the links themselves.
max_size=$(find -L "$@" -printf '%s\n' | sort -nr | head -n1)

# block_size below is never smaller than block_size_is_a_multiple_of, so the
# search could not terminate unless the limit is strictly larger than that.
(( max_block_size > block_size_is_a_multiple_of )) \
  || die "Maximum block size ($max_block_size) must be greater than the block size multiple ($block_size_is_a_multiple_of)"

# Choose a block size that is a multiple of block_size_is_a_multiple_of and
# large enough that blocks_per_file blocks exceed the largest file; add blocks
# per file until that block size drops below max_block_size.
while true; do
  block_size=$(( (max_size / (block_size_is_a_multiple_of * blocks_per_file) + 1) * block_size_is_a_multiple_of ))
  if (( block_size < max_block_size )); then
    break
  fi
  blocks_per_file=$((blocks_per_file + 1))
done

make_name "$@"
86
# First run in this directory: create ec/ and mark everything inside it as
# needing only a single copy.
if ! [[ -d ec ]]; then
  mkdir ec
  # TODO: Make this robust against being interrupted here
  printf '%s\n' '* annex.numcopies=1' >> ec/.gitattributes
  git add ec/.gitattributes
fi

# Create the par2 recovery files for the group: $redundancy files carrying
# blocks_per_file recovery blocks each (-u presumably keeps them equally
# sized -- confirm against the par2 manual).
par2 c -u \
  -n"$redundancy" \
  -c"$((blocks_per_file * redundancy))" \
  -s"$block_size" \
  "$name.par2" "$@"

# Only the .vol* files are kept; they are moved into ec/.
rm "$name.par2"
mv "$name.vol"* ec/
97
# Place each data file on its own volume: the i-th file goes to volumes[i].
# A file whose volume is "here" needs no copy, since its content was fetched
# into this repository earlier.
i=0
for f; do
  target_volume="${volumes[i]}"
  if [[ "$target_volume" != here ]]; then
    git annex copy --to "$target_volume" -- "$f"
  fi
  i=$((i+1))
done

# i deliberately keeps counting: the recovery files take the volumes that
# follow the ones used by the data files.
git annex add ec/"$name.vol"*
for f in ec/"$name.vol"*; do
  target_volume="${volumes[i]}"
  if [[ "$target_volume" != here ]]; then
    git annex move --to "$target_volume" "$f"
  fi
  i=$((i+1))
done
119
# Turn a path into a gitattributes pattern in which glob metacharacters are
# taken literally and whitespace cannot end the pattern field.
# Arguments: $1 - path
# Outputs:   the escaped pattern on stdout
escape_gitattributes_pattern() {
  local pattern=$1
  # Backslashes first, so the escapes added below are not escaped again.
  pattern=${pattern//\\/\\\\}
  pattern=${pattern//\[/\\[}
  pattern=${pattern//\*/\\*}
  pattern=${pattern//\?/\\?}
  # Whitespace separates the pattern from its attributes, so spaces and tabs
  # are replaced by a character class. This runs after the '[' escaping so the
  # class itself stays unescaped.
  pattern=${pattern// /[[:space:]]}
  pattern=${pattern//$'\t'/[[:space:]]}
  printf '%s\n' "$pattern"
}

# Allow each data file of the group to exist in just one copy; the pattern is
# anchored with a leading "/".
for f; do
  printf '/%s annex.numcopies=1\n' "$(escape_gitattributes_pattern "$f")" >> .gitattributes
done
128
# On this repository and on every volume, drop each data file that is not
# assigned to that volume, so that each file remains only on volumes[i].
for volume in here "${volumes[@]}"; do
  i=0
  to_drop=()
  for f; do
    target_volume="${volumes[i]}"
    if [[ "$volume" != "$target_volume" ]]; then
      to_drop+=( "$f" )
    fi
    i=$((i+1))
  done

  # Nothing to drop on this volume (happens when the group has a single data
  # file and this is its volume). Running git annex drop without any path
  # would act on every annexed file in the current directory instead.
  if (( ${#to_drop[@]} == 0 )); then
    continue
  fi

  if [[ "$volume" == here ]]; then
    git annex drop -- "${to_drop[@]}"
  else
    git annex drop --from "$volume" -- "${to_drop[@]}"
  fi
done
git add .gitattributes
146
# Append this group's record to ec/.meta: the par2 base name on one line,
# followed by each member file on its own line indented by one space.
(
  # Inside this subshell fd 1 is ec/.meta, so the lock is taken on that file
  # and held until the subshell ends.
  flock 1
  echo "$name"
  for member in "$@"; do
    printf ' %s\n' "$member"
  done
) >> ec/.meta
git add ec/.meta