]> git.scottworley.com Git - annex-ec/blob - annex-ec
d569fcc301eaae92e0605d618db3a116702b4d45
[annex-ec] / annex-ec
1 #!/usr/bin/env bash
2
3 # annex-ec: Use erasure codes for more efficient storage use in git-annex
4 # Copyright (C) 2026 Scott Worley
5
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Affero General Public License as
8 # published by the Free Software Foundation, either version 3 of the
9 # License, or (at your option) any later version.
10
# Strict mode: abort on an unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
12
# Print the arguments as a single line on stderr and terminate the script
# with exit status 1.
# Arguments: the message; multiple arguments are joined by the first
#            character of IFS (a space by default).
die() {
  # printf rather than echo: a message such as "-n" or "-e" would be taken
  # by echo as an option and silently swallowed.
  printf '%s\n' "$*" >&2
  exit 1
}
17
18
# Parse a comma-separated list of volume names into the global array
# `volumes`, preserving order.
# Globals:   volumes (overwritten)
# Arguments: $1 - comma-separated names; empty entries are skipped
# An entry equal to the name found for this repository's annex.uuid in
# git-annex:uuid.log is stored as "here" instead.
parse_volume_list() {
  local this_volume_name entry
  local -a entries=()
  # Join this repository's UUID against uuid.log; the second space-separated
  # field of the matching line is taken as this repository's name.
  this_volume_name=$(join -j1 <(git config get annex.uuid) <(git cat-file -p git-annex:uuid.log | sort) | cut -d' ' -f2)
  # -t removes the ',' delimiter from every entry.
  mapfile -d , -t entries <<< "$1"
  volumes=()
  for entry in "${entries[@]}"; do
    # The here-string appends a newline, which lands on the final entry.
    entry=${entry%$'\n'}
    [[ -n "$entry" ]] || continue
    if [[ "$entry" == "$this_volume_name" ]]; then
      volumes+=("here")
    else
      volumes+=("$entry")
    fi
  done
}
36
# Derive the base name for a group's par2 files from its members and store
# it in the global `name`: one hash per member, each truncated to an equal
# share of the available file-name length, joined with '-'.
# Globals:   N (read)       - number of files in the group
#            name (written)
# Arguments: the group's files; the hash is extracted from each one's
#            symlink target (the text after "SHA256E-s<digits>--", up to the
#            first '.').
# Returns:   non-zero, with a message on stderr, when N is not positive or
#            the name budget leaves no room for even one character per hash.
make_name() {
  local -r FILENAME_MAX=255
  # Sample decorations whose lengths are reserved out of FILENAME_MAX.
  # NOTE(review): presumably the longest suffix par2 appends and the affixes
  # git-annex puts around a file while ingesting it - confirm.
  local -r EXAMPLE_SUFFIX='.vol0000+9999.par2'
  local -r EXAMPLE_ANNEX_INTERNAL_PREFIX='ingest-'
  local -r EXAMPLE_ANNEX_INTERNAL_SUFFIX='-1-1173fd7'
  local num_separating_dashes overhead available len

  if (( N < 1 )); then
    printf 'make_name: group size must be positive, got %s\n' "$N" >&2
    return 1
  fi
  num_separating_dashes=$((N - 1))
  overhead=$(( ${#EXAMPLE_SUFFIX} + ${#EXAMPLE_ANNEX_INTERNAL_PREFIX} + ${#EXAMPLE_ANNEX_INTERNAL_SUFFIX} + num_separating_dashes ))
  available=$((FILENAME_MAX - overhead))
  len=$((available / N))
  if (( len < 1 )); then
    printf 'make_name: cannot fit %s hashes into a %s-character file name\n' "$N" "$FILENAME_MAX" >&2
    return 1
  fi

  # %l prints each symlink's target; sed reduces it to the bare hash.
  name=$(find "$@" -printf '%l\n' | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' | cut -c"-$len" | tr '\n' -)
  # tr turned the final newline into a trailing dash as well.
  name=${name%-}
}
49
# Defaults, each overridable by the command-line option named alongside it.
volumes=()                     # -v: ordered volume list; filled in below when not given
redundancy=1                   # -r: number of recovery files; the group size is the volume count minus this
block_size_is_a_multiple_of=4  # -m: par2 requires that this be at least 4
blocks_per_file=10             # -b: divisor used when deriving the par2 block size
while getopts b:m:r:v: opt; do
  case "$opt" in
    b) blocks_per_file=$OPTARG ;;
    m) block_size_is_a_multiple_of=$OPTARG ;;
    r) redundancy=$OPTARG ;;
    v) parse_volume_list "$OPTARG" ;;
    *) die 'usage: annex-ec [-v remote1,remote2,...] [-r N] [-b blocks_per_file] [-m block_size_multiple] file file...' ;;
  esac
done
shift $((OPTIND - 1))

# These values feed shell arithmetic below, where a non-numeric string would
# be evaluated as an expression and a zero divisor would abort with an
# unhelpful error. Reject both here with a clear message.
[[ "$blocks_per_file" =~ ^[1-9][0-9]*$ ]] \
  || die "-b expects a positive integer, got '$blocks_per_file'"
[[ "$block_size_is_a_multiple_of" =~ ^[1-9][0-9]*$ ]] \
  || die "-m expects a positive integer, got '$block_size_is_a_multiple_of'"
[[ "$redundancy" =~ ^(0|[1-9][0-9]*)$ ]] \
  || die "-r expects a non-negative integer, got '$redundancy'"

# Without -v, use this repository plus every configured git remote.
if (( ${#volumes[@]} == 0 )); then
  parse_volume_list "here,$(git remote | tr '\n' ,)"
fi

# N is the number of files per group: one per volume not reserved for
# recovery data.
N=$((${#volumes[@]} - redundancy))

# With N <= 0 there is nothing to encode, and N == 0 with no file arguments
# would otherwise slip through the count check below.
(( N >= 1 )) || die "Need more volumes (${#volumes[@]}) than redundancy ($redundancy)"

(( $# == N )) || die "Expected $N files in this group ($N + $redundancy = ${#volumes[@]}), but got $#"
72
73
# The content of every member must be present locally before it can be
# measured and encoded.
git annex get -- "$@"

# Size in bytes of the largest member (-L: measure the content behind the
# symlink, not the link itself).
max_size=$(find -L "$@" -printf '%s\n' | sort -n | tail -n1)

# Block size: a multiple of block_size_is_a_multiple_of, and strictly larger
# than max_size / blocks_per_file, so the largest member spans at most
# blocks_per_file blocks.
bytes_per_step=$((block_size_is_a_multiple_of * blocks_per_file))
block_size=$(( (max_size / bytes_per_step + 1) * block_size_is_a_multiple_of ))

# Sets the global `name` used for the par2 files.
make_name "$@"
80
# First run in this directory: create ec/ and give everything stored in it
# annex.numcopies=1.
if ! [[ -d ec ]]; then
  mkdir ec
  # TODO: Make this robust against being interrupted here
  echo '* annex.numcopies=1' >> ec/.gitattributes
  git add ec/.gitattributes
fi

# Create the recovery data for the group:
#   -u  recovery files of uniform size
#   -n  number of recovery files (one per redundant volume)
#   -c  total number of recovery blocks
#   -s  block size
recovery_block_count=$((blocks_per_file * redundancy))
par2_args=(
  -u
  -n"$redundancy"
  -c"$recovery_block_count"
  -s"$block_size"
)
par2 c "${par2_args[@]}" "$name.par2" "$@"

# Only the "$name.vol*" files are kept; they are moved into ec/.
rm "$name.par2"
mv "$name.vol"* ec/
91
# Assign one volume to each file, in order: the i-th argument goes to
# volumes[i], and the recovery files take the volumes that follow.
# `i` deliberately carries over from the first loop into the second.
i=0
for f; do
  target_volume="${volumes[i]}"
  # Data files are copied, not moved: the local copies stay in place for now.
  if [[ "$target_volume" != here ]]; then
    git annex copy --to "$target_volume" -- "$f"
  fi
  i=$((i+1))
done

for f in ec/"$name.vol"*; do
  # More matching files than volumes left (for example files left behind by
  # an earlier run under the same name) would otherwise surface as an
  # unbound-variable error.
  (( i < ${#volumes[@]} )) || die "No volume left for recovery file $f"
  target_volume="${volumes[i]}"
  git annex add -- "$f"
  # Recovery files are moved, so each ends up only on its own volume.
  if [[ "$target_volume" != here ]]; then
    git annex move --to "$target_volume" -- "$f"
  fi
  i=$((i+1))
done
113
# Record annex.numcopies=1 for every member of the group. Spaces in a path
# are written as [[:space:]] because a literal space would end the pattern
# in a .gitattributes line.
for f; do
  printf '%s annex.numcopies=1\n' "${f// /[[:space:]]}" >> .gitattributes
done

# On every volume, and on this repository even when it is not in the list,
# drop each member that is assigned to a different volume.
for volume in here "${volumes[@]}"; do
  i=0
  to_drop=()
  for f; do
    target_volume="${volumes[i]}"
    if [[ "$volume" != "$target_volume" ]]; then
      to_drop+=("$f")
    fi
    i=$((i+1))
  done
  # An empty list must not reach git-annex: called without paths, drop is
  # documented to act on the annexed files of the whole current directory.
  (( ${#to_drop[@]} > 0 )) || continue
  if [[ "$volume" == here ]]; then
    git annex drop -- "${to_drop[@]}"
  else
    git annex drop --from "$volume" -- "${to_drop[@]}"
  fi
done
git add .gitattributes
135
# Append this group's record to ec/.meta: the par2 base name on a line of
# its own, followed by one line per member, each indented by a single space.
(
  # Inside this subshell fd 1 is ec/.meta (opened for append), so this takes
  # a lock on that file for the duration of the subshell.
  flock 1
  echo "$name"
  for member in "$@"; do
    printf ' %s\n' "$member"
  done
) >> ec/.meta
git add ec/.meta