#!/usr/bin/env bash
+# annex-ec: Use erasure codes for more efficient storage use in git-annex
+# Copyright (C) 2026 Scott Worley
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
set -euo pipefail
die() {
overhead=$(( ${#EXAMPLE_SUFFIX} + ${#EXAMPLE_ANNEX_INTERNAL_PREFIX} + ${#EXAMPLE_ANNEX_INTERNAL_SUFFIX} + num_separating_dashes ))
available=$((FILENAME_MAX - overhead))
len=$((available / N))
- name=$(find "$@" -printf '%l\n' | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' | cut -c-$len | tr \\n -)
- name=${name%-}
+ name=$(find "$@" -printf '%l\n' | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' | cut -c-$len | tr \\n -)
+ name=${name%-}
}
volumes=()
redundancy=1
+max_block_size=$((128*1024*1024))
block_size_is_a_multiple_of=4 # par2 requires that this be at least 4
blocks_per_file=10
-while getopts b:m:r:v: opt;do
+while getopts b:m:r:v:x: opt;do
case $opt in
b) blocks_per_file=$OPTARG;;
m) block_size_is_a_multiple_of=$OPTARG;;
r) redundancy=$OPTARG;;
v) parse_volume_list "$OPTARG";;
- *) echo 'usage: annex-ec [-v remote1,remote2,...] [-r N] file file...' >&2; exit 1;;
+ x) max_block_size=$OPTARG;;
+ *) echo 'usage: annex-ec [-v remote1,remote2,...] [-r N] [-b N] [-x N] [-m N] file file...' >&2; exit 1;;
esac
done
shift $((OPTIND - 1))
git annex get -- "$@"
max_size=$(find -L "$@" -printf '%s\n' | sort -nr | head -n1)
-block_size=$(( ((max_size/(block_size_is_a_multiple_of*blocks_per_file))+1) * block_size_is_a_multiple_of))
+# Raise blocks_per_file until the resulting block_size is below max_block_size.
+# block_size is always at least block_size_is_a_multiple_of, so a limit at or
+# below that multiple can never be met; reject it up front instead of looping forever.
+if (( max_block_size <= block_size_is_a_multiple_of ));then
+ echo 'annex-ec: max block size (-x) must be larger than the block size multiple (-m)' >&2; exit 1
+fi
+while true;do
+ # Smallest multiple of block_size_is_a_multiple_of for which
+ # block_size * blocks_per_file exceeds max_size.
+ block_size=$(( ((max_size/(block_size_is_a_multiple_of*blocks_per_file))+1) * block_size_is_a_multiple_of))
+ if (( block_size < max_block_size ));then break;fi
+ blocks_per_file=$((blocks_per_file + 1))
+done
make_name "$@"
git add ec/.gitattributes
fi
-par2 c -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
+par2 c -u -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
rm "$name.par2"
mv "$name.vol"* ec/
i=$((i+1))
done
-i=0
for f;do
- target_volume="${volumes[i]}"
echo "${f// /[[:space:]]} annex.numcopies=1" >> .gitattributes
- for volume in here "${volumes[@]}";do
+done
+
+for volume in here "${volumes[@]}";do
+ i=0
+ to_drop=()
+ for f;do
+ target_volume="${volumes[i]}"
if [[ "$volume" != "$target_volume" ]]; then
- if [[ "$volume" == here ]];then
- git annex drop "$f"
- else
- git annex drop --from "$volume" "$f"
- fi
+ to_drop+=( "$f" )
fi
+ i=$((i+1))
done
- i=$((i+1))
+ # Skip volumes with nothing to drop. With an empty list, bash older than 4.4
+ # aborts under `set -u` on "${to_drop[@]}", and newer bash would run
+ # `git annex drop` with no paths at all.
+ # NOTE(review): git-annex may treat "no paths" as "everything in the current
+ # directory" -- confirm against the git-annex-drop documentation.
+ if (( ${#to_drop[@]} > 0 ));then
+  # `--` keeps file names that start with a dash from being parsed as options.
+  if [[ "$volume" == here ]];then
+   git annex drop -- "${to_drop[@]}"
+  else
+   git annex drop --from "$volume" -- "${to_drop[@]}"
+  fi
+ fi
done
git add .gitattributes