#!/usr/bin/env bash
# annex-ec: Use erasure codes for more efficient storage use in git-annex
# Copyright (C) 2026 Scott Worley
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Usage: annex-ec [-v remote1,remote2,...] [-r N] [-b N] [-x N] [-m N] file file...
#   -v  comma-separated volume list (default: "here" plus every `git remote`)
#   -r  redundancy: number of par2 recovery files to create (default 1)
#   -b  starting number of blocks per file (default 10); raised automatically
#       until the block size drops below the -x limit
#   -x  upper limit on the par2 block size, in bytes (default 128 MiB)
#   -m  the par2 block size is rounded to a multiple of this (default 4)
#
# Exactly (number of volumes - redundancy) files must be given. File i is
# copied to volume i, each recovery file goes to one of the remaining volumes,
# and every other copy of the data files is then dropped.

set -euo pipefail

# Print the arguments on stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Split a comma-separated volume list into the global array `volumes`.
# Arguments: $1 - comma-separated names; empty entries are ignored
# Globals:   volumes (written)
# An entry equal to this repository's name, as looked up by annex.uuid in
# git-annex:uuid.log, is stored as "here".
# NOTE(review): only the first space-separated word after the uuid is used as
# the name, so a multi-word description can never match.
parse_volume_list() {
  local this_volume_name entry
  local -a entries=()

  this_volume_name=$(
    join -j1 <(git config get annex.uuid) \
      <(git cat-file -p git-annex:uuid.log | sort) \
      | cut -d' ' -f2
  )

  # Without -t each element keeps its trailing ','; the here-string also
  # appends a newline to the final element. Both are stripped below.
  mapfile -d , entries <<< "$1"

  volumes=()
  for entry in "${entries[@]}"; do
    entry=${entry%,}
    entry=${entry%$'\n'}
    if [[ -n "$entry" ]]; then
      if [[ "$entry" == "$this_volume_name" ]]; then
        volumes+=("here")
      else
        volumes+=("$entry")
      fi
    fi
  done
}

# Build the base name of the par2 set from the given files' symlink targets.
# Arguments: the data files of the group
# Globals:   N (read), name (written)
# Each target is reduced to the text between "SHA256E-s<size>--" and the first
# following '.', truncated so that all N pieces joined with '-' still fit in a
# file name once the par2 and git-annex affixes below are added.
make_name() {
  local FILENAME_MAX=255
  local EXAMPLE_SUFFIX='.vol0000+9999.par2'
  local EXAMPLE_ANNEX_INTERNAL_PREFIX='ingest-'
  local EXAMPLE_ANNEX_INTERNAL_SUFFIX='-1-1173fd7'
  local num_separating_dashes overhead available len

  num_separating_dashes=$((N - 1))
  overhead=$((${#EXAMPLE_SUFFIX} + ${#EXAMPLE_ANNEX_INTERNAL_PREFIX} + ${#EXAMPLE_ANNEX_INTERNAL_SUFFIX} + num_separating_dashes))
  available=$((FILENAME_MAX - overhead))
  len=$((available / N))
  # cut rejects a zero or negative width; fail with a readable message instead.
  ((len >= 1)) || die "Too many files ($N) to build a name that fits in $FILENAME_MAX characters"

  name=$(
    find "$@" -printf '%l\n' \
      | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' \
      | cut -c-"$len" \
      | tr '\n' -
  )
  name=${name%-}
}

volumes=()
redundancy=1
max_block_size=$((128 * 1024 * 1024))
block_size_is_a_multiple_of=4 # par2 requires that this be at least 4
blocks_per_file=10

while getopts b:m:r:v:x: opt; do
  case $opt in
    b) blocks_per_file=$OPTARG ;;
    m) block_size_is_a_multiple_of=$OPTARG ;;
    r) redundancy=$OPTARG ;;
    v) parse_volume_list "$OPTARG" ;;
    x) max_block_size=$OPTARG ;;
    *)
      echo 'usage: annex-ec [-v remote1,remote2,...] [-r N] [-b N] [-x N] [-m N] file file...' >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

# No (or an empty) -v: use this repository plus every configured remote.
if ((${#volumes[@]} == 0)); then
  parse_volume_list "here,$(git remote | tr '\n' ,)"
fi

# N data files + $redundancy recovery files = one file per volume.
N=$((${#volumes[@]} - redundancy))
# With N == 0 and no file arguments the count check below would pass, and the
# git-annex commands would then be run with an empty path list.
((N >= 1)) || die "Need at least one data volume, but ${#volumes[@]} volume(s) - redundancy $redundancy = $N"
(($# == N)) || die "Expected $N files in this group ($N + $redundancy = ${#volumes[@]}), but got $#"

git annex get -- "$@"

# Size in bytes of the largest data file (symlinks are followed).
max_size=$(find -L "$@" -printf '%s\n' | sort -nr | head -n1)

# The block size can never fall below one multiple, so the search below would
# spin forever unless a single multiple already fits under the limit.
((block_size_is_a_multiple_of > 0 && blocks_per_file > 0)) \
  || die "-m and -b must be positive"
((block_size_is_a_multiple_of < max_block_size)) \
  || die "-m ($block_size_is_a_multiple_of) must be smaller than -x ($max_block_size)"

# Raise blocks_per_file until the resulting block size is below the limit.
while true; do
  block_size=$((((max_size / (block_size_is_a_multiple_of * blocks_per_file)) + 1) * block_size_is_a_multiple_of))
  if ((block_size < max_block_size)); then
    break
  fi
  blocks_per_file=$((blocks_per_file + 1))
done

make_name "$@"

# Keyed on the attributes file rather than the directory, and re-running the
# idempotent `git add`, so that a run interrupted part-way through this setup
# is completed by the next run.
mkdir -p ec
if [[ ! -e ec/.gitattributes ]]; then
  echo '* annex.numcopies=1' >> ec/.gitattributes
fi
git add ec/.gitattributes

# -u: uniform recovery file sizes, -n: number of recovery files,
# -c: total recovery block count, -s: block size.
par2 c -u -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
# Only the recovery volumes are kept; the index file is discarded.
rm -- "$name.par2"
mv -- "$name.vol"* ec/

# Data file i is stored on volume i.
i=0
for f; do
  target_volume="${volumes[i]}"
  if [[ "$target_volume" != here ]]; then
    git annex copy --to "$target_volume" -- "$f"
  fi
  i=$((i + 1))
done

# i == N here: the recovery files take the remaining volumes, one each.
for f in ec/"$name.vol"*; do
  target_volume="${volumes[i]}"
  git annex add -- "$f"
  if [[ "$target_volume" != here ]]; then
    git annex move --to "$target_volume" -- "$f"
  fi
  i=$((i + 1))
done

# Allow the data files to exist as a single copy. Spaces are written as
# [[:space:]] because a literal space ends a .gitattributes pattern.
for f; do
  printf '%s\n' "${f// /[[:space:]]} annex.numcopies=1" >> .gitattributes
done

# Everywhere a data file may live: this repository plus every listed volume
# ("here" is listed once even when it is also one of the volumes).
drop_sites=(here)
for volume in "${volumes[@]}"; do
  if [[ "$volume" != here ]]; then
    drop_sites+=("$volume")
  fi
done

# From each site, drop every data file that is not assigned to that site.
for volume in "${drop_sites[@]}"; do
  i=0
  to_drop=()
  for f; do
    if [[ "$volume" != "${volumes[i]}" ]]; then
      to_drop+=("$f")
    fi
    i=$((i + 1))
  done

  # Never call `git annex drop` without paths: an empty path list is taken to
  # mean every annexed file under the current directory (see git-annex-drop(1)).
  if ((${#to_drop[@]} == 0)); then
    continue
  fi

  if [[ "$volume" == here ]]; then
    git annex drop -- "${to_drop[@]}"
  else
    git annex drop --from "$volume" -- "${to_drop[@]}"
  fi
done

git add .gitattributes

# Record the group in ec/.meta: the set name, then one line per member file.
# stdout is the append handle on ec/.meta, so `flock 1` locks that file for
# the duration of the subshell.
(
  flock 1
  printf '%s\n' "$name"
  for f; do
    printf ' %s\n' "$f"
  done
) >> ec/.meta
git add ec/.meta