--- /dev/null
+#!/usr/bin/env bash
+
+# annex-ec: Use erasure codes for more efficient storage use in git-annex
+# Copyright (C) 2026 Scott Worley
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+set -euo pipefail # exit on a failing command, on use of an unset variable, and on a failure anywhere in a pipeline
+
+# Print all arguments as a single line on stderr, then exit the script with
+# status 1.
+die() {
+  # printf rather than echo: echo would swallow a message that begins with an
+  # option-like word such as -n or -e.
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# Locate the recovery set in ec/.meta that lists the file named by $1.
+#
+# As parsed here, a line of ec/.meta that begins with a space names one member
+# file of the current set (the space is stripped); any other line starts a new
+# set and is taken as that set's name.
+#
+# Arguments: $1 - member path, spelled exactly as it appears in ec/.meta
+# Globals:   recovery_set_name  (written) name of the matching set
+#            recovery_set_files (written) every member of the matching set
+# Exits through die when no set lists $1, or when $1 is listed before any
+# set name has been seen.
+find_recovery_set_files() {
+  local found line member
+  found=false
+  # Reset both outputs so nothing from an earlier call can leak into this one.
+  recovery_set_name=
+  recovery_set_files=()
+  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
+  while IFS= read -r line || [[ -n "$line" ]];do
+    if [[ "$line" == ' '* ]];then
+      member=${line# }
+      recovery_set_files+=( "$member" )
+      if [[ "$member" == "$1" ]];then
+        found=true
+      fi
+    else
+      # A new set begins: stop if the set just read was the one we wanted.
+      if $found;then
+        break
+      fi
+      recovery_set_name=$line
+      recovery_set_files=()
+    fi
+  done < ec/.meta
+  if ! $found;then
+    die "Could not find a recovery set containing file $1"
+  fi
+  if [[ -z "$recovery_set_name" ]];then
+    die "ec/.meta lists $1 before any recovery set name"
+  fi
+}
+
+while [[ "$1" == -- ]];do shift; done
+
+for target;do
+ if [[ -e "$target" ]];then continue; fi
+ set +e
+ git annex get "$target"
+ set -e
+ if [[ -e "$target" ]];then continue; fi
+
+ find_recovery_set_files "$target"
+
+ set +e
+ git annex get "${recovery_set_files[@]}" ec/"$recovery_set_name"*
+ set -e
+
+ # par2 doesn't like directory structure, so we work from a temp dir where we can flatten everything
+ srcdir=$PWD
+ flat="$(mktemp -d)"
+ pushd "$flat"
+
+ for f in "${recovery_set_files[@]}";do
+ ln -s "$srcdir"/"$f" .
+ done
+ ln -s "$srcdir"/ec/"$recovery_set_name"* .
+
+ # We have to remove the broken symlinks or par2 gets confused trying to write to them
+ find -L . -type l -exec rm -v {} +
+
+ par2 r -- *.par2
+
+ popd
+ for f in "${recovery_set_files[@]}";do
+ git annex reinject "$flat"/"$(basename "$f")" "$f"
+ done
+done
for already_deleted in "${deleted_vols[@]}";do
if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
done
- git -C "$(vol_dir "$vol")" annex fsck
+ pushd "$(vol_dir "$vol")"
+ git annex fsck -- *-*
+ popd
done
}
echo "$f"
}
-choose_volumes() {
+some_random_volume_names() { # prints up to $1 vol_name values of vols, shuffled by shuf, comma-joined without a trailing comma
x=$(for vol in "${vols[@]}";do
vol_name "$vol"
done | shuf | head -n "$1" | tr \\n ,)
echo "${x%,}"
}
+a_random_volume_dir() {
+ while true;do
+ vol="${vols[$RANDOM % $num_vols]}"
+ for already_deleted in "${deleted_vols[@]}";do
+ if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
+ done
+ vol_dir "$vol"
+ break
+ done
+}
+
MIN_REDUNDANCY=1
MIN_FILES=2 # If you only have one file in a group, you'd just make copies of it, no need for annex-ec
MIN_VOLUMES=$((MIN_REDUNDANCY + MIN_FILES))
done
sync_everything
sync_everything
- pushd "$(vol_dir "${vols[$RANDOM % $num_vols]}")"
- cmd=(annex-ec -r "$redundancy" -v "$(choose_volumes $((num_files+redundancy)))" "${files[@]}")
+ pushd "$(a_random_volume_dir)"
+ cmd=(annex-ec -r "$redundancy" -v "$(some_random_volume_names $((num_files+redundancy)))" "${files[@]}")
echo "In $PWD , running ${cmd[*]}" >&2
"${cmd[@]}"
popd
sync_everything
+ sync_everything
fsck_everything
+
delete_some_test_vols "$redundancy"
- # TODO: Recover
+ pushd "$(a_random_volume_dir)"
+ annex-ec-recover -- *-*
+ popd
+
+ sync_everything
sync_everything
- # fsck_everything # Skip this check until recovery is implemented
+ fsck_everything
delete_all_test_vols
done
done