From: Scott Worley
Date: Tue, 17 Mar 2026 03:11:19 +0000 (-0700)
Subject: Implement recovery
X-Git-Url: http://git.scottworley.com/annex-ec/commitdiff_plain/fe6bf209a525eb54b1124271ee5063a3de44072b

Implement recovery
---
Review notes: line breaks and indentation were restored from a
whitespace-collapsed copy of this patch, so exact whitespace is a best guess.
annex-ec-recover differs from the original commit (set -u safe "--" handling,
local loop variable, temp dir cleanup, comments), so the blob id on its index
line no longer matches. The annex-ec and annex-ec-test hunks are unchanged.

diff --git a/annex-ec b/annex-ec
index 78f3d0d..a87636f 100755
--- a/annex-ec
+++ b/annex-ec
@@ -85,7 +85,7 @@ if [[ ! -d ec ]];then
   git add ec/.gitattributes
 fi
 
-par2 c -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
+par2 c -u -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
 rm "$name.par2"
 mv "$name.vol"* ec/
 
diff --git a/annex-ec-recover b/annex-ec-recover
new file mode 100755
index 0000000..3e45653
--- /dev/null
+++ b/annex-ec-recover
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+
+# annex-ec: Use erasure codes for more efficient storage use in git-annex
+# Copyright (C) 2026 Scott Worley
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+set -euo pipefail
+
+# Print the arguments to stderr and exit with status 1.
+die() {
+  echo "$*" >&2
+  exit 1
+}
+
+# Find the recovery set in ec/.meta that lists file $1.
+# Lines that start with the indent character are file entries; any other
+# line starts a new set and is taken as that set's name.
+# Sets the globals recovery_set_name and recovery_set_files, or dies if no
+# set lists $1.
+find_recovery_set_files() {
+  local found line
+  found=false
+  recovery_set_files=()
+  while IFS= read -r line;do
+    if [[ "$line" == ' '* ]];then
+      recovery_set_files+=( "${line# }" )
+      if [[ "${line# }" == "$1" ]];then
+        found=true
+      fi
+    else
+      # Next set header reached: stop if the set just read held $1
+      if $found;then
+        break
+      fi
+      recovery_set_name=$line
+      recovery_set_files=()
+    fi
+  done < ec/.meta
+  if ! $found;then
+    die "Could not find a recovery set containing file $1"
+  fi
+}
+
+# Drop leading "--" arguments. ${1-} keeps set -u from aborting the script
+# once no arguments are left.
+while [[ "${1-}" == -- ]];do shift; done
+
+for target;do
+  if [[ -e "$target" ]];then continue; fi
+  # Best effort: if this get fails we fall through to par2 recovery below
+  git annex get "$target" || true
+  if [[ -e "$target" ]];then continue; fi
+
+  find_recovery_set_files "$target"
+
+  # Best effort: fetch whichever set members and ec/ volumes can still be had
+  git annex get "${recovery_set_files[@]}" ec/"$recovery_set_name"* || true
+
+  # par2 doesn't like directory structure, so we work from a temp dir where we can flatten everything
+  srcdir=$PWD
+  flat="$(mktemp -d)"
+  pushd "$flat"
+
+  for f in "${recovery_set_files[@]}";do
+    ln -s "$srcdir"/"$f" .
+  done
+  ln -s "$srcdir"/ec/"$recovery_set_name"* .
+
+  # We have to remove the broken symlinks or par2 gets confused trying to write to them
+  find -L . -type l -exec rm -v {} +
+
+  par2 r -- *.par2
+
+  popd
+  for f in "${recovery_set_files[@]}";do
+    git annex reinject "$flat"/"$(basename "$f")" "$f"
+  done
+
+  # Each target gets its own temp dir; remove it so they don't pile up
+  rm -rf -- "${flat:?}"
+done
diff --git a/annex-ec-test b/annex-ec-test
index 20363f8..3ba2006 100755
--- a/annex-ec-test
+++ b/annex-ec-test
@@ -55,7 +55,9 @@ fsck_everything() {
     for already_deleted in "${deleted_vols[@]}";do
       if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
     done
-    git -C "$(vol_dir "$vol")" annex fsck
+    pushd "$(vol_dir "$vol")"
+    git annex fsck -- *-*
+    popd
   done
 }
 
@@ -107,13 +109,24 @@ make_test_file() {
   echo "$f"
 }
 
-choose_volumes() {
+some_random_volume_names() {
   x=$(for vol in "${vols[@]}";do
     vol_name "$vol"
   done | shuf | head -n "$1" | tr \\n ,)
   echo "${x%,}"
 }
 
+a_random_volume_dir() {
+  while true;do
+    vol="${vols[$RANDOM % $num_vols]}"
+    for already_deleted in "${deleted_vols[@]}";do
+      if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
+    done
+    vol_dir "$vol"
+    break
+  done
+}
+
 MIN_REDUNDANCY=1
 MIN_FILES=2 # If you only have one file in a group, you'd just make copies of it, no need for annex-ec
 MIN_VOLUMES=$((MIN_REDUNDANCY + MIN_FILES))
@@ -129,17 +142,23 @@ for (( num_vols=MIN_VOLUMES; num_vols <= 10; num_vols++ ));do
       done
       sync_everything
       sync_everything
-      pushd "$(vol_dir "${vols[$RANDOM % $num_vols]}")"
-      cmd=(annex-ec -r "$redundancy" -v "$(choose_volumes $((num_files+redundancy)))" "${files[@]}")
+      pushd "$(a_random_volume_dir)"
+      cmd=(annex-ec -r "$redundancy" -v "$(some_random_volume_names $((num_files+redundancy)))" "${files[@]}")
       echo "In $PWD , running ${cmd[*]}" >&2
       "${cmd[@]}"
       popd
       sync_everything
+      sync_everything
       fsck_everything
+
       delete_some_test_vols "$redundancy"
-      # TODO: Recover
+      pushd "$(a_random_volume_dir)"
+      annex-ec-recover -- *-*
+      popd
+
+      sync_everything
       sync_everything
-      # fsck_everything # Skip this check until recovery is implemented
+      fsck_everything
       delete_all_test_vols
     done
   done