git.scottworley.com Git - annex-ec/commitdiff
Implement recovery
author Scott Worley <scottworley@scottworley.com>
Tue, 17 Mar 2026 03:11:19 +0000 (20:11 -0700)
committer Scott Worley <scottworley@scottworley.com>
Tue, 17 Mar 2026 03:11:19 +0000 (20:11 -0700)
annex-ec
annex-ec-recover [new file with mode: 0755]
annex-ec-test

index 78f3d0d22e40cea83d44ca3757de6b262ee6fff7..a87636fb65de69e5af292cc0c761a6fbdc40c65d 100755 (executable)
--- a/annex-ec
+++ b/annex-ec
@@ -85,7 +85,7 @@ if [[ ! -d ec ]];then
   git add ec/.gitattributes
 fi
 
-par2 c -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
+par2 c -u -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
 rm "$name.par2"
 mv "$name.vol"* ec/
 
diff --git a/annex-ec-recover b/annex-ec-recover
new file mode 100755 (executable)
index 0000000..3e45653
--- /dev/null
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+# annex-ec: Use erasure codes for more efficient storage use in git-annex
+# Copyright (C) 2026  Scott Worley
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+set -euo pipefail
+
+# Print all arguments, joined into one message, on stderr and exit with
+# status 1.
+die() {
+  echo "$*" >&2
+  exit 1
+}
+
+# Scan ec/.meta for the recovery set that lists file $1.
+#
+# As parsed here, ec/.meta is a sequence of groups: a line that does not
+# start with a space names a recovery set, and each following line that
+# starts with a space names one file of that set (the single leading space
+# is stripped before use).
+#
+# Arguments: $1 - file name to look for; compared literally to each entry
+# Globals:   recovery_set_name (written)  - header line of the matching set
+#            recovery_set_files (written) - every file listed in that set
+#            line (written) - loop variable, not declared local
+# Exits via die when no set lists $1.
+find_recovery_set_files() {
+  local found
+  found=false
+  recovery_set_files=()
+  while IFS= read -r line;do
+    if [[ "$line" == ' '* ]];then
+      # Member line: record it and note whether it is the file we want.
+      recovery_set_files+=( "${line# }" )
+      if [[ "${line# }" == "$1" ]];then
+        found=true
+      fi
+    else
+      # Header line. If the set that just ended contained the target, stop
+      # here so its name and file list are kept; otherwise start a new set.
+      if $found;then
+        break
+      fi
+      recovery_set_name=$line
+      recovery_set_files=()
+    fi
+  done < ec/.meta
+  if ! $found;then
+    die "Could not find a recovery set containing file $1"
+  fi
+}
+
+# Drop any leading "--" separators. ${1-} avoids an unbound-variable abort
+# under `set -u` when no arguments (or only "--") were given.
+while [[ "${1-}" == -- ]];do shift; done
+
+# For every target whose content is missing: try a plain fetch first, and
+# fall back to a par2 repair of the whole recovery set that lists it.
+for target;do
+  # Skip targets that exist (for a symlink, -e is true only if its
+  # destination exists).
+  if [[ -e "$target" ]];then continue; fi
+  # Best effort: a failed get is the case recovery exists for, so it must
+  # not abort the script.
+  git annex get -- "$target" || true
+  if [[ -e "$target" ]];then continue; fi
+
+  # Sets recovery_set_name and recovery_set_files; exits if no set lists
+  # the target.
+  find_recovery_set_files "$target"
+
+  # Best effort again: fetch whatever members and recovery volumes of the
+  # set can still be obtained.
+  git annex get -- "${recovery_set_files[@]}" ec/"$recovery_set_name"* || true
+
+  # par2 doesn't like directory structure, so we work from a temp dir where we can flatten everything
+  srcdir=$PWD
+  flat="$(mktemp -d)"
+  pushd "$flat"
+
+    for f in "${recovery_set_files[@]}";do
+      ln -s "$srcdir"/"$f" .
+    done
+    ln -s "$srcdir"/ec/"$recovery_set_name"* .
+
+    # We have to remove the broken symlinks or par2 gets confused trying to write to them
+    find -L . -type l -exec rm -v {} +
+
+    par2 r -- *.par2
+
+  popd
+  for f in "${recovery_set_files[@]}";do
+    git annex reinject -- "$flat"/"$(basename -- "$f")" "$f"
+  done
+
+  # All files were reinjected, so the scratch directory is no longer needed.
+  # If an earlier step fails, `set -e` exits before this line and the
+  # directory is left behind for inspection.
+  rm -rf -- "${flat:?}"
+done
index 20363f816068a930c5aebac9233c7f1a3853b7fb..3ba2006cc2dfa72b055749aecbba885435190c90 100755 (executable)
@@ -55,7 +55,9 @@ fsck_everything() {
     for already_deleted in "${deleted_vols[@]}";do
       if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
     done
-    git -C "$(vol_dir "$vol")" annex fsck
+    pushd "$(vol_dir "$vol")"
+      git annex fsck -- *-*
+    popd
   done
 }
 
@@ -107,13 +109,24 @@ make_test_file() {
   echo "$f"
 }
 
-choose_volumes() {
+# Print a comma-separated list (no trailing comma) built from the first $1
+# lines of the shuffled vol_name output for every entry of vols.
+# Presumably vol_name prints one name per line, making this "up to $1
+# random volume names" -- confirm against vol_name.
+some_random_volume_names() {
   x=$(for vol in "${vols[@]}";do
         vol_name "$vol"
       done | shuf | head -n "$1" | tr \\n ,)
   echo "${x%,}"
 }
 
+# Call vol_dir on a randomly drawn entry of vols that is not listed in
+# deleted_vols, re-drawing until such an entry is found. Callers capture
+# the output and pushd into it.
+# Reads the globals vols, num_vols and deleted_vols; vol and
+# already_deleted are not declared local.
+# NOTE(review): assumes vols has num_vols entries -- confirm. This loops
+# forever if every entry of vols is in deleted_vols.
+a_random_volume_dir() {
+  while true;do
+    vol="${vols[$RANDOM % $num_vols]}"
+    for already_deleted in "${deleted_vols[@]}";do
+      # The drawn volume was deleted: restart the outer loop and draw again.
+      if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
+    done
+    vol_dir "$vol"
+    break
+  done
+}
+
 MIN_REDUNDANCY=1
 MIN_FILES=2 # If you only have one file in a group, you'd just make copies of it, no need for annex-ec
 MIN_VOLUMES=$((MIN_REDUNDANCY + MIN_FILES))
@@ -129,17 +142,23 @@ for (( num_vols=MIN_VOLUMES; num_vols <= 10; num_vols++ ));do
       done
       sync_everything
       sync_everything
-      pushd "$(vol_dir "${vols[$RANDOM % $num_vols]}")"
-        cmd=(annex-ec -r "$redundancy" -v "$(choose_volumes $((num_files+redundancy)))" "${files[@]}")
+      pushd "$(a_random_volume_dir)"
+        cmd=(annex-ec -r "$redundancy" -v "$(some_random_volume_names $((num_files+redundancy)))" "${files[@]}")
         echo "In $PWD , running ${cmd[*]}" >&2
         "${cmd[@]}"
       popd
       sync_everything
+      sync_everything
       fsck_everything
+
       delete_some_test_vols "$redundancy"
-      # TODO: Recover
+      pushd "$(a_random_volume_dir)"
+        annex-ec-recover -- *-*
+      popd
+
+      sync_everything
       sync_everything
-      # fsck_everything # Skip this check until recovery is implemented
+      fsck_everything
       delete_all_test_vols
     done
   done