git.scottworley.com Git - annex-ec/commitdiff
Begin master
author Scott Worley <scottworley@scottworley.com>
Mon, 16 Mar 2026 08:51:53 +0000 (01:51 -0700)
committer Scott Worley <scottworley@scottworley.com>
Mon, 16 Mar 2026 08:51:53 +0000 (01:51 -0700)
annex-ec [new file with mode: 0755]
annex-ec-test [new file with mode: 0755]

diff --git a/annex-ec b/annex-ec
new file mode 100755 (executable)
index 0000000..b6170e6
--- /dev/null
+++ b/annex-ec
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+die() {
+  echo "$*" >&2
+  exit 1
+}
+
+
+parse_volume_list() {
+  this_volume_name=$(join -j1 <(git config get annex.uuid) <(git cat-file -p git-annex:uuid.log|sort)  | cut -d' ' -f2)
+  tmp=()
+  mapfile -d , tmp <<< "$1"
+  volumes=()
+  for x in "${tmp[@]}";do
+    x=${x%,}
+    x=${x%$'\n'}
+    if [[ "$x" ]];then
+      if [[ "$x" == "$this_volume_name" ]];then
+        volumes+=( "here" )
+      else
+        volumes+=( "$x" )
+      fi
+    fi
+  done
+}
+
+make_name() {
+  FILENAME_MAX=255
+  EXAMPLE_SUFFIX='.vol0000+9999.par2'
+  EXAMPLE_ANNEX_INTERNAL_PREFIX='ingest-'
+  EXAMPLE_ANNEX_INTERNAL_SUFFIX='-1-1173fd7'
+  num_separating_dashes=$((N - 1))
+  overhead=$(( ${#EXAMPLE_SUFFIX} + ${#EXAMPLE_ANNEX_INTERNAL_PREFIX} + ${#EXAMPLE_ANNEX_INTERNAL_SUFFIX} + num_separating_dashes ))
+  available=$((FILENAME_MAX - overhead))
+  len=$((available / N))
+       name=$(find "$@" -printf '%l\n' | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' | cut -c-$len | tr \\n -)
+       name=${name%-}
+}
+
+# Defaults, overridable from the command line.
+volumes=()
+redundancy=1
+block_size_is_a_multiple_of=4  # par2 requires that this be at least 4
+blocks_per_file=10
+while getopts b:m:r:v: opt;do
+ case $opt in
+  b)    blocks_per_file=$OPTARG;;
+  m)    block_size_is_a_multiple_of=$OPTARG;;
+  r)    redundancy=$OPTARG;;
+  v)    parse_volume_list "$OPTARG";;
+  *)    echo 'usage: annex-ec [-b blocks_per_file] [-m block_size_multiple] [-r N] [-v remote1,remote2,...] file file...' >&2; exit 1;;
+ esac
+done
+shift $((OPTIND - 1))
+
+# Without -v, use this repository plus every configured git remote.
+if (( ${#volumes[@]} == 0 ));then
+  parse_volume_list "here,$(git remote | tr \\n ,)"
+fi
+
+# N data files plus `redundancy` recovery files: one per volume.
+N=$((${#volumes[@]} - redundancy))
+
+# With N < 1 and no file arguments the count check below would pass, and the
+# later `git annex get --` and `find` would then act on the whole directory.
+(( N >= 1 )) || die "Need more volumes (${#volumes[@]}) than redundancy ($redundancy)"
+(( $# == N )) || die "Expected $N files in this group ($N + $redundancy = ${#volumes[@]}), but got $#"
+
+
+git annex get -- "$@"
+
+max_size=$(find -L "$@" -printf '%s\n' | sort -nr | head -n1)
+block_size=$(( ((max_size/(block_size_is_a_multiple_of*blocks_per_file))+1) * block_size_is_a_multiple_of))
+
+make_name "$@"
+
+if [[ ! -d ec ]];then
+  mkdir ec
+  # TODO: Make this robust against being interrupted here
+  echo '* annex.numcopies=1' >> ec/.gitattributes
+  git add ec/.gitattributes
+fi
+
+par2 c -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
+rm "$name.par2"
+mv "$name.vol"* ec/
+
+# Send each data file to its assigned volume: the i-th file belongs on the
+# i-th entry of volumes.  A file assigned to "here" needs no copy.
+# i is left equal to the number of data files for the loop that follows.
+i=0
+for f;do
+  target_volume="${volumes[i]}"
+  if [[ "$target_volume" != here ]];then
+    git annex copy --to "$target_volume" -- "$f"
+  fi
+  i=$((i+1))
+done
+
+# Annex every recovery volume file and move it to its assigned volume.
+# i is not reset here, so these files take the entries of `volumes` that
+# follow the ones used for the data files.
+for f in ec/"$name.vol"*;do
+  target_volume="${volumes[i]}"
+  git annex add "$f"
+  case "$target_volume" in
+    here) ;;  # already where it belongs
+    *)    git annex move --to "$target_volume" "$f" ;;
+  esac
+  i=$((i+1))
+done
+
+i=0
+for f;do
+  target_volume="${volumes[i]}"
+  echo "${f// /[[:space:]]} annex.numcopies=1" >> .gitattributes
+  for volume in "${volumes[@]}";do
+    if [[ "$volume" != "$target_volume" ]]; then
+      if [[ "$volume" == here ]];then
+        git annex drop "$f"
+      else
+        git annex drop --from "$volume" "$f"
+      fi
+    fi
+  done
+  i=$((i+1))
+done
+git add .gitattributes
+
+(
+  flock 1
+  echo "$name"
+  for f;do
+    echo " $f"
+  done
+) >> ec/.meta
+git add ec/.meta
diff --git a/annex-ec-test b/annex-ec-test
new file mode 100755 (executable)
index 0000000..013236d
--- /dev/null
+++ b/annex-ec-test
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Print the arguments on stderr and exit with status 1.  printf keeps a
+# message such as "-n" from being taken as an echo option.
+die() { printf '%s\n' "$*" >&2; exit 1; }
+
+# Entries of `vols` have the form "name directory".  Names are r0, r1, ...
+# and never contain a space, so both helpers split at the FIRST space; this
+# keeps them correct when the temporary directory path itself has spaces.
+vol_name() { echo "${1%% *}"; }
+vol_dir() { echo "${1#* }"; }
+
+make_test_vols() {
+  vols=()
+  deleted_vols=()
+  for (( i=0; i<$1; i++ ));do
+    vol=$(mktemp -d)
+    name="r$i"
+    if (( i == 0 ));then
+      git -C "$vol" init
+      git -C "$vol" commit --allow-empty -m "Begin"
+    else
+      git clone "$(vol_dir "${vols[0]}")" "$vol"
+      git -C "$vol" remote remove origin
+    fi
+    git -C "$vol" annex init "$name"
+    vols+=( "$name $vol" )
+  done
+  for vol in "${vols[@]}";do
+    for r in "${vols[@]}";do
+      if [[ "$vol" != "$r" ]];then
+        git -C "$(vol_dir "$vol")" remote add "$(vol_name "$r")" "$(vol_dir "$r")"
+      fi
+    done
+  done
+  sync_everything
+}
+
+sync_everything() {
+  for vol in "${vols[@]}";do
+    for already_deleted in "${deleted_vols[@]}";do
+      if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
+    done
+    git -C "$(vol_dir "$vol")" annex sync
+  done
+}
+
+fsck_everything() {
+  for vol in "${vols[@]}";do
+    for already_deleted in "${deleted_vols[@]}";do
+      if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
+    done
+    git -C "$(vol_dir "$vol")" annex fsck
+  done
+}
+
+delete_test_vol() {
+  for already_deleted in "${deleted_vols[@]}";do
+    if [[ "$1" == "$already_deleted" ]];then return; fi
+  done
+  d="$(vol_dir "$1")"
+  if [[ -d "$d/.git/annex/objects" ]];then
+    chmod -R +w "$d/.git/annex/objects"
+  fi
+  rm -rf "$d"
+  deleted_vols+=( "$1" )
+
+  # Find a not-yet-deleted volume (if there is one) and report the deleted volume as dead
+  for vol in "${vols[@]}";do
+    for already_deleted in "${deleted_vols[@]}";do
+      if [[ "$vol" == "$already_deleted" ]];then continue 2; fi
+    done
+    git -C "$(vol_dir "$vol")" annex dead "$(vol_name "$1")"
+    break
+  done
+}
+
+delete_some_test_vols() {
+  while read -r vol;do
+    delete_test_vol "$vol"
+  done < <(for vol in "${vols[@]}";do
+             echo "$vol"
+           done | shuf | head -n "$1")
+}
+
+delete_all_test_vols() {
+  for vol in "${vols[@]}";do
+    delete_test_vol "$vol"
+  done
+  vols=()
+  deleted_vols=()
+}
+
+make_test_file() {
+  name=$(tr -cd 0-9a-f < /dev/urandom | head -c 32)
+  size=$((RANDOM + RANDOM))
+  f="$name-$size"
+  set +o pipefail
+  openssl aes-128-cbc -nosalt -iv 0 -K "$name" < /dev/zero | head -c "$size" > "$1/$f"
+  set -o pipefail
+  git -C "$1" annex add "$f" >&2
+  echo "$f"
+}
+
+choose_volumes() {
+  x=$(for vol in "${vols[@]}";do
+        vol_name "$vol"
+      done | shuf | head -n "$1" | tr \\n ,)
+  echo "${x%,}"
+}
+
+MIN_REDUNDANCY=1
+MIN_FILES=2 # If you only have one file in a group, you'd just make copies of it, no need for annex-ec
+MIN_VOLUMES=$((MIN_REDUNDANCY + MIN_FILES))
+
+for (( num_vols=MIN_VOLUMES; num_vols <= 10; num_vols++ ));do
+  for (( redundancy=1; redundancy < num_vols-2; redundancy++ ));do
+    max_files=$(( num_vols - redundancy ))
+    for (( num_files=MIN_FILES; num_files <= max_files; num_files++ ));do
+      make_test_vols "$num_vols"
+      files=()
+      for (( i=0; i < num_files; i++ )); do
+        files[i]=$(make_test_file "$(vol_dir "${vols[i]}")")
+      done
+      sync_everything
+      sync_everything
+      pushd "$(vol_dir "${vols[$RANDOM % $num_vols]}")"
+        cmd=(annex-ec -r "$redundancy" -v "$(choose_volumes $((num_files+redundancy)))" "${files[@]}")
+        echo "In $PWD , running ${cmd[*]}" >&2
+        "${cmd[@]}"
+      popd
+      sync_everything
+      fsck_everything
+      delete_some_test_vols "$redundancy"
+      # TODO: Recover
+      sync_everything
+      # fsck_everything # Skip this check until recovery is implemented
+      delete_all_test_vols
+    done
+  done
+done
+