From 2b22dfad8f09a236e4c8f7983b491b31b06b4c25 Mon Sep 17 00:00:00 2001
From: Scott Worley
Date: Mon, 16 Mar 2026 01:51:53 -0700
Subject: [PATCH] Begin

---
 annex-ec      | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++
 annex-ec-test | 165 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 340 insertions(+)
 create mode 100755 annex-ec
 create mode 100755 annex-ec-test

diff --git a/annex-ec b/annex-ec
new file mode 100755
index 0000000..b6170e6
--- /dev/null
+++ b/annex-ec
@@ -0,0 +1,175 @@
+#!/usr/bin/env bash
+#
+# annex-ec: protect a group of git-annex files with par2 recovery data.
+#
+# Takes N files and N + R volumes ("here" or git remotes).  Each file is kept
+# on one volume of its own, and each of the R par2 recovery files computed
+# over the group is placed on one of the remaining volumes.
+#
+# usage: annex-ec [-v remote1,remote2,...] [-r N] [-b N] [-m N] file file...
+#   -v  comma-separated volumes (default: here plus every git remote)
+#   -r  number of recovery files, one volume each (default: 1)
+#   -b  par2 blocks per file (default: 10)
+#   -m  the par2 block size is made a multiple of this (default: 4)
+
+set -euo pipefail
+
+# Print a message on stderr and exit with status 1.
+die() {
+  echo "$*" >&2
+  exit 1
+}
+
+# Abort unless $2, the argument of option -$1, is a positive integer.
+require_positive_int() {
+  [[ "$2" =~ ^[1-9][0-9]*$ ]] || die "Option -$1 needs a positive integer, but got '$2'"
+}
+
+# Split the comma-separated list $1 into the global array `volumes`, skipping
+# empty entries.  An entry equal to this repository's own name becomes "here".
+parse_volume_list() {
+  local this_volume_name x
+  local -a tmp=()
+  # Field 2 of this repository's line in git-annex:uuid.log is taken as its
+  # name.  NOTE(review): assumes that description matches the remote name
+  # other repositories use for it and contains no spaces -- confirm.
+  this_volume_name=$(join -j1 <(git config get annex.uuid) <(git cat-file -p git-annex:uuid.log | sort) | cut -d' ' -f2)
+  mapfile -d , tmp <<< "$1"
+  volumes=()
+  for x in "${tmp[@]}"; do
+    x=${x%,}      # mapfile without -t keeps the delimiter
+    x=${x%$'\n'}  # the here-string appends a newline
+    [[ "$x" ]] || continue
+    if [[ "$x" == "$this_volume_name" ]]; then
+      volumes+=( "here" )
+    else
+      volumes+=( "$x" )
+    fi
+  done
+}
+
+# Set the global `name` to the base name for the group's par2 files: the hash
+# part of each file's SHA256E key, truncated to equal length and joined by
+# dashes.  Reads the global N (the number of files in "$@").
+make_name() {
+  local FILENAME_MAX=255
+  local EXAMPLE_SUFFIX='.vol0000+9999.par2'
+  local EXAMPLE_ANNEX_INTERNAL_PREFIX='ingest-'
+  local EXAMPLE_ANNEX_INTERNAL_SUFFIX='-1-1173fd7'
+  local num_separating_dashes overhead available len
+  # Budget the characters that get added around the name (going by the
+  # EXAMPLE_* values) and split the rest of FILENAME_MAX evenly among hashes.
+  num_separating_dashes=$((N - 1))
+  overhead=$(( ${#EXAMPLE_SUFFIX} + ${#EXAMPLE_ANNEX_INTERNAL_PREFIX} + ${#EXAMPLE_ANNEX_INTERNAL_SUFFIX} + num_separating_dashes ))
+  available=$((FILENAME_MAX - overhead))
+  len=$((available / N))
+  # %l prints each symlink's target, which contains the file's annex key.
+  name=$(find "$@" -printf '%l\n' | sed -r 's/.*SHA256E-s[0-9]+--//;s/\..*//' | cut -c-"$len" | tr \\n -)
+  name=${name%-}
+}
+
+volumes=()
+redundancy=1
+block_size_is_a_multiple_of=4  # par2 requires that this be at least 4
+blocks_per_file=10
+while getopts b:m:r:v: opt; do
+  case $opt in
+    b) blocks_per_file=$OPTARG ;;
+    m) block_size_is_a_multiple_of=$OPTARG ;;
+    r) redundancy=$OPTARG ;;
+    v) parse_volume_list "$OPTARG" ;;
+    *) echo 'usage: annex-ec [-v remote1,remote2,...] [-r N] [-b N] [-m N] file file...' >&2; exit 1 ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+# These values feed shell arithmetic and par2 options below.
+require_positive_int b "$blocks_per_file"
+require_positive_int m "$block_size_is_a_multiple_of"
+require_positive_int r "$redundancy"
+
+if (( ${#volumes[@]} == 0 )); then
+  parse_volume_list "here,$(git remote | tr \\n ,)"
+fi
+
+N=$((${#volumes[@]} - redundancy))
+
+# make_name divides by N, so there must be room for at least one file.
+(( N >= 1 )) || die "Redundancy $redundancy needs more than $redundancy volumes, but got ${#volumes[@]}"
+(( $# == N )) || die "Expected $N files in this group ($N + $redundancy = ${#volumes[@]}), but got $#"
+
+git annex get -- "$@"
+
+# Choose a block size, a multiple of block_size_is_a_multiple_of, large
+# enough that the largest file fits in blocks_per_file blocks.
+max_size=$(find -L "$@" -printf '%s\n' | sort -nr | head -n1)
+block_size=$(( ((max_size/(block_size_is_a_multiple_of*blocks_per_file))+1) * block_size_is_a_multiple_of))
+
+make_name "$@"
+
+# Set up ec/ idempotently, so that a run interrupted partway through is
+# completed by the next one.
+mkdir -p ec
+if ! grep -qxF -- '* annex.numcopies=1' ec/.gitattributes 2>/dev/null; then
+  echo '* annex.numcopies=1' >> ec/.gitattributes
+fi
+git add ec/.gitattributes
+
+# Create `redundancy` recovery files.  -u gives them uniform sizes, so each
+# holds blocks_per_file blocks, enough to stand in for any one file of the
+# group; without -u, par2cmdline makes recovery files of varying sizes.
+# Only the .vol* recovery files are kept.
+par2 c -u -n"$redundancy" -c"$((blocks_per_file * redundancy))" -s"$block_size" "$name.par2" "$@"
+rm "$name.par2"
+mv "$name.vol"* ec/
+
+# The first N volumes each hold one file of the group.
+i=0
+for f; do
+  target_volume="${volumes[i]}"
+  if [[ "$target_volume" != here ]]; then
+    git annex copy --to "$target_volume" "$f"
+  fi
+  i=$((i+1))
+done
+
+# i keeps counting from N: the remaining volumes each get one recovery file.
+for f in ec/"$name.vol"*; do
+  target_volume="${volumes[i]}"
+  git annex add "$f"
+  if [[ "$target_volume" != here ]]; then
+    git annex move --to "$target_volume" "$f"
+  fi
+  i=$((i+1))
+done
+
+# Permit a single copy of each file, then drop it from every listed volume
+# other than its own.
+i=0
+for f; do
+  target_volume="${volumes[i]}"
+  # A literal space would end the .gitattributes pattern.
+  echo "${f// /[[:space:]]} annex.numcopies=1" >> .gitattributes
+  for volume in "${volumes[@]}"; do
+    if [[ "$volume" != "$target_volume" ]]; then
+      if [[ "$volume" == here ]]; then
+        git annex drop "$f"
+      else
+        git annex drop --from "$volume" "$f"
+      fi
+    fi
+  done
+  i=$((i+1))
+done
+git add .gitattributes
+
+# Append this group to ec/.meta: its name, then its files.  flock holds a
+# lock on the output file until the subshell exits.
+(
+  flock 1
+  echo "$name"
+  for f; do
+    echo " $f"
+  done
+) >> ec/.meta
+git add ec/.meta
diff --git a/annex-ec-test b/annex-ec-test
new file mode 100755
index 0000000..013236d
--- /dev/null
+++ b/annex-ec-test
@@ -0,0 +1,165 @@
+#!/usr/bin/env bash
+#
+# Randomized end-to-end test for annex-ec.  For a range of volume counts,
+# redundancy levels and group sizes, it builds throwaway git-annex
+# repositories, runs annex-ec (found via PATH) in one of them, then syncs and
+# fscks every repository.
+
+set -euo pipefail
+
+die() { echo "$*" >&2; exit 1; }
+
+# Entries of `vols` and `deleted_vols` are "<name> <directory>" strings.
+# Names contain no spaces, so split at the first one; the directory may
+# contain more (e.g. via TMPDIR).
+vol_name() { echo "${1%% *}"; }
+vol_dir() { echo "${1#* }"; }
+
+# Succeed if the volume entry $1 is listed in deleted_vols.
+vol_is_deleted() {
+  local already_deleted
+  for already_deleted in "${deleted_vols[@]}"; do
+    if [[ "$1" == "$already_deleted" ]]; then return 0; fi
+  done
+  return 1
+}
+
+# Run git with the arguments "$@" in every volume that is not deleted.
+in_each_live_vol() {
+  local vol
+  for vol in "${vols[@]}"; do
+    if vol_is_deleted "$vol"; then continue; fi
+    git -C "$(vol_dir "$vol")" "$@"
+  done
+}
+
+# Create $1 repositories named r0, r1, ... in temporary directories, all
+# cloned from r0 and each with every other one as a remote.  Resets the
+# globals `vols` and `deleted_vols`.
+make_test_vols() {
+  local i vol name r
+  vols=()
+  deleted_vols=()
+  for (( i=0; i<$1; i++ )); do
+    vol=$(mktemp -d)
+    name="r$i"
+    if (( i == 0 )); then
+      git -C "$vol" init
+      git -C "$vol" commit --allow-empty -m "Begin"
+    else
+      git clone "$(vol_dir "${vols[0]}")" "$vol"
+      git -C "$vol" remote remove origin
+    fi
+    git -C "$vol" annex init "$name"
+    vols+=( "$name $vol" )
+  done
+  for vol in "${vols[@]}"; do
+    for r in "${vols[@]}"; do
+      if [[ "$vol" != "$r" ]]; then
+        git -C "$(vol_dir "$vol")" remote add "$(vol_name "$r")" "$(vol_dir "$r")"
+      fi
+    done
+  done
+  sync_everything
+}
+
+sync_everything() { in_each_live_vol annex sync; }
+
+fsck_everything() { in_each_live_vol annex fsck; }
+
+# Remove the directory of volume entry $1 and have one surviving volume (if
+# any) mark it dead.  Does nothing if $1 is already deleted.
+delete_test_vol() {
+  local d vol
+  if vol_is_deleted "$1"; then return; fi
+  d="$(vol_dir "$1")"
+  # rm cannot empty the object store unless its directories are writable.
+  if [[ -d "$d/.git/annex/objects" ]]; then
+    chmod -R +w "$d/.git/annex/objects"
+  fi
+  rm -rf -- "${d:?}"
+  deleted_vols+=( "$1" )
+
+  # Find a not-yet-deleted volume (if there is one) and report the deleted volume as dead
+  for vol in "${vols[@]}"; do
+    if vol_is_deleted "$vol"; then continue; fi
+    git -C "$(vol_dir "$vol")" annex dead "$(vol_name "$1")"
+    break
+  done
+}
+
+# Delete $1 volumes picked at random.
+delete_some_test_vols() {
+  local vol
+  while IFS= read -r vol; do
+    delete_test_vol "$vol"
+  done < <(printf '%s\n' "${vols[@]}" | shuf | head -n "$1")
+}
+
+# Delete every volume and forget them all.
+delete_all_test_vols() {
+  local vol
+  for vol in "${vols[@]}"; do
+    delete_test_vol "$vol"
+  done
+  vols=()
+  deleted_vols=()
+}
+
+# Create a file of random size (0 to 65534 bytes) in the repository
+# directory $1, annex-add it, and print its name on stdout.
+make_test_file() {
+  local name size f
+  # head stops reading early in both pipelines below; the resulting SIGPIPE
+  # in the writer must not count as a pipeline failure.
+  set +o pipefail
+  name=$(tr -cd 0-9a-f < /dev/urandom | head -c 32)
+  size=$((RANDOM + RANDOM))
+  f="$name-$size"
+  # Content: zeros encrypted with AES-128-CBC, using the name as the hex key.
+  openssl aes-128-cbc -nosalt -iv 0 -K "$name" < /dev/zero | head -c "$size" > "$1/$f"
+  set -o pipefail
+  git -C "$1" annex add "$f" >&2
+  echo "$f"
+}
+
+# Print $1 volume names picked at random, joined by commas.
+choose_volumes() {
+  local vol x
+  x=$(for vol in "${vols[@]}"; do
+    vol_name "$vol"
+  done | shuf | head -n "$1" | tr \\n ,)
+  echo "${x%,}"
+}
+
+MIN_REDUNDANCY=1
+MIN_FILES=2  # If you only have one file in a group, you'd just make copies of it, no need for annex-ec
+MIN_VOLUMES=$((MIN_REDUNDANCY + MIN_FILES))
+
+for (( num_vols=MIN_VOLUMES; num_vols <= 10; num_vols++ )); do
+  # Always leave room for MIN_FILES files beside the recovery volumes.
+  for (( redundancy=MIN_REDUNDANCY; redundancy <= num_vols-MIN_FILES; redundancy++ )); do
+    max_files=$(( num_vols - redundancy ))
+    for (( num_files=MIN_FILES; num_files <= max_files; num_files++ )); do
+      make_test_vols "$num_vols"
+      files=()
+      for (( i=0; i < num_files; i++ )); do
+        files[i]=$(make_test_file "$(vol_dir "${vols[i]}")")
+      done
+      sync_everything
+      sync_everything
+      pushd "$(vol_dir "${vols[$RANDOM % $num_vols]}")"
+      cmd=(annex-ec -r "$redundancy" -v "$(choose_volumes "$((num_files+redundancy))")" "${files[@]}")
+      echo "In $PWD , running ${cmd[*]}" >&2
+      "${cmd[@]}"
+      popd
+      sync_everything
+      fsck_everything
+      delete_some_test_vols "$redundancy"
+      # TODO: Recover
+      sync_everything
+      # fsck_everything # Skip this check until recovery is implemented
+      delete_all_test_vols
+    done
+  done
+done
-- 
2.51.2