diff options
| author | 2026-01-16 20:59:38 +0000 | |
|---|---|---|
| committer | 2026-01-16 20:59:38 +0000 | |
| commit | e0df187ea05cdfa2d80e2846806b8060e93b0ced (patch) | |
| tree | 3b4016665ffea272fbfdab45f4b5751b32ca6eea /bin | |
| parent | pkgs(little-things-script): init (diff) | |
| signature | ||
bin/git-archive: init
Diffstat (limited to 'bin')
| -rwxr-xr-x | bin/git-archive | 73 |
1 files changed, 73 insertions, 0 deletions
#!/bin/sh
# git-archive: mirror-clone every repository URL read from stdin, pack each
# clone into a git bundle, and write a draft .dir-to-ia.conf next to them.
#
# Usage: git-archive WORKDIR BASEURL [SUFFIX] < urls
#   WORKDIR  directory that receives the item folder (created if missing)
#   BASEURL  base URL of the site being archived; used for the item name and
#            for the generated metadata
#   SUFFIX   optional text appended to the item identifier
#   stdin    one repository URL per line
#
# Uses faketime, git, zstd and GNU options of date/readlink/rm.
set -eu

if [ "$#" -lt 2 ]; then
  echo "Use: [workdir] [basepath] #items on stdin"
  # Bare `exit` keeps the historical status (that of the echo above, i.e. 0).
  exit
fi

# Parent directory of the directory holding this script.
# NOTE(review): not referenced anywhere else in this script.
mepath="$(readlink -m -- "$(dirname -- "$0")/..")"
readonly mepath

# One timestamp for the whole run, so every derived date string agrees.
starttime="$(date +%s)"

# ddate FORMAT: print the run's start time in UTC using a date(1) +FORMAT.
ddate() { date -u "-d@$starttime" "$1"; }

# ftime CMD...: run CMD under faketime, pinned to the run's start time
# (the "x0" suffix is passed through to faketime as part of its -f spec).
ftime() { faketime -f "$(ddate '+%Y-%m-%d %H:%M:%SZ') x0" "$@"; }

base_dir="$1"; shift
base_url="$1"; shift

# Make sure the base URL ends in exactly the form "<something>/".
case "$base_url" in
  */) ;;
  *) base_url="${base_url}/" ;;
esac

# safeurl STRING: turn a URL into a flat file name by replacing every "/"
# with "_" and dropping one trailing "_".
safeurl() { printf '%s\n' "$1" | sed 's,/,_,g;s,_$,,'; }

[ -d "$base_dir" ] || mkdir -- "$base_dir"

# Strip only a real http:// or https:// scheme. The previous sed chain also
# ate a leading "s" from scheme-less hosts (e.g. "sourcehut.org/").
case "$base_url" in
  https://*) base_url_hostonly="${base_url#https://}" ;;
  http://*) base_url_hostonly="${base_url#http://}" ;;
  *) base_url_hostonly="$base_url" ;;
esac

# ${1:-} is the optional SUFFIX argument (the two mandatory ones are shifted).
item_id="$(safeurl "$base_url_hostonly")__bundles_$(ddate '+%Y%m%d')${1:-}"
work_dir="${base_dir}/${item_id}"

echo "-- making $item_id folder --"
# Deliberately not -p: fail instead of reusing an existing item folder.
mkdir -- "$work_dir"

# `|| [ -n "$url" ]` also processes a final line that lacks a newline.
while IFS='' read -r url || [ -n "$url" ]; do
  # Blank lines carry no URL.
  [ -n "$url" ] || continue

  urlp="$(safeurl "$url")"
  # An empty, "." or ".." name would make the paths below point at the item
  # folder itself (or its parent), and the cleanup step would then delete it.
  case "$urlp" in
    '' | . | ..)
      printf 'skipping unusable url: %s\n' "$url" >&2
      continue
      ;;
  esac
  urlwp="$work_dir/$urlp"

  # The group runs in the pipeline's subshell, so the unset does not leak
  # into the rest of the script. stdin comes from /dev/null so that git (or
  # an ssh it spawns) cannot swallow the remaining URLs of the list.
  {
    unset HOME XDG_CONFIG_HOME SSH_ASKPASS GIT_ASKPASS
    printf '$ git clone --mirror %s %s\n' "$url" "$urlp"
    # Best effort: a failed clone must not abort the whole run. `--` stops a
    # URL that starts with "-" from being parsed as a git option.
    USER=archiveteam-codearchiver-runner ftime git -c 'credential.interactive=false' clone --mirror -- "$url" "$urlwp" || true
    printf '$ GIT_DIR=%s git bundle create %s.bundle --all\n' "$urlp" "$urlp"
    # Remove the mirror clone only when the bundle was really written; the
    # old `cmd || true; [ "$?" = "0" ]` test was always true.
    if USER=archiveteam-codearchiver-runner GIT_DIR="$urlwp" ftime git bundle create "$urlwp.bundle" --all; then
      printf '$ rm --recursive --force %s\n' "$urlp"
      USER=archiveteam-codearchiver-runner rm --recursive --force -- "$urlwp" || true
    fi
  } </dev/null 2>&1 | tee "$urlwp.log"
  zstd --rm -19 "$urlwp.log"
done

echo "-- finished job for $base_url --"
echo "-- generating dir-to-ia.conf (check it and write as .config) --"

# Unquoted delimiter: $(...) and $vars are expanded now; \$ stays literal.
cat >"${work_dir}/.dir-to-ia.conf" <<EoC
exit 1 # make user edit check this
sha256=yes
rm=no
rmwait=yes
sha1check=yes
clobber=no
derive=no

# Custom options for ia-upload-stream, as an array of args; this can be used to choose the part size and concurrency, for example, using (--part-size 1G --concurrency 4).
iauploadstreamopts=()

# Item metadata (array with 'key:value' elements); the only mandatory variable with no default
metadata=(
  "collection:open_source_software"
  "date:$(ddate '+%Y-%m-%d')"
  "description:Git bundles for all public, non-empty code and wiki repositories on $base_url as of $(ddate '+%Y-%m-%d %H:%M:%S') UTC. Retrieved with $(git --version), using <code>git clone --mirror \$REPOURL \$TMPCLONEDIR</code> and <code>GIT_DIR=\$TMPCLONEDIR git bundle create \$BUNDLENAME --all</code>"
  "mediatype:software"
  "title:$base_url_hostonly repository bundles ($(ddate '+%Y-%m-%d'))"
)
EoC
