summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x bin/git-archive 73
1 files changed, 73 insertions, 0 deletions
diff --git a/bin/git-archive b/bin/git-archive
new file mode 100755
index 0000000..dd8c36d
--- /dev/null
+++ b/bin/git-archive
@@ -0,0 +1,73 @@
+#!/bin/sh
+set -eu
+
+[ "$#" -lt 2 ] && {
+ echo "Use: [workdir] [basepath] #items on stdin"
+ exit;
+}
+# Parent of the directory that contains this script; expansions are quoted so a path with spaces stays one argument.
+readonly mepath="$(readlink -m "$(dirname "$0")/..")"
+# One timestamp is captured at launch; the two helpers below derive every later date/time from it.
+starttime=$(date +%s)
+ddate() { date -u -d "@${starttime}" "$1"; } # print $starttime in UTC using the date(1) format given as "$1"
+ftime() { set -- "$(ddate '+%Y-%m-%d %H:%M:%SZ') x0" "$@"; faketime -f "$@"; } # run "$@" under faketime with a spec built from $starttime
+base_dir="$1"; shift
+base_url="$1"; shift
+
+# hacky thing to make sure url ends in /
+if ! echo "$base_url" | grep -q '/$'; then
+ base_url="${base_url}/"
+fi
+
+safeurl() { printf '%s\n' "$1" | sed 's,/,_,g;s,_$,,'; } # "/" -> "_", then drop one trailing "_"; printf so "-n" or backslashes are not mangled by echo
+
+[ -d "$base_dir" ] || mkdir "$base_dir"
+
+base_url_hostonly="$(echo "$base_url" | sed -e 's,^http,,;s,^s,,;s,^://,,')"
+item_id="$(safeurl "$base_url_hostonly")__bundles_$(ddate '+%Y%m%d')${1:-}"
+work_dir="${base_dir}/${item_id}"
+
+echo "-- making $item_id folder --"
+mkdir "$work_dir"
+# For every URL read from stdin: mirror-clone it, bundle all refs, remove the clone, then compress the per-URL log.
+while IFS='' read -r url; do
+ urlp="$(safeurl "$url")"
+ urlwp="$work_dir/$urlp"
+ {
+ unset HOME XDG_CONFIG_HOME SSH_ASKPASS GIT_ASKPASS # only affects this pipeline stage, which runs in a subshell
+ echo "$ git clone --mirror $url $urlp"
+ USER=archiveteam-codearchiver-runner ftime git -c 'credential.interactive=false' clone --mirror "$url" "$urlwp" || true
+ echo "$ GIT_DIR=$urlp git bundle create $urlp.bundle --all"
+ # Test the bundle command itself: after `cmd || true`, $? was always 0, so the clone was deleted even when bundling failed.
+ if USER=archiveteam-codearchiver-runner GIT_DIR="$urlwp" ftime git bundle create "$urlwp.bundle" --all; then
+ echo "$ rm --recursive --force $urlp"
+ USER=archiveteam-codearchiver-runner rm --recursive --force "$urlwp" || true
+ fi
+ } 2>&1 | tee "$urlwp.log"
+ zstd --rm -19 "$urlwp.log"
+done
+
+echo "-- finished job for $base_url --"
+echo "-- generating dir-to-ia.conf (check it and write as .config) --"
+# Template config written into the work dir. The delimiter is unquoted, so $base_url, $base_url_hostonly and the $(...) calls expand now; \$NAME is written as a literal $NAME.
+cat >"${work_dir}/.dir-to-ia.conf" <<EoC
+exit 1 # make user edit check this
+sha256=yes
+rm=no
+rmwait=yes
+sha1check=yes
+clobber=no
+derive=no
+
+# Custom options for ia-upload-stream, as an array of args; this can be used to choose the part size and concurrency, for example, using (--part-size 1G --concurrency 4).
+iauploadstreamopts=()
+
+# Item metadata (array with 'key:value' elements); the only mandatory variable with no default
+metadata=(
+ "collection:open_source_software"
+ "date:$(ddate '+%Y-%m-%d')"
+ "description:Git bundles for all public, non-empty code and wiki repositories on $base_url as of $(ddate '+%Y-%m-%d %H:%M:%S') UTC. Retrieved with $(git --version), using <code>git clone --mirror \$REPOURL \$TMPCLONEDIR</code> and <code>GIT_DIR=\$TMPCLONEDIR git bundle create \$BUNDLENAME --all</code>"
+ "mediatype:software"
+ "title:$base_url_hostonly repository bundles ($(ddate '+%Y-%m-%d'))"
+)
+EoC