summaryrefslogtreecommitdiff
path: root/bin/git-archive
blob: 9007c77a0e122747f738200c71b2c9dbdb50af03 (about) (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/sh
# Archive a list of git repositories as bundles.
#
# Usage: git-archive WORKDIR BASEURL [SUFFIX] < list-of-repo-urls
#   WORKDIR  directory under which the per-run item folder is created
#   BASEURL  base URL of the forge being archived (used for naming/metadata)
#   SUFFIX   optional string appended to the generated item id
# Repository URLs are read from stdin, one per line.
#
# -e: abort on the first unhandled failing command; -u: unset variables are errors.
set -eu

# At least WORKDIR and BASEURL are required. A usage error is reported on
# stderr and with a non-zero status so callers can tell it from a real run.
if [ "$#" -lt 2 ]; then
	echo "Use: [workdir] [basepath] #items on stdin" >&2
	exit 2
fi

# Canonical path of the directory one level above this script's directory.
# Expansions are quoted so a script path containing whitespace or glob
# characters resolves correctly, and the assignment is kept separate from
# `readonly` so a failing readlink is not masked by readonly's own status.
# NOTE(review): mepath is not referenced anywhere else in the visible script.
mepath="$(readlink -m "$(dirname "$0")/..")"
readonly mepath

# Start of the run as Unix epoch seconds; ddate and ftime both derive their
# timestamps from this single value.
starttime="$(date +%s)"

# ddate FORMAT
# Print the run's start time, in UTC, rendered with the given date(1) format.
ddate() {
	date -u -d "@${starttime}" "$1"
}

# ftime COMMAND [ARGS...]
# Run COMMAND under faketime, passing the run's start time plus " x0" as the
# -f specification.
# NOTE(review): " x0" is presumably a zero speed multiplier that freezes the
# clock at the start time; confirm against the faketime documentation.
ftime() {
	faketime -f "$(ddate '+%Y-%m-%d %H:%M:%SZ') x0" "$@"
}
# First positional argument: directory that will hold this run's item folder.
base_dir="$1"
shift
# Second positional argument: base URL of the site being archived.
base_url="$1"
shift

# Append a trailing "/" to base_url unless it already ends with one.
echo "$base_url" | grep -q '/$' || base_url="${base_url}/"

# safeurl STRING
# Print STRING made safe for use as a single path component: every "/" is
# replaced by "_", then one trailing "_" (if any) is removed. A trailing "_"
# that was already in the input is removed as well.
# printf is used instead of echo so inputs that look like echo options
# ("-n", "-e") or contain backslashes are passed through unmodified.
safeurl() {
	printf '%s\n' "$1" | sed 's,/,_,g;s,_$,,'
}

# Create the base directory if it does not exist yet (its parent must exist).
[ -d "$base_dir" ] || mkdir "$base_dir"

# base_url without its scheme: strip exactly one leading "http://" or
# "https://". Removing "http", "s" and "://" as three independent steps would
# also eat the first letter of a scheme-less host such as "sourcehut.org/" and
# the "http" of a host such as "httpbin.org/".
base_url_hostonly="$(printf '%s\n' "$base_url" | sed -e 's,^https\{0,1\}://,,')"

# Item id: sanitised host/path, the run date (UTC, YYYYMMDD) and, if a third
# command-line argument was given, that argument as a suffix.
item_id="$(safeurl "$base_url_hostonly")__bundles_$(ddate '+%Y%m%d')${1:-}"
work_dir="${base_dir}/${item_id}"

echo "-- making $item_id folder --"
# Plain mkdir (no -p): under `set -e` the script stops here if the item
# folder already exists.
mkdir "$work_dir"

# Delimiter for the sed substitution below: SOH (0x01), which cannot clash
# with characters in a path. It is produced with printf because the $'\1'
# quoting form is not understood by every /bin/sh.
soh="$(printf '\001')"
# work_dir with basic-regex metacharacters escaped, so sed matches it literally
# (the item id contains host names, i.e. dots).
work_dir_re="$(printf '%s\n' "$work_dir" | sed 's/[][\.*^$]/\\&/g')"

# Main loop: one repository URL per stdin line. For each URL: mirror-clone it,
# pack the clone into "<name>.bundle", delete the clone if bundling succeeded,
# and keep a zstd-compressed log of the commands and their output.
# `|| [ -n "$url" ]` also processes a final line that lacks a trailing newline.
while IFS='' read -r url || [ -n "$url" ]; do
	# Blank lines would otherwise be "cloned" as an empty URL.
	[ -n "$url" ] || continue

	urlp="$(safeurl "$url")"
	urlwp="$work_dir/$urlp"
	{
		# This group is the left side of a pipeline, so it runs in a subshell:
		# the unset and the set +e/-e toggling do not leak into the main script.
		# Dropping these variables keeps user configuration and askpass helpers
		# out of git's environment.
		unset HOME XDG_CONFIG_HOME SSH_ASKPASS GIT_ASKPASS
		# A failing clone or bundle must not abort the whole run.
		set +e
		printf '%s\n' "$ git clone --mirror $url $urlp"
		USER=archiveteam-codearchiver-runner ftime git -c 'core.sshCommand=ssh -F /dev/null -o "UserKnownHostsFile /dev/null" -o "IdentitiesOnly yes" -o "StrictHostKeyChecking no"' -c 'credential.interactive=false' clone --mirror "$url" "$urlwp"
		printf '%s\n' "$ GIT_DIR=$urlp git bundle create $urlp.bundle --all"
		# The mirror clone is removed only when the bundle was written successfully.
		if USER=archiveteam-codearchiver-runner GIT_DIR="$urlwp" ftime git bundle create "$urlwp.bundle" --all; then
			printf '%s\n' "$ rm --recursive --force $urlp"
			USER=archiveteam-codearchiver-runner rm --recursive --force "$urlwp"
		fi
		set -e
	# stdin is /dev/null so nothing in the group can consume the URL list that
	# the enclosing `read` is iterating over.
	} </dev/null 2>&1 | tee "$urlwp.log"
	# Replace the local work directory path in the log with "/archive/".
	sed -i "s${soh}${work_dir_re}${soh}/archive/${soh}g" "$urlwp.log"
	zstd --rm -19 "$urlwp.log"
done

# Tell the operator the clone/bundle phase is over and what is written next.
echo "-- finished job for $base_url --"
echo "-- generating dir-to-ia.conf (check it and write as .config) --"

# Write a configuration template into the item folder as ".dir-to-ia.conf".
# The heredoc delimiter is unquoted, so $base_url, $base_url_hostonly and the
# $(ddate ...) / $(git --version) substitutions are expanded now, while the
# backslash-escaped \$REPOURL, \$TMPCLONEDIR and \$BUNDLENAME are written out
# as literal dollar-names.
# The template's first line is `exit 1`; per its inline comment this is meant
# to make the user review/edit the file (presumably the consumer refuses to
# proceed until that line is removed -- confirm against the uploader tool).
cat >"${work_dir}/.dir-to-ia.conf" <<EoC
exit 1 # make user edit check this
sha256=yes
rm=no
rmwait=yes
sha1check=yes
clobber=no
derive=no

# Custom options for ia-upload-stream, as an array of args; this can be used to choose the part size and concurrency, for example, using (--part-size 1G --concurrency 4).
iauploadstreamopts=()

# Item metadata (array with 'key:value' elements); the only mandatory variable with no default
metadata=(
	"collection:open_source_software"
	"date:$(ddate '+%Y-%m-%d')"
	"description:Git bundles for all public, non-empty code and wiki repositories on $base_url as of $(ddate '+%Y-%m-%d %H:%M:%S') UTC. Retrieved with $(git --version), using <code>git clone --mirror \$REPOURL \$TMPCLONEDIR</code> and <code>GIT_DIR=\$TMPCLONEDIR git bundle create \$BUNDLENAME --all</code>"
	"mediatype:software"
	"title:$base_url_hostonly repository bundles ($(ddate '+%Y-%m-%d'))"
)
EoC