28 files changed, 2369 insertions, 9 deletions
diff --git a/.golangci.yaml b/.golangci.yaml
index 0f2234be..cb9dec5c 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -10,6 +10,10 @@ linters:
       - path: internal/testgit
         linters:
           - err113        # test helpers report ad-hoc git failures; static errors add no value here
+      - path: cmd/
+        linters:
+          - err113
+          - wrapcheck
   disable:
     - dupword          # extremely normal in tests and a pretty unnecessary linter
     - goconst          # unnecessary especially for our parsing code; many false positives
diff --git a/cmd/doc.go b/cmd/doc.go
new file mode 100644
index 00000000..119a6522
--- /dev/null
+++ b/cmd/doc.go
@@ -0,0 +1,3 @@
+// Package cmd provides some commands for testing furgit
+// and inspecting git repositories.
+package cmd
diff --git a/cmd/explain-pack/delta.go b/cmd/explain-pack/delta.go
new file mode 100644
index 00000000..22e32195
--- /dev/null
+++ b/cmd/explain-pack/delta.go
@@ -0,0 +1,120 @@
+package main
+
+import (
+	"fmt"
+)
+
+func (explainer *explainer) walkDelta(base, payload []byte, pos int) ([]byte, bool, error) {
+	explainer.printf("\tdelta\n")
+
+	building := base != nil
+
+	var result []byte
+
+	insn := 0
+
+	for pos < len(payload) {
+		op := payload[pos]
+		pos++
+		insn++
+
+		switch {
+		case op&0x80 != 0:
+			next, seg, err := explainer.decodeCopy(base, payload, pos, op)
+			if err != nil {
+				return nil, false, err
+			}
+
+			pos = next
+
+			if building {
+				result = append(result, seg...)
+			}
+		case op != 0:
+			next, lit, err := explainer.decodeInsert(payload, pos, int(op))
+			if err != nil {
+				return nil, false, err
+			}
+
+			pos = next
+
+			if building {
+				result = append(result, lit...)
+			}
+		default:
+			explainer.printf("\t\tinvalid opcode 0x00; stopping delta decode\n")
+
+			return nil, false, nil
+		}
+	}
+
+	if !building {
+		return nil, false, nil
+	}
+
+	return result, true, nil
+}
+
+func (explainer *explainer) decodeCopy(base, payload []byte, pos int, op byte) (int, []byte, error) {
+	offset := 0
+
+	for i := range 4 {
+		if op&(1<<uint(i)) == 0 {
+			continue
+		}
+
+		if pos >= len(payload) {
+			return 0, nil, fmt.Errorf("truncated copy offset")
+		}
+
+		offset |= int(payload[pos]) << (8 * uint(i))
+		pos++
+	}
+
+	size := 0
+
+	for i := range 3 {
+		if op&(1<<uint(4+i)) == 0 {
+			continue
+		}
+
+		if pos >= len(payload) {
+			return 0, nil, fmt.Errorf("truncated copy size")
+		}
+
+		size |= int(payload[pos]) << (8 * uint(i))
+		pos++
+	}
+
+	if size == 0 {
+		size = 0x10000
+	}
+
+	explainer.printf("\t\tcpy %d from %d\n", size, offset)
+
+	if base == nil {
+		return pos, nil, nil
+	}
+
+	if offset < 0 || offset+size > len(base) {
+		return 0, nil, fmt.Errorf("copy of %d byte(s) from base offset %d exceeds base length %d", size, offset, len(base))
+	}
+
+	seg := base[offset : offset+size]
+	hexBlock(explainer.out, "\t\t\t", seg)
+
+	return pos, seg, nil
+}
+
+func (explainer *explainer) decodeInsert(payload []byte, pos, n int) (int, []byte, error) {
+	if pos+n > len(payload) {
+		return 0, nil, fmt.Errorf("truncated insert payload")
+	}
+
+	lit := payload[pos : pos+n]
+
+	explainer.printf("\t\tins %d\n", n)
+	hexBlock(explainer.out, "\t\t\t", lit)
+
+	return pos + n, lit, nil
+}
diff --git a/cmd/explain-pack/doc.go b/cmd/explain-pack/doc.go
new file mode 100644
index 00000000..f5fcc986
--- /dev/null
+++ b/cmd/explain-pack/doc.go
@@ -0,0 +1,10 @@
+// Command explain-pack reads a Git packfile and writes a
+// human-readable explanation to stdout.
+//
+// With a pack filename argument
+// the pack is mmap'd
+// and a sibling .idx is used when present;
+// with no argument the pack is read from stdin.
+// A packfile does not record its object format,
+// so the format must be given with -format.
+package main
diff --git a/cmd/explain-pack/entry.go b/cmd/explain-pack/entry.go
new file mode 100644
index 00000000..0b796ef0
--- /dev/null
+++ b/cmd/explain-pack/entry.go
@@ -0,0 +1,257 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+
+	"lindenii.org/go/furgit/internal/compress/zlib"
+	"lindenii.org/go/furgit/internal/format/packfile"
+	"lindenii.org/go/furgit/internal/format/packfile/delta"
+	"lindenii.org/go/furgit/object/tree"
+	"lindenii.org/go/lgo/intconv"
+)
+
+func (explainer *explainer) explainEntry(num, count, cursor int) (int, error) {
+	hashSize := explainer.objectFormat.Size()
+
+	header, err := packfile.ParseEntryHeader(explainer.data[cursor:], hashSize)
+	if err != nil {
+		return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err)
+	}
+
+	payloadStart := cursor + header.HeaderLen
+	if payloadStart > len(explainer.data) {
+		return 0, fmt.Errorf("entry %d at offset %d: header runs past the end of the pack", num, cursor)
+	}
+
+	payload, consumed, err := inflateAt(explainer.data[payloadStart:])
+	if err != nil {
+		return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err)
+	}
+
+	next := payloadStart + consumed
+
+	explainer.printf("object %d of %d\n", num, count)
+	explainer.printf("\tty\t%s\n", entryTypeLabel(header.Type))
+	explainer.printf("\tofs\t%d\n", cursor)
+	explainer.printf("\thdrsz\t%d\n", header.HeaderLen)
+	explainer.printf("\tsz\t%d\n", header.Size)
+
+	if uint64(len(payload)) != header.Size {
+		explainer.printf("\tnote\tdeclared %d byte(s) but inflated to %d\n", header.Size, len(payload))
+	}
+
+	if header.Type.IsBase() {
+		err = explainer.renderBase(cursor, header.Type, payload, consumed)
+	} else {
+		err = explainer.renderDelta(cursor, header, payload, consumed)
+	}
+
+	if err != nil {
+		return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err)
+	}
+
+	explainer.printf("\n")
+
+	return next, nil
+}
+
+func (explainer *explainer) renderBase(cursor int, entryType packfile.EntryType, content []byte, consumed int) error {
+	explainer.renderContent(entryType, content)
+
+	explainer.printf("\tzlib\t%d\n", consumed)
+
+	oid, err := explainer.recomputeOID(entryType, content)
+	if err != nil {
+		return err
+	}
+
+	explainer.printf("\toid\t%s\n", oid)
+
+	explainer.oidIndex[oid] = cursor
+	explainer.cache.Add(cursor, resolvedBase{entryType: entryType, content: content})
+
+	return nil
+}
+
+func (explainer *explainer) renderDelta(cursor int, header packfile.EntryHeader, payload []byte, consumed int) error {
+	baseSize, resultSize, pos, err := delta.ParseHeaderSizes(payload)
+	if err != nil {
+		return fmt.Errorf("delta header: %w", err)
+	}
+
+	err = explainer.renderBaseRef(cursor, header)
+	if err != nil {
+		return err
+	}
+
+	explainer.printf("\tbasesz\t%d\n", baseSize)
+	explainer.printf("\tnewsz\t%d\n", resultSize)
+
+	baseOffset, located, err := explainer.baseOffset(cursor, header)
+	if err != nil {
+		return err
+	}
+
+	var (
+		baseType     packfile.EntryType
+		baseContent  []byte
+		baseResolved bool
+	)
+
+	if located {
+		baseType, baseContent, baseResolved, err = explainer.reconstruct(baseOffset, 0)
+		if err != nil {
+			return err
+		}
+	}
+
+	var walkBase []byte
+	if baseResolved {
+		walkBase = baseContent
+	}
+
+	result, complete, err := explainer.walkDelta(walkBase, payload, pos)
+	if err != nil {
+		return err
+	}
+
+	explainer.printf("\tzlib\t%d\n", consumed)
+
+	switch {
+	case baseResolved && complete:
+		if uint64(len(result)) != resultSize {
+			explainer.printf("\tnote\tdelta produced %d byte(s) but declared %d\n", len(result), resultSize)
+		}
+
+		explainer.renderContent(baseType, result)
+
+		newOID, err := explainer.recomputeOID(baseType, result)
+		if err != nil {
+			return err
+		}
+
+		explainer.printf("\tnewoid\t%s\n", newOID)
+
+		explainer.oidIndex[newOID] = cursor
+		explainer.cache.Add(cursor, resolvedBase{entryType: baseType, content: result})
+	case !baseResolved:
+		explainer.printf("\tnote\tbase not available in this pack; cannot reconstruct\n")
+	default:
+		explainer.printf("\tnote\tdelta decode incomplete; cannot reconstruct\n")
+	}
+
+	return nil
+}
+
+func (explainer *explainer) renderBaseRef(cursor int, header packfile.EntryHeader) error {
+	switch header.Type {
+	case packfile.EntryTypeOfsDelta:
+		dist, err := intconv.Uint64ToInt(header.OfsDistance)
+		if err != nil {
+			return fmt.Errorf("ofs-delta distance overflows int: %w", err)
+		}
+
+		explainer.printf("\tbaseofs\t-%d = %d\n", dist, cursor-dist)
+	case packfile.EntryTypeRefDelta:
+		baseID, err := explainer.objectFormat.FromBytes(header.RefBase[:explainer.objectFormat.Size()])
+		if err != nil {
+			return fmt.Errorf("ref-delta base ID: %w", err)
+		}
+
+		explainer.printf("\tbaseoid\t%s\n", baseID)
+	case packfile.EntryTypeInvalid,
+		packfile.EntryTypeCommit,
+		packfile.EntryTypeTree,
+		packfile.EntryTypeBlob,
+		packfile.EntryTypeTag,
+		packfile.EntryTypeFuture:
+	}
+
+	return nil
+}
+
+func (explainer *explainer) renderContent(entryType packfile.EntryType, content []byte) {
+	switch entryType {
+	case packfile.EntryTypeCommit, packfile.EntryTypeTag:
+		explainer.printf("\tcontent\n")
+		indentBlock(explainer.out, "\t\t", content)
+	case packfile.EntryTypeTree:
+		explainer.renderTree(content)
+	case packfile.EntryTypeBlob,
+		packfile.EntryTypeOfsDelta,
+		packfile.EntryTypeRefDelta,
+		packfile.EntryTypeInvalid,
+		packfile.EntryTypeFuture:
+		explainer.printf("\thexdump\n")
+		hexBlock(explainer.out, "\t\t", content)
+	}
+}
+
+func (explainer *explainer) renderTree(content []byte) {
+	parsed, err := tree.Parse(content, explainer.objectFormat)
+	if err != nil {
+		explainer.printf("\thexdump\t(not a valid tree: %v)\n", err)
+		hexBlock(explainer.out, "\t\t", content)
+
+		return
+	}
+
+	explainer.printf("\ttree\n")
+
+	for _, entry := range parsed.Entries() {
+		mode := string(entry.Mode.Append(nil))
+		explainer.printf(
+			"\t\t%s %s %s\t%s\n",
+			mode, entry.Mode.ObjectType().Name(), entry.ID, entry.Name,
+		)
+	}
+}
+
+func inflateAt(data []byte) ([]byte, int, error) {
+	reader := bytes.NewReader(data)
+
+	zr, err := zlib.NewReader(reader)
+	if err != nil {
+		return nil, 0, fmt.Errorf("opening zlib stream: %w", err)
+	}
+
+	content, err := io.ReadAll(zr)
+	closeErr := zr.Close()
+
+	if err != nil {
+		return nil, 0, fmt.Errorf("inflating payload: %w", err)
+	}
+
+	if closeErr != nil {
+		return nil, 0, fmt.Errorf("closing zlib stream: %w", closeErr)
+	}
+
+	consumed := len(data) - reader.Len()
+
+	return content, consumed, nil
+}
+
+func entryTypeLabel(entryType packfile.EntryType) string {
+	switch entryType {
+	case packfile.EntryTypeCommit:
+		return "commit"
+	case packfile.EntryTypeTree:
+		return "tree"
+	case packfile.EntryTypeBlob:
+		return "blob"
+	case packfile.EntryTypeTag:
+		return "tag"
+	case packfile.EntryTypeOfsDelta:
+		return "ofs-delta"
+	case packfile.EntryTypeRefDelta:
+		return "ref-delta"
+	case packfile.EntryTypeInvalid:
+		return "invalid"
+	case packfile.EntryTypeFuture:
+		return "future"
+	default:
+		return fmt.Sprintf("unknown (%d)", entryType)
+	}
+}
diff --git a/cmd/explain-pack/fmt.go b/cmd/explain-pack/fmt.go
new file mode 100644
index 00000000..a3d1b333
--- /dev/null
+++ b/cmd/explain-pack/fmt.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"bytes"
+	"encoding/hex"
+	"io"
+
+	"lindenii.org/go/furgit/internal/utils"
+)
+
+func indentBlock(out io.Writer, indent string, block []byte) {
+	lines := bytes.Split(block, []byte("\n"))
+	if n := len(lines); n > 0 && len(lines[n-1]) == 0 {
+		lines = lines[:n-1]
+	}
+
+	for _, line := range lines {
+		utils.BestEffortFprintf(out, "%s%s\n", indent, line)
+	}
+}
+
+func hexBlock(out io.Writer, indent string, data []byte) {
+	var buf bytes.Buffer
+
+	dumper := hex.Dumper(&buf)
+	_, _ = dumper.Write(data)
+	_ = dumper.Close()
+
+	indentBlock(out, indent, buf.Bytes())
+}
diff --git a/cmd/explain-pack/main.go b/cmd/explain-pack/main.go
new file mode 100644
index 00000000..af5b7480
--- /dev/null
+++ b/cmd/explain-pack/main.go
@@ -0,0 +1,240 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/hex"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"lindenii.org/go/furgit/internal/format/packfile"
+	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/mmap"
+	"lindenii.org/go/furgit/internal/utils"
+	"lindenii.org/go/furgit/object/id"
+)
+
+func main() {
+	format := flag.String("format", "", "object format of the pack: sha1 or sha256 (required)")
+
+	flag.Parse()
+
+	err := run(*format, flag.Args(), os.Stdin, os.Stdout)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "explain-pack:", err)
+		os.Exit(1)
+	}
+}
+
+type explainer struct {
+	data         []byte
+	objectFormat id.ObjectFormat
+	out          *bufio.Writer
+
+	idx *packidx.Packidx
+
+	cache    *baseCache
+	oidIndex map[id.ObjectID]int
+}
+
+func run(format string, args []string, stdin io.Reader, stdout io.Writer) error {
+	if format == "" {
+		return fmt.Errorf("the -format flag is required (sha1 or sha256)")
+	}
+
+	objectFormat, err := id.ParseObjectFormat(format)
+	if err != nil {
+		return fmt.Errorf("invalid -format %q: %w", format, err)
+	}
+
+	if len(args) > 1 {
+		return fmt.Errorf("at most one pack file argument is accepted, got %d", len(args))
+	}
+
+	data, idx, closers, err := openInput(args, objectFormat, stdin)
+	if err != nil {
+		return err
+	}
+
+	defer func() {
+		for _, c := range closers {
+			_ = c.Close()
+		}
+	}()
+
+	out := bufio.NewWriter(stdout)
+
+	explainer := &explainer{
+		data:         data,
+		objectFormat: objectFormat,
+		out:          out,
+		idx:          idx,
+		cache:        newBaseCache(),
+		oidIndex:     make(map[id.ObjectID]int),
+	}
+
+	err = explainer.explain()
+	if err != nil {
+		return err
+	}
+
+	return out.Flush()
+}
+
+func openInput(args []string, objectFormat id.ObjectFormat, stdin io.Reader) ([]byte, *packidx.Packidx, []io.Closer, error) {
+	if len(args) == 0 {
+		data, err := io.ReadAll(stdin)
+		if err != nil {
+			return nil, nil, nil, fmt.Errorf("reading pack from stdin: %w", err)
+		}
+
+		return data, nil, nil, nil
+	}
+
+	packPath := args[0]
+
+	packMapping, err := mapPath(packPath)
+	if err != nil {
+		return nil, nil, nil, err
+	}
+
+	closers := []io.Closer{packMapping}
+
+	idx, idxMapping, err := openIndex(packPath, objectFormat)
+	if err != nil {
+		_ = packMapping.Close()
+
+		return nil, nil, nil, err
+	}
+
+	if idxMapping != nil {
+		closers = append(closers, idxMapping)
+	}
+
+	return packMapping.Data(), idx, closers, nil
+}
+
+func openIndex(packPath string, objectFormat id.ObjectFormat) (*packidx.Packidx, *mmap.Mmap, error) {
+	idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
+
+	file, err := os.Open(idxPath) //#nosec G304
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil, nil
+		}
+
+		return nil, nil, fmt.Errorf("opening index %q: %w", idxPath, err)
+	}
+
+	defer func() { _ = file.Close() }()
+
+	mapping, err := mmap.Open(file)
+	if err != nil {
+		return nil, nil, fmt.Errorf("mapping index %q: %w", idxPath, err)
+	}
+
+	idx, err := packidx.Parse(mapping.Data(), objectFormat.Size())
+	if err != nil {
+		_ = mapping.Close()
+
+		return nil, nil, fmt.Errorf("parsing index %q: %w", idxPath, err)
+	}
+
+	return &idx, mapping, nil
+}
+
+func mapPath(path string) (*mmap.Mmap, error) {
+	file, err := os.Open(path) //#nosec G304
+	if err != nil {
+		return nil, fmt.Errorf("opening pack %q: %w", path, err)
+	}
+
+	defer func() { _ = file.Close() }()
+
+	mapping, err := mmap.Open(file)
+	if err != nil {
+		return nil, fmt.Errorf("mapping pack %q: %w", path, err)
+	}
+
+	return mapping, nil
+}
+
+func (explainer *explainer) printf(format string, args ...any) {
+	utils.BestEffortFprintf(explainer.out, format, args...)
+}
+
+func (explainer *explainer) explain() error {
+	hashSize := explainer.objectFormat.Size()
+
+	if len(explainer.data) < packfile.HeaderLen+hashSize {
+		return fmt.Errorf("pack is too short to contain a header and a %d-byte trailer", hashSize)
+	}
+
+	count, err := explainer.explainHeader()
+	if err != nil {
+		return err
+	}
+
+	cursor := packfile.HeaderLen
+
+	for num := 1; num <= count; num++ {
+		next, err := explainer.explainEntry(num, count, cursor)
+		if err != nil {
+			return err
+		}
+
+		cursor = next
+	}
+
+	return explainer.explainTrailer(cursor)
+}
+
+func (explainer *explainer) explainHeader() (int, error) {
+	header, err := packfile.ParseHeader(explainer.data[:packfile.HeaderLen])
+	if err != nil {
+		return 0, fmt.Errorf("pack header: %w", err)
+	}
+
+	explainer.printf("pack header\n")
+	explainer.printf("\tmagic\t\"PACK\"\n")
+	explainer.printf("\tversion\t2\n")
+	explainer.printf("\tobjects\t%d\n", header.ObjectCount)
+	explainer.printf("\n")
+
+	return int(header.ObjectCount), nil
+}
+
+func (explainer *explainer) explainTrailer(cursor int) error {
+	hashSize := explainer.objectFormat.Size()
+	trailerStart := len(explainer.data) - hashSize
+
+	if cursor != trailerStart {
+		explainer.printf(
+			"note\t%d byte(s) between the last entry and the trailer were unaccounted for\n",
+			trailerStart-cursor,
+		)
+	}
+
+	trailer := explainer.data[trailerStart:]
+
+	explainer.printf("pack trailer\n")
+	explainer.printf("\tchecksum\t%s\n", hex.EncodeToString(trailer))
+
+	hashImpl, err := explainer.objectFormat.New()
+	if err != nil {
+		return fmt.Errorf("object/store: %w", err)
+	}
+
+	_, _ = hashImpl.Write(explainer.data[:trailerStart])
+
+	if bytes.Equal(hashImpl.Sum(nil), trailer) {
+		explainer.printf("\trecomputed\tmatches\n")
+	} else {
+		explainer.printf("\trecomputed\tMISMATCH (corrupt pack or wrong -format)\n")
+	}
+
+	return nil
+}
diff --git a/cmd/explain-pack/resolve.go b/cmd/explain-pack/resolve.go
new file mode 100644
index 00000000..4396fe19
--- /dev/null
+++ b/cmd/explain-pack/resolve.go
@@ -0,0 +1,161 @@
+package main
+
+import (
+	"fmt"
+
+	"lindenii.org/go/furgit/internal/cache/clock"
+	"lindenii.org/go/furgit/internal/format/packfile"
+	"lindenii.org/go/furgit/internal/format/packfile/delta"
+	"lindenii.org/go/furgit/object/header"
+	"lindenii.org/go/furgit/object/id"
+	"lindenii.org/go/lgo/intconv"
+)
+
+const baseCacheMaxWeight = 64 << 20
+
+type resolvedBase struct {
+	entryType packfile.EntryType
+	content   []byte
+}
+
+type baseCache = clock.Clock[int, resolvedBase]
+
+func newBaseCache() *baseCache {
+	return clock.New(baseCacheMaxWeight, func(_ int, base resolvedBase) uint64 {
+		return uint64(len(base.content)) + 32
+	})
+}
+
+func (explainer *explainer) reconstruct(offset, depth int) (packfile.EntryType, []byte, bool, error) {
+	var zero packfile.EntryType
+
+	if depth > delta.MaxChainDepth {
+		return zero, nil, false, fmt.Errorf("delta chain too deep at offset %d", offset)
+	}
+
+	if cached, ok := explainer.cache.Get(offset); ok {
+		return cached.entryType, cached.content, true, nil
+	}
+
+	header, err := packfile.ParseEntryHeader(explainer.data[offset:], explainer.objectFormat.Size())
+	if err != nil {
+		return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err)
+	}
+
+	payloadStart := offset + header.HeaderLen
+	if payloadStart > len(explainer.data) {
+		return zero, nil, false, fmt.Errorf("entry at offset %d: header runs past end of pack", offset)
+	}
+
+	if header.Type.IsBase() {
+		content, _, err := inflateAt(explainer.data[payloadStart:])
+		if err != nil {
+			return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err)
+		}
+
+		explainer.cache.Add(offset, resolvedBase{entryType: header.Type, content: content})
+
+		return header.Type, content, true, nil
+	}
+
+	baseOffset, ok, err := explainer.baseOffset(offset, header)
+	if err != nil {
+		return zero, nil, false, err
+	}
+
+	if !ok {
+		return zero, nil, false, nil
+	}
+
+	baseType, baseContent, ok, err := explainer.reconstruct(baseOffset, depth+1)
+	if err != nil || !ok {
+		return zero, nil, ok, err
+	}
+
+	payload, _, err := inflateAt(explainer.data[payloadStart:])
+	if err != nil {
+		return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err)
+	}
+
+	content, err := delta.Apply(baseContent, payload)
+	if err != nil {
+		return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err)
+	}
+
+	explainer.cache.Add(offset, resolvedBase{entryType: baseType, content: content})
+
+	return baseType, content, true, nil
+}
+
+func (explainer *explainer) baseOffset(offset int, header packfile.EntryHeader) (int, bool, error) {
+	switch header.Type {
+	case packfile.EntryTypeOfsDelta:
+		dist, err := intconv.Uint64ToInt(header.OfsDistance)
+		if err != nil || dist <= 0 || dist > offset {
+			return 0, false, fmt.Errorf("entry at offset %d: ofs-delta base out of bounds", offset)
+		}
+
+		return offset - dist, true, nil
+	case packfile.EntryTypeRefDelta:
+		refBytes := header.RefBase[:explainer.objectFormat.Size()]
+
+		if explainer.idx != nil {
+			baseOffsetU, found, err := explainer.idx.Lookup(refBytes)
+			if err != nil {
+				return 0, false, fmt.Errorf("entry at offset %d: index lookup: %w", offset, err)
+			}
+
+			if found {
+				baseOffset, err := intconv.Uint64ToInt(baseOffsetU)
+				if err != nil {
+					return 0, false, fmt.Errorf("entry at offset %d: index base offset overflows int: %w", offset, err)
+				}
+
+				return baseOffset, true, nil
+			}
+		}
+
+		baseID, err := explainer.objectFormat.FromBytes(refBytes)
+		if err != nil {
+			return 0, false, fmt.Errorf("entry at offset %d: %w", offset, err)
+		}
+
+		if baseOffset, found := explainer.oidIndex[baseID]; found {
+			return baseOffset, true, nil
+		}
+
+		return 0, false, nil
+	case packfile.EntryTypeInvalid,
+		packfile.EntryTypeCommit,
+		packfile.EntryTypeTree,
+		packfile.EntryTypeBlob,
+		packfile.EntryTypeTag,
+		packfile.EntryTypeFuture:
+	}
+
+	return 0, false, fmt.Errorf("entry at offset %d: not a delta entry", offset)
+}
+
+func (explainer *explainer) recomputeOID(entryType packfile.EntryType, content []byte) (id.ObjectID, error) {
+	var zero id.ObjectID
+
+	objectType, err := entryType.ObjectType()
+	if err != nil {
+		return zero, err
+	}
+
+	hashImpl, err := explainer.objectFormat.New()
+	if err != nil {
+		return zero, err
+	}
+
+	_, _ = hashImpl.Write(header.Append(nil, objectType, len(content)))
+	_, _ = hashImpl.Write(content)
+
+	oid, err := explainer.objectFormat.FromBytes(hashImpl.Sum(nil))
+	if err != nil {
+		return zero, err
+	}
+
+	return oid, nil
+}
diff --git a/cmd/idx-bloom/doc.go b/cmd/idx-bloom/doc.go
new file mode 100644
index 00000000..e7d4e818
--- /dev/null
+++ b/cmd/idx-bloom/doc.go
@@ -0,0 +1,8 @@
+// Command idx-bloom reads a Git pack index
+// and writes an IDBL Bloom filter over its object IDs to stdout.
+//
+// With an index filename argument the index is read from that file;
+// with no argument it is read from stdin.
+// A pack index does not record its object format,
+// so the format must be given with -format.
+package main
diff --git a/cmd/idx-bloom/main.go b/cmd/idx-bloom/main.go
new file mode 100644
index 00000000..fa471237
--- /dev/null
+++ b/cmd/idx-bloom/main.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"os"
+
+	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
+	"lindenii.org/go/furgit/object/id"
+)
+
+func main() {
+	format := flag.String("format", "", "object format of the index: sha1 or sha256 (required)")
+
+	flag.Parse()
+
+	err := run(*format, flag.Args(), os.Stdin, os.Stdout)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "idx-bloom:", err)
+		os.Exit(1)
+	}
+}
+
+func run(format string, args []string, stdin io.Reader, stdout io.Writer) error {
+	if format == "" {
+		return fmt.Errorf("the -format flag is required (sha1 or sha256)")
+	}
+
+	objectFormat, err := id.ParseObjectFormat(format)
+	if err != nil {
+		return fmt.Errorf("invalid -format %q: %w", format, err)
+	}
+
+	if len(args) > 1 {
+		return fmt.Errorf("at most one index file argument is accepted, got %d", len(args))
+	}
+
+	data, err := readInput(args, stdin)
+	if err != nil {
+		return err
+	}
+
+	index, err := packidx.Parse(data, objectFormat.Size())
+	if err != nil {
+		return fmt.Errorf("parsing index: %w", err)
+	}
+
+	filter, err := buildFilter(objectFormat, &index)
+	if err != nil {
+		return err
+	}
+
+	_, err = stdout.Write(filter)
+	if err != nil {
+		return fmt.Errorf("writing filter: %w", err)
+	}
+
+	return nil
+}
+
+func readInput(args []string, stdin io.Reader) ([]byte, error) {
+	if len(args) == 0 {
+		data, err := io.ReadAll(stdin)
+		if err != nil {
+			return nil, fmt.Errorf("reading index from stdin: %w", err)
+		}
+
+		return data, nil
+	}
+
+	data, err := os.ReadFile(args[0]) //#nosec G304
+	if err != nil {
+		return nil, fmt.Errorf("reading index %q: %w", args[0], err)
+	}
+
+	return data, nil
+}
+
+func buildFilter(objectFormat id.ObjectFormat, index *packidx.Packidx) ([]byte, error) {
+	objects := index.NumObjects()
+
+	bucketCount, k, err := bloom.RecommendParams(objectFormat, objects)
+	if err != nil {
+		return nil, fmt.Errorf("choosing parameters: %w", err)
+	}
+
+	builder, err := bloom.NewBuilder(objectFormat, bucketCount, k, index.PackHash())
+	if err != nil {
+		return nil, fmt.Errorf("creating builder: %w", err)
+	}
+
+	for pos := range objects {
+		builder.Add(index.OIDAt(pos))
+	}
+
+	return builder.Bytes(), nil
+}
diff --git a/internal/format/packidx/bloom/bloom.go b/internal/format/packidx/bloom/bloom.go
new file mode 100644
index 00000000..8b608221
--- /dev/null
+++ b/internal/format/packidx/bloom/bloom.go
@@ -0,0 +1,180 @@
+package bloom
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"math/bits"
+
+	"lindenii.org/go/furgit/object/id"
+)
+
+// ErrMalformedBloomFilter reports that
+// a Bloom filter is truncated,
+// has a bad signature, version, or hash function,
+// or has inconsistent parameters.
+var ErrMalformedBloomFilter = errors.New("internal/format/packidx/bloom: malformed bloom filter")
+
+const (
+	signature = 0x4944424c // "IDBL"
+	version   = 1
+
+	// HeaderLen is the fixed header length in octets,
+	// i.e., the signature, version, hash function identifier,
+	// B, K, and the trailing zero padding.
+	HeaderLen = 64
+
+	// BucketLen is the length of one bucket in octets,
+	// chosen to match the most common cache-line size.
+	BucketLen = 64
+
+	// wordBits is the bit width of one bucket word.
+	wordBits = 64
+
+	// fieldBits is the width of one in-bucket position field.
+	fieldBits = 9
+)
+
+// checkParams validates the filter parameters
+// against one object hash size,
+// returning log2(bucketCount) on success.
+func checkParams(bucketCount uint32, k uint16, hashSize int) (uint, error) {
+	switch {
+	case bucketCount == 0 || bucketCount&(bucketCount-1) != 0:
+		return 0, errors.New("bucket count not a nonzero power of two") //nolint:err113
+	case k == 0:
+		return 0, errors.New("zero probe count") //nolint:err113
+	}
+
+	log2B := uint(bits.TrailingZeros32(bucketCount))
+	if log2B+fieldBits*uint(k) > uint(hashSize)*8 {
+		return 0, errors.New("parameters exceed hash length") //nolint:err113
+	}
+
+	return log2B, nil
+}
+
+// hashFunctionID returns the on-disk hash function identifier
+// for one object format.
+func hashFunctionID(objectFormat id.ObjectFormat) (uint32, error) {
+	switch objectFormat {
+	case id.ObjectFormatSHA1:
+		return 1, nil
+	case id.ObjectFormatSHA256:
+		return 2, nil
+	case id.ObjectFormatUnknown:
+	}
+
+	return 0, id.ErrInvalidObjectFormat
+}
+
+// Bloom is a parsed blocked Bloom filter view over borrowed bytes.
+//
+// Labels: Deps-Borrowed, Life-Parent, MT-Safe.
+type Bloom struct {
+	// data is the entire filter payload.
+	data []byte
+
+	// buckets is the bucket region, between the header and the trailer.
+	buckets []byte
+
+	// objectFormat is the filter's object format.
+	objectFormat id.ObjectFormat
+
+	// log2B is the base-2 logarithm of the bucket count,
+	// i.e. the number of leading object ID bits that select a bucket.
+	log2B uint
+
+	// k is the number of bits set and tested per object ID.
+	k int
+}
+
+// Parse parses one Bloom filter from data.
+//
+// Labels: Deps-Borrowed, Life-Parent.
+func Parse(data []byte, objectFormat id.ObjectFormat) (Bloom, error) {
+	var zero Bloom
+
+	wantHashID, err := hashFunctionID(objectFormat)
+	if err != nil {
+		return zero, err
+	}
+
+	hashSize := objectFormat.Size()
+
+	if len(data) < HeaderLen {
+		return zero, fmt.Errorf("%w: truncated", ErrMalformedBloomFilter)
+	}
+
+	if binary.BigEndian.Uint32(data) != signature {
+		return zero, fmt.Errorf("%w: bad signature", ErrMalformedBloomFilter)
+	}
+
+	if binary.BigEndian.Uint32(data[4:]) != version {
+		return zero, fmt.Errorf("%w: unsupported version", ErrMalformedBloomFilter)
+	}
+
+	if binary.BigEndian.Uint32(data[8:]) != wantHashID {
+		return zero, fmt.Errorf("%w: hash function mismatch", ErrMalformedBloomFilter)
+	}
+
+	bucketCount := binary.BigEndian.Uint32(data[12:])
+	k := binary.BigEndian.Uint16(data[16:])
+
+	for _, octet := range data[18:HeaderLen] {
+		if octet != 0 {
+			return zero, fmt.Errorf("%w: nonzero padding", ErrMalformedBloomFilter)
+		}
+	}
+
+	log2B, err := checkParams(bucketCount, k, hashSize)
+	if err != nil {
+		return zero, fmt.Errorf("%w: %w", ErrMalformedBloomFilter, err)
+	}
+
+	want := uint64(HeaderLen) + uint64(BucketLen)*uint64(bucketCount) + 2*uint64(hashSize) //#nosec G115
+	if uint64(len(data)) != want {
+		return zero, fmt.Errorf("%w: file size disagrees with bucket count", ErrMalformedBloomFilter)
+	}
+
+	return Bloom{
+		data:         data,
+		buckets:      data[HeaderLen : len(data)-2*hashSize],
+		objectFormat: objectFormat,
+		log2B:        log2B,
+		k:            int(k),
+	}, nil
+}
+
+// PackHash returns the pack hash recorded in the filter trailer.
+//
+// Labels: Life-Parent, Mut-No.
+func (f *Bloom) PackHash() []byte {
+	hashSize := f.objectFormat.Size()
+	end := len(f.data) - hashSize
+
+	return f.data[end-hashSize : end]
+}
+
+// Verify recomputes the filter's trailing checksum and reports any mismatch.
+//
+// Verify reads the whole filter,
+// so callers should treat it as a deliberate integrity check
+// rather than part of the open path.
+func (f *Bloom) Verify() error {
+	hashImpl, err := f.objectFormat.New()
+	if err != nil {
+		return fmt.Errorf("internal/format/packidx/bloom: %w", err)
+	}
+
+	checksumOff := len(f.data) - f.objectFormat.Size()
+
+	_, _ = hashImpl.Write(f.data[:checksumOff])
+
+	if !bytes.Equal(hashImpl.Sum(nil), f.data[checksumOff:]) {
+		return fmt.Errorf("%w: checksum mismatch", ErrMalformedBloomFilter)
+	}
+
+	return nil
+}
diff --git a/internal/format/packidx/bloom/bloom_test.go b/internal/format/packidx/bloom/bloom_test.go
new file mode 100644
index 00000000..bcfb4419
--- /dev/null
+++ b/internal/format/packidx/bloom/bloom_test.go
@@ -0,0 +1,159 @@
+package bloom_test
+
+import (
+	"encoding/binary"
+	"errors"
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
+	"lindenii.org/go/furgit/object/id"
+)
+
+func validFilter(t *testing.T, format id.ObjectFormat) []byte {
+	// TODO: maybe testgit should have something like this?
+	t.Helper()
+
+	builder, err := bloom.NewBuilder(format, 4, 2, make([]byte, format.Size()))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return builder.Bytes()
+}
+
+func otherFormat(t *testing.T, format id.ObjectFormat) id.ObjectFormat {
+	t.Helper()
+
+	for _, candidate := range id.SupportedObjectFormats() {
+		if candidate != format {
+			return candidate
+		}
+	}
+
+	t.Skip("only one supported object format")
+
+	return id.ObjectFormatUnknown
+}
+
+func TestParseValid(t *testing.T) {
+	t.Parallel()
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			_, err := bloom.Parse(validFilter(t, format), format)
+			if err != nil {
+				t.Fatalf("Parse rejected a valid filter: %v", err)
+			}
+		})
+	}
+}
+
+func TestParseMalformed(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name   string
+		mangle func(data []byte) []byte
+	}{
+		{"truncated", func(data []byte) []byte { return data[:bloom.HeaderLen-1] }},
+		{"bad signature", func(data []byte) []byte {
+			data[0] ^= 0xff
+
+			return data
+		}},
+		{"bad version", func(data []byte) []byte {
+			binary.BigEndian.PutUint32(data[4:], 99)
+
+			return data
+		}},
+		{"non power of two", func(data []byte) []byte {
+			binary.BigEndian.PutUint32(data[12:], 3)
+
+			return data
+		}},
+		{"zero probe count", func(data []byte) []byte {
+			binary.BigEndian.PutUint16(data[16:], 0)
+
+			return data
+		}},
+		{"parameters exceed hash", func(data []byte) []byte {
+			binary.BigEndian.PutUint32(data[12:], 1<<31)
+			binary.BigEndian.PutUint16(data[16:], 30)
+
+			return data
+		}},
+		{"nonzero padding", func(data []byte) []byte {
+			data[20] = 1
+
+			return data
+		}},
+		{"size disagrees", func(data []byte) []byte { return data[:len(data)-1] }},
+	}
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			for _, tc := range cases {
+				t.Run(tc.name, func(t *testing.T) {
+					t.Parallel()
+
+					data := tc.mangle(append([]byte(nil), validFilter(t, format)...))
+
+					_, err := bloom.Parse(data, format)
+					if !errors.Is(err, bloom.ErrMalformedBloomFilter) {
+						t.Fatalf("Parse error = %v, want ErrMalformedBloomFilter", err)
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestVerifyDetectsCorruption checks that Verify accepts a sound filter
+// and rejects one whose bucket bytes have been altered.
+func TestVerifyDetectsCorruption(t *testing.T) {
+	t.Parallel()
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			data := validFilter(t, format)
+
+			filter, err := bloom.Parse(data, format)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			err = filter.Verify()
+			if err != nil {
+				t.Fatalf("Verify on a sound filter: %v", err)
+			}
+
+			data[bloom.HeaderLen] ^= 0xff
+
+			err = filter.Verify()
+			if !errors.Is(err, bloom.ErrMalformedBloomFilter) {
+				t.Fatalf("Verify error = %v, want ErrMalformedBloomFilter", err)
+			}
+		})
+	}
+}
+
+func TestParseHashMismatch(t *testing.T) {
+	t.Parallel()
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			_, err := bloom.Parse(validFilter(t, format), otherFormat(t, format))
+			if !errors.Is(err, bloom.ErrMalformedBloomFilter) {
+				t.Fatalf("Parse error = %v, want ErrMalformedBloomFilter", err)
+			}
+		})
+	}
+}
diff --git a/internal/format/packidx/bloom/doc.go b/internal/format/packidx/bloom/doc.go
new file mode 100644
index 00000000..06ca57cd
--- /dev/null
+++ b/internal/format/packidx/bloom/doc.go
@@ -0,0 +1,138 @@
+// Package bloom provides a blocked Bloom filter
+// for pack indexes.
+//
+// A filter answers, from a single cache-line-sized read,
+// whether an object ID is definitely absent from the index it covers.
+// A lookup that must consult many packs
+// can then skip the full binary search
+// in every pack whose filter rejects the object,
+// decreasing the cost of misses.
+//
+// # Rationale
+//
+// Especially for server-side usage,
+// repacking is expensive,
+// and creating multi-pack-indexes is still rather expensive.
+// Incremental multi-pack-indexes partially solve this,
+// but having too many of them defeats the purpose,
+// since the indexes must still be walked in order
+// while performing expensive lookups.
+//
+// Instead, each multi-pack-index layer,
+// and each ordinary pack index,
+// may carry its own filter.
+// The indexes are still traversed in their usual order,
+// but the first step when traversing one
+// is to check whether it could possibly hold the wanted object.
+//
+// The filter is split into 64-octet buckets,
+// matching the most common cache-line size.
+// Some bits of the object ID choose the bucket,
+// and the rest choose several bit positions inside it,
+// so a lookup reads one 64-octet bucket
+// and checks whether all required bits are set.
+//
+// # Parameters
+//
+// A filter is parameterized by
+// the number of buckets B
+// and the number of bits set and tested per object ID, K.
+// All integers in the format are big endian.
+// The object ID is interpreted as a big-endian bitstring,
+// where bit offset 0 is the most significant bit of octet 0.
+// B must be a nonzero power of two,
+// K must be nonzero,
+// and log2(B) + 9*K must not exceed the hash length in bits.
+//
+// # File format
+//
+// A filter file is a 64-octet header,
+// then B buckets of 64 octets each,
+// then a two-hash trailer:
+//
+//   - 4-octet signature: {'I', 'D', 'B', 'L'}.
+//   - 4-octet version identifier (= 1).
+//   - 4-octet object hash algorithm identifier
+//     (= 1 for SHA-1, 2 for SHA-256).
+//   - 4-octet B, the number of buckets.
+//   - 2-octet K, the number of bits set and tested per object ID.
+//   - 46-octet padding, which must be all zero.
+//   - B buckets of 64 octets each.
+//   - the pack trailer hash, which binds the filter to its pack.
+//   - the checksum of everything before it, over the filter's hash function.
+//
+// The hash length is that of the object format,
+// so the trailer is 2 hashes wide
+// and the file size is exactly 64 + 64*B + 2*hashlen octets.
+//
+// A reader must validate that
+// the signature matches,
+// the version is supported,
+// the hash function identifier is recognized,
+// B is nonzero and a power of two,
+// K is nonzero,
+// log2(B) + 9*K does not exceed the hash length in bits,
+// the padding is all zero,
+// and the file size is exactly 64 + 64*B + 2*hashlen octets.
+//
+// # Binding and integrity
+//
+// The pack hash binds a filter to one pack;
+// a reader trusts a filter only when the recorded pack hash
+// matches the pack it accompanies.
+//
+// The checksum guards against corruption of the filter itself.
+// Recomputing it reads the whole file and rehashes it as fsck.
+//
+// # Lookup
+//
+// A lookup against one filter proceeds as follows:
+//
+//  1. Let b be the unsigned integer encoded
+//     by the most significant log2(B) bits of the object ID.
+//     B is a power of two, so 0 <= b < B.
+//  2. Select and read bucket b.
+//  3. For each 0 <= i < K,
+//     take the i-th 9-bit field
+//     from the 9*K bits that follow the bucket-selecting bits,
+//     and let pi be the unsigned integer it encodes,
+//     so 0 <= pi < 512.
+//     Compute wi = pi >> 6 and bi = pi & 63,
+//     so wi identifies one of the eight 64-bit words in bucket b
+//     and bi identifies one bit within that word.
+//     Within each 64-bit word,
+//     bit index 0 is the most significant bit
+//     and bit index 63 is the least significant bit.
+//     Test whether bit bi is set in word wi of bucket b.
+//
+// If any test fails,
+// the object ID is definitely not in the covered index.
+// If all tests succeed,
+// the object ID may be in it.
+// Two of the K 9-bit fields can decode to the same pi,
+// so an insertion may set fewer than K distinct bits;
+// this only raises the false positive rate
+// and never causes a false negative.
+//
+// # Worked example
+//
+// Let B = 1 << 15 = 32768 and K = 8.
+// Then log2(B) = 15,
+// so each lookup uses 15 bits to choose the bucket
+// and 8*9 = 72 bits to choose the in-bucket positions,
+// for a total of 87 bits taken from the object ID.
+// A SHA-1 has 160 bits and a SHA-256 has 256 bits,
+// so both leave ample headroom.
+//
+// # Security considerations
+//
+// Object IDs are public unkeyed hashes,
+// so an adversary can mine packs
+// whose object IDs share a chosen prefix
+// to crowd objects into one bucket
+// and fill its bits.
+// In the worst case this renders some buckets useless,
+// making the filter degrade to "may contain" for those buckets,
+// but it never produces a false negative
+// and is not a significant denial-of-service vector.
+package bloom
diff --git a/internal/format/packidx/bloom/lookup.go b/internal/format/packidx/bloom/lookup.go
new file mode 100644
index 00000000..4a8d7bda
--- /dev/null
+++ b/internal/format/packidx/bloom/lookup.go
@@ -0,0 +1,42 @@
+package bloom
+
+import (
+	"encoding/binary"
+)
+
+// MayContain reports whether oid may be present
+// in the index covered by the filter.
+//
+// oid must be exactly the filter's hash size;
+// MayContain panics otherwise.
+//
+// Labels: Mut-No.
+func (f *Bloom) MayContain(oid []byte) bool {
+	if len(oid) != f.objectFormat.Size() {
+		panic("internal/format/packidx/bloom: invalid object ID length")
+	}
+
+	base := int(binary.BigEndian.Uint32(oid[:4])>>(32-f.log2B)) * BucketLen
+
+	for i := range f.k {
+		word, mask := probe(oid, f.log2B, i)
+		if binary.BigEndian.Uint64(f.buckets[base+word*8:])&mask == 0 {
+			return false
+		}
+	}
+
+	return true
+}
+
+// probe returns the bucket word index and single-bit mask
+// addressed by the i-th probe of oid.
+func probe(oid []byte, log2B uint, i int) (word int, mask uint64) {
+	bitOff := log2B + fieldBits*uint(i)
+	byteOff := bitOff >> 3
+	bitInByte := bitOff & 7
+
+	window := uint32(oid[byteOff])<<8 | uint32(oid[byteOff+1])
+	pi := (window >> (16 - bitInByte - fieldBits)) & 0x1ff
+
+	return int(pi >> 6), 1 << (wordBits - 1 - (pi & 63))
+}
diff --git a/internal/format/packidx/bloom/lookup_test.go b/internal/format/packidx/bloom/lookup_test.go
new file mode 100644
index 00000000..e6264f9a
--- /dev/null
+++ b/internal/format/packidx/bloom/lookup_test.go
@@ -0,0 +1,32 @@
+package bloom_test
+
+import (
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
+	"lindenii.org/go/furgit/object/id"
+)
+
+func TestMayContainBadLength(t *testing.T) {
+	t.Parallel()
+
+	format := id.ObjectFormatSHA256
+
+	builder, err := bloom.NewBuilder(format, 4, 2, make([]byte, format.Size()))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	filter, err := bloom.Parse(builder.Bytes(), format)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if recover() == nil {
+			t.Fatal("MayContain did not panic on a short object ID")
+		}
+	}()
+
+	filter.MayContain(make([]byte, format.Size()-1))
+}
diff --git a/internal/format/packidx/bloom/roundtrip_test.go b/internal/format/packidx/bloom/roundtrip_test.go
new file mode 100644
index 00000000..28db17a5
--- /dev/null
+++ b/internal/format/packidx/bloom/roundtrip_test.go
@@ -0,0 +1,92 @@
+package bloom_test
+
+import (
+	"bytes"
+	"encoding/binary"
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
+	"lindenii.org/go/furgit/object/id"
+)
+
+func makeOID(size int, seed uint64) []byte {
+	out := make([]byte, size)
+	state := seed
+
+	for i := 0; i < size; i += 8 {
+		state = state*6364136223846793005 + 1442695040888963407
+
+		var word [8]byte
+
+		binary.BigEndian.PutUint64(word[:], state)
+		copy(out[i:], word[:])
+	}
+
+	return out
+}
+
+func TestRoundTrip(t *testing.T) {
+	t.Parallel()
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			const objects = 10000
+
+			bucketCount, k, err := bloom.RecommendParams(format, objects)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			size := format.Size()
+			packHash := makeOID(size, 0xC0FFEE)
+
+			builder, err := bloom.NewBuilder(format, bucketCount, k, packHash)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			for i := range objects {
+				builder.Add(makeOID(size, uint64(i)))
+			}
+
+			filter, err := bloom.Parse(builder.Bytes(), format)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if !bytes.Equal(filter.PackHash(), packHash) {
+				t.Fatalf("PackHash = %x, want %x", filter.PackHash(), packHash)
+			}
+
+			err = filter.Verify()
+			if err != nil {
+				t.Fatalf("Verify on a freshly built filter: %v", err)
+			}
+
+			for i := range objects {
+				if !filter.MayContain(makeOID(size, uint64(i))) {
+					t.Fatalf("false negative for added object %d", i)
+				}
+			}
+
+			const probes = 10000
+
+			falsePositives := 0
+
+			for i := range probes {
+				if filter.MayContain(makeOID(size, uint64(1)<<40+uint64(i))) {
+					falsePositives++
+				}
+			}
+
+			rate := float64(falsePositives) / float64(probes)
+			if rate > 0.05 {
+				t.Errorf("false positive rate %.4f exceeds 0.05", rate)
+			}
+
+			t.Logf("B=%d K=%d false positive rate %.4f", bucketCount, k, rate)
+		})
+	}
+}
diff --git a/internal/format/packidx/bloom/write.go b/internal/format/packidx/bloom/write.go
new file mode 100644
index 00000000..e6213a2c
--- /dev/null
+++ b/internal/format/packidx/bloom/write.go
@@ -0,0 +1,164 @@
+package bloom
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash"
+	"math/bits"
+
+	"lindenii.org/go/furgit/object/id"
+	"lindenii.org/go/lgo/intconv"
+)
+
+// ErrInvalidParameters reports that
+// the parameters supplied for a filter build
+// are not representable in the format.
+var ErrInvalidParameters = errors.New("internal/format/packidx/bloom: invalid parameters")
+
+// defaultK is the probe count used by [RecommendParams].
+//
+// With 512-bit buckets it keeps the false positive rate near one percent
+// at the target bucket load.
+const defaultK = 8
+
+// targetLoad is the object count per bucket that [RecommendParams] aims for.
+const targetLoad = 48
+
+// Builder accumulates object IDs into an in-memory Bloom filter
+// and serializes it.
+//
+// Labels: MT-Unsafe.
+type Builder struct {
+	// data is the full filter file, header and trailer included.
+	data []byte
+
+	// buckets aliases the bucket region of data, between header and trailer.
+	buckets []byte
+
+	// hashImpl computes the trailing checksum and gives the hash size.
+	hashImpl hash.Hash
+
+	log2B uint
+	k     int
+}
+
+// NewBuilder creates a filter builder
+// for bucketCount buckets and k probes per object ID,
+// binding the filter to packHash.
+//
+// bucketCount must be a nonzero power of two,
+// k must be nonzero,
+// and log2(bucketCount) + 9*k must not exceed the hash length in bits.
+// packHash must be the pack's trailer hash;
+// NewBuilder panics when its length does not match the object format.
+func NewBuilder(objectFormat id.ObjectFormat, bucketCount uint32, k uint16, packHash []byte) (*Builder, error) {
+	hashID, err := hashFunctionID(objectFormat)
+	if err != nil {
+		return nil, err
+	}
+
+	hashImpl, err := objectFormat.New()
+	if err != nil {
+		return nil, fmt.Errorf("internal/format/packidx/bloom: %w", err)
+	}
+
+	hashSize := objectFormat.Size()
+
+	if len(packHash) != hashSize {
+		panic("internal/format/packidx/bloom: invalid pack hash length")
+	}
+
+	log2B, err := checkParams(bucketCount, k, hashSize)
+	if err != nil {
+		return nil, fmt.Errorf("%w: %w", ErrInvalidParameters, err)
+	}
+
+	total, err := intconv.Uint64ToInt(uint64(HeaderLen) + uint64(BucketLen)*uint64(bucketCount) + 2*uint64(hashSize)) //#nosec G115
+	if err != nil {
+		return nil, fmt.Errorf("%w: %w", ErrInvalidParameters, err)
+	}
+
+	data := make([]byte, total)
+	binary.BigEndian.PutUint32(data[0:], signature)
+	binary.BigEndian.PutUint32(data[4:], version)
+	binary.BigEndian.PutUint32(data[8:], hashID)
+	binary.BigEndian.PutUint32(data[12:], bucketCount)
+	binary.BigEndian.PutUint16(data[16:], k)
+
+	bucketsEnd := total - 2*hashSize
+	copy(data[bucketsEnd:], packHash)
+
+	return &Builder{
+		data:     data,
+		buckets:  data[HeaderLen:bucketsEnd],
+		hashImpl: hashImpl,
+		log2B:    log2B,
+		k:        int(k),
+	}, nil
+}
+
+// Add records oid in the filter.
+//
+// oid must be exactly the filter's hash size;
+// Add panics otherwise.
+func (b *Builder) Add(oid []byte) {
+	if len(oid) != b.hashImpl.Size() {
+		panic("internal/format/packidx/bloom: invalid object ID length")
+	}
+
+	base := int(binary.BigEndian.Uint32(oid[:4])>>(32-b.log2B)) * BucketLen
+
+	for i := range b.k {
+		word, mask := probe(oid, b.log2B, i)
+
+		off := base + word*8
+		set := binary.BigEndian.Uint64(b.buckets[off:]) | mask
+		binary.BigEndian.PutUint64(b.buckets[off:], set)
+	}
+}
+
+// Bytes returns the serialized filter, including its trailing checksum.
+//
+// Labels: Life-Parent, Mut-No.
+func (b *Builder) Bytes() []byte {
+	checksumOff := len(b.data) - b.hashImpl.Size()
+
+	b.hashImpl.Reset()
+	_, _ = b.hashImpl.Write(b.data[:checksumOff])
+	b.hashImpl.Sum(b.data[checksumOff:checksumOff])
+
+	return b.data
+}
+
+// RecommendParams returns filter parameters for an index of n objects,
+// targeting a false positive rate near one percent.
+func RecommendParams(objectFormat id.ObjectFormat, n int) (bucketCount uint32, k uint16, err error) {
+	hashSize := objectFormat.Size()
+	if hashSize == 0 {
+		return 0, 0, id.ErrInvalidObjectFormat
+	}
+
+	const maxPow2 = uint32(1) << 31
+
+	wanted := uint64(0)
+	if n > 0 {
+		wanted = (uint64(n) + targetLoad - 1) / targetLoad
+	}
+
+	switch {
+	case wanted <= 1:
+		bucketCount = 1
+	case wanted > uint64(maxPow2):
+		bucketCount = maxPow2
+	default:
+		bucketCount = uint32(1) << bits.Len64(wanted-1)
+	}
+
+	_, err = checkParams(bucketCount, defaultK, hashSize)
+	if err != nil {
+		return 0, 0, fmt.Errorf("%w: %w", ErrInvalidParameters, err)
+	}
+
+	return bucketCount, defaultK, nil
+}
diff --git a/internal/format/packidx/bloom/write_test.go b/internal/format/packidx/bloom/write_test.go
new file mode 100644
index 00000000..74173921
--- /dev/null
+++ b/internal/format/packidx/bloom/write_test.go
@@ -0,0 +1,95 @@
+package bloom_test
+
+import (
+	"errors"
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
+	"lindenii.org/go/furgit/object/id"
+)
+
+func TestRecommendParams(t *testing.T) {
+	t.Parallel()
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			for _, n := range []int{0, 1, 1000, 10000, 1000000} {
+				bucketCount, k, err := bloom.RecommendParams(format, n)
+				if err != nil {
+					t.Fatalf("n=%d: %v", n, err)
+				}
+
+				if bucketCount == 0 || bucketCount&(bucketCount-1) != 0 {
+					t.Errorf("n=%d: bucket count %d not a power of two", n, bucketCount)
+				}
+
+				_, err = bloom.NewBuilder(format, bucketCount, k, make([]byte, format.Size()))
+				if err != nil {
+					t.Errorf("n=%d: recommended parameters rejected: %v", n, err)
+				}
+			}
+		})
+	}
+}
+
+func TestNewBuilderRejects(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name        string
+		bucketCount uint32
+		k           uint16
+	}{
+		{"zero buckets", 0, 8},
+		{"non power of two", 3, 8},
+		{"zero probe count", 4, 0},
+	}
+
+	for _, format := range id.SupportedObjectFormats() {
+		t.Run(format.String(), func(t *testing.T) {
+			t.Parallel()
+
+			for _, tc := range cases {
+				t.Run(tc.name, func(t *testing.T) {
+					t.Parallel()
+
+					_, err := bloom.NewBuilder(format, tc.bucketCount, tc.k, make([]byte, format.Size()))
+					if !errors.Is(err, bloom.ErrInvalidParameters) {
+						t.Fatalf("error = %v, want ErrInvalidParameters", err)
+					}
+				})
+			}
+		})
+	}
+}
+
+func TestNewBuilderBadPackHash(t *testing.T) {
+	t.Parallel()
+
+	defer func() {
+		if recover() == nil {
+			t.Fatal("NewBuilder did not panic on a short pack hash")
+		}
+	}()
+
+	_, _ = bloom.NewBuilder(id.ObjectFormatSHA256, 4, 2, make([]byte, id.ObjectFormatSHA256.Size()-1))
+}
+
+func TestAddBadLength(t *testing.T) {
+	t.Parallel()
+
+	builder, err := bloom.NewBuilder(id.ObjectFormatSHA256, 4, 2, make([]byte, id.ObjectFormatSHA256.Size()))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	defer func() {
+		if recover() == nil {
+			t.Fatal("Add did not panic on a short object ID")
+		}
+	}()
+
+	builder.Add(make([]byte, id.ObjectFormatSHA256.Size()-1))
+}
diff --git a/internal/format/packidx/lookup.go b/internal/format/packidx/lookup.go
index d1293f47..e4b565b6 100644
--- a/internal/format/packidx/lookup.go
+++ b/internal/format/packidx/lookup.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding/binary"
 	"fmt"
+	"math/bits"
 )
 
 // Lookup searches the index for one object ID
@@ -18,6 +19,53 @@ func (idx *Packidx) Lookup(oid []byte) (offset uint64, found bool, err error) {
 
 	lo, hi := idx.fanoutRange(oid[0])
 
+	// Object IDs are uniform for honest inputs,
+	// interp on next 8 octets converges in O(log log n).
+	//
+	// OIDs are public unkeyed hashes,
+	// so an attacker may sample/filter/mine prefix clusters,
+	// making interpolation mis-estimate every probe.
+	// See https://runxiyu.org/comp/ch4ht/
+	// for why cryptographic hash algorithms are insufficient.
+	//
+	// Cap the interp at bisect's probe count and finish by bisect;
+	// adversarial at O(log n) plus small interpolation overhead,
+	// honest exit well under the cap.
+	target := binary.BigEndian.Uint64(oid[1:9])
+
+	for budget := bits.Len(uint(hi - lo)); hi-lo > 8 && budget > 0; budget-- {
+		loKey := binary.BigEndian.Uint64(idx.OIDAt(lo)[1:9])
+		hiKey := binary.BigEndian.Uint64(idx.OIDAt(hi - 1)[1:9])
+
+		var mid int
+
+		switch {
+		case target <= loKey:
+			mid = lo
+		case target >= hiKey:
+			mid = hi - 1
+		default:
+			hi128, lo128 := bits.Mul64(target-loKey, uint64(hi-lo-1)) //#nosec G115
+			q, _ := bits.Div64(hi128, lo128, hiKey-loKey)
+			mid = lo + int(q) //#nosec G115
+		}
+
+		switch cmp := bytes.Compare(oid, idx.OIDAt(mid)); {
+		case cmp == 0:
+			offset, err = idx.OffsetAt(mid)
+			if err != nil {
+				return 0, false, err
+			}
+
+			return offset, true, nil
+		case cmp < 0:
+			hi = mid
+		default:
+			lo = mid + 1
+		}
+	}
+
+	// Interpolation narrowed or capped; bisect to finish.
 	for lo < hi {
 		mid := lo + (hi-lo)/2
 
diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go
index f0ab6622..afed996c 100644
--- a/object/store/packed/internal/ingest/finalize.go
+++ b/object/store/packed/internal/ingest/finalize.go
@@ -8,6 +8,7 @@ import (
 	"slices"
 
 	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
 	"lindenii.org/go/furgit/internal/format/packrev"
 	"lindenii.org/go/furgit/object/id"
 	"lindenii.org/go/lgo/intconv"
@@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) {
 		return Result{}, err
 	}
 
+	bloomBuilder, err := ingestion.buildBloom(entries, packHash)
+	if err != nil {
+		return Result{}, err
+	}
+
+	bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error {
+		_, err := w.Write(bloomBuilder.Bytes())
+
+		return err
+	})
+	if err != nil {
+		return Result{}, err
+	}
+
 	base := "pack-" + ingestion.packHash.String()
 	packFinal := base + ".pack"
 	idxFinal := base + ".idx"
 	revFinal := base + ".rev"
+	bloomFinal := base + ".bloom"
 
-	// Link the pack and reverse index before the index,
+	// Link the pack, reverse index, and Bloom filter before the index,
 	// since the index is what publishes the pack to readers.
 	err = ingestion.link(ingestion.packTmp, packFinal)
 	if err != nil {
@@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) {
 		return Result{}, err
 	}
 
+	err = ingestion.link(bloomTmp, bloomFinal)
+	if err != nil {
+		return Result{}, err
+	}
+
 	err = ingestion.link(idxTmp, idxFinal)
 	if err != nil {
 		return Result{}, err
@@ -69,12 +90,34 @@ func (ingestion *ingestion) finalize() (Result, error) {
 		PackName:    packFinal,
 		IdxName:     idxFinal,
 		RevName:     revFinal,
+		BloomName:   bloomFinal,
 		PackHash:    ingestion.packHash,
 		ObjectCount: objectCount,
 		ThinFixed:   ingestion.thinFixed,
 	}, nil
 }
 
+// buildBloom builds a Bloom filter over the index entries' object IDs,
+// bound to packHash.
+func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) {
+	bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries))
+	if err != nil {
+		return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+	}
+
+	builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash)
+	if err != nil {
+		return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+	}
+
+	size := ingestion.objectFormat.Size()
+	for i := range entries {
+		builder.Add(entries[i].OID[:size])
+	}
+
+	return builder, nil
+}
+
 // indexEntries returns the index entries in object-ID order
 // and, for each record in pack order, its position in that index order.
 func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) {
diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go
index 0ae5593a..9cd6ef1d 100644
--- a/object/store/packed/internal/ingest/result.go
+++ b/object/store/packed/internal/ingest/result.go
@@ -13,6 +13,9 @@ type Result struct {
 	// RevName is the destination-relative name of the written reverse index.
 	RevName string
 
+	// BloomName is the destination-relative name of the written Bloom filter.
+	BloomName string
+
 	// PackHash is the pack trailer hash
 	// shared by the pack, index, and reverse index.
 	PackHash id.ObjectID
diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go
index 394d8f6e..adc0ba35 100644
--- a/object/store/packed/internal/ingest/writepack_test.go
+++ b/object/store/packed/internal/ingest/writepack_test.go
@@ -8,6 +8,8 @@ import (
 	"path/filepath"
 	"testing"
 
+	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
 	"lindenii.org/go/furgit/internal/testgit"
 	"lindenii.org/go/furgit/object/id"
 	"lindenii.org/go/furgit/object/store"
@@ -89,6 +91,81 @@ func TestWritePackMatchesGit(t *testing.T) {
 	}
 }
 
+// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter
+// that reports every object in the pack as present.
+func TestWritePackBloom(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat})
+			if err != nil {
+				t.Fatalf("NewRepo: %v", err)
+			}
+
+			seeded, err := repo.SeedHistory(t)
+			if err != nil {
+				t.Fatalf("SeedHistory: %v", err)
+			}
+
+			gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{
+				RevIndex: true,
+				Revs:     false,
+				Exclude:  nil,
+			})
+			if err != nil {
+				t.Fatalf("PackObjects: %v", err)
+			}
+
+			stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile pack: %v", err)
+			}
+
+			dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{
+				ThinBase: nil,
+				Progress: nil,
+			})
+
+			if result.BloomName == "" {
+				t.Fatal("BloomName is empty")
+			}
+
+			bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile bloom: %v", err)
+			}
+
+			filter, err := bloom.Parse(bloomBytes, objectFormat)
+			if err != nil {
+				t.Fatalf("bloom.Parse: %v", err)
+			}
+
+			idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile idx: %v", err)
+			}
+
+			index, err := packidx.Parse(idxBytes, objectFormat.Size())
+			if err != nil {
+				t.Fatalf("packidx.Parse: %v", err)
+			}
+
+			if !bytes.Equal(filter.PackHash(), index.PackHash()) {
+				t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash())
+			}
+
+			for pos := range index.NumObjects() {
+				if !filter.MayContain(index.OIDAt(pos)) {
+					t.Fatalf("filter rejects object at index position %d", pos)
+				}
+			}
+		})
+	}
+}
+
 // TestWritePackEmpty verifies that a zero-object pack
 // succeeds without writing any artifacts.
 func TestWritePackEmpty(t *testing.T) {
diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go
index e54d34b2..e06870a9 100644
--- a/object/store/packed/lookup.go
+++ b/object/store/packed/lookup.go
@@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) {
 	oid := objectID.RawBytes()
 
 	for _, p := range packed.order.Keys() {
+		if p.filter != nil && !p.filter.MayContain(oid) {
+			continue
+		}
+
 		offsetU, found, err := p.idx.Lookup(oid)
 		if err != nil {
 			return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err)
diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go
index dd43bc7a..9cd6162b 100644
--- a/object/store/packed/pack.go
+++ b/object/store/packed/pack.go
@@ -8,6 +8,7 @@ import (
 
 	"lindenii.org/go/furgit/internal/format/packfile"
 	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/format/packidx/bloom"
 	"lindenii.org/go/furgit/internal/mmap"
 	"lindenii.org/go/furgit/object/id"
 	"lindenii.org/go/lgo/intconv"
@@ -36,6 +37,9 @@ type pack struct {
 	// and data aliases them.
 	dataMapping *mmap.Mmap
 	data        []byte
+
+	bloomMapping *mmap.Mmap
+	filter       *bloom.Bloom
 }
 
 // openPack opens, maps, and validates
@@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack,
 		return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err)
 	}
 
+	bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash())
+
 	return &pack{
-		name:        name,
-		idxMapping:  idxMapping,
-		idx:         idx,
-		dataMapping: dataMapping,
-		data:        dataMapping.Data(),
+		name:         name,
+		idxMapping:   idxMapping,
+		idx:          idx,
+		dataMapping:  dataMapping,
+		data:         dataMapping.Data(),
+		bloomMapping: bloomMapping,
+		filter:       filter,
 	}, nil
 }
 
+func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) {
+	mapping, err := mapFile(root, name+".bloom")
+	if err != nil {
+		return nil, nil
+	}
+
+	filter, err := bloom.Parse(mapping.Data(), objectFormat)
+	if err != nil {
+		_ = mapping.Close()
+
+		return nil, nil
+	}
+
+	if !bytes.Equal(filter.PackHash(), packHash) {
+		_ = mapping.Close()
+
+		return nil, nil
+	}
+
+	return mapping, &filter
+}
+
 // mapFile opens and maps one file under root.
 func mapFile(root *os.Root, name string) (*mmap.Mmap, error) {
 	file, err := root.Open(name)
@@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error {
 	return nil
 }
 
-// close releases the pack data and index mappings.
+// close releases the pack data, index, and filter mappings.
 func (pack *pack) close() error {
-	return errors.Join(
+	errs := []error{
 		pack.dataMapping.Close(),
 		pack.idxMapping.Close(),
-	)
+	}
+
+	if pack.bloomMapping != nil {
+		errs = append(errs, pack.bloomMapping.Close())
+	}
+
+	return errors.Join(errs...)
 }
diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go
index 5e0b85cb..977a9543 100644
--- a/object/store/packed/quarantine.go
+++ b/object/store/packed/quarantine.go
@@ -156,6 +156,8 @@ func packPromotionPriority(name string) int {
 		return 1
 	case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"):
 		return 2
+	case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"):
+		return 2
 	case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"):
 		return 3
 	default:
diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go
index ca00ea94..8a22b90f 100644
--- a/object/tree/malformed_test.go
+++ b/object/tree/malformed_test.go
@@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) {
 				{name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)},
 				{name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)},
 				{name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)},
+				{name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)},
 			} {
 				t.Run(tc.name, func(t *testing.T) {
 					t.Parallel()
diff --git a/research/dynamic_packfiles.txt b/research/dynamic_packfiles.txt
new file mode 100644
index 00000000..e4fe7e54
--- /dev/null
+++ b/research/dynamic_packfiles.txt
@@ -0,0 +1,179 @@
+dynamic packfiles to append objects
+
+gc/refcount process punches page-sized holes in them for pages fully
+within the space of unwanted objects, after setting a tombstone mark
+
+holes are recorded in an index and re-used
+
+then, if desired, the repack process removes all the punched holes
+and anything surrounding from unwanted objects that are slightly out
+of the page boundary
+
+repack is not really git's repack algorithm, it's bascially just
+defragmentation.
+
+genreational bloom filters
+
+idx design
+==========
+
+so, let's first get our invariants and patterns clear.
+
+* fixed-length cryptographic object IDs
+* essentially uniform key distribution
+* exact lookup only, no range scans, no ordered iteration requirements
+* reads are extremely important
+* writes are mostly append-like
+* deletes/tombstones may happen later but are secondary
+
+1st design
+----------
+
+* mutable front index
+* immutable base index
+* period merge/compaction into a new base generation
+
+
+
+upload-pack/send-pack/defrag
+============================
+
+take current pack, remove dead objects/holes, filter objects out, record
+offsets and adjust ofs_deltas since they always go backwards, write the pack
+back; then stream written pack to client. two-step necessary because pack
+header includes object count; could have a custom new protocol that doesn't do
+so.
+
+random chat log dump
+====================
+<~runxiyu> ori: actually. i think my hashtable-ish .idx scheme doesn't work really well with e.g. "user provided us a small part of the hash"
+<~runxiyu> and when using the Git CLI, abbreviated hashes are extremely common....
+<~runxiyu> not lik ei'd need them in a *forge*
+<~runxiyu> but ugh
+<~runxiyu> i guess i'm going with some sort of b-tree :((
+<~runxiyu> ~~maybe i should just port gefs to git~~
+<&ori> runxiyu: why not? you should be able to pick the pages based on the prefix and then scan, no?
+<~rx> ori: i need to somehow munge the has to prevent page directory explosions
+<~rx> the hash*
+<~rx> e.g. siphash(objectid, secret)
+<~rx> otherwise an attacker could give you 10M objects that start with 00000 and whatnot
+<&ori> what's the worst case that would happen there, and is it exponentially worse than giving you 10M objects that start with anything?
+<&ori> I'm thinking that you can't generate a case worse than 256/nobject extra table lookups, assuming one bit per fanout..
+<~runxiyu> ori: for extendible hashing, yes, definitely worse
+<~runxiyu> the directory will expand a lot for no good reason
+<&ori> yes, but you have 256 bits of hash
+<&ori> how much is a lot worse?
+<&ori> what's the worst an attacker can do, and how is the impact worse than uploading 10M giant objects?
+<&ori> also, spotted a bag of kuai kuai keeping the cash register working today at a tea shop
+<~runxiyu> waitt
+<~runxiyu> hmmm
+ * runxiyu looks agagin if it's O(N) or O(2^N)
+<~runxiyu> well
+<~runxiyu> i think it should be a O(2^n) directory size when the attacker can control n bits prefix
+<&ori> what's the 'n' here?
+<~runxiyu> > can control n bits prefix
+<&ori> yeah, you run out of prefix pretty quickly, though
+<&ori> I'm not seeing how you could get an exponential blowup if you share pages
+<&ori> may be missing something, though
+<~runxiyu> hm
+<&ori> oh, wait, I see
+<&ori> no, wait
+<~runxiyu> i think im confusing myself too to some extent but something doesn't feel right
+<~runxiyu> urgh
+<~runxiyu> okay, rethinking this
+<~runxiyu> d is the global depth
+<~runxiyu> diretory size is 2^d
+<~runxiyu> B records per bucket
+<~runxiyu> whatever happens inside the bucket idc, let's say it's a linked list
+<~runxiyu> whatever happens inside the bucket idc, let's say it's an array* (linked lists suck)
+<~runxiyu> l <= d
+<~runxiyu> (l being the local depth of a bucket)
+<~runxiyu> normal: d = log^2(N/B)
+<&ori> ahh, I see.
+<~runxiyu> N is the object count
+<&ori> yes, so what if you binary searched the page directory, or made it multi-level
+<~runxiyu> an attacker could grab a giant repo and find commonly-prefixed objects, they don't need to brute force their own
+<~runxiyu> ori: remember we're trying to do something easy to add new objects into
+<~runxiyu> how'd you do that with a binary search?
+<~runxiyu> not sure what you mean by multi-level yet here
+<~runxiyu> well, it could just turn into a b+tree...
+<~runxiyu> hm
+<&ori> multilevel -- you have pd[0] using bits 0..n
+<~runxiyu> maybe an lmdb object store isn't too bad after all
+<&ori> pd[0][1] using bits n...m
+<&ori> etc
+<&ori> and the reason I was a bit confused was that I had thought the directory was a trie
+<&ori> rather than just an expanding top level directory
+<~runxiyu> ah
+<&ori> so, yeah, I was thinking you could make the page directory an actual trie
+<~runxiyu> sigh
+<~runxiyu> i guess abbreviated object IDs is something i can't really skip.
+<~runxiyu> ori: ill look into radix trees and LSM trees too
+<~runxiyu> well, you're basically suggesting a radix tree i guess
+<~runxiyu> well actually
+<~runxiyu> radix might not necessarily be the best trie here
+<~runxiyu> idk
+<~runxiyu> hm
+<~runxiyu> firstly im really heavy on reads
+<~runxiyu> and random keys with no sequential access
+<~runxiyu> ok LSM makes no sense
+<&hax[xor]> > O(2^N)
+<~runxiyu> ori: thoughts on how to make tries reasonable to use on disks?
+<&hax[xor]> that sounds like something is already very broken
+<~runxiyu> hax[xor]: wdym
+<&hax[xor]> directory size should absolutely not scale like that
+<~runxiyu> hax[xor]: maybe read up on how extendible hsahing works again?
+<&hax[xor]> probably but if that's how it scales it still sounds verybroken
+<~runxiyu> n is not the amount of objects
+<~runxiyu> it's a pathlogic condition caused by chosne-prefix keys
+<~runxiyu> (your keys are usually supposed to be hashed into something the attacker can't predict)
+<&hax[xor]> if you mean the directory size scales linearly with the number of objects the attacker puts in it... that sounds perfectly normal?
+<&ori> runxiyu: same as extendible hashing, just after you extend to, say, 8 bits, you stop splitting the page directory, and have subdirectories
+<~runxiyu> ori: that could make senes
+<~runxiyu> haven't thought it through
+<~runxiyu> directory size is 2^d, d being the global depth
+<~runxiyu> urgh i need to review for exams
+<~runxiyu> okay
+<~runxiyu> write amplification issue
+<~runxiyu> im not sure how significant this is for realistic git workloads
+<~runxiyu> i haven't counted, but there should be many, many, many more reads than writes
+<~runxiyu> if write amplification is really an issue
+<&ori> I may go wander around a bit.
+<~runxiyu> then ill just port gefs
+<~runxiyu> ori: do you mean IRL, or over dynamic pack data structures-
+<&ori> irl.
+<~runxiyu> alright that makes more sense :P
+<&ori> tomorrow I think I check out Jiufen
+<~runxiyu> frick i want to be able to type epsilon with compose
+<&ori> is that not possible?
+<~runxiyu> i don't seem to be able to
+<~runxiyu> but idk the compose tables on my system
+<~runxiyu> ε
+<~runxiyu> well
+<~runxiyu> unicode hex input always works :/
+<~runxiyu> OKAY FUCK
+<~runxiyu> I keep getting distracted by interesting things
+<~runxiyu> I need to review for my fucking exams
+-- Mode #chat [-q runxiyu] by runxiyu
+-- Mode #chat [-a runxiyu] by runxiyu
+-- #chat: You must be a channel halfop or higher to set channel mode b (ban).
+-- Mode #chat [+b mute:account:runxiyu] by runxiyu
+-- #chat: You cannot send messages to this channel whilst a m: (mute) extban is set matching you.
+-- #chat: You cannot send messages to this channel whilst a m: (mute) extban is set matching you.
+<&f_> does that even work?
+<&ori> for 9front, <alt>*e gives ε
+<&ori> but, don't remember the compose map
+<&ori> thought that there was a similar thing for all greek letters
+
+
+See also:
+https://github.com/inkandswitch/darn
+https://www.youtube.com/watch?v=nk4nefmguZk
+https://crates.io/crates/iroh-blobs
+https://crates.io/crates/bao-tree
+
+
+Actually, who cares about abbreviated hashes?
+Clients. Clients only.
+It will be a separate interface satisfied by "normal repos"
+but not satisfied by our store. Good enough.
diff --git a/research/packfile_bloom.txt b/research/packfile_bloom.txt
new file mode 100644
index 00000000..63acafbe
--- /dev/null
+++ b/research/packfile_bloom.txt
@@ -0,0 +1,133 @@
+Packfile bloom filter RFC
+=========================
+
+Problem
+-------
+
+Especially for server-side usages, repacking is extremely expensive, and
+creating multi-pack-indexes is still rather expensive. Incremental MIDX
+partially solves this, but would defeat the purpose of MIDX when there are too
+many of them, as Git would still have to walk the MIDXes in order while
+performing expensive indexing queries.
+
+Idea
+----
+
+Each MIDX layer, and each non-MIDX index, comes with a bloom filter. MIDXes and
+ordinary .idx files are still traversed in their usual order, but the first
+step when traversing them, is to check whether that index could possibly have
+the desired object, through a bloom filter.
+
+We will want the filters to be mmaped, and we want the lookup cost to be
+dominated by one cache-line read rather than using many scattered reads.
+Therefore, a blocked bloom filter is likely the right direction here. The steps
+are as follows:
+
+1. Split the filter into 64-octet buckets, since 64 octets is the most common
+   cache-line size.
+2. Use some bits of the object ID to choose the bucket.
+3. Use the rest of the key to choose several bit positions inside that bucket.
+4. A lookup thus reads one 64-octet bucket and checks whether all required bits
+   are set.
+
+Definitions
+-----------
+
+Let:
+
+     B := number of buckets
+     K := number of bits set and tested per object ID
+
+* All integers here are big endian.
+* The OID is to be interpreted as a big-endian bitstring, where bit offset 0
+  is the most significant bit of octet 0.
+* log2(B) + 9K <= hash length in bits.
+
+File layout
+-----------
+
+* 4-octet signature: {'I', 'D', 'B', 'L'}
+* 4-octet version identifier (= 1)
+* 4-octet object hash algorithm identifier (= 1 for SHA-1, 2 for SHA-256)
+* 4-octet B (number of buckets)
+* 2-octet K (number of bits set and tested per object ID)
+* 46-octet padding (must be all zeros)
+* B buckets of 64 octets each.
+
+Validation
+----------
+
+* Matching signature
+* Supported version (the rest of the rules are for this version)
+* Hash function identifier must be recognized
+* B must be nonzero and a power of two
+* K must be nonzero
+* log2(B) + 9K <= hash length in bits
+* Padding must be all zero
+* File size must be 64 + 64 * B octets
+
+Lookup procedure
+----------------
+
+1. Let b be the unsigned integer encoded by the most significant log2(B) bits
+   of OID. B is a power of two, and 0 <= b < B.
+2. Select and read bucket b.
+3. For each 0 <= i < K:
+   1. Start immediately after the most significant log2(B) bits of OID, let the
+      i-th 9-bit field be the bits at offset 9 * i through 9 * i + 8 within the
+      next 9 * K bits of the OID.
+   2. Let pi be the unsigned integer encoded by that 9-bit field.
+      Then, 0 <= pi < 512.
+   3. Compute wi := pi >> 6, and bi := pi & 63.
+      Thus, wi identifies one of the 8 64-bit words in bucket b, and bi
+      identifies one bit within that word.
+   4. Test whether bi is set in the word wi of bucket b. (Within each 64-bit
+      word, bit index 0 denotes the most significant bit, and bit index 63
+      denotes the least significant bit.)
+
+If any test fails, the OID is definitely not in the relevant idx.
+If all tests succeed, the OID may be in the relevant idx.
+
+Note that two of the K 9-bit fields can decode to the same pi, which means an
+insertion may set fewer than K distinct bits.
+
+Worked example
+--------------
+
+Let:
+
+    B = 1 << 15 = 32768
+    K = 8
+
+Then, log2(B) = 15. Each lookup thus uses 15 bits to choose the bucket
+and 8 * 9 = 72 bits to choose the in-bucket positions, for a total of
+87 bits taken from the object ID.
+
+1. Read the first 15 bits of OID and interpret them as b, where
+   0 <= b < 32768.
+2. Read bucket b.
+3. For each 0 <= i < 8:
+   1. Read the i-th 9-bit field from the next 72 bits of OID and interpret it
+      as pi, where 0 <= pi < 512.
+   2. Compute: wi = pi >> 6, bi = pi & 63.
+   3. Test whether bit bi is set in the word wi of bucket b.
+
+Security considerations
+------------------------
+
+An adversarial packfile where objects are (computationally intensive, even for
+SHA-1 as vulnerable as it is) constructed to have the same prefix for the
+relevant object format hash algorithm could be used to fill up the bloom
+filters, rendering some buckets useless. In the worst case, if they somehow
+fill all filters, this proposal's optimizations become useless, but would not
+be a significant DoS vector.
+
+TODOs
+-----
+
+* Consider dropping mmap (page read vs cachline read)
+* How should B and K be chosen?
+* How does creation/insert work? Note that packfiles and `.idx`es are immutable.
+* What are the sizes?
+* What are the false positive rates?
+* How are benchmarks?