aboutsummaryrefslogtreecommitdiff
path: root/object
diff options
context:
space:
mode:
Diffstat (limited to 'object')
-rw-r--r--object/store/mix/mix.go2
-rw-r--r--object/store/packed/delta.go14
-rw-r--r--object/store/packed/entry.go3
-rw-r--r--object/store/packed/internal/ingest/finalize.go45
-rw-r--r--object/store/packed/internal/ingest/resolve.go2
-rw-r--r--object/store/packed/internal/ingest/result.go3
-rw-r--r--object/store/packed/internal/ingest/thin.go2
-rw-r--r--object/store/packed/internal/ingest/writepack_test.go77
-rw-r--r--object/store/packed/lookup.go4
-rw-r--r--object/store/packed/pack.go52
-rw-r--r--object/store/packed/packed.go2
-rw-r--r--object/store/packed/quarantine.go2
-rw-r--r--object/store/packed/reader.go2
-rw-r--r--object/store/packed/refresh.go2
-rw-r--r--object/store/packed/refresh_test.go2
-rw-r--r--object/tree/malformed_test.go1
-rw-r--r--object/tree/parse.go43
17 files changed, 232 insertions, 26 deletions
diff --git a/object/store/mix/mix.go b/object/store/mix/mix.go
index 2e8e926b..b048fe86 100644
--- a/object/store/mix/mix.go
+++ b/object/store/mix/mix.go
@@ -28,7 +28,7 @@ func New(backends ...store.ObjectReader) *Mix {
present[backend] = struct{}{}
}
- order := mru.New[store.ObjectReader]()
+ order := mru.New[store.ObjectReader](mru.Options{Interval: 48})
order.Sync(present)
return &Mix{
diff --git a/object/store/packed/delta.go b/object/store/packed/delta.go
index e48e2c65..5b538221 100644
--- a/object/store/packed/delta.go
+++ b/object/store/packed/delta.go
@@ -1,7 +1,6 @@
package packed
import (
- "bytes"
"fmt"
"io"
"slices"
@@ -11,8 +10,14 @@ import (
"lindenii.org/go/furgit/internal/format/packfile"
"lindenii.org/go/furgit/internal/format/packfile/delta"
"lindenii.org/go/lgo/intconv"
+ "lindenii.org/go/lgo/sync"
)
+//nolint:gochecknoglobals
+var deltaHeaderPool = sync.NewPool(func() *[delta.MaxHeaderSizesLen]byte {
+ return new([delta.MaxHeaderSizesLen]byte)
+})
+
// deltaNode is a delta entry on a resolution chain.
type deltaNode struct {
// payload is the entry's compressed delta payload view.
@@ -208,16 +213,19 @@ func (packed *Packed) resolveType(p *pack, offset int, entryHeader packfile.Entr
// deltaResultSize reads the declared result size
// from one compressed delta payload prefix.
func deltaResultSize(payload []byte, deltaSize uint64) (int, error) {
- zr, err := zlib.NewReader(bytes.NewReader(payload))
+ zr, err := zlib.NewReaderBytes(payload)
if err != nil {
return 0, fmt.Errorf("reading delta header: %w", err)
}
defer func() { _ = zr.Close() }()
+ buf := deltaHeaderPool.Get()
+ defer deltaHeaderPool.Put(buf)
+
prefixLen := min(uint64(delta.MaxHeaderSizesLen), deltaSize)
- prefix := make([]byte, prefixLen)
+ prefix := buf[:prefixLen]
_, err = io.ReadFull(zr, prefix)
if err != nil {
diff --git a/object/store/packed/entry.go b/object/store/packed/entry.go
index e9d45bb4..908afad0 100644
--- a/object/store/packed/entry.go
+++ b/object/store/packed/entry.go
@@ -1,7 +1,6 @@
package packed
import (
- "bytes"
"errors"
"fmt"
"io"
@@ -49,7 +48,7 @@ func inflate(payload []byte, expectedSize uint64) ([]byte, error) {
return nil, fmt.Errorf("declared size: %w", err)
}
- zr, err := zlib.NewReader(bytes.NewReader(payload))
+ zr, err := zlib.NewReaderBytes(payload)
if err != nil {
return nil, fmt.Errorf("inflating entry payload: %w", err)
}
diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go
index f0ab6622..afed996c 100644
--- a/object/store/packed/internal/ingest/finalize.go
+++ b/object/store/packed/internal/ingest/finalize.go
@@ -8,6 +8,7 @@ import (
"slices"
"lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/format/packrev"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/lgo/intconv"
@@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) {
return Result{}, err
}
+ bloomBuilder, err := ingestion.buildBloom(entries, packHash)
+ if err != nil {
+ return Result{}, err
+ }
+
+ bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error {
+ _, err := w.Write(bloomBuilder.Bytes())
+
+ return err
+ })
+ if err != nil {
+ return Result{}, err
+ }
+
base := "pack-" + ingestion.packHash.String()
packFinal := base + ".pack"
idxFinal := base + ".idx"
revFinal := base + ".rev"
+ bloomFinal := base + ".bloom"
- // Link the pack and reverse index before the index,
+ // Link the pack, reverse index, and Bloom filter before the index,
// since the index is what publishes the pack to readers.
err = ingestion.link(ingestion.packTmp, packFinal)
if err != nil {
@@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) {
return Result{}, err
}
+ err = ingestion.link(bloomTmp, bloomFinal)
+ if err != nil {
+ return Result{}, err
+ }
+
err = ingestion.link(idxTmp, idxFinal)
if err != nil {
return Result{}, err
@@ -69,12 +90,34 @@ func (ingestion *ingestion) finalize() (Result, error) {
PackName: packFinal,
IdxName: idxFinal,
RevName: revFinal,
+ BloomName: bloomFinal,
PackHash: ingestion.packHash,
ObjectCount: objectCount,
ThinFixed: ingestion.thinFixed,
}, nil
}
+// buildBloom builds a Bloom filter over the index entries' object IDs,
+// bound to packHash.
+func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) {
+ bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries))
+ if err != nil {
+ return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+ }
+
+ builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash)
+ if err != nil {
+ return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+ }
+
+ size := ingestion.objectFormat.Size()
+ for i := range entries {
+ builder.Add(entries[i].OID[:size])
+ }
+
+ return builder, nil
+}
+
// indexEntries returns the index entries in object-ID order
// and, for each record in pack order, its position in that index order.
func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) {
diff --git a/object/store/packed/internal/ingest/resolve.go b/object/store/packed/internal/ingest/resolve.go
index 8595d366..77b0fa0f 100644
--- a/object/store/packed/internal/ingest/resolve.go
+++ b/object/store/packed/internal/ingest/resolve.go
@@ -270,7 +270,7 @@ func (ingestion *ingestion) countUnresolved() int {
func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID {
seen := make(map[id.ObjectID]struct{})
- var out []id.ObjectID
+ out := make([]id.ObjectID, 0, ingestion.deltaCount-ingestion.deltasResolved)
for index := range ingestion.records {
rec := &ingestion.records[index]
diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go
index 0ae5593a..9cd6ef1d 100644
--- a/object/store/packed/internal/ingest/result.go
+++ b/object/store/packed/internal/ingest/result.go
@@ -13,6 +13,9 @@ type Result struct {
// RevName is the destination-relative name of the written reverse index.
RevName string
+ // BloomName is the destination-relative name of the written Bloom filter.
+ BloomName string
+
// PackHash is the pack trailer hash
// shared by the pack, index, and reverse index.
PackHash id.ObjectID
diff --git a/object/store/packed/internal/ingest/thin.go b/object/store/packed/internal/ingest/thin.go
index 8d1566e0..fa125f2f 100644
--- a/object/store/packed/internal/ingest/thin.go
+++ b/object/store/packed/internal/ingest/thin.go
@@ -33,7 +33,7 @@ func (ingestion *ingestion) fixThin(external []id.ObjectID, adjacency adjacency,
// Drop the trailer from the write cursor.
ingestion.scanner.consumed -= hashSize
- var appended []int
+ appended := make([]int, 0, len(external))
for _, baseOID := range external {
ty, content, err := ingestion.opts.ThinBase.ReadBytesContent(baseOID)
diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go
index 394d8f6e..adc0ba35 100644
--- a/object/store/packed/internal/ingest/writepack_test.go
+++ b/object/store/packed/internal/ingest/writepack_test.go
@@ -8,6 +8,8 @@ import (
"path/filepath"
"testing"
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/testgit"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/furgit/object/store"
@@ -89,6 +91,81 @@ func TestWritePackMatchesGit(t *testing.T) {
}
}
+// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter
+// that reports every object in the pack as present.
+func TestWritePackBloom(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat})
+ if err != nil {
+ t.Fatalf("NewRepo: %v", err)
+ }
+
+ seeded, err := repo.SeedHistory(t)
+ if err != nil {
+ t.Fatalf("SeedHistory: %v", err)
+ }
+
+ gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{
+ RevIndex: true,
+ Revs: false,
+ Exclude: nil,
+ })
+ if err != nil {
+ t.Fatalf("PackObjects: %v", err)
+ }
+
+ stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile pack: %v", err)
+ }
+
+ dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{
+ ThinBase: nil,
+ Progress: nil,
+ })
+
+ if result.BloomName == "" {
+ t.Fatal("BloomName is empty")
+ }
+
+ bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile bloom: %v", err)
+ }
+
+ filter, err := bloom.Parse(bloomBytes, objectFormat)
+ if err != nil {
+ t.Fatalf("bloom.Parse: %v", err)
+ }
+
+ idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile idx: %v", err)
+ }
+
+ index, err := packidx.Parse(idxBytes, objectFormat.Size())
+ if err != nil {
+ t.Fatalf("packidx.Parse: %v", err)
+ }
+
+ if !bytes.Equal(filter.PackHash(), index.PackHash()) {
+ t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash())
+ }
+
+ for pos := range index.NumObjects() {
+ if !filter.MayContain(index.OIDAt(pos)) {
+ t.Fatalf("filter rejects object at index position %d", pos)
+ }
+ }
+ })
+ }
+}
+
// TestWritePackEmpty verifies that a zero-object pack
// succeeds without writing any artifacts.
func TestWritePackEmpty(t *testing.T) {
diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go
index e54d34b2..e06870a9 100644
--- a/object/store/packed/lookup.go
+++ b/object/store/packed/lookup.go
@@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) {
oid := objectID.RawBytes()
for _, p := range packed.order.Keys() {
+ if p.filter != nil && !p.filter.MayContain(oid) {
+ continue
+ }
+
offsetU, found, err := p.idx.Lookup(oid)
if err != nil {
return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err)
diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go
index dd43bc7a..9cd6162b 100644
--- a/object/store/packed/pack.go
+++ b/object/store/packed/pack.go
@@ -8,6 +8,7 @@ import (
"lindenii.org/go/furgit/internal/format/packfile"
"lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/mmap"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/lgo/intconv"
@@ -36,6 +37,9 @@ type pack struct {
// and data aliases them.
dataMapping *mmap.Mmap
data []byte
+
+ bloomMapping *mmap.Mmap
+ filter *bloom.Bloom
}
// openPack opens, maps, and validates
@@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack,
return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err)
}
+ bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash())
+
return &pack{
- name: name,
- idxMapping: idxMapping,
- idx: idx,
- dataMapping: dataMapping,
- data: dataMapping.Data(),
+ name: name,
+ idxMapping: idxMapping,
+ idx: idx,
+ dataMapping: dataMapping,
+ data: dataMapping.Data(),
+ bloomMapping: bloomMapping,
+ filter: filter,
}, nil
}
+func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) {
+ mapping, err := mapFile(root, name+".bloom")
+ if err != nil {
+ return nil, nil
+ }
+
+ filter, err := bloom.Parse(mapping.Data(), objectFormat)
+ if err != nil {
+ _ = mapping.Close()
+
+ return nil, nil
+ }
+
+ if !bytes.Equal(filter.PackHash(), packHash) {
+ _ = mapping.Close()
+
+ return nil, nil
+ }
+
+ return mapping, &filter
+}
+
// mapFile opens and maps one file under root.
func mapFile(root *os.Root, name string) (*mmap.Mmap, error) {
file, err := root.Open(name)
@@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error {
return nil
}
-// close releases the pack data and index mappings.
+// close releases the pack data, index, and filter mappings.
func (pack *pack) close() error {
- return errors.Join(
+ errs := []error{
pack.dataMapping.Close(),
pack.idxMapping.Close(),
- )
+ }
+
+ if pack.bloomMapping != nil {
+ errs = append(errs, pack.bloomMapping.Close())
+ }
+
+ return errors.Join(errs...)
}
diff --git a/object/store/packed/packed.go b/object/store/packed/packed.go
index f22c2445..897b3b98 100644
--- a/object/store/packed/packed.go
+++ b/object/store/packed/packed.go
@@ -63,7 +63,7 @@ func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) {
packed := &Packed{
root: root,
objectFormat: objectFormat,
- order: mru.New[*pack](),
+ order: mru.New[*pack](mru.Options{Interval: 48}),
baseCache: newBaseCache(),
refreshMu: sync.Mutex{},
byName: nil,
diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go
index 5e0b85cb..977a9543 100644
--- a/object/store/packed/quarantine.go
+++ b/object/store/packed/quarantine.go
@@ -156,6 +156,8 @@ func packPromotionPriority(name string) int {
return 1
case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"):
return 2
+ case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"):
+ return 2
case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"):
return 3
default:
diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go
index bfc82eff..cf433cfc 100644
--- a/object/store/packed/reader.go
+++ b/object/store/packed/reader.go
@@ -165,7 +165,7 @@ func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, int, io
return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: object size overflows int: %w", ErrMalformedPackedStore, p.name, err)
}
- zr, err := zlib.NewReader(bytes.NewReader(payload))
+ zr, err := zlib.NewReaderBytes(payload)
if err != nil {
return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err)
}
diff --git a/object/store/packed/refresh.go b/object/store/packed/refresh.go
index 14c66013..f06e9859 100644
--- a/object/store/packed/refresh.go
+++ b/object/store/packed/refresh.go
@@ -23,7 +23,7 @@ func (packed *Packed) Refresh() error {
next := make(map[string]*pack, len(packed.byName))
- var opened []*pack
+ opened := make([]*pack, 0, len(dirEntries))
for _, dirEntry := range dirEntries {
name, ok := strings.CutSuffix(dirEntry.Name(), ".idx")
diff --git a/object/store/packed/refresh_test.go b/object/store/packed/refresh_test.go
index 025f6f62..e54dc97d 100644
--- a/object/store/packed/refresh_test.go
+++ b/object/store/packed/refresh_test.go
@@ -101,7 +101,7 @@ func cp(t *testing.T, src, dst string) {
t.Fatalf("ReadFile: %v", err)
}
- err = os.WriteFile(dst, data, 0o600) //nolint:gosec
+ err = os.WriteFile(dst, data, 0o600) //#nosec G703
if err != nil {
t.Fatalf("WriteFile: %v", err)
}
diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go
index ca00ea94..8a22b90f 100644
--- a/object/tree/malformed_test.go
+++ b/object/tree/malformed_test.go
@@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) {
{name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)},
{name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)},
{name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)},
+ {name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)},
} {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
diff --git a/object/tree/parse.go b/object/tree/parse.go
index e89ce0fa..bd6ed3b0 100644
--- a/object/tree/parse.go
+++ b/object/tree/parse.go
@@ -6,7 +6,6 @@ import (
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/furgit/object/tree/mode"
- "lindenii.org/go/lgo/unsafe"
)
// Parse decodes a tree object body into a fully materialized Tree.
@@ -26,7 +25,11 @@ import (
func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) {
tree := new(Tree)
idSize := objectFormat.Size()
- seen := make(map[string]struct{})
+
+ const minEntryOverhead = 5 + 1 + 1 + 1 // mode, space, name, NUL
+ if estimate := len(body) / (minEntryOverhead + idSize); estimate > 0 {
+ tree.entries = make([]Entry, 0, estimate)
+ }
i := 0
for i < len(body) {
@@ -76,14 +79,44 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) {
}
}
- if _, dup := seen[unsafe.String(entry.Name)]; dup {
+ if entryMode == mode.Directory && hasNonDirNamed(tree.entries, entry.Name) {
return nil, fmt.Errorf("%w: duplicate entry name %q", ErrInvalidTree, entry.Name)
}
- seen[unsafe.String(entry.Name)] = struct{}{}
-
tree.entries = append(tree.entries, entry)
}
return tree, nil
}
+
+// hasNonDirNamed reports whether entries, sorted in Git tree order,
+// holds a non-directory entry whose name equals name.
+//
+// The match sorts immediately below a directory of the same name,
+// so the search gallops from the back before binary searching the bracket.
+func hasNonDirNamed(entries []Entry, name []byte) bool {
+ lo, hi := 0, len(entries)
+
+ for stride := 1; stride < hi-lo; stride *= 2 {
+ mid := hi - stride
+ if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 {
+ lo = mid + 1
+
+ break
+ }
+
+ hi = mid
+ }
+
+ for lo < hi {
+ mid := lo + (hi-lo)/2
+ if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 {
+ lo = mid + 1
+ } else {
+ hi = mid
+ }
+ }
+
+ return lo < len(entries) &&
+ nameCompare(entries[lo].Name, entries[lo].Mode == mode.Directory, name, false) == 0
+}