diff options
Diffstat (limited to 'object')
| -rw-r--r-- | object/store/mix/mix.go | 2 | ||||
| -rw-r--r-- | object/store/packed/delta.go | 14 | ||||
| -rw-r--r-- | object/store/packed/entry.go | 3 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/finalize.go | 45 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/resolve.go | 2 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/result.go | 3 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/thin.go | 2 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/writepack_test.go | 77 | ||||
| -rw-r--r-- | object/store/packed/lookup.go | 4 | ||||
| -rw-r--r-- | object/store/packed/pack.go | 52 | ||||
| -rw-r--r-- | object/store/packed/packed.go | 2 | ||||
| -rw-r--r-- | object/store/packed/quarantine.go | 2 | ||||
| -rw-r--r-- | object/store/packed/reader.go | 2 | ||||
| -rw-r--r-- | object/store/packed/refresh.go | 2 | ||||
| -rw-r--r-- | object/store/packed/refresh_test.go | 2 | ||||
| -rw-r--r-- | object/tree/malformed_test.go | 1 | ||||
| -rw-r--r-- | object/tree/parse.go | 43 |
17 files changed, 232 insertions, 26 deletions
diff --git a/object/store/mix/mix.go b/object/store/mix/mix.go index 2e8e926b..b048fe86 100644 --- a/object/store/mix/mix.go +++ b/object/store/mix/mix.go @@ -28,7 +28,7 @@ func New(backends ...store.ObjectReader) *Mix { present[backend] = struct{}{} } - order := mru.New[store.ObjectReader]() + order := mru.New[store.ObjectReader](mru.Options{Interval: 48}) order.Sync(present) return &Mix{ diff --git a/object/store/packed/delta.go b/object/store/packed/delta.go index e48e2c65..5b538221 100644 --- a/object/store/packed/delta.go +++ b/object/store/packed/delta.go @@ -1,7 +1,6 @@ package packed import ( - "bytes" "fmt" "io" "slices" @@ -11,8 +10,14 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packfile/delta" "lindenii.org/go/lgo/intconv" + "lindenii.org/go/lgo/sync" ) +//nolint:gochecknoglobals +var deltaHeaderPool = sync.NewPool(func() *[delta.MaxHeaderSizesLen]byte { + return new([delta.MaxHeaderSizesLen]byte) +}) + // deltaNode is a delta entry on a resolution chain. type deltaNode struct { // payload is the entry's compressed delta payload view. @@ -208,16 +213,19 @@ func (packed *Packed) resolveType(p *pack, offset int, entryHeader packfile.Entr // deltaResultSize reads the declared result size // from one compressed delta payload prefix. func deltaResultSize(payload []byte, deltaSize uint64) (int, error) { - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return 0, fmt.Errorf("reading delta header: %w", err) } defer func() { _ = zr.Close() }() + buf := deltaHeaderPool.Get() + defer deltaHeaderPool.Put(buf) + prefixLen := min(uint64(delta.MaxHeaderSizesLen), deltaSize) - prefix := make([]byte, prefixLen) + prefix := buf[:prefixLen] _, err = io.ReadFull(zr, prefix) if err != nil { diff --git a/object/store/packed/entry.go b/object/store/packed/entry.go index e9d45bb4..908afad0 100644 --- a/object/store/packed/entry.go +++ b/object/store/packed/entry.go @@ -1,7 +1,6 @@ package packed import ( - "bytes" "errors" "fmt" "io" @@ -49,7 +48,7 @@ func inflate(payload []byte, expectedSize uint64) ([]byte, error) { return nil, fmt.Errorf("declared size: %w", err) } - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return nil, fmt.Errorf("inflating entry payload: %w", err) } diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go index f0ab6622..afed996c 100644 --- a/object/store/packed/internal/ingest/finalize.go +++ b/object/store/packed/internal/ingest/finalize.go @@ -8,6 +8,7 @@ import ( "slices" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/format/packrev" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + bloomBuilder, err := ingestion.buildBloom(entries, packHash) + if err != nil { + return Result{}, err + } + + bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error { + _, err := w.Write(bloomBuilder.Bytes()) + + return err + }) + if err != nil { + return Result{}, err + } + base := "pack-" + ingestion.packHash.String() packFinal := base + ".pack" idxFinal := base + ".idx" revFinal := base + ".rev" + bloomFinal := base + ".bloom" - // Link the pack and reverse index before the index, + // Link the pack, reverse index, and Bloom filter before the index, // since the index is what publishes the pack to readers. err = ingestion.link(ingestion.packTmp, packFinal) if err != nil { @@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + err = ingestion.link(bloomTmp, bloomFinal) + if err != nil { + return Result{}, err + } + err = ingestion.link(idxTmp, idxFinal) if err != nil { return Result{}, err @@ -69,12 +90,34 @@ func (ingestion *ingestion) finalize() (Result, error) { PackName: packFinal, IdxName: idxFinal, RevName: revFinal, + BloomName: bloomFinal, PackHash: ingestion.packHash, ObjectCount: objectCount, ThinFixed: ingestion.thinFixed, }, nil } +// buildBloom builds a Bloom filter over the index entries' object IDs, +// bound to packHash. +func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) { + bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries)) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + size := ingestion.objectFormat.Size() + for i := range entries { + builder.Add(entries[i].OID[:size]) + } + + return builder, nil +} + // indexEntries returns the index entries in object-ID order // and, for each record in pack order, its position in that index order. func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) { diff --git a/object/store/packed/internal/ingest/resolve.go b/object/store/packed/internal/ingest/resolve.go index 8595d366..77b0fa0f 100644 --- a/object/store/packed/internal/ingest/resolve.go +++ b/object/store/packed/internal/ingest/resolve.go @@ -270,7 +270,7 @@ func (ingestion *ingestion) countUnresolved() int { func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID { seen := make(map[id.ObjectID]struct{}) - var out []id.ObjectID + out := make([]id.ObjectID, 0, ingestion.deltaCount-ingestion.deltasResolved) for index := range ingestion.records { rec := &ingestion.records[index] diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go index 0ae5593a..9cd6ef1d 100644 --- a/object/store/packed/internal/ingest/result.go +++ b/object/store/packed/internal/ingest/result.go @@ -13,6 +13,9 @@ type Result struct { // RevName is the destination-relative name of the written reverse index. RevName string + // BloomName is the destination-relative name of the written Bloom filter. + BloomName string + // PackHash is the pack trailer hash // shared by the pack, index, and reverse index. PackHash id.ObjectID diff --git a/object/store/packed/internal/ingest/thin.go b/object/store/packed/internal/ingest/thin.go index 8d1566e0..fa125f2f 100644 --- a/object/store/packed/internal/ingest/thin.go +++ b/object/store/packed/internal/ingest/thin.go @@ -33,7 +33,7 @@ func (ingestion *ingestion) fixThin(external []id.ObjectID, adjacency adjacency, // Drop the trailer from the write cursor. ingestion.scanner.consumed -= hashSize - var appended []int + appended := make([]int, 0, len(external)) for _, baseOID := range external { ty, content, err := ingestion.opts.ThinBase.ReadBytesContent(baseOID) diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go index 394d8f6e..adc0ba35 100644 --- a/object/store/packed/internal/ingest/writepack_test.go +++ b/object/store/packed/internal/ingest/writepack_test.go @@ -8,6 +8,8 @@ import ( "path/filepath" "testing" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/testgit" "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/store" @@ -89,6 +91,81 @@ func TestWritePackMatchesGit(t *testing.T) { } } +// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter +// that reports every object in the pack as present. +func TestWritePackBloom(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat}) + if err != nil { + t.Fatalf("NewRepo: %v", err) + } + + seeded, err := repo.SeedHistory(t) + if err != nil { + t.Fatalf("SeedHistory: %v", err) + } + + gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{ + RevIndex: true, + Revs: false, + Exclude: nil, + }) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile pack: %v", err) + } + + dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + ThinBase: nil, + Progress: nil, + }) + + if result.BloomName == "" { + t.Fatal("BloomName is empty") + } + + bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile bloom: %v", err) + } + + filter, err := bloom.Parse(bloomBytes, objectFormat) + if err != nil { + t.Fatalf("bloom.Parse: %v", err) + } + + idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile idx: %v", err) + } + + index, err := packidx.Parse(idxBytes, objectFormat.Size()) + if err != nil { + t.Fatalf("packidx.Parse: %v", err) + } + + if !bytes.Equal(filter.PackHash(), index.PackHash()) { + t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash()) + } + + for pos := range index.NumObjects() { + if !filter.MayContain(index.OIDAt(pos)) { + t.Fatalf("filter rejects object at index position %d", pos) + } + } + }) + } +} + // TestWritePackEmpty verifies that a zero-object pack // succeeds without writing any artifacts. func TestWritePackEmpty(t *testing.T) { diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go index e54d34b2..e06870a9 100644 --- a/object/store/packed/lookup.go +++ b/object/store/packed/lookup.go @@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) { oid := objectID.RawBytes() for _, p := range packed.order.Keys() { + if p.filter != nil && !p.filter.MayContain(oid) { + continue + } + offsetU, found, err := p.idx.Lookup(oid) if err != nil { return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go index dd43bc7a..9cd6162b 100644 --- a/object/store/packed/pack.go +++ b/object/store/packed/pack.go @@ -8,6 +8,7 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/mmap" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -36,6 +37,9 @@ type pack struct { // and data aliases them. dataMapping *mmap.Mmap data []byte + + bloomMapping *mmap.Mmap + filter *bloom.Bloom } // openPack opens, maps, and validates @@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack, return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err) } + bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash()) + return &pack{ - name: name, - idxMapping: idxMapping, - idx: idx, - dataMapping: dataMapping, - data: dataMapping.Data(), + name: name, + idxMapping: idxMapping, + idx: idx, + dataMapping: dataMapping, + data: dataMapping.Data(), + bloomMapping: bloomMapping, + filter: filter, }, nil } +func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) { + mapping, err := mapFile(root, name+".bloom") + if err != nil { + return nil, nil + } + + filter, err := bloom.Parse(mapping.Data(), objectFormat) + if err != nil { + _ = mapping.Close() + + return nil, nil + } + + if !bytes.Equal(filter.PackHash(), packHash) { + _ = mapping.Close() + + return nil, nil + } + + return mapping, &filter +} + // mapFile opens and maps one file under root. func mapFile(root *os.Root, name string) (*mmap.Mmap, error) { file, err := root.Open(name) @@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error { return nil } -// close releases the pack data and index mappings. +// close releases the pack data, index, and filter mappings. func (pack *pack) close() error { - return errors.Join( + errs := []error{ pack.dataMapping.Close(), pack.idxMapping.Close(), - ) + } + + if pack.bloomMapping != nil { + errs = append(errs, pack.bloomMapping.Close()) + } + + return errors.Join(errs...) } diff --git a/object/store/packed/packed.go b/object/store/packed/packed.go index f22c2445..897b3b98 100644 --- a/object/store/packed/packed.go +++ b/object/store/packed/packed.go @@ -63,7 +63,7 @@ func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) { packed := &Packed{ root: root, objectFormat: objectFormat, - order: mru.New[*pack](), + order: mru.New[*pack](mru.Options{Interval: 48}), baseCache: newBaseCache(), refreshMu: sync.Mutex{}, byName: nil, diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go index 5e0b85cb..977a9543 100644 --- a/object/store/packed/quarantine.go +++ b/object/store/packed/quarantine.go @@ -156,6 +156,8 @@ func packPromotionPriority(name string) int { return 1 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"): return 2 + case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"): + return 2 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"): return 3 default: diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go index bfc82eff..cf433cfc 100644 --- a/object/store/packed/reader.go +++ b/object/store/packed/reader.go @@ -165,7 +165,7 @@ func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, int, io return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: object size overflows int: %w", ErrMalformedPackedStore, p.name, err) } - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) } diff --git a/object/store/packed/refresh.go b/object/store/packed/refresh.go index 14c66013..f06e9859 100644 --- a/object/store/packed/refresh.go +++ b/object/store/packed/refresh.go @@ -23,7 +23,7 @@ func (packed *Packed) Refresh() error { next := make(map[string]*pack, len(packed.byName)) - var opened []*pack + opened := make([]*pack, 0, len(dirEntries)) for _, dirEntry := range dirEntries { name, ok := strings.CutSuffix(dirEntry.Name(), ".idx") diff --git a/object/store/packed/refresh_test.go b/object/store/packed/refresh_test.go index 025f6f62..e54dc97d 100644 --- a/object/store/packed/refresh_test.go +++ b/object/store/packed/refresh_test.go @@ -101,7 +101,7 @@ func cp(t *testing.T, src, dst string) { t.Fatalf("ReadFile: %v", err) } - err = os.WriteFile(dst, data, 0o600) //nolint:gosec + err = os.WriteFile(dst, data, 0o600) //#nosec G703 if err != nil { t.Fatalf("WriteFile: %v", err) } diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go index ca00ea94..8a22b90f 100644 --- a/object/tree/malformed_test.go +++ b/object/tree/malformed_test.go @@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) { {name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)}, {name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)}, {name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)}, + {name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)}, } { t.Run(tc.name, func(t *testing.T) { t.Parallel() diff --git a/object/tree/parse.go b/object/tree/parse.go index e89ce0fa..bd6ed3b0 100644 --- a/object/tree/parse.go +++ b/object/tree/parse.go @@ -6,7 +6,6 @@ import ( "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/tree/mode" - "lindenii.org/go/lgo/unsafe" ) // Parse decodes a tree object body into a fully materialized Tree. @@ -26,7 +25,11 @@ import ( func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { tree := new(Tree) idSize := objectFormat.Size() - seen := make(map[string]struct{}) + + const minEntryOverhead = 5 + 1 + 1 + 1 // mode, space, name, NUL + if estimate := len(body) / (minEntryOverhead + idSize); estimate > 0 { + tree.entries = make([]Entry, 0, estimate) + } i := 0 for i < len(body) { @@ -76,14 +79,44 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { } } - if _, dup := seen[unsafe.String(entry.Name)]; dup { + if entryMode == mode.Directory && hasNonDirNamed(tree.entries, entry.Name) { return nil, fmt.Errorf("%w: duplicate entry name %q", ErrInvalidTree, entry.Name) } - seen[unsafe.String(entry.Name)] = struct{}{} - tree.entries = append(tree.entries, entry) } return tree, nil } + +// hasNonDirNamed reports whether entries, sorted in Git tree order, +// holds a non-directory entry whose name equals name. +// +// The match sorts immediately below a directory of the same name, +// so the search gallops from the back before binary searching the bracket. +func hasNonDirNamed(entries []Entry, name []byte) bool { + lo, hi := 0, len(entries) + + for stride := 1; stride < hi-lo; stride *= 2 { + mid := hi - stride + if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 { + lo = mid + 1 + + break + } + + hi = mid + } + + for lo < hi { + mid := lo + (hi-lo)/2 + if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 { + lo = mid + 1 + } else { + hi = mid + } + } + + return lo < len(entries) && + nameCompare(entries[lo].Name, entries[lo].Mode == mode.Directory, name, false) == 0 +} |
