diff options
| author | 2026-06-14 13:27:15 +0000 | |
|---|---|---|
| committer | 2026-06-14 13:27:15 +0000 | |
| commit | a772af2310eae22f007fd95a4195346d32f3ffbd (patch) | |
| tree | f9452ef35c69e7f759f5973369a07015fd29d123 /object | |
| parent | internal/format/packidx/bloom: Add (diff) | |
Build bloom filter too.
Diffstat (limited to 'object')
| -rw-r--r-- | object/store/packed/internal/ingest/finalize.go | 44 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/result.go | 3 | ||||
| -rw-r--r-- | object/store/packed/quarantine.go | 2 |
3 files changed, 48 insertions, 1 deletions
diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go index f0ab6622..13a7278b 100644 --- a/object/store/packed/internal/ingest/finalize.go +++ b/object/store/packed/internal/ingest/finalize.go @@ -8,6 +8,7 @@ import ( "slices" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/format/packrev" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + bloomBuilder, err := ingestion.buildBloom(entries) + if err != nil { + return Result{}, err + } + + bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error { + _, err := bloomBuilder.WriteTo(w) + + return err + }) + if err != nil { + return Result{}, err + } + base := "pack-" + ingestion.packHash.String() packFinal := base + ".pack" idxFinal := base + ".idx" revFinal := base + ".rev" + bloomFinal := base + ".bloom" - // Link the pack and reverse index before the index, + // Link the pack, reverse index, and Bloom filter before the index, // since the index is what publishes the pack to readers. err = ingestion.link(ingestion.packTmp, packFinal) if err != nil { @@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + err = ingestion.link(bloomTmp, bloomFinal) + if err != nil { + return Result{}, err + } + err = ingestion.link(idxTmp, idxFinal) if err != nil { return Result{}, err @@ -69,12 +90,33 @@ func (ingestion *ingestion) finalize() (Result, error) { PackName: packFinal, IdxName: idxFinal, RevName: revFinal, + BloomName: bloomFinal, PackHash: ingestion.packHash, ObjectCount: objectCount, ThinFixed: ingestion.thinFixed, }, nil } +// buildBloom builds a Bloom filter over the index entries' object IDs. +func (ingestion *ingestion) buildBloom(entries []packidx.Entry) (*bloom.Builder, error) { + bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries)) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + size := ingestion.objectFormat.Size() + for i := range entries { + builder.Add(entries[i].OID[:size]) + } + + return builder, nil +} + // indexEntries returns the index entries in object-ID order // and, for each record in pack order, its position in that index order. func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) { diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go index 0ae5593a..9cd6ef1d 100644 --- a/object/store/packed/internal/ingest/result.go +++ b/object/store/packed/internal/ingest/result.go @@ -13,6 +13,9 @@ type Result struct { // RevName is the destination-relative name of the written reverse index. RevName string + // BloomName is the destination-relative name of the written Bloom filter. + BloomName string + // PackHash is the pack trailer hash // shared by the pack, index, and reverse index. PackHash id.ObjectID diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go index 5e0b85cb..977a9543 100644 --- a/object/store/packed/quarantine.go +++ b/object/store/packed/quarantine.go @@ -156,6 +156,8 @@ func packPromotionPriority(name string) int { return 1 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"): return 2 + case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"): + return 2 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"): return 3 default: |
