about | summary | refs | log | tree | commit | diff
path: root/object
diff options
context:
space:
mode:
author: Runxi Yu, 2026-06-14 13:27:15 +0000
committer: Runxi Yu, 2026-06-14 13:27:15 +0000
commit: a772af2310eae22f007fd95a4195346d32f3ffbd (patch)
tree: f9452ef35c69e7f759f5973369a07015fd29d123 /object
parent: internal/format/packidx/bloom: Add (diff)
Build bloom filter too.
Diffstat (limited to 'object')
-rw-r--r-- object/store/packed/internal/ingest/finalize.go 44
-rw-r--r-- object/store/packed/internal/ingest/result.go 3
-rw-r--r-- object/store/packed/quarantine.go 2
3 files changed, 48 insertions, 1 deletion
diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go
index f0ab6622..13a7278b 100644
--- a/object/store/packed/internal/ingest/finalize.go
+++ b/object/store/packed/internal/ingest/finalize.go
@@ -8,6 +8,7 @@ import (
"slices"
"lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/format/packrev"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/lgo/intconv"
@@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) {
return Result{}, err
}
+ bloomBuilder, err := ingestion.buildBloom(entries)
+ if err != nil {
+ return Result{}, err
+ }
+
+ bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error {
+ _, err := bloomBuilder.WriteTo(w)
+
+ return err
+ })
+ if err != nil {
+ return Result{}, err
+ }
+
base := "pack-" + ingestion.packHash.String()
packFinal := base + ".pack"
idxFinal := base + ".idx"
revFinal := base + ".rev"
+ bloomFinal := base + ".bloom"
- // Link the pack and reverse index before the index,
+ // Link the pack, reverse index, and Bloom filter before the index,
// since the index is what publishes the pack to readers.
err = ingestion.link(ingestion.packTmp, packFinal)
if err != nil {
@@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) {
return Result{}, err
}
+ err = ingestion.link(bloomTmp, bloomFinal)
+ if err != nil {
+ return Result{}, err
+ }
+
err = ingestion.link(idxTmp, idxFinal)
if err != nil {
return Result{}, err
@@ -69,12 +90,33 @@ func (ingestion *ingestion) finalize() (Result, error) {
PackName: packFinal,
IdxName: idxFinal,
RevName: revFinal,
+ BloomName: bloomFinal,
PackHash: ingestion.packHash,
ObjectCount: objectCount,
ThinFixed: ingestion.thinFixed,
}, nil
}
+// buildBloom builds a Bloom filter over the index entries' object IDs; it returns the populated builder, or a package-prefixed wrapped error if parameter selection or builder construction fails.
+func (ingestion *ingestion) buildBloom(entries []packidx.Entry) (*bloom.Builder, error) {
+ bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries)) // filter parameters are chosen from the object format and the number of entries
+ if err != nil {
+ return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+ }
+
+ builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k)
+ if err != nil {
+ return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+ }
+
+ size := ingestion.objectFormat.Size() // number of leading OID bytes passed to the builder for each entry
+ for i := range entries { // index access, so no entry is copied per iteration
+ builder.Add(entries[i].OID[:size]) // NOTE(review): presumably OID storage can be wider than the active format's ID, hence the truncation to size — confirm against packidx.Entry
+ }
+
+ return builder, nil
+}
+
// indexEntries returns the index entries in object-ID order
// and, for each record in pack order, its position in that index order.
func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) {
diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go
index 0ae5593a..9cd6ef1d 100644
--- a/object/store/packed/internal/ingest/result.go
+++ b/object/store/packed/internal/ingest/result.go
@@ -13,6 +13,9 @@ type Result struct {
// RevName is the destination-relative name of the written reverse index.
RevName string
+ // BloomName is the destination-relative name of the written Bloom filter.
+ BloomName string
+
// PackHash is the pack trailer hash
// shared by the pack, index, and reverse index.
PackHash id.ObjectID
diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go
index 5e0b85cb..977a9543 100644
--- a/object/store/packed/quarantine.go
+++ b/object/store/packed/quarantine.go
@@ -156,6 +156,8 @@ func packPromotionPriority(name string) int {
return 1
case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"):
return 2
+ case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"):
+ return 2
case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"):
return 3
default: