diff options
Diffstat (limited to 'object')
| -rw-r--r-- | object/store/packed/internal/ingest/finalize.go | 45 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/result.go | 3 | ||||
| -rw-r--r-- | object/store/packed/internal/ingest/writepack_test.go | 77 | ||||
| -rw-r--r-- | object/store/packed/lookup.go | 4 | ||||
| -rw-r--r-- | object/store/packed/pack.go | 52 | ||||
| -rw-r--r-- | object/store/packed/quarantine.go | 2 | ||||
| -rw-r--r-- | object/tree/malformed_test.go | 1 |
7 files changed, 175 insertions, 9 deletions
diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go index f0ab6622..afed996c 100644 --- a/object/store/packed/internal/ingest/finalize.go +++ b/object/store/packed/internal/ingest/finalize.go @@ -8,6 +8,7 @@ import ( "slices" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/format/packrev" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + bloomBuilder, err := ingestion.buildBloom(entries, packHash) + if err != nil { + return Result{}, err + } + + bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error { + _, err := w.Write(bloomBuilder.Bytes()) + + return err + }) + if err != nil { + return Result{}, err + } + base := "pack-" + ingestion.packHash.String() packFinal := base + ".pack" idxFinal := base + ".idx" revFinal := base + ".rev" + bloomFinal := base + ".bloom" - // Link the pack and reverse index before the index, + // Link the pack, reverse index, and Bloom filter before the index, // since the index is what publishes the pack to readers. err = ingestion.link(ingestion.packTmp, packFinal) if err != nil { @@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + err = ingestion.link(bloomTmp, bloomFinal) + if err != nil { + return Result{}, err + } + err = ingestion.link(idxTmp, idxFinal) if err != nil { return Result{}, err @@ -69,12 +90,34 @@ func (ingestion *ingestion) finalize() (Result, error) { PackName: packFinal, IdxName: idxFinal, RevName: revFinal, + BloomName: bloomFinal, PackHash: ingestion.packHash, ObjectCount: objectCount, ThinFixed: ingestion.thinFixed, }, nil } +// buildBloom builds a Bloom filter over the index entries' object IDs, +// bound to packHash. +func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) { + bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries)) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + size := ingestion.objectFormat.Size() + for i := range entries { + builder.Add(entries[i].OID[:size]) + } + + return builder, nil +} + // indexEntries returns the index entries in object-ID order // and, for each record in pack order, its position in that index order. func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) { diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go index 0ae5593a..9cd6ef1d 100644 --- a/object/store/packed/internal/ingest/result.go +++ b/object/store/packed/internal/ingest/result.go @@ -13,6 +13,9 @@ type Result struct { // RevName is the destination-relative name of the written reverse index. RevName string + // BloomName is the destination-relative name of the written Bloom filter. + BloomName string + // PackHash is the pack trailer hash // shared by the pack, index, and reverse index. PackHash id.ObjectID diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go index 394d8f6e..adc0ba35 100644 --- a/object/store/packed/internal/ingest/writepack_test.go +++ b/object/store/packed/internal/ingest/writepack_test.go @@ -8,6 +8,8 @@ import ( "path/filepath" "testing" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/testgit" "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/store" @@ -89,6 +91,81 @@ func TestWritePackMatchesGit(t *testing.T) { } } +// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter +// that reports every object in the pack as present. +func TestWritePackBloom(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat}) + if err != nil { + t.Fatalf("NewRepo: %v", err) + } + + seeded, err := repo.SeedHistory(t) + if err != nil { + t.Fatalf("SeedHistory: %v", err) + } + + gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{ + RevIndex: true, + Revs: false, + Exclude: nil, + }) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile pack: %v", err) + } + + dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + ThinBase: nil, + Progress: nil, + }) + + if result.BloomName == "" { + t.Fatal("BloomName is empty") + } + + bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile bloom: %v", err) + } + + filter, err := bloom.Parse(bloomBytes, objectFormat) + if err != nil { + t.Fatalf("bloom.Parse: %v", err) + } + + idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile idx: %v", err) + } + + index, err := packidx.Parse(idxBytes, objectFormat.Size()) + if err != nil { + t.Fatalf("packidx.Parse: %v", err) + } + + if !bytes.Equal(filter.PackHash(), index.PackHash()) { + t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash()) + } + + for pos := range index.NumObjects() { + if !filter.MayContain(index.OIDAt(pos)) { + t.Fatalf("filter rejects object at index position %d", pos) + } + } + }) + } +} + // TestWritePackEmpty verifies that a zero-object pack // succeeds without writing any artifacts. func TestWritePackEmpty(t *testing.T) { diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go index e54d34b2..e06870a9 100644 --- a/object/store/packed/lookup.go +++ b/object/store/packed/lookup.go @@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) { oid := objectID.RawBytes() for _, p := range packed.order.Keys() { + if p.filter != nil && !p.filter.MayContain(oid) { + continue + } + offsetU, found, err := p.idx.Lookup(oid) if err != nil { return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go index dd43bc7a..9cd6162b 100644 --- a/object/store/packed/pack.go +++ b/object/store/packed/pack.go @@ -8,6 +8,7 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/mmap" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -36,6 +37,9 @@ type pack struct { // and data aliases them. dataMapping *mmap.Mmap data []byte + + bloomMapping *mmap.Mmap + filter *bloom.Bloom } // openPack opens, maps, and validates @@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack, return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err) } + bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash()) + return &pack{ - name: name, - idxMapping: idxMapping, - idx: idx, - dataMapping: dataMapping, - data: dataMapping.Data(), + name: name, + idxMapping: idxMapping, + idx: idx, + dataMapping: dataMapping, + data: dataMapping.Data(), + bloomMapping: bloomMapping, + filter: filter, }, nil } +func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) { + mapping, err := mapFile(root, name+".bloom") + if err != nil { + return nil, nil + } + + filter, err := bloom.Parse(mapping.Data(), objectFormat) + if err != nil { + _ = mapping.Close() + + return nil, nil + } + + if !bytes.Equal(filter.PackHash(), packHash) { + _ = mapping.Close() + + return nil, nil + } + + return mapping, &filter +} + // mapFile opens and maps one file under root. func mapFile(root *os.Root, name string) (*mmap.Mmap, error) { file, err := root.Open(name) @@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error { return nil } -// close releases the pack data and index mappings. +// close releases the pack data, index, and filter mappings. func (pack *pack) close() error { - return errors.Join( + errs := []error{ pack.dataMapping.Close(), pack.idxMapping.Close(), - ) + } + + if pack.bloomMapping != nil { + errs = append(errs, pack.bloomMapping.Close()) + } + + return errors.Join(errs...) } diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go index 5e0b85cb..977a9543 100644 --- a/object/store/packed/quarantine.go +++ b/object/store/packed/quarantine.go @@ -156,6 +156,8 @@ func packPromotionPriority(name string) int { return 1 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"): return 2 + case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"): + return 2 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"): return 3 default: diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go index ca00ea94..8a22b90f 100644 --- a/object/tree/malformed_test.go +++ b/object/tree/malformed_test.go @@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) { {name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)}, {name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)}, {name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)}, + {name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)}, } { t.Run(tc.name, func(t *testing.T) { t.Parallel() |
