aboutsummaryrefslogtreecommitdiff
path: root/internal/bloom
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-02-20 19:06:13 +0800
committerGravatar Runxi Yu2026-02-20 19:07:14 +0800
commitaa513c069c1418734aea894dc944e27c6a78a3bb (patch)
tree687f0a11bb550fa088fd82a98ceb8979bbc35f69 /internal/bloom
parentComment on prior reverts removing the pack writing API (diff)
signatureNo signature
Delete everything, I'm redesigning this.
I'll stop using a flat package and make things much more modular. And also experiment with streaming APIs so large blobs don't OOM us.
Diffstat (limited to 'internal/bloom')
-rw-r--r--internal/bloom/bloom.go236
-rw-r--r--internal/bloom/errors.go5
2 files changed, 0 insertions, 241 deletions
diff --git a/internal/bloom/bloom.go b/internal/bloom/bloom.go
deleted file mode 100644
index c1a116d6..00000000
--- a/internal/bloom/bloom.go
+++ /dev/null
@@ -1,236 +0,0 @@
-// Package bloom provides a bloom filter implementation used for changed-path
-// filters in Git commit graphs.
-package bloom
-
-import "encoding/binary"
-
-const (
- // DataHeaderSize is the size of the BDAT header in commit-graph files.
- DataHeaderSize = 3 * 4
- // DefaultMaxChange matches Git's default max-changed-paths behavior.
- DefaultMaxChange = 512
-)
-
-// Settings describe the changed-paths Bloom filter parameters stored in
-// commit-graph BDAT chunks.
-//
-// Obviously, they must match the repository's commit-graph settings to
-// interpret filters correctly.
-type Settings struct {
- HashVersion uint32
- NumHashes uint32
- BitsPerEntry uint32
- MaxChangePaths uint32
-}
-
-// Filter represents a changed-paths Bloom filter associated with a commit.
-//
-// The filter encodes which paths changed between a commit and its first
-// parent. Paths are expected to be in Git's slash-separated form and
-// are queried using a path and its prefixes (e.g. "a/b/c", "a/b", "a").
-type Filter struct {
- Data []byte
- Version uint32
-}
-
-// ParseSettings reads Bloom filter settings from a BDAT chunk header.
-func ParseSettings(bdat []byte) (*Settings, error) {
- if len(bdat) < DataHeaderSize {
- return nil, ErrInvalid
- }
- settings := &Settings{
- HashVersion: binary.BigEndian.Uint32(bdat[0:4]),
- NumHashes: binary.BigEndian.Uint32(bdat[4:8]),
- BitsPerEntry: binary.BigEndian.Uint32(bdat[8:12]),
- MaxChangePaths: DefaultMaxChange,
- }
- return settings, nil
-}
-
-// MightContain reports whether the Bloom filter may contain the given path.
-//
-// Evaluated against the full path and each of its directory prefixes. A true
-// result indicates a possible match; false means the path definitely did not
-// change.
-func (f *Filter) MightContain(path []byte, settings *Settings) bool {
- if f == nil || settings == nil {
- return false
- }
- if len(f.Data) == 0 {
- return false
- }
- keys := keyvec(path, settings)
- for i := range keys {
- if filterContainsKey(f, &keys[i], settings) {
- return true
- }
- }
- return false
-}
-
-type key struct {
- hashes []uint32
-}
-
-func keyvec(path []byte, settings *Settings) []key {
- if len(path) == 0 {
- return nil
- }
- count := 1
- for _, b := range path {
- if b == '/' {
- count++
- }
- }
- keys := make([]key, 0, count)
- keys = append(keys, keyFill(path, settings))
- for i := len(path) - 1; i >= 0; i-- {
- if path[i] == '/' {
- keys = append(keys, keyFill(path[:i], settings))
- }
- }
- return keys
-}
-
-func keyFill(path []byte, settings *Settings) key {
- const seed0 = 0x293ae76f
- const seed1 = 0x7e646e2c
- var h0, h1 uint32
- if settings.HashVersion == 2 {
- h0 = murmur3SeededV2(seed0, path)
- h1 = murmur3SeededV2(seed1, path)
- } else {
- h0 = murmur3SeededV1(seed0, path)
- h1 = murmur3SeededV1(seed1, path)
- }
- hashes := make([]uint32, settings.NumHashes)
- for i := uint32(0); i < settings.NumHashes; i++ {
- hashes[i] = h0 + i*h1
- }
- return key{hashes: hashes}
-}
-
-func filterContainsKey(filter *Filter, key *key, settings *Settings) bool {
- if filter == nil || key == nil || settings == nil {
- return false
- }
- if len(filter.Data) == 0 {
- return false
- }
- mod := uint64(len(filter.Data)) * 8
- for _, h := range key.hashes {
- idx := uint64(h) % mod
- bytePos := idx / 8
- bit := byte(1 << (idx & 7))
- if filter.Data[bytePos]&bit == 0 {
- return false
- }
- }
- return true
-}
-
-func murmur3SeededV2(seed uint32, data []byte) uint32 {
- const (
- c1 = 0xcc9e2d51
- c2 = 0x1b873593
- r1 = 15
- r2 = 13
- m = 5
- n = 0xe6546b64
- )
- h := seed
- nblocks := len(data) / 4
- for i := 0; i < nblocks; i++ {
- k := uint32(data[4*i]) |
- (uint32(data[4*i+1]) << 8) |
- (uint32(data[4*i+2]) << 16) |
- (uint32(data[4*i+3]) << 24)
- k *= c1
- k = (k << r1) | (k >> (32 - r1))
- k *= c2
-
- h ^= k
- h = (h << r2) | (h >> (32 - r2))
- h = h*m + n
- }
-
- var k1 uint32
- tail := data[nblocks*4:]
- switch len(tail) & 3 {
- case 3:
- k1 ^= uint32(tail[2]) << 16
- fallthrough
- case 2:
- k1 ^= uint32(tail[1]) << 8
- fallthrough
- case 1:
- k1 ^= uint32(tail[0])
- k1 *= c1
- k1 = (k1 << r1) | (k1 >> (32 - r1))
- k1 *= c2
- h ^= k1
- }
-
- h ^= uint32(len(data))
- h ^= h >> 16
- h *= 0x85ebca6b
- h ^= h >> 13
- h *= 0xc2b2ae35
- h ^= h >> 16
- return h
-}
-
-func murmur3SeededV1(seed uint32, data []byte) uint32 {
- const (
- c1 = 0xcc9e2d51
- c2 = 0x1b873593
- r1 = 15
- r2 = 13
- m = 5
- n = 0xe6546b64
- )
- h := seed
- nblocks := len(data) / 4
- for i := 0; i < nblocks; i++ {
- b0 := int8(data[4*i])
- b1 := int8(data[4*i+1])
- b2 := int8(data[4*i+2])
- b3 := int8(data[4*i+3])
- k := uint32(b0) |
- (uint32(b1) << 8) |
- (uint32(b2) << 16) |
- (uint32(b3) << 24)
- k *= c1
- k = (k << r1) | (k >> (32 - r1))
- k *= c2
-
- h ^= k
- h = (h << r2) | (h >> (32 - r2))
- h = h*m + n
- }
-
- var k1 uint32
- tail := data[nblocks*4:]
- switch len(tail) & 3 {
- case 3:
- k1 ^= uint32(int8(tail[2])) << 16
- fallthrough
- case 2:
- k1 ^= uint32(int8(tail[1])) << 8
- fallthrough
- case 1:
- k1 ^= uint32(int8(tail[0]))
- k1 *= c1
- k1 = (k1 << r1) | (k1 >> (32 - r1))
- k1 *= c2
- h ^= k1
- }
-
- h ^= uint32(len(data))
- h ^= h >> 16
- h *= 0x85ebca6b
- h ^= h >> 13
- h *= 0xc2b2ae35
- h ^= h >> 16
- return h
-}
diff --git a/internal/bloom/errors.go b/internal/bloom/errors.go
deleted file mode 100644
index fe38d1bc..00000000
--- a/internal/bloom/errors.go
+++ /dev/null
@@ -1,5 +0,0 @@
-package bloom
-
-import "errors"
-
-var ErrInvalid = errors.New("bloom: invalid data")