aboutsummaryrefslogtreecommitdiff
path: root/format/commitgraph/bloom
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-03-26 09:17:14 +0000
committerGravatar Runxi Yu2026-03-26 09:18:30 +0000
commit3e884f5f3d42cbc4874a04da31dde10314b0cfad (patch)
treef5e1e325fd1a2a0801791c054010213214475d80 /format/commitgraph/bloom
parentnetwork/receivepack: Rename from receivepack (diff)
signatureNo signature
format: Move commitgraph and packfile here
Diffstat (limited to 'format/commitgraph/bloom')
-rw-r--r--format/commitgraph/bloom/bloom.go3
-rw-r--r--format/commitgraph/bloom/constants.go8
-rw-r--r--format/commitgraph/bloom/contain.go25
-rw-r--r--format/commitgraph/bloom/errors.go5
-rw-r--r--format/commitgraph/bloom/filter.go26
-rw-r--r--format/commitgraph/bloom/key.go117
-rw-r--r--format/commitgraph/bloom/murmur.go127
-rw-r--r--format/commitgraph/bloom/settings.go50
8 files changed, 361 insertions, 0 deletions
diff --git a/format/commitgraph/bloom/bloom.go b/format/commitgraph/bloom/bloom.go
new file mode 100644
index 00000000..9653d595
--- /dev/null
+++ b/format/commitgraph/bloom/bloom.go
@@ -0,0 +1,3 @@
+// Package bloom provides a bloom filter implementation used for changed-path
+// filters in Git commit graphs.
+package bloom
diff --git a/format/commitgraph/bloom/constants.go b/format/commitgraph/bloom/constants.go
new file mode 100644
index 00000000..958e551e
--- /dev/null
+++ b/format/commitgraph/bloom/constants.go
@@ -0,0 +1,8 @@
+package bloom
+
+const (
+ // DataHeaderSize is the size of the BDAT header in commit-graph files.
+ DataHeaderSize = 3 * 4
+ // DefaultMaxChange matches Git's default max-changed-paths behavior.
+ DefaultMaxChange = 512
+)
diff --git a/format/commitgraph/bloom/contain.go b/format/commitgraph/bloom/contain.go
new file mode 100644
index 00000000..331b7687
--- /dev/null
+++ b/format/commitgraph/bloom/contain.go
@@ -0,0 +1,25 @@
+package bloom
+
+// MightContain reports whether the Bloom filter may contain the given path.
+//
+// Evaluated against the full path and each of its directory prefixes. A true
+// result indicates a possible match; false means the path definitely did not
+// change.
+func (f *Filter) MightContain(path []byte) (bool, error) {
+ if len(f.Data) == 0 {
+ return false, nil
+ }
+
+ keys, err := keyvec(path, f)
+ if err != nil {
+ return false, err
+ }
+
+ for i := range keys {
+ if filterContainsKey(f, keys[i]) {
+ return true, nil
+ }
+ }
+
+ return false, nil
+}
diff --git a/format/commitgraph/bloom/errors.go b/format/commitgraph/bloom/errors.go
new file mode 100644
index 00000000..fe38d1bc
--- /dev/null
+++ b/format/commitgraph/bloom/errors.go
@@ -0,0 +1,5 @@
+package bloom
+
+import "errors"
+
+var ErrInvalid = errors.New("bloom: invalid data")
diff --git a/format/commitgraph/bloom/filter.go b/format/commitgraph/bloom/filter.go
new file mode 100644
index 00000000..395dd5ce
--- /dev/null
+++ b/format/commitgraph/bloom/filter.go
@@ -0,0 +1,26 @@
+package bloom
+
+// Filter represents a changed-paths Bloom filter associated with a commit.
+//
+// The filter encodes which paths changed between a commit and its first
+// parent. Paths are expected to be in Git's slash-separated form and
+// are queried using a path and its prefixes (e.g. "a/b/c", "a/b", "a").
+type Filter struct {
+ Data []byte
+
+ HashVersion uint32
+ NumHashes uint32
+ BitsPerEntry uint32
+ MaxChangePaths uint32
+}
+
+// NewFilter constructs one query-ready bloom filter from raw data/settings.
+func NewFilter(data []byte, settings Settings) Filter {
+ return Filter{
+ Data: data,
+ HashVersion: settings.HashVersion,
+ NumHashes: settings.NumHashes,
+ BitsPerEntry: settings.BitsPerEntry,
+ MaxChangePaths: settings.MaxChangePaths,
+ }
+}
diff --git a/format/commitgraph/bloom/key.go b/format/commitgraph/bloom/key.go
new file mode 100644
index 00000000..a15df904
--- /dev/null
+++ b/format/commitgraph/bloom/key.go
@@ -0,0 +1,117 @@
+package bloom
+
+import "codeberg.org/lindenii/furgit/internal/intconv"
+
+type key struct {
+ hashes []uint32
+}
+
+func keyvec(path []byte, filter *Filter) ([]key, error) {
+ if len(path) == 0 {
+ return nil, nil
+ }
+
+ count := 1
+
+ for _, b := range path {
+ if b == '/' {
+ count++
+ }
+ }
+
+ keys := make([]key, 0, count)
+
+ full, err := keyFill(path, filter)
+ if err != nil {
+ return nil, err
+ }
+
+ keys = append(keys, full)
+
+ for i := len(path) - 1; i >= 0; i-- {
+ if path[i] == '/' {
+ k, err := keyFill(path[:i], filter)
+ if err != nil {
+ return nil, err
+ }
+
+ keys = append(keys, k)
+ }
+ }
+
+ return keys, nil
+}
+
+func keyFill(path []byte, filter *Filter) (key, error) {
+ const (
+ seed0 = 0x293ae76f
+ seed1 = 0x7e646e2c
+ )
+
+ var (
+ h0 uint32
+ h1 uint32
+ err error
+ )
+
+ switch filter.HashVersion {
+ case 2:
+ h0, err = murmur3SeededV2(seed0, path)
+ if err != nil {
+ return key{}, err
+ }
+
+ h1, err = murmur3SeededV2(seed1, path)
+ if err != nil {
+ return key{}, err
+ }
+ case 1:
+ h0, err = murmur3SeededV1(seed0, path)
+ if err != nil {
+ return key{}, err
+ }
+
+ h1, err = murmur3SeededV1(seed1, path)
+ if err != nil {
+ return key{}, err
+ }
+ default:
+ return key{}, ErrInvalid
+ }
+
+ hashCount, err := intconv.Uint32ToInt(filter.NumHashes)
+ if err != nil {
+ return key{}, ErrInvalid
+ }
+
+ hashes := make([]uint32, hashCount)
+ for i := range hashCount {
+ iU32, err := intconv.IntToUint32(i)
+ if err != nil {
+ return key{}, ErrInvalid
+ }
+
+ hashes[i] = h0 + iU32*h1
+ }
+
+ return key{hashes: hashes}, nil
+}
+
+func filterContainsKey(filter *Filter, key key) bool {
+ if len(filter.Data) == 0 {
+ return false
+ }
+
+ mod := uint64(len(filter.Data)) * 8
+ for _, h := range key.hashes {
+ idx := uint64(h) % mod
+ bytePos := idx / 8
+
+ bit := byte(1 << (idx & 7))
+ if filter.Data[bytePos]&bit == 0 {
+ return false
+ }
+ }
+
+ return true
+}
diff --git a/format/commitgraph/bloom/murmur.go b/format/commitgraph/bloom/murmur.go
new file mode 100644
index 00000000..363b63ae
--- /dev/null
+++ b/format/commitgraph/bloom/murmur.go
@@ -0,0 +1,127 @@
+package bloom
+
+import "codeberg.org/lindenii/furgit/internal/intconv"
+
+func murmur3SeededV2(seed uint32, data []byte) (uint32, error) {
+ const (
+ c1 = 0xcc9e2d51
+ c2 = 0x1b873593
+ r1 = 15
+ r2 = 13
+ m = 5
+ n = 0xe6546b64
+ )
+
+ h := seed
+
+ nblocks := len(data) / 4
+ for i := range nblocks {
+ k := uint32(data[4*i]) |
+ (uint32(data[4*i+1]) << 8) |
+ (uint32(data[4*i+2]) << 16) |
+ (uint32(data[4*i+3]) << 24)
+ k *= c1
+ k = (k << r1) | (k >> (32 - r1))
+ k *= c2
+
+ h ^= k
+ h = (h << r2) | (h >> (32 - r2))
+ h = h*m + n
+ }
+
+ var k1 uint32
+
+ tail := data[nblocks*4:]
+ switch len(tail) & 3 {
+ case 3:
+ k1 ^= uint32(tail[2]) << 16
+
+ fallthrough
+ case 2:
+ k1 ^= uint32(tail[1]) << 8
+
+ fallthrough
+ case 1:
+ k1 ^= uint32(tail[0])
+ k1 *= c1
+ k1 = (k1 << r1) | (k1 >> (32 - r1))
+ k1 *= c2
+ h ^= k1
+ }
+
+ dataLen, err := intconv.IntToUint32(len(data))
+ if err != nil {
+ return 0, err
+ }
+
+ h ^= dataLen
+ h ^= h >> 16
+ h *= 0x85ebca6b
+ h ^= h >> 13
+ h *= 0xc2b2ae35
+ h ^= h >> 16
+
+ return h, nil
+}
+
+func murmur3SeededV1(seed uint32, data []byte) (uint32, error) {
+ const (
+ c1 = 0xcc9e2d51
+ c2 = 0x1b873593
+ r1 = 15
+ r2 = 13
+ m = 5
+ n = 0xe6546b64
+ )
+
+ h := seed
+
+ nblocks := len(data) / 4
+ for i := range nblocks {
+ k := intconv.SignExtendByteToUint32(data[4*i]) |
+ (intconv.SignExtendByteToUint32(data[4*i+1]) << 8) |
+ (intconv.SignExtendByteToUint32(data[4*i+2]) << 16) |
+ (intconv.SignExtendByteToUint32(data[4*i+3]) << 24)
+ k *= c1
+ k = (k << r1) | (k >> (32 - r1))
+ k *= c2
+
+ h ^= k
+ h = (h << r2) | (h >> (32 - r2))
+ h = h*m + n
+ }
+
+ var k1 uint32
+
+ tail := data[nblocks*4:]
+ switch len(tail) & 3 {
+ case 3:
+ k1 ^= intconv.SignExtendByteToUint32(tail[2]) << 16
+
+ fallthrough
+ case 2:
+ k1 ^= intconv.SignExtendByteToUint32(tail[1]) << 8
+
+ fallthrough
+ case 1:
+ k1 ^= intconv.SignExtendByteToUint32(tail[0])
+ k1 *= c1
+ k1 = (k1 << r1) | (k1 >> (32 - r1))
+ k1 *= c2
+ h ^= k1
+ }
+
+ dataLen, err := intconv.IntToUint32(len(data))
+ if err != nil {
+ return 0, err
+ }
+
+ h ^= dataLen
+ h ^= h >> 16
+ h *= 0x85ebca6b
+ h ^= h >> 13
+ h *= 0xc2b2ae35
+ h ^= h >> 16
+
+ return h, nil
+}
diff --git a/format/commitgraph/bloom/settings.go b/format/commitgraph/bloom/settings.go
new file mode 100644
index 00000000..764653bd
--- /dev/null
+++ b/format/commitgraph/bloom/settings.go
@@ -0,0 +1,50 @@
+package bloom
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+// Settings describe the changed-paths Bloom filter parameters stored in
+// commit-graph BDAT chunks.
+//
+// Obviously, they must match the repository's commit-graph settings to
+// interpret filters correctly.
+type Settings struct {
+ HashVersion uint32
+ NumHashes uint32
+ BitsPerEntry uint32
+ MaxChangePaths uint32
+}
+
+// ParseSettings reads Bloom filter settings from a BDAT chunk header.
+func ParseSettings(bdat []byte) (*Settings, error) {
+ if len(bdat) < DataHeaderSize {
+ return nil, ErrInvalid
+ }
+
+ settings := &Settings{
+ HashVersion: binary.BigEndian.Uint32(bdat[0:4]),
+ NumHashes: binary.BigEndian.Uint32(bdat[4:8]),
+ BitsPerEntry: binary.BigEndian.Uint32(bdat[8:12]),
+ MaxChangePaths: DefaultMaxChange,
+ }
+
+ switch settings.HashVersion {
+ case 1, 2:
+ default:
+ return nil, ErrInvalid
+ }
+
+ if settings.NumHashes == 0 {
+ return nil, ErrInvalid
+ }
+
+ _, err := intconv.Uint32ToInt(settings.NumHashes)
+ if err != nil {
+ return nil, ErrInvalid
+ }
+
+ return settings, nil
+}