diff options
Diffstat (limited to 'format/commitgraph')
| -rw-r--r-- | format/commitgraph/bloom/bloom.go | 3 | ||||
| -rw-r--r-- | format/commitgraph/bloom/constants.go | 8 | ||||
| -rw-r--r-- | format/commitgraph/bloom/contain.go | 29 | ||||
| -rw-r--r-- | format/commitgraph/bloom/errors.go | 5 | ||||
| -rw-r--r-- | format/commitgraph/bloom/filter.go | 11 | ||||
| -rw-r--r-- | format/commitgraph/bloom/key.go | 106 | ||||
| -rw-r--r-- | format/commitgraph/bloom/murmur.go | 127 | ||||
| -rw-r--r-- | format/commitgraph/bloom/settings.go | 31 |
8 files changed, 320 insertions, 0 deletions
diff --git a/format/commitgraph/bloom/bloom.go b/format/commitgraph/bloom/bloom.go new file mode 100644 index 00000000..9653d595 --- /dev/null +++ b/format/commitgraph/bloom/bloom.go @@ -0,0 +1,3 @@ +// Package bloom provides a bloom filter implementation used for changed-path +// filters in Git commit graphs. +package bloom diff --git a/format/commitgraph/bloom/constants.go b/format/commitgraph/bloom/constants.go new file mode 100644 index 00000000..958e551e --- /dev/null +++ b/format/commitgraph/bloom/constants.go @@ -0,0 +1,8 @@ +package bloom + +const ( + // DataHeaderSize is the size of the BDAT header in commit-graph files. + DataHeaderSize = 3 * 4 + // DefaultMaxChange matches Git's default max-changed-paths behavior. + DefaultMaxChange = 512 +) diff --git a/format/commitgraph/bloom/contain.go b/format/commitgraph/bloom/contain.go new file mode 100644 index 00000000..4789b321 --- /dev/null +++ b/format/commitgraph/bloom/contain.go @@ -0,0 +1,29 @@ +package bloom + +// MightContain reports whether the Bloom filter may contain the given path. +// +// Evaluated against the full path and each of its directory prefixes. A true +// result indicates a possible match; false means the path definitely did not +// change. +func (f *Filter) MightContain(path []byte, settings *Settings) (bool, error) { + if f == nil || settings == nil { + return false, nil + } + + if len(f.Data) == 0 { + return false, nil + } + + keys, err := keyvec(path, settings) + if err != nil { + return false, err + } + + for i := range keys { + if filterContainsKey(f, &keys[i], settings) { + return true, nil + } + } + + return false, nil +} diff --git a/format/commitgraph/bloom/errors.go b/format/commitgraph/bloom/errors.go new file mode 100644 index 00000000..fe38d1bc --- /dev/null +++ b/format/commitgraph/bloom/errors.go @@ -0,0 +1,5 @@ +package bloom + +import "errors" + +var ErrInvalid = errors.New("bloom: invalid data") diff --git a/format/commitgraph/bloom/filter.go b/format/commitgraph/bloom/filter.go new file mode 100644 index 00000000..f56e9ba3 --- /dev/null +++ b/format/commitgraph/bloom/filter.go @@ -0,0 +1,11 @@ +package bloom + +// Filter represents a changed-paths Bloom filter associated with a commit. +// +// The filter encodes which paths changed between a commit and its first +// parent. Paths are expected to be in Git's slash-separated form and +// are queried using a path and its prefixes (e.g. "a/b/c", "a/b", "a"). +type Filter struct { + Data []byte + Version uint32 +} diff --git a/format/commitgraph/bloom/key.go b/format/commitgraph/bloom/key.go new file mode 100644 index 00000000..6e49959d --- /dev/null +++ b/format/commitgraph/bloom/key.go @@ -0,0 +1,106 @@ +package bloom + +type key struct { + hashes []uint32 +} + +func keyvec(path []byte, settings *Settings) ([]key, error) { + if len(path) == 0 { + return nil, nil + } + + count := 1 + + for _, b := range path { + if b == '/' { + count++ + } + } + + keys := make([]key, 0, count) + + full, err := keyFill(path, settings) + if err != nil { + return nil, err + } + + keys = append(keys, full) + + for i := len(path) - 1; i >= 0; i-- { + if path[i] == '/' { + k, err := keyFill(path[:i], settings) + if err != nil { + return nil, err + } + + keys = append(keys, k) + } + } + + return keys, nil +} + +func keyFill(path []byte, settings *Settings) (key, error) { + const ( + seed0 = 0x293ae76f + seed1 = 0x7e646e2c + ) + + var ( + h0 uint32 + h1 uint32 + err error + ) + + if settings.HashVersion == 2 { //nolint:nestif + h0, err = murmur3SeededV2(seed0, path) + if err != nil { + return key{}, err + } + + h1, err = murmur3SeededV2(seed1, path) + if err != nil { + return key{}, err + } + } else { + h0, err = murmur3SeededV1(seed0, path) + if err != nil { + return key{}, err + } + + h1, err = murmur3SeededV1(seed1, path) + if err != nil { + return key{}, err + } + } + + hashes := make([]uint32, settings.NumHashes) + for i := range settings.NumHashes { + hashes[i] = h0 + i*h1 + } + + return key{hashes: hashes}, nil +} + +func filterContainsKey(filter *Filter, key *key, settings *Settings) bool { + if filter == nil || key == nil || settings == nil { + return false + } + + if len(filter.Data) == 0 { + return false + } + + mod := uint64(len(filter.Data)) * 8 + for _, h := range key.hashes { + idx := uint64(h) % mod + bytePos := idx / 8 + + bit := byte(1 << (idx & 7)) + if filter.Data[bytePos]&bit == 0 { + return false + } + } + + return true +} diff --git a/format/commitgraph/bloom/murmur.go b/format/commitgraph/bloom/murmur.go new file mode 100644 index 00000000..363b63ae --- /dev/null +++ b/format/commitgraph/bloom/murmur.go @@ -0,0 +1,127 @@ +package bloom + +import "codeberg.org/lindenii/furgit/internal/intconv" + +func murmur3SeededV2(seed uint32, data []byte) (uint32, error) { + const ( + c1 = 0xcc9e2d51 + c2 = 0x1b873593 + r1 = 15 + r2 = 13 + m = 5 + n = 0xe6546b64 + ) + + h := seed + + nblocks := len(data) / 4 + for i := range nblocks { + k := uint32(data[4*i]) | + (uint32(data[4*i+1]) << 8) | + (uint32(data[4*i+2]) << 16) | + (uint32(data[4*i+3]) << 24) + k *= c1 + k = (k << r1) | (k >> (32 - r1)) + k *= c2 + + h ^= k + h = (h << r2) | (h >> (32 - r2)) + h = h*m + n + } + + var k1 uint32 + + tail := data[nblocks*4:] + switch len(tail) & 3 { + case 3: + k1 ^= uint32(tail[2]) << 16 + + fallthrough + case 2: + k1 ^= uint32(tail[1]) << 8 + + fallthrough + case 1: + k1 ^= uint32(tail[0]) + k1 *= c1 + k1 = (k1 << r1) | (k1 >> (32 - r1)) + k1 *= c2 + h ^= k1 + } + + dataLen, err := intconv.IntToUint32(len(data)) + if err != nil { + return 0, err + } + + h ^= dataLen + h ^= h >> 16 + h *= 0x85ebca6b + h ^= h >> 13 + h *= 0xc2b2ae35 + h ^= h >> 16 + + return h, nil +} + +func murmur3SeededV1(seed uint32, data []byte) (uint32, error) { + const ( + c1 = 0xcc9e2d51 + c2 = 0x1b873593 + r1 = 15 + r2 = 13 + m = 5 + n = 0xe6546b64 + ) + + h := seed + + nblocks := len(data) / 4 + for i := range nblocks { + k := intconv.SignExtendByteToUint32(data[4*i]) | + (intconv.SignExtendByteToUint32(data[4*i+1]) << 8) | + (intconv.SignExtendByteToUint32(data[4*i+2]) << 16) | + (intconv.SignExtendByteToUint32(data[4*i+3]) << 24) + k *= c1 + k = (k << r1) | (k >> (32 - r1)) + k *= c2 + + h ^= k + h = (h << r2) | (h >> (32 - r2)) + h = h*m + n + } + + var k1 uint32 + + tail := data[nblocks*4:] + switch len(tail) & 3 { + case 3: + k1 ^= intconv.SignExtendByteToUint32(tail[2]) << 16 + + fallthrough + case 2: + k1 ^= intconv.SignExtendByteToUint32(tail[1]) << 8 + + fallthrough + case 1: + k1 ^= intconv.SignExtendByteToUint32(tail[0]) + k1 *= c1 + k1 = (k1 << r1) | (k1 >> (32 - r1)) + k1 *= c2 + h ^= k1 + } + + dataLen, err := intconv.IntToUint32(len(data)) + if err != nil { + return 0, err + } + + h ^= dataLen + h ^= h >> 16 + h *= 0x85ebca6b + h ^= h >> 13 + h *= 0xc2b2ae35 + h ^= h >> 16 + + return h, nil +} diff --git a/format/commitgraph/bloom/settings.go b/format/commitgraph/bloom/settings.go new file mode 100644 index 00000000..5aa122a9 --- /dev/null +++ b/format/commitgraph/bloom/settings.go @@ -0,0 +1,31 @@ +package bloom + +import "encoding/binary" + +// Settings describe the changed-paths Bloom filter parameters stored in +// commit-graph BDAT chunks. +// +// Obviously, they must match the repository's commit-graph settings to +// interpret filters correctly. +type Settings struct { + HashVersion uint32 + NumHashes uint32 + BitsPerEntry uint32 + MaxChangePaths uint32 +} + +// ParseSettings reads Bloom filter settings from a BDAT chunk header. +func ParseSettings(bdat []byte) (*Settings, error) { + if len(bdat) < DataHeaderSize { + return nil, ErrInvalid + } + + settings := &Settings{ + HashVersion: binary.BigEndian.Uint32(bdat[0:4]), + NumHashes: binary.BigEndian.Uint32(bdat[4:8]), + BitsPerEntry: binary.BigEndian.Uint32(bdat[8:12]), + MaxChangePaths: DefaultMaxChange, + } + + return settings, nil +} |
