// Package bloom provides a bloom filter implementation used for changed-path
// filters in Git commit graphs.
package bloom

import "encoding/binary"

const (
	// DataHeaderSize is the length in bytes of the header at the start of a
	// commit-graph BDAT chunk: three consecutive 32-bit fields (hash
	// version, number of hashes, bits per entry).
	DataHeaderSize = 4 + 4 + 4
	// DefaultMaxChange is the changed-path limit assumed for filters; it
	// matches Git's default max-changed-paths behavior.
	DefaultMaxChange = 512
)

// Settings describes the changed-paths Bloom filter parameters stored in a
// commit-graph BDAT chunk.
//
// The values must match the settings the repository's commit-graph was
// written with; otherwise filters are interpreted incorrectly.
//
// HashVersion selects the hash used to derive keys: 2 selects the version-2
// murmur3 variant and every other value falls back to the version-1 variant.
// NumHashes is the number of hash values derived for each path.
// BitsPerEntry is carried over from the BDAT header; membership checks size
// the filter from the length of its data rather than from this field.
// MaxChangePaths is not part of the header; ParseSettings fills it in with
// DefaultMaxChange.
type Settings struct {
	HashVersion    uint32
	NumHashes      uint32
	BitsPerEntry   uint32
	MaxChangePaths uint32
}

// Filter represents a changed-paths Bloom filter associated with a commit.
//
// The filter encodes which paths changed between a commit and its first
// parent. Paths are expected to be in Git's slash-separated form and
// are queried using a path and its prefixes (e.g. "a/b/c", "a/b", "a").
//
// Data is the filter's bit array: bit i is stored in byte i/8 at bit
// position i%8, counting from the least significant bit. A Filter with empty
// Data never reports a match.
//
// Version is not consulted by the lookup code in this file.
// NOTE(review): presumably it records the hash version the filter was
// written with; confirm against the code that populates it.
type Filter struct {
	Data    []byte
	Version uint32
}

// ParseSettings decodes the Bloom filter settings found at the start of a
// BDAT chunk.
//
// The header consists of three big-endian 32-bit words, in order: hash
// version, number of hashes, and bits per entry. Bytes after the header are
// ignored. MaxChangePaths is not stored in the chunk and is always set to
// DefaultMaxChange.
//
// It returns ErrInvalid when bdat is shorter than DataHeaderSize.
func ParseSettings(bdat []byte) (*Settings, error) {
	if len(bdat) < DataHeaderSize {
		return nil, ErrInvalid
	}

	// The length check above guarantees all three 4-byte words are present.
	var words [3]uint32
	for i := range words {
		words[i] = binary.BigEndian.Uint32(bdat[4*i:])
	}

	return &Settings{
		HashVersion:    words[0],
		NumHashes:      words[1],
		BitsPerEntry:   words[2],
		MaxChangePaths: DefaultMaxChange,
	}, nil
}

// MightContain reports whether the Bloom filter may contain the given path.
//
// The path is expanded into one key for the full path plus one key for each
// of its directory prefixes ("a/b/c" yields "a/b/c", "a/b" and "a"). Git
// records every changed path together with all of its leading directories,
// so a path that really changed has all of these keys present. The method
// therefore reports true only when every key is found; a single missing key
// proves the path did not change. Accepting a match on any one key would
// still be free of false negatives, but it would answer "maybe" for every
// path underneath any changed directory, which defeats the filter.
//
// Trailing slashes are ignored, so "a/b/" is queried as "a/b"; directory
// names are never hashed with a trailing slash.
//
// A true result indicates a possible match; false means the path definitely
// did not change according to this filter. The method also returns false
// when f or settings is nil, when the filter holds no data, or when path is
// empty (after trimming); callers that need to tell "no usable filter" apart
// from "definitely unchanged" must check for those cases themselves.
func (f *Filter) MightContain(path []byte, settings *Settings) bool {
	if f == nil || settings == nil {
		return false
	}
	if len(f.Data) == 0 {
		return false
	}

	// Normalize "dir/" to "dir" so the full-path key is one that a filter
	// can actually contain; otherwise the all-keys check below would reject
	// directory queries written with a trailing slash.
	for len(path) > 0 && path[len(path)-1] == '/' {
		path = path[:len(path)-1]
	}

	keys := keyvec(path, settings)
	if len(keys) == 0 {
		// Nothing to look up (empty path): keep reporting no match.
		return false
	}
	for i := range keys {
		if !filterContainsKey(f, &keys[i], settings) {
			return false
		}
	}
	return true
}

// key holds the hash values derived from a single path (or path prefix).
// keyFill produces one entry per configured hash, and filterContainsKey
// maps each entry to one bit position in a filter.
type key struct {
	hashes []uint32
}

// keyvec builds the lookup keys for path: the key of the full path first,
// followed by the key of every prefix that ends just before a '/', ordered
// from the longest prefix to the shortest. For "a/b/c" the keys are those of
// "a/b/c", "a/b" and "a".
//
// It returns nil for an empty path.
func keyvec(path []byte, settings *Settings) []key {
	n := len(path)
	if n == 0 {
		return nil
	}

	// One key for the path itself plus one per separator.
	slashes := 0
	for i := 0; i < n; i++ {
		if path[i] == '/' {
			slashes++
		}
	}

	out := make([]key, 1, slashes+1)
	out[0] = keyFill(path, settings)

	// Walk backwards so longer prefixes are emitted before shorter ones.
	for end := n - 1; end >= 0; end-- {
		if path[end] != '/' {
			continue
		}
		out = append(out, keyFill(path[:end], settings))
	}
	return out
}

// keyFill derives the settings.NumHashes hash values for path.
//
// Two murmur3 hashes of the path are computed with fixed seeds, using the
// version-2 function when settings.HashVersion is 2 and the version-1
// function otherwise. The i-th value is first + i*second, with uint32
// wrap-around (double hashing).
func keyFill(path []byte, settings *Settings) key {
	const (
		seedFirst  uint32 = 0x293ae76f
		seedSecond uint32 = 0x7e646e2c
	)

	hash := murmur3SeededV1
	if settings.HashVersion == 2 {
		hash = murmur3SeededV2
	}
	first := hash(seedFirst, path)
	second := hash(seedSecond, path)

	out := make([]uint32, settings.NumHashes)
	// Adding second once per step yields first + i*second modulo 2^32.
	next := first
	for i := range out {
		out[i] = next
		next += second
	}
	return key{hashes: out}
}

// filterContainsKey reports whether every bit addressed by key is set in
// filter.
//
// Each hash is reduced modulo the number of bits in filter.Data; the
// resulting bit index selects byte index/8 and, within it, bit index%8
// counting from the least significant bit. It returns false as soon as one
// addressed bit is clear, and also when any argument is nil or the filter
// has no data. A key without hashes matches trivially.
func filterContainsKey(filter *Filter, key *key, settings *Settings) bool {
	switch {
	case filter == nil, key == nil, settings == nil:
		return false
	case len(filter.Data) == 0:
		return false
	}

	nbits := uint64(len(filter.Data)) << 3
	for i := range key.hashes {
		pos := uint64(key.hashes[i]) % nbits
		if (filter.Data[pos>>3]>>(pos&7))&1 == 0 {
			return false
		}
	}
	return true
}

// murmur3SeededV2 computes the 32-bit murmur3 hash of data starting from
// seed. Every byte is treated as an unsigned value in the range 0..255.
//
// The input is consumed as little-endian 32-bit words; the one to three
// bytes left over are folded in separately, and the total length is mixed in
// before the final avalanche.
func murmur3SeededV2(seed uint32, data []byte) uint32 {
	const (
		mulA   uint32 = 0xcc9e2d51
		mulB   uint32 = 0x1b873593
		addend uint32 = 0xe6546b64
	)
	// scramble is the per-word mixing step: multiply, rotate left by 15,
	// multiply again.
	scramble := func(w uint32) uint32 {
		w *= mulA
		w = w<<15 | w>>17
		return w * mulB
	}

	state := seed
	rest := data
	for ; len(rest) >= 4; rest = rest[4:] {
		state ^= scramble(binary.LittleEndian.Uint32(rest))
		state = state<<13 | state>>19 // rotate left by 13
		state = state*5 + addend
	}

	// rest now holds the 0..3 bytes that did not fill a whole word.
	if len(rest) > 0 {
		var last uint32
		for i, b := range rest {
			last ^= uint32(b) << (8 * uint(i))
		}
		state ^= scramble(last)
	}

	// Finalization: mix in the length, then avalanche.
	state ^= uint32(len(data))
	state ^= state >> 16
	state *= 0x85ebca6b
	state ^= state >> 13
	state *= 0xc2b2ae35
	state ^= state >> 16
	return state
}

// murmur3SeededV1 computes a 32-bit murmur3-style hash of data starting from
// seed, treating every input byte as a signed 8-bit value.
//
// Bytes at or above 0x80 are sign-extended to 32 bits before they are
// shifted into place, so they also set the bits above their own position.
// For input made only of bytes below 0x80 the result equals that of
// murmur3SeededV2; for other input it generally differs.
// NOTE(review): this presumably reproduces the hashing of Git's version-1
// changed-path filters; confirm against Git before altering the
// sign-extension.
func murmur3SeededV1(seed uint32, data []byte) uint32 {
	const (
		mulA   uint32 = 0xcc9e2d51
		mulB   uint32 = 0x1b873593
		addend uint32 = 0xe6546b64
	)
	// widen converts a byte to uint32 by way of int8, i.e. with sign
	// extension.
	widen := func(b byte) uint32 { return uint32(int8(b)) }
	// scramble is the per-word mixing step: multiply, rotate left by 15,
	// multiply again.
	scramble := func(w uint32) uint32 {
		w *= mulA
		w = w<<15 | w>>17
		return w * mulB
	}

	state := seed
	whole := len(data) &^ 3 // length rounded down to a multiple of four
	for off := 0; off < whole; off += 4 {
		word := widen(data[off]) |
			widen(data[off+1])<<8 |
			widen(data[off+2])<<16 |
			widen(data[off+3])<<24
		state ^= scramble(word)
		state = state<<13 | state>>19 // rotate left by 13
		state = state*5 + addend
	}

	// Fold in the 0..3 bytes that did not fill a whole word.
	if rest := data[whole:]; len(rest) > 0 {
		var last uint32
		for i, b := range rest {
			last ^= widen(b) << (8 * uint(i))
		}
		state ^= scramble(last)
	}

	// Finalization: mix in the length, then avalanche.
	state ^= uint32(len(data))
	state ^= state >> 16
	state *= 0x85ebca6b
	state ^= state >> 13
	state *= 0xc2b2ae35
	state ^= state >> 16
	return state
}