From 6002485582541df9dff3e2c782a014564e22ed07 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Tue, 27 Jan 2026 19:05:00 +0100 Subject: hash: Use a hashAlgorithmDetails struct for single source of truth hashAlgorithm's are assumed to be valid; methods on invalid hashAlgorithms will panic from out-of-bounds read when it's not found in hashAlgorithmTable and that's expected and intended. --- hash.go | 112 +++++++++++++++++++++++++++++++++++++---------------------- hash_test.go | 2 +- obj_tree.go | 6 ++-- pack_idx.go | 6 ++-- pack_pack.go | 4 +-- refs.go | 4 +-- repo.go | 23 +++--------- repo_test.go | 2 +- 8 files changed, 87 insertions(+), 72 deletions(-) diff --git a/hash.go b/hash.go index df39ac30..cbaac821 100644 --- a/hash.go +++ b/hash.go @@ -6,59 +6,74 @@ import ( "encoding/hex" ) -// maxHashSize MUST be equal to (or larger than) the size of the -// largest hash supported in hashFuncs. +// maxHashSize MUST be >= the largest supported algorithm size. const maxHashSize = sha256.Size // hashAlgorithm identifies the hash algorithm used for Git object IDs. type hashAlgorithm uint8 -// hashFuncs maps hash algorithm to hash function. -var hashFuncs = map[hashAlgorithm]hashFunc{ - hashAlgoSHA1: func(data []byte) Hash { - sum := sha1.Sum(data) - var h Hash - copy(h.data[:], sum[:]) - h.algo = hashAlgoSHA1 - return h - }, - hashAlgoSHA256: func(data []byte) Hash { - sum := sha256.Sum256(data) - var h Hash - copy(h.data[:], sum[:]) - h.algo = hashAlgoSHA256 - return h - }, -} - const ( hashAlgoUnknown hashAlgorithm = iota hashAlgoSHA1 hashAlgoSHA256 ) -// size returns the hash size in bytes. -func (algo hashAlgorithm) size() int { - switch algo { - case hashAlgoSHA1: - return sha1.Size - case hashAlgoSHA256: - return sha256.Size - default: - return 0 - } +type hashAlgorithmDetails struct { + name string + size int + sum func([]byte) Hash +} + +var hashAlgorithmTable = [...]hashAlgorithmDetails{ + hashAlgoUnknown: {}, + hashAlgoSHA1: { + name: "sha1", + size: sha1.Size, + sum: func(data []byte) Hash { + sum := sha1.Sum(data) + var h Hash + copy(h.data[:], sum[:]) + h.algo = hashAlgoSHA1 + return h + }, + }, + hashAlgoSHA256: { + name: "sha256", + size: sha256.Size, + sum: func(data []byte) Hash { + sum := sha256.Sum256(data) + var h Hash + copy(h.data[:], sum[:]) + h.algo = hashAlgoSHA256 + return h + }, + }, +} + +func (algo hashAlgorithm) info() hashAlgorithmDetails { + return hashAlgorithmTable[algo] +} + +// Size returns the hash size in bytes. +func (algo hashAlgorithm) Size() int { + return algo.info().size } // String returns the canonical name of the hash algorithm. func (algo hashAlgorithm) String() string { - switch algo { - case hashAlgoSHA1: - return "sha1" - case hashAlgoSHA256: - return "sha256" - default: + inf := algo.info() + if inf.name == "" { return "unknown" } + return inf.name +} + +func (algo hashAlgorithm) HexLen() int { + return algo.Size() * 2 +} + +func (algo hashAlgorithm) Sum(data []byte) Hash { + return algo.info().sum(data) } // Hash represents a Git object ID. @@ -67,12 +82,9 @@ type Hash struct { data [maxHashSize]byte } -// hashFunc is a function that computes a hash from input data. -type hashFunc func([]byte) Hash - // String returns a hexadecimal string representation of the hash. func (hash Hash) String() string { - size := hash.algo.size() + size := hash.algo.Size() if size == 0 { return "" } @@ -81,7 +93,7 @@ func (hash Hash) String() string { // Bytes returns a copy of the hash's bytes. func (hash Hash) Bytes() []byte { - size := hash.algo.size() + size := hash.algo.Size() if size == 0 { return nil } @@ -90,5 +102,21 @@ func (hash Hash) Bytes() []byte { // Size returns the hash size. func (hash Hash) Size() int { - return hash.algo.size() + return hash.algo.Size() +} + +var algoByName = map[string]hashAlgorithm{} + +func init() { + for algo, info := range hashAlgorithmTable { + if info.name == "" { + continue + } + algoByName[info.name] = hashAlgorithm(algo) + } +} + +func parseHashAlgorithm(s string) (hashAlgorithm, bool) { + algo, ok := algoByName[s] + return algo, ok } diff --git a/hash_test.go b/hash_test.go index dcbd7027..0b15fd38 100644 --- a/hash_test.go +++ b/hash_test.go @@ -18,7 +18,7 @@ func TestHashParse(t *testing.T) { var validHash string var expectedSize int - if repo.hashAlgo.size() == 32 { + if repo.hashAlgo.Size() == 32 { validHash = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" expectedSize = 32 } else { diff --git a/obj_tree.go b/obj_tree.go index 1a63d729..07c77441 100644 --- a/obj_tree.go +++ b/obj_tree.go @@ -78,13 +78,13 @@ func parseTree(id Hash, body []byte, repo *Repository) (*StoredTree, error) { nameBytes := body[i : i+nul] i += nul + 1 - if i+repo.hashAlgo.size() > len(body) { + if i+repo.hashAlgo.Size() > len(body) { return nil, errors.New("furgit: tree: truncated child hash") } var child Hash - copy(child.data[:], body[i:i+repo.hashAlgo.size()]) + copy(child.data[:], body[i:i+repo.hashAlgo.Size()]) child.algo = repo.hashAlgo - i += repo.hashAlgo.size() + i += repo.hashAlgo.Size() mode, err := strconv.ParseUint(string(modeBytes), 8, 32) if err != nil { diff --git a/pack_idx.go b/pack_idx.go index c4c166ef..0dbb9bcf 100644 --- a/pack_idx.go +++ b/pack_idx.go @@ -163,7 +163,7 @@ func (pi *packIndex) parse(buf []byte) error { nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) namesStart := fanoutEnd - namesEnd := namesStart + nobj*pi.repo.hashAlgo.size() + namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() if namesEnd > len(buf) { return ErrInvalidObject } @@ -183,7 +183,7 @@ func (pi *packIndex) parse(buf []byte) error { pi.offset32 = buf[off32Start:off32End] off64Start := off32End - trailerStart := len(buf) - 2*pi.repo.hashAlgo.size() + trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() if trailerStart < off64Start { return ErrInvalidObject } @@ -253,7 +253,7 @@ func (pi *packIndex) lookup(id Hash) (packlocation, error) { lo = int(pi.fanoutEntry(first - 1)) } hi := int(pi.fanoutEntry(first)) - idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.size(), lo, hi, id) + idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) if !found { return packlocation{}, ErrNotFound } diff --git a/pack_pack.go b/pack_pack.go index 6e0806b1..628fa258 100644 --- a/pack_pack.go +++ b/pack_pack.go @@ -176,7 +176,7 @@ func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[pa case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: return ty, declaredSize, nil case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.size()) + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) if hashEnd > uint64(len(pf.data)) { return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF } @@ -273,7 +273,7 @@ func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectT resultTy = ty resolved = true case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.size()) + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) if hashEnd > uint64(len(pf.data)) { return fail(io.ErrUnexpectedEOF) } diff --git a/refs.go b/refs.go index 372d31fd..6efdf5ec 100644 --- a/refs.go +++ b/refs.go @@ -70,7 +70,7 @@ func (repo *Repository) resolvePackedRef(refname string) (Ref, error) { } sp := bytes.IndexByte(line, ' ') - if sp != repo.hashAlgo.size()*2 { + if sp != repo.hashAlgo.Size()*2 { continue } @@ -428,7 +428,7 @@ func (repo *Repository) ListRefs(pattern string) ([]Ref, error) { } sp := bytes.IndexByte(line, ' ') - if sp != repo.hashAlgo.size()*2 { + if sp != repo.hashAlgo.Size()*2 { lastIdx = -1 continue } diff --git a/repo.go b/repo.go index 8c0cd95e..4a4ebcc6 100644 --- a/repo.go +++ b/repo.go @@ -63,23 +63,11 @@ func OpenRepository(path string) (*Repository, error) { algo = "sha1" } - var hashAlgo hashAlgorithm - switch algo { - case "sha1": - hashAlgo = hashAlgoSHA1 - case "sha256": - hashAlgo = hashAlgoSHA256 - default: + hashAlgo, ok := parseHashAlgorithm(algo) + if !ok { return nil, fmt.Errorf("furgit: unsupported hash algorithm %q", algo) } - if hashAlgo.size() == 0 { - return nil, fmt.Errorf("furgit: unsupported hash algorithm %q", algo) - } - if _, ok := hashFuncs[hashAlgo]; !ok { - return nil, fmt.Errorf("furgit: hash algorithm %q is not supported by the hash functions provided by this build", algo) - } - return &Repository{ rootPath: path, hashAlgo: hashAlgo, @@ -130,9 +118,9 @@ func (repo *Repository) ParseHash(s string) (Hash, error) { if len(s)%2 != 0 { return id, fmt.Errorf("furgit: invalid hash length %d, it has to be even at the very least", len(s)) } - expectedLen := repo.hashAlgo.size() * 2 + expectedLen := repo.hashAlgo.Size() * 2 if len(s) != expectedLen { - return id, fmt.Errorf("furgit: hash length mismatch: got %d chars, expected %d for hash size %d", len(s), expectedLen, repo.hashAlgo.size()) + return id, fmt.Errorf("furgit: hash length mismatch: got %d chars, expected %d for hash size %d", len(s), expectedLen, repo.hashAlgo.Size()) } data, err := hex.DecodeString(s) if err != nil { @@ -145,8 +133,7 @@ func (repo *Repository) ParseHash(s string) (Hash, error) { // computeRawHash computes a hash from raw data using the repository's hash algorithm. func (repo *Repository) computeRawHash(data []byte) Hash { - hashFunc := hashFuncs[repo.hashAlgo] - return hashFunc(data) + return repo.hashAlgo.Sum(data) } // verifyRawObject verifies a raw object against its expected hash. diff --git a/repo_test.go b/repo_test.go index c4a0e059..3e622e37 100644 --- a/repo_test.go +++ b/repo_test.go @@ -17,7 +17,7 @@ func TestRepositoryOpen(t *testing.T) { if repo.rootPath != repoPath { t.Errorf("rootPath: got %q, want %q", repo.rootPath, repoPath) } - hashSize := repo.hashAlgo.size() + hashSize := repo.hashAlgo.Size() if hashSize != 32 && hashSize != 20 { t.Errorf("hashSize: got %d, want 32 (SHA-256) or 20 (SHA-1)", hashSize) } -- cgit v1.3.1-10-gc9f91