diff options
| author | 2026-01-30 15:46:49 +0100 | |
|---|---|---|
| committer | 2026-01-30 15:46:49 +0100 | |
| commit | 1ea061c92ca6ad435c00bea458b8f24a5e1a822a (patch) | |
| tree | 1ca3e0455546c57cedbb017c24a2ef0da0397318 | |
| parent | README: Add writing thin packs to the list of things to do (diff) | |
| signature | No signature | |
commitgraph: Add basic commit-graph implementation
| -rw-r--r-- | commitgraph_read.go | 600 | ||||
| -rw-r--r-- | commitgraph_read_access.go | 124 | ||||
| -rw-r--r-- | commitgraph_read_test.go | 558 | ||||
| -rw-r--r-- | repo.go | 13 |
4 files changed, 1294 insertions, 1 deletions
diff --git a/commitgraph_read.go b/commitgraph_read.go new file mode 100644 index 00000000..978a2d61 --- /dev/null +++ b/commitgraph_read.go @@ -0,0 +1,600 @@ +package furgit + +import ( + "bufio" + "bytes" + "encoding/binary" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "syscall" + + "codeberg.org/lindenii/furgit/internal/bloom" +) + +const ( + commitGraphSignature = 0x43475048 // "CGPH" + commitGraphVersion = 1 +) + +const ( + commitGraphChunkOIDF = 0x4f494446 // "OIDF" + commitGraphChunkOIDL = 0x4f49444c // "OIDL" + commitGraphChunkCDAT = 0x43444154 // "CDAT" + commitGraphChunkGDA2 = 0x47444132 // "GDA2" + commitGraphChunkGDO2 = 0x47444f32 // "GDO2" + commitGraphChunkEDGE = 0x45444745 // "EDGE" + commitGraphChunkBIDX = 0x42494458 // "BIDX" + commitGraphChunkBDAT = 0x42444154 // "BDAT" + commitGraphChunkBASE = 0x42415345 // "BASE" +) + +const ( + commitGraphHeaderSize = 8 + commitGraphChunkEntrySize = 12 + commitGraphFanoutSize = 256 * 4 +) + +type commitGraph struct { + repo *Repository + + filename string + data []byte + dataLen int + + numChunks uint8 + numBase uint8 + hashAlgo hashAlgorithm + + numCommits uint32 + numCommitsInBase uint32 + + base *commitGraph + + chunkOIDFanout []byte + chunkOIDLookup []byte + chunkCommit []byte + + chunkGeneration []byte + chunkGenerationOv []byte + readGeneration bool + + chunkExtraEdges []byte + + chunkBloomIndex []byte + chunkBloomData []byte + + chunkBaseGraphs []byte + + bloomSettings *bloom.Settings + + closeOnce sync.Once +} + +func (cg *commitGraph) Close() error { + if cg == nil { + return nil + } + var closeErr error + cg.closeOnce.Do(func() { + if len(cg.data) > 0 { + if err := syscall.Munmap(cg.data); closeErr == nil { + closeErr = err + } + cg.data = nil + } + }) + if cg.base != nil { + _ = cg.base.Close() + } + return closeErr +} + +func (repo *Repository) CommitGraph() (*commitGraph, error) { + if repo == nil { + return nil, ErrInvalidObject + } + repo.commitGraphOnce.Do(func() { + repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph() + }) + return repo.commitGraph, repo.commitGraphErr +} + +func (repo *Repository) loadCommitGraph() (*commitGraph, error) { + cg, err := repo.loadCommitGraphSingle() + if err == nil { + return cg, nil + } + if !errors.Is(err, ErrNotFound) { + return nil, err + } + return repo.loadCommitGraphChain() +} + +func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) { + path := repo.repoPath(filepath.Join("objects", "info", "commit-graph")) + return repo.loadCommitGraphFile(path) +} + +func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) { + chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain")) + f, err := os.Open(chainPath) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + defer func() { _ = f.Close() }() + + var hashes []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + hashes = append(hashes, line) + } + if err := scanner.Err(); err != nil { + return nil, err + } + if len(hashes) == 0 { + return nil, ErrNotFound + } + + var chain *commitGraph + for i, h := range hashes { + graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h))) + g, err := repo.loadCommitGraphFile(graphPath) + if err != nil { + return nil, err + } + if i > 0 { + if g.chunkBaseGraphs == nil { + return nil, ErrInvalidObject + } + if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() { + return nil, ErrInvalidObject + } + for j := 0; j < i; j++ { + want := hashes[j] + start := j * repo.hashAlgo.Size() + end := start + repo.hashAlgo.Size() + got := hexEncode(g.chunkBaseGraphs[start:end]) + if got != want { + return nil, ErrInvalidObject + } + } + } + if chain != nil { + if chain.numCommits+chain.numCommitsInBase > ^uint32(0) { + return nil, ErrInvalidObject + } + g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase + } + g.base = chain + chain = g + } + validateCommitGraphChain(chain) + return chain, nil +} + +func validateCommitGraphChain(head *commitGraph) { + if head == nil { + return + } + readGeneration := true + for g := head; g != nil; g = g.base { + readGeneration = readGeneration && g.readGeneration + } + if !readGeneration { + for g := head; g != nil; g = g.base { + g.readGeneration = false + } + } + + var settings *bloom.Settings + for g := head; g != nil; g = g.base { + if g.bloomSettings == nil { + continue + } + if settings == nil { + settings = g.bloomSettings + continue + } + if settings.HashVersion != g.bloomSettings.HashVersion || + settings.NumHashes != g.bloomSettings.NumHashes || + settings.BitsPerEntry != g.bloomSettings.BitsPerEntry { + g.chunkBloomIndex = nil + g.chunkBloomData = nil + g.bloomSettings = nil + } + } +} + +func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) { + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return nil, err + } + if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) { + _ = f.Close() + return nil, ErrInvalidObject + } + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return nil, err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return nil, err + } + cg, err := parseCommitGraph(repo, path, region) + if err != nil { + _ = syscall.Munmap(region) + return nil, err + } + cg.data = region + return cg, nil +} + +func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) { + if len(data) < commitGraphHeaderSize { + return nil, ErrInvalidObject + } + sig := binary.BigEndian.Uint32(data[0:4]) + if sig != commitGraphSignature { + return nil, ErrInvalidObject + } + version := data[4] + if version != commitGraphVersion { + return nil, ErrInvalidObject + } + hashVersion := data[5] + if hashVersion != hashAlgoVersion(repo.hashAlgo) { + return nil, ErrInvalidObject + } + numChunks := data[6] + numBase := data[7] + + tocStart := commitGraphHeaderSize + tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize + if len(data) < tocStart+tocLen+repo.hashAlgo.Size() { + return nil, ErrInvalidObject + } + + trailerStart := len(data) - repo.hashAlgo.Size() + + type chunkEntry struct { + id uint32 + offset uint64 + } + entries := make([]chunkEntry, 0, numChunks+1) + for i := 0; i < int(numChunks)+1; i++ { + pos := tocStart + i*commitGraphChunkEntrySize + id := binary.BigEndian.Uint32(data[pos : pos+4]) + offset := binary.BigEndian.Uint64(data[pos+4 : pos+12]) + entries = append(entries, chunkEntry{id: id, offset: offset}) + } + + chunks := map[uint32][]byte{} + for i := 0; i < len(entries)-1; i++ { + id := entries[i].id + if id == 0 { + break + } + start := int(entries[i].offset) + end := int(entries[i+1].offset) + if start < tocStart+tocLen || end < start || end > trailerStart { + return nil, ErrInvalidObject + } + chunks[id] = data[start:end] + } + + cg := &commitGraph{ + repo: repo, + filename: filename, + dataLen: len(data), + hashAlgo: repo.hashAlgo, + numChunks: numChunks, + numBase: numBase, + } + + oidf := chunks[commitGraphChunkOIDF] + if len(oidf) != commitGraphFanoutSize { + return nil, ErrInvalidObject + } + cg.chunkOIDFanout = oidf + cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:]) + for i := 0; i < 255; i++ { + if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) > + binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) { + return nil, ErrInvalidObject + } + } + + oidl := chunks[commitGraphChunkOIDL] + if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() { + return nil, ErrInvalidObject + } + cg.chunkOIDLookup = oidl + + cdat := chunks[commitGraphChunkCDAT] + if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) { + return nil, ErrInvalidObject + } + cg.chunkCommit = cdat + + if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 { + if len(gda2) != int(cg.numCommits)*4 { + return nil, ErrInvalidObject + } + cg.chunkGeneration = gda2 + cg.readGeneration = true + } + if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 { + cg.chunkGenerationOv = gdo2 + } + + if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 { + if len(edge)%4 != 0 { + return nil, ErrInvalidObject + } + cg.chunkExtraEdges = edge + } + + if base := chunks[commitGraphChunkBASE]; len(base) != 0 { + if numBase == 0 { + return nil, ErrInvalidObject + } + if len(base) != int(numBase)*repo.hashAlgo.Size() { + return nil, ErrInvalidObject + } + cg.chunkBaseGraphs = base + } else if numBase != 0 { + return nil, ErrInvalidObject + } + + if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 { + if len(bidx) != int(cg.numCommits)*4 { + return nil, ErrInvalidObject + } + cg.chunkBloomIndex = bidx + } + if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 { + if len(bdat) < bloom.DataHeaderSize { + return nil, ErrInvalidObject + } + cg.chunkBloomData = bdat + settings, err := bloom.ParseSettings(bdat) + if err != nil { + return nil, err + } + cg.bloomSettings = settings + } + + if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) { + cg.chunkBloomIndex = nil + cg.chunkBloomData = nil + cg.bloomSettings = nil + } + if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil { + for i := 0; i < int(cg.numCommits); i++ { + cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4]) + if i > 0 { + prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4]) + if cur < prev { + return nil, ErrInvalidObject + } + } + if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize { + return nil, ErrInvalidObject + } + } + } + + if err := verifyCommitGraphTrailer(repo, data); err != nil { + return nil, err + } + + return cg, nil +} + +func commitGraphCommitDataSize(algo hashAlgorithm) int { + return algo.Size() + 16 +} + +func hashAlgoVersion(algo hashAlgorithm) byte { + switch algo { + case hashAlgoSHA1: + return 1 + case hashAlgoSHA256: + return 2 + default: + return 0 + } +} + +func verifyCommitGraphTrailer(repo *Repository, data []byte) error { + if repo == nil { + return ErrInvalidObject + } + hashSize := repo.hashAlgo.Size() + if len(data) < hashSize { + return ErrInvalidObject + } + want := data[len(data)-hashSize:] + h, err := repo.hashAlgo.New() + if err != nil { + return err + } + _, _ = h.Write(data[:len(data)-hashSize]) + got := h.Sum(nil) + if !bytes.Equal(got, want) { + return ErrInvalidObject + } + return nil +} + +func hexEncode(b []byte) string { + const hex = "0123456789abcdef" + out := make([]byte, len(b)*2) + for i, v := range b { + out[i*2] = hex[v>>4] + out[i*2+1] = hex[v&0x0f] + } + return string(out) +} + +func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) { + if cg == nil { + return 0, false + } + return commitGraphBsearchOID(cg, id) +} + +func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) { + if cg == nil || len(cg.chunkOIDLookup) == 0 { + return 0, false + } + hashSize := cg.hashAlgo.Size() + first := int(id.data[0]) + fanout := cg.chunkOIDFanout + var start uint32 + if first > 0 { + start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4]) + } + end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4]) + lo := int(start) + hi := int(end) - 1 + for lo <= hi { + mid := lo + (hi-lo)/2 + pos := mid * hashSize + cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize]) + if cmp == 0 { + return uint32(mid) + cg.numCommitsInBase, true + } + if cmp < 0 { + lo = mid + 1 + } else { + hi = mid - 1 + } + } + if cg.base != nil { + return commitGraphBsearchOID(cg.base, id) + } + return 0, false +} + +func (cg *commitGraph) loadOID(pos uint32) (Hash, error) { + g := cg + for g != nil && pos < g.numCommitsInBase { + g = g.base + } + if g == nil { + return Hash{}, ErrInvalidObject + } + if pos >= g.numCommits+g.numCommitsInBase { + return Hash{}, ErrInvalidObject + } + lex := pos - g.numCommitsInBase + hashSize := g.hashAlgo.Size() + start := int(lex) * hashSize + end := start + hashSize + var out Hash + copy(out.data[:], g.chunkOIDLookup[start:end]) + out.algo = g.hashAlgo + return out, nil +} + +func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) { + g := cg + for g != nil && pos < g.numCommitsInBase { + g = g.base + } + if g == nil { + return nil, ErrInvalidObject + } + if pos >= g.numCommits+g.numCommitsInBase { + return nil, ErrInvalidObject + } + lex := pos - g.numCommitsInBase + stride := commitGraphCommitDataSize(g.hashAlgo) + start := int(lex) * stride + end := start + stride + if end > len(g.chunkCommit) { + return nil, ErrInvalidObject + } + buf := g.chunkCommit[start:end] + + var tree Hash + hashSize := g.hashAlgo.Size() + copy(tree.data[:], buf[:hashSize]) + tree.algo = g.hashAlgo + + p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4]) + p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8]) + + genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12]) + timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16]) + + timeHigh := genTime & 0x3 + commitTime := (uint64(timeHigh) << 32) | uint64(timeLow) + generation := uint64(genTime >> 2) + + if g.readGeneration { + genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:]) + if genOffset&correctedGenerationOverflow != 0 { + off := genOffset ^ correctedGenerationOverflow + idx := int(off) * 8 + if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) { + return nil, ErrInvalidObject + } + offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8]) + generation = commitTime + offset + } else { + generation = commitTime + uint64(genOffset) + } + } + + var parents []uint32 + var err error + parents, err = commitGraphAppendParent(parents, g, p1) + if err != nil { + return nil, err + } + parents, err = commitGraphAppendParent(parents, g, p2) + if err != nil { + return nil, err + } + + return &commitGraphCommit{ + Position: pos, + Tree: tree, + Parents: parents, + CommitTime: commitTime, + Generation: generation, + Graph: g, + graphLexPos: lex, + }, nil +} diff --git a/commitgraph_read_access.go b/commitgraph_read_access.go new file mode 100644 index 00000000..6757cb1d --- /dev/null +++ b/commitgraph_read_access.go @@ -0,0 +1,124 @@ +package furgit + +import ( + "encoding/binary" + + "codeberg.org/lindenii/furgit/internal/bloom" +) + +const ( + commitGraphParentNone = 0x70000000 + commitGraphParentExtraMask = 0x80000000 + commitGraphParentLastMask = 0x7fffffff + + correctedGenerationOverflow = 0x80000000 +) + +type commitGraphCommit struct { + Position uint32 + Tree Hash + Parents []uint32 + CommitTime uint64 + Generation uint64 + Graph *commitGraph + graphLexPos uint32 +} + +func commitGraphAppendParent(parents []uint32, g *commitGraph, edge uint32) ([]uint32, error) { + if edge == commitGraphParentNone { + return parents, nil + } + if (edge & commitGraphParentExtraMask) == 0 { + return append(parents, edge), nil + } + if g.chunkExtraEdges == nil { + return nil, ErrInvalidObject + } + idx := edge & commitGraphParentLastMask + for { + off := int(idx) * 4 + if off+4 > len(g.chunkExtraEdges) { + return nil, ErrInvalidObject + } + val := binary.BigEndian.Uint32(g.chunkExtraEdges[off : off+4]) + parents = append(parents, val&commitGraphParentLastMask) + idx++ + if (val & commitGraphParentExtraMask) != 0 { + break + } + } + return parents, nil +} + +func (cg *commitGraph) CommitPosition(id Hash) (uint32, bool) { + return cg.lookupOID(id) +} + +func (cg *commitGraph) CommitAt(pos uint32) (*commitGraphCommit, error) { + return cg.commitDataAt(pos) +} + +func (cg *commitGraph) OIDAt(pos uint32) (Hash, error) { + return cg.loadOID(pos) +} + +func (c *commitGraphCommit) HasBloom() bool { + return c != nil && c.Graph != nil && c.Graph.chunkBloomIndex != nil && c.Graph.chunkBloomData != nil +} + +func (cg *commitGraph) BloomSettings() *bloom.Settings { + if cg == nil { + return nil + } + return cg.bloomSettings +} + +func (cg *commitGraph) BloomFilter(pos uint32) (*bloom.Filter, bool) { + g := cg + for g != nil && pos < g.numCommitsInBase { + g = g.base + } + if g == nil || g.chunkBloomIndex == nil || g.chunkBloomData == nil || g.bloomSettings == nil { + return nil, false + } + if pos >= g.numCommits+g.numCommitsInBase { + return nil, false + } + lex := pos - g.numCommitsInBase + if int(lex)*4+4 > len(g.chunkBloomIndex) { + return nil, false + } + end := binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex)*4:]) + var start uint32 + if lex > 0 { + start = binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex-1)*4:]) + } + if end < start { + return nil, false + } + if int(end) > len(g.chunkBloomData)-bloom.DataHeaderSize || + int(start) > len(g.chunkBloomData)-bloom.DataHeaderSize { + return nil, false + } + data := g.chunkBloomData[bloom.DataHeaderSize+start : bloom.DataHeaderSize+end] + return &bloom.Filter{Data: data, Version: g.bloomSettings.HashVersion}, true +} + +func (c *commitGraphCommit) BloomFilter() (*bloom.Filter, *bloom.Settings, bool) { + if c == nil || c.Graph == nil { + return nil, nil, false + } + filter, ok := c.Graph.BloomFilter(c.Position) + if !ok { + return nil, nil, false + } + return filter, c.Graph.bloomSettings, true +} + +func (c *commitGraphCommit) ChangedPathMightContain(path []byte) bool { + filter, settings, ok := c.BloomFilter() + if !ok { + return false + } + return filter.MightContain(path, settings) +} diff --git a/commitgraph_read_test.go b/commitgraph_read_test.go new file mode 100644 index 00000000..9a8ea49e --- /dev/null +++ b/commitgraph_read_test.go @@ -0,0 +1,558 @@ +package furgit + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/bloom" +) + +func TestCommitGraphReadBasic(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + files := []string{"a.txt", "b.txt", "c.txt"} + for i, name := range files { + path := filepath.Join(workDir, name) + content := fmt.Sprintf("content-%d\n", i) + if err := os.WriteFile(path, []byte(content), 0o644); err != nil { + t.Fatalf("write %s: %v", name, err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i)) + } + + gitCmd(t, repoPath, "repack", "-a", "-d") + gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + cg, err := repo.CommitGraph() + if err != nil { + t.Fatalf("CommitGraph failed: %v", err) + } + + revList := gitCmd(t, repoPath, "rev-list", "--all") + commits := strings.Fields(revList) + if len(commits) == 0 { + t.Fatal("no commits found") + } + + falsePositives := 0 + for _, oidStr := range commits { + id, err := repo.ParseHash(oidStr) + if err != nil { + t.Fatalf("ParseHash failed: %v", err) + } + pos, ok := cg.CommitPosition(id) + if !ok { + t.Fatalf("commit %s not found in commit-graph", oidStr) + } + cc, err := cg.CommitAt(pos) + if err != nil { + t.Fatalf("CommitAt failed: %v", err) + } + + treeStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%T", oidStr)) + tree, err := repo.ParseHash(treeStr) + if err != nil { + t.Fatalf("ParseHash tree failed: %v", err) + } + if cc.Tree != tree { + t.Fatalf("tree mismatch for %s: got %s want %s", oidStr, cc.Tree.String(), tree.String()) + } + + parentStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%P", oidStr)) + parents := strings.Fields(parentStr) + if len(parents) != len(cc.Parents) { + t.Fatalf("parent count mismatch for %s: got %d want %d", oidStr, len(cc.Parents), len(parents)) + } + for i, p := range parents { + pid, err := repo.ParseHash(p) + if err != nil { + t.Fatalf("ParseHash parent failed: %v", err) + } + got, err := cg.OIDAt(cc.Parents[i]) + if err != nil { + t.Fatalf("OIDAt parent failed: %v", err) + } + if got != pid { + t.Fatalf("parent mismatch for %s: got %s want %s", oidStr, got.String(), pid.String()) + } + } + + ctStr := gitCmd(t, repoPath, "show", "-s", "--format=%ct", oidStr) + ct, err := strconv.ParseInt(ctStr, 10, 64) + if err != nil { + t.Fatalf("parse commit time: %v", err) + } + if cc.CommitTime != uint64(ct) { + t.Fatalf("commit time mismatch for %s: got %d want %d", oidStr, cc.CommitTime, ct) + } + + changed := gitCmd(t, repoPath, "diff-tree", "--no-commit-id", "--name-only", "-r", "--root", oidStr) + for _, path := range strings.Fields(changed) { + if path == "" { + continue + } + if !cc.ChangedPathMightContain([]byte(path)) { + t.Fatalf("bloom filter missing changed path %q in %s", path, oidStr) + } + } + if cc.ChangedPathMightContain([]byte("this-path-should-not-exist-abcdefg")) { + falsePositives++ + } + } + if falsePositives > 1 { + t.Fatalf("unexpected bloom false positive rate: %d", falsePositives) + } +} + +func TestCommitGraphReadExtraEdges(t *testing.T) { + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + gitCmd(t, workDir, "init") + + if err := os.WriteFile(filepath.Join(workDir, "base.txt"), []byte("base\n"), 0o644); err != nil { + t.Fatalf("write base: %v", err) + } + gitCmd(t, workDir, "add", ".") + gitCmd(t, workDir, "commit", "-m", "base") + + gitCmd(t, workDir, "checkout", "-b", "branch1") + if err := os.WriteFile(filepath.Join(workDir, "b1.txt"), []byte("b1\n"), 0o644); err != nil { + t.Fatalf("write b1: %v", err) + } + gitCmd(t, workDir, "add", ".") + gitCmd(t, workDir, "commit", "-m", "b1") + + gitCmd(t, workDir, "checkout", "master") + gitCmd(t, workDir, "checkout", "-b", "branch2") + if err := os.WriteFile(filepath.Join(workDir, "b2.txt"), []byte("b2\n"), 0o644); err != nil { + t.Fatalf("write b2: %v", err) + } + gitCmd(t, workDir, "add", ".") + gitCmd(t, workDir, "commit", "-m", "b2") + + gitCmd(t, workDir, "checkout", "master") + gitCmd(t, workDir, "checkout", "-b", "branch3") + if err := os.WriteFile(filepath.Join(workDir, "b3.txt"), []byte("b3\n"), 0o644); err != nil { + t.Fatalf("write b3: %v", err) + } + gitCmd(t, workDir, "add", ".") + gitCmd(t, workDir, "commit", "-m", "b3") + + gitCmd(t, workDir, "checkout", "master") + gitCmd(t, workDir, "merge", "--no-ff", "-m", "octopus", "branch1", "branch2", "branch3") + + gitCmd(t, workDir, "repack", "-a", "-d") + gitCmd(t, workDir, "commit-graph", "write", "--reachable") + + repo, err := OpenRepository(filepath.Join(workDir, ".git")) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + cg, err := repo.CommitGraph() + if err != nil { + t.Fatalf("CommitGraph failed: %v", err) + } + + mergeHash := gitCmd(t, workDir, "rev-parse", "HEAD") + id, _ := repo.ParseHash(mergeHash) + pos, ok := cg.CommitPosition(id) + if !ok { + t.Fatalf("merge commit not found") + } + cc, err := cg.CommitAt(pos) + if err != nil { + t.Fatalf("CommitAt failed: %v", err) + } + parentStr := gitCmd(t, workDir, "show", "-s", "--format=%P", mergeHash) + parents := strings.Fields(parentStr) + if len(cc.Parents) != len(parents) { + t.Fatalf("octopus parent mismatch: got %d want %d", len(cc.Parents), len(parents)) + } +} + +func TestCommitGraphReadSplitChain(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + for i := 0; i < 5; i++ { + path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i)) + if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil { + t.Fatalf("write %s: %v", path, err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i)) + } + + gitCmd(t, repoPath, "repack", "-a", "-d") + gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge") + + // more commits needed to get a split chain with base layer + for i := 5; i < 8; i++ { + path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i)) + if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil { + t.Fatalf("write %s: %v", path, err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i)) + } + gitCmd(t, repoPath, "repack", "-a", "-d") + gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge", "--append") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + cg, err := repo.CommitGraph() + if err != nil { + t.Fatalf("CommitGraph failed: %v", err) + } + chainPath := filepath.Join(repoPath, "objects", "info", "commit-graphs", "commit-graph-chain") + if _, err := os.Stat(chainPath); err != nil { + t.Fatalf("commit-graph chain not created by git: %v", err) + } + if cg.base == nil { + t.Fatalf("expected split commit-graph chain") + } + if cg.numCommitsInBase == 0 { + t.Fatalf("expected non-zero base commit count") + } + revList := gitCmd(t, repoPath, "rev-list", "--max-parents=0", "HEAD") + id, _ := repo.ParseHash(revList) + if _, ok := cg.CommitPosition(id); !ok { + t.Fatalf("base commit not found in commit-graph chain") + } +} + +func TestCommitGraphTrailerChecksum(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + data := buildCommitGraphBytes(t, repo, 1, nil, nil, nil) + data[len(data)-1] ^= 0xff + if _, err := parseCommitGraph(repo, "corrupt.graph", data); err == nil { + t.Fatalf("expected checksum error") + } +} + +func TestCommitGraphFanoutMonotonicity(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + oids := [][]byte{ + bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()), + bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()), + } + data := buildCommitGraphBytes(t, repo, 2, oids, nil, nil) + start, end, ok := chunkRange(t, repo, data, commitGraphChunkOIDF) + if !ok { + t.Fatalf("OIDF chunk not found") + } + fanout := make([]byte, end-start) + copy(fanout, data[start:end]) + binary.BigEndian.PutUint32(fanout[0:4], 2) + binary.BigEndian.PutUint32(fanout[4:8], 1) + data = replaceChunk(t, repo, data, commitGraphChunkOIDF, fanout) + if _, err := parseCommitGraph(repo, "bad-fanout.graph", data); err == nil { + t.Fatalf("expected fanout monotonicity error") + } +} + +func TestCommitGraphBIDXMonotonicity(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + oids := [][]byte{ + bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()), + bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()), + } + bidx := []uint32{8, 4} + data := buildCommitGraphBytes(t, repo, 2, oids, bidx, []byte{0, 0, 0, 0, 0, 0, 0, 0}) + if _, err := parseCommitGraph(repo, "bad-bidx.graph", data); err == nil { + t.Fatalf("expected bidx monotonicity error") + } +} + +func TestCommitGraphExtraEdgesBounds(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + oids := [][]byte{ + bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()), + bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()), + bytes.Repeat([]byte{0x30}, repo.hashAlgo.Size()), + } + extraEdges := []uint32{0x80000000 | 100} + data := buildCommitGraphBytes(t, repo, 3, oids, nil, nil) + + // inject EDGE chunk with an out-of-range index + data = injectChunk(t, repo, data, commitGraphChunkEDGE, u32SliceToBytes(extraEdges)) + + // force the first commit to use the extra edges list + start, end, ok := chunkRange(t, repo, data, commitGraphChunkCDAT) + if !ok { + t.Fatalf("CDAT chunk not found") + } + cdat := make([]byte, end-start) + copy(cdat, data[start:end]) + hashSize := repo.hashAlgo.Size() + binary.BigEndian.PutUint32(cdat[hashSize+4:hashSize+8], 0x80000000|100) + data = replaceChunk(t, repo, data, commitGraphChunkCDAT, cdat) + + cg, err := parseCommitGraph(repo, "edge.graph", data) + if err != nil { + t.Fatalf("parseCommitGraph failed: %v", err) + } + pos, ok := cg.CommitPosition(hashFromBytes(repo, oids[0])) + if !ok { + t.Fatalf("commit not found") + } + if _, err := cg.CommitAt(pos); err == nil { + t.Fatalf("expected extra-edges bounds error") + } +} + +func buildCommitGraphBytes(t *testing.T, repo *Repository, n int, oids [][]byte, bidx []uint32, bdat []byte) []byte { + t.Helper() + if n <= 0 { + t.Fatalf("invalid num commits") + } + hashSize := repo.hashAlgo.Size() + if len(oids) == 0 { + for i := 0; i < n; i++ { + b := bytes.Repeat([]byte{byte(0x10 + i)}, hashSize) + oids = append(oids, b) + } + } + sort.Slice(oids, func(i, j int) bool { + return bytes.Compare(oids[i], oids[j]) < 0 + }) + + var chunks []chunkSpec + oidf := make([]byte, commitGraphFanoutSize) + for _, oid := range oids { + idx := int(oid[0]) + for i := idx; i < 256; i++ { + v := binary.BigEndian.Uint32(oidf[i*4 : i*4+4]) + binary.BigEndian.PutUint32(oidf[i*4:i*4+4], v+1) + } + } + chunks = append(chunks, chunkSpec{commitGraphChunkOIDF, oidf}) + + oidl := make([]byte, 0, n*hashSize) + for _, oid := range oids { + oidl = append(oidl, oid...) + } + chunks = append(chunks, chunkSpec{commitGraphChunkOIDL, oidl}) + + cdat := make([]byte, n*commitGraphCommitDataSize(repo.hashAlgo)) + for i := 0; i < n; i++ { + base := i * commitGraphCommitDataSize(repo.hashAlgo) + copy(cdat[base:base+hashSize], bytes.Repeat([]byte{0xaa}, hashSize)) + binary.BigEndian.PutUint32(cdat[base+hashSize:], commitGraphParentNone) + binary.BigEndian.PutUint32(cdat[base+hashSize+4:], commitGraphParentNone) + binary.BigEndian.PutUint32(cdat[base+hashSize+8:], 4) + binary.BigEndian.PutUint32(cdat[base+hashSize+12:], 0) + } + chunks = append(chunks, chunkSpec{commitGraphChunkCDAT, cdat}) + + if bidx != nil && bdat != nil { + bidxBytes := u32SliceToBytes(bidx) + bdatHeader := make([]byte, bloom.DataHeaderSize) + binary.BigEndian.PutUint32(bdatHeader[0:4], 2) + binary.BigEndian.PutUint32(bdatHeader[4:8], 7) + binary.BigEndian.PutUint32(bdatHeader[8:12], 10) + bdatFull := append(bdatHeader, bdat...) + chunks = append(chunks, chunkSpec{commitGraphChunkBIDX, bidxBytes}) + chunks = append(chunks, chunkSpec{commitGraphChunkBDAT, bdatFull}) + } + + return assembleCommitGraphBytes(t, repo, chunks) +} + +type chunkSpec struct { + id uint32 + data []byte +} + +func assembleCommitGraphBytes(t *testing.T, repo *Repository, chunks []chunkSpec) []byte { + t.Helper() + var buf bytes.Buffer + buf.Grow(1024) + var header [commitGraphHeaderSize]byte + binary.BigEndian.PutUint32(header[0:4], commitGraphSignature) + header[4] = commitGraphVersion + header[5] = hashAlgoVersion(repo.hashAlgo) + header[6] = byte(len(chunks)) + header[7] = 0 + buf.Write(header[:]) + + tocStart := buf.Len() + tocLen := (len(chunks) + 1) * commitGraphChunkEntrySize + buf.Grow(tocLen) + buf.Write(make([]byte, tocLen)) + + offset := tocStart + tocLen + for i, ch := range chunks { + pos := tocStart + i*commitGraphChunkEntrySize + binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], ch.id) + binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset)) + offset += len(ch.data) + } + pos := tocStart + len(chunks)*commitGraphChunkEntrySize + binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], 0) + binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset)) + + for _, ch := range chunks { + buf.Write(ch.data) + } + + h, err := repo.hashAlgo.New() + if err != nil { + t.Fatalf("hash.New failed: %v", err) + } + _, _ = h.Write(buf.Bytes()) + buf.Write(h.Sum(nil)) + return buf.Bytes() +} + +func chunkRange(t *testing.T, repo *Repository, data []byte, id uint32) (int, int, bool) { + t.Helper() + hashSize := repo.hashAlgo.Size() + numChunks := int(data[6]) + tocStart := commitGraphHeaderSize + trailerStart := len(data) - hashSize + + for i := 0; i < numChunks; i++ { + pos := tocStart + i*commitGraphChunkEntrySize + chID := binary.BigEndian.Uint32(data[pos : pos+4]) + if chID == 0 { + break + } + start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12])) + end := trailerStart + if i+1 < numChunks { + end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12])) + } + if chID == id { + return start, end, true + } + } + return 0, 0, false +} + +func replaceChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte { + t.Helper() + hashSize := repo.hashAlgo.Size() + numChunks := int(data[6]) + tocStart := commitGraphHeaderSize + trailerStart := len(data) - hashSize + + chunks := make([]chunkSpec, 0, numChunks) + for i := 0; i < numChunks; i++ { + pos := tocStart + i*commitGraphChunkEntrySize + chID := binary.BigEndian.Uint32(data[pos : pos+4]) + if chID == 0 { + break + } + start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12])) + end := trailerStart + if i+1 < numChunks { + end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12])) + } + if chID == id { + chunks = append(chunks, chunkSpec{chID, payload}) + } else { + chunks = append(chunks, chunkSpec{chID, data[start:end]}) + } + } + if len(chunks) == 0 { + t.Fatalf("no chunks found") + } + return assembleCommitGraphBytes(t, repo, chunks) +} + +func injectChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte { + t.Helper() + hashSize := repo.hashAlgo.Size() + numChunks := int(data[6]) + tocStart := commitGraphHeaderSize + trailerStart := len(data) - hashSize + + chunks := make([]chunkSpec, 0, numChunks+1) + for i := 0; i < numChunks; i++ { + pos := tocStart + i*commitGraphChunkEntrySize + chID := binary.BigEndian.Uint32(data[pos : pos+4]) + if chID == 0 { + break + } + start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12])) + end := trailerStart + if i+1 < numChunks { + end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12])) + } + chunks = append(chunks, chunkSpec{chID, data[start:end]}) + } + chunks = append(chunks, chunkSpec{id, payload}) + return assembleCommitGraphBytes(t, repo, chunks) +} + +func u32SliceToBytes(vals []uint32) []byte { + out := make([]byte, len(vals)*4) + for i, v := range vals { + binary.BigEndian.PutUint32(out[i*4:i*4+4], v) + } + return out +} + +func hashFromBytes(repo *Repository, b []byte) Hash { + var h Hash + copy(h.data[:], b) + h.algo = repo.hashAlgo + return h +} @@ -27,7 +27,12 @@ type Repository struct { packFiles map[string]*packFile packFilesMu sync.RWMutex - closeOnce sync.Once + + commitGraphOnce sync.Once + commitGraph *commitGraph + commitGraphErr error + + closeOnce sync.Once } // OpenRepository opens the repository at the provided path. @@ -102,6 +107,12 @@ func (repo *Repository) Close() error { } } } + if repo.commitGraph != nil { + err := repo.commitGraph.Close() + if err != nil && closeErr == nil { + closeErr = err + } + } }) return closeErr } |
