diff options
| -rw-r--r-- | commitgraph_read.go | 600 | ||||
| -rw-r--r-- | commitgraph_read_access.go | 124 | ||||
| -rw-r--r-- | commitgraph_read_test.go | 558 | ||||
| -rw-r--r-- | repo.go | 13 |
4 files changed, 1 insertions, 1294 deletions
diff --git a/commitgraph_read.go b/commitgraph_read.go deleted file mode 100644 index 978a2d61..00000000 --- a/commitgraph_read.go +++ /dev/null @@ -1,600 +0,0 @@ -package furgit - -import ( - "bufio" - "bytes" - "encoding/binary" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "syscall" - - "codeberg.org/lindenii/furgit/internal/bloom" -) - -const ( - commitGraphSignature = 0x43475048 // "CGPH" - commitGraphVersion = 1 -) - -const ( - commitGraphChunkOIDF = 0x4f494446 // "OIDF" - commitGraphChunkOIDL = 0x4f49444c // "OIDL" - commitGraphChunkCDAT = 0x43444154 // "CDAT" - commitGraphChunkGDA2 = 0x47444132 // "GDA2" - commitGraphChunkGDO2 = 0x47444f32 // "GDO2" - commitGraphChunkEDGE = 0x45444745 // "EDGE" - commitGraphChunkBIDX = 0x42494458 // "BIDX" - commitGraphChunkBDAT = 0x42444154 // "BDAT" - commitGraphChunkBASE = 0x42415345 // "BASE" -) - -const ( - commitGraphHeaderSize = 8 - commitGraphChunkEntrySize = 12 - commitGraphFanoutSize = 256 * 4 -) - -type commitGraph struct { - repo *Repository - - filename string - data []byte - dataLen int - - numChunks uint8 - numBase uint8 - hashAlgo hashAlgorithm - - numCommits uint32 - numCommitsInBase uint32 - - base *commitGraph - - chunkOIDFanout []byte - chunkOIDLookup []byte - chunkCommit []byte - - chunkGeneration []byte - chunkGenerationOv []byte - readGeneration bool - - chunkExtraEdges []byte - - chunkBloomIndex []byte - chunkBloomData []byte - - chunkBaseGraphs []byte - - bloomSettings *bloom.Settings - - closeOnce sync.Once -} - -func (cg *commitGraph) Close() error { - if cg == nil { - return nil - } - var closeErr error - cg.closeOnce.Do(func() { - if len(cg.data) > 0 { - if err := syscall.Munmap(cg.data); closeErr == nil { - closeErr = err - } - cg.data = nil - } - }) - if cg.base != nil { - _ = cg.base.Close() - } - return closeErr -} - -func (repo *Repository) CommitGraph() (*commitGraph, error) { - if repo == nil { - return nil, ErrInvalidObject - } - repo.commitGraphOnce.Do(func() { - repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph() - }) - return repo.commitGraph, repo.commitGraphErr -} - -func (repo *Repository) loadCommitGraph() (*commitGraph, error) { - cg, err := repo.loadCommitGraphSingle() - if err == nil { - return cg, nil - } - if !errors.Is(err, ErrNotFound) { - return nil, err - } - return repo.loadCommitGraphChain() -} - -func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) { - path := repo.repoPath(filepath.Join("objects", "info", "commit-graph")) - return repo.loadCommitGraphFile(path) -} - -func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) { - chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain")) - f, err := os.Open(chainPath) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotFound - } - return nil, err - } - defer func() { _ = f.Close() }() - - var hashes []string - scanner := bufio.NewScanner(f) - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if line == "" { - continue - } - hashes = append(hashes, line) - } - if err := scanner.Err(); err != nil { - return nil, err - } - if len(hashes) == 0 { - return nil, ErrNotFound - } - - var chain *commitGraph - for i, h := range hashes { - graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h))) - g, err := repo.loadCommitGraphFile(graphPath) - if err != nil { - return nil, err - } - if i > 0 { - if g.chunkBaseGraphs == nil { - return nil, ErrInvalidObject - } - if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() { - return nil, ErrInvalidObject - } - for j := 0; j < i; j++ { - want := hashes[j] - start := j * repo.hashAlgo.Size() - end := start + repo.hashAlgo.Size() - got := hexEncode(g.chunkBaseGraphs[start:end]) - if got != want { - return nil, ErrInvalidObject - } - } - } - if chain != nil { - if chain.numCommits+chain.numCommitsInBase > ^uint32(0) { - return nil, ErrInvalidObject - } - g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase - } - g.base = chain - chain = g - } - validateCommitGraphChain(chain) - return chain, nil -} - -func validateCommitGraphChain(head *commitGraph) { - if head == nil { - return - } - readGeneration := true - for g := head; g != nil; g = g.base { - readGeneration = readGeneration && g.readGeneration - } - if !readGeneration { - for g := head; g != nil; g = g.base { - g.readGeneration = false - } - } - - var settings *bloom.Settings - for g := head; g != nil; g = g.base { - if g.bloomSettings == nil { - continue - } - if settings == nil { - settings = g.bloomSettings - continue - } - if settings.HashVersion != g.bloomSettings.HashVersion || - settings.NumHashes != g.bloomSettings.NumHashes || - settings.BitsPerEntry != g.bloomSettings.BitsPerEntry { - g.chunkBloomIndex = nil - g.chunkBloomData = nil - g.bloomSettings = nil - } - } -} - -func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) { - f, err := os.Open(path) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotFound - } - return nil, err - } - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return nil, err - } - if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) { - _ = f.Close() - return nil, ErrInvalidObject - } - region, err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return nil, err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return nil, err - } - cg, err := parseCommitGraph(repo, path, region) - if err != nil { - _ = syscall.Munmap(region) - return nil, err - } - cg.data = region - return cg, nil -} - -func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) { - if len(data) < commitGraphHeaderSize { - return nil, ErrInvalidObject - } - sig := binary.BigEndian.Uint32(data[0:4]) - if sig != commitGraphSignature { - return nil, ErrInvalidObject - } - version := data[4] - if version != commitGraphVersion { - return nil, ErrInvalidObject - } - hashVersion := data[5] - if hashVersion != hashAlgoVersion(repo.hashAlgo) { - return nil, ErrInvalidObject - } - numChunks := data[6] - numBase := data[7] - - tocStart := commitGraphHeaderSize - tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize - if len(data) < tocStart+tocLen+repo.hashAlgo.Size() { - return nil, ErrInvalidObject - } - - trailerStart := len(data) - repo.hashAlgo.Size() - - type chunkEntry struct { - id uint32 - offset uint64 - } - entries := make([]chunkEntry, 0, numChunks+1) - for i := 0; i < int(numChunks)+1; i++ { - pos := tocStart + i*commitGraphChunkEntrySize - id := binary.BigEndian.Uint32(data[pos : pos+4]) - offset := binary.BigEndian.Uint64(data[pos+4 : pos+12]) - entries = append(entries, chunkEntry{id: id, offset: offset}) - } - - chunks := map[uint32][]byte{} - for i := 0; i < len(entries)-1; i++ { - id := entries[i].id - if id == 0 { - break - } - start := int(entries[i].offset) - end := int(entries[i+1].offset) - if start < tocStart+tocLen || end < start || end > trailerStart { - return nil, ErrInvalidObject - } - chunks[id] = data[start:end] - } - - cg := &commitGraph{ - repo: repo, - filename: filename, - dataLen: len(data), - hashAlgo: repo.hashAlgo, - numChunks: numChunks, - numBase: numBase, - } - - oidf := chunks[commitGraphChunkOIDF] - if len(oidf) != commitGraphFanoutSize { - return nil, ErrInvalidObject - } - cg.chunkOIDFanout = oidf - cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:]) - for i := 0; i < 255; i++ { - if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) > - binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) { - return nil, ErrInvalidObject - } - } - - oidl := chunks[commitGraphChunkOIDL] - if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() { - return nil, ErrInvalidObject - } - cg.chunkOIDLookup = oidl - - cdat := chunks[commitGraphChunkCDAT] - if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) { - return nil, ErrInvalidObject - } - cg.chunkCommit = cdat - - if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 { - if len(gda2) != int(cg.numCommits)*4 { - return nil, ErrInvalidObject - } - cg.chunkGeneration = gda2 - cg.readGeneration = true - } - if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 { - cg.chunkGenerationOv = gdo2 - } - - if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 { - if len(edge)%4 != 0 { - return nil, ErrInvalidObject - } - cg.chunkExtraEdges = edge - } - - if base := chunks[commitGraphChunkBASE]; len(base) != 0 { - if numBase == 0 { - return nil, ErrInvalidObject - } - if len(base) != int(numBase)*repo.hashAlgo.Size() { - return nil, ErrInvalidObject - } - cg.chunkBaseGraphs = base - } else if numBase != 0 { - return nil, ErrInvalidObject - } - - if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 { - if len(bidx) != int(cg.numCommits)*4 { - return nil, ErrInvalidObject - } - cg.chunkBloomIndex = bidx - } - if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 { - if len(bdat) < bloom.DataHeaderSize { - return nil, ErrInvalidObject - } - cg.chunkBloomData = bdat - settings, err := bloom.ParseSettings(bdat) - if err != nil { - return nil, err - } - cg.bloomSettings = settings - } - - if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) { - cg.chunkBloomIndex = nil - cg.chunkBloomData = nil - cg.bloomSettings = nil - } - if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil { - for i := 0; i < int(cg.numCommits); i++ { - cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4]) - if i > 0 { - prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4]) - if cur < prev { - return nil, ErrInvalidObject - } - } - if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize { - return nil, ErrInvalidObject - } - } - } - - if err := verifyCommitGraphTrailer(repo, data); err != nil { - return nil, err - } - - return cg, nil -} - -func commitGraphCommitDataSize(algo hashAlgorithm) int { - return algo.Size() + 16 -} - -func hashAlgoVersion(algo hashAlgorithm) byte { - switch algo { - case hashAlgoSHA1: - return 1 - case hashAlgoSHA256: - return 2 - default: - return 0 - } -} - -func verifyCommitGraphTrailer(repo *Repository, data []byte) error { - if repo == nil { - return ErrInvalidObject - } - hashSize := repo.hashAlgo.Size() - if len(data) < hashSize { - return ErrInvalidObject - } - want := data[len(data)-hashSize:] - h, err := repo.hashAlgo.New() - if err != nil { - return err - } - _, _ = h.Write(data[:len(data)-hashSize]) - got := h.Sum(nil) - if !bytes.Equal(got, want) { - return ErrInvalidObject - } - return nil -} - -func hexEncode(b []byte) string { - const hex = "0123456789abcdef" - out := make([]byte, len(b)*2) - for i, v := range b { - out[i*2] = hex[v>>4] - out[i*2+1] = hex[v&0x0f] - } - return string(out) -} - -func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) { - if cg == nil { - return 0, false - } - return commitGraphBsearchOID(cg, id) -} - -func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) { - if cg == nil || len(cg.chunkOIDLookup) == 0 { - return 0, false - } - hashSize := cg.hashAlgo.Size() - first := int(id.data[0]) - fanout := cg.chunkOIDFanout - var start uint32 - if first > 0 { - start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4]) - } - end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4]) - lo := int(start) - hi := int(end) - 1 - for lo <= hi { - mid := lo + (hi-lo)/2 - pos := mid * hashSize - cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize]) - if cmp == 0 { - return uint32(mid) + cg.numCommitsInBase, true - } - if cmp < 0 { - lo = mid + 1 - } else { - hi = mid - 1 - } - } - if cg.base != nil { - return commitGraphBsearchOID(cg.base, id) - } - return 0, false -} - -func (cg *commitGraph) loadOID(pos uint32) (Hash, error) { - g := cg - for g != nil && pos < g.numCommitsInBase { - g = g.base - } - if g == nil { - return Hash{}, ErrInvalidObject - } - if pos >= g.numCommits+g.numCommitsInBase { - return Hash{}, ErrInvalidObject - } - lex := pos - g.numCommitsInBase - hashSize := g.hashAlgo.Size() - start := int(lex) * hashSize - end := start + hashSize - var out Hash - copy(out.data[:], g.chunkOIDLookup[start:end]) - out.algo = g.hashAlgo - return out, nil -} - -func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) { - g := cg - for g != nil && pos < g.numCommitsInBase { - g = g.base - } - if g == nil { - return nil, ErrInvalidObject - } - if pos >= g.numCommits+g.numCommitsInBase { - return nil, ErrInvalidObject - } - lex := pos - g.numCommitsInBase - stride := commitGraphCommitDataSize(g.hashAlgo) - start := int(lex) * stride - end := start + stride - if end > len(g.chunkCommit) { - return nil, ErrInvalidObject - } - buf := g.chunkCommit[start:end] - - var tree Hash - hashSize := g.hashAlgo.Size() - copy(tree.data[:], buf[:hashSize]) - tree.algo = g.hashAlgo - - p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4]) - p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8]) - - genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12]) - timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16]) - - timeHigh := genTime & 0x3 - commitTime := (uint64(timeHigh) << 32) | uint64(timeLow) - generation := uint64(genTime >> 2) - - if g.readGeneration { - genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:]) - if genOffset&correctedGenerationOverflow != 0 { - off := genOffset ^ correctedGenerationOverflow - idx := int(off) * 8 - if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) { - return nil, ErrInvalidObject - } - offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8]) - generation = commitTime + offset - } else { - generation = commitTime + uint64(genOffset) - } - } - - var parents []uint32 - var err error - parents, err = commitGraphAppendParent(parents, g, p1) - if err != nil { - return nil, err - } - parents, err = commitGraphAppendParent(parents, g, p2) - if err != nil { - return nil, err - } - - return &commitGraphCommit{ - Position: pos, - Tree: tree, - Parents: parents, - CommitTime: commitTime, - Generation: generation, - Graph: g, - graphLexPos: lex, - }, nil -} diff --git a/commitgraph_read_access.go b/commitgraph_read_access.go deleted file mode 100644 index 6757cb1d..00000000 --- a/commitgraph_read_access.go +++ /dev/null @@ -1,124 +0,0 @@ -package furgit - -import ( - "encoding/binary" - - "codeberg.org/lindenii/furgit/internal/bloom" -) - -const ( - commitGraphParentNone = 0x70000000 - commitGraphParentExtraMask = 0x80000000 - commitGraphParentLastMask = 0x7fffffff - - correctedGenerationOverflow = 0x80000000 -) - -type commitGraphCommit struct { - Position uint32 - Tree Hash - Parents []uint32 - CommitTime uint64 - Generation uint64 - Graph *commitGraph - graphLexPos uint32 -} - -func commitGraphAppendParent(parents []uint32, g *commitGraph, edge uint32) ([]uint32, error) { - if edge == commitGraphParentNone { - return parents, nil - } - if (edge & commitGraphParentExtraMask) == 0 { - return append(parents, edge), nil - } - if g.chunkExtraEdges == nil { - return nil, ErrInvalidObject - } - idx := edge & commitGraphParentLastMask - for { - off := int(idx) * 4 - if off+4 > len(g.chunkExtraEdges) { - return nil, ErrInvalidObject - } - val := binary.BigEndian.Uint32(g.chunkExtraEdges[off : off+4]) - parents = append(parents, val&commitGraphParentLastMask) - idx++ - if (val & commitGraphParentExtraMask) != 0 { - break - } - } - return parents, nil -} - -func (cg *commitGraph) CommitPosition(id Hash) (uint32, bool) { - return cg.lookupOID(id) -} - -func (cg *commitGraph) CommitAt(pos uint32) (*commitGraphCommit, error) { - return cg.commitDataAt(pos) -} - -func (cg *commitGraph) OIDAt(pos uint32) (Hash, error) { - return cg.loadOID(pos) -} - -func (c *commitGraphCommit) HasBloom() bool { - return c != nil && c.Graph != nil && c.Graph.chunkBloomIndex != nil && c.Graph.chunkBloomData != nil -} - -func (cg *commitGraph) BloomSettings() *bloom.Settings { - if cg == nil { - return nil - } - return cg.bloomSettings -} - -func (cg *commitGraph) BloomFilter(pos uint32) (*bloom.Filter, bool) { - g := cg - for g != nil && pos < g.numCommitsInBase { - g = g.base - } - if g == nil || g.chunkBloomIndex == nil || g.chunkBloomData == nil || g.bloomSettings == nil { - return nil, false - } - if pos >= g.numCommits+g.numCommitsInBase { - return nil, false - } - lex := pos - g.numCommitsInBase - if int(lex)*4+4 > len(g.chunkBloomIndex) { - return nil, false - } - end := binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex)*4:]) - var start uint32 - if lex > 0 { - start = binary.BigEndian.Uint32(g.chunkBloomIndex[int(lex-1)*4:]) - } - if end < start { - return nil, false - } - if int(end) > len(g.chunkBloomData)-bloom.DataHeaderSize || - int(start) > len(g.chunkBloomData)-bloom.DataHeaderSize { - return nil, false - } - data := g.chunkBloomData[bloom.DataHeaderSize+start : bloom.DataHeaderSize+end] - return &bloom.Filter{Data: data, Version: g.bloomSettings.HashVersion}, true -} - -func (c *commitGraphCommit) BloomFilter() (*bloom.Filter, *bloom.Settings, bool) { - if c == nil || c.Graph == nil { - return nil, nil, false - } - filter, ok := c.Graph.BloomFilter(c.Position) - if !ok { - return nil, nil, false - } - return filter, c.Graph.bloomSettings, true -} - -func (c *commitGraphCommit) ChangedPathMightContain(path []byte) bool { - filter, settings, ok := c.BloomFilter() - if !ok { - return false - } - return filter.MightContain(path, settings) -} diff --git a/commitgraph_read_test.go b/commitgraph_read_test.go deleted file mode 100644 index 9a8ea49e..00000000 --- a/commitgraph_read_test.go +++ /dev/null @@ -1,558 +0,0 @@ -package furgit - -import ( - "bytes" - "encoding/binary" - "fmt" - "os" - "path/filepath" - "sort" - "strconv" - "strings" - "testing" - - "codeberg.org/lindenii/furgit/internal/bloom" -) - -func TestCommitGraphReadBasic(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - files := []string{"a.txt", "b.txt", "c.txt"} - for i, name := range files { - path := filepath.Join(workDir, name) - content := fmt.Sprintf("content-%d\n", i) - if err := os.WriteFile(path, []byte(content), 0o644); err != nil { - t.Fatalf("write %s: %v", name, err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i)) - } - - gitCmd(t, repoPath, "repack", "-a", "-d") - gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - cg, err := repo.CommitGraph() - if err != nil { - t.Fatalf("CommitGraph failed: %v", err) - } - - revList := gitCmd(t, repoPath, "rev-list", "--all") - commits := strings.Fields(revList) - if len(commits) == 0 { - t.Fatal("no commits found") - } - - falsePositives := 0 - for _, oidStr := range commits { - id, err := repo.ParseHash(oidStr) - if err != nil { - t.Fatalf("ParseHash failed: %v", err) - } - pos, ok := cg.CommitPosition(id) - if !ok { - t.Fatalf("commit %s not found in commit-graph", oidStr) - } - cc, err := cg.CommitAt(pos) - if err != nil { - t.Fatalf("CommitAt failed: %v", err) - } - - treeStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%T", oidStr)) - tree, err := repo.ParseHash(treeStr) - if err != nil { - t.Fatalf("ParseHash tree failed: %v", err) - } - if cc.Tree != tree { - t.Fatalf("tree mismatch for %s: got %s want %s", oidStr, cc.Tree.String(), tree.String()) - } - - parentStr := string(gitCmd(t, repoPath, "show", "-s", "--format=%P", oidStr)) - parents := strings.Fields(parentStr) - if len(parents) != len(cc.Parents) { - t.Fatalf("parent count mismatch for %s: got %d want %d", oidStr, len(cc.Parents), len(parents)) - } - for i, p := range parents { - pid, err := repo.ParseHash(p) - if err != nil { - t.Fatalf("ParseHash parent failed: %v", err) - } - got, err := cg.OIDAt(cc.Parents[i]) - if err != nil { - t.Fatalf("OIDAt parent failed: %v", err) - } - if got != pid { - t.Fatalf("parent mismatch for %s: got %s want %s", oidStr, got.String(), pid.String()) - } - } - - ctStr := gitCmd(t, repoPath, "show", "-s", "--format=%ct", oidStr) - ct, err := strconv.ParseInt(ctStr, 10, 64) - if err != nil { - t.Fatalf("parse commit time: %v", err) - } - if cc.CommitTime != uint64(ct) { - t.Fatalf("commit time mismatch for %s: got %d want %d", oidStr, cc.CommitTime, ct) - } - - changed := gitCmd(t, repoPath, "diff-tree", "--no-commit-id", "--name-only", "-r", "--root", oidStr) - for _, path := range strings.Fields(changed) { - if path == "" { - continue - } - if !cc.ChangedPathMightContain([]byte(path)) { - t.Fatalf("bloom filter missing changed path %q in %s", path, oidStr) - } - } - if cc.ChangedPathMightContain([]byte("this-path-should-not-exist-abcdefg")) { - falsePositives++ - } - } - if falsePositives > 1 { - t.Fatalf("unexpected bloom false positive rate: %d", falsePositives) - } -} - -func TestCommitGraphReadExtraEdges(t *testing.T) { - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - gitCmd(t, workDir, "init") - - if err := os.WriteFile(filepath.Join(workDir, "base.txt"), []byte("base\n"), 0o644); err != nil { - t.Fatalf("write base: %v", err) - } - gitCmd(t, workDir, "add", ".") - gitCmd(t, workDir, "commit", "-m", "base") - - gitCmd(t, workDir, "checkout", "-b", "branch1") - if err := os.WriteFile(filepath.Join(workDir, "b1.txt"), []byte("b1\n"), 0o644); err != nil { - t.Fatalf("write b1: %v", err) - } - gitCmd(t, workDir, "add", ".") - gitCmd(t, workDir, "commit", "-m", "b1") - - gitCmd(t, workDir, "checkout", "master") - gitCmd(t, workDir, "checkout", "-b", "branch2") - if err := os.WriteFile(filepath.Join(workDir, "b2.txt"), []byte("b2\n"), 0o644); err != nil { - t.Fatalf("write b2: %v", err) - } - gitCmd(t, workDir, "add", ".") - gitCmd(t, workDir, "commit", "-m", "b2") - - gitCmd(t, workDir, "checkout", "master") - gitCmd(t, workDir, "checkout", "-b", "branch3") - if err := os.WriteFile(filepath.Join(workDir, "b3.txt"), []byte("b3\n"), 0o644); err != nil { - t.Fatalf("write b3: %v", err) - } - gitCmd(t, workDir, "add", ".") - gitCmd(t, workDir, "commit", "-m", "b3") - - gitCmd(t, workDir, "checkout", "master") - gitCmd(t, workDir, "merge", "--no-ff", "-m", "octopus", "branch1", "branch2", "branch3") - - gitCmd(t, workDir, "repack", "-a", "-d") - gitCmd(t, workDir, "commit-graph", "write", "--reachable") - - repo, err := OpenRepository(filepath.Join(workDir, ".git")) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - cg, err := repo.CommitGraph() - if err != nil { - t.Fatalf("CommitGraph failed: %v", err) - } - - mergeHash := gitCmd(t, workDir, "rev-parse", "HEAD") - id, _ := repo.ParseHash(mergeHash) - pos, ok := cg.CommitPosition(id) - if !ok { - t.Fatalf("merge commit not found") - } - cc, err := cg.CommitAt(pos) - if err != nil { - t.Fatalf("CommitAt failed: %v", err) - } - parentStr := gitCmd(t, workDir, "show", "-s", "--format=%P", mergeHash) - parents := strings.Fields(parentStr) - if len(cc.Parents) != len(parents) { - t.Fatalf("octopus parent mismatch: got %d want %d", len(cc.Parents), len(parents)) - } -} - -func TestCommitGraphReadSplitChain(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - for i := 0; i < 5; i++ { - path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i)) - if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil { - t.Fatalf("write %s: %v", path, err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i)) - } - - gitCmd(t, repoPath, "repack", "-a", "-d") - gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge") - - // more commits needed to get a split chain with base layer - for i := 5; i < 8; i++ { - path := filepath.Join(workDir, fmt.Sprintf("file%d.txt", i)) - if err := os.WriteFile(path, []byte(fmt.Sprintf("v%d\n", i)), 0o644); err != nil { - t.Fatalf("write %s: %v", path, err) - } - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", fmt.Sprintf("commit %d", i)) - } - gitCmd(t, repoPath, "repack", "-a", "-d") - gitCmd(t, repoPath, "commit-graph", "write", "--reachable", "--changed-paths", "--split=no-merge", "--append") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - cg, err := repo.CommitGraph() - if err != nil { - t.Fatalf("CommitGraph failed: %v", err) - } - chainPath := filepath.Join(repoPath, "objects", "info", "commit-graphs", "commit-graph-chain") - if _, err := os.Stat(chainPath); err != nil { - t.Fatalf("commit-graph chain not created by git: %v", err) - } - if cg.base == nil { - t.Fatalf("expected split commit-graph chain") - } - if cg.numCommitsInBase == 0 { - t.Fatalf("expected non-zero base commit count") - } - revList := gitCmd(t, repoPath, "rev-list", "--max-parents=0", "HEAD") - id, _ := repo.ParseHash(revList) - if _, ok := cg.CommitPosition(id); !ok { - t.Fatalf("base commit not found in commit-graph chain") - } -} - -func TestCommitGraphTrailerChecksum(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - data := buildCommitGraphBytes(t, repo, 1, nil, nil, nil) - data[len(data)-1] ^= 0xff - if _, err := parseCommitGraph(repo, "corrupt.graph", data); err == nil { - t.Fatalf("expected checksum error") - } -} - -func TestCommitGraphFanoutMonotonicity(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - oids := [][]byte{ - bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()), - bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()), - } - data := buildCommitGraphBytes(t, repo, 2, oids, nil, nil) - start, end, ok := chunkRange(t, repo, data, commitGraphChunkOIDF) - if !ok { - t.Fatalf("OIDF chunk not found") - } - fanout := make([]byte, end-start) - copy(fanout, data[start:end]) - binary.BigEndian.PutUint32(fanout[0:4], 2) - binary.BigEndian.PutUint32(fanout[4:8], 1) - data = replaceChunk(t, repo, data, commitGraphChunkOIDF, fanout) - if _, err := parseCommitGraph(repo, "bad-fanout.graph", data); err == nil { - t.Fatalf("expected fanout monotonicity error") - } -} - -func TestCommitGraphBIDXMonotonicity(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - oids := [][]byte{ - bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()), - bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()), - } - bidx := []uint32{8, 4} - data := buildCommitGraphBytes(t, repo, 2, oids, bidx, []byte{0, 0, 0, 0, 0, 0, 0, 0}) - if _, err := parseCommitGraph(repo, "bad-bidx.graph", data); err == nil { - t.Fatalf("expected bidx monotonicity error") - } -} - -func TestCommitGraphExtraEdgesBounds(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - oids := [][]byte{ - bytes.Repeat([]byte{0x10}, repo.hashAlgo.Size()), - bytes.Repeat([]byte{0x20}, repo.hashAlgo.Size()), - bytes.Repeat([]byte{0x30}, repo.hashAlgo.Size()), - } - extraEdges := []uint32{0x80000000 | 100} - data := buildCommitGraphBytes(t, repo, 3, oids, nil, nil) - - // inject EDGE chunk with an out-of-range index - data = injectChunk(t, repo, data, commitGraphChunkEDGE, u32SliceToBytes(extraEdges)) - - // force the first commit to use the extra edges list - start, end, ok := chunkRange(t, repo, data, commitGraphChunkCDAT) - if !ok { - t.Fatalf("CDAT chunk not found") - } - cdat := make([]byte, end-start) - copy(cdat, data[start:end]) - hashSize := repo.hashAlgo.Size() - binary.BigEndian.PutUint32(cdat[hashSize+4:hashSize+8], 0x80000000|100) - data = replaceChunk(t, repo, data, commitGraphChunkCDAT, cdat) - - cg, err := parseCommitGraph(repo, "edge.graph", data) - if err != nil { - t.Fatalf("parseCommitGraph failed: %v", err) - } - pos, ok := cg.CommitPosition(hashFromBytes(repo, oids[0])) - if !ok { - t.Fatalf("commit not found") - } - if _, err := cg.CommitAt(pos); err == nil { - t.Fatalf("expected extra-edges bounds error") - } -} - -func buildCommitGraphBytes(t *testing.T, repo *Repository, n int, oids [][]byte, bidx []uint32, bdat []byte) []byte { - t.Helper() - if n <= 0 { - t.Fatalf("invalid num commits") - } - hashSize := repo.hashAlgo.Size() - if len(oids) == 0 { - for i := 0; i < n; i++ { - b := bytes.Repeat([]byte{byte(0x10 + i)}, hashSize) - oids = append(oids, b) - } - } - sort.Slice(oids, func(i, j int) bool { - return bytes.Compare(oids[i], oids[j]) < 0 - }) - - var chunks []chunkSpec - oidf := make([]byte, commitGraphFanoutSize) - for _, oid := range oids { - idx := int(oid[0]) - for i := idx; i < 256; i++ { - v := binary.BigEndian.Uint32(oidf[i*4 : i*4+4]) - binary.BigEndian.PutUint32(oidf[i*4:i*4+4], v+1) - } - } - chunks = append(chunks, chunkSpec{commitGraphChunkOIDF, oidf}) - - oidl := make([]byte, 0, n*hashSize) - for _, oid := range oids { - oidl = append(oidl, oid...) - } - chunks = append(chunks, chunkSpec{commitGraphChunkOIDL, oidl}) - - cdat := make([]byte, n*commitGraphCommitDataSize(repo.hashAlgo)) - for i := 0; i < n; i++ { - base := i * commitGraphCommitDataSize(repo.hashAlgo) - copy(cdat[base:base+hashSize], bytes.Repeat([]byte{0xaa}, hashSize)) - binary.BigEndian.PutUint32(cdat[base+hashSize:], commitGraphParentNone) - binary.BigEndian.PutUint32(cdat[base+hashSize+4:], commitGraphParentNone) - binary.BigEndian.PutUint32(cdat[base+hashSize+8:], 4) - binary.BigEndian.PutUint32(cdat[base+hashSize+12:], 0) - } - chunks = append(chunks, chunkSpec{commitGraphChunkCDAT, cdat}) - - if bidx != nil && bdat != nil { - bidxBytes := u32SliceToBytes(bidx) - bdatHeader := make([]byte, bloom.DataHeaderSize) - binary.BigEndian.PutUint32(bdatHeader[0:4], 2) - binary.BigEndian.PutUint32(bdatHeader[4:8], 7) - binary.BigEndian.PutUint32(bdatHeader[8:12], 10) - bdatFull := append(bdatHeader, bdat...) - chunks = append(chunks, chunkSpec{commitGraphChunkBIDX, bidxBytes}) - chunks = append(chunks, chunkSpec{commitGraphChunkBDAT, bdatFull}) - } - - return assembleCommitGraphBytes(t, repo, chunks) -} - -type chunkSpec struct { - id uint32 - data []byte -} - -func assembleCommitGraphBytes(t *testing.T, repo *Repository, chunks []chunkSpec) []byte { - t.Helper() - var buf bytes.Buffer - buf.Grow(1024) - var header [commitGraphHeaderSize]byte - binary.BigEndian.PutUint32(header[0:4], commitGraphSignature) - header[4] = commitGraphVersion - header[5] = hashAlgoVersion(repo.hashAlgo) - header[6] = byte(len(chunks)) - header[7] = 0 - buf.Write(header[:]) - - tocStart := buf.Len() - tocLen := (len(chunks) + 1) * commitGraphChunkEntrySize - buf.Grow(tocLen) - buf.Write(make([]byte, tocLen)) - - offset := tocStart + tocLen - for i, ch := range chunks { - pos := tocStart + i*commitGraphChunkEntrySize - binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], ch.id) - binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset)) - offset += len(ch.data) - } - pos := tocStart + len(chunks)*commitGraphChunkEntrySize - binary.BigEndian.PutUint32(buf.Bytes()[pos:pos+4], 0) - binary.BigEndian.PutUint64(buf.Bytes()[pos+4:pos+12], uint64(offset)) - - for _, ch := range chunks { - buf.Write(ch.data) - } - - h, err := repo.hashAlgo.New() - if err != nil { - t.Fatalf("hash.New failed: %v", err) - } - _, _ = h.Write(buf.Bytes()) - buf.Write(h.Sum(nil)) - return buf.Bytes() -} - -func chunkRange(t *testing.T, repo *Repository, data []byte, id uint32) (int, int, bool) { - t.Helper() - hashSize := repo.hashAlgo.Size() - numChunks := int(data[6]) - tocStart := commitGraphHeaderSize - trailerStart := len(data) - hashSize - - for i := 0; i < numChunks; i++ { - pos := tocStart + i*commitGraphChunkEntrySize - chID := binary.BigEndian.Uint32(data[pos : pos+4]) - if chID == 0 { - break - } - start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12])) - end := trailerStart - if i+1 < numChunks { - end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12])) - } - if chID == id { - return start, end, true - } - } - return 0, 0, false -} - -func replaceChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte { - t.Helper() - hashSize := repo.hashAlgo.Size() - numChunks := int(data[6]) - tocStart := commitGraphHeaderSize - trailerStart := len(data) - hashSize - - chunks := make([]chunkSpec, 0, numChunks) - for i := 0; i < numChunks; i++ { - pos := tocStart + i*commitGraphChunkEntrySize - chID := binary.BigEndian.Uint32(data[pos : pos+4]) - if chID == 0 { - break - } - start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12])) - end := trailerStart - if i+1 < numChunks { - end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12])) - } - if chID == id { - chunks = append(chunks, chunkSpec{chID, payload}) - } else { - chunks = append(chunks, chunkSpec{chID, data[start:end]}) - } - } - if len(chunks) == 0 { - t.Fatalf("no chunks found") - } - return assembleCommitGraphBytes(t, repo, chunks) -} - -func injectChunk(t *testing.T, repo *Repository, data []byte, id uint32, payload []byte) []byte { - t.Helper() - hashSize := repo.hashAlgo.Size() - numChunks := int(data[6]) - tocStart := commitGraphHeaderSize - trailerStart := len(data) - hashSize - - chunks := make([]chunkSpec, 0, numChunks+1) - for i := 0; i < numChunks; i++ { - pos := tocStart + i*commitGraphChunkEntrySize - chID := binary.BigEndian.Uint32(data[pos : pos+4]) - if chID == 0 { - break - } - start := int(binary.BigEndian.Uint64(data[pos+4 : pos+12])) - end := trailerStart - if i+1 < numChunks { - end = int(binary.BigEndian.Uint64(data[pos+commitGraphChunkEntrySize+4 : pos+commitGraphChunkEntrySize+12])) - } - chunks = append(chunks, chunkSpec{chID, data[start:end]}) - } - chunks = append(chunks, chunkSpec{id, payload}) - return assembleCommitGraphBytes(t, repo, chunks) -} - -func u32SliceToBytes(vals []uint32) []byte { - out := make([]byte, len(vals)*4) - for i, v := range vals { - binary.BigEndian.PutUint32(out[i*4:i*4+4], v) - } - return out -} - -func hashFromBytes(repo *Repository, b []byte) Hash { - var h Hash - copy(h.data[:], b) - h.algo = repo.hashAlgo - return h -} @@ -27,12 +27,7 @@ type Repository struct { packFiles map[string]*packFile packFilesMu sync.RWMutex - - commitGraphOnce sync.Once - commitGraph *commitGraph - commitGraphErr error - - closeOnce sync.Once + closeOnce sync.Once } // OpenRepository opens the repository at the provided path. @@ -107,12 +102,6 @@ func (repo *Repository) Close() error { } } } - if repo.commitGraph != nil { - err := repo.commitGraph.Close() - if err != nil && closeErr == nil { - closeErr = err - } - } }) return closeErr } |
