aboutsummaryrefslogtreecommitdiff
path: root/commitgraph_read.go
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-01-30 15:46:49 +0100
committerGravatar Runxi Yu2026-01-30 15:46:49 +0100
commit1ea061c92ca6ad435c00bea458b8f24a5e1a822a (patch)
tree1ca3e0455546c57cedbb017c24a2ef0da0397318 /commitgraph_read.go
parentREADME: Add writing thin packs to the list of things to do (diff)
signatureNo signature
commitgraph: Add basic commit-graph implementation
Diffstat (limited to 'commitgraph_read.go')
-rw-r--r--commitgraph_read.go600
1 files changed, 600 insertions, 0 deletions
diff --git a/commitgraph_read.go b/commitgraph_read.go
new file mode 100644
index 00000000..978a2d61
--- /dev/null
+++ b/commitgraph_read.go
@@ -0,0 +1,600 @@
+package furgit
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "syscall"
+
+ "codeberg.org/lindenii/furgit/internal/bloom"
+)
+
+const (
+ commitGraphSignature = 0x43475048 // "CGPH"
+ commitGraphVersion = 1
+)
+
+const (
+ commitGraphChunkOIDF = 0x4f494446 // "OIDF"
+ commitGraphChunkOIDL = 0x4f49444c // "OIDL"
+ commitGraphChunkCDAT = 0x43444154 // "CDAT"
+ commitGraphChunkGDA2 = 0x47444132 // "GDA2"
+ commitGraphChunkGDO2 = 0x47444f32 // "GDO2"
+ commitGraphChunkEDGE = 0x45444745 // "EDGE"
+ commitGraphChunkBIDX = 0x42494458 // "BIDX"
+ commitGraphChunkBDAT = 0x42444154 // "BDAT"
+ commitGraphChunkBASE = 0x42415345 // "BASE"
+)
+
+const (
+ commitGraphHeaderSize = 8
+ commitGraphChunkEntrySize = 12
+ commitGraphFanoutSize = 256 * 4
+)
+
+type commitGraph struct {
+ repo *Repository
+
+ filename string
+ data []byte
+ dataLen int
+
+ numChunks uint8
+ numBase uint8
+ hashAlgo hashAlgorithm
+
+ numCommits uint32
+ numCommitsInBase uint32
+
+ base *commitGraph
+
+ chunkOIDFanout []byte
+ chunkOIDLookup []byte
+ chunkCommit []byte
+
+ chunkGeneration []byte
+ chunkGenerationOv []byte
+ readGeneration bool
+
+ chunkExtraEdges []byte
+
+ chunkBloomIndex []byte
+ chunkBloomData []byte
+
+ chunkBaseGraphs []byte
+
+ bloomSettings *bloom.Settings
+
+ closeOnce sync.Once
+}
+
+func (cg *commitGraph) Close() error {
+ if cg == nil {
+ return nil
+ }
+ var closeErr error
+ cg.closeOnce.Do(func() {
+ if len(cg.data) > 0 {
+ if err := syscall.Munmap(cg.data); closeErr == nil {
+ closeErr = err
+ }
+ cg.data = nil
+ }
+ })
+ if cg.base != nil {
+ _ = cg.base.Close()
+ }
+ return closeErr
+}
+
+func (repo *Repository) CommitGraph() (*commitGraph, error) {
+ if repo == nil {
+ return nil, ErrInvalidObject
+ }
+ repo.commitGraphOnce.Do(func() {
+ repo.commitGraph, repo.commitGraphErr = repo.loadCommitGraph()
+ })
+ return repo.commitGraph, repo.commitGraphErr
+}
+
+func (repo *Repository) loadCommitGraph() (*commitGraph, error) {
+ cg, err := repo.loadCommitGraphSingle()
+ if err == nil {
+ return cg, nil
+ }
+ if !errors.Is(err, ErrNotFound) {
+ return nil, err
+ }
+ return repo.loadCommitGraphChain()
+}
+
+func (repo *Repository) loadCommitGraphSingle() (*commitGraph, error) {
+ path := repo.repoPath(filepath.Join("objects", "info", "commit-graph"))
+ return repo.loadCommitGraphFile(path)
+}
+
+func (repo *Repository) loadCommitGraphChain() (*commitGraph, error) {
+ chainPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", "commit-graph-chain"))
+ f, err := os.Open(chainPath)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, ErrNotFound
+ }
+ return nil, err
+ }
+ defer func() { _ = f.Close() }()
+
+ var hashes []string
+ scanner := bufio.NewScanner(f)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if line == "" {
+ continue
+ }
+ hashes = append(hashes, line)
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ if len(hashes) == 0 {
+ return nil, ErrNotFound
+ }
+
+ var chain *commitGraph
+ for i, h := range hashes {
+ graphPath := repo.repoPath(filepath.Join("objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", h)))
+ g, err := repo.loadCommitGraphFile(graphPath)
+ if err != nil {
+ return nil, err
+ }
+ if i > 0 {
+ if g.chunkBaseGraphs == nil {
+ return nil, ErrInvalidObject
+ }
+ if len(g.chunkBaseGraphs) < i*repo.hashAlgo.Size() {
+ return nil, ErrInvalidObject
+ }
+ for j := 0; j < i; j++ {
+ want := hashes[j]
+ start := j * repo.hashAlgo.Size()
+ end := start + repo.hashAlgo.Size()
+ got := hexEncode(g.chunkBaseGraphs[start:end])
+ if got != want {
+ return nil, ErrInvalidObject
+ }
+ }
+ }
+ if chain != nil {
+ if chain.numCommits+chain.numCommitsInBase > ^uint32(0) {
+ return nil, ErrInvalidObject
+ }
+ g.numCommitsInBase = chain.numCommits + chain.numCommitsInBase
+ }
+ g.base = chain
+ chain = g
+ }
+ validateCommitGraphChain(chain)
+ return chain, nil
+}
+
+func validateCommitGraphChain(head *commitGraph) {
+ if head == nil {
+ return
+ }
+ readGeneration := true
+ for g := head; g != nil; g = g.base {
+ readGeneration = readGeneration && g.readGeneration
+ }
+ if !readGeneration {
+ for g := head; g != nil; g = g.base {
+ g.readGeneration = false
+ }
+ }
+
+ var settings *bloom.Settings
+ for g := head; g != nil; g = g.base {
+ if g.bloomSettings == nil {
+ continue
+ }
+ if settings == nil {
+ settings = g.bloomSettings
+ continue
+ }
+ if settings.HashVersion != g.bloomSettings.HashVersion ||
+ settings.NumHashes != g.bloomSettings.NumHashes ||
+ settings.BitsPerEntry != g.bloomSettings.BitsPerEntry {
+ g.chunkBloomIndex = nil
+ g.chunkBloomData = nil
+ g.bloomSettings = nil
+ }
+ }
+}
+
+func (repo *Repository) loadCommitGraphFile(path string) (*commitGraph, error) {
+ f, err := os.Open(path)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, ErrNotFound
+ }
+ return nil, err
+ }
+ stat, err := f.Stat()
+ if err != nil {
+ _ = f.Close()
+ return nil, err
+ }
+ if stat.Size() < int64(commitGraphHeaderSize+commitGraphFanoutSize) {
+ _ = f.Close()
+ return nil, ErrInvalidObject
+ }
+ region, err := syscall.Mmap(
+ int(f.Fd()),
+ 0,
+ int(stat.Size()),
+ syscall.PROT_READ,
+ syscall.MAP_PRIVATE,
+ )
+ if err != nil {
+ _ = f.Close()
+ return nil, err
+ }
+ err = f.Close()
+ if err != nil {
+ _ = syscall.Munmap(region)
+ return nil, err
+ }
+ cg, err := parseCommitGraph(repo, path, region)
+ if err != nil {
+ _ = syscall.Munmap(region)
+ return nil, err
+ }
+ cg.data = region
+ return cg, nil
+}
+
+func parseCommitGraph(repo *Repository, filename string, data []byte) (*commitGraph, error) {
+ if len(data) < commitGraphHeaderSize {
+ return nil, ErrInvalidObject
+ }
+ sig := binary.BigEndian.Uint32(data[0:4])
+ if sig != commitGraphSignature {
+ return nil, ErrInvalidObject
+ }
+ version := data[4]
+ if version != commitGraphVersion {
+ return nil, ErrInvalidObject
+ }
+ hashVersion := data[5]
+ if hashVersion != hashAlgoVersion(repo.hashAlgo) {
+ return nil, ErrInvalidObject
+ }
+ numChunks := data[6]
+ numBase := data[7]
+
+ tocStart := commitGraphHeaderSize
+ tocLen := (int(numChunks) + 1) * commitGraphChunkEntrySize
+ if len(data) < tocStart+tocLen+repo.hashAlgo.Size() {
+ return nil, ErrInvalidObject
+ }
+
+ trailerStart := len(data) - repo.hashAlgo.Size()
+
+ type chunkEntry struct {
+ id uint32
+ offset uint64
+ }
+ entries := make([]chunkEntry, 0, numChunks+1)
+ for i := 0; i < int(numChunks)+1; i++ {
+ pos := tocStart + i*commitGraphChunkEntrySize
+ id := binary.BigEndian.Uint32(data[pos : pos+4])
+ offset := binary.BigEndian.Uint64(data[pos+4 : pos+12])
+ entries = append(entries, chunkEntry{id: id, offset: offset})
+ }
+
+ chunks := map[uint32][]byte{}
+ for i := 0; i < len(entries)-1; i++ {
+ id := entries[i].id
+ if id == 0 {
+ break
+ }
+ start := int(entries[i].offset)
+ end := int(entries[i+1].offset)
+ if start < tocStart+tocLen || end < start || end > trailerStart {
+ return nil, ErrInvalidObject
+ }
+ chunks[id] = data[start:end]
+ }
+
+ cg := &commitGraph{
+ repo: repo,
+ filename: filename,
+ dataLen: len(data),
+ hashAlgo: repo.hashAlgo,
+ numChunks: numChunks,
+ numBase: numBase,
+ }
+
+ oidf := chunks[commitGraphChunkOIDF]
+ if len(oidf) != commitGraphFanoutSize {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkOIDFanout = oidf
+ cg.numCommits = binary.BigEndian.Uint32(oidf[len(oidf)-4:])
+ for i := 0; i < 255; i++ {
+ if binary.BigEndian.Uint32(oidf[i*4:(i+1)*4]) >
+ binary.BigEndian.Uint32(oidf[(i+1)*4:(i+2)*4]) {
+ return nil, ErrInvalidObject
+ }
+ }
+
+ oidl := chunks[commitGraphChunkOIDL]
+ if len(oidl) != int(cg.numCommits)*repo.hashAlgo.Size() {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkOIDLookup = oidl
+
+ cdat := chunks[commitGraphChunkCDAT]
+ if len(cdat) != int(cg.numCommits)*commitGraphCommitDataSize(repo.hashAlgo) {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkCommit = cdat
+
+ if gda2 := chunks[commitGraphChunkGDA2]; len(gda2) != 0 {
+ if len(gda2) != int(cg.numCommits)*4 {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkGeneration = gda2
+ cg.readGeneration = true
+ }
+ if gdo2 := chunks[commitGraphChunkGDO2]; len(gdo2) != 0 {
+ cg.chunkGenerationOv = gdo2
+ }
+
+ if edge := chunks[commitGraphChunkEDGE]; len(edge) != 0 {
+ if len(edge)%4 != 0 {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkExtraEdges = edge
+ }
+
+ if base := chunks[commitGraphChunkBASE]; len(base) != 0 {
+ if numBase == 0 {
+ return nil, ErrInvalidObject
+ }
+ if len(base) != int(numBase)*repo.hashAlgo.Size() {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkBaseGraphs = base
+ } else if numBase != 0 {
+ return nil, ErrInvalidObject
+ }
+
+ if bidx := chunks[commitGraphChunkBIDX]; len(bidx) != 0 {
+ if len(bidx) != int(cg.numCommits)*4 {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkBloomIndex = bidx
+ }
+ if bdat := chunks[commitGraphChunkBDAT]; len(bdat) != 0 {
+ if len(bdat) < bloom.DataHeaderSize {
+ return nil, ErrInvalidObject
+ }
+ cg.chunkBloomData = bdat
+ settings, err := bloom.ParseSettings(bdat)
+ if err != nil {
+ return nil, err
+ }
+ cg.bloomSettings = settings
+ }
+
+ if (cg.chunkBloomIndex != nil) != (cg.chunkBloomData != nil) {
+ cg.chunkBloomIndex = nil
+ cg.chunkBloomData = nil
+ cg.bloomSettings = nil
+ }
+ if cg.chunkBloomIndex != nil && cg.chunkBloomData != nil {
+ for i := 0; i < int(cg.numCommits); i++ {
+ cur := binary.BigEndian.Uint32(cg.chunkBloomIndex[i*4 : i*4+4])
+ if i > 0 {
+ prev := binary.BigEndian.Uint32(cg.chunkBloomIndex[(i-1)*4 : i*4])
+ if cur < prev {
+ return nil, ErrInvalidObject
+ }
+ }
+ if int(cur) > len(cg.chunkBloomData)-bloom.DataHeaderSize {
+ return nil, ErrInvalidObject
+ }
+ }
+ }
+
+ if err := verifyCommitGraphTrailer(repo, data); err != nil {
+ return nil, err
+ }
+
+ return cg, nil
+}
+
+func commitGraphCommitDataSize(algo hashAlgorithm) int {
+ return algo.Size() + 16
+}
+
+func hashAlgoVersion(algo hashAlgorithm) byte {
+ switch algo {
+ case hashAlgoSHA1:
+ return 1
+ case hashAlgoSHA256:
+ return 2
+ default:
+ return 0
+ }
+}
+
+func verifyCommitGraphTrailer(repo *Repository, data []byte) error {
+ if repo == nil {
+ return ErrInvalidObject
+ }
+ hashSize := repo.hashAlgo.Size()
+ if len(data) < hashSize {
+ return ErrInvalidObject
+ }
+ want := data[len(data)-hashSize:]
+ h, err := repo.hashAlgo.New()
+ if err != nil {
+ return err
+ }
+ _, _ = h.Write(data[:len(data)-hashSize])
+ got := h.Sum(nil)
+ if !bytes.Equal(got, want) {
+ return ErrInvalidObject
+ }
+ return nil
+}
+
+func hexEncode(b []byte) string {
+ const hex = "0123456789abcdef"
+ out := make([]byte, len(b)*2)
+ for i, v := range b {
+ out[i*2] = hex[v>>4]
+ out[i*2+1] = hex[v&0x0f]
+ }
+ return string(out)
+}
+
+func (cg *commitGraph) lookupOID(id Hash) (uint32, bool) {
+ if cg == nil {
+ return 0, false
+ }
+ return commitGraphBsearchOID(cg, id)
+}
+
+func commitGraphBsearchOID(cg *commitGraph, id Hash) (uint32, bool) {
+ if cg == nil || len(cg.chunkOIDLookup) == 0 {
+ return 0, false
+ }
+ hashSize := cg.hashAlgo.Size()
+ first := int(id.data[0])
+ fanout := cg.chunkOIDFanout
+ var start uint32
+ if first > 0 {
+ start = binary.BigEndian.Uint32(fanout[(first-1)*4 : first*4])
+ }
+ end := binary.BigEndian.Uint32(fanout[first*4 : (first+1)*4])
+ lo := int(start)
+ hi := int(end) - 1
+ for lo <= hi {
+ mid := lo + (hi-lo)/2
+ pos := mid * hashSize
+ cmp := bytes.Compare(cg.chunkOIDLookup[pos:pos+hashSize], id.data[:hashSize])
+ if cmp == 0 {
+ return uint32(mid) + cg.numCommitsInBase, true
+ }
+ if cmp < 0 {
+ lo = mid + 1
+ } else {
+ hi = mid - 1
+ }
+ }
+ if cg.base != nil {
+ return commitGraphBsearchOID(cg.base, id)
+ }
+ return 0, false
+}
+
+func (cg *commitGraph) loadOID(pos uint32) (Hash, error) {
+ g := cg
+ for g != nil && pos < g.numCommitsInBase {
+ g = g.base
+ }
+ if g == nil {
+ return Hash{}, ErrInvalidObject
+ }
+ if pos >= g.numCommits+g.numCommitsInBase {
+ return Hash{}, ErrInvalidObject
+ }
+ lex := pos - g.numCommitsInBase
+ hashSize := g.hashAlgo.Size()
+ start := int(lex) * hashSize
+ end := start + hashSize
+ var out Hash
+ copy(out.data[:], g.chunkOIDLookup[start:end])
+ out.algo = g.hashAlgo
+ return out, nil
+}
+
+func (cg *commitGraph) commitDataAt(pos uint32) (*commitGraphCommit, error) {
+ g := cg
+ for g != nil && pos < g.numCommitsInBase {
+ g = g.base
+ }
+ if g == nil {
+ return nil, ErrInvalidObject
+ }
+ if pos >= g.numCommits+g.numCommitsInBase {
+ return nil, ErrInvalidObject
+ }
+ lex := pos - g.numCommitsInBase
+ stride := commitGraphCommitDataSize(g.hashAlgo)
+ start := int(lex) * stride
+ end := start + stride
+ if end > len(g.chunkCommit) {
+ return nil, ErrInvalidObject
+ }
+ buf := g.chunkCommit[start:end]
+
+ var tree Hash
+ hashSize := g.hashAlgo.Size()
+ copy(tree.data[:], buf[:hashSize])
+ tree.algo = g.hashAlgo
+
+ p1 := binary.BigEndian.Uint32(buf[hashSize : hashSize+4])
+ p2 := binary.BigEndian.Uint32(buf[hashSize+4 : hashSize+8])
+
+ genTime := binary.BigEndian.Uint32(buf[hashSize+8 : hashSize+12])
+ timeLow := binary.BigEndian.Uint32(buf[hashSize+12 : hashSize+16])
+
+ timeHigh := genTime & 0x3
+ commitTime := (uint64(timeHigh) << 32) | uint64(timeLow)
+ generation := uint64(genTime >> 2)
+
+ if g.readGeneration {
+ genOffset := binary.BigEndian.Uint32(g.chunkGeneration[int(lex)*4:])
+ if genOffset&correctedGenerationOverflow != 0 {
+ off := genOffset ^ correctedGenerationOverflow
+ idx := int(off) * 8
+ if g.chunkGenerationOv == nil || idx+8 > len(g.chunkGenerationOv) {
+ return nil, ErrInvalidObject
+ }
+ offset := binary.BigEndian.Uint64(g.chunkGenerationOv[idx : idx+8])
+ generation = commitTime + offset
+ } else {
+ generation = commitTime + uint64(genOffset)
+ }
+ }
+
+ var parents []uint32
+ var err error
+ parents, err = commitGraphAppendParent(parents, g, p1)
+ if err != nil {
+ return nil, err
+ }
+ parents, err = commitGraphAppendParent(parents, g, p2)
+ if err != nil {
+ return nil, err
+ }
+
+ return &commitGraphCommit{
+ Position: pos,
+ Tree: tree,
+ Parents: parents,
+ CommitTime: commitTime,
+ Generation: generation,
+ Graph: g,
+ graphLexPos: lex,
+ }, nil
+}