diff options
| -rw-r--r-- | reachability.go | 385 | ||||
| -rw-r--r-- | reachability_test.go | 196 |
2 files changed, 581 insertions, 0 deletions
diff --git a/reachability.go b/reachability.go new file mode 100644 index 00000000..d27095c4 --- /dev/null +++ b/reachability.go @@ -0,0 +1,385 @@ +package furgit + +import ( + "fmt" + "iter" +) + +// ReachabilityMode controls which object types are walked. +type ReachabilityMode uint8 + +const ( + // ReachabilityCommitsOnly walks only commit objects. + ReachabilityCommitsOnly ReachabilityMode = iota + // ReachabilityAllObjects walks commits, trees, blobs, and tags reachable + // from the commits in Wants. + ReachabilityAllObjects +) + +// ReachabilityQuery describes a want/have reachability walk. +// +// ReachableObjects returns objects reachable from Wants. If Mode is +// ReachabilityCommitsOnly, non-commit Wants are ignored except for tags, +// which are peeled to their target. +type ReachabilityQuery struct { + Wants []Hash + Haves []Hash + Mode ReachabilityMode + + // StopAtHaves prunes traversal when an object is reachable from Haves. + StopAtHaves bool +} + +// ReachableObject reports a reachable object and whether it is also reachable +// from the Have set. +type ReachableObject struct { + ID Hash + Type ObjectType + InHave bool +} + +// ReachabilityWalk is a single-use reachability iterator. +// After iterating, Err reports any error encountered during the walk. +type ReachabilityWalk struct { + repo *Repository + query ReachabilityQuery + err error + + haveInit bool + haveErr error + haveSet map[Hash]struct{} +} + +// ReachableObjects returns a single-use iterator over objects reachable from +// query.Wants. +// +// It yields ReachableObject values; InHave is true when the object is also +// reachable from query.Haves. +func (repo *Repository) ReachableObjects(query ReachabilityQuery) (*ReachabilityWalk, error) { + if repo == nil { + return nil, ErrInvalidObject + } + switch query.Mode { + case ReachabilityCommitsOnly, ReachabilityAllObjects: + default: + return nil, ErrInvalidObject + } + for _, id := range query.Wants { + if id.algo != repo.hashAlgo { + return nil, fmt.Errorf("furgit: reachability: want hash algorithm mismatch") + } + } + for _, id := range query.Haves { + if id.algo != repo.hashAlgo { + return nil, fmt.Errorf("furgit: reachability: have hash algorithm mismatch") + } + } + return &ReachabilityWalk{ + repo: repo, + query: query, + }, nil +} + +// Seq returns the iterator. +func (w *ReachabilityWalk) Seq() iter.Seq[ReachableObject] { + return func(yield func(ReachableObject) bool) { + if w == nil || w.repo == nil { + w.err = ErrInvalidObject + return + } + haveSet, err := w.ensureHaveSet() + if err != nil { + w.err = err + return + } + + wantWalk := reachabilityWalker{ + repo: w.repo, + mode: w.query.Mode, + seenCommits: make(map[Hash]struct{}), + seenObjects: make(map[Hash]struct{}), + haveSet: haveSet, + stopAtHaves: w.query.StopAtHaves, + } + if err := wantWalk.walkRoots(w.query.Wants, func(obj ReachableObject) bool { + return yield(obj) + }); err != nil { + w.err = err + return + } + } +} + +// Err reports the first error encountered by the iterator. +func (w *ReachabilityWalk) Err() error { + if w == nil { + return ErrInvalidObject + } + return w.err +} + +// HaveContains reports whether id is reachable from Haves. +func (w *ReachabilityWalk) HaveContains(id Hash) (bool, error) { + if w == nil || w.repo == nil { + return false, ErrInvalidObject + } + haveSet, err := w.ensureHaveSet() + if err != nil { + return false, err + } + _, ok := haveSet[id] + return ok, nil +} + +func (w *ReachabilityWalk) ensureHaveSet() (map[Hash]struct{}, error) { + if w.haveInit { + return w.haveSet, w.haveErr + } + w.haveInit = true + w.haveSet = make(map[Hash]struct{}) + if len(w.query.Haves) == 0 { + return w.haveSet, nil + } + haveWalk := reachabilityWalker{ + repo: w.repo, + mode: w.query.Mode, + seenCommits: make(map[Hash]struct{}), + seenObjects: make(map[Hash]struct{}), + recordHaveOnly: true, + haveSet: w.haveSet, + } + if err := haveWalk.walkRoots(w.query.Haves, nil); err != nil { + w.haveErr = err + return nil, err + } + return w.haveSet, nil +} + +type reachabilityWalker struct { + repo *Repository + mode ReachabilityMode + + seenCommits map[Hash]struct{} + seenObjects map[Hash]struct{} + + haveSet map[Hash]struct{} + recordHaveOnly bool + stopAtHaves bool + + cg *commitGraph + cgInit bool +} + +func (rw *reachabilityWalker) initCommitGraph() { + if rw.cgInit { + return + } + rw.cgInit = true + cg, err := rw.repo.CommitGraph() + if err == nil { + rw.cg = cg + } +} + +func (rw *reachabilityWalker) walkRoots(roots []Hash, emit func(ReachableObject) bool) error { + for _, id := range roots { + if err := rw.walkObject(id, emit); err != nil { + return err + } + } + return nil +} + +func (rw *reachabilityWalker) walkObject(id Hash, emit func(ReachableObject) bool) error { + if rw.stopAtHaves { + if _, ok := rw.haveSet[id]; ok { + return nil + } + } + if rw.recordHaveOnly { + if _, ok := rw.haveSet[id]; ok { + return nil + } + } else { + if _, ok := rw.seenObjects[id]; ok { + return nil + } + } + + rw.initCommitGraph() + if rw.cg != nil { + if pos, ok := rw.cg.CommitPosition(id); ok { + return rw.walkCommitByPos(pos, id, emit) + } + } + + ty, body, err := rw.repo.ReadObjectTypeRaw(id) + if err != nil { + return err + } + + switch ty { + case ObjectTypeCommit: + return rw.walkCommitBody(id, body, emit) + case ObjectTypeTree: + if rw.mode != ReachabilityAllObjects { + return nil + } + return rw.walkTreeBody(id, body, emit) + case ObjectTypeBlob: + if rw.mode != ReachabilityAllObjects { + return nil + } + return rw.emitObject(id, ObjectTypeBlob, emit) + case ObjectTypeTag: + return rw.walkTagBody(id, body, emit) + default: + return ErrInvalidObject + } +} + +func (rw *reachabilityWalker) walkCommitByPos(pos uint32, id Hash, emit func(ReachableObject) bool) error { + if _, ok := rw.seenCommits[id]; ok { + return nil + } + rw.seenCommits[id] = struct{}{} + + cc, err := rw.cg.CommitAt(pos) + if err != nil { + return err + } + + if err := rw.emitObject(id, ObjectTypeCommit, emit); err != nil { + return err + } + + if rw.mode == ReachabilityAllObjects { + if err := rw.walkTreeByID(cc.Tree, emit); err != nil { + return err + } + } + + for _, parentPos := range cc.Parents { + parentID, err := rw.cg.OIDAt(parentPos) + if err != nil { + return err + } + if err := rw.walkObject(parentID, emit); err != nil { + return err + } + } + return nil +} + +func (rw *reachabilityWalker) walkCommitBody(id Hash, body []byte, emit func(ReachableObject) bool) error { + if _, ok := rw.seenCommits[id]; ok { + return nil + } + rw.seenCommits[id] = struct{}{} + + commit, err := parseCommit(id, body, rw.repo) + if err != nil { + return err + } + if err := rw.emitObject(id, ObjectTypeCommit, emit); err != nil { + return err + } + if rw.mode == ReachabilityAllObjects { + if err := rw.walkTreeByID(commit.Tree, emit); err != nil { + return err + } + } + for _, parent := range commit.Parents { + if err := rw.walkObject(parent, emit); err != nil { + return err + } + } + return nil +} + +func (rw *reachabilityWalker) walkTagBody(id Hash, body []byte, emit func(ReachableObject) bool) error { + tag, err := parseTag(id, body, rw.repo) + if err != nil { + return err + } + if rw.mode == ReachabilityAllObjects { + if err := rw.emitObject(id, ObjectTypeTag, emit); err != nil { + return err + } + } + if tag.TargetType == ObjectTypeCommit { + return rw.walkObject(tag.Target, emit) + } + if rw.mode == ReachabilityAllObjects { + return rw.walkObject(tag.Target, emit) + } + return nil +} + +func (rw *reachabilityWalker) walkTreeByID(id Hash, emit func(ReachableObject) bool) error { + if rw.mode != ReachabilityAllObjects { + return nil + } + if _, ok := rw.seenObjects[id]; ok && !rw.recordHaveOnly { + return nil + } + ty, body, err := rw.repo.ReadObjectTypeRaw(id) + if err != nil { + return err + } + if ty != ObjectTypeTree { + return ErrInvalidObject + } + return rw.walkTreeBody(id, body, emit) +} + +func (rw *reachabilityWalker) walkTreeBody(id Hash, body []byte, emit func(ReachableObject) bool) error { + if rw.mode != ReachabilityAllObjects { + return nil + } + tree, err := parseTree(id, body, rw.repo) + if err != nil { + return err + } + if err := rw.emitObject(id, ObjectTypeTree, emit); err != nil { + return err + } + for _, entry := range tree.Entries { + switch entry.Mode { + case FileModeDir: + if err := rw.walkTreeByID(entry.ID, emit); err != nil { + return err + } + case FileModeGitlink: + // IIRC Gitlinks are references to external repositories + // and do not imply reachability of the target commit... + continue + default: + if err := rw.emitObject(entry.ID, ObjectTypeBlob, emit); err != nil { + return err + } + } + } + return nil +} + +func (rw *reachabilityWalker) emitObject(id Hash, ty ObjectType, emit func(ReachableObject) bool) error { + if rw.recordHaveOnly { + rw.haveSet[id] = struct{}{} + return nil + } + if _, ok := rw.seenObjects[id]; ok { + return nil + } + rw.seenObjects[id] = struct{}{} + inHave := false + if _, ok := rw.haveSet[id]; ok { + inHave = true + } + if emit != nil { + if !emit(ReachableObject{ID: id, Type: ty, InHave: inHave}) { + return nil + } + } + return nil +} diff --git a/reachability_test.go b/reachability_test.go new file mode 100644 index 00000000..2a2d5060 --- /dev/null +++ b/reachability_test.go @@ -0,0 +1,196 @@ +package furgit + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestReachabilityCommitsWantHave(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + var commits []string + for i := 0; i < 3; i++ { + path := filepath.Join(workDir, "file.txt") + if err := os.WriteFile(path, []byte{byte('a' + i), '\n'}, 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit") + commits = append(commits, gitCmd(t, repoPath, "rev-parse", "HEAD")) + } + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + wantID, _ := repo.ParseHash(commits[2]) + haveID, _ := repo.ParseHash(commits[1]) + walk, err := repo.ReachableObjects(ReachabilityQuery{ + Wants: []Hash{wantID}, + Haves: []Hash{haveID}, + Mode: ReachabilityCommitsOnly, + }) + if err != nil { + t.Fatalf("ReachableObjects failed: %v", err) + } + + seen := make(map[Hash]ReachableObject) + for obj := range walk.Seq() { + seen[obj.ID] = obj + if obj.Type != ObjectTypeCommit { + t.Fatalf("unexpected object type: %v", obj.Type) + } + } + if err := walk.Err(); err != nil { + t.Fatalf("Reachability walk error: %v", err) + } + + headID := wantID + parentID, _ := repo.ParseHash(commits[1]) + rootID, _ := repo.ParseHash(commits[0]) + if _, ok := seen[headID]; !ok { + t.Fatalf("missing head commit") + } + if _, ok := seen[parentID]; !ok { + t.Fatalf("missing parent commit") + } + if _, ok := seen[rootID]; !ok { + t.Fatalf("missing root commit") + } + if seen[headID].InHave { + t.Fatalf("head commit incorrectly marked InHave") + } + if !seen[parentID].InHave || !seen[rootID].InHave { + t.Fatalf("expected parent and root commits to be InHave") + } + + inHave, err := walk.HaveContains(parentID) + if err != nil { + t.Fatalf("HaveContains failed: %v", err) + } + if !inHave { + t.Fatalf("expected parent to be reachable from have") + } +} + +func TestReachabilityAllObjects(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + if err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("one\n"), 0o644); err != nil { + t.Fatalf("write file1: %v", err) + } + if err := os.Mkdir(filepath.Join(workDir, "dir"), 0o755); err != nil { + t.Fatalf("mkdir dir: %v", err) + } + if err := os.WriteFile(filepath.Join(workDir, "dir", "file2.txt"), []byte("two\n"), 0o644); err != nil { + t.Fatalf("write file2: %v", err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + head := gitCmd(t, repoPath, "rev-parse", "HEAD") + wantID, _ := repo.ParseHash(head) + walk, err := repo.ReachableObjects(ReachabilityQuery{ + Wants: []Hash{wantID}, + Mode: ReachabilityAllObjects, + }) + if err != nil { + t.Fatalf("ReachableObjects failed: %v", err) + } + + seen := make(map[Hash]ObjectType) + for obj := range walk.Seq() { + seen[obj.ID] = obj.Type + } + if err := walk.Err(); err != nil { + t.Fatalf("Reachability walk error: %v", err) + } + + treeStr := gitCmd(t, repoPath, "show", "-s", "--format=%T", head) + treeID, _ := repo.ParseHash(treeStr) + lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeStr) + fields := strings.Fields(lsTree) + if len(fields) < 3 { + t.Fatalf("unexpected ls-tree output: %q", lsTree) + } + blobID, _ := repo.ParseHash(fields[2]) + + if seen[wantID] != ObjectTypeCommit { + t.Fatalf("missing commit in reachability walk") + } + if seen[treeID] != ObjectTypeTree { + t.Fatalf("missing tree in reachability walk") + } + if seen[blobID] != ObjectTypeBlob { + t.Fatalf("missing blob in reachability walk") + } +} + +func TestReachabilityStopAtHaves(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + var commits []string + for i := 0; i < 3; i++ { + path := filepath.Join(workDir, "file.txt") + if err := os.WriteFile(path, []byte{byte('a' + i), '\n'}, 0o644); err != nil { + t.Fatalf("write file: %v", err) + } + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "commit") + commits = append(commits, gitCmd(t, repoPath, "rev-parse", "HEAD")) + } + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + wantID, _ := repo.ParseHash(commits[2]) + haveID, _ := repo.ParseHash(commits[1]) + walk, err := repo.ReachableObjects(ReachabilityQuery{ + Wants: []Hash{wantID}, + Haves: []Hash{haveID}, + Mode: ReachabilityCommitsOnly, + StopAtHaves: true, + }) + if err != nil { + t.Fatalf("ReachableObjects failed: %v", err) + } + + var got []Hash + for obj := range walk.Seq() { + got = append(got, obj.ID) + if obj.InHave { + t.Fatalf("unexpected InHave object in send set") + } + } + if err := walk.Err(); err != nil { + t.Fatalf("Reachability walk error: %v", err) + } + if len(got) != 1 || got[0] != wantID { + t.Fatalf("StopAtHaves mismatch: got %d objects", len(got)) + } +} |
