From 70916ec7713442a1f9b80f394b980ac2ab5a92df Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Sat, 21 Feb 2026 13:29:38 +0800 Subject: diff/trees: Add tree-diffing routines --- diff/trees/diff.go | 155 ++++++++++++++++++++++++++++++ diff/trees/diff_test.go | 244 ++++++++++++++++++++++++++++++++++++++++++++++++ diff/trees/entry.go | 15 +++ diff/trees/kind.go | 15 +++ diff/trees/path.go | 14 +++ 5 files changed, 443 insertions(+) create mode 100644 diff/trees/diff.go create mode 100644 diff/trees/diff_test.go create mode 100644 diff/trees/entry.go create mode 100644 diff/trees/kind.go create mode 100644 diff/trees/path.go (limited to 'diff/trees') diff --git a/diff/trees/diff.go b/diff/trees/diff.go new file mode 100644 index 00000000..836b71cc --- /dev/null +++ b/diff/trees/diff.go @@ -0,0 +1,155 @@ +// Package trees provides recursive diffs between Git tree objects. +package trees + +import ( + "codeberg.org/lindenii/furgit/object" + "codeberg.org/lindenii/furgit/objectid" +) + +// Diff compares two trees and returns recursive differences. +// +// readTree is used to lazily load child trees by object ID when recursion +// reaches directory entries. +func Diff(a, b *object.Tree, readTree func(objectid.ObjectID) (*object.Tree, error)) ([]Entry, error) { + var out []Entry + if err := diffRecursive(a, b, nil, readTree, &out); err != nil { + return nil, err + } + return out, nil +} + +func diffRecursive(a, b *object.Tree, prefix []byte, readTree func(objectid.ObjectID) (*object.Tree, error), out *[]Entry) error { + if a == nil && b == nil { + return nil + } + + if a == nil { + for i := range b.Entries { + entry := &b.Entries[i] + full := joinPath(prefix, entry.Name) + *out = append(*out, Entry{Path: full, Kind: EntryKindAdded, Old: nil, New: entry}) + if entry.Mode == object.FileModeDir { + sub, err := readTree(entry.ID) + if err != nil { + return err + } + if err := diffRecursive(nil, sub, full, readTree, out); err != nil { + return err + } + } + } + return nil + } + + if b == nil { + for i := range a.Entries { + entry := &a.Entries[i] + full := joinPath(prefix, entry.Name) + *out = append(*out, Entry{Path: full, Kind: EntryKindDeleted, Old: entry, New: nil}) + if entry.Mode == object.FileModeDir { + sub, err := readTree(entry.ID) + if err != nil { + return err + } + if err := diffRecursive(sub, nil, full, readTree, out); err != nil { + return err + } + } + } + return nil + } + + i := 0 + j := 0 + for i < len(a.Entries) && j < len(b.Entries) { + left := &a.Entries[i] + right := &b.Entries[j] + cmp := object.TreeEntryNameCompare( + left.Name, + left.Mode, + right.Name, + right.Mode == object.FileModeDir, + ) + switch { + case cmp < 0: + full := joinPath(prefix, left.Name) + *out = append(*out, Entry{Path: full, Kind: EntryKindDeleted, Old: left, New: nil}) + if left.Mode == object.FileModeDir { + sub, err := readTree(left.ID) + if err != nil { + return err + } + if err := diffRecursive(sub, nil, full, readTree, out); err != nil { + return err + } + } + i++ + case cmp > 0: + full := joinPath(prefix, right.Name) + *out = append(*out, Entry{Path: full, Kind: EntryKindAdded, Old: nil, New: right}) + if right.Mode == object.FileModeDir { + sub, err := readTree(right.ID) + if err != nil { + return err + } + if err := diffRecursive(nil, sub, full, readTree, out); err != nil { + return err + } + } + j++ + default: + full := joinPath(prefix, left.Name) + modified := left.Mode != right.Mode || left.ID != right.ID + if modified { + *out = append(*out, Entry{Path: full, Kind: EntryKindModified, Old: left, New: right}) + } + if left.Mode == object.FileModeDir && right.Mode == object.FileModeDir && left.ID != right.ID { + leftSub, err := readTree(left.ID) + if err != nil { + return err + } + rightSub, err := readTree(right.ID) + if err != nil { + return err + } + if err := diffRecursive(leftSub, rightSub, full, readTree, out); err != nil { + return err + } + } + i++ + j++ + } + } + + for ; i < len(a.Entries); i++ { + left := &a.Entries[i] + full := joinPath(prefix, left.Name) + *out = append(*out, Entry{Path: full, Kind: EntryKindDeleted, Old: left, New: nil}) + if left.Mode == object.FileModeDir { + sub, err := readTree(left.ID) + if err != nil { + return err + } + if err := diffRecursive(sub, nil, full, readTree, out); err != nil { + return err + } + } + } + + for ; j < len(b.Entries); j++ { + right := &b.Entries[j] + full := joinPath(prefix, right.Name) + *out = append(*out, Entry{Path: full, Kind: EntryKindAdded, Old: nil, New: right}) + if right.Mode == object.FileModeDir { + sub, err := readTree(right.ID) + if err != nil { + return err + } + if err := diffRecursive(nil, sub, full, readTree, out); err != nil { + return err + } + } + } + + return nil +} diff --git a/diff/trees/diff_test.go b/diff/trees/diff_test.go new file mode 100644 index 00000000..a23fbf1b --- /dev/null +++ b/diff/trees/diff_test.go @@ -0,0 +1,244 @@ +package trees_test + +import ( + "errors" + "os" + "path/filepath" + "testing" + + "codeberg.org/lindenii/furgit/diff/trees" + "codeberg.org/lindenii/furgit/internal/testgit" + "codeberg.org/lindenii/furgit/object" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objectstore/loose" + "codeberg.org/lindenii/furgit/objecttype" +) + +func TestDiffComplexNestedChanges(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: false}) + + writeTestFile(t, filepath.Join(repo.Dir(), "README.md"), "initial readme\n") + writeTestFile(t, filepath.Join(repo.Dir(), "unchanged.txt"), "leave me as-is\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "file_a.txt"), "alpha v1\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "file_b.txt"), "beta v1\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "file_c.txt"), "gamma v1\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "old.txt"), "old branch\n") + writeTestFile(t, filepath.Join(repo.Dir(), "treeB", "legacy.txt"), "legacy root\n") + writeTestFile(t, filepath.Join(repo.Dir(), "treeB", "sub", "retired.txt"), "retired\n") + + repo.Run(t, "add", ".") + baseTreeID := parseID(t, algo, repo.Run(t, "write-tree")) + + writeTestFile(t, filepath.Join(repo.Dir(), "README.md"), "updated readme\n") + repo.Run(t, "rm", "-f", "dir/file_a.txt") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "file_b.txt"), "beta v2\n") + repo.Run(t, "rm", "-f", "dir/nested/deeper/old.txt") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "new.txt"), "new branch entry\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "branch", "info.md"), "branch info\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "branch", "subbranch", "leaf.txt"), "leaf data\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "nested", "deeper", "branch", "subbranch", "deep", "final.txt"), "final artifact\n") + writeTestFile(t, filepath.Join(repo.Dir(), "dir", "newchild.txt"), "brand new sibling\n") + repo.Run(t, "rm", "-r", "-f", "treeB") + writeTestFile(t, filepath.Join(repo.Dir(), "features", "alpha", "README.md"), "alpha docs\n") + writeTestFile(t, filepath.Join(repo.Dir(), "features", "alpha", "beta", "gamma.txt"), "gamma payload\n") + writeTestFile(t, filepath.Join(repo.Dir(), "modules", "v2", "core", "main.go"), "package core\n") + writeTestFile(t, filepath.Join(repo.Dir(), "root_addition.txt"), "root level file\n") + + repo.Run(t, "add", ".") + updatedTreeID := parseID(t, algo, repo.Run(t, "write-tree")) + + store := openLooseStore(t, filepath.Join(repo.Dir(), ".git", "objects"), algo) + readTree := makeReadTree(t, store, algo) + baseTree := mustReadTree(t, readTree, baseTreeID) + updatedTree := mustReadTree(t, readTree, updatedTreeID) + + diffs, err := trees.Diff(baseTree, updatedTree, readTree) + if err != nil { + t.Fatalf("trees.Diff: %v", err) + } + + expected := map[string]diffExpectation{ + "README.md": {kind: trees.EntryKindModified}, + "dir": {kind: trees.EntryKindModified}, + "dir/file_a.txt": {kind: trees.EntryKindDeleted, newNil: true}, + "dir/newchild.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested": {kind: trees.EntryKindModified}, + "dir/nested/file_b.txt": {kind: trees.EntryKindModified}, + "dir/nested/deeper": {kind: trees.EntryKindModified}, + "dir/nested/deeper/old.txt": {kind: trees.EntryKindDeleted, newNil: true}, + "dir/nested/deeper/new.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested/deeper/branch": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested/deeper/branch/info.md": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested/deeper/branch/subbranch": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested/deeper/branch/subbranch/leaf.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested/deeper/branch/subbranch/deep": {kind: trees.EntryKindAdded, oldNil: true}, + "dir/nested/deeper/branch/subbranch/deep/final.txt": { + kind: trees.EntryKindAdded, + oldNil: true, + }, + "features": {kind: trees.EntryKindAdded, oldNil: true}, + "features/alpha": {kind: trees.EntryKindAdded, oldNil: true}, + "features/alpha/README.md": {kind: trees.EntryKindAdded, oldNil: true}, + "features/alpha/beta": {kind: trees.EntryKindAdded, oldNil: true}, + "features/alpha/beta/gamma.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "modules": {kind: trees.EntryKindAdded, oldNil: true}, + "modules/v2": {kind: trees.EntryKindAdded, oldNil: true}, + "modules/v2/core": {kind: trees.EntryKindAdded, oldNil: true}, + "modules/v2/core/main.go": {kind: trees.EntryKindAdded, oldNil: true}, + "root_addition.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "treeB": {kind: trees.EntryKindDeleted, newNil: true}, + "treeB/legacy.txt": {kind: trees.EntryKindDeleted, newNil: true}, + "treeB/sub": {kind: trees.EntryKindDeleted, newNil: true}, + "treeB/sub/retired.txt": {kind: trees.EntryKindDeleted, newNil: true}, + } + + checkDiffs(t, diffs, expected) + }) +} + +func TestDiffDirectoryAddDeleteDeep(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: false}) + + writeTestFile(t, filepath.Join(repo.Dir(), "old_dir", "old.txt"), "stale directory\n") + writeTestFile(t, filepath.Join(repo.Dir(), "old_dir", "sub1", "legacy.txt"), "legacy path\n") + writeTestFile(t, filepath.Join(repo.Dir(), "old_dir", "sub1", "nested", "end.txt"), "legacy end\n") + + repo.Run(t, "add", ".") + originalTreeID := parseID(t, algo, repo.Run(t, "write-tree")) + + repo.Run(t, "rm", "-r", "-f", "old_dir") + writeTestFile(t, filepath.Join(repo.Dir(), "fresh", "alpha", "beta", "new.txt"), "brand new directory\n") + writeTestFile(t, filepath.Join(repo.Dir(), "fresh", "alpha", "docs", "note.md"), "docs note\n") + writeTestFile(t, filepath.Join(repo.Dir(), "fresh", "alpha", "beta", "gamma", "delta.txt"), "delta payload\n") + + repo.Run(t, "add", ".") + nextTreeID := parseID(t, algo, repo.Run(t, "write-tree")) + + store := openLooseStore(t, filepath.Join(repo.Dir(), ".git", "objects"), algo) + readTree := makeReadTree(t, store, algo) + originalTree := mustReadTree(t, readTree, originalTreeID) + nextTree := mustReadTree(t, readTree, nextTreeID) + + diffs, err := trees.Diff(originalTree, nextTree, readTree) + if err != nil { + t.Fatalf("trees.Diff: %v", err) + } + + expected := map[string]diffExpectation{ + "fresh": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha/beta": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha/beta/new.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha/beta/gamma": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha/beta/gamma/delta.txt": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha/docs": {kind: trees.EntryKindAdded, oldNil: true}, + "fresh/alpha/docs/note.md": {kind: trees.EntryKindAdded, oldNil: true}, + "old_dir": {kind: trees.EntryKindDeleted, newNil: true}, + "old_dir/old.txt": {kind: trees.EntryKindDeleted, newNil: true}, + "old_dir/sub1": {kind: trees.EntryKindDeleted, newNil: true}, + "old_dir/sub1/legacy.txt": {kind: trees.EntryKindDeleted, newNil: true}, + "old_dir/sub1/nested": {kind: trees.EntryKindDeleted, newNil: true}, + "old_dir/sub1/nested/end.txt": {kind: trees.EntryKindDeleted, newNil: true}, + } + + checkDiffs(t, diffs, expected) + }) +} + +type diffExpectation struct { + kind trees.EntryKind + oldNil bool + newNil bool +} + +func writeTestFile(t *testing.T, path, data string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("create directory for %s: %v", path, err) + } + if err := os.WriteFile(path, []byte(data), 0o644); err != nil { + t.Fatalf("write %s: %v", path, err) + } +} + +func openLooseStore(t *testing.T, objectsPath string, algo objectid.Algorithm) *loose.Store { + t.Helper() + root, err := os.OpenRoot(objectsPath) + if err != nil { + t.Fatalf("OpenRoot(%q): %v", objectsPath, err) + } + t.Cleanup(func() { _ = root.Close() }) + store, err := loose.New(root, algo) + if err != nil { + t.Fatalf("loose.New: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + return store +} + +func makeReadTree(t *testing.T, store *loose.Store, algo objectid.Algorithm) func(objectid.ObjectID) (*object.Tree, error) { + t.Helper() + return func(id objectid.ObjectID) (*object.Tree, error) { + ty, content, err := store.ReadBytesContent(id) + if err != nil { + return nil, err + } + if ty != objecttype.TypeTree { + return nil, errors.New("diff/trees test: object is not a tree") + } + return object.ParseTree(content, algo) + } +} + +func mustReadTree(t *testing.T, readTree func(objectid.ObjectID) (*object.Tree, error), id objectid.ObjectID) *object.Tree { + t.Helper() + tree, err := readTree(id) + if err != nil { + t.Fatalf("read tree %s: %v", id, err) + } + return tree +} + +func parseID(t *testing.T, algo objectid.Algorithm, hex string) objectid.ObjectID { + t.Helper() + id, err := objectid.ParseHex(algo, hex) + if err != nil { + t.Fatalf("parse object id %q: %v", hex, err) + } + return id +} + +func checkDiffs(t *testing.T, diffs []trees.Entry, expected map[string]diffExpectation) { + t.Helper() + got := make(map[string]trees.Entry, len(diffs)) + for _, diff := range diffs { + path := string(diff.Path) + if _, exists := got[path]; exists { + t.Fatalf("duplicate diff path %q", path) + } + got[path] = diff + } + if len(got) != len(expected) { + t.Fatalf("diff count = %d, want %d", len(got), len(expected)) + } + for path, want := range expected { + diff, ok := got[path] + if !ok { + t.Fatalf("missing diff for %q", path) + } + if diff.Kind != want.kind { + t.Errorf("%s kind = %v, want %v", path, diff.Kind, want.kind) + } + if (diff.Old == nil) != want.oldNil { + t.Errorf("%s old nil = %v, want %v", path, diff.Old == nil, want.oldNil) + } + if (diff.New == nil) != want.newNil { + t.Errorf("%s new nil = %v, want %v", path, diff.New == nil, want.newNil) + } + if diff.Kind == trees.EntryKindModified && diff.Old != nil && diff.New != nil && diff.Old.ID == diff.New.ID { + t.Errorf("%s modified entry should change IDs", path) + } + } +} diff --git a/diff/trees/entry.go b/diff/trees/entry.go new file mode 100644 index 00000000..267c3380 --- /dev/null +++ b/diff/trees/entry.go @@ -0,0 +1,15 @@ +package trees + +import "codeberg.org/lindenii/furgit/object" + +// Entry is one recursive tree difference at a path. +type Entry struct { + // Path is the slash-separated path relative to the diff root. + Path []byte + // Kind is the difference kind for this path. + Kind EntryKind + // Old is the old tree entry (nil when Kind is EntryKindAdded). + Old *object.TreeEntry + // New is the new tree entry (nil when Kind is EntryKindDeleted). + New *object.TreeEntry +} diff --git a/diff/trees/kind.go b/diff/trees/kind.go new file mode 100644 index 00000000..6fdc6e0d --- /dev/null +++ b/diff/trees/kind.go @@ -0,0 +1,15 @@ +package trees + +// EntryKind identifies a tree-diff entry kind. +type EntryKind int + +const ( + // EntryKindInvalid indicates an invalid diff entry kind. + EntryKindInvalid EntryKind = iota + // EntryKindDeleted indicates a deleted path. + EntryKindDeleted + // EntryKindAdded indicates an added path. + EntryKindAdded + // EntryKindModified indicates a modified path. + EntryKindModified +) diff --git a/diff/trees/path.go b/diff/trees/path.go new file mode 100644 index 00000000..0ced379a --- /dev/null +++ b/diff/trees/path.go @@ -0,0 +1,14 @@ +package trees + +func joinPath(prefix, name []byte) []byte { + if len(prefix) == 0 { + out := make([]byte, len(name)) + copy(out, name) + return out + } + out := make([]byte, len(prefix)+1+len(name)) + copy(out, prefix) + out[len(prefix)] = '/' + copy(out[len(prefix)+1:], name) + return out +} -- cgit v1.3.1-10-gc9f91