From 55cd9c544a66d11b6e8fa3c654fa57d27c7c4e14 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Fri, 30 Jan 2026 10:02:45 +0100 Subject: diffbytes: Move to its own package --- diffbytes.go | 222 ------------------------------ diffbytes/diffbytes.go | 222 ++++++++++++++++++++++++++++++ diffbytes/diffbytes_test.go | 326 ++++++++++++++++++++++++++++++++++++++++++++ diffbytes/unsafe.go | 17 +++ diffbytes_test.go | 326 -------------------------------------------- unsafe.go | 17 --- 6 files changed, 565 insertions(+), 565 deletions(-) delete mode 100644 diffbytes.go create mode 100644 diffbytes/diffbytes.go create mode 100644 diffbytes/diffbytes_test.go create mode 100644 diffbytes/unsafe.go delete mode 100644 diffbytes_test.go delete mode 100644 unsafe.go diff --git a/diffbytes.go b/diffbytes.go deleted file mode 100644 index 2698bf55..00000000 --- a/diffbytes.go +++ /dev/null @@ -1,222 +0,0 @@ -package furgit - -import "bytes" - -// DiffBytes performs a line-based diff. -// Lines are bytes up to and including '\n' (final line may lack '\n'). -func DiffBytes(oldB, newB []byte) ([]BytesDiffChunk, error) { - type lineRef struct { - base []byte - start int - end int - } - - split := func(b []byte) []lineRef { - if len(b) == 0 { - return nil - } - var res []lineRef - start := 0 - for i := range b { - if b[i] == '\n' { - res = append(res, lineRef{base: b, start: start, end: i + 1}) - start = i + 1 - } - } - if start < len(b) { - res = append(res, lineRef{base: b, start: start, end: len(b)}) - } - return res - } - - oldLines := split(oldB) - newLines := split(newB) - - n := len(oldLines) - m := len(newLines) - if n == 0 && m == 0 { - return nil, nil - } - - idOf := make(map[string]int) - nextID := 0 - oldIDs := make([]int, n) - for i, ln := range oldLines { - key := bytesToString(ln.base[ln.start:ln.end]) - id, ok := idOf[key] - if !ok { - id = nextID - idOf[key] = id - nextID++ - } - oldIDs[i] = id - } - newIDs := make([]int, m) - for i, ln := range newLines { - key := bytesToString(ln.base[ln.start:ln.end]) - id, ok := idOf[key] - if !ok { - id = nextID - idOf[key] = id - nextID++ - } - newIDs[i] = id - } - - max := n + m - offset := max - trace := make([][]int, 0, max+1) - - Vprev := make([]int, 2*max+1) - for i := range Vprev { - Vprev[i] = -1 - } - - x0 := 0 - y0 := 0 - for x0 < n && y0 < m && oldIDs[x0] == newIDs[y0] { - x0++ - y0++ - } - Vprev[offset+0] = x0 - trace = append(trace, append([]int(nil), Vprev...)) - - found := x0 >= n && y0 >= m - - for D := 1; D <= max && !found; D++ { - V := make([]int, 2*max+1) - for i := range V { - V[i] = -1 - } - - for k := -D; k <= D; k += 2 { - var x int - if k == -D || (k != D && Vprev[offset+(k-1)] < Vprev[offset+(k+1)]) { - x = Vprev[offset+(k+1)] - } else { - x = Vprev[offset+(k-1)] + 1 - } - y := x - k - - for x < n && y < m && oldIDs[x] == newIDs[y] { - x++ - y++ - } - V[offset+k] = x - - if x >= n && y >= m { - trace = append(trace, V) - found = true - break - } - } - - if !found { - trace = append(trace, V) - Vprev = V - } - } - - type edit struct { - kind BytesDiffChunkKind - lineref lineRef - } - revEdits := make([]edit, 0, n+m) - - x := n - y := m - for D := len(trace) - 1; D >= 0; D-- { - k := x - y - - var ( - prevK int - prevX int - prevY int - ) - if D > 0 { - prevV := trace[D-1] - if k == -D || (k != D && prevV[offset+(k-1)] < prevV[offset+(k+1)]) { - prevK = k + 1 - } else { - prevK = k - 1 - } - prevX = prevV[offset+prevK] - prevY = prevX - prevK - } - - for x > prevX && y > prevY { - x-- - y-- - revEdits = append(revEdits, edit{kind: BytesDiffChunkKindUnchanged, lineref: oldLines[x]}) - } - - if D == 0 { - break - } - - if x == prevX { - y-- - revEdits = append(revEdits, edit{kind: BytesDiffChunkKindAdded, lineref: newLines[y]}) - } else { - x-- - revEdits = append(revEdits, edit{kind: BytesDiffChunkKindDeleted, lineref: oldLines[x]}) - } - } - - for i, j := 0, len(revEdits)-1; i < j; i, j = i+1, j-1 { - revEdits[i], revEdits[j] = revEdits[j], revEdits[i] - } - - var out []BytesDiffChunk - type meta struct { - base []byte - start int - end int - } - var metas []meta - - for _, e := range revEdits { - curBase := e.lineref.base - curStart := e.lineref.start - curEnd := e.lineref.end - - if len(out) == 0 || out[len(out)-1].Kind != e.kind { - out = append(out, BytesDiffChunk{Kind: e.kind, Data: curBase[curStart:curEnd]}) - metas = append(metas, meta{base: curBase, start: curStart, end: curEnd}) - continue - } - - lastIdx := len(out) - 1 - lastMeta := metas[lastIdx] - - if bytes.Equal(lastMeta.base, curBase) && lastMeta.end == curStart { - metas[lastIdx].end = curEnd - out[lastIdx].Data = curBase[metas[lastIdx].start:metas[lastIdx].end] - continue - } - - out[lastIdx].Data = append(out[lastIdx].Data, curBase[curStart:curEnd]...) - metas[lastIdx] = meta{base: nil, start: 0, end: 0} - } - - return out, nil -} - -// BytesDiffChunk represents a contiguous region of bytes categorized -// as unchanged, deleted, or added. -type BytesDiffChunk struct { - Kind BytesDiffChunkKind - Data []byte -} - -// BytesDiffChunkKind enumerates the type of diff chunk. -type BytesDiffChunkKind int - -const ( - // BytesDiffChunkKindUnchanged represents an unchanged diff chunk. - BytesDiffChunkKindUnchanged BytesDiffChunkKind = iota - // BytesDiffChunkKindDeleted represents a deleted diff chunk. - BytesDiffChunkKindDeleted - // BytesDiffChunkKindAdded represents an added diff chunk. - BytesDiffChunkKindAdded -) diff --git a/diffbytes/diffbytes.go b/diffbytes/diffbytes.go new file mode 100644 index 00000000..cac79aef --- /dev/null +++ b/diffbytes/diffbytes.go @@ -0,0 +1,222 @@ +package diffbytes + +import "bytes" + +// DiffBytes performs a line-based diff. +// Lines are bytes up to and including '\n' (final line may lack '\n'). +func DiffBytes(oldB, newB []byte) ([]BytesDiffChunk, error) { + type lineRef struct { + base []byte + start int + end int + } + + split := func(b []byte) []lineRef { + if len(b) == 0 { + return nil + } + var res []lineRef + start := 0 + for i := range b { + if b[i] == '\n' { + res = append(res, lineRef{base: b, start: start, end: i + 1}) + start = i + 1 + } + } + if start < len(b) { + res = append(res, lineRef{base: b, start: start, end: len(b)}) + } + return res + } + + oldLines := split(oldB) + newLines := split(newB) + + n := len(oldLines) + m := len(newLines) + if n == 0 && m == 0 { + return nil, nil + } + + idOf := make(map[string]int) + nextID := 0 + oldIDs := make([]int, n) + for i, ln := range oldLines { + key := bytesToString(ln.base[ln.start:ln.end]) + id, ok := idOf[key] + if !ok { + id = nextID + idOf[key] = id + nextID++ + } + oldIDs[i] = id + } + newIDs := make([]int, m) + for i, ln := range newLines { + key := bytesToString(ln.base[ln.start:ln.end]) + id, ok := idOf[key] + if !ok { + id = nextID + idOf[key] = id + nextID++ + } + newIDs[i] = id + } + + max := n + m + offset := max + trace := make([][]int, 0, max+1) + + Vprev := make([]int, 2*max+1) + for i := range Vprev { + Vprev[i] = -1 + } + + x0 := 0 + y0 := 0 + for x0 < n && y0 < m && oldIDs[x0] == newIDs[y0] { + x0++ + y0++ + } + Vprev[offset+0] = x0 + trace = append(trace, append([]int(nil), Vprev...)) + + found := x0 >= n && y0 >= m + + for D := 1; D <= max && !found; D++ { + V := make([]int, 2*max+1) + for i := range V { + V[i] = -1 + } + + for k := -D; k <= D; k += 2 { + var x int + if k == -D || (k != D && Vprev[offset+(k-1)] < Vprev[offset+(k+1)]) { + x = Vprev[offset+(k+1)] + } else { + x = Vprev[offset+(k-1)] + 1 + } + y := x - k + + for x < n && y < m && oldIDs[x] == newIDs[y] { + x++ + y++ + } + V[offset+k] = x + + if x >= n && y >= m { + trace = append(trace, V) + found = true + break + } + } + + if !found { + trace = append(trace, V) + Vprev = V + } + } + + type edit struct { + kind BytesDiffChunkKind + lineref lineRef + } + revEdits := make([]edit, 0, n+m) + + x := n + y := m + for D := len(trace) - 1; D >= 0; D-- { + k := x - y + + var ( + prevK int + prevX int + prevY int + ) + if D > 0 { + prevV := trace[D-1] + if k == -D || (k != D && prevV[offset+(k-1)] < prevV[offset+(k+1)]) { + prevK = k + 1 + } else { + prevK = k - 1 + } + prevX = prevV[offset+prevK] + prevY = prevX - prevK + } + + for x > prevX && y > prevY { + x-- + y-- + revEdits = append(revEdits, edit{kind: BytesDiffChunkKindUnchanged, lineref: oldLines[x]}) + } + + if D == 0 { + break + } + + if x == prevX { + y-- + revEdits = append(revEdits, edit{kind: BytesDiffChunkKindAdded, lineref: newLines[y]}) + } else { + x-- + revEdits = append(revEdits, edit{kind: BytesDiffChunkKindDeleted, lineref: oldLines[x]}) + } + } + + for i, j := 0, len(revEdits)-1; i < j; i, j = i+1, j-1 { + revEdits[i], revEdits[j] = revEdits[j], revEdits[i] + } + + var out []BytesDiffChunk + type meta struct { + base []byte + start int + end int + } + var metas []meta + + for _, e := range revEdits { + curBase := e.lineref.base + curStart := e.lineref.start + curEnd := e.lineref.end + + if len(out) == 0 || out[len(out)-1].Kind != e.kind { + out = append(out, BytesDiffChunk{Kind: e.kind, Data: curBase[curStart:curEnd]}) + metas = append(metas, meta{base: curBase, start: curStart, end: curEnd}) + continue + } + + lastIdx := len(out) - 1 + lastMeta := metas[lastIdx] + + if bytes.Equal(lastMeta.base, curBase) && lastMeta.end == curStart { + metas[lastIdx].end = curEnd + out[lastIdx].Data = curBase[metas[lastIdx].start:metas[lastIdx].end] + continue + } + + out[lastIdx].Data = append(out[lastIdx].Data, curBase[curStart:curEnd]...) + metas[lastIdx] = meta{base: nil, start: 0, end: 0} + } + + return out, nil +} + +// BytesDiffChunk represents a contiguous region of bytes categorized +// as unchanged, deleted, or added. +type BytesDiffChunk struct { + Kind BytesDiffChunkKind + Data []byte +} + +// BytesDiffChunkKind enumerates the type of diff chunk. +type BytesDiffChunkKind int + +const ( + // BytesDiffChunkKindUnchanged represents an unchanged diff chunk. + BytesDiffChunkKindUnchanged BytesDiffChunkKind = iota + // BytesDiffChunkKindDeleted represents a deleted diff chunk. + BytesDiffChunkKindDeleted + // BytesDiffChunkKindAdded represents an added diff chunk. + BytesDiffChunkKindAdded +) diff --git a/diffbytes/diffbytes_test.go b/diffbytes/diffbytes_test.go new file mode 100644 index 00000000..00af1881 --- /dev/null +++ b/diffbytes/diffbytes_test.go @@ -0,0 +1,326 @@ +package diffbytes + +import ( + "bytes" + "strconv" + "strings" + "testing" +) + +func TestDiffBytes(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + oldInput string + newInput string + expected []BytesDiffChunk + }{ + { + name: "empty inputs produce no chunks", + oldInput: "", + newInput: "", + expected: []BytesDiffChunk{}, + }, + { + name: "only additions", + oldInput: "", + newInput: "alpha\nbeta\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindAdded, Data: []byte("alpha\nbeta\n")}, + }, + }, + { + name: "only deletions", + oldInput: "alpha\nbeta\n", + newInput: "", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("alpha\nbeta\n")}, + }, + }, + { + name: "unchanged content is grouped", + oldInput: "same\nlines\n", + newInput: "same\nlines\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("same\nlines\n")}, + }, + }, + { + name: "insertion in the middle", + oldInput: "a\nb\nc\n", + newInput: "a\nb\nX\nc\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("a\nb\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("X\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("c\n")}, + }, + }, + { + name: "replacement without trailing newline", + oldInput: "first\nsecond", + newInput: "first\nsecond\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("first\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("second")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("second\n")}, + }, + }, + { + name: "line replacement", + oldInput: "a\nb\nc\n", + newInput: "a\nB\nc\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("a\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("b\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("B\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("c\n")}, + }, + }, + { + name: "swap adjacent lines", + oldInput: "A\nB\n", + newInput: "B\nA\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("A\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("A\n")}, + }, + }, + { + name: "indentation change is a full line replacement", + oldInput: "func main() {\n\treturn\n}\n", + newInput: "func main() {\n return\n}\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("func main() {\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("\treturn\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte(" return\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("}\n")}, + }, + }, + { + name: "commenting out lines", + oldInput: "code\n", + newInput: "// code\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("code\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("// code\n")}, + }, + }, + { + name: "reducing repeating lines", + oldInput: "log\nlog\nlog\n", + newInput: "log\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("log\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("log\nlog\n")}, + }, + }, + { + name: "expanding repeating lines", + oldInput: "tick\n", + newInput: "tick\ntick\ntick\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("tick\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("tick\ntick\n")}, + }, + }, + { + name: "interleaved modifications", + oldInput: "keep\nchange\nkeep\nchange\n", + newInput: "keep\nfixed\nkeep\nfixed\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("keep\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("change\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("fixed\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("keep\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("change\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("fixed\n")}, + }, + }, + { + name: "large common header and footer", + oldInput: "header\nheader\nheader\nOLD\nfooter\nfooter\n", + newInput: "header\nheader\nheader\nNEW\nfooter\nfooter\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("header\nheader\nheader\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("OLD\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("NEW\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("footer\nfooter\n")}, + }, + }, + { + name: "completely different content", + oldInput: "apple\nbanana\n", + newInput: "cherry\ndate\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("apple\nbanana\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("cherry\ndate\n")}, + }, + }, + { + name: "unicode and emoji changes", + oldInput: "Hello 🌍\nYay\n", + newInput: "Hello 🌎\nYay\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("Hello 🌍\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("Hello 🌎\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Yay\n")}, + }, + }, + { + name: "binary data with embedded newlines", + oldInput: "\x00\x01\n\x02\x03\n", + newInput: "\x00\x01\n\x02\xFF\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("\x00\x01\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("\x02\x03\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("\x02\xFF\n")}, + }, + }, + { + name: "adding trailing newline to last line", + oldInput: "Line 1\nLine 2", + newInput: "Line 1\nLine 2\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Line 1\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("Line 2")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("Line 2\n")}, + }, + }, + { + name: "removing trailing newline", + oldInput: "A\nB\n", + newInput: "A\nB", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("B\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("B")}, + }, + }, + { + name: "inserting blank lines", + oldInput: "A\nB\n", + newInput: "A\n\n\nB\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("\n\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, + }, + }, + { + name: "collapsing blank lines", + oldInput: "A\n\n\n\nB\n", + newInput: "A\nB\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("\n\n\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, + }, + }, + { + name: "case sensitivity check", + oldInput: "FOO\nbar\n", + newInput: "foo\nbar\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("FOO\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("foo\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("bar\n")}, + }, + }, + { + name: "partial line match is full mismatch", + oldInput: "The quick brown fox\n", + newInput: "The quick brown fox jumps\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindDeleted, Data: []byte("The quick brown fox\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("The quick brown fox jumps\n")}, + }, + }, + { + name: "inserting middle content", + oldInput: "Top\nBottom\n", + newInput: "Top\nMiddle\nBottom\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Top\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("Middle\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Bottom\n")}, + }, + }, + { + name: "block move simulated", + oldInput: "BlockA\nBlockB\nBlockC\n", + newInput: "BlockA\nBlockC\nBlockB\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("BlockA\n")}, + {Kind: BytesDiffChunkKindDeleted, Data: []byte("BlockB\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("BlockC\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("BlockB\n")}, + }, + }, + { + name: "alternating additions", + oldInput: "A\nB\nC\n", + newInput: "A\n1\nB\n2\nC\n", + expected: []BytesDiffChunk{ + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("1\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, + {Kind: BytesDiffChunkKindAdded, Data: []byte("2\n")}, + {Kind: BytesDiffChunkKindUnchanged, Data: []byte("C\n")}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + chunks, err := DiffBytes([]byte(tt.oldInput), []byte(tt.newInput)) + if err != nil { + t.Fatalf("DiffBytes returned error: %v", err) + } + + if len(chunks) != len(tt.expected) { + t.Fatalf("expected %d chunks, got %d: %s", len(tt.expected), len(chunks), formatChunks(chunks)) + } + + for i := range tt.expected { + if chunks[i].Kind != tt.expected[i].Kind { + t.Fatalf("chunk %d kind mismatch: got %v, want %v; chunks: %s", i, chunks[i].Kind, tt.expected[i].Kind, formatChunks(chunks)) + } + if !bytes.Equal(chunks[i].Data, tt.expected[i].Data) { + t.Fatalf("chunk %d data mismatch: got %q, want %q; chunks: %s", i, string(chunks[i].Data), string(tt.expected[i].Data), formatChunks(chunks)) + } + } + }) + } +} + +func formatChunks(chunks []BytesDiffChunk) string { + var b strings.Builder + b.WriteByte('[') + for i, chunk := range chunks { + if i > 0 { + b.WriteString(", ") + } + b.WriteString(chunkKindName(chunk.Kind)) + b.WriteByte(':') + b.WriteString(strconv.Quote(string(chunk.Data))) + } + b.WriteByte(']') + return b.String() +} + +func chunkKindName(kind BytesDiffChunkKind) string { + switch kind { + case BytesDiffChunkKindUnchanged: + return "U" + case BytesDiffChunkKindDeleted: + return "D" + case BytesDiffChunkKindAdded: + return "A" + default: + return "?" + } +} diff --git a/diffbytes/unsafe.go b/diffbytes/unsafe.go new file mode 100644 index 00000000..79c215a2 --- /dev/null +++ b/diffbytes/unsafe.go @@ -0,0 +1,17 @@ +package diffbytes + +import "unsafe" + +// // stringToBytes converts a string to a byte slice without copying the string. +// // Memory is borrowed from the string. +// // The resulting byte slice must not be modified in any form. +// func stringToBytes(s string) (bytes []byte) { +// return unsafe.Slice(unsafe.StringData(s), len(s)) //#nosec G103 +// } + +// bytesToString converts a byte slice to a string without copying the bytes. +// Memory is borrowed from the byte slice. +// The source byte slice must not be modified. +func bytesToString(b []byte) string { + return unsafe.String(unsafe.SliceData(b), len(b)) //#nosec G103 +} diff --git a/diffbytes_test.go b/diffbytes_test.go deleted file mode 100644 index fe929b77..00000000 --- a/diffbytes_test.go +++ /dev/null @@ -1,326 +0,0 @@ -package furgit - -import ( - "bytes" - "strconv" - "strings" - "testing" -) - -func TestDiffBytes(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - oldInput string - newInput string - expected []BytesDiffChunk - }{ - { - name: "empty inputs produce no chunks", - oldInput: "", - newInput: "", - expected: []BytesDiffChunk{}, - }, - { - name: "only additions", - oldInput: "", - newInput: "alpha\nbeta\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindAdded, Data: []byte("alpha\nbeta\n")}, - }, - }, - { - name: "only deletions", - oldInput: "alpha\nbeta\n", - newInput: "", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("alpha\nbeta\n")}, - }, - }, - { - name: "unchanged content is grouped", - oldInput: "same\nlines\n", - newInput: "same\nlines\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("same\nlines\n")}, - }, - }, - { - name: "insertion in the middle", - oldInput: "a\nb\nc\n", - newInput: "a\nb\nX\nc\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("a\nb\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("X\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("c\n")}, - }, - }, - { - name: "replacement without trailing newline", - oldInput: "first\nsecond", - newInput: "first\nsecond\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("first\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("second")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("second\n")}, - }, - }, - { - name: "line replacement", - oldInput: "a\nb\nc\n", - newInput: "a\nB\nc\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("a\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("b\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("B\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("c\n")}, - }, - }, - { - name: "swap adjacent lines", - oldInput: "A\nB\n", - newInput: "B\nA\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("A\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("A\n")}, - }, - }, - { - name: "indentation change is a full line replacement", - oldInput: "func main() {\n\treturn\n}\n", - newInput: "func main() {\n return\n}\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("func main() {\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("\treturn\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte(" return\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("}\n")}, - }, - }, - { - name: "commenting out lines", - oldInput: "code\n", - newInput: "// code\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("code\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("// code\n")}, - }, - }, - { - name: "reducing repeating lines", - oldInput: "log\nlog\nlog\n", - newInput: "log\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("log\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("log\nlog\n")}, - }, - }, - { - name: "expanding repeating lines", - oldInput: "tick\n", - newInput: "tick\ntick\ntick\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("tick\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("tick\ntick\n")}, - }, - }, - { - name: "interleaved modifications", - oldInput: "keep\nchange\nkeep\nchange\n", - newInput: "keep\nfixed\nkeep\nfixed\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("keep\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("change\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("fixed\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("keep\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("change\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("fixed\n")}, - }, - }, - { - name: "large common header and footer", - oldInput: "header\nheader\nheader\nOLD\nfooter\nfooter\n", - newInput: "header\nheader\nheader\nNEW\nfooter\nfooter\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("header\nheader\nheader\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("OLD\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("NEW\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("footer\nfooter\n")}, - }, - }, - { - name: "completely different content", - oldInput: "apple\nbanana\n", - newInput: "cherry\ndate\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("apple\nbanana\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("cherry\ndate\n")}, - }, - }, - { - name: "unicode and emoji changes", - oldInput: "Hello 🌍\nYay\n", - newInput: "Hello 🌎\nYay\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("Hello 🌍\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("Hello 🌎\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Yay\n")}, - }, - }, - { - name: "binary data with embedded newlines", - oldInput: "\x00\x01\n\x02\x03\n", - newInput: "\x00\x01\n\x02\xFF\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("\x00\x01\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("\x02\x03\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("\x02\xFF\n")}, - }, - }, - { - name: "adding trailing newline to last line", - oldInput: "Line 1\nLine 2", - newInput: "Line 1\nLine 2\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Line 1\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("Line 2")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("Line 2\n")}, - }, - }, - { - name: "removing trailing newline", - oldInput: "A\nB\n", - newInput: "A\nB", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("B\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("B")}, - }, - }, - { - name: "inserting blank lines", - oldInput: "A\nB\n", - newInput: "A\n\n\nB\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("\n\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, - }, - }, - { - name: "collapsing blank lines", - oldInput: "A\n\n\n\nB\n", - newInput: "A\nB\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("\n\n\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, - }, - }, - { - name: "case sensitivity check", - oldInput: "FOO\nbar\n", - newInput: "foo\nbar\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("FOO\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("foo\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("bar\n")}, - }, - }, - { - name: "partial line match is full mismatch", - oldInput: "The quick brown fox\n", - newInput: "The quick brown fox jumps\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindDeleted, Data: []byte("The quick brown fox\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("The quick brown fox jumps\n")}, - }, - }, - { - name: "inserting middle content", - oldInput: "Top\nBottom\n", - newInput: "Top\nMiddle\nBottom\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Top\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("Middle\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("Bottom\n")}, - }, - }, - { - name: "block move simulated", - oldInput: "BlockA\nBlockB\nBlockC\n", - newInput: "BlockA\nBlockC\nBlockB\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("BlockA\n")}, - {Kind: BytesDiffChunkKindDeleted, Data: []byte("BlockB\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("BlockC\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("BlockB\n")}, - }, - }, - { - name: "alternating additions", - oldInput: "A\nB\nC\n", - newInput: "A\n1\nB\n2\nC\n", - expected: []BytesDiffChunk{ - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("A\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("1\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("B\n")}, - {Kind: BytesDiffChunkKindAdded, Data: []byte("2\n")}, - {Kind: BytesDiffChunkKindUnchanged, Data: []byte("C\n")}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - chunks, err := DiffBytes([]byte(tt.oldInput), []byte(tt.newInput)) - if err != nil { - t.Fatalf("DiffBytes returned error: %v", err) - } - - if len(chunks) != len(tt.expected) { - t.Fatalf("expected %d chunks, got %d: %s", len(tt.expected), len(chunks), formatChunks(chunks)) - } - - for i := range tt.expected { - if chunks[i].Kind != tt.expected[i].Kind { - t.Fatalf("chunk %d kind mismatch: got %v, want %v; chunks: %s", i, chunks[i].Kind, tt.expected[i].Kind, formatChunks(chunks)) - } - if !bytes.Equal(chunks[i].Data, tt.expected[i].Data) { - t.Fatalf("chunk %d data mismatch: got %q, want %q; chunks: %s", i, string(chunks[i].Data), string(tt.expected[i].Data), formatChunks(chunks)) - } - } - }) - } -} - -func formatChunks(chunks []BytesDiffChunk) string { - var b strings.Builder - b.WriteByte('[') - for i, chunk := range chunks { - if i > 0 { - b.WriteString(", ") - } - b.WriteString(chunkKindName(chunk.Kind)) - b.WriteByte(':') - b.WriteString(strconv.Quote(string(chunk.Data))) - } - b.WriteByte(']') - return b.String() -} - -func chunkKindName(kind BytesDiffChunkKind) string { - switch kind { - case BytesDiffChunkKindUnchanged: - return "U" - case BytesDiffChunkKindDeleted: - return "D" - case BytesDiffChunkKindAdded: - return "A" - default: - return "?" - } -} diff --git a/unsafe.go b/unsafe.go deleted file mode 100644 index fac20230..00000000 --- a/unsafe.go +++ /dev/null @@ -1,17 +0,0 @@ -package furgit - -import "unsafe" - -// // stringToBytes converts a string to a byte slice without copying the string. -// // Memory is borrowed from the string. -// // The resulting byte slice must not be modified in any form. -// func stringToBytes(s string) (bytes []byte) { -// return unsafe.Slice(unsafe.StringData(s), len(s)) //#nosec G103 -// } - -// bytesToString converts a byte slice to a string without copying the bytes. -// Memory is borrowed from the byte slice. -// The source byte slice must not be modified. -func bytesToString(b []byte) string { - return unsafe.String(unsafe.SliceData(b), len(b)) //#nosec G103 -} -- cgit v1.3.1-10-gc9f91