aboutsummaryrefslogtreecommitdiff
path: root/difflines
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-02-20 19:06:13 +0800
committerGravatar Runxi Yu2026-02-20 19:07:14 +0800
commitaa513c069c1418734aea894dc944e27c6a78a3bb (patch)
tree687f0a11bb550fa088fd82a98ceb8979bbc35f69 /difflines
parentComment on prior reverts removing the pack writing API (diff)
signatureNo signature
Delete everything, I'm redesigning this.
I'll stop using a flat package and make things much more modular. And also experiment with streaming APIs so large blobs don't OOM us.
Diffstat (limited to 'difflines')
-rw-r--r--difflines/difflines.go223
-rw-r--r--difflines/difflines_test.go326
-rw-r--r--difflines/unsafe.go17
3 files changed, 0 insertions, 566 deletions
diff --git a/difflines/difflines.go b/difflines/difflines.go
deleted file mode 100644
index db4c1a03..00000000
--- a/difflines/difflines.go
+++ /dev/null
@@ -1,223 +0,0 @@
-// Package difflines provides routines to perform line-based diffs.
-package difflines
-
-import "bytes"
-
-// DiffLines performs a line-based diff.
-// Lines are bytes up to and including '\n' (final line may lack '\n').
-func DiffLines(oldB, newB []byte) ([]LinesDiffChunk, error) {
- type lineRef struct {
- base []byte
- start int
- end int
- }
-
- split := func(b []byte) []lineRef {
- if len(b) == 0 {
- return nil
- }
- var res []lineRef
- start := 0
- for i := range b {
- if b[i] == '\n' {
- res = append(res, lineRef{base: b, start: start, end: i + 1})
- start = i + 1
- }
- }
- if start < len(b) {
- res = append(res, lineRef{base: b, start: start, end: len(b)})
- }
- return res
- }
-
- oldLines := split(oldB)
- newLines := split(newB)
-
- n := len(oldLines)
- m := len(newLines)
- if n == 0 && m == 0 {
- return nil, nil
- }
-
- idOf := make(map[string]int)
- nextID := 0
- oldIDs := make([]int, n)
- for i, ln := range oldLines {
- key := bytesToString(ln.base[ln.start:ln.end])
- id, ok := idOf[key]
- if !ok {
- id = nextID
- idOf[key] = id
- nextID++
- }
- oldIDs[i] = id
- }
- newIDs := make([]int, m)
- for i, ln := range newLines {
- key := bytesToString(ln.base[ln.start:ln.end])
- id, ok := idOf[key]
- if !ok {
- id = nextID
- idOf[key] = id
- nextID++
- }
- newIDs[i] = id
- }
-
- max := n + m
- offset := max
- trace := make([][]int, 0, max+1)
-
- Vprev := make([]int, 2*max+1)
- for i := range Vprev {
- Vprev[i] = -1
- }
-
- x0 := 0
- y0 := 0
- for x0 < n && y0 < m && oldIDs[x0] == newIDs[y0] {
- x0++
- y0++
- }
- Vprev[offset+0] = x0
- trace = append(trace, append([]int(nil), Vprev...))
-
- found := x0 >= n && y0 >= m
-
- for D := 1; D <= max && !found; D++ {
- V := make([]int, 2*max+1)
- for i := range V {
- V[i] = -1
- }
-
- for k := -D; k <= D; k += 2 {
- var x int
- if k == -D || (k != D && Vprev[offset+(k-1)] < Vprev[offset+(k+1)]) {
- x = Vprev[offset+(k+1)]
- } else {
- x = Vprev[offset+(k-1)] + 1
- }
- y := x - k
-
- for x < n && y < m && oldIDs[x] == newIDs[y] {
- x++
- y++
- }
- V[offset+k] = x
-
- if x >= n && y >= m {
- trace = append(trace, V)
- found = true
- break
- }
- }
-
- if !found {
- trace = append(trace, V)
- Vprev = V
- }
- }
-
- type edit struct {
- kind LinesDiffChunkKind
- lineref lineRef
- }
- revEdits := make([]edit, 0, n+m)
-
- x := n
- y := m
- for D := len(trace) - 1; D >= 0; D-- {
- k := x - y
-
- var (
- prevK int
- prevX int
- prevY int
- )
- if D > 0 {
- prevV := trace[D-1]
- if k == -D || (k != D && prevV[offset+(k-1)] < prevV[offset+(k+1)]) {
- prevK = k + 1
- } else {
- prevK = k - 1
- }
- prevX = prevV[offset+prevK]
- prevY = prevX - prevK
- }
-
- for x > prevX && y > prevY {
- x--
- y--
- revEdits = append(revEdits, edit{kind: LinesDiffChunkKindUnchanged, lineref: oldLines[x]})
- }
-
- if D == 0 {
- break
- }
-
- if x == prevX {
- y--
- revEdits = append(revEdits, edit{kind: LinesDiffChunkKindAdded, lineref: newLines[y]})
- } else {
- x--
- revEdits = append(revEdits, edit{kind: LinesDiffChunkKindDeleted, lineref: oldLines[x]})
- }
- }
-
- for i, j := 0, len(revEdits)-1; i < j; i, j = i+1, j-1 {
- revEdits[i], revEdits[j] = revEdits[j], revEdits[i]
- }
-
- var out []LinesDiffChunk
- type meta struct {
- base []byte
- start int
- end int
- }
- var metas []meta
-
- for _, e := range revEdits {
- curBase := e.lineref.base
- curStart := e.lineref.start
- curEnd := e.lineref.end
-
- if len(out) == 0 || out[len(out)-1].Kind != e.kind {
- out = append(out, LinesDiffChunk{Kind: e.kind, Data: curBase[curStart:curEnd]})
- metas = append(metas, meta{base: curBase, start: curStart, end: curEnd})
- continue
- }
-
- lastIdx := len(out) - 1
- lastMeta := metas[lastIdx]
-
- if bytes.Equal(lastMeta.base, curBase) && lastMeta.end == curStart {
- metas[lastIdx].end = curEnd
- out[lastIdx].Data = curBase[metas[lastIdx].start:metas[lastIdx].end]
- continue
- }
-
- out[lastIdx].Data = append(out[lastIdx].Data, curBase[curStart:curEnd]...)
- metas[lastIdx] = meta{base: nil, start: 0, end: 0}
- }
-
- return out, nil
-}
-
-// LinesDiffChunk represents a contiguous region of lines categorized
-// as unchanged, deleted, or added.
-type LinesDiffChunk struct {
- Kind LinesDiffChunkKind
- Data []byte
-}
-
-// LinesDiffChunkKind enumerates the type of diff chunk.
-type LinesDiffChunkKind int
-
-const (
- // LinesDiffChunkKindUnchanged represents an unchanged diff chunk.
- LinesDiffChunkKindUnchanged LinesDiffChunkKind = iota
- // LinesDiffChunkKindDeleted represents a deleted diff chunk.
- LinesDiffChunkKindDeleted
- // LinesDiffChunkKindAdded represents an added diff chunk.
- LinesDiffChunkKindAdded
-)
diff --git a/difflines/difflines_test.go b/difflines/difflines_test.go
deleted file mode 100644
index 783c2d6e..00000000
--- a/difflines/difflines_test.go
+++ /dev/null
@@ -1,326 +0,0 @@
-package difflines
-
-import (
- "bytes"
- "strconv"
- "strings"
- "testing"
-)
-
-func TestDiffLines(t *testing.T) {
- t.Parallel()
-
- tests := []struct {
- name string
- oldInput string
- newInput string
- expected []LinesDiffChunk
- }{
- {
- name: "empty inputs produce no chunks",
- oldInput: "",
- newInput: "",
- expected: []LinesDiffChunk{},
- },
- {
- name: "only additions",
- oldInput: "",
- newInput: "alpha\nbeta\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindAdded, Data: []byte("alpha\nbeta\n")},
- },
- },
- {
- name: "only deletions",
- oldInput: "alpha\nbeta\n",
- newInput: "",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("alpha\nbeta\n")},
- },
- },
- {
- name: "unchanged content is grouped",
- oldInput: "same\nlines\n",
- newInput: "same\nlines\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("same\nlines\n")},
- },
- },
- {
- name: "insertion in the middle",
- oldInput: "a\nb\nc\n",
- newInput: "a\nb\nX\nc\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("a\nb\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("X\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("c\n")},
- },
- },
- {
- name: "replacement without trailing newline",
- oldInput: "first\nsecond",
- newInput: "first\nsecond\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("first\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("second")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("second\n")},
- },
- },
- {
- name: "line replacement",
- oldInput: "a\nb\nc\n",
- newInput: "a\nB\nc\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("a\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("b\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("B\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("c\n")},
- },
- },
- {
- name: "swap adjacent lines",
- oldInput: "A\nB\n",
- newInput: "B\nA\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("A\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("A\n")},
- },
- },
- {
- name: "indentation change is a full line replacement",
- oldInput: "func main() {\n\treturn\n}\n",
- newInput: "func main() {\n return\n}\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("func main() {\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("\treturn\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte(" return\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("}\n")},
- },
- },
- {
- name: "commenting out lines",
- oldInput: "code\n",
- newInput: "// code\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("code\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("// code\n")},
- },
- },
- {
- name: "reducing repeating lines",
- oldInput: "log\nlog\nlog\n",
- newInput: "log\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("log\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("log\nlog\n")},
- },
- },
- {
- name: "expanding repeating lines",
- oldInput: "tick\n",
- newInput: "tick\ntick\ntick\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("tick\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("tick\ntick\n")},
- },
- },
- {
- name: "interleaved modifications",
- oldInput: "keep\nchange\nkeep\nchange\n",
- newInput: "keep\nfixed\nkeep\nfixed\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("keep\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("change\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("fixed\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("keep\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("change\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("fixed\n")},
- },
- },
- {
- name: "large common header and footer",
- oldInput: "header\nheader\nheader\nOLD\nfooter\nfooter\n",
- newInput: "header\nheader\nheader\nNEW\nfooter\nfooter\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("header\nheader\nheader\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("OLD\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("NEW\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("footer\nfooter\n")},
- },
- },
- {
- name: "completely different content",
- oldInput: "apple\nbanana\n",
- newInput: "cherry\ndate\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("apple\nbanana\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("cherry\ndate\n")},
- },
- },
- {
- name: "unicode and emoji changes",
- oldInput: "Hello 🌍\nYay\n",
- newInput: "Hello 🌎\nYay\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("Hello 🌍\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("Hello 🌎\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Yay\n")},
- },
- },
- {
- name: "binary data with embedded newlines",
- oldInput: "\x00\x01\n\x02\x03\n",
- newInput: "\x00\x01\n\x02\xFF\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("\x00\x01\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("\x02\x03\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("\x02\xFF\n")},
- },
- },
- {
- name: "adding trailing newline to last line",
- oldInput: "Line 1\nLine 2",
- newInput: "Line 1\nLine 2\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Line 1\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("Line 2")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("Line 2\n")},
- },
- },
- {
- name: "removing trailing newline",
- oldInput: "A\nB\n",
- newInput: "A\nB",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("B\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("B")},
- },
- },
- {
- name: "inserting blank lines",
- oldInput: "A\nB\n",
- newInput: "A\n\n\nB\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("\n\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")},
- },
- },
- {
- name: "collapsing blank lines",
- oldInput: "A\n\n\n\nB\n",
- newInput: "A\nB\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("\n\n\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")},
- },
- },
- {
- name: "case sensitivity check",
- oldInput: "FOO\nbar\n",
- newInput: "foo\nbar\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("FOO\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("foo\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("bar\n")},
- },
- },
- {
- name: "partial line match is full mismatch",
- oldInput: "The quick brown fox\n",
- newInput: "The quick brown fox jumps\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("The quick brown fox\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("The quick brown fox jumps\n")},
- },
- },
- {
- name: "inserting middle content",
- oldInput: "Top\nBottom\n",
- newInput: "Top\nMiddle\nBottom\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Top\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("Middle\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("Bottom\n")},
- },
- },
- {
- name: "block move simulated",
- oldInput: "BlockA\nBlockB\nBlockC\n",
- newInput: "BlockA\nBlockC\nBlockB\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("BlockA\n")},
- {Kind: LinesDiffChunkKindDeleted, Data: []byte("BlockB\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("BlockC\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("BlockB\n")},
- },
- },
- {
- name: "alternating additions",
- oldInput: "A\nB\nC\n",
- newInput: "A\n1\nB\n2\nC\n",
- expected: []LinesDiffChunk{
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("A\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("1\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("B\n")},
- {Kind: LinesDiffChunkKindAdded, Data: []byte("2\n")},
- {Kind: LinesDiffChunkKindUnchanged, Data: []byte("C\n")},
- },
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- t.Parallel()
-
- chunks, err := DiffLines([]byte(tt.oldInput), []byte(tt.newInput))
- if err != nil {
- t.Fatalf("DiffLines returned error: %v", err)
- }
-
- if len(chunks) != len(tt.expected) {
- t.Fatalf("expected %d chunks, got %d: %s", len(tt.expected), len(chunks), formatChunks(chunks))
- }
-
- for i := range tt.expected {
- if chunks[i].Kind != tt.expected[i].Kind {
- t.Fatalf("chunk %d kind mismatch: got %v, want %v; chunks: %s", i, chunks[i].Kind, tt.expected[i].Kind, formatChunks(chunks))
- }
- if !bytes.Equal(chunks[i].Data, tt.expected[i].Data) {
- t.Fatalf("chunk %d data mismatch: got %q, want %q; chunks: %s", i, string(chunks[i].Data), string(tt.expected[i].Data), formatChunks(chunks))
- }
- }
- })
- }
-}
-
-func formatChunks(chunks []LinesDiffChunk) string {
- var b strings.Builder
- b.WriteByte('[')
- for i, chunk := range chunks {
- if i > 0 {
- b.WriteString(", ")
- }
- b.WriteString(chunkKindName(chunk.Kind))
- b.WriteByte(':')
- b.WriteString(strconv.Quote(string(chunk.Data)))
- }
- b.WriteByte(']')
- return b.String()
-}
-
-func chunkKindName(kind LinesDiffChunkKind) string {
- switch kind {
- case LinesDiffChunkKindUnchanged:
- return "U"
- case LinesDiffChunkKindDeleted:
- return "D"
- case LinesDiffChunkKindAdded:
- return "A"
- default:
- return "?"
- }
-}
diff --git a/difflines/unsafe.go b/difflines/unsafe.go
deleted file mode 100644
index 6e7ac5fd..00000000
--- a/difflines/unsafe.go
+++ /dev/null
@@ -1,17 +0,0 @@
-package difflines
-
-import "unsafe"
-
-// // stringToBytes converts a string to a byte slice without copying the string.
-// // Memory is borrowed from the string.
-// // The resulting byte slice must not be modified in any form.
-// func stringToBytes(s string) (bytes []byte) {
-// return unsafe.Slice(unsafe.StringData(s), len(s)) //#nosec G103
-// }
-
-// bytesToString converts a byte slice to a string without copying the bytes.
-// Memory is borrowed from the byte slice.
-// The source byte slice must not be modified.
-func bytesToString(b []byte) string {
- return unsafe.String(unsafe.SliceData(b), len(b)) //#nosec G103
-}