aboutsummaryrefslogtreecommitdiff
path: root/internal/format/packfile/delta
diff options
context:
space:
mode:
Diffstat (limited to 'internal/format/packfile/delta')
-rw-r--r--internal/format/packfile/delta/apply.go122
-rw-r--r--internal/format/packfile/delta/apply_test.go174
-rw-r--r--internal/format/packfile/delta/doc.go2
-rw-r--r--internal/format/packfile/delta/header.go98
-rw-r--r--internal/format/packfile/delta/header_test.go88
5 files changed, 484 insertions, 0 deletions
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go
new file mode 100644
index 00000000..656a69d0
--- /dev/null
+++ b/internal/format/packfile/delta/apply.go
@@ -0,0 +1,122 @@
+package delta
+
+import (
+ "fmt"
+
+ "lindenii.org/go/lgo/intconv"
+)
+
+// MaxChainDepth is the maximum supported delta chain length.
+// Resolvers reject chains deeper than this
+// to bound recursion depth and reconstruction work.
+const MaxChainDepth = 1 << 12
+
+// Apply applies one inflated delta payload to base
+// and returns the reconstructed result.
+//
+// delta must include the leading base/result size header.
+func Apply(base, delta []byte) ([]byte, error) {
+ baseSize, resultSize, pos, err := ParseHeaderSizes(delta)
+ if err != nil {
+ return nil, err
+ }
+
+ if baseSize != uint64(len(base)) {
+ return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta)
+ }
+
+ outLen, err := intconv.Uint64ToInt(resultSize)
+ if err != nil {
+ return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta)
+ }
+
+ out := make([]byte, outLen)
+ outPos := 0
+
+ for pos < len(delta) {
+ op := delta[pos]
+ pos++
+
+ switch {
+ case op&0x80 != 0:
+ outPos, err = applyCopy(out, outPos, base, delta, &pos, op)
+ case op != 0:
+ outPos, err = applyInsert(out, outPos, delta, &pos, int(op))
+ default:
+ err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta)
+ }
+
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if outPos != len(out) {
+ return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta)
+ }
+
+ return out, nil
+}
+
+// applyCopy executes one copy instruction,
+// copying a base range into out,
+// and returns the new output position.
+func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) {
+ off, err := parseCopyOperand(delta, pos, op, 0, 4)
+ if err != nil {
+ return 0, err
+ }
+
+ n, err := parseCopyOperand(delta, pos, op, 4, 3)
+ if err != nil {
+ return 0, err
+ }
+
+ if n == 0 {
+ n = 0x10000
+ }
+
+ if off+n > len(base) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], base[off:off+n])
+
+ return outPos + n, nil
+}
+
+// applyInsert executes one insert instruction,
+// copying n literal delta bytes into out,
+// and returns the new output position.
+func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) {
+ if *pos+n > len(delta) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], delta[*pos:*pos+n])
+ *pos += n
+
+ return outPos + n, nil
+}
+
+// parseCopyOperand assembles one little-endian copy instruction operand
+// from the operand bytes selected by op's flag bits
+// firstBit through firstBit+count-1.
+func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) {
+ value := 0
+
+ for i := range count {
+ if op&(1<<(firstBit+uint(i))) == 0 { //nolint:gosec
+ continue
+ }
+
+ if *pos >= len(delta) {
+ return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta)
+ }
+
+ value |= int(delta[*pos]) << (8 * i)
+ *pos++
+ }
+
+ return value, nil
+}
diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go
new file mode 100644
index 00000000..bdf454d9
--- /dev/null
+++ b/internal/format/packfile/delta/apply_test.go
@@ -0,0 +1,174 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestApplyInsert(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 0, 5)
+ data = append(data, 5, 'h', 'e', 'l', 'l', 'o')
+
+ out, err := delta.Apply(nil, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "hello" {
+ t.Fatalf("Apply = %q, want %q", out, "hello")
+ }
+}
+
+func TestApplyCopy(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ // One offset byte and one size byte.
+ data := delta.AppendHeaderSizes(nil, 10, 4)
+ data = append(data, 0x91, 3, 4)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "3456" {
+ t.Fatalf("Apply = %q, want %q", out, "3456")
+ }
+}
+
+func TestApplyCopyImplicitSize(t *testing.T) {
+ t.Parallel()
+
+ base := bytes.Repeat([]byte{0xab}, 0x10000+10)
+
+ // Offset 0 and the implicit copy size 0x10000.
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000)
+ data = append(data, 0x80)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if !bytes.Equal(out, base[:0x10000]) {
+ t.Fatalf("Apply implicit-size copy mismatch")
+ }
+}
+
+func TestApplyMixed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("the quick brown fox")
+
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9)
+ data = append(data, 0x91, 4, 5) // copy "quick"
+ data = append(data, 3, '!', '?', '!') // insert "!?!"
+ data = append(data, 0x91, 16, 1) // copy "f"
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "quick!?!f" {
+ t.Fatalf("Apply = %q, want %q", out, "quick!?!f")
+ }
+}
+
+func TestApplyEmptyResult(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 3, 0)
+
+ out, err := delta.Apply([]byte("abc"), data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if len(out) != 0 {
+ t.Fatalf("Apply = %q, want empty", out)
+ }
+}
+
+func TestApplyMalformed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ cases := []struct {
+ name string
+ ops []byte
+ baseSize uint64
+ resultSize uint64
+ }{
+ {
+ name: "opcode zero",
+ ops: []byte{0x00},
+ baseSize: 10,
+ resultSize: 1,
+ },
+ {
+ name: "truncated copy operand",
+ ops: []byte{0x91, 3},
+ baseSize: 10,
+ resultSize: 4,
+ },
+ {
+ name: "copy past base",
+ ops: []byte{0x91, 8, 5},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "copy past result",
+ ops: []byte{0x91, 0, 5},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "insert past delta",
+ ops: []byte{5, 'a', 'b'},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "insert past result",
+ ops: []byte{5, 'a', 'b', 'c', 'd', 'e'},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "base size mismatch",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 9,
+ resultSize: 1,
+ },
+ {
+ name: "result shorter than declared",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 10,
+ resultSize: 2,
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+ data = append(data, tc.ops...)
+
+ _, err := delta.Apply(base, data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("Apply error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/delta/doc.go b/internal/format/packfile/delta/doc.go
new file mode 100644
index 00000000..f63c96a8
--- /dev/null
+++ b/internal/format/packfile/delta/doc.go
@@ -0,0 +1,2 @@
+// Package delta provides various routines to handle Git delta compression.
+package delta
diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go
new file mode 100644
index 00000000..d5dd93fb
--- /dev/null
+++ b/internal/format/packfile/delta/header.go
@@ -0,0 +1,98 @@
+package delta
+
+import (
+ "errors"
+ "fmt"
+)
+
+// ErrMalformedDelta reports that
+// a delta payload is truncated, overlong,
+// declares sizes that overflow uint64,
+// contains invalid instructions,
+// or does not match the supplied base or declared sizes.
+var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta")
+
+// MaxSizeVarintLen is the maximum encoded length
+// of one delta header size varint.
+// Every uint64 size is encodable within this bound,
+// and parsing rejects longer encodings.
+const MaxSizeVarintLen = 10
+
+// MaxHeaderSizesLen is the maximum encoded length
+// of the base/result size header of a delta payload.
+//
+// Callers reading a delta from a stream may inflate
+// MaxHeaderSizesLen bytes
+// (or fewer if the payload ends sooner)
+// and parse with [ParseHeaderSizes];
+// no valid header is longer.
+const MaxHeaderSizesLen = 2 * MaxSizeVarintLen
+
+// ParseHeaderSizes parses the base size and result size varints
+// at the beginning of one inflated delta payload.
+func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) {
+ baseSize, consumed, err = parseSizeVarint(data, 0)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ resultSize, consumed, err = parseSizeVarint(data, consumed)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ return baseSize, resultSize, consumed, nil
+}
+
+// AppendHeaderSizes appends the base/result size header encoding
+// of one delta payload to dst.
+func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte {
+ dst = appendSizeVarint(dst, baseSize)
+
+ return appendSizeVarint(dst, resultSize)
+}
+
+// appendSizeVarint appends the encoding of one delta header size varint to dst.
+func appendSizeVarint(dst []byte, value uint64) []byte {
+ for value >= 0x80 {
+ dst = append(dst, byte(value)|0x80)
+ value >>= 7
+ }
+
+ return append(dst, byte(value))
+}
+
+// parseSizeVarint parses one delta header size varint
+// beginning at pos,
+// and returns the position past it.
+func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) {
+ start := pos
+
+ shift := uint(0)
+
+ for {
+ if pos-start >= MaxSizeVarintLen {
+ return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta)
+ }
+
+ if pos >= len(data) {
+ return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta)
+ }
+
+ b := data[pos]
+ pos++
+
+ group := uint64(b & 0x7f)
+ if group<<shift>>shift != group {
+ return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta)
+ }
+
+ value |= group << shift
+
+ if b&0x80 == 0 {
+ return value, pos, nil
+ }
+
+ shift += 7
+ }
+}
diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go
new file mode 100644
index 00000000..8d97674f
--- /dev/null
+++ b/internal/format/packfile/delta/header_test.go
@@ -0,0 +1,88 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestParseHeaderSizesRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ baseSize uint64
+ resultSize uint64
+ }{
+ {name: "zero", baseSize: 0, resultSize: 0},
+ {name: "small", baseSize: 5, resultSize: 130},
+ {name: "boundaries", baseSize: 127, resultSize: 128},
+ {name: "large", baseSize: 1 << 32, resultSize: 1 << 57},
+ {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+
+ wantConsumed := len(data)
+
+ data = append(data, 0xde, 0xad)
+
+ baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data)
+ if err != nil {
+ t.Fatalf("ParseHeaderSizes: %v", err)
+ }
+
+ if baseSize != tc.baseSize {
+ t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize)
+ }
+
+ if resultSize != tc.resultSize {
+ t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize)
+ }
+
+ if consumed != wantConsumed {
+ t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed)
+ }
+ })
+ }
+}
+
+func TestParseHeaderSizesMalformed(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated first varint", data: []byte{0x80}},
+ {name: "missing second varint", data: []byte{0x05}},
+ {name: "truncated second varint", data: []byte{0x05, 0x80}},
+ {
+ name: "overflow",
+ data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f),
+ },
+ {
+ name: "overlong",
+ data: append(bytes.Repeat([]byte{0x80}, 10), 0x00),
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, _, _, err := delta.ParseHeaderSizes(tc.data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}