aboutsummaryrefslogtreecommitdiff
path: root/internal/format/packfile
diff options
context:
space:
mode:
Diffstat (limited to 'internal/format/packfile')
-rw-r--r--internal/format/packfile/delta/apply.go122
-rw-r--r--internal/format/packfile/delta/apply_test.go174
-rw-r--r--internal/format/packfile/delta/header.go98
-rw-r--r--internal/format/packfile/delta/header_test.go88
-rw-r--r--internal/format/packfile/entry_header.go11
-rw-r--r--internal/format/packfile/entry_header_test.go13
-rw-r--r--internal/format/packfile/header.go64
7 files changed, 553 insertions, 17 deletions
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go
new file mode 100644
index 00000000..4210d1b3
--- /dev/null
+++ b/internal/format/packfile/delta/apply.go
@@ -0,0 +1,122 @@
+package delta
+
+import (
+ "fmt"
+
+ "lindenii.org/go/lgo/intconv"
+)
+
+// MaxChainDepth is the maximum supported delta chain length.
+// Resolvers reject chains deeper than this
+// to bound recursion depth and reconstruction work.
+const MaxChainDepth = 1 << 12
+
+// Apply applies one inflated delta payload to base
+// and returns the reconstructed result.
+//
+// delta must include the leading base/result size header.
+func Apply(base, delta []byte) ([]byte, error) {
+ baseSize, resultSize, pos, err := ParseHeaderSizes(delta)
+ if err != nil {
+ return nil, err
+ }
+
+ if baseSize != uint64(len(base)) {
+ return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta)
+ }
+
+ outLen, err := intconv.Uint64ToInt(resultSize)
+ if err != nil {
+ return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta)
+ }
+
+ out := make([]byte, outLen)
+ outPos := 0
+
+ for pos < len(delta) {
+ op := delta[pos]
+ pos++
+
+ switch {
+ case op&0x80 != 0:
+ outPos, err = applyCopy(out, outPos, base, delta, &pos, op)
+ case op != 0:
+ outPos, err = applyInsert(out, outPos, delta, &pos, int(op))
+ default:
+ err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta)
+ }
+
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if outPos != len(out) {
+ return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta)
+ }
+
+ return out, nil
+}
+
+// applyCopy executes one copy instruction,
+// copying a base range into out,
+// and returns the new output position.
+func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) {
+ off, err := parseCopyOperand(delta, pos, op, 0, 4)
+ if err != nil {
+ return 0, err
+ }
+
+ n, err := parseCopyOperand(delta, pos, op, 4, 3)
+ if err != nil {
+ return 0, err
+ }
+
+ if n == 0 {
+ n = 0x10000
+ }
+
+ if off+n > len(base) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], base[off:off+n])
+
+ return outPos + n, nil
+}
+
+// applyInsert executes one insert instruction,
+// copying n literal delta bytes into out,
+// and returns the new output position.
+func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) {
+ if *pos+n > len(delta) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], delta[*pos:*pos+n])
+ *pos += n
+
+ return outPos + n, nil
+}
+
+// parseCopyOperand assembles one little-endian copy instruction operand
+// from the operand bytes selected by op's flag bits
+// firstBit through firstBit+count-1.
+func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) {
+ value := 0
+
+ for i := range count {
+ if op&(1<<(firstBit+uint(i))) == 0 {
+ continue
+ }
+
+ if *pos >= len(delta) {
+ return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta)
+ }
+
+ value |= int(delta[*pos]) << (8 * i)
+ *pos++
+ }
+
+ return value, nil
+}
diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go
new file mode 100644
index 00000000..bdf454d9
--- /dev/null
+++ b/internal/format/packfile/delta/apply_test.go
@@ -0,0 +1,174 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestApplyInsert(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 0, 5)
+ data = append(data, 5, 'h', 'e', 'l', 'l', 'o')
+
+ out, err := delta.Apply(nil, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "hello" {
+ t.Fatalf("Apply = %q, want %q", out, "hello")
+ }
+}
+
+func TestApplyCopy(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ // One offset byte and one size byte.
+ data := delta.AppendHeaderSizes(nil, 10, 4)
+ data = append(data, 0x91, 3, 4)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "3456" {
+ t.Fatalf("Apply = %q, want %q", out, "3456")
+ }
+}
+
+func TestApplyCopyImplicitSize(t *testing.T) {
+ t.Parallel()
+
+ base := bytes.Repeat([]byte{0xab}, 0x10000+10)
+
+ // Offset 0 and the implicit copy size 0x10000.
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000)
+ data = append(data, 0x80)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if !bytes.Equal(out, base[:0x10000]) {
+ t.Fatalf("Apply implicit-size copy mismatch")
+ }
+}
+
+func TestApplyMixed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("the quick brown fox")
+
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9)
+ data = append(data, 0x91, 4, 5) // copy "quick"
+ data = append(data, 3, '!', '?', '!') // insert "!?!"
+ data = append(data, 0x91, 16, 1) // copy "f"
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "quick!?!f" {
+ t.Fatalf("Apply = %q, want %q", out, "quick!?!f")
+ }
+}
+
+func TestApplyEmptyResult(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 3, 0)
+
+ out, err := delta.Apply([]byte("abc"), data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if len(out) != 0 {
+ t.Fatalf("Apply = %q, want empty", out)
+ }
+}
+
+func TestApplyMalformed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ cases := []struct {
+ name string
+ ops []byte
+ baseSize uint64
+ resultSize uint64
+ }{
+ {
+ name: "opcode zero",
+ ops: []byte{0x00},
+ baseSize: 10,
+ resultSize: 1,
+ },
+ {
+ name: "truncated copy operand",
+ ops: []byte{0x91, 3},
+ baseSize: 10,
+ resultSize: 4,
+ },
+ {
+ name: "copy past base",
+ ops: []byte{0x91, 8, 5},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "copy past result",
+ ops: []byte{0x91, 0, 5},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "insert past delta",
+ ops: []byte{5, 'a', 'b'},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "insert past result",
+ ops: []byte{5, 'a', 'b', 'c', 'd', 'e'},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "base size mismatch",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 9,
+ resultSize: 1,
+ },
+ {
+ name: "result shorter than declared",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 10,
+ resultSize: 2,
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+ data = append(data, tc.ops...)
+
+ _, err := delta.Apply(base, data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("Apply error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go
new file mode 100644
index 00000000..d5dd93fb
--- /dev/null
+++ b/internal/format/packfile/delta/header.go
@@ -0,0 +1,98 @@
+package delta
+
+import (
+ "errors"
+ "fmt"
+)
+
+// ErrMalformedDelta reports that
+// a delta payload is truncated, overlong,
+// declares sizes that overflow uint64,
+// contains invalid instructions,
+// or does not match the supplied base or declared sizes.
+var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta")
+
+// MaxSizeVarintLen is the maximum encoded length
+// of one delta header size varint.
+// Every uint64 size is encodable within this bound,
+// and parsing rejects longer encodings.
+const MaxSizeVarintLen = 10
+
+// MaxHeaderSizesLen is the maximum encoded length
+// of the base/result size header of a delta payload.
+//
+// Callers reading a delta from a stream may inflate
+// MaxHeaderSizesLen bytes
+// (or fewer if the payload ends sooner)
+// and parse with [ParseHeaderSizes];
+// no valid header is longer.
+const MaxHeaderSizesLen = 2 * MaxSizeVarintLen
+
+// ParseHeaderSizes parses the base size and result size varints
+// at the beginning of one inflated delta payload.
+func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) {
+ baseSize, consumed, err = parseSizeVarint(data, 0)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ resultSize, consumed, err = parseSizeVarint(data, consumed)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ return baseSize, resultSize, consumed, nil
+}
+
+// AppendHeaderSizes appends the base/result size header encoding
+// of one delta payload to dst.
+func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte {
+ dst = appendSizeVarint(dst, baseSize)
+
+ return appendSizeVarint(dst, resultSize)
+}
+
+// appendSizeVarint appends the encoding of one delta header size varint to dst.
+func appendSizeVarint(dst []byte, value uint64) []byte {
+ for value >= 0x80 {
+ dst = append(dst, byte(value)|0x80)
+ value >>= 7
+ }
+
+ return append(dst, byte(value))
+}
+
+// parseSizeVarint parses one delta header size varint
+// beginning at pos,
+// and returns the position past it.
+func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) {
+ start := pos
+
+ shift := uint(0)
+
+ for {
+ if pos-start >= MaxSizeVarintLen {
+ return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta)
+ }
+
+ if pos >= len(data) {
+ return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta)
+ }
+
+ b := data[pos]
+ pos++
+
+ group := uint64(b & 0x7f)
+ if group<<shift>>shift != group {
+ return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta)
+ }
+
+ value |= group << shift
+
+ if b&0x80 == 0 {
+ return value, pos, nil
+ }
+
+ shift += 7
+ }
+}
diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go
new file mode 100644
index 00000000..8d97674f
--- /dev/null
+++ b/internal/format/packfile/delta/header_test.go
@@ -0,0 +1,88 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestParseHeaderSizesRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ baseSize uint64
+ resultSize uint64
+ }{
+ {name: "zero", baseSize: 0, resultSize: 0},
+ {name: "small", baseSize: 5, resultSize: 130},
+ {name: "boundaries", baseSize: 127, resultSize: 128},
+ {name: "large", baseSize: 1 << 32, resultSize: 1 << 57},
+ {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+
+ wantConsumed := len(data)
+
+ data = append(data, 0xde, 0xad)
+
+ baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data)
+ if err != nil {
+ t.Fatalf("ParseHeaderSizes: %v", err)
+ }
+
+ if baseSize != tc.baseSize {
+ t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize)
+ }
+
+ if resultSize != tc.resultSize {
+ t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize)
+ }
+
+ if consumed != wantConsumed {
+ t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed)
+ }
+ })
+ }
+}
+
+func TestParseHeaderSizesMalformed(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated first varint", data: []byte{0x80}},
+ {name: "missing second varint", data: []byte{0x05}},
+ {name: "truncated second varint", data: []byte{0x05, 0x80}},
+ {
+ name: "overflow",
+ data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f),
+ },
+ {
+ name: "overlong",
+ data: append(bytes.Repeat([]byte{0x80}, 10), 0x00),
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, _, _, err := delta.ParseHeaderSizes(tc.data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go
index 1428600b..e5f2d62d 100644
--- a/internal/format/packfile/entry_header.go
+++ b/internal/format/packfile/entry_header.go
@@ -13,12 +13,6 @@ import (
// or declares a size that overflows uint64.
var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header")
-// ErrInvalidHashSize reports that
-// a supplied hash size is not a plausible object ID size.
-// This indicates a caller bug,
-// not malformed pack data.
-var ErrInvalidHashSize = errors.New("internal/format/packfile: invalid hash size")
-
// MaxTypeSizeLen is the maximum encoded length
// of the type/size prefix of an entry header.
// Every uint64 size is encodable within this bound,
@@ -73,7 +67,8 @@ type EntryHeader struct {
// from the beginning of data.
//
// hashSize must be the object ID size
-// of the pack's object format.
+// of the pack's object format;
+// ParseEntryHeader panics on implausible hash sizes.
//
// data need not contain the whole entry;
// [MaxEntryHeaderLen] bytes always suffice.
@@ -83,7 +78,7 @@ func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) {
var zero EntryHeader
if hashSize <= 0 || hashSize > id.MaxObjectIDSize {
- return zero, fmt.Errorf("%w: %d", ErrInvalidHashSize, hashSize)
+ panic("internal/format/packfile: invalid hash size")
}
if len(data) == 0 {
diff --git a/internal/format/packfile/entry_header_test.go b/internal/format/packfile/entry_header_test.go
index 807c544e..79dc2740 100644
--- a/internal/format/packfile/entry_header_test.go
+++ b/internal/format/packfile/entry_header_test.go
@@ -130,10 +130,15 @@ func TestParseEntryHeaderBadHashSize(t *testing.T) {
t.Parallel()
for _, hashSize := range []int{-1, 0, id.MaxObjectIDSize + 1} {
- _, err := packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize)
- if err == nil {
- t.Fatalf("ParseEntryHeader hash size %d: expected error", hashSize)
- }
+ func() {
+ defer func() {
+ if recover() == nil {
+ t.Fatalf("ParseEntryHeader hash size %d: expected panic", hashSize)
+ }
+ }()
+
+ _, _ = packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize)
+ }()
}
}
diff --git a/internal/format/packfile/header.go b/internal/format/packfile/header.go
index 1a156f36..64c7d140 100644
--- a/internal/format/packfile/header.go
+++ b/internal/format/packfile/header.go
@@ -1,9 +1,63 @@
package packfile
-// Signature is the 4-byte "PACK" magic at the start of pack files.
-const Signature = 0x5041434b
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+)
-// SupportedVersion reports whether a pack version is supported.
-func SupportedVersion(version uint32) bool {
- return version == 2
+const (
+ // signature is the 4-byte "PACK" magic at the start of pack files.
+ signature = 0x5041434b
+
+ // version is the only pack version this package reads or writes.
+ version = 2
+)
+
+// HeaderLen is the size of a pack header:
+// signature, version, and object count.
+const HeaderLen = 12
+
+// ErrMalformedHeader reports that
+// a pack header is truncated,
+// or has a bad signature or unsupported version.
+var ErrMalformedHeader = errors.New("internal/format/packfile: malformed pack header")
+
+// Header is one parsed pack header.
+type Header struct {
+ // ObjectCount is the number of object entries
+ // declared to follow the header.
+ ObjectCount uint32
+}
+
+// ParseHeader parses and validates the pack header
+// at the start of data.
+func ParseHeader(data []byte) (Header, error) {
+ var zero Header
+
+ if len(data) < HeaderLen {
+ return zero, fmt.Errorf("%w: truncated", ErrMalformedHeader)
+ }
+
+ if binary.BigEndian.Uint32(data[0:4]) != signature {
+ return zero, fmt.Errorf("%w: bad signature", ErrMalformedHeader)
+ }
+
+ parsedVersion := binary.BigEndian.Uint32(data[4:8])
+ if parsedVersion != version {
+ return zero, fmt.Errorf("%w: unsupported version %d", ErrMalformedHeader, parsedVersion)
+ }
+
+ return Header{
+ ObjectCount: binary.BigEndian.Uint32(data[8:12]),
+ }, nil
+}
+
+// AppendHeader appends an encoded pack header for objectCount to dst.
+func AppendHeader(dst []byte, objectCount uint32) []byte {
+ dst = binary.BigEndian.AppendUint32(dst, signature)
+ dst = binary.BigEndian.AppendUint32(dst, version)
+ dst = binary.BigEndian.AppendUint32(dst, objectCount)
+
+ return dst
}