diff options
Diffstat (limited to 'internal/format/packfile')
| -rw-r--r-- | internal/format/packfile/delta/apply.go | 122 | ||||
| -rw-r--r-- | internal/format/packfile/delta/apply_test.go | 174 | ||||
| -rw-r--r-- | internal/format/packfile/delta/header.go | 98 | ||||
| -rw-r--r-- | internal/format/packfile/delta/header_test.go | 88 | ||||
| -rw-r--r-- | internal/format/packfile/entry_header.go | 11 | ||||
| -rw-r--r-- | internal/format/packfile/entry_header_test.go | 13 | ||||
| -rw-r--r-- | internal/format/packfile/header.go | 64 |
7 files changed, 553 insertions, 17 deletions
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go new file mode 100644 index 00000000..4210d1b3 --- /dev/null +++ b/internal/format/packfile/delta/apply.go @@ -0,0 +1,122 @@ +package delta + +import ( + "fmt" + + "lindenii.org/go/lgo/intconv" +) + +// MaxChainDepth is the maximum supported delta chain length. +// Resolvers reject chains deeper than this +// to bound recursion depth and reconstruction work. +const MaxChainDepth = 1 << 12 + +// Apply applies one inflated delta payload to base +// and returns the reconstructed result. +// +// delta must include the leading base/result size header. +func Apply(base, delta []byte) ([]byte, error) { + baseSize, resultSize, pos, err := ParseHeaderSizes(delta) + if err != nil { + return nil, err + } + + if baseSize != uint64(len(base)) { + return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta) + } + + outLen, err := intconv.Uint64ToInt(resultSize) + if err != nil { + return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta) + } + + out := make([]byte, outLen) + outPos := 0 + + for pos < len(delta) { + op := delta[pos] + pos++ + + switch { + case op&0x80 != 0: + outPos, err = applyCopy(out, outPos, base, delta, &pos, op) + case op != 0: + outPos, err = applyInsert(out, outPos, delta, &pos, int(op)) + default: + err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta) + } + + if err != nil { + return nil, err + } + } + + if outPos != len(out) { + return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta) + } + + return out, nil +} + +// applyCopy executes one copy instruction, +// copying a base range into out, +// and returns the new output position. +func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) { + off, err := parseCopyOperand(delta, pos, op, 0, 4) + if err != nil { + return 0, err + } + + n, err := parseCopyOperand(delta, pos, op, 4, 3) + if err != nil { + return 0, err + } + + if n == 0 { + n = 0x10000 + } + + if off+n > len(base) || outPos+n > len(out) { + return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta) + } + + copy(out[outPos:outPos+n], base[off:off+n]) + + return outPos + n, nil +} + +// applyInsert executes one insert instruction, +// copying n literal delta bytes into out, +// and returns the new output position. +func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) { + if *pos+n > len(delta) || outPos+n > len(out) { + return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta) + } + + copy(out[outPos:outPos+n], delta[*pos:*pos+n]) + *pos += n + + return outPos + n, nil +} + +// parseCopyOperand assembles one little-endian copy instruction operand +// from the operand bytes selected by op's flag bits +// firstBit through firstBit+count-1. +func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) { + value := 0 + + for i := range count { + if op&(1<<(firstBit+uint(i))) == 0 { + continue + } + + if *pos >= len(delta) { + return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta) + } + + value |= int(delta[*pos]) << (8 * i) + *pos++ + } + + return value, nil +} diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go new file mode 100644 index 00000000..bdf454d9 --- /dev/null +++ b/internal/format/packfile/delta/apply_test.go @@ -0,0 +1,174 @@ +package delta_test + +import ( + "bytes" + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +func TestApplyInsert(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, 0, 5) + data = append(data, 5, 'h', 'e', 'l', 'l', 'o') + + out, err := delta.Apply(nil, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "hello" { + t.Fatalf("Apply = %q, want %q", out, "hello") + } +} + +func TestApplyCopy(t *testing.T) { + t.Parallel() + + base := []byte("0123456789") + + // One offset byte and one size byte. + data := delta.AppendHeaderSizes(nil, 10, 4) + data = append(data, 0x91, 3, 4) + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "3456" { + t.Fatalf("Apply = %q, want %q", out, "3456") + } +} + +func TestApplyCopyImplicitSize(t *testing.T) { + t.Parallel() + + base := bytes.Repeat([]byte{0xab}, 0x10000+10) + + // Offset 0 and the implicit copy size 0x10000. + data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000) + data = append(data, 0x80) + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if !bytes.Equal(out, base[:0x10000]) { + t.Fatalf("Apply implicit-size copy mismatch") + } +} + +func TestApplyMixed(t *testing.T) { + t.Parallel() + + base := []byte("the quick brown fox") + + data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9) + data = append(data, 0x91, 4, 5) // copy "quick" + data = append(data, 3, '!', '?', '!') // insert "!?!" + data = append(data, 0x91, 16, 1) // copy "f" + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "quick!?!f" { + t.Fatalf("Apply = %q, want %q", out, "quick!?!f") + } +} + +func TestApplyEmptyResult(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, 3, 0) + + out, err := delta.Apply([]byte("abc"), data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if len(out) != 0 { + t.Fatalf("Apply = %q, want empty", out) + } +} + +func TestApplyMalformed(t *testing.T) { + t.Parallel() + + base := []byte("0123456789") + + cases := []struct { + name string + ops []byte + baseSize uint64 + resultSize uint64 + }{ + { + name: "opcode zero", + ops: []byte{0x00}, + baseSize: 10, + resultSize: 1, + }, + { + name: "truncated copy operand", + ops: []byte{0x91, 3}, + baseSize: 10, + resultSize: 4, + }, + { + name: "copy past base", + ops: []byte{0x91, 8, 5}, + baseSize: 10, + resultSize: 5, + }, + { + name: "copy past result", + ops: []byte{0x91, 0, 5}, + baseSize: 10, + resultSize: 3, + }, + { + name: "insert past delta", + ops: []byte{5, 'a', 'b'}, + baseSize: 10, + resultSize: 5, + }, + { + name: "insert past result", + ops: []byte{5, 'a', 'b', 'c', 'd', 'e'}, + baseSize: 10, + resultSize: 3, + }, + { + name: "base size mismatch", + ops: []byte{0x91, 0, 1}, + baseSize: 9, + resultSize: 1, + }, + { + name: "result shorter than declared", + ops: []byte{0x91, 0, 1}, + baseSize: 10, + resultSize: 2, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize) + data = append(data, tc.ops...) + + _, err := delta.Apply(base, data) + if !errors.Is(err, delta.ErrMalformedDelta) { + t.Fatalf("Apply error = %v, want ErrMalformedDelta", err) + } + }) + } +} diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go new file mode 100644 index 00000000..d5dd93fb --- /dev/null +++ b/internal/format/packfile/delta/header.go @@ -0,0 +1,98 @@ +package delta + +import ( + "errors" + "fmt" +) + +// ErrMalformedDelta reports that +// a delta payload is truncated, overlong, +// declares sizes that overflow uint64, +// contains invalid instructions, +// or does not match the supplied base or declared sizes. +var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta") + +// MaxSizeVarintLen is the maximum encoded length +// of one delta header size varint. +// Every uint64 size is encodable within this bound, +// and parsing rejects longer encodings. +const MaxSizeVarintLen = 10 + +// MaxHeaderSizesLen is the maximum encoded length +// of the base/result size header of a delta payload. +// +// Callers reading a delta from a stream may inflate +// MaxHeaderSizesLen bytes +// (or fewer if the payload ends sooner) +// and parse with [ParseHeaderSizes]; +// no valid header is longer. +const MaxHeaderSizesLen = 2 * MaxSizeVarintLen + +// ParseHeaderSizes parses the base size and result size varints +// at the beginning of one inflated delta payload. +func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) { + baseSize, consumed, err = parseSizeVarint(data, 0) + if err != nil { + return 0, 0, 0, err + } + + resultSize, consumed, err = parseSizeVarint(data, consumed) + if err != nil { + return 0, 0, 0, err + } + + return baseSize, resultSize, consumed, nil +} + +// AppendHeaderSizes appends the base/result size header encoding +// of one delta payload to dst. +func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte { + dst = appendSizeVarint(dst, baseSize) + + return appendSizeVarint(dst, resultSize) +} + +// appendSizeVarint appends the encoding of one delta header size varint to dst. +func appendSizeVarint(dst []byte, value uint64) []byte { + for value >= 0x80 { + dst = append(dst, byte(value)|0x80) + value >>= 7 + } + + return append(dst, byte(value)) +} + +// parseSizeVarint parses one delta header size varint +// beginning at pos, +// and returns the position past it. +func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) { + start := pos + + shift := uint(0) + + for { + if pos-start >= MaxSizeVarintLen { + return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta) + } + + if pos >= len(data) { + return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta) + } + + b := data[pos] + pos++ + + group := uint64(b & 0x7f) + if group<<shift>>shift != group { + return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta) + } + + value |= group << shift + + if b&0x80 == 0 { + return value, pos, nil + } + + shift += 7 + } +} diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go new file mode 100644 index 00000000..8d97674f --- /dev/null +++ b/internal/format/packfile/delta/header_test.go @@ -0,0 +1,88 @@ +package delta_test + +import ( + "bytes" + "errors" + "math" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +func TestParseHeaderSizesRoundTrip(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + baseSize uint64 + resultSize uint64 + }{ + {name: "zero", baseSize: 0, resultSize: 0}, + {name: "small", baseSize: 5, resultSize: 130}, + {name: "boundaries", baseSize: 127, resultSize: 128}, + {name: "large", baseSize: 1 << 32, resultSize: 1 << 57}, + {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize) + + wantConsumed := len(data) + + data = append(data, 0xde, 0xad) + + baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data) + if err != nil { + t.Fatalf("ParseHeaderSizes: %v", err) + } + + if baseSize != tc.baseSize { + t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize) + } + + if resultSize != tc.resultSize { + t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize) + } + + if consumed != wantConsumed { + t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed) + } + }) + } +} + +func TestParseHeaderSizesMalformed(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + data []byte + }{ + {name: "empty", data: []byte{}}, + {name: "truncated first varint", data: []byte{0x80}}, + {name: "missing second varint", data: []byte{0x05}}, + {name: "truncated second varint", data: []byte{0x05, 0x80}}, + { + name: "overflow", + data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f), + }, + { + name: "overlong", + data: append(bytes.Repeat([]byte{0x80}, 10), 0x00), + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, _, _, err := delta.ParseHeaderSizes(tc.data) + if !errors.Is(err, delta.ErrMalformedDelta) { + t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err) + } + }) + } +} diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go index 1428600b..e5f2d62d 100644 --- a/internal/format/packfile/entry_header.go +++ b/internal/format/packfile/entry_header.go @@ -13,12 +13,6 @@ import ( // or declares a size that overflows uint64. var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header") -// ErrInvalidHashSize reports that -// a supplied hash size is not a plausible object ID size. -// This indicates a caller bug, -// not malformed pack data. -var ErrInvalidHashSize = errors.New("internal/format/packfile: invalid hash size") - // MaxTypeSizeLen is the maximum encoded length // of the type/size prefix of an entry header. // Every uint64 size is encodable within this bound, @@ -73,7 +67,8 @@ type EntryHeader struct { // from the beginning of data. // // hashSize must be the object ID size -// of the pack's object format. +// of the pack's object format; +// ParseEntryHeader panics on implausible hash sizes. // // data need not contain the whole entry; // [MaxEntryHeaderLen] bytes always suffice. @@ -83,7 +78,7 @@ func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) { var zero EntryHeader if hashSize <= 0 || hashSize > id.MaxObjectIDSize { - return zero, fmt.Errorf("%w: %d", ErrInvalidHashSize, hashSize) + panic("internal/format/packfile: invalid hash size") } if len(data) == 0 { diff --git a/internal/format/packfile/entry_header_test.go b/internal/format/packfile/entry_header_test.go index 807c544e..79dc2740 100644 --- a/internal/format/packfile/entry_header_test.go +++ b/internal/format/packfile/entry_header_test.go @@ -130,10 +130,15 @@ func TestParseEntryHeaderBadHashSize(t *testing.T) { t.Parallel() for _, hashSize := range []int{-1, 0, id.MaxObjectIDSize + 1} { - _, err := packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize) - if err == nil { - t.Fatalf("ParseEntryHeader hash size %d: expected error", hashSize) - } + func() { + defer func() { + if recover() == nil { + t.Fatalf("ParseEntryHeader hash size %d: expected panic", hashSize) + } + }() + + _, _ = packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize) + }() } } diff --git a/internal/format/packfile/header.go b/internal/format/packfile/header.go index 1a156f36..64c7d140 100644 --- a/internal/format/packfile/header.go +++ b/internal/format/packfile/header.go @@ -1,9 +1,63 @@ package packfile -// Signature is the 4-byte "PACK" magic at the start of pack files. -const Signature = 0x5041434b +import ( + "encoding/binary" + "errors" + "fmt" +) -// SupportedVersion reports whether a pack version is supported. -func SupportedVersion(version uint32) bool { - return version == 2 +const ( + // signature is the 4-byte "PACK" magic at the start of pack files. + signature = 0x5041434b + + // version is the only pack version this package reads or writes. + version = 2 +) + +// HeaderLen is the size of a pack header: +// signature, version, and object count. +const HeaderLen = 12 + +// ErrMalformedHeader reports that +// a pack header is truncated, +// or has a bad signature or unsupported version. +var ErrMalformedHeader = errors.New("internal/format/packfile: malformed pack header") + +// Header is one parsed pack header. +type Header struct { + // ObjectCount is the number of object entries + // declared to follow the header. + ObjectCount uint32 +} + +// ParseHeader parses and validates the pack header +// at the start of data. +func ParseHeader(data []byte) (Header, error) { + var zero Header + + if len(data) < HeaderLen { + return zero, fmt.Errorf("%w: truncated", ErrMalformedHeader) + } + + if binary.BigEndian.Uint32(data[0:4]) != signature { + return zero, fmt.Errorf("%w: bad signature", ErrMalformedHeader) + } + + parsedVersion := binary.BigEndian.Uint32(data[4:8]) + if parsedVersion != version { + return zero, fmt.Errorf("%w: unsupported version %d", ErrMalformedHeader, parsedVersion) + } + + return Header{ + ObjectCount: binary.BigEndian.Uint32(data[8:12]), + }, nil +} + +// AppendHeader appends an encoded pack header for objectCount to dst. +func AppendHeader(dst []byte, objectCount uint32) []byte { + dst = binary.BigEndian.AppendUint32(dst, signature) + dst = binary.BigEndian.AppendUint32(dst, version) + dst = binary.BigEndian.AppendUint32(dst, objectCount) + + return dst } |
