diff options
Diffstat (limited to 'internal/format/packfile')
| -rw-r--r-- | internal/format/packfile/delta/apply.go | 122 | ||||
| -rw-r--r-- | internal/format/packfile/delta/apply_test.go | 174 | ||||
| -rw-r--r-- | internal/format/packfile/delta/doc.go | 2 | ||||
| -rw-r--r-- | internal/format/packfile/delta/header.go | 98 | ||||
| -rw-r--r-- | internal/format/packfile/delta/header_test.go | 88 | ||||
| -rw-r--r-- | internal/format/packfile/doc.go | 2 | ||||
| -rw-r--r-- | internal/format/packfile/entry_header.go | 165 | ||||
| -rw-r--r-- | internal/format/packfile/entry_header_test.go | 251 | ||||
| -rw-r--r-- | internal/format/packfile/entry_type.go | 86 | ||||
| -rw-r--r-- | internal/format/packfile/header.go | 63 | ||||
| -rw-r--r-- | internal/format/packfile/ofs.go | 68 | ||||
| -rw-r--r-- | internal/format/packfile/ofs_test.go | 100 |
12 files changed, 1219 insertions, 0 deletions
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go new file mode 100644 index 00000000..4210d1b3 --- /dev/null +++ b/internal/format/packfile/delta/apply.go @@ -0,0 +1,122 @@ +package delta + +import ( + "fmt" + + "lindenii.org/go/lgo/intconv" +) + +// MaxChainDepth is the maximum supported delta chain length. +// Resolvers reject chains deeper than this +// to bound recursion depth and reconstruction work. +const MaxChainDepth = 1 << 12 + +// Apply applies one inflated delta payload to base +// and returns the reconstructed result. +// +// delta must include the leading base/result size header. +func Apply(base, delta []byte) ([]byte, error) { + baseSize, resultSize, pos, err := ParseHeaderSizes(delta) + if err != nil { + return nil, err + } + + if baseSize != uint64(len(base)) { + return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta) + } + + outLen, err := intconv.Uint64ToInt(resultSize) + if err != nil { + return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta) + } + + out := make([]byte, outLen) + outPos := 0 + + for pos < len(delta) { + op := delta[pos] + pos++ + + switch { + case op&0x80 != 0: + outPos, err = applyCopy(out, outPos, base, delta, &pos, op) + case op != 0: + outPos, err = applyInsert(out, outPos, delta, &pos, int(op)) + default: + err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta) + } + + if err != nil { + return nil, err + } + } + + if outPos != len(out) { + return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta) + } + + return out, nil +} + +// applyCopy executes one copy instruction, +// copying a base range into out, +// and returns the new output position. +func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) { + off, err := parseCopyOperand(delta, pos, op, 0, 4) + if err != nil { + return 0, err + } + + n, err := parseCopyOperand(delta, pos, op, 4, 3) + if err != nil { + return 0, err + } + + if n == 0 { + n = 0x10000 + } + + if off+n > len(base) || outPos+n > len(out) { + return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta) + } + + copy(out[outPos:outPos+n], base[off:off+n]) + + return outPos + n, nil +} + +// applyInsert executes one insert instruction, +// copying n literal delta bytes into out, +// and returns the new output position. +func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) { + if *pos+n > len(delta) || outPos+n > len(out) { + return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta) + } + + copy(out[outPos:outPos+n], delta[*pos:*pos+n]) + *pos += n + + return outPos + n, nil +} + +// parseCopyOperand assembles one little-endian copy instruction operand +// from the operand bytes selected by op's flag bits +// firstBit through firstBit+count-1. +func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) { + value := 0 + + for i := range count { + if op&(1<<(firstBit+uint(i))) == 0 { + continue + } + + if *pos >= len(delta) { + return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta) + } + + value |= int(delta[*pos]) << (8 * i) + *pos++ + } + + return value, nil +} diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go new file mode 100644 index 00000000..bdf454d9 --- /dev/null +++ b/internal/format/packfile/delta/apply_test.go @@ -0,0 +1,174 @@ +package delta_test + +import ( + "bytes" + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +func TestApplyInsert(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, 0, 5) + data = append(data, 5, 'h', 'e', 'l', 'l', 'o') + + out, err := delta.Apply(nil, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "hello" { + t.Fatalf("Apply = %q, want %q", out, "hello") + } +} + +func TestApplyCopy(t *testing.T) { + t.Parallel() + + base := []byte("0123456789") + + // One offset byte and one size byte. + data := delta.AppendHeaderSizes(nil, 10, 4) + data = append(data, 0x91, 3, 4) + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "3456" { + t.Fatalf("Apply = %q, want %q", out, "3456") + } +} + +func TestApplyCopyImplicitSize(t *testing.T) { + t.Parallel() + + base := bytes.Repeat([]byte{0xab}, 0x10000+10) + + // Offset 0 and the implicit copy size 0x10000. + data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000) + data = append(data, 0x80) + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if !bytes.Equal(out, base[:0x10000]) { + t.Fatalf("Apply implicit-size copy mismatch") + } +} + +func TestApplyMixed(t *testing.T) { + t.Parallel() + + base := []byte("the quick brown fox") + + data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9) + data = append(data, 0x91, 4, 5) // copy "quick" + data = append(data, 3, '!', '?', '!') // insert "!?!" + data = append(data, 0x91, 16, 1) // copy "f" + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "quick!?!f" { + t.Fatalf("Apply = %q, want %q", out, "quick!?!f") + } +} + +func TestApplyEmptyResult(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, 3, 0) + + out, err := delta.Apply([]byte("abc"), data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if len(out) != 0 { + t.Fatalf("Apply = %q, want empty", out) + } +} + +func TestApplyMalformed(t *testing.T) { + t.Parallel() + + base := []byte("0123456789") + + cases := []struct { + name string + ops []byte + baseSize uint64 + resultSize uint64 + }{ + { + name: "opcode zero", + ops: []byte{0x00}, + baseSize: 10, + resultSize: 1, + }, + { + name: "truncated copy operand", + ops: []byte{0x91, 3}, + baseSize: 10, + resultSize: 4, + }, + { + name: "copy past base", + ops: []byte{0x91, 8, 5}, + baseSize: 10, + resultSize: 5, + }, + { + name: "copy past result", + ops: []byte{0x91, 0, 5}, + baseSize: 10, + resultSize: 3, + }, + { + name: "insert past delta", + ops: []byte{5, 'a', 'b'}, + baseSize: 10, + resultSize: 5, + }, + { + name: "insert past result", + ops: []byte{5, 'a', 'b', 'c', 'd', 'e'}, + baseSize: 10, + resultSize: 3, + }, + { + name: "base size mismatch", + ops: []byte{0x91, 0, 1}, + baseSize: 9, + resultSize: 1, + }, + { + name: "result shorter than declared", + ops: []byte{0x91, 0, 1}, + baseSize: 10, + resultSize: 2, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize) + data = append(data, tc.ops...) + + _, err := delta.Apply(base, data) + if !errors.Is(err, delta.ErrMalformedDelta) { + t.Fatalf("Apply error = %v, want ErrMalformedDelta", err) + } + }) + } +} diff --git a/internal/format/packfile/delta/doc.go b/internal/format/packfile/delta/doc.go new file mode 100644 index 00000000..f63c96a8 --- /dev/null +++ b/internal/format/packfile/delta/doc.go @@ -0,0 +1,2 @@ +// Package delta provides various routines to handle Git delta compression. +package delta diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go new file mode 100644 index 00000000..d5dd93fb --- /dev/null +++ b/internal/format/packfile/delta/header.go @@ -0,0 +1,98 @@ +package delta + +import ( + "errors" + "fmt" +) + +// ErrMalformedDelta reports that +// a delta payload is truncated, overlong, +// declares sizes that overflow uint64, +// contains invalid instructions, +// or does not match the supplied base or declared sizes. +var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta") + +// MaxSizeVarintLen is the maximum encoded length +// of one delta header size varint. +// Every uint64 size is encodable within this bound, +// and parsing rejects longer encodings. +const MaxSizeVarintLen = 10 + +// MaxHeaderSizesLen is the maximum encoded length +// of the base/result size header of a delta payload. +// +// Callers reading a delta from a stream may inflate +// MaxHeaderSizesLen bytes +// (or fewer if the payload ends sooner) +// and parse with [ParseHeaderSizes]; +// no valid header is longer. +const MaxHeaderSizesLen = 2 * MaxSizeVarintLen + +// ParseHeaderSizes parses the base size and result size varints +// at the beginning of one inflated delta payload. +func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) { + baseSize, consumed, err = parseSizeVarint(data, 0) + if err != nil { + return 0, 0, 0, err + } + + resultSize, consumed, err = parseSizeVarint(data, consumed) + if err != nil { + return 0, 0, 0, err + } + + return baseSize, resultSize, consumed, nil +} + +// AppendHeaderSizes appends the base/result size header encoding +// of one delta payload to dst. +func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte { + dst = appendSizeVarint(dst, baseSize) + + return appendSizeVarint(dst, resultSize) +} + +// appendSizeVarint appends the encoding of one delta header size varint to dst. +func appendSizeVarint(dst []byte, value uint64) []byte { + for value >= 0x80 { + dst = append(dst, byte(value)|0x80) + value >>= 7 + } + + return append(dst, byte(value)) +} + +// parseSizeVarint parses one delta header size varint +// beginning at pos, +// and returns the position past it. +func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) { + start := pos + + shift := uint(0) + + for { + if pos-start >= MaxSizeVarintLen { + return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta) + } + + if pos >= len(data) { + return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta) + } + + b := data[pos] + pos++ + + group := uint64(b & 0x7f) + if group<<shift>>shift != group { + return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta) + } + + value |= group << shift + + if b&0x80 == 0 { + return value, pos, nil + } + + shift += 7 + } +} diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go new file mode 100644 index 00000000..8d97674f --- /dev/null +++ b/internal/format/packfile/delta/header_test.go @@ -0,0 +1,88 @@ +package delta_test + +import ( + "bytes" + "errors" + "math" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +func TestParseHeaderSizesRoundTrip(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + baseSize uint64 + resultSize uint64 + }{ + {name: "zero", baseSize: 0, resultSize: 0}, + {name: "small", baseSize: 5, resultSize: 130}, + {name: "boundaries", baseSize: 127, resultSize: 128}, + {name: "large", baseSize: 1 << 32, resultSize: 1 << 57}, + {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize) + + wantConsumed := len(data) + + data = append(data, 0xde, 0xad) + + baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data) + if err != nil { + t.Fatalf("ParseHeaderSizes: %v", err) + } + + if baseSize != tc.baseSize { + t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize) + } + + if resultSize != tc.resultSize { + t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize) + } + + if consumed != wantConsumed { + t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed) + } + }) + } +} + +func TestParseHeaderSizesMalformed(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + data []byte + }{ + {name: "empty", data: []byte{}}, + {name: "truncated first varint", data: []byte{0x80}}, + {name: "missing second varint", data: []byte{0x05}}, + {name: "truncated second varint", data: []byte{0x05, 0x80}}, + { + name: "overflow", + data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f), + }, + { + name: "overlong", + data: append(bytes.Repeat([]byte{0x80}, 10), 0x00), + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, _, _, err := delta.ParseHeaderSizes(tc.data) + if !errors.Is(err, delta.ErrMalformedDelta) { + t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err) + } + }) + } +} diff --git a/internal/format/packfile/doc.go b/internal/format/packfile/doc.go new file mode 100644 index 00000000..d656e256 --- /dev/null +++ b/internal/format/packfile/doc.go @@ -0,0 +1,2 @@ +// Package packfile provides Git packfile format parsing primitives. +package packfile diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go new file mode 100644 index 00000000..e5f2d62d --- /dev/null +++ b/internal/format/packfile/entry_header.go @@ -0,0 +1,165 @@ +package packfile + +import ( + "errors" + "fmt" + + "lindenii.org/go/furgit/object/id" +) + +// ErrMalformedEntryHeader reports that +// a packfile entry header is truncated, overlong, +// has an unsupported entry type, +// or declares a size that overflows uint64. +var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header") + +// MaxTypeSizeLen is the maximum encoded length +// of the type/size prefix of an entry header. +// Every uint64 size is encodable within this bound, +// and [ParseEntryHeader] rejects longer prefixes. +const MaxTypeSizeLen = 10 + +// MaxEntryHeaderLen returns the maximum encoded length +// of a full entry header +// for packs whose object IDs are hashSize bytes. +// +// Callers parsing from a stream may buffer +// MaxEntryHeaderLen bytes +// (or fewer if the pack data ends sooner) +// and parse with [ParseEntryHeader]; +// no valid entry header is longer. +func MaxEntryHeaderLen(hashSize int) int { + return MaxTypeSizeLen + max(hashSize, MaxOfsDeltaDistanceLen) +} + +// EntryHeader is one parsed packfile entry header: +// everything from the start of the entry +// up to its zlib payload. +type EntryHeader struct { + // Type is the packfile entry type. + Type EntryType + + // Size is the declared inflated size + // of the entry's payload. + // For delta entries this is the delta size, + // not the reconstructed object size. + Size uint64 + + // HeaderLen is the number of bytes + // the header occupies in the pack; + // the zlib payload begins HeaderLen bytes + // after the start of the entry. + HeaderLen int + + // RefBase holds the base object ID + // for ref-delta entries. + // Only the first hashSize bytes are meaningful. + RefBase [id.MaxObjectIDSize]byte + + // OfsDistance is the backward distance + // from the start of this entry + // to the start of the base entry, + // for ofs-delta entries. + OfsDistance uint64 +} + +// ParseEntryHeader parses one packfile entry header +// from the beginning of data. +// +// hashSize must be the object ID size +// of the pack's object format; +// ParseEntryHeader panics on implausible hash sizes. +// +// data need not contain the whole entry; +// [MaxEntryHeaderLen] bytes always suffice. +// Headers of types [EntryTypeInvalid] and [EntryTypeFuture] +// are rejected as malformed. +func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) { + var zero EntryHeader + + if hashSize <= 0 || hashSize > id.MaxObjectIDSize { + panic("internal/format/packfile: invalid hash size") + } + + if len(data) == 0 { + return zero, fmt.Errorf("%w: truncated type/size prefix", ErrMalformedEntryHeader) + } + + first := data[0] + header := EntryHeader{ + Type: EntryType((first >> 4) & 0x07), + Size: uint64(first & 0x0f), + HeaderLen: 1, + } + + shift := uint(4) + + b := first + for b&0x80 != 0 { + if header.HeaderLen >= MaxTypeSizeLen { + return zero, fmt.Errorf("%w: overlong type/size prefix", ErrMalformedEntryHeader) + } + + if header.HeaderLen >= len(data) { + return zero, fmt.Errorf("%w: truncated type/size prefix", ErrMalformedEntryHeader) + } + + b = data[header.HeaderLen] + header.HeaderLen++ + + group := uint64(b & 0x7f) + if group<<shift>>shift != group { + return zero, fmt.Errorf("%w: size overflows uint64", ErrMalformedEntryHeader) + } + + header.Size |= group << shift + shift += 7 + } + + switch header.Type { + case EntryTypeCommit, EntryTypeTree, EntryTypeBlob, EntryTypeTag: + // Base entries have nothing between the type/size prefix and the payload. + case EntryTypeRefDelta: + end := header.HeaderLen + hashSize + if end > len(data) { + return zero, fmt.Errorf("%w: truncated ref-delta base ID", ErrMalformedEntryHeader) + } + + copy(header.RefBase[:], data[header.HeaderLen:end]) + header.HeaderLen = end + case EntryTypeOfsDelta: + dist, consumed, err := ParseOfsDeltaDistance(data[header.HeaderLen:]) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedEntryHeader, err) + } + + header.OfsDistance = dist + header.HeaderLen += consumed + case EntryTypeInvalid, EntryTypeFuture: + return zero, fmt.Errorf("%w: unsupported entry type", ErrMalformedEntryHeader) + default: + return zero, fmt.Errorf("%w: unsupported entry type", ErrMalformedEntryHeader) + } + + return header, nil +} + +// AppendTypeSize appends the type/size prefix encoding +// of an entry header to dst. +// +// entryType must be a valid on-disk entry type; +// [EntryTypeInvalid] and [EntryTypeFuture] and +// values that do not fit in three bits +// produce garbage encodings. +func AppendTypeSize(dst []byte, entryType EntryType, size uint64) []byte { + b := byte(entryType)<<4 | byte(size&0x0f) + size >>= 4 + + for size != 0 { + dst = append(dst, b|0x80) + b = byte(size & 0x7f) + size >>= 7 + } + + return append(dst, b) +} diff --git a/internal/format/packfile/entry_header_test.go b/internal/format/packfile/entry_header_test.go new file mode 100644 index 00000000..79dc2740 --- /dev/null +++ b/internal/format/packfile/entry_header_test.go @@ -0,0 +1,251 @@ +package packfile_test + +import ( + "bytes" + "errors" + "math" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/object/id" +) + +func TestParseEntryHeaderBase(t *testing.T) { + t.Parallel() + + // Commit entry of size 85: + // 0x95 has the continuation bit set, type 1, and size low bits 0x5; + // 0x05 contributes 5<<4. + header, err := packfile.ParseEntryHeader([]byte{0x95, 0x05}, 20) + if err != nil { + t.Fatalf("ParseEntryHeader: %v", err) + } + + if header.Type != packfile.EntryTypeCommit { + t.Fatalf("ParseEntryHeader type = %d, want %d", header.Type, packfile.EntryTypeCommit) + } + + if header.Size != 85 { + t.Fatalf("ParseEntryHeader size = %d, want 85", header.Size) + } + + if header.HeaderLen != 2 { + t.Fatalf("ParseEntryHeader header len = %d, want 2", header.HeaderLen) + } +} + +func TestParseEntryHeaderRefDelta(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + hashSize := objectFormat.Size() + + base := bytes.Repeat([]byte{0xab}, hashSize) + data := append([]byte{0x73}, base...) + + header, err := packfile.ParseEntryHeader(data, hashSize) + if err != nil { + t.Fatalf("ParseEntryHeader: %v", err) + } + + if header.Type != packfile.EntryTypeRefDelta { + t.Fatalf("ParseEntryHeader type = %d, want %d", header.Type, packfile.EntryTypeRefDelta) + } + + if header.Size != 3 { + t.Fatalf("ParseEntryHeader size = %d, want 3", header.Size) + } + + if header.HeaderLen != 1+hashSize { + t.Fatalf("ParseEntryHeader header len = %d, want %d", header.HeaderLen, 1+hashSize) + } + + if !bytes.Equal(header.RefBase[:hashSize], base) { + t.Fatalf("ParseEntryHeader ref base mismatch") + } + }) + } +} + +func TestParseEntryHeaderOfsDelta(t *testing.T) { + t.Parallel() + + header, err := packfile.ParseEntryHeader([]byte{0x65, 0x80, 0x00}, 20) + if err != nil { + t.Fatalf("ParseEntryHeader: %v", err) + } + + if header.Type != packfile.EntryTypeOfsDelta { + t.Fatalf("ParseEntryHeader type = %d, want %d", header.Type, packfile.EntryTypeOfsDelta) + } + + if header.OfsDistance != 128 { + t.Fatalf("ParseEntryHeader distance = %d, want 128", header.OfsDistance) + } + + if header.HeaderLen != 3 { + t.Fatalf("ParseEntryHeader header len = %d, want 3", header.HeaderLen) + } +} + +func TestParseEntryHeaderMalformed(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + data []byte + }{ + {name: "empty", data: []byte{}}, + {name: "truncated type/size", data: []byte{0x95}}, + { + name: "size overflow", + data: append([]byte{0x9f}, bytes.Repeat([]byte{0xff}, 9)...), + }, + { + name: "overlong type/size", + data: append([]byte{0x95}, append(bytes.Repeat([]byte{0x80}, 9), 0x00)...), + }, + {name: "truncated ref-delta base", data: []byte{0x73, 0xab, 0xab}}, + {name: "truncated ofs distance", data: []byte{0x65, 0x80}}, + {name: "type invalid", data: []byte{0x05}}, + {name: "type future", data: []byte{0x55}}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, err := packfile.ParseEntryHeader(tc.data, 20) + if !errors.Is(err, packfile.ErrMalformedEntryHeader) { + t.Fatalf("ParseEntryHeader error = %v, want ErrMalformedEntryHeader", err) + } + }) + } +} + +func TestParseEntryHeaderBadHashSize(t *testing.T) { + t.Parallel() + + for _, hashSize := range []int{-1, 0, id.MaxObjectIDSize + 1} { + func() { + defer func() { + if recover() == nil { + t.Fatalf("ParseEntryHeader hash size %d: expected panic", hashSize) + } + }() + + _, _ = packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize) + }() + } +} + +func TestTypeSizeRoundTrip(t *testing.T) { + t.Parallel() + + baseTypes := []packfile.EntryType{ + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + } + + sizes := []uint64{ + 0, 1, 15, 16, 127, 128, 1 << 20, 1 << 57, math.MaxUint64, + } + + for _, entryType := range baseTypes { + for _, size := range sizes { + data := packfile.AppendTypeSize(nil, entryType, size) + if len(data) > packfile.MaxTypeSizeLen { + t.Fatalf("AppendTypeSize(%d, %d) length = %d, want <= %d", + entryType, size, len(data), packfile.MaxTypeSizeLen) + } + + header, err := packfile.ParseEntryHeader(data, 20) + if err != nil { + t.Fatalf("ParseEntryHeader: %v", err) + } + + if header.Type != entryType { + t.Fatalf("round trip type = %d, want %d", header.Type, entryType) + } + + if header.Size != size { + t.Fatalf("round trip size = %d, want %d", header.Size, size) + } + + if header.HeaderLen != len(data) { + t.Fatalf("round trip header len = %d, want %d", header.HeaderLen, len(data)) + } + } + } +} + +func TestRefDeltaHeaderRoundTrip(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + hashSize := objectFormat.Size() + + base := bytes.Repeat([]byte{0xcd}, hashSize) + data := packfile.AppendTypeSize(nil, packfile.EntryTypeRefDelta, 42) + data = append(data, base...) + + header, err := packfile.ParseEntryHeader(data, hashSize) + if err != nil { + t.Fatalf("ParseEntryHeader: %v", err) + } + + if !bytes.Equal(header.RefBase[:hashSize], base) { + t.Fatalf("round trip ref base mismatch") + } + + if header.HeaderLen != len(data) { + t.Fatalf("round trip header len = %d, want %d", header.HeaderLen, len(data)) + } + }) + } +} + +func TestOfsDeltaHeaderRoundTrip(t *testing.T) { + t.Parallel() + + data := packfile.AppendTypeSize(nil, packfile.EntryTypeOfsDelta, 7) + data = packfile.AppendOfsDeltaDistance(data, 123456) + + header, err := packfile.ParseEntryHeader(data, 20) + if err != nil { + t.Fatalf("ParseEntryHeader: %v", err) + } + + if header.OfsDistance != 123456 { + t.Fatalf("round trip distance = %d, want 123456", header.OfsDistance) + } + + if header.HeaderLen != len(data) { + t.Fatalf("round trip header len = %d, want %d", header.HeaderLen, len(data)) + } +} + +func TestMaxEntryHeaderLenBounds(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + hashSize := objectFormat.Size() + + maxLen := packfile.MaxEntryHeaderLen(hashSize) + if maxLen < packfile.MaxTypeSizeLen+hashSize { + t.Fatalf("MaxEntryHeaderLen(%d) = %d cannot fit a ref-delta header", hashSize, maxLen) + } + + if maxLen < packfile.MaxTypeSizeLen+packfile.MaxOfsDeltaDistanceLen { + t.Fatalf("MaxEntryHeaderLen(%d) = %d cannot fit an ofs-delta header", hashSize, maxLen) + } + } +} diff --git a/internal/format/packfile/entry_type.go b/internal/format/packfile/entry_type.go new file mode 100644 index 00000000..b4baceb4 --- /dev/null +++ b/internal/format/packfile/entry_type.go @@ -0,0 +1,86 @@ +package packfile + +import ( + "errors" + + "lindenii.org/go/furgit/object/typ" +) + +var ( + // ErrInternalEntryType reports that + // a supplied packfile [EntryType] + // cannot be converted into an ordinary [typ.Type] + // since it is a packfile implementation detail. + ErrInternalEntryType = errors.New("internal/format/packfile: packfile-internal entry type cannot be converted to an object type") + + // ErrUnrepresentableObjectType reports that + // a supplied ordinary [typ.Type] + // is not currently representable + // as a packfile [EntryType]. + ErrUnrepresentableObjectType = errors.New("internal/format/packfile: object type not representable in packfiles") +) + +// EntryType represents the type of an entry in a git packfile. +type EntryType uint8 + +const ( + EntryTypeInvalid EntryType = 0 + EntryTypeCommit EntryType = 1 + EntryTypeTree EntryType = 2 + EntryTypeBlob EntryType = 3 + EntryTypeTag EntryType = 4 + EntryTypeFuture EntryType = 5 + EntryTypeOfsDelta EntryType = 6 + EntryTypeRefDelta EntryType = 7 +) + +// EntryTypeFromObjectType converts an ordinary [typ.Type] into a packfile [EntryType]. +func EntryTypeFromObjectType(ty typ.Type) (EntryType, error) { + switch ty { + case typ.Commit: + return EntryTypeCommit, nil + case typ.Tree: + return EntryTypeTree, nil + case typ.Blob: + return EntryTypeBlob, nil + case typ.Tag: + return EntryTypeTag, nil + case typ.Unknown: + } + + return EntryTypeInvalid, ErrUnrepresentableObjectType +} + +// ObjectType converts a packfile [EntryType] into an ordinary [typ.Type]. +func (entryType EntryType) ObjectType() (typ.Type, error) { + switch entryType { + case EntryTypeCommit: + return typ.Commit, nil + case EntryTypeTree: + return typ.Tree, nil + case EntryTypeBlob: + return typ.Blob, nil + case EntryTypeTag: + return typ.Tag, nil + case EntryTypeInvalid, EntryTypeFuture, EntryTypeOfsDelta, EntryTypeRefDelta: + } + + return typ.Unknown, ErrInternalEntryType +} + +// IsBase reports whether the entry type +// is a base (non-delta) object type. +func (entryType EntryType) IsBase() bool { + switch entryType { + case EntryTypeCommit, EntryTypeTree, EntryTypeBlob, EntryTypeTag: + return true + case EntryTypeInvalid, EntryTypeFuture, EntryTypeOfsDelta, EntryTypeRefDelta: + } + + return false +} + +// IsDelta reports whether the entry type is a delta type. +func (entryType EntryType) IsDelta() bool { + return entryType == EntryTypeOfsDelta || entryType == EntryTypeRefDelta +} diff --git a/internal/format/packfile/header.go b/internal/format/packfile/header.go new file mode 100644 index 00000000..64c7d140 --- /dev/null +++ b/internal/format/packfile/header.go @@ -0,0 +1,63 @@ +package packfile + +import ( + "encoding/binary" + "errors" + "fmt" +) + +const ( + // signature is the 4-byte "PACK" magic at the start of pack files. + signature = 0x5041434b + + // version is the only pack version this package reads or writes. + version = 2 +) + +// HeaderLen is the size of a pack header: +// signature, version, and object count. +const HeaderLen = 12 + +// ErrMalformedHeader reports that +// a pack header is truncated, +// or has a bad signature or unsupported version. +var ErrMalformedHeader = errors.New("internal/format/packfile: malformed pack header") + +// Header is one parsed pack header. +type Header struct { + // ObjectCount is the number of object entries + // declared to follow the header. + ObjectCount uint32 +} + +// ParseHeader parses and validates the pack header +// at the start of data. +func ParseHeader(data []byte) (Header, error) { + var zero Header + + if len(data) < HeaderLen { + return zero, fmt.Errorf("%w: truncated", ErrMalformedHeader) + } + + if binary.BigEndian.Uint32(data[0:4]) != signature { + return zero, fmt.Errorf("%w: bad signature", ErrMalformedHeader) + } + + parsedVersion := binary.BigEndian.Uint32(data[4:8]) + if parsedVersion != version { + return zero, fmt.Errorf("%w: unsupported version %d", ErrMalformedHeader, parsedVersion) + } + + return Header{ + ObjectCount: binary.BigEndian.Uint32(data[8:12]), + }, nil +} + +// AppendHeader appends an encoded pack header for objectCount to dst. +func AppendHeader(dst []byte, objectCount uint32) []byte { + dst = binary.BigEndian.AppendUint32(dst, signature) + dst = binary.BigEndian.AppendUint32(dst, version) + dst = binary.BigEndian.AppendUint32(dst, objectCount) + + return dst +} diff --git a/internal/format/packfile/ofs.go b/internal/format/packfile/ofs.go new file mode 100644 index 00000000..5fb8ecdd --- /dev/null +++ b/internal/format/packfile/ofs.go @@ -0,0 +1,68 @@ +package packfile + +import ( + "errors" + "fmt" + "math" +) + +// ErrMalformedOfsDeltaDistance reports that +// an ofs-delta backward distance encoding +// is truncated, overlong, or overflows uint64. +var ErrMalformedOfsDeltaDistance = errors.New("internal/format/packfile: malformed ofs-delta distance") + +// MaxOfsDeltaDistanceLen is the maximum encoded length +// of an ofs-delta backward distance. +// Every uint64 distance is encodable within this bound, +// and [ParseOfsDeltaDistance] rejects longer encodings. +const MaxOfsDeltaDistanceLen = 10 + +// ParseOfsDeltaDistance parses an ofs-delta backward distance. +func ParseOfsDeltaDistance(buf []byte) (dist uint64, consumed int, err error) { + if len(buf) == 0 { + return 0, 0, fmt.Errorf("%w: truncated", ErrMalformedOfsDeltaDistance) + } + + b := buf[0] + dist = uint64(b & 0x7f) + + consumed = 1 + for b&0x80 != 0 { + if consumed >= MaxOfsDeltaDistanceLen { + return 0, 0, fmt.Errorf("%w: overlong encoding", ErrMalformedOfsDeltaDistance) + } + + if consumed >= len(buf) { + return 0, 0, fmt.Errorf("%w: truncated", ErrMalformedOfsDeltaDistance) + } + + if dist >= math.MaxUint64>>7 { + return 0, 0, fmt.Errorf("%w: overflows uint64", ErrMalformedOfsDeltaDistance) + } + + b = buf[consumed] + consumed++ + dist = ((dist + 1) << 7) | uint64(b&0x7f) + } + + return dist, consumed, nil +} + +// AppendOfsDeltaDistance appends the encoding of +// an ofs-delta backward distance to dst. +func AppendOfsDeltaDistance(dst []byte, dist uint64) []byte { + var buf [MaxOfsDeltaDistanceLen]byte + + pos := len(buf) - 1 + buf[pos] = byte(dist & 0x7f) + + dist >>= 7 + for dist != 0 { + dist-- + pos-- + buf[pos] = 0x80 | byte(dist&0x7f) + dist >>= 7 + } + + return append(dst, buf[pos:]...) +} diff --git a/internal/format/packfile/ofs_test.go b/internal/format/packfile/ofs_test.go new file mode 100644 index 00000000..736f1ac9 --- /dev/null +++ b/internal/format/packfile/ofs_test.go @@ -0,0 +1,100 @@ +package packfile_test + +import ( + "bytes" + "errors" + "math" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile" +) + +func TestParseOfsDeltaDistance(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + data []byte + dist uint64 + consumed int + }{ + {name: "zero", data: []byte{0x00}, dist: 0, consumed: 1}, + {name: "one byte max", data: []byte{0x7f}, dist: 127, consumed: 1}, + {name: "two byte min", data: []byte{0x80, 0x00}, dist: 128, consumed: 2}, + {name: "two byte max", data: []byte{0xff, 0x7f}, dist: 16511, consumed: 2}, + {name: "trailing bytes ignored", data: []byte{0x7f, 0xde, 0xad}, dist: 127, consumed: 1}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + dist, consumed, err := packfile.ParseOfsDeltaDistance(tc.data) + if err != nil { + t.Fatalf("ParseOfsDeltaDistance: %v", err) + } + + if dist != tc.dist { + t.Fatalf("ParseOfsDeltaDistance = %d, want %d", dist, tc.dist) + } + + if consumed != tc.consumed { + t.Fatalf("ParseOfsDeltaDistance consumed = %d, want %d", consumed, tc.consumed) + } + }) + } +} + +func TestParseOfsDeltaDistanceMalformed(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + data []byte + }{ + {name: "empty", data: []byte{}}, + {name: "truncated", data: []byte{0x80}}, + {name: "overflow", data: bytes.Repeat([]byte{0xff}, 10)}, + {name: "overlong", data: append(bytes.Repeat([]byte{0x80}, 10), 0x00)}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, _, err := packfile.ParseOfsDeltaDistance(tc.data) + if !errors.Is(err, packfile.ErrMalformedOfsDeltaDistance) { + t.Fatalf("ParseOfsDeltaDistance error = %v, want ErrMalformedOfsDeltaDistance", err) + } + }) + } +} + +func TestOfsDeltaDistanceRoundTrip(t *testing.T) { + t.Parallel() + + distances := []uint64{ + 0, 1, 127, 128, 16511, 16512, 1 << 31, 1 << 57, math.MaxUint64, + } + + for _, dist := range distances { + data := packfile.AppendOfsDeltaDistance(nil, dist) + if len(data) > packfile.MaxOfsDeltaDistanceLen { + t.Fatalf("AppendOfsDeltaDistance(%d) length = %d, want <= %d", + dist, len(data), packfile.MaxOfsDeltaDistanceLen) + } + + got, consumed, err := packfile.ParseOfsDeltaDistance(data) + if err != nil { + t.Fatalf("ParseOfsDeltaDistance: %v", err) + } + + if got != dist { + t.Fatalf("round trip = %d, want %d", got, dist) + } + + if consumed != len(data) { + t.Fatalf("round trip consumed = %d, want %d", consumed, len(data)) + } + } +} |
