aboutsummaryrefslogtreecommitdiff
path: root/internal/format/packfile
diff options
context:
space:
mode:
Diffstat (limited to 'internal/format/packfile')
-rw-r--r--internal/format/packfile/delta/apply.go122
-rw-r--r--internal/format/packfile/delta/apply_test.go174
-rw-r--r--internal/format/packfile/delta/doc.go2
-rw-r--r--internal/format/packfile/delta/header.go98
-rw-r--r--internal/format/packfile/delta/header_test.go88
-rw-r--r--internal/format/packfile/doc.go2
-rw-r--r--internal/format/packfile/entry_header.go165
-rw-r--r--internal/format/packfile/entry_header_test.go251
-rw-r--r--internal/format/packfile/entry_type.go86
-rw-r--r--internal/format/packfile/header.go63
-rw-r--r--internal/format/packfile/ofs.go68
-rw-r--r--internal/format/packfile/ofs_test.go100
12 files changed, 1219 insertions, 0 deletions
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go
new file mode 100644
index 00000000..4210d1b3
--- /dev/null
+++ b/internal/format/packfile/delta/apply.go
@@ -0,0 +1,122 @@
+package delta
+
+import (
+ "fmt"
+
+ "lindenii.org/go/lgo/intconv"
+)
+
+// MaxChainDepth is the maximum supported delta chain length.
+// Resolvers reject chains deeper than this
+// to bound recursion depth and reconstruction work.
+const MaxChainDepth = 1 << 12
+
+// Apply applies one inflated delta payload to base
+// and returns the reconstructed result.
+//
+// delta must include the leading base/result size header.
+func Apply(base, delta []byte) ([]byte, error) {
+ baseSize, resultSize, pos, err := ParseHeaderSizes(delta)
+ if err != nil {
+ return nil, err
+ }
+
+ if baseSize != uint64(len(base)) {
+ return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta)
+ }
+
+ outLen, err := intconv.Uint64ToInt(resultSize)
+ if err != nil {
+ return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta)
+ }
+
+ out := make([]byte, outLen)
+ outPos := 0
+
+ for pos < len(delta) {
+ op := delta[pos]
+ pos++
+
+ switch {
+ case op&0x80 != 0:
+ outPos, err = applyCopy(out, outPos, base, delta, &pos, op)
+ case op != 0:
+ outPos, err = applyInsert(out, outPos, delta, &pos, int(op))
+ default:
+ err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta)
+ }
+
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if outPos != len(out) {
+ return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta)
+ }
+
+ return out, nil
+}
+
+// applyCopy executes one copy instruction,
+// copying a base range into out,
+// and returns the new output position.
+func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) {
+ off, err := parseCopyOperand(delta, pos, op, 0, 4)
+ if err != nil {
+ return 0, err
+ }
+
+ n, err := parseCopyOperand(delta, pos, op, 4, 3)
+ if err != nil {
+ return 0, err
+ }
+
+ if n == 0 {
+ n = 0x10000
+ }
+
+ if off+n > len(base) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], base[off:off+n])
+
+ return outPos + n, nil
+}
+
+// applyInsert executes one insert instruction,
+// copying n literal delta bytes into out,
+// and returns the new output position.
+func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) {
+ if *pos+n > len(delta) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], delta[*pos:*pos+n])
+ *pos += n
+
+ return outPos + n, nil
+}
+
+// parseCopyOperand assembles one little-endian copy instruction operand
+// from the operand bytes selected by op's flag bits
+// firstBit through firstBit+count-1.
+func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) {
+ value := 0
+
+ for i := range count {
+ if op&(1<<(firstBit+uint(i))) == 0 {
+ continue
+ }
+
+ if *pos >= len(delta) {
+ return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta)
+ }
+
+ value |= int(delta[*pos]) << (8 * i)
+ *pos++
+ }
+
+ return value, nil
+}
diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go
new file mode 100644
index 00000000..bdf454d9
--- /dev/null
+++ b/internal/format/packfile/delta/apply_test.go
@@ -0,0 +1,174 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestApplyInsert(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 0, 5)
+ data = append(data, 5, 'h', 'e', 'l', 'l', 'o')
+
+ out, err := delta.Apply(nil, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "hello" {
+ t.Fatalf("Apply = %q, want %q", out, "hello")
+ }
+}
+
+func TestApplyCopy(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ // One offset byte and one size byte.
+ data := delta.AppendHeaderSizes(nil, 10, 4)
+ data = append(data, 0x91, 3, 4)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "3456" {
+ t.Fatalf("Apply = %q, want %q", out, "3456")
+ }
+}
+
+func TestApplyCopyImplicitSize(t *testing.T) {
+ t.Parallel()
+
+ base := bytes.Repeat([]byte{0xab}, 0x10000+10)
+
+ // Offset 0 and the implicit copy size 0x10000.
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000)
+ data = append(data, 0x80)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if !bytes.Equal(out, base[:0x10000]) {
+ t.Fatalf("Apply implicit-size copy mismatch")
+ }
+}
+
+func TestApplyMixed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("the quick brown fox")
+
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9)
+ data = append(data, 0x91, 4, 5) // copy "quick"
+ data = append(data, 3, '!', '?', '!') // insert "!?!"
+ data = append(data, 0x91, 16, 1) // copy "f"
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "quick!?!f" {
+ t.Fatalf("Apply = %q, want %q", out, "quick!?!f")
+ }
+}
+
+func TestApplyEmptyResult(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 3, 0)
+
+ out, err := delta.Apply([]byte("abc"), data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if len(out) != 0 {
+ t.Fatalf("Apply = %q, want empty", out)
+ }
+}
+
+func TestApplyMalformed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ cases := []struct {
+ name string
+ ops []byte
+ baseSize uint64
+ resultSize uint64
+ }{
+ {
+ name: "opcode zero",
+ ops: []byte{0x00},
+ baseSize: 10,
+ resultSize: 1,
+ },
+ {
+ name: "truncated copy operand",
+ ops: []byte{0x91, 3},
+ baseSize: 10,
+ resultSize: 4,
+ },
+ {
+ name: "copy past base",
+ ops: []byte{0x91, 8, 5},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "copy past result",
+ ops: []byte{0x91, 0, 5},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "insert past delta",
+ ops: []byte{5, 'a', 'b'},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "insert past result",
+ ops: []byte{5, 'a', 'b', 'c', 'd', 'e'},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "base size mismatch",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 9,
+ resultSize: 1,
+ },
+ {
+ name: "result shorter than declared",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 10,
+ resultSize: 2,
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+ data = append(data, tc.ops...)
+
+ _, err := delta.Apply(base, data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("Apply error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/delta/doc.go b/internal/format/packfile/delta/doc.go
new file mode 100644
index 00000000..f63c96a8
--- /dev/null
+++ b/internal/format/packfile/delta/doc.go
@@ -0,0 +1,2 @@
+// Package delta provides various routines to handle Git delta compression.
+package delta
diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go
new file mode 100644
index 00000000..d5dd93fb
--- /dev/null
+++ b/internal/format/packfile/delta/header.go
@@ -0,0 +1,98 @@
+package delta
+
+import (
+ "errors"
+ "fmt"
+)
+
+// ErrMalformedDelta reports that
+// a delta payload is truncated, overlong,
+// declares sizes that overflow uint64,
+// contains invalid instructions,
+// or does not match the supplied base or declared sizes.
+var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta")
+
+// MaxSizeVarintLen is the maximum encoded length
+// of one delta header size varint.
+// Every uint64 size is encodable within this bound,
+// and parsing rejects longer encodings.
+const MaxSizeVarintLen = 10
+
+// MaxHeaderSizesLen is the maximum encoded length
+// of the base/result size header of a delta payload.
+//
+// Callers reading a delta from a stream may inflate
+// MaxHeaderSizesLen bytes
+// (or fewer if the payload ends sooner)
+// and parse with [ParseHeaderSizes];
+// no valid header is longer.
+const MaxHeaderSizesLen = 2 * MaxSizeVarintLen
+
+// ParseHeaderSizes parses the base size and result size varints
+// at the beginning of one inflated delta payload.
+func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) {
+ baseSize, consumed, err = parseSizeVarint(data, 0)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ resultSize, consumed, err = parseSizeVarint(data, consumed)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ return baseSize, resultSize, consumed, nil
+}
+
+// AppendHeaderSizes appends the base/result size header encoding
+// of one delta payload to dst.
+func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte {
+ dst = appendSizeVarint(dst, baseSize)
+
+ return appendSizeVarint(dst, resultSize)
+}
+
+// appendSizeVarint appends the encoding of one delta header size varint to dst.
+func appendSizeVarint(dst []byte, value uint64) []byte {
+ for value >= 0x80 {
+ dst = append(dst, byte(value)|0x80)
+ value >>= 7
+ }
+
+ return append(dst, byte(value))
+}
+
+// parseSizeVarint parses one delta header size varint
+// beginning at pos,
+// and returns the position past it.
+func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) {
+ start := pos
+
+ shift := uint(0)
+
+ for {
+ if pos-start >= MaxSizeVarintLen {
+ return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta)
+ }
+
+ if pos >= len(data) {
+ return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta)
+ }
+
+ b := data[pos]
+ pos++
+
+ group := uint64(b & 0x7f)
+ if group<<shift>>shift != group {
+ return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta)
+ }
+
+ value |= group << shift
+
+ if b&0x80 == 0 {
+ return value, pos, nil
+ }
+
+ shift += 7
+ }
+}
diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go
new file mode 100644
index 00000000..8d97674f
--- /dev/null
+++ b/internal/format/packfile/delta/header_test.go
@@ -0,0 +1,88 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestParseHeaderSizesRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ baseSize uint64
+ resultSize uint64
+ }{
+ {name: "zero", baseSize: 0, resultSize: 0},
+ {name: "small", baseSize: 5, resultSize: 130},
+ {name: "boundaries", baseSize: 127, resultSize: 128},
+ {name: "large", baseSize: 1 << 32, resultSize: 1 << 57},
+ {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+
+ wantConsumed := len(data)
+
+ data = append(data, 0xde, 0xad)
+
+ baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data)
+ if err != nil {
+ t.Fatalf("ParseHeaderSizes: %v", err)
+ }
+
+ if baseSize != tc.baseSize {
+ t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize)
+ }
+
+ if resultSize != tc.resultSize {
+ t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize)
+ }
+
+ if consumed != wantConsumed {
+ t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed)
+ }
+ })
+ }
+}
+
+func TestParseHeaderSizesMalformed(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated first varint", data: []byte{0x80}},
+ {name: "missing second varint", data: []byte{0x05}},
+ {name: "truncated second varint", data: []byte{0x05, 0x80}},
+ {
+ name: "overflow",
+ data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f),
+ },
+ {
+ name: "overlong",
+ data: append(bytes.Repeat([]byte{0x80}, 10), 0x00),
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, _, _, err := delta.ParseHeaderSizes(tc.data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/doc.go b/internal/format/packfile/doc.go
new file mode 100644
index 00000000..d656e256
--- /dev/null
+++ b/internal/format/packfile/doc.go
@@ -0,0 +1,2 @@
+// Package packfile provides Git packfile format parsing primitives.
+package packfile
diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go
new file mode 100644
index 00000000..e5f2d62d
--- /dev/null
+++ b/internal/format/packfile/entry_header.go
@@ -0,0 +1,165 @@
+package packfile
+
+import (
+ "errors"
+ "fmt"
+
+ "lindenii.org/go/furgit/object/id"
+)
+
+// ErrMalformedEntryHeader reports that
+// a packfile entry header is truncated, overlong,
+// has an unsupported entry type,
+// or declares a size that overflows uint64.
+var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header")
+
+// MaxTypeSizeLen is the maximum encoded length
+// of the type/size prefix of an entry header.
+// Every uint64 size is encodable within this bound,
+// and [ParseEntryHeader] rejects longer prefixes.
+const MaxTypeSizeLen = 10
+
+// MaxEntryHeaderLen returns the maximum encoded length
+// of a full entry header
+// for packs whose object IDs are hashSize bytes.
+//
+// Callers parsing from a stream may buffer
+// MaxEntryHeaderLen bytes
+// (or fewer if the pack data ends sooner)
+// and parse with [ParseEntryHeader];
+// no valid entry header is longer.
+func MaxEntryHeaderLen(hashSize int) int {
+ return MaxTypeSizeLen + max(hashSize, MaxOfsDeltaDistanceLen)
+}
+
+// EntryHeader is one parsed packfile entry header:
+// everything from the start of the entry
+// up to its zlib payload.
+type EntryHeader struct {
+ // Type is the packfile entry type.
+ Type EntryType
+
+ // Size is the declared inflated size
+ // of the entry's payload.
+ // For delta entries this is the delta size,
+ // not the reconstructed object size.
+ Size uint64
+
+ // HeaderLen is the number of bytes
+ // the header occupies in the pack;
+ // the zlib payload begins HeaderLen bytes
+ // after the start of the entry.
+ HeaderLen int
+
+ // RefBase holds the base object ID
+ // for ref-delta entries.
+ // Only the first hashSize bytes are meaningful.
+ RefBase [id.MaxObjectIDSize]byte
+
+ // OfsDistance is the backward distance
+ // from the start of this entry
+ // to the start of the base entry,
+ // for ofs-delta entries.
+ OfsDistance uint64
+}
+
+// ParseEntryHeader parses one packfile entry header
+// from the beginning of data.
+//
+// hashSize must be the object ID size
+// of the pack's object format;
+// ParseEntryHeader panics on implausible hash sizes.
+//
+// data need not contain the whole entry;
+// [MaxEntryHeaderLen] bytes always suffice.
+// Headers of types [EntryTypeInvalid] and [EntryTypeFuture]
+// are rejected as malformed.
+func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) {
+ var zero EntryHeader
+
+ if hashSize <= 0 || hashSize > id.MaxObjectIDSize {
+ panic("internal/format/packfile: invalid hash size")
+ }
+
+ if len(data) == 0 {
+ return zero, fmt.Errorf("%w: truncated type/size prefix", ErrMalformedEntryHeader)
+ }
+
+ first := data[0]
+ header := EntryHeader{
+ Type: EntryType((first >> 4) & 0x07),
+ Size: uint64(first & 0x0f),
+ HeaderLen: 1,
+ }
+
+ shift := uint(4)
+
+ b := first
+ for b&0x80 != 0 {
+ if header.HeaderLen >= MaxTypeSizeLen {
+ return zero, fmt.Errorf("%w: overlong type/size prefix", ErrMalformedEntryHeader)
+ }
+
+ if header.HeaderLen >= len(data) {
+ return zero, fmt.Errorf("%w: truncated type/size prefix", ErrMalformedEntryHeader)
+ }
+
+ b = data[header.HeaderLen]
+ header.HeaderLen++
+
+ group := uint64(b & 0x7f)
+ if group<<shift>>shift != group {
+ return zero, fmt.Errorf("%w: size overflows uint64", ErrMalformedEntryHeader)
+ }
+
+ header.Size |= group << shift
+ shift += 7
+ }
+
+ switch header.Type {
+ case EntryTypeCommit, EntryTypeTree, EntryTypeBlob, EntryTypeTag:
+ // Base entries have nothing between the type/size prefix and the payload.
+ case EntryTypeRefDelta:
+ end := header.HeaderLen + hashSize
+ if end > len(data) {
+ return zero, fmt.Errorf("%w: truncated ref-delta base ID", ErrMalformedEntryHeader)
+ }
+
+ copy(header.RefBase[:], data[header.HeaderLen:end])
+ header.HeaderLen = end
+ case EntryTypeOfsDelta:
+ dist, consumed, err := ParseOfsDeltaDistance(data[header.HeaderLen:])
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedEntryHeader, err)
+ }
+
+ header.OfsDistance = dist
+ header.HeaderLen += consumed
+ case EntryTypeInvalid, EntryTypeFuture:
+ return zero, fmt.Errorf("%w: unsupported entry type", ErrMalformedEntryHeader)
+ default:
+ return zero, fmt.Errorf("%w: unsupported entry type", ErrMalformedEntryHeader)
+ }
+
+ return header, nil
+}
+
+// AppendTypeSize appends the type/size prefix encoding
+// of an entry header to dst.
+//
+// entryType must be a valid on-disk entry type;
+// [EntryTypeInvalid] and [EntryTypeFuture] and
+// values that do not fit in three bits
+// produce garbage encodings.
+func AppendTypeSize(dst []byte, entryType EntryType, size uint64) []byte {
+ b := byte(entryType)<<4 | byte(size&0x0f)
+ size >>= 4
+
+ for size != 0 {
+ dst = append(dst, b|0x80)
+ b = byte(size & 0x7f)
+ size >>= 7
+ }
+
+ return append(dst, b)
+}
diff --git a/internal/format/packfile/entry_header_test.go b/internal/format/packfile/entry_header_test.go
new file mode 100644
index 00000000..79dc2740
--- /dev/null
+++ b/internal/format/packfile/entry_header_test.go
@@ -0,0 +1,251 @@
+package packfile_test
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile"
+ "lindenii.org/go/furgit/object/id"
+)
+
+func TestParseEntryHeaderBase(t *testing.T) {
+ t.Parallel()
+
+ // Commit entry of size 85:
+ // 0x95 has the continuation bit set, type 1, and size low bits 0x5;
+ // 0x05 contributes 5<<4.
+ header, err := packfile.ParseEntryHeader([]byte{0x95, 0x05}, 20)
+ if err != nil {
+ t.Fatalf("ParseEntryHeader: %v", err)
+ }
+
+ if header.Type != packfile.EntryTypeCommit {
+ t.Fatalf("ParseEntryHeader type = %d, want %d", header.Type, packfile.EntryTypeCommit)
+ }
+
+ if header.Size != 85 {
+ t.Fatalf("ParseEntryHeader size = %d, want 85", header.Size)
+ }
+
+ if header.HeaderLen != 2 {
+ t.Fatalf("ParseEntryHeader header len = %d, want 2", header.HeaderLen)
+ }
+}
+
+func TestParseEntryHeaderRefDelta(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ hashSize := objectFormat.Size()
+
+ base := bytes.Repeat([]byte{0xab}, hashSize)
+ data := append([]byte{0x73}, base...)
+
+ header, err := packfile.ParseEntryHeader(data, hashSize)
+ if err != nil {
+ t.Fatalf("ParseEntryHeader: %v", err)
+ }
+
+ if header.Type != packfile.EntryTypeRefDelta {
+ t.Fatalf("ParseEntryHeader type = %d, want %d", header.Type, packfile.EntryTypeRefDelta)
+ }
+
+ if header.Size != 3 {
+ t.Fatalf("ParseEntryHeader size = %d, want 3", header.Size)
+ }
+
+ if header.HeaderLen != 1+hashSize {
+ t.Fatalf("ParseEntryHeader header len = %d, want %d", header.HeaderLen, 1+hashSize)
+ }
+
+ if !bytes.Equal(header.RefBase[:hashSize], base) {
+ t.Fatalf("ParseEntryHeader ref base mismatch")
+ }
+ })
+ }
+}
+
+func TestParseEntryHeaderOfsDelta(t *testing.T) {
+ t.Parallel()
+
+ header, err := packfile.ParseEntryHeader([]byte{0x65, 0x80, 0x00}, 20)
+ if err != nil {
+ t.Fatalf("ParseEntryHeader: %v", err)
+ }
+
+ if header.Type != packfile.EntryTypeOfsDelta {
+ t.Fatalf("ParseEntryHeader type = %d, want %d", header.Type, packfile.EntryTypeOfsDelta)
+ }
+
+ if header.OfsDistance != 128 {
+ t.Fatalf("ParseEntryHeader distance = %d, want 128", header.OfsDistance)
+ }
+
+ if header.HeaderLen != 3 {
+ t.Fatalf("ParseEntryHeader header len = %d, want 3", header.HeaderLen)
+ }
+}
+
+func TestParseEntryHeaderMalformed(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated type/size", data: []byte{0x95}},
+ {
+ name: "size overflow",
+ data: append([]byte{0x9f}, bytes.Repeat([]byte{0xff}, 9)...),
+ },
+ {
+ name: "overlong type/size",
+ data: append([]byte{0x95}, append(bytes.Repeat([]byte{0x80}, 9), 0x00)...),
+ },
+ {name: "truncated ref-delta base", data: []byte{0x73, 0xab, 0xab}},
+ {name: "truncated ofs distance", data: []byte{0x65, 0x80}},
+ {name: "type invalid", data: []byte{0x05}},
+ {name: "type future", data: []byte{0x55}},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, err := packfile.ParseEntryHeader(tc.data, 20)
+ if !errors.Is(err, packfile.ErrMalformedEntryHeader) {
+ t.Fatalf("ParseEntryHeader error = %v, want ErrMalformedEntryHeader", err)
+ }
+ })
+ }
+}
+
+func TestParseEntryHeaderBadHashSize(t *testing.T) {
+ t.Parallel()
+
+ for _, hashSize := range []int{-1, 0, id.MaxObjectIDSize + 1} {
+ func() {
+ defer func() {
+ if recover() == nil {
+ t.Fatalf("ParseEntryHeader hash size %d: expected panic", hashSize)
+ }
+ }()
+
+ _, _ = packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize)
+ }()
+ }
+}
+
+func TestTypeSizeRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ baseTypes := []packfile.EntryType{
+ packfile.EntryTypeCommit,
+ packfile.EntryTypeTree,
+ packfile.EntryTypeBlob,
+ packfile.EntryTypeTag,
+ }
+
+ sizes := []uint64{
+ 0, 1, 15, 16, 127, 128, 1 << 20, 1 << 57, math.MaxUint64,
+ }
+
+ for _, entryType := range baseTypes {
+ for _, size := range sizes {
+ data := packfile.AppendTypeSize(nil, entryType, size)
+ if len(data) > packfile.MaxTypeSizeLen {
+ t.Fatalf("AppendTypeSize(%d, %d) length = %d, want <= %d",
+ entryType, size, len(data), packfile.MaxTypeSizeLen)
+ }
+
+ header, err := packfile.ParseEntryHeader(data, 20)
+ if err != nil {
+ t.Fatalf("ParseEntryHeader: %v", err)
+ }
+
+ if header.Type != entryType {
+ t.Fatalf("round trip type = %d, want %d", header.Type, entryType)
+ }
+
+ if header.Size != size {
+ t.Fatalf("round trip size = %d, want %d", header.Size, size)
+ }
+
+ if header.HeaderLen != len(data) {
+ t.Fatalf("round trip header len = %d, want %d", header.HeaderLen, len(data))
+ }
+ }
+ }
+}
+
+func TestRefDeltaHeaderRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ hashSize := objectFormat.Size()
+
+ base := bytes.Repeat([]byte{0xcd}, hashSize)
+ data := packfile.AppendTypeSize(nil, packfile.EntryTypeRefDelta, 42)
+ data = append(data, base...)
+
+ header, err := packfile.ParseEntryHeader(data, hashSize)
+ if err != nil {
+ t.Fatalf("ParseEntryHeader: %v", err)
+ }
+
+ if !bytes.Equal(header.RefBase[:hashSize], base) {
+ t.Fatalf("round trip ref base mismatch")
+ }
+
+ if header.HeaderLen != len(data) {
+ t.Fatalf("round trip header len = %d, want %d", header.HeaderLen, len(data))
+ }
+ })
+ }
+}
+
+func TestOfsDeltaHeaderRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ data := packfile.AppendTypeSize(nil, packfile.EntryTypeOfsDelta, 7)
+ data = packfile.AppendOfsDeltaDistance(data, 123456)
+
+ header, err := packfile.ParseEntryHeader(data, 20)
+ if err != nil {
+ t.Fatalf("ParseEntryHeader: %v", err)
+ }
+
+ if header.OfsDistance != 123456 {
+ t.Fatalf("round trip distance = %d, want 123456", header.OfsDistance)
+ }
+
+ if header.HeaderLen != len(data) {
+ t.Fatalf("round trip header len = %d, want %d", header.HeaderLen, len(data))
+ }
+}
+
+func TestMaxEntryHeaderLenBounds(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ hashSize := objectFormat.Size()
+
+ maxLen := packfile.MaxEntryHeaderLen(hashSize)
+ if maxLen < packfile.MaxTypeSizeLen+hashSize {
+ t.Fatalf("MaxEntryHeaderLen(%d) = %d cannot fit a ref-delta header", hashSize, maxLen)
+ }
+
+ if maxLen < packfile.MaxTypeSizeLen+packfile.MaxOfsDeltaDistanceLen {
+ t.Fatalf("MaxEntryHeaderLen(%d) = %d cannot fit an ofs-delta header", hashSize, maxLen)
+ }
+ }
+}
diff --git a/internal/format/packfile/entry_type.go b/internal/format/packfile/entry_type.go
new file mode 100644
index 00000000..b4baceb4
--- /dev/null
+++ b/internal/format/packfile/entry_type.go
@@ -0,0 +1,86 @@
+package packfile
+
+import (
+ "errors"
+
+ "lindenii.org/go/furgit/object/typ"
+)
+
+var (
+ // ErrInternalEntryType reports that
+ // a supplied packfile [EntryType]
+ // cannot be converted into an ordinary [typ.Type]
+ // since it is a packfile implementation detail.
+ ErrInternalEntryType = errors.New("internal/format/packfile: packfile-internal entry type cannot be converted to an object type")
+
+ // ErrUnrepresentableObjectType reports that
+ // a supplied ordinary [typ.Type]
+ // is not currently representable
+ // as a packfile [EntryType].
+ ErrUnrepresentableObjectType = errors.New("internal/format/packfile: object type not representable in packfiles")
+)
+
+// EntryType represents the type of an entry in a git packfile.
+type EntryType uint8
+
+const (
+ EntryTypeInvalid EntryType = 0
+ EntryTypeCommit EntryType = 1
+ EntryTypeTree EntryType = 2
+ EntryTypeBlob EntryType = 3
+ EntryTypeTag EntryType = 4
+ EntryTypeFuture EntryType = 5
+ EntryTypeOfsDelta EntryType = 6
+ EntryTypeRefDelta EntryType = 7
+)
+
+// EntryTypeFromObjectType converts an ordinary [typ.Type] into a packfile [EntryType].
+func EntryTypeFromObjectType(ty typ.Type) (EntryType, error) {
+ switch ty {
+ case typ.Commit:
+ return EntryTypeCommit, nil
+ case typ.Tree:
+ return EntryTypeTree, nil
+ case typ.Blob:
+ return EntryTypeBlob, nil
+ case typ.Tag:
+ return EntryTypeTag, nil
+ case typ.Unknown:
+ }
+
+ return EntryTypeInvalid, ErrUnrepresentableObjectType
+}
+
+// ObjectType converts a packfile [EntryType] into an ordinary [typ.Type].
+func (entryType EntryType) ObjectType() (typ.Type, error) {
+ switch entryType {
+ case EntryTypeCommit:
+ return typ.Commit, nil
+ case EntryTypeTree:
+ return typ.Tree, nil
+ case EntryTypeBlob:
+ return typ.Blob, nil
+ case EntryTypeTag:
+ return typ.Tag, nil
+ case EntryTypeInvalid, EntryTypeFuture, EntryTypeOfsDelta, EntryTypeRefDelta:
+ }
+
+ return typ.Unknown, ErrInternalEntryType
+}
+
+// IsBase reports whether the entry type
+// is a base (non-delta) object type.
+func (entryType EntryType) IsBase() bool {
+ switch entryType {
+ case EntryTypeCommit, EntryTypeTree, EntryTypeBlob, EntryTypeTag:
+ return true
+ case EntryTypeInvalid, EntryTypeFuture, EntryTypeOfsDelta, EntryTypeRefDelta:
+ }
+
+ return false
+}
+
+// IsDelta reports whether the entry type is a delta type.
+func (entryType EntryType) IsDelta() bool {
+ return entryType == EntryTypeOfsDelta || entryType == EntryTypeRefDelta
+}
diff --git a/internal/format/packfile/header.go b/internal/format/packfile/header.go
new file mode 100644
index 00000000..64c7d140
--- /dev/null
+++ b/internal/format/packfile/header.go
@@ -0,0 +1,63 @@
+package packfile
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+)
+
+const (
+ // signature is the 4-byte "PACK" magic at the start of pack files.
+ signature = 0x5041434b
+
+ // version is the only pack version this package reads or writes.
+ version = 2
+)
+
+// HeaderLen is the size of a pack header:
+// signature, version, and object count.
+const HeaderLen = 12
+
+// ErrMalformedHeader reports that
+// a pack header is truncated,
+// or has a bad signature or unsupported version.
+var ErrMalformedHeader = errors.New("internal/format/packfile: malformed pack header")
+
+// Header is one parsed pack header.
+type Header struct {
+ // ObjectCount is the number of object entries
+ // declared to follow the header.
+ ObjectCount uint32
+}
+
+// ParseHeader parses and validates the pack header
+// at the start of data.
+func ParseHeader(data []byte) (Header, error) {
+ var zero Header
+
+ if len(data) < HeaderLen {
+ return zero, fmt.Errorf("%w: truncated", ErrMalformedHeader)
+ }
+
+ if binary.BigEndian.Uint32(data[0:4]) != signature {
+ return zero, fmt.Errorf("%w: bad signature", ErrMalformedHeader)
+ }
+
+ parsedVersion := binary.BigEndian.Uint32(data[4:8])
+ if parsedVersion != version {
+ return zero, fmt.Errorf("%w: unsupported version %d", ErrMalformedHeader, parsedVersion)
+ }
+
+ return Header{
+ ObjectCount: binary.BigEndian.Uint32(data[8:12]),
+ }, nil
+}
+
+// AppendHeader appends an encoded pack header for objectCount to dst.
+func AppendHeader(dst []byte, objectCount uint32) []byte {
+ dst = binary.BigEndian.AppendUint32(dst, signature)
+ dst = binary.BigEndian.AppendUint32(dst, version)
+ dst = binary.BigEndian.AppendUint32(dst, objectCount)
+
+ return dst
+}
diff --git a/internal/format/packfile/ofs.go b/internal/format/packfile/ofs.go
new file mode 100644
index 00000000..5fb8ecdd
--- /dev/null
+++ b/internal/format/packfile/ofs.go
@@ -0,0 +1,68 @@
+package packfile
+
+import (
+ "errors"
+ "fmt"
+ "math"
+)
+
+// ErrMalformedOfsDeltaDistance reports that
+// an ofs-delta backward distance encoding
+// is truncated, overlong, or overflows uint64.
+var ErrMalformedOfsDeltaDistance = errors.New("internal/format/packfile: malformed ofs-delta distance")
+
+// MaxOfsDeltaDistanceLen is the maximum encoded length
+// of an ofs-delta backward distance.
+// Every uint64 distance is encodable within this bound,
+// and [ParseOfsDeltaDistance] rejects longer encodings.
+const MaxOfsDeltaDistanceLen = 10
+
+// ParseOfsDeltaDistance parses an ofs-delta backward distance.
+func ParseOfsDeltaDistance(buf []byte) (dist uint64, consumed int, err error) {
+ if len(buf) == 0 {
+ return 0, 0, fmt.Errorf("%w: truncated", ErrMalformedOfsDeltaDistance)
+ }
+
+ b := buf[0]
+ dist = uint64(b & 0x7f)
+
+ consumed = 1
+ for b&0x80 != 0 {
+ if consumed >= MaxOfsDeltaDistanceLen {
+ return 0, 0, fmt.Errorf("%w: overlong encoding", ErrMalformedOfsDeltaDistance)
+ }
+
+ if consumed >= len(buf) {
+ return 0, 0, fmt.Errorf("%w: truncated", ErrMalformedOfsDeltaDistance)
+ }
+
+ if dist >= math.MaxUint64>>7 {
+ return 0, 0, fmt.Errorf("%w: overflows uint64", ErrMalformedOfsDeltaDistance)
+ }
+
+ b = buf[consumed]
+ consumed++
+ dist = ((dist + 1) << 7) | uint64(b&0x7f)
+ }
+
+ return dist, consumed, nil
+}
+
+// AppendOfsDeltaDistance appends the encoding of
+// an ofs-delta backward distance to dst.
+func AppendOfsDeltaDistance(dst []byte, dist uint64) []byte {
+ var buf [MaxOfsDeltaDistanceLen]byte
+
+ pos := len(buf) - 1
+ buf[pos] = byte(dist & 0x7f)
+
+ dist >>= 7
+ for dist != 0 {
+ dist--
+ pos--
+ buf[pos] = 0x80 | byte(dist&0x7f)
+ dist >>= 7
+ }
+
+ return append(dst, buf[pos:]...)
+}
diff --git a/internal/format/packfile/ofs_test.go b/internal/format/packfile/ofs_test.go
new file mode 100644
index 00000000..736f1ac9
--- /dev/null
+++ b/internal/format/packfile/ofs_test.go
@@ -0,0 +1,100 @@
+package packfile_test
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile"
+)
+
+func TestParseOfsDeltaDistance(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ dist uint64
+ consumed int
+ }{
+ {name: "zero", data: []byte{0x00}, dist: 0, consumed: 1},
+ {name: "one byte max", data: []byte{0x7f}, dist: 127, consumed: 1},
+ {name: "two byte min", data: []byte{0x80, 0x00}, dist: 128, consumed: 2},
+ {name: "two byte max", data: []byte{0xff, 0x7f}, dist: 16511, consumed: 2},
+ {name: "trailing bytes ignored", data: []byte{0x7f, 0xde, 0xad}, dist: 127, consumed: 1},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ dist, consumed, err := packfile.ParseOfsDeltaDistance(tc.data)
+ if err != nil {
+ t.Fatalf("ParseOfsDeltaDistance: %v", err)
+ }
+
+ if dist != tc.dist {
+ t.Fatalf("ParseOfsDeltaDistance = %d, want %d", dist, tc.dist)
+ }
+
+ if consumed != tc.consumed {
+ t.Fatalf("ParseOfsDeltaDistance consumed = %d, want %d", consumed, tc.consumed)
+ }
+ })
+ }
+}
+
+func TestParseOfsDeltaDistanceMalformed(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated", data: []byte{0x80}},
+ {name: "overflow", data: bytes.Repeat([]byte{0xff}, 10)},
+ {name: "overlong", data: append(bytes.Repeat([]byte{0x80}, 10), 0x00)},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, _, err := packfile.ParseOfsDeltaDistance(tc.data)
+ if !errors.Is(err, packfile.ErrMalformedOfsDeltaDistance) {
+ t.Fatalf("ParseOfsDeltaDistance error = %v, want ErrMalformedOfsDeltaDistance", err)
+ }
+ })
+ }
+}
+
+func TestOfsDeltaDistanceRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ distances := []uint64{
+ 0, 1, 127, 128, 16511, 16512, 1 << 31, 1 << 57, math.MaxUint64,
+ }
+
+ for _, dist := range distances {
+ data := packfile.AppendOfsDeltaDistance(nil, dist)
+ if len(data) > packfile.MaxOfsDeltaDistanceLen {
+ t.Fatalf("AppendOfsDeltaDistance(%d) length = %d, want <= %d",
+ dist, len(data), packfile.MaxOfsDeltaDistanceLen)
+ }
+
+ got, consumed, err := packfile.ParseOfsDeltaDistance(data)
+ if err != nil {
+ t.Fatalf("ParseOfsDeltaDistance: %v", err)
+ }
+
+ if got != dist {
+ t.Fatalf("round trip = %d, want %d", got, dist)
+ }
+
+ if consumed != len(data) {
+ t.Fatalf("round trip consumed = %d, want %d", consumed, len(data))
+ }
+ }
+}