diff options
Diffstat (limited to 'internal')
26 files changed, 1568 insertions, 82 deletions
diff --git a/internal/cache/clock/bench_test.go b/internal/cache/clock/bench_test.go index 917aac3b..48bdff99 100644 --- a/internal/cache/clock/bench_test.go +++ b/internal/cache/clock/bench_test.go @@ -28,9 +28,9 @@ func BenchmarkReadHeavy(b *testing.B) { // ≈95% Get hits, ≈5% Add, and fits budget. const n = 100_000 - cache := New(n, benchWeight) + clock := New(n, benchWeight) for k := range n { - cache.Add(k, k) + clock.Add(k, k) } b.ResetTimer() @@ -41,9 +41,9 @@ func BenchmarkReadHeavy(b *testing.B) { key := i % n if i%20 == 0 { - cache.Add(key, key) + clock.Add(key, key) } else { - _, _ = cache.Get(key) + _, _ = clock.Get(key) } } }) @@ -56,9 +56,9 @@ func BenchmarkHotKey(b *testing.B) { maxWeight = 4096 ) - cache := New(maxWeight, benchWeight) + clock := New(maxWeight, benchWeight) for k := range hot { - cache.Add(k, k) + clock.Add(k, k) } b.ResetTimer() @@ -66,7 +66,7 @@ func BenchmarkHotKey(b *testing.B) { i := workerOffset() for pb.Next() { i++ - _, _ = cache.Get(i % hot) + _, _ = clock.Get(i % hot) } }) } @@ -75,9 +75,9 @@ func BenchmarkMixed(b *testing.B) { // Even split of Get and Add over a working set that fits. const n = 100_000 - cache := New(n, benchWeight) + clock := New(n, benchWeight) for k := range n { - cache.Add(k, k) + clock.Add(k, k) } b.ResetTimer() @@ -88,9 +88,9 @@ func BenchmarkMixed(b *testing.B) { key := i % n if i%2 == 0 { - cache.Add(key, key) + clock.Add(key, key) } else { - _, _ = cache.Get(key) + _, _ = clock.Get(key) } } }) @@ -98,11 +98,11 @@ func BenchmarkMixed(b *testing.B) { func BenchmarkChurn(b *testing.B) { // Every op inserts a fresh key into a small budget. - // I don't think this is likely to happen for git delta caches, + // I don't think this is likely to happen for git delta clocks, // but this may matter if we use this for other workloads later. const maxWeight = 4096 - cache := New(maxWeight, benchWeight) + clock := New(maxWeight, benchWeight) b.ResetTimer() b.RunParallel(func(pb *testing.PB) { @@ -111,7 +111,7 @@ func BenchmarkChurn(b *testing.B) { i := 0 for pb.Next() { i++ - cache.Add(base+i, i) + clock.Add(base+i, i) } }) } diff --git a/internal/cache/clock/cache.go b/internal/cache/clock/clock.go index 31d97082..27d83c7b 100644 --- a/internal/cache/clock/cache.go +++ b/internal/cache/clock/clock.go @@ -17,14 +17,14 @@ const maxShards = 16 // WeightFunc reports one entry's weight, used for eviction budgeting. type WeightFunc[K comparable, V any] func(key K, value V) uint64 -// Cache is a concurrent, weight-bounded cache +// Clock is a concurrent, weight-bounded cache // with CLOCK eviction. // // Reads are lock-free; // writes lock only the shard that owns the key. // // Labels: MT-Safe. -type Cache[K comparable, V any] struct { +type Clock[K comparable, V any] struct { shards []*shard[K, V] seed maphash.Seed mask uint64 @@ -35,7 +35,7 @@ type Cache[K comparable, V any] struct { // weighing entries with weightFn. // // New panics if weightFn is nil. -func New[K comparable, V any](maxWeight uint64, weightFn WeightFunc[K, V]) *Cache[K, V] { +func New[K comparable, V any](maxWeight uint64, weightFn WeightFunc[K, V]) *Clock[K, V] { if weightFn == nil { panic("internal/clock: nil weight function") } @@ -48,7 +48,7 @@ func New[K comparable, V any](maxWeight uint64, weightFn WeightFunc[K, V]) *Cach shards[i] = newShard[K, V](perShard) } - return &Cache[K, V]{ + return &Clock[K, V]{ shards: shards, seed: maphash.MakeSeed(), mask: mask, @@ -81,6 +81,6 @@ func shardLayout(maxWeight uint64) (int, uint64) { } // shardFor returns the shard that owns key. -func (cache *Cache[K, V]) shardFor(key K) *shard[K, V] { - return cache.shards[maphash.Comparable(cache.seed, key)&cache.mask] +func (clock *Clock[K, V]) shardFor(key K) *shard[K, V] { + return clock.shards[maphash.Comparable(clock.seed, key)&clock.mask] } diff --git a/internal/cache/clock/cache_ops.go b/internal/cache/clock/clock_ops.go index 18958202..a21f44c3 100644 --- a/internal/cache/clock/cache_ops.go +++ b/internal/cache/clock/clock_ops.go @@ -5,28 +5,28 @@ package clock // It reports whether the entry was admitted; // an entry heavier than the per-shard budget is rejected // and leaves the cache unchanged. -func (cache *Cache[K, V]) Add(key K, value V) bool { - return cache.shardFor(key).add(key, value, cache.weightFn(key, value)) +func (clock *Clock[K, V]) Add(key K, value V) bool { + return clock.shardFor(key).add(key, value, clock.weightFn(key, value)) } // Get returns the value for key and marks it recently used. // //nolint:ireturn -func (cache *Cache[K, V]) Get(key K) (V, bool) { - return cache.shardFor(key).get(key) +func (clock *Clock[K, V]) Get(key K) (V, bool) { + return clock.shardFor(key).get(key) } // Peek returns the value for key without changing its recency. // //nolint:ireturn -func (cache *Cache[K, V]) Peek(key K) (V, bool) { - return cache.shardFor(key).peek(key) +func (clock *Clock[K, V]) Peek(key K) (V, bool) { + return clock.shardFor(key).peek(key) } // Len returns the number of cached entries. -func (cache *Cache[K, V]) Len() int { +func (clock *Clock[K, V]) Len() int { total := 0 - for _, shard := range cache.shards { + for _, shard := range clock.shards { total += shard.len() } @@ -34,9 +34,9 @@ func (cache *Cache[K, V]) Len() int { } // Weight returns the current total weight across all shards. -func (cache *Cache[K, V]) Weight() uint64 { +func (clock *Clock[K, V]) Weight() uint64 { var total uint64 - for _, shard := range cache.shards { + for _, shard := range clock.shards { total += shard.loadWeight() } @@ -44,8 +44,8 @@ func (cache *Cache[K, V]) Weight() uint64 { } // Clear removes all entries. -func (cache *Cache[K, V]) Clear() { - for _, shard := range cache.shards { +func (clock *Clock[K, V]) Clear() { + for _, shard := range clock.shards { shard.clear() } } diff --git a/internal/cache/clock/cache_test.go b/internal/cache/clock/clock_test.go index 3d734b39..36622818 100644 --- a/internal/cache/clock/cache_test.go +++ b/internal/cache/clock/clock_test.go @@ -21,21 +21,21 @@ func byteWeight(_ string, value string) uint64 { func TestCacheAddGetPeek(t *testing.T) { t.Parallel() - cache := clock.New(1<<20, byteWeight) + clock := clock.New(1<<20, byteWeight) - if !cache.Add("a", "alpha") { + if !clock.Add("a", "alpha") { t.Fatalf("Add(a) should succeed") } - if got, ok := cache.Get("a"); !ok || got != "alpha" { + if got, ok := clock.Get("a"); !ok || got != "alpha" { t.Fatalf("Get(a) = (%q, %v), want (alpha, true)", got, ok) } - if got, ok := cache.Peek("a"); !ok || got != "alpha" { + if got, ok := clock.Peek("a"); !ok || got != "alpha" { t.Fatalf("Peek(a) = (%q, %v), want (alpha, true)", got, ok) } - if _, ok := cache.Get("missing"); ok { + if _, ok := clock.Get("missing"); ok { t.Fatalf("Get(missing) should miss") } } @@ -45,14 +45,14 @@ func TestCacheWeightStaysBounded(t *testing.T) { const maxWeight = 4096 - cache := clock.New(maxWeight, byteWeight) + clock := clock.New(maxWeight, byteWeight) value := strings.Repeat("x", 64) for i := range 1000 { - cache.Add(fmt.Sprintf("key-%d", i), value) + clock.Add(fmt.Sprintf("key-%d", i), value) } - if got := cache.Weight(); got > maxWeight { + if got := clock.Weight(); got > maxWeight { t.Fatalf("weight = %d, exceeds max %d", got, maxWeight) } } @@ -60,23 +60,23 @@ func TestCacheWeightStaysBounded(t *testing.T) { func TestCacheLenAndClear(t *testing.T) { t.Parallel() - cache := clock.New(1<<20, byteWeight) + clock := clock.New(1<<20, byteWeight) for i := range 10 { - cache.Add(fmt.Sprintf("key-%d", i), "v") + clock.Add(fmt.Sprintf("key-%d", i), "v") } - if got := cache.Len(); got != 10 { + if got := clock.Len(); got != 10 { t.Fatalf("Len = %d, want 10", got) } - cache.Clear() + clock.Clear() - if got := cache.Len(); got != 0 { + if got := clock.Len(); got != 0 { t.Fatalf("Len after Clear = %d, want 0", got) } - if got := cache.Weight(); got != 0 { + if got := clock.Weight(); got != 0 { t.Fatalf("Weight after Clear = %d, want 0", got) } } @@ -84,17 +84,17 @@ func TestCacheLenAndClear(t *testing.T) { func TestCacheRejectsOversized(t *testing.T) { t.Parallel() - cache := clock.New(4, byteWeight) + clock := clock.New(4, byteWeight) - if cache.Add("a", "xxxxx") { + if clock.Add("a", "xxxxx") { t.Fatalf("oversized Add should report false") } - if _, ok := cache.Get("a"); ok { - t.Fatalf("oversized entry must not be cached") + if _, ok := clock.Get("a"); ok { + t.Fatalf("oversized entry must not be clockd") } - if got := cache.Weight(); got != 0 { + if got := clock.Weight(); got != 0 { t.Fatalf("weight = %d, want 0", got) } } diff --git a/internal/cache/clock/concurrent_test.go b/internal/cache/clock/concurrent_test.go index 86283a9b..4b7da162 100644 --- a/internal/cache/clock/concurrent_test.go +++ b/internal/cache/clock/concurrent_test.go @@ -19,7 +19,7 @@ func TestConcurrentStress(t *testing.T) { rounds = 5000 ) - cache := New(maxWeight, func(_ int, _ int) uint64 { return 1 }) + clock := New(maxWeight, func(_ int, _ int) uint64 { return 1 }) var wg sync.WaitGroup @@ -30,13 +30,13 @@ func TestConcurrentStress(t *testing.T) { switch i % 4 { case 0, 1: - cache.Add(key, keyValue(key)) + clock.Add(key, keyValue(key)) case 2: - if got, ok := cache.Get(key); ok && got != keyValue(key) { + if got, ok := clock.Get(key); ok && got != keyValue(key) { t.Errorf("Get(%d) = %d, want %d", key, got, keyValue(key)) } case 3: - if got, ok := cache.Peek(key); ok && got != keyValue(key) { + if got, ok := clock.Peek(key); ok && got != keyValue(key) { t.Errorf("Peek(%d) = %d, want %d", key, got, keyValue(key)) } } @@ -46,9 +46,9 @@ func TestConcurrentStress(t *testing.T) { wg.Wait() - checkCache(t, cache) + checkCache(t, clock) - if got := cache.Weight(); got > maxWeight { + if got := clock.Weight(); got > maxWeight { t.Fatalf("weight %d exceeds max %d", got, maxWeight) } } @@ -64,7 +64,7 @@ func TestReadDuringEviction(t *testing.T) { rounds = 20000 ) - cache := New(maxWeight, func(_ int, _ int) uint64 { return 1 }) + clock := New(maxWeight, func(_ int, _ int) uint64 { return 1 }) var wg sync.WaitGroup @@ -72,7 +72,7 @@ func TestReadDuringEviction(t *testing.T) { wg.Go(func() { for i := range rounds { key := i % hot - cache.Add(key, keyValue(key)) + clock.Add(key, keyValue(key)) } }) } @@ -82,11 +82,11 @@ func TestReadDuringEviction(t *testing.T) { for i := range rounds { key := i % hot - if got, ok := cache.Get(key); ok && got != keyValue(key) { + if got, ok := clock.Get(key); ok && got != keyValue(key) { t.Errorf("Get(%d) = %d, want %d", key, got, keyValue(key)) } - if got, ok := cache.Peek(key); ok && got != keyValue(key) { + if got, ok := clock.Peek(key); ok && got != keyValue(key) { t.Errorf("Peek(%d) = %d, want %d", key, got, keyValue(key)) } } @@ -95,9 +95,9 @@ func TestReadDuringEviction(t *testing.T) { wg.Wait() - checkCache(t, cache) + checkCache(t, clock) - if got := cache.Weight(); got > maxWeight { + if got := clock.Weight(); got > maxWeight { t.Fatalf("weight %d exceeds max %d", got, maxWeight) } } diff --git a/internal/cache/clock/invariant_test.go b/internal/cache/clock/invariant_test.go index 2efd7ff9..04896ca1 100644 --- a/internal/cache/clock/invariant_test.go +++ b/internal/cache/clock/invariant_test.go @@ -79,10 +79,10 @@ func checkShard[K comparable, V any](t *testing.T, shard *shard[K, V]) { } // checkCache verifies every shard's invariants. -func checkCache[K comparable, V any](t *testing.T, cache *Cache[K, V]) { +func checkCache[K comparable, V any](t *testing.T, clock *Clock[K, V]) { t.Helper() - for _, shard := range cache.shards { + for _, shard := range clock.shards { checkShard(t, shard) } } diff --git a/internal/compress/zlib/writer.go b/internal/compress/zlib/writer.go index af2d8bf3..98053e71 100644 --- a/internal/compress/zlib/writer.go +++ b/internal/compress/zlib/writer.go @@ -149,7 +149,7 @@ func (z *Writer) Write(p []byte) (n int, err error) { return 0, z.err } - return n, err //nolint:wrapcheck + return n, err } // Flush flushes the Writer to its underlying [io.Writer]. diff --git a/internal/compress/zlib/writer_header.go b/internal/compress/zlib/writer_header.go index bd0fe4c3..e23e73d6 100644 --- a/internal/compress/zlib/writer_header.go +++ b/internal/compress/zlib/writer_header.go @@ -43,7 +43,7 @@ func (z *Writer) writeHeader() (err error) { _, err = z.w.Write(z.scratch[0:2]) if err != nil { - return err //nolint:wrapcheck + return err } if z.dict != nil { @@ -52,7 +52,7 @@ func (z *Writer) writeHeader() (err error) { _, err = z.w.Write(z.scratch[0:4]) if err != nil { - return err //nolint:wrapcheck + return err } } diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go new file mode 100644 index 00000000..d0918839 --- /dev/null +++ b/internal/format/packfile/delta/apply.go @@ -0,0 +1,117 @@ +package delta + +import ( + "fmt" + + "lindenii.org/go/lgo/intconv" +) + +// Apply applies one inflated delta payload to base +// and returns the reconstructed result. +// +// delta must include the leading base/result size header. +func Apply(base, delta []byte) ([]byte, error) { + baseSize, resultSize, pos, err := ParseHeaderSizes(delta) + if err != nil { + return nil, err + } + + if baseSize != uint64(len(base)) { + return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta) + } + + outLen, err := intconv.Uint64ToInt(resultSize) + if err != nil { + return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta) + } + + out := make([]byte, outLen) + outPos := 0 + + for pos < len(delta) { + op := delta[pos] + pos++ + + switch { + case op&0x80 != 0: + outPos, err = applyCopy(out, outPos, base, delta, &pos, op) + case op != 0: + outPos, err = applyInsert(out, outPos, delta, &pos, int(op)) + default: + err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta) + } + + if err != nil { + return nil, err + } + } + + if outPos != len(out) { + return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta) + } + + return out, nil +} + +// applyCopy executes one copy instruction, +// copying a base range into out, +// and returns the new output position. +func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) { + off, err := parseCopyOperand(delta, pos, op, 0, 4) + if err != nil { + return 0, err + } + + n, err := parseCopyOperand(delta, pos, op, 4, 3) + if err != nil { + return 0, err + } + + if n == 0 { + n = 0x10000 + } + + if off+n > len(base) || outPos+n > len(out) { + return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta) + } + + copy(out[outPos:outPos+n], base[off:off+n]) + + return outPos + n, nil +} + +// applyInsert executes one insert instruction, +// copying n literal delta bytes into out, +// and returns the new output position. +func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) { + if *pos+n > len(delta) || outPos+n > len(out) { + return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta) + } + + copy(out[outPos:outPos+n], delta[*pos:*pos+n]) + *pos += n + + return outPos + n, nil +} + +// parseCopyOperand assembles one little-endian copy instruction operand +// from the operand bytes selected by op's flag bits +// firstBit through firstBit+count-1. +func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) { + value := 0 + + for i := range count { + if op&(1<<(firstBit+uint(i))) == 0 { + continue + } + + if *pos >= len(delta) { + return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta) + } + + value |= int(delta[*pos]) << (8 * i) + *pos++ + } + + return value, nil +} diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go new file mode 100644 index 00000000..bdf454d9 --- /dev/null +++ b/internal/format/packfile/delta/apply_test.go @@ -0,0 +1,174 @@ +package delta_test + +import ( + "bytes" + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +func TestApplyInsert(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, 0, 5) + data = append(data, 5, 'h', 'e', 'l', 'l', 'o') + + out, err := delta.Apply(nil, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "hello" { + t.Fatalf("Apply = %q, want %q", out, "hello") + } +} + +func TestApplyCopy(t *testing.T) { + t.Parallel() + + base := []byte("0123456789") + + // One offset byte and one size byte. + data := delta.AppendHeaderSizes(nil, 10, 4) + data = append(data, 0x91, 3, 4) + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "3456" { + t.Fatalf("Apply = %q, want %q", out, "3456") + } +} + +func TestApplyCopyImplicitSize(t *testing.T) { + t.Parallel() + + base := bytes.Repeat([]byte{0xab}, 0x10000+10) + + // Offset 0 and the implicit copy size 0x10000. + data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000) + data = append(data, 0x80) + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if !bytes.Equal(out, base[:0x10000]) { + t.Fatalf("Apply implicit-size copy mismatch") + } +} + +func TestApplyMixed(t *testing.T) { + t.Parallel() + + base := []byte("the quick brown fox") + + data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9) + data = append(data, 0x91, 4, 5) // copy "quick" + data = append(data, 3, '!', '?', '!') // insert "!?!" + data = append(data, 0x91, 16, 1) // copy "f" + + out, err := delta.Apply(base, data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if string(out) != "quick!?!f" { + t.Fatalf("Apply = %q, want %q", out, "quick!?!f") + } +} + +func TestApplyEmptyResult(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, 3, 0) + + out, err := delta.Apply([]byte("abc"), data) + if err != nil { + t.Fatalf("Apply: %v", err) + } + + if len(out) != 0 { + t.Fatalf("Apply = %q, want empty", out) + } +} + +func TestApplyMalformed(t *testing.T) { + t.Parallel() + + base := []byte("0123456789") + + cases := []struct { + name string + ops []byte + baseSize uint64 + resultSize uint64 + }{ + { + name: "opcode zero", + ops: []byte{0x00}, + baseSize: 10, + resultSize: 1, + }, + { + name: "truncated copy operand", + ops: []byte{0x91, 3}, + baseSize: 10, + resultSize: 4, + }, + { + name: "copy past base", + ops: []byte{0x91, 8, 5}, + baseSize: 10, + resultSize: 5, + }, + { + name: "copy past result", + ops: []byte{0x91, 0, 5}, + baseSize: 10, + resultSize: 3, + }, + { + name: "insert past delta", + ops: []byte{5, 'a', 'b'}, + baseSize: 10, + resultSize: 5, + }, + { + name: "insert past result", + ops: []byte{5, 'a', 'b', 'c', 'd', 'e'}, + baseSize: 10, + resultSize: 3, + }, + { + name: "base size mismatch", + ops: []byte{0x91, 0, 1}, + baseSize: 9, + resultSize: 1, + }, + { + name: "result shorter than declared", + ops: []byte{0x91, 0, 1}, + baseSize: 10, + resultSize: 2, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize) + data = append(data, tc.ops...) + + _, err := delta.Apply(base, data) + if !errors.Is(err, delta.ErrMalformedDelta) { + t.Fatalf("Apply error = %v, want ErrMalformedDelta", err) + } + }) + } +} diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go new file mode 100644 index 00000000..d5dd93fb --- /dev/null +++ b/internal/format/packfile/delta/header.go @@ -0,0 +1,98 @@ +package delta + +import ( + "errors" + "fmt" +) + +// ErrMalformedDelta reports that +// a delta payload is truncated, overlong, +// declares sizes that overflow uint64, +// contains invalid instructions, +// or does not match the supplied base or declared sizes. +var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta") + +// MaxSizeVarintLen is the maximum encoded length +// of one delta header size varint. +// Every uint64 size is encodable within this bound, +// and parsing rejects longer encodings. +const MaxSizeVarintLen = 10 + +// MaxHeaderSizesLen is the maximum encoded length +// of the base/result size header of a delta payload. +// +// Callers reading a delta from a stream may inflate +// MaxHeaderSizesLen bytes +// (or fewer if the payload ends sooner) +// and parse with [ParseHeaderSizes]; +// no valid header is longer. +const MaxHeaderSizesLen = 2 * MaxSizeVarintLen + +// ParseHeaderSizes parses the base size and result size varints +// at the beginning of one inflated delta payload. +func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) { + baseSize, consumed, err = parseSizeVarint(data, 0) + if err != nil { + return 0, 0, 0, err + } + + resultSize, consumed, err = parseSizeVarint(data, consumed) + if err != nil { + return 0, 0, 0, err + } + + return baseSize, resultSize, consumed, nil +} + +// AppendHeaderSizes appends the base/result size header encoding +// of one delta payload to dst. +func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte { + dst = appendSizeVarint(dst, baseSize) + + return appendSizeVarint(dst, resultSize) +} + +// appendSizeVarint appends the encoding of one delta header size varint to dst. +func appendSizeVarint(dst []byte, value uint64) []byte { + for value >= 0x80 { + dst = append(dst, byte(value)|0x80) + value >>= 7 + } + + return append(dst, byte(value)) +} + +// parseSizeVarint parses one delta header size varint +// beginning at pos, +// and returns the position past it. +func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) { + start := pos + + shift := uint(0) + + for { + if pos-start >= MaxSizeVarintLen { + return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta) + } + + if pos >= len(data) { + return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta) + } + + b := data[pos] + pos++ + + group := uint64(b & 0x7f) + if group<<shift>>shift != group { + return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta) + } + + value |= group << shift + + if b&0x80 == 0 { + return value, pos, nil + } + + shift += 7 + } +} diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go new file mode 100644 index 00000000..8d97674f --- /dev/null +++ b/internal/format/packfile/delta/header_test.go @@ -0,0 +1,88 @@ +package delta_test + +import ( + "bytes" + "errors" + "math" + "testing" + + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +func TestParseHeaderSizesRoundTrip(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + baseSize uint64 + resultSize uint64 + }{ + {name: "zero", baseSize: 0, resultSize: 0}, + {name: "small", baseSize: 5, resultSize: 130}, + {name: "boundaries", baseSize: 127, resultSize: 128}, + {name: "large", baseSize: 1 << 32, resultSize: 1 << 57}, + {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize) + + wantConsumed := len(data) + + data = append(data, 0xde, 0xad) + + baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data) + if err != nil { + t.Fatalf("ParseHeaderSizes: %v", err) + } + + if baseSize != tc.baseSize { + t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize) + } + + if resultSize != tc.resultSize { + t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize) + } + + if consumed != wantConsumed { + t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed) + } + }) + } +} + +func TestParseHeaderSizesMalformed(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + data []byte + }{ + {name: "empty", data: []byte{}}, + {name: "truncated first varint", data: []byte{0x80}}, + {name: "missing second varint", data: []byte{0x05}}, + {name: "truncated second varint", data: []byte{0x05, 0x80}}, + { + name: "overflow", + data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f), + }, + { + name: "overlong", + data: append(bytes.Repeat([]byte{0x80}, 10), 0x00), + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, _, _, err := delta.ParseHeaderSizes(tc.data) + if !errors.Is(err, delta.ErrMalformedDelta) { + t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err) + } + }) + } +} diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go index 1428600b..e5f2d62d 100644 --- a/internal/format/packfile/entry_header.go +++ b/internal/format/packfile/entry_header.go @@ -13,12 +13,6 @@ import ( // or declares a size that overflows uint64. var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header") -// ErrInvalidHashSize reports that -// a supplied hash size is not a plausible object ID size. -// This indicates a caller bug, -// not malformed pack data. -var ErrInvalidHashSize = errors.New("internal/format/packfile: invalid hash size") - // MaxTypeSizeLen is the maximum encoded length // of the type/size prefix of an entry header. // Every uint64 size is encodable within this bound, @@ -73,7 +67,8 @@ type EntryHeader struct { // from the beginning of data. // // hashSize must be the object ID size -// of the pack's object format. +// of the pack's object format; +// ParseEntryHeader panics on implausible hash sizes. // // data need not contain the whole entry; // [MaxEntryHeaderLen] bytes always suffice. @@ -83,7 +78,7 @@ func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) { var zero EntryHeader if hashSize <= 0 || hashSize > id.MaxObjectIDSize { - return zero, fmt.Errorf("%w: %d", ErrInvalidHashSize, hashSize) + panic("internal/format/packfile: invalid hash size") } if len(data) == 0 { diff --git a/internal/format/packfile/entry_header_test.go b/internal/format/packfile/entry_header_test.go index 807c544e..79dc2740 100644 --- a/internal/format/packfile/entry_header_test.go +++ b/internal/format/packfile/entry_header_test.go @@ -130,10 +130,15 @@ func TestParseEntryHeaderBadHashSize(t *testing.T) { t.Parallel() for _, hashSize := range []int{-1, 0, id.MaxObjectIDSize + 1} { - _, err := packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize) - if err == nil { - t.Fatalf("ParseEntryHeader hash size %d: expected error", hashSize) - } + func() { + defer func() { + if recover() == nil { + t.Fatalf("ParseEntryHeader hash size %d: expected panic", hashSize) + } + }() + + _, _ = packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize) + }() } } diff --git a/internal/format/packidx/doc.go b/internal/format/packidx/doc.go new file mode 100644 index 00000000..5c6015b4 --- /dev/null +++ b/internal/format/packidx/doc.go @@ -0,0 +1,3 @@ +// Package packidx provides Git pack index (version 2) format +// parsing and writing primitives. +package packidx diff --git a/internal/format/packidx/helpers_test.go b/internal/format/packidx/helpers_test.go new file mode 100644 index 00000000..72bb4663 --- /dev/null +++ b/internal/format/packidx/helpers_test.go @@ -0,0 +1,95 @@ +package packidx_test + +import ( + "fmt" + "os" + "slices" + "strconv" + "strings" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/testgit" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/typ" +) + +// makeGitPack builds a repository with some blobs, +// packs them with git pack-objects, +// and returns the repository, the artifact path prefix, +// and the packed object IDs. +func makeGitPack(t *testing.T, objectFormat id.ObjectFormat) (*testgit.Repo, string, []id.ObjectID) { + t.Helper() + + repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat}) + if err != nil { + t.Fatalf("NewRepo: %v", err) + } + + oids := make([]id.ObjectID, 0, 40) + + for i := range 40 { + content := fmt.Sprintf("blob %d\n%s", i, strings.Repeat("filler\n", i)) + + oid, err := repo.HashObject(t, typ.Blob, strings.NewReader(content)) + if err != nil { + t.Fatalf("HashObject: %v", err) + } + + oids = append(oids, oid) + } + + prefix, err := repo.PackObjects(t, slices.Values(oids), testgit.PackObjectsOptions{}) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + return repo, prefix, oids +} + +// parseGitIdxFile reads and parses one .idx file produced by git. +func parseGitIdxFile(t *testing.T, prefix string, objectFormat id.ObjectFormat) ([]byte, packidx.Packidx) { + t.Helper() + + data, err := os.ReadFile(prefix + ".idx") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + idx, err := packidx.Parse(data, objectFormat.Size()) + if err != nil { + t.Fatalf("Parse: %v", err) + } + + return data, idx +} + +// gitPackOffsets extracts the object-to-offset mapping +// from git verify-pack -v output. +func gitPackOffsets(t *testing.T, repo *testgit.Repo, idxPath string, objectFormat id.ObjectFormat) map[string]uint64 { + t.Helper() + + out, err := repo.VerifyPack(t, idxPath) + if err != nil { + t.Fatalf("VerifyPack: %v", err) + } + + hexLen := objectFormat.HexLen() + offsets := make(map[string]uint64) + + for line := range strings.Lines(string(out)) { + fields := strings.Fields(line) + if len(fields) < 5 || len(fields[0]) != hexLen { + continue + } + + offset, err := strconv.ParseUint(fields[4], 10, 64) + if err != nil { + continue + } + + offsets[fields[0]] = offset + } + + return offsets +} diff --git a/internal/format/packidx/lookup.go b/internal/format/packidx/lookup.go new file mode 100644 index 00000000..d1293f47 --- /dev/null +++ b/internal/format/packidx/lookup.go @@ -0,0 +1,73 @@ +package packidx + +import ( + "bytes" + "encoding/binary" + "fmt" +) + +// Lookup searches the index for one object ID +// and returns its pack file offset. +// +// oid must be exactly the index's hash size; +// Lookup panics otherwise. +func (idx *Packidx) Lookup(oid []byte) (offset uint64, found bool, err error) { + if len(oid) != idx.hashSize { + panic("internal/format/packidx: invalid object ID length") + } + + lo, hi := idx.fanoutRange(oid[0]) + + for lo < hi { + mid := lo + (hi-lo)/2 + + cmp := bytes.Compare(oid, idx.OIDAt(mid)) + + switch { + case cmp == 0: + offset, err = idx.OffsetAt(mid) + if err != nil { + return 0, false, err + } + + return offset, true, nil + case cmp < 0: + hi = mid + default: + lo = mid + 1 + } + } + + return 0, false, nil +} + +// OffsetAt returns the pack file offset at one index position. +// +// OffsetAt panics when pos is out of range. +func (idx *Packidx) OffsetAt(pos int) (uint64, error) { + idx.checkPos(pos) + + raw := binary.BigEndian.Uint32(idx.data[idx.off32Off+4*pos:]) + if raw&largeOffsetFlag == 0 { + return uint64(raw), nil + } + + slot := raw &^ largeOffsetFlag + if uint64(slot) >= idx.off64Count { + return 0, fmt.Errorf("%w: 64-bit offset reference out of range", ErrMalformedPackIndex) + } + + return binary.BigEndian.Uint64(idx.data[idx.off64Off+8*int(slot):]), nil +} + +// fanoutRange returns the index position range [lo, hi) +// of object IDs beginning with first. +func (idx *Packidx) fanoutRange(first byte) (lo, hi int) { + hi = int(binary.BigEndian.Uint32(idx.data[headerLen+4*int(first):])) + + if first > 0 { + lo = int(binary.BigEndian.Uint32(idx.data[headerLen+4*(int(first)-1):])) + } + + return lo, hi +} diff --git a/internal/format/packidx/lookup_test.go b/internal/format/packidx/lookup_test.go new file mode 100644 index 00000000..514aad7a --- /dev/null +++ b/internal/format/packidx/lookup_test.go @@ -0,0 +1,99 @@ +package packidx_test + +import ( + "bytes" + "encoding/binary" + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/object/id" +) + +func TestLookupGitIndex(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, prefix, oids := makeGitPack(t, objectFormat) + _, idx := parseGitIdxFile(t, prefix, objectFormat) + + wantOffsets := gitPackOffsets(t, repo, prefix+".idx", objectFormat) + if len(wantOffsets) != len(oids) { + t.Fatalf("verify-pack offsets = %d entries, want %d", len(wantOffsets), len(oids)) + } + + for _, oid := range oids { + offset, found, err := idx.Lookup(oid.RawBytes()) + if err != nil { + t.Fatalf("Lookup(%s): %v", oid, err) + } + + if !found { + t.Fatalf("Lookup(%s) not found", oid) + } + + if offset != wantOffsets[oid.String()] { + t.Fatalf("Lookup(%s) = %d, want %d", oid, offset, wantOffsets[oid.String()]) + } + } + }) + } +} + +func TestLookupMissing(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + _, prefix, oids := makeGitPack(t, objectFormat) + _, idx := parseGitIdxFile(t, prefix, objectFormat) + + missing := bytes.Clone(oids[0].RawBytes()) + missing[len(missing)-1] ^= 0xff + + _, found, err := idx.Lookup(missing) + if err != nil { + t.Fatalf("Lookup: %v", err) + } + + if found { + t.Fatalf("Lookup of mutated oid unexpectedly found") + } + }) + } +} + +func TestOffsetAtMalformedLargeReference(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + hashSize := objectFormat.Size() + + entries := syntheticEntries(3) + data := writeSyntheticIndex(t, objectFormat, entries) + + // Mark the first 32-bit offset entry as a 64-bit reference; + // the index has no 64-bit offset table. + off32Off := 8 + 256*4 + len(entries)*hashSize + len(entries)*4 + binary.BigEndian.PutUint32(data[off32Off:], 0x80000000) + + idx, err := packidx.Parse(data, hashSize) + if err != nil { + t.Fatalf("Parse: %v", err) + } + + _, err = idx.OffsetAt(0) + if !errors.Is(err, packidx.ErrMalformedPackIndex) { + t.Fatalf("OffsetAt error = %v, want ErrMalformedPackIndex", err) + } + }) + } +} diff --git a/internal/format/packidx/packidx.go b/internal/format/packidx/packidx.go new file mode 100644 index 00000000..ef2c93f4 --- /dev/null +++ b/internal/format/packidx/packidx.go @@ -0,0 +1,197 @@ +package packidx + +import ( + "encoding/binary" + "errors" + "fmt" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/lgo/intconv" +) + +// ErrMalformedPackIndex reports that +// a pack index is truncated, +// has a bad signature or unsupported version, +// or has inconsistent tables. +var ErrMalformedPackIndex = errors.New("internal/format/packidx: malformed pack index") + +const ( + signature = 0xff744f63 + version = 2 + + headerLen = 8 + fanoutLen = 256 * 4 + + // largeOffsetFlag marks one 32-bit offset table entry + // as an index into the 64-bit offset table. + largeOffsetFlag = 0x80000000 +) + +// Packidx is one parsed pack index view over borrowed bytes. +// +// Labels: Deps-Borrowed, Life-Parent, MT-Safe. +type Packidx struct { + // data is the entire pack index payload. + data []byte + // hashSize is the object ID size of the index's object format. + hashSize int + + // numObjects is the object count from the last fanout entry. + numObjects int + + // namesOff, crcOff, off32Off, and off64Off are + // the byte offsets of the object ID, CRC32, + // 32-bit offset, and 64-bit offset tables. + namesOff int + crcOff int + off32Off int + off64Off int + // off64Count is the number of 64-bit offset table entries. + off64Count uint64 +} + +// Parse parses one pack index from data. +// +// hashSize must be the object ID size +// of the pack's object format; +// Parse panics on implausible hash sizes. +func Parse(data []byte, hashSize int) (Packidx, error) { + var zero Packidx + + if hashSize <= 0 || hashSize > id.MaxObjectIDSize { + panic("internal/format/packidx: invalid hash size") + } + + if len(data) < headerLen+fanoutLen+2*hashSize { + return zero, fmt.Errorf("%w: truncated", ErrMalformedPackIndex) + } + + if binary.BigEndian.Uint32(data) != signature { + return zero, fmt.Errorf("%w: bad signature", ErrMalformedPackIndex) + } + + if binary.BigEndian.Uint32(data[4:]) != version { + return zero, fmt.Errorf("%w: unsupported version", ErrMalformedPackIndex) + } + + prev := uint32(0) + + for i := range 256 { + count := binary.BigEndian.Uint32(data[headerLen+4*i:]) + if count < prev { + return zero, fmt.Errorf("%w: non-monotonic fanout", ErrMalformedPackIndex) + } + + prev = count + } + + numObjects := uint64(prev) + hashSize64 := uint64(hashSize) + + namesOff := uint64(headerLen + fanoutLen) + crcOff := namesOff + numObjects*hashSize64 + off32Off := crcOff + 4*numObjects + off64Off := off32Off + 4*numObjects + + minTotal := off64Off + 2*hashSize64 + + dataLen, err := intconv.IntToUint64(len(data)) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err) + } + + if dataLen < minTotal { + return zero, fmt.Errorf("%w: tables exceed index size", ErrMalformedPackIndex) + } + + off64Bytes := dataLen - minTotal + if off64Bytes%8 != 0 { + return zero, fmt.Errorf("%w: trailing table size not a 64-bit offset multiple", ErrMalformedPackIndex) + } + + off64Count := off64Bytes / 8 + if off64Count > numObjects { + return zero, fmt.Errorf("%w: more 64-bit offsets than objects", ErrMalformedPackIndex) + } + + idx := Packidx{ + data: data, + hashSize: hashSize, + off64Count: off64Count, + } + + idx.numObjects, err = intconv.Uint64ToInt(numObjects) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err) + } + + idx.namesOff, err = intconv.Uint64ToInt(namesOff) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err) + } + + idx.crcOff, err = intconv.Uint64ToInt(crcOff) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err) + } + + idx.off32Off, err = intconv.Uint64ToInt(off32Off) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err) + } + + idx.off64Off, err = intconv.Uint64ToInt(off64Off) + if err != nil { + return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err) + } + + return idx, nil +} + +// NumObjects returns the number of objects in the index. +func (idx *Packidx) NumObjects() int { + return idx.numObjects +} + +// PackHash returns the pack hash recorded in the index trailer. +// +// Labels: Life-Parent, Mut-No. +func (idx *Packidx) PackHash() []byte { + return idx.data[len(idx.data)-2*idx.hashSize : len(idx.data)-idx.hashSize] +} + +// OIDAt returns the object ID bytes at one index position. +// Positions follow object ID sort order. +// +// OIDAt panics when pos is out of range. +// +// Labels: Life-Parent, Mut-No. +func (idx *Packidx) OIDAt(pos int) []byte { + idx.checkPos(pos) + + start := idx.namesOff + pos*idx.hashSize + + return idx.data[start : start+idx.hashSize] +} + +// CRCAt returns the CRC32 of the packed entry data +// at one index position. +// +// CRCAt panics when pos is out of range. +func (idx *Packidx) CRCAt(pos int) uint32 { + idx.checkPos(pos) + + return binary.BigEndian.Uint32(idx.data[idx.crcOff+4*pos:]) +} + +// checkPos panics when pos is not a valid index position. +// +// An out-of-range position is a caller bug +// that slice bounds checking would not catch, +// since the tables share one data slice; +// an unchecked access would silently read other tables' bytes. +func (idx *Packidx) checkPos(pos int) { + if pos < 0 || pos >= idx.numObjects { + panic("internal/format/packidx: index position out of range") + } +} diff --git a/internal/format/packidx/packidx_test.go b/internal/format/packidx/packidx_test.go new file mode 100644 index 00000000..d225a993 --- /dev/null +++ b/internal/format/packidx/packidx_test.go @@ -0,0 +1,124 @@ +package packidx_test + +import ( + "bytes" + "encoding/binary" + "errors" + "os" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/object/id" +) + +func TestParseGitIndex(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + _, prefix, oids := makeGitPack(t, objectFormat) + _, idx := parseGitIdxFile(t, prefix, objectFormat) + + if idx.NumObjects() != len(oids) { + t.Fatalf("NumObjects = %d, want %d", idx.NumObjects(), len(oids)) + } + + for pos := 1; pos < idx.NumObjects(); pos++ { + if bytes.Compare(idx.OIDAt(pos-1), idx.OIDAt(pos)) >= 0 { + t.Fatalf("OIDAt(%d) not sorted after predecessor", pos) + } + } + + packData, err := os.ReadFile(prefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + packTrailer := packData[len(packData)-objectFormat.Size():] + if !bytes.Equal(idx.PackHash(), packTrailer) { + t.Fatalf("PackHash does not match pack trailer") + } + }) + } +} + +func TestParseMalformed(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + hashSize := objectFormat.Size() + + valid := writeSyntheticIndex(t, objectFormat, syntheticEntries(8)) + + corrupt := func(mutate func(data []byte) []byte) []byte { + return mutate(bytes.Clone(valid)) + } + + cases := []struct { + name string + data []byte + }{ + {name: "empty", data: []byte{}}, + {name: "truncated", data: corrupt(func(d []byte) []byte { return d[:20] })}, + { + name: "bad signature", + data: corrupt(func(d []byte) []byte { + d[0] ^= 0xff + + return d + }), + }, + { + name: "bad version", + data: corrupt(func(d []byte) []byte { + d[7] = 3 + + return d + }), + }, + { + name: "non-monotonic fanout", + data: corrupt(func(d []byte) []byte { + binary.BigEndian.PutUint32(d[8:], 0xffffffff) + + return d + }), + }, + { + name: "tables exceed index size", + data: corrupt(func(d []byte) []byte { + binary.BigEndian.PutUint32(d[8+255*4:], 0x00ffffff) + + return d + }), + }, + { + name: "trailing size not 64-bit multiple", + data: corrupt(func(d []byte) []byte { return append(d, 0xde, 0xad, 0xbe, 0xef) }), + }, + { + name: "more 64-bit offsets than objects", + data: corrupt(func(d []byte) []byte { + return append(d, bytes.Repeat([]byte{0x00}, 9*8)...) + }), + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, err := packidx.Parse(tc.data, hashSize) + if !errors.Is(err, packidx.ErrMalformedPackIndex) { + t.Fatalf("Parse error = %v, want ErrMalformedPackIndex", err) + } + }) + } + }) + } +} diff --git a/internal/format/packidx/roundtrip_test.go b/internal/format/packidx/roundtrip_test.go new file mode 100644 index 00000000..f5bd82e7 --- /dev/null +++ b/internal/format/packidx/roundtrip_test.go @@ -0,0 +1,88 @@ +package packidx_test + +import ( + "bytes" + "math" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/object/id" +) + +func TestWriteRoundTrip(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + hashSize := objectFormat.Size() + + entries := syntheticEntries(20) + entries[3].Offset = 1 << 33 + entries[11].Offset = math.MaxUint64 + entries[12].Offset = 0x7fffffff + entries[13].Offset = 0x80000000 + + data := writeSyntheticIndex(t, objectFormat, entries) + + idx, err := packidx.Parse(data, hashSize) + if err != nil { + t.Fatalf("Parse: %v", err) + } + + if idx.NumObjects() != len(entries) { + t.Fatalf("NumObjects = %d, want %d", idx.NumObjects(), len(entries)) + } + + if !bytes.Equal(idx.PackHash(), bytes.Repeat([]byte{0x5a}, hashSize)) { + t.Fatalf("PackHash mismatch") + } + + for pos, entry := range entries { + if !bytes.Equal(idx.OIDAt(pos), entry.OID[:hashSize]) { + t.Fatalf("OIDAt(%d) mismatch", pos) + } + + if idx.CRCAt(pos) != entry.CRC32 { + t.Fatalf("CRCAt(%d) = %x, want %x", pos, idx.CRCAt(pos), entry.CRC32) + } + + offset, err := idx.OffsetAt(pos) + if err != nil { + t.Fatalf("OffsetAt(%d): %v", pos, err) + } + + if offset != entry.Offset { + t.Fatalf("OffsetAt(%d) = %d, want %d", pos, offset, entry.Offset) + } + + offset, found, err := idx.Lookup(entry.OID[:hashSize]) + if err != nil { + t.Fatalf("Lookup(%d): %v", pos, err) + } + + if !found || offset != entry.Offset { + t.Fatalf("Lookup(%d) = %d %v, want %d found", pos, offset, found, entry.Offset) + } + } + }) + } +} + +// writeSyntheticIndex writes one index over entries +// with a fixed fake pack hash. +func writeSyntheticIndex(t *testing.T, objectFormat id.ObjectFormat, entries []packidx.Entry) []byte { + t.Helper() + + packHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size()) + + var buf bytes.Buffer + + err := packidx.Write(&buf, objectFormat, entries, packHash) + if err != nil { + t.Fatalf("Write: %v", err) + } + + return buf.Bytes() +} diff --git a/internal/format/packidx/write.go b/internal/format/packidx/write.go new file mode 100644 index 00000000..2bcdb51a --- /dev/null +++ b/internal/format/packidx/write.go @@ -0,0 +1,158 @@ +package packidx + +import ( + "bufio" + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + + "lindenii.org/go/furgit/object/id" +) + +// ErrInvalidEntries reports that +// entries supplied for an index write +// are unsorted, duplicated, or not representable +// in the pack index format. +var ErrInvalidEntries = errors.New("internal/format/packidx: invalid entries") + +// Entry is one object record for an index write. +type Entry struct { + // OID holds the object ID bytes; + // only the first hash-size bytes are meaningful. + OID [id.MaxObjectIDSize]byte + // Offset is the entry's pack file offset. + Offset uint64 + // CRC32 is the CRC32 of the entry's packed data. + CRC32 uint32 +} + +// Write writes one pack index over entries to w. +// +// entries must be sorted by object ID without duplicates. +// packHash must be the pack's trailer hash; +// Write panics when its length does not match the object format. +func Write(w io.Writer, objectFormat id.ObjectFormat, entries []Entry, packHash []byte) error { + hashSize := objectFormat.Size() + if hashSize == 0 { + return id.ErrInvalidObjectFormat + } + + if len(packHash) != hashSize { + panic("internal/format/packidx: invalid pack hash length") + } + + if len(entries) > math.MaxUint32 { + return fmt.Errorf("%w: too many entries", ErrInvalidEntries) + } + + for i := 1; i < len(entries); i++ { + if bytes.Compare(entries[i-1].OID[:hashSize], entries[i].OID[:hashSize]) >= 0 { + return fmt.Errorf("%w: not sorted by object ID", ErrInvalidEntries) + } + } + + hashImpl, err := objectFormat.New() + if err != nil { + return fmt.Errorf("internal/format/packidx: %w", err) + } + + bw := bufio.NewWriter(io.MultiWriter(w, hashImpl)) + sw := &stickyWriter{w: bw} + + sw.writeUint32(signature) + sw.writeUint32(version) + + var counts [256]uint32 + for i := range entries { + counts[entries[i].OID[0]]++ + } + + cumulative := uint32(0) + for _, count := range counts { + cumulative += count + sw.writeUint32(cumulative) + } + + for i := range entries { + sw.write(entries[i].OID[:hashSize]) + } + + for i := range entries { + sw.writeUint32(entries[i].CRC32) + } + + var largeOffsets []uint64 + + for i := range entries { + offset := entries[i].Offset + if offset < largeOffsetFlag { + sw.writeUint32(uint32(offset)) + + continue + } + + slot := len(largeOffsets) + if slot >= largeOffsetFlag { + return fmt.Errorf("%w: too many large offsets", ErrInvalidEntries) + } + + sw.writeUint32(largeOffsetFlag | uint32(slot)) + + largeOffsets = append(largeOffsets, offset) + } + + for _, offset := range largeOffsets { + sw.writeUint64(offset) + } + + sw.write(packHash) + + if sw.err != nil { + return fmt.Errorf("internal/format/packidx: %w", sw.err) + } + + err = bw.Flush() + if err != nil { + return fmt.Errorf("internal/format/packidx: %w", err) + } + + _, err = w.Write(hashImpl.Sum(nil)) + if err != nil { + return fmt.Errorf("internal/format/packidx: %w", err) + } + + return nil +} + +// stickyWriter forwards writes to w +// and retains the first error, +// turning subsequent writes into no-ops. +type stickyWriter struct { + w io.Writer + err error +} + +func (sw *stickyWriter) write(p []byte) { + if sw.err != nil { + return + } + + _, sw.err = sw.w.Write(p) +} + +func (sw *stickyWriter) writeUint32(v uint32) { + var buf [4]byte + + binary.BigEndian.PutUint32(buf[:], v) + sw.write(buf[:]) +} + +func (sw *stickyWriter) writeUint64(v uint64) { + var buf [8]byte + + binary.BigEndian.PutUint64(buf[:], v) + sw.write(buf[:]) +} diff --git a/internal/format/packidx/write_test.go b/internal/format/packidx/write_test.go new file mode 100644 index 00000000..68df3ece --- /dev/null +++ b/internal/format/packidx/write_test.go @@ -0,0 +1,112 @@ +package packidx_test + +import ( + "bytes" + "errors" + "testing" + + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/object/id" +) + +// syntheticEntries builds n distinct entries sorted by object ID, +// spread over the fanout table. +func syntheticEntries(n int) []packidx.Entry { + entries := make([]packidx.Entry, n) + for i := range entries { + entries[i].OID[0] = byte(i * 7) + entries[i].OID[1] = byte(i + 1) + entries[i].Offset = uint64(i+1) * 100 + entries[i].CRC32 = uint32(i+1) * 0x01010101 + } + + return entries +} + +func TestWriteMatchesGit(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + _, prefix, _ := makeGitPack(t, objectFormat) + gitData, idx := parseGitIdxFile(t, prefix, objectFormat) + + entries := make([]packidx.Entry, idx.NumObjects()) + for pos := range entries { + copy(entries[pos].OID[:], idx.OIDAt(pos)) + entries[pos].CRC32 = idx.CRCAt(pos) + + offset, err := idx.OffsetAt(pos) + if err != nil { + t.Fatalf("OffsetAt(%d): %v", pos, err) + } + + entries[pos].Offset = offset + } + + var buf bytes.Buffer + + err := packidx.Write(&buf, objectFormat, entries, idx.PackHash()) + if err != nil { + t.Fatalf("Write: %v", err) + } + + if !bytes.Equal(buf.Bytes(), gitData) { + t.Fatalf("Write output differs from git's index (%d vs %d bytes)", buf.Len(), len(gitData)) + } + }) + } +} + +func TestWriteInvalidEntries(t *testing.T) { + t.Parallel() + + unsorted := syntheticEntries(3) + unsorted[0], unsorted[2] = unsorted[2], unsorted[0] + + duplicated := syntheticEntries(3) + duplicated[1] = duplicated[0] + + cases := []struct { + name string + entries []packidx.Entry + }{ + {name: "unsorted", entries: unsorted}, + {name: "duplicated", entries: duplicated}, + } + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + packHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size()) + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + err := packidx.Write(&bytes.Buffer{}, objectFormat, tc.entries, packHash) + if !errors.Is(err, packidx.ErrInvalidEntries) { + t.Fatalf("Write error = %v, want ErrInvalidEntries", err) + } + }) + } + }) + } +} + +func TestWriteBadPackHashPanics(t *testing.T) { + t.Parallel() + + objectFormat := id.ObjectFormatSHA256 + + defer func() { + if recover() == nil { + t.Fatalf("Write with short pack hash: expected panic") + } + }() + + _ = packidx.Write(&bytes.Buffer{}, objectFormat, nil, []byte{0x01}) +} diff --git a/internal/iolimit/expect_length_reader.go b/internal/iolimit/expect_length_reader.go index 1efe2b75..72b0d912 100644 --- a/internal/iolimit/expect_length_reader.go +++ b/internal/iolimit/expect_length_reader.go @@ -53,7 +53,7 @@ func (reader *expectLengthReader) Read(dst []byte) (int, error) { return 0, nil } - return 0, err //nolint:wrapcheck + return 0, err } if uint64(len(dst)) > reader.remaining { @@ -87,5 +87,5 @@ func (reader *expectLengthReader) Read(dst []byte) (int, error) { return 0, io.EOF } - return n, err //nolint:wrapcheck + return n, err } diff --git a/internal/testgit/packobjects.go b/internal/testgit/packobjects.go new file mode 100644 index 00000000..9f56eef0 --- /dev/null +++ b/internal/testgit/packobjects.go @@ -0,0 +1,47 @@ +package testgit + +import ( + "bytes" + "iter" + "path/filepath" + "strings" + "testing" + + "lindenii.org/go/furgit/object/id" +) + +// PackObjectsOptions controls one pack-objects invocation. +type PackObjectsOptions struct { + // RevIndex requests writing a .rev reverse index alongside the pack. + RevIndex bool +} + +// PackObjects packs the supplied objects with git pack-objects +// into a temporary directory, +// and returns the artifact path prefix "<dir>/pack-<hash>", +// to which ".pack", ".idx", and ".rev" suffixes apply. +func (repo *Repo) PackObjects(tb testing.TB, oids iter.Seq[id.ObjectID], opts PackObjectsOptions) (string, error) { + tb.Helper() + + dir := tb.TempDir() + + var stdin bytes.Buffer + for oid := range oids { + stdin.WriteString(oid.String()) + stdin.WriteByte('\n') + } + + revIndex := "false" + if opts.RevIndex { + revIndex = "true" + } + + out, err := repo.run(tb, &stdin, + "git", "-c", "pack.writeReverseIndex="+revIndex, + "pack-objects", "--end-of-options", filepath.Join(dir, "pack")) + if err != nil { + return "", err + } + + return filepath.Join(dir, "pack-"+strings.TrimSpace(string(out))), nil +} diff --git a/internal/testgit/verifypack.go b/internal/testgit/verifypack.go new file mode 100644 index 00000000..ec59572a --- /dev/null +++ b/internal/testgit/verifypack.go @@ -0,0 +1,13 @@ +package testgit + +import ( + "testing" +) + +// VerifyPack runs git verify-pack -v on one pack index path +// and returns its raw verbose output. +func (repo *Repo) VerifyPack(tb testing.TB, idxPath string) ([]byte, error) { + tb.Helper() + + return repo.run(tb, nil, "git", "verify-pack", "-v", "--end-of-options", idxPath) +} |
