aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
Diffstat (limited to 'internal')
-rw-r--r--internal/cache/clock/bench_test.go28
-rw-r--r--internal/cache/clock/clock.go (renamed from internal/cache/clock/cache.go)12
-rw-r--r--internal/cache/clock/clock_ops.go (renamed from internal/cache/clock/cache_ops.go)24
-rw-r--r--internal/cache/clock/clock_test.go (renamed from internal/cache/clock/cache_test.go)38
-rw-r--r--internal/cache/clock/concurrent_test.go24
-rw-r--r--internal/cache/clock/invariant_test.go4
-rw-r--r--internal/compress/zlib/writer.go2
-rw-r--r--internal/compress/zlib/writer_header.go4
-rw-r--r--internal/format/packfile/delta/apply.go117
-rw-r--r--internal/format/packfile/delta/apply_test.go174
-rw-r--r--internal/format/packfile/delta/header.go98
-rw-r--r--internal/format/packfile/delta/header_test.go88
-rw-r--r--internal/format/packfile/entry_header.go11
-rw-r--r--internal/format/packfile/entry_header_test.go13
-rw-r--r--internal/format/packidx/doc.go3
-rw-r--r--internal/format/packidx/helpers_test.go95
-rw-r--r--internal/format/packidx/lookup.go73
-rw-r--r--internal/format/packidx/lookup_test.go99
-rw-r--r--internal/format/packidx/packidx.go197
-rw-r--r--internal/format/packidx/packidx_test.go124
-rw-r--r--internal/format/packidx/roundtrip_test.go88
-rw-r--r--internal/format/packidx/write.go158
-rw-r--r--internal/format/packidx/write_test.go112
-rw-r--r--internal/iolimit/expect_length_reader.go4
-rw-r--r--internal/testgit/packobjects.go47
-rw-r--r--internal/testgit/verifypack.go13
26 files changed, 1568 insertions, 82 deletions
diff --git a/internal/cache/clock/bench_test.go b/internal/cache/clock/bench_test.go
index 917aac3b..48bdff99 100644
--- a/internal/cache/clock/bench_test.go
+++ b/internal/cache/clock/bench_test.go
@@ -28,9 +28,9 @@ func BenchmarkReadHeavy(b *testing.B) {
// ≈95% Get hits, ≈5% Add, and fits budget.
const n = 100_000
- cache := New(n, benchWeight)
+ clock := New(n, benchWeight)
for k := range n {
- cache.Add(k, k)
+ clock.Add(k, k)
}
b.ResetTimer()
@@ -41,9 +41,9 @@ func BenchmarkReadHeavy(b *testing.B) {
key := i % n
if i%20 == 0 {
- cache.Add(key, key)
+ clock.Add(key, key)
} else {
- _, _ = cache.Get(key)
+ _, _ = clock.Get(key)
}
}
})
@@ -56,9 +56,9 @@ func BenchmarkHotKey(b *testing.B) {
maxWeight = 4096
)
- cache := New(maxWeight, benchWeight)
+ clock := New(maxWeight, benchWeight)
for k := range hot {
- cache.Add(k, k)
+ clock.Add(k, k)
}
b.ResetTimer()
@@ -66,7 +66,7 @@ func BenchmarkHotKey(b *testing.B) {
i := workerOffset()
for pb.Next() {
i++
- _, _ = cache.Get(i % hot)
+ _, _ = clock.Get(i % hot)
}
})
}
@@ -75,9 +75,9 @@ func BenchmarkMixed(b *testing.B) {
// Even split of Get and Add over a working set that fits.
const n = 100_000
- cache := New(n, benchWeight)
+ clock := New(n, benchWeight)
for k := range n {
- cache.Add(k, k)
+ clock.Add(k, k)
}
b.ResetTimer()
@@ -88,9 +88,9 @@ func BenchmarkMixed(b *testing.B) {
key := i % n
if i%2 == 0 {
- cache.Add(key, key)
+ clock.Add(key, key)
} else {
- _, _ = cache.Get(key)
+ _, _ = clock.Get(key)
}
}
})
@@ -98,11 +98,11 @@ func BenchmarkMixed(b *testing.B) {
func BenchmarkChurn(b *testing.B) {
// Every op inserts a fresh key into a small budget.
- // I don't think this is likely to happen for git delta caches,
+ // I don't think this is likely to happen for git delta clocks,
// but this may matter if we use this for other workloads later.
const maxWeight = 4096
- cache := New(maxWeight, benchWeight)
+ clock := New(maxWeight, benchWeight)
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
@@ -111,7 +111,7 @@ func BenchmarkChurn(b *testing.B) {
i := 0
for pb.Next() {
i++
- cache.Add(base+i, i)
+ clock.Add(base+i, i)
}
})
}
diff --git a/internal/cache/clock/cache.go b/internal/cache/clock/clock.go
index 31d97082..27d83c7b 100644
--- a/internal/cache/clock/cache.go
+++ b/internal/cache/clock/clock.go
@@ -17,14 +17,14 @@ const maxShards = 16
// WeightFunc reports one entry's weight, used for eviction budgeting.
type WeightFunc[K comparable, V any] func(key K, value V) uint64
-// Cache is a concurrent, weight-bounded cache
+// Clock is a concurrent, weight-bounded cache
// with CLOCK eviction.
//
// Reads are lock-free;
// writes lock only the shard that owns the key.
//
// Labels: MT-Safe.
-type Cache[K comparable, V any] struct {
+type Clock[K comparable, V any] struct {
shards []*shard[K, V]
seed maphash.Seed
mask uint64
@@ -35,7 +35,7 @@ type Cache[K comparable, V any] struct {
// weighing entries with weightFn.
//
// New panics if weightFn is nil.
-func New[K comparable, V any](maxWeight uint64, weightFn WeightFunc[K, V]) *Cache[K, V] {
+func New[K comparable, V any](maxWeight uint64, weightFn WeightFunc[K, V]) *Clock[K, V] {
if weightFn == nil {
panic("internal/clock: nil weight function")
}
@@ -48,7 +48,7 @@ func New[K comparable, V any](maxWeight uint64, weightFn WeightFunc[K, V]) *Cach
shards[i] = newShard[K, V](perShard)
}
- return &Cache[K, V]{
+ return &Clock[K, V]{
shards: shards,
seed: maphash.MakeSeed(),
mask: mask,
@@ -81,6 +81,6 @@ func shardLayout(maxWeight uint64) (int, uint64) {
}
// shardFor returns the shard that owns key.
-func (cache *Cache[K, V]) shardFor(key K) *shard[K, V] {
- return cache.shards[maphash.Comparable(cache.seed, key)&cache.mask]
+func (clock *Clock[K, V]) shardFor(key K) *shard[K, V] {
+ return clock.shards[maphash.Comparable(clock.seed, key)&clock.mask]
}
diff --git a/internal/cache/clock/cache_ops.go b/internal/cache/clock/clock_ops.go
index 18958202..a21f44c3 100644
--- a/internal/cache/clock/cache_ops.go
+++ b/internal/cache/clock/clock_ops.go
@@ -5,28 +5,28 @@ package clock
// It reports whether the entry was admitted;
// an entry heavier than the per-shard budget is rejected
// and leaves the cache unchanged.
-func (cache *Cache[K, V]) Add(key K, value V) bool {
- return cache.shardFor(key).add(key, value, cache.weightFn(key, value))
+func (clock *Clock[K, V]) Add(key K, value V) bool {
+ return clock.shardFor(key).add(key, value, clock.weightFn(key, value))
}
// Get returns the value for key and marks it recently used.
//
//nolint:ireturn
-func (cache *Cache[K, V]) Get(key K) (V, bool) {
- return cache.shardFor(key).get(key)
+func (clock *Clock[K, V]) Get(key K) (V, bool) {
+ return clock.shardFor(key).get(key)
}
// Peek returns the value for key without changing its recency.
//
//nolint:ireturn
-func (cache *Cache[K, V]) Peek(key K) (V, bool) {
- return cache.shardFor(key).peek(key)
+func (clock *Clock[K, V]) Peek(key K) (V, bool) {
+ return clock.shardFor(key).peek(key)
}
// Len returns the number of cached entries.
-func (cache *Cache[K, V]) Len() int {
+func (clock *Clock[K, V]) Len() int {
total := 0
- for _, shard := range cache.shards {
+ for _, shard := range clock.shards {
total += shard.len()
}
@@ -34,9 +34,9 @@ func (cache *Cache[K, V]) Len() int {
}
// Weight returns the current total weight across all shards.
-func (cache *Cache[K, V]) Weight() uint64 {
+func (clock *Clock[K, V]) Weight() uint64 {
var total uint64
- for _, shard := range cache.shards {
+ for _, shard := range clock.shards {
total += shard.loadWeight()
}
@@ -44,8 +44,8 @@ func (cache *Cache[K, V]) Weight() uint64 {
}
// Clear removes all entries.
-func (cache *Cache[K, V]) Clear() {
- for _, shard := range cache.shards {
+func (clock *Clock[K, V]) Clear() {
+ for _, shard := range clock.shards {
shard.clear()
}
}
diff --git a/internal/cache/clock/cache_test.go b/internal/cache/clock/clock_test.go
index 3d734b39..36622818 100644
--- a/internal/cache/clock/cache_test.go
+++ b/internal/cache/clock/clock_test.go
@@ -21,21 +21,21 @@ func byteWeight(_ string, value string) uint64 {
func TestCacheAddGetPeek(t *testing.T) {
t.Parallel()
- cache := clock.New(1<<20, byteWeight)
+ clock := clock.New(1<<20, byteWeight)
- if !cache.Add("a", "alpha") {
+ if !clock.Add("a", "alpha") {
t.Fatalf("Add(a) should succeed")
}
- if got, ok := cache.Get("a"); !ok || got != "alpha" {
+ if got, ok := clock.Get("a"); !ok || got != "alpha" {
t.Fatalf("Get(a) = (%q, %v), want (alpha, true)", got, ok)
}
- if got, ok := cache.Peek("a"); !ok || got != "alpha" {
+ if got, ok := clock.Peek("a"); !ok || got != "alpha" {
t.Fatalf("Peek(a) = (%q, %v), want (alpha, true)", got, ok)
}
- if _, ok := cache.Get("missing"); ok {
+ if _, ok := clock.Get("missing"); ok {
t.Fatalf("Get(missing) should miss")
}
}
@@ -45,14 +45,14 @@ func TestCacheWeightStaysBounded(t *testing.T) {
const maxWeight = 4096
- cache := clock.New(maxWeight, byteWeight)
+ clock := clock.New(maxWeight, byteWeight)
value := strings.Repeat("x", 64)
for i := range 1000 {
- cache.Add(fmt.Sprintf("key-%d", i), value)
+ clock.Add(fmt.Sprintf("key-%d", i), value)
}
- if got := cache.Weight(); got > maxWeight {
+ if got := clock.Weight(); got > maxWeight {
t.Fatalf("weight = %d, exceeds max %d", got, maxWeight)
}
}
@@ -60,23 +60,23 @@ func TestCacheWeightStaysBounded(t *testing.T) {
func TestCacheLenAndClear(t *testing.T) {
t.Parallel()
- cache := clock.New(1<<20, byteWeight)
+ clock := clock.New(1<<20, byteWeight)
for i := range 10 {
- cache.Add(fmt.Sprintf("key-%d", i), "v")
+ clock.Add(fmt.Sprintf("key-%d", i), "v")
}
- if got := cache.Len(); got != 10 {
+ if got := clock.Len(); got != 10 {
t.Fatalf("Len = %d, want 10", got)
}
- cache.Clear()
+ clock.Clear()
- if got := cache.Len(); got != 0 {
+ if got := clock.Len(); got != 0 {
t.Fatalf("Len after Clear = %d, want 0", got)
}
- if got := cache.Weight(); got != 0 {
+ if got := clock.Weight(); got != 0 {
t.Fatalf("Weight after Clear = %d, want 0", got)
}
}
@@ -84,17 +84,17 @@ func TestCacheLenAndClear(t *testing.T) {
func TestCacheRejectsOversized(t *testing.T) {
t.Parallel()
- cache := clock.New(4, byteWeight)
+ clock := clock.New(4, byteWeight)
- if cache.Add("a", "xxxxx") {
+ if clock.Add("a", "xxxxx") {
t.Fatalf("oversized Add should report false")
}
- if _, ok := cache.Get("a"); ok {
- t.Fatalf("oversized entry must not be cached")
+ if _, ok := clock.Get("a"); ok {
+ t.Fatalf("oversized entry must not be clockd")
}
- if got := cache.Weight(); got != 0 {
+ if got := clock.Weight(); got != 0 {
t.Fatalf("weight = %d, want 0", got)
}
}
diff --git a/internal/cache/clock/concurrent_test.go b/internal/cache/clock/concurrent_test.go
index 86283a9b..4b7da162 100644
--- a/internal/cache/clock/concurrent_test.go
+++ b/internal/cache/clock/concurrent_test.go
@@ -19,7 +19,7 @@ func TestConcurrentStress(t *testing.T) {
rounds = 5000
)
- cache := New(maxWeight, func(_ int, _ int) uint64 { return 1 })
+ clock := New(maxWeight, func(_ int, _ int) uint64 { return 1 })
var wg sync.WaitGroup
@@ -30,13 +30,13 @@ func TestConcurrentStress(t *testing.T) {
switch i % 4 {
case 0, 1:
- cache.Add(key, keyValue(key))
+ clock.Add(key, keyValue(key))
case 2:
- if got, ok := cache.Get(key); ok && got != keyValue(key) {
+ if got, ok := clock.Get(key); ok && got != keyValue(key) {
t.Errorf("Get(%d) = %d, want %d", key, got, keyValue(key))
}
case 3:
- if got, ok := cache.Peek(key); ok && got != keyValue(key) {
+ if got, ok := clock.Peek(key); ok && got != keyValue(key) {
t.Errorf("Peek(%d) = %d, want %d", key, got, keyValue(key))
}
}
@@ -46,9 +46,9 @@ func TestConcurrentStress(t *testing.T) {
wg.Wait()
- checkCache(t, cache)
+ checkCache(t, clock)
- if got := cache.Weight(); got > maxWeight {
+ if got := clock.Weight(); got > maxWeight {
t.Fatalf("weight %d exceeds max %d", got, maxWeight)
}
}
@@ -64,7 +64,7 @@ func TestReadDuringEviction(t *testing.T) {
rounds = 20000
)
- cache := New(maxWeight, func(_ int, _ int) uint64 { return 1 })
+ clock := New(maxWeight, func(_ int, _ int) uint64 { return 1 })
var wg sync.WaitGroup
@@ -72,7 +72,7 @@ func TestReadDuringEviction(t *testing.T) {
wg.Go(func() {
for i := range rounds {
key := i % hot
- cache.Add(key, keyValue(key))
+ clock.Add(key, keyValue(key))
}
})
}
@@ -82,11 +82,11 @@ func TestReadDuringEviction(t *testing.T) {
for i := range rounds {
key := i % hot
- if got, ok := cache.Get(key); ok && got != keyValue(key) {
+ if got, ok := clock.Get(key); ok && got != keyValue(key) {
t.Errorf("Get(%d) = %d, want %d", key, got, keyValue(key))
}
- if got, ok := cache.Peek(key); ok && got != keyValue(key) {
+ if got, ok := clock.Peek(key); ok && got != keyValue(key) {
t.Errorf("Peek(%d) = %d, want %d", key, got, keyValue(key))
}
}
@@ -95,9 +95,9 @@ func TestReadDuringEviction(t *testing.T) {
wg.Wait()
- checkCache(t, cache)
+ checkCache(t, clock)
- if got := cache.Weight(); got > maxWeight {
+ if got := clock.Weight(); got > maxWeight {
t.Fatalf("weight %d exceeds max %d", got, maxWeight)
}
}
diff --git a/internal/cache/clock/invariant_test.go b/internal/cache/clock/invariant_test.go
index 2efd7ff9..04896ca1 100644
--- a/internal/cache/clock/invariant_test.go
+++ b/internal/cache/clock/invariant_test.go
@@ -79,10 +79,10 @@ func checkShard[K comparable, V any](t *testing.T, shard *shard[K, V]) {
}
// checkCache verifies every shard's invariants.
-func checkCache[K comparable, V any](t *testing.T, cache *Cache[K, V]) {
+func checkCache[K comparable, V any](t *testing.T, clock *Clock[K, V]) {
t.Helper()
- for _, shard := range cache.shards {
+ for _, shard := range clock.shards {
checkShard(t, shard)
}
}
diff --git a/internal/compress/zlib/writer.go b/internal/compress/zlib/writer.go
index af2d8bf3..98053e71 100644
--- a/internal/compress/zlib/writer.go
+++ b/internal/compress/zlib/writer.go
@@ -149,7 +149,7 @@ func (z *Writer) Write(p []byte) (n int, err error) {
return 0, z.err
}
- return n, err //nolint:wrapcheck
+ return n, err
}
// Flush flushes the Writer to its underlying [io.Writer].
diff --git a/internal/compress/zlib/writer_header.go b/internal/compress/zlib/writer_header.go
index bd0fe4c3..e23e73d6 100644
--- a/internal/compress/zlib/writer_header.go
+++ b/internal/compress/zlib/writer_header.go
@@ -43,7 +43,7 @@ func (z *Writer) writeHeader() (err error) {
_, err = z.w.Write(z.scratch[0:2])
if err != nil {
- return err //nolint:wrapcheck
+ return err
}
if z.dict != nil {
@@ -52,7 +52,7 @@ func (z *Writer) writeHeader() (err error) {
_, err = z.w.Write(z.scratch[0:4])
if err != nil {
- return err //nolint:wrapcheck
+ return err
}
}
diff --git a/internal/format/packfile/delta/apply.go b/internal/format/packfile/delta/apply.go
new file mode 100644
index 00000000..d0918839
--- /dev/null
+++ b/internal/format/packfile/delta/apply.go
@@ -0,0 +1,117 @@
+package delta
+
+import (
+ "fmt"
+
+ "lindenii.org/go/lgo/intconv"
+)
+
+// Apply applies one inflated delta payload to base
+// and returns the reconstructed result.
+//
+// delta must include the leading base/result size header.
+func Apply(base, delta []byte) ([]byte, error) {
+ baseSize, resultSize, pos, err := ParseHeaderSizes(delta)
+ if err != nil {
+ return nil, err
+ }
+
+ if baseSize != uint64(len(base)) {
+ return nil, fmt.Errorf("%w: base size mismatch", ErrMalformedDelta)
+ }
+
+ outLen, err := intconv.Uint64ToInt(resultSize)
+ if err != nil {
+ return nil, fmt.Errorf("%w: result size overflows int", ErrMalformedDelta)
+ }
+
+ out := make([]byte, outLen)
+ outPos := 0
+
+ for pos < len(delta) {
+ op := delta[pos]
+ pos++
+
+ switch {
+ case op&0x80 != 0:
+ outPos, err = applyCopy(out, outPos, base, delta, &pos, op)
+ case op != 0:
+ outPos, err = applyInsert(out, outPos, delta, &pos, int(op))
+ default:
+ err = fmt.Errorf("%w: invalid opcode 0", ErrMalformedDelta)
+ }
+
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if outPos != len(out) {
+ return nil, fmt.Errorf("%w: result size mismatch", ErrMalformedDelta)
+ }
+
+ return out, nil
+}
+
+// applyCopy executes one copy instruction,
+// copying a base range into out,
+// and returns the new output position.
+func applyCopy(out []byte, outPos int, base, delta []byte, pos *int, op byte) (int, error) {
+ off, err := parseCopyOperand(delta, pos, op, 0, 4)
+ if err != nil {
+ return 0, err
+ }
+
+ n, err := parseCopyOperand(delta, pos, op, 4, 3)
+ if err != nil {
+ return 0, err
+ }
+
+ if n == 0 {
+ n = 0x10000
+ }
+
+ if off+n > len(base) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: copy out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], base[off:off+n])
+
+ return outPos + n, nil
+}
+
+// applyInsert executes one insert instruction,
+// copying n literal delta bytes into out,
+// and returns the new output position.
+func applyInsert(out []byte, outPos int, delta []byte, pos *int, n int) (int, error) {
+ if *pos+n > len(delta) || outPos+n > len(out) {
+ return 0, fmt.Errorf("%w: insert out of bounds", ErrMalformedDelta)
+ }
+
+ copy(out[outPos:outPos+n], delta[*pos:*pos+n])
+ *pos += n
+
+ return outPos + n, nil
+}
+
+// parseCopyOperand assembles one little-endian copy instruction operand
+// from the operand bytes selected by op's flag bits
+// firstBit through firstBit+count-1.
+func parseCopyOperand(delta []byte, pos *int, op byte, firstBit uint, count int) (int, error) {
+ value := 0
+
+ for i := range count {
+ if op&(1<<(firstBit+uint(i))) == 0 {
+ continue
+ }
+
+ if *pos >= len(delta) {
+ return 0, fmt.Errorf("%w: truncated copy operand", ErrMalformedDelta)
+ }
+
+ value |= int(delta[*pos]) << (8 * i)
+ *pos++
+ }
+
+ return value, nil
+}
diff --git a/internal/format/packfile/delta/apply_test.go b/internal/format/packfile/delta/apply_test.go
new file mode 100644
index 00000000..bdf454d9
--- /dev/null
+++ b/internal/format/packfile/delta/apply_test.go
@@ -0,0 +1,174 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestApplyInsert(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 0, 5)
+ data = append(data, 5, 'h', 'e', 'l', 'l', 'o')
+
+ out, err := delta.Apply(nil, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "hello" {
+ t.Fatalf("Apply = %q, want %q", out, "hello")
+ }
+}
+
+func TestApplyCopy(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ // One offset byte and one size byte.
+ data := delta.AppendHeaderSizes(nil, 10, 4)
+ data = append(data, 0x91, 3, 4)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "3456" {
+ t.Fatalf("Apply = %q, want %q", out, "3456")
+ }
+}
+
+func TestApplyCopyImplicitSize(t *testing.T) {
+ t.Parallel()
+
+ base := bytes.Repeat([]byte{0xab}, 0x10000+10)
+
+ // Offset 0 and the implicit copy size 0x10000.
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 0x10000)
+ data = append(data, 0x80)
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if !bytes.Equal(out, base[:0x10000]) {
+ t.Fatalf("Apply implicit-size copy mismatch")
+ }
+}
+
+func TestApplyMixed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("the quick brown fox")
+
+ data := delta.AppendHeaderSizes(nil, uint64(len(base)), 9)
+ data = append(data, 0x91, 4, 5) // copy "quick"
+ data = append(data, 3, '!', '?', '!') // insert "!?!"
+ data = append(data, 0x91, 16, 1) // copy "f"
+
+ out, err := delta.Apply(base, data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if string(out) != "quick!?!f" {
+ t.Fatalf("Apply = %q, want %q", out, "quick!?!f")
+ }
+}
+
+func TestApplyEmptyResult(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, 3, 0)
+
+ out, err := delta.Apply([]byte("abc"), data)
+ if err != nil {
+ t.Fatalf("Apply: %v", err)
+ }
+
+ if len(out) != 0 {
+ t.Fatalf("Apply = %q, want empty", out)
+ }
+}
+
+func TestApplyMalformed(t *testing.T) {
+ t.Parallel()
+
+ base := []byte("0123456789")
+
+ cases := []struct {
+ name string
+ ops []byte
+ baseSize uint64
+ resultSize uint64
+ }{
+ {
+ name: "opcode zero",
+ ops: []byte{0x00},
+ baseSize: 10,
+ resultSize: 1,
+ },
+ {
+ name: "truncated copy operand",
+ ops: []byte{0x91, 3},
+ baseSize: 10,
+ resultSize: 4,
+ },
+ {
+ name: "copy past base",
+ ops: []byte{0x91, 8, 5},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "copy past result",
+ ops: []byte{0x91, 0, 5},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "insert past delta",
+ ops: []byte{5, 'a', 'b'},
+ baseSize: 10,
+ resultSize: 5,
+ },
+ {
+ name: "insert past result",
+ ops: []byte{5, 'a', 'b', 'c', 'd', 'e'},
+ baseSize: 10,
+ resultSize: 3,
+ },
+ {
+ name: "base size mismatch",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 9,
+ resultSize: 1,
+ },
+ {
+ name: "result shorter than declared",
+ ops: []byte{0x91, 0, 1},
+ baseSize: 10,
+ resultSize: 2,
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+ data = append(data, tc.ops...)
+
+ _, err := delta.Apply(base, data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("Apply error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/delta/header.go b/internal/format/packfile/delta/header.go
new file mode 100644
index 00000000..d5dd93fb
--- /dev/null
+++ b/internal/format/packfile/delta/header.go
@@ -0,0 +1,98 @@
+package delta
+
+import (
+ "errors"
+ "fmt"
+)
+
+// ErrMalformedDelta reports that
+// a delta payload is truncated, overlong,
+// declares sizes that overflow uint64,
+// contains invalid instructions,
+// or does not match the supplied base or declared sizes.
+var ErrMalformedDelta = errors.New("internal/format/packfile/delta: malformed delta")
+
+// MaxSizeVarintLen is the maximum encoded length
+// of one delta header size varint.
+// Every uint64 size is encodable within this bound,
+// and parsing rejects longer encodings.
+const MaxSizeVarintLen = 10
+
+// MaxHeaderSizesLen is the maximum encoded length
+// of the base/result size header of a delta payload.
+//
+// Callers reading a delta from a stream may inflate
+// MaxHeaderSizesLen bytes
+// (or fewer if the payload ends sooner)
+// and parse with [ParseHeaderSizes];
+// no valid header is longer.
+const MaxHeaderSizesLen = 2 * MaxSizeVarintLen
+
+// ParseHeaderSizes parses the base size and result size varints
+// at the beginning of one inflated delta payload.
+func ParseHeaderSizes(data []byte) (baseSize, resultSize uint64, consumed int, err error) {
+ baseSize, consumed, err = parseSizeVarint(data, 0)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ resultSize, consumed, err = parseSizeVarint(data, consumed)
+ if err != nil {
+ return 0, 0, 0, err
+ }
+
+ return baseSize, resultSize, consumed, nil
+}
+
+// AppendHeaderSizes appends the base/result size header encoding
+// of one delta payload to dst.
+func AppendHeaderSizes(dst []byte, baseSize, resultSize uint64) []byte {
+ dst = appendSizeVarint(dst, baseSize)
+
+ return appendSizeVarint(dst, resultSize)
+}
+
+// appendSizeVarint appends the encoding of one delta header size varint to dst.
+func appendSizeVarint(dst []byte, value uint64) []byte {
+ for value >= 0x80 {
+ dst = append(dst, byte(value)|0x80)
+ value >>= 7
+ }
+
+ return append(dst, byte(value))
+}
+
+// parseSizeVarint parses one delta header size varint
+// beginning at pos,
+// and returns the position past it.
+func parseSizeVarint(data []byte, pos int) (value uint64, consumed int, err error) {
+ start := pos
+
+ shift := uint(0)
+
+ for {
+ if pos-start >= MaxSizeVarintLen {
+ return 0, 0, fmt.Errorf("%w: overlong size varint", ErrMalformedDelta)
+ }
+
+ if pos >= len(data) {
+ return 0, 0, fmt.Errorf("%w: truncated size varint", ErrMalformedDelta)
+ }
+
+ b := data[pos]
+ pos++
+
+ group := uint64(b & 0x7f)
+ if group<<shift>>shift != group {
+ return 0, 0, fmt.Errorf("%w: size overflows uint64", ErrMalformedDelta)
+ }
+
+ value |= group << shift
+
+ if b&0x80 == 0 {
+ return value, pos, nil
+ }
+
+ shift += 7
+ }
+}
diff --git a/internal/format/packfile/delta/header_test.go b/internal/format/packfile/delta/header_test.go
new file mode 100644
index 00000000..8d97674f
--- /dev/null
+++ b/internal/format/packfile/delta/header_test.go
@@ -0,0 +1,88 @@
+package delta_test
+
+import (
+ "bytes"
+ "errors"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packfile/delta"
+)
+
+func TestParseHeaderSizesRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ baseSize uint64
+ resultSize uint64
+ }{
+ {name: "zero", baseSize: 0, resultSize: 0},
+ {name: "small", baseSize: 5, resultSize: 130},
+ {name: "boundaries", baseSize: 127, resultSize: 128},
+ {name: "large", baseSize: 1 << 32, resultSize: 1 << 57},
+ {name: "max", baseSize: math.MaxUint64, resultSize: math.MaxUint64},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ data := delta.AppendHeaderSizes(nil, tc.baseSize, tc.resultSize)
+
+ wantConsumed := len(data)
+
+ data = append(data, 0xde, 0xad)
+
+ baseSize, resultSize, consumed, err := delta.ParseHeaderSizes(data)
+ if err != nil {
+ t.Fatalf("ParseHeaderSizes: %v", err)
+ }
+
+ if baseSize != tc.baseSize {
+ t.Fatalf("ParseHeaderSizes base size = %d, want %d", baseSize, tc.baseSize)
+ }
+
+ if resultSize != tc.resultSize {
+ t.Fatalf("ParseHeaderSizes result size = %d, want %d", resultSize, tc.resultSize)
+ }
+
+ if consumed != wantConsumed {
+ t.Fatalf("ParseHeaderSizes consumed = %d, want %d", consumed, wantConsumed)
+ }
+ })
+ }
+}
+
+func TestParseHeaderSizesMalformed(t *testing.T) {
+ t.Parallel()
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated first varint", data: []byte{0x80}},
+ {name: "missing second varint", data: []byte{0x05}},
+ {name: "truncated second varint", data: []byte{0x05, 0x80}},
+ {
+ name: "overflow",
+ data: append(bytes.Repeat([]byte{0xff}, 9), 0x7f),
+ },
+ {
+ name: "overlong",
+ data: append(bytes.Repeat([]byte{0x80}, 10), 0x00),
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, _, _, err := delta.ParseHeaderSizes(tc.data)
+ if !errors.Is(err, delta.ErrMalformedDelta) {
+ t.Fatalf("ParseHeaderSizes error = %v, want ErrMalformedDelta", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packfile/entry_header.go b/internal/format/packfile/entry_header.go
index 1428600b..e5f2d62d 100644
--- a/internal/format/packfile/entry_header.go
+++ b/internal/format/packfile/entry_header.go
@@ -13,12 +13,6 @@ import (
// or declares a size that overflows uint64.
var ErrMalformedEntryHeader = errors.New("internal/format/packfile: malformed entry header")
-// ErrInvalidHashSize reports that
-// a supplied hash size is not a plausible object ID size.
-// This indicates a caller bug,
-// not malformed pack data.
-var ErrInvalidHashSize = errors.New("internal/format/packfile: invalid hash size")
-
// MaxTypeSizeLen is the maximum encoded length
// of the type/size prefix of an entry header.
// Every uint64 size is encodable within this bound,
@@ -73,7 +67,8 @@ type EntryHeader struct {
// from the beginning of data.
//
// hashSize must be the object ID size
-// of the pack's object format.
+// of the pack's object format;
+// ParseEntryHeader panics on implausible hash sizes.
//
// data need not contain the whole entry;
// [MaxEntryHeaderLen] bytes always suffice.
@@ -83,7 +78,7 @@ func ParseEntryHeader(data []byte, hashSize int) (EntryHeader, error) {
var zero EntryHeader
if hashSize <= 0 || hashSize > id.MaxObjectIDSize {
- return zero, fmt.Errorf("%w: %d", ErrInvalidHashSize, hashSize)
+ panic("internal/format/packfile: invalid hash size")
}
if len(data) == 0 {
diff --git a/internal/format/packfile/entry_header_test.go b/internal/format/packfile/entry_header_test.go
index 807c544e..79dc2740 100644
--- a/internal/format/packfile/entry_header_test.go
+++ b/internal/format/packfile/entry_header_test.go
@@ -130,10 +130,15 @@ func TestParseEntryHeaderBadHashSize(t *testing.T) {
t.Parallel()
for _, hashSize := range []int{-1, 0, id.MaxObjectIDSize + 1} {
- _, err := packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize)
- if err == nil {
- t.Fatalf("ParseEntryHeader hash size %d: expected error", hashSize)
- }
+ func() {
+ defer func() {
+ if recover() == nil {
+ t.Fatalf("ParseEntryHeader hash size %d: expected panic", hashSize)
+ }
+ }()
+
+ _, _ = packfile.ParseEntryHeader([]byte{0x95, 0x05}, hashSize)
+ }()
}
}
diff --git a/internal/format/packidx/doc.go b/internal/format/packidx/doc.go
new file mode 100644
index 00000000..5c6015b4
--- /dev/null
+++ b/internal/format/packidx/doc.go
@@ -0,0 +1,3 @@
+// Package packidx provides Git pack index (version 2) format
+// parsing and writing primitives.
+package packidx
diff --git a/internal/format/packidx/helpers_test.go b/internal/format/packidx/helpers_test.go
new file mode 100644
index 00000000..72bb4663
--- /dev/null
+++ b/internal/format/packidx/helpers_test.go
@@ -0,0 +1,95 @@
+package packidx_test
+
+import (
+ "fmt"
+ "os"
+ "slices"
+ "strconv"
+ "strings"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/testgit"
+ "lindenii.org/go/furgit/object/id"
+ "lindenii.org/go/furgit/object/typ"
+)
+
+// makeGitPack builds a repository with some blobs,
+// packs them with git pack-objects,
+// and returns the repository, the artifact path prefix,
+// and the packed object IDs.
+func makeGitPack(t *testing.T, objectFormat id.ObjectFormat) (*testgit.Repo, string, []id.ObjectID) {
+ t.Helper()
+
+ repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat})
+ if err != nil {
+ t.Fatalf("NewRepo: %v", err)
+ }
+
+ oids := make([]id.ObjectID, 0, 40)
+
+ for i := range 40 {
+ content := fmt.Sprintf("blob %d\n%s", i, strings.Repeat("filler\n", i))
+
+ oid, err := repo.HashObject(t, typ.Blob, strings.NewReader(content))
+ if err != nil {
+ t.Fatalf("HashObject: %v", err)
+ }
+
+ oids = append(oids, oid)
+ }
+
+ prefix, err := repo.PackObjects(t, slices.Values(oids), testgit.PackObjectsOptions{})
+ if err != nil {
+ t.Fatalf("PackObjects: %v", err)
+ }
+
+ return repo, prefix, oids
+}
+
+// parseGitIdxFile reads and parses one .idx file produced by git.
+func parseGitIdxFile(t *testing.T, prefix string, objectFormat id.ObjectFormat) ([]byte, packidx.Packidx) {
+ t.Helper()
+
+ data, err := os.ReadFile(prefix + ".idx") //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile: %v", err)
+ }
+
+ idx, err := packidx.Parse(data, objectFormat.Size())
+ if err != nil {
+ t.Fatalf("Parse: %v", err)
+ }
+
+ return data, idx
+}
+
+// gitPackOffsets extracts the object-to-offset mapping
+// from git verify-pack -v output.
+func gitPackOffsets(t *testing.T, repo *testgit.Repo, idxPath string, objectFormat id.ObjectFormat) map[string]uint64 {
+ t.Helper()
+
+ out, err := repo.VerifyPack(t, idxPath)
+ if err != nil {
+ t.Fatalf("VerifyPack: %v", err)
+ }
+
+ hexLen := objectFormat.HexLen()
+ offsets := make(map[string]uint64)
+
+ for line := range strings.Lines(string(out)) {
+ fields := strings.Fields(line)
+ if len(fields) < 5 || len(fields[0]) != hexLen {
+ continue
+ }
+
+ offset, err := strconv.ParseUint(fields[4], 10, 64)
+ if err != nil {
+ continue
+ }
+
+ offsets[fields[0]] = offset
+ }
+
+ return offsets
+}
diff --git a/internal/format/packidx/lookup.go b/internal/format/packidx/lookup.go
new file mode 100644
index 00000000..d1293f47
--- /dev/null
+++ b/internal/format/packidx/lookup.go
@@ -0,0 +1,73 @@
+package packidx
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+)
+
+// Lookup searches the index for one object ID
+// and returns its pack file offset.
+//
+// oid must be exactly the index's hash size;
+// Lookup panics otherwise.
+func (idx *Packidx) Lookup(oid []byte) (offset uint64, found bool, err error) {
+ if len(oid) != idx.hashSize {
+ panic("internal/format/packidx: invalid object ID length")
+ }
+
+ lo, hi := idx.fanoutRange(oid[0])
+
+ for lo < hi {
+ mid := lo + (hi-lo)/2
+
+ cmp := bytes.Compare(oid, idx.OIDAt(mid))
+
+ switch {
+ case cmp == 0:
+ offset, err = idx.OffsetAt(mid)
+ if err != nil {
+ return 0, false, err
+ }
+
+ return offset, true, nil
+ case cmp < 0:
+ hi = mid
+ default:
+ lo = mid + 1
+ }
+ }
+
+ return 0, false, nil
+}
+
+// OffsetAt returns the pack file offset at one index position.
+//
+// OffsetAt panics when pos is out of range.
+func (idx *Packidx) OffsetAt(pos int) (uint64, error) {
+ idx.checkPos(pos)
+
+ raw := binary.BigEndian.Uint32(idx.data[idx.off32Off+4*pos:])
+ if raw&largeOffsetFlag == 0 {
+ return uint64(raw), nil
+ }
+
+ slot := raw &^ largeOffsetFlag
+ if uint64(slot) >= idx.off64Count {
+ return 0, fmt.Errorf("%w: 64-bit offset reference out of range", ErrMalformedPackIndex)
+ }
+
+ return binary.BigEndian.Uint64(idx.data[idx.off64Off+8*int(slot):]), nil
+}
+
+// fanoutRange returns the index position range [lo, hi)
+// of object IDs beginning with first.
+func (idx *Packidx) fanoutRange(first byte) (lo, hi int) {
+ hi = int(binary.BigEndian.Uint32(idx.data[headerLen+4*int(first):]))
+
+ if first > 0 {
+ lo = int(binary.BigEndian.Uint32(idx.data[headerLen+4*(int(first)-1):]))
+ }
+
+ return lo, hi
+}
diff --git a/internal/format/packidx/lookup_test.go b/internal/format/packidx/lookup_test.go
new file mode 100644
index 00000000..514aad7a
--- /dev/null
+++ b/internal/format/packidx/lookup_test.go
@@ -0,0 +1,99 @@
+package packidx_test
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/object/id"
+)
+
+func TestLookupGitIndex(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ repo, prefix, oids := makeGitPack(t, objectFormat)
+ _, idx := parseGitIdxFile(t, prefix, objectFormat)
+
+ wantOffsets := gitPackOffsets(t, repo, prefix+".idx", objectFormat)
+ if len(wantOffsets) != len(oids) {
+ t.Fatalf("verify-pack offsets = %d entries, want %d", len(wantOffsets), len(oids))
+ }
+
+ for _, oid := range oids {
+ offset, found, err := idx.Lookup(oid.RawBytes())
+ if err != nil {
+ t.Fatalf("Lookup(%s): %v", oid, err)
+ }
+
+ if !found {
+ t.Fatalf("Lookup(%s) not found", oid)
+ }
+
+ if offset != wantOffsets[oid.String()] {
+ t.Fatalf("Lookup(%s) = %d, want %d", oid, offset, wantOffsets[oid.String()])
+ }
+ }
+ })
+ }
+}
+
+func TestLookupMissing(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ _, prefix, oids := makeGitPack(t, objectFormat)
+ _, idx := parseGitIdxFile(t, prefix, objectFormat)
+
+ missing := bytes.Clone(oids[0].RawBytes())
+ missing[len(missing)-1] ^= 0xff
+
+ _, found, err := idx.Lookup(missing)
+ if err != nil {
+ t.Fatalf("Lookup: %v", err)
+ }
+
+ if found {
+ t.Fatalf("Lookup of mutated oid unexpectedly found")
+ }
+ })
+ }
+}
+
+func TestOffsetAtMalformedLargeReference(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ hashSize := objectFormat.Size()
+
+ entries := syntheticEntries(3)
+ data := writeSyntheticIndex(t, objectFormat, entries)
+
+ // Mark the first 32-bit offset entry as a 64-bit reference;
+ // the index has no 64-bit offset table.
+ off32Off := 8 + 256*4 + len(entries)*hashSize + len(entries)*4
+ binary.BigEndian.PutUint32(data[off32Off:], 0x80000000)
+
+ idx, err := packidx.Parse(data, hashSize)
+ if err != nil {
+ t.Fatalf("Parse: %v", err)
+ }
+
+ _, err = idx.OffsetAt(0)
+ if !errors.Is(err, packidx.ErrMalformedPackIndex) {
+ t.Fatalf("OffsetAt error = %v, want ErrMalformedPackIndex", err)
+ }
+ })
+ }
+}
diff --git a/internal/format/packidx/packidx.go b/internal/format/packidx/packidx.go
new file mode 100644
index 00000000..ef2c93f4
--- /dev/null
+++ b/internal/format/packidx/packidx.go
@@ -0,0 +1,197 @@
+package packidx
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+
+ "lindenii.org/go/furgit/object/id"
+ "lindenii.org/go/lgo/intconv"
+)
+
+// ErrMalformedPackIndex reports that
+// a pack index is truncated,
+// has a bad signature or unsupported version,
+// or has inconsistent tables.
+var ErrMalformedPackIndex = errors.New("internal/format/packidx: malformed pack index")
+
+const (
+ signature = 0xff744f63
+ version = 2
+
+ headerLen = 8
+ fanoutLen = 256 * 4
+
+ // largeOffsetFlag marks one 32-bit offset table entry
+ // as an index into the 64-bit offset table.
+ largeOffsetFlag = 0x80000000
+)
+
+// Packidx is one parsed pack index view over borrowed bytes.
+//
+// Labels: Deps-Borrowed, Life-Parent, MT-Safe.
+type Packidx struct {
+ // data is the entire pack index payload.
+ data []byte
+ // hashSize is the object ID size of the index's object format.
+ hashSize int
+
+ // numObjects is the object count from the last fanout entry.
+ numObjects int
+
+ // namesOff, crcOff, off32Off, and off64Off are
+ // the byte offsets of the object ID, CRC32,
+ // 32-bit offset, and 64-bit offset tables.
+ namesOff int
+ crcOff int
+ off32Off int
+ off64Off int
+ // off64Count is the number of 64-bit offset table entries.
+ off64Count uint64
+}
+
+// Parse parses one pack index from data.
+//
+// hashSize must be the object ID size
+// of the pack's object format;
+// Parse panics on implausible hash sizes.
+func Parse(data []byte, hashSize int) (Packidx, error) {
+ var zero Packidx
+
+ if hashSize <= 0 || hashSize > id.MaxObjectIDSize {
+ panic("internal/format/packidx: invalid hash size")
+ }
+
+ if len(data) < headerLen+fanoutLen+2*hashSize {
+ return zero, fmt.Errorf("%w: truncated", ErrMalformedPackIndex)
+ }
+
+ if binary.BigEndian.Uint32(data) != signature {
+ return zero, fmt.Errorf("%w: bad signature", ErrMalformedPackIndex)
+ }
+
+ if binary.BigEndian.Uint32(data[4:]) != version {
+ return zero, fmt.Errorf("%w: unsupported version", ErrMalformedPackIndex)
+ }
+
+ prev := uint32(0)
+
+ for i := range 256 {
+ count := binary.BigEndian.Uint32(data[headerLen+4*i:])
+ if count < prev {
+ return zero, fmt.Errorf("%w: non-monotonic fanout", ErrMalformedPackIndex)
+ }
+
+ prev = count
+ }
+
+ numObjects := uint64(prev)
+ hashSize64 := uint64(hashSize)
+
+ namesOff := uint64(headerLen + fanoutLen)
+ crcOff := namesOff + numObjects*hashSize64
+ off32Off := crcOff + 4*numObjects
+ off64Off := off32Off + 4*numObjects
+
+ minTotal := off64Off + 2*hashSize64
+
+ dataLen, err := intconv.IntToUint64(len(data))
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err)
+ }
+
+ if dataLen < minTotal {
+ return zero, fmt.Errorf("%w: tables exceed index size", ErrMalformedPackIndex)
+ }
+
+ off64Bytes := dataLen - minTotal
+ if off64Bytes%8 != 0 {
+ return zero, fmt.Errorf("%w: trailing table size not a 64-bit offset multiple", ErrMalformedPackIndex)
+ }
+
+ off64Count := off64Bytes / 8
+ if off64Count > numObjects {
+ return zero, fmt.Errorf("%w: more 64-bit offsets than objects", ErrMalformedPackIndex)
+ }
+
+ idx := Packidx{
+ data: data,
+ hashSize: hashSize,
+ off64Count: off64Count,
+ }
+
+ idx.numObjects, err = intconv.Uint64ToInt(numObjects)
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err)
+ }
+
+ idx.namesOff, err = intconv.Uint64ToInt(namesOff)
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err)
+ }
+
+ idx.crcOff, err = intconv.Uint64ToInt(crcOff)
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err)
+ }
+
+ idx.off32Off, err = intconv.Uint64ToInt(off32Off)
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err)
+ }
+
+ idx.off64Off, err = intconv.Uint64ToInt(off64Off)
+ if err != nil {
+ return zero, fmt.Errorf("%w: %w", ErrMalformedPackIndex, err)
+ }
+
+ return idx, nil
+}
+
+// NumObjects returns the number of objects in the index.
+func (idx *Packidx) NumObjects() int {
+ return idx.numObjects
+}
+
+// PackHash returns the pack hash recorded in the index trailer.
+//
+// Labels: Life-Parent, Mut-No.
+func (idx *Packidx) PackHash() []byte {
+ return idx.data[len(idx.data)-2*idx.hashSize : len(idx.data)-idx.hashSize]
+}
+
+// OIDAt returns the object ID bytes at one index position.
+// Positions follow object ID sort order.
+//
+// OIDAt panics when pos is out of range.
+//
+// Labels: Life-Parent, Mut-No.
+func (idx *Packidx) OIDAt(pos int) []byte {
+ idx.checkPos(pos)
+
+ start := idx.namesOff + pos*idx.hashSize
+
+ return idx.data[start : start+idx.hashSize]
+}
+
+// CRCAt returns the CRC32 of the packed entry data
+// at one index position.
+//
+// CRCAt panics when pos is out of range.
+func (idx *Packidx) CRCAt(pos int) uint32 {
+ idx.checkPos(pos)
+
+ return binary.BigEndian.Uint32(idx.data[idx.crcOff+4*pos:])
+}
+
+// checkPos panics when pos is not a valid index position.
+//
+// An out-of-range position is a caller bug
+// that slice bounds checking would not catch,
+// since the tables share one data slice;
+// an unchecked access would silently read other tables' bytes.
+func (idx *Packidx) checkPos(pos int) {
+ if pos < 0 || pos >= idx.numObjects {
+ panic("internal/format/packidx: index position out of range")
+ }
+}
diff --git a/internal/format/packidx/packidx_test.go b/internal/format/packidx/packidx_test.go
new file mode 100644
index 00000000..d225a993
--- /dev/null
+++ b/internal/format/packidx/packidx_test.go
@@ -0,0 +1,124 @@
+package packidx_test
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "os"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/object/id"
+)
+
+func TestParseGitIndex(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ _, prefix, oids := makeGitPack(t, objectFormat)
+ _, idx := parseGitIdxFile(t, prefix, objectFormat)
+
+ if idx.NumObjects() != len(oids) {
+ t.Fatalf("NumObjects = %d, want %d", idx.NumObjects(), len(oids))
+ }
+
+ for pos := 1; pos < idx.NumObjects(); pos++ {
+ if bytes.Compare(idx.OIDAt(pos-1), idx.OIDAt(pos)) >= 0 {
+ t.Fatalf("OIDAt(%d) not sorted after predecessor", pos)
+ }
+ }
+
+ packData, err := os.ReadFile(prefix + ".pack") //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile: %v", err)
+ }
+
+ packTrailer := packData[len(packData)-objectFormat.Size():]
+ if !bytes.Equal(idx.PackHash(), packTrailer) {
+ t.Fatalf("PackHash does not match pack trailer")
+ }
+ })
+ }
+}
+
+func TestParseMalformed(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ hashSize := objectFormat.Size()
+
+ valid := writeSyntheticIndex(t, objectFormat, syntheticEntries(8))
+
+ corrupt := func(mutate func(data []byte) []byte) []byte {
+ return mutate(bytes.Clone(valid))
+ }
+
+ cases := []struct {
+ name string
+ data []byte
+ }{
+ {name: "empty", data: []byte{}},
+ {name: "truncated", data: corrupt(func(d []byte) []byte { return d[:20] })},
+ {
+ name: "bad signature",
+ data: corrupt(func(d []byte) []byte {
+ d[0] ^= 0xff
+
+ return d
+ }),
+ },
+ {
+ name: "bad version",
+ data: corrupt(func(d []byte) []byte {
+ d[7] = 3
+
+ return d
+ }),
+ },
+ {
+ name: "non-monotonic fanout",
+ data: corrupt(func(d []byte) []byte {
+ binary.BigEndian.PutUint32(d[8:], 0xffffffff)
+
+ return d
+ }),
+ },
+ {
+ name: "tables exceed index size",
+ data: corrupt(func(d []byte) []byte {
+ binary.BigEndian.PutUint32(d[8+255*4:], 0x00ffffff)
+
+ return d
+ }),
+ },
+ {
+ name: "trailing size not 64-bit multiple",
+ data: corrupt(func(d []byte) []byte { return append(d, 0xde, 0xad, 0xbe, 0xef) }),
+ },
+ {
+ name: "more 64-bit offsets than objects",
+ data: corrupt(func(d []byte) []byte {
+ return append(d, bytes.Repeat([]byte{0x00}, 9*8)...)
+ }),
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ _, err := packidx.Parse(tc.data, hashSize)
+ if !errors.Is(err, packidx.ErrMalformedPackIndex) {
+ t.Fatalf("Parse error = %v, want ErrMalformedPackIndex", err)
+ }
+ })
+ }
+ })
+ }
+}
diff --git a/internal/format/packidx/roundtrip_test.go b/internal/format/packidx/roundtrip_test.go
new file mode 100644
index 00000000..f5bd82e7
--- /dev/null
+++ b/internal/format/packidx/roundtrip_test.go
@@ -0,0 +1,88 @@
+package packidx_test
+
+import (
+ "bytes"
+ "math"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/object/id"
+)
+
+func TestWriteRoundTrip(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ hashSize := objectFormat.Size()
+
+ entries := syntheticEntries(20)
+ entries[3].Offset = 1 << 33
+ entries[11].Offset = math.MaxUint64
+ entries[12].Offset = 0x7fffffff
+ entries[13].Offset = 0x80000000
+
+ data := writeSyntheticIndex(t, objectFormat, entries)
+
+ idx, err := packidx.Parse(data, hashSize)
+ if err != nil {
+ t.Fatalf("Parse: %v", err)
+ }
+
+ if idx.NumObjects() != len(entries) {
+ t.Fatalf("NumObjects = %d, want %d", idx.NumObjects(), len(entries))
+ }
+
+ if !bytes.Equal(idx.PackHash(), bytes.Repeat([]byte{0x5a}, hashSize)) {
+ t.Fatalf("PackHash mismatch")
+ }
+
+ for pos, entry := range entries {
+ if !bytes.Equal(idx.OIDAt(pos), entry.OID[:hashSize]) {
+ t.Fatalf("OIDAt(%d) mismatch", pos)
+ }
+
+ if idx.CRCAt(pos) != entry.CRC32 {
+ t.Fatalf("CRCAt(%d) = %x, want %x", pos, idx.CRCAt(pos), entry.CRC32)
+ }
+
+ offset, err := idx.OffsetAt(pos)
+ if err != nil {
+ t.Fatalf("OffsetAt(%d): %v", pos, err)
+ }
+
+ if offset != entry.Offset {
+ t.Fatalf("OffsetAt(%d) = %d, want %d", pos, offset, entry.Offset)
+ }
+
+ offset, found, err := idx.Lookup(entry.OID[:hashSize])
+ if err != nil {
+ t.Fatalf("Lookup(%d): %v", pos, err)
+ }
+
+ if !found || offset != entry.Offset {
+ t.Fatalf("Lookup(%d) = %d %v, want %d found", pos, offset, found, entry.Offset)
+ }
+ }
+ })
+ }
+}
+
+// writeSyntheticIndex writes one index over entries
+// with a fixed fake pack hash.
+func writeSyntheticIndex(t *testing.T, objectFormat id.ObjectFormat, entries []packidx.Entry) []byte {
+ t.Helper()
+
+ packHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+ var buf bytes.Buffer
+
+ err := packidx.Write(&buf, objectFormat, entries, packHash)
+ if err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+
+ return buf.Bytes()
+}
diff --git a/internal/format/packidx/write.go b/internal/format/packidx/write.go
new file mode 100644
index 00000000..2bcdb51a
--- /dev/null
+++ b/internal/format/packidx/write.go
@@ -0,0 +1,158 @@
+package packidx
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+ "math"
+
+ "lindenii.org/go/furgit/object/id"
+)
+
+// ErrInvalidEntries reports that
+// entries supplied for an index write
+// are unsorted, duplicated, or not representable
+// in the pack index format.
+var ErrInvalidEntries = errors.New("internal/format/packidx: invalid entries")
+
+// Entry is one object record for an index write.
+type Entry struct {
+ // OID holds the object ID bytes;
+ // only the first hash-size bytes are meaningful.
+ OID [id.MaxObjectIDSize]byte
+ // Offset is the entry's pack file offset.
+ Offset uint64
+ // CRC32 is the CRC32 of the entry's packed data.
+ CRC32 uint32
+}
+
+// Write writes one pack index over entries to w.
+//
+// entries must be sorted by object ID without duplicates.
+// packHash must be the pack's trailer hash;
+// Write panics when its length does not match the object format.
+func Write(w io.Writer, objectFormat id.ObjectFormat, entries []Entry, packHash []byte) error {
+ hashSize := objectFormat.Size()
+ if hashSize == 0 {
+ return id.ErrInvalidObjectFormat
+ }
+
+ if len(packHash) != hashSize {
+ panic("internal/format/packidx: invalid pack hash length")
+ }
+
+ if len(entries) > math.MaxUint32 {
+ return fmt.Errorf("%w: too many entries", ErrInvalidEntries)
+ }
+
+ for i := 1; i < len(entries); i++ {
+ if bytes.Compare(entries[i-1].OID[:hashSize], entries[i].OID[:hashSize]) >= 0 {
+ return fmt.Errorf("%w: not sorted by object ID", ErrInvalidEntries)
+ }
+ }
+
+ hashImpl, err := objectFormat.New()
+ if err != nil {
+ return fmt.Errorf("internal/format/packidx: %w", err)
+ }
+
+ bw := bufio.NewWriter(io.MultiWriter(w, hashImpl))
+ sw := &stickyWriter{w: bw}
+
+ sw.writeUint32(signature)
+ sw.writeUint32(version)
+
+ var counts [256]uint32
+ for i := range entries {
+ counts[entries[i].OID[0]]++
+ }
+
+ cumulative := uint32(0)
+ for _, count := range counts {
+ cumulative += count
+ sw.writeUint32(cumulative)
+ }
+
+ for i := range entries {
+ sw.write(entries[i].OID[:hashSize])
+ }
+
+ for i := range entries {
+ sw.writeUint32(entries[i].CRC32)
+ }
+
+ var largeOffsets []uint64
+
+ for i := range entries {
+ offset := entries[i].Offset
+ if offset < largeOffsetFlag {
+ sw.writeUint32(uint32(offset))
+
+ continue
+ }
+
+ slot := len(largeOffsets)
+ if slot >= largeOffsetFlag {
+ return fmt.Errorf("%w: too many large offsets", ErrInvalidEntries)
+ }
+
+ sw.writeUint32(largeOffsetFlag | uint32(slot))
+
+ largeOffsets = append(largeOffsets, offset)
+ }
+
+ for _, offset := range largeOffsets {
+ sw.writeUint64(offset)
+ }
+
+ sw.write(packHash)
+
+ if sw.err != nil {
+ return fmt.Errorf("internal/format/packidx: %w", sw.err)
+ }
+
+ err = bw.Flush()
+ if err != nil {
+ return fmt.Errorf("internal/format/packidx: %w", err)
+ }
+
+ _, err = w.Write(hashImpl.Sum(nil))
+ if err != nil {
+ return fmt.Errorf("internal/format/packidx: %w", err)
+ }
+
+ return nil
+}
+
+// stickyWriter forwards writes to w
+// and retains the first error,
+// turning subsequent writes into no-ops.
+type stickyWriter struct {
+ w io.Writer
+ err error
+}
+
+func (sw *stickyWriter) write(p []byte) {
+ if sw.err != nil {
+ return
+ }
+
+ _, sw.err = sw.w.Write(p)
+}
+
+func (sw *stickyWriter) writeUint32(v uint32) {
+ var buf [4]byte
+
+ binary.BigEndian.PutUint32(buf[:], v)
+ sw.write(buf[:])
+}
+
+func (sw *stickyWriter) writeUint64(v uint64) {
+ var buf [8]byte
+
+ binary.BigEndian.PutUint64(buf[:], v)
+ sw.write(buf[:])
+}
diff --git a/internal/format/packidx/write_test.go b/internal/format/packidx/write_test.go
new file mode 100644
index 00000000..68df3ece
--- /dev/null
+++ b/internal/format/packidx/write_test.go
@@ -0,0 +1,112 @@
+package packidx_test
+
+import (
+ "bytes"
+ "errors"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/object/id"
+)
+
+// syntheticEntries builds n distinct entries sorted by object ID,
+// spread over the fanout table.
+func syntheticEntries(n int) []packidx.Entry {
+ entries := make([]packidx.Entry, n)
+ for i := range entries {
+ entries[i].OID[0] = byte(i * 7)
+ entries[i].OID[1] = byte(i + 1)
+ entries[i].Offset = uint64(i+1) * 100
+ entries[i].CRC32 = uint32(i+1) * 0x01010101
+ }
+
+ return entries
+}
+
+func TestWriteMatchesGit(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ _, prefix, _ := makeGitPack(t, objectFormat)
+ gitData, idx := parseGitIdxFile(t, prefix, objectFormat)
+
+ entries := make([]packidx.Entry, idx.NumObjects())
+ for pos := range entries {
+ copy(entries[pos].OID[:], idx.OIDAt(pos))
+ entries[pos].CRC32 = idx.CRCAt(pos)
+
+ offset, err := idx.OffsetAt(pos)
+ if err != nil {
+ t.Fatalf("OffsetAt(%d): %v", pos, err)
+ }
+
+ entries[pos].Offset = offset
+ }
+
+ var buf bytes.Buffer
+
+ err := packidx.Write(&buf, objectFormat, entries, idx.PackHash())
+ if err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+
+ if !bytes.Equal(buf.Bytes(), gitData) {
+ t.Fatalf("Write output differs from git's index (%d vs %d bytes)", buf.Len(), len(gitData))
+ }
+ })
+ }
+}
+
+func TestWriteInvalidEntries(t *testing.T) {
+ t.Parallel()
+
+ unsorted := syntheticEntries(3)
+ unsorted[0], unsorted[2] = unsorted[2], unsorted[0]
+
+ duplicated := syntheticEntries(3)
+ duplicated[1] = duplicated[0]
+
+ cases := []struct {
+ name string
+ entries []packidx.Entry
+ }{
+ {name: "unsorted", entries: unsorted},
+ {name: "duplicated", entries: duplicated},
+ }
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ packHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ t.Parallel()
+
+ err := packidx.Write(&bytes.Buffer{}, objectFormat, tc.entries, packHash)
+ if !errors.Is(err, packidx.ErrInvalidEntries) {
+ t.Fatalf("Write error = %v, want ErrInvalidEntries", err)
+ }
+ })
+ }
+ })
+ }
+}
+
+func TestWriteBadPackHashPanics(t *testing.T) {
+ t.Parallel()
+
+ objectFormat := id.ObjectFormatSHA256
+
+ defer func() {
+ if recover() == nil {
+ t.Fatalf("Write with short pack hash: expected panic")
+ }
+ }()
+
+ _ = packidx.Write(&bytes.Buffer{}, objectFormat, nil, []byte{0x01})
+}
diff --git a/internal/iolimit/expect_length_reader.go b/internal/iolimit/expect_length_reader.go
index 1efe2b75..72b0d912 100644
--- a/internal/iolimit/expect_length_reader.go
+++ b/internal/iolimit/expect_length_reader.go
@@ -53,7 +53,7 @@ func (reader *expectLengthReader) Read(dst []byte) (int, error) {
return 0, nil
}
- return 0, err //nolint:wrapcheck
+ return 0, err
}
if uint64(len(dst)) > reader.remaining {
@@ -87,5 +87,5 @@ func (reader *expectLengthReader) Read(dst []byte) (int, error) {
return 0, io.EOF
}
- return n, err //nolint:wrapcheck
+ return n, err
}
diff --git a/internal/testgit/packobjects.go b/internal/testgit/packobjects.go
new file mode 100644
index 00000000..9f56eef0
--- /dev/null
+++ b/internal/testgit/packobjects.go
@@ -0,0 +1,47 @@
+package testgit
+
+import (
+ "bytes"
+ "iter"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "lindenii.org/go/furgit/object/id"
+)
+
+// PackObjectsOptions controls one pack-objects invocation.
+type PackObjectsOptions struct {
+ // RevIndex requests writing a .rev reverse index alongside the pack.
+ RevIndex bool
+}
+
+// PackObjects packs the supplied objects with git pack-objects
+// into a temporary directory,
+// and returns the artifact path prefix "<dir>/pack-<hash>",
+// to which ".pack", ".idx", and ".rev" suffixes apply.
+func (repo *Repo) PackObjects(tb testing.TB, oids iter.Seq[id.ObjectID], opts PackObjectsOptions) (string, error) {
+ tb.Helper()
+
+ dir := tb.TempDir()
+
+ var stdin bytes.Buffer
+ for oid := range oids {
+ stdin.WriteString(oid.String())
+ stdin.WriteByte('\n')
+ }
+
+ revIndex := "false"
+ if opts.RevIndex {
+ revIndex = "true"
+ }
+
+ out, err := repo.run(tb, &stdin,
+ "git", "-c", "pack.writeReverseIndex="+revIndex,
+ "pack-objects", "--end-of-options", filepath.Join(dir, "pack"))
+ if err != nil {
+ return "", err
+ }
+
+ return filepath.Join(dir, "pack-"+strings.TrimSpace(string(out))), nil
+}
diff --git a/internal/testgit/verifypack.go b/internal/testgit/verifypack.go
new file mode 100644
index 00000000..ec59572a
--- /dev/null
+++ b/internal/testgit/verifypack.go
@@ -0,0 +1,13 @@
+package testgit
+
+import (
+ "testing"
+)
+
+// VerifyPack runs git verify-pack -v on one pack index path
+// and returns its raw verbose output.
+func (repo *Repo) VerifyPack(tb testing.TB, idxPath string) ([]byte, error) {
+ tb.Helper()
+
+ return repo.run(tb, nil, "git", "verify-pack", "-v", "--end-of-options", idxPath)
+}