about summary refs log tree commit diff
path: root/internal/format/packrev
diff options
context:
space:
mode:
Diffstat (limited to 'internal/format/packrev')
-rw-r--r-- internal/format/packrev/doc.go 3
-rw-r--r-- internal/format/packrev/helpers_test.go 75
-rw-r--r-- internal/format/packrev/packrev.go 124
-rw-r--r-- internal/format/packrev/packrev_test.go 160
-rw-r--r-- internal/format/packrev/write.go 79
-rw-r--r-- internal/format/packrev/write_test.go 135
6 files changed, 576 insertions, 0 deletions
diff --git a/internal/format/packrev/doc.go b/internal/format/packrev/doc.go
new file mode 100644
index 00000000..6ce8113e
--- /dev/null
+++ b/internal/format/packrev/doc.go
@@ -0,0 +1,3 @@
+// Package packrev provides Git pack reverse index (version 1) format
+// parsing and writing primitives.
+package packrev
diff --git a/internal/format/packrev/helpers_test.go b/internal/format/packrev/helpers_test.go
new file mode 100644
index 00000000..2d781669
--- /dev/null
+++ b/internal/format/packrev/helpers_test.go
@@ -0,0 +1,75 @@
+package packrev_test
+
+import (
+ "cmp"
+ "slices"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/testgit"
+ "lindenii.org/go/furgit/object/id"
+ "lindenii.org/go/lgo/intconv"
+)
+
+// makeGitPack creates a test repository in objectFormat, seeds it with history,
+// packs the seeded objects with git pack-objects, reverse index included,
+// and returns the path prefix shared by the resulting artifacts.
+func makeGitPack(t *testing.T, objectFormat id.ObjectFormat) string {
+	t.Helper()
+
+	repository, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat})
+	if err != nil {
+		t.Fatalf("NewRepo: %v", err)
+	}
+
+	history, err := repository.SeedHistory(t)
+	if err != nil {
+		t.Fatalf("SeedHistory: %v", err)
+	}
+
+	packOptions := testgit.PackObjectsOptions{RevIndex: true}
+	artifactPrefix, err := repository.PackObjects(t, history.All(), packOptions)
+	if err != nil {
+		t.Fatalf("PackObjects: %v", err)
+	}
+
+	return artifactPrefix
+}
+
+// packOrderPositions lists the index positions of idx
+// in ascending pack offset order,
+// which is the order a reverse index stores them in.
+// It fails the test when an offset cannot be read
+// or an index position does not fit in uint32.
+func packOrderPositions(t *testing.T, idx *packidx.Packidx) []uint32 {
+	t.Helper()
+
+	count := idx.NumObjects()
+
+	// offsets[i] is the pack offset of the object at index position i;
+	// order starts out as the identity sequence 0, 1, ..., count-1.
+	offsets := make([]uint64, 0, count)
+	order := make([]uint32, 0, count)
+
+	for i := range count {
+		offset, err := idx.OffsetAt(i)
+		if err != nil {
+			t.Fatalf("OffsetAt(%d): %v", i, err)
+		}
+
+		position, err := intconv.IntToUint32(i)
+		if err != nil {
+			t.Fatalf("IntToUint32(%d): %v", i, err)
+		}
+
+		offsets = append(offsets, offset)
+		order = append(order, position)
+	}
+
+	// Sort the positions by the offset each one refers to.
+	slices.SortFunc(order, func(a, b uint32) int {
+		return cmp.Compare(offsets[a], offsets[b])
+	})
+
+	return order
+}
diff --git a/internal/format/packrev/packrev.go b/internal/format/packrev/packrev.go
new file mode 100644
index 00000000..3a6dc2de
--- /dev/null
+++ b/internal/format/packrev/packrev.go
@@ -0,0 +1,124 @@
+package packrev
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+
+ "lindenii.org/go/furgit/object/id"
+ "lindenii.org/go/lgo/intconv"
+)
+
+// ErrMalformedReverseIndex reports that a pack reverse index is truncated,
+// has a bad signature, version, hash function, or position table size,
+// or stores an out-of-range index position.
+// Parse and PositionAt wrap it with detail, so match it with errors.Is.
+var ErrMalformedReverseIndex = errors.New("internal/format/packrev: malformed pack reverse index")
+
+const (
+ signature = 0x52494458 // "RIDX"
+ version = 1 // the only format version Parse accepts and Write emits
+
+ headerLen = 12 // signature, version, and hash function ID: three 32-bit fields
+)
+
+// hashFunctionID maps objectFormat to the hash function identifier
+// stored in the header: 1 for SHA-1, 2 for SHA-256, otherwise an error.
+func hashFunctionID(objectFormat id.ObjectFormat) (uint32, error) {
+	if objectFormat == id.ObjectFormatSHA1 {
+		return 1, nil
+	}
+
+	if objectFormat == id.ObjectFormatSHA256 {
+		return 2, nil
+	}
+
+	return 0, id.ErrInvalidObjectFormat
+}
+
+// Packrev is a parsed pack reverse index view over the bytes handed to Parse, which it borrows.
+//
+// Labels: Deps-Borrowed, Life-Parent, MT-Safe.
+type Packrev struct {
+ // data is the whole reverse index: header, position table, pack hash, and final hash.
+ data []byte
+ // hashSize is the byte length of one hash, taken from the object format's Size.
+ hashSize int
+ // numObjects is the number of 4-byte entries in the position table.
+ numObjects int
+}
+
+// Parse validates the framing of a pack reverse index held in data
+// and returns a view that keeps referring to data.
+//
+// It checks the minimum length, signature, version, hash function,
+// and that the position table is a whole number of 32-bit entries.
+// Stored positions are checked later, by PositionAt.
+func Parse(data []byte, objectFormat id.ObjectFormat) (Packrev, error) {
+	wantHashID, err := hashFunctionID(objectFormat)
+	if err != nil {
+		return Packrev{}, err
+	}
+
+	hashSize := objectFormat.Size()
+	trailerLen := 2 * hashSize
+
+	if len(data) < headerLen+trailerLen {
+		return Packrev{}, fmt.Errorf("%w: truncated", ErrMalformedReverseIndex)
+	}
+
+	gotSignature := binary.BigEndian.Uint32(data[0:4])
+	gotVersion := binary.BigEndian.Uint32(data[4:8])
+	gotHashID := binary.BigEndian.Uint32(data[8:12])
+
+	tableLen := len(data) - headerLen - trailerLen
+
+	switch {
+	case gotSignature != signature:
+		return Packrev{}, fmt.Errorf("%w: bad signature", ErrMalformedReverseIndex)
+	case gotVersion != version:
+		return Packrev{}, fmt.Errorf("%w: unsupported version", ErrMalformedReverseIndex)
+	case gotHashID != wantHashID:
+		return Packrev{}, fmt.Errorf("%w: hash function mismatch", ErrMalformedReverseIndex)
+	case tableLen%4 != 0:
+		return Packrev{}, fmt.Errorf("%w: position table size not a 32-bit multiple", ErrMalformedReverseIndex)
+	}
+
+	return Packrev{data: data, hashSize: hashSize, numObjects: tableLen / 4}, nil
+}
+
+// NumObjects returns the number of index position entries; PositionAt accepts pack order positions below it.
+func (rev *Packrev) NumObjects() int {
+ return rev.numObjects
+}
+
+// PackHash returns the pack hash stored in the trailer, ahead of the final hash.
+//
+// Labels: Life-Parent, Mut-No.
+func (rev *Packrev) PackHash() []byte {
+	return rev.data[len(rev.data)-2*rev.hashSize:][:rev.hashSize]
+}
+
+// PositionAt reports which pack index position belongs to the object
+// that is packOrder-th in pack offset order.
+//
+// An out-of-range packOrder is a caller bug and panics.
+// A stored position that is not a valid index position is returned as an error.
+func (rev *Packrev) PositionAt(packOrder int) (int, error) {
+	inRange := 0 <= packOrder && packOrder < rev.numObjects
+	if !inRange {
+		panic("internal/format/packrev: pack order position out of range")
+	}
+
+	entry := rev.data[headerLen:][4*packOrder:]
+	position, err := intconv.Uint32ToInt(binary.BigEndian.Uint32(entry))
+
+	switch {
+	case err != nil:
+		return 0, fmt.Errorf("%w: %w", ErrMalformedReverseIndex, err)
+	case position >= rev.numObjects:
+		return 0, fmt.Errorf("%w: index position out of range", ErrMalformedReverseIndex)
+	default:
+		return position, nil
+	}
+}
diff --git a/internal/format/packrev/packrev_test.go b/internal/format/packrev/packrev_test.go
new file mode 100644
index 00000000..b644e15e
--- /dev/null
+++ b/internal/format/packrev/packrev_test.go
@@ -0,0 +1,160 @@
+package packrev_test
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "os"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packrev"
+ "lindenii.org/go/furgit/object/id"
+)
+
+// TestParseGitReverseIndex parses a reverse index written by git
+// and checks it against the matching pack index and pack trailer.
+func TestParseGitReverseIndex(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			prefix := makeGitPack(t, objectFormat)
+
+			// readArtifact loads the git output file that has the given extension.
+			readArtifact := func(extension string) []byte {
+				contents, err := os.ReadFile(prefix + extension) //nolint:gosec
+				if err != nil {
+					t.Fatalf("ReadFile: %v", err)
+				}
+
+				return contents
+			}
+
+			rev, err := packrev.Parse(readArtifact(".rev"), objectFormat)
+			if err != nil {
+				t.Fatalf("Parse: %v", err)
+			}
+
+			idx, err := packidx.Parse(readArtifact(".idx"), objectFormat.Size())
+			if err != nil {
+				t.Fatalf("packidx.Parse: %v", err)
+			}
+
+			// Both files must describe the same number of objects.
+			if rev.NumObjects() != idx.NumObjects() {
+				t.Fatalf("NumObjects = %d, want %d", rev.NumObjects(), idx.NumObjects())
+			}
+
+			// The recorded pack hash must equal the hash that ends the pack file.
+			packData := readArtifact(".pack")
+			packTrailer := packData[len(packData)-objectFormat.Size():]
+
+			if !bytes.Equal(rev.PackHash(), packTrailer) {
+				t.Fatalf("PackHash does not match pack trailer")
+			}
+
+			// Each pack order slot must hold the position derived from the index offsets.
+			for packOrder, wantPosition := range packOrderPositions(t, &idx) {
+				position, err := rev.PositionAt(packOrder)
+				if err != nil {
+					t.Fatalf("PositionAt(%d): %v", packOrder, err)
+				}
+
+				if position != int(wantPosition) {
+					t.Fatalf("PositionAt(%d) = %d, want %d", packOrder, position, wantPosition)
+				}
+			}
+		})
+	}
+}
+
+// TestParseMalformed feeds Parse an empty input
+// and images that each break one framing rule,
+// and expects every call to fail with ErrMalformedReverseIndex.
+func TestParseMalformed(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			valid := writeSyntheticRev(t, objectFormat, []uint32{2, 0, 1, 3})
+
+			// Each corrupted image below is built on its own copy of valid.
+			// Byte 0 is the first signature byte.
+			badSignature := bytes.Clone(valid)
+			badSignature[0] ^= 0xff
+
+			// Byte 7 is the low byte of the big-endian version field.
+			badVersion := bytes.Clone(valid)
+			badVersion[7] = 2
+
+			// Byte 11 is the low byte of the hash function identifier.
+			badHashID := bytes.Clone(valid)
+			badHashID[11] ^= 0xff
+
+			// Ten bytes are fewer than the 12-byte header,
+			// so not even the fixed fields are complete.
+			truncated := bytes.Clone(valid)[:10]
+
+			// Two extra bytes make the position table length
+			// stop being a multiple of four.
+			ragged := append(bytes.Clone(valid), 0xde, 0xad)
+
+			// Subtest names say which rule the image violates.
+			cases := []struct {
+				name string
+				data []byte
+			}{
+				{name: "empty", data: []byte{}},
+				{name: "truncated", data: truncated},
+				{name: "bad signature", data: badSignature},
+				{name: "bad version", data: badVersion},
+				{name: "hash function mismatch", data: badHashID},
+				{name: "position table size not 32-bit multiple", data: ragged},
+			}
+
+			for _, tc := range cases {
+				t.Run(tc.name, func(t *testing.T) {
+					t.Parallel()
+
+					_, err := packrev.Parse(tc.data, objectFormat)
+					if errors.Is(err, packrev.ErrMalformedReverseIndex) {
+						return
+					}
+
+					// Any other outcome, including success, is a failure.
+					t.Fatalf("Parse error = %v, want ErrMalformedReverseIndex", err)
+				})
+			}
+		})
+	}
+}
+
+func TestPositionAtMalformed(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			image := writeSyntheticRev(t, objectFormat, []uint32{2, 0, 1, 3})
+
+			// Entry 0 follows the 12-byte header; 4 equals the object count, so it is out of range.
+			binary.BigEndian.PutUint32(image[12:16], 4)
+
+			parsed, err := packrev.Parse(image, objectFormat)
+			if err != nil {
+				t.Fatalf("Parse: %v", err)
+			}
+
+			_, err = parsed.PositionAt(0)
+			if !errors.Is(err, packrev.ErrMalformedReverseIndex) {
+				t.Fatalf("PositionAt error = %v, want ErrMalformedReverseIndex", err)
+			}
+		})
+	}
+}
diff --git a/internal/format/packrev/write.go b/internal/format/packrev/write.go
new file mode 100644
index 00000000..399c9157
--- /dev/null
+++ b/internal/format/packrev/write.go
@@ -0,0 +1,79 @@
+package packrev
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "math"
+
+ "lindenii.org/go/furgit/internal/stickyio"
+ "lindenii.org/go/furgit/object/id"
+)
+
+// ErrInvalidPositions reports that the positions passed to Write
+// hold more than math.MaxUint32 entries or an entry not below their count.
+// Write wraps it with detail, so match it with errors.Is.
+var ErrInvalidPositions = errors.New("internal/format/packrev: invalid positions")
+
+// Write writes one pack reverse index to w: the header, positions,
+// packHash, and a trailing checksum of all preceding bytes.
+// positions holds, for each object in pack offset order, the object's
+// pack index position; invalid positions yield ErrInvalidPositions.
+// packHash must be the pack's trailer hash; Write panics on a length mismatch.
+func Write(w io.Writer, objectFormat id.ObjectFormat, positions []uint32, packHash []byte) error {
+	hashID, err := hashFunctionID(objectFormat)
+	if err != nil {
+		return err
+	}
+
+	if len(packHash) != objectFormat.Size() {
+		panic("internal/format/packrev: invalid pack hash length")
+	}
+
+	// Widen to uint64: the untyped constant math.MaxUint32 does not fit a 32-bit int.
+	if uint64(len(positions)) > math.MaxUint32 {
+		return fmt.Errorf("%w: too many positions", ErrInvalidPositions)
+	}
+
+	for _, position := range positions {
+		if uint64(position) >= uint64(len(positions)) {
+			return fmt.Errorf("%w: index position out of range", ErrInvalidPositions)
+		}
+	}
+
+	hashImpl, err := objectFormat.New()
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	bw := bufio.NewWriter(io.MultiWriter(w, hashImpl))
+	sw := stickyio.New(bw)
+
+	sw.PutUint32(signature)
+	sw.PutUint32(version)
+	sw.PutUint32(hashID)
+
+	for _, position := range positions {
+		sw.PutUint32(position)
+	}
+
+	sw.Put(packHash)
+
+	err = sw.Err()
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	err = bw.Flush()
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	_, err = w.Write(hashImpl.Sum(nil))
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	return nil
+}
diff --git a/internal/format/packrev/write_test.go b/internal/format/packrev/write_test.go
new file mode 100644
index 00000000..b5c1fcb9
--- /dev/null
+++ b/internal/format/packrev/write_test.go
@@ -0,0 +1,135 @@
+package packrev_test
+
+import (
+ "bytes"
+ "errors"
+ "os"
+ "testing"
+
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packrev"
+ "lindenii.org/go/furgit/object/id"
+)
+
+// writeSyntheticRev encodes positions as a reverse index
+// whose pack hash is objectFormat.Size() bytes of 0x5a,
+// failing the test when Write reports an error.
+func writeSyntheticRev(t *testing.T, objectFormat id.ObjectFormat, positions []uint32) []byte {
+	t.Helper()
+
+	out := new(bytes.Buffer)
+	fakeHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+	err := packrev.Write(out, objectFormat, positions, fakeHash)
+	if err != nil {
+		t.Fatalf("Write: %v", err)
+	}
+
+	return out.Bytes()
+}
+
+func TestWriteRoundTrip(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			positions := []uint32{8, 6, 7, 5, 3, 0, 4, 1, 2}
+			wantHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+			rev, err := packrev.Parse(writeSyntheticRev(t, objectFormat, positions), objectFormat)
+			if err != nil {
+				t.Fatalf("Parse: %v", err)
+			}
+
+			if got := rev.NumObjects(); got != len(positions) {
+				t.Fatalf("NumObjects = %d, want %d", got, len(positions))
+			}
+
+			if !bytes.Equal(rev.PackHash(), wantHash) {
+				t.Fatalf("PackHash mismatch")
+			}
+
+			for packOrder, want := range positions {
+				position, err := rev.PositionAt(packOrder)
+				if err != nil {
+					t.Fatalf("PositionAt(%d): %v", packOrder, err)
+				}
+
+				if position != int(want) {
+					t.Fatalf("PositionAt(%d) = %d, want %d", packOrder, position, want)
+				}
+			}
+		})
+	}
+}
+
+func TestWriteMatchesGit(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			prefix := makeGitPack(t, objectFormat)
+
+			want, err := os.ReadFile(prefix + ".rev") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			rawIndex, err := os.ReadFile(prefix + ".idx") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			idx, err := packidx.Parse(rawIndex, objectFormat.Size())
+			if err != nil {
+				t.Fatalf("packidx.Parse: %v", err)
+			}
+
+			// Encode the mapping and pack hash taken from git's own pack index.
+			positions := packOrderPositions(t, &idx)
+			got := new(bytes.Buffer)
+
+			err = packrev.Write(got, objectFormat, positions, idx.PackHash())
+			if err != nil {
+				t.Fatalf("Write: %v", err)
+			}
+
+			if !bytes.Equal(got.Bytes(), want) {
+				t.Fatalf("Write output differs from git's reverse index (%d vs %d bytes)", got.Len(), len(want))
+			}
+		})
+	}
+}
+
+func TestWriteInvalidPositions(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			fakeHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+			err := packrev.Write(new(bytes.Buffer), objectFormat, []uint32{0, 5}, fakeHash) // 5 is out of range for two entries
+			if !errors.Is(err, packrev.ErrInvalidPositions) {
+				t.Fatalf("Write error = %v, want ErrInvalidPositions", err)
+			}
+		})
+	}
+}
+
+func TestWriteBadPackHashPanics(t *testing.T) {
+	t.Parallel()
+
+	defer func() {
+		if recovered := recover(); recovered == nil {
+			t.Fatalf("Write with short pack hash: expected panic")
+		}
+	}()
+
+	_ = packrev.Write(new(bytes.Buffer), id.ObjectFormatSHA256, nil, []byte{0x01})
+}