diff options
Diffstat (limited to 'internal/format/packrev')
| -rw-r--r-- | internal/format/packrev/doc.go | 3 | ||||
| -rw-r--r-- | internal/format/packrev/helpers_test.go | 75 | ||||
| -rw-r--r-- | internal/format/packrev/packrev.go | 124 | ||||
| -rw-r--r-- | internal/format/packrev/packrev_test.go | 160 | ||||
| -rw-r--r-- | internal/format/packrev/write.go | 79 | ||||
| -rw-r--r-- | internal/format/packrev/write_test.go | 135 |
6 files changed, 576 insertions, 0 deletions
diff --git a/internal/format/packrev/doc.go b/internal/format/packrev/doc.go
new file mode 100644
index 00000000..6ce8113e
--- /dev/null
+++ b/internal/format/packrev/doc.go
@@ -0,0 +1,3 @@
+// Package packrev provides Git pack reverse index (version 1) format
+// parsing and writing primitives.
+package packrev
diff --git a/internal/format/packrev/helpers_test.go b/internal/format/packrev/helpers_test.go
new file mode 100644
index 00000000..2d781669
--- /dev/null
+++ b/internal/format/packrev/helpers_test.go
@@ -0,0 +1,75 @@
+package packrev_test
+
+import (
+	"cmp"
+	"slices"
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/testgit"
+	"lindenii.org/go/furgit/object/id"
+	"lindenii.org/go/lgo/intconv"
+)
+
+// makeGitPack seeds a repository,
+// packs the seeded objects with git pack-objects
+// including a reverse index,
+// and returns the artifact path prefix.
+//
+// Any setup failure aborts the calling test through t.Fatalf.
+func makeGitPack(t *testing.T, objectFormat id.ObjectFormat) string {
+	t.Helper()
+
+	repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat})
+	if err != nil {
+		t.Fatalf("NewRepo: %v", err)
+	}
+
+	seeded, err := repo.SeedHistory(t)
+	if err != nil {
+		t.Fatalf("SeedHistory: %v", err)
+	}
+
+	// RevIndex asks the helper to emit the reverse index alongside the pack.
+	prefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{RevIndex: true})
+	if err != nil {
+		t.Fatalf("PackObjects: %v", err)
+	}
+
+	return prefix
+}
+
+// packOrderPositions derives the pack-offset-order index positions
+// from one parsed pack index.
+func packOrderPositions(t *testing.T, idx *packidx.Packidx) []uint32 {
+	t.Helper()
+
+	// pair ties one index position to the pack offset stored for it.
+	type pair struct {
+		offset   uint64
+		position uint32
+	}
+
+	pairs := make([]pair, 0, idx.NumObjects())
+
+	for pos := range idx.NumObjects() {
+		offset, err := idx.OffsetAt(pos)
+		if err != nil {
+			t.Fatalf("OffsetAt(%d): %v", pos, err)
+		}
+
+		position, err := intconv.IntToUint32(pos)
+		if err != nil {
+			t.Fatalf("IntToUint32(%d): %v", pos, err)
+		}
+
+		pairs = append(pairs, pair{offset: offset, position: position})
+	}
+
+	// Order by ascending pack offset;
+	// the positions then read out in pack order.
+	slices.SortFunc(pairs, func(a, b pair) int {
+		return cmp.Compare(a.offset, b.offset)
+	})
+
+	positions := make([]uint32, 0, len(pairs))
+	for _, p := range pairs {
+		positions = append(positions, p.position)
+	}
+
+	return positions
+}
diff --git a/internal/format/packrev/packrev.go b/internal/format/packrev/packrev.go
new file mode 100644
index 00000000..3a6dc2de
--- /dev/null
+++ b/internal/format/packrev/packrev.go
@@ -0,0 +1,124 @@
+package packrev
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+
+	"lindenii.org/go/furgit/object/id"
+	"lindenii.org/go/lgo/intconv"
+)
+
+// ErrMalformedReverseIndex reports that
+// a pack reverse index is truncated,
+// has a bad signature, version, or hash function,
+// or contains invalid index positions.
+var ErrMalformedReverseIndex = errors.New("internal/format/packrev: malformed pack reverse index")
+
+const (
+	// signature and version are the expected values
+	// of the first two 32-bit header words.
+	signature = 0x52494458 // "RIDX"
+	version   = 1
+
+	// headerLen covers the three 32-bit header words:
+	// signature, version, and hash function identifier.
+	headerLen = 12
+)
+
+// hashFunctionID returns the on-disk hash function identifier
+// for one object format.
+//
+// It returns id.ErrInvalidObjectFormat
+// for any format other than SHA-1 and SHA-256.
+func hashFunctionID(objectFormat id.ObjectFormat) (uint32, error) {
+	switch objectFormat {
+	case id.ObjectFormatSHA1:
+		return 1, nil
+	case id.ObjectFormatSHA256:
+		return 2, nil
+	case id.ObjectFormatUnknown:
+		// No identifier exists; fall out to the error return below.
+	}
+
+	return 0, id.ErrInvalidObjectFormat
+}
+
+// Packrev is a parsed pack reverse index view over borrowed bytes.
+//
+// Labels: Deps-Borrowed, Life-Parent, MT-Safe.
+type Packrev struct {
+	// data is the entire pack reverse index payload.
+	data []byte
+	// hashSize is the object ID size of the object format.
+	hashSize int
+	// numObjects is the number of index position entries.
+	numObjects int
+}
+
+// Parse parses a pack reverse index from data.
+//
+// Parse checks the header fields and the overall length only;
+// it does not inspect the stored positions or the trailing hashes.
+// The returned Packrev keeps a reference to data rather than copying it.
+//
+// Parse returns the error from the object format lookup unchanged
+// when objectFormat is not supported,
+// and an error wrapping ErrMalformedReverseIndex for every other failure.
+func Parse(data []byte, objectFormat id.ObjectFormat) (Packrev, error) {
+	var zero Packrev
+
+	wantHashID, err := hashFunctionID(objectFormat)
+	if err != nil {
+		return zero, err
+	}
+
+	hashSize := objectFormat.Size()
+
+	// The smallest acceptable input is a header plus two trailing hashes.
+	if len(data) < headerLen+2*hashSize {
+		return zero, fmt.Errorf("%w: truncated", ErrMalformedReverseIndex)
+	}
+
+	if binary.BigEndian.Uint32(data) != signature {
+		return zero, fmt.Errorf("%w: bad signature", ErrMalformedReverseIndex)
+	}
+
+	if binary.BigEndian.Uint32(data[4:]) != version {
+		return zero, fmt.Errorf("%w: unsupported version", ErrMalformedReverseIndex)
+	}
+
+	if binary.BigEndian.Uint32(data[8:]) != wantHashID {
+		return zero, fmt.Errorf("%w: hash function mismatch", ErrMalformedReverseIndex)
+	}
+
+	// Whatever lies between the header and the two hashes
+	// must divide evenly into 32-bit position entries.
+	positionBytes := len(data) - headerLen - 2*hashSize
+	if positionBytes%4 != 0 {
+		return zero, fmt.Errorf("%w: position table size not a 32-bit multiple", ErrMalformedReverseIndex)
+	}
+
+	return Packrev{
+		data:       data,
+		hashSize:   hashSize,
+		numObjects: positionBytes / 4,
+	}, nil
+}
+
+// NumObjects returns the number of index position entries.
+func (rev *Packrev) NumObjects() int {
+	return rev.numObjects
+}
+
+// PackHash returns the pack hash recorded in the trailer.
+//
+// The returned slice aliases the parsed data.
+// Its capacity is clipped to its length,
+// so appending to it allocates
+// instead of overwriting the trailing bytes that follow the pack hash.
+//
+// Labels: Life-Parent, Mut-No.
+func (rev *Packrev) PackHash() []byte {
+	end := len(rev.data) - rev.hashSize
+
+	return rev.data[end-rev.hashSize : end : end]
+}
+
+// PositionAt returns the pack index position
+// of the object at a pack offset order position.
+//
+// PositionAt panics when packOrder is out of range,
+// and errors when the stored position is not a valid index position.
+func (rev *Packrev) PositionAt(packOrder int) (int, error) {
+	if packOrder < 0 || packOrder >= rev.numObjects {
+		panic("internal/format/packrev: pack order position out of range")
+	}
+
+	// Entries are big-endian 32-bit words stored directly after the header.
+	stored := binary.BigEndian.Uint32(rev.data[headerLen+4*packOrder:])
+
+	position, err := intconv.Uint32ToInt(stored)
+	if err != nil {
+		return 0, fmt.Errorf("%w: %w", ErrMalformedReverseIndex, err)
+	}
+
+	if position >= rev.numObjects {
+		return 0, fmt.Errorf("%w: index position out of range", ErrMalformedReverseIndex)
+	}
+
+	return position, nil
+}
diff --git a/internal/format/packrev/packrev_test.go b/internal/format/packrev/packrev_test.go
new file mode 100644
index 00000000..b644e15e
--- /dev/null
+++ b/internal/format/packrev/packrev_test.go
@@ -0,0 +1,160 @@
+package packrev_test
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"os"
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/format/packrev"
+	"lindenii.org/go/furgit/object/id"
+)
+
+// TestParseGitReverseIndex parses the .rev file produced for a seeded pack
+// and checks the object count against the .idx,
+// the recorded pack hash against the tail of the .pack,
+// and every position against the order derived from the .idx offsets.
+func TestParseGitReverseIndex(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			prefix := makeGitPack(t, objectFormat)
+
+			revData, err := os.ReadFile(prefix + ".rev") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			rev, err := packrev.Parse(revData, objectFormat)
+			if err != nil {
+				t.Fatalf("Parse: %v", err)
+			}
+
+			idxData, err := os.ReadFile(prefix + ".idx") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			idx, err := packidx.Parse(idxData, objectFormat.Size())
+			if err != nil {
+				t.Fatalf("packidx.Parse: %v", err)
+			}
+
+			if rev.NumObjects() != idx.NumObjects() {
+				t.Fatalf("NumObjects = %d, want %d", rev.NumObjects(), idx.NumObjects())
+			}
+
+			packData, err := os.ReadFile(prefix + ".pack") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			// The last hash-sized bytes of the pack are compared with PackHash.
+			packTrailer := packData[len(packData)-objectFormat.Size():]
+			if !bytes.Equal(rev.PackHash(), packTrailer) {
+				t.Fatalf("PackHash does not match pack trailer")
+			}
+
+			want := packOrderPositions(t, &idx)
+
+			for packOrder, wantPosition := range want {
+				position, err := rev.PositionAt(packOrder)
+				if err != nil {
+					t.Fatalf("PositionAt(%d): %v", packOrder, err)
+				}
+
+				if position != int(wantPosition) {
+					t.Fatalf("PositionAt(%d) = %d, want %d", packOrder, position, wantPosition)
+				}
+			}
+		})
+	}
+}
+
+// TestParseMalformed feeds Parse corrupted copies
+// of a valid synthetic reverse index
+// and expects ErrMalformedReverseIndex for each of them.
+func TestParseMalformed(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			valid := writeSyntheticRev(t, objectFormat, []uint32{2, 0, 1, 3})
+
+			// corrupt applies mutate to a private copy, leaving valid intact.
+			corrupt := func(mutate func(data []byte) []byte) []byte {
+				return mutate(bytes.Clone(valid))
+			}
+
+			cases := []struct {
+				name string
+				data []byte
+			}{
+				{name: "empty", data: []byte{}},
+				{name: "truncated", data: corrupt(func(d []byte) []byte { return d[:10] })},
+				{
+					name: "bad signature",
+					data: corrupt(func(d []byte) []byte {
+						d[0] ^= 0xff
+
+						return d
+					}),
+				},
+				{
+					name: "bad version",
+					data: corrupt(func(d []byte) []byte {
+						// Byte 7 is the low byte of the big-endian version word.
+						d[7] = 2
+
+						return d
+					}),
+				},
+				{
+					name: "hash function mismatch",
+					data: corrupt(func(d []byte) []byte {
+						// Byte 11 is the low byte of the hash function word.
+						d[11] ^= 0xff
+
+						return d
+					}),
+				},
+				{
+					name: "position table size not 32-bit multiple",
+					// Two extra bytes leave the table length at 2 modulo 4.
+					data: corrupt(func(d []byte) []byte { return append(d, 0xde, 0xad) }),
+				},
+			}
+
+			for _, tc := range cases {
+				t.Run(tc.name, func(t *testing.T) {
+					t.Parallel()
+
+					_, err := packrev.Parse(tc.data, objectFormat)
+					if !errors.Is(err, packrev.ErrMalformedReverseIndex) {
+						t.Fatalf("Parse error = %v, want ErrMalformedReverseIndex", err)
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestPositionAtMalformed checks that an out-of-range stored position
+// is accepted by Parse but rejected by PositionAt.
+func TestPositionAtMalformed(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			data := writeSyntheticRev(t, objectFormat, []uint32{2, 0, 1, 3})
+
+			// Corrupt the first stored position to one past the object count.
+			binary.BigEndian.PutUint32(data[12:], 4)
+
+			rev, err := packrev.Parse(data, objectFormat)
+			if err != nil {
+				t.Fatalf("Parse: %v", err)
+			}
+
+			_, err = rev.PositionAt(0)
+			if !errors.Is(err, packrev.ErrMalformedReverseIndex) {
+				t.Fatalf("PositionAt error = %v, want ErrMalformedReverseIndex", err)
+			}
+		})
+	}
+}
diff --git a/internal/format/packrev/write.go b/internal/format/packrev/write.go
new file mode 100644
index 00000000..399c9157
--- /dev/null
+++ b/internal/format/packrev/write.go
@@ -0,0 +1,79 @@
+package packrev
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+
+	"lindenii.org/go/furgit/internal/stickyio"
+	"lindenii.org/go/furgit/object/id"
+)
+
+// ErrInvalidPositions reports that
+// positions supplied for a reverse index write
+// are out of range or too numerous.
+var ErrInvalidPositions = errors.New("internal/format/packrev: invalid positions")
+
+// Write writes one pack reverse index to w.
+//
+// positions holds, for each object in pack offset order,
+// the object's pack index position.
+// packHash must be the pack's trailer hash;
+// Write panics when its length does not match the object format.
+func Write(w io.Writer, objectFormat id.ObjectFormat, positions []uint32, packHash []byte) error {
+	hashID, err := hashFunctionID(objectFormat)
+	if err != nil {
+		return err
+	}
+
+	if len(packHash) != objectFormat.Size() {
+		panic("internal/format/packrev: invalid pack hash length")
+	}
+
+	// Compare in uint64: math.MaxUint32 does not fit in int
+	// where int is 32 bits wide, so comparing it against a bare len
+	// is a compile-time constant overflow on those targets.
+	if uint64(len(positions)) > math.MaxUint32 {
+		return fmt.Errorf("%w: too many positions", ErrInvalidPositions)
+	}
+
+	// Every entry must name an index position below the entry count.
+	for _, position := range positions {
+		if uint64(position) >= uint64(len(positions)) {
+			return fmt.Errorf("%w: index position out of range", ErrInvalidPositions)
+		}
+	}
+
+	hashImpl, err := objectFormat.New()
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	// Everything written through bw reaches both w and the running hash,
+	// so the final checksum covers the header, the positions, and packHash.
+	bw := bufio.NewWriter(io.MultiWriter(w, hashImpl))
+	sw := stickyio.New(bw)
+
+	sw.PutUint32(signature)
+	sw.PutUint32(version)
+	sw.PutUint32(hashID)
+
+	for _, position := range positions {
+		sw.PutUint32(position)
+	}
+
+	sw.Put(packHash)
+
+	err = sw.Err()
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	// Flush before reading the sum;
+	// bytes still buffered have not reached the hash yet.
+	err = bw.Flush()
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	// The checksum goes to w directly, bypassing the hash.
+	_, err = w.Write(hashImpl.Sum(nil))
+	if err != nil {
+		return fmt.Errorf("internal/format/packrev: %w", err)
+	}
+
+	return nil
+}
diff --git a/internal/format/packrev/write_test.go b/internal/format/packrev/write_test.go
new file mode 100644
index 00000000..b5c1fcb9
--- /dev/null
+++ b/internal/format/packrev/write_test.go
@@ -0,0 +1,135 @@
+package packrev_test
+
+import (
+	"bytes"
+	"errors"
+	"os"
+	"testing"
+
+	"lindenii.org/go/furgit/internal/format/packidx"
+	"lindenii.org/go/furgit/internal/format/packrev"
+	"lindenii.org/go/furgit/object/id"
+)
+
+// writeSyntheticRev writes one reverse index over positions
+// with a fixed fake pack hash.
+func writeSyntheticRev(t *testing.T, objectFormat id.ObjectFormat, positions []uint32) []byte {
+	t.Helper()
+
+	// The fake pack hash is the byte 0x5a repeated to the format's hash size.
+	packHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+	var buf bytes.Buffer
+
+	err := packrev.Write(&buf, objectFormat, positions, packHash)
+	if err != nil {
+		t.Fatalf("Write: %v", err)
+	}
+
+	return buf.Bytes()
+}
+
+// TestWriteRoundTrip writes a synthetic reverse index
+// and checks that Parse reads back the same object count,
+// pack hash, and positions.
+func TestWriteRoundTrip(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			positions := []uint32{8, 6, 7, 5, 3, 0, 4, 1, 2}
+			data := writeSyntheticRev(t, objectFormat, positions)
+
+			rev, err := packrev.Parse(data, objectFormat)
+			if err != nil {
+				t.Fatalf("Parse: %v", err)
+			}
+
+			if rev.NumObjects() != len(positions) {
+				t.Fatalf("NumObjects = %d, want %d", rev.NumObjects(), len(positions))
+			}
+
+			if !bytes.Equal(rev.PackHash(), bytes.Repeat([]byte{0x5a}, objectFormat.Size())) {
+				t.Fatalf("PackHash mismatch")
+			}
+
+			for packOrder, want := range positions {
+				position, err := rev.PositionAt(packOrder)
+				if err != nil {
+					t.Fatalf("PositionAt(%d): %v", packOrder, err)
+				}
+
+				if position != int(want) {
+					t.Fatalf("PositionAt(%d) = %d, want %d", packOrder, position, want)
+				}
+			}
+		})
+	}
+}
+
+// TestWriteMatchesGit rebuilds a reverse index
+// from the positions and pack hash of a generated .idx
+// and expects output byte-identical to the generated .rev.
+func TestWriteMatchesGit(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			prefix := makeGitPack(t, objectFormat)
+
+			gitData, err := os.ReadFile(prefix + ".rev") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			idxData, err := os.ReadFile(prefix + ".idx") //nolint:gosec
+			if err != nil {
+				t.Fatalf("ReadFile: %v", err)
+			}
+
+			idx, err := packidx.Parse(idxData, objectFormat.Size())
+			if err != nil {
+				t.Fatalf("packidx.Parse: %v", err)
+			}
+
+			positions := packOrderPositions(t, &idx)
+
+			var buf bytes.Buffer
+
+			err = packrev.Write(&buf, objectFormat, positions, idx.PackHash())
+			if err != nil {
+				t.Fatalf("Write: %v", err)
+			}
+
+			if !bytes.Equal(buf.Bytes(), gitData) {
+				t.Fatalf("Write output differs from git's reverse index (%d vs %d bytes)", buf.Len(), len(gitData))
+			}
+		})
+	}
+}
+
+// TestWriteInvalidPositions expects ErrInvalidPositions
+// when a position is not below the number of positions.
+func TestWriteInvalidPositions(t *testing.T) {
+	t.Parallel()
+
+	for _, objectFormat := range id.SupportedObjectFormats() {
+		t.Run(objectFormat.String(), func(t *testing.T) {
+			t.Parallel()
+
+			packHash := bytes.Repeat([]byte{0x5a}, objectFormat.Size())
+
+			// Position 5 is out of range for a two-entry table.
+			err := packrev.Write(&bytes.Buffer{}, objectFormat, []uint32{0, 5}, packHash)
+			if !errors.Is(err, packrev.ErrInvalidPositions) {
+				t.Fatalf("Write error = %v, want ErrInvalidPositions", err)
+			}
+		})
+	}
+}
+
+// TestWriteBadPackHashPanics expects Write to panic
+// when handed a one-byte pack hash.
+func TestWriteBadPackHashPanics(t *testing.T) {
+	t.Parallel()
+
+	defer func() {
+		if recover() == nil {
+			t.Fatalf("Write with short pack hash: expected panic")
+		}
+	}()
+
+	_ = packrev.Write(&bytes.Buffer{}, id.ObjectFormatSHA256, nil, []byte{0x01})
+}
|
