diff options
| author | 2026-06-12 12:43:30 +0000 | |
|---|---|---|
| committer | 2026-06-12 12:43:30 +0000 | |
| commit | ed1d706ee1cf3e3ac34bce8b295525bebff8bcb9 (patch) | |
| tree | 721ac907565df0ad91d766d4c6b16b2eb029a55a /object | |
| parent | object/store/loose: Use SeedHistory (diff) | |
object/store/packed: Basic reading functionality
Diffstat (limited to 'object')
| -rw-r--r-- | object/store/packed/basecache.go | 38 | ||||
| -rw-r--r-- | object/store/packed/delta.go | 224 | ||||
| -rw-r--r-- | object/store/packed/entry.go | 74 | ||||
| -rw-r--r-- | object/store/packed/helpers_test.go | 83 | ||||
| -rw-r--r-- | object/store/packed/lookup.go | 41 | ||||
| -rw-r--r-- | object/store/packed/lookup_test.go | 35 | ||||
| -rw-r--r-- | object/store/packed/pack.go | 143 | ||||
| -rw-r--r-- | object/store/packed/packed.go | 125 | ||||
| -rw-r--r-- | object/store/packed/read_test.go | 140 | ||||
| -rw-r--r-- | object/store/packed/reader.go | 228 | ||||
| -rw-r--r-- | object/store/packed/refresh.go | 69 | ||||
| -rw-r--r-- | object/store/packed/refresh_test.go | 107 |
12 files changed, 1253 insertions, 54 deletions
diff --git a/object/store/packed/basecache.go b/object/store/packed/basecache.go new file mode 100644 index 00000000..7de4ec7b --- /dev/null +++ b/object/store/packed/basecache.go @@ -0,0 +1,38 @@ +package packed + +import ( + "lindenii.org/go/furgit/internal/cache/clock" + "lindenii.org/go/furgit/internal/format/packfile" +) + +// baseCacheMaxWeight bounds the delta base cache weight. +const baseCacheMaxWeight = 96 << 20 + +// baseKey addresses an entry in one pack +// as a delta base cache key. +type baseKey struct { + pack *pack + offset uint64 +} + +// cachedBase is a cached delta base, i.e., +// its resolved object entry type and full content. +// +// content is shared with concurrent users; +// it may only be returned to callers as a copy. +// +// Labels: Mut-No. +type cachedBase struct { + entryType packfile.EntryType + content []byte +} + +// newBaseCache creates the delta base cache. +func newBaseCache() *clock.Clock[baseKey, cachedBase] { + return clock.New(baseCacheMaxWeight, baseWeight) +} + +// baseWeight weighs one cached delta base. +func baseWeight(_ baseKey, base cachedBase) uint64 { + return uint64(len(base.content)) + 32 +} diff --git a/object/store/packed/delta.go b/object/store/packed/delta.go new file mode 100644 index 00000000..c52bd10a --- /dev/null +++ b/object/store/packed/delta.go @@ -0,0 +1,224 @@ +package packed + +import ( + "bytes" + "fmt" + "io" + "slices" + + "lindenii.org/go/furgit/errs" + "lindenii.org/go/furgit/internal/compress/zlib" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packfile/delta" +) + +// maxDeltaDepth bounds delta chain length, +// guaranteeing termination on crafted ref-delta loops. +const maxDeltaDepth = 1 << 12 + +// deltaNode is a delta entry on a resolution chain. +type deltaNode struct { + // payload is the entry's compressed delta payload view. + payload []byte + + // size is the entry's declared inflated delta size. + size uint64 + + // baseOffset is the entry's base entry offset. + baseOffset uint64 +} + +// unpackEntry reconstructs the object stored at offset in p, +// following ref- and ofs-delta chains within the pack. +// +// Labels: Life-Independent. +func (packed *Packed) unpackEntry(p *pack, offset uint64) (packfile.EntryType, []byte, error) { + var zero packfile.EntryType + + var ( + chain []deltaNode + baseType packfile.EntryType + base []byte + fromCache bool + ) + + // Drill down to the innermost base, + // stopping early at any cached base. + cur := offset + + for { + if cached, ok := packed.baseCache.Get(baseKey{pack: p, offset: cur}); ok { + baseType = cached.entryType + base = cached.content + fromCache = true + + break + } + + if len(chain) >= maxDeltaDepth { + return zero, nil, fmt.Errorf("%w: pack %q: delta chain too deep", ErrMalformedPackedStore, p.name) + } + + header, payload, err := p.entryHeaderAt(cur, packed.objectFormat) + if err != nil { + return zero, nil, err + } + + if header.Type.IsBase() { + base, err = inflate(payload, header.Size) + if err != nil { + return zero, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + baseType = header.Type + + break + } + + baseOffset, err := packed.deltaBaseOffset(p, cur, header) + if err != nil { + return zero, nil, err + } + + chain = append(chain, deltaNode{ + payload: payload, + size: header.Size, + baseOffset: baseOffset, + }) + + cur = baseOffset + } + + // A direct cache hit with no deltas to apply must be copied. + if len(chain) == 0 && fromCache { + return baseType, bytes.Clone(base), nil + } + + // Apply deltas back up the chain, caching each consumed base. + for i, node := range slices.Backward(chain) { + deltaData, err := inflate(node.payload, node.size) + if err != nil { + return zero, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + result, err := delta.Apply(base, deltaData) + if err != nil { + return zero, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + consumedFromCache := fromCache && i == len(chain)-1 + if !consumedFromCache { + packed.baseCache.Add( + baseKey{pack: p, offset: node.baseOffset}, + cachedBase{entryType: baseType, content: base}, + ) + } + + base = result + } + + return baseType, base, nil +} + +// deltaBaseOffset resolves a delta entry's base entry offset +// within the same pack. +func (packed *Packed) deltaBaseOffset(p *pack, offset uint64, header packfile.EntryHeader) (uint64, error) { + switch header.Type { + case packfile.EntryTypeOfsDelta: + if header.OfsDistance == 0 || header.OfsDistance > offset { + return 0, fmt.Errorf("%w: pack %q: invalid ofs-delta distance", ErrMalformedPackedStore, p.name) + } + + return offset - header.OfsDistance, nil + case packfile.EntryTypeRefDelta: + refBase := header.RefBase[:packed.objectFormat.Size()] + + baseOffset, found, err := p.idx.Lookup(refBase) + if err != nil { + return 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + if !found { + baseID, idErr := packed.objectFormat.FromBytes(refBase) + if idErr != nil { + return 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, idErr) + } + + return 0, fmt.Errorf( + "%w: resolving ref-delta: %w", + ErrMalformedPackedStore, &errs.ObjectMissingError{OID: baseID}, + ) + } + + return baseOffset, nil + case packfile.EntryTypeInvalid, + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + packfile.EntryTypeFuture: + } + + panic("object/store/packed: deltaBaseOffset on non-delta entry") +} + +// resolveType walks one delta chain +// to find the chained base object entry type, +// without inflating any content. +func (packed *Packed) resolveType(p *pack, offset uint64, entryHeader packfile.EntryHeader) (packfile.EntryType, error) { + var zero packfile.EntryType + + depth := 0 + + for entryHeader.Type.IsDelta() { + if cached, ok := packed.baseCache.Peek(baseKey{pack: p, offset: offset}); ok { + return cached.entryType, nil + } + + depth++ + if depth > maxDeltaDepth { + return zero, fmt.Errorf("%w: pack %q: delta chain too deep", ErrMalformedPackedStore, p.name) + } + + baseOffset, err := packed.deltaBaseOffset(p, offset, entryHeader) + if err != nil { + return zero, err + } + + offset = baseOffset + + entryHeader, _, err = p.entryHeaderAt(offset, packed.objectFormat) + if err != nil { + return zero, err + } + } + + return entryHeader.Type, nil +} + +// deltaResultSize reads the declared result size +// from one compressed delta payload prefix. +func deltaResultSize(payload []byte, deltaSize uint64) (uint64, error) { + zr, err := zlib.NewReader(bytes.NewReader(payload)) + if err != nil { + return 0, fmt.Errorf("reading delta header: %w", err) + } + + defer func() { _ = zr.Close() }() + + prefixLen := min(uint64(delta.MaxHeaderSizesLen), deltaSize) + + prefix := make([]byte, prefixLen) + + _, err = io.ReadFull(zr, prefix) + if err != nil { + return 0, fmt.Errorf("reading delta header: %w", err) + } + + _, resultSize, _, err := delta.ParseHeaderSizes(prefix) + if err != nil { + return 0, fmt.Errorf("reading delta header: %w", err) + } + + return resultSize, nil +} diff --git a/object/store/packed/entry.go b/object/store/packed/entry.go new file mode 100644 index 00000000..23f389a3 --- /dev/null +++ b/object/store/packed/entry.go @@ -0,0 +1,74 @@ +package packed + +import ( + "bytes" + "errors" + "fmt" + "io" + + "lindenii.org/go/furgit/internal/compress/zlib" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/lgo/intconv" +) + +var errPayloadOverlong = errors.New("entry payload longer than declared") + +// entryHeaderAt parses the entry header at offset, +// returning it together with the entry's compressed payload view. +// +// The entry header only contains the inflated length, +// so payload slice extends to the end of the pack; +// the compressed data length is determined by the zlib stream end, +// not the slice length. +// +// Labels: Life-Parent, Mut-No. +func (pack *pack) entryHeaderAt(offset uint64, objectFormat id.ObjectFormat) (packfile.EntryHeader, []byte, error) { + var zero packfile.EntryHeader + + pos, err := intconv.Uint64ToInt(offset) + if err != nil || pos >= len(pack.data) { + return zero, nil, fmt.Errorf("%w: pack %q: entry offset out of bounds", ErrMalformedPackedStore, pack.name) + } + + header, err := packfile.ParseEntryHeader(pack.data[pos:], objectFormat.Size()) + if err != nil { + return zero, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, pack.name, err) + } + + return header, pack.data[pos+header.HeaderLen:], nil +} + +// inflate decompresses one entry payload of expectedSize bytes, +// rejecting payloads whose inflated size differs. +// +// Labels: Life-Independent. +func inflate(payload []byte, expectedSize uint64) ([]byte, error) { + size, err := intconv.Uint64ToInt(expectedSize) + if err != nil { + return nil, fmt.Errorf("declared size: %w", err) + } + + zr, err := zlib.NewReader(bytes.NewReader(payload)) + if err != nil { + return nil, fmt.Errorf("inflating entry payload: %w", err) + } + + defer func() { _ = zr.Close() }() + + out := make([]byte, size) + + _, err = io.ReadFull(zr, out) + if err != nil { + return nil, fmt.Errorf("inflating entry payload: %w", err) + } + + var probe [1]byte + + n, err := zr.Read(probe[:]) + if n != 0 || !errors.Is(err, io.EOF) { + return nil, errPayloadOverlong + } + + return out, nil +} diff --git a/object/store/packed/helpers_test.go b/object/store/packed/helpers_test.go new file mode 100644 index 00000000..08e07399 --- /dev/null +++ b/object/store/packed/helpers_test.go @@ -0,0 +1,83 @@ +package packed_test + +import ( + "os" + "path/filepath" + "slices" + "strings" + "testing" + + "lindenii.org/go/furgit/internal/testgit" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store/packed" +) + +// makeGitPack seeds a repository, +// packs the seeded objects with git pack-objects, +// and returns the repository, the artifact path prefix, +// and the seeded objects. +func makeGitPack(t *testing.T, objectFormat id.ObjectFormat) (*testgit.Repo, string, testgit.Seeded) { + t.Helper() + + repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat}) + if err != nil { + t.Fatalf("NewRepo: %v", err) + } + + seeded, err := repo.SeedHistory(t) + if err != nil { + t.Fatalf("SeedHistory: %v", err) + } + + prefix, err := repo.PackObjects(t, slices.Values(seeded.All()), testgit.PackObjectsOptions{}) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + return repo, prefix, seeded +} + +// requireDeltas asserts that the pack at prefix +// contains at least one deltified entry, +// so that tests really do exercise delta resolution. +func requireDeltas(t *testing.T, repo *testgit.Repo, prefix string, objectFormat id.ObjectFormat) { + t.Helper() + + out, err := repo.VerifyPack(t, prefix+".idx") + if err != nil { + t.Fatalf("VerifyPack: %v", err) + } + + hexLen := objectFormat.HexLen() + + for line := range strings.Lines(string(out)) { + fields := strings.Fields(line) + if len(fields) >= 7 && len(fields[0]) == hexLen { + return + } + } + + t.Fatalf("fixture pack contains no deltified entries") +} + +// openPackedStore opens a packed store +// over the directory containing prefix's pack artifacts. +func openPackedStore(t *testing.T, prefix string, objectFormat id.ObjectFormat) *packed.Packed { + t.Helper() + + root, err := os.OpenRoot(filepath.Dir(prefix)) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + t.Cleanup(func() { _ = root.Close() }) + + packedStore, err := packed.New(root, objectFormat) + if err != nil { + t.Fatalf("New: %v", err) + } + + t.Cleanup(func() { _ = packedStore.Close() }) + + return packedStore +} diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go new file mode 100644 index 00000000..74087072 --- /dev/null +++ b/object/store/packed/lookup.go @@ -0,0 +1,41 @@ +package packed + +import ( + "fmt" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store" +) + +// lookup finds the pack containing objectID +// and the entry offset within it, +// probing packs in most-recently-used-ish order. +// +// Labels: Life-Parent. +func (packed *Packed) lookup(objectID id.ObjectID) (*pack, uint64, error) { + if objectID.ObjectFormat() != packed.objectFormat { + return nil, 0, fmt.Errorf( + "%w: got %s want %s", + id.ErrInvalidObjectFormat, objectID.ObjectFormat(), packed.objectFormat, + ) + } + + oid := objectID.RawBytes() + + for _, p := range packed.order.Keys() { + offset, found, err := p.idx.Lookup(oid) + if err != nil { + return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + if !found { + continue + } + + packed.order.Touch(p) + + return p, offset, nil + } + + return nil, 0, store.ErrObjectNotFound +} diff --git a/object/store/packed/lookup_test.go b/object/store/packed/lookup_test.go new file mode 100644 index 00000000..d9f3dff9 --- /dev/null +++ b/object/store/packed/lookup_test.go @@ -0,0 +1,35 @@ +package packed_test + +import ( + "errors" + "testing" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store" +) + +func TestLookupMissing(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + _, prefix, seeded := makeGitPack(t, objectFormat) + packedStore := openPackedStore(t, prefix, objectFormat) + + raw := seeded.Blobs[0].Bytes() + raw[len(raw)-1] ^= 0xff + + missing, err := objectFormat.FromBytes(raw) + if err != nil { + t.Fatalf("FromBytes: %v", err) + } + + _, _, err = packedStore.ReadBytesContent(missing) + if !errors.Is(err, store.ErrObjectNotFound) { + t.Fatalf("ReadBytesContent error = %v, want ErrObjectNotFound", err) + } + }) + } +} diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go new file mode 100644 index 00000000..55991c2c --- /dev/null +++ b/object/store/packed/pack.go @@ -0,0 +1,143 @@ +package packed + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "os" + + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/mmap" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/lgo/intconv" +) + +// packHeaderLen is the size of the on-disk pack header: +// signature, version, and object count. +const packHeaderLen = 12 + +var ( + errPackTruncated = errors.New("truncated") + errPackBadSignature = errors.New("bad signature") + errPackUnsupportedVersion = errors.New("unsupported pack version") + errPackCountMismatch = errors.New("object count differs from index") + errPackTrailerMismatch = errors.New("trailer hash differs from index") +) + +// pack is one discovered pack: +// its base name, its parsed index, and its mapped data. +// All fields are immutable after openPack. +type pack struct { + // name is the pack base name, like "pack-<hash>". + name string + + // idxMapping owns the mapped pack index bytes, + // and idx is the parsed index view over them. + idxMapping *mmap.Mmap + idx packidx.Packidx + + // dataMapping owns the mapped pack data bytes, + // and data aliases them. + dataMapping *mmap.Mmap + data []byte +} + +// openPack opens, maps, and validates +// one pack index and its pack data +// by pack base name. +func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack, error) { + idxMapping, err := mapFile(root, name+".idx") + if err != nil { + return nil, err + } + + idx, err := packidx.Parse(idxMapping.Data(), objectFormat.Size()) + if err != nil { + _ = idxMapping.Close() + + return nil, fmt.Errorf("%w: index %q: %w", ErrMalformedPackedStore, name, err) + } + + dataMapping, err := mapFile(root, name+".pack") + if err != nil { + _ = idxMapping.Close() + + return nil, err + } + + err = validatePackData(dataMapping.Data(), &idx, objectFormat.Size()) + if err != nil { + _ = idxMapping.Close() + _ = dataMapping.Close() + + return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err) + } + + return &pack{ + name: name, + idxMapping: idxMapping, + idx: idx, + dataMapping: dataMapping, + data: dataMapping.Data(), + }, nil +} + +// mapFile opens and maps one file under root. +func mapFile(root *os.Root, name string) (*mmap.Mmap, error) { + file, err := root.Open(name) + if err != nil { + return nil, fmt.Errorf("object/store/packed: %w", err) + } + + defer func() { _ = file.Close() }() + + mapping, err := mmap.Open(file) + if err != nil { + return nil, fmt.Errorf("object/store/packed: %q: %w", name, err) + } + + return mapping, nil +} + +// validatePackData checks one mapped pack +// against the pack format and its index. +func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error { + if len(data) < packHeaderLen+hashSize { + return errPackTruncated + } + + if binary.BigEndian.Uint32(data) != packfile.Signature { + return errPackBadSignature + } + + if !packfile.SupportedVersion(binary.BigEndian.Uint32(data[4:])) { + return errPackUnsupportedVersion + } + + count := uint64(binary.BigEndian.Uint32(data[8:])) + + numObjects, err := intconv.IntToUint64(idx.NumObjects()) + if err != nil { + return fmt.Errorf("object count: %w", err) + } + + if count != numObjects { + return errPackCountMismatch + } + + if !bytes.Equal(data[len(data)-hashSize:], idx.PackHash()) { + return errPackTrailerMismatch + } + + return nil +} + +// close releases the pack data and index mappings. +func (pack *pack) close() error { + return errors.Join( + pack.dataMapping.Close(), + pack.idxMapping.Close(), + ) +} diff --git a/object/store/packed/packed.go b/object/store/packed/packed.go index c0961508..f22c2445 100644 --- a/object/store/packed/packed.go +++ b/object/store/packed/packed.go @@ -1,36 +1,101 @@ package packed -// import ( -// "os" -// -// "lindenii.org/go/furgit/object/id" -// "lindenii.org/go/furgit/object/store" -// ) +import ( + "errors" + "os" + "sync" + + "lindenii.org/go/furgit/internal/cache/clock" + "lindenii.org/go/furgit/internal/mru" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store" +) + +// ErrMalformedPackedStore reports that +// a pack or pack index in the store is +// truncated, inconsistent, or otherwise corrupt. +var ErrMalformedPackedStore = errors.New("object/store/packed: malformed packed store") + +// Packed reads Git objects from pack/index files +// under an objects/pack root. // -// // Packed reads Git objects from pack/index files under an objects/pack root, -// // and ingests incoming pack streams into it. -// // -// // Labels: Close-Caller. -// type Packed struct { -// // root is the objects/pack directory capability -// // used for all pack and index file access. -// // Packed borrows this root. -// root *os.Root -// // objectFormat is the expected object format for lookups. -// objectFormat id.ObjectFormat -// } +// Packs appearing after construction are only visible +// after an explicit [Packed.Refresh]. // -// var ( -// _ store.ObjectReader = (*Packed)(nil) -// _ store.PackWriter = (*Packed)(nil) -// ) +// Labels: Close-Caller. +type Packed struct { + // root is the objects/pack directory + // used for all pack and index file access. + root *os.Root + + // objectFormat is the expected object format for lookups. + objectFormat id.ObjectFormat + + // order contains the packs to probe, MRU-first. + order *mru.Order[*pack] + + // baseCache caches delta bases consumed during resolution. + baseCache *clock.Clock[baseKey, cachedBase] + + // refreshMu serializes Refresh. + // Readers uses none of these. + refreshMu sync.Mutex + + // byName supports reusing surviving packs across Refresh, + // and retired holds dropped packs until Close, + // since concurrent readers may still use them. + byName map[string]*pack + + retired []*pack +} + +var _ store.ObjectReader = (*Packed)(nil) + +// New creates a packed-object store rooted at an objects/pack directory, +// performing an initial Refresh. // -// // New creates a packed-object store rooted at an objects/pack directory. -// // -// // Labels: Deps-Borrowed, Life-Parent. -// func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) +// Labels: Deps-Borrowed, Life-Parent. +func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) { + if objectFormat.Size() == 0 { + return nil, id.ErrInvalidObjectFormat + } + + packed := &Packed{ + root: root, + objectFormat: objectFormat, + order: mru.New[*pack](), + baseCache: newBaseCache(), + refreshMu: sync.Mutex{}, + byName: nil, + retired: nil, + } + + err := packed.Refresh() + if err != nil { + return nil, err + } + + return packed, nil +} + +// Close releases mapped pack/index resources associated with the store. // -// // Close releases mapped pack/index resources associated with the store. -// // -// // Labels: MT-Unsafe. -// func (packed *Packed) Close() error +// Labels: MT-Unsafe. +func (packed *Packed) Close() error { + errs := make([]error, 0, len(packed.byName)+len(packed.retired)) + + for _, p := range packed.byName { + errs = append(errs, p.close()) + } + + for _, p := range packed.retired { + errs = append(errs, p.close()) + } + + packed.byName = nil + packed.retired = nil + + packed.baseCache.Clear() + + return errors.Join(errs...) +} diff --git a/object/store/packed/read_test.go b/object/store/packed/read_test.go new file mode 100644 index 00000000..02a3a5b0 --- /dev/null +++ b/object/store/packed/read_test.go @@ -0,0 +1,140 @@ +package packed_test + +import ( + "bytes" + "io" + "testing" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store/packed" + "lindenii.org/go/furgit/object/typ" +) + +func TestReadGitPack(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, prefix, seeded := makeGitPack(t, objectFormat) + requireDeltas(t, repo, prefix, objectFormat) + + packedStore := openPackedStore(t, prefix, objectFormat) + + groups := []struct { + ty typ.Type + oids []id.ObjectID + }{ + {ty: typ.Blob, oids: seeded.Blobs}, + {ty: typ.Tree, oids: seeded.Trees}, + {ty: typ.Commit, oids: seeded.Commits}, + {ty: typ.Tag, oids: seeded.Tags}, + } + + for _, group := range groups { + for _, oid := range group.oids { + wantContent, err := repo.CatFile(t, group.ty, oid) + if err != nil { + t.Fatalf("CatFile(%s): %v", oid, err) + } + + ty, content, err := packedStore.ReadBytesContent(oid) + if err != nil { + t.Fatalf("ReadBytesContent(%s): %v", oid, err) + } + + if ty != group.ty { + t.Fatalf("ReadBytesContent(%s) type = %v, want %v", oid, ty, group.ty) + } + + if !bytes.Equal(content, wantContent) { + t.Fatalf("ReadBytesContent(%s) content mismatch", oid) + } + + raw, err := packedStore.ReadBytesFull(oid) + if err != nil { + t.Fatalf("ReadBytesFull(%s): %v", oid, err) + } + + if got := objectFormat.Sum(raw); got != oid { + t.Fatalf("ReadBytesFull(%s) hashes to %s", oid, got) + } + + ty, size, err := packedStore.ReadHeader(oid) + if err != nil { + t.Fatalf("ReadHeader(%s): %v", oid, err) + } + + if ty != group.ty { + t.Fatalf("ReadHeader(%s) type = %v, want %v", oid, ty, group.ty) + } + + if size != uint64(len(wantContent)) { + t.Fatalf("ReadHeader(%s) size = %d, want %d", oid, size, len(wantContent)) + } + + size, err = packedStore.ReadSize(oid) + if err != nil { + t.Fatalf("ReadSize(%s): %v", oid, err) + } + + if size != uint64(len(wantContent)) { + t.Fatalf("ReadSize(%s) = %d, want %d", oid, size, len(wantContent)) + } + + checkReaderContent(t, packedStore, oid, group.ty, wantContent) + checkReaderFull(t, packedStore, oid, objectFormat) + } + } + }) + } +} + +func checkReaderContent(t *testing.T, packedStore *packed.Packed, oid id.ObjectID, wantType typ.Type, wantContent []byte) { + t.Helper() + + ty, size, reader, err := packedStore.ReadReaderContent(oid) + if err != nil { + t.Fatalf("ReadReaderContent(%s): %v", oid, err) + } + + defer func() { _ = reader.Close() }() + + if ty != wantType { + t.Fatalf("ReadReaderContent(%s) type = %v, want %v", oid, ty, wantType) + } + + if size != uint64(len(wantContent)) { + t.Fatalf("ReadReaderContent(%s) size = %d, want %d", oid, size, len(wantContent)) + } + + content, err := io.ReadAll(reader) + if err != nil { + t.Fatalf("ReadReaderContent(%s) read: %v", oid, err) + } + + if !bytes.Equal(content, wantContent) { + t.Fatalf("ReadReaderContent(%s) content mismatch", oid) + } +} + +func checkReaderFull(t *testing.T, packedStore *packed.Packed, oid id.ObjectID, objectFormat id.ObjectFormat) { + t.Helper() + + reader, err := packedStore.ReadReaderFull(oid) + if err != nil { + t.Fatalf("ReadReaderFull(%s): %v", oid, err) + } + + defer func() { _ = reader.Close() }() + + raw, err := io.ReadAll(reader) + if err != nil { + t.Fatalf("ReadReaderFull(%s) read: %v", oid, err) + } + + if got := objectFormat.Sum(raw); got != oid { + t.Fatalf("ReadReaderFull(%s) hashes to %s", oid, got) + } +} diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go index a7c38d61..9c183575 100644 --- a/object/store/packed/reader.go +++ b/object/store/packed/reader.go @@ -1,30 +1,210 @@ package packed -// import ( -// "io" -// -// "lindenii.org/go/furgit/object/id" -// "lindenii.org/go/furgit/object/typ" -// ) -// -// // ReadBytesFull reads a full serialized object as "type size\x00content". -// func (packed *Packed) ReadBytesFull(objectID id.ObjectID) ([]byte, error) -// -// // ReadBytesContent reads an object's type and content bytes. -// func (packed *Packed) ReadBytesContent(objectID id.ObjectID) (typ.Type, []byte, error) -// -// // ReadReaderFull reads a full serialized object stream as "type size\x00content". -// func (packed *Packed) ReadReaderFull(objectID id.ObjectID) (io.ReadCloser, error) +import ( + "bytes" + "fmt" + "io" + + "lindenii.org/go/furgit/internal/compress/zlib" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/iolimit" + "lindenii.org/go/furgit/object/header" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/typ" +) + +// ReadBytesContent reads an object's type and content bytes, +// fully resolving delta chains. +func (packed *Packed) ReadBytesContent(objectID id.ObjectID) (typ.Type, []byte, error) { + p, offset, err := packed.lookup(objectID) + if err != nil { + return typ.Unknown, nil, err + } + + entryType, content, err := packed.unpackEntry(p, offset) + if err != nil { + return typ.Unknown, nil, err + } + + ty, err := objectTypeOf(entryType) + if err != nil { + return typ.Unknown, nil, err + } + + return ty, content, nil +} + +// ReadBytesFull reads a full serialized object as "type size\x00content", +// fully resolving delta chains. +func (packed *Packed) ReadBytesFull(objectID id.ObjectID) ([]byte, error) { + ty, content, err := packed.ReadBytesContent(objectID) + if err != nil { + return nil, err + } + + raw := header.Append(make([]byte, 0, len(content)+32), ty, uint64(len(content))) + + return append(raw, content...), nil +} + +// ReadHeader reads an object's type and declared content length. // -// // ReadReaderContent reads an object's type, declared content length, -// // and content stream. -// func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, uint64, io.ReadCloser, error) +// For delta entries this resolves the chained base type +// and the declared delta result size, +// without reconstructing content. +func (packed *Packed) ReadHeader(objectID id.ObjectID) (typ.Type, uint64, error) { + p, offset, err := packed.lookup(objectID) + if err != nil { + return typ.Unknown, 0, err + } + + entryHeader, payload, err := p.entryHeaderAt(offset, packed.objectFormat) + if err != nil { + return typ.Unknown, 0, err + } + + size := entryHeader.Size + + if entryHeader.Type.IsDelta() { + size, err = deltaResultSize(payload, entryHeader.Size) + if err != nil { + return typ.Unknown, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + } + + entryType, err := packed.resolveType(p, offset, entryHeader) + if err != nil { + return typ.Unknown, 0, err + } + + ty, err := objectTypeOf(entryType) + if err != nil { + return typ.Unknown, 0, err + } + + return ty, size, nil +} + +// ReadSize reads an object's declared content length. // -// // ReadSize reads an object's declared content length. -// func (packed *Packed) ReadSize(objectID id.ObjectID) (uint64, error) +// Unlike ReadHeader, +// this never walks delta chains. +func (packed *Packed) ReadSize(objectID id.ObjectID) (uint64, error) { + p, offset, err := packed.lookup(objectID) + if err != nil { + return 0, err + } + + entryHeader, payload, err := p.entryHeaderAt(offset, packed.objectFormat) + if err != nil { + return 0, err + } + + if !entryHeader.Type.IsDelta() { + return entryHeader.Size, nil + } + + size, err := deltaResultSize(payload, entryHeader.Size) + if err != nil { + return 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + return size, nil +} + +// ReadReaderContent reads an object's type, +// declared content length, and content stream. // -// // ReadHeader reads an object's type and declared content length. -// func (packed *Packed) ReadHeader(objectID id.ObjectID) (typ.Type, uint64, error) +// Non-delta entries stream directly from the pack; +// delta entries are fully resolved in memory first. +// Close releases resources only +// and does not drain unread data for additional validation. +func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, uint64, io.ReadCloser, error) { + p, offset, err := packed.lookup(objectID) + if err != nil { + return typ.Unknown, 0, nil, err + } + + entryHeader, payload, err := p.entryHeaderAt(offset, packed.objectFormat) + if err != nil { + return typ.Unknown, 0, nil, err + } + + if !entryHeader.Type.IsBase() { + entryType, content, err := packed.unpackEntry(p, offset) + if err != nil { + return typ.Unknown, 0, nil, err + } + + ty, err := objectTypeOf(entryType) + if err != nil { + return typ.Unknown, 0, nil, err + } + + return ty, uint64(len(content)), io.NopCloser(bytes.NewReader(content)), nil + } + + ty, err := objectTypeOf(entryHeader.Type) + if err != nil { + return typ.Unknown, 0, nil, err + } + + zr, err := zlib.NewReader(bytes.NewReader(payload)) + if err != nil { + return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) + } + + return ty, entryHeader.Size, &objectReader{ + reader: iolimit.ExpectLengthReader(zr, entryHeader.Size), + zr: zr, + }, nil +} + +// ReadReaderFull reads a full serialized object stream +// as "type size\x00content". // -// // Refresh updates the packed-store view of on-disk pack/index candidates. -// func (packed *Packed) Refresh() error +// Non-delta entries stream directly from the pack; +// delta entries are fully resolved in memory first. +// Close releases resources only +// and does not drain unread data for additional validation. +func (packed *Packed) ReadReaderFull(objectID id.ObjectID) (io.ReadCloser, error) { + ty, size, reader, err := packed.ReadReaderContent(objectID) + if err != nil { + return nil, err + } + + headerBytes := header.Append(nil, ty, size) + + return &objectReader{ + reader: io.MultiReader(bytes.NewReader(headerBytes), reader), + zr: reader, + }, nil +} + +// objectTypeOf converts one packfile entry type +// into an ordinary object type. +func objectTypeOf(entryType packfile.EntryType) (typ.Type, error) { + ty, err := entryType.ObjectType() + if err != nil { + return typ.Unknown, fmt.Errorf("%w: %w", ErrMalformedPackedStore, err) + } + + return ty, nil +} + +// objectReader streams one packed object payload +// and owns the underlying decompressor. +type objectReader struct { + // reader is the stream exposed by Read. + reader io.Reader + // zr is closed by Close. + zr io.Closer +} + +func (reader *objectReader) Read(dst []byte) (int, error) { + return reader.reader.Read(dst) +} + +func (reader *objectReader) Close() error { + return reader.zr.Close() +} diff --git a/object/store/packed/refresh.go b/object/store/packed/refresh.go new file mode 100644 index 00000000..14c66013 --- /dev/null +++ b/object/store/packed/refresh.go @@ -0,0 +1,69 @@ +package packed + +import ( + "fmt" + "io/fs" + "strings" +) + +// Refresh rescans the pack directory +// and replaces the store's view of available packs. +// +// Every index found must parse +// and have its pack data present and consistent; +// otherwise Refresh fails without changing the view. +func (packed *Packed) Refresh() error { + packed.refreshMu.Lock() + defer packed.refreshMu.Unlock() + + dirEntries, err := fs.ReadDir(packed.root.FS(), ".") + if err != nil { + return fmt.Errorf("object/store/packed: %w", err) + } + + next := make(map[string]*pack, len(packed.byName)) + + var opened []*pack + + for _, dirEntry := range dirEntries { + name, ok := strings.CutSuffix(dirEntry.Name(), ".idx") + if !ok || dirEntry.IsDir() { + continue + } + + if existing, ok := packed.byName[name]; ok { + next[name] = existing + + continue + } + + p, err := openPack(packed.root, name, packed.objectFormat) + if err != nil { + for _, p := range opened { + _ = p.close() + } + + return err + } + + opened = append(opened, p) + next[name] = p + } + + for name, p := range packed.byName { + if _, ok := next[name]; !ok { + packed.retired = append(packed.retired, p) + } + } + + packed.byName = next + + present := make(map[*pack]struct{}, len(next)) + for _, p := range next { + present[p] = struct{}{} + } + + packed.order.Sync(present) + + return nil +} diff --git a/object/store/packed/refresh_test.go b/object/store/packed/refresh_test.go new file mode 100644 index 00000000..2a5b91cb --- /dev/null +++ b/object/store/packed/refresh_test.go @@ -0,0 +1,107 @@ +package packed_test + +import ( + "errors" + "os" + "path/filepath" + "testing" + + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/furgit/object/store" + "lindenii.org/go/furgit/object/store/packed" +) + +func TestRefreshIsExplicit(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + _, prefix, seeded := makeGitPack(t, objectFormat) + oids := seeded.All() + + dir := t.TempDir() + + root, err := os.OpenRoot(dir) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + t.Cleanup(func() { _ = root.Close() }) + + packedStore, err := packed.New(root, objectFormat) + if err != nil { + t.Fatalf("New: %v", err) + } + + t.Cleanup(func() { _ = packedStore.Close() }) + + _, _, err = packedStore.ReadBytesContent(oids[0]) + if !errors.Is(err, store.ErrObjectNotFound) { + t.Fatalf("empty store read error = %v, want ErrObjectNotFound", err) + } + + t.Helper() + base := filepath.Base(prefix) + cp(t, prefix+".pack", filepath.Join(dir, base+".pack")) + cp(t, prefix+".idx", filepath.Join(dir, base+".idx")) + + // New packs must stay invisible until an explicit Refresh. + _, _, err = packedStore.ReadBytesContent(oids[0]) + if !errors.Is(err, store.ErrObjectNotFound) { + t.Fatalf("pre-Refresh read error = %v, want ErrObjectNotFound", err) + } + + err = packedStore.Refresh() + if err != nil { + t.Fatalf("Refresh: %v", err) + } + + for _, oid := range oids { + _, _, err := packedStore.ReadBytesContent(oid) + if err != nil { + t.Fatalf("post-Refresh ReadBytesContent(%s): %v", oid, err) + } + } + }) + } +} + +func TestRefreshRejectsIndexWithoutPack(t *testing.T) { + t.Parallel() + + objectFormat := id.ObjectFormatSHA256 + + _, prefix, _ := makeGitPack(t, objectFormat) + + dir := t.TempDir() + cp(t, prefix+".idx", filepath.Join(dir, filepath.Base(prefix)+".idx")) + + root, err := os.OpenRoot(dir) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + t.Cleanup(func() { _ = root.Close() }) + + _, err = packed.New(root, objectFormat) + if err == nil { + t.Fatalf("New with orphan index: expected error") + } +} + +// cp copies one file from src to dst. +func cp(t *testing.T, src, dst string) { + t.Helper() + + data, err := os.ReadFile(src) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + + err = os.WriteFile(dst, data, 0o600) //nolint:gosec + if err != nil { + t.Fatalf("WriteFile: %v", err) + } +} |
