From 238b2caf83dde3c4395109c51b8c9affa6e11890 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Mon, 30 Mar 2026 13:53:54 +0000 Subject: object/store/packed: Start the internal/reading split --- object/store/packed/TODO | 3 - object/store/packed/close.go | 35 --- object/store/packed/delta_build_chain.go | 65 ----- object/store/packed/delta_cache.go | 61 ----- object/store/packed/delta_chain.go | 13 - object/store/packed/delta_node.go | 9 - object/store/packed/delta_resolve_chain.go | 61 ----- object/store/packed/delta_resolve_chain_start.go | 58 ---- object/store/packed/delta_resolve_content.go | 26 -- object/store/packed/delta_size.go | 27 -- object/store/packed/doc.go | 3 + object/store/packed/entry_inflate.go | 64 ----- object/store/packed/entry_meta.go | 16 -- object/store/packed/entry_parse.go | 71 ----- object/store/packed/helpers_test.go | 102 ------- object/store/packed/idx.go | 36 --- object/store/packed/idx_candidates_mru.go | 136 ---------- object/store/packed/idx_close.go | 28 -- object/store/packed/idx_lookup.go | 91 ------- object/store/packed/idx_lookup_candidates.go | 126 --------- object/store/packed/idx_open.go | 98 ------- object/store/packed/idx_parse.go | 78 ------ object/store/packed/internal/doc.go | 6 + object/store/packed/internal/reading/TODO | 3 + object/store/packed/internal/reading/close.go | 35 +++ .../packed/internal/reading/delta_build_chain.go | 65 +++++ .../store/packed/internal/reading/delta_cache.go | 61 +++++ .../store/packed/internal/reading/delta_chain.go | 13 + object/store/packed/internal/reading/delta_node.go | 9 + .../packed/internal/reading/delta_resolve_chain.go | 61 +++++ .../internal/reading/delta_resolve_chain_start.go | 58 ++++ .../internal/reading/delta_resolve_content.go | 26 ++ object/store/packed/internal/reading/delta_size.go | 27 ++ object/store/packed/internal/reading/doc.go | 6 + .../store/packed/internal/reading/entry_inflate.go | 64 +++++ object/store/packed/internal/reading/entry_meta.go | 16 ++ 
.../store/packed/internal/reading/entry_parse.go | 71 +++++ .../store/packed/internal/reading/helpers_test.go | 102 +++++++ object/store/packed/internal/reading/idx.go | 36 +++ .../packed/internal/reading/idx_candidates_mru.go | 136 ++++++++++ object/store/packed/internal/reading/idx_close.go | 28 ++ object/store/packed/internal/reading/idx_lookup.go | 91 +++++++ .../internal/reading/idx_lookup_candidates.go | 126 +++++++++ object/store/packed/internal/reading/idx_open.go | 98 +++++++ object/store/packed/internal/reading/idx_parse.go | 78 ++++++ object/store/packed/internal/reading/location.go | 7 + object/store/packed/internal/reading/new.go | 33 +++ object/store/packed/internal/reading/options.go | 16 ++ object/store/packed/internal/reading/pack.go | 82 ++++++ .../packed/internal/reading/pack_idx_checksum.go | 34 +++ object/store/packed/internal/reading/read_bytes.go | 46 ++++ .../store/packed/internal/reading/read_closer.go | 19 ++ .../store/packed/internal/reading/read_header.go | 20 ++ .../packed/internal/reading/read_header_resolve.go | 65 +++++ .../store/packed/internal/reading/read_reader.go | 92 +++++++ object/store/packed/internal/reading/read_size.go | 45 +++ object/store/packed/internal/reading/read_test.go | 301 +++++++++++++++++++++ object/store/packed/internal/reading/store.go | 52 ++++ .../store/packed/internal/reading/store_lookup.go | 106 ++++++++ .../packed/internal/reading/store_open_pack.go | 57 ++++ .../store/packed/internal/reading/trailer_match.go | 29 ++ object/store/packed/location.go | 7 - object/store/packed/new.go | 23 +- object/store/packed/options.go | 10 - object/store/packed/options_refresh.go | 11 + object/store/packed/pack.go | 82 ------ object/store/packed/pack_idx_checksum.go | 34 --- object/store/packed/read_bytes.go | 46 ---- object/store/packed/read_closer.go | 19 -- object/store/packed/read_header.go | 20 -- object/store/packed/read_header_resolve.go | 65 ----- object/store/packed/read_reader.go | 92 ------- 
object/store/packed/read_size.go | 45 --- object/store/packed/read_test.go | 301 --------------------- object/store/packed/reader.go | 65 +++++ object/store/packed/store.go | 48 +--- object/store/packed/store_lookup.go | 106 -------- object/store/packed/store_open_pack.go | 57 ---- object/store/packed/trailer_match.go | 29 -- 79 files changed, 2310 insertions(+), 2177 deletions(-) delete mode 100644 object/store/packed/TODO delete mode 100644 object/store/packed/close.go delete mode 100644 object/store/packed/delta_build_chain.go delete mode 100644 object/store/packed/delta_cache.go delete mode 100644 object/store/packed/delta_chain.go delete mode 100644 object/store/packed/delta_node.go delete mode 100644 object/store/packed/delta_resolve_chain.go delete mode 100644 object/store/packed/delta_resolve_chain_start.go delete mode 100644 object/store/packed/delta_resolve_content.go delete mode 100644 object/store/packed/delta_size.go create mode 100644 object/store/packed/doc.go delete mode 100644 object/store/packed/entry_inflate.go delete mode 100644 object/store/packed/entry_meta.go delete mode 100644 object/store/packed/entry_parse.go delete mode 100644 object/store/packed/helpers_test.go delete mode 100644 object/store/packed/idx.go delete mode 100644 object/store/packed/idx_candidates_mru.go delete mode 100644 object/store/packed/idx_close.go delete mode 100644 object/store/packed/idx_lookup.go delete mode 100644 object/store/packed/idx_lookup_candidates.go delete mode 100644 object/store/packed/idx_open.go delete mode 100644 object/store/packed/idx_parse.go create mode 100644 object/store/packed/internal/doc.go create mode 100644 object/store/packed/internal/reading/TODO create mode 100644 object/store/packed/internal/reading/close.go create mode 100644 object/store/packed/internal/reading/delta_build_chain.go create mode 100644 object/store/packed/internal/reading/delta_cache.go create mode 100644 object/store/packed/internal/reading/delta_chain.go create mode 
100644 object/store/packed/internal/reading/delta_node.go create mode 100644 object/store/packed/internal/reading/delta_resolve_chain.go create mode 100644 object/store/packed/internal/reading/delta_resolve_chain_start.go create mode 100644 object/store/packed/internal/reading/delta_resolve_content.go create mode 100644 object/store/packed/internal/reading/delta_size.go create mode 100644 object/store/packed/internal/reading/doc.go create mode 100644 object/store/packed/internal/reading/entry_inflate.go create mode 100644 object/store/packed/internal/reading/entry_meta.go create mode 100644 object/store/packed/internal/reading/entry_parse.go create mode 100644 object/store/packed/internal/reading/helpers_test.go create mode 100644 object/store/packed/internal/reading/idx.go create mode 100644 object/store/packed/internal/reading/idx_candidates_mru.go create mode 100644 object/store/packed/internal/reading/idx_close.go create mode 100644 object/store/packed/internal/reading/idx_lookup.go create mode 100644 object/store/packed/internal/reading/idx_lookup_candidates.go create mode 100644 object/store/packed/internal/reading/idx_open.go create mode 100644 object/store/packed/internal/reading/idx_parse.go create mode 100644 object/store/packed/internal/reading/location.go create mode 100644 object/store/packed/internal/reading/new.go create mode 100644 object/store/packed/internal/reading/options.go create mode 100644 object/store/packed/internal/reading/pack.go create mode 100644 object/store/packed/internal/reading/pack_idx_checksum.go create mode 100644 object/store/packed/internal/reading/read_bytes.go create mode 100644 object/store/packed/internal/reading/read_closer.go create mode 100644 object/store/packed/internal/reading/read_header.go create mode 100644 object/store/packed/internal/reading/read_header_resolve.go create mode 100644 object/store/packed/internal/reading/read_reader.go create mode 100644 object/store/packed/internal/reading/read_size.go create 
mode 100644 object/store/packed/internal/reading/read_test.go create mode 100644 object/store/packed/internal/reading/store.go create mode 100644 object/store/packed/internal/reading/store_lookup.go create mode 100644 object/store/packed/internal/reading/store_open_pack.go create mode 100644 object/store/packed/internal/reading/trailer_match.go delete mode 100644 object/store/packed/location.go create mode 100644 object/store/packed/options_refresh.go delete mode 100644 object/store/packed/pack.go delete mode 100644 object/store/packed/pack_idx_checksum.go delete mode 100644 object/store/packed/read_bytes.go delete mode 100644 object/store/packed/read_closer.go delete mode 100644 object/store/packed/read_header.go delete mode 100644 object/store/packed/read_header_resolve.go delete mode 100644 object/store/packed/read_reader.go delete mode 100644 object/store/packed/read_size.go delete mode 100644 object/store/packed/read_test.go create mode 100644 object/store/packed/reader.go delete mode 100644 object/store/packed/store_lookup.go delete mode 100644 object/store/packed/store_open_pack.go delete mode 100644 object/store/packed/trailer_match.go diff --git a/object/store/packed/TODO b/object/store/packed/TODO deleted file mode 100644 index f4a5f48e..00000000 --- a/object/store/packed/TODO +++ /dev/null @@ -1,3 +0,0 @@ -* Per delta-plan memo map -* Internal handle/request context (might expose it externally later and add to global interface) -* Audit on mutex diff --git a/object/store/packed/close.go b/object/store/packed/close.go deleted file mode 100644 index 6ad31aac..00000000 --- a/object/store/packed/close.go +++ /dev/null @@ -1,35 +0,0 @@ -package packed - -// Close releases mapped pack/index resources associated with the store. -// -// Labels: MT-Unsafe. 
-func (store *Store) Close() error { - store.stateMu.Lock() - packs := store.packs - store.stateMu.Unlock() - store.idxMu.RLock() - indexes := store.idxByPack - store.idxMu.RUnlock() - - var closeErr error - - for _, pack := range packs { - err := pack.close() - if err != nil && closeErr == nil { - closeErr = err - } - } - - for _, index := range indexes { - err := index.close() - if err != nil && closeErr == nil { - closeErr = err - } - } - - store.cacheMu.Lock() - store.deltaCache.clear() - store.cacheMu.Unlock() - - return closeErr -} diff --git a/object/store/packed/delta_build_chain.go b/object/store/packed/delta_build_chain.go deleted file mode 100644 index a528f705..00000000 --- a/object/store/packed/delta_build_chain.go +++ /dev/null @@ -1,65 +0,0 @@ -package packed - -import ( - "fmt" - - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// deltaBuildChain walks one object's chain and builds a reconstruction chain. -func (store *Store) deltaBuildChain(start location) (deltaChain, error) { - visited := make(map[location]struct{}) - current := start - - var chain deltaChain - - for { - if _, ok := visited[current]; ok { - return deltaChain{}, fmt.Errorf("objectstore/packed: delta cycle while resolving object") - } - - visited[current] = struct{}{} - - _, meta, err := store.entryMetaAt(current) - if err != nil { - return deltaChain{}, err - } - - if meta.ty.IsBaseObject() { - chain.baseLoc = current - chain.baseType = meta.ty - - return chain, nil - } - - switch meta.ty { - case objecttype.TypeRefDelta: - chain.deltas = append(chain.deltas, deltaNode{ - loc: current, - dataOffset: meta.dataOffset, - }) - - next, err := store.lookup(meta.baseRefID) - if err != nil { - return deltaChain{}, err - } - - current = next - case objecttype.TypeOfsDelta: - chain.deltas = append(chain.deltas, deltaNode{ - loc: current, - dataOffset: meta.dataOffset, - }) - current = location{ - packName: current.packName, - offset: meta.baseOfs, - } - case 
objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: - return deltaChain{}, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) - case objecttype.TypeInvalid, objecttype.TypeFuture: - return deltaChain{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) - default: - return deltaChain{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) - } - } -} diff --git a/object/store/packed/delta_cache.go b/object/store/packed/delta_cache.go deleted file mode 100644 index 3bf3a035..00000000 --- a/object/store/packed/delta_cache.go +++ /dev/null @@ -1,61 +0,0 @@ -package packed - -import ( - "codeberg.org/lindenii/furgit/internal/lru" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -const defaultDeltaCacheMaxBytes = 32 << 20 - -// deltaBaseKey identifies one base object by pack location. -type deltaBaseKey struct { - packName string - offset uint64 -} - -// deltaBaseValue stores one cached base object body. -type deltaBaseValue struct { - ty objecttype.Type - content []byte -} - -// deltaCache wraps a weighted LRU for resolved delta bases. -type deltaCache struct { - lru *lru.Cache[deltaBaseKey, deltaBaseValue] -} - -// newDeltaCache creates a delta base cache with a byte budget. -func newDeltaCache(maxBytes int64) *deltaCache { - return &deltaCache{ - lru: lru.New( - maxBytes, - func(_ deltaBaseKey, value deltaBaseValue) int64 { - return int64(len(value.content)) - }, - nil, - ), - } -} - -// get returns a cloned cached base object value. -func (cache *deltaCache) get(key deltaBaseKey) (objecttype.Type, []byte, bool) { - value, ok := cache.lru.Get(key) - if !ok { - return objecttype.TypeInvalid, nil, false - } - - return value.ty, append([]byte(nil), value.content...), true -} - -// add stores a cloned base object value. 
-func (cache *deltaCache) add(key deltaBaseKey, ty objecttype.Type, content []byte) { - cache.lru.Add(key, deltaBaseValue{ - ty: ty, - content: append([]byte(nil), content...), - }) -} - -// clear removes all cached entries. -func (cache *deltaCache) clear() { - cache.lru.Clear() -} diff --git a/object/store/packed/delta_chain.go b/object/store/packed/delta_chain.go deleted file mode 100644 index 372e89cd..00000000 --- a/object/store/packed/delta_chain.go +++ /dev/null @@ -1,13 +0,0 @@ -package packed - -import objecttype "codeberg.org/lindenii/furgit/object/type" - -// deltaChain describes how to reconstruct one requested object. -type deltaChain struct { - // baseLoc points to the innermost base object. - baseLoc location - // baseType is the canonical object type resolved from baseLoc. - baseType objecttype.Type - // deltas contains delta objects from target down toward base. - deltas []deltaNode -} diff --git a/object/store/packed/delta_node.go b/object/store/packed/delta_node.go deleted file mode 100644 index 24ede1e0..00000000 --- a/object/store/packed/delta_node.go +++ /dev/null @@ -1,9 +0,0 @@ -package packed - -// deltaNode describes one delta object in a reconstruction chain. -type deltaNode struct { - // loc identifies the delta object's pack location. - loc location - // dataOffset points to the start of the delta zlib payload in pack. - dataOffset int -} diff --git a/object/store/packed/delta_resolve_chain.go b/object/store/packed/delta_resolve_chain.go deleted file mode 100644 index 6347ee41..00000000 --- a/object/store/packed/delta_resolve_chain.go +++ /dev/null @@ -1,61 +0,0 @@ -package packed - -import ( - "fmt" - - deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// deltaResolveChain resolves one object chain into content bytes. 
-func (store *Store) deltaResolveChain(chain deltaChain, declaredSize int64) (objecttype.Type, []byte, error) { - ty, out, nextDelta, err := store.deltaResolveChainStart(chain) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - for i := nextDelta; i >= 0; i-- { - node := chain.deltas[i] - - pack, err := store.openPack(node.loc.packName) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - delta, err := inflateAt(pack, node.dataOffset, -1) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - out, err = deltaapply.Apply(out, delta) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - store.cacheMu.Lock() - store.deltaCache.add( - deltaBaseKey{packName: node.loc.packName, offset: node.loc.offset}, - ty, - out, - ) - store.cacheMu.Unlock() - } - - if int64(len(out)) != declaredSize { - return objecttype.TypeInvalid, nil, fmt.Errorf( - "objectstore/packed: resolved content size mismatch: got %d want %d", - len(out), - declaredSize, - ) - } - - if ty != chain.baseType { - return objecttype.TypeInvalid, nil, fmt.Errorf( - "objectstore/packed: resolved content type mismatch: got %d want %d", - ty, - chain.baseType, - ) - } - - return ty, out, nil -} diff --git a/object/store/packed/delta_resolve_chain_start.go b/object/store/packed/delta_resolve_chain_start.go deleted file mode 100644 index ac214576..00000000 --- a/object/store/packed/delta_resolve_chain_start.go +++ /dev/null @@ -1,58 +0,0 @@ -package packed - -import ( - "fmt" - - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// deltaResolveChainStart finds the nearest cached chain node or inflates the -// innermost base object. It returns the starting bytes and the next delta index -// to apply in reverse order. 
-func (store *Store) deltaResolveChainStart(chain deltaChain) (objecttype.Type, []byte, int, error) { - for i, node := range chain.deltas { - store.cacheMu.RLock() - ty, out, ok := store.deltaCache.get( - deltaBaseKey{packName: node.loc.packName, offset: node.loc.offset}, - ) - store.cacheMu.RUnlock() - - if ok { - return ty, out, i - 1, nil - } - } - - store.cacheMu.RLock() - ty, out, ok := store.deltaCache.get( - deltaBaseKey{packName: chain.baseLoc.packName, offset: chain.baseLoc.offset}, - ) - store.cacheMu.RUnlock() - - if ok { - return ty, out, len(chain.deltas) - 1, nil - } - - pack, meta, err := store.entryMetaAt(chain.baseLoc) - if err != nil { - return objecttype.TypeInvalid, nil, 0, err - } - - if !meta.ty.IsBaseObject() { - return objecttype.TypeInvalid, nil, 0, fmt.Errorf("objectstore/packed: delta chain base is not a base object") - } - - base, err := inflateAt(pack, meta.dataOffset, meta.size) - if err != nil { - return objecttype.TypeInvalid, nil, 0, err - } - - store.cacheMu.Lock() - store.deltaCache.add( - deltaBaseKey{packName: chain.baseLoc.packName, offset: chain.baseLoc.offset}, - meta.ty, - base, - ) - store.cacheMu.Unlock() - - return meta.ty, base, len(chain.deltas) - 1, nil -} diff --git a/object/store/packed/delta_resolve_content.go b/object/store/packed/delta_resolve_content.go deleted file mode 100644 index 7b4d5319..00000000 --- a/object/store/packed/delta_resolve_content.go +++ /dev/null @@ -1,26 +0,0 @@ -package packed - -import objecttype "codeberg.org/lindenii/furgit/object/type" - -// deltaResolveContent resolves one object's content bytes from its pack location. 
-func (store *Store) deltaResolveContent(start location) (objecttype.Type, []byte, error) { - chain, err := store.deltaBuildChain(start) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - pack, meta, err := store.entryMetaAt(start) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - declaredSize := meta.size - if !meta.ty.IsBaseObject() { - declaredSize, err = deltaDeclaredSizeAt(pack, meta.dataOffset) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - } - - return store.deltaResolveChain(chain, declaredSize) -} diff --git a/object/store/packed/delta_size.go b/object/store/packed/delta_size.go deleted file mode 100644 index e5ba3bb7..00000000 --- a/object/store/packed/delta_size.go +++ /dev/null @@ -1,27 +0,0 @@ -package packed - -import ( - "bufio" - - deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply" -) - -// deltaDeclaredSizeAt returns the resolved object size declared by one delta -// stream header at dataOffset. -func deltaDeclaredSizeAt(pack *packFile, dataOffset int) (int64, error) { - reader, err := zlibReaderAt(pack, dataOffset) - if err != nil { - return 0, err - } - - defer func() { _ = reader.Close() }() - - br := bufio.NewReaderSize(reader, 32) - - _, size, err := deltaapply.ReadHeaderSizes(br) - if err != nil { - return 0, err - } - - return int64(size), nil -} diff --git a/object/store/packed/doc.go b/object/store/packed/doc.go new file mode 100644 index 00000000..252a2baf --- /dev/null +++ b/object/store/packed/doc.go @@ -0,0 +1,3 @@ +// Package packed provides Git object reading from pack/index files under an +// objects/pack directory. 
+package packed diff --git a/object/store/packed/entry_inflate.go b/object/store/packed/entry_inflate.go deleted file mode 100644 index f79d86c0..00000000 --- a/object/store/packed/entry_inflate.go +++ /dev/null @@ -1,64 +0,0 @@ -package packed - -import ( - "bytes" - "fmt" - "io" - "math" - - "codeberg.org/lindenii/furgit/internal/compress/zlib" - "codeberg.org/lindenii/furgit/internal/iolimit" -) - -// zlibReaderAt opens a zlib reader starting at data offset within pack. -func zlibReaderAt(pack *packFile, offset int) (io.ReadCloser, error) { - if offset < 0 || offset > len(pack.data) { - return nil, fmt.Errorf("objectstore/packed: pack %q zlib offset out of bounds", pack.name) - } - - return zlib.NewReader(bytes.NewReader(pack.data[offset:])) -} - -// inflateAt inflates one entry payload from data offset. -func inflateAt(pack *packFile, offset int, expectedSize int64) ([]byte, error) { - reader, err := zlibReaderAt(pack, offset) - if err != nil { - return nil, err - } - - defer func() { _ = reader.Close() }() - - if expectedSize >= 0 { - if expectedSize > int64(math.MaxInt) { - return nil, fmt.Errorf( - "objectstore/packed: pack %q expected inflated size overflows int: %d", - pack.name, - expectedSize, - ) - } - - reader := iolimit.ExpectLengthReader(reader, expectedSize) - body := make([]byte, int(expectedSize)) - - _, err := io.ReadFull(reader, body) - if err != nil { - return nil, err - } - - var probe [1]byte - - _, err = reader.Read(probe[:]) - if err != nil && err != io.EOF { - return nil, err - } - - return body, nil - } - - body, err := io.ReadAll(reader) - if err != nil { - return nil, err - } - - return body, nil -} diff --git a/object/store/packed/entry_meta.go b/object/store/packed/entry_meta.go deleted file mode 100644 index 0bbe8bef..00000000 --- a/object/store/packed/entry_meta.go +++ /dev/null @@ -1,16 +0,0 @@ -package packed - -// entryMetaAt parses one pack entry header at location. 
-func (store *Store) entryMetaAt(loc location) (*packFile, entryMeta, error) { - pack, err := store.openPack(loc.packName) - if err != nil { - return nil, entryMeta{}, err - } - - meta, err := parseEntryMeta(pack, store.algo, loc.offset) - if err != nil { - return nil, entryMeta{}, err - } - - return pack, meta, nil -} diff --git a/object/store/packed/entry_parse.go b/object/store/packed/entry_parse.go deleted file mode 100644 index 962e39df..00000000 --- a/object/store/packed/entry_parse.go +++ /dev/null @@ -1,71 +0,0 @@ -package packed - -import ( - "fmt" - - packfmt "codeberg.org/lindenii/furgit/format/packfile" - "codeberg.org/lindenii/furgit/internal/intconv" - objectid "codeberg.org/lindenii/furgit/object/id" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// entryMeta describes one parsed pack entry header. -type entryMeta struct { - // ty is the pack entry type tag. - ty objecttype.Type - // size is the declared resulting content size. - size int64 - // dataOffset points to the zlib payload start. - dataOffset int - // baseRefID is set for ref-delta entries. - baseRefID objectid.ObjectID - // baseOfs is set for ofs-delta entries. - baseOfs uint64 -} - -// parseEntryMeta parses one pack entry header at offset. 
-func parseEntryMeta(pack *packFile, algo objectid.Algorithm, offset uint64) (entryMeta, error) { - var zero entryMeta - if offset >= uint64(len(pack.data)) { - return zero, fmt.Errorf("objectstore/packed: pack %q offset %d out of bounds", pack.name, offset) - } - - pos, err := intconv.Uint64ToInt(offset) - if err != nil { - return zero, fmt.Errorf("objectstore/packed: pack %q offset conversion: %w", pack.name, err) - } - - entry, err := packfmt.ParseEntry(pack.data[pos:], algo.Size()) - if err != nil { - return zero, fmt.Errorf("objectstore/packed: pack %q: %w", pack.name, err) - } - - meta := entryMeta{ - ty: entry.Type, - size: entry.Size, - dataOffset: pos + entry.DataOffset, - } - switch meta.ty { - case objecttype.TypeRefDelta: - baseID, err := objectid.FromBytes(algo, entry.RefBaseID) - if err != nil { - return zero, fmt.Errorf("objectstore/packed: pack %q invalid ref-delta base id: %w", pack.name, err) - } - - meta.baseRefID = baseID - case objecttype.TypeOfsDelta: - if offset <= entry.OfsBaseDistance { - return zero, fmt.Errorf("objectstore/packed: pack %q has invalid ofs-delta base", pack.name) - } - - meta.baseOfs = offset - entry.OfsBaseDistance - case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: - // Base object types do not have delta base metadata. 
- case objecttype.TypeInvalid, objecttype.TypeFuture: - return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported entry type %d", pack.name, meta.ty) - default: - return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported entry type %d", pack.name, meta.ty) - } - - return meta, nil -} diff --git a/object/store/packed/helpers_test.go b/object/store/packed/helpers_test.go deleted file mode 100644 index dc02e316..00000000 --- a/object/store/packed/helpers_test.go +++ /dev/null @@ -1,102 +0,0 @@ -package packed_test - -import ( - "fmt" - "io" - "strconv" - "strings" - "testing" - - "codeberg.org/lindenii/furgit/internal/testgit" - objectheader "codeberg.org/lindenii/furgit/object/header" - objectid "codeberg.org/lindenii/furgit/object/id" - "codeberg.org/lindenii/furgit/object/store/packed" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -func openPackedStore(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) *packed.Store { - t.Helper() - - root := testRepo.OpenPackRoot(t) - - store, err := packed.New(root, algo, packed.Options{}) - if err != nil { - t.Fatalf("packed.New: %v", err) - } - - return store -} - -func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte { - t.Helper() - - data, err := io.ReadAll(reader) - if err != nil { - _ = reader.Close() - - t.Fatalf("ReadAll: %v", err) - } - - err = reader.Close() - if err != nil { - t.Fatalf("Close: %v", err) - } - - return data -} - -func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) { - t.Helper() - - typeName := testRepo.Run(t, "cat-file", "-t", id.String()) - - ty, ok := objecttype.Parse(typeName) - if !ok { - t.Fatalf("ParseName(%q) failed", typeName) - } - - body := testRepo.CatFile(t, typeName, id) - - header, ok := objectheader.Encode(ty, int64(len(body))) - if !ok { - t.Fatalf("objectheader.Encode failed") - } - - raw := make([]byte, len(header)+len(body)) - copy(raw, header) - 
copy(raw[len(header):], body) - - return ty, body, raw -} - -func createPackedFixtureRepo(t *testing.T, algo objectid.Algorithm) (*testgit.TestRepo, []objectid.ObjectID) { - t.Helper() - - testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) - blobID, treeID, commitID := testRepo.MakeCommit(t, "packed store base commit") - testRepo.Run(t, "update-ref", "refs/heads/main", commitID.String()) - tagID := testRepo.TagAnnotated(t, "v1.0.0", commitID, "packed-store-tag") - - parent := commitID - - for i := range 24 { - content := "common-prefix\n" + strings.Repeat("line-"+strconv.Itoa(i%3)+"\n", 256) + fmt.Sprintf("tail-%d\n", i) - nextBlob, nextTree := testRepo.MakeSingleFileTree(t, fmt.Sprintf("file-%02d.txt", i), []byte(content)) - nextCommit := testRepo.CommitTree(t, nextTree, fmt.Sprintf("commit-%02d", i), parent) - testRepo.Run(t, "update-ref", "refs/heads/main", nextCommit.String()) - parent = nextCommit - - _ = nextBlob - _ = nextTree - } - - testRepo.Repack(t, "-a", "-d", "-f", "--window=64", "--depth=64") - - return testRepo, []objectid.ObjectID{ - blobID, - treeID, - commitID, - tagID, - parent, - } -} diff --git a/object/store/packed/idx.go b/object/store/packed/idx.go deleted file mode 100644 index 5024f2f3..00000000 --- a/object/store/packed/idx.go +++ /dev/null @@ -1,36 +0,0 @@ -package packed - -import ( - "os" - - objectid "codeberg.org/lindenii/furgit/object/id" -) - -// idxFile stores one mapped and validated idx v2 file. -type idxFile struct { - // idxName is the basename of this .idx file. - idxName string - // packName is the matching .pack basename. - packName string - // algo is the hash algorithm encoded by the index. - algo objectid.Algorithm - - // file is the opened index file descriptor. - file *os.File - // data is the mapped index bytes. - data []byte - - // fanout stores fanout table values. - fanout [256]uint32 - // numObjects equals fanout[255]. 
- numObjects int - - // namesOffset starts the sorted object-id table. - namesOffset int - // offset32Offset starts the 32-bit offset table. - offset32Offset int - // offset64Offset starts the 64-bit offset table. - offset64Offset int - // offset64Count is the number of 64-bit offset entries. - offset64Count int -} diff --git a/object/store/packed/idx_candidates_mru.go b/object/store/packed/idx_candidates_mru.go deleted file mode 100644 index d0cc7052..00000000 --- a/object/store/packed/idx_candidates_mru.go +++ /dev/null @@ -1,136 +0,0 @@ -package packed - -// packCandidateNode is one node in the candidate MRU order list. -type packCandidateNode struct { - packName string - prev *packCandidateNode - next *packCandidateNode -} - -func (store *Store) reconcileMRU(candidates []packCandidate) { - store.mruMu.Lock() - defer store.mruMu.Unlock() - - if store.mruNodeByPack == nil { - store.mruNodeByPack = make(map[string]*packCandidateNode, len(candidates)) - } - - present := make(map[string]struct{}, len(candidates)) - for _, candidate := range candidates { - present[candidate.packName] = struct{}{} - } - - ordered := make([]string, 0, len(candidates)) - - for node := store.mruHead; node != nil; node = node.next { - if _, ok := present[node.packName]; !ok { - continue - } - - ordered = append(ordered, node.packName) - delete(present, node.packName) - } - - for _, candidate := range candidates { - if _, ok := present[candidate.packName]; !ok { - continue - } - - ordered = append(ordered, candidate.packName) - delete(present, candidate.packName) - } - - store.mruHead = nil - store.mruTail = nil - store.mruNodeByPack = make(map[string]*packCandidateNode, len(ordered)) - - for _, packName := range ordered { - node := &packCandidateNode{ - packName: packName, - prev: store.mruTail, - } - if store.mruTail != nil { - store.mruTail.next = node - } - - if store.mruHead == nil { - store.mruHead = node - } - - store.mruTail = node - store.mruNodeByPack[packName] = node - } -} - 
-// touchCandidate moves one candidate to the front of the lookup order. -// This is done on a best-effort basis. -func (store *Store) touchCandidate(packName string) { - if !store.mruMu.TryLock() { - return - } - defer store.mruMu.Unlock() - - node := store.mruNodeByPack[packName] - if node == nil || node == store.mruHead { - return - } - - if node.prev != nil { - node.prev.next = node.next - } - - if node.next != nil { - node.next.prev = node.prev - } - - if store.mruTail == node { - store.mruTail = node.prev - } - - node.prev = nil - - node.next = store.mruHead - if store.mruHead != nil { - store.mruHead.prev = node - } - - store.mruHead = node - if store.mruTail == nil { - store.mruTail = node - } -} - -// firstCandidatePackName returns the current head pack name, or "" when none -// are available. -func (store *Store) firstCandidatePackName(snapshot *candidateSnapshot) string { - store.mruMu.RLock() - defer store.mruMu.RUnlock() - - for node := store.mruHead; node != nil; node = node.next { - if _, ok := snapshot.candidateByPack[node.packName]; ok { - return node.packName - } - } - - return "" -} - -// nextCandidatePackName returns the pack name after currentPack in current MRU -// order, or "" at end / when currentPack is not present. -func (store *Store) nextCandidatePackName(currentPack string, snapshot *candidateSnapshot) string { - store.mruMu.RLock() - defer store.mruMu.RUnlock() - - node := store.mruNodeByPack[currentPack] - if node == nil { - return "" - } - - for node = node.next; node != nil; node = node.next { - if _, ok := snapshot.candidateByPack[node.packName]; ok { - return node.packName - } - } - - return "" -} diff --git a/object/store/packed/idx_close.go b/object/store/packed/idx_close.go deleted file mode 100644 index 814ec987..00000000 --- a/object/store/packed/idx_close.go +++ /dev/null @@ -1,28 +0,0 @@ -package packed - -import "syscall" - -// close unmaps and closes one idx handle. 
-func (index *idxFile) close() error { - var closeErr error - - if index.data != nil { - err := syscall.Munmap(index.data) - if err != nil && closeErr == nil { - closeErr = err - } - - index.data = nil - } - - if index.file != nil { - err := index.file.Close() - if err != nil && closeErr == nil { - closeErr = err - } - - index.file = nil - } - - return closeErr -} diff --git a/object/store/packed/idx_lookup.go b/object/store/packed/idx_lookup.go deleted file mode 100644 index 0bd11d1b..00000000 --- a/object/store/packed/idx_lookup.go +++ /dev/null @@ -1,91 +0,0 @@ -package packed - -import ( - "bytes" - "encoding/binary" - "fmt" - - objectid "codeberg.org/lindenii/furgit/object/id" -) - -// lookup resolves one object ID to its pack offset within this index. -func (index *idxFile) lookup(id objectid.ObjectID) (uint64, bool, error) { - if id.Algorithm() != index.algo { - return 0, false, fmt.Errorf("objectstore/packed: object id algorithm mismatch") - } - - idBytes := (&id).RawBytes() - - hashSize := len(idBytes) - if hashSize != index.algo.Size() { - return 0, false, fmt.Errorf("objectstore/packed: unexpected object id length") - } - - first := int(idBytes[0]) - - lo := 0 - if first > 0 { - lo = int(index.fanout[first-1]) - } - - hi := int(index.fanout[first]) - if lo < 0 || hi < 0 || lo > hi || hi > index.numObjects { - return 0, false, fmt.Errorf("objectstore/packed: idx %q has invalid fanout bounds", index.idxName) - } - - for lo < hi { - mid := lo + (hi-lo)/2 - - nameOffset := index.namesOffset + mid*hashSize - if nameOffset < 0 || nameOffset+hashSize > len(index.data) { - return 0, false, fmt.Errorf("objectstore/packed: idx %q truncated name table", index.idxName) - } - - cmp := bytes.Compare(index.data[nameOffset:nameOffset+hashSize], idBytes) - if cmp == 0 { - offset, err := index.offsetAt(mid) - if err != nil { - return 0, false, err - } - - return offset, true, nil - } - - if cmp < 0 { - lo = mid + 1 - } else { - hi = mid - } - } - - return 0, false, nil -} 
- -// offsetAt resolves the pack offset for one object index entry. -func (index *idxFile) offsetAt(objectIndex int) (uint64, error) { - if objectIndex < 0 || objectIndex >= index.numObjects { - return 0, fmt.Errorf("objectstore/packed: idx %q offset index out of bounds", index.idxName) - } - - wordOffset := index.offset32Offset + objectIndex*4 - if wordOffset < 0 || wordOffset+4 > len(index.data) { - return 0, fmt.Errorf("objectstore/packed: idx %q truncated 32-bit offset table", index.idxName) - } - - word := binary.BigEndian.Uint32(index.data[wordOffset : wordOffset+4]) - if word&0x80000000 == 0 { - return uint64(word), nil - } - - pos := int(word & 0x7fffffff) - if pos < 0 || pos >= index.offset64Count { - return 0, fmt.Errorf("objectstore/packed: idx %q invalid 64-bit offset position", index.idxName) - } - - offOffset := index.offset64Offset + pos*8 - if offOffset < 0 || offOffset+8 > len(index.data)-2*index.algo.Size() { - return 0, fmt.Errorf("objectstore/packed: idx %q truncated 64-bit offset table", index.idxName) - } - - return binary.BigEndian.Uint64(index.data[offOffset : offOffset+8]), nil -} diff --git a/object/store/packed/idx_lookup_candidates.go b/object/store/packed/idx_lookup_candidates.go deleted file mode 100644 index a2de262a..00000000 --- a/object/store/packed/idx_lookup_candidates.go +++ /dev/null @@ -1,126 +0,0 @@ -package packed - -import ( - "fmt" - "os" - "slices" - "strings" -) - -// packCandidate describes one discovered pack/index pair. -type packCandidate struct { - // packName is the .pack basename. - packName string - // idxName is the .idx basename. - idxName string - // mtime is the pack file modification time for initial ordering. - mtime int64 -} - -type candidateSnapshot struct { - candidates []packCandidate - candidateByPack map[string]packCandidate -} - -// Refresh rescans objects/pack and atomically installs a fresh candidate list -// for future lookups. -// -// Refresh does not invalidate existing readers. 
Cached pack/index mappings, -// including ones for previously visible candidates, may be retained until -// Close. -func (store *Store) Refresh() error { - store.refreshMu.Lock() - defer store.refreshMu.Unlock() - - candidates, err := store.discoverCandidates() - if err != nil { - return err - } - - candidateByPack := make(map[string]packCandidate, len(candidates)) - for _, candidate := range candidates { - candidateByPack[candidate.packName] = candidate - } - - store.reconcileMRU(candidates) - - store.candidates.Store(&candidateSnapshot{ - candidates: candidates, - candidateByPack: candidateByPack, - }) - - return nil -} - -func (store *Store) ensureCandidates() (*candidateSnapshot, error) { - snapshot := store.candidates.Load() - if snapshot != nil { - return snapshot, nil - } - - err := store.Refresh() - if err != nil { - return nil, err - } - - return store.candidates.Load(), nil -} - -// discoverCandidates scans the objects/pack root and returns sorted pack/index -// pairs. -func (store *Store) discoverCandidates() ([]packCandidate, error) { - dir, err := store.root.Open(".") - if err != nil { - if os.IsNotExist(err) { - return nil, nil - } - - return nil, err - } - - defer func() { _ = dir.Close() }() - - entries, err := dir.ReadDir(-1) - if err != nil { - return nil, err - } - - candidates := make([]packCandidate, 0, len(entries)) - for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { - continue - } - - idxName := entry.Name() - packName := strings.TrimSuffix(idxName, ".idx") + ".pack" - - packInfo, err := store.root.Stat(packName) - if err != nil { - if os.IsNotExist(err) { - return nil, fmt.Errorf("objectstore/packed: missing pack file for index %q", idxName) - } - - return nil, err - } - - candidates = append(candidates, packCandidate{ - packName: packName, - idxName: idxName, - mtime: packInfo.ModTime().UnixNano(), - }) - } - - slices.SortFunc(candidates, func(a, b packCandidate) int { - if a.mtime != b.mtime { 
- if a.mtime > b.mtime { - return -1 - } - - return 1 - } - - return strings.Compare(a.packName, b.packName) - }) - - return candidates, nil -} diff --git a/object/store/packed/idx_open.go b/object/store/packed/idx_open.go deleted file mode 100644 index fabd0c00..00000000 --- a/object/store/packed/idx_open.go +++ /dev/null @@ -1,98 +0,0 @@ -package packed - -import ( - "fmt" - "os" - "syscall" - - "codeberg.org/lindenii/furgit/internal/intconv" - objectid "codeberg.org/lindenii/furgit/object/id" -) - -// openIndex returns one opened and parsed index, caching it by pack basename. -func (store *Store) openIndex(candidate packCandidate) (*idxFile, error) { - store.idxMu.RLock() - - index, ok := store.idxByPack[candidate.packName] - if ok { - store.idxMu.RUnlock() - - return index, nil - } - - store.idxMu.RUnlock() - - index, err := openIdxFile(store.root, candidate.idxName, candidate.packName, store.algo) - if err != nil { - return nil, err - } - - store.idxMu.Lock() - - existing, ok := store.idxByPack[candidate.packName] - if ok { - store.idxMu.Unlock() - - _ = index.close() - - return existing, nil - } - - store.idxByPack[candidate.packName] = index - store.idxMu.Unlock() - - return index, nil -} - -// openIdxFile maps and validates one idx v2 file. 
-func openIdxFile(root *os.Root, idxName, packName string, algo objectid.Algorithm) (*idxFile, error) { - file, err := root.Open(idxName) - if err != nil { - return nil, err - } - - info, err := file.Stat() - if err != nil { - _ = file.Close() - - return nil, err - } - - size := info.Size() - if size < 0 || size > int64(int(^uint(0)>>1)) { - _ = file.Close() - - return nil, fmt.Errorf("objectstore/packed: idx %q has unsupported size", idxName) - } - - fd, err := intconv.UintptrToInt(file.Fd()) - if err != nil { - _ = file.Close() - - return nil, err - } - - data, err := syscall.Mmap(fd, 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) - if err != nil { - _ = file.Close() - - return nil, err - } - - index := &idxFile{ - idxName: idxName, - packName: packName, - algo: algo, - file: file, - data: data, - } - - err = index.parse() - if err != nil { - _ = index.close() - - return nil, err - } - - return index, nil -} diff --git a/object/store/packed/idx_parse.go b/object/store/packed/idx_parse.go deleted file mode 100644 index 4da3bf42..00000000 --- a/object/store/packed/idx_parse.go +++ /dev/null @@ -1,78 +0,0 @@ -package packed - -import ( - "encoding/binary" - "fmt" -) - -const ( - idxMagicV2 = 0xff744f63 - idxVersionV2 = 2 -) - -// parse validates mapped idx v2 structure and stores table boundaries. 
-func (index *idxFile) parse() error { - hashSize := index.algo.Size() - if hashSize <= 0 { - return fmt.Errorf("objectstore/packed: idx %q has invalid hash algorithm", index.idxName) - } - - minLen := 8 + 256*4 + 2*hashSize - if len(index.data) < minLen { - return fmt.Errorf("objectstore/packed: idx %q too short", index.idxName) - } - - if binary.BigEndian.Uint32(index.data[:4]) != idxMagicV2 { - return fmt.Errorf("objectstore/packed: idx %q invalid magic", index.idxName) - } - - if binary.BigEndian.Uint32(index.data[4:8]) != idxVersionV2 { - return fmt.Errorf("objectstore/packed: idx %q unsupported version", index.idxName) - } - - prev := uint32(0) - - for i := range 256 { - base := 8 + i*4 - - cur := binary.BigEndian.Uint32(index.data[base : base+4]) - if cur < prev { - return fmt.Errorf("objectstore/packed: idx %q has non-monotonic fanout table", index.idxName) - } - - index.fanout[i] = cur - prev = cur - } - - index.numObjects = int(index.fanout[255]) - if index.numObjects < 0 { - return fmt.Errorf("objectstore/packed: idx %q has invalid object count", index.idxName) - } - - namesBytes := index.numObjects * hashSize - crcBytes := index.numObjects * 4 - offset32Bytes := index.numObjects * 4 - - minSize := 8 + 256*4 + namesBytes + crcBytes + offset32Bytes + 2*hashSize - if minSize < 0 || len(index.data) < minSize { - return fmt.Errorf("objectstore/packed: idx %q has truncated tables", index.idxName) - } - - index.namesOffset = 8 + 256*4 - index.offset32Offset = index.namesOffset + namesBytes + crcBytes - index.offset64Offset = index.offset32Offset + offset32Bytes - - offset64Bytes := len(index.data) - index.offset64Offset - 2*hashSize - if offset64Bytes < 0 || offset64Bytes%8 != 0 { - return fmt.Errorf("objectstore/packed: idx %q has malformed 64-bit offset table", index.idxName) - } - - index.offset64Count = offset64Bytes / 8 - - maxOffset64Count := max(index.numObjects-1, 0) - if index.offset64Count > maxOffset64Count { - return fmt.Errorf("objectstore/packed: 
idx %q has oversized 64-bit offset table", index.idxName) - } - - return nil -} diff --git a/object/store/packed/internal/doc.go b/object/store/packed/internal/doc.go new file mode 100644 index 00000000..05a9c2be --- /dev/null +++ b/object/store/packed/internal/doc.go @@ -0,0 +1,6 @@ +// Package internal encapsulates packed store implementation details. +// +// We have separate internal subpackages for ingest vs read and such, +// because these operations are so different that they almost share +// no code. This makes things clearer. +package internal diff --git a/object/store/packed/internal/reading/TODO b/object/store/packed/internal/reading/TODO new file mode 100644 index 00000000..f4a5f48e --- /dev/null +++ b/object/store/packed/internal/reading/TODO @@ -0,0 +1,3 @@ +* Per delta-plan memo map +* Internal handle/request context (might expose it externally later and add to global interface) +* Audit on mutex diff --git a/object/store/packed/internal/reading/close.go b/object/store/packed/internal/reading/close.go new file mode 100644 index 00000000..62c62025 --- /dev/null +++ b/object/store/packed/internal/reading/close.go @@ -0,0 +1,35 @@ +package reading + +// Close releases mapped pack/index resources associated with the store. +// +// Labels: MT-Unsafe. 
+func (store *Store) Close() error { + store.stateMu.Lock() + packs := store.packs + store.stateMu.Unlock() + store.idxMu.RLock() + indexes := store.idxByPack + store.idxMu.RUnlock() + + var closeErr error + + for _, pack := range packs { + err := pack.close() + if err != nil && closeErr == nil { + closeErr = err + } + } + + for _, index := range indexes { + err := index.close() + if err != nil && closeErr == nil { + closeErr = err + } + } + + store.cacheMu.Lock() + store.deltaCache.clear() + store.cacheMu.Unlock() + + return closeErr +} diff --git a/object/store/packed/internal/reading/delta_build_chain.go b/object/store/packed/internal/reading/delta_build_chain.go new file mode 100644 index 00000000..a0e3151d --- /dev/null +++ b/object/store/packed/internal/reading/delta_build_chain.go @@ -0,0 +1,65 @@ +package reading + +import ( + "fmt" + + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaBuildChain walks one object's chain and builds a reconstruction chain. +func (store *Store) deltaBuildChain(start location) (deltaChain, error) { + visited := make(map[location]struct{}) + current := start + + var chain deltaChain + + for { + if _, ok := visited[current]; ok { + return deltaChain{}, fmt.Errorf("objectstore/packed: delta cycle while resolving object") + } + + visited[current] = struct{}{} + + _, meta, err := store.entryMetaAt(current) + if err != nil { + return deltaChain{}, err + } + + if meta.ty.IsBaseObject() { + chain.baseLoc = current + chain.baseType = meta.ty + + return chain, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta: + chain.deltas = append(chain.deltas, deltaNode{ + loc: current, + dataOffset: meta.dataOffset, + }) + + next, err := store.lookup(meta.baseRefID) + if err != nil { + return deltaChain{}, err + } + + current = next + case objecttype.TypeOfsDelta: + chain.deltas = append(chain.deltas, deltaNode{ + loc: current, + dataOffset: meta.dataOffset, + }) + current = location{ + packName: current.packName, + 
offset: meta.baseOfs, + } + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + return deltaChain{}, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) + case objecttype.TypeInvalid, objecttype.TypeFuture: + return deltaChain{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + default: + return deltaChain{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } + } +} diff --git a/object/store/packed/internal/reading/delta_cache.go b/object/store/packed/internal/reading/delta_cache.go new file mode 100644 index 00000000..4259eb81 --- /dev/null +++ b/object/store/packed/internal/reading/delta_cache.go @@ -0,0 +1,61 @@ +package reading + +import ( + "codeberg.org/lindenii/furgit/internal/lru" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +const defaultDeltaCacheMaxBytes = 32 << 20 + +// deltaBaseKey identifies one base object by pack location. +type deltaBaseKey struct { + packName string + offset uint64 +} + +// deltaBaseValue stores one cached base object body. +type deltaBaseValue struct { + ty objecttype.Type + content []byte +} + +// deltaCache wraps a weighted LRU for resolved delta bases. +type deltaCache struct { + lru *lru.Cache[deltaBaseKey, deltaBaseValue] +} + +// newDeltaCache creates a delta base cache with a byte budget. +func newDeltaCache(maxBytes int64) *deltaCache { + return &deltaCache{ + lru: lru.New( + maxBytes, + func(_ deltaBaseKey, value deltaBaseValue) int64 { + return int64(len(value.content)) + }, + nil, + ), + } +} + +// get returns a cloned cached base object value. +func (cache *deltaCache) get(key deltaBaseKey) (objecttype.Type, []byte, bool) { + value, ok := cache.lru.Get(key) + if !ok { + return objecttype.TypeInvalid, nil, false + } + + return value.ty, append([]byte(nil), value.content...), true +} + +// add stores a cloned base object value. 
+func (cache *deltaCache) add(key deltaBaseKey, ty objecttype.Type, content []byte) { + cache.lru.Add(key, deltaBaseValue{ + ty: ty, + content: append([]byte(nil), content...), + }) +} + +// clear removes all cached entries. +func (cache *deltaCache) clear() { + cache.lru.Clear() +} diff --git a/object/store/packed/internal/reading/delta_chain.go b/object/store/packed/internal/reading/delta_chain.go new file mode 100644 index 00000000..6e82873e --- /dev/null +++ b/object/store/packed/internal/reading/delta_chain.go @@ -0,0 +1,13 @@ +package reading + +import objecttype "codeberg.org/lindenii/furgit/object/type" + +// deltaChain describes how to reconstruct one requested object. +type deltaChain struct { + // baseLoc points to the innermost base object. + baseLoc location + // baseType is the canonical object type resolved from baseLoc. + baseType objecttype.Type + // deltas contains delta objects from target down toward base. + deltas []deltaNode +} diff --git a/object/store/packed/internal/reading/delta_node.go b/object/store/packed/internal/reading/delta_node.go new file mode 100644 index 00000000..56f7b078 --- /dev/null +++ b/object/store/packed/internal/reading/delta_node.go @@ -0,0 +1,9 @@ +package reading + +// deltaNode describes one delta object in a reconstruction chain. +type deltaNode struct { + // loc identifies the delta object's pack location. + loc location + // dataOffset points to the start of the delta zlib payload in pack. 
+ dataOffset int +} diff --git a/object/store/packed/internal/reading/delta_resolve_chain.go b/object/store/packed/internal/reading/delta_resolve_chain.go new file mode 100644 index 00000000..ec9c39e2 --- /dev/null +++ b/object/store/packed/internal/reading/delta_resolve_chain.go @@ -0,0 +1,61 @@ +package reading + +import ( + "fmt" + + deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaResolveChain resolves one object chain into content bytes. +func (store *Store) deltaResolveChain(chain deltaChain, declaredSize int64) (objecttype.Type, []byte, error) { + ty, out, nextDelta, err := store.deltaResolveChainStart(chain) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + for i := nextDelta; i >= 0; i-- { + node := chain.deltas[i] + + pack, err := store.openPack(node.loc.packName) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + delta, err := inflateAt(pack, node.dataOffset, -1) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + out, err = deltaapply.Apply(out, delta) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + store.cacheMu.Lock() + store.deltaCache.add( + deltaBaseKey{packName: node.loc.packName, offset: node.loc.offset}, + ty, + out, + ) + store.cacheMu.Unlock() + } + + if int64(len(out)) != declaredSize { + return objecttype.TypeInvalid, nil, fmt.Errorf( + "objectstore/packed: resolved content size mismatch: got %d want %d", + len(out), + declaredSize, + ) + } + + if ty != chain.baseType { + return objecttype.TypeInvalid, nil, fmt.Errorf( + "objectstore/packed: resolved content type mismatch: got %d want %d", + ty, + chain.baseType, + ) + } + + return ty, out, nil +} diff --git a/object/store/packed/internal/reading/delta_resolve_chain_start.go b/object/store/packed/internal/reading/delta_resolve_chain_start.go new file mode 100644 index 00000000..17274027 --- /dev/null +++ 
b/object/store/packed/internal/reading/delta_resolve_chain_start.go @@ -0,0 +1,58 @@ +package reading + +import ( + "fmt" + + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaResolveChainStart finds the nearest cached chain node or inflates the +// innermost base object. It returns the starting bytes and the next delta index +// to apply in reverse order. +func (store *Store) deltaResolveChainStart(chain deltaChain) (objecttype.Type, []byte, int, error) { + for i, node := range chain.deltas { + store.cacheMu.RLock() + ty, out, ok := store.deltaCache.get( + deltaBaseKey{packName: node.loc.packName, offset: node.loc.offset}, + ) + store.cacheMu.RUnlock() + + if ok { + return ty, out, i - 1, nil + } + } + + store.cacheMu.RLock() + ty, out, ok := store.deltaCache.get( + deltaBaseKey{packName: chain.baseLoc.packName, offset: chain.baseLoc.offset}, + ) + store.cacheMu.RUnlock() + + if ok { + return ty, out, len(chain.deltas) - 1, nil + } + + pack, meta, err := store.entryMetaAt(chain.baseLoc) + if err != nil { + return objecttype.TypeInvalid, nil, 0, err + } + + if !meta.ty.IsBaseObject() { + return objecttype.TypeInvalid, nil, 0, fmt.Errorf("objectstore/packed: delta chain base is not a base object") + } + + base, err := inflateAt(pack, meta.dataOffset, meta.size) + if err != nil { + return objecttype.TypeInvalid, nil, 0, err + } + + store.cacheMu.Lock() + store.deltaCache.add( + deltaBaseKey{packName: chain.baseLoc.packName, offset: chain.baseLoc.offset}, + meta.ty, + base, + ) + store.cacheMu.Unlock() + + return meta.ty, base, len(chain.deltas) - 1, nil +} diff --git a/object/store/packed/internal/reading/delta_resolve_content.go b/object/store/packed/internal/reading/delta_resolve_content.go new file mode 100644 index 00000000..71eb69cf --- /dev/null +++ b/object/store/packed/internal/reading/delta_resolve_content.go @@ -0,0 +1,26 @@ +package reading + +import objecttype "codeberg.org/lindenii/furgit/object/type" + +// deltaResolveContent resolves 
one object's content bytes from its pack location. +func (store *Store) deltaResolveContent(start location) (objecttype.Type, []byte, error) { + chain, err := store.deltaBuildChain(start) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + pack, meta, err := store.entryMetaAt(start) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + declaredSize := meta.size + if !meta.ty.IsBaseObject() { + declaredSize, err = deltaDeclaredSizeAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + } + + return store.deltaResolveChain(chain, declaredSize) +} diff --git a/object/store/packed/internal/reading/delta_size.go b/object/store/packed/internal/reading/delta_size.go new file mode 100644 index 00000000..8a85fad9 --- /dev/null +++ b/object/store/packed/internal/reading/delta_size.go @@ -0,0 +1,27 @@ +package reading + +import ( + "bufio" + + deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply" +) + +// deltaDeclaredSizeAt returns the resolved object size declared by one delta +// stream header at dataOffset. +func deltaDeclaredSizeAt(pack *packFile, dataOffset int) (int64, error) { + reader, err := zlibReaderAt(pack, dataOffset) + if err != nil { + return 0, err + } + + defer func() { _ = reader.Close() }() + + br := bufio.NewReaderSize(reader, 32) + + _, size, err := deltaapply.ReadHeaderSizes(br) + if err != nil { + return 0, err + } + + return int64(size), nil +} diff --git a/object/store/packed/internal/reading/doc.go b/object/store/packed/internal/reading/doc.go new file mode 100644 index 00000000..a513d3bd --- /dev/null +++ b/object/store/packed/internal/reading/doc.go @@ -0,0 +1,6 @@ +// Package reading implements the packed-store read path: pack and index +// discovery, lookup, caching, and object reads from existing packfiles. +// +// Obviously, this internal package is not meant to be used by anyone +// other than object/store/packed. 
+package reading diff --git a/object/store/packed/internal/reading/entry_inflate.go b/object/store/packed/internal/reading/entry_inflate.go new file mode 100644 index 00000000..82b2a7a8 --- /dev/null +++ b/object/store/packed/internal/reading/entry_inflate.go @@ -0,0 +1,64 @@ +package reading + +import ( + "bytes" + "fmt" + "io" + "math" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + "codeberg.org/lindenii/furgit/internal/iolimit" +) + +// zlibReaderAt opens a zlib reader starting at data offset within pack. +func zlibReaderAt(pack *packFile, offset int) (io.ReadCloser, error) { + if offset < 0 || offset > len(pack.data) { + return nil, fmt.Errorf("objectstore/packed: pack %q zlib offset out of bounds", pack.name) + } + + return zlib.NewReader(bytes.NewReader(pack.data[offset:])) +} + +// inflateAt inflates one entry payload from data offset. +func inflateAt(pack *packFile, offset int, expectedSize int64) ([]byte, error) { + reader, err := zlibReaderAt(pack, offset) + if err != nil { + return nil, err + } + + defer func() { _ = reader.Close() }() + + if expectedSize >= 0 { + if expectedSize > int64(math.MaxInt) { + return nil, fmt.Errorf( + "objectstore/packed: pack %q expected inflated size overflows int: %d", + pack.name, + expectedSize, + ) + } + + reader := iolimit.ExpectLengthReader(reader, expectedSize) + body := make([]byte, int(expectedSize)) + + _, err := io.ReadFull(reader, body) + if err != nil { + return nil, err + } + + var probe [1]byte + + _, err = reader.Read(probe[:]) + if err != nil && err != io.EOF { + return nil, err + } + + return body, nil + } + + body, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + + return body, nil +} diff --git a/object/store/packed/internal/reading/entry_meta.go b/object/store/packed/internal/reading/entry_meta.go new file mode 100644 index 00000000..336dc3b9 --- /dev/null +++ b/object/store/packed/internal/reading/entry_meta.go @@ -0,0 +1,16 @@ +package reading + +// entryMetaAt parses 
one pack entry header at location. +func (store *Store) entryMetaAt(loc location) (*packFile, entryMeta, error) { + pack, err := store.openPack(loc.packName) + if err != nil { + return nil, entryMeta{}, err + } + + meta, err := parseEntryMeta(pack, store.algo, loc.offset) + if err != nil { + return nil, entryMeta{}, err + } + + return pack, meta, nil +} diff --git a/object/store/packed/internal/reading/entry_parse.go b/object/store/packed/internal/reading/entry_parse.go new file mode 100644 index 00000000..ecbfb6cb --- /dev/null +++ b/object/store/packed/internal/reading/entry_parse.go @@ -0,0 +1,71 @@ +package reading + +import ( + "fmt" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + "codeberg.org/lindenii/furgit/internal/intconv" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// entryMeta describes one parsed pack entry header. +type entryMeta struct { + // ty is the pack entry type tag. + ty objecttype.Type + // size is the declared resulting content size. + size int64 + // dataOffset points to the zlib payload start. + dataOffset int + // baseRefID is set for ref-delta entries. + baseRefID objectid.ObjectID + // baseOfs is set for ofs-delta entries. + baseOfs uint64 +} + +// parseEntryMeta parses one pack entry header at offset. 
+func parseEntryMeta(pack *packFile, algo objectid.Algorithm, offset uint64) (entryMeta, error) { + var zero entryMeta + if offset >= uint64(len(pack.data)) { + return zero, fmt.Errorf("objectstore/packed: pack %q offset %d out of bounds", pack.name, offset) + } + + pos, err := intconv.Uint64ToInt(offset) + if err != nil { + return zero, fmt.Errorf("objectstore/packed: pack %q offset conversion: %w", pack.name, err) + } + + entry, err := packfmt.ParseEntry(pack.data[pos:], algo.Size()) + if err != nil { + return zero, fmt.Errorf("objectstore/packed: pack %q: %w", pack.name, err) + } + + meta := entryMeta{ + ty: entry.Type, + size: entry.Size, + dataOffset: pos + entry.DataOffset, + } + switch meta.ty { + case objecttype.TypeRefDelta: + baseID, err := objectid.FromBytes(algo, entry.RefBaseID) + if err != nil { + return zero, fmt.Errorf("objectstore/packed: pack %q invalid ref-delta base id: %w", pack.name, err) + } + + meta.baseRefID = baseID + case objecttype.TypeOfsDelta: + if offset <= entry.OfsBaseDistance { + return zero, fmt.Errorf("objectstore/packed: pack %q has invalid ofs-delta base", pack.name) + } + + meta.baseOfs = offset - entry.OfsBaseDistance + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + // Base object types do not have delta base metadata. 
+ case objecttype.TypeInvalid, objecttype.TypeFuture: + return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported entry type %d", pack.name, meta.ty) + default: + return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported entry type %d", pack.name, meta.ty) + } + + return meta, nil +} diff --git a/object/store/packed/internal/reading/helpers_test.go b/object/store/packed/internal/reading/helpers_test.go new file mode 100644 index 00000000..5a37d2f1 --- /dev/null +++ b/object/store/packed/internal/reading/helpers_test.go @@ -0,0 +1,102 @@ +package reading_test + +import ( + "fmt" + "io" + "strconv" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store/packed" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func openPackedStore(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) *packed.Store { + t.Helper() + + root := testRepo.OpenPackRoot(t) + + store, err := packed.New(root, algo, packed.Options{}) + if err != nil { + t.Fatalf("packed.New: %v", err) + } + + return store +} + +func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte { + t.Helper() + + data, err := io.ReadAll(reader) + if err != nil { + _ = reader.Close() + + t.Fatalf("ReadAll: %v", err) + } + + err = reader.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + + return data +} + +func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) { + t.Helper() + + typeName := testRepo.Run(t, "cat-file", "-t", id.String()) + + ty, ok := objecttype.Parse(typeName) + if !ok { + t.Fatalf("ParseName(%q) failed", typeName) + } + + body := testRepo.CatFile(t, typeName, id) + + header, ok := objectheader.Encode(ty, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, 
len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + + return ty, body, raw +} + +func createPackedFixtureRepo(t *testing.T, algo objectid.Algorithm) (*testgit.TestRepo, []objectid.ObjectID) { + t.Helper() + + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + blobID, treeID, commitID := testRepo.MakeCommit(t, "packed store base commit") + testRepo.Run(t, "update-ref", "refs/heads/main", commitID.String()) + tagID := testRepo.TagAnnotated(t, "v1.0.0", commitID, "packed-store-tag") + + parent := commitID + + for i := range 24 { + content := "common-prefix\n" + strings.Repeat("line-"+strconv.Itoa(i%3)+"\n", 256) + fmt.Sprintf("tail-%d\n", i) + nextBlob, nextTree := testRepo.MakeSingleFileTree(t, fmt.Sprintf("file-%02d.txt", i), []byte(content)) + nextCommit := testRepo.CommitTree(t, nextTree, fmt.Sprintf("commit-%02d", i), parent) + testRepo.Run(t, "update-ref", "refs/heads/main", nextCommit.String()) + parent = nextCommit + + _ = nextBlob + _ = nextTree + } + + testRepo.Repack(t, "-a", "-d", "-f", "--window=64", "--depth=64") + + return testRepo, []objectid.ObjectID{ + blobID, + treeID, + commitID, + tagID, + parent, + } +} diff --git a/object/store/packed/internal/reading/idx.go b/object/store/packed/internal/reading/idx.go new file mode 100644 index 00000000..3c91e1a2 --- /dev/null +++ b/object/store/packed/internal/reading/idx.go @@ -0,0 +1,36 @@ +package reading + +import ( + "os" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// idxFile stores one mapped and validated idx v2 file. +type idxFile struct { + // idxName is the basename of this .idx file. + idxName string + // packName is the matching .pack basename. + packName string + // algo is the hash algorithm encoded by the index. + algo objectid.Algorithm + + // file is the opened index file descriptor. + file *os.File + // data is the mapped index bytes. + data []byte + + // fanout stores fanout table values. 
+ fanout [256]uint32 + // numObjects equals fanout[255]. + numObjects int + + // namesOffset starts the sorted object-id table. + namesOffset int + // offset32Offset starts the 32-bit offset table. + offset32Offset int + // offset64Offset starts the 64-bit offset table. + offset64Offset int + // offset64Count is the number of 64-bit offset entries. + offset64Count int +} diff --git a/object/store/packed/internal/reading/idx_candidates_mru.go b/object/store/packed/internal/reading/idx_candidates_mru.go new file mode 100644 index 00000000..08ab6f85 --- /dev/null +++ b/object/store/packed/internal/reading/idx_candidates_mru.go @@ -0,0 +1,136 @@ +package reading + +// packCandidateNode is one node in the candidate MRU order list. +type packCandidateNode struct { + packName string + prev *packCandidateNode + next *packCandidateNode +} + +func (store *Store) reconcileMRU(candidates []packCandidate) { + store.mruMu.Lock() + defer store.mruMu.Unlock() + + if store.mruNodeByPack == nil { + store.mruNodeByPack = make(map[string]*packCandidateNode, len(candidates)) + } + + present := make(map[string]struct{}, len(candidates)) + for _, candidate := range candidates { + present[candidate.packName] = struct{}{} + } + + ordered := make([]string, 0, len(candidates)) + + for node := store.mruHead; node != nil; node = node.next { + if _, ok := present[node.packName]; !ok { + continue + } + + ordered = append(ordered, node.packName) + delete(present, node.packName) + } + + for _, candidate := range candidates { + if _, ok := present[candidate.packName]; !ok { + continue + } + + ordered = append(ordered, candidate.packName) + delete(present, candidate.packName) + } + + store.mruHead = nil + store.mruTail = nil + store.mruNodeByPack = make(map[string]*packCandidateNode, len(ordered)) + + for _, packName := range ordered { + node := &packCandidateNode{ + packName: packName, + prev: store.mruTail, + } + if store.mruTail != nil { + store.mruTail.next = node + } + + if store.mruHead == nil 
{ + store.mruHead = node + } + + store.mruTail = node + store.mruNodeByPack[packName] = node + } +} + +// touchCandidate moves one candidate to the front of the lookup order. +// This is done on a best-effort basis. +func (store *Store) touchCandidate(packName string) { + if !store.mruMu.TryLock() { + return + } + defer store.mruMu.Unlock() + + node := store.mruNodeByPack[packName] + if node == nil || node == store.mruHead { + return + } + + if node.prev != nil { + node.prev.next = node.next + } + + if node.next != nil { + node.next.prev = node.prev + } + + if store.mruTail == node { + store.mruTail = node.prev + } + + node.prev = nil + + node.next = store.mruHead + if store.mruHead != nil { + store.mruHead.prev = node + } + + store.mruHead = node + if store.mruTail == nil { + store.mruTail = node + } +} + +// firstCandidatePackName returns the current head pack name, or "" when none +// are available. +func (store *Store) firstCandidatePackName(snapshot *candidateSnapshot) string { + store.mruMu.RLock() + defer store.mruMu.RUnlock() + + for node := store.mruHead; node != nil; node = node.next { + if _, ok := snapshot.candidateByPack[node.packName]; ok { + return node.packName + } + } + + return "" +} + +// nextCandidatePackName returns the pack name after currentPack in current MRU +// order, or "" at end / when currentPack is not present. 
+func (store *Store) nextCandidatePackName(currentPack string, snapshot *candidateSnapshot) string { + store.mruMu.RLock() + defer store.mruMu.RUnlock() + + node := store.mruNodeByPack[currentPack] + if node == nil { + return "" + } + + for node = node.next; node != nil; node = node.next { + if _, ok := snapshot.candidateByPack[node.packName]; ok { + return node.packName + } + } + + return "" +} diff --git a/object/store/packed/internal/reading/idx_close.go b/object/store/packed/internal/reading/idx_close.go new file mode 100644 index 00000000..1590854c --- /dev/null +++ b/object/store/packed/internal/reading/idx_close.go @@ -0,0 +1,28 @@ +package reading + +import "syscall" + +// close unmaps and closes one idx handle. +func (index *idxFile) close() error { + var closeErr error + + if index.data != nil { + err := syscall.Munmap(index.data) + if err != nil && closeErr == nil { + closeErr = err + } + + index.data = nil + } + + if index.file != nil { + err := index.file.Close() + if err != nil && closeErr == nil { + closeErr = err + } + + index.file = nil + } + + return closeErr +} diff --git a/object/store/packed/internal/reading/idx_lookup.go b/object/store/packed/internal/reading/idx_lookup.go new file mode 100644 index 00000000..bb02fb20 --- /dev/null +++ b/object/store/packed/internal/reading/idx_lookup.go @@ -0,0 +1,91 @@ +package reading + +import ( + "bytes" + "encoding/binary" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// lookup resolves one object ID to its pack offset within this index. 
+func (index *idxFile) lookup(id objectid.ObjectID) (uint64, bool, error) { + if id.Algorithm() != index.algo { + return 0, false, fmt.Errorf("objectstore/packed: object id algorithm mismatch") + } + + idBytes := (&id).RawBytes() + + hashSize := len(idBytes) + if hashSize != index.algo.Size() { + return 0, false, fmt.Errorf("objectstore/packed: unexpected object id length") + } + + first := int(idBytes[0]) + + lo := 0 + if first > 0 { + lo = int(index.fanout[first-1]) + } + + hi := int(index.fanout[first]) + if lo < 0 || hi < 0 || lo > hi || hi > index.numObjects { + return 0, false, fmt.Errorf("objectstore/packed: idx %q has invalid fanout bounds", index.idxName) + } + + for lo < hi { + mid := lo + (hi-lo)/2 + + nameOffset := index.namesOffset + mid*hashSize + if nameOffset < 0 || nameOffset+hashSize > len(index.data) { + return 0, false, fmt.Errorf("objectstore/packed: idx %q truncated name table", index.idxName) + } + + cmp := bytes.Compare(index.data[nameOffset:nameOffset+hashSize], idBytes) + if cmp == 0 { + offset, err := index.offsetAt(mid) + if err != nil { + return 0, false, err + } + + return offset, true, nil + } + + if cmp < 0 { + lo = mid + 1 + } else { + hi = mid + } + } + + return 0, false, nil +} + +// offsetAt resolves the pack offset for one object index entry. 
+func (index *idxFile) offsetAt(objectIndex int) (uint64, error) { + if objectIndex < 0 || objectIndex >= index.numObjects { + return 0, fmt.Errorf("objectstore/packed: idx %q offset index out of bounds", index.idxName) + } + + wordOffset := index.offset32Offset + objectIndex*4 + if wordOffset < 0 || wordOffset+4 > len(index.data) { + return 0, fmt.Errorf("objectstore/packed: idx %q truncated 32-bit offset table", index.idxName) + } + + word := binary.BigEndian.Uint32(index.data[wordOffset : wordOffset+4]) + if word&0x80000000 == 0 { + return uint64(word), nil + } + + pos := int(word & 0x7fffffff) + if pos < 0 || pos >= index.offset64Count { + return 0, fmt.Errorf("objectstore/packed: idx %q invalid 64-bit offset position", index.idxName) + } + + offOffset := index.offset64Offset + pos*8 + if offOffset < 0 || offOffset+8 > len(index.data)-2*index.algo.Size() { + return 0, fmt.Errorf("objectstore/packed: idx %q truncated 64-bit offset table", index.idxName) + } + + return binary.BigEndian.Uint64(index.data[offOffset : offOffset+8]), nil +} diff --git a/object/store/packed/internal/reading/idx_lookup_candidates.go b/object/store/packed/internal/reading/idx_lookup_candidates.go new file mode 100644 index 00000000..c89ada7a --- /dev/null +++ b/object/store/packed/internal/reading/idx_lookup_candidates.go @@ -0,0 +1,126 @@ +package reading + +import ( + "fmt" + "os" + "slices" + "strings" +) + +// packCandidate describes one discovered pack/index pair. +type packCandidate struct { + // packName is the .pack basename. + packName string + // idxName is the .idx basename. + idxName string + // mtime is the pack file modification time for initial ordering. + mtime int64 +} + +type candidateSnapshot struct { + candidates []packCandidate + candidateByPack map[string]packCandidate +} + +// Refresh rescans objects/pack and atomically installs a fresh candidate list +// for future lookups. +// +// Refresh does not invalidate existing readers. 
Cached pack/index mappings, +// including ones for previously visible candidates, may be retained until +// Close. +func (store *Store) Refresh() error { + store.refreshMu.Lock() + defer store.refreshMu.Unlock() + + candidates, err := store.discoverCandidates() + if err != nil { + return err + } + + candidateByPack := make(map[string]packCandidate, len(candidates)) + for _, candidate := range candidates { + candidateByPack[candidate.packName] = candidate + } + + store.reconcileMRU(candidates) + + store.candidates.Store(&candidateSnapshot{ + candidates: candidates, + candidateByPack: candidateByPack, + }) + + return nil +} + +func (store *Store) ensureCandidates() (*candidateSnapshot, error) { + snapshot := store.candidates.Load() + if snapshot != nil { + return snapshot, nil + } + + err := store.Refresh() + if err != nil { + return nil, err + } + + return store.candidates.Load(), nil +} + +// discoverCandidates scans the objects/pack root and returns sorted pack/index +// pairs. +func (store *Store) discoverCandidates() ([]packCandidate, error) { + dir, err := store.root.Open(".") + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + + return nil, err + } + + defer func() { _ = dir.Close() }() + + entries, err := dir.ReadDir(-1) + if err != nil { + return nil, err + } + + candidates := make([]packCandidate, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { + continue + } + + idxName := entry.Name() + packName := strings.TrimSuffix(idxName, ".idx") + ".pack" + + packInfo, err := store.root.Stat(packName) + if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("objectstore/packed: missing pack file for index %q", idxName) + } + + return nil, err + } + + candidates = append(candidates, packCandidate{ + packName: packName, + idxName: idxName, + mtime: packInfo.ModTime().UnixNano(), + }) + } + + slices.SortFunc(candidates, func(a, b packCandidate) int { + if a.mtime != b.mtime { 
+ if a.mtime > b.mtime { + return -1 + } + + return 1 + } + + return strings.Compare(a.packName, b.packName) + }) + + return candidates, nil +} diff --git a/object/store/packed/internal/reading/idx_open.go b/object/store/packed/internal/reading/idx_open.go new file mode 100644 index 00000000..8f73c867 --- /dev/null +++ b/object/store/packed/internal/reading/idx_open.go @@ -0,0 +1,98 @@ +package reading + +import ( + "fmt" + "os" + "syscall" + + "codeberg.org/lindenii/furgit/internal/intconv" + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// openIndex returns one opened and parsed index, caching it by pack basename. +func (store *Store) openIndex(candidate packCandidate) (*idxFile, error) { + store.idxMu.RLock() + + index, ok := store.idxByPack[candidate.packName] + if ok { + store.idxMu.RUnlock() + + return index, nil + } + + store.idxMu.RUnlock() + + index, err := openIdxFile(store.root, candidate.idxName, candidate.packName, store.algo) + if err != nil { + return nil, err + } + + store.idxMu.Lock() + + existing, ok := store.idxByPack[candidate.packName] + if ok { + store.idxMu.Unlock() + + _ = index.close() + + return existing, nil + } + + store.idxByPack[candidate.packName] = index + store.idxMu.Unlock() + + return index, nil +} + +// openIdxFile maps and validates one idx v2 file. 
+func openIdxFile(root *os.Root, idxName, packName string, algo objectid.Algorithm) (*idxFile, error) { + file, err := root.Open(idxName) + if err != nil { + return nil, err + } + + info, err := file.Stat() + if err != nil { + _ = file.Close() + + return nil, err + } + + size := info.Size() + if size < 0 || size > int64(int(^uint(0)>>1)) { + _ = file.Close() + + return nil, fmt.Errorf("objectstore/packed: idx %q has unsupported size", idxName) + } + + fd, err := intconv.UintptrToInt(file.Fd()) + if err != nil { + _ = file.Close() + + return nil, err + } + + data, err := syscall.Mmap(fd, 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + _ = file.Close() + + return nil, err + } + + index := &idxFile{ + idxName: idxName, + packName: packName, + algo: algo, + file: file, + data: data, + } + + err = index.parse() + if err != nil { + _ = index.close() + + return nil, err + } + + return index, nil +} diff --git a/object/store/packed/internal/reading/idx_parse.go b/object/store/packed/internal/reading/idx_parse.go new file mode 100644 index 00000000..d38aaf4d --- /dev/null +++ b/object/store/packed/internal/reading/idx_parse.go @@ -0,0 +1,78 @@ +package reading + +import ( + "encoding/binary" + "fmt" +) + +const ( + idxMagicV2 = 0xff744f63 + idxVersionV2 = 2 +) + +// parse validates mapped idx v2 structure and stores table boundaries. 
+func (index *idxFile) parse() error { + hashSize := index.algo.Size() + if hashSize <= 0 { + return fmt.Errorf("objectstore/packed: idx %q has invalid hash algorithm", index.idxName) + } + + minLen := 8 + 256*4 + 2*hashSize + if len(index.data) < minLen { + return fmt.Errorf("objectstore/packed: idx %q too short", index.idxName) + } + + if binary.BigEndian.Uint32(index.data[:4]) != idxMagicV2 { + return fmt.Errorf("objectstore/packed: idx %q invalid magic", index.idxName) + } + + if binary.BigEndian.Uint32(index.data[4:8]) != idxVersionV2 { + return fmt.Errorf("objectstore/packed: idx %q unsupported version", index.idxName) + } + + prev := uint32(0) + + for i := range 256 { + base := 8 + i*4 + + cur := binary.BigEndian.Uint32(index.data[base : base+4]) + if cur < prev { + return fmt.Errorf("objectstore/packed: idx %q has non-monotonic fanout table", index.idxName) + } + + index.fanout[i] = cur + prev = cur + } + + index.numObjects = int(index.fanout[255]) + if index.numObjects < 0 { + return fmt.Errorf("objectstore/packed: idx %q has invalid object count", index.idxName) + } + + namesBytes := index.numObjects * hashSize + crcBytes := index.numObjects * 4 + offset32Bytes := index.numObjects * 4 + + minSize := 8 + 256*4 + namesBytes + crcBytes + offset32Bytes + 2*hashSize + if minSize < 0 || len(index.data) < minSize { + return fmt.Errorf("objectstore/packed: idx %q has truncated tables", index.idxName) + } + + index.namesOffset = 8 + 256*4 + index.offset32Offset = index.namesOffset + namesBytes + crcBytes + index.offset64Offset = index.offset32Offset + offset32Bytes + + offset64Bytes := len(index.data) - index.offset64Offset - 2*hashSize + if offset64Bytes < 0 || offset64Bytes%8 != 0 { + return fmt.Errorf("objectstore/packed: idx %q has malformed 64-bit offset table", index.idxName) + } + + index.offset64Count = offset64Bytes / 8 + + maxOffset64Count := max(index.numObjects-1, 0) + if index.offset64Count > maxOffset64Count { + return fmt.Errorf("objectstore/packed: 
idx %q has oversized 64-bit offset table", index.idxName) + } + + return nil +} diff --git a/object/store/packed/internal/reading/location.go b/object/store/packed/internal/reading/location.go new file mode 100644 index 00000000..f315dd1d --- /dev/null +++ b/object/store/packed/internal/reading/location.go @@ -0,0 +1,7 @@ +package reading + +// location identifies one object entry in a specific pack file. +type location struct { + packName string + offset uint64 +} diff --git a/object/store/packed/internal/reading/new.go b/object/store/packed/internal/reading/new.go new file mode 100644 index 00000000..d8a12db3 --- /dev/null +++ b/object/store/packed/internal/reading/new.go @@ -0,0 +1,33 @@ +package reading + +import ( + "fmt" + "os" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// New creates a packed-object store rooted at an objects/pack directory. +// +// Labels: Deps-Borrowed, Life-Parent. +func New(root *os.Root, algo objectid.Algorithm, opts Options) (*Store, error) { + if algo.Size() == 0 { + return nil, objectid.ErrInvalidAlgorithm + } + + switch opts.RefreshPolicy { + case RefreshPolicyOnMissing, RefreshPolicyNever: + default: + return nil, fmt.Errorf("objectstore/packed: invalid refresh policy %d", opts.RefreshPolicy) + } + + return &Store{ + root: root, + algo: algo, + refreshPolicy: opts.RefreshPolicy, + mruNodeByPack: make(map[string]*packCandidateNode), + idxByPack: make(map[string]*idxFile), + packs: make(map[string]*packFile), + deltaCache: newDeltaCache(defaultDeltaCacheMaxBytes), + }, nil +} diff --git a/object/store/packed/internal/reading/options.go b/object/store/packed/internal/reading/options.go new file mode 100644 index 00000000..0c5b76af --- /dev/null +++ b/object/store/packed/internal/reading/options.go @@ -0,0 +1,16 @@ +package reading + +// RefreshPolicy configures when candidate pack/index discovery refreshes. +type RefreshPolicy uint8 + +const ( + // RefreshPolicyOnMissing refreshes candidates once after a lookup miss. 
+ RefreshPolicyOnMissing RefreshPolicy = iota + // RefreshPolicyNever disables automatic refresh after lookup misses. + RefreshPolicyNever +) + +// Options configures a packed object store. +type Options struct { + RefreshPolicy RefreshPolicy +} diff --git a/object/store/packed/internal/reading/pack.go b/object/store/packed/internal/reading/pack.go new file mode 100644 index 00000000..431ed5f9 --- /dev/null +++ b/object/store/packed/internal/reading/pack.go @@ -0,0 +1,82 @@ +package reading + +import ( + "encoding/binary" + "fmt" + "os" + "syscall" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + "codeberg.org/lindenii/furgit/internal/intconv" +) + +// packFile stores one mapped and validated .pack file. +type packFile struct { + // name is the .pack basename. + name string + // file is the opened pack file descriptor. + file *os.File + // data is the mapped pack bytes. + data []byte +} + +// openPackFile maps and validates one pack file. +func openPackFile(name string, file *os.File, size int64) (*packFile, error) { + if size < 12 { + return nil, fmt.Errorf("objectstore/packed: pack %q too short", name) + } + + if size > int64(int(^uint(0)>>1)) { + return nil, fmt.Errorf("objectstore/packed: pack %q has unsupported size", name) + } + + fd, err := intconv.UintptrToInt(file.Fd()) + if err != nil { + return nil, err + } + + data, err := syscall.Mmap(fd, 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + return nil, err + } + + if binary.BigEndian.Uint32(data[:4]) != packfmt.Signature { + _ = syscall.Munmap(data) + + return nil, fmt.Errorf("objectstore/packed: pack %q invalid signature", name) + } + + version := binary.BigEndian.Uint32(data[4:8]) + if !packfmt.SupportedVersion(version) { + _ = syscall.Munmap(data) + + return nil, fmt.Errorf("objectstore/packed: pack %q unsupported version %d", name, version) + } + + return &packFile{name: name, file: file, data: data}, nil +} + +// close unmaps and closes one pack handle. 
+func (pack *packFile) close() error { + var closeErr error + + if pack.data != nil { + err := syscall.Munmap(pack.data) + if err != nil && closeErr == nil { + closeErr = err + } + + pack.data = nil + } + + if pack.file != nil { + err := pack.file.Close() + if err != nil && closeErr == nil { + closeErr = err + } + + pack.file = nil + } + + return closeErr +} diff --git a/object/store/packed/internal/reading/pack_idx_checksum.go b/object/store/packed/internal/reading/pack_idx_checksum.go new file mode 100644 index 00000000..b2ad09f1 --- /dev/null +++ b/object/store/packed/internal/reading/pack_idx_checksum.go @@ -0,0 +1,34 @@ +package reading + +import ( + "bytes" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// verifyMappedPackMatchesMappedIdx compares one mapped pack trailer hash with +// the pack hash recorded in one mapped idx trailer. +func verifyMappedPackMatchesMappedIdx(packData, idxData []byte, algo objectid.Algorithm) error { + hashSize := algo.Size() + if hashSize <= 0 { + return objectid.ErrInvalidAlgorithm + } + + if len(packData) < hashSize { + return fmt.Errorf("objectstore/packed: pack too short for trailer hash") + } + + if len(idxData) < hashSize*2 { + return fmt.Errorf("objectstore/packed: idx too short for trailer hashes") + } + + packTrailerHash := packData[len(packData)-hashSize:] + + idxPackHash := idxData[len(idxData)-hashSize*2 : len(idxData)-hashSize] + if !bytes.Equal(packTrailerHash, idxPackHash) { + return fmt.Errorf("objectstore/packed: pack hash does not match idx") + } + + return nil +} diff --git a/object/store/packed/internal/reading/read_bytes.go b/object/store/packed/internal/reading/read_bytes.go new file mode 100644 index 00000000..f0821687 --- /dev/null +++ b/object/store/packed/internal/reading/read_bytes.go @@ -0,0 +1,46 @@ +package reading + +import ( + "fmt" + + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype 
"codeberg.org/lindenii/furgit/object/type" +) + +// ReadBytesContent reads an object's type and content bytes. +// +// It fully resolves the requested object bytes. For base pack entries, this +// includes verifying that the zlib stream inflates to exactly the declared +// object size and verifying the Adler-32 trailer. +func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + return store.deltaResolveContent(loc) +} + +// ReadBytesFull reads a full serialized object as "type size\0content". +// +// Like ReadBytesContent, it fully resolves the requested object bytes. For +// base pack entries, this includes verifying that the zlib stream inflates to +// exactly the declared object size and verifying the Adler-32 trailer. +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + ty, content, err := store.ReadBytesContent(id) + if err != nil { + return nil, err + } + + header, ok := objectheader.Encode(ty, int64(len(content))) + if !ok { + return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", ty) + } + + out := make([]byte, len(header)+len(content)) + copy(out, header) + copy(out[len(header):], content) + + return out, nil +} diff --git a/object/store/packed/internal/reading/read_closer.go b/object/store/packed/internal/reading/read_closer.go new file mode 100644 index 00000000..4ef4c039 --- /dev/null +++ b/object/store/packed/internal/reading/read_closer.go @@ -0,0 +1,19 @@ +package reading + +import "io" + +// readCloser proxies reads and closes one underlying closer. +type readCloser struct { + reader io.Reader + closer io.Closer +} + +// Read proxies reads to the underlying reader. +func (reader *readCloser) Read(dst []byte) (int, error) { + return reader.reader.Read(dst) +} + +// Close closes the underlying closer. 
+func (reader *readCloser) Close() error { + return reader.closer.Close() +} diff --git a/object/store/packed/internal/reading/read_header.go b/object/store/packed/internal/reading/read_header.go new file mode 100644 index 00000000..d627a6b3 --- /dev/null +++ b/object/store/packed/internal/reading/read_header.go @@ -0,0 +1,20 @@ +package reading + +import ( + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads an object's type and declared content size. +// +// It resolves header metadata only. It does not verify that the full pack entry +// payload is readable and does not verify any zlib Adler-32 trailer for +// compressed entry data. +func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + return store.resolveHeaderAt(loc) +} diff --git a/object/store/packed/internal/reading/read_header_resolve.go b/object/store/packed/internal/reading/read_header_resolve.go new file mode 100644 index 00000000..a2916b73 --- /dev/null +++ b/object/store/packed/internal/reading/read_header_resolve.go @@ -0,0 +1,65 @@ +package reading + +import ( + "fmt" + + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// resolveHeaderAt resolves one object's canonical type and declared content size. 
+func (store *Store) resolveHeaderAt(start location) (objecttype.Type, int64, error) { + visited := make(map[location]struct{}) + current := start + declaredSize := int64(-1) + + for { + if _, ok := visited[current]; ok { + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: delta cycle while resolving object header") + } + + visited[current] = struct{}{} + + pack, meta, err := store.entryMetaAt(current) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + if declaredSize < 0 { + if meta.ty.IsBaseObject() { + declaredSize = meta.size + } else { + size, err := deltaDeclaredSizeAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + declaredSize = size + } + } + + if meta.ty.IsBaseObject() { + return meta.ty, declaredSize, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta: + next, err := store.lookup(meta.baseRefID) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + current = next + case objecttype.TypeOfsDelta: + current = location{ + packName: current.packName, + offset: meta.baseOfs, + } + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) + case objecttype.TypeInvalid, objecttype.TypeFuture: + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + default: + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } + } +} diff --git a/object/store/packed/internal/reading/read_reader.go b/object/store/packed/internal/reading/read_reader.go new file mode 100644 index 00000000..3fa0f592 --- /dev/null +++ b/object/store/packed/internal/reading/read_reader.go @@ -0,0 +1,92 @@ +package reading + +import ( + "bytes" + "fmt" + "io" + + "codeberg.org/lindenii/furgit/internal/iolimit" + objectheader 
"codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadReaderContent reads an object's type, declared content size, and content +// stream. +// +// Close releases reader-local resources only. It does not drain unread data for +// additional validation. In particular, malformed trailing compressed data, +// trailing bytes past the declared object size, and the zlib Adler-32 trailer +// may go unverified unless the caller reads to io.EOF. +func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + pack, meta, err := store.entryMetaAt(loc) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + if meta.ty.IsBaseObject() { + zr, err := zlibReaderAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + return meta.ty, meta.size, &readCloser{ + reader: iolimit.ExpectLengthReader(zr, meta.size), + closer: zr, + }, nil + } + + ty, content, err := store.deltaResolveContent(loc) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil +} + +// ReadReaderFull reads a full serialized object stream as "type size\0content". +// +// Close releases reader-local resources only. It does not drain unread data for +// additional validation. In particular, malformed trailing compressed data, +// trailing bytes past the declared object size, and the zlib Adler-32 trailer +// may go unverified unless the caller reads to io.EOF. 
+func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + loc, err := store.lookup(id) + if err != nil { + return nil, err + } + + pack, meta, err := store.entryMetaAt(loc) + if err != nil { + return nil, err + } + + if meta.ty.IsBaseObject() { + header, ok := objectheader.Encode(meta.ty, meta.size) + if !ok { + return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", meta.ty) + } + + zr, err := zlibReaderAt(pack, meta.dataOffset) + if err != nil { + return nil, err + } + + return &readCloser{ + reader: io.MultiReader(bytes.NewReader(header), iolimit.ExpectLengthReader(zr, meta.size)), + closer: zr, + }, nil + } + + raw, err := store.ReadBytesFull(id) + if err != nil { + return nil, err + } + + return io.NopCloser(bytes.NewReader(raw)), nil +} diff --git a/object/store/packed/internal/reading/read_size.go b/object/store/packed/internal/reading/read_size.go new file mode 100644 index 00000000..3c1e05b1 --- /dev/null +++ b/object/store/packed/internal/reading/read_size.go @@ -0,0 +1,45 @@ +package reading + +import ( + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadSize reads an object's declared content size. +// +// Like ReadHeader, it resolves header metadata only. It does not verify that +// the full pack entry payload is readable and does not verify any zlib +// Adler-32 trailer for compressed entry data. +func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { + loc, err := store.lookup(id) + if err != nil { + return 0, err + } + + return store.resolveSizeAt(loc) +} + +// resolveSizeAt resolves one object's declared content size from location. 
+func (store *Store) resolveSizeAt(start location) (int64, error) { + pack, meta, err := store.entryMetaAt(start) + if err != nil { + return 0, err + } + + if meta.ty.IsBaseObject() { + return meta.size, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta, objecttype.TypeOfsDelta: + return deltaDeclaredSizeAt(pack, meta.dataOffset) + case objecttype.TypeInvalid, objecttype.TypeFuture: + return 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + return 0, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) + default: + return 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } +} diff --git a/object/store/packed/internal/reading/read_test.go b/object/store/packed/internal/reading/read_test.go new file mode 100644 index 00000000..8a92b603 --- /dev/null +++ b/object/store/packed/internal/reading/read_test.go @@ -0,0 +1,301 @@ +package reading_test + +import ( + "bytes" + "errors" + "fmt" + "io/fs" + "strconv" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + "codeberg.org/lindenii/furgit/object/store/packed" +) + +func TestPackedStoreReadAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo, ids := createPackedFixtureRepo(t, algo) + store := openPackedStore(t, testRepo, algo) + + for _, id := range ids { + t.Run(id.String(), func(t *testing.T) { + wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, id) + + gotHeaderType, gotHeaderSize, err := store.ReadHeader(id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + + if gotHeaderType != wantType { + t.Fatalf("ReadHeader type = %v, want %v", gotHeaderType, wantType) + } + + if gotHeaderSize != 
int64(len(wantBody)) { + t.Fatalf("ReadHeader size = %d, want %d", gotHeaderSize, len(wantBody)) + } + + gotSize, err := store.ReadSize(id) + if err != nil { + t.Fatalf("ReadSize: %v", err) + } + + if gotSize != int64(len(wantBody)) { + t.Fatalf("ReadSize = %d, want %d", gotSize, len(wantBody)) + } + + gotRaw, err := store.ReadBytesFull(id) + if err != nil { + t.Fatalf("ReadBytesFull: %v", err) + } + + if !bytes.Equal(gotRaw, wantRaw) { + t.Fatalf("ReadBytesFull mismatch") + } + + gotType, gotBody, err := store.ReadBytesContent(id) + if err != nil { + t.Fatalf("ReadBytesContent: %v", err) + } + + if gotType != wantType { + t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType) + } + + if !bytes.Equal(gotBody, wantBody) { + t.Fatalf("ReadBytesContent mismatch") + } + + fullReader, err := store.ReadReaderFull(id) + if err != nil { + t.Fatalf("ReadReaderFull: %v", err) + } + + got := mustReadAllAndClose(t, fullReader) + if !bytes.Equal(got, wantRaw) { + t.Fatalf("ReadReaderFull mismatch") + } + + contentType, contentSize, contentReader, err := store.ReadReaderContent(id) + if err != nil { + t.Fatalf("ReadReaderContent: %v", err) + } + + if contentType != wantType { + t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType) + } + + if contentSize != int64(len(wantBody)) { + t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody)) + } + + got = mustReadAllAndClose(t, contentReader) + if !bytes.Equal(got, wantBody) { + t.Fatalf("ReadReaderContent mismatch") + } + }) + } + }) +} + +func TestPackedStoreErrors(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo, _ := createPackedFixtureRepo(t, algo) + store := openPackedStore(t, testRepo, algo) + + notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(notFound): %v", err) + } + + _, err = store.ReadBytesFull(notFoundID) + if 
!errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesFull not-found error = %v", err) + } + + _, _, err = store.ReadBytesContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesContent not-found error = %v", err) + } + + _, err = store.ReadReaderFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderFull not-found error = %v", err) + } + + _, _, _, err = store.ReadReaderContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderContent not-found error = %v", err) + } + + _, _, err = store.ReadHeader(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadHeader not-found error = %v", err) + } + + _, err = store.ReadSize(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadSize not-found error = %v", err) + } + + var otherAlgo objectid.Algorithm + + for _, candidate := range objectid.SupportedAlgorithms() { + if candidate != algo { + otherAlgo = candidate + + break + } + } + + if otherAlgo != objectid.AlgorithmUnknown { + mismatchID, err := objectid.ParseHex(otherAlgo, strings.Repeat("0", otherAlgo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(mismatch): %v", err) + } + + _, err = store.ReadBytesFull(mismatchID) + if err == nil || !strings.Contains(err.Error(), "algorithm mismatch") { + t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err) + } + } + }) +} + +func TestPackedStoreNewValidation(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo, _ := createPackedFixtureRepo(t, algo) + + store := openPackedStore(t, testRepo, algo) + + err := store.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + }) +} + +func TestPackedStoreInvalidAlgorithm(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectid.AlgorithmSHA1, Bare: true}) + + root 
:= testRepo.OpenPackRoot(t) + + _, err := packed.New(root, objectid.AlgorithmUnknown, packed.Options{}) + if !errors.Is(err, objectid.ErrInvalidAlgorithm) { + t.Fatalf("packed.New invalid algorithm error = %v", err) + } +} + +func TestPackedStoreReadHeaderUsesResolvedObjectSizeForDelta(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + + var parent objectid.ObjectID + + for i := range 96 { + content := strings.Repeat("common-line-"+strconv.Itoa(i%7)+"\n", 384) + fmt.Sprintf("tail-%03d\n", i) + + _, treeID := testRepo.MakeSingleFileTree(t, "file.txt", []byte(content)) + if i == 0 { + parent = testRepo.CommitTree(t, treeID, "delta-header-size-0") + + continue + } + + parent = testRepo.CommitTree(t, treeID, fmt.Sprintf("delta-header-size-%03d", i), parent) + } + + testRepo.UpdateRef(t, "refs/heads/main", parent) + testRepo.Repack(t, "-a", "-d", "-f", "--window=128", "--depth=128") + + deltaID, wantResolvedSize := findDeltaObjectWithResolvedSizeMismatch(t, testRepo, algo) + store := openPackedStore(t, testRepo, algo) + + _, gotSize, err := store.ReadHeader(deltaID) + if err != nil { + t.Fatalf("ReadHeader(%s): %v", deltaID, err) + } + + if gotSize != wantResolvedSize { + t.Fatalf("ReadHeader(%s) size = %d, want resolved size %d", deltaID, gotSize, wantResolvedSize) + } + + gotReadSize, err := store.ReadSize(deltaID) + if err != nil { + t.Fatalf("ReadSize(%s): %v", deltaID, err) + } + + if gotReadSize != wantResolvedSize { + t.Fatalf("ReadSize(%s) = %d, want resolved size %d", deltaID, gotReadSize, wantResolvedSize) + } + }) +} + +func findDeltaObjectWithResolvedSizeMismatch(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) (objectid.ObjectID, int64) { + t.Helper() + + packRoot := testRepo.OpenPackRoot(t) + + entries, err := fs.ReadDir(packRoot.FS(), ".") + if err != nil { + 
t.Fatalf("ReadDir(pack): %v", err) + } + + var idxName string + + for _, entry := range entries { + if strings.HasSuffix(entry.Name(), ".idx") { + idxName = entry.Name() + + break + } + } + + if idxName == "" { + t.Fatalf("no idx files found") + } + + verifyOut := testRepo.Run(t, "verify-pack", "-v", "objects/pack/"+idxName) + for line := range strings.SplitSeq(strings.TrimSpace(verifyOut), "\n") { + fields := strings.Fields(line) + if len(fields) < 7 { + continue + } + + idHex := fields[0] + + deltaStreamSize, err := strconv.ParseInt(fields[2], 10, 64) + if err != nil { + continue + } + + resolvedSizeStr := testRepo.Run(t, "cat-file", "-s", idHex) + + resolvedSize, err := strconv.ParseInt(strings.TrimSpace(resolvedSizeStr), 10, 64) + if err != nil { + t.Fatalf("parse cat-file size for %s: %v", idHex, err) + } + + if deltaStreamSize == resolvedSize { + continue + } + + id, err := objectid.ParseHex(algo, idHex) + if err != nil { + t.Fatalf("ParseHex(%s): %v", idHex, err) + } + + return id, resolvedSize + } + + t.Fatalf("did not find a delta object with mismatched stream/resolved size") + + return objectid.ObjectID{}, 0 +} diff --git a/object/store/packed/internal/reading/store.go b/object/store/packed/internal/reading/store.go new file mode 100644 index 00000000..cb4829ab --- /dev/null +++ b/object/store/packed/internal/reading/store.go @@ -0,0 +1,52 @@ +package reading + +import ( + "os" + "sync" + "sync/atomic" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// Store reads Git objects from pack/index files under an objects/pack root. +// +// Cached pack/index mappings are retained until Close. +// +// Labels: Close-Caller. +type Store struct { + // root is the borrowed objects/pack capability used for all file access. + root *os.Root + // algo is the expected object ID algorithm for lookups. + algo objectid.Algorithm + // refreshPolicy controls automatic candidate refresh on lookup misses. 
+ refreshPolicy RefreshPolicy + + // candidates stores the latest immutable candidate snapshot. + candidates atomic.Pointer[candidateSnapshot] + // refreshMu serializes candidate refresh. + refreshMu sync.Mutex + // mruMu guards candidate MRU linked-list state. + mruMu sync.RWMutex + // mruHead is the first pack in MRU order. + mruHead *packCandidateNode + // mruTail is the last pack in MRU order. + mruTail *packCandidateNode + // mruNodeByPack maps pack basename to MRU node. + mruNodeByPack map[string]*packCandidateNode + // idxByPack caches opened and parsed indexes by pack basename. + idxByPack map[string]*idxFile + + // stateMu guards pack cache and close state. + stateMu sync.RWMutex + // idxMu guards parsed index cache. + idxMu sync.RWMutex + // cacheMu guards delta cache operations. + cacheMu sync.RWMutex + // packs caches opened .pack handles by basename. + packs map[string]*packFile + // deltaCache caches resolved base objects by pack location. + deltaCache *deltaCache +} + +var _ objectstore.Reader = (*Store)(nil) diff --git a/object/store/packed/internal/reading/store_lookup.go b/object/store/packed/internal/reading/store_lookup.go new file mode 100644 index 00000000..9d863113 --- /dev/null +++ b/object/store/packed/internal/reading/store_lookup.go @@ -0,0 +1,106 @@ +package reading + +import ( + "errors" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// lookup resolves one object ID to its pack location. 
+func (store *Store) lookup(id objectid.ObjectID) (location, error) { + var zero location + if id.Algorithm() != store.algo { + return zero, errors.New("objectstore/packed: object id algorithm mismatch") + } + + snapshot, err := store.ensureCandidates() + if err != nil { + return zero, err + } + + loc, ok, err := store.lookupInCandidates(id, snapshot) + if err != nil { + return zero, err + } + + if ok { + return loc, nil + } + + if store.refreshPolicy == RefreshPolicyOnMissing { //nolint:nestif + err = store.Refresh() + if err != nil { + return zero, err + } + + refreshed := store.candidates.Load() + if refreshed != nil && refreshed != snapshot { + loc, ok, err = store.lookupInCandidates(id, refreshed) + if err != nil { + return zero, err + } + + if ok { + return loc, nil + } + } + } + + return zero, objectstore.ErrObjectNotFound +} + +func (store *Store) lookupInCandidates( + id objectid.ObjectID, + snapshot *candidateSnapshot, +) (location, bool, error) { + var zero location + + nextPackName := store.firstCandidatePackName(snapshot) + for nextPackName != "" { + candidate, ok := snapshot.candidateByPack[nextPackName] + if !ok { + nextPackName = store.firstCandidatePackName(snapshot) + + continue + } + + nextPackName = store.nextCandidatePackName(candidate.packName, snapshot) + + index, err := store.openIndex(candidate) + if err != nil { + return zero, false, err + } + + offset, ok, err := index.lookup(id) + if err != nil { + return zero, false, err + } + + if ok { + store.touchCandidate(candidate.packName) + + return location{packName: index.packName, offset: offset}, true, nil + } + } + + for _, candidate := range snapshot.candidates { + index, err := store.openIndex(candidate) + if err != nil { + return zero, false, err + } + + offset, ok, err := index.lookup(id) + if err != nil { + return zero, false, err + } + + if ok { + store.touchCandidate(candidate.packName) + + return location{packName: index.packName, offset: offset}, true, nil + } + } + + return zero, 
false, nil +} diff --git a/object/store/packed/internal/reading/store_open_pack.go b/object/store/packed/internal/reading/store_open_pack.go new file mode 100644 index 00000000..35cb960a --- /dev/null +++ b/object/store/packed/internal/reading/store_open_pack.go @@ -0,0 +1,57 @@ +package reading + +// openPack returns one opened and validated pack handle. +func (store *Store) openPack(name string) (*packFile, error) { + store.stateMu.RLock() + + pack, ok := store.packs[name] + if ok { + store.stateMu.RUnlock() + + return pack, nil + } + + store.stateMu.RUnlock() + + file, err := store.root.Open(name) + if err != nil { + return nil, err + } + + info, err := file.Stat() + if err != nil { + _ = file.Close() + + return nil, err + } + + pack, err = openPackFile(name, file, info.Size()) + if err != nil { + _ = file.Close() + + return nil, err + } + + err = store.verifyPackMatchesIndexes(pack) + if err != nil { + _ = pack.close() + + return nil, err + } + + store.stateMu.Lock() + + existing, ok := store.packs[name] + if ok { + store.stateMu.Unlock() + + _ = pack.close() + + return existing, nil + } + + store.packs[name] = pack + store.stateMu.Unlock() + + return pack, nil +} diff --git a/object/store/packed/internal/reading/trailer_match.go b/object/store/packed/internal/reading/trailer_match.go new file mode 100644 index 00000000..8c7500b9 --- /dev/null +++ b/object/store/packed/internal/reading/trailer_match.go @@ -0,0 +1,29 @@ +package reading + +import "fmt" + +// verifyPackMatchesIndexes checks that one opened pack's trailer hash matches +// every loaded index that references the same pack name. 
+func (store *Store) verifyPackMatchesIndexes(pack *packFile) error { + snapshot, err := store.ensureCandidates() + if err != nil { + return err + } + + candidate, ok := snapshot.candidateByPack[pack.name] + if !ok { + return fmt.Errorf("objectstore/packed: missing index for pack %q", pack.name) + } + + index, err := store.openIndex(candidate) + if err != nil { + return err + } + + err = verifyMappedPackMatchesMappedIdx(pack.data, index.data, store.algo) + if err != nil { + return fmt.Errorf("objectstore/packed: pack %q does not match idx %q: %w", pack.name, index.idxName, err) + } + + return nil +} diff --git a/object/store/packed/location.go b/object/store/packed/location.go deleted file mode 100644 index 82d17c17..00000000 --- a/object/store/packed/location.go +++ /dev/null @@ -1,7 +0,0 @@ -package packed - -// location identifies one object entry in a specific pack file. -type location struct { - packName string - offset uint64 -} diff --git a/object/store/packed/new.go b/object/store/packed/new.go index 96339f3d..efcdd602 100644 --- a/object/store/packed/new.go +++ b/object/store/packed/new.go @@ -1,33 +1,20 @@ package packed import ( - "fmt" "os" objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store/packed/internal/reading" ) // New creates a packed-object store rooted at an objects/pack directory. // // Labels: Deps-Borrowed, Life-Parent. 
func New(root *os.Root, algo objectid.Algorithm, opts Options) (*Store, error) { - if algo.Size() == 0 { - return nil, objectid.ErrInvalidAlgorithm + reader, err := reading.New(root, algo, opts.toReadingOptions()) + if err != nil { + return nil, err } - switch opts.RefreshPolicy { - case RefreshPolicyOnMissing, RefreshPolicyNever: - default: - return nil, fmt.Errorf("objectstore/packed: invalid refresh policy %d", opts.RefreshPolicy) - } - - return &Store{ - root: root, - algo: algo, - refreshPolicy: opts.RefreshPolicy, - mruNodeByPack: make(map[string]*packCandidateNode), - idxByPack: make(map[string]*idxFile), - packs: make(map[string]*packFile), - deltaCache: newDeltaCache(defaultDeltaCacheMaxBytes), - }, nil + return &Store{reader: reader}, nil } diff --git a/object/store/packed/options.go b/object/store/packed/options.go index 05cbee30..72c153a1 100644 --- a/object/store/packed/options.go +++ b/object/store/packed/options.go @@ -1,15 +1,5 @@ package packed -// RefreshPolicy configures when candidate pack/index discovery refreshes. -type RefreshPolicy uint8 - -const ( - // RefreshPolicyOnMissing refreshes candidates once after a lookup miss. - RefreshPolicyOnMissing RefreshPolicy = iota - // RefreshPolicyNever disables automatic refresh after lookup misses. - RefreshPolicyNever -) - // Options configures a packed object store. type Options struct { RefreshPolicy RefreshPolicy diff --git a/object/store/packed/options_refresh.go b/object/store/packed/options_refresh.go new file mode 100644 index 00000000..ee3d5f2e --- /dev/null +++ b/object/store/packed/options_refresh.go @@ -0,0 +1,11 @@ +package packed + +// RefreshPolicy configures when candidate pack/index discovery refreshes. +type RefreshPolicy uint8 + +const ( + // RefreshPolicyOnMissing refreshes candidates once after a lookup miss. + RefreshPolicyOnMissing RefreshPolicy = iota + // RefreshPolicyNever disables automatic refresh after lookup misses. 
+ RefreshPolicyNever +) diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go deleted file mode 100644 index 928ced70..00000000 --- a/object/store/packed/pack.go +++ /dev/null @@ -1,82 +0,0 @@ -package packed - -import ( - "encoding/binary" - "fmt" - "os" - "syscall" - - packfmt "codeberg.org/lindenii/furgit/format/packfile" - "codeberg.org/lindenii/furgit/internal/intconv" -) - -// packFile stores one mapped and validated .pack file. -type packFile struct { - // name is the .pack basename. - name string - // file is the opened pack file descriptor. - file *os.File - // data is the mapped pack bytes. - data []byte -} - -// openPackFile maps and validates one pack file. -func openPackFile(name string, file *os.File, size int64) (*packFile, error) { - if size < 12 { - return nil, fmt.Errorf("objectstore/packed: pack %q too short", name) - } - - if size > int64(int(^uint(0)>>1)) { - return nil, fmt.Errorf("objectstore/packed: pack %q has unsupported size", name) - } - - fd, err := intconv.UintptrToInt(file.Fd()) - if err != nil { - return nil, err - } - - data, err := syscall.Mmap(fd, 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) - if err != nil { - return nil, err - } - - if binary.BigEndian.Uint32(data[:4]) != packfmt.Signature { - _ = syscall.Munmap(data) - - return nil, fmt.Errorf("objectstore/packed: pack %q invalid signature", name) - } - - version := binary.BigEndian.Uint32(data[4:8]) - if !packfmt.SupportedVersion(version) { - _ = syscall.Munmap(data) - - return nil, fmt.Errorf("objectstore/packed: pack %q unsupported version %d", name, version) - } - - return &packFile{name: name, file: file, data: data}, nil -} - -// close unmaps and closes one pack handle. 
-func (pack *packFile) close() error { - var closeErr error - - if pack.data != nil { - err := syscall.Munmap(pack.data) - if err != nil && closeErr == nil { - closeErr = err - } - - pack.data = nil - } - - if pack.file != nil { - err := pack.file.Close() - if err != nil && closeErr == nil { - closeErr = err - } - - pack.file = nil - } - - return closeErr -} diff --git a/object/store/packed/pack_idx_checksum.go b/object/store/packed/pack_idx_checksum.go deleted file mode 100644 index 28d4c3db..00000000 --- a/object/store/packed/pack_idx_checksum.go +++ /dev/null @@ -1,34 +0,0 @@ -package packed - -import ( - "bytes" - "fmt" - - objectid "codeberg.org/lindenii/furgit/object/id" -) - -// verifyMappedPackMatchesMappedIdx compares one mapped pack trailer hash with -// the pack hash recorded in one mapped idx trailer. -func verifyMappedPackMatchesMappedIdx(packData, idxData []byte, algo objectid.Algorithm) error { - hashSize := algo.Size() - if hashSize <= 0 { - return objectid.ErrInvalidAlgorithm - } - - if len(packData) < hashSize { - return fmt.Errorf("objectstore/packed: pack too short for trailer hash") - } - - if len(idxData) < hashSize*2 { - return fmt.Errorf("objectstore/packed: idx too short for trailer hashes") - } - - packTrailerHash := packData[len(packData)-hashSize:] - - idxPackHash := idxData[len(idxData)-hashSize*2 : len(idxData)-hashSize] - if !bytes.Equal(packTrailerHash, idxPackHash) { - return fmt.Errorf("objectstore/packed: pack hash does not match idx") - } - - return nil -} diff --git a/object/store/packed/read_bytes.go b/object/store/packed/read_bytes.go deleted file mode 100644 index 222d9a05..00000000 --- a/object/store/packed/read_bytes.go +++ /dev/null @@ -1,46 +0,0 @@ -package packed - -import ( - "fmt" - - objectheader "codeberg.org/lindenii/furgit/object/header" - objectid "codeberg.org/lindenii/furgit/object/id" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// ReadBytesContent reads an object's type and content bytes. 
-// -// It fully resolves the requested object bytes. For base pack entries, this -// includes verifying that the zlib stream inflates to exactly the declared -// object size and verifying the Adler-32 trailer. -func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { - loc, err := store.lookup(id) - if err != nil { - return objecttype.TypeInvalid, nil, err - } - - return store.deltaResolveContent(loc) -} - -// ReadBytesFull reads a full serialized object as "type size\0content". -// -// Like ReadBytesContent, it fully resolves the requested object bytes. For -// base pack entries, this includes verifying that the zlib stream inflates to -// exactly the declared object size and verifying the Adler-32 trailer. -func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { - ty, content, err := store.ReadBytesContent(id) - if err != nil { - return nil, err - } - - header, ok := objectheader.Encode(ty, int64(len(content))) - if !ok { - return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", ty) - } - - out := make([]byte, len(header)+len(content)) - copy(out, header) - copy(out[len(header):], content) - - return out, nil -} diff --git a/object/store/packed/read_closer.go b/object/store/packed/read_closer.go deleted file mode 100644 index c317d002..00000000 --- a/object/store/packed/read_closer.go +++ /dev/null @@ -1,19 +0,0 @@ -package packed - -import "io" - -// readCloser proxies reads and closes one underlying closer. -type readCloser struct { - reader io.Reader - closer io.Closer -} - -// Read proxies reads to the underlying reader. -func (reader *readCloser) Read(dst []byte) (int, error) { - return reader.reader.Read(dst) -} - -// Close closes the underlying closer. 
-func (reader *readCloser) Close() error { - return reader.closer.Close() -} diff --git a/object/store/packed/read_header.go b/object/store/packed/read_header.go deleted file mode 100644 index d774de7c..00000000 --- a/object/store/packed/read_header.go +++ /dev/null @@ -1,20 +0,0 @@ -package packed - -import ( - objectid "codeberg.org/lindenii/furgit/object/id" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// ReadHeader reads an object's type and declared content size. -// -// It resolves header metadata only. It does not verify that the full pack entry -// payload is readable and does not verify any zlib Adler-32 trailer for -// compressed entry data. -func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { - loc, err := store.lookup(id) - if err != nil { - return objecttype.TypeInvalid, 0, err - } - - return store.resolveHeaderAt(loc) -} diff --git a/object/store/packed/read_header_resolve.go b/object/store/packed/read_header_resolve.go deleted file mode 100644 index 849cfbc7..00000000 --- a/object/store/packed/read_header_resolve.go +++ /dev/null @@ -1,65 +0,0 @@ -package packed - -import ( - "fmt" - - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// resolveHeaderAt resolves one object's canonical type and declared content size. 
-func (store *Store) resolveHeaderAt(start location) (objecttype.Type, int64, error) { - visited := make(map[location]struct{}) - current := start - declaredSize := int64(-1) - - for { - if _, ok := visited[current]; ok { - return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: delta cycle while resolving object header") - } - - visited[current] = struct{}{} - - pack, meta, err := store.entryMetaAt(current) - if err != nil { - return objecttype.TypeInvalid, 0, err - } - - if declaredSize < 0 { - if meta.ty.IsBaseObject() { - declaredSize = meta.size - } else { - size, err := deltaDeclaredSizeAt(pack, meta.dataOffset) - if err != nil { - return objecttype.TypeInvalid, 0, err - } - - declaredSize = size - } - } - - if meta.ty.IsBaseObject() { - return meta.ty, declaredSize, nil - } - - switch meta.ty { - case objecttype.TypeRefDelta: - next, err := store.lookup(meta.baseRefID) - if err != nil { - return objecttype.TypeInvalid, 0, err - } - - current = next - case objecttype.TypeOfsDelta: - current = location{ - packName: current.packName, - offset: meta.baseOfs, - } - case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: - return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) - case objecttype.TypeInvalid, objecttype.TypeFuture: - return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) - default: - return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) - } - } -} diff --git a/object/store/packed/read_reader.go b/object/store/packed/read_reader.go deleted file mode 100644 index 8539e0bf..00000000 --- a/object/store/packed/read_reader.go +++ /dev/null @@ -1,92 +0,0 @@ -package packed - -import ( - "bytes" - "fmt" - "io" - - "codeberg.org/lindenii/furgit/internal/iolimit" - objectheader "codeberg.org/lindenii/furgit/object/header" - objectid 
"codeberg.org/lindenii/furgit/object/id" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// ReadReaderContent reads an object's type, declared content size, and content -// stream. -// -// Close releases reader-local resources only. It does not drain unread data for -// additional validation. In particular, malformed trailing compressed data, -// trailing bytes past the declared object size, and the zlib Adler-32 trailer -// may go unverified unless the caller reads to io.EOF. -func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { - loc, err := store.lookup(id) - if err != nil { - return objecttype.TypeInvalid, 0, nil, err - } - - pack, meta, err := store.entryMetaAt(loc) - if err != nil { - return objecttype.TypeInvalid, 0, nil, err - } - - if meta.ty.IsBaseObject() { - zr, err := zlibReaderAt(pack, meta.dataOffset) - if err != nil { - return objecttype.TypeInvalid, 0, nil, err - } - - return meta.ty, meta.size, &readCloser{ - reader: iolimit.ExpectLengthReader(zr, meta.size), - closer: zr, - }, nil - } - - ty, content, err := store.deltaResolveContent(loc) - if err != nil { - return objecttype.TypeInvalid, 0, nil, err - } - - return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil -} - -// ReadReaderFull reads a full serialized object stream as "type size\0content". -// -// Close releases reader-local resources only. It does not drain unread data for -// additional validation. In particular, malformed trailing compressed data, -// trailing bytes past the declared object size, and the zlib Adler-32 trailer -// may go unverified unless the caller reads to io.EOF. 
-func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { - loc, err := store.lookup(id) - if err != nil { - return nil, err - } - - pack, meta, err := store.entryMetaAt(loc) - if err != nil { - return nil, err - } - - if meta.ty.IsBaseObject() { - header, ok := objectheader.Encode(meta.ty, meta.size) - if !ok { - return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", meta.ty) - } - - zr, err := zlibReaderAt(pack, meta.dataOffset) - if err != nil { - return nil, err - } - - return &readCloser{ - reader: io.MultiReader(bytes.NewReader(header), iolimit.ExpectLengthReader(zr, meta.size)), - closer: zr, - }, nil - } - - raw, err := store.ReadBytesFull(id) - if err != nil { - return nil, err - } - - return io.NopCloser(bytes.NewReader(raw)), nil -} diff --git a/object/store/packed/read_size.go b/object/store/packed/read_size.go deleted file mode 100644 index ffec8b13..00000000 --- a/object/store/packed/read_size.go +++ /dev/null @@ -1,45 +0,0 @@ -package packed - -import ( - "fmt" - - objectid "codeberg.org/lindenii/furgit/object/id" - objecttype "codeberg.org/lindenii/furgit/object/type" -) - -// ReadSize reads an object's declared content size. -// -// Like ReadHeader, it resolves header metadata only. It does not verify that -// the full pack entry payload is readable and does not verify any zlib -// Adler-32 trailer for compressed entry data. -func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { - loc, err := store.lookup(id) - if err != nil { - return 0, err - } - - return store.resolveSizeAt(loc) -} - -// resolveSizeAt resolves one object's declared content size from location. 
-func (store *Store) resolveSizeAt(start location) (int64, error) { - pack, meta, err := store.entryMetaAt(start) - if err != nil { - return 0, err - } - - if meta.ty.IsBaseObject() { - return meta.size, nil - } - - switch meta.ty { - case objecttype.TypeRefDelta, objecttype.TypeOfsDelta: - return deltaDeclaredSizeAt(pack, meta.dataOffset) - case objecttype.TypeInvalid, objecttype.TypeFuture: - return 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) - case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: - return 0, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) - default: - return 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) - } -} diff --git a/object/store/packed/read_test.go b/object/store/packed/read_test.go deleted file mode 100644 index 45ee8b01..00000000 --- a/object/store/packed/read_test.go +++ /dev/null @@ -1,301 +0,0 @@ -package packed_test - -import ( - "bytes" - "errors" - "fmt" - "io/fs" - "strconv" - "strings" - "testing" - - "codeberg.org/lindenii/furgit/internal/testgit" - objectid "codeberg.org/lindenii/furgit/object/id" - objectstore "codeberg.org/lindenii/furgit/object/store" - "codeberg.org/lindenii/furgit/object/store/packed" -) - -func TestPackedStoreReadAgainstGit(t *testing.T) { - t.Parallel() - testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper - testRepo, ids := createPackedFixtureRepo(t, algo) - store := openPackedStore(t, testRepo, algo) - - for _, id := range ids { - t.Run(id.String(), func(t *testing.T) { - wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, id) - - gotHeaderType, gotHeaderSize, err := store.ReadHeader(id) - if err != nil { - t.Fatalf("ReadHeader: %v", err) - } - - if gotHeaderType != wantType { - t.Fatalf("ReadHeader type = %v, want %v", gotHeaderType, wantType) - } - - if gotHeaderSize != int64(len(wantBody)) { - t.Fatalf("ReadHeader size = 
%d, want %d", gotHeaderSize, len(wantBody)) - } - - gotSize, err := store.ReadSize(id) - if err != nil { - t.Fatalf("ReadSize: %v", err) - } - - if gotSize != int64(len(wantBody)) { - t.Fatalf("ReadSize = %d, want %d", gotSize, len(wantBody)) - } - - gotRaw, err := store.ReadBytesFull(id) - if err != nil { - t.Fatalf("ReadBytesFull: %v", err) - } - - if !bytes.Equal(gotRaw, wantRaw) { - t.Fatalf("ReadBytesFull mismatch") - } - - gotType, gotBody, err := store.ReadBytesContent(id) - if err != nil { - t.Fatalf("ReadBytesContent: %v", err) - } - - if gotType != wantType { - t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType) - } - - if !bytes.Equal(gotBody, wantBody) { - t.Fatalf("ReadBytesContent mismatch") - } - - fullReader, err := store.ReadReaderFull(id) - if err != nil { - t.Fatalf("ReadReaderFull: %v", err) - } - - got := mustReadAllAndClose(t, fullReader) - if !bytes.Equal(got, wantRaw) { - t.Fatalf("ReadReaderFull mismatch") - } - - contentType, contentSize, contentReader, err := store.ReadReaderContent(id) - if err != nil { - t.Fatalf("ReadReaderContent: %v", err) - } - - if contentType != wantType { - t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType) - } - - if contentSize != int64(len(wantBody)) { - t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody)) - } - - got = mustReadAllAndClose(t, contentReader) - if !bytes.Equal(got, wantBody) { - t.Fatalf("ReadReaderContent mismatch") - } - }) - } - }) -} - -func TestPackedStoreErrors(t *testing.T) { - t.Parallel() - testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper - testRepo, _ := createPackedFixtureRepo(t, algo) - store := openPackedStore(t, testRepo, algo) - - notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen())) - if err != nil { - t.Fatalf("ParseHex(notFound): %v", err) - } - - _, err = store.ReadBytesFull(notFoundID) - if !errors.Is(err, objectstore.ErrObjectNotFound) { - 
t.Fatalf("ReadBytesFull not-found error = %v", err) - } - - _, _, err = store.ReadBytesContent(notFoundID) - if !errors.Is(err, objectstore.ErrObjectNotFound) { - t.Fatalf("ReadBytesContent not-found error = %v", err) - } - - _, err = store.ReadReaderFull(notFoundID) - if !errors.Is(err, objectstore.ErrObjectNotFound) { - t.Fatalf("ReadReaderFull not-found error = %v", err) - } - - _, _, _, err = store.ReadReaderContent(notFoundID) - if !errors.Is(err, objectstore.ErrObjectNotFound) { - t.Fatalf("ReadReaderContent not-found error = %v", err) - } - - _, _, err = store.ReadHeader(notFoundID) - if !errors.Is(err, objectstore.ErrObjectNotFound) { - t.Fatalf("ReadHeader not-found error = %v", err) - } - - _, err = store.ReadSize(notFoundID) - if !errors.Is(err, objectstore.ErrObjectNotFound) { - t.Fatalf("ReadSize not-found error = %v", err) - } - - var otherAlgo objectid.Algorithm - - for _, candidate := range objectid.SupportedAlgorithms() { - if candidate != algo { - otherAlgo = candidate - - break - } - } - - if otherAlgo != objectid.AlgorithmUnknown { - mismatchID, err := objectid.ParseHex(otherAlgo, strings.Repeat("0", otherAlgo.HexLen())) - if err != nil { - t.Fatalf("ParseHex(mismatch): %v", err) - } - - _, err = store.ReadBytesFull(mismatchID) - if err == nil || !strings.Contains(err.Error(), "algorithm mismatch") { - t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err) - } - } - }) -} - -func TestPackedStoreNewValidation(t *testing.T) { - t.Parallel() - testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper - testRepo, _ := createPackedFixtureRepo(t, algo) - - store := openPackedStore(t, testRepo, algo) - - err := store.Close() - if err != nil { - t.Fatalf("Close: %v", err) - } - }) -} - -func TestPackedStoreInvalidAlgorithm(t *testing.T) { - t.Parallel() - testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectid.AlgorithmSHA1, Bare: true}) - - root := testRepo.OpenPackRoot(t) - - _, err := 
packed.New(root, objectid.AlgorithmUnknown, packed.Options{}) - if !errors.Is(err, objectid.ErrInvalidAlgorithm) { - t.Fatalf("packed.New invalid algorithm error = %v", err) - } -} - -func TestPackedStoreReadHeaderUsesResolvedObjectSizeForDelta(t *testing.T) { - t.Parallel() - testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper - testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) - - var parent objectid.ObjectID - - for i := range 96 { - content := strings.Repeat("common-line-"+strconv.Itoa(i%7)+"\n", 384) + fmt.Sprintf("tail-%03d\n", i) - - _, treeID := testRepo.MakeSingleFileTree(t, "file.txt", []byte(content)) - if i == 0 { - parent = testRepo.CommitTree(t, treeID, "delta-header-size-0") - - continue - } - - parent = testRepo.CommitTree(t, treeID, fmt.Sprintf("delta-header-size-%03d", i), parent) - } - - testRepo.UpdateRef(t, "refs/heads/main", parent) - testRepo.Repack(t, "-a", "-d", "-f", "--window=128", "--depth=128") - - deltaID, wantResolvedSize := findDeltaObjectWithResolvedSizeMismatch(t, testRepo, algo) - store := openPackedStore(t, testRepo, algo) - - _, gotSize, err := store.ReadHeader(deltaID) - if err != nil { - t.Fatalf("ReadHeader(%s): %v", deltaID, err) - } - - if gotSize != wantResolvedSize { - t.Fatalf("ReadHeader(%s) size = %d, want resolved size %d", deltaID, gotSize, wantResolvedSize) - } - - gotReadSize, err := store.ReadSize(deltaID) - if err != nil { - t.Fatalf("ReadSize(%s): %v", deltaID, err) - } - - if gotReadSize != wantResolvedSize { - t.Fatalf("ReadSize(%s) = %d, want resolved size %d", deltaID, gotReadSize, wantResolvedSize) - } - }) -} - -func findDeltaObjectWithResolvedSizeMismatch(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) (objectid.ObjectID, int64) { - t.Helper() - - packRoot := testRepo.OpenPackRoot(t) - - entries, err := fs.ReadDir(packRoot.FS(), ".") - if err != nil { - t.Fatalf("ReadDir(pack): %v", err) - } - - var idxName string 
- - for _, entry := range entries { - if strings.HasSuffix(entry.Name(), ".idx") { - idxName = entry.Name() - - break - } - } - - if idxName == "" { - t.Fatalf("no idx files found") - } - - verifyOut := testRepo.Run(t, "verify-pack", "-v", "objects/pack/"+idxName) - for line := range strings.SplitSeq(strings.TrimSpace(verifyOut), "\n") { - fields := strings.Fields(line) - if len(fields) < 7 { - continue - } - - idHex := fields[0] - - deltaStreamSize, err := strconv.ParseInt(fields[2], 10, 64) - if err != nil { - continue - } - - resolvedSizeStr := testRepo.Run(t, "cat-file", "-s", idHex) - - resolvedSize, err := strconv.ParseInt(strings.TrimSpace(resolvedSizeStr), 10, 64) - if err != nil { - t.Fatalf("parse cat-file size for %s: %v", idHex, err) - } - - if deltaStreamSize == resolvedSize { - continue - } - - id, err := objectid.ParseHex(algo, idHex) - if err != nil { - t.Fatalf("ParseHex(%s): %v", idHex, err) - } - - return id, resolvedSize - } - - t.Fatalf("did not find a delta object with mismatched stream/resolved size") - - return objectid.ObjectID{}, 0 -} diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go new file mode 100644 index 00000000..45b9e8d9 --- /dev/null +++ b/object/store/packed/reader.go @@ -0,0 +1,65 @@ +package packed + +import ( + "io" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + "codeberg.org/lindenii/furgit/object/store/packed/internal/reading" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +var _ objectstore.Reader = (*Store)(nil) + +// ReadBytesFull reads a full serialized object as "type size\0content". +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + return store.reader.ReadBytesFull(id) +} + +// ReadBytesContent reads an object's type and content bytes. 
+func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + return store.reader.ReadBytesContent(id) +} + +// ReadReaderFull reads a full serialized object stream as "type size\0content". +func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + return store.reader.ReadReaderFull(id) +} + +// ReadReaderContent reads an object's type, declared content length, and +// content stream. +func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + return store.reader.ReadReaderContent(id) +} + +// ReadSize reads an object's declared content length. +func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { + return store.reader.ReadSize(id) +} + +// ReadHeader reads an object's type and declared content length. +func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + return store.reader.ReadHeader(id) +} + +// Refresh updates the packed-store view of on-disk pack/index candidates. +func (store *Store) Refresh() error { + return store.reader.Refresh() +} + +func (opts Options) toReadingOptions() reading.Options { + var refreshPolicy reading.RefreshPolicy + + switch opts.RefreshPolicy { + case RefreshPolicyOnMissing: + refreshPolicy = reading.RefreshPolicyOnMissing + case RefreshPolicyNever: + refreshPolicy = reading.RefreshPolicyNever + default: + refreshPolicy = reading.RefreshPolicy(opts.RefreshPolicy) + } + + return reading.Options{ + RefreshPolicy: refreshPolicy, + } +} diff --git a/object/store/packed/store.go b/object/store/packed/store.go index 233b3fec..321e0c2c 100644 --- a/object/store/packed/store.go +++ b/object/store/packed/store.go @@ -1,53 +1,17 @@ -// Package packed provides packfile reading and associated indexes. 
package packed import ( - "os" - "sync" - "sync/atomic" - - objectid "codeberg.org/lindenii/furgit/object/id" - objectstore "codeberg.org/lindenii/furgit/object/store" + "codeberg.org/lindenii/furgit/object/store/packed/internal/reading" ) // Store reads Git objects from pack/index files under an objects/pack root. // -// Cached pack/index mappings are retained until Close. -// // Labels: Close-Caller. type Store struct { - // root is the borrowed objects/pack capability used for all file access. - root *os.Root - // algo is the expected object ID algorithm for lookups. - algo objectid.Algorithm - // refreshPolicy controls automatic candidate refresh on lookup misses. - refreshPolicy RefreshPolicy - - // candidates stores the latest immutable candidate snapshot. - candidates atomic.Pointer[candidateSnapshot] - // refreshMu serializes candidate refresh. - refreshMu sync.Mutex - // mruMu guards candidate MRU linked-list state. - mruMu sync.RWMutex - // mruHead is the first pack in MRU order. - mruHead *packCandidateNode - // mruTail is the last pack in MRU order. - mruTail *packCandidateNode - // mruNodeByPack maps pack basename to MRU node. - mruNodeByPack map[string]*packCandidateNode - // idxByPack caches opened and parsed indexes by pack basename. - idxByPack map[string]*idxFile - - // stateMu guards pack cache and close state. - stateMu sync.RWMutex - // idxMu guards parsed index cache. - idxMu sync.RWMutex - // cacheMu guards delta cache operations. - cacheMu sync.RWMutex - // packs caches opened .pack handles by basename. - packs map[string]*packFile - // deltaCache caches resolved base objects by pack location. - deltaCache *deltaCache + reader *reading.Store } -var _ objectstore.Reader = (*Store)(nil) +// Close releases mapped pack/index resources associated with the store. 
+func (store *Store) Close() error { + return store.reader.Close() +} diff --git a/object/store/packed/store_lookup.go b/object/store/packed/store_lookup.go deleted file mode 100644 index 0513caa7..00000000 --- a/object/store/packed/store_lookup.go +++ /dev/null @@ -1,106 +0,0 @@ -package packed - -import ( - "errors" - - objectid "codeberg.org/lindenii/furgit/object/id" - objectstore "codeberg.org/lindenii/furgit/object/store" -) - -// lookup resolves one object ID to its pack location. -func (store *Store) lookup(id objectid.ObjectID) (location, error) { - var zero location - if id.Algorithm() != store.algo { - return zero, errors.New("objectstore/packed: object id algorithm mismatch") - } - - snapshot, err := store.ensureCandidates() - if err != nil { - return zero, err - } - - loc, ok, err := store.lookupInCandidates(id, snapshot) - if err != nil { - return zero, err - } - - if ok { - return loc, nil - } - - if store.refreshPolicy == RefreshPolicyOnMissing { //nolint:nestif - err = store.Refresh() - if err != nil { - return zero, err - } - - refreshed := store.candidates.Load() - if refreshed != nil && refreshed != snapshot { - loc, ok, err = store.lookupInCandidates(id, refreshed) - if err != nil { - return zero, err - } - - if ok { - return loc, nil - } - } - } - - return zero, objectstore.ErrObjectNotFound -} - -func (store *Store) lookupInCandidates( - id objectid.ObjectID, - snapshot *candidateSnapshot, -) (location, bool, error) { - var zero location - - nextPackName := store.firstCandidatePackName(snapshot) - for nextPackName != "" { - candidate, ok := snapshot.candidateByPack[nextPackName] - if !ok { - nextPackName = store.firstCandidatePackName(snapshot) - - continue - } - - nextPackName = store.nextCandidatePackName(candidate.packName, snapshot) - - index, err := store.openIndex(candidate) - if err != nil { - return zero, false, err - } - - offset, ok, err := index.lookup(id) - if err != nil { - return zero, false, err - } - - if ok { - 
store.touchCandidate(candidate.packName) - - return location{packName: index.packName, offset: offset}, true, nil - } - } - - for _, candidate := range snapshot.candidates { - index, err := store.openIndex(candidate) - if err != nil { - return zero, false, err - } - - offset, ok, err := index.lookup(id) - if err != nil { - return zero, false, err - } - - if ok { - store.touchCandidate(candidate.packName) - - return location{packName: index.packName, offset: offset}, true, nil - } - } - - return zero, false, nil -} diff --git a/object/store/packed/store_open_pack.go b/object/store/packed/store_open_pack.go deleted file mode 100644 index c621e08c..00000000 --- a/object/store/packed/store_open_pack.go +++ /dev/null @@ -1,57 +0,0 @@ -package packed - -// openPack returns one opened and validated pack handle. -func (store *Store) openPack(name string) (*packFile, error) { - store.stateMu.RLock() - - pack, ok := store.packs[name] - if ok { - store.stateMu.RUnlock() - - return pack, nil - } - - store.stateMu.RUnlock() - - file, err := store.root.Open(name) - if err != nil { - return nil, err - } - - info, err := file.Stat() - if err != nil { - _ = file.Close() - - return nil, err - } - - pack, err = openPackFile(name, file, info.Size()) - if err != nil { - _ = file.Close() - - return nil, err - } - - err = store.verifyPackMatchesIndexes(pack) - if err != nil { - _ = pack.close() - - return nil, err - } - - store.stateMu.Lock() - - existing, ok := store.packs[name] - if ok { - store.stateMu.Unlock() - - _ = pack.close() - - return existing, nil - } - - store.packs[name] = pack - store.stateMu.Unlock() - - return pack, nil -} diff --git a/object/store/packed/trailer_match.go b/object/store/packed/trailer_match.go deleted file mode 100644 index dc43e37d..00000000 --- a/object/store/packed/trailer_match.go +++ /dev/null @@ -1,29 +0,0 @@ -package packed - -import "fmt" - -// verifyPackMatchesIndexes checks that one opened pack's trailer hash matches -// every loaded index that 
references the same pack name. -func (store *Store) verifyPackMatchesIndexes(pack *packFile) error { - snapshot, err := store.ensureCandidates() - if err != nil { - return err - } - - candidate, ok := snapshot.candidateByPack[pack.name] - if !ok { - return fmt.Errorf("objectstore/packed: missing index for pack %q", pack.name) - } - - index, err := store.openIndex(candidate) - if err != nil { - return err - } - - err = verifyMappedPackMatchesMappedIdx(pack.data, index.data, store.algo) - if err != nil { - return fmt.Errorf("objectstore/packed: pack %q does not match idx %q: %w", pack.name, index.idxName, err) - } - - return nil -} -- cgit v1.3.1-10-gc9f91