diff options
| author | 2026-02-21 05:35:12 +0800 | |
|---|---|---|
| committer | 2026-02-21 11:15:18 +0800 | |
| commit | ae879b8cf5a87199802a33d6b15c76afafa8002b (patch) | |
| tree | a93e9486a9610b78823e157c68b75e0724366217 | |
| parent | cache/lru: Add basic LRU (diff) | |
| signature | No signature | |
objectstore/packed: Add initial pack reading support
| -rw-r--r-- | objectstore/packed/TODO | 3 | ||||
| -rw-r--r-- | objectstore/packed/delta_apply.go | 166 | ||||
| -rw-r--r-- | objectstore/packed/delta_base.go | 39 | ||||
| -rw-r--r-- | objectstore/packed/delta_cache.go | 58 | ||||
| -rw-r--r-- | objectstore/packed/delta_plan.go | 81 | ||||
| -rw-r--r-- | objectstore/packed/entry_inflate.go | 41 | ||||
| -rw-r--r-- | objectstore/packed/entry_parse.go | 117 | ||||
| -rw-r--r-- | objectstore/packed/helpers_test.go | 96 | ||||
| -rw-r--r-- | objectstore/packed/idx_load.go | 145 | ||||
| -rw-r--r-- | objectstore/packed/idx_parse.go | 142 | ||||
| -rw-r--r-- | objectstore/packed/pack.go | 62 | ||||
| -rw-r--r-- | objectstore/packed/read_bytes.go | 34 | ||||
| -rw-r--r-- | objectstore/packed/read_header.go | 19 | ||||
| -rw-r--r-- | objectstore/packed/read_reader.go | 93 | ||||
| -rw-r--r-- | objectstore/packed/read_test.go | 149 | ||||
| -rw-r--r-- | objectstore/packed/store.go | 182 |
16 files changed, 1427 insertions, 0 deletions
diff --git a/objectstore/packed/TODO b/objectstore/packed/TODO new file mode 100644 index 00000000..f4a5f48e --- /dev/null +++ b/objectstore/packed/TODO @@ -0,0 +1,3 @@ +* Per delta-plan memo map +* Internal handle/request context (might expose it externally later and add to global interface) +* Audit on mutex diff --git a/objectstore/packed/delta_apply.go b/objectstore/packed/delta_apply.go new file mode 100644 index 00000000..9d34c245 --- /dev/null +++ b/objectstore/packed/delta_apply.go @@ -0,0 +1,166 @@ +package packed + +import ( + "fmt" + + "codeberg.org/lindenii/furgit/objecttype" +) + +// deltaResolveContent resolves one object's content bytes from its pack location. +func (store *Store) deltaResolveContent(start location) (objecttype.Type, []byte, error) { + plan, err := store.deltaPlanFor(start) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + baseType, out, err := store.deltaResolveBase(plan) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + for i := len(plan.frames) - 1; i >= 0; i-- { + frame := plan.frames[i] + pack, err := store.openPack(frame.packName) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + delta, err := inflateAt(pack, frame.dataOffset, -1) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + out, err = applyDelta(out, delta) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + } + if int64(len(out)) != plan.declaredSize { + return objecttype.TypeInvalid, nil, fmt.Errorf( + "objectstore/packed: resolved content size mismatch: got %d want %d", + len(out), + plan.declaredSize, + ) + } + return baseType, out, nil +} + +// applyDelta applies one Git delta instruction stream to base. 
//
// The stream begins with two varints, the expected size of base and the size
// of the result, followed by copy instructions (high bit set) and insert
// instructions (high bit clear). Malformed or hostile input is reported as an
// error; it must never panic or force an allocation sized only by the header.
func applyDelta(base, delta []byte) ([]byte, error) {
	pos := 0
	srcSize, err := readDeltaVarint(delta, &pos)
	if err != nil {
		return nil, err
	}
	dstSize, err := readDeltaVarint(delta, &pos)
	if err != nil {
		return nil, err
	}
	if srcSize != len(base) {
		return nil, fmt.Errorf("objectstore/packed: delta source size mismatch: got %d want %d", srcSize, len(base))
	}

	// Grow the output as instructions are validated rather than trusting the
	// declared result size up front: the initial capacity is capped so that a
	// bogus header alone cannot trigger a huge allocation.
	capHint := len(base) + len(delta)
	if dstSize < capHint {
		capHint = dstSize
	}
	out := make([]byte, 0, capHint)

	for pos < len(delta) {
		op := delta[pos]
		pos++

		if op&0x80 == 0 {
			// Insert: op itself is the number of literal bytes that follow.
			if op == 0 {
				return nil, fmt.Errorf("objectstore/packed: invalid delta opcode 0")
			}
			n := int(op)
			// Subtractive form avoids any possibility of integer overflow.
			if n > len(delta)-pos || n > dstSize-len(out) {
				return nil, fmt.Errorf("objectstore/packed: delta insert out of bounds")
			}
			out = append(out, delta[pos:pos+n]...)
			pos += n
			continue
		}

		// Copy: bits 0-3 say which little-endian offset bytes follow.
		// The offset is kept in a uint64 so that a fourth byte >= 0x80 cannot
		// turn it negative where int is 32 bits wide.
		var off uint64
		for i := uint(0); i < 4; i++ {
			if op&(0x01<<i) == 0 {
				continue
			}
			if pos >= len(delta) {
				return nil, fmt.Errorf("objectstore/packed: malformed delta copy offset")
			}
			off |= uint64(delta[pos]) << (8 * i)
			pos++
		}

		// Bits 4-6 say which little-endian size bytes follow.
		n := 0
		for i := uint(0); i < 3; i++ {
			if op&(0x10<<i) == 0 {
				continue
			}
			if pos >= len(delta) {
				return nil, fmt.Errorf("objectstore/packed: malformed delta copy size")
			}
			n |= int(delta[pos]) << (8 * i)
			pos++
		}
		if n == 0 {
			// An encoded size of zero means 0x10000 bytes.
			n = 0x10000
		}

		if off > uint64(len(base)) || uint64(n) > uint64(len(base))-off || n > dstSize-len(out) {
			return nil, fmt.Errorf("objectstore/packed: delta copy out of bounds")
		}
		start := int(off)
		out = append(out, base[start:start+n]...)
	}

	if len(out) != dstSize {
		return nil, fmt.Errorf("objectstore/packed: delta output size mismatch: got %d want %d", len(out), dstSize)
	}
	return out, nil
}

// readDeltaVarint parses one Git delta varint and advances pos.
//
// The value is encoded little-endian in 7-bit groups; the high bit of each
// byte marks continuation. The result is always non-negative: an encoding
// whose value does not fit in an int is rejected as an overflow instead of
// wrapping. pos is advanced past every byte consumed, also on error.
func readDeltaVarint(buf []byte, pos *int) (int, error) {
	// Largest value representable by int on this platform.
	const maxInt = uint64(^uint(0) >> 1)

	var value uint64
	shift := uint(0)
	for {
		if *pos >= len(buf) {
			return 0, fmt.Errorf("objectstore/packed: malformed delta varint")
		}
		b := buf[*pos]
		*pos++

		group := uint64(b & 0x7f)
		// Reject any group whose bits would land above maxInt once shifted.
		if group > maxInt>>shift {
			return 0, fmt.Errorf("objectstore/packed: delta varint overflow")
		}
		value |= group << shift
		if b&0x80 == 0 {
			return int(value), nil
		}
		shift += 7
		if shift > 63 {
			return 0, fmt.Errorf("objectstore/packed: delta varint overflow")
		}
	}
}

// ---- objectstore/packed/delta_base.go (new file, package packed) ----
// imports: "fmt", "codeberg.org/lindenii/furgit/objecttype"

// deltaResolveBase materializes the base object body for one delta plan.
+func (store *Store) deltaResolveBase(plan deltaPlan) (objecttype.Type, []byte, error) { + cacheKey := deltaBaseKey{ + packName: plan.baseLoc.packName, + offset: plan.baseLoc.offset, + } + + store.cacheMu.RLock() + if ty, content, ok := store.deltaCache.get(cacheKey); ok { + store.cacheMu.RUnlock() + return ty, content, nil + } + store.cacheMu.RUnlock() + + pack, meta, err := store.entryMetaAt(plan.baseLoc) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + if !isBaseObjectType(meta.ty) { + return objecttype.TypeInvalid, nil, fmt.Errorf("objectstore/packed: delta plan base is not a base object") + } + base, err := inflateAt(pack, meta.dataOffset, meta.size) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + store.cacheMu.Lock() + store.deltaCache.add(cacheKey, meta.ty, base) + store.cacheMu.Unlock() + return meta.ty, base, nil +} diff --git a/objectstore/packed/delta_cache.go b/objectstore/packed/delta_cache.go new file mode 100644 index 00000000..66bd8ac4 --- /dev/null +++ b/objectstore/packed/delta_cache.go @@ -0,0 +1,58 @@ +package packed + +import ( + "codeberg.org/lindenii/furgit/internal/cache/lru" + "codeberg.org/lindenii/furgit/objecttype" +) + +// deltaBaseKey identifies one base object by pack location. +type deltaBaseKey struct { + packName string + offset uint64 +} + +// deltaBaseValue stores one cached base object body. +type deltaBaseValue struct { + ty objecttype.Type + content []byte +} + +// deltaCache wraps a weighted LRU for resolved delta bases. +type deltaCache struct { + lru *lru.Cache[deltaBaseKey, deltaBaseValue] +} + +// newDeltaCache creates a delta base cache with a byte budget. +func newDeltaCache(maxBytes int64) *deltaCache { + return &deltaCache{ + lru: lru.New( + maxBytes, + func(_ deltaBaseKey, value deltaBaseValue) int64 { + return int64(len(value.content)) + }, + nil, + ), + } +} + +// get returns a cloned cached base object value. 
+func (cache *deltaCache) get(key deltaBaseKey) (objecttype.Type, []byte, bool) { + value, ok := cache.lru.Get(key) + if !ok { + return objecttype.TypeInvalid, nil, false + } + return value.ty, append([]byte(nil), value.content...), true +} + +// add stores a cloned base object value. +func (cache *deltaCache) add(key deltaBaseKey, ty objecttype.Type, content []byte) { + cache.lru.Add(key, deltaBaseValue{ + ty: ty, + content: append([]byte(nil), content...), + }) +} + +// clear removes all cached entries. +func (cache *deltaCache) clear() { + cache.lru.Clear() +} diff --git a/objectstore/packed/delta_plan.go b/objectstore/packed/delta_plan.go new file mode 100644 index 00000000..e55400aa --- /dev/null +++ b/objectstore/packed/delta_plan.go @@ -0,0 +1,81 @@ +package packed + +import ( + "fmt" + + "codeberg.org/lindenii/furgit/objecttype" +) + +// deltaFrame describes one delta payload to apply during reconstruction. +type deltaFrame struct { + // packName identifies where the delta payload lives. + packName string + // dataOffset points to the start of the delta zlib payload in pack. + dataOffset int +} + +// deltaPlan describes how to reconstruct one requested object. +type deltaPlan struct { + // declaredSize is the target object's declared content size. + declaredSize int64 + // baseLoc points to the innermost base object. + baseLoc location + // baseType is the canonical object type resolved from baseLoc. + baseType objecttype.Type + // frames contains deltas from target down toward base. + frames []deltaFrame +} + +// deltaPlanFor walks one object's chain and builds a delta reconstruction plan. 
+func (store *Store) deltaPlanFor(start location) (deltaPlan, error) { + visited := make(map[location]struct{}) + current := start + + var plan deltaPlan + plan.declaredSize = -1 + + for { + if _, ok := visited[current]; ok { + return deltaPlan{}, fmt.Errorf("objectstore/packed: delta cycle while resolving object") + } + visited[current] = struct{}{} + + _, meta, err := store.entryMetaAt(current) + if err != nil { + return deltaPlan{}, err + } + if plan.declaredSize < 0 { + plan.declaredSize = meta.size + } + + if isBaseObjectType(meta.ty) { + plan.baseLoc = current + plan.baseType = meta.ty + return plan, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta: + plan.frames = append(plan.frames, deltaFrame{ + packName: current.packName, + dataOffset: meta.dataOffset, + }) + next, err := store.lookup(meta.baseRefID) + if err != nil { + return deltaPlan{}, err + } + current = next + case objecttype.TypeOfsDelta: + plan.frames = append(plan.frames, deltaFrame{ + packName: current.packName, + dataOffset: meta.dataOffset, + }) + current = location{ + packName: current.packName, + offset: meta.baseOfs, + } + default: + return deltaPlan{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } + } +} diff --git a/objectstore/packed/entry_inflate.go b/objectstore/packed/entry_inflate.go new file mode 100644 index 00000000..5f1c476e --- /dev/null +++ b/objectstore/packed/entry_inflate.go @@ -0,0 +1,41 @@ +package packed + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" +) + +// zlibReaderAt opens a zlib reader starting at data offset within pack. +func zlibReaderAt(pack *packFile, offset int) (io.ReadCloser, error) { + if offset < 0 || offset > len(pack.data) { + return nil, fmt.Errorf("objectstore/packed: pack %q zlib offset out of bounds", pack.name) + } + return zlib.NewReader(bytes.NewReader(pack.data[offset:])) +} + +// inflateAt inflates one entry payload from data offset. 
+// +// When expectedSize is non-negative, the inflated length must match. +func inflateAt(pack *packFile, offset int, expectedSize int64) ([]byte, error) { + reader, err := zlibReaderAt(pack, offset) + if err != nil { + return nil, err + } + defer func() { _ = reader.Close() }() + + body, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + if expectedSize >= 0 && int64(len(body)) != expectedSize { + return nil, fmt.Errorf( + "objectstore/packed: pack %q inflated size mismatch: got %d want %d", + pack.name, + len(body), + expectedSize, + ) + } + return body, nil +} diff --git a/objectstore/packed/entry_parse.go b/objectstore/packed/entry_parse.go new file mode 100644 index 00000000..e3cbeac3 --- /dev/null +++ b/objectstore/packed/entry_parse.go @@ -0,0 +1,117 @@ +package packed + +import ( + "fmt" + + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objecttype" +) + +// entryMeta describes one parsed pack entry header. +type entryMeta struct { + // ty is the pack entry type tag. + ty objecttype.Type + // size is the declared resulting content size. + size int64 + // dataOffset points to the zlib payload start. + dataOffset int + // baseRefID is set for ref-delta entries. + baseRefID objectid.ObjectID + // baseOfs is set for ofs-delta entries. + baseOfs uint64 +} + +// parseEntryMeta parses one pack entry header at offset. 
+func parseEntryMeta(pack *packFile, algo objectid.Algorithm, offset uint64) (entryMeta, error) { + var zero entryMeta + if offset >= uint64(len(pack.data)) { + return zero, fmt.Errorf("objectstore/packed: pack %q offset %d out of bounds", pack.name, offset) + } + + pos := int(offset) + first := pack.data[pos] + pos++ + + meta := entryMeta{ + ty: objecttype.Type((first >> 4) & 0x07), + size: int64(first & 0x0f), + } + + shift := uint(4) + b := first + for b&0x80 != 0 { + if pos >= len(pack.data) { + return zero, fmt.Errorf("objectstore/packed: pack %q truncated entry header", pack.name) + } + b = pack.data[pos] + pos++ + meta.size |= int64(b&0x7f) << shift + shift += 7 + } + if meta.size < 0 { + return zero, fmt.Errorf("objectstore/packed: pack %q entry has negative size", pack.name) + } + + switch meta.ty { + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + // Base object entries have no extra header fields. + case objecttype.TypeRefDelta: + hashSize := algo.Size() + if pos+hashSize > len(pack.data) { + return zero, fmt.Errorf("objectstore/packed: pack %q truncated ref-delta base id", pack.name) + } + baseID, err := objectid.FromBytes(algo, pack.data[pos:pos+hashSize]) + if err != nil { + return zero, err + } + meta.baseRefID = baseID + pos += hashSize + case objecttype.TypeOfsDelta: + dist, consumed, err := parseOfsDeltaDistance(pack.data[pos:]) + if err != nil { + return zero, err + } + pos += consumed + if offset <= dist { + return zero, fmt.Errorf("objectstore/packed: pack %q has invalid ofs-delta base", pack.name) + } + meta.baseOfs = offset - dist + default: + return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported object type %d", pack.name, meta.ty) + } + + meta.dataOffset = pos + if meta.dataOffset > len(pack.data) { + return zero, fmt.Errorf("objectstore/packed: pack %q entry data offset out of bounds", pack.name) + } + return meta, nil +} + +// parseOfsDeltaDistance parses one ofs-delta backward 
distance. +func parseOfsDeltaDistance(buf []byte) (uint64, int, error) { + if len(buf) == 0 { + return 0, 0, fmt.Errorf("objectstore/packed: malformed ofs-delta distance") + } + b := buf[0] + dist := uint64(b & 0x7f) + consumed := 1 + for b&0x80 != 0 { + if consumed >= len(buf) { + return 0, 0, fmt.Errorf("objectstore/packed: malformed ofs-delta distance") + } + b = buf[consumed] + consumed++ + dist = ((dist + 1) << 7) + uint64(b&0x7f) + } + return dist, consumed, nil +} + +// isBaseObjectType reports whether ty is one of the four canonical object types. +func isBaseObjectType(ty objecttype.Type) bool { + switch ty { + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + return true + default: + return false + } +} diff --git a/objectstore/packed/helpers_test.go b/objectstore/packed/helpers_test.go new file mode 100644 index 00000000..5af44f66 --- /dev/null +++ b/objectstore/packed/helpers_test.go @@ -0,0 +1,96 @@ +package packed_test + +import ( + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + "codeberg.org/lindenii/furgit/objectheader" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objectstore/packed" + "codeberg.org/lindenii/furgit/objecttype" +) + +func openPackedStore(t *testing.T, repoPath string, algo objectid.Algorithm) *packed.Store { + t.Helper() + packPath := filepath.Join(repoPath, "objects", "pack") + root, err := os.OpenRoot(packPath) + if err != nil { + t.Fatalf("OpenRoot(%q): %v", packPath, err) + } + t.Cleanup(func() { _ = root.Close() }) + + store, err := packed.New(root, algo) + if err != nil { + t.Fatalf("packed.New: %v", err) + } + return store +} + +func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte { + t.Helper() + data, err := io.ReadAll(reader) + if err != nil { + _ = reader.Close() + t.Fatalf("ReadAll: %v", err) + } + if err := reader.Close(); err != nil { + t.Fatalf("Close: 
%v", err) + } + return data +} + +func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) { + t.Helper() + + typeName := testRepo.Run(t, "cat-file", "-t", id.String()) + ty, ok := objecttype.ParseName(typeName) + if !ok { + t.Fatalf("ParseName(%q) failed", typeName) + } + body := testRepo.CatFile(t, typeName, id) + header, ok := objectheader.Encode(ty, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + return ty, body, raw +} + +func createPackedFixtureRepo(t *testing.T, algo objectid.Algorithm) (*testgit.TestRepo, []objectid.ObjectID) { + t.Helper() + + testRepo := testgit.NewBareRepo(t, algo) + blobID, treeID, commitID := testRepo.MakeCommit(t, "packed store base commit") + testRepo.Run(t, "update-ref", "refs/heads/main", commitID.String()) + tagID := testRepo.TagAnnotated(t, "v1.0.0", commitID, "packed-store-tag") + + parent := commitID + for i := range 24 { + content := "common-prefix\n" + strings.Repeat("line-"+strconv.Itoa(i%3)+"\n", 256) + fmt.Sprintf("tail-%d\n", i) + nextBlob, nextTree := testRepo.MakeSingleFileTree(t, fmt.Sprintf("file-%02d.txt", i), []byte(content)) + nextCommit := testRepo.CommitTree(t, nextTree, fmt.Sprintf("commit-%02d", i), parent) + testRepo.Run(t, "update-ref", "refs/heads/main", nextCommit.String()) + parent = nextCommit + + _ = nextBlob + _ = nextTree + } + + testRepo.Repack(t, "-a", "-d", "-f", "--window=64", "--depth=64") + return testRepo, []objectid.ObjectID{ + blobID, + treeID, + commitID, + tagID, + parent, + } +} diff --git a/objectstore/packed/idx_load.go b/objectstore/packed/idx_load.go new file mode 100644 index 00000000..106701fd --- /dev/null +++ b/objectstore/packed/idx_load.go @@ -0,0 +1,145 @@ +package packed + +import ( + "fmt" + "os" + "slices" + "strings" + "syscall" + + "codeberg.org/lindenii/furgit/objectid" +) + +// 
location identifies one object entry in a specific pack file. +type location struct { + packName string + offset uint64 +} + +// idxFile stores one mapped and validated idx v2 file. +type idxFile struct { + // idxName is the basename of this .idx file. + idxName string + // packName is the matching .pack basename. + packName string + // algo is the hash algorithm encoded by the index. + algo objectid.Algorithm + + // file is the opened index file descriptor. + file *os.File + // data is the mapped index bytes. + data []byte + + // fanout stores fanout table values. + fanout [256]uint32 + // numObjects equals fanout[255]. + numObjects int + + // namesOffset starts the sorted object-id table. + namesOffset int + // offset32Offset starts the 32-bit offset table. + offset32Offset int + // offset64Offset starts the 64-bit offset table. + offset64Offset int + // offset64Count is the number of 64-bit offset entries. + offset64Count int +} + +// loadIndexes loads and validates all .idx files under objects/pack. 
+func (store *Store) loadIndexes() ([]*idxFile, error) { + dir, err := store.root.Open(".") + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + defer func() { _ = dir.Close() }() + entries, err := dir.ReadDir(-1) + if err != nil { + return nil, err + } + + idxNames := make([]string, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { + continue + } + idxNames = append(idxNames, entry.Name()) + } + slices.Sort(idxNames) + + out := make([]*idxFile, 0, len(idxNames)) + for _, idxName := range idxNames { + packName := strings.TrimSuffix(idxName, ".idx") + ".pack" + if _, err := store.root.Stat(packName); err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("objectstore/packed: missing pack file for index %q", idxName) + } + return nil, err + } + index, err := openIdxFile(store.root, idxName, packName, store.algo) + if err != nil { + for _, loaded := range out { + _ = loaded.close() + } + return nil, err + } + out = append(out, index) + } + return out, nil +} + +// openIdxFile maps and validates one idx v2 file. +func openIdxFile(root *os.Root, idxName, packName string, algo objectid.Algorithm) (*idxFile, error) { + file, err := root.Open(idxName) + if err != nil { + return nil, err + } + info, err := file.Stat() + if err != nil { + _ = file.Close() + return nil, err + } + size := info.Size() + if size < 0 || size > int64(int(^uint(0)>>1)) { + _ = file.Close() + return nil, fmt.Errorf("objectstore/packed: idx %q has unsupported size", idxName) + } + data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + _ = file.Close() + return nil, err + } + + index := &idxFile{ + idxName: idxName, + packName: packName, + algo: algo, + file: file, + data: data, + } + if err := index.parse(); err != nil { + _ = index.close() + return nil, err + } + return index, nil +} + +// close unmaps and closes one idx handle. 
+func (index *idxFile) close() error { + var closeErr error + if index.data != nil { + if err := syscall.Munmap(index.data); err != nil && closeErr == nil { + closeErr = err + } + index.data = nil + } + if index.file != nil { + if err := index.file.Close(); err != nil && closeErr == nil { + closeErr = err + } + index.file = nil + } + return closeErr +} diff --git a/objectstore/packed/idx_parse.go b/objectstore/packed/idx_parse.go new file mode 100644 index 00000000..a6adc721 --- /dev/null +++ b/objectstore/packed/idx_parse.go @@ -0,0 +1,142 @@ +package packed + +import ( + "bytes" + "encoding/binary" + "fmt" + + "codeberg.org/lindenii/furgit/objectid" +) + +const ( + idxMagicV2 = 0xff744f63 + idxVersionV2 = 2 +) + +// parse validates mapped idx v2 structure and stores table boundaries. +func (index *idxFile) parse() error { + hashSize := index.algo.Size() + if hashSize <= 0 { + return fmt.Errorf("objectstore/packed: idx %q has invalid hash algorithm", index.idxName) + } + minLen := 8 + 256*4 + 2*hashSize + if len(index.data) < minLen { + return fmt.Errorf("objectstore/packed: idx %q too short", index.idxName) + } + if binary.BigEndian.Uint32(index.data[:4]) != idxMagicV2 { + return fmt.Errorf("objectstore/packed: idx %q invalid magic", index.idxName) + } + if binary.BigEndian.Uint32(index.data[4:8]) != idxVersionV2 { + return fmt.Errorf("objectstore/packed: idx %q unsupported version", index.idxName) + } + + prev := uint32(0) + for i := range 256 { + base := 8 + i*4 + cur := binary.BigEndian.Uint32(index.data[base : base+4]) + if cur < prev { + return fmt.Errorf("objectstore/packed: idx %q has non-monotonic fanout table", index.idxName) + } + index.fanout[i] = cur + prev = cur + } + index.numObjects = int(index.fanout[255]) + if index.numObjects < 0 { + return fmt.Errorf("objectstore/packed: idx %q has invalid object count", index.idxName) + } + + namesBytes := index.numObjects * hashSize + crcBytes := index.numObjects * 4 + offset32Bytes := index.numObjects * 4 + 
minSize := 8 + 256*4 + namesBytes + crcBytes + offset32Bytes + 2*hashSize + if minSize < 0 || len(index.data) < minSize { + return fmt.Errorf("objectstore/packed: idx %q has truncated tables", index.idxName) + } + + index.namesOffset = 8 + 256*4 + index.offset32Offset = index.namesOffset + namesBytes + crcBytes + index.offset64Offset = index.offset32Offset + offset32Bytes + + offset64Bytes := len(index.data) - index.offset64Offset - 2*hashSize + if offset64Bytes < 0 || offset64Bytes%8 != 0 { + return fmt.Errorf("objectstore/packed: idx %q has malformed 64-bit offset table", index.idxName) + } + index.offset64Count = offset64Bytes / 8 + maxOffset64Count := index.numObjects - 1 + if maxOffset64Count < 0 { + maxOffset64Count = 0 + } + if index.offset64Count > maxOffset64Count { + return fmt.Errorf("objectstore/packed: idx %q has oversized 64-bit offset table", index.idxName) + } + return nil +} + +// lookup resolves one object ID to its pack offset within this index. +func (index *idxFile) lookup(id objectid.ObjectID) (uint64, bool, error) { + if id.Algorithm() != index.algo { + return 0, false, fmt.Errorf("objectstore/packed: object id algorithm mismatch") + } + idBytes := (&id).RawBytes() + hashSize := len(idBytes) + if hashSize != index.algo.Size() { + return 0, false, fmt.Errorf("objectstore/packed: unexpected object id length") + } + + first := int(idBytes[0]) + lo := 0 + if first > 0 { + lo = int(index.fanout[first-1]) + } + hi := int(index.fanout[first]) + if lo < 0 || hi < 0 || lo > hi || hi > index.numObjects { + return 0, false, fmt.Errorf("objectstore/packed: idx %q has invalid fanout bounds", index.idxName) + } + + for lo < hi { + mid := lo + (hi-lo)/2 + nameOffset := index.namesOffset + mid*hashSize + if nameOffset < 0 || nameOffset+hashSize > len(index.data) { + return 0, false, fmt.Errorf("objectstore/packed: idx %q truncated name table", index.idxName) + } + cmp := bytes.Compare(index.data[nameOffset:nameOffset+hashSize], idBytes) + if cmp == 0 { + 
offset, err := index.offsetAt(mid) + if err != nil { + return 0, false, err + } + return offset, true, nil + } + if cmp < 0 { + lo = mid + 1 + } else { + hi = mid + } + } + return 0, false, nil +} + +// offsetAt resolves the pack offset for one object index entry. +func (index *idxFile) offsetAt(objectIndex int) (uint64, error) { + if objectIndex < 0 || objectIndex >= index.numObjects { + return 0, fmt.Errorf("objectstore/packed: idx %q offset index out of bounds", index.idxName) + } + wordOffset := index.offset32Offset + objectIndex*4 + if wordOffset < 0 || wordOffset+4 > len(index.data) { + return 0, fmt.Errorf("objectstore/packed: idx %q truncated 32-bit offset table", index.idxName) + } + word := binary.BigEndian.Uint32(index.data[wordOffset : wordOffset+4]) + if word&0x80000000 == 0 { + return uint64(word), nil + } + + pos := int(word & 0x7fffffff) + if pos < 0 || pos >= index.offset64Count { + return 0, fmt.Errorf("objectstore/packed: idx %q invalid 64-bit offset position", index.idxName) + } + offOffset := index.offset64Offset + pos*8 + if offOffset < 0 || offOffset+8 > len(index.data)-2*index.algo.Size() { + return 0, fmt.Errorf("objectstore/packed: idx %q truncated 64-bit offset table", index.idxName) + } + return binary.BigEndian.Uint64(index.data[offOffset : offOffset+8]), nil +} diff --git a/objectstore/packed/pack.go b/objectstore/packed/pack.go new file mode 100644 index 00000000..46eca524 --- /dev/null +++ b/objectstore/packed/pack.go @@ -0,0 +1,62 @@ +package packed + +import ( + "encoding/binary" + "fmt" + "os" + "syscall" +) + +const packSignature = 0x5041434b + +// packFile stores one mapped and validated .pack file. +type packFile struct { + // name is the .pack basename. + name string + // file is the opened pack file descriptor. + file *os.File + // data is the mapped pack bytes. + data []byte +} + +// openPackFile maps and validates one pack file. 
+func openPackFile(name string, file *os.File, size int64) (*packFile, error) { + if size < 12 { + return nil, fmt.Errorf("objectstore/packed: pack %q too short", name) + } + if size > int64(int(^uint(0)>>1)) { + return nil, fmt.Errorf("objectstore/packed: pack %q has unsupported size", name) + } + data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + return nil, err + } + if binary.BigEndian.Uint32(data[:4]) != packSignature { + _ = syscall.Munmap(data) + return nil, fmt.Errorf("objectstore/packed: pack %q invalid signature", name) + } + version := binary.BigEndian.Uint32(data[4:8]) + if version != 2 && version != 3 { + _ = syscall.Munmap(data) + return nil, fmt.Errorf("objectstore/packed: pack %q unsupported version %d", name, version) + } + return &packFile{name: name, file: file, data: data}, nil +} + +// close unmaps and closes one pack handle. +func (pack *packFile) close() error { + var closeErr error + if pack.data != nil { + if err := syscall.Munmap(pack.data); err != nil && closeErr == nil { + closeErr = err + } + pack.data = nil + } + if pack.file != nil { + if err := pack.file.Close(); err != nil && closeErr == nil { + closeErr = err + } + pack.file = nil + } + return closeErr +} diff --git a/objectstore/packed/read_bytes.go b/objectstore/packed/read_bytes.go new file mode 100644 index 00000000..b6f42a0d --- /dev/null +++ b/objectstore/packed/read_bytes.go @@ -0,0 +1,34 @@ +package packed + +import ( + "fmt" + + "codeberg.org/lindenii/furgit/objectheader" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objecttype" +) + +// ReadBytesContent reads an object's type and content bytes. 
+func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + return store.deltaResolveContent(loc) +} + +// ReadBytesFull reads a full serialized object as "type size\0content". +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + ty, content, err := store.ReadBytesContent(id) + if err != nil { + return nil, err + } + header, ok := objectheader.Encode(ty, int64(len(content))) + if !ok { + return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", ty) + } + out := make([]byte, len(header)+len(content)) + copy(out, header) + copy(out[len(header):], content) + return out, nil +} diff --git a/objectstore/packed/read_header.go b/objectstore/packed/read_header.go new file mode 100644 index 00000000..c72188b9 --- /dev/null +++ b/objectstore/packed/read_header.go @@ -0,0 +1,19 @@ +package packed + +import ( + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objecttype" +) + +// ReadHeader reads an object's type and declared content size. +func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + plan, err := store.deltaPlanFor(loc) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + return plan.baseType, plan.declaredSize, nil +} diff --git a/objectstore/packed/read_reader.go b/objectstore/packed/read_reader.go new file mode 100644 index 00000000..4f40792d --- /dev/null +++ b/objectstore/packed/read_reader.go @@ -0,0 +1,93 @@ +package packed + +import ( + "bytes" + "fmt" + "io" + + "codeberg.org/lindenii/furgit/objectheader" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objecttype" +) + +// readCloser proxies reads and closes one underlying closer. 
+type readCloser struct { + reader io.Reader + closer io.Closer +} + +// Read proxies reads to the underlying reader. +func (reader *readCloser) Read(dst []byte) (int, error) { + return reader.reader.Read(dst) +} + +// Close closes the underlying closer. +func (reader *readCloser) Close() error { + return reader.closer.Close() +} + +// ReadReaderContent reads an object's type, declared content size, and content stream. +// +// The caller must close the returned reader. +func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + pack, meta, err := store.entryMetaAt(loc) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + if isBaseObjectType(meta.ty) { + zr, err := zlibReaderAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + return meta.ty, meta.size, &readCloser{ + reader: io.LimitReader(zr, meta.size), + closer: zr, + }, nil + } + + ty, content, err := store.deltaResolveContent(loc) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil +} + +// ReadReaderFull reads a full serialized object stream as "type size\0content". +// +// The caller must close the returned reader. 
+func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + loc, err := store.lookup(id) + if err != nil { + return nil, err + } + + pack, meta, err := store.entryMetaAt(loc) + if err != nil { + return nil, err + } + if isBaseObjectType(meta.ty) { + header, ok := objectheader.Encode(meta.ty, meta.size) + if !ok { + return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", meta.ty) + } + zr, err := zlibReaderAt(pack, meta.dataOffset) + if err != nil { + return nil, err + } + return &readCloser{ + reader: io.MultiReader(bytes.NewReader(header), io.LimitReader(zr, meta.size)), + closer: zr, + }, nil + } + + raw, err := store.ReadBytesFull(id) + if err != nil { + return nil, err + } + return io.NopCloser(bytes.NewReader(raw)), nil +} diff --git a/objectstore/packed/read_test.go b/objectstore/packed/read_test.go new file mode 100644 index 00000000..9244d573 --- /dev/null +++ b/objectstore/packed/read_test.go @@ -0,0 +1,149 @@ +package packed_test + +import ( + "bytes" + "errors" + "os" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objectstore" + "codeberg.org/lindenii/furgit/objectstore/packed" +) + +func TestPackedStoreReadAgainstGit(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + testRepo, ids := createPackedFixtureRepo(t, algo) + store := openPackedStore(t, testRepo.Dir(), algo) + + for _, id := range ids { + id := id + t.Run(id.String(), func(t *testing.T) { + wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, id) + + gotHeaderType, gotHeaderSize, err := store.ReadHeader(id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + if gotHeaderType != wantType { + t.Fatalf("ReadHeader type = %v, want %v", gotHeaderType, wantType) + } + if gotHeaderSize != int64(len(wantBody)) { + t.Fatalf("ReadHeader size = %d, want %d", gotHeaderSize, len(wantBody)) + 
} + + gotRaw, err := store.ReadBytesFull(id) + if err != nil { + t.Fatalf("ReadBytesFull: %v", err) + } + if !bytes.Equal(gotRaw, wantRaw) { + t.Fatalf("ReadBytesFull mismatch") + } + + gotType, gotBody, err := store.ReadBytesContent(id) + if err != nil { + t.Fatalf("ReadBytesContent: %v", err) + } + if gotType != wantType { + t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType) + } + if !bytes.Equal(gotBody, wantBody) { + t.Fatalf("ReadBytesContent mismatch") + } + + fullReader, err := store.ReadReaderFull(id) + if err != nil { + t.Fatalf("ReadReaderFull: %v", err) + } + if got := mustReadAllAndClose(t, fullReader); !bytes.Equal(got, wantRaw) { + t.Fatalf("ReadReaderFull mismatch") + } + + contentType, contentSize, contentReader, err := store.ReadReaderContent(id) + if err != nil { + t.Fatalf("ReadReaderContent: %v", err) + } + if contentType != wantType { + t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType) + } + if contentSize != int64(len(wantBody)) { + t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody)) + } + if got := mustReadAllAndClose(t, contentReader); !bytes.Equal(got, wantBody) { + t.Fatalf("ReadReaderContent mismatch") + } + }) + } + }) +} + +func TestPackedStoreErrors(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + testRepo, _ := createPackedFixtureRepo(t, algo) + store := openPackedStore(t, testRepo.Dir(), algo) + + notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(notFound): %v", err) + } + + if _, err := store.ReadBytesFull(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesFull not-found error = %v", err) + } + if _, _, err := store.ReadBytesContent(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesContent not-found error = %v", err) + } + if _, err := store.ReadReaderFull(notFoundID); !errors.Is(err, 
objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderFull not-found error = %v", err) + } + if _, _, _, err := store.ReadReaderContent(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderContent not-found error = %v", err) + } + if _, _, err := store.ReadHeader(notFoundID); !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadHeader not-found error = %v", err) + } + + var otherAlgo objectid.Algorithm + for _, candidate := range objectid.SupportedAlgorithms() { + if candidate != algo { + otherAlgo = candidate + break + } + } + if otherAlgo != objectid.AlgorithmUnknown { + mismatchID, err := objectid.ParseHex(otherAlgo, strings.Repeat("0", otherAlgo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(mismatch): %v", err) + } + if _, err := store.ReadBytesFull(mismatchID); err == nil || !strings.Contains(err.Error(), "algorithm mismatch") { + t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err) + } + } + }) +} + +func TestPackedStoreNewValidation(t *testing.T) { + testRepo, _ := createPackedFixtureRepo(t, objectid.AlgorithmSHA1) + store := openPackedStore(t, testRepo.Dir(), objectid.AlgorithmSHA1) + if err := store.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + if err := store.Close(); err != nil { + t.Fatalf("Close second: %v", err) + } +} + +func TestPackedStoreInvalidAlgorithm(t *testing.T) { + testRepo := testgit.NewBareRepo(t, objectid.AlgorithmSHA1) + root, err := os.OpenRoot(testRepo.Dir()) + if err != nil { + t.Fatalf("OpenRoot(%q): %v", testRepo.Dir(), err) + } + t.Cleanup(func() { _ = root.Close() }) + + if _, err := packed.New(root, objectid.AlgorithmUnknown); !errors.Is(err, objectid.ErrInvalidAlgorithm) { + t.Fatalf("packed.New invalid algorithm error = %v", err) + } +} diff --git a/objectstore/packed/store.go b/objectstore/packed/store.go new file mode 100644 index 00000000..d780245d --- /dev/null +++ b/objectstore/packed/store.go @@ -0,0 +1,182 @@ +// Package packed provides read access to 
packed Git objects from objects/pack. +package packed + +import ( + "errors" + "os" + "sync" + + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/objectstore" +) + +// Store reads Git objects from pack/index files under an objects/pack root. +// +// Store does not own root. Callers are responsible for closing root. +type Store struct { + // root is the objects/pack capability used for all file access. + root *os.Root + // algo is the expected object ID algorithm for lookups. + algo objectid.Algorithm + + // loadOnce guards one-time index loading. + loadOnce sync.Once + // loadErr stores index loading failures. + loadErr error + // indexesLoaded reports whether indexes/loadErr have been initialized. + indexesLoaded bool + // indexes stores parsed .idx handles. + indexes []*idxFile + + // stateMu guards index publication, pack cache, and close state. + stateMu sync.RWMutex + // cacheMu guards delta cache operations. + cacheMu sync.RWMutex + // packs caches opened .pack handles by basename. + packs map[string]*packFile + // deltaCache caches resolved base objects by pack location. + deltaCache *deltaCache + // closed reports whether Close has been called. + closed bool +} + +const defaultDeltaCacheMaxBytes = 32 << 20 + +var _ objectstore.Store = (*Store)(nil) + +// New creates a packed-object store rooted at an objects/pack directory. +func New(root *os.Root, algo objectid.Algorithm) (*Store, error) { + if algo.Size() == 0 { + return nil, objectid.ErrInvalidAlgorithm + } + return &Store{ + root: root, + algo: algo, + packs: make(map[string]*packFile), + deltaCache: newDeltaCache(defaultDeltaCacheMaxBytes), + }, nil +} + +// Close releases mapped pack/index resources associated with the store. 
+func (store *Store) Close() error { + store.stateMu.Lock() + if store.closed { + store.stateMu.Unlock() + return nil + } + store.closed = true + packs := store.packs + store.packs = make(map[string]*packFile) + indexes := store.indexes + store.indexes = nil + store.stateMu.Unlock() + + var closeErr error + for _, pack := range packs { + if err := pack.close(); err != nil && closeErr == nil { + closeErr = err + } + } + for _, index := range indexes { + if index == nil { + continue + } + if err := index.close(); err != nil && closeErr == nil { + closeErr = err + } + } + store.cacheMu.Lock() + if store.deltaCache != nil { + store.deltaCache.clear() + } + store.cacheMu.Unlock() + return closeErr +} + +// ensureIndexes loads and validates all pack indexes once. +func (store *Store) ensureIndexes() error { + store.loadOnce.Do(func() { + indexes, err := store.loadIndexes() + store.stateMu.Lock() + store.indexes = indexes + store.loadErr = err + store.indexesLoaded = true + store.stateMu.Unlock() + }) + + store.stateMu.RLock() + defer store.stateMu.RUnlock() + if store.indexesLoaded { + return store.loadErr + } + return errors.New("objectstore/packed: indexes were not initialized") +} + +// lookup resolves one object ID to its pack location. +func (store *Store) lookup(id objectid.ObjectID) (location, error) { + var zero location + if id.Algorithm() != store.algo { + return zero, errors.New("objectstore/packed: object id algorithm mismatch") + } + if err := store.ensureIndexes(); err != nil { + return zero, err + } + for _, index := range store.indexes { + offset, ok, err := index.lookup(id) + if err != nil { + return zero, err + } + if ok { + return location{packName: index.packName, offset: offset}, nil + } + } + return zero, objectstore.ErrObjectNotFound +} + +// openPack returns one opened and validated pack handle. 
+func (store *Store) openPack(name string) (*packFile, error) { + store.stateMu.RLock() + if pack, ok := store.packs[name]; ok { + store.stateMu.RUnlock() + return pack, nil + } + store.stateMu.RUnlock() + + file, err := store.root.Open(name) + if err != nil { + return nil, err + } + info, err := file.Stat() + if err != nil { + _ = file.Close() + return nil, err + } + pack, err := openPackFile(name, file, info.Size()) + if err != nil { + _ = file.Close() + return nil, err + } + + store.stateMu.Lock() + if existing, ok := store.packs[name]; ok { + store.stateMu.Unlock() + _ = pack.close() + return existing, nil + } + store.packs[name] = pack + store.stateMu.Unlock() + return pack, nil +} + +// entryMetaAt parses one pack entry header at location. +func (store *Store) entryMetaAt(loc location) (*packFile, entryMeta, error) { + pack, err := store.openPack(loc.packName) + if err != nil { + return nil, entryMeta{}, err + } + meta, err := parseEntryMeta(pack, store.algo, loc.offset) + if err != nil { + return nil, entryMeta{}, err + } + return pack, meta, nil +} |
