diff options
99 files changed, 4225 insertions, 22 deletions
diff --git a/cmd/index-pack/main.go b/cmd/index-pack/main.go index d90e6c33..fb6e9062 100644 --- a/cmd/index-pack/main.go +++ b/cmd/index-pack/main.go @@ -36,7 +36,7 @@ func main() { func run(repoPath, destinationPath, objectFormat string, fixThin, writeRev bool) error { var ( algo objectid.Algorithm - base objectstore.Store + base objectstore.ReadingStore repo *repository.Repository ) diff --git a/commitquery/context.go b/commitquery/context.go index 35fc47cf..3e9ceee8 100644 --- a/commitquery/context.go +++ b/commitquery/context.go @@ -10,7 +10,7 @@ import ( // Query owns the mutable node arena for commit-domain queries over one object // store. type Query struct { - store objectstore.Store + store objectstore.ReadingStore graph *commitgraphread.Reader nodes []node @@ -24,7 +24,7 @@ type Query struct { // New builds one reusable commit query arena over one object store and optional // commit-graph reader. -func New(store objectstore.Store, graph *commitgraphread.Reader) *Query { +func New(store objectstore.ReadingStore, graph *commitgraphread.Reader) *Query { return &Query{ store: store, graph: graph, diff --git a/format/packfile/ingest/api.go b/format/packfile/ingest/api.go index bb57eb48..d8a1998e 100644 --- a/format/packfile/ingest/api.go +++ b/format/packfile/ingest/api.go @@ -18,7 +18,7 @@ type Options struct { // WriteRev writes a .rev alongside the .pack and .idx. WriteRev bool // Base supplies existing objects for thin-pack fixup. - Base objectstore.Store + Base objectstore.ReadingStore // Progress receives human-readable progress messages. // // When nil, no progress output is emitted. diff --git a/internal/peel/peel.go b/internal/peel/peel.go index bad1b551..41d84e10 100644 --- a/internal/peel/peel.go +++ b/internal/peel/peel.go @@ -12,7 +12,7 @@ import ( ) // ToCommit peels annotated tags transitively until a commit is reached. 
-func ToCommit(store objectstore.Store, id objectid.ObjectID) (objectid.ObjectID, error) { +func ToCommit(store objectstore.ReadingStore, id objectid.ObjectID) (objectid.ObjectID, error) { for { ty, _, err := store.ReadHeader(id) if err != nil { diff --git a/internal/testgit/repo_open_object_store.go b/internal/testgit/repo_open_object_store.go index 9d65265b..a001d767 100644 --- a/internal/testgit/repo_open_object_store.go +++ b/internal/testgit/repo_open_object_store.go @@ -11,7 +11,7 @@ import ( // the caller. // //nolint:ireturn -func (testRepo *TestRepo) OpenObjectStore(tb testing.TB) objectstore.Store { +func (testRepo *TestRepo) OpenObjectStore(tb testing.TB) objectstore.ReadingStore { tb.Helper() root := testRepo.OpenGitRoot(tb) diff --git a/network/receivepack/hook.go b/network/receivepack/hook.go index a26b8dbb..32101fa6 100644 --- a/network/receivepack/hook.go +++ b/network/receivepack/hook.go @@ -35,8 +35,8 @@ type UpdateDecision struct { // valid for the duration of the hook call. type HookRequest struct { Refs refstore.ReadingStore - ExistingObjects objectstore.Store - QuarantinedObjects objectstore.Store + ExistingObjects objectstore.ReadingStore + QuarantinedObjects objectstore.ReadingStore Updates []RefUpdate PushOptions []string IO HookIO diff --git a/network/receivepack/options.go b/network/receivepack/options.go index d9bf555e..9dd57b1f 100644 --- a/network/receivepack/options.go +++ b/network/receivepack/options.go @@ -25,7 +25,7 @@ type Options struct { Refs refstore.ReadWriteStore // ExistingObjects is the object store visible to the push before any newly // uploaded quarantined objects are promoted. - ExistingObjects objectstore.Store + ExistingObjects objectstore.ReadingStore // ObjectsRoot is the permanent object storage root beneath which per-push // quarantine directories are derived. 
ObjectsRoot *os.Root diff --git a/network/receivepack/service/hook.go b/network/receivepack/service/hook.go index 6dadae4c..c3be2a76 100644 --- a/network/receivepack/service/hook.go +++ b/network/receivepack/service/hook.go @@ -31,8 +31,8 @@ type UpdateDecision struct { // valid for the duration of the hook call. type HookRequest struct { Refs refstore.ReadingStore - ExistingObjects objectstore.Store - QuarantinedObjects objectstore.Store + ExistingObjects objectstore.ReadingStore + QuarantinedObjects objectstore.ReadingStore Updates []RefUpdate PushOptions []string IO HookIO diff --git a/network/receivepack/service/options.go b/network/receivepack/service/options.go index c4059a4c..783afd47 100644 --- a/network/receivepack/service/options.go +++ b/network/receivepack/service/options.go @@ -26,7 +26,7 @@ type PromotedObjectPermissions struct { type Options struct { Algorithm objectid.Algorithm Refs refstore.ReadWriteStore - ExistingObjects objectstore.Store + ExistingObjects objectstore.ReadingStore ObjectsRoot *os.Root Progress io.Writer ProgressFlush func() error diff --git a/network/receivepack/service/run_hook.go b/network/receivepack/service/run_hook.go index ab7cee96..dbfb21f7 100644 --- a/network/receivepack/service/run_hook.go +++ b/network/receivepack/service/run_hook.go @@ -40,7 +40,7 @@ func (service *Service) runHook( quarantinedObjects := service.opts.ExistingObjects var ( - quarantineObjectsStore objectstore.Store + quarantineObjectsStore objectstore.ReadingStore quarantineLooseStore *loose.Store quarantinePackedStore *packed.Store quarantineLooseRoot *os.Root diff --git a/object/fetch/fetcher.go b/object/fetch/fetcher.go index b0c3d099..b3787480 100644 --- a/object/fetch/fetcher.go +++ b/object/fetch/fetcher.go @@ -6,12 +6,12 @@ import objectstore "codeberg.org/lindenii/furgit/object/store" // // A Fetcher does not take ownership of the store and does not close it. 
type Fetcher struct { - store objectstore.Store + store objectstore.ReadingStore } // New returns a Fetcher that reads objects from store. // // The returned Fetcher does not take ownership of store. -func New(store objectstore.Store) *Fetcher { +func New(store objectstore.ReadingStore) *Fetcher { return &Fetcher{store: store} } diff --git a/object/store/chain/bytes.go b/object/store/chain/bytes.go new file mode 100644 index 00000000..dc9b7906 --- /dev/null +++ b/object/store/chain/bytes.go @@ -0,0 +1,46 @@ +package chain + +import ( + "errors" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadBytesFull reads a full serialized object from the first backend that has it. +func (chain *Chain) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + for i, backend := range chain.backends { + full, err := backend.ReadBytesFull(id) + if err == nil { + return full, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return nil, fmt.Errorf("objectstore: backend %d read bytes full: %w", i, err) + } + + return nil, objectstore.ErrObjectNotFound +} + +// ReadBytesContent reads an object's type and content bytes from the first backend that has it. 
+func (chain *Chain) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + for i, backend := range chain.backends { + ty, content, err := backend.ReadBytesContent(id) + if err == nil { + return ty, content, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return objecttype.TypeInvalid, nil, fmt.Errorf("objectstore: backend %d read bytes content: %w", i, err) + } + + return objecttype.TypeInvalid, nil, objectstore.ErrObjectNotFound +} diff --git a/object/store/chain/chain.go b/object/store/chain/chain.go new file mode 100644 index 00000000..1fb7408a --- /dev/null +++ b/object/store/chain/chain.go @@ -0,0 +1,12 @@ +// Package chain provides a wrapper object storage backend to query a chain of +// backends. +package chain + +import objectstore "codeberg.org/lindenii/furgit/object/store" + +// Chain queries multiple object databases in order. +// +// Chain borrows its backend stores. +type Chain struct { + backends []objectstore.ReadingStore +} diff --git a/object/store/chain/close.go b/object/store/chain/close.go new file mode 100644 index 00000000..6bd74565 --- /dev/null +++ b/object/store/chain/close.go @@ -0,0 +1,8 @@ +package chain + +// Close releases wrapper-local resources. +// +// Chain borrows its backends, so Close does not close them. +// +// Repeated calls to Close are undefined behavior. +func (chain *Chain) Close() error { return nil } diff --git a/object/store/chain/header.go b/object/store/chain/header.go new file mode 100644 index 00000000..f6c92459 --- /dev/null +++ b/object/store/chain/header.go @@ -0,0 +1,28 @@ +package chain + +import ( + "errors" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads object header data from the first backend that has it. 
+func (chain *Chain) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + for i, backend := range chain.backends { + ty, size, err := backend.ReadHeader(id) + if err == nil { + return ty, size, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore: backend %d read header: %w", i, err) + } + + return objecttype.TypeInvalid, 0, objectstore.ErrObjectNotFound +} diff --git a/object/store/chain/new.go b/object/store/chain/new.go new file mode 100644 index 00000000..c78c7c98 --- /dev/null +++ b/object/store/chain/new.go @@ -0,0 +1,13 @@ +package chain + +import objectstore "codeberg.org/lindenii/furgit/object/store" + +// New creates an ordered object database chain. +// +// The provided backends must be non-nil and distinct. +// Chain borrows the provided backends and does not close them in Close. +func New(backends ...objectstore.ReadingStore) *Chain { + return &Chain{ + backends: append([]objectstore.ReadingStore(nil), backends...), + } +} diff --git a/object/store/chain/reader.go b/object/store/chain/reader.go new file mode 100644 index 00000000..3991ee9a --- /dev/null +++ b/object/store/chain/reader.go @@ -0,0 +1,47 @@ +package chain + +import ( + "errors" + "fmt" + "io" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadReaderFull reads a full serialized object stream from the first backend that has it. 
+func (chain *Chain) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + for i, backend := range chain.backends { + reader, err := backend.ReadReaderFull(id) + if err == nil { + return reader, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return nil, fmt.Errorf("objectstore: backend %d read reader full: %w", i, err) + } + + return nil, objectstore.ErrObjectNotFound +} + +// ReadReaderContent reads an object's type, declared content length, and content stream from the first backend that has it. +func (chain *Chain) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + for i, backend := range chain.backends { + ty, size, reader, err := backend.ReadReaderContent(id) + if err == nil { + return ty, size, reader, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return objecttype.TypeInvalid, 0, nil, fmt.Errorf("objectstore: backend %d read reader content: %w", i, err) + } + + return objecttype.TypeInvalid, 0, nil, objectstore.ErrObjectNotFound +} diff --git a/object/store/chain/refresh.go b/object/store/chain/refresh.go new file mode 100644 index 00000000..c47352dc --- /dev/null +++ b/object/store/chain/refresh.go @@ -0,0 +1,17 @@ +package chain + +import "errors" + +// Refresh forwards refresh calls to all backends. +func (chain *Chain) Refresh() error { + var errs []error + + for _, backend := range chain.backends { + err := backend.Refresh() + if err != nil { + errs = append(errs, err) + } + } + + return errors.Join(errs...) +} diff --git a/object/store/chain/size.go b/object/store/chain/size.go new file mode 100644 index 00000000..f0099028 --- /dev/null +++ b/object/store/chain/size.go @@ -0,0 +1,27 @@ +package chain + +import ( + "errors" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// ReadSize reads object content length from the first backend that has it. 
+func (chain *Chain) ReadSize(id objectid.ObjectID) (int64, error) { + for i, backend := range chain.backends { + size, err := backend.ReadSize(id) + if err == nil { + return size, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return 0, fmt.Errorf("objectstore: backend %d read size: %w", i, err) + } + + return 0, objectstore.ErrObjectNotFound +} diff --git a/object/store/cursor.go b/object/store/cursor.go new file mode 100644 index 00000000..c6008ccd --- /dev/null +++ b/object/store/cursor.go @@ -0,0 +1,7 @@ +package objectstore + +// type Cursor any +// +// Then make all read functions accept and provide a Cursor +// nil must always be accepted and would exhibit the same behavior as right now +// Non-nil behavior is implementation-defined: e.g., pack selection diff --git a/object/store/doc.go b/object/store/doc.go new file mode 100644 index 00000000..2a9a428e --- /dev/null +++ b/object/store/doc.go @@ -0,0 +1,10 @@ +// Package objectstore provides interfaces for object storage backends. +// +// There is currently no writing-store interface because different +// object store backends have very different models for writing. +// For example, a loose object store can trivially write single loose +// objects, but writing individual objects to a packfile store would +// be extremely wasteful. +// +// At some point, we will have writing-store interfaces. +package objectstore diff --git a/object/store/errors.go b/object/store/errors.go new file mode 100644 index 00000000..3ee438f7 --- /dev/null +++ b/object/store/errors.go @@ -0,0 +1,12 @@ +package objectstore + +import "errors" + +// ErrObjectNotFound indicates that an object does not exist in a backend. +// This error MUST only be used in situations where the object store does +// not contain the specified object ID, but no other unexpected conditions +// were encountered.
In particular, it is not suitable for situations where one +// object references another (such as a tree referencing a blob) but +// the latter does not exist; these situations should use a separate +// error (TODO). +var ErrObjectNotFound = errors.New("objectstore: object not found") diff --git a/object/store/loose/helpers_test.go b/object/store/loose/helpers_test.go new file mode 100644 index 00000000..e69c7e7a --- /dev/null +++ b/object/store/loose/helpers_test.go @@ -0,0 +1,107 @@ +package loose_test + +import ( + "io" + "os" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store/loose" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func openLooseStore(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) *loose.Store { + t.Helper() + + root := testRepo.OpenObjectsRoot(t) + + store, err := loose.New(root, algo) + if err != nil { + t.Fatalf("loose.New: %v", err) + } + + return store +} + +func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte { + t.Helper() + + data, err := io.ReadAll(reader) + if err != nil { + _ = reader.Close() + + t.Fatalf("ReadAll: %v", err) + } + + err = reader.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + + return data +} + +func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) { + t.Helper() + + typeName := testRepo.Run(t, "cat-file", "-t", id.String()) + + ty, ok := objecttype.ParseName(typeName) + if !ok { + t.Fatalf("ParseName(%q) failed", typeName) + } + + body := testRepo.CatFile(t, typeName, id) + + header, ok := objectheader.Encode(ty, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + + return ty, body, raw +} + +func 
corruptLooseObjectTrailer(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) { + t.Helper() + + root := testRepo.OpenObjectsRoot(t) + + hex := id.String() + relPath := hex[:2] + "/" + hex[2:] + + file, err := root.OpenFile(relPath, os.O_RDWR, 0) + if err != nil { + t.Fatalf("OpenFile(%q): %v", relPath, err) + } + + defer func() { _ = file.Close() }() + + info, err := file.Stat() + if err != nil { + t.Fatalf("Stat(%q): %v", relPath, err) + } + + if info.Size() == 0 { + t.Fatalf("corrupt trailer on empty file %q", relPath) + } + + last := make([]byte, 1) + + _, err = file.ReadAt(last, info.Size()-1) + if err != nil { + t.Fatalf("ReadAt(%q): %v", relPath, err) + } + + last[0] ^= 0xff + + _, err = file.WriteAt(last, info.Size()-1) + if err != nil { + t.Fatalf("WriteAt(%q): %v", relPath, err) + } +} diff --git a/object/store/loose/parse.go b/object/store/loose/parse.go new file mode 100644 index 00000000..dfb420ba --- /dev/null +++ b/object/store/loose/parse.go @@ -0,0 +1,55 @@ +package loose + +import ( + "bufio" + "errors" + "io" + "os" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + objectheader "codeberg.org/lindenii/furgit/object/header" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// decodeAll inflates the full loose object payload from file. +func decodeAll(file *os.File) ([]byte, error) { + zr, err := zlib.NewReader(file) + if err != nil { + return nil, err + } + + defer func() { _ = zr.Close() }() + + return io.ReadAll(zr) +} + +// parseRaw parses a loose object payload in "type size\0content" format. 
+func parseRaw(raw []byte) (objecttype.Type, []byte, error) { + ty, size, headerLen, ok := objectheader.Parse(raw) + if !ok { + return objecttype.TypeInvalid, nil, errors.New("objectstore/loose: malformed object header") + } + + content := raw[headerLen:] + if int64(len(content)) != size { + return objecttype.TypeInvalid, nil, errors.New("objectstore/loose: object header size/content mismatch") + } + + return ty, content, nil +} + +// readHeader reads and parses a loose object header from br, and returns +// the raw header bytes including the trailing NUL. +func readHeader(br *bufio.Reader) ([]byte, objecttype.Type, int64, error) { + header, err := br.ReadSlice(0) + if err != nil { + return nil, objecttype.TypeInvalid, 0, err + } + + ty, size, _, ok := objectheader.Parse(header) + if !ok { + return nil, objecttype.TypeInvalid, 0, errors.New("objectstore/loose: malformed object header") + } + + return header, ty, size, nil +} diff --git a/object/store/loose/paths.go b/object/store/loose/paths.go new file mode 100644 index 00000000..0593cc0d --- /dev/null +++ b/object/store/loose/paths.go @@ -0,0 +1,43 @@ +package loose + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// objectPath returns the loose object path for id relative to the objects root. +func (store *Store) objectPath(id objectid.ObjectID) (string, error) { + if id.Algorithm() != store.algo { + return "", fmt.Errorf("objectstore/loose: object id algorithm mismatch: got %s want %s", id.Algorithm(), store.algo) + } + + hex := id.String() + + return filepath.Join(hex[:2], hex[2:]), nil +} + +// openObject opens the loose object file for id. +// Missing files cause objectstore.ErrObjectNotFound. 
+func (store *Store) openObject(id objectid.ObjectID) (*os.File, error) { + relPath, err := store.objectPath(id) + if err != nil { + return nil, err + } + + file, err := store.root.Open(relPath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil, objectstore.ErrObjectNotFound + } + + return nil, err + } + + return file, nil +} diff --git a/object/store/loose/read_bytes.go b/object/store/loose/read_bytes.go new file mode 100644 index 00000000..0b6da81b --- /dev/null +++ b/object/store/loose/read_bytes.go @@ -0,0 +1,49 @@ +package loose + +import ( + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// readBytesParsed reads, inflates, and parses a loose object in one pass. +// It returns the full raw payload and its parsed type and content. +func (store *Store) readBytesParsed(id objectid.ObjectID) ([]byte, objecttype.Type, []byte, error) { + file, err := store.openObject(id) + if err != nil { + return nil, objecttype.TypeInvalid, nil, err + } + + defer func() { _ = file.Close() }() + + raw, err := decodeAll(file) + if err != nil { + return nil, objecttype.TypeInvalid, nil, err + } + + ty, content, err := parseRaw(raw) + if err != nil { + return nil, objecttype.TypeInvalid, nil, err + } + + return raw, ty, content, nil +} + +// ReadBytesFull reads a full serialized object as "type size\0content". +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + raw, _, _, err := store.readBytesParsed(id) + if err != nil { + return nil, err + } + + return raw, nil +} + +// ReadBytesContent reads an object's type and content bytes. 
+func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + _, ty, content, err := store.readBytesParsed(id) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + return ty, content, nil +} diff --git a/object/store/loose/read_header.go b/object/store/loose/read_header.go new file mode 100644 index 00000000..37bf40de --- /dev/null +++ b/object/store/loose/read_header.go @@ -0,0 +1,37 @@ +package loose + +import ( + "bufio" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads an object's type and declared content length. +// +// It parses only enough of the zlib-decoded object to recover the object +// header. It does not verify that the remaining object content is readable and +// does not verify the zlib Adler-32 trailer. +func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + file, err := store.openObject(id) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + defer func() { _ = file.Close() }() + + zr, err := zlib.NewReader(file) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + defer func() { _ = zr.Close() }() + + _, ty, size, err := readHeader(bufio.NewReader(zr)) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + return ty, size, nil +} diff --git a/object/store/loose/read_reader.go b/object/store/loose/read_reader.go new file mode 100644 index 00000000..29b71627 --- /dev/null +++ b/object/store/loose/read_reader.go @@ -0,0 +1,118 @@ +package loose + +import ( + "bufio" + "bytes" + "errors" + "io" + "os" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + "codeberg.org/lindenii/furgit/internal/iolimit" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +type objectReader struct { + // reader is the stream exposed by Read. 
+ reader io.Reader + // file is the underlying loose object file and is closed by Close. + file *os.File + // zr is the zlib decoder and is closed by Close. + zr io.ReadCloser +} + +func (reader *objectReader) Read(dst []byte) (int, error) { + return reader.reader.Read(dst) +} + +func (reader *objectReader) Close() error { + errZlib := reader.zr.Close() + errFile := reader.file.Close() + + return errors.Join(errZlib, errFile) +} + +// openInflated opens and zlib-decodes a loose object file. +// The caller owns both returned closers and must close them. +func (store *Store) openInflated(id objectid.ObjectID) (*os.File, io.ReadCloser, error) { + file, err := store.openObject(id) + if err != nil { + return nil, nil, err + } + + zr, err := zlib.NewReader(file) + if err != nil { + _ = file.Close() + + return nil, nil, err + } + + return file, zr, nil +} + +// ReadReaderFull reads a full serialized object stream as "type size\0content". +// +// The caller must close the returned reader. +// +// Close releases resources only. It does not drain unread data for additional +// validation. In particular, malformed trailing compressed data, trailing bytes +// past the declared object size, and the zlib Adler-32 trailer may go +// unverified unless the caller reads to io.EOF. +func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + file, zr, err := store.openInflated(id) + if err != nil { + return nil, err + } + + br := bufio.NewReader(zr) + + header, _, size, err := readHeader(br) + if err != nil { + _ = zr.Close() + _ = file.Close() + + return nil, err + } + + return &objectReader{ + reader: io.MultiReader( + bytes.NewReader(header), + iolimit.ExpectLengthReader(br, size), + ), + file: file, + zr: zr, + }, nil +} + +// ReadReaderContent reads an object's type, declared content length, and +// content stream. +// +// The caller must close the returned reader. +// +// Close releases resources only. 
It does not drain unread data for additional +// validation. In particular, malformed trailing compressed data, trailing bytes +// past the declared object size, and the zlib Adler-32 trailer may go +// unverified unless the caller reads to io.EOF. +func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + file, zr, err := store.openInflated(id) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + br := bufio.NewReader(zr) + + _, ty, size, err := readHeader(br) + if err != nil { + _ = zr.Close() + _ = file.Close() + + return objecttype.TypeInvalid, 0, nil, err + } + + return ty, size, &objectReader{ + reader: iolimit.ExpectLengthReader(br, size), + file: file, + zr: zr, + }, nil +} diff --git a/object/store/loose/read_size.go b/object/store/loose/read_size.go new file mode 100644 index 00000000..2ececc49 --- /dev/null +++ b/object/store/loose/read_size.go @@ -0,0 +1,13 @@ +package loose + +import objectid "codeberg.org/lindenii/furgit/object/id" + +// ReadSize reads an object's declared content length. +// +// Like ReadHeader, it parses only enough of the zlib-decoded object to recover +// the header and does not verify the zlib Adler-32 trailer. 
+func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { + _, size, err := store.ReadHeader(id) + + return size, err +} diff --git a/object/store/loose/read_test.go b/object/store/loose/read_test.go new file mode 100644 index 00000000..fcb4fe17 --- /dev/null +++ b/object/store/loose/read_test.go @@ -0,0 +1,212 @@ +package loose_test + +import ( + "bytes" + "errors" + "os" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + "codeberg.org/lindenii/furgit/object/store/loose" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func TestLooseStoreReadAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + blobID := testRepo.HashObject(t, "blob", []byte("blob body\n")) + _, treeID, commitID := testRepo.MakeCommit(t, "subject\n\nbody") + tagID := testRepo.TagAnnotated(t, "v1", commitID, "tag message") + + store := openLooseStore(t, testRepo, algo) + + tests := []struct { + name string + id objectid.ObjectID + }{ + {name: "blob", id: blobID}, + {name: "tree", id: treeID}, + {name: "commit", id: commitID}, + {name: "tag", id: tagID}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, tt.id) + + gotRaw, err := store.ReadBytesFull(tt.id) + if err != nil { + t.Fatalf("ReadBytesFull: %v", err) + } + + if !bytes.Equal(gotRaw, wantRaw) { + t.Fatalf("ReadBytesFull mismatch") + } + + gotType, gotBody, err := store.ReadBytesContent(tt.id) + if err != nil { + t.Fatalf("ReadBytesContent: %v", err) + } + + if gotType != wantType { + t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType) + } + + if !bytes.Equal(gotBody, wantBody) { + t.Fatalf("ReadBytesContent body 
mismatch") + } + + headType, headSize, err := store.ReadHeader(tt.id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + + if headType != wantType { + t.Fatalf("ReadHeader type = %v, want %v", headType, wantType) + } + + if headSize != int64(len(wantBody)) { + t.Fatalf("ReadHeader size = %d, want %d", headSize, len(wantBody)) + } + + fullReader, err := store.ReadReaderFull(tt.id) + if err != nil { + t.Fatalf("ReadReaderFull: %v", err) + } + + got := mustReadAllAndClose(t, fullReader) + if !bytes.Equal(got, wantRaw) { + t.Fatalf("ReadReaderFull stream mismatch") + } + + contentType, contentSize, contentReader, err := store.ReadReaderContent(tt.id) + if err != nil { + t.Fatalf("ReadReaderContent: %v", err) + } + + if contentType != wantType { + t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType) + } + + if contentSize != int64(len(wantBody)) { + t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody)) + } + + got = mustReadAllAndClose(t, contentReader) + if !bytes.Equal(got, wantBody) { + t.Fatalf("ReadReaderContent stream mismatch") + } + }) + } + }) +} + +func TestLooseStoreErrors(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(notFoundID): %v", err) + } + + _, err = store.ReadBytesFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesFull not-found error = %v", err) + } + + _, _, err = store.ReadBytesContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesContent not-found error = %v", err) + } + + _, err = store.ReadReaderFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderFull 
not-found error = %v", err) + } + + _, _, _, err = store.ReadReaderContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderContent not-found error = %v", err) + } + + _, _, err = store.ReadHeader(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadHeader not-found error = %v", err) + } + + var otherAlgo objectid.Algorithm + if algo == objectid.AlgorithmSHA1 { + otherAlgo = objectid.AlgorithmSHA256 + } else { + otherAlgo = objectid.AlgorithmSHA1 + } + + otherID, err := objectid.ParseHex(otherAlgo, strings.Repeat("1", otherAlgo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(otherID): %v", err) + } + + _, err = store.ReadBytesFull(otherID) + if err == nil || !strings.Contains(err.Error(), "algorithm mismatch") { + t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err) + } + }) +} + +func TestLooseStoreNewValidation(t *testing.T) { + t.Parallel() + + root, err := os.OpenRoot(t.TempDir()) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + defer func() { _ = root.Close() }() + + _, err = loose.New(root, objectid.AlgorithmUnknown) + if err == nil { + t.Fatalf("loose.New(root, unknown) expected error") + } +} + +func TestLooseStoreReadHeaderDoesNotVerifyAdler32(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + content := []byte("header-only-check\n") + + id, err := store.WriteBytesContent(objecttype.TypeBlob, content) + if err != nil { + t.Fatalf("WriteBytesContent: %v", err) + } + + corruptLooseObjectTrailer(t, testRepo, id) + + ty, size, err := store.ReadHeader(id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + + if ty != objecttype.TypeBlob { + t.Fatalf("ReadHeader type = %v, want %v", ty, objecttype.TypeBlob) + } + + if size != int64(len(content)) { + 
t.Fatalf("ReadHeader size = %d, want %d", size, len(content)) + } + + _, err = store.ReadBytesFull(id) + if err == nil { + t.Fatalf("ReadBytesFull on corrupted trailer succeeded") + } + }) +} diff --git a/object/store/loose/refresh.go b/object/store/loose/refresh.go new file mode 100644 index 00000000..b720ebc6 --- /dev/null +++ b/object/store/loose/refresh.go @@ -0,0 +1,6 @@ +package loose + +// Refresh is a no-op for loose object stores. +func (store *Store) Refresh() error { + return nil +} diff --git a/object/store/loose/store.go b/object/store/loose/store.go new file mode 100644 index 00000000..d8eba84e --- /dev/null +++ b/object/store/loose/store.go @@ -0,0 +1,41 @@ +// Package loose provides a loose object backend (objects/XX/YYYYY..). +package loose + +import ( + "os" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// Store reads loose Git objects from an objects directory root. +// +// Loose objects are zlib streams whose trailer uses Adler-32. Which reads +// consume enough of the stream to reach and verify that trailer is documented +// on the individual methods. +type Store struct { + // root is the objects directory capability used for all object file access. + // Object files are opened by relative paths like "<first2>/<rest>". + // Store borrows this root. + root *os.Root + // algo is the expected object ID algorithm for lookups. + algo objectid.Algorithm +} + +// New creates a loose-object store rooted at an objects directory for algo. +func New(root *os.Root, algo objectid.Algorithm) (*Store, error) { + if algo.Size() == 0 { + return nil, objectid.ErrInvalidAlgorithm + } + + return &Store{ + root: root, + algo: algo, + }, nil +} + +// Close releases resources associated with the backend. +// +// Store borrows its root, so Close does not close it. +// +// Repeated calls to Close are undefined behavior. 
+func (store *Store) Close() error { return nil } diff --git a/object/store/loose/write_bytes.go b/object/store/loose/write_bytes.go new file mode 100644 index 00000000..ffc65117 --- /dev/null +++ b/object/store/loose/write_bytes.go @@ -0,0 +1,18 @@ +package loose + +import ( + "bytes" + + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// WriteBytesFull writes a full serialized object as "type size\0content". +func (store *Store) WriteBytesFull(raw []byte) (objectid.ObjectID, error) { + return store.WriteReaderFull(bytes.NewReader(raw)) +} + +// WriteBytesContent writes typed content bytes as a loose object. +func (store *Store) WriteBytesContent(ty objecttype.Type, content []byte) (objectid.ObjectID, error) { + return store.WriteReaderContent(ty, int64(len(content)), bytes.NewReader(content)) +} diff --git a/object/store/loose/write_reader.go b/object/store/loose/write_reader.go new file mode 100644 index 00000000..f686f279 --- /dev/null +++ b/object/store/loose/write_reader.go @@ -0,0 +1,81 @@ +package loose + +import ( + "fmt" + "io" + + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// WriteReaderContent writes one loose object from typed content bytes read from src. +// src must provide exactly size bytes. +// size is required because loose object headers are "type size\0content", so the +// header must be emitted before streaming content without buffering. 
+func (store *Store) WriteReaderContent(ty objecttype.Type, size int64, src io.Reader) (objectid.ObjectID, error) { + if size < 0 { + return objectid.ObjectID{}, fmt.Errorf("objectstore/loose: negative content size: %d", size) + } + + header, ok := objectheader.Encode(ty, size) + if !ok { + return objectid.ObjectID{}, fmt.Errorf("objectstore/loose: failed to encode object header for type %v", ty) + } + + writer, err := store.newStreamWriter(false) + if err != nil { + return objectid.ObjectID{}, err + } + + writer.headerDone = true + writer.expectedContentLeft = size + + err = writer.writeRawChunk(header) + if err != nil { + _ = writer.Close() + _ = store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + return writeReaderIntoStreamWriter(writer, src) +} + +// WriteReaderFull writes one loose object from raw bytes "type size\0content" +// read from src. +func (store *Store) WriteReaderFull(src io.Reader) (objectid.ObjectID, error) { + writer, err := store.newStreamWriter(true) + if err != nil { + return objectid.ObjectID{}, err + } + + return writeReaderIntoStreamWriter(writer, src) +} + +// writeReaderIntoStreamWriter copies src into writer and publishes the object. 
+func writeReaderIntoStreamWriter(writer *streamWriter, src io.Reader) (objectid.ObjectID, error) { + _, err := io.Copy(writer, src) + if err != nil { + _ = writer.Close() + _ = writer.store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + err = writer.Close() + if err != nil { + _ = writer.store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + id, err := writer.finalize() + if err != nil { + _ = writer.store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + return id, nil +} diff --git a/object/store/loose/write_temp_object_file.go b/object/store/loose/write_temp_object_file.go new file mode 100644 index 00000000..1a78db48 --- /dev/null +++ b/object/store/loose/write_temp_object_file.go @@ -0,0 +1,30 @@ +package loose + +import ( + "crypto/rand" + "errors" + "io/fs" + "os" + "path/filepath" +) + +// createTempObjectFile creates a unique temporary object file within dir. +// The returned path is relative to the objects root. 
+func (store *Store) createTempObjectFile(dir string) (string, *os.File, error) { + for range 16 { + relPath := filepath.Join(dir, tempObjectFilePrefix+rand.Text()) + + file, err := store.root.OpenFile(relPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644) + if err == nil { + return relPath, file, nil + } + + if errors.Is(err, fs.ErrExist) { + continue + } + + return "", nil, err + } + + return "", nil, errors.New("objectstore/loose: failed to create temporary object file") +} diff --git a/object/store/loose/write_test.go b/object/store/loose/write_test.go new file mode 100644 index 00000000..30d8dbdb --- /dev/null +++ b/object/store/loose/write_test.go @@ -0,0 +1,137 @@ +package loose_test + +import ( + "bytes" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func TestLooseStoreWriteReaderContentAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + content := []byte("written-by-content-reader\n") + expectedHex := testRepo.RunInput(t, content, "hash-object", "-t", "blob", "--stdin") + + expectedID, err := objectid.ParseHex(algo, expectedHex) + if err != nil { + t.Fatalf("ParseHex(expected): %v", err) + } + + writtenID, err := store.WriteReaderContent(objecttype.TypeBlob, int64(len(content)), bytes.NewReader(content)) + if err != nil { + t.Fatalf("WriteReaderContent: %v", err) + } + + if writtenID != expectedID { + t.Fatalf("WriteReaderContent id = %s, want %s", writtenID, expectedID) + } + + gotBody := testRepo.CatFile(t, "blob", writtenID) + if !bytes.Equal(gotBody, content) { + t.Fatalf("git cat-file body mismatch") + } + + // Writing the same object again should 
succeed and return the same ID. + writtenID2, err := store.WriteReaderContent(objecttype.TypeBlob, int64(len(content)), bytes.NewReader(content)) + if err != nil { + t.Fatalf("WriteReaderContent second: %v", err) + } + + if writtenID2 != expectedID { + t.Fatalf("WriteReaderContent second id = %s, want %s", writtenID2, expectedID) + } + }) +} + +func TestLooseStoreWriteReaderFullAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + body := []byte("full-reader-body\n") + + header, ok := objectheader.Encode(objecttype.TypeBlob, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + + wantID := algo.Sum(raw) + + gotID, err := store.WriteReaderFull(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("WriteReaderFull: %v", err) + } + + if gotID != wantID { + t.Fatalf("WriteReaderFull id = %s, want %s", gotID, wantID) + } + + gotBody := testRepo.CatFile(t, "blob", gotID) + if !bytes.Equal(gotBody, body) { + t.Fatalf("git cat-file body mismatch") + } + }) +} + +func TestLooseStoreReaderValidationErrors(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + t.Run("content overflow", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + _, err := store.WriteReaderContent(objecttype.TypeBlob, 1, bytes.NewReader([]byte("hello"))) + if err == nil { + t.Fatalf("expected error after overflow") + } + }) + + t.Run("content short", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := 
openLooseStore(t, testRepo, algo) + + _, err := store.WriteReaderContent(objecttype.TypeBlob, 5, bytes.NewReader([]byte("x"))) + if err == nil { + t.Fatalf("expected error for short content") + } + }) + + t.Run("full malformed header", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + _, err := store.WriteReaderFull(bytes.NewReader([]byte("not-a-header"))) + if err == nil { + t.Fatalf("expected error for malformed header") + } + }) + + t.Run("full size mismatch", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + raw := []byte("blob 1\x00hello") + + _, err := store.WriteReaderFull(bytes.NewReader(raw)) + if err == nil { + t.Fatalf("expected error after mismatch") + } + }) + }) +} diff --git a/object/store/loose/write_writer.go b/object/store/loose/write_writer.go new file mode 100644 index 00000000..0d6b5b80 --- /dev/null +++ b/object/store/loose/write_writer.go @@ -0,0 +1,94 @@ +package loose + +import ( + "errors" + "hash" + "os" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" +) + +const tempObjectFilePrefix = "tmp_obj_" + +// streamWriter incrementally hashes and deflates an object into a temp file. +// Finalize validates size accounting and atomically renames the temp file. +type streamWriter struct { + // store owns path and root operations used by this write session. + store *Store + // file is the temporary destination file under objects/. + file *os.File + // zw compresses raw object bytes into file. + zw *zlib.Writer + // hash receives the same raw bytes used to compute the resulting object ID. + hash hash.Hash + + // tmpRelPath is the relative path of file under the objects root. + tmpRelPath string + + // fullMode selects full-object input ("type size\0content") as opposed to content-only input. 
+ fullMode bool + + // headerBuf accumulates header bytes while fullMode parses up to the first NUL. + headerBuf []byte + // headerDone reports whether the full-object header has been parsed. + headerDone bool + // expectedContentLeft tracks remaining declared content bytes. + expectedContentLeft int64 + + closed bool + finalized bool +} + +// newStreamWriter creates a stream writer with a temp file rooted in objects/. +func (store *Store) newStreamWriter(fullMode bool) (*streamWriter, error) { + hashFn, err := store.algo.New() + if err != nil { + return nil, err + } + + tmpRelPath, file, err := store.createTempObjectFile(".") + if err != nil { + return nil, err + } + + return &streamWriter{ + store: store, + file: file, + zw: zlib.NewWriter(file), + hash: hashFn, + tmpRelPath: tmpRelPath, + fullMode: fullMode, + headerBuf: make([]byte, 0, 64), + }, nil +} + +// Write validates and writes raw bytes into the stream. +// In full mode, it parses and enforces the streamed header-declared content size. +func (writer *streamWriter) Write(src []byte) (int, error) { + if writer.finalized { + return 0, errors.New("objectstore/loose: write after finalize") + } + + if writer.closed { + return 0, errors.New("objectstore/loose: write after close") + } + + if writer.fullMode { + err := writer.acceptFull(src) + if err != nil { + return 0, err + } + } else { + err := writer.acceptContent(int64(len(src))) + if err != nil { + return 0, err + } + } + + err := writer.writeRawChunk(src) + if err != nil { + return 0, err + } + + return len(src), nil +} diff --git a/object/store/loose/write_writer_accept.go b/object/store/loose/write_writer_accept.go new file mode 100644 index 00000000..bf55966a --- /dev/null +++ b/object/store/loose/write_writer_accept.go @@ -0,0 +1,61 @@ +package loose + +import ( + "bytes" + "errors" + + objectheader "codeberg.org/lindenii/furgit/object/header" +) + +// acceptFull validates and accounts raw full-object input. 
+func (writer *streamWriter) acceptFull(src []byte) error { + if !writer.headerDone { + nul := bytes.IndexByte(src, 0) + if nul >= 0 { + headerChunkLen := nul + 1 + writer.headerBuf = append(writer.headerBuf, src[:headerChunkLen]...) + + _, size, _, ok := objectheader.Parse(writer.headerBuf) + if !ok { + return errors.New("objectstore/loose: malformed object header") + } + + writer.headerDone = true + writer.expectedContentLeft = size + + return writer.acceptContent(int64(len(src) - headerChunkLen)) + } + + writer.headerBuf = append(writer.headerBuf, src...) + + return nil + } + + return writer.acceptContent(int64(len(src))) +} + +// acceptContent validates and accounts content byte counts. +func (writer *streamWriter) acceptContent(n int64) error { + if n > writer.expectedContentLeft { + return errors.New("objectstore/loose: object content exceeds declared size") + } + + writer.expectedContentLeft -= n + + return nil +} + +// writeRawChunk forwards raw bytes to the hash and deflate pipeline. +func (writer *streamWriter) writeRawChunk(src []byte) error { + _, err := writer.hash.Write(src) + if err != nil { + return err + } + + _, err = writer.zw.Write(src) + if err != nil { + return err + } + + return nil +} diff --git a/object/store/loose/write_writer_finalize.go b/object/store/loose/write_writer_finalize.go new file mode 100644 index 00000000..f8dee54d --- /dev/null +++ b/object/store/loose/write_writer_finalize.go @@ -0,0 +1,90 @@ +package loose + +import ( + "errors" + "io/fs" + "path/filepath" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// Close flushes and closes the underlying zlib stream and temp file. +// +// Repeated calls to Close are undefined behavior. 
+func (writer *streamWriter) Close() error { + errZlib := writer.zw.Close() + errSync := writer.file.Sync() + errFile := writer.file.Close() + + writer.closed = true + writer.file = nil + + return errors.Join(errZlib, errSync, errFile) +} + +// finalize validates write completeness and atomically publishes the object. +// Publication is no-clobber: it links tmpRelPath to the object path and treats +// existing destination objects as success. +func (writer *streamWriter) finalize() (objectid.ObjectID, error) { + writer.finalized = true + + var zero objectid.ObjectID + + if !writer.closed { + err := writer.Close() + if err != nil { + return zero, err + } + } + + if writer.fullMode && !writer.headerDone { + return zero, errors.New("objectstore/loose: missing full object header") + } + + if writer.expectedContentLeft != 0 { + return zero, errors.New("objectstore/loose: object content shorter than declared size") + } + + idBytes := writer.hash.Sum(nil) + + id, err := objectid.FromBytes(writer.store.algo, idBytes) + if err != nil { + return zero, err + } + + relPath, err := writer.store.objectPath(id) + if err != nil { + return zero, err + } + + dir := filepath.Dir(relPath) + + err = writer.store.root.MkdirAll(dir, 0o755) + if err != nil { + return zero, err + } + + cleanup := true + + defer func() { + if cleanup { + _ = writer.store.root.Remove(writer.tmpRelPath) + } + }() + + err = writer.store.root.Link(writer.tmpRelPath, relPath) + if err != nil { + if errors.Is(err, fs.ErrExist) { + cleanup = false + _ = writer.store.root.Remove(writer.tmpRelPath) + + return id, nil + } + + return zero, err + } + + cleanup = false + + return id, nil +} diff --git a/object/store/memory/add.go b/object/store/memory/add.go new file mode 100644 index 00000000..3b27f52d --- /dev/null +++ b/object/store/memory/add.go @@ -0,0 +1,21 @@ +package memory + +import ( + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype 
"codeberg.org/lindenii/furgit/object/type" +) + +// AddObject stores one object body and returns its object ID. +func (store *Store) AddObject(ty objecttype.Type, body []byte) objectid.ObjectID { + header, ok := objectheader.Encode(ty, int64(len(body))) + if !ok { + panic("failed to encode object header") + } + + raw := append(append([]byte(nil), header...), body...) + id := store.algo.Sum(raw) + store.objects[id] = storedObject{ty: ty, content: append([]byte(nil), body...)} + + return id +} diff --git a/object/store/memory/algorithm.go b/object/store/memory/algorithm.go new file mode 100644 index 00000000..bf7f3a82 --- /dev/null +++ b/object/store/memory/algorithm.go @@ -0,0 +1,8 @@ +package memory + +import objectid "codeberg.org/lindenii/furgit/object/id" + +// Algorithm returns the object ID algorithm used by the store. +func (store *Store) Algorithm() objectid.Algorithm { + return store.algo +} diff --git a/object/store/memory/doc.go b/object/store/memory/doc.go new file mode 100644 index 00000000..cb40d466 --- /dev/null +++ b/object/store/memory/doc.go @@ -0,0 +1,2 @@ +// Package memory provides one in-memory object store. +package memory diff --git a/object/store/memory/object.go b/object/store/memory/object.go new file mode 100644 index 00000000..a85175c7 --- /dev/null +++ b/object/store/memory/object.go @@ -0,0 +1,9 @@ +package memory + +import objecttype "codeberg.org/lindenii/furgit/object/type" + +// storedObject is one in-memory object entry. 
+type storedObject struct { + ty objecttype.Type + content []byte +} diff --git a/object/store/memory/read_bytes.go b/object/store/memory/read_bytes.go new file mode 100644 index 00000000..48d3694a --- /dev/null +++ b/object/store/memory/read_bytes.go @@ -0,0 +1,37 @@ +package memory + +import ( + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadBytesFull reads one full object, including the object header. +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + obj, ok := store.objects[id] + if !ok { + return nil, objectstore.ErrObjectNotFound + } + + header, ok := objectheader.Encode(obj.ty, int64(len(obj.content))) + if !ok { + panic("failed to encode object header") + } + + raw := make([]byte, len(header)+len(obj.content)) + copy(raw, header) + copy(raw[len(header):], obj.content) + + return raw, nil +} + +// ReadBytesContent reads one object body. +func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + obj, ok := store.objects[id] + if !ok { + return objecttype.TypeInvalid, nil, objectstore.ErrObjectNotFound + } + + return obj.ty, append([]byte(nil), obj.content...), nil +} diff --git a/object/store/memory/read_header.go b/object/store/memory/read_header.go new file mode 100644 index 00000000..da3acd1c --- /dev/null +++ b/object/store/memory/read_header.go @@ -0,0 +1,17 @@ +package memory + +import ( + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads one object header. 
+func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + obj, ok := store.objects[id] + if !ok { + return objecttype.TypeInvalid, 0, objectstore.ErrObjectNotFound + } + + return obj.ty, int64(len(obj.content)), nil +} diff --git a/object/store/memory/read_reader.go b/object/store/memory/read_reader.go new file mode 100644 index 00000000..425c3034 --- /dev/null +++ b/object/store/memory/read_reader.go @@ -0,0 +1,29 @@ +package memory + +import ( + "bytes" + "io" + + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadReaderFull reads one full object through a reader. +func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + raw, err := store.ReadBytesFull(id) + if err != nil { + return nil, err + } + + return io.NopCloser(bytes.NewReader(raw)), nil +} + +// ReadReaderContent reads one object body through a reader. +func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + ty, content, err := store.ReadBytesContent(id) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil +} diff --git a/object/store/memory/read_size.go b/object/store/memory/read_size.go new file mode 100644 index 00000000..7045bd61 --- /dev/null +++ b/object/store/memory/read_size.go @@ -0,0 +1,13 @@ +package memory + +import objectid "codeberg.org/lindenii/furgit/object/id" + +// ReadSize reads one object size. 
+func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { + _, size, err := store.ReadHeader(id) + if err != nil { + return 0, err + } + + return size, nil +} diff --git a/object/store/memory/refresh.go b/object/store/memory/refresh.go new file mode 100644 index 00000000..1e18eef3 --- /dev/null +++ b/object/store/memory/refresh.go @@ -0,0 +1,6 @@ +package memory + +// Refresh is a no-op for in-memory object stores. +func (store *Store) Refresh() error { + return nil +} diff --git a/object/store/memory/store.go b/object/store/memory/store.go new file mode 100644 index 00000000..8f212c38 --- /dev/null +++ b/object/store/memory/store.go @@ -0,0 +1,24 @@ +package memory + +import ( + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// Store is one in-memory object store. +type Store struct { + algo objectid.Algorithm + objects map[objectid.ObjectID]storedObject +} + +// New builds one empty in-memory store for one object format. +func New(algo objectid.Algorithm) *Store { + return &Store{ + algo: algo, + objects: make(map[objectid.ObjectID]storedObject), + } +} + +// Close closes the in-memory store. +func (store *Store) Close() error { + return nil +} diff --git a/object/store/mix/bytes.go b/object/store/mix/bytes.go new file mode 100644 index 00000000..5b62ff06 --- /dev/null +++ b/object/store/mix/bytes.go @@ -0,0 +1,51 @@ +package mix + +import ( + "errors" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadBytesFull reads a full serialized object from one backend that has it. 
+func (mix *Mix) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + for i, backend := 0, mix.firstBackend(); backend != nil; i, backend = i+1, mix.nextBackend(backend) { + full, err := backend.ReadBytesFull(id) + if err == nil { + mix.touchBackend(backend) + + return full, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return nil, fmt.Errorf("objectstore: backend %d read bytes full: %w", i, err) + } + + return nil, objectstore.ErrObjectNotFound +} + +// ReadBytesContent reads an object's type and content bytes from one backend +// that has it. +func (mix *Mix) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + for i, backend := 0, mix.firstBackend(); backend != nil; i, backend = i+1, mix.nextBackend(backend) { + ty, content, err := backend.ReadBytesContent(id) + if err == nil { + mix.touchBackend(backend) + + return ty, content, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return objecttype.TypeInvalid, nil, fmt.Errorf("objectstore: backend %d read bytes content: %w", i, err) + } + + return objecttype.TypeInvalid, nil, objectstore.ErrObjectNotFound +} diff --git a/object/store/mix/close.go b/object/store/mix/close.go new file mode 100644 index 00000000..53f6cd30 --- /dev/null +++ b/object/store/mix/close.go @@ -0,0 +1,8 @@ +package mix + +// Close releases wrapper-local resources. +// +// Mix borrows its backends, so Close does not close them. +// +// Repeated calls to Close are undefined behavior. 
+func (mix *Mix) Close() error { return nil } diff --git a/object/store/mix/header.go b/object/store/mix/header.go new file mode 100644 index 00000000..d57375ec --- /dev/null +++ b/object/store/mix/header.go @@ -0,0 +1,30 @@ +package mix + +import ( + "errors" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads object header data from one backend that has it. +func (mix *Mix) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + for i, backend := 0, mix.firstBackend(); backend != nil; i, backend = i+1, mix.nextBackend(backend) { + ty, size, err := backend.ReadHeader(id) + if err == nil { + mix.touchBackend(backend) + + return ty, size, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore: backend %d read header: %w", i, err) + } + + return objecttype.TypeInvalid, 0, objectstore.ErrObjectNotFound +} diff --git a/object/store/mix/mix.go b/object/store/mix/mix.go new file mode 100644 index 00000000..1e57329d --- /dev/null +++ b/object/store/mix/mix.go @@ -0,0 +1,20 @@ +// Package mix provides an adaptive wrapper over multiple object storage +// backends. +package mix + +import ( + "sync" + + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// Mix queries multiple object databases with an MRU backend preference. +// +// Mix borrows its backend stores. 
+type Mix struct { + mu sync.RWMutex + + backendHead *backendNode + backendTail *backendNode + backendNodeByStore map[objectstore.ReadingStore]*backendNode +} diff --git a/object/store/mix/mru.go b/object/store/mix/mru.go new file mode 100644 index 00000000..c8a58d2d --- /dev/null +++ b/object/store/mix/mru.go @@ -0,0 +1,74 @@ +package mix + +import objectstore "codeberg.org/lindenii/furgit/object/store" + +type backendNode struct { + backend objectstore.ReadingStore + prev *backendNode + next *backendNode +} + +//nolint:ireturn +func (mix *Mix) firstBackend() objectstore.ReadingStore { + mix.mu.RLock() + defer mix.mu.RUnlock() + + if mix.backendHead == nil { + return nil + } + + return mix.backendHead.backend +} + +//nolint:ireturn +func (mix *Mix) nextBackend(current objectstore.ReadingStore) objectstore.ReadingStore { + mix.mu.RLock() + defer mix.mu.RUnlock() + + node := mix.backendNodeByStore[current] + if node == nil || node.next == nil { + return nil + } + + return node.next.backend +} + +func (mix *Mix) touchBackend(backend objectstore.ReadingStore) { + if backend == nil { + return + } + + if !mix.mu.TryLock() { + return + } + defer mix.mu.Unlock() + + node := mix.backendNodeByStore[backend] + if node == nil || node == mix.backendHead { + return + } + + if node.prev != nil { + node.prev.next = node.next + } + + if node.next != nil { + node.next.prev = node.prev + } + + if mix.backendTail == node { + mix.backendTail = node.prev + } + + node.prev = nil + + node.next = mix.backendHead + if mix.backendHead != nil { + mix.backendHead.prev = node + } + + mix.backendHead = node + if mix.backendTail == nil { + mix.backendTail = node + } +} diff --git a/object/store/mix/new.go b/object/store/mix/new.go new file mode 100644 index 00000000..7bd3235f --- /dev/null +++ b/object/store/mix/new.go @@ -0,0 +1,39 @@ +package mix + +import objectstore "codeberg.org/lindenii/furgit/object/store" + +// New creates a Mix from backends. 
+// +// The provided backends must be non-nil and distinct. +// Mix borrows the provided backends and does not close them in Close. +func New(backends ...objectstore.ReadingStore) *Mix { + nodeByStore := make(map[objectstore.ReadingStore]*backendNode, len(backends)) + + var ( + head *backendNode + tail *backendNode + ) + + for _, backend := range backends { + node := &backendNode{ + backend: backend, + prev: tail, + } + if tail != nil { + tail.next = node + } + + if head == nil { + head = node + } + + tail = node + nodeByStore[backend] = node + } + + return &Mix{ + backendHead: head, + backendTail: tail, + backendNodeByStore: nodeByStore, + } +} diff --git a/object/store/mix/reader.go b/object/store/mix/reader.go new file mode 100644 index 00000000..8d515c50 --- /dev/null +++ b/object/store/mix/reader.go @@ -0,0 +1,53 @@ +package mix + +import ( + "errors" + "fmt" + "io" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadReaderFull reads a full serialized object stream from one backend that +// has it. +func (mix *Mix) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + for i, backend := 0, mix.firstBackend(); backend != nil; i, backend = i+1, mix.nextBackend(backend) { + reader, err := backend.ReadReaderFull(id) + if err == nil { + mix.touchBackend(backend) + + return reader, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return nil, fmt.Errorf("objectstore: backend %d read reader full: %w", i, err) + } + + return nil, objectstore.ErrObjectNotFound +} + +// ReadReaderContent reads an object's type, declared content length, and +// content stream from one backend that has it. 
+func (mix *Mix) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + for i, backend := 0, mix.firstBackend(); backend != nil; i, backend = i+1, mix.nextBackend(backend) { + ty, size, reader, err := backend.ReadReaderContent(id) + if err == nil { + mix.touchBackend(backend) + + return ty, size, reader, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return objecttype.TypeInvalid, 0, nil, fmt.Errorf("objectstore: backend %d read reader content: %w", i, err) + } + + return objecttype.TypeInvalid, 0, nil, objectstore.ErrObjectNotFound +} diff --git a/object/store/mix/refresh.go b/object/store/mix/refresh.go new file mode 100644 index 00000000..eb9c2bf5 --- /dev/null +++ b/object/store/mix/refresh.go @@ -0,0 +1,30 @@ +package mix + +import ( + "errors" + + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// Refresh forwards refresh calls to refresh-capable backends. +func (mix *Mix) Refresh() error { + mix.mu.RLock() + + backends := make([]objectstore.ReadingStore, 0, len(mix.backendNodeByStore)) + for node := mix.backendHead; node != nil; node = node.next { + backends = append(backends, node.backend) + } + + mix.mu.RUnlock() + + var errs []error + + for _, backend := range backends { + err := backend.Refresh() + if err != nil { + errs = append(errs, err) + } + } + + return errors.Join(errs...) +} diff --git a/object/store/mix/size.go b/object/store/mix/size.go new file mode 100644 index 00000000..4feb142e --- /dev/null +++ b/object/store/mix/size.go @@ -0,0 +1,29 @@ +package mix + +import ( + "errors" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// ReadSize reads object content length from one backend that has it. 
+func (mix *Mix) ReadSize(id objectid.ObjectID) (int64, error) { + for i, backend := 0, mix.firstBackend(); backend != nil; i, backend = i+1, mix.nextBackend(backend) { + size, err := backend.ReadSize(id) + if err == nil { + mix.touchBackend(backend) + + return size, nil + } + + if errors.Is(err, objectstore.ErrObjectNotFound) { + continue + } + + return 0, fmt.Errorf("objectstore: backend %d read size: %w", i, err) + } + + return 0, objectstore.ErrObjectNotFound +} diff --git a/object/store/packed/TODO b/object/store/packed/TODO new file mode 100644 index 00000000..f4a5f48e --- /dev/null +++ b/object/store/packed/TODO @@ -0,0 +1,3 @@ +* Per delta-plan memo map +* Internal handle/request context (might expose it externally later and add to global interface) +* Audit on mutex diff --git a/object/store/packed/close.go b/object/store/packed/close.go new file mode 100644 index 00000000..f05a8573 --- /dev/null +++ b/object/store/packed/close.go @@ -0,0 +1,38 @@ +package packed + +// Close releases mapped pack/index resources associated with the store. +// +// Store borrows its root, so Close does not close it. +// Close releases cached pack/index mappings retained by the store. +// +// Repeated calls to Close are undefined behavior. 
+func (store *Store) Close() error { + store.stateMu.Lock() + packs := store.packs + store.stateMu.Unlock() + store.idxMu.RLock() + indexes := store.idxByPack + store.idxMu.RUnlock() + + var closeErr error + + for _, pack := range packs { + err := pack.close() + if err != nil && closeErr == nil { + closeErr = err + } + } + + for _, index := range indexes { + err := index.close() + if err != nil && closeErr == nil { + closeErr = err + } + } + + store.cacheMu.Lock() + store.deltaCache.clear() + store.cacheMu.Unlock() + + return closeErr +} diff --git a/object/store/packed/delta_build_chain.go b/object/store/packed/delta_build_chain.go new file mode 100644 index 00000000..59f83542 --- /dev/null +++ b/object/store/packed/delta_build_chain.go @@ -0,0 +1,66 @@ +package packed + +import ( + "fmt" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaBuildChain walks one object's chain and builds a reconstruction chain. +func (store *Store) deltaBuildChain(start location) (deltaChain, error) { + visited := make(map[location]struct{}) + current := start + + var chain deltaChain + + for { + if _, ok := visited[current]; ok { + return deltaChain{}, fmt.Errorf("objectstore/packed: delta cycle while resolving object") + } + + visited[current] = struct{}{} + + _, meta, err := store.entryMetaAt(current) + if err != nil { + return deltaChain{}, err + } + + if packfmt.IsBaseObjectType(meta.ty) { + chain.baseLoc = current + chain.baseType = meta.ty + + return chain, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta: + chain.deltas = append(chain.deltas, deltaNode{ + loc: current, + dataOffset: meta.dataOffset, + }) + + next, err := store.lookup(meta.baseRefID) + if err != nil { + return deltaChain{}, err + } + + current = next + case objecttype.TypeOfsDelta: + chain.deltas = append(chain.deltas, deltaNode{ + loc: current, + dataOffset: meta.dataOffset, + }) + current = location{ + packName: 
current.packName, + offset: meta.baseOfs, + } + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + return deltaChain{}, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) + case objecttype.TypeInvalid, objecttype.TypeFuture: + return deltaChain{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + default: + return deltaChain{}, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } + } +} diff --git a/object/store/packed/delta_cache.go b/object/store/packed/delta_cache.go new file mode 100644 index 00000000..3bf3a035 --- /dev/null +++ b/object/store/packed/delta_cache.go @@ -0,0 +1,61 @@ +package packed + +import ( + "codeberg.org/lindenii/furgit/internal/lru" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +const defaultDeltaCacheMaxBytes = 32 << 20 + +// deltaBaseKey identifies one base object by pack location. +type deltaBaseKey struct { + packName string + offset uint64 +} + +// deltaBaseValue stores one cached base object body. +type deltaBaseValue struct { + ty objecttype.Type + content []byte +} + +// deltaCache wraps a weighted LRU for resolved delta bases. +type deltaCache struct { + lru *lru.Cache[deltaBaseKey, deltaBaseValue] +} + +// newDeltaCache creates a delta base cache with a byte budget. +func newDeltaCache(maxBytes int64) *deltaCache { + return &deltaCache{ + lru: lru.New( + maxBytes, + func(_ deltaBaseKey, value deltaBaseValue) int64 { + return int64(len(value.content)) + }, + nil, + ), + } +} + +// get returns a cloned cached base object value. +func (cache *deltaCache) get(key deltaBaseKey) (objecttype.Type, []byte, bool) { + value, ok := cache.lru.Get(key) + if !ok { + return objecttype.TypeInvalid, nil, false + } + + return value.ty, append([]byte(nil), value.content...), true +} + +// add stores a cloned base object value. 
+func (cache *deltaCache) add(key deltaBaseKey, ty objecttype.Type, content []byte) { + cache.lru.Add(key, deltaBaseValue{ + ty: ty, + content: append([]byte(nil), content...), + }) +} + +// clear removes all cached entries. +func (cache *deltaCache) clear() { + cache.lru.Clear() +} diff --git a/object/store/packed/delta_chain.go b/object/store/packed/delta_chain.go new file mode 100644 index 00000000..372e89cd --- /dev/null +++ b/object/store/packed/delta_chain.go @@ -0,0 +1,13 @@ +package packed + +import objecttype "codeberg.org/lindenii/furgit/object/type" + +// deltaChain describes how to reconstruct one requested object. +type deltaChain struct { + // baseLoc points to the innermost base object. + baseLoc location + // baseType is the canonical object type resolved from baseLoc. + baseType objecttype.Type + // deltas contains delta objects from target down toward base. + deltas []deltaNode +} diff --git a/object/store/packed/delta_node.go b/object/store/packed/delta_node.go new file mode 100644 index 00000000..24ede1e0 --- /dev/null +++ b/object/store/packed/delta_node.go @@ -0,0 +1,9 @@ +package packed + +// deltaNode describes one delta object in a reconstruction chain. +type deltaNode struct { + // loc identifies the delta object's pack location. + loc location + // dataOffset points to the start of the delta zlib payload in pack. + dataOffset int +} diff --git a/object/store/packed/delta_resolve_chain.go b/object/store/packed/delta_resolve_chain.go new file mode 100644 index 00000000..6347ee41 --- /dev/null +++ b/object/store/packed/delta_resolve_chain.go @@ -0,0 +1,61 @@ +package packed + +import ( + "fmt" + + deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaResolveChain resolves one object chain into content bytes. 
+func (store *Store) deltaResolveChain(chain deltaChain, declaredSize int64) (objecttype.Type, []byte, error) { + ty, out, nextDelta, err := store.deltaResolveChainStart(chain) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + for i := nextDelta; i >= 0; i-- { + node := chain.deltas[i] + + pack, err := store.openPack(node.loc.packName) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + delta, err := inflateAt(pack, node.dataOffset, -1) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + out, err = deltaapply.Apply(out, delta) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + store.cacheMu.Lock() + store.deltaCache.add( + deltaBaseKey{packName: node.loc.packName, offset: node.loc.offset}, + ty, + out, + ) + store.cacheMu.Unlock() + } + + if int64(len(out)) != declaredSize { + return objecttype.TypeInvalid, nil, fmt.Errorf( + "objectstore/packed: resolved content size mismatch: got %d want %d", + len(out), + declaredSize, + ) + } + + if ty != chain.baseType { + return objecttype.TypeInvalid, nil, fmt.Errorf( + "objectstore/packed: resolved content type mismatch: got %d want %d", + ty, + chain.baseType, + ) + } + + return ty, out, nil +} diff --git a/object/store/packed/delta_resolve_chain_start.go b/object/store/packed/delta_resolve_chain_start.go new file mode 100644 index 00000000..047073cb --- /dev/null +++ b/object/store/packed/delta_resolve_chain_start.go @@ -0,0 +1,59 @@ +package packed + +import ( + "fmt" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaResolveChainStart finds the nearest cached chain node or inflates the +// innermost base object. It returns the starting bytes and the next delta index +// to apply in reverse order. 
+func (store *Store) deltaResolveChainStart(chain deltaChain) (objecttype.Type, []byte, int, error) { + for i, node := range chain.deltas { + store.cacheMu.RLock() + ty, out, ok := store.deltaCache.get( + deltaBaseKey{packName: node.loc.packName, offset: node.loc.offset}, + ) + store.cacheMu.RUnlock() + + if ok { + return ty, out, i - 1, nil + } + } + + store.cacheMu.RLock() + ty, out, ok := store.deltaCache.get( + deltaBaseKey{packName: chain.baseLoc.packName, offset: chain.baseLoc.offset}, + ) + store.cacheMu.RUnlock() + + if ok { + return ty, out, len(chain.deltas) - 1, nil + } + + pack, meta, err := store.entryMetaAt(chain.baseLoc) + if err != nil { + return objecttype.TypeInvalid, nil, 0, err + } + + if !packfmt.IsBaseObjectType(meta.ty) { + return objecttype.TypeInvalid, nil, 0, fmt.Errorf("objectstore/packed: delta chain base is not a base object") + } + + base, err := inflateAt(pack, meta.dataOffset, meta.size) + if err != nil { + return objecttype.TypeInvalid, nil, 0, err + } + + store.cacheMu.Lock() + store.deltaCache.add( + deltaBaseKey{packName: chain.baseLoc.packName, offset: chain.baseLoc.offset}, + meta.ty, + base, + ) + store.cacheMu.Unlock() + + return meta.ty, base, len(chain.deltas) - 1, nil +} diff --git a/object/store/packed/delta_resolve_content.go b/object/store/packed/delta_resolve_content.go new file mode 100644 index 00000000..9a24873b --- /dev/null +++ b/object/store/packed/delta_resolve_content.go @@ -0,0 +1,29 @@ +package packed + +import ( + packfmt "codeberg.org/lindenii/furgit/format/packfile" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// deltaResolveContent resolves one object's content bytes from its pack location. 
+func (store *Store) deltaResolveContent(start location) (objecttype.Type, []byte, error) { + chain, err := store.deltaBuildChain(start) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + pack, meta, err := store.entryMetaAt(start) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + declaredSize := meta.size + if !packfmt.IsBaseObjectType(meta.ty) { + declaredSize, err = deltaDeclaredSizeAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + } + + return store.deltaResolveChain(chain, declaredSize) +} diff --git a/object/store/packed/delta_size.go b/object/store/packed/delta_size.go new file mode 100644 index 00000000..e5ba3bb7 --- /dev/null +++ b/object/store/packed/delta_size.go @@ -0,0 +1,27 @@ +package packed + +import ( + "bufio" + + deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply" +) + +// deltaDeclaredSizeAt returns the resolved object size declared by one delta +// stream header at dataOffset. +func deltaDeclaredSizeAt(pack *packFile, dataOffset int) (int64, error) { + reader, err := zlibReaderAt(pack, dataOffset) + if err != nil { + return 0, err + } + + defer func() { _ = reader.Close() }() + + br := bufio.NewReaderSize(reader, 32) + + _, size, err := deltaapply.ReadHeaderSizes(br) + if err != nil { + return 0, err + } + + return int64(size), nil +} diff --git a/object/store/packed/entry_inflate.go b/object/store/packed/entry_inflate.go new file mode 100644 index 00000000..1c3943e9 --- /dev/null +++ b/object/store/packed/entry_inflate.go @@ -0,0 +1,55 @@ +package packed + +import ( + "bytes" + "fmt" + "io" + "math" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" +) + +// zlibReaderAt opens a zlib reader starting at data offset within pack. 
+func zlibReaderAt(pack *packFile, offset int) (io.ReadCloser, error) { + if offset < 0 || offset > len(pack.data) { + return nil, fmt.Errorf("objectstore/packed: pack %q zlib offset out of bounds", pack.name) + } + + return zlib.NewReader(bytes.NewReader(pack.data[offset:])) +} + +// inflateAt inflates one entry payload from data offset. +func inflateAt(pack *packFile, offset int, expectedSize int64) ([]byte, error) { + reader, err := zlibReaderAt(pack, offset) + if err != nil { + return nil, err + } + + defer func() { _ = reader.Close() }() + + if expectedSize >= 0 { + if expectedSize > int64(math.MaxInt) { + return nil, fmt.Errorf( + "objectstore/packed: pack %q expected inflated size overflows int: %d", + pack.name, + expectedSize, + ) + } + + body := make([]byte, int(expectedSize)) + + _, err := io.ReadFull(reader, body) + if err != nil { + return nil, err + } + + return body, nil + } + + body, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + + return body, nil +} diff --git a/object/store/packed/entry_meta.go b/object/store/packed/entry_meta.go new file mode 100644 index 00000000..0bbe8bef --- /dev/null +++ b/object/store/packed/entry_meta.go @@ -0,0 +1,16 @@ +package packed + +// entryMetaAt parses one pack entry header at location. 
+func (store *Store) entryMetaAt(loc location) (*packFile, entryMeta, error) { + pack, err := store.openPack(loc.packName) + if err != nil { + return nil, entryMeta{}, err + } + + meta, err := parseEntryMeta(pack, store.algo, loc.offset) + if err != nil { + return nil, entryMeta{}, err + } + + return pack, meta, nil +} diff --git a/object/store/packed/entry_parse.go b/object/store/packed/entry_parse.go new file mode 100644 index 00000000..962e39df --- /dev/null +++ b/object/store/packed/entry_parse.go @@ -0,0 +1,71 @@ +package packed + +import ( + "fmt" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + "codeberg.org/lindenii/furgit/internal/intconv" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// entryMeta describes one parsed pack entry header. +type entryMeta struct { + // ty is the pack entry type tag. + ty objecttype.Type + // size is the declared resulting content size. + size int64 + // dataOffset points to the zlib payload start. + dataOffset int + // baseRefID is set for ref-delta entries. + baseRefID objectid.ObjectID + // baseOfs is set for ofs-delta entries. + baseOfs uint64 +} + +// parseEntryMeta parses one pack entry header at offset. 
+func parseEntryMeta(pack *packFile, algo objectid.Algorithm, offset uint64) (entryMeta, error) { + var zero entryMeta + if offset >= uint64(len(pack.data)) { + return zero, fmt.Errorf("objectstore/packed: pack %q offset %d out of bounds", pack.name, offset) + } + + pos, err := intconv.Uint64ToInt(offset) + if err != nil { + return zero, fmt.Errorf("objectstore/packed: pack %q offset conversion: %w", pack.name, err) + } + + entry, err := packfmt.ParseEntry(pack.data[pos:], algo.Size()) + if err != nil { + return zero, fmt.Errorf("objectstore/packed: pack %q: %w", pack.name, err) + } + + meta := entryMeta{ + ty: entry.Type, + size: entry.Size, + dataOffset: pos + entry.DataOffset, + } + switch meta.ty { + case objecttype.TypeRefDelta: + baseID, err := objectid.FromBytes(algo, entry.RefBaseID) + if err != nil { + return zero, fmt.Errorf("objectstore/packed: pack %q invalid ref-delta base id: %w", pack.name, err) + } + + meta.baseRefID = baseID + case objecttype.TypeOfsDelta: + if offset <= entry.OfsBaseDistance { + return zero, fmt.Errorf("objectstore/packed: pack %q has invalid ofs-delta base", pack.name) + } + + meta.baseOfs = offset - entry.OfsBaseDistance + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + // Base object types do not have delta base metadata. 
+ case objecttype.TypeInvalid, objecttype.TypeFuture: + return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported entry type %d", pack.name, meta.ty) + default: + return zero, fmt.Errorf("objectstore/packed: pack %q has unsupported entry type %d", pack.name, meta.ty) + } + + return meta, nil +} diff --git a/object/store/packed/helpers_test.go b/object/store/packed/helpers_test.go new file mode 100644 index 00000000..2125185d --- /dev/null +++ b/object/store/packed/helpers_test.go @@ -0,0 +1,102 @@ +package packed_test + +import ( + "fmt" + "io" + "strconv" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store/packed" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func openPackedStore(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) *packed.Store { + t.Helper() + + root := testRepo.OpenPackRoot(t) + + store, err := packed.New(root, algo, packed.Options{}) + if err != nil { + t.Fatalf("packed.New: %v", err) + } + + return store +} + +func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte { + t.Helper() + + data, err := io.ReadAll(reader) + if err != nil { + _ = reader.Close() + + t.Fatalf("ReadAll: %v", err) + } + + err = reader.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + + return data +} + +func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) { + t.Helper() + + typeName := testRepo.Run(t, "cat-file", "-t", id.String()) + + ty, ok := objecttype.ParseName(typeName) + if !ok { + t.Fatalf("ParseName(%q) failed", typeName) + } + + body := testRepo.CatFile(t, typeName, id) + + header, ok := objectheader.Encode(ty, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + 
copy(raw[len(header):], body) + + return ty, body, raw +} + +func createPackedFixtureRepo(t *testing.T, algo objectid.Algorithm) (*testgit.TestRepo, []objectid.ObjectID) { + t.Helper() + + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + blobID, treeID, commitID := testRepo.MakeCommit(t, "packed store base commit") + testRepo.Run(t, "update-ref", "refs/heads/main", commitID.String()) + tagID := testRepo.TagAnnotated(t, "v1.0.0", commitID, "packed-store-tag") + + parent := commitID + + for i := range 24 { + content := "common-prefix\n" + strings.Repeat("line-"+strconv.Itoa(i%3)+"\n", 256) + fmt.Sprintf("tail-%d\n", i) + nextBlob, nextTree := testRepo.MakeSingleFileTree(t, fmt.Sprintf("file-%02d.txt", i), []byte(content)) + nextCommit := testRepo.CommitTree(t, nextTree, fmt.Sprintf("commit-%02d", i), parent) + testRepo.Run(t, "update-ref", "refs/heads/main", nextCommit.String()) + parent = nextCommit + + _ = nextBlob + _ = nextTree + } + + testRepo.Repack(t, "-a", "-d", "-f", "--window=64", "--depth=64") + + return testRepo, []objectid.ObjectID{ + blobID, + treeID, + commitID, + tagID, + parent, + } +} diff --git a/object/store/packed/idx.go b/object/store/packed/idx.go new file mode 100644 index 00000000..5024f2f3 --- /dev/null +++ b/object/store/packed/idx.go @@ -0,0 +1,36 @@ +package packed + +import ( + "os" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// idxFile stores one mapped and validated idx v2 file. +type idxFile struct { + // idxName is the basename of this .idx file. + idxName string + // packName is the matching .pack basename. + packName string + // algo is the hash algorithm encoded by the index. + algo objectid.Algorithm + + // file is the opened index file descriptor. + file *os.File + // data is the mapped index bytes. + data []byte + + // fanout stores fanout table values. + fanout [256]uint32 + // numObjects equals fanout[255]. 
+ numObjects int + + // namesOffset starts the sorted object-id table. + namesOffset int + // offset32Offset starts the 32-bit offset table. + offset32Offset int + // offset64Offset starts the 64-bit offset table. + offset64Offset int + // offset64Count is the number of 64-bit offset entries. + offset64Count int +} diff --git a/object/store/packed/idx_candidates_mru.go b/object/store/packed/idx_candidates_mru.go new file mode 100644 index 00000000..d0cc7052 --- /dev/null +++ b/object/store/packed/idx_candidates_mru.go @@ -0,0 +1,136 @@ +package packed + +// packCandidateNode is one node in the candidate MRU order list. +type packCandidateNode struct { + packName string + prev *packCandidateNode + next *packCandidateNode +} + +func (store *Store) reconcileMRU(candidates []packCandidate) { + store.mruMu.Lock() + defer store.mruMu.Unlock() + + if store.mruNodeByPack == nil { + store.mruNodeByPack = make(map[string]*packCandidateNode, len(candidates)) + } + + present := make(map[string]struct{}, len(candidates)) + for _, candidate := range candidates { + present[candidate.packName] = struct{}{} + } + + ordered := make([]string, 0, len(candidates)) + + for node := store.mruHead; node != nil; node = node.next { + if _, ok := present[node.packName]; !ok { + continue + } + + ordered = append(ordered, node.packName) + delete(present, node.packName) + } + + for _, candidate := range candidates { + if _, ok := present[candidate.packName]; !ok { + continue + } + + ordered = append(ordered, candidate.packName) + delete(present, candidate.packName) + } + + store.mruHead = nil + store.mruTail = nil + store.mruNodeByPack = make(map[string]*packCandidateNode, len(ordered)) + + for _, packName := range ordered { + node := &packCandidateNode{ + packName: packName, + prev: store.mruTail, + } + if store.mruTail != nil { + store.mruTail.next = node + } + + if store.mruHead == nil { + store.mruHead = node + } + + store.mruTail = node + store.mruNodeByPack[packName] = node + } +} + +// 
touchCandidate moves one candidate to the front of the lookup order. +// This is done on a best-effort basis. +func (store *Store) touchCandidate(packName string) { + if !store.mruMu.TryLock() { + return + } + defer store.mruMu.Unlock() + + node := store.mruNodeByPack[packName] + if node == nil || node == store.mruHead { + return + } + + if node.prev != nil { + node.prev.next = node.next + } + + if node.next != nil { + node.next.prev = node.prev + } + + if store.mruTail == node { + store.mruTail = node.prev + } + + node.prev = nil + + node.next = store.mruHead + if store.mruHead != nil { + store.mruHead.prev = node + } + + store.mruHead = node + if store.mruTail == nil { + store.mruTail = node + } +} + +// firstCandidatePackName returns the current head pack name, or "" when none +// are available. +func (store *Store) firstCandidatePackName(snapshot *candidateSnapshot) string { + store.mruMu.RLock() + defer store.mruMu.RUnlock() + + for node := store.mruHead; node != nil; node = node.next { + if _, ok := snapshot.candidateByPack[node.packName]; ok { + return node.packName + } + } + + return "" +} + +// nextCandidatePackName returns the pack name after currentPack in current MRU +// order, or "" at end / when currentPack is not present. +func (store *Store) nextCandidatePackName(currentPack string, snapshot *candidateSnapshot) string { + store.mruMu.RLock() + defer store.mruMu.RUnlock() + + node := store.mruNodeByPack[currentPack] + if node == nil { + return "" + } + + for node = node.next; node != nil; node = node.next { + if _, ok := snapshot.candidateByPack[node.packName]; ok { + return node.packName + } + } + + return "" +} diff --git a/object/store/packed/idx_close.go b/object/store/packed/idx_close.go new file mode 100644 index 00000000..814ec987 --- /dev/null +++ b/object/store/packed/idx_close.go @@ -0,0 +1,28 @@ +package packed + +import "syscall" + +// close unmaps and closes one idx handle. 
+func (index *idxFile) close() error { + var closeErr error + + if index.data != nil { + err := syscall.Munmap(index.data) + if err != nil && closeErr == nil { + closeErr = err + } + + index.data = nil + } + + if index.file != nil { + err := index.file.Close() + if err != nil && closeErr == nil { + closeErr = err + } + + index.file = nil + } + + return closeErr +} diff --git a/object/store/packed/idx_lookup.go b/object/store/packed/idx_lookup.go new file mode 100644 index 00000000..0bd11d1b --- /dev/null +++ b/object/store/packed/idx_lookup.go @@ -0,0 +1,91 @@ +package packed + +import ( + "bytes" + "encoding/binary" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// lookup resolves one object ID to its pack offset within this index. +func (index *idxFile) lookup(id objectid.ObjectID) (uint64, bool, error) { + if id.Algorithm() != index.algo { + return 0, false, fmt.Errorf("objectstore/packed: object id algorithm mismatch") + } + + idBytes := (&id).RawBytes() + + hashSize := len(idBytes) + if hashSize != index.algo.Size() { + return 0, false, fmt.Errorf("objectstore/packed: unexpected object id length") + } + + first := int(idBytes[0]) + + lo := 0 + if first > 0 { + lo = int(index.fanout[first-1]) + } + + hi := int(index.fanout[first]) + if lo < 0 || hi < 0 || lo > hi || hi > index.numObjects { + return 0, false, fmt.Errorf("objectstore/packed: idx %q has invalid fanout bounds", index.idxName) + } + + for lo < hi { + mid := lo + (hi-lo)/2 + + nameOffset := index.namesOffset + mid*hashSize + if nameOffset < 0 || nameOffset+hashSize > len(index.data) { + return 0, false, fmt.Errorf("objectstore/packed: idx %q truncated name table", index.idxName) + } + + cmp := bytes.Compare(index.data[nameOffset:nameOffset+hashSize], idBytes) + if cmp == 0 { + offset, err := index.offsetAt(mid) + if err != nil { + return 0, false, err + } + + return offset, true, nil + } + + if cmp < 0 { + lo = mid + 1 + } else { + hi = mid + } + } + + return 0, false, nil +} + 
+// offsetAt resolves the pack offset for one object index entry. +func (index *idxFile) offsetAt(objectIndex int) (uint64, error) { + if objectIndex < 0 || objectIndex >= index.numObjects { + return 0, fmt.Errorf("objectstore/packed: idx %q offset index out of bounds", index.idxName) + } + + wordOffset := index.offset32Offset + objectIndex*4 + if wordOffset < 0 || wordOffset+4 > len(index.data) { + return 0, fmt.Errorf("objectstore/packed: idx %q truncated 32-bit offset table", index.idxName) + } + + word := binary.BigEndian.Uint32(index.data[wordOffset : wordOffset+4]) + if word&0x80000000 == 0 { + return uint64(word), nil + } + + pos := int(word & 0x7fffffff) + if pos < 0 || pos >= index.offset64Count { + return 0, fmt.Errorf("objectstore/packed: idx %q invalid 64-bit offset position", index.idxName) + } + + offOffset := index.offset64Offset + pos*8 + if offOffset < 0 || offOffset+8 > len(index.data)-2*index.algo.Size() { + return 0, fmt.Errorf("objectstore/packed: idx %q truncated 64-bit offset table", index.idxName) + } + + return binary.BigEndian.Uint64(index.data[offOffset : offOffset+8]), nil +} diff --git a/object/store/packed/idx_lookup_candidates.go b/object/store/packed/idx_lookup_candidates.go new file mode 100644 index 00000000..a2de262a --- /dev/null +++ b/object/store/packed/idx_lookup_candidates.go @@ -0,0 +1,126 @@ +package packed + +import ( + "fmt" + "os" + "slices" + "strings" +) + +// packCandidate describes one discovered pack/index pair. +type packCandidate struct { + // packName is the .pack basename. + packName string + // idxName is the .idx basename. + idxName string + // mtime is the pack file modification time for initial ordering. + mtime int64 +} + +type candidateSnapshot struct { + candidates []packCandidate + candidateByPack map[string]packCandidate +} + +// Refresh rescans objects/pack and atomically installs a fresh candidate list +// for future lookups. +// +// Refresh does not invalidate existing readers. 
Cached pack/index mappings, +// including ones for previously visible candidates, may be retained until +// Close. +func (store *Store) Refresh() error { + store.refreshMu.Lock() + defer store.refreshMu.Unlock() + + candidates, err := store.discoverCandidates() + if err != nil { + return err + } + + candidateByPack := make(map[string]packCandidate, len(candidates)) + for _, candidate := range candidates { + candidateByPack[candidate.packName] = candidate + } + + store.reconcileMRU(candidates) + + store.candidates.Store(&candidateSnapshot{ + candidates: candidates, + candidateByPack: candidateByPack, + }) + + return nil +} + +func (store *Store) ensureCandidates() (*candidateSnapshot, error) { + snapshot := store.candidates.Load() + if snapshot != nil { + return snapshot, nil + } + + err := store.Refresh() + if err != nil { + return nil, err + } + + return store.candidates.Load(), nil +} + +// discoverCandidates scans the objects/pack root and returns sorted pack/index +// pairs. +func (store *Store) discoverCandidates() ([]packCandidate, error) { + dir, err := store.root.Open(".") + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + + return nil, err + } + + defer func() { _ = dir.Close() }() + + entries, err := dir.ReadDir(-1) + if err != nil { + return nil, err + } + + candidates := make([]packCandidate, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { + continue + } + + idxName := entry.Name() + packName := strings.TrimSuffix(idxName, ".idx") + ".pack" + + packInfo, err := store.root.Stat(packName) + if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("objectstore/packed: missing pack file for index %q", idxName) + } + + return nil, err + } + + candidates = append(candidates, packCandidate{ + packName: packName, + idxName: idxName, + mtime: packInfo.ModTime().UnixNano(), + }) + } + + slices.SortFunc(candidates, func(a, b packCandidate) int { + if a.mtime != b.mtime { 
+ if a.mtime > b.mtime { + return -1 + } + + return 1 + } + + return strings.Compare(a.packName, b.packName) + }) + + return candidates, nil +} diff --git a/object/store/packed/idx_open.go b/object/store/packed/idx_open.go new file mode 100644 index 00000000..fabd0c00 --- /dev/null +++ b/object/store/packed/idx_open.go @@ -0,0 +1,98 @@ +package packed + +import ( + "fmt" + "os" + "syscall" + + "codeberg.org/lindenii/furgit/internal/intconv" + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// openIndex returns one opened and parsed index, caching it by pack basename. +func (store *Store) openIndex(candidate packCandidate) (*idxFile, error) { + store.idxMu.RLock() + + index, ok := store.idxByPack[candidate.packName] + if ok { + store.idxMu.RUnlock() + + return index, nil + } + + store.idxMu.RUnlock() + + index, err := openIdxFile(store.root, candidate.idxName, candidate.packName, store.algo) + if err != nil { + return nil, err + } + + store.idxMu.Lock() + + existing, ok := store.idxByPack[candidate.packName] + if ok { + store.idxMu.Unlock() + + _ = index.close() + + return existing, nil + } + + store.idxByPack[candidate.packName] = index + store.idxMu.Unlock() + + return index, nil +} + +// openIdxFile maps and validates one idx v2 file. 
+func openIdxFile(root *os.Root, idxName, packName string, algo objectid.Algorithm) (*idxFile, error) { + file, err := root.Open(idxName) + if err != nil { + return nil, err + } + + info, err := file.Stat() + if err != nil { + _ = file.Close() + + return nil, err + } + + size := info.Size() + if size < 0 || size > int64(int(^uint(0)>>1)) { + _ = file.Close() + + return nil, fmt.Errorf("objectstore/packed: idx %q has unsupported size", idxName) + } + + fd, err := intconv.UintptrToInt(file.Fd()) + if err != nil { + _ = file.Close() + + return nil, err + } + + data, err := syscall.Mmap(fd, 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + _ = file.Close() + + return nil, err + } + + index := &idxFile{ + idxName: idxName, + packName: packName, + algo: algo, + file: file, + data: data, + } + + err = index.parse() + if err != nil { + _ = index.close() + + return nil, err + } + + return index, nil +} diff --git a/object/store/packed/idx_parse.go b/object/store/packed/idx_parse.go new file mode 100644 index 00000000..4da3bf42 --- /dev/null +++ b/object/store/packed/idx_parse.go @@ -0,0 +1,78 @@ +package packed + +import ( + "encoding/binary" + "fmt" +) + +const ( + idxMagicV2 = 0xff744f63 + idxVersionV2 = 2 +) + +// parse validates mapped idx v2 structure and stores table boundaries. 
+func (index *idxFile) parse() error { + hashSize := index.algo.Size() + if hashSize <= 0 { + return fmt.Errorf("objectstore/packed: idx %q has invalid hash algorithm", index.idxName) + } + + minLen := 8 + 256*4 + 2*hashSize + if len(index.data) < minLen { + return fmt.Errorf("objectstore/packed: idx %q too short", index.idxName) + } + + if binary.BigEndian.Uint32(index.data[:4]) != idxMagicV2 { + return fmt.Errorf("objectstore/packed: idx %q invalid magic", index.idxName) + } + + if binary.BigEndian.Uint32(index.data[4:8]) != idxVersionV2 { + return fmt.Errorf("objectstore/packed: idx %q unsupported version", index.idxName) + } + + prev := uint32(0) + + for i := range 256 { + base := 8 + i*4 + + cur := binary.BigEndian.Uint32(index.data[base : base+4]) + if cur < prev { + return fmt.Errorf("objectstore/packed: idx %q has non-monotonic fanout table", index.idxName) + } + + index.fanout[i] = cur + prev = cur + } + + index.numObjects = int(index.fanout[255]) + if index.numObjects < 0 { + return fmt.Errorf("objectstore/packed: idx %q has invalid object count", index.idxName) + } + + namesBytes := index.numObjects * hashSize + crcBytes := index.numObjects * 4 + offset32Bytes := index.numObjects * 4 + + minSize := 8 + 256*4 + namesBytes + crcBytes + offset32Bytes + 2*hashSize + if minSize < 0 || len(index.data) < minSize { + return fmt.Errorf("objectstore/packed: idx %q has truncated tables", index.idxName) + } + + index.namesOffset = 8 + 256*4 + index.offset32Offset = index.namesOffset + namesBytes + crcBytes + index.offset64Offset = index.offset32Offset + offset32Bytes + + offset64Bytes := len(index.data) - index.offset64Offset - 2*hashSize + if offset64Bytes < 0 || offset64Bytes%8 != 0 { + return fmt.Errorf("objectstore/packed: idx %q has malformed 64-bit offset table", index.idxName) + } + + index.offset64Count = offset64Bytes / 8 + + maxOffset64Count := max(index.numObjects-1, 0) + if index.offset64Count > maxOffset64Count { + return fmt.Errorf("objectstore/packed: 
idx %q has oversized 64-bit offset table", index.idxName) + } + + return nil +} diff --git a/object/store/packed/location.go b/object/store/packed/location.go new file mode 100644 index 00000000..82d17c17 --- /dev/null +++ b/object/store/packed/location.go @@ -0,0 +1,7 @@ +package packed + +// location identifies one object entry in a specific pack file. +type location struct { + packName string + offset uint64 +} diff --git a/object/store/packed/new.go b/object/store/packed/new.go new file mode 100644 index 00000000..a4d1752d --- /dev/null +++ b/object/store/packed/new.go @@ -0,0 +1,31 @@ +package packed + +import ( + "fmt" + "os" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// New creates a packed-object store rooted at an objects/pack directory. +func New(root *os.Root, algo objectid.Algorithm, opts Options) (*Store, error) { + if algo.Size() == 0 { + return nil, objectid.ErrInvalidAlgorithm + } + + switch opts.RefreshPolicy { + case RefreshPolicyOnMissing, RefreshPolicyNever: + default: + return nil, fmt.Errorf("objectstore/packed: invalid refresh policy %d", opts.RefreshPolicy) + } + + return &Store{ + root: root, + algo: algo, + refreshPolicy: opts.RefreshPolicy, + mruNodeByPack: make(map[string]*packCandidateNode), + idxByPack: make(map[string]*idxFile), + packs: make(map[string]*packFile), + deltaCache: newDeltaCache(defaultDeltaCacheMaxBytes), + }, nil +} diff --git a/object/store/packed/options.go b/object/store/packed/options.go new file mode 100644 index 00000000..05cbee30 --- /dev/null +++ b/object/store/packed/options.go @@ -0,0 +1,16 @@ +package packed + +// RefreshPolicy configures when candidate pack/index discovery refreshes. +type RefreshPolicy uint8 + +const ( + // RefreshPolicyOnMissing refreshes candidates once after a lookup miss. + RefreshPolicyOnMissing RefreshPolicy = iota + // RefreshPolicyNever disables automatic refresh after lookup misses. + RefreshPolicyNever +) + +// Options configures a packed object store. 
+type Options struct { + RefreshPolicy RefreshPolicy +} diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go new file mode 100644 index 00000000..5abc6c15 --- /dev/null +++ b/object/store/packed/pack.go @@ -0,0 +1,82 @@ +package packed + +import ( + "encoding/binary" + "fmt" + "os" + "syscall" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + "codeberg.org/lindenii/furgit/internal/intconv" +) + +// packFile stores one mapped and validated .pack file. +type packFile struct { + // name is the .pack basename. + name string + // file is the opened pack file descriptor. + file *os.File + // data is the mapped pack bytes. + data []byte +} + +// openPackFile maps and validates one pack file. +func openPackFile(name string, file *os.File, size int64) (*packFile, error) { + if size < 12 { + return nil, fmt.Errorf("objectstore/packed: pack %q too short", name) + } + + if size > int64(int(^uint(0)>>1)) { + return nil, fmt.Errorf("objectstore/packed: pack %q has unsupported size", name) + } + + fd, err := intconv.UintptrToInt(file.Fd()) + if err != nil { + return nil, err + } + + data, err := syscall.Mmap(fd, 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + return nil, err + } + + if binary.BigEndian.Uint32(data[:4]) != packfmt.Signature { + _ = syscall.Munmap(data) + + return nil, fmt.Errorf("objectstore/packed: pack %q invalid signature", name) + } + + version := binary.BigEndian.Uint32(data[4:8]) + if !packfmt.VersionSupported(version) { + _ = syscall.Munmap(data) + + return nil, fmt.Errorf("objectstore/packed: pack %q unsupported version %d", name, version) + } + + return &packFile{name: name, file: file, data: data}, nil +} + +// close unmaps and closes one pack handle. 
+func (pack *packFile) close() error { + var closeErr error + + if pack.data != nil { + err := syscall.Munmap(pack.data) + if err != nil && closeErr == nil { + closeErr = err + } + + pack.data = nil + } + + if pack.file != nil { + err := pack.file.Close() + if err != nil && closeErr == nil { + closeErr = err + } + + pack.file = nil + } + + return closeErr +} diff --git a/object/store/packed/pack_idx_checksum.go b/object/store/packed/pack_idx_checksum.go new file mode 100644 index 00000000..28d4c3db --- /dev/null +++ b/object/store/packed/pack_idx_checksum.go @@ -0,0 +1,34 @@ +package packed + +import ( + "bytes" + "fmt" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// verifyMappedPackMatchesMappedIdx compares one mapped pack trailer hash with +// the pack hash recorded in one mapped idx trailer. +func verifyMappedPackMatchesMappedIdx(packData, idxData []byte, algo objectid.Algorithm) error { + hashSize := algo.Size() + if hashSize <= 0 { + return objectid.ErrInvalidAlgorithm + } + + if len(packData) < hashSize { + return fmt.Errorf("objectstore/packed: pack too short for trailer hash") + } + + if len(idxData) < hashSize*2 { + return fmt.Errorf("objectstore/packed: idx too short for trailer hashes") + } + + packTrailerHash := packData[len(packData)-hashSize:] + + idxPackHash := idxData[len(idxData)-hashSize*2 : len(idxData)-hashSize] + if !bytes.Equal(packTrailerHash, idxPackHash) { + return fmt.Errorf("objectstore/packed: pack hash does not match idx") + } + + return nil +} diff --git a/object/store/packed/read_bytes.go b/object/store/packed/read_bytes.go new file mode 100644 index 00000000..333cfaae --- /dev/null +++ b/object/store/packed/read_bytes.go @@ -0,0 +1,38 @@ +package packed + +import ( + "fmt" + + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadBytesContent reads an object's type and content bytes. 
+func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + return store.deltaResolveContent(loc) +} + +// ReadBytesFull reads a full serialized object as "type size\0content". +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + ty, content, err := store.ReadBytesContent(id) + if err != nil { + return nil, err + } + + header, ok := objectheader.Encode(ty, int64(len(content))) + if !ok { + return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", ty) + } + + out := make([]byte, len(header)+len(content)) + copy(out, header) + copy(out[len(header):], content) + + return out, nil +} diff --git a/object/store/packed/read_closer.go b/object/store/packed/read_closer.go new file mode 100644 index 00000000..c317d002 --- /dev/null +++ b/object/store/packed/read_closer.go @@ -0,0 +1,19 @@ +package packed + +import "io" + +// readCloser proxies reads and closes one underlying closer. +type readCloser struct { + reader io.Reader + closer io.Closer +} + +// Read proxies reads to the underlying reader. +func (reader *readCloser) Read(dst []byte) (int, error) { + return reader.reader.Read(dst) +} + +// Close closes the underlying closer. +func (reader *readCloser) Close() error { + return reader.closer.Close() +} diff --git a/object/store/packed/read_header.go b/object/store/packed/read_header.go new file mode 100644 index 00000000..d774de7c --- /dev/null +++ b/object/store/packed/read_header.go @@ -0,0 +1,20 @@ +package packed + +import ( + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads an object's type and declared content size. +// +// It resolves header metadata only. 
It does not verify that the full pack entry +// payload is readable and does not verify any zlib Adler-32 trailer for +// compressed entry data. +func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + return store.resolveHeaderAt(loc) +} diff --git a/object/store/packed/read_header_resolve.go b/object/store/packed/read_header_resolve.go new file mode 100644 index 00000000..f61cd104 --- /dev/null +++ b/object/store/packed/read_header_resolve.go @@ -0,0 +1,66 @@ +package packed + +import ( + "fmt" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// resolveHeaderAt resolves one object's canonical type and declared content size. +func (store *Store) resolveHeaderAt(start location) (objecttype.Type, int64, error) { + visited := make(map[location]struct{}) + current := start + declaredSize := int64(-1) + + for { + if _, ok := visited[current]; ok { + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: delta cycle while resolving object header") + } + + visited[current] = struct{}{} + + pack, meta, err := store.entryMetaAt(current) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + if declaredSize < 0 { + if packfmt.IsBaseObjectType(meta.ty) { + declaredSize = meta.size + } else { + size, err := deltaDeclaredSizeAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + declaredSize = size + } + } + + if packfmt.IsBaseObjectType(meta.ty) { + return meta.ty, declaredSize, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta: + next, err := store.lookup(meta.baseRefID) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + current = next + case objecttype.TypeOfsDelta: + current = location{ + packName: current.packName, + offset: meta.baseOfs, + } + case objecttype.TypeCommit, objecttype.TypeTree, 
objecttype.TypeBlob, objecttype.TypeTag: + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) + case objecttype.TypeInvalid, objecttype.TypeFuture: + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + default: + return objecttype.TypeInvalid, 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } + } +} diff --git a/object/store/packed/read_reader.go b/object/store/packed/read_reader.go new file mode 100644 index 00000000..0608e390 --- /dev/null +++ b/object/store/packed/read_reader.go @@ -0,0 +1,103 @@ +package packed + +import ( + "bytes" + "fmt" + "io" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + "codeberg.org/lindenii/furgit/internal/iolimit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadReaderContent reads an object's type, declared content size, and content +// stream. +// +// The caller must close the returned reader. +// +// For base pack entries, the returned reader borrows store-owned mapped pack +// data and is only valid until the store is closed. +// +// Close releases reader-local resources only. It does not drain unread data for +// additional validation. In particular, malformed trailing compressed data, +// trailing bytes past the declared object size, and the zlib Adler-32 trailer +// may go unverified unless the caller reads to io.EOF. 
+func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + loc, err := store.lookup(id) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + pack, meta, err := store.entryMetaAt(loc) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + if packfmt.IsBaseObjectType(meta.ty) { + zr, err := zlibReaderAt(pack, meta.dataOffset) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + return meta.ty, meta.size, &readCloser{ + reader: iolimit.ExpectLengthReader(zr, meta.size), + closer: zr, + }, nil + } + + ty, content, err := store.deltaResolveContent(loc) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + return ty, int64(len(content)), io.NopCloser(bytes.NewReader(content)), nil +} + +// ReadReaderFull reads a full serialized object stream as "type size\0content". +// +// The caller must close the returned reader. +// +// For base pack entries, the returned reader borrows store-owned mapped pack +// data and is only valid until the store is closed. +// +// Close releases reader-local resources only. It does not drain unread data for +// additional validation. In particular, malformed trailing compressed data, +// trailing bytes past the declared object size, and the zlib Adler-32 trailer +// may go unverified unless the caller reads to io.EOF. 
+func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + loc, err := store.lookup(id) + if err != nil { + return nil, err + } + + pack, meta, err := store.entryMetaAt(loc) + if err != nil { + return nil, err + } + + if packfmt.IsBaseObjectType(meta.ty) { + header, ok := objectheader.Encode(meta.ty, meta.size) + if !ok { + return nil, fmt.Errorf("objectstore/packed: failed to encode object header for type %d", meta.ty) + } + + zr, err := zlibReaderAt(pack, meta.dataOffset) + if err != nil { + return nil, err + } + + return &readCloser{ + reader: io.MultiReader(bytes.NewReader(header), iolimit.ExpectLengthReader(zr, meta.size)), + closer: zr, + }, nil + } + + raw, err := store.ReadBytesFull(id) + if err != nil { + return nil, err + } + + return io.NopCloser(bytes.NewReader(raw)), nil +} diff --git a/object/store/packed/read_size.go b/object/store/packed/read_size.go new file mode 100644 index 00000000..5e744a06 --- /dev/null +++ b/object/store/packed/read_size.go @@ -0,0 +1,46 @@ +package packed + +import ( + "fmt" + + packfmt "codeberg.org/lindenii/furgit/format/packfile" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadSize reads an object's declared content size. +// +// Like ReadHeader, it resolves header metadata only. It does not verify that +// the full pack entry payload is readable and does not verify any zlib +// Adler-32 trailer for compressed entry data. +func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { + loc, err := store.lookup(id) + if err != nil { + return 0, err + } + + return store.resolveSizeAt(loc) +} + +// resolveSizeAt resolves one object's declared content size from location. 
+func (store *Store) resolveSizeAt(start location) (int64, error) { + pack, meta, err := store.entryMetaAt(start) + if err != nil { + return 0, err + } + + if packfmt.IsBaseObjectType(meta.ty) { + return meta.size, nil + } + + switch meta.ty { + case objecttype.TypeRefDelta, objecttype.TypeOfsDelta: + return deltaDeclaredSizeAt(pack, meta.dataOffset) + case objecttype.TypeInvalid, objecttype.TypeFuture: + return 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: + return 0, fmt.Errorf("objectstore/packed: internal invariant violation for base type %d", meta.ty) + default: + return 0, fmt.Errorf("objectstore/packed: unsupported pack type %d", meta.ty) + } +} diff --git a/object/store/packed/read_test.go b/object/store/packed/read_test.go new file mode 100644 index 00000000..45ee8b01 --- /dev/null +++ b/object/store/packed/read_test.go @@ -0,0 +1,301 @@ +package packed_test + +import ( + "bytes" + "errors" + "fmt" + "io/fs" + "strconv" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" + "codeberg.org/lindenii/furgit/object/store/packed" +) + +func TestPackedStoreReadAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo, ids := createPackedFixtureRepo(t, algo) + store := openPackedStore(t, testRepo, algo) + + for _, id := range ids { + t.Run(id.String(), func(t *testing.T) { + wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, id) + + gotHeaderType, gotHeaderSize, err := store.ReadHeader(id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + + if gotHeaderType != wantType { + t.Fatalf("ReadHeader type = %v, want %v", gotHeaderType, wantType) + } + + if gotHeaderSize != int64(len(wantBody)) { + t.Fatalf("ReadHeader 
size = %d, want %d", gotHeaderSize, len(wantBody)) + } + + gotSize, err := store.ReadSize(id) + if err != nil { + t.Fatalf("ReadSize: %v", err) + } + + if gotSize != int64(len(wantBody)) { + t.Fatalf("ReadSize = %d, want %d", gotSize, len(wantBody)) + } + + gotRaw, err := store.ReadBytesFull(id) + if err != nil { + t.Fatalf("ReadBytesFull: %v", err) + } + + if !bytes.Equal(gotRaw, wantRaw) { + t.Fatalf("ReadBytesFull mismatch") + } + + gotType, gotBody, err := store.ReadBytesContent(id) + if err != nil { + t.Fatalf("ReadBytesContent: %v", err) + } + + if gotType != wantType { + t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType) + } + + if !bytes.Equal(gotBody, wantBody) { + t.Fatalf("ReadBytesContent mismatch") + } + + fullReader, err := store.ReadReaderFull(id) + if err != nil { + t.Fatalf("ReadReaderFull: %v", err) + } + + got := mustReadAllAndClose(t, fullReader) + if !bytes.Equal(got, wantRaw) { + t.Fatalf("ReadReaderFull mismatch") + } + + contentType, contentSize, contentReader, err := store.ReadReaderContent(id) + if err != nil { + t.Fatalf("ReadReaderContent: %v", err) + } + + if contentType != wantType { + t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType) + } + + if contentSize != int64(len(wantBody)) { + t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody)) + } + + got = mustReadAllAndClose(t, contentReader) + if !bytes.Equal(got, wantBody) { + t.Fatalf("ReadReaderContent mismatch") + } + }) + } + }) +} + +func TestPackedStoreErrors(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo, _ := createPackedFixtureRepo(t, algo) + store := openPackedStore(t, testRepo, algo) + + notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(notFound): %v", err) + } + + _, err = store.ReadBytesFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + 
t.Fatalf("ReadBytesFull not-found error = %v", err) + } + + _, _, err = store.ReadBytesContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesContent not-found error = %v", err) + } + + _, err = store.ReadReaderFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderFull not-found error = %v", err) + } + + _, _, _, err = store.ReadReaderContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderContent not-found error = %v", err) + } + + _, _, err = store.ReadHeader(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadHeader not-found error = %v", err) + } + + _, err = store.ReadSize(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadSize not-found error = %v", err) + } + + var otherAlgo objectid.Algorithm + + for _, candidate := range objectid.SupportedAlgorithms() { + if candidate != algo { + otherAlgo = candidate + + break + } + } + + if otherAlgo != objectid.AlgorithmUnknown { + mismatchID, err := objectid.ParseHex(otherAlgo, strings.Repeat("0", otherAlgo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(mismatch): %v", err) + } + + _, err = store.ReadBytesFull(mismatchID) + if err == nil || !strings.Contains(err.Error(), "algorithm mismatch") { + t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err) + } + } + }) +} + +func TestPackedStoreNewValidation(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo, _ := createPackedFixtureRepo(t, algo) + + store := openPackedStore(t, testRepo, algo) + + err := store.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + }) +} + +func TestPackedStoreInvalidAlgorithm(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectid.AlgorithmSHA1, Bare: true}) + + root := testRepo.OpenPackRoot(t) + + _, err := 
packed.New(root, objectid.AlgorithmUnknown, packed.Options{}) + if !errors.Is(err, objectid.ErrInvalidAlgorithm) { + t.Fatalf("packed.New invalid algorithm error = %v", err) + } +} + +func TestPackedStoreReadHeaderUsesResolvedObjectSizeForDelta(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + + var parent objectid.ObjectID + + for i := range 96 { + content := strings.Repeat("common-line-"+strconv.Itoa(i%7)+"\n", 384) + fmt.Sprintf("tail-%03d\n", i) + + _, treeID := testRepo.MakeSingleFileTree(t, "file.txt", []byte(content)) + if i == 0 { + parent = testRepo.CommitTree(t, treeID, "delta-header-size-0") + + continue + } + + parent = testRepo.CommitTree(t, treeID, fmt.Sprintf("delta-header-size-%03d", i), parent) + } + + testRepo.UpdateRef(t, "refs/heads/main", parent) + testRepo.Repack(t, "-a", "-d", "-f", "--window=128", "--depth=128") + + deltaID, wantResolvedSize := findDeltaObjectWithResolvedSizeMismatch(t, testRepo, algo) + store := openPackedStore(t, testRepo, algo) + + _, gotSize, err := store.ReadHeader(deltaID) + if err != nil { + t.Fatalf("ReadHeader(%s): %v", deltaID, err) + } + + if gotSize != wantResolvedSize { + t.Fatalf("ReadHeader(%s) size = %d, want resolved size %d", deltaID, gotSize, wantResolvedSize) + } + + gotReadSize, err := store.ReadSize(deltaID) + if err != nil { + t.Fatalf("ReadSize(%s): %v", deltaID, err) + } + + if gotReadSize != wantResolvedSize { + t.Fatalf("ReadSize(%s) = %d, want resolved size %d", deltaID, gotReadSize, wantResolvedSize) + } + }) +} + +func findDeltaObjectWithResolvedSizeMismatch(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) (objectid.ObjectID, int64) { + t.Helper() + + packRoot := testRepo.OpenPackRoot(t) + + entries, err := fs.ReadDir(packRoot.FS(), ".") + if err != nil { + t.Fatalf("ReadDir(pack): %v", err) + } + + var idxName string 
+ + for _, entry := range entries { + if strings.HasSuffix(entry.Name(), ".idx") { + idxName = entry.Name() + + break + } + } + + if idxName == "" { + t.Fatalf("no idx files found") + } + + verifyOut := testRepo.Run(t, "verify-pack", "-v", "objects/pack/"+idxName) + for line := range strings.SplitSeq(strings.TrimSpace(verifyOut), "\n") { + fields := strings.Fields(line) + if len(fields) < 7 { + continue + } + + idHex := fields[0] + + deltaStreamSize, err := strconv.ParseInt(fields[2], 10, 64) + if err != nil { + continue + } + + resolvedSizeStr := testRepo.Run(t, "cat-file", "-s", idHex) + + resolvedSize, err := strconv.ParseInt(strings.TrimSpace(resolvedSizeStr), 10, 64) + if err != nil { + t.Fatalf("parse cat-file size for %s: %v", idHex, err) + } + + if deltaStreamSize == resolvedSize { + continue + } + + id, err := objectid.ParseHex(algo, idHex) + if err != nil { + t.Fatalf("ParseHex(%s): %v", idHex, err) + } + + return id, resolvedSize + } + + t.Fatalf("did not find a delta object with mismatched stream/resolved size") + + return objectid.ObjectID{}, 0 +} diff --git a/object/store/packed/store.go b/object/store/packed/store.go new file mode 100644 index 00000000..ef8a5eaf --- /dev/null +++ b/object/store/packed/store.go @@ -0,0 +1,51 @@ +// Package packed provides packfile reading and associated indexes. +package packed + +import ( + "os" + "sync" + "sync/atomic" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// Store reads Git objects from pack/index files under an objects/pack root. +// +// Store borrows its root. Cached pack/index mappings are retained until Close. +type Store struct { + // root is the borrowed objects/pack capability used for all file access. + root *os.Root + // algo is the expected object ID algorithm for lookups. + algo objectid.Algorithm + // refreshPolicy controls automatic candidate refresh on lookup misses. 
+ refreshPolicy RefreshPolicy + + // candidates stores the latest immutable candidate snapshot. + candidates atomic.Pointer[candidateSnapshot] + // refreshMu serializes candidate refresh. + refreshMu sync.Mutex + // mruMu guards candidate MRU linked-list state. + mruMu sync.RWMutex + // mruHead is the first pack in MRU order. + mruHead *packCandidateNode + // mruTail is the last pack in MRU order. + mruTail *packCandidateNode + // mruNodeByPack maps pack basename to MRU node. + mruNodeByPack map[string]*packCandidateNode + // idxByPack caches opened and parsed indexes by pack basename. + idxByPack map[string]*idxFile + + // stateMu guards pack cache and close state. + stateMu sync.RWMutex + // idxMu guards parsed index cache. + idxMu sync.RWMutex + // cacheMu guards delta cache operations. + cacheMu sync.RWMutex + // packs caches opened .pack handles by basename. + packs map[string]*packFile + // deltaCache caches resolved base objects by pack location. + deltaCache *deltaCache +} + +var _ objectstore.ReadingStore = (*Store)(nil) diff --git a/object/store/packed/store_lookup.go b/object/store/packed/store_lookup.go new file mode 100644 index 00000000..0513caa7 --- /dev/null +++ b/object/store/packed/store_lookup.go @@ -0,0 +1,106 @@ +package packed + +import ( + "errors" + + objectid "codeberg.org/lindenii/furgit/object/id" + objectstore "codeberg.org/lindenii/furgit/object/store" +) + +// lookup resolves one object ID to its pack location. 
+func (store *Store) lookup(id objectid.ObjectID) (location, error) { + var zero location + if id.Algorithm() != store.algo { + return zero, errors.New("objectstore/packed: object id algorithm mismatch") + } + + snapshot, err := store.ensureCandidates() + if err != nil { + return zero, err + } + + loc, ok, err := store.lookupInCandidates(id, snapshot) + if err != nil { + return zero, err + } + + if ok { + return loc, nil + } + + if store.refreshPolicy == RefreshPolicyOnMissing { //nolint:nestif + err = store.Refresh() + if err != nil { + return zero, err + } + + refreshed := store.candidates.Load() + if refreshed != nil && refreshed != snapshot { + loc, ok, err = store.lookupInCandidates(id, refreshed) + if err != nil { + return zero, err + } + + if ok { + return loc, nil + } + } + } + + return zero, objectstore.ErrObjectNotFound +} + +func (store *Store) lookupInCandidates( + id objectid.ObjectID, + snapshot *candidateSnapshot, +) (location, bool, error) { + var zero location + + nextPackName := store.firstCandidatePackName(snapshot) + for nextPackName != "" { + candidate, ok := snapshot.candidateByPack[nextPackName] + if !ok { + nextPackName = store.firstCandidatePackName(snapshot) + + continue + } + + nextPackName = store.nextCandidatePackName(candidate.packName, snapshot) + + index, err := store.openIndex(candidate) + if err != nil { + return zero, false, err + } + + offset, ok, err := index.lookup(id) + if err != nil { + return zero, false, err + } + + if ok { + store.touchCandidate(candidate.packName) + + return location{packName: index.packName, offset: offset}, true, nil + } + } + + for _, candidate := range snapshot.candidates { + index, err := store.openIndex(candidate) + if err != nil { + return zero, false, err + } + + offset, ok, err := index.lookup(id) + if err != nil { + return zero, false, err + } + + if ok { + store.touchCandidate(candidate.packName) + + return location{packName: index.packName, offset: offset}, true, nil + } + } + + return zero, 
false, nil +} diff --git a/object/store/packed/store_open_pack.go b/object/store/packed/store_open_pack.go new file mode 100644 index 00000000..c621e08c --- /dev/null +++ b/object/store/packed/store_open_pack.go @@ -0,0 +1,57 @@ +package packed + +// openPack returns one opened and validated pack handle. +func (store *Store) openPack(name string) (*packFile, error) { + store.stateMu.RLock() + + pack, ok := store.packs[name] + if ok { + store.stateMu.RUnlock() + + return pack, nil + } + + store.stateMu.RUnlock() + + file, err := store.root.Open(name) + if err != nil { + return nil, err + } + + info, err := file.Stat() + if err != nil { + _ = file.Close() + + return nil, err + } + + pack, err = openPackFile(name, file, info.Size()) + if err != nil { + _ = file.Close() + + return nil, err + } + + err = store.verifyPackMatchesIndexes(pack) + if err != nil { + _ = pack.close() + + return nil, err + } + + store.stateMu.Lock() + + existing, ok := store.packs[name] + if ok { + store.stateMu.Unlock() + + _ = pack.close() + + return existing, nil + } + + store.packs[name] = pack + store.stateMu.Unlock() + + return pack, nil +} diff --git a/object/store/packed/trailer_match.go b/object/store/packed/trailer_match.go new file mode 100644 index 00000000..dc43e37d --- /dev/null +++ b/object/store/packed/trailer_match.go @@ -0,0 +1,29 @@ +package packed + +import "fmt" + +// verifyPackMatchesIndexes checks that one opened pack's trailer hash matches +// every loaded index that references the same pack name. 
+func (store *Store) verifyPackMatchesIndexes(pack *packFile) error { + snapshot, err := store.ensureCandidates() + if err != nil { + return err + } + + candidate, ok := snapshot.candidateByPack[pack.name] + if !ok { + return fmt.Errorf("objectstore/packed: missing index for pack %q", pack.name) + } + + index, err := store.openIndex(candidate) + if err != nil { + return err + } + + err = verifyMappedPackMatchesMappedIdx(pack.data, index.data, store.algo) + if err != nil { + return fmt.Errorf("objectstore/packed: pack %q does not match idx %q: %w", pack.name, index.idxName, err) + } + + return nil +} diff --git a/object/store/reading.go b/object/store/reading.go new file mode 100644 index 00000000..017c3e94 --- /dev/null +++ b/object/store/reading.go @@ -0,0 +1,75 @@ +package objectstore + +import ( + "io" + + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadingStore reads Git objects by object ID. +// +// Unless an implementation explicitly documents otherwise, values returned by +// ReadingStore methods are only valid until the store is closed. +type ReadingStore interface { + // ReadBytesFull reads a full serialized object as "type size\0content". + // + // In a valid repository, hashing this payload with the same algorithm yields + // the requested object ID. Readers should treat this as a repository + // invariant and should not re-verify it on every read. + // + // Any read-time integrity verification beyond producing this payload is + // implementation-defined. + ReadBytesFull(id objectid.ObjectID) ([]byte, error) + + // ReadBytesContent reads an object's type and content bytes. + // + // Any read-time integrity verification beyond producing this payload is + // implementation-defined. + ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) + + // ReadReaderFull reads a full serialized object stream as "type size\0content". + // + // Caller must close the returned reader. 
+ // The returned reader is only valid until the store is closed. + // + // Any read-time integrity verification performed while producing the stream + // is implementation-defined. + ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) + + // ReadReaderContent reads an object's type, declared content length, + // and content stream. + // + // Caller must close the returned reader. + // The returned reader is only valid until the store is closed. + // + // Any read-time integrity verification performed while producing the stream + // is implementation-defined. + ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) + + // ReadSize reads an object's declared content length. + // + // This is equivalent to ReadHeader(...).size and may be cheaper than + // ReadHeader when callers do not need object type. + // + // Any read-time integrity verification performed to produce the size is + // implementation-defined. + ReadSize(id objectid.ObjectID) (int64, error) + + // ReadHeader reads an object's type and declared content length. + // + // Any read-time integrity verification performed to produce the header is + // implementation-defined. + ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) + + // Refresh updates any backend-local discovery/cache view of on-disk objects. + // + // Backends without dynamic discovery should return nil. + Refresh() error + + // Close releases resources associated with the backend. + // + // Repeated calls to Close are undefined behavior unless the implementation + // explicitly documents otherwise. + Close() error +} diff --git a/reachability/reachability.go b/reachability/reachability.go index 7e34dc32..14bb79cf 100644 --- a/reachability/reachability.go +++ b/reachability/reachability.go @@ -10,17 +10,17 @@ import ( // // It is not safe for concurrent use. 
type Reachability struct { - store objectstore.Store + store objectstore.ReadingStore graph *commitgraphread.Reader } // New builds a Reachability over one object store. -func New(store objectstore.Store) *Reachability { +func New(store objectstore.ReadingStore) *Reachability { return &Reachability{store: store} } // NewWithCommitGraph builds a Reachability over one object store with an // optional commit-graph reader for faster commit-domain traversal. -func NewWithCommitGraph(store objectstore.Store, graph *commitgraphread.Reader) *Reachability { +func NewWithCommitGraph(store objectstore.ReadingStore, graph *commitgraphread.Reader) *Reachability { return &Reachability{store: store, graph: graph} } diff --git a/repository/objects.go b/repository/objects.go index 71b01b36..0aba751a 100644 --- a/repository/objects.go +++ b/repository/objects.go @@ -17,7 +17,7 @@ func openObjectStore( root *os.Root, algo objectid.Algorithm, ) ( - objects objectstore.Store, + objects objectstore.ReadingStore, objectsRoot *os.Root, objectsPackRoot *os.Root, objectsLoose *objectloose.Store, @@ -36,7 +36,7 @@ func openObjectStore( return nil, nil, nil, nil, nil, err } - backends := []objectstore.Store{objectsLoose} + backends := []objectstore.ReadingStore{objectsLoose} objectsPackRoot, err = objectsRoot.OpenRoot("pack") if err == nil { @@ -73,6 +73,6 @@ func openObjectStore( // Close. // //nolint:ireturn -func (repo *Repository) Objects() objectstore.Store { +func (repo *Repository) Objects() objectstore.ReadingStore { return repo.objects } diff --git a/repository/repository.go b/repository/repository.go index cce73161..6b970c20 100644 --- a/repository/repository.go +++ b/repository/repository.go @@ -25,7 +25,7 @@ type Repository struct { config *config.Config algo objectid.Algorithm - objects objectstore.Store + objects objectstore.ReadingStore objectsRoot *os.Root objectsPackRoot *os.Root objectsLoose *objectloose.Store |
