diff options
| author | 2026-03-25 14:30:31 +0000 | |
|---|---|---|
| committer | 2026-03-25 14:30:31 +0000 | |
| commit | bfa0a3f5f18b752a6ebd3d5b37411c6871f7bb17 (patch) | |
| tree | 8ee2479273e2b34d284c30703c2be48efe197556 /object/store/loose | |
| parent | *: Resort import order (diff) | |
| signature | No signature | |
*: objectstore -> object/store
Diffstat (limited to 'object/store/loose')
| -rw-r--r-- | object/store/loose/helpers_test.go | 107 | ||||
| -rw-r--r-- | object/store/loose/parse.go | 55 | ||||
| -rw-r--r-- | object/store/loose/paths.go | 43 | ||||
| -rw-r--r-- | object/store/loose/read_bytes.go | 49 | ||||
| -rw-r--r-- | object/store/loose/read_header.go | 37 | ||||
| -rw-r--r-- | object/store/loose/read_reader.go | 118 | ||||
| -rw-r--r-- | object/store/loose/read_size.go | 13 | ||||
| -rw-r--r-- | object/store/loose/read_test.go | 212 | ||||
| -rw-r--r-- | object/store/loose/refresh.go | 6 | ||||
| -rw-r--r-- | object/store/loose/store.go | 41 | ||||
| -rw-r--r-- | object/store/loose/write_bytes.go | 18 | ||||
| -rw-r--r-- | object/store/loose/write_reader.go | 81 | ||||
| -rw-r--r-- | object/store/loose/write_temp_object_file.go | 30 | ||||
| -rw-r--r-- | object/store/loose/write_test.go | 137 | ||||
| -rw-r--r-- | object/store/loose/write_writer.go | 94 | ||||
| -rw-r--r-- | object/store/loose/write_writer_accept.go | 61 | ||||
| -rw-r--r-- | object/store/loose/write_writer_finalize.go | 90 |
17 files changed, 1192 insertions, 0 deletions
diff --git a/object/store/loose/helpers_test.go b/object/store/loose/helpers_test.go new file mode 100644 index 00000000..e69c7e7a --- /dev/null +++ b/object/store/loose/helpers_test.go @@ -0,0 +1,107 @@ +package loose_test + +import ( + "io" + "os" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store/loose" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func openLooseStore(t *testing.T, testRepo *testgit.TestRepo, algo objectid.Algorithm) *loose.Store { + t.Helper() + + root := testRepo.OpenObjectsRoot(t) + + store, err := loose.New(root, algo) + if err != nil { + t.Fatalf("loose.New: %v", err) + } + + return store +} + +func mustReadAllAndClose(t *testing.T, reader io.ReadCloser) []byte { + t.Helper() + + data, err := io.ReadAll(reader) + if err != nil { + _ = reader.Close() + + t.Fatalf("ReadAll: %v", err) + } + + err = reader.Close() + if err != nil { + t.Fatalf("Close: %v", err) + } + + return data +} + +func expectedRawObject(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) (objecttype.Type, []byte, []byte) { + t.Helper() + + typeName := testRepo.Run(t, "cat-file", "-t", id.String()) + + ty, ok := objecttype.ParseName(typeName) + if !ok { + t.Fatalf("ParseName(%q) failed", typeName) + } + + body := testRepo.CatFile(t, typeName, id) + + header, ok := objectheader.Encode(ty, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + + return ty, body, raw +} + +func corruptLooseObjectTrailer(t *testing.T, testRepo *testgit.TestRepo, id objectid.ObjectID) { + t.Helper() + + root := testRepo.OpenObjectsRoot(t) + + hex := id.String() + relPath := hex[:2] + "/" + hex[2:] + + file, err := root.OpenFile(relPath, os.O_RDWR, 0) + if err != nil { + t.Fatalf("OpenFile(%q): %v", relPath, err) + } + + defer func() { _ = file.Close() }() + + info, err := file.Stat() + if err != nil { + t.Fatalf("Stat(%q): %v", relPath, err) + } + + if info.Size() == 0 { + t.Fatalf("corrupt trailer on empty file %q", relPath) + } + + last := make([]byte, 1) + + _, err = file.ReadAt(last, info.Size()-1) + if err != nil { + t.Fatalf("ReadAt(%q): %v", relPath, err) + } + + last[0] ^= 0xff + + _, err = file.WriteAt(last, info.Size()-1) + if err != nil { + t.Fatalf("WriteAt(%q): %v", relPath, err) + } +} diff --git a/object/store/loose/parse.go b/object/store/loose/parse.go new file mode 100644 index 00000000..dfb420ba --- /dev/null +++ b/object/store/loose/parse.go @@ -0,0 +1,55 @@ +package loose + +import ( + "bufio" + "errors" + "io" + "os" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + objectheader "codeberg.org/lindenii/furgit/object/header" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// decodeAll inflates the full loose object payload from file. +func decodeAll(file *os.File) ([]byte, error) { + zr, err := zlib.NewReader(file) + if err != nil { + return nil, err + } + + defer func() { _ = zr.Close() }() + + return io.ReadAll(zr) +} + +// parseRaw parses a loose object payload in "type size\0content" format. +func parseRaw(raw []byte) (objecttype.Type, []byte, error) { + ty, size, headerLen, ok := objectheader.Parse(raw) + if !ok { + return objecttype.TypeInvalid, nil, errors.New("objectstore/loose: malformed object header") + } + + content := raw[headerLen:] + if int64(len(content)) != size { + return objecttype.TypeInvalid, nil, errors.New("objectstore/loose: object header size/content mismatch") + } + + return ty, content, nil +} + +// readHeader reads and parses a loose object header from br, and returns +// the raw header bytes including the trailing NUL. +func readHeader(br *bufio.Reader) ([]byte, objecttype.Type, int64, error) { + header, err := br.ReadSlice(0) + if err != nil { + return nil, objecttype.TypeInvalid, 0, err + } + + ty, size, _, ok := objectheader.Parse(header) + if !ok { + return nil, objecttype.TypeInvalid, 0, errors.New("objectstore/loose: malformed object header") + } + + return header, ty, size, nil +} diff --git a/object/store/loose/paths.go b/object/store/loose/paths.go new file mode 100644 index 00000000..e03f5c28 --- /dev/null +++ b/object/store/loose/paths.go @@ -0,0 +1,43 @@ +package loose + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store" +) + +// objectPath returns the loose object path for id relative to the objects root. +func (store *Store) objectPath(id objectid.ObjectID) (string, error) { + if id.Algorithm() != store.algo { + return "", fmt.Errorf("objectstore/loose: object id algorithm mismatch: got %s want %s", id.Algorithm(), store.algo) + } + + hex := id.String() + + return filepath.Join(hex[:2], hex[2:]), nil +} + +// openObject opens the loose object file for id. +// Missing files cause objectstore.ErrObjectNotFound. +func (store *Store) openObject(id objectid.ObjectID) (*os.File, error) { + relPath, err := store.objectPath(id) + if err != nil { + return nil, err + } + + file, err := store.root.Open(relPath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil, objectstore.ErrObjectNotFound + } + + return nil, err + } + + return file, nil +} diff --git a/object/store/loose/read_bytes.go b/object/store/loose/read_bytes.go new file mode 100644 index 00000000..0b6da81b --- /dev/null +++ b/object/store/loose/read_bytes.go @@ -0,0 +1,49 @@ +package loose + +import ( + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// readBytesParsed reads, inflates, and parses a loose object in one pass. +// It returns the full raw payload and its parsed type and content. +func (store *Store) readBytesParsed(id objectid.ObjectID) ([]byte, objecttype.Type, []byte, error) { + file, err := store.openObject(id) + if err != nil { + return nil, objecttype.TypeInvalid, nil, err + } + + defer func() { _ = file.Close() }() + + raw, err := decodeAll(file) + if err != nil { + return nil, objecttype.TypeInvalid, nil, err + } + + ty, content, err := parseRaw(raw) + if err != nil { + return nil, objecttype.TypeInvalid, nil, err + } + + return raw, ty, content, nil +} + +// ReadBytesFull reads a full serialized object as "type size\0content". +func (store *Store) ReadBytesFull(id objectid.ObjectID) ([]byte, error) { + raw, _, _, err := store.readBytesParsed(id) + if err != nil { + return nil, err + } + + return raw, nil +} + +// ReadBytesContent reads an object's type and content bytes. +func (store *Store) ReadBytesContent(id objectid.ObjectID) (objecttype.Type, []byte, error) { + _, ty, content, err := store.readBytesParsed(id) + if err != nil { + return objecttype.TypeInvalid, nil, err + } + + return ty, content, nil +} diff --git a/object/store/loose/read_header.go b/object/store/loose/read_header.go new file mode 100644 index 00000000..37bf40de --- /dev/null +++ b/object/store/loose/read_header.go @@ -0,0 +1,37 @@ +package loose + +import ( + "bufio" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// ReadHeader reads an object's type and declared content length. +// +// It parses only enough of the zlib-decoded object to recover the object +// header. It does not verify that the remaining object content is readable and +// does not verify the zlib Adler-32 trailer. +func (store *Store) ReadHeader(id objectid.ObjectID) (objecttype.Type, int64, error) { + file, err := store.openObject(id) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + defer func() { _ = file.Close() }() + + zr, err := zlib.NewReader(file) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + defer func() { _ = zr.Close() }() + + _, ty, size, err := readHeader(bufio.NewReader(zr)) + if err != nil { + return objecttype.TypeInvalid, 0, err + } + + return ty, size, nil +} diff --git a/object/store/loose/read_reader.go b/object/store/loose/read_reader.go new file mode 100644 index 00000000..29b71627 --- /dev/null +++ b/object/store/loose/read_reader.go @@ -0,0 +1,118 @@ +package loose + +import ( + "bufio" + "bytes" + "errors" + "io" + "os" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" + "codeberg.org/lindenii/furgit/internal/iolimit" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +type objectReader struct { + // reader is the stream exposed by Read. + reader io.Reader + // file is the underlying loose object file and is closed by Close. + file *os.File + // zr is the zlib decoder and is closed by Close. + zr io.ReadCloser +} + +func (reader *objectReader) Read(dst []byte) (int, error) { + return reader.reader.Read(dst) +} + +func (reader *objectReader) Close() error { + errZlib := reader.zr.Close() + errFile := reader.file.Close() + + return errors.Join(errZlib, errFile) +} + +// openInflated opens and zlib-decodes a loose object file. +// The caller owns both returned closers and must close them. +func (store *Store) openInflated(id objectid.ObjectID) (*os.File, io.ReadCloser, error) { + file, err := store.openObject(id) + if err != nil { + return nil, nil, err + } + + zr, err := zlib.NewReader(file) + if err != nil { + _ = file.Close() + + return nil, nil, err + } + + return file, zr, nil +} + +// ReadReaderFull reads a full serialized object stream as "type size\0content". +// +// The caller must close the returned reader. +// +// Close releases resources only. It does not drain unread data for additional +// validation. In particular, malformed trailing compressed data, trailing bytes +// past the declared object size, and the zlib Adler-32 trailer may go +// unverified unless the caller reads to io.EOF. +func (store *Store) ReadReaderFull(id objectid.ObjectID) (io.ReadCloser, error) { + file, zr, err := store.openInflated(id) + if err != nil { + return nil, err + } + + br := bufio.NewReader(zr) + + header, _, size, err := readHeader(br) + if err != nil { + _ = zr.Close() + _ = file.Close() + + return nil, err + } + + return &objectReader{ + reader: io.MultiReader( + bytes.NewReader(header), + iolimit.ExpectLengthReader(br, size), + ), + file: file, + zr: zr, + }, nil +} + +// ReadReaderContent reads an object's type, declared content length, and +// content stream. +// +// The caller must close the returned reader. +// +// Close releases resources only. It does not drain unread data for additional +// validation. In particular, malformed trailing compressed data, trailing bytes +// past the declared object size, and the zlib Adler-32 trailer may go +// unverified unless the caller reads to io.EOF. +func (store *Store) ReadReaderContent(id objectid.ObjectID) (objecttype.Type, int64, io.ReadCloser, error) { + file, zr, err := store.openInflated(id) + if err != nil { + return objecttype.TypeInvalid, 0, nil, err + } + + br := bufio.NewReader(zr) + + _, ty, size, err := readHeader(br) + if err != nil { + _ = zr.Close() + _ = file.Close() + + return objecttype.TypeInvalid, 0, nil, err + } + + return ty, size, &objectReader{ + reader: iolimit.ExpectLengthReader(br, size), + file: file, + zr: zr, + }, nil +} diff --git a/object/store/loose/read_size.go b/object/store/loose/read_size.go new file mode 100644 index 00000000..2ececc49 --- /dev/null +++ b/object/store/loose/read_size.go @@ -0,0 +1,13 @@ +package loose + +import objectid "codeberg.org/lindenii/furgit/object/id" + +// ReadSize reads an object's declared content length. +// +// Like ReadHeader, it parses only enough of the zlib-decoded object to recover +// the header and does not verify the zlib Adler-32 trailer. +func (store *Store) ReadSize(id objectid.ObjectID) (int64, error) { + _, size, err := store.ReadHeader(id) + + return size, err +} diff --git a/object/store/loose/read_test.go b/object/store/loose/read_test.go new file mode 100644 index 00000000..4652670d --- /dev/null +++ b/object/store/loose/read_test.go @@ -0,0 +1,212 @@ +package loose_test + +import ( + "bytes" + "errors" + "os" + "strings" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectid "codeberg.org/lindenii/furgit/object/id" + "codeberg.org/lindenii/furgit/object/store" + "codeberg.org/lindenii/furgit/object/store/loose" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func TestLooseStoreReadAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + blobID := testRepo.HashObject(t, "blob", []byte("blob body\n")) + _, treeID, commitID := testRepo.MakeCommit(t, "subject\n\nbody") + tagID := testRepo.TagAnnotated(t, "v1", commitID, "tag message") + + store := openLooseStore(t, testRepo, algo) + + tests := []struct { + name string + id objectid.ObjectID + }{ + {name: "blob", id: blobID}, + {name: "tree", id: treeID}, + {name: "commit", id: commitID}, + {name: "tag", id: tagID}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + wantType, wantBody, wantRaw := expectedRawObject(t, testRepo, tt.id) + + gotRaw, err := store.ReadBytesFull(tt.id) + if err != nil { + t.Fatalf("ReadBytesFull: %v", err) + } + + if !bytes.Equal(gotRaw, wantRaw) { + t.Fatalf("ReadBytesFull mismatch") + } + + gotType, gotBody, err := store.ReadBytesContent(tt.id) + if err != nil { + t.Fatalf("ReadBytesContent: %v", err) + } + + if gotType != wantType { + t.Fatalf("ReadBytesContent type = %v, want %v", gotType, wantType) + } + + if !bytes.Equal(gotBody, wantBody) { + t.Fatalf("ReadBytesContent body mismatch") + } + + headType, headSize, err := store.ReadHeader(tt.id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + + if headType != wantType { + t.Fatalf("ReadHeader type = %v, want %v", headType, wantType) + } + + if headSize != int64(len(wantBody)) { + t.Fatalf("ReadHeader size = %d, want %d", headSize, len(wantBody)) + } + + fullReader, err := store.ReadReaderFull(tt.id) + if err != nil { + t.Fatalf("ReadReaderFull: %v", err) + } + + got := mustReadAllAndClose(t, fullReader) + if !bytes.Equal(got, wantRaw) { + t.Fatalf("ReadReaderFull stream mismatch") + } + + contentType, contentSize, contentReader, err := store.ReadReaderContent(tt.id) + if err != nil { + t.Fatalf("ReadReaderContent: %v", err) + } + + if contentType != wantType { + t.Fatalf("ReadReaderContent type = %v, want %v", contentType, wantType) + } + + if contentSize != int64(len(wantBody)) { + t.Fatalf("ReadReaderContent size = %d, want %d", contentSize, len(wantBody)) + } + + got = mustReadAllAndClose(t, contentReader) + if !bytes.Equal(got, wantBody) { + t.Fatalf("ReadReaderContent stream mismatch") + } + }) + } + }) +} + +func TestLooseStoreErrors(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + notFoundID, err := objectid.ParseHex(algo, strings.Repeat("0", algo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(notFoundID): %v", err) + } + + _, err = store.ReadBytesFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesFull not-found error = %v", err) + } + + _, _, err = store.ReadBytesContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadBytesContent not-found error = %v", err) + } + + _, err = store.ReadReaderFull(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderFull not-found error = %v", err) + } + + _, _, _, err = store.ReadReaderContent(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadReaderContent not-found error = %v", err) + } + + _, _, err = store.ReadHeader(notFoundID) + if !errors.Is(err, objectstore.ErrObjectNotFound) { + t.Fatalf("ReadHeader not-found error = %v", err) + } + + var otherAlgo objectid.Algorithm + if algo == objectid.AlgorithmSHA1 { + otherAlgo = objectid.AlgorithmSHA256 + } else { + otherAlgo = objectid.AlgorithmSHA1 + } + + otherID, err := objectid.ParseHex(otherAlgo, strings.Repeat("1", otherAlgo.HexLen())) + if err != nil { + t.Fatalf("ParseHex(otherID): %v", err) + } + + _, err = store.ReadBytesFull(otherID) + if err == nil || !strings.Contains(err.Error(), "algorithm mismatch") { + t.Fatalf("ReadBytesFull algorithm-mismatch error = %v", err) + } + }) +} + +func TestLooseStoreNewValidation(t *testing.T) { + t.Parallel() + + root, err := os.OpenRoot(t.TempDir()) + if err != nil { + t.Fatalf("OpenRoot: %v", err) + } + + defer func() { _ = root.Close() }() + + _, err = loose.New(root, objectid.AlgorithmUnknown) + if err == nil { + t.Fatalf("loose.New(root, unknown) expected error") + } +} + +func TestLooseStoreReadHeaderDoesNotVerifyAdler32(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + content := []byte("header-only-check\n") + + id, err := store.WriteBytesContent(objecttype.TypeBlob, content) + if err != nil { + t.Fatalf("WriteBytesContent: %v", err) + } + + corruptLooseObjectTrailer(t, testRepo, id) + + ty, size, err := store.ReadHeader(id) + if err != nil { + t.Fatalf("ReadHeader: %v", err) + } + + if ty != objecttype.TypeBlob { + t.Fatalf("ReadHeader type = %v, want %v", ty, objecttype.TypeBlob) + } + + if size != int64(len(content)) { + t.Fatalf("ReadHeader size = %d, want %d", size, len(content)) + } + + _, err = store.ReadBytesFull(id) + if err == nil { + t.Fatalf("ReadBytesFull on corrupted trailer succeeded") + } + }) +} diff --git a/object/store/loose/refresh.go b/object/store/loose/refresh.go new file mode 100644 index 00000000..b720ebc6 --- /dev/null +++ b/object/store/loose/refresh.go @@ -0,0 +1,6 @@ +package loose + +// Refresh is a no-op for loose object stores. +func (store *Store) Refresh() error { + return nil +} diff --git a/object/store/loose/store.go b/object/store/loose/store.go new file mode 100644 index 00000000..d8eba84e --- /dev/null +++ b/object/store/loose/store.go @@ -0,0 +1,41 @@ +// Package loose provides a loose object backend (objects/XX/YYYYY..). +package loose + +import ( + "os" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// Store reads loose Git objects from an objects directory root. +// +// Loose objects are zlib streams whose trailer uses Adler-32. Which reads +// consume enough of the stream to reach and verify that trailer is documented +// on the individual methods. +type Store struct { + // root is the objects directory capability used for all object file access. + // Object files are opened by relative paths like "<first2>/<rest>". + // Store borrows this root. + root *os.Root + // algo is the expected object ID algorithm for lookups. + algo objectid.Algorithm +} + +// New creates a loose-object store rooted at an objects directory for algo. +func New(root *os.Root, algo objectid.Algorithm) (*Store, error) { + if algo.Size() == 0 { + return nil, objectid.ErrInvalidAlgorithm + } + + return &Store{ + root: root, + algo: algo, + }, nil +} + +// Close releases resources associated with the backend. +// +// Store borrows its root, so Close does not close it. +// +// Repeated calls to Close are undefined behavior. +func (store *Store) Close() error { return nil } diff --git a/object/store/loose/write_bytes.go b/object/store/loose/write_bytes.go new file mode 100644 index 00000000..ffc65117 --- /dev/null +++ b/object/store/loose/write_bytes.go @@ -0,0 +1,18 @@ +package loose + +import ( + "bytes" + + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// WriteBytesFull writes a full serialized object as "type size\0content". +func (store *Store) WriteBytesFull(raw []byte) (objectid.ObjectID, error) { + return store.WriteReaderFull(bytes.NewReader(raw)) +} + +// WriteBytesContent writes typed content bytes as a loose object. +func (store *Store) WriteBytesContent(ty objecttype.Type, content []byte) (objectid.ObjectID, error) { + return store.WriteReaderContent(ty, int64(len(content)), bytes.NewReader(content)) +} diff --git a/object/store/loose/write_reader.go b/object/store/loose/write_reader.go new file mode 100644 index 00000000..f686f279 --- /dev/null +++ b/object/store/loose/write_reader.go @@ -0,0 +1,81 @@ +package loose + +import ( + "fmt" + "io" + + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +// WriteReaderContent writes one loose object from typed content bytes read from src. +// src must provide exactly size bytes. +// size is required because loose object headers are "type size\0content", so the +// header must be emitted before streaming content without buffering. +func (store *Store) WriteReaderContent(ty objecttype.Type, size int64, src io.Reader) (objectid.ObjectID, error) { + if size < 0 { + return objectid.ObjectID{}, fmt.Errorf("objectstore/loose: negative content size: %d", size) + } + + header, ok := objectheader.Encode(ty, size) + if !ok { + return objectid.ObjectID{}, fmt.Errorf("objectstore/loose: failed to encode object header for type %v", ty) + } + + writer, err := store.newStreamWriter(false) + if err != nil { + return objectid.ObjectID{}, err + } + + writer.headerDone = true + writer.expectedContentLeft = size + + err = writer.writeRawChunk(header) + if err != nil { + _ = writer.Close() + _ = store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + return writeReaderIntoStreamWriter(writer, src) +} + +// WriteReaderFull writes one loose object from raw bytes "type size\0content" +// read from src. +func (store *Store) WriteReaderFull(src io.Reader) (objectid.ObjectID, error) { + writer, err := store.newStreamWriter(true) + if err != nil { + return objectid.ObjectID{}, err + } + + return writeReaderIntoStreamWriter(writer, src) +} + +// writeReaderIntoStreamWriter copies src into writer and publishes the object. +func writeReaderIntoStreamWriter(writer *streamWriter, src io.Reader) (objectid.ObjectID, error) { + _, err := io.Copy(writer, src) + if err != nil { + _ = writer.Close() + _ = writer.store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + err = writer.Close() + if err != nil { + _ = writer.store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + id, err := writer.finalize() + if err != nil { + _ = writer.store.root.Remove(writer.tmpRelPath) + + return objectid.ObjectID{}, err + } + + return id, nil +} diff --git a/object/store/loose/write_temp_object_file.go b/object/store/loose/write_temp_object_file.go new file mode 100644 index 00000000..1a78db48 --- /dev/null +++ b/object/store/loose/write_temp_object_file.go @@ -0,0 +1,30 @@ +package loose + +import ( + "crypto/rand" + "errors" + "io/fs" + "os" + "path/filepath" +) + +// createTempObjectFile creates a unique temporary object file within dir. +// The returned path is relative to the objects root. +func (store *Store) createTempObjectFile(dir string) (string, *os.File, error) { + for range 16 { + relPath := filepath.Join(dir, tempObjectFilePrefix+rand.Text()) + + file, err := store.root.OpenFile(relPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644) + if err == nil { + return relPath, file, nil + } + + if errors.Is(err, fs.ErrExist) { + continue + } + + return "", nil, err + } + + return "", nil, errors.New("objectstore/loose: failed to create temporary object file") +} diff --git a/object/store/loose/write_test.go b/object/store/loose/write_test.go new file mode 100644 index 00000000..30d8dbdb --- /dev/null +++ b/object/store/loose/write_test.go @@ -0,0 +1,137 @@ +package loose_test + +import ( + "bytes" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + objectheader "codeberg.org/lindenii/furgit/object/header" + objectid "codeberg.org/lindenii/furgit/object/id" + objecttype "codeberg.org/lindenii/furgit/object/type" +) + +func TestLooseStoreWriteReaderContentAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + content := []byte("written-by-content-reader\n") + expectedHex := testRepo.RunInput(t, content, "hash-object", "-t", "blob", "--stdin") + + expectedID, err := objectid.ParseHex(algo, expectedHex) + if err != nil { + t.Fatalf("ParseHex(expected): %v", err) + } + + writtenID, err := store.WriteReaderContent(objecttype.TypeBlob, int64(len(content)), bytes.NewReader(content)) + if err != nil { + t.Fatalf("WriteReaderContent: %v", err) + } + + if writtenID != expectedID { + t.Fatalf("WriteReaderContent id = %s, want %s", writtenID, expectedID) + } + + gotBody := testRepo.CatFile(t, "blob", writtenID) + if !bytes.Equal(gotBody, content) { + t.Fatalf("git cat-file body mismatch") + } + + // Writing the same object again should succeed and return the same ID. + writtenID2, err := store.WriteReaderContent(objecttype.TypeBlob, int64(len(content)), bytes.NewReader(content)) + if err != nil { + t.Fatalf("WriteReaderContent second: %v", err) + } + + if writtenID2 != expectedID { + t.Fatalf("WriteReaderContent second id = %s, want %s", writtenID2, expectedID) + } + }) +} + +func TestLooseStoreWriteReaderFullAgainstGit(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + body := []byte("full-reader-body\n") + + header, ok := objectheader.Encode(objecttype.TypeBlob, int64(len(body))) + if !ok { + t.Fatalf("objectheader.Encode failed") + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + + wantID := algo.Sum(raw) + + gotID, err := store.WriteReaderFull(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("WriteReaderFull: %v", err) + } + + if gotID != wantID { + t.Fatalf("WriteReaderFull id = %s, want %s", gotID, wantID) + } + + gotBody := testRepo.CatFile(t, "blob", gotID) + if !bytes.Equal(gotBody, body) { + t.Fatalf("git cat-file body mismatch") + } + }) +} + +func TestLooseStoreReaderValidationErrors(t *testing.T) { + t.Parallel() + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper + t.Run("content overflow", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + _, err := store.WriteReaderContent(objecttype.TypeBlob, 1, bytes.NewReader([]byte("hello"))) + if err == nil { + t.Fatalf("expected error after overflow") + } + }) + + t.Run("content short", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + _, err := store.WriteReaderContent(objecttype.TypeBlob, 5, bytes.NewReader([]byte("x"))) + if err == nil { + t.Fatalf("expected error for short content") + } + }) + + t.Run("full malformed header", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + _, err := store.WriteReaderFull(bytes.NewReader([]byte("not-a-header"))) + if err == nil { + t.Fatalf("expected error for malformed header") + } + }) + + t.Run("full size mismatch", func(t *testing.T) { + t.Parallel() + testRepo := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true}) + store := openLooseStore(t, testRepo, algo) + + raw := []byte("blob 1\x00hello") + + _, err := store.WriteReaderFull(bytes.NewReader(raw)) + if err == nil { + t.Fatalf("expected error after mismatch") + } + }) + }) +} diff --git a/object/store/loose/write_writer.go b/object/store/loose/write_writer.go new file mode 100644 index 00000000..0d6b5b80 --- /dev/null +++ b/object/store/loose/write_writer.go @@ -0,0 +1,94 @@ +package loose + +import ( + "errors" + "hash" + "os" + + "codeberg.org/lindenii/furgit/internal/compress/zlib" +) + +const tempObjectFilePrefix = "tmp_obj_" + +// streamWriter incrementally hashes and deflates an object into a temp file. +// Finalize validates size accounting and atomically renames the temp file. +type streamWriter struct { + // store owns path and root operations used by this write session. + store *Store + // file is the temporary destination file under objects/. + file *os.File + // zw compresses raw object bytes into file. + zw *zlib.Writer + // hash receives the same raw bytes used to compute the resulting object ID. + hash hash.Hash + + // tmpRelPath is the relative path of file under the objects root. + tmpRelPath string + + // fullMode selects full-object input ("type size\0content") as opposed to content-only input. + fullMode bool + + // headerBuf accumulates header bytes while fullMode parses up to the first NUL. + headerBuf []byte + // headerDone reports whether the full-object header has been parsed. + headerDone bool + // expectedContentLeft tracks remaining declared content bytes. + expectedContentLeft int64 + + closed bool + finalized bool +} + +// newStreamWriter creates a stream writer with a temp file rooted in objects/. +func (store *Store) newStreamWriter(fullMode bool) (*streamWriter, error) { + hashFn, err := store.algo.New() + if err != nil { + return nil, err + } + + tmpRelPath, file, err := store.createTempObjectFile(".") + if err != nil { + return nil, err + } + + return &streamWriter{ + store: store, + file: file, + zw: zlib.NewWriter(file), + hash: hashFn, + tmpRelPath: tmpRelPath, + fullMode: fullMode, + headerBuf: make([]byte, 0, 64), + }, nil +} + +// Write validates and writes raw bytes into the stream. +// In full mode, it parses and enforces the streamed header-declared content size. +func (writer *streamWriter) Write(src []byte) (int, error) { + if writer.finalized { + return 0, errors.New("objectstore/loose: write after finalize") + } + + if writer.closed { + return 0, errors.New("objectstore/loose: write after close") + } + + if writer.fullMode { + err := writer.acceptFull(src) + if err != nil { + return 0, err + } + } else { + err := writer.acceptContent(int64(len(src))) + if err != nil { + return 0, err + } + } + + err := writer.writeRawChunk(src) + if err != nil { + return 0, err + } + + return len(src), nil +} diff --git a/object/store/loose/write_writer_accept.go b/object/store/loose/write_writer_accept.go new file mode 100644 index 00000000..bf55966a --- /dev/null +++ b/object/store/loose/write_writer_accept.go @@ -0,0 +1,61 @@ +package loose + +import ( + "bytes" + "errors" + + objectheader "codeberg.org/lindenii/furgit/object/header" +) + +// acceptFull validates and accounts raw full-object input. +func (writer *streamWriter) acceptFull(src []byte) error { + if !writer.headerDone { + nul := bytes.IndexByte(src, 0) + if nul >= 0 { + headerChunkLen := nul + 1 + writer.headerBuf = append(writer.headerBuf, src[:headerChunkLen]...) + + _, size, _, ok := objectheader.Parse(writer.headerBuf) + if !ok { + return errors.New("objectstore/loose: malformed object header") + } + + writer.headerDone = true + writer.expectedContentLeft = size + + return writer.acceptContent(int64(len(src) - headerChunkLen)) + } + + writer.headerBuf = append(writer.headerBuf, src...) + + return nil + } + + return writer.acceptContent(int64(len(src))) +} + +// acceptContent validates and accounts content byte counts. +func (writer *streamWriter) acceptContent(n int64) error { + if n > writer.expectedContentLeft { + return errors.New("objectstore/loose: object content exceeds declared size") + } + + writer.expectedContentLeft -= n + + return nil +} + +// writeRawChunk forwards raw bytes to the hash and deflate pipeline. +func (writer *streamWriter) writeRawChunk(src []byte) error { + _, err := writer.hash.Write(src) + if err != nil { + return err + } + + _, err = writer.zw.Write(src) + if err != nil { + return err + } + + return nil +} diff --git a/object/store/loose/write_writer_finalize.go b/object/store/loose/write_writer_finalize.go new file mode 100644 index 00000000..f8dee54d --- /dev/null +++ b/object/store/loose/write_writer_finalize.go @@ -0,0 +1,90 @@ +package loose + +import ( + "errors" + "io/fs" + "path/filepath" + + objectid "codeberg.org/lindenii/furgit/object/id" +) + +// Close flushes and closes the underlying zlib stream and temp file. +// +// Repeated calls to Close are undefined behavior. +func (writer *streamWriter) Close() error { + errZlib := writer.zw.Close() + errSync := writer.file.Sync() + errFile := writer.file.Close() + + writer.closed = true + writer.file = nil + + return errors.Join(errZlib, errSync, errFile) +} + +// finalize validates write completeness and atomically publishes the object. +// Publication is no-clobber: it links tmpRelPath to the object path and treats +// existing destination objects as success. +func (writer *streamWriter) finalize() (objectid.ObjectID, error) { + writer.finalized = true + + var zero objectid.ObjectID + + if !writer.closed { + err := writer.Close() + if err != nil { + return zero, err + } + } + + if writer.fullMode && !writer.headerDone { + return zero, errors.New("objectstore/loose: missing full object header") + } + + if writer.expectedContentLeft != 0 { + return zero, errors.New("objectstore/loose: object content shorter than declared size") + } + + idBytes := writer.hash.Sum(nil) + + id, err := objectid.FromBytes(writer.store.algo, idBytes) + if err != nil { + return zero, err + } + + relPath, err := writer.store.objectPath(id) + if err != nil { + return zero, err + } + + dir := filepath.Dir(relPath) + + err = writer.store.root.MkdirAll(dir, 0o755) + if err != nil { + return zero, err + } + + cleanup := true + + defer func() { + if cleanup { + _ = writer.store.root.Remove(writer.tmpRelPath) + } + }() + + err = writer.store.root.Link(writer.tmpRelPath, relPath) + if err != nil { + if errors.Is(err, fs.ErrExist) { + cleanup = false + _ = writer.store.root.Remove(writer.tmpRelPath) + + return id, nil + } + + return zero, err + } + + cleanup = false + + return id, nil +} |
