aboutsummaryrefslogtreecommitdiff
path: root/object
diff options
context:
space:
mode:
Diffstat (limited to 'object')
-rw-r--r--object/blob/clone.go11
-rw-r--r--object/blob/parse.go10
-rw-r--r--object/commit/append.go4
-rw-r--r--object/commit/clone.go33
-rw-r--r--object/commit/commit.go4
-rw-r--r--object/commit/parse.go18
-rw-r--r--object/commit/roundtrip_test.go10
-rw-r--r--object/fetch/path.go2
-rw-r--r--object/fetch/treefs.go10
-rw-r--r--object/fetch/treefs_test.go6
-rw-r--r--object/signature/clone.go16
-rw-r--r--object/signature/parse.go12
-rw-r--r--object/store/memory/reader.go4
-rw-r--r--object/store/mix/mix.go2
-rw-r--r--object/store/packed/delta.go26
-rw-r--r--object/store/packed/entry.go3
-rw-r--r--object/store/packed/internal/ingest/finalize.go45
-rw-r--r--object/store/packed/internal/ingest/resolve.go2
-rw-r--r--object/store/packed/internal/ingest/result.go3
-rw-r--r--object/store/packed/internal/ingest/thin.go2
-rw-r--r--object/store/packed/internal/ingest/writepack_test.go77
-rw-r--r--object/store/packed/lookup.go4
-rw-r--r--object/store/packed/pack.go52
-rw-r--r--object/store/packed/packed.go2
-rw-r--r--object/store/packed/quarantine.go2
-rw-r--r--object/store/packed/reader.go2
-rw-r--r--object/store/packed/refresh.go2
-rw-r--r--object/store/packed/refresh_test.go2
-rw-r--r--object/store/reader.go13
-rw-r--r--object/tag/append.go2
-rw-r--r--object/tag/clone.go29
-rw-r--r--object/tag/parse.go18
-rw-r--r--object/tag/roundtrip_test.go6
-rw-r--r--object/tag/tag.go2
-rw-r--r--object/tree/clone.go24
-rw-r--r--object/tree/compare.go2
-rw-r--r--object/tree/helpers_test.go44
-rw-r--r--object/tree/insert.go10
-rw-r--r--object/tree/insert_test.go41
-rw-r--r--object/tree/lookup.go14
-rw-r--r--object/tree/lookup_test.go5
-rw-r--r--object/tree/malformed_test.go1
-rw-r--r--object/tree/parse.go52
-rw-r--r--object/tree/roundtrip_test.go2
-rw-r--r--object/tree/tree.go16
45 files changed, 506 insertions, 141 deletions
diff --git a/object/blob/clone.go b/object/blob/clone.go
new file mode 100644
index 00000000..7106c3aa
--- /dev/null
+++ b/object/blob/clone.go
@@ -0,0 +1,11 @@
+package blob
+
+import "bytes"
+
+// Clone returns a deep copy of the blob
+// whose Data is independent of any memory the original may alias.
+//
+// Labels: Life-Independent.
+func (blob *Blob) Clone() *Blob {
+ return &Blob{Data: bytes.Clone(blob.Data)}
+}
diff --git a/object/blob/parse.go b/object/blob/parse.go
index c013af96..1796d42f 100644
--- a/object/blob/parse.go
+++ b/object/blob/parse.go
@@ -2,7 +2,13 @@ package blob
// Parse decodes a blob object body.
//
-// Labels: Life-Independent.
+// The returned blob aliases body:
+// its Data shares the same backing array,
+// so the blob inherits body's lifetime
+// and must not be mutated unless body may be.
+// Use [Blob.Clone] for an independent copy.
+//
+// Labels: Life-Parent, Mut-No.
func Parse(body []byte) (*Blob, error) {
- return &Blob{Data: append([]byte(nil), body...)}, nil
+ return &Blob{Data: body}, nil
}
diff --git a/object/commit/append.go b/object/commit/append.go
index d5258b97..02d69058 100644
--- a/object/commit/append.go
+++ b/object/commit/append.go
@@ -33,7 +33,7 @@ func (commit *Commit) AppendWithoutHeader(dst []byte) ([]byte, error) {
dst = append(dst, byte('\n'))
- if commit.ChangeID != "" {
+ if len(commit.ChangeID) != 0 {
dst = append(dst, []byte("change-id ")...)
dst = append(dst, commit.ChangeID...)
dst = append(dst, byte('\n'))
@@ -41,7 +41,7 @@ func (commit *Commit) AppendWithoutHeader(dst []byte) ([]byte, error) {
for _, h := range commit.ExtraHeaders {
// GIGO on empty keys and such.
- dst = append(dst, []byte(h.Key)...)
+ dst = append(dst, h.Key...)
dst = append(dst, byte(' '))
dst = append(dst, h.Value...)
dst = append(dst, byte('\n'))
diff --git a/object/commit/clone.go b/object/commit/clone.go
new file mode 100644
index 00000000..08987f26
--- /dev/null
+++ b/object/commit/clone.go
@@ -0,0 +1,33 @@
+package commit
+
+import (
+ "bytes"
+ "slices"
+)
+
+// Clone returns a deep copy of the commit
+// whose byte fields are independent of any memory the original may alias.
+//
+// Labels: Life-Independent.
+func (commit *Commit) Clone() *Commit {
+ clone := &Commit{
+ Tree: commit.Tree,
+ Parents: slices.Clone(commit.Parents),
+ Author: commit.Author.Clone(),
+ Committer: commit.Committer.Clone(),
+ Message: bytes.Clone(commit.Message),
+ ChangeID: bytes.Clone(commit.ChangeID),
+ }
+
+ if commit.ExtraHeaders != nil {
+ clone.ExtraHeaders = make([]ExtraHeader, len(commit.ExtraHeaders))
+ for i, h := range commit.ExtraHeaders {
+ clone.ExtraHeaders[i] = ExtraHeader{
+ Key: bytes.Clone(h.Key),
+ Value: bytes.Clone(h.Value),
+ }
+ }
+ }
+
+ return clone
+}
diff --git a/object/commit/commit.go b/object/commit/commit.go
index 6a89bce9..a8a247bf 100644
--- a/object/commit/commit.go
+++ b/object/commit/commit.go
@@ -14,12 +14,12 @@ type Commit struct {
Author signature.Signature
Committer signature.Signature
Message []byte
- ChangeID string
+ ChangeID []byte
ExtraHeaders []ExtraHeader
}
// ExtraHeader represents an extra header in a Git object.
type ExtraHeader struct {
- Key string
+ Key []byte
Value []byte
}
diff --git a/object/commit/parse.go b/object/commit/parse.go
index 20353e14..74f607f1 100644
--- a/object/commit/parse.go
+++ b/object/commit/parse.go
@@ -13,6 +13,16 @@ import (
var ErrInvalidCommit = errors.New("object/commit: invalid commit")
// Parse decodes a commit object body.
+//
+// The returned commit aliases body:
+// its Message, ChangeID, and extra-header fields,
+// along with the byte fields of its signatures,
+// share body's backing array.
+// The commit inherits body's lifetime
+// and must not be mutated unless body may be.
+// Use [Commit.Clone] for an independent copy.
+//
+// Labels: Life-Parent, Mut-No.
func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) {
c := new(Commit)
@@ -95,7 +105,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) {
return nil, fmt.Errorf("%w: unexpected change-id header at offset %d", ErrInvalidCommit, lineStart)
}
- c.ChangeID = string(value)
+ c.ChangeID = value
case "gpgsig", "gpgsig-sha256":
if state != parseStateExtra {
return nil, fmt.Errorf("%w: unexpected %s header at offset %d", ErrInvalidCommit, key, lineStart)
@@ -119,8 +129,8 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) {
}
c.ExtraHeaders = append(c.ExtraHeaders, ExtraHeader{
- Key: string(key),
- Value: append([]byte(nil), value...),
+ Key: key,
+ Value: value,
})
}
}
@@ -141,7 +151,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) {
panic("unreachable parse state")
}
- c.Message = append([]byte(nil), body[i:]...)
+ c.Message = body[i:]
return c, nil
}
diff --git a/object/commit/roundtrip_test.go b/object/commit/roundtrip_test.go
index faa8a834..1cfcaaac 100644
--- a/object/commit/roundtrip_test.go
+++ b/object/commit/roundtrip_test.go
@@ -87,10 +87,10 @@ func TestRoundTrip(t *testing.T) {
OffsetMinutes: 330,
},
Message: []byte("roundtrip subject\n\nroundtrip body\n\n"),
- ChangeID: "zyxwvutsrqponmlk",
+ ChangeID: []byte("zyxwvutsrqponmlk"),
ExtraHeaders: []commit.ExtraHeader{
- {Key: "encoding", Value: []byte("UTF-8")},
- {Key: "x-test-header", Value: []byte("value")},
+ {Key: []byte("encoding"), Value: []byte("UTF-8")},
+ {Key: []byte("x-test-header"), Value: []byte("value")},
},
}
@@ -145,7 +145,7 @@ func assertCommitEqual(t *testing.T, got *commit.Commit, want *commit.Commit) {
t.Fatalf("message = %q, want %q", got.Message, want.Message)
}
- if got.ChangeID != want.ChangeID {
+ if !bytes.Equal(got.ChangeID, want.ChangeID) {
t.Fatalf("change id = %q, want %q", got.ChangeID, want.ChangeID)
}
@@ -155,7 +155,7 @@ func assertCommitEqual(t *testing.T, got *commit.Commit, want *commit.Commit) {
for i, wantHeader := range want.ExtraHeaders {
gotHeader := got.ExtraHeaders[i]
- if gotHeader.Key != wantHeader.Key {
+ if !bytes.Equal(gotHeader.Key, wantHeader.Key) {
t.Fatalf("extra header[%d] key = %q, want %q", i, gotHeader.Key, wantHeader.Key)
}
diff --git a/object/fetch/path.go b/object/fetch/path.go
index f8eca507..e8b12481 100644
--- a/object/fetch/path.go
+++ b/object/fetch/path.go
@@ -47,7 +47,7 @@ func (err *PathNotTreeError) Error() string {
// for an io/fs.FS-like interface.
//
// Labels: Life-Parent.
-func (fetcher *Fetcher) Path(root oid.ObjectID, parts []string) (tree.Entry, error) {
+func (fetcher *Fetcher) Path(root oid.ObjectID, parts [][]byte) (tree.Entry, error) {
if len(parts) == 0 {
return tree.Entry{}, ErrPathInvalid
}
diff --git a/object/fetch/treefs.go b/object/fetch/treefs.go
index 9d88abb2..d12e3dd6 100644
--- a/object/fetch/treefs.go
+++ b/object/fetch/treefs.go
@@ -1,11 +1,11 @@
package fetch
import (
+ "bytes"
"errors"
"fmt"
"io"
"io/fs"
- "strings"
"time"
oid "lindenii.org/go/furgit/object/id"
@@ -48,12 +48,12 @@ var ErrGitlinkNotFile = fmt.Errorf("%w: object/fetch: gitlink entries are not re
// generic fs consumers classify it correctly.
var ErrIsDirectory = fmt.Errorf("%w: object/fetch: is a directory", fs.ErrInvalid)
-func splitPath(path string) []string {
+func splitPath(path string) [][]byte {
if len(path) == 0 {
return nil
}
- return strings.Split(path, "/")
+ return bytes.Split([]byte(path), []byte("/"))
}
type treeEntryValue struct {
@@ -197,7 +197,7 @@ func (treeFS *TreeFS) Open(name string) (fs.File, error) {
entries := make([]fs.DirEntry, 0, len(tree.Object().Entries()))
for _, child := range tree.Object().Entries() {
childEntry := treeEntryValue{
- name: child.Name,
+ name: string(child.Name),
mode: child.Mode,
objectID: child.ID,
treeEntry: &child,
@@ -401,7 +401,7 @@ func (treeFS *TreeFS) resolvePath(op treeFSOp, name string) (treeEntryValue, err
}
return treeEntryValue{
- name: entry.Name,
+ name: string(entry.Name),
mode: entry.Mode,
objectID: entry.ID,
treeEntry: &entry,
diff --git a/object/fetch/treefs_test.go b/object/fetch/treefs_test.go
index ba292276..05240823 100644
--- a/object/fetch/treefs_test.go
+++ b/object/fetch/treefs_test.go
@@ -35,12 +35,12 @@ func TestTreeFS(t *testing.T) {
}
subTreeID := writeTree(t, store, []tree.Entry{
- {Mode: mode.Executable, Name: "exec.sh", ID: execID},
+ {Mode: mode.Executable, Name: []byte("exec.sh"), ID: execID},
})
rootTreeID := writeTree(t, store, []tree.Entry{
- {Mode: mode.Regular, Name: "plain.txt", ID: plainID},
- {Mode: mode.Directory, Name: "dir", ID: subTreeID},
+ {Mode: mode.Regular, Name: []byte("plain.txt"), ID: plainID},
+ {Mode: mode.Directory, Name: []byte("dir"), ID: subTreeID},
})
commitID := writeCommit(t, store, rootTreeID)
diff --git a/object/signature/clone.go b/object/signature/clone.go
new file mode 100644
index 00000000..4637a258
--- /dev/null
+++ b/object/signature/clone.go
@@ -0,0 +1,16 @@
+package signature
+
+import "bytes"
+
+// Clone returns a deep copy of the signature
+// whose Name and Email are independent of any memory the original may alias.
+//
+// Labels: Life-Independent.
+func (signature Signature) Clone() Signature {
+ return Signature{
+ Name: bytes.Clone(signature.Name),
+ Email: bytes.Clone(signature.Email),
+ WhenUnix: signature.WhenUnix,
+ OffsetMinutes: signature.OffsetMinutes,
+ }
+}
diff --git a/object/signature/parse.go b/object/signature/parse.go
index b39100cd..190d2cf4 100644
--- a/object/signature/parse.go
+++ b/object/signature/parse.go
@@ -10,7 +10,13 @@ import (
// Parse parses a canonical Git signature line.
//
-// Labels: Life-Independent.
+// The returned signature aliases line:
+// its Name and Email share line's backing array,
+// so the signature inherits line's lifetime
+// and must not be mutated unless line may be.
+// Use [Signature.Clone] for an independent copy.
+//
+// Labels: Life-Parent, Mut-No.
func Parse(line []byte) (*Signature, error) {
lt := bytes.IndexByte(line, '<')
if lt < 0 {
@@ -24,8 +30,8 @@ func Parse(line []byte) (*Signature, error) {
gt := lt + 1 + gtRel
- nameBytes := append([]byte(nil), bytes.TrimRight(line[:lt], " ")...)
- emailBytes := append([]byte(nil), line[lt+1:gt]...)
+ nameBytes := bytes.TrimRight(line[:lt], " ")
+ emailBytes := line[lt+1 : gt]
rest := line[gt+1:]
if len(rest) == 0 || rest[0] != ' ' {
diff --git a/object/store/memory/reader.go b/object/store/memory/reader.go
index 6b8fae55..e04ad759 100644
--- a/object/store/memory/reader.go
+++ b/object/store/memory/reader.go
@@ -24,13 +24,15 @@ func (memory *Memory) ReadBytesFull(id id.ObjectID) ([]byte, error) {
}
// ReadBytesContent reads one object body.
+//
+// The returned slice aliases the store's own copy of the object content.
func (memory *Memory) ReadBytesContent(id id.ObjectID) (typ.Type, []byte, error) {
obj, ok := memory.objects.Load(id)
if !ok {
return typ.Unknown, nil, store.ErrObjectNotFound
}
- return obj.ty, append([]byte(nil), obj.content...), nil
+ return obj.ty, obj.content, nil
}
// ReadHeader reads one object header.
diff --git a/object/store/mix/mix.go b/object/store/mix/mix.go
index 2e8e926b..b048fe86 100644
--- a/object/store/mix/mix.go
+++ b/object/store/mix/mix.go
@@ -28,7 +28,7 @@ func New(backends ...store.ObjectReader) *Mix {
present[backend] = struct{}{}
}
- order := mru.New[store.ObjectReader]()
+ order := mru.New[store.ObjectReader](mru.Options{Interval: 48})
order.Sync(present)
return &Mix{
diff --git a/object/store/packed/delta.go b/object/store/packed/delta.go
index 567fd679..5b538221 100644
--- a/object/store/packed/delta.go
+++ b/object/store/packed/delta.go
@@ -1,7 +1,6 @@
package packed
import (
- "bytes"
"fmt"
"io"
"slices"
@@ -11,8 +10,14 @@ import (
"lindenii.org/go/furgit/internal/format/packfile"
"lindenii.org/go/furgit/internal/format/packfile/delta"
"lindenii.org/go/lgo/intconv"
+ "lindenii.org/go/lgo/sync"
)
+//nolint:gochecknoglobals
+var deltaHeaderPool = sync.NewPool(func() *[delta.MaxHeaderSizesLen]byte {
+ return new([delta.MaxHeaderSizesLen]byte)
+})
+
// deltaNode is a delta entry on a resolution chain.
type deltaNode struct {
// payload is the entry's compressed delta payload view.
@@ -28,7 +33,11 @@ type deltaNode struct {
// unpackEntry reconstructs the object stored at offset in p,
// following ref- and ofs-delta chains within the pack.
//
-// Labels: Life-Independent.
+// A direct base-cache hit returns the shared cache buffer itself,
+// so the result may alias cache storage and must not be mutated;
+// delta-applied results are freshly allocated.
+//
+// Labels: Life-Parent, Mut-No.
func (packed *Packed) unpackEntry(p *pack, offset int) (packfile.EntryType, []byte, error) {
var zero packfile.EntryType
@@ -86,9 +95,11 @@ func (packed *Packed) unpackEntry(p *pack, offset int) (packfile.EntryType, []by
cur = baseOffset
}
- // A direct cache hit with no deltas to apply must be copied.
+ // A direct cache hit with no deltas to apply
+ // returns the shared cache buffer directly;
+ // callers are contractually Mut-No.
if len(chain) == 0 && fromCache {
- return baseType, bytes.Clone(base), nil
+ return baseType, base, nil
}
// Apply deltas back up the chain, caching each consumed base.
@@ -202,16 +213,19 @@ func (packed *Packed) resolveType(p *pack, offset int, entryHeader packfile.Entr
// deltaResultSize reads the declared result size
// from one compressed delta payload prefix.
func deltaResultSize(payload []byte, deltaSize uint64) (int, error) {
- zr, err := zlib.NewReader(bytes.NewReader(payload))
+ zr, err := zlib.NewReaderBytes(payload)
if err != nil {
return 0, fmt.Errorf("reading delta header: %w", err)
}
defer func() { _ = zr.Close() }()
+ buf := deltaHeaderPool.Get()
+ defer deltaHeaderPool.Put(buf)
+
prefixLen := min(uint64(delta.MaxHeaderSizesLen), deltaSize)
- prefix := make([]byte, prefixLen)
+ prefix := buf[:prefixLen]
_, err = io.ReadFull(zr, prefix)
if err != nil {
diff --git a/object/store/packed/entry.go b/object/store/packed/entry.go
index e9d45bb4..908afad0 100644
--- a/object/store/packed/entry.go
+++ b/object/store/packed/entry.go
@@ -1,7 +1,6 @@
package packed
import (
- "bytes"
"errors"
"fmt"
"io"
@@ -49,7 +48,7 @@ func inflate(payload []byte, expectedSize uint64) ([]byte, error) {
return nil, fmt.Errorf("declared size: %w", err)
}
- zr, err := zlib.NewReader(bytes.NewReader(payload))
+ zr, err := zlib.NewReaderBytes(payload)
if err != nil {
return nil, fmt.Errorf("inflating entry payload: %w", err)
}
diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go
index f0ab6622..afed996c 100644
--- a/object/store/packed/internal/ingest/finalize.go
+++ b/object/store/packed/internal/ingest/finalize.go
@@ -8,6 +8,7 @@ import (
"slices"
"lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/format/packrev"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/lgo/intconv"
@@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) {
return Result{}, err
}
+ bloomBuilder, err := ingestion.buildBloom(entries, packHash)
+ if err != nil {
+ return Result{}, err
+ }
+
+ bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error {
+ _, err := w.Write(bloomBuilder.Bytes())
+
+ return err
+ })
+ if err != nil {
+ return Result{}, err
+ }
+
base := "pack-" + ingestion.packHash.String()
packFinal := base + ".pack"
idxFinal := base + ".idx"
revFinal := base + ".rev"
+ bloomFinal := base + ".bloom"
- // Link the pack and reverse index before the index,
+ // Link the pack, reverse index, and Bloom filter before the index,
// since the index is what publishes the pack to readers.
err = ingestion.link(ingestion.packTmp, packFinal)
if err != nil {
@@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) {
return Result{}, err
}
+ err = ingestion.link(bloomTmp, bloomFinal)
+ if err != nil {
+ return Result{}, err
+ }
+
err = ingestion.link(idxTmp, idxFinal)
if err != nil {
return Result{}, err
@@ -69,12 +90,34 @@ func (ingestion *ingestion) finalize() (Result, error) {
PackName: packFinal,
IdxName: idxFinal,
RevName: revFinal,
+ BloomName: bloomFinal,
PackHash: ingestion.packHash,
ObjectCount: objectCount,
ThinFixed: ingestion.thinFixed,
}, nil
}
+// buildBloom builds a Bloom filter over the index entries' object IDs,
+// bound to packHash.
+func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) {
+ bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries))
+ if err != nil {
+ return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+ }
+
+ builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash)
+ if err != nil {
+ return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err)
+ }
+
+ size := ingestion.objectFormat.Size()
+ for i := range entries {
+ builder.Add(entries[i].OID[:size])
+ }
+
+ return builder, nil
+}
+
// indexEntries returns the index entries in object-ID order
// and, for each record in pack order, its position in that index order.
func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) {
diff --git a/object/store/packed/internal/ingest/resolve.go b/object/store/packed/internal/ingest/resolve.go
index 8595d366..77b0fa0f 100644
--- a/object/store/packed/internal/ingest/resolve.go
+++ b/object/store/packed/internal/ingest/resolve.go
@@ -270,7 +270,7 @@ func (ingestion *ingestion) countUnresolved() int {
func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID {
seen := make(map[id.ObjectID]struct{})
- var out []id.ObjectID
+ out := make([]id.ObjectID, 0, ingestion.deltaCount-ingestion.deltasResolved)
for index := range ingestion.records {
rec := &ingestion.records[index]
diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go
index 0ae5593a..9cd6ef1d 100644
--- a/object/store/packed/internal/ingest/result.go
+++ b/object/store/packed/internal/ingest/result.go
@@ -13,6 +13,9 @@ type Result struct {
// RevName is the destination-relative name of the written reverse index.
RevName string
+ // BloomName is the destination-relative name of the written Bloom filter.
+ BloomName string
+
// PackHash is the pack trailer hash
// shared by the pack, index, and reverse index.
PackHash id.ObjectID
diff --git a/object/store/packed/internal/ingest/thin.go b/object/store/packed/internal/ingest/thin.go
index 8d1566e0..fa125f2f 100644
--- a/object/store/packed/internal/ingest/thin.go
+++ b/object/store/packed/internal/ingest/thin.go
@@ -33,7 +33,7 @@ func (ingestion *ingestion) fixThin(external []id.ObjectID, adjacency adjacency,
// Drop the trailer from the write cursor.
ingestion.scanner.consumed -= hashSize
- var appended []int
+ appended := make([]int, 0, len(external))
for _, baseOID := range external {
ty, content, err := ingestion.opts.ThinBase.ReadBytesContent(baseOID)
diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go
index 394d8f6e..adc0ba35 100644
--- a/object/store/packed/internal/ingest/writepack_test.go
+++ b/object/store/packed/internal/ingest/writepack_test.go
@@ -8,6 +8,8 @@ import (
"path/filepath"
"testing"
+ "lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/testgit"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/furgit/object/store"
@@ -89,6 +91,81 @@ func TestWritePackMatchesGit(t *testing.T) {
}
}
+// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter
+// that reports every object in the pack as present.
+func TestWritePackBloom(t *testing.T) {
+ t.Parallel()
+
+ for _, objectFormat := range id.SupportedObjectFormats() {
+ t.Run(objectFormat.String(), func(t *testing.T) {
+ t.Parallel()
+
+ repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat})
+ if err != nil {
+ t.Fatalf("NewRepo: %v", err)
+ }
+
+ seeded, err := repo.SeedHistory(t)
+ if err != nil {
+ t.Fatalf("SeedHistory: %v", err)
+ }
+
+ gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{
+ RevIndex: true,
+ Revs: false,
+ Exclude: nil,
+ })
+ if err != nil {
+ t.Fatalf("PackObjects: %v", err)
+ }
+
+ stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile pack: %v", err)
+ }
+
+ dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{
+ ThinBase: nil,
+ Progress: nil,
+ })
+
+ if result.BloomName == "" {
+ t.Fatal("BloomName is empty")
+ }
+
+ bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile bloom: %v", err)
+ }
+
+ filter, err := bloom.Parse(bloomBytes, objectFormat)
+ if err != nil {
+ t.Fatalf("bloom.Parse: %v", err)
+ }
+
+ idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec
+ if err != nil {
+ t.Fatalf("ReadFile idx: %v", err)
+ }
+
+ index, err := packidx.Parse(idxBytes, objectFormat.Size())
+ if err != nil {
+ t.Fatalf("packidx.Parse: %v", err)
+ }
+
+ if !bytes.Equal(filter.PackHash(), index.PackHash()) {
+ t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash())
+ }
+
+ for pos := range index.NumObjects() {
+ if !filter.MayContain(index.OIDAt(pos)) {
+ t.Fatalf("filter rejects object at index position %d", pos)
+ }
+ }
+ })
+ }
+}
+
// TestWritePackEmpty verifies that a zero-object pack
// succeeds without writing any artifacts.
func TestWritePackEmpty(t *testing.T) {
diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go
index e54d34b2..e06870a9 100644
--- a/object/store/packed/lookup.go
+++ b/object/store/packed/lookup.go
@@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) {
oid := objectID.RawBytes()
for _, p := range packed.order.Keys() {
+ if p.filter != nil && !p.filter.MayContain(oid) {
+ continue
+ }
+
offsetU, found, err := p.idx.Lookup(oid)
if err != nil {
return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err)
diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go
index dd43bc7a..9cd6162b 100644
--- a/object/store/packed/pack.go
+++ b/object/store/packed/pack.go
@@ -8,6 +8,7 @@ import (
"lindenii.org/go/furgit/internal/format/packfile"
"lindenii.org/go/furgit/internal/format/packidx"
+ "lindenii.org/go/furgit/internal/format/packidx/bloom"
"lindenii.org/go/furgit/internal/mmap"
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/lgo/intconv"
@@ -36,6 +37,9 @@ type pack struct {
// and data aliases them.
dataMapping *mmap.Mmap
data []byte
+
+ bloomMapping *mmap.Mmap
+ filter *bloom.Bloom
}
// openPack opens, maps, and validates
@@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack,
return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err)
}
+ bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash())
+
return &pack{
- name: name,
- idxMapping: idxMapping,
- idx: idx,
- dataMapping: dataMapping,
- data: dataMapping.Data(),
+ name: name,
+ idxMapping: idxMapping,
+ idx: idx,
+ dataMapping: dataMapping,
+ data: dataMapping.Data(),
+ bloomMapping: bloomMapping,
+ filter: filter,
}, nil
}
+func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) {
+ mapping, err := mapFile(root, name+".bloom")
+ if err != nil {
+ return nil, nil
+ }
+
+ filter, err := bloom.Parse(mapping.Data(), objectFormat)
+ if err != nil {
+ _ = mapping.Close()
+
+ return nil, nil
+ }
+
+ if !bytes.Equal(filter.PackHash(), packHash) {
+ _ = mapping.Close()
+
+ return nil, nil
+ }
+
+ return mapping, &filter
+}
+
// mapFile opens and maps one file under root.
func mapFile(root *os.Root, name string) (*mmap.Mmap, error) {
file, err := root.Open(name)
@@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error {
return nil
}
-// close releases the pack data and index mappings.
+// close releases the pack data, index, and filter mappings.
func (pack *pack) close() error {
- return errors.Join(
+ errs := []error{
pack.dataMapping.Close(),
pack.idxMapping.Close(),
- )
+ }
+
+ if pack.bloomMapping != nil {
+ errs = append(errs, pack.bloomMapping.Close())
+ }
+
+ return errors.Join(errs...)
}
diff --git a/object/store/packed/packed.go b/object/store/packed/packed.go
index f22c2445..897b3b98 100644
--- a/object/store/packed/packed.go
+++ b/object/store/packed/packed.go
@@ -63,7 +63,7 @@ func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) {
packed := &Packed{
root: root,
objectFormat: objectFormat,
- order: mru.New[*pack](),
+ order: mru.New[*pack](mru.Options{Interval: 48}),
baseCache: newBaseCache(),
refreshMu: sync.Mutex{},
byName: nil,
diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go
index 5e0b85cb..977a9543 100644
--- a/object/store/packed/quarantine.go
+++ b/object/store/packed/quarantine.go
@@ -156,6 +156,8 @@ func packPromotionPriority(name string) int {
return 1
case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"):
return 2
+ case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"):
+ return 2
case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"):
return 3
default:
diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go
index bfc82eff..cf433cfc 100644
--- a/object/store/packed/reader.go
+++ b/object/store/packed/reader.go
@@ -165,7 +165,7 @@ func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, int, io
return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: object size overflows int: %w", ErrMalformedPackedStore, p.name, err)
}
- zr, err := zlib.NewReader(bytes.NewReader(payload))
+ zr, err := zlib.NewReaderBytes(payload)
if err != nil {
return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err)
}
diff --git a/object/store/packed/refresh.go b/object/store/packed/refresh.go
index 14c66013..f06e9859 100644
--- a/object/store/packed/refresh.go
+++ b/object/store/packed/refresh.go
@@ -23,7 +23,7 @@ func (packed *Packed) Refresh() error {
next := make(map[string]*pack, len(packed.byName))
- var opened []*pack
+ opened := make([]*pack, 0, len(dirEntries))
for _, dirEntry := range dirEntries {
name, ok := strings.CutSuffix(dirEntry.Name(), ".idx")
diff --git a/object/store/packed/refresh_test.go b/object/store/packed/refresh_test.go
index 025f6f62..e54dc97d 100644
--- a/object/store/packed/refresh_test.go
+++ b/object/store/packed/refresh_test.go
@@ -101,7 +101,7 @@ func cp(t *testing.T, src, dst string) {
t.Fatalf("ReadFile: %v", err)
}
- err = os.WriteFile(dst, data, 0o600) //nolint:gosec
+ err = os.WriteFile(dst, data, 0o600) //#nosec G703
if err != nil {
t.Fatalf("WriteFile: %v", err)
}
diff --git a/object/store/reader.go b/object/store/reader.go
index 7979fb6c..bbfe1fe8 100644
--- a/object/store/reader.go
+++ b/object/store/reader.go
@@ -23,12 +23,21 @@ type ObjectReader interface {
// Users should treat this as an invariant;
// implementations should not re-verify it on every read.
//
- // Labels: Life-Parent.
+ // The returned slice may alias storage owned by the backend,
+ // such as a memory-mapped pack or a shared cache buffer.
+ // Callers must not mutate it
+ // and must not retain it past the backend's lifetime.
+ //
+ // Labels: Life-Parent, Mut-No.
ReadBytesFull(id id.ObjectID) ([]byte, error)
// ReadBytesContent reads an object's type and content bytes.
//
- // Labels: Life-Parent.
+ // The returned slice may alias backend-owned storage.
+ // Callers must not mutate it
+ // and must not retain it past the backend's lifetime.
+ //
+ // Labels: Life-Parent, Mut-No.
ReadBytesContent(id id.ObjectID) (typ.Type, []byte, error)
// ReadReaderFull reads a full serialized object stream
diff --git a/object/tag/append.go b/object/tag/append.go
index 15a6fde9..2f524a73 100644
--- a/object/tag/append.go
+++ b/object/tag/append.go
@@ -27,7 +27,7 @@ func (tag *Tag) AppendWithoutHeader(dst []byte) ([]byte, error) {
for _, h := range tag.ExtraHeaders {
// GIGO on empty keys and such.
- dst = append(dst, []byte(h.Key)...)
+ dst = append(dst, h.Key...)
dst = append(dst, byte(' '))
dst = append(dst, h.Value...)
dst = append(dst, byte('\n'))
diff --git a/object/tag/clone.go b/object/tag/clone.go
new file mode 100644
index 00000000..0f792bc1
--- /dev/null
+++ b/object/tag/clone.go
@@ -0,0 +1,29 @@
+package tag
+
+import "bytes"
+
+// Clone returns a deep copy of the tag
+// whose byte fields are independent of any memory the original may alias.
+//
+// Labels: Life-Independent.
+func (tag *Tag) Clone() *Tag {
+ clone := &Tag{
+ TargetID: tag.TargetID,
+ TargetType: tag.TargetType,
+ Name: bytes.Clone(tag.Name),
+ Tagger: tag.Tagger.Clone(),
+ Message: bytes.Clone(tag.Message),
+ }
+
+ if tag.ExtraHeaders != nil {
+ clone.ExtraHeaders = make([]ExtraHeader, len(tag.ExtraHeaders))
+ for i, h := range tag.ExtraHeaders {
+ clone.ExtraHeaders[i] = ExtraHeader{
+ Key: bytes.Clone(h.Key),
+ Value: bytes.Clone(h.Value),
+ }
+ }
+ }
+
+ return clone
+}
diff --git a/object/tag/parse.go b/object/tag/parse.go
index c5ea7e14..1fcc7c2c 100644
--- a/object/tag/parse.go
+++ b/object/tag/parse.go
@@ -15,6 +15,16 @@ import (
var ErrInvalidTag = errors.New("object/tag: invalid tag")
// Parse decodes a tag object body.
+//
+// The returned tag aliases body:
+// its Name, Message, and extra-header fields,
+// along with the byte fields of its tagger signature,
+// share body's backing array.
+// The tag inherits body's lifetime
+// and must not be mutated unless body may be.
+// Use [Tag.Clone] for an independent copy.
+//
+// Labels: Life-Parent, Mut-No.
func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) {
t := new(Tag)
@@ -56,7 +66,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) {
return nil, fmt.Errorf("%w: tag name: %w", ErrInvalidTag, err)
}
- t.Name = append([]byte(nil), line...)
+ t.Name = line
i = next
line, next, err = requiredHeaderLine(body, i, "tagger")
@@ -84,7 +94,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) {
i += rel + 1
if len(line) == 0 {
- t.Message = append([]byte(nil), body[i:]...)
+ t.Message = body[i:]
return t, nil
}
@@ -112,8 +122,8 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) {
}
default:
t.ExtraHeaders = append(t.ExtraHeaders, ExtraHeader{
- Key: string(key),
- Value: append([]byte(nil), value...),
+ Key: key,
+ Value: value,
})
}
}
diff --git a/object/tag/roundtrip_test.go b/object/tag/roundtrip_test.go
index c49b3d75..cf4b69a1 100644
--- a/object/tag/roundtrip_test.go
+++ b/object/tag/roundtrip_test.go
@@ -42,8 +42,8 @@ func TestRoundTrip(t *testing.T) {
},
Message: []byte("roundtrip subject\n\nroundtrip body\n\n"),
ExtraHeaders: []tag.ExtraHeader{
- {Key: "encoding", Value: []byte("UTF-8")},
- {Key: "x-test-header", Value: []byte("value")},
+ {Key: []byte("encoding"), Value: []byte("UTF-8")},
+ {Key: []byte("x-test-header"), Value: []byte("value")},
},
}
@@ -102,7 +102,7 @@ func assertTagEqual(t *testing.T, got *tag.Tag, want *tag.Tag) {
}
if !slices.EqualFunc(got.ExtraHeaders, want.ExtraHeaders, func(gotHeader tag.ExtraHeader, wantHeader tag.ExtraHeader) bool {
- return gotHeader.Key == wantHeader.Key && bytes.Equal(gotHeader.Value, wantHeader.Value)
+ return bytes.Equal(gotHeader.Key, wantHeader.Key) && bytes.Equal(gotHeader.Value, wantHeader.Value)
}) {
t.Fatalf("extra headers = %+v, want %+v", got.ExtraHeaders, want.ExtraHeaders)
}
diff --git a/object/tag/tag.go b/object/tag/tag.go
index f4b36c30..a4572921 100644
--- a/object/tag/tag.go
+++ b/object/tag/tag.go
@@ -20,6 +20,6 @@ type Tag struct {
// ExtraHeader represents an extra header in a Git tag object.
type ExtraHeader struct {
- Key string
+ Key []byte
Value []byte
}
diff --git a/object/tree/clone.go b/object/tree/clone.go
new file mode 100644
index 00000000..d00c62f2
--- /dev/null
+++ b/object/tree/clone.go
@@ -0,0 +1,24 @@
+package tree
+
+import "bytes"
+
+// Clone returns a deep copy of the tree
+// whose entry names are independent of any memory the original may alias.
+//
+// Labels: Life-Independent.
+func (tree *Tree) Clone() *Tree {
+ if tree.entries == nil {
+ return &Tree{}
+ }
+
+ clone := &Tree{entries: make([]Entry, len(tree.entries))}
+ for i, entry := range tree.entries {
+ clone.entries[i] = Entry{
+ Mode: entry.Mode,
+ Name: bytes.Clone(entry.Name),
+ ID: entry.ID,
+ }
+ }
+
+ return clone
+}
diff --git a/object/tree/compare.go b/object/tree/compare.go
index 78bf56a4..9bf16f90 100644
--- a/object/tree/compare.go
+++ b/object/tree/compare.go
@@ -6,7 +6,7 @@ package tree
// treating directory names as if they carried a trailing '/'.
// entryIsTree and searchIsTree indicate
// whether the respective names belong to subtree entries.
-func nameCompare(entryName string, entryIsTree bool, searchName string, searchIsTree bool) int {
+func nameCompare(entryName []byte, entryIsTree bool, searchName []byte, searchIsTree bool) int {
entryLen := len(entryName)
if entryIsTree {
entryLen++
diff --git a/object/tree/helpers_test.go b/object/tree/helpers_test.go
index d6cfddb6..3e5eddd4 100644
--- a/object/tree/helpers_test.go
+++ b/object/tree/helpers_test.go
@@ -43,26 +43,26 @@ func mixedEntries(tb testing.TB, repo *testgit.Repo) []tree.Entry {
}
return []tree.Entry{
- {Mode: mode.Regular, Name: "z", ID: blobA},
- {Mode: mode.Regular, Name: "A", ID: blobB},
- {Mode: mode.Regular, Name: "aa", ID: blobC},
- {Mode: mode.Regular, Name: "a0", ID: blobA},
- {Mode: mode.Regular, Name: "a.", ID: blobC},
- {Mode: mode.Regular, Name: "Z", ID: blobB},
- {Mode: mode.Regular, Name: "0", ID: blobA},
- {Mode: mode.Regular, Name: "CAPS", ID: blobB},
- {Mode: mode.Regular, Name: "caps", ID: blobC},
- {Mode: mode.Regular, Name: "name with space", ID: blobB},
- {Mode: mode.Regular, Name: "name.with.dot", ID: blobA},
- {Mode: mode.Regular, Name: "这是一些非 ASCII 的字符", ID: blobC},
- {Mode: mode.Regular, Name: "Emoji 👀", ID: blobC},
- {Mode: mode.Regular, Name: ".hidden", ID: blobA},
- {Mode: mode.Executable, Name: "exec.sh", ID: blobB},
- {Mode: mode.Symlink, Name: "sym.link", ID: blobC},
- {Mode: mode.Gitlink, Name: "submodule", ID: submodule},
- {Mode: mode.Regular, Name: "dir-", ID: blobA},
- {Mode: mode.Directory, Name: "dir", ID: subTree},
- {Mode: mode.Regular, Name: "dir0", ID: blobB},
+ {Mode: mode.Regular, Name: []byte("z"), ID: blobA},
+ {Mode: mode.Regular, Name: []byte("A"), ID: blobB},
+ {Mode: mode.Regular, Name: []byte("aa"), ID: blobC},
+ {Mode: mode.Regular, Name: []byte("a0"), ID: blobA},
+ {Mode: mode.Regular, Name: []byte("a."), ID: blobC},
+ {Mode: mode.Regular, Name: []byte("Z"), ID: blobB},
+ {Mode: mode.Regular, Name: []byte("0"), ID: blobA},
+ {Mode: mode.Regular, Name: []byte("CAPS"), ID: blobB},
+ {Mode: mode.Regular, Name: []byte("caps"), ID: blobC},
+ {Mode: mode.Regular, Name: []byte("name with space"), ID: blobB},
+ {Mode: mode.Regular, Name: []byte("name.with.dot"), ID: blobA},
+ {Mode: mode.Regular, Name: []byte("这是一些非 ASCII 的字符"), ID: blobC},
+ {Mode: mode.Regular, Name: []byte("Emoji 👀"), ID: blobC},
+ {Mode: mode.Regular, Name: []byte(".hidden"), ID: blobA},
+ {Mode: mode.Executable, Name: []byte("exec.sh"), ID: blobB},
+ {Mode: mode.Symlink, Name: []byte("sym.link"), ID: blobC},
+ {Mode: mode.Gitlink, Name: []byte("submodule"), ID: submodule},
+ {Mode: mode.Regular, Name: []byte("dir-"), ID: blobA},
+ {Mode: mode.Directory, Name: []byte("dir"), ID: subTree},
+ {Mode: mode.Regular, Name: []byte("dir0"), ID: blobB},
}
}
@@ -73,7 +73,7 @@ func mkTreeEntries(entries []tree.Entry) []testgit.TreeEntry {
Mode: strconv.FormatUint(uint64(entry.Mode), 8),
Type: entry.Mode.ObjectType(),
OID: entry.ID,
- Name: entry.Name,
+ Name: string(entry.Name),
}
}
@@ -124,7 +124,7 @@ func assertGitDecode(tb testing.TB, repo *testgit.Repo, treeID id.ObjectID, got
tb.Fatalf("entry[%d] id = %s, want %s", i, got[i].ID, want[i].OID)
}
- if got[i].Name != want[i].Name {
+ if string(got[i].Name) != want[i].Name {
tb.Fatalf("entry[%d] name = %q, want %q", i, got[i].Name, want[i].Name)
}
}
diff --git a/object/tree/insert.go b/object/tree/insert.go
index 5e519069..b6c52400 100644
--- a/object/tree/insert.go
+++ b/object/tree/insert.go
@@ -1,10 +1,10 @@
package tree
import (
+ "bytes"
"errors"
"fmt"
"slices"
- "strings"
"lindenii.org/go/furgit/object/tree/mode"
)
@@ -42,16 +42,16 @@ func (tree *Tree) Insert(entry Entry) error {
}
// validateName checks that name is a structurally valid tree entry name.
-func validateName(name string) error {
- if name == "" {
+func validateName(name []byte) error {
+ if len(name) == 0 {
return fmt.Errorf("%w: empty entry name", ErrInvalidTree)
}
- if strings.IndexByte(name, 0) >= 0 {
+ if bytes.IndexByte(name, 0) >= 0 {
return fmt.Errorf("%w: entry name %q contains NUL", ErrInvalidTree, name)
}
- if strings.IndexByte(name, '/') >= 0 {
+ if bytes.IndexByte(name, '/') >= 0 {
return fmt.Errorf("%w: entry name %q contains '/'", ErrInvalidTree, name)
}
diff --git a/object/tree/insert_test.go b/object/tree/insert_test.go
index fbf65b84..1dd406d5 100644
--- a/object/tree/insert_test.go
+++ b/object/tree/insert_test.go
@@ -18,10 +18,10 @@ func TestInsertRejects(t *testing.T) {
name string
entry tree.Entry
}{
- {name: "empty-name", entry: tree.Entry{Mode: mode.Regular, Name: "", ID: zero}},
- {name: "slash-name", entry: tree.Entry{Mode: mode.Regular, Name: "a/b", ID: zero}},
- {name: "nul-name", entry: tree.Entry{Mode: mode.Regular, Name: "a\x00b", ID: zero}},
- {name: "invalid-mode", entry: tree.Entry{Mode: mode.Mode(0o100640), Name: "file", ID: zero}},
+ {name: "empty-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte(""), ID: zero}},
+ {name: "slash-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte("a/b"), ID: zero}},
+ {name: "nul-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte("a\x00b"), ID: zero}},
+ {name: "invalid-mode", entry: tree.Entry{Mode: mode.Mode(0o100640), Name: []byte("file"), ID: zero}},
} {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
@@ -48,18 +48,18 @@ func TestInsertRejectsConflict(t *testing.T) {
}{
{
name: "same-mode",
- first: tree.Entry{Mode: mode.Regular, Name: "file", ID: zero},
- second: tree.Entry{Mode: mode.Regular, Name: "file", ID: zero},
+ first: tree.Entry{Mode: mode.Regular, Name: []byte("file"), ID: zero},
+ second: tree.Entry{Mode: mode.Regular, Name: []byte("file"), ID: zero},
},
{
name: "blob-then-tree",
- first: tree.Entry{Mode: mode.Regular, Name: "name", ID: zero},
- second: tree.Entry{Mode: mode.Directory, Name: "name", ID: zero},
+ first: tree.Entry{Mode: mode.Regular, Name: []byte("name"), ID: zero},
+ second: tree.Entry{Mode: mode.Directory, Name: []byte("name"), ID: zero},
},
{
name: "tree-then-blob",
- first: tree.Entry{Mode: mode.Directory, Name: "name", ID: zero},
- second: tree.Entry{Mode: mode.Regular, Name: "name", ID: zero},
+ first: tree.Entry{Mode: mode.Directory, Name: []byte("name"), ID: zero},
+ second: tree.Entry{Mode: mode.Regular, Name: []byte("name"), ID: zero},
},
} {
t.Run(tc.name, func(t *testing.T) {
@@ -79,24 +79,3 @@ func TestInsertRejectsConflict(t *testing.T) {
})
}
}
-
-func TestEntriesIsCopy(t *testing.T) {
- t.Parallel()
-
- zero := id.SupportedObjectFormats()[0].Zero()
-
- var tr tree.Tree
-
- err := tr.Insert(tree.Entry{Mode: mode.Regular, Name: "file", ID: zero})
- if err != nil {
- t.Fatalf("Insert: %v", err)
- }
-
- entries := tr.Entries()
- entries[0].Name = "mutated"
-
- again := tr.Entries()
- if again[0].Name != "file" {
- t.Fatalf("Entries()[0].Name = %q, want %q", again[0].Name, "file")
- }
-}
diff --git a/object/tree/lookup.go b/object/tree/lookup.go
index 34a01748..2ff6ce76 100644
--- a/object/tree/lookup.go
+++ b/object/tree/lookup.go
@@ -1,6 +1,7 @@
package tree
import (
+ "bytes"
"slices"
"lindenii.org/go/furgit/object/tree/mode"
@@ -10,13 +11,18 @@ import (
//
// A name matches whether stored as a blob-like or as a subtree entry,
// so both orderings are searched.
-// The returned entry is a copy; mutating it does not affect the tree.
-func (tree *Tree) Find(name string) (Entry, bool) {
+//
+// The returned entry is a shallow copy:
+// its Name aliases the tree's internal storage,
+// so it must not be mutated and shares the tree's lifetime.
+//
+// Labels: Life-Parent, Mut-No.
+func (tree *Tree) Find(name []byte) (Entry, bool) {
for _, searchIsTree := range [...]bool{true, false} {
- index, ok := slices.BinarySearchFunc(tree.entries, name, func(existing Entry, target string) int {
+ index, ok := slices.BinarySearchFunc(tree.entries, name, func(existing Entry, target []byte) int {
return nameCompare(existing.Name, existing.Mode == mode.Directory, target, searchIsTree)
})
- if ok && tree.entries[index].Name == name {
+ if ok && bytes.Equal(tree.entries[index].Name, name) {
return tree.entries[index], true
}
}
diff --git a/object/tree/lookup_test.go b/object/tree/lookup_test.go
index 22d73615..706c1cd2 100644
--- a/object/tree/lookup_test.go
+++ b/object/tree/lookup_test.go
@@ -1,6 +1,7 @@
package tree_test
import (
+ "bytes"
"testing"
"lindenii.org/go/furgit/internal/testgit"
@@ -28,12 +29,12 @@ func TestFind(t *testing.T) {
t.Fatalf("Find(%q) not found", want.Name)
}
- if got.Mode != want.Mode || got.Name != want.Name || got.ID != want.ID {
+ if got.Mode != want.Mode || !bytes.Equal(got.Name, want.Name) || got.ID != want.ID {
t.Fatalf("Find(%q) = %+v, want %+v", want.Name, got, want)
}
}
- if _, ok := tr.Find("does-not-exist"); ok {
+ if _, ok := tr.Find([]byte("does-not-exist")); ok {
t.Fatalf("Find(does-not-exist) = true, want false")
}
})
diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go
index ca00ea94..8a22b90f 100644
--- a/object/tree/malformed_test.go
+++ b/object/tree/malformed_test.go
@@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) {
{name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)},
{name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)},
{name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)},
+ {name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)},
} {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
diff --git a/object/tree/parse.go b/object/tree/parse.go
index 5b01fa05..bd6ed3b0 100644
--- a/object/tree/parse.go
+++ b/object/tree/parse.go
@@ -14,10 +14,22 @@ import (
// correctly sized object IDs, and strictly increasing Git tree order.
// It does not enforce fsck-level name policy
// (for example ".", "..", ".git", or platform-specific aliases).
+//
+// The returned tree aliases body:
+// each entry's Name shares body's backing array.
+// The tree inherits body's lifetime
+// and must not be mutated unless body may be.
+// Use [Tree.Clone] for an independent copy.
+//
+// Labels: Life-Parent, Mut-No.
func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) {
tree := new(Tree)
idSize := objectFormat.Size()
- seen := make(map[string]struct{})
+
+ const minEntryOverhead = 5 + 1 + 1 + 1 // mode, space, name, NUL
+ if estimate := len(body) / (minEntryOverhead + idSize); estimate > 0 {
+ tree.entries = make([]Entry, 0, estimate)
+ }
i := 0
for i < len(body) {
@@ -38,7 +50,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) {
return nil, fmt.Errorf("%w: missing name terminator at offset %d", ErrInvalidTree, i)
}
- name := string(body[i : i+nul])
+ name := body[i : i+nul]
i += nul + 1
err = validateName(name)
@@ -67,14 +79,44 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) {
}
}
- if _, dup := seen[entry.Name]; dup {
+ if entryMode == mode.Directory && hasNonDirNamed(tree.entries, entry.Name) {
return nil, fmt.Errorf("%w: duplicate entry name %q", ErrInvalidTree, entry.Name)
}
- seen[entry.Name] = struct{}{}
-
tree.entries = append(tree.entries, entry)
}
return tree, nil
}
+
+// hasNonDirNamed reports whether entries, sorted in Git tree order,
+// holds a non-directory entry whose name equals name.
+//
+// The match sorts immediately below a directory of the same name,
+// so the search gallops from the back before binary searching the bracket.
+func hasNonDirNamed(entries []Entry, name []byte) bool {
+ lo, hi := 0, len(entries)
+
+ for stride := 1; stride < hi-lo; stride *= 2 {
+ mid := hi - stride
+ if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 {
+ lo = mid + 1
+
+ break
+ }
+
+ hi = mid
+ }
+
+ for lo < hi {
+ mid := lo + (hi-lo)/2
+ if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 {
+ lo = mid + 1
+ } else {
+ hi = mid
+ }
+ }
+
+ return lo < len(entries) &&
+ nameCompare(entries[lo].Name, entries[lo].Mode == mode.Directory, name, false) == 0
+}
diff --git a/object/tree/roundtrip_test.go b/object/tree/roundtrip_test.go
index 3e82c79a..a9d5f40f 100644
--- a/object/tree/roundtrip_test.go
+++ b/object/tree/roundtrip_test.go
@@ -70,7 +70,7 @@ func assertEntriesEqual(t *testing.T, got []tree.Entry, want []tree.Entry) {
t.Fatalf("entry[%d] mode = %o, want %o", i, got[i].Mode, want[i].Mode)
}
- if got[i].Name != want[i].Name {
+ if !bytes.Equal(got[i].Name, want[i].Name) {
t.Fatalf("entry[%d] name = %q, want %q", i, got[i].Name, want[i].Name)
}
diff --git a/object/tree/tree.go b/object/tree/tree.go
index 431df649..f40bb165 100644
--- a/object/tree/tree.go
+++ b/object/tree/tree.go
@@ -1,8 +1,6 @@
package tree
import (
- "slices"
-
"lindenii.org/go/furgit/object/id"
"lindenii.org/go/furgit/object/tree/mode"
)
@@ -21,15 +19,19 @@ type Tree struct {
// Entry represents a single entry in a tree.
type Entry struct {
Mode mode.Mode
- Name string
+ Name []byte
ID id.ObjectID
}
-// Entries returns a copy of the tree's entries in Git tree order.
+// Entries returns the tree's entries in Git tree order.
//
-// Mutating the returned slice does not affect the tree.
+// The returned slice aliases the tree's internal storage,
+// so it must not be mutated,
+// and it is invalidated by any subsequent call that mutates the tree,
+// such as [Tree.Insert].
+// Use [Tree.Clone] for an independent tree.
//
-// Labels: Life-Independent.
+// Labels: Life-Parent, Mut-No.
func (tree *Tree) Entries() []Entry {
- return slices.Clone(tree.entries)
+ return tree.entries
}