diff options
Diffstat (limited to 'object')
45 files changed, 506 insertions, 141 deletions
diff --git a/object/blob/clone.go b/object/blob/clone.go new file mode 100644 index 00000000..7106c3aa --- /dev/null +++ b/object/blob/clone.go @@ -0,0 +1,11 @@ +package blob + +import "bytes" + +// Clone returns a deep copy of the blob +// whose Data is independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (blob *Blob) Clone() *Blob { + return &Blob{Data: bytes.Clone(blob.Data)} +} diff --git a/object/blob/parse.go b/object/blob/parse.go index c013af96..1796d42f 100644 --- a/object/blob/parse.go +++ b/object/blob/parse.go @@ -2,7 +2,13 @@ package blob // Parse decodes a blob object body. // -// Labels: Life-Independent. +// The returned blob aliases body: +// its Data shares the same backing array, +// so the blob inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Blob.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte) (*Blob, error) { - return &Blob{Data: append([]byte(nil), body...)}, nil + return &Blob{Data: body}, nil } diff --git a/object/commit/append.go b/object/commit/append.go index d5258b97..02d69058 100644 --- a/object/commit/append.go +++ b/object/commit/append.go @@ -33,7 +33,7 @@ func (commit *Commit) AppendWithoutHeader(dst []byte) ([]byte, error) { dst = append(dst, byte('\n')) - if commit.ChangeID != "" { + if len(commit.ChangeID) != 0 { dst = append(dst, []byte("change-id ")...) dst = append(dst, commit.ChangeID...) dst = append(dst, byte('\n')) @@ -41,7 +41,7 @@ func (commit *Commit) AppendWithoutHeader(dst []byte) ([]byte, error) { for _, h := range commit.ExtraHeaders { // GIGO on empty keys and such. - dst = append(dst, []byte(h.Key)...) + dst = append(dst, h.Key...) dst = append(dst, byte(' ')) dst = append(dst, h.Value...) dst = append(dst, byte('\n')) diff --git a/object/commit/clone.go b/object/commit/clone.go new file mode 100644 index 00000000..08987f26 --- /dev/null +++ b/object/commit/clone.go @@ -0,0 +1,33 @@ +package commit + +import ( + "bytes" + "slices" +) + +// Clone returns a deep copy of the commit +// whose byte fields are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (commit *Commit) Clone() *Commit { + clone := &Commit{ + Tree: commit.Tree, + Parents: slices.Clone(commit.Parents), + Author: commit.Author.Clone(), + Committer: commit.Committer.Clone(), + Message: bytes.Clone(commit.Message), + ChangeID: bytes.Clone(commit.ChangeID), + } + + if commit.ExtraHeaders != nil { + clone.ExtraHeaders = make([]ExtraHeader, len(commit.ExtraHeaders)) + for i, h := range commit.ExtraHeaders { + clone.ExtraHeaders[i] = ExtraHeader{ + Key: bytes.Clone(h.Key), + Value: bytes.Clone(h.Value), + } + } + } + + return clone +} diff --git a/object/commit/commit.go b/object/commit/commit.go index 6a89bce9..a8a247bf 100644 --- a/object/commit/commit.go +++ b/object/commit/commit.go @@ -14,12 +14,12 @@ type Commit struct { Author signature.Signature Committer signature.Signature Message []byte - ChangeID string + ChangeID []byte ExtraHeaders []ExtraHeader } // ExtraHeader represents an extra header in a Git object. type ExtraHeader struct { - Key string + Key []byte Value []byte } diff --git a/object/commit/parse.go b/object/commit/parse.go index 20353e14..74f607f1 100644 --- a/object/commit/parse.go +++ b/object/commit/parse.go @@ -13,6 +13,16 @@ import ( var ErrInvalidCommit = errors.New("object/commit: invalid commit") // Parse decodes a commit object body. +// +// The returned commit aliases body: +// its Message, ChangeID, and extra-header fields, +// along with the byte fields of its signatures, +// share body's backing array. +// The commit inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Commit.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { c := new(Commit) @@ -95,7 +105,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { return nil, fmt.Errorf("%w: unexpected change-id header at offset %d", ErrInvalidCommit, lineStart) } - c.ChangeID = string(value) + c.ChangeID = value case "gpgsig", "gpgsig-sha256": if state != parseStateExtra { return nil, fmt.Errorf("%w: unexpected %s header at offset %d", ErrInvalidCommit, key, lineStart) @@ -119,8 +129,8 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { } c.ExtraHeaders = append(c.ExtraHeaders, ExtraHeader{ - Key: string(key), - Value: append([]byte(nil), value...), + Key: key, + Value: value, }) } } @@ -141,7 +151,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Commit, error) { panic("unreachable parse state") } - c.Message = append([]byte(nil), body[i:]...) + c.Message = body[i:] return c, nil } diff --git a/object/commit/roundtrip_test.go b/object/commit/roundtrip_test.go index faa8a834..1cfcaaac 100644 --- a/object/commit/roundtrip_test.go +++ b/object/commit/roundtrip_test.go @@ -87,10 +87,10 @@ func TestRoundTrip(t *testing.T) { OffsetMinutes: 330, }, Message: []byte("roundtrip subject\n\nroundtrip body\n\n"), - ChangeID: "zyxwvutsrqponmlk", + ChangeID: []byte("zyxwvutsrqponmlk"), ExtraHeaders: []commit.ExtraHeader{ - {Key: "encoding", Value: []byte("UTF-8")}, - {Key: "x-test-header", Value: []byte("value")}, + {Key: []byte("encoding"), Value: []byte("UTF-8")}, + {Key: []byte("x-test-header"), Value: []byte("value")}, }, } @@ -145,7 +145,7 @@ func assertCommitEqual(t *testing.T, got *commit.Commit, want *commit.Commit) { t.Fatalf("message = %q, want %q", got.Message, want.Message) } - if got.ChangeID != want.ChangeID { + if !bytes.Equal(got.ChangeID, want.ChangeID) { t.Fatalf("change id = %q, want %q", got.ChangeID, want.ChangeID) } @@ -155,7 +155,7 @@ func assertCommitEqual(t *testing.T, got *commit.Commit, want *commit.Commit) { for i, wantHeader := range want.ExtraHeaders { gotHeader := got.ExtraHeaders[i] - if gotHeader.Key != wantHeader.Key { + if !bytes.Equal(gotHeader.Key, wantHeader.Key) { t.Fatalf("extra header[%d] key = %q, want %q", i, gotHeader.Key, wantHeader.Key) } diff --git a/object/fetch/path.go b/object/fetch/path.go index f8eca507..e8b12481 100644 --- a/object/fetch/path.go +++ b/object/fetch/path.go @@ -47,7 +47,7 @@ func (err *PathNotTreeError) Error() string { // for an io/fs.FS-like interface. // // Labels: Life-Parent. -func (fetcher *Fetcher) Path(root oid.ObjectID, parts []string) (tree.Entry, error) { +func (fetcher *Fetcher) Path(root oid.ObjectID, parts [][]byte) (tree.Entry, error) { if len(parts) == 0 { return tree.Entry{}, ErrPathInvalid } diff --git a/object/fetch/treefs.go b/object/fetch/treefs.go index 9d88abb2..d12e3dd6 100644 --- a/object/fetch/treefs.go +++ b/object/fetch/treefs.go @@ -1,11 +1,11 @@ package fetch import ( + "bytes" "errors" "fmt" "io" "io/fs" - "strings" "time" oid "lindenii.org/go/furgit/object/id" @@ -48,12 +48,12 @@ var ErrGitlinkNotFile = fmt.Errorf("%w: object/fetch: gitlink entries are not re // generic fs consumers classify it correctly. var ErrIsDirectory = fmt.Errorf("%w: object/fetch: is a directory", fs.ErrInvalid) -func splitPath(path string) []string { +func splitPath(path string) [][]byte { if len(path) == 0 { return nil } - return strings.Split(path, "/") + return bytes.Split([]byte(path), []byte("/")) } type treeEntryValue struct { @@ -197,7 +197,7 @@ func (treeFS *TreeFS) Open(name string) (fs.File, error) { entries := make([]fs.DirEntry, 0, len(tree.Object().Entries())) for _, child := range tree.Object().Entries() { childEntry := treeEntryValue{ - name: child.Name, + name: string(child.Name), mode: child.Mode, objectID: child.ID, treeEntry: &child, @@ -401,7 +401,7 @@ func (treeFS *TreeFS) resolvePath(op treeFSOp, name string) (treeEntryValue, err } return treeEntryValue{ - name: entry.Name, + name: string(entry.Name), mode: entry.Mode, objectID: entry.ID, treeEntry: &entry, diff --git a/object/fetch/treefs_test.go b/object/fetch/treefs_test.go index ba292276..05240823 100644 --- a/object/fetch/treefs_test.go +++ b/object/fetch/treefs_test.go @@ -35,12 +35,12 @@ func TestTreeFS(t *testing.T) { } subTreeID := writeTree(t, store, []tree.Entry{ - {Mode: mode.Executable, Name: "exec.sh", ID: execID}, + {Mode: mode.Executable, Name: []byte("exec.sh"), ID: execID}, }) rootTreeID := writeTree(t, store, []tree.Entry{ - {Mode: mode.Regular, Name: "plain.txt", ID: plainID}, - {Mode: mode.Directory, Name: "dir", ID: subTreeID}, + {Mode: mode.Regular, Name: []byte("plain.txt"), ID: plainID}, + {Mode: mode.Directory, Name: []byte("dir"), ID: subTreeID}, }) commitID := writeCommit(t, store, rootTreeID) diff --git a/object/signature/clone.go b/object/signature/clone.go new file mode 100644 index 00000000..4637a258 --- /dev/null +++ b/object/signature/clone.go @@ -0,0 +1,16 @@ +package signature + +import "bytes" + +// Clone returns a deep copy of the signature +// whose Name and Email are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (signature Signature) Clone() Signature { + return Signature{ + Name: bytes.Clone(signature.Name), + Email: bytes.Clone(signature.Email), + WhenUnix: signature.WhenUnix, + OffsetMinutes: signature.OffsetMinutes, + } +} diff --git a/object/signature/parse.go b/object/signature/parse.go index b39100cd..190d2cf4 100644 --- a/object/signature/parse.go +++ b/object/signature/parse.go @@ -10,7 +10,13 @@ import ( // Parse parses a canonical Git signature line. // -// Labels: Life-Independent. +// The returned signature aliases line: +// its Name and Email share line's backing array, +// so the signature inherits line's lifetime +// and must not be mutated unless line may be. +// Use [Signature.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(line []byte) (*Signature, error) { lt := bytes.IndexByte(line, '<') if lt < 0 { @@ -24,8 +30,8 @@ func Parse(line []byte) (*Signature, error) { gt := lt + 1 + gtRel - nameBytes := append([]byte(nil), bytes.TrimRight(line[:lt], " ")...) - emailBytes := append([]byte(nil), line[lt+1:gt]...) + nameBytes := bytes.TrimRight(line[:lt], " ") + emailBytes := line[lt+1 : gt] rest := line[gt+1:] if len(rest) == 0 || rest[0] != ' ' { diff --git a/object/store/memory/reader.go b/object/store/memory/reader.go index 6b8fae55..e04ad759 100644 --- a/object/store/memory/reader.go +++ b/object/store/memory/reader.go @@ -24,13 +24,15 @@ func (memory *Memory) ReadBytesFull(id id.ObjectID) ([]byte, error) { } // ReadBytesContent reads one object body. +// +// The returned slice aliases the store's own copy of the object content. func (memory *Memory) ReadBytesContent(id id.ObjectID) (typ.Type, []byte, error) { obj, ok := memory.objects.Load(id) if !ok { return typ.Unknown, nil, store.ErrObjectNotFound } - return obj.ty, append([]byte(nil), obj.content...), nil + return obj.ty, obj.content, nil } // ReadHeader reads one object header. diff --git a/object/store/mix/mix.go b/object/store/mix/mix.go index 2e8e926b..b048fe86 100644 --- a/object/store/mix/mix.go +++ b/object/store/mix/mix.go @@ -28,7 +28,7 @@ func New(backends ...store.ObjectReader) *Mix { present[backend] = struct{}{} } - order := mru.New[store.ObjectReader]() + order := mru.New[store.ObjectReader](mru.Options{Interval: 48}) order.Sync(present) return &Mix{ diff --git a/object/store/packed/delta.go b/object/store/packed/delta.go index 567fd679..5b538221 100644 --- a/object/store/packed/delta.go +++ b/object/store/packed/delta.go @@ -1,7 +1,6 @@ package packed import ( - "bytes" "fmt" "io" "slices" @@ -11,8 +10,14 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packfile/delta" "lindenii.org/go/lgo/intconv" + "lindenii.org/go/lgo/sync" ) +//nolint:gochecknoglobals +var deltaHeaderPool = sync.NewPool(func() *[delta.MaxHeaderSizesLen]byte { + return new([delta.MaxHeaderSizesLen]byte) +}) + // deltaNode is a delta entry on a resolution chain. type deltaNode struct { // payload is the entry's compressed delta payload view. @@ -28,7 +33,11 @@ type deltaNode struct { // unpackEntry reconstructs the object stored at offset in p, // following ref- and ofs-delta chains within the pack. // -// Labels: Life-Independent. +// A direct base-cache hit returns the shared cache buffer itself, +// so the result may alias cache storage and must not be mutated; +// delta-applied results are freshly allocated. +// +// Labels: Life-Parent, Mut-No. func (packed *Packed) unpackEntry(p *pack, offset int) (packfile.EntryType, []byte, error) { var zero packfile.EntryType @@ -86,9 +95,11 @@ func (packed *Packed) unpackEntry(p *pack, offset int) (packfile.EntryType, []by cur = baseOffset } - // A direct cache hit with no deltas to apply must be copied. + // A direct cache hit with no deltas to apply + // returns the shared cache buffer directly; + // callers are contractually Mut-No. if len(chain) == 0 && fromCache { - return baseType, bytes.Clone(base), nil + return baseType, base, nil } // Apply deltas back up the chain, caching each consumed base. @@ -202,16 +213,19 @@ func (packed *Packed) resolveType(p *pack, offset int, entryHeader packfile.Entr // deltaResultSize reads the declared result size // from one compressed delta payload prefix. func deltaResultSize(payload []byte, deltaSize uint64) (int, error) { - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return 0, fmt.Errorf("reading delta header: %w", err) } defer func() { _ = zr.Close() }() + buf := deltaHeaderPool.Get() + defer deltaHeaderPool.Put(buf) + prefixLen := min(uint64(delta.MaxHeaderSizesLen), deltaSize) - prefix := make([]byte, prefixLen) + prefix := buf[:prefixLen] _, err = io.ReadFull(zr, prefix) if err != nil { diff --git a/object/store/packed/entry.go b/object/store/packed/entry.go index e9d45bb4..908afad0 100644 --- a/object/store/packed/entry.go +++ b/object/store/packed/entry.go @@ -1,7 +1,6 @@ package packed import ( - "bytes" "errors" "fmt" "io" @@ -49,7 +48,7 @@ func inflate(payload []byte, expectedSize uint64) ([]byte, error) { return nil, fmt.Errorf("declared size: %w", err) } - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return nil, fmt.Errorf("inflating entry payload: %w", err) } diff --git a/object/store/packed/internal/ingest/finalize.go b/object/store/packed/internal/ingest/finalize.go index f0ab6622..afed996c 100644 --- a/object/store/packed/internal/ingest/finalize.go +++ b/object/store/packed/internal/ingest/finalize.go @@ -8,6 +8,7 @@ import ( "slices" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/format/packrev" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -38,12 +39,27 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + bloomBuilder, err := ingestion.buildBloom(entries, packHash) + if err != nil { + return Result{}, err + } + + bloomTmp, err := ingestion.writeTemp("tmp_bloom_", func(w io.Writer) error { + _, err := w.Write(bloomBuilder.Bytes()) + + return err + }) + if err != nil { + return Result{}, err + } + base := "pack-" + ingestion.packHash.String() packFinal := base + ".pack" idxFinal := base + ".idx" revFinal := base + ".rev" + bloomFinal := base + ".bloom" - // Link the pack and reverse index before the index, + // Link the pack, reverse index, and Bloom filter before the index, // since the index is what publishes the pack to readers. err = ingestion.link(ingestion.packTmp, packFinal) if err != nil { @@ -55,6 +71,11 @@ func (ingestion *ingestion) finalize() (Result, error) { return Result{}, err } + err = ingestion.link(bloomTmp, bloomFinal) + if err != nil { + return Result{}, err + } + err = ingestion.link(idxTmp, idxFinal) if err != nil { return Result{}, err @@ -69,12 +90,34 @@ func (ingestion *ingestion) finalize() (Result, error) { PackName: packFinal, IdxName: idxFinal, RevName: revFinal, + BloomName: bloomFinal, PackHash: ingestion.packHash, ObjectCount: objectCount, ThinFixed: ingestion.thinFixed, }, nil } +// buildBloom builds a Bloom filter over the index entries' object IDs, +// bound to packHash. +func (ingestion *ingestion) buildBloom(entries []packidx.Entry, packHash []byte) (*bloom.Builder, error) { + bucketCount, k, err := bloom.RecommendParams(ingestion.objectFormat, len(entries)) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + builder, err := bloom.NewBuilder(ingestion.objectFormat, bucketCount, k, packHash) + if err != nil { + return nil, fmt.Errorf("object/store/packed/internal/ingest: %w", err) + } + + size := ingestion.objectFormat.Size() + for i := range entries { + builder.Add(entries[i].OID[:size]) + } + + return builder, nil +} + // indexEntries returns the index entries in object-ID order // and, for each record in pack order, its position in that index order. func (ingestion *ingestion) indexEntries() ([]packidx.Entry, []uint32, error) { diff --git a/object/store/packed/internal/ingest/resolve.go b/object/store/packed/internal/ingest/resolve.go index 8595d366..77b0fa0f 100644 --- a/object/store/packed/internal/ingest/resolve.go +++ b/object/store/packed/internal/ingest/resolve.go @@ -270,7 +270,7 @@ func (ingestion *ingestion) countUnresolved() int { func (ingestion *ingestion) unresolvedExternalBases() []id.ObjectID { seen := make(map[id.ObjectID]struct{}) - var out []id.ObjectID + out := make([]id.ObjectID, 0, ingestion.deltaCount-ingestion.deltasResolved) for index := range ingestion.records { rec := &ingestion.records[index] diff --git a/object/store/packed/internal/ingest/result.go b/object/store/packed/internal/ingest/result.go index 0ae5593a..9cd6ef1d 100644 --- a/object/store/packed/internal/ingest/result.go +++ b/object/store/packed/internal/ingest/result.go @@ -13,6 +13,9 @@ type Result struct { // RevName is the destination-relative name of the written reverse index. RevName string + // BloomName is the destination-relative name of the written Bloom filter. + BloomName string + // PackHash is the pack trailer hash // shared by the pack, index, and reverse index. PackHash id.ObjectID diff --git a/object/store/packed/internal/ingest/thin.go b/object/store/packed/internal/ingest/thin.go index 8d1566e0..fa125f2f 100644 --- a/object/store/packed/internal/ingest/thin.go +++ b/object/store/packed/internal/ingest/thin.go @@ -33,7 +33,7 @@ func (ingestion *ingestion) fixThin(external []id.ObjectID, adjacency adjacency, // Drop the trailer from the write cursor. ingestion.scanner.consumed -= hashSize - var appended []int + appended := make([]int, 0, len(external)) for _, baseOID := range external { ty, content, err := ingestion.opts.ThinBase.ReadBytesContent(baseOID) diff --git a/object/store/packed/internal/ingest/writepack_test.go b/object/store/packed/internal/ingest/writepack_test.go index 394d8f6e..adc0ba35 100644 --- a/object/store/packed/internal/ingest/writepack_test.go +++ b/object/store/packed/internal/ingest/writepack_test.go @@ -8,6 +8,8 @@ import ( "path/filepath" "testing" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/testgit" "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/store" @@ -89,6 +91,81 @@ func TestWritePackMatchesGit(t *testing.T) { } } +// TestWritePackBloom verifies that ingesting a pack writes a Bloom filter +// that reports every object in the pack as present. +func TestWritePackBloom(t *testing.T) { + t.Parallel() + + for _, objectFormat := range id.SupportedObjectFormats() { + t.Run(objectFormat.String(), func(t *testing.T) { + t.Parallel() + + repo, err := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: objectFormat}) + if err != nil { + t.Fatalf("NewRepo: %v", err) + } + + seeded, err := repo.SeedHistory(t) + if err != nil { + t.Fatalf("SeedHistory: %v", err) + } + + gitPrefix, err := repo.PackObjects(t, seeded.All(), testgit.PackObjectsOptions{ + RevIndex: true, + Revs: false, + Exclude: nil, + }) + if err != nil { + t.Fatalf("PackObjects: %v", err) + } + + stream, err := os.ReadFile(gitPrefix + ".pack") //nolint:gosec + if err != nil { + t.Fatalf("ReadFile pack: %v", err) + } + + dir, result := writePack(t, objectFormat, bytes.NewReader(stream), store.PackWriteOptions{ + ThinBase: nil, + Progress: nil, + }) + + if result.BloomName == "" { + t.Fatal("BloomName is empty") + } + + bloomBytes, err := os.ReadFile(filepath.Join(dir, result.BloomName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile bloom: %v", err) + } + + filter, err := bloom.Parse(bloomBytes, objectFormat) + if err != nil { + t.Fatalf("bloom.Parse: %v", err) + } + + idxBytes, err := os.ReadFile(filepath.Join(dir, result.IdxName)) //nolint:gosec + if err != nil { + t.Fatalf("ReadFile idx: %v", err) + } + + index, err := packidx.Parse(idxBytes, objectFormat.Size()) + if err != nil { + t.Fatalf("packidx.Parse: %v", err) + } + + if !bytes.Equal(filter.PackHash(), index.PackHash()) { + t.Fatalf("filter pack hash %x, want %x", filter.PackHash(), index.PackHash()) + } + + for pos := range index.NumObjects() { + if !filter.MayContain(index.OIDAt(pos)) { + t.Fatalf("filter rejects object at index position %d", pos) + } + } + }) + } +} + // TestWritePackEmpty verifies that a zero-object pack // succeeds without writing any artifacts. func TestWritePackEmpty(t *testing.T) { diff --git a/object/store/packed/lookup.go b/object/store/packed/lookup.go index e54d34b2..e06870a9 100644 --- a/object/store/packed/lookup.go +++ b/object/store/packed/lookup.go @@ -24,6 +24,10 @@ func (packed *Packed) lookup(objectID id.ObjectID) (*pack, int, error) { oid := objectID.RawBytes() for _, p := range packed.order.Keys() { + if p.filter != nil && !p.filter.MayContain(oid) { + continue + } + offsetU, found, err := p.idx.Lookup(oid) if err != nil { return nil, 0, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) diff --git a/object/store/packed/pack.go b/object/store/packed/pack.go index dd43bc7a..9cd6162b 100644 --- a/object/store/packed/pack.go +++ b/object/store/packed/pack.go @@ -8,6 +8,7 @@ import ( "lindenii.org/go/furgit/internal/format/packfile" "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/format/packidx/bloom" "lindenii.org/go/furgit/internal/mmap" "lindenii.org/go/furgit/object/id" "lindenii.org/go/lgo/intconv" @@ -36,6 +37,9 @@ type pack struct { // and data aliases them. dataMapping *mmap.Mmap data []byte + + bloomMapping *mmap.Mmap + filter *bloom.Bloom } // openPack opens, maps, and validates @@ -69,15 +73,41 @@ func openPack(root *os.Root, name string, objectFormat id.ObjectFormat) (*pack, return nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, name, err) } + bloomMapping, filter := openBloom(root, name, objectFormat, idx.PackHash()) + return &pack{ - name: name, - idxMapping: idxMapping, - idx: idx, - dataMapping: dataMapping, - data: dataMapping.Data(), + name: name, + idxMapping: idxMapping, + idx: idx, + dataMapping: dataMapping, + data: dataMapping.Data(), + bloomMapping: bloomMapping, + filter: filter, }, nil } +func openBloom(root *os.Root, name string, objectFormat id.ObjectFormat, packHash []byte) (*mmap.Mmap, *bloom.Bloom) { + mapping, err := mapFile(root, name+".bloom") + if err != nil { + return nil, nil + } + + filter, err := bloom.Parse(mapping.Data(), objectFormat) + if err != nil { + _ = mapping.Close() + + return nil, nil + } + + if !bytes.Equal(filter.PackHash(), packHash) { + _ = mapping.Close() + + return nil, nil + } + + return mapping, &filter +} + // mapFile opens and maps one file under root. func mapFile(root *os.Root, name string) (*mmap.Mmap, error) { file, err := root.Open(name) @@ -125,10 +155,16 @@ func validatePackData(data []byte, idx *packidx.Packidx, hashSize int) error { return nil } -// close releases the pack data and index mappings. +// close releases the pack data, index, and filter mappings. func (pack *pack) close() error { - return errors.Join( + errs := []error{ pack.dataMapping.Close(), pack.idxMapping.Close(), - ) + } + + if pack.bloomMapping != nil { + errs = append(errs, pack.bloomMapping.Close()) + } + + return errors.Join(errs...) } diff --git a/object/store/packed/packed.go b/object/store/packed/packed.go index f22c2445..897b3b98 100644 --- a/object/store/packed/packed.go +++ b/object/store/packed/packed.go @@ -63,7 +63,7 @@ func New(root *os.Root, objectFormat id.ObjectFormat) (*Packed, error) { packed := &Packed{ root: root, objectFormat: objectFormat, - order: mru.New[*pack](), + order: mru.New[*pack](mru.Options{Interval: 48}), baseCache: newBaseCache(), refreshMu: sync.Mutex{}, byName: nil, diff --git a/object/store/packed/quarantine.go b/object/store/packed/quarantine.go index 5e0b85cb..977a9543 100644 --- a/object/store/packed/quarantine.go +++ b/object/store/packed/quarantine.go @@ -156,6 +156,8 @@ func packPromotionPriority(name string) int { return 1 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".rev"): return 2 + case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".bloom"): + return 2 case strings.HasPrefix(name, "pack-") && strings.HasSuffix(name, ".idx"): return 3 default: diff --git a/object/store/packed/reader.go b/object/store/packed/reader.go index bfc82eff..cf433cfc 100644 --- a/object/store/packed/reader.go +++ b/object/store/packed/reader.go @@ -165,7 +165,7 @@ func (packed *Packed) ReadReaderContent(objectID id.ObjectID) (typ.Type, int, io return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: object size overflows int: %w", ErrMalformedPackedStore, p.name, err) } - zr, err := zlib.NewReader(bytes.NewReader(payload)) + zr, err := zlib.NewReaderBytes(payload) if err != nil { return typ.Unknown, 0, nil, fmt.Errorf("%w: pack %q: %w", ErrMalformedPackedStore, p.name, err) } diff --git a/object/store/packed/refresh.go b/object/store/packed/refresh.go index 14c66013..f06e9859 100644 --- a/object/store/packed/refresh.go +++ b/object/store/packed/refresh.go @@ -23,7 +23,7 @@ func (packed *Packed) Refresh() error { next := make(map[string]*pack, len(packed.byName)) - var opened []*pack + opened := make([]*pack, 0, len(dirEntries)) for _, dirEntry := range dirEntries { name, ok := strings.CutSuffix(dirEntry.Name(), ".idx") diff --git a/object/store/packed/refresh_test.go b/object/store/packed/refresh_test.go index 025f6f62..e54dc97d 100644 --- a/object/store/packed/refresh_test.go +++ b/object/store/packed/refresh_test.go @@ -101,7 +101,7 @@ func cp(t *testing.T, src, dst string) { t.Fatalf("ReadFile: %v", err) } - err = os.WriteFile(dst, data, 0o600) //nolint:gosec + err = os.WriteFile(dst, data, 0o600) //#nosec G703 if err != nil { t.Fatalf("WriteFile: %v", err) } diff --git a/object/store/reader.go b/object/store/reader.go index 7979fb6c..bbfe1fe8 100644 --- a/object/store/reader.go +++ b/object/store/reader.go @@ -23,12 +23,21 @@ type ObjectReader interface { // Users should treat this as an invariant; // implementations should not re-verify it on every read. // - // Labels: Life-Parent. + // The returned slice may alias storage owned by the backend, + // such as a memory-mapped pack or a shared cache buffer. + // Callers must not mutate it + // and must not retain it past the backend's lifetime. + // + // Labels: Life-Parent, Mut-No. ReadBytesFull(id id.ObjectID) ([]byte, error) // ReadBytesContent reads an object's type and content bytes. // - // Labels: Life-Parent. + // The returned slice may alias backend-owned storage. + // Callers must not mutate it + // and must not retain it past the backend's lifetime. + // + // Labels: Life-Parent, Mut-No. ReadBytesContent(id id.ObjectID) (typ.Type, []byte, error) // ReadReaderFull reads a full serialized object stream diff --git a/object/tag/append.go b/object/tag/append.go index 15a6fde9..2f524a73 100644 --- a/object/tag/append.go +++ b/object/tag/append.go @@ -27,7 +27,7 @@ func (tag *Tag) AppendWithoutHeader(dst []byte) ([]byte, error) { for _, h := range tag.ExtraHeaders { // GIGO on empty keys and such. - dst = append(dst, []byte(h.Key)...) + dst = append(dst, h.Key...) dst = append(dst, byte(' ')) dst = append(dst, h.Value...) dst = append(dst, byte('\n')) diff --git a/object/tag/clone.go b/object/tag/clone.go new file mode 100644 index 00000000..0f792bc1 --- /dev/null +++ b/object/tag/clone.go @@ -0,0 +1,29 @@ +package tag + +import "bytes" + +// Clone returns a deep copy of the tag +// whose byte fields are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (tag *Tag) Clone() *Tag { + clone := &Tag{ + TargetID: tag.TargetID, + TargetType: tag.TargetType, + Name: bytes.Clone(tag.Name), + Tagger: tag.Tagger.Clone(), + Message: bytes.Clone(tag.Message), + } + + if tag.ExtraHeaders != nil { + clone.ExtraHeaders = make([]ExtraHeader, len(tag.ExtraHeaders)) + for i, h := range tag.ExtraHeaders { + clone.ExtraHeaders[i] = ExtraHeader{ + Key: bytes.Clone(h.Key), + Value: bytes.Clone(h.Value), + } + } + } + + return clone +} diff --git a/object/tag/parse.go b/object/tag/parse.go index c5ea7e14..1fcc7c2c 100644 --- a/object/tag/parse.go +++ b/object/tag/parse.go @@ -15,6 +15,16 @@ import ( var ErrInvalidTag = errors.New("object/tag: invalid tag") // Parse decodes a tag object body. +// +// The returned tag aliases body: +// its Name, Message, and extra-header fields, +// along with the byte fields of its tagger signature, +// share body's backing array. +// The tag inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Tag.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { t := new(Tag) @@ -56,7 +66,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { return nil, fmt.Errorf("%w: tag name: %w", ErrInvalidTag, err) } - t.Name = append([]byte(nil), line...) + t.Name = line i = next line, next, err = requiredHeaderLine(body, i, "tagger") @@ -84,7 +94,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { i += rel + 1 if len(line) == 0 { - t.Message = append([]byte(nil), body[i:]...) + t.Message = body[i:] return t, nil } @@ -112,8 +122,8 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tag, error) { } default: t.ExtraHeaders = append(t.ExtraHeaders, ExtraHeader{ - Key: string(key), - Value: append([]byte(nil), value...), + Key: key, + Value: value, }) } } diff --git a/object/tag/roundtrip_test.go b/object/tag/roundtrip_test.go index c49b3d75..cf4b69a1 100644 --- a/object/tag/roundtrip_test.go +++ b/object/tag/roundtrip_test.go @@ -42,8 +42,8 @@ func TestRoundTrip(t *testing.T) { }, Message: []byte("roundtrip subject\n\nroundtrip body\n\n"), ExtraHeaders: []tag.ExtraHeader{ - {Key: "encoding", Value: []byte("UTF-8")}, - {Key: "x-test-header", Value: []byte("value")}, + {Key: []byte("encoding"), Value: []byte("UTF-8")}, + {Key: []byte("x-test-header"), Value: []byte("value")}, }, } @@ -102,7 +102,7 @@ func assertTagEqual(t *testing.T, got *tag.Tag, want *tag.Tag) { } if !slices.EqualFunc(got.ExtraHeaders, want.ExtraHeaders, func(gotHeader tag.ExtraHeader, wantHeader tag.ExtraHeader) bool { - return gotHeader.Key == wantHeader.Key && bytes.Equal(gotHeader.Value, wantHeader.Value) + return bytes.Equal(gotHeader.Key, wantHeader.Key) && bytes.Equal(gotHeader.Value, wantHeader.Value) }) { t.Fatalf("extra headers = %+v, want %+v", got.ExtraHeaders, want.ExtraHeaders) } diff --git a/object/tag/tag.go b/object/tag/tag.go index f4b36c30..a4572921 100644 --- a/object/tag/tag.go +++ b/object/tag/tag.go @@ -20,6 +20,6 @@ type Tag struct { // ExtraHeader represents an extra header in a Git tag object. type ExtraHeader struct { - Key string + Key []byte Value []byte } diff --git a/object/tree/clone.go b/object/tree/clone.go new file mode 100644 index 00000000..d00c62f2 --- /dev/null +++ b/object/tree/clone.go @@ -0,0 +1,24 @@ +package tree + +import "bytes" + +// Clone returns a deep copy of the tree +// whose entry names are independent of any memory the original may alias. +// +// Labels: Life-Independent. +func (tree *Tree) Clone() *Tree { + if tree.entries == nil { + return &Tree{} + } + + clone := &Tree{entries: make([]Entry, len(tree.entries))} + for i, entry := range tree.entries { + clone.entries[i] = Entry{ + Mode: entry.Mode, + Name: bytes.Clone(entry.Name), + ID: entry.ID, + } + } + + return clone +} diff --git a/object/tree/compare.go b/object/tree/compare.go index 78bf56a4..9bf16f90 100644 --- a/object/tree/compare.go +++ b/object/tree/compare.go @@ -6,7 +6,7 @@ package tree // treating directory names as if they carried a trailing '/'. // entryIsTree and searchIsTree indicate // whether the respective names belong to subtree entries. -func nameCompare(entryName string, entryIsTree bool, searchName string, searchIsTree bool) int { +func nameCompare(entryName []byte, entryIsTree bool, searchName []byte, searchIsTree bool) int { entryLen := len(entryName) if entryIsTree { entryLen++ diff --git a/object/tree/helpers_test.go b/object/tree/helpers_test.go index d6cfddb6..3e5eddd4 100644 --- a/object/tree/helpers_test.go +++ b/object/tree/helpers_test.go @@ -43,26 +43,26 @@ func mixedEntries(tb testing.TB, repo *testgit.Repo) []tree.Entry { } return []tree.Entry{ - {Mode: mode.Regular, Name: "z", ID: blobA}, - {Mode: mode.Regular, Name: "A", ID: blobB}, - {Mode: mode.Regular, Name: "aa", ID: blobC}, - {Mode: mode.Regular, Name: "a0", ID: blobA}, - {Mode: mode.Regular, Name: "a.", ID: blobC}, - {Mode: mode.Regular, Name: "Z", ID: blobB}, - {Mode: mode.Regular, Name: "0", ID: blobA}, - {Mode: mode.Regular, Name: "CAPS", ID: blobB}, - {Mode: mode.Regular, Name: "caps", ID: blobC}, - {Mode: mode.Regular, Name: "name with space", ID: blobB}, - {Mode: mode.Regular, Name: "name.with.dot", ID: blobA}, - {Mode: mode.Regular, Name: "这是一些非 ASCII 的字符", ID: blobC}, - {Mode: mode.Regular, Name: "Emoji 👀", ID: blobC}, - {Mode: mode.Regular, Name: ".hidden", ID: blobA}, - {Mode: mode.Executable, Name: "exec.sh", ID: blobB}, - {Mode: mode.Symlink, Name: "sym.link", ID: blobC}, - {Mode: mode.Gitlink, Name: "submodule", ID: submodule}, - {Mode: mode.Regular, Name: "dir-", ID: blobA}, - {Mode: mode.Directory, Name: "dir", ID: subTree}, - {Mode: mode.Regular, Name: "dir0", ID: blobB}, + {Mode: mode.Regular, Name: []byte("z"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("A"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("aa"), ID: blobC}, + {Mode: mode.Regular, Name: []byte("a0"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("a."), ID: blobC}, + {Mode: mode.Regular, Name: []byte("Z"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("0"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("CAPS"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("caps"), ID: blobC}, + {Mode: mode.Regular, Name: []byte("name with space"), ID: blobB}, + {Mode: mode.Regular, Name: []byte("name.with.dot"), ID: blobA}, + {Mode: mode.Regular, Name: []byte("这是一些非 ASCII 的字符"), ID: blobC}, + {Mode: mode.Regular, Name: []byte("Emoji 👀"), ID: blobC}, + {Mode: mode.Regular, Name: []byte(".hidden"), ID: blobA}, + {Mode: mode.Executable, Name: []byte("exec.sh"), ID: blobB}, + {Mode: mode.Symlink, Name: []byte("sym.link"), ID: blobC}, + {Mode: mode.Gitlink, Name: []byte("submodule"), ID: submodule}, + {Mode: mode.Regular, Name: []byte("dir-"), ID: blobA}, + {Mode: mode.Directory, Name: []byte("dir"), ID: subTree}, + {Mode: mode.Regular, Name: []byte("dir0"), ID: blobB}, } } @@ -73,7 +73,7 @@ func mkTreeEntries(entries []tree.Entry) []testgit.TreeEntry { Mode: strconv.FormatUint(uint64(entry.Mode), 8), Type: entry.Mode.ObjectType(), OID: entry.ID, - Name: entry.Name, + Name: string(entry.Name), } } @@ -124,7 +124,7 @@ func assertGitDecode(tb testing.TB, repo *testgit.Repo, treeID id.ObjectID, got tb.Fatalf("entry[%d] id = %s, want %s", i, got[i].ID, want[i].OID) } - if got[i].Name != want[i].Name { + if string(got[i].Name) != want[i].Name { tb.Fatalf("entry[%d] name = %q, want %q", i, got[i].Name, want[i].Name) } } diff --git a/object/tree/insert.go b/object/tree/insert.go index 5e519069..b6c52400 100644 --- a/object/tree/insert.go +++ b/object/tree/insert.go @@ -1,10 +1,10 @@ package tree import ( + "bytes" "errors" "fmt" "slices" - "strings" "lindenii.org/go/furgit/object/tree/mode" ) @@ -42,16 +42,16 @@ func (tree *Tree) Insert(entry Entry) error { } // validateName checks that name is a structurally valid tree entry name. -func validateName(name string) error { - if name == "" { +func validateName(name []byte) error { + if len(name) == 0 { return fmt.Errorf("%w: empty entry name", ErrInvalidTree) } - if strings.IndexByte(name, 0) >= 0 { + if bytes.IndexByte(name, 0) >= 0 { return fmt.Errorf("%w: entry name %q contains NUL", ErrInvalidTree, name) } - if strings.IndexByte(name, '/') >= 0 { + if bytes.IndexByte(name, '/') >= 0 { return fmt.Errorf("%w: entry name %q contains '/'", ErrInvalidTree, name) } diff --git a/object/tree/insert_test.go b/object/tree/insert_test.go index fbf65b84..1dd406d5 100644 --- a/object/tree/insert_test.go +++ b/object/tree/insert_test.go @@ -18,10 +18,10 @@ func TestInsertRejects(t *testing.T) { name string entry tree.Entry }{ - {name: "empty-name", entry: tree.Entry{Mode: mode.Regular, Name: "", ID: zero}}, - {name: "slash-name", entry: tree.Entry{Mode: mode.Regular, Name: "a/b", ID: zero}}, - {name: "nul-name", entry: tree.Entry{Mode: mode.Regular, Name: "a\x00b", ID: zero}}, - {name: "invalid-mode", entry: tree.Entry{Mode: mode.Mode(0o100640), Name: "file", ID: zero}}, + {name: "empty-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte(""), ID: zero}}, + {name: "slash-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte("a/b"), ID: zero}}, + {name: "nul-name", entry: tree.Entry{Mode: mode.Regular, Name: []byte("a\x00b"), ID: zero}}, + {name: "invalid-mode", entry: tree.Entry{Mode: mode.Mode(0o100640), Name: []byte("file"), ID: zero}}, } { t.Run(tc.name, func(t *testing.T) { t.Parallel() @@ -48,18 +48,18 @@ func TestInsertRejectsConflict(t *testing.T) { }{ { name: "same-mode", - first: tree.Entry{Mode: mode.Regular, Name: "file", ID: zero}, - second: tree.Entry{Mode: mode.Regular, Name: "file", ID: zero}, + first: tree.Entry{Mode: mode.Regular, Name: []byte("file"), ID: zero}, + second: tree.Entry{Mode: mode.Regular, Name: []byte("file"), ID: zero}, }, { name: "blob-then-tree", - first: tree.Entry{Mode: mode.Regular, Name: "name", ID: zero}, - second: tree.Entry{Mode: mode.Directory, Name: "name", ID: zero}, + first: tree.Entry{Mode: mode.Regular, Name: []byte("name"), ID: zero}, + second: tree.Entry{Mode: mode.Directory, Name: []byte("name"), ID: zero}, }, { name: "tree-then-blob", - first: tree.Entry{Mode: mode.Directory, Name: "name", ID: zero}, - second: tree.Entry{Mode: mode.Regular, Name: "name", ID: zero}, + first: tree.Entry{Mode: mode.Directory, Name: []byte("name"), ID: zero}, + second: tree.Entry{Mode: mode.Regular, Name: []byte("name"), ID: zero}, }, } { t.Run(tc.name, func(t *testing.T) { @@ -79,24 +79,3 @@ func TestInsertRejectsConflict(t *testing.T) { }) } } - -func TestEntriesIsCopy(t *testing.T) { - t.Parallel() - - zero := id.SupportedObjectFormats()[0].Zero() - - var tr tree.Tree - - err := tr.Insert(tree.Entry{Mode: mode.Regular, Name: "file", ID: zero}) - if err != nil { - t.Fatalf("Insert: %v", err) - } - - entries := tr.Entries() - entries[0].Name = "mutated" - - again := tr.Entries() - if again[0].Name != "file" { - t.Fatalf("Entries()[0].Name = %q, want %q", again[0].Name, "file") - } -} diff --git a/object/tree/lookup.go b/object/tree/lookup.go index 34a01748..2ff6ce76 100644 --- a/object/tree/lookup.go +++ b/object/tree/lookup.go @@ -1,6 +1,7 @@ package tree import ( + "bytes" "slices" "lindenii.org/go/furgit/object/tree/mode" @@ -10,13 +11,18 @@ import ( // // A name matches whether stored as a blob-like or as a subtree entry, // so both orderings are searched. -// The returned entry is a copy; mutating it does not affect the tree. -func (tree *Tree) Find(name string) (Entry, bool) { +// +// The returned entry is a shallow copy: +// its Name aliases the tree's internal storage, +// so it must not be mutated and shares the tree's lifetime. +// +// Labels: Life-Parent, Mut-No. +func (tree *Tree) Find(name []byte) (Entry, bool) { for _, searchIsTree := range [...]bool{true, false} { - index, ok := slices.BinarySearchFunc(tree.entries, name, func(existing Entry, target string) int { + index, ok := slices.BinarySearchFunc(tree.entries, name, func(existing Entry, target []byte) int { return nameCompare(existing.Name, existing.Mode == mode.Directory, target, searchIsTree) }) - if ok && tree.entries[index].Name == name { + if ok && bytes.Equal(tree.entries[index].Name, name) { return tree.entries[index], true } } diff --git a/object/tree/lookup_test.go b/object/tree/lookup_test.go index 22d73615..706c1cd2 100644 --- a/object/tree/lookup_test.go +++ b/object/tree/lookup_test.go @@ -1,6 +1,7 @@ package tree_test import ( + "bytes" "testing" "lindenii.org/go/furgit/internal/testgit" @@ -28,12 +29,12 @@ func TestFind(t *testing.T) { t.Fatalf("Find(%q) not found", want.Name) } - if got.Mode != want.Mode || got.Name != want.Name || got.ID != want.ID { + if got.Mode != want.Mode || !bytes.Equal(got.Name, want.Name) || got.ID != want.ID { t.Fatalf("Find(%q) = %+v, want %+v", want.Name, got, want) } } - if _, ok := tr.Find("does-not-exist"); ok { + if _, ok := tr.Find([]byte("does-not-exist")); ok { t.Fatalf("Find(does-not-exist) = true, want false") } }) diff --git a/object/tree/malformed_test.go b/object/tree/malformed_test.go index ca00ea94..8a22b90f 100644 --- a/object/tree/malformed_test.go +++ b/object/tree/malformed_test.go @@ -44,6 +44,7 @@ func TestParseMalformed(t *testing.T) { {name: "unsorted", body: append(record("100644", "b", size), record("100644", "a", size)...)}, {name: "duplicate", body: append(record("100644", "a", size), record("100644", "a", size)...)}, {name: "conflicting-tree-blob", body: append(record("100644", "foo", size), record("40000", "foo", size)...)}, + {name: "conflicting-tree-blob-nonadjacent", body: append(append(record("100644", "foo", size), record("100644", "foo.c", size)...), record("40000", "foo", size)...)}, } { t.Run(tc.name, func(t *testing.T) { t.Parallel() diff --git a/object/tree/parse.go b/object/tree/parse.go index 5b01fa05..bd6ed3b0 100644 --- a/object/tree/parse.go +++ b/object/tree/parse.go @@ -14,10 +14,22 @@ import ( // correctly sized object IDs, and strictly increasing Git tree order. // It does not enforce fsck-level name policy // (for example ".", "..", ".git", or platform-specific aliases). +// +// The returned tree aliases body: +// each entry's Name shares body's backing array. +// The tree inherits body's lifetime +// and must not be mutated unless body may be. +// Use [Tree.Clone] for an independent copy. +// +// Labels: Life-Parent, Mut-No. func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { tree := new(Tree) idSize := objectFormat.Size() - seen := make(map[string]struct{}) + + const minEntryOverhead = 5 + 1 + 1 + 1 // mode, space, name, NUL + if estimate := len(body) / (minEntryOverhead + idSize); estimate > 0 { + tree.entries = make([]Entry, 0, estimate) + } i := 0 for i < len(body) { @@ -38,7 +50,7 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { return nil, fmt.Errorf("%w: missing name terminator at offset %d", ErrInvalidTree, i) } - name := string(body[i : i+nul]) + name := body[i : i+nul] i += nul + 1 err = validateName(name) @@ -67,14 +79,44 @@ func Parse(body []byte, objectFormat id.ObjectFormat) (*Tree, error) { } } - if _, dup := seen[entry.Name]; dup { + if entryMode == mode.Directory && hasNonDirNamed(tree.entries, entry.Name) { return nil, fmt.Errorf("%w: duplicate entry name %q", ErrInvalidTree, entry.Name) } - seen[entry.Name] = struct{}{} - tree.entries = append(tree.entries, entry) } return tree, nil } + +// hasNonDirNamed reports whether entries, sorted in Git tree order, +// holds a non-directory entry whose name equals name. +// +// The match sorts immediately below a directory of the same name, +// so the search gallops from the back before binary searching the bracket. +func hasNonDirNamed(entries []Entry, name []byte) bool { + lo, hi := 0, len(entries) + + for stride := 1; stride < hi-lo; stride *= 2 { + mid := hi - stride + if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 { + lo = mid + 1 + + break + } + + hi = mid + } + + for lo < hi { + mid := lo + (hi-lo)/2 + if nameCompare(entries[mid].Name, entries[mid].Mode == mode.Directory, name, false) < 0 { + lo = mid + 1 + } else { + hi = mid + } + } + + return lo < len(entries) && + nameCompare(entries[lo].Name, entries[lo].Mode == mode.Directory, name, false) == 0 +} diff --git a/object/tree/roundtrip_test.go b/object/tree/roundtrip_test.go index 3e82c79a..a9d5f40f 100644 --- a/object/tree/roundtrip_test.go +++ b/object/tree/roundtrip_test.go @@ -70,7 +70,7 @@ func assertEntriesEqual(t *testing.T, got []tree.Entry, want []tree.Entry) { t.Fatalf("entry[%d] mode = %o, want %o", i, got[i].Mode, want[i].Mode) } - if got[i].Name != want[i].Name { + if !bytes.Equal(got[i].Name, want[i].Name) { t.Fatalf("entry[%d] name = %q, want %q", i, got[i].Name, want[i].Name) } diff --git a/object/tree/tree.go b/object/tree/tree.go index 431df649..f40bb165 100644 --- a/object/tree/tree.go +++ b/object/tree/tree.go @@ -1,8 +1,6 @@ package tree import ( - "slices" - "lindenii.org/go/furgit/object/id" "lindenii.org/go/furgit/object/tree/mode" ) @@ -21,15 +19,19 @@ type Tree struct { // Entry represents a single entry in a tree. type Entry struct { Mode mode.Mode - Name string + Name []byte ID id.ObjectID } -// Entries returns a copy of the tree's entries in Git tree order. +// Entries returns the tree's entries in Git tree order. // -// Mutating the returned slice does not affect the tree. +// The returned slice aliases the tree's internal storage, +// so it must not be mutated, +// and it is invalidated by any subsequent call that mutates the tree, +// such as [Tree.Insert]. +// Use [Tree.Clone] for an independent tree. // -// Labels: Life-Independent. +// Labels: Life-Parent, Mut-No. func (tree *Tree) Entries() []Entry { - return slices.Clone(tree.entries) + return tree.entries } |
