aboutsummaryrefslogtreecommitdiff
path: root/format
diff options
context:
space:
mode:
Diffstat (limited to 'format')
-rw-r--r--format/commitgraph/TODO6
-rw-r--r--format/commitgraph/bloom/bloom.go3
-rw-r--r--format/commitgraph/bloom/constants.go8
-rw-r--r--format/commitgraph/bloom/contain.go25
-rw-r--r--format/commitgraph/bloom/errors.go5
-rw-r--r--format/commitgraph/bloom/filter.go26
-rw-r--r--format/commitgraph/bloom/key.go117
-rw-r--r--format/commitgraph/bloom/murmur.go127
-rw-r--r--format/commitgraph/bloom/settings.go50
-rw-r--r--format/commitgraph/constants.go32
-rw-r--r--format/commitgraph/doc.go2
-rw-r--r--format/commitgraph/read/bloom.go117
-rw-r--r--format/commitgraph/read/close.go20
-rw-r--r--format/commitgraph/read/commitat.go85
-rw-r--r--format/commitgraph/read/commits.go20
-rw-r--r--format/commitgraph/read/doc.go2
-rw-r--r--format/commitgraph/read/edges.go48
-rw-r--r--format/commitgraph/read/errors.go58
-rw-r--r--format/commitgraph/read/generation.go43
-rw-r--r--format/commitgraph/read/hash.go79
-rw-r--r--format/commitgraph/read/iterators.go45
-rw-r--r--format/commitgraph/read/layer.go28
-rw-r--r--format/commitgraph/read/layer_close.go33
-rw-r--r--format/commitgraph/read/layer_lookup.go53
-rw-r--r--format/commitgraph/read/layer_open.go81
-rw-r--r--format/commitgraph/read/layer_parse.go276
-rw-r--r--format/commitgraph/read/layer_pos.go21
-rw-r--r--format/commitgraph/read/layerinfo.go23
-rw-r--r--format/commitgraph/read/lookup.go29
-rw-r--r--format/commitgraph/read/mode.go11
-rw-r--r--format/commitgraph/read/oidat.go36
-rw-r--r--format/commitgraph/read/open.go26
-rw-r--r--format/commitgraph/read/open_chain.go133
-rw-r--r--format/commitgraph/read/open_single.go32
-rw-r--r--format/commitgraph/read/parents.go67
-rw-r--r--format/commitgraph/read/position.go38
-rw-r--r--format/commitgraph/read/read_test.go322
-rw-r--r--format/commitgraph/read/reader.go16
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/HEAD1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/config4
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-bf985c21612a52070d8b008e6ef51edf8b609401.graphbin0 -> 4810 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-dd7578d5216ca76c25b19631ba90f7498aeabbe7.graphbin0 -> 7088 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/packs2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.bitmapbin0 -> 8234 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.idxbin0 -> 13252 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.packbin0 -> 34730 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.revbin0 -> 1792 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/refs/heads/master1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/HEAD1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/config4
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/commit-graphbin0 -> 9068 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/packs2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.bitmapbin0 -> 7780 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.idxbin0 -> 11152 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.packbin0 -> 28664 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.revbin0 -> 1492 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/refs/heads/main1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/HEAD1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/config4
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/commit-graphbin0 -> 5912 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/packs2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.bitmapbin0 -> 5452 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.idxbin0 -> 7792 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.packbin0 -> 18969 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.revbin0 -> 1012 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/refs/heads/master1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/HEAD1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/config6
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-505cab61f8ddfa614301e8f97943112739236c6bcd19ed4d1f7c6b830cab4f62.graphbin0 -> 9260 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-77c47bd6ca2ce17208c9361717a5823c0cb4b5ee336a14959678e060d674ffb6.graphbin0 -> 6154 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/packs2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.bitmapbin0 -> 8234 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.idxbin0 -> 18496 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.packbin0 -> 41482 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.revbin0 -> 1816 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/refs/heads/master1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/HEAD1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/config6
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/commit-graphbin0 -> 11960 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/packs2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.bitmapbin0 -> 7804 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.idxbin0 -> 15496 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.packbin0 -> 34252 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.revbin0 -> 1516 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/refs/heads/main1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/HEAD1
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/config6
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/commit-graphbin0 -> 7844 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/packs2
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.bitmapbin0 -> 5476 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.idxbin0 -> 10696 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.packbin0 -> 22569 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.revbin0 -> 1036 bytes
-rw-r--r--format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/refs/heads/master1
-rw-r--r--format/doc.go5
-rw-r--r--format/packfile/delta/apply/apply.go160
-rw-r--r--format/packfile/delta/apply/header.go47
-rw-r--r--format/packfile/delta/doc.go2
-rw-r--r--format/packfile/doc.go5
-rw-r--r--format/packfile/entry.go76
-rw-r--r--format/packfile/entry_header.go52
-rw-r--r--format/packfile/header.go9
-rw-r--r--format/packfile/ingest/api.go195
-rw-r--r--format/packfile/ingest/byteslice_reader.go21
-rw-r--r--format/packfile/ingest/cache.go53
-rw-r--r--format/packfile/ingest/counting_writer.go17
-rw-r--r--format/packfile/ingest/crc.go22
-rw-r--r--format/packfile/ingest/delta_header.go11
-rw-r--r--format/packfile/ingest/distance.go30
-rw-r--r--format/packfile/ingest/doc.go3
-rw-r--r--format/packfile/ingest/drain.go68
-rw-r--r--format/packfile/ingest/entry.go92
-rw-r--r--format/packfile/ingest/entry_header.go33
-rw-r--r--format/packfile/ingest/entry_prefix.go95
-rw-r--r--format/packfile/ingest/errors.go75
-rw-r--r--format/packfile/ingest/file_section_writer.go22
-rw-r--r--format/packfile/ingest/fill.go44
-rw-r--r--format/packfile/ingest/finalize.go94
-rw-r--r--format/packfile/ingest/flush.go37
-rw-r--r--format/packfile/ingest/hash.go27
-rw-r--r--format/packfile/ingest/header.go49
-rw-r--r--format/packfile/ingest/idx_write.go266
-rw-r--r--format/packfile/ingest/ingest.go68
-rw-r--r--format/packfile/ingest/ingest_test.go434
-rw-r--r--format/packfile/ingest/progress_write.go11
-rw-r--r--format/packfile/ingest/record_content.go30
-rw-r--r--format/packfile/ingest/record_delta.go60
-rw-r--r--format/packfile/ingest/record_inflate.go46
-rw-r--r--format/packfile/ingest/record_resolve.go117
-rw-r--r--format/packfile/ingest/records.go46
-rw-r--r--format/packfile/ingest/resolve_all.go71
-rw-r--r--format/packfile/ingest/rev_write.go138
-rw-r--r--format/packfile/ingest/rewrite_header_trailer.go89
-rw-r--r--format/packfile/ingest/scan.go106
-rw-r--r--format/packfile/ingest/state.go70
-rw-r--r--format/packfile/ingest/stream.go111
-rw-r--r--format/packfile/ingest/temp.go103
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha1/METADATA.txt3
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha1/base.packbin0 -> 81007 bytes
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha1/nonthin.packbin0 -> 117458 bytes
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha1/thin.packbin0 -> 38581 bytes
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha256/METADATA.txt3
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha256/base.packbin0 -> 105138 bytes
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha256/nonthin.packbin0 -> 152284 bytes
-rw-r--r--format/packfile/ingest/testdata/fixtures/sha256/thin.packbin0 -> 49412 bytes
-rw-r--r--format/packfile/ingest/thin_append.go91
-rw-r--r--format/packfile/ingest/thin_fix.go100
-rw-r--r--format/packfile/ingest/thin_unresolved.go34
-rw-r--r--format/packfile/ingest/trailer.go58
-rw-r--r--format/packfile/ingest/use.go34
-rw-r--r--format/packfile/object_type.go16
-rw-r--r--format/packfile/ofs.go26
154 files changed, 5676 insertions, 0 deletions
diff --git a/format/commitgraph/TODO b/format/commitgraph/TODO
new file mode 100644
index 00000000..87e0888d
--- /dev/null
+++ b/format/commitgraph/TODO
@@ -0,0 +1,6 @@
+Paranoia mode
+Split commit-graph chain with mixed generation and bloom setting
+Separate chunk parsing layer
+Config stuff
+
+Writing
diff --git a/format/commitgraph/bloom/bloom.go b/format/commitgraph/bloom/bloom.go
new file mode 100644
index 00000000..9653d595
--- /dev/null
+++ b/format/commitgraph/bloom/bloom.go
@@ -0,0 +1,3 @@
+// Package bloom provides a bloom filter implementation used for changed-path
+// filters in Git commit graphs.
+package bloom
diff --git a/format/commitgraph/bloom/constants.go b/format/commitgraph/bloom/constants.go
new file mode 100644
index 00000000..958e551e
--- /dev/null
+++ b/format/commitgraph/bloom/constants.go
@@ -0,0 +1,8 @@
+package bloom
+
+// Constants shared by changed-path Bloom filter parsing and querying.
+const (
+ // DataHeaderSize is the size in bytes of the BDAT header in commit-graph
+ // files: three 4-byte fields.
+ DataHeaderSize = 3 * 4
+ // DefaultMaxChange matches Git's default max-changed-paths behavior.
+ DefaultMaxChange = 512
+)
diff --git a/format/commitgraph/bloom/contain.go b/format/commitgraph/bloom/contain.go
new file mode 100644
index 00000000..331b7687
--- /dev/null
+++ b/format/commitgraph/bloom/contain.go
@@ -0,0 +1,25 @@
+package bloom
+
+// MightContain reports whether the Bloom filter may contain the given path.
+//
+// Keys are derived for the full path and for each of its directory prefixes;
+// the result is true as soon as any one of those keys is present in the
+// filter. A true result indicates a possible match; false means the path
+// definitely did not change.
+//
+// A filter with no data carries no information about its commit: Git encodes
+// "nothing changed" and "too many changes" as one-byte filters and treats a
+// zero-length filter as not computed. Such a filter therefore conservatively
+// reports true instead of claiming the path did not change.
+//
+// An empty path yields no keys and reports false. An error is returned when
+// key derivation fails, for example for an unsupported hash version.
+//
+// NOTE(review): Git's own query requires every key (path and all prefixes) to
+// be present. Any-match never produces a false negative but filters less
+// aggressively; confirm this is intended.
+func (f *Filter) MightContain(path []byte) (bool, error) {
+	if len(f.Data) == 0 {
+		// Unknown, not "unchanged": never produce a false negative.
+		return true, nil
+	}
+
+	keys, err := keyvec(path, f)
+	if err != nil {
+		return false, err
+	}
+
+	for i := range keys {
+		if filterContainsKey(f, keys[i]) {
+			return true, nil
+		}
+	}
+
+	return false, nil
+}
diff --git a/format/commitgraph/bloom/errors.go b/format/commitgraph/bloom/errors.go
new file mode 100644
index 00000000..fe38d1bc
--- /dev/null
+++ b/format/commitgraph/bloom/errors.go
@@ -0,0 +1,5 @@
+package bloom
+
+import "errors"
+
+var ErrInvalid = errors.New("bloom: invalid data")
diff --git a/format/commitgraph/bloom/filter.go b/format/commitgraph/bloom/filter.go
new file mode 100644
index 00000000..395dd5ce
--- /dev/null
+++ b/format/commitgraph/bloom/filter.go
@@ -0,0 +1,26 @@
+package bloom
+
+// Filter represents a changed-paths Bloom filter associated with a commit.
+//
+// The filter encodes which paths changed between a commit and its first
+// parent. Paths are expected to be in Git's slash-separated form and
+// are queried using a path and its prefixes (e.g. "a/b/c", "a/b", "a").
+type Filter struct {
+ // Data is the raw filter bit array; bit i lives in byte i/8 at bit
+ // position i%8.
+ Data []byte
+
+ // HashVersion selects the murmur3 variant used to hash paths (1 or 2).
+ HashVersion uint32
+ // NumHashes is the number of hash values derived for each path.
+ NumHashes uint32
+ // BitsPerEntry and MaxChangePaths are carried over from Settings; the
+ // query code in this package does not consult them.
+ BitsPerEntry uint32
+ MaxChangePaths uint32
+}
+
+// NewFilter pairs raw filter bytes with the parameters in settings and returns
+// a filter that is ready to be queried. The data slice is stored as given, not
+// copied.
+func NewFilter(data []byte, settings Settings) Filter {
+	var f Filter
+
+	f.Data = data
+	f.HashVersion = settings.HashVersion
+	f.NumHashes = settings.NumHashes
+	f.BitsPerEntry = settings.BitsPerEntry
+	f.MaxChangePaths = settings.MaxChangePaths
+
+	return f
+}
diff --git a/format/commitgraph/bloom/key.go b/format/commitgraph/bloom/key.go
new file mode 100644
index 00000000..a15df904
--- /dev/null
+++ b/format/commitgraph/bloom/key.go
@@ -0,0 +1,117 @@
+package bloom
+
+import "codeberg.org/lindenii/furgit/internal/intconv"
+
+// key holds the precomputed hash values for one path, one value per
+// configured hash function.
+type key struct {
+ hashes []uint32
+}
+
+// keyvec derives the lookup keys for path: first the key for the full path,
+// then one key for every prefix that ends just before a '/' separator, from
+// the longest prefix to the shortest.
+//
+// An empty path produces no keys. Any error from keyFill aborts the whole
+// derivation.
+func keyvec(path []byte, filter *Filter) ([]key, error) {
+	if len(path) == 0 {
+		return nil, nil
+	}
+
+	// One key for the full path plus one per separator.
+	capacity := 1
+
+	for i := range path {
+		if path[i] == '/' {
+			capacity++
+		}
+	}
+
+	out := make([]key, 0, capacity)
+
+	end := len(path)
+	for {
+		k, err := keyFill(path[:end], filter)
+		if err != nil {
+			return nil, err
+		}
+
+		out = append(out, k)
+
+		// Locate the next separator to the left of the current cut point.
+		next := -1
+
+		for i := end - 1; i >= 0; i-- {
+			if path[i] == '/' {
+				next = i
+
+				break
+			}
+		}
+
+		if next < 0 {
+			return out, nil
+		}
+
+		end = next
+	}
+}
+
+// keyFill hashes path twice with two fixed seeds, using the murmur3 variant
+// selected by filter.HashVersion, and expands the pair into filter.NumHashes
+// values of the form h0 + i*h1 (wrapping uint32 arithmetic).
+//
+// ErrInvalid is returned for an unknown hash version or when a count cannot
+// be converted; errors from the hash function are returned unchanged.
+func keyFill(path []byte, filter *Filter) (key, error) {
+	const (
+		seed0 = 0x293ae76f
+		seed1 = 0x7e646e2c
+	)
+
+	var hash func(uint32, []byte) (uint32, error)
+
+	switch filter.HashVersion {
+	case 1:
+		hash = murmur3SeededV1
+	case 2:
+		hash = murmur3SeededV2
+	default:
+		return key{}, ErrInvalid
+	}
+
+	h0, err := hash(seed0, path)
+	if err != nil {
+		return key{}, err
+	}
+
+	h1, err := hash(seed1, path)
+	if err != nil {
+		return key{}, err
+	}
+
+	n, err := intconv.Uint32ToInt(filter.NumHashes)
+	if err != nil {
+		return key{}, ErrInvalid
+	}
+
+	hashes := make([]uint32, 0, n)
+
+	for i := 0; i < n; i++ {
+		step, convErr := intconv.IntToUint32(i)
+		if convErr != nil {
+			return key{}, ErrInvalid
+		}
+
+		hashes = append(hashes, h0+step*h1)
+	}
+
+	return key{hashes: hashes}, nil
+}
+
+// filterContainsKey reports whether every hash in key maps to a set bit in
+// the filter's bit array. Each hash is reduced modulo the total number of
+// bits. An empty filter contains nothing.
+func filterContainsKey(filter *Filter, key key) bool {
+	data := filter.Data
+	if len(data) == 0 {
+		return false
+	}
+
+	totalBits := uint64(len(data)) * 8
+
+	for i := range key.hashes {
+		bitIndex := uint64(key.hashes[i]) % totalBits
+		mask := byte(1) << (bitIndex % 8)
+
+		if data[bitIndex/8]&mask == 0 {
+			return false
+		}
+	}
+
+	return true
+}
diff --git a/format/commitgraph/bloom/murmur.go b/format/commitgraph/bloom/murmur.go
new file mode 100644
index 00000000..363b63ae
--- /dev/null
+++ b/format/commitgraph/bloom/murmur.go
@@ -0,0 +1,127 @@
+package bloom
+
+import "codeberg.org/lindenii/furgit/internal/intconv"
+
+// murmur3SeededV2 computes a seeded 32-bit murmur3 hash of data, treating
+// every input byte as an unsigned value.
+//
+// The only error it can return comes from converting len(data) to uint32.
+func murmur3SeededV2(seed uint32, data []byte) (uint32, error) {
+ const (
+ c1 = 0xcc9e2d51
+ c2 = 0x1b873593
+ r1 = 15
+ r2 = 13
+ m = 5
+ n = 0xe6546b64
+ )
+
+ h := seed
+
+ // Mix the input four bytes at a time; each block is assembled with the
+ // first byte in the low-order position.
+ nblocks := len(data) / 4
+ for i := range nblocks {
+ k := uint32(data[4*i]) |
+ (uint32(data[4*i+1]) << 8) |
+ (uint32(data[4*i+2]) << 16) |
+ (uint32(data[4*i+3]) << 24)
+ k *= c1
+ k = (k << r1) | (k >> (32 - r1))
+ k *= c2
+
+ h ^= k
+ h = (h << r2) | (h >> (32 - r2))
+ h = h*m + n
+ }
+
+ var k1 uint32
+
+ // Fold in the one to three bytes left over after the last full block.
+ tail := data[nblocks*4:]
+ switch len(tail) & 3 {
+ case 3:
+ k1 ^= uint32(tail[2]) << 16
+
+ fallthrough
+ case 2:
+ k1 ^= uint32(tail[1]) << 8
+
+ fallthrough
+ case 1:
+ k1 ^= uint32(tail[0])
+ k1 *= c1
+ k1 = (k1 << r1) | (k1 >> (32 - r1))
+ k1 *= c2
+ h ^= k1
+ }
+
+ dataLen, err := intconv.IntToUint32(len(data))
+ if err != nil {
+ return 0, err
+ }
+
+ // Finalization: mix in the input length, then scramble the result.
+ h ^= dataLen
+ h ^= h >> 16
+ h *= 0x85ebca6b
+ h ^= h >> 13
+ h *= 0xc2b2ae35
+ h ^= h >> 16
+
+ return h, nil
+}
+
+// murmur3SeededV1 computes a seeded 32-bit murmur3 hash of data. It has the
+// same structure as murmur3SeededV2 but widens every input byte with
+// intconv.SignExtendByteToUint32 instead of a plain unsigned conversion.
+//
+// NOTE(review): presumably this reproduces the signed-char behavior of Git's
+// version 1 changed-path hash for bytes >= 0x80; confirm against the helper.
+//
+// The only error it can return comes from converting len(data) to uint32.
+func murmur3SeededV1(seed uint32, data []byte) (uint32, error) {
+ const (
+ c1 = 0xcc9e2d51
+ c2 = 0x1b873593
+ r1 = 15
+ r2 = 13
+ m = 5
+ n = 0xe6546b64
+ )
+
+ h := seed
+
+ // Mix the input four bytes at a time; each block is assembled with the
+ // first byte in the low-order position.
+ nblocks := len(data) / 4
+ for i := range nblocks {
+ k := intconv.SignExtendByteToUint32(data[4*i]) |
+ (intconv.SignExtendByteToUint32(data[4*i+1]) << 8) |
+ (intconv.SignExtendByteToUint32(data[4*i+2]) << 16) |
+ (intconv.SignExtendByteToUint32(data[4*i+3]) << 24)
+ k *= c1
+ k = (k << r1) | (k >> (32 - r1))
+ k *= c2
+
+ h ^= k
+ h = (h << r2) | (h >> (32 - r2))
+ h = h*m + n
+ }
+
+ var k1 uint32
+
+ // Fold in the one to three bytes left over after the last full block.
+ tail := data[nblocks*4:]
+ switch len(tail) & 3 {
+ case 3:
+ k1 ^= intconv.SignExtendByteToUint32(tail[2]) << 16
+
+ fallthrough
+ case 2:
+ k1 ^= intconv.SignExtendByteToUint32(tail[1]) << 8
+
+ fallthrough
+ case 1:
+ k1 ^= intconv.SignExtendByteToUint32(tail[0])
+ k1 *= c1
+ k1 = (k1 << r1) | (k1 >> (32 - r1))
+ k1 *= c2
+ h ^= k1
+ }
+
+ dataLen, err := intconv.IntToUint32(len(data))
+ if err != nil {
+ return 0, err
+ }
+
+ // Finalization: mix in the input length, then scramble the result.
+ h ^= dataLen
+ h ^= h >> 16
+ h *= 0x85ebca6b
+ h ^= h >> 13
+ h *= 0xc2b2ae35
+ h ^= h >> 16
+
+ return h, nil
+}
diff --git a/format/commitgraph/bloom/settings.go b/format/commitgraph/bloom/settings.go
new file mode 100644
index 00000000..764653bd
--- /dev/null
+++ b/format/commitgraph/bloom/settings.go
@@ -0,0 +1,50 @@
+package bloom
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+// Settings describe the changed-paths Bloom filter parameters stored in
+// commit-graph BDAT chunks.
+//
+// Filters can only be interpreted correctly with the settings they were
+// written with.
+type Settings struct {
+ // HashVersion selects the path hash variant; ParseSettings accepts 1 and 2.
+ HashVersion uint32
+ // NumHashes is the number of hash values per path; ParseSettings rejects 0.
+ NumHashes uint32
+ // BitsPerEntry is read from the BDAT header and not otherwise validated by
+ // ParseSettings.
+ BitsPerEntry uint32
+ // MaxChangePaths is not stored in the BDAT header; ParseSettings sets it to
+ // DefaultMaxChange.
+ MaxChangePaths uint32
+}
+
+// ParseSettings decodes the Bloom filter settings from the start of a BDAT
+// chunk: three big-endian uint32 fields holding the hash version, the number
+// of hashes and the bits per entry. MaxChangePaths is not stored in the header
+// and is set to DefaultMaxChange.
+//
+// ErrInvalid is returned when the chunk is shorter than the header, the hash
+// version is neither 1 nor 2, or the hash count is zero or does not fit in an
+// int.
+func ParseSettings(bdat []byte) (*Settings, error) {
+	if len(bdat) < DataHeaderSize {
+		return nil, ErrInvalid
+	}
+
+	hashVersion := binary.BigEndian.Uint32(bdat[0:4])
+	numHashes := binary.BigEndian.Uint32(bdat[4:8])
+	bitsPerEntry := binary.BigEndian.Uint32(bdat[8:12])
+
+	if hashVersion != 1 && hashVersion != 2 {
+		return nil, ErrInvalid
+	}
+
+	if numHashes == 0 {
+		return nil, ErrInvalid
+	}
+
+	if _, err := intconv.Uint32ToInt(numHashes); err != nil {
+		return nil, ErrInvalid
+	}
+
+	return &Settings{
+		HashVersion:    hashVersion,
+		NumHashes:      numHashes,
+		BitsPerEntry:   bitsPerEntry,
+		MaxChangePaths: DefaultMaxChange,
+	}, nil
+}
diff --git a/format/commitgraph/constants.go b/format/commitgraph/constants.go
new file mode 100644
index 00000000..3a06a290
--- /dev/null
+++ b/format/commitgraph/constants.go
@@ -0,0 +1,32 @@
+package commitgraph
+
+// File header identification values.
+const (
+ FileSignature = 0x43475048 // "CGPH"
+ FileVersion = 1
+)
+
+// Chunk identifiers: each is the chunk's four-character ASCII name packed
+// into a big-endian uint32.
+const (
+ ChunkOIDF = 0x4f494446 // "OIDF"
+ ChunkOIDL = 0x4f49444c // "OIDL"
+ ChunkCDAT = 0x43444154 // "CDAT"
+ ChunkGDA2 = 0x47444132 // "GDA2"
+ ChunkGDO2 = 0x47444f32 // "GDO2"
+ ChunkEDGE = 0x45444745 // "EDGE"
+ ChunkBIDX = 0x42494458 // "BIDX"
+ ChunkBDAT = 0x42444154 // "BDAT"
+ ChunkBASE = 0x42415345 // "BASE"
+)
+
+// Fixed sizes in bytes.
+// NOTE(review): presumably the file header, one chunk table entry, and the
+// 256-entry OID fanout table respectively; confirm against the parser.
+const (
+ HeaderSize = 8
+ ChunkEntrySize = 12
+ FanoutSize = 256 * 4
+)
+
+// Parent and generation encoding values.
+const (
+ // ParentNone is presumably the sentinel stored in a parent slot when the
+ // commit has no such parent; confirm against the parent decoder.
+ ParentNone = 0x70000000
+ // ParentExtraMask is the high bit of a parent word. In an EDGE list entry
+ // it marks the last parent of the list.
+ ParentExtraMask = 0x80000000
+ // ParentLastMask extracts the position bits (low 31 bits) of a parent word.
+ ParentLastMask = 0x7fffffff
+
+ // GenerationOverflow is presumably the flag bit marking a generation value
+ // stored out of line; confirm against the generation reader.
+ GenerationOverflow = 0x80000000
+)
diff --git a/format/commitgraph/doc.go b/format/commitgraph/doc.go
new file mode 100644
index 00000000..abf5f3d3
--- /dev/null
+++ b/format/commitgraph/doc.go
@@ -0,0 +1,2 @@
+// Package commitgraph provides constants and common utilities for handling commit graphs.
+package commitgraph
diff --git a/format/commitgraph/read/bloom.go b/format/commitgraph/read/bloom.go
new file mode 100644
index 00000000..12dd6db3
--- /dev/null
+++ b/format/commitgraph/read/bloom.go
@@ -0,0 +1,117 @@
+package read
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph/bloom"
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+// HasBloom reports whether at least one layer carries complete changed-path
+// Bloom data: a Bloom index chunk, a Bloom data chunk and parsed settings.
+func (reader *Reader) HasBloom() bool {
+	for i := range reader.layers {
+		l := &reader.layers[i]
+
+		if l.chunkBloomIndex == nil || l.chunkBloomData == nil || l.bloomSettings == nil {
+			continue
+		}
+
+		return true
+	}
+
+	return false
+}
+
+// BloomVersion returns the changed-path Bloom hash version taken from the
+// last layer that has Bloom settings. It returns 0 when no layer has settings
+// or when the stored version does not fit in a uint8.
+func (reader *Reader) BloomVersion() uint8 {
+	for i := len(reader.layers); i > 0; i-- {
+		settings := reader.layers[i-1].bloomSettings
+		if settings == nil {
+			continue
+		}
+
+		version, err := intconv.Uint32ToUint8(settings.HashVersion)
+		if err != nil {
+			return 0
+		}
+
+		return version
+	}
+
+	return 0
+}
+
+// BloomFilterAt returns the changed-path Bloom filter of the commit at pos.
+//
+// The filter's data is a sub-slice of the reader's commit-graph data, not a
+// copy, so it is only valid until the reader is closed.
+//
+// A BloomUnavailableError is returned when the layer holding pos lacks a Bloom
+// index chunk, a Bloom data chunk or Bloom settings. Errors from locating the
+// layer or the filter's byte range are returned unchanged.
+func (reader *Reader) BloomFilterAt(pos Position) (bloom.Filter, error) {
+	var none bloom.Filter
+
+	l, err := reader.layerByPosition(pos)
+	if err != nil {
+		return none, err
+	}
+
+	hasBloom := l.chunkBloomIndex != nil && l.chunkBloomData != nil && l.bloomSettings != nil
+	if !hasBloom {
+		return none, &BloomUnavailableError{Pos: pos}
+	}
+
+	start, end, err := bloomRange(l, pos.Index)
+	if err != nil {
+		return none, err
+	}
+
+	// The range is relative to the BDAT payload, which follows the header.
+	lo := bloom.DataHeaderSize + start
+	hi := bloom.DataHeaderSize + end
+
+	return bloom.NewFilter(l.chunkBloomData[lo:hi], *l.bloomSettings), nil
+}
+
+// bloomRange returns the byte range [start, end) of one commit's Bloom filter
+// within the layer's BDAT payload, i.e. relative to the end of the BDAT
+// header.
+//
+// BIDX holds one big-endian uint32 per commit. The filter for commitIndex
+// starts at the previous commit's entry (or 0 for the first commit) and ends
+// at its own entry.
+//
+// A MalformedError is returned when the BIDX entry lies outside the chunk,
+// when the range is inverted, or when it extends past the BDAT payload.
+// Integer conversion errors are returned unchanged.
+func bloomRange(layer *layer, commitIndex uint32) (int, int, error) {
+	off, err := intconv.Uint64ToInt(uint64(commitIndex) * 4)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	// Guard the slice expressions below so a truncated BIDX chunk is reported
+	// as malformed data instead of panicking. The comparison is written as a
+	// subtraction so that off+4 cannot overflow.
+	index := layer.chunkBloomIndex
+	if len(index) < 4 || off > len(index)-4 {
+		return 0, 0, &MalformedError{Path: layer.path, Reason: "BIDX index out of range"}
+	}
+
+	end := binary.BigEndian.Uint32(index[off : off+4])
+
+	var start uint32
+
+	if commitIndex > 0 {
+		// off >= 4 here, so the previous entry sits directly in front.
+		start = binary.BigEndian.Uint32(index[off-4 : off])
+	}
+
+	if end < start {
+		return 0, 0, &MalformedError{Path: layer.path, Reason: "invalid BIDX range"}
+	}
+
+	bdatLen := len(layer.chunkBloomData) - bloom.DataHeaderSize
+
+	bdatLenU32, err := intconv.IntToUint32(bdatLen)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	if end > bdatLenU32 {
+		return 0, 0, &MalformedError{Path: layer.path, Reason: "BIDX range out of BDAT bounds"}
+	}
+
+	startInt, err := intconv.Uint64ToInt(uint64(start))
+	if err != nil {
+		return 0, 0, err
+	}
+
+	endInt, err := intconv.Uint64ToInt(uint64(end))
+	if err != nil {
+		return 0, 0, err
+	}
+
+	return startInt, endInt, nil
+}
diff --git a/format/commitgraph/read/close.go b/format/commitgraph/read/close.go
new file mode 100644
index 00000000..f8b6141a
--- /dev/null
+++ b/format/commitgraph/read/close.go
@@ -0,0 +1,20 @@
+package read
+
+// Close releases every mapped commit-graph file, closing layers from last to
+// first, and resets the reader to hold no layers. All layers are closed even
+// if one fails; the first error encountered is returned.
+//
+// Close must be called at most once; repeated calls are undefined behavior.
+func (reader *Reader) Close() error {
+	var first error
+
+	for i := len(reader.layers); i > 0; i-- {
+		if err := reader.layers[i-1].close(); err != nil && first == nil {
+			first = err
+		}
+	}
+
+	reader.layers, reader.total = nil, 0
+
+	return first
+}
+}
diff --git a/format/commitgraph/read/commitat.go b/format/commitgraph/read/commitat.go
new file mode 100644
index 00000000..a39c5ccd
--- /dev/null
+++ b/format/commitgraph/read/commitat.go
@@ -0,0 +1,85 @@
+package read
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+// CommitAt returns decoded commit-graph metadata at one position.
+//
+// Each commit record is hashSize+16 bytes: the tree object ID, two 4-byte
+// parent words, one 4-byte word holding the generation number (upper 30 bits)
+// and the top 2 bits of the commit time, and the low 32 bits of the commit
+// time. All integers are big-endian.
+//
+// Errors from locating the layer, integer conversions, object ID decoding,
+// generation lookup and parent decoding are returned unchanged.
+func (reader *Reader) CommitAt(pos Position) (Commit, error) {
+ layer, err := reader.layerByPosition(pos)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ hashSize := reader.algo.Size()
+ stride := hashSize + 16
+
+ strideU64, err := intconv.IntToUint64(stride)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ // Byte range of this commit's record within the commit data chunk.
+ start64 := uint64(pos.Index) * strideU64
+ end64 := start64 + strideU64
+
+ start, err := intconv.Uint64ToInt(start64)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ end, err := intconv.Uint64ToInt(end64)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ // NOTE(review): no bounds check here; presumably layerByPosition and the
+ // layer parser guarantee the record is in range. Confirm.
+ record := layer.chunkCommit[start:end]
+
+ treeOID, err := objectid.FromBytes(reader.algo, record[:hashSize])
+ if err != nil {
+ return Commit{}, err
+ }
+
+ oid, err := reader.OIDAt(pos)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ p1 := binary.BigEndian.Uint32(record[hashSize : hashSize+4])
+ p2 := binary.BigEndian.Uint32(record[hashSize+4 : hashSize+8])
+ genAndTimeHi := binary.BigEndian.Uint32(record[hashSize+8 : hashSize+12])
+ timeLow := binary.BigEndian.Uint32(record[hashSize+12 : hashSize+16])
+
+ // The commit time is 34 bits wide: the low 2 bits of genAndTimeHi supply
+ // bits 32-33, timeLow supplies bits 0-31.
+ timeHigh := uint64(genAndTimeHi & 0x3)
+ commitTimeU64 := (timeHigh << 32) | uint64(timeLow)
+
+ commitTime, err := intconv.Uint64ToInt64(commitTimeU64)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ // The remaining upper 30 bits are the generation number.
+ generationV1 := genAndTimeHi >> 2
+
+ generationV2, err := reader.readGenerationV2(layer, pos.Index, commitTimeU64)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ parent1, parent2, extra, err := reader.decodeParents(layer, p1, p2)
+ if err != nil {
+ return Commit{}, err
+ }
+
+ return Commit{
+ OID: oid,
+ TreeOID: treeOID,
+ Parent1: parent1,
+ Parent2: parent2,
+ ExtraParents: extra,
+ CommitTimeUnix: commitTime,
+ GenerationV1: generationV1,
+ GenerationV2: generationV2,
+ }, nil
+}
diff --git a/format/commitgraph/read/commits.go b/format/commitgraph/read/commits.go
new file mode 100644
index 00000000..48984ecb
--- /dev/null
+++ b/format/commitgraph/read/commits.go
@@ -0,0 +1,20 @@
+package read
+
+import objectid "codeberg.org/lindenii/furgit/object/id"
+
+// Commit stores decoded commit-graph record data.
+type Commit struct {
+ // OID is the commit's own object ID.
+ OID objectid.ObjectID
+ // TreeOID is the object ID stored at the start of the commit record.
+ TreeOID objectid.ObjectID
+ // Parent1 and Parent2 are decoded from the record's two parent words.
+ Parent1 ParentRef
+ Parent2 ParentRef
+ // ExtraParents holds any additional parent positions produced by parent
+ // decoding.
+ ExtraParents []Position
+ // CommitTimeUnix is the 34-bit commit time stored in the record.
+ CommitTimeUnix int64
+ // GenerationV1 is the 30-bit generation number stored in the record.
+ GenerationV1 uint32
+ // GenerationV2 is the value produced by the reader's generation v2 lookup.
+ GenerationV2 uint64
+}
+
+// NumCommits returns total commits across loaded layers, as tracked in the
+// reader's total field. Close resets this count to 0.
+func (reader *Reader) NumCommits() uint32 {
+ return reader.total
+}
diff --git a/format/commitgraph/read/doc.go b/format/commitgraph/read/doc.go
new file mode 100644
index 00000000..573ddc19
--- /dev/null
+++ b/format/commitgraph/read/doc.go
@@ -0,0 +1,2 @@
+// Package read provides routines for reading commit graphs.
+package read
diff --git a/format/commitgraph/read/edges.go b/format/commitgraph/read/edges.go
new file mode 100644
index 00000000..96ffeb6d
--- /dev/null
+++ b/format/commitgraph/read/edges.go
@@ -0,0 +1,48 @@
+package read
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph"
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+// decodeExtraEdgeList reads a list of parent positions from the layer's EDGE
+// chunk, starting at entry edgeStart. Entries are big-endian uint32 words. The
+// low 31 bits are a global commit position and a set high bit marks the final
+// entry of the list.
+//
+// A MalformedError is returned when the layer has no EDGE chunk or the list
+// runs past the end of it. Errors from integer conversion and position
+// resolution are returned unchanged.
+func (reader *Reader) decodeExtraEdgeList(layer *layer, edgeStart uint32) ([]Position, error) {
+	edges := layer.chunkExtraEdges
+	if len(edges) == 0 {
+		return nil, &MalformedError{Path: layer.path, Reason: "missing EDGE chunk"}
+	}
+
+	var parents []Position
+
+	for cur := edgeStart; ; cur++ {
+		off, err := intconv.Uint64ToInt(uint64(cur) * 4)
+		if err != nil {
+			return nil, err
+		}
+
+		if off+4 > len(edges) {
+			return nil, &MalformedError{Path: layer.path, Reason: "EDGE index out of range"}
+		}
+
+		word := binary.BigEndian.Uint32(edges[off : off+4])
+
+		parentPos, err := reader.globalToPosition(word & commitgraph.ParentLastMask)
+		if err != nil {
+			return nil, err
+		}
+
+		parents = append(parents, parentPos)
+
+		// The high bit flags the last entry of this parent list.
+		if word&commitgraph.ParentExtraMask != 0 {
+			return parents, nil
+		}
+	}
+}
diff --git a/format/commitgraph/read/errors.go b/format/commitgraph/read/errors.go
new file mode 100644
index 00000000..0a32a368
--- /dev/null
+++ b/format/commitgraph/read/errors.go
@@ -0,0 +1,58 @@
+package read
+
+import (
+ "fmt"
+
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // NotFoundError is returned when an object ID has no entry in the commit graph.
+ type NotFoundError struct {
+ 	// OID is the object ID that was looked up.
+ 	OID objectid.ObjectID
+ }
+
+ // Error implements error.
+ func (e *NotFoundError) Error() string {
+ 	missing := e.OID
+
+ 	return fmt.Sprintf("commitgraph: object not found: %s", missing)
+ }
+
+ // PositionOutOfRangeError is returned when a Position names a layer or row
+ // that does not exist in the reader.
+ type PositionOutOfRangeError struct {
+ 	// Pos is the rejected position.
+ 	Pos Position
+ }
+
+ // Error implements error.
+ func (e *PositionOutOfRangeError) Error() string {
+ 	graph, index := e.Pos.Graph, e.Pos.Index
+
+ 	return fmt.Sprintf("commitgraph: position out of range: graph=%d index=%d", graph, index)
+ }
+
+ // MalformedError describes commit-graph data that failed validation.
+ type MalformedError struct {
+ 	// Path names the file (or logical source) the bad data came from.
+ 	Path string
+ 	// Reason is a short description of what was wrong.
+ 	Reason string
+ }
+
+ // Error implements error.
+ func (e *MalformedError) Error() string {
+ 	path, reason := e.Path, e.Reason
+
+ 	return fmt.Sprintf("commitgraph: malformed %q: %s", path, reason)
+ }
+
+ // UnsupportedVersionError is returned when a commit-graph file declares a
+ // format version this package does not read.
+ type UnsupportedVersionError struct {
+ 	// Version is the version byte found in the file header.
+ 	Version uint8
+ }
+
+ // Error implements error.
+ func (e *UnsupportedVersionError) Error() string {
+ 	found := e.Version
+
+ 	return fmt.Sprintf("commitgraph: unsupported version %d", found)
+ }
+
+ // BloomUnavailableError is returned when no changed-path bloom data is
+ // available for the commit at a given position.
+ type BloomUnavailableError struct {
+ 	// Pos is the position whose bloom data was requested.
+ 	Pos Position
+ }
+
+ // Error implements error.
+ func (e *BloomUnavailableError) Error() string {
+ 	graph, index := e.Pos.Graph, e.Pos.Index
+
+ 	return fmt.Sprintf("commitgraph: bloom unavailable at position graph=%d index=%d", graph, index)
+ }
diff --git a/format/commitgraph/read/generation.go b/format/commitgraph/read/generation.go
new file mode 100644
index 00000000..62e47996
--- /dev/null
+++ b/format/commitgraph/read/generation.go
@@ -0,0 +1,43 @@
+package read
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph"
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+ // readGenerationV2 computes the v2 generation value for the commit at row
+ // index of layer, given that commit's commitTime.
+ //
+ // When the layer carries no GDA2 chunk the result is 0. Otherwise the 4-byte
+ // big-endian GDA2 word for the row is read: if its
+ // commitgraph.GenerationOverflow bit is clear the word is an offset added to
+ // commitTime; if the bit is set, the remaining bits index an 8-byte entry in
+ // the GDO2 chunk whose value is added to commitTime instead.
+ //
+ // A MalformedError is returned when either index falls outside its chunk.
+ func (reader *Reader) readGenerationV2(layer *layer, index uint32, commitTime uint64) (uint64, error) {
+ 	if len(layer.chunkGeneration) == 0 {
+ 		return 0, nil
+ 	}
+
+ 	off64 := uint64(index) * 4
+
+ 	off, err := intconv.Uint64ToInt(off64)
+ 	if err != nil {
+ 		return 0, err
+ 	}
+
+ 	// Guard the slice below so a bad row index is reported as an error
+ 	// rather than a panic, matching the GDO2 check further down.
+ 	if off+4 > len(layer.chunkGeneration) {
+ 		return 0, &MalformedError{Path: layer.path, Reason: "GDA2 index out of range"}
+ 	}
+
+ 	value := binary.BigEndian.Uint32(layer.chunkGeneration[off : off+4])
+
+ 	if value&commitgraph.GenerationOverflow == 0 {
+ 		return commitTime + uint64(value), nil
+ 	}
+
+ 	// The overflow bit is known to be set here, so XOR clears it and leaves
+ 	// the GDO2 entry index.
+ 	gdo2Index := value ^ commitgraph.GenerationOverflow
+ 	gdo2Off64 := uint64(gdo2Index) * 8
+
+ 	gdo2Off, err := intconv.Uint64ToInt(gdo2Off64)
+ 	if err != nil {
+ 		return 0, err
+ 	}
+
+ 	if gdo2Off+8 > len(layer.chunkGenerationOv) {
+ 		return 0, &MalformedError{Path: layer.path, Reason: "GDO2 index out of range"}
+ 	}
+
+ 	overflow := binary.BigEndian.Uint64(layer.chunkGenerationOv[gdo2Off : gdo2Off+8])
+
+ 	return commitTime + overflow, nil
+ }
diff --git a/format/commitgraph/read/hash.go b/format/commitgraph/read/hash.go
new file mode 100644
index 00000000..3a525afe
--- /dev/null
+++ b/format/commitgraph/read/hash.go
@@ -0,0 +1,79 @@
+package read
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // HashVersion reports the hash version the reader recorded when it was opened.
+ func (reader *Reader) HashVersion() uint8 {
+ 	version := reader.hashVersion
+
+ 	return version
+ }
+
+ // validateChainBaseHashes checks that the BASE chunk of graph, the layer at
+ // depth idx of the chain, lists exactly the hashes chain[0..idx).
+ //
+ // The first layer (idx == 0) must not carry a BASE chunk. For deeper layers
+ // the chunk must hold idx hashes of algo's size, and each one, rendered via
+ // its String method, must equal the chain entry at the same index. Any
+ // mismatch yields a MalformedError.
+ func validateChainBaseHashes(algo objectid.Algorithm, chain []string, idx int, graph *layer) error {
+ 	baseChunk := graph.chunkBaseGraphs
+
+ 	if idx == 0 {
+ 		if len(baseChunk) == 0 {
+ 			return nil
+ 		}
+
+ 		return &MalformedError{Path: graph.path, Reason: "unexpected BASE chunk in first graph"}
+ 	}
+
+ 	width := algo.Size()
+
+ 	if want := idx * width; len(baseChunk) != want {
+ 		return &MalformedError{
+ 			Path:   graph.path,
+ 			Reason: fmt.Sprintf("BASE chunk length %d does not match expected %d", len(baseChunk), want),
+ 		}
+ 	}
+
+ 	for depth := 0; depth < idx; depth++ {
+ 		raw := baseChunk[depth*width : (depth+1)*width]
+
+ 		parsed, err := objectid.FromBytes(algo, raw)
+ 		if err != nil {
+ 			return err
+ 		}
+
+ 		if parsed.String() == chain[depth] {
+ 			continue
+ 		}
+
+ 		return &MalformedError{
+ 			Path:   graph.path,
+ 			Reason: fmt.Sprintf("BASE chunk mismatch at index %d", depth),
+ 		}
+ 	}
+
+ 	return nil
+ }
+
+ // verifyTrailerHash checks the checksum stored in the last algo.Size() bytes
+ // of data: it hashes everything before the trailer with algo and compares the
+ // digest against the trailer. path is used only to label the MalformedError
+ // returned when data is too short or the digests differ.
+ func verifyTrailerHash(data []byte, algo objectid.Algorithm, path string) error {
+ 	trailerLen := algo.Size()
+
+ 	bodyLen := len(data) - trailerLen
+ 	if bodyLen < 0 {
+ 		return &MalformedError{Path: path, Reason: "file too short for trailer"}
+ 	}
+
+ 	hasher, err := algo.New()
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	if _, err = io.Copy(hasher, bytes.NewReader(data[:bodyLen])); err != nil {
+ 		return err
+ 	}
+
+ 	if bytes.Equal(hasher.Sum(nil), data[bodyLen:]) {
+ 		return nil
+ 	}
+
+ 	return &MalformedError{Path: path, Reason: "trailer hash mismatch"}
+ }
diff --git a/format/commitgraph/read/iterators.go b/format/commitgraph/read/iterators.go
new file mode 100644
index 00000000..85c56ff1
--- /dev/null
+++ b/format/commitgraph/read/iterators.go
@@ -0,0 +1,45 @@
+package read
+
+import (
+ "iter"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // AllPositions yields every commit position, layer by layer in the order the
+ // layers are stored and row by row within each layer.
+ //
+ // Iteration ends silently if a layer index cannot be represented as uint32.
+ func (reader *Reader) AllPositions() iter.Seq[Position] {
+ 	return func(yield func(Position) bool) {
+ 		for graphIdx := range reader.layers {
+ 			graphU32, err := intconv.IntToUint32(graphIdx)
+ 			if err != nil {
+ 				return
+ 			}
+
+ 			rows := reader.layers[graphIdx].numCommits
+ 			for row := range rows {
+ 				keepGoing := yield(Position{Graph: graphU32, Index: row})
+ 				if !keepGoing {
+ 					return
+ 				}
+ 			}
+ 		}
+ 	}
+ }
+
+ // AllOIDs yields the object ID of every commit, in the same order as
+ // AllPositions.
+ //
+ // Iteration ends silently at the first position whose object ID cannot be read.
+ func (reader *Reader) AllOIDs() iter.Seq[objectid.ObjectID] {
+ 	return func(yield func(objectid.ObjectID) bool) {
+ 		for pos := range reader.AllPositions() {
+ 			oid, err := reader.OIDAt(pos)
+ 			if err != nil || !yield(oid) {
+ 				return
+ 			}
+ 		}
+ 	}
+ }
diff --git a/format/commitgraph/read/layer.go b/format/commitgraph/read/layer.go
new file mode 100644
index 00000000..53ab1663
--- /dev/null
+++ b/format/commitgraph/read/layer.go
@@ -0,0 +1,28 @@
+package read
+
+import (
+ "os"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph/bloom"
+)
+
+ // layer is one memory-mapped commit-graph file together with the chunk
+ // slices parseLayer located inside it. All chunk slices alias data.
+ type layer struct {
+ // path is the root-relative path the layer was opened from; used in error messages.
+ path string
+ // file is the open handle backing the mapping; released by close.
+ file *os.File
+ // data is the read-only mapping of the whole file; unmapped by close.
+ data []byte
+ // numCommits is the commit count taken from the last OIDF fanout entry.
+ numCommits uint32
+ // baseCount is the base-graph count from the file header (forced to 0 for a single-file graph).
+ baseCount uint32
+ // globalFrom is the global position of this layer's first commit, i.e. the
+ // number of commits in all preceding layers.
+ globalFrom uint32
+
+ // Chunk contents as found in the chunk table. Optional chunks are left nil
+ // when absent from the file.
+ chunkOIDFanout []byte
+ chunkOIDLookup []byte
+ chunkCommit []byte
+ chunkGeneration []byte
+ chunkGenerationOv []byte
+ chunkExtraEdges []byte
+ chunkBloomIndex []byte
+ chunkBloomData []byte
+ chunkBaseGraphs []byte
+
+ // bloomSettings holds the settings parsed from the BDAT chunk; nil when the
+ // layer has no bloom chunks.
+ bloomSettings *bloom.Settings
+ }
diff --git a/format/commitgraph/read/layer_close.go b/format/commitgraph/read/layer_close.go
new file mode 100644
index 00000000..03dc91d5
--- /dev/null
+++ b/format/commitgraph/read/layer_close.go
@@ -0,0 +1,33 @@
+package read
+
+import "syscall"
+
+ // closeLayers closes every layer in layers, last to first, discarding any
+ // close errors (best-effort cleanup).
+ func closeLayers(layers []layer) {
+ 	for remaining := len(layers); remaining > 0; remaining-- {
+ 		_ = layers[remaining-1].close()
+ 	}
+ }
+
+ // close unmaps the layer's data and closes its file, clearing both fields so
+ // a second call is a no-op. Both steps are always attempted; if both fail the
+ // unmap error is the one returned.
+ func (layer *layer) close() error {
+ 	var unmapErr, fileErr error
+
+ 	if layer.data != nil {
+ 		unmapErr = syscall.Munmap(layer.data)
+ 		layer.data = nil
+ 	}
+
+ 	if layer.file != nil {
+ 		fileErr = layer.file.Close()
+ 		layer.file = nil
+ 	}
+
+ 	if unmapErr != nil {
+ 		return unmapErr
+ 	}
+
+ 	return fileErr
+ }
diff --git a/format/commitgraph/read/layer_lookup.go b/format/commitgraph/read/layer_lookup.go
new file mode 100644
index 00000000..84095788
--- /dev/null
+++ b/format/commitgraph/read/layer_lookup.go
@@ -0,0 +1,53 @@
+package read
+
+import (
+ "bytes"
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // layerLookup searches one layer for oid and returns its row index.
+ //
+ // The fanout chunk, indexed by the first byte of the object ID, bounds the
+ // range of rows that can hold it: the entry for the previous byte value (or 0)
+ // is the first candidate row and the entry for this byte value is one past the
+ // last. A binary search over the OID lookup chunk then finds the exact row.
+ // The boolean result is false when the object ID is not in this layer.
+ func layerLookup(layer *layer, oid objectid.ObjectID) (uint32, bool) {
+ 	width := oid.Size()
+ 	bucket := int(oid.RawBytes()[0])
+ 	fanout := layer.chunkOIDFanout
+
+ 	lower := uint32(0)
+ 	if bucket > 0 {
+ 		lower = binary.BigEndian.Uint32(fanout[(bucket-1)*4 : bucket*4])
+ 	}
+
+ 	upper := binary.BigEndian.Uint32(fanout[bucket*4 : (bucket+1)*4])
+ 	if upper == 0 || lower >= upper {
+ 		return 0, false
+ 	}
+
+ 	want := oid.RawBytes()
+
+ 	// Closed interval [first, last] of candidate rows.
+ 	first, last := int(lower), int(upper)-1
+ 	for first <= last {
+ 		probe := first + (last-first)/2
+ 		candidate := layer.chunkOIDLookup[probe*width : probe*width+width]
+
+ 		order := bytes.Compare(candidate, want)
+ 		if order < 0 {
+ 			first = probe + 1
+
+ 			continue
+ 		}
+
+ 		if order > 0 {
+ 			last = probe - 1
+
+ 			continue
+ 		}
+
+ 		row, err := intconv.IntToUint32(probe)
+ 		if err != nil {
+ 			return 0, false
+ 		}
+
+ 		return row, true
+ 	}
+
+ 	return 0, false
+ }
diff --git a/format/commitgraph/read/layer_open.go b/format/commitgraph/read/layer_open.go
new file mode 100644
index 00000000..21a97644
--- /dev/null
+++ b/format/commitgraph/read/layer_open.go
@@ -0,0 +1,81 @@
+package read
+
+import (
+ "os"
+ "syscall"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph"
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // openLayer opens the commit-graph file at relPath under root, maps it
+ // read-only into memory, parses its chunk layout and verifies its trailer
+ // checksum.
+ //
+ // Files smaller than a header, a fanout table and one hash are rejected with
+ // a MalformedError before mapping. On any failure every resource acquired so
+ // far is released and a nil layer is returned; on success the caller owns the
+ // layer and must close it.
+ func openLayer(root *os.Root, relPath string, algo objectid.Algorithm) (*layer, error) {
+ 	handle, err := root.Open(relPath)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	// opened collects resources as they are acquired so the deferred cleanup
+ 	// can release exactly what exists at the point of failure.
+ 	opened := &layer{path: relPath, file: handle}
+
+ 	succeeded := false
+
+ 	defer func() {
+ 		if !succeeded {
+ 			_ = opened.close()
+ 		}
+ 	}()
+
+ 	stat, err := handle.Stat()
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	if stat.Size() < int64(commitgraph.HeaderSize+commitgraph.FanoutSize+algo.Size()) {
+ 		return nil, &MalformedError{Path: relPath, Reason: "file too short"}
+ 	}
+
+ 	sizeU64, err := intconv.Int64ToUint64(stat.Size())
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	sizeInt, err := intconv.Uint64ToInt(sizeU64)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	fd, err := intconv.UintptrToInt(handle.Fd())
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	mapped, err := syscall.Mmap(fd, 0, sizeInt, syscall.PROT_READ, syscall.MAP_PRIVATE)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	opened.data = mapped
+
+ 	if err := parseLayer(opened, algo); err != nil {
+ 		return nil, err
+ 	}
+
+ 	if err := verifyTrailerHash(opened.data, algo, relPath); err != nil {
+ 		return nil, err
+ 	}
+
+ 	succeeded = true
+
+ 	return opened, nil
+ }
diff --git a/format/commitgraph/read/layer_parse.go b/format/commitgraph/read/layer_parse.go
new file mode 100644
index 00000000..13e36c0a
--- /dev/null
+++ b/format/commitgraph/read/layer_parse.go
@@ -0,0 +1,276 @@
+package read
+
+import (
+ "encoding/binary"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph"
+ "codeberg.org/lindenii/furgit/format/commitgraph/bloom"
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // parseLayer validates the commit-graph file held in layer.data and fills in
+ // the layer's chunk slices, commit count, base count and bloom settings.
+ //
+ // It checks the header (signature, version, hash version against algo), reads
+ // the chunk table, slices out every chunk, and then validates the length and
+ // internal consistency of each chunk it understands. Structural problems are
+ // reported as MalformedError; an unknown file version as
+ // UnsupportedVersionError. It does not verify the trailer checksum.
+ func parseLayer(layer *layer, algo objectid.Algorithm) error { //nolint:maintidx
+ if len(layer.data) < commitgraph.HeaderSize {
+ return &MalformedError{Path: layer.path, Reason: "file too short"}
+ }
+
+ // Header layout as read below: bytes 0-3 signature, byte 4 version,
+ // byte 5 hash version, byte 6 chunk count, byte 7 base-graph count.
+ header := layer.data[:commitgraph.HeaderSize]
+
+ signature := binary.BigEndian.Uint32(header[:4])
+ if signature != commitgraph.FileSignature {
+ return &MalformedError{Path: layer.path, Reason: "invalid signature"}
+ }
+
+ version := header[4]
+ if version != commitgraph.FileVersion {
+ return &UnsupportedVersionError{Version: version}
+ }
+
+ expectedHashVersion, err := intconv.Uint32ToUint8(algo.PackHashID())
+ if err != nil {
+ return err
+ }
+
+ hashVersion := header[5]
+ if hashVersion != expectedHashVersion {
+ return &MalformedError{Path: layer.path, Reason: "hash version does not match object format"}
+ }
+
+ numChunks := int(header[6])
+ baseCount := uint32(header[7])
+
+ // The chunk table follows the header and has one entry per chunk plus a
+ // terminating entry.
+ tocLen := (numChunks + 1) * commitgraph.ChunkEntrySize
+ tocStart := commitgraph.HeaderSize
+
+ tocEnd := tocStart + tocLen
+ if tocEnd > len(layer.data) {
+ return &MalformedError{Path: layer.path, Reason: "truncated chunk table"}
+ }
+
+ type tocEntry struct {
+ id uint32
+ offset uint64
+ }
+
+ // Each entry is a 4-byte chunk ID followed by an 8-byte file offset, both big-endian.
+ entries := make([]tocEntry, 0, numChunks+1)
+ for i := range numChunks + 1 {
+ entryOff := tocStart + i*commitgraph.ChunkEntrySize
+ entryData := layer.data[entryOff : entryOff+commitgraph.ChunkEntrySize]
+
+ entry := tocEntry{
+ id: binary.BigEndian.Uint32(entryData[:4]),
+ offset: binary.BigEndian.Uint64(entryData[4:]),
+ }
+ entries = append(entries, entry)
+ }
+
+ // The final entry must be the terminator (ID 0); its offset marks the end of the last chunk.
+ if entries[len(entries)-1].id != 0 {
+ return &MalformedError{Path: layer.path, Reason: "missing chunk table terminator"}
+ }
+
+ // Chunks may not extend into the trailing checksum.
+ trailerStart := len(layer.data) - algo.Size()
+
+ chunks := make(map[uint32][]byte, numChunks)
+ for i := range numChunks {
+ entry := entries[i]
+ if entry.id == 0 {
+ return &MalformedError{Path: layer.path, Reason: "early chunk table terminator"}
+ }
+
+ // A chunk runs from its own offset up to the offset of the next table entry.
+ next := entries[i+1]
+
+ start, err := intconv.Uint64ToInt(entry.offset)
+ if err != nil {
+ return err
+ }
+
+ end, err := intconv.Uint64ToInt(next.offset)
+ if err != nil {
+ return err
+ }
+
+ // Chunks must sit after the chunk table, before the trailer, and not have negative length.
+ if start < tocEnd || end < start || end > trailerStart {
+ return &MalformedError{Path: layer.path, Reason: "invalid chunk offsets"}
+ }
+
+ if _, exists := chunks[entry.id]; exists {
+ return &MalformedError{Path: layer.path, Reason: "duplicate chunk id"}
+ }
+
+ chunks[entry.id] = layer.data[start:end]
+ }
+
+ // OIDF: fixed-size fanout table; a missing chunk has length 0 and fails this check.
+ oidf := chunks[commitgraph.ChunkOIDF]
+ if len(oidf) != commitgraph.FanoutSize {
+ return &MalformedError{Path: layer.path, Reason: "invalid OIDF length"}
+ }
+
+ // The last fanout entry is the total number of commits in this layer.
+ layer.chunkOIDFanout = oidf
+ layer.numCommits = binary.BigEndian.Uint32(oidf[commitgraph.FanoutSize-4:])
+
+ // Fanout entries must never decrease from one slot to the next.
+ for i := range 255 {
+ cur := binary.BigEndian.Uint32(oidf[i*4 : (i+1)*4])
+
+ next := binary.BigEndian.Uint32(oidf[(i+1)*4 : (i+2)*4])
+ if cur > next {
+ return &MalformedError{Path: layer.path, Reason: "non-monotonic OIDF fanout"}
+ }
+ }
+
+ hashSize := algo.Size()
+
+ hashSizeU64, err := intconv.IntToUint64(hashSize)
+ if err != nil {
+ return err
+ }
+
+ // OIDL: exactly one hash per commit.
+ oidl := chunks[commitgraph.ChunkOIDL]
+ oidlWantLen64 := uint64(layer.numCommits) * hashSizeU64
+
+ oidlWantLen, err := intconv.Uint64ToInt(oidlWantLen64)
+ if err != nil {
+ return err
+ }
+
+ if len(oidl) != oidlWantLen {
+ return &MalformedError{Path: layer.path, Reason: "invalid OIDL length"}
+ }
+
+ layer.chunkOIDLookup = oidl
+
+ // CDAT: one fixed-size record per commit, a hash plus 16 further bytes.
+ stride := hashSize + 16
+
+ strideU64, err := intconv.IntToUint64(stride)
+ if err != nil {
+ return err
+ }
+
+ cdat := chunks[commitgraph.ChunkCDAT]
+ cdatWantLen64 := uint64(layer.numCommits) * strideU64
+
+ cdatWantLen, err := intconv.Uint64ToInt(cdatWantLen64)
+ if err != nil {
+ return err
+ }
+
+ if len(cdat) != cdatWantLen {
+ return &MalformedError{Path: layer.path, Reason: "invalid CDAT length"}
+ }
+
+ layer.chunkCommit = cdat
+
+ // GDA2 (optional): when present, exactly 4 bytes per commit.
+ gda2 := chunks[commitgraph.ChunkGDA2]
+ if len(gda2) != 0 {
+ wantLen64 := uint64(layer.numCommits) * 4
+
+ wantLen, err := intconv.Uint64ToInt(wantLen64)
+ if err != nil {
+ return err
+ }
+
+ if len(gda2) != wantLen {
+ return &MalformedError{Path: layer.path, Reason: "invalid GDA2 length"}
+ }
+
+ layer.chunkGeneration = gda2
+ }
+
+ // GDO2 (optional): a whole number of 8-byte entries.
+ gdo2 := chunks[commitgraph.ChunkGDO2]
+ if len(gdo2) != 0 {
+ if len(gdo2)%8 != 0 {
+ return &MalformedError{Path: layer.path, Reason: "invalid GDO2 length"}
+ }
+
+ layer.chunkGenerationOv = gdo2
+ }
+
+ // EDGE (optional): a whole number of 4-byte words.
+ edge := chunks[commitgraph.ChunkEDGE]
+ if len(edge) != 0 {
+ if len(edge)%4 != 0 {
+ return &MalformedError{Path: layer.path, Reason: "invalid EDGE length"}
+ }
+
+ layer.chunkExtraEdges = edge
+ }
+
+ // BASE: must be absent when the header declares no base graphs, and
+ // otherwise hold exactly one hash per declared base graph.
+ base := chunks[commitgraph.ChunkBASE]
+ if baseCount == 0 {
+ if len(base) != 0 {
+ return &MalformedError{Path: layer.path, Reason: "unexpected BASE chunk"}
+ }
+ } else {
+ wantLen64 := uint64(baseCount) * hashSizeU64
+
+ wantLen, err := intconv.Uint64ToInt(wantLen64)
+ if err != nil {
+ return err
+ }
+
+ if len(base) != wantLen {
+ return &MalformedError{Path: layer.path, Reason: "invalid BASE length"}
+ }
+
+ layer.chunkBaseGraphs = base
+ }
+
+ layer.baseCount = baseCount
+
+ // BIDX/BDAT (optional): bloom index and data come as a pair.
+ bidx := chunks[commitgraph.ChunkBIDX]
+
+ bdat := chunks[commitgraph.ChunkBDAT]
+ if len(bidx) != 0 || len(bdat) != 0 { //nolint:nestif
+ if len(bidx) == 0 || len(bdat) == 0 {
+ return &MalformedError{Path: layer.path, Reason: "BIDX/BDAT must both be present"}
+ }
+
+ // BIDX holds one 4-byte entry per commit.
+ bidxWantLen64 := uint64(layer.numCommits) * 4
+
+ bidxWantLen, err := intconv.Uint64ToInt(bidxWantLen64)
+ if err != nil {
+ return err
+ }
+
+ if len(bidx) != bidxWantLen {
+ return &MalformedError{Path: layer.path, Reason: "invalid BIDX length"}
+ }
+
+ // BDAT starts with a settings header of bloom.DataHeaderSize bytes.
+ if len(bdat) < bloom.DataHeaderSize {
+ return &MalformedError{Path: layer.path, Reason: "invalid BDAT length"}
+ }
+
+ settings, err := bloom.ParseSettings(bdat)
+ if err != nil {
+ return err
+ }
+
+ // Every BIDX entry must be no smaller than its predecessor and must not
+ // exceed the number of BDAT bytes that follow the settings header.
+ prev := uint32(0)
+
+ for i := range layer.numCommits {
+ off := int(i) * 4
+
+ cur := binary.BigEndian.Uint32(bidx[off : off+4])
+ if i > 0 && cur < prev {
+ return &MalformedError{Path: layer.path, Reason: "non-monotonic BIDX"}
+ }
+
+ bdatDataLen := len(bdat) - bloom.DataHeaderSize
+
+ bdatDataLenU32, err := intconv.IntToUint32(bdatDataLen)
+ if err != nil {
+ return err
+ }
+
+ if cur > bdatDataLenU32 {
+ return &MalformedError{Path: layer.path, Reason: "BIDX offset out of range"}
+ }
+
+ prev = cur
+ }
+
+ layer.chunkBloomIndex = bidx
+ layer.chunkBloomData = bdat
+ layer.bloomSettings = settings
+ }
+
+ return nil
+ }
diff --git a/format/commitgraph/read/layer_pos.go b/format/commitgraph/read/layer_pos.go
new file mode 100644
index 00000000..7b87b381
--- /dev/null
+++ b/format/commitgraph/read/layer_pos.go
@@ -0,0 +1,21 @@
+package read
+
+import "codeberg.org/lindenii/furgit/internal/intconv"
+
+ // layerByPosition returns the layer that pos.Graph refers to, after checking
+ // that the layer exists and that pos.Index is a valid row within it. An
+ // invalid position yields a PositionOutOfRangeError.
+ func (reader *Reader) layerByPosition(pos Position) (*layer, error) {
+ 	layerIdx, err := intconv.Uint64ToInt(uint64(pos.Graph))
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	layerExists := layerIdx >= 0 && layerIdx < len(reader.layers)
+ 	if layerExists && pos.Index < reader.layers[layerIdx].numCommits {
+ 		return &reader.layers[layerIdx], nil
+ 	}
+
+ 	return nil, &PositionOutOfRangeError{Pos: pos}
+ }
diff --git a/format/commitgraph/read/layerinfo.go b/format/commitgraph/read/layerinfo.go
new file mode 100644
index 00000000..83c4407d
--- /dev/null
+++ b/format/commitgraph/read/layerinfo.go
@@ -0,0 +1,23 @@
+package read
+
+ // LayerInfo describes one loaded commit-graph layer.
+ type LayerInfo struct {
+ // Path is the root-relative path the layer was opened from.
+ Path string
+ // BaseCount is the number of base graphs recorded for the layer.
+ BaseCount uint32
+ // Commits is the number of commits stored in the layer.
+ Commits uint32
+ }
+
+ // Layers returns a freshly allocated summary of every loaded layer, in the
+ // order the layers are stored in the reader.
+ func (reader *Reader) Layers() []LayerInfo {
+ 	infos := make([]LayerInfo, len(reader.layers))
+
+ 	for idx := range reader.layers {
+ 		src := &reader.layers[idx]
+
+ 		infos[idx] = LayerInfo{
+ 			Path:      src.path,
+ 			BaseCount: src.baseCount,
+ 			Commits:   src.numCommits,
+ 		}
+ 	}
+
+ 	return infos
+ }
diff --git a/format/commitgraph/read/lookup.go b/format/commitgraph/read/lookup.go
new file mode 100644
index 00000000..5f1b08f6
--- /dev/null
+++ b/format/commitgraph/read/lookup.go
@@ -0,0 +1,29 @@
+package read
+
+import (
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // Lookup finds the graph position of oid.
+ //
+ // Layers are searched from the last one back to the first, and the first hit
+ // wins. A NotFoundError is returned when oid uses a different algorithm than
+ // the reader or is absent from every layer.
+ func (reader *Reader) Lookup(oid objectid.ObjectID) (Position, error) {
+ 	if oid.Algorithm() == reader.algo {
+ 		for remaining := len(reader.layers); remaining > 0; remaining-- {
+ 			graphIdx := remaining - 1
+
+ 			row, hit := layerLookup(&reader.layers[graphIdx], oid)
+ 			if !hit {
+ 				continue
+ 			}
+
+ 			graphU32, err := intconv.IntToUint32(graphIdx)
+ 			if err != nil {
+ 				return Position{}, err
+ 			}
+
+ 			return Position{Graph: graphU32, Index: row}, nil
+ 		}
+ 	}
+
+ 	return Position{}, &NotFoundError{OID: oid}
+ }
diff --git a/format/commitgraph/read/mode.go b/format/commitgraph/read/mode.go
new file mode 100644
index 00000000..76afa21f
--- /dev/null
+++ b/format/commitgraph/read/mode.go
@@ -0,0 +1,11 @@
+package read
+
+ // OpenMode controls which commit-graph layout Open loads.
+ // Values other than the constants below are rejected by Open.
+ type OpenMode uint8
+
+ const (
+ // OpenSingle opens one commit-graph file at info/commit-graph
+ // (relative to the root passed to Open).
+ OpenSingle OpenMode = iota
+ // OpenChain opens chained commit-graphs from info/commit-graphs:
+ // the layers listed in its commit-graph-chain file, each stored as
+ // graph-<hash>.graph.
+ OpenChain
+ )
diff --git a/format/commitgraph/read/oidat.go b/format/commitgraph/read/oidat.go
new file mode 100644
index 00000000..908cbc1c
--- /dev/null
+++ b/format/commitgraph/read/oidat.go
@@ -0,0 +1,36 @@
+package read
+
+import (
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // OIDAt returns the object ID stored at pos.
+ //
+ // The position is validated first; the ID is then read from the owning
+ // layer's OID lookup chunk, where row i occupies bytes [i*size, (i+1)*size)
+ // for the reader's hash size.
+ func (reader *Reader) OIDAt(pos Position) (objectid.ObjectID, error) {
+ 	var zero objectid.ObjectID
+
+ 	owner, err := reader.layerByPosition(pos)
+ 	if err != nil {
+ 		return zero, err
+ 	}
+
+ 	width, err := intconv.IntToUint64(reader.algo.Size())
+ 	if err != nil {
+ 		return zero, err
+ 	}
+
+ 	first := uint64(pos.Index) * width
+
+ 	lo, err := intconv.Uint64ToInt(first)
+ 	if err != nil {
+ 		return zero, err
+ 	}
+
+ 	hi, err := intconv.Uint64ToInt(first + width)
+ 	if err != nil {
+ 		return zero, err
+ 	}
+
+ 	return objectid.FromBytes(reader.algo, owner.chunkOIDLookup[lo:hi])
+ }
diff --git a/format/commitgraph/read/open.go b/format/commitgraph/read/open.go
new file mode 100644
index 00000000..9c708b49
--- /dev/null
+++ b/format/commitgraph/read/open.go
@@ -0,0 +1,26 @@
+package read
+
+import (
+ "fmt"
+ "os"
+
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // Open loads commit-graph data found under an objects root, using the layout
+ // selected by mode.
+ //
+ // An algorithm whose size is zero is rejected with
+ // objectid.ErrInvalidAlgorithm, and an unrecognised mode with a plain error.
+ //
+ // Open borrows root during construction and does not close it.
+ func Open(root *os.Root, algo objectid.Algorithm, mode OpenMode) (*Reader, error) {
+ 	if algo.Size() == 0 {
+ 		return nil, objectid.ErrInvalidAlgorithm
+ 	}
+
+ 	if mode == OpenSingle {
+ 		return openSingle(root, algo)
+ 	}
+
+ 	if mode == OpenChain {
+ 		return openChain(root, algo)
+ 	}
+
+ 	return nil, fmt.Errorf("commitgraph: invalid open mode %d", mode)
+ }
diff --git a/format/commitgraph/read/open_chain.go b/format/commitgraph/read/open_chain.go
new file mode 100644
index 00000000..b55f3e57
--- /dev/null
+++ b/format/commitgraph/read/open_chain.go
@@ -0,0 +1,133 @@
+package read
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // openChain loads a chained commit-graph: it reads the list of layer hashes
+ // from info/commit-graphs/commit-graph-chain (one per line, blank lines
+ // ignored) and opens info/commit-graphs/graph-<hash>.graph for each, in order.
+ //
+ // Every chain entry must have the hex length of algo and consist only of hex
+ // digits, because it is interpolated into a file path. Each layer's header
+ // base count must equal its depth in the chain and its BASE chunk must list
+ // the preceding chain entries. Layers are assigned consecutive global
+ // position ranges; a total that overflows uint32 is rejected.
+ //
+ // On failure all layers opened so far are closed and a nil Reader is
+ // returned. Validation failures are reported as MalformedError.
+ func openChain(root *os.Root, algo objectid.Algorithm) (*Reader, error) {
+ 	chainPath := "info/commit-graphs/commit-graph-chain"
+
+ 	file, err := root.Open(chainPath)
+ 	if err != nil {
+ 		if errors.Is(err, os.ErrNotExist) {
+ 			return nil, &MalformedError{Path: chainPath, Reason: "missing commit-graph-chain"}
+ 		}
+
+ 		return nil, err
+ 	}
+
+ 	scanner := bufio.NewScanner(file)
+ 	hashes := make([]string, 0)
+
+ 	for scanner.Scan() {
+ 		line := strings.TrimSpace(scanner.Text())
+ 		if line == "" {
+ 			continue
+ 		}
+
+ 		hashes = append(hashes, line)
+ 	}
+
+ 	// Close the chain file before looking at the scan result so it is
+ 	// released on every path.
+ 	scanErr := scanner.Err()
+ 	closeErr := file.Close()
+
+ 	if scanErr != nil {
+ 		return nil, scanErr
+ 	}
+
+ 	if closeErr != nil {
+ 		return nil, closeErr
+ 	}
+
+ 	if len(hashes) == 0 {
+ 		return nil, &MalformedError{Path: chainPath, Reason: "empty chain"}
+ 	}
+
+ 	hashVersion, err := intconv.Uint32ToUint8(algo.PackHashID())
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	layers := make([]layer, 0, len(hashes))
+
+ 	// Unless the whole chain loads, release every layer already collected.
+ 	// A layer that fails validation before being appended is closed
+ 	// explicitly at the failure site.
+ 	complete := false
+
+ 	defer func() {
+ 		if !complete {
+ 			closeLayers(layers)
+ 		}
+ 	}()
+
+ 	var total uint32
+
+ 	for i, hashHex := range hashes {
+ 		expectedBaseCount, err := intconv.IntToUint32(i)
+ 		if err != nil {
+ 			return nil, err
+ 		}
+
+ 		if len(hashHex) != algo.HexLen() {
+ 			return nil, &MalformedError{
+ 				Path:   chainPath,
+ 				Reason: fmt.Sprintf("invalid graph hash length at line %d", i+1),
+ 			}
+ 		}
+
+ 		// The entry becomes part of a file name below; refuse anything that
+ 		// is not a hex digit (path separators, dots, ...).
+ 		for _, c := range []byte(hashHex) {
+ 			isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
+ 			if !isHex {
+ 				return nil, &MalformedError{
+ 					Path:   chainPath,
+ 					Reason: fmt.Sprintf("invalid graph hash at line %d", i+1),
+ 				}
+ 			}
+ 		}
+
+ 		relPath := fmt.Sprintf("info/commit-graphs/graph-%s.graph", hashHex)
+
+ 		loaded, err := openLayer(root, relPath, algo)
+ 		if err != nil {
+ 			return nil, err
+ 		}
+
+ 		if loaded.baseCount != expectedBaseCount {
+ 			_ = loaded.close()
+
+ 			return nil, &MalformedError{
+ 				Path:   relPath,
+ 				Reason: fmt.Sprintf("BASE count %d does not match chain depth %d", loaded.baseCount, i),
+ 			}
+ 		}
+
+ 		if err := validateChainBaseHashes(algo, hashes, i, loaded); err != nil {
+ 			_ = loaded.close()
+
+ 			return nil, err
+ 		}
+
+ 		// Unsigned addition wraps, so a smaller sum means overflow.
+ 		totalNext := total + loaded.numCommits
+ 		if totalNext < total {
+ 			_ = loaded.close()
+
+ 			return nil, &MalformedError{Path: relPath, Reason: "total commit count overflow"}
+ 		}
+
+ 		loaded.globalFrom = total
+ 		total = totalNext
+
+ 		layers = append(layers, *loaded)
+ 	}
+
+ 	complete = true
+
+ 	return &Reader{
+ 		algo:        algo,
+ 		hashVersion: hashVersion,
+ 		layers:      layers,
+ 		total:       total,
+ 	}, nil
+ }
diff --git a/format/commitgraph/read/open_single.go b/format/commitgraph/read/open_single.go
new file mode 100644
index 00000000..9ad6607f
--- /dev/null
+++ b/format/commitgraph/read/open_single.go
@@ -0,0 +1,32 @@
+package read
+
+import (
+ "os"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+ // openSingle loads the single-file commit-graph at info/commit-graph under
+ // root and wraps it in a Reader with exactly one layer.
+ //
+ // The layer is treated as standalone: its base count and global start
+ // position are both set to 0. If anything fails after the file has been
+ // opened, the layer is closed before the error is returned.
+ func openSingle(root *os.Root, algo objectid.Algorithm) (*Reader, error) {
+ 	graph, err := openLayer(root, "info/commit-graph", algo)
+ 	if err != nil {
+ 		return nil, err
+ 	}
+
+ 	hashVersion, err := intconv.Uint32ToUint8(algo.PackHashID())
+ 	if err != nil {
+ 		// The layer already owns an open file and a mapping; release them
+ 		// instead of leaking them on this error path.
+ 		_ = graph.close()
+
+ 		return nil, err
+ 	}
+
+ 	graph.baseCount = 0
+ 	graph.globalFrom = 0
+
+ 	out := &Reader{
+ 		algo:        algo,
+ 		hashVersion: hashVersion,
+ 		layers:      []layer{*graph},
+ 		total:       graph.numCommits,
+ 	}
+
+ 	return out, nil
+ }
diff --git a/format/commitgraph/read/parents.go b/format/commitgraph/read/parents.go
new file mode 100644
index 00000000..fcaad8b6
--- /dev/null
+++ b/format/commitgraph/read/parents.go
@@ -0,0 +1,67 @@
+package read
+
+import "codeberg.org/lindenii/furgit/format/commitgraph"
+
+ // ParentRef references one parent position.
+ // The zero value means "no parent".
+ type ParentRef struct {
+ // Valid is true when the slot holds a parent; it is false when the raw
+ // slot value was commitgraph.ParentNone.
+ Valid bool
+ // Pos is the parent's position; meaningful only when Valid is true.
+ Pos Position
+ }
+
+ // decodeParents turns the two raw parent slots of a commit record into
+ // parent references.
+ //
+ // p1 is always decoded as a single parent. p2 is one of three things:
+ // commitgraph.ParentNone (no second parent), a plain global position (exactly
+ // two parents), or, when commitgraph.ParentExtraMask is set, an index into the
+ // layer's EDGE chunk, whose list supplies the second parent followed by any
+ // further parents. The returned slice holds those further parents and is nil
+ // when there are none.
+ func (reader *Reader) decodeParents(layer *layer, p1, p2 uint32) (ParentRef, ParentRef, []Position, error) {
+ 	var none ParentRef
+
+ 	first, err := reader.decodeSingleParent(p1)
+ 	if err != nil {
+ 		return none, none, nil, err
+ 	}
+
+ 	switch {
+ 	case p2 == commitgraph.ParentNone:
+ 		return first, none, nil, nil
+ 	case p2&commitgraph.ParentExtraMask == 0:
+ 		second, err := reader.decodeSingleParent(p2)
+ 		if err != nil {
+ 			return none, none, nil, err
+ 		}
+
+ 		return first, second, nil, nil
+ 	}
+
+ 	// p2 points into the EDGE chunk; the low bits are the starting word index.
+ 	rest, err := reader.decodeExtraEdgeList(layer, p2&commitgraph.ParentLastMask)
+ 	if err != nil {
+ 		return none, none, nil, err
+ 	}
+
+ 	if len(rest) == 0 {
+ 		return none, none, nil, &MalformedError{Path: layer.path, Reason: "empty EDGE list"}
+ 	}
+
+ 	second := ParentRef{Valid: true, Pos: rest[0]}
+
+ 	var extras []Position
+ 	if len(rest) > 1 {
+ 		extras = rest[1:]
+ 	}
+
+ 	return first, second, extras, nil
+ }
+
+ // decodeSingleParent decodes a parent slot that must hold either
+ // commitgraph.ParentNone or a plain global position. ParentNone yields the
+ // zero ParentRef; a value with commitgraph.ParentExtraMask set is rejected as
+ // malformed; anything else is resolved to a Position.
+ func (reader *Reader) decodeSingleParent(raw uint32) (ParentRef, error) {
+ 	switch {
+ 	case raw == commitgraph.ParentNone:
+ 		return ParentRef{}, nil
+ 	case raw&commitgraph.ParentExtraMask != 0:
+ 		return ParentRef{}, &MalformedError{
+ 			Path:   "commit-graph",
+ 			Reason: "unexpected EDGE marker in single-parent slot",
+ 		}
+ 	}
+
+ 	resolved, err := reader.globalToPosition(raw)
+ 	if err != nil {
+ 		return ParentRef{}, err
+ 	}
+
+ 	return ParentRef{Valid: true, Pos: resolved}, nil
+ }
diff --git a/format/commitgraph/read/position.go b/format/commitgraph/read/position.go
new file mode 100644
index 00000000..b2e1138b
--- /dev/null
+++ b/format/commitgraph/read/position.go
@@ -0,0 +1,38 @@
+package read
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+ // Position identifies one commit record by layer and row index.
+ type Position struct {
+ // Graph is the index of the layer within the reader's layer list.
+ Graph uint32
+ // Index is the row within that layer; valid rows are below the layer's commit count.
+ Index uint32
+ }
+
+ // globalToPosition converts a chain-wide commit position into a
+ // (layer, row) Position by finding the layer whose range
+ // [globalFrom, globalFrom+numCommits) contains it. A value outside every
+ // layer's range yields a MalformedError.
+ func (reader *Reader) globalToPosition(global uint32) (Position, error) {
+ 	for layerIdx := range reader.layers {
+ 		first := reader.layers[layerIdx].globalFrom
+ 		limit := first + reader.layers[layerIdx].numCommits
+
+ 		if global < first || global >= limit {
+ 			continue
+ 		}
+
+ 		graphU32, err := intconv.IntToUint32(layerIdx)
+ 		if err != nil {
+ 			return Position{}, err
+ 		}
+
+ 		return Position{Graph: graphU32, Index: global - first}, nil
+ 	}
+
+ 	return Position{}, &MalformedError{
+ 		Path:   "commit-graph",
+ 		Reason: fmt.Sprintf("parent global position out of range: %d", global),
+ 	}
+ }
diff --git a/format/commitgraph/read/read_test.go b/format/commitgraph/read/read_test.go
new file mode 100644
index 00000000..c65b183e
--- /dev/null
+++ b/format/commitgraph/read/read_test.go
@@ -0,0 +1,322 @@
+package read_test
+
+import (
+ "errors"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/format/commitgraph/bloom"
+ "codeberg.org/lindenii/furgit/format/commitgraph/read"
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ "codeberg.org/lindenii/furgit/internal/testgit"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+// fixtureRepoPath returns the relative path of the bare fixture repository
+// called name for the given object ID algorithm.
+func fixtureRepoPath(t *testing.T, algo objectid.Algorithm, name string) string {
+	t.Helper()
+
+	fixtureDir := filepath.Join("testdata", "fixtures", algo.String(), name)
+
+	return filepath.Join(fixtureDir, "repo.git")
+}
+
+// fixtureRepo builds a test repository from the named on-disk fixture for
+// the given object ID algorithm.
+func fixtureRepo(t *testing.T, algo objectid.Algorithm, name string) *testgit.TestRepo {
+	t.Helper()
+
+	repoPath := fixtureRepoPath(t, algo, name)
+
+	return testgit.NewRepoFromFixture(t, algo, repoPath)
+}
+
+// TestReadSingleMatchesGit opens the "single_changed" fixture in single-file
+// mode and checks the reader against git: the commit count, Bloom
+// availability, a Lookup/OIDAt round trip for every commit, and a full
+// per-commit comparison for a sample of commits.
+func TestReadSingleMatchesGit(t *testing.T) {
+	t.Parallel()
+
+	testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+		repo := fixtureRepo(t, algo, "single_changed")
+		graph := openReader(t, repo, read.OpenSingle)
+
+		defer func() { _ = graph.Close() }()
+
+		commitIDs := repo.RevList(t, "--all")
+		total := len(commitIDs)
+
+		if total == 0 {
+			t.Fatal("git rev-list --all returned no commits")
+		}
+
+		wantCount, err := intconv.IntToUint32(total)
+		if err != nil {
+			t.Fatalf("len(allIDs) convert: %v", err)
+		}
+
+		gotCount := graph.NumCommits()
+		if gotCount != wantCount {
+			t.Fatalf("NumCommits() = %d, want %d", gotCount, total)
+		}
+
+		if !graph.HasBloom() {
+			t.Fatal("HasBloom() = false, want true")
+		}
+
+		if graph.BloomVersion() == 0 {
+			t.Fatal("BloomVersion() = 0, want non-zero when HasBloom() is true")
+		}
+
+		// Every commit must survive a Lookup -> OIDAt round trip.
+		for _, want := range commitIDs {
+			pos, err := graph.Lookup(want)
+			if err != nil {
+				t.Fatalf("Lookup(%s): %v", want, err)
+			}
+
+			got, err := graph.OIDAt(pos)
+			if err != nil {
+				t.Fatalf("OIDAt(%+v): %v", pos, err)
+			}
+
+			if got != want {
+				t.Fatalf("OIDAt(Lookup(%s)) = %s, want %s", want, got, want)
+			}
+		}
+
+		// The detailed comparison shells out to git, so only every
+		// stride-th commit and the final one are verified in depth.
+		stride := max(total/24, 1)
+		last := total - 1
+
+		for idx, id := range commitIDs {
+			if idx == last || idx%stride == 0 {
+				verifyCommitAgainstGit(t, repo, graph, id)
+			}
+		}
+	})
+}
+
+// TestReadChainMatchesGit opens the "chain_changed" fixture in chain mode and
+// checks the reader against git: at least two layers are loaded, the commit
+// count matches rev-list, every commit can be looked up, and a sample of
+// commits survives a Lookup/OIDAt round trip.
+func TestReadChainMatchesGit(t *testing.T) {
+	t.Parallel()
+
+	testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+		testRepo := fixtureRepo(t, algo, "chain_changed")
+
+		reader := openReader(t, testRepo, read.OpenChain)
+
+		defer func() { _ = reader.Close() }()
+
+		layers := reader.Layers()
+		if len(layers) < 2 {
+			t.Fatalf("Layers len = %d, want >= 2", len(layers))
+		}
+
+		allIDs := testRepo.RevList(t, "--all")
+		// An empty rev-list means the fixture is broken; report that
+		// directly instead of through a later count mismatch.
+		if len(allIDs) == 0 {
+			t.Fatal("git rev-list --all returned no commits")
+		}
+
+		wantCommitCount, err := intconv.IntToUint32(len(allIDs))
+		if err != nil {
+			t.Fatalf("len(allIDs) convert: %v", err)
+		}
+
+		if got := reader.NumCommits(); got != wantCommitCount {
+			t.Fatalf("NumCommits() = %d, want %d", got, len(allIDs))
+		}
+
+		step := max(len(allIDs)/20, 1)
+
+		for i, id := range allIDs {
+			// Lookup is exercised for every commit.
+			pos, err := reader.Lookup(id)
+			if err != nil {
+				t.Fatalf("Lookup(%s): %v", id, err)
+			}
+
+			// The OIDAt round trip is only checked for every step-th
+			// commit and the final one.
+			if i%step != 0 && i != len(allIDs)-1 {
+				continue
+			}
+
+			gotID, err := reader.OIDAt(pos)
+			if err != nil {
+				t.Fatalf("OIDAt(%+v): %v", pos, err)
+			}
+
+			if gotID != id {
+				t.Fatalf("OIDAt(Lookup(%s)) = %s, want %s", id, gotID, id)
+			}
+		}
+	})
+}
+
+// TestBloomUnavailableWithoutChangedPaths opens the "single_nochanged"
+// fixture and checks that BloomFilterAt for HEAD fails with a
+// *read.BloomUnavailableError carrying the queried position.
+func TestBloomUnavailableWithoutChangedPaths(t *testing.T) {
+	t.Parallel()
+
+	testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+		repo := fixtureRepo(t, algo, "single_nochanged")
+		graph := openReader(t, repo, read.OpenSingle)
+
+		defer func() { _ = graph.Close() }()
+
+		headID := repo.RevParse(t, "HEAD")
+
+		pos, lookupErr := graph.Lookup(headID)
+		if lookupErr != nil {
+			t.Fatalf("Lookup(%s): %v", headID, lookupErr)
+		}
+
+		_, bloomErr := graph.BloomFilterAt(pos)
+		if bloomErr == nil {
+			t.Fatal("BloomFilterAt() error = nil, want BloomUnavailableError")
+		}
+
+		typed, matched := errors.AsType[*read.BloomUnavailableError](bloomErr)
+		if !matched {
+			t.Fatalf("BloomFilterAt() error type = %T, want *BloomUnavailableError", bloomErr)
+		}
+
+		if typed.Pos != pos {
+			t.Fatalf("BloomUnavailableError.Pos = %+v, want %+v", typed.Pos, pos)
+		}
+	})
+}
+
+// openReader opens a commit-graph reader over the test repository's objects
+// root using the given mode, failing the test if the open fails.
+func openReader(tb testing.TB, testRepo *testgit.TestRepo, mode read.OpenMode) *read.Reader {
+	tb.Helper()
+
+	objectsRoot := testRepo.OpenObjectsRoot(tb)
+	algo := testRepo.Algorithm()
+
+	opened, openErr := read.Open(objectsRoot, algo, mode)
+	if openErr != nil {
+		tb.Fatalf("read.Open(objects): %v", openErr)
+	}
+
+	return opened
+}
+
+// verifyCommitAgainstGit compares the commit-graph record for id with what
+// git reports for the same commit: object ID, tree, parent list, and commit
+// time. It then checks the commit's Bloom filter hash version and that no
+// changed path is a false negative in the filter.
+func verifyCommitAgainstGit(tb testing.TB, testRepo *testgit.TestRepo, reader *read.Reader, id objectid.ObjectID) {
+	tb.Helper()
+
+	hexID := id.String()
+
+	pos, err := reader.Lookup(id)
+	if err != nil {
+		tb.Fatalf("Lookup(%s): %v", id, err)
+	}
+
+	record, err := reader.CommitAt(pos)
+	if err != nil {
+		tb.Fatalf("CommitAt(%+v): %v", pos, err)
+	}
+
+	if record.OID != id {
+		tb.Fatalf("CommitAt(%+v).OID = %s, want %s", pos, record.OID, id)
+	}
+
+	// Tree.
+	treeHex := testRepo.Run(tb, "show", "-s", "--format=%T", hexID)
+
+	wantTree, err := objectid.ParseHex(testRepo.Algorithm(), treeHex)
+	if err != nil {
+		tb.Fatalf("parse tree id %q: %v", treeHex, err)
+	}
+
+	if record.TreeOID != wantTree {
+		tb.Fatalf("CommitAt(%+v).TreeOID = %s, want %s", pos, record.TreeOID, wantTree)
+	}
+
+	// Parents, in order.
+	parentLine := testRepo.Run(tb, "show", "-s", "--format=%P", hexID)
+	wantParents := parseOIDLine(tb, testRepo.Algorithm(), parentLine)
+	gotParents := commitParents(tb, reader, record)
+
+	if len(gotParents) != len(wantParents) {
+		tb.Fatalf("parent count for %s = %d, want %d", id, len(gotParents), len(wantParents))
+	}
+
+	for i, got := range gotParents {
+		if want := wantParents[i]; got != want {
+			tb.Fatalf("parent %d for %s = %s, want %s", i, id, got, want)
+		}
+	}
+
+	// Commit time.
+	commitTimeRaw := testRepo.Run(tb, "show", "-s", "--format=%ct", hexID)
+
+	wantCommitTime, err := strconv.ParseInt(strings.TrimSpace(commitTimeRaw), 10, 64)
+	if err != nil {
+		tb.Fatalf("parse commit time %q: %v", commitTimeRaw, err)
+	}
+
+	if record.CommitTimeUnix != wantCommitTime {
+		tb.Fatalf("CommitAt(%+v).CommitTimeUnix = %d, want %d", pos, record.CommitTimeUnix, wantCommitTime)
+	}
+
+	// Bloom filter.
+	filter, err := reader.BloomFilterAt(pos)
+	if err != nil {
+		tb.Fatalf("BloomFilterAt(%+v): %v", pos, err)
+	}
+
+	wantVersion := reader.BloomVersion()
+	if filter.HashVersion != uint32(wantVersion) {
+		tb.Fatalf("filter.HashVersion = %d, want %d", filter.HashVersion, wantVersion)
+	}
+
+	assertChangedPathsBloomPositive(tb, testRepo, filter, id)
+}
+
+// commitParents resolves the parents of commit to object IDs, in order:
+// Parent1 (when valid), Parent2 (when valid), then every entry of
+// ExtraParents. Any failed OIDAt lookup fails the test.
+func commitParents(tb testing.TB, reader *read.Reader, commit read.Commit) []objectid.ObjectID {
+	tb.Helper()
+
+	parents := make([]objectid.ObjectID, 0, 2+len(commit.ExtraParents))
+
+	if first := commit.Parent1; first.Valid {
+		oid, err := reader.OIDAt(first.Pos)
+		if err != nil {
+			tb.Fatalf("OIDAt(parent1 %+v): %v", first.Pos, err)
+		}
+
+		parents = append(parents, oid)
+	}
+
+	if second := commit.Parent2; second.Valid {
+		oid, err := reader.OIDAt(second.Pos)
+		if err != nil {
+			tb.Fatalf("OIDAt(parent2 %+v): %v", second.Pos, err)
+		}
+
+		parents = append(parents, oid)
+	}
+
+	for i := range commit.ExtraParents {
+		extra := commit.ExtraParents[i]
+
+		oid, err := reader.OIDAt(extra)
+		if err != nil {
+			tb.Fatalf("OIDAt(extra parent %+v): %v", extra, err)
+		}
+
+		parents = append(parents, oid)
+	}
+
+	return parents
+}
+
+// assertChangedPathsBloomPositive lists the paths git reports as changed by
+// commitID and fails the test if the filter errors on, or reports as absent,
+// any of them. Blank lines in git's output are ignored.
+func assertChangedPathsBloomPositive(tb testing.TB, testRepo *testgit.TestRepo, filter bloom.Filter, commitID objectid.ObjectID) {
+	tb.Helper()
+
+	output := testRepo.Run(tb, "diff-tree", "--no-commit-id", "--name-only", "-r", "--root", commitID.String())
+	listing := strings.TrimSpace(output)
+
+	for entry := range strings.SplitSeq(listing, "\n") {
+		name := strings.TrimSpace(entry)
+		if name == "" {
+			continue
+		}
+
+		present, err := filter.MightContain([]byte(name))
+
+		switch {
+		case err != nil:
+			tb.Fatalf("MightContain(%q): %v", name, err)
+		case !present:
+			tb.Fatalf("Bloom filter false negative for commit %s path %q", commitID, name)
+		}
+	}
+}
+
+// parseOIDLine splits line on whitespace and parses each field as a hex
+// object ID for algo, failing the test on the first field that does not
+// parse. A line without fields yields an empty slice.
+func parseOIDLine(tb testing.TB, algo objectid.Algorithm, line string) []objectid.ObjectID {
+	tb.Helper()
+
+	fields := strings.Fields(line)
+	ids := make([]objectid.ObjectID, len(fields))
+
+	for i, field := range fields {
+		parsed, err := objectid.ParseHex(algo, field)
+		if err != nil {
+			tb.Fatalf("parse object id %q: %v", field, err)
+		}
+
+		ids[i] = parsed
+	}
+
+	return ids
+}
diff --git a/format/commitgraph/read/reader.go b/format/commitgraph/read/reader.go
new file mode 100644
index 00000000..d5c84a70
--- /dev/null
+++ b/format/commitgraph/read/reader.go
@@ -0,0 +1,16 @@
+package read
+
+import objectid "codeberg.org/lindenii/furgit/object/id"
+
+// Reader provides read-only access to one mmap-backed commit-graph snapshot.
+//
+// It is safe for concurrent read-only queries.
+// Where a method documents it, values returned by that method are only valid
+// until the reader is closed.
+type Reader struct {
+ algo objectid.Algorithm // object ID algorithm this reader was opened with
+ hashVersion uint8 // NOTE(review): presumably the hash version read from the graph header; confirm against the parser
+
+ layers []layer // loaded layers; a Position's Graph field indexes this slice
+ total uint32 // NOTE(review): presumably the commit count across all layers; confirm
+}
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/HEAD b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/HEAD
new file mode 100644
index 00000000..cb089cd8
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/config b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/config
new file mode 100644
index 00000000..07d359d0
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/config
@@ -0,0 +1,4 @@
+[core]
+ repositoryformatversion = 0
+ filemode = true
+ bare = true
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain
new file mode 100644
index 00000000..74c46b64
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain
@@ -0,0 +1,2 @@
+dd7578d5216ca76c25b19631ba90f7498aeabbe7
+bf985c21612a52070d8b008e6ef51edf8b609401
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-bf985c21612a52070d8b008e6ef51edf8b609401.graph b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-bf985c21612a52070d8b008e6ef51edf8b609401.graph
new file mode 100644
index 00000000..c31869c1
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-bf985c21612a52070d8b008e6ef51edf8b609401.graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-dd7578d5216ca76c25b19631ba90f7498aeabbe7.graph b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-dd7578d5216ca76c25b19631ba90f7498aeabbe7.graph
new file mode 100644
index 00000000..241eb3cc
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/commit-graphs/graph-dd7578d5216ca76c25b19631ba90f7498aeabbe7.graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/packs b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/packs
new file mode 100644
index 00000000..61decf9b
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/info/packs
@@ -0,0 +1,2 @@
+P pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.pack
+
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.bitmap b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.bitmap
new file mode 100644
index 00000000..1508cf18
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.bitmap
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.idx b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.idx
new file mode 100644
index 00000000..00ee2646
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.idx
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.pack b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.pack
new file mode 100644
index 00000000..c65ae27f
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.pack
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.rev b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.rev
new file mode 100644
index 00000000..d0689f72
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/objects/pack/pack-15b064d6a8ef8cff520565f6db8c006b2e6f7f2f.rev
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/refs/heads/master b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/refs/heads/master
new file mode 100644
index 00000000..8942d437
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/chain_changed/repo.git/refs/heads/master
@@ -0,0 +1 @@
+46ca641fd65e566b8ecfa567a1f01766289192f8
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/HEAD b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/HEAD
new file mode 100644
index 00000000..b870d826
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/main
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/config b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/config
new file mode 100644
index 00000000..07d359d0
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/config
@@ -0,0 +1,4 @@
+[core]
+ repositoryformatversion = 0
+ filemode = true
+ bare = true
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/commit-graph b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/commit-graph
new file mode 100644
index 00000000..56b59a54
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/commit-graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/packs b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/packs
new file mode 100644
index 00000000..ecf5d272
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/info/packs
@@ -0,0 +1,2 @@
+P pack-34e9e132566989e2abfe8821731236c77f9bcbe9.pack
+
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.bitmap b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.bitmap
new file mode 100644
index 00000000..9fec7b16
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.bitmap
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.idx b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.idx
new file mode 100644
index 00000000..e30cbb5a
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.idx
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.pack b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.pack
new file mode 100644
index 00000000..8da45eab
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.pack
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.rev b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.rev
new file mode 100644
index 00000000..3bcd2e2c
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/objects/pack/pack-34e9e132566989e2abfe8821731236c77f9bcbe9.rev
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/refs/heads/main b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/refs/heads/main
new file mode 100644
index 00000000..090ca933
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_changed/repo.git/refs/heads/main
@@ -0,0 +1 @@
+d02a8dbd1a8fbaac8ab7f7f1533cc312ab2c9eec
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/HEAD b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/HEAD
new file mode 100644
index 00000000..cb089cd8
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/config b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/config
new file mode 100644
index 00000000..07d359d0
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/config
@@ -0,0 +1,4 @@
+[core]
+ repositoryformatversion = 0
+ filemode = true
+ bare = true
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/commit-graph b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/commit-graph
new file mode 100644
index 00000000..28f7d06a
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/commit-graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/packs b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/packs
new file mode 100644
index 00000000..8434a002
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/info/packs
@@ -0,0 +1,2 @@
+P pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.pack
+
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.bitmap b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.bitmap
new file mode 100644
index 00000000..64a36c71
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.bitmap
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.idx b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.idx
new file mode 100644
index 00000000..f5e16674
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.idx
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.pack b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.pack
new file mode 100644
index 00000000..8f82b451
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.pack
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.rev b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.rev
new file mode 100644
index 00000000..64771f70
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/objects/pack/pack-a3da595034c94bb16b6829d757a66b7d259b9ffc.rev
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/refs/heads/master b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/refs/heads/master
new file mode 100644
index 00000000..475cb2c1
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha1/single_nochanged/repo.git/refs/heads/master
@@ -0,0 +1 @@
+dda8217252bdf3e01fdf31309d0e5c3051b00945
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/HEAD b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/HEAD
new file mode 100644
index 00000000..cb089cd8
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/config b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/config
new file mode 100644
index 00000000..7d1c0006
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/config
@@ -0,0 +1,6 @@
+[extensions]
+ objectformat = sha256
+[core]
+ repositoryformatversion = 1
+ filemode = true
+ bare = true
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain
new file mode 100644
index 00000000..4e7d76fe
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/commit-graph-chain
@@ -0,0 +1,2 @@
+505cab61f8ddfa614301e8f97943112739236c6bcd19ed4d1f7c6b830cab4f62
+77c47bd6ca2ce17208c9361717a5823c0cb4b5ee336a14959678e060d674ffb6
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-505cab61f8ddfa614301e8f97943112739236c6bcd19ed4d1f7c6b830cab4f62.graph b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-505cab61f8ddfa614301e8f97943112739236c6bcd19ed4d1f7c6b830cab4f62.graph
new file mode 100644
index 00000000..4a93de94
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-505cab61f8ddfa614301e8f97943112739236c6bcd19ed4d1f7c6b830cab4f62.graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-77c47bd6ca2ce17208c9361717a5823c0cb4b5ee336a14959678e060d674ffb6.graph b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-77c47bd6ca2ce17208c9361717a5823c0cb4b5ee336a14959678e060d674ffb6.graph
new file mode 100644
index 00000000..7807351d
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/commit-graphs/graph-77c47bd6ca2ce17208c9361717a5823c0cb4b5ee336a14959678e060d674ffb6.graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/packs b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/packs
new file mode 100644
index 00000000..3b1241c4
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/info/packs
@@ -0,0 +1,2 @@
+P pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.pack
+
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.bitmap b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.bitmap
new file mode 100644
index 00000000..007fcd0e
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.bitmap
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.idx b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.idx
new file mode 100644
index 00000000..248cf8fc
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.idx
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.pack b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.pack
new file mode 100644
index 00000000..92cea7fb
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.pack
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.rev b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.rev
new file mode 100644
index 00000000..569862ce
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/objects/pack/pack-04168d0884c910f505cb9fbcf045957e44ccee06d812b5e531ae666014a26ed1.rev
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/refs/heads/master b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/refs/heads/master
new file mode 100644
index 00000000..29d83be8
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/chain_changed/repo.git/refs/heads/master
@@ -0,0 +1 @@
+10d2943dc7ad88011cae3b776d9565d6451a350ce1d16949bc8546a5fe6c0a53
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/HEAD b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/HEAD
new file mode 100644
index 00000000..b870d826
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/main
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/config b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/config
new file mode 100644
index 00000000..7d1c0006
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/config
@@ -0,0 +1,6 @@
+[extensions]
+ objectformat = sha256
+[core]
+ repositoryformatversion = 1
+ filemode = true
+ bare = true
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/commit-graph b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/commit-graph
new file mode 100644
index 00000000..f4dd0e0c
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/commit-graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/packs b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/packs
new file mode 100644
index 00000000..0f39ed89
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/info/packs
@@ -0,0 +1,2 @@
+P pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.pack
+
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.bitmap b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.bitmap
new file mode 100644
index 00000000..b5c5055c
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.bitmap
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.idx b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.idx
new file mode 100644
index 00000000..144778cd
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.idx
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.pack b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.pack
new file mode 100644
index 00000000..599ccae0
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.pack
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.rev b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.rev
new file mode 100644
index 00000000..3c093f31
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/objects/pack/pack-316dbc67dac12d131591640da0c55b76387cbf1fd2a117ab3d7ca0d854a031c9.rev
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/refs/heads/main b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/refs/heads/main
new file mode 100644
index 00000000..4ba32358
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_changed/repo.git/refs/heads/main
@@ -0,0 +1 @@
+a9ff114900e6be139ec66a2a61c930973d8c4bc6fd3b899405ee7ab8740bdbd3
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/HEAD b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/HEAD
new file mode 100644
index 00000000..cb089cd8
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/config b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/config
new file mode 100644
index 00000000..7d1c0006
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/config
@@ -0,0 +1,6 @@
+[extensions]
+ objectformat = sha256
+[core]
+ repositoryformatversion = 1
+ filemode = true
+ bare = true
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/commit-graph b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/commit-graph
new file mode 100644
index 00000000..f98ca4a1
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/commit-graph
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/packs b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/packs
new file mode 100644
index 00000000..65184c9a
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/info/packs
@@ -0,0 +1,2 @@
+P pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.pack
+
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.bitmap b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.bitmap
new file mode 100644
index 00000000..53530f4c
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.bitmap
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.idx b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.idx
new file mode 100644
index 00000000..b3a417a8
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.idx
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.pack b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.pack
new file mode 100644
index 00000000..d8dcedbf
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.pack
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.rev b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.rev
new file mode 100644
index 00000000..e50d1a81
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/objects/pack/pack-d335453f760b064e36459d780ec9bf0e5dd596c0ee1ac6310136067c4f13438b.rev
Binary files differ
diff --git a/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/refs/heads/master b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/refs/heads/master
new file mode 100644
index 00000000..a4e184b4
--- /dev/null
+++ b/format/commitgraph/read/testdata/fixtures/sha256/single_nochanged/repo.git/refs/heads/master
@@ -0,0 +1 @@
+7e396bf648e3b045c293d9fbdc533d4377d4e801d5d1fb57b84d22dd054a5860
diff --git a/format/doc.go b/format/doc.go
new file mode 100644
index 00000000..0d2ec813
--- /dev/null
+++ b/format/doc.go
@@ -0,0 +1,5 @@
+// Package format encapsulates various git-related file formats.
+//
+// In particular, it hosts formats that are not tied to one specific domain
+// package where they would obviously belong.
+package format
diff --git a/format/packfile/delta/apply/apply.go b/format/packfile/delta/apply/apply.go
new file mode 100644
index 00000000..f5006e3c
--- /dev/null
+++ b/format/packfile/delta/apply/apply.go
@@ -0,0 +1,160 @@
+// Package apply applies Git delta instruction streams.
+package apply
+
+import "fmt"
+
+// Apply applies one Git delta instruction stream to base and returns the rebuilt output; it returns an error when delta is truncated, copies or inserts out of bounds, or disagrees with its declared sizes.
+func Apply(base, delta []byte) ([]byte, error) {
+ pos := 0
+
+ srcSize, err := readVarint(delta, &pos) // first header varint: declared length of base
+ if err != nil {
+ return nil, err
+ }
+
+ dstSize, err := readVarint(delta, &pos) // second header varint: declared length of the output
+ if err != nil {
+ return nil, err
+ }
+
+ if srcSize != len(base) {
+ return nil, fmt.Errorf("delta/apply: delta source size mismatch: got %d want %d", srcSize, len(base))
+ }
+
+ out := make([]byte, dstSize) // NOTE(review): dstSize is untrusted and not range-checked; if readVarint can yield a negative value (see its shift guard) make panics here — confirm
+ outPos := 0
+
+ for pos < len(delta) {
+ op := delta[pos]
+ pos++
+
+ //nolint:nestif
+ if op&0x80 != 0 { // high bit set: copy a range of base into out
+ off := 0
+
+ if op&0x01 != 0 { // bits 0x01..0x08 each announce one offset byte, least-significant first
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy offset")
+ }
+
+ off |= int(delta[pos])
+ pos++
+ }
+
+ if op&0x02 != 0 {
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy offset")
+ }
+
+ off |= int(delta[pos]) << 8
+ pos++
+ }
+
+ if op&0x04 != 0 {
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy offset")
+ }
+
+ off |= int(delta[pos]) << 16
+ pos++
+ }
+
+ if op&0x08 != 0 {
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy offset")
+ }
+
+ off |= int(delta[pos]) << 24
+ pos++
+ }
+
+ n := 0
+
+ if op&0x10 != 0 { // bits 0x10..0x40 each announce one size byte, least-significant first
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy size")
+ }
+
+ n |= int(delta[pos])
+ pos++
+ }
+
+ if op&0x20 != 0 {
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy size")
+ }
+
+ n |= int(delta[pos]) << 8
+ pos++
+ }
+
+ if op&0x40 != 0 {
+ if pos >= len(delta) {
+ return nil, fmt.Errorf("delta/apply: malformed delta copy size")
+ }
+
+ n |= int(delta[pos]) << 16
+ pos++
+ }
+
+ if n == 0 { // an encoded copy size of zero stands for 0x10000 bytes
+ n = 0x10000
+ }
+
+ if off < 0 || n < 0 || off+n > len(base) || outPos+n > len(out) { // the copy must stay inside base and inside the declared output
+ return nil, fmt.Errorf("delta/apply: delta copy out of bounds")
+ }
+
+ copy(out[outPos:outPos+n], base[off:off+n])
+ outPos += n
+
+ continue
+ }
+
+ if op == 0 { // high bit clear and no length: rejected as invalid
+ return nil, fmt.Errorf("delta/apply: invalid delta opcode 0")
+ }
+
+ n := int(op) // insert instruction: op (1..127) is the count of literal bytes that follow in delta
+ if pos+n > len(delta) || outPos+n > len(out) {
+ return nil, fmt.Errorf("delta/apply: delta insert out of bounds")
+ }
+
+ copy(out[outPos:outPos+n], delta[pos:pos+n])
+ outPos += n
+ pos += n
+ }
+
+ if outPos != len(out) { // the instructions must fill exactly the declared output size
+ return nil, fmt.Errorf("delta/apply: delta output size mismatch: got %d want %d", outPos, len(out))
+ }
+
+ return out, nil
+}
+
+// readVarint parses one Git delta varint from buf starting at *pos (7 value bits per byte, least-significant group first, bit 0x80 meaning another byte follows) and advances *pos past the bytes it consumed.
+func readVarint(buf []byte, pos *int) (int, error) {
+ value := 0
+ shift := uint(0)
+
+ for {
+ if *pos >= len(buf) { // ran out of input before a terminating byte
+ return 0, fmt.Errorf("delta/apply: malformed delta varint")
+ }
+
+ b := buf[*pos]
+ *pos++
+
+ value |= int(b&0x7f) << shift
+ if b&0x80 == 0 { // continuation bit clear: this was the last byte
+ break
+ }
+
+ shift += 7
+ if shift > 63 { // NOTE(review): shift == 63 still passes this guard, so a tenth byte is shifted by 63; with a 64-bit int that can set the sign bit and yield a negative value — consider rejecting earlier
+ return 0, fmt.Errorf("delta/apply: delta varint overflow")
+ }
+ }
+
+ return value, nil
+}
diff --git a/format/packfile/delta/apply/header.go b/format/packfile/delta/apply/header.go
new file mode 100644
index 00000000..69c9659a
--- /dev/null
+++ b/format/packfile/delta/apply/header.go
@@ -0,0 +1,47 @@
+package apply
+
+import (
+ "fmt"
+ "io"
+)
+
+// ReadHeaderSizes reads the first two varints in one inflated delta stream
+// and returns them in stream order as (source size, destination size).
+// Callers that continue reading the same stream should pass their own buffered
+// byte reader and keep using that same reader afterwards.
+func ReadHeaderSizes(reader io.ByteReader) (int, int, error) {
+ srcSize, err := readVarintFromByteReader(reader) // first varint in the stream
+ if err != nil {
+ return 0, 0, err
+ }
+
+ dstSize, err := readVarintFromByteReader(reader) // second varint in the stream
+ if err != nil {
+ return 0, 0, err
+ }
+
+ return srcSize, dstSize, nil
+}
+
+// readVarintFromByteReader parses one Git delta varint from reader (7 value bits per byte, least-significant group first, bit 0x80 meaning another byte follows), consuming exactly the bytes of that varint.
+func readVarintFromByteReader(reader io.ByteReader) (int, error) {
+ value := 0
+ shift := uint(0)
+
+ for {
+ b, err := reader.ReadByte()
+ if err != nil { // any read error, including end of input mid-varint, is wrapped with %w
+ return 0, fmt.Errorf("delta/apply: malformed delta varint: %w", err)
+ }
+
+ value |= int(b&0x7f) << shift
+ if b&0x80 == 0 { // continuation bit clear: this was the last byte
+ return value, nil
+ }
+
+ shift += 7
+ if shift > 63 { // NOTE(review): shift == 63 still passes this guard, so a tenth byte is shifted by 63; with a 64-bit int that can set the sign bit and yield a negative value — consider rejecting earlier
+ return 0, fmt.Errorf("delta/apply: delta varint overflow")
+ }
+ }
+}
diff --git a/format/packfile/delta/doc.go b/format/packfile/delta/doc.go
new file mode 100644
index 00000000..f63c96a8
--- /dev/null
+++ b/format/packfile/delta/doc.go
@@ -0,0 +1,2 @@
+// Package delta provides various routines to handle Git delta compression.
+package delta
diff --git a/format/packfile/doc.go b/format/packfile/doc.go
new file mode 100644
index 00000000..cd4aacfc
--- /dev/null
+++ b/format/packfile/doc.go
@@ -0,0 +1,5 @@
+// Package packfile provides Git packfile format parsing primitives.
+package packfile
+
+// TODO: Consider moving this into object/store/packed once the pack ingestion semantics are settled.
+// Note, however, that the other stores might still want the pack constants provided here.
diff --git a/format/packfile/entry.go b/format/packfile/entry.go
new file mode 100644
index 00000000..0f9c7c8d
--- /dev/null
+++ b/format/packfile/entry.go
@@ -0,0 +1,76 @@
+package packfile
+
+import (
+ "fmt"
+
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// Entry is one parsed pack entry prefix, including any delta base reference
+// data that appears before the compressed payload.
+type Entry struct {
+ // Type is the pack entry type.
+ Type objecttype.Type
+ // Size is the declared resulting object size.
+ Size int64
+ // DataOffset is the byte offset from the start of the entry to the zlib
+ // payload bytes.
+ DataOffset int
+ // RefBaseID is the referenced base object ID bytes for ref-delta entries.
+ RefBaseID []byte
+ // OfsBaseDistance is the backward distance for ofs-delta entries.
+ OfsBaseDistance uint64
+}
+
+// ParseEntry parses one full pack entry prefix from data: the type/size header
+// plus, for delta entries, the base reference that precedes the payload.
+// hashSize must match the hash algorithm size used by the pack/index.
+func ParseEntry(data []byte, hashSize int) (Entry, error) {
+ var zero Entry
+
+ header, err := ParseEntryHeader(data)
+ if err != nil {
+ return zero, err
+ }
+
+ entry := Entry{
+ Type: header.Type,
+ Size: header.Size,
+ DataOffset: header.HeaderSize, // provisional: advanced below when a delta base reference follows
+ }
+
+ switch entry.Type {
+ case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag:
+ // Base object entries have no extra prefix fields.
+ case objecttype.TypeRefDelta:
+ if hashSize <= 0 { // hashSize is only validated on this path
+ return zero, fmt.Errorf("packfile: invalid hash size %d", hashSize)
+ }
+
+ end := entry.DataOffset + hashSize
+ if end > len(data) {
+ return zero, fmt.Errorf("packfile: truncated ref-delta base id")
+ }
+
+ entry.RefBaseID = data[entry.DataOffset:end] // sub-slice of data, not a copy
+ entry.DataOffset = end
+ case objecttype.TypeOfsDelta:
+ dist, consumed, err := ParseOfsDeltaDistance(data[entry.DataOffset:])
+ if err != nil {
+ return zero, err
+ }
+
+ entry.OfsBaseDistance = dist
+ entry.DataOffset += consumed
+ case objecttype.TypeInvalid, objecttype.TypeFuture: // same error as the default branch
+ return zero, fmt.Errorf("packfile: unsupported object type %d", entry.Type)
+ default:
+ return zero, fmt.Errorf("packfile: unsupported object type %d", entry.Type)
+ }
+
+ if entry.DataOffset > len(data) { // DataOffset == len(data) is accepted
+ return zero, fmt.Errorf("packfile: entry data offset out of bounds")
+ }
+
+ return entry, nil
+}
diff --git a/format/packfile/entry_header.go b/format/packfile/entry_header.go
new file mode 100644
index 00000000..05664268
--- /dev/null
+++ b/format/packfile/entry_header.go
@@ -0,0 +1,52 @@
+package packfile
+
+import (
+ "fmt"
+
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// EntryHeader is one parsed pack entry header prefix.
+type EntryHeader struct {
+ // Type is the entry type tag from the first header byte.
+ Type objecttype.Type
+ // Size is the declared resulting object size.
+ Size int64
+ // HeaderSize is the number of bytes consumed by the type/size header.
+ HeaderSize int
+}
+
+// ParseEntryHeader parses one pack entry type/size header from data and reports how many bytes it occupied; it errors on empty or truncated input and on a size that decodes as negative.
+func ParseEntryHeader(data []byte) (EntryHeader, error) {
+ var zero EntryHeader
+ if len(data) == 0 {
+ return zero, fmt.Errorf("packfile: truncated entry header")
+ }
+
+ first := data[0]
+ header := EntryHeader{
+ Type: objecttype.Type((first >> 4) & 0x07), // bits 4-6 of the first byte
+ Size: int64(first & 0x0f), // low 4 bits of the first byte are the lowest size bits
+ HeaderSize: 1,
+ }
+
+ shift := uint(4)
+
+ b := first
+ for b&0x80 != 0 { // bit 0x80 means another size byte follows
+ if header.HeaderSize >= len(data) {
+ return zero, fmt.Errorf("packfile: truncated entry header")
+ }
+
+ b = data[header.HeaderSize]
+ header.HeaderSize++
+ header.Size |= int64(b&0x7f) << shift // each continuation byte contributes 7 more size bits
+ shift += 7 // NOTE(review): shift is never bounded; once it reaches 64 further size bits shift out to zero, so over-long headers are not reported as overflow
+ }
+
+ if header.Size < 0 { // catches a decoded value whose sign bit ended up set
+ return zero, fmt.Errorf("packfile: negative entry size")
+ }
+
+ return header, nil
+}
diff --git a/format/packfile/header.go b/format/packfile/header.go
new file mode 100644
index 00000000..bc859a55
--- /dev/null
+++ b/format/packfile/header.go
@@ -0,0 +1,9 @@
+package packfile
+
+// Signature is the 4-byte "PACK" magic at the start of pack files, written as the integer formed by the ASCII bytes 'P', 'A', 'C', 'K' in big-endian order.
+const Signature = 0x5041434b
+
+// VersionSupported reports whether one pack version is supported.
+func VersionSupported(version uint32) bool {
+ return version == 2 || version == 3
+}
diff --git a/format/packfile/ingest/api.go b/format/packfile/ingest/api.go
new file mode 100644
index 00000000..ce366a4f
--- /dev/null
+++ b/format/packfile/ingest/api.go
@@ -0,0 +1,195 @@
+package ingest
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "io"
+ "os"
+
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objectstorer "codeberg.org/lindenii/furgit/object/storer"
+)
+
+// Options controls one pack ingest operation.
+type Options struct {
+ // FixThin appends missing local bases for thin packs.
+ FixThin bool
+ // WriteRev writes a .rev alongside the .pack and .idx.
+ WriteRev bool
+ // Base supplies existing objects for thin-pack fixup.
+ Base objectstorer.Store
+ // Progress receives human-readable progress messages.
+ //
+ // When nil, no progress output is emitted.
+ Progress io.Writer
+ // ProgressFlush flushes transport output after progress writes.
+ //
+ // When nil, no explicit flush is attempted.
+ ProgressFlush func() error
+ // RequireTrailingEOF requires the source to hit EOF after the pack trailer.
+ //
+ // This is suitable for exact pack-file readers, but should be disabled for
+ // full-duplex transport streams like receive-pack where the peer keeps the
+ // connection open to read the server response.
+ RequireTrailingEOF bool
+}
+
+// Result describes one successful ingest transaction.
+type Result struct {
+ // PackName is the destination-relative filename of the written .pack.
+ PackName string
+ // IdxName is the destination-relative filename of the written .idx.
+ IdxName string
+ // RevName is the destination-relative filename of the written .rev.
+ //
+ // RevName is empty when Options.WriteRev is false.
+ RevName string
+ // PackHash is the final pack hash (same hash embedded in .idx/.rev trailers).
+ PackHash objectid.ObjectID
+ // ObjectCount is the final object count in the resulting pack.
+ //
+ // If thin fixup appends objects, this includes appended base objects.
+ ObjectCount uint32
+ // ThinFixed reports whether thin fixup appended local bases.
+ ThinFixed bool
+}
+
+// HeaderInfo describes the parsed PACK header.
+type HeaderInfo struct {
+ Version uint32 // pack version field of the header
+ ObjectCount uint32 // object count field of the header; Continue rejects 0 and Discard requires 0
+}
+
+// DiscardResult describes one successful Discard call.
+type DiscardResult struct {
+ PackHash objectid.ObjectID // pack hash built from the verified trailer bytes
+ ObjectCount uint32 // Discard always reports 0 here
+}
+
+// Pending is one started ingest operation awaiting Continue or Discard.
+//
+// Exactly one of Continue or Discard may be called.
+type Pending struct {
+ reader *bufio.Reader // buffered wrapper around the source passed to Ingest, positioned after the header
+ algo objectid.Algorithm // hash algorithm passed to Ingest
+ opts Options // options passed to Ingest
+ header HeaderInfo // parsed header, exposed via Header
+ headerRaw [packHeaderSize]byte // raw header bytes as read; Discard feeds them into the pack hash
+
+ finalized bool // set by Continue and Discard; NOTE(review): not read anywhere in the code shown here
+}
+
+// Ingest reads and validates one PACK header from src and returns one pending operation that must be finished with Continue or Discard; it returns objectid.ErrInvalidAlgorithm when algo reports a zero size.
+func Ingest(
+ src io.Reader,
+ algo objectid.Algorithm,
+ opts Options,
+) (*Pending, error) {
+ if algo.Size() == 0 { // a zero hash size is treated as an invalid algorithm
+ return nil, objectid.ErrInvalidAlgorithm
+ }
+
+ reader := bufio.NewReader(src) // bufio may read ahead of the header, so later pack reads go through this reader rather than src
+
+ header, headerRaw, err := readAndValidatePackHeader(reader)
+ if err != nil {
+ return nil, err
+ }
+
+ return &Pending{
+ reader: reader,
+ algo: algo,
+ opts: opts,
+ header: header,
+ headerRaw: headerRaw,
+ }, nil
+}
+
+// Header returns parsed PACK header info.
+func (pending *Pending) Header() HeaderInfo {
+ return pending.header
+}
+
+// Continue ingests the pack stream into destination and writes pack artifacts.
+// It returns ErrZeroObjectContinue when the header declared zero objects.
+// Continue is terminal. Further use of pending is undefined behavior.
+//
+// Artifacts are published under content-addressed final names derived from the
+// resulting pack hash. If those final names already exist, Continue treats that
+// as success and removes its temporary files.
+func (pending *Pending) Continue(destination *os.Root) (Result, error) {
+ pending.finalized = true // marked even when the call fails below
+
+ if pending.header.ObjectCount == 0 { // zero-object packs are handled by Discard instead
+ return Result{}, ErrZeroObjectContinue
+ }
+
+ state, err := newIngestState(
+ pending.reader,
+ destination,
+ pending.algo,
+ pending.opts,
+ pending.header,
+ pending.headerRaw,
+ )
+ if err != nil {
+ return Result{}, err
+ }
+
+ return ingest(state)
+}
+
+// Discard consumes and verifies one zero-object pack stream without writing
+// files. It returns ErrNonZeroDiscard when the header declared any objects.
+// The trailer must equal the hash of the raw header bytes.
+// Discard is terminal. Further use of pending is undefined behavior.
+func (pending *Pending) Discard() (DiscardResult, error) {
+ pending.finalized = true // marked even when the call fails below
+
+ if pending.header.ObjectCount != 0 {
+ return DiscardResult{}, ErrNonZeroDiscard
+ }
+
+ hashImpl, err := pending.algo.New()
+ if err != nil {
+ return DiscardResult{}, err
+ }
+
+ _, _ = hashImpl.Write(pending.headerRaw[:]) // only the header is hashed; Write's result is ignored (presumably a hash.Hash whose Write never fails — confirm)
+
+ trailer := make([]byte, pending.algo.Size())
+
+ _, err = io.ReadFull(pending.reader, trailer)
+ if err != nil { // NOTE(review): a short read or I/O error is reported as a trailer mismatch and the underlying err is dropped
+ return DiscardResult{}, &PackTrailerMismatchError{}
+ }
+
+ computed := hashImpl.Sum(nil)
+ if !bytes.Equal(computed, trailer) {
+ return DiscardResult{}, &PackTrailerMismatchError{}
+ }
+
+ if pending.opts.RequireTrailingEOF {
+ var probe [1]byte // one-byte read used only to detect data after the trailer
+
+ n, err := pending.reader.Read(probe[:])
+ if n > 0 || err == nil { // anything other than an immediate error counts as trailing data
+ return DiscardResult{}, errors.New("packfile/ingest: pack has trailing garbage")
+ }
+
+ if err != io.EOF { // direct comparison: only an unwrapped io.EOF is accepted as clean end of input
+ return DiscardResult{}, err
+ }
+ }
+
+ packHash, err := objectid.FromBytes(pending.algo, trailer)
+ if err != nil {
+ return DiscardResult{}, err
+ }
+
+ return DiscardResult{
+ PackHash: packHash,
+ ObjectCount: 0,
+ }, nil
+}
diff --git a/format/packfile/ingest/byteslice_reader.go b/format/packfile/ingest/byteslice_reader.go
new file mode 100644
index 00000000..a1570ef3
--- /dev/null
+++ b/format/packfile/ingest/byteslice_reader.go
@@ -0,0 +1,21 @@
+package ingest
+
+import "io"
+
+// byteSliceReader implements io.ByteReader on []byte.
+type byteSliceReader struct {
+ data []byte
+ pos int
+}
+
+// ReadByte reads one byte from receiver.
+func (reader *byteSliceReader) ReadByte() (byte, error) {
+ if reader.pos >= len(reader.data) {
+ return 0, io.EOF
+ }
+
+ b := reader.data[reader.pos]
+ reader.pos++
+
+ return b, nil
+}
diff --git a/format/packfile/ingest/cache.go b/format/packfile/ingest/cache.go
new file mode 100644
index 00000000..9a15f55f
--- /dev/null
+++ b/format/packfile/ingest/cache.go
@@ -0,0 +1,53 @@
+package ingest
+
+import (
+ "codeberg.org/lindenii/furgit/internal/lru"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// deltaBaseCacheKey identifies one resolved base by record index.
+type deltaBaseCacheKey struct {
+ recordIdx int
+}
+
+// deltaBaseCacheValue stores one resolved base object payload.
+type deltaBaseCacheValue struct {
+ realType objecttype.Type
+ content []byte
+}
+
+// deltaBaseCache is a bounded LRU for resolved base payloads.
+type deltaBaseCache struct {
+ lru *lru.Cache[deltaBaseCacheKey, deltaBaseCacheValue]
+}
+
+// newDeltaBaseCache creates one bounded base cache.
+func newDeltaBaseCache(maxBytes int64) *deltaBaseCache {
+ return &deltaBaseCache{
+ lru: lru.New(
+ maxBytes,
+ func(_ deltaBaseCacheKey, value deltaBaseCacheValue) int64 {
+ return int64(len(value.content))
+ },
+ nil,
+ ),
+ }
+}
+
+// get returns one cache entry for recordIdx.
+func (cache *deltaBaseCache) get(recordIdx int) (objecttype.Type, []byte, bool) {
+ value, ok := cache.lru.Get(deltaBaseCacheKey{recordIdx: recordIdx})
+ if !ok {
+ return objecttype.TypeInvalid, nil, false
+ }
+
+ return value.realType, value.content, true
+}
+
+// add stores one cache entry for recordIdx.
+func (cache *deltaBaseCache) add(recordIdx int, realType objecttype.Type, content []byte) {
+ cache.lru.Add(deltaBaseCacheKey{recordIdx: recordIdx}, deltaBaseCacheValue{
+ realType: realType,
+ content: content,
+ })
+}
diff --git a/format/packfile/ingest/counting_writer.go b/format/packfile/ingest/counting_writer.go
new file mode 100644
index 00000000..051ad9d1
--- /dev/null
+++ b/format/packfile/ingest/counting_writer.go
@@ -0,0 +1,17 @@
+package ingest
+
+import "io"
+
+// countingWriter counts bytes written to dst.
+type countingWriter struct {
+ dst io.Writer
+ n int
+}
+
+// Write writes src to dst and tracks output byte count.
+func (writer *countingWriter) Write(src []byte) (int, error) {
+ n, err := writer.dst.Write(src)
+ writer.n += n
+
+ return n, err
+}
diff --git a/format/packfile/ingest/crc.go b/format/packfile/ingest/crc.go
new file mode 100644
index 00000000..f55af4ff
--- /dev/null
+++ b/format/packfile/ingest/crc.go
@@ -0,0 +1,22 @@
+package ingest
+
+import "fmt"
+
+// beginEntryCRC starts inline CRC accumulation for one packed entry.
+func (scanner *streamScanner) beginEntryCRC() {
+ scanner.entryCRC = 0
+ scanner.inEntryCRC = true
+}
+
+// endEntryCRC finishes inline CRC accumulation for one packed entry.
+func (scanner *streamScanner) endEntryCRC() (uint32, error) {
+ if !scanner.inEntryCRC {
+ return 0, fmt.Errorf("packfile/ingest: entry CRC not started")
+ }
+
+ crc := scanner.entryCRC
+ scanner.entryCRC = 0
+ scanner.inEntryCRC = false
+
+ return crc, nil
+}
diff --git a/format/packfile/ingest/delta_header.go b/format/packfile/ingest/delta_header.go
new file mode 100644
index 00000000..110cf83b
--- /dev/null
+++ b/format/packfile/ingest/delta_header.go
@@ -0,0 +1,11 @@
+package ingest
+
+import deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply"
+
+// readDeltaHeaderSizes reads source and destination sizes from one delta payload
+// by wrapping payload in a byteSliceReader and calling deltaapply.ReadHeaderSizes.
+func readDeltaHeaderSizes(payload []byte) (int, int, error) {
+ reader := &byteSliceReader{data: payload}
+
+ return deltaapply.ReadHeaderSizes(reader)
+}
diff --git a/format/packfile/ingest/distance.go b/format/packfile/ingest/distance.go
new file mode 100644
index 00000000..9bc4d886
--- /dev/null
+++ b/format/packfile/ingest/distance.go
@@ -0,0 +1,30 @@
+package ingest
+
+import (
+ "fmt"
+ "io"
+)
+
+// readOfsDistanceFromStream reads one ofs-delta encoded distance from reader and returns (distance, bytes consumed, error); read errors are wrapped with %w.
+func readOfsDistanceFromStream(reader io.ByteReader) (uint64, int, error) {
+ first, err := reader.ReadByte()
+ if err != nil {
+ return 0, 0, fmt.Errorf("read ofs distance first byte: %w", err)
+ }
+
+ dist := uint64(first & 0x7f) // low 7 bits of the first byte seed the value
+ consumed := 1
+
+ b := first
+ for b&0x80 != 0 { // bit 0x80 means another byte follows; NOTE(review): the number of continuation bytes is not capped, so dist can wrap on uint64 overflow
+ b, err = reader.ReadByte()
+ if err != nil {
+ return 0, 0, fmt.Errorf("read ofs distance continuation: %w", err)
+ }
+
+ consumed++
+ dist = ((dist + 1) << 7) + uint64(b&0x7f) // each continuation adds 1 before shifting, then appends the next 7 bits
+ }
+
+ return dist, consumed, nil
+}
diff --git a/format/packfile/ingest/doc.go b/format/packfile/ingest/doc.go
new file mode 100644
index 00000000..2095068a
--- /dev/null
+++ b/format/packfile/ingest/doc.go
@@ -0,0 +1,3 @@
+// Package ingest implements streaming ingestion of one Git pack stream into a
+// destination root, producing .pack/.idx and optionally .rev.
+package ingest
diff --git a/format/packfile/ingest/drain.go b/format/packfile/ingest/drain.go
new file mode 100644
index 00000000..ed6ec821
--- /dev/null
+++ b/format/packfile/ingest/drain.go
@@ -0,0 +1,68 @@
+package ingest
+
+import (
+ "fmt"
+ "io"
+
+ "codeberg.org/lindenii/furgit/internal/compress/zlib"
+ objectheader "codeberg.org/lindenii/furgit/object/header"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+ packfmt "codeberg.org/lindenii/furgit/format/packfile"
+)
+
+// drainEntryPayload inflates one entry payload from stream and returns
+// (inflatedLength, oidForBaseEntry); the object ID is the zero value for delta entries.
+func drainEntryPayload(state *ingestState, record objectRecord) (int64, objectid.ObjectID, error) {
+ var zero objectid.ObjectID
+
+ reader, err := zlib.NewReader(state.stream) // inflate directly from the shared pack stream
+ if err != nil {
+ return 0, zero, &MalformedPackEntryError{Offset: record.offset, Reason: fmt.Sprintf("open zlib stream: %v", err)}
+ }
+
+ defer func() { _ = reader.Close() }() // the Close error is deliberately ignored
+
+ var total int64
+
+ if packfmt.IsBaseObjectType(record.packedType) { // base object: hash object header + inflated content to get its ID
+ header, ok := objectheader.Encode(record.packedType, record.declaredSize) // uses the declared size; this function does not compare it with the inflated length
+ if !ok {
+ return 0, zero, &MalformedPackEntryError{Offset: record.offset, Reason: "encode object header"}
+ }
+
+ hashImpl, err := state.algo.New()
+ if err != nil {
+ return 0, zero, err
+ }
+
+ _, _ = hashImpl.Write(header)
+
+ n, err := io.Copy(hashImpl, reader) // streams the content into the hash without buffering it
+ if err != nil {
+ return 0, zero, &MalformedPackEntryError{Offset: record.offset, Reason: fmt.Sprintf("inflate base object: %v", err)}
+ }
+
+ total = n
+
+ oid, err := objectid.FromBytes(state.algo, hashImpl.Sum(nil))
+ if err != nil {
+ return 0, zero, err
+ }
+
+ return total, oid, nil
+ }
+
+ if record.packedType == objecttype.TypeOfsDelta || record.packedType == objecttype.TypeRefDelta { // delta: only the inflated length is measured here
+ n, err := io.Copy(io.Discard, reader)
+ if err != nil {
+ return 0, zero, &MalformedPackEntryError{Offset: record.offset, Reason: fmt.Sprintf("inflate delta payload: %v", err)}
+ }
+
+ total = n
+
+ return total, zero, nil
+ }
+
+ return 0, zero, &MalformedPackEntryError{Offset: record.offset, Reason: "unsupported payload type"}
+}
diff --git a/format/packfile/ingest/entry.go b/format/packfile/ingest/entry.go
new file mode 100644
index 00000000..4e2cab55
--- /dev/null
+++ b/format/packfile/ingest/entry.go
@@ -0,0 +1,92 @@
+package ingest
+
+import (
+ "fmt"
+
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+ packfmt "codeberg.org/lindenii/furgit/format/packfile"
+)
+
+// scanOneEntry scans one pack entry from stream, appends one record to state (indexing it by offset and, for base objects, by object ID), and returns the offset just past the entry.
+func scanOneEntry(state *ingestState, startOffset uint64) (uint64, error) {
+ state.stream.beginEntryCRC() // the CRC is started before the prefix is read and collected after the payload is drained
+
+ record, err := parseEntryPrefix(state, startOffset)
+ if err != nil {
+ return 0, err
+ }
+
+ payloadStartConsumed := state.stream.consumed // stream position before the compressed payload
+
+ contentLen, oid, err := drainEntryPayload(state, record)
+ if err != nil {
+ return 0, err
+ }
+
+ consumedInput := state.stream.consumed - payloadStartConsumed // compressed bytes the payload occupied in the stream
+
+ if contentLen != record.declaredSize { // the inflated length must match the size declared in the entry header
+ return 0, &MalformedPackEntryError{
+ Offset: startOffset,
+ Reason: fmt.Sprintf("inflated size mismatch got %d want %d", contentLen, record.declaredSize),
+ }
+ }
+
+ endOffset := startOffset + uint64(record.headerLen) + consumedInput
+ if endOffset > state.stream.consumed { // consistency check of the computed end against the stream position
+ return 0, &MalformedPackEntryError{
+ Offset: startOffset,
+ Reason: fmt.Sprintf("entry end offset overflow got %d > stream %d", endOffset, state.stream.consumed),
+ }
+ }
+
+ record.packedLen = endOffset - startOffset // prefix plus compressed payload
+
+ record.dataOffset = startOffset + uint64(record.headerLen) // where the compressed payload begins
+ if record.packedLen < uint64(record.headerLen) {
+ return 0, &MalformedPackEntryError{Offset: startOffset, Reason: "negative payload span"}
+ }
+
+ crc, err := state.stream.endEntryCRC()
+ if err != nil {
+ return 0, err
+ }
+
+ record.crc32 = crc
+
+ if packfmt.IsBaseObjectType(record.packedType) { // base objects are fully identified now; delta records stay unresolved here
+ record.objectID = oid
+ record.realType = record.packedType
+ record.resolved = true
+ }
+
+ recordIdx := len(state.records)
+ state.records = append(state.records, record)
+
+ state.offsetToRecord[record.offset] = recordIdx
+ if record.resolved {
+ state.objectToRecord[record.objectID] = recordIdx
+ }
+
+ switch record.packedType { // queue delta entries together with the base they reference
+ case objecttype.TypeOfsDelta:
+ state.ofsDeltas = append(state.ofsDeltas, ofsDeltaRef{
+ baseOffset: record.baseOffset,
+ recordIdx: recordIdx,
+ })
+ case objecttype.TypeRefDelta:
+ state.refDeltas = append(state.refDeltas, refDeltaRef{
+ baseObject: record.baseObject,
+ recordIdx: recordIdx,
+ })
+ case objecttype.TypeInvalid, // the remaining types are listed explicitly and need no extra bookkeeping
+ objecttype.TypeCommit,
+ objecttype.TypeTree,
+ objecttype.TypeBlob,
+ objecttype.TypeTag,
+ objecttype.TypeFuture:
+ default:
+ }
+
+ return endOffset, nil
+}
diff --git a/format/packfile/ingest/entry_header.go b/format/packfile/ingest/entry_header.go
new file mode 100644
index 00000000..c74fdc16
--- /dev/null
+++ b/format/packfile/ingest/entry_header.go
@@ -0,0 +1,33 @@
+package ingest
+
+import (
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// encodePackEntryHeader encodes one non-delta packed entry header (type and size) into a freshly allocated slice; it panics when intconv.Int64ToUint64 rejects size.
+func encodePackEntryHeader(ty objecttype.Type, size int64) []byte {
+ var out [16]byte // scratch space; a 64-bit size needs at most 10 header bytes (4 bits, then 7 per byte)
+
+ n := 0
+
+ s, err := intconv.Int64ToUint64(size)
+ if err != nil { // presumably only for negative sizes — confirm against intconv
+ panic(err)
+ }
+
+ c := (uint8(ty) << 4) | byte(s&0x0f) // first byte: type above the low 4 size bits; assumes ty fits in 3 bits so bit 0x80 stays free
+
+ s >>= 4
+ for s != 0 { // while size bits remain, emit the pending byte with the continuation bit set
+ out[n] = c | 0x80
+ n++
+ c = byte(s & 0x7f)
+ s >>= 7
+ }
+
+ out[n] = c // last byte is written without the continuation bit
+ n++
+
+ return append([]byte(nil), out[:n]...) // return a copy rather than a slice of the local array
+}
diff --git a/format/packfile/ingest/entry_prefix.go b/format/packfile/ingest/entry_prefix.go
new file mode 100644
index 00000000..a107b4e8
--- /dev/null
+++ b/format/packfile/ingest/entry_prefix.go
@@ -0,0 +1,95 @@
+package ingest
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// parseEntryPrefix parses one entry prefix from stream (type/size header plus any delta base reference) into a record whose headerLen counts every prefix byte read.
+func parseEntryPrefix(state *ingestState, startOffset uint64) (objectRecord, error) {
+ var record objectRecord
+
+ record.offset = startOffset
+
+ first, err := state.stream.ReadByte()
+ if err != nil {
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: fmt.Sprintf("read first header byte: %v", err)}
+ }
+
+ record.packedType = objecttype.Type((first >> 4) & 0x07) // bits 4-6 of the first byte
+ size := int64(first & 0x0f) // low 4 bits of the first byte are the lowest size bits
+ headerLen := uint32(1)
+ shift := uint(4)
+ b := first
+
+ for b&0x80 != 0 { // bit 0x80 means another size byte follows
+ b, err = state.stream.ReadByte()
+ if err != nil {
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: fmt.Sprintf("read size continuation: %v", err)}
+ }
+
+ headerLen++
+ size |= int64(b&0x7f) << shift
+ shift += 7 // NOTE(review): shift is never bounded; once it reaches 64 further size bits shift out to zero, so over-long headers are not reported as overflow
+ }
+
+ if size < 0 { // catches a decoded value whose sign bit ended up set
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: "negative declared size"}
+ }
+
+ record.declaredSize = size
+
+ switch record.packedType {
+ case objecttype.TypeCommit, objecttype.TypeTree, objecttype.TypeBlob, objecttype.TypeTag: // base objects carry no extra prefix fields
+ case objecttype.TypeRefDelta:
+ baseRaw := make([]byte, state.algo.Size()) // the base object ID follows the size header
+
+ err := state.stream.readFull(baseRaw)
+ if err != nil {
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: fmt.Sprintf("read ref base: %v", err)}
+ }
+
+ baseID, err := objectid.FromBytes(state.algo, baseRaw)
+ if err != nil {
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: fmt.Sprintf("parse ref base: %v", err)}
+ }
+
+ record.baseObject = baseID
+
+ baseRawLen, err := intconv.IntToUint32(len(baseRaw))
+ if err != nil {
+ return record, err
+ }
+
+ headerLen += baseRawLen // the base ID bytes count as part of the prefix
+ case objecttype.TypeOfsDelta:
+ dist, consumed, err := readOfsDistanceFromStream(state.stream)
+ if err != nil {
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: err.Error()}
+ }
+
+ if startOffset <= dist { // the computed base offset must be at least 1; a base at offset 0 is rejected too
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: "ofs base offset out of bounds"}
+ }
+
+ record.baseOffset = startOffset - dist // absolute offset of the base entry
+
+ consumedUint32, err := intconv.IntToUint32(consumed)
+ if err != nil {
+ return record, err
+ }
+
+ headerLen += consumedUint32 // the distance bytes count as part of the prefix
+ case objecttype.TypeInvalid, objecttype.TypeFuture: // same error as the default branch
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: fmt.Sprintf("unsupported object type %d", record.packedType)}
+ default:
+ return record, &MalformedPackEntryError{Offset: startOffset, Reason: fmt.Sprintf("unsupported object type %d", record.packedType)}
+ }
+
+ record.headerLen = headerLen
+
+ return record, nil
+}
diff --git a/format/packfile/ingest/errors.go b/format/packfile/ingest/errors.go
new file mode 100644
index 00000000..f6ee9757
--- /dev/null
+++ b/format/packfile/ingest/errors.go
@@ -0,0 +1,75 @@
+package ingest
+
+import (
+ "errors"
+ "fmt"
+)
+
+// InvalidPackHeaderError reports an invalid or unsupported pack header; Reason holds the human-readable detail.
+type InvalidPackHeaderError struct {
+ Reason string // detail text appended to the fixed message prefix by Error
+}
+
+// Error implements error by appending Reason to the fixed "packfile/ingest: invalid pack header: " prefix.
+func (err *InvalidPackHeaderError) Error() string {
+ return "packfile/ingest: invalid pack header: " + err.Reason
+}
+
+// PackTrailerMismatchError reports a mismatch between the computed pack hash and the trailer pack hash; it has no fields.
+type PackTrailerMismatchError struct{}
+
+// Error implements error and always returns the same fixed message.
+func (err *PackTrailerMismatchError) Error() string {
+ return "packfile/ingest: pack trailer hash mismatch"
+}
+
+// ThinPackUnresolvedError reports REF deltas left unresolved because thin-pack fixing is disabled
+// or because required bases cannot be found in the base store; Count is how many remain.
+type ThinPackUnresolvedError struct {
+ Count int // number of unresolved REF deltas, printed by Error
+}
+
+// Error implements error and includes Count in the message.
+func (err *ThinPackUnresolvedError) Error() string {
+ return fmt.Sprintf("packfile/ingest: unresolved thin deltas: %d", err.Count)
+}
+
+// MalformedPackEntryError reports malformed entry encoding at one pack offset, with a textual Reason.
+type MalformedPackEntryError struct {
+ Offset uint64 // pack offset reported in the message
+ Reason string // description of what was malformed
+}
+
+// Error implements error, formatting both Offset and Reason into the message.
+func (err *MalformedPackEntryError) Error() string {
+ return fmt.Sprintf("packfile/ingest: malformed pack entry at offset %d: %s", err.Offset, err.Reason)
+}
+
+// DeltaCycleError reports a detected cycle in delta dependency resolution, identified by one pack offset.
+type DeltaCycleError struct {
+ Offset uint64 // pack offset reported in the message
+}
+
+// Error implements error and includes Offset in the message.
+func (err *DeltaCycleError) Error() string {
+ return fmt.Sprintf("packfile/ingest: delta cycle detected at offset %d", err.Offset)
+}
+
+// DestinationWriteError reports destination I/O failures; Op is a free-form description of the failed operation.
+type DestinationWriteError struct {
+ Op string // NOTE(review): holds only text, so any underlying error formatted into it cannot be unwrapped by callers
+}
+
+// Error implements error by appending Op to the fixed "packfile/ingest: destination write failure: " prefix.
+func (err *DestinationWriteError) Error() string {
+ return "packfile/ingest: destination write failure: " + err.Op
+}
+
+var errExternalThinBase = errors.New("packfile/ingest: external thin base required") // unexported sentinel; presumably signals that a delta base lives outside the pack — confirm at call sites
+
+var ( // exported sentinels; compare with errors.Is
+ // ErrZeroObjectContinue indicates Continue was called for a zero-object pack.
+ ErrZeroObjectContinue = errors.New("packfile/ingest: cannot continue zero-object pack")
+ // ErrNonZeroDiscard indicates Discard was called for a non-zero-object pack.
+ ErrNonZeroDiscard = errors.New("packfile/ingest: cannot discard non-zero pack")
+)
diff --git a/format/packfile/ingest/file_section_writer.go b/format/packfile/ingest/file_section_writer.go
new file mode 100644
index 00000000..fa28c1a9
--- /dev/null
+++ b/format/packfile/ingest/file_section_writer.go
@@ -0,0 +1,22 @@
+package ingest
+
+import "os"
+
+// fileSectionWriter writes sequentially into file via WriteAt, starting at base offset off and tracking progress in pos.
+type fileSectionWriter struct {
+ file *os.File // destination, written only through WriteAt
+ off int64 // absolute file offset at which the section starts
+ pos int64 // bytes written so far, relative to off
+}
+
+// Write writes src at file offset off+pos and advances pos by the number of bytes WriteAt reports; it returns WriteAt's results.
+func (writer *fileSectionWriter) Write(src []byte) (int, error) {
+ if len(src) == 0 { // empty writes return early without calling WriteAt
+ return 0, nil
+ }
+
+ n, err := writer.file.WriteAt(src, writer.off+writer.pos)
+ writer.pos += int64(n) // advanced by n even when err is non-nil
+
+ return n, err
+}
diff --git a/format/packfile/ingest/fill.go b/format/packfile/ingest/fill.go
new file mode 100644
index 00000000..eca4e4d6
--- /dev/null
+++ b/format/packfile/ingest/fill.go
@@ -0,0 +1,44 @@
+package ingest
+
+import (
+ "errors"
+ "fmt"
+ "io"
+)
+
+// fill ensures at least minLen unread bytes (scanner.n-scanner.off) are buffered, reading from scanner.src as needed.
+func (scanner *streamScanner) fill(minLen int) error {
+ if minLen <= 0 { // nothing requested
+ return nil
+ }
+
+ if minLen > len(scanner.buf) { // the request can never fit in the buffer
+ return fmt.Errorf("packfile/ingest: fill(%d) exceeds scanner buffer", minLen)
+ }
+
+ for scanner.n-scanner.off < minLen {
+ err := scanner.flushConsumedPrefix() // write out consumed bytes and compact unread ones to the buffer start
+ if err != nil {
+ return err
+ }
+
+ readN, err := scanner.src.Read(scanner.buf[scanner.n:])
+ if readN > 0 {
+ scanner.n += readN // bytes are counted even when Read also returned an error
+ }
+
+ if err != nil {
+ if errors.Is(err, io.EOF) && scanner.n-scanner.off >= minLen { // EOF is not an error if this read satisfied the request
+ return nil
+ }
+
+ return err
+ }
+
+ if readN == 0 { // a (0, nil) read is reported as no progress instead of retrying
+ return io.ErrNoProgress
+ }
+ }
+
+ return nil
+}
diff --git a/format/packfile/ingest/finalize.go b/format/packfile/ingest/finalize.go
new file mode 100644
index 00000000..6fe4edb2
--- /dev/null
+++ b/format/packfile/ingest/finalize.go
@@ -0,0 +1,94 @@
+package ingest
+
+import (
+ "errors"
+ "fmt"
+ "io/fs"
+ "strings"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+// finalizeArtifacts links the temporary pack, idx and (when opts.WriteRev is set) rev files to "pack-<packHash>.*" names and returns Result.
+func finalizeArtifacts(state *ingestState) (Result, error) {
+ base := "pack-" + state.packHash.String() // all final names share this hash-derived stem
+ packFinal := base + ".pack"
+ idxFinal := base + ".idx"
+
+ revFinal := "" // stays empty, and is returned empty in Result, unless WriteRev is set
+ if state.opts.WriteRev {
+ revFinal = base + ".rev"
+ }
+
+ err := linkTempToFinal(state, state.packTmpName, packFinal)
+ if err != nil {
+ return Result{}, err
+ }
+
+ err = linkTempToFinal(state, state.idxTmpName, idxFinal)
+ if err != nil {
+ return Result{}, err // NOTE(review): packFinal is already linked at this point and is not removed here
+ }
+
+ if state.opts.WriteRev {
+ err := linkTempToFinal(state, state.revTmpName, revFinal)
+ if err != nil {
+ return Result{}, err // NOTE(review): packFinal and idxFinal are already linked and are not removed here
+ }
+ }
+
+ objectCount, err := intconv.IntToUint32(len(state.records)) // checked conversion of the record count
+ if err != nil {
+ return Result{}, err
+ }
+
+ return Result{
+ PackName: packFinal,
+ IdxName: idxFinal,
+ RevName: revFinal,
+ PackHash: state.packHash,
+ ObjectCount: objectCount,
+ ThinFixed: state.thinFixed,
+ }, nil
+}
+
+// rollbackTemporaryArtifacts removes the temporary pack, idx and rev files after failure; empty names are skipped and Remove errors are ignored.
+func rollbackTemporaryArtifacts(state *ingestState) {
+ if state.packTmpName != "" {
+ _ = state.destination.Remove(state.packTmpName) // best effort: the error is discarded
+ }
+
+ if state.idxTmpName != "" {
+ _ = state.destination.Remove(state.idxTmpName)
+ }
+
+ if state.revTmpName != "" {
+ _ = state.destination.Remove(state.revTmpName)
+ }
+}
+
+// linkTempToFinal hard-links tmp to final and removes tmp; an already existing final (fs.ErrExist) is treated as success.
+func linkTempToFinal(state *ingestState, tmp, final string) error {
+ if tmp == "" || final == "" { // both names are required
+ return fmt.Errorf("packfile/ingest: invalid finalize names tmp=%q final=%q", tmp, final)
+ }
+
+ if strings.Contains(final, "/") { // final must be a plain file name, not a path
+ return fmt.Errorf("packfile/ingest: final name must be leaf: %q", final)
+ }
+
+ err := state.destination.Link(tmp, final)
+ if err == nil {
+ _ = state.destination.Remove(tmp) // the link succeeded; removal of tmp is best effort
+
+ return nil
+ }
+
+ if errors.Is(err, fs.ErrExist) { // presumably an existing final holds the same content since names derive from the pack hash — confirm
+ _ = state.destination.Remove(tmp)
+
+ return nil
+ }
+
+ return err
+}
diff --git a/format/packfile/ingest/flush.go b/format/packfile/ingest/flush.go
new file mode 100644
index 00000000..96753170
--- /dev/null
+++ b/format/packfile/ingest/flush.go
@@ -0,0 +1,37 @@
+package ingest
+
+import "fmt"
+
+// flush writes all consumed-but-unflushed bytes to the destination pack file by delegating to flushConsumedPrefix.
+func (scanner *streamScanner) flush() error {
+ return scanner.flushConsumedPrefix()
+}
+
+// flushConsumedPrefix writes scanner.buf[:scanner.off] to scanner.dstFile, then moves the
+// unread bytes buf[off:n] to the start of the buffer and resets off to 0.
+func (scanner *streamScanner) flushConsumedPrefix() error {
+ if scanner.off == 0 { // nothing consumed, so nothing to write or move
+ return nil
+ }
+
+ written := 0
+ for written < scanner.off { // keep writing until the whole consumed prefix is out
+ n, err := scanner.dstFile.Write(scanner.buf[written:scanner.off])
+ if err != nil {
+ return &DestinationWriteError{Op: fmt.Sprintf("write pack: %v", err)}
+ }
+
+ if n == 0 { // a (0, nil) write would loop forever, so it is reported as an error
+ return &DestinationWriteError{Op: "write pack: short write"}
+ }
+
+ written += n
+ }
+
+ unread := scanner.n - scanner.off
+ copy(scanner.buf[:unread], scanner.buf[scanner.off:scanner.n]) // slide unread bytes to the front
+ scanner.off = 0
+ scanner.n = unread
+
+ return nil
+}
diff --git a/format/packfile/ingest/hash.go b/format/packfile/ingest/hash.go
new file mode 100644
index 00000000..4b739c20
--- /dev/null
+++ b/format/packfile/ingest/hash.go
@@ -0,0 +1,27 @@
+package ingest
+
+import (
+ "fmt"
+
+ objectheader "codeberg.org/lindenii/furgit/object/header"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// hashCanonicalObject hashes canonical object bytes (encoded header followed by content) with algo and returns the resulting ObjectID.
+func hashCanonicalObject(algo objectid.Algorithm, ty objecttype.Type, content []byte) (objectid.ObjectID, error) {
+ header, ok := objectheader.Encode(ty, int64(len(content))) // header is built from the type and the content length
+ if !ok {
+ return objectid.ObjectID{}, fmt.Errorf("packfile/ingest: encode object header for type %d", ty)
+ }
+
+ hashImpl, err := algo.New()
+ if err != nil {
+ return objectid.ObjectID{}, err
+ }
+
+ _, _ = hashImpl.Write(header) // write results are discarded; presumably a hash.Hash, whose Write never fails — confirm
+ _, _ = hashImpl.Write(content)
+
+ return objectid.FromBytes(algo, hashImpl.Sum(nil))
+}
diff --git a/format/packfile/ingest/header.go b/format/packfile/ingest/header.go
new file mode 100644
index 00000000..6a214828
--- /dev/null
+++ b/format/packfile/ingest/header.go
@@ -0,0 +1,49 @@
+package ingest
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "codeberg.org/lindenii/furgit/format/packfile"
+)
+
+const packHeaderSize = 12 // three big-endian uint32 fields: signature, version, object count
+
+// readAndValidatePackHeader reads one PACK header from src, validates it, and returns the parsed info plus the raw header bytes.
+func readAndValidatePackHeader(src io.Reader) (HeaderInfo, [packHeaderSize]byte, error) {
+ var hdr [packHeaderSize]byte
+
+ _, err := io.ReadFull(src, hdr[:]) // exactly packHeaderSize bytes are consumed from src
+ if err != nil {
+ return HeaderInfo{}, [packHeaderSize]byte{}, &InvalidPackHeaderError{
+ Reason: fmt.Sprintf("read header: %v", err),
+ }
+ }
+
+ header, err := parseAndValidatePackHeader(hdr)
+ if err != nil {
+ return HeaderInfo{}, [packHeaderSize]byte{}, err // zeroed header bytes are returned on failure
+ }
+
+ return header, hdr, nil
+}
+
+// parseAndValidatePackHeader validates one already-read PACK header and returns its version and object count.
+func parseAndValidatePackHeader(hdr [packHeaderSize]byte) (HeaderInfo, error) {
+ if binary.BigEndian.Uint32(hdr[:4]) != packfile.Signature { // bytes 0-3: signature
+ return HeaderInfo{}, &InvalidPackHeaderError{Reason: "signature mismatch"}
+ }
+
+ version := binary.BigEndian.Uint32(hdr[4:8]) // bytes 4-7: version
+ if !packfile.VersionSupported(version) {
+ return HeaderInfo{}, &InvalidPackHeaderError{
+ Reason: fmt.Sprintf("unsupported version %d", version),
+ }
+ }
+
+ return HeaderInfo{
+ Version: version,
+ ObjectCount: binary.BigEndian.Uint32(hdr[8:12]), // bytes 8-11: object count, not validated here
+ }, nil
+}
diff --git a/format/packfile/ingest/idx_write.go b/format/packfile/ingest/idx_write.go
new file mode 100644
index 00000000..506788b9
--- /dev/null
+++ b/format/packfile/ingest/idx_write.go
@@ -0,0 +1,266 @@
+package ingest
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "hash"
+ "io"
+ "slices"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ "codeberg.org/lindenii/furgit/internal/progress"
+)
+
+const (
+ idxMagicV2 = 0xff744f63 // first header word written by writeIdx; the bytes are "\377tOc"
+ idxVersionV2 = 2 // second header word written by writeIdx
+)
+
+// writeIdx writes idx v2 for resolved records to state.idxFile: header, fanout, object ids, crc32s, offsets, large offsets, then pack hash and idx hash.
+func writeIdx(state *ingestState) error {
+ order := buildIdxOrder(state) // record indexes sorted by object id; every table below is emitted in this order
+
+ hashImpl, err := state.algo.New()
+ if err != nil {
+ return err
+ }
+
+ write := func(src []byte) error { // writes src to the idx file and feeds it to the running idx hash; NOTE(review): same logic as writeAndHash
+ _, writeErr := state.idxFile.Write(src)
+ if writeErr != nil {
+ return writeErr
+ }
+
+ _, writeErr = hashImpl.Write(src)
+ if writeErr != nil {
+ return writeErr
+ }
+
+ return nil
+ }
+
+ var (
+ scratch [8]byte // reused encoding buffer for 4- and 8-byte big-endian words
+ fanout [256]uint32 // per-first-byte object counts, turned into cumulative counts when written
+ )
+
+ writeProgressf(state, "writing index fanout...\r")
+
+ for _, recordIdx := range order {
+ idRaw := state.records[recordIdx].objectID.Bytes()
+ fanout[idRaw[0]]++ // bucket by the first byte of the object id
+ }
+
+ binary.BigEndian.PutUint32(scratch[:4], idxMagicV2)
+ binary.BigEndian.PutUint32(scratch[4:8], idxVersionV2)
+
+ err = write(scratch[:8]) // 8-byte header: magic followed by version
+ if err != nil {
+ return err
+ }
+
+ var cumulative uint32
+ for i := range fanout {
+ cumulative += fanout[i] // entry i is the number of objects whose first id byte is <= i
+ binary.BigEndian.PutUint32(scratch[:4], cumulative)
+
+ err := write(scratch[:4])
+ if err != nil {
+ return err
+ }
+ }
+
+ writeProgressf(state, "writing index fanout: done.\n")
+
+ largeOffsetCount := 0 // counted up front; used below only as the Total of the large-offset progress meter
+
+ for idx := range state.records {
+ if state.records[idx].offset >= 0x80000000 {
+ largeOffsetCount++
+ }
+ }
+
+ oidMeter := progress.New(progress.Options{
+ Writer: state.opts.Progress,
+ Flush: state.opts.ProgressFlush,
+ Title: "writing index object ids",
+ Total: uint64(len(order)),
+ })
+
+ var oidDone uint64
+
+ for _, recordIdx := range order { // object id table
+ idRaw := state.records[recordIdx].objectID.Bytes()
+
+ err := write(idRaw)
+ if err != nil {
+ return err
+ }
+
+ oidDone++
+ oidMeter.Set(oidDone, 0)
+ }
+
+ if oidDone > 0 { // Stop is only called when at least one entry was written
+ oidMeter.Stop("done")
+ }
+
+ crcMeter := progress.New(progress.Options{
+ Writer: state.opts.Progress,
+ Flush: state.opts.ProgressFlush,
+ Title: "writing index crc32",
+ Total: uint64(len(order)),
+ })
+
+ var crcDone uint64
+
+ for _, recordIdx := range order { // crc32 table, one big-endian uint32 per record
+ binary.BigEndian.PutUint32(scratch[:4], state.records[recordIdx].crc32)
+
+ err := write(scratch[:4])
+ if err != nil {
+ return err
+ }
+
+ crcDone++
+ crcMeter.Set(crcDone, 0)
+ }
+
+ if crcDone > 0 {
+ crcMeter.Stop("done")
+ }
+
+ largeOffsets := make([]uint64, 0) // offsets >= 0x80000000, collected in table order for the 64-bit table
+ offsetMeter := progress.New(progress.Options{
+ Writer: state.opts.Progress,
+ Flush: state.opts.ProgressFlush,
+ Title: "writing index offsets",
+ Total: uint64(len(order)),
+ })
+
+ var offsetDone uint64
+
+ for _, recordIdx := range order { // 32-bit offset table
+ offset := state.records[recordIdx].offset
+ if offset >= 0x80000000 { // does not fit in 31 bits: store an index into the large-offset table instead
+ largeOffsetIdx, err := intconv.IntToUint32(len(largeOffsets))
+ if err != nil {
+ return err
+ }
+
+ word := 0x80000000 | largeOffsetIdx // top bit marks the word as a large-offset index
+
+ largeOffsets = append(largeOffsets, offset)
+
+ binary.BigEndian.PutUint32(scratch[:4], word)
+ } else {
+ binary.BigEndian.PutUint32(scratch[:4], uint32(offset)) // conversion is lossless: offset < 0x80000000 here
+ }
+
+ err := write(scratch[:4])
+ if err != nil {
+ return err
+ }
+
+ offsetDone++
+ offsetMeter.Set(offsetDone, 0)
+ }
+
+ if offsetDone > 0 {
+ offsetMeter.Stop("done")
+ }
+
+ total, err := intconv.IntToUint64(largeOffsetCount)
+ if err != nil {
+ return err
+ }
+
+ largeOffsetMeter := progress.New(progress.Options{
+ Writer: state.opts.Progress,
+ Flush: state.opts.ProgressFlush,
+ Title: "writing index large offsets",
+ Total: total,
+ })
+
+ var largeOffsetDone uint64
+
+ for _, off := range largeOffsets { // 64-bit offset table, in the order the indexes were handed out above
+ binary.BigEndian.PutUint64(scratch[:8], off)
+
+ err := write(scratch[:8])
+ if err != nil {
+ return err
+ }
+
+ largeOffsetDone++
+ largeOffsetMeter.Set(largeOffsetDone, 0)
+ }
+
+ if largeOffsetDone > 0 {
+ largeOffsetMeter.Stop("done")
+ }
+
+ writeProgressf(state, "writing index trailer...\r")
+
+ err = write(state.packHash.Bytes()) // the pack hash is part of the hashed idx content
+ if err != nil {
+ return err
+ }
+
+ idxHash := hashImpl.Sum(nil) // hash over everything written so far
+
+ _, err = state.idxFile.Write(idxHash) // written directly, bypassing the write closure, so it is not hashed itself
+ if err != nil {
+ return err
+ }
+
+ err = state.idxFile.Sync()
+ if err != nil {
+ return err
+ }
+
+ writeProgressf(state, "writing index trailer: done.\n")
+
+ return nil
+}
+
+// buildIdxOrder returns the indexes of state.records sorted by ascending ObjectID bytes; state.records itself is not reordered.
+func buildIdxOrder(state *ingestState) []int {
+ out := make([]int, 0, len(state.records))
+ for idx := range state.records { // start from the identity permutation 0..len-1
+ out = append(out, idx)
+ }
+
+ slices.SortFunc(out, func(a, b int) int { // a and b are record indexes, compared through their object id bytes
+ return bytes.Compare(state.records[a].objectID.Bytes(), state.records[b].objectID.Bytes())
+ })
+
+ return out
+}
+
+// verifyResolvedRecords returns an error naming the first record whose resolved flag is false, or nil when all are resolved.
+func verifyResolvedRecords(state *ingestState) error {
+ for idx, record := range state.records {
+ if !record.resolved { // stop at the first unresolved record
+ return fmt.Errorf("packfile/ingest: unresolved record %d at offset %d", idx, record.offset)
+ }
+ }
+
+ return nil
+}
+
+// writeAndHash writes src to dst and then feeds the same bytes to hashImpl, returning the first error encountered.
+func writeAndHash(dst io.Writer, hashImpl hash.Hash, src []byte) error {
+ _, err := dst.Write(src)
+ if err != nil { // the hash is not updated when the destination write fails
+ return err
+ }
+
+ _, err = hashImpl.Write(src)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/format/packfile/ingest/ingest.go b/format/packfile/ingest/ingest.go
new file mode 100644
index 00000000..be65ff5f
--- /dev/null
+++ b/format/packfile/ingest/ingest.go
@@ -0,0 +1,68 @@
+package ingest
+
+import (
+ "fmt"
+)
+
+// ingest opens temporary artifacts and runs the pipeline: scan, resolve, optional thin fix, verify, sync pack, write idx and rev, finalize.
+func ingest(state *ingestState) (out Result, err error) {
+ err = openTemporaryArtifacts(state)
+ if err != nil {
+ return Result{}, err // the cleanup defer below is not registered yet on this path
+ }
+
+ defer func() {
+ _ = closeTemporaryArtifacts(state) // the close error is discarded
+ if err != nil { // err is the named result, so this sees the error of whichever step failed
+ rollbackTemporaryArtifacts(state)
+ }
+ }()
+
+ err = streamPackAndScan(state)
+ if err != nil {
+ return Result{}, err
+ }
+
+ err = resolveAll(state)
+ if err != nil {
+ return Result{}, err
+ }
+
+ err = maybeFixThin(state)
+ if err != nil {
+ return Result{}, err
+ }
+
+ if state.thinFixed { // resolve again after the thin fix
+ err = resolveAll(state)
+ if err != nil {
+ return Result{}, err
+ }
+ }
+
+ if len(state.unresolvedRefDeltas) > 0 { // anything still unresolved is reported with its count
+ return Result{}, &ThinPackUnresolvedError{Count: len(state.unresolvedRefDeltas)}
+ }
+
+ err = verifyResolvedRecords(state)
+ if err != nil {
+ return Result{}, err
+ }
+
+ err = state.packFile.Sync() // the pack is synced before the idx is written
+ if err != nil {
+ return Result{}, &DestinationWriteError{Op: fmt.Sprintf("sync pack: %v", err)}
+ }
+
+ err = writeIdx(state)
+ if err != nil {
+ return Result{}, err
+ }
+
+ err = writeRev(state)
+ if err != nil {
+ return Result{}, err
+ }
+
+ return finalizeArtifacts(state) // its error also lands in the named err and triggers rollback in the defer
+}
diff --git a/format/packfile/ingest/ingest_test.go b/format/packfile/ingest/ingest_test.go
new file mode 100644
index 00000000..fb50d241
--- /dev/null
+++ b/format/packfile/ingest/ingest_test.go
@@ -0,0 +1,434 @@
+package ingest_test
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "io"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/testgit"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ "codeberg.org/lindenii/furgit/format/packfile/ingest"
+)
+
+type noExtraReadReader struct { // io.Reader wrapper whose Read fails, instead of returning io.EOF, once reader is drained
+ reader *bytes.Reader
+}
+
+func (r *noExtraReadReader) Read(p []byte) (int, error) { // delegates to reader while it has unread bytes
+ if r.reader.Len() == 0 { // any read attempted after the last byte was delivered is an error
+ return 0, errors.New("unexpected extra read after pack trailer")
+ }
+
+ return r.reader.Read(p)
+}
+
+func beginAndContinue( // runs ingest.Ingest on src and, on success, Continue(packRoot) on the pending result
+ src io.Reader,
+ packRoot *os.Root,
+ algo objectid.Algorithm,
+ opts ingest.Options,
+) (ingest.Result, error) {
+ pending, err := ingest.Ingest(src, algo, opts)
+ if err != nil {
+ return ingest.Result{}, err
+ }
+
+ return pending.Continue(packRoot)
+}
+
+// fixturePath returns testdata/fixtures/<algo.String()>/<name>, failing the test when the algorithm has no string name.
+func fixturePath(t *testing.T, algo objectid.Algorithm, name string) string {
+ t.Helper()
+
+ dir := algo.String() // the algorithm's string form doubles as the fixture directory name
+ if dir == "" {
+ t.Fatalf("unsupported fixture algorithm: %v", algo)
+ }
+
+ return filepath.Join("testdata", "fixtures", dir, name)
+}
+
+// fixtureBytes reads one fixture file fully through an os.Root opened on the fixture directory; any failure is fatal to the test.
+func fixtureBytes(t *testing.T, algo objectid.Algorithm, name string) []byte {
+ t.Helper()
+
+ path := fixturePath(t, algo, name)
+ dir := filepath.Dir(path) // the root is opened on the directory...
+ base := filepath.Base(path) // ...and the file is read by its leaf name
+
+ root, err := os.OpenRoot(dir)
+ if err != nil {
+ t.Fatalf("open fixture root %q: %v", dir, err)
+ }
+
+ defer func() {
+ err := root.Close() // a close failure also fails the test
+ if err != nil {
+ t.Fatalf("close fixture root %q: %v", dir, err)
+ }
+ }()
+
+ data, err := root.ReadFile(base)
+ if err != nil {
+ t.Fatalf("read fixture %q: %v", base, err)
+ }
+
+ return data
+}
+
+// fixtureMetadata parses the key=value lines of METADATA.txt for one algorithm fixture set into a map, trimming keys and values.
+func fixtureMetadata(t *testing.T, algo objectid.Algorithm) map[string]string {
+ t.Helper()
+
+ data := fixtureBytes(t, algo, "METADATA.txt")
+
+ out := make(map[string]string)
+ for line := range strings.SplitSeq(strings.TrimSpace(string(data)), "\n") {
+ line = strings.TrimSpace(line)
+ if line == "" { // blank lines are skipped
+ continue
+ }
+
+ key, value, ok := strings.Cut(line, "=") // split at the first "=" only, so values may contain "="
+ if !ok {
+ t.Fatalf("invalid fixture metadata line %q", line)
+ }
+
+ out[strings.TrimSpace(key)] = strings.TrimSpace(value) // a repeated key overwrites the earlier value
+ }
+
+ return out
+}
+
+// fixtureOID looks up key in the fixture metadata and parses its hex value as an object ID; a missing key or bad hex fails the test.
+func fixtureOID(t *testing.T, algo objectid.Algorithm, key string) objectid.ObjectID {
+ t.Helper()
+
+ meta := fixtureMetadata(t, algo) // the metadata file is re-read on every call
+
+ hex, ok := meta[key]
+ if !ok {
+ t.Fatalf("missing fixture metadata key %q", key)
+ }
+
+ id, err := objectid.ParseHex(algo, hex)
+ if err != nil {
+ t.Fatalf("parse fixture metadata oid %q: %v", hex, err)
+ }
+
+ return id
+}
+
+// verifyReindexOracle regenerates idx/rev with upstream git index-pack into a temp dir and
+// requires them to be byte-identical to the idxName/revName files produced by ingest.
+func verifyReindexOracle(t *testing.T, repo *testgit.TestRepo, packName, idxName, revName string) {
+ t.Helper()
+
+ oracleDir := t.TempDir()
+ oracleIdxPath := filepath.Join(oracleDir, "oracle.idx")
+ _ = repo.Run(t, "index-pack", "--rev-index", "-o", oracleIdxPath, filepath.Join("objects", "pack", packName))
+ oracleRevPath := strings.TrimSuffix(oracleIdxPath, ".idx") + ".rev" // the rev file is expected next to the idx, same stem
+
+ packRoot := repo.OpenPackRoot(t)
+
+ gotIdx, err := packRoot.ReadFile(idxName)
+ if err != nil {
+ t.Fatalf("read idx: %v", err)
+ }
+
+ oracleRoot, err := os.OpenRoot(oracleDir)
+ if err != nil {
+ t.Fatalf("open oracle root: %v", err)
+ }
+
+ defer func() {
+ err := oracleRoot.Close()
+ if err != nil {
+ t.Fatalf("close oracle root: %v", err)
+ }
+ }()
+
+ wantIdx, err := oracleRoot.ReadFile(filepath.Base(oracleIdxPath))
+ if err != nil {
+ t.Fatalf("read oracle idx: %v", err)
+ }
+
+ if !bytes.Equal(gotIdx, wantIdx) { // idx must match byte for byte
+ t.Fatal("idx bytes differ from git index-pack output")
+ }
+
+ gotRev, err := packRoot.ReadFile(revName)
+ if err != nil {
+ t.Fatalf("read rev: %v", err)
+ }
+
+ wantRev, err := oracleRoot.ReadFile(filepath.Base(oracleRevPath))
+ if err != nil {
+ t.Fatalf("read oracle rev: %v", err)
+ }
+
+ if !bytes.Equal(gotRev, wantRev) { // rev must match byte for byte
+ t.Fatal("rev bytes differ from git index-pack output")
+ }
+}
+
+func TestIngestNonThinPackWritesPackIdxRev(t *testing.T) { // ingests nonthin.pack and checks the pack/idx/rev outputs against git verify-pack, index-pack and fsck
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ head := fixtureOID(t, algo, "head")
+ packBytes := fixtureBytes(t, algo, "nonthin.pack")
+
+ receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+
+ packRoot := receiver.OpenPackRoot(t)
+
+ result, err := beginAndContinue(bytes.NewReader(packBytes), packRoot, algo, ingest.Options{
+ WriteRev: true,
+ RequireTrailingEOF: true,
+ })
+ if err != nil {
+ t.Fatalf("Ingest: %v", err)
+ }
+
+ if result.ThinFixed { // this test expects no thin fixing to be reported
+ t.Fatalf("ThinFixed = true, want false")
+ }
+
+ if result.RevName == "" { // WriteRev was requested, so a rev name must be reported
+ t.Fatal("RevName is empty")
+ }
+
+ _, err = packRoot.Stat(result.PackName)
+ if err != nil {
+ t.Fatalf("stat pack: %v", err)
+ }
+
+ _, err = packRoot.Stat(result.IdxName)
+ if err != nil {
+ t.Fatalf("stat idx: %v", err)
+ }
+
+ _, err = packRoot.Stat(result.RevName)
+ if err != nil {
+ t.Fatalf("stat rev: %v", err)
+ }
+
+ _ = receiver.Run(t, "verify-pack", "-v", filepath.Join("objects", "pack", result.IdxName))
+ verifyReindexOracle(t, receiver, result.PackName, result.IdxName, result.RevName)
+
+ receiver.UpdateRef(t, "refs/heads/main", head) // the ref is pointed at the fixture head before fsck runs
+ _ = receiver.Run(t, "fsck", "--full", "--strict", "--no-progress", "--no-dangling")
+ })
+}
+
+func TestIngestThinPackWithoutFixReturnsUnresolved(t *testing.T) { // thin.pack without FixThin must fail with *ThinPackUnresolvedError and leave no .pack file
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ thinPack := fixtureBytes(t, algo, "thin.pack")
+
+ receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+ packRoot := receiver.OpenPackRoot(t)
+
+ _, err := beginAndContinue(bytes.NewReader(thinPack), packRoot, algo, ingest.Options{
+ WriteRev: true,
+ RequireTrailingEOF: true,
+ })
+ if err == nil {
+ t.Fatal("Ingest error = nil, want error")
+ }
+
+ if _, ok := errors.AsType[*ingest.ThinPackUnresolvedError](err); !ok { // the concrete error type is part of the contract under test
+ t.Fatalf("Ingest error type = %T (%v), want *ThinPackUnresolvedError", err, err)
+ }
+
+ entries, err := fs.ReadDir(packRoot.FS(), ".")
+ if err != nil {
+ t.Fatalf("ReadDir(pack): %v", err)
+ }
+
+ for _, entry := range entries { // a failed ingest must not leave any file ending in .pack in the pack directory
+ if strings.HasSuffix(entry.Name(), ".pack") {
+ t.Fatalf("found finalized pack file after failure: %v", entry.Name())
+ }
+ }
+ })
+}
+
+func TestIngestThinPackWithFixThin(t *testing.T) { // ingests base.pack, then thin.pack with FixThin and the receiver's objects as Base, and verifies the result with git
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ head := fixtureOID(t, algo, "head")
+ basePack := fixtureBytes(t, algo, "base.pack")
+ thinPack := fixtureBytes(t, algo, "thin.pack")
+ receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+
+ packRoot := receiver.OpenPackRoot(t)
+
+ _, err := beginAndContinue(bytes.NewReader(basePack), packRoot, algo, ingest.Options{
+ RequireTrailingEOF: true,
+ })
+ if err != nil {
+ t.Fatalf("ingest base pack: %v", err)
+ }
+
+ receiverRepo := receiver.OpenRepository(t) // opened after the base pack is ingested
+
+ result, err := beginAndContinue(bytes.NewReader(thinPack), packRoot, algo, ingest.Options{
+ FixThin: true,
+ WriteRev: true,
+ Base: receiverRepo.Objects(),
+ RequireTrailingEOF: true,
+ })
+ if err != nil {
+ t.Fatalf("Ingest(thin): %v", err)
+ }
+
+ if !result.ThinFixed { // the thin pack must be reported as fixed
+ t.Fatal("ThinFixed = false, want true")
+ }
+
+ _ = receiver.Run(t, "verify-pack", "-v", filepath.Join("objects", "pack", result.IdxName))
+ verifyReindexOracle(t, receiver, result.PackName, result.IdxName, result.RevName)
+ receiver.UpdateRef(t, "refs/heads/main", head)
+ _ = receiver.Run(t, "fsck", "--full", "--strict", "--no-progress", "--no-dangling")
+ })
+}
+
+func TestIngestPackTrailerMismatch(t *testing.T) { // corrupts the last byte of nonthin.pack and expects *PackTrailerMismatchError with no .pack file left behind
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ packBytes := fixtureBytes(t, algo, "nonthin.pack")
+ if len(packBytes) == 0 { // guards the index expression below
+ t.Fatal("empty pack stream")
+ }
+
+ packBytes[len(packBytes)-1] ^= 0xff // invert every bit of the final byte
+
+ receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+ packRoot := receiver.OpenPackRoot(t)
+
+ _, err := beginAndContinue(bytes.NewReader(packBytes), packRoot, algo, ingest.Options{
+ WriteRev: true,
+ RequireTrailingEOF: true,
+ })
+ if err == nil {
+ t.Fatal("Ingest error = nil, want error")
+ }
+
+ if _, ok := errors.AsType[*ingest.PackTrailerMismatchError](err); !ok {
+ t.Fatalf("Ingest error type = %T (%v), want *PackTrailerMismatchError", err, err)
+ }
+
+ entries, err := fs.ReadDir(packRoot.FS(), ".")
+ if err != nil {
+ t.Fatalf("ReadDir(pack): %v", err)
+ }
+
+ for _, entry := range entries { // a failed ingest must not leave any file ending in .pack in the pack directory
+ if strings.HasSuffix(entry.Name(), ".pack") {
+ t.Fatalf("found finalized pack file after failure: %v", entry.Name())
+ }
+ }
+ })
+}
+
+func zeroObjectPackBytes(t *testing.T, algo objectid.Algorithm) []byte { // builds a version-2 pack stream with zero objects: 12-byte header plus the algo hash of that header
+ t.Helper()
+
+ hashImpl, err := algo.New()
+ if err != nil {
+ t.Fatalf("algo.New: %v", err)
+ }
+
+ var header [12]byte
+ copy(header[:4], []byte{'P', 'A', 'C', 'K'})
+ binary.BigEndian.PutUint32(header[4:8], 2) // version
+ binary.BigEndian.PutUint32(header[8:12], 0) // object count
+
+ _, _ = hashImpl.Write(header[:]) // the trailer hash covers only the header, since there are no entries
+
+ return append(header[:], hashImpl.Sum(nil)...)
+}
+
+func TestIngestDiscardZeroObjectPack(t *testing.T) { // a zero-object pack can be passed to Ingest and then Discard, both reporting ObjectCount 0
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ packBytes := zeroObjectPackBytes(t, algo)
+
+ pending, err := ingest.Ingest(bytes.NewReader(packBytes), algo, ingest.Options{
+ RequireTrailingEOF: true,
+ })
+ if err != nil {
+ t.Fatalf("Ingest: %v", err)
+ }
+
+ if pending.Header().ObjectCount != 0 {
+ t.Fatalf("ObjectCount = %d, want 0", pending.Header().ObjectCount)
+ }
+
+ discarded, err := pending.Discard() // no pack root is supplied on the discard path
+ if err != nil {
+ t.Fatalf("Discard: %v", err)
+ }
+
+ if discarded.ObjectCount != 0 {
+ t.Fatalf("Discard.ObjectCount = %d, want 0", discarded.ObjectCount)
+ }
+ })
+}
+
+func TestIngestContinueRejectsZeroObjectPack(t *testing.T) { // Continue on a zero-object pack must return ErrZeroObjectContinue
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ packBytes := zeroObjectPackBytes(t, algo)
+ receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+ packRoot := receiver.OpenPackRoot(t)
+
+ pending, err := ingest.Ingest(bytes.NewReader(packBytes), algo, ingest.Options{
+ RequireTrailingEOF: true,
+ })
+ if err != nil {
+ t.Fatalf("Ingest: %v", err)
+ }
+
+ _, err = pending.Continue(packRoot)
+ if !errors.Is(err, ingest.ErrZeroObjectContinue) { // matched via errors.Is, so a wrapped sentinel is accepted too
+ t.Fatalf("Continue error = %v, want ErrZeroObjectContinue", err)
+ }
+ })
+}
+
+func TestIngestCanFinishWithoutTrailingEOF(t *testing.T) { // with RequireTrailingEOF unset, ingest must succeed on a reader that errors on any read past the last pack byte
+ t.Parallel()
+
+ testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { //nolint:thelper
+ head := fixtureOID(t, algo, "head")
+ packBytes := fixtureBytes(t, algo, "nonthin.pack")
+
+ receiver := testgit.NewRepo(t, testgit.RepoOptions{ObjectFormat: algo, Bare: true})
+ packRoot := receiver.OpenPackRoot(t)
+
+ result, err := beginAndContinue(&noExtraReadReader{reader: bytes.NewReader(packBytes)}, packRoot, algo, ingest.Options{
+ WriteRev: true,
+ })
+ if err != nil { // an extra read would surface here as the reader's "unexpected extra read" error
+ t.Fatalf("Ingest without trailing EOF: %v", err)
+ }
+
+ receiver.UpdateRef(t, "refs/heads/main", head)
+ _ = receiver.Run(t, "verify-pack", "-v", filepath.Join("objects", "pack", result.IdxName))
+ _ = receiver.Run(t, "fsck", "--full", "--strict", "--no-progress", "--no-dangling")
+ })
+}
diff --git a/format/packfile/ingest/progress_write.go b/format/packfile/ingest/progress_write.go
new file mode 100644
index 00000000..5b9f184b
--- /dev/null
+++ b/format/packfile/ingest/progress_write.go
@@ -0,0 +1,11 @@
+package ingest
+
+import "codeberg.org/lindenii/furgit/internal/utils"
+
+func writeProgressf(state *ingestState, format string, args ...any) { // best-effort formatted progress output to opts.Progress, followed by an optional flush
+ utils.BestEffortFprintf(state.opts.Progress, format, args...)
+
+ if state.opts.ProgressFlush != nil { // the flush hook is optional; its error is discarded
+ _ = state.opts.ProgressFlush()
+ }
+}
diff --git a/format/packfile/ingest/record_content.go b/format/packfile/ingest/record_content.go
new file mode 100644
index 00000000..47f5321f
--- /dev/null
+++ b/format/packfile/ingest/record_content.go
@@ -0,0 +1,30 @@
+package ingest
+
+import (
+ "fmt"
+
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+ packfmt "codeberg.org/lindenii/furgit/format/packfile"
+)
+
+// readBaseRecordContent inflates one non-delta record and returns its packed type and content after checking the declared size.
+func readBaseRecordContent(state *ingestState, idx int) (objecttype.Type, []byte, error) {
+ record := state.records[idx] // idx is not bounds-checked here
+ if !packfmt.IsBaseObjectType(record.packedType) { // delta records are rejected
+ return objecttype.TypeInvalid, nil, fmt.Errorf("packfile/ingest: record %d is not a base object", idx)
+ }
+
+ content, err := inflateRecordPayload(state, idx)
+ if err != nil {
+ return objecttype.TypeInvalid, nil, err
+ }
+
+ if int64(len(content)) != record.declaredSize { // the inflated length must equal the size declared in the entry header
+ return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+ Offset: record.offset,
+ Reason: fmt.Sprintf("base content size mismatch got %d want %d", len(content), record.declaredSize),
+ }
+ }
+
+ return record.packedType, content, nil
+}
diff --git a/format/packfile/ingest/record_delta.go b/format/packfile/ingest/record_delta.go
new file mode 100644
index 00000000..31fb4b62
--- /dev/null
+++ b/format/packfile/ingest/record_delta.go
@@ -0,0 +1,60 @@
+package ingest
+
+import (
+ "fmt"
+
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+ deltaapply "codeberg.org/lindenii/furgit/format/packfile/delta/apply"
+)
+
+// applyDeltaRecord inflates one OFS/REF delta record, validates its sizes, applies it to baseContent, and returns baseType with the result.
+func applyDeltaRecord(state *ingestState, idx int, baseType objecttype.Type, baseContent []byte) (objecttype.Type, []byte, error) {
+ record := state.records[idx] // idx is not bounds-checked here
+ if record.packedType != objecttype.TypeOfsDelta && record.packedType != objecttype.TypeRefDelta { // only the two delta kinds are accepted
+ return objecttype.TypeInvalid, nil, fmt.Errorf("packfile/ingest: record %d is not a delta record", idx)
+ }
+
+ deltaPayload, err := inflateRecordPayload(state, idx)
+ if err != nil {
+ return objecttype.TypeInvalid, nil, err
+ }
+
+ if int64(len(deltaPayload)) != record.declaredSize { // for delta records the declared size is compared with the inflated delta payload
+ return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+ Offset: record.offset,
+ Reason: fmt.Sprintf("delta payload size mismatch got %d want %d", len(deltaPayload), record.declaredSize),
+ }
+ }
+
+ srcSize, dstSize, err := readDeltaHeaderSizes(deltaPayload) // source and result sizes taken from the delta payload
+ if err != nil {
+ return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+ Offset: record.offset,
+ Reason: fmt.Sprintf("read delta header: %v", err),
+ }
+ }
+
+ if srcSize != len(baseContent) { // the delta must have been made against a base of exactly this length
+ return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+ Offset: record.offset,
+ Reason: fmt.Sprintf("delta source size mismatch got %d want %d", srcSize, len(baseContent)),
+ }
+ }
+
+ content, err := deltaapply.Apply(baseContent, deltaPayload)
+ if err != nil {
+ return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+ Offset: record.offset,
+ Reason: fmt.Sprintf("apply delta: %v", err),
+ }
+ }
+
+ if len(content) != dstSize { // the result length is re-checked against the delta header
+ return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+ Offset: record.offset,
+ Reason: fmt.Sprintf("delta result size mismatch got %d want %d", len(content), dstSize),
+ }
+ }
+
+ return baseType, content, nil // the result is returned with the base's type
+}
diff --git a/format/packfile/ingest/record_inflate.go b/format/packfile/ingest/record_inflate.go
new file mode 100644
index 00000000..b8eca25b
--- /dev/null
+++ b/format/packfile/ingest/record_inflate.go
@@ -0,0 +1,46 @@
+package ingest
+
+import (
+ "compress/zlib"
+ "fmt"
+ "io"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+)
+
+// inflateRecordPayload reads the compressed bytes of record idx back from the
+// pack file and returns them fully inflated.
+//
+// The compressed span starts right after the entry header and runs to the end
+// of the packed entry. Zlib failures are reported as MalformedPackEntryError at
+// the record's offset; integer conversion failures are returned unchanged.
+func inflateRecordPayload(state *ingestState, idx int) ([]byte, error) {
+	rec := &state.records[idx]
+	hdrLen := uint64(rec.headerLen)
+
+	if hdrLen > rec.packedLen {
+		return nil, &MalformedPackEntryError{Offset: rec.offset, Reason: "entry packed span underflow"}
+	}
+
+	start, err := intconv.Uint64ToInt64(rec.offset + hdrLen)
+	if err != nil {
+		return nil, err
+	}
+
+	length, err := intconv.Uint64ToInt64(rec.packedLen - hdrLen)
+	if err != nil {
+		return nil, err
+	}
+
+	zr, err := zlib.NewReader(io.NewSectionReader(state.packFile, start, length))
+	if err != nil {
+		return nil, &MalformedPackEntryError{Offset: rec.offset, Reason: fmt.Sprintf("open payload zlib: %v", err)}
+	}
+
+	// Close errors are deliberately dropped: the payload has already been
+	// read (or the read error is being returned) by the time this runs.
+	defer func() { _ = zr.Close() }()
+
+	inflated, err := io.ReadAll(zr)
+	if err != nil {
+		return nil, &MalformedPackEntryError{Offset: rec.offset, Reason: fmt.Sprintf("inflate payload: %v", err)}
+	}
+
+	return inflated, nil
+}
diff --git a/format/packfile/ingest/record_resolve.go b/format/packfile/ingest/record_resolve.go
new file mode 100644
index 00000000..1ccc427b
--- /dev/null
+++ b/format/packfile/ingest/record_resolve.go
@@ -0,0 +1,117 @@
+package ingest
+
+import (
+ "fmt"
+
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+ packfmt "codeberg.org/lindenii/furgit/format/packfile"
+)
+
+// resolveRecord resolves record idx and returns its canonical type/content.
+//
+// visiting holds the record indexes currently on the resolution path; meeting
+// idx in it again means the delta chain loops and yields a DeltaCycleError.
+// Results come from the base cache when possible. Otherwise base entries are
+// read directly and delta entries are rebuilt on top of their recursively
+// resolved base. A REF delta whose base object is not present in the pack
+// returns errExternalThinBase so the caller can treat the pack as thin.
+//
+// On success the record's objectID/realType are finalized (unless they already
+// were), objectToRecord is updated, and the content is added to the cache.
+func resolveRecord(state *ingestState, idx int, visiting map[int]struct{}) (objecttype.Type, []byte, error) {
+	if idx < 0 || idx >= len(state.records) {
+		return objecttype.TypeInvalid, nil, fmt.Errorf("packfile/ingest: record index out of bounds")
+	}
+
+	if _, ok := visiting[idx]; ok {
+		return objecttype.TypeInvalid, nil, &DeltaCycleError{Offset: state.records[idx].offset}
+	}
+
+	visiting[idx] = struct{}{}
+	defer delete(visiting, idx)
+
+	record := &state.records[idx]
+	if ty, content, ok := state.baseCache.get(idx); ok {
+		return ty, content, nil
+	}
+
+	// finalize records the identity of freshly materialized content and
+	// caches it. A record that is already resolved keeps its identity, so
+	// content that is merely being rebuilt after falling out of the cache is
+	// not hashed a second time.
+	finalize := func(ty objecttype.Type, content []byte) (objecttype.Type, []byte, error) {
+		if record.resolved {
+			state.baseCache.add(idx, record.realType, content)
+
+			return record.realType, content, nil
+		}
+
+		id, err := hashCanonicalObject(state.algo, ty, content)
+		if err != nil {
+			return objecttype.TypeInvalid, nil, err
+		}
+
+		record.objectID = id
+		record.realType = ty
+		record.resolved = true
+		state.objectToRecord[id] = idx
+		state.baseCache.add(idx, ty, content)
+
+		return ty, content, nil
+	}
+
+	if packfmt.IsBaseObjectType(record.packedType) {
+		ty, content, err := readBaseRecordContent(state, idx)
+		if err != nil {
+			return objecttype.TypeInvalid, nil, err
+		}
+
+		return finalize(ty, content)
+	}
+
+	var (
+		baseType    objecttype.Type
+		baseContent []byte
+		err         error
+	)
+	switch record.packedType {
+	case objecttype.TypeOfsDelta:
+		baseIdx, ok := state.offsetToRecord[record.baseOffset]
+		if !ok {
+			return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+				Offset: record.offset,
+				Reason: "missing ofs-delta base entry",
+			}
+		}
+
+		baseType, baseContent, err = resolveRecord(state, baseIdx, visiting)
+		if err != nil {
+			return objecttype.TypeInvalid, nil, err
+		}
+	case objecttype.TypeRefDelta:
+		baseIdx, ok := state.objectToRecord[record.baseObject]
+		if !ok {
+			// The base is not (yet) known inside this pack.
+			return objecttype.TypeInvalid, nil, errExternalThinBase
+		}
+
+		baseType, baseContent, err = resolveRecord(state, baseIdx, visiting)
+		if err != nil {
+			return objecttype.TypeInvalid, nil, err
+		}
+	case objecttype.TypeInvalid,
+		objecttype.TypeCommit,
+		objecttype.TypeTree,
+		objecttype.TypeBlob,
+		objecttype.TypeTag,
+		objecttype.TypeFuture:
+		return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+			Offset: record.offset,
+			Reason: "unsupported delta type",
+		}
+	default:
+		return objecttype.TypeInvalid, nil, &MalformedPackEntryError{
+			Offset: record.offset,
+			Reason: "unsupported delta type",
+		}
+	}
+
+	ty, content, err := applyDeltaRecord(state, idx, baseType, baseContent)
+	if err != nil {
+		return objecttype.TypeInvalid, nil, err
+	}
+
+	return finalize(ty, content)
+}
diff --git a/format/packfile/ingest/records.go b/format/packfile/ingest/records.go
new file mode 100644
index 00000000..75f157fa
--- /dev/null
+++ b/format/packfile/ingest/records.go
@@ -0,0 +1,46 @@
+package ingest
+
+import (
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// objectRecord stores metadata for one packed object entry.
+type objectRecord struct {
+	// offset is the entry start offset in the pack file.
+	offset uint64
+	// headerLen is packed entry header length in bytes.
+	headerLen uint32
+	// packedLen is total packed entry length in bytes, header included
+	// (the compressed payload length is packedLen - headerLen).
+	packedLen uint64
+	// crc32 is the CRC over the full packed entry (header plus compressed
+	// payload).
+	crc32 uint32
+	// packedType is the entry type tag from the pack stream.
+	packedType objecttype.Type
+	// realType is canonical object type after delta resolution.
+	realType objecttype.Type
+	// declaredSize is the size declared for this entry's inflated payload.
+	// For delta entries that payload is the delta itself, so this is the
+	// inflated delta size rather than the size of the reconstructed object.
+	declaredSize int64
+	// dataOffset is compressed payload start offset for this entry
+	// (offset + headerLen for entries appended during thin-pack fixing).
+	dataOffset uint64
+	// baseOffset is OFS base offset when packedType is OFS delta. It is
+	// used as a key into offsetToRecord, i.e. it names the base entry's
+	// start offset in the pack.
+	baseOffset uint64
+	// baseObject is REF base object ID when packedType is REF delta.
+	baseObject objectid.ObjectID
+	// objectID is final resolved object ID.
+	objectID objectid.ObjectID
+	// resolved reports whether objectID/realType are finalized.
+	resolved bool
+}
+
+// ofsDeltaRef maps one OFS delta record to its base offset.
+type ofsDeltaRef struct {
+	// baseOffset is the pack offset of the delta's base entry.
+	baseOffset uint64
+	// recordIdx identifies the delta record; presumably an index into
+	// ingestState.records (populated outside this file — confirm).
+	recordIdx int
+}
+
+// refDeltaRef maps one REF delta record to its base object ID.
+type refDeltaRef struct {
+	// baseObject is the object ID of the delta's base.
+	baseObject objectid.ObjectID
+	// recordIdx identifies the delta record; presumably an index into
+	// ingestState.records (populated outside this file — confirm).
+	recordIdx int
+}
diff --git a/format/packfile/ingest/resolve_all.go b/format/packfile/ingest/resolve_all.go
new file mode 100644
index 00000000..e0ad2281
--- /dev/null
+++ b/format/packfile/ingest/resolve_all.go
@@ -0,0 +1,71 @@
+package ingest
+
+import (
+ "errors"
+
+ "codeberg.org/lindenii/furgit/internal/progress"
+)
+
+// resolveAll resolves all delta records and finalizes ObjectID/RealType for every record.
+//
+// Records whose REF base cannot be found inside the pack are not an error
+// here: their indexes are collected in state.unresolvedRefDeltas for the
+// thin-pack fix-up step. Any other resolution failure aborts and is returned.
+func resolveAll(state *ingestState) error {
+	state.unresolvedRefDeltas = state.unresolvedRefDeltas[:0]
+
+	var pending uint32
+
+	for idx := range state.records {
+		if !state.records[idx].resolved {
+			pending++
+		}
+	}
+
+	if pending == 0 {
+		return nil
+	}
+
+	var done uint32
+
+	meter := progress.New(progress.Options{
+		Writer: state.opts.Progress,
+		Flush:  state.opts.ProgressFlush,
+		Title:  "resolving deltas",
+		Total:  uint64(pending),
+	})
+
+	// resolveRecord removes every index it adds before returning, so one
+	// map can serve all iterations instead of allocating a new one each time.
+	visiting := make(map[int]struct{})
+
+	for idx := range state.records {
+		if state.records[idx].resolved {
+			// Already finalized, possibly as the base of an earlier record.
+			continue
+		}
+
+		done++
+		meter.Set(uint64(done), 0)
+
+		ty, content, err := resolveRecord(state, idx, visiting)
+		if err != nil {
+			if errors.Is(err, errExternalThinBase) {
+				state.unresolvedRefDeltas = append(state.unresolvedRefDeltas, idx)
+
+				continue
+			}
+
+			return err
+		}
+
+		record := &state.records[idx]
+		if record.resolved {
+			// resolveRecord has already hashed, indexed, and cached this
+			// record; hashing the content again would only repeat work.
+			continue
+		}
+
+		// Only reached when the content came straight from the cache for a
+		// record that had not been finalized yet.
+		id, err := hashCanonicalObject(state.algo, ty, content)
+		if err != nil {
+			return err
+		}
+
+		record.realType = ty
+		record.objectID = id
+		record.resolved = true
+		state.objectToRecord[id] = idx
+		state.baseCache.add(idx, ty, content)
+	}
+
+	meter.Stop("done")
+
+	return nil
+}
diff --git a/format/packfile/ingest/rev_write.go b/format/packfile/ingest/rev_write.go
new file mode 100644
index 00000000..f8c30c1b
--- /dev/null
+++ b/format/packfile/ingest/rev_write.go
@@ -0,0 +1,138 @@
+package ingest
+
+import (
+ "encoding/binary"
+ "slices"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ "codeberg.org/lindenii/furgit/internal/progress"
+)
+
+const (
+	// revMagic is the first 32-bit word written to the reverse index; the
+	// bytes 0x52 0x49 0x44 0x58 spell "RIDX" in ASCII.
+	revMagic = 0x52494458
+	// revVersion is the 32-bit version word written right after revMagic.
+	revVersion = 1
+)
+
+// writeRev writes the reverse index for the resolved records.
+//
+// It is a no-op unless opts.WriteRev is set. The file consists of three
+// big-endian 32-bit header words (magic, version, pack hash ID), one 32-bit
+// index position per record in pack-offset order, the pack hash, and finally
+// the hash of everything written before it. The file is synced before return.
+func writeRev(state *ingestState) error {
+	if !state.opts.WriteRev {
+		return nil
+	}
+
+	// Invert the idx ordering: record index -> position in the idx file.
+	idxOrder := buildIdxOrder(state)
+	idxPosOf := make([]int, len(state.records))
+
+	for pos, recordIdx := range idxOrder {
+		idxPosOf[recordIdx] = pos
+	}
+
+	packOrder := buildPackOrder(state)
+
+	hashImpl, err := state.algo.New()
+	if err != nil {
+		return err
+	}
+
+	var scratch [8]byte
+
+	// putWord writes one big-endian 32-bit word to the rev file and hash.
+	putWord := func(word uint32) error {
+		binary.BigEndian.PutUint32(scratch[:4], word)
+
+		return writeAndHash(state.revFile, hashImpl, scratch[:4])
+	}
+
+	writeProgressf(state, "writing reverse index header...\r")
+
+	if err = putWord(revMagic); err != nil {
+		return err
+	}
+
+	if err = putWord(revVersion); err != nil {
+		return err
+	}
+
+	if err = putWord(state.algo.PackHashID()); err != nil {
+		return err
+	}
+
+	writeProgressf(state, "writing reverse index header: done.\n")
+
+	entriesMeter := progress.New(progress.Options{
+		Writer: state.opts.Progress,
+		Flush:  state.opts.ProgressFlush,
+		Title:  "writing reverse index entries",
+		Total:  uint64(len(packOrder)),
+	})
+
+	var written uint64
+
+	for _, recordIdx := range packOrder {
+		idxPos, err := intconv.IntToUint32(idxPosOf[recordIdx])
+		if err != nil {
+			return err
+		}
+
+		if err = putWord(idxPos); err != nil {
+			return err
+		}
+
+		written++
+		entriesMeter.Set(written, 0)
+	}
+
+	if written > 0 {
+		entriesMeter.Stop("done")
+	}
+
+	writeProgressf(state, "writing reverse index trailer...\r")
+
+	if err = writeAndHash(state.revFile, hashImpl, state.packHash.Bytes()); err != nil {
+		return err
+	}
+
+	// The file's own checksum is written but not fed back into the hash.
+	if _, err = state.revFile.Write(hashImpl.Sum(nil)); err != nil {
+		return err
+	}
+
+	if err = state.revFile.Sync(); err != nil {
+		return err
+	}
+
+	writeProgressf(state, "writing reverse index trailer: done.\n")
+
+	return nil
+}
+
+// buildPackOrder returns every record index, ordered by ascending pack offset
+// of the record it refers to.
+func buildPackOrder(state *ingestState) []int {
+	order := make([]int, len(state.records))
+	for i := range order {
+		order[i] = i
+	}
+
+	byOffset := func(a, b int) int {
+		left, right := state.records[a].offset, state.records[b].offset
+
+		if left < right {
+			return -1
+		}
+
+		if left > right {
+			return 1
+		}
+
+		return 0
+	}
+
+	slices.SortFunc(order, byOffset)
+
+	return order
+}
diff --git a/format/packfile/ingest/rewrite_header_trailer.go b/format/packfile/ingest/rewrite_header_trailer.go
new file mode 100644
index 00000000..f1f18a39
--- /dev/null
+++ b/format/packfile/ingest/rewrite_header_trailer.go
@@ -0,0 +1,89 @@
+package ingest
+
+import (
+ "encoding/binary"
+ "io"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+// rewritePackHeaderAndTrailer patches the object count in the pack header,
+// re-hashes the whole pack file, and appends the new hash as trailer, all via
+// ReadAt/WriteAt.
+//
+// The file is expected to carry no trailer on entry: everything up to its
+// current size is hashed. Afterwards state.packHash, state.objectCountHeader
+// and state.stream.consumed describe the rewritten file.
+func rewritePackHeaderAndTrailer(state *ingestState) error {
+	count, err := intconv.IntToUint32(len(state.records))
+	if err != nil {
+		return err
+	}
+
+	var countRaw [4]byte
+
+	binary.BigEndian.PutUint32(countRaw[:], count)
+
+	// The object count sits at byte offset 8 of the pack file.
+	if _, err = state.packFile.WriteAt(countRaw[:], 8); err != nil {
+		return err
+	}
+
+	info, err := state.packFile.Stat()
+	if err != nil {
+		return err
+	}
+
+	bodyEnd := info.Size()
+
+	hashImpl, err := state.algo.New()
+	if err != nil {
+		return err
+	}
+
+	var chunk [128 << 10]byte
+
+	for pos := int64(0); pos < bodyEnd; {
+		want := min(int64(len(chunk)), bodyEnd-pos)
+
+		n, err := state.packFile.ReadAt(chunk[:want], pos)
+		if err != nil && err != io.EOF {
+			return err
+		}
+
+		// No progress before the expected end means the file is shorter
+		// than Stat reported.
+		if n == 0 {
+			return io.ErrUnexpectedEOF
+		}
+
+		_, _ = hashImpl.Write(chunk[:n])
+		pos += int64(n)
+	}
+
+	digest := hashImpl.Sum(nil)
+
+	if _, err = state.packFile.WriteAt(digest, bodyEnd); err != nil {
+		return err
+	}
+
+	packHash, err := objectid.FromBytes(state.algo, digest)
+	if err != nil {
+		return err
+	}
+
+	state.packHash = packHash
+	state.objectCountHeader = count
+
+	total, err := intconv.Int64ToUint64(bodyEnd + int64(len(digest)))
+	if err != nil {
+		return err
+	}
+
+	state.stream.consumed = total
+
+	return nil
+}
diff --git a/format/packfile/ingest/scan.go b/format/packfile/ingest/scan.go
new file mode 100644
index 00000000..de4e993c
--- /dev/null
+++ b/format/packfile/ingest/scan.go
@@ -0,0 +1,106 @@
+package ingest
+
+import (
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/internal/progress"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+// streamPackAndScan copies src into temp .pack while scanning packed entries.
+//
+// It seeds the scanner with the pack header, scans exactly
+// state.objectCountHeader entries, then reads and verifies the trailer hash
+// and stores it in state.packHash before flushing the scanner.
+func streamPackAndScan(state *ingestState) error {
+	// maxPreallocRecords bounds the up-front slice capacity. The object
+	// count comes straight from the (untrusted) pack header and may be as
+	// large as 2^32-1, so it must not drive an allocation of that size
+	// before a single entry has been read; append grows the slices beyond
+	// this bound for packs that really are that large.
+	const maxPreallocRecords = 1 << 16
+
+	hashImpl, err := state.algo.New()
+	if err != nil {
+		return err
+	}
+
+	state.stream = newStreamScanner(
+		state.src,
+		state.packFile,
+		hashImpl,
+		state.algo.Size(),
+	)
+
+	writeProgressf(state, "validating pack header...\r")
+
+	err = seedStreamWithPackHeader(state)
+	if err != nil {
+		return err
+	}
+
+	writeProgressf(state, "validating pack header: done.\n")
+
+	total := state.objectCountHeader
+	prealloc := min(total, maxPreallocRecords)
+
+	state.records = make([]objectRecord, 0, prealloc)
+	state.ofsDeltas = make([]ofsDeltaRef, 0, prealloc)
+	state.refDeltas = make([]refDeltaRef, 0, prealloc)
+
+	meter := progress.New(progress.Options{
+		Writer:     state.opts.Progress,
+		Flush:      state.opts.ProgressFlush,
+		Title:      "receiving objects",
+		Total:      uint64(total),
+		Throughput: true,
+	})
+
+	for i := range total {
+		nextOffset, err := scanOneEntry(state, state.stream.consumed)
+		if err != nil {
+			return err
+		}
+
+		// The scanner's byte accounting must agree with where the entry
+		// parser says the next entry starts.
+		if nextOffset != state.stream.consumed {
+			return fmt.Errorf("packfile/ingest: internal stream offset mismatch")
+		}
+
+		done := i + 1
+		meter.Set(uint64(done), state.stream.consumed)
+	}
+
+	meter.Stop("done")
+
+	err = state.stream.finishAndFlushTrailer(state.opts.RequireTrailingEOF)
+	if err != nil {
+		return err
+	}
+
+	if len(state.stream.packTrailer) != state.algo.Size() {
+		return fmt.Errorf("packfile/ingest: invalid trailer size")
+	}
+
+	packHash, err := objectid.FromBytes(state.algo, state.stream.packTrailer)
+	if err != nil {
+		return err
+	}
+
+	state.packHash = packHash
+
+	return state.stream.flush()
+}
+
+// seedStreamWithPackHeader copies the already-validated PACK header into the
+// destination pack file, feeds it to the running pack hash, and sets the
+// stream's consumed offset to the header size.
+func seedStreamWithPackHeader(state *ingestState) error {
+	for rest := state.packHeaderRaw[:]; len(rest) > 0; {
+		n, err := state.packFile.Write(rest)
+		if err != nil {
+			return &DestinationWriteError{Op: fmt.Sprintf("write pack header: %v", err)}
+		}
+
+		// Guard against a writer that makes no progress without an error.
+		if n == 0 {
+			return &DestinationWriteError{Op: "write pack header: short write"}
+		}
+
+		rest = rest[n:]
+	}
+
+	if _, err := state.stream.hash.Write(state.packHeaderRaw[:]); err != nil {
+		return err
+	}
+
+	state.stream.consumed = packHeaderSize
+
+	return nil
+}
diff --git a/format/packfile/ingest/state.go b/format/packfile/ingest/state.go
new file mode 100644
index 00000000..797323b2
--- /dev/null
+++ b/format/packfile/ingest/state.go
@@ -0,0 +1,70 @@
+package ingest
+
+import (
+ "io"
+ "os"
+
+ objectid "codeberg.org/lindenii/furgit/object/id"
+)
+
+const (
+	// defaultDeltaBaseCacheMaxBytes (32 MiB) is the limit handed to
+	// newDeltaBaseCache when an ingest state is constructed.
+	defaultDeltaBaseCacheMaxBytes = 32 << 20
+)
+
+// ingestState holds mutable state for one Ingest call.
+type ingestState struct {
+	// src is the incoming pack stream.
+	src io.Reader
+	// destination is the root under which temporary artifacts are created.
+	destination *os.Root
+	// algo is the object ID algorithm; it supplies hash instances and sizes.
+	algo objectid.Algorithm
+	// opts are the caller-supplied ingest options.
+	opts Options
+
+	// packHeaderRaw is the raw pack header that is written to the pack file
+	// and fed to the pack hash before entries are scanned.
+	packHeaderRaw [packHeaderSize]byte
+
+	// Temporary artifact files and their names relative to destination.
+	// revFile stays nil (and revTmpName empty) unless opts.WriteRev is set.
+	packFile    *os.File
+	packTmpName string
+	idxFile     *os.File
+	idxTmpName  string
+	revFile     *os.File
+	revTmpName  string
+
+	// stream is the scanner mirroring src into packFile.
+	stream *streamScanner
+
+	// records holds one entry per packed object, including bases appended
+	// while fixing a thin pack.
+	records []objectRecord
+	// ofsDeltas and refDeltas are preallocated before scanning; they are
+	// filled outside this file (presumably by the entry scanner — confirm).
+	ofsDeltas []ofsDeltaRef
+	refDeltas []refDeltaRef
+	// unresolvedRefDeltas lists indexes of records whose REF base was not
+	// found inside the pack.
+	unresolvedRefDeltas []int
+	// offsetToRecord maps an entry start offset to its index in records.
+	offsetToRecord map[uint64]int
+	// objectToRecord maps a resolved object ID to its index in records.
+	objectToRecord map[objectid.ObjectID]int
+
+	// baseCache caches resolved type/content by record index.
+	baseCache *deltaBaseCache
+	// packHash is the pack trailer hash (recomputed after a thin-pack fix).
+	packHash objectid.ObjectID
+
+	// objectCountHeader is the object count from the pack header; it is
+	// updated when the header is rewritten.
+	objectCountHeader uint32
+	// thinFixed is set once at least one missing base has been appended.
+	thinFixed bool
+}
+
+// newIngestState builds the state for a single Ingest call.
+//
+// It fails with objectid.ErrInvalidAlgorithm when algo reports a zero hash
+// size. The lookup maps and the delta base cache start out empty.
+func newIngestState(
+	src io.Reader,
+	destination *os.Root,
+	algo objectid.Algorithm,
+	opts Options,
+	header HeaderInfo,
+	headerRaw [packHeaderSize]byte,
+) (*ingestState, error) {
+	if algo.Size() == 0 {
+		return nil, objectid.ErrInvalidAlgorithm
+	}
+
+	state := new(ingestState)
+
+	// Inputs supplied by the caller.
+	state.src = src
+	state.destination = destination
+	state.algo = algo
+	state.opts = opts
+	state.packHeaderRaw = headerRaw
+	state.objectCountHeader = header.ObjectCount
+
+	// Call-local bookkeeping.
+	state.offsetToRecord = make(map[uint64]int)
+	state.objectToRecord = make(map[objectid.ObjectID]int)
+	state.baseCache = newDeltaBaseCache(defaultDeltaBaseCacheMaxBytes)
+
+	return state, nil
+}
diff --git a/format/packfile/ingest/stream.go b/format/packfile/ingest/stream.go
new file mode 100644
index 00000000..a403087a
--- /dev/null
+++ b/format/packfile/ingest/stream.go
@@ -0,0 +1,111 @@
+package ingest
+
+import (
+ "errors"
+ "hash"
+ "io"
+ "os"
+)
+
+// streamScannerBufferSize (64 KiB) is the size of a scanner's input buffer.
+const streamScannerBufferSize = 64 << 10
+
+// streamScanner incrementally reads/consumes one pack stream while mirroring
+// consumed bytes into one destination pack file.
+type streamScanner struct {
+	// src is the stream being read; dstFile receives the mirrored bytes.
+	src     io.Reader
+	dstFile *os.File
+
+	// Input buffer window: buf[off:n] is unread.
+	buf []byte
+	off int
+	n   int
+
+	// Absolute consumed stream bytes.
+	consumed uint64
+
+	// Running pack hash over consumed bytes while hashEnabled is true.
+	// hashSize is the trailer length in bytes; hashing is switched off
+	// before the trailer itself is read.
+	hash        hash.Hash
+	hashSize    int
+	hashEnabled bool
+
+	// Entry CRC state while one entry is being consumed: consumed bytes are
+	// folded into entryCRC (IEEE) only while inEntryCRC is true.
+	entryCRC   uint32
+	inEntryCRC bool
+
+	// packTrailer holds the trailer bytes read from the stream.
+	packTrailer []byte
+}
+
+// newStreamScanner returns a scanner over src that mirrors into dstFile, with
+// a fixed-size input buffer and pack hashing switched on.
+func newStreamScanner(src io.Reader, dstFile *os.File, hash hash.Hash, hashSize int) *streamScanner {
+	scanner := new(streamScanner)
+
+	scanner.src = src
+	scanner.dstFile = dstFile
+	scanner.buf = make([]byte, streamScannerBufferSize)
+	scanner.hash = hash
+	scanner.hashSize = hashSize
+	scanner.hashEnabled = true
+
+	return scanner
+}
+
+// Read implements io.Reader.
+//
+// It refills the buffer only when nothing is unread, then hands out at most
+// the unread bytes and accounts for them through use.
+func (scanner *streamScanner) Read(dst []byte) (int, error) {
+	if len(dst) == 0 {
+		return 0, nil
+	}
+
+	if scanner.off == scanner.n {
+		if err := scanner.fill(1); err != nil {
+			// Callers get the bare sentinel even if fill wrapped it.
+			if errors.Is(err, io.EOF) {
+				return 0, io.EOF
+			}
+
+			return 0, err
+		}
+	}
+
+	avail := scanner.n - scanner.off
+	if avail == 0 {
+		return 0, io.EOF
+	}
+
+	count := avail
+	if len(dst) < count {
+		count = len(dst)
+	}
+
+	copy(dst, scanner.buf[scanner.off:scanner.off+count])
+
+	if err := scanner.use(count); err != nil {
+		return 0, err
+	}
+
+	return count, nil
+}
+
+// ReadByte implements io.ByteReader without allocation.
+//
+// The buffer is refilled first when nothing is unread; the returned byte is
+// accounted for through use.
+func (scanner *streamScanner) ReadByte() (byte, error) {
+	if scanner.off == scanner.n {
+		if err := scanner.fill(1); err != nil {
+			return 0, err
+		}
+	}
+
+	next := scanner.buf[scanner.off]
+
+	if err := scanner.use(1); err != nil {
+		return 0, err
+	}
+
+	return next, nil
+}
+
+// readFull fills dst completely from the scanner, or returns the error from
+// io.ReadFull.
+func (scanner *streamScanner) readFull(dst []byte) error {
+	_, err := io.ReadFull(scanner, dst)
+
+	return err
+}
diff --git a/format/packfile/ingest/temp.go b/format/packfile/ingest/temp.go
new file mode 100644
index 00000000..d0b7862c
--- /dev/null
+++ b/format/packfile/ingest/temp.go
@@ -0,0 +1,103 @@
+package ingest
+
+import (
+ "crypto/rand"
+ "errors"
+ "fmt"
+ "io/fs"
+ "os"
+)
+
+// openTemporaryArtifacts creates the temporary pack and idx files (and the rev
+// file when opts.WriteRev is set) under destination and stores them in state.
+//
+// If any creation fails, the files created so far are closed and removed, and
+// state is left untouched.
+func openTemporaryArtifacts(state *ingestState) error {
+	// discard closes and removes one already-created temp file, best effort.
+	discard := func(name string, file *os.File) {
+		_ = file.Close()
+		_ = state.destination.Remove(name)
+	}
+
+	packName, packFile, err := createTempFile(state.destination, "tmp_pack_")
+	if err != nil {
+		return err
+	}
+
+	idxName, idxFile, err := createTempFile(state.destination, "tmp_idx_")
+	if err != nil {
+		discard(packName, packFile)
+
+		return err
+	}
+
+	var (
+		revName string
+		revFile *os.File
+	)
+
+	if state.opts.WriteRev {
+		revName, revFile, err = createTempFile(state.destination, "tmp_rev_")
+		if err != nil {
+			discard(idxName, idxFile)
+			discard(packName, packFile)
+
+			return err
+		}
+	}
+
+	state.packTmpName, state.packFile = packName, packFile
+	state.idxTmpName, state.idxFile = idxName, idxFile
+	state.revTmpName, state.revFile = revName, revFile
+
+	return nil
+}
+
+// closeTemporaryArtifacts closes whichever temporary files are still open, in
+// pack, idx, rev order, and clears their handles in state.
+//
+// All files are closed even if one fails; the first close error is returned.
+func closeTemporaryArtifacts(state *ingestState) error {
+	var first error
+
+	handles := []**os.File{&state.packFile, &state.idxFile, &state.revFile}
+	for _, handle := range handles {
+		if *handle == nil {
+			continue
+		}
+
+		if err := (*handle).Close(); err != nil && first == nil {
+			first = err
+		}
+
+		*handle = nil
+	}
+
+	return first
+}
+
+// createTempFile creates a new file under root named prefix plus a random
+// suffix, opened read-write, and returns its name and handle.
+//
+// A name that already exists is retried with a fresh suffix, up to 32
+// attempts; any other error aborts immediately.
+func createTempFile(root *os.Root, prefix string) (string, *os.File, error) {
+	const (
+		maxAttempts = 32
+		openFlags   = os.O_CREATE | os.O_EXCL | os.O_RDWR
+	)
+
+	for attempt := 0; attempt < maxAttempts; attempt++ {
+		candidate := prefix + rand.Text()
+
+		file, err := root.OpenFile(candidate, openFlags, 0o644)
+
+		switch {
+		case err == nil:
+			return candidate, file, nil
+		case errors.Is(err, fs.ErrExist):
+			// Name collision: try again with another random suffix.
+		default:
+			return "", nil, fmt.Errorf("packfile/ingest: create temp file %q: %w", candidate, err)
+		}
+	}
+
+	return "", nil, fmt.Errorf("packfile/ingest: unable to create temporary file for prefix %q", prefix)
+}
diff --git a/format/packfile/ingest/testdata/fixtures/sha1/METADATA.txt b/format/packfile/ingest/testdata/fixtures/sha1/METADATA.txt
new file mode 100644
index 00000000..5fcbfe26
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha1/METADATA.txt
@@ -0,0 +1,3 @@
+format=sha1
+head=200c960359dad025b4170284c518919eb4a24305
+base=4bc507fc631ea78474d83c47548743c9f1dda0dc
diff --git a/format/packfile/ingest/testdata/fixtures/sha1/base.pack b/format/packfile/ingest/testdata/fixtures/sha1/base.pack
new file mode 100644
index 00000000..3d7a4903
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha1/base.pack
Binary files differ
diff --git a/format/packfile/ingest/testdata/fixtures/sha1/nonthin.pack b/format/packfile/ingest/testdata/fixtures/sha1/nonthin.pack
new file mode 100644
index 00000000..ea07c9a0
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha1/nonthin.pack
Binary files differ
diff --git a/format/packfile/ingest/testdata/fixtures/sha1/thin.pack b/format/packfile/ingest/testdata/fixtures/sha1/thin.pack
new file mode 100644
index 00000000..95084feb
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha1/thin.pack
Binary files differ
diff --git a/format/packfile/ingest/testdata/fixtures/sha256/METADATA.txt b/format/packfile/ingest/testdata/fixtures/sha256/METADATA.txt
new file mode 100644
index 00000000..8a5ea0a2
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha256/METADATA.txt
@@ -0,0 +1,3 @@
+format=sha256
+head=35cc0f4cd1c73524187540494058d233a2ecbd071c85d496a2250d8e0c805ef8
+base=b4abe46895f0bb5aa22fd42d28d428413f265359734c288752e3c2d270eec276
diff --git a/format/packfile/ingest/testdata/fixtures/sha256/base.pack b/format/packfile/ingest/testdata/fixtures/sha256/base.pack
new file mode 100644
index 00000000..52ceef74
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha256/base.pack
Binary files differ
diff --git a/format/packfile/ingest/testdata/fixtures/sha256/nonthin.pack b/format/packfile/ingest/testdata/fixtures/sha256/nonthin.pack
new file mode 100644
index 00000000..50db05d0
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha256/nonthin.pack
Binary files differ
diff --git a/format/packfile/ingest/testdata/fixtures/sha256/thin.pack b/format/packfile/ingest/testdata/fixtures/sha256/thin.pack
new file mode 100644
index 00000000..b331b915
--- /dev/null
+++ b/format/packfile/ingest/testdata/fixtures/sha256/thin.pack
Binary files differ
diff --git a/format/packfile/ingest/thin_append.go b/format/packfile/ingest/thin_append.go
new file mode 100644
index 00000000..779d477f
--- /dev/null
+++ b/format/packfile/ingest/thin_append.go
@@ -0,0 +1,91 @@
+package ingest
+
+import (
+ "compress/zlib"
+ "hash/crc32"
+ "io"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// appendBaseObject writes content as a new non-delta entry at the current end
+// of the pack (state.stream.consumed) and registers a resolved record for it.
+//
+// The entry is header plus zlib-compressed content; its CRC covers both. On
+// success the stream offset is advanced past the entry, the record is indexed
+// by offset and object ID, the content is cached, and the new record index is
+// returned.
+func appendBaseObject(state *ingestState, id objectid.ObjectID, realType objecttype.Type, content []byte) (int, error) {
+	entryStart := state.stream.consumed
+	contentLen := int64(len(content))
+
+	header := encodePackEntryHeader(realType, contentLen)
+
+	entryStartInt64, err := intconv.Uint64ToInt64(entryStart)
+	if err != nil {
+		return 0, err
+	}
+
+	if _, err = state.packFile.WriteAt(header, entryStartInt64); err != nil {
+		return 0, err
+	}
+
+	// Compressed bytes go right behind the header and are counted and
+	// checksummed on the way out.
+	payloadSink := &fileSectionWriter{file: state.packFile, off: entryStartInt64 + int64(len(header))}
+	entryCRC := crc32.NewIEEE()
+
+	if _, err = entryCRC.Write(header); err != nil {
+		return 0, err
+	}
+
+	counter := &countingWriter{dst: payloadSink}
+	deflater := zlib.NewWriter(io.MultiWriter(counter, entryCRC))
+
+	if _, err = deflater.Write(content); err != nil {
+		return 0, err
+	}
+
+	if err = deflater.Close(); err != nil {
+		return 0, err
+	}
+
+	headerLen64, err := intconv.IntToUint64(len(header))
+	if err != nil {
+		return 0, err
+	}
+
+	compressedLen64, err := intconv.IntToUint64(counter.n)
+	if err != nil {
+		return 0, err
+	}
+
+	entryLen := headerLen64 + compressedLen64
+	state.stream.consumed = entryStart + entryLen
+
+	headerLen32, err := intconv.IntToUint32(len(header))
+	if err != nil {
+		return 0, err
+	}
+
+	newIdx := len(state.records)
+
+	state.records = append(state.records, objectRecord{
+		offset:       entryStart,
+		headerLen:    headerLen32,
+		packedLen:    entryLen,
+		crc32:        entryCRC.Sum32(),
+		packedType:   realType,
+		realType:     realType,
+		declaredSize: contentLen,
+		dataOffset:   entryStart + headerLen64,
+		objectID:     id,
+		resolved:     true,
+	})
+
+	state.offsetToRecord[entryStart] = newIdx
+	state.objectToRecord[id] = newIdx
+	state.baseCache.add(newIdx, realType, content)
+
+	return newIdx, nil
+}
diff --git a/format/packfile/ingest/thin_fix.go b/format/packfile/ingest/thin_fix.go
new file mode 100644
index 00000000..83e5572a
--- /dev/null
+++ b/format/packfile/ingest/thin_fix.go
@@ -0,0 +1,100 @@
+package ingest
+
+import (
+ "errors"
+ "fmt"
+
+ "codeberg.org/lindenii/furgit/internal/intconv"
+ "codeberg.org/lindenii/furgit/internal/progress"
+ objectstorer "codeberg.org/lindenii/furgit/object/storer"
+)
+
+// maybeFixThin completes a thin pack: it drops the existing trailer, appends
+// every missing REF base that opts.Base can supply, and rewrites the pack
+// header and trailer.
+//
+// It does nothing when no REF deltas are unresolved, and fails with
+// ThinPackUnresolvedError when fixing is disabled or no base store is set.
+// Bases the store reports as not found are skipped here.
+func maybeFixThin(state *ingestState) error {
+	unresolved := len(state.unresolvedRefDeltas)
+	if unresolved == 0 {
+		return nil
+	}
+
+	writeProgressf(
+		state,
+		"fixing thin pack: %d unresolved bases\r",
+		unresolved,
+	)
+
+	if !state.opts.FixThin || state.opts.Base == nil {
+		return &ThinPackUnresolvedError{Count: unresolved}
+	}
+
+	trailerLen := int64(state.algo.Size())
+
+	info, err := state.packFile.Stat()
+	if err != nil {
+		return err
+	}
+
+	if info.Size() < trailerLen {
+		return fmt.Errorf("packfile/ingest: pack too short to trim trailer")
+	}
+
+	// Cut the old trailer off so new entries can be appended in its place.
+	bodyEnd := info.Size() - trailerLen
+
+	if err = state.packFile.Truncate(bodyEnd); err != nil {
+		return err
+	}
+
+	state.stream.consumed, err = intconv.Int64ToUint64(bodyEnd)
+	if err != nil {
+		return err
+	}
+
+	missing := unresolvedThinBaseIDs(state)
+	wanted := len(missing)
+
+	meter := progress.New(progress.Options{
+		Writer: state.opts.Progress,
+		Flush:  state.opts.ProgressFlush,
+		Title:  "fixing thin pack",
+		Total:  uint64(wanted),
+	})
+	meter.Set(0, 0)
+
+	var appended uint64
+
+	for _, id := range missing {
+		ty, content, err := state.opts.Base.ReadBytesContent(id)
+
+		switch {
+		case err == nil:
+		case errors.Is(err, objectstorer.ErrObjectNotFound):
+			continue
+		default:
+			return fmt.Errorf("packfile/ingest: read thin base %s: %w", id, err)
+		}
+
+		if _, err = appendBaseObject(state, id, ty, content); err != nil {
+			return err
+		}
+
+		state.thinFixed = true
+
+		appended++
+		meter.Set(appended, 0)
+	}
+
+	if err = rewritePackHeaderAndTrailer(state); err != nil {
+		return err
+	}
+
+	meter.Stop(fmt.Sprintf("appended %d/%d, done", appended, wanted))
+
+	return nil
+}
diff --git a/format/packfile/ingest/thin_unresolved.go b/format/packfile/ingest/thin_unresolved.go
new file mode 100644
index 00000000..757cc0e2
--- /dev/null
+++ b/format/packfile/ingest/thin_unresolved.go
@@ -0,0 +1,34 @@
+package ingest
+
+import (
+ "bytes"
+ "slices"
+
+ objectid "codeberg.org/lindenii/furgit/object/id"
+ objecttype "codeberg.org/lindenii/furgit/object/type"
+)
+
+// unresolvedThinBaseIDs collects the base object IDs of all unresolved REF
+// delta records, de-duplicated and sorted by raw byte order.
+func unresolvedThinBaseIDs(state *ingestState) []objectid.ObjectID {
+	unique := make(map[objectid.ObjectID]struct{})
+
+	for _, recordIdx := range state.unresolvedRefDeltas {
+		rec := &state.records[recordIdx]
+		if rec.packedType == objecttype.TypeRefDelta {
+			unique[rec.baseObject] = struct{}{}
+		}
+	}
+
+	ids := make([]objectid.ObjectID, 0, len(unique))
+	for id := range unique {
+		ids = append(ids, id)
+	}
+
+	// Map iteration order is random; sorting makes the result deterministic.
+	byRawBytes := func(a, b objectid.ObjectID) int {
+		return bytes.Compare(a.RawBytes(), b.RawBytes())
+	}
+
+	slices.SortFunc(ids, byRawBytes)
+
+	return ids
+}
diff --git a/format/packfile/ingest/trailer.go b/format/packfile/ingest/trailer.go
new file mode 100644
index 00000000..7a26a8f2
--- /dev/null
+++ b/format/packfile/ingest/trailer.go
@@ -0,0 +1,58 @@
+package ingest
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+)
+
+// finishAndFlushTrailer reads the trailer hash from the stream (without
+// hashing it), stores it in packTrailer, and checks it against the running
+// pack hash.
+//
+// Bytes still buffered after the trailer count as trailing garbage. When
+// requireTrailingEOF is set, the source must additionally report EOF on the
+// next read. A trailer that cannot be read in full, or that does not match,
+// yields PackTrailerMismatchError.
+func (scanner *streamScanner) finishAndFlushTrailer(requireTrailingEOF bool) error {
+	if scanner.hashSize <= 0 {
+		return fmt.Errorf("packfile/ingest: invalid hash size")
+	}
+
+	// The trailer is the hash of everything before it, so stop hashing now.
+	scanner.hashEnabled = false
+
+	trailer := make([]byte, scanner.hashSize)
+	if err := scanner.readFull(trailer); err != nil {
+		return &PackTrailerMismatchError{}
+	}
+
+	scanner.packTrailer = append(scanner.packTrailer[:0], trailer...)
+
+	if scanner.off < scanner.n {
+		return fmt.Errorf("packfile/ingest: pack has trailing garbage")
+	}
+
+	if requireTrailingEOF {
+		var probe [1]byte
+
+		n, err := scanner.Read(probe[:])
+		if n > 0 || err == nil {
+			return fmt.Errorf("packfile/ingest: pack has trailing garbage")
+		}
+
+		if !errors.Is(err, io.EOF) {
+			return err
+		}
+	}
+
+	if !bytes.Equal(scanner.hash.Sum(nil), trailer) {
+		return &PackTrailerMismatchError{}
+	}
+
+	return nil
+}
diff --git a/format/packfile/ingest/use.go b/format/packfile/ingest/use.go
new file mode 100644
index 00000000..97f8757a
--- /dev/null
+++ b/format/packfile/ingest/use.go
@@ -0,0 +1,34 @@
+package ingest
+
+import (
+ "fmt"
+ "hash/crc32"
+)
+
+// use marks the next n unread buffer bytes as consumed.
+//
+// The bytes are fed to the pack hash while hashEnabled is set and folded into
+// the entry CRC while inEntryCRC is set; then the buffer offset and the
+// absolute consumed counter advance by n. n must lie within the unread window.
+func (scanner *streamScanner) use(n int) error {
+	unread := scanner.n - scanner.off
+
+	switch {
+	case n < 0 || n > unread:
+		return fmt.Errorf("packfile/ingest: invalid consume length %d", n)
+	case n == 0:
+		return nil
+	}
+
+	taken := scanner.buf[scanner.off : scanner.off+n]
+
+	if scanner.hashEnabled {
+		if _, err := scanner.hash.Write(taken); err != nil {
+			return err
+		}
+	}
+
+	if scanner.inEntryCRC {
+		scanner.entryCRC = crc32.Update(scanner.entryCRC, crc32.IEEETable, taken)
+	}
+
+	scanner.consumed += uint64(n)
+	scanner.off += n
+
+	return nil
+}
diff --git a/format/packfile/object_type.go b/format/packfile/object_type.go
new file mode 100644
index 00000000..8382baa9
--- /dev/null
+++ b/format/packfile/object_type.go
@@ -0,0 +1,16 @@
+package packfile
+
+import objecttype "codeberg.org/lindenii/furgit/object/type"
+
+// IsBaseObjectType reports whether ty is commit, tree, blob, or tag, i.e. one
+// of the four canonical object types encoded directly in pack entries. Every
+// other value, including the delta and invalid/future tags, yields false.
+func IsBaseObjectType(ty objecttype.Type) bool {
+	return ty == objecttype.TypeCommit ||
+		ty == objecttype.TypeTree ||
+		ty == objecttype.TypeBlob ||
+		ty == objecttype.TypeTag
+}
diff --git a/format/packfile/ofs.go b/format/packfile/ofs.go
new file mode 100644
index 00000000..4992a506
--- /dev/null
+++ b/format/packfile/ofs.go
@@ -0,0 +1,26 @@
+package packfile
+
+import "fmt"
+
+// ParseOfsDeltaDistance parses one ofs-delta backward distance.
+//
+// The distance is a big-endian base-128 number: each byte contributes its low
+// seven bits, a set high bit means another byte follows, and every
+// continuation adds one to the value accumulated so far before shifting.
+//
+// It returns the distance and the number of bytes consumed from buf. An empty
+// or truncated encoding, or one whose value does not fit in a uint64, is
+// rejected with an error.
+func ParseOfsDeltaDistance(buf []byte) (uint64, int, error) {
+	// maxBeforeShift is the largest value of dist+1 that can still be
+	// shifted left by seven bits without losing high bits.
+	const maxBeforeShift = ^uint64(0) >> 7
+
+	if len(buf) == 0 {
+		return 0, 0, fmt.Errorf("packfile: malformed ofs-delta distance")
+	}
+
+	b := buf[0]
+	dist := uint64(b & 0x7f)
+
+	consumed := 1
+	for b&0x80 != 0 {
+		if consumed >= len(buf) {
+			return 0, 0, fmt.Errorf("packfile: malformed ofs-delta distance")
+		}
+
+		// dist+1 must not exceed maxBeforeShift; comparing dist itself
+		// also covers dist == MaxUint64, where dist+1 would wrap to zero.
+		if dist >= maxBeforeShift {
+			return 0, 0, fmt.Errorf("packfile: ofs-delta distance overflow")
+		}
+
+		b = buf[consumed]
+		consumed++
+		dist = ((dist + 1) << 7) + uint64(b&0x7f)
+	}
+
+	return dist, consumed, nil
+}