From 680d30bd77c4793fe5c1eaa05ad5217a2faee7c0 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Sat, 21 Feb 2026 12:27:55 +0800 Subject: refstore/reftable: Add basic implementation --- refstore/reftable/TODO | 22 ++ refstore/reftable/lookup.go | 424 +++++++++++++++++++++++++++++++++++++ refstore/reftable/parse_helpers.go | 36 ++++ refstore/reftable/path.go | 8 + refstore/reftable/reftable_test.go | 172 +++++++++++++++ refstore/reftable/store.go | 266 +++++++++++++++++++++++ refstore/reftable/table.go | 231 ++++++++++++++++++++ 7 files changed, 1159 insertions(+) create mode 100644 refstore/reftable/TODO create mode 100644 refstore/reftable/lookup.go create mode 100644 refstore/reftable/parse_helpers.go create mode 100644 refstore/reftable/path.go create mode 100644 refstore/reftable/reftable_test.go create mode 100644 refstore/reftable/store.go create mode 100644 refstore/reftable/table.go diff --git a/refstore/reftable/TODO b/refstore/reftable/TODO new file mode 100644 index 00000000..70e0ea27 --- /dev/null +++ b/refstore/reftable/TODO @@ -0,0 +1,22 @@ +* One-time load -> immutable snapshots and such; publish only fully loaded + snapshots. +* Reload behavior for tables.list; read tables.list, open all listed tables, + and retry from the start if any listed table is missing during open. +* Snapshot staleness detection +* Resolve's block search should be restart aware. Then the linear scan is only + within the chosen restart window. +* Cache parsed index-block metadata per table and block offset. Reuse restart + offsets and decoded key boundaries across lookups to try to reduce repeated + parsing. +* Pin one snapshot for each high-level read call. ResolveFully should resolve + all symbolic hops against one snapshot and avoid repeated ensure/reload + checks per hop. +* Add lazy snapshot-derived caches for list-heavy operations. Build visible + merged refs and sorted names once per snapshot, and reuse for List and + Shorten. +* Make format parser stricter to match Git-level behavior. Validate unaligned + multi-ref-block tables require ref index, and enforce more precise + first-block/restart invariants where applicable. +* Improve parse and lookup diagnostics with contextual errors. Return + fmt.Errorf values that include table name, block offset, and record offset, + where possible. diff --git a/refstore/reftable/lookup.go b/refstore/reftable/lookup.go new file mode 100644 index 00000000..24f9adb5 --- /dev/null +++ b/refstore/reftable/lookup.go @@ -0,0 +1,424 @@ +package reftable + +import ( + "encoding/binary" + "fmt" + "strings" + + "codeberg.org/lindenii/furgit/objectid" +) + +// resolveRecord resolves one ref name inside a single table file. +func (table *tableFile) resolveRecord(name string) (recordValue, bool, error) { + if table.refIndexPos != 0 { + pos, ok, err := table.resolveRefBlockPosFromIndex(name, int(table.refIndexPos)) + if err != nil { + return recordValue{}, false, err + } + if !ok { + return recordValue{}, false, nil + } + return table.lookupInRefBlock(name, pos) + } + + // Without a ref index, fall back to scanning ref blocks in order. + pos := table.headerLen + for pos < table.refEnd { + for pos < table.refEnd && table.data[pos] == 0 { + pos++ + } + if pos >= table.refEnd { + break + } + if table.data[pos] != blockTypeRef { + return recordValue{}, false, fmt.Errorf("refstore/reftable: table %q: unexpected block type %q in ref section", table.name, table.data[pos]) + } + block, blockEnd, err := table.readBlockAt(pos) + if err != nil { + return recordValue{}, false, err + } + found, done, rec, err := lookupRecordInRefBlock(table, block, name) + if err != nil { + return recordValue{}, false, err + } + if found { + return rec, true, nil + } + if done { + return recordValue{}, false, nil + } + pos = table.nextBlockPos(blockEnd) + } + return recordValue{}, false, nil +} + +// resolveRefBlockPosFromIndex resolves a candidate ref block position via index blocks. +func (table *tableFile) resolveRefBlockPosFromIndex(name string, indexPos int) (int, bool, error) { + block, _, err := table.readBlockAt(indexPos) + if err != nil { + return 0, false, err + } + if block.blockType != blockTypeIndex { + return 0, false, fmt.Errorf("refstore/reftable: table %q: ref index root is not index block", table.name) + } + childPos, ok, err := lookupChildPosInIndexBlock(block, name) + if err != nil { + return 0, false, err + } + if !ok { + return 0, false, nil + } + if childPos < 0 || childPos >= len(table.data) { + return 0, false, fmt.Errorf("refstore/reftable: table %q: index child position out of range", table.name) + } + + childType := table.data[childPos] + switch childType { + case blockTypeRef: + return childPos, true, nil + case blockTypeIndex: + return table.resolveRefBlockPosFromIndex(name, childPos) + default: + return 0, false, fmt.Errorf("refstore/reftable: table %q: unexpected child block type %q", table.name, childType) + } +} + +// lookupInRefBlock searches one ref block by full ref name. +func (table *tableFile) lookupInRefBlock(name string, pos int) (recordValue, bool, error) { + block, _, err := table.readBlockAt(pos) + if err != nil { + return recordValue{}, false, err + } + if block.blockType != blockTypeRef { + return recordValue{}, false, fmt.Errorf("refstore/reftable: table %q: expected ref block at %d", table.name, pos) + } + found, _, rec, err := lookupRecordInRefBlock(table, block, name) + if err != nil { + return recordValue{}, false, err + } + return rec, found, nil +} + +// forEachRecord iterates all ref records in this table in lexical order. +func (table *tableFile) forEachRecord(fn func(name string, rec recordValue) error) error { + pos := table.headerLen + prevLast := "" + for pos < table.refEnd { + for pos < table.refEnd && table.data[pos] == 0 { + pos++ + } + if pos >= table.refEnd { + break + } + if table.data[pos] != blockTypeRef { + return fmt.Errorf("refstore/reftable: table %q: unexpected block type %q in ref section", table.name, table.data[pos]) + } + + block, blockEnd, err := table.readBlockAt(pos) + if err != nil { + return err + } + var first, last string + err = forEachRecordInRefBlock(table, block, func(name string, rec recordValue) error { + if first == "" { + first = name + } + last = name + return fn(name, rec) + }) + if err != nil { + return err + } + if prevLast != "" && first != "" && strings.Compare(first, prevLast) <= 0 { + return fmt.Errorf("refstore/reftable: table %q: ref blocks are not strictly ordered", table.name) + } + if last != "" { + prevLast = last + } + pos = table.nextBlockPos(blockEnd) + } + return nil +} + +// blockView is one decoded block boundary within the mapped table bytes. +type blockView struct { + blockType byte + start int + end int + first bool + payload []byte +} + +// readBlockAt validates and returns a block view starting at pos. +func (table *tableFile) readBlockAt(pos int) (blockView, int, error) { + if pos < 0 || pos+4 > len(table.data) { + return blockView{}, 0, fmt.Errorf("refstore/reftable: table %q: block header out of range", table.name) + } + blockLen := int(readUint24(table.data[pos+1 : pos+4])) + effectiveLen := blockLen + if pos == table.headerLen { + if blockLen < table.headerLen { + return blockView{}, 0, fmt.Errorf("refstore/reftable: table %q: invalid first block length", table.name) + } + effectiveLen = blockLen - table.headerLen + } + if effectiveLen < 4 { + return blockView{}, 0, fmt.Errorf("refstore/reftable: table %q: invalid block length", table.name) + } + end := pos + effectiveLen + if end > len(table.data) { + return blockView{}, 0, fmt.Errorf("refstore/reftable: table %q: block out of range", table.name) + } + view := blockView{blockType: table.data[pos], start: pos, end: end, first: pos == table.headerLen, payload: table.data[pos:end]} + return view, end, nil +} + +// nextBlockPos computes the next block start from current block end. +func (table *tableFile) nextBlockPos(blockEnd int) int { + if table.blockSize > 0 { + return alignUp(blockEnd, table.blockSize) + } + return blockEnd +} + +// lookupChildPosInIndexBlock selects a child block position for key. +func lookupChildPosInIndexBlock(block blockView, key string) (int, bool, error) { + off, recordsEnd, restarts, err := parseBlockLayout(block) + if err != nil { + return 0, false, err + } + if err := validateRestarts(block, restarts, off, recordsEnd, true); err != nil { + return 0, false, err + } + prev := "" + for off < recordsEnd { + name, v, nextOff, err := parseKeyedRecord(block.payload, off, recordsEnd, prev) + if err != nil { + return 0, false, err + } + if (v & 0x7) != 0 { + return 0, false, fmt.Errorf("index value_type must be 0") + } + childPos, nextOff, err := readVarint(block.payload, nextOff, recordsEnd) + if err != nil { + return 0, false, err + } + if strings.Compare(key, name) <= 0 { + if childPos > uint64(int(^uint(0)>>1)) { + return 0, false, fmt.Errorf("index child position overflows int") + } + return int(childPos), true, nil + } + prev = name + off = nextOff + } + if off != recordsEnd { + return 0, false, fmt.Errorf("malformed index block") + } + return 0, false, nil +} + +// lookupRecordInRefBlock searches one ref block and may short-circuit by sort order. +func lookupRecordInRefBlock(table *tableFile, block blockView, key string) (found bool, done bool, rec recordValue, err error) { + off, recordsEnd, restarts, err := parseBlockLayout(block) + if err != nil { + return false, false, recordValue{}, err + } + if err := validateRestarts(block, restarts, off, recordsEnd, true); err != nil { + return false, false, recordValue{}, err + } + prev := "" + for off < recordsEnd { + name, v, nextOff, err := parseKeyedRecord(block.payload, off, recordsEnd, prev) + if err != nil { + return false, false, recordValue{}, err + } + typeBits := byte(v & 0x7) + _, nextOff, err = readVarint(block.payload, nextOff, recordsEnd) + if err != nil { + return false, false, recordValue{}, err + } + recVal, nextOff, err := parseRefValue(block.payload, nextOff, recordsEnd, table.algo, typeBits) + if err != nil { + return false, false, recordValue{}, err + } + cmp := strings.Compare(name, key) + if cmp == 0 { + return true, true, recVal, nil + } + if cmp > 0 { + return false, true, recordValue{}, nil + } + prev = name + off = nextOff + } + if off != recordsEnd { + return false, false, recordValue{}, fmt.Errorf("malformed ref block") + } + return false, false, recordValue{}, nil +} + +// forEachRecordInRefBlock iterates all records in one ref block. +func forEachRecordInRefBlock(table *tableFile, block blockView, fn func(name string, rec recordValue) error) error { + off, recordsEnd, restarts, err := parseBlockLayout(block) + if err != nil { + return err + } + if err := validateRestarts(block, restarts, off, recordsEnd, true); err != nil { + return err + } + prev := "" + for off < recordsEnd { + name, v, nextOff, err := parseKeyedRecord(block.payload, off, recordsEnd, prev) + if err != nil { + return err + } + typeBits := byte(v & 0x7) + _, nextOff, err = readVarint(block.payload, nextOff, recordsEnd) + if err != nil { + return err + } + recVal, nextOff, err := parseRefValue(block.payload, nextOff, recordsEnd, table.algo, typeBits) + if err != nil { + return err + } + if err := fn(name, recVal); err != nil { + return err + } + prev = name + off = nextOff + } + if off != recordsEnd { + return fmt.Errorf("malformed ref block") + } + return nil +} + +// parseBlockLayout parses common record/restart regions for ref and index blocks. +func parseBlockLayout(block blockView) (recordsStart int, recordsEnd int, restarts []int, err error) { + if len(block.payload) < 6 { + return 0, 0, nil, fmt.Errorf("short block") + } + restartCount := int(binary.BigEndian.Uint16(block.payload[len(block.payload)-2:])) + if restartCount <= 0 { + return 0, 0, nil, fmt.Errorf("invalid restart count") + } + restarts = make([]int, restartCount) + restartBytes := restartCount * 3 + restartsStart := len(block.payload) - 2 - restartBytes + if restartsStart < 4 { + return 0, 0, nil, fmt.Errorf("invalid restart table") + } + for i := 0; i < restartCount; i++ { + off := restartsStart + i*3 + rel := int(readUint24(block.payload[off : off+3])) + base := block.start + if block.first { + // In the first block, restart offsets are relative to file start. + base = 0 + } + abs := base + rel + restarts[i] = abs - block.start + } + return 4, restartsStart, restarts, nil +} + +// validateRestarts validates restart monotonicity, bounds and record-prefix invariants. +func validateRestarts(block blockView, restarts []int, recordsStart, recordsEnd int, requirePrefixZero bool) error { + prev := -1 + for _, off := range restarts { + if off < recordsStart || off >= recordsEnd { + return fmt.Errorf("restart offset out of range") + } + if off <= prev { + return fmt.Errorf("restart offsets not strictly increasing") + } + prev = off + if requirePrefixZero { + prefix, _, err := readVarint(block.payload, off, recordsEnd) + if err != nil { + return err + } + if prefix != 0 { + return fmt.Errorf("restart record prefix length must be zero") + } + } + } + return nil +} + +// parseKeyedRecord parses one prefix-compressed key record header. +func parseKeyedRecord(buf []byte, off, end int, prev string) (name string, rawType uint64, next int, err error) { + prefixLen, next, err := readVarint(buf, off, end) + if err != nil { + return "", 0, 0, err + } + suffixAndType, next, err := readVarint(buf, next, end) + if err != nil { + return "", 0, 0, err + } + suffixLen := int(suffixAndType >> 3) + if suffixLen < 0 || next+suffixLen > end { + return "", 0, 0, fmt.Errorf("invalid suffix length") + } + if int(prefixLen) > len(prev) { + return "", 0, 0, fmt.Errorf("invalid prefix length") + } + name = prev[:prefixLen] + string(buf[next:next+suffixLen]) + next += suffixLen + if prev != "" && strings.Compare(name, prev) <= 0 { + return "", 0, 0, fmt.Errorf("keys not strictly increasing") + } + return name, suffixAndType, next, nil +} + +// parseRefValue parses one ref-record value payload according to value_type. +func parseRefValue(buf []byte, off, end int, algo objectid.Algorithm, valueType byte) (recordValue, int, error) { + switch valueType { + case 0x0: + return recordValue{deleted: true}, off, nil + case 0x1: + id, next, err := readObjectID(buf, off, end, algo) + if err != nil { + return recordValue{}, 0, err + } + return recordValue{detachedID: id, hasDetached: true}, next, nil + case 0x2: + id, next, err := readObjectID(buf, off, end, algo) + if err != nil { + return recordValue{}, 0, err + } + peeled, next, err := readObjectID(buf, next, end, algo) + if err != nil { + return recordValue{}, 0, err + } + peeledCopy := peeled + return recordValue{detachedID: id, hasDetached: true, peeled: &peeledCopy}, next, nil + case 0x3: + targetLen, next, err := readVarint(buf, off, end) + if err != nil { + return recordValue{}, 0, err + } + if targetLen > uint64(end-next) { + return recordValue{}, 0, fmt.Errorf("invalid symref target length") + } + target := string(buf[next : next+int(targetLen)]) + next += int(targetLen) + return recordValue{symbolicTarget: target}, next, nil + default: + return recordValue{}, 0, fmt.Errorf("unsupported ref value type %d", valueType) + } +} + +// readObjectID reads one object ID using the table algorithm width. +func readObjectID(buf []byte, off, end int, algo objectid.Algorithm) (objectid.ObjectID, int, error) { + sz := algo.Size() + if off < 0 || sz < 0 || off+sz > end { + return objectid.ObjectID{}, 0, fmt.Errorf("truncated object id") + } + id, err := objectid.FromBytes(algo, buf[off:off+sz]) + if err != nil { + return objectid.ObjectID{}, 0, err + } + return id, off + sz, nil +} diff --git a/refstore/reftable/parse_helpers.go b/refstore/reftable/parse_helpers.go new file mode 100644 index 00000000..b5da555e --- /dev/null +++ b/refstore/reftable/parse_helpers.go @@ -0,0 +1,36 @@ +package reftable + +import "fmt" + +// readUint24 reads a 24-bit big-endian unsigned integer. +func readUint24(b []byte) uint32 { + return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2]) +} + +// alignUp rounds pos up to the next multiple of blockSize. +func alignUp(pos, blockSize int) int { + rem := pos % blockSize + if rem == 0 { + return pos + } + return pos + (blockSize - rem) +} + +// readVarint decodes one reftable/ofs-delta style varint. +func readVarint(buf []byte, off, end int) (uint64, int, error) { + if off >= end { + return 0, 0, fmt.Errorf("unexpected EOF") + } + b := buf[off] + val := uint64(b & 0x7f) + off++ + for b&0x80 != 0 { + if off >= end { + return 0, 0, fmt.Errorf("unexpected EOF") + } + b = buf[off] + off++ + val = ((val + 1) << 7) | uint64(b&0x7f) + } + return val, off, nil +} diff --git a/refstore/reftable/path.go b/refstore/reftable/path.go new file mode 100644 index 00000000..3bdaf5cd --- /dev/null +++ b/refstore/reftable/path.go @@ -0,0 +1,8 @@ +package reftable + +import "path" + +// pathMatch applies path.Match to full ref names. +func pathMatch(pattern, name string) (bool, error) { + return path.Match(pattern, name) +} diff --git a/refstore/reftable/reftable_test.go b/refstore/reftable/reftable_test.go new file mode 100644 index 00000000..28201af2 --- /dev/null +++ b/refstore/reftable/reftable_test.go @@ -0,0 +1,172 @@ +package reftable_test + +import ( + "errors" + "os" + "path/filepath" + "slices" + "testing" + + "codeberg.org/lindenii/furgit/internal/testgit" + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/ref" + "codeberg.org/lindenii/furgit/refstore" + "codeberg.org/lindenii/furgit/refstore/reftable" +) + +// newBareReftableRepo creates a bare repository that uses reftable ref storage. +func newBareReftableRepo(tb testing.TB, algo objectid.Algorithm) *testgit.TestRepo { + tb.Helper() + return testgit.NewRepo(tb, algo, testgit.RepoOptions{Bare: true, RefFormat: "reftable"}) +} + +// openStore opens a reftable store against repoDir/reftable. +func openStore(tb testing.TB, repoDir string, algo objectid.Algorithm) *reftable.Store { + tb.Helper() + root, err := os.OpenRoot(filepath.Join(repoDir, "reftable")) + if err != nil { + tb.Fatalf("OpenRoot(reftable): %v", err) + } + tb.Cleanup(func() { _ = root.Close() }) + store, err := reftable.New(root, algo) + if err != nil { + tb.Fatalf("reftable.New: %v", err) + } + return store +} + +func TestResolveAndResolveFully(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := newBareReftableRepo(t, algo) + _, _, id := repo.MakeCommit(t, "resolve") + repo.UpdateRef(t, "refs/heads/main", id) + repo.SymbolicRef(t, "HEAD", "refs/heads/main") + + store := openStore(t, repo.Dir(), algo) + head, err := store.Resolve("HEAD") + if err != nil { + t.Fatalf("Resolve(HEAD): %v", err) + } + sym, ok := head.(ref.Symbolic) + if !ok { + t.Fatalf("Resolve(HEAD) type = %T, want ref.Symbolic", head) + } + if sym.Target != "refs/heads/main" { + t.Fatalf("Resolve(HEAD) target = %q, want refs/heads/main", sym.Target) + } + + main, err := store.ResolveFully("HEAD") + if err != nil { + t.Fatalf("ResolveFully(HEAD): %v", err) + } + if main.ID != id { + t.Fatalf("ResolveFully(HEAD) id = %s, want %s", main.ID, id) + } + + if _, err := store.Resolve("refs/heads/missing"); !errors.Is(err, refstore.ErrReferenceNotFound) { + t.Fatalf("Resolve(missing) error = %v", err) + } + }) +} + +func TestResolveFullyCycle(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := newBareReftableRepo(t, algo) + repo.SymbolicRef(t, "refs/heads/a", "refs/heads/b") + repo.SymbolicRef(t, "refs/heads/b", "refs/heads/a") + + store := openStore(t, repo.Dir(), algo) + if _, err := store.ResolveFully("refs/heads/a"); err == nil { + t.Fatalf("ResolveFully(cycle) expected error") + } + }) +} + +func TestListAndShorten(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := newBareReftableRepo(t, algo) + _, _, id := repo.MakeCommit(t, "list") + repo.UpdateRef(t, "refs/heads/main", id) + repo.UpdateRef(t, "refs/heads/feature", id) + repo.UpdateRef(t, "refs/tags/main", id) + repo.UpdateRef(t, "refs/remotes/origin/main", id) + + store := openStore(t, repo.Dir(), algo) + all, err := store.List("") + if err != nil { + t.Fatalf("List(all): %v", err) + } + names := make([]string, 0, len(all)) + for _, entry := range all { + names = append(names, entry.Name()) + } + want := []string{"HEAD", "refs/heads/feature", "refs/heads/main", "refs/remotes/origin/main", "refs/tags/main"} + if !slices.Equal(names, want) { + t.Fatalf("List(all) = %v, want %v", names, want) + } + + heads, err := store.List("refs/heads/*") + if err != nil { + t.Fatalf("List(heads): %v", err) + } + headNames := make([]string, 0, len(heads)) + for _, entry := range heads { + headNames = append(headNames, entry.Name()) + } + wantHeads := []string{"refs/heads/feature", "refs/heads/main"} + if !slices.Equal(headNames, wantHeads) { + t.Fatalf("List(heads) = %v, want %v", headNames, wantHeads) + } + + short, err := store.Shorten("refs/remotes/origin/main") + if err != nil { + t.Fatalf("Shorten(remote): %v", err) + } + if short != "origin/main" { + t.Fatalf("Shorten(remote) = %q, want origin/main", short) + } + }) +} + +func TestTombstoneNewestWins(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := newBareReftableRepo(t, algo) + _, _, oldID := repo.MakeCommit(t, "old") + repo.UpdateRef(t, "refs/heads/main", oldID) + _, _, newID := repo.MakeCommit(t, "new") + repo.UpdateRef(t, "refs/heads/main", newID) + repo.DeleteRef(t, "refs/heads/main") + + store := openStore(t, repo.Dir(), algo) + if _, err := store.Resolve("refs/heads/main"); !errors.Is(err, refstore.ErrReferenceNotFound) { + t.Fatalf("Resolve(main) after delete error = %v", err) + } + }) +} + +func TestAnnotatedTagPeeled(t *testing.T) { + testgit.ForEachAlgorithm(t, func(t *testing.T, algo objectid.Algorithm) { + repo := newBareReftableRepo(t, algo) + _, _, commitID := repo.MakeCommit(t, "tagged") + tagID := repo.TagAnnotated(t, "v1.0.0", commitID, "annotated") + + store := openStore(t, repo.Dir(), algo) + resolved, err := store.Resolve("refs/tags/v1.0.0") + if err != nil { + t.Fatalf("Resolve(tag): %v", err) + } + detached, ok := resolved.(ref.Detached) + if !ok { + t.Fatalf("Resolve(tag) type = %T, want ref.Detached", resolved) + } + if detached.ID != tagID { + t.Fatalf("Resolve(tag) id = %s, want %s", detached.ID, tagID) + } + if detached.Peeled == nil { + t.Fatalf("Resolve(tag) peeled = nil") + } + if *detached.Peeled != commitID { + t.Fatalf("Resolve(tag) peeled = %s, want %s", *detached.Peeled, commitID) + } + }) +} diff --git a/refstore/reftable/store.go b/refstore/reftable/store.go new file mode 100644 index 00000000..df874c69 --- /dev/null +++ b/refstore/reftable/store.go @@ -0,0 +1,266 @@ +// Package reftable provides read access to Git reftable reference storage. +// This store is experimental, has many issues, and should not be used in any serious capacity for now. +package reftable + +import ( + "errors" + "os" + "sort" + "strings" + "sync" + + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/ref" + "codeberg.org/lindenii/furgit/refstore" +) + +// Store reads references from a reftable stack rooted at $GIT_DIR/reftable. +// +// Store does not own root. Callers are responsible for closing root. +type Store struct { + // root is the reftable directory capability. + root *os.Root + // algo is the repository object ID algorithm. + algo objectid.Algorithm + + // loadOnce ensures tables.list and table handles are loaded once. + loadOnce sync.Once + // loadErr stores loading failure from loadOnce. + loadErr error + + // stateMu guards tables publication and close transitions. + stateMu sync.RWMutex + // tables are loaded in oldest-to-newest order from tables.list. + tables []*tableFile + // closed reports whether Close has been called. + closed bool +} + +var _ refstore.Store = (*Store)(nil) + +// New creates a read-only reftable store rooted at $GIT_DIR/reftable. +func New(root *os.Root, algo objectid.Algorithm) (*Store, error) { + if algo.Size() == 0 { + return nil, objectid.ErrInvalidAlgorithm + } + return &Store{root: root, algo: algo}, nil +} + +// Close releases mapped table resources associated with this store. +func (store *Store) Close() error { + store.stateMu.Lock() + if store.closed { + store.stateMu.Unlock() + return nil + } + store.closed = true + tables := store.tables + store.tables = nil + store.stateMu.Unlock() + + var closeErr error + for _, table := range tables { + if table == nil { + continue + } + if err := table.close(); err != nil && closeErr == nil { + closeErr = err + } + } + return closeErr +} + +// Resolve resolves a reference name to either a symbolic or detached ref. +func (store *Store) Resolve(name string) (ref.Ref, error) { + tables, err := store.ensureTables() + if err != nil { + return nil, err + } + for i := len(tables) - 1; i >= 0; i-- { + rec, found, err := tables[i].resolveRecord(name) + if err != nil { + return nil, err + } + if !found { + continue + } + if rec.deleted { + return nil, refstore.ErrReferenceNotFound + } + resolved, err := rec.toRef(name) + if err != nil { + return nil, err + } + return resolved, nil + } + return nil, refstore.ErrReferenceNotFound +} + +// ResolveFully resolves symbolic references until it reaches a detached value. +// +// ResolveFully resolves symbolic references only. It does not imply peeling +// annotated tag objects. +func (store *Store) ResolveFully(name string) (ref.Detached, error) { + seen := map[string]struct{}{} + cur := name + for { + if _, exists := seen[cur]; exists { + return ref.Detached{}, errors.New("refstore/reftable: symbolic reference cycle") + } + seen[cur] = struct{}{} + resolved, err := store.Resolve(cur) + if err != nil { + return ref.Detached{}, err + } + switch resolved := resolved.(type) { + case ref.Detached: + return resolved, nil + case ref.Symbolic: + if resolved.Target == "" { + return ref.Detached{}, errors.New("refstore/reftable: symbolic reference has empty target") + } + cur = resolved.Target + default: + return ref.Detached{}, errors.New("refstore/reftable: unsupported reference type") + } + } +} + +// List returns references matching pattern. +// +// Pattern uses path.Match syntax against full reference names. +// Empty pattern matches all references. +func (store *Store) List(pattern string) ([]ref.Ref, error) { + tables, err := store.ensureTables() + if err != nil { + return nil, err + } + visible := make(map[string]ref.Ref) + masked := make(map[string]struct{}) + + for i := len(tables) - 1; i >= 0; i-- { + if err := tables[i].forEachRecord(func(name string, rec recordValue) error { + if _, done := masked[name]; done { + return nil + } + masked[name] = struct{}{} + if rec.deleted { + return nil + } + resolved, err := rec.toRef(name) + if err != nil { + return err + } + visible[name] = resolved + return nil + }); err != nil { + return nil, err + } + } + + matchAll := pattern == "" + if !matchAll { + if _, err := pathMatch(pattern, "refs/heads/main"); err != nil { + return nil, err + } + } + + names := make([]string, 0, len(visible)) + for name := range visible { + names = append(names, name) + } + sort.Strings(names) + + out := make([]ref.Ref, 0, len(names)) + for _, name := range names { + if !matchAll { + ok, err := pathMatch(pattern, name) + if err != nil { + return nil, err + } + if !ok { + continue + } + } + out = append(out, visible[name]) + } + return out, nil +} + +// Shorten returns the shortest unambiguous shorthand for a full reference name. +func (store *Store) Shorten(name string) (string, error) { + refs, err := store.List("") + if err != nil { + return "", err + } + names := make([]string, 0, len(refs)) + found := false + for _, entry := range refs { + if entry == nil { + continue + } + full := entry.Name() + names = append(names, full) + if full == name { + found = true + } + } + if !found { + return "", refstore.ErrReferenceNotFound + } + return refstore.ShortenName(name, names), nil +} + +// ensureTables loads and validates tables listed by tables.list once. +func (store *Store) ensureTables() ([]*tableFile, error) { + store.loadOnce.Do(func() { + tables, err := store.loadTables() + store.stateMu.Lock() + store.tables = tables + store.loadErr = err + store.stateMu.Unlock() + }) + + store.stateMu.RLock() + defer store.stateMu.RUnlock() + if store.closed { + return nil, errors.New("refstore/reftable: store is closed") + } + return store.tables, store.loadErr +} + +// loadTables reads tables.list and opens all listed tables. +func (store *Store) loadTables() ([]*tableFile, error) { + listRaw, err := store.root.ReadFile("tables.list") + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + lines := strings.Split(string(listRaw), "\n") + names := make([]string, 0, len(lines)) + for _, line := range lines { + line = strings.TrimSuffix(line, "\r") + if line == "" { + continue + } + if strings.Contains(line, "/") { + return nil, errors.New("refstore/reftable: invalid table name") + } + names = append(names, line) + } + + out := make([]*tableFile, 0, len(names)) + for _, name := range names { + table, err := openTableFile(store.root, name, store.algo) + if err != nil { + for _, opened := range out { + _ = opened.close() + } + return nil, err + } + out = append(out, table) + } + return out, nil +} diff --git a/refstore/reftable/table.go b/refstore/reftable/table.go new file mode 100644 index 00000000..bbef1957 --- /dev/null +++ b/refstore/reftable/table.go @@ -0,0 +1,231 @@ +package reftable + +import ( + "encoding/binary" + "errors" + "fmt" + "hash/crc32" + "os" + "syscall" + + "codeberg.org/lindenii/furgit/objectid" + "codeberg.org/lindenii/furgit/ref" +) + +const ( + reftableMagic = "REFT" + + version1 = 1 + version2 = 2 + + blockTypeRef = byte('r') + blockTypeIndex = byte('i') +) + +var ( + hashIDSHA1 = binary.BigEndian.Uint32([]byte("sha1")) + hashIDSHA256 = binary.BigEndian.Uint32([]byte("s256")) +) + +// tableFile is one opened and mapped reftable file. +type tableFile struct { + // name is the table filename from tables.list. + name string + // algo is the expected object ID algorithm. + algo objectid.Algorithm + + // file is the opened table file. + file *os.File + // data is the mapped table bytes. + data []byte + + // headerLen is 24 for v1 or 28 for v2. + headerLen int + // blockSize is configured alignment; 0 means unaligned. + blockSize int + + // refEnd is the exclusive end of ref blocks section. + refEnd int + // refIndexPos is the root ref-index block position, or 0 when absent. + refIndexPos uint64 +} + +// recordValue is one decoded reference record value. +type recordValue struct { + // deleted marks a tombstone record. + deleted bool + // detachedID stores a direct object ID for detached refs. + detachedID objectid.ObjectID + // hasDetached reports whether detachedID is valid. + hasDetached bool + // peeled stores an optional peeled ID for annotated tags. + peeled *objectid.ObjectID + // symbolicTarget stores symref target for symbolic refs. + symbolicTarget string +} + +// openTableFile maps and validates one reftable file. +func openTableFile(root *os.Root, name string, algo objectid.Algorithm) (*tableFile, error) { + file, err := root.Open(name) + if err != nil { + return nil, err + } + info, err := file.Stat() + if err != nil { + _ = file.Close() + return nil, err + } + size := info.Size() + if size < 0 || size > int64(int(^uint(0)>>1)) { + _ = file.Close() + return nil, fmt.Errorf("refstore/reftable: table %q has unsupported size", name) + } + data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE) + if err != nil { + _ = file.Close() + return nil, err + } + out := &tableFile{name: name, algo: algo, file: file, data: data} + if err := out.parseMeta(); err != nil { + _ = out.close() + return nil, err + } + return out, nil +} + +// close unmaps and closes one table file. +func (table *tableFile) close() error { + var closeErr error + if table.data != nil { + if err := syscall.Munmap(table.data); err != nil && closeErr == nil { + closeErr = err + } + table.data = nil + } + if table.file != nil { + if err := table.file.Close(); err != nil && closeErr == nil { + closeErr = err + } + table.file = nil + } + return closeErr +} + +// parseMeta validates header/footer and section boundaries. +func (table *tableFile) parseMeta() error { + if len(table.data) < 24 { + return fmt.Errorf("refstore/reftable: table %q: file too short", table.name) + } + if string(table.data[:4]) != reftableMagic { + return fmt.Errorf("refstore/reftable: table %q: bad magic", table.name) + } + version := table.data[4] + switch version { + case version1: + table.headerLen = 24 + if table.algo != objectid.AlgorithmSHA1 { + return fmt.Errorf("refstore/reftable: table %q: version 1 requires sha1", table.name) + } + case version2: + table.headerLen = 28 + if len(table.data) < table.headerLen { + return fmt.Errorf("refstore/reftable: table %q: truncated header", table.name) + } + hashID := binary.BigEndian.Uint32(table.data[24:28]) + if err := validateHashID(hashID, table.algo); err != nil { + return fmt.Errorf("refstore/reftable: table %q: %w", table.name, err) + } + default: + return fmt.Errorf("refstore/reftable: table %q: unsupported version %d", table.name, version) + } + table.blockSize = int(readUint24(table.data[5:8])) + + footerLen := 68 + if version == version2 { + footerLen = 72 + } + if len(table.data) < footerLen { + return fmt.Errorf("refstore/reftable: table %q: missing footer", table.name) + } + footerStart := len(table.data) - footerLen + footer := table.data[footerStart:] + if string(footer[:4]) != reftableMagic || footer[4] != version { + return fmt.Errorf("refstore/reftable: table %q: invalid footer header", table.name) + } + wantCRC := binary.BigEndian.Uint32(footer[footerLen-4:]) + haveCRC := crc32.ChecksumIEEE(footer[:footerLen-4]) + if wantCRC != haveCRC { + return fmt.Errorf("refstore/reftable: table %q: footer crc mismatch", table.name) + } + if version == version2 { + hashID := binary.BigEndian.Uint32(footer[24:28]) + if err := validateHashID(hashID, table.algo); err != nil { + return fmt.Errorf("refstore/reftable: table %q: %w", table.name, err) + } + } + + off := table.headerLen + table.refIndexPos = binary.BigEndian.Uint64(footer[off : off+8]) + off += 8 + objPosAndLen := binary.BigEndian.Uint64(footer[off : off+8]) + off += 8 + objPos := objPosAndLen >> 5 + objIndexPos := binary.BigEndian.Uint64(footer[off : off+8]) + off += 8 + logPos := binary.BigEndian.Uint64(footer[off : off+8]) + off += 8 + logIndexPos := binary.BigEndian.Uint64(footer[off : off+8]) + _ = objIndexPos + _ = logIndexPos + + refEnd := uint64(footerStart) + if table.refIndexPos != 0 && table.refIndexPos < refEnd { + refEnd = table.refIndexPos + } + if objPos != 0 && objPos < refEnd { + refEnd = objPos + } + if logPos != 0 && logPos < refEnd { + refEnd = logPos + } + if refEnd < uint64(table.headerLen) || refEnd > uint64(len(table.data)) { + return fmt.Errorf("refstore/reftable: table %q: invalid ref section", table.name) + } + if table.refIndexPos > uint64(len(table.data)) { + return fmt.Errorf("refstore/reftable: table %q: invalid ref index position", table.name) + } + table.refEnd = int(refEnd) + return nil +} + +// validateHashID validates a reftable v2 hash identifier. +func validateHashID(hashID uint32, algo objectid.Algorithm) error { + switch hashID { + case hashIDSHA1: + if algo != objectid.AlgorithmSHA1 { + return errors.New("hash id sha1 mismatch") + } + return nil + case hashIDSHA256: + if algo != objectid.AlgorithmSHA256 { + return errors.New("hash id s256 mismatch") + } + return nil + default: + return fmt.Errorf("unknown hash id 0x%08x", hashID) + } +} + +// toRef converts a decoded record value into a public ref value. +func (record recordValue) toRef(name string) (ref.Ref, error) { + if record.deleted { + return nil, errors.New("refstore/reftable: cannot materialize deleted record") + } + if record.symbolicTarget != "" { + return ref.Symbolic{RefName: name, Target: record.symbolicTarget}, nil + } + if !record.hasDetached { + return nil, errors.New("refstore/reftable: malformed detached record") + } + return ref.Detached{RefName: name, ID: record.detachedID, Peeled: record.peeled}, nil +} -- cgit v1.3.1-10-gc9f91