From 55ea743f2d840711ac44e5a015303cd16fcf347d Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Sun, 14 Jun 2026 13:34:51 +0000 Subject: cmd/explain-pack: Add --- cmd/explain-pack/delta.go | 120 +++++++++++++++++++++ cmd/explain-pack/doc.go | 10 ++ cmd/explain-pack/entry.go | 257 ++++++++++++++++++++++++++++++++++++++++++++ cmd/explain-pack/fmt.go | 30 ++++++ cmd/explain-pack/main.go | 240 +++++++++++++++++++++++++++++++++++++++++ cmd/explain-pack/resolve.go | 161 +++++++++++++++++++++++++++ 6 files changed, 818 insertions(+) create mode 100644 cmd/explain-pack/delta.go create mode 100644 cmd/explain-pack/doc.go create mode 100644 cmd/explain-pack/entry.go create mode 100644 cmd/explain-pack/fmt.go create mode 100644 cmd/explain-pack/main.go create mode 100644 cmd/explain-pack/resolve.go (limited to 'cmd/explain-pack') diff --git a/cmd/explain-pack/delta.go b/cmd/explain-pack/delta.go new file mode 100644 index 00000000..22e32195 --- /dev/null +++ b/cmd/explain-pack/delta.go @@ -0,0 +1,120 @@ +package main + +import ( + "fmt" +) + +func (explainer *explainer) walkDelta(base, payload []byte, pos int) ([]byte, bool, error) { + explainer.printf("\tdelta\n") + + building := base != nil + + var result []byte + + insn := 0 + + for pos < len(payload) { + op := payload[pos] + pos++ + insn++ + + switch { + case op&0x80 != 0: + next, seg, err := explainer.decodeCopy(base, payload, pos, op) + if err != nil { + return nil, false, err + } + + pos = next + + if building { + result = append(result, seg...) + } + case op != 0: + next, lit, err := explainer.decodeInsert(payload, pos, int(op)) + if err != nil { + return nil, false, err + } + + pos = next + + if building { + result = append(result, lit...) + } + default: + explainer.printf("\t\tinvalid opcode 0x00; stopping delta decode\n") + + return nil, false, nil + } + } + + if !building { + return nil, false, nil + } + + return result, true, nil +} + +func (explainer *explainer) decodeCopy(base, payload []byte, pos int, op byte) (int, []byte, error) { + offset := 0 + + for i := range 4 { + if op&(1<= len(payload) { + return 0, nil, fmt.Errorf("truncated copy offset") + } + + offset |= int(payload[pos]) << (8 * uint(i)) + pos++ + } + + size := 0 + + for i := range 3 { + if op&(1<= len(payload) { + return 0, nil, fmt.Errorf("truncated copy size") + } + + size |= int(payload[pos]) << (8 * uint(i)) + pos++ + } + + if size == 0 { + size = 0x10000 + } + + explainer.printf("\t\tcpy %d from %d\n", size, offset) + + if base == nil { + return pos, nil, nil + } + + if offset < 0 || offset+size > len(base) { + return 0, nil, fmt.Errorf("copy of %d byte(s) from base offset %d exceeds base length %d", size, offset, len(base)) + } + + seg := base[offset : offset+size] + hexBlock(explainer.out, "\t\t\t", seg) + + return pos, seg, nil +} + +func (explainer *explainer) decodeInsert(payload []byte, pos, n int) (int, []byte, error) { + if pos+n > len(payload) { + return 0, nil, fmt.Errorf("truncated insert payload") + } + + lit := payload[pos : pos+n] + + explainer.printf("\t\tins %d\n", n) + hexBlock(explainer.out, "\t\t\t", lit) + + return pos + n, lit, nil +} diff --git a/cmd/explain-pack/doc.go b/cmd/explain-pack/doc.go new file mode 100644 index 00000000..f5fcc986 --- /dev/null +++ b/cmd/explain-pack/doc.go @@ -0,0 +1,10 @@ +// Command explain-pack reads a Git packfile and writes a +// human-readable explanation to stdout. +// +// With a pack filename argument +// the pack is mmap'd +// and a sibling .idx is used when present; +// with no argument the pack is read from stdin. +// A packfile does not record its object format, +// so the format must be given with -format. +package main diff --git a/cmd/explain-pack/entry.go b/cmd/explain-pack/entry.go new file mode 100644 index 00000000..0b796ef0 --- /dev/null +++ b/cmd/explain-pack/entry.go @@ -0,0 +1,257 @@ +package main + +import ( + "bytes" + "fmt" + "io" + + "lindenii.org/go/furgit/internal/compress/zlib" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packfile/delta" + "lindenii.org/go/furgit/object/tree" + "lindenii.org/go/lgo/intconv" +) + +func (explainer *explainer) explainEntry(num, count, cursor int) (int, error) { + hashSize := explainer.objectFormat.Size() + + header, err := packfile.ParseEntryHeader(explainer.data[cursor:], hashSize) + if err != nil { + return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err) + } + + payloadStart := cursor + header.HeaderLen + if payloadStart > len(explainer.data) { + return 0, fmt.Errorf("entry %d at offset %d: header runs past the end of the pack", num, cursor) + } + + payload, consumed, err := inflateAt(explainer.data[payloadStart:]) + if err != nil { + return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err) + } + + next := payloadStart + consumed + + explainer.printf("object %d of %d\n", num, count) + explainer.printf("\tty\t%s\n", entryTypeLabel(header.Type)) + explainer.printf("\tofs\t%d\n", cursor) + explainer.printf("\thdrsz\t%d\n", header.HeaderLen) + explainer.printf("\tsz\t%d\n", header.Size) + + if uint64(len(payload)) != header.Size { + explainer.printf("\tnote\tdeclared %d byte(s) but inflated to %d\n", header.Size, len(payload)) + } + + if header.Type.IsBase() { + err = explainer.renderBase(cursor, header.Type, payload, consumed) + } else { + err = explainer.renderDelta(cursor, header, payload, consumed) + } + + if err != nil { + return 0, fmt.Errorf("entry %d at offset %d: %w", num, cursor, err) + } + + explainer.printf("\n") + + return next, nil +} + +func (explainer *explainer) renderBase(cursor int, entryType packfile.EntryType, content []byte, consumed int) error { + explainer.renderContent(entryType, content) + + explainer.printf("\tzlib\t%d\n", consumed) + + oid, err := explainer.recomputeOID(entryType, content) + if err != nil { + return err + } + + explainer.printf("\toid\t%s\n", oid) + + explainer.oidIndex[oid] = cursor + explainer.cache.Add(cursor, resolvedBase{entryType: entryType, content: content}) + + return nil +} + +func (explainer *explainer) renderDelta(cursor int, header packfile.EntryHeader, payload []byte, consumed int) error { + baseSize, resultSize, pos, err := delta.ParseHeaderSizes(payload) + if err != nil { + return fmt.Errorf("delta header: %w", err) + } + + err = explainer.renderBaseRef(cursor, header) + if err != nil { + return err + } + + explainer.printf("\tbasesz\t%d\n", baseSize) + explainer.printf("\tnewsz\t%d\n", resultSize) + + baseOffset, located, err := explainer.baseOffset(cursor, header) + if err != nil { + return err + } + + var ( + baseType packfile.EntryType + baseContent []byte + baseResolved bool + ) + + if located { + baseType, baseContent, baseResolved, err = explainer.reconstruct(baseOffset, 0) + if err != nil { + return err + } + } + + var walkBase []byte + if baseResolved { + walkBase = baseContent + } + + result, complete, err := explainer.walkDelta(walkBase, payload, pos) + if err != nil { + return err + } + + explainer.printf("\tzlib\t%d\n", consumed) + + switch { + case baseResolved && complete: + if uint64(len(result)) != resultSize { + explainer.printf("\tnote\tdelta produced %d byte(s) but declared %d\n", len(result), resultSize) + } + + explainer.renderContent(baseType, result) + + newOID, err := explainer.recomputeOID(baseType, result) + if err != nil { + return err + } + + explainer.printf("\tnewoid\t%s\n", newOID) + + explainer.oidIndex[newOID] = cursor + explainer.cache.Add(cursor, resolvedBase{entryType: baseType, content: result}) + case !baseResolved: + explainer.printf("\tnote\tbase not available in this pack; cannot reconstruct\n") + default: + explainer.printf("\tnote\tdelta decode incomplete; cannot reconstruct\n") + } + + return nil +} + +func (explainer *explainer) renderBaseRef(cursor int, header packfile.EntryHeader) error { + switch header.Type { + case packfile.EntryTypeOfsDelta: + dist, err := intconv.Uint64ToInt(header.OfsDistance) + if err != nil { + return fmt.Errorf("ofs-delta distance overflows int: %w", err) + } + + explainer.printf("\tbaseofs\t-%d = %d\n", dist, cursor-dist) + case packfile.EntryTypeRefDelta: + baseID, err := explainer.objectFormat.FromBytes(header.RefBase[:explainer.objectFormat.Size()]) + if err != nil { + return fmt.Errorf("ref-delta base ID: %w", err) + } + + explainer.printf("\tbaseoid\t%s\n", baseID) + case packfile.EntryTypeInvalid, + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + packfile.EntryTypeFuture: + } + + return nil +} + +func (explainer *explainer) renderContent(entryType packfile.EntryType, content []byte) { + switch entryType { + case packfile.EntryTypeCommit, packfile.EntryTypeTag: + explainer.printf("\tcontent\n") + indentBlock(explainer.out, "\t\t", content) + case packfile.EntryTypeTree: + explainer.renderTree(content) + case packfile.EntryTypeBlob, + packfile.EntryTypeOfsDelta, + packfile.EntryTypeRefDelta, + packfile.EntryTypeInvalid, + packfile.EntryTypeFuture: + explainer.printf("\thexdump\n") + hexBlock(explainer.out, "\t\t", content) + } +} + +func (explainer *explainer) renderTree(content []byte) { + parsed, err := tree.Parse(content, explainer.objectFormat) + if err != nil { + explainer.printf("\thexdump\t(not a valid tree: %v)\n", err) + hexBlock(explainer.out, "\t\t", content) + + return + } + + explainer.printf("\ttree\n") + + for _, entry := range parsed.Entries() { + mode := string(entry.Mode.Append(nil)) + explainer.printf( + "\t\t%s %s %s\t%s\n", + mode, entry.Mode.ObjectType().Name(), entry.ID, entry.Name, + ) + } +} + +func inflateAt(data []byte) ([]byte, int, error) { + reader := bytes.NewReader(data) + + zr, err := zlib.NewReader(reader) + if err != nil { + return nil, 0, fmt.Errorf("opening zlib stream: %w", err) + } + + content, err := io.ReadAll(zr) + closeErr := zr.Close() + + if err != nil { + return nil, 0, fmt.Errorf("inflating payload: %w", err) + } + + if closeErr != nil { + return nil, 0, fmt.Errorf("closing zlib stream: %w", closeErr) + } + + consumed := len(data) - reader.Len() + + return content, consumed, nil +} + +func entryTypeLabel(entryType packfile.EntryType) string { + switch entryType { + case packfile.EntryTypeCommit: + return "commit" + case packfile.EntryTypeTree: + return "tree" + case packfile.EntryTypeBlob: + return "blob" + case packfile.EntryTypeTag: + return "tag" + case packfile.EntryTypeOfsDelta: + return "ofs-delta" + case packfile.EntryTypeRefDelta: + return "ref-delta" + case packfile.EntryTypeInvalid: + return "invalid" + case packfile.EntryTypeFuture: + return "future" + default: + return fmt.Sprintf("unknown (%d)", entryType) + } +} diff --git a/cmd/explain-pack/fmt.go b/cmd/explain-pack/fmt.go new file mode 100644 index 00000000..a3d1b333 --- /dev/null +++ b/cmd/explain-pack/fmt.go @@ -0,0 +1,30 @@ +package main + +import ( + "bytes" + "encoding/hex" + "io" + + "lindenii.org/go/furgit/internal/utils" +) + +func indentBlock(out io.Writer, indent string, block []byte) { + lines := bytes.Split(block, []byte("\n")) + if n := len(lines); n > 0 && len(lines[n-1]) == 0 { + lines = lines[:n-1] + } + + for _, line := range lines { + utils.BestEffortFprintf(out, "%s%s\n", indent, line) + } +} + +func hexBlock(out io.Writer, indent string, data []byte) { + var buf bytes.Buffer + + dumper := hex.Dumper(&buf) + _, _ = dumper.Write(data) + _ = dumper.Close() + + indentBlock(out, indent, buf.Bytes()) +} diff --git a/cmd/explain-pack/main.go b/cmd/explain-pack/main.go new file mode 100644 index 00000000..af5b7480 --- /dev/null +++ b/cmd/explain-pack/main.go @@ -0,0 +1,240 @@ +package main + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io" + "os" + "strings" + + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packidx" + "lindenii.org/go/furgit/internal/mmap" + "lindenii.org/go/furgit/internal/utils" + "lindenii.org/go/furgit/object/id" +) + +func main() { + format := flag.String("format", "", "object format of the pack: sha1 or sha256 (required)") + + flag.Parse() + + err := run(*format, flag.Args(), os.Stdin, os.Stdout) + if err != nil { + fmt.Fprintln(os.Stderr, "explain-pack:", err) + os.Exit(1) + } +} + +type explainer struct { + data []byte + objectFormat id.ObjectFormat + out *bufio.Writer + + idx *packidx.Packidx + + cache *baseCache + oidIndex map[id.ObjectID]int +} + +func run(format string, args []string, stdin io.Reader, stdout io.Writer) error { + if format == "" { + return fmt.Errorf("the -format flag is required (sha1 or sha256)") + } + + objectFormat, err := id.ParseObjectFormat(format) + if err != nil { + return fmt.Errorf("invalid -format %q: %w", format, err) + } + + if len(args) > 1 { + return fmt.Errorf("at most one pack file argument is accepted, got %d", len(args)) + } + + data, idx, closers, err := openInput(args, objectFormat, stdin) + if err != nil { + return err + } + + defer func() { + for _, c := range closers { + _ = c.Close() + } + }() + + out := bufio.NewWriter(stdout) + + explainer := &explainer{ + data: data, + objectFormat: objectFormat, + out: out, + idx: idx, + cache: newBaseCache(), + oidIndex: make(map[id.ObjectID]int), + } + + err = explainer.explain() + if err != nil { + return err + } + + return out.Flush() +} + +func openInput(args []string, objectFormat id.ObjectFormat, stdin io.Reader) ([]byte, *packidx.Packidx, []io.Closer, error) { + if len(args) == 0 { + data, err := io.ReadAll(stdin) + if err != nil { + return nil, nil, nil, fmt.Errorf("reading pack from stdin: %w", err) + } + + return data, nil, nil, nil + } + + packPath := args[0] + + packMapping, err := mapPath(packPath) + if err != nil { + return nil, nil, nil, err + } + + closers := []io.Closer{packMapping} + + idx, idxMapping, err := openIndex(packPath, objectFormat) + if err != nil { + _ = packMapping.Close() + + return nil, nil, nil, err + } + + if idxMapping != nil { + closers = append(closers, idxMapping) + } + + return packMapping.Data(), idx, closers, nil +} + +func openIndex(packPath string, objectFormat id.ObjectFormat) (*packidx.Packidx, *mmap.Mmap, error) { + idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" + + file, err := os.Open(idxPath) //#nosec G304 + if err != nil { + if os.IsNotExist(err) { + return nil, nil, nil + } + + return nil, nil, fmt.Errorf("opening index %q: %w", idxPath, err) + } + + defer func() { _ = file.Close() }() + + mapping, err := mmap.Open(file) + if err != nil { + return nil, nil, fmt.Errorf("mapping index %q: %w", idxPath, err) + } + + idx, err := packidx.Parse(mapping.Data(), objectFormat.Size()) + if err != nil { + _ = mapping.Close() + + return nil, nil, fmt.Errorf("parsing index %q: %w", idxPath, err) + } + + return &idx, mapping, nil +} + +func mapPath(path string) (*mmap.Mmap, error) { + file, err := os.Open(path) //#nosec G304 + if err != nil { + return nil, fmt.Errorf("opening pack %q: %w", path, err) + } + + defer func() { _ = file.Close() }() + + mapping, err := mmap.Open(file) + if err != nil { + return nil, fmt.Errorf("mapping pack %q: %w", path, err) + } + + return mapping, nil +} + +func (explainer *explainer) printf(format string, args ...any) { + utils.BestEffortFprintf(explainer.out, format, args...) +} + +func (explainer *explainer) explain() error { + hashSize := explainer.objectFormat.Size() + + if len(explainer.data) < packfile.HeaderLen+hashSize { + return fmt.Errorf("pack is too short to contain a header and a %d-byte trailer", hashSize) + } + + count, err := explainer.explainHeader() + if err != nil { + return err + } + + cursor := packfile.HeaderLen + + for num := 1; num <= count; num++ { + next, err := explainer.explainEntry(num, count, cursor) + if err != nil { + return err + } + + cursor = next + } + + return explainer.explainTrailer(cursor) +} + +func (explainer *explainer) explainHeader() (int, error) { + header, err := packfile.ParseHeader(explainer.data[:packfile.HeaderLen]) + if err != nil { + return 0, fmt.Errorf("pack header: %w", err) + } + + explainer.printf("pack header\n") + explainer.printf("\tmagic\t\"PACK\"\n") + explainer.printf("\tversion\t2\n") + explainer.printf("\tobjects\t%d\n", header.ObjectCount) + explainer.printf("\n") + + return int(header.ObjectCount), nil +} + +func (explainer *explainer) explainTrailer(cursor int) error { + hashSize := explainer.objectFormat.Size() + trailerStart := len(explainer.data) - hashSize + + if cursor != trailerStart { + explainer.printf( + "note\t%d byte(s) between the last entry and the trailer were unaccounted for\n", + trailerStart-cursor, + ) + } + + trailer := explainer.data[trailerStart:] + + explainer.printf("pack trailer\n") + explainer.printf("\tchecksum\t%s\n", hex.EncodeToString(trailer)) + + hashImpl, err := explainer.objectFormat.New() + if err != nil { + return fmt.Errorf("object/store: %w", err) + } + + _, _ = hashImpl.Write(explainer.data[:trailerStart]) + + if bytes.Equal(hashImpl.Sum(nil), trailer) { + explainer.printf("\trecomputed\tmatches\n") + } else { + explainer.printf("\trecomputed\tMISMATCH (corrupt pack or wrong -format)\n") + } + + return nil +} diff --git a/cmd/explain-pack/resolve.go b/cmd/explain-pack/resolve.go new file mode 100644 index 00000000..4396fe19 --- /dev/null +++ b/cmd/explain-pack/resolve.go @@ -0,0 +1,161 @@ +package main + +import ( + "fmt" + + "lindenii.org/go/furgit/internal/cache/clock" + "lindenii.org/go/furgit/internal/format/packfile" + "lindenii.org/go/furgit/internal/format/packfile/delta" + "lindenii.org/go/furgit/object/header" + "lindenii.org/go/furgit/object/id" + "lindenii.org/go/lgo/intconv" +) + +const baseCacheMaxWeight = 64 << 20 + +type resolvedBase struct { + entryType packfile.EntryType + content []byte +} + +type baseCache = clock.Clock[int, resolvedBase] + +func newBaseCache() *baseCache { + return clock.New(baseCacheMaxWeight, func(_ int, base resolvedBase) uint64 { + return uint64(len(base.content)) + 32 + }) +} + +func (explainer *explainer) reconstruct(offset, depth int) (packfile.EntryType, []byte, bool, error) { + var zero packfile.EntryType + + if depth > delta.MaxChainDepth { + return zero, nil, false, fmt.Errorf("delta chain too deep at offset %d", offset) + } + + if cached, ok := explainer.cache.Get(offset); ok { + return cached.entryType, cached.content, true, nil + } + + header, err := packfile.ParseEntryHeader(explainer.data[offset:], explainer.objectFormat.Size()) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + payloadStart := offset + header.HeaderLen + if payloadStart > len(explainer.data) { + return zero, nil, false, fmt.Errorf("entry at offset %d: header runs past end of pack", offset) + } + + if header.Type.IsBase() { + content, _, err := inflateAt(explainer.data[payloadStart:]) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + explainer.cache.Add(offset, resolvedBase{entryType: header.Type, content: content}) + + return header.Type, content, true, nil + } + + baseOffset, ok, err := explainer.baseOffset(offset, header) + if err != nil { + return zero, nil, false, err + } + + if !ok { + return zero, nil, false, nil + } + + baseType, baseContent, ok, err := explainer.reconstruct(baseOffset, depth+1) + if err != nil || !ok { + return zero, nil, ok, err + } + + payload, _, err := inflateAt(explainer.data[payloadStart:]) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + content, err := delta.Apply(baseContent, payload) + if err != nil { + return zero, nil, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + explainer.cache.Add(offset, resolvedBase{entryType: baseType, content: content}) + + return baseType, content, true, nil +} + +func (explainer *explainer) baseOffset(offset int, header packfile.EntryHeader) (int, bool, error) { + switch header.Type { + case packfile.EntryTypeOfsDelta: + dist, err := intconv.Uint64ToInt(header.OfsDistance) + if err != nil || dist <= 0 || dist > offset { + return 0, false, fmt.Errorf("entry at offset %d: ofs-delta base out of bounds", offset) + } + + return offset - dist, true, nil + case packfile.EntryTypeRefDelta: + refBytes := header.RefBase[:explainer.objectFormat.Size()] + + if explainer.idx != nil { + baseOffsetU, found, err := explainer.idx.Lookup(refBytes) + if err != nil { + return 0, false, fmt.Errorf("entry at offset %d: index lookup: %w", offset, err) + } + + if found { + baseOffset, err := intconv.Uint64ToInt(baseOffsetU) + if err != nil { + return 0, false, fmt.Errorf("entry at offset %d: index base offset overflows int: %w", offset, err) + } + + return baseOffset, true, nil + } + } + + baseID, err := explainer.objectFormat.FromBytes(refBytes) + if err != nil { + return 0, false, fmt.Errorf("entry at offset %d: %w", offset, err) + } + + if baseOffset, found := explainer.oidIndex[baseID]; found { + return baseOffset, true, nil + } + + return 0, false, nil + case packfile.EntryTypeInvalid, + packfile.EntryTypeCommit, + packfile.EntryTypeTree, + packfile.EntryTypeBlob, + packfile.EntryTypeTag, + packfile.EntryTypeFuture: + } + + return 0, false, fmt.Errorf("entry at offset %d: not a delta entry", offset) +} + +func (explainer *explainer) recomputeOID(entryType packfile.EntryType, content []byte) (id.ObjectID, error) { + var zero id.ObjectID + + objectType, err := entryType.ObjectType() + if err != nil { + return zero, err + } + + hashImpl, err := explainer.objectFormat.New() + if err != nil { + return zero, err + } + + _, _ = hashImpl.Write(header.Append(nil, objectType, len(content))) + _, _ = hashImpl.Write(content) + + oid, err := explainer.objectFormat.FromBytes(hashImpl.Sum(nil)) + if err != nil { + return zero, err + } + + return oid, nil +} -- cgit v1.3.1-10-gc9f91