aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Runxi Yu, 2025-11-14 00:00:00 +0000
committer: Runxi Yu, 2025-11-14 00:00:00 +0000
commit: 9ef659a016d4ffeac931291984a4c71f9527a747 (patch)
tree: 957a76630fe248b638c0a9c84f7acef40a7ee9f5
parent: Initial commit (diff)
signature
Read types and sizes without inflating entire object
-rw-r--r-- loose.go 111
-rw-r--r-- obj.go 19
-rw-r--r-- pack_pack.go 83
-rw-r--r-- repo_test.go 296
4 files changed, 487 insertions, 22 deletions
diff --git a/loose.go b/loose.go
index 78c483c7..c32311f5 100644
--- a/loose.go
+++ b/loose.go
@@ -10,6 +10,8 @@ import (
"strconv"
)
+// looseHeaderLimit caps how many header bytes looseTypeSize will accumulate
+// while searching for the NUL terminator; a longer header is rejected with
+// ErrInvalidObject.
+const looseHeaderLimit = 4096
+
func loosePath(id Hash) string {
hex := id.String()
return filepath.Join("objects", hex[:2], hex[2:])
@@ -53,30 +55,11 @@ func (repo *Repository) looseReadTyped(id Hash) (ObjType, []byte, error) {
header := raw[:nul]
body := raw[nul+1:]
- space := bytes.IndexByte(header, ' ')
- if space < 0 {
- return ObjInvalid, nil, ErrInvalidObject
- }
- tyStr := string(header[:space])
- var ty ObjType
- switch tyStr {
- case "blob":
- ty = ObjBlob
- case "tree":
- ty = ObjTree
- case "commit":
- ty = ObjCommit
- case "tag":
- ty = ObjTag
- default:
- return ObjInvalid, nil, ErrInvalidObject
- }
- expect := header[space+1:]
- size, err := strconv.Atoi(string(expect))
+ ty, declaredSize, err := parseLooseHeader(header)
if err != nil {
- return ObjInvalid, nil, fmt.Errorf("furgit: loose: size parse: %w", err)
+ return ObjInvalid, nil, err
}
- if size != len(body) {
+ if declaredSize != int64(len(body)) {
return ObjInvalid, nil, ErrInvalidObject
}
if !verifyRawObject(raw, id) {
@@ -86,3 +69,87 @@ func (repo *Repository) looseReadTyped(id Hash) (ObjType, []byte, error) {
out := append([]byte(nil), body...)
return ty, out, nil
}
+
+// looseTypeSize opens the loose object file for id and inflates only as far
+// as the NUL byte that terminates the header, then hands the header bytes to
+// parseLooseHeader.
+//
+// It returns ErrNotFound when the object file does not exist, and
+// ErrInvalidObject when the stream ends before a NUL is seen or the header
+// grows past looseHeaderLimit bytes. Any other open, zlib, or read error is
+// returned unchanged.
+func (repo *Repository) looseTypeSize(id Hash) (ObjType, int64, error) {
+	// #nosec G304
+	file, err := os.Open(repo.repoPath(loosePath(id)))
+	if err != nil && os.IsNotExist(err) {
+		return ObjInvalid, 0, ErrNotFound
+	}
+	if err != nil {
+		return ObjInvalid, 0, err
+	}
+	defer func() { _ = file.Close() }()
+
+	inflater, err := zlib.NewReader(file)
+	if err != nil {
+		return ObjInvalid, 0, err
+	}
+	defer func() { _ = inflater.Close() }()
+
+	var scratch [128]byte
+	header := make([]byte, 0, 64)
+	for {
+		n, readErr := inflater.Read(scratch[:])
+		piece := scratch[:n]
+
+		// Keep only the bytes in front of the terminator, if this read
+		// happened to contain it.
+		nul := bytes.IndexByte(piece, 0)
+		if nul >= 0 {
+			piece = piece[:nul]
+		}
+		header = append(header, piece...)
+		if len(header) > looseHeaderLimit {
+			return ObjInvalid, 0, ErrInvalidObject
+		}
+		if nul >= 0 {
+			// A read error delivered together with the terminator is
+			// irrelevant: the header is already complete.
+			return parseLooseHeader(header)
+		}
+
+		switch readErr {
+		case nil:
+			// Header not finished yet; read the next chunk.
+		case io.EOF:
+			// The stream ended without ever producing a NUL.
+			return ObjInvalid, 0, ErrInvalidObject
+		default:
+			return ObjInvalid, 0, readErr
+		}
+	}
+}
+
+// parseLooseHeader decodes a loose object header of the form "<type> <size>".
+// header must be the bytes in front of the NUL terminator, with the NUL
+// itself already removed.
+//
+// It returns ErrInvalidObject when there is no space separator, when the type
+// name is rejected by objTypeFromName, or when the size field is empty or
+// starts with a sign character. A size field that strconv.ParseInt cannot
+// parse as a base-10 int64 yields a wrapped "furgit: loose: size parse" error.
+func parseLooseHeader(header []byte) (ObjType, int64, error) {
+	space := bytes.IndexByte(header, ' ')
+	if space < 0 {
+		return ObjInvalid, 0, ErrInvalidObject
+	}
+	ty, err := objTypeFromName(string(header[:space]))
+	if err != nil {
+		return ObjInvalid, 0, err
+	}
+	digits := header[space+1:]
+	if len(digits) == 0 {
+		return ObjInvalid, 0, ErrInvalidObject
+	}
+	// strconv.ParseInt accepts a leading '+' or '-', which would let headers
+	// such as "blob +5" or "blob -0" through as valid sizes. Reject any sign
+	// up front; this also covers every negative value.
+	if digits[0] == '+' || digits[0] == '-' {
+		return ObjInvalid, 0, ErrInvalidObject
+	}
+	size, err := strconv.ParseInt(string(digits), 10, 64)
+	if err != nil {
+		return ObjInvalid, 0, fmt.Errorf("furgit: loose: size parse: %w", err)
+	}
+	return ty, size, nil
+}
+
+// objTypeFromName maps an object type name onto its ObjType constant. Only
+// the four names objNameBlob, objNameTree, objNameCommit and objNameTag are
+// recognised; anything else yields ObjInvalid and ErrInvalidObject.
+func objTypeFromName(name string) (ObjType, error) {
+	var ty ObjType
+	switch name {
+	case objNameBlob:
+		ty = ObjBlob
+	case objNameTree:
+		ty = ObjTree
+	case objNameCommit:
+		ty = ObjCommit
+	case objNameTag:
+		ty = ObjTag
+	default:
+		return ObjInvalid, ErrInvalidObject
+	}
+	return ty, nil
+}
diff --git a/obj.go b/obj.go
index 5ce639f9..d3d69c25 100644
--- a/obj.go
+++ b/obj.go
@@ -117,3 +117,22 @@ func (repo *Repository) ReadObject(id Hash) (Object, error) {
}
return obj, err
}
+
+// ReadObjectTypeSize reports the type and size of the object named by id
+// without reading the object body in full.
+//
+// The loose object store is tried first. Only when that lookup reports
+// ErrNotFound is the pack index searched; any other loose-store error is
+// returned as is. An id that the pack index does not know either yields
+// ErrInvalidObject.
+func (repo *Repository) ReadObjectTypeSize(id Hash) (ObjType, int64, error) {
+	ty, size, looseErr := repo.looseTypeSize(id)
+	switch {
+	case looseErr == nil:
+		return ty, size, nil
+	case !errors.Is(looseErr, ErrNotFound):
+		return ObjInvalid, 0, looseErr
+	}
+
+	// Not a loose object: fall back to the packs.
+	loc, packErr := repo.packIndexFind(id)
+	if packErr == nil {
+		return repo.packTypeSizeAtLocation(loc, nil)
+	}
+	if errors.Is(packErr, ErrNotFound) {
+		// NOTE(review): an id missing from both stores is reported as
+		// ErrInvalidObject rather than ErrNotFound - confirm this is the
+		// intended contract for callers.
+		return ObjInvalid, 0, ErrInvalidObject
+	}
+	return ObjInvalid, 0, packErr
+}
diff --git a/pack_pack.go b/pack_pack.go
index 20974669..ee4d2b7a 100644
--- a/pack_pack.go
+++ b/pack_pack.go
@@ -73,6 +73,25 @@ func (repo *Repository) packBodyResolveAtLocation(loc PackLocation) (ObjType, bo
return repo.packBodyResolveWithin(pf, loc.Offset)
}
+// packTypeSizeAtLocation obtains the pack file named by loc.PackPath and
+// reports the type and size of the entry at loc.Offset. seen is forwarded
+// untouched to packTypeSizeWithin. A failure to obtain the pack file is
+// returned as is.
+func (repo *Repository) packTypeSizeAtLocation(loc PackLocation, seen map[packKey]struct{}) (ObjType, int64, error) {
+	pack, openErr := repo.packFile(loc.PackPath)
+	if openErr == nil {
+		return repo.packTypeSizeWithin(pack, loc.Offset, seen)
+	}
+	return ObjInvalid, 0, openErr
+}
+
+// packTypeSizeByID reports the type and size of the object named by id,
+// looking in the pack index first and falling back to the loose store only
+// when the index reports ErrNotFound. Any other index error is returned as
+// is. seen is forwarded to packTypeSizeAtLocation.
+func (repo *Repository) packTypeSizeByID(id Hash, seen map[packKey]struct{}) (ObjType, int64, error) {
+	loc, findErr := repo.packIndexFind(id)
+	switch {
+	case findErr == nil:
+		return repo.packTypeSizeAtLocation(loc, seen)
+	case errors.Is(findErr, ErrNotFound):
+		return repo.looseTypeSize(id)
+	default:
+		return ObjInvalid, 0, findErr
+	}
+}
+
func packHeaderRead(r io.Reader) (ObjType, int, error) {
var b [1]byte
_, err := io.ReadFull(r, b[:])
@@ -203,6 +222,70 @@ func (repo *Repository) packBodyResolveByID(id Hash) (ObjType, borrowedBody, err
return ty, borrowedFromOwned(body), nil
}
+// packKey identifies one pack entry by pack path and byte offset. It is the
+// key of the "seen" set that packTypeSizeWithin uses to detect a base chain
+// that loops back onto an entry already being resolved.
+type packKey struct {
+ path string
+ ofs uint64
+}
+
+// packTypeSizeWithin reads the entry header at offset ofs of pf and reports
+// the entry's object type together with the size stored in that header.
+//
+// For commit, tree, blob and tag entries the header values are returned
+// directly. For delta entries the type is taken from the base entry (followed
+// recursively, by hash for ref deltas and by offset for ofs deltas) while the
+// size remains the one read from this entry's own header.
+//
+// seen holds the entries currently being resolved on this call chain; a nil
+// map is replaced by a fresh one. Meeting an entry that is already in seen,
+// a nil pf, an ofs delta whose distance is not smaller than ofs, or an
+// unsupported entry type all yield ErrInvalidObject.
+//
+// NOTE(review): in the git pack format the size in a delta entry's header is
+// the length of the delta data, not of the reconstructed object. If
+// packHeaderRead returns that header value verbatim, the size reported here
+// for deltas is not the final object size - confirm against the pack format
+// and the test fixtures.
+func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjType, int64, error) {
+ if pf == nil {
+ return ObjInvalid, 0, ErrInvalidObject
+ }
+ if seen == nil {
+ seen = make(map[packKey]struct{})
+ }
+ key := packKey{path: pf.relPath, ofs: ofs}
+ if _, dup := seen[key]; dup {
+ // This entry is already being resolved further up the chain: a cycle.
+ return ObjInvalid, 0, ErrInvalidObject
+ }
+ seen[key] = struct{}{}
+ // Remove the key on the way out so seen only tracks the active chain.
+ defer delete(seen, key)
+
+ r, err := pf.cursor(ofs)
+ if err != nil {
+ return ObjInvalid, 0, err
+ }
+ ty, size, err := packHeaderRead(r)
+ if err != nil {
+ return ObjInvalid, 0, err
+ }
+ declaredSize := int64(size)
+
+ switch ty {
+ case ObjCommit, ObjTree, ObjBlob, ObjTag:
+ return ty, declaredSize, nil
+ case ObjRefDelta:
+ // The base object's hash follows the entry header.
+ var base Hash
+ _, err := io.ReadFull(r, base[:])
+ if err != nil {
+ return ObjInvalid, 0, err
+ }
+ // Only the base's type is used; its size is discarded.
+ baseTy, _, err := repo.packTypeSizeByID(base, seen)
+ if err != nil {
+ return ObjInvalid, 0, err
+ }
+ return baseTy, declaredSize, nil
+ case ObjOfsDelta:
+ dist, err := packDeltaReadOfsDistance(r)
+ if err != nil {
+ return ObjInvalid, 0, err
+ }
+ // Guard the subtraction below; a distance of ofs or more is rejected.
+ if ofs <= dist {
+ return ObjInvalid, 0, ErrInvalidObject
+ }
+ baseOfs := ofs - dist
+ baseTy, _, err := repo.packTypeSizeWithin(pf, baseOfs, seen)
+ if err != nil {
+ return ObjInvalid, 0, err
+ }
+ return baseTy, declaredSize, nil
+ case ObjInvalid, ObjFuture:
+ return ObjInvalid, 0, ErrInvalidObject
+ default:
+ return ObjInvalid, 0, ErrInvalidObject
+ }
+}
+
func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjType, borrowedBody, error) {
r, err := pf.cursor(ofs)
if err != nil {
diff --git a/repo_test.go b/repo_test.go
index 22b306c9..344f817f 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -3,10 +3,13 @@ package furgit
import (
"bytes"
"compress/zlib"
+ "encoding/binary"
"errors"
"fmt"
+ "math"
"os"
"path/filepath"
+ "sort"
"testing"
)
@@ -120,3 +123,296 @@ func TestResolveHEAD(t *testing.T) {
t.Fatal("expected error for detached HEAD")
}
}
+
+// TestReadObjectTypeSizeLoose writes a single loose blob and checks that
+// ReadObjectTypeSize reports ObjBlob together with the payload's length.
+func TestReadObjectTypeSizeLoose(t *testing.T) {
+	t.Parallel()
+	dir := t.TempDir()
+	repo, err := OpenRepository(dir)
+	if err != nil {
+		t.Fatalf("OpenRepository error: %v", err)
+	}
+	t.Cleanup(func() { _ = repo.Close() })
+
+	payload := []byte("header-only read")
+	blobID := writeLooseBlob(t, dir, payload)
+
+	gotType, gotSize, err := repo.ReadObjectTypeSize(blobID)
+	if err != nil {
+		t.Fatalf("ReadObjectTypeSize loose error: %v", err)
+	}
+	if wantSize := int64(len(payload)); gotType != ObjBlob || gotSize != wantSize {
+		t.Fatalf("unexpected loose metadata ty=%d size=%d", gotType, gotSize)
+	}
+}
+
+// TestReadObjectTypeSizePackedObjects writes a pack holding a full blob and
+// an ofs-delta blob based on it, then checks that ReadObjectTypeSize reports
+// ObjBlob and the expected body length for each of the two ids.
+func TestReadObjectTypeSizePackedObjects(t *testing.T) {
+	t.Parallel()
+	dir := t.TempDir()
+
+	full := testPackObject{finalType: ObjBlob, body: []byte("packed base payload")}
+	delta := testPackObject{
+		finalType: ObjBlob,
+		body:      []byte("packed delta payload with extra bytes"),
+		encoding:  packEncodingOfsDelta,
+		baseIndex: 0,
+	}
+	ids := writeTestPack(t, dir, "pack-basic", []testPackObject{full, delta})
+
+	repo, err := OpenRepository(dir)
+	if err != nil {
+		t.Fatalf("OpenRepository error: %v", err)
+	}
+	t.Cleanup(func() { _ = repo.Close() })
+
+	// Entry stored in full.
+	gotType, gotSize, err := repo.ReadObjectTypeSize(ids[0])
+	if err != nil {
+		t.Fatalf("ReadObjectTypeSize base error: %v", err)
+	}
+	if wantSize := int64(len(full.body)); gotType != ObjBlob || gotSize != wantSize {
+		t.Fatalf("unexpected base metadata ty=%d size=%d", gotType, gotSize)
+	}
+
+	// Entry stored as an ofs delta against the first one.
+	gotType, gotSize, err = repo.ReadObjectTypeSize(ids[1])
+	if err != nil {
+		t.Fatalf("ReadObjectTypeSize delta error: %v", err)
+	}
+	if wantSize := int64(len(delta.body)); gotType != ObjBlob || gotSize != wantSize {
+		t.Fatalf("unexpected delta metadata ty=%d size=%d", gotType, gotSize)
+	}
+}
+
+// TestReadObjectTypeSizePackRefDeltaLooseBase stores a blob as a loose
+// object, writes a pack whose only entry is a ref delta against that blob,
+// and checks that ReadObjectTypeSize reports ObjBlob and the delta target's
+// length for the packed id.
+func TestReadObjectTypeSizePackRefDeltaLooseBase(t *testing.T) {
+	t.Parallel()
+	dir := t.TempDir()
+
+	basePayload := []byte("loose base for ref delta")
+	baseID := writeLooseBlob(t, dir, basePayload)
+
+	refDelta := testPackObject{
+		finalType: ObjBlob,
+		body:      []byte("ref delta rewritten body"),
+		encoding:  packEncodingRefDelta,
+		baseHash:  baseID,
+		baseBody:  basePayload,
+	}
+	ids := writeTestPack(t, dir, "pack-ref", []testPackObject{refDelta})
+
+	repo, err := OpenRepository(dir)
+	if err != nil {
+		t.Fatalf("OpenRepository error: %v", err)
+	}
+	t.Cleanup(func() { _ = repo.Close() })
+
+	gotType, gotSize, err := repo.ReadObjectTypeSize(ids[0])
+	if err != nil {
+		t.Fatalf("ReadObjectTypeSize ref delta error: %v", err)
+	}
+	if wantSize := int64(len(refDelta.body)); gotType != ObjBlob || gotSize != wantSize {
+		t.Fatalf("unexpected ref delta metadata ty=%d size=%d", gotType, gotSize)
+	}
+}
+
+// packObjectEncoding selects how writeTestPack stores a testPackObject in the
+// generated pack.
+type packObjectEncoding uint8
+
+const (
+ // packEncodingFull stores the compressed body under the object's own type.
+ packEncodingFull packObjectEncoding = iota
+ // packEncodingOfsDelta stores a delta whose base is an earlier entry of
+ // the same pack, referenced by offset distance.
+ packEncodingOfsDelta
+ // packEncodingRefDelta stores a delta whose base is referenced by hash.
+ packEncodingRefDelta
+)
+
+// testPackObject describes one entry for writeTestPack to emit.
+type testPackObject struct {
+ // finalType is the object's type; with body it determines the object id.
+ finalType ObjType
+ // body is the object's full content.
+ body []byte
+ // encoding chooses full, ofs-delta or ref-delta storage.
+ encoding packObjectEncoding
+ // baseIndex is the index, within the same slice, of the base entry for an
+ // ofs delta; it must refer to an earlier entry.
+ baseIndex int
+ // baseHash is the id of the base object for a ref delta.
+ baseHash Hash
+ // baseBody is the base object's content for a ref delta; only its length
+ // is used when building the delta.
+ baseBody []byte
+}
+
+// writeTestPack builds a version-2 pack containing objs, writes it to
+// root/objects/pack/name.pack, writes the matching index through
+// writeTestPackIndex, and returns the object ids in the same order as objs.
+//
+// Every id is computed from the object's final type and body, regardless of
+// how the entry is encoded. Any failure aborts the test via t.Fatalf.
+//
+// NOTE(review): delta entries are written with len(obj.body) as the size in
+// the entry header. In the git pack format that field is the length of the
+// delta data, not of the reconstructed object - confirm whether the fixture
+// is meant to deviate from the format here.
+func writeTestPack(t *testing.T, root, name string, objs []testPackObject) []Hash {
+ t.Helper()
+ packDir := filepath.Join(root, "objects", "pack")
+ err := os.MkdirAll(packDir, 0o750)
+ if err != nil {
+ t.Fatalf("mkdir pack dir: %v", err)
+ }
+
+ // Pack header: "PACK" signature, version, object count.
+ var buf bytes.Buffer
+ buf.Write([]byte{'P', 'A', 'C', 'K'})
+ err = binary.Write(&buf, binary.BigEndian, uint32(packVersion2))
+ if err != nil {
+ t.Fatalf("write pack version: %v", err)
+ }
+ objCount := len(objs)
+ if objCount > math.MaxUint32 {
+ t.Fatalf("too many objects: %d", len(objs))
+ }
+ count32 := uint32(objCount) //#nosec G115
+ err = binary.Write(&buf, binary.BigEndian, count32)
+ if err != nil {
+ t.Fatalf("write pack count: %v", err)
+ }
+
+ offsets := make([]uint64, len(objs))
+ ids := make([]Hash, len(objs))
+
+ for i, obj := range objs {
+ // Record where this entry starts; ofs deltas and the index need it.
+ offset := buf.Len()
+ if offset < 0 {
+ t.Fatalf("negative buffer length")
+ }
+ offsets[i] = uint64(offset)
+ // The id is the hash of the object header followed by the body.
+ header, err := headerForType(obj.finalType, obj.body)
+ if err != nil {
+ t.Fatalf("headerForType: %v", err)
+ }
+ raw := make([]byte, len(header)+len(obj.body))
+ copy(raw, header)
+ copy(raw[len(header):], obj.body)
+ ids[i] = computeRawHash(raw)
+
+ switch obj.encoding {
+ case packEncodingFull:
+ buf.Write(encodePackHeader(obj.finalType, len(obj.body)))
+ buf.Write(compressBytes(t, obj.body))
+ case packEncodingOfsDelta:
+ // The base must be an entry written earlier in this pack.
+ if obj.baseIndex < 0 || obj.baseIndex >= i {
+ t.Fatalf("invalid base index %d for ofs delta %d", obj.baseIndex, i)
+ }
+ buf.Write(encodePackHeader(ObjOfsDelta, len(obj.body)))
+ dist := offsets[i] - offsets[obj.baseIndex]
+ buf.Write(encodeOfsDistance(dist))
+ baseBody := objs[obj.baseIndex].body
+ delta := buildInsertOnlyDelta(len(baseBody), obj.body)
+ buf.Write(compressBytes(t, delta))
+ case packEncodingRefDelta:
+ if obj.baseHash == (Hash{}) {
+ t.Fatalf("ref delta %d missing base hash", i)
+ }
+ baseBody := obj.baseBody
+ if len(baseBody) == 0 {
+ t.Fatalf("ref delta %d missing base body", i)
+ }
+ buf.Write(encodePackHeader(ObjRefDelta, len(obj.body)))
+ buf.Write(obj.baseHash[:])
+ delta := buildInsertOnlyDelta(len(baseBody), obj.body)
+ buf.Write(compressBytes(t, delta))
+ default:
+ t.Fatalf("unknown encoding %d", obj.encoding)
+ }
+ }
+
+ // Trailer: hash of everything written so far.
+ packContent := append([]byte(nil), buf.Bytes()...)
+ packChecksum := newHash(packContent)
+ buf.Write(packChecksum[:])
+ packBytes := buf.Bytes()
+
+ packPath := filepath.Join(packDir, name+".pack")
+ err = os.WriteFile(packPath, packBytes, 0o600)
+ if err != nil {
+ t.Fatalf("write pack file: %v", err)
+ }
+
+ writeTestPackIndex(t, packDir, name, ids, offsets, packChecksum)
+ return ids
+}
+
+// writeTestPackIndex writes packDir/name.idx for a pack whose objects have
+// the given ids and offsets (parallel slices) and whose trailing checksum is
+// packChecksum.
+//
+// The file is laid out as: magic, version, a 256-entry cumulative fanout
+// table, the ids in ascending byte order, a zero-filled table of 4 bytes per
+// entry (the CRC32 slot of the version-2 layout, left zeroed by this helper),
+// the 32-bit offsets, a copy of the pack checksum, and finally the checksum
+// of everything before it. Offsets of 0x80000000 or more are not supported
+// and fail the test, as does any write error.
+func writeTestPackIndex(t *testing.T, packDir, name string, ids []Hash, offsets []uint64, packChecksum [HashSize]byte) {
+	t.Helper()
+	type idxEntry struct {
+		id     Hash
+		offset uint64
+	}
+	entries := make([]idxEntry, len(ids))
+	for i := range ids {
+		entries[i] = idxEntry{id: ids[i], offset: offsets[i]}
+	}
+	sort.Slice(entries, func(i, j int) bool {
+		return bytes.Compare(entries[i].id[:], entries[j].id[:]) < 0
+	})
+
+	var buf bytes.Buffer
+	err := binary.Write(&buf, binary.BigEndian, uint32(idxMagic))
+	if err != nil {
+		t.Fatalf("write idx magic: %v", err)
+	}
+	err = binary.Write(&buf, binary.BigEndian, uint32(idxVersion2))
+	if err != nil {
+		t.Fatalf("write idx version: %v", err)
+	}
+
+	// fanout[b] is the number of ids whose first byte is <= b: count each
+	// first byte once, then turn the counts into running totals.
+	var fanout [256]uint32
+	for _, entry := range entries {
+		fanout[entry.id[0]]++
+	}
+	for i := 1; i < len(fanout); i++ {
+		fanout[i] += fanout[i-1]
+	}
+	for _, count := range fanout {
+		err = binary.Write(&buf, binary.BigEndian, count)
+		if err != nil {
+			t.Fatalf("write fanout: %v", err)
+		}
+	}
+
+	for _, entry := range entries {
+		buf.Write(entry.id[:])
+	}
+
+	// Per-entry 4-byte table, left as zeros.
+	buf.Write(make([]byte, len(entries)*4))
+
+	for _, entry := range entries {
+		if entry.offset >= 0x80000000 {
+			t.Fatalf("offset too large for 32-bit table")
+		}
+		var word [4]byte
+		binary.BigEndian.PutUint32(word[:], uint32(entry.offset))
+		buf.Write(word[:])
+	}
+
+	// The index checksum is the last field and covers every byte that
+	// precedes it, so the pack checksum copy must be appended before the
+	// hash is taken.
+	buf.Write(packChecksum[:])
+	idxData := append([]byte(nil), buf.Bytes()...)
+	idxChecksum := newHash(idxData)
+	buf.Write(idxChecksum[:])
+
+	idxPath := filepath.Join(packDir, name+".idx")
+	err = os.WriteFile(idxPath, buf.Bytes(), 0o600)
+	if err != nil {
+		t.Fatalf("write idx file: %v", err)
+	}
+}
+
+// buildInsertOnlyDelta returns a delta that never copies from its base: it
+// starts with encodeVarint(srcLen) and encodeVarint(len(dst)), followed by
+// dst cut into pieces of at most 127 bytes, each piece preceded by a single
+// byte holding its length. An empty dst produces the two sizes and nothing
+// more.
+func buildInsertOnlyDelta(srcLen int, dst []byte) []byte {
+	const maxPiece = 127
+
+	var out bytes.Buffer
+	out.Write(encodeVarint(srcLen))
+	out.Write(encodeVarint(len(dst)))
+	for start := 0; start < len(dst); start += maxPiece {
+		end := start + maxPiece
+		if end > len(dst) {
+			end = len(dst)
+		}
+		out.WriteByte(byte(end - start))
+		out.Write(dst[start:end])
+	}
+	return out.Bytes()
+}
+
+// encodeOfsDistance encodes dist as the variable-length distance field of an
+// ofs-delta pack entry.
+//
+// The result is big-endian groups of seven bits. Every byte except the last
+// has its high bit set, and every group except the last is stored minus one.
+// That decrement must be carried into the remaining high bits before the
+// next shift; otherwise values whose continuation group is zero (16384, for
+// example) come out one byte too long and decode to a different distance.
+func encodeOfsDistance(dist uint64) []byte {
+	// A 64-bit value needs at most ten 7-bit groups.
+	var scratch [10]byte
+	pos := len(scratch) - 1
+	scratch[pos] = byte(dist & 0x7f)
+	for dist >>= 7; dist != 0; dist >>= 7 {
+		dist-- // stored back so that a borrow reaches the higher groups
+		pos--
+		scratch[pos] = byte(dist&0x7f) | 0x80
+	}
+	return append([]byte(nil), scratch[pos:]...)
+}