diff options
| author | 2025-11-14 00:00:00 +0000 | |
|---|---|---|
| committer | 2025-11-14 00:00:00 +0000 | |
| commit | 9ef659a016d4ffeac931291984a4c71f9527a747 (patch) | |
| tree | 957a76630fe248b638c0a9c84f7acef40a7ee9f5 | |
| parent | Initial commit (diff) | |
| signature | ||
Read types and sizes without inflating entire object
| -rw-r--r-- | loose.go | 111 | ||||
| -rw-r--r-- | obj.go | 19 | ||||
| -rw-r--r-- | pack_pack.go | 83 | ||||
| -rw-r--r-- | repo_test.go | 296 |
4 files changed, 487 insertions, 22 deletions
@@ -10,6 +10,8 @@ import ( "strconv" ) +const looseHeaderLimit = 4096 + func loosePath(id Hash) string { hex := id.String() return filepath.Join("objects", hex[:2], hex[2:]) @@ -53,30 +55,11 @@ func (repo *Repository) looseReadTyped(id Hash) (ObjType, []byte, error) { header := raw[:nul] body := raw[nul+1:] - space := bytes.IndexByte(header, ' ') - if space < 0 { - return ObjInvalid, nil, ErrInvalidObject - } - tyStr := string(header[:space]) - var ty ObjType - switch tyStr { - case "blob": - ty = ObjBlob - case "tree": - ty = ObjTree - case "commit": - ty = ObjCommit - case "tag": - ty = ObjTag - default: - return ObjInvalid, nil, ErrInvalidObject - } - expect := header[space+1:] - size, err := strconv.Atoi(string(expect)) + ty, declaredSize, err := parseLooseHeader(header) if err != nil { - return ObjInvalid, nil, fmt.Errorf("furgit: loose: size parse: %w", err) + return ObjInvalid, nil, err } - if size != len(body) { + if declaredSize != int64(len(body)) { return ObjInvalid, nil, ErrInvalidObject } if !verifyRawObject(raw, id) { @@ -86,3 +69,87 @@ func (repo *Repository) looseReadTyped(id Hash) (ObjType, []byte, error) { out := append([]byte(nil), body...) return ty, out, nil } + +func (repo *Repository) looseTypeSize(id Hash) (ObjType, int64, error) { + path := repo.repoPath(loosePath(id)) + // #nosec G304 + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return ObjInvalid, 0, ErrNotFound + } + return ObjInvalid, 0, err + } + defer func() { _ = f.Close() }() + + zr, err := zlib.NewReader(f) + if err != nil { + return ObjInvalid, 0, err + } + defer func() { _ = zr.Close() }() + + header := make([]byte, 0, 64) + chunk := make([]byte, 128) + for { + n, readErr := zr.Read(chunk) + if n > 0 { + data := chunk[:n] + if nul := bytes.IndexByte(data, 0); nul >= 0 { + header = append(header, data[:nul]...) + if len(header) > looseHeaderLimit { + return ObjInvalid, 0, ErrInvalidObject + } + break + } + header = append(header, data...) + if len(header) > looseHeaderLimit { + return ObjInvalid, 0, ErrInvalidObject + } + } + if readErr != nil { + if readErr == io.EOF { + return ObjInvalid, 0, ErrInvalidObject + } + return ObjInvalid, 0, readErr + } + } + return parseLooseHeader(header) +} + +func parseLooseHeader(header []byte) (ObjType, int64, error) { + space := bytes.IndexByte(header, ' ') + if space < 0 { + return ObjInvalid, 0, ErrInvalidObject + } + ty, err := objTypeFromName(string(header[:space])) + if err != nil { + return ObjInvalid, 0, err + } + expect := header[space+1:] + if len(expect) == 0 { + return ObjInvalid, 0, ErrInvalidObject + } + size, err := strconv.ParseInt(string(expect), 10, 64) + if err != nil { + return ObjInvalid, 0, fmt.Errorf("furgit: loose: size parse: %w", err) + } + if size < 0 { + return ObjInvalid, 0, ErrInvalidObject + } + return ty, size, nil +} + +func objTypeFromName(name string) (ObjType, error) { + switch name { + case objNameBlob: + return ObjBlob, nil + case objNameTree: + return ObjTree, nil + case objNameCommit: + return ObjCommit, nil + case objNameTag: + return ObjTag, nil + default: + return ObjInvalid, ErrInvalidObject + } +} @@ -117,3 +117,22 @@ func (repo *Repository) ReadObject(id Hash) (Object, error) { } return obj, err } + +// ReadObjectTypeSize reports the object type and size without inflating the body. +func (repo *Repository) ReadObjectTypeSize(id Hash) (ObjType, int64, error) { + ty, size, err := repo.looseTypeSize(id) + if err == nil { + return ty, size, nil + } + if !errors.Is(err, ErrNotFound) { + return ObjInvalid, 0, err + } + loc, err := repo.packIndexFind(id) + if err != nil { + if errors.Is(err, ErrNotFound) { + return ObjInvalid, 0, ErrInvalidObject + } + return ObjInvalid, 0, err + } + return repo.packTypeSizeAtLocation(loc, nil) +} diff --git a/pack_pack.go b/pack_pack.go index 20974669..ee4d2b7a 100644 --- a/pack_pack.go +++ b/pack_pack.go @@ -73,6 +73,25 @@ func (repo *Repository) packBodyResolveAtLocation(loc PackLocation) (ObjType, bo return repo.packBodyResolveWithin(pf, loc.Offset) } +func (repo *Repository) packTypeSizeAtLocation(loc PackLocation, seen map[packKey]struct{}) (ObjType, int64, error) { + pf, err := repo.packFile(loc.PackPath) + if err != nil { + return ObjInvalid, 0, err + } + return repo.packTypeSizeWithin(pf, loc.Offset, seen) +} + +func (repo *Repository) packTypeSizeByID(id Hash, seen map[packKey]struct{}) (ObjType, int64, error) { + loc, err := repo.packIndexFind(id) + if err == nil { + return repo.packTypeSizeAtLocation(loc, seen) + } + if !errors.Is(err, ErrNotFound) { + return ObjInvalid, 0, err + } + return repo.looseTypeSize(id) +} + func packHeaderRead(r io.Reader) (ObjType, int, error) { var b [1]byte _, err := io.ReadFull(r, b[:]) @@ -203,6 +222,70 @@ func (repo *Repository) packBodyResolveByID(id Hash) (ObjType, borrowedBody, err return ty, borrowedFromOwned(body), nil } +type packKey struct { + path string + ofs uint64 +} + +func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjType, int64, error) { + if pf == nil { + return ObjInvalid, 0, ErrInvalidObject + } + if seen == nil { + seen = make(map[packKey]struct{}) + } + key := packKey{path: pf.relPath, ofs: ofs} + if _, dup := seen[key]; dup { + return ObjInvalid, 0, ErrInvalidObject + } + seen[key] = struct{}{} + defer delete(seen, key) + + r, err := pf.cursor(ofs) + if err != nil { + return ObjInvalid, 0, err + } + ty, size, err := packHeaderRead(r) + if err != nil { + return ObjInvalid, 0, err + } + declaredSize := int64(size) + + switch ty { + case ObjCommit, ObjTree, ObjBlob, ObjTag: + return ty, declaredSize, nil + case ObjRefDelta: + var base Hash + _, err := io.ReadFull(r, base[:]) + if err != nil { + return ObjInvalid, 0, err + } + baseTy, _, err := repo.packTypeSizeByID(base, seen) + if err != nil { + return ObjInvalid, 0, err + } + return baseTy, declaredSize, nil + case ObjOfsDelta: + dist, err := packDeltaReadOfsDistance(r) + if err != nil { + return ObjInvalid, 0, err + } + if ofs <= dist { + return ObjInvalid, 0, ErrInvalidObject + } + baseOfs := ofs - dist + baseTy, _, err := repo.packTypeSizeWithin(pf, baseOfs, seen) + if err != nil { + return ObjInvalid, 0, err + } + return baseTy, declaredSize, nil + case ObjInvalid, ObjFuture: + return ObjInvalid, 0, ErrInvalidObject + default: + return ObjInvalid, 0, ErrInvalidObject + } +} + func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjType, borrowedBody, error) { r, err := pf.cursor(ofs) if err != nil { diff --git a/repo_test.go b/repo_test.go index 22b306c9..344f817f 100644 --- a/repo_test.go +++ b/repo_test.go @@ -3,10 +3,13 @@ package furgit import ( "bytes" "compress/zlib" + "encoding/binary" "errors" "fmt" + "math" "os" "path/filepath" + "sort" "testing" ) @@ -120,3 +123,296 @@ func TestResolveHEAD(t *testing.T) { t.Fatal("expected error for detached HEAD") } } + +func TestReadObjectTypeSizeLoose(t *testing.T) { + t.Parallel() + root := t.TempDir() + repo, err := OpenRepository(root) + if err != nil { + t.Fatalf("OpenRepository error: %v", err) + } + t.Cleanup(func() { _ = repo.Close() }) + + data := []byte("header-only read") + id := writeLooseBlob(t, root, data) + ty, size, err := repo.ReadObjectTypeSize(id) + if err != nil { + t.Fatalf("ReadObjectTypeSize loose error: %v", err) + } + if ty != ObjBlob || size != int64(len(data)) { + t.Fatalf("unexpected loose metadata ty=%d size=%d", ty, size) + } +} + +func TestReadObjectTypeSizePackedObjects(t *testing.T) { + t.Parallel() + root := t.TempDir() + + objs := []testPackObject{ + {finalType: ObjBlob, body: []byte("packed base payload")}, + { + finalType: ObjBlob, + body: []byte("packed delta payload with extra bytes"), + encoding: packEncodingOfsDelta, + baseIndex: 0, + }, + } + ids := writeTestPack(t, root, "pack-basic", objs) + + repo, err := OpenRepository(root) + if err != nil { + t.Fatalf("OpenRepository error: %v", err) + } + t.Cleanup(func() { _ = repo.Close() }) + + ty, size, err := repo.ReadObjectTypeSize(ids[0]) + if err != nil { + t.Fatalf("ReadObjectTypeSize base error: %v", err) + } + if ty != ObjBlob || size != int64(len(objs[0].body)) { + t.Fatalf("unexpected base metadata ty=%d size=%d", ty, size) + } + + ty, size, err = repo.ReadObjectTypeSize(ids[1]) + if err != nil { + t.Fatalf("ReadObjectTypeSize delta error: %v", err) + } + if ty != ObjBlob || size != int64(len(objs[1].body)) { + t.Fatalf("unexpected delta metadata ty=%d size=%d", ty, size) + } +} + +func TestReadObjectTypeSizePackRefDeltaLooseBase(t *testing.T) { + t.Parallel() + root := t.TempDir() + + looseBody := []byte("loose base for ref delta") + baseID := writeLooseBlob(t, root, looseBody) + + objs := []testPackObject{ + { + finalType: ObjBlob, + body: []byte("ref delta rewritten body"), + encoding: packEncodingRefDelta, + baseHash: baseID, + baseBody: looseBody, + }, + } + ids := writeTestPack(t, root, "pack-ref", objs) + + repo, err := OpenRepository(root) + if err != nil { + t.Fatalf("OpenRepository error: %v", err) + } + t.Cleanup(func() { _ = repo.Close() }) + + ty, size, err := repo.ReadObjectTypeSize(ids[0]) + if err != nil { + t.Fatalf("ReadObjectTypeSize ref delta error: %v", err) + } + if ty != ObjBlob || size != int64(len(objs[0].body)) { + t.Fatalf("unexpected ref delta metadata ty=%d size=%d", ty, size) + } +} + +type packObjectEncoding uint8 + +const ( + packEncodingFull packObjectEncoding = iota + packEncodingOfsDelta + packEncodingRefDelta +) + +type testPackObject struct { + finalType ObjType + body []byte + encoding packObjectEncoding + baseIndex int + baseHash Hash + baseBody []byte +} + +func writeTestPack(t *testing.T, root, name string, objs []testPackObject) []Hash { + t.Helper() + packDir := filepath.Join(root, "objects", "pack") + err := os.MkdirAll(packDir, 0o750) + if err != nil { + t.Fatalf("mkdir pack dir: %v", err) + } + + var buf bytes.Buffer + buf.Write([]byte{'P', 'A', 'C', 'K'}) + err = binary.Write(&buf, binary.BigEndian, uint32(packVersion2)) + if err != nil { + t.Fatalf("write pack version: %v", err) + } + objCount := len(objs) + if objCount > math.MaxUint32 { + t.Fatalf("too many objects: %d", len(objs)) + } + count32 := uint32(objCount) //#nosec G115 + err = binary.Write(&buf, binary.BigEndian, count32) + if err != nil { + t.Fatalf("write pack count: %v", err) + } + + offsets := make([]uint64, len(objs)) + ids := make([]Hash, len(objs)) + + for i, obj := range objs { + offset := buf.Len() + if offset < 0 { + t.Fatalf("negative buffer length") + } + offsets[i] = uint64(offset) + header, err := headerForType(obj.finalType, obj.body) + if err != nil { + t.Fatalf("headerForType: %v", err) + } + raw := make([]byte, len(header)+len(obj.body)) + copy(raw, header) + copy(raw[len(header):], obj.body) + ids[i] = computeRawHash(raw) + + switch obj.encoding { + case packEncodingFull: + buf.Write(encodePackHeader(obj.finalType, len(obj.body))) + buf.Write(compressBytes(t, obj.body)) + case packEncodingOfsDelta: + if obj.baseIndex < 0 || obj.baseIndex >= i { + t.Fatalf("invalid base index %d for ofs delta %d", obj.baseIndex, i) + } + buf.Write(encodePackHeader(ObjOfsDelta, len(obj.body))) + dist := offsets[i] - offsets[obj.baseIndex] + buf.Write(encodeOfsDistance(dist)) + baseBody := objs[obj.baseIndex].body + delta := buildInsertOnlyDelta(len(baseBody), obj.body) + buf.Write(compressBytes(t, delta)) + case packEncodingRefDelta: + if obj.baseHash == (Hash{}) { + t.Fatalf("ref delta %d missing base hash", i) + } + baseBody := obj.baseBody + if len(baseBody) == 0 { + t.Fatalf("ref delta %d missing base body", i) + } + buf.Write(encodePackHeader(ObjRefDelta, len(obj.body))) + buf.Write(obj.baseHash[:]) + delta := buildInsertOnlyDelta(len(baseBody), obj.body) + buf.Write(compressBytes(t, delta)) + default: + t.Fatalf("unknown encoding %d", obj.encoding) + } + } + + packContent := append([]byte(nil), buf.Bytes()...) + packChecksum := newHash(packContent) + buf.Write(packChecksum[:]) + packBytes := buf.Bytes() + + packPath := filepath.Join(packDir, name+".pack") + err = os.WriteFile(packPath, packBytes, 0o600) + if err != nil { + t.Fatalf("write pack file: %v", err) + } + + writeTestPackIndex(t, packDir, name, ids, offsets, packChecksum) + return ids +} + +func writeTestPackIndex(t *testing.T, packDir, name string, ids []Hash, offsets []uint64, packChecksum [HashSize]byte) { + t.Helper() + type idxEntry struct { + id Hash + offset uint64 + } + entries := make([]idxEntry, len(ids)) + for i := range ids { + entries[i] = idxEntry{id: ids[i], offset: offsets[i]} + } + sort.Slice(entries, func(i, j int) bool { + return bytes.Compare(entries[i].id[:], entries[j].id[:]) < 0 + }) + + var buf bytes.Buffer + err := binary.Write(&buf, binary.BigEndian, uint32(idxMagic)) + if err != nil { + t.Fatalf("write idx magic: %v", err) + } + err = binary.Write(&buf, binary.BigEndian, uint32(idxVersion2)) + if err != nil { + t.Fatalf("write idx version: %v", err) + } + + var fanout [256]uint32 + for _, entry := range entries { + first := int(entry.id[0]) + for i := first; i < len(fanout); i++ { + fanout[i]++ + } + } + for _, count := range fanout { + err = binary.Write(&buf, binary.BigEndian, count) + if err != nil { + t.Fatalf("write fanout: %v", err) + } + } + + for _, entry := range entries { + buf.Write(entry.id[:]) + } + + buf.Write(make([]byte, len(entries)*4)) + + for _, entry := range entries { + if entry.offset >= 0x80000000 { + t.Fatalf("offset too large for 32-bit table") + } + var word [4]byte + binary.BigEndian.PutUint32(word[:], uint32(entry.offset)) + buf.Write(word[:]) + } + + idxData := append([]byte(nil), buf.Bytes()...) + idxChecksum := newHash(idxData) + buf.Write(packChecksum[:]) + buf.Write(idxChecksum[:]) + + idxPath := filepath.Join(packDir, name+".idx") + err = os.WriteFile(idxPath, buf.Bytes(), 0o600) + if err != nil { + t.Fatalf("write idx file: %v", err) + } +} + +func buildInsertOnlyDelta(srcLen int, dst []byte) []byte { + var buf bytes.Buffer + buf.Write(encodeVarint(srcLen)) + buf.Write(encodeVarint(len(dst))) + remaining := dst + for len(remaining) > 0 { + chunk := remaining + if len(chunk) > 127 { + chunk = remaining[:127] + } + buf.WriteByte(byte(len(chunk))) + buf.Write(chunk) + remaining = remaining[len(chunk):] + } + return buf.Bytes() +} + +func encodeOfsDistance(dist uint64) []byte { + if dist == 0 { + return []byte{0} + } + var out []byte + out = append(out, byte(dist&0x7f)) + for dist >>= 7; dist != 0; dist >>= 7 { + out = append(out, byte(((dist-1)&0x7f)|0x80)) + } + for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 { + out[i], out[j] = out[j], out[i] + } + return out +} |
