From 52a2b00e1ab5a4bfe8c516e46424fc2dc3178be2 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Wed, 28 Jan 2026 20:44:51 +0100 Subject: pack: Add _read to filenames --- pack_idx.go | 290 --------------------------- pack_idx_read.go | 290 +++++++++++++++++++++++++++ pack_pack.go | 578 ------------------------------------------------------ pack_pack_read.go | 578 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ pack_read_test.go | 149 ++++++++++++++ pack_test.go | 149 -------------- 6 files changed, 1017 insertions(+), 1017 deletions(-) delete mode 100644 pack_idx.go create mode 100644 pack_idx_read.go delete mode 100644 pack_pack.go create mode 100644 pack_pack_read.go create mode 100644 pack_read_test.go delete mode 100644 pack_test.go diff --git a/pack_idx.go b/pack_idx.go deleted file mode 100644 index 0dbb9bcf..00000000 --- a/pack_idx.go +++ /dev/null @@ -1,290 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "syscall" -) - -const ( - idxMagic = 0xff744f63 - idxVersion2 = 2 -) - -type packIndex struct { - repo *Repository - idxRel string - packPath string - - loadOnce sync.Once - loadErr error - - numObjects int - fanout []byte - names []byte - crcs []byte - offset32 []byte - offset64 []byte - data []byte - - closeOnce sync.Once -} - -func (pi *packIndex) Close() error { - if pi == nil { - return nil - } - var closeErr error - pi.closeOnce.Do(func() { - if len(pi.data) > 0 { - if err := syscall.Munmap(pi.data); closeErr == nil { - closeErr = err - } - pi.data = nil - pi.fanout = nil - pi.names = nil - pi.crcs = nil - pi.offset32 = nil - pi.offset64 = nil - pi.numObjects = 0 - } - }) - return closeErr -} - -func (pi *packIndex) ensureLoaded() error { - pi.loadOnce.Do(func() { - pi.loadErr = pi.load() - }) - return pi.loadErr -} - -func (pi *packIndex) load() error { - if pi.repo == nil { - return ErrInvalidObject - } - f, err := os.Open(pi.repo.repoPath(pi.idxRel)) - if err != nil { - return err - } - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return err - } - if stat.Size() < 8+256*4 { - _ = f.Close() - return ErrInvalidObject - } - region, err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return err - } - err = pi.parse(region) - if err != nil { - _ = syscall.Munmap(region) - return err - } - pi.data = region - return nil -} - -func (repo *Repository) packIndexes() ([]*packIndex, error) { - repo.packIdxOnce.Do(func() { - repo.packIdx, repo.packIdxErr = repo.loadPackIndexes() - }) - return repo.packIdx, repo.packIdxErr -} - -func (repo *Repository) loadPackIndexes() ([]*packIndex, error) { - dir := filepath.Join(repo.rootPath, "objects", "pack") - entries, err := os.ReadDir(dir) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotFound - } - return nil, err - } - - idxs := make([]*packIndex, 0, len(entries)) - for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { - continue - } - rel := filepath.Join("objects", "pack", entry.Name()) - packRel := strings.TrimSuffix(rel, ".idx") + ".pack" - idxs = append(idxs, &packIndex{ - repo: repo, - idxRel: rel, - packPath: packRel, - }) - } - if len(idxs) == 0 { - return nil, ErrNotFound - } - return idxs, nil -} - -func (pi *packIndex) parse(buf []byte) error { - if len(buf) < 8+256*4 { - return ErrInvalidObject - } - if readBE32(buf[0:4]) != idxMagic { - return ErrInvalidObject - } - if readBE32(buf[4:8]) != idxVersion2 { - return ErrInvalidObject - } - - const fanoutBytes = 256 * 4 - fanoutStart := 8 - fanoutEnd := fanoutStart + fanoutBytes - if fanoutEnd > len(buf) { - return ErrInvalidObject - } - pi.fanout = buf[fanoutStart:fanoutEnd] - nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) - - namesStart := fanoutEnd - namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() - if namesEnd > len(buf) { - return ErrInvalidObject - } - - crcStart := namesEnd - crcEnd := crcStart + nobj*4 - if crcEnd > len(buf) { - return ErrInvalidObject - } - - off32Start := crcEnd - off32End := off32Start + nobj*4 - if off32End > len(buf) { - return ErrInvalidObject - } - - pi.offset32 = buf[off32Start:off32End] - - off64Start := off32End - trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() - if trailerStart < off64Start { - return ErrInvalidObject - } - if (trailerStart-off64Start)%8 != 0 { - return ErrInvalidObject - } - off64End := trailerStart - pi.offset64 = buf[off64Start:off64End] - - pi.numObjects = nobj - pi.names = buf[namesStart:namesEnd] - pi.crcs = buf[crcStart:crcEnd] - return nil -} - -func readBE32(b []byte) uint32 { - _ = b[3] - return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]) -} - -func readBE64(b []byte) uint64 { - _ = b[7] - return (uint64(b[0]) << 56) | (uint64(b[1]) << 48) | - (uint64(b[2]) << 40) | (uint64(b[3]) << 32) | - (uint64(b[4]) << 24) | (uint64(b[5]) << 16) | - (uint64(b[6]) << 8) | uint64(b[7]) -} - -func (pi *packIndex) fanoutEntry(i int) uint32 { - if len(pi.fanout) == 0 { - return 0 - } - entries := len(pi.fanout) / 4 - if i < 0 || i >= entries { - return 0 - } - start := i * 4 - return readBE32(pi.fanout[start : start+4]) -} - -func (pi *packIndex) offset(idx int) (uint64, error) { - start := idx * 4 - word := readBE32(pi.offset32[start : start+4]) - if word&0x80000000 == 0 { - return uint64(word), nil - } - pos := int(word & 0x7fffffff) - entries := len(pi.offset64) / 8 - if pos < 0 || pos >= entries { - return 0, errors.New("furgit: pack: corrupt 64-bit offset table") - } - base := pos * 8 - return readBE64(pi.offset64[base : base+8]), nil -} - -func (pi *packIndex) lookup(id Hash) (packlocation, error) { - err := pi.ensureLoaded() - if err != nil { - return packlocation{}, err - } - if id.algo != pi.repo.hashAlgo { - return packlocation{}, fmt.Errorf("furgit: hash algorithm mismatch: got %s, expected %s", id.algo.String(), pi.repo.hashAlgo.String()) - } - first := int(id.data[0]) - var lo int - if first > 0 { - lo = int(pi.fanoutEntry(first - 1)) - } - hi := int(pi.fanoutEntry(first)) - idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) - if !found { - return packlocation{}, ErrNotFound - } - ofs, err := pi.offset(idx) - if err != nil { - return packlocation{}, err - } - return packlocation{ - PackPath: pi.packPath, - Offset: ofs, - }, nil -} - -func bsearchHash(names []byte, stride, lo, hi int, want Hash) (int, bool) { - for lo < hi { - mid := lo + (hi-lo)/2 - cmp := compareHash(names, stride, mid, want.data[:stride]) - if cmp == 0 { - return mid, true - } - if cmp > 0 { - hi = mid - } else { - lo = mid + 1 - } - } - return lo, false -} - -func compareHash(names []byte, stride, idx int, want []byte) int { - base := idx * stride - end := base + stride - return bytes.Compare(names[base:end], want) -} diff --git a/pack_idx_read.go b/pack_idx_read.go new file mode 100644 index 00000000..0dbb9bcf --- /dev/null +++ b/pack_idx_read.go @@ -0,0 +1,290 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "syscall" +) + +const ( + idxMagic = 0xff744f63 + idxVersion2 = 2 +) + +type packIndex struct { + repo *Repository + idxRel string + packPath string + + loadOnce sync.Once + loadErr error + + numObjects int + fanout []byte + names []byte + crcs []byte + offset32 []byte + offset64 []byte + data []byte + + closeOnce sync.Once +} + +func (pi *packIndex) Close() error { + if pi == nil { + return nil + } + var closeErr error + pi.closeOnce.Do(func() { + if len(pi.data) > 0 { + if err := syscall.Munmap(pi.data); closeErr == nil { + closeErr = err + } + pi.data = nil + pi.fanout = nil + pi.names = nil + pi.crcs = nil + pi.offset32 = nil + pi.offset64 = nil + pi.numObjects = 0 + } + }) + return closeErr +} + +func (pi *packIndex) ensureLoaded() error { + pi.loadOnce.Do(func() { + pi.loadErr = pi.load() + }) + return pi.loadErr +} + +func (pi *packIndex) load() error { + if pi.repo == nil { + return ErrInvalidObject + } + f, err := os.Open(pi.repo.repoPath(pi.idxRel)) + if err != nil { + return err + } + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return err + } + if stat.Size() < 8+256*4 { + _ = f.Close() + return ErrInvalidObject + } + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return err + } + err = pi.parse(region) + if err != nil { + _ = syscall.Munmap(region) + return err + } + pi.data = region + return nil +} + +func (repo *Repository) packIndexes() ([]*packIndex, error) { + repo.packIdxOnce.Do(func() { + repo.packIdx, repo.packIdxErr = repo.loadPackIndexes() + }) + return repo.packIdx, repo.packIdxErr +} + +func (repo *Repository) loadPackIndexes() ([]*packIndex, error) { + dir := filepath.Join(repo.rootPath, "objects", "pack") + entries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + + idxs := make([]*packIndex, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { + continue + } + rel := filepath.Join("objects", "pack", entry.Name()) + packRel := strings.TrimSuffix(rel, ".idx") + ".pack" + idxs = append(idxs, &packIndex{ + repo: repo, + idxRel: rel, + packPath: packRel, + }) + } + if len(idxs) == 0 { + return nil, ErrNotFound + } + return idxs, nil +} + +func (pi *packIndex) parse(buf []byte) error { + if len(buf) < 8+256*4 { + return ErrInvalidObject + } + if readBE32(buf[0:4]) != idxMagic { + return ErrInvalidObject + } + if readBE32(buf[4:8]) != idxVersion2 { + return ErrInvalidObject + } + + const fanoutBytes = 256 * 4 + fanoutStart := 8 + fanoutEnd := fanoutStart + fanoutBytes + if fanoutEnd > len(buf) { + return ErrInvalidObject + } + pi.fanout = buf[fanoutStart:fanoutEnd] + nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) + + namesStart := fanoutEnd + namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() + if namesEnd > len(buf) { + return ErrInvalidObject + } + + crcStart := namesEnd + crcEnd := crcStart + nobj*4 + if crcEnd > len(buf) { + return ErrInvalidObject + } + + off32Start := crcEnd + off32End := off32Start + nobj*4 + if off32End > len(buf) { + return ErrInvalidObject + } + + pi.offset32 = buf[off32Start:off32End] + + off64Start := off32End + trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() + if trailerStart < off64Start { + return ErrInvalidObject + } + if (trailerStart-off64Start)%8 != 0 { + return ErrInvalidObject + } + off64End := trailerStart + pi.offset64 = buf[off64Start:off64End] + + pi.numObjects = nobj + pi.names = buf[namesStart:namesEnd] + pi.crcs = buf[crcStart:crcEnd] + return nil +} + +func readBE32(b []byte) uint32 { + _ = b[3] + return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]) +} + +func readBE64(b []byte) uint64 { + _ = b[7] + return (uint64(b[0]) << 56) | (uint64(b[1]) << 48) | + (uint64(b[2]) << 40) | (uint64(b[3]) << 32) | + (uint64(b[4]) << 24) | (uint64(b[5]) << 16) | + (uint64(b[6]) << 8) | uint64(b[7]) +} + +func (pi *packIndex) fanoutEntry(i int) uint32 { + if len(pi.fanout) == 0 { + return 0 + } + entries := len(pi.fanout) / 4 + if i < 0 || i >= entries { + return 0 + } + start := i * 4 + return readBE32(pi.fanout[start : start+4]) +} + +func (pi *packIndex) offset(idx int) (uint64, error) { + start := idx * 4 + word := readBE32(pi.offset32[start : start+4]) + if word&0x80000000 == 0 { + return uint64(word), nil + } + pos := int(word & 0x7fffffff) + entries := len(pi.offset64) / 8 + if pos < 0 || pos >= entries { + return 0, errors.New("furgit: pack: corrupt 64-bit offset table") + } + base := pos * 8 + return readBE64(pi.offset64[base : base+8]), nil +} + +func (pi *packIndex) lookup(id Hash) (packlocation, error) { + err := pi.ensureLoaded() + if err != nil { + return packlocation{}, err + } + if id.algo != pi.repo.hashAlgo { + return packlocation{}, fmt.Errorf("furgit: hash algorithm mismatch: got %s, expected %s", id.algo.String(), pi.repo.hashAlgo.String()) + } + first := int(id.data[0]) + var lo int + if first > 0 { + lo = int(pi.fanoutEntry(first - 1)) + } + hi := int(pi.fanoutEntry(first)) + idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) + if !found { + return packlocation{}, ErrNotFound + } + ofs, err := pi.offset(idx) + if err != nil { + return packlocation{}, err + } + return packlocation{ + PackPath: pi.packPath, + Offset: ofs, + }, nil +} + +func bsearchHash(names []byte, stride, lo, hi int, want Hash) (int, bool) { + for lo < hi { + mid := lo + (hi-lo)/2 + cmp := compareHash(names, stride, mid, want.data[:stride]) + if cmp == 0 { + return mid, true + } + if cmp > 0 { + hi = mid + } else { + lo = mid + 1 + } + } + return lo, false +} + +func compareHash(names []byte, stride, idx int, want []byte) int { + base := idx * stride + end := base + stride + return bytes.Compare(names[base:end], want) +} diff --git a/pack_pack.go b/pack_pack.go deleted file mode 100644 index 628fa258..00000000 --- a/pack_pack.go +++ /dev/null @@ -1,578 +0,0 @@ -package furgit - -import ( - "encoding/binary" - "errors" - "io" - "os" - "sync" - "syscall" - - "codeberg.org/lindenii/furgit/internal/bufpool" - "codeberg.org/lindenii/furgit/internal/zlibx" -) - -const ( - packMagic = 0x5041434b - packVersion2 = 2 -) - -type packlocation struct { - PackPath string - Offset uint64 -} - -func (repo *Repository) packRead(id Hash) (ObjectType, bufpool.Buffer, error) { - loc, err := repo.packIndexFind(id) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return repo.packReadAt(loc, id) -} - -func (repo *Repository) packIndexFind(id Hash) (packlocation, error) { - idxs, err := repo.packIndexes() - if err != nil { - return packlocation{}, err - } - for _, idx := range idxs { - loc, err := idx.lookup(id) - if errors.Is(err, ErrNotFound) { - continue - } - if err != nil { - return packlocation{}, err - } - return loc, nil - } - return packlocation{}, ErrNotFound -} - -func (repo *Repository) packReadAt(loc packlocation, want Hash) (ObjectType, bufpool.Buffer, error) { - ty, body, err := repo.packBodyResolveAtLocation(loc) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return ty, body, nil -} - -func (repo *Repository) packBodyResolveAtLocation(loc packlocation) (ObjectType, bufpool.Buffer, error) { - pf, err := repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return repo.packBodyResolveWithin(pf, loc.Offset) -} - -func (repo *Repository) packTypeSizeAtLocation(loc packlocation, seen map[packKey]struct{}) (ObjectType, int64, error) { - pf, err := repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, 0, err - } - return repo.packTypeSizeWithin(pf, loc.Offset, seen) -} - -func packHeaderParse(data []byte) (ObjectType, int, int, error) { - if len(data) == 0 { - return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - ty := ObjectType((b >> 4) & 0x07) - size := int(b & 0x0f) - shift := 4 - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - size |= int(b&0x7f) << shift - shift += 7 - consumed++ - } - return ty, size, consumed, nil -} - -func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffer, error) { - if start > uint64(len(pf.data)) { - return bufpool.Buffer{}, ErrInvalidObject - } - body, err := zlibx.DecompressSized(pf.data[start:], sizeHint) - if err != nil { - return bufpool.Buffer{}, err - } - if sizeHint > 0 && len(body.Bytes()) != sizeHint { - body.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return body, nil -} - -func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { - if len(data) == 0 { - return 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - dist := uint64(b & 0x7f) - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - consumed++ - dist = ((dist + 1) << 7) + uint64(b&0x7f) - } - return dist, consumed, nil -} - -type packKey struct { - path string - ofs uint64 -} - -func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjectType, int64, error) { - if pf == nil { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - if seen == nil { - seen = make(map[packKey]struct{}) - } - var visited []packKey - defer func() { - for _, key := range visited { - delete(seen, key) - } - }() - - var declaredSize int64 - firstHeader := true - - for { - key := packKey{path: pf.relPath, ofs: ofs} - if _, dup := seen[key]; dup { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - seen[key] = struct{}{} - visited = append(visited, key) - - if ofs >= uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) - if err != nil { - return ObjectTypeInvalid, 0, err - } - if firstHeader { - declaredSize = int64(size) - firstHeader = false - } - - if uint64(consumed) > uint64(len(pf.data))-ofs { - return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF - } - dataStart := ofs + uint64(consumed) - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - return ty, declaredSize, nil - case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.Size()) - if hashEnd > uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF - } - var base Hash - copy(base.data[:], pf.data[dataStart:hashEnd]) - base.algo = repo.hashAlgo - loc, err := repo.packIndexFind(base) - if err == nil { - pf, err = repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, 0, err - } - ofs = loc.Offset - continue - } - if !errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, 0, err - } - baseTy, _, err := repo.looseTypeSize(base) - if err != nil { - return ObjectTypeInvalid, 0, err - } - return baseTy, declaredSize, nil - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) - if err != nil { - return ObjectTypeInvalid, 0, err - } - if ofs <= dist { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - dataStart += uint64(distConsumed) - if dataStart > uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ofs -= dist - case ObjectTypeInvalid, ObjectTypeFuture: - return ObjectTypeInvalid, 0, ErrInvalidObject - default: - return ObjectTypeInvalid, 0, ErrInvalidObject - } - } -} - -func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectType, bufpool.Buffer, error) { - if pf == nil { - return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject - } - - type deltaFrame struct { - delta bufpool.Buffer - } - var frames []deltaFrame - defer func() { - for i := range frames { - frames[i].delta.Release() - } - }() - - var ( - body bufpool.Buffer - bodyReady bool - resultTy ObjectType - ) - fail := func(err error) (ObjectType, bufpool.Buffer, error) { - if bodyReady { - body.Release() - bodyReady = false - } - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - - resolved := false - for !resolved { - if ofs >= uint64(len(pf.data)) { - return fail(ErrInvalidObject) - } - ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) - if err != nil { - return fail(err) - } - if uint64(consumed) > uint64(len(pf.data))-ofs { - return fail(io.ErrUnexpectedEOF) - } - dataStart := ofs + uint64(consumed) - - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - body, err = packSectionInflate(pf, dataStart, size) - if err != nil { - return fail(err) - } - bodyReady = true - resultTy = ty - resolved = true - case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.Size()) - if hashEnd > uint64(len(pf.data)) { - return fail(io.ErrUnexpectedEOF) - } - var base Hash - copy(base.data[:], pf.data[dataStart:hashEnd]) - base.algo = repo.hashAlgo - delta, err := packSectionInflate(pf, hashEnd, 0) - if err != nil { - return fail(err) - } - frames = append(frames, deltaFrame{delta: delta}) - - loc, err := repo.packIndexFind(base) - if err == nil { - pf, err = repo.packFile(loc.PackPath) - if err != nil { - return fail(err) - } - ofs = loc.Offset - continue - } - if !errors.Is(err, ErrNotFound) { - return fail(err) - } - resultTy, body, err = repo.looseReadTyped(base) - if err != nil { - return fail(err) - } - bodyReady = true - resolved = true - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) - if err != nil { - return fail(err) - } - if ofs <= dist { - return fail(ErrInvalidObject) - } - deltaStart := dataStart + uint64(distConsumed) - if deltaStart > uint64(len(pf.data)) { - return fail(ErrInvalidObject) - } - delta, err := packSectionInflate(pf, deltaStart, 0) - if err != nil { - return fail(err) - } - frames = append(frames, deltaFrame{delta: delta}) - ofs -= dist - case ObjectTypeInvalid, ObjectTypeFuture: - return fail(ErrInvalidObject) - default: - return fail(ErrInvalidObject) - } - } - - for i := len(frames) - 1; i >= 0; i-- { - out, err := packDeltaApply(body, frames[i].delta) - body.Release() - bodyReady = false - frames[i].delta.Release() - if err != nil { - return fail(err) - } - body = out - bodyReady = true - } - frames = nil - return resultTy, body, nil -} - -func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { - pos := 0 - baseBytes := base.Bytes() - deltaBytes := delta.Bytes() - srcSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - dstSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - if srcSize != len(baseBytes) { - return bufpool.Buffer{}, ErrInvalidObject - } - out := bufpool.Borrow(dstSize) - out.Resize(dstSize) - outBytes := out.Bytes() - outPos := 0 - - for pos < len(deltaBytes) { - op := deltaBytes[pos] - pos++ - switch { - case op&0x80 != 0: - off := 0 - n := 0 - if op&0x01 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) - pos++ - } - if op&0x02 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x04 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 16 - pos++ - } - if op&0x08 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 24 - pos++ - } - if op&0x10 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) - pos++ - } - if op&0x20 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x40 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 16 - pos++ - } - if n == 0 { - n = 0x10000 - } - if off+n > len(baseBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], baseBytes[off:off+n]) - outPos += n - case op != 0: - n := int(op) - if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], deltaBytes[pos:pos+n]) - pos += n - outPos += n - default: - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - } - - if outPos != len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return out, nil -} - -func packVarintRead(buf []byte, pos *int) (int, error) { - res := 0 - shift := 0 - for { - if *pos >= len(buf) { - return 0, ErrInvalidObject - } - b := buf[*pos] - *pos++ - res |= int(b&0x7f) << shift - if (b & 0x80) == 0 { - break - } - shift += 7 - } - return res, nil -} - -type packFile struct { - relPath string - size int64 - data []byte - closeMu sync.Once -} - -func openPackFile(absPath, rel string) (*packFile, error) { - f, err := os.Open(absPath) - if err != nil { - return nil, err - } - - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return nil, err - } - if stat.Size() < 12 { - _ = f.Close() - return nil, ErrInvalidObject - } - - var headerArr [12]byte - header := headerArr[:] - _, err = io.ReadFull(f, header) - if err != nil { - _ = f.Close() - return nil, err - } - magic := binary.BigEndian.Uint32(header[:4]) - ver := binary.BigEndian.Uint32(header[4:8]) - if magic != packMagic || ver != packVersion2 { - _ = f.Close() - return nil, ErrInvalidObject - } - - region, err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return nil, err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return nil, err - } - - return &packFile{ - relPath: rel, - size: stat.Size(), - data: region, - }, nil -} - -func (pf *packFile) Close() error { - if pf == nil { - return nil - } - var closeErr error - pf.closeMu.Do(func() { - if len(pf.data) > 0 { - if err := syscall.Munmap(pf.data); closeErr == nil { - closeErr = err - } - pf.data = nil - } - }) - return closeErr -} - -func (repo *Repository) packFile(rel string) (*packFile, error) { - repo.packFilesMu.RLock() - pf, ok := repo.packFiles[rel] - repo.packFilesMu.RUnlock() - if ok { - return pf, nil - } - - pf, err := openPackFile(repo.repoPath(rel), rel) - if err != nil { - return nil, err - } - - repo.packFilesMu.Lock() - if existing, ok := repo.packFiles[rel]; ok { - repo.packFilesMu.Unlock() - _ = pf.Close() - return existing, nil - } - repo.packFiles[rel] = pf - repo.packFilesMu.Unlock() - return pf, nil -} diff --git a/pack_pack_read.go b/pack_pack_read.go new file mode 100644 index 00000000..628fa258 --- /dev/null +++ b/pack_pack_read.go @@ -0,0 +1,578 @@ +package furgit + +import ( + "encoding/binary" + "errors" + "io" + "os" + "sync" + "syscall" + + "codeberg.org/lindenii/furgit/internal/bufpool" + "codeberg.org/lindenii/furgit/internal/zlibx" +) + +const ( + packMagic = 0x5041434b + packVersion2 = 2 +) + +type packlocation struct { + PackPath string + Offset uint64 +} + +func (repo *Repository) packRead(id Hash) (ObjectType, bufpool.Buffer, error) { + loc, err := repo.packIndexFind(id) + if err != nil { + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + return repo.packReadAt(loc, id) +} + +func (repo *Repository) packIndexFind(id Hash) (packlocation, error) { + idxs, err := repo.packIndexes() + if err != nil { + return packlocation{}, err + } + for _, idx := range idxs { + loc, err := idx.lookup(id) + if errors.Is(err, ErrNotFound) { + continue + } + if err != nil { + return packlocation{}, err + } + return loc, nil + } + return packlocation{}, ErrNotFound +} + +func (repo *Repository) packReadAt(loc packlocation, want Hash) (ObjectType, bufpool.Buffer, error) { + ty, body, err := repo.packBodyResolveAtLocation(loc) + if err != nil { + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + return ty, body, nil +} + +func (repo *Repository) packBodyResolveAtLocation(loc packlocation) (ObjectType, bufpool.Buffer, error) { + pf, err := repo.packFile(loc.PackPath) + if err != nil { + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + return repo.packBodyResolveWithin(pf, loc.Offset) +} + +func (repo *Repository) packTypeSizeAtLocation(loc packlocation, seen map[packKey]struct{}) (ObjectType, int64, error) { + pf, err := repo.packFile(loc.PackPath) + if err != nil { + return ObjectTypeInvalid, 0, err + } + return repo.packTypeSizeWithin(pf, loc.Offset, seen) +} + +func packHeaderParse(data []byte) (ObjectType, int, int, error) { + if len(data) == 0 { + return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF + } + b := data[0] + ty := ObjectType((b >> 4) & 0x07) + size := int(b & 0x0f) + shift := 4 + consumed := 1 + for (b & 0x80) != 0 { + if consumed >= len(data) { + return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF + } + b = data[consumed] + size |= int(b&0x7f) << shift + shift += 7 + consumed++ + } + return ty, size, consumed, nil +} + +func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffer, error) { + if start > uint64(len(pf.data)) { + return bufpool.Buffer{}, ErrInvalidObject + } + body, err := zlibx.DecompressSized(pf.data[start:], sizeHint) + if err != nil { + return bufpool.Buffer{}, err + } + if sizeHint > 0 && len(body.Bytes()) != sizeHint { + body.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + return body, nil +} + +func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { + if len(data) == 0 { + return 0, 0, io.ErrUnexpectedEOF + } + b := data[0] + dist := uint64(b & 0x7f) + consumed := 1 + for (b & 0x80) != 0 { + if consumed >= len(data) { + return 0, 0, io.ErrUnexpectedEOF + } + b = data[consumed] + consumed++ + dist = ((dist + 1) << 7) + uint64(b&0x7f) + } + return dist, consumed, nil +} + +type packKey struct { + path string + ofs uint64 +} + +func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjectType, int64, error) { + if pf == nil { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + if seen == nil { + seen = make(map[packKey]struct{}) + } + var visited []packKey + defer func() { + for _, key := range visited { + delete(seen, key) + } + }() + + var declaredSize int64 + firstHeader := true + + for { + key := packKey{path: pf.relPath, ofs: ofs} + if _, dup := seen[key]; dup { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + seen[key] = struct{}{} + visited = append(visited, key) + + if ofs >= uint64(len(pf.data)) { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) + if err != nil { + return ObjectTypeInvalid, 0, err + } + if firstHeader { + declaredSize = int64(size) + firstHeader = false + } + + if uint64(consumed) > uint64(len(pf.data))-ofs { + return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF + } + dataStart := ofs + uint64(consumed) + switch ty { + case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: + return ty, declaredSize, nil + case ObjectTypeRefDelta: + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) + if hashEnd > uint64(len(pf.data)) { + return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF + } + var base Hash + copy(base.data[:], pf.data[dataStart:hashEnd]) + base.algo = repo.hashAlgo + loc, err := repo.packIndexFind(base) + if err == nil { + pf, err = repo.packFile(loc.PackPath) + if err != nil { + return ObjectTypeInvalid, 0, err + } + ofs = loc.Offset + continue + } + if !errors.Is(err, ErrNotFound) { + return ObjectTypeInvalid, 0, err + } + baseTy, _, err := repo.looseTypeSize(base) + if err != nil { + return ObjectTypeInvalid, 0, err + } + return baseTy, declaredSize, nil + case ObjectTypeOfsDelta: + dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) + if err != nil { + return ObjectTypeInvalid, 0, err + } + if ofs <= dist { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + dataStart += uint64(distConsumed) + if dataStart > uint64(len(pf.data)) { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + ofs -= dist + case ObjectTypeInvalid, ObjectTypeFuture: + return ObjectTypeInvalid, 0, ErrInvalidObject + default: + return ObjectTypeInvalid, 0, ErrInvalidObject + } + } +} + +func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectType, bufpool.Buffer, error) { + if pf == nil { + return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject + } + + type deltaFrame struct { + delta bufpool.Buffer + } + var frames []deltaFrame + defer func() { + for i := range frames { + frames[i].delta.Release() + } + }() + + var ( + body bufpool.Buffer + bodyReady bool + resultTy ObjectType + ) + fail := func(err error) (ObjectType, bufpool.Buffer, error) { + if bodyReady { + body.Release() + bodyReady = false + } + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + + resolved := false + for !resolved { + if ofs >= uint64(len(pf.data)) { + return fail(ErrInvalidObject) + } + ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) + if err != nil { + return fail(err) + } + if uint64(consumed) > uint64(len(pf.data))-ofs { + return fail(io.ErrUnexpectedEOF) + } + dataStart := ofs + uint64(consumed) + + switch ty { + case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: + body, err = packSectionInflate(pf, dataStart, size) + if err != nil { + return fail(err) + } + bodyReady = true + resultTy = ty + resolved = true + case ObjectTypeRefDelta: + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) + if hashEnd > uint64(len(pf.data)) { + return fail(io.ErrUnexpectedEOF) + } + var base Hash + copy(base.data[:], pf.data[dataStart:hashEnd]) + base.algo = repo.hashAlgo + delta, err := packSectionInflate(pf, hashEnd, 0) + if err != nil { + return fail(err) + } + frames = append(frames, deltaFrame{delta: delta}) + + loc, err := repo.packIndexFind(base) + if err == nil { + pf, err = repo.packFile(loc.PackPath) + if err != nil { + return fail(err) + } + ofs = loc.Offset + continue + } + if !errors.Is(err, ErrNotFound) { + return fail(err) + } + resultTy, body, err = repo.looseReadTyped(base) + if err != nil { + return fail(err) + } + bodyReady = true + resolved = true + case ObjectTypeOfsDelta: + dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) + if err != nil { + return fail(err) + } + if ofs <= dist { + return fail(ErrInvalidObject) + } + deltaStart := dataStart + uint64(distConsumed) + if deltaStart > uint64(len(pf.data)) { + return fail(ErrInvalidObject) + } + delta, err := packSectionInflate(pf, deltaStart, 0) + if err != nil { + return fail(err) + } + frames = append(frames, deltaFrame{delta: delta}) + ofs -= dist + case ObjectTypeInvalid, ObjectTypeFuture: + return fail(ErrInvalidObject) + default: + return fail(ErrInvalidObject) + } + } + + for i := len(frames) - 1; i >= 0; i-- { + out, err := packDeltaApply(body, frames[i].delta) + body.Release() + bodyReady = false + frames[i].delta.Release() + if err != nil { + return fail(err) + } + body = out + bodyReady = true + } + frames = nil + return resultTy, body, nil +} + +func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { + pos := 0 + baseBytes := base.Bytes() + deltaBytes := delta.Bytes() + srcSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return bufpool.Buffer{}, err + } + dstSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return bufpool.Buffer{}, err + } + if srcSize != len(baseBytes) { + return bufpool.Buffer{}, ErrInvalidObject + } + out := bufpool.Borrow(dstSize) + out.Resize(dstSize) + outBytes := out.Bytes() + outPos := 0 + + for pos < len(deltaBytes) { + op := deltaBytes[pos] + pos++ + switch { + case op&0x80 != 0: + off := 0 + n := 0 + if op&0x01 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) + pos++ + } + if op&0x02 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x04 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 16 + pos++ + } + if op&0x08 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 24 + pos++ + } + if op&0x10 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) + pos++ + } + if op&0x20 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x40 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 16 + pos++ + } + if n == 0 { + n = 0x10000 + } + if off+n > len(baseBytes) || outPos+n > len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + copy(outBytes[outPos:], baseBytes[off:off+n]) + outPos += n + case op != 0: + n := int(op) + if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + copy(outBytes[outPos:], deltaBytes[pos:pos+n]) + pos += n + outPos += n + default: + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + } + + if outPos != len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + return out, nil +} + +func packVarintRead(buf []byte, pos *int) (int, error) { + res := 0 + shift := 0 + for { + if *pos >= len(buf) { + return 0, ErrInvalidObject + } + b := buf[*pos] + *pos++ + res |= int(b&0x7f) << shift + if (b & 0x80) == 0 { + break + } + shift += 7 + } + return res, nil +} + +type packFile struct { + relPath string + size int64 + data []byte + closeMu sync.Once +} + +func openPackFile(absPath, rel string) (*packFile, error) { + f, err := os.Open(absPath) + if err != nil { + return nil, err + } + + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return nil, err + } + if stat.Size() < 12 { + _ = f.Close() + return nil, ErrInvalidObject + } + + var headerArr [12]byte + header := headerArr[:] + _, err = io.ReadFull(f, header) + if err != nil { + _ = f.Close() + return nil, err + } + magic := binary.BigEndian.Uint32(header[:4]) + ver := binary.BigEndian.Uint32(header[4:8]) + if magic != packMagic || ver != packVersion2 { + _ = f.Close() + return nil, ErrInvalidObject + } + + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return nil, err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return nil, err + } + + return &packFile{ + relPath: rel, + size: stat.Size(), + data: region, + }, nil +} + +func (pf *packFile) Close() error { + if pf == nil { + return nil + } + var closeErr error + pf.closeMu.Do(func() { + if len(pf.data) > 0 { + if err := syscall.Munmap(pf.data); closeErr == nil { + closeErr = err + } + pf.data = nil + } + }) + return closeErr +} + +func (repo *Repository) packFile(rel string) (*packFile, error) { + repo.packFilesMu.RLock() + pf, ok := repo.packFiles[rel] + repo.packFilesMu.RUnlock() + if ok { + return pf, nil + } + + pf, err := openPackFile(repo.repoPath(rel), rel) + if err != nil { + return nil, err + } + + repo.packFilesMu.Lock() + if existing, ok := repo.packFiles[rel]; ok { + repo.packFilesMu.Unlock() + _ = pf.Close() + return existing, nil + } + repo.packFiles[rel] = pf + repo.packFilesMu.Unlock() + return pf, nil +} diff --git a/pack_read_test.go b/pack_read_test.go new file mode 100644 index 00000000..184a4e5c --- /dev/null +++ b/pack_read_test.go @@ -0,0 +1,149 @@ +package furgit + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestPackfileRead(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + gitCmd(t, repoPath, "config", "gc.auto", "0") + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644) + if err != nil { + t.Fatalf("failed to write file1.txt: %v", err) + } + err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644) + if err != nil { + t.Fatalf("failed to write file2.txt: %v", err) + } + + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") + commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + gitCmd(t, repoPath, "repack", "-a", "-d") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + hashObj, _ := repo.ParseHash(commitHash) + obj, err := repo.ReadObject(hashObj) + if err != nil { + t.Fatalf("ReadObject from pack failed: %v", err) + } + + commit, ok := obj.(*StoredCommit) + if !ok { + t.Fatalf("expected *StoredCommit, got %T", obj) + } + + treeObj, err := repo.ReadObject(commit.Tree) + if err != nil { + t.Fatalf("ReadObject tree failed: %v", err) + } + + tree, ok := treeObj.(*StoredTree) + if !ok { + t.Fatalf("expected *StoredTree, got %T", treeObj) + } + + if len(tree.Entries) != 2 { + t.Errorf("tree entries: got %d, want 2", len(tree.Entries)) + } + + gitLsTree := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) + for _, entry := range tree.Entries { + if !strings.Contains(gitLsTree, string(entry.Name)) { + t.Errorf("git ls-tree doesn't contain %s", entry.Name) + } + } +} + +func TestPackfileLarge(t *testing.T) { + if testing.Short() { + t.Skip("skipping large packfile test in short mode") + } + + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + gitCmd(t, repoPath, "config", "gc.auto", "0") + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + numFiles := 1000 + for i := 0; i < numFiles; i++ { + filename := filepath.Join(workDir, fmt.Sprintf("file%04d.txt", i)) + content := fmt.Sprintf("Content for file %d\n", i) + err := os.WriteFile(filename, []byte(content), 0o644) + if err != nil { + t.Fatalf("failed to write %s: %v", filename, err) + } + } + + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Large commit") + commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + gitCmd(t, repoPath, "repack", "-a", "-d") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + hashObj, _ := repo.ParseHash(commitHash) + obj, _ := repo.ReadObject(hashObj) + commit := obj.(*StoredCommit) + + treeObj, _ := repo.ReadObject(commit.Tree) + tree := treeObj.(*StoredTree) + + if len(tree.Entries) != numFiles { + t.Errorf("tree entries: got %d, want %d", len(tree.Entries), numFiles) + } + + gitCount := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) + gitLines := strings.Count(gitCount, "\n") + 1 + if len(tree.Entries) != gitLines { + t.Errorf("furgit found %d entries, git found %d", len(tree.Entries), gitLines) + } + + for i := 0; i < 10; i++ { + idx := i * (numFiles / 10) + expectedName := fmt.Sprintf("file%04d.txt", idx) + entry := tree.Entry([]byte(expectedName)) + if entry == nil { + t.Errorf("expected to find entry %s", expectedName) + continue + } + + blobObj, _ := repo.ReadObject(entry.ID) + blob := blobObj.(*StoredBlob) + + expectedContent := fmt.Sprintf("Content for file %d\n", idx) + if string(blob.Data) != expectedContent { + t.Errorf("blob %s: got %q, want %q", expectedName, blob.Data, expectedContent) + } + + gitData := gitCatFile(t, repoPath, "blob", entry.ID.String()) + if !bytes.Equal(blob.Data, gitData) { + t.Errorf("blob %s: furgit data doesn't match git data", expectedName) + } + } +} diff --git a/pack_test.go b/pack_test.go deleted file mode 100644 index 184a4e5c..00000000 --- a/pack_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "os" - "path/filepath" - "strings" - "testing" -) - -func TestPackfileRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644) - if err != nil { - t.Fatalf("failed to write file1.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644) - if err != nil { - t.Fatalf("failed to write file2.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - obj, err := repo.ReadObject(hashObj) - if err != nil { - t.Fatalf("ReadObject from pack failed: %v", err) - } - - commit, ok := obj.(*StoredCommit) - if !ok { - t.Fatalf("expected *StoredCommit, got %T", obj) - } - - treeObj, err := repo.ReadObject(commit.Tree) - if err != nil { - t.Fatalf("ReadObject tree failed: %v", err) - } - - tree, ok := treeObj.(*StoredTree) - if !ok { - t.Fatalf("expected *StoredTree, got %T", treeObj) - } - - if len(tree.Entries) != 2 { - t.Errorf("tree entries: got %d, want 2", len(tree.Entries)) - } - - gitLsTree := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) - for _, entry := range tree.Entries { - if !strings.Contains(gitLsTree, string(entry.Name)) { - t.Errorf("git ls-tree doesn't contain %s", entry.Name) - } - } -} - -func TestPackfileLarge(t *testing.T) { - if testing.Short() { - t.Skip("skipping large packfile test in short mode") - } - - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - numFiles := 1000 - for i := 0; i < numFiles; i++ { - filename := filepath.Join(workDir, fmt.Sprintf("file%04d.txt", i)) - content := fmt.Sprintf("Content for file %d\n", i) - err := os.WriteFile(filename, []byte(content), 0o644) - if err != nil { - t.Fatalf("failed to write %s: %v", filename, err) - } - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Large commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - obj, _ := repo.ReadObject(hashObj) - commit := obj.(*StoredCommit) - - treeObj, _ := repo.ReadObject(commit.Tree) - tree := treeObj.(*StoredTree) - - if len(tree.Entries) != numFiles { - t.Errorf("tree entries: got %d, want %d", len(tree.Entries), numFiles) - } - - gitCount := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) - gitLines := strings.Count(gitCount, "\n") + 1 - if len(tree.Entries) != gitLines { - t.Errorf("furgit found %d entries, git found %d", len(tree.Entries), gitLines) - } - - for i := 0; i < 10; i++ { - idx := i * (numFiles / 10) - expectedName := fmt.Sprintf("file%04d.txt", idx) - entry := tree.Entry([]byte(expectedName)) - if entry == nil { - t.Errorf("expected to find entry %s", expectedName) - continue - } - - blobObj, _ := repo.ReadObject(entry.ID) - blob := blobObj.(*StoredBlob) - - expectedContent := fmt.Sprintf("Content for file %d\n", idx) - if string(blob.Data) != expectedContent { - t.Errorf("blob %s: got %q, want %q", expectedName, blob.Data, expectedContent) - } - - gitData := gitCatFile(t, repoPath, "blob", entry.ID.String()) - if !bytes.Equal(blob.Data, gitData) { - t.Errorf("blob %s: furgit data doesn't match git data", expectedName) - } - } -} -- cgit v1.3.1-10-gc9f91