From 0b1516e5228126f4a51001bae7ed45ea0f797fcd Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Fri, 20 Feb 2026 12:51:16 +0800 Subject: Revert "packed: More uniform file naming scheme" This reverts commit 33de7fd28ce870d0b98016fcb42aa9ae5c0ca78a. --- pack_idx_read.go | 290 ++++++++++++++++++++++++++ pack_pack_read.go | 578 +++++++++++++++++++++++++++++++++++++++++++++++++++ pack_pack_write.go | 262 +++++++++++++++++++++++ pack_read_test.go | 149 +++++++++++++ pack_write_test.go | 236 +++++++++++++++++++++ packed_read_idx.go | 290 -------------------------- packed_read_pack.go | 578 --------------------------------------------------- packed_read_test.go | 149 ------------- packed_write_pack.go | 262 ----------------------- packed_write_test.go | 236 --------------------- 10 files changed, 1515 insertions(+), 1515 deletions(-) create mode 100644 pack_idx_read.go create mode 100644 pack_pack_read.go create mode 100644 pack_pack_write.go create mode 100644 pack_read_test.go create mode 100644 pack_write_test.go delete mode 100644 packed_read_idx.go delete mode 100644 packed_read_pack.go delete mode 100644 packed_read_test.go delete mode 100644 packed_write_pack.go delete mode 100644 packed_write_test.go diff --git a/pack_idx_read.go b/pack_idx_read.go new file mode 100644 index 00000000..0dbb9bcf --- /dev/null +++ b/pack_idx_read.go @@ -0,0 +1,290 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "syscall" +) + +const ( + idxMagic = 0xff744f63 + idxVersion2 = 2 +) + +type packIndex struct { + repo *Repository + idxRel string + packPath string + + loadOnce sync.Once + loadErr error + + numObjects int + fanout []byte + names []byte + crcs []byte + offset32 []byte + offset64 []byte + data []byte + + closeOnce sync.Once +} + +func (pi *packIndex) Close() error { + if pi == nil { + return nil + } + var closeErr error + pi.closeOnce.Do(func() { + if len(pi.data) > 0 { + if err := syscall.Munmap(pi.data); closeErr == nil { + closeErr = err + } + pi.data = nil + pi.fanout = nil + pi.names = nil + pi.crcs = nil + pi.offset32 = nil + pi.offset64 = nil + pi.numObjects = 0 + } + }) + return closeErr +} + +func (pi *packIndex) ensureLoaded() error { + pi.loadOnce.Do(func() { + pi.loadErr = pi.load() + }) + return pi.loadErr +} + +func (pi *packIndex) load() error { + if pi.repo == nil { + return ErrInvalidObject + } + f, err := os.Open(pi.repo.repoPath(pi.idxRel)) + if err != nil { + return err + } + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return err + } + if stat.Size() < 8+256*4 { + _ = f.Close() + return ErrInvalidObject + } + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return err + } + err = pi.parse(region) + if err != nil { + _ = syscall.Munmap(region) + return err + } + pi.data = region + return nil +} + +func (repo *Repository) packIndexes() ([]*packIndex, error) { + repo.packIdxOnce.Do(func() { + repo.packIdx, repo.packIdxErr = repo.loadPackIndexes() + }) + return repo.packIdx, repo.packIdxErr +} + +func (repo *Repository) loadPackIndexes() ([]*packIndex, error) { + dir := filepath.Join(repo.rootPath, "objects", "pack") + entries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + + idxs := make([]*packIndex, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { + continue + } + rel := filepath.Join("objects", "pack", entry.Name()) + packRel := strings.TrimSuffix(rel, ".idx") + ".pack" + idxs = append(idxs, &packIndex{ + repo: repo, + idxRel: rel, + packPath: packRel, + }) + } + if len(idxs) == 0 { + return nil, ErrNotFound + } + return idxs, nil +} + +func (pi *packIndex) parse(buf []byte) error { + if len(buf) < 8+256*4 { + return ErrInvalidObject + } + if readBE32(buf[0:4]) != idxMagic { + return ErrInvalidObject + } + if readBE32(buf[4:8]) != idxVersion2 { + return ErrInvalidObject + } + + const fanoutBytes = 256 * 4 + fanoutStart := 8 + fanoutEnd := fanoutStart + fanoutBytes + if fanoutEnd > len(buf) { + return ErrInvalidObject + } + pi.fanout = buf[fanoutStart:fanoutEnd] + nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) + + namesStart := fanoutEnd + namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() + if namesEnd > len(buf) { + return ErrInvalidObject + } + + crcStart := namesEnd + crcEnd := crcStart + nobj*4 + if crcEnd > len(buf) { + return ErrInvalidObject + } + + off32Start := crcEnd + off32End := off32Start + nobj*4 + if off32End > len(buf) { + return ErrInvalidObject + } + + pi.offset32 = buf[off32Start:off32End] + + off64Start := off32End + trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() + if trailerStart < off64Start { + return ErrInvalidObject + } + if (trailerStart-off64Start)%8 != 0 { + return ErrInvalidObject + } + off64End := trailerStart + pi.offset64 = buf[off64Start:off64End] + + pi.numObjects = nobj + pi.names = buf[namesStart:namesEnd] + pi.crcs = buf[crcStart:crcEnd] + return nil +} + +func readBE32(b []byte) uint32 { + _ = b[3] + return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]) +} + +func readBE64(b []byte) uint64 { + _ = b[7] + return (uint64(b[0]) << 56) | (uint64(b[1]) << 48) | + (uint64(b[2]) << 40) | (uint64(b[3]) << 32) | + (uint64(b[4]) << 24) | (uint64(b[5]) << 16) | + (uint64(b[6]) << 8) | uint64(b[7]) +} + +func (pi *packIndex) fanoutEntry(i int) uint32 { + if len(pi.fanout) == 0 { + return 0 + } + entries := len(pi.fanout) / 4 + if i < 0 || i >= entries { + return 0 + } + start := i * 4 + return readBE32(pi.fanout[start : start+4]) +} + +func (pi *packIndex) offset(idx int) (uint64, error) { + start := idx * 4 + word := readBE32(pi.offset32[start : start+4]) + if word&0x80000000 == 0 { + return uint64(word), nil + } + pos := int(word & 0x7fffffff) + entries := len(pi.offset64) / 8 + if pos < 0 || pos >= entries { + return 0, errors.New("furgit: pack: corrupt 64-bit offset table") + } + base := pos * 8 + return readBE64(pi.offset64[base : base+8]), nil +} + +func (pi *packIndex) lookup(id Hash) (packlocation, error) { + err := pi.ensureLoaded() + if err != nil { + return packlocation{}, err + } + if id.algo != pi.repo.hashAlgo { + return packlocation{}, fmt.Errorf("furgit: hash algorithm mismatch: got %s, expected %s", id.algo.String(), pi.repo.hashAlgo.String()) + } + first := int(id.data[0]) + var lo int + if first > 0 { + lo = int(pi.fanoutEntry(first - 1)) + } + hi := int(pi.fanoutEntry(first)) + idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) + if !found { + return packlocation{}, ErrNotFound + } + ofs, err := pi.offset(idx) + if err != nil { + return packlocation{}, err + } + return packlocation{ + PackPath: pi.packPath, + Offset: ofs, + }, nil +} + +func bsearchHash(names []byte, stride, lo, hi int, want Hash) (int, bool) { + for lo < hi { + mid := lo + (hi-lo)/2 + cmp := compareHash(names, stride, mid, want.data[:stride]) + if cmp == 0 { + return mid, true + } + if cmp > 0 { + hi = mid + } else { + lo = mid + 1 + } + } + return lo, false +} + +func compareHash(names []byte, stride, idx int, want []byte) int { + base := idx * stride + end := base + stride + return bytes.Compare(names[base:end], want) +} diff --git a/pack_pack_read.go b/pack_pack_read.go new file mode 100644 index 00000000..56098ee5 --- /dev/null +++ b/pack_pack_read.go @@ -0,0 +1,578 @@ +package furgit + +import ( + "encoding/binary" + "errors" + "io" + "os" + "sync" + "syscall" + + "codeberg.org/lindenii/furgit/internal/bufpool" + "codeberg.org/lindenii/furgit/internal/zlibx" +) + +const ( + packMagic = 0x5041434b + packVersion2 = 2 +) + +type packlocation struct { + PackPath string + Offset uint64 +} + +func (repo *Repository) packRead(id Hash) (ObjectType, bufpool.Buffer, error) { + loc, err := repo.packIndexFind(id) + if err != nil { + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + return repo.packReadAt(loc, id) +} + +func (repo *Repository) packIndexFind(id Hash) (packlocation, error) { + idxs, err := repo.packIndexes() + if err != nil { + return packlocation{}, err + } + for _, idx := range idxs { + loc, err := idx.lookup(id) + if errors.Is(err, ErrNotFound) { + continue + } + if err != nil { + return packlocation{}, err + } + return loc, nil + } + return packlocation{}, ErrNotFound +} + +func (repo *Repository) packReadAt(loc packlocation, want Hash) (ObjectType, bufpool.Buffer, error) { + ty, body, err := repo.packBodyResolveAtLocation(loc) + if err != nil { + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + return ty, body, nil +} + +func (repo *Repository) packBodyResolveAtLocation(loc packlocation) (ObjectType, bufpool.Buffer, error) { + pf, err := repo.packFile(loc.PackPath) + if err != nil { + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + return repo.packBodyResolveWithin(pf, loc.Offset) +} + +func (repo *Repository) packTypeSizeAtLocation(loc packlocation, seen map[packKey]struct{}) (ObjectType, int64, error) { + pf, err := repo.packFile(loc.PackPath) + if err != nil { + return ObjectTypeInvalid, 0, err + } + return repo.packTypeSizeWithin(pf, loc.Offset, seen) +} + +func packHeaderParse(data []byte) (ObjectType, int, int, error) { + if len(data) == 0 { + return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF + } + b := data[0] + ty := ObjectType((b >> 4) & 0x07) + size := int(b & 0x0f) + shift := 4 + consumed := 1 + for (b & 0x80) != 0 { + if consumed >= len(data) { + return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF + } + b = data[consumed] + size |= int(b&0x7f) << shift + shift += 7 + consumed++ + } + return ty, size, consumed, nil +} + +func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffer, error) { + if start > uint64(len(pf.data)) { + return bufpool.Buffer{}, ErrInvalidObject + } + body, _, err := zlibx.DecompressSized(pf.data[start:], sizeHint) + if err != nil { + return bufpool.Buffer{}, err + } + if sizeHint > 0 && len(body.Bytes()) != sizeHint { + body.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + return body, nil +} + +func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { + if len(data) == 0 { + return 0, 0, io.ErrUnexpectedEOF + } + b := data[0] + dist := uint64(b & 0x7f) + consumed := 1 + for (b & 0x80) != 0 { + if consumed >= len(data) { + return 0, 0, io.ErrUnexpectedEOF + } + b = data[consumed] + consumed++ + dist = ((dist + 1) << 7) + uint64(b&0x7f) + } + return dist, consumed, nil +} + +type packKey struct { + path string + ofs uint64 +} + +func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjectType, int64, error) { + if pf == nil { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + if seen == nil { + seen = make(map[packKey]struct{}) + } + var visited []packKey + defer func() { + for _, key := range visited { + delete(seen, key) + } + }() + + var declaredSize int64 + firstHeader := true + + for { + key := packKey{path: pf.relPath, ofs: ofs} + if _, dup := seen[key]; dup { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + seen[key] = struct{}{} + visited = append(visited, key) + + if ofs >= uint64(len(pf.data)) { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) + if err != nil { + return ObjectTypeInvalid, 0, err + } + if firstHeader { + declaredSize = int64(size) + firstHeader = false + } + + if uint64(consumed) > uint64(len(pf.data))-ofs { + return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF + } + dataStart := ofs + uint64(consumed) + switch ty { + case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: + return ty, declaredSize, nil + case ObjectTypeRefDelta: + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) + if hashEnd > uint64(len(pf.data)) { + return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF + } + var base Hash + copy(base.data[:], pf.data[dataStart:hashEnd]) + base.algo = repo.hashAlgo + loc, err := repo.packIndexFind(base) + if err == nil { + pf, err = repo.packFile(loc.PackPath) + if err != nil { + return ObjectTypeInvalid, 0, err + } + ofs = loc.Offset + continue + } + if !errors.Is(err, ErrNotFound) { + return ObjectTypeInvalid, 0, err + } + baseTy, _, err := repo.looseTypeSize(base) + if err != nil { + return ObjectTypeInvalid, 0, err + } + return baseTy, declaredSize, nil + case ObjectTypeOfsDelta: + dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) + if err != nil { + return ObjectTypeInvalid, 0, err + } + if ofs <= dist { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + dataStart += uint64(distConsumed) + if dataStart > uint64(len(pf.data)) { + return ObjectTypeInvalid, 0, ErrInvalidObject + } + ofs -= dist + case ObjectTypeInvalid, ObjectTypeFuture: + return ObjectTypeInvalid, 0, ErrInvalidObject + default: + return ObjectTypeInvalid, 0, ErrInvalidObject + } + } +} + +func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectType, bufpool.Buffer, error) { + if pf == nil { + return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject + } + + type deltaFrame struct { + delta bufpool.Buffer + } + var frames []deltaFrame + defer func() { + for i := range frames { + frames[i].delta.Release() + } + }() + + var ( + body bufpool.Buffer + bodyReady bool + resultTy ObjectType + ) + fail := func(err error) (ObjectType, bufpool.Buffer, error) { + if bodyReady { + body.Release() + bodyReady = false + } + return ObjectTypeInvalid, bufpool.Buffer{}, err + } + + resolved := false + for !resolved { + if ofs >= uint64(len(pf.data)) { + return fail(ErrInvalidObject) + } + ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) + if err != nil { + return fail(err) + } + if uint64(consumed) > uint64(len(pf.data))-ofs { + return fail(io.ErrUnexpectedEOF) + } + dataStart := ofs + uint64(consumed) + + switch ty { + case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: + body, err = packSectionInflate(pf, dataStart, size) + if err != nil { + return fail(err) + } + bodyReady = true + resultTy = ty + resolved = true + case ObjectTypeRefDelta: + hashEnd := dataStart + uint64(repo.hashAlgo.Size()) + if hashEnd > uint64(len(pf.data)) { + return fail(io.ErrUnexpectedEOF) + } + var base Hash + copy(base.data[:], pf.data[dataStart:hashEnd]) + base.algo = repo.hashAlgo + delta, err := packSectionInflate(pf, hashEnd, 0) + if err != nil { + return fail(err) + } + frames = append(frames, deltaFrame{delta: delta}) + + loc, err := repo.packIndexFind(base) + if err == nil { + pf, err = repo.packFile(loc.PackPath) + if err != nil { + return fail(err) + } + ofs = loc.Offset + continue + } + if !errors.Is(err, ErrNotFound) { + return fail(err) + } + resultTy, body, err = repo.looseReadTyped(base) + if err != nil { + return fail(err) + } + bodyReady = true + resolved = true + case ObjectTypeOfsDelta: + dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) + if err != nil { + return fail(err) + } + if ofs <= dist { + return fail(ErrInvalidObject) + } + deltaStart := dataStart + uint64(distConsumed) + if deltaStart > uint64(len(pf.data)) { + return fail(ErrInvalidObject) + } + delta, err := packSectionInflate(pf, deltaStart, 0) + if err != nil { + return fail(err) + } + frames = append(frames, deltaFrame{delta: delta}) + ofs -= dist + case ObjectTypeInvalid, ObjectTypeFuture: + return fail(ErrInvalidObject) + default: + return fail(ErrInvalidObject) + } + } + + for i := len(frames) - 1; i >= 0; i-- { + out, err := packDeltaApply(body, frames[i].delta) + body.Release() + bodyReady = false + frames[i].delta.Release() + if err != nil { + return fail(err) + } + body = out + bodyReady = true + } + frames = nil + return resultTy, body, nil +} + +func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { + pos := 0 + baseBytes := base.Bytes() + deltaBytes := delta.Bytes() + srcSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return bufpool.Buffer{}, err + } + dstSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return bufpool.Buffer{}, err + } + if srcSize != len(baseBytes) { + return bufpool.Buffer{}, ErrInvalidObject + } + out := bufpool.Borrow(dstSize) + out.Resize(dstSize) + outBytes := out.Bytes() + outPos := 0 + + for pos < len(deltaBytes) { + op := deltaBytes[pos] + pos++ + switch { + case op&0x80 != 0: + off := 0 + n := 0 + if op&0x01 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) + pos++ + } + if op&0x02 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x04 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 16 + pos++ + } + if op&0x08 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 24 + pos++ + } + if op&0x10 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) + pos++ + } + if op&0x20 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x40 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 16 + pos++ + } + if n == 0 { + n = 0x10000 + } + if off+n > len(baseBytes) || outPos+n > len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + copy(outBytes[outPos:], baseBytes[off:off+n]) + outPos += n + case op != 0: + n := int(op) + if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + copy(outBytes[outPos:], deltaBytes[pos:pos+n]) + pos += n + outPos += n + default: + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + } + + if outPos != len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + return out, nil +} + +func packVarintRead(buf []byte, pos *int) (int, error) { + res := 0 + shift := 0 + for { + if *pos >= len(buf) { + return 0, ErrInvalidObject + } + b := buf[*pos] + *pos++ + res |= int(b&0x7f) << shift + if (b & 0x80) == 0 { + break + } + shift += 7 + } + return res, nil +} + +type packFile struct { + relPath string + size int64 + data []byte + closeMu sync.Once +} + +func openPackFile(absPath, rel string) (*packFile, error) { + f, err := os.Open(absPath) + if err != nil { + return nil, err + } + + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return nil, err + } + if stat.Size() < 12 { + _ = f.Close() + return nil, ErrInvalidObject + } + + var headerArr [12]byte + header := headerArr[:] + _, err = io.ReadFull(f, header) + if err != nil { + _ = f.Close() + return nil, err + } + magic := binary.BigEndian.Uint32(header[:4]) + ver := binary.BigEndian.Uint32(header[4:8]) + if magic != packMagic || ver != packVersion2 { + _ = f.Close() + return nil, ErrInvalidObject + } + + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return nil, err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return nil, err + } + + return &packFile{ + relPath: rel, + size: stat.Size(), + data: region, + }, nil +} + +func (pf *packFile) Close() error { + if pf == nil { + return nil + } + var closeErr error + pf.closeMu.Do(func() { + if len(pf.data) > 0 { + if err := syscall.Munmap(pf.data); closeErr == nil { + closeErr = err + } + pf.data = nil + } + }) + return closeErr +} + +func (repo *Repository) packFile(rel string) (*packFile, error) { + repo.packFilesMu.RLock() + pf, ok := repo.packFiles[rel] + repo.packFilesMu.RUnlock() + if ok { + return pf, nil + } + + pf, err := openPackFile(repo.repoPath(rel), rel) + if err != nil { + return nil, err + } + + repo.packFilesMu.Lock() + if existing, ok := repo.packFiles[rel]; ok { + repo.packFilesMu.Unlock() + _ = pf.Close() + return existing, nil + } + repo.packFiles[rel] = pf + repo.packFilesMu.Unlock() + return pf, nil +} diff --git a/pack_pack_write.go b/pack_pack_write.go new file mode 100644 index 00000000..a0baba13 --- /dev/null +++ b/pack_pack_write.go @@ -0,0 +1,262 @@ +package furgit + +import ( + "crypto/sha1" + "crypto/sha256" + "encoding/binary" + "errors" + "hash" + "io" + + "codeberg.org/lindenii/furgit/internal/zlib" +) + +// TODO +var errPackDeltaUnimplemented = errors.New("furgit: pack: delta writing not implemented") + +// packWriter writes a PACKv2 stream. +type packWriter struct { + w io.Writer + h hash.Hash + algo hashAlgorithm + objCount uint32 + wroteHeader bool + bytesWritten uint64 +} + +func newPackWriter(w io.Writer, algo hashAlgorithm, objCount uint32) (*packWriter, error) { + if w == nil { + return nil, ErrInvalidObject + } + h, err := algo.New() + if err != nil { + return nil, err + } + return &packWriter{ + w: w, + h: h, + algo: algo, + objCount: objCount, + }, nil +} + +func (pw *packWriter) writePacked(p []byte) error { + if len(p) == 0 { + return nil + } + n, err := pw.w.Write(p) + if n > 0 { + _, _ = pw.h.Write(p[:n]) + pw.bytesWritten += uint64(n) + } + if err != nil { + return err + } + if n != len(p) { + return io.ErrShortWrite + } + return nil +} + +func (pw *packWriter) WriteHeader() error { + if pw == nil || pw.wroteHeader { + return ErrInvalidObject + } + var hdr [12]byte + binary.BigEndian.PutUint32(hdr[0:4], packMagic) + binary.BigEndian.PutUint32(hdr[4:8], packVersion2) + binary.BigEndian.PutUint32(hdr[8:12], pw.objCount) + if err := pw.writePacked(hdr[:]); err != nil { + return err + } + pw.wroteHeader = true + return nil +} + +func (pw *packWriter) WriteObject(ty ObjectType, body []byte) error { + if pw == nil || !pw.wroteHeader { + return ErrInvalidObject + } + switch ty { + case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: + // remember that go switches don't fallthrough lol + default: + return ErrInvalidObject + } + if body == nil { + body = []byte{} + } + + hdr, err := packHeaderEncode(ty, len(body)) + if err != nil { + return err + } + if err := pw.writePacked(hdr); err != nil { + return err + } + + zw := zlib.NewWriter(&packHashWriter{pw: pw}) + if _, err := zw.Write(body); err != nil { + _ = zw.Close() + return err + } + return zw.Close() +} + +func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error { + _ = baseOffset + _ = baseSize + _ = resultSize + _ = delta + return errPackDeltaUnimplemented +} + +func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error { + _ = base + _ = baseSize + _ = resultSize + _ = delta + return errPackDeltaUnimplemented +} + +func (pw *packWriter) Close() (Hash, error) { + if pw == nil || !pw.wroteHeader { + return Hash{}, ErrInvalidObject + } + sum := pw.h.Sum(nil) + if _, err := pw.w.Write(sum); err != nil { + return Hash{}, err + } + var out Hash + copy(out.data[:], sum) + out.algo = pw.algo + return out, nil +} + +type packHashWriter struct { + pw *packWriter +} + +func (w *packHashWriter) Write(p []byte) (int, error) { + if w == nil || w.pw == nil { + return 0, ErrInvalidObject + } + if err := w.pw.writePacked(p); err != nil { + return 0, err + } + return len(p), nil +} + +// packHeaderEncode encodes a pack object header (type + size). +func packHeaderEncode(ty ObjectType, size int) ([]byte, error) { + if size < 0 { + return nil, ErrInvalidObject + } + var out [16]byte + pos := 0 + + b := byte(size & 0x0f) + size >>= 4 + b |= byte(ty&0x07) << 4 + if size > 0 { + b |= 0x80 + } + out[pos] = b + pos++ + + for size > 0 { + b = byte(size & 0x7f) + size >>= 7 + if size > 0 { + b |= 0x80 + } + out[pos] = b + pos++ + } + + return out[:pos], nil +} + +// packVarintEncode encodes a 7-bit varint. +func packVarintEncode(size int) ([]byte, error) { + if size < 0 { + return nil, ErrInvalidObject + } + var out [16]byte + pos := 0 + for { + b := byte(size & 0x7f) + size >>= 7 + if size != 0 { + b |= 0x80 + } + out[pos] = b + pos++ + if size == 0 { + break + } + } + return out[:pos], nil +} + +// packOfsEncode encodes an ofs-delta distance. +func packOfsEncode(dist uint64) ([]byte, error) { + if dist == 0 { + return nil, ErrInvalidObject + } + var out [16]byte + pos := 0 + out[pos] = byte(dist & 0x7f) + pos++ + dist >>= 7 + for dist != 0 { + b := byte((dist - 1) & 0x7f) + out[pos] = b | 0x80 + pos++ + dist >>= 7 + } + for i, j := 0, pos-1; i < j; i, j = i+1, j-1 { + out[i], out[j] = out[j], out[i] + } + return out[:pos], nil +} + +// packWrite writes a pack stream for the provided object ids. +func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) { + if repo == nil { + return Hash{}, ErrInvalidObject + } + if opts.EnableDeltas || opts.EnableThinPack { + return Hash{}, errPackDeltaUnimplemented + } + if len(objects) > int(^uint32(0)) { + return Hash{}, ErrInvalidObject + } + + pw, err := newPackWriter(w, repo.hashAlgo, uint32(len(objects))) + if err != nil { + return Hash{}, err + } + if err := pw.WriteHeader(); err != nil { + return Hash{}, err + } + + for _, id := range objects { + ty, body, err := repo.ReadObjectTypeRaw(id) + if err != nil { + return Hash{}, err + } + if err := pw.WriteObject(ty, body); err != nil { + return Hash{}, err + } + } + + return pw.Close() +} + +type packWriteOptions struct { + EnableDeltas bool + EnableThinPack bool + MinDeltaSavings int + MaxDeltaDepth int +} diff --git a/pack_read_test.go b/pack_read_test.go new file mode 100644 index 00000000..184a4e5c --- /dev/null +++ b/pack_read_test.go @@ -0,0 +1,149 @@ +package furgit + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestPackfileRead(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + gitCmd(t, repoPath, "config", "gc.auto", "0") + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644) + if err != nil { + t.Fatalf("failed to write file1.txt: %v", err) + } + err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644) + if err != nil { + t.Fatalf("failed to write file2.txt: %v", err) + } + + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") + commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + gitCmd(t, repoPath, "repack", "-a", "-d") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + hashObj, _ := repo.ParseHash(commitHash) + obj, err := repo.ReadObject(hashObj) + if err != nil { + t.Fatalf("ReadObject from pack failed: %v", err) + } + + commit, ok := obj.(*StoredCommit) + if !ok { + t.Fatalf("expected *StoredCommit, got %T", obj) + } + + treeObj, err := repo.ReadObject(commit.Tree) + if err != nil { + t.Fatalf("ReadObject tree failed: %v", err) + } + + tree, ok := treeObj.(*StoredTree) + if !ok { + t.Fatalf("expected *StoredTree, got %T", treeObj) + } + + if len(tree.Entries) != 2 { + t.Errorf("tree entries: got %d, want 2", len(tree.Entries)) + } + + gitLsTree := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) + for _, entry := range tree.Entries { + if !strings.Contains(gitLsTree, string(entry.Name)) { + t.Errorf("git ls-tree doesn't contain %s", entry.Name) + } + } +} + +func TestPackfileLarge(t *testing.T) { + if testing.Short() { + t.Skip("skipping large packfile test in short mode") + } + + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + gitCmd(t, repoPath, "config", "gc.auto", "0") + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + numFiles := 1000 + for i := 0; i < numFiles; i++ { + filename := filepath.Join(workDir, fmt.Sprintf("file%04d.txt", i)) + content := fmt.Sprintf("Content for file %d\n", i) + err := os.WriteFile(filename, []byte(content), 0o644) + if err != nil { + t.Fatalf("failed to write %s: %v", filename, err) + } + } + + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Large commit") + commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + gitCmd(t, repoPath, "repack", "-a", "-d") + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + hashObj, _ := repo.ParseHash(commitHash) + obj, _ := repo.ReadObject(hashObj) + commit := obj.(*StoredCommit) + + treeObj, _ := repo.ReadObject(commit.Tree) + tree := treeObj.(*StoredTree) + + if len(tree.Entries) != numFiles { + t.Errorf("tree entries: got %d, want %d", len(tree.Entries), numFiles) + } + + gitCount := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) + gitLines := strings.Count(gitCount, "\n") + 1 + if len(tree.Entries) != gitLines { + t.Errorf("furgit found %d entries, git found %d", len(tree.Entries), gitLines) + } + + for i := 0; i < 10; i++ { + idx := i * (numFiles / 10) + expectedName := fmt.Sprintf("file%04d.txt", idx) + entry := tree.Entry([]byte(expectedName)) + if entry == nil { + t.Errorf("expected to find entry %s", expectedName) + continue + } + + blobObj, _ := repo.ReadObject(entry.ID) + blob := blobObj.(*StoredBlob) + + expectedContent := fmt.Sprintf("Content for file %d\n", idx) + if string(blob.Data) != expectedContent { + t.Errorf("blob %s: got %q, want %q", expectedName, blob.Data, expectedContent) + } + + gitData := gitCatFile(t, repoPath, "blob", entry.ID.String()) + if !bytes.Equal(blob.Data, gitData) { + t.Errorf("blob %s: furgit data doesn't match git data", expectedName) + } + } +} diff --git a/pack_write_test.go b/pack_write_test.go new file mode 100644 index 00000000..da7ecfa7 --- /dev/null +++ b/pack_write_test.go @@ -0,0 +1,236 @@ +package furgit + +import ( + "bytes" + "crypto/rand" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestPackHeaderEncodeParseRoundtrip(t *testing.T) { + cases := []struct { + ty ObjectType + sizes []int + }{ + {ObjectTypeCommit, []int{0, 1, 15, 16, 127, 128, 1024, 1 << 20}}, + {ObjectTypeTree, []int{0, 3, 31, 32, 255, 256, 4096}}, + {ObjectTypeBlob, []int{0, 7, 63, 64, 511, 512, 99999}}, + {ObjectTypeTag, []int{0, 2, 14, 15, 16, 127, 128}}, + } + + for _, c := range cases { + for _, size := range c.sizes { + encoded, err := packHeaderEncode(c.ty, size) + if err != nil { + t.Fatalf("packHeaderEncode(%v,%d) error: %v", c.ty, size, err) + } + gotTy, gotSize, consumed, err := packHeaderParse(encoded) + if err != nil { + t.Fatalf("packHeaderParse error: %v", err) + } + if gotTy != c.ty || gotSize != size { + t.Fatalf("roundtrip mismatch: got (%v,%d), want (%v,%d)", gotTy, gotSize, c.ty, size) + } + if consumed != len(encoded) { + t.Fatalf("consumed=%d, encoded=%d", consumed, len(encoded)) + } + } + } +} + +func TestPackVarintEncodeRoundtrip(t *testing.T) { + values := []int{0, 1, 2, 7, 8, 127, 128, 129, 255, 1024, 1 << 20} + for _, v := range values { + encoded, err := packVarintEncode(v) + if err != nil { + t.Fatalf("packVarintEncode(%d) error: %v", v, err) + } + pos := 0 + got, err := packVarintRead(encoded, &pos) + if err != nil { + t.Fatalf("packVarintRead error: %v", err) + } + if got != v { + t.Fatalf("roundtrip mismatch: got %d, want %d", got, v) + } + if pos != len(encoded) { + t.Fatalf("pos=%d, encoded=%d", pos, len(encoded)) + } + } +} + +func TestPackOfsEncodeRoundtrip(t *testing.T) { + values := []uint64{1, 2, 7, 8, 9, 0x7f, 0x80, 0x81, 0x1000, 0x12345} + for _, v := range values { + encoded, err := packOfsEncode(v) + if err != nil { + t.Fatalf("packOfsEncode(%d) error: %v", v, err) + } + dist, consumed, err := packDeltaReadOfsDistance(encoded) + if err != nil { + t.Fatalf("packDeltaReadOfsDistance error: %v", err) + } + if dist != v { + t.Fatalf("roundtrip mismatch: got %d, want %d", dist, v) + } + if consumed != len(encoded) { + t.Fatalf("consumed=%d, encoded=%d", consumed, len(encoded)) + } + } +} + +func TestPackWriteNoDeltas(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + workDir, cleanupWork := setupWorkDir(t) + defer cleanupWork() + + const ( + fileCount = 1000 + fileSize = 1024 + ) + buf := make([]byte, fileSize) + for i := 0; i < fileCount; i++ { + if _, err := rand.Read(buf); err != nil { + t.Fatalf("rand.Read failed: %v", err) + } + name := filepath.Join(workDir, fmt.Sprintf("file%04d.bin", i)) + if err := os.WriteFile(name, buf, 0o644); err != nil { + t.Fatalf("failed to write %s: %v", name, err) + } + } + + gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") + gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") + commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") + + commitBody := gitCatFile(t, repoPath, "commit", commitHash) + lines := bytes.Split(commitBody, []byte{'\n'}) + if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) { + t.Fatalf("commit missing tree header") + } + treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree ")))) + + lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash) + var blobHashes []string + for _, line := range strings.Split(lsTree, "\n") { + if line == "" { + continue + } + fields := strings.Fields(line) + if len(fields) < 3 { + t.Fatalf("unexpected ls-tree line: %q", line) + } + blobHashes = append(blobHashes, fields[2]) + } + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + var objects []Hash + commitID, _ := repo.ParseHash(commitHash) + objects = append(objects, commitID) + treeID, _ := repo.ParseHash(treeHash) + objects = append(objects, treeID) + for _, bh := range blobHashes { + id, _ := repo.ParseHash(bh) + objects = append(objects, id) + } + expectedOids := append([]string{commitHash, treeHash}, blobHashes...) + + packDir := filepath.Join(repoPath, "objects", "pack") + if err := os.MkdirAll(packDir, 0o755); err != nil { + t.Fatalf("failed to create pack dir: %v", err) + } + pf, err := os.CreateTemp(packDir, "furgit-test-*.pack") + if err != nil { + t.Fatalf("failed to create pack file: %v", err) + } + packPath := pf.Name() + idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" + if _, err := repo.packWrite(pf, objects, packWriteOptions{}); err != nil { + _ = pf.Close() + t.Fatalf("packWrite failed: %v", err) + } + if err := pf.Close(); err != nil { + t.Fatalf("failed to close pack file: %v", err) + } + + defer func() { + _ = os.Remove(packPath) + _ = os.Remove(idxPath) + }() + + _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath) + + verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath) + seen := make(map[string]struct{}) + for _, line := range strings.Split(verifyOut, "\n") { + if strings.TrimSpace(line) == "" { + continue + } + if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") { + continue + } + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + seen[parts[0]] = struct{}{} + } + for _, oid := range expectedOids { + if _, ok := seen[oid]; !ok { + t.Fatalf("verify-pack missing object %s", oid) + } + } + + for _, oid := range expectedOids { + if err := removeLooseObject(repoPath, oid); err != nil { + t.Fatalf("remove loose object %s: %v", oid, err) + } + } + for _, oid := range expectedOids { + _ = gitCmd(t, repoPath, "cat-file", "-p", oid) + } + + _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") +} + +func TestPackWriteDeltasUnimplemented(t *testing.T) { + repoPath, cleanup := setupTestRepo(t) + defer cleanup() + + repo, err := OpenRepository(repoPath) + if err != nil { + t.Fatalf("OpenRepository failed: %v", err) + } + defer func() { _ = repo.Close() }() + + buf := new(bytes.Buffer) + _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true}) + if !errors.Is(err, errPackDeltaUnimplemented) { + t.Fatalf("expected errPackDeltaUnimplemented, got %v", err) + } +} + +func removeLooseObject(repoPath, oid string) error { + if len(oid) < 2 { + return ErrInvalidObject + } + path := filepath.Join(repoPath, "objects", oid[:2], oid[2:]) + if err := os.Remove(path); err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + return nil +} diff --git a/packed_read_idx.go b/packed_read_idx.go deleted file mode 100644 index 0dbb9bcf..00000000 --- a/packed_read_idx.go +++ /dev/null @@ -1,290 +0,0 @@ -package furgit - -import ( - "bytes" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "syscall" -) - -const ( - idxMagic = 0xff744f63 - idxVersion2 = 2 -) - -type packIndex struct { - repo *Repository - idxRel string - packPath string - - loadOnce sync.Once - loadErr error - - numObjects int - fanout []byte - names []byte - crcs []byte - offset32 []byte - offset64 []byte - data []byte - - closeOnce sync.Once -} - -func (pi *packIndex) Close() error { - if pi == nil { - return nil - } - var closeErr error - pi.closeOnce.Do(func() { - if len(pi.data) > 0 { - if err := syscall.Munmap(pi.data); closeErr == nil { - closeErr = err - } - pi.data = nil - pi.fanout = nil - pi.names = nil - pi.crcs = nil - pi.offset32 = nil - pi.offset64 = nil - pi.numObjects = 0 - } - }) - return closeErr -} - -func (pi *packIndex) ensureLoaded() error { - pi.loadOnce.Do(func() { - pi.loadErr = pi.load() - }) - return pi.loadErr -} - -func (pi *packIndex) load() error { - if pi.repo == nil { - return ErrInvalidObject - } - f, err := os.Open(pi.repo.repoPath(pi.idxRel)) - if err != nil { - return err - } - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return err - } - if stat.Size() < 8+256*4 { - _ = f.Close() - return ErrInvalidObject - } - region, err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return err - } - err = pi.parse(region) - if err != nil { - _ = syscall.Munmap(region) - return err - } - pi.data = region - return nil -} - -func (repo *Repository) packIndexes() ([]*packIndex, error) { - repo.packIdxOnce.Do(func() { - repo.packIdx, repo.packIdxErr = repo.loadPackIndexes() - }) - return repo.packIdx, repo.packIdxErr -} - -func (repo *Repository) loadPackIndexes() ([]*packIndex, error) { - dir := filepath.Join(repo.rootPath, "objects", "pack") - entries, err := os.ReadDir(dir) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrNotFound - } - return nil, err - } - - idxs := make([]*packIndex, 0, len(entries)) - for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { - continue - } - rel := filepath.Join("objects", "pack", entry.Name()) - packRel := strings.TrimSuffix(rel, ".idx") + ".pack" - idxs = append(idxs, &packIndex{ - repo: repo, - idxRel: rel, - packPath: packRel, - }) - } - if len(idxs) == 0 { - return nil, ErrNotFound - } - return idxs, nil -} - -func (pi *packIndex) parse(buf []byte) error { - if len(buf) < 8+256*4 { - return ErrInvalidObject - } - if readBE32(buf[0:4]) != idxMagic { - return ErrInvalidObject - } - if readBE32(buf[4:8]) != idxVersion2 { - return ErrInvalidObject - } - - const fanoutBytes = 256 * 4 - fanoutStart := 8 - fanoutEnd := fanoutStart + fanoutBytes - if fanoutEnd > len(buf) { - return ErrInvalidObject - } - pi.fanout = buf[fanoutStart:fanoutEnd] - nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) - - namesStart := fanoutEnd - namesEnd := namesStart + nobj*pi.repo.hashAlgo.Size() - if namesEnd > len(buf) { - return ErrInvalidObject - } - - crcStart := namesEnd - crcEnd := crcStart + nobj*4 - if crcEnd > len(buf) { - return ErrInvalidObject - } - - off32Start := crcEnd - off32End := off32Start + nobj*4 - if off32End > len(buf) { - return ErrInvalidObject - } - - pi.offset32 = buf[off32Start:off32End] - - off64Start := off32End - trailerStart := len(buf) - 2*pi.repo.hashAlgo.Size() - if trailerStart < off64Start { - return ErrInvalidObject - } - if (trailerStart-off64Start)%8 != 0 { - return ErrInvalidObject - } - off64End := trailerStart - pi.offset64 = buf[off64Start:off64End] - - pi.numObjects = nobj - pi.names = buf[namesStart:namesEnd] - pi.crcs = buf[crcStart:crcEnd] - return nil -} - -func readBE32(b []byte) uint32 { - _ = b[3] - return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]) -} - -func readBE64(b []byte) uint64 { - _ = b[7] - return (uint64(b[0]) << 56) | (uint64(b[1]) << 48) | - (uint64(b[2]) << 40) | (uint64(b[3]) << 32) | - (uint64(b[4]) << 24) | (uint64(b[5]) << 16) | - (uint64(b[6]) << 8) | uint64(b[7]) -} - -func (pi *packIndex) fanoutEntry(i int) uint32 { - if len(pi.fanout) == 0 { - return 0 - } - entries := len(pi.fanout) / 4 - if i < 0 || i >= entries { - return 0 - } - start := i * 4 - return readBE32(pi.fanout[start : start+4]) -} - -func (pi *packIndex) offset(idx int) (uint64, error) { - start := idx * 4 - word := readBE32(pi.offset32[start : start+4]) - if word&0x80000000 == 0 { - return uint64(word), nil - } - pos := int(word & 0x7fffffff) - entries := len(pi.offset64) / 8 - if pos < 0 || pos >= entries { - return 0, errors.New("furgit: pack: corrupt 64-bit offset table") - } - base := pos * 8 - return readBE64(pi.offset64[base : base+8]), nil -} - -func (pi *packIndex) lookup(id Hash) (packlocation, error) { - err := pi.ensureLoaded() - if err != nil { - return packlocation{}, err - } - if id.algo != pi.repo.hashAlgo { - return packlocation{}, fmt.Errorf("furgit: hash algorithm mismatch: got %s, expected %s", id.algo.String(), pi.repo.hashAlgo.String()) - } - first := int(id.data[0]) - var lo int - if first > 0 { - lo = int(pi.fanoutEntry(first - 1)) - } - hi := int(pi.fanoutEntry(first)) - idx, found := bsearchHash(pi.names, pi.repo.hashAlgo.Size(), lo, hi, id) - if !found { - return packlocation{}, ErrNotFound - } - ofs, err := pi.offset(idx) - if err != nil { - return packlocation{}, err - } - return packlocation{ - PackPath: pi.packPath, - Offset: ofs, - }, nil -} - -func bsearchHash(names []byte, stride, lo, hi int, want Hash) (int, bool) { - for lo < hi { - mid := lo + (hi-lo)/2 - cmp := compareHash(names, stride, mid, want.data[:stride]) - if cmp == 0 { - return mid, true - } - if cmp > 0 { - hi = mid - } else { - lo = mid + 1 - } - } - return lo, false -} - -func compareHash(names []byte, stride, idx int, want []byte) int { - base := idx * stride - end := base + stride - return bytes.Compare(names[base:end], want) -} diff --git a/packed_read_pack.go b/packed_read_pack.go deleted file mode 100644 index 56098ee5..00000000 --- a/packed_read_pack.go +++ /dev/null @@ -1,578 +0,0 @@ -package furgit - -import ( - "encoding/binary" - "errors" - "io" - "os" - "sync" - "syscall" - - "codeberg.org/lindenii/furgit/internal/bufpool" - "codeberg.org/lindenii/furgit/internal/zlibx" -) - -const ( - packMagic = 0x5041434b - packVersion2 = 2 -) - -type packlocation struct { - PackPath string - Offset uint64 -} - -func (repo *Repository) packRead(id Hash) (ObjectType, bufpool.Buffer, error) { - loc, err := repo.packIndexFind(id) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return repo.packReadAt(loc, id) -} - -func (repo *Repository) packIndexFind(id Hash) (packlocation, error) { - idxs, err := repo.packIndexes() - if err != nil { - return packlocation{}, err - } - for _, idx := range idxs { - loc, err := idx.lookup(id) - if errors.Is(err, ErrNotFound) { - continue - } - if err != nil { - return packlocation{}, err - } - return loc, nil - } - return packlocation{}, ErrNotFound -} - -func (repo *Repository) packReadAt(loc packlocation, want Hash) (ObjectType, bufpool.Buffer, error) { - ty, body, err := repo.packBodyResolveAtLocation(loc) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return ty, body, nil -} - -func (repo *Repository) packBodyResolveAtLocation(loc packlocation) (ObjectType, bufpool.Buffer, error) { - pf, err := repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - return repo.packBodyResolveWithin(pf, loc.Offset) -} - -func (repo *Repository) packTypeSizeAtLocation(loc packlocation, seen map[packKey]struct{}) (ObjectType, int64, error) { - pf, err := repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, 0, err - } - return repo.packTypeSizeWithin(pf, loc.Offset, seen) -} - -func packHeaderParse(data []byte) (ObjectType, int, int, error) { - if len(data) == 0 { - return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - ty := ObjectType((b >> 4) & 0x07) - size := int(b & 0x0f) - shift := 4 - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return ObjectTypeInvalid, 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - size |= int(b&0x7f) << shift - shift += 7 - consumed++ - } - return ty, size, consumed, nil -} - -func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffer, error) { - if start > uint64(len(pf.data)) { - return bufpool.Buffer{}, ErrInvalidObject - } - body, _, err := zlibx.DecompressSized(pf.data[start:], sizeHint) - if err != nil { - return bufpool.Buffer{}, err - } - if sizeHint > 0 && len(body.Bytes()) != sizeHint { - body.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return body, nil -} - -func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { - if len(data) == 0 { - return 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - dist := uint64(b & 0x7f) - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - consumed++ - dist = ((dist + 1) << 7) + uint64(b&0x7f) - } - return dist, consumed, nil -} - -type packKey struct { - path string - ofs uint64 -} - -func (repo *Repository) packTypeSizeWithin(pf *packFile, ofs uint64, seen map[packKey]struct{}) (ObjectType, int64, error) { - if pf == nil { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - if seen == nil { - seen = make(map[packKey]struct{}) - } - var visited []packKey - defer func() { - for _, key := range visited { - delete(seen, key) - } - }() - - var declaredSize int64 - firstHeader := true - - for { - key := packKey{path: pf.relPath, ofs: ofs} - if _, dup := seen[key]; dup { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - seen[key] = struct{}{} - visited = append(visited, key) - - if ofs >= uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) - if err != nil { - return ObjectTypeInvalid, 0, err - } - if firstHeader { - declaredSize = int64(size) - firstHeader = false - } - - if uint64(consumed) > uint64(len(pf.data))-ofs { - return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF - } - dataStart := ofs + uint64(consumed) - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - return ty, declaredSize, nil - case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.Size()) - if hashEnd > uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, io.ErrUnexpectedEOF - } - var base Hash - copy(base.data[:], pf.data[dataStart:hashEnd]) - base.algo = repo.hashAlgo - loc, err := repo.packIndexFind(base) - if err == nil { - pf, err = repo.packFile(loc.PackPath) - if err != nil { - return ObjectTypeInvalid, 0, err - } - ofs = loc.Offset - continue - } - if !errors.Is(err, ErrNotFound) { - return ObjectTypeInvalid, 0, err - } - baseTy, _, err := repo.looseTypeSize(base) - if err != nil { - return ObjectTypeInvalid, 0, err - } - return baseTy, declaredSize, nil - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) - if err != nil { - return ObjectTypeInvalid, 0, err - } - if ofs <= dist { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - dataStart += uint64(distConsumed) - if dataStart > uint64(len(pf.data)) { - return ObjectTypeInvalid, 0, ErrInvalidObject - } - ofs -= dist - case ObjectTypeInvalid, ObjectTypeFuture: - return ObjectTypeInvalid, 0, ErrInvalidObject - default: - return ObjectTypeInvalid, 0, ErrInvalidObject - } - } -} - -func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectType, bufpool.Buffer, error) { - if pf == nil { - return ObjectTypeInvalid, bufpool.Buffer{}, ErrInvalidObject - } - - type deltaFrame struct { - delta bufpool.Buffer - } - var frames []deltaFrame - defer func() { - for i := range frames { - frames[i].delta.Release() - } - }() - - var ( - body bufpool.Buffer - bodyReady bool - resultTy ObjectType - ) - fail := func(err error) (ObjectType, bufpool.Buffer, error) { - if bodyReady { - body.Release() - bodyReady = false - } - return ObjectTypeInvalid, bufpool.Buffer{}, err - } - - resolved := false - for !resolved { - if ofs >= uint64(len(pf.data)) { - return fail(ErrInvalidObject) - } - ty, size, consumed, err := packHeaderParse(pf.data[ofs:]) - if err != nil { - return fail(err) - } - if uint64(consumed) > uint64(len(pf.data))-ofs { - return fail(io.ErrUnexpectedEOF) - } - dataStart := ofs + uint64(consumed) - - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - body, err = packSectionInflate(pf, dataStart, size) - if err != nil { - return fail(err) - } - bodyReady = true - resultTy = ty - resolved = true - case ObjectTypeRefDelta: - hashEnd := dataStart + uint64(repo.hashAlgo.Size()) - if hashEnd > uint64(len(pf.data)) { - return fail(io.ErrUnexpectedEOF) - } - var base Hash - copy(base.data[:], pf.data[dataStart:hashEnd]) - base.algo = repo.hashAlgo - delta, err := packSectionInflate(pf, hashEnd, 0) - if err != nil { - return fail(err) - } - frames = append(frames, deltaFrame{delta: delta}) - - loc, err := repo.packIndexFind(base) - if err == nil { - pf, err = repo.packFile(loc.PackPath) - if err != nil { - return fail(err) - } - ofs = loc.Offset - continue - } - if !errors.Is(err, ErrNotFound) { - return fail(err) - } - resultTy, body, err = repo.looseReadTyped(base) - if err != nil { - return fail(err) - } - bodyReady = true - resolved = true - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(pf.data[dataStart:]) - if err != nil { - return fail(err) - } - if ofs <= dist { - return fail(ErrInvalidObject) - } - deltaStart := dataStart + uint64(distConsumed) - if deltaStart > uint64(len(pf.data)) { - return fail(ErrInvalidObject) - } - delta, err := packSectionInflate(pf, deltaStart, 0) - if err != nil { - return fail(err) - } - frames = append(frames, deltaFrame{delta: delta}) - ofs -= dist - case ObjectTypeInvalid, ObjectTypeFuture: - return fail(ErrInvalidObject) - default: - return fail(ErrInvalidObject) - } - } - - for i := len(frames) - 1; i >= 0; i-- { - out, err := packDeltaApply(body, frames[i].delta) - body.Release() - bodyReady = false - frames[i].delta.Release() - if err != nil { - return fail(err) - } - body = out - bodyReady = true - } - frames = nil - return resultTy, body, nil -} - -func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { - pos := 0 - baseBytes := base.Bytes() - deltaBytes := delta.Bytes() - srcSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - dstSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - if srcSize != len(baseBytes) { - return bufpool.Buffer{}, ErrInvalidObject - } - out := bufpool.Borrow(dstSize) - out.Resize(dstSize) - outBytes := out.Bytes() - outPos := 0 - - for pos < len(deltaBytes) { - op := deltaBytes[pos] - pos++ - switch { - case op&0x80 != 0: - off := 0 - n := 0 - if op&0x01 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) - pos++ - } - if op&0x02 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x04 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 16 - pos++ - } - if op&0x08 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 24 - pos++ - } - if op&0x10 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) - pos++ - } - if op&0x20 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x40 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 16 - pos++ - } - if n == 0 { - n = 0x10000 - } - if off+n > len(baseBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], baseBytes[off:off+n]) - outPos += n - case op != 0: - n := int(op) - if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], deltaBytes[pos:pos+n]) - pos += n - outPos += n - default: - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - } - - if outPos != len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return out, nil -} - -func packVarintRead(buf []byte, pos *int) (int, error) { - res := 0 - shift := 0 - for { - if *pos >= len(buf) { - return 0, ErrInvalidObject - } - b := buf[*pos] - *pos++ - res |= int(b&0x7f) << shift - if (b & 0x80) == 0 { - break - } - shift += 7 - } - return res, nil -} - -type packFile struct { - relPath string - size int64 - data []byte - closeMu sync.Once -} - -func openPackFile(absPath, rel string) (*packFile, error) { - f, err := os.Open(absPath) - if err != nil { - return nil, err - } - - stat, err := f.Stat() - if err != nil { - _ = f.Close() - return nil, err - } - if stat.Size() < 12 { - _ = f.Close() - return nil, ErrInvalidObject - } - - var headerArr [12]byte - header := headerArr[:] - _, err = io.ReadFull(f, header) - if err != nil { - _ = f.Close() - return nil, err - } - magic := binary.BigEndian.Uint32(header[:4]) - ver := binary.BigEndian.Uint32(header[4:8]) - if magic != packMagic || ver != packVersion2 { - _ = f.Close() - return nil, ErrInvalidObject - } - - region, err := syscall.Mmap( - int(f.Fd()), - 0, - int(stat.Size()), - syscall.PROT_READ, - syscall.MAP_PRIVATE, - ) - if err != nil { - _ = f.Close() - return nil, err - } - err = f.Close() - if err != nil { - _ = syscall.Munmap(region) - return nil, err - } - - return &packFile{ - relPath: rel, - size: stat.Size(), - data: region, - }, nil -} - -func (pf *packFile) Close() error { - if pf == nil { - return nil - } - var closeErr error - pf.closeMu.Do(func() { - if len(pf.data) > 0 { - if err := syscall.Munmap(pf.data); closeErr == nil { - closeErr = err - } - pf.data = nil - } - }) - return closeErr -} - -func (repo *Repository) packFile(rel string) (*packFile, error) { - repo.packFilesMu.RLock() - pf, ok := repo.packFiles[rel] - repo.packFilesMu.RUnlock() - if ok { - return pf, nil - } - - pf, err := openPackFile(repo.repoPath(rel), rel) - if err != nil { - return nil, err - } - - repo.packFilesMu.Lock() - if existing, ok := repo.packFiles[rel]; ok { - repo.packFilesMu.Unlock() - _ = pf.Close() - return existing, nil - } - repo.packFiles[rel] = pf - repo.packFilesMu.Unlock() - return pf, nil -} diff --git a/packed_read_test.go b/packed_read_test.go deleted file mode 100644 index 184a4e5c..00000000 --- a/packed_read_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package furgit - -import ( - "bytes" - "fmt" - "os" - "path/filepath" - "strings" - "testing" -) - -func TestPackfileRead(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - err := os.WriteFile(filepath.Join(workDir, "file1.txt"), []byte("content1"), 0o644) - if err != nil { - t.Fatalf("failed to write file1.txt: %v", err) - } - err = os.WriteFile(filepath.Join(workDir, "file2.txt"), []byte("content2"), 0o644) - if err != nil { - t.Fatalf("failed to write file2.txt: %v", err) - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - obj, err := repo.ReadObject(hashObj) - if err != nil { - t.Fatalf("ReadObject from pack failed: %v", err) - } - - commit, ok := obj.(*StoredCommit) - if !ok { - t.Fatalf("expected *StoredCommit, got %T", obj) - } - - treeObj, err := repo.ReadObject(commit.Tree) - if err != nil { - t.Fatalf("ReadObject tree failed: %v", err) - } - - tree, ok := treeObj.(*StoredTree) - if !ok { - t.Fatalf("expected *StoredTree, got %T", treeObj) - } - - if len(tree.Entries) != 2 { - t.Errorf("tree entries: got %d, want 2", len(tree.Entries)) - } - - gitLsTree := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) - for _, entry := range tree.Entries { - if !strings.Contains(gitLsTree, string(entry.Name)) { - t.Errorf("git ls-tree doesn't contain %s", entry.Name) - } - } -} - -func TestPackfileLarge(t *testing.T) { - if testing.Short() { - t.Skip("skipping large packfile test in short mode") - } - - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - gitCmd(t, repoPath, "config", "gc.auto", "0") - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - numFiles := 1000 - for i := 0; i < numFiles; i++ { - filename := filepath.Join(workDir, fmt.Sprintf("file%04d.txt", i)) - content := fmt.Sprintf("Content for file %d\n", i) - err := os.WriteFile(filename, []byte(content), 0o644) - if err != nil { - t.Fatalf("failed to write %s: %v", filename, err) - } - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Large commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - gitCmd(t, repoPath, "repack", "-a", "-d") - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - hashObj, _ := repo.ParseHash(commitHash) - obj, _ := repo.ReadObject(hashObj) - commit := obj.(*StoredCommit) - - treeObj, _ := repo.ReadObject(commit.Tree) - tree := treeObj.(*StoredTree) - - if len(tree.Entries) != numFiles { - t.Errorf("tree entries: got %d, want %d", len(tree.Entries), numFiles) - } - - gitCount := gitCmd(t, repoPath, "ls-tree", commit.Tree.String()) - gitLines := strings.Count(gitCount, "\n") + 1 - if len(tree.Entries) != gitLines { - t.Errorf("furgit found %d entries, git found %d", len(tree.Entries), gitLines) - } - - for i := 0; i < 10; i++ { - idx := i * (numFiles / 10) - expectedName := fmt.Sprintf("file%04d.txt", idx) - entry := tree.Entry([]byte(expectedName)) - if entry == nil { - t.Errorf("expected to find entry %s", expectedName) - continue - } - - blobObj, _ := repo.ReadObject(entry.ID) - blob := blobObj.(*StoredBlob) - - expectedContent := fmt.Sprintf("Content for file %d\n", idx) - if string(blob.Data) != expectedContent { - t.Errorf("blob %s: got %q, want %q", expectedName, blob.Data, expectedContent) - } - - gitData := gitCatFile(t, repoPath, "blob", entry.ID.String()) - if !bytes.Equal(blob.Data, gitData) { - t.Errorf("blob %s: furgit data doesn't match git data", expectedName) - } - } -} diff --git a/packed_write_pack.go b/packed_write_pack.go deleted file mode 100644 index a0baba13..00000000 --- a/packed_write_pack.go +++ /dev/null @@ -1,262 +0,0 @@ -package furgit - -import ( - "crypto/sha1" - "crypto/sha256" - "encoding/binary" - "errors" - "hash" - "io" - - "codeberg.org/lindenii/furgit/internal/zlib" -) - -// TODO -var errPackDeltaUnimplemented = errors.New("furgit: pack: delta writing not implemented") - -// packWriter writes a PACKv2 stream. -type packWriter struct { - w io.Writer - h hash.Hash - algo hashAlgorithm - objCount uint32 - wroteHeader bool - bytesWritten uint64 -} - -func newPackWriter(w io.Writer, algo hashAlgorithm, objCount uint32) (*packWriter, error) { - if w == nil { - return nil, ErrInvalidObject - } - h, err := algo.New() - if err != nil { - return nil, err - } - return &packWriter{ - w: w, - h: h, - algo: algo, - objCount: objCount, - }, nil -} - -func (pw *packWriter) writePacked(p []byte) error { - if len(p) == 0 { - return nil - } - n, err := pw.w.Write(p) - if n > 0 { - _, _ = pw.h.Write(p[:n]) - pw.bytesWritten += uint64(n) - } - if err != nil { - return err - } - if n != len(p) { - return io.ErrShortWrite - } - return nil -} - -func (pw *packWriter) WriteHeader() error { - if pw == nil || pw.wroteHeader { - return ErrInvalidObject - } - var hdr [12]byte - binary.BigEndian.PutUint32(hdr[0:4], packMagic) - binary.BigEndian.PutUint32(hdr[4:8], packVersion2) - binary.BigEndian.PutUint32(hdr[8:12], pw.objCount) - if err := pw.writePacked(hdr[:]); err != nil { - return err - } - pw.wroteHeader = true - return nil -} - -func (pw *packWriter) WriteObject(ty ObjectType, body []byte) error { - if pw == nil || !pw.wroteHeader { - return ErrInvalidObject - } - switch ty { - case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag: - // remember that go switches don't fallthrough lol - default: - return ErrInvalidObject - } - if body == nil { - body = []byte{} - } - - hdr, err := packHeaderEncode(ty, len(body)) - if err != nil { - return err - } - if err := pw.writePacked(hdr); err != nil { - return err - } - - zw := zlib.NewWriter(&packHashWriter{pw: pw}) - if _, err := zw.Write(body); err != nil { - _ = zw.Close() - return err - } - return zw.Close() -} - -func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error { - _ = baseOffset - _ = baseSize - _ = resultSize - _ = delta - return errPackDeltaUnimplemented -} - -func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error { - _ = base - _ = baseSize - _ = resultSize - _ = delta - return errPackDeltaUnimplemented -} - -func (pw *packWriter) Close() (Hash, error) { - if pw == nil || !pw.wroteHeader { - return Hash{}, ErrInvalidObject - } - sum := pw.h.Sum(nil) - if _, err := pw.w.Write(sum); err != nil { - return Hash{}, err - } - var out Hash - copy(out.data[:], sum) - out.algo = pw.algo - return out, nil -} - -type packHashWriter struct { - pw *packWriter -} - -func (w *packHashWriter) Write(p []byte) (int, error) { - if w == nil || w.pw == nil { - return 0, ErrInvalidObject - } - if err := w.pw.writePacked(p); err != nil { - return 0, err - } - return len(p), nil -} - -// packHeaderEncode encodes a pack object header (type + size). -func packHeaderEncode(ty ObjectType, size int) ([]byte, error) { - if size < 0 { - return nil, ErrInvalidObject - } - var out [16]byte - pos := 0 - - b := byte(size & 0x0f) - size >>= 4 - b |= byte(ty&0x07) << 4 - if size > 0 { - b |= 0x80 - } - out[pos] = b - pos++ - - for size > 0 { - b = byte(size & 0x7f) - size >>= 7 - if size > 0 { - b |= 0x80 - } - out[pos] = b - pos++ - } - - return out[:pos], nil -} - -// packVarintEncode encodes a 7-bit varint. -func packVarintEncode(size int) ([]byte, error) { - if size < 0 { - return nil, ErrInvalidObject - } - var out [16]byte - pos := 0 - for { - b := byte(size & 0x7f) - size >>= 7 - if size != 0 { - b |= 0x80 - } - out[pos] = b - pos++ - if size == 0 { - break - } - } - return out[:pos], nil -} - -// packOfsEncode encodes an ofs-delta distance. -func packOfsEncode(dist uint64) ([]byte, error) { - if dist == 0 { - return nil, ErrInvalidObject - } - var out [16]byte - pos := 0 - out[pos] = byte(dist & 0x7f) - pos++ - dist >>= 7 - for dist != 0 { - b := byte((dist - 1) & 0x7f) - out[pos] = b | 0x80 - pos++ - dist >>= 7 - } - for i, j := 0, pos-1; i < j; i, j = i+1, j-1 { - out[i], out[j] = out[j], out[i] - } - return out[:pos], nil -} - -// packWrite writes a pack stream for the provided object ids. -func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOptions) (Hash, error) { - if repo == nil { - return Hash{}, ErrInvalidObject - } - if opts.EnableDeltas || opts.EnableThinPack { - return Hash{}, errPackDeltaUnimplemented - } - if len(objects) > int(^uint32(0)) { - return Hash{}, ErrInvalidObject - } - - pw, err := newPackWriter(w, repo.hashAlgo, uint32(len(objects))) - if err != nil { - return Hash{}, err - } - if err := pw.WriteHeader(); err != nil { - return Hash{}, err - } - - for _, id := range objects { - ty, body, err := repo.ReadObjectTypeRaw(id) - if err != nil { - return Hash{}, err - } - if err := pw.WriteObject(ty, body); err != nil { - return Hash{}, err - } - } - - return pw.Close() -} - -type packWriteOptions struct { - EnableDeltas bool - EnableThinPack bool - MinDeltaSavings int - MaxDeltaDepth int -} diff --git a/packed_write_test.go b/packed_write_test.go deleted file mode 100644 index da7ecfa7..00000000 --- a/packed_write_test.go +++ /dev/null @@ -1,236 +0,0 @@ -package furgit - -import ( - "bytes" - "crypto/rand" - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "testing" -) - -func TestPackHeaderEncodeParseRoundtrip(t *testing.T) { - cases := []struct { - ty ObjectType - sizes []int - }{ - {ObjectTypeCommit, []int{0, 1, 15, 16, 127, 128, 1024, 1 << 20}}, - {ObjectTypeTree, []int{0, 3, 31, 32, 255, 256, 4096}}, - {ObjectTypeBlob, []int{0, 7, 63, 64, 511, 512, 99999}}, - {ObjectTypeTag, []int{0, 2, 14, 15, 16, 127, 128}}, - } - - for _, c := range cases { - for _, size := range c.sizes { - encoded, err := packHeaderEncode(c.ty, size) - if err != nil { - t.Fatalf("packHeaderEncode(%v,%d) error: %v", c.ty, size, err) - } - gotTy, gotSize, consumed, err := packHeaderParse(encoded) - if err != nil { - t.Fatalf("packHeaderParse error: %v", err) - } - if gotTy != c.ty || gotSize != size { - t.Fatalf("roundtrip mismatch: got (%v,%d), want (%v,%d)", gotTy, gotSize, c.ty, size) - } - if consumed != len(encoded) { - t.Fatalf("consumed=%d, encoded=%d", consumed, len(encoded)) - } - } - } -} - -func TestPackVarintEncodeRoundtrip(t *testing.T) { - values := []int{0, 1, 2, 7, 8, 127, 128, 129, 255, 1024, 1 << 20} - for _, v := range values { - encoded, err := packVarintEncode(v) - if err != nil { - t.Fatalf("packVarintEncode(%d) error: %v", v, err) - } - pos := 0 - got, err := packVarintRead(encoded, &pos) - if err != nil { - t.Fatalf("packVarintRead error: %v", err) - } - if got != v { - t.Fatalf("roundtrip mismatch: got %d, want %d", got, v) - } - if pos != len(encoded) { - t.Fatalf("pos=%d, encoded=%d", pos, len(encoded)) - } - } -} - -func TestPackOfsEncodeRoundtrip(t *testing.T) { - values := []uint64{1, 2, 7, 8, 9, 0x7f, 0x80, 0x81, 0x1000, 0x12345} - for _, v := range values { - encoded, err := packOfsEncode(v) - if err != nil { - t.Fatalf("packOfsEncode(%d) error: %v", v, err) - } - dist, consumed, err := packDeltaReadOfsDistance(encoded) - if err != nil { - t.Fatalf("packDeltaReadOfsDistance error: %v", err) - } - if dist != v { - t.Fatalf("roundtrip mismatch: got %d, want %d", dist, v) - } - if consumed != len(encoded) { - t.Fatalf("consumed=%d, encoded=%d", consumed, len(encoded)) - } - } -} - -func TestPackWriteNoDeltas(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - const ( - fileCount = 1000 - fileSize = 1024 - ) - buf := make([]byte, fileSize) - for i := 0; i < fileCount; i++ { - if _, err := rand.Read(buf); err != nil { - t.Fatalf("rand.Read failed: %v", err) - } - name := filepath.Join(workDir, fmt.Sprintf("file%04d.bin", i)) - if err := os.WriteFile(name, buf, 0o644); err != nil { - t.Fatalf("failed to write %s: %v", name, err) - } - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Test commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - commitBody := gitCatFile(t, repoPath, "commit", commitHash) - lines := bytes.Split(commitBody, []byte{'\n'}) - if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) { - t.Fatalf("commit missing tree header") - } - treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree ")))) - - lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash) - var blobHashes []string - for _, line := range strings.Split(lsTree, "\n") { - if line == "" { - continue - } - fields := strings.Fields(line) - if len(fields) < 3 { - t.Fatalf("unexpected ls-tree line: %q", line) - } - blobHashes = append(blobHashes, fields[2]) - } - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - var objects []Hash - commitID, _ := repo.ParseHash(commitHash) - objects = append(objects, commitID) - treeID, _ := repo.ParseHash(treeHash) - objects = append(objects, treeID) - for _, bh := range blobHashes { - id, _ := repo.ParseHash(bh) - objects = append(objects, id) - } - expectedOids := append([]string{commitHash, treeHash}, blobHashes...) - - packDir := filepath.Join(repoPath, "objects", "pack") - if err := os.MkdirAll(packDir, 0o755); err != nil { - t.Fatalf("failed to create pack dir: %v", err) - } - pf, err := os.CreateTemp(packDir, "furgit-test-*.pack") - if err != nil { - t.Fatalf("failed to create pack file: %v", err) - } - packPath := pf.Name() - idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" - if _, err := repo.packWrite(pf, objects, packWriteOptions{}); err != nil { - _ = pf.Close() - t.Fatalf("packWrite failed: %v", err) - } - if err := pf.Close(); err != nil { - t.Fatalf("failed to close pack file: %v", err) - } - - defer func() { - _ = os.Remove(packPath) - _ = os.Remove(idxPath) - }() - - _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath) - - verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath) - seen := make(map[string]struct{}) - for _, line := range strings.Split(verifyOut, "\n") { - if strings.TrimSpace(line) == "" { - continue - } - if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") { - continue - } - parts := strings.Fields(line) - if len(parts) == 0 { - continue - } - seen[parts[0]] = struct{}{} - } - for _, oid := range expectedOids { - if _, ok := seen[oid]; !ok { - t.Fatalf("verify-pack missing object %s", oid) - } - } - - for _, oid := range expectedOids { - if err := removeLooseObject(repoPath, oid); err != nil { - t.Fatalf("remove loose object %s: %v", oid, err) - } - } - for _, oid := range expectedOids { - _ = gitCmd(t, repoPath, "cat-file", "-p", oid) - } - - _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") -} - -func TestPackWriteDeltasUnimplemented(t *testing.T) { - repoPath, cleanup := setupTestRepo(t) - defer cleanup() - - repo, err := OpenRepository(repoPath) - if err != nil { - t.Fatalf("OpenRepository failed: %v", err) - } - defer func() { _ = repo.Close() }() - - buf := new(bytes.Buffer) - _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true}) - if !errors.Is(err, errPackDeltaUnimplemented) { - t.Fatalf("expected errPackDeltaUnimplemented, got %v", err) - } -} - -func removeLooseObject(repoPath, oid string) error { - if len(oid) < 2 { - return ErrInvalidObject - } - path := filepath.Join(repoPath, "objects", oid[:2], oid[2:]) - if err := os.Remove(path); err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - return nil -} -- cgit v1.3.1-10-gc9f91