diff options
| author | 2026-02-20 12:51:16 +0800 | |
|---|---|---|
| committer | 2026-02-20 12:57:31 +0800 | |
| commit | 9730560a82426408243cb15349f6955a4ba34f60 (patch) | |
| tree | 00cfcfc75db7b1de795b822f70246abd40199b86 | |
| parent | Revert "packed: Use random delta seed" (diff) | |
| signature | No signature | |
Revert "packed: Write packs with deltas"
This reverts commit 17c9aee0e781026353ead4ac749a3ae89c83d007.
| -rw-r--r-- | delta_read_apply.go | 159 | ||||
| -rw-r--r-- | delta_test.go | 55 | ||||
| -rw-r--r-- | delta_write_deltify.go | 306 | ||||
| -rw-r--r-- | delta_write_encode.go | 108 | ||||
| -rw-r--r-- | delta_write_select.go | 56 | ||||
| -rw-r--r-- | packed_read_pack.go | 150 | ||||
| -rw-r--r-- | packed_write_pack.go | 123 | ||||
| -rw-r--r-- | packed_write_test.go | 299 |
8 files changed, 170 insertions, 1086 deletions
diff --git a/delta_read_apply.go b/delta_read_apply.go deleted file mode 100644 index 48233292..00000000 --- a/delta_read_apply.go +++ /dev/null @@ -1,159 +0,0 @@ -package furgit - -import ( - "io" - - "codeberg.org/lindenii/furgit/internal/bufpool" -) - -func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { - if len(data) == 0 { - return 0, 0, io.ErrUnexpectedEOF - } - b := data[0] - dist := uint64(b & 0x7f) - consumed := 1 - for (b & 0x80) != 0 { - if consumed >= len(data) { - return 0, 0, io.ErrUnexpectedEOF - } - b = data[consumed] - consumed++ - dist += 1 - dist <<= 7 - dist |= uint64(b & 0x7f) - } - return dist, consumed, nil -} - -func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { - pos := 0 - baseBytes := base.Bytes() - deltaBytes := delta.Bytes() - srcSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - dstSize, err := packVarintRead(deltaBytes, &pos) - if err != nil { - return bufpool.Buffer{}, err - } - if srcSize != len(baseBytes) { - return bufpool.Buffer{}, ErrInvalidObject - } - out := bufpool.Borrow(dstSize) - out.Resize(dstSize) - outBytes := out.Bytes() - outPos := 0 - - for pos < len(deltaBytes) { - op := deltaBytes[pos] - pos++ - switch { - case op&0x80 != 0: - off := 0 - n := 0 - if op&0x01 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) - pos++ - } - if op&0x02 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x04 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 16 - pos++ - } - if op&0x08 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - off |= int(deltaBytes[pos]) << 24 - pos++ - } - if op&0x10 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) - pos++ - } - if op&0x20 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 8 - pos++ - } - if op&0x40 != 0 { - if pos >= len(deltaBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - n |= int(deltaBytes[pos]) << 16 - pos++ - } - if n == 0 { - n = 0x10000 - } - if off+n > len(baseBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], baseBytes[off:off+n]) - outPos += n - case op != 0: - n := int(op) - if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - copy(outBytes[outPos:], deltaBytes[pos:pos+n]) - pos += n - outPos += n - default: - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - } - - if outPos != len(outBytes) { - out.Release() - return bufpool.Buffer{}, ErrInvalidObject - } - return out, nil -} - -func packVarintRead(buf []byte, pos *int) (int, error) { - res := 0 - shift := 0 - for { - if *pos >= len(buf) { - return 0, ErrInvalidObject - } - b := buf[*pos] - *pos++ - res |= int(b&0x7f) << shift - if (b & 0x80) == 0 { - break - } - shift += 7 - } - return res, nil -} diff --git a/delta_test.go b/delta_test.go deleted file mode 100644 index f1ee5840..00000000 --- a/delta_test.go +++ /dev/null @@ -1,55 +0,0 @@ -package furgit - -import ( - "bytes" - "testing" - - "codeberg.org/lindenii/furgit/internal/bufpool" -) - -func TestDeltaEncodeApplyRoundtrip(t *testing.T) { - base := []byte("Hello, world!") - target := []byte("Hello, brave new world!") - - instr := []deltaInstruction{ - {copy: true, offset: 0, length: 7}, - {copy: false, data: []byte("brave new "), length: len("brave new ")}, - {copy: true, offset: 7, length: 6}, - } - delta, err := deltaEncode(len(base), len(target), instr) - if err != nil { - t.Fatalf("deltaEncode error: %v", err) - } - - out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta)) - if err != nil { - t.Fatalf("packDeltaApply error: %v", err) - } - defer out.Release() - if !bytes.Equal(out.Bytes(), target) { - t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target) - } -} - -func TestDeltifyRoundtrip(t *testing.T) { - base := []byte("The quick brown fox jumps over the lazy dog.") - target := []byte("The quick red fox jumps over the very lazy dog.") - - instr, err := deltify(base, target, 0) - if err != nil { - t.Fatalf("deltify error: %v", err) - } - delta, err := deltaEncode(len(base), len(target), instr) - if err != nil { - t.Fatalf("deltaEncode error: %v", err) - } - - out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta)) - if err != nil { - t.Fatalf("packDeltaApply error: %v", err) - } - defer out.Release() - if !bytes.Equal(out.Bytes(), target) { - t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target) - } -} diff --git a/delta_write_deltify.go b/delta_write_deltify.go deleted file mode 100644 index cd3d6aa8..00000000 --- a/delta_write_deltify.go +++ /dev/null @@ -1,306 +0,0 @@ -package furgit - -import ( - "bytes" - - "codeberg.org/lindenii/furgit/internal/murmurhash2" -) - -const ( - deltaMinChunk = 32 - deltaMaxChunk = 8192 - deltaSplitMask = (1 << 8) - 1 - deltaStretchMax = (1 << 24) - 1 -) - -var deltaGearTab = [...]uint32{ - 0x67ed26b7, 0x32da500c, 0x53d0fee0, 0xce387dc7, 0xcd406d90, 0x2e83a4d4, - 0x9fc9a38d, 0xb67259dc, 0xca6b1722, 0x6d2ea08c, 0x235cea2e, 0x3149bb5f, - 0x1beda787, 0x2a6b77d5, 0x2f22d9ac, 0x91fc0544, 0xe413acfa, 0x5a30ff7a, - 0xad6fdde0, 0x444fd0f5, 0x7ad87864, 0x58c5ff05, 0x8d2ec336, 0x2371f853, - 0x550f8572, 0x6aa448dd, 0x7c9ddbcf, 0x95221e14, 0x2a82ec33, 0xcbec5a78, - 0xc6795a0d, 0x243995b7, 0x1c909a2f, 0x4fded51c, 0x635d334b, 0x0e2b9999, - 0x2702968d, 0x856de1d5, 0x3325d60e, 0xeb6a7502, 0xec2a9844, 0x0905835a, - 0xa1820375, 0xa4be5cab, 0x96a6c058, 0x2c2ccd70, 0xba40fce3, 0xd794c46b, - 0x8fbae83e, 0xc3aa7899, 0x3d3ff8ed, 0xa0d42b5b, 0x571c0c97, 0xd2811516, - 0xf7e7b96c, 0x4fd2fcbd, 0xe2fdec94, 0x282cc436, 0x78e8e95c, 0x80a3b613, - 0xcfbee20c, 0xd4a32d1c, 0x2a12ff13, 0x6af82936, 0xe5630258, 0x8efa6a98, - 0x294fb2d1, 0xdeb57086, 0x5f0fddb3, 0xeceda7ce, 0x4c87305f, 0x3a6d3307, - 0xe22d2942, 0x9d060217, 0x1e42ed02, 0xb6f63b52, 0x4367f39f, 0x055cf262, - 0x03a461b2, 0x5ef9e382, 0x386bc03a, 0x2a1e79c7, 0xf1a0058b, 0xd4d2dea9, - 0x56baf37d, 0x5daff6cc, 0xf03a951d, 0xaef7de45, 0xa8f4581e, 0x3960b555, - 0xffbfff6d, 0xbe702a23, 0x8f5b6d6f, 0x061739fb, 0x98696f47, 0x3fd596d4, - 0x151eac6b, 0xa9fcc4f5, 0x69181a12, 0x3ac5a107, 0xb5198fe7, 0x96bcb1da, - 0x1b5ddf8e, 0xc757d650, 0x65865c3a, 0x8fc0a41a, 0x87435536, 0x99eda6f2, - 0x41874794, 0x29cff4e8, 0xb70efd9a, 0x3103f6e7, 0x84d2453b, 0x15a450bd, - 0x74f49af1, 0x60f664b1, 0xa1c86935, 0xfdafbce1, 0xe36353e3, 0x5d9ba739, - 0xbc0559ba, 0x708b0054, 0xd41d808c, 0xb2f31723, 0x9027c41f, 0xf136d165, - 0xb5374b12, 0x9420a6ac, 0x273958b6, 0xe6c2fad0, 0xebdc1f21, 0xfb33af8b, - 0xc71c25cd, 0xe9a2d8e5, 0xbeb38a50, 0xbceb7cc2, 0x4e4e73f0, 0xcd6c251d, - 0xde4c032c, 0x4b04ac30, 0x725b8b21, 0x4eb8c33b, 0x20d07b75, 0x0567aa63, - 0xb56b2bb7, 0xc1f5fd3a, 0xcafd35ca, 0x470dd4da, 0xfe4f94cd, 0xfb8de424, - 0xe8dbcf40, 0xfe50a37a, 0x62db5b5d, 0xf32f4ab6, 0x2c4a8a51, 0x18473dc0, - 0xfe0cbb6e, 0xfe399efd, 0xdf34ecc9, 0x6ccd5055, 0x46097073, 0x139135c2, - 0x721c76f6, 0x1c6a94b4, 0x6eee014d, 0x8a508e02, 0x3da538f5, 0x280d394f, - 0x5248a0c4, 0x3ce94c6c, 0x9a71ad3a, 0x8493dd05, 0xe43f0ab6, 0x18e4ed42, - 0x6c5c0e09, 0x42b06ec9, 0x8d330343, 0xa45b6f59, 0x2a573c0c, 0xd7fd3de6, - 0xeedeab68, 0x5c84dafc, 0xbbd1b1a8, 0xa3ce1ad1, 0x85b70bed, 0xb6add07f, - 0xa531309c, 0x8f8ab852, 0x564de332, 0xeac9ed0c, 0x73da402c, 0x3ec52761, - 0x43af2f4d, 0xd6ff45c8, 0x4c367462, 0xd553bd6a, 0x44724855, 0x3b2aa728, - 0x56e5eb65, 0xeaf16173, 0x33fa42ff, 0xd714bb5d, 0xfbd0a3b9, 0xaf517134, - 0x9416c8cd, 0x534cf94f, 0x548947c2, 0x34193569, 0x32f4389a, 0xfe7028bc, - 0xed73b1ed, 0x9db95770, 0x468e3922, 0x0440c3cd, 0x60059a62, 0x33504562, - 0x2b229fbd, 0x5174dca5, 0xf7028752, 0xd63c6aa8, 0x31276f38, 0x0646721c, - 0xb0191da8, 0xe00e6de0, 0x9eac1a6e, 0x9f7628a5, 0xed6c06ea, 0x0bb8af15, - 0xf119fb12, 0x38693c1c, 0x732bc0fe, 0x84953275, 0xb82ec888, 0x33a4f1b3, - 0x3099835e, 0x028a8782, 0x5fdd51d7, 0xc6c717b3, 0xb06caf71, 0x17c8c111, - 0x61bad754, 0x9fd03061, 0xe09df1af, 0x3bc9eb73, 0x85878413, 0x9889aaf2, - 0x3f5a9e46, 0x42c9f01f, 0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2, - 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba, -} - -type deltaBlock struct { - length int - offset int - hash uint32 -} - -type deltaTable struct { - blocks []deltaBlock - nblocks int - nalloc int - - offs []uint32 - len int - size int -} - -type deltaInstruction struct { - copy bool - offset int - length int - data []byte -} - -func deltify(base, target []byte, seed uint32) ([]deltaInstruction, error) { - if base == nil { - base = []byte{} - } - if target == nil { - target = []byte{} - } - dt, err := deltaTableInitMem(base, 0, len(base), seed) - if err != nil { - return nil, err - } - return deltifyMemMem(target, 0, len(target), seed, dt, base, 0, len(base)) -} - -func deltifyMemMem(target []byte, fileoffset, filesize int, seed uint32, dt *deltaTable, base []byte, baseOffset0, baseSize int) ([]deltaInstruction, error) { - const allocChunkSize = 64 - instr := make([]deltaInstruction, 0, allocChunkSize) - offset0 := fileoffset - - for fileoffset < filesize { - blocklen := deltaNextBlockLen(target, fileoffset, filesize) - if blocklen == 0 { - if fileoffset < filesize { - data := target[fileoffset:filesize] - instr = append(instr, deltaInstruction{ - copy: false, - offset: fileoffset - offset0, - length: len(data), - data: data, - }) - } - break - } - - block := dt.lookupBlock(target[fileoffset:fileoffset+blocklen], blocklen, seed, base, baseOffset0) - if block != nil { - if err := deltaStretchMemMem(base, baseOffset0, baseSize, block, target, fileoffset+blocklen, filesize, &blocklen); err != nil { - return nil, err - } - instr = append(instr, deltaInstruction{ - copy: true, - offset: block.offset, - length: blocklen, - }) - } else { - data := target[fileoffset : fileoffset+blocklen] - instr = append(instr, deltaInstruction{ - copy: false, - offset: fileoffset - offset0, - length: len(data), - data: data, - }) - } - fileoffset += blocklen - } - - return instr, nil -} - -func deltaTableInitMem(data []byte, fileoffset, filesize int, seed uint32) (*deltaTable, error) { - dt := &deltaTable{ - nalloc: 128, - size: 128, - } - dt.blocks = make([]deltaBlock, dt.nalloc) - dt.offs = make([]uint32, dt.size) - - offset0 := fileoffset - for fileoffset < filesize { - blocklen := deltaNextBlockLen(data, fileoffset, filesize) - if blocklen == 0 { - break - } - h := deltaHashBlock(data[fileoffset:fileoffset+blocklen], seed) - if err := dt.addBlock(data, offset0, blocklen, fileoffset-offset0, h); err != nil { - return nil, err - } - fileoffset += blocklen - } - return dt, nil -} - -func deltaHashBlock(p []byte, seed uint32) uint32 { - return murmurhash2.Sum32(p, seed) -} - -func deltaNextBlockLen(data []byte, fileoffset, filesize int) int { - if fileoffset >= filesize || filesize-fileoffset < deltaMinChunk { - return 0 - } - p := fileoffset + deltaMinChunk - end := fileoffset + deltaMaxChunk - if end > filesize { - end = filesize - } - var gh uint32 - for p < end { - gh = (gh << 1) + deltaGearTab[data[p]] - p++ - if (gh & deltaSplitMask) == 0 { - break - } - } - return p - fileoffset -} - -func (dt *deltaTable) addBlock(data []byte, offset0, length, offset int, h uint32) error { - if length == 0 { - return nil - } - if err := dt.resize(); err != nil { - return err - } - idx, found := dt.lookupSlot(data, offset0, offset, length, h) - if found { - return nil - } - dt.offs[idx] = uint32(dt.nblocks + 1) - dt.blocks[dt.nblocks] = deltaBlock{ - length: length, - offset: offset, - hash: h, - } - dt.nblocks++ - dt.len++ - return nil -} - -func (dt *deltaTable) lookupBlock(chunk []byte, length int, seed uint32, base []byte, baseOffset0 int) *deltaBlock { - if dt == nil || dt.size == 0 { - return nil - } - h := deltaHashBlock(chunk, seed) - for i := int(h % uint32(dt.size)); dt.offs[i] != 0; i = (i + 1) % dt.size { - block := &dt.blocks[dt.offs[i]-1] - if block.hash != h || block.length != length { - continue - } - baseOff := baseOffset0 + block.offset - if baseOff < 0 || baseOff+length > len(base) { - continue - } - if bytes.Equal(base[baseOff:baseOff+length], chunk) { - return block - } - } - return nil -} - -func (dt *deltaTable) lookupSlot(data []byte, offset0, offset, length int, h uint32) (int, bool) { - if dt == nil || dt.size == 0 { - return 0, false - } - for i := int(h % uint32(dt.size)); ; i = (i + 1) % dt.size { - if dt.offs[i] == 0 { - return i, false - } - block := &dt.blocks[dt.offs[i]-1] - if block.hash != h || block.length != length { - continue - } - left := offset0 + block.offset - right := offset0 + offset - if left < 0 || right < 0 || left+length > len(data) || right+length > len(data) { - continue - } - if bytes.Equal(data[left:left+length], data[right:right+length]) { - return i, true - } - } -} - -func (dt *deltaTable) resize() error { - if dt.nblocks == dt.nalloc { - newAlloc := dt.nalloc + 256 - newBlocks := make([]deltaBlock, newAlloc) - copy(newBlocks, dt.blocks[:dt.nblocks]) - dt.blocks = newBlocks - dt.nalloc = newAlloc - } - - if dt.offs == nil || (dt.len*100)/dt.size > 70 { - oldSize := dt.size - dt.size *= 2 - if dt.size == 0 { - dt.size = oldSize - return ErrInvalidObject - } - dt.offs = make([]uint32, dt.size) - for i := 0; i < dt.nblocks; i++ { - block := &dt.blocks[i] - idx := int(block.hash % uint32(dt.size)) - for dt.offs[idx] != 0 { - idx = (idx + 1) % dt.size - } - dt.offs[idx] = uint32(i + 1) - } - } - - return nil -} - -func deltaStretchMemMem(base []byte, baseOffset0, baseSize int, block *deltaBlock, data []byte, fileoffset, filesize int, blocklen *int) error { - baseOffset := baseOffset0 + block.offset + *blocklen - if baseOffset > baseSize { - return ErrInvalidObject - } - if fileoffset > filesize { - return ErrInvalidObject - } - maxlen := baseSize - baseOffset - if avail := filesize - fileoffset; avail < maxlen { - maxlen = avail - } - for i := 0; i < maxlen && *blocklen < deltaStretchMax-1; i++ { - if data[fileoffset+i] != base[baseOffset+i] { - break - } - *blocklen++ - } - return nil -} diff --git a/delta_write_encode.go b/delta_write_encode.go deleted file mode 100644 index 66c77052..00000000 --- a/delta_write_encode.go +++ /dev/null @@ -1,108 +0,0 @@ -package furgit - -const deltaCopyDefaultLen = 0x10000 - -func deltaEncode(baseSize, resultSize int, instr []deltaInstruction) ([]byte, error) { - if baseSize < 0 || resultSize < 0 { - return nil, ErrInvalidObject - } - out := make([]byte, 0, 64) - baseHdr, err := packVarintEncode(baseSize) - if err != nil { - return nil, err - } - out = append(out, baseHdr...) - resultHdr, err := packVarintEncode(resultSize) - if err != nil { - return nil, err - } - out = append(out, resultHdr...) - - for _, ins := range instr { - if ins.copy { - if ins.offset < 0 || ins.length <= 0 { - return nil, ErrInvalidObject - } - if ins.offset > 0xffffffff { - return nil, ErrInvalidObject - } - if ins.length > 0xffffff { - return nil, ErrInvalidObject - } - op := byte(0x80) - var tmp [7]byte - pos := 0 - - off := ins.offset - for i := 0; i < 4; i++ { - op |= 1 << i - tmp[pos] = byte(off & 0xff) - pos++ - off >>= 8 - if off == 0 { - break - } - } - if off != 0 { - return nil, ErrInvalidObject - } - - n := ins.length - if n != deltaCopyDefaultLen { - for i := 0; i < 3 && n > 0; i++ { - op |= 1 << (i + 4) - tmp[pos] = byte(n & 0xff) - pos++ - n >>= 8 - } - if n > 0 { - return nil, ErrInvalidObject - } - } - - out = append(out, op) - out = append(out, tmp[:pos]...) - continue - } - - data := ins.data - if ins.length > 0 { - if len(data) < ins.length { - return nil, ErrInvalidObject - } - data = data[:ins.length] - } - if len(data) == 0 { - return nil, ErrInvalidObject - } - for len(data) > 0 { - n := len(data) - if n > 127 { - n = 127 - } - out = append(out, byte(n)) - out = append(out, data[:n]...) - data = data[n:] - } - } - - return out, nil -} - -func deltaTry(base, target []byte, seed uint32, minSavings int) ([]byte, bool) { - if minSavings < 0 { - minSavings = 0 - } - instr, err := deltify(base, target, seed) - if err != nil { - return nil, false - } - delta, err := deltaEncode(len(base), len(target), instr) - if err != nil { - return nil, false - } - if len(delta)+minSavings >= len(target) { - return nil, false - } - return delta, true -} diff --git a/delta_write_select.go b/delta_write_select.go deleted file mode 100644 index ad8e0d0e..00000000 --- a/delta_write_select.go +++ /dev/null @@ -1,56 +0,0 @@ -package furgit - -const defaultDeltaWindow = 64 - -type objectToPack struct { - id Hash - ty ObjectType - body []byte - offset uint64 - deltaDepth int -} - -type deltaContext struct { - window int - candidates []*objectToPack -} - -func (ctx *deltaContext) addCandidate(obj *objectToPack) { - if ctx.window <= 0 { - return - } - ctx.candidates = append(ctx.candidates, obj) - if len(ctx.candidates) > ctx.window { - over := len(ctx.candidates) - ctx.window - ctx.candidates = ctx.candidates[over:] - } -} - -func pickDeltaBase(ctx *deltaContext, obj *objectToPack, seed uint32, minSavings, maxDepth int) (*objectToPack, []byte) { - if ctx == nil || len(ctx.candidates) == 0 { - return nil, nil - } - if maxDepth <= 0 { - maxDepth = 1 - } - var bestBase *objectToPack - var bestDelta []byte - for i := len(ctx.candidates) - 1; i >= 0; i-- { - base := ctx.candidates[i] - if base.ty != ObjectTypeBlob { - continue - } - if base.deltaDepth >= maxDepth { - continue - } - delta, ok := deltaTry(base.body, obj.body, seed, minSavings) - if !ok { - continue - } - if bestDelta == nil || len(delta) < len(bestDelta) { - bestDelta = delta - bestBase = base - } - } - return bestBase, bestDelta -} diff --git a/packed_read_pack.go b/packed_read_pack.go index 31279c8f..56098ee5 100644 --- a/packed_read_pack.go +++ b/packed_read_pack.go @@ -108,6 +108,24 @@ func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffe return body, nil } +func packDeltaReadOfsDistance(data []byte) (uint64, int, error) { + if len(data) == 0 { + return 0, 0, io.ErrUnexpectedEOF + } + b := data[0] + dist := uint64(b & 0x7f) + consumed := 1 + for (b & 0x80) != 0 { + if consumed >= len(data) { + return 0, 0, io.ErrUnexpectedEOF + } + b = data[consumed] + consumed++ + dist = ((dist + 1) << 7) + uint64(b&0x7f) + } + return dist, consumed, nil +} + type packKey struct { path string ofs uint64 @@ -326,6 +344,138 @@ func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectT return resultTy, body, nil } +func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) { + pos := 0 + baseBytes := base.Bytes() + deltaBytes := delta.Bytes() + srcSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return bufpool.Buffer{}, err + } + dstSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return bufpool.Buffer{}, err + } + if srcSize != len(baseBytes) { + return bufpool.Buffer{}, ErrInvalidObject + } + out := bufpool.Borrow(dstSize) + out.Resize(dstSize) + outBytes := out.Bytes() + outPos := 0 + + for pos < len(deltaBytes) { + op := deltaBytes[pos] + pos++ + switch { + case op&0x80 != 0: + off := 0 + n := 0 + if op&0x01 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) + pos++ + } + if op&0x02 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x04 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 16 + pos++ + } + if op&0x08 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 24 + pos++ + } + if op&0x10 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) + pos++ + } + if op&0x20 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x40 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 16 + pos++ + } + if n == 0 { + n = 0x10000 + } + if off+n > len(baseBytes) || outPos+n > len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + copy(outBytes[outPos:], baseBytes[off:off+n]) + outPos += n + case op != 0: + n := int(op) + if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + copy(outBytes[outPos:], deltaBytes[pos:pos+n]) + pos += n + outPos += n + default: + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + } + + if outPos != len(outBytes) { + out.Release() + return bufpool.Buffer{}, ErrInvalidObject + } + return out, nil +} + +func packVarintRead(buf []byte, pos *int) (int, error) { + res := 0 + shift := 0 + for { + if *pos >= len(buf) { + return 0, ErrInvalidObject + } + b := buf[*pos] + *pos++ + res |= int(b&0x7f) << shift + if (b & 0x80) == 0 { + break + } + shift += 7 + } + return res, nil +} + type packFile struct { relPath string size int64 diff --git a/packed_write_pack.go b/packed_write_pack.go index f31f42d1..a0baba13 100644 --- a/packed_write_pack.go +++ b/packed_write_pack.go @@ -104,84 +104,19 @@ func (pw *packWriter) WriteObject(ty ObjectType, body []byte) error { } func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error { - if pw == nil || !pw.wroteHeader { - return ErrInvalidObject - } - if baseSize < 0 || resultSize < 0 { - return ErrInvalidObject - } - if delta == nil { - delta = []byte{} - } - deltaSize := len(delta) - if deltaSize <= 0 { - return ErrInvalidObject - } - currentOffset := pw.bytesWritten - if baseOffset >= currentOffset { - return ErrInvalidObject - } - dist := currentOffset - baseOffset - - hdr, err := packHeaderEncode(ObjectTypeOfsDelta, deltaSize) - if err != nil { - return err - } - if err := pw.writePacked(hdr); err != nil { - return err - } - ofs, err := packOfsEncode(dist) - if err != nil { - return err - } - if err := pw.writePacked(ofs); err != nil { - return err - } - - zw := zlib.NewWriter(&packHashWriter{pw: pw}) - if _, err := zw.Write(delta); err != nil { - _ = zw.Close() - return err - } - return zw.Close() + _ = baseOffset + _ = baseSize + _ = resultSize + _ = delta + return errPackDeltaUnimplemented } func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error { - if pw == nil || !pw.wroteHeader { - return ErrInvalidObject - } - if baseSize < 0 || resultSize < 0 { - return ErrInvalidObject - } - if delta == nil { - delta = []byte{} - } - deltaSize := len(delta) - if deltaSize <= 0 { - return ErrInvalidObject - } - baseBytes := base.Bytes() - if len(baseBytes) == 0 { - return ErrInvalidObject - } - - hdr, err := packHeaderEncode(ObjectTypeRefDelta, deltaSize) - if err != nil { - return err - } - if err := pw.writePacked(hdr); err != nil { - return err - } - if err := pw.writePacked(baseBytes); err != nil { - return err - } - - zw := zlib.NewWriter(&packHashWriter{pw: pw}) - if _, err := zw.Write(delta); err != nil { - _ = zw.Close() - return err - } - return zw.Close() + _ = base + _ = baseSize + _ = resultSize + _ = delta + return errPackDeltaUnimplemented } func (pw *packWriter) Close() (Hash, error) { @@ -291,7 +226,7 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt if repo == nil { return Hash{}, ErrInvalidObject } - if opts.EnableThinPack { + if opts.EnableDeltas || opts.EnableThinPack { return Hash{}, errPackDeltaUnimplemented } if len(objects) > int(^uint32(0)) { @@ -306,45 +241,13 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt return Hash{}, err } - var dctx deltaContext - if opts.EnableDeltas { - dctx.window = defaultDeltaWindow - } - deltaSeed := uint32(0) - for _, id := range objects { ty, body, err := repo.ReadObjectTypeRaw(id) if err != nil { return Hash{}, err } - obj := &objectToPack{ - id: id, - ty: ty, - body: body, - } - startOffset := pw.bytesWritten - wroteDelta := false - - if opts.EnableDeltas && ty == ObjectTypeBlob { - base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth) - if base != nil && delta != nil { - if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil { - return Hash{}, err - } - wroteDelta = true - obj.deltaDepth = base.deltaDepth + 1 - } - } - if !wroteDelta { - if err := pw.WriteObject(ty, body); err != nil { - return Hash{}, err - } - obj.deltaDepth = 0 - } - obj.offset = startOffset - - if opts.EnableDeltas && ty == ObjectTypeBlob { - dctx.addCandidate(obj) + if err := pw.WriteObject(ty, body); err != nil { + return Hash{}, err } } diff --git a/packed_write_test.go b/packed_write_test.go index 63fd9266..da7ecfa7 100644 --- a/packed_write_test.go +++ b/packed_write_test.go @@ -3,15 +3,12 @@ package furgit import ( "bytes" "crypto/rand" + "errors" "fmt" "os" - "os/exec" "path/filepath" "strings" "testing" - - "codeberg.org/lindenii/furgit/internal/bufpool" - "codeberg.org/lindenii/furgit/internal/zlibx" ) func TestPackHeaderEncodeParseRoundtrip(t *testing.T) { @@ -172,20 +169,7 @@ func TestPackWriteNoDeltas(t *testing.T) { _ = os.Remove(idxPath) }() - if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil { - t.Fatalf("pack stream invalid: %v", err) - } - - cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath) - cmd.Dir = repoPath - cmd.Env = append(os.Environ(), - "GIT_CONFIG_GLOBAL=/dev/null", - "GIT_CONFIG_SYSTEM=/dev/null", - ) - output, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("git index-pack failed: %v\n%s", err, output) - } + _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath) verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath) seen := make(map[string]struct{}) @@ -220,290 +204,21 @@ func TestPackWriteNoDeltas(t *testing.T) { _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") } -func TestPackWriteDeltas(t *testing.T) { +func TestPackWriteDeltasUnimplemented(t *testing.T) { repoPath, cleanup := setupTestRepo(t) defer cleanup() - workDir, cleanupWork := setupWorkDir(t) - defer cleanupWork() - - const ( - fileCount = 200 - fileSize = 2048 - ) - base := bytes.Repeat([]byte("delta-base-"), fileSize/10) - for i := 0; i < fileCount; i++ { - buf := make([]byte, len(base)) - copy(buf, base) - buf[i%len(buf)] ^= byte(i) - name := filepath.Join(workDir, fmt.Sprintf("delta%04d.txt", i)) - if err := os.WriteFile(name, buf, 0o644); err != nil { - t.Fatalf("failed to write %s: %v", name, err) - } - } - - gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".") - gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Delta commit") - commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD") - - commitBody := gitCatFile(t, repoPath, "commit", commitHash) - lines := bytes.Split(commitBody, []byte{'\n'}) - if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) { - t.Fatalf("commit missing tree header") - } - treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree ")))) - - lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash) - var blobHashes []string - for _, line := range strings.Split(lsTree, "\n") { - if line == "" { - continue - } - fields := strings.Fields(line) - if len(fields) < 3 { - t.Fatalf("unexpected ls-tree line: %q", line) - } - blobHashes = append(blobHashes, fields[2]) - } - repo, err := OpenRepository(repoPath) if err != nil { t.Fatalf("OpenRepository failed: %v", err) } defer func() { _ = repo.Close() }() - var objects []Hash - commitID, _ := repo.ParseHash(commitHash) - objects = append(objects, commitID) - treeID, _ := repo.ParseHash(treeHash) - objects = append(objects, treeID) - for _, bh := range blobHashes { - id, _ := repo.ParseHash(bh) - objects = append(objects, id) - } - expectedOids := append([]string{commitHash, treeHash}, blobHashes...) - - packDir := filepath.Join(repoPath, "objects", "pack") - if err := os.MkdirAll(packDir, 0o755); err != nil { - t.Fatalf("failed to create pack dir: %v", err) - } - pf, err := os.CreateTemp(packDir, "furgit-delta-test-*.pack") - if err != nil { - t.Fatalf("failed to create pack file: %v", err) - } - packPath := pf.Name() - idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx" - if _, err := repo.packWrite(pf, objects, packWriteOptions{ - EnableDeltas: true, - MinDeltaSavings: 1, - }); err != nil { - _ = pf.Close() - t.Fatalf("packWrite failed: %v", err) - } - if err := pf.Close(); err != nil { - t.Fatalf("failed to close pack file: %v", err) - } - - defer func() { - _ = os.Remove(packPath) - _ = os.Remove(idxPath) - }() - - if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil { - t.Fatalf("pack stream invalid: %v", err) - } - - cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath) - cmd.Dir = repoPath - cmd.Env = append(os.Environ(), - "GIT_CONFIG_GLOBAL=/dev/null", - "GIT_CONFIG_SYSTEM=/dev/null", - ) - output, err := cmd.CombinedOutput() - if err != nil { - t.Fatalf("git index-pack failed: %v\n%s", err, output) - } - - verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath) - seen := make(map[string]struct{}) - for _, line := range strings.Split(verifyOut, "\n") { - if strings.TrimSpace(line) == "" { - continue - } - if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") { - continue - } - parts := strings.Fields(line) - if len(parts) == 0 { - continue - } - seen[parts[0]] = struct{}{} - } - for _, oid := range expectedOids { - if _, ok := seen[oid]; !ok { - t.Fatalf("verify-pack missing object %s", oid) - } - } - - for _, oid := range expectedOids { - if err := removeLooseObject(repoPath, oid); err != nil { - t.Fatalf("remove loose object %s: %v", oid, err) - } - } - for _, oid := range expectedOids { - _ = gitCmd(t, repoPath, "cat-file", "-p", oid) - } - - _ = gitCmd(t, repoPath, "fsck", "--full", "--strict") -} - -func checkPackStream(path string, algo hashAlgorithm, objectCount int) error { - data, err := os.ReadFile(path) - if err != nil { - return err - } - if len(data) < 12 { - return ErrInvalidObject - } - if readBE32(data[0:4]) != packMagic || readBE32(data[4:8]) != packVersion2 { - return ErrInvalidObject - } - pos := 12 - hashSize := algo.Size() - type objEntry struct { - offset uint64 - ty ObjectType - body []byte - } - byOffset := make(map[uint64]objEntry, objectCount) - byHash := make(map[string]objEntry, objectCount) - for i := 0; i < objectCount; i++ { - objOffset := uint64(pos) - ty, size, consumed, err := packHeaderParse(data[pos:]) - if err != nil { - return fmt.Errorf("obj %d header at %d: %v", i, pos, err) - } - pos += consumed - baseOffset := uint64(0) - baseTy := ObjectTypeInvalid - var baseBody []byte - var baseHash Hash - switch ty { - case ObjectTypeOfsDelta: - dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:]) - if err != nil { - return fmt.Errorf("obj %d ofs at %d: %v", i, pos, err) - } - pos += distConsumed - if dist == 0 || dist > objOffset { - return fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos) - } - baseOffset = objOffset - dist - base, ok := byOffset[baseOffset] - if !ok { - return fmt.Errorf("obj %d ofs at %d: missing base", i, pos) - } - baseTy = base.ty - baseBody = base.body - case ObjectTypeRefDelta: - if pos+hashSize > len(data) { - return ErrInvalidObject - } - copy(baseHash.data[:], data[pos:pos+hashSize]) - baseHash.algo = algo - baseEntry, ok := byHash[baseHash.String()] - if !ok { - return fmt.Errorf("obj %d ref base not found", i) - } - baseTy = baseEntry.ty - baseBody = baseEntry.body - pos += hashSize - default: - } - - payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size) - if err != nil { - return fmt.Errorf("obj %d zlib at %d: %v", i, pos, err) - } - payload := append([]byte(nil), payloadBuf.Bytes()...) - payloadBuf.Release() - pos += zconsumed - switch ty { - case ObjectTypeOfsDelta: - if baseBody == nil { - return fmt.Errorf("obj %d missing base body", i) - } - baseSize, resultSize, err := readDeltaSizes(payload) - if err != nil { - return fmt.Errorf("obj %d delta sizes: %v", i, err) - } - if baseSize != len(baseBody) { - return fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody)) - } - out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload)) - if err != nil { - return fmt.Errorf("obj %d delta apply: %v", i, err) - } - body := append([]byte(nil), out.Bytes()...) - out.Release() - if resultSize != len(body) { - return fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize) - } - byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body} - case ObjectTypeRefDelta: - if baseBody == nil { - return fmt.Errorf("obj %d missing ref base body", i) - } - baseSize, resultSize, err := readDeltaSizes(payload) - if err != nil { - return fmt.Errorf("obj %d ref delta sizes: %v", i, err) - } - if baseSize != len(baseBody) { - return fmt.Errorf("obj %d ref delta base size mismatch: got %d want %d", i, baseSize, len(baseBody)) - } - out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload)) - if err != nil { - return fmt.Errorf("obj %d ref delta apply: %v", i, err) - } - body := append([]byte(nil), out.Bytes()...) - out.Release() - if resultSize != len(body) { - return fmt.Errorf("obj %d ref delta result size mismatch: got %d want %d", i, len(body), resultSize) - } - byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body} - default: - if size >= 0 && len(payload) != size { - return fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size) - } - body := append([]byte(nil), payload...) - byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body} - } - - entry := byOffset[objOffset] - if entry.body != nil && entry.ty != ObjectTypeInvalid { - hdr, err := headerForType(entry.ty, entry.body) - if err != nil { - return err - } - raw := append(hdr, entry.body...) - hash := algo.Sum(raw) - byHash[hash.String()] = entry - } - } - return nil -} - -func readDeltaSizes(delta []byte) (int, int, error) { - pos := 0 - baseSize, err := packVarintRead(delta, &pos) - if err != nil { - return 0, 0, err - } - resultSize, err := packVarintRead(delta, &pos) - if err != nil { - return 0, 0, err + buf := new(bytes.Buffer) + _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true}) + if !errors.Is(err, errPackDeltaUnimplemented) { + t.Fatalf("expected errPackDeltaUnimplemented, got %v", err) } - return baseSize, resultSize, nil } func removeLooseObject(repoPath, oid string) error { |
