aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-02-20 12:51:16 +0800
committerGravatar Runxi Yu2026-02-20 12:57:31 +0800
commit9730560a82426408243cb15349f6955a4ba34f60 (patch)
tree00cfcfc75db7b1de795b822f70246abd40199b86
parentRevert "packed: Use random delta seed" (diff)
signatureNo signature
Revert "packed: Write packs with deltas"
This reverts commit 17c9aee0e781026353ead4ac749a3ae89c83d007.
-rw-r--r--delta_read_apply.go159
-rw-r--r--delta_test.go55
-rw-r--r--delta_write_deltify.go306
-rw-r--r--delta_write_encode.go108
-rw-r--r--delta_write_select.go56
-rw-r--r--packed_read_pack.go150
-rw-r--r--packed_write_pack.go123
-rw-r--r--packed_write_test.go299
8 files changed, 170 insertions, 1086 deletions
diff --git a/delta_read_apply.go b/delta_read_apply.go
deleted file mode 100644
index 48233292..00000000
--- a/delta_read_apply.go
+++ /dev/null
@@ -1,159 +0,0 @@
-package furgit
-
-import (
- "io"
-
- "codeberg.org/lindenii/furgit/internal/bufpool"
-)
-
-func packDeltaReadOfsDistance(data []byte) (uint64, int, error) {
- if len(data) == 0 {
- return 0, 0, io.ErrUnexpectedEOF
- }
- b := data[0]
- dist := uint64(b & 0x7f)
- consumed := 1
- for (b & 0x80) != 0 {
- if consumed >= len(data) {
- return 0, 0, io.ErrUnexpectedEOF
- }
- b = data[consumed]
- consumed++
- dist += 1
- dist <<= 7
- dist |= uint64(b & 0x7f)
- }
- return dist, consumed, nil
-}
-
-func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {
- pos := 0
- baseBytes := base.Bytes()
- deltaBytes := delta.Bytes()
- srcSize, err := packVarintRead(deltaBytes, &pos)
- if err != nil {
- return bufpool.Buffer{}, err
- }
- dstSize, err := packVarintRead(deltaBytes, &pos)
- if err != nil {
- return bufpool.Buffer{}, err
- }
- if srcSize != len(baseBytes) {
- return bufpool.Buffer{}, ErrInvalidObject
- }
- out := bufpool.Borrow(dstSize)
- out.Resize(dstSize)
- outBytes := out.Bytes()
- outPos := 0
-
- for pos < len(deltaBytes) {
- op := deltaBytes[pos]
- pos++
- switch {
- case op&0x80 != 0:
- off := 0
- n := 0
- if op&0x01 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- off |= int(deltaBytes[pos])
- pos++
- }
- if op&0x02 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- off |= int(deltaBytes[pos]) << 8
- pos++
- }
- if op&0x04 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- off |= int(deltaBytes[pos]) << 16
- pos++
- }
- if op&0x08 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- off |= int(deltaBytes[pos]) << 24
- pos++
- }
- if op&0x10 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- n |= int(deltaBytes[pos])
- pos++
- }
- if op&0x20 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- n |= int(deltaBytes[pos]) << 8
- pos++
- }
- if op&0x40 != 0 {
- if pos >= len(deltaBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- n |= int(deltaBytes[pos]) << 16
- pos++
- }
- if n == 0 {
- n = 0x10000
- }
- if off+n > len(baseBytes) || outPos+n > len(outBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- copy(outBytes[outPos:], baseBytes[off:off+n])
- outPos += n
- case op != 0:
- n := int(op)
- if pos+n > len(deltaBytes) || outPos+n > len(outBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- copy(outBytes[outPos:], deltaBytes[pos:pos+n])
- pos += n
- outPos += n
- default:
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- }
-
- if outPos != len(outBytes) {
- out.Release()
- return bufpool.Buffer{}, ErrInvalidObject
- }
- return out, nil
-}
-
-func packVarintRead(buf []byte, pos *int) (int, error) {
- res := 0
- shift := 0
- for {
- if *pos >= len(buf) {
- return 0, ErrInvalidObject
- }
- b := buf[*pos]
- *pos++
- res |= int(b&0x7f) << shift
- if (b & 0x80) == 0 {
- break
- }
- shift += 7
- }
- return res, nil
-}
diff --git a/delta_test.go b/delta_test.go
deleted file mode 100644
index f1ee5840..00000000
--- a/delta_test.go
+++ /dev/null
@@ -1,55 +0,0 @@
-package furgit
-
-import (
- "bytes"
- "testing"
-
- "codeberg.org/lindenii/furgit/internal/bufpool"
-)
-
-func TestDeltaEncodeApplyRoundtrip(t *testing.T) {
- base := []byte("Hello, world!")
- target := []byte("Hello, brave new world!")
-
- instr := []deltaInstruction{
- {copy: true, offset: 0, length: 7},
- {copy: false, data: []byte("brave new "), length: len("brave new ")},
- {copy: true, offset: 7, length: 6},
- }
- delta, err := deltaEncode(len(base), len(target), instr)
- if err != nil {
- t.Fatalf("deltaEncode error: %v", err)
- }
-
- out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta))
- if err != nil {
- t.Fatalf("packDeltaApply error: %v", err)
- }
- defer out.Release()
- if !bytes.Equal(out.Bytes(), target) {
- t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target)
- }
-}
-
-func TestDeltifyRoundtrip(t *testing.T) {
- base := []byte("The quick brown fox jumps over the lazy dog.")
- target := []byte("The quick red fox jumps over the very lazy dog.")
-
- instr, err := deltify(base, target, 0)
- if err != nil {
- t.Fatalf("deltify error: %v", err)
- }
- delta, err := deltaEncode(len(base), len(target), instr)
- if err != nil {
- t.Fatalf("deltaEncode error: %v", err)
- }
-
- out, err := packDeltaApply(bufpool.FromOwned(base), bufpool.FromOwned(delta))
- if err != nil {
- t.Fatalf("packDeltaApply error: %v", err)
- }
- defer out.Release()
- if !bytes.Equal(out.Bytes(), target) {
- t.Fatalf("delta apply mismatch: got %q, want %q", out.Bytes(), target)
- }
-}
diff --git a/delta_write_deltify.go b/delta_write_deltify.go
deleted file mode 100644
index cd3d6aa8..00000000
--- a/delta_write_deltify.go
+++ /dev/null
@@ -1,306 +0,0 @@
-package furgit
-
-import (
- "bytes"
-
- "codeberg.org/lindenii/furgit/internal/murmurhash2"
-)
-
-const (
- deltaMinChunk = 32
- deltaMaxChunk = 8192
- deltaSplitMask = (1 << 8) - 1
- deltaStretchMax = (1 << 24) - 1
-)
-
-var deltaGearTab = [...]uint32{
- 0x67ed26b7, 0x32da500c, 0x53d0fee0, 0xce387dc7, 0xcd406d90, 0x2e83a4d4,
- 0x9fc9a38d, 0xb67259dc, 0xca6b1722, 0x6d2ea08c, 0x235cea2e, 0x3149bb5f,
- 0x1beda787, 0x2a6b77d5, 0x2f22d9ac, 0x91fc0544, 0xe413acfa, 0x5a30ff7a,
- 0xad6fdde0, 0x444fd0f5, 0x7ad87864, 0x58c5ff05, 0x8d2ec336, 0x2371f853,
- 0x550f8572, 0x6aa448dd, 0x7c9ddbcf, 0x95221e14, 0x2a82ec33, 0xcbec5a78,
- 0xc6795a0d, 0x243995b7, 0x1c909a2f, 0x4fded51c, 0x635d334b, 0x0e2b9999,
- 0x2702968d, 0x856de1d5, 0x3325d60e, 0xeb6a7502, 0xec2a9844, 0x0905835a,
- 0xa1820375, 0xa4be5cab, 0x96a6c058, 0x2c2ccd70, 0xba40fce3, 0xd794c46b,
- 0x8fbae83e, 0xc3aa7899, 0x3d3ff8ed, 0xa0d42b5b, 0x571c0c97, 0xd2811516,
- 0xf7e7b96c, 0x4fd2fcbd, 0xe2fdec94, 0x282cc436, 0x78e8e95c, 0x80a3b613,
- 0xcfbee20c, 0xd4a32d1c, 0x2a12ff13, 0x6af82936, 0xe5630258, 0x8efa6a98,
- 0x294fb2d1, 0xdeb57086, 0x5f0fddb3, 0xeceda7ce, 0x4c87305f, 0x3a6d3307,
- 0xe22d2942, 0x9d060217, 0x1e42ed02, 0xb6f63b52, 0x4367f39f, 0x055cf262,
- 0x03a461b2, 0x5ef9e382, 0x386bc03a, 0x2a1e79c7, 0xf1a0058b, 0xd4d2dea9,
- 0x56baf37d, 0x5daff6cc, 0xf03a951d, 0xaef7de45, 0xa8f4581e, 0x3960b555,
- 0xffbfff6d, 0xbe702a23, 0x8f5b6d6f, 0x061739fb, 0x98696f47, 0x3fd596d4,
- 0x151eac6b, 0xa9fcc4f5, 0x69181a12, 0x3ac5a107, 0xb5198fe7, 0x96bcb1da,
- 0x1b5ddf8e, 0xc757d650, 0x65865c3a, 0x8fc0a41a, 0x87435536, 0x99eda6f2,
- 0x41874794, 0x29cff4e8, 0xb70efd9a, 0x3103f6e7, 0x84d2453b, 0x15a450bd,
- 0x74f49af1, 0x60f664b1, 0xa1c86935, 0xfdafbce1, 0xe36353e3, 0x5d9ba739,
- 0xbc0559ba, 0x708b0054, 0xd41d808c, 0xb2f31723, 0x9027c41f, 0xf136d165,
- 0xb5374b12, 0x9420a6ac, 0x273958b6, 0xe6c2fad0, 0xebdc1f21, 0xfb33af8b,
- 0xc71c25cd, 0xe9a2d8e5, 0xbeb38a50, 0xbceb7cc2, 0x4e4e73f0, 0xcd6c251d,
- 0xde4c032c, 0x4b04ac30, 0x725b8b21, 0x4eb8c33b, 0x20d07b75, 0x0567aa63,
- 0xb56b2bb7, 0xc1f5fd3a, 0xcafd35ca, 0x470dd4da, 0xfe4f94cd, 0xfb8de424,
- 0xe8dbcf40, 0xfe50a37a, 0x62db5b5d, 0xf32f4ab6, 0x2c4a8a51, 0x18473dc0,
- 0xfe0cbb6e, 0xfe399efd, 0xdf34ecc9, 0x6ccd5055, 0x46097073, 0x139135c2,
- 0x721c76f6, 0x1c6a94b4, 0x6eee014d, 0x8a508e02, 0x3da538f5, 0x280d394f,
- 0x5248a0c4, 0x3ce94c6c, 0x9a71ad3a, 0x8493dd05, 0xe43f0ab6, 0x18e4ed42,
- 0x6c5c0e09, 0x42b06ec9, 0x8d330343, 0xa45b6f59, 0x2a573c0c, 0xd7fd3de6,
- 0xeedeab68, 0x5c84dafc, 0xbbd1b1a8, 0xa3ce1ad1, 0x85b70bed, 0xb6add07f,
- 0xa531309c, 0x8f8ab852, 0x564de332, 0xeac9ed0c, 0x73da402c, 0x3ec52761,
- 0x43af2f4d, 0xd6ff45c8, 0x4c367462, 0xd553bd6a, 0x44724855, 0x3b2aa728,
- 0x56e5eb65, 0xeaf16173, 0x33fa42ff, 0xd714bb5d, 0xfbd0a3b9, 0xaf517134,
- 0x9416c8cd, 0x534cf94f, 0x548947c2, 0x34193569, 0x32f4389a, 0xfe7028bc,
- 0xed73b1ed, 0x9db95770, 0x468e3922, 0x0440c3cd, 0x60059a62, 0x33504562,
- 0x2b229fbd, 0x5174dca5, 0xf7028752, 0xd63c6aa8, 0x31276f38, 0x0646721c,
- 0xb0191da8, 0xe00e6de0, 0x9eac1a6e, 0x9f7628a5, 0xed6c06ea, 0x0bb8af15,
- 0xf119fb12, 0x38693c1c, 0x732bc0fe, 0x84953275, 0xb82ec888, 0x33a4f1b3,
- 0x3099835e, 0x028a8782, 0x5fdd51d7, 0xc6c717b3, 0xb06caf71, 0x17c8c111,
- 0x61bad754, 0x9fd03061, 0xe09df1af, 0x3bc9eb73, 0x85878413, 0x9889aaf2,
- 0x3f5a9e46, 0x42c9f01f, 0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2,
- 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
-}
-
-type deltaBlock struct {
- length int
- offset int
- hash uint32
-}
-
-type deltaTable struct {
- blocks []deltaBlock
- nblocks int
- nalloc int
-
- offs []uint32
- len int
- size int
-}
-
-type deltaInstruction struct {
- copy bool
- offset int
- length int
- data []byte
-}
-
-func deltify(base, target []byte, seed uint32) ([]deltaInstruction, error) {
- if base == nil {
- base = []byte{}
- }
- if target == nil {
- target = []byte{}
- }
- dt, err := deltaTableInitMem(base, 0, len(base), seed)
- if err != nil {
- return nil, err
- }
- return deltifyMemMem(target, 0, len(target), seed, dt, base, 0, len(base))
-}
-
-func deltifyMemMem(target []byte, fileoffset, filesize int, seed uint32, dt *deltaTable, base []byte, baseOffset0, baseSize int) ([]deltaInstruction, error) {
- const allocChunkSize = 64
- instr := make([]deltaInstruction, 0, allocChunkSize)
- offset0 := fileoffset
-
- for fileoffset < filesize {
- blocklen := deltaNextBlockLen(target, fileoffset, filesize)
- if blocklen == 0 {
- if fileoffset < filesize {
- data := target[fileoffset:filesize]
- instr = append(instr, deltaInstruction{
- copy: false,
- offset: fileoffset - offset0,
- length: len(data),
- data: data,
- })
- }
- break
- }
-
- block := dt.lookupBlock(target[fileoffset:fileoffset+blocklen], blocklen, seed, base, baseOffset0)
- if block != nil {
- if err := deltaStretchMemMem(base, baseOffset0, baseSize, block, target, fileoffset+blocklen, filesize, &blocklen); err != nil {
- return nil, err
- }
- instr = append(instr, deltaInstruction{
- copy: true,
- offset: block.offset,
- length: blocklen,
- })
- } else {
- data := target[fileoffset : fileoffset+blocklen]
- instr = append(instr, deltaInstruction{
- copy: false,
- offset: fileoffset - offset0,
- length: len(data),
- data: data,
- })
- }
- fileoffset += blocklen
- }
-
- return instr, nil
-}
-
-func deltaTableInitMem(data []byte, fileoffset, filesize int, seed uint32) (*deltaTable, error) {
- dt := &deltaTable{
- nalloc: 128,
- size: 128,
- }
- dt.blocks = make([]deltaBlock, dt.nalloc)
- dt.offs = make([]uint32, dt.size)
-
- offset0 := fileoffset
- for fileoffset < filesize {
- blocklen := deltaNextBlockLen(data, fileoffset, filesize)
- if blocklen == 0 {
- break
- }
- h := deltaHashBlock(data[fileoffset:fileoffset+blocklen], seed)
- if err := dt.addBlock(data, offset0, blocklen, fileoffset-offset0, h); err != nil {
- return nil, err
- }
- fileoffset += blocklen
- }
- return dt, nil
-}
-
-func deltaHashBlock(p []byte, seed uint32) uint32 {
- return murmurhash2.Sum32(p, seed)
-}
-
-func deltaNextBlockLen(data []byte, fileoffset, filesize int) int {
- if fileoffset >= filesize || filesize-fileoffset < deltaMinChunk {
- return 0
- }
- p := fileoffset + deltaMinChunk
- end := fileoffset + deltaMaxChunk
- if end > filesize {
- end = filesize
- }
- var gh uint32
- for p < end {
- gh = (gh << 1) + deltaGearTab[data[p]]
- p++
- if (gh & deltaSplitMask) == 0 {
- break
- }
- }
- return p - fileoffset
-}
-
-func (dt *deltaTable) addBlock(data []byte, offset0, length, offset int, h uint32) error {
- if length == 0 {
- return nil
- }
- if err := dt.resize(); err != nil {
- return err
- }
- idx, found := dt.lookupSlot(data, offset0, offset, length, h)
- if found {
- return nil
- }
- dt.offs[idx] = uint32(dt.nblocks + 1)
- dt.blocks[dt.nblocks] = deltaBlock{
- length: length,
- offset: offset,
- hash: h,
- }
- dt.nblocks++
- dt.len++
- return nil
-}
-
-func (dt *deltaTable) lookupBlock(chunk []byte, length int, seed uint32, base []byte, baseOffset0 int) *deltaBlock {
- if dt == nil || dt.size == 0 {
- return nil
- }
- h := deltaHashBlock(chunk, seed)
- for i := int(h % uint32(dt.size)); dt.offs[i] != 0; i = (i + 1) % dt.size {
- block := &dt.blocks[dt.offs[i]-1]
- if block.hash != h || block.length != length {
- continue
- }
- baseOff := baseOffset0 + block.offset
- if baseOff < 0 || baseOff+length > len(base) {
- continue
- }
- if bytes.Equal(base[baseOff:baseOff+length], chunk) {
- return block
- }
- }
- return nil
-}
-
-func (dt *deltaTable) lookupSlot(data []byte, offset0, offset, length int, h uint32) (int, bool) {
- if dt == nil || dt.size == 0 {
- return 0, false
- }
- for i := int(h % uint32(dt.size)); ; i = (i + 1) % dt.size {
- if dt.offs[i] == 0 {
- return i, false
- }
- block := &dt.blocks[dt.offs[i]-1]
- if block.hash != h || block.length != length {
- continue
- }
- left := offset0 + block.offset
- right := offset0 + offset
- if left < 0 || right < 0 || left+length > len(data) || right+length > len(data) {
- continue
- }
- if bytes.Equal(data[left:left+length], data[right:right+length]) {
- return i, true
- }
- }
-}
-
-func (dt *deltaTable) resize() error {
- if dt.nblocks == dt.nalloc {
- newAlloc := dt.nalloc + 256
- newBlocks := make([]deltaBlock, newAlloc)
- copy(newBlocks, dt.blocks[:dt.nblocks])
- dt.blocks = newBlocks
- dt.nalloc = newAlloc
- }
-
- if dt.offs == nil || (dt.len*100)/dt.size > 70 {
- oldSize := dt.size
- dt.size *= 2
- if dt.size == 0 {
- dt.size = oldSize
- return ErrInvalidObject
- }
- dt.offs = make([]uint32, dt.size)
- for i := 0; i < dt.nblocks; i++ {
- block := &dt.blocks[i]
- idx := int(block.hash % uint32(dt.size))
- for dt.offs[idx] != 0 {
- idx = (idx + 1) % dt.size
- }
- dt.offs[idx] = uint32(i + 1)
- }
- }
-
- return nil
-}
-
-func deltaStretchMemMem(base []byte, baseOffset0, baseSize int, block *deltaBlock, data []byte, fileoffset, filesize int, blocklen *int) error {
- baseOffset := baseOffset0 + block.offset + *blocklen
- if baseOffset > baseSize {
- return ErrInvalidObject
- }
- if fileoffset > filesize {
- return ErrInvalidObject
- }
- maxlen := baseSize - baseOffset
- if avail := filesize - fileoffset; avail < maxlen {
- maxlen = avail
- }
- for i := 0; i < maxlen && *blocklen < deltaStretchMax-1; i++ {
- if data[fileoffset+i] != base[baseOffset+i] {
- break
- }
- *blocklen++
- }
- return nil
-}
diff --git a/delta_write_encode.go b/delta_write_encode.go
deleted file mode 100644
index 66c77052..00000000
--- a/delta_write_encode.go
+++ /dev/null
@@ -1,108 +0,0 @@
-package furgit
-
-const deltaCopyDefaultLen = 0x10000
-
-func deltaEncode(baseSize, resultSize int, instr []deltaInstruction) ([]byte, error) {
- if baseSize < 0 || resultSize < 0 {
- return nil, ErrInvalidObject
- }
- out := make([]byte, 0, 64)
- baseHdr, err := packVarintEncode(baseSize)
- if err != nil {
- return nil, err
- }
- out = append(out, baseHdr...)
- resultHdr, err := packVarintEncode(resultSize)
- if err != nil {
- return nil, err
- }
- out = append(out, resultHdr...)
-
- for _, ins := range instr {
- if ins.copy {
- if ins.offset < 0 || ins.length <= 0 {
- return nil, ErrInvalidObject
- }
- if ins.offset > 0xffffffff {
- return nil, ErrInvalidObject
- }
- if ins.length > 0xffffff {
- return nil, ErrInvalidObject
- }
- op := byte(0x80)
- var tmp [7]byte
- pos := 0
-
- off := ins.offset
- for i := 0; i < 4; i++ {
- op |= 1 << i
- tmp[pos] = byte(off & 0xff)
- pos++
- off >>= 8
- if off == 0 {
- break
- }
- }
- if off != 0 {
- return nil, ErrInvalidObject
- }
-
- n := ins.length
- if n != deltaCopyDefaultLen {
- for i := 0; i < 3 && n > 0; i++ {
- op |= 1 << (i + 4)
- tmp[pos] = byte(n & 0xff)
- pos++
- n >>= 8
- }
- if n > 0 {
- return nil, ErrInvalidObject
- }
- }
-
- out = append(out, op)
- out = append(out, tmp[:pos]...)
- continue
- }
-
- data := ins.data
- if ins.length > 0 {
- if len(data) < ins.length {
- return nil, ErrInvalidObject
- }
- data = data[:ins.length]
- }
- if len(data) == 0 {
- return nil, ErrInvalidObject
- }
- for len(data) > 0 {
- n := len(data)
- if n > 127 {
- n = 127
- }
- out = append(out, byte(n))
- out = append(out, data[:n]...)
- data = data[n:]
- }
- }
-
- return out, nil
-}
-
-func deltaTry(base, target []byte, seed uint32, minSavings int) ([]byte, bool) {
- if minSavings < 0 {
- minSavings = 0
- }
- instr, err := deltify(base, target, seed)
- if err != nil {
- return nil, false
- }
- delta, err := deltaEncode(len(base), len(target), instr)
- if err != nil {
- return nil, false
- }
- if len(delta)+minSavings >= len(target) {
- return nil, false
- }
- return delta, true
-}
diff --git a/delta_write_select.go b/delta_write_select.go
deleted file mode 100644
index ad8e0d0e..00000000
--- a/delta_write_select.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package furgit
-
-const defaultDeltaWindow = 64
-
-type objectToPack struct {
- id Hash
- ty ObjectType
- body []byte
- offset uint64
- deltaDepth int
-}
-
-type deltaContext struct {
- window int
- candidates []*objectToPack
-}
-
-func (ctx *deltaContext) addCandidate(obj *objectToPack) {
- if ctx.window <= 0 {
- return
- }
- ctx.candidates = append(ctx.candidates, obj)
- if len(ctx.candidates) > ctx.window {
- over := len(ctx.candidates) - ctx.window
- ctx.candidates = ctx.candidates[over:]
- }
-}
-
-func pickDeltaBase(ctx *deltaContext, obj *objectToPack, seed uint32, minSavings, maxDepth int) (*objectToPack, []byte) {
- if ctx == nil || len(ctx.candidates) == 0 {
- return nil, nil
- }
- if maxDepth <= 0 {
- maxDepth = 1
- }
- var bestBase *objectToPack
- var bestDelta []byte
- for i := len(ctx.candidates) - 1; i >= 0; i-- {
- base := ctx.candidates[i]
- if base.ty != ObjectTypeBlob {
- continue
- }
- if base.deltaDepth >= maxDepth {
- continue
- }
- delta, ok := deltaTry(base.body, obj.body, seed, minSavings)
- if !ok {
- continue
- }
- if bestDelta == nil || len(delta) < len(bestDelta) {
- bestDelta = delta
- bestBase = base
- }
- }
- return bestBase, bestDelta
-}
diff --git a/packed_read_pack.go b/packed_read_pack.go
index 31279c8f..56098ee5 100644
--- a/packed_read_pack.go
+++ b/packed_read_pack.go
@@ -108,6 +108,24 @@ func packSectionInflate(pf *packFile, start uint64, sizeHint int) (bufpool.Buffe
return body, nil
}
+func packDeltaReadOfsDistance(data []byte) (uint64, int, error) {
+ if len(data) == 0 {
+ return 0, 0, io.ErrUnexpectedEOF
+ }
+ b := data[0]
+ dist := uint64(b & 0x7f)
+ consumed := 1
+ for (b & 0x80) != 0 {
+ if consumed >= len(data) {
+ return 0, 0, io.ErrUnexpectedEOF
+ }
+ b = data[consumed]
+ consumed++
+ dist = ((dist + 1) << 7) + uint64(b&0x7f)
+ }
+ return dist, consumed, nil
+}
+
type packKey struct {
path string
ofs uint64
@@ -326,6 +344,138 @@ func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjectT
return resultTy, body, nil
}
+func packDeltaApply(base, delta bufpool.Buffer) (bufpool.Buffer, error) {
+ pos := 0
+ baseBytes := base.Bytes()
+ deltaBytes := delta.Bytes()
+ srcSize, err := packVarintRead(deltaBytes, &pos)
+ if err != nil {
+ return bufpool.Buffer{}, err
+ }
+ dstSize, err := packVarintRead(deltaBytes, &pos)
+ if err != nil {
+ return bufpool.Buffer{}, err
+ }
+ if srcSize != len(baseBytes) {
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ out := bufpool.Borrow(dstSize)
+ out.Resize(dstSize)
+ outBytes := out.Bytes()
+ outPos := 0
+
+ for pos < len(deltaBytes) {
+ op := deltaBytes[pos]
+ pos++
+ switch {
+ case op&0x80 != 0:
+ off := 0
+ n := 0
+ if op&0x01 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ off |= int(deltaBytes[pos])
+ pos++
+ }
+ if op&0x02 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ off |= int(deltaBytes[pos]) << 8
+ pos++
+ }
+ if op&0x04 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ off |= int(deltaBytes[pos]) << 16
+ pos++
+ }
+ if op&0x08 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ off |= int(deltaBytes[pos]) << 24
+ pos++
+ }
+ if op&0x10 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ n |= int(deltaBytes[pos])
+ pos++
+ }
+ if op&0x20 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ n |= int(deltaBytes[pos]) << 8
+ pos++
+ }
+ if op&0x40 != 0 {
+ if pos >= len(deltaBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ n |= int(deltaBytes[pos]) << 16
+ pos++
+ }
+ if n == 0 {
+ n = 0x10000
+ }
+ if off+n > len(baseBytes) || outPos+n > len(outBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ copy(outBytes[outPos:], baseBytes[off:off+n])
+ outPos += n
+ case op != 0:
+ n := int(op)
+ if pos+n > len(deltaBytes) || outPos+n > len(outBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ copy(outBytes[outPos:], deltaBytes[pos:pos+n])
+ pos += n
+ outPos += n
+ default:
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ }
+
+ if outPos != len(outBytes) {
+ out.Release()
+ return bufpool.Buffer{}, ErrInvalidObject
+ }
+ return out, nil
+}
+
+func packVarintRead(buf []byte, pos *int) (int, error) {
+ res := 0
+ shift := 0
+ for {
+ if *pos >= len(buf) {
+ return 0, ErrInvalidObject
+ }
+ b := buf[*pos]
+ *pos++
+ res |= int(b&0x7f) << shift
+ if (b & 0x80) == 0 {
+ break
+ }
+ shift += 7
+ }
+ return res, nil
+}
+
type packFile struct {
relPath string
size int64
diff --git a/packed_write_pack.go b/packed_write_pack.go
index f31f42d1..a0baba13 100644
--- a/packed_write_pack.go
+++ b/packed_write_pack.go
@@ -104,84 +104,19 @@ func (pw *packWriter) WriteObject(ty ObjectType, body []byte) error {
}
func (pw *packWriter) WriteOfsDelta(baseOffset uint64, baseSize, resultSize int, delta []byte) error {
- if pw == nil || !pw.wroteHeader {
- return ErrInvalidObject
- }
- if baseSize < 0 || resultSize < 0 {
- return ErrInvalidObject
- }
- if delta == nil {
- delta = []byte{}
- }
- deltaSize := len(delta)
- if deltaSize <= 0 {
- return ErrInvalidObject
- }
- currentOffset := pw.bytesWritten
- if baseOffset >= currentOffset {
- return ErrInvalidObject
- }
- dist := currentOffset - baseOffset
-
- hdr, err := packHeaderEncode(ObjectTypeOfsDelta, deltaSize)
- if err != nil {
- return err
- }
- if err := pw.writePacked(hdr); err != nil {
- return err
- }
- ofs, err := packOfsEncode(dist)
- if err != nil {
- return err
- }
- if err := pw.writePacked(ofs); err != nil {
- return err
- }
-
- zw := zlib.NewWriter(&packHashWriter{pw: pw})
- if _, err := zw.Write(delta); err != nil {
- _ = zw.Close()
- return err
- }
- return zw.Close()
+ _ = baseOffset
+ _ = baseSize
+ _ = resultSize
+ _ = delta
+ return errPackDeltaUnimplemented
}
func (pw *packWriter) WriteRefDelta(base Hash, baseSize, resultSize int, delta []byte) error {
- if pw == nil || !pw.wroteHeader {
- return ErrInvalidObject
- }
- if baseSize < 0 || resultSize < 0 {
- return ErrInvalidObject
- }
- if delta == nil {
- delta = []byte{}
- }
- deltaSize := len(delta)
- if deltaSize <= 0 {
- return ErrInvalidObject
- }
- baseBytes := base.Bytes()
- if len(baseBytes) == 0 {
- return ErrInvalidObject
- }
-
- hdr, err := packHeaderEncode(ObjectTypeRefDelta, deltaSize)
- if err != nil {
- return err
- }
- if err := pw.writePacked(hdr); err != nil {
- return err
- }
- if err := pw.writePacked(baseBytes); err != nil {
- return err
- }
-
- zw := zlib.NewWriter(&packHashWriter{pw: pw})
- if _, err := zw.Write(delta); err != nil {
- _ = zw.Close()
- return err
- }
- return zw.Close()
+ _ = base
+ _ = baseSize
+ _ = resultSize
+ _ = delta
+ return errPackDeltaUnimplemented
}
func (pw *packWriter) Close() (Hash, error) {
@@ -291,7 +226,7 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt
if repo == nil {
return Hash{}, ErrInvalidObject
}
- if opts.EnableThinPack {
+ if opts.EnableDeltas || opts.EnableThinPack {
return Hash{}, errPackDeltaUnimplemented
}
if len(objects) > int(^uint32(0)) {
@@ -306,45 +241,13 @@ func (repo *Repository) packWrite(w io.Writer, objects []Hash, opts packWriteOpt
return Hash{}, err
}
- var dctx deltaContext
- if opts.EnableDeltas {
- dctx.window = defaultDeltaWindow
- }
- deltaSeed := uint32(0)
-
for _, id := range objects {
ty, body, err := repo.ReadObjectTypeRaw(id)
if err != nil {
return Hash{}, err
}
- obj := &objectToPack{
- id: id,
- ty: ty,
- body: body,
- }
- startOffset := pw.bytesWritten
- wroteDelta := false
-
- if opts.EnableDeltas && ty == ObjectTypeBlob {
- base, delta := pickDeltaBase(&dctx, obj, deltaSeed, opts.MinDeltaSavings, opts.MaxDeltaDepth)
- if base != nil && delta != nil {
- if err := pw.WriteOfsDelta(base.offset, len(base.body), len(body), delta); err != nil {
- return Hash{}, err
- }
- wroteDelta = true
- obj.deltaDepth = base.deltaDepth + 1
- }
- }
- if !wroteDelta {
- if err := pw.WriteObject(ty, body); err != nil {
- return Hash{}, err
- }
- obj.deltaDepth = 0
- }
- obj.offset = startOffset
-
- if opts.EnableDeltas && ty == ObjectTypeBlob {
- dctx.addCandidate(obj)
+ if err := pw.WriteObject(ty, body); err != nil {
+ return Hash{}, err
}
}
diff --git a/packed_write_test.go b/packed_write_test.go
index 63fd9266..da7ecfa7 100644
--- a/packed_write_test.go
+++ b/packed_write_test.go
@@ -3,15 +3,12 @@ package furgit
import (
"bytes"
"crypto/rand"
+ "errors"
"fmt"
"os"
- "os/exec"
"path/filepath"
"strings"
"testing"
-
- "codeberg.org/lindenii/furgit/internal/bufpool"
- "codeberg.org/lindenii/furgit/internal/zlibx"
)
func TestPackHeaderEncodeParseRoundtrip(t *testing.T) {
@@ -172,20 +169,7 @@ func TestPackWriteNoDeltas(t *testing.T) {
_ = os.Remove(idxPath)
}()
- if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {
- t.Fatalf("pack stream invalid: %v", err)
- }
-
- cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)
- cmd.Dir = repoPath
- cmd.Env = append(os.Environ(),
- "GIT_CONFIG_GLOBAL=/dev/null",
- "GIT_CONFIG_SYSTEM=/dev/null",
- )
- output, err := cmd.CombinedOutput()
- if err != nil {
- t.Fatalf("git index-pack failed: %v\n%s", err, output)
- }
+ _ = gitCmd(t, repoPath, "index-pack", "-o", idxPath, packPath)
verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
seen := make(map[string]struct{})
@@ -220,290 +204,21 @@ func TestPackWriteNoDeltas(t *testing.T) {
_ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
}
-func TestPackWriteDeltas(t *testing.T) {
+func TestPackWriteDeltasUnimplemented(t *testing.T) {
repoPath, cleanup := setupTestRepo(t)
defer cleanup()
- workDir, cleanupWork := setupWorkDir(t)
- defer cleanupWork()
-
- const (
- fileCount = 200
- fileSize = 2048
- )
- base := bytes.Repeat([]byte("delta-base-"), fileSize/10)
- for i := 0; i < fileCount; i++ {
- buf := make([]byte, len(base))
- copy(buf, base)
- buf[i%len(buf)] ^= byte(i)
- name := filepath.Join(workDir, fmt.Sprintf("delta%04d.txt", i))
- if err := os.WriteFile(name, buf, 0o644); err != nil {
- t.Fatalf("failed to write %s: %v", name, err)
- }
- }
-
- gitCmd(t, repoPath, "--work-tree="+workDir, "add", ".")
- gitCmd(t, repoPath, "--work-tree="+workDir, "commit", "-m", "Delta commit")
- commitHash := gitCmd(t, repoPath, "rev-parse", "HEAD")
-
- commitBody := gitCatFile(t, repoPath, "commit", commitHash)
- lines := bytes.Split(commitBody, []byte{'\n'})
- if len(lines) == 0 || !bytes.HasPrefix(lines[0], []byte("tree ")) {
- t.Fatalf("commit missing tree header")
- }
- treeHash := strings.TrimSpace(string(bytes.TrimPrefix(lines[0], []byte("tree "))))
-
- lsTree := gitCmd(t, repoPath, "ls-tree", "-r", treeHash)
- var blobHashes []string
- for _, line := range strings.Split(lsTree, "\n") {
- if line == "" {
- continue
- }
- fields := strings.Fields(line)
- if len(fields) < 3 {
- t.Fatalf("unexpected ls-tree line: %q", line)
- }
- blobHashes = append(blobHashes, fields[2])
- }
-
repo, err := OpenRepository(repoPath)
if err != nil {
t.Fatalf("OpenRepository failed: %v", err)
}
defer func() { _ = repo.Close() }()
- var objects []Hash
- commitID, _ := repo.ParseHash(commitHash)
- objects = append(objects, commitID)
- treeID, _ := repo.ParseHash(treeHash)
- objects = append(objects, treeID)
- for _, bh := range blobHashes {
- id, _ := repo.ParseHash(bh)
- objects = append(objects, id)
- }
- expectedOids := append([]string{commitHash, treeHash}, blobHashes...)
-
- packDir := filepath.Join(repoPath, "objects", "pack")
- if err := os.MkdirAll(packDir, 0o755); err != nil {
- t.Fatalf("failed to create pack dir: %v", err)
- }
- pf, err := os.CreateTemp(packDir, "furgit-delta-test-*.pack")
- if err != nil {
- t.Fatalf("failed to create pack file: %v", err)
- }
- packPath := pf.Name()
- idxPath := strings.TrimSuffix(packPath, ".pack") + ".idx"
- if _, err := repo.packWrite(pf, objects, packWriteOptions{
- EnableDeltas: true,
- MinDeltaSavings: 1,
- }); err != nil {
- _ = pf.Close()
- t.Fatalf("packWrite failed: %v", err)
- }
- if err := pf.Close(); err != nil {
- t.Fatalf("failed to close pack file: %v", err)
- }
-
- defer func() {
- _ = os.Remove(packPath)
- _ = os.Remove(idxPath)
- }()
-
- if err := checkPackStream(packPath, repo.hashAlgo, len(objects)); err != nil {
- t.Fatalf("pack stream invalid: %v", err)
- }
-
- cmd := exec.Command("git", "index-pack", "-o", idxPath, packPath)
- cmd.Dir = repoPath
- cmd.Env = append(os.Environ(),
- "GIT_CONFIG_GLOBAL=/dev/null",
- "GIT_CONFIG_SYSTEM=/dev/null",
- )
- output, err := cmd.CombinedOutput()
- if err != nil {
- t.Fatalf("git index-pack failed: %v\n%s", err, output)
- }
-
- verifyOut := gitCmd(t, repoPath, "verify-pack", "-v", idxPath)
- seen := make(map[string]struct{})
- for _, line := range strings.Split(verifyOut, "\n") {
- if strings.TrimSpace(line) == "" {
- continue
- }
- if strings.HasPrefix(line, "chain length") || strings.HasPrefix(line, "non delta") {
- continue
- }
- parts := strings.Fields(line)
- if len(parts) == 0 {
- continue
- }
- seen[parts[0]] = struct{}{}
- }
- for _, oid := range expectedOids {
- if _, ok := seen[oid]; !ok {
- t.Fatalf("verify-pack missing object %s", oid)
- }
- }
-
- for _, oid := range expectedOids {
- if err := removeLooseObject(repoPath, oid); err != nil {
- t.Fatalf("remove loose object %s: %v", oid, err)
- }
- }
- for _, oid := range expectedOids {
- _ = gitCmd(t, repoPath, "cat-file", "-p", oid)
- }
-
- _ = gitCmd(t, repoPath, "fsck", "--full", "--strict")
-}
-
-func checkPackStream(path string, algo hashAlgorithm, objectCount int) error {
- data, err := os.ReadFile(path)
- if err != nil {
- return err
- }
- if len(data) < 12 {
- return ErrInvalidObject
- }
- if readBE32(data[0:4]) != packMagic || readBE32(data[4:8]) != packVersion2 {
- return ErrInvalidObject
- }
- pos := 12
- hashSize := algo.Size()
- type objEntry struct {
- offset uint64
- ty ObjectType
- body []byte
- }
- byOffset := make(map[uint64]objEntry, objectCount)
- byHash := make(map[string]objEntry, objectCount)
- for i := 0; i < objectCount; i++ {
- objOffset := uint64(pos)
- ty, size, consumed, err := packHeaderParse(data[pos:])
- if err != nil {
- return fmt.Errorf("obj %d header at %d: %v", i, pos, err)
- }
- pos += consumed
- baseOffset := uint64(0)
- baseTy := ObjectTypeInvalid
- var baseBody []byte
- var baseHash Hash
- switch ty {
- case ObjectTypeOfsDelta:
- dist, distConsumed, err := packDeltaReadOfsDistance(data[pos:])
- if err != nil {
- return fmt.Errorf("obj %d ofs at %d: %v", i, pos, err)
- }
- pos += distConsumed
- if dist == 0 || dist > objOffset {
- return fmt.Errorf("obj %d ofs at %d: invalid dist", i, pos)
- }
- baseOffset = objOffset - dist
- base, ok := byOffset[baseOffset]
- if !ok {
- return fmt.Errorf("obj %d ofs at %d: missing base", i, pos)
- }
- baseTy = base.ty
- baseBody = base.body
- case ObjectTypeRefDelta:
- if pos+hashSize > len(data) {
- return ErrInvalidObject
- }
- copy(baseHash.data[:], data[pos:pos+hashSize])
- baseHash.algo = algo
- baseEntry, ok := byHash[baseHash.String()]
- if !ok {
- return fmt.Errorf("obj %d ref base not found", i)
- }
- baseTy = baseEntry.ty
- baseBody = baseEntry.body
- pos += hashSize
- default:
- }
-
- payloadBuf, zconsumed, err := zlibx.DecompressSized(data[pos:], size)
- if err != nil {
- return fmt.Errorf("obj %d zlib at %d: %v", i, pos, err)
- }
- payload := append([]byte(nil), payloadBuf.Bytes()...)
- payloadBuf.Release()
- pos += zconsumed
- switch ty {
- case ObjectTypeOfsDelta:
- if baseBody == nil {
- return fmt.Errorf("obj %d missing base body", i)
- }
- baseSize, resultSize, err := readDeltaSizes(payload)
- if err != nil {
- return fmt.Errorf("obj %d delta sizes: %v", i, err)
- }
- if baseSize != len(baseBody) {
- return fmt.Errorf("obj %d delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
- }
- out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
- if err != nil {
- return fmt.Errorf("obj %d delta apply: %v", i, err)
- }
- body := append([]byte(nil), out.Bytes()...)
- out.Release()
- if resultSize != len(body) {
- return fmt.Errorf("obj %d delta result size mismatch: got %d want %d", i, len(body), resultSize)
- }
- byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
- case ObjectTypeRefDelta:
- if baseBody == nil {
- return fmt.Errorf("obj %d missing ref base body", i)
- }
- baseSize, resultSize, err := readDeltaSizes(payload)
- if err != nil {
- return fmt.Errorf("obj %d ref delta sizes: %v", i, err)
- }
- if baseSize != len(baseBody) {
- return fmt.Errorf("obj %d ref delta base size mismatch: got %d want %d", i, baseSize, len(baseBody))
- }
- out, err := packDeltaApply(bufpool.FromOwned(baseBody), bufpool.FromOwned(payload))
- if err != nil {
- return fmt.Errorf("obj %d ref delta apply: %v", i, err)
- }
- body := append([]byte(nil), out.Bytes()...)
- out.Release()
- if resultSize != len(body) {
- return fmt.Errorf("obj %d ref delta result size mismatch: got %d want %d", i, len(body), resultSize)
- }
- byOffset[objOffset] = objEntry{offset: objOffset, ty: baseTy, body: body}
- default:
- if size >= 0 && len(payload) != size {
- return fmt.Errorf("obj %d size mismatch: got %d want %d", i, len(payload), size)
- }
- body := append([]byte(nil), payload...)
- byOffset[objOffset] = objEntry{offset: objOffset, ty: ty, body: body}
- }
-
- entry := byOffset[objOffset]
- if entry.body != nil && entry.ty != ObjectTypeInvalid {
- hdr, err := headerForType(entry.ty, entry.body)
- if err != nil {
- return err
- }
- raw := append(hdr, entry.body...)
- hash := algo.Sum(raw)
- byHash[hash.String()] = entry
- }
- }
- return nil
-}
-
-func readDeltaSizes(delta []byte) (int, int, error) {
- pos := 0
- baseSize, err := packVarintRead(delta, &pos)
- if err != nil {
- return 0, 0, err
- }
- resultSize, err := packVarintRead(delta, &pos)
- if err != nil {
- return 0, 0, err
+ buf := new(bytes.Buffer)
+ _, err = repo.packWrite(buf, nil, packWriteOptions{EnableDeltas: true})
+ if !errors.Is(err, errPackDeltaUnimplemented) {
+ t.Fatalf("expected errPackDeltaUnimplemented, got %v", err)
}
- return baseSize, resultSize, nil
}
func removeLooseObject(repoPath, oid string) error {