aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
authorGravatar Runxi Yu2025-11-19 08:00:00 +0800
committerGravatar Runxi Yu2025-11-19 08:00:00 +0800
commit811ba33e8401e2becf2c6caa03bf293fc610059a (patch)
treeee69450ff7a65cc46691693187780b5fd8e14870 /internal
parentadler32: Unroll update loop (diff)
signatureNo signature
Initial attempt to make a compressor with less overhead
io.Reader actually has massive overhead...
Diffstat (limited to 'internal')
-rw-r--r--internal/bench/decompress.go2
-rw-r--r--internal/flate/decompress_bytes.go99
-rw-r--r--internal/flate/decompress_test.go76
-rw-r--r--internal/zlib/decompress.go69
-rw-r--r--internal/zlib/decompress_test.go84
5 files changed, 330 insertions, 0 deletions
diff --git a/internal/bench/decompress.go b/internal/bench/decompress.go
index c4a4d274..2eab0ad2 100644
--- a/internal/bench/decompress.go
+++ b/internal/bench/decompress.go
@@ -1,3 +1,5 @@
+//go:build ignore
+
package main
import (
diff --git a/internal/flate/decompress_bytes.go b/internal/flate/decompress_bytes.go
new file mode 100644
index 00000000..2cd9fd89
--- /dev/null
+++ b/internal/flate/decompress_bytes.go
@@ -0,0 +1,99 @@
+package flate
+
+import (
+ "io"
+ "sync"
+
+ "git.sr.ht/~runxiyu/furgit/internal/bufpool"
+)
+
+// byteSliceReader implements Reader over an in-memory byte slice.
+type byteSliceReader struct {
+ data []byte
+ off int
+}
+
+func (r *byteSliceReader) Reset(data []byte) {
+ r.data = data
+ r.off = 0
+}
+
+func (r *byteSliceReader) Read(p []byte) (int, error) {
+ if r.off >= len(r.data) {
+ return 0, io.EOF
+ }
+ n := copy(p, r.data[r.off:])
+ r.off += n
+ return n, nil
+}
+
+func (r *byteSliceReader) ReadByte() (byte, error) {
+ if r.off >= len(r.data) {
+ return 0, io.EOF
+ }
+ b := r.data[r.off]
+ r.off++
+ return b, nil
+}
+
+// bufferDecompressor wraps the core decompressor with pooling state so that
+// byte-slice decompressions avoid repeated allocations.
+type bufferDecompressor struct {
+ dec decompressor
+ reader byteSliceReader
+}
+
+var bufferDecompressorPool = sync.Pool{
+ New: func() any {
+ fixedHuffmanDecoderInit()
+ d := &bufferDecompressor{}
+ d.dec.bits = new([maxNumLit + maxNumDist]int)
+ d.dec.codebits = new([numCodes]int)
+ d.dec.step = (*decompressor).nextBlock
+ return d
+ },
+}
+
+// Decompress inflates the provided DEFLATE stream and returns the full output
+// in a pooled bufpool.Buffer along with the number of consumed bytes from src.
+func Decompress(src []byte) (bufpool.Buffer, int, error) {
+ return DecompressDict(src, nil)
+}
+
+// DecompressDict inflates the provided DEFLATE stream using dict as the preset
+// dictionary and returns the full output in a pooled bufpool.Buffer. The second
+// returned value reports how many bytes of src were consumed.
+func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) {
+ d := bufferDecompressorPool.Get().(*bufferDecompressor)
+ defer func() {
+ d.reader.Reset(nil)
+ bufferDecompressorPool.Put(d)
+ }()
+
+ d.reader.Reset(src)
+ if err := d.dec.Reset(&d.reader, dict); err != nil {
+ return bufpool.Buffer{}, 0, err
+ }
+
+ out := bufpool.Borrow(bufpool.DefaultBufferCap)
+ out.Resize(0)
+
+ for {
+ if len(d.dec.toRead) > 0 {
+ out.Append(d.dec.toRead)
+ d.dec.toRead = nil
+ continue
+ }
+ if d.dec.err != nil {
+ if d.dec.err == io.EOF {
+ return out, d.reader.off, nil
+ }
+ out.Release()
+ return bufpool.Buffer{}, 0, d.dec.err
+ }
+ d.dec.step(&d.dec)
+ if d.dec.err != nil && len(d.dec.toRead) == 0 {
+ d.dec.toRead = d.dec.dict.readFlush()
+ }
+ }
+}
diff --git a/internal/flate/decompress_test.go b/internal/flate/decompress_test.go
new file mode 100644
index 00000000..089159d6
--- /dev/null
+++ b/internal/flate/decompress_test.go
@@ -0,0 +1,76 @@
+package flate
+
+import (
+ "bytes"
+ stdflate "compress/flate"
+ "testing"
+)
+
+func compressDeflate(t *testing.T, payload, dict []byte) []byte {
+ t.Helper()
+ var buf bytes.Buffer
+ var (
+ w *stdflate.Writer
+ err error
+ )
+ if dict != nil {
+ w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict)
+ } else {
+ w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression)
+ }
+ if err != nil {
+ t.Fatalf("NewWriter: %v", err)
+ }
+ if _, err := w.Write(payload); err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatalf("Close: %v", err)
+ }
+ return buf.Bytes()
+}
+
+func TestDecompress(t *testing.T) {
+ payload := bytes.Repeat([]byte("golang"), 32)
+ compressed := compressDeflate(t, payload, nil)
+
+ out, _, err := Decompress(compressed)
+ if err != nil {
+ t.Fatalf("Decompress: %v", err)
+ }
+ defer out.Release()
+
+ if !bytes.Equal(out.Bytes(), payload) {
+ t.Fatalf("unexpected payload: got %q", out.Bytes())
+ }
+}
+
+func TestDecompressDict(t *testing.T) {
+ dict := []byte("furgit dictionary payload")
+ payload := append([]byte(nil), dict...)
+ payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...)
+
+ compressed := compressDeflate(t, payload, dict)
+
+ out, _, err := DecompressDict(compressed, dict)
+ if err != nil {
+ t.Fatalf("DecompressDict: %v", err)
+ }
+ defer out.Release()
+
+ if !bytes.Equal(out.Bytes(), payload) {
+ t.Fatalf("unexpected payload: got %q", out.Bytes())
+ }
+}
+
+func TestDecompressDictMissing(t *testing.T) {
+ dict := []byte("shared prefix to enforce dictionary usage")
+ payload := append([]byte(nil), dict...)
+ payload = append(payload, []byte(" trailing data to force reference")...)
+
+ compressed := compressDeflate(t, payload, dict)
+
+ if _, _, err := Decompress(compressed); err == nil {
+ t.Fatalf("expected error when dictionary missing")
+ }
+}
diff --git a/internal/zlib/decompress.go b/internal/zlib/decompress.go
new file mode 100644
index 00000000..55f6d3e2
--- /dev/null
+++ b/internal/zlib/decompress.go
@@ -0,0 +1,69 @@
+package zlib
+
+import (
+ "encoding/binary"
+ "io"
+
+ "git.sr.ht/~runxiyu/furgit/internal/adler32"
+ "git.sr.ht/~runxiyu/furgit/internal/bufpool"
+ "git.sr.ht/~runxiyu/furgit/internal/flate"
+)
+
+// Decompress inflates the provided zlib wrapped stream and returns the
+// uncompressed data inside a pooled bufpool.Buffer.
+func Decompress(src []byte) (bufpool.Buffer, error) {
+ return DecompressDict(src, nil)
+}
+
+// DecompressDict is like Decompress but accepts a preset dictionary. The
+// dictionary must match the checksum embedded in the stream if the dictionary
+// flag is present.
+func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, error) {
+ if len(src) < 6 {
+ return bufpool.Buffer{}, io.ErrUnexpectedEOF
+ }
+
+ cmf := src[0]
+ flg := src[1]
+ if (cmf&0x0f != zlibDeflate) || (cmf>>4 > zlibMaxWindow) || (binary.BigEndian.Uint16(src[:2])%31 != 0) {
+ return bufpool.Buffer{}, ErrHeader
+ }
+
+ offset := 2
+ haveDict := flg&0x20 != 0
+ if haveDict {
+ if len(src) < offset+4 {
+ return bufpool.Buffer{}, io.ErrUnexpectedEOF
+ }
+ if dict == nil {
+ return bufpool.Buffer{}, ErrDictionary
+ }
+ checksum := binary.BigEndian.Uint32(src[offset : offset+4])
+ if checksum != adler32.Checksum(dict) {
+ return bufpool.Buffer{}, ErrDictionary
+ }
+ offset += 4
+ }
+
+ if len(src[offset:]) < 4 {
+ return bufpool.Buffer{}, io.ErrUnexpectedEOF
+ }
+
+ deflateData := src[offset:]
+ out, consumed, err := flate.DecompressDict(deflateData, dict)
+ if err != nil {
+ return bufpool.Buffer{}, err
+ }
+
+ checksumPos := offset + consumed
+ if checksumPos+4 > len(src) {
+ out.Release()
+ return bufpool.Buffer{}, io.ErrUnexpectedEOF
+ }
+ expected := binary.BigEndian.Uint32(src[checksumPos : checksumPos+4])
+ if expected != adler32.Checksum(out.Bytes()) {
+ out.Release()
+ return bufpool.Buffer{}, ErrChecksum
+ }
+ return out, nil
+}
diff --git a/internal/zlib/decompress_test.go b/internal/zlib/decompress_test.go
new file mode 100644
index 00000000..bb517ae6
--- /dev/null
+++ b/internal/zlib/decompress_test.go
@@ -0,0 +1,84 @@
+package zlib
+
+import (
+ "bytes"
+ stdzlib "compress/zlib"
+ "testing"
+)
+
+func compressZlib(t *testing.T, payload, dict []byte) []byte {
+ t.Helper()
+ var buf bytes.Buffer
+ var (
+ w *stdzlib.Writer
+ err error
+ )
+ if dict != nil {
+ w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict)
+ } else {
+ w = stdzlib.NewWriter(&buf)
+ }
+ if err != nil {
+ t.Fatalf("NewWriter: %v", err)
+ }
+ if _, err := w.Write(payload); err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatalf("Close: %v", err)
+ }
+ return buf.Bytes()
+}
+
+func TestDecompress(t *testing.T) {
+ payload := []byte("hello, zlib world!")
+ compressed := compressZlib(t, payload, nil)
+
+ out, err := Decompress(compressed)
+ if err != nil {
+ t.Fatalf("Decompress: %v", err)
+ }
+ defer out.Release()
+
+ if !bytes.Equal(out.Bytes(), payload) {
+ t.Fatalf("unexpected payload %q", out.Bytes())
+ }
+}
+
+func TestDecompressDict(t *testing.T) {
+ dict := []byte("git dictionary for zlib")
+ payload := append([]byte(nil), dict...)
+ payload = append(payload, []byte(" -- extended body -- extended body")...)
+ compressed := compressZlib(t, payload, dict)
+
+ out, err := DecompressDict(compressed, dict)
+ if err != nil {
+ t.Fatalf("DecompressDict: %v", err)
+ }
+ defer out.Release()
+
+ if !bytes.Equal(out.Bytes(), payload) {
+ t.Fatalf("unexpected payload %q", out.Bytes())
+ }
+}
+
+func TestDecompressDictMissing(t *testing.T) {
+ dict := []byte("preset dictionary")
+ payload := append([]byte(nil), dict...)
+ payload = append(payload, []byte(" .. more data ..")...)
+ compressed := compressZlib(t, payload, dict)
+
+ if _, err := Decompress(compressed); err != ErrDictionary {
+ t.Fatalf("expected ErrDictionary, got %v", err)
+ }
+}
+
+func TestDecompressChecksumError(t *testing.T) {
+ payload := []byte("checksum check")
+ compressed := compressZlib(t, payload, nil)
+ compressed[len(compressed)-1] ^= 0xff
+
+ if _, err := Decompress(compressed); err != ErrChecksum {
+ t.Fatalf("expected ErrChecksum, got %v", err)
+ }
+}