aboutsummaryrefslogtreecommitdiff
path: root/internal/flate
diff options
context:
space:
mode:
authorGravatar Runxi Yu2025-11-19 08:00:00 +0800
committerGravatar Runxi Yu2025-11-19 08:00:00 +0800
commit811ba33e8401e2becf2c6caa03bf293fc610059a (patch)
treeee69450ff7a65cc46691693187780b5fd8e14870 /internal/flate
parentadler32: Unroll update loop (diff)
signatureNo signature
Initial attempt to make a compressor with less overhead
io.Reader actually has massive overhead...
Diffstat (limited to 'internal/flate')
-rw-r--r--internal/flate/decompress_bytes.go99
-rw-r--r--internal/flate/decompress_test.go76
2 files changed, 175 insertions, 0 deletions
diff --git a/internal/flate/decompress_bytes.go b/internal/flate/decompress_bytes.go
new file mode 100644
index 00000000..2cd9fd89
--- /dev/null
+++ b/internal/flate/decompress_bytes.go
@@ -0,0 +1,99 @@
+package flate
+
+import (
+ "io"
+ "sync"
+
+ "git.sr.ht/~runxiyu/furgit/internal/bufpool"
+)
+
+// byteSliceReader implements Reader over an in-memory byte slice.
+type byteSliceReader struct {
+ data []byte
+ off int
+}
+
+func (r *byteSliceReader) Reset(data []byte) {
+ r.data = data
+ r.off = 0
+}
+
+func (r *byteSliceReader) Read(p []byte) (int, error) {
+ if r.off >= len(r.data) {
+ return 0, io.EOF
+ }
+ n := copy(p, r.data[r.off:])
+ r.off += n
+ return n, nil
+}
+
+func (r *byteSliceReader) ReadByte() (byte, error) {
+ if r.off >= len(r.data) {
+ return 0, io.EOF
+ }
+ b := r.data[r.off]
+ r.off++
+ return b, nil
+}
+
+// bufferDecompressor wraps the core decompressor with pooling state so that
+// byte-slice decompressions avoid repeated allocations.
+type bufferDecompressor struct {
+ dec decompressor
+ reader byteSliceReader
+}
+
+var bufferDecompressorPool = sync.Pool{
+ New: func() any {
+ fixedHuffmanDecoderInit()
+ d := &bufferDecompressor{}
+ d.dec.bits = new([maxNumLit + maxNumDist]int)
+ d.dec.codebits = new([numCodes]int)
+ d.dec.step = (*decompressor).nextBlock
+ return d
+ },
+}
+
+// Decompress inflates the provided DEFLATE stream and returns the full output
+// in a pooled bufpool.Buffer along with the number of consumed bytes from src.
+func Decompress(src []byte) (bufpool.Buffer, int, error) {
+ return DecompressDict(src, nil)
+}
+
+// DecompressDict inflates the provided DEFLATE stream using dict as the preset
+// dictionary and returns the full output in a pooled bufpool.Buffer. The second
+// returned value reports how many bytes of src were consumed.
+func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) {
+ d := bufferDecompressorPool.Get().(*bufferDecompressor)
+ defer func() {
+ d.reader.Reset(nil)
+ bufferDecompressorPool.Put(d)
+ }()
+
+ d.reader.Reset(src)
+ if err := d.dec.Reset(&d.reader, dict); err != nil {
+ return bufpool.Buffer{}, 0, err
+ }
+
+ out := bufpool.Borrow(bufpool.DefaultBufferCap)
+ out.Resize(0)
+
+ for {
+ if len(d.dec.toRead) > 0 {
+ out.Append(d.dec.toRead)
+ d.dec.toRead = nil
+ continue
+ }
+ if d.dec.err != nil {
+ if d.dec.err == io.EOF {
+ return out, d.reader.off, nil
+ }
+ out.Release()
+ return bufpool.Buffer{}, 0, d.dec.err
+ }
+ d.dec.step(&d.dec)
+ if d.dec.err != nil && len(d.dec.toRead) == 0 {
+ d.dec.toRead = d.dec.dict.readFlush()
+ }
+ }
+}
diff --git a/internal/flate/decompress_test.go b/internal/flate/decompress_test.go
new file mode 100644
index 00000000..089159d6
--- /dev/null
+++ b/internal/flate/decompress_test.go
@@ -0,0 +1,76 @@
+package flate
+
+import (
+ "bytes"
+ stdflate "compress/flate"
+ "testing"
+)
+
+func compressDeflate(t *testing.T, payload, dict []byte) []byte {
+ t.Helper()
+ var buf bytes.Buffer
+ var (
+ w *stdflate.Writer
+ err error
+ )
+ if dict != nil {
+ w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict)
+ } else {
+ w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression)
+ }
+ if err != nil {
+ t.Fatalf("NewWriter: %v", err)
+ }
+ if _, err := w.Write(payload); err != nil {
+ t.Fatalf("Write: %v", err)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatalf("Close: %v", err)
+ }
+ return buf.Bytes()
+}
+
+func TestDecompress(t *testing.T) {
+ payload := bytes.Repeat([]byte("golang"), 32)
+ compressed := compressDeflate(t, payload, nil)
+
+ out, _, err := Decompress(compressed)
+ if err != nil {
+ t.Fatalf("Decompress: %v", err)
+ }
+ defer out.Release()
+
+ if !bytes.Equal(out.Bytes(), payload) {
+ t.Fatalf("unexpected payload: got %q", out.Bytes())
+ }
+}
+
+func TestDecompressDict(t *testing.T) {
+ dict := []byte("furgit dictionary payload")
+ payload := append([]byte(nil), dict...)
+ payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...)
+
+ compressed := compressDeflate(t, payload, dict)
+
+ out, _, err := DecompressDict(compressed, dict)
+ if err != nil {
+ t.Fatalf("DecompressDict: %v", err)
+ }
+ defer out.Release()
+
+ if !bytes.Equal(out.Bytes(), payload) {
+ t.Fatalf("unexpected payload: got %q", out.Bytes())
+ }
+}
+
+func TestDecompressDictMissing(t *testing.T) {
+ dict := []byte("shared prefix to enforce dictionary usage")
+ payload := append([]byte(nil), dict...)
+ payload = append(payload, []byte(" trailing data to force reference")...)
+
+ compressed := compressDeflate(t, payload, dict)
+
+ if _, _, err := Decompress(compressed); err == nil {
+ t.Fatalf("expected error when dictionary missing")
+ }
+}