From ed0a113f034aa42aea23471c4bc0d7af159b7002 Mon Sep 17 00:00:00 2001
From: Runxi Yu
Date: Wed, 19 Nov 2025 08:00:00 +0800
Subject: Probably should name the custom packages specially

---
 internal/flate/LICENSE              |  27 --
 internal/flate/decompress_bytes.go  |  64 ---
 internal/flate/decompress_test.go   |  76 ----
 internal/flate/dict_decoder.go      | 182 --------
 internal/flate/inflate.go           | 839 ------------------------------------
 internal/flate/slice_inflate.go     | 479 --------------------
 internal/flatex/LICENSE             |  27 ++
 internal/flatex/decompress_bytes.go |  64 +++
 internal/flatex/decompress_test.go  |  76 ++++
 internal/flatex/dict_decoder.go     | 182 ++++++++
 internal/flatex/inflate.go          | 839 ++++++++++++++++++++++++++++++++++++
 internal/flatex/slice_inflate.go    | 479 ++++++++++++++++++++
 internal/zlib/LICENSE               |  27 --
 internal/zlib/decompress.go         |  69 ---
 internal/zlib/decompress_test.go    |  84 ----
 internal/zlib/reader.go             | 198 ---------
 internal/zlibx/LICENSE              |  27 ++
 internal/zlibx/decompress.go        |  69 +++
 internal/zlibx/decompress_test.go   |  84 ++++
 internal/zlibx/reader.go            | 198 +++++++++
 20 files changed, 2045 insertions(+), 2045 deletions(-)
 delete mode 100644 internal/flate/LICENSE
 delete mode 100644 internal/flate/decompress_bytes.go
 delete mode 100644 internal/flate/decompress_test.go
 delete mode 100644 internal/flate/dict_decoder.go
 delete mode 100644 internal/flate/inflate.go
 delete mode 100644 internal/flate/slice_inflate.go
 create mode 100644 internal/flatex/LICENSE
 create mode 100644 internal/flatex/decompress_bytes.go
 create mode 100644 internal/flatex/decompress_test.go
 create mode 100644 internal/flatex/dict_decoder.go
 create mode 100644 internal/flatex/inflate.go
 create mode 100644 internal/flatex/slice_inflate.go
 delete mode 100644 internal/zlib/LICENSE
 delete mode 100644 internal/zlib/decompress.go
 delete mode 100644 internal/zlib/decompress_test.go
 delete mode 100644 internal/zlib/reader.go
 create mode 100644 internal/zlibx/LICENSE
 create mode 100644 internal/zlibx/decompress.go
 create mode 100644 internal/zlibx/decompress_test.go
 create mode 100644 internal/zlibx/reader.go

(limited to 'internal')

diff --git a/internal/flate/LICENSE b/internal/flate/LICENSE
deleted file mode 100644
index 2a7cf70d..00000000
--- a/internal/flate/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright 2009 The Go Authors.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-   * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-   * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-   * Neither the name of Google LLC nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/internal/flate/decompress_bytes.go b/internal/flate/decompress_bytes.go
deleted file mode 100644
index 18fe21fc..00000000
--- a/internal/flate/decompress_bytes.go
+++ /dev/null
@@ -1,64 +0,0 @@
-package flate
-
-import (
-	"io"
-	"sync"
-
-	"git.sr.ht/~runxiyu/furgit/internal/bufpool"
-)
-
-// bufferDecompressor wraps the custom slice inflater so byte-slice
-// decompressions avoid repeated allocations.
-type bufferDecompressor struct {
-	inflater sliceInflater
-}
-
-var bufferDecompressorPool = sync.Pool{
-	New: func() any {
-		fixedHuffmanDecoderInit()
-		d := &bufferDecompressor{}
-		d.inflater.bits = new([maxNumLit + maxNumDist]int)
-		d.inflater.codebits = new([numCodes]int)
-		return d
-	},
-}
-
-// Decompress inflates the provided DEFLATE stream and returns the full output
-// in a pooled bufpool.Buffer along with the number of consumed bytes from src.
-func Decompress(src []byte) (bufpool.Buffer, int, error) {
-	return DecompressDict(src, nil)
-}
-
-// DecompressDict inflates the provided DEFLATE stream using dict as the preset
-// dictionary and returns the full output in a pooled bufpool.Buffer. The second
-// returned value reports how many bytes of src were consumed.
-func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) {
-	d := bufferDecompressorPool.Get().(*bufferDecompressor)
-	defer bufferDecompressorPool.Put(d)
-
-	if err := d.inflater.reset(src, dict); err != nil {
-		return bufpool.Buffer{}, 0, err
-	}
-
-	out := bufpool.Borrow(bufpool.DefaultBufferCap)
-	out.Resize(0)
-
-	for {
-		if len(d.inflater.toRead) > 0 {
-			out.Append(d.inflater.toRead)
-			d.inflater.toRead = nil
-			continue
-		}
-		if d.inflater.err != nil {
-			if d.inflater.err == io.EOF {
-				return out, d.inflater.pos, nil
-			}
-			out.Release()
-			return bufpool.Buffer{}, 0, d.inflater.err
-		}
-		d.inflater.step(&d.inflater)
-		if d.inflater.err != nil && len(d.inflater.toRead) == 0 {
-			d.inflater.toRead = d.inflater.dict.readFlush()
-		}
-	}
-}
diff --git a/internal/flate/decompress_test.go b/internal/flate/decompress_test.go
deleted file mode 100644
index 089159d6..00000000
--- a/internal/flate/decompress_test.go
+++ /dev/null
@@ -1,76 +0,0 @@
-package flate
-
-import (
-	"bytes"
-	stdflate "compress/flate"
-	"testing"
-)
-
-func compressDeflate(t *testing.T, payload, dict []byte) []byte {
-	t.Helper()
-	var buf bytes.Buffer
-	var (
-		w   *stdflate.Writer
-		err error
-	)
-	if dict != nil {
-		w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict)
-	} else {
-		w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression)
-	}
-	if err != nil {
-		t.Fatalf("NewWriter: %v", err)
-	}
-	if _, err := w.Write(payload); err != nil {
-		t.Fatalf("Write: %v", err)
-	}
-	if err := w.Close(); err != nil {
-		t.Fatalf("Close: %v", err)
-	}
-	return buf.Bytes()
-}
-
-func TestDecompress(t *testing.T) {
-	payload := bytes.Repeat([]byte("golang"), 32)
-	compressed := compressDeflate(t, payload, nil)
-
-	out, _, err := Decompress(compressed)
-	if err != nil {
-		t.Fatalf("Decompress: %v", err)
-	}
-	defer out.Release()
-
-	if !bytes.Equal(out.Bytes(), payload) {
-		t.Fatalf("unexpected payload: got %q", out.Bytes())
-	}
-}
-
-func TestDecompressDict(t *testing.T) {
-	dict := []byte("furgit dictionary payload")
-	payload := append([]byte(nil), dict...)
-	payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...)
-
-	compressed := compressDeflate(t, payload, dict)
-
-	out, _, err := DecompressDict(compressed, dict)
-	if err != nil {
-		t.Fatalf("DecompressDict: %v", err)
-	}
-	defer out.Release()
-
-	if !bytes.Equal(out.Bytes(), payload) {
-		t.Fatalf("unexpected payload: got %q", out.Bytes())
-	}
-}
-
-func TestDecompressDictMissing(t *testing.T) {
-	dict := []byte("shared prefix to enforce dictionary usage")
-	payload := append([]byte(nil), dict...)
-	payload = append(payload, []byte(" trailing data to force reference")...)
-
-	compressed := compressDeflate(t, payload, dict)
-
-	if _, _, err := Decompress(compressed); err == nil {
-		t.Fatalf("expected error when dictionary missing")
-	}
-}
diff --git a/internal/flate/dict_decoder.go b/internal/flate/dict_decoder.go
deleted file mode 100644
index d2c19040..00000000
--- a/internal/flate/dict_decoder.go
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flate
-
-// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
-// LZ77 decompresses data through sequences of two forms of commands:
-//
-//   - Literal insertions: Runs of one or more symbols are inserted into the data
-//     stream as is. This is accomplished through the writeByte method for a
-//     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
-//     Any valid stream must start with a literal insertion if no preset dictionary
-//     is used.
-//
-//   - Backward copies: Runs of one or more symbols are copied from previously
-//     emitted data. Backward copies come as the tuple (dist, length) where dist
-//     determines how far back in the stream to copy from and length determines how
-//     many bytes to copy. Note that it is valid for the length to be greater than
-//     the distance. Since LZ77 uses forward copies, that situation is used to
-//     perform a form of run-length encoding on repeated runs of symbols.
-//     The writeCopy and tryWriteCopy are used to implement this command.
-//
-// For performance reasons, this implementation performs little to no sanity
-// checks about the arguments. As such, the invariants documented for each
-// method call must be respected.
-type dictDecoder struct {
-	hist []byte // Sliding window history
-
-	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
-	wrPos int  // Current output position in buffer
-	rdPos int  // Have emitted hist[:rdPos] already
-	full  bool // Has a full window length been written yet?
-}
-
-// init initializes dictDecoder to have a sliding window dictionary of the given
-// size. If a preset dict is provided, it will initialize the dictionary with
-// the contents of dict.
-func (dd *dictDecoder) init(size int, dict []byte) {
-	*dd = dictDecoder{hist: dd.hist}
-
-	if cap(dd.hist) < size {
-		dd.hist = make([]byte, size)
-	}
-	dd.hist = dd.hist[:size]
-
-	if len(dict) > len(dd.hist) {
-		dict = dict[len(dict)-len(dd.hist):]
-	}
-	dd.wrPos = copy(dd.hist, dict)
-	if dd.wrPos == len(dd.hist) {
-		dd.wrPos = 0
-		dd.full = true
-	}
-	dd.rdPos = dd.wrPos
-}
-
-// histSize reports the total amount of historical data in the dictionary.
-func (dd *dictDecoder) histSize() int {
-	if dd.full {
-		return len(dd.hist)
-	}
-	return dd.wrPos
-}
-
-// availRead reports the number of bytes that can be flushed by readFlush.
-func (dd *dictDecoder) availRead() int {
-	return dd.wrPos - dd.rdPos
-}
-
-// availWrite reports the available amount of output buffer space.
-func (dd *dictDecoder) availWrite() int {
-	return len(dd.hist) - dd.wrPos
-}
-
-// writeSlice returns a slice of the available buffer to write data to.
-//
-// This invariant will be kept: len(s) <= availWrite()
-func (dd *dictDecoder) writeSlice() []byte {
-	return dd.hist[dd.wrPos:]
-}
-
-// writeMark advances the writer pointer by cnt.
-//
-// This invariant must be kept: 0 <= cnt <= availWrite()
-func (dd *dictDecoder) writeMark(cnt int) {
-	dd.wrPos += cnt
-}
-
-// writeByte writes a single byte to the dictionary.
-//
-// This invariant must be kept: 0 < availWrite()
-func (dd *dictDecoder) writeByte(c byte) {
-	dd.hist[dd.wrPos] = c
-	dd.wrPos++
-}
-
-// writeCopy copies a string at a given (dist, length) to the output.
-// This returns the number of bytes copied and may be less than the requested
-// length if the available space in the output buffer is too small.
-//
-// This invariant must be kept: 0 < dist <= histSize()
-func (dd *dictDecoder) writeCopy(dist, length int) int {
-	dstBase := dd.wrPos
-	dstPos := dstBase
-	srcPos := dstPos - dist
-	endPos := dstPos + length
-	if endPos > len(dd.hist) {
-		endPos = len(dd.hist)
-	}
-
-	// Copy non-overlapping section after destination position.
-	//
-	// This section is non-overlapping in that the copy length for this section
-	// is always less than or equal to the backwards distance. This can occur
-	// if a distance refers to data that wraps-around in the buffer.
-	// Thus, a backwards copy is performed here; that is, the exact bytes in
-	// the source prior to the copy is placed in the destination.
-	if srcPos < 0 {
-		srcPos += len(dd.hist)
-		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
-		srcPos = 0
-	}
-
-	// Copy possibly overlapping section before destination position.
-	//
-	// This section can overlap if the copy length for this section is larger
-	// than the backwards distance. This is allowed by LZ77 so that repeated
-	// strings can be succinctly represented using (dist, length) pairs.
-	// Thus, a forwards copy is performed here; that is, the bytes copied is
-	// possibly dependent on the resulting bytes in the destination as the copy
-	// progresses along. This is functionally equivalent to the following:
-	//
-	//	for i := 0; i < endPos-dstPos; i++ {
-	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
-	//	}
-	//	dstPos = endPos
-	//
-	for dstPos < endPos {
-		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
-	}
-
-	dd.wrPos = dstPos
-	return dstPos - dstBase
-}
-
-// tryWriteCopy tries to copy a string at a given (distance, length) to the
-// output. This specialized version is optimized for short distances.
-//
-// This method is designed to be inlined for performance reasons.
-//
-// This invariant must be kept: 0 < dist <= histSize()
-func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
-	dstPos := dd.wrPos
-	endPos := dstPos + length
-	if dstPos < dist || endPos > len(dd.hist) {
-		return 0
-	}
-	dstBase := dstPos
-	srcPos := dstPos - dist
-
-	// Copy possibly overlapping section before destination position.
-	for dstPos < endPos {
-		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
-	}
-
-	dd.wrPos = dstPos
-	return dstPos - dstBase
-}
-
-// readFlush returns a slice of the historical buffer that is ready to be
-// emitted to the user. The data returned by readFlush must be fully consumed
-// before calling any other dictDecoder methods.
-func (dd *dictDecoder) readFlush() []byte {
-	toRead := dd.hist[dd.rdPos:dd.wrPos]
-	dd.rdPos = dd.wrPos
-	if dd.wrPos == len(dd.hist) {
-		dd.wrPos, dd.rdPos = 0, 0
-		dd.full = true
-	}
-	return toRead
-}
diff --git a/internal/flate/inflate.go b/internal/flate/inflate.go
deleted file mode 100644
index cfcff084..00000000
--- a/internal/flate/inflate.go
+++ /dev/null
@@ -1,839 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package flate implements the DEFLATE compressed data format, described in
-// RFC 1951.  The [compress/gzip] and [compress/zlib] packages implement access
-// to DEFLATE-based file formats.
-package flate
-
-import (
-	"bufio"
-	"io"
-	"math/bits"
-	"strconv"
-	"sync"
-)
-
-const (
-	// The special code used to mark the end of a block.
-	endBlockMarker = 256
-	maxCodeLen     = 16      // max length of Huffman code
-	maxMatchOffset = 1 << 15 // The largest match offset
-	// The next three numbers come from the RFC section 3.2.7, with the
-	// additional proviso in section 3.2.5 which implies that distance codes
-	// 30 and 31 should never occur in compressed data.
-	maxNumLit  = 286
-	maxNumDist = 30
-	numCodes   = 19 // number of codes in Huffman meta-code
-)
-
-// Initialize the fixedHuffmanDecoder only once upon first use.
-var fixedOnce sync.Once
-var fixedHuffmanDecoder huffmanDecoder
-
-// A CorruptInputError reports the presence of corrupt input at a given offset.
-type CorruptInputError int64
-
-func (e CorruptInputError) Error() string {
-	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
-}
-
-// An InternalError reports an error in the flate code itself.
-type InternalError string
-
-func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
-
-// A ReadError reports an error encountered while reading input.
-//
-// Deprecated: No longer returned.
-type ReadError struct {
-	Offset int64 // byte offset where error occurred
-	Err    error // error returned by underlying Read
-}
-
-func (e *ReadError) Error() string {
-	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
-}
-
-// A WriteError reports an error encountered while writing output.
-//
-// Deprecated: No longer returned.
-type WriteError struct {
-	Offset int64 // byte offset where error occurred
-	Err    error // error returned by underlying Write
-}
-
-func (e *WriteError) Error() string {
-	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
-}
-
-// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
-// to switch to a new underlying [Reader]. This permits reusing a ReadCloser
-// instead of allocating a new one.
-type Resetter interface {
-	// Reset discards any buffered data and resets the Resetter as if it was
-	// newly initialized with the given reader.
-	Reset(r io.Reader, dict []byte) error
-}
-
-// The data structure for decoding Huffman tables is based on that of
-// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
-// For codes smaller than the table width, there are multiple entries
-// (each combination of trailing bits has the same value). For codes
-// larger than the table width, the table contains a link to an overflow
-// table. The width of each entry in the link table is the maximum code
-// size minus the chunk width.
-//
-// Note that you can do a lookup in the table even without all bits
-// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
-// have the property that shorter codes come before longer ones, the
-// bit length estimate in the result is a lower bound on the actual
-// number of bits.
-//
-// See the following:
-//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt
-
-// chunk & 15 is number of bits
-// chunk >> 4 is value, including table link
-
-const (
-	huffmanChunkBits  = 9
-	huffmanNumChunks  = 1 << huffmanChunkBits
-	huffmanCountMask  = 15
-	huffmanValueShift = 4
-)
-
-type huffmanDecoder struct {
-	min      int                      // the minimum code length
-	chunks   [huffmanNumChunks]uint32 // chunks as described above
-	links    [][]uint32               // overflow links
-	linkMask uint32                   // mask the width of the link table
-}
-
-// Initialize Huffman decoding tables from array of code lengths.
-// Following this function, h is guaranteed to be initialized into a complete
-// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
-// degenerate case where the tree has only a single symbol with length 1. Empty
-// trees are permitted.
-func (h *huffmanDecoder) init(lengths []int) bool {
-	// Sanity enables additional runtime tests during Huffman
-	// table construction. It's intended to be used during
-	// development to supplement the currently ad-hoc unit tests.
-	const sanity = false
-
-	if h.min != 0 {
-		*h = huffmanDecoder{}
-	}
-
-	// Count number of codes of each length,
-	// compute min and max length.
-	var count [maxCodeLen]int
-	var min, max int
-	for _, n := range lengths {
-		if n == 0 {
-			continue
-		}
-		if min == 0 || n < min {
-			min = n
-		}
-		if n > max {
-			max = n
-		}
-		count[n]++
-	}
-
-	// Empty tree. The decompressor.huffSym function will fail later if the tree
-	// is used. Technically, an empty tree is only valid for the HDIST tree and
-	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
-	// is guaranteed to fail since it will attempt to use the tree to decode the
-	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
-	// guaranteed to fail later since the compressed data section must be
-	// composed of at least one symbol (the end-of-block marker).
-	if max == 0 {
-		return true
-	}
-
-	code := 0
-	var nextcode [maxCodeLen]int
-	for i := min; i <= max; i++ {
-		code <<= 1
-		nextcode[i] = code
-		code += count[i]
-	}
-
-	// Check that the coding is complete (i.e., that we've
-	// assigned all 2-to-the-max possible bit sequences).
-	// Exception: To be compatible with zlib, we also need to
-	// accept degenerate single-code codings. See also
-	// TestDegenerateHuffmanCoding.
-	if code != 1<<uint(max) && (code != 1 || max != 1) {
-		return false
-	}
-
-	h.min = min
-	if max > huffmanChunkBits {
-		numLinks := 1 << (uint(max) - huffmanChunkBits)
-		h.linkMask = uint32(numLinks - 1)
-
-		// create link tables
-		link := nextcode[huffmanChunkBits+1] >> 1
-		h.links = make([][]uint32, huffmanNumChunks-link)
-		for j := uint(link); j < huffmanNumChunks; j++ {
-			reverse := int(bits.Reverse16(uint16(j)))
-			reverse >>= uint(16 - huffmanChunkBits)
-			off := j - uint(link)
-			if sanity && h.chunks[reverse] != 0 {
-				panic("impossible: overwriting existing chunk")
-			}
-			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
-			h.links[off] = make([]uint32, numLinks)
-		}
-	}
-
-	for i, n := range lengths {
-		if n == 0 {
-			continue
-		}
-		code := nextcode[n]
-		nextcode[n]++
-		chunk := uint32(i<<huffmanValueShift | n)
-		reverse := int(bits.Reverse16(uint16(code)))
-		reverse >>= uint(16 - n)
-		if n <= huffmanChunkBits {
-			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
-				// We should never need to overwrite
-				// an existing chunk. Also, 0 is
-				// never a valid chunk, because the
-				// lower 4 "count" bits should be
-				// between 1 and 15.
-				if sanity && h.chunks[off] != 0 {
-					panic("impossible: overwriting existing chunk")
-				}
-				h.chunks[off] = chunk
-			}
-		} else {
-			j := reverse & (huffmanNumChunks - 1)
-			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
-				// Longer codes should have been
-				// associated with a link table above.
-				panic("impossible: not an indirect chunk")
-			}
-			value := h.chunks[j] >> huffmanValueShift
-			linktab := h.links[value]
-			reverse >>= huffmanChunkBits
-			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
-				if sanity && linktab[off] != 0 {
-					panic("impossible: overwriting existing chunk")
-				}
-				linktab[off] = chunk
-			}
-		}
-	}
-
-	if sanity {
-		// Above we've sanity checked that we never overwrote
-		// an existing entry. Here we additionally check that
-		// we filled the tables completely.
-		for i, chunk := range h.chunks {
-			if chunk == 0 {
-				// As an exception, in the degenerate
-				// single-code case, we allow odd
-				// chunks to be missing.
-				if code == 1 && i%2 == 1 {
-					continue
-				}
-				panic("impossible: missing chunk")
-			}
-		}
-		for _, linktab := range h.links {
-			for _, chunk := range linktab {
-				if chunk == 0 {
-					panic("impossible: missing chunk")
-				}
-			}
-		}
-	}
-
-	return true
-}
-
-// The actual read interface needed by [NewReader].
-// If the passed in [io.Reader] does not also have ReadByte,
-// the [NewReader] will introduce its own buffering.
-type Reader interface {
-	io.Reader
-	io.ByteReader
-}
-
-// Decompress state.
-type decompressor struct {
-	// Input source.
-	r       Reader
-	rBuf    *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
-	roffset int64
-
-	// Input bits, in top of b.
-	b  uint32
-	nb uint
-
-	// Huffman decoders for literal/length, distance.
-	h1, h2 huffmanDecoder
-
-	// Length arrays used to define Huffman codes.
-	bits     *[maxNumLit + maxNumDist]int
-	codebits *[numCodes]int
-
-	// Output history, buffer.
-	dict dictDecoder
-
-	// Temporary buffer (avoids repeated allocation).
-	buf [4]byte
-
-	// Next step in the decompression,
-	// and decompression state.
-	step      func(*decompressor)
-	stepState int
-	final     bool
-	err       error
-	toRead    []byte
-	hl, hd    *huffmanDecoder
-	copyLen   int
-	copyDist  int
-}
-
-func (f *decompressor) nextBlock() {
-	for f.nb < 1+2 {
-		if f.err = f.moreBits(); f.err != nil {
-			return
-		}
-	}
-	f.final = f.b&1 == 1
-	f.b >>= 1
-	typ := f.b & 3
-	f.b >>= 2
-	f.nb -= 1 + 2
-	switch typ {
-	case 0:
-		f.dataBlock()
-	case 1:
-		// compressed, fixed Huffman tables
-		f.hl = &fixedHuffmanDecoder
-		f.hd = nil
-		f.huffmanBlock()
-	case 2:
-		// compressed, dynamic Huffman tables
-		if f.err = f.readHuffman(); f.err != nil {
-			break
-		}
-		f.hl = &f.h1
-		f.hd = &f.h2
-		f.huffmanBlock()
-	default:
-		// 3 is reserved.
-		f.err = CorruptInputError(f.roffset)
-	}
-}
-
-func (f *decompressor) Read(b []byte) (int, error) {
-	for {
-		if len(f.toRead) > 0 {
-			n := copy(b, f.toRead)
-			f.toRead = f.toRead[n:]
-			if len(f.toRead) == 0 {
-				return n, f.err
-			}
-			return n, nil
-		}
-		if f.err != nil {
-			return 0, f.err
-		}
-		f.step(f)
-		if f.err != nil && len(f.toRead) == 0 {
-			f.toRead = f.dict.readFlush() // Flush what's left in case of error
-		}
-	}
-}
-
-func (f *decompressor) Close() error {
-	if f.err == io.EOF {
-		return nil
-	}
-	return f.err
-}
-
-// RFC 1951 section 3.2.7.
-// Compression with dynamic Huffman codes
-
-var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
-
-func (f *decompressor) readHuffman() error {
-	// HLIT[5], HDIST[5], HCLEN[4].
-	for f.nb < 5+5+4 {
-		if err := f.moreBits(); err != nil {
-			return err
-		}
-	}
-	nlit := int(f.b&0x1F) + 257
-	if nlit > maxNumLit {
-		return CorruptInputError(f.roffset)
-	}
-	f.b >>= 5
-	ndist := int(f.b&0x1F) + 1
-	if ndist > maxNumDist {
-		return CorruptInputError(f.roffset)
-	}
-	f.b >>= 5
-	nclen := int(f.b&0xF) + 4
-	// numCodes is 19, so nclen is always valid.
-	f.b >>= 4
-	f.nb -= 5 + 5 + 4
-
-	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
-	for i := 0; i < nclen; i++ {
-		for f.nb < 3 {
-			if err := f.moreBits(); err != nil {
-				return err
-			}
-		}
-		f.codebits[codeOrder[i]] = int(f.b & 0x7)
-		f.b >>= 3
-		f.nb -= 3
-	}
-	for i := nclen; i < len(codeOrder); i++ {
-		f.codebits[codeOrder[i]] = 0
-	}
-	if !f.h1.init(f.codebits[0:]) {
-		return CorruptInputError(f.roffset)
-	}
-
-	// HLIT + 257 code lengths, HDIST + 1 code lengths,
-	// using the code length Huffman code.
-	for i, n := 0, nlit+ndist; i < n; {
-		x, err := f.huffSym(&f.h1)
-		if err != nil {
-			return err
-		}
-		if x < 16 {
-			// Actual length.
-			f.bits[i] = x
-			i++
-			continue
-		}
-		// Repeat previous length or zero.
-		var rep int
-		var nb uint
-		var b int
-		switch x {
-		default:
-			return InternalError("unexpected length code")
-		case 16:
-			rep = 3
-			nb = 2
-			if i == 0 {
-				return CorruptInputError(f.roffset)
-			}
-			b = f.bits[i-1]
-		case 17:
-			rep = 3
-			nb = 3
-			b = 0
-		case 18:
-			rep = 11
-			nb = 7
-			b = 0
-		}
-		for f.nb < nb {
-			if err := f.moreBits(); err != nil {
-				return err
-			}
-		}
-		rep += int(f.b & uint32(1<<nb-1))
-		f.b >>= nb
-		f.nb -= nb
-		if i+rep > n {
-			return CorruptInputError(f.roffset)
-		}
-		for j := 0; j < rep; j++ {
-			f.bits[i] = b
-			i++
-		}
-	}
-
-	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
-		return CorruptInputError(f.roffset)
-	}
-
-	// As an optimization, we can initialize the min bits to read at a time
-	// for the HLIT tree to the length of the EOB marker since we know that
-	// every block must terminate with one. This preserves the property that
-	// we never read any extra bytes after the end of the DEFLATE stream.
-	if f.h1.min < f.bits[endBlockMarker] {
-		f.h1.min = f.bits[endBlockMarker]
-	}
-
-	return nil
-}
-
-// Decode a single Huffman block from f.
-// hl and hd are the Huffman states for the lit/length values
-// and the distance values, respectively. If hd == nil, using the
-// fixed distance encoding associated with fixed Huffman blocks.
-func (f *decompressor) huffmanBlock() {
-	const (
-		stateInit = iota // Zero value must be stateInit
-		stateDict
-	)
-
-	switch f.stepState {
-	case stateInit:
-		goto readLiteral
-	case stateDict:
-		goto copyHistory
-	}
-
-readLiteral:
-	// Read literal and/or (length, distance) according to RFC section 3.2.3.
-	{
-		v, err := f.huffSym(f.hl)
-		if err != nil {
-			f.err = err
-			return
-		}
-		var n uint // number of bits extra
-		var length int
-		switch {
-		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
-				f.step = (*decompressor).huffmanBlock
-				f.stepState = stateInit
-				return
-			}
-			goto readLiteral
-		case v == 256:
-			f.finishBlock()
-			return
-		// otherwise, reference to older data
-		case v < 265:
-			length = v - (257 - 3)
-			n = 0
-		case v < 269:
-			length = v*2 - (265*2 - 11)
-			n = 1
-		case v < 273:
-			length = v*4 - (269*4 - 19)
-			n = 2
-		case v < 277:
-			length = v*8 - (273*8 - 35)
-			n = 3
-		case v < 281:
-			length = v*16 - (277*16 - 67)
-			n = 4
-		case v < 285:
-			length = v*32 - (281*32 - 131)
-			n = 5
-		case v < maxNumLit:
-			length = 258
-			n = 0
-		default:
-			f.err = CorruptInputError(f.roffset)
-			return
-		}
-		if n > 0 {
-			for f.nb < n {
-				if err = f.moreBits(); err != nil {
-					f.err = err
-					return
-				}
-			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
-			f.nb -= n
-		}
-
-		var dist int
-		if f.hd == nil {
-			for f.nb < 5 {
-				if err = f.moreBits(); err != nil {
-					f.err = err
-					return
-				}
-			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
-			f.b >>= 5
-			f.nb -= 5
-		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
-				f.err = err
-				return
-			}
-		}
-
-		switch {
-		case dist < 4:
-			dist++
-		case dist < maxNumDist:
-			nb := uint(dist-2) >> 1
-			// have 1 bit in bottom of dist, need nb more.
-			extra := (dist & 1) << nb
-			for f.nb < nb {
-				if err = f.moreBits(); err != nil {
-					f.err = err
-					return
-				}
-			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
-			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
-		default:
-			f.err = CorruptInputError(f.roffset)
-			return
-		}
-
-		// No check on length; encoding can be prescient.
-		if dist > f.dict.histSize() {
-			f.err = CorruptInputError(f.roffset)
-			return
-		}
-
-		f.copyLen, f.copyDist = length, dist
-		goto copyHistory
-	}
-
-copyHistory:
-	// Perform a backwards copy according to RFC section 3.2.3.
-	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
-		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
-		}
-		f.copyLen -= cnt
-
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
-			f.step = (*decompressor).huffmanBlock // We need to continue this work
-			f.stepState = stateDict
-			return
-		}
-		goto readLiteral
-	}
-}
-
-// Copy a single uncompressed data block from input to output.
-func (f *decompressor) dataBlock() {
-	// Uncompressed.
-	// Discard current half-byte.
-	f.nb = 0
-	f.b = 0
-
-	// Length then ones-complement of length.
-	nr, err := io.ReadFull(f.r, f.buf[0:4])
-	f.roffset += int64(nr)
-	if err != nil {
-		f.err = noEOF(err)
-		return
-	}
-	n := int(f.buf[0]) | int(f.buf[1])<<8
-	nn := int(f.buf[2]) | int(f.buf[3])<<8
-	if uint16(nn) != uint16(^n) {
-		f.err = CorruptInputError(f.roffset)
-		return
-	}
-
-	if n == 0 {
-		f.toRead = f.dict.readFlush()
-		f.finishBlock()
-		return
-	}
-
-	f.copyLen = n
-	f.copyData()
-}
-
-// copyData copies f.copyLen bytes from the underlying reader into f.hist.
-// It pauses for reads when f.hist is full.
-func (f *decompressor) copyData() {
-	buf := f.dict.writeSlice()
-	if len(buf) > f.copyLen {
-		buf = buf[:f.copyLen]
-	}
-
-	cnt, err := io.ReadFull(f.r, buf)
-	f.roffset += int64(cnt)
-	f.copyLen -= cnt
-	f.dict.writeMark(cnt)
-	if err != nil {
-		f.err = noEOF(err)
-		return
-	}
-
-	if f.dict.availWrite() == 0 || f.copyLen > 0 {
-		f.toRead = f.dict.readFlush()
-		f.step = (*decompressor).copyData
-		return
-	}
-	f.finishBlock()
-}
-
-func (f *decompressor) finishBlock() {
-	if f.final {
-		if f.dict.availRead() > 0 {
-			f.toRead = f.dict.readFlush()
-		}
-		f.err = io.EOF
-	}
-	f.step = (*decompressor).nextBlock
-}
-
-// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
-func noEOF(e error) error {
-	if e == io.EOF {
-		return io.ErrUnexpectedEOF
-	}
-	return e
-}
-
-func (f *decompressor) moreBits() error {
-	c, err := f.r.ReadByte()
-	if err != nil {
-		return noEOF(err)
-	}
-	f.roffset++
-	f.b |= uint32(c) << f.nb
-	f.nb += 8
-	return nil
-}
-
-// Read the next Huffman-encoded symbol from f according to h.
-func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
-	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
-	// with single element, huffSym must error on these two edge cases. In both
-	// cases, the chunks slice will be 0 for the invalid sequence, leading it
-	// satisfy the n == 0 check below.
-	n := uint(h.min)
-	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
-	// but is smart enough to keep local variables in registers, so use nb and b,
-	// inline call to moreBits and reassign b,nb back to f on return.
-	nb, b := f.nb, f.b
-	for {
-		for nb < n {
-			c, err := f.r.ReadByte()
-			if err != nil {
-				f.b = b
-				f.nb = nb
-				return 0, noEOF(err)
-			}
-			f.roffset++
-			b |= uint32(c) << (nb & 31)
-			nb += 8
-		}
-		chunk := h.chunks[b&(huffmanNumChunks-1)]
-		n = uint(chunk & huffmanCountMask)
-		if n > huffmanChunkBits {
-			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
-			n = uint(chunk & huffmanCountMask)
-		}
-		if n <= nb {
-			if n == 0 {
-				f.b = b
-				f.nb = nb
-				f.err = CorruptInputError(f.roffset)
-				return 0, f.err
-			}
-			f.b = b >> (n & 31)
-			f.nb = nb - n
-			return int(chunk >> huffmanValueShift), nil
-		}
-	}
-}
-
-func (f *decompressor) makeReader(r io.Reader) {
-	if rr, ok := r.(Reader); ok {
-		f.rBuf = nil
-		f.r = rr
-		return
-	}
-	// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
-	if f.rBuf != nil {
-		f.rBuf.Reset(r)
-	} else {
-		// bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader.
-		f.rBuf = bufio.NewReader(r)
-	}
-	f.r = f.rBuf
-}
-
-func fixedHuffmanDecoderInit() {
-	fixedOnce.Do(func() {
-		// These come from the RFC section 3.2.6.
-		var bits [288]int
-		for i := 0; i < 144; i++ {
-			bits[i] = 8
-		}
-		for i := 144; i < 256; i++ {
-			bits[i] = 9
-		}
-		for i := 256; i < 280; i++ {
-			bits[i] = 7
-		}
-		for i := 280; i < 288; i++ {
-			bits[i] = 8
-		}
-		fixedHuffmanDecoder.init(bits[:])
-	})
-}
-
-func (f *decompressor) Reset(r io.Reader, dict []byte) error {
-	*f = decompressor{
-		rBuf:     f.rBuf,
-		bits:     f.bits,
-		codebits: f.codebits,
-		dict:     f.dict,
-		step:     (*decompressor).nextBlock,
-	}
-	f.makeReader(r)
-	f.dict.init(maxMatchOffset, dict)
-	return nil
-}
-
-// NewReader returns a new ReadCloser that can be used
-// to read the uncompressed version of r.
-// If r does not also implement [io.ByteReader],
-// the decompressor may read more data than necessary from r.
-// The reader returns [io.EOF] after the final block in the DEFLATE stream has
-// been encountered. Any trailing data after the final block is ignored.
-//
-// The [io.ReadCloser] returned by NewReader also implements [Resetter].
-func NewReader(r io.Reader) io.ReadCloser {
-	fixedHuffmanDecoderInit()
-
-	var f decompressor
-	f.makeReader(r)
-	f.bits = new([maxNumLit + maxNumDist]int)
-	f.codebits = new([numCodes]int)
-	f.step = (*decompressor).nextBlock
-	f.dict.init(maxMatchOffset, nil)
-	return &f
-}
-
-// NewReaderDict is like [NewReader] but initializes the reader
-// with a preset dictionary. The returned reader behaves as if
-// the uncompressed data stream started with the given dictionary,
-// which has already been read. NewReaderDict is typically used
-// to read data compressed by [NewWriterDict].
-//
-// The ReadCloser returned by NewReaderDict also implements [Resetter].
-func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
-	fixedHuffmanDecoderInit()
-
-	var f decompressor
-	f.makeReader(r)
-	f.bits = new([maxNumLit + maxNumDist]int)
-	f.codebits = new([numCodes]int)
-	f.step = (*decompressor).nextBlock
-	f.dict.init(maxMatchOffset, dict)
-	return &f
-}
diff --git a/internal/flate/slice_inflate.go b/internal/flate/slice_inflate.go
deleted file mode 100644
index 0df4ed45..00000000
--- a/internal/flate/slice_inflate.go
+++ /dev/null
@@ -1,479 +0,0 @@
-package flate
-
-import (
-	"io"
-	"math/bits"
-)
-
-// sliceInflater is a specialized DEFLATE decoder that reads directly from an
-// in-memory byte slice. It mirrors the main decompressor but avoids the
-// overhead of the Reader interfaces, enabling faster byte-slice decoding.
-type sliceInflater struct {
-	input   []byte
-	pos     int
-	roffset int64
-
-	b  uint32
-	nb uint
-
-	h1, h2 huffmanDecoder
-
-	bits     *[maxNumLit + maxNumDist]int
-	codebits *[numCodes]int
-
-	dict dictDecoder
-
-	toRead    []byte
-	step      func(*sliceInflater)
-	stepState int
-	final     bool
-	err       error
-	hl, hd    *huffmanDecoder
-	copyLen   int
-	copyDist  int
-}
-
-func (f *sliceInflater) reset(src []byte, dict []byte) error {
-	bits := f.bits
-	codebits := f.codebits
-	dictState := f.dict
-	*f = sliceInflater{
-		input:    src,
-		bits:     bits,
-		codebits: codebits,
-		dict:     dictState,
-		step:     (*sliceInflater).nextBlock,
-	}
-	f.dict.init(maxMatchOffset, dict)
-	return nil
-}
-
-func (f *sliceInflater) readByte() (byte, error) {
-	if f.pos >= len(f.input) {
-		return 0, io.ErrUnexpectedEOF
-	}
-	b := f.input[f.pos]
-	f.pos++
-	f.roffset++
-	return b, nil
-}
-
-func (f *sliceInflater) readBytes(n int) ([]byte, error) {
-	if n < 0 || f.pos+n > len(f.input) {
-		f.pos = len(f.input)
-		return nil, io.ErrUnexpectedEOF
-	}
-	s := f.input[f.pos : f.pos+n]
-	f.pos += n
-	f.roffset += int64(n)
-	return s, nil
-}
-
-func (f *sliceInflater) nextBlock() {
-	for f.nb < 1+2 {
-		if err := f.moreBits(); err != nil {
-			f.err = err
-			return
-		}
-	}
-	f.final = f.b&1 == 1
-	f.b >>= 1
-	typ := f.b & 3
-	f.b >>= 2
-	f.nb -= 1 + 2
-	switch typ {
-	case 0:
-		f.dataBlock()
-	case 1:
-		f.hl = &fixedHuffmanDecoder
-		f.hd = nil
-		f.huffmanBlock()
-	case 2:
-		if err := f.readHuffman(); err != nil {
-			f.err = err
-			return
-		}
-		f.hl = &f.h1
-		f.hd = &f.h2
-		f.huffmanBlock()
-	default:
-		f.err = CorruptInputError(f.roffset)
-	}
-}
-
-func (f *sliceInflater) huffmanBlock() {
-	const (
-		stateInit = iota
-		stateDict
-	)
-	switch f.stepState {
-	case stateInit:
-		goto readLiteral
-	case stateDict:
-		goto copyHistory
-	}
-
-readLiteral:
-	{
-		v, err := f.huffSym(f.hl)
-		if err != nil {
-			f.err = err
-			return
-		}
-		var n uint
-		var length int
-		switch {
-		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
-				f.step = (*sliceInflater).huffmanBlock
-				f.stepState = stateInit
-				return
-			}
-			goto readLiteral
-		case v == 256:
-			f.finishBlock()
-			return
-		case v < 265:
-			length = v - (257 - 3)
-			n = 0
-		case v < 269:
-			length = v*2 - (265*2 - 11)
-			n = 1
-		case v < 273:
-			length = v*4 - (269*4 - 19)
-			n = 2
-		case v < 277:
-			length = v*8 - (273*8 - 35)
-			n = 3
-		case v < 281:
-			length = v*16 - (277*16 - 67)
-			n = 4
-		case v < 285:
-			length = v*32 - (281*32 - 131)
-			n = 5
-		case v < maxNumLit:
-			length = 258
-			n = 0
-		default:
-			f.err = CorruptInputError(f.roffset)
-			return
-		}
-		if n > 0 {
-			for f.nb < n {
-				if err = f.moreBits(); err != nil {
-					f.err = err
-					return
-				}
-			}
-			length += int(f.b & uint32(1<<n-1))
-			f.b >>= n
-			f.nb -= n
-		}
-
-		var dist int
-		if f.hd == nil {
-			for f.nb < 5 {
-				if err = f.moreBits(); err != nil {
-					f.err = err
-					return
-				}
-			}
-			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
-			f.b >>= 5
-			f.nb -= 5
-		} else {
-			if dist, err = f.huffSym(f.hd); err != nil {
-				f.err = err
-				return
-			}
-		}
-
-		switch {
-		case dist < 4:
-			dist++
-		case dist < maxNumDist:
-			nb := uint(dist-2) >> 1
-			extra := (dist & 1) << nb
-			for f.nb < nb {
-				if err = f.moreBits(); err != nil {
-					f.err = err
-					return
-				}
-			}
-			extra |= int(f.b & uint32(1<<nb-1))
-			f.b >>= nb
-			f.nb -= nb
-			dist = 1<<(nb+1) + 1 + extra
-		default:
-			f.err = CorruptInputError(f.roffset)
-			return
-		}
-
-		if dist > f.dict.histSize() {
-			f.err = CorruptInputError(f.roffset)
-			return
-		}
-
-		f.copyLen, f.copyDist = length, dist
-		goto copyHistory
-	}
-
-copyHistory:
-	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
-		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
-		}
-		f.copyLen -= cnt
-
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
-			f.step = (*sliceInflater).huffmanBlock
-			f.stepState = stateDict
-			return
-		}
-		goto readLiteral
-	}
-}
-
-func (f *sliceInflater) dataBlock() {
-	f.nb = 0
-	f.b = 0
-
-	hdr, err := f.readBytes(4)
-	if err != nil {
-		f.err = err
-		return
-	}
-	n := int(hdr[0]) | int(hdr[1])<<8
-	nn := int(hdr[2]) | int(hdr[3])<<8
-	if uint16(nn) != uint16(^n) {
-		f.err = CorruptInputError(f.roffset)
-		return
-	}
-
-	if n == 0 {
-		f.toRead = f.dict.readFlush()
-		f.finishBlock()
-		return
-	}
-
-	f.copyLen = n
-	f.copyData()
-}
-
-func (f *sliceInflater) copyData() {
-	for {
-		if f.copyLen == 0 {
-			f.finishBlock()
-			return
-		}
-		buf := f.dict.writeSlice()
-		if len(buf) == 0 {
-			f.toRead = f.dict.readFlush()
-			f.step = (*sliceInflater).copyData
-			return
-		}
-		n := f.copyLen
-		if n > len(buf) {
-			n = len(buf)
-		}
-		if f.pos+n > len(f.input) {
-			f.err = io.ErrUnexpectedEOF
-			return
-		}
-		copy(buf[:n], f.input[f.pos:f.pos+n])
-		f.pos += n
-		f.roffset += int64(n)
-		f.copyLen -= n
-		f.dict.writeMark(n)
-		if f.dict.availWrite() == 0 {
-			f.toRead = f.dict.readFlush()
-			f.step = (*sliceInflater).copyData
-			return
-		}
-	}
-}
-
-func (f *sliceInflater) finishBlock() {
-	if f.final {
-		if f.dict.availRead() > 0 {
-			f.toRead = f.dict.readFlush()
-		}
-		f.err = io.EOF
-	}
-	f.step = (*sliceInflater).nextBlock
-	f.stepState = 0
-}
-
-func (f *sliceInflater) moreBits() error {
-	c, err := f.readByte()
-	if err != nil {
-		return err
-	}
-	f.b |= uint32(c) << (f.nb & 31)
-	f.nb += 8
-	return nil
-}
-
-func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) {
-	n := uint(h.min)
-	nb, b := f.nb, f.b
-	for {
-		for nb < n {
-			c, err := f.readByte()
-			if err != nil {
-				f.b = b
-				f.nb = nb
-				return 0, err
-			}
-			b |= uint32(c) << (nb & 31)
-			nb += 8
-		}
-		chunk := h.chunks[b&(huffmanNumChunks-1)]
-		n = uint(chunk & huffmanCountMask)
-		if n > huffmanChunkBits {
-			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
-			n = uint(chunk & huffmanCountMask)
-		}
-		if n <= nb {
-			if n == 0 {
-				f.b = b
-				f.nb = nb
-				f.err = CorruptInputError(f.roffset)
-				return 0, f.err
-			}
-			f.b = b >> (n & 31)
-			f.nb = nb - n
-			return int(chunk >> huffmanValueShift), nil
-		}
-	}
-}
-
-func (f *sliceInflater) readHuffman() error {
-	for f.nb < 5+5+4 {
-		if err := f.moreBits(); err != nil {
-			return err
-		}
-	}
-	nlit := int(f.b&0x1F) + 257
-	if nlit > maxNumLit {
-		return CorruptInputError(f.roffset)
-	}
-	f.b >>= 5
-	ndist := int(f.b&0x1F) + 1
-	if ndist > maxNumDist {
-		return CorruptInputError(f.roffset)
-	}
-	f.b >>= 5
-	nclen := int(f.b&0xF) + 4
-	f.b >>= 4
-	f.nb -= 5 + 5 + 4
-	codebits := f.codebits[:]
-	bits := f.bits[:]
-	for i := range codebits {
-		codebits[i] = 0
-	}
-	for i := range bits {
-		bits[i] = 0
-	}
-	for i := 0; i < nclen; i++ {
-		for f.nb < 3 {
-			if err := f.moreBits(); err != nil {
-				return err
-			}
-		}
-		codebits[codeOrder[i]] = int(f.b & 0x7)
-		f.b >>= 3
-		f.nb -= 3
-	}
-	if !f.h1.init(codebits) {
-		return CorruptInputError(f.roffset)
-	}
-	for i := range bits {
-		bits[i] = 0
-	}
-	i := 0
-	for i < nlit+ndist {
-		x, err := f.huffSym(&f.h1)
-		if err != nil {
-			return err
-		}
-		switch {
-		case x < 16:
-			bits[i] = x
-			i++
-		case x == 16:
-			if i == 0 {
-				return CorruptInputError(f.roffset)
-			}
-			repeat := 3
-			for f.nb < 2 {
-				if err := f.moreBits(); err != nil {
-					return err
-				}
-			}
-			repeat += int(f.b & 0x3)
-			f.b >>= 2
-			f.nb -= 2
-			for repeat > 0 {
-				if i >= len(bits) {
-					return CorruptInputError(f.roffset)
-				}
-				bits[i] = bits[i-1]
-				i++
-				repeat--
-			}
-		case x == 17:
-			repeat := 3
-			for f.nb < 3 {
-				if err := f.moreBits(); err != nil {
-					return err
-				}
-			}
-			repeat += int(f.b & 0x7)
-			f.b >>= 3
-			f.nb -= 3
-			for repeat > 0 {
-				if i >= len(bits) {
-					return CorruptInputError(f.roffset)
-				}
-				bits[i] = 0
-				i++
-				repeat--
-			}
-		case x == 18:
-			repeat := 11
-			for f.nb < 7 {
-				if err := f.moreBits(); err != nil {
-					return err
-				}
-			}
-			repeat += int(f.b & 0x7F)
-			f.b >>= 7
-			f.nb -= 7
-			for repeat > 0 {
-				if i >= len(bits) {
-					return CorruptInputError(f.roffset)
-				}
-				bits[i] = 0
-				i++
-				repeat--
-			}
-		default:
-			return CorruptInputError(f.roffset)
-		}
-	}
-	if !f.h1.init(bits[:nlit]) {
-		return CorruptInputError(f.roffset)
-	}
-	if !f.h2.init(bits[nlit : nlit+ndist]) {
-		return CorruptInputError(f.roffset)
-	}
-	if f.h1.min < bits[endBlockMarker] {
-		f.h1.min = bits[endBlockMarker]
-	}
-	return nil
-}
diff --git a/internal/flatex/LICENSE b/internal/flatex/LICENSE
new file mode 100644
index 00000000..2a7cf70d
--- /dev/null
+++ b/internal/flatex/LICENSE
@@ -0,0 +1,27 @@
+Copyright 2009 The Go Authors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go
new file mode 100644
index 00000000..5a660b0a
--- /dev/null
+++ b/internal/flatex/decompress_bytes.go
@@ -0,0 +1,64 @@
+package flatex
+
+import (
+	"io"
+	"sync"
+
+	"git.sr.ht/~runxiyu/furgit/internal/bufpool"
+)
+
+// bufferDecompressor wraps the custom slice inflater so byte-slice
+// decompressions avoid repeated allocations.
+type bufferDecompressor struct {
+	inflater sliceInflater
+}
+
+var bufferDecompressorPool = sync.Pool{
+	New: func() any {
+		fixedHuffmanDecoderInit()
+		d := &bufferDecompressor{}
+		d.inflater.bits = new([maxNumLit + maxNumDist]int)
+		d.inflater.codebits = new([numCodes]int)
+		return d
+	},
+}
+
+// Decompress inflates the provided DEFLATE stream and returns the full output
+// in a pooled bufpool.Buffer along with the number of consumed bytes from src.
+func Decompress(src []byte) (bufpool.Buffer, int, error) {
+	return DecompressDict(src, nil)
+}
+
+// DecompressDict inflates the provided DEFLATE stream using dict as the preset
+// dictionary and returns the full output in a pooled bufpool.Buffer. The second
+// returned value reports how many bytes of src were consumed.
+func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) {
+	d := bufferDecompressorPool.Get().(*bufferDecompressor)
+	defer bufferDecompressorPool.Put(d)
+
+	if err := d.inflater.reset(src, dict); err != nil {
+		return bufpool.Buffer{}, 0, err
+	}
+
+	out := bufpool.Borrow(bufpool.DefaultBufferCap)
+	out.Resize(0)
+
+	for {
+		if len(d.inflater.toRead) > 0 {
+			out.Append(d.inflater.toRead)
+			d.inflater.toRead = nil
+			continue
+		}
+		if d.inflater.err != nil {
+			if d.inflater.err == io.EOF {
+				return out, d.inflater.pos, nil
+			}
+			out.Release()
+			return bufpool.Buffer{}, 0, d.inflater.err
+		}
+		d.inflater.step(&d.inflater)
+		if d.inflater.err != nil && len(d.inflater.toRead) == 0 {
+			d.inflater.toRead = d.inflater.dict.readFlush()
+		}
+	}
+}
diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go
new file mode 100644
index 00000000..afb84292
--- /dev/null
+++ b/internal/flatex/decompress_test.go
@@ -0,0 +1,76 @@
+package flatex
+
+import (
+	"bytes"
+	stdflate "compress/flate"
+	"testing"
+)
+
+func compressDeflate(t *testing.T, payload, dict []byte) []byte {
+	t.Helper()
+	var buf bytes.Buffer
+	var (
+		w   *stdflate.Writer
+		err error
+	)
+	if dict != nil {
+		w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict)
+	} else {
+		w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression)
+	}
+	if err != nil {
+		t.Fatalf("NewWriter: %v", err)
+	}
+	if _, err := w.Write(payload); err != nil {
+		t.Fatalf("Write: %v", err)
+	}
+	if err := w.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+	return buf.Bytes()
+}
+
+func TestDecompress(t *testing.T) {
+	payload := bytes.Repeat([]byte("golang"), 32)
+	compressed := compressDeflate(t, payload, nil)
+
+	out, _, err := Decompress(compressed)
+	if err != nil {
+		t.Fatalf("Decompress: %v", err)
+	}
+	defer out.Release()
+
+	if !bytes.Equal(out.Bytes(), payload) {
+		t.Fatalf("unexpected payload: got %q", out.Bytes())
+	}
+}
+
+func TestDecompressDict(t *testing.T) {
+	dict := []byte("furgit dictionary payload")
+	payload := append([]byte(nil), dict...)
+	payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...)
+
+	compressed := compressDeflate(t, payload, dict)
+
+	out, _, err := DecompressDict(compressed, dict)
+	if err != nil {
+		t.Fatalf("DecompressDict: %v", err)
+	}
+	defer out.Release()
+
+	if !bytes.Equal(out.Bytes(), payload) {
+		t.Fatalf("unexpected payload: got %q", out.Bytes())
+	}
+}
+
+func TestDecompressDictMissing(t *testing.T) {
+	dict := []byte("shared prefix to enforce dictionary usage")
+	payload := append([]byte(nil), dict...)
+	payload = append(payload, []byte(" trailing data to force reference")...)
+
+	compressed := compressDeflate(t, payload, dict)
+
+	if _, _, err := Decompress(compressed); err == nil {
+		t.Fatalf("expected error when dictionary missing")
+	}
+}
diff --git a/internal/flatex/dict_decoder.go b/internal/flatex/dict_decoder.go
new file mode 100644
index 00000000..7a81e640
--- /dev/null
+++ b/internal/flatex/dict_decoder.go
@@ -0,0 +1,182 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package flatex
+
+// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
+// LZ77 decompresses data through sequences of two forms of commands:
+//
+//   - Literal insertions: Runs of one or more symbols are inserted into the data
+//     stream as is. This is accomplished through the writeByte method for a
+//     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+//     Any valid stream must start with a literal insertion if no preset dictionary
+//     is used.
+//
+//   - Backward copies: Runs of one or more symbols are copied from previously
+//     emitted data. Backward copies come as the tuple (dist, length) where dist
+//     determines how far back in the stream to copy from and length determines how
+//     many bytes to copy. Note that it is valid for the length to be greater than
+//     the distance. Since LZ77 uses forward copies, that situation is used to
+//     perform a form of run-length encoding on repeated runs of symbols.
+//     The writeCopy and tryWriteCopy are used to implement this command.
+//
+// For performance reasons, this implementation performs little to no sanity
+// checks about the arguments. As such, the invariants documented for each
+// method call must be respected.
+type dictDecoder struct {
+	hist []byte // Sliding window history
+
+	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
+	wrPos int  // Current output position in buffer
+	rdPos int  // Have emitted hist[:rdPos] already
+	full  bool // Has a full window length been written yet?
+}
+
+// init initializes dictDecoder to have a sliding window dictionary of the given
+// size. If a preset dict is provided, it will initialize the dictionary with
+// the contents of dict.
+func (dd *dictDecoder) init(size int, dict []byte) {
+	*dd = dictDecoder{hist: dd.hist}
+
+	if cap(dd.hist) < size {
+		dd.hist = make([]byte, size)
+	}
+	dd.hist = dd.hist[:size]
+
+	if len(dict) > len(dd.hist) {
+		dict = dict[len(dict)-len(dd.hist):]
+	}
+	dd.wrPos = copy(dd.hist, dict)
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos = 0
+		dd.full = true
+	}
+	dd.rdPos = dd.wrPos
+}
+
+// histSize reports the total amount of historical data in the dictionary.
+func (dd *dictDecoder) histSize() int {
+	if dd.full {
+		return len(dd.hist)
+	}
+	return dd.wrPos
+}
+
+// availRead reports the number of bytes that can be flushed by readFlush.
+func (dd *dictDecoder) availRead() int {
+	return dd.wrPos - dd.rdPos
+}
+
+// availWrite reports the available amount of output buffer space.
+func (dd *dictDecoder) availWrite() int {
+	return len(dd.hist) - dd.wrPos
+}
+
+// writeSlice returns a slice of the available buffer to write data to.
+//
+// This invariant will be kept: len(s) <= availWrite()
+func (dd *dictDecoder) writeSlice() []byte {
+	return dd.hist[dd.wrPos:]
+}
+
+// writeMark advances the writer pointer by cnt.
+//
+// This invariant must be kept: 0 <= cnt <= availWrite()
+func (dd *dictDecoder) writeMark(cnt int) {
+	dd.wrPos += cnt
+}
+
+// writeByte writes a single byte to the dictionary.
+//
+// This invariant must be kept: 0 < availWrite()
+func (dd *dictDecoder) writeByte(c byte) {
+	dd.hist[dd.wrPos] = c
+	dd.wrPos++
+}
+
+// writeCopy copies a string at a given (dist, length) to the output.
+// This returns the number of bytes copied and may be less than the requested
+// length if the available space in the output buffer is too small.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) writeCopy(dist, length int) int {
+	dstBase := dd.wrPos
+	dstPos := dstBase
+	srcPos := dstPos - dist
+	endPos := dstPos + length
+	if endPos > len(dd.hist) {
+		endPos = len(dd.hist)
+	}
+
+	// Copy non-overlapping section after destination position.
+	//
+	// This section is non-overlapping in that the copy length for this section
+	// is always less than or equal to the backwards distance. This can occur
+	// if a distance refers to data that wraps-around in the buffer.
+	// Thus, a backwards copy is performed here; that is, the exact bytes in
+	// the source prior to the copy is placed in the destination.
+	if srcPos < 0 {
+		srcPos += len(dd.hist)
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
+		srcPos = 0
+	}
+
+	// Copy possibly overlapping section before destination position.
+	//
+	// This section can overlap if the copy length for this section is larger
+	// than the backwards distance. This is allowed by LZ77 so that repeated
+	// strings can be succinctly represented using (dist, length) pairs.
+	// Thus, a forwards copy is performed here; that is, the bytes copied is
+	// possibly dependent on the resulting bytes in the destination as the copy
+	// progresses along. This is functionally equivalent to the following:
+	//
+	//	for i := 0; i < endPos-dstPos; i++ {
+	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
+	//	}
+	//	dstPos = endPos
+	//
+	for dstPos < endPos {
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// tryWriteCopy tries to copy a string at a given (distance, length) to the
+// output. This specialized version is optimized for short distances.
+//
+// This method is designed to be inlined for performance reasons.
+//
+// This invariant must be kept: 0 < dist <= histSize()
+func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
+	dstPos := dd.wrPos
+	endPos := dstPos + length
+	if dstPos < dist || endPos > len(dd.hist) {
+		return 0
+	}
+	dstBase := dstPos
+	srcPos := dstPos - dist
+
+	// Copy possibly overlapping section before destination position.
+	for dstPos < endPos {
+		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
+	}
+
+	dd.wrPos = dstPos
+	return dstPos - dstBase
+}
+
+// readFlush returns a slice of the historical buffer that is ready to be
+// emitted to the user. The data returned by readFlush must be fully consumed
+// before calling any other dictDecoder methods.
+func (dd *dictDecoder) readFlush() []byte {
+	toRead := dd.hist[dd.rdPos:dd.wrPos]
+	dd.rdPos = dd.wrPos
+	if dd.wrPos == len(dd.hist) {
+		dd.wrPos, dd.rdPos = 0, 0
+		dd.full = true
+	}
+	return toRead
+}
diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go
new file mode 100644
index 00000000..5e9d861e
--- /dev/null
+++ b/internal/flatex/inflate.go
@@ -0,0 +1,839 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flatex implements the DEFLATE compressed data format, described in
+// RFC 1951.  The [compress/gzip] and [compress/zlib] packages implement access
+// to DEFLATE-based file formats.
+package flatex
+
+import (
+	"bufio"
+	"io"
+	"math/bits"
+	"strconv"
+	"sync"
+)
+
+const (
+	// The special code used to mark the end of a block.
+	endBlockMarker = 256
+	maxCodeLen     = 16      // max length of Huffman code
+	maxMatchOffset = 1 << 15 // The largest match offset
+	// The next three numbers come from the RFC section 3.2.7, with the
+	// additional proviso in section 3.2.5 which implies that distance codes
+	// 30 and 31 should never occur in compressed data.
+	maxNumLit  = 286
+	maxNumDist = 30
+	numCodes   = 19 // number of codes in Huffman meta-code
+)
+
+// Initialize the fixedHuffmanDecoder only once upon first use.
+var fixedOnce sync.Once
+var fixedHuffmanDecoder huffmanDecoder
+
+// A CorruptInputError reports the presence of corrupt input at a given offset.
+type CorruptInputError int64
+
+func (e CorruptInputError) Error() string {
+	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
+}
+
+// An InternalError reports an error in the flate code itself.
+type InternalError string
+
+func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
+
+// A ReadError reports an error encountered while reading input.
+//
+// Deprecated: No longer returned.
+type ReadError struct {
+	Offset int64 // byte offset where error occurred
+	Err    error // error returned by underlying Read
+}
+
+func (e *ReadError) Error() string {
+	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
+}
+
+// A WriteError reports an error encountered while writing output.
+//
+// Deprecated: No longer returned.
+type WriteError struct {
+	Offset int64 // byte offset where error occurred
+	Err    error // error returned by underlying Write
+}
+
+func (e *WriteError) Error() string {
+	return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
+}
+
+// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
+// to switch to a new underlying [Reader]. This permits reusing a ReadCloser
+// instead of allocating a new one.
+type Resetter interface {
+	// Reset discards any buffered data and resets the Resetter as if it was
+	// newly initialized with the given reader.
+	Reset(r io.Reader, dict []byte) error
+}
+
+// The data structure for decoding Huffman tables is based on that of
+// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
+// For codes smaller than the table width, there are multiple entries
+// (each combination of trailing bits has the same value). For codes
+// larger than the table width, the table contains a link to an overflow
+// table. The width of each entry in the link table is the maximum code
+// size minus the chunk width.
+//
+// Note that you can do a lookup in the table even without all bits
+// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
+// have the property that shorter codes come before longer ones, the
+// bit length estimate in the result is a lower bound on the actual
+// number of bits.
+//
+// See the following:
+//	https://github.com/madler/zlib/raw/master/doc/algorithm.txt
+
+// chunk & 15 is number of bits
+// chunk >> 4 is value, including table link
+
+const (
+	huffmanChunkBits  = 9
+	huffmanNumChunks  = 1 << huffmanChunkBits
+	huffmanCountMask  = 15
+	huffmanValueShift = 4
+)
+
+type huffmanDecoder struct {
+	min      int                      // the minimum code length
+	chunks   [huffmanNumChunks]uint32 // chunks as described above
+	links    [][]uint32               // overflow links
+	linkMask uint32                   // mask the width of the link table
+}
+
+// Initialize Huffman decoding tables from array of code lengths.
+// Following this function, h is guaranteed to be initialized into a complete
+// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
+// degenerate case where the tree has only a single symbol with length 1. Empty
+// trees are permitted.
+func (h *huffmanDecoder) init(lengths []int) bool {
+	// Sanity enables additional runtime tests during Huffman
+	// table construction. It's intended to be used during
+	// development to supplement the currently ad-hoc unit tests.
+	const sanity = false
+
+	if h.min != 0 {
+		*h = huffmanDecoder{}
+	}
+
+	// Count number of codes of each length,
+	// compute min and max length.
+	var count [maxCodeLen]int
+	var min, max int
+	for _, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		if min == 0 || n < min {
+			min = n
+		}
+		if n > max {
+			max = n
+		}
+		count[n]++
+	}
+
+	// Empty tree. The decompressor.huffSym function will fail later if the tree
+	// is used. Technically, an empty tree is only valid for the HDIST tree and
+	// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
+	// is guaranteed to fail since it will attempt to use the tree to decode the
+	// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
+	// guaranteed to fail later since the compressed data section must be
+	// composed of at least one symbol (the end-of-block marker).
+	if max == 0 {
+		return true
+	}
+
+	code := 0
+	var nextcode [maxCodeLen]int
+	for i := min; i <= max; i++ {
+		code <<= 1
+		nextcode[i] = code
+		code += count[i]
+	}
+
+	// Check that the coding is complete (i.e., that we've
+	// assigned all 2-to-the-max possible bit sequences).
+	// Exception: To be compatible with zlib, we also need to
+	// accept degenerate single-code codings. See also
+	// TestDegenerateHuffmanCoding.
+	if code != 1<<uint(max) && (code != 1 || max != 1) {
+		return false
+	}
+
+	h.min = min
+	if max > huffmanChunkBits {
+		numLinks := 1 << (uint(max) - huffmanChunkBits)
+		h.linkMask = uint32(numLinks - 1)
+
+		// create link tables
+		link := nextcode[huffmanChunkBits+1] >> 1
+		h.links = make([][]uint32, huffmanNumChunks-link)
+		for j := uint(link); j < huffmanNumChunks; j++ {
+			reverse := int(bits.Reverse16(uint16(j)))
+			reverse >>= uint(16 - huffmanChunkBits)
+			off := j - uint(link)
+			if sanity && h.chunks[reverse] != 0 {
+				panic("impossible: overwriting existing chunk")
+			}
+			h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
+			h.links[off] = make([]uint32, numLinks)
+		}
+	}
+
+	for i, n := range lengths {
+		if n == 0 {
+			continue
+		}
+		code := nextcode[n]
+		nextcode[n]++
+		chunk := uint32(i<<huffmanValueShift | n)
+		reverse := int(bits.Reverse16(uint16(code)))
+		reverse >>= uint(16 - n)
+		if n <= huffmanChunkBits {
+			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
+				// We should never need to overwrite
+				// an existing chunk. Also, 0 is
+				// never a valid chunk, because the
+				// lower 4 "count" bits should be
+				// between 1 and 15.
+				if sanity && h.chunks[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				h.chunks[off] = chunk
+			}
+		} else {
+			j := reverse & (huffmanNumChunks - 1)
+			if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
+				// Longer codes should have been
+				// associated with a link table above.
+				panic("impossible: not an indirect chunk")
+			}
+			value := h.chunks[j] >> huffmanValueShift
+			linktab := h.links[value]
+			reverse >>= huffmanChunkBits
+			for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
+				if sanity && linktab[off] != 0 {
+					panic("impossible: overwriting existing chunk")
+				}
+				linktab[off] = chunk
+			}
+		}
+	}
+
+	if sanity {
+		// Above we've sanity checked that we never overwrote
+		// an existing entry. Here we additionally check that
+		// we filled the tables completely.
+		for i, chunk := range h.chunks {
+			if chunk == 0 {
+				// As an exception, in the degenerate
+				// single-code case, we allow odd
+				// chunks to be missing.
+				if code == 1 && i%2 == 1 {
+					continue
+				}
+				panic("impossible: missing chunk")
+			}
+		}
+		for _, linktab := range h.links {
+			for _, chunk := range linktab {
+				if chunk == 0 {
+					panic("impossible: missing chunk")
+				}
+			}
+		}
+	}
+
+	return true
+}
+
+// The actual read interface needed by [NewReader].
+// If the passed in [io.Reader] does not also have ReadByte,
+// the [NewReader] will introduce its own buffering.
+type Reader interface {
+	io.Reader
+	io.ByteReader
+}
+
+// Decompress state.
+type decompressor struct {
+	// Input source.
+	r       Reader
+	rBuf    *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
+	roffset int64
+
+	// Input bits, in top of b.
+	b  uint32
+	nb uint
+
+	// Huffman decoders for literal/length, distance.
+	h1, h2 huffmanDecoder
+
+	// Length arrays used to define Huffman codes.
+	bits     *[maxNumLit + maxNumDist]int
+	codebits *[numCodes]int
+
+	// Output history, buffer.
+	dict dictDecoder
+
+	// Temporary buffer (avoids repeated allocation).
+	buf [4]byte
+
+	// Next step in the decompression,
+	// and decompression state.
+	step      func(*decompressor)
+	stepState int
+	final     bool
+	err       error
+	toRead    []byte
+	hl, hd    *huffmanDecoder
+	copyLen   int
+	copyDist  int
+}
+
+func (f *decompressor) nextBlock() {
+	for f.nb < 1+2 {
+		if f.err = f.moreBits(); f.err != nil {
+			return
+		}
+	}
+	f.final = f.b&1 == 1
+	f.b >>= 1
+	typ := f.b & 3
+	f.b >>= 2
+	f.nb -= 1 + 2
+	switch typ {
+	case 0:
+		f.dataBlock()
+	case 1:
+		// compressed, fixed Huffman tables
+		f.hl = &fixedHuffmanDecoder
+		f.hd = nil
+		f.huffmanBlock()
+	case 2:
+		// compressed, dynamic Huffman tables
+		if f.err = f.readHuffman(); f.err != nil {
+			break
+		}
+		f.hl = &f.h1
+		f.hd = &f.h2
+		f.huffmanBlock()
+	default:
+		// 3 is reserved.
+		f.err = CorruptInputError(f.roffset)
+	}
+}
+
+func (f *decompressor) Read(b []byte) (int, error) {
+	for {
+		if len(f.toRead) > 0 {
+			n := copy(b, f.toRead)
+			f.toRead = f.toRead[n:]
+			if len(f.toRead) == 0 {
+				return n, f.err
+			}
+			return n, nil
+		}
+		if f.err != nil {
+			return 0, f.err
+		}
+		f.step(f)
+		if f.err != nil && len(f.toRead) == 0 {
+			f.toRead = f.dict.readFlush() // Flush what's left in case of error
+		}
+	}
+}
+
+func (f *decompressor) Close() error {
+	if f.err == io.EOF {
+		return nil
+	}
+	return f.err
+}
+
+// RFC 1951 section 3.2.7.
+// Compression with dynamic Huffman codes
+
+var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
+
+func (f *decompressor) readHuffman() error {
+	// HLIT[5], HDIST[5], HCLEN[4].
+	for f.nb < 5+5+4 {
+		if err := f.moreBits(); err != nil {
+			return err
+		}
+	}
+	nlit := int(f.b&0x1F) + 257
+	if nlit > maxNumLit {
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	ndist := int(f.b&0x1F) + 1
+	if ndist > maxNumDist {
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	nclen := int(f.b&0xF) + 4
+	// numCodes is 19, so nclen is always valid.
+	f.b >>= 4
+	f.nb -= 5 + 5 + 4
+
+	// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
+	for i := 0; i < nclen; i++ {
+		for f.nb < 3 {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		f.codebits[codeOrder[i]] = int(f.b & 0x7)
+		f.b >>= 3
+		f.nb -= 3
+	}
+	for i := nclen; i < len(codeOrder); i++ {
+		f.codebits[codeOrder[i]] = 0
+	}
+	if !f.h1.init(f.codebits[0:]) {
+		return CorruptInputError(f.roffset)
+	}
+
+	// HLIT + 257 code lengths, HDIST + 1 code lengths,
+	// using the code length Huffman code.
+	for i, n := 0, nlit+ndist; i < n; {
+		x, err := f.huffSym(&f.h1)
+		if err != nil {
+			return err
+		}
+		if x < 16 {
+			// Actual length.
+			f.bits[i] = x
+			i++
+			continue
+		}
+		// Repeat previous length or zero.
+		var rep int
+		var nb uint
+		var b int
+		switch x {
+		default:
+			return InternalError("unexpected length code")
+		case 16:
+			rep = 3
+			nb = 2
+			if i == 0 {
+				return CorruptInputError(f.roffset)
+			}
+			b = f.bits[i-1]
+		case 17:
+			rep = 3
+			nb = 3
+			b = 0
+		case 18:
+			rep = 11
+			nb = 7
+			b = 0
+		}
+		for f.nb < nb {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		rep += int(f.b & uint32(1<<nb-1))
+		f.b >>= nb
+		f.nb -= nb
+		if i+rep > n {
+			return CorruptInputError(f.roffset)
+		}
+		for j := 0; j < rep; j++ {
+			f.bits[i] = b
+			i++
+		}
+	}
+
+	if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
+		return CorruptInputError(f.roffset)
+	}
+
+	// As an optimization, we can initialize the min bits to read at a time
+	// for the HLIT tree to the length of the EOB marker since we know that
+	// every block must terminate with one. This preserves the property that
+	// we never read any extra bytes after the end of the DEFLATE stream.
+	if f.h1.min < f.bits[endBlockMarker] {
+		f.h1.min = f.bits[endBlockMarker]
+	}
+
+	return nil
+}
+
+// Decode a single Huffman block from f.
+// hl and hd are the Huffman states for the lit/length values
+// and the distance values, respectively. If hd == nil, using the
+// fixed distance encoding associated with fixed Huffman blocks.
+func (f *decompressor) huffmanBlock() {
+	const (
+		stateInit = iota // Zero value must be stateInit
+		stateDict
+	)
+
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	// Read literal and/or (length, distance) according to RFC section 3.2.3.
+	{
+		v, err := f.huffSym(f.hl)
+		if err != nil {
+			f.err = err
+			return
+		}
+		var n uint // number of bits extra
+		var length int
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*decompressor).huffmanBlock
+				f.stepState = stateInit
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.finishBlock()
+			return
+		// otherwise, reference to older data
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			// have 1 bit in bottom of dist, need nb more.
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		// No check on length; encoding can be prescient.
+		if dist > f.dict.histSize() {
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	// Perform a backwards copy according to RFC section 3.2.3.
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*decompressor).huffmanBlock // We need to continue this work
+			f.stepState = stateDict
+			return
+		}
+		goto readLiteral
+	}
+}
+
+// Copy a single uncompressed data block from input to output.
+func (f *decompressor) dataBlock() {
+	// Uncompressed.
+	// Discard current half-byte.
+	f.nb = 0
+	f.b = 0
+
+	// Length then ones-complement of length.
+	nr, err := io.ReadFull(f.r, f.buf[0:4])
+	f.roffset += int64(nr)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+	n := int(f.buf[0]) | int(f.buf[1])<<8
+	nn := int(f.buf[2]) | int(f.buf[3])<<8
+	if uint16(nn) != uint16(^n) {
+		f.err = CorruptInputError(f.roffset)
+		return
+	}
+
+	if n == 0 {
+		f.toRead = f.dict.readFlush()
+		f.finishBlock()
+		return
+	}
+
+	f.copyLen = n
+	f.copyData()
+}
+
+// copyData copies f.copyLen bytes from the underlying reader into f.hist.
+// It pauses for reads when f.hist is full.
+func (f *decompressor) copyData() {
+	buf := f.dict.writeSlice()
+	if len(buf) > f.copyLen {
+		buf = buf[:f.copyLen]
+	}
+
+	cnt, err := io.ReadFull(f.r, buf)
+	f.roffset += int64(cnt)
+	f.copyLen -= cnt
+	f.dict.writeMark(cnt)
+	if err != nil {
+		f.err = noEOF(err)
+		return
+	}
+
+	if f.dict.availWrite() == 0 || f.copyLen > 0 {
+		f.toRead = f.dict.readFlush()
+		f.step = (*decompressor).copyData
+		return
+	}
+	f.finishBlock()
+}
+
+func (f *decompressor) finishBlock() {
+	if f.final {
+		if f.dict.availRead() > 0 {
+			f.toRead = f.dict.readFlush()
+		}
+		f.err = io.EOF
+	}
+	f.step = (*decompressor).nextBlock
+}
+
+// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
+func noEOF(e error) error {
+	if e == io.EOF {
+		return io.ErrUnexpectedEOF
+	}
+	return e
+}
+
+func (f *decompressor) moreBits() error {
+	c, err := f.r.ReadByte()
+	if err != nil {
+		return noEOF(err)
+	}
+	f.roffset++
+	f.b |= uint32(c) << f.nb
+	f.nb += 8
+	return nil
+}
+
+// Read the next Huffman-encoded symbol from f according to h.
+func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
+	// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+	// with single element, huffSym must error on these two edge cases. In both
+	// cases, the chunks slice will be 0 for the invalid sequence, leading it
+	// satisfy the n == 0 check below.
+	n := uint(h.min)
+	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+	// but is smart enough to keep local variables in registers, so use nb and b,
+	// inline call to moreBits and reassign b,nb back to f on return.
+	nb, b := f.nb, f.b
+	for {
+		for nb < n {
+			c, err := f.r.ReadByte()
+			if err != nil {
+				f.b = b
+				f.nb = nb
+				return 0, noEOF(err)
+			}
+			f.roffset++
+			b |= uint32(c) << (nb & 31)
+			nb += 8
+		}
+		chunk := h.chunks[b&(huffmanNumChunks-1)]
+		n = uint(chunk & huffmanCountMask)
+		if n > huffmanChunkBits {
+			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
+			n = uint(chunk & huffmanCountMask)
+		}
+		if n <= nb {
+			if n == 0 {
+				f.b = b
+				f.nb = nb
+				f.err = CorruptInputError(f.roffset)
+				return 0, f.err
+			}
+			f.b = b >> (n & 31)
+			f.nb = nb - n
+			return int(chunk >> huffmanValueShift), nil
+		}
+	}
+}
+
+func (f *decompressor) makeReader(r io.Reader) {
+	if rr, ok := r.(Reader); ok {
+		f.rBuf = nil
+		f.r = rr
+		return
+	}
+	// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
+	if f.rBuf != nil {
+		f.rBuf.Reset(r)
+	} else {
+		// bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader.
+		f.rBuf = bufio.NewReader(r)
+	}
+	f.r = f.rBuf
+}
+
+func fixedHuffmanDecoderInit() {
+	fixedOnce.Do(func() {
+		// These come from the RFC section 3.2.6.
+		var bits [288]int
+		for i := 0; i < 144; i++ {
+			bits[i] = 8
+		}
+		for i := 144; i < 256; i++ {
+			bits[i] = 9
+		}
+		for i := 256; i < 280; i++ {
+			bits[i] = 7
+		}
+		for i := 280; i < 288; i++ {
+			bits[i] = 8
+		}
+		fixedHuffmanDecoder.init(bits[:])
+	})
+}
+
+func (f *decompressor) Reset(r io.Reader, dict []byte) error {
+	*f = decompressor{
+		rBuf:     f.rBuf,
+		bits:     f.bits,
+		codebits: f.codebits,
+		dict:     f.dict,
+		step:     (*decompressor).nextBlock,
+	}
+	f.makeReader(r)
+	f.dict.init(maxMatchOffset, dict)
+	return nil
+}
+
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r.
+// If r does not also implement [io.ByteReader],
+// the decompressor may read more data than necessary from r.
+// The reader returns [io.EOF] after the final block in the DEFLATE stream has
+// been encountered. Any trailing data after the final block is ignored.
+//
+// The [io.ReadCloser] returned by NewReader also implements [Resetter].
+func NewReader(r io.Reader) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = (*decompressor).nextBlock
+	f.dict.init(maxMatchOffset, nil)
+	return &f
+}
+
+// NewReaderDict is like [NewReader] but initializes the reader
+// with a preset dictionary. The returned reader behaves as if
+// the uncompressed data stream started with the given dictionary,
+// which has already been read. NewReaderDict is typically used
+// to read data compressed by [NewWriterDict].
+//
+// The ReadCloser returned by NewReaderDict also implements [Resetter].
+func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
+	fixedHuffmanDecoderInit()
+
+	var f decompressor
+	f.makeReader(r)
+	f.bits = new([maxNumLit + maxNumDist]int)
+	f.codebits = new([numCodes]int)
+	f.step = (*decompressor).nextBlock
+	f.dict.init(maxMatchOffset, dict)
+	return &f
+}
diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go
new file mode 100644
index 00000000..9ece08f4
--- /dev/null
+++ b/internal/flatex/slice_inflate.go
@@ -0,0 +1,479 @@
+package flatex
+
+import (
+	"io"
+	"math/bits"
+)
+
+// sliceInflater is a specialized DEFLATE decoder that reads directly from an
+// in-memory byte slice. It mirrors the main decompressor but avoids the
+// overhead of the Reader interfaces, enabling faster byte-slice decoding.
+type sliceInflater struct {
+	input   []byte
+	pos     int
+	roffset int64
+
+	b  uint32
+	nb uint
+
+	h1, h2 huffmanDecoder
+
+	bits     *[maxNumLit + maxNumDist]int
+	codebits *[numCodes]int
+
+	dict dictDecoder
+
+	toRead    []byte
+	step      func(*sliceInflater)
+	stepState int
+	final     bool
+	err       error
+	hl, hd    *huffmanDecoder
+	copyLen   int
+	copyDist  int
+}
+
+func (f *sliceInflater) reset(src []byte, dict []byte) error {
+	bits := f.bits
+	codebits := f.codebits
+	dictState := f.dict
+	*f = sliceInflater{
+		input:    src,
+		bits:     bits,
+		codebits: codebits,
+		dict:     dictState,
+		step:     (*sliceInflater).nextBlock,
+	}
+	f.dict.init(maxMatchOffset, dict)
+	return nil
+}
+
+func (f *sliceInflater) readByte() (byte, error) {
+	if f.pos >= len(f.input) {
+		return 0, io.ErrUnexpectedEOF
+	}
+	b := f.input[f.pos]
+	f.pos++
+	f.roffset++
+	return b, nil
+}
+
+func (f *sliceInflater) readBytes(n int) ([]byte, error) {
+	if n < 0 || f.pos+n > len(f.input) {
+		f.pos = len(f.input)
+		return nil, io.ErrUnexpectedEOF
+	}
+	s := f.input[f.pos : f.pos+n]
+	f.pos += n
+	f.roffset += int64(n)
+	return s, nil
+}
+
+func (f *sliceInflater) nextBlock() {
+	for f.nb < 1+2 {
+		if err := f.moreBits(); err != nil {
+			f.err = err
+			return
+		}
+	}
+	f.final = f.b&1 == 1
+	f.b >>= 1
+	typ := f.b & 3
+	f.b >>= 2
+	f.nb -= 1 + 2
+	switch typ {
+	case 0:
+		f.dataBlock()
+	case 1:
+		f.hl = &fixedHuffmanDecoder
+		f.hd = nil
+		f.huffmanBlock()
+	case 2:
+		if err := f.readHuffman(); err != nil {
+			f.err = err
+			return
+		}
+		f.hl = &f.h1
+		f.hd = &f.h2
+		f.huffmanBlock()
+	default:
+		f.err = CorruptInputError(f.roffset)
+	}
+}
+
+func (f *sliceInflater) huffmanBlock() {
+	const (
+		stateInit = iota
+		stateDict
+	)
+	switch f.stepState {
+	case stateInit:
+		goto readLiteral
+	case stateDict:
+		goto copyHistory
+	}
+
+readLiteral:
+	{
+		v, err := f.huffSym(f.hl)
+		if err != nil {
+			f.err = err
+			return
+		}
+		var n uint
+		var length int
+		switch {
+		case v < 256:
+			f.dict.writeByte(byte(v))
+			if f.dict.availWrite() == 0 {
+				f.toRead = f.dict.readFlush()
+				f.step = (*sliceInflater).huffmanBlock
+				f.stepState = stateInit
+				return
+			}
+			goto readLiteral
+		case v == 256:
+			f.finishBlock()
+			return
+		case v < 265:
+			length = v - (257 - 3)
+			n = 0
+		case v < 269:
+			length = v*2 - (265*2 - 11)
+			n = 1
+		case v < 273:
+			length = v*4 - (269*4 - 19)
+			n = 2
+		case v < 277:
+			length = v*8 - (273*8 - 35)
+			n = 3
+		case v < 281:
+			length = v*16 - (277*16 - 67)
+			n = 4
+		case v < 285:
+			length = v*32 - (281*32 - 131)
+			n = 5
+		case v < maxNumLit:
+			length = 258
+			n = 0
+		default:
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+		if n > 0 {
+			for f.nb < n {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			length += int(f.b & uint32(1<<n-1))
+			f.b >>= n
+			f.nb -= n
+		}
+
+		var dist int
+		if f.hd == nil {
+			for f.nb < 5 {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
+			f.b >>= 5
+			f.nb -= 5
+		} else {
+			if dist, err = f.huffSym(f.hd); err != nil {
+				f.err = err
+				return
+			}
+		}
+
+		switch {
+		case dist < 4:
+			dist++
+		case dist < maxNumDist:
+			nb := uint(dist-2) >> 1
+			extra := (dist & 1) << nb
+			for f.nb < nb {
+				if err = f.moreBits(); err != nil {
+					f.err = err
+					return
+				}
+			}
+			extra |= int(f.b & uint32(1<<nb-1))
+			f.b >>= nb
+			f.nb -= nb
+			dist = 1<<(nb+1) + 1 + extra
+		default:
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		if dist > f.dict.histSize() {
+			f.err = CorruptInputError(f.roffset)
+			return
+		}
+
+		f.copyLen, f.copyDist = length, dist
+		goto copyHistory
+	}
+
+copyHistory:
+	{
+		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		if cnt == 0 {
+			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+		}
+		f.copyLen -= cnt
+
+		if f.dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*sliceInflater).huffmanBlock
+			f.stepState = stateDict
+			return
+		}
+		goto readLiteral
+	}
+}
+
+func (f *sliceInflater) dataBlock() {
+	f.nb = 0
+	f.b = 0
+
+	hdr, err := f.readBytes(4)
+	if err != nil {
+		f.err = err
+		return
+	}
+	n := int(hdr[0]) | int(hdr[1])<<8
+	nn := int(hdr[2]) | int(hdr[3])<<8
+	if uint16(nn) != uint16(^n) {
+		f.err = CorruptInputError(f.roffset)
+		return
+	}
+
+	if n == 0 {
+		f.toRead = f.dict.readFlush()
+		f.finishBlock()
+		return
+	}
+
+	f.copyLen = n
+	f.copyData()
+}
+
+func (f *sliceInflater) copyData() {
+	for {
+		if f.copyLen == 0 {
+			f.finishBlock()
+			return
+		}
+		buf := f.dict.writeSlice()
+		if len(buf) == 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*sliceInflater).copyData
+			return
+		}
+		n := f.copyLen
+		if n > len(buf) {
+			n = len(buf)
+		}
+		if f.pos+n > len(f.input) {
+			f.err = io.ErrUnexpectedEOF
+			return
+		}
+		copy(buf[:n], f.input[f.pos:f.pos+n])
+		f.pos += n
+		f.roffset += int64(n)
+		f.copyLen -= n
+		f.dict.writeMark(n)
+		if f.dict.availWrite() == 0 {
+			f.toRead = f.dict.readFlush()
+			f.step = (*sliceInflater).copyData
+			return
+		}
+	}
+}
+
+func (f *sliceInflater) finishBlock() {
+	if f.final {
+		if f.dict.availRead() > 0 {
+			f.toRead = f.dict.readFlush()
+		}
+		f.err = io.EOF
+	}
+	f.step = (*sliceInflater).nextBlock
+	f.stepState = 0
+}
+
+func (f *sliceInflater) moreBits() error {
+	c, err := f.readByte()
+	if err != nil {
+		return err
+	}
+	f.b |= uint32(c) << (f.nb & 31)
+	f.nb += 8
+	return nil
+}
+
+func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) {
+	n := uint(h.min)
+	nb, b := f.nb, f.b
+	for {
+		for nb < n {
+			c, err := f.readByte()
+			if err != nil {
+				f.b = b
+				f.nb = nb
+				return 0, err
+			}
+			b |= uint32(c) << (nb & 31)
+			nb += 8
+		}
+		chunk := h.chunks[b&(huffmanNumChunks-1)]
+		n = uint(chunk & huffmanCountMask)
+		if n > huffmanChunkBits {
+			chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
+			n = uint(chunk & huffmanCountMask)
+		}
+		if n <= nb {
+			if n == 0 {
+				f.b = b
+				f.nb = nb
+				f.err = CorruptInputError(f.roffset)
+				return 0, f.err
+			}
+			f.b = b >> (n & 31)
+			f.nb = nb - n
+			return int(chunk >> huffmanValueShift), nil
+		}
+	}
+}
+
+func (f *sliceInflater) readHuffman() error {
+	for f.nb < 5+5+4 {
+		if err := f.moreBits(); err != nil {
+			return err
+		}
+	}
+	nlit := int(f.b&0x1F) + 257
+	if nlit > maxNumLit {
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	ndist := int(f.b&0x1F) + 1
+	if ndist > maxNumDist {
+		return CorruptInputError(f.roffset)
+	}
+	f.b >>= 5
+	nclen := int(f.b&0xF) + 4
+	f.b >>= 4
+	f.nb -= 5 + 5 + 4
+	codebits := f.codebits[:]
+	bits := f.bits[:]
+	for i := range codebits {
+		codebits[i] = 0
+	}
+	for i := range bits {
+		bits[i] = 0
+	}
+	for i := 0; i < nclen; i++ {
+		for f.nb < 3 {
+			if err := f.moreBits(); err != nil {
+				return err
+			}
+		}
+		codebits[codeOrder[i]] = int(f.b & 0x7)
+		f.b >>= 3
+		f.nb -= 3
+	}
+	if !f.h1.init(codebits) {
+		return CorruptInputError(f.roffset)
+	}
+	for i := range bits {
+		bits[i] = 0
+	}
+	i := 0
+	for i < nlit+ndist {
+		x, err := f.huffSym(&f.h1)
+		if err != nil {
+			return err
+		}
+		switch {
+		case x < 16:
+			bits[i] = x
+			i++
+		case x == 16:
+			if i == 0 {
+				return CorruptInputError(f.roffset)
+			}
+			repeat := 3
+			for f.nb < 2 {
+				if err := f.moreBits(); err != nil {
+					return err
+				}
+			}
+			repeat += int(f.b & 0x3)
+			f.b >>= 2
+			f.nb -= 2
+			for repeat > 0 {
+				if i >= len(bits) {
+					return CorruptInputError(f.roffset)
+				}
+				bits[i] = bits[i-1]
+				i++
+				repeat--
+			}
+		case x == 17:
+			repeat := 3
+			for f.nb < 3 {
+				if err := f.moreBits(); err != nil {
+					return err
+				}
+			}
+			repeat += int(f.b & 0x7)
+			f.b >>= 3
+			f.nb -= 3
+			for repeat > 0 {
+				if i >= len(bits) {
+					return CorruptInputError(f.roffset)
+				}
+				bits[i] = 0
+				i++
+				repeat--
+			}
+		case x == 18:
+			repeat := 11
+			for f.nb < 7 {
+				if err := f.moreBits(); err != nil {
+					return err
+				}
+			}
+			repeat += int(f.b & 0x7F)
+			f.b >>= 7
+			f.nb -= 7
+			for repeat > 0 {
+				if i >= len(bits) {
+					return CorruptInputError(f.roffset)
+				}
+				bits[i] = 0
+				i++
+				repeat--
+			}
+		default:
+			return CorruptInputError(f.roffset)
+		}
+	}
+	if !f.h1.init(bits[:nlit]) {
+		return CorruptInputError(f.roffset)
+	}
+	if !f.h2.init(bits[nlit : nlit+ndist]) {
+		return CorruptInputError(f.roffset)
+	}
+	if f.h1.min < bits[endBlockMarker] {
+		f.h1.min = bits[endBlockMarker]
+	}
+	return nil
+}
diff --git a/internal/zlib/LICENSE b/internal/zlib/LICENSE
deleted file mode 100644
index 2a7cf70d..00000000
--- a/internal/zlib/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright 2009 The Go Authors.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-   * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-   * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-   * Neither the name of Google LLC nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/internal/zlib/decompress.go b/internal/zlib/decompress.go
deleted file mode 100644
index 55f6d3e2..00000000
--- a/internal/zlib/decompress.go
+++ /dev/null
@@ -1,69 +0,0 @@
-package zlib
-
-import (
-	"encoding/binary"
-	"io"
-
-	"git.sr.ht/~runxiyu/furgit/internal/adler32"
-	"git.sr.ht/~runxiyu/furgit/internal/bufpool"
-	"git.sr.ht/~runxiyu/furgit/internal/flate"
-)
-
-// Decompress inflates the provided zlib wrapped stream and returns the
-// uncompressed data inside a pooled bufpool.Buffer.
-func Decompress(src []byte) (bufpool.Buffer, error) {
-	return DecompressDict(src, nil)
-}
-
-// DecompressDict is like Decompress but accepts a preset dictionary. The
-// dictionary must match the checksum embedded in the stream if the dictionary
-// flag is present.
-func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, error) {
-	if len(src) < 6 {
-		return bufpool.Buffer{}, io.ErrUnexpectedEOF
-	}
-
-	cmf := src[0]
-	flg := src[1]
-	if (cmf&0x0f != zlibDeflate) || (cmf>>4 > zlibMaxWindow) || (binary.BigEndian.Uint16(src[:2])%31 != 0) {
-		return bufpool.Buffer{}, ErrHeader
-	}
-
-	offset := 2
-	haveDict := flg&0x20 != 0
-	if haveDict {
-		if len(src) < offset+4 {
-			return bufpool.Buffer{}, io.ErrUnexpectedEOF
-		}
-		if dict == nil {
-			return bufpool.Buffer{}, ErrDictionary
-		}
-		checksum := binary.BigEndian.Uint32(src[offset : offset+4])
-		if checksum != adler32.Checksum(dict) {
-			return bufpool.Buffer{}, ErrDictionary
-		}
-		offset += 4
-	}
-
-	if len(src[offset:]) < 4 {
-		return bufpool.Buffer{}, io.ErrUnexpectedEOF
-	}
-
-	deflateData := src[offset:]
-	out, consumed, err := flate.DecompressDict(deflateData, dict)
-	if err != nil {
-		return bufpool.Buffer{}, err
-	}
-
-	checksumPos := offset + consumed
-	if checksumPos+4 > len(src) {
-		out.Release()
-		return bufpool.Buffer{}, io.ErrUnexpectedEOF
-	}
-	expected := binary.BigEndian.Uint32(src[checksumPos : checksumPos+4])
-	if expected != adler32.Checksum(out.Bytes()) {
-		out.Release()
-		return bufpool.Buffer{}, ErrChecksum
-	}
-	return out, nil
-}
diff --git a/internal/zlib/decompress_test.go b/internal/zlib/decompress_test.go
deleted file mode 100644
index bb517ae6..00000000
--- a/internal/zlib/decompress_test.go
+++ /dev/null
@@ -1,84 +0,0 @@
-package zlib
-
-import (
-	"bytes"
-	stdzlib "compress/zlib"
-	"testing"
-)
-
-func compressZlib(t *testing.T, payload, dict []byte) []byte {
-	t.Helper()
-	var buf bytes.Buffer
-	var (
-		w   *stdzlib.Writer
-		err error
-	)
-	if dict != nil {
-		w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict)
-	} else {
-		w = stdzlib.NewWriter(&buf)
-	}
-	if err != nil {
-		t.Fatalf("NewWriter: %v", err)
-	}
-	if _, err := w.Write(payload); err != nil {
-		t.Fatalf("Write: %v", err)
-	}
-	if err := w.Close(); err != nil {
-		t.Fatalf("Close: %v", err)
-	}
-	return buf.Bytes()
-}
-
-func TestDecompress(t *testing.T) {
-	payload := []byte("hello, zlib world!")
-	compressed := compressZlib(t, payload, nil)
-
-	out, err := Decompress(compressed)
-	if err != nil {
-		t.Fatalf("Decompress: %v", err)
-	}
-	defer out.Release()
-
-	if !bytes.Equal(out.Bytes(), payload) {
-		t.Fatalf("unexpected payload %q", out.Bytes())
-	}
-}
-
-func TestDecompressDict(t *testing.T) {
-	dict := []byte("git dictionary for zlib")
-	payload := append([]byte(nil), dict...)
-	payload = append(payload, []byte(" -- extended body -- extended body")...)
-	compressed := compressZlib(t, payload, dict)
-
-	out, err := DecompressDict(compressed, dict)
-	if err != nil {
-		t.Fatalf("DecompressDict: %v", err)
-	}
-	defer out.Release()
-
-	if !bytes.Equal(out.Bytes(), payload) {
-		t.Fatalf("unexpected payload %q", out.Bytes())
-	}
-}
-
-func TestDecompressDictMissing(t *testing.T) {
-	dict := []byte("preset dictionary")
-	payload := append([]byte(nil), dict...)
-	payload = append(payload, []byte(" .. more data ..")...)
-	compressed := compressZlib(t, payload, dict)
-
-	if _, err := Decompress(compressed); err != ErrDictionary {
-		t.Fatalf("expected ErrDictionary, got %v", err)
-	}
-}
-
-func TestDecompressChecksumError(t *testing.T) {
-	payload := []byte("checksum check")
-	compressed := compressZlib(t, payload, nil)
-	compressed[len(compressed)-1] ^= 0xff
-
-	if _, err := Decompress(compressed); err != ErrChecksum {
-		t.Fatalf("expected ErrChecksum, got %v", err)
-	}
-}
diff --git a/internal/zlib/reader.go b/internal/zlib/reader.go
deleted file mode 100644
index 2c8fd215..00000000
--- a/internal/zlib/reader.go
+++ /dev/null
@@ -1,198 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package zlib implements reading and writing of zlib format compressed data,
-as specified in RFC 1950.
-
-This package differs from the standard library's compress/zlib package
-in that it pools readers to reduce allocations. Writers are unchanged.
-
-Note that closing the reader causes it to be returned to a pool for
-reuse. Therefore, the caller must not retain references to the
-reader after closing it; in the standard library's compress/zlib package,
-it is legal to Reset a closed reader and continue using it; that is
-not allowed here, so there is simply no Resetter interface.
-
-The implementation provides filters that uncompress during reading
-and compress during writing.  For example, to write compressed data
-to a buffer:
-
-	var b bytes.Buffer
-	w := zlib.NewWriter(&b)
-	w.Write([]byte("hello, world\n"))
-	w.Close()
-
-and to read that data back:
-
-	r, err := zlib.NewReader(&b)
-	io.Copy(os.Stdout, r)
-	r.Close()
-*/
-package zlib
-
-import (
-	"bufio"
-	"encoding/binary"
-	"errors"
-	"hash"
-	"io"
-	"sync"
-
-	"git.sr.ht/~runxiyu/furgit/internal/adler32"
-	"git.sr.ht/~runxiyu/furgit/internal/flate"
-)
-
-const (
-	zlibDeflate   = 8
-	zlibMaxWindow = 7
-)
-
-var (
-	// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
-	ErrChecksum = errors.New("zlib: invalid checksum")
-	// ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
-	ErrDictionary = errors.New("zlib: invalid dictionary")
-	// ErrHeader is returned when reading ZLIB data that has an invalid header.
-	ErrHeader = errors.New("zlib: invalid header")
-)
-
-var pool = sync.Pool{
-	New: func() any {
-		r := new(reader)
-		return r
-	},
-}
-
-type reader struct {
-	r            flate.Reader
-	decompressor io.ReadCloser
-	digest       hash.Hash32
-	err          error
-	scratch      [4]byte
-}
-
-// NewReader creates a new ReadCloser.
-// Reads from the returned ReadCloser read and decompress data from r.
-// If r does not implement [io.ByteReader], the decompressor may read more
-// data than necessary from r.
-// It is the caller's responsibility to call Close on the ReadCloser when done.
-func NewReader(r io.Reader) (io.ReadCloser, error) {
-	return NewReaderDict(r, nil)
-}
-
-// NewReaderDict is like [NewReader] but uses a preset dictionary.
-// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
-// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
-func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
-	v := pool.Get()
-	z, ok := v.(*reader)
-	if !ok {
-		panic("zlib: pool returned unexpected type")
-	}
-	err := z.Reset(r, dict)
-	if err != nil {
-		return nil, err
-	}
-	return z, nil
-}
-
-func (z *reader) Read(p []byte) (int, error) {
-	if z.err != nil {
-		return 0, z.err
-	}
-
-	var n int
-	n, z.err = z.decompressor.Read(p)
-	z.digest.Write(p[0:n])
-	if z.err != io.EOF {
-		// In the normal case we return here.
-		return n, z.err
-	}
-
-	// Finished file; check checksum.
-	if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
-		if err == io.EOF {
-			err = io.ErrUnexpectedEOF
-		}
-		z.err = err
-		return n, z.err
-	}
-	// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
-	checksum := binary.BigEndian.Uint32(z.scratch[:4])
-	if checksum != z.digest.Sum32() {
-		z.err = ErrChecksum
-		return n, z.err
-	}
-	return n, io.EOF
-}
-
-// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader].
-// In order for the ZLIB checksum to be verified, the reader must be
-// fully consumed until the [io.EOF].
-func (z *reader) Close() error {
-	if z.err != nil && z.err != io.EOF {
-		return z.err
-	}
-	z.err = z.decompressor.Close()
-	if z.err != nil {
-		return z.err
-	}
-
-	pool.Put(z)
-	return nil
-}
-
-func (z *reader) Reset(r io.Reader, dict []byte) error {
-	*z = reader{decompressor: z.decompressor}
-	if fr, ok := r.(flate.Reader); ok {
-		z.r = fr
-	} else {
-		z.r = bufio.NewReader(r)
-	}
-
-	// Read the header (RFC 1950 section 2.2.).
-	_, z.err = io.ReadFull(z.r, z.scratch[0:2])
-	if z.err != nil {
-		if z.err == io.EOF {
-			z.err = io.ErrUnexpectedEOF
-		}
-		return z.err
-	}
-	h := binary.BigEndian.Uint16(z.scratch[:2])
-	if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) {
-		z.err = ErrHeader
-		return z.err
-	}
-	haveDict := z.scratch[1]&0x20 != 0
-	if haveDict {
-		_, z.err = io.ReadFull(z.r, z.scratch[0:4])
-		if z.err != nil {
-			if z.err == io.EOF {
-				z.err = io.ErrUnexpectedEOF
-			}
-			return z.err
-		}
-		checksum := binary.BigEndian.Uint32(z.scratch[:4])
-		if checksum != adler32.Checksum(dict) {
-			z.err = ErrDictionary
-			return z.err
-		}
-	}
-
-	if z.decompressor == nil {
-		if haveDict {
-			z.decompressor = flate.NewReaderDict(z.r, dict)
-		} else {
-			z.decompressor = flate.NewReader(z.r)
-		}
-	} else {
-		z.err = z.decompressor.(flate.Resetter).Reset(z.r, dict)
-		if z.err != nil {
-			return z.err
-		}
-	}
-	z.digest = adler32.New()
-	return nil
-}
diff --git a/internal/zlibx/LICENSE b/internal/zlibx/LICENSE
new file mode 100644
index 00000000..2a7cf70d
--- /dev/null
+++ b/internal/zlibx/LICENSE
@@ -0,0 +1,27 @@
+Copyright 2009 The Go Authors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/internal/zlibx/decompress.go b/internal/zlibx/decompress.go
new file mode 100644
index 00000000..34e62c6f
--- /dev/null
+++ b/internal/zlibx/decompress.go
@@ -0,0 +1,69 @@
+package zlibx
+
+import (
+	"encoding/binary"
+	"io"
+
+	"git.sr.ht/~runxiyu/furgit/internal/adler32"
+	"git.sr.ht/~runxiyu/furgit/internal/bufpool"
+	"git.sr.ht/~runxiyu/furgit/internal/flatex"
+)
+
+// Decompress inflates the provided zlib wrapped stream and returns the
+// uncompressed data inside a pooled bufpool.Buffer.
+func Decompress(src []byte) (bufpool.Buffer, error) {
+	return DecompressDict(src, nil)
+}
+
+// DecompressDict is like Decompress but accepts a preset dictionary. The
+// dictionary must match the checksum embedded in the stream if the dictionary
+// flag is present.
+func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, error) {
+	if len(src) < 6 {
+		return bufpool.Buffer{}, io.ErrUnexpectedEOF
+	}
+
+	cmf := src[0]
+	flg := src[1]
+	if (cmf&0x0f != zlibDeflate) || (cmf>>4 > zlibMaxWindow) || (binary.BigEndian.Uint16(src[:2])%31 != 0) {
+		return bufpool.Buffer{}, ErrHeader
+	}
+
+	offset := 2
+	haveDict := flg&0x20 != 0
+	if haveDict {
+		if len(src) < offset+4 {
+			return bufpool.Buffer{}, io.ErrUnexpectedEOF
+		}
+		if dict == nil {
+			return bufpool.Buffer{}, ErrDictionary
+		}
+		checksum := binary.BigEndian.Uint32(src[offset : offset+4])
+		if checksum != adler32.Checksum(dict) {
+			return bufpool.Buffer{}, ErrDictionary
+		}
+		offset += 4
+	}
+
+	if len(src[offset:]) < 4 {
+		return bufpool.Buffer{}, io.ErrUnexpectedEOF
+	}
+
+	deflateData := src[offset:]
+	out, consumed, err := flatex.DecompressDict(deflateData, dict)
+	if err != nil {
+		return bufpool.Buffer{}, err
+	}
+
+	checksumPos := offset + consumed
+	if checksumPos+4 > len(src) {
+		out.Release()
+		return bufpool.Buffer{}, io.ErrUnexpectedEOF
+	}
+	expected := binary.BigEndian.Uint32(src[checksumPos : checksumPos+4])
+	if expected != adler32.Checksum(out.Bytes()) {
+		out.Release()
+		return bufpool.Buffer{}, ErrChecksum
+	}
+	return out, nil
+}
diff --git a/internal/zlibx/decompress_test.go b/internal/zlibx/decompress_test.go
new file mode 100644
index 00000000..a4e9c608
--- /dev/null
+++ b/internal/zlibx/decompress_test.go
@@ -0,0 +1,84 @@
+package zlibx
+
+import (
+	"bytes"
+	stdzlib "compress/zlib"
+	"testing"
+)
+
+func compressZlib(t *testing.T, payload, dict []byte) []byte {
+	t.Helper()
+	var buf bytes.Buffer
+	var (
+		w   *stdzlib.Writer
+		err error
+	)
+	if dict != nil {
+		w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict)
+	} else {
+		w = stdzlib.NewWriter(&buf)
+	}
+	if err != nil {
+		t.Fatalf("NewWriter: %v", err)
+	}
+	if _, err := w.Write(payload); err != nil {
+		t.Fatalf("Write: %v", err)
+	}
+	if err := w.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+	return buf.Bytes()
+}
+
+func TestDecompress(t *testing.T) {
+	payload := []byte("hello, zlib world!")
+	compressed := compressZlib(t, payload, nil)
+
+	out, err := Decompress(compressed)
+	if err != nil {
+		t.Fatalf("Decompress: %v", err)
+	}
+	defer out.Release()
+
+	if !bytes.Equal(out.Bytes(), payload) {
+		t.Fatalf("unexpected payload %q", out.Bytes())
+	}
+}
+
+func TestDecompressDict(t *testing.T) {
+	dict := []byte("git dictionary for zlib")
+	payload := append([]byte(nil), dict...)
+	payload = append(payload, []byte(" -- extended body -- extended body")...)
+	compressed := compressZlib(t, payload, dict)
+
+	out, err := DecompressDict(compressed, dict)
+	if err != nil {
+		t.Fatalf("DecompressDict: %v", err)
+	}
+	defer out.Release()
+
+	if !bytes.Equal(out.Bytes(), payload) {
+		t.Fatalf("unexpected payload %q", out.Bytes())
+	}
+}
+
+func TestDecompressDictMissing(t *testing.T) {
+	dict := []byte("preset dictionary")
+	payload := append([]byte(nil), dict...)
+	payload = append(payload, []byte(" .. more data ..")...)
+	compressed := compressZlib(t, payload, dict)
+
+	if _, err := Decompress(compressed); err != ErrDictionary {
+		t.Fatalf("expected ErrDictionary, got %v", err)
+	}
+}
+
+func TestDecompressChecksumError(t *testing.T) {
+	payload := []byte("checksum check")
+	compressed := compressZlib(t, payload, nil)
+	compressed[len(compressed)-1] ^= 0xff
+
+	if _, err := Decompress(compressed); err != ErrChecksum {
+		t.Fatalf("expected ErrChecksum, got %v", err)
+	}
+}
diff --git a/internal/zlibx/reader.go b/internal/zlibx/reader.go
new file mode 100644
index 00000000..80bbbb58
--- /dev/null
+++ b/internal/zlibx/reader.go
@@ -0,0 +1,198 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package zlib implements reading and writing of zlib format compressed data,
+as specified in RFC 1950.
+
+This package differs from the standard library's compress/zlib package
+in that it pools readers to reduce allocations. Writers are unchanged.
+
+Note that closing the reader causes it to be returned to a pool for
+reuse. Therefore, the caller must not retain references to the
+reader after closing it; in the standard library's compress/zlib package,
+it is legal to Reset a closed reader and continue using it; that is
+not allowed here, so there is simply no Resetter interface.
+
+The implementation provides filters that uncompress during reading
+and compress during writing.  For example, to write compressed data
+to a buffer:
+
+	var b bytes.Buffer
+	w := zlib.NewWriter(&b)
+	w.Write([]byte("hello, world\n"))
+	w.Close()
+
+and to read that data back:
+
+	r, err := zlib.NewReader(&b)
+	io.Copy(os.Stdout, r)
+	r.Close()
+*/
+package zlibx
+
+import (
+	"bufio"
+	"encoding/binary"
+	"errors"
+	"hash"
+	"io"
+	"sync"
+
+	"git.sr.ht/~runxiyu/furgit/internal/adler32"
+	"git.sr.ht/~runxiyu/furgit/internal/flatex"
+)
+
+const (
+	zlibDeflate   = 8
+	zlibMaxWindow = 7
+)
+
+var (
+	// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
+	ErrChecksum = errors.New("zlib: invalid checksum")
+	// ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
+	ErrDictionary = errors.New("zlib: invalid dictionary")
+	// ErrHeader is returned when reading ZLIB data that has an invalid header.
+	ErrHeader = errors.New("zlib: invalid header")
+)
+
+var pool = sync.Pool{
+	New: func() any {
+		r := new(reader)
+		return r
+	},
+}
+
+type reader struct {
+	r            flatex.Reader
+	decompressor io.ReadCloser
+	digest       hash.Hash32
+	err          error
+	scratch      [4]byte
+}
+
+// NewReader creates a new ReadCloser.
+// Reads from the returned ReadCloser read and decompress data from r.
+// If r does not implement [io.ByteReader], the decompressor may read more
+// data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser when done.
+func NewReader(r io.Reader) (io.ReadCloser, error) {
+	return NewReaderDict(r, nil)
+}
+
+// NewReaderDict is like [NewReader] but uses a preset dictionary.
+// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
+// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
+func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
+	v := pool.Get()
+	z, ok := v.(*reader)
+	if !ok {
+		panic("zlib: pool returned unexpected type")
+	}
+	err := z.Reset(r, dict)
+	if err != nil {
+		return nil, err
+	}
+	return z, nil
+}
+
+func (z *reader) Read(p []byte) (int, error) {
+	if z.err != nil {
+		return 0, z.err
+	}
+
+	var n int
+	n, z.err = z.decompressor.Read(p)
+	z.digest.Write(p[0:n])
+	if z.err != io.EOF {
+		// In the normal case we return here.
+		return n, z.err
+	}
+
+	// Finished file; check checksum.
+	if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		z.err = err
+		return n, z.err
+	}
+	// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
+	checksum := binary.BigEndian.Uint32(z.scratch[:4])
+	if checksum != z.digest.Sum32() {
+		z.err = ErrChecksum
+		return n, z.err
+	}
+	return n, io.EOF
+}
+
+// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader].
+// In order for the ZLIB checksum to be verified, the reader must be
+// fully consumed until the [io.EOF].
+func (z *reader) Close() error {
+	if z.err != nil && z.err != io.EOF {
+		return z.err
+	}
+	z.err = z.decompressor.Close()
+	if z.err != nil {
+		return z.err
+	}
+
+	pool.Put(z)
+	return nil
+}
+
+func (z *reader) Reset(r io.Reader, dict []byte) error {
+	*z = reader{decompressor: z.decompressor}
+	if fr, ok := r.(flatex.Reader); ok {
+		z.r = fr
+	} else {
+		z.r = bufio.NewReader(r)
+	}
+
+	// Read the header (RFC 1950 section 2.2.).
+	_, z.err = io.ReadFull(z.r, z.scratch[0:2])
+	if z.err != nil {
+		if z.err == io.EOF {
+			z.err = io.ErrUnexpectedEOF
+		}
+		return z.err
+	}
+	h := binary.BigEndian.Uint16(z.scratch[:2])
+	if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) {
+		z.err = ErrHeader
+		return z.err
+	}
+	haveDict := z.scratch[1]&0x20 != 0
+	if haveDict {
+		_, z.err = io.ReadFull(z.r, z.scratch[0:4])
+		if z.err != nil {
+			if z.err == io.EOF {
+				z.err = io.ErrUnexpectedEOF
+			}
+			return z.err
+		}
+		checksum := binary.BigEndian.Uint32(z.scratch[:4])
+		if checksum != adler32.Checksum(dict) {
+			z.err = ErrDictionary
+			return z.err
+		}
+	}
+
+	if z.decompressor == nil {
+		if haveDict {
+			z.decompressor = flatex.NewReaderDict(z.r, dict)
+		} else {
+			z.decompressor = flatex.NewReader(z.r)
+		}
+	} else {
+		z.err = z.decompressor.(flatex.Resetter).Reset(z.r, dict)
+		if z.err != nil {
+			return z.err
+		}
+	}
+	z.digest = adler32.New()
+	return nil
+}
-- 
cgit v1.3.1-10-gc9f91