diff options
| author | 2025-11-19 08:00:00 +0800 | |
|---|---|---|
| committer | 2025-11-19 08:00:00 +0800 | |
| commit | ed0a113f034aa42aea23471c4bc0d7af159b7002 (patch) | |
| tree | 7e828011b9e213499ce382eb17e2552da6e48de4 /internal/flate | |
| parent | Remove some redundant code (diff) | |
| signature | No signature | |
Probably should name the custom packages specially
Diffstat (limited to 'internal/flate')
| -rw-r--r-- | internal/flate/LICENSE | 27 | ||||
| -rw-r--r-- | internal/flate/decompress_bytes.go | 64 | ||||
| -rw-r--r-- | internal/flate/decompress_test.go | 76 | ||||
| -rw-r--r-- | internal/flate/dict_decoder.go | 182 | ||||
| -rw-r--r-- | internal/flate/inflate.go | 839 | ||||
| -rw-r--r-- | internal/flate/slice_inflate.go | 479 |
6 files changed, 0 insertions, 1667 deletions
diff --git a/internal/flate/LICENSE b/internal/flate/LICENSE deleted file mode 100644 index 2a7cf70d..00000000 --- a/internal/flate/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright 2009 The Go Authors. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google LLC nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/flate/decompress_bytes.go b/internal/flate/decompress_bytes.go deleted file mode 100644 index 18fe21fc..00000000 --- a/internal/flate/decompress_bytes.go +++ /dev/null @@ -1,64 +0,0 @@ -package flate - -import ( - "io" - "sync" - - "git.sr.ht/~runxiyu/furgit/internal/bufpool" -) - -// bufferDecompressor wraps the custom slice inflater so byte-slice -// decompressions avoid repeated allocations. -type bufferDecompressor struct { - inflater sliceInflater -} - -var bufferDecompressorPool = sync.Pool{ - New: func() any { - fixedHuffmanDecoderInit() - d := &bufferDecompressor{} - d.inflater.bits = new([maxNumLit + maxNumDist]int) - d.inflater.codebits = new([numCodes]int) - return d - }, -} - -// Decompress inflates the provided DEFLATE stream and returns the full output -// in a pooled bufpool.Buffer along with the number of consumed bytes from src. -func Decompress(src []byte) (bufpool.Buffer, int, error) { - return DecompressDict(src, nil) -} - -// DecompressDict inflates the provided DEFLATE stream using dict as the preset -// dictionary and returns the full output in a pooled bufpool.Buffer. The second -// returned value reports how many bytes of src were consumed. -func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) { - d := bufferDecompressorPool.Get().(*bufferDecompressor) - defer bufferDecompressorPool.Put(d) - - if err := d.inflater.reset(src, dict); err != nil { - return bufpool.Buffer{}, 0, err - } - - out := bufpool.Borrow(bufpool.DefaultBufferCap) - out.Resize(0) - - for { - if len(d.inflater.toRead) > 0 { - out.Append(d.inflater.toRead) - d.inflater.toRead = nil - continue - } - if d.inflater.err != nil { - if d.inflater.err == io.EOF { - return out, d.inflater.pos, nil - } - out.Release() - return bufpool.Buffer{}, 0, d.inflater.err - } - d.inflater.step(&d.inflater) - if d.inflater.err != nil && len(d.inflater.toRead) == 0 { - d.inflater.toRead = d.inflater.dict.readFlush() - } - } -} diff --git a/internal/flate/decompress_test.go b/internal/flate/decompress_test.go deleted file mode 100644 index 089159d6..00000000 --- a/internal/flate/decompress_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package flate - -import ( - "bytes" - stdflate "compress/flate" - "testing" -) - -func compressDeflate(t *testing.T, payload, dict []byte) []byte { - t.Helper() - var buf bytes.Buffer - var ( - w *stdflate.Writer - err error - ) - if dict != nil { - w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict) - } else { - w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression) - } - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - if _, err := w.Write(payload); err != nil { - t.Fatalf("Write: %v", err) - } - if err := w.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - return buf.Bytes() -} - -func TestDecompress(t *testing.T) { - payload := bytes.Repeat([]byte("golang"), 32) - compressed := compressDeflate(t, payload, nil) - - out, _, err := Decompress(compressed) - if err != nil { - t.Fatalf("Decompress: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressDict(t *testing.T) { - dict := []byte("furgit dictionary payload") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...) - - compressed := compressDeflate(t, payload, dict) - - out, _, err := DecompressDict(compressed, dict) - if err != nil { - t.Fatalf("DecompressDict: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressDictMissing(t *testing.T) { - dict := []byte("shared prefix to enforce dictionary usage") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" trailing data to force reference")...) - - compressed := compressDeflate(t, payload, dict) - - if _, _, err := Decompress(compressed); err == nil { - t.Fatalf("expected error when dictionary missing") - } -} diff --git a/internal/flate/dict_decoder.go b/internal/flate/dict_decoder.go deleted file mode 100644 index d2c19040..00000000 --- a/internal/flate/dict_decoder.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// dictDecoder implements the LZ77 sliding dictionary as used in decompression. -// LZ77 decompresses data through sequences of two forms of commands: -// -// - Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. -// -// - Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. -// -// For performance reasons, this implementation performs little to no sanity -// checks about the arguments. As such, the invariants documented for each -// method call must be respected. -type dictDecoder struct { - hist []byte // Sliding window history - - // Invariant: 0 <= rdPos <= wrPos <= len(hist) - wrPos int // Current output position in buffer - rdPos int // Have emitted hist[:rdPos] already - full bool // Has a full window length been written yet? -} - -// init initializes dictDecoder to have a sliding window dictionary of the given -// size. If a preset dict is provided, it will initialize the dictionary with -// the contents of dict. -func (dd *dictDecoder) init(size int, dict []byte) { - *dd = dictDecoder{hist: dd.hist} - - if cap(dd.hist) < size { - dd.hist = make([]byte, size) - } - dd.hist = dd.hist[:size] - - if len(dict) > len(dd.hist) { - dict = dict[len(dict)-len(dd.hist):] - } - dd.wrPos = copy(dd.hist, dict) - if dd.wrPos == len(dd.hist) { - dd.wrPos = 0 - dd.full = true - } - dd.rdPos = dd.wrPos -} - -// histSize reports the total amount of historical data in the dictionary. -func (dd *dictDecoder) histSize() int { - if dd.full { - return len(dd.hist) - } - return dd.wrPos -} - -// availRead reports the number of bytes that can be flushed by readFlush. -func (dd *dictDecoder) availRead() int { - return dd.wrPos - dd.rdPos -} - -// availWrite reports the available amount of output buffer space. -func (dd *dictDecoder) availWrite() int { - return len(dd.hist) - dd.wrPos -} - -// writeSlice returns a slice of the available buffer to write data to. -// -// This invariant will be kept: len(s) <= availWrite() -func (dd *dictDecoder) writeSlice() []byte { - return dd.hist[dd.wrPos:] -} - -// writeMark advances the writer pointer by cnt. -// -// This invariant must be kept: 0 <= cnt <= availWrite() -func (dd *dictDecoder) writeMark(cnt int) { - dd.wrPos += cnt -} - -// writeByte writes a single byte to the dictionary. -// -// This invariant must be kept: 0 < availWrite() -func (dd *dictDecoder) writeByte(c byte) { - dd.hist[dd.wrPos] = c - dd.wrPos++ -} - -// writeCopy copies a string at a given (dist, length) to the output. -// This returns the number of bytes copied and may be less than the requested -// length if the available space in the output buffer is too small. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) writeCopy(dist, length int) int { - dstBase := dd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := dstPos + length - if endPos > len(dd.hist) { - endPos = len(dd.hist) - } - - // Copy non-overlapping section after destination position. - // - // This section is non-overlapping in that the copy length for this section - // is always less than or equal to the backwards distance. This can occur - // if a distance refers to data that wraps-around in the buffer. - // Thus, a backwards copy is performed here; that is, the exact bytes in - // the source prior to the copy is placed in the destination. - if srcPos < 0 { - srcPos += len(dd.hist) - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) - srcPos = 0 - } - - // Copy possibly overlapping section before destination position. - // - // This section can overlap if the copy length for this section is larger - // than the backwards distance. This is allowed by LZ77 so that repeated - // strings can be succinctly represented using (dist, length) pairs. - // Thus, a forwards copy is performed here; that is, the bytes copied is - // possibly dependent on the resulting bytes in the destination as the copy - // progresses along. This is functionally equivalent to the following: - // - // for i := 0; i < endPos-dstPos; i++ { - // dd.hist[dstPos+i] = dd.hist[srcPos+i] - // } - // dstPos = endPos - // - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// tryWriteCopy tries to copy a string at a given (distance, length) to the -// output. This specialized version is optimized for short distances. -// -// This method is designed to be inlined for performance reasons. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) tryWriteCopy(dist, length int) int { - dstPos := dd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(dd.hist) { - return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - // Copy possibly overlapping section before destination position. - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// readFlush returns a slice of the historical buffer that is ready to be -// emitted to the user. The data returned by readFlush must be fully consumed -// before calling any other dictDecoder methods. -func (dd *dictDecoder) readFlush() []byte { - toRead := dd.hist[dd.rdPos:dd.wrPos] - dd.rdPos = dd.wrPos - if dd.wrPos == len(dd.hist) { - dd.wrPos, dd.rdPos = 0, 0 - dd.full = true - } - return toRead -} diff --git a/internal/flate/inflate.go b/internal/flate/inflate.go deleted file mode 100644 index cfcff084..00000000 --- a/internal/flate/inflate.go +++ /dev/null @@ -1,839 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package flate implements the DEFLATE compressed data format, described in -// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access -// to DEFLATE-based file formats. -package flate - -import ( - "bufio" - "io" - "math/bits" - "strconv" - "sync" -) - -const ( - // The special code used to mark the end of a block. - endBlockMarker = 256 - maxCodeLen = 16 // max length of Huffman code - maxMatchOffset = 1 << 15 // The largest match offset - // The next three numbers come from the RFC section 3.2.7, with the - // additional proviso in section 3.2.5 which implies that distance codes - // 30 and 31 should never occur in compressed data. - maxNumLit = 286 - maxNumDist = 30 - numCodes = 19 // number of codes in Huffman meta-code -) - -// Initialize the fixedHuffmanDecoder only once upon first use. -var fixedOnce sync.Once -var fixedHuffmanDecoder huffmanDecoder - -// A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError int64 - -func (e CorruptInputError) Error() string { - return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) -} - -// An InternalError reports an error in the flate code itself. -type InternalError string - -func (e InternalError) Error() string { return "flate: internal error: " + string(e) } - -// A ReadError reports an error encountered while reading input. -// -// Deprecated: No longer returned. -type ReadError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Read -} - -func (e *ReadError) Error() string { - return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} - -// A WriteError reports an error encountered while writing output. -// -// Deprecated: No longer returned. -type WriteError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Write -} - -func (e *WriteError) Error() string { - return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} - -// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] -// to switch to a new underlying [Reader]. This permits reusing a ReadCloser -// instead of allocating a new one. -type Resetter interface { - // Reset discards any buffered data and resets the Resetter as if it was - // newly initialized with the given reader. - Reset(r io.Reader, dict []byte) error -} - -// The data structure for decoding Huffman tables is based on that of -// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), -// For codes smaller than the table width, there are multiple entries -// (each combination of trailing bits has the same value). For codes -// larger than the table width, the table contains a link to an overflow -// table. The width of each entry in the link table is the maximum code -// size minus the chunk width. -// -// Note that you can do a lookup in the table even without all bits -// filled. Since the extra bits are zero, and the DEFLATE Huffman codes -// have the property that shorter codes come before longer ones, the -// bit length estimate in the result is a lower bound on the actual -// number of bits. -// -// See the following: -// https://github.com/madler/zlib/raw/master/doc/algorithm.txt - -// chunk & 15 is number of bits -// chunk >> 4 is value, including table link - -const ( - huffmanChunkBits = 9 - huffmanNumChunks = 1 << huffmanChunkBits - huffmanCountMask = 15 - huffmanValueShift = 4 -) - -type huffmanDecoder struct { - min int // the minimum code length - chunks [huffmanNumChunks]uint32 // chunks as described above - links [][]uint32 // overflow links - linkMask uint32 // mask the width of the link table -} - -// Initialize Huffman decoding tables from array of code lengths. -// Following this function, h is guaranteed to be initialized into a complete -// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a -// degenerate case where the tree has only a single symbol with length 1. Empty -// trees are permitted. -func (h *huffmanDecoder) init(lengths []int) bool { - // Sanity enables additional runtime tests during Huffman - // table construction. It's intended to be used during - // development to supplement the currently ad-hoc unit tests. - const sanity = false - - if h.min != 0 { - *h = huffmanDecoder{} - } - - // Count number of codes of each length, - // compute min and max length. - var count [maxCodeLen]int - var min, max int - for _, n := range lengths { - if n == 0 { - continue - } - if min == 0 || n < min { - min = n - } - if n > max { - max = n - } - count[n]++ - } - - // Empty tree. The decompressor.huffSym function will fail later if the tree - // is used. Technically, an empty tree is only valid for the HDIST tree and - // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree - // is guaranteed to fail since it will attempt to use the tree to decode the - // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is - // guaranteed to fail later since the compressed data section must be - // composed of at least one symbol (the end-of-block marker). - if max == 0 { - return true - } - - code := 0 - var nextcode [maxCodeLen]int - for i := min; i <= max; i++ { - code <<= 1 - nextcode[i] = code - code += count[i] - } - - // Check that the coding is complete (i.e., that we've - // assigned all 2-to-the-max possible bit sequences). - // Exception: To be compatible with zlib, we also need to - // accept degenerate single-code codings. See also - // TestDegenerateHuffmanCoding. - if code != 1<<uint(max) && (code != 1 || max != 1) { - return false - } - - h.min = min - if max > huffmanChunkBits { - numLinks := 1 << (uint(max) - huffmanChunkBits) - h.linkMask = uint32(numLinks - 1) - - // create link tables - link := nextcode[huffmanChunkBits+1] >> 1 - h.links = make([][]uint32, huffmanNumChunks-link) - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(bits.Reverse16(uint16(j))) - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1)) - h.links[off] = make([]uint32, numLinks) - } - } - - for i, n := range lengths { - if n == 0 { - continue - } - code := nextcode[n] - nextcode[n]++ - chunk := uint32(i<<huffmanValueShift | n) - reverse := int(bits.Reverse16(uint16(code))) - reverse >>= uint(16 - n) - if n <= huffmanChunkBits { - for off := reverse; off < len(h.chunks); off += 1 << uint(n) { - // We should never need to overwrite - // an existing chunk. Also, 0 is - // never a valid chunk, because the - // lower 4 "count" bits should be - // between 1 and 15. - if sanity && h.chunks[off] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[off] = chunk - } - } else { - j := reverse & (huffmanNumChunks - 1) - if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { - // Longer codes should have been - // associated with a link table above. - panic("impossible: not an indirect chunk") - } - value := h.chunks[j] >> huffmanValueShift - linktab := h.links[value] - reverse >>= huffmanChunkBits - for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { - if sanity && linktab[off] != 0 { - panic("impossible: overwriting existing chunk") - } - linktab[off] = chunk - } - } - } - - if sanity { - // Above we've sanity checked that we never overwrote - // an existing entry. Here we additionally check that - // we filled the tables completely. - for i, chunk := range h.chunks { - if chunk == 0 { - // As an exception, in the degenerate - // single-code case, we allow odd - // chunks to be missing. - if code == 1 && i%2 == 1 { - continue - } - panic("impossible: missing chunk") - } - } - for _, linktab := range h.links { - for _, chunk := range linktab { - if chunk == 0 { - panic("impossible: missing chunk") - } - } - } - } - - return true -} - -// The actual read interface needed by [NewReader]. -// If the passed in [io.Reader] does not also have ReadByte, -// the [NewReader] will introduce its own buffering. -type Reader interface { - io.Reader - io.ByteReader -} - -// Decompress state. -type decompressor struct { - // Input source. - r Reader - rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader - roffset int64 - - // Input bits, in top of b. - b uint32 - nb uint - - // Huffman decoders for literal/length, distance. - h1, h2 huffmanDecoder - - // Length arrays used to define Huffman codes. - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - // Output history, buffer. - dict dictDecoder - - // Temporary buffer (avoids repeated allocation). - buf [4]byte - - // Next step in the decompression, - // and decompression state. - step func(*decompressor) - stepState int - final bool - err error - toRead []byte - hl, hd *huffmanDecoder - copyLen int - copyDist int -} - -func (f *decompressor) nextBlock() { - for f.nb < 1+2 { - if f.err = f.moreBits(); f.err != nil { - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - case 1: - // compressed, fixed Huffman tables - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlock() - case 2: - // compressed, dynamic Huffman tables - if f.err = f.readHuffman(); f.err != nil { - break - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlock() - default: - // 3 is reserved. - f.err = CorruptInputError(f.roffset) - } -} - -func (f *decompressor) Read(b []byte) (int, error) { - for { - if len(f.toRead) > 0 { - n := copy(b, f.toRead) - f.toRead = f.toRead[n:] - if len(f.toRead) == 0 { - return n, f.err - } - return n, nil - } - if f.err != nil { - return 0, f.err - } - f.step(f) - if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.dict.readFlush() // Flush what's left in case of error - } - } -} - -func (f *decompressor) Close() error { - if f.err == io.EOF { - return nil - } - return f.err -} - -// RFC 1951 section 3.2.7. -// Compression with dynamic Huffman codes - -var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -func (f *decompressor) readHuffman() error { - // HLIT[5], HDIST[5], HCLEN[4]. - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - // numCodes is 19, so nclen is always valid. - f.b >>= 4 - f.nb -= 5 + 5 + 4 - - // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - f.codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - for i := nclen; i < len(codeOrder); i++ { - f.codebits[codeOrder[i]] = 0 - } - if !f.h1.init(f.codebits[0:]) { - return CorruptInputError(f.roffset) - } - - // HLIT + 257 code lengths, HDIST + 1 code lengths, - // using the code length Huffman code. - for i, n := 0, nlit+ndist; i < n; { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - if x < 16 { - // Actual length. - f.bits[i] = x - i++ - continue - } - // Repeat previous length or zero. - var rep int - var nb uint - var b int - switch x { - default: - return InternalError("unexpected length code") - case 16: - rep = 3 - nb = 2 - if i == 0 { - return CorruptInputError(f.roffset) - } - b = f.bits[i-1] - case 17: - rep = 3 - nb = 3 - b = 0 - case 18: - rep = 11 - nb = 7 - b = 0 - } - for f.nb < nb { - if err := f.moreBits(); err != nil { - return err - } - } - rep += int(f.b & uint32(1<<nb-1)) - f.b >>= nb - f.nb -= nb - if i+rep > n { - return CorruptInputError(f.roffset) - } - for j := 0; j < rep; j++ { - f.bits[i] = b - i++ - } - } - - if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { - return CorruptInputError(f.roffset) - } - - // As an optimization, we can initialize the min bits to read at a time - // for the HLIT tree to the length of the EOB marker since we know that - // every block must terminate with one. This preserves the property that - // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.min < f.bits[endBlockMarker] { - f.h1.min = f.bits[endBlockMarker] - } - - return nil -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBlock() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return - } - var n uint // number of bits extra - var length int - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - length += int(f.b & uint32(1<<n-1)) - f.b >>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - extra |= int(f.b & uint32(1<<nb-1)) - f.b >>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock // We need to continue this work - f.stepState = stateDict - return - } - goto readLiteral - } -} - -// Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() { - // Uncompressed. - // Discard current half-byte. - f.nb = 0 - f.b = 0 - - // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[0:4]) - f.roffset += int64(nr) - if err != nil { - f.err = noEOF(err) - return - } - n := int(f.buf[0]) | int(f.buf[1])<<8 - nn := int(f.buf[2]) | int(f.buf[3])<<8 - if uint16(nn) != uint16(^n) { - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.dict.readFlush() - f.finishBlock() - return - } - - f.copyLen = n - f.copyData() -} - -// copyData copies f.copyLen bytes from the underlying reader into f.hist. -// It pauses for reads when f.hist is full. -func (f *decompressor) copyData() { - buf := f.dict.writeSlice() - if len(buf) > f.copyLen { - buf = buf[:f.copyLen] - } - - cnt, err := io.ReadFull(f.r, buf) - f.roffset += int64(cnt) - f.copyLen -= cnt - f.dict.writeMark(cnt) - if err != nil { - f.err = noEOF(err) - return - } - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData - return - } - f.finishBlock() -} - -func (f *decompressor) finishBlock() { - if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - f.err = io.EOF - } - f.step = (*decompressor).nextBlock -} - -// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. -func noEOF(e error) error { - if e == io.EOF { - return io.ErrUnexpectedEOF - } - return e -} - -func (f *decompressor) moreBits() error { - c, err := f.r.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil -} - -// Read the next Huffman-encoded symbol from f according to h. -func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(h.min) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - return 0, noEOF(err) - } - f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & 31) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func (f *decompressor) makeReader(r io.Reader) { - if rr, ok := r.(Reader); ok { - f.rBuf = nil - f.r = rr - return - } - // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. - if f.rBuf != nil { - f.rBuf.Reset(r) - } else { - // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. - f.rBuf = bufio.NewReader(r) - } - f.r = f.rBuf -} - -func fixedHuffmanDecoderInit() { - fixedOnce.Do(func() { - // These come from the RFC section 3.2.6. - var bits [288]int - for i := 0; i < 144; i++ { - bits[i] = 8 - } - for i := 144; i < 256; i++ { - bits[i] = 9 - } - for i := 256; i < 280; i++ { - bits[i] = 7 - } - for i := 280; i < 288; i++ { - bits[i] = 8 - } - fixedHuffmanDecoder.init(bits[:]) - }) -} - -func (f *decompressor) Reset(r io.Reader, dict []byte) error { - *f = decompressor{ - rBuf: f.rBuf, - bits: f.bits, - codebits: f.codebits, - dict: f.dict, - step: (*decompressor).nextBlock, - } - f.makeReader(r) - f.dict.init(maxMatchOffset, dict) - return nil -} - -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement [io.ByteReader], -// the decompressor may read more data than necessary from r. -// The reader returns [io.EOF] after the final block in the DEFLATE stream has -// been encountered. Any trailing data after the final block is ignored. -// -// The [io.ReadCloser] returned by NewReader also implements [Resetter]. -func NewReader(r io.Reader) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, nil) - return &f -} - -// NewReaderDict is like [NewReader] but initializes the reader -// with a preset dictionary. The returned reader behaves as if -// the uncompressed data stream started with the given dictionary, -// which has already been read. NewReaderDict is typically used -// to read data compressed by [NewWriterDict]. -// -// The ReadCloser returned by NewReaderDict also implements [Resetter]. -func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, dict) - return &f -} diff --git a/internal/flate/slice_inflate.go b/internal/flate/slice_inflate.go deleted file mode 100644 index 0df4ed45..00000000 --- a/internal/flate/slice_inflate.go +++ /dev/null @@ -1,479 +0,0 @@ -package flate - -import ( - "io" - "math/bits" -) - -// sliceInflater is a specialized DEFLATE decoder that reads directly from an -// in-memory byte slice. It mirrors the main decompressor but avoids the -// overhead of the Reader interfaces, enabling faster byte-slice decoding. -type sliceInflater struct { - input []byte - pos int - roffset int64 - - b uint32 - nb uint - - h1, h2 huffmanDecoder - - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - dict dictDecoder - - toRead []byte - step func(*sliceInflater) - stepState int - final bool - err error - hl, hd *huffmanDecoder - copyLen int - copyDist int -} - -func (f *sliceInflater) reset(src []byte, dict []byte) error { - bits := f.bits - codebits := f.codebits - dictState := f.dict - *f = sliceInflater{ - input: src, - bits: bits, - codebits: codebits, - dict: dictState, - step: (*sliceInflater).nextBlock, - } - f.dict.init(maxMatchOffset, dict) - return nil -} - -func (f *sliceInflater) readByte() (byte, error) { - if f.pos >= len(f.input) { - return 0, io.ErrUnexpectedEOF - } - b := f.input[f.pos] - f.pos++ - f.roffset++ - return b, nil -} - -func (f *sliceInflater) readBytes(n int) ([]byte, error) { - if n < 0 || f.pos+n > len(f.input) { - f.pos = len(f.input) - return nil, io.ErrUnexpectedEOF - } - s := f.input[f.pos : f.pos+n] - f.pos += n - f.roffset += int64(n) - return s, nil -} - -func (f *sliceInflater) nextBlock() { - for f.nb < 1+2 { - if err := f.moreBits(); err != nil { - f.err = err - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - case 1: - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlock() - case 2: - if err := f.readHuffman(); err != nil { - f.err = err - return - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlock() - default: - f.err = CorruptInputError(f.roffset) - } -} - -func (f *sliceInflater) huffmanBlock() { - const ( - stateInit = iota - stateDict - ) - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return - } - var n uint - var length int - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).huffmanBlock - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - length += int(f.b & uint32(1<<n-1)) - f.b >>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - extra |= int(f.b & uint32(1<<nb-1)) - f.b >>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - f.err = CorruptInputError(f.roffset) - return - } - - if dist > f.dict.histSize() { - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).huffmanBlock - f.stepState = stateDict - return - } - goto readLiteral - } -} - -func (f *sliceInflater) dataBlock() { - f.nb = 0 - f.b = 0 - - hdr, err := f.readBytes(4) - if err != nil { - f.err = err - return - } - n := int(hdr[0]) | int(hdr[1])<<8 - nn := int(hdr[2]) | int(hdr[3])<<8 - if uint16(nn) != uint16(^n) { - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.dict.readFlush() - f.finishBlock() - return - } - - f.copyLen = n - f.copyData() -} - -func (f *sliceInflater) copyData() { - for { - if f.copyLen == 0 { - f.finishBlock() - return - } - buf := f.dict.writeSlice() - if len(buf) == 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).copyData - return - } - n := f.copyLen - if n > len(buf) { - n = len(buf) - } - if f.pos+n > len(f.input) { - f.err = io.ErrUnexpectedEOF - return - } - copy(buf[:n], f.input[f.pos:f.pos+n]) - f.pos += n - f.roffset += int64(n) - f.copyLen -= n - f.dict.writeMark(n) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).copyData - return - } - } -} - -func (f *sliceInflater) finishBlock() { - if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - f.err = io.EOF - } - f.step = (*sliceInflater).nextBlock - f.stepState = 0 -} - -func (f *sliceInflater) moreBits() error { - c, err := f.readByte() - if err != nil { - return err - } - f.b |= uint32(c) << (f.nb & 31) - f.nb += 8 - return nil -} - -func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) { - n := uint(h.min) - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.readByte() - if err != nil { - f.b = b - f.nb = nb - return 0, err - } - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & 31) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func (f *sliceInflater) readHuffman() error { - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - f.b >>= 4 - f.nb -= 5 + 5 + 4 - codebits := f.codebits[:] - bits := f.bits[:] - for i := range codebits { - codebits[i] = 0 - } - for i := range bits { - bits[i] = 0 - } - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - if !f.h1.init(codebits) { - return CorruptInputError(f.roffset) - } - for i := range bits { - bits[i] = 0 - } - i := 0 - for i < nlit+ndist { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - switch { - case x < 16: - bits[i] = x - i++ - case x == 16: - if i == 0 { - return CorruptInputError(f.roffset) - } - repeat := 3 - for f.nb < 2 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x3) - f.b >>= 2 - f.nb -= 2 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = bits[i-1] - i++ - repeat-- - } - case x == 17: - repeat := 3 - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = 0 - i++ - repeat-- - } - case x == 18: - repeat := 11 - for f.nb < 7 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x7F) - f.b >>= 7 - f.nb -= 7 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = 0 - i++ - repeat-- - } - default: - return CorruptInputError(f.roffset) - } - } - if !f.h1.init(bits[:nlit]) { - return CorruptInputError(f.roffset) - } - if !f.h2.init(bits[nlit : nlit+ndist]) { - return CorruptInputError(f.roffset) - } - if f.h1.min < bits[endBlockMarker] { - f.h1.min = bits[endBlockMarker] - } - return nil -} |
