diff options
| author | 2025-11-19 08:00:00 +0800 | |
|---|---|---|
| committer | 2025-11-19 08:00:00 +0800 | |
| commit | ed0a113f034aa42aea23471c4bc0d7af159b7002 (patch) | |
| tree | 7e828011b9e213499ce382eb17e2552da6e48de4 /internal/flatex | |
| parent | Remove some redundant code (diff) | |
| signature | No signature | |
Probably should name the custom packages specially
Diffstat (limited to 'internal/flatex')
| -rw-r--r-- | internal/flatex/LICENSE | 27 | ||||
| -rw-r--r-- | internal/flatex/decompress_bytes.go | 64 | ||||
| -rw-r--r-- | internal/flatex/decompress_test.go | 76 | ||||
| -rw-r--r-- | internal/flatex/dict_decoder.go | 182 | ||||
| -rw-r--r-- | internal/flatex/inflate.go | 839 | ||||
| -rw-r--r-- | internal/flatex/slice_inflate.go | 479 |
6 files changed, 1667 insertions, 0 deletions
diff --git a/internal/flatex/LICENSE b/internal/flatex/LICENSE new file mode 100644 index 00000000..2a7cf70d --- /dev/null +++ b/internal/flatex/LICENSE @@ -0,0 +1,27 @@ +Copyright 2009 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go new file mode 100644 index 00000000..5a660b0a --- /dev/null +++ b/internal/flatex/decompress_bytes.go @@ -0,0 +1,64 @@ +package flatex + +import ( + "io" + "sync" + + "git.sr.ht/~runxiyu/furgit/internal/bufpool" +) + +// bufferDecompressor wraps the custom slice inflater so byte-slice +// decompressions avoid repeated allocations. +type bufferDecompressor struct { + inflater sliceInflater +} + +var bufferDecompressorPool = sync.Pool{ + New: func() any { + fixedHuffmanDecoderInit() + d := &bufferDecompressor{} + d.inflater.bits = new([maxNumLit + maxNumDist]int) + d.inflater.codebits = new([numCodes]int) + return d + }, +} + +// Decompress inflates the provided DEFLATE stream and returns the full output +// in a pooled bufpool.Buffer along with the number of consumed bytes from src. +func Decompress(src []byte) (bufpool.Buffer, int, error) { + return DecompressDict(src, nil) +} + +// DecompressDict inflates the provided DEFLATE stream using dict as the preset +// dictionary and returns the full output in a pooled bufpool.Buffer. The second +// returned value reports how many bytes of src were consumed. +func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) { + d := bufferDecompressorPool.Get().(*bufferDecompressor) + defer bufferDecompressorPool.Put(d) + + if err := d.inflater.reset(src, dict); err != nil { + return bufpool.Buffer{}, 0, err + } + + out := bufpool.Borrow(bufpool.DefaultBufferCap) + out.Resize(0) + + for { + if len(d.inflater.toRead) > 0 { + out.Append(d.inflater.toRead) + d.inflater.toRead = nil + continue + } + if d.inflater.err != nil { + if d.inflater.err == io.EOF { + return out, d.inflater.pos, nil + } + out.Release() + return bufpool.Buffer{}, 0, d.inflater.err + } + d.inflater.step(&d.inflater) + if d.inflater.err != nil && len(d.inflater.toRead) == 0 { + d.inflater.toRead = d.inflater.dict.readFlush() + } + } +} diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go new file mode 100644 index 00000000..afb84292 --- /dev/null +++ b/internal/flatex/decompress_test.go @@ -0,0 +1,76 @@ +package flatex + +import ( + "bytes" + stdflate "compress/flate" + "testing" +) + +func compressDeflate(t *testing.T, payload, dict []byte) []byte { + t.Helper() + var buf bytes.Buffer + var ( + w *stdflate.Writer + err error + ) + if dict != nil { + w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict) + } else { + w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression) + } + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + if _, err := w.Write(payload); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + return buf.Bytes() +} + +func TestDecompress(t *testing.T) { + payload := bytes.Repeat([]byte("golang"), 32) + compressed := compressDeflate(t, payload, nil) + + out, _, err := Decompress(compressed) + if err != nil { + t.Fatalf("Decompress: %v", err) + } + defer out.Release() + + if !bytes.Equal(out.Bytes(), payload) { + t.Fatalf("unexpected payload: got %q", out.Bytes()) + } +} + +func TestDecompressDict(t *testing.T) { + dict := []byte("furgit dictionary payload") + payload := append([]byte(nil), dict...) + payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...) + + compressed := compressDeflate(t, payload, dict) + + out, _, err := DecompressDict(compressed, dict) + if err != nil { + t.Fatalf("DecompressDict: %v", err) + } + defer out.Release() + + if !bytes.Equal(out.Bytes(), payload) { + t.Fatalf("unexpected payload: got %q", out.Bytes()) + } +} + +func TestDecompressDictMissing(t *testing.T) { + dict := []byte("shared prefix to enforce dictionary usage") + payload := append([]byte(nil), dict...) + payload = append(payload, []byte(" trailing data to force reference")...) + + compressed := compressDeflate(t, payload, dict) + + if _, _, err := Decompress(compressed); err == nil { + t.Fatalf("expected error when dictionary missing") + } +} diff --git a/internal/flatex/dict_decoder.go b/internal/flatex/dict_decoder.go new file mode 100644 index 00000000..7a81e640 --- /dev/null +++ b/internal/flatex/dict_decoder.go @@ -0,0 +1,182 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flatex + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// - Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// - Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go new file mode 100644 index 00000000..5e9d861e --- /dev/null +++ b/internal/flatex/inflate.go @@ -0,0 +1,839 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flatex implements the DEFLATE compressed data format, described in +// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access +// to DEFLATE-based file formats. +package flatex + +import ( + "bufio" + "io" + "math/bits" + "strconv" + "sync" +) + +const ( + // The special code used to mark the end of a block. + endBlockMarker = 256 + maxCodeLen = 16 // max length of Huffman code + maxMatchOffset = 1 << 15 // The largest match offset + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code +) + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Read +} + +func (e *ReadError) Error() string { + return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Write +} + +func (e *WriteError) Error() string { + return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] +// to switch to a new underlying [Reader]. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// https://github.com/madler/zlib/raw/master/doc/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks [huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.min != 0 { + *h = huffmanDecoder{} + } + + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i] = code + code += count[i] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1<<uint(max) && (code != 1 || max != 1) { + return false + } + + h.min = min + if max > huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1)) + h.links[off] = make([]uint32, numLinks) + } + } + + for i, n := range lengths { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + chunk := uint32(i<<huffmanValueShift | n) + reverse := int(bits.Reverse16(uint16(code))) + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by [NewReader]. +// If the passed in [io.Reader] does not also have ReadByte, +// the [NewReader] will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader + roffset int64 + + // Input bits, in top of b. + b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + final bool + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + return err + } + } + rep += int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + if i+rep > n { + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the min bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.min < f.bits[endBlockMarker] { + f.h1.min = f.bits[endBlockMarker] + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlock() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = n + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.min) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func (f *decompressor) makeReader(r io.Reader) { + if rr, ok := r.(Reader); ok { + f.rBuf = nil + f.r = rr + return + } + // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. + if f.rBuf != nil { + f.rBuf.Reset(r) + } else { + // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. + f.rBuf = bufio.NewReader(r) + } + f.r = f.rBuf +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + rBuf: f.rBuf, + bits: f.bits, + codebits: f.codebits, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.makeReader(r) + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement [io.ByteReader], +// the decompressor may read more data than necessary from r. +// The reader returns [io.EOF] after the final block in the DEFLATE stream has +// been encountered. Any trailing data after the final block is ignored. +// +// The [io.ReadCloser] returned by NewReader also implements [Resetter]. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like [NewReader] but initializes the reader +// with a preset dictionary. The returned reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by [NewWriterDict]. +// +// The ReadCloser returned by NewReaderDict also implements [Resetter]. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go new file mode 100644 index 00000000..9ece08f4 --- /dev/null +++ b/internal/flatex/slice_inflate.go @@ -0,0 +1,479 @@ +package flatex + +import ( + "io" + "math/bits" +) + +// sliceInflater is a specialized DEFLATE decoder that reads directly from an +// in-memory byte slice. It mirrors the main decompressor but avoids the +// overhead of the Reader interfaces, enabling faster byte-slice decoding. +type sliceInflater struct { + input []byte + pos int + roffset int64 + + b uint32 + nb uint + + h1, h2 huffmanDecoder + + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + dict dictDecoder + + toRead []byte + step func(*sliceInflater) + stepState int + final bool + err error + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *sliceInflater) reset(src []byte, dict []byte) error { + bits := f.bits + codebits := f.codebits + dictState := f.dict + *f = sliceInflater{ + input: src, + bits: bits, + codebits: codebits, + dict: dictState, + step: (*sliceInflater).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +func (f *sliceInflater) readByte() (byte, error) { + if f.pos >= len(f.input) { + return 0, io.ErrUnexpectedEOF + } + b := f.input[f.pos] + f.pos++ + f.roffset++ + return b, nil +} + +func (f *sliceInflater) readBytes(n int) ([]byte, error) { + if n < 0 || f.pos+n > len(f.input) { + f.pos = len(f.input) + return nil, io.ErrUnexpectedEOF + } + s := f.input[f.pos : f.pos+n] + f.pos += n + f.roffset += int64(n) + return s, nil +} + +func (f *sliceInflater) nextBlock() { + for f.nb < 1+2 { + if err := f.moreBits(); err != nil { + f.err = err + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + if err := f.readHuffman(); err != nil { + f.err = err + return + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + f.err = CorruptInputError(f.roffset) + } +} + +func (f *sliceInflater) huffmanBlock() { + const ( + stateInit = iota + stateDict + ) + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).huffmanBlock + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + f.err = CorruptInputError(f.roffset) + return + } + + if dist > f.dict.histSize() { + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).huffmanBlock + f.stepState = stateDict + return + } + goto readLiteral + } +} + +func (f *sliceInflater) dataBlock() { + f.nb = 0 + f.b = 0 + + hdr, err := f.readBytes(4) + if err != nil { + f.err = err + return + } + n := int(hdr[0]) | int(hdr[1])<<8 + nn := int(hdr[2]) | int(hdr[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = n + f.copyData() +} + +func (f *sliceInflater) copyData() { + for { + if f.copyLen == 0 { + f.finishBlock() + return + } + buf := f.dict.writeSlice() + if len(buf) == 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).copyData + return + } + n := f.copyLen + if n > len(buf) { + n = len(buf) + } + if f.pos+n > len(f.input) { + f.err = io.ErrUnexpectedEOF + return + } + copy(buf[:n], f.input[f.pos:f.pos+n]) + f.pos += n + f.roffset += int64(n) + f.copyLen -= n + f.dict.writeMark(n) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).copyData + return + } + } +} + +func (f *sliceInflater) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*sliceInflater).nextBlock + f.stepState = 0 +} + +func (f *sliceInflater) moreBits() error { + c, err := f.readByte() + if err != nil { + return err + } + f.b |= uint32(c) << (f.nb & 31) + f.nb += 8 + return nil +} + +func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) { + n := uint(h.min) + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.readByte() + if err != nil { + f.b = b + f.nb = nb + return 0, err + } + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func (f *sliceInflater) readHuffman() error { + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + f.b >>= 4 + f.nb -= 5 + 5 + 4 + codebits := f.codebits[:] + bits := f.bits[:] + for i := range codebits { + codebits[i] = 0 + } + for i := range bits { + bits[i] = 0 + } + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + if !f.h1.init(codebits) { + return CorruptInputError(f.roffset) + } + for i := range bits { + bits[i] = 0 + } + i := 0 + for i < nlit+ndist { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + switch { + case x < 16: + bits[i] = x + i++ + case x == 16: + if i == 0 { + return CorruptInputError(f.roffset) + } + repeat := 3 + for f.nb < 2 { + if err := f.moreBits(); err != nil { + return err + } + } + repeat += int(f.b & 0x3) + f.b >>= 2 + f.nb -= 2 + for repeat > 0 { + if i >= len(bits) { + return CorruptInputError(f.roffset) + } + bits[i] = bits[i-1] + i++ + repeat-- + } + case x == 17: + repeat := 3 + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + repeat += int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + for repeat > 0 { + if i >= len(bits) { + return CorruptInputError(f.roffset) + } + bits[i] = 0 + i++ + repeat-- + } + case x == 18: + repeat := 11 + for f.nb < 7 { + if err := f.moreBits(); err != nil { + return err + } + } + repeat += int(f.b & 0x7F) + f.b >>= 7 + f.nb -= 7 + for repeat > 0 { + if i >= len(bits) { + return CorruptInputError(f.roffset) + } + bits[i] = 0 + i++ + repeat-- + } + default: + return CorruptInputError(f.roffset) + } + } + if !f.h1.init(bits[:nlit]) { + return CorruptInputError(f.roffset) + } + if !f.h2.init(bits[nlit : nlit+ndist]) { + return CorruptInputError(f.roffset) + } + if f.h1.min < bits[endBlockMarker] { + f.h1.min = bits[endBlockMarker] + } + return nil +} |
