diff options
| -rw-r--r-- | internal/flatex/decompress_bytes.go | 13 | ||||
| -rw-r--r-- | internal/flatex/decompress_test.go | 6 | ||||
| -rw-r--r-- | internal/flatex/inflate.go | 577 |
3 files changed, 9 insertions, 587 deletions
diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go index 9f1ffcfd..45a8b834 100644 --- a/internal/flatex/decompress_bytes.go +++ b/internal/flatex/decompress_bytes.go @@ -14,17 +14,16 @@ type bufferDecompressor struct { var bufferDecompressorPool = sync.Pool{ New: func() any { fixedHuffmanDecoderInit() - d := &bufferDecompressor{} - d.inflater.bits = new([maxNumLit + maxNumDist]int) - d.inflater.codebits = new([numCodes]int) + d := &bufferDecompressor{ + inflater: sliceInflater{ + bits: new([maxNumLit + maxNumDist]int), + codebits: new([numCodes]int), + }, + } return d }, } -func Decompress(src []byte) (bufpool.Buffer, int, error) { - return DecompressSized(src, 0) -} - func DecompressSized(src []byte, sizeHint int) (bufpool.Buffer, int, error) { d := bufferDecompressorPool.Get().(*bufferDecompressor) defer bufferDecompressorPool.Put(d) diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go index c991ea74..e53a6581 100644 --- a/internal/flatex/decompress_test.go +++ b/internal/flatex/decompress_test.go @@ -22,13 +22,13 @@ func compressDeflate(t *testing.T, payload []byte) []byte { return buf.Bytes() } -func TestDecompress(t *testing.T) { +func TestDecompressSized(t *testing.T) { payload := bytes.Repeat([]byte("golang"), 32) compressed := compressDeflate(t, payload) - out, _, err := Decompress(compressed) + out, _, err := DecompressSized(compressed, 0) if err != nil { - t.Fatalf("Decompress: %v", err) + t.Fatalf("DecompressSized: %v", err) } defer out.Release() diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go index 1d32e8cd..a556dc42 100644 --- a/internal/flatex/inflate.go +++ b/internal/flatex/inflate.go @@ -8,8 +8,6 @@ package flatex import ( - "bufio" - "io" "math/bits" "strconv" "sync" @@ -41,44 +39,6 @@ func (e CorruptInputError) Error() string { return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) } -// An InternalError reports an error in the flate code itself. -type InternalError string - -func (e InternalError) Error() string { return "flate: internal error: " + string(e) } - -// A ReadError reports an error encountered while reading input. -// -// Deprecated: No longer returned. -type ReadError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Read -} - -func (e *ReadError) Error() string { - return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} - -// A WriteError reports an error encountered while writing output. -// -// Deprecated: No longer returned. -type WriteError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Write -} - -func (e *WriteError) Error() string { - return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} - -// Resetter resets a ReadCloser returned by [NewReader] -// to switch to a new underlying [Reader]. This permits reusing a ReadCloser -// instead of allocating a new one. -type Resetter interface { - // Reset discards any buffered data and resets the Resetter as if it was - // newly initialized with the given reader. - Reset(r io.Reader) error -} - // The data structure for decoding Huffman tables is based on that of // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), // For codes smaller than the table width, there are multiple entries @@ -260,514 +220,10 @@ func (h *huffmanDecoder) init(lengths []int) bool { return true } -// The actual read interface needed by [NewReader]. -// If the passed in [io.Reader] does not also have ReadByte, -// the [NewReader] will introduce its own buffering. -type Reader interface { - io.Reader - io.ByteReader -} - -// Decompress state. -type decompressor struct { - // Input source. - r Reader - rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader - roffset int64 - - // Input bits, in top of b. - b uint32 - nb uint - - // Huffman decoders for literal/length, distance. - h1, h2 huffmanDecoder - - // Length arrays used to define Huffman codes. - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - // Output history, buffer. - window windowDecoder - - // Temporary buffer (avoids repeated allocation). - buf [4]byte - - // Next step in the decompression, - // and decompression state. - step func(*decompressor) - stepState int - final bool - err error - toRead []byte - hl, hd *huffmanDecoder - copyLen int - copyDist int -} - -func (f *decompressor) nextBlock() { - for f.nb < 1+2 { - if f.err = f.moreBits(); f.err != nil { - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - case 1: - // compressed, fixed Huffman tables - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlock() - case 2: - // compressed, dynamic Huffman tables - if f.err = f.readHuffman(); f.err != nil { - break - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlock() - default: - // 3 is reserved. - f.err = CorruptInputError(f.roffset) - } -} - -func (f *decompressor) Read(b []byte) (int, error) { - for { - if len(f.toRead) > 0 { - n := copy(b, f.toRead) - f.toRead = f.toRead[n:] - if len(f.toRead) == 0 { - return n, f.err - } - return n, nil - } - if f.err != nil { - return 0, f.err - } - f.step(f) - if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.window.readFlush() - } - } -} - -func (f *decompressor) Close() error { - if f.err == io.EOF { - return nil - } - return f.err -} - // RFC 1951 section 3.2.7. // Compression with dynamic Huffman codes - var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} -func (f *decompressor) readHuffman() error { - // HLIT[5], HDIST[5], HCLEN[4]. - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - // numCodes is 19, so nclen is always valid. - f.b >>= 4 - f.nb -= 5 + 5 + 4 - - // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - f.codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - for i := nclen; i < len(codeOrder); i++ { - f.codebits[codeOrder[i]] = 0 - } - if !f.h1.init(f.codebits[0:]) { - return CorruptInputError(f.roffset) - } - - // HLIT + 257 code lengths, HDIST + 1 code lengths, - // using the code length Huffman code. - for i, n := 0, nlit+ndist; i < n; { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - if x < 16 { - // Actual length. - f.bits[i] = x - i++ - continue - } - // Repeat previous length or zero. - var rep int - var nb uint - var b int - switch x { - default: - return InternalError("unexpected length code") - case 16: - rep = 3 - nb = 2 - if i == 0 { - return CorruptInputError(f.roffset) - } - b = f.bits[i-1] - case 17: - rep = 3 - nb = 3 - b = 0 - case 18: - rep = 11 - nb = 7 - b = 0 - } - for f.nb < nb { - if err := f.moreBits(); err != nil { - return err - } - } - rep += int(f.b & uint32(1<<nb-1)) - f.b >>= nb - f.nb -= nb - if i+rep > n { - return CorruptInputError(f.roffset) - } - for j := 0; j < rep; j++ { - f.bits[i] = b - i++ - } - } - - if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { - return CorruptInputError(f.roffset) - } - - // As an optimization, we can initialize the min bits to read at a time - // for the HLIT tree to the length of the EOB marker since we know that - // every block must terminate with one. This preserves the property that - // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.min < f.bits[endBlockMarker] { - f.h1.min = f.bits[endBlockMarker] - } - - return nil -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBlock() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return - } - var n uint // number of bits extra - var length int - switch { - case v < 256: - f.window.writeByte(byte(v)) - if f.window.availWrite() == 0 { - f.toRead = f.window.readFlush() - f.step = (*decompressor).huffmanBlock - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - length += int(f.b & uint32(1<<n-1)) - f.b >>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - extra |= int(f.b & uint32(1<<nb-1)) - f.b >>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > f.window.histSize() { - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.window.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.window.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.window.readFlush() - f.step = (*decompressor).huffmanBlock // We need to continue this work - f.stepState = stateDict - return - } - goto readLiteral - } -} - -// Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() { - // Uncompressed. - // Discard current half-byte. - f.nb = 0 - f.b = 0 - - // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[0:4]) - f.roffset += int64(nr) - if err != nil { - f.err = noEOF(err) - return - } - n := int(f.buf[0]) | int(f.buf[1])<<8 - nn := int(f.buf[2]) | int(f.buf[3])<<8 - if uint16(nn) != uint16(^n) { - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.window.readFlush() - f.finishBlock() - return - } - - f.copyLen = n - f.copyData() -} - -// copyData copies f.copyLen bytes from the underlying reader into f.hist. -// It pauses for reads when f.hist is full. -func (f *decompressor) copyData() { - buf := f.window.writeSlice() - if len(buf) > f.copyLen { - buf = buf[:f.copyLen] - } - - cnt, err := io.ReadFull(f.r, buf) - f.roffset += int64(cnt) - f.copyLen -= cnt - f.window.writeMark(cnt) - if err != nil { - f.err = noEOF(err) - return - } - - if f.window.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.window.readFlush() - f.step = (*decompressor).copyData - return - } - f.finishBlock() -} - -func (f *decompressor) finishBlock() { - if f.final { - if f.window.availRead() > 0 { - f.toRead = f.window.readFlush() - } - f.err = io.EOF - } - f.step = (*decompressor).nextBlock -} - -// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. -func noEOF(e error) error { - if e == io.EOF { - return io.ErrUnexpectedEOF - } - return e -} - -func (f *decompressor) moreBits() error { - c, err := f.r.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil -} - -// Read the next Huffman-encoded symbol from f according to h. -func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(h.min) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - return 0, noEOF(err) - } - f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & 31) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func (f *decompressor) makeReader(r io.Reader) { - if rr, ok := r.(Reader); ok { - f.rBuf = nil - f.r = rr - return - } - // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. - if f.rBuf != nil { - f.rBuf.Reset(r) - } else { - // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. - f.rBuf = bufio.NewReader(r) - } - f.r = f.rBuf -} - func fixedHuffmanDecoderInit() { fixedOnce.Do(func() { // These come from the RFC section 3.2.6. @@ -787,36 +243,3 @@ func fixedHuffmanDecoderInit() { fixedHuffmanDecoder.init(bits[:]) }) } - -func (f *decompressor) Reset(r io.Reader) error { - *f = decompressor{ - rBuf: f.rBuf, - bits: f.bits, - codebits: f.codebits, - window: f.window, - step: (*decompressor).nextBlock, - } - f.makeReader(r) - f.window.init(maxMatchOffset) - return nil -} - -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement [io.ByteReader], -// the decompressor may read more data than necessary from r. -// The reader returns [io.EOF] after the final block in the DEFLATE stream has -// been encountered. Any trailing data after the final block is ignored. -// -// The [io.ReadCloser] returned by NewReader also implements [Resetter]. -func NewReader(r io.Reader) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.window.init(maxMatchOffset) - return &f -} |
