From ed0a113f034aa42aea23471c4bc0d7af159b7002 Mon Sep 17 00:00:00 2001 From: Runxi Yu Date: Wed, 19 Nov 2025 08:00:00 +0800 Subject: Probably should name the custom packages specially --- internal/flate/LICENSE | 27 -- internal/flate/decompress_bytes.go | 64 --- internal/flate/decompress_test.go | 76 ---- internal/flate/dict_decoder.go | 182 -------- internal/flate/inflate.go | 839 ------------------------------------ internal/flate/slice_inflate.go | 479 -------------------- internal/flatex/LICENSE | 27 ++ internal/flatex/decompress_bytes.go | 64 +++ internal/flatex/decompress_test.go | 76 ++++ internal/flatex/dict_decoder.go | 182 ++++++++ internal/flatex/inflate.go | 839 ++++++++++++++++++++++++++++++++++++ internal/flatex/slice_inflate.go | 479 ++++++++++++++++++++ internal/zlib/LICENSE | 27 -- internal/zlib/decompress.go | 69 --- internal/zlib/decompress_test.go | 84 ---- internal/zlib/reader.go | 198 --------- internal/zlibx/LICENSE | 27 ++ internal/zlibx/decompress.go | 69 +++ internal/zlibx/decompress_test.go | 84 ++++ internal/zlibx/reader.go | 198 +++++++++ 20 files changed, 2045 insertions(+), 2045 deletions(-) delete mode 100644 internal/flate/LICENSE delete mode 100644 internal/flate/decompress_bytes.go delete mode 100644 internal/flate/decompress_test.go delete mode 100644 internal/flate/dict_decoder.go delete mode 100644 internal/flate/inflate.go delete mode 100644 internal/flate/slice_inflate.go create mode 100644 internal/flatex/LICENSE create mode 100644 internal/flatex/decompress_bytes.go create mode 100644 internal/flatex/decompress_test.go create mode 100644 internal/flatex/dict_decoder.go create mode 100644 internal/flatex/inflate.go create mode 100644 internal/flatex/slice_inflate.go delete mode 100644 internal/zlib/LICENSE delete mode 100644 internal/zlib/decompress.go delete mode 100644 internal/zlib/decompress_test.go delete mode 100644 internal/zlib/reader.go create mode 100644 internal/zlibx/LICENSE create mode 100644 internal/zlibx/decompress.go create mode 100644 internal/zlibx/decompress_test.go create mode 100644 internal/zlibx/reader.go (limited to 'internal') diff --git a/internal/flate/LICENSE b/internal/flate/LICENSE deleted file mode 100644 index 2a7cf70d..00000000 --- a/internal/flate/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright 2009 The Go Authors. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google LLC nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/flate/decompress_bytes.go b/internal/flate/decompress_bytes.go deleted file mode 100644 index 18fe21fc..00000000 --- a/internal/flate/decompress_bytes.go +++ /dev/null @@ -1,64 +0,0 @@ -package flate - -import ( - "io" - "sync" - - "git.sr.ht/~runxiyu/furgit/internal/bufpool" -) - -// bufferDecompressor wraps the custom slice inflater so byte-slice -// decompressions avoid repeated allocations. -type bufferDecompressor struct { - inflater sliceInflater -} - -var bufferDecompressorPool = sync.Pool{ - New: func() any { - fixedHuffmanDecoderInit() - d := &bufferDecompressor{} - d.inflater.bits = new([maxNumLit + maxNumDist]int) - d.inflater.codebits = new([numCodes]int) - return d - }, -} - -// Decompress inflates the provided DEFLATE stream and returns the full output -// in a pooled bufpool.Buffer along with the number of consumed bytes from src. -func Decompress(src []byte) (bufpool.Buffer, int, error) { - return DecompressDict(src, nil) -} - -// DecompressDict inflates the provided DEFLATE stream using dict as the preset -// dictionary and returns the full output in a pooled bufpool.Buffer. The second -// returned value reports how many bytes of src were consumed. -func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) { - d := bufferDecompressorPool.Get().(*bufferDecompressor) - defer bufferDecompressorPool.Put(d) - - if err := d.inflater.reset(src, dict); err != nil { - return bufpool.Buffer{}, 0, err - } - - out := bufpool.Borrow(bufpool.DefaultBufferCap) - out.Resize(0) - - for { - if len(d.inflater.toRead) > 0 { - out.Append(d.inflater.toRead) - d.inflater.toRead = nil - continue - } - if d.inflater.err != nil { - if d.inflater.err == io.EOF { - return out, d.inflater.pos, nil - } - out.Release() - return bufpool.Buffer{}, 0, d.inflater.err - } - d.inflater.step(&d.inflater) - if d.inflater.err != nil && len(d.inflater.toRead) == 0 { - d.inflater.toRead = d.inflater.dict.readFlush() - } - } -} diff --git a/internal/flate/decompress_test.go b/internal/flate/decompress_test.go deleted file mode 100644 index 089159d6..00000000 --- a/internal/flate/decompress_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package flate - -import ( - "bytes" - stdflate "compress/flate" - "testing" -) - -func compressDeflate(t *testing.T, payload, dict []byte) []byte { - t.Helper() - var buf bytes.Buffer - var ( - w *stdflate.Writer - err error - ) - if dict != nil { - w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict) - } else { - w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression) - } - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - if _, err := w.Write(payload); err != nil { - t.Fatalf("Write: %v", err) - } - if err := w.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - return buf.Bytes() -} - -func TestDecompress(t *testing.T) { - payload := bytes.Repeat([]byte("golang"), 32) - compressed := compressDeflate(t, payload, nil) - - out, _, err := Decompress(compressed) - if err != nil { - t.Fatalf("Decompress: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressDict(t *testing.T) { - dict := []byte("furgit dictionary payload") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...) - - compressed := compressDeflate(t, payload, dict) - - out, _, err := DecompressDict(compressed, dict) - if err != nil { - t.Fatalf("DecompressDict: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressDictMissing(t *testing.T) { - dict := []byte("shared prefix to enforce dictionary usage") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" trailing data to force reference")...) - - compressed := compressDeflate(t, payload, dict) - - if _, _, err := Decompress(compressed); err == nil { - t.Fatalf("expected error when dictionary missing") - } -} diff --git a/internal/flate/dict_decoder.go b/internal/flate/dict_decoder.go deleted file mode 100644 index d2c19040..00000000 --- a/internal/flate/dict_decoder.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// dictDecoder implements the LZ77 sliding dictionary as used in decompression. -// LZ77 decompresses data through sequences of two forms of commands: -// -// - Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. -// -// - Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. -// -// For performance reasons, this implementation performs little to no sanity -// checks about the arguments. As such, the invariants documented for each -// method call must be respected. -type dictDecoder struct { - hist []byte // Sliding window history - - // Invariant: 0 <= rdPos <= wrPos <= len(hist) - wrPos int // Current output position in buffer - rdPos int // Have emitted hist[:rdPos] already - full bool // Has a full window length been written yet? -} - -// init initializes dictDecoder to have a sliding window dictionary of the given -// size. If a preset dict is provided, it will initialize the dictionary with -// the contents of dict. -func (dd *dictDecoder) init(size int, dict []byte) { - *dd = dictDecoder{hist: dd.hist} - - if cap(dd.hist) < size { - dd.hist = make([]byte, size) - } - dd.hist = dd.hist[:size] - - if len(dict) > len(dd.hist) { - dict = dict[len(dict)-len(dd.hist):] - } - dd.wrPos = copy(dd.hist, dict) - if dd.wrPos == len(dd.hist) { - dd.wrPos = 0 - dd.full = true - } - dd.rdPos = dd.wrPos -} - -// histSize reports the total amount of historical data in the dictionary. -func (dd *dictDecoder) histSize() int { - if dd.full { - return len(dd.hist) - } - return dd.wrPos -} - -// availRead reports the number of bytes that can be flushed by readFlush. -func (dd *dictDecoder) availRead() int { - return dd.wrPos - dd.rdPos -} - -// availWrite reports the available amount of output buffer space. -func (dd *dictDecoder) availWrite() int { - return len(dd.hist) - dd.wrPos -} - -// writeSlice returns a slice of the available buffer to write data to. -// -// This invariant will be kept: len(s) <= availWrite() -func (dd *dictDecoder) writeSlice() []byte { - return dd.hist[dd.wrPos:] -} - -// writeMark advances the writer pointer by cnt. -// -// This invariant must be kept: 0 <= cnt <= availWrite() -func (dd *dictDecoder) writeMark(cnt int) { - dd.wrPos += cnt -} - -// writeByte writes a single byte to the dictionary. -// -// This invariant must be kept: 0 < availWrite() -func (dd *dictDecoder) writeByte(c byte) { - dd.hist[dd.wrPos] = c - dd.wrPos++ -} - -// writeCopy copies a string at a given (dist, length) to the output. -// This returns the number of bytes copied and may be less than the requested -// length if the available space in the output buffer is too small. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) writeCopy(dist, length int) int { - dstBase := dd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := dstPos + length - if endPos > len(dd.hist) { - endPos = len(dd.hist) - } - - // Copy non-overlapping section after destination position. - // - // This section is non-overlapping in that the copy length for this section - // is always less than or equal to the backwards distance. This can occur - // if a distance refers to data that wraps-around in the buffer. - // Thus, a backwards copy is performed here; that is, the exact bytes in - // the source prior to the copy is placed in the destination. - if srcPos < 0 { - srcPos += len(dd.hist) - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) - srcPos = 0 - } - - // Copy possibly overlapping section before destination position. - // - // This section can overlap if the copy length for this section is larger - // than the backwards distance. This is allowed by LZ77 so that repeated - // strings can be succinctly represented using (dist, length) pairs. - // Thus, a forwards copy is performed here; that is, the bytes copied is - // possibly dependent on the resulting bytes in the destination as the copy - // progresses along. This is functionally equivalent to the following: - // - // for i := 0; i < endPos-dstPos; i++ { - // dd.hist[dstPos+i] = dd.hist[srcPos+i] - // } - // dstPos = endPos - // - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// tryWriteCopy tries to copy a string at a given (distance, length) to the -// output. This specialized version is optimized for short distances. -// -// This method is designed to be inlined for performance reasons. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) tryWriteCopy(dist, length int) int { - dstPos := dd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(dd.hist) { - return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - // Copy possibly overlapping section before destination position. - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// readFlush returns a slice of the historical buffer that is ready to be -// emitted to the user. The data returned by readFlush must be fully consumed -// before calling any other dictDecoder methods. -func (dd *dictDecoder) readFlush() []byte { - toRead := dd.hist[dd.rdPos:dd.wrPos] - dd.rdPos = dd.wrPos - if dd.wrPos == len(dd.hist) { - dd.wrPos, dd.rdPos = 0, 0 - dd.full = true - } - return toRead -} diff --git a/internal/flate/inflate.go b/internal/flate/inflate.go deleted file mode 100644 index cfcff084..00000000 --- a/internal/flate/inflate.go +++ /dev/null @@ -1,839 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package flate implements the DEFLATE compressed data format, described in -// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access -// to DEFLATE-based file formats. -package flate - -import ( - "bufio" - "io" - "math/bits" - "strconv" - "sync" -) - -const ( - // The special code used to mark the end of a block. - endBlockMarker = 256 - maxCodeLen = 16 // max length of Huffman code - maxMatchOffset = 1 << 15 // The largest match offset - // The next three numbers come from the RFC section 3.2.7, with the - // additional proviso in section 3.2.5 which implies that distance codes - // 30 and 31 should never occur in compressed data. - maxNumLit = 286 - maxNumDist = 30 - numCodes = 19 // number of codes in Huffman meta-code -) - -// Initialize the fixedHuffmanDecoder only once upon first use. -var fixedOnce sync.Once -var fixedHuffmanDecoder huffmanDecoder - -// A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError int64 - -func (e CorruptInputError) Error() string { - return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) -} - -// An InternalError reports an error in the flate code itself. -type InternalError string - -func (e InternalError) Error() string { return "flate: internal error: " + string(e) } - -// A ReadError reports an error encountered while reading input. -// -// Deprecated: No longer returned. -type ReadError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Read -} - -func (e *ReadError) Error() string { - return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} - -// A WriteError reports an error encountered while writing output. -// -// Deprecated: No longer returned. -type WriteError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Write -} - -func (e *WriteError) Error() string { - return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} - -// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] -// to switch to a new underlying [Reader]. This permits reusing a ReadCloser -// instead of allocating a new one. -type Resetter interface { - // Reset discards any buffered data and resets the Resetter as if it was - // newly initialized with the given reader. - Reset(r io.Reader, dict []byte) error -} - -// The data structure for decoding Huffman tables is based on that of -// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), -// For codes smaller than the table width, there are multiple entries -// (each combination of trailing bits has the same value). For codes -// larger than the table width, the table contains a link to an overflow -// table. The width of each entry in the link table is the maximum code -// size minus the chunk width. -// -// Note that you can do a lookup in the table even without all bits -// filled. Since the extra bits are zero, and the DEFLATE Huffman codes -// have the property that shorter codes come before longer ones, the -// bit length estimate in the result is a lower bound on the actual -// number of bits. -// -// See the following: -// https://github.com/madler/zlib/raw/master/doc/algorithm.txt - -// chunk & 15 is number of bits -// chunk >> 4 is value, including table link - -const ( - huffmanChunkBits = 9 - huffmanNumChunks = 1 << huffmanChunkBits - huffmanCountMask = 15 - huffmanValueShift = 4 -) - -type huffmanDecoder struct { - min int // the minimum code length - chunks [huffmanNumChunks]uint32 // chunks as described above - links [][]uint32 // overflow links - linkMask uint32 // mask the width of the link table -} - -// Initialize Huffman decoding tables from array of code lengths. -// Following this function, h is guaranteed to be initialized into a complete -// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a -// degenerate case where the tree has only a single symbol with length 1. Empty -// trees are permitted. -func (h *huffmanDecoder) init(lengths []int) bool { - // Sanity enables additional runtime tests during Huffman - // table construction. It's intended to be used during - // development to supplement the currently ad-hoc unit tests. - const sanity = false - - if h.min != 0 { - *h = huffmanDecoder{} - } - - // Count number of codes of each length, - // compute min and max length. - var count [maxCodeLen]int - var min, max int - for _, n := range lengths { - if n == 0 { - continue - } - if min == 0 || n < min { - min = n - } - if n > max { - max = n - } - count[n]++ - } - - // Empty tree. The decompressor.huffSym function will fail later if the tree - // is used. Technically, an empty tree is only valid for the HDIST tree and - // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree - // is guaranteed to fail since it will attempt to use the tree to decode the - // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is - // guaranteed to fail later since the compressed data section must be - // composed of at least one symbol (the end-of-block marker). - if max == 0 { - return true - } - - code := 0 - var nextcode [maxCodeLen]int - for i := min; i <= max; i++ { - code <<= 1 - nextcode[i] = code - code += count[i] - } - - // Check that the coding is complete (i.e., that we've - // assigned all 2-to-the-max possible bit sequences). - // Exception: To be compatible with zlib, we also need to - // accept degenerate single-code codings. See also - // TestDegenerateHuffmanCoding. - if code != 1< huffmanChunkBits { - numLinks := 1 << (uint(max) - huffmanChunkBits) - h.linkMask = uint32(numLinks - 1) - - // create link tables - link := nextcode[huffmanChunkBits+1] >> 1 - h.links = make([][]uint32, huffmanNumChunks-link) - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(bits.Reverse16(uint16(j))) - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint32(off<>= uint(16 - n) - if n <= huffmanChunkBits { - for off := reverse; off < len(h.chunks); off += 1 << uint(n) { - // We should never need to overwrite - // an existing chunk. Also, 0 is - // never a valid chunk, because the - // lower 4 "count" bits should be - // between 1 and 15. - if sanity && h.chunks[off] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[off] = chunk - } - } else { - j := reverse & (huffmanNumChunks - 1) - if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { - // Longer codes should have been - // associated with a link table above. - panic("impossible: not an indirect chunk") - } - value := h.chunks[j] >> huffmanValueShift - linktab := h.links[value] - reverse >>= huffmanChunkBits - for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { - if sanity && linktab[off] != 0 { - panic("impossible: overwriting existing chunk") - } - linktab[off] = chunk - } - } - } - - if sanity { - // Above we've sanity checked that we never overwrote - // an existing entry. Here we additionally check that - // we filled the tables completely. - for i, chunk := range h.chunks { - if chunk == 0 { - // As an exception, in the degenerate - // single-code case, we allow odd - // chunks to be missing. - if code == 1 && i%2 == 1 { - continue - } - panic("impossible: missing chunk") - } - } - for _, linktab := range h.links { - for _, chunk := range linktab { - if chunk == 0 { - panic("impossible: missing chunk") - } - } - } - } - - return true -} - -// The actual read interface needed by [NewReader]. -// If the passed in [io.Reader] does not also have ReadByte, -// the [NewReader] will introduce its own buffering. -type Reader interface { - io.Reader - io.ByteReader -} - -// Decompress state. -type decompressor struct { - // Input source. - r Reader - rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader - roffset int64 - - // Input bits, in top of b. - b uint32 - nb uint - - // Huffman decoders for literal/length, distance. - h1, h2 huffmanDecoder - - // Length arrays used to define Huffman codes. - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - // Output history, buffer. - dict dictDecoder - - // Temporary buffer (avoids repeated allocation). - buf [4]byte - - // Next step in the decompression, - // and decompression state. - step func(*decompressor) - stepState int - final bool - err error - toRead []byte - hl, hd *huffmanDecoder - copyLen int - copyDist int -} - -func (f *decompressor) nextBlock() { - for f.nb < 1+2 { - if f.err = f.moreBits(); f.err != nil { - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - case 1: - // compressed, fixed Huffman tables - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlock() - case 2: - // compressed, dynamic Huffman tables - if f.err = f.readHuffman(); f.err != nil { - break - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlock() - default: - // 3 is reserved. - f.err = CorruptInputError(f.roffset) - } -} - -func (f *decompressor) Read(b []byte) (int, error) { - for { - if len(f.toRead) > 0 { - n := copy(b, f.toRead) - f.toRead = f.toRead[n:] - if len(f.toRead) == 0 { - return n, f.err - } - return n, nil - } - if f.err != nil { - return 0, f.err - } - f.step(f) - if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.dict.readFlush() // Flush what's left in case of error - } - } -} - -func (f *decompressor) Close() error { - if f.err == io.EOF { - return nil - } - return f.err -} - -// RFC 1951 section 3.2.7. -// Compression with dynamic Huffman codes - -var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -func (f *decompressor) readHuffman() error { - // HLIT[5], HDIST[5], HCLEN[4]. - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - // numCodes is 19, so nclen is always valid. - f.b >>= 4 - f.nb -= 5 + 5 + 4 - - // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - f.codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - for i := nclen; i < len(codeOrder); i++ { - f.codebits[codeOrder[i]] = 0 - } - if !f.h1.init(f.codebits[0:]) { - return CorruptInputError(f.roffset) - } - - // HLIT + 257 code lengths, HDIST + 1 code lengths, - // using the code length Huffman code. - for i, n := 0, nlit+ndist; i < n; { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - if x < 16 { - // Actual length. - f.bits[i] = x - i++ - continue - } - // Repeat previous length or zero. - var rep int - var nb uint - var b int - switch x { - default: - return InternalError("unexpected length code") - case 16: - rep = 3 - nb = 2 - if i == 0 { - return CorruptInputError(f.roffset) - } - b = f.bits[i-1] - case 17: - rep = 3 - nb = 3 - b = 0 - case 18: - rep = 11 - nb = 7 - b = 0 - } - for f.nb < nb { - if err := f.moreBits(); err != nil { - return err - } - } - rep += int(f.b & uint32(1<>= nb - f.nb -= nb - if i+rep > n { - return CorruptInputError(f.roffset) - } - for j := 0; j < rep; j++ { - f.bits[i] = b - i++ - } - } - - if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { - return CorruptInputError(f.roffset) - } - - // As an optimization, we can initialize the min bits to read at a time - // for the HLIT tree to the length of the EOB marker since we know that - // every block must terminate with one. This preserves the property that - // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.min < f.bits[endBlockMarker] { - f.h1.min = f.bits[endBlockMarker] - } - - return nil -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBlock() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return - } - var n uint // number of bits extra - var length int - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - length += int(f.b & uint32(1<>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - extra |= int(f.b & uint32(1<>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock // We need to continue this work - f.stepState = stateDict - return - } - goto readLiteral - } -} - -// Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() { - // Uncompressed. - // Discard current half-byte. - f.nb = 0 - f.b = 0 - - // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[0:4]) - f.roffset += int64(nr) - if err != nil { - f.err = noEOF(err) - return - } - n := int(f.buf[0]) | int(f.buf[1])<<8 - nn := int(f.buf[2]) | int(f.buf[3])<<8 - if uint16(nn) != uint16(^n) { - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.dict.readFlush() - f.finishBlock() - return - } - - f.copyLen = n - f.copyData() -} - -// copyData copies f.copyLen bytes from the underlying reader into f.hist. -// It pauses for reads when f.hist is full. -func (f *decompressor) copyData() { - buf := f.dict.writeSlice() - if len(buf) > f.copyLen { - buf = buf[:f.copyLen] - } - - cnt, err := io.ReadFull(f.r, buf) - f.roffset += int64(cnt) - f.copyLen -= cnt - f.dict.writeMark(cnt) - if err != nil { - f.err = noEOF(err) - return - } - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData - return - } - f.finishBlock() -} - -func (f *decompressor) finishBlock() { - if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - f.err = io.EOF - } - f.step = (*decompressor).nextBlock -} - -// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. -func noEOF(e error) error { - if e == io.EOF { - return io.ErrUnexpectedEOF - } - return e -} - -func (f *decompressor) moreBits() error { - c, err := f.r.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << f.nb - f.nb += 8 - return nil -} - -// Read the next Huffman-encoded symbol from f according to h. -func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(h.min) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - return 0, noEOF(err) - } - f.roffset++ - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & 31) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func (f *decompressor) makeReader(r io.Reader) { - if rr, ok := r.(Reader); ok { - f.rBuf = nil - f.r = rr - return - } - // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. - if f.rBuf != nil { - f.rBuf.Reset(r) - } else { - // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. - f.rBuf = bufio.NewReader(r) - } - f.r = f.rBuf -} - -func fixedHuffmanDecoderInit() { - fixedOnce.Do(func() { - // These come from the RFC section 3.2.6. - var bits [288]int - for i := 0; i < 144; i++ { - bits[i] = 8 - } - for i := 144; i < 256; i++ { - bits[i] = 9 - } - for i := 256; i < 280; i++ { - bits[i] = 7 - } - for i := 280; i < 288; i++ { - bits[i] = 8 - } - fixedHuffmanDecoder.init(bits[:]) - }) -} - -func (f *decompressor) Reset(r io.Reader, dict []byte) error { - *f = decompressor{ - rBuf: f.rBuf, - bits: f.bits, - codebits: f.codebits, - dict: f.dict, - step: (*decompressor).nextBlock, - } - f.makeReader(r) - f.dict.init(maxMatchOffset, dict) - return nil -} - -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement [io.ByteReader], -// the decompressor may read more data than necessary from r. -// The reader returns [io.EOF] after the final block in the DEFLATE stream has -// been encountered. Any trailing data after the final block is ignored. -// -// The [io.ReadCloser] returned by NewReader also implements [Resetter]. -func NewReader(r io.Reader) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, nil) - return &f -} - -// NewReaderDict is like [NewReader] but initializes the reader -// with a preset dictionary. The returned reader behaves as if -// the uncompressed data stream started with the given dictionary, -// which has already been read. NewReaderDict is typically used -// to read data compressed by [NewWriterDict]. -// -// The ReadCloser returned by NewReaderDict also implements [Resetter]. -func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, dict) - return &f -} diff --git a/internal/flate/slice_inflate.go b/internal/flate/slice_inflate.go deleted file mode 100644 index 0df4ed45..00000000 --- a/internal/flate/slice_inflate.go +++ /dev/null @@ -1,479 +0,0 @@ -package flate - -import ( - "io" - "math/bits" -) - -// sliceInflater is a specialized DEFLATE decoder that reads directly from an -// in-memory byte slice. It mirrors the main decompressor but avoids the -// overhead of the Reader interfaces, enabling faster byte-slice decoding. -type sliceInflater struct { - input []byte - pos int - roffset int64 - - b uint32 - nb uint - - h1, h2 huffmanDecoder - - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - dict dictDecoder - - toRead []byte - step func(*sliceInflater) - stepState int - final bool - err error - hl, hd *huffmanDecoder - copyLen int - copyDist int -} - -func (f *sliceInflater) reset(src []byte, dict []byte) error { - bits := f.bits - codebits := f.codebits - dictState := f.dict - *f = sliceInflater{ - input: src, - bits: bits, - codebits: codebits, - dict: dictState, - step: (*sliceInflater).nextBlock, - } - f.dict.init(maxMatchOffset, dict) - return nil -} - -func (f *sliceInflater) readByte() (byte, error) { - if f.pos >= len(f.input) { - return 0, io.ErrUnexpectedEOF - } - b := f.input[f.pos] - f.pos++ - f.roffset++ - return b, nil -} - -func (f *sliceInflater) readBytes(n int) ([]byte, error) { - if n < 0 || f.pos+n > len(f.input) { - f.pos = len(f.input) - return nil, io.ErrUnexpectedEOF - } - s := f.input[f.pos : f.pos+n] - f.pos += n - f.roffset += int64(n) - return s, nil -} - -func (f *sliceInflater) nextBlock() { - for f.nb < 1+2 { - if err := f.moreBits(); err != nil { - f.err = err - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - case 1: - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlock() - case 2: - if err := f.readHuffman(); err != nil { - f.err = err - return - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlock() - default: - f.err = CorruptInputError(f.roffset) - } -} - -func (f *sliceInflater) huffmanBlock() { - const ( - stateInit = iota - stateDict - ) - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return - } - var n uint - var length int - switch { - case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).huffmanBlock - f.stepState = stateInit - return - } - goto readLiteral - case v == 256: - f.finishBlock() - return - case v < 265: - length = v - (257 - 3) - n = 0 - case v < 269: - length = v*2 - (265*2 - 11) - n = 1 - case v < 273: - length = v*4 - (269*4 - 19) - n = 2 - case v < 277: - length = v*8 - (273*8 - 35) - n = 3 - case v < 281: - length = v*16 - (277*16 - 67) - n = 4 - case v < 285: - length = v*32 - (281*32 - 131) - n = 5 - case v < maxNumLit: - length = 258 - n = 0 - default: - f.err = CorruptInputError(f.roffset) - return - } - if n > 0 { - for f.nb < n { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - length += int(f.b & uint32(1<>= n - f.nb -= n - } - - var dist int - if f.hd == nil { - for f.nb < 5 { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) - f.b >>= 5 - f.nb -= 5 - } else { - if dist, err = f.huffSym(f.hd); err != nil { - f.err = err - return - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - extra := (dist & 1) << nb - for f.nb < nb { - if err = f.moreBits(); err != nil { - f.err = err - return - } - } - extra |= int(f.b & uint32(1<>= nb - f.nb -= nb - dist = 1<<(nb+1) + 1 + extra - default: - f.err = CorruptInputError(f.roffset) - return - } - - if dist > f.dict.histSize() { - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, dist - goto copyHistory - } - -copyHistory: - { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).huffmanBlock - f.stepState = stateDict - return - } - goto readLiteral - } -} - -func (f *sliceInflater) dataBlock() { - f.nb = 0 - f.b = 0 - - hdr, err := f.readBytes(4) - if err != nil { - f.err = err - return - } - n := int(hdr[0]) | int(hdr[1])<<8 - nn := int(hdr[2]) | int(hdr[3])<<8 - if uint16(nn) != uint16(^n) { - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.dict.readFlush() - f.finishBlock() - return - } - - f.copyLen = n - f.copyData() -} - -func (f *sliceInflater) copyData() { - for { - if f.copyLen == 0 { - f.finishBlock() - return - } - buf := f.dict.writeSlice() - if len(buf) == 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).copyData - return - } - n := f.copyLen - if n > len(buf) { - n = len(buf) - } - if f.pos+n > len(f.input) { - f.err = io.ErrUnexpectedEOF - return - } - copy(buf[:n], f.input[f.pos:f.pos+n]) - f.pos += n - f.roffset += int64(n) - f.copyLen -= n - f.dict.writeMark(n) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() - f.step = (*sliceInflater).copyData - return - } - } -} - -func (f *sliceInflater) finishBlock() { - if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - f.err = io.EOF - } - f.step = (*sliceInflater).nextBlock - f.stepState = 0 -} - -func (f *sliceInflater) moreBits() error { - c, err := f.readByte() - if err != nil { - return err - } - f.b |= uint32(c) << (f.nb & 31) - f.nb += 8 - return nil -} - -func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) { - n := uint(h.min) - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.readByte() - if err != nil { - f.b = b - f.nb = nb - return 0, err - } - b |= uint32(c) << (nb & 31) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & 31) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func (f *sliceInflater) readHuffman() error { - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - f.b >>= 4 - f.nb -= 5 + 5 + 4 - codebits := f.codebits[:] - bits := f.bits[:] - for i := range codebits { - codebits[i] = 0 - } - for i := range bits { - bits[i] = 0 - } - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - if !f.h1.init(codebits) { - return CorruptInputError(f.roffset) - } - for i := range bits { - bits[i] = 0 - } - i := 0 - for i < nlit+ndist { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - switch { - case x < 16: - bits[i] = x - i++ - case x == 16: - if i == 0 { - return CorruptInputError(f.roffset) - } - repeat := 3 - for f.nb < 2 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x3) - f.b >>= 2 - f.nb -= 2 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = bits[i-1] - i++ - repeat-- - } - case x == 17: - repeat := 3 - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = 0 - i++ - repeat-- - } - case x == 18: - repeat := 11 - for f.nb < 7 { - if err := f.moreBits(); err != nil { - return err - } - } - repeat += int(f.b & 0x7F) - f.b >>= 7 - f.nb -= 7 - for repeat > 0 { - if i >= len(bits) { - return CorruptInputError(f.roffset) - } - bits[i] = 0 - i++ - repeat-- - } - default: - return CorruptInputError(f.roffset) - } - } - if !f.h1.init(bits[:nlit]) { - return CorruptInputError(f.roffset) - } - if !f.h2.init(bits[nlit : nlit+ndist]) { - return CorruptInputError(f.roffset) - } - if f.h1.min < bits[endBlockMarker] { - f.h1.min = bits[endBlockMarker] - } - return nil -} diff --git a/internal/flatex/LICENSE b/internal/flatex/LICENSE new file mode 100644 index 00000000..2a7cf70d --- /dev/null +++ b/internal/flatex/LICENSE @@ -0,0 +1,27 @@ +Copyright 2009 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go new file mode 100644 index 00000000..5a660b0a --- /dev/null +++ b/internal/flatex/decompress_bytes.go @@ -0,0 +1,64 @@ +package flatex + +import ( + "io" + "sync" + + "git.sr.ht/~runxiyu/furgit/internal/bufpool" +) + +// bufferDecompressor wraps the custom slice inflater so byte-slice +// decompressions avoid repeated allocations. +type bufferDecompressor struct { + inflater sliceInflater +} + +var bufferDecompressorPool = sync.Pool{ + New: func() any { + fixedHuffmanDecoderInit() + d := &bufferDecompressor{} + d.inflater.bits = new([maxNumLit + maxNumDist]int) + d.inflater.codebits = new([numCodes]int) + return d + }, +} + +// Decompress inflates the provided DEFLATE stream and returns the full output +// in a pooled bufpool.Buffer along with the number of consumed bytes from src. +func Decompress(src []byte) (bufpool.Buffer, int, error) { + return DecompressDict(src, nil) +} + +// DecompressDict inflates the provided DEFLATE stream using dict as the preset +// dictionary and returns the full output in a pooled bufpool.Buffer. The second +// returned value reports how many bytes of src were consumed. +func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, int, error) { + d := bufferDecompressorPool.Get().(*bufferDecompressor) + defer bufferDecompressorPool.Put(d) + + if err := d.inflater.reset(src, dict); err != nil { + return bufpool.Buffer{}, 0, err + } + + out := bufpool.Borrow(bufpool.DefaultBufferCap) + out.Resize(0) + + for { + if len(d.inflater.toRead) > 0 { + out.Append(d.inflater.toRead) + d.inflater.toRead = nil + continue + } + if d.inflater.err != nil { + if d.inflater.err == io.EOF { + return out, d.inflater.pos, nil + } + out.Release() + return bufpool.Buffer{}, 0, d.inflater.err + } + d.inflater.step(&d.inflater) + if d.inflater.err != nil && len(d.inflater.toRead) == 0 { + d.inflater.toRead = d.inflater.dict.readFlush() + } + } +} diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go new file mode 100644 index 00000000..afb84292 --- /dev/null +++ b/internal/flatex/decompress_test.go @@ -0,0 +1,76 @@ +package flatex + +import ( + "bytes" + stdflate "compress/flate" + "testing" +) + +func compressDeflate(t *testing.T, payload, dict []byte) []byte { + t.Helper() + var buf bytes.Buffer + var ( + w *stdflate.Writer + err error + ) + if dict != nil { + w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict) + } else { + w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression) + } + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + if _, err := w.Write(payload); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + return buf.Bytes() +} + +func TestDecompress(t *testing.T) { + payload := bytes.Repeat([]byte("golang"), 32) + compressed := compressDeflate(t, payload, nil) + + out, _, err := Decompress(compressed) + if err != nil { + t.Fatalf("Decompress: %v", err) + } + defer out.Release() + + if !bytes.Equal(out.Bytes(), payload) { + t.Fatalf("unexpected payload: got %q", out.Bytes()) + } +} + +func TestDecompressDict(t *testing.T) { + dict := []byte("furgit dictionary payload") + payload := append([]byte(nil), dict...) + payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...) + + compressed := compressDeflate(t, payload, dict) + + out, _, err := DecompressDict(compressed, dict) + if err != nil { + t.Fatalf("DecompressDict: %v", err) + } + defer out.Release() + + if !bytes.Equal(out.Bytes(), payload) { + t.Fatalf("unexpected payload: got %q", out.Bytes()) + } +} + +func TestDecompressDictMissing(t *testing.T) { + dict := []byte("shared prefix to enforce dictionary usage") + payload := append([]byte(nil), dict...) + payload = append(payload, []byte(" trailing data to force reference")...) + + compressed := compressDeflate(t, payload, dict) + + if _, _, err := Decompress(compressed); err == nil { + t.Fatalf("expected error when dictionary missing") + } +} diff --git a/internal/flatex/dict_decoder.go b/internal/flatex/dict_decoder.go new file mode 100644 index 00000000..7a81e640 --- /dev/null +++ b/internal/flatex/dict_decoder.go @@ -0,0 +1,182 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flatex + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// - Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// - Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go new file mode 100644 index 00000000..5e9d861e --- /dev/null +++ b/internal/flatex/inflate.go @@ -0,0 +1,839 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flatex implements the DEFLATE compressed data format, described in +// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access +// to DEFLATE-based file formats. +package flatex + +import ( + "bufio" + "io" + "math/bits" + "strconv" + "sync" +) + +const ( + // The special code used to mark the end of a block. + endBlockMarker = 256 + maxCodeLen = 16 // max length of Huffman code + maxMatchOffset = 1 << 15 // The largest match offset + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code +) + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Read +} + +func (e *ReadError) Error() string { + return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Write +} + +func (e *WriteError) Error() string { + return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] +// to switch to a new underlying [Reader]. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// https://github.com/madler/zlib/raw/master/doc/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + min int // the minimum code length + chunks [huffmanNumChunks]uint32 // chunks as described above + links [][]uint32 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.min != 0 { + *h = huffmanDecoder{} + } + + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i] = code + code += count[i] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1< huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + h.links = make([][]uint32, huffmanNumChunks-link) + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint32(off<>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by [NewReader]. +// If the passed in [io.Reader] does not also have ReadByte, +// the [NewReader] will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader + roffset int64 + + // Input bits, in top of b. + b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + final bool + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + return err + } + } + rep += int(f.b & uint32(1<>= nb + f.nb -= nb + if i+rep > n { + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the min bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.min < f.bits[endBlockMarker] { + f.h1.min = f.bits[endBlockMarker] + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlock() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlock // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = n + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.min) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func (f *decompressor) makeReader(r io.Reader) { + if rr, ok := r.(Reader); ok { + f.rBuf = nil + f.r = rr + return + } + // Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor. + if f.rBuf != nil { + f.rBuf.Reset(r) + } else { + // bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader. + f.rBuf = bufio.NewReader(r) + } + f.r = f.rBuf +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + rBuf: f.rBuf, + bits: f.bits, + codebits: f.codebits, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.makeReader(r) + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement [io.ByteReader], +// the decompressor may read more data than necessary from r. +// The reader returns [io.EOF] after the final block in the DEFLATE stream has +// been encountered. Any trailing data after the final block is ignored. +// +// The [io.ReadCloser] returned by NewReader also implements [Resetter]. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like [NewReader] but initializes the reader +// with a preset dictionary. The returned reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by [NewWriterDict]. +// +// The ReadCloser returned by NewReaderDict also implements [Resetter]. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go new file mode 100644 index 00000000..9ece08f4 --- /dev/null +++ b/internal/flatex/slice_inflate.go @@ -0,0 +1,479 @@ +package flatex + +import ( + "io" + "math/bits" +) + +// sliceInflater is a specialized DEFLATE decoder that reads directly from an +// in-memory byte slice. It mirrors the main decompressor but avoids the +// overhead of the Reader interfaces, enabling faster byte-slice decoding. +type sliceInflater struct { + input []byte + pos int + roffset int64 + + b uint32 + nb uint + + h1, h2 huffmanDecoder + + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + dict dictDecoder + + toRead []byte + step func(*sliceInflater) + stepState int + final bool + err error + hl, hd *huffmanDecoder + copyLen int + copyDist int +} + +func (f *sliceInflater) reset(src []byte, dict []byte) error { + bits := f.bits + codebits := f.codebits + dictState := f.dict + *f = sliceInflater{ + input: src, + bits: bits, + codebits: codebits, + dict: dictState, + step: (*sliceInflater).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +func (f *sliceInflater) readByte() (byte, error) { + if f.pos >= len(f.input) { + return 0, io.ErrUnexpectedEOF + } + b := f.input[f.pos] + f.pos++ + f.roffset++ + return b, nil +} + +func (f *sliceInflater) readBytes(n int) ([]byte, error) { + if n < 0 || f.pos+n > len(f.input) { + f.pos = len(f.input) + return nil, io.ErrUnexpectedEOF + } + s := f.input[f.pos : f.pos+n] + f.pos += n + f.roffset += int64(n) + return s, nil +} + +func (f *sliceInflater) nextBlock() { + for f.nb < 1+2 { + if err := f.moreBits(); err != nil { + f.err = err + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + if err := f.readHuffman(); err != nil { + f.err = err + return + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + f.err = CorruptInputError(f.roffset) + } +} + +func (f *sliceInflater) huffmanBlock() { + const ( + stateInit = iota + stateDict + ) + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + { + v, err := f.huffSym(f.hl) + if err != nil { + f.err = err + return + } + var n uint + var length int + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).huffmanBlock + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + f.err = err + return + } + } + extra |= int(f.b & uint32(1<>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + f.err = CorruptInputError(f.roffset) + return + } + + if dist > f.dict.histSize() { + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).huffmanBlock + f.stepState = stateDict + return + } + goto readLiteral + } +} + +func (f *sliceInflater) dataBlock() { + f.nb = 0 + f.b = 0 + + hdr, err := f.readBytes(4) + if err != nil { + f.err = err + return + } + n := int(hdr[0]) | int(hdr[1])<<8 + nn := int(hdr[2]) | int(hdr[3])<<8 + if uint16(nn) != uint16(^n) { + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = n + f.copyData() +} + +func (f *sliceInflater) copyData() { + for { + if f.copyLen == 0 { + f.finishBlock() + return + } + buf := f.dict.writeSlice() + if len(buf) == 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).copyData + return + } + n := f.copyLen + if n > len(buf) { + n = len(buf) + } + if f.pos+n > len(f.input) { + f.err = io.ErrUnexpectedEOF + return + } + copy(buf[:n], f.input[f.pos:f.pos+n]) + f.pos += n + f.roffset += int64(n) + f.copyLen -= n + f.dict.writeMark(n) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*sliceInflater).copyData + return + } + } +} + +func (f *sliceInflater) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*sliceInflater).nextBlock + f.stepState = 0 +} + +func (f *sliceInflater) moreBits() error { + c, err := f.readByte() + if err != nil { + return err + } + f.b |= uint32(c) << (f.nb & 31) + f.nb += 8 + return nil +} + +func (f *sliceInflater) huffSym(h *huffmanDecoder) (int, error) { + n := uint(h.min) + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.readByte() + if err != nil { + f.b = b + f.nb = nb + return 0, err + } + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func (f *sliceInflater) readHuffman() error { + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + f.b >>= 4 + f.nb -= 5 + 5 + 4 + codebits := f.codebits[:] + bits := f.bits[:] + for i := range codebits { + codebits[i] = 0 + } + for i := range bits { + bits[i] = 0 + } + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + if !f.h1.init(codebits) { + return CorruptInputError(f.roffset) + } + for i := range bits { + bits[i] = 0 + } + i := 0 + for i < nlit+ndist { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + switch { + case x < 16: + bits[i] = x + i++ + case x == 16: + if i == 0 { + return CorruptInputError(f.roffset) + } + repeat := 3 + for f.nb < 2 { + if err := f.moreBits(); err != nil { + return err + } + } + repeat += int(f.b & 0x3) + f.b >>= 2 + f.nb -= 2 + for repeat > 0 { + if i >= len(bits) { + return CorruptInputError(f.roffset) + } + bits[i] = bits[i-1] + i++ + repeat-- + } + case x == 17: + repeat := 3 + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + repeat += int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + for repeat > 0 { + if i >= len(bits) { + return CorruptInputError(f.roffset) + } + bits[i] = 0 + i++ + repeat-- + } + case x == 18: + repeat := 11 + for f.nb < 7 { + if err := f.moreBits(); err != nil { + return err + } + } + repeat += int(f.b & 0x7F) + f.b >>= 7 + f.nb -= 7 + for repeat > 0 { + if i >= len(bits) { + return CorruptInputError(f.roffset) + } + bits[i] = 0 + i++ + repeat-- + } + default: + return CorruptInputError(f.roffset) + } + } + if !f.h1.init(bits[:nlit]) { + return CorruptInputError(f.roffset) + } + if !f.h2.init(bits[nlit : nlit+ndist]) { + return CorruptInputError(f.roffset) + } + if f.h1.min < bits[endBlockMarker] { + f.h1.min = bits[endBlockMarker] + } + return nil +} diff --git a/internal/zlib/LICENSE b/internal/zlib/LICENSE deleted file mode 100644 index 2a7cf70d..00000000 --- a/internal/zlib/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright 2009 The Go Authors. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google LLC nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/zlib/decompress.go b/internal/zlib/decompress.go deleted file mode 100644 index 55f6d3e2..00000000 --- a/internal/zlib/decompress.go +++ /dev/null @@ -1,69 +0,0 @@ -package zlib - -import ( - "encoding/binary" - "io" - - "git.sr.ht/~runxiyu/furgit/internal/adler32" - "git.sr.ht/~runxiyu/furgit/internal/bufpool" - "git.sr.ht/~runxiyu/furgit/internal/flate" -) - -// Decompress inflates the provided zlib wrapped stream and returns the -// uncompressed data inside a pooled bufpool.Buffer. -func Decompress(src []byte) (bufpool.Buffer, error) { - return DecompressDict(src, nil) -} - -// DecompressDict is like Decompress but accepts a preset dictionary. The -// dictionary must match the checksum embedded in the stream if the dictionary -// flag is present. -func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, error) { - if len(src) < 6 { - return bufpool.Buffer{}, io.ErrUnexpectedEOF - } - - cmf := src[0] - flg := src[1] - if (cmf&0x0f != zlibDeflate) || (cmf>>4 > zlibMaxWindow) || (binary.BigEndian.Uint16(src[:2])%31 != 0) { - return bufpool.Buffer{}, ErrHeader - } - - offset := 2 - haveDict := flg&0x20 != 0 - if haveDict { - if len(src) < offset+4 { - return bufpool.Buffer{}, io.ErrUnexpectedEOF - } - if dict == nil { - return bufpool.Buffer{}, ErrDictionary - } - checksum := binary.BigEndian.Uint32(src[offset : offset+4]) - if checksum != adler32.Checksum(dict) { - return bufpool.Buffer{}, ErrDictionary - } - offset += 4 - } - - if len(src[offset:]) < 4 { - return bufpool.Buffer{}, io.ErrUnexpectedEOF - } - - deflateData := src[offset:] - out, consumed, err := flate.DecompressDict(deflateData, dict) - if err != nil { - return bufpool.Buffer{}, err - } - - checksumPos := offset + consumed - if checksumPos+4 > len(src) { - out.Release() - return bufpool.Buffer{}, io.ErrUnexpectedEOF - } - expected := binary.BigEndian.Uint32(src[checksumPos : checksumPos+4]) - if expected != adler32.Checksum(out.Bytes()) { - out.Release() - return bufpool.Buffer{}, ErrChecksum - } - return out, nil -} diff --git a/internal/zlib/decompress_test.go b/internal/zlib/decompress_test.go deleted file mode 100644 index bb517ae6..00000000 --- a/internal/zlib/decompress_test.go +++ /dev/null @@ -1,84 +0,0 @@ -package zlib - -import ( - "bytes" - stdzlib "compress/zlib" - "testing" -) - -func compressZlib(t *testing.T, payload, dict []byte) []byte { - t.Helper() - var buf bytes.Buffer - var ( - w *stdzlib.Writer - err error - ) - if dict != nil { - w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict) - } else { - w = stdzlib.NewWriter(&buf) - } - if err != nil { - t.Fatalf("NewWriter: %v", err) - } - if _, err := w.Write(payload); err != nil { - t.Fatalf("Write: %v", err) - } - if err := w.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - return buf.Bytes() -} - -func TestDecompress(t *testing.T) { - payload := []byte("hello, zlib world!") - compressed := compressZlib(t, payload, nil) - - out, err := Decompress(compressed) - if err != nil { - t.Fatalf("Decompress: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload %q", out.Bytes()) - } -} - -func TestDecompressDict(t *testing.T) { - dict := []byte("git dictionary for zlib") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" -- extended body -- extended body")...) - compressed := compressZlib(t, payload, dict) - - out, err := DecompressDict(compressed, dict) - if err != nil { - t.Fatalf("DecompressDict: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload %q", out.Bytes()) - } -} - -func TestDecompressDictMissing(t *testing.T) { - dict := []byte("preset dictionary") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" .. more data ..")...) - compressed := compressZlib(t, payload, dict) - - if _, err := Decompress(compressed); err != ErrDictionary { - t.Fatalf("expected ErrDictionary, got %v", err) - } -} - -func TestDecompressChecksumError(t *testing.T) { - payload := []byte("checksum check") - compressed := compressZlib(t, payload, nil) - compressed[len(compressed)-1] ^= 0xff - - if _, err := Decompress(compressed); err != ErrChecksum { - t.Fatalf("expected ErrChecksum, got %v", err) - } -} diff --git a/internal/zlib/reader.go b/internal/zlib/reader.go deleted file mode 100644 index 2c8fd215..00000000 --- a/internal/zlib/reader.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* -Package zlib implements reading and writing of zlib format compressed data, -as specified in RFC 1950. - -This package differs from the standard library's compress/zlib package -in that it pools readers to reduce allocations. Writers are unchanged. - -Note that closing the reader causes it to be returned to a pool for -reuse. Therefore, the caller must not retain references to the -reader after closing it; in the standard library's compress/zlib package, -it is legal to Reset a closed reader and continue using it; that is -not allowed here, so there is simply no Resetter interface. - -The implementation provides filters that uncompress during reading -and compress during writing. For example, to write compressed data -to a buffer: - - var b bytes.Buffer - w := zlib.NewWriter(&b) - w.Write([]byte("hello, world\n")) - w.Close() - -and to read that data back: - - r, err := zlib.NewReader(&b) - io.Copy(os.Stdout, r) - r.Close() -*/ -package zlib - -import ( - "bufio" - "encoding/binary" - "errors" - "hash" - "io" - "sync" - - "git.sr.ht/~runxiyu/furgit/internal/adler32" - "git.sr.ht/~runxiyu/furgit/internal/flate" -) - -const ( - zlibDeflate = 8 - zlibMaxWindow = 7 -) - -var ( - // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. - ErrChecksum = errors.New("zlib: invalid checksum") - // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. - ErrDictionary = errors.New("zlib: invalid dictionary") - // ErrHeader is returned when reading ZLIB data that has an invalid header. - ErrHeader = errors.New("zlib: invalid header") -) - -var pool = sync.Pool{ - New: func() any { - r := new(reader) - return r - }, -} - -type reader struct { - r flate.Reader - decompressor io.ReadCloser - digest hash.Hash32 - err error - scratch [4]byte -} - -// NewReader creates a new ReadCloser. -// Reads from the returned ReadCloser read and decompress data from r. -// If r does not implement [io.ByteReader], the decompressor may read more -// data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser when done. -func NewReader(r io.Reader) (io.ReadCloser, error) { - return NewReaderDict(r, nil) -} - -// NewReaderDict is like [NewReader] but uses a preset dictionary. -// NewReaderDict ignores the dictionary if the compressed data does not refer to it. -// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary]. -func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { - v := pool.Get() - z, ok := v.(*reader) - if !ok { - panic("zlib: pool returned unexpected type") - } - err := z.Reset(r, dict) - if err != nil { - return nil, err - } - return z, nil -} - -func (z *reader) Read(p []byte) (int, error) { - if z.err != nil { - return 0, z.err - } - - var n int - n, z.err = z.decompressor.Read(p) - z.digest.Write(p[0:n]) - if z.err != io.EOF { - // In the normal case we return here. - return n, z.err - } - - // Finished file; check checksum. - if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - z.err = err - return n, z.err - } - // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). - checksum := binary.BigEndian.Uint32(z.scratch[:4]) - if checksum != z.digest.Sum32() { - z.err = ErrChecksum - return n, z.err - } - return n, io.EOF -} - -// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader]. -// In order for the ZLIB checksum to be verified, the reader must be -// fully consumed until the [io.EOF]. -func (z *reader) Close() error { - if z.err != nil && z.err != io.EOF { - return z.err - } - z.err = z.decompressor.Close() - if z.err != nil { - return z.err - } - - pool.Put(z) - return nil -} - -func (z *reader) Reset(r io.Reader, dict []byte) error { - *z = reader{decompressor: z.decompressor} - if fr, ok := r.(flate.Reader); ok { - z.r = fr - } else { - z.r = bufio.NewReader(r) - } - - // Read the header (RFC 1950 section 2.2.). - _, z.err = io.ReadFull(z.r, z.scratch[0:2]) - if z.err != nil { - if z.err == io.EOF { - z.err = io.ErrUnexpectedEOF - } - return z.err - } - h := binary.BigEndian.Uint16(z.scratch[:2]) - if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) { - z.err = ErrHeader - return z.err - } - haveDict := z.scratch[1]&0x20 != 0 - if haveDict { - _, z.err = io.ReadFull(z.r, z.scratch[0:4]) - if z.err != nil { - if z.err == io.EOF { - z.err = io.ErrUnexpectedEOF - } - return z.err - } - checksum := binary.BigEndian.Uint32(z.scratch[:4]) - if checksum != adler32.Checksum(dict) { - z.err = ErrDictionary - return z.err - } - } - - if z.decompressor == nil { - if haveDict { - z.decompressor = flate.NewReaderDict(z.r, dict) - } else { - z.decompressor = flate.NewReader(z.r) - } - } else { - z.err = z.decompressor.(flate.Resetter).Reset(z.r, dict) - if z.err != nil { - return z.err - } - } - z.digest = adler32.New() - return nil -} diff --git a/internal/zlibx/LICENSE b/internal/zlibx/LICENSE new file mode 100644 index 00000000..2a7cf70d --- /dev/null +++ b/internal/zlibx/LICENSE @@ -0,0 +1,27 @@ +Copyright 2009 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/zlibx/decompress.go b/internal/zlibx/decompress.go new file mode 100644 index 00000000..34e62c6f --- /dev/null +++ b/internal/zlibx/decompress.go @@ -0,0 +1,69 @@ +package zlibx + +import ( + "encoding/binary" + "io" + + "git.sr.ht/~runxiyu/furgit/internal/adler32" + "git.sr.ht/~runxiyu/furgit/internal/bufpool" + "git.sr.ht/~runxiyu/furgit/internal/flatex" +) + +// Decompress inflates the provided zlib wrapped stream and returns the +// uncompressed data inside a pooled bufpool.Buffer. +func Decompress(src []byte) (bufpool.Buffer, error) { + return DecompressDict(src, nil) +} + +// DecompressDict is like Decompress but accepts a preset dictionary. The +// dictionary must match the checksum embedded in the stream if the dictionary +// flag is present. +func DecompressDict(src []byte, dict []byte) (bufpool.Buffer, error) { + if len(src) < 6 { + return bufpool.Buffer{}, io.ErrUnexpectedEOF + } + + cmf := src[0] + flg := src[1] + if (cmf&0x0f != zlibDeflate) || (cmf>>4 > zlibMaxWindow) || (binary.BigEndian.Uint16(src[:2])%31 != 0) { + return bufpool.Buffer{}, ErrHeader + } + + offset := 2 + haveDict := flg&0x20 != 0 + if haveDict { + if len(src) < offset+4 { + return bufpool.Buffer{}, io.ErrUnexpectedEOF + } + if dict == nil { + return bufpool.Buffer{}, ErrDictionary + } + checksum := binary.BigEndian.Uint32(src[offset : offset+4]) + if checksum != adler32.Checksum(dict) { + return bufpool.Buffer{}, ErrDictionary + } + offset += 4 + } + + if len(src[offset:]) < 4 { + return bufpool.Buffer{}, io.ErrUnexpectedEOF + } + + deflateData := src[offset:] + out, consumed, err := flatex.DecompressDict(deflateData, dict) + if err != nil { + return bufpool.Buffer{}, err + } + + checksumPos := offset + consumed + if checksumPos+4 > len(src) { + out.Release() + return bufpool.Buffer{}, io.ErrUnexpectedEOF + } + expected := binary.BigEndian.Uint32(src[checksumPos : checksumPos+4]) + if expected != adler32.Checksum(out.Bytes()) { + out.Release() + return bufpool.Buffer{}, ErrChecksum + } + return out, nil +} diff --git a/internal/zlibx/decompress_test.go b/internal/zlibx/decompress_test.go new file mode 100644 index 00000000..a4e9c608 --- /dev/null +++ b/internal/zlibx/decompress_test.go @@ -0,0 +1,84 @@ +package zlibx + +import ( + "bytes" + stdzlib "compress/zlib" + "testing" +) + +func compressZlib(t *testing.T, payload, dict []byte) []byte { + t.Helper() + var buf bytes.Buffer + var ( + w *stdzlib.Writer + err error + ) + if dict != nil { + w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict) + } else { + w = stdzlib.NewWriter(&buf) + } + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + if _, err := w.Write(payload); err != nil { + t.Fatalf("Write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + return buf.Bytes() +} + +func TestDecompress(t *testing.T) { + payload := []byte("hello, zlib world!") + compressed := compressZlib(t, payload, nil) + + out, err := Decompress(compressed) + if err != nil { + t.Fatalf("Decompress: %v", err) + } + defer out.Release() + + if !bytes.Equal(out.Bytes(), payload) { + t.Fatalf("unexpected payload %q", out.Bytes()) + } +} + +func TestDecompressDict(t *testing.T) { + dict := []byte("git dictionary for zlib") + payload := append([]byte(nil), dict...) + payload = append(payload, []byte(" -- extended body -- extended body")...) + compressed := compressZlib(t, payload, dict) + + out, err := DecompressDict(compressed, dict) + if err != nil { + t.Fatalf("DecompressDict: %v", err) + } + defer out.Release() + + if !bytes.Equal(out.Bytes(), payload) { + t.Fatalf("unexpected payload %q", out.Bytes()) + } +} + +func TestDecompressDictMissing(t *testing.T) { + dict := []byte("preset dictionary") + payload := append([]byte(nil), dict...) + payload = append(payload, []byte(" .. more data ..")...) + compressed := compressZlib(t, payload, dict) + + if _, err := Decompress(compressed); err != ErrDictionary { + t.Fatalf("expected ErrDictionary, got %v", err) + } +} + +func TestDecompressChecksumError(t *testing.T) { + payload := []byte("checksum check") + compressed := compressZlib(t, payload, nil) + compressed[len(compressed)-1] ^= 0xff + + if _, err := Decompress(compressed); err != ErrChecksum { + t.Fatalf("expected ErrChecksum, got %v", err) + } +} diff --git a/internal/zlibx/reader.go b/internal/zlibx/reader.go new file mode 100644 index 00000000..80bbbb58 --- /dev/null +++ b/internal/zlibx/reader.go @@ -0,0 +1,198 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zlib implements reading and writing of zlib format compressed data, +as specified in RFC 1950. + +This package differs from the standard library's compress/zlib package +in that it pools readers to reduce allocations. Writers are unchanged. + +Note that closing the reader causes it to be returned to a pool for +reuse. Therefore, the caller must not retain references to the +reader after closing it; in the standard library's compress/zlib package, +it is legal to Reset a closed reader and continue using it; that is +not allowed here, so there is simply no Resetter interface. + +The implementation provides filters that uncompress during reading +and compress during writing. For example, to write compressed data +to a buffer: + + var b bytes.Buffer + w := zlib.NewWriter(&b) + w.Write([]byte("hello, world\n")) + w.Close() + +and to read that data back: + + r, err := zlib.NewReader(&b) + io.Copy(os.Stdout, r) + r.Close() +*/ +package zlibx + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "io" + "sync" + + "git.sr.ht/~runxiyu/furgit/internal/adler32" + "git.sr.ht/~runxiyu/furgit/internal/flatex" +) + +const ( + zlibDeflate = 8 + zlibMaxWindow = 7 +) + +var ( + // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. + ErrChecksum = errors.New("zlib: invalid checksum") + // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. + ErrDictionary = errors.New("zlib: invalid dictionary") + // ErrHeader is returned when reading ZLIB data that has an invalid header. + ErrHeader = errors.New("zlib: invalid header") +) + +var pool = sync.Pool{ + New: func() any { + r := new(reader) + return r + }, +} + +type reader struct { + r flatex.Reader + decompressor io.ReadCloser + digest hash.Hash32 + err error + scratch [4]byte +} + +// NewReader creates a new ReadCloser. +// Reads from the returned ReadCloser read and decompress data from r. +// If r does not implement [io.ByteReader], the decompressor may read more +// data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser when done. +func NewReader(r io.Reader) (io.ReadCloser, error) { + return NewReaderDict(r, nil) +} + +// NewReaderDict is like [NewReader] but uses a preset dictionary. +// NewReaderDict ignores the dictionary if the compressed data does not refer to it. +// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary]. +func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { + v := pool.Get() + z, ok := v.(*reader) + if !ok { + panic("zlib: pool returned unexpected type") + } + err := z.Reset(r, dict) + if err != nil { + return nil, err + } + return z, nil +} + +func (z *reader) Read(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + + var n int + n, z.err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum. + if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return n, z.err + } + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + checksum := binary.BigEndian.Uint32(z.scratch[:4]) + if checksum != z.digest.Sum32() { + z.err = ErrChecksum + return n, z.err + } + return n, io.EOF +} + +// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader]. +// In order for the ZLIB checksum to be verified, the reader must be +// fully consumed until the [io.EOF]. +func (z *reader) Close() error { + if z.err != nil && z.err != io.EOF { + return z.err + } + z.err = z.decompressor.Close() + if z.err != nil { + return z.err + } + + pool.Put(z) + return nil +} + +func (z *reader) Reset(r io.Reader, dict []byte) error { + *z = reader{decompressor: z.decompressor} + if fr, ok := r.(flatex.Reader); ok { + z.r = fr + } else { + z.r = bufio.NewReader(r) + } + + // Read the header (RFC 1950 section 2.2.). + _, z.err = io.ReadFull(z.r, z.scratch[0:2]) + if z.err != nil { + if z.err == io.EOF { + z.err = io.ErrUnexpectedEOF + } + return z.err + } + h := binary.BigEndian.Uint16(z.scratch[:2]) + if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) { + z.err = ErrHeader + return z.err + } + haveDict := z.scratch[1]&0x20 != 0 + if haveDict { + _, z.err = io.ReadFull(z.r, z.scratch[0:4]) + if z.err != nil { + if z.err == io.EOF { + z.err = io.ErrUnexpectedEOF + } + return z.err + } + checksum := binary.BigEndian.Uint32(z.scratch[:4]) + if checksum != adler32.Checksum(dict) { + z.err = ErrDictionary + return z.err + } + } + + if z.decompressor == nil { + if haveDict { + z.decompressor = flatex.NewReaderDict(z.r, dict) + } else { + z.decompressor = flatex.NewReader(z.r) + } + } else { + z.err = z.decompressor.(flatex.Resetter).Reset(z.r, dict) + if z.err != nil { + return z.err + } + } + z.digest = adler32.New() + return nil +} -- cgit v1.3.1-10-gc9f91