// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package flatex implements the DEFLATE compressed data format, described in // RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access // to DEFLATE-based file formats. package flatex import ( "math/bits" "strconv" "sync" ) const ( // The special code used to mark the end of a block. endBlockMarker = 256 maxCodeLen = 16 // max length of Huffman code maxMatchOffset = 1 << 15 // The largest match offset // The next three numbers come from the RFC section 3.2.7, with the // additional proviso in section 3.2.5 which implies that distance codes // 30 and 31 should never occur in compressed data. maxNumLit = 286 maxNumDist = 30 numCodes = 19 // number of codes in Huffman meta-code ) // A CorruptInputError reports the presence of corrupt input at a given offset. type CorruptInputError int64 func (e CorruptInputError) Error() string { return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) } // The data structure for decoding Huffman tables is based on that of // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), // For codes smaller than the table width, there are multiple entries // (each combination of trailing bits has the same value). For codes // larger than the table width, the table contains a link to an overflow // table. The width of each entry in the link table is the maximum code // size minus the chunk width. // // Note that you can do a lookup in the table even without all bits // filled. Since the extra bits are zero, and the DEFLATE Huffman codes // have the property that shorter codes come before longer ones, the // bit length estimate in the result is a lower bound on the actual // number of bits. // // See the following: // https://github.com/madler/zlib/raw/master/doc/algorithm.txt // chunk & 15 is number of bits // chunk >> 4 is value, including table link const ( huffmanChunkBits = 9 huffmanNumChunks = 1 << huffmanChunkBits huffmanCountMask = 15 huffmanValueShift = 4 ) type huffmanDecoder struct { min int // the minimum code length chunks [huffmanNumChunks]uint32 // chunks as described above links [][]uint32 // overflow links linkMask uint32 // mask the width of the link table } // Initialize Huffman decoding tables from array of code lengths. // Following this function, h is guaranteed to be initialized into a complete // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a // degenerate case where the tree has only a single symbol with length 1. Empty // trees are permitted. func (h *huffmanDecoder) init(lengths []int) bool { // Sanity enables additional runtime tests during Huffman // table construction. It's intended to be used during // development to supplement the currently ad-hoc unit tests. const sanity = false if h.min != 0 { *h = huffmanDecoder{} } // Count number of codes of each length, // compute min and max length. var count [maxCodeLen]int var min, max int for _, n := range lengths { if n == 0 { continue } if min == 0 || n < min { min = n } if n > max { max = n } count[n]++ } // Empty tree. The decompressor.huffSym function will fail later if the tree // is used. Technically, an empty tree is only valid for the HDIST tree and // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree // is guaranteed to fail since it will attempt to use the tree to decode the // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is // guaranteed to fail later since the compressed data section must be // composed of at least one symbol (the end-of-block marker). if max == 0 { return true } code := 0 var nextcode [maxCodeLen]int for i := min; i <= max; i++ { code <<= 1 nextcode[i] = code code += count[i] } // Check that the coding is complete (i.e., that we've // assigned all 2-to-the-max possible bit sequences). // Exception: To be compatible with zlib, we also need to // accept degenerate single-code codings. See also // TestDegenerateHuffmanCoding. if code != 1< huffmanChunkBits { numLinks := 1 << (uint(max) - huffmanChunkBits) h.linkMask = uint32(numLinks - 1) // create link tables link := nextcode[huffmanChunkBits+1] >> 1 h.links = make([][]uint32, huffmanNumChunks-link) for j := uint(link); j < huffmanNumChunks; j++ { reverse := int(bits.Reverse16(uint16(j))) reverse >>= uint(16 - huffmanChunkBits) off := j - uint(link) if sanity && h.chunks[reverse] != 0 { panic("impossible: overwriting existing chunk") } h.chunks[reverse] = uint32(off<>= uint(16 - n) if n <= huffmanChunkBits { for off := reverse; off < len(h.chunks); off += 1 << uint(n) { // We should never need to overwrite // an existing chunk. Also, 0 is // never a valid chunk, because the // lower 4 "count" bits should be // between 1 and 15. if sanity && h.chunks[off] != 0 { panic("impossible: overwriting existing chunk") } h.chunks[off] = chunk } } else { j := reverse & (huffmanNumChunks - 1) if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { // Longer codes should have been // associated with a link table above. panic("impossible: not an indirect chunk") } value := h.chunks[j] >> huffmanValueShift linktab := h.links[value] reverse >>= huffmanChunkBits for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { if sanity && linktab[off] != 0 { panic("impossible: overwriting existing chunk") } linktab[off] = chunk } } } if sanity { // Above we've sanity checked that we never overwrote // an existing entry. Here we additionally check that // we filled the tables completely. for i, chunk := range h.chunks { if chunk == 0 { // As an exception, in the degenerate // single-code case, we allow odd // chunks to be missing. if code == 1 && i%2 == 1 { continue } panic("impossible: missing chunk") } } for _, linktab := range h.links { for _, chunk := range linktab { if chunk == 0 { panic("impossible: missing chunk") } } } } return true } // RFC 1951 section 3.2.7. // Compression with dynamic Huffman codes var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} var ( // Initialize the fixedHuffmanDecoder only once upon first use. fixedOnce sync.Once fixedHuffmanDecoder huffmanDecoder ) func fixedHuffmanDecoderInit() { fixedOnce.Do(func() { // These come from the RFC section 3.2.6. var bits [288]int for i := 0; i < 144; i++ { bits[i] = 8 } for i := 144; i < 256; i++ { bits[i] = 9 } for i := 256; i < 280; i++ { bits[i] = 7 } for i := 280; i < 288; i++ { bits[i] = 8 } fixedHuffmanDecoder.init(bits[:]) }) }