aboutsummaryrefslogtreecommitdiff
path: root/internal/flatex/huffman.go
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-02-20 19:06:13 +0800
committerGravatar Runxi Yu2026-02-20 19:07:14 +0800
commitaa513c069c1418734aea894dc944e27c6a78a3bb (patch)
tree687f0a11bb550fa088fd82a98ceb8979bbc35f69 /internal/flatex/huffman.go
parentComment on prior reverts removing the pack writing API (diff)
signatureNo signature
Delete everything, I'm redesigning this.
I'll stop using a flat package and make things much more modular. And also experiment with streaming APIs so large blobs don't OOM us.
Diffstat (limited to 'internal/flatex/huffman.go')
-rw-r--r--internal/flatex/huffman.go245
1 files changed, 0 insertions, 245 deletions
diff --git a/internal/flatex/huffman.go b/internal/flatex/huffman.go
deleted file mode 100644
index 32172dbb..00000000
--- a/internal/flatex/huffman.go
+++ /dev/null
@@ -1,245 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package flatex implements the DEFLATE compressed data format, described in
-// RFC 1951. The [compress/gzip] and [compress/zlib] packages implement access
-// to DEFLATE-based file formats.
-package flatex
-
-import (
- "math/bits"
- "strconv"
- "sync"
-)
-
-const (
- // The special code used to mark the end of a block.
- endBlockMarker = 256
- maxCodeLen = 16 // max length of Huffman code
- maxMatchOffset = 1 << 15 // The largest match offset
- // The next three numbers come from the RFC section 3.2.7, with the
- // additional proviso in section 3.2.5 which implies that distance codes
- // 30 and 31 should never occur in compressed data.
- maxNumLit = 286
- maxNumDist = 30
- numCodes = 19 // number of codes in Huffman meta-code
-)
-
-// A CorruptInputError reports the presence of corrupt input at a given offset.
-type CorruptInputError int64
-
-func (e CorruptInputError) Error() string {
- return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
-}
-
-// The data structure for decoding Huffman tables is based on that of
-// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
-// For codes smaller than the table width, there are multiple entries
-// (each combination of trailing bits has the same value). For codes
-// larger than the table width, the table contains a link to an overflow
-// table. The width of each entry in the link table is the maximum code
-// size minus the chunk width.
-//
-// Note that you can do a lookup in the table even without all bits
-// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
-// have the property that shorter codes come before longer ones, the
-// bit length estimate in the result is a lower bound on the actual
-// number of bits.
-//
-// See the following:
-// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
-
-// chunk & 15 is number of bits
-// chunk >> 4 is value, including table link
-
-const (
- huffmanChunkBits = 9
- huffmanNumChunks = 1 << huffmanChunkBits
- huffmanCountMask = 15
- huffmanValueShift = 4
-)
-
-type huffmanDecoder struct {
- min int // the minimum code length
- chunks [huffmanNumChunks]uint32 // chunks as described above
- links [][]uint32 // overflow links
- linkMask uint32 // mask the width of the link table
-}
-
-// Initialize Huffman decoding tables from array of code lengths.
-// Following this function, h is guaranteed to be initialized into a complete
-// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
-// degenerate case where the tree has only a single symbol with length 1. Empty
-// trees are permitted.
-func (h *huffmanDecoder) init(lengths []int) bool {
- // Sanity enables additional runtime tests during Huffman
- // table construction. It's intended to be used during
- // development to supplement the currently ad-hoc unit tests.
- const sanity = false
-
- if h.min != 0 {
- *h = huffmanDecoder{}
- }
-
- // Count number of codes of each length,
- // compute min and max length.
- var count [maxCodeLen]int
- var min, max int
- for _, n := range lengths {
- if n == 0 {
- continue
- }
- if min == 0 || n < min {
- min = n
- }
- if n > max {
- max = n
- }
- count[n]++
- }
-
- // Empty tree. The decompressor.huffSym function will fail later if the tree
- // is used. Technically, an empty tree is only valid for the HDIST tree and
- // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
- // is guaranteed to fail since it will attempt to use the tree to decode the
- // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
- // guaranteed to fail later since the compressed data section must be
- // composed of at least one symbol (the end-of-block marker).
- if max == 0 {
- return true
- }
-
- code := 0
- var nextcode [maxCodeLen]int
- for i := min; i <= max; i++ {
- code <<= 1
- nextcode[i] = code
- code += count[i]
- }
-
- // Check that the coding is complete (i.e., that we've
- // assigned all 2-to-the-max possible bit sequences).
- // Exception: To be compatible with zlib, we also need to
- // accept degenerate single-code codings. See also
- // TestDegenerateHuffmanCoding.
- if code != 1<<uint(max) && (code != 1 || max != 1) {
- return false
- }
-
- h.min = min
- if max > huffmanChunkBits {
- numLinks := 1 << (uint(max) - huffmanChunkBits)
- h.linkMask = uint32(numLinks - 1)
-
- // create link tables
- link := nextcode[huffmanChunkBits+1] >> 1
- h.links = make([][]uint32, huffmanNumChunks-link)
- for j := uint(link); j < huffmanNumChunks; j++ {
- reverse := int(bits.Reverse16(uint16(j)))
- reverse >>= uint(16 - huffmanChunkBits)
- off := j - uint(link)
- if sanity && h.chunks[reverse] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
- h.links[off] = make([]uint32, numLinks)
- }
- }
-
- for i, n := range lengths {
- if n == 0 {
- continue
- }
- code := nextcode[n]
- nextcode[n]++
- chunk := uint32(i<<huffmanValueShift | n)
- reverse := int(bits.Reverse16(uint16(code)))
- reverse >>= uint(16 - n)
- if n <= huffmanChunkBits {
- for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
- // We should never need to overwrite
- // an existing chunk. Also, 0 is
- // never a valid chunk, because the
- // lower 4 "count" bits should be
- // between 1 and 15.
- if sanity && h.chunks[off] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- h.chunks[off] = chunk
- }
- } else {
- j := reverse & (huffmanNumChunks - 1)
- if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
- // Longer codes should have been
- // associated with a link table above.
- panic("impossible: not an indirect chunk")
- }
- value := h.chunks[j] >> huffmanValueShift
- linktab := h.links[value]
- reverse >>= huffmanChunkBits
- for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
- if sanity && linktab[off] != 0 {
- panic("impossible: overwriting existing chunk")
- }
- linktab[off] = chunk
- }
- }
- }
-
- if sanity {
- // Above we've sanity checked that we never overwrote
- // an existing entry. Here we additionally check that
- // we filled the tables completely.
- for i, chunk := range h.chunks {
- if chunk == 0 {
- // As an exception, in the degenerate
- // single-code case, we allow odd
- // chunks to be missing.
- if code == 1 && i%2 == 1 {
- continue
- }
- panic("impossible: missing chunk")
- }
- }
- for _, linktab := range h.links {
- for _, chunk := range linktab {
- if chunk == 0 {
- panic("impossible: missing chunk")
- }
- }
- }
- }
-
- return true
-}
-
-// RFC 1951 section 3.2.7.
-// Compression with dynamic Huffman codes
-var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
-
-var (
- // Initialize the fixedHuffmanDecoder only once upon first use.
- fixedOnce sync.Once
- fixedHuffmanDecoder huffmanDecoder
-)
-
-func fixedHuffmanDecoderInit() {
- fixedOnce.Do(func() {
- // These come from the RFC section 3.2.6.
- var bits [288]int
- for i := 0; i < 144; i++ {
- bits[i] = 8
- }
- for i := 144; i < 256; i++ {
- bits[i] = 9
- }
- for i := 256; i < 280; i++ {
- bits[i] = 7
- }
- for i := 280; i < 288; i++ {
- bits[i] = 8
- }
- fixedHuffmanDecoder.init(bits[:])
- })
-}