diff options
| author | 2026-03-05 17:36:48 +0800 | |
|---|---|---|
| committer | 2026-03-05 18:38:29 +0800 | |
| commit | beabb6085d42cbb961e3a5dc217fdd840fee4b0d (patch) | |
| tree | 64ea334e74925284228254631bd4e8bea89001d2 /internal/compress/flate | |
| parent | internal/zlib: Unexport Reset (diff) | |
| signature | No signature | |
internal/compress: Import flate and such from klauspost/compress
Diffstat (limited to 'internal/compress/flate')
105 files changed, 11630 insertions, 0 deletions
diff --git a/internal/compress/flate/_gen/gen_inflate.go b/internal/compress/flate/_gen/gen_inflate.go new file mode 100644 index 00000000..33f14005 --- /dev/null +++ b/internal/compress/flate/_gen/gen_inflate.go @@ -0,0 +1,303 @@ +//go:build generate +// +build generate + +//go:generate go run $GOFILE +//go:generate go fmt ../inflate_gen.go + +package main + +import ( + "os" + "strings" +) + +func main() { + f, err := os.Create("../inflate_gen.go") + if err != nil { + panic(err) + } + defer f.Close() + types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader", "Reader"} + names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader", "GenericReader"} + imports := []string{"bytes", "bufio", "fmt", "strings", "math/bits"} + f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( +`) + + for _, imp := range imports { + f.WriteString("\t\"" + imp + "\"\n") + } + f.WriteString(")\n\n") + + template := ` + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) $FUNCNAME$() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.($TYPE$) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = $FUNCNAME$ + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb®SizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb®SizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb®SizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = $FUNCNAME$ // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +` + for i, t := range types { + s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1) + s = strings.Replace(s, "$TYPE$", t, -1) + f.WriteString(s) + } + f.WriteString("func (f *decompressor) huffmanBlockDecoder() {\n") + f.WriteString("\tswitch f.r.(type) {\n") + for i, t := range types { + f.WriteString("\t\tcase " + t + ":\n") + f.WriteString("\t\t\tf.huffman" + names[i] + "()\n") + } + f.WriteString("\t\tdefault:\n") + f.WriteString("\t\t\tf.huffmanGenericReader()\n") + f.WriteString("\t}\n}\n") +} diff --git a/internal/compress/flate/deflate.go b/internal/compress/flate/deflate.go new file mode 100644 index 00000000..ac4a2344 --- /dev/null +++ b/internal/compress/flate/deflate.go @@ -0,0 +1,996 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "errors" + "fmt" + "io" + "math" + + "codeberg.org/lindenii/furgit/internal/compress/internal/le" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + BestCompression = 9 + DefaultCompression = -1 + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 + ConstantCompression = HuffmanOnly // compatibility alias. + + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 4 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we will encode at the time. + // Smaller sizes usually creates less optimal blocks. + // Bigger can make context switching slow. + // We use this for levels 7-9, so we make it big. + maxFlateBlockTokens = 1 << 15 + maxStoreBlockSize = 65535 + hashBits = 17 // After 17 performance degrades + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 28 + + skipNever = math.MaxInt32 + + debugDeflate = false +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing, level int +} + +// Compression levels have been rebalanced from zlib deflate defaults +// to give a bigger spread in speed and compression. +// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ +var levels = []compressionLevel{ + {}, // 0 + // Level 1-6 uses specialized algorithm - values not used + {0, 0, 0, 0, 0, 1}, + {0, 0, 0, 0, 0, 2}, + {0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 4}, + {0, 0, 0, 0, 0, 5}, + {0, 0, 0, 0, 0, 6}, + // Levels 7-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {8, 12, 16, 24, skipNever, 7}, + {16, 30, 40, 64, skipNever, 8}, + {32, 258, 258, 1024, skipNever, 9}, +} + +// advancedState contains state for the advanced levels, with bigger hash tables, etc. +type advancedState struct { + // deflate state + length int + offset int + maxInsertIndex int + chainHead int + hashOffset int + + ii uint16 // position of last match, intended to overflow to reset. + + // input window: unprocessed data is window[index:windowEnd] + index int + hashMatch [maxMatchLength + minMatchLength]uint32 + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 +} + +type compressor struct { + compressionLevel + + h *huffmanEncoder + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + + window []byte + windowEnd int + blockStart int // window index where current tokens start + err error + + // queued output tokens + tokens tokens + fast fastEnc + state *advancedState + + sync bool // requesting flush + byteAvailable bool // if true, still need to process window[index-1]. +} + +func (d *compressor) fillDeflate(b []byte) int { + s := d.state + if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + //copy(d.window[:], d.window[windowSize:2*windowSize]) + *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:]) + s.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + s.hashOffset += windowSize + if s.hashOffset > maxHashOffset { + delta := s.hashOffset - 1 + s.hashOffset -= delta + s.chainHead -= delta + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range s.hashPrev[:] { + if int(v) > delta { + s.hashPrev[i] = uint32(int(v) - delta) + } else { + s.hashPrev[i] = 0 + } + } + for i, v := range s.hashHead[:] { + if int(v) > delta { + s.hashHead[i] = uint32(int(v) - delta) + } else { + s.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + //d.w.writeBlock(tok, eof, window) + d.w.writeBlockDynamic(tok, eof, window, d.sync) + return d.w.err + } + return nil +} + +// writeBlockSkip writes the current block and uses the number of tokens +// to determine if the block should be stored on no matches, or +// only huffman encoded. +func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + if d.blockStart <= index { + window := d.window[d.blockStart:index] + // If we removed less than a 64th of all literals + // we huffman compress the block. + if int(tok.n) > len(window)-int(tok.n>>6) { + d.w.writeBlockHuff(eof, window, d.sync) + } else { + // Write a dynamic huffman block. + d.w.writeBlockDynamic(tok, eof, window, d.sync) + } + } else { + d.w.writeBlock(tok, eof, nil) + } + d.blockStart = index + return d.w.err + } + return nil +} + +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a start/reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only or huffman mode. + if d.level <= 0 && d.level > -MinCustomWindowSize { + return + } + if d.fast != nil { + // encode the last data, but discard the result + if len(b) > maxMatchOffset { + b = b[len(b)-maxMatchOffset:] + } + d.fast.Encode(&d.tokens, b) + d.tokens.Reset() + return + } + s := d.state + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window[d.windowEnd:], b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := range loops { + startindex := j * 256 + end := min(startindex+256+minMatchLength-1, n) + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + } + // Update window information. + d.windowEnd += n + s.index = n +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { + minMatchLook := min(lookahead, maxMatchLength) + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := min(d.nice, len(win)-pos) + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = minMatchLength - 1 + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := max(pos-windowSize, 0) + offset = 0 + + if d.chain < 100 { + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return + } + + // Minimum gain to accept a match. + cGain := 4 + + // Some like it higher (CSV), some like it lower (JSON) + const baseCost = 3 + // Base is 4 bytes at with an additional cost. + // Matches must be better than this. + + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + // Calculate gain. Estimate + newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) + + //fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length) + if newGain > cGain { + length = n + offset = pos - i + cGain = newGain + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + return hash4u(le.Load32(b, 0), hashBits) +} + +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4u(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> (32 - h) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < 4 { + return + } + hb := le.Load32(b, 0) + + dst[0] = hash4u(hb, hashBits) + end := len(b) - 4 + 1 + for i := 1; i < end; i++ { + hb = (hb >> 8) | uint32(b[i+3])<<24 + dst[i] = hash4u(hb, hashBits) + } +} + +func (d *compressor) initDeflate() { + d.window = make([]byte, 2*windowSize) + d.byteAvailable = false + d.err = nil + if d.state == nil { + return + } + s := d.state + s.index = 0 + s.hashOffset = 1 + s.length = minMatchLength - 1 + s.offset = 0 + s.chainHead = -1 +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazy() { + s := d.state + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = debugDeflate + + if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { + return + } + if d.windowEnd != s.index && d.chain > 100 { + // Get literal huffman coder. + if d.h == nil { + d.h = newHuffmanEncoder(maxFlateBlockTokens) + } + var tmp [256]uint16 + toIndex := d.window[s.index:d.windowEnd] + toIndex = toIndex[:min(len(toIndex), maxFlateBlockTokens)] + for _, v := range toIndex { + tmp[v]++ + } + d.h.generate(tmp[:], 15) + } + + s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + + for { + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - s.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + return + } + } + if s.index < s.maxInsertIndex { + // Update the hash + hash := hash4(d.window[s.index:]) + ch := s.hashHead[hash] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[hash] = uint32(s.index + s.hashOffset) + } + prevLength := s.length + prevOffset := s.offset + s.length = minMatchLength - 1 + s.offset = 0 + minIndex := max(s.index-windowSize, 0) + + if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { + s.length = newLength + s.offset = newOffset + } + } + + if prevLength >= minMatchLength && s.length <= prevLength { + // No better match, but check for better match at end... + // + // Skip forward a number of bytes. + // Offset of 2 seems to yield best results. 3 is sometimes better. + const checkOff = 2 + + // Check all, except full length + if prevLength < maxMatchLength-checkOff { + prevIndex := s.index - 1 + if prevIndex+prevLength < s.maxInsertIndex { + end := min(lookahead, maxMatchLength+checkOff) + end += prevIndex + + // Hash at match end. + h := hash4(d.window[prevIndex+prevLength:]) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { + length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) + // It seems like a pure length metric is best. + if length > prevLength { + prevLength = length + prevOffset = prevIndex - ch2 + + // Extend back... + for i := checkOff - 1; i >= 0; i-- { + if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] { + // Emit tokens we "owe" + for j := 0; j <= i; j++ { + d.tokens.AddLiteral(d.window[prevIndex+j]) + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + } + break + } else { + prevLength++ + } + } + } else if false { + // Check one further ahead. + // Only rarely better, disabled for now. + prevIndex++ + h := hash4(d.window[prevIndex+prevLength:]) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { + length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) + // It seems like a pure length metric is best. + if length > prevLength+checkOff { + prevLength = length + prevOffset = prevIndex - ch2 + prevIndex-- + + // Extend back... + for i := checkOff; i >= 0; i-- { + if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] { + // Emit tokens we "owe" + for j := 0; j <= i; j++ { + d.tokens.AddLiteral(d.window[prevIndex+j]) + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + } + break + } else { + prevLength++ + } + } + } + } + } + } + } + } + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + newIndex := s.index + prevLength - 1 + // Calculate missing hashes + end := min(newIndex, s.maxInsertIndex) + end += minMatchLength - 1 + startindex := min(s.index+1, s.maxInsertIndex) + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + } + + s.index = newIndex + d.byteAvailable = false + s.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.ii = 0 + } else { + // Reset, if we got a match this run. + if s.length >= minMatchLength { + s.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + s.ii++ + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when s.ii overflows after 64KB. + if n := int(s.ii) - d.chain; n > 0 { + n = 1 + int(n>>6) + for j := 0; j < n; j++ { + if s.index >= d.windowEnd-1 { + break + } + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + // Index... + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + s.index++ + } + // Flush last byte + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } + } else { + s.index++ + d.byteAvailable = true + } + } + } +} + +func (d *compressor) store() { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 + } +} + +// fillWindow will fill the buffer with data for huffman-only compression. +// The number of bytes copied is returned. +func (d *compressor) fillBlock(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + d.windowEnd = 0 +} + +// storeFast will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeFast() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < len(d.window) { + if !d.sync { + return + } + // Handle extremely small sizes. + if d.windowEnd < 128 { + if d.windowEnd == 0 { + return + } + if d.windowEnd <= 32 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } else { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 + d.fast.Reset() + return + } + } + + d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) + // If we made zero matches, store the block as is. + if d.tokens.n == 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + // If we removed less than 1/16th, huffman compress the block. + } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } else { + d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 +} + +// write will add input byte to the stream. +// Unless an error occurs all bytes will be consumed. +func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } + n = len(b) + for len(b) > 0 { + if d.windowEnd == len(d.window) || d.sync { + d.step(d) + } + b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + if d.err != nil { + return d.err + } + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).store + case level == ConstantCompression: + d.w.logNewTablePenalty = 10 + d.window = make([]byte, 32<<10) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeHuff + case level == DefaultCompression: + level = 5 + fallthrough + case level >= 1 && level <= 6: + d.w.logNewTablePenalty = 7 + d.fast = newFastEnc(level) + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast + case 7 <= level && level <= 9: + d.w.logNewTablePenalty = 8 + d.state = &advancedState{} + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflateLazy + case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize: + d.w.logNewTablePenalty = 7 + d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize} + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) + } + d.level = level + return nil +} + +// reset the state of the compressor. +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + // We only need to reset a few things for Snappy. + if d.fast != nil { + d.fast.Reset() + d.windowEnd = 0 + d.tokens.Reset() + return + } + switch d.compressionLevel.chain { + case 0: + // level was NoCompression or ConstantCompression. + d.windowEnd = 0 + default: + s := d.state + s.chainHead = -1 + for i := range s.hashHead { + s.hashHead[i] = 0 + } + for i := range s.hashPrev { + s.hashPrev[i] = 0 + } + s.hashOffset = 1 + s.index, d.windowEnd = 0, 0 + d.blockStart, d.byteAvailable = 0, false + d.tokens.Reset() + s.length = minMatchLength - 1 + s.offset = 0 + s.ii = 0 + s.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + if d.err != nil { + return d.err + } + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + d.w.reset(nil) + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (ConstantCompression) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. +// +// If level is in the range [-2, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + zw, err := NewWriter(w, level) + if err != nil { + return nil, err + } + zw.d.fillWindow(dict) + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = 32 + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = windowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("flate: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize") + } + var dw Writer + if err := dw.d.init(w, -windowSize); err != nil { + return nil, err + } + return &dw, nil +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. +func (w *Writer) Reset(dst io.Writer) { + if len(w.dict) > 0 { + // w was created with NewWriterDict + w.d.reset(dst) + if dst != nil { + w.d.fillWindow(w.dict) + } + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} + +// ResetDict discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level, but sets a specific dictionary. +func (w *Writer) ResetDict(dst io.Writer, dict []byte) { + w.dict = dict + w.d.reset(dst) + w.d.fillWindow(w.dict) +} diff --git a/internal/compress/flate/deflate_test.go b/internal/compress/flate/deflate_test.go new file mode 100644 index 00000000..8e082968 --- /dev/null +++ b/internal/compress/flate/deflate_test.go @@ -0,0 +1,706 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "fmt" + "io" + "os" + "reflect" + "strings" + "sync" + "testing" +) + +type deflateTest struct { + in []byte + level int + out []byte +} + +type deflateInflateTest struct { + in []byte +} + +type reverseBitsTest struct { + in uint16 + bitCount uint8 + out uint16 +} + +var deflateTests = []*deflateTest{ + 0: {[]byte{}, 0, []byte{0x3, 0x0}}, + 1: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, + 2: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, + 3: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, + + 4: {[]byte{0x11}, 0, []byte{0x0, 0x1, 0x0, 0xfe, 0xff, 0x11, 0x3, 0x0}}, + 5: {[]byte{0x11, 0x12}, 0, []byte{0x0, 0x2, 0x0, 0xfd, 0xff, 0x11, 0x12, 0x3, 0x0}}, + 6: {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0, + []byte{0x0, 0x8, 0x0, 0xf7, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x3, 0x0}, + }, + 7: {[]byte{}, 1, []byte{0x3, 0x0}}, + 8: {[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}}, + 9: {[]byte{0x11, 0x12}, BestCompression, []byte{0x12, 0x14, 0x2, 0xc, 0x0}}, + 10: {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, BestCompression, []byte{0x12, 0x84, 0x1, 0xc0, 0x0}}, + 11: {[]byte{}, 9, []byte{0x3, 0x0}}, + 12: {[]byte{0x11}, 9, []byte{0x12, 0x4, 0xc, 0x0}}, + 13: {[]byte{0x11, 0x12}, 9, []byte{0x12, 0x14, 0x2, 0xc, 0x0}}, + 14: {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{0x12, 0x84, 0x1, 0xc0, 0x0}}, +} + +var deflateInflateTests = []*deflateInflateTest{ + {[]byte{}}, + {[]byte{0x11}}, + {[]byte{0x11, 0x12}}, + {[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, + {[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, + {largeDataChunk()}, +} + +var reverseBitsTests = []*reverseBitsTest{ + {1, 1, 1}, + {1, 2, 2}, + {1, 3, 4}, + {1, 4, 8}, + {1, 5, 16}, + {17, 5, 17}, + {257, 9, 257}, + {29, 5, 23}, +} + +func largeDataChunk() []byte { + result := make([]byte, 100000) + for i := range result { + result[i] = byte(i * i & 0xFF) + } + return result +} + +func TestBulkHash4(t *testing.T) { + for _, x := range deflateTests { + y := x.out + if len(y) >= minMatchLength { + y = append(y, y...) + for j := 4; j < len(y); j++ { + y := y[:j] + dst := make([]uint32, len(y)-minMatchLength+1) + for i := range dst { + dst[i] = uint32(i + 100) + } + bulkHash4(y, dst) + for i, val := range dst { + got := val + expect := hash4(y[i:]) + if got != expect && got == uint32(i)+100 { + t.Errorf("Len:%d Index:%d, expected 0x%08x but not modified", len(y), i, expect) + } else if got != expect { + t.Errorf("Len:%d Index:%d, got 0x%08x expected:0x%08x", len(y), i, got, expect) + } else { + //t.Logf("Len:%d Index:%d OK (0x%08x)", len(y), i, got) + } + } + } + } + } +} + +func TestDeflate(t *testing.T) { + for i, h := range deflateTests { + var buf bytes.Buffer + w, err := NewWriter(&buf, h.level) + if err != nil { + t.Errorf("NewWriter: %v", err) + continue + } + w.Write(h.in) + w.Close() + if !bytes.Equal(buf.Bytes(), h.out) { + t.Errorf("%d: Deflate(%d, %x) got \n%#v, want \n%#v", i, h.level, h.in, buf.Bytes(), h.out) + } + } +} + +// A sparseReader returns a stream consisting of 0s followed by 1<<16 1s. +// This tests missing hash references in a very large input. +type sparseReader struct { + l int64 + cur int64 +} + +func (r *sparseReader) Read(b []byte) (n int, err error) { + if r.cur >= r.l { + return 0, io.EOF + } + n = len(b) + cur := r.cur + int64(n) + if cur > r.l { + n -= int(cur - r.l) + cur = r.l + } + for i := range b[0:n] { + if r.cur+int64(i) >= r.l-1<<16 { + b[i] = 1 + } else { + b[i] = 0 + } + } + r.cur = cur + return +} + +func TestVeryLongSparseChunk(t *testing.T) { + if testing.Short() { + t.Skip("skipping sparse chunk during short test") + } + var buf bytes.Buffer + w, err := NewWriter(&buf, 1) + if err != nil { + t.Errorf("NewWriter: %v", err) + return + } + if _, err = io.Copy(w, &sparseReader{l: 23e8}); err != nil { + t.Errorf("Compress failed: %v", err) + return + } + t.Log("Length:", buf.Len()) +} + +func TestOneMByte(t *testing.T) { + var input [1024 * 1024]byte + + var compressedOutput bytes.Buffer + for level := HuffmanOnly; level <= BestCompression; level++ { + compressedOutput.Reset() + compressor, err := NewWriter(&compressedOutput, level) + if err != nil { + t.Fatalf("create: %s", err) + } + // Use single write... + if _, err := compressor.Write(input[:]); err != nil { + t.Fatalf("compress: %s", err) + } + + if err := compressor.Close(); err != nil { + t.Fatalf("close: %s", err) + } + + var decompressedOutput bytes.Buffer + + decompresser := NewReader(&compressedOutput) + t.Log("level:", level, "compressed:", compressedOutput.Len()) + if _, err := io.Copy(&decompressedOutput, decompresser); err != nil { + t.Fatalf("decompress: %s", err) + } + + if !bytes.Equal(input[:], decompressedOutput.Bytes()) { + t.Fatal("input and output do not match") + } + } +} + +type syncBuffer struct { + buf bytes.Buffer + mu sync.RWMutex + closed bool + ready chan bool +} + +func newSyncBuffer() *syncBuffer { + return &syncBuffer{ready: make(chan bool, 1)} +} + +func (b *syncBuffer) Read(p []byte) (n int, err error) { + for { + b.mu.RLock() + n, err = b.buf.Read(p) + b.mu.RUnlock() + if n > 0 || b.closed { + return + } + <-b.ready + } +} + +func (b *syncBuffer) signal() { + select { + case b.ready <- true: + default: + } +} + +func (b *syncBuffer) Write(p []byte) (n int, err error) { + n, err = b.buf.Write(p) + b.signal() + return +} + +func (b *syncBuffer) WriteMode() { + b.mu.Lock() +} + +func (b *syncBuffer) ReadMode() { + b.mu.Unlock() + b.signal() +} + +func (b *syncBuffer) Close() error { + b.closed = true + b.signal() + return nil +} + +func testSync(t *testing.T, level int, input []byte, name string) { + if len(input) == 0 { + return + } + + t.Logf("--testSync %d, %d, %s", level, len(input), name) + buf := newSyncBuffer() + buf1 := new(bytes.Buffer) + buf.WriteMode() + w, err := NewWriter(io.MultiWriter(buf, buf1), level) + if err != nil { + t.Errorf("NewWriter: %v", err) + return + } + r := NewReader(buf) + + // Write half the input and read back. + for i := range 2 { + var lo, hi int + if i == 0 { + lo, hi = 0, (len(input)+1)/2 + } else { + lo, hi = (len(input)+1)/2, len(input) + } + t.Logf("#%d: write %d-%d", i, lo, hi) + if _, err := w.Write(input[lo:hi]); err != nil { + t.Errorf("testSync: write: %v", err) + return + } + if i == 0 { + if err := w.Flush(); err != nil { + t.Errorf("testSync: flush: %v", err) + return + } + } else { + if err := w.Close(); err != nil { + t.Errorf("testSync: close: %v", err) + } + } + buf.ReadMode() + out := make([]byte, hi-lo+1) + m, err := io.ReadAtLeast(r, out, hi-lo) + t.Logf("#%d: read %d", i, m) + if m != hi-lo || err != nil { + t.Errorf("testSync/%d (%d, %d, %s): read %d: %d, %v (%d left)", i, level, len(input), name, hi-lo, m, err, buf.buf.Len()) + return + } + if !bytes.Equal(input[lo:hi], out[:hi-lo]) { + t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo]) + return + } + // This test originally checked that after reading + // the first half of the input, there was nothing left + // in the read buffer (buf.buf.Len() != 0) but that is + // not necessarily the case: the write Flush may emit + // some extra framing bits that are not necessary + // to process to obtain the first half of the uncompressed + // data. The test ran correctly most of the time, because + // the background goroutine had usually read even + // those extra bits by now, but it's not a useful thing to + // check. + buf.WriteMode() + } + buf.ReadMode() + out := make([]byte, 10) + if n, err := r.Read(out); n > 0 || err != io.EOF { + t.Errorf("testSync (%d, %d, %s): final Read: %d, %v (hex: %x)", level, len(input), name, n, err, out[0:n]) + } + if buf.buf.Len() != 0 { + t.Errorf("testSync (%d, %d, %s): extra data at end", level, len(input), name) + } + r.Close() + + // stream should work for ordinary reader too + r = NewReader(buf1) + out, err = io.ReadAll(r) + if err != nil { + t.Errorf("testSync: read: %s", err) + return + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("testSync: decompress(compress(data)) != data: level=%d input=%s", level, name) + } +} + +func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name string, limit int) { + var buffer bytes.Buffer + w, err := NewWriter(&buffer, level) + if err != nil { + t.Errorf("NewWriter: %v", err) + return + } + w.Write(input) + w.Close() + if limit > 0 { + t.Logf("level: %d - Size:%.2f%%, %d b\n", level, float64(buffer.Len()*100)/float64(limit), buffer.Len()) + } + if limit > 0 && buffer.Len() > limit { + t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit) + } + + r := NewReader(&buffer) + out, err := io.ReadAll(r) + if err != nil { + t.Errorf("read: %s", err) + return + } + r.Close() + if !bytes.Equal(input, out) { + os.WriteFile("testdata/fails/"+t.Name()+".got", out, os.ModePerm) + os.WriteFile("testdata/fails/"+t.Name()+".want", input, os.ModePerm) + t.Errorf("decompress(compress(data)) != data: level=%d input=%s", level, name) + return + } + testSync(t, level, input, name) +} + +func testToFromWithLimit(t *testing.T, input []byte, name string, limit [11]int) { + for i := range 10 { + testToFromWithLevelAndLimit(t, i, input, name, limit[i]) + } + testToFromWithLevelAndLimit(t, -2, input, name, limit[10]) +} + +func TestDeflateInflate(t *testing.T) { + for i, h := range deflateInflateTests { + testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [11]int{}) + } +} + +func TestReverseBits(t *testing.T) { + for _, h := range reverseBitsTests { + if v := reverseBits(h.in, h.bitCount); v != h.out { + t.Errorf("reverseBits(%v,%v) = %v, want %v", + h.in, h.bitCount, v, h.out) + } + } +} + +type deflateInflateStringTest struct { + filename string + label string + limit [11]int // Number 11 is ConstantCompression +} + +var deflateInflateStringTests = []deflateInflateStringTest{ + { + "../testdata/e.txt", + "2.718281828...", + [...]int{100018, 67900, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683 + 100}, + }, + { + "../testdata/Mark.Twain-Tom.Sawyer.txt", + "Mark.Twain-Tom.Sawyer", + [...]int{387999, 185000, 182361, 179974, 174124, 168819, 162936, 160506, 160295, 160295, 233460 + 100}, + }, +} + +func TestDeflateInflateString(t *testing.T) { + for _, test := range deflateInflateStringTests { + gold, err := os.ReadFile(test.filename) + if err != nil { + t.Error(err) + } + // Remove returns that may be present on Windows + neutral := strings.Map(func(r rune) rune { + if r != '\r' { + return r + } + return -1 + }, string(gold)) + + testToFromWithLimit(t, []byte(neutral), test.label, test.limit) + + if testing.Short() { + break + } + } +} + +func TestReaderDict(t *testing.T) { + const ( + dict = "hello world" + text = "hello again world" + ) + var b bytes.Buffer + w, err := NewWriter(&b, 5) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + w.Write([]byte(dict)) + w.Flush() + b.Reset() + w.Write([]byte(text)) + w.Close() + + r := NewReaderDict(&b, []byte(dict)) + data, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + if string(data) != "hello again world" { + t.Fatalf("read returned %q want %q", string(data), text) + } +} + +func TestWriterDict(t *testing.T) { + const ( + dict = "hello world Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua." + text = "hello world Lorem ipsum dolor sit amet" + ) + // This test is sensitive to algorithm changes that skip + // data in favour of speed. Higher levels are less prone to this + // so we test level 4-9. + for l := 4; l < 9; l++ { + var b bytes.Buffer + w, err := NewWriter(&b, l) + if err != nil { + t.Fatalf("level %d, NewWriter: %v", l, err) + } + w.Write([]byte(dict)) + w.Flush() + b.Reset() + w.Write([]byte(text)) + w.Close() + + var b1 bytes.Buffer + w, _ = NewWriterDict(&b1, l, []byte(dict)) + w.Write([]byte(text)) + w.Close() + + if !bytes.Equal(b1.Bytes(), b.Bytes()) { + t.Errorf("level %d, writer wrote\n%v\n want\n%v", l, b1.Bytes(), b.Bytes()) + } + } +} + +// See http://code.google.com/p/go/issues/detail?id=2508 +func TestRegression2508(t *testing.T) { + if testing.Short() { + t.Logf("test disabled with -short") + return + } + w, err := NewWriter(io.Discard, 1) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + buf := make([]byte, 1024) + for range 131072 { + if _, err := w.Write(buf); err != nil { + t.Fatalf("writer failed: %v", err) + } + } + w.Close() +} + +func TestWriterReset(t *testing.T) { + for level := -2; level <= 9; level++ { + if level == -1 { + level++ + } + if testing.Short() && level > 1 { + break + } + w, err := NewWriter(io.Discard, level) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + buf := []byte("hello world") + for range 1024 { + w.Write(buf) + } + w.Reset(io.Discard) + + wref, err := NewWriter(io.Discard, level) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + + // DeepEqual doesn't compare functions. + w.d.fill, wref.d.fill = nil, nil + w.d.step, wref.d.step = nil, nil + w.d.state, wref.d.state = nil, nil + w.d.fast, wref.d.fast = nil, nil + + // hashMatch is always overwritten when used. + if w.d.tokens.n != 0 { + t.Errorf("level %d Writer not reset after Reset. %d tokens were present", level, w.d.tokens.n) + } + // As long as the length is 0, we don't care about the content. + w.d.tokens = wref.d.tokens + + // We don't care if there are values in the window, as long as it is at d.index is 0 + w.d.window = wref.d.window + if !reflect.DeepEqual(w, wref) { + t.Errorf("level %d Writer not reset after Reset", level) + } + } + + for i := HuffmanOnly; i <= BestCompression; i++ { + testResetOutput(t, fmt.Sprint("level-", i), func(w io.Writer) (*Writer, error) { return NewWriter(w, i) }) + } + dict := []byte(strings.Repeat("we are the world - how are you?", 3)) + for i := HuffmanOnly; i <= BestCompression; i++ { + testResetOutput(t, fmt.Sprint("dict-level-", i), func(w io.Writer) (*Writer, error) { return NewWriterDict(w, i, dict) }) + } + for i := HuffmanOnly; i <= BestCompression; i++ { + testResetOutput(t, fmt.Sprint("dict-reset-level-", i), func(w io.Writer) (*Writer, error) { + w2, err := NewWriter(nil, i) + if err != nil { + return w2, err + } + w2.ResetDict(w, dict) + return w2, nil + }) + } + testResetOutput(t, fmt.Sprint("dict-reset-window"), func(w io.Writer) (*Writer, error) { + w2, err := NewWriterWindow(nil, 1024) + if err != nil { + return w2, err + } + w2.ResetDict(w, dict) + return w2, nil + }) +} + +func testResetOutput(t *testing.T, name string, newWriter func(w io.Writer) (*Writer, error)) { + t.Run(name, func(t *testing.T) { + buf := new(bytes.Buffer) + w, err := newWriter(buf) + if err != nil { + t.Fatalf("NewWriter: %v", err) + } + b := []byte("hello world - how are you doing?") + for range 1024 { + w.Write(b) + } + w.Close() + out1 := buf.Bytes() + + buf2 := new(bytes.Buffer) + w.Reset(buf2) + for range 1024 { + w.Write(b) + } + w.Close() + out2 := buf2.Bytes() + + if len(out1) != len(out2) { + t.Errorf("got %d, expected %d bytes", len(out2), len(out1)) + } + if !bytes.Equal(out1, out2) { + mm := 0 + for i, b := range out1[:len(out2)] { + if b != out2[i] { + t.Errorf("mismatch index %d: %02x, expected %02x", i, out2[i], b) + } + mm++ + if mm == 10 { + t.Fatal("Stopping") + } + } + } + t.Logf("got %d bytes", len(out1)) + }) +} + +// TestBestSpeed tests that round-tripping through deflate and then inflate +// recovers the original input. The Write sizes are near the thresholds in the +// compressor.encSpeed method (0, 16, 128), as well as near maxStoreBlockSize +// (65535). +func TestBestSpeed(t *testing.T) { + abc := make([]byte, 128) + for i := range abc { + abc[i] = byte(i) + } + abcabc := bytes.Repeat(abc, 131072/len(abc)) + var want []byte + + testCases := [][]int{ + {65536, 0}, + {65536, 1}, + {65536, 1, 256}, + {65536, 1, 65536}, + {65536, 14}, + {65536, 15}, + {65536, 16}, + {65536, 16, 256}, + {65536, 16, 65536}, + {65536, 127}, + {65536, 128}, + {65536, 128, 256}, + {65536, 128, 65536}, + {65536, 129}, + {65536, 65536, 256}, + {65536, 65536, 65536}, + } + + for i, tc := range testCases { + if testing.Short() && i > 5 { + t.Skip() + } + for _, firstN := range []int{1, 65534, 65535, 65536, 65537, 131072} { + tc[0] = firstN + outer: + for _, flush := range []bool{false, true} { + buf := new(bytes.Buffer) + want = want[:0] + + w, err := NewWriter(buf, BestSpeed) + if err != nil { + t.Errorf("i=%d, firstN=%d, flush=%t: NewWriter: %v", i, firstN, flush, err) + continue + } + for _, n := range tc { + want = append(want, abcabc[:n]...) + if _, err := w.Write(abcabc[:n]); err != nil { + t.Errorf("i=%d, firstN=%d, flush=%t: Write: %v", i, firstN, flush, err) + continue outer + } + if !flush { + continue + } + if err := w.Flush(); err != nil { + t.Errorf("i=%d, firstN=%d, flush=%t: Flush: %v", i, firstN, flush, err) + continue outer + } + } + if err := w.Close(); err != nil { + t.Errorf("i=%d, firstN=%d, flush=%t: Close: %v", i, firstN, flush, err) + continue + } + + r := NewReader(buf) + got, err := io.ReadAll(r) + if err != nil { + t.Errorf("i=%d, firstN=%d, flush=%t: ReadAll: %v", i, firstN, flush, err) + continue + } + r.Close() + + if !bytes.Equal(got, want) { + t.Errorf("i=%d, firstN=%d, flush=%t: corruption during deflate-then-inflate", i, firstN, flush) + continue + } + } + } + } +} diff --git a/internal/compress/flate/dict_decoder.go b/internal/compress/flate/dict_decoder.go new file mode 100644 index 00000000..cb855abc --- /dev/null +++ b/internal/compress/flate/dict_decoder.go @@ -0,0 +1,181 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// - Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// - Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := min(dstPos+length, len(dd.hist)) + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. +loop: + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + if dstPos < endPos { + goto loop // Avoid for-loop so that this function can be inlined + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/internal/compress/flate/dict_decoder_test.go b/internal/compress/flate/dict_decoder_test.go new file mode 100644 index 00000000..9275cff7 --- /dev/null +++ b/internal/compress/flate/dict_decoder_test.go @@ -0,0 +1,139 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "strings" + "testing" +) + +func TestDictDecoder(t *testing.T) { + const ( + abc = "ABC\n" + fox = "The quick brown fox jumped over the lazy dog!\n" + poem = "The Road Not Taken\nRobert Frost\n" + + "\n" + + "Two roads diverged in a yellow wood,\n" + + "And sorry I could not travel both\n" + + "And be one traveler, long I stood\n" + + "And looked down one as far as I could\n" + + "To where it bent in the undergrowth;\n" + + "\n" + + "Then took the other, as just as fair,\n" + + "And having perhaps the better claim,\n" + + "Because it was grassy and wanted wear;\n" + + "Though as for that the passing there\n" + + "Had worn them really about the same,\n" + + "\n" + + "And both that morning equally lay\n" + + "In leaves no step had trodden black.\n" + + "Oh, I kept the first for another day!\n" + + "Yet knowing how way leads on to way,\n" + + "I doubted if I should ever come back.\n" + + "\n" + + "I shall be telling this with a sigh\n" + + "Somewhere ages and ages hence:\n" + + "Two roads diverged in a wood, and I-\n" + + "I took the one less traveled by,\n" + + "And that has made all the difference.\n" + ) + + var poemRefs = []struct { + dist int // Backward distance (0 if this is an insertion) + length int // Length of copy or insertion + }{ + {0, 38}, {33, 3}, {0, 48}, {79, 3}, {0, 11}, {34, 5}, {0, 6}, {23, 7}, + {0, 8}, {50, 3}, {0, 2}, {69, 3}, {34, 5}, {0, 4}, {97, 3}, {0, 4}, + {43, 5}, {0, 6}, {7, 4}, {88, 7}, {0, 12}, {80, 3}, {0, 2}, {141, 4}, + {0, 1}, {196, 3}, {0, 3}, {157, 3}, {0, 6}, {181, 3}, {0, 2}, {23, 3}, + {77, 3}, {28, 5}, {128, 3}, {110, 4}, {70, 3}, {0, 4}, {85, 6}, {0, 2}, + {182, 6}, {0, 4}, {133, 3}, {0, 7}, {47, 5}, {0, 20}, {112, 5}, {0, 1}, + {58, 3}, {0, 8}, {59, 3}, {0, 4}, {173, 3}, {0, 5}, {114, 3}, {0, 4}, + {92, 5}, {0, 2}, {71, 3}, {0, 2}, {76, 5}, {0, 1}, {46, 3}, {96, 4}, + {130, 4}, {0, 3}, {360, 3}, {0, 3}, {178, 5}, {0, 7}, {75, 3}, {0, 3}, + {45, 6}, {0, 6}, {299, 6}, {180, 3}, {70, 6}, {0, 1}, {48, 3}, {66, 4}, + {0, 3}, {47, 5}, {0, 9}, {325, 3}, {0, 1}, {359, 3}, {318, 3}, {0, 2}, + {199, 3}, {0, 1}, {344, 3}, {0, 3}, {248, 3}, {0, 10}, {310, 3}, {0, 3}, + {93, 6}, {0, 3}, {252, 3}, {157, 4}, {0, 2}, {273, 5}, {0, 14}, {99, 4}, + {0, 1}, {464, 4}, {0, 2}, {92, 4}, {495, 3}, {0, 1}, {322, 4}, {16, 4}, + {0, 3}, {402, 3}, {0, 2}, {237, 4}, {0, 2}, {432, 4}, {0, 1}, {483, 5}, + {0, 2}, {294, 4}, {0, 2}, {306, 3}, {113, 5}, {0, 1}, {26, 4}, {164, 3}, + {488, 4}, {0, 1}, {542, 3}, {248, 6}, {0, 5}, {205, 3}, {0, 8}, {48, 3}, + {449, 6}, {0, 2}, {192, 3}, {328, 4}, {9, 5}, {433, 3}, {0, 3}, {622, 25}, + {615, 5}, {46, 5}, {0, 2}, {104, 3}, {475, 10}, {549, 3}, {0, 4}, {597, 8}, + {314, 3}, {0, 1}, {473, 6}, {317, 5}, {0, 1}, {400, 3}, {0, 3}, {109, 3}, + {151, 3}, {48, 4}, {0, 4}, {125, 3}, {108, 3}, {0, 2}, + } + + var got, want bytes.Buffer + var dd dictDecoder + dd.init(1<<11, nil) + + var writeCopy = func(dist, length int) { + for length > 0 { + cnt := dd.tryWriteCopy(dist, length) + if cnt == 0 { + cnt = dd.writeCopy(dist, length) + } + + length -= cnt + if dd.availWrite() == 0 { + got.Write(dd.readFlush()) + } + } + } + var writeString = func(str string) { + for len(str) > 0 { + cnt := copy(dd.writeSlice(), str) + str = str[cnt:] + dd.writeMark(cnt) + if dd.availWrite() == 0 { + got.Write(dd.readFlush()) + } + } + } + + writeString(".") + want.WriteByte('.') + + str := poem + for _, ref := range poemRefs { + if ref.dist == 0 { + writeString(str[:ref.length]) + } else { + writeCopy(ref.dist, ref.length) + } + str = str[ref.length:] + } + want.WriteString(poem) + + writeCopy(dd.histSize(), 33) + want.Write(want.Bytes()[:33]) + + writeString(abc) + writeCopy(len(abc), 59*len(abc)) + want.WriteString(strings.Repeat(abc, 60)) + + writeString(fox) + writeCopy(len(fox), 9*len(fox)) + want.WriteString(strings.Repeat(fox, 10)) + + writeString(".") + writeCopy(1, 9) + want.WriteString(strings.Repeat(".", 10)) + + writeString(strings.ToUpper(poem)) + writeCopy(len(poem), 7*len(poem)) + want.WriteString(strings.Repeat(strings.ToUpper(poem), 8)) + + writeCopy(dd.histSize(), 10) + want.Write(want.Bytes()[want.Len()-dd.histSize():][:10]) + + got.Write(dd.readFlush()) + if got.String() != want.String() { + t.Errorf("final string mismatch:\ngot %q\nwant %q", got.String(), want.String()) + } +} diff --git a/internal/compress/flate/example_test.go b/internal/compress/flate/example_test.go new file mode 100644 index 00000000..d541e0e1 --- /dev/null +++ b/internal/compress/flate/example_test.go @@ -0,0 +1,245 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate_test + +import ( + "bytes" + "fmt" + "io" + "log" + "os" + "strings" + "sync" + + "codeberg.org/lindenii/furgit/internal/compress/flate" +) + +// In performance critical applications, Reset can be used to discard the +// current compressor or decompressor state and reinitialize them quickly +// by taking advantage of previously allocated memory. +func Example_reset() { + proverbs := []string{ + "Don't communicate by sharing memory, share memory by communicating.\n", + "Concurrency is not parallelism.\n", + "The bigger the interface, the weaker the abstraction.\n", + "Documentation is for users.\n", + } + + var r strings.Reader + var b bytes.Buffer + buf := make([]byte, 32<<10) + + zw, err := flate.NewWriter(nil, flate.DefaultCompression) + if err != nil { + log.Fatal(err) + } + zr := flate.NewReader(nil) + + for _, s := range proverbs { + r.Reset(s) + b.Reset() + + // Reset the compressor and encode from some input stream. + zw.Reset(&b) + if _, err := io.CopyBuffer(zw, &r, buf); err != nil { + log.Fatal(err) + } + if err := zw.Close(); err != nil { + log.Fatal(err) + } + + // Reset the decompressor and decode to some output stream. + if err := zr.(flate.Resetter).Reset(&b, nil); err != nil { + log.Fatal(err) + } + if _, err := io.CopyBuffer(os.Stdout, zr, buf); err != nil { + log.Fatal(err) + } + if err := zr.Close(); err != nil { + log.Fatal(err) + } + } + + // Output: + // Don't communicate by sharing memory, share memory by communicating. + // Concurrency is not parallelism. + // The bigger the interface, the weaker the abstraction. + // Documentation is for users. +} + +// A preset dictionary can be used to improve the compression ratio. +// The downside to using a dictionary is that the compressor and decompressor +// must agree in advance what dictionary to use. +func Example_dictionary() { + // The dictionary is a string of bytes. When compressing some input data, + // the compressor will attempt to substitute substrings with matches found + // in the dictionary. As such, the dictionary should only contain substrings + // that are expected to be found in the actual data stream. + const dict = `<?xml version="1.0"?>` + `<book>` + `<data>` + `<meta name="` + `" content="` + + // The data to compress should (but is not required to) contain frequent + // substrings that match those in the dictionary. + const data = `<?xml version="1.0"?> +<book> + <meta name="title" content="The Go Programming Language"/> + <meta name="authors" content="Alan Donovan and Brian Kernighan"/> + <meta name="published" content="2015-10-26"/> + <meta name="isbn" content="978-0134190440"/> + <data>...</data> +</book> +` + + var b bytes.Buffer + + // Compress the data using the specially crafted dictionary. + zw, err := flate.NewWriterDict(&b, flate.BestCompression, []byte(dict)) + if err != nil { + log.Fatal(err) + } + if _, err := io.Copy(zw, strings.NewReader(data)); err != nil { + log.Fatal(err) + } + if err := zw.Close(); err != nil { + log.Fatal(err) + } + + // The decompressor must use the same dictionary as the compressor. + // Otherwise, the input may appear as corrupted. + fmt.Println("Decompressed output using the dictionary:") + zr := flate.NewReaderDict(bytes.NewReader(b.Bytes()), []byte(dict)) + if _, err := io.Copy(os.Stdout, zr); err != nil { + log.Fatal(err) + } + if err := zr.Close(); err != nil { + log.Fatal(err) + } + + fmt.Println() + + // Substitute all of the bytes in the dictionary with a '#' to visually + // demonstrate the approximate effectiveness of using a preset dictionary. + fmt.Println("Substrings matched by the dictionary are marked with #:") + hashDict := []byte(dict) + for i := range hashDict { + hashDict[i] = '#' + } + zr = flate.NewReaderDict(&b, hashDict) + if _, err := io.Copy(os.Stdout, zr); err != nil { + log.Fatal(err) + } + if err := zr.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // Decompressed output using the dictionary: + // <?xml version="1.0"?> + // <book> + // <meta name="title" content="The Go Programming Language"/> + // <meta name="authors" content="Alan Donovan and Brian Kernighan"/> + // <meta name="published" content="2015-10-26"/> + // <meta name="isbn" content="978-0134190440"/> + // <data>...</data> + // </book> + // + // Substrings matched by the dictionary are marked with #: + // ##################### + // ###### + // ############title###########The Go Programming Language"/# + // ############authors###########Alan Donovan and Brian Kernighan"/# + // ############published###########2015-10-26"/# + // ############isbn###########978-0134190440"/# + // ######...</##### + // </##### +} + +// DEFLATE is suitable for transmitting compressed data across the network. +func Example_synchronization() { + var wg sync.WaitGroup + defer wg.Wait() + + // Use io.Pipe to simulate a network connection. + // A real network application should take care to properly close the + // underlying connection. + rp, wp := io.Pipe() + + // Start a goroutine to act as the transmitter. + wg.Add(1) + go func() { + defer wg.Done() + defer wp.Close() + + zw, err := flate.NewWriter(wp, flate.BestSpeed) + if err != nil { + log.Fatal(err) + } + + b := make([]byte, 256) + for m := range strings.FieldsSeq("A long time ago in a galaxy far, far away...") { + // We use a simple framing format where the first byte is the + // message length, followed the message itself. + b[0] = uint8(copy(b[1:], m)) + + if _, err := zw.Write(b[:1+len(m)]); err != nil { + log.Fatal(err) + } + + // Flush ensures that the receiver can read all data sent so far. + if err := zw.Flush(); err != nil { + log.Fatal(err) + } + } + + if err := zw.Close(); err != nil { + log.Fatal(err) + } + }() + + // Start a goroutine to act as the receiver. + wg.Add(1) + go func() { + defer wg.Done() + + zr := flate.NewReader(rp) + + b := make([]byte, 256) + for { + // Read the message length. + // This is guaranteed to return for every corresponding + // Flush and Close on the transmitter side. + if _, err := io.ReadFull(zr, b[:1]); err != nil { + if err == io.EOF { + break // The transmitter closed the stream + } + log.Fatal(err) + } + + // Read the message content. + n := int(b[0]) + if _, err := io.ReadFull(zr, b[:n]); err != nil { + log.Fatal(err) + } + + fmt.Printf("Received %d bytes: %s\n", n, b[:n]) + } + fmt.Println() + + if err := zr.Close(); err != nil { + log.Fatal(err) + } + }() + + // Output: + // Received 1 bytes: A + // Received 4 bytes: long + // Received 4 bytes: time + // Received 3 bytes: ago + // Received 2 bytes: in + // Received 1 bytes: a + // Received 6 bytes: galaxy + // Received 4 bytes: far, + // Received 3 bytes: far + // Received 7 bytes: away... +} diff --git a/internal/compress/flate/fast_encoder.go b/internal/compress/flate/fast_encoder.go new file mode 100644 index 00000000..39393020 --- /dev/null +++ b/internal/compress/flate/fast_encoder.go @@ -0,0 +1,189 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Modified for deflate by Klaus Post (c) 2015. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + + "codeberg.org/lindenii/furgit/internal/compress/internal/le" +) + +type fastEnc interface { + Encode(dst *tokens, src []byte) + Reset() +} + +func newFastEnc(level int) fastEnc { + switch level { + case 1: + return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} + case 2: + return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} + case 3: + return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} + case 4: + return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} + case 5: + return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} + case 6: + return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} + default: + panic("invalid level specified") + } +} + +const ( + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. + baseMatchOffset = 1 // The smallest match offset + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + maxMatchOffset = 1 << 15 // The largest match offset + + bTableBits = 17 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. +) + +const ( + prime3bytes = 506832829 + prime4bytes = 2654435761 + prime5bytes = 889523592379 + prime6bytes = 227718039650203 + prime7bytes = 58295818150454627 + prime8bytes = 0xcf1bbcdcb7a56463 +) + +func load3232(b []byte, i int32) uint32 { + return le.Load32(b, i) +} + +func load6432(b []byte, i int32) uint64 { + return le.Load64(b, i) +} + +type tableEntry struct { + offset int32 +} + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastGen struct { + hist []byte + cur int32 +} + +func (e *fastGen) addBlock(src []byte) int32 { + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < maxMatchOffset*2 { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + // copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +type tableEntryPrev struct { + Cur tableEntry + Prev tableEntry +} + +// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash7(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// hashLen returns a hash of the lowest mls bytes of with length output bits. +// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. +// length should always be < 32. +// Preferably length and mls should be a constant for inlining. +func hashLen(u uint64, length, mls uint8) uint32 { + switch mls { + case 3: + return (uint32(u<<8) * prime3bytes) >> (32 - length) + case 5: + return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) + case 6: + return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) + case 7: + return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) + case 8: + return uint32((u * prime8bytes) >> (64 - length)) + default: + return (uint32(u) * prime4bytes) >> (32 - length) + } +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlen(s, t int, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + a := src[s:min(s+maxMatchLength-4, len(src))] + b := src[t:] + return int32(matchLen(a, b)) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlenLong(s, t int, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + return int32(matchLen(src[s:], src[t:])) +} + +// Reset the encoding table. +func (e *fastGen) Reset() { + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} diff --git a/internal/compress/flate/flate_test.go b/internal/compress/flate/flate_test.go new file mode 100644 index 00000000..9817efef --- /dev/null +++ b/internal/compress/flate/flate_test.go @@ -0,0 +1,366 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This test tests some internals of the flate package. +// The tests in package compress/gzip serve as the +// end-to-end test of the decompressor. + +package flate + +import ( + "archive/zip" + "bytes" + "compress/flate" + "encoding/hex" + "fmt" + "io" + "os" + "testing" +) + +// The following test should not panic. +func TestIssue5915(t *testing.T) { + bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 6, 0, 11, 0, 8, 0, 6, 6, 10, 8} + var h huffmanDecoder + if h.init(bits) { + t.Fatalf("Given sequence of bits is bad, and should not succeed.") + } +} + +// The following test should not panic. +func TestIssue5962(t *testing.T) { + bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, + 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11} + var h huffmanDecoder + if h.init(bits) { + t.Fatalf("Given sequence of bits is bad, and should not succeed.") + } +} + +// The following test should not panic. +func TestIssue6255(t *testing.T) { + bits1 := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11} + bits2 := []int{11, 13} + var h huffmanDecoder + if !h.init(bits1) { + t.Fatalf("Given sequence of bits is good and should succeed.") + } + if h.init(bits2) { + t.Fatalf("Given sequence of bits is bad and should not succeed.") + } +} + +func TestInvalidEncoding(t *testing.T) { + // Initialize Huffman decoder to recognize "0". + var h huffmanDecoder + if !h.init([]int{1}) { + t.Fatal("Failed to initialize Huffman decoder") + } + + // Initialize decompressor with invalid Huffman coding. + var f decompressor + f.r = bytes.NewReader([]byte{0xff}) + + _, err := f.huffSym(&h) + if err == nil { + t.Fatal("Should have rejected invalid bit sequence") + } +} + +func TestRegressions(t *testing.T) { + // Test fuzzer regressions + data, err := os.ReadFile("testdata/regression.zip") + if err != nil { + t.Fatal(err) + } + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + for _, tt := range zr.File { + data, err := tt.Open() + if err != nil { + t.Fatal(err) + } + data1, err := io.ReadAll(data) + if err != nil { + t.Fatal(err) + } + t.Run(tt.Name, func(t *testing.T) { + if testing.Short() && len(data1) > 10000 { + t.SkipNow() + } + for level := 0; level <= 9; level++ { + t.Run(fmt.Sprint(tt.Name+"-level", 1), func(t *testing.T) { + buf := new(bytes.Buffer) + fw, err := NewWriter(buf, level) + if err != nil { + t.Error(err) + } + n, err := fw.Write(data1) + if n != len(data1) { + t.Error("short write") + } + if err != nil { + t.Error(err) + } + err = fw.Close() + if err != nil { + t.Error(err) + } + fr1 := NewReader(buf) + data2, err := io.ReadAll(fr1) + if err != nil { + t.Error(err) + } + if !bytes.Equal(data1, data2) { + t.Error("not equal") + } + // Do it again... + buf.Reset() + fw.Reset(buf) + n, err = fw.Write(data1) + if n != len(data1) { + t.Error("short write") + } + if err != nil { + t.Error(err) + } + err = fw.Close() + if err != nil { + t.Error(err) + } + fr1 = flate.NewReader(buf) + data2, err = io.ReadAll(fr1) + if err != nil { + t.Error(err) + } + if !bytes.Equal(data1, data2) { + t.Error("not equal") + } + }) + } + t.Run(tt.Name+"stateless", func(t *testing.T) { + // Split into two and use history... + buf := new(bytes.Buffer) + err = StatelessDeflate(buf, data1[:len(data1)/2], false, nil) + if err != nil { + t.Error(err) + } + + // Use top half as dictionary... + dict := data1[:len(data1)/2] + err = StatelessDeflate(buf, data1[len(data1)/2:], true, dict) + if err != nil { + t.Error(err) + } + t.Log(buf.Len()) + fr1 := NewReader(buf) + data2, err := io.ReadAll(fr1) + if err != nil { + t.Error(err) + } + if !bytes.Equal(data1, data2) { + //fmt.Printf("want:%x\ngot: %x\n", data1, data2) + t.Error("not equal") + } + }) + }) + } +} + +func TestInvalidBits(t *testing.T) { + oversubscribed := []int{1, 2, 3, 4, 4, 5} + incomplete := []int{1, 2, 4, 4} + var h huffmanDecoder + if h.init(oversubscribed) { + t.Fatal("Should reject oversubscribed bit-length set") + } + if h.init(incomplete) { + t.Fatal("Should reject incomplete bit-length set") + } +} + +func TestStreams(t *testing.T) { + // To verify any of these hexstrings as valid or invalid flate streams + // according to the C zlib library, you can use the Python wrapper library: + // >>> hex_string = "010100feff11" + // >>> import zlib + // >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE + // '\x11' + + testCases := []struct { + desc string // Description of the stream + stream string // Hexstring of the input DEFLATE stream + want string // Expected result. Use "fail" to expect failure + }{{ + "degenerate HCLenTree", + "05e0010000000000100000000000000000000000000000000000000000000000" + + "00000000000000000004", + "fail", + }, { + "complete HCLenTree, empty HLitTree, empty HDistTree", + "05e0010400000000000000000000000000000000000000000000000000000000" + + "00000000000000000010", + "fail", + }, { + "empty HCLenTree", + "05e0010000000000000000000000000000000000000000000000000000000000" + + "00000000000000000010", + "fail", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree, use missing HDist symbol", + "000100feff000de0010400000000100000000000000000000000000000000000" + + "0000000000000000000000000000002c", + "fail", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use missing HDist symbol", + "000100feff000de0010000000000000000000000000000000000000000000000" + + "00000000000000000610000000004070", + "fail", + }, { + "complete HCLenTree, empty HLitTree, empty HDistTree", + "05e0010400000000100400000000000000000000000000000000000000000000" + + "0000000000000000000000000008", + "fail", + }, { + "complete HCLenTree, empty HLitTree, degenerate HDistTree", + "05e0010400000000100400000000000000000000000000000000000000000000" + + "0000000000000000000800000008", + "fail", + }, { + "complete HCLenTree, degenerate HLitTree, degenerate HDistTree, use missing HLit symbol", + "05e0010400000000100000000000000000000000000000000000000000000000" + + "0000000000000000001c", + "fail", + }, { + "complete HCLenTree, complete HLitTree, too large HDistTree", + "edff870500000000200400000000000000000000000000000000000000000000" + + "000000000000000000080000000000000004", + "fail", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree, excessive repeater code", + "edfd870500000000200400000000000000000000000000000000000000000000" + + "000000000000000000e8b100", + "fail", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree of normal length 30", + "05fd01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07000000fe01", + "", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree of excessive length 31", + "05fe01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07000000fc03", + "fail", + }, { + "complete HCLenTree, over-subscribed HLitTree, empty HDistTree", + "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07f00f", + "fail", + }, { + "complete HCLenTree, under-subscribed HLitTree, empty HDistTree", + "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" + + "fffffffffcffffffff07f00f", + "fail", + }, { + "complete HCLenTree, complete HLitTree with single code, empty HDistTree", + "05e001240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07f00f", + "01", + }, { + "complete HCLenTree, complete HLitTree with multiple codes, empty HDistTree", + "05e301240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07807f", + "01", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HDist symbol", + "000100feff000de0010400000000100000000000000000000000000000000000" + + "0000000000000000000000000000003c", + "00000000", + }, { + "complete HCLenTree, degenerate HLitTree, degenerate HDistTree", + "05e0010400000000100000000000000000000000000000000000000000000000" + + "0000000000000000000c", + "", + }, { + "complete HCLenTree, degenerate HLitTree, empty HDistTree", + "05e0010400000000100000000000000000000000000000000000000000000000" + + "00000000000000000004", + "", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree, spanning repeater code", + "edfd870500000000200400000000000000000000000000000000000000000000" + + "000000000000000000e8b000", + "", + }, { + "complete HCLenTree with length codes, complete HLitTree, empty HDistTree", + "ede0010400000000100000000000000000000000000000000000000000000000" + + "0000000000000000000400004000", + "", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit symbol 284 with count 31", + "000100feff00ede0010400000000100000000000000000000000000000000000" + + "000000000000000000000000000000040000407f00", + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "000000", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit and HDist symbols", + "0cc2010d00000082b0ac4aff0eb07d27060000ffff", + "616263616263", + }, { + "fixed block, use reserved symbol 287", + "33180700", + "fail", + }, { + "raw block", + "010100feff11", + "11", + }, { + "issue 10426 - over-subscribed HCLenTree causes a hang", + "344c4a4e494d4b070000ff2e2eff2e2e2e2e2eff", + "fail", + }, { + "issue 11030 - empty HDistTree unexpectedly leads to error", + "05c0070600000080400fff37a0ca", + "", + }, { + "issue 11033 - empty HDistTree unexpectedly leads to error", + "050fb109c020cca5d017dcbca044881ee1034ec149c8980bbc413c2ab35be9dc" + + "b1473449922449922411202306ee97b0383a521b4ffdcf3217f9f7d3adb701", + "3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" + + "c6d7", + }} + + for i, tc := range testCases { + data, err := hex.DecodeString(tc.stream) + if err != nil { + t.Fatal(err) + } + data, err = io.ReadAll(NewReader(bytes.NewReader(data))) + if tc.want == "fail" { + if err == nil { + t.Errorf("#%d (%s): got nil error, want non-nil", i, tc.desc) + } + } else { + if err != nil { + t.Errorf("#%d (%s): %v", i, tc.desc, err) + continue + } + if got := hex.EncodeToString(data); got != tc.want { + t.Errorf("#%d (%s):\ngot %q\nwant %q", i, tc.desc, got, tc.want) + } + + } + } +} diff --git a/internal/compress/flate/fuzz_test.go b/internal/compress/flate/fuzz_test.go new file mode 100644 index 00000000..b97cd055 --- /dev/null +++ b/internal/compress/flate/fuzz_test.go @@ -0,0 +1,174 @@ +//go:build go1.18 + +package flate + +import ( + "bytes" + "flag" + "io" + "os" + "strconv" + "testing" + + "codeberg.org/lindenii/furgit/internal/compress/internal/fuzz" +) + +// Fuzzing tweaks: +var fuzzStartF = flag.Int("start", HuffmanOnly, "Start fuzzing at this level") +var fuzzEndF = flag.Int("end", BestCompression, "End fuzzing at this level (inclusive)") +var fuzzMaxF = flag.Int("max", 1<<20, "Maximum input size") +var fuzzSLF = flag.Bool("sl", true, "Include stateless encodes") +var fuzzWindow = flag.Bool("windows", true, "Include windowed encodes") + +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + +func FuzzEncoding(f *testing.F) { + fuzz.AddFromZip(f, "testdata/regression.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/encode-raw-corpus.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/FuzzEncoding.zip", fuzz.TypeGoFuzz, testing.Short()) + + startFuzz := *fuzzStartF + endFuzz := *fuzzEndF + maxSize := *fuzzMaxF + stateless := *fuzzSLF + fuzzWindow := *fuzzWindow + + decoder := NewReader(nil) + buf := new(bytes.Buffer) + encs := make([]*Writer, endFuzz-startFuzz+1) + for i := range encs { + var err error + encs[i], err = NewWriter(nil, i+startFuzz) + if err != nil { + f.Fatal(err.Error()) + } + } + + f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxSize { + return + } + for level := startFuzz; level <= endFuzz; level++ { + msg := "level " + strconv.Itoa(level) + ":" + buf.Reset() + fw := encs[level-startFuzz] + fw.Reset(buf) + n, err := fw.Write(data) + if n != len(data) { + t.Fatal(msg + "short write") + } + if err != nil { + t.Fatal(msg + err.Error()) + } + err = fw.Close() + if err != nil { + t.Fatal(msg + err.Error()) + } + decoder.(Resetter).Reset(buf, nil) + data2, err := io.ReadAll(decoder) + if err != nil { + t.Fatal(msg + err.Error()) + } + if !bytes.Equal(data, data2) { + t.Fatal(msg + "not equal") + } + // Do it again... (also uses copy) + msg = "level " + strconv.Itoa(level) + " (reset):" + buf.Reset() + fw.Reset(buf) + _, err = io.Copy(fw, bytes.NewReader(data)) + if err != nil { + t.Fatal(msg + err.Error()) + } + err = fw.Close() + if err != nil { + t.Fatal(msg + err.Error()) + } + decoder.(Resetter).Reset(buf, nil) + data2, err = io.ReadAll(decoder) + if err != nil { + t.Fatal(msg + err.Error()) + } + if !bytes.Equal(data, data2) { + t.Fatal(msg + "not equal") + } + } + if stateless { + // Split into two and use history... + msg := "stateless:" + buf.Reset() + err := StatelessDeflate(buf, data[:len(data)/2], false, nil) + if err != nil { + t.Error(err) + } + + // Use top half as dictionary... + dict := data[:len(data)/2] + err = StatelessDeflate(buf, data[len(data)/2:], true, dict) + if err != nil { + t.Error(err) + } + + decoder.(Resetter).Reset(buf, nil) + data2, err := io.ReadAll(decoder) + if err != nil { + t.Error(err) + } + if !bytes.Equal(data, data2) { + //fmt.Printf("want:%x\ngot: %x\n", data1, data2) + t.Error(msg + "not equal") + } + } + if fuzzWindow { + msg := "windowed:" + buf.Reset() + fw, err := NewWriterWindow(buf, 1000) + if err != nil { + t.Fatal(msg + err.Error()) + } + fw.Reset(buf) + n, err := fw.Write(data) + if n != len(data) { + t.Fatal(msg + "short write") + } + if err != nil { + t.Fatal(msg + err.Error()) + } + err = fw.Close() + if err != nil { + t.Fatal(msg + err.Error()) + } + decoder.(Resetter).Reset(buf, nil) + data2, err := io.ReadAll(decoder) + if err != nil { + t.Fatal(msg + err.Error()) + } + if !bytes.Equal(data, data2) { + t.Fatal(msg + "not equal") + } + // Do it again... + msg = msg + " (reset):" + buf.Reset() + fw.Reset(buf) + n, err = fw.Write(data) + if n != len(data) { + t.Fatal(msg + "short write") + } + if err != nil { + t.Fatal(msg + err.Error()) + } + err = fw.Close() + if err != nil { + t.Fatal(msg + err.Error()) + } + decoder.(Resetter).Reset(buf, nil) + data2, err = io.ReadAll(decoder) + if err != nil { + t.Fatal(msg + err.Error()) + } + } + }) +} diff --git a/internal/compress/flate/huffman_bit_writer.go b/internal/compress/flate/huffman_bit_writer.go new file mode 100644 index 00000000..aeab2043 --- /dev/null +++ b/internal/compress/flate/huffman_bit_writer.go @@ -0,0 +1,1174 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + "io" + "math" + + "codeberg.org/lindenii/furgit/internal/compress/internal/le" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 + + // maxPredefinedTokens is the maximum number of tokens + // where we check if fixed size is smaller. + maxPredefinedTokens = 250 + + // bufferFlushSize indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + bufferFlushSize = 246 +) + +// Minimum length code that emits bits. +const lengthExtraBitsMinCode = 8 + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = [32]uint8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = [32]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// Minimum offset code that emits bits. +const offsetExtraBitsMinCode = 4 + +// offset code word extra bits. +var offsetExtraBits = [32]int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, +} + +var offsetCombined = [32]uint32{} + +func init() { + var offsetBase = [32]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, + } + + for i := range offsetCombined[:] { + // Don't use extended window values... + if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 { + continue + } + offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8) + } +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + // writer is the underlying writer. + // Do not use it directly; use the write method, which ensures + // that Write errors are sticky. + writer io.Writer + + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint64 + nbits uint8 + nbytes uint8 + lastHuffMan bool + literalEncoding *huffmanEncoder + tmpLitEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error + lastHeader int + // Set between 0 (reused block can be up to 2x the size) + logNewTablePenalty uint + bytes [256 + 8]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 + + // codegen must have an extra space for the final symbol. + codegen [literalCount + offsetCodeCount + 1]uint8 +} + +// Huffman reuse. +// +// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. +// +// This is controlled by several variables: +// +// If lastHeader is non-zero the Huffman table can be reused. +// This also indicates that a Huffman table has been generated that can output all +// possible symbols. +// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated +// an EOB with the previous table must be written. +// +// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. +// +// An incoming block estimates the output size of a new table using a 'fresh' by calculating the +// optimal size and adding a penalty in 'logNewTablePenalty'. +// A Huffman table is not optimal, which is why we add a penalty, and generating a new table +// is slower both for compression and decompression. + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + writer: w, + literalEncoding: newHuffmanEncoder(literalCount), + tmpLitEncoding: newHuffmanEncoder(literalCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.writer = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.lastHeader = 0 + w.lastHuffMan = false +} + +func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { + a := t.offHist[:offsetCodeCount] + b := w.offsetEncoding.codes + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + + a = t.extraHist[:literalCount-256] + b = w.literalEncoding.codes[256:literalCount] + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + + a = t.litHist[:256] + b = w.literalEncoding.codes[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + return true +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + n := w.nbytes + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + n++ + } + w.bits = 0 + if n > 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 +} + +func (w *huffmanBitWriter) write(b []byte) { + if w.err != nil { + return + } + _, w.err = w.writer.Write(b) +} + +func (w *huffmanBitWriter) writeBits(b int32, nb uint8) { + w.bits |= uint64(b) << (w.nbits & 63) + w.nbits += nb + if w.nbits >= 48 { + w.writeOutBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits&7 != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if n != 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 + w.write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen[:] // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + cgnl := codegen[:numLiterals] + for i := range cgnl { + cgnl[i] = litEnc.codes[i].len() + } + + cgnl = codegen[numLiterals : numLiterals+numOffsets] + for i := range cgnl { + cgnl[i] = offEnc.codes[i].len() + } + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := min(6, count) + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := min(138, count) + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) codegens() int { + numCodegens := len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return numCodegens +} + +func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return 3 + 5 + 5 + 4 + (3 * numCodegens) + + w.codegenEncoding.bitLength(w.codegenFreq[:]) + + int(w.codegenFreq[16])*2 + + int(w.codegenFreq[17])*3 + + int(w.codegenFreq[18])*7, numCodegens +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { + header, numCodegens := w.headerSize() + size = header + + litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + + extraBits + return size, numCodegens +} + +// extraBitSize will return the number of bits that will be written +// as "extra" bits on matches. +func (w *huffmanBitWriter) extraBitSize() int { + total := 0 + for i, n := range w.literalFreq[257:literalCount] { + total += int(n) * int(lengthExtraBits[i&31]) + } + for i, n := range w.offsetFreq[:offsetCodeCount] { + total += int(n) * int(offsetExtraBits[i&31]) + } + return total +} + +// fixedSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) fixedSize(extraBits int) int { + return 3 + + fixedLiteralEncoding.bitLength(w.literalFreq[:]) + + fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + + extraBits +} + +// storedSize calculates the stored size, including header. +// The function returns the size in bits and whether the block +// fits inside a single block. +func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { + if in == nil { + return 0, false + } + if len(in) <= maxStoreBlockSize { + return (len(in) + 5) * 8, true + } + return 0, false +} + +func (w *huffmanBitWriter) writeCode(c hcode) { + // The function does not get inlined if we "& 63" the shift. + w.bits |= c.code64() << (w.nbits & 63) + w.nbits += c.len() + if w.nbits >= 48 { + w.writeOutBits() + } +} + +// writeOutBits will write bits to the buffer. +func (w *huffmanBitWriter) writeOutBits() { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + + // We overwrite, but faster... + le.Store64(w.bytes[:], n, bits) + n += 6 + + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + + w.nbytes = n +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := range numCodegens { + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len()) + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord = uint32(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + w.writeCode(w.codegenEncoding.codes[codeWord]) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + } + } +} + +// writeStoredHeader will write a stored header. +// If the stored block is only used for EOF, +// it is replaced with a fixed huffman block. +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. + if length == 0 && isEof { + w.writeFixedHeader(isEof) + // EOB: 7 bits, value: 0 + w.writeBits(0, 7) + w.flush() + return + } + + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +// writeBlock will write a block of tokens with the smallest encoding. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is nil, the tokens will always be Huffman encoded. +func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens.AddEOB() + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + numLiterals, numOffsets := w.indexTokens(tokens, false) + w.generate() + var extraBits int + storedSize, storable := w.storedSize(input) + if storable { + extraBits = w.extraBitSize() + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + var size = math.MaxInt32 + if tokens.n < maxPredefinedTokens { + size = w.fixedSize(extraBits) + } + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storable && storedSize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + + // Write the tokens. + w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + sync = sync || eof + if sync { + tokens.AddEOB() + } + + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { + // We will not try to reuse. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } + + // fillReuse enables filling of empty values. + // This will make encodings always reusable without testing. + // However, this does not appear to benefit on most cases. + const fillReuse = false + + // Check if we can reuse... + if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + numLiterals, numOffsets := w.indexTokens(tokens, true) + extraBits := 0 + ssize, storable := w.storedSize(input) + + const usePrefs = true + if storable || w.lastHeader > 0 { + extraBits = w.extraBitSize() + } + + var size int + + // Check if we should reuse. + if w.lastHeader > 0 { + // Estimate size for using a new table. + // Use the previous header size as the best estimate. + newSize := w.lastHeader + tokens.EstimatedBits() + newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty + + // The estimated size is calculated as an optimal table. + // We add a penalty to make it more realistic and re-use a bit more. + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits + + // Check if a new table is better. + if newSize < reuseSize { + // Write the EOB we owe. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + size = newSize + w.lastHeader = 0 + } else { + size = reuseSize + } + + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + + // We want a new block/table + if w.lastHeader == 0 { + if fillReuse && !sync { + w.fillTokens() + numLiterals, numOffsets = maxNumLit, maxNumDist + } else { + w.literalFreq[endBlockMarker] = 1 + } + + w.generate() + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + + var numCodegens int + if fillReuse && !sync { + // Reindex for accurate size... + w.indexTokens(tokens, true) + } + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + // Store predefined, if we don't get a reasonable improvement. + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { + // Store bytes, if we don't get an improvement. + if storable && ssize <= preSize { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + + if storable && ssize <= size { + // Store bytes, if we don't get an improvement. + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Write Huffman table. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + if !sync { + w.lastHeader, _ = w.headerSize() + } + w.lastHuffMan = false + } + + if sync { + w.lastHeader = 0 + } + // Write the tokens. + w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) +} + +func (w *huffmanBitWriter) fillTokens() { + for i, v := range w.literalFreq[:literalCount] { + if v == 0 { + w.literalFreq[i] = 1 + } + } + for i, v := range w.offsetFreq[:offsetCodeCount] { + if v == 0 { + w.offsetFreq[i] = 1 + } + } +} + +// indexTokens indexes a slice of tokens, and updates +// literalFreq and offsetFreq, and generates literalEncoding +// and offsetEncoding. +// The number of literal and offset tokens is returned. +func (w *huffmanBitWriter) indexTokens(t *tokens, alwaysEOB bool) (numLiterals, numOffsets int) { + //copy(w.literalFreq[:], t.litHist[:]) + *(*[256]uint16)(w.literalFreq[:]) = t.litHist + //copy(w.literalFreq[256:], t.extraHist[:]) + *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist + w.offsetFreq = t.offHist + + if t.n == 0 { + return + } + if alwaysEOB { + w.literalFreq[endBlockMarker] = 1 + } + + // get the number of literals + numLiterals = len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets = len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + return +} + +func (w *huffmanBitWriter) generate() { + w.literalEncoding.generate(w.literalFreq[:literalCount], 15) + w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeTokens writes a slice of tokens to the output. +// codes for literal and offset encoding must be supplied. +func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { + if w.err != nil { + return + } + if len(tokens) == 0 { + return + } + + // Only last token should be endBlockMarker. + var deferEOB bool + if tokens[len(tokens)-1] == endBlockMarker { + tokens = tokens[:len(tokens)-1] + deferEOB = true + } + + // Create slices up to the next power of two to avoid bounds checks. + lits := leCodes[:256] + offs := oeCodes[:32] + lengths := leCodes[lengthCodesStart:] + lengths = lengths[:32] + + // Go 1.16 LOVES having these on stack. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + for _, t := range tokens { + if t < 256 { + //w.writeCode(lits[t.literal()]) + c := lits[t] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + le.Store64(w.bytes[:], nbytes, bits) + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + continue + } + + // Write the length + length := t.length() + lengthCode := lengthCode(length) & 31 + if false { + w.writeCode(lengths[lengthCode]) + } else { + // inlined + c := lengths[lengthCode] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + le.Store64(w.bytes[:], nbytes, bits) + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + + if lengthCode >= lengthExtraBitsMinCode { + extraLengthBits := lengthExtraBits[lengthCode] + //w.writeBits(extraLength, extraLengthBits) + extraLength := int32(length - lengthBase[lengthCode]) + bits |= uint64(extraLength) << (nbits & 63) + nbits += extraLengthBits + if nbits >= 48 { + le.Store64(w.bytes[:], nbytes, bits) + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + // Write the offset + offset := t.offset() + offsetCode := (offset >> 16) & 31 + if false { + w.writeCode(offs[offsetCode]) + } else { + // inlined + c := offs[offsetCode] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + le.Store64(w.bytes[:], nbytes, bits) + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + + if offsetCode >= offsetExtraBitsMinCode { + offsetComb := offsetCombined[offsetCode] + //w.writeBits(extraOffset, extraOffsetBits) + bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) + nbits += uint8(offsetComb) + if nbits >= 48 { + le.Store64(w.bytes[:], nbytes, bits) + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if deferEOB { + w.writeCode(leCodes[endBlockMarker]) + } +} + +// huffOffset is a static offset encoder used for huffman only encoding. +// It can be reused since we will not be encoding offset values. +var huffOffset *huffmanEncoder + +func init() { + w := newHuffmanBitWriter(nil) + w.offsetFreq[0] = 1 + huffOffset = newHuffmanEncoder(offsetCodeCount) + huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeBlockHuff encodes a block of bytes as either +// Huffman encoded literals or uncompressed bytes if the +// results only gains very little from compression. +func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + // Clear histogram + for i := range w.literalFreq[:] { + w.literalFreq[i] = 0 + } + if !w.lastHuffMan { + for i := range w.offsetFreq[:] { + w.offsetFreq[i] = 0 + } + } + + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + + // Add everything as literals + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + histogram(input, w.literalFreq[:numLiterals]) + ssize, storable := w.storedSize(input) + if storable && len(input) > 1024 { + // Quick check for incompressible content. + abs := float64(0) + avg := float64(len(input)) / 256 + max := float64(len(input) * 2) + for _, v := range w.literalFreq[:256] { + diff := float64(v) - avg + abs += diff * diff + if abs > max { + break + } + } + if abs < max { + if debugDeflate { + fmt.Println("stored", abs, "<", max) + } + // No chance we can compress this... + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + w.literalFreq[endBlockMarker] = 1 + w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) + estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) + if estBits < math.MaxInt32 { + estBits += w.lastHeader + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty + } + + // Store bytes, if we don't get a reasonable improvement. + if storable && ssize <= estBits { + if debugDeflate { + fmt.Println("stored,", ssize, "<=", estBits) + } + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + if w.lastHeader > 0 { + reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) + + if estBits < reuseSize { + if debugDeflate { + fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes") + } + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } else if debugDeflate { + fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) + } + } + + count := 0 + if w.lastHeader == 0 { + // Use the temp encoding, so swap. + w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + numCodegens := w.codegens() + + // Huffman. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHuffMan = true + w.lastHeader, _ = w.headerSize() + if debugDeflate { + count += w.lastHeader + fmt.Println("header:", count/8) + } + } + + encoding := w.literalEncoding.codes[:256] + // Go 1.16 LOVES having these on stack. At least 1.5x the speed. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + if debugDeflate { + count -= int(nbytes)*8 + int(nbits) + } + // Unroll, write 3 codes/loop. + // Fastest number of unrolls. + for len(input) > 3 { + // We must have at least 48 bits free. + if nbits >= 8 { + n := nbits >> 3 + le.Store64(w.bytes[:], nbytes, bits) + bits >>= (n * 8) & 63 + nbits -= n * 8 + nbytes += n + } + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + a, b := encoding[input[0]], encoding[input[1]] + bits |= a.code64() << (nbits & 63) + bits |= b.code64() << ((nbits + a.len()) & 63) + c := encoding[input[2]] + nbits += b.len() + a.len() + bits |= c.code64() << (nbits & 63) + nbits += c.len() + input = input[3:] + } + + // Remaining... + for _, t := range input { + if nbits >= 48 { + le.Store64(w.bytes[:], nbytes, bits) + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + // Bitwriting inlined, ~30% speedup + c := encoding[t] + bits |= c.code64() << (nbits & 63) + + nbits += c.len() + if debugDeflate { + count += int(c.len()) + } + } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if debugDeflate { + nb := count + int(nbytes)*8 + int(nbits) + fmt.Println("wrote", nb, "bits,", nb/8, "bytes.") + } + // Flush if needed to have space. + if w.nbits >= 48 { + w.writeOutBits() + } + + if eof || sync { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } +} diff --git a/internal/compress/flate/huffman_bit_writer_test.go b/internal/compress/flate/huffman_bit_writer_test.go new file mode 100644 index 00000000..dfb93e32 --- /dev/null +++ b/internal/compress/flate/huffman_bit_writer_test.go @@ -0,0 +1,381 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "testing" +) + +var update = flag.Bool("update", false, "update reference files") + +// TestBlockHuff tests huffman encoding against reference files +// to detect possible regressions. +// If encoding/bit allocation changes you can regenerate these files +// by using the -update flag. +func TestBlockHuff(t *testing.T) { + // determine input files + match, err := filepath.Glob("testdata/huffman-*.in") + if err != nil { + t.Fatal(err) + } + + for _, in := range match { + out := in // for files where input and output are identical + if strings.HasSuffix(in, ".in") { + out = in[:len(in)-len(".in")] + ".golden" + } + t.Run(in, func(t *testing.T) { + testBlockHuff(t, in, out) + }) + } +} + +func testBlockHuff(t *testing.T, in, out string) { + all, err := os.ReadFile(in) + if err != nil { + t.Error(err) + return + } + var buf bytes.Buffer + bw := newHuffmanBitWriter(&buf) + bw.logNewTablePenalty = 8 + bw.writeBlockHuff(false, all, false) + bw.flush() + got := buf.Bytes() + + want, err := os.ReadFile(out) + if err != nil && !*update { + t.Error(err) + return + } + + t.Logf("Testing %q", in) + if !bytes.Equal(got, want) { + if *update { + if in != out { + t.Logf("Updating %q", out) + if err := os.WriteFile(out, got, 0666); err != nil { + t.Error(err) + } + return + } + // in == out: don't accidentally destroy input + t.Errorf("WARNING: -update did not rewrite input file %s", in) + } + + t.Errorf("%q != %q (see %q)", in, out, in+".got") + if err := os.WriteFile(in+".got", got, 0666); err != nil { + t.Error(err) + } + return + } + t.Log("Output ok") + + // Test if the writer produces the same output after reset. + buf.Reset() + bw.reset(&buf) + bw.writeBlockHuff(false, all, false) + bw.flush() + got = buf.Bytes() + if !bytes.Equal(got, want) { + t.Errorf("after reset %q != %q (see %q)", in, out, in+".reset.got") + if err := os.WriteFile(in+".reset.got", got, 0666); err != nil { + t.Error(err) + } + return + } + t.Log("Reset ok") + testWriterEOF(t, "huff", huffTest{input: in}, true) +} + +type huffTest struct { + tokens []token + input string // File name of input data matching the tokens. + want string // File name of data with the expected output with input available. + wantNoInput string // File name of the expected output when no input is available. +} + +const ml = 0x7fc00000 // Maximum length token. Used to reduce the size of writeBlockTests + +var writeBlockTests = []huffTest{ + { + input: "testdata/huffman-null-max.in", + want: "testdata/huffman-null-max.%s.expect", + wantNoInput: "testdata/huffman-null-max.%s.expect-noinput", + tokens: []token{0x0, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 0x0, 0x0}, + }, + { + input: "testdata/huffman-pi.in", + want: "testdata/huffman-pi.%s.expect", + wantNoInput: "testdata/huffman-pi.%s.expect-noinput", + tokens: []token{0x33, 0x2e, 0x31, 0x34, 0x31, 0x35, 0x39, 0x32, 0x36, 0x35, 0x33, 0x35, 0x38, 0x39, 0x37, 0x39, 0x33, 0x32, 0x33, 0x38, 0x34, 0x36, 0x32, 0x36, 0x34, 0x33, 0x33, 0x38, 0x33, 0x32, 0x37, 0x39, 0x35, 0x30, 0x32, 0x38, 0x38, 0x34, 0x31, 0x39, 0x37, 0x31, 0x36, 0x39, 0x33, 0x39, 0x39, 0x33, 0x37, 0x35, 0x31, 0x30, 0x35, 0x38, 0x32, 0x30, 0x39, 0x37, 0x34, 0x39, 0x34, 0x34, 0x35, 0x39, 0x32, 0x33, 0x30, 0x37, 0x38, 0x31, 0x36, 0x34, 0x30, 0x36, 0x32, 0x38, 0x36, 0x32, 0x30, 0x38, 0x39, 0x39, 0x38, 0x36, 0x32, 0x38, 0x30, 0x33, 0x34, 0x38, 0x32, 0x35, 0x33, 0x34, 0x32, 0x31, 0x31, 0x37, 0x30, 0x36, 0x37, 0x39, 0x38, 0x32, 0x31, 0x34, 0x38, 0x30, 0x38, 0x36, 0x35, 0x31, 0x33, 0x32, 0x38, 0x32, 0x33, 0x30, 0x36, 0x36, 0x34, 0x37, 0x30, 0x39, 0x33, 0x38, 0x34, 0x34, 0x36, 0x30, 0x39, 0x35, 0x35, 0x30, 0x35, 0x38, 0x32, 0x32, 0x33, 0x31, 0x37, 0x32, 0x35, 0x33, 0x35, 0x39, 0x34, 0x30, 0x38, 0x31, 0x32, 0x38, 0x34, 0x38, 0x31, 0x31, 0x31, 0x37, 0x34, 0x4040007e, 0x34, 0x31, 0x30, 0x32, 0x37, 0x30, 0x31, 0x39, 0x33, 0x38, 0x35, 0x32, 0x31, 0x31, 0x30, 0x35, 0x35, 0x35, 0x39, 0x36, 0x34, 0x34, 0x36, 0x32, 0x32, 0x39, 0x34, 0x38, 0x39, 0x35, 0x34, 0x39, 0x33, 0x30, 0x33, 0x38, 0x31, 0x40400012, 0x32, 0x38, 0x38, 0x31, 0x30, 0x39, 0x37, 0x35, 0x36, 0x36, 0x35, 0x39, 0x33, 0x33, 0x34, 0x34, 0x36, 0x40400047, 0x37, 0x35, 0x36, 0x34, 0x38, 0x32, 0x33, 0x33, 0x37, 0x38, 0x36, 0x37, 0x38, 0x33, 0x31, 0x36, 0x35, 0x32, 0x37, 0x31, 0x32, 0x30, 0x31, 0x39, 0x30, 0x39, 0x31, 0x34, 0x4040001a, 0x35, 0x36, 0x36, 0x39, 0x32, 0x33, 0x34, 0x36, 0x404000b2, 0x36, 0x31, 0x30, 0x34, 0x35, 0x34, 0x33, 0x32, 0x36, 0x40400032, 0x31, 0x33, 0x33, 0x39, 0x33, 0x36, 0x30, 0x37, 0x32, 0x36, 0x30, 0x32, 0x34, 0x39, 0x31, 0x34, 0x31, 0x32, 0x37, 0x33, 0x37, 0x32, 0x34, 0x35, 0x38, 0x37, 0x30, 0x30, 0x36, 0x36, 0x30, 0x36, 0x33, 0x31, 0x35, 0x35, 0x38, 0x38, 0x31, 0x37, 0x34, 0x38, 0x38, 0x31, 0x35, 0x32, 0x30, 0x39, 0x32, 0x30, 0x39, 0x36, 0x32, 0x38, 0x32, 0x39, 0x32, 0x35, 0x34, 0x30, 0x39, 0x31, 0x37, 0x31, 0x35, 0x33, 0x36, 0x34, 0x33, 0x36, 0x37, 0x38, 0x39, 0x32, 0x35, 0x39, 0x30, 0x33, 0x36, 0x30, 0x30, 0x31, 0x31, 0x33, 0x33, 0x30, 0x35, 0x33, 0x30, 0x35, 0x34, 0x38, 0x38, 0x32, 0x30, 0x34, 0x36, 0x36, 0x35, 0x32, 0x31, 0x33, 0x38, 0x34, 0x31, 0x34, 0x36, 0x39, 0x35, 0x31, 0x39, 0x34, 0x31, 0x35, 0x31, 0x31, 0x36, 0x30, 0x39, 0x34, 0x33, 0x33, 0x30, 0x35, 0x37, 0x32, 0x37, 0x30, 0x33, 0x36, 0x35, 0x37, 0x35, 0x39, 0x35, 0x39, 0x31, 0x39, 0x35, 0x33, 0x30, 0x39, 0x32, 0x31, 0x38, 0x36, 0x31, 0x31, 0x37, 0x404000e9, 0x33, 0x32, 0x40400009, 0x39, 0x33, 0x31, 0x30, 0x35, 0x31, 0x31, 0x38, 0x35, 0x34, 0x38, 0x30, 0x37, 0x4040010e, 0x33, 0x37, 0x39, 0x39, 0x36, 0x32, 0x37, 0x34, 0x39, 0x35, 0x36, 0x37, 0x33, 0x35, 0x31, 0x38, 0x38, 0x35, 0x37, 0x35, 0x32, 0x37, 0x32, 0x34, 0x38, 0x39, 0x31, 0x32, 0x32, 0x37, 0x39, 0x33, 0x38, 0x31, 0x38, 0x33, 0x30, 0x31, 0x31, 0x39, 0x34, 0x39, 0x31, 0x32, 0x39, 0x38, 0x33, 0x33, 0x36, 0x37, 0x33, 0x33, 0x36, 0x32, 0x34, 0x34, 0x30, 0x36, 0x35, 0x36, 0x36, 0x34, 0x33, 0x30, 0x38, 0x36, 0x30, 0x32, 0x31, 0x33, 0x39, 0x34, 0x39, 0x34, 0x36, 0x33, 0x39, 0x35, 0x32, 0x32, 0x34, 0x37, 0x33, 0x37, 0x31, 0x39, 0x30, 0x37, 0x30, 0x32, 0x31, 0x37, 0x39, 0x38, 0x40800099, 0x37, 0x30, 0x32, 0x37, 0x37, 0x30, 0x35, 0x33, 0x39, 0x32, 0x31, 0x37, 0x31, 0x37, 0x36, 0x32, 0x39, 0x33, 0x31, 0x37, 0x36, 0x37, 0x35, 0x40800232, 0x37, 0x34, 0x38, 0x31, 0x40400006, 0x36, 0x36, 0x39, 0x34, 0x30, 0x404001e7, 0x30, 0x30, 0x30, 0x35, 0x36, 0x38, 0x31, 0x32, 0x37, 0x31, 0x34, 0x35, 0x32, 0x36, 0x33, 0x35, 0x36, 0x30, 0x38, 0x32, 0x37, 0x37, 0x38, 0x35, 0x37, 0x37, 0x31, 0x33, 0x34, 0x32, 0x37, 0x35, 0x37, 0x37, 0x38, 0x39, 0x36, 0x40400129, 0x33, 0x36, 0x33, 0x37, 0x31, 0x37, 0x38, 0x37, 0x32, 0x31, 0x34, 0x36, 0x38, 0x34, 0x34, 0x30, 0x39, 0x30, 0x31, 0x32, 0x32, 0x34, 0x39, 0x35, 0x33, 0x34, 0x33, 0x30, 0x31, 0x34, 0x36, 0x35, 0x34, 0x39, 0x35, 0x38, 0x35, 0x33, 0x37, 0x31, 0x30, 0x35, 0x30, 0x37, 0x39, 0x404000ca, 0x36, 0x40400153, 0x38, 0x39, 0x32, 0x33, 0x35, 0x34, 0x404001c9, 0x39, 0x35, 0x36, 0x31, 0x31, 0x32, 0x31, 0x32, 0x39, 0x30, 0x32, 0x31, 0x39, 0x36, 0x30, 0x38, 0x36, 0x34, 0x30, 0x33, 0x34, 0x34, 0x31, 0x38, 0x31, 0x35, 0x39, 0x38, 0x31, 0x33, 0x36, 0x32, 0x39, 0x37, 0x37, 0x34, 0x40400074, 0x30, 0x39, 0x39, 0x36, 0x30, 0x35, 0x31, 0x38, 0x37, 0x30, 0x37, 0x32, 0x31, 0x31, 0x33, 0x34, 0x39, 0x40800000, 0x38, 0x33, 0x37, 0x32, 0x39, 0x37, 0x38, 0x30, 0x34, 0x39, 0x39, 0x404002da, 0x39, 0x37, 0x33, 0x31, 0x37, 0x33, 0x32, 0x38, 0x4040018a, 0x36, 0x33, 0x31, 0x38, 0x35, 0x40400301, 0x404002e8, 0x34, 0x35, 0x35, 0x33, 0x34, 0x36, 0x39, 0x30, 0x38, 0x33, 0x30, 0x32, 0x36, 0x34, 0x32, 0x35, 0x32, 0x32, 0x33, 0x30, 0x404002e3, 0x40400267, 0x38, 0x35, 0x30, 0x33, 0x35, 0x32, 0x36, 0x31, 0x39, 0x33, 0x31, 0x31, 0x40400212, 0x31, 0x30, 0x31, 0x30, 0x30, 0x30, 0x33, 0x31, 0x33, 0x37, 0x38, 0x33, 0x38, 0x37, 0x35, 0x32, 0x38, 0x38, 0x36, 0x35, 0x38, 0x37, 0x35, 0x33, 0x33, 0x32, 0x30, 0x38, 0x33, 0x38, 0x31, 0x34, 0x32, 0x30, 0x36, 0x40400140, 0x4040012b, 0x31, 0x34, 0x37, 0x33, 0x30, 0x33, 0x35, 0x39, 0x4080032e, 0x39, 0x30, 0x34, 0x32, 0x38, 0x37, 0x35, 0x35, 0x34, 0x36, 0x38, 0x37, 0x33, 0x31, 0x31, 0x35, 0x39, 0x35, 0x40400355, 0x33, 0x38, 0x38, 0x32, 0x33, 0x35, 0x33, 0x37, 0x38, 0x37, 0x35, 0x4080037f, 0x39, 0x4040013a, 0x31, 0x40400148, 0x38, 0x30, 0x35, 0x33, 0x4040018a, 0x32, 0x32, 0x36, 0x38, 0x30, 0x36, 0x36, 0x31, 0x33, 0x30, 0x30, 0x31, 0x39, 0x32, 0x37, 0x38, 0x37, 0x36, 0x36, 0x31, 0x31, 0x31, 0x39, 0x35, 0x39, 0x40400237, 0x36, 0x40800124, 0x38, 0x39, 0x33, 0x38, 0x30, 0x39, 0x35, 0x32, 0x35, 0x37, 0x32, 0x30, 0x31, 0x30, 0x36, 0x35, 0x34, 0x38, 0x35, 0x38, 0x36, 0x33, 0x32, 0x37, 0x4040009a, 0x39, 0x33, 0x36, 0x31, 0x35, 0x33, 0x40400220, 0x4080015c, 0x32, 0x33, 0x30, 0x33, 0x30, 0x31, 0x39, 0x35, 0x32, 0x30, 0x33, 0x35, 0x33, 0x30, 0x31, 0x38, 0x35, 0x32, 0x40400171, 0x40400075, 0x33, 0x36, 0x32, 0x32, 0x35, 0x39, 0x39, 0x34, 0x31, 0x33, 0x40400254, 0x34, 0x39, 0x37, 0x32, 0x31, 0x37, 0x404000de, 0x33, 0x34, 0x37, 0x39, 0x31, 0x33, 0x31, 0x35, 0x31, 0x35, 0x35, 0x37, 0x34, 0x38, 0x35, 0x37, 0x32, 0x34, 0x32, 0x34, 0x35, 0x34, 0x31, 0x35, 0x30, 0x36, 0x39, 0x4040013f, 0x38, 0x32, 0x39, 0x35, 0x33, 0x33, 0x31, 0x31, 0x36, 0x38, 0x36, 0x31, 0x37, 0x32, 0x37, 0x38, 0x40400337, 0x39, 0x30, 0x37, 0x35, 0x30, 0x39, 0x4040010d, 0x37, 0x35, 0x34, 0x36, 0x33, 0x37, 0x34, 0x36, 0x34, 0x39, 0x33, 0x39, 0x33, 0x31, 0x39, 0x32, 0x35, 0x35, 0x30, 0x36, 0x30, 0x34, 0x30, 0x30, 0x39, 0x4040026b, 0x31, 0x36, 0x37, 0x31, 0x31, 0x33, 0x39, 0x30, 0x30, 0x39, 0x38, 0x40400335, 0x34, 0x30, 0x31, 0x32, 0x38, 0x35, 0x38, 0x33, 0x36, 0x31, 0x36, 0x30, 0x33, 0x35, 0x36, 0x33, 0x37, 0x30, 0x37, 0x36, 0x36, 0x30, 0x31, 0x30, 0x34, 0x40400172, 0x38, 0x31, 0x39, 0x34, 0x32, 0x39, 0x4080041e, 0x404000ef, 0x4040028b, 0x37, 0x38, 0x33, 0x37, 0x34, 0x404004a8, 0x38, 0x32, 0x35, 0x35, 0x33, 0x37, 0x40800209, 0x32, 0x36, 0x38, 0x4040002e, 0x34, 0x30, 0x34, 0x37, 0x404001d1, 0x34, 0x404004b5, 0x4040038d, 0x38, 0x34, 0x404003a8, 0x36, 0x40c0031f, 0x33, 0x33, 0x31, 0x33, 0x36, 0x37, 0x37, 0x30, 0x32, 0x38, 0x39, 0x38, 0x39, 0x31, 0x35, 0x32, 0x40400062, 0x35, 0x32, 0x31, 0x36, 0x32, 0x30, 0x35, 0x36, 0x39, 0x36, 0x40400411, 0x30, 0x35, 0x38, 0x40400477, 0x35, 0x40400498, 0x35, 0x31, 0x31, 0x40400209, 0x38, 0x32, 0x34, 0x33, 0x30, 0x30, 0x33, 0x35, 0x35, 0x38, 0x37, 0x36, 0x34, 0x30, 0x32, 0x34, 0x37, 0x34, 0x39, 0x36, 0x34, 0x37, 0x33, 0x32, 0x36, 0x33, 0x4040043e, 0x39, 0x39, 0x32, 0x4040044b, 0x34, 0x32, 0x36, 0x39, 0x40c002c5, 0x37, 0x404001d6, 0x34, 0x4040053d, 0x4040041d, 0x39, 0x33, 0x34, 0x31, 0x37, 0x404001ad, 0x31, 0x32, 0x4040002a, 0x34, 0x4040019e, 0x31, 0x35, 0x30, 0x33, 0x30, 0x32, 0x38, 0x36, 0x31, 0x38, 0x32, 0x39, 0x37, 0x34, 0x35, 0x35, 0x35, 0x37, 0x30, 0x36, 0x37, 0x34, 0x40400135, 0x35, 0x30, 0x35, 0x34, 0x39, 0x34, 0x35, 0x38, 0x404001c5, 0x39, 0x40400051, 0x35, 0x36, 0x404001ec, 0x37, 0x32, 0x31, 0x30, 0x37, 0x39, 0x40400159, 0x33, 0x30, 0x4040010a, 0x33, 0x32, 0x31, 0x31, 0x36, 0x35, 0x33, 0x34, 0x34, 0x39, 0x38, 0x37, 0x32, 0x30, 0x32, 0x37, 0x4040011b, 0x30, 0x32, 0x33, 0x36, 0x34, 0x4040022e, 0x35, 0x34, 0x39, 0x39, 0x31, 0x31, 0x39, 0x38, 0x40400418, 0x34, 0x4040011b, 0x35, 0x33, 0x35, 0x36, 0x36, 0x33, 0x36, 0x39, 0x40400450, 0x32, 0x36, 0x35, 0x404002e4, 0x37, 0x38, 0x36, 0x32, 0x35, 0x35, 0x31, 0x404003da, 0x31, 0x37, 0x35, 0x37, 0x34, 0x36, 0x37, 0x32, 0x38, 0x39, 0x30, 0x39, 0x37, 0x37, 0x37, 0x37, 0x40800453, 0x30, 0x30, 0x30, 0x404005fd, 0x37, 0x30, 0x404004df, 0x36, 0x404003e9, 0x34, 0x39, 0x31, 0x4040041e, 0x40400297, 0x32, 0x31, 0x34, 0x37, 0x37, 0x32, 0x33, 0x35, 0x30, 0x31, 0x34, 0x31, 0x34, 0x40400643, 0x33, 0x35, 0x36, 0x404004af, 0x31, 0x36, 0x31, 0x33, 0x36, 0x31, 0x31, 0x35, 0x37, 0x33, 0x35, 0x32, 0x35, 0x40400504, 0x33, 0x34, 0x4040005b, 0x31, 0x38, 0x4040047b, 0x38, 0x34, 0x404005e7, 0x33, 0x33, 0x32, 0x33, 0x39, 0x30, 0x37, 0x33, 0x39, 0x34, 0x31, 0x34, 0x33, 0x33, 0x33, 0x34, 0x35, 0x34, 0x37, 0x37, 0x36, 0x32, 0x34, 0x40400242, 0x32, 0x35, 0x31, 0x38, 0x39, 0x38, 0x33, 0x35, 0x36, 0x39, 0x34, 0x38, 0x35, 0x35, 0x36, 0x32, 0x30, 0x39, 0x39, 0x32, 0x31, 0x39, 0x32, 0x32, 0x32, 0x31, 0x38, 0x34, 0x32, 0x37, 0x4040023e, 0x32, 0x404000ba, 0x36, 0x38, 0x38, 0x37, 0x36, 0x37, 0x31, 0x37, 0x39, 0x30, 0x40400055, 0x30, 0x40800106, 0x36, 0x36, 0x404003e7, 0x38, 0x38, 0x36, 0x32, 0x37, 0x32, 0x404006dc, 0x31, 0x37, 0x38, 0x36, 0x30, 0x38, 0x35, 0x37, 0x40400073, 0x33, 0x408002fc, 0x37, 0x39, 0x37, 0x36, 0x36, 0x38, 0x31, 0x404002bd, 0x30, 0x30, 0x39, 0x35, 0x33, 0x38, 0x38, 0x40400638, 0x33, 0x404006a5, 0x30, 0x36, 0x38, 0x30, 0x30, 0x36, 0x34, 0x32, 0x32, 0x35, 0x31, 0x32, 0x35, 0x32, 0x4040057b, 0x37, 0x33, 0x39, 0x32, 0x40400297, 0x40400474, 0x34, 0x408006b3, 0x38, 0x36, 0x32, 0x36, 0x39, 0x34, 0x35, 0x404001e5, 0x34, 0x31, 0x39, 0x36, 0x35, 0x32, 0x38, 0x35, 0x30, 0x40400099, 0x4040039c, 0x31, 0x38, 0x36, 0x33, 0x404001be, 0x34, 0x40800154, 0x32, 0x30, 0x33, 0x39, 0x4040058b, 0x34, 0x35, 0x404002bc, 0x32, 0x33, 0x37, 0x4040042c, 0x36, 0x40400510, 0x35, 0x36, 0x40400638, 0x37, 0x31, 0x39, 0x31, 0x37, 0x32, 0x38, 0x40400171, 0x37, 0x36, 0x34, 0x36, 0x35, 0x37, 0x35, 0x37, 0x33, 0x39, 0x40400101, 0x33, 0x38, 0x39, 0x40400748, 0x38, 0x33, 0x32, 0x36, 0x34, 0x35, 0x39, 0x39, 0x35, 0x38, 0x404006a7, 0x30, 0x34, 0x37, 0x38, 0x404001de, 0x40400328, 0x39, 0x4040002d, 0x36, 0x34, 0x30, 0x37, 0x38, 0x39, 0x35, 0x31, 0x4040008e, 0x36, 0x38, 0x33, 0x4040012f, 0x32, 0x35, 0x39, 0x35, 0x37, 0x30, 0x40400468, 0x38, 0x32, 0x32, 0x404002c8, 0x32, 0x4040061b, 0x34, 0x30, 0x37, 0x37, 0x32, 0x36, 0x37, 0x31, 0x39, 0x34, 0x37, 0x38, 0x40400319, 0x38, 0x32, 0x36, 0x30, 0x31, 0x34, 0x37, 0x36, 0x39, 0x39, 0x30, 0x39, 0x404004e8, 0x30, 0x31, 0x33, 0x36, 0x33, 0x39, 0x34, 0x34, 0x33, 0x4040027f, 0x33, 0x30, 0x40400105, 0x32, 0x30, 0x33, 0x34, 0x39, 0x36, 0x32, 0x35, 0x32, 0x34, 0x35, 0x31, 0x37, 0x404003b5, 0x39, 0x36, 0x35, 0x31, 0x34, 0x33, 0x31, 0x34, 0x32, 0x39, 0x38, 0x30, 0x39, 0x31, 0x39, 0x30, 0x36, 0x35, 0x39, 0x32, 0x40400282, 0x37, 0x32, 0x32, 0x31, 0x36, 0x39, 0x36, 0x34, 0x36, 0x40400419, 0x4040007a, 0x35, 0x4040050e, 0x34, 0x40800565, 0x38, 0x40400559, 0x39, 0x37, 0x4040057b, 0x35, 0x34, 0x4040049d, 0x4040023e, 0x37, 0x4040065a, 0x38, 0x34, 0x36, 0x38, 0x31, 0x33, 0x4040008c, 0x36, 0x38, 0x33, 0x38, 0x36, 0x38, 0x39, 0x34, 0x32, 0x37, 0x37, 0x34, 0x31, 0x35, 0x35, 0x39, 0x39, 0x31, 0x38, 0x35, 0x4040005a, 0x32, 0x34, 0x35, 0x39, 0x35, 0x33, 0x39, 0x35, 0x39, 0x34, 0x33, 0x31, 0x404005b7, 0x37, 0x40400012, 0x36, 0x38, 0x30, 0x38, 0x34, 0x35, 0x404002e7, 0x37, 0x33, 0x4040081e, 0x39, 0x35, 0x38, 0x34, 0x38, 0x36, 0x35, 0x33, 0x38, 0x404006e8, 0x36, 0x32, 0x404000f2, 0x36, 0x30, 0x39, 0x404004b6, 0x36, 0x30, 0x38, 0x30, 0x35, 0x31, 0x32, 0x34, 0x33, 0x38, 0x38, 0x34, 0x4040013a, 0x4040000b, 0x34, 0x31, 0x33, 0x4040030f, 0x37, 0x36, 0x32, 0x37, 0x38, 0x40400341, 0x37, 0x31, 0x35, 0x4040059b, 0x33, 0x35, 0x39, 0x39, 0x37, 0x37, 0x30, 0x30, 0x31, 0x32, 0x39, 0x40400472, 0x38, 0x39, 0x34, 0x34, 0x31, 0x40400277, 0x36, 0x38, 0x35, 0x35, 0x4040005f, 0x34, 0x30, 0x36, 0x33, 0x404008e6, 0x32, 0x30, 0x37, 0x32, 0x32, 0x40400158, 0x40800203, 0x34, 0x38, 0x31, 0x35, 0x38, 0x40400205, 0x404001fe, 0x4040027a, 0x40400298, 0x33, 0x39, 0x34, 0x35, 0x32, 0x32, 0x36, 0x37, 0x40c00496, 0x38, 0x4040058a, 0x32, 0x31, 0x404002ea, 0x32, 0x40400387, 0x35, 0x34, 0x36, 0x36, 0x36, 0x4040051b, 0x32, 0x33, 0x39, 0x38, 0x36, 0x34, 0x35, 0x36, 0x404004c4, 0x31, 0x36, 0x33, 0x35, 0x40800253, 0x40400811, 0x37, 0x404008ad, 0x39, 0x38, 0x4040045e, 0x39, 0x33, 0x36, 0x33, 0x34, 0x4040075b, 0x37, 0x34, 0x33, 0x32, 0x34, 0x4040047b, 0x31, 0x35, 0x30, 0x37, 0x36, 0x404004bb, 0x37, 0x39, 0x34, 0x35, 0x31, 0x30, 0x39, 0x4040003e, 0x30, 0x39, 0x34, 0x30, 0x404006a6, 0x38, 0x38, 0x37, 0x39, 0x37, 0x31, 0x30, 0x38, 0x39, 0x33, 0x404008f0, 0x36, 0x39, 0x31, 0x33, 0x36, 0x38, 0x36, 0x37, 0x32, 0x4040025b, 0x404001fe, 0x35, 0x4040053f, 0x40400468, 0x40400801, 0x31, 0x37, 0x39, 0x32, 0x38, 0x36, 0x38, 0x404008cc, 0x38, 0x37, 0x34, 0x37, 0x4080079e, 0x38, 0x32, 0x34, 0x4040097a, 0x38, 0x4040025b, 0x37, 0x31, 0x34, 0x39, 0x30, 0x39, 0x36, 0x37, 0x35, 0x39, 0x38, 0x404006ef, 0x33, 0x36, 0x35, 0x40400134, 0x38, 0x31, 0x4040005c, 0x40400745, 0x40400936, 0x36, 0x38, 0x32, 0x39, 0x4040057e, 0x38, 0x37, 0x32, 0x32, 0x36, 0x35, 0x38, 0x38, 0x30, 0x40400611, 0x35, 0x40400249, 0x34, 0x32, 0x37, 0x30, 0x34, 0x37, 0x37, 0x35, 0x35, 0x4040081e, 0x33, 0x37, 0x39, 0x36, 0x34, 0x31, 0x34, 0x35, 0x31, 0x35, 0x32, 0x404005fd, 0x32, 0x33, 0x34, 0x33, 0x36, 0x34, 0x35, 0x34, 0x404005de, 0x34, 0x34, 0x34, 0x37, 0x39, 0x35, 0x4040003c, 0x40400523, 0x408008e6, 0x34, 0x31, 0x4040052a, 0x33, 0x40400304, 0x35, 0x32, 0x33, 0x31, 0x40800841, 0x31, 0x36, 0x36, 0x31, 0x404008b2, 0x35, 0x39, 0x36, 0x39, 0x35, 0x33, 0x36, 0x32, 0x33, 0x31, 0x34, 0x404005ff, 0x32, 0x34, 0x38, 0x34, 0x39, 0x33, 0x37, 0x31, 0x38, 0x37, 0x31, 0x31, 0x30, 0x31, 0x34, 0x35, 0x37, 0x36, 0x35, 0x34, 0x40400761, 0x30, 0x32, 0x37, 0x39, 0x39, 0x33, 0x34, 0x34, 0x30, 0x33, 0x37, 0x34, 0x32, 0x30, 0x30, 0x37, 0x4040093f, 0x37, 0x38, 0x35, 0x33, 0x39, 0x30, 0x36, 0x32, 0x31, 0x39, 0x40800299, 0x40400345, 0x38, 0x34, 0x37, 0x408003d2, 0x38, 0x33, 0x33, 0x32, 0x31, 0x34, 0x34, 0x35, 0x37, 0x31, 0x40400284, 0x40400776, 0x34, 0x33, 0x35, 0x30, 0x40400928, 0x40400468, 0x35, 0x33, 0x31, 0x39, 0x31, 0x30, 0x34, 0x38, 0x34, 0x38, 0x31, 0x30, 0x30, 0x35, 0x33, 0x37, 0x30, 0x36, 0x404008bc, 0x4080059d, 0x40800781, 0x31, 0x40400559, 0x37, 0x4040031b, 0x35, 0x404007ec, 0x4040040c, 0x36, 0x33, 0x408007dc, 0x34, 0x40400971, 0x4080034e, 0x408003f5, 0x38, 0x4080052d, 0x40800887, 0x39, 0x40400187, 0x39, 0x31, 0x404008ce, 0x38, 0x31, 0x34, 0x36, 0x37, 0x35, 0x31, 0x4040062b, 0x31, 0x32, 0x33, 0x39, 0x40c001a9, 0x39, 0x30, 0x37, 0x31, 0x38, 0x36, 0x34, 0x39, 0x34, 0x32, 0x33, 0x31, 0x39, 0x36, 0x31, 0x35, 0x36, 0x404001ec, 0x404006bc, 0x39, 0x35, 0x40400926, 0x40400469, 0x4040011b, 0x36, 0x30, 0x33, 0x38, 0x40400a25, 0x4040016f, 0x40400384, 0x36, 0x32, 0x4040045a, 0x35, 0x4040084c, 0x36, 0x33, 0x38, 0x39, 0x33, 0x37, 0x37, 0x38, 0x37, 0x404008c5, 0x404000f8, 0x39, 0x37, 0x39, 0x32, 0x30, 0x37, 0x37, 0x33, 0x404005d7, 0x32, 0x31, 0x38, 0x32, 0x35, 0x36, 0x404007df, 0x36, 0x36, 0x404006d6, 0x34, 0x32, 0x4080067e, 0x36, 0x404006e6, 0x34, 0x34, 0x40400024, 0x35, 0x34, 0x39, 0x32, 0x30, 0x32, 0x36, 0x30, 0x35, 0x40400ab3, 0x408003e4, 0x32, 0x30, 0x31, 0x34, 0x39, 0x404004d2, 0x38, 0x35, 0x30, 0x37, 0x33, 0x40400599, 0x36, 0x36, 0x36, 0x30, 0x40400194, 0x32, 0x34, 0x33, 0x34, 0x30, 0x40400087, 0x30, 0x4040076b, 0x38, 0x36, 0x33, 0x40400956, 0x404007e4, 0x4040042b, 0x40400174, 0x35, 0x37, 0x39, 0x36, 0x32, 0x36, 0x38, 0x35, 0x36, 0x40400140, 0x35, 0x30, 0x38, 0x40400523, 0x35, 0x38, 0x37, 0x39, 0x36, 0x39, 0x39, 0x40400711, 0x35, 0x37, 0x34, 0x40400a18, 0x38, 0x34, 0x30, 0x404008b3, 0x31, 0x34, 0x35, 0x39, 0x31, 0x4040078c, 0x37, 0x30, 0x40400234, 0x30, 0x31, 0x40400be7, 0x31, 0x32, 0x40400c74, 0x30, 0x404003c3, 0x33, 0x39, 0x40400b2a, 0x40400112, 0x37, 0x31, 0x35, 0x404003b0, 0x34, 0x32, 0x30, 0x40800bf2, 0x39, 0x40400bc2, 0x30, 0x37, 0x40400341, 0x40400795, 0x40400aaf, 0x40400c62, 0x32, 0x31, 0x40400960, 0x32, 0x35, 0x31, 0x4040057b, 0x40400944, 0x39, 0x32, 0x404001b2, 0x38, 0x32, 0x36, 0x40400b66, 0x32, 0x40400278, 0x33, 0x32, 0x31, 0x35, 0x37, 0x39, 0x31, 0x39, 0x38, 0x34, 0x31, 0x34, 0x4080087b, 0x39, 0x31, 0x36, 0x34, 0x408006e8, 0x39, 0x40800b58, 0x404008db, 0x37, 0x32, 0x32, 0x40400321, 0x35, 0x404008a4, 0x40400141, 0x39, 0x31, 0x30, 0x404000bc, 0x40400c5b, 0x35, 0x32, 0x38, 0x30, 0x31, 0x37, 0x40400231, 0x37, 0x31, 0x32, 0x40400914, 0x38, 0x33, 0x32, 0x40400373, 0x31, 0x40400589, 0x30, 0x39, 0x33, 0x35, 0x33, 0x39, 0x36, 0x35, 0x37, 0x4040064b, 0x31, 0x30, 0x38, 0x33, 0x40400069, 0x35, 0x31, 0x4040077a, 0x40400d5a, 0x31, 0x34, 0x34, 0x34, 0x32, 0x31, 0x30, 0x30, 0x40400202, 0x30, 0x33, 0x4040019c, 0x31, 0x31, 0x30, 0x33, 0x40400c81, 0x40400009, 0x40400026, 0x40c00602, 0x35, 0x31, 0x36, 0x404005d9, 0x40800883, 0x4040092a, 0x35, 0x40800c42, 0x38, 0x35, 0x31, 0x37, 0x31, 0x34, 0x33, 0x37, 0x40400605, 0x4040006d, 0x31, 0x35, 0x35, 0x36, 0x35, 0x30, 0x38, 0x38, 0x404003b9, 0x39, 0x38, 0x39, 0x38, 0x35, 0x39, 0x39, 0x38, 0x32, 0x33, 0x38, 0x404001cf, 0x404009ba, 0x33, 0x4040016c, 0x4040043e, 0x404009c3, 0x38, 0x40800e05, 0x33, 0x32, 0x40400107, 0x35, 0x40400305, 0x33, 0x404001ca, 0x39, 0x4040041b, 0x39, 0x38, 0x4040087d, 0x34, 0x40400cb8, 0x37, 0x4040064b, 0x30, 0x37, 0x404000e5, 0x34, 0x38, 0x31, 0x34, 0x31, 0x40400539, 0x38, 0x35, 0x39, 0x34, 0x36, 0x31, 0x40400bc9, 0x38, 0x30}, + }, + { + input: "testdata/huffman-rand-1k.in", + want: "testdata/huffman-rand-1k.%s.expect", + wantNoInput: "testdata/huffman-rand-1k.%s.expect-noinput", + tokens: []token{0xf8, 0x8b, 0x96, 0x76, 0x48, 0xd, 0x85, 0x94, 0x25, 0x80, 0xaf, 0xc2, 0xfe, 0x8d, 0xe8, 0x20, 0xeb, 0x17, 0x86, 0xc9, 0xb7, 0xc5, 0xde, 0x6, 0xea, 0x7d, 0x18, 0x8b, 0xe7, 0x3e, 0x7, 0xda, 0xdf, 0xff, 0x6c, 0x73, 0xde, 0xcc, 0xe7, 0x6d, 0x8d, 0x4, 0x19, 0x49, 0x7f, 0x47, 0x1f, 0x48, 0x15, 0xb0, 0xe8, 0x9e, 0xf2, 0x31, 0x59, 0xde, 0x34, 0xb4, 0x5b, 0xe5, 0xe0, 0x9, 0x11, 0x30, 0xc2, 0x88, 0x5b, 0x7c, 0x5d, 0x14, 0x13, 0x6f, 0x23, 0xa9, 0xd, 0xbc, 0x2d, 0x23, 0xbe, 0xd9, 0xed, 0x75, 0x4, 0x6c, 0x99, 0xdf, 0xfd, 0x70, 0x66, 0xe6, 0xee, 0xd9, 0xb1, 0x9e, 0x6e, 0x83, 0x59, 0xd5, 0xd4, 0x80, 0x59, 0x98, 0x77, 0x89, 0x43, 0x38, 0xc9, 0xaf, 0x30, 0x32, 0x9a, 0x20, 0x1b, 0x46, 0x3d, 0x67, 0x6e, 0xd7, 0x72, 0x9e, 0x4e, 0x21, 0x4f, 0xc6, 0xe0, 0xd4, 0x7b, 0x4, 0x8d, 0xa5, 0x3, 0xf6, 0x5, 0x9b, 0x6b, 0xdc, 0x2a, 0x93, 0x77, 0x28, 0xfd, 0xb4, 0x62, 0xda, 0x20, 0xe7, 0x1f, 0xab, 0x6b, 0x51, 0x43, 0x39, 0x2f, 0xa0, 0x92, 0x1, 0x6c, 0x75, 0x3e, 0xf4, 0x35, 0xfd, 0x43, 0x2e, 0xf7, 0xa4, 0x75, 0xda, 0xea, 0x9b, 0xa, 0x64, 0xb, 0xe0, 0x23, 0x29, 0xbd, 0xf7, 0xe7, 0x83, 0x3c, 0xfb, 0xdf, 0xb3, 0xae, 0x4f, 0xa4, 0x47, 0x55, 0x99, 0xde, 0x2f, 0x96, 0x6e, 0x1c, 0x43, 0x4c, 0x87, 0xe2, 0x7c, 0xd9, 0x5f, 0x4c, 0x7c, 0xe8, 0x90, 0x3, 0xdb, 0x30, 0x95, 0xd6, 0x22, 0xc, 0x47, 0xb8, 0x4d, 0x6b, 0xbd, 0x24, 0x11, 0xab, 0x2c, 0xd7, 0xbe, 0x6e, 0x7a, 0xd6, 0x8, 0xa3, 0x98, 0xd8, 0xdd, 0x15, 0x6a, 0xfa, 0x93, 0x30, 0x1, 0x25, 0x1d, 0xa2, 0x74, 0x86, 0x4b, 0x6a, 0x95, 0xe8, 0xe1, 0x4e, 0xe, 0x76, 0xb9, 0x49, 0xa9, 0x5f, 0xa0, 0xa6, 0x63, 0x3c, 0x7e, 0x7e, 0x20, 0x13, 0x4f, 0xbb, 0x66, 0x92, 0xb8, 0x2e, 0xa4, 0xfa, 0x48, 0xcb, 0xae, 0xb9, 0x3c, 0xaf, 0xd3, 0x1f, 0xe1, 0xd5, 0x8d, 0x42, 0x6d, 0xf0, 0xfc, 0x8c, 0xc, 0x0, 0xde, 0x40, 0xab, 0x8b, 0x47, 0x97, 0x4e, 0xa8, 0xcf, 0x8e, 0xdb, 0xa6, 0x8b, 0x20, 0x9, 0x84, 0x7a, 0x66, 0xe5, 0x98, 0x29, 0x2, 0x95, 0xe6, 0x38, 0x32, 0x60, 0x3, 0xe3, 0x9a, 0x1e, 0x54, 0xe8, 0x63, 0x80, 0x48, 0x9c, 0xe7, 0x63, 0x33, 0x6e, 0xa0, 0x65, 0x83, 0xfa, 0xc6, 0xba, 0x7a, 0x43, 0x71, 0x5, 0xf5, 0x68, 0x69, 0x85, 0x9c, 0xba, 0x45, 0xcd, 0x6b, 0xb, 0x19, 0xd1, 0xbb, 0x7f, 0x70, 0x85, 0x92, 0xd1, 0xb4, 0x64, 0x82, 0xb1, 0xe4, 0x62, 0xc5, 0x3c, 0x46, 0x1f, 0x92, 0x31, 0x1c, 0x4e, 0x41, 0x77, 0xf7, 0xe7, 0x87, 0xa2, 0xf, 0x6e, 0xe8, 0x92, 0x3, 0x6b, 0xa, 0xe7, 0xa9, 0x3b, 0x11, 0xda, 0x66, 0x8a, 0x29, 0xda, 0x79, 0xe1, 0x64, 0x8d, 0xe3, 0x54, 0xd4, 0xf5, 0xef, 0x64, 0x87, 0x3b, 0xf4, 0xc2, 0xf4, 0x71, 0x13, 0xa9, 0xe9, 0xe0, 0xa2, 0x6, 0x14, 0xab, 0x5d, 0xa7, 0x96, 0x0, 0xd6, 0xc3, 0xcc, 0x57, 0xed, 0x39, 0x6a, 0x25, 0xcd, 0x76, 0xea, 0xba, 0x3a, 0xf2, 0xa1, 0x95, 0x5d, 0xe5, 0x71, 0xcf, 0x9c, 0x62, 0x9e, 0x6a, 0xfa, 0xd5, 0x31, 0xd1, 0xa8, 0x66, 0x30, 0x33, 0xaa, 0x51, 0x17, 0x13, 0x82, 0x99, 0xc8, 0x14, 0x60, 0x9f, 0x4d, 0x32, 0x6d, 0xda, 0x19, 0x26, 0x21, 0xdc, 0x7e, 0x2e, 0x25, 0x67, 0x72, 0xca, 0xf, 0x92, 0xcd, 0xf6, 0xd6, 0xcb, 0x97, 0x8a, 0x33, 0x58, 0x73, 0x70, 0x91, 0x1d, 0xbf, 0x28, 0x23, 0xa3, 0xc, 0xf1, 0x83, 0xc3, 0xc8, 0x56, 0x77, 0x68, 0xe3, 0x82, 0xba, 0xb9, 0x57, 0x56, 0x57, 0x9c, 0xc3, 0xd6, 0x14, 0x5, 0x3c, 0xb1, 0xaf, 0x93, 0xc8, 0x8a, 0x57, 0x7f, 0x53, 0xfa, 0x2f, 0xaa, 0x6e, 0x66, 0x83, 0xfa, 0x33, 0xd1, 0x21, 0xab, 0x1b, 0x71, 0xb4, 0x7c, 0xda, 0xfd, 0xfb, 0x7f, 0x20, 0xab, 0x5e, 0xd5, 0xca, 0xfd, 0xdd, 0xe0, 0xee, 0xda, 0xba, 0xa8, 0x27, 0x99, 0x97, 0x69, 0xc1, 0x3c, 0x82, 0x8c, 0xa, 0x5c, 0x2d, 0x5b, 0x88, 0x3e, 0x34, 0x35, 0x86, 0x37, 0x46, 0x79, 0xe1, 0xaa, 0x19, 0xfb, 0xaa, 0xde, 0x15, 0x9, 0xd, 0x1a, 0x57, 0xff, 0xb5, 0xf, 0xf3, 0x2b, 0x5a, 0x6a, 0x4d, 0x19, 0x77, 0x71, 0x45, 0xdf, 0x4f, 0xb3, 0xec, 0xf1, 0xeb, 0x18, 0x53, 0x3e, 0x3b, 0x47, 0x8, 0x9a, 0x73, 0xa0, 0x5c, 0x8c, 0x5f, 0xeb, 0xf, 0x3a, 0xc2, 0x43, 0x67, 0xb4, 0x66, 0x67, 0x80, 0x58, 0xe, 0xc1, 0xec, 0x40, 0xd4, 0x22, 0x94, 0xca, 0xf9, 0xe8, 0x92, 0xe4, 0x69, 0x38, 0xbe, 0x67, 0x64, 0xca, 0x50, 0xc7, 0x6, 0x67, 0x42, 0x6e, 0xa3, 0xf0, 0xb7, 0x6c, 0xf2, 0xe8, 0x5f, 0xb1, 0xaf, 0xe7, 0xdb, 0xbb, 0x77, 0xb5, 0xf8, 0xcb, 0x8, 0xc4, 0x75, 0x7e, 0xc0, 0xf9, 0x1c, 0x7f, 0x3c, 0x89, 0x2f, 0xd2, 0x58, 0x3a, 0xe2, 0xf8, 0x91, 0xb6, 0x7b, 0x24, 0x27, 0xe9, 0xae, 0x84, 0x8b, 0xde, 0x74, 0xac, 0xfd, 0xd9, 0xb7, 0x69, 0x2a, 0xec, 0x32, 0x6f, 0xf0, 0x92, 0x84, 0xf1, 0x40, 0xc, 0x8a, 0xbc, 0x39, 0x6e, 0x2e, 0x73, 0xd4, 0x6e, 0x8a, 0x74, 0x2a, 0xdc, 0x60, 0x1f, 0xa3, 0x7, 0xde, 0x75, 0x8b, 0x74, 0xc8, 0xfe, 0x63, 0x75, 0xf6, 0x3d, 0x63, 0xac, 0x33, 0x89, 0xc3, 0xf0, 0xf8, 0x2d, 0x6b, 0xb4, 0x9e, 0x74, 0x8b, 0x5c, 0x33, 0xb4, 0xca, 0xa8, 0xe4, 0x99, 0xb6, 0x90, 0xa1, 0xef, 0xf, 0xd3, 0x61, 0xb2, 0xc6, 0x1a, 0x94, 0x7c, 0x44, 0x55, 0xf4, 0x45, 0xff, 0x9e, 0xa5, 0x5a, 0xc6, 0xa0, 0xe8, 0x2a, 0xc1, 0x8d, 0x6f, 0x34, 0x11, 0xb9, 0xbe, 0x4e, 0xd9, 0x87, 0x97, 0x73, 0xcf, 0x3d, 0x23, 0xae, 0xd5, 0x1a, 0x5e, 0xae, 0x5d, 0x6a, 0x3, 0xf9, 0x22, 0xd, 0x10, 0xd9, 0x47, 0x69, 0x15, 0x3f, 0xee, 0x52, 0xa3, 0x8, 0xd2, 0x3c, 0x51, 0xf4, 0xf8, 0x9d, 0xe4, 0x98, 0x89, 0xc8, 0x67, 0x39, 0xd5, 0x5e, 0x35, 0x78, 0x27, 0xe8, 0x3c, 0x80, 0xae, 0x79, 0x71, 0xd2, 0x93, 0xf4, 0xaa, 0x51, 0x12, 0x1c, 0x4b, 0x1b, 0xe5, 0x6e, 0x15, 0x6f, 0xe4, 0xbb, 0x51, 0x9b, 0x45, 0x9f, 0xf9, 0xc4, 0x8c, 0x2a, 0xfb, 0x1a, 0xdf, 0x55, 0xd3, 0x48, 0x93, 0x27, 0x1, 0x26, 0xc2, 0x6b, 0x55, 0x6d, 0xa2, 0xfb, 0x84, 0x8b, 0xc9, 0x9e, 0x28, 0xc2, 0xef, 0x1a, 0x24, 0xec, 0x9b, 0xae, 0xbd, 0x60, 0xe9, 0x15, 0x35, 0xee, 0x42, 0xa4, 0x33, 0x5b, 0xfa, 0xf, 0xb6, 0xf7, 0x1, 0xa6, 0x2, 0x4c, 0xca, 0x90, 0x58, 0x3a, 0x96, 0x41, 0xe7, 0xcb, 0x9, 0x8c, 0xdb, 0x85, 0x4d, 0xa8, 0x89, 0xf3, 0xb5, 0x8e, 0xfd, 0x75, 0x5b, 0x4f, 0xed, 0xde, 0x3f, 0xeb, 0x38, 0xa3, 0xbe, 0xb0, 0x73, 0xfc, 0xb8, 0x54, 0xf7, 0x4c, 0x30, 0x67, 0x2e, 0x38, 0xa2, 0x54, 0x18, 0xba, 0x8, 0xbf, 0xf2, 0x39, 0xd5, 0xfe, 0xa5, 0x41, 0xc6, 0x66, 0x66, 0xba, 0x81, 0xef, 0x67, 0xe4, 0xe6, 0x3c, 0xc, 0xca, 0xa4, 0xa, 0x79, 0xb3, 0x57, 0x8b, 0x8a, 0x75, 0x98, 0x18, 0x42, 0x2f, 0x29, 0xa3, 0x82, 0xef, 0x9f, 0x86, 0x6, 0x23, 0xe1, 0x75, 0xfa, 0x8, 0xb1, 0xde, 0x17, 0x4a}, + }, + { + input: "testdata/huffman-rand-limit.in", + want: "testdata/huffman-rand-limit.%s.expect", + wantNoInput: "testdata/huffman-rand-limit.%s.expect-noinput", + tokens: []token{0x61, 0x51c00000, 0xa, 0xf8, 0x8b, 0x96, 0x76, 0x48, 0xa, 0x85, 0x94, 0x25, 0x80, 0xaf, 0xc2, 0xfe, 0x8d, 0xe8, 0x20, 0xeb, 0x17, 0x86, 0xc9, 0xb7, 0xc5, 0xde, 0x6, 0xea, 0x7d, 0x18, 0x8b, 0xe7, 0x3e, 0x7, 0xda, 0xdf, 0xff, 0x6c, 0x73, 0xde, 0xcc, 0xe7, 0x6d, 0x8d, 0x4, 0x19, 0x49, 0x7f, 0x47, 0x1f, 0x48, 0x15, 0xb0, 0xe8, 0x9e, 0xf2, 0x31, 0x59, 0xde, 0x34, 0xb4, 0x5b, 0xe5, 0xe0, 0x9, 0x11, 0x30, 0xc2, 0x88, 0x5b, 0x7c, 0x5d, 0x14, 0x13, 0x6f, 0x23, 0xa9, 0xa, 0xbc, 0x2d, 0x23, 0xbe, 0xd9, 0xed, 0x75, 0x4, 0x6c, 0x99, 0xdf, 0xfd, 0x70, 0x66, 0xe6, 0xee, 0xd9, 0xb1, 0x9e, 0x6e, 0x83, 0x59, 0xd5, 0xd4, 0x80, 0x59, 0x98, 0x77, 0x89, 0x43, 0x38, 0xc9, 0xaf, 0x30, 0x32, 0x9a, 0x20, 0x1b, 0x46, 0x3d, 0x67, 0x6e, 0xd7, 0x72, 0x9e, 0x4e, 0x21, 0x4f, 0xc6, 0xe0, 0xd4, 0x7b, 0x4, 0x8d, 0xa5, 0x3, 0xf6, 0x5, 0x9b, 0x6b, 0xdc, 0x2a, 0x93, 0x77, 0x28, 0xfd, 0xb4, 0x62, 0xda, 0x20, 0xe7, 0x1f, 0xab, 0x6b, 0x51, 0x43, 0x39, 0x2f, 0xa0, 0x92, 0x1, 0x6c, 0x75, 0x3e, 0xf4, 0x35, 0xfd, 0x43, 0x2e, 0xf7, 0xa4, 0x75, 0xda, 0xea, 0x9b, 0xa}, + }, + { + input: "testdata/huffman-shifts.in", + want: "testdata/huffman-shifts.%s.expect", + wantNoInput: "testdata/huffman-shifts.%s.expect-noinput", + tokens: []token{0x31, 0x30, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x52400001, 0xd, 0xa, 0x32, 0x33, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7f400001}, + }, + { + input: "testdata/huffman-text-shift.in", + want: "testdata/huffman-text-shift.%s.expect", + wantNoInput: "testdata/huffman-text-shift.%s.expect-noinput", + tokens: []token{0x2f, 0x2f, 0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x32, 0x30, 0x30, 0x39, 0x54, 0x68, 0x47, 0x6f, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x6c, 0x40800016, 0x72, 0x72, 0x76, 0x64, 0x2e, 0xd, 0xa, 0x2f, 0x2f, 0x55, 0x6f, 0x66, 0x74, 0x68, 0x69, 0x6f, 0x75, 0x72, 0x63, 0x63, 0x6f, 0x64, 0x69, 0x67, 0x6f, 0x76, 0x72, 0x6e, 0x64, 0x62, 0x79, 0x42, 0x53, 0x44, 0x2d, 0x74, 0x79, 0x6c, 0x40400020, 0x6c, 0x69, 0x63, 0x6e, 0x74, 0x68, 0x74, 0x63, 0x6e, 0x62, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x69, 0x6e, 0x74, 0x68, 0x4c, 0x49, 0x43, 0x45, 0x4e, 0x53, 0x45, 0x66, 0x69, 0x6c, 0x2e, 0xd, 0xa, 0xd, 0xa, 0x70, 0x63, 0x6b, 0x67, 0x6d, 0x69, 0x6e, 0x4040000a, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x22, 0x6f, 0x22, 0x4040000c, 0x66, 0x75, 0x6e, 0x63, 0x6d, 0x69, 0x6e, 0x28, 0x29, 0x7b, 0xd, 0xa, 0x9, 0x76, 0x72, 0x62, 0x3d, 0x6d, 0x6b, 0x28, 0x5b, 0x5d, 0x62, 0x79, 0x74, 0x2c, 0x36, 0x35, 0x35, 0x33, 0x35, 0x29, 0xd, 0xa, 0x9, 0x66, 0x2c, 0x5f, 0x3a, 0x3d, 0x6f, 0x2e, 0x43, 0x72, 0x74, 0x28, 0x22, 0x68, 0x75, 0x66, 0x66, 0x6d, 0x6e, 0x2d, 0x6e, 0x75, 0x6c, 0x6c, 0x2d, 0x6d, 0x78, 0x2e, 0x69, 0x6e, 0x22, 0x40800021, 0x2e, 0x57, 0x72, 0x69, 0x74, 0x28, 0x62, 0x29, 0xd, 0xa, 0x7d, 0xd, 0xa, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x58, 0x78, 0x79, 0x7a, 0x21, 0x22, 0x23, 0xc2, 0xa4, 0x25, 0x26, 0x2f, 0x3f, 0x22}, + }, + { + input: "testdata/huffman-text.in", + want: "testdata/huffman-text.%s.expect", + wantNoInput: "testdata/huffman-text.%s.expect-noinput", + tokens: []token{0x2f, 0x2f, 0x20, 0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0x32, 0x30, 0x30, 0x39, 0x20, 0x54, 0x68, 0x65, 0x20, 0x47, 0x6f, 0x20, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x73, 0x2e, 0x20, 0x41, 0x6c, 0x6c, 0x20, 0x4080001e, 0x73, 0x20, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x2e, 0xd, 0xa, 0x2f, 0x2f, 0x20, 0x55, 0x73, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x74, 0x68, 0x69, 0x73, 0x20, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x20, 0x63, 0x6f, 0x64, 0x65, 0x20, 0x69, 0x73, 0x20, 0x67, 0x6f, 0x76, 0x65, 0x72, 0x6e, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20, 0x61, 0x20, 0x42, 0x53, 0x44, 0x2d, 0x73, 0x74, 0x79, 0x6c, 0x65, 0x40800036, 0x6c, 0x69, 0x63, 0x65, 0x6e, 0x73, 0x65, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x62, 0x65, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x20, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x4c, 0x49, 0x43, 0x45, 0x4e, 0x53, 0x45, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x2e, 0xd, 0xa, 0xd, 0xa, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x20, 0x6d, 0x61, 0x69, 0x6e, 0x4040000f, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x22, 0x6f, 0x73, 0x22, 0x4040000e, 0x66, 0x75, 0x6e, 0x63, 0x4080001b, 0x28, 0x29, 0x20, 0x7b, 0xd, 0xa, 0x9, 0x76, 0x61, 0x72, 0x20, 0x62, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x6b, 0x65, 0x28, 0x5b, 0x5d, 0x62, 0x79, 0x74, 0x65, 0x2c, 0x20, 0x36, 0x35, 0x35, 0x33, 0x35, 0x29, 0xd, 0xa, 0x9, 0x66, 0x2c, 0x20, 0x5f, 0x20, 0x3a, 0x3d, 0x20, 0x6f, 0x73, 0x2e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x28, 0x22, 0x68, 0x75, 0x66, 0x66, 0x6d, 0x61, 0x6e, 0x2d, 0x6e, 0x75, 0x6c, 0x6c, 0x2d, 0x6d, 0x61, 0x78, 0x2e, 0x69, 0x6e, 0x22, 0x4080002a, 0x2e, 0x57, 0x72, 0x69, 0x74, 0x65, 0x28, 0x62, 0x29, 0xd, 0xa, 0x7d, 0xd, 0xa}, + }, + { + input: "testdata/huffman-zero.in", + want: "testdata/huffman-zero.%s.expect", + wantNoInput: "testdata/huffman-zero.%s.expect-noinput", + tokens: []token{0x30, ml, 0x4b800000}, + }, + { + input: "", + want: "", + wantNoInput: "testdata/null-long-match.%s.expect-noinput", + tokens: []token{0x0, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 0x41400000}, + }, +} + +// TestWriteBlock tests if the writeBlock encoding has changed. +// To update the reference files use the "-update" flag on the test. +func TestWriteBlock(t *testing.T) { + for _, test := range writeBlockTests { + testBlock(t, test, "wb") + } +} + +// TestWriteBlockDynamic tests if the writeBlockDynamic encoding has changed. +// To update the reference files use the "-update" flag on the test. +func TestWriteBlockDynamic(t *testing.T) { + for _, test := range writeBlockTests { + testBlock(t, test, "dyn") + } +} + +// TestWriteBlockDynamic tests if the writeBlockDynamic encoding has changed. +// To update the reference files use the "-update" flag on the test. +func TestWriteBlockDynamicSync(t *testing.T) { + for _, test := range writeBlockTests { + testBlock(t, test, "sync") + } +} + +// testBlock tests a block against its references, +// or regenerate the references, if "-update" flag is set. +func testBlock(t *testing.T, test huffTest, ttype string) { + if test.want != "" { + test.want = fmt.Sprintf(test.want, ttype) + } + const gotSuffix = ".got" + test.wantNoInput = fmt.Sprintf(test.wantNoInput, ttype) + tokens := indexTokens(test.tokens) + if *update { + if test.input != "" { + t.Logf("Updating %q", test.want) + input, err := os.ReadFile(test.input) + if err != nil { + t.Error(err) + return + } + + f, err := os.Create(test.want) + if err != nil { + t.Error(err) + return + } + defer f.Close() + bw := newHuffmanBitWriter(f) + writeToType(t, ttype, bw, tokens, input) + } + + t.Logf("Updating %q", test.wantNoInput) + f, err := os.Create(test.wantNoInput) + if err != nil { + t.Error(err) + return + } + defer f.Close() + bw := newHuffmanBitWriter(f) + writeToType(t, ttype, bw, tokens, nil) + return + } + + if test.input != "" { + t.Logf("Testing %q", test.want) + input, err := os.ReadFile(test.input) + if err != nil { + t.Error(err) + return + } + want, err := os.ReadFile(test.want) + if err != nil { + t.Error(err) + return + } + var buf bytes.Buffer + bw := newHuffmanBitWriter(&buf) + writeToType(t, ttype, bw, tokens, input) + + got := buf.Bytes() + if !bytes.Equal(got, want) { + t.Errorf("writeBlock did not yield expected result for file %q with input. See %q", test.want, test.want+gotSuffix) + if err := os.WriteFile(test.want+gotSuffix, got, 0666); err != nil { + t.Error(err) + } + } + t.Log("Output ok") + + // Test if the writer produces the same output after reset. + buf.Reset() + bw.reset(&buf) + writeToType(t, ttype, bw, tokens, input) + bw.flush() + got = buf.Bytes() + if !bytes.Equal(got, want) { + t.Errorf("reset: writeBlock did not yield expected result for file %q with input. See %q", test.want, test.want+".reset"+gotSuffix) + if err := os.WriteFile(test.want+".reset"+gotSuffix, got, 0666); err != nil { + t.Error(err) + } + return + } + t.Log("Reset ok") + testWriterEOF(t, "wb", test, true) + } + t.Logf("Testing %q", test.wantNoInput) + wantNI, err := os.ReadFile(test.wantNoInput) + if err != nil { + t.Error(err) + return + } + var buf bytes.Buffer + bw := newHuffmanBitWriter(&buf) + writeToType(t, ttype, bw, tokens, nil) + + got := buf.Bytes() + if !bytes.Equal(got, wantNI) { + t.Errorf("writeBlock did not yield expected result for file %q with input. See %q", test.wantNoInput, test.wantNoInput+gotSuffix) + if err := os.WriteFile(test.wantNoInput+gotSuffix, got, 0666); err != nil { + t.Error(err) + } + } else if got[0]&1 == 1 { + t.Error("got unexpected EOF") + return + } + + t.Log("Output ok") + + // Test if the writer produces the same output after reset. + buf.Reset() + bw.reset(&buf) + writeToType(t, ttype, bw, tokens, nil) + bw.flush() + got = buf.Bytes() + if !bytes.Equal(got, wantNI) { + t.Errorf("reset: writeBlock did not yield expected result for file %q without input. See %q", test.wantNoInput, test.wantNoInput+".reset"+gotSuffix) + if err := os.WriteFile(test.wantNoInput+".reset"+gotSuffix, got, 0666); err != nil { + t.Error(err) + } + return + } + t.Log("Reset ok") + testWriterEOF(t, "wb", test, false) +} + +func writeToType(t *testing.T, ttype string, bw *huffmanBitWriter, tok tokens, input []byte) { + switch ttype { + case "wb": + bw.writeBlock(&tok, false, input) + case "dyn": + bw.writeBlockDynamic(&tok, false, input, false) + case "sync": + bw.writeBlockDynamic(&tok, false, input, true) + default: + panic("unknown test type") + } + + if bw.err != nil { + t.Error(bw.err) + return + } + + bw.flush() + if bw.err != nil { + t.Error(bw.err) + return + } +} + +// testWriterEOF tests if the written block contains an EOF marker. +func testWriterEOF(t *testing.T, ttype string, test huffTest, useInput bool) { + if useInput && test.input == "" { + return + } + var input []byte + if useInput { + var err error + input, err = os.ReadFile(test.input) + if err != nil { + t.Error(err) + return + } + } + var buf bytes.Buffer + bw := newHuffmanBitWriter(&buf) + tokens := indexTokens(test.tokens) + switch ttype { + case "wb": + bw.writeBlock(&tokens, true, input) + case "dyn": + bw.writeBlockDynamic(&tokens, true, input, true) + case "huff": + bw.writeBlockHuff(true, input, true) + default: + panic("unknown test type") + } + if bw.err != nil { + t.Error(bw.err) + return + } + + bw.flush() + if bw.err != nil { + t.Error(bw.err) + return + } + b := buf.Bytes() + if len(b) == 0 { + t.Error("no output received") + return + } + if b[0]&1 != 1 { + t.Errorf("block not marked with EOF for input %q", test.input) + return + } + t.Log("EOF ok") +} diff --git a/internal/compress/flate/huffman_code.go b/internal/compress/flate/huffman_code.go new file mode 100644 index 00000000..5f901bd0 --- /dev/null +++ b/internal/compress/flate/huffman_code.go @@ -0,0 +1,417 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "math/bits" +) + +const ( + maxBitsLimit = 16 + // number of valid literals + literalCount = 286 +) + +// hcode is a huffman code with a bit code and bit length. +type hcode uint32 + +func (h hcode) len() uint8 { + return uint8(h) +} + +func (h hcode) code64() uint64 { + return uint64(h >> 8) +} + +func (h hcode) zero() bool { + return h == 0 +} + +type huffmanEncoder struct { + codes []hcode + bitCount [17]int32 + + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + freqcache [literalCount + 1]literalNode +} + +type literalNode struct { + literal uint16 + freq uint16 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +// set sets the code and length of an hcode. +func (h *hcode) set(code uint16, length uint8) { + *h = hcode(length) | (hcode(code) << 8) +} + +func newhcode(code uint16, length uint8) hcode { + return hcode(length) | (hcode(code) << 8) +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return bits.Reverse16(number << ((16 - bitLength) & 15)) +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + // Make capacity to next power of two. + c := uint(bits.Len32(uint32(size - 1))) + return &huffmanEncoder{codes: make([]hcode, size, 1<<c)} +} + +// Generates a HuffmanCode corresponding to the fixed literal table +func generateFixedLiteralEncoding() *huffmanEncoder { + h := newHuffmanEncoder(literalCount) + codes := h.codes + var ch uint16 + for ch = range uint16(literalCount) { + var bits uint16 + var size uint8 + switch { + case ch < 144: + // size 8, 000110000 .. 10111111 + bits = ch + 48 + size = 8 + case ch < 256: + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144 + size = 9 + case ch < 280: + // size 7, 0000000 .. 0010111 + bits = ch - 256 + size = 7 + default: + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280 + size = 8 + } + codes[ch] = newhcode(reverseBits(bits, size), size) + } + return h +} + +func generateFixedOffsetEncoding() *huffmanEncoder { + h := newHuffmanEncoder(30) + codes := h.codes + for ch := range codes { + codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5) + } + return h +} + +var fixedLiteralEncoding = generateFixedLiteralEncoding() +var fixedOffsetEncoding = generateFixedOffsetEncoding() + +func (h *huffmanEncoder) bitLength(freq []uint16) int { + var total int + for i, f := range freq { + if f != 0 { + total += int(f) * int(h.codes[i].len()) + } + } + return total +} + +func (h *huffmanEncoder) bitLengthRaw(b []byte) int { + var total int + for _, f := range b { + total += int(h.codes[f].len()) + } + return total +} + +// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused. +func (h *huffmanEncoder) canReuseBits(freq []uint16) int { + var total int + for i, f := range freq { + if f != 0 { + code := h.codes[i] + if code.zero() { + return math.MaxInt32 + } + total += int(f) * int(code.len()) + } + } + return total +} + +// Return the number of literals assigned to each bit size in the Huffman encoding +// +// This method is only called when list.length >= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// +// maxBits The maximum number of bits that should be used to encode any literal. +// +// Must be less than 16. +// +// return An integer array in which array[i] indicates the number of literals +// +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + // Descending to only have 1 bounds check. + l2f := int32(list[2].freq) + l1f := int32(list[1].freq) + l0f := int32(list[0].freq) + int32(list[1].freq) + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: l1f, + nextCharFreq: l2f, + nextPairFreq: l0f, + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + levels[maxBits].needed = 2*n - 4 + + level := uint32(maxBits) + for level < 16 { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + e := list[n] + if e.literal < math.MaxUint16 { + l.nextCharFreq = int32(e.freq) + } else { + l.nextCharFreq = math.MaxInt32 + } + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + if true { + save := leafCounts[level][level] + leafCounts[level] = leafCounts[level-1] + leafCounts[level][level] = save + } else { + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + } + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := h.bitCount[:maxBits+1] + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + + sortByLiteral(chunk) + for _, node := range chunk { + h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n)) + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { + list := h.freqcache[:len(freq)+1] + codes := h.codes[:len(freq)] + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + codes[i] = 0 + } + } + list[count] = literalNode{} + + list = list[:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codes[node.literal].set(uint16(i), 1) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +// atLeastOne clamps the result between 1 and 15. +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 + } + if v > 15 { + return 15 + } + return v +} + +func histogram(b []byte, h []uint16) { + if true && len(b) >= 8<<10 { + // Split for bigger inputs + histogramSplit(b, h) + } else { + h = h[:256] + for _, t := range b { + h[t]++ + } + } +} + +func histogramSplit(b []byte, h []uint16) { + // Tested, and slightly faster than 2-way. + // Writing to separate arrays and combining is also slightly slower. + h = h[:256] + for len(b)&3 != 0 { + h[b[0]]++ + b = b[1:] + } + n := len(b) / 4 + x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:] + y, z, w = y[:len(x)], z[:len(x)], w[:len(x)] + for i, t := range x { + v0 := &h[t] + v1 := &h[y[i]] + v3 := &h[w[i]] + v2 := &h[z[i]] + *v0++ + *v1++ + *v2++ + *v3++ + } +} diff --git a/internal/compress/flate/huffman_sortByFreq.go b/internal/compress/flate/huffman_sortByFreq.go new file mode 100644 index 00000000..6c05ba8c --- /dev/null +++ b/internal/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,159 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/internal/compress/flate/huffman_sortByLiteral.go b/internal/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 00000000..93f1aea1 --- /dev/null +++ b/internal/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/internal/compress/flate/inflate.go b/internal/compress/flate/inflate.go new file mode 100644 index 00000000..6e90126d --- /dev/null +++ b/internal/compress/flate/inflate.go @@ -0,0 +1,865 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "compress/flate" + "fmt" + "io" + "math/bits" + "sync" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxCodeLenMask = 15 // mask for max length of Huffman code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code + + debugDecode = false +) + +// Value of length - 3 and extra bits. +type lengthExtra struct { + length, extra uint8 +} + +var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} + +var bitMask32 = [32]uint32{ + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, + 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, + 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, +} // up to 32 bits + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError = flate.CorruptInputError + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError = flate.ReadError + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError = flate.WriteError + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// http://www.gzip.org/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + maxRead int // the maximum number of bits we can read and not overread + chunks *[huffmanNumChunks]uint16 // chunks as described above + links [][]uint16 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.chunks == nil { + h.chunks = new([huffmanNumChunks]uint16) + } + + if h.maxRead != 0 { + *h = huffmanDecoder{chunks: h.chunks, links: h.links} + } + + // Count number of codes of each length, + // compute maxRead and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n&maxCodeLenMask]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i&maxCodeLenMask] = code + code += count[i&maxCodeLenMask] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1<<uint(max) && !(code == 1 && max == 1) { + if debugDecode { + fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)") + } + return false + } + + h.maxRead = min + + chunks := h.chunks[:] + for i := range chunks { + chunks[i] = 0 + } + + if max > huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + if cap(h.links) < huffmanNumChunks-link { + h.links = make([][]uint16, huffmanNumChunks-link) + } else { + h.links = h.links[:huffmanNumChunks-link] + } + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1)) + if cap(h.links[off]) < numLinks { + h.links[off] = make([]uint16, numLinks) + } else { + h.links[off] = h.links[off][:numLinks] + } + } + } else { + h.links = h.links[:0] + } + + for i, n := range lengths { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + chunk := uint16(i<<huffmanValueShift | n) + reverse := int(bits.Reverse16(uint16(code))) + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// Reader is the actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +type step uint8 + +const ( + copyData step = iota + 1 + nextBlock + huffmanBytesBuffer + huffmanBytesReader + huffmanBufioReader + huffmanStringsReader + huffmanGenericReader +) + +// flushMode tells decompressor when to return data +type flushMode uint8 + +const ( + syncFlush flushMode = iota // return data after sync flush block + partialFlush // return data after each block +) + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Next step in the decompression, + // and decompression state. + step step + stepState int + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Input bits, in top of b. + b uint32 + + nb uint + final bool + + flushMode flushMode +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + if debugDecode { + fmt.Println("stored block") + } + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlockDecoder() + if debugDecode { + fmt.Println("predefinied huffman block") + } + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlockDecoder() + if debugDecode { + fmt.Println("dynamic huffman block") + } + default: + // 3 is reserved. + if debugDecode { + fmt.Println("reserved data block encountered") + } + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + + f.doStep() + + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +// WriteTo implements the io.WriteTo interface for io.Copy and friends. +func (f *decompressor) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + flushed := false + for { + if len(f.toRead) > 0 { + n, err := w.Write(f.toRead) + total += int64(n) + if err != nil { + f.err = err + return total, err + } + if n != len(f.toRead) { + return total, io.ErrShortWrite + } + f.toRead = f.toRead[:0] + } + if f.err != nil && flushed { + if f.err == io.EOF { + return total, nil + } + return total, f.err + } + if f.err == nil { + f.doStep() + } + if len(f.toRead) == 0 && f.err != nil && !flushed { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + flushed = true + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + if debugDecode { + fmt.Println("nlit > maxNumLit", nlit) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + if debugDecode { + fmt.Println("ndist > maxNumDist", ndist) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := range nclen { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + if debugDecode { + fmt.Println("init codebits failed") + } + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + if debugDecode { + fmt.Println("i==0") + } + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits:", err) + } + return err + } + } + rep += int(f.b & uint32(1<<(nb®SizeMaskUint32)-1)) + f.b >>= nb & regSizeMaskUint32 + f.nb -= nb + if i+rep > n { + if debugDecode { + fmt.Println("i+rep > n", i, rep, n) + } + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + if debugDecode { + fmt.Println("init2 failed") + } + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the maxRead bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.maxRead < f.bits[endBlockMarker] { + f.h1.maxRead = f.bits[endBlockMarker] + } + if !f.final { + // If not the final block, the smallest block possible is + // a predefined table, BTYPE=01, with a single EOB marker. + // This will take up 3 + 7 bits. + f.h1.maxRead += 10 + } + + return nil +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + left := (f.nb) & 7 + f.nb -= left + f.b >>= left + + offBytes := f.nb >> 3 + // Unfilled values will be overwritten. + f.buf[0] = uint8(f.b) + f.buf[1] = uint8(f.b >> 8) + f.buf[2] = uint8(f.b >> 16) + f.buf[3] = uint8(f.b >> 24) + + f.roffset += int64(offBytes) + f.nb, f.b = 0, 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 + nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 + if nn != ^n { + if debugDecode { + ncomp := ^n + fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) + } + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + if f.flushMode == syncFlush { + f.toRead = f.dict.readFlush() + } + + f.finishBlock() + return + } + + f.copyLen = int(n) + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + + f.err = io.EOF + } else if f.flushMode == partialFlush && f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + + f.step = nextBlock +} + +func (f *decompressor) doStep() { + switch f.step { + case copyData: + f.copyData() + case nextBlock: + f.nextBlock() + case huffmanBytesBuffer: + f.huffmanBytesBuffer() + case huffmanBytesReader: + f.huffmanBytesReader() + case huffmanBufioReader: + f.huffmanBufioReader() + case huffmanStringsReader: + f.huffmanStringsReader() + case huffmanGenericReader: + f.huffmanGenericReader() + default: + panic("BUG: unexpected step state") + } +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << (f.nb & regSizeMaskUint32) + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & regSizeMaskUint32) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & regSizeMaskUint32) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := range 144 { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + h1: f.h1, + h2: f.h2, + dict: f.dict, + step: nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +type ReaderOpt func(*decompressor) + +// WithPartialBlock tells decompressor to return after each block, +// so it can read data written with partial flush +func WithPartialBlock() ReaderOpt { + return func(f *decompressor) { + f.flushMode = partialFlush + } +} + +// WithDict initializes the reader with a preset dictionary +func WithDict(dict []byte) ReaderOpt { + return func(f *decompressor) { + f.dict.init(maxMatchOffset, dict) + } +} + +// NewReaderOpts returns new reader with provided options +func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = nextBlock + f.dict.init(maxMatchOffset, nil) + + for _, opt := range opts { + opt(&f) + } + + return &f +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + return NewReaderOpts(r) +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + return NewReaderOpts(r, WithDict(dict)) +} diff --git a/internal/compress/flate/inflate_gen.go b/internal/compress/flate/inflate_gen.go new file mode 100644 index 00000000..2b2f993f --- /dev/null +++ b/internal/compress/flate/inflate_gen.go @@ -0,0 +1,1283 @@ +// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( + "bufio" + "bytes" + "fmt" + "math/bits" + "strings" +) + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesBuffer() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Buffer) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = huffmanBytesBuffer + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = huffmanBytesBuffer // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = huffmanBytesReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = huffmanBytesReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBufioReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bufio.Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = huffmanBufioReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = huffmanBufioReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanStringsReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*strings.Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = huffmanStringsReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = huffmanStringsReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanGenericReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = huffmanGenericReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = huffmanGenericReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +func (f *decompressor) huffmanBlockDecoder() { + switch f.r.(type) { + case *bytes.Buffer: + f.huffmanBytesBuffer() + case *bytes.Reader: + f.huffmanBytesReader() + case *bufio.Reader: + f.huffmanBufioReader() + case *strings.Reader: + f.huffmanStringsReader() + case Reader: + f.huffmanGenericReader() + default: + f.huffmanGenericReader() + } +} diff --git a/internal/compress/flate/inflate_test.go b/internal/compress/flate/inflate_test.go new file mode 100644 index 00000000..d018991c --- /dev/null +++ b/internal/compress/flate/inflate_test.go @@ -0,0 +1,302 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "crypto/rand" + "io" + "os" + "strconv" + "strings" + "testing" +) + +func TestReset(t *testing.T) { + ss := []string{ + "lorem ipsum izzle fo rizzle", + "the quick brown fox jumped over", + } + + deflated := make([]bytes.Buffer, 2) + for i, s := range ss { + w, _ := NewWriter(&deflated[i], 1) + w.Write([]byte(s)) + w.Close() + } + + inflated := make([]bytes.Buffer, 2) + + f := NewReader(&deflated[0]) + io.Copy(&inflated[0], f) + f.(Resetter).Reset(&deflated[1], nil) + io.Copy(&inflated[1], f) + f.Close() + + for i, s := range ss { + if s != inflated[i].String() { + t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s) + } + } +} + +func TestReaderTruncated(t *testing.T) { + vectors := []struct{ input, output string }{ + {"\x00", ""}, + {"\x00\f", ""}, + {"\x00\f\x00", ""}, + {"\x00\f\x00\xf3\xff", ""}, + {"\x00\f\x00\xf3\xffhello", "hello"}, + {"\x00\f\x00\xf3\xffhello, world", "hello, world"}, + {"\x02", ""}, + {"\xf2H\xcd", "He"}, + {"\xf2HÍ0a\u0084\t", "Hel\x90\x90\x90\x90\x90"}, + {"\xf2HÍ0a\u0084\t\x00", "Hel\x90\x90\x90\x90\x90"}, + } + + for i, v := range vectors { + r := strings.NewReader(v.input) + zr := NewReader(r) + b, err := io.ReadAll(zr) + if err != io.ErrUnexpectedEOF { + t.Errorf("test %d, error mismatch: got %v, want io.ErrUnexpectedEOF", i, err) + } + if string(b) != v.output { + t.Errorf("test %d, output mismatch: got %q, want %q", i, b, v.output) + } + } +} + +func TestResetDict(t *testing.T) { + dict := []byte("the lorem fox") + ss := []string{ + "lorem ipsum izzle fo rizzle", + "the quick brown fox jumped over", + } + + deflated := make([]bytes.Buffer, len(ss)) + for i, s := range ss { + w, _ := NewWriterDict(&deflated[i], DefaultCompression, dict) + w.Write([]byte(s)) + w.Close() + } + + inflated := make([]bytes.Buffer, len(ss)) + + f := NewReader(nil) + for i := range inflated { + f.(Resetter).Reset(&deflated[i], dict) + io.Copy(&inflated[i], f) + } + f.Close() + + for i, s := range ss { + if s != inflated[i].String() { + t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s) + } + } +} + +// Tests ported from zlib/test/infcover.c +type infTest struct { + hex string + id string + n int +} + +var infTests = []infTest{ + {"0 0 0 0 0", "invalid stored block lengths", 1}, + {"3 0", "fixed", 0}, + {"6", "invalid block type", 1}, + {"1 1 0 fe ff 0", "stored", 0}, + {"fc 0 0", "too many length or distance symbols", 1}, + {"4 0 fe ff", "invalid code lengths set", 1}, + {"4 0 24 49 0", "invalid bit length repeat", 1}, + {"4 0 24 e9 ff ff", "invalid bit length repeat", 1}, + {"4 0 24 e9 ff 6d", "invalid code -- missing end-of-block", 1}, + {"4 80 49 92 24 49 92 24 71 ff ff 93 11 0", "invalid literal/lengths set", 1}, + {"4 80 49 92 24 49 92 24 f b4 ff ff c3 84", "invalid distances set", 1}, + {"4 c0 81 8 0 0 0 0 20 7f eb b 0 0", "invalid literal/length code", 1}, + {"2 7e ff ff", "invalid distance code", 1}, + {"c c0 81 0 0 0 0 0 90 ff 6b 4 0", "invalid distance too far back", 1}, + + // also trailer mismatch just in inflate() + {"1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 1", "incorrect data check", -1}, + {"1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 1", "incorrect length check", -1}, + {"5 c0 21 d 0 0 0 80 b0 fe 6d 2f 91 6c", "pull 17", 0}, + {"5 e0 81 91 24 cb b2 2c 49 e2 f 2e 8b 9a 47 56 9f fb fe ec d2 ff 1f", "long code", 0}, + {"ed c0 1 1 0 0 0 40 20 ff 57 1b 42 2c 4f", "length extra", 0}, + {"ed cf c1 b1 2c 47 10 c4 30 fa 6f 35 1d 1 82 59 3d fb be 2e 2a fc f c", "long distance and extra", 0}, + {"ed c0 81 0 0 0 0 80 a0 fd a9 17 a9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6", "window end", 0}, +} + +func TestInflate(t *testing.T) { + for _, test := range infTests { + hex := strings.Split(test.hex, " ") + data := make([]byte, len(hex)) + for i, h := range hex { + b, _ := strconv.ParseInt(h, 16, 32) + data[i] = byte(b) + } + buf := bytes.NewReader(data) + r := NewReader(buf) + + _, err := io.Copy(io.Discard, r) + if (test.n == 0 && err == nil) || (test.n != 0 && err != nil) { + t.Logf("%q: OK:", test.id) + t.Logf(" - got %v", err) + continue + } + + if test.n == 0 && err != nil { + t.Errorf("%q: Expected no error, but got %v", test.id, err) + continue + } + + if test.n != 0 && err == nil { + t.Errorf("%q:Expected an error, but got none", test.id) + continue + } + t.Fatal(test.n, err) + } + + for _, test := range infOutTests { + hex := strings.Split(test.hex, " ") + data := make([]byte, len(hex)) + for i, h := range hex { + b, _ := strconv.ParseInt(h, 16, 32) + data[i] = byte(b) + } + buf := bytes.NewReader(data) + r := NewReader(buf) + + _, err := io.Copy(io.Discard, r) + if test.err == (err != nil) { + t.Logf("%q: OK:", test.id) + t.Logf(" - got %v", err) + continue + } + + if test.err == false && err != nil { + t.Errorf("%q: Expected no error, but got %v", test.id, err) + continue + } + + if test.err && err == nil { + t.Errorf("%q: Expected an error, but got none", test.id) + continue + } + t.Fatal(test.err, err) + } + +} + +// Tests ported from zlib/test/infcover.c +// Since zlib inflate is push (writer) instead of pull (reader) +// some of the window size tests have been removed, since they +// are irrelevant. +type infOutTest struct { + hex string + id string + step int + win int + length int + err bool +} + +var infOutTests = []infOutTest{ + {"2 8 20 80 0 3 0", "inflate_fast TYPE return", 0, -15, 258, false}, + {"63 18 5 40 c 0", "window wrap", 3, -8, 300, false}, + {"e5 e0 81 ad 6d cb b2 2c c9 01 1e 59 63 ae 7d ee fb 4d fd b5 35 41 68 ff 7f 0f 0 0 0", "fast length extra bits", 0, -8, 258, true}, + {"25 fd 81 b5 6d 59 b6 6a 49 ea af 35 6 34 eb 8c b9 f6 b9 1e ef 67 49 50 fe ff ff 3f 0 0", "fast distance extra bits", 0, -8, 258, true}, + {"3 7e 0 0 0 0 0", "fast invalid distance code", 0, -8, 258, true}, + {"1b 7 0 0 0 0 0", "fast invalid literal/length code", 0, -8, 258, true}, + {"d c7 1 ae eb 38 c 4 41 a0 87 72 de df fb 1f b8 36 b1 38 5d ff ff 0", "fast 2nd level codes and too far back", 0, -8, 258, true}, + {"63 18 5 8c 10 8 0 0 0 0", "very common case", 0, -8, 259, false}, + {"63 60 60 18 c9 0 8 18 18 18 26 c0 28 0 29 0 0 0", "contiguous and wrap around window", 6, -8, 259, false}, + {"63 0 3 0 0 0 0 0", "copy direct from output", 0, -8, 259, false}, + {"1f 8b 0 0", "bad gzip method", 0, 31, 0, true}, + {"1f 8b 8 80", "bad gzip flags", 0, 31, 0, true}, + {"77 85", "bad zlib method", 0, 15, 0, true}, + {"78 9c", "bad zlib window size", 0, 8, 0, true}, + {"1f 8b 8 1e 0 0 0 0 0 0 1 0 0 0 0 0 0", "bad header crc", 0, 47, 1, true}, + {"1f 8b 8 2 0 0 0 0 0 0 1d 26 3 0 0 0 0 0 0 0 0 0", "check gzip length", 0, 47, 0, true}, + {"78 90", "bad zlib header check", 0, 47, 0, true}, + {"8 b8 0 0 0 1", "need dictionary", 0, 8, 0, true}, + {"63 18 68 30 d0 0 0", "force split window update", 4, -8, 259, false}, + {"3 0", "use fixed blocks", 0, -15, 1, false}, + {"", "bad window size", 0, 1, 0, true}, +} + +func TestWriteTo(t *testing.T) { + input := make([]byte, 100000) + n, err := rand.Read(input) + if err != nil { + t.Fatal(err) + } + if n != len(input) { + t.Fatal("did not fill buffer") + } + compressed := &bytes.Buffer{} + w, err := NewWriter(compressed, -2) + if err != nil { + t.Fatal(err) + } + n, err = w.Write(input) + if err != nil { + t.Fatal(err) + } + if n != len(input) { + t.Fatal("did not fill buffer") + } + w.Close() + buf := compressed.Bytes() + + dec := NewReader(bytes.NewBuffer(buf)) + // ReadAll does not use WriteTo, but we wrap it in a NopCloser to be sure. + readall, err := io.ReadAll(io.NopCloser(dec)) + if err != nil { + t.Fatal(err) + } + if len(readall) != len(input) { + t.Fatal("did not decompress everything") + } + + dec = NewReader(bytes.NewBuffer(buf)) + wtbuf := &bytes.Buffer{} + written, err := dec.(io.WriterTo).WriteTo(wtbuf) + if err != nil { + t.Fatal(err) + } + if written != int64(len(input)) { + t.Error("Returned length did not match, expected", len(input), "got", written) + } + if wtbuf.Len() != len(input) { + t.Error("Actual Length did not match, expected", len(input), "got", wtbuf.Len()) + } + if !bytes.Equal(wtbuf.Bytes(), input) { + t.Fatal("output did not match input") + } +} + +func TestReaderPartialBlock(t *testing.T) { + data, err := os.ReadFile("testdata/partial-block") + if err != nil { + t.Error(err) + } + + r := NewReaderOpts(bytes.NewReader(data), WithPartialBlock()) + rb := make([]byte, 32) + n, err := r.Read(rb) + if err != nil { + t.Fatalf("Read: %v", err) + } + + expected := "hello, world" + actual := string(rb[:n]) + if expected != actual { + t.Fatalf("expected: %v, got: %v", expected, actual) + } +} diff --git a/internal/compress/flate/level1.go b/internal/compress/flate/level1.go new file mode 100644 index 00000000..41c312e8 --- /dev/null +++ b/internal/compress/flate/level1.go @@ -0,0 +1,215 @@ +package flate + +import ( + "fmt" + + "codeberg.org/lindenii/furgit/internal/compress/internal/le" +) + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL1 struct { + fastGen + table [tableSize]tableEntry +} + +// EncodeL1 uses a similar algorithm to level 1 +func (e *fastEncL1) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashBytes = 5 + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + var t int32 + for { + nextHash := hashLen(cv, tableBits, hashBytes) + candidate = e.table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hashLen(now, tableBits, hashBytes) + t = candidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = now + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + t = candidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + cv = now + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + l := e.matchlenLong(int(s+4), int(t+4), src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && le.Load8(src, t-1) == le.Load8(src, s-1) { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + // Save the match found + if false { + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + } else { + // Inlined... + xoffset := uint32(s - t - baseMatchOffset) + xlength := l + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + dst.extraHist[lengthCodes1[uint8(xl)]]++ + dst.offHist[oc]++ + dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) + dst.n++ + } + } + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + // Index first pair after match end. + if int(s+l+8) < len(src) { + cv := load6432(src, s) + e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hashLen(x, tableBits, hashBytes) + e.table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hashLen(x, tableBits, hashBytes) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + t = candidate.offset - e.cur + if s-t > maxMatchOffset || uint32(x) != load3232(src, t) { + cv = x >> 8 + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/internal/compress/flate/level2.go b/internal/compress/flate/level2.go new file mode 100644 index 00000000..c8d047f2 --- /dev/null +++ b/internal/compress/flate/level2.go @@ -0,0 +1,214 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL2 struct { + fastGen + table [bTableSize]tableEntry +} + +// EncodeL2 uses a similar algorithm to level 1, but is capable +// of matching across blocks giving better compression at a small slowdown. +func (e *fastEncL2) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashBytes = 5 + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + // When should we start skipping if we haven't found matches in a long while. + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hashLen(cv, bTableBits, hashBytes) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidate = e.table[nextHash] + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hashLen(now, bTableBits, hashBytes) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = now + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { + break + } + cv = now + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(int(s+4), int(t+4), src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+l+8) < len(src) { + cv := load6432(src, s) + e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every second hash in-between, but offset by 1. + for i := s - l + 2; i < s-5; i += 7 { + x := load6432(src, i) + nextHash := hashLen(x, bTableBits, hashBytes) + e.table[nextHash] = tableEntry{offset: e.cur + i} + // Skip one + x >>= 16 + nextHash = hashLen(x, bTableBits, hashBytes) + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} + // Skip one + x >>= 16 + nextHash = hashLen(x, bTableBits, hashBytes) + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hashLen(x, bTableBits, hashBytes) + prevHash2 := hashLen(x>>8, bTableBits, hashBytes) + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} + currHash := hashLen(x>>16, bTableBits, hashBytes) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { + cv = x >> 24 + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/internal/compress/flate/level3.go b/internal/compress/flate/level3.go new file mode 100644 index 00000000..33f9fb15 --- /dev/null +++ b/internal/compress/flate/level3.go @@ -0,0 +1,241 @@ +package flate + +import "fmt" + +// fastEncL3 +type fastEncL3 struct { + fastGen + table [1 << 16]tableEntryPrev +} + +// Encode uses a similar algorithm to level 2, will check up to two candidates. +func (e *fastEncL3) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + tableBits = 16 + tableSize = 1 << tableBits + hashBytes = 5 + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + } + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + e.table[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // Skip if too small. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 7 + nextS := s + var candidate tableEntry + for { + nextHash := hashLen(cv, tableBits, hashBytes) + s = nextS + nextS = s + 1 + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash] + now := load6432(src, nextS) + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} + + // Check both candidates + candidate = candidates.Cur + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if uint32(cv) == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) { + break + } + // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) + l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) + if l2 > l1 { + candidate = candidates.Prev + } + break + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { + break + } + } + cv = now + } + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + t := candidate.offset - e.cur + l := e.matchlenLong(int(s+4), int(t+4), src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+8) < len(src) && t > 0 { + cv = load6432(src, t) + nextHash := hashLen(cv, tableBits, hashBytes) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + t}, + } + } + goto emitRemainder + } + + // Store every 5th hash in-between. + for i := s - l + 2; i < s-5; i += 6 { + nextHash := hashLen(load6432(src, i), tableBits, hashBytes) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + i}} + } + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. + x := load6432(src, s-2) + prevHash := hashLen(x, tableBits, hashBytes) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 2}, + } + x >>= 8 + prevHash = hashLen(x, tableBits, hashBytes) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 1}, + } + x >>= 8 + currHash := hashLen(x, tableBits, hashBytes) + candidates := e.table[currHash] + cv = x + e.table[currHash] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur}, + } + + // Check both candidates + candidate = candidates.Cur + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset { + if uint32(cv) == load3232(src, candidate.offset-e.cur) { + // Found a match... + continue + } + candidate = candidates.Prev + if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { + // Match at prev... + continue + } + } + cv = x >> 8 + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/internal/compress/flate/level4.go b/internal/compress/flate/level4.go new file mode 100644 index 00000000..88509e19 --- /dev/null +++ b/internal/compress/flate/level4.go @@ -0,0 +1,221 @@ +package flate + +import "fmt" + +type fastEncL4 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntry +} + +func (e *fastEncL4) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.bTable[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + e.bTable[nextHashL] = entry + + t = lCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // We got a long match. Use that. + break + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Found a 4 match... + lCandidate = e.bTable[hash7(next, tableBits)] + + // If the next long is a candidate, check if we should use that instead... + lOff := lCandidate.offset - e.cur + if nextS-lOff < maxMatchOffset && load3232(src, lOff) == uint32(next) { + l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) + if l2 > l1 { + s = nextS + t = lCandidate.offset - e.cur + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + l := e.matchlenLong(int(s+4), int(t+4), src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic("s-t") + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+8) < len(src) { + cv := load6432(src, s) + e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every 3rd hash in-between + if true { + i := nextS + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + + i += 3 + for ; i < s-1; i += 3 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/internal/compress/flate/level5.go b/internal/compress/flate/level5.go new file mode 100644 index 00000000..a22ad7d1 --- /dev/null +++ b/internal/compress/flate/level5.go @@ -0,0 +1,705 @@ +package flate + +import "fmt" + +type fastEncL5 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, t) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) { + l = e.matchlen(int(s+4), int(t+4), src) + 4 + ml1 := e.matchlen(int(s+4), int(t2+4), src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Found a 4 match... + l = e.matchlen(int(s+4), int(t+4), src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, t2) == uint32(next) { + ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) { + ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(int(s+4), int(t+4), src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(int(s+l), int(t+l), src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} + +// fastEncL5Window is a level 5 encoder, +// but with a custom window size. +type fastEncL5Window struct { + hist []byte + cur int32 + maxOffset int32 + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5Window) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + maxMatchOffset := e.maxOffset + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, t) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, t2) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} + +// Reset the encoding table. +func (e *fastEncL5Window) Reset() { + // We keep the same allocs, since we are compressing the same block sizes. + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= int32(bufferReset) { + e.cur += e.maxOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +func (e *fastEncL5Window) addBlock(src []byte) int32 { + // check if we have space already + maxMatchOffset := e.maxOffset + + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < int(maxMatchOffset*2) { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := min(int(s)+maxMatchLength-4, len(src)) + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} diff --git a/internal/compress/flate/level6.go b/internal/compress/flate/level6.go new file mode 100644 index 00000000..96f5bb43 --- /dev/null +++ b/internal/compress/flate/level6.go @@ -0,0 +1,325 @@ +package flate + +import "fmt" + +type fastEncL6 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL6) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + // Repeat MUST be > 1 and within range + repeat := int32(1) + for { + const skipLog = 7 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + // Calculate hashes of 'next' + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, t) { + // Long candidate matches at least 4 bytes. + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check the previous long candidate as well. + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, t2) { + l = e.matchlen(int(s+4), int(t+4), src) + 4 + ml1 := e.matchlen(int(s+4), int(t2+4), src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + // Current value did not match, but check if previous long value does. + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) { + // Found a 4 match... + l = e.matchlen(int(s+4), int(t+4), src) + 4 + + // Look up next long candidate (at nextS) + lCandidate = e.bTable[nextHashL] + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check repeat at s + repOff + const repOff = 1 + t2 := s - repeat + repOff + if load3232(src, t2) == uint32(cv>>(8*repOff)) { + ml := e.matchlen(int(s+4+repOff), int(t2+4), src) + 4 + if ml > l { + t = t2 + l = ml + s += repOff + // Not worth checking more. + break + } + } + + // If the next long is a candidate, use that... + t2 = lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, t2) == uint32(next) { + ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + // This is ok, but check previous as well. + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, t2) == uint32(next) { + ml := e.matchlen(int(nextS+4), int(t2+4), src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(int(s+4), int(t+4), src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(int(s+l), int(t+l), src) + } + + // Try to locate a better match by checking the end-of-match... + if sAt := s + l; sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] + // Test current + t2 := eLong.Cur.offset - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if off < maxMatchOffset { + if off > 0 && t2 >= 0 { + if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + // Test next: + t2 = eLong.Prev.offset - e.cur - l + skipBeginning + off := s2 - t2 + if off > 0 && off < maxMatchOffset && t2 >= 0 { + if l2 := e.matchlenLong(int(s2), int(t2), src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if false { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + repeat = s - t + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index after match end. + for i := nextS + 1; i < int32(len(src))-8; i += 2 { + cv := load6432(src, i) + e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur + } + goto emitRemainder + } + + // Store every long hash in-between and every second short. + if true { + for i := nextS + 1; i < s-1; i += 2 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong2 := &e.bTable[hash7(cv>>8, tableBits)] + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong.Cur, eLong.Prev = t, eLong.Cur + eLong2.Cur, eLong2.Prev = t2, eLong2.Cur + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + cv = load6432(src, s) + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/internal/compress/flate/matchlen_generic.go b/internal/compress/flate/matchlen_generic.go new file mode 100644 index 00000000..63c0637d --- /dev/null +++ b/internal/compress/flate/matchlen_generic.go @@ -0,0 +1,34 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +import ( + "math/bits" + + "codeberg.org/lindenii/furgit/internal/compress/internal/le" +) + +// matchLen returns the maximum common prefix length of a and b. +// a must be the shortest of the two. +func matchLen(a, b []byte) (n int) { + left := len(a) + for left >= 8 { + diff := le.Load64(a, n) ^ le.Load64(b, n) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + left -= 8 + } + + a = a[n:] + b = b[n:] + for i := range a { + if a[i] != b[i] { + break + } + n++ + } + return n +} diff --git a/internal/compress/flate/reader_test.go b/internal/compress/flate/reader_test.go new file mode 100644 index 00000000..6eedfb9b --- /dev/null +++ b/internal/compress/flate/reader_test.go @@ -0,0 +1,108 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "io" + "os" + "runtime" + "strings" + "testing" +) + +func TestNlitOutOfRange(t *testing.T) { + // Trying to decode this bogus flate data, which has a Huffman table + // with nlit=288, should not panic. + io.Copy(io.Discard, NewReader(strings.NewReader( + "\xfc\xfe\x36\xe7\x5e\x1c\xef\xb3\x55\x58\x77\xb6\x56\xb5\x43\xf4"+ + "\x6f\xf2\xd2\xe6\x3d\x99\xa0\x85\x8c\x48\xeb\xf8\xda\x83\x04\x2a"+ + "\x75\xc4\xf8\x0f\x12\x11\xb9\xb4\x4b\x09\xa0\xbe\x8b\x91\x4c"))) +} + +const ( + digits = iota + twain + random +) + +var testfiles = []string{ + // Digits is the digits of the irrational number e. Its decimal representation + // does not repeat, but there are only 10 possible digits, so it should be + // reasonably compressible. + digits: "../testdata/e.txt", + // Twain is Project Gutenberg's edition of Mark Twain's classic English novel. + twain: "../testdata/Mark.Twain-Tom.Sawyer.txt", + // Random bytes + random: "../testdata/sharnd.out", +} + +func benchmarkDecode(b *testing.B, testfile, level, n int) { + b.ReportAllocs() + b.StopTimer() + b.SetBytes(int64(n)) + buf0, err := os.ReadFile(testfiles[testfile]) + if err != nil { + b.Fatal(err) + } + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } + compressed := new(bytes.Buffer) + w, err := NewWriter(compressed, level) + if err != nil { + b.Fatal(err) + } + for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } + io.Copy(w, bytes.NewReader(buf0)) + } + w.Close() + buf1 := compressed.Bytes() + buf0, compressed, w = nil, nil, nil + r := NewReader(bytes.NewReader(buf1)) + res := r.(Resetter) + runtime.GC() + b.StartTimer() + + for i := 0; i < b.N; i++ { + _ = res.Reset(bytes.NewReader(buf1), nil) + _, _ = io.Copy(io.Discard, r) + } +} + +// These short names are so that gofmt doesn't break the BenchmarkXxx function +// bodies below over multiple lines. +const ( + constant = ConstantCompression + speed = BestSpeed + default_ = DefaultCompression + compress = BestCompression + oneK = -1024 +) + +func BenchmarkDecodeDigitsSpeed1e4(b *testing.B) { benchmarkDecode(b, digits, speed, 1e4) } +func BenchmarkDecodeDigitsSpeed1e5(b *testing.B) { benchmarkDecode(b, digits, speed, 1e5) } +func BenchmarkDecodeDigitsSpeed1e6(b *testing.B) { benchmarkDecode(b, digits, speed, 1e6) } +func BenchmarkDecodeDigitsDefault1e4(b *testing.B) { benchmarkDecode(b, digits, default_, 1e4) } +func BenchmarkDecodeDigitsDefault1e5(b *testing.B) { benchmarkDecode(b, digits, default_, 1e5) } +func BenchmarkDecodeDigitsDefault1e6(b *testing.B) { benchmarkDecode(b, digits, default_, 1e6) } +func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) } +func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) } +func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) } +func BenchmarkDecodeTwainSpeed1e4(b *testing.B) { benchmarkDecode(b, twain, speed, 1e4) } +func BenchmarkDecodeTwainSpeed1e5(b *testing.B) { benchmarkDecode(b, twain, speed, 1e5) } +func BenchmarkDecodeTwainSpeed1e6(b *testing.B) { benchmarkDecode(b, twain, speed, 1e6) } +func BenchmarkDecodeTwainDefault1e4(b *testing.B) { benchmarkDecode(b, twain, default_, 1e4) } +func BenchmarkDecodeTwainDefault1e5(b *testing.B) { benchmarkDecode(b, twain, default_, 1e5) } +func BenchmarkDecodeTwainDefault1e6(b *testing.B) { benchmarkDecode(b, twain, default_, 1e6) } +func BenchmarkDecodeTwainCompress1e4(b *testing.B) { benchmarkDecode(b, twain, compress, 1e4) } +func BenchmarkDecodeTwainCompress1e5(b *testing.B) { benchmarkDecode(b, twain, compress, 1e5) } +func BenchmarkDecodeTwainCompress1e6(b *testing.B) { benchmarkDecode(b, twain, compress, 1e6) } +func BenchmarkDecodeRandomSpeed1e4(b *testing.B) { benchmarkDecode(b, random, speed, 1e4) } +func BenchmarkDecodeRandomSpeed1e5(b *testing.B) { benchmarkDecode(b, random, speed, 1e5) } +func BenchmarkDecodeRandomSpeed1e6(b *testing.B) { benchmarkDecode(b, random, speed, 1e6) } diff --git a/internal/compress/flate/regmask_amd64.go b/internal/compress/flate/regmask_amd64.go new file mode 100644 index 00000000..6ed28061 --- /dev/null +++ b/internal/compress/flate/regmask_amd64.go @@ -0,0 +1,37 @@ +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 7 + reg8SizeMask16 = 15 + reg8SizeMask32 = 31 + reg8SizeMask64 = 63 + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = reg8SizeMask8 + reg16SizeMask16 = reg8SizeMask16 + reg16SizeMask32 = reg8SizeMask32 + reg16SizeMask64 = reg8SizeMask64 + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = reg8SizeMask8 + reg32SizeMask16 = reg8SizeMask16 + reg32SizeMask32 = reg8SizeMask32 + reg32SizeMask64 = reg8SizeMask64 + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = reg8SizeMask8 + reg64SizeMask16 = reg8SizeMask16 + reg64SizeMask32 = reg8SizeMask32 + reg64SizeMask64 = reg8SizeMask64 + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = reg8SizeMask8 + regSizeMaskUint16 = reg8SizeMask16 + regSizeMaskUint32 = reg8SizeMask32 + regSizeMaskUint64 = reg8SizeMask64 +) diff --git a/internal/compress/flate/regmask_other.go b/internal/compress/flate/regmask_other.go new file mode 100644 index 00000000..e62caf71 --- /dev/null +++ b/internal/compress/flate/regmask_other.go @@ -0,0 +1,39 @@ +//go:build !amd64 + +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 0xff + reg8SizeMask16 = 0xff + reg8SizeMask32 = 0xff + reg8SizeMask64 = 0xff + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = 0xffff + reg16SizeMask16 = 0xffff + reg16SizeMask32 = 0xffff + reg16SizeMask64 = 0xffff + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = 0xffffffff + reg32SizeMask16 = 0xffffffff + reg32SizeMask32 = 0xffffffff + reg32SizeMask64 = 0xffffffff + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = 0xffffffffffffffff + reg64SizeMask16 = 0xffffffffffffffff + reg64SizeMask32 = 0xffffffffffffffff + reg64SizeMask64 = 0xffffffffffffffff + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = ^uint(0) + regSizeMaskUint16 = ^uint(0) + regSizeMaskUint32 = ^uint(0) + regSizeMaskUint64 = ^uint(0) +) diff --git a/internal/compress/flate/stateless.go b/internal/compress/flate/stateless.go new file mode 100644 index 00000000..7e944bfb --- /dev/null +++ b/internal/compress/flate/stateless.go @@ -0,0 +1,325 @@ +package flate + +import ( + "io" + "math" + "sync" + + "codeberg.org/lindenii/furgit/internal/compress/internal/le" +) + +const ( + maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 + + slTableBits = 13 + slTableSize = 1 << slTableBits + slTableShift = 32 - slTableBits +) + +type statelessWriter struct { + dst io.Writer + closed bool +} + +func (s *statelessWriter) Close() error { + if s.closed { + return nil + } + s.closed = true + // Emit EOF block + return StatelessDeflate(s.dst, nil, true, nil) +} + +func (s *statelessWriter) Write(p []byte) (n int, err error) { + err = StatelessDeflate(s.dst, p, false, nil) + if err != nil { + return 0, err + } + return len(p), nil +} + +func (s *statelessWriter) Reset(w io.Writer) { + s.dst = w + s.closed = false +} + +// NewStatelessWriter will do compression but without maintaining any state +// between Write calls. +// There will be no memory kept between Write calls, +// but compression and speed will be suboptimal. +// Because of this, the size of actual Write calls will affect output size. +func NewStatelessWriter(dst io.Writer) io.WriteCloser { + return &statelessWriter{dst: dst} +} + +// bitWriterPool contains bit writers that can be reused. +var bitWriterPool = sync.Pool{ + New: func() any { + return newHuffmanBitWriter(nil) + }, +} + +// tokensPool contains tokens struct objects that can be reused +var tokensPool = sync.Pool{ + New: func() any { + return &tokens{} + }, +} + +// StatelessDeflate allows compressing directly to a Writer without retaining state. +// When returning everything will be flushed. +// Up to 8KB of an optional dictionary can be given which is presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { + bw := bitWriterPool.Get().(*huffmanBitWriter) + bw.reset(out) + defer func() { + // don't keep a reference to our output + bw.reset(nil) + bitWriterPool.Put(bw) + }() + if eof && len(in) == 0 { + // Just write an EOF block. + // Could be faster... + bw.writeStoredHeader(0, true) + bw.flush() + return bw.err + } + + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + + // For subsequent loops, keep shallow dict reference to avoid alloc+copy. + var inDict []byte + + dst := tokensPool.Get().(*tokens) + dst.Reset() + defer func() { + tokensPool.Put(dst) + }() + + for len(in) > 0 { + todo := in + if len(inDict) > 0 { + if len(todo) > maxStatelessBlock-maxStatelessDict { + todo = todo[:maxStatelessBlock-maxStatelessDict] + } + } else if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] + } + inOrg := in + in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } + // Compress + if len(inDict) == 0 { + statelessEnc(dst, todo, int16(len(dict))) + } else { + statelessEnc(dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict) + } + isEof := eof && len(in) == 0 + + if dst.n == 0 { + bw.writeStoredHeader(len(uncompressed), isEof) + if bw.err != nil { + return bw.err + } + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { + // If we removed less than 1/16th, huffman compress the block. + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) + } else { + bw.writeBlockDynamic(dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + inDict = inOrg[len(uncompressed)-maxStatelessDict:] + dict = nil + dst.Reset() + } + if bw.err != nil { + return bw.err + } + } + if !eof { + // Align, only a stored block can do that. + bw.writeStoredHeader(0, false) + } + bw.flush() + return bw.err +} + +func hashSL(u uint32) uint32 { + return (u * 0x1e35a7bd) >> slTableShift +} + +func load3216(b []byte, i int16) uint32 { + return le.Load32(b, i) +} + +func load6416(b []byte, i int16) uint64 { + return le.Load64(b, i) +} + +func statelessEnc(dst *tokens, src []byte, startAt int16) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + type tableEntry struct { + offset int16 + } + + var table [slTableSize]tableEntry + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src)-int(startAt) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = 0 + return + } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := range startAt { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } + + s := startAt + 1 + nextEmit := startAt + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int16(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3216(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hashSL(cv) + candidate = table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit || nextS <= 0 { + goto emitRemainder + } + + now := load6416(src, nextS) + table[nextHash] = tableEntry{offset: s} + nextHash = hashSL(uint32(now)) + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = table[nextHash] + now >>= 8 + table[nextHash] = tableEntry{offset: s} + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset + l := int16(matchLen(src[s+4:], src[t+4:]) + 4) + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + // Save the match found + dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6416(src, s-2) + o := s - 2 + prevHash := hashSL(uint32(x)) + table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hashSL(uint32(x)) + candidate = table[currHash] + table[currHash] = tableEntry{offset: o + 2} + + if uint32(x) != load3216(src, candidate.offset) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/internal/compress/flate/testdata/fuzz/FuzzEncoding.zip b/internal/compress/flate/testdata/fuzz/FuzzEncoding.zip Binary files differnew file mode 100644 index 00000000..feae35f1 --- /dev/null +++ b/internal/compress/flate/testdata/fuzz/FuzzEncoding.zip diff --git a/internal/compress/flate/testdata/fuzz/encode-raw-corpus.zip b/internal/compress/flate/testdata/fuzz/encode-raw-corpus.zip Binary files differnew file mode 100644 index 00000000..7b33f54f --- /dev/null +++ b/internal/compress/flate/testdata/fuzz/encode-raw-corpus.zip diff --git a/internal/compress/flate/testdata/huffman-null-max.dyn.expect b/internal/compress/flate/testdata/huffman-null-max.dyn.expect Binary files differnew file mode 100644 index 00000000..c0816514 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-null-max.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-null-max.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..c0816514 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-null-max.golden b/internal/compress/flate/testdata/huffman-null-max.golden Binary files differnew file mode 100644 index 00000000..db422ca3 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.golden diff --git a/internal/compress/flate/testdata/huffman-null-max.in b/internal/compress/flate/testdata/huffman-null-max.in Binary files differnew file mode 100644 index 00000000..5dfddf07 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.in diff --git a/internal/compress/flate/testdata/huffman-null-max.sync.expect b/internal/compress/flate/testdata/huffman-null-max.sync.expect Binary files differnew file mode 100644 index 00000000..c0816514 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.sync.expect diff --git a/internal/compress/flate/testdata/huffman-null-max.sync.expect-noinput b/internal/compress/flate/testdata/huffman-null-max.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..c0816514 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-null-max.wb.expect b/internal/compress/flate/testdata/huffman-null-max.wb.expect Binary files differnew file mode 100644 index 00000000..c0816514 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.wb.expect diff --git a/internal/compress/flate/testdata/huffman-null-max.wb.expect-noinput b/internal/compress/flate/testdata/huffman-null-max.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..c0816514 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-null-max.wb.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-pi.dyn.expect b/internal/compress/flate/testdata/huffman-pi.dyn.expect Binary files differnew file mode 100644 index 00000000..e4396ac6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-pi.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-pi.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..e4396ac6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-pi.golden b/internal/compress/flate/testdata/huffman-pi.golden Binary files differnew file mode 100644 index 00000000..23d8f7f9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.golden diff --git a/internal/compress/flate/testdata/huffman-pi.in b/internal/compress/flate/testdata/huffman-pi.in new file mode 100644 index 00000000..efaed434 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.in @@ -0,0 +1 @@ +3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590099465764078951269468398352595709825822620522489407726719478268482601476990902640136394437455305068203496252451749399651431429809190659250937221696461515709858387410597885959772975498930161753928468138268683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244136549762780797715691435997700129616089441694868555848406353422072225828488648158456028506016842739452267467678895252138522549954666727823986456596116354886230577456498035593634568174324112515076069479451096596094025228879710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821682998948722658804857564014270477555132379641451523746234364542858444795265867821051141354735739523113427166102135969536231442952484937187110145765403590279934403742007310578539062198387447808478489683321445713868751943506430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675142691239748940907186494231961567945208095146550225231603881930142093762137855956638937787083039069792077346722182562599661501421503068038447734549202605414665925201497442850732518666002132434088190710486331734649651453905796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-pi.sync.expect b/internal/compress/flate/testdata/huffman-pi.sync.expect Binary files differnew file mode 100644 index 00000000..e4396ac6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.sync.expect diff --git a/internal/compress/flate/testdata/huffman-pi.sync.expect-noinput b/internal/compress/flate/testdata/huffman-pi.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..e4396ac6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-pi.wb.expect b/internal/compress/flate/testdata/huffman-pi.wb.expect Binary files differnew file mode 100644 index 00000000..e4396ac6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.wb.expect diff --git a/internal/compress/flate/testdata/huffman-pi.wb.expect-noinput b/internal/compress/flate/testdata/huffman-pi.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..e4396ac6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-pi.wb.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect b/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect Binary files differnew file mode 100644 index 00000000..09dc798e --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..0c24742f --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-1k.golden b/internal/compress/flate/testdata/huffman-rand-1k.golden Binary files differnew file mode 100644 index 00000000..09dc798e --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.golden diff --git a/internal/compress/flate/testdata/huffman-rand-1k.in b/internal/compress/flate/testdata/huffman-rand-1k.in Binary files differnew file mode 100644 index 00000000..ce038ebb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.in diff --git a/internal/compress/flate/testdata/huffman-rand-1k.sync.expect b/internal/compress/flate/testdata/huffman-rand-1k.sync.expect Binary files differnew file mode 100644 index 00000000..09dc798e --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.sync.expect diff --git a/internal/compress/flate/testdata/huffman-rand-1k.sync.expect-noinput b/internal/compress/flate/testdata/huffman-rand-1k.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..0c24742f --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-1k.wb.expect b/internal/compress/flate/testdata/huffman-rand-1k.wb.expect Binary files differnew file mode 100644 index 00000000..09dc798e --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.wb.expect diff --git a/internal/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput b/internal/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..0c24742f --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect b/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect Binary files differnew file mode 100644 index 00000000..881e59c9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..881e59c9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-limit.golden b/internal/compress/flate/testdata/huffman-rand-limit.golden Binary files differnew file mode 100644 index 00000000..9ca0eb1c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.golden diff --git a/internal/compress/flate/testdata/huffman-rand-limit.in b/internal/compress/flate/testdata/huffman-rand-limit.in new file mode 100644 index 00000000..fb5b1be6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.in @@ -0,0 +1,4 @@ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +øvH +
%¯Âþè ëÉ·ÅÞê}ç>ÚßÿlsÞÌçmIGH°èò1YÞ4Ž[åà 0Â[|]o#© +Œ-#ŸÙíulßýpfæîÙ±nYÕÔYwC8ɯ02 F=gn×rN!OÆàÔ{¥ökÜ*w(ýŽbÚ ç«kQC9/ lu>ô5ýC.÷€uÚê diff --git a/internal/compress/flate/testdata/huffman-rand-limit.sync.expect b/internal/compress/flate/testdata/huffman-rand-limit.sync.expect Binary files differnew file mode 100644 index 00000000..881e59c9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.sync.expect diff --git a/internal/compress/flate/testdata/huffman-rand-limit.sync.expect-noinput b/internal/compress/flate/testdata/huffman-rand-limit.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..881e59c9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-limit.wb.expect b/internal/compress/flate/testdata/huffman-rand-limit.wb.expect Binary files differnew file mode 100644 index 00000000..881e59c9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.wb.expect diff --git a/internal/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput b/internal/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..881e59c9 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-rand-max.golden b/internal/compress/flate/testdata/huffman-rand-max.golden Binary files differnew file mode 100644 index 00000000..47d53c89 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-max.golden diff --git a/internal/compress/flate/testdata/huffman-rand-max.in b/internal/compress/flate/testdata/huffman-rand-max.in Binary files differnew file mode 100644 index 00000000..8418633d --- /dev/null +++ b/internal/compress/flate/testdata/huffman-rand-max.in diff --git a/internal/compress/flate/testdata/huffman-shifts.dyn.expect b/internal/compress/flate/testdata/huffman-shifts.dyn.expect Binary files differnew file mode 100644 index 00000000..7812c1c6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-shifts.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-shifts.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..7812c1c6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-shifts.golden b/internal/compress/flate/testdata/huffman-shifts.golden Binary files differnew file mode 100644 index 00000000..f5133778 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.golden diff --git a/internal/compress/flate/testdata/huffman-shifts.in b/internal/compress/flate/testdata/huffman-shifts.in new file mode 100644 index 00000000..7c7a50d1 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.in @@ -0,0 +1,2 @@ +101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
+232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-shifts.sync.expect b/internal/compress/flate/testdata/huffman-shifts.sync.expect Binary files differnew file mode 100644 index 00000000..7812c1c6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.sync.expect diff --git a/internal/compress/flate/testdata/huffman-shifts.sync.expect-noinput b/internal/compress/flate/testdata/huffman-shifts.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..7812c1c6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-shifts.wb.expect b/internal/compress/flate/testdata/huffman-shifts.wb.expect Binary files differnew file mode 100644 index 00000000..7812c1c6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.wb.expect diff --git a/internal/compress/flate/testdata/huffman-shifts.wb.expect-noinput b/internal/compress/flate/testdata/huffman-shifts.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..7812c1c6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-shifts.wb.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-text-shift.dyn.expect b/internal/compress/flate/testdata/huffman-text-shift.dyn.expect Binary files differnew file mode 100644 index 00000000..71ce3aeb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..71ce3aeb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-text-shift.golden b/internal/compress/flate/testdata/huffman-text-shift.golden Binary files differnew file mode 100644 index 00000000..ff023114 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.golden diff --git a/internal/compress/flate/testdata/huffman-text-shift.in b/internal/compress/flate/testdata/huffman-text-shift.in new file mode 100644 index 00000000..cc5c3ad6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.in @@ -0,0 +1,14 @@ +//Copyright2009ThGoAuthor.Allrightrrvd.
+//UofthiourccodigovrndbyBSD-tyl
+//licnthtcnbfoundinthLICENSEfil.
+
+pckgmin
+
+import"o"
+
+funcmin(){
+ vrb=mk([]byt,65535)
+ f,_:=o.Crt("huffmn-null-mx.in")
+ f.Writ(b)
+}
+ABCDEFGHIJKLMNOPQRSTUVXxyz!"#€%&/?"
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text-shift.sync.expect b/internal/compress/flate/testdata/huffman-text-shift.sync.expect Binary files differnew file mode 100644 index 00000000..71ce3aeb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.sync.expect diff --git a/internal/compress/flate/testdata/huffman-text-shift.sync.expect-noinput b/internal/compress/flate/testdata/huffman-text-shift.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..71ce3aeb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-text-shift.wb.expect b/internal/compress/flate/testdata/huffman-text-shift.wb.expect Binary files differnew file mode 100644 index 00000000..71ce3aeb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.wb.expect diff --git a/internal/compress/flate/testdata/huffman-text-shift.wb.expect-noinput b/internal/compress/flate/testdata/huffman-text-shift.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..71ce3aeb --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text-shift.wb.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-text.dyn.expect b/internal/compress/flate/testdata/huffman-text.dyn.expect new file mode 100644 index 00000000..d448727c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.dyn.expect @@ -0,0 +1 @@ +Ë_Kó0Åñëòœê`KÇó0AasÄ)^Hþ²¥IÉbß»¬_>ç4
a¢=Í-^
á1`_² 1 ìÃÌ Å:ÁYÓà-F66!
A
`Îa€è©C;Aâþô°Nyr4ßUä!¡€GKСøÖ#ÂóÓáør:B[G3Ω.òLè¥õ×¶ýbFRuM]Œ^â³Å(#ZìÐËÕíi
íöÿvÉÙB¯ð
»BH2S]¢u/ýÚçÖœüÖWóTŒG©nýrö
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-text.dyn.expect-noinput new file mode 100644 index 00000000..d448727c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.dyn.expect-noinput @@ -0,0 +1 @@ +Ë_Kó0Åñëòœê`KÇó0AasÄ)^Hþ²¥IÉbß»¬_>ç4
a¢=Í-^
á1`_² 1 ìÃÌ Å:ÁYÓà-F66!
A
`Îa€è©C;Aâþô°Nyr4ßUä!¡€GKСøÖ#ÂóÓáør:B[G3Ω.òLè¥õ×¶ýbFRuM]Œ^â³Å(#ZìÐËÕíi
íöÿvÉÙB¯ð
»BH2S]¢u/ýÚçÖœüÖWóTŒG©nýrö
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.golden b/internal/compress/flate/testdata/huffman-text.golden new file mode 100644 index 00000000..6d34c61f --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.golden @@ -0,0 +1,3 @@ +ÀAKó0ðóxŸÃZØÚñŸLPØaÎ!xâADÒöI&#IEüîþÇp]¢LÆ¿íö¯Fðp² 1Õ88h¢$³ô5SÓà- F66!
)v.ô0Y¢í
ûóÃ&åÅ SÓÀÙN|d£2:åÑ +t|ëàùéxz9 骺£²É×3 +&&=ù£²Ÿ¬ðÃŽUD=Fuòã³]²¬q³ÛýßUL+œÆîö©>FQYÊÂLZÊoüäÜfTßµõEÅŽÒõ{ŽYʶbúeú
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.in b/internal/compress/flate/testdata/huffman-text.in new file mode 100644 index 00000000..73398b98 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.in @@ -0,0 +1,13 @@ +// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "os"
+
+func main() {
+ var b = make([]byte, 65535)
+ f, _ := os.Create("huffman-null-max.in")
+ f.Write(b)
+}
diff --git a/internal/compress/flate/testdata/huffman-text.sync.expect b/internal/compress/flate/testdata/huffman-text.sync.expect new file mode 100644 index 00000000..d448727c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.sync.expect @@ -0,0 +1 @@ +Ë_Kó0Åñëòœê`KÇó0AasÄ)^Hþ²¥IÉbß»¬_>ç4
a¢=Í-^
á1`_² 1 ìÃÌ Å:ÁYÓà-F66!
A
`Îa€è©C;Aâþô°Nyr4ßUä!¡€GKСøÖ#ÂóÓáør:B[G3Ω.òLè¥õ×¶ýbFRuM]Œ^â³Å(#ZìÐËÕíi
íöÿvÉÙB¯ð
»BH2S]¢u/ýÚçÖœüÖWóTŒG©nýrö
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.sync.expect-noinput b/internal/compress/flate/testdata/huffman-text.sync.expect-noinput new file mode 100644 index 00000000..d448727c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.sync.expect-noinput @@ -0,0 +1 @@ +Ë_Kó0Åñëòœê`KÇó0AasÄ)^Hþ²¥IÉbß»¬_>ç4
a¢=Í-^
á1`_² 1 ìÃÌ Å:ÁYÓà-F66!
A
`Îa€è©C;Aâþô°Nyr4ßUä!¡€GKСøÖ#ÂóÓáør:B[G3Ω.òLè¥õ×¶ýbFRuM]Œ^â³Å(#ZìÐËÕíi
íöÿvÉÙB¯ð
»BH2S]¢u/ýÚçÖœüÖWóTŒG©nýrö
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.wb.expect b/internal/compress/flate/testdata/huffman-text.wb.expect new file mode 100644 index 00000000..d448727c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.wb.expect @@ -0,0 +1 @@ +Ë_Kó0Åñëòœê`KÇó0AasÄ)^Hþ²¥IÉbß»¬_>ç4
a¢=Í-^
á1`_² 1 ìÃÌ Å:ÁYÓà-F66!
A
`Îa€è©C;Aâþô°Nyr4ßUä!¡€GKСøÖ#ÂóÓáør:B[G3Ω.òLè¥õ×¶ýbFRuM]Œ^â³Å(#ZìÐËÕíi
íöÿvÉÙB¯ð
»BH2S]¢u/ýÚçÖœüÖWóTŒG©nýrö
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-text.wb.expect-noinput b/internal/compress/flate/testdata/huffman-text.wb.expect-noinput new file mode 100644 index 00000000..d448727c --- /dev/null +++ b/internal/compress/flate/testdata/huffman-text.wb.expect-noinput @@ -0,0 +1 @@ +Ë_Kó0Åñëòœê`KÇó0AasÄ)^Hþ²¥IÉbß»¬_>ç4
a¢=Í-^
á1`_² 1 ìÃÌ Å:ÁYÓà-F66!
A
`Îa€è©C;Aâþô°Nyr4ßUä!¡€GKСøÖ#ÂóÓáør:B[G3Ω.òLè¥õ×¶ýbFRuM]Œ^â³Å(#ZìÐËÕíi
íöÿvÉÙB¯ð
»BH2S]¢u/ýÚçÖœüÖWóTŒG©nýrö
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-zero.dyn.expect b/internal/compress/flate/testdata/huffman-zero.dyn.expect Binary files differnew file mode 100644 index 00000000..dbe401c5 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.dyn.expect diff --git a/internal/compress/flate/testdata/huffman-zero.dyn.expect-noinput b/internal/compress/flate/testdata/huffman-zero.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..dbe401c5 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-zero.golden b/internal/compress/flate/testdata/huffman-zero.golden Binary files differnew file mode 100644 index 00000000..5abdbaff --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.golden diff --git a/internal/compress/flate/testdata/huffman-zero.in b/internal/compress/flate/testdata/huffman-zero.in new file mode 100644 index 00000000..349be0e6 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.in @@ -0,0 +1 @@ +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
\ No newline at end of file diff --git a/internal/compress/flate/testdata/huffman-zero.sync.expect b/internal/compress/flate/testdata/huffman-zero.sync.expect Binary files differnew file mode 100644 index 00000000..dbe401c5 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.sync.expect diff --git a/internal/compress/flate/testdata/huffman-zero.sync.expect-noinput b/internal/compress/flate/testdata/huffman-zero.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..dbe401c5 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.sync.expect-noinput diff --git a/internal/compress/flate/testdata/huffman-zero.wb.expect b/internal/compress/flate/testdata/huffman-zero.wb.expect Binary files differnew file mode 100644 index 00000000..dbe401c5 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.wb.expect diff --git a/internal/compress/flate/testdata/huffman-zero.wb.expect-noinput b/internal/compress/flate/testdata/huffman-zero.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..dbe401c5 --- /dev/null +++ b/internal/compress/flate/testdata/huffman-zero.wb.expect-noinput diff --git a/internal/compress/flate/testdata/null-long-match.dyn.expect-noinput b/internal/compress/flate/testdata/null-long-match.dyn.expect-noinput Binary files differnew file mode 100644 index 00000000..8b92d9fc --- /dev/null +++ b/internal/compress/flate/testdata/null-long-match.dyn.expect-noinput diff --git a/internal/compress/flate/testdata/null-long-match.sync.expect-noinput b/internal/compress/flate/testdata/null-long-match.sync.expect-noinput Binary files differnew file mode 100644 index 00000000..8b92d9fc --- /dev/null +++ b/internal/compress/flate/testdata/null-long-match.sync.expect-noinput diff --git a/internal/compress/flate/testdata/null-long-match.wb.expect-noinput b/internal/compress/flate/testdata/null-long-match.wb.expect-noinput Binary files differnew file mode 100644 index 00000000..8b92d9fc --- /dev/null +++ b/internal/compress/flate/testdata/null-long-match.wb.expect-noinput diff --git a/internal/compress/flate/testdata/partial-block b/internal/compress/flate/testdata/partial-block new file mode 100644 index 00000000..b14e816a --- /dev/null +++ b/internal/compress/flate/testdata/partial-block @@ -0,0 +1 @@ +ÊHÍÉÉ×Q(Ï/ÊI
\ No newline at end of file diff --git a/internal/compress/flate/testdata/regression.zip b/internal/compress/flate/testdata/regression.zip Binary files differnew file mode 100644 index 00000000..73cf8403 --- /dev/null +++ b/internal/compress/flate/testdata/regression.zip diff --git a/internal/compress/flate/testdata/tokens.bin b/internal/compress/flate/testdata/tokens.bin new file mode 100644 index 00000000..b93c6968 --- /dev/null +++ b/internal/compress/flate/testdata/tokens.bin @@ -0,0 +1,63 @@ +<mediawiki xmlns="http://www..org/xml/export-0.3/"°:xsiŽw32001/XMLSchema-instance" xsi:sLocationóÎ šÈ.xsd" version="0.3:lang="en"> + <siteinfo> + name>Wikipú</¢base>ëen.¶š/Main_Page</¬·generator>MediaWiki 1.6alpha</cžfirst-letter</ <spaces key="-2"é</¬«¬1">SpecialÀ0" /ɪ1">TalkÅÀ2">User©À3© tØÆ4">ÜÝÀ5®âÊ6">ImageÀ7ªÚÊ8µœÀ9®âÊ10">TemplateâÀ1Ó®âÌ2">HelpÞÂ3ªÚÌ4">CategoryœÂ5®âÌ00">PortÄ101àŠÃs·Òõ<pag<title>AaA</id>1</idreviÔ32899315ŠÎmestamp>2005-12-27T18:46:47ZãØ <contribu<userJsmethers</Šid>614213«ìÓ <text xml:Ã="preserve">#REDIRECT [[AAA]]</¬æ</û</ºÃ lgeriAãÒ€ Ç€18063769§Ç07-03T11:13:13ZãÇØDocuª802ýÀ€minorcomment>adding cur_id=5: {{R from CamelCase}}Ò®ÎÔa]]ÔŠ§ämericanSamoaÉ6°9ÔË4:1õ
äŒto°6 ÛŠ°èÝ Þ·ppliedEthics·8·º5898943Š·2-02-25T15:43:11·Ðip>Conø script</iäÐÔAutomated cÔÁ¿äŽ ethics]] +³èccessibleComputing10ºŠº°3-0422:18:38ZäèØAms80¬ª75ÔFixing redirectŒäÌ_cÍôìdž ¹ºê 9-22T16:02:5ændre Engelsª300ÞÀ
×ɶda programming uageéìnarchismï2Ãø
€4213683ï6-û
T01:41:25ïâCJames7ßìŠ83238«Î©¢èª/* t Communism */ too many bracketsÙÊ{{Öm}} +'''åm''' originas a term of abuse ¯ usgainst early [[work®class]] [[radical]]s includthe [[DiggerŠofEnglish Revolution]] and±sans-culotte|''s''ÈFrencÇ.[Îuk.encarta.msn.com/encyclop_761568770/£.html] Whilstis stillin a pejorative wayódescribe ''"any act thaÆviolent means³troyéorganiz¿¢©societyË''<ref>àwww.cas.sc.edu/socy/faculty/deflem/zhistorintpolýá History of Intúµ£al Police CoopeÞon],ðfinal protocols¥¢ÈConference¥Rome for®Social Defense AÛ
tsŸ, 1898</÷, it h©
lso been taken up posilabel by self-definÅ
æts. + +The word '''is [[etymology|derived]]À[[Greekö|Greek]] ''[[Wik¶ary:&#945;57;6163±45;|Æö]]'' (ªwithout [[÷üs (ruler, chief, king)).m§[[politð philosophy]], is¡beliefù
''Ùs'' are unneðaryñshould be aboed, althoughÞre·differÕi±
pret
sßwha®isÚ°«referñrelï[[s
moveŒ]]s)°advÆ©eü elimièauthoritarian instituïs, particul£
[[state]].Å
ŒìšeìšDª
iÐ_of_±
m] on WÈ, aed 2006öŒ ð
Õ[[öÛmostÜts it, does not imply [[chaos]], [[nihilism]], orÔnomie]], but raÄ a harmoniouàanti-Í]]ò. In pla»ÛregardÇs
structuresÜcoercive ecoc²ÆtsõalšØ õ«d upon [[¯ntŒ
s©£autonomßindividuals, [[mutual aid£õ
[[govünce]]. Ñ +üeüeasily Ìbyit is ÜÔÑù
offerî¢Âœtheyve to truly freHowever, ideÿb¿how an«t»ety mightík vèconsiderably, esly rt§s; µeÇdisagreªýŠ÷be brou§. + +== Oípredecessors == + +[[Peter Kropotkin|³os, arguaéfo
corded [[úory³humanÁ wasâed oµprinciples.å¶®.ì[[MäAid: A Factor¿E²
¥
ª'', 1902.MùthropologÂfollow»
öævhunter-gašer bands wÆegalµÿ
lÊd di
§l«rÁumud weÖdecree¹wå
d had eq×óŸresourcÈFriedrich»|€Freiʵarx¡Ùve//Ü
s/1884/oØ-family/indexæðŠFamiþ
PrivÞProperty,°S®¥884ÉÄ +[[þ°:WilliamGodwin.jpg|thumb|right|150px|£ €]] + +Ætœ€y OŠ
sÚÞrray Rothbard|]] findattitude¢[[TaoisºÆ[[H¡China|AnciÕ¬(Toronto)¶á§Â.''ð¬pril 14]] [[2002Ütoxicpop.co.uk/library/tÎÁT¡ mirrorŸgeocitiesÚSoHo/5705¿anÉ Vanity siteÂÎà°, üëlewrockwellr/a×-chineseÑðese LiberÒTÈé¬an extrËÃ
¢mise¥journals/jls/9_2_3.pdfceptRolâºllecÀ
¢Change Toward Laissez FaireÒJôÌÍStudies, 9 (2) Fall 1990Ô éŽ found similarþØ
stoicism|Û[Zenò CitiumÓAccord¢oÇ, ZenorepudiïomnipotºsýùtÙ²n¢ñ
regi¿«ÜoclaimÐsäeigntymoral law°. ô
b¹crayonö
îµ.jspœbritt1910Àl, wen by ðãEa Britannica, 1910]ŽÀ[[AnabaptistÄof 16th century Euãœsomeð©ñreligi·Írunn¬of modernÚm. [[BertråRussellin û''ÛWestŒPÊwrites®Ÿs
îsincñey helÕŸgood€ will be guiat y mo¬[[the Holy Spirit]]...[f]ºpreÅy arrive a€î€¥².¡»|œÿºÒ'' in ''A¡Çä
connecÈ ùal circum«ÅsÀºÊ¥iest
®¹¹nt da®1945«Î¥ (True Levºrs)|rЊé׬àtic dur¶¥ime
ŒŠCivil War
ð€é¡ô
®×
ºÌìzpubånotes/aan-ßÙœ
Üt Timeline],áɲ94ïÓ +InÙïeraÒ©to§toçÞthingn î[[Louis-Armand de Lom d'Arce de Lahontan, Baron |¹ ]]ôNouveaux voyages dݧl'Amérique septentrãŠeØ(1703), w© heœšÜäNÛšò·øÿUniš©s|indigenº©ÆÝóhad noÄlaw«¡risŽ¡rsâppŽrÀas beó¢
yº÷ªeµ°.lib.virginia©cgi-local/DHI/dhi.cgi?id=dv1-12 Dic©ŠÛ
öIç- ANARCHISM]ø³â Means¯¥lÊѬleader¢Ø§ IndãMÿ£,Ûšrepely±at hœòùsoü°[his³cestorsä
1793¬êºkø¢Èۢà pub€Š ''An EnquiryÙrning«§Justice©web.bilkent.edu.tr/On·Ë¬e¯.upennÂjlynch/Frank/Ç/pjtpÛ]. A§§ did noíäàªÀú
˱ÞrÒhavá£book
majoµ ÷ÿŠže鲩Àm Buµõpoint no¥tyet exist£©®±ãtïìknown mainly¯šult hurl¬[[bourgeoi¶[[GirondiÓ
mà ݲ el«
·rȲ. + +==TŽ®®ôÕ==ÌPierre_Joseph_ProudhoÕ110px|thumb|left|« «Â«]] +{{ma¥rticles|[[Ö-ª¹ïîž(õ£ory)]]}} + +Itý£oÂùit wasn't until¿ê ®[[WÉisù?®in 1840œÑîØadopÓÝŒp. Iâì±iÏasoòaŠme ¹Åó¬
úorªÃ€Ìnsw»Öa©Ó¢Ï×[[¬thefäI®is â¡oppoªÐª³òŒ£¹ž (propriétéëown¡ï¢¿lete rightº¿ß¹ýi£¶êÁšsh, such as exploiõ¿ûképrofitæ éÍ=ÖpÁ
-Øâ¶Òš|ñ
, õÞìïÚ¿€
±×/subject/ø
s/ü/ch03.htm
p3. L£Šû effiâ ca±¡doöˬ¿¢š±·õ¯ supporѬhe caê'possesß§' -âs canlimiº·¥ÈƧ, capitã§ndíúaÔãé
Àªõšö¢³j¬±'s õšå«œµ¯º] (¯ellisme), involžÂxcŠ¡Ì¯Óîgroups cíµtrad¡produc¢irªr usÃ
''ÿ''ÐreŠe amount of±·°€inæ TÛensure¹¡n²°®âofès. WÝÏ«°ly join toge€öco-Í
Ëshop·Å·est-» bankúb¯t upæprovideͰ°ð«¿äì²£ñinfluentialÉin ø¹ÑÁñí¯ê¬iÆllowÐÉacÌ[[R¥1848¢ºce.ýø€x:â deve®ÁÊa numbЮs overlifÜ¡ýô¡ŽºŽofÀ£ÂForòdetail£¯scuÅ seêìŠ|õ
]].'' + +==Max Stirner's EgѪ==€ š®åÝEgoIts Own''ã³ù±st¶«® Àß®noÌ ofà§¿
îa¥, natur¢žïãlþ ®
Íû¶-ŒmilluÕŽ ''ghostsÕ
iøsay
¬¥§ç°¶tóÏHeݹd eÜÆamÜamorêåØuû
'û¹ùÔts' ©whe©¬ irôÚdo so. For himësäŒcomeéžµÀØž:þWhoå¥és¹toÄ,defe€éghim¶µongð¥Ü§A®ÙWÈI¥in my powÂáçÀyÉ. So longIŒert myìalder, I aÇòrie· + +³ nØÈhimÐ-«Ü
÷Îè ''. N¡theless,ê
o«nyö¥¥»-€ôŒê£ÃÃçÁú¿divers==ÅÙéÚŠBenjaminTuckeræµàµüš¥ Š]]€î¿ÒÈÛ³ š1825 [[Josiah Warreë å¢ÄipèšÂÌÏÞ²]] experiÅ heaì«RobºOwe€öÎŒNew Harmon§Ãfàfew years amidst mucÏËconflict.š blamšýÖ«ty'sÒoæâ¯[[Å°ššž€. î±eedoþœseØÃš©iesèÀŒ³Ú€Äܱ ÀŠÅ[[Utopia (ïy)|×[[M÷¯T¬Ä³õ33ÔwrotÞ Peaceful istÌÐòøÑb«periodÿûé. ï
£íatö¡ÙmaníóεŒfor矊doctrinwÑϺª¢(''à¶y'' XIV (Dece®00):1)Í becam÷¡ýáeetò߯ŠB. òÎnïÝediÏÔšAugustÔŸ1Œ908;¬ïide÷¬Ån¬°Ûist-œ®issýõÀØŠ°Ð. «'òºößincorporaçÕ°€aáÆÜþorists: 'ÚØÑ|Îalö¡Þ;¯cÉíŠàprice|ªõheterodoxics|]]³[[l㜯æ¡åvalue]]);í]]'s marke»»Á'õ¯mÔaÑ[[HerÝSpencŠ€º²Ådom£ÓÌong¬éª'sÕ·ÍãeÛ¿ä[[ó®ÊàÆÈa»Ÿ¡tÆ-paó¢§ü|첡걡ܻŒî37¡¡«Pay â®ó¿y: Sel²·s F·Wr²£Ï
¡ R.Ð,ÆÂguard Press, Yorkò26, Kraus Re ŸÞ., Millwood, NYŠ73.Ë¡[[Ê
Ò§€üþ¥«ûâàø€ÎÊsystem¡Î
Á°Že abun²¡of ëpetèÂÜèÔºfÅÒ Ý°÷receivfull³Ûîérõ¡O 19Ÿ¹
ÉÛludîLysa³®Spoon²[[StepÒP¹¥rewŒÊ¡[[ViêÌYarrosþ¬FïýÛÃrnaéalÛakunin»é¬ã¬ØÍÙ£|Mikhail 1814-1876ë¬óª¡ingmÝAµÓÜáÍM¯§ìÀÀŽrsh reöon þû®À
û. Twentyí±ž64£ÊŽÜ¶ŠÀ'Ý ',¢ëmôÃanšó¶currenÿö
×. DâÑoÍgenuinénks÷ေœî×ôsign«©ant
østarKarlû]] ·a¶·figÈÜ:Ño eøsucõ¿Gene«Counci ÈŸÀ. ÂȲo«÷×to ¶Á¹ž±|±t«³who©®ÊÔ
 ShortŽfЫ[[Ø
°ársÔŠ1868Ìü êpolari®¯into two camps,äé
ËÆirìÙôs®š³±earÿЀà betwãáëȱÒÿyŒÕäÚrýûfavoured (inÊ'ï rds)€ð©åÛggle饬À¡æÿè²àŠÄpar¿ÒeÊÛgi¿Ê A¿ŽùÂ
ëfocÕçonÎÅity. + +écha¿ÎeÐéa²×àî€ÙiÙ
iìºða¥þo¡®ðœÃÈ«endʪȿa²µ®[[rulØü¿ëÚÈa¶ÚØst²ì¿¬
|Û ÕŠlñcyc¯Ìhp/adpage.phpÀ1969 S¿Í®Ž±±1872±Ü climaxЬ§éspli¢ÛŠ
two¯ÛHague CongÛ (1872)|µ®iáÂÍcµ±Îÿ×ÚÒt_Í_to_Žm|¡n®öªñ€Ê˵˹Åêdemocracy|§tÑòæn©À[[ŽãÓ©®Â''ðš¥d€nÐÄ®»ÇlòÆ
of £-w»Äés|''.]]€¿¡€°¬|Ù}}Š€ÌП͚€äÚÚ€Í
bothÇæ
µÕtíÜ¡âì1870±
ðÛ¬d awayÒö'sŸi (ïÏco¢Ö±Þm)žembracÑýª€Ós.ËtÈÇå㟳² 룻Ï×iveµÝÈ
·bßsÝëÌneed,£èûÙnefac.net/node/157ÝûÍÚž¬¶ÀDéjacqueø€perÓŸä«be£ªÚÜ
ò
ªÇ±ôÇãóonbooks²bleed/ÑÍÕ/De€Î×ÛªíÆjoseph.d¬.éµ.fr/ecrits/lettreapjpÀ De l'être-¹äõ¬Ã¢le et femelle - L³ à P.Jåµ±] (§Ùáµ€û|]])ØUnlike·¶,Üôªíèää®ÍâŠÛÊÁ² to쮯he Ìsfaöºí°ÔºhatÜ ma®éµir³e³å±nnounce©ø¡ŠÏUS pub
£ed ¢Ý Leaireú58-1861). + +PÁ¡¯,øsee³ëimp¬nü¢, outl×ConqueªBrend FielïœçªÀžºHe fel«-
¥Èbeneål©ýºä,îŽÃµ×Ä·è°97). Subsequºµ êÈ Emma GoldôšÀAlex×Berkman. M©Ìo-syndßÔ(õž²³w) saw£ðõöŸve. Isaac PuentŠ1932ÙþÙo÷rio¶ÓÇbŲSpanýCNTìmanifestoš ëøt-Ñ׬î + +Sÿšsœliked mergÚº² m. Sç
§¹»¢ê ÁaintaÞã¶öº¯Ü®Ô©s
Õñy. ð»exaÇ,ü¢ù¢wþÀess¹Š
óå߀÷
÷ÆzeteticÓmac±àdebates/apx1pubsl],Öšípseudo«.×§Šž/šŠöÌagandaùûdeedÎJohannîØÖ¥¡±¡[[¡ÀîŸnspokºúºßŠªÙÿŠþÍó¬ÊÿõýïÆrayÈíØgerØêá±÷u¥i¹Î²»ÀhighîÉfilôt³sü»ÁÁ[[riot]]s£¢assaýîËÑrreŽý[[tešê¢̡[[ž]]aèýª¢¥encouraged€šÇŸce, ôÌÏÒmb]]КÔÃǧadìî¿|íÿo furtº«Ä¢éùæªen§£'Þ«ÞÚÞ]]'¢One
¿³ýºgÐóŠo saidѧÕÉ©¡äquickeÏdÁÜÔlyŒ³w£çÅanÝûø«ÃŽn¹€ereÜàÖ×ssacøúÒenemØpeople mus÷âÇâðÂÂ{{fact}} í'«¶ÿ³æßošß, dynamite, earÛ¿ŽåükerDŠŸ + +ªªØÛnoënsensuÍ£Þݱegitimacyuti×ÂáøÄýÃ.У¬[[ErricÏ€ß
staŽÇùӞ؊«ÃédesirableÅ
ݧsetýs.ÜÙème §
ùy dºà¯Ùõé. (·îÇOn V»c ÓÐÎĪfNechaevØÎõÖidentifiÙ¥÷o-pacif¬|ÀËä°ÓÔ[[nonv¢Ê[[Leo TolstoÍŸoseËiåvieÝ«Šº[[ChÛ¶ŽàÁ µþ»Ênot³ÁÖŠtÜi©éÇ®ž
«¶ÓÊsee£þ|¬Ç¥ÉFlaÉÈŠ îm.sv¯75pxËéred-and-bÀflag, comÓºÑɿٮ º¥§ÒŒ£ñ«Õ¢î±y 20å±ÿ«ã¥°ÀÁÅ
Ù¥ŠñÙÓÇ׊©é®óÔ pursuѲindö±«®Ù[[g£k³©prim©ö°Øí¢°ºº¡ªghÇŽŸõ + +A¥[[PԬΠe|1871ÕÂÍ]] Ô ¯
«rgedÕõÒcÍ''BoursesëTravailÝÕùÃës°§îâ¶ÖunioãŠvâÝ[[Confédér·Génle du𩜰nfedÎofñ, CGT)¹¶1895¡Æ¡å¶
Â.ÿmìPataudå¯ougetºœô€áCGT saw¢Ÿ®ÖÅ]] ÔŽÔ²ûŸ»Ò. É191ýŽ×³¥·Ámãappeßú[[BolshevÙ. ú-styleÁºaû³ific¢ôô®ª
Ú¶é
o 1921Åremߌ®Spainþã©mid¬ó0Ùß¶øÍÉŸhe Worl¶(IWW),ùâœ1905¥USšoâ¯ÇÿûÐsát€ßush¡¯È¿ÛϲâÈ923 100,000 mÓÆìµdÖ±
ÁÍ£Ú3²åliciëŽÑ¡æ¯À«by rankó̬,Õšody×a s¯øðûhnspi€ÒAnglophªÛð«CNT_tu_votar_y_ellos_decûâÃã
žÇ2004. Reads: DoèletãiÜÖ ôÂlives/ You voÍÐ
cide/ÃaæÛ it/êòy, Aè, Self-managÂ
§ 㢠ÓïÒÇ¥ ¬'s,õ0äýÅÈÌŹssful²ÛÖción NacØŸ delÊbajoÙŸôÚ®Ãur:Ú¡ü
ýPîÑñ÷¯¯ï¢sЊ®aÑ
hipÉ1.58 millΟŸ934
êplayërolè[[£ÿùøŽSeeª:ïñÖ¿ÎÑSµÞkÏRicardo Flores MagónÚkeyºß¥Mexå±ö ßatin÷|£¥ùÖÈåexteüïîZapªta ArmûÜŒ©Ž|¥]] rebªåرÅÞªory occup²îin ArgÖnaBerlin192î¶œëá³îêœÙãå
ô[[žÄ ŒContemporÌËminuµÜù슚
¯áÔ;šÖsmalleíóçs, 20s30s.ŽlargŸÃÊ»ÈÛtodaËùÙÿÀÊÉ®NTîÀÇðæpaid-upà6ÇöÐÈñÕ÷vÖÿŠï©[[s ©ÙîŽê ŽŽØõÆ»€ŒÓÄSolidarity Aÿ¥«KÓ£FÞø×îÚÒÂÄ×ì
óç2»ÃÀ. écÍcŸíôàÎ¥
mïæ€ßé
³ÀžšÇÙÛ«dëàœ°âç¡€ê·. P©Šs×
ÔÀBob BlackÑÜÿ¿€©sœÅŸÄŠÂγManŒÔÛ|ÎÔ£ðŽí«Ž¢ up€â Óýs©Ä
ÂâÔ«¢Û«mainô1917øªœÞ£Šßseis°»eÆÿ¢Ôñ²€íþ ñ«¥Ëàa°åùâÙk»ŽœFebrud Octoberºs£§œÑ€ù§ÔáßßïpÛœŠŠurn«²Ä¢ôñÇž §
ÀÝŸiÄŽ¢áë°ch culðŠ1918 [[KroñÐd¥¢ß ²inâÑœ²ýÂimí
ör d׺derÄ¡ØorÔïvÏѧ¥s¡[[Ukraæ®ì¢ãŽÆäº |civil waãíš³Whiù¥ëùÍMakhnovshÅö°asa®¯ÿ ŒäNë«]]). + +Expðÿï
°ú¢·n㊷ïá¡leav×amongñÆŠÃÊ©sponseË×c
çóïþãu¡ing. BothªÚicÿ÷uóÑiǪ°â¥Š, ãÐxpo×¥îŒÒ£¶themðÍ©¯ÜËò¶u÷ɺøofºÇõÐÊÀõl×oµØ
êâæÉÀÂÁ§resulìéësœäuŽ ÿýç¶ly»ûŠêvã
¡¢ãÁè¬ÞÌ;Š€ë§eÄ£ew ûø³Ï§úŒ¬üÉöšUSŠ¢óº÷Âí¶[[CGTŸñ
IWWã
gægýmselves©ÇÁ¹çÊmÌ|¡ŠœIn ÆŠ¬Éelo Truda]] pŠÒðÍiles çŸ÷Ä
¡aã¢në newÏõ¿µðÒÖ®¯Áý°Tþßß¡Ÿ,¿èŒ[[PlatûŽ¢O¿Ìðž×¬Ò¿Ó
Æâ¯ó²ãÜÈ©®ß¢èÉús. ''üùͺõÙÒ¡±õ®ÂëÙʞ䢿
Þñƶ¬áõ«û'œÄ Ÿù ', 'tacÊ Éväibµ'Ò
' ÜÏ'.ܯ§ñš€ÕI²š¡UK'Á÷°Šèµ [[North EasªóÂéðÀn¹ežöébñ®¡Canadaøfçåfas££û Aµ Žž ëÐCNT-ǵ¶Ù-car-ع¥·á¿¥270px|¡ÁÎ1936²MšèÚèçøµâ carØ¡œ×žŒ»¹µôŠžÑÉÍvÖ|œ Ÿ©]] +Ió1920Þ193ú£ÖiØÈº±ŸÁíãô¶ß©ü sûäÚ§Ÿû¬ç¬³âs,×õcffiíÁ choiceÎÿÍŒ×§ÒØœp»Ÿšè»©¹œ÷©®Soviet-ÝÙì®ÔŽÝtëœy? Luigi Fabbrií¢»¯Itaû²mÏÂrguӌڪ€÷»ØÆóÿÞt: + +:÷ºFá
Œ¹Ý³ð anêàêçžõ,û¥šs, u³¹®ü®©Ýÿ³Èëimaginò¹Ð·Øutæ©glor°Û¬ÙªpžÞÅÚ
·yÝ¿ÃÀí²Âʧ×ÕtÊæÚl¹ðÌŲªÎ© rioØÒËÓÑa '¢eont'ëÖ¢or.univ-montp3.fr/ra_forum/en/ Â/berry_david/m_or_á £Ê£§Ö©ÃÑùŸñto±Í
Ý£¬a£Ýò¥Ìbyó§ôrs ÍÈé ÞÛŠ€ÐñÀžš36,ÊÅ€úÆÐ¥helîbÙ»µbœîá. MonthßêØáŠpon€Ë¢¹
t˧coupñå§ŠŽÆ®Â-39)ü²w³ÜrepüÁ È|Þt-䵚fá
wü¢øaÍmilitiaíšÀrolÂ[[ciÓôÎrcelon£¬ø¥ßЩ·¢øf ruŒ×ÂÃeyÆŒ·ÍzÂó£ý¢ºªêÿ«Š9»®²los˹³Ã
bÁ°àèè×»Ááúžââ÷æö°ÛÃɹ¯µø ¢Íç¶ðš§tro¬ÜŠ ÐÔ
Ù.Äå»troopsœå¹°ÈÛøËÝecu
[[POUM|disï£nàŒržé£¯€çÔ°î197ŸöêÍÉÙneo|©¶I¥Ìª»ŸKingdomžÀ
þÁŽ
aäÿ|òø¥ºß
©]]Ù€ðyÌ©Ðrecÿ¹comb³güþphysí °ŠÉ°rely°Ý¥¶ÊМïí9§°€œžtendééºçüUSm¯µ«ti-RaËïžýUS)¶åKfa]]÷RʵëËÕLeoþÉ®šòöÊ|šÊñ8-1910¶ÕÅÊŠª
³ç¶Ð +äÏ¥t culÝÄïbälifÞoutûË athieÂäuçÇÚšÛì«ò èsŸÀorû§Á±±oÕŒlas°ùí·ïÔŠ ¬ß¬diâ€ØÃÑô£Â¡
ç⟠¢Í ̵íÒØerÚ¡ÐGodÇߨõÊthly
€ü°íöÑ»churcheÁÔÑãÀJesus' teaò©gõ¹öÑticÃäorruÖû¡Ñi¥ódeclÿ€oÂé±Ù. ÙÏï÷šèŸ¥®Þ× cheekœÌricÂ
ÍÑmÖѵ
ºõœ§ÔÄÔ¢¿Š²þïÒ£of God¯¿in YouýïÜ»ÙÛŠaÛ
·baséÒäܺ¯·Ñ÷ÈÈ¶Ä â¬ [[»«ïŽšªy¹·occaø[[taxÅ| tax߻Ω€[[vegeꀯ»všÛ. + +Õ®ñ
Ûç¹rooÈs olí³ñè's birÜÍ[[á
ÿºxhibðÚø
ÀÏÎîî·žalïœÎ. By®obey utteë늺õBibl«æû ×ЩÁçÚÔsixteenÓ»ô¯óµÓð'-ò¹©Ð«¶Ž§iÇŽ©ÞÓ€t¿obeû·ÅÉoÆóá¡rejeû¢ý)±»ûhieräû
÷ª(²indµÑnonŠåÁú®ºÄóÄÞçgod¯çàßÜÕ£iteªtypÍþµó œbeginnÚ««¹iÓØÅabal§€,áÔ¬ßmodelsöÎÅÀÒ£ëÅßÅ鞺ç€Ñ€ò⩜ÌÃÐi-Xu³[[Budd¿ší®óî¡€by ïŸù[[well-fiÇÆâ¯Ì®ªãÏýæÿûŸenv·¬ÒžŠÌ¢»¶µëãØ¹¥óîÌÉaçnù±€äëminâšåStarhawk²hoÂÔe¶ÐtenÉ¿²µÜói¢
[[a¿÷ŪµfemÕÇÛï-4ËÞž¶|Ùa-FÓEõŠõîŠõë[[Jenny d'HéØâurå[[Juliette AdamÖªŸÃÌ
Á[[mysogynóäºó§ößÖué185ÞÔ €¿a-fÄa°æÑî¹
Ø¡åŠÈòªáÊpatriõ]ÎfæâæroblemÚâ𠹌l¹£øŠªµÔáÀÖa¡ÛdúÐб¶©Ô«aÌÍÛç'' dï£ÇÂ70sôî×îõïÄ¿ÚsallydaòîoTwoòªòðÝ¡ - Twoä¥ - Wñ
¿¡:Ó°Þ²tµûÄ,Öãond-wavÜöÅ|«®Ò³ÓÚ,µä
ÍÞ¬Üåû®œ¢öûî©Ïð¹uÜ£Œ®³ú§ÍÏ êÈ
dØœÑðÝ©fema«³Ø¯Šçï©Ë²×ÏyÌëÖÊ·ä×ÁÔ¬ªáîáÔîÔÀìàåÝûȳî
²žÅæÈžâðܹøŒcreñg€ÒÔ¥Õׯ©¹ ÈÃÃþ¥Ë»ìáå§ÍÕ schooðaddÓsÿÖ×Ç«[[Eco-ÔéûßžºâóÞof žæÿÄöþßÑÏ
ßñÊšçø20th-cܧ¯òÕª[[Voltairàde CleyÆŽÏ»šíÄšÄMóÎWollstonecraËÁȬ»°otoøöšews
ÙÊìçÌŠÂprecursÜItïŽb®¹ÇæÓ²ìyŎ該ÄÖîèê÷̵û²Miss÷ìist;±£ist. SöÑðùᢡ²Á±ª¯æ¡rÇ. I make óœar¡îýileԬ̰³Õôß ÏaµÐÿ©ŠñÑì¥õªSÿôŸÃĬtir«lЬ±ÑœÅäºÞ¢æ¶Ûöalw«ú⟱ӢŒï¶ói»žµÜªÚ±€Ï
Ò¿±µFreÚåmené ŸèãÞf
Ì
£
.Û±Þrn day°£ËÐáìªðÍÆëä×ÑgroÝŽ Úof Quiet RumoĢЎéÑŽto sprõÊkiúú¬ª
âè¶êbroå. Wendy McElÔÿüœµ
m takeâèû¯šÒ«°òlíü
websiteýiÛts.net I-Ȭo-íǬªé Smile.JPGÒ¬¬Ø®È-1995)ð}} +
òš«êᯱs-«¥ÿÎÆ¶ðÖªüÝë¿ý
£ö[[fãí§À±ï€Áó§branᩞÈÝŸtÊúçðÝãœÒŒÜÐtÊÓo￳gáÁ³Àù»©¬£ynthÓŠµ[[ÒÔ ï÷
[[AÝîn© µÿgerʲÙך¹Ïʳ°êëùßÀ¥ øÖ©¯Æú-agÅ©œ¿©ªíepçÞó¹LawÃCìôÂÍÔ°³
§(Åž×David ïôm²š)ÊÞacµšJan NarvesÚ)¥Ç¹m|õ§[[Ayn RanÙüÆŒNozick¬ÝŒAÕinleÉ£©®¢æâžÑ·Áݱڱ¹áÛ®õÁoöŸÏ°¥HarÐ
ÌRalph Raico]]ºßúÛ¶ã¢píRôñ¬¯²®ÙüGust§«de MoláiAuberon »Ž]] Ả|Ã, Úë·praxeology.net/MR-GM-PSÿPre×ÉÁÁ³¯P·Ï±SecuͲ¯ÀŠsÿÁ
J. Huston McCulloch, Ož® PaîŒ Se÷Ë #2 (Ri¥d M. E¡ŠÆEditor)ç²:¯ Ce¯Øü¥ÓãòMayœ7ëŽÛÅ-harÔÚ§µ|£ÉÑíóôáy/1787âuÔË꬜œÄ²Ïury''] EcáðPolytechnŽç,rRechercþ§ Ep÷mologie Apple,
é§Ã©e au CNRS (ªõ) OpÃ
ÝÈspuhתœçsÂãMcKaain; Elkin, Gary; Neal, D×
''eò¶infoΣfaq/Ând11ÙÈ Repî
ûEÐd D¡Éæª Bry¥×aÀѺ²ÈT¿y FAQçÎ5.2]§°£¬FAQ V11.2''ºödË20,÷÷6áÌ×ðè̀Ⳝ̯whe¡·ÛŸÉÚlèÆÍÙ¢Ä²àŠ®áiâžb¡þ ü¶å¢ü¶øª š
ï¬ÂÕêî¡m|Gåm|Eco-óñعČôÊû·Ì³ÿuntÀÀÞÄtak¯àÆhe£Áŵ®.Ç¡Ü
[[Ìïç®öÚ[[deepÛȯ«Žldviewþ¯Š»bâÆØÇ€Êå[[s¹µÑßÊË¡§ 튰¡Êearth-äªÖí¿. OfªâíÁ±éÓ Ø°éEÀø³!]]ô¹êakeæãÎñ¢t°sit¬Anoì
àöé[[e¥Åþsee
ÞšãoҰð aphýúªwŽÇÄmŠÉ©ª¯qÆœÄò¹¶òØ¥×óþ ¥§³elf.¥šP§íÈëøìûÁ§vocúa reÝ¿º³pre-§¿usuåagrÐ๯I×ÛÏóªÇ. iséchnï
úÖéŠ[[alieŽ|°ÏîÔ§žáÍÞÁišÝÚçÓ͵ÓÑÓ©á
ŠËLudËꣵþžJean-J¡ªs RoÕþau]]. ¬þüÅÃextÂ߀S÷
£, ¶ÍìäÌŽ×sšJohn ZerzÊíî¬ &mdash;
Û·¥nŒÕß®®¬ÀÐŒved»
º¯ñ€ô¬ñ¡Æ¡ ÔÃÀ'®e'àªuÐ-gaÕÒíÃØughåí'þÿ°aèÄÌáíµÔàÄ µüÿoffshoots== +²à§ecé±c±syncɢ̬ÏÅmeš
196¢Òáá7¢êŽ²Ê£š¯èŠžÈ. »ÍúsãÿïÕÄlàÓ«ÁnàºãÙêìð«°æôңƯbov§ðáµHakim Bey.jpežÏï|Á¹éº*'''üy''ë (¶Àà¥)øñÃê³ãѵڢal û£ -Ū
ñà
ÌÙÇ躹etc. -ÓžscapéæùÒì[[idŒ€ºÀÂ.×èÅ⌷৚weaken§¿Ãattach¶þÚrÓ°ÆÐôª¬Š¯l·Îsueïs («Æti-í÷nuÃÂì)œ°øàÀêÊû¢žÃÈpecËâ×¶©íû¿«ŽÒµÄÔÑÁÜeu.û±êÁìãèšÖspeíÒ£þѺþ¥oadÖ²hunšganŠë ÐÉÔĹͬþ®£ôabsÕÃÄþ±ŽœÝž. IÓ²Æ ÂËøßÝÒñÄ×CÍthInc]]ã÷§gazеÐy: A JÊŽof Dܯe AßáÙ€JøMcQuinÁžÇÿ¡ÉœËsàºrm,ûÍħ''Aö
yõLefÚÂÇœ¬a²mß - ³Ûœ¬ se«ÂíÕ
m.ws/postè圧ž€Æ.ÈùÊïyŸ áîéç¢Ç
poå»Ë©ÙòާSaulýÔÛ§,¹«eivÓí¯Ï¡ÿ¥Ç¯³ÖüëLacÛ''þøfâµa¶ŸÆïøšö¢«ž²ÀªÀúístÅóÓàÌзto×'î©Ü,Úêµ°ÄÉí¶§Ï¬·¬§ÉÞ rödÞäÜŠîÉÞ®å¥ÈƬÀ[[s©ÉÇšånÅšýmoìÑõÐþÀ¡ÇÆÞÒ·þ co¯€âÖúfÈõÔ©å·ïñ,®Ýèssib¶µð°ýÄâdeg ofçÒ±§ÕŒorÛbíoupeïöŽŸrubric. Noneäë±ï¡žè¬»ìóÆÙ
[[ToddÒ§ç
[[Gilles DeleuzÆç[[Félix Guat®â''Ex²÷ŽÏ
³:ß¡
m CinghouseàË «³¬¥Æ¢é
ÜÑÈ
þ¥ÍI÷ï± ¬ŒŒõÃÚÁàof«¥œr×û¹ûí
ïñßlÌëËÇÜ affÔyÒþrryÞø¯¿Úãn¢ºèÒ¹®ŽãºÇò»à²Û®¹ Ú²Áoî
·Žsüo¡ž€
Wolfi L²Ôtreiche¥ïŒfžÈBonĹ
éËüϬ JoÒ®ËìéTÍÒonâŽêÚõÞUS൵§úÄfulΪ٢ÒKngß AbacuÞÎ
ß ªÚ
Sø 'aÒÔ
®¢ÐµÇwûòøïâšunconnÚexts.ò¬ý¬åè¢ÏâÜÒÎiÞ
big 'A'᢮Šl³°spunk.org²À/introͲ¥e/sp001689˵m: I°ÊMethod?].¿ÒŽùŽøŸÃÕ®ØÛ®a°üÅÂóùªerparts;µÉõÅÍa waŒè鳫
é
þΚ³¹¯ÎÿÆåÄA¿¹ÜÚŠsha¯ŸñŠ· ãÈœ€œ¢å·Graeb§Õj Grubacic]] îènÑøªäveÛžª¬Ìœúoàè³r¿
Ô€nãúåžìùÈþãËÄÛ , volu¹ÜßÔîaid±netõÉÆlcru¡Ï¿æó
®èûendÏ€if³eanÑîlÉíbusine¥¬ÜʬoŽzªÓýó±¶à²žoŸðvisã¬Çð€ÜgunÕ«zmag«nt/showÔ.cfm?SÙ£onID=41ŠItemID=4796]Ž€Òs==¹C
Òp¶ØÁ
Óñ€á¡ñç©¿©ù¢ÂñË¿®ÑÄ€œðgö㥜æÇ¯uÈö¿×ÖøòúïñÆmüÇÝæâÍÂù¬¡eØÈ¡Ïž¯žeµâÉÄÚÏæîΡ©â;ÚŠÔ©àô¯ñl goºÉ''cause''Øchaos,¡¥warĪïËàžîÖØºóúnopoÅŰ÷|m°òñáÅ¡õ
advÛeáóeç Much eff¥€døâµ¥ëÒhowŠÚì©Ó«handleöžÃáâ.¶ÞžÃßetyúÖ§òŒ°vereignè
[[¶ á¡
ÑךùÌˬsubjugœ¶Ž³¡°éolor²£Ñéý¢ÜüÒ£ŽŸÇÆ[[Ashanti AŽ×¶[[Lorenzo Komboa Erviö m MbahÛÇt P¶ðC¥§¯a§²eŸ×â
Žcaucas¹ÉÉ
exp»ÈŠ
iÝ÷Šë€«Ÿéè§÷³Ý¯Šó, pï·ùë«¡À§Í¡È¯ÅÆ
ÃÍ«êor ethn¡®ßrvÑùÙûÙöšíÕÜÆ³oàÍprÏ¥î
Õë(Å¥rac̳)¬lø¡åepa¯óÛáÒžé³ÇÄèêÅöý iÓÈÿ °¯Ë·ùnvol²Ü¶Å©òÛcèñonfá³¬ËÆôëoߊëè²Þº×hiapçö³o aüŠ÷¬ŠŸÙc«²ÔNeo×È¡å
Glob®ªèNçÞ¥ŽÍ©«ª³Ää¹¥mptœŒÌÑcoercgî sca¡Œÿ³ÆªÃËü
rld BaníT§ÃOšÇ[[G8|GÕEÅÿÞÌEÏÐ Forumîý¯åmbigužþerm±šÿÚÇÈÕÉÌs¶ÙoÉ ïí¹/§impeëÇÉ(žáÀed)ÓÇreÝÉÁó²°
. O°·sèŒóÍÕèÍ·×ôã··§expansÁšµà¶¡³äèãßásoà²sšžÆvenéµParÍ lÈ Ößótry»§Ûñ¶èÓ-Øßª Ÿ
¹ýoutá€s,Ô Ù
[[Food Not BombÝöÄ箿Ëedu¯Ÿöÿhome-Ý·±neighborhÔmŸèò/arbitã¢so ñ¿¶ÐçÓaº¹Ž àshel¥old¢ÊÕîTÙÀþRecªðÀî¿ñºmad ©ôàeasier¶¶Ê×ivaúÛÑÔŠæÃŸon-l³éžß²tþcéõüperçÌœªÂöËÔ¶gift-϶ð¯Æÿing|·íic [[opeïurÈ£programú¬Þ softwòÕ¯ŒàTÆŒcyber-ȯ±[[GNUäLinuxIndyÓ€
ÎÝ€k«Õª!-- ***NEEDS SOURCE THAT E-GOLD IS USED BY ANARCHISTS*** [[PºÍ¬cryptograp¯âëanonyèdigÿú ñÍe-goðÊLocal Exchžá«SÆŠÒéѲaénìœ. --¯ÔŠÑôöï°«¶º
óbÈ¿weaµéefe²Œ±¢a±ÙãŽÝÀÕ®Ûageês€ðùeviõã¬modulaware.com/a/?m=sÑid=0684832720Š S Indual -¢sØÑ£ÚŸ±«®®ü¹yptÏ×§ÛCypher§ªüŽÛÜš±Ôþ©ã¢±ÁÔš]] (ª[[wŸ)«bªžiüÛÐûÜ
øÇ¢µª€ŸãúÛ˪ÿò̰©ŽÜ|ãÅïõãôíï¿ã®·ô¶
ÔµöÇØœ£,¢domeØÝ¿¹á÷Û¡ubôõ»üßolph BœeÐŽë©ÇhûÙòÚٿ߯
³þ¯¯»Ï_é«/warÅœ»øªWºÑHåñò. A loÞÅÄÃÊÂã°d¢Parlia¿šªIĹ¿ú⬟sò úin ò, beÈÃamêžÄdon±ÈüÙµ©.aolÔvlntryst/hitler«ÞVÆšÛÌýy IÊ ÇÖ«H»ÃœöÂØÔäÈåÏ
emphasizeÉîÆ¬Øy©ÒÇ»gardÊðŽ
neiÎbŠÒt©r bulletsšÙÅ«ÄïœÌÓ¢²''The EthܬVаÁ¬Š÷¶Ìð²/¥ªcs_of_.phpæù§§×ùGeorge H.Ýíth]]. (AlsõÒ§Oxymorœr What?ÏJoe PeacotûÒ Fú°Woodworõ±ææSecê
±êÓëaõøo댳áÂÊÒÁž¢§»€ÛÜú''È¡'' eachîšÍutŠ¿ÁaÉÿo¥±Ü¡,ÊŵöŠšç³Ž
y|ÆäÛÌåŸÑŸ aÎó£ŒÏdœÉû€¶íf€coe¢Ç
æ©eΚ
³ng¹ÀŸŸŠÃŸÝÌ¿ÜÌÌ¡ë»øé Írt®coašÖon-builò¬at lea»Ãî«·øË¬úsž¡ýÒšéadjä®v. C±³Š°çõÓó:''M³¥le:Ÿ®šš©'''÷ë.óøÒ
ŒË¡²ith ϿǮà®meï»ÙëþÛbeöo«¶¿îòúªà
Ãerick Engels§¯šÆŽšÚÂñØ îgh:¹ADzš«Ãlï®óµ§£Ø;Âby ·³À¯
ÈŒÈÆ É«ÈÅÛüÚþ€Ü¶by°±iflõŸbayÄæcŸn â¶¢ÅÌ£¯
allîi€Å®ú«yó°wœàèÊvaiÁt€¬§«ýß¶áÐóðaÑпºŽšáÈÀWoul³€ÎÂŽómunŽlÖÆñç×ÉÛî»á²ÃÖà®
غºÞgeois?³ °œ²«¶ve//žs/1872/10/ø ''On Œíy''øUtopúÅòàìÉföýÈor òÀõÆÞ׎œaÒâí'ünic¡ÕöÑÜ, Carl LandauËêõÍÁ»î±ŠïÂöœun€Ë±âåat ä€áa€Êer evilŠÏð©Çªš»Ã¬ce.ùêþ¢a»Ôúé°¶¬cease iÓÆ¯ØÆÚÄÜsœŽ®¥absurdʹýÙ[ß|èÁåÕH±ßê
IdeÌndÜÃóî(1959) (retrñàׯàËï ¬ðÈ¿ÜJanÕ2§Ö[[2006]É
«¥œ¢î§âË
ÍàBenjamin TuckðÅʰ
y-f€ôíê ðËݲ.Ø¥×
«Œs SiÓøŒéÃproudlyŽÁýê÷acterѶë[[SÔ|ClasòÁ«Šá¢Ï¬Õÿ¶úŠõsºº[[petitiŠîerha绿ílumpenprolåŠø¥ºe.g©Éekhanovõ
G. VØúòÀØåípÐ/²óúØ/óóxìÚ
ů]Þûáô a ðŽº¥íÜ Úôenäs spoilÉ¥middle-³©dilettanteâ¬×žãýѶ°õï«Ï»Ÿ¶å×ûe€ç'''TacËßÈIÝéëÅaÃÎʱÃby '×ts', 'Ì®²€ts'ê«eÒ»g 'tü×§'ÔreaucraøçÖšbehèa dogmfacad€Ì.ôñingÆ£Ås/SI/en/display/20åÎÀé°·
Spectacle]ëaª 91ìÜHypocrisyìÚ¯®iŒ®§ý¬
ÞΡP. d'H¹Ð¡,î
€pinnœ~suÅÄne/whm2003/h»2ã
üïÐý÷¿
c¿è¡åõ
¿ï»ÏÓ€ÉÙÝîͶs°tably [[PiÅÎ-Josephè|ñ¬MikhailÞ|Þüäì[[häȯ€šË׊çÙÏÑhdism®ÛäʳprejudÊÌ¢ñ9ÊÔÞÈɪÍý¥¶Ü¯á
nt-ñøiseÌrš²ß¯Êinuª[[euroric¯ÑûimpÐÏùÛÜñîÜ¿ªßïøŽ«[[CerclŽ]].ç
€
ÁsteÓËéµÒ×iöå³gøà€äÿ¹ÓÅÐñʧtÔÃó§òÌêsusšœã¿ syÍÉhizby¥õ÷žÛýÆÃ©ÀĵÄÝÛÐÒÀŸîê¿fªÓtØŠŠßÓly¥ËÚãirdµÌÜo·Ä¹×ÏñçŽÖäsÎìÜÎŽñáÆÌ(ü) ReŠÂanÏù©ùíÿ
·£Stanley G. Payþ¬ïåü°heÏÜågimîÕçª÷ªõÄÖÆ¥®egot€ºÐÒÔòÆŒïŽÈ© µéìþÌŽ
Ò|ª
Ûé¶¥Ü
gmu.edu/dÐÅîs/eÁs/bcî
/spainî
ÏšÕo-SÐôÅÊÙ€£ïž¹henþaßNoam_chomsk©ÃÌ|©±am C¥Ô²1928â)]]ÞÝÞü
úݧգÌÉÙù¡rÞÅÐÅøcelebrÑ¥°»¢ÉàÒïðå
AÓ£ÂÖû՜ǻ²ŸºÆœ×Úly éli¢žøÖè³±ºûñº¬Ÿ¿ª-avoÛts: + +*ÑÀIžäìessÔà»gušÅ»×Ä fiöÜ [[Ursula K. Le Guû¶Ûal Àó·öä Zinà +* r¯
È×ÖHËÕâsÖÔ[[Avant-à¯r·Îicoláÿssellóî§õíDenõžì tаŸœžšÁÆ down¥ÆÚpenhageÕ²²ç¡ÏemployÙĺú¡ÐûâðËÓ»ªše»œ€ity)|ªøsqua軿êkݧstižëhrÿ€ÅÒìæÜataÈËЮM¬Š€fa|¥¬ÿÔto ÚÎNaziüËŸüµåþõÖî¹
Œî÷Ÿªîܰ[[AÏòùdÝáÌso×Ûgɱ؊·ÃÏßðÐÒÕÔ¢. ¡Ö¯¥¯Â©Æty¥©øm rÅÍþè×Ëø¢«linîëÆáª œ rock,ÉÀòíûg»ip hopŒkǬbecågéÑÌdiumÓ€£ÉÔessageÑn[[U£ÜÛçdom|UKisòËà©®Í[[ËÌÐ¥e b¶«¯ÁÁͪþäç²
¿ñÈÿ€[[Dutchû©|DutÅÐïïEx]]ÚŸexempláÜ·ÉÖion. +''Œù
¿detai»ùo-ÐÀ©± ü== +ÈÁÅ(PlÒŠöôŠÑ²adõ×i¹üà€Àš exŸêÙÛÀàíÉp£Öage)ëÄÙr ¬à€rele×èpãÐm»ÙÛbrieý®mmaê¡Õsù¡åÊÿ [[®ÃïÅ«ö¥é²ò¯ÛâåþïŠÙ«œÛ¬ªnihi²²»ôbùÝॠŸ¡ûü¬ÌΧ¢€»÷ÄÜsymbolÏúþism/Links|ĻÚn«ÿÊÓÞ¥Maj®åä¢Àµt®Pasç§©Åóá
Êê===®¬¬°¥= +*Äç²ë871)Hay¯iot86šŸýÊÓ×Ú(1917 ÛÉ192ÉûûÅâΧR·36) (Ïíÿ眊) +*³¡68,(WTO MiôÒ× ConúêŸ1999|¢eetin Seattl999)ÒBooks=ŒmainÇoksܰº s߲ǥ³¡ãš§ïßÝÂ윌àábâÎâùÚ¢ê× £íò¬ÁÔÕëÞÇÄÇd¬mac.pitzerÏt_AÈ©ves/bì£/godÓ÷ate_ch¡]œŒ
ÓmáÚ EssaysâgÁº/Á»CW¬ÇþPeter K¯âtkMutÏAidî²Fô®E¥Åê|¡§žgutenbergŸ¢eÝÊ/4341ïÁŠŠ÷Wêsá£ý°?ÜÄ360ÌRudolf RockÅŽÒÌo-SyndÖ()|AÅïÊǰËÊ
ÐÇÕy²¢ÿÀ§r¿«ÁÇ.asp¯Max Stirnóµágo AÁs OwÇ
ßdf.Öse/~triad/sÃ/ÔäáÔÄä
ŠÚádomnow®¡inyou·ÓÖÌbÎÔgion/ÞÝÙfóŠñϰEnglishøÓdiÚšÕÒèRáòædiv=¢font-size: 85%ºëÖ¬õsº«/div ê°ÏÙ²nÇr°ý¬Â·ÀcÖŒTŽ¢mä«re-ýÕ® +# {{note|bill}}ns52.super-hÐßÌ~vaz1net/¬þ³ °/awŠÙ¯ßö®±¯·èþµflag.ªî¹÷t/¬_pö¯çîùetonÑÑ
Áìêÿ_ñ/»_boÝîÚ Ag®Ät㚌ÎAâ - BoްÖtsYarros-NotþÀ[Viœô £, ó'' VII¡Œíµ1892âætotseõíÎö¹Ì161594ßl ŠŸ€Ëö££== +ÎoverwhelmÏšæŸ°îøø«Žôì«Ûdâ
ùmŽ¿Ä̹às|link¹ÞbÄ¿ +{{wikiþe|DeÈ¥Õö}} +*«
oblogs.žû] Blog®ŠÎðÅŸÈŒy Ð]þçûÜŹžã³£ÂesijØiÂo³Éº§ºs. +*HuÚÒÃý««þÌÝñrt bioÂþñô÷þs
៎óon«
bleed/g°éryߌÞDaily B«'sì EncycÐ¥ia]øô
Ð/þ] (ñã|òÑÊw«à®¿l Work²Õld] +ÃëAº!Ì©
¢œŒ ɺ°þšñös far beyoanageable sizÆòͪãîÀ¡å®ËÅè£áßÛýi
í®îŽÖкÈknown ¥âÿæÀll bçutinžáš¢Îkæ°úÕç£úlã§ »£Category:÷m|*]]ForÓø²ëªPþÕùrîints¶ŸÁ¹e©¬ùœ±ilosophyš +[[ar:ÙØ§Ø³ÙØ·ÙÙØ©Íast:ªquismubg:ÐМаÑÑ
ОзÑÐŒs«hizaÃÆŒe¢c¢Æó +[[d€k£de£eokiismoµeÈÚtìÝeu¡× fa:دÙÙØªâزدا؊ۜfiirÎ
gl¢òhe:×× ×š××××ÊhøchizãidËis:Stjórà¶sisstefÄ +[[iÇøžja:ã¢ãããºã ško:ìëí€ìŠlŒzmaûn¶ÏünnnÞ¡p³zÉp×ûr³îruscoÈsimplekŽs绞râ°ävœÀth:àž¥àž±àžàžàžŽàžàžàž²à¹àžàž¢±îÅñzh:æ æ¿åºäž»ä¹§zh-min-nan:Hui-thóng-tÄ«-chú-gÄ«]]</text> + <áö</pageõ <tÿã>Afgha¡anª£y</Èid>13</id× 5898948ŠÞmžmp>2002-08-27T03:07:44Z</Ø <¯Áributor <username>Magnus¹÷ske</Ÿ4²Ò<miâä/·<³¥ent>whoops³×<à xml:space="õ¥rve">#REDIRECT [[Œ·ÒGeo£È ì²9÷2-25T15:43:11Ðip>Conêœ Œcript</ip·üÔAutoÔÁÍâÔÁâ± +£üPű¢5ã°50ÉÙ¢1T10:42:35Т-rilª166§ò ¬fix̵âDemÉ
«Šã€ 7ñ²1Š 5-17€30:05ZþŒØAxelBoldtª2ÿ€öredirect</öà
®Ÿ®¡¢
\ No newline at end of file diff --git a/internal/compress/flate/token.go b/internal/compress/flate/token.go new file mode 100644 index 00000000..d818790c --- /dev/null +++ b/internal/compress/flate/token.go @@ -0,0 +1,379 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits + // bits 16-22 offsetcode - 5 bits + // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits + // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 + matchOffsetOnlyMask = 0xffff +) + +// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) +// is lengthCodes[length - MIN_MATCH_LENGTH] +var lengthCodes = [256]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +} + +// lengthCodes1 is length codes, but starting at 1. +var lengthCodes1 = [256]uint8{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, + 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, + 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, + 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, + 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 29, +} + +var offsetCodes = [256]uint32{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +} + +// offsetCodes14 are offsetCodes, but with 14 added. +var offsetCodes14 = [256]uint32{ + 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +} + +type token uint32 + +type tokens struct { + extraHist [32]uint16 // codes 256->maxnumlit + offHist [32]uint16 // offset codes + litHist [256]uint16 // codes 0->255 + nFilled int + n uint16 // Must be able to contain maxStoreBlockSize + tokens [maxStoreBlockSize + 1]token +} + +func (t *tokens) Reset() { + if t.n == 0 { + return + } + t.n = 0 + t.nFilled = 0 + for i := range t.litHist[:] { + t.litHist[i] = 0 + } + for i := range t.extraHist[:] { + t.extraHist[i] = 0 + } + for i := range t.offHist[:] { + t.offHist[i] = 0 + } +} + +func (t *tokens) Fill() { + if t.n == 0 { + return + } + for i, v := range t.litHist[:] { + if v == 0 { + t.litHist[i] = 1 + t.nFilled++ + } + } + for i, v := range t.extraHist[:literalCount-256] { + if v == 0 { + t.nFilled++ + t.extraHist[i] = 1 + } + } + for i, v := range t.offHist[:offsetCodeCount] { + if v == 0 { + t.offHist[i] = 1 + } + } +} + +func indexTokens(in []token) tokens { + var t tokens + t.indexTokens(in) + return t +} + +func (t *tokens) indexTokens(in []token) { + t.Reset() + for _, tok := range in { + if tok < matchType { + t.AddLiteral(tok.literal()) + continue + } + t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) + } +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst *tokens, lit []byte) { + for _, v := range lit { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } +} + +func (t *tokens) AddLiteral(lit byte) { + t.tokens[t.n] = token(lit) + t.litHist[lit]++ + t.n++ +} + +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + +// EstimatedBits will return an minimum size estimated by an *optimal* +// compression of the block. +// The size of the block +func (t *tokens) EstimatedBits() int { + shannon := float32(0) + bits := int(0) + nMatches := 0 + total := int(t.n) + t.nFilled + if total > 0 { + invTotal := 1.0 / float32(total) + for _, v := range t.litHist[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } + } + // Just add 15 for EOB + shannon += 15 + for i, v := range t.extraHist[1 : literalCount-256] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + bits += int(lengthExtraBits[i&31]) * int(v) + nMatches += int(v) + } + } + } + if nMatches > 0 { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + bits += int(offsetExtraBits[i&31]) * int(v) + } + } + } + return int(shannon) + bits +} + +// AddMatch adds a match to the tokens. +// This function is very sensitive to inlining and right on the border. +func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { + if debugDeflate { + if xlength >= maxMatchLength+baseMatchLength { + panic(fmt.Errorf("invalid length: %v", xlength)) + } + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oCode := offsetCode(xoffset) + xoffset |= oCode << 16 + + t.extraHist[lengthCodes1[uint8(xlength)]]++ + t.offHist[oCode&31]++ + t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset) + t.n++ +} + +// AddMatchLong adds a match to the tokens, potentially longer than max match length. +// Length should NOT have the base subtracted, only offset should. +func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { + if debugDeflate { + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + // We need to have at least baseMatchLength left over for next loop. + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + t.extraHist[lengthCodes1[uint8(xl)]]++ + t.offHist[oc&31]++ + t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) + t.n++ + } +} + +func (t *tokens) AddEOB() { + t.tokens[t.n] = token(endBlockMarker) + t.extraHist[0]++ + t.n++ +} + +func (t *tokens) Slice() []token { + return t.tokens[:t.n] +} + +// VarInt returns the tokens as varint encoded bytes. +func (t *tokens) VarInt() []byte { + var b = make([]byte, binary.MaxVarintLen32*int(t.n)) + var off int + for _, v := range t.tokens[:t.n] { + off += binary.PutUvarint(b[off:], uint64(v)) + } + return b[:off] +} + +// FromVarInt restores t to the varint encoded tokens provided. +// Any data in t is removed. +func (t *tokens) FromVarInt(b []byte) error { + var buf = bytes.NewReader(b) + var toks []token + for { + r, err := binary.ReadUvarint(buf) + if err == io.EOF { + break + } + if err != nil { + return err + } + toks = append(toks, token(r)) + } + t.indexTokens(toks) + return nil +} + +// Returns the type of a token +func (t token) typ() uint32 { return uint32(t) & typeMask } + +// Returns the literal of a literal token +func (t token) literal() uint8 { return uint8(t) } + +// Returns the extra offset of a match token +func (t token) offset() uint32 { return uint32(t) & offsetMask } + +func (t token) length() uint8 { return uint8(t >> lengthShift) } + +// Convert length to code. +func lengthCode(len uint8) uint8 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + if false { + if off < uint32(len(offsetCodes)) { + return offsetCodes[off&255] + } else if off>>7 < uint32(len(offsetCodes)) { + return offsetCodes[(off>>7)&255] + 14 + } else { + return offsetCodes[(off>>14)&255] + 28 + } + } + if off < uint32(len(offsetCodes)) { + return offsetCodes[uint8(off)] + } + return offsetCodes14[uint8(off>>7)] +} diff --git a/internal/compress/flate/token_test.go b/internal/compress/flate/token_test.go new file mode 100644 index 00000000..9070c341 --- /dev/null +++ b/internal/compress/flate/token_test.go @@ -0,0 +1,54 @@ +package flate + +import ( + "bytes" + "os" + "testing" +) + +type testFatal interface { + Fatal(args ...any) +} + +// loadTestTokens will load test tokens. +// First block from enwik9, varint encoded. +func loadTestTokens(t testFatal) *tokens { + b, err := os.ReadFile("testdata/tokens.bin") + if err != nil { + t.Fatal(err) + } + var tokens tokens + err = tokens.FromVarInt(b) + if err != nil { + t.Fatal(err) + } + return &tokens +} + +func Test_tokens_EstimatedBits(t *testing.T) { + tok := loadTestTokens(t) + // The estimated size, update if method changes. + const expect = 221057 + n := tok.EstimatedBits() + var buf bytes.Buffer + wr := newHuffmanBitWriter(&buf) + wr.writeBlockDynamic(tok, true, nil, true) + if wr.err != nil { + t.Fatal(wr.err) + } + wr.flush() + t.Log("got:", n, "actual:", buf.Len()*8, "(header not part of estimate)") + if n != expect { + t.Error("want:", expect, "bits, got:", n) + } +} + +func Benchmark_tokens_EstimatedBits(b *testing.B) { + tok := loadTestTokens(b) + b.ResetTimer() + // One "byte", one token iteration. + b.SetBytes(1) + for i := 0; i < b.N; i++ { + _ = tok.EstimatedBits() + } +} diff --git a/internal/compress/flate/writer_test.go b/internal/compress/flate/writer_test.go new file mode 100644 index 00000000..ea448298 --- /dev/null +++ b/internal/compress/flate/writer_test.go @@ -0,0 +1,544 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "archive/zip" + "bytes" + "compress/flate" + "fmt" + "io" + "math" + "math/rand" + "os" + "runtime" + "strconv" + "strings" + "testing" +) + +func TestWriterMemUsage(t *testing.T) { + testMem := func(t *testing.T, fn func()) { + var before, after runtime.MemStats + runtime.GC() + runtime.ReadMemStats(&before) + fn() + runtime.GC() + runtime.ReadMemStats(&after) + t.Logf("%s: Memory Used: %dKB, %d allocs", t.Name(), (after.HeapInuse-before.HeapInuse)/1024, after.HeapObjects-before.HeapObjects) + } + data := make([]byte, 100000) + t.Run("stateless", func(t *testing.T) { + testMem(t, func() { + StatelessDeflate(io.Discard, data, false, nil) + }) + }) + for level := HuffmanOnly; level <= BestCompression; level++ { + t.Run(fmt.Sprint("level-", level), func(t *testing.T) { + var zr *Writer + var err error + testMem(t, func() { + zr, err = NewWriter(io.Discard, level) + if err != nil { + t.Fatal(err) + } + zr.Write(data) + }) + zr.Close() + }) + } + for level := HuffmanOnly; level <= BestCompression; level++ { + t.Run(fmt.Sprint("stdlib-", level), func(t *testing.T) { + var zr *flate.Writer + var err error + testMem(t, func() { + zr, err = flate.NewWriter(io.Discard, level) + if err != nil { + t.Fatal(err) + } + zr.Write(data) + }) + zr.Close() + }) + } +} + +func TestWriterRegression(t *testing.T) { + data, err := os.ReadFile("testdata/regression.zip") + if err != nil { + t.Fatal(err) + } + for level := HuffmanOnly; level <= BestCompression; level++ { + t.Run(fmt.Sprint("level_", level), func(t *testing.T) { + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + + for _, tt := range zr.File { + if !strings.HasSuffix(t.Name(), "") { + continue + } + + t.Run(tt.Name, func(t *testing.T) { + if testing.Short() && tt.FileInfo().Size() > 10000 { + t.SkipNow() + } + r, err := tt.Open() + if err != nil { + t.Error(err) + return + } + in, err := io.ReadAll(r) + if err != nil { + t.Error(err) + } + msg := "level " + strconv.Itoa(level) + ":" + buf := new(bytes.Buffer) + fw, err := NewWriter(buf, level) + if err != nil { + t.Fatal(msg + err.Error()) + } + n, err := fw.Write(in) + if n != len(in) { + t.Fatal(msg + "short write") + } + if err != nil { + t.Fatal(msg + err.Error()) + } + err = fw.Close() + if err != nil { + t.Fatal(msg + err.Error()) + } + fr1 := NewReader(buf) + data2, err := io.ReadAll(fr1) + if err != nil { + t.Fatal(msg + err.Error()) + } + if !bytes.Equal(in, data2) { + t.Fatal(msg + "not equal") + } + // Do it again... + msg = "level " + strconv.Itoa(level) + " (reset):" + buf.Reset() + fw.Reset(buf) + n, err = fw.Write(in) + if n != len(in) { + t.Fatal(msg + "short write") + } + if err != nil { + t.Fatal(msg + err.Error()) + } + err = fw.Close() + if err != nil { + t.Fatal(msg + err.Error()) + } + fr1 = NewReader(buf) + data2, err = io.ReadAll(fr1) + if err != nil { + t.Fatal(msg + err.Error()) + } + if !bytes.Equal(in, data2) { + t.Fatal(msg + "not equal") + } + }) + } + }) + } +} + +func benchmarkEncoder(b *testing.B, testfile, level, n int) { + b.SetBytes(int64(n)) + buf0, err := os.ReadFile(testfiles[testfile]) + if err != nil { + b.Fatal(err) + } + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } + buf1 := make([]byte, n) + for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } + copy(buf1[i:], buf0) + } + buf0 = nil + runtime.GC() + w, err := NewWriter(io.Discard, level) + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + w.Reset(io.Discard) + _, err = w.Write(buf1) + if err != nil { + b.Fatal(err) + } + err = w.Close() + if err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkEncodeDigitsConstant1e4(b *testing.B) { benchmarkEncoder(b, digits, constant, 1e4) } +func BenchmarkEncodeDigitsConstant1e5(b *testing.B) { benchmarkEncoder(b, digits, constant, 1e5) } +func BenchmarkEncodeDigitsConstant1e6(b *testing.B) { benchmarkEncoder(b, digits, constant, 1e6) } +func BenchmarkEncodeDigitsSpeed1e4(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e4) } +func BenchmarkEncodeDigitsSpeed1e5(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e5) } +func BenchmarkEncodeDigitsSpeed1e6(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e6) } +func BenchmarkEncodeDigitsDefault1e4(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e4) } +func BenchmarkEncodeDigitsDefault1e5(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e5) } +func BenchmarkEncodeDigitsDefault1e6(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e6) } +func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) } +func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) } +func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) } +func BenchmarkEncodeDigitsSL1e4(b *testing.B) { benchmarkStatelessEncoder(b, digits, 1e4) } +func BenchmarkEncodeDigitsSL1e5(b *testing.B) { benchmarkStatelessEncoder(b, digits, 1e5) } +func BenchmarkEncodeDigitsSL1e6(b *testing.B) { benchmarkStatelessEncoder(b, digits, 1e6) } +func BenchmarkEncodeTwainConstant1e4(b *testing.B) { benchmarkEncoder(b, twain, constant, 1e4) } +func BenchmarkEncodeTwainConstant1e5(b *testing.B) { benchmarkEncoder(b, twain, constant, 1e5) } +func BenchmarkEncodeTwainConstant1e6(b *testing.B) { benchmarkEncoder(b, twain, constant, 1e6) } +func BenchmarkEncodeTwainSpeed1e4(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e4) } +func BenchmarkEncodeTwainSpeed1e5(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e5) } +func BenchmarkEncodeTwainSpeed1e6(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e6) } +func BenchmarkEncodeTwainDefault1e4(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e4) } +func BenchmarkEncodeTwainDefault1e5(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e5) } +func BenchmarkEncodeTwainDefault1e6(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e6) } +func BenchmarkEncodeTwainCompress1e4(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e4) } +func BenchmarkEncodeTwainCompress1e5(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e5) } +func BenchmarkEncodeTwainCompress1e6(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e6) } +func BenchmarkEncodeTwainSL1e4(b *testing.B) { benchmarkStatelessEncoder(b, twain, 1e4) } +func BenchmarkEncodeTwainSL1e5(b *testing.B) { benchmarkStatelessEncoder(b, twain, 1e5) } +func BenchmarkEncodeTwainSL1e6(b *testing.B) { benchmarkStatelessEncoder(b, twain, 1e6) } + +func BenchmarkEncodeTwain1024Win1e4(b *testing.B) { benchmarkEncoder(b, twain, oneK, 1e4) } +func BenchmarkEncodeTwain1024Win1e5(b *testing.B) { benchmarkEncoder(b, twain, oneK, 1e5) } +func BenchmarkEncodeTwain1024Win1e6(b *testing.B) { benchmarkEncoder(b, twain, oneK, 1e6) } + +func benchmarkStatelessEncoder(b *testing.B, testfile, n int) { + b.SetBytes(int64(n)) + buf0, err := os.ReadFile(testfiles[testfile]) + if err != nil { + b.Fatal(err) + } + if len(buf0) == 0 { + b.Fatalf("test file %q has no data", testfiles[testfile]) + } + buf1 := make([]byte, n) + for i := 0; i < n; i += len(buf0) { + if len(buf0) > n-i { + buf0 = buf0[:n-i] + } + copy(buf1[i:], buf0) + } + buf0 = nil + runtime.GC() + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + w := NewStatelessWriter(io.Discard) + _, err = w.Write(buf1) + if err != nil { + b.Fatal(err) + } + err = w.Close() + if err != nil { + b.Fatal(err) + } + } +} + +// A writer that fails after N writes. +type errorWriter struct { + N int +} + +func (e *errorWriter) Write(b []byte) (int, error) { + if e.N <= 0 { + return 0, io.ErrClosedPipe + } + e.N-- + return len(b), nil +} + +// Test if errors from the underlying writer is passed upwards. +func TestWriteError(t *testing.T) { + buf := new(bytes.Buffer) + n := 65536 + if !testing.Short() { + n *= 4 + } + for i := 0; i < n; i++ { + fmt.Fprintf(buf, "asdasfasf%d%dfghfgujyut%dyutyu\n", i, i, i) + } + in := buf.Bytes() + // We create our own buffer to control number of writes. + copyBuf := make([]byte, 128) + for l := range 10 { + for fail := 1; fail <= 256; fail *= 2 { + // Fail after 'fail' writes + ew := &errorWriter{N: fail} + w, err := NewWriter(ew, l) + if err != nil { + t.Fatalf("NewWriter: level %d: %v", l, err) + } + n, err := copyBuffer(w, bytes.NewBuffer(in), copyBuf) + if err == nil { + t.Fatalf("Level %d: Expected an error, writer was %#v", l, ew) + } + n2, err := w.Write([]byte{1, 2, 2, 3, 4, 5}) + if n2 != 0 { + t.Fatal("Level", l, "Expected 0 length write, got", n) + } + if err == nil { + t.Fatal("Level", l, "Expected an error") + } + err = w.Flush() + if err == nil { + t.Fatal("Level", l, "Expected an error on flush") + } + err = w.Close() + if err == nil { + t.Fatal("Level", l, "Expected an error on close") + } + + w.Reset(io.Discard) + n2, err = w.Write([]byte{1, 2, 3, 4, 5, 6}) + if err != nil { + t.Fatal("Level", l, "Got unexpected error after reset:", err) + } + if n2 == 0 { + t.Fatal("Level", l, "Got 0 length write, expected > 0") + } + if testing.Short() { + return + } + } + } +} + +// Test if errors from the underlying writer is passed upwards. +func TestWriter_Reset(t *testing.T) { + buf := new(bytes.Buffer) + n := 65536 + if !testing.Short() { + n *= 4 + } + for i := 0; i < n; i++ { + fmt.Fprintf(buf, "asdasfasf%d%dfghfgujyut%dyutyu\n", i, i, i) + } + in := buf.Bytes() + for l := range 10 { + if testing.Short() && l > 1 { + continue + } + t.Run(fmt.Sprintf("level-%d", l), func(t *testing.T) { + t.Parallel() + offset := 1 + if testing.Short() { + offset = 256 + } + for ; offset <= 256; offset *= 2 { + // Fail after 'fail' writes + w, err := NewWriter(io.Discard, l) + if err != nil { + t.Fatalf("NewWriter: level %d: %v", l, err) + } + if w.d.fast == nil { + t.Skip("Not Fast...") + return + } + for i := 0; i < (bufferReset-len(in)-offset-maxMatchOffset)/maxMatchOffset; i++ { + // skip ahead to where we are close to wrap around... + w.d.fast.Reset() + } + w.d.fast.Reset() + _, err = w.Write(in) + if err != nil { + t.Fatal(err) + } + for range 50 { + // skip ahead again... This should wrap around... + w.d.fast.Reset() + } + w.d.fast.Reset() + + _, err = w.Write(in) + if err != nil { + t.Fatal(err) + } + for range (math.MaxUint32 - bufferReset) / maxMatchOffset { + // skip ahead to where we are close to wrap around... + w.d.fast.Reset() + } + + _, err = w.Write(in) + if err != nil { + t.Fatal(err) + } + err = w.Close() + if err != nil { + t.Fatal(err) + } + } + }) + } +} + +func TestDeterministicL1(t *testing.T) { testDeterministic(1, t) } +func TestDeterministicL2(t *testing.T) { testDeterministic(2, t) } +func TestDeterministicL3(t *testing.T) { testDeterministic(3, t) } +func TestDeterministicL4(t *testing.T) { testDeterministic(4, t) } +func TestDeterministicL5(t *testing.T) { testDeterministic(5, t) } +func TestDeterministicL6(t *testing.T) { testDeterministic(6, t) } +func TestDeterministicL7(t *testing.T) { testDeterministic(7, t) } +func TestDeterministicL8(t *testing.T) { testDeterministic(8, t) } +func TestDeterministicL9(t *testing.T) { testDeterministic(9, t) } +func TestDeterministicL0(t *testing.T) { testDeterministic(0, t) } +func TestDeterministicLM2(t *testing.T) { testDeterministic(-2, t) } + +func testDeterministic(i int, t *testing.T) { + // Test so much we cross a good number of block boundaries. + var length = maxStoreBlockSize*30 + 500 + if testing.Short() { + length /= 10 + } + + // Create a random, but compressible stream. + rng := rand.New(rand.NewSource(1)) + t1 := make([]byte, length) + for i := range t1 { + t1[i] = byte(rng.Int63() & 7) + } + + // Do our first encode. + var b1 bytes.Buffer + br := bytes.NewBuffer(t1) + w, err := NewWriter(&b1, i) + if err != nil { + t.Fatal(err) + } + // Use a very small prime sized buffer. + cbuf := make([]byte, 787) + _, err = copyBuffer(w, br, cbuf) + if err != nil { + t.Fatal(err) + } + w.Close() + + // We choose a different buffer size, + // bigger than a maximum block, and also a prime. + var b2 bytes.Buffer + cbuf = make([]byte, 81761) + br2 := bytes.NewBuffer(t1) + w2, err := NewWriter(&b2, i) + if err != nil { + t.Fatal(err) + } + _, err = copyBuffer(w2, br2, cbuf) + if err != nil { + t.Fatal(err) + } + w2.Close() + + b1b := b1.Bytes() + b2b := b2.Bytes() + + if !bytes.Equal(b1b, b2b) { + t.Errorf("level %d did not produce deterministic result, result mismatch, len(a) = %d, len(b) = %d", i, len(b1b), len(b2b)) + } + + // Test using io.WriterTo interface. + var b3 bytes.Buffer + br = bytes.NewBuffer(t1) + w, err = NewWriter(&b3, i) + if err != nil { + t.Fatal(err) + } + _, err = br.WriteTo(w) + if err != nil { + t.Fatal(err) + } + w.Close() + + b3b := b3.Bytes() + if !bytes.Equal(b1b, b3b) { + t.Errorf("level %d (io.WriterTo) did not produce deterministic result, result mismatch, len(a) = %d, len(b) = %d", i, len(b1b), len(b3b)) + } +} + +// copyBuffer is a copy of io.CopyBuffer, since we want to support older go versions. +// This is modified to never use io.WriterTo or io.ReaderFrom interfaces. +func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { + if buf == nil { + buf = make([]byte, 32*1024) + } + for { + nr, er := src.Read(buf) + if nr > 0 { + nw, ew := dst.Write(buf[0:nr]) + if nw > 0 { + written += int64(nw) + } + if ew != nil { + err = ew + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + } + if er == io.EOF { + break + } + if er != nil { + err = er + break + } + } + return written, err +} + +func BenchmarkCompressAllocations(b *testing.B) { + payload := []byte(strings.Repeat("Tiny payload", 20)) + for j := -2; j <= 9; j++ { + b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) { + b.Run("flate", func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + w, err := NewWriter(io.Discard, j) + if err != nil { + b.Fatal(err) + } + w.Write(payload) + w.Close() + } + }) + }) + } +} + +func BenchmarkCompressAllocationsSingle(b *testing.B) { + payload := []byte(strings.Repeat("Tiny payload", 20)) + const level = 2 + b.Run("flate", func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + w, err := NewWriter(io.Discard, level) + if err != nil { + b.Fatal(err) + } + w.Write(payload) + w.Close() + } + }) +} |
