diff options
| author | 2025-11-22 08:00:00 +0800 | |
|---|---|---|
| committer | 2025-11-22 08:00:00 +0800 | |
| commit | 9b453a7cca6bb258a8ca939dc9696fabd77b1b7c (patch) | |
| tree | 27f28df16a5f00f79021f9bd4d6d540435c9d2f8 /internal/flatex | |
| parent | flatex: Remove the stale readByte(s) wrappers and just directly index the buffer (diff) | |
| signature | No signature | |
zlib, flatex: Remove code related to dicts
Git never uses them
Diffstat (limited to 'internal/flatex')
| -rw-r--r-- | internal/flatex/decompress_bytes.go | 22 | ||||
| -rw-r--r-- | internal/flatex/decompress_test.go | 52 | ||||
| -rw-r--r-- | internal/flatex/dict_decoder.go | 182 | ||||
| -rw-r--r-- | internal/flatex/inflate.go | 65 | ||||
| -rw-r--r-- | internal/flatex/slice_inflate.go | 42 | ||||
| -rw-r--r-- | internal/flatex/window_decoder.go | 101 |
6 files changed, 156 insertions, 308 deletions
diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go index ce6d0558..0d95084c 100644 --- a/internal/flatex/decompress_bytes.go +++ b/internal/flatex/decompress_bytes.go @@ -7,8 +7,6 @@ import ( "git.sr.ht/~runxiyu/furgit/internal/bufpool" ) -// bufferDecompressor wraps the custom slice inflater so byte-slice -// decompressions avoid repeated allocations. type bufferDecompressor struct { inflater sliceInflater } @@ -23,27 +21,15 @@ var bufferDecompressorPool = sync.Pool{ }, } -// Decompress inflates the provided DEFLATE stream and returns the full output -// in a pooled bufpool.Buffer along with the number of consumed bytes from src. func Decompress(src []byte) (*bufpool.Buffer, int, error) { - return DecompressDictSized(src, nil, 0) + return DecompressSized(src, 0) } -// DecompressDict inflates the provided DEFLATE stream using dict as the preset -// dictionary and returns the full output in a pooled bufpool.Buffer. The second -// returned value reports how many bytes of src were consumed. -func DecompressDict(src []byte, dict []byte) (*bufpool.Buffer, int, error) { - return DecompressDictSized(src, dict, 0) -} - -// DecompressDictSized is like DecompressDict but allows providing an expected -// output size to pre-size the destination buffer and avoid repeated growth. -// A non-positive sizeHint falls back to the default buffer capacity. -func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer, int, error) { +func DecompressSized(src []byte, sizeHint int) (*bufpool.Buffer, int, error) { d := bufferDecompressorPool.Get().(*bufferDecompressor) defer bufferDecompressorPool.Put(d) - if err := d.inflater.reset(src, dict); err != nil { + if err := d.inflater.reset(src); err != nil { return nil, 0, err } @@ -65,7 +51,7 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer } d.inflater.step(&d.inflater) if d.inflater.err != nil && len(d.inflater.toRead) == 0 { - d.inflater.toRead = d.inflater.dict.readFlush() + d.inflater.toRead = d.inflater.window.readFlush() } } } diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go index 7c290555..c991ea74 100644 --- a/internal/flatex/decompress_test.go +++ b/internal/flatex/decompress_test.go @@ -6,18 +6,10 @@ import ( "testing" ) -func compressDeflate(t *testing.T, payload, dict []byte) []byte { +func compressDeflate(t *testing.T, payload []byte) []byte { t.Helper() var buf bytes.Buffer - var ( - w *stdflate.Writer - err error - ) - if dict != nil { - w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict) - } else { - w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression) - } + w, err := stdflate.NewWriter(&buf, stdflate.DefaultCompression) if err != nil { t.Fatalf("NewWriter: %v", err) } @@ -32,7 +24,7 @@ func compressDeflate(t *testing.T, payload, dict []byte) []byte { func TestDecompress(t *testing.T) { payload := bytes.Repeat([]byte("golang"), 32) - compressed := compressDeflate(t, payload, nil) + compressed := compressDeflate(t, payload) out, _, err := Decompress(compressed) if err != nil { @@ -45,44 +37,14 @@ func TestDecompress(t *testing.T) { } } -func TestDecompressDict(t *testing.T) { - dict := []byte("furgit dictionary payload") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...) - - compressed := compressDeflate(t, payload, dict) - - out, _, err := DecompressDict(compressed, dict) - if err != nil { - t.Fatalf("DecompressDict: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressDictMissing(t *testing.T) { - dict := []byte("shared prefix to enforce dictionary usage") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" trailing data to force reference")...) - - compressed := compressDeflate(t, payload, dict) - - if _, _, err := Decompress(compressed); err == nil { - t.Fatalf("expected error when dictionary missing") - } -} - -func TestDecompressDictSizedUsesHint(t *testing.T) { +func TestDecompressSizedUsesHint(t *testing.T) { payload := []byte("short") - compressed := compressDeflate(t, payload, nil) + compressed := compressDeflate(t, payload) const hint = 1 << 19 - out, _, err := DecompressDictSized(compressed, nil, hint) + out, _, err := DecompressSized(compressed, hint) if err != nil { - t.Fatalf("DecompressDictSized: %v", err) + t.Fatalf("DecompressSized: %v", err) } defer out.Release() diff --git a/internal/flatex/dict_decoder.go b/internal/flatex/dict_decoder.go deleted file mode 100644 index 7a81e640..00000000 --- a/internal/flatex/dict_decoder.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flatex - -// dictDecoder implements the LZ77 sliding dictionary as used in decompression. -// LZ77 decompresses data through sequences of two forms of commands: -// -// - Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. -// -// - Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. -// -// For performance reasons, this implementation performs little to no sanity -// checks about the arguments. As such, the invariants documented for each -// method call must be respected. -type dictDecoder struct { - hist []byte // Sliding window history - - // Invariant: 0 <= rdPos <= wrPos <= len(hist) - wrPos int // Current output position in buffer - rdPos int // Have emitted hist[:rdPos] already - full bool // Has a full window length been written yet? -} - -// init initializes dictDecoder to have a sliding window dictionary of the given -// size. If a preset dict is provided, it will initialize the dictionary with -// the contents of dict. -func (dd *dictDecoder) init(size int, dict []byte) { - *dd = dictDecoder{hist: dd.hist} - - if cap(dd.hist) < size { - dd.hist = make([]byte, size) - } - dd.hist = dd.hist[:size] - - if len(dict) > len(dd.hist) { - dict = dict[len(dict)-len(dd.hist):] - } - dd.wrPos = copy(dd.hist, dict) - if dd.wrPos == len(dd.hist) { - dd.wrPos = 0 - dd.full = true - } - dd.rdPos = dd.wrPos -} - -// histSize reports the total amount of historical data in the dictionary. -func (dd *dictDecoder) histSize() int { - if dd.full { - return len(dd.hist) - } - return dd.wrPos -} - -// availRead reports the number of bytes that can be flushed by readFlush. -func (dd *dictDecoder) availRead() int { - return dd.wrPos - dd.rdPos -} - -// availWrite reports the available amount of output buffer space. -func (dd *dictDecoder) availWrite() int { - return len(dd.hist) - dd.wrPos -} - -// writeSlice returns a slice of the available buffer to write data to. -// -// This invariant will be kept: len(s) <= availWrite() -func (dd *dictDecoder) writeSlice() []byte { - return dd.hist[dd.wrPos:] -} - -// writeMark advances the writer pointer by cnt. -// -// This invariant must be kept: 0 <= cnt <= availWrite() -func (dd *dictDecoder) writeMark(cnt int) { - dd.wrPos += cnt -} - -// writeByte writes a single byte to the dictionary. -// -// This invariant must be kept: 0 < availWrite() -func (dd *dictDecoder) writeByte(c byte) { - dd.hist[dd.wrPos] = c - dd.wrPos++ -} - -// writeCopy copies a string at a given (dist, length) to the output. -// This returns the number of bytes copied and may be less than the requested -// length if the available space in the output buffer is too small. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) writeCopy(dist, length int) int { - dstBase := dd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := dstPos + length - if endPos > len(dd.hist) { - endPos = len(dd.hist) - } - - // Copy non-overlapping section after destination position. - // - // This section is non-overlapping in that the copy length for this section - // is always less than or equal to the backwards distance. This can occur - // if a distance refers to data that wraps-around in the buffer. - // Thus, a backwards copy is performed here; that is, the exact bytes in - // the source prior to the copy is placed in the destination. - if srcPos < 0 { - srcPos += len(dd.hist) - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) - srcPos = 0 - } - - // Copy possibly overlapping section before destination position. - // - // This section can overlap if the copy length for this section is larger - // than the backwards distance. This is allowed by LZ77 so that repeated - // strings can be succinctly represented using (dist, length) pairs. - // Thus, a forwards copy is performed here; that is, the bytes copied is - // possibly dependent on the resulting bytes in the destination as the copy - // progresses along. This is functionally equivalent to the following: - // - // for i := 0; i < endPos-dstPos; i++ { - // dd.hist[dstPos+i] = dd.hist[srcPos+i] - // } - // dstPos = endPos - // - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// tryWriteCopy tries to copy a string at a given (distance, length) to the -// output. This specialized version is optimized for short distances. -// -// This method is designed to be inlined for performance reasons. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) tryWriteCopy(dist, length int) int { - dstPos := dd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(dd.hist) { - return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - // Copy possibly overlapping section before destination position. - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// readFlush returns a slice of the historical buffer that is ready to be -// emitted to the user. The data returned by readFlush must be fully consumed -// before calling any other dictDecoder methods. -func (dd *dictDecoder) readFlush() []byte { - toRead := dd.hist[dd.rdPos:dd.wrPos] - dd.rdPos = dd.wrPos - if dd.wrPos == len(dd.hist) { - dd.wrPos, dd.rdPos = 0, 0 - dd.full = true - } - return toRead -} diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go index ca85b355..1d32e8cd 100644 --- a/internal/flatex/inflate.go +++ b/internal/flatex/inflate.go @@ -70,13 +70,13 @@ func (e *WriteError) Error() string { return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() } -// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] +// Resetter resets a ReadCloser returned by [NewReader] // to switch to a new underlying [Reader]. This permits reusing a ReadCloser // instead of allocating a new one. type Resetter interface { // Reset discards any buffered data and resets the Resetter as if it was // newly initialized with the given reader. - Reset(r io.Reader, dict []byte) error + Reset(r io.Reader) error } // The data structure for decoding Huffman tables is based on that of @@ -287,7 +287,7 @@ type decompressor struct { codebits *[numCodes]int // Output history, buffer. - dict dictDecoder + window windowDecoder // Temporary buffer (avoids repeated allocation). buf [4]byte @@ -352,7 +352,7 @@ func (f *decompressor) Read(b []byte) (int, error) { } f.step(f) if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.dict.readFlush() // Flush what's left in case of error + f.toRead = f.window.readFlush() } } } @@ -506,9 +506,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + f.window.writeByte(byte(v)) + if f.window.availWrite() == 0 { + f.toRead = f.window.readFlush() f.step = (*decompressor).huffmanBlock f.stepState = stateInit return @@ -596,7 +596,7 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { + if dist > f.window.histSize() { f.err = CorruptInputError(f.roffset) return } @@ -608,14 +608,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = f.window.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if f.window.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.window.readFlush() f.step = (*decompressor).huffmanBlock // We need to continue this work f.stepState = stateDict return @@ -646,7 +646,7 @@ func (f *decompressor) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + f.toRead = f.window.readFlush() f.finishBlock() return } @@ -658,7 +658,7 @@ func (f *decompressor) dataBlock() { // copyData copies f.copyLen bytes from the underlying reader into f.hist. // It pauses for reads when f.hist is full. func (f *decompressor) copyData() { - buf := f.dict.writeSlice() + buf := f.window.writeSlice() if len(buf) > f.copyLen { buf = buf[:f.copyLen] } @@ -666,14 +666,14 @@ func (f *decompressor) copyData() { cnt, err := io.ReadFull(f.r, buf) f.roffset += int64(cnt) f.copyLen -= cnt - f.dict.writeMark(cnt) + f.window.writeMark(cnt) if err != nil { f.err = noEOF(err) return } - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if f.window.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.window.readFlush() f.step = (*decompressor).copyData return } @@ -682,8 +682,8 @@ func (f *decompressor) copyData() { func (f *decompressor) finishBlock() { if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() + if f.window.availRead() > 0 { + f.toRead = f.window.readFlush() } f.err = io.EOF } @@ -788,16 +788,16 @@ func fixedHuffmanDecoderInit() { }) } -func (f *decompressor) Reset(r io.Reader, dict []byte) error { +func (f *decompressor) Reset(r io.Reader) error { *f = decompressor{ rBuf: f.rBuf, bits: f.bits, codebits: f.codebits, - dict: f.dict, + window: f.window, step: (*decompressor).nextBlock, } f.makeReader(r) - f.dict.init(maxMatchOffset, dict) + f.window.init(maxMatchOffset) return nil } @@ -817,25 +817,6 @@ func NewReader(r io.Reader) io.ReadCloser { f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, nil) - return &f -} - -// NewReaderDict is like [NewReader] but initializes the reader -// with a preset dictionary. The returned reader behaves as if -// the uncompressed data stream started with the given dictionary, -// which has already been read. NewReaderDict is typically used -// to read data compressed by [NewWriterDict]. -// -// The ReadCloser returned by NewReaderDict also implements [Resetter]. -func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, dict) + f.window.init(maxMatchOffset) return &f } diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go index d16a6441..3e07e744 100644 --- a/internal/flatex/slice_inflate.go +++ b/internal/flatex/slice_inflate.go @@ -21,7 +21,7 @@ type sliceInflater struct { bits *[maxNumLit + maxNumDist]int codebits *[numCodes]int - dict dictDecoder + window windowDecoder toRead []byte step func(*sliceInflater) @@ -33,18 +33,18 @@ type sliceInflater struct { copyDist int } -func (f *sliceInflater) reset(src []byte, dict []byte) error { +func (f *sliceInflater) reset(src []byte) error { bits := f.bits codebits := f.codebits - dictState := f.dict + windowState := f.window *f = sliceInflater{ input: src, bits: bits, codebits: codebits, - dict: dictState, + window: windowState, step: (*sliceInflater).nextBlock, } - f.dict.init(maxMatchOffset, dict) + f.window.init(maxMatchOffset) return nil } @@ -103,9 +103,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + f.window.writeByte(byte(v)) + if f.window.availWrite() == 0 { + f.toRead = f.window.readFlush() f.step = (*sliceInflater).huffmanBlock f.stepState = stateInit return @@ -190,7 +190,7 @@ readLiteral: return } - if dist > f.dict.histSize() { + if dist > f.window.histSize() { f.err = CorruptInputError(f.roffset) return } @@ -201,14 +201,14 @@ readLiteral: copyHistory: { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = f.window.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if f.window.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.window.readFlush() f.step = (*sliceInflater).huffmanBlock f.stepState = stateDict return @@ -237,7 +237,7 @@ func (f *sliceInflater) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + f.toRead = f.window.readFlush() f.finishBlock() return } @@ -252,9 +252,9 @@ func (f *sliceInflater) copyData() { f.finishBlock() return } - buf := f.dict.writeSlice() + buf := f.window.writeSlice() if len(buf) == 0 { - f.toRead = f.dict.readFlush() + f.toRead = f.window.readFlush() f.step = (*sliceInflater).copyData return } @@ -270,9 +270,9 @@ func (f *sliceInflater) copyData() { f.pos += n f.roffset += int64(n) f.copyLen -= n - f.dict.writeMark(n) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + f.window.writeMark(n) + if f.window.availWrite() == 0 { + f.toRead = f.window.readFlush() f.step = (*sliceInflater).copyData return } @@ -281,8 +281,8 @@ func (f *sliceInflater) copyData() { func (f *sliceInflater) finishBlock() { if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() + if f.window.availRead() > 0 { + f.toRead = f.window.readFlush() } f.err = io.EOF } diff --git a/internal/flatex/window_decoder.go b/internal/flatex/window_decoder.go new file mode 100644 index 00000000..492c6a96 --- /dev/null +++ b/internal/flatex/window_decoder.go @@ -0,0 +1,101 @@ +package flatex + +// windowDecoder implements the sliding window used in decompression. +type windowDecoder struct { + hist []byte + + wrPos int + rdPos int + full bool +} + +func (wd *windowDecoder) init(size int) { + *wd = windowDecoder{hist: wd.hist} + + if cap(wd.hist) < size { + wd.hist = make([]byte, size) + } + wd.hist = wd.hist[:size] + + wd.wrPos = 0 + wd.rdPos = 0 + wd.full = false +} + +func (wd *windowDecoder) histSize() int { + if wd.full { + return len(wd.hist) + } + return wd.wrPos +} + +func (wd *windowDecoder) availRead() int { + return wd.wrPos - wd.rdPos +} + +func (wd *windowDecoder) availWrite() int { + return len(wd.hist) - wd.wrPos +} + +func (wd *windowDecoder) writeSlice() []byte { + return wd.hist[wd.wrPos:] +} + +func (wd *windowDecoder) writeMark(cnt int) { + wd.wrPos += cnt +} + +func (wd *windowDecoder) writeByte(c byte) { + wd.hist[wd.wrPos] = c + wd.wrPos++ +} + +func (wd *windowDecoder) writeCopy(dist, length int) int { + dstBase := wd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(wd.hist) { + endPos = len(wd.hist) + } + + if srcPos < 0 { + srcPos += len(wd.hist) + dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:]) + srcPos = 0 + } + + for dstPos < endPos { + dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos]) + } + + wd.wrPos = dstPos + return dstPos - dstBase +} + +func (wd *windowDecoder) tryWriteCopy(dist, length int) int { + dstPos := wd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(wd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + for dstPos < endPos { + dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos]) + } + + wd.wrPos = dstPos + return dstPos - dstBase +} + +func (wd *windowDecoder) readFlush() []byte { + toRead := wd.hist[wd.rdPos:wd.wrPos] + wd.rdPos = wd.wrPos + if wd.wrPos == len(wd.hist) { + wd.wrPos, wd.rdPos = 0, 0 + wd.full = true + } + return toRead +} |
