diff options
| author | 2025-11-22 08:00:00 +0800 | |
|---|---|---|
| committer | 2025-11-22 08:00:00 +0800 | |
| commit | 9b453a7cca6bb258a8ca939dc9696fabd77b1b7c (patch) | |
| tree | 27f28df16a5f00f79021f9bd4d6d540435c9d2f8 /internal | |
| parent | flatex: Remove the stale readByte(s) wrappers and just directly index the buffer (diff) | |
| signature | No signature | |
zlib, flatex: Remove code related to dicts
Git never uses them
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/flatex/decompress_bytes.go | 22 | ||||
| -rw-r--r-- | internal/flatex/decompress_test.go | 52 | ||||
| -rw-r--r-- | internal/flatex/dict_decoder.go | 182 | ||||
| -rw-r--r-- | internal/flatex/inflate.go | 65 | ||||
| -rw-r--r-- | internal/flatex/slice_inflate.go | 42 | ||||
| -rw-r--r-- | internal/flatex/window_decoder.go | 101 | ||||
| -rw-r--r-- | internal/zlibx/decompress.go | 34 | ||||
| -rw-r--r-- | internal/zlibx/decompress_test.go | 49 | ||||
| -rw-r--r-- | internal/zlibx/reader.go | 38 |
9 files changed, 171 insertions, 414 deletions
diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go index ce6d0558..0d95084c 100644 --- a/internal/flatex/decompress_bytes.go +++ b/internal/flatex/decompress_bytes.go @@ -7,8 +7,6 @@ import ( "git.sr.ht/~runxiyu/furgit/internal/bufpool" ) -// bufferDecompressor wraps the custom slice inflater so byte-slice -// decompressions avoid repeated allocations. type bufferDecompressor struct { inflater sliceInflater } @@ -23,27 +21,15 @@ var bufferDecompressorPool = sync.Pool{ }, } -// Decompress inflates the provided DEFLATE stream and returns the full output -// in a pooled bufpool.Buffer along with the number of consumed bytes from src. func Decompress(src []byte) (*bufpool.Buffer, int, error) { - return DecompressDictSized(src, nil, 0) + return DecompressSized(src, 0) } -// DecompressDict inflates the provided DEFLATE stream using dict as the preset -// dictionary and returns the full output in a pooled bufpool.Buffer. The second -// returned value reports how many bytes of src were consumed. -func DecompressDict(src []byte, dict []byte) (*bufpool.Buffer, int, error) { - return DecompressDictSized(src, dict, 0) -} - -// DecompressDictSized is like DecompressDict but allows providing an expected -// output size to pre-size the destination buffer and avoid repeated growth. -// A non-positive sizeHint falls back to the default buffer capacity. -func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer, int, error) { +func DecompressSized(src []byte, sizeHint int) (*bufpool.Buffer, int, error) { d := bufferDecompressorPool.Get().(*bufferDecompressor) defer bufferDecompressorPool.Put(d) - if err := d.inflater.reset(src, dict); err != nil { + if err := d.inflater.reset(src); err != nil { return nil, 0, err } @@ -65,7 +51,7 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer } d.inflater.step(&d.inflater) if d.inflater.err != nil && len(d.inflater.toRead) == 0 { - d.inflater.toRead = d.inflater.dict.readFlush() + d.inflater.toRead = d.inflater.window.readFlush() } } } diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go index 7c290555..c991ea74 100644 --- a/internal/flatex/decompress_test.go +++ b/internal/flatex/decompress_test.go @@ -6,18 +6,10 @@ import ( "testing" ) -func compressDeflate(t *testing.T, payload, dict []byte) []byte { +func compressDeflate(t *testing.T, payload []byte) []byte { t.Helper() var buf bytes.Buffer - var ( - w *stdflate.Writer - err error - ) - if dict != nil { - w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict) - } else { - w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression) - } + w, err := stdflate.NewWriter(&buf, stdflate.DefaultCompression) if err != nil { t.Fatalf("NewWriter: %v", err) } @@ -32,7 +24,7 @@ func compressDeflate(t *testing.T, payload, dict []byte) []byte { func TestDecompress(t *testing.T) { payload := bytes.Repeat([]byte("golang"), 32) - compressed := compressDeflate(t, payload, nil) + compressed := compressDeflate(t, payload) out, _, err := Decompress(compressed) if err != nil { @@ -45,44 +37,14 @@ func TestDecompress(t *testing.T) { } } -func TestDecompressDict(t *testing.T) { - dict := []byte("furgit dictionary payload") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...) - - compressed := compressDeflate(t, payload, dict) - - out, _, err := DecompressDict(compressed, dict) - if err != nil { - t.Fatalf("DecompressDict: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload: got %q", out.Bytes()) - } -} - -func TestDecompressDictMissing(t *testing.T) { - dict := []byte("shared prefix to enforce dictionary usage") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" trailing data to force reference")...) - - compressed := compressDeflate(t, payload, dict) - - if _, _, err := Decompress(compressed); err == nil { - t.Fatalf("expected error when dictionary missing") - } -} - -func TestDecompressDictSizedUsesHint(t *testing.T) { +func TestDecompressSizedUsesHint(t *testing.T) { payload := []byte("short") - compressed := compressDeflate(t, payload, nil) + compressed := compressDeflate(t, payload) const hint = 1 << 19 - out, _, err := DecompressDictSized(compressed, nil, hint) + out, _, err := DecompressSized(compressed, hint) if err != nil { - t.Fatalf("DecompressDictSized: %v", err) + t.Fatalf("DecompressSized: %v", err) } defer out.Release() diff --git a/internal/flatex/dict_decoder.go b/internal/flatex/dict_decoder.go deleted file mode 100644 index 7a81e640..00000000 --- a/internal/flatex/dict_decoder.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flatex - -// dictDecoder implements the LZ77 sliding dictionary as used in decompression. -// LZ77 decompresses data through sequences of two forms of commands: -// -// - Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. -// -// - Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. -// -// For performance reasons, this implementation performs little to no sanity -// checks about the arguments. As such, the invariants documented for each -// method call must be respected. -type dictDecoder struct { - hist []byte // Sliding window history - - // Invariant: 0 <= rdPos <= wrPos <= len(hist) - wrPos int // Current output position in buffer - rdPos int // Have emitted hist[:rdPos] already - full bool // Has a full window length been written yet? -} - -// init initializes dictDecoder to have a sliding window dictionary of the given -// size. If a preset dict is provided, it will initialize the dictionary with -// the contents of dict. -func (dd *dictDecoder) init(size int, dict []byte) { - *dd = dictDecoder{hist: dd.hist} - - if cap(dd.hist) < size { - dd.hist = make([]byte, size) - } - dd.hist = dd.hist[:size] - - if len(dict) > len(dd.hist) { - dict = dict[len(dict)-len(dd.hist):] - } - dd.wrPos = copy(dd.hist, dict) - if dd.wrPos == len(dd.hist) { - dd.wrPos = 0 - dd.full = true - } - dd.rdPos = dd.wrPos -} - -// histSize reports the total amount of historical data in the dictionary. -func (dd *dictDecoder) histSize() int { - if dd.full { - return len(dd.hist) - } - return dd.wrPos -} - -// availRead reports the number of bytes that can be flushed by readFlush. -func (dd *dictDecoder) availRead() int { - return dd.wrPos - dd.rdPos -} - -// availWrite reports the available amount of output buffer space. -func (dd *dictDecoder) availWrite() int { - return len(dd.hist) - dd.wrPos -} - -// writeSlice returns a slice of the available buffer to write data to. -// -// This invariant will be kept: len(s) <= availWrite() -func (dd *dictDecoder) writeSlice() []byte { - return dd.hist[dd.wrPos:] -} - -// writeMark advances the writer pointer by cnt. -// -// This invariant must be kept: 0 <= cnt <= availWrite() -func (dd *dictDecoder) writeMark(cnt int) { - dd.wrPos += cnt -} - -// writeByte writes a single byte to the dictionary. -// -// This invariant must be kept: 0 < availWrite() -func (dd *dictDecoder) writeByte(c byte) { - dd.hist[dd.wrPos] = c - dd.wrPos++ -} - -// writeCopy copies a string at a given (dist, length) to the output. -// This returns the number of bytes copied and may be less than the requested -// length if the available space in the output buffer is too small. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) writeCopy(dist, length int) int { - dstBase := dd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := dstPos + length - if endPos > len(dd.hist) { - endPos = len(dd.hist) - } - - // Copy non-overlapping section after destination position. - // - // This section is non-overlapping in that the copy length for this section - // is always less than or equal to the backwards distance. This can occur - // if a distance refers to data that wraps-around in the buffer. - // Thus, a backwards copy is performed here; that is, the exact bytes in - // the source prior to the copy is placed in the destination. - if srcPos < 0 { - srcPos += len(dd.hist) - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) - srcPos = 0 - } - - // Copy possibly overlapping section before destination position. - // - // This section can overlap if the copy length for this section is larger - // than the backwards distance. This is allowed by LZ77 so that repeated - // strings can be succinctly represented using (dist, length) pairs. - // Thus, a forwards copy is performed here; that is, the bytes copied is - // possibly dependent on the resulting bytes in the destination as the copy - // progresses along. This is functionally equivalent to the following: - // - // for i := 0; i < endPos-dstPos; i++ { - // dd.hist[dstPos+i] = dd.hist[srcPos+i] - // } - // dstPos = endPos - // - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// tryWriteCopy tries to copy a string at a given (distance, length) to the -// output. This specialized version is optimized for short distances. -// -// This method is designed to be inlined for performance reasons. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) tryWriteCopy(dist, length int) int { - dstPos := dd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(dd.hist) { - return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - // Copy possibly overlapping section before destination position. - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// readFlush returns a slice of the historical buffer that is ready to be -// emitted to the user. The data returned by readFlush must be fully consumed -// before calling any other dictDecoder methods. -func (dd *dictDecoder) readFlush() []byte { - toRead := dd.hist[dd.rdPos:dd.wrPos] - dd.rdPos = dd.wrPos - if dd.wrPos == len(dd.hist) { - dd.wrPos, dd.rdPos = 0, 0 - dd.full = true - } - return toRead -} diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go index ca85b355..1d32e8cd 100644 --- a/internal/flatex/inflate.go +++ b/internal/flatex/inflate.go @@ -70,13 +70,13 @@ func (e *WriteError) Error() string { return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() } -// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict] +// Resetter resets a ReadCloser returned by [NewReader] // to switch to a new underlying [Reader]. This permits reusing a ReadCloser // instead of allocating a new one. type Resetter interface { // Reset discards any buffered data and resets the Resetter as if it was // newly initialized with the given reader. - Reset(r io.Reader, dict []byte) error + Reset(r io.Reader) error } // The data structure for decoding Huffman tables is based on that of @@ -287,7 +287,7 @@ type decompressor struct { codebits *[numCodes]int // Output history, buffer. - dict dictDecoder + window windowDecoder // Temporary buffer (avoids repeated allocation). buf [4]byte @@ -352,7 +352,7 @@ func (f *decompressor) Read(b []byte) (int, error) { } f.step(f) if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.dict.readFlush() // Flush what's left in case of error + f.toRead = f.window.readFlush() } } } @@ -506,9 +506,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + f.window.writeByte(byte(v)) + if f.window.availWrite() == 0 { + f.toRead = f.window.readFlush() f.step = (*decompressor).huffmanBlock f.stepState = stateInit return @@ -596,7 +596,7 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > f.dict.histSize() { + if dist > f.window.histSize() { f.err = CorruptInputError(f.roffset) return } @@ -608,14 +608,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = f.window.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if f.window.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.window.readFlush() f.step = (*decompressor).huffmanBlock // We need to continue this work f.stepState = stateDict return @@ -646,7 +646,7 @@ func (f *decompressor) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + f.toRead = f.window.readFlush() f.finishBlock() return } @@ -658,7 +658,7 @@ func (f *decompressor) dataBlock() { // copyData copies f.copyLen bytes from the underlying reader into f.hist. // It pauses for reads when f.hist is full. func (f *decompressor) copyData() { - buf := f.dict.writeSlice() + buf := f.window.writeSlice() if len(buf) > f.copyLen { buf = buf[:f.copyLen] } @@ -666,14 +666,14 @@ func (f *decompressor) copyData() { cnt, err := io.ReadFull(f.r, buf) f.roffset += int64(cnt) f.copyLen -= cnt - f.dict.writeMark(cnt) + f.window.writeMark(cnt) if err != nil { f.err = noEOF(err) return } - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if f.window.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.window.readFlush() f.step = (*decompressor).copyData return } @@ -682,8 +682,8 @@ func (f *decompressor) copyData() { func (f *decompressor) finishBlock() { if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() + if f.window.availRead() > 0 { + f.toRead = f.window.readFlush() } f.err = io.EOF } @@ -788,16 +788,16 @@ func fixedHuffmanDecoderInit() { }) } -func (f *decompressor) Reset(r io.Reader, dict []byte) error { +func (f *decompressor) Reset(r io.Reader) error { *f = decompressor{ rBuf: f.rBuf, bits: f.bits, codebits: f.codebits, - dict: f.dict, + window: f.window, step: (*decompressor).nextBlock, } f.makeReader(r) - f.dict.init(maxMatchOffset, dict) + f.window.init(maxMatchOffset) return nil } @@ -817,25 +817,6 @@ func NewReader(r io.Reader) io.ReadCloser { f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, nil) - return &f -} - -// NewReaderDict is like [NewReader] but initializes the reader -// with a preset dictionary. The returned reader behaves as if -// the uncompressed data stream started with the given dictionary, -// which has already been read. NewReaderDict is typically used -// to read data compressed by [NewWriterDict]. -// -// The ReadCloser returned by NewReaderDict also implements [Resetter]. -func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, dict) + f.window.init(maxMatchOffset) return &f } diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go index d16a6441..3e07e744 100644 --- a/internal/flatex/slice_inflate.go +++ b/internal/flatex/slice_inflate.go @@ -21,7 +21,7 @@ type sliceInflater struct { bits *[maxNumLit + maxNumDist]int codebits *[numCodes]int - dict dictDecoder + window windowDecoder toRead []byte step func(*sliceInflater) @@ -33,18 +33,18 @@ type sliceInflater struct { copyDist int } -func (f *sliceInflater) reset(src []byte, dict []byte) error { +func (f *sliceInflater) reset(src []byte) error { bits := f.bits codebits := f.codebits - dictState := f.dict + windowState := f.window *f = sliceInflater{ input: src, bits: bits, codebits: codebits, - dict: dictState, + window: windowState, step: (*sliceInflater).nextBlock, } - f.dict.init(maxMatchOffset, dict) + f.window.init(maxMatchOffset) return nil } @@ -103,9 +103,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + f.window.writeByte(byte(v)) + if f.window.availWrite() == 0 { + f.toRead = f.window.readFlush() f.step = (*sliceInflater).huffmanBlock f.stepState = stateInit return @@ -190,7 +190,7 @@ readLiteral: return } - if dist > f.dict.histSize() { + if dist > f.window.histSize() { f.err = CorruptInputError(f.roffset) return } @@ -201,14 +201,14 @@ readLiteral: copyHistory: { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = f.window.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if f.window.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.window.readFlush() f.step = (*sliceInflater).huffmanBlock f.stepState = stateDict return @@ -237,7 +237,7 @@ func (f *sliceInflater) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + f.toRead = f.window.readFlush() f.finishBlock() return } @@ -252,9 +252,9 @@ func (f *sliceInflater) copyData() { f.finishBlock() return } - buf := f.dict.writeSlice() + buf := f.window.writeSlice() if len(buf) == 0 { - f.toRead = f.dict.readFlush() + f.toRead = f.window.readFlush() f.step = (*sliceInflater).copyData return } @@ -270,9 +270,9 @@ func (f *sliceInflater) copyData() { f.pos += n f.roffset += int64(n) f.copyLen -= n - f.dict.writeMark(n) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + f.window.writeMark(n) + if f.window.availWrite() == 0 { + f.toRead = f.window.readFlush() f.step = (*sliceInflater).copyData return } @@ -281,8 +281,8 @@ func (f *sliceInflater) copyData() { func (f *sliceInflater) finishBlock() { if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() + if f.window.availRead() > 0 { + f.toRead = f.window.readFlush() } f.err = io.EOF } diff --git a/internal/flatex/window_decoder.go b/internal/flatex/window_decoder.go new file mode 100644 index 00000000..492c6a96 --- /dev/null +++ b/internal/flatex/window_decoder.go @@ -0,0 +1,101 @@ +package flatex + +// windowDecoder implements the sliding window used in decompression. +type windowDecoder struct { + hist []byte + + wrPos int + rdPos int + full bool +} + +func (wd *windowDecoder) init(size int) { + *wd = windowDecoder{hist: wd.hist} + + if cap(wd.hist) < size { + wd.hist = make([]byte, size) + } + wd.hist = wd.hist[:size] + + wd.wrPos = 0 + wd.rdPos = 0 + wd.full = false +} + +func (wd *windowDecoder) histSize() int { + if wd.full { + return len(wd.hist) + } + return wd.wrPos +} + +func (wd *windowDecoder) availRead() int { + return wd.wrPos - wd.rdPos +} + +func (wd *windowDecoder) availWrite() int { + return len(wd.hist) - wd.wrPos +} + +func (wd *windowDecoder) writeSlice() []byte { + return wd.hist[wd.wrPos:] +} + +func (wd *windowDecoder) writeMark(cnt int) { + wd.wrPos += cnt +} + +func (wd *windowDecoder) writeByte(c byte) { + wd.hist[wd.wrPos] = c + wd.wrPos++ +} + +func (wd *windowDecoder) writeCopy(dist, length int) int { + dstBase := wd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(wd.hist) { + endPos = len(wd.hist) + } + + if srcPos < 0 { + srcPos += len(wd.hist) + dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:]) + srcPos = 0 + } + + for dstPos < endPos { + dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos]) + } + + wd.wrPos = dstPos + return dstPos - dstBase +} + +func (wd *windowDecoder) tryWriteCopy(dist, length int) int { + dstPos := wd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(wd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + for dstPos < endPos { + dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos]) + } + + wd.wrPos = dstPos + return dstPos - dstBase +} + +func (wd *windowDecoder) readFlush() []byte { + toRead := wd.hist[wd.rdPos:wd.wrPos] + wd.rdPos = wd.wrPos + if wd.wrPos == len(wd.hist) { + wd.wrPos, wd.rdPos = 0, 0 + wd.full = true + } + return toRead +} diff --git a/internal/zlibx/decompress.go b/internal/zlibx/decompress.go index 68a92587..df4a34be 100644 --- a/internal/zlibx/decompress.go +++ b/internal/zlibx/decompress.go @@ -9,28 +9,11 @@ import ( "git.sr.ht/~runxiyu/furgit/internal/flatex" ) -// Decompress inflates the provided zlib wrapped stream and returns the -// uncompressed data inside a pooled bufpool.Buffer. func Decompress(src []byte) (*bufpool.Buffer, error) { return DecompressSized(src, 0) } -// DecompressSized inflates the provided zlib stream, using sizeHint to -// preallocate the output buffer when known (e.g. packfile entries). func DecompressSized(src []byte, sizeHint int) (*bufpool.Buffer, error) { - return DecompressDictSized(src, nil, sizeHint) -} - -// DecompressDict is like Decompress but accepts a preset dictionary. The -// dictionary must match the checksum embedded in the stream if the dictionary -// flag is present. -func DecompressDict(src []byte, dict []byte) (*bufpool.Buffer, error) { - return DecompressDictSized(src, dict, 0) -} - -// DecompressDictSized is like DecompressDict but allows providing an expected -// uncompressed size to avoid buffer growth copies. -func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer, error) { if len(src) < 6 { return nil, io.ErrUnexpectedEOF } @@ -42,19 +25,8 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer } offset := 2 - haveDict := flg&0x20 != 0 - if haveDict { - if len(src) < offset+4 { - return nil, io.ErrUnexpectedEOF - } - if dict == nil { - return nil, ErrDictionary - } - checksum := binary.BigEndian.Uint32(src[offset : offset+4]) - if checksum != adler32.Checksum(dict) { - return nil, ErrDictionary - } - offset += 4 + if flg&0x20 != 0 { + return nil, ErrHeader } if len(src[offset:]) < 4 { @@ -62,7 +34,7 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer } deflateData := src[offset:] - out, consumed, err := flatex.DecompressDictSized(deflateData, dict, sizeHint) + out, consumed, err := flatex.DecompressSized(deflateData, sizeHint) if err != nil { return nil, err } diff --git a/internal/zlibx/decompress_test.go b/internal/zlibx/decompress_test.go index 3dfc07a5..8dcc6c02 100644 --- a/internal/zlibx/decompress_test.go +++ b/internal/zlibx/decompress_test.go @@ -6,21 +6,10 @@ import ( "testing" ) -func compressZlib(t *testing.T, payload, dict []byte) []byte { +func compressZlib(t *testing.T, payload []byte) []byte { t.Helper() var buf bytes.Buffer - var ( - w *stdzlib.Writer - err error - ) - if dict != nil { - w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict) - } else { - w = stdzlib.NewWriter(&buf) - } - if err != nil { - t.Fatalf("NewWriter: %v", err) - } + w := stdzlib.NewWriter(&buf) if _, err := w.Write(payload); err != nil { t.Fatalf("Write: %v", err) } @@ -32,7 +21,7 @@ func compressZlib(t *testing.T, payload, dict []byte) []byte { func TestDecompress(t *testing.T) { payload := []byte("hello, zlib world!") - compressed := compressZlib(t, payload, nil) + compressed := compressZlib(t, payload) out, err := Decompress(compressed) if err != nil { @@ -45,37 +34,9 @@ func TestDecompress(t *testing.T) { } } -func TestDecompressDict(t *testing.T) { - dict := []byte("git dictionary for zlib") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" -- extended body -- extended body")...) - compressed := compressZlib(t, payload, dict) - - out, err := DecompressDict(compressed, dict) - if err != nil { - t.Fatalf("DecompressDict: %v", err) - } - defer out.Release() - - if !bytes.Equal(out.Bytes(), payload) { - t.Fatalf("unexpected payload %q", out.Bytes()) - } -} - -func TestDecompressDictMissing(t *testing.T) { - dict := []byte("preset dictionary") - payload := append([]byte(nil), dict...) - payload = append(payload, []byte(" .. more data ..")...) - compressed := compressZlib(t, payload, dict) - - if _, err := Decompress(compressed); err != ErrDictionary { - t.Fatalf("expected ErrDictionary, got %v", err) - } -} - func TestDecompressChecksumError(t *testing.T) { payload := []byte("checksum check") - compressed := compressZlib(t, payload, nil) + compressed := compressZlib(t, payload) compressed[len(compressed)-1] ^= 0xff if _, err := Decompress(compressed); err != ErrChecksum { @@ -85,7 +46,7 @@ func TestDecompressChecksumError(t *testing.T) { func TestDecompressSizedUsesHint(t *testing.T) { payload := []byte("tiny payload") - compressed := compressZlib(t, payload, nil) + compressed := compressZlib(t, payload) const hint = 1 << 20 out, err := DecompressSized(compressed, hint) diff --git a/internal/zlibx/reader.go b/internal/zlibx/reader.go index 9a4b4315..6e3a53ea 100644 --- a/internal/zlibx/reader.go +++ b/internal/zlibx/reader.go @@ -55,8 +55,6 @@ const ( var ( // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. ErrChecksum = errors.New("zlib: invalid checksum") - // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. - ErrDictionary = errors.New("zlib: invalid dictionary") // ErrHeader is returned when reading ZLIB data that has an invalid header. ErrHeader = errors.New("zlib: invalid header") ) @@ -82,19 +80,12 @@ type reader struct { // data than necessary from r. // It is the caller's responsibility to call Close on the ReadCloser when done. func NewReader(r io.Reader) (io.ReadCloser, error) { - return NewReaderDict(r, nil) -} - -// NewReaderDict is like [NewReader] but uses a preset dictionary. -// NewReaderDict ignores the dictionary if the compressed data does not refer to it. -// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary]. -func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { v := pool.Get() z, ok := v.(*reader) if !ok { panic("zlib: pool returned unexpected type") } - err := z.Reset(r, dict) + err := z.Reset(r) if err != nil { return nil, err } @@ -147,7 +138,7 @@ func (z *reader) Close() error { return nil } -func (z *reader) Reset(r io.Reader, dict []byte) error { +func (z *reader) Reset(r io.Reader) error { *z = reader{decompressor: z.decompressor} if fr, ok := r.(flatex.Reader); ok { z.r = fr @@ -168,30 +159,15 @@ func (z *reader) Reset(r io.Reader, dict []byte) error { z.err = ErrHeader return z.err } - haveDict := z.scratch[1]&0x20 != 0 - if haveDict { - _, z.err = io.ReadFull(z.r, z.scratch[0:4]) - if z.err != nil { - if z.err == io.EOF { - z.err = io.ErrUnexpectedEOF - } - return z.err - } - checksum := binary.BigEndian.Uint32(z.scratch[:4]) - if checksum != adler32.Checksum(dict) { - z.err = ErrDictionary - return z.err - } + if z.scratch[1]&0x20 != 0 { + z.err = ErrHeader + return z.err } if z.decompressor == nil { - if haveDict { - z.decompressor = flatex.NewReaderDict(z.r, dict) - } else { - z.decompressor = flatex.NewReader(z.r) - } + z.decompressor = flatex.NewReader(z.r) } else { - z.err = z.decompressor.(flatex.Resetter).Reset(z.r, dict) + z.err = z.decompressor.(flatex.Resetter).Reset(z.r) if z.err != nil { return z.err } |
