aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--internal/flatex/decompress_bytes.go22
-rw-r--r--internal/flatex/decompress_test.go52
-rw-r--r--internal/flatex/dict_decoder.go182
-rw-r--r--internal/flatex/inflate.go65
-rw-r--r--internal/flatex/slice_inflate.go42
-rw-r--r--internal/flatex/window_decoder.go101
-rw-r--r--internal/zlibx/decompress.go34
-rw-r--r--internal/zlibx/decompress_test.go49
-rw-r--r--internal/zlibx/reader.go38
9 files changed, 171 insertions, 414 deletions
diff --git a/internal/flatex/decompress_bytes.go b/internal/flatex/decompress_bytes.go
index ce6d0558..0d95084c 100644
--- a/internal/flatex/decompress_bytes.go
+++ b/internal/flatex/decompress_bytes.go
@@ -7,8 +7,6 @@ import (
"git.sr.ht/~runxiyu/furgit/internal/bufpool"
)
-// bufferDecompressor wraps the custom slice inflater so byte-slice
-// decompressions avoid repeated allocations.
type bufferDecompressor struct {
inflater sliceInflater
}
@@ -23,27 +21,15 @@ var bufferDecompressorPool = sync.Pool{
},
}
-// Decompress inflates the provided DEFLATE stream and returns the full output
-// in a pooled bufpool.Buffer along with the number of consumed bytes from src.
func Decompress(src []byte) (*bufpool.Buffer, int, error) {
- return DecompressDictSized(src, nil, 0)
+ return DecompressSized(src, 0)
}
-// DecompressDict inflates the provided DEFLATE stream using dict as the preset
-// dictionary and returns the full output in a pooled bufpool.Buffer. The second
-// returned value reports how many bytes of src were consumed.
-func DecompressDict(src []byte, dict []byte) (*bufpool.Buffer, int, error) {
- return DecompressDictSized(src, dict, 0)
-}
-
-// DecompressDictSized is like DecompressDict but allows providing an expected
-// output size to pre-size the destination buffer and avoid repeated growth.
-// A non-positive sizeHint falls back to the default buffer capacity.
-func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer, int, error) {
+func DecompressSized(src []byte, sizeHint int) (*bufpool.Buffer, int, error) {
d := bufferDecompressorPool.Get().(*bufferDecompressor)
defer bufferDecompressorPool.Put(d)
- if err := d.inflater.reset(src, dict); err != nil {
+ if err := d.inflater.reset(src); err != nil {
return nil, 0, err
}
@@ -65,7 +51,7 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer
}
d.inflater.step(&d.inflater)
if d.inflater.err != nil && len(d.inflater.toRead) == 0 {
- d.inflater.toRead = d.inflater.dict.readFlush()
+ d.inflater.toRead = d.inflater.window.readFlush()
}
}
}
diff --git a/internal/flatex/decompress_test.go b/internal/flatex/decompress_test.go
index 7c290555..c991ea74 100644
--- a/internal/flatex/decompress_test.go
+++ b/internal/flatex/decompress_test.go
@@ -6,18 +6,10 @@ import (
"testing"
)
-func compressDeflate(t *testing.T, payload, dict []byte) []byte {
+func compressDeflate(t *testing.T, payload []byte) []byte {
t.Helper()
var buf bytes.Buffer
- var (
- w *stdflate.Writer
- err error
- )
- if dict != nil {
- w, err = stdflate.NewWriterDict(&buf, stdflate.DefaultCompression, dict)
- } else {
- w, err = stdflate.NewWriter(&buf, stdflate.DefaultCompression)
- }
+ w, err := stdflate.NewWriter(&buf, stdflate.DefaultCompression)
if err != nil {
t.Fatalf("NewWriter: %v", err)
}
@@ -32,7 +24,7 @@ func compressDeflate(t *testing.T, payload, dict []byte) []byte {
func TestDecompress(t *testing.T) {
payload := bytes.Repeat([]byte("golang"), 32)
- compressed := compressDeflate(t, payload, nil)
+ compressed := compressDeflate(t, payload)
out, _, err := Decompress(compressed)
if err != nil {
@@ -45,44 +37,14 @@ func TestDecompress(t *testing.T) {
}
}
-func TestDecompressDict(t *testing.T) {
- dict := []byte("furgit dictionary payload")
- payload := append([]byte(nil), dict...)
- payload = append(payload, []byte(" -- and some more data repeated -- and some more data repeated")...)
-
- compressed := compressDeflate(t, payload, dict)
-
- out, _, err := DecompressDict(compressed, dict)
- if err != nil {
- t.Fatalf("DecompressDict: %v", err)
- }
- defer out.Release()
-
- if !bytes.Equal(out.Bytes(), payload) {
- t.Fatalf("unexpected payload: got %q", out.Bytes())
- }
-}
-
-func TestDecompressDictMissing(t *testing.T) {
- dict := []byte("shared prefix to enforce dictionary usage")
- payload := append([]byte(nil), dict...)
- payload = append(payload, []byte(" trailing data to force reference")...)
-
- compressed := compressDeflate(t, payload, dict)
-
- if _, _, err := Decompress(compressed); err == nil {
- t.Fatalf("expected error when dictionary missing")
- }
-}
-
-func TestDecompressDictSizedUsesHint(t *testing.T) {
+func TestDecompressSizedUsesHint(t *testing.T) {
payload := []byte("short")
- compressed := compressDeflate(t, payload, nil)
+ compressed := compressDeflate(t, payload)
const hint = 1 << 19
- out, _, err := DecompressDictSized(compressed, nil, hint)
+ out, _, err := DecompressSized(compressed, hint)
if err != nil {
- t.Fatalf("DecompressDictSized: %v", err)
+ t.Fatalf("DecompressSized: %v", err)
}
defer out.Release()
diff --git a/internal/flatex/dict_decoder.go b/internal/flatex/dict_decoder.go
deleted file mode 100644
index 7a81e640..00000000
--- a/internal/flatex/dict_decoder.go
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flatex
-
-// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
-// LZ77 decompresses data through sequences of two forms of commands:
-//
-// - Literal insertions: Runs of one or more symbols are inserted into the data
-// stream as is. This is accomplished through the writeByte method for a
-// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
-// Any valid stream must start with a literal insertion if no preset dictionary
-// is used.
-//
-// - Backward copies: Runs of one or more symbols are copied from previously
-// emitted data. Backward copies come as the tuple (dist, length) where dist
-// determines how far back in the stream to copy from and length determines how
-// many bytes to copy. Note that it is valid for the length to be greater than
-// the distance. Since LZ77 uses forward copies, that situation is used to
-// perform a form of run-length encoding on repeated runs of symbols.
-// The writeCopy and tryWriteCopy are used to implement this command.
-//
-// For performance reasons, this implementation performs little to no sanity
-// checks about the arguments. As such, the invariants documented for each
-// method call must be respected.
-type dictDecoder struct {
- hist []byte // Sliding window history
-
- // Invariant: 0 <= rdPos <= wrPos <= len(hist)
- wrPos int // Current output position in buffer
- rdPos int // Have emitted hist[:rdPos] already
- full bool // Has a full window length been written yet?
-}
-
-// init initializes dictDecoder to have a sliding window dictionary of the given
-// size. If a preset dict is provided, it will initialize the dictionary with
-// the contents of dict.
-func (dd *dictDecoder) init(size int, dict []byte) {
- *dd = dictDecoder{hist: dd.hist}
-
- if cap(dd.hist) < size {
- dd.hist = make([]byte, size)
- }
- dd.hist = dd.hist[:size]
-
- if len(dict) > len(dd.hist) {
- dict = dict[len(dict)-len(dd.hist):]
- }
- dd.wrPos = copy(dd.hist, dict)
- if dd.wrPos == len(dd.hist) {
- dd.wrPos = 0
- dd.full = true
- }
- dd.rdPos = dd.wrPos
-}
-
-// histSize reports the total amount of historical data in the dictionary.
-func (dd *dictDecoder) histSize() int {
- if dd.full {
- return len(dd.hist)
- }
- return dd.wrPos
-}
-
-// availRead reports the number of bytes that can be flushed by readFlush.
-func (dd *dictDecoder) availRead() int {
- return dd.wrPos - dd.rdPos
-}
-
-// availWrite reports the available amount of output buffer space.
-func (dd *dictDecoder) availWrite() int {
- return len(dd.hist) - dd.wrPos
-}
-
-// writeSlice returns a slice of the available buffer to write data to.
-//
-// This invariant will be kept: len(s) <= availWrite()
-func (dd *dictDecoder) writeSlice() []byte {
- return dd.hist[dd.wrPos:]
-}
-
-// writeMark advances the writer pointer by cnt.
-//
-// This invariant must be kept: 0 <= cnt <= availWrite()
-func (dd *dictDecoder) writeMark(cnt int) {
- dd.wrPos += cnt
-}
-
-// writeByte writes a single byte to the dictionary.
-//
-// This invariant must be kept: 0 < availWrite()
-func (dd *dictDecoder) writeByte(c byte) {
- dd.hist[dd.wrPos] = c
- dd.wrPos++
-}
-
-// writeCopy copies a string at a given (dist, length) to the output.
-// This returns the number of bytes copied and may be less than the requested
-// length if the available space in the output buffer is too small.
-//
-// This invariant must be kept: 0 < dist <= histSize()
-func (dd *dictDecoder) writeCopy(dist, length int) int {
- dstBase := dd.wrPos
- dstPos := dstBase
- srcPos := dstPos - dist
- endPos := dstPos + length
- if endPos > len(dd.hist) {
- endPos = len(dd.hist)
- }
-
- // Copy non-overlapping section after destination position.
- //
- // This section is non-overlapping in that the copy length for this section
- // is always less than or equal to the backwards distance. This can occur
- // if a distance refers to data that wraps-around in the buffer.
- // Thus, a backwards copy is performed here; that is, the exact bytes in
- // the source prior to the copy is placed in the destination.
- if srcPos < 0 {
- srcPos += len(dd.hist)
- dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
- srcPos = 0
- }
-
- // Copy possibly overlapping section before destination position.
- //
- // This section can overlap if the copy length for this section is larger
- // than the backwards distance. This is allowed by LZ77 so that repeated
- // strings can be succinctly represented using (dist, length) pairs.
- // Thus, a forwards copy is performed here; that is, the bytes copied is
- // possibly dependent on the resulting bytes in the destination as the copy
- // progresses along. This is functionally equivalent to the following:
- //
- // for i := 0; i < endPos-dstPos; i++ {
- // dd.hist[dstPos+i] = dd.hist[srcPos+i]
- // }
- // dstPos = endPos
- //
- for dstPos < endPos {
- dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
- }
-
- dd.wrPos = dstPos
- return dstPos - dstBase
-}
-
-// tryWriteCopy tries to copy a string at a given (distance, length) to the
-// output. This specialized version is optimized for short distances.
-//
-// This method is designed to be inlined for performance reasons.
-//
-// This invariant must be kept: 0 < dist <= histSize()
-func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
- dstPos := dd.wrPos
- endPos := dstPos + length
- if dstPos < dist || endPos > len(dd.hist) {
- return 0
- }
- dstBase := dstPos
- srcPos := dstPos - dist
-
- // Copy possibly overlapping section before destination position.
- for dstPos < endPos {
- dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
- }
-
- dd.wrPos = dstPos
- return dstPos - dstBase
-}
-
-// readFlush returns a slice of the historical buffer that is ready to be
-// emitted to the user. The data returned by readFlush must be fully consumed
-// before calling any other dictDecoder methods.
-func (dd *dictDecoder) readFlush() []byte {
- toRead := dd.hist[dd.rdPos:dd.wrPos]
- dd.rdPos = dd.wrPos
- if dd.wrPos == len(dd.hist) {
- dd.wrPos, dd.rdPos = 0, 0
- dd.full = true
- }
- return toRead
-}
diff --git a/internal/flatex/inflate.go b/internal/flatex/inflate.go
index ca85b355..1d32e8cd 100644
--- a/internal/flatex/inflate.go
+++ b/internal/flatex/inflate.go
@@ -70,13 +70,13 @@ func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
-// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
+// Resetter resets a ReadCloser returned by [NewReader]
// to switch to a new underlying [Reader]. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
- Reset(r io.Reader, dict []byte) error
+ Reset(r io.Reader) error
}
// The data structure for decoding Huffman tables is based on that of
@@ -287,7 +287,7 @@ type decompressor struct {
codebits *[numCodes]int
// Output history, buffer.
- dict dictDecoder
+ window windowDecoder
// Temporary buffer (avoids repeated allocation).
buf [4]byte
@@ -352,7 +352,7 @@ func (f *decompressor) Read(b []byte) (int, error) {
}
f.step(f)
if f.err != nil && len(f.toRead) == 0 {
- f.toRead = f.dict.readFlush() // Flush what's left in case of error
+ f.toRead = f.window.readFlush()
}
}
}
@@ -506,9 +506,9 @@ readLiteral:
var length int
switch {
case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ f.window.writeByte(byte(v))
+ if f.window.availWrite() == 0 {
+ f.toRead = f.window.readFlush()
f.step = (*decompressor).huffmanBlock
f.stepState = stateInit
return
@@ -596,7 +596,7 @@ readLiteral:
}
// No check on length; encoding can be prescient.
- if dist > f.dict.histSize() {
+ if dist > f.window.histSize() {
f.err = CorruptInputError(f.roffset)
return
}
@@ -608,14 +608,14 @@ readLiteral:
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ cnt = f.window.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
+ if f.window.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = f.window.readFlush()
f.step = (*decompressor).huffmanBlock // We need to continue this work
f.stepState = stateDict
return
@@ -646,7 +646,7 @@ func (f *decompressor) dataBlock() {
}
if n == 0 {
- f.toRead = f.dict.readFlush()
+ f.toRead = f.window.readFlush()
f.finishBlock()
return
}
@@ -658,7 +658,7 @@ func (f *decompressor) dataBlock() {
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
- buf := f.dict.writeSlice()
+ buf := f.window.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
@@ -666,14 +666,14 @@ func (f *decompressor) copyData() {
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
- f.dict.writeMark(cnt)
+ f.window.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
+ if f.window.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = f.window.readFlush()
f.step = (*decompressor).copyData
return
}
@@ -682,8 +682,8 @@ func (f *decompressor) copyData() {
func (f *decompressor) finishBlock() {
if f.final {
- if f.dict.availRead() > 0 {
- f.toRead = f.dict.readFlush()
+ if f.window.availRead() > 0 {
+ f.toRead = f.window.readFlush()
}
f.err = io.EOF
}
@@ -788,16 +788,16 @@ func fixedHuffmanDecoderInit() {
})
}
-func (f *decompressor) Reset(r io.Reader, dict []byte) error {
+func (f *decompressor) Reset(r io.Reader) error {
*f = decompressor{
rBuf: f.rBuf,
bits: f.bits,
codebits: f.codebits,
- dict: f.dict,
+ window: f.window,
step: (*decompressor).nextBlock,
}
f.makeReader(r)
- f.dict.init(maxMatchOffset, dict)
+ f.window.init(maxMatchOffset)
return nil
}
@@ -817,25 +817,6 @@ func NewReader(r io.Reader) io.ReadCloser {
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
- f.dict.init(maxMatchOffset, nil)
- return &f
-}
-
-// NewReaderDict is like [NewReader] but initializes the reader
-// with a preset dictionary. The returned reader behaves as if
-// the uncompressed data stream started with the given dictionary,
-// which has already been read. NewReaderDict is typically used
-// to read data compressed by [NewWriterDict].
-//
-// The ReadCloser returned by NewReaderDict also implements [Resetter].
-func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
- fixedHuffmanDecoderInit()
-
- var f decompressor
- f.makeReader(r)
- f.bits = new([maxNumLit + maxNumDist]int)
- f.codebits = new([numCodes]int)
- f.step = (*decompressor).nextBlock
- f.dict.init(maxMatchOffset, dict)
+ f.window.init(maxMatchOffset)
return &f
}
diff --git a/internal/flatex/slice_inflate.go b/internal/flatex/slice_inflate.go
index d16a6441..3e07e744 100644
--- a/internal/flatex/slice_inflate.go
+++ b/internal/flatex/slice_inflate.go
@@ -21,7 +21,7 @@ type sliceInflater struct {
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
- dict dictDecoder
+ window windowDecoder
toRead []byte
step func(*sliceInflater)
@@ -33,18 +33,18 @@ type sliceInflater struct {
copyDist int
}
-func (f *sliceInflater) reset(src []byte, dict []byte) error {
+func (f *sliceInflater) reset(src []byte) error {
bits := f.bits
codebits := f.codebits
- dictState := f.dict
+ windowState := f.window
*f = sliceInflater{
input: src,
bits: bits,
codebits: codebits,
- dict: dictState,
+ window: windowState,
step: (*sliceInflater).nextBlock,
}
- f.dict.init(maxMatchOffset, dict)
+ f.window.init(maxMatchOffset)
return nil
}
@@ -103,9 +103,9 @@ readLiteral:
var length int
switch {
case v < 256:
- f.dict.writeByte(byte(v))
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ f.window.writeByte(byte(v))
+ if f.window.availWrite() == 0 {
+ f.toRead = f.window.readFlush()
f.step = (*sliceInflater).huffmanBlock
f.stepState = stateInit
return
@@ -190,7 +190,7 @@ readLiteral:
return
}
- if dist > f.dict.histSize() {
+ if dist > f.window.histSize() {
f.err = CorruptInputError(f.roffset)
return
}
@@ -201,14 +201,14 @@ readLiteral:
copyHistory:
{
- cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+ cnt := f.window.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
- cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+ cnt = f.window.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
- if f.dict.availWrite() == 0 || f.copyLen > 0 {
- f.toRead = f.dict.readFlush()
+ if f.window.availWrite() == 0 || f.copyLen > 0 {
+ f.toRead = f.window.readFlush()
f.step = (*sliceInflater).huffmanBlock
f.stepState = stateDict
return
@@ -237,7 +237,7 @@ func (f *sliceInflater) dataBlock() {
}
if n == 0 {
- f.toRead = f.dict.readFlush()
+ f.toRead = f.window.readFlush()
f.finishBlock()
return
}
@@ -252,9 +252,9 @@ func (f *sliceInflater) copyData() {
f.finishBlock()
return
}
- buf := f.dict.writeSlice()
+ buf := f.window.writeSlice()
if len(buf) == 0 {
- f.toRead = f.dict.readFlush()
+ f.toRead = f.window.readFlush()
f.step = (*sliceInflater).copyData
return
}
@@ -270,9 +270,9 @@ func (f *sliceInflater) copyData() {
f.pos += n
f.roffset += int64(n)
f.copyLen -= n
- f.dict.writeMark(n)
- if f.dict.availWrite() == 0 {
- f.toRead = f.dict.readFlush()
+ f.window.writeMark(n)
+ if f.window.availWrite() == 0 {
+ f.toRead = f.window.readFlush()
f.step = (*sliceInflater).copyData
return
}
@@ -281,8 +281,8 @@ func (f *sliceInflater) copyData() {
func (f *sliceInflater) finishBlock() {
if f.final {
- if f.dict.availRead() > 0 {
- f.toRead = f.dict.readFlush()
+ if f.window.availRead() > 0 {
+ f.toRead = f.window.readFlush()
}
f.err = io.EOF
}
diff --git a/internal/flatex/window_decoder.go b/internal/flatex/window_decoder.go
new file mode 100644
index 00000000..492c6a96
--- /dev/null
+++ b/internal/flatex/window_decoder.go
@@ -0,0 +1,101 @@
+package flatex
+
+// windowDecoder implements the sliding window used in decompression.
+type windowDecoder struct {
+ hist []byte
+
+ wrPos int
+ rdPos int
+ full bool
+}
+
+func (wd *windowDecoder) init(size int) {
+ *wd = windowDecoder{hist: wd.hist}
+
+ if cap(wd.hist) < size {
+ wd.hist = make([]byte, size)
+ }
+ wd.hist = wd.hist[:size]
+
+ wd.wrPos = 0
+ wd.rdPos = 0
+ wd.full = false
+}
+
+func (wd *windowDecoder) histSize() int {
+ if wd.full {
+ return len(wd.hist)
+ }
+ return wd.wrPos
+}
+
+func (wd *windowDecoder) availRead() int {
+ return wd.wrPos - wd.rdPos
+}
+
+func (wd *windowDecoder) availWrite() int {
+ return len(wd.hist) - wd.wrPos
+}
+
+func (wd *windowDecoder) writeSlice() []byte {
+ return wd.hist[wd.wrPos:]
+}
+
+func (wd *windowDecoder) writeMark(cnt int) {
+ wd.wrPos += cnt
+}
+
+func (wd *windowDecoder) writeByte(c byte) {
+ wd.hist[wd.wrPos] = c
+ wd.wrPos++
+}
+
+func (wd *windowDecoder) writeCopy(dist, length int) int {
+ dstBase := wd.wrPos
+ dstPos := dstBase
+ srcPos := dstPos - dist
+ endPos := dstPos + length
+ if endPos > len(wd.hist) {
+ endPos = len(wd.hist)
+ }
+
+ if srcPos < 0 {
+ srcPos += len(wd.hist)
+ dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:])
+ srcPos = 0
+ }
+
+ for dstPos < endPos {
+ dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos])
+ }
+
+ wd.wrPos = dstPos
+ return dstPos - dstBase
+}
+
+func (wd *windowDecoder) tryWriteCopy(dist, length int) int {
+ dstPos := wd.wrPos
+ endPos := dstPos + length
+ if dstPos < dist || endPos > len(wd.hist) {
+ return 0
+ }
+ dstBase := dstPos
+ srcPos := dstPos - dist
+
+ for dstPos < endPos {
+ dstPos += copy(wd.hist[dstPos:endPos], wd.hist[srcPos:dstPos])
+ }
+
+ wd.wrPos = dstPos
+ return dstPos - dstBase
+}
+
+func (wd *windowDecoder) readFlush() []byte {
+ toRead := wd.hist[wd.rdPos:wd.wrPos]
+ wd.rdPos = wd.wrPos
+ if wd.wrPos == len(wd.hist) {
+ wd.wrPos, wd.rdPos = 0, 0
+ wd.full = true
+ }
+ return toRead
+}
diff --git a/internal/zlibx/decompress.go b/internal/zlibx/decompress.go
index 68a92587..df4a34be 100644
--- a/internal/zlibx/decompress.go
+++ b/internal/zlibx/decompress.go
@@ -9,28 +9,11 @@ import (
"git.sr.ht/~runxiyu/furgit/internal/flatex"
)
-// Decompress inflates the provided zlib wrapped stream and returns the
-// uncompressed data inside a pooled bufpool.Buffer.
func Decompress(src []byte) (*bufpool.Buffer, error) {
return DecompressSized(src, 0)
}
-// DecompressSized inflates the provided zlib stream, using sizeHint to
-// preallocate the output buffer when known (e.g. packfile entries).
func DecompressSized(src []byte, sizeHint int) (*bufpool.Buffer, error) {
- return DecompressDictSized(src, nil, sizeHint)
-}
-
-// DecompressDict is like Decompress but accepts a preset dictionary. The
-// dictionary must match the checksum embedded in the stream if the dictionary
-// flag is present.
-func DecompressDict(src []byte, dict []byte) (*bufpool.Buffer, error) {
- return DecompressDictSized(src, dict, 0)
-}
-
-// DecompressDictSized is like DecompressDict but allows providing an expected
-// uncompressed size to avoid buffer growth copies.
-func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer, error) {
if len(src) < 6 {
return nil, io.ErrUnexpectedEOF
}
@@ -42,19 +25,8 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer
}
offset := 2
- haveDict := flg&0x20 != 0
- if haveDict {
- if len(src) < offset+4 {
- return nil, io.ErrUnexpectedEOF
- }
- if dict == nil {
- return nil, ErrDictionary
- }
- checksum := binary.BigEndian.Uint32(src[offset : offset+4])
- if checksum != adler32.Checksum(dict) {
- return nil, ErrDictionary
- }
- offset += 4
+ if flg&0x20 != 0 {
+ return nil, ErrHeader
}
if len(src[offset:]) < 4 {
@@ -62,7 +34,7 @@ func DecompressDictSized(src []byte, dict []byte, sizeHint int) (*bufpool.Buffer
}
deflateData := src[offset:]
- out, consumed, err := flatex.DecompressDictSized(deflateData, dict, sizeHint)
+ out, consumed, err := flatex.DecompressSized(deflateData, sizeHint)
if err != nil {
return nil, err
}
diff --git a/internal/zlibx/decompress_test.go b/internal/zlibx/decompress_test.go
index 3dfc07a5..8dcc6c02 100644
--- a/internal/zlibx/decompress_test.go
+++ b/internal/zlibx/decompress_test.go
@@ -6,21 +6,10 @@ import (
"testing"
)
-func compressZlib(t *testing.T, payload, dict []byte) []byte {
+func compressZlib(t *testing.T, payload []byte) []byte {
t.Helper()
var buf bytes.Buffer
- var (
- w *stdzlib.Writer
- err error
- )
- if dict != nil {
- w, err = stdzlib.NewWriterLevelDict(&buf, stdzlib.DefaultCompression, dict)
- } else {
- w = stdzlib.NewWriter(&buf)
- }
- if err != nil {
- t.Fatalf("NewWriter: %v", err)
- }
+ w := stdzlib.NewWriter(&buf)
if _, err := w.Write(payload); err != nil {
t.Fatalf("Write: %v", err)
}
@@ -32,7 +21,7 @@ func compressZlib(t *testing.T, payload, dict []byte) []byte {
func TestDecompress(t *testing.T) {
payload := []byte("hello, zlib world!")
- compressed := compressZlib(t, payload, nil)
+ compressed := compressZlib(t, payload)
out, err := Decompress(compressed)
if err != nil {
@@ -45,37 +34,9 @@ func TestDecompress(t *testing.T) {
}
}
-func TestDecompressDict(t *testing.T) {
- dict := []byte("git dictionary for zlib")
- payload := append([]byte(nil), dict...)
- payload = append(payload, []byte(" -- extended body -- extended body")...)
- compressed := compressZlib(t, payload, dict)
-
- out, err := DecompressDict(compressed, dict)
- if err != nil {
- t.Fatalf("DecompressDict: %v", err)
- }
- defer out.Release()
-
- if !bytes.Equal(out.Bytes(), payload) {
- t.Fatalf("unexpected payload %q", out.Bytes())
- }
-}
-
-func TestDecompressDictMissing(t *testing.T) {
- dict := []byte("preset dictionary")
- payload := append([]byte(nil), dict...)
- payload = append(payload, []byte(" .. more data ..")...)
- compressed := compressZlib(t, payload, dict)
-
- if _, err := Decompress(compressed); err != ErrDictionary {
- t.Fatalf("expected ErrDictionary, got %v", err)
- }
-}
-
func TestDecompressChecksumError(t *testing.T) {
payload := []byte("checksum check")
- compressed := compressZlib(t, payload, nil)
+ compressed := compressZlib(t, payload)
compressed[len(compressed)-1] ^= 0xff
if _, err := Decompress(compressed); err != ErrChecksum {
@@ -85,7 +46,7 @@ func TestDecompressChecksumError(t *testing.T) {
func TestDecompressSizedUsesHint(t *testing.T) {
payload := []byte("tiny payload")
- compressed := compressZlib(t, payload, nil)
+ compressed := compressZlib(t, payload)
const hint = 1 << 20
out, err := DecompressSized(compressed, hint)
diff --git a/internal/zlibx/reader.go b/internal/zlibx/reader.go
index 9a4b4315..6e3a53ea 100644
--- a/internal/zlibx/reader.go
+++ b/internal/zlibx/reader.go
@@ -55,8 +55,6 @@ const (
var (
// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
ErrChecksum = errors.New("zlib: invalid checksum")
- // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
- ErrDictionary = errors.New("zlib: invalid dictionary")
// ErrHeader is returned when reading ZLIB data that has an invalid header.
ErrHeader = errors.New("zlib: invalid header")
)
@@ -82,19 +80,12 @@ type reader struct {
// data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when done.
func NewReader(r io.Reader) (io.ReadCloser, error) {
- return NewReaderDict(r, nil)
-}
-
-// NewReaderDict is like [NewReader] but uses a preset dictionary.
-// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
-// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
-func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
v := pool.Get()
z, ok := v.(*reader)
if !ok {
panic("zlib: pool returned unexpected type")
}
- err := z.Reset(r, dict)
+ err := z.Reset(r)
if err != nil {
return nil, err
}
@@ -147,7 +138,7 @@ func (z *reader) Close() error {
return nil
}
-func (z *reader) Reset(r io.Reader, dict []byte) error {
+func (z *reader) Reset(r io.Reader) error {
*z = reader{decompressor: z.decompressor}
if fr, ok := r.(flatex.Reader); ok {
z.r = fr
@@ -168,30 +159,15 @@ func (z *reader) Reset(r io.Reader, dict []byte) error {
z.err = ErrHeader
return z.err
}
- haveDict := z.scratch[1]&0x20 != 0
- if haveDict {
- _, z.err = io.ReadFull(z.r, z.scratch[0:4])
- if z.err != nil {
- if z.err == io.EOF {
- z.err = io.ErrUnexpectedEOF
- }
- return z.err
- }
- checksum := binary.BigEndian.Uint32(z.scratch[:4])
- if checksum != adler32.Checksum(dict) {
- z.err = ErrDictionary
- return z.err
- }
+ if z.scratch[1]&0x20 != 0 {
+ z.err = ErrHeader
+ return z.err
}
if z.decompressor == nil {
- if haveDict {
- z.decompressor = flatex.NewReaderDict(z.r, dict)
- } else {
- z.decompressor = flatex.NewReader(z.r)
- }
+ z.decompressor = flatex.NewReader(z.r)
} else {
- z.err = z.decompressor.(flatex.Resetter).Reset(z.r, dict)
+ z.err = z.decompressor.(flatex.Resetter).Reset(z.r)
if z.err != nil {
return z.err
}