diff options
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/zlib/LICENSE | 27 | ||||
| -rw-r--r-- | internal/zlib/reader.go | 198 | ||||
| -rw-r--r-- | internal/zlib/writer.go | 194 | ||||
| -rw-r--r-- | internal/zlibx/reader.go | 7 |
4 files changed, 424 insertions, 2 deletions
diff --git a/internal/zlib/LICENSE b/internal/zlib/LICENSE new file mode 100644 index 00000000..2a7cf70d --- /dev/null +++ b/internal/zlib/LICENSE @@ -0,0 +1,27 @@ +Copyright 2009 The Go Authors. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google LLC nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/internal/zlib/reader.go b/internal/zlib/reader.go new file mode 100644 index 00000000..7602890b --- /dev/null +++ b/internal/zlib/reader.go @@ -0,0 +1,198 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zlib implements reading and writing of zlib format compressed data, +as specified in RFC 1950. + +This package differs from the standard library's compress/zlib package +in that it pools readers to reduce allocations. Writers are unchanged. + +Note that closing the reader causes it to be returned to a pool for +reuse. Therefore, the caller must not retain references to the +reader after closing it; in the standard library's compress/zlib package, +it is legal to Reset a closed reader and continue using it; that is +not allowed here, so there is simply no Resetter interface. + +The implementation provides filters that uncompress during reading +and compress during writing. For example, to write compressed data +to a buffer: + + var b bytes.Buffer + w := zlib.NewWriter(&b) + w.Write([]byte("hello, world\n")) + w.Close() + +and to read that data back: + + r, err := zlib.NewReader(&b) + io.Copy(os.Stdout, r) + r.Close() +*/ +package zlib + +import ( + "bufio" + "compress/flate" + "encoding/binary" + "errors" + "hash" + "io" + "sync" + + "git.sr.ht/~runxiyu/furgit/internal/adler32" +) + +const ( + zlibDeflate = 8 + zlibMaxWindow = 7 +) + +var ( + // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. + ErrChecksum = errors.New("zlib: invalid checksum") + // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. + ErrDictionary = errors.New("zlib: invalid dictionary") + // ErrHeader is returned when reading ZLIB data that has an invalid header. + ErrHeader = errors.New("zlib: invalid header") +) + +var pool = sync.Pool{ + New: func() any { + r := new(reader) + return r + }, +} + +type reader struct { + r flate.Reader + decompressor io.ReadCloser + digest hash.Hash32 + err error + scratch [4]byte +} + +// NewReader creates a new ReadCloser. +// Reads from the returned ReadCloser read and decompress data from r. +// If r does not implement [io.ByteReader], the decompressor may read more +// data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser when done. +func NewReader(r io.Reader) (io.ReadCloser, error) { + return NewReaderDict(r, nil) +} + +// NewReaderDict is like [NewReader] but uses a preset dictionary. +// NewReaderDict ignores the dictionary if the compressed data does not refer to it. +// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary]. +func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { + v := pool.Get() + z, ok := v.(*reader) + if !ok { + panic("zlib: pool returned unexpected type") + } + err := z.Reset(r, dict) + if err != nil { + return nil, err + } + return z, nil +} + +func (z *reader) Read(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + + var n int + n, z.err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum. + if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return n, z.err + } + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + checksum := binary.BigEndian.Uint32(z.scratch[:4]) + if checksum != z.digest.Sum32() { + z.err = ErrChecksum + return n, z.err + } + return n, io.EOF +} + +// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader]. +// In order for the ZLIB checksum to be verified, the reader must be +// fully consumed until the [io.EOF]. +func (z *reader) Close() error { + if z.err != nil && z.err != io.EOF { + return z.err + } + z.err = z.decompressor.Close() + if z.err != nil { + return z.err + } + + pool.Put(z) + return nil +} + +func (z *reader) Reset(r io.Reader, dict []byte) error { + *z = reader{decompressor: z.decompressor} + if fr, ok := r.(flate.Reader); ok { + z.r = fr + } else { + z.r = bufio.NewReader(r) + } + + // Read the header (RFC 1950 section 2.2.). + _, z.err = io.ReadFull(z.r, z.scratch[0:2]) + if z.err != nil { + if z.err == io.EOF { + z.err = io.ErrUnexpectedEOF + } + return z.err + } + h := binary.BigEndian.Uint16(z.scratch[:2]) + if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) { + z.err = ErrHeader + return z.err + } + haveDict := z.scratch[1]&0x20 != 0 + if haveDict { + _, z.err = io.ReadFull(z.r, z.scratch[0:4]) + if z.err != nil { + if z.err == io.EOF { + z.err = io.ErrUnexpectedEOF + } + return z.err + } + checksum := binary.BigEndian.Uint32(z.scratch[:4]) + if checksum != adler32.Checksum(dict) { + z.err = ErrDictionary + return z.err + } + } + + if z.decompressor == nil { + if haveDict { + z.decompressor = flate.NewReaderDict(z.r, dict) + } else { + z.decompressor = flate.NewReader(z.r) + } + } else { + z.err = z.decompressor.(flate.Resetter).Reset(z.r, dict) + if z.err != nil { + return z.err + } + } + z.digest = adler32.New() + return nil +} diff --git a/internal/zlib/writer.go b/internal/zlib/writer.go new file mode 100644 index 00000000..65e96809 --- /dev/null +++ b/internal/zlib/writer.go @@ -0,0 +1,194 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "compress/flate" + "encoding/binary" + "fmt" + "hash" + "io" + + "git.sr.ht/~runxiyu/furgit/internal/adler32" +) + +// These constants are copied from the [flate] package, so that code that imports +// [compress/zlib] does not also have to import [compress/flate]. +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression + HuffmanOnly = flate.HuffmanOnly +) + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see [NewWriter]). +type Writer struct { + w io.Writer + level int + dict []byte + compressor *flate.Writer + digest hash.Hash32 + err error + scratch [4]byte + wroteHeader bool +} + +// NewWriter creates a new [Writer]. +// Writes to the returned Writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the Writer when done. +// Writes may be buffered and not flushed until Close. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevelDict(w, DefaultCompression, nil) + return z +} + +// NewWriterLevel is like [NewWriter] but specifies the compression level instead +// of assuming [DefaultCompression]. +// +// The compression level can be [DefaultCompression], [NoCompression], [HuffmanOnly] +// or any integer value between [BestSpeed] and [BestCompression] inclusive. +// The error returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + return NewWriterLevelDict(w, level, nil) +} + +// NewWriterLevelDict is like [NewWriterLevel] but specifies a dictionary to +// compress with. +// +// The dictionary may be nil. If not, its contents should not be modified until +// the Writer is closed. +func NewWriterLevelDict(w io.Writer, level int, dict []byte) (*Writer, error) { + if level < HuffmanOnly || level > BestCompression { + return nil, fmt.Errorf("zlib: invalid compression level: %d", level) + } + return &Writer{ + w: w, + level: level, + dict: dict, + }, nil +} + +// Reset clears the state of the [Writer] z such that it is equivalent to its +// initial state from [NewWriterLevel] or [NewWriterLevelDict], but instead writing +// to w. +func (z *Writer) Reset(w io.Writer) { + z.w = w + // z.level and z.dict left unchanged. + if z.compressor != nil { + z.compressor.Reset(w) + } + if z.digest != nil { + z.digest.Reset() + } + z.err = nil + z.scratch = [4]byte{} + z.wroteHeader = false +} + +// writeHeader writes the ZLIB header. +func (z *Writer) writeHeader() (err error) { + z.wroteHeader = true + // ZLIB has a two-byte header (as documented in RFC 1950). + // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size. + // The next four bits is the CM (compression method), which is 8 for deflate. + z.scratch[0] = 0x78 + // The next two bits is the FLEVEL (compression level). The four values are: + // 0=fastest, 1=fast, 2=default, 3=best. + // The next bit, FDICT, is set if a dictionary is given. + // The final five FCHECK bits form a mod-31 checksum. + switch z.level { + case -2, 0, 1: + z.scratch[1] = 0 << 6 + case 2, 3, 4, 5: + z.scratch[1] = 1 << 6 + case 6, -1: + z.scratch[1] = 2 << 6 + case 7, 8, 9: + z.scratch[1] = 3 << 6 + default: + panic("unreachable") + } + if z.dict != nil { + z.scratch[1] |= 1 << 5 + } + z.scratch[1] += uint8(31 - binary.BigEndian.Uint16(z.scratch[:2])%31) + if _, err = z.w.Write(z.scratch[0:2]); err != nil { + return err + } + if z.dict != nil { + // The next four bytes are the Adler-32 checksum of the dictionary. + binary.BigEndian.PutUint32(z.scratch[:], adler32.Checksum(z.dict)) + if _, err = z.w.Write(z.scratch[0:4]); err != nil { + return err + } + } + if z.compressor == nil { + // Initialize deflater unless the Writer is being reused + // after a Reset call. + z.compressor, err = flate.NewWriterDict(z.w, z.level, z.dict) + if err != nil { + return err + } + z.digest = adler32.New() + } + return nil +} + +// Write writes a compressed form of p to the underlying [io.Writer]. The +// compressed bytes are not necessarily flushed until the [Writer] is closed or +// explicitly flushed. +func (z *Writer) Write(p []byte) (n int, err error) { + if !z.wroteHeader { + z.err = z.writeHeader() + } + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + n, err = z.compressor.Write(p) + if err != nil { + z.err = err + return + } + z.digest.Write(p) + return +} + +// Flush flushes the Writer to its underlying [io.Writer]. +func (z *Writer) Flush() error { + if !z.wroteHeader { + z.err = z.writeHeader() + } + if z.err != nil { + return z.err + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// [io.Writer], but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if !z.wroteHeader { + z.err = z.writeHeader() + } + if z.err != nil { + return z.err + } + z.err = z.compressor.Close() + if z.err != nil { + return z.err + } + checksum := z.digest.Sum32() + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + binary.BigEndian.PutUint32(z.scratch[:], checksum) + _, z.err = z.w.Write(z.scratch[0:4]) + return z.err +} diff --git a/internal/zlibx/reader.go b/internal/zlibx/reader.go index 80bbbb58..9a4b4315 100644 --- a/internal/zlibx/reader.go +++ b/internal/zlibx/reader.go @@ -3,11 +3,14 @@ // license that can be found in the LICENSE file. /* -Package zlib implements reading and writing of zlib format compressed data, +Package zlibx implements reading of zlib format compressed data, as specified in RFC 1950. This package differs from the standard library's compress/zlib package -in that it pools readers to reduce allocations. Writers are unchanged. +in that it pools readers to reduce allocations. Writing is unsupported. + +THis package will likely be refactorered much more for our specific +use case of only doing full decompressions to byte slices. Note that closing the reader causes it to be returned to a pool for reuse. Therefore, the caller must not retain references to the |
