aboutsummaryrefslogtreecommitdiff
path: root/internal/zlib
diff options
context:
space:
mode:
authorGravatar Runxi Yu2025-11-19 08:00:00 +0800
committerGravatar Runxi Yu2025-11-19 08:00:00 +0800
commit962a428cf95ef8776296bb83eabc28537beda3dd (patch)
treecef1189e25ed12b8d4ec473949374658aa2e53ac /internal/zlib
parentProbably should name the custom packages specially (diff)
signatureNo signature
Switch back to internal zlib for loose objects too
Still marginally less overhead due to pooling
Diffstat (limited to 'internal/zlib')
-rw-r--r--internal/zlib/LICENSE27
-rw-r--r--internal/zlib/reader.go198
-rw-r--r--internal/zlib/writer.go194
3 files changed, 419 insertions, 0 deletions
diff --git a/internal/zlib/LICENSE b/internal/zlib/LICENSE
new file mode 100644
index 00000000..2a7cf70d
--- /dev/null
+++ b/internal/zlib/LICENSE
@@ -0,0 +1,27 @@
+Copyright 2009 The Go Authors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/internal/zlib/reader.go b/internal/zlib/reader.go
new file mode 100644
index 00000000..7602890b
--- /dev/null
+++ b/internal/zlib/reader.go
@@ -0,0 +1,198 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package zlib implements reading and writing of zlib format compressed data,
+as specified in RFC 1950.
+
+This package differs from the standard library's compress/zlib package
+in that it pools readers to reduce allocations. Writers are unchanged.
+
+Note that closing the reader returns it to a pool for reuse, so the
+caller must not retain or use the reader after calling Close. In the
+standard library's compress/zlib package it is legal to Reset a closed
+reader and continue using it; that is not allowed here, which is why
+this package exposes no Resetter interface.
+
+The implementation provides filters that uncompress during reading
+and compress during writing. For example, to write compressed data
+to a buffer:
+
+ var b bytes.Buffer
+ w := zlib.NewWriter(&b)
+ w.Write([]byte("hello, world\n"))
+ w.Close()
+
+and to read that data back:
+
+ r, err := zlib.NewReader(&b)
+ io.Copy(os.Stdout, r)
+ r.Close()
+*/
+package zlib
+
+import (
+ "bufio"
+ "compress/flate"
+ "encoding/binary"
+ "errors"
+ "hash"
+ "io"
+ "sync"
+
+ "git.sr.ht/~runxiyu/furgit/internal/adler32"
+)
+
+// Limits that reader.Reset checks against the first header byte.
+const (
+ // zlibDeflate is the value required in the low four bits (compression method).
+ zlibDeflate = 8
+ // zlibMaxWindow is the largest value accepted in the high four bits (window size).
+ zlibMaxWindow = 7
+)
+
+var (
+ // ErrChecksum is returned by Read when the checksum stored after the
+ // compressed data does not match the checksum of the decompressed bytes.
+ ErrChecksum = errors.New("zlib: invalid checksum")
+ // ErrDictionary is returned when the header asks for a preset dictionary
+ // whose checksum differs from that of the dictionary supplied by the caller.
+ ErrDictionary = errors.New("zlib: invalid dictionary")
+ // ErrHeader is returned when the two-byte ZLIB header fails validation
+ // (wrong compression method, oversized window, or bad header check).
+ ErrHeader = errors.New("zlib: invalid header")
+)
+
+// pool holds idle readers so that NewReaderDict can reuse them, together with
+// any flate decompressor they still carry, instead of allocating anew.
+var pool = sync.Pool{
+	New: func() any { return &reader{} },
+}
+
+// reader is the pooled, decompressing reader handed out by NewReaderDict.
+type reader struct {
+ // r is the source of compressed bytes. Reset wraps the caller's io.Reader
+ // in a bufio.Reader when it does not already satisfy flate.Reader.
+ r flate.Reader
+ // decompressor inflates the DEFLATE body. It is the only field that Reset
+ // carries over from one use of the reader to the next.
+ decompressor io.ReadCloser
+ // digest accumulates the checksum of the decompressed output.
+ digest hash.Hash32
+ // err is the sticky error reported by later Read calls.
+ err error
+ // scratch is a small buffer for reading the header and the trailer.
+ scratch [4]byte
+}
+
+// NewReader creates a new ReadCloser.
+// Reads from the returned ReadCloser read and decompress data from r.
+// If r does not implement [io.ByteReader], the decompressor may read more
+// data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser when done.
+func NewReader(r io.Reader) (io.ReadCloser, error) {
+ return NewReaderDict(r, nil)
+}
+
+// NewReaderDict is like [NewReader] but uses a preset dictionary.
+// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
+// If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
+//
+// The reader is taken from the package pool and reset onto r. If the header
+// cannot be read or is invalid, the error from Reset is returned and the
+// reader goes straight back to the pool.
+func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
+	z, ok := pool.Get().(*reader)
+	if !ok {
+		panic("zlib: pool returned unexpected type")
+	}
+	if err := z.Reset(r, dict); err != nil {
+		// The caller never sees z, so recycle it instead of dropping it.
+		// Keep only the decompressor so the pool does not pin r.
+		*z = reader{decompressor: z.decompressor}
+		pool.Put(z)
+		return nil, err
+	}
+	return z, nil
+}
+
+func (z *reader) Read(p []byte) (int, error) {
+ if z.err != nil {
+ return 0, z.err
+ }
+
+ var n int
+ n, z.err = z.decompressor.Read(p)
+ z.digest.Write(p[0:n])
+ if z.err != io.EOF {
+ // In the normal case we return here.
+ return n, z.err
+ }
+
+ // Finished file; check checksum.
+ if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
+ if err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ z.err = err
+ return n, z.err
+ }
+ // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
+ checksum := binary.BigEndian.Uint32(z.scratch[:4])
+ if checksum != z.digest.Sum32() {
+ z.err = ErrChecksum
+ return n, z.err
+ }
+ return n, io.EOF
+}
+
+// Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader].
+// In order for the ZLIB checksum to be verified, the reader must be
+// fully consumed until the [io.EOF].
+func (z *reader) Close() error {
+ if z.err != nil && z.err != io.EOF {
+ return z.err
+ }
+ z.err = z.decompressor.Close()
+ if z.err != nil {
+ return z.err
+ }
+
+ pool.Put(z)
+ return nil
+}
+
+// Reset discards all state of z except its decompressor and prepares z to
+// read a new zlib stream from r.
+//
+// It reads and validates the two-byte header and, when the header sets the
+// FDICT flag, the four-byte dictionary checksum that follows. It returns
+// io.ErrUnexpectedEOF if the input ends early, ErrHeader if the header is
+// invalid, and ErrDictionary if the stream asks for a dictionary whose
+// checksum differs from that of dict. When the stream does not ask for a
+// dictionary, dict is ignored whether the decompressor is new or reused.
+func (z *reader) Reset(r io.Reader, dict []byte) error {
+	*z = reader{decompressor: z.decompressor}
+	if fr, ok := r.(flate.Reader); ok {
+		z.r = fr
+	} else {
+		z.r = bufio.NewReader(r)
+	}
+
+	// Read the header (RFC 1950 section 2.2.).
+	_, z.err = io.ReadFull(z.r, z.scratch[0:2])
+	if z.err != nil {
+		if z.err == io.EOF {
+			z.err = io.ErrUnexpectedEOF
+		}
+		return z.err
+	}
+	h := binary.BigEndian.Uint16(z.scratch[:2])
+	if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) {
+		z.err = ErrHeader
+		return z.err
+	}
+	haveDict := z.scratch[1]&0x20 != 0
+	if haveDict {
+		_, z.err = io.ReadFull(z.r, z.scratch[0:4])
+		if z.err != nil {
+			if z.err == io.EOF {
+				z.err = io.ErrUnexpectedEOF
+			}
+			return z.err
+		}
+		checksum := binary.BigEndian.Uint32(z.scratch[:4])
+		if checksum != adler32.Checksum(dict) {
+			z.err = ErrDictionary
+			return z.err
+		}
+	} else {
+		// The stream does not refer to a dictionary, so none may be primed
+		// into the decompressor. Otherwise a reused decompressor would
+		// behave differently from a freshly created one.
+		dict = nil
+	}
+
+	if z.decompressor == nil {
+		// With a nil dictionary this is equivalent to flate.NewReader.
+		z.decompressor = flate.NewReaderDict(z.r, dict)
+	} else if z.err = z.decompressor.(flate.Resetter).Reset(z.r, dict); z.err != nil {
+		return z.err
+	}
+	z.digest = adler32.New()
+	return nil
+}
diff --git a/internal/zlib/writer.go b/internal/zlib/writer.go
new file mode 100644
index 00000000..65e96809
--- /dev/null
+++ b/internal/zlib/writer.go
@@ -0,0 +1,194 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zlib
+
+import (
+ "compress/flate"
+ "encoding/binary"
+ "fmt"
+ "hash"
+ "io"
+
+ "git.sr.ht/~runxiyu/furgit/internal/adler32"
+)
+
+// These constants are copied from the [flate] package, so that code that imports
+// this package does not also have to import [compress/flate].
+const (
+ NoCompression = flate.NoCompression
+ BestSpeed = flate.BestSpeed
+ BestCompression = flate.BestCompression
+ DefaultCompression = flate.DefaultCompression
+ HuffmanOnly = flate.HuffmanOnly
+)
+
+// A Writer takes data written to it and writes the compressed
+// form of that data to an underlying writer (see [NewWriter]).
+type Writer struct {
+ // w is the destination for the header, compressed data and trailer.
+ w io.Writer
+ // level is the compression level chosen at construction; Reset keeps it.
+ level int
+ // dict is the optional preset dictionary; Reset keeps it.
+ dict []byte
+ // compressor is created by writeHeader on first use and reused after Reset.
+ compressor *flate.Writer
+ // digest accumulates the checksum of the uncompressed data written.
+ digest hash.Hash32
+ // err is the sticky error returned by later Write, Flush and Close calls.
+ err error
+ // scratch is a small buffer for building the header and the trailer.
+ scratch [4]byte
+ // wroteHeader records whether writeHeader has run since construction or
+ // the last Reset.
+ wroteHeader bool
+}
+
+// NewWriter returns a [Writer] that compresses everything written to it at
+// [DefaultCompression], without a dictionary, and sends the result to w.
+//
+// The caller must call Close on the Writer when done; output may be
+// buffered and not reach w until then.
+func NewWriter(w io.Writer) *Writer {
+	// DefaultCompression is always a valid level, so build the Writer
+	// directly rather than going through the validating constructor.
+	return &Writer{
+		w:     w,
+		level: DefaultCompression,
+	}
+}
+
+// NewWriterLevel is like [NewWriter] but specifies the compression level instead
+// of assuming [DefaultCompression].
+//
+// The compression level can be [DefaultCompression], [NoCompression], [HuffmanOnly]
+// or any integer value between [BestSpeed] and [BestCompression] inclusive.
+// The error returned will be nil if the level is valid.
+func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
+ return NewWriterLevelDict(w, level, nil)
+}
+
+// NewWriterLevelDict is like [NewWriterLevel] but also takes a preset
+// dictionary to compress with.
+//
+// dict may be nil. A non-nil dict is retained, not copied, so its contents
+// should not be modified until the Writer is closed.
+// An error is returned when level lies outside [HuffmanOnly]..[BestCompression].
+func NewWriterLevelDict(w io.Writer, level int, dict []byte) (*Writer, error) {
+	if valid := HuffmanOnly <= level && level <= BestCompression; !valid {
+		return nil, fmt.Errorf("zlib: invalid compression level: %d", level)
+	}
+	z := new(Writer)
+	z.w, z.level, z.dict = w, level, dict
+	return z, nil
+}
+
+// Reset returns the [Writer] z to the state it had when it came out of
+// [NewWriterLevel] or [NewWriterLevelDict], except that it now writes to w.
+// The compression level and dictionary are kept, as are any compressor and
+// digest already allocated, which are reset for reuse.
+func (z *Writer) Reset(w io.Writer) {
+	if c := z.compressor; c != nil {
+		c.Reset(w)
+	}
+	if d := z.digest; d != nil {
+		d.Reset()
+	}
+	z.w = w
+	z.err, z.wroteHeader = nil, false
+	z.scratch = [4]byte{}
+}
+
+// writeHeader writes the ZLIB header.
+func (z *Writer) writeHeader() (err error) {
+ z.wroteHeader = true
+ // ZLIB has a two-byte header (as documented in RFC 1950).
+ // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size.
+ // The next four bits is the CM (compression method), which is 8 for deflate.
+ z.scratch[0] = 0x78
+ // The next two bits is the FLEVEL (compression level). The four values are:
+ // 0=fastest, 1=fast, 2=default, 3=best.
+ // The next bit, FDICT, is set if a dictionary is given.
+ // The final five FCHECK bits form a mod-31 checksum.
+ switch z.level {
+ case -2, 0, 1:
+ z.scratch[1] = 0 << 6
+ case 2, 3, 4, 5:
+ z.scratch[1] = 1 << 6
+ case 6, -1:
+ z.scratch[1] = 2 << 6
+ case 7, 8, 9:
+ z.scratch[1] = 3 << 6
+ default:
+ panic("unreachable")
+ }
+ if z.dict != nil {
+ z.scratch[1] |= 1 << 5
+ }
+ z.scratch[1] += uint8(31 - binary.BigEndian.Uint16(z.scratch[:2])%31)
+ if _, err = z.w.Write(z.scratch[0:2]); err != nil {
+ return err
+ }
+ if z.dict != nil {
+ // The next four bytes are the Adler-32 checksum of the dictionary.
+ binary.BigEndian.PutUint32(z.scratch[:], adler32.Checksum(z.dict))
+ if _, err = z.w.Write(z.scratch[0:4]); err != nil {
+ return err
+ }
+ }
+ if z.compressor == nil {
+ // Initialize deflater unless the Writer is being reused
+ // after a Reset call.
+ z.compressor, err = flate.NewWriterDict(z.w, z.level, z.dict)
+ if err != nil {
+ return err
+ }
+ z.digest = adler32.New()
+ }
+ return nil
+}
+
+// Write compresses p and hands the result to the underlying [io.Writer],
+// writing the ZLIB header first if that has not happened yet. Compressed
+// bytes may stay buffered until the [Writer] is flushed or closed.
+// Once any call has failed, the same error is returned by later calls.
+func (z *Writer) Write(p []byte) (n int, err error) {
+	if !z.wroteHeader {
+		z.err = z.writeHeader()
+	}
+	switch {
+	case z.err != nil:
+		return 0, z.err
+	case len(p) == 0:
+		return 0, nil
+	}
+
+	if n, err = z.compressor.Write(p); err != nil {
+		z.err = err
+		return n, err
+	}
+	// Only successfully compressed input contributes to the checksum.
+	z.digest.Write(p)
+	return n, nil
+}
+
+// Flush pushes any buffered compressed data through to the underlying
+// [io.Writer], writing the ZLIB header first if necessary. A previously
+// recorded error is returned without flushing.
+func (z *Writer) Flush() error {
+	if !z.wroteHeader {
+		z.err = z.writeHeader()
+	}
+	if z.err == nil {
+		z.err = z.compressor.Flush()
+	}
+	return z.err
+}
+
+// Close finishes the stream: it writes the header if nothing has been
+// written yet, closes the compressor so that pending data is emitted, and
+// appends the four-byte checksum trailer. The underlying io.Writer is not
+// closed. A previously recorded error is returned without further writes.
+func (z *Writer) Close() error {
+	if !z.wroteHeader {
+		z.err = z.writeHeader()
+	}
+	if z.err != nil {
+		return z.err
+	}
+	if z.err = z.compressor.Close(); z.err != nil {
+		return z.err
+	}
+
+	// ZLIB (RFC 1950) stores the checksum big-endian, unlike GZIP (RFC 1952).
+	trailer := z.scratch[:4]
+	binary.BigEndian.PutUint32(trailer, z.digest.Sum32())
+	_, z.err = z.w.Write(trailer)
+	return z.err
+}