about | summary | refs | log | tree | commit | diff
path: root/internal/adler32
diff options
context:
space:
mode:
author: Runxi Yu, 2025-11-19 08:00:00 +0800
committer: Runxi Yu, 2025-11-19 08:00:03 +0800
commit: 7d60b65249c8af355d739ccf44482a899d89c9c1 (patch)
tree: 387fc8fe242be2bf8df137dbad626dfcf8b93995 /internal/adler32
parent: Ignore some errors in the tests (actually from stdlib but linter complains) (diff)
signature: No signature
Replace adler32 with github.com/mhr3/adler32-simd
Unfortunately no VSX support yet
Diffstat (limited to 'internal/adler32')
-rw-r--r-- internal/adler32/LICENSE 47
-rw-r--r-- internal/adler32/LICENSE.ZLIB 17
-rw-r--r-- internal/adler32/README 1
-rw-r--r-- internal/adler32/adler32.go 100
-rw-r--r-- internal/adler32/adler32_amd64.go 181
-rw-r--r-- internal/adler32/adler32_amd64_avx2.s 173
-rw-r--r-- internal/adler32/adler32_arm64.go 71
-rw-r--r-- internal/adler32/adler32_avx2.go 6
-rw-r--r-- internal/adler32/adler32_avx2.s 263
-rw-r--r-- internal/adler32/adler32_fallback.go 19
-rw-r--r-- internal/adler32/adler32_generic.go 98
-rw-r--r-- internal/adler32/adler32_neon.go 6
-rw-r--r-- internal/adler32/adler32_neon.s 208
-rw-r--r-- internal/adler32/adler32_sse3.go 6
-rw-r--r-- internal/adler32/adler32_sse3.s 214
-rw-r--r-- internal/adler32/adler32_test.go 157
-rw-r--r-- internal/adler32/bench_test.go 24
-rw-r--r-- internal/adler32/testhash.go 231
18 files changed, 930 insertions, 892 deletions
diff --git a/internal/adler32/LICENSE b/internal/adler32/LICENSE
index 2a7cf70d..5cec357a 100644
--- a/internal/adler32/LICENSE
+++ b/internal/adler32/LICENSE
@@ -1,27 +1,30 @@
-Copyright 2009 The Go Authors.
+Copyright (c) 2024, Michal Hruby
+Copyright (c) 2017 The Chromium Authors. All rights reserved.
+Copyright (c) 1995-2024 Mark Adler
+Copyright (c) 1995-2024 Jean-loup Gailly
+Copyright (c) 2022 Adam Stylinski
+
+BSD 2-Clause License
+
Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
+modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google LLC nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/internal/adler32/LICENSE.ZLIB b/internal/adler32/LICENSE.ZLIB
new file mode 100644
index 00000000..c75c1568
--- /dev/null
+++ b/internal/adler32/LICENSE.ZLIB
@@ -0,0 +1,17 @@
+Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/internal/adler32/README b/internal/adler32/README
new file mode 100644
index 00000000..b80acd00
--- /dev/null
+++ b/internal/adler32/README
@@ -0,0 +1 @@
+This package was mostly copied from github.com/mhr3/adler32-simd.
diff --git a/internal/adler32/adler32.go b/internal/adler32/adler32.go
deleted file mode 100644
index c349cd5d..00000000
--- a/internal/adler32/adler32.go
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package adler32 implements the Adler-32 checksum.
-//
-// It is defined in RFC 1950:
-//
-// Adler-32 is composed of two sums accumulated per byte: s1 is
-// the sum of all bytes, s2 is the sum of all s1 values. Both sums
-// are done modulo 65521. s1 is initialized to 1, s2 to zero. The
-// Adler-32 checksum is stored as s2*65536 + s1 in most-
-// significant-byte first (network) order.
-package adler32
-
-import (
- "errors"
- "hash"
-
- "git.sr.ht/~runxiyu/furgit/internal/byteorder"
-)
-
-const (
- // mod is the largest prime that is less than 65536.
- mod = 65521
- // nmax is the largest n such that
- // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1.
- // It is mentioned in RFC 1950 (search for "5552").
- nmax = 5552
-)
-
-// The size of an Adler-32 checksum in bytes.
-const Size = 4
-
-// digest represents the partial evaluation of a checksum.
-// The low 16 bits are s1, the high 16 bits are s2.
-type digest uint32
-
-func (d *digest) Reset() { *d = 1 }
-
-// New returns a new hash.Hash32 computing the Adler-32 checksum. Its
-// Sum method will lay the value out in big-endian byte order. The
-// returned Hash32 also implements [encoding.BinaryMarshaler] and
-// [encoding.BinaryUnmarshaler] to marshal and unmarshal the internal
-// state of the hash.
-func New() hash.Hash32 {
- d := new(digest)
- d.Reset()
- return d
-}
-
-func (d *digest) Size() int { return Size }
-
-func (d *digest) BlockSize() int { return 4 }
-
-const (
- magic = "adl\x01"
- marshaledSize = len(magic) + 4
-)
-
-func (d *digest) AppendBinary(b []byte) ([]byte, error) {
- b = append(b, magic...)
- b = byteorder.BEAppendUint32(b, uint32(*d))
- return b, nil
-}
-
-func (d *digest) MarshalBinary() ([]byte, error) {
- return d.AppendBinary(make([]byte, 0, marshaledSize))
-}
-
-func (d *digest) UnmarshalBinary(b []byte) error {
- if len(b) < len(magic) || string(b[:len(magic)]) != magic {
- return errors.New("hash/adler32: invalid hash state identifier")
- }
- if len(b) != marshaledSize {
- return errors.New("hash/adler32: invalid hash state size")
- }
- *d = digest(byteorder.BEUint32(b[len(magic):]))
- return nil
-}
-
-func (d *digest) Clone() (hash.Cloner, error) {
- r := *d
- return &r, nil
-}
-
-func (d *digest) Write(p []byte) (nn int, err error) {
- *d = update(*d, p)
- return len(p), nil
-}
-
-func (d *digest) Sum32() uint32 { return uint32(*d) }
-
-func (d *digest) Sum(in []byte) []byte {
- s := uint32(*d)
- return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
-}
-
-// Checksum returns the Adler-32 checksum of data.
-func Checksum(data []byte) uint32 { return uint32(update(1, data)) }
diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go
index a109ccc9..75a34c72 100644
--- a/internal/adler32/adler32_amd64.go
+++ b/internal/adler32/adler32_amd64.go
@@ -1,130 +1,91 @@
-//go:build amd64 && !purego
-
package adler32
-import "golang.org/x/sys/cpu"
+import (
+ "encoding/binary"
+ "errors"
+ "hash"
+ "hash/adler32"
+
+ "golang.org/x/sys/cpu"
+)
+
+// The size of an Adler-32 checksum in bytes.
+const Size = 4
+
+var (
+ hasSSE3 = cpu.X86.HasSSE3
+ hasAVX2 = cpu.X86.HasAVX2
+)
+
+// digest represents the partial evaluation of a checksum.
+// The low 16 bits are s1, the high 16 bits are s2.
+type digest uint32
-var updateFn func(d digest, p []byte) digest = updateGeneric
+func (d *digest) Reset() { *d = 1 }
-func init() {
- switch {
- case cpu.X86.HasAVX2:
- updateFn = updateAVX2
- default:
- updateFn = updateGeneric
+// New returns a new hash.Hash32 computing the Adler-32 checksum.
+func New() hash.Hash32 {
+ if !hasSSE3 {
+ return adler32.New()
}
+ d := new(digest)
+ d.Reset()
+ return d
}
-func update(d digest, p []byte) digest {
- return updateFn(d, p)
+func (d *digest) MarshalBinary() ([]byte, error) {
+ b := make([]byte, 0, marshaledSize)
+ b = append(b, magic...)
+ b = binary.BigEndian.AppendUint32(b, uint32(*d))
+ return b, nil
}
-func updateGeneric(d digest, p []byte) digest {
- s1, s2 := uint32(d&0xffff), uint32(d>>16)
-
- for len(p) > 0 {
- var q []byte
- if len(p) > nmax {
- p, q = p[:nmax], p[nmax:]
- }
+func (d *digest) UnmarshalBinary(b []byte) error {
+ if len(b) < len(magic) || string(b[:len(magic)]) != magic {
+ return errors.New("hash/adler32: invalid hash state identifier")
+ }
+ if len(b) != marshaledSize {
+ return errors.New("hash/adler32: invalid hash state size")
+ }
+ *d = digest(binary.BigEndian.Uint32(b[len(magic):]))
+ return nil
+}
- for len(p) >= 32 {
- v := p[:32]
- p = p[32:]
+func (d *digest) Size() int { return Size }
- s1 += uint32(v[0])
- s2 += s1
- s1 += uint32(v[1])
- s2 += s1
- s1 += uint32(v[2])
- s2 += s1
- s1 += uint32(v[3])
- s2 += s1
- s1 += uint32(v[4])
- s2 += s1
- s1 += uint32(v[5])
- s2 += s1
- s1 += uint32(v[6])
- s2 += s1
- s1 += uint32(v[7])
- s2 += s1
- s1 += uint32(v[8])
- s2 += s1
- s1 += uint32(v[9])
- s2 += s1
- s1 += uint32(v[10])
- s2 += s1
- s1 += uint32(v[11])
- s2 += s1
- s1 += uint32(v[12])
- s2 += s1
- s1 += uint32(v[13])
- s2 += s1
- s1 += uint32(v[14])
- s2 += s1
- s1 += uint32(v[15])
- s2 += s1
- s1 += uint32(v[16])
- s2 += s1
- s1 += uint32(v[17])
- s2 += s1
- s1 += uint32(v[18])
- s2 += s1
- s1 += uint32(v[19])
- s2 += s1
- s1 += uint32(v[20])
- s2 += s1
- s1 += uint32(v[21])
- s2 += s1
- s1 += uint32(v[22])
- s2 += s1
- s1 += uint32(v[23])
- s2 += s1
- s1 += uint32(v[24])
- s2 += s1
- s1 += uint32(v[25])
- s2 += s1
- s1 += uint32(v[26])
- s2 += s1
- s1 += uint32(v[27])
- s2 += s1
- s1 += uint32(v[28])
- s2 += s1
- s1 += uint32(v[29])
- s2 += s1
- s1 += uint32(v[30])
- s2 += s1
- s1 += uint32(v[31])
- s2 += s1
- }
+func (d *digest) BlockSize() int { return 4 }
- for i := 0; i < len(p); i++ {
- x := p[i]
- s1 += uint32(x)
- s2 += s1
+func (d *digest) Write(data []byte) (nn int, err error) {
+ if len(data) >= 64 {
+ var h uint32
+ if hasAVX2 {
+ h = adler32_avx2(uint32(*d), data)
+ } else {
+ h = adler32_sse3(uint32(*d), data)
}
-
- s1 %= mod
- s2 %= mod
- p = q
+ *d = digest(h)
+ } else {
+ h := update(uint32(*d), data)
+ *d = digest(h)
}
-
- return digest(s2<<16 | s1)
+ return len(data), nil
}
-//go:noescape
-func adler32AVX2(state uint32, b []byte) uint32
+func (d *digest) Sum32() uint32 { return uint32(*d) }
-func updateAVX2(d digest, p []byte) digest {
- s := uint32(d)
- for len(p) > 0 {
- chunk := p
- if len(chunk) > nmax {
- chunk = p[:nmax]
- }
- s = adler32AVX2(s, chunk)
+func (d *digest) Sum(in []byte) []byte {
+ s := uint32(*d)
+ return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
+}
+
+// Checksum returns the Adler-32 checksum of data.
+func Checksum(data []byte) uint32 {
+ if !hasSSE3 || len(data) < 64 {
+ return update(1, data)
+ }
- p = p[len(chunk):]
+ if hasAVX2 {
+ return adler32_avx2(1, data)
}
- return digest(s)
+ return adler32_sse3(1, data)
}
diff --git a/internal/adler32/adler32_amd64_avx2.s b/internal/adler32/adler32_amd64_avx2.s
deleted file mode 100644
index da8402a3..00000000
--- a/internal/adler32/adler32_amd64_avx2.s
+++ /dev/null
@@ -1,173 +0,0 @@
-//go:build amd64 && !purego
-
-#include "textflag.h"
-
-// func adler32AVX2(state uint32, b []byte) uint32
-// state = s2<<16 | s1
-// len(b) <= nmax enforced by Go wrapper
-TEXT ·adler32AVX2(SB), NOSPLIT, $0-40
- // state uint32 at 0
- // b.ptr *byte at 8
- // b.len int at 16
- // b.cap int at 24
- // ret uint32 at 32
-
- MOVL state+0(FP), AX
- MOVQ b_base+8(FP), SI
- MOVQ b_len+16(FP), CX
-
- // s1 in R10d, s2 in R11d
- MOVL AX, R10
- ANDL $0xffff, R10 // s1 = low 16 bits
- SHRL $16, AX
- MOVL AX, R11 // s2 = high 16 bits
-
- // Just use the scalar tail if len < 32
- CMPQ CX, $32
- JLT tail
-
-loop32:
- VMOVDQU (SI), X0
- VMOVDQU 16(SI), X1
- ADDQ $32, SI
- SUBQ $32, CX
-
- // Split 32 bytes into uint16 lanes (four groups of eight bytes)
- VPMOVZXBW X0, Y2
- VPMOVZXBW X1, Y3
- VEXTRACTI128 $0, Y2, X4
- VEXTRACTI128 $1, Y2, X5
- VEXTRACTI128 $0, Y3, X6
- VEXTRACTI128 $1, Y3, X7
-
- // Process each group using prefix sums (only adds, no multiplies)
- // Group 0 (bytes 0..7)
- VMOVDQA X4, X8
- VPSLLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $7, X8, R8
- VPSRLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $0, X8, R9
- MOVL R10, R12
- SHLL $3, R12
- ADDL R12, R11
- ADDL R9, R11
- ADDL R8, R10
-
- // Group 1 (bytes 8..15)
- VMOVDQA X5, X8
- VPSLLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $7, X8, R8
- VPSRLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $0, X8, R9
- MOVL R10, R12
- SHLL $3, R12
- ADDL R12, R11
- ADDL R9, R11
- ADDL R8, R10
-
- // Group 2 (bytes 16..23)
- VMOVDQA X6, X8
- VPSLLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $7, X8, R8
- VPSRLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $0, X8, R9
- MOVL R10, R12
- SHLL $3, R12
- ADDL R12, R11
- ADDL R9, R11
- ADDL R8, R10
-
- // Group 3 (bytes 24..31)
- VMOVDQA X7, X8
- VPSLLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSLLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $7, X8, R8
- VPSRLDQ $8, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $4, X8, X9
- VPADDW X9, X8, X8
- VPSRLDQ $2, X8, X9
- VPADDW X9, X8, X8
- VPEXTRW $0, X8, R9
- MOVL R10, R12
- SHLL $3, R12
- ADDL R12, R11
- ADDL R9, R11
- ADDL R8, R10
-
- CMPQ CX, $32
- JGE loop32
-
-tail:
- TESTQ CX, CX
- JEQ done
-
-tail_loop:
- MOVBLZX (SI), R8
- INCQ SI
- DECQ CX
-
- ADDL R8, R10
- ADDL R10, R11
-
- TESTQ CX, CX
- JNE tail_loop
-
-done:
- MOVL $65521, R8
-
- // Reduce s1 %= mod
- MOVL R10, AX
- XORL DX, DX
- DIVL R8
- MOVL DX, R10
-
- // Reduce s2 %= mod
- MOVL R11, AX
- XORL DX, DX
- DIVL R8
- MOVL DX, R11
-
- MOVL R11, AX
- SHLL $16, AX
- ANDL $0xffff, R10
- ORL R10, AX
-
- VZEROUPPER
-
- MOVL AX, ret+32(FP)
- RET
diff --git a/internal/adler32/adler32_arm64.go b/internal/adler32/adler32_arm64.go
new file mode 100644
index 00000000..830a2580
--- /dev/null
+++ b/internal/adler32/adler32_arm64.go
@@ -0,0 +1,71 @@
+package adler32
+
+import (
+ "encoding/binary"
+ "errors"
+ "hash"
+)
+
+// The size of an Adler-32 checksum in bytes.
+const Size = 4
+
+// digest represents the partial evaluation of a checksum.
+// The low 16 bits are s1, the high 16 bits are s2.
+type digest uint32
+
+func (d *digest) Reset() { *d = 1 }
+
+// New returns a new hash.Hash32 computing the Adler-32 checksum.
+func New() hash.Hash32 {
+ d := new(digest)
+ d.Reset()
+ return d
+}
+
+func (d *digest) MarshalBinary() ([]byte, error) {
+ b := make([]byte, 0, marshaledSize)
+ b = append(b, magic...)
+ b = binary.BigEndian.AppendUint32(b, uint32(*d))
+ return b, nil
+}
+
+func (d *digest) UnmarshalBinary(b []byte) error {
+ if len(b) < len(magic) || string(b[:len(magic)]) != magic {
+ return errors.New("hash/adler32: invalid hash state identifier")
+ }
+ if len(b) != marshaledSize {
+ return errors.New("hash/adler32: invalid hash state size")
+ }
+ *d = digest(binary.BigEndian.Uint32(b[len(magic):]))
+ return nil
+}
+
+func (d *digest) Size() int { return Size }
+
+func (d *digest) BlockSize() int { return 4 }
+
+func (d *digest) Write(data []byte) (nn int, err error) {
+ if len(data) >= 64 {
+ h := adler32_neon(uint32(*d), data)
+ *d = digest(h)
+ } else {
+ h := update(uint32(*d), data)
+ *d = digest(h)
+ }
+ return len(data), nil
+}
+
+func (d *digest) Sum32() uint32 { return uint32(*d) }
+
+func (d *digest) Sum(in []byte) []byte {
+ s := uint32(*d)
+ return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
+}
+
+// Checksum returns the Adler-32 checksum of data.
+func Checksum(data []byte) uint32 {
+ if len(data) >= 64 {
+ return adler32_neon(1, data)
+ }
+ return update(1, data)
+}
diff --git a/internal/adler32/adler32_avx2.go b/internal/adler32/adler32_avx2.go
new file mode 100644
index 00000000..042812b8
--- /dev/null
+++ b/internal/adler32/adler32_avx2.go
@@ -0,0 +1,6 @@
+//go:build !purego && amd64
+
+package adler32
+
+//go:noescape
+func adler32_avx2(in uint32, buf []byte) uint32
diff --git a/internal/adler32/adler32_avx2.s b/internal/adler32/adler32_avx2.s
new file mode 100644
index 00000000..37e46bb5
--- /dev/null
+++ b/internal/adler32/adler32_avx2.s
@@ -0,0 +1,263 @@
+//go:build !noasm && amd64
+
+#include "textflag.h"
+
+DATA LCPI0_0<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
+DATA LCPI0_0<>+0x08(SB)/8, $0x1112131415161718
+DATA LCPI0_0<>+0x10(SB)/8, $0x090a0b0c0d0e0f10
+DATA LCPI0_0<>+0x18(SB)/8, $0x0102030405060708
+GLOBL LCPI0_0<>(SB), (RODATA|NOPTR), $32
+
+DATA LCPI0_1<>+0x00(SB)/8, $0x0001000100010001
+DATA LCPI0_1<>+0x08(SB)/8, $0x0001000100010001
+DATA LCPI0_1<>+0x10(SB)/8, $0x0001000100010001
+DATA LCPI0_1<>+0x18(SB)/8, $0x0001000100010001
+GLOBL LCPI0_1<>(SB), (RODATA|NOPTR), $32
+
+DATA LCPI0_2<>+0x00(SB)/2, $0x0001
+GLOBL LCPI0_2<>(SB), (RODATA|NOPTR), $2
+
+TEXT ·adler32_avx2(SB), NOSPLIT, $0-36
+ MOVLQZX adler+0(FP), DI
+ MOVQ src+8(FP), SI
+ MOVQ len+16(FP), DX
+ MOVQ cap+24(FP), CX
+ WORD $0x8548; BYTE $0xf6 // TESTQ SI, SI // test rsi, rsi
+ JE LBB0_1 // <-- // je .LBB0_1
+ WORD $0xf889 // MOVL DI, AX // mov eax, edi
+ WORD $0x8548; BYTE $0xd2 // TESTQ DX, DX // test rdx, rdx
+ JE LBB0_2 // <-- // je .LBB0_2
+ NOP // (skipped) // push rbp
+ NOP // (skipped) // mov rbp, rsp
+ NOP // (skipped) // and rsp, -8
+ WORD $0xc189 // MOVL AX, CX // mov ecx, eax
+ WORD $0xe9c1; BYTE $0x10 // SHRL $0x10, CX // shr ecx, 16
+ WORD $0xb70f; BYTE $0xc0 // MOVZX AX, AX // movzx eax, ax
+ CMPQ DX, $0x20 // <-- // cmp rdx, 32
+ JB LBB0_17 // <-- // jb .LBB0_17
+ LONG $0x078071bf; BYTE $0x80 // MOVL $-0x7ff87f8f, DI // mov edi, 2147975281
+ LONG $0xc0eff9c5 // VPXOR X0, X0, X0 // vpxor xmm0, xmm0, xmm0
+ VMOVDQA LCPI0_0<>(SB), Y1 // <-- // vmovdqa ymm1, ymmword ptr [rip + .LCPI0_0]
+ VPBROADCASTW LCPI0_2<>(SB), Y2 // <-- // vpbroadcastw ymm2, word ptr [rip + .LCPI0_2]
+ JMP LBB0_6 // <-- // jmp .LBB0_6
+
+LBB0_7:
+ LONG $0xf46ffdc5 // VMOVDQA Y4, Y6 // vmovdqa ymm6, ymm4
+ LONG $0xedefd1c5 // VPXOR X5, X5, X5 // vpxor xmm5, xmm5, xmm5
+
+LBB0_14:
+ SUBQ AX, DX // <-- // sub rdx, rax
+ LONG $0xf572ddc5; BYTE $0x05 // ? // vpslld ymm4, ymm5, 5
+ LONG $0xdbfeddc5 // VPADDD Y3, Y4, Y3 // vpaddd ymm3, ymm4, ymm3
+ LONG $0x397de3c4; WORD $0x01f4 // VEXTRACTI128 $0x1, Y6, X4 // vextracti128 xmm4, ymm6, 1
+ LONG $0xecc6c8c5; BYTE $0x88 // VSHUFPS $-0x78, X4, X6, X5 // vshufps xmm5, xmm6, xmm4, 136
+ LONG $0xe470f9c5; BYTE $0x88 // VPSHUFD $-0x78, X4, X4 // vpshufd xmm4, xmm4, 136
+ LONG $0xe4fed1c5 // VPADDD X4, X5, X4 // vpaddd xmm4, xmm5, xmm4
+ LONG $0xec70f9c5; BYTE $0x55 // VPSHUFD $0x55, X4, X5 // vpshufd xmm5, xmm4, 85
+ LONG $0xe4fed1c5 // VPADDD X4, X5, X4 // vpaddd xmm4, xmm5, xmm4
+ LONG $0xe07ef9c5 // VMOVD X4, AX // vmovd eax, xmm4
+ MOVQ AX, CX // <-- // mov rcx, rax
+ IMULQ DI, CX // <-- // imul rcx, rdi
+ SHRQ $0x2f, CX // <-- // shr rcx, 47
+ LONG $0xfff1c969; WORD $0x0000 // IMULL $0xfff1, CX, CX // imul ecx, ecx, 65521
+ WORD $0xc829 // SUBL CX, AX // sub eax, ecx
+ LONG $0x397de3c4; WORD $0x01dc // VEXTRACTI128 $0x1, Y3, X4 // vextracti128 xmm4, ymm3, 1
+ LONG $0xdbfed9c5 // VPADDD X3, X4, X3 // vpaddd xmm3, xmm4, xmm3
+ LONG $0xe370f9c5; BYTE $0xee // VPSHUFD $-0x12, X3, X4 // vpshufd xmm4, xmm3, 238
+ LONG $0xdcfee1c5 // VPADDD X4, X3, X3 // vpaddd xmm3, xmm3, xmm4
+ LONG $0xe370f9c5; BYTE $0x55 // VPSHUFD $0x55, X3, X4 // vpshufd xmm4, xmm3, 85
+ LONG $0xdbfed9c5 // VPADDD X3, X4, X3 // vpaddd xmm3, xmm4, xmm3
+ LONG $0xd97ef9c5 // VMOVD X3, CX // vmovd ecx, xmm3
+ MOVQ CX, R8 // <-- // mov r8, rcx
+ IMULQ DI, R8 // <-- // imul r8, rdi
+ SHRQ $0x2f, R8 // <-- // shr r8, 47
+ LONG $0xf1c06945; WORD $0x00ff; BYTE $0x00 // IMULL $0xfff1, R8, R8 // imul r8d, r8d, 65521
+ WORD $0x2944; BYTE $0xc1 // SUBL R8, CX // sub ecx, r8d
+ CMPQ DX, $0x1f // <-- // cmp rdx, 31
+ JBE LBB0_15 // <-- // jbe .LBB0_15
+
+LBB0_6:
+ LONG $0xe06ef9c5 // VMOVD AX, X4 // vmovd xmm4, eax
+ LONG $0xd96ef9c5 // VMOVD CX, X3 // vmovd xmm3, ecx
+ CMPQ DX, $0x15b0 // <-- // cmp rdx, 5552
+ LONG $0x15b0b841; WORD $0x0000 // MOVL $0x15b0, R8 // mov r8d, 5552
+ LONG $0xc2420f4c // CMOVB DX, R8 // cmovb r8, rdx
+ WORD $0x8944; BYTE $0xc0 // MOVL R8, AX // mov eax, r8d
+ LONG $0x001fe025; BYTE $0x00 // ANDL $0x1fe0, AX // and eax, 8160
+ JE LBB0_7 // <-- // je .LBB0_7
+ ADDQ $-0x20, R8 // <-- // add r8, -32
+ LONG $0xedefd1c5 // VPXOR X5, X5, X5 // vpxor xmm5, xmm5, xmm5
+ LONG $0x20c0f641 // TESTL $0x20, R8 // test r8b, 32
+ JNE LBB0_9 // <-- // jne .LBB0_9
+ LONG $0x2e6ffec5 // VMOVDQU 0(SI), Y5 // vmovdqu ymm5, ymmword ptr [rsi]
+ ADDQ $0x20, SI // <-- // add rsi, 32
+ LEAQ -0x20(AX), CX // <-- // lea rcx, [rax - 32]
+ LONG $0xf0f6d5c5 // VPSADBW Y0, Y5, Y6 // vpsadbw ymm6, ymm5, ymm0
+ LONG $0xf4fecdc5 // VPADDD Y4, Y6, Y6 // vpaddd ymm6, ymm6, ymm4
+ LONG $0x0455e2c4; BYTE $0xe9 // VPMADDUBSW Y1, Y5, Y5 // vpmaddubsw ymm5, ymm5, ymm1
+ LONG $0xeaf5d5c5 // VPMADDWD Y2, Y5, Y5 // vpmaddwd ymm5, ymm5, ymm2
+ LONG $0xdbfed5c5 // VPADDD Y3, Y5, Y3 // vpaddd ymm3, ymm5, ymm3
+ LONG $0xec6ffdc5 // VMOVDQA Y4, Y5 // vmovdqa ymm5, ymm4
+ LONG $0xe66ffdc5 // VMOVDQA Y6, Y4 // vmovdqa ymm4, ymm6
+ CMPQ R8, $0x20 // <-- // cmp r8, 32
+ JAE LBB0_12 // <-- // jae .LBB0_12
+ JMP LBB0_14 // <-- // jmp .LBB0_14
+
+LBB0_9:
+ MOVQ AX, CX // <-- // mov rcx, rax
+ CMPQ R8, $0x20 // <-- // cmp r8, 32
+ JB LBB0_14 // <-- // jb .LBB0_14
+
+LBB0_12:
+ LONG $0x366ffec5 // VMOVDQU 0(SI), Y6 // vmovdqu ymm6, ymmword ptr [rsi]
+ LONG $0x7e6ffec5; BYTE $0x20 // VMOVDQU 0x20(SI), Y7 // vmovdqu ymm7, ymmword ptr [rsi + 32]
+ LONG $0xc0f64dc5 // VPSADBW Y0, Y6, Y8 // vpsadbw ymm8, ymm6, ymm0
+ LONG $0xc4fe3dc5 // VPADDD Y4, Y8, Y8 // vpaddd ymm8, ymm8, ymm4
+ LONG $0xecfed5c5 // VPADDD Y4, Y5, Y5 // vpaddd ymm5, ymm5, ymm4
+ LONG $0x044de2c4; BYTE $0xe1 // VPMADDUBSW Y1, Y6, Y4 // vpmaddubsw ymm4, ymm6, ymm1
+ LONG $0xe2f5ddc5 // VPMADDWD Y2, Y4, Y4 // vpmaddwd ymm4, ymm4, ymm2
+ LONG $0xdbfeddc5 // VPADDD Y3, Y4, Y3 // vpaddd ymm3, ymm4, ymm3
+ ADDQ $0x40, SI // <-- // add rsi, 64
+ LONG $0xe0f6c5c5 // VPSADBW Y0, Y7, Y4 // vpsadbw ymm4, ymm7, ymm0
+ LONG $0xe4febdc5 // VPADDD Y4, Y8, Y4 // vpaddd ymm4, ymm8, ymm4
+ LONG $0xedfebdc5 // VPADDD Y5, Y8, Y5 // vpaddd ymm5, ymm8, ymm5
+ LONG $0x0445e2c4; BYTE $0xf1 // VPMADDUBSW Y1, Y7, Y6 // vpmaddubsw ymm6, ymm7, ymm1
+ LONG $0xf2f5cdc5 // VPMADDWD Y2, Y6, Y6 // vpmaddwd ymm6, ymm6, ymm2
+ LONG $0xdbfecdc5 // VPADDD Y3, Y6, Y3 // vpaddd ymm3, ymm6, ymm3
+ ADDQ $-0x40, CX // <-- // add rcx, -64
+ JNE LBB0_12 // <-- // jne .LBB0_12
+ LONG $0xf46ffdc5 // VMOVDQA Y4, Y6 // vmovdqa ymm6, ymm4
+ JMP LBB0_14 // <-- // jmp .LBB0_14
+
+LBB0_1:
+ LONG $0x000001b8; BYTE $0x00 // MOVL $0x1, AX // mov eax, 1
+
+LBB0_2:
+ MOVL AX, ret+32(FP) // <--
+ RET // <-- // ret
+
+LBB0_15:
+ WORD $0x8548; BYTE $0xd2 // TESTQ DX, DX // test rdx, rdx
+ JE LBB0_16 // <-- // je .LBB0_16
+
+LBB0_17:
+ CMPQ DX, $0x10 // <-- // cmp rdx, 16
+ JB LBB0_20 // <-- // jb .LBB0_20
+ WORD $0xb60f; BYTE $0x3e // MOVZX 0(SI), DI // movzx edi, byte ptr [rsi]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x017eb60f // MOVZX 0x1(SI), DI // movzx edi, byte ptr [rsi + 1]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x0246b60f // MOVZX 0x2(SI), AX // movzx eax, byte ptr [rsi + 2]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x037eb60f // MOVZX 0x3(SI), DI // movzx edi, byte ptr [rsi + 3]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x0446b60f // MOVZX 0x4(SI), AX // movzx eax, byte ptr [rsi + 4]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x057eb60f // MOVZX 0x5(SI), DI // movzx edi, byte ptr [rsi + 5]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x0646b60f // MOVZX 0x6(SI), AX // movzx eax, byte ptr [rsi + 6]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x077eb60f // MOVZX 0x7(SI), DI // movzx edi, byte ptr [rsi + 7]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x0846b60f // MOVZX 0x8(SI), AX // movzx eax, byte ptr [rsi + 8]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x097eb60f // MOVZX 0x9(SI), DI // movzx edi, byte ptr [rsi + 9]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x0a46b60f // MOVZX 0xa(SI), AX // movzx eax, byte ptr [rsi + 10]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x0b7eb60f // MOVZX 0xb(SI), DI // movzx edi, byte ptr [rsi + 11]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x0c46b60f // MOVZX 0xc(SI), AX // movzx eax, byte ptr [rsi + 12]
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x0d7eb60f // MOVZX 0xd(SI), DI // movzx edi, byte ptr [rsi + 13]
+ WORD $0xc701 // ADDL AX, DI // add edi, eax
+ WORD $0xf901 // ADDL DI, CX // add ecx, edi
+ LONG $0x46b60f44; BYTE $0x0e // MOVZX 0xe(SI), R8 // movzx r8d, byte ptr [rsi + 14]
+ WORD $0x0141; BYTE $0xf8 // ADDL DI, R8 // add r8d, edi
+ WORD $0x0144; BYTE $0xc1 // ADDL R8, CX // add ecx, r8d
+ LONG $0x0f46b60f // MOVZX 0xf(SI), AX // movzx eax, byte ptr [rsi + 15]
+ WORD $0x0144; BYTE $0xc0 // ADDL R8, AX // add eax, r8d
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ ADDQ $-0x10, DX // <-- // add rdx, -16
+ JE LBB0_27 // <-- // je .LBB0_27
+ ADDQ $0x10, SI // <-- // add rsi, 16
+
+LBB0_20:
+ LEAQ -0x1(DX), DI // <-- // lea rdi, [rdx - 1]
+ MOVQ DX, R9 // <-- // mov r9, rdx
+ ANDQ $0x3, R9 // <-- // and r9, 3
+ JE LBB0_24 // <-- // je .LBB0_24
+ XORL R8, R8 // <-- // xor r8d, r8d
+
+LBB0_22:
+ LONG $0x14b60f46; BYTE $0x06 // MOVZX 0(SI)(R8*1), R10 // movzx r10d, byte ptr [rsi + r8]
+ WORD $0x0144; BYTE $0xd0 // ADDL R10, AX // add eax, r10d
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ INCQ R8 // <-- // inc r8
+ CMPQ R9, R8 // <-- // cmp r9, r8
+ JNE LBB0_22 // <-- // jne .LBB0_22
+ ADDQ R8, SI // <-- // add rsi, r8
+ SUBQ R8, DX // <-- // sub rdx, r8
+
+LBB0_24:
+ CMPQ DI, $0x3 // <-- // cmp rdi, 3
+ JB LBB0_27 // <-- // jb .LBB0_27
+ XORL DI, DI // <-- // xor edi, edi
+
+LBB0_26:
+ LONG $0x04b60f44; BYTE $0x3e // MOVZX 0(SI)(DI*1), R8 // movzx r8d, byte ptr [rsi + rdi]
+ WORD $0x0141; BYTE $0xc0 // ADDL AX, R8 // add r8d, eax
+ WORD $0x0144; BYTE $0xc1 // ADDL R8, CX // add ecx, r8d
+ LONG $0x3e44b60f; BYTE $0x01 // MOVZX 0x1(SI)(DI*1), AX // movzx eax, byte ptr [rsi + rdi + 1]
+ WORD $0x0144; BYTE $0xc0 // ADDL R8, AX // add eax, r8d
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ LONG $0x44b60f44; WORD $0x023e // MOVZX 0x2(SI)(DI*1), R8 // movzx r8d, byte ptr [rsi + rdi + 2]
+ WORD $0x0141; BYTE $0xc0 // ADDL AX, R8 // add r8d, eax
+ WORD $0x0144; BYTE $0xc1 // ADDL R8, CX // add ecx, r8d
+ LONG $0x3e44b60f; BYTE $0x03 // MOVZX 0x3(SI)(DI*1), AX // movzx eax, byte ptr [rsi + rdi + 3]
+ WORD $0x0144; BYTE $0xc0 // ADDL R8, AX // add eax, r8d
+ WORD $0xc101 // ADDL AX, CX // add ecx, eax
+ ADDQ $0x4, DI // <-- // add rdi, 4
+ CMPQ DX, DI // <-- // cmp rdx, rdi
+ JNE LBB0_26 // <-- // jne .LBB0_26
+
+LBB0_27:
+ LONG $0x000f908d; WORD $0xffff // LEAL -0xfff1(AX), DX // lea edx, [rax - 65521]
+ CMPL AX, $0xfff1 // <-- // cmp eax, 65521
+ WORD $0x420f; BYTE $0xd0 // CMOVB AX, DX // cmovb edx, eax
+ WORD $0xc889 // MOVL CX, AX // mov eax, ecx
+ LONG $0x078071be; BYTE $0x80 // MOVL $-0x7ff87f8f, SI // mov esi, 2147975281
+ IMULQ AX, SI // <-- // imul rsi, rax
+ SHRQ $0x2f, SI // <-- // shr rsi, 47
+ LONG $0xfff1c669; WORD $0x0000 // IMULL $0xfff1, SI, AX // imul eax, esi, 65521
+ WORD $0xc129 // SUBL AX, CX // sub ecx, eax
+ WORD $0xe1c1; BYTE $0x10 // SHLL $0x10, CX // shl ecx, 16
+ WORD $0xd109 // ORL DX, CX // or ecx, edx
+ WORD $0xc889 // MOVL CX, AX // mov eax, ecx
+ NOP // (skipped) // mov rsp, rbp
+ NOP // (skipped) // pop rbp
+ VZEROUPPER // <-- // vzeroupper
+ MOVL AX, ret+32(FP) // <--
+ RET // <-- // ret
+
+LBB0_16:
+ WORD $0xe1c1; BYTE $0x10 // SHLL $0x10, CX // shl ecx, 16
+ WORD $0xc809 // ORL CX, AX // or eax, ecx
+ NOP // (skipped) // mov rsp, rbp
+ NOP // (skipped) // pop rbp
+ VZEROUPPER // <-- // vzeroupper
+ MOVL AX, ret+32(FP) // <--
+ RET // <-- // ret
diff --git a/internal/adler32/adler32_fallback.go b/internal/adler32/adler32_fallback.go
new file mode 100644
index 00000000..c213c3c1
--- /dev/null
+++ b/internal/adler32/adler32_fallback.go
@@ -0,0 +1,19 @@
+//go:build (!arm64 && !amd64) || purego
+
+package adler32
+
+import (
+ "hash"
+ "hash/adler32"
+)
+
+// The size of an Adler-32 checksum in bytes.
+const Size = 4
+
+// New returns a new hash.Hash32 computing the Adler-32 checksum.
+func New() hash.Hash32 {
+ return adler32.New()
+}
+
+// Checksum returns the Adler-32 checksum of data.
+func Checksum(data []byte) uint32 { return adler32.Checksum(data) }
diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go
index 8ba330a5..0908d8f7 100644
--- a/internal/adler32/adler32_generic.go
+++ b/internal/adler32/adler32_generic.go
@@ -1,96 +1,44 @@
-//go:build !amd64 || purego
-
package adler32
-func update(d digest, p []byte) digest {
- s1, s2 := uint32(d&0xffff), uint32(d>>16)
+const (
+ // mod is the largest prime that is less than 65536.
+ mod = 65521
+ // nmax is the largest n such that
+ // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1.
+ // It is mentioned in RFC 1950 (search for "5552").
+ nmax = 5552
+
+ // binary representation compatible with standard library.
+ magic = "adl\x01"
+ marshaledSize = len(magic) + 4
+)
+// update adds the bytes of p to the running checksum d and returns the new checksum.
+func update(d uint32, p []byte) uint32 {
+ s1, s2 := d&0xffff, d>>16 // s1 is the low 16 bits of d, s2 the high 16 bits
for len(p) > 0 {
var q []byte
if len(p) > nmax {
p, q = p[:nmax], p[nmax:]
}
-
- for len(p) >= 32 {
- v := p[:32]
- p = p[32:]
-
- s1 += uint32(v[0])
- s2 += s1
- s1 += uint32(v[1])
- s2 += s1
- s1 += uint32(v[2])
- s2 += s1
- s1 += uint32(v[3])
- s2 += s1
- s1 += uint32(v[4])
- s2 += s1
- s1 += uint32(v[5])
- s2 += s1
- s1 += uint32(v[6])
- s2 += s1
- s1 += uint32(v[7])
- s2 += s1
- s1 += uint32(v[8])
- s2 += s1
- s1 += uint32(v[9])
- s2 += s1
- s1 += uint32(v[10])
- s2 += s1
- s1 += uint32(v[11])
- s2 += s1
- s1 += uint32(v[12])
- s2 += s1
- s1 += uint32(v[13])
- s2 += s1
- s1 += uint32(v[14])
+ for len(p) >= 4 { // main loop, unrolled four bytes per iteration
+ s1 += uint32(p[0])
s2 += s1
- s1 += uint32(v[15])
+ s1 += uint32(p[1])
s2 += s1
- s1 += uint32(v[16])
+ s1 += uint32(p[2])
s2 += s1
- s1 += uint32(v[17])
- s2 += s1
- s1 += uint32(v[18])
- s2 += s1
- s1 += uint32(v[19])
- s2 += s1
- s1 += uint32(v[20])
- s2 += s1
- s1 += uint32(v[21])
- s2 += s1
- s1 += uint32(v[22])
- s2 += s1
- s1 += uint32(v[23])
- s2 += s1
- s1 += uint32(v[24])
- s2 += s1
- s1 += uint32(v[25])
- s2 += s1
- s1 += uint32(v[26])
- s2 += s1
- s1 += uint32(v[27])
- s2 += s1
- s1 += uint32(v[28])
- s2 += s1
- s1 += uint32(v[29])
- s2 += s1
- s1 += uint32(v[30])
- s2 += s1
- s1 += uint32(v[31])
+ s1 += uint32(p[3])
s2 += s1
+ p = p[4:]
}
-
- for i := 0; i < len(p); i++ {
- x := p[i]
+ for _, x := range p { // the 0-3 bytes left in this chunk
s1 += uint32(x)
s2 += s1
}
-
s1 %= mod
s2 %= mod
p = q
}
-
- return digest(s2<<16 | s1)
+ return s2<<16 | s1 // s2 in the high 16 bits, s1 in the low 16 bits
}
diff --git a/internal/adler32/adler32_neon.go b/internal/adler32/adler32_neon.go
new file mode 100644
index 00000000..521b71e0
--- /dev/null
+++ b/internal/adler32/adler32_neon.go
@@ -0,0 +1,6 @@
+//go:build !purego && arm64
+
+package adler32
+
+//go:noescape
+func adler32_neon(adler uint32, buf []byte) uint32 // body is in adler32_neon.s, which addresses the first argument as adler+0(FP)
diff --git a/internal/adler32/adler32_neon.s b/internal/adler32/adler32_neon.s
new file mode 100644
index 00000000..91a69519
--- /dev/null
+++ b/internal/adler32/adler32_neon.s
@@ -0,0 +1,208 @@
+//go:build !purego && arm64
+
+#include "textflag.h"
+
+DATA mult_table<>+0x00(SB)/8, $0x001d001e001f0020 // 16-bit weights 32, 31, 30, 29 (lowest address first)
+DATA mult_table<>+0x08(SB)/8, $0x0019001a001b001c // 28 .. 25
+DATA mult_table<>+0x10(SB)/8, $0x0015001600170018 // 24 .. 21
+DATA mult_table<>+0x18(SB)/8, $0x0011001200130014 // 20 .. 17
+DATA mult_table<>+0x20(SB)/8, $0x000d000e000f0010 // 16 .. 13
+DATA mult_table<>+0x28(SB)/8, $0x0009000a000b000c // 12 .. 9
+DATA mult_table<>+0x30(SB)/8, $0x0005000600070008 // 8 .. 5
+DATA mult_table<>+0x38(SB)/8, $0x0001000200030004 // 4 .. 1
+GLOBL mult_table<>(SB), (RODATA|NOPTR), $64 // 32 halfword weights, one per byte of a 32-byte block
+
+TEXT ·adler32_neon(SB), NOSPLIT, $0-36
+ MOVW adler+0(FP), R0 // R0 = incoming running checksum
+ MOVD buf+8(FP), R1 // R1 = address of the first byte of buf
+ MOVD buf_len+16(FP), R2 // R2 = len(buf)
+ MOVD buf_cap+24(FP), R3 // R3 = cap(buf); not referenced again in this function
+ NOP // (skipped) // stp x29, x30, [sp, #-16]!
+ ANDS $15, R1, R10 // <-- // ands x10, x1, #0xf
+ ANDW $65535, R0, R8 // <-- // and w8, w0, #0xffff
+ LSRW $16, R0, R9 // <-- // lsr w9, w0, #16
+ NOP // (skipped) // mov x29, sp
+ BEQ LBB0_4 // <-- // b.eq .LBB0_4
+ ADD $1, R1, R11 // <-- // add x11, x1, #1
+ MOVD R1, R12 // <-- // mov x12, x1
+
+LBB0_2: // scalar loop: one byte per iteration until the read address reaches a 16-byte boundary. NOTE(review): the loop never tests R2, so it presumably relies on the caller passing enough bytes to reach that boundary - confirm
+ WORD $0x3840158d // MOVBU.P 1(R12), R13 // ldrb w13, [x12], #1
+ SUB $1, R2, R2 // <-- // sub x2, x2, #1
+ TST $15, R11 // <-- // tst x11, #0xf
+ ADD $1, R11, R11 // <-- // add x11, x11, #1
+ ADDW R13, R8, R8 // <-- // add w8, w8, w13
+ ADDW R9, R8, R9 // <-- // add w9, w8, w9
+ BNE LBB0_2 // <-- // b.ne .LBB0_2
+ MOVW $32881, R11 // <-- // mov w11, #32881
+ MOVW $65521, R13 // <-- // mov w13, #65521
+ MOVKW $(32775<<16), R11 // <-- // movk w11, #32775, lsl #16
+ MOVW $4294901775, R12 // <-- // mov w12, #-65521
+ MOVW $65520, R14 // <-- // mov w14, #65520
+ SUB R10, R1, R10 // <-- // sub x10, x1, x10
+ UMULL R11, R9, R11 // <-- // umull x11, w9, w11
+ ADDW R12, R8, R12 // <-- // add w12, w8, w12
+ CMPW R14, R8 // <-- // cmp w8, w14
+ ADD $16, R10, R1 // <-- // add x1, x10, #16
+ LSR $47, R11, R11 // <-- // lsr x11, x11, #47
+ CSELW HI, R12, R8, R8 // <-- // csel w8, w12, w8, hi
+ MSUBW R13, R9, R11, R9 // <-- // msub w9, w11, w13, w9
+
+LBB0_4: // R1 is now 16-byte aligned; R10 = len & 31 is kept for the tail, lengths below 32 skip the vector loop
+ AND $31, R2, R10 // <-- // and x10, x2, #0x1f
+ CMP $32, R2 // <-- // cmp x2, #32
+ BCC LBB0_9 // <-- // b.lo .LBB0_9
+ MOVD $mult_table<>(SB), R11 // <-- // adrp x11, mult_table
+ ADD $0, R11, R11 // <-- // add x11, x11, :lo12:mult_table
+ MOVW $32881, R14 // <-- // mov w14, #32881
+ MOVW $173, R12 // <-- // mov w12, #173
+ MOVD $137438953440, R13 // <-- // mov x13, #137438953440
+ MOVKW $(32775<<16), R14 // <-- // movk w14, #32775, lsl #16
+ VLD1 (R11), [V0.H8, V1.H8, V2.H8, V3.H8] // <-- // ld1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x11]
+ LSR $5, R2, R11 // <-- // lsr x11, x2, #5
+ MOVW $65521, R15 // <-- // mov w15, #65521
+ VEXT $8, V0.B16, V0.B16, V4.B16 // <-- // ext v4.16b, v0.16b, v0.16b, #8
+ VEXT $8, V1.B16, V1.B16, V5.B16 // <-- // ext v5.16b, v1.16b, v1.16b, #8
+ VEXT $8, V2.B16, V2.B16, V6.B16 // <-- // ext v6.16b, v2.16b, v2.16b, #8
+ VEXT $8, V3.B16, V3.B16, V7.B16 // <-- // ext v7.16b, v3.16b, v3.16b, #8
+
+LBB0_6: // outer loop: take R16 = min(R11, 173) blocks of 32 bytes, then reduce R8 and R9 modulo 65521
+ CMP $173, R11 // <-- // cmp x11, #173
+ MOVD R1, R2 // <-- // mov x2, x1
+ CSEL LO, R11, R12, R16 // <-- // csel x16, x11, x12, lo
+ WORD $0x6f00e414 // VMOVI $0, V20.D2 // movi v20.2d, #0000000000000000
+ MULW R16, R8, R0 // <-- // mul w0, w8, w16
+ ADD R16<<5, R13, R17 // <-- // add x17, x13, x16, lsl #5
+ WORD $0x6f00e410 // VMOVI $0, V16.D2 // movi v16.2d, #0000000000000000
+ AND $137438953440, R17, R17 // <-- // and x17, x17, #0x1fffffffe0
+ WORD $0x6f00e412 // VMOVI $0, V18.D2 // movi v18.2d, #0000000000000000
+ WORD $0x6f00e413 // VMOVI $0, V19.D2 // movi v19.2d, #0000000000000000
+ WORD $0x6f00e415 // VMOVI $0, V21.D2 // movi v21.2d, #0000000000000000
+ VMOV R0, V20.S[3] // <-- // mov v20.s[3], w0
+ MOVW R16, R0 // <-- // mov w0, w16
+ WORD $0x6f00e411 // VMOVI $0, V17.D2 // movi v17.2d, #0000000000000000
+
+LBB0_7: // inner loop: one 32-byte block per iteration, R0 counts the blocks left in this round
+ WORD $0xacc15857 // FLDPQ.P 32(R2), (F23, F22) // ldp q23, q22, [x2], #32
+ SUBSW $1, R0, R0 // <-- // subs w0, w0, #1
+ VADD V17.S4, V20.S4, V20.S4 // <-- // add v20.4s, v20.4s, v17.4s
+ WORD $0x2e3712b5 // VUADDW V23.B8, V21.H8, V21.H8 // uaddw v21.8h, v21.8h, v23.8b
+ WORD $0x6e371273 // VUADDW2 V23.B16, V19.H8, V19.H8 // uaddw2 v19.8h, v19.8h, v23.16b
+ WORD $0x6e202ad8 // VUADDLP V22.B16, V24.H8 // uaddlp v24.8h, v22.16b
+ WORD $0x2e361252 // VUADDW V22.B8, V18.H8, V18.H8 // uaddw v18.8h, v18.8h, v22.8b
+ WORD $0x6e361210 // VUADDW2 V22.B16, V16.H8, V16.H8 // uaddw2 v16.8h, v16.8h, v22.16b
+ WORD $0x6e206af8 // VUADALP V23.B16, V24.H8 // uadalp v24.8h, v23.16b
+ WORD $0x6e606b11 // VUADALP V24.H8, V17.S4 // uadalp v17.4s, v24.8h
+ BNE LBB0_7 // <-- // b.ne .LBB0_7
+ VSHL $5, V20.S4, V20.S4 // <-- // shl v20.4s, v20.4s, #5
+ ADD R17, R1, R17 // <-- // add x17, x1, x17
+ SUBS R16, R11, R11 // <-- // subs x11, x11, x16
+ ADD $32, R17, R1 // <-- // add x1, x17, #32
+ WORD $0x2e6082b4 // VUMLAL V0.H4, V21.H4, V20.S4 // umlal v20.4s, v21.4h, v0.4h
+ VEXT $8, V21.B16, V21.B16, V21.B16 // <-- // ext v21.16b, v21.16b, v21.16b, #8
+ WORD $0x2e6482b4 // VUMLAL V4.H4, V21.H4, V20.S4 // umlal v20.4s, v21.4h, v4.4h
+ VEXT $8, V19.B16, V19.B16, V21.B16 // <-- // ext v21.16b, v19.16b, v19.16b, #8
+ WORD $0x2e618274 // VUMLAL V1.H4, V19.H4, V20.S4 // umlal v20.4s, v19.4h, v1.4h
+ VEXT $8, V18.B16, V18.B16, V19.B16 // <-- // ext v19.16b, v18.16b, v18.16b, #8
+ WORD $0x2e6582b4 // VUMLAL V5.H4, V21.H4, V20.S4 // umlal v20.4s, v21.4h, v5.4h
+ WORD $0x2e628254 // VUMLAL V2.H4, V18.H4, V20.S4 // umlal v20.4s, v18.4h, v2.4h
+ WORD $0x2e668274 // VUMLAL V6.H4, V19.H4, V20.S4 // umlal v20.4s, v19.4h, v6.4h
+ WORD $0x2e638214 // VUMLAL V3.H4, V16.H4, V20.S4 // umlal v20.4s, v16.4h, v3.4h
+ VEXT $8, V16.B16, V16.B16, V16.B16 // <-- // ext v16.16b, v16.16b, v16.16b, #8
+ WORD $0x2e678214 // VUMLAL V7.H4, V16.H4, V20.S4 // umlal v20.4s, v16.4h, v7.4h
+ WORD $0x4eb1be30 // VADDP V17.S4, V17.S4, V16.S4 // addp v16.4s, v17.4s, v17.4s
+ WORD $0x4eb4be91 // VADDP V20.S4, V20.S4, V17.S4 // addp v17.4s, v20.4s, v20.4s
+ WORD $0x0eb1be10 // VADDP V17.S2, V16.S2, V16.S2 // addp v16.2s, v16.2s, v17.2s
+ VMOV V16.S[1], R0 // <-- // mov w0, v16.s[1]
+ FMOVS F16, R2 // <-- // fmov w2, s16
+ ADDW R8, R2, R8 // <-- // add w8, w2, w8
+ ADDW R9, R0, R9 // <-- // add w9, w0, w9
+ UMULL R14, R8, R0 // <-- // umull x0, w8, w14
+ UMULL R14, R9, R2 // <-- // umull x2, w9, w14
+ LSR $47, R0, R0 // <-- // lsr x0, x0, #47
+ LSR $47, R2, R2 // <-- // lsr x2, x2, #47
+ MSUBW R15, R8, R0, R8 // <-- // msub w8, w0, w15, w8
+ MSUBW R15, R9, R2, R9 // <-- // msub w9, w2, w15, w9
+ BNE LBB0_6 // <-- // b.ne .LBB0_6
+
+LBB0_9: // tail: R10 (< 32) bytes remain at R1; R8 = s1, R9 = s2
+ CBZ R10, LBB0_15 // <-- // cbz x10, .LBB0_15
+ CMP $16, R10 // <-- // cmp x10, #16
+ BCC LBB0_13 // <-- // b.lo .LBB0_13
+ WORD $0x3940002b // MOVBU (R1), R11 // ldrb w11, [x1]
+ SUBS $16, R10, R10 // <-- // subs x10, x10, #16
+ WORD $0x3940042c // MOVBU 1(R1), R12 // ldrb w12, [x1, #1]
+ WORD $0x3940082d // MOVBU 2(R1), R13 // ldrb w13, [x1, #2]
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ WORD $0x39400c2b // MOVBU 3(R1), R11 // ldrb w11, [x1, #3]
+ ADDW R9, R8, R9 // <-- // add w9, w8, w9
+ ADDW R12, R8, R8 // <-- // add w8, w8, w12
+ WORD $0x3940102c // MOVBU 4(R1), R12 // ldrb w12, [x1, #4]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R13, R8, R8 // <-- // add w8, w8, w13
+ WORD $0x3940142d // MOVBU 5(R1), R13 // ldrb w13, [x1, #5]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ WORD $0x3940182b // MOVBU 6(R1), R11 // ldrb w11, [x1, #6]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R12, R8, R8 // <-- // add w8, w8, w12
+ WORD $0x39401c2c // MOVBU 7(R1), R12 // ldrb w12, [x1, #7]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R13, R8, R8 // <-- // add w8, w8, w13
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ WORD $0x3940202b // MOVBU 8(R1), R11 // ldrb w11, [x1, #8]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R12, R8, R8 // <-- // add w8, w8, w12
+ WORD $0x3940242c // MOVBU 9(R1), R12 // ldrb w12, [x1, #9]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ WORD $0x3940382d // MOVBU 14(R1), R13 // ldrb w13, [x1, #14]
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ WORD $0x3940282b // MOVBU 10(R1), R11 // ldrb w11, [x1, #10]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R12, R8, R8 // <-- // add w8, w8, w12
+ WORD $0x39402c2c // MOVBU 11(R1), R12 // ldrb w12, [x1, #11]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ WORD $0x3940302b // MOVBU 12(R1), R11 // ldrb w11, [x1, #12]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R12, R8, R8 // <-- // add w8, w8, w12
+ WORD $0x3940342c // MOVBU 13(R1), R12 // ldrb w12, [x1, #13]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ WORD $0x39403c2b // MOVBU 15(R1), R11 // ldrb w11, [x1, #15]
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R12, R8, R8 // <-- // add w8, w8, w12
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R13, R8, R8 // <-- // add w8, w8, w13
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ ADDW R8, R9, R9 // <-- // add w9, w9, w8
+ BEQ LBB0_14 // <-- // b.eq .LBB0_14
+ ADD $16, R1, R1 // <-- // add x1, x1, #16
+
+LBB0_13: // scalar loop over the last R10 bytes, one byte per iteration
+ WORD $0x3840142b // MOVBU.P 1(R1), R11 // ldrb w11, [x1], #1
+ SUBS $1, R10, R10 // <-- // subs x10, x10, #1
+ ADDW R11, R8, R8 // <-- // add w8, w8, w11
+ ADDW R9, R8, R9 // <-- // add w9, w8, w9
+ BNE LBB0_13 // <-- // b.ne .LBB0_13
+
+LBB0_14: // final reduction: s1 (R8) by one conditional subtract of 65521, s2 (R9) by multiply-shift division by 65521
+ MOVW $32881, R10 // <-- // mov w10, #32881
+ MOVW $65521, R12 // <-- // mov w12, #65521
+ MOVKW $(32775<<16), R10 // <-- // movk w10, #32775, lsl #16
+ MOVW $4294901775, R11 // <-- // mov w11, #-65521
+ MOVW $65520, R13 // <-- // mov w13, #65520
+ ADDW R11, R8, R11 // <-- // add w11, w8, w11
+ UMULL R10, R9, R10 // <-- // umull x10, w9, w10
+ CMPW R13, R8 // <-- // cmp w8, w13
+ CSELW HI, R11, R8, R8 // <-- // csel w8, w11, w8, hi
+ LSR $47, R10, R10 // <-- // lsr x10, x10, #47
+ MSUBW R12, R9, R10, R9 // <-- // msub w9, w10, w12, w9
+
+LBB0_15: // pack the result as s2<<16 | s1 and store it in the return slot
+ ORRW R9<<16, R8, R0 // <-- // orr w0, w8, w9, lsl #16
+ NOP // (skipped) // ldp x29, x30, [sp], #16
+ MOVW R0, ret+32(FP) // <--
+ RET // <-- // ret
diff --git a/internal/adler32/adler32_sse3.go b/internal/adler32/adler32_sse3.go
new file mode 100644
index 00000000..8e8c8a9b
--- /dev/null
+++ b/internal/adler32/adler32_sse3.go
@@ -0,0 +1,6 @@
+//go:build !purego && amd64
+
+package adler32
+
+//go:noescape
+func adler32_sse3(adler uint32, buf []byte) uint32 // body is in adler32_sse3.s, which addresses the first argument as adler+0(FP)
diff --git a/internal/adler32/adler32_sse3.s b/internal/adler32/adler32_sse3.s
new file mode 100644
index 00000000..3538e0d9
--- /dev/null
+++ b/internal/adler32/adler32_sse3.s
@@ -0,0 +1,214 @@
+//go:build !purego && amd64
+
+#include "textflag.h"
+
+DATA LCPI0_0<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 // byte weights 32 .. 25 (lowest address first)
+DATA LCPI0_0<>+0x08(SB)/8, $0x1112131415161718 // byte weights 24 .. 17
+GLOBL LCPI0_0<>(SB), (RODATA|NOPTR), $16
+
+DATA LCPI0_1<>+0x00(SB)/8, $0x0001000100010001 // eight 16-bit ones, the PMADDWD multiplier
+DATA LCPI0_1<>+0x08(SB)/8, $0x0001000100010001
+GLOBL LCPI0_1<>(SB), (RODATA|NOPTR), $16
+
+DATA LCPI0_2<>+0x00(SB)/8, $0x090a0b0c0d0e0f10 // byte weights 16 .. 9
+DATA LCPI0_2<>+0x08(SB)/8, $0x0102030405060708 // byte weights 8 .. 1
+GLOBL LCPI0_2<>(SB), (RODATA|NOPTR), $16
+
+TEXT ·adler32_sse3(SB), NOSPLIT, $0-36
+ MOVLQZX adler+0(FP), DI // DI = incoming running checksum, zero-extended
+ MOVQ buf+8(FP), SI // SI = address of the first byte of buf
+ MOVQ buf_len+16(FP), DX // DX = len(buf)
+ MOVQ buf_cap+24(FP), CX // cap(buf); CX is overwritten below before it is ever read
+ NOP // (skipped) // push rbp
+ NOP // (skipped) // mov rbp, rsp
+ NOP // (skipped) // and rsp, -8
+ WORD $0xf889 // MOVL DI, AX // mov eax, edi
+ LONG $0xc8b70f44 // MOVZX AX, R9 // movzx r9d, ax
+ WORD $0xe8c1; BYTE $0x10 // SHRL $0x10, AX // shr eax, 16
+ WORD $0xd189 // MOVL DX, CX // mov ecx, edx
+ WORD $0xe183; BYTE $0x1f // ANDL $0x1f, CX // and ecx, 31
+ CMPQ DX, $0x20 // <-- // cmp rdx, 32
+ JAE LBB0_2 // <-- // jae .LBB0_2
+ WORD $0x8944; BYTE $0xcf // MOVL R9, DI // mov edi, r9d
+ JMP LBB0_6 // <-- // jmp .LBB0_6
+
+LBB0_2: // at least one 32-byte block: DX becomes the block count, X1-X3 hold the weight and ones constants
+ SHRQ $0x5, DX // <-- // shr rdx, 5
+ LONG $0xc0ef0f66 // PXOR X0, X0 // pxor xmm0, xmm0
+ MOVO LCPI0_0<>(SB), X1 // <-- // movdqa xmm1, xmmword ptr [rip + .LCPI0_0]
+ MOVO LCPI0_1<>(SB), X2 // <-- // movdqa xmm2, xmmword ptr [rip + .LCPI0_1]
+ MOVO LCPI0_2<>(SB), X3 // <-- // movdqa xmm3, xmmword ptr [rip + .LCPI0_2]
+ LONG $0x8071b841; WORD $0x8007 // MOVL $-0x7ff87f8f, R8 // mov r8d, 2147975281
+
+LBB0_3: // outer loop: take R10 = min(DX, 173) blocks of 32 bytes, then reduce both sums modulo 65521
+ CMPQ DX, $0xad // <-- // cmp rdx, 173
+ LONG $0x00adba41; WORD $0x0000 // MOVL $0xad, R10 // mov r10d, 173
+ LONG $0xd2420f4c // CMOVB DX, R10 // cmovb r10, rdx
+ WORD $0x8944; BYTE $0xcf // MOVL R9, DI // mov edi, r9d
+ LONG $0xfaaf0f41 // IMULL R10, DI // imul edi, r10d
+ LONG $0xef6e0f66 // MOVD DI, X5 // movd xmm5, edi
+ LONG $0xe06e0f66 // MOVD AX, X4 // movd xmm4, eax
+ WORD $0x8944; BYTE $0xd0 // MOVL R10, AX // mov eax, r10d
+ LONG $0xf6ef0f66 // PXOR X6, X6 // pxor xmm6, xmm6
+
+LBB0_4: // inner loop: 32 bytes per iteration via two unaligned 16-byte loads. NOTE(review): PMADDUBSW is an SSSE3 instruction - confirm the caller's CPU feature check covers it
+ LONG $0x3e6f0ff3 // MOVDQU 0(SI), X7 // movdqu xmm7, xmmword ptr [rsi]
+ LONG $0x6f0f4466; BYTE $0xc7 // MOVDQA X7, X8 // movdqa xmm8, xmm7
+ LONG $0x04380f66; BYTE $0xf9 // PMADDUBSW X1, X7 // pmaddubsw xmm7, xmm1
+ LONG $0xfaf50f66 // PMADDWD X2, X7 // pmaddwd xmm7, xmm2
+ LONG $0xfcfe0f66 // PADDD X4, X7 // paddd xmm7, xmm4
+ LONG $0x666f0ff3; BYTE $0x10 // MOVDQU 0x10(SI), X4 // movdqu xmm4, xmmword ptr [rsi + 16]
+ LONG $0xeefe0f66 // PADDD X6, X5 // paddd xmm5, xmm6
+ LONG $0xf60f4466; BYTE $0xc0 // PSADBW X0, X8 // psadbw xmm8, xmm0
+ LONG $0xfe0f4466; BYTE $0xc6 // PADDD X6, X8 // paddd xmm8, xmm6
+ LONG $0xf46f0f66 // MOVDQA X4, X6 // movdqa xmm6, xmm4
+ LONG $0xf0f60f66 // PSADBW X0, X6 // psadbw xmm6, xmm0
+ LONG $0xfe0f4166; BYTE $0xf0 // PADDD X8, X6 // paddd xmm6, xmm8
+ LONG $0x04380f66; BYTE $0xe3 // PMADDUBSW X3, X4 // pmaddubsw xmm4, xmm3
+ LONG $0xe2f50f66 // PMADDWD X2, X4 // pmaddwd xmm4, xmm2
+ LONG $0xe7fe0f66 // PADDD X7, X4 // paddd xmm4, xmm7
+ ADDQ $0x20, SI // <-- // add rsi, 32
+ WORD $0xc8ff // DECL AX // dec eax
+ JNE LBB0_4 // <-- // jne .LBB0_4
+ LONG $0xf5720f66; BYTE $0x05 // PSLLD $0x5, X5 // pslld xmm5, 5
+ LONG $0xe5fe0f66 // PADDD X5, X4 // paddd xmm4, xmm5
+ LONG $0xee700f66; BYTE $0xb1 // PSHUFD $0xb1, X6, X5 // pshufd xmm5, xmm6, 177
+ LONG $0xeefe0f66 // PADDD X6, X5 // paddd xmm5, xmm6
+ LONG $0xf5700f66; BYTE $0xee // PSHUFD $0xee, X5, X6 // pshufd xmm6, xmm5, 238
+ LONG $0xf5fe0f66 // PADDD X5, X6 // paddd xmm6, xmm5
+ LONG $0xf77e0f66 // MOVD X6, DI // movd edi, xmm6
+ WORD $0x0144; BYTE $0xcf // ADDL R9, DI // add edi, r9d
+ LONG $0xec700f66; BYTE $0xb1 // PSHUFD $0xb1, X4, X5 // pshufd xmm5, xmm4, 177
+ LONG $0xecfe0f66 // PADDD X4, X5 // paddd xmm5, xmm4
+ LONG $0xe5700f66; BYTE $0xee // PSHUFD $0xee, X5, X4 // pshufd xmm4, xmm5, 238
+ LONG $0xe5fe0f66 // PADDD X5, X4 // paddd xmm4, xmm5
+ LONG $0xe07e0f66 // MOVD X4, AX // movd eax, xmm4
+ MOVQ DI, R9 // <-- // mov r9, rdi
+ IMULQ R8, R9 // <-- // imul r9, r8
+ SHRQ $0x2f, R9 // <-- // shr r9, 47
+ LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 // IMULL $0xfff1, R9, R9 // imul r9d, r9d, 65521
+ WORD $0x2944; BYTE $0xcf // SUBL R9, DI // sub edi, r9d
+ MOVQ AX, R9 // <-- // mov r9, rax
+ IMULQ R8, R9 // <-- // imul r9, r8
+ SHRQ $0x2f, R9 // <-- // shr r9, 47
+ LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 // IMULL $0xfff1, R9, R9 // imul r9d, r9d, 65521
+ WORD $0x2944; BYTE $0xc8 // SUBL R9, AX // sub eax, r9d
+ WORD $0x8941; BYTE $0xf9 // MOVL DI, R9 // mov r9d, edi
+ SUBQ R10, DX // <-- // sub rdx, r10
+ JNE LBB0_3 // <-- // jne .LBB0_3
+
+LBB0_6: // tail: CX = len & 31 bytes remain at SI; DI = s1, AX = s2
+ WORD $0x8548; BYTE $0xc9 // TESTQ CX, CX // test rcx, rcx
+ JE LBB0_18 // <-- // je .LBB0_18
+ CMPL CX, $0x10 // <-- // cmp ecx, 16
+ JB LBB0_10 // <-- // jb .LBB0_10
+ WORD $0xb60f; BYTE $0x16 // MOVZX 0(SI), DX // movzx edx, byte ptr [rsi]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0156b60f // MOVZX 0x1(SI), DX // movzx edx, byte ptr [rsi + 1]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x027eb60f // MOVZX 0x2(SI), DI // movzx edi, byte ptr [rsi + 2]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0356b60f // MOVZX 0x3(SI), DX // movzx edx, byte ptr [rsi + 3]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x047eb60f // MOVZX 0x4(SI), DI // movzx edi, byte ptr [rsi + 4]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0556b60f // MOVZX 0x5(SI), DX // movzx edx, byte ptr [rsi + 5]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x067eb60f // MOVZX 0x6(SI), DI // movzx edi, byte ptr [rsi + 6]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0756b60f // MOVZX 0x7(SI), DX // movzx edx, byte ptr [rsi + 7]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x087eb60f // MOVZX 0x8(SI), DI // movzx edi, byte ptr [rsi + 8]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0956b60f // MOVZX 0x9(SI), DX // movzx edx, byte ptr [rsi + 9]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x0a7eb60f // MOVZX 0xa(SI), DI // movzx edi, byte ptr [rsi + 10]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0b56b60f // MOVZX 0xb(SI), DX // movzx edx, byte ptr [rsi + 11]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x0c7eb60f // MOVZX 0xc(SI), DI // movzx edi, byte ptr [rsi + 12]
+ WORD $0xd701 // ADDL DX, DI // add edi, edx
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x0d56b60f // MOVZX 0xd(SI), DX // movzx edx, byte ptr [rsi + 13]
+ WORD $0xfa01 // ADDL DI, DX // add edx, edi
+ WORD $0xd001 // ADDL DX, AX // add eax, edx
+ LONG $0x46b60f44; BYTE $0x0e // MOVZX 0xe(SI), R8 // movzx r8d, byte ptr [rsi + 14]
+ WORD $0x0141; BYTE $0xd0 // ADDL DX, R8 // add r8d, edx
+ WORD $0x0144; BYTE $0xc0 // ADDL R8, AX // add eax, r8d
+ LONG $0x0f7eb60f // MOVZX 0xf(SI), DI // movzx edi, byte ptr [rsi + 15]
+ WORD $0x0144; BYTE $0xc7 // ADDL R8, DI // add edi, r8d
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ ADDQ $-0x10, CX // <-- // add rcx, -16
+ JE LBB0_17 // <-- // je .LBB0_17
+ ADDQ $0x10, SI // <-- // add rsi, 16
+
+LBB0_10: // fewer than 16 bytes left: handle CX & 3 bytes singly first, then groups of four
+ LEAQ -0x1(CX), DX // <-- // lea rdx, [rcx - 1]
+ MOVQ CX, R9 // <-- // mov r9, rcx
+ ANDQ $0x3, R9 // <-- // and r9, 3
+ JE LBB0_14 // <-- // je .LBB0_14
+ XORL R8, R8 // <-- // xor r8d, r8d
+
+LBB0_12: // single-byte loop, runs R9 = CX & 3 times
+ LONG $0x14b60f46; BYTE $0x06 // MOVZX 0(SI)(R8*1), R10 // movzx r10d, byte ptr [rsi + r8]
+ WORD $0x0144; BYTE $0xd7 // ADDL R10, DI // add edi, r10d
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ INCQ R8 // <-- // inc r8
+ CMPQ R9, R8 // <-- // cmp r9, r8
+ JNE LBB0_12 // <-- // jne .LBB0_12
+ ADDQ R8, SI // <-- // add rsi, r8
+ SUBQ R8, CX // <-- // sub rcx, r8
+
+LBB0_14: // DX still holds (byte count on entry to LBB0_10) - 1; a value below 3 means no group of four remains
+ CMPQ DX, $0x3 // <-- // cmp rdx, 3
+ JB LBB0_17 // <-- // jb .LBB0_17
+ XORL DX, DX // <-- // xor edx, edx
+
+LBB0_16: // four bytes per iteration until the offset in DX reaches CX
+ LONG $0x04b60f44; BYTE $0x16 // MOVZX 0(SI)(DX*1), R8 // movzx r8d, byte ptr [rsi + rdx]
+ WORD $0x0141; BYTE $0xf8 // ADDL DI, R8 // add r8d, edi
+ WORD $0x0144; BYTE $0xc0 // ADDL R8, AX // add eax, r8d
+ LONG $0x167cb60f; BYTE $0x01 // MOVZX 0x1(SI)(DX*1), DI // movzx edi, byte ptr [rsi + rdx + 1]
+ WORD $0x0144; BYTE $0xc7 // ADDL R8, DI // add edi, r8d
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ LONG $0x44b60f44; WORD $0x0216 // MOVZX 0x2(SI)(DX*1), R8 // movzx r8d, byte ptr [rsi + rdx + 2]
+ WORD $0x0141; BYTE $0xf8 // ADDL DI, R8 // add r8d, edi
+ WORD $0x0144; BYTE $0xc0 // ADDL R8, AX // add eax, r8d
+ LONG $0x167cb60f; BYTE $0x03 // MOVZX 0x3(SI)(DX*1), DI // movzx edi, byte ptr [rsi + rdx + 3]
+ WORD $0x0144; BYTE $0xc7 // ADDL R8, DI // add edi, r8d
+ WORD $0xf801 // ADDL DI, AX // add eax, edi
+ ADDQ $0x4, DX // <-- // add rdx, 4
+ CMPQ CX, DX // <-- // cmp rcx, rdx
+ JNE LBB0_16 // <-- // jne .LBB0_16
+
+LBB0_17: // final reduction: s1 (DI) by one conditional subtract of 65521, s2 (AX) by multiply-shift division by 65521
+ LONG $0x000f8f8d; WORD $0xffff // LEAL -0xfff1(DI), CX // lea ecx, [rdi - 65521]
+ CMPL DI, $0xfff1 // <-- // cmp edi, 65521
+ WORD $0x420f; BYTE $0xcf // CMOVB DI, CX // cmovb ecx, edi
+ WORD $0xc289 // MOVL AX, DX // mov edx, eax
+ LONG $0x078071be; BYTE $0x80 // MOVL $-0x7ff87f8f, SI // mov esi, 2147975281
+ IMULQ DX, SI // <-- // imul rsi, rdx
+ SHRQ $0x2f, SI // <-- // shr rsi, 47
+ LONG $0xfff1d669; WORD $0x0000 // IMULL $0xfff1, SI, DX // imul edx, esi, 65521
+ WORD $0xd029 // SUBL DX, AX // sub eax, edx
+ WORD $0xcf89 // MOVL CX, DI // mov edi, ecx
+
+LBB0_18: // pack the result as s2<<16 | s1 and store it in the return slot
+ WORD $0xe0c1; BYTE $0x10 // SHLL $0x10, AX // shl eax, 16
+ WORD $0xf809 // ORL DI, AX // or eax, edi
+ NOP // (skipped) // mov rsp, rbp
+ NOP // (skipped) // pop rbp
+ MOVL AX, ret+32(FP) // <--
+ RET // <-- // ret
diff --git a/internal/adler32/adler32_test.go b/internal/adler32/adler32_test.go
deleted file mode 100644
index 12db7e50..00000000
--- a/internal/adler32/adler32_test.go
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package adler32
-
-import (
- "encoding"
- "hash"
- "io"
- "strings"
- "testing"
-)
-
-func TestHashInterface(t *testing.T) {
- TestHash(t, func() hash.Hash { return New() })
-}
-
-var golden = []struct {
- out uint32
- in string
- halfState string // marshaled hash state after first half of in written, used by TestGoldenMarshal
-}{
- {0x00000001, "", "adl\x01\x00\x00\x00\x01"},
- {0x00620062, "a", "adl\x01\x00\x00\x00\x01"},
- {0x012600c4, "ab", "adl\x01\x00b\x00b"},
- {0x024d0127, "abc", "adl\x01\x00b\x00b"},
- {0x03d8018b, "abcd", "adl\x01\x01&\x00\xc4"},
- {0x05c801f0, "abcde", "adl\x01\x01&\x00\xc4"},
- {0x081e0256, "abcdef", "adl\x01\x02M\x01'"},
- {0x0adb02bd, "abcdefg", "adl\x01\x02M\x01'"},
- {0x0e000325, "abcdefgh", "adl\x01\x03\xd8\x01\x8b"},
- {0x118e038e, "abcdefghi", "adl\x01\x03\xd8\x01\x8b"},
- {0x158603f8, "abcdefghij", "adl\x01\x05\xc8\x01\xf0"},
- {0x3f090f02, "Discard medicine more than two years old.", "adl\x01NU\a\x87"},
- {0x46d81477, "He who has a shady past knows that nice guys finish last.", "adl\x01\x89\x8e\t\xe9"},
- {0x40ee0ee1, "I wouldn't marry him with a ten foot pole.", "adl\x01R\t\ag"},
- {0x16661315, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave", "adl\x01\u007f\xbb\t\x10"},
- {0x5b2e1480, "The days of the digital watch are numbered. -Tom Stoppard", "adl\x01\x99:\n~"},
- {0x8c3c09ea, "Nepal premier won't resign.", "adl\x01\"\x05\x05\x05"},
- {0x45ac18fd, "For every action there is an equal and opposite government program.", "adl\x01\xcc\xfa\f\x00"},
- {0x53c61462, "His money is twice tainted: 'taint yours and 'taint mine.", "adl\x01\x93\xa9\n\b"},
- {0x7e511e63, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977", "adl\x01e\xf5\x10\x14"},
- {0xe4801a6a, "It's a tiny change to the code and not completely disgusting. - Bob Manchek", "adl\x01\xee\x00\f\xb2"},
- {0x61b507df, "size: a.out: bad magic", "adl\x01\x1a\xfc\x04\x1d"},
- {0xb8631171, "The major problem is with sendmail. -Mark Horton", "adl\x01mi\b\xdc"},
- {0x8b5e1904, "Give me a rock, paper and scissors and I will move the world. CCFestoon", "adl\x01\xe3\n\f\x9f"},
- {0x7cc6102b, "If the enemy is within range, then so are you.", "adl\x01_\xe0\b\x1e"},
- {0x700318e7, "It's well we cannot hear the screams/That we create in others' dreams.", "adl\x01ۘ\f\x87"},
- {0x1e601747, "You remind me of a TV show, but that's all right: I watch it anyway.", "adl\x01\xcc}\v\x83"},
- {0xb55b0b09, "C is as portable as Stonehedge!!", "adl\x01,^\x05\xad"},
- {0x39111dd0, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley", "adl\x01M\xd1\x0e\xc8"},
- {0x91dd304f, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule", "adl\x01#\xd8\x17\xd7"},
- {0x2e5d1316, "How can you write a big system without C++? -Paul Glick", "adl\x01\x8fU\n\x0f"},
- {0xd0201df6, "'Invariant assertions' is the most elegant programming technique! -Tom Szymanski", "adl\x01/\x98\x0e\xc4"},
- {0x211297c8, strings.Repeat("\xff", 5548) + "8", "adl\x01\x9a\xa6\xcb\xc1"},
- {0xbaa198c8, strings.Repeat("\xff", 5549) + "9", "adl\x01gu\xcc\xc0"},
- {0x553499be, strings.Repeat("\xff", 5550) + "0", "adl\x01gu\xcc\xc0"},
- {0xf0c19abe, strings.Repeat("\xff", 5551) + "1", "adl\x015CͿ"},
- {0x8d5c9bbe, strings.Repeat("\xff", 5552) + "2", "adl\x015CͿ"},
- {0x2af69cbe, strings.Repeat("\xff", 5553) + "3", "adl\x01\x04\x10ξ"},
- {0xc9809dbe, strings.Repeat("\xff", 5554) + "4", "adl\x01\x04\x10ξ"},
- {0x69189ebe, strings.Repeat("\xff", 5555) + "5", "adl\x01\xd3\xcdϽ"},
- {0x86af0001, strings.Repeat("\x00", 1e5), "adl\x01\xc3P\x00\x01"},
- {0x79660b4d, strings.Repeat("a", 1e5), "adl\x01\x81k\x05\xa7"},
- {0x110588ee, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4), "adl\x01e\xd2\xc4p"},
-}
-
-// checksum is a slow but simple implementation of the Adler-32 checksum.
-// It is a straight port of the sample code in RFC 1950 section 9.
-func checksum(p []byte) uint32 {
- s1, s2 := uint32(1), uint32(0)
- for _, x := range p {
- s1 = (s1 + uint32(x)) % mod
- s2 = (s2 + s1) % mod
- }
- return s2<<16 | s1
-}
-
-func TestGolden(t *testing.T) {
- for _, g := range golden {
- in := g.in
- if len(in) > 220 {
- in = in[:100] + "..." + in[len(in)-100:]
- }
- p := []byte(g.in)
- if got := checksum(p); got != g.out {
- t.Errorf("simple implementation: checksum(%q) = 0x%x want 0x%x", in, got, g.out)
- continue
- }
- if got := Checksum(p); got != g.out {
- t.Errorf("optimized implementation: Checksum(%q) = 0x%x want 0x%x", in, got, g.out)
- continue
- }
- }
-}
-
-func TestGoldenMarshal(t *testing.T) {
- for _, g := range golden {
- h := New()
- h2 := New()
-
- _, _ = io.WriteString(h, g.in[:len(g.in)/2])
-
- state, err := h.(encoding.BinaryMarshaler).MarshalBinary()
- if err != nil {
- t.Errorf("could not marshal: %v", err)
- continue
- }
-
- stateAppend, err := h.(encoding.BinaryAppender).AppendBinary(make([]byte, 4, 32))
- if err != nil {
- t.Errorf("could not marshal: %v", err)
- continue
- }
- stateAppend = stateAppend[4:]
-
- if string(state) != g.halfState {
- t.Errorf("checksum(%q) state = %q, want %q", g.in, state, g.halfState)
- continue
- }
-
- if string(stateAppend) != g.halfState {
- t.Errorf("checksum(%q) state = %q, want %q", g.in, stateAppend, g.halfState)
- continue
- }
-
- if err := h2.(encoding.BinaryUnmarshaler).UnmarshalBinary(state); err != nil {
- t.Errorf("could not unmarshal: %v", err)
- continue
- }
-
- _, _ = io.WriteString(h, g.in[len(g.in)/2:])
- _, _ = io.WriteString(h2, g.in[len(g.in)/2:])
-
- if h.Sum32() != h2.Sum32() {
- t.Errorf("checksum(%q) = 0x%x != marshaled (0x%x)", g.in, h.Sum32(), h2.Sum32())
- }
- }
-}
-
-func BenchmarkAdler32KB(b *testing.B) {
- b.SetBytes(1024)
- data := make([]byte, 1024)
- for i := range data {
- data[i] = byte(i)
- }
- h := New()
- in := make([]byte, 0, h.Size())
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- h.Reset()
- h.Write(data)
- h.Sum(in)
- }
-}
diff --git a/internal/adler32/bench_test.go b/internal/adler32/bench_test.go
deleted file mode 100644
index fd1e4710..00000000
--- a/internal/adler32/bench_test.go
+++ /dev/null
@@ -1,24 +0,0 @@
-package adler32_test
-
-import (
- "testing"
-
- "git.sr.ht/~runxiyu/furgit/internal/adler32"
-)
-
-const benchmarkSize = 64 * 1024
-
-var data = make([]byte, benchmarkSize)
-
-func init() {
- for i := 0; i < benchmarkSize; i++ {
- data[i] = byte(i % 256)
- }
-}
-
-func BenchmarkChecksum(b *testing.B) {
- b.ReportAllocs()
- for i := 0; i < b.N; i++ {
- adler32.Checksum(data)
- }
-}
diff --git a/internal/adler32/testhash.go b/internal/adler32/testhash.go
deleted file mode 100644
index 4c31aae1..00000000
--- a/internal/adler32/testhash.go
+++ /dev/null
@@ -1,231 +0,0 @@
-// Copyright 2024 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package adler32
-
-import (
- "bytes"
- "hash"
- "io"
- "math/rand"
- "testing"
- "time"
-)
-
-type MakeHash func() hash.Hash
-
-// TestHash performs a set of tests on hash.Hash implementations, checking the
-// documented requirements of Write, Sum, Reset, Size, and BlockSize.
-func TestHash(t *testing.T, mh MakeHash) {
- TestHashWithoutClone(t, mh)
-
- // Test whether the results after cloning are consistent.
- t.Run("Clone", func(t *testing.T) {
- h, ok := mh().(hash.Cloner)
- if !ok {
- t.Fatalf("%T does not implement hash.Cloner", mh)
- }
- h3, err := h.Clone()
- if err != nil {
- t.Fatalf("Clone failed: %v", err)
- }
- prefix := []byte("tmp")
- writeToHash(t, h, prefix)
- h2, err := h.Clone()
- if err != nil {
- t.Fatalf("Clone failed: %v", err)
- }
- prefixSum := h.Sum(nil)
- if !bytes.Equal(prefixSum, h2.Sum(nil)) {
- t.Fatalf("%T Clone results are inconsistent", h)
- }
- suffix := []byte("tmp2")
- writeToHash(t, h, suffix)
- writeToHash(t, h3, append(prefix, suffix...))
- compositeSum := h3.Sum(nil)
- if !bytes.Equal(h.Sum(nil), compositeSum) {
- t.Fatalf("%T Clone results are inconsistent", h)
- }
- if !bytes.Equal(h2.Sum(nil), prefixSum) {
- t.Fatalf("%T Clone results are inconsistent", h)
- }
- writeToHash(t, h2, suffix)
- if !bytes.Equal(h.Sum(nil), compositeSum) {
- t.Fatalf("%T Clone results are inconsistent", h)
- }
- if !bytes.Equal(h2.Sum(nil), compositeSum) {
- t.Fatalf("%T Clone results are inconsistent", h)
- }
- })
-}
-
-func TestHashWithoutClone(t *testing.T, mh MakeHash) {
- // Test that Sum returns an appended digest matching output of Size
- t.Run("SumAppend", func(t *testing.T) {
- h := mh()
- rng := newRandReader(t)
-
- emptyBuff := []byte("")
- shortBuff := []byte("a")
- longBuff := make([]byte, h.BlockSize()+1)
- _, _ = rng.Read(longBuff)
-
- // Set of example strings to append digest to
- prefixes := [][]byte{nil, emptyBuff, shortBuff, longBuff}
-
- // Go to each string and check digest gets appended to and is correct size.
- for _, prefix := range prefixes {
- h.Reset()
-
- sum := getSum(t, h, prefix) // Append new digest to prefix
-
- // Check that Sum didn't alter the prefix
- if !bytes.Equal(sum[:len(prefix)], prefix) {
- t.Errorf("Sum alters passed buffer instead of appending; got %x, want %x", sum[:len(prefix)], prefix)
- }
-
- // Check that the appended sum wasn't affected by the prefix
- if expectedSum := getSum(t, h, nil); !bytes.Equal(sum[len(prefix):], expectedSum) {
- t.Errorf("Sum behavior affected by data in the input buffer; got %x, want %x", sum[len(prefix):], expectedSum)
- }
-
- // Check size of append
- if got, want := len(sum)-len(prefix), h.Size(); got != want {
- t.Errorf("Sum appends number of bytes != Size; got %v , want %v", got, want)
- }
- }
- })
-
- // Test that Hash.Write never returns error.
- t.Run("WriteWithoutError", func(t *testing.T) {
- h := mh()
- rng := newRandReader(t)
-
- emptySlice := []byte("")
- shortSlice := []byte("a")
- longSlice := make([]byte, h.BlockSize()+1)
- _, _ = rng.Read(longSlice)
-
- // Set of example strings to append digest to
- slices := [][]byte{emptySlice, shortSlice, longSlice}
-
- for _, slice := range slices {
- writeToHash(t, h, slice) // Writes and checks Write doesn't error
- }
- })
-
- t.Run("ResetState", func(t *testing.T) {
- h := mh()
- rng := newRandReader(t)
-
- emptySum := getSum(t, h, nil)
-
- // Write to hash and then Reset it and see if Sum is same as emptySum
- writeEx := make([]byte, h.BlockSize())
- _, _ = rng.Read(writeEx)
- writeToHash(t, h, writeEx)
- h.Reset()
- resetSum := getSum(t, h, nil)
-
- if !bytes.Equal(emptySum, resetSum) {
- t.Errorf("Reset hash yields different Sum than new hash; got %x, want %x", emptySum, resetSum)
- }
- })
-
- // Check that Write isn't reading from beyond input slice's bounds
- t.Run("OutOfBoundsRead", func(t *testing.T) {
- h := mh()
- blockSize := h.BlockSize()
- rng := newRandReader(t)
-
- msg := make([]byte, blockSize)
- _, _ = rng.Read(msg)
- writeToHash(t, h, msg)
- expectedDigest := getSum(t, h, nil) // Record control digest
-
- h.Reset()
-
- // Make a buffer with msg in the middle and data on either end
- buff := make([]byte, blockSize*3)
- endOfPrefix, startOfSuffix := blockSize, blockSize*2
-
- copy(buff[endOfPrefix:startOfSuffix], msg)
- _, _ = rng.Read(buff[:endOfPrefix])
- _, _ = rng.Read(buff[startOfSuffix:])
-
- writeToHash(t, h, buff[endOfPrefix:startOfSuffix])
- testDigest := getSum(t, h, nil)
-
- if !bytes.Equal(testDigest, expectedDigest) {
- t.Errorf("Write affected by data outside of input slice bounds; got %x, want %x", testDigest, expectedDigest)
- }
- })
-
- // Test that multiple calls to Write is stateful
- t.Run("StatefulWrite", func(t *testing.T) {
- h := mh()
- rng := newRandReader(t)
-
- prefix, suffix := make([]byte, h.BlockSize()), make([]byte, h.BlockSize())
- _, _ = rng.Read(prefix)
- _, _ = rng.Read(suffix)
-
- // Write prefix then suffix sequentially and record resulting hash
- writeToHash(t, h, prefix)
- writeToHash(t, h, suffix)
- serialSum := getSum(t, h, nil)
-
- h.Reset()
-
- // Write prefix and suffix at the same time and record resulting hash
- writeToHash(t, h, append(prefix, suffix...))
- compositeSum := getSum(t, h, nil)
-
- // Check that sequential writing results in the same as writing all at once
- if !bytes.Equal(compositeSum, serialSum) {
- t.Errorf("two successive Write calls resulted in a different Sum than a single one; got %x, want %x", compositeSum, serialSum)
- }
- })
-}
-
-// Helper function for writing. Verifies that Write does not error.
-func writeToHash(t *testing.T, h hash.Hash, p []byte) {
- t.Helper()
-
- before := make([]byte, len(p))
- copy(before, p)
-
- n, err := h.Write(p)
- if err != nil || n != len(p) {
- t.Errorf("Write returned error; got (%v, %v), want (nil, %v)", err, n, len(p))
- }
-
- if !bytes.Equal(p, before) {
- t.Errorf("Write modified input slice; got %x, want %x", p, before)
- }
-}
-
-// Helper function for getting Sum. Checks that Sum doesn't change hash state.
-func getSum(t *testing.T, h hash.Hash, buff []byte) []byte {
- t.Helper()
-
- testBuff := make([]byte, len(buff))
- copy(testBuff, buff)
-
- sum := h.Sum(buff)
- testSum := h.Sum(testBuff)
-
- // Check that Sum doesn't change underlying hash state
- if !bytes.Equal(sum, testSum) {
- t.Errorf("successive calls to Sum yield different results; got %x, want %x", sum, testSum)
- }
-
- return sum
-}
-
-func newRandReader(t *testing.T) io.Reader {
- seed := time.Now().UnixNano()
- t.Logf("Deterministic RNG seed: 0x%x", seed)
- return rand.New(rand.NewSource(seed))
-}