about | summary | refs | log | tree | commit | diff
path: root/internal/adler32
diff options
context:
space:
mode:
author: Runxi Yu, 2026-02-20 19:06:13 +0800
committer: Runxi Yu, 2026-02-20 19:07:14 +0800
commit: aa513c069c1418734aea894dc944e27c6a78a3bb (patch)
tree: 687f0a11bb550fa088fd82a98ceb8979bbc35f69 /internal/adler32
parent: Comment on prior reverts removing the pack writing API (diff)
Delete everything, I'm redesigning this.
I'll stop using a flat package and make things much more modular. And also experiment with streaming APIs so large blobs don't OOM us.
Diffstat (limited to 'internal/adler32')
-rw-r--r-- internal/adler32/LICENSE 30
-rw-r--r-- internal/adler32/LICENSE.ZLIB 17
-rw-r--r-- internal/adler32/README 1
-rw-r--r-- internal/adler32/adler32_amd64.go 93
-rw-r--r-- internal/adler32/adler32_arm64.go 73
-rw-r--r-- internal/adler32/adler32_avx2.go 6
-rw-r--r-- internal/adler32/adler32_avx2.s 263
-rw-r--r-- internal/adler32/adler32_fallback.go 19
-rw-r--r-- internal/adler32/adler32_generic.go 45
-rw-r--r-- internal/adler32/adler32_neon.go 6
-rw-r--r-- internal/adler32/adler32_neon.s 208
-rw-r--r-- internal/adler32/adler32_sse3.go 6
-rw-r--r-- internal/adler32/adler32_sse3.s 214
-rw-r--r-- internal/adler32/bench_test.go 22
14 files changed, 0 insertions, 1003 deletions
diff --git a/internal/adler32/LICENSE b/internal/adler32/LICENSE
deleted file mode 100644
index 5cec357a..00000000
--- a/internal/adler32/LICENSE
+++ /dev/null
@@ -1,30 +0,0 @@
-Copyright (c) 2024, Michal Hruby
-Copyright (c) 2017 The Chromium Authors. All rights reserved.
-Copyright (c) 1995-2024 Mark Adler
-Copyright (c) 1995-2024 Jean-loup Gailly
-Copyright (c) 2022 Adam Stylinski
-
-BSD 2-Clause License
-
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/internal/adler32/LICENSE.ZLIB b/internal/adler32/LICENSE.ZLIB
deleted file mode 100644
index c75c1568..00000000
--- a/internal/adler32/LICENSE.ZLIB
+++ /dev/null
@@ -1,17 +0,0 @@
-Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
diff --git a/internal/adler32/README b/internal/adler32/README
deleted file mode 100644
index b80acd00..00000000
--- a/internal/adler32/README
+++ /dev/null
@@ -1 +0,0 @@
-This package was mostly copied from github.com/mhr3/adler32-simd.
diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go
deleted file mode 100644
index 88a854ed..00000000
--- a/internal/adler32/adler32_amd64.go
+++ /dev/null
@@ -1,93 +0,0 @@
-//go:build amd64 && !purego
-
-package adler32
-
-import (
- "encoding/binary"
- "errors"
- "hash"
- "hash/adler32"
-
- "golang.org/x/sys/cpu"
-)
-
-// The size of an Adler-32 checksum in bytes.
-const Size = 4
-
-var (
- hasSSE3 = cpu.X86.HasSSE3
- hasAVX2 = cpu.X86.HasAVX2
-)
-
-// digest represents the partial evaluation of a checksum.
-// The low 16 bits are s1, the high 16 bits are s2.
-type digest uint32
-
-func (d *digest) Reset() { *d = 1 }
-
-// New returns a new hash.Hash32 computing the Adler-32 checksum.
-func New() hash.Hash32 {
- if !hasSSE3 {
- return adler32.New()
- }
- d := new(digest)
- d.Reset()
- return d
-}
-
-func (d *digest) MarshalBinary() ([]byte, error) {
- b := make([]byte, 0, marshaledSize)
- b = append(b, magic...)
- b = binary.BigEndian.AppendUint32(b, uint32(*d))
- return b, nil
-}
-
-func (d *digest) UnmarshalBinary(b []byte) error {
- if len(b) < len(magic) || string(b[:len(magic)]) != magic {
- return errors.New("hash/adler32: invalid hash state identifier")
- }
- if len(b) != marshaledSize {
- return errors.New("hash/adler32: invalid hash state size")
- }
- *d = digest(binary.BigEndian.Uint32(b[len(magic):]))
- return nil
-}
-
-func (d *digest) Size() int { return Size }
-
-func (d *digest) BlockSize() int { return 4 }
-
-func (d *digest) Write(data []byte) (nn int, err error) {
- if len(data) >= 64 {
- var h uint32
- if hasAVX2 {
- h = adler32_avx2(uint32(*d), data)
- } else {
- h = adler32_sse3(uint32(*d), data)
- }
- *d = digest(h)
- } else {
- h := update(uint32(*d), data)
- *d = digest(h)
- }
- return len(data), nil
-}
-
-func (d *digest) Sum32() uint32 { return uint32(*d) }
-
-func (d *digest) Sum(in []byte) []byte {
- s := uint32(*d)
- return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
-}
-
-// Checksum returns the Adler-32 checksum of data.
-func Checksum(data []byte) uint32 {
- if !hasSSE3 || len(data) < 64 {
- return update(1, data)
- }
-
- if hasAVX2 {
- return adler32_avx2(1, data)
- }
- return adler32_sse3(1, data)
-}
diff --git a/internal/adler32/adler32_arm64.go b/internal/adler32/adler32_arm64.go
deleted file mode 100644
index ddf9cb5e..00000000
--- a/internal/adler32/adler32_arm64.go
+++ /dev/null
@@ -1,73 +0,0 @@
-//go:build arm64 && !purego
-
-package adler32
-
-import (
- "encoding/binary"
- "errors"
- "hash"
-)
-
-// The size of an Adler-32 checksum in bytes.
-const Size = 4
-
-// digest represents the partial evaluation of a checksum.
-// The low 16 bits are s1, the high 16 bits are s2.
-type digest uint32
-
-func (d *digest) Reset() { *d = 1 }
-
-// New returns a new hash.Hash32 computing the Adler-32 checksum.
-func New() hash.Hash32 {
- d := new(digest)
- d.Reset()
- return d
-}
-
-func (d *digest) MarshalBinary() ([]byte, error) {
- b := make([]byte, 0, marshaledSize)
- b = append(b, magic...)
- b = binary.BigEndian.AppendUint32(b, uint32(*d))
- return b, nil
-}
-
-func (d *digest) UnmarshalBinary(b []byte) error {
- if len(b) < len(magic) || string(b[:len(magic)]) != magic {
- return errors.New("hash/adler32: invalid hash state identifier")
- }
- if len(b) != marshaledSize {
- return errors.New("hash/adler32: invalid hash state size")
- }
- *d = digest(binary.BigEndian.Uint32(b[len(magic):]))
- return nil
-}
-
-func (d *digest) Size() int { return Size }
-
-func (d *digest) BlockSize() int { return 4 }
-
-func (d *digest) Write(data []byte) (nn int, err error) {
- if len(data) >= 64 {
- h := adler32_neon(uint32(*d), data)
- *d = digest(h)
- } else {
- h := update(uint32(*d), data)
- *d = digest(h)
- }
- return len(data), nil
-}
-
-func (d *digest) Sum32() uint32 { return uint32(*d) }
-
-func (d *digest) Sum(in []byte) []byte {
- s := uint32(*d)
- return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
-}
-
-// Checksum returns the Adler-32 checksum of data.
-func Checksum(data []byte) uint32 {
- if len(data) >= 64 {
- return adler32_neon(1, data)
- }
- return update(1, data)
-}
diff --git a/internal/adler32/adler32_avx2.go b/internal/adler32/adler32_avx2.go
deleted file mode 100644
index 042812b8..00000000
--- a/internal/adler32/adler32_avx2.go
+++ /dev/null
@@ -1,6 +0,0 @@
-//go:build !purego && amd64
-
-package adler32
-
-//go:noescape
-func adler32_avx2(in uint32, buf []byte) uint32
diff --git a/internal/adler32/adler32_avx2.s b/internal/adler32/adler32_avx2.s
deleted file mode 100644
index 1b9a1c50..00000000
--- a/internal/adler32/adler32_avx2.s
+++ /dev/null
@@ -1,263 +0,0 @@
-//go:build !purego && amd64
-
-#include "textflag.h"
-
-DATA weights_1_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
-DATA weights_1_32<>+0x08(SB)/8, $0x1112131415161718
-DATA weights_1_32<>+0x10(SB)/8, $0x090a0b0c0d0e0f10
-DATA weights_1_32<>+0x18(SB)/8, $0x0102030405060708
-GLOBL weights_1_32<>(SB), (RODATA|NOPTR), $32
-
-DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x10(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x18(SB)/8, $0x0001000100010001
-GLOBL ones_u16<>(SB), (RODATA|NOPTR), $32
-
-DATA one_u16<>+0x00(SB)/2, $0x0001
-GLOBL one_u16<>(SB), (RODATA|NOPTR), $2
-
-TEXT ·adler32_avx2(SB), NOSPLIT, $0-36
- MOVLQZX in+0(FP), DI
- MOVQ buf_base+8(FP), SI
- MOVQ buf_len+16(FP), DX
- MOVQ buf_cap+24(FP), CX
- WORD $0x8548; BYTE $0xf6
- JE return_one
- WORD $0xf889
- WORD $0x8548; BYTE $0xd2
- JE return_result
- NOP
- NOP
- NOP
- WORD $0xc189
- WORD $0xe9c1; BYTE $0x10
- WORD $0xb70f; BYTE $0xc0
- CMPQ DX, $0x20
- JB tail16_check
- LONG $0x078071bf; BYTE $0x80
- LONG $0xc0eff9c5
- VMOVDQA weights_1_32<>(SB), Y1
- VPBROADCASTW one_u16<>(SB), Y2
- JMP block_loop_setup
-
-block_accum_init:
- LONG $0xf46ffdc5
- LONG $0xedefd1c5
-
-block_reduce:
- SUBQ AX, DX
- LONG $0xf572ddc5; BYTE $0x05
- LONG $0xdbfeddc5
- LONG $0x397de3c4; WORD $0x01f4
- LONG $0xecc6c8c5; BYTE $0x88
- LONG $0xe470f9c5; BYTE $0x88
- LONG $0xe4fed1c5
- LONG $0xec70f9c5; BYTE $0x55
- LONG $0xe4fed1c5
- LONG $0xe07ef9c5
- MOVQ AX, CX
- IMULQ DI, CX
- SHRQ $0x2f, CX
- LONG $0xfff1c969; WORD $0x0000
- WORD $0xc829
- LONG $0x397de3c4; WORD $0x01dc
- LONG $0xdbfed9c5
- LONG $0xe370f9c5; BYTE $0xee
- LONG $0xdcfee1c5
- LONG $0xe370f9c5; BYTE $0x55
- LONG $0xdbfed9c5
- LONG $0xd97ef9c5
- MOVQ CX, R8
- IMULQ DI, R8
- SHRQ $0x2f, R8
- LONG $0xf1c06945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xc1
- CMPQ DX, $0x1f
- JBE tail_check
-
-block_loop_setup:
- LONG $0xe06ef9c5
- LONG $0xd96ef9c5
- CMPQ DX, $0x15b0
- LONG $0x15b0b841; WORD $0x0000
- LONG $0xc2420f4c
- WORD $0x8944; BYTE $0xc0
- LONG $0x001fe025; BYTE $0x00
- JE block_accum_init
- ADDQ $-0x20, R8
- LONG $0xedefd1c5
- LONG $0x20c0f641
- JNE block_loop_entry
- LONG $0x2e6ffec5
- ADDQ $0x20, SI
- LEAQ -0x20(AX), CX
- LONG $0xf0f6d5c5
- LONG $0xf4fecdc5
- LONG $0x0455e2c4; BYTE $0xe9
- LONG $0xeaf5d5c5
- LONG $0xdbfed5c5
- LONG $0xec6ffdc5
- LONG $0xe66ffdc5
- CMPQ R8, $0x20
- JAE block_loop_64
- JMP block_reduce
-
-block_loop_entry:
- MOVQ AX, CX
- CMPQ R8, $0x20
- JB block_reduce
-
-block_loop_64:
- LONG $0x366ffec5
- LONG $0x7e6ffec5; BYTE $0x20
- LONG $0xc0f64dc5
- LONG $0xc4fe3dc5
- LONG $0xecfed5c5
- LONG $0x044de2c4; BYTE $0xe1
- LONG $0xe2f5ddc5
- LONG $0xdbfeddc5
- ADDQ $0x40, SI
- LONG $0xe0f6c5c5
- LONG $0xe4febdc5
- LONG $0xedfebdc5
- LONG $0x0445e2c4; BYTE $0xf1
- LONG $0xf2f5cdc5
- LONG $0xdbfecdc5
- ADDQ $-0x40, CX
- JNE block_loop_64
- LONG $0xf46ffdc5
- JMP block_reduce
-
-return_one:
- LONG $0x000001b8; BYTE $0x00
-
-return_result:
- MOVL AX, ret+32(FP)
- RET
-
-tail_check:
- WORD $0x8548; BYTE $0xd2
- JE return_no_tail
-
-tail16_check:
- CMPQ DX, $0x10
- JB tail_bytes_setup
- WORD $0xb60f; BYTE $0x3e
- WORD $0xf801
- WORD $0xc101
- LONG $0x017eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x0246b60f
- WORD $0xf801
- WORD $0xc101
- LONG $0x037eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x0446b60f
- WORD $0xf801
- WORD $0xc101
- LONG $0x057eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x0646b60f
- WORD $0xf801
- WORD $0xc101
- LONG $0x077eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x0846b60f
- WORD $0xf801
- WORD $0xc101
- LONG $0x097eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x0a46b60f
- WORD $0xf801
- WORD $0xc101
- LONG $0x0b7eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x0c46b60f
- WORD $0xf801
- WORD $0xc101
- LONG $0x0d7eb60f
- WORD $0xc701
- WORD $0xf901
- LONG $0x46b60f44; BYTE $0x0e
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc1
- LONG $0x0f46b60f
- WORD $0x0144; BYTE $0xc0
- WORD $0xc101
- ADDQ $-0x10, DX
- JE final_reduce
- ADDQ $0x10, SI
-
-tail_bytes_setup:
- LEAQ -0x1(DX), DI
- MOVQ DX, R9
- ANDQ $0x3, R9
- JE tail_dword_setup
- XORL R8, R8
-
-tail_byte_loop:
- LONG $0x14b60f46; BYTE $0x06
- WORD $0x0144; BYTE $0xd0
- WORD $0xc101
- INCQ R8
- CMPQ R9, R8
- JNE tail_byte_loop
- ADDQ R8, SI
- SUBQ R8, DX
-
-tail_dword_setup:
- CMPQ DI, $0x3
- JB final_reduce
- XORL DI, DI
-
-tail_dword_loop:
- LONG $0x04b60f44; BYTE $0x3e
- WORD $0x0141; BYTE $0xc0
- WORD $0x0144; BYTE $0xc1
- LONG $0x3e44b60f; BYTE $0x01
- WORD $0x0144; BYTE $0xc0
- WORD $0xc101
- LONG $0x44b60f44; WORD $0x023e
- WORD $0x0141; BYTE $0xc0
- WORD $0x0144; BYTE $0xc1
- LONG $0x3e44b60f; BYTE $0x03
- WORD $0x0144; BYTE $0xc0
- WORD $0xc101
- ADDQ $0x4, DI
- CMPQ DX, DI
- JNE tail_dword_loop
-
-final_reduce:
- LONG $0x000f908d; WORD $0xffff
- CMPL AX, $0xfff1
- WORD $0x420f; BYTE $0xd0
- WORD $0xc889
- LONG $0x078071be; BYTE $0x80
- IMULQ AX, SI
- SHRQ $0x2f, SI
- LONG $0xfff1c669; WORD $0x0000
- WORD $0xc129
- WORD $0xe1c1; BYTE $0x10
- WORD $0xd109
- WORD $0xc889
- NOP
- NOP
- VZEROUPPER
- MOVL AX, ret+32(FP)
- RET
-
-return_no_tail:
- WORD $0xe1c1; BYTE $0x10
- WORD $0xc809
- NOP
- NOP
- VZEROUPPER
- MOVL AX, ret+32(FP)
- RET
diff --git a/internal/adler32/adler32_fallback.go b/internal/adler32/adler32_fallback.go
deleted file mode 100644
index c213c3c1..00000000
--- a/internal/adler32/adler32_fallback.go
+++ /dev/null
@@ -1,19 +0,0 @@
-//go:build (!arm64 && !amd64) || purego
-
-package adler32
-
-import (
- "hash"
- "hash/adler32"
-)
-
-// The size of an Adler-32 checksum in bytes.
-const Size = 4
-
-// New returns a new hash.Hash32 computing the Adler-32 checksum.
-func New() hash.Hash32 {
- return adler32.New()
-}
-
-// Checksum returns the Adler-32 checksum of data.
-func Checksum(data []byte) uint32 { return adler32.Checksum(data) }
diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go
deleted file mode 100644
index f33e0f9b..00000000
--- a/internal/adler32/adler32_generic.go
+++ /dev/null
@@ -1,45 +0,0 @@
-// Package adler32 implements the Adler-32 checksum.
-package adler32
-
-const (
- // mod is the largest prime that is less than 65536.
- mod = 65521
- // nmax is the largest n such that
- // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1.
- // It is mentioned in RFC 1950 (search for "5552").
- nmax = 5552
-
- // binary representation compatible with standard library.
- magic = "adl\x01"
- marshaledSize = len(magic) + 4
-)
-
-// Add p to the running checksum d.
-func update(d uint32, p []byte) uint32 {
- s1, s2 := d&0xffff, d>>16
- for len(p) > 0 {
- var q []byte
- if len(p) > nmax {
- p, q = p[:nmax], p[nmax:]
- }
- for len(p) >= 4 {
- s1 += uint32(p[0])
- s2 += s1
- s1 += uint32(p[1])
- s2 += s1
- s1 += uint32(p[2])
- s2 += s1
- s1 += uint32(p[3])
- s2 += s1
- p = p[4:]
- }
- for _, x := range p {
- s1 += uint32(x)
- s2 += s1
- }
- s1 %= mod
- s2 %= mod
- p = q
- }
- return s2<<16 | s1
-}
diff --git a/internal/adler32/adler32_neon.go b/internal/adler32/adler32_neon.go
deleted file mode 100644
index 521b71e0..00000000
--- a/internal/adler32/adler32_neon.go
+++ /dev/null
@@ -1,6 +0,0 @@
-//go:build !purego && arm64
-
-package adler32
-
-//go:noescape
-func adler32_neon(in uint32, buf []byte) uint32
diff --git a/internal/adler32/adler32_neon.s b/internal/adler32/adler32_neon.s
deleted file mode 100644
index 08b170bd..00000000
--- a/internal/adler32/adler32_neon.s
+++ /dev/null
@@ -1,208 +0,0 @@
-//go:build !purego && arm64
-
-#include "textflag.h"
-
-DATA mult_table<>+0x00(SB)/8, $0x001d001e001f0020
-DATA mult_table<>+0x08(SB)/8, $0x0019001a001b001c
-DATA mult_table<>+0x10(SB)/8, $0x0015001600170018
-DATA mult_table<>+0x18(SB)/8, $0x0011001200130014
-DATA mult_table<>+0x20(SB)/8, $0x000d000e000f0010
-DATA mult_table<>+0x28(SB)/8, $0x0009000a000b000c
-DATA mult_table<>+0x30(SB)/8, $0x0005000600070008
-DATA mult_table<>+0x38(SB)/8, $0x0001000200030004
-GLOBL mult_table<>(SB), (RODATA|NOPTR), $64
-
-TEXT ·adler32_neon(SB), NOSPLIT, $0-36
- MOVW in+0(FP), R0
- MOVD buf_base+8(FP), R1
- MOVD buf_len+16(FP), R2
- MOVD buf_cap+24(FP), R3
- NOP
- ANDS $15, R1, R10
- ANDW $65535, R0, R8
- LSRW $16, R0, R9
- NOP
- BEQ vector_loop_setup
- ADD $1, R1, R11
- MOVD R1, R12
-
-align_loop:
- WORD $0x3840158d
- SUB $1, R2, R2
- TST $15, R11
- ADD $1, R11, R11
- ADDW R13, R8, R8
- ADDW R9, R8, R9
- BNE align_loop
- MOVW $32881, R11
- MOVW $65521, R13
- MOVKW $(32775<<16), R11
- MOVW $4294901775, R12
- MOVW $65520, R14
- SUB R10, R1, R10
- UMULL R11, R9, R11
- ADDW R12, R8, R12
- CMPW R14, R8
- ADD $16, R10, R1
- LSR $47, R11, R11
- CSELW HI, R12, R8, R8
- MSUBW R13, R9, R11, R9
-
-vector_loop_setup:
- AND $31, R2, R10
- CMP $32, R2
- BCC tail_entry
- MOVD $mult_table<>(SB), R11
- ADD $0, R11, R11
- MOVW $32881, R14
- MOVW $173, R12
- MOVD $137438953440, R13
- MOVKW $(32775<<16), R14
- VLD1 (R11), [V0.H8, V1.H8, V2.H8, V3.H8]
- LSR $5, R2, R11
- MOVW $65521, R15
- VEXT $8, V0.B16, V0.B16, V4.B16
- VEXT $8, V1.B16, V1.B16, V5.B16
- VEXT $8, V2.B16, V2.B16, V6.B16
- VEXT $8, V3.B16, V3.B16, V7.B16
-
-vector_outer_loop:
- CMP $173, R11
- MOVD R1, R2
- CSEL LO, R11, R12, R16
- WORD $0x6f00e414
- MULW R16, R8, R0
- ADD R16<<5, R13, R17
- WORD $0x6f00e410
- AND $137438953440, R17, R17
- WORD $0x6f00e412
- WORD $0x6f00e413
- WORD $0x6f00e415
- VMOV R0, V20.S[3]
- MOVW R16, R0
- WORD $0x6f00e411
-
-vector_inner_loop:
- WORD $0xacc15857
- SUBSW $1, R0, R0
- VADD V17.S4, V20.S4, V20.S4
- WORD $0x2e3712b5
- WORD $0x6e371273
- WORD $0x6e202ad8
- WORD $0x2e361252
- WORD $0x6e361210
- WORD $0x6e206af8
- WORD $0x6e606b11
- BNE vector_inner_loop
- VSHL $5, V20.S4, V20.S4
- ADD R17, R1, R17
- SUBS R16, R11, R11
- ADD $32, R17, R1
- WORD $0x2e6082b4
- VEXT $8, V21.B16, V21.B16, V21.B16
- WORD $0x2e6482b4
- VEXT $8, V19.B16, V19.B16, V21.B16
- WORD $0x2e618274
- VEXT $8, V18.B16, V18.B16, V19.B16
- WORD $0x2e6582b4
- WORD $0x2e628254
- WORD $0x2e668274
- WORD $0x2e638214
- VEXT $8, V16.B16, V16.B16, V16.B16
- WORD $0x2e678214
- WORD $0x4eb1be30
- WORD $0x4eb4be91
- WORD $0x0eb1be10
- VMOV V16.S[1], R0
- FMOVS F16, R2
- ADDW R8, R2, R8
- ADDW R9, R0, R9
- UMULL R14, R8, R0
- UMULL R14, R9, R2
- LSR $47, R0, R0
- LSR $47, R2, R2
- MSUBW R15, R8, R0, R8
- MSUBW R15, R9, R2, R9
- BNE vector_outer_loop
-
-tail_entry:
- CBZ R10, return_result
- CMP $16, R10
- BCC tail_byte_loop
- WORD $0x3940002b
- SUBS $16, R10, R10
- WORD $0x3940042c
- WORD $0x3940082d
- ADDW R11, R8, R8
- WORD $0x39400c2b
- ADDW R9, R8, R9
- ADDW R12, R8, R8
- WORD $0x3940102c
- ADDW R8, R9, R9
- ADDW R13, R8, R8
- WORD $0x3940142d
- ADDW R8, R9, R9
- ADDW R11, R8, R8
- WORD $0x3940182b
- ADDW R8, R9, R9
- ADDW R12, R8, R8
- WORD $0x39401c2c
- ADDW R8, R9, R9
- ADDW R13, R8, R8
- ADDW R8, R9, R9
- ADDW R11, R8, R8
- WORD $0x3940202b
- ADDW R8, R9, R9
- ADDW R12, R8, R8
- WORD $0x3940242c
- ADDW R8, R9, R9
- WORD $0x3940382d
- ADDW R11, R8, R8
- WORD $0x3940282b
- ADDW R8, R9, R9
- ADDW R12, R8, R8
- WORD $0x39402c2c
- ADDW R8, R9, R9
- ADDW R11, R8, R8
- WORD $0x3940302b
- ADDW R8, R9, R9
- ADDW R12, R8, R8
- WORD $0x3940342c
- ADDW R8, R9, R9
- ADDW R11, R8, R8
- WORD $0x39403c2b
- ADDW R8, R9, R9
- ADDW R12, R8, R8
- ADDW R8, R9, R9
- ADDW R13, R8, R8
- ADDW R8, R9, R9
- ADDW R11, R8, R8
- ADDW R8, R9, R9
- BEQ final_reduce
- ADD $16, R1, R1
-
-tail_byte_loop:
- WORD $0x3840142b
- SUBS $1, R10, R10
- ADDW R11, R8, R8
- ADDW R9, R8, R9
- BNE tail_byte_loop
-
-final_reduce:
- MOVW $32881, R10
- MOVW $65521, R12
- MOVKW $(32775<<16), R10
- MOVW $4294901775, R11
- MOVW $65520, R13
- ADDW R11, R8, R11
- UMULL R10, R9, R10
- CMPW R13, R8
- CSELW HI, R11, R8, R8
- LSR $47, R10, R10
- MSUBW R12, R9, R10, R9
-
-return_result:
- ORRW R9<<16, R8, R0
- NOP
- MOVW R0, ret+32(FP)
- RET
diff --git a/internal/adler32/adler32_sse3.go b/internal/adler32/adler32_sse3.go
deleted file mode 100644
index 8e8c8a9b..00000000
--- a/internal/adler32/adler32_sse3.go
+++ /dev/null
@@ -1,6 +0,0 @@
-//go:build !purego && amd64
-
-package adler32
-
-//go:noescape
-func adler32_sse3(in uint32, buf []byte) uint32
diff --git a/internal/adler32/adler32_sse3.s b/internal/adler32/adler32_sse3.s
deleted file mode 100644
index 5880bab8..00000000
--- a/internal/adler32/adler32_sse3.s
+++ /dev/null
@@ -1,214 +0,0 @@
-//go:build !purego && amd64
-
-#include "textflag.h"
-
-DATA weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
-DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718
-GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16
-
-DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001
-GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16
-
-DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10
-DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708
-GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16
-
-TEXT ·adler32_sse3(SB), NOSPLIT, $0-36
- MOVLQZX in+0(FP), DI
- MOVQ buf_base+8(FP), SI
- MOVQ buf_len+16(FP), DX
- MOVQ buf_cap+24(FP), CX
- NOP
- NOP
- NOP
- WORD $0xf889
- LONG $0xc8b70f44
- WORD $0xe8c1; BYTE $0x10
- WORD $0xd189
- WORD $0xe183; BYTE $0x1f
- CMPQ DX, $0x20
- JAE block_loop_setup
- WORD $0x8944; BYTE $0xcf
- JMP tail_entry
-
-block_loop_setup:
- SHRQ $0x5, DX
- LONG $0xc0ef0f66
- MOVO weights_17_32<>(SB), X1
- MOVO ones_u16<>(SB), X2
- MOVO weights_1_16<>(SB), X3
- LONG $0x8071b841; WORD $0x8007
-
-block_outer_loop:
- CMPQ DX, $0xad
- LONG $0x00adba41; WORD $0x0000
- LONG $0xd2420f4c
- WORD $0x8944; BYTE $0xcf
- LONG $0xfaaf0f41
- LONG $0xef6e0f66
- LONG $0xe06e0f66
- WORD $0x8944; BYTE $0xd0
- LONG $0xf6ef0f66
-
-block_inner_loop:
- LONG $0x3e6f0ff3
- LONG $0x6f0f4466; BYTE $0xc7
- LONG $0x04380f66; BYTE $0xf9
- LONG $0xfaf50f66
- LONG $0xfcfe0f66
- LONG $0x666f0ff3; BYTE $0x10
- LONG $0xeefe0f66
- LONG $0xf60f4466; BYTE $0xc0
- LONG $0xfe0f4466; BYTE $0xc6
- LONG $0xf46f0f66
- LONG $0xf0f60f66
- LONG $0xfe0f4166; BYTE $0xf0
- LONG $0x04380f66; BYTE $0xe3
- LONG $0xe2f50f66
- LONG $0xe7fe0f66
- ADDQ $0x20, SI
- WORD $0xc8ff
- JNE block_inner_loop
- LONG $0xf5720f66; BYTE $0x05
- LONG $0xe5fe0f66
- LONG $0xee700f66; BYTE $0xb1
- LONG $0xeefe0f66
- LONG $0xf5700f66; BYTE $0xee
- LONG $0xf5fe0f66
- LONG $0xf77e0f66
- WORD $0x0144; BYTE $0xcf
- LONG $0xec700f66; BYTE $0xb1
- LONG $0xecfe0f66
- LONG $0xe5700f66; BYTE $0xee
- LONG $0xe5fe0f66
- LONG $0xe07e0f66
- MOVQ DI, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xcf
- MOVQ AX, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xc8
- WORD $0x8941; BYTE $0xf9
- SUBQ R10, DX
- JNE block_outer_loop
-
-tail_entry:
- WORD $0x8548; BYTE $0xc9
- JE return_result
- CMPL CX, $0x10
- JB tail_bytes_setup
- WORD $0xb60f; BYTE $0x16
- WORD $0xd701
- WORD $0xf801
- LONG $0x0156b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x027eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0356b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x047eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0556b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x067eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0756b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x087eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0956b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x0a7eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0b56b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x0c7eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0d56b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x46b60f44; BYTE $0x0e
- WORD $0x0141; BYTE $0xd0
- WORD $0x0144; BYTE $0xc0
- LONG $0x0f7eb60f
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- ADDQ $-0x10, CX
- JE final_reduce
- ADDQ $0x10, SI
-
-tail_bytes_setup:
- LEAQ -0x1(CX), DX
- MOVQ CX, R9
- ANDQ $0x3, R9
- JE tail_dword_setup
- XORL R8, R8
-
-tail_byte_loop:
- LONG $0x14b60f46; BYTE $0x06
- WORD $0x0144; BYTE $0xd7
- WORD $0xf801
- INCQ R8
- CMPQ R9, R8
- JNE tail_byte_loop
- ADDQ R8, SI
- SUBQ R8, CX
-
-tail_dword_setup:
- CMPQ DX, $0x3
- JB final_reduce
- XORL DX, DX
-
-tail_dword_loop:
- LONG $0x04b60f44; BYTE $0x16
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc0
- LONG $0x167cb60f; BYTE $0x01
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- LONG $0x44b60f44; WORD $0x0216
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc0
- LONG $0x167cb60f; BYTE $0x03
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- ADDQ $0x4, DX
- CMPQ CX, DX
- JNE tail_dword_loop
-
-final_reduce:
- LONG $0x000f8f8d; WORD $0xffff
- CMPL DI, $0xfff1
- WORD $0x420f; BYTE $0xcf
- WORD $0xc289
- LONG $0x078071be; BYTE $0x80
- IMULQ DX, SI
- SHRQ $0x2f, SI
- LONG $0xfff1d669; WORD $0x0000
- WORD $0xd029
- WORD $0xcf89
-
-return_result:
- WORD $0xe0c1; BYTE $0x10
- WORD $0xf809
- NOP
- NOP
- MOVL AX, ret+32(FP)
- RET
diff --git a/internal/adler32/bench_test.go b/internal/adler32/bench_test.go
deleted file mode 100644
index 7744b903..00000000
--- a/internal/adler32/bench_test.go
+++ /dev/null
@@ -1,22 +0,0 @@
-package adler32
-
-import (
- "testing"
-)
-
-const benchmarkSize = 64 * 1024
-
-var data = make([]byte, benchmarkSize)
-
-func init() {
- for i := range benchmarkSize {
- data[i] = byte(i % 256)
- }
-}
-
-func BenchmarkChecksum(b *testing.B) {
- b.ReportAllocs()
- for range b.N {
- Checksum(data)
- }
-}