diff options
| author | 2026-02-20 19:06:13 +0800 | |
|---|---|---|
| committer | 2026-02-20 19:07:14 +0800 | |
| commit | aa513c069c1418734aea894dc944e27c6a78a3bb (patch) | |
| tree | 687f0a11bb550fa088fd82a98ceb8979bbc35f69 /internal/adler32 | |
| parent | Comment on prior reverts removing the pack writing API (diff) | |
Delete everything, I'm redesigning this.
I'll stop using a flat package and make things much more modular.
And also experiment with streaming APIs so large blobs don't OOM us.
Diffstat (limited to 'internal/adler32')
| -rw-r--r-- | internal/adler32/LICENSE | 30 | ||||
| -rw-r--r-- | internal/adler32/LICENSE.ZLIB | 17 | ||||
| -rw-r--r-- | internal/adler32/README | 1 | ||||
| -rw-r--r-- | internal/adler32/adler32_amd64.go | 93 | ||||
| -rw-r--r-- | internal/adler32/adler32_arm64.go | 73 | ||||
| -rw-r--r-- | internal/adler32/adler32_avx2.go | 6 | ||||
| -rw-r--r-- | internal/adler32/adler32_avx2.s | 263 | ||||
| -rw-r--r-- | internal/adler32/adler32_fallback.go | 19 | ||||
| -rw-r--r-- | internal/adler32/adler32_generic.go | 45 | ||||
| -rw-r--r-- | internal/adler32/adler32_neon.go | 6 | ||||
| -rw-r--r-- | internal/adler32/adler32_neon.s | 208 | ||||
| -rw-r--r-- | internal/adler32/adler32_sse3.go | 6 | ||||
| -rw-r--r-- | internal/adler32/adler32_sse3.s | 214 | ||||
| -rw-r--r-- | internal/adler32/bench_test.go | 22 |
14 files changed, 0 insertions, 1003 deletions
diff --git a/internal/adler32/LICENSE b/internal/adler32/LICENSE deleted file mode 100644 index 5cec357a..00000000 --- a/internal/adler32/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2024, Michal Hruby -Copyright (c) 2017 The Chromium Authors. All rights reserved. -Copyright (c) 1995-2024 Mark Adler -Copyright (c) 1995-2024 Jean-loup Gailly -Copyright (c) 2022 Adam Stylinski - -BSD 2-Clause License - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/internal/adler32/LICENSE.ZLIB b/internal/adler32/LICENSE.ZLIB deleted file mode 100644 index c75c1568..00000000 --- a/internal/adler32/LICENSE.ZLIB +++ /dev/null @@ -1,17 +0,0 @@ -Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. diff --git a/internal/adler32/README b/internal/adler32/README deleted file mode 100644 index b80acd00..00000000 --- a/internal/adler32/README +++ /dev/null @@ -1 +0,0 @@ -This package was mostly copied from github.com/mhr3/adler32-simd. diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go deleted file mode 100644 index 88a854ed..00000000 --- a/internal/adler32/adler32_amd64.go +++ /dev/null @@ -1,93 +0,0 @@ -//go:build amd64 && !purego - -package adler32 - -import ( - "encoding/binary" - "errors" - "hash" - "hash/adler32" - - "golang.org/x/sys/cpu" -) - -// The size of an Adler-32 checksum in bytes. -const Size = 4 - -var ( - hasSSE3 = cpu.X86.HasSSE3 - hasAVX2 = cpu.X86.HasAVX2 -) - -// digest represents the partial evaluation of a checksum. -// The low 16 bits are s1, the high 16 bits are s2. -type digest uint32 - -func (d *digest) Reset() { *d = 1 } - -// New returns a new hash.Hash32 computing the Adler-32 checksum. -func New() hash.Hash32 { - if !hasSSE3 { - return adler32.New() - } - d := new(digest) - d.Reset() - return d -} - -func (d *digest) MarshalBinary() ([]byte, error) { - b := make([]byte, 0, marshaledSize) - b = append(b, magic...) - b = binary.BigEndian.AppendUint32(b, uint32(*d)) - return b, nil -} - -func (d *digest) UnmarshalBinary(b []byte) error { - if len(b) < len(magic) || string(b[:len(magic)]) != magic { - return errors.New("hash/adler32: invalid hash state identifier") - } - if len(b) != marshaledSize { - return errors.New("hash/adler32: invalid hash state size") - } - *d = digest(binary.BigEndian.Uint32(b[len(magic):])) - return nil -} - -func (d *digest) Size() int { return Size } - -func (d *digest) BlockSize() int { return 4 } - -func (d *digest) Write(data []byte) (nn int, err error) { - if len(data) >= 64 { - var h uint32 - if hasAVX2 { - h = adler32_avx2(uint32(*d), data) - } else { - h = adler32_sse3(uint32(*d), data) - } - *d = digest(h) - } else { - h := update(uint32(*d), data) - *d = digest(h) - } - return len(data), nil -} - -func (d *digest) Sum32() uint32 { return uint32(*d) } - -func (d *digest) Sum(in []byte) []byte { - s := uint32(*d) - return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// Checksum returns the Adler-32 checksum of data. -func Checksum(data []byte) uint32 { - if !hasSSE3 || len(data) < 64 { - return update(1, data) - } - - if hasAVX2 { - return adler32_avx2(1, data) - } - return adler32_sse3(1, data) -} diff --git a/internal/adler32/adler32_arm64.go b/internal/adler32/adler32_arm64.go deleted file mode 100644 index ddf9cb5e..00000000 --- a/internal/adler32/adler32_arm64.go +++ /dev/null @@ -1,73 +0,0 @@ -//go:build arm64 && !purego - -package adler32 - -import ( - "encoding/binary" - "errors" - "hash" -) - -// The size of an Adler-32 checksum in bytes. -const Size = 4 - -// digest represents the partial evaluation of a checksum. -// The low 16 bits are s1, the high 16 bits are s2. -type digest uint32 - -func (d *digest) Reset() { *d = 1 } - -// New returns a new hash.Hash32 computing the Adler-32 checksum. -func New() hash.Hash32 { - d := new(digest) - d.Reset() - return d -} - -func (d *digest) MarshalBinary() ([]byte, error) { - b := make([]byte, 0, marshaledSize) - b = append(b, magic...) - b = binary.BigEndian.AppendUint32(b, uint32(*d)) - return b, nil -} - -func (d *digest) UnmarshalBinary(b []byte) error { - if len(b) < len(magic) || string(b[:len(magic)]) != magic { - return errors.New("hash/adler32: invalid hash state identifier") - } - if len(b) != marshaledSize { - return errors.New("hash/adler32: invalid hash state size") - } - *d = digest(binary.BigEndian.Uint32(b[len(magic):])) - return nil -} - -func (d *digest) Size() int { return Size } - -func (d *digest) BlockSize() int { return 4 } - -func (d *digest) Write(data []byte) (nn int, err error) { - if len(data) >= 64 { - h := adler32_neon(uint32(*d), data) - *d = digest(h) - } else { - h := update(uint32(*d), data) - *d = digest(h) - } - return len(data), nil -} - -func (d *digest) Sum32() uint32 { return uint32(*d) } - -func (d *digest) Sum(in []byte) []byte { - s := uint32(*d) - return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// Checksum returns the Adler-32 checksum of data. -func Checksum(data []byte) uint32 { - if len(data) >= 64 { - return adler32_neon(1, data) - } - return update(1, data) -} diff --git a/internal/adler32/adler32_avx2.go b/internal/adler32/adler32_avx2.go deleted file mode 100644 index 042812b8..00000000 --- a/internal/adler32/adler32_avx2.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && amd64 - -package adler32 - -//go:noescape -func adler32_avx2(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_avx2.s b/internal/adler32/adler32_avx2.s deleted file mode 100644 index 1b9a1c50..00000000 --- a/internal/adler32/adler32_avx2.s +++ /dev/null @@ -1,263 +0,0 @@ -//go:build !purego && amd64 - -#include "textflag.h" - -DATA weights_1_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 -DATA weights_1_32<>+0x08(SB)/8, $0x1112131415161718 -DATA weights_1_32<>+0x10(SB)/8, $0x090a0b0c0d0e0f10 -DATA weights_1_32<>+0x18(SB)/8, $0x0102030405060708 -GLOBL weights_1_32<>(SB), (RODATA|NOPTR), $32 - -DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x10(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x18(SB)/8, $0x0001000100010001 -GLOBL ones_u16<>(SB), (RODATA|NOPTR), $32 - -DATA one_u16<>+0x00(SB)/2, $0x0001 -GLOBL one_u16<>(SB), (RODATA|NOPTR), $2 - -TEXT ·adler32_avx2(SB), NOSPLIT, $0-36 - MOVLQZX in+0(FP), DI - MOVQ buf_base+8(FP), SI - MOVQ buf_len+16(FP), DX - MOVQ buf_cap+24(FP), CX - WORD $0x8548; BYTE $0xf6 - JE return_one - WORD $0xf889 - WORD $0x8548; BYTE $0xd2 - JE return_result - NOP - NOP - NOP - WORD $0xc189 - WORD $0xe9c1; BYTE $0x10 - WORD $0xb70f; BYTE $0xc0 - CMPQ DX, $0x20 - JB tail16_check - LONG $0x078071bf; BYTE $0x80 - LONG $0xc0eff9c5 - VMOVDQA weights_1_32<>(SB), Y1 - VPBROADCASTW one_u16<>(SB), Y2 - JMP block_loop_setup - -block_accum_init: - LONG $0xf46ffdc5 - LONG $0xedefd1c5 - -block_reduce: - SUBQ AX, DX - LONG $0xf572ddc5; BYTE $0x05 - LONG $0xdbfeddc5 - LONG $0x397de3c4; WORD $0x01f4 - LONG $0xecc6c8c5; BYTE $0x88 - LONG $0xe470f9c5; BYTE $0x88 - LONG $0xe4fed1c5 - LONG $0xec70f9c5; BYTE $0x55 - LONG $0xe4fed1c5 - LONG $0xe07ef9c5 - MOVQ AX, CX - IMULQ DI, CX - SHRQ $0x2f, CX - LONG $0xfff1c969; WORD $0x0000 - WORD $0xc829 - LONG $0x397de3c4; WORD $0x01dc - LONG $0xdbfed9c5 - LONG $0xe370f9c5; BYTE $0xee - LONG $0xdcfee1c5 - LONG $0xe370f9c5; BYTE $0x55 - LONG $0xdbfed9c5 - LONG $0xd97ef9c5 - MOVQ CX, R8 - IMULQ DI, R8 - SHRQ $0x2f, R8 - LONG $0xf1c06945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xc1 - CMPQ DX, $0x1f - JBE tail_check - -block_loop_setup: - LONG $0xe06ef9c5 - LONG $0xd96ef9c5 - CMPQ DX, $0x15b0 - LONG $0x15b0b841; WORD $0x0000 - LONG $0xc2420f4c - WORD $0x8944; BYTE $0xc0 - LONG $0x001fe025; BYTE $0x00 - JE block_accum_init - ADDQ $-0x20, R8 - LONG $0xedefd1c5 - LONG $0x20c0f641 - JNE block_loop_entry - LONG $0x2e6ffec5 - ADDQ $0x20, SI - LEAQ -0x20(AX), CX - LONG $0xf0f6d5c5 - LONG $0xf4fecdc5 - LONG $0x0455e2c4; BYTE $0xe9 - LONG $0xeaf5d5c5 - LONG $0xdbfed5c5 - LONG $0xec6ffdc5 - LONG $0xe66ffdc5 - CMPQ R8, $0x20 - JAE block_loop_64 - JMP block_reduce - -block_loop_entry: - MOVQ AX, CX - CMPQ R8, $0x20 - JB block_reduce - -block_loop_64: - LONG $0x366ffec5 - LONG $0x7e6ffec5; BYTE $0x20 - LONG $0xc0f64dc5 - LONG $0xc4fe3dc5 - LONG $0xecfed5c5 - LONG $0x044de2c4; BYTE $0xe1 - LONG $0xe2f5ddc5 - LONG $0xdbfeddc5 - ADDQ $0x40, SI - LONG $0xe0f6c5c5 - LONG $0xe4febdc5 - LONG $0xedfebdc5 - LONG $0x0445e2c4; BYTE $0xf1 - LONG $0xf2f5cdc5 - LONG $0xdbfecdc5 - ADDQ $-0x40, CX - JNE block_loop_64 - LONG $0xf46ffdc5 - JMP block_reduce - -return_one: - LONG $0x000001b8; BYTE $0x00 - -return_result: - MOVL AX, ret+32(FP) - RET - -tail_check: - WORD $0x8548; BYTE $0xd2 - JE return_no_tail - -tail16_check: - CMPQ DX, $0x10 - JB tail_bytes_setup - WORD $0xb60f; BYTE $0x3e - WORD $0xf801 - WORD $0xc101 - LONG $0x017eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0246b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x037eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0446b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x057eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0646b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x077eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0846b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x097eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0a46b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x0b7eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x0c46b60f - WORD $0xf801 - WORD $0xc101 - LONG $0x0d7eb60f - WORD $0xc701 - WORD $0xf901 - LONG $0x46b60f44; BYTE $0x0e - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc1 - LONG $0x0f46b60f - WORD $0x0144; BYTE $0xc0 - WORD $0xc101 - ADDQ $-0x10, DX - JE final_reduce - ADDQ $0x10, SI - -tail_bytes_setup: - LEAQ -0x1(DX), DI - MOVQ DX, R9 - ANDQ $0x3, R9 - JE tail_dword_setup - XORL R8, R8 - -tail_byte_loop: - LONG $0x14b60f46; BYTE $0x06 - WORD $0x0144; BYTE $0xd0 - WORD $0xc101 - INCQ R8 - CMPQ R9, R8 - JNE tail_byte_loop - ADDQ R8, SI - SUBQ R8, DX - -tail_dword_setup: - CMPQ DI, $0x3 - JB final_reduce - XORL DI, DI - -tail_dword_loop: - LONG $0x04b60f44; BYTE $0x3e - WORD $0x0141; BYTE $0xc0 - WORD $0x0144; BYTE $0xc1 - LONG $0x3e44b60f; BYTE $0x01 - WORD $0x0144; BYTE $0xc0 - WORD $0xc101 - LONG $0x44b60f44; WORD $0x023e - WORD $0x0141; BYTE $0xc0 - WORD $0x0144; BYTE $0xc1 - LONG $0x3e44b60f; BYTE $0x03 - WORD $0x0144; BYTE $0xc0 - WORD $0xc101 - ADDQ $0x4, DI - CMPQ DX, DI - JNE tail_dword_loop - -final_reduce: - LONG $0x000f908d; WORD $0xffff - CMPL AX, $0xfff1 - WORD $0x420f; BYTE $0xd0 - WORD $0xc889 - LONG $0x078071be; BYTE $0x80 - IMULQ AX, SI - SHRQ $0x2f, SI - LONG $0xfff1c669; WORD $0x0000 - WORD $0xc129 - WORD $0xe1c1; BYTE $0x10 - WORD $0xd109 - WORD $0xc889 - NOP - NOP - VZEROUPPER - MOVL AX, ret+32(FP) - RET - -return_no_tail: - WORD $0xe1c1; BYTE $0x10 - WORD $0xc809 - NOP - NOP - VZEROUPPER - MOVL AX, ret+32(FP) - RET diff --git a/internal/adler32/adler32_fallback.go b/internal/adler32/adler32_fallback.go deleted file mode 100644 index c213c3c1..00000000 --- a/internal/adler32/adler32_fallback.go +++ /dev/null @@ -1,19 +0,0 @@ -//go:build (!arm64 && !amd64) || purego - -package adler32 - -import ( - "hash" - "hash/adler32" -) - -// The size of an Adler-32 checksum in bytes. -const Size = 4 - -// New returns a new hash.Hash32 computing the Adler-32 checksum. -func New() hash.Hash32 { - return adler32.New() -} - -// Checksum returns the Adler-32 checksum of data. -func Checksum(data []byte) uint32 { return adler32.Checksum(data) } diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go deleted file mode 100644 index f33e0f9b..00000000 --- a/internal/adler32/adler32_generic.go +++ /dev/null @@ -1,45 +0,0 @@ -// Package adler32 implements the Adler-32 checksum. -package adler32 - -const ( - // mod is the largest prime that is less than 65536. - mod = 65521 - // nmax is the largest n such that - // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1. - // It is mentioned in RFC 1950 (search for "5552"). - nmax = 5552 - - // binary representation compatible with standard library. - magic = "adl\x01" - marshaledSize = len(magic) + 4 -) - -// Add p to the running checksum d. -func update(d uint32, p []byte) uint32 { - s1, s2 := d&0xffff, d>>16 - for len(p) > 0 { - var q []byte - if len(p) > nmax { - p, q = p[:nmax], p[nmax:] - } - for len(p) >= 4 { - s1 += uint32(p[0]) - s2 += s1 - s1 += uint32(p[1]) - s2 += s1 - s1 += uint32(p[2]) - s2 += s1 - s1 += uint32(p[3]) - s2 += s1 - p = p[4:] - } - for _, x := range p { - s1 += uint32(x) - s2 += s1 - } - s1 %= mod - s2 %= mod - p = q - } - return s2<<16 | s1 -} diff --git a/internal/adler32/adler32_neon.go b/internal/adler32/adler32_neon.go deleted file mode 100644 index 521b71e0..00000000 --- a/internal/adler32/adler32_neon.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && arm64 - -package adler32 - -//go:noescape -func adler32_neon(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_neon.s b/internal/adler32/adler32_neon.s deleted file mode 100644 index 08b170bd..00000000 --- a/internal/adler32/adler32_neon.s +++ /dev/null @@ -1,208 +0,0 @@ -//go:build !purego && arm64 - -#include "textflag.h" - -DATA mult_table<>+0x00(SB)/8, $0x001d001e001f0020 -DATA mult_table<>+0x08(SB)/8, $0x0019001a001b001c -DATA mult_table<>+0x10(SB)/8, $0x0015001600170018 -DATA mult_table<>+0x18(SB)/8, $0x0011001200130014 -DATA mult_table<>+0x20(SB)/8, $0x000d000e000f0010 -DATA mult_table<>+0x28(SB)/8, $0x0009000a000b000c -DATA mult_table<>+0x30(SB)/8, $0x0005000600070008 -DATA mult_table<>+0x38(SB)/8, $0x0001000200030004 -GLOBL mult_table<>(SB), (RODATA|NOPTR), $64 - -TEXT ·adler32_neon(SB), NOSPLIT, $0-36 - MOVW in+0(FP), R0 - MOVD buf_base+8(FP), R1 - MOVD buf_len+16(FP), R2 - MOVD buf_cap+24(FP), R3 - NOP - ANDS $15, R1, R10 - ANDW $65535, R0, R8 - LSRW $16, R0, R9 - NOP - BEQ vector_loop_setup - ADD $1, R1, R11 - MOVD R1, R12 - -align_loop: - WORD $0x3840158d - SUB $1, R2, R2 - TST $15, R11 - ADD $1, R11, R11 - ADDW R13, R8, R8 - ADDW R9, R8, R9 - BNE align_loop - MOVW $32881, R11 - MOVW $65521, R13 - MOVKW $(32775<<16), R11 - MOVW $4294901775, R12 - MOVW $65520, R14 - SUB R10, R1, R10 - UMULL R11, R9, R11 - ADDW R12, R8, R12 - CMPW R14, R8 - ADD $16, R10, R1 - LSR $47, R11, R11 - CSELW HI, R12, R8, R8 - MSUBW R13, R9, R11, R9 - -vector_loop_setup: - AND $31, R2, R10 - CMP $32, R2 - BCC tail_entry - MOVD $mult_table<>(SB), R11 - ADD $0, R11, R11 - MOVW $32881, R14 - MOVW $173, R12 - MOVD $137438953440, R13 - MOVKW $(32775<<16), R14 - VLD1 (R11), [V0.H8, V1.H8, V2.H8, V3.H8] - LSR $5, R2, R11 - MOVW $65521, R15 - VEXT $8, V0.B16, V0.B16, V4.B16 - VEXT $8, V1.B16, V1.B16, V5.B16 - VEXT $8, V2.B16, V2.B16, V6.B16 - VEXT $8, V3.B16, V3.B16, V7.B16 - -vector_outer_loop: - CMP $173, R11 - MOVD R1, R2 - CSEL LO, R11, R12, R16 - WORD $0x6f00e414 - MULW R16, R8, R0 - ADD R16<<5, R13, R17 - WORD $0x6f00e410 - AND $137438953440, R17, R17 - WORD $0x6f00e412 - WORD $0x6f00e413 - WORD $0x6f00e415 - VMOV R0, V20.S[3] - MOVW R16, R0 - WORD $0x6f00e411 - -vector_inner_loop: - WORD $0xacc15857 - SUBSW $1, R0, R0 - VADD V17.S4, V20.S4, V20.S4 - WORD $0x2e3712b5 - WORD $0x6e371273 - WORD $0x6e202ad8 - WORD $0x2e361252 - WORD $0x6e361210 - WORD $0x6e206af8 - WORD $0x6e606b11 - BNE vector_inner_loop - VSHL $5, V20.S4, V20.S4 - ADD R17, R1, R17 - SUBS R16, R11, R11 - ADD $32, R17, R1 - WORD $0x2e6082b4 - VEXT $8, V21.B16, V21.B16, V21.B16 - WORD $0x2e6482b4 - VEXT $8, V19.B16, V19.B16, V21.B16 - WORD $0x2e618274 - VEXT $8, V18.B16, V18.B16, V19.B16 - WORD $0x2e6582b4 - WORD $0x2e628254 - WORD $0x2e668274 - WORD $0x2e638214 - VEXT $8, V16.B16, V16.B16, V16.B16 - WORD $0x2e678214 - WORD $0x4eb1be30 - WORD $0x4eb4be91 - WORD $0x0eb1be10 - VMOV V16.S[1], R0 - FMOVS F16, R2 - ADDW R8, R2, R8 - ADDW R9, R0, R9 - UMULL R14, R8, R0 - UMULL R14, R9, R2 - LSR $47, R0, R0 - LSR $47, R2, R2 - MSUBW R15, R8, R0, R8 - MSUBW R15, R9, R2, R9 - BNE vector_outer_loop - -tail_entry: - CBZ R10, return_result - CMP $16, R10 - BCC tail_byte_loop - WORD $0x3940002b - SUBS $16, R10, R10 - WORD $0x3940042c - WORD $0x3940082d - ADDW R11, R8, R8 - WORD $0x39400c2b - ADDW R9, R8, R9 - ADDW R12, R8, R8 - WORD $0x3940102c - ADDW R8, R9, R9 - ADDW R13, R8, R8 - WORD $0x3940142d - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x3940182b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x39401c2c - ADDW R8, R9, R9 - ADDW R13, R8, R8 - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x3940202b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x3940242c - ADDW R8, R9, R9 - WORD $0x3940382d - ADDW R11, R8, R8 - WORD $0x3940282b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x39402c2c - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x3940302b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - WORD $0x3940342c - ADDW R8, R9, R9 - ADDW R11, R8, R8 - WORD $0x39403c2b - ADDW R8, R9, R9 - ADDW R12, R8, R8 - ADDW R8, R9, R9 - ADDW R13, R8, R8 - ADDW R8, R9, R9 - ADDW R11, R8, R8 - ADDW R8, R9, R9 - BEQ final_reduce - ADD $16, R1, R1 - -tail_byte_loop: - WORD $0x3840142b - SUBS $1, R10, R10 - ADDW R11, R8, R8 - ADDW R9, R8, R9 - BNE tail_byte_loop - -final_reduce: - MOVW $32881, R10 - MOVW $65521, R12 - MOVKW $(32775<<16), R10 - MOVW $4294901775, R11 - MOVW $65520, R13 - ADDW R11, R8, R11 - UMULL R10, R9, R10 - CMPW R13, R8 - CSELW HI, R11, R8, R8 - LSR $47, R10, R10 - MSUBW R12, R9, R10, R9 - -return_result: - ORRW R9<<16, R8, R0 - NOP - MOVW R0, ret+32(FP) - RET diff --git a/internal/adler32/adler32_sse3.go b/internal/adler32/adler32_sse3.go deleted file mode 100644 index 8e8c8a9b..00000000 --- a/internal/adler32/adler32_sse3.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && amd64 - -package adler32 - -//go:noescape -func adler32_sse3(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_sse3.s b/internal/adler32/adler32_sse3.s deleted file mode 100644 index 5880bab8..00000000 --- a/internal/adler32/adler32_sse3.s +++ /dev/null @@ -1,214 +0,0 @@ -//go:build !purego && amd64 - -#include "textflag.h" - -DATA weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 -DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718 -GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16 - -DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001 -GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16 - -DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10 -DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708 -GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16 - -TEXT ·adler32_sse3(SB), NOSPLIT, $0-36 - MOVLQZX in+0(FP), DI - MOVQ buf_base+8(FP), SI - MOVQ buf_len+16(FP), DX - MOVQ buf_cap+24(FP), CX - NOP - NOP - NOP - WORD $0xf889 - LONG $0xc8b70f44 - WORD $0xe8c1; BYTE $0x10 - WORD $0xd189 - WORD $0xe183; BYTE $0x1f - CMPQ DX, $0x20 - JAE block_loop_setup - WORD $0x8944; BYTE $0xcf - JMP tail_entry - -block_loop_setup: - SHRQ $0x5, DX - LONG $0xc0ef0f66 - MOVO weights_17_32<>(SB), X1 - MOVO ones_u16<>(SB), X2 - MOVO weights_1_16<>(SB), X3 - LONG $0x8071b841; WORD $0x8007 - -block_outer_loop: - CMPQ DX, $0xad - LONG $0x00adba41; WORD $0x0000 - LONG $0xd2420f4c - WORD $0x8944; BYTE $0xcf - LONG $0xfaaf0f41 - LONG $0xef6e0f66 - LONG $0xe06e0f66 - WORD $0x8944; BYTE $0xd0 - LONG $0xf6ef0f66 - -block_inner_loop: - LONG $0x3e6f0ff3 - LONG $0x6f0f4466; BYTE $0xc7 - LONG $0x04380f66; BYTE $0xf9 - LONG $0xfaf50f66 - LONG $0xfcfe0f66 - LONG $0x666f0ff3; BYTE $0x10 - LONG $0xeefe0f66 - LONG $0xf60f4466; BYTE $0xc0 - LONG $0xfe0f4466; BYTE $0xc6 - LONG $0xf46f0f66 - LONG $0xf0f60f66 - LONG $0xfe0f4166; BYTE $0xf0 - LONG $0x04380f66; BYTE $0xe3 - LONG $0xe2f50f66 - LONG $0xe7fe0f66 - ADDQ $0x20, SI - WORD $0xc8ff - JNE block_inner_loop - LONG $0xf5720f66; BYTE $0x05 - LONG $0xe5fe0f66 - LONG $0xee700f66; BYTE $0xb1 - LONG $0xeefe0f66 - LONG $0xf5700f66; BYTE $0xee - LONG $0xf5fe0f66 - LONG $0xf77e0f66 - WORD $0x0144; BYTE $0xcf - LONG $0xec700f66; BYTE $0xb1 - LONG $0xecfe0f66 - LONG $0xe5700f66; BYTE $0xee - LONG $0xe5fe0f66 - LONG $0xe07e0f66 - MOVQ DI, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xcf - MOVQ AX, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xc8 - WORD $0x8941; BYTE $0xf9 - SUBQ R10, DX - JNE block_outer_loop - -tail_entry: - WORD $0x8548; BYTE $0xc9 - JE return_result - CMPL CX, $0x10 - JB tail_bytes_setup - WORD $0xb60f; BYTE $0x16 - WORD $0xd701 - WORD $0xf801 - LONG $0x0156b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x027eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0356b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x047eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0556b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x067eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0756b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x087eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0956b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x0a7eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0b56b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x0c7eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0d56b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x46b60f44; BYTE $0x0e - WORD $0x0141; BYTE $0xd0 - WORD $0x0144; BYTE $0xc0 - LONG $0x0f7eb60f - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - ADDQ $-0x10, CX - JE final_reduce - ADDQ $0x10, SI - -tail_bytes_setup: - LEAQ -0x1(CX), DX - MOVQ CX, R9 - ANDQ $0x3, R9 - JE tail_dword_setup - XORL R8, R8 - -tail_byte_loop: - LONG $0x14b60f46; BYTE $0x06 - WORD $0x0144; BYTE $0xd7 - WORD $0xf801 - INCQ R8 - CMPQ R9, R8 - JNE tail_byte_loop - ADDQ R8, SI - SUBQ R8, CX - -tail_dword_setup: - CMPQ DX, $0x3 - JB final_reduce - XORL DX, DX - -tail_dword_loop: - LONG $0x04b60f44; BYTE $0x16 - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc0 - LONG $0x167cb60f; BYTE $0x01 - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - LONG $0x44b60f44; WORD $0x0216 - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc0 - LONG $0x167cb60f; BYTE $0x03 - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - ADDQ $0x4, DX - CMPQ CX, DX - JNE tail_dword_loop - -final_reduce: - LONG $0x000f8f8d; WORD $0xffff - CMPL DI, $0xfff1 - WORD $0x420f; BYTE $0xcf - WORD $0xc289 - LONG $0x078071be; BYTE $0x80 - IMULQ DX, SI - SHRQ $0x2f, SI - LONG $0xfff1d669; WORD $0x0000 - WORD $0xd029 - WORD $0xcf89 - -return_result: - WORD $0xe0c1; BYTE $0x10 - WORD $0xf809 - NOP - NOP - MOVL AX, ret+32(FP) - RET diff --git a/internal/adler32/bench_test.go b/internal/adler32/bench_test.go deleted file mode 100644 index 7744b903..00000000 --- a/internal/adler32/bench_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package adler32 - -import ( - "testing" -) - -const benchmarkSize = 64 * 1024 - -var data = make([]byte, benchmarkSize) - -func init() { - for i := range benchmarkSize { - data[i] = byte(i % 256) - } -} - -func BenchmarkChecksum(b *testing.B) { - b.ReportAllocs() - for range b.N { - Checksum(data) - } -} |
