aboutsummaryrefslogtreecommitdiff
path: root/internal/adler32
diff options
context:
space:
mode:
author: Runxi Yu, 2026-01-06 13:01:54 +0800
committer: Runxi Yu, 2026-01-06 13:01:54 +0800
commit: ba327e76c5b110044ec8ebb75630e79506dfbed8 (patch)
tree: fdbe157a61a1449093b1b8b3c5ec453f85a523de /internal/adler32
parent: TODO: Note to use archsimd (diff)
signature: No signature
adler32: Drop SSE3 support
Diffstat (limited to 'internal/adler32')
-rw-r--r-- internal/adler32/adler32_amd64.go | 16
-rw-r--r-- internal/adler32/adler32_sse3.go | 6
-rw-r--r-- internal/adler32/adler32_sse3.s | 214
3 files changed, 4 insertions, 232 deletions
diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go
index 88a854ed..3fbcb4d8 100644
--- a/internal/adler32/adler32_amd64.go
+++ b/internal/adler32/adler32_amd64.go
@@ -15,7 +15,6 @@ import (
const Size = 4
var (
- hasSSE3 = cpu.X86.HasSSE3
hasAVX2 = cpu.X86.HasAVX2
)
@@ -27,7 +26,7 @@ func (d *digest) Reset() { *d = 1 }
// New returns a new hash.Hash32 computing the Adler-32 checksum.
func New() hash.Hash32 {
- if !hasSSE3 {
+ if !hasAVX2 {
return adler32.New()
}
d := new(digest)
@@ -60,11 +59,7 @@ func (d *digest) BlockSize() int { return 4 }
func (d *digest) Write(data []byte) (nn int, err error) {
if len(data) >= 64 {
var h uint32
- if hasAVX2 {
- h = adler32_avx2(uint32(*d), data)
- } else {
- h = adler32_sse3(uint32(*d), data)
- }
+ h = adler32_avx2(uint32(*d), data)
*d = digest(h)
} else {
h := update(uint32(*d), data)
@@ -82,12 +77,9 @@ func (d *digest) Sum(in []byte) []byte {
// Checksum returns the Adler-32 checksum of data.
func Checksum(data []byte) uint32 {
- if !hasSSE3 || len(data) < 64 {
+ if !hasAVX2 || len(data) < 64 {
return update(1, data)
}
- if hasAVX2 {
- return adler32_avx2(1, data)
- }
- return adler32_sse3(1, data)
+ return adler32_avx2(1, data)
}
diff --git a/internal/adler32/adler32_sse3.go b/internal/adler32/adler32_sse3.go
deleted file mode 100644
index 8e8c8a9b..00000000
--- a/internal/adler32/adler32_sse3.go
+++ /dev/null
@@ -1,6 +0,0 @@
-//go:build !purego && amd64
-
-package adler32
-
-//go:noescape
-func adler32_sse3(in uint32, buf []byte) uint32
diff --git a/internal/adler32/adler32_sse3.s b/internal/adler32/adler32_sse3.s
deleted file mode 100644
index 5880bab8..00000000
--- a/internal/adler32/adler32_sse3.s
+++ /dev/null
@@ -1,214 +0,0 @@
-//go:build !purego && amd64
-
-#include "textflag.h"
-
-DATA weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
-DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718
-GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16
-
-DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001
-GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16
-
-DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10
-DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708
-GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16
-
-TEXT ·adler32_sse3(SB), NOSPLIT, $0-36
- MOVLQZX in+0(FP), DI
- MOVQ buf_base+8(FP), SI
- MOVQ buf_len+16(FP), DX
- MOVQ buf_cap+24(FP), CX
- NOP
- NOP
- NOP
- WORD $0xf889
- LONG $0xc8b70f44
- WORD $0xe8c1; BYTE $0x10
- WORD $0xd189
- WORD $0xe183; BYTE $0x1f
- CMPQ DX, $0x20
- JAE block_loop_setup
- WORD $0x8944; BYTE $0xcf
- JMP tail_entry
-
-block_loop_setup:
- SHRQ $0x5, DX
- LONG $0xc0ef0f66
- MOVO weights_17_32<>(SB), X1
- MOVO ones_u16<>(SB), X2
- MOVO weights_1_16<>(SB), X3
- LONG $0x8071b841; WORD $0x8007
-
-block_outer_loop:
- CMPQ DX, $0xad
- LONG $0x00adba41; WORD $0x0000
- LONG $0xd2420f4c
- WORD $0x8944; BYTE $0xcf
- LONG $0xfaaf0f41
- LONG $0xef6e0f66
- LONG $0xe06e0f66
- WORD $0x8944; BYTE $0xd0
- LONG $0xf6ef0f66
-
-block_inner_loop:
- LONG $0x3e6f0ff3
- LONG $0x6f0f4466; BYTE $0xc7
- LONG $0x04380f66; BYTE $0xf9
- LONG $0xfaf50f66
- LONG $0xfcfe0f66
- LONG $0x666f0ff3; BYTE $0x10
- LONG $0xeefe0f66
- LONG $0xf60f4466; BYTE $0xc0
- LONG $0xfe0f4466; BYTE $0xc6
- LONG $0xf46f0f66
- LONG $0xf0f60f66
- LONG $0xfe0f4166; BYTE $0xf0
- LONG $0x04380f66; BYTE $0xe3
- LONG $0xe2f50f66
- LONG $0xe7fe0f66
- ADDQ $0x20, SI
- WORD $0xc8ff
- JNE block_inner_loop
- LONG $0xf5720f66; BYTE $0x05
- LONG $0xe5fe0f66
- LONG $0xee700f66; BYTE $0xb1
- LONG $0xeefe0f66
- LONG $0xf5700f66; BYTE $0xee
- LONG $0xf5fe0f66
- LONG $0xf77e0f66
- WORD $0x0144; BYTE $0xcf
- LONG $0xec700f66; BYTE $0xb1
- LONG $0xecfe0f66
- LONG $0xe5700f66; BYTE $0xee
- LONG $0xe5fe0f66
- LONG $0xe07e0f66
- MOVQ DI, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xcf
- MOVQ AX, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xc8
- WORD $0x8941; BYTE $0xf9
- SUBQ R10, DX
- JNE block_outer_loop
-
-tail_entry:
- WORD $0x8548; BYTE $0xc9
- JE return_result
- CMPL CX, $0x10
- JB tail_bytes_setup
- WORD $0xb60f; BYTE $0x16
- WORD $0xd701
- WORD $0xf801
- LONG $0x0156b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x027eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0356b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x047eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0556b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x067eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0756b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x087eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0956b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x0a7eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0b56b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x0c7eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0d56b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x46b60f44; BYTE $0x0e
- WORD $0x0141; BYTE $0xd0
- WORD $0x0144; BYTE $0xc0
- LONG $0x0f7eb60f
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- ADDQ $-0x10, CX
- JE final_reduce
- ADDQ $0x10, SI
-
-tail_bytes_setup:
- LEAQ -0x1(CX), DX
- MOVQ CX, R9
- ANDQ $0x3, R9
- JE tail_dword_setup
- XORL R8, R8
-
-tail_byte_loop:
- LONG $0x14b60f46; BYTE $0x06
- WORD $0x0144; BYTE $0xd7
- WORD $0xf801
- INCQ R8
- CMPQ R9, R8
- JNE tail_byte_loop
- ADDQ R8, SI
- SUBQ R8, CX
-
-tail_dword_setup:
- CMPQ DX, $0x3
- JB final_reduce
- XORL DX, DX
-
-tail_dword_loop:
- LONG $0x04b60f44; BYTE $0x16
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc0
- LONG $0x167cb60f; BYTE $0x01
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- LONG $0x44b60f44; WORD $0x0216
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc0
- LONG $0x167cb60f; BYTE $0x03
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- ADDQ $0x4, DX
- CMPQ CX, DX
- JNE tail_dword_loop
-
-final_reduce:
- LONG $0x000f8f8d; WORD $0xffff
- CMPL DI, $0xfff1
- WORD $0x420f; BYTE $0xcf
- WORD $0xc289
- LONG $0x078071be; BYTE $0x80
- IMULQ DX, SI
- SHRQ $0x2f, SI
- LONG $0xfff1d669; WORD $0x0000
- WORD $0xd029
- WORD $0xcf89
-
-return_result:
- WORD $0xe0c1; BYTE $0x10
- WORD $0xf809
- NOP
- NOP
- MOVL AX, ret+32(FP)
- RET