diff options
| author | 2026-01-06 13:01:54 +0800 | |
|---|---|---|
| committer | 2026-01-06 13:01:54 +0800 | |
| commit | ba327e76c5b110044ec8ebb75630e79506dfbed8 (patch) | |
| tree | fdbe157a61a1449093b1b8b3c5ec453f85a523de | |
| parent | TODO: Note to use archsimd (diff) | |
| signature | No signature | |
adler32: Drop SSE3 support
| -rw-r--r-- | internal/adler32/adler32_amd64.go | 16 | ||||
| -rw-r--r-- | internal/adler32/adler32_sse3.go | 6 | ||||
| -rw-r--r-- | internal/adler32/adler32_sse3.s | 214 |
3 files changed, 4 insertions, 232 deletions
diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go index 88a854ed..3fbcb4d8 100644 --- a/internal/adler32/adler32_amd64.go +++ b/internal/adler32/adler32_amd64.go @@ -15,7 +15,6 @@ import ( const Size = 4 var ( - hasSSE3 = cpu.X86.HasSSE3 hasAVX2 = cpu.X86.HasAVX2 ) @@ -27,7 +26,7 @@ func (d *digest) Reset() { *d = 1 } // New returns a new hash.Hash32 computing the Adler-32 checksum. func New() hash.Hash32 { - if !hasSSE3 { + if !hasAVX2 { return adler32.New() } d := new(digest) @@ -60,11 +59,7 @@ func (d *digest) BlockSize() int { return 4 } func (d *digest) Write(data []byte) (nn int, err error) { if len(data) >= 64 { var h uint32 - if hasAVX2 { - h = adler32_avx2(uint32(*d), data) - } else { - h = adler32_sse3(uint32(*d), data) - } + h = adler32_avx2(uint32(*d), data) *d = digest(h) } else { h := update(uint32(*d), data) @@ -82,12 +77,9 @@ func (d *digest) Sum(in []byte) []byte { // Checksum returns the Adler-32 checksum of data. func Checksum(data []byte) uint32 { - if !hasSSE3 || len(data) < 64 { + if !hasAVX2 || len(data) < 64 { return update(1, data) } - if hasAVX2 { - return adler32_avx2(1, data) - } - return adler32_sse3(1, data) + return adler32_avx2(1, data) } diff --git a/internal/adler32/adler32_sse3.go b/internal/adler32/adler32_sse3.go deleted file mode 100644 index 8e8c8a9b..00000000 --- a/internal/adler32/adler32_sse3.go +++ /dev/null @@ -1,6 +0,0 @@ -//go:build !purego && amd64 - -package adler32 - -//go:noescape -func adler32_sse3(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_sse3.s b/internal/adler32/adler32_sse3.s deleted file mode 100644 index 5880bab8..00000000 --- a/internal/adler32/adler32_sse3.s +++ /dev/null @@ -1,214 +0,0 @@ -//go:build !purego && amd64 - -#include "textflag.h" - -DATA weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 -DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718 -GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16 - -DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001 -DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001 -GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16 - -DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10 -DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708 -GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16 - -TEXT ·adler32_sse3(SB), NOSPLIT, $0-36 - MOVLQZX in+0(FP), DI - MOVQ buf_base+8(FP), SI - MOVQ buf_len+16(FP), DX - MOVQ buf_cap+24(FP), CX - NOP - NOP - NOP - WORD $0xf889 - LONG $0xc8b70f44 - WORD $0xe8c1; BYTE $0x10 - WORD $0xd189 - WORD $0xe183; BYTE $0x1f - CMPQ DX, $0x20 - JAE block_loop_setup - WORD $0x8944; BYTE $0xcf - JMP tail_entry - -block_loop_setup: - SHRQ $0x5, DX - LONG $0xc0ef0f66 - MOVO weights_17_32<>(SB), X1 - MOVO ones_u16<>(SB), X2 - MOVO weights_1_16<>(SB), X3 - LONG $0x8071b841; WORD $0x8007 - -block_outer_loop: - CMPQ DX, $0xad - LONG $0x00adba41; WORD $0x0000 - LONG $0xd2420f4c - WORD $0x8944; BYTE $0xcf - LONG $0xfaaf0f41 - LONG $0xef6e0f66 - LONG $0xe06e0f66 - WORD $0x8944; BYTE $0xd0 - LONG $0xf6ef0f66 - -block_inner_loop: - LONG $0x3e6f0ff3 - LONG $0x6f0f4466; BYTE $0xc7 - LONG $0x04380f66; BYTE $0xf9 - LONG $0xfaf50f66 - LONG $0xfcfe0f66 - LONG $0x666f0ff3; BYTE $0x10 - LONG $0xeefe0f66 - LONG $0xf60f4466; BYTE $0xc0 - LONG $0xfe0f4466; BYTE $0xc6 - LONG $0xf46f0f66 - LONG $0xf0f60f66 - LONG $0xfe0f4166; BYTE $0xf0 - LONG $0x04380f66; BYTE $0xe3 - LONG $0xe2f50f66 - LONG $0xe7fe0f66 - ADDQ $0x20, SI - WORD $0xc8ff - JNE block_inner_loop - LONG $0xf5720f66; BYTE $0x05 - LONG $0xe5fe0f66 - LONG $0xee700f66; BYTE $0xb1 - LONG $0xeefe0f66 - LONG $0xf5700f66; BYTE $0xee - LONG $0xf5fe0f66 - LONG $0xf77e0f66 - WORD $0x0144; BYTE $0xcf - LONG $0xec700f66; BYTE $0xb1 - LONG $0xecfe0f66 - LONG $0xe5700f66; BYTE $0xee - LONG $0xe5fe0f66 - LONG $0xe07e0f66 - MOVQ DI, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xcf - MOVQ AX, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00 - WORD $0x2944; BYTE $0xc8 - WORD $0x8941; BYTE $0xf9 - SUBQ R10, DX - JNE block_outer_loop - -tail_entry: - WORD $0x8548; BYTE $0xc9 - JE return_result - CMPL CX, $0x10 - JB tail_bytes_setup - WORD $0xb60f; BYTE $0x16 - WORD $0xd701 - WORD $0xf801 - LONG $0x0156b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x027eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0356b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x047eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0556b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x067eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0756b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x087eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0956b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x0a7eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0b56b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x0c7eb60f - WORD $0xd701 - WORD $0xf801 - LONG $0x0d56b60f - WORD $0xfa01 - WORD $0xd001 - LONG $0x46b60f44; BYTE $0x0e - WORD $0x0141; BYTE $0xd0 - WORD $0x0144; BYTE $0xc0 - LONG $0x0f7eb60f - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - ADDQ $-0x10, CX - JE final_reduce - ADDQ $0x10, SI - -tail_bytes_setup: - LEAQ -0x1(CX), DX - MOVQ CX, R9 - ANDQ $0x3, R9 - JE tail_dword_setup - XORL R8, R8 - -tail_byte_loop: - LONG $0x14b60f46; BYTE $0x06 - WORD $0x0144; BYTE $0xd7 - WORD $0xf801 - INCQ R8 - CMPQ R9, R8 - JNE tail_byte_loop - ADDQ R8, SI - SUBQ R8, CX - -tail_dword_setup: - CMPQ DX, $0x3 - JB final_reduce - XORL DX, DX - -tail_dword_loop: - LONG $0x04b60f44; BYTE $0x16 - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc0 - LONG $0x167cb60f; BYTE $0x01 - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - LONG $0x44b60f44; WORD $0x0216 - WORD $0x0141; BYTE $0xf8 - WORD $0x0144; BYTE $0xc0 - LONG $0x167cb60f; BYTE $0x03 - WORD $0x0144; BYTE $0xc7 - WORD $0xf801 - ADDQ $0x4, DX - CMPQ CX, DX - JNE tail_dword_loop - -final_reduce: - LONG $0x000f8f8d; WORD $0xffff - CMPL DI, $0xfff1 - WORD $0x420f; BYTE $0xcf - WORD $0xc289 - LONG $0x078071be; BYTE $0x80 - IMULQ DX, SI - SHRQ $0x2f, SI - LONG $0xfff1d669; WORD $0x0000 - WORD $0xd029 - WORD $0xcf89 - -return_result: - WORD $0xe0c1; BYTE $0x10 - WORD $0xf809 - NOP - NOP - MOVL AX, ret+32(FP) - RET |
