diff options
| author | 2026-05-18 07:19:10 +0000 | |
|---|---|---|
| committer | 2026-05-18 07:19:10 +0000 | |
| commit | 6e77b4491e5fd6536417f843668cd4e27c079528 (patch) | |
| tree | ea1ce4cffa01a8ee2f82c7892bee7b18944d4660 | |
| parent | internal/cpu: Import back (diff) | |
| signature | No signature | |
internal/adler32: Import back
| -rw-r--r-- | internal/adler32/LICENSE | 30 | ||||
| -rw-r--r-- | internal/adler32/LICENSE.ZLIB | 17 | ||||
| -rw-r--r-- | internal/adler32/README | 1 | ||||
| -rw-r--r-- | internal/adler32/adler32_amd64.go | 83 | ||||
| -rw-r--r-- | internal/adler32/adler32_avx2.go | 6 | ||||
| -rw-r--r-- | internal/adler32/adler32_avx2.s | 251 | ||||
| -rw-r--r-- | internal/adler32/adler32_fallback.go | 16 | ||||
| -rw-r--r-- | internal/adler32/adler32_generic.go | 52 | ||||
| -rw-r--r-- | internal/adler32/bench_test.go | 26 | ||||
| -rw-r--r-- | internal/adler32/doc.go | 2 |
10 files changed, 484 insertions, 0 deletions
diff --git a/internal/adler32/LICENSE b/internal/adler32/LICENSE new file mode 100644 index 00000000..5cec357a --- /dev/null +++ b/internal/adler32/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2024, Michal Hruby +Copyright (c) 2017 The Chromium Authors. All rights reserved. +Copyright (c) 1995-2024 Mark Adler +Copyright (c) 1995-2024 Jean-loup Gailly +Copyright (c) 2022 Adam Stylinski + +BSD 2-Clause License + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/internal/adler32/LICENSE.ZLIB b/internal/adler32/LICENSE.ZLIB new file mode 100644 index 00000000..c75c1568 --- /dev/null +++ b/internal/adler32/LICENSE.ZLIB @@ -0,0 +1,17 @@ +Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. diff --git a/internal/adler32/README b/internal/adler32/README new file mode 100644 index 00000000..b80acd00 --- /dev/null +++ b/internal/adler32/README @@ -0,0 +1 @@ +This package was mostly copied from github.com/mhr3/adler32-simd. diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go new file mode 100644 index 00000000..952283e2 --- /dev/null +++ b/internal/adler32/adler32_amd64.go @@ -0,0 +1,83 @@ +//go:build amd64 && !purego + +package adler32 + +import ( + "encoding/binary" + "errors" + "hash" + "hash/adler32" + + "codeberg.org/lindenii/furgit/internal/cpu" +) + +// digest represents the partial evaluation of a checksum. +// The low 16 bits are s1, the high 16 bits are s2. +type digest uint32 + +func (d *digest) Reset() { *d = 1 } + +// New returns a new hash.Hash32 computing the Adler-32 checksum. +func New() hash.Hash32 { + if !cpu.X86.HasAVX2 { + return adler32.New() + } + + d := new(digest) + d.Reset() + + return d +} + +func (d *digest) MarshalBinary() ([]byte, error) { + b := make([]byte, 0, marshaledSize) + b = append(b, magic...) + b = binary.BigEndian.AppendUint32(b, uint32(*d)) + + return b, nil +} + +func (d *digest) UnmarshalBinary(b []byte) error { + if len(b) < len(magic) || string(b[:len(magic)]) != magic { + return errors.New("internal/adler32: invalid hash state identifier") + } + + if len(b) != marshaledSize { + return errors.New("internal/adler32: invalid hash state size") + } + + *d = digest(binary.BigEndian.Uint32(b[len(magic):])) + + return nil +} + +func (d *digest) Size() int { return Size } + +func (d *digest) BlockSize() int { return 4 } + +func (d *digest) Write(data []byte) (nn int, err error) { + if cpu.X86.HasAVX2 && len(data) >= 64 { + h := adler32_avx2(uint32(*d), data) + *d = digest(h) + } else { + h := update(uint32(*d), data) + *d = digest(h) + } + + return len(data), nil +} + +func (d *digest) Sum32() uint32 { return uint32(*d) } + +func (d *digest) Sum(in []byte) []byte { + return binary.BigEndian.AppendUint32(in, uint32(*d)) +} + +// Checksum returns the Adler-32 checksum of data. +func Checksum(data []byte) uint32 { + if cpu.X86.HasAVX2 && len(data) >= 64 { + return adler32_avx2(1, data) + } + + return adler32.Checksum(data) +} diff --git a/internal/adler32/adler32_avx2.go b/internal/adler32/adler32_avx2.go new file mode 100644 index 00000000..042812b8 --- /dev/null +++ b/internal/adler32/adler32_avx2.go @@ -0,0 +1,6 @@ +//go:build !purego && amd64 + +package adler32 + +//go:noescape +func adler32_avx2(in uint32, buf []byte) uint32 diff --git a/internal/adler32/adler32_avx2.s b/internal/adler32/adler32_avx2.s new file mode 100644 index 00000000..a883e357 --- /dev/null +++ b/internal/adler32/adler32_avx2.s @@ -0,0 +1,251 @@ +//go:build !purego && amd64 + +#include "textflag.h" + +DATA adler32AVX2ByteWeights<>+0x00(SB)/8, $0x191a1b1c1d1e1f20 +DATA adler32AVX2ByteWeights<>+0x08(SB)/8, $0x1112131415161718 +DATA adler32AVX2ByteWeights<>+0x10(SB)/8, $0x090a0b0c0d0e0f10 +DATA adler32AVX2ByteWeights<>+0x18(SB)/8, $0x0102030405060708 +GLOBL adler32AVX2ByteWeights<>(SB), (RODATA|NOPTR), $32 + +DATA adler32AVX2WordOne<>+0x00(SB)/2, $0x0001 +GLOBL adler32AVX2WordOne<>(SB), (RODATA|NOPTR), $2 + +TEXT ·adler32_avx2(SB), NOSPLIT, $0-36 + MOVLQZX in+0(FP), DI + MOVQ buf_base+8(FP), SI + MOVQ buf_len+16(FP), DX + MOVQ buf_cap+24(FP), CX + TESTQ SI, SI + JE return_one + MOVL DI, AX + TESTQ DX, DX + JE return_current + MOVL AX, CX + SHRL $0x10, CX + MOVWLZX AX, AX + CMPQ DX, $0x20 + JB scalar_unrolled16 + MOVL $2147975281, DI + VPXOR X0, X0, X0 + VMOVDQA adler32AVX2ByteWeights<>(SB), Y1 + VPBROADCASTW adler32AVX2WordOne<>(SB), Y2 + JMP vector_outer + +vector_tail_init: + VMOVDQA Y4, Y6 + VPXOR X5, X5, X5 + +vector_reduce_finalize_chunk: + SUBQ AX, DX + VPSLLD $0x05, Y5, Y4 + VPADDD Y3, Y4, Y3 + VEXTRACTI128 $0x1, Y6, X4 + VSHUFPS $0x88, X4, X6, X5 + VPSHUFD $0x88, X4, X4 + VPADDD X4, X5, X4 + VPSHUFD $0x55, X4, X5 + VPADDD X4, X5, X4 + VMOVD X4, AX + MOVQ AX, CX + IMULQ DI, CX + SHRQ $0x2f, CX + IMULL $0xfff1, CX + SUBL CX, AX + VEXTRACTI128 $0x1, Y3, X4 + VPADDD X3, X4, X3 + VPSHUFD $0xee, X3, X4 + VPADDD X4, X3, X3 + VPSHUFD $0x55, X3, X4 + VPADDD X3, X4, X3 + VMOVD X3, CX + MOVQ CX, R8 + IMULQ DI, R8 + SHRQ $0x2f, R8 + IMULL $0xfff1, R8 + SUBL R8, CX + CMPQ DX, $0x1f + JBE scalar_entry + +vector_outer: + VMOVD AX, X4 + VMOVD CX, X3 + CMPQ DX, $0x15b0 + MOVL $0x15b0, R8 + CMOVQCS DX, R8 + MOVL R8, AX + ANDL $0x1fe0, AX + JE vector_tail_init + ADDQ $-0x20, R8 + VPXOR X5, X5, X5 + TESTL $0x20, R8 + JNE vector_block32_check + VMOVDQU 0(SI), Y5 + ADDQ $0x20, SI + LEAQ -0x20(AX), CX + VPSADBW Y0, Y5, Y6 + VPADDD Y4, Y6, Y6 + VPMADDUBSW Y1, Y5, Y5 + VPMADDWD Y2, Y5, Y5 + VPADDD Y3, Y5, Y3 + VMOVDQA Y4, Y5 + VMOVDQA Y6, Y4 + CMPQ R8, $0x20 + JAE vector_block64_loop + JMP vector_reduce_finalize_chunk + +vector_block32_check: + MOVQ AX, CX + CMPQ R8, $0x20 + JB vector_reduce_finalize_chunk + +vector_block64_loop: + VMOVDQU 0(SI), Y6 + VMOVDQU 0x20(SI), Y7 + VPSADBW Y0, Y6, Y8 + VPADDD Y4, Y8, Y8 + VPADDD Y4, Y5, Y5 + VPMADDUBSW Y1, Y6, Y4 + VPMADDWD Y2, Y4, Y4 + VPADDD Y3, Y4, Y3 + ADDQ $0x40, SI + VPSADBW Y0, Y7, Y4 + VPADDD Y4, Y8, Y4 + VPADDD Y5, Y8, Y5 + VPMADDUBSW Y1, Y7, Y6 + VPMADDWD Y2, Y6, Y6 + VPADDD Y3, Y6, Y3 + ADDQ $-0x40, CX + JNE vector_block64_loop + VMOVDQA Y4, Y6 + JMP vector_reduce_finalize_chunk + +return_one: + MOVL $0x1, AX + +return_current: + MOVL AX, ret+32(FP) + RET + +scalar_entry: + TESTQ DX, DX + JE return_final + +scalar_unrolled16: + CMPQ DX, $0x10 + JB scalar_byte_prelude + MOVBLZX 0(SI), DI + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0x1(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0x2(SI), AX + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0x3(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0x4(SI), AX + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0x5(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0x6(SI), AX + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0x7(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0x8(SI), AX + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0x9(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0xa(SI), AX + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0xb(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0xc(SI), AX + ADDL DI, AX + ADDL AX, CX + MOVBLZX 0xd(SI), DI + ADDL AX, DI + ADDL DI, CX + MOVBLZX 0xe(SI), R8 + ADDL DI, R8 + ADDL R8, CX + MOVBLZX 0xf(SI), AX + ADDL R8, AX + ADDL AX, CX + ADDQ $-0x10, DX + JE scalar_finalize + ADDQ $0x10, SI + +scalar_byte_prelude: + LEAQ -0x1(DX), DI + MOVQ DX, R9 + ANDQ $0x3, R9 + JE scalar_dword_prelude + XORL R8, R8 + +scalar_byte_prelude_loop: + MOVBLZX 0(SI)(R8*1), R10 + ADDL R10, AX + ADDL AX, CX + INCQ R8 + CMPQ R9, R8 + JNE scalar_byte_prelude_loop + ADDQ R8, SI + SUBQ R8, DX + +scalar_dword_prelude: + CMPQ DI, $0x3 + JB scalar_finalize + XORL DI, DI + +scalar_dword_loop: + MOVBLZX 0(SI)(DI*1), R8 + ADDL AX, R8 + ADDL R8, CX + MOVBLZX 0x1(SI)(DI*1), AX + ADDL R8, AX + ADDL AX, CX + MOVBLZX 0x2(SI)(DI*1), R8 + ADDL AX, R8 + ADDL R8, CX + MOVBLZX 0x3(SI)(DI*1), AX + ADDL R8, AX + ADDL AX, CX + ADDQ $0x4, DI + CMPQ DX, DI + JNE scalar_dword_loop + +scalar_finalize: + LEAL -0xfff1(AX), DX + CMPL AX, $0xfff1 + CMOVLCS AX, DX + MOVL CX, AX + MOVL $2147975281, SI + IMULQ AX, SI + SHRQ $0x2f, SI + MOVL SI, AX + IMULL $0xfff1, AX + SUBL AX, CX + SHLL $0x10, CX + ORL DX, CX + MOVL CX, AX + VZEROUPPER + MOVL AX, ret+32(FP) + RET + +return_final: + SHLL $0x10, CX + ORL CX, AX + VZEROUPPER + MOVL AX, ret+32(FP) + RET diff --git a/internal/adler32/adler32_fallback.go b/internal/adler32/adler32_fallback.go new file mode 100644 index 00000000..47bd1e08 --- /dev/null +++ b/internal/adler32/adler32_fallback.go @@ -0,0 +1,16 @@ +//go:build !amd64 || purego + +package adler32 + +import ( + "hash" + "hash/adler32" +) + +// New returns a new hash.Hash32 computing the Adler-32 checksum. +func New() hash.Hash32 { + return adler32.New() +} + +// Checksum returns the Adler-32 checksum of data. +func Checksum(data []byte) uint32 { return adler32.Checksum(data) } diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go new file mode 100644 index 00000000..08ab483f --- /dev/null +++ b/internal/adler32/adler32_generic.go @@ -0,0 +1,52 @@ +package adler32 + +const ( + // Size of an Adler-32 checksum in bytes. + Size = 4 + + // mod is the largest prime that is less than 65536. + mod = 65521 + // nmax is the largest n such that + // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1. + // It is mentioned in RFC 1950 (search for "5552"). + nmax = 5552 + + // binary representation compatible with standard library. + magic = "adl\x01" + marshaledSize = len(magic) + 4 +) + +// Add p to the running checksum d. +func update(d uint32, p []byte) uint32 { + s1, s2 := d&0xffff, d>>16 + + for len(p) > 0 { + var q []byte + if len(p) > nmax { + p, q = p[:nmax], p[nmax:] + } + + for len(p) >= 4 { + s1 += uint32(p[0]) + s2 += s1 + s1 += uint32(p[1]) + s2 += s1 + s1 += uint32(p[2]) + s2 += s1 + s1 += uint32(p[3]) + s2 += s1 + p = p[4:] + } + + for _, x := range p { + s1 += uint32(x) + s2 += s1 + } + + s1 %= mod + s2 %= mod + p = q + } + + return s2<<16 | s1 +} diff --git a/internal/adler32/bench_test.go b/internal/adler32/bench_test.go new file mode 100644 index 00000000..1161221a --- /dev/null +++ b/internal/adler32/bench_test.go @@ -0,0 +1,26 @@ +package adler32_test + +import ( + "testing" + + "codeberg.org/lindenii/furgit/internal/adler32" +) + +const benchmarkSize = 64 * 1024 + +//nolint:gochecknoglobals +var data = make([]byte, benchmarkSize) + +func init() { //nolint:gochecknoinits + for i := range benchmarkSize { + data[i] = byte(i % 256) + } +} + +func BenchmarkChecksum(b *testing.B) { + b.ReportAllocs() + + for b.Loop() { + adler32.Checksum(data) + } +} diff --git a/internal/adler32/doc.go b/internal/adler32/doc.go new file mode 100644 index 00000000..add30867 --- /dev/null +++ b/internal/adler32/doc.go @@ -0,0 +1,2 @@ +// Package adler32 implements an SIMD-optimized Adler-32 checksum. +package adler32 |
