aboutsummaryrefslogtreecommitdiff
path: root/internal
diff options
context:
space:
mode:
authorGravatar Runxi Yu2026-05-18 07:19:10 +0000
committerGravatar Runxi Yu2026-05-18 07:19:10 +0000
commit6e77b4491e5fd6536417f843668cd4e27c079528 (patch)
treeea1ce4cffa01a8ee2f82c7892bee7b18944d4660 /internal
parentinternal/cpu: Import back (diff)
signatureNo signature
internal/adler32: Import back
Diffstat (limited to 'internal')
-rw-r--r--internal/adler32/LICENSE30
-rw-r--r--internal/adler32/LICENSE.ZLIB17
-rw-r--r--internal/adler32/README1
-rw-r--r--internal/adler32/adler32_amd64.go83
-rw-r--r--internal/adler32/adler32_avx2.go6
-rw-r--r--internal/adler32/adler32_avx2.s251
-rw-r--r--internal/adler32/adler32_fallback.go16
-rw-r--r--internal/adler32/adler32_generic.go52
-rw-r--r--internal/adler32/bench_test.go26
-rw-r--r--internal/adler32/doc.go2
10 files changed, 484 insertions, 0 deletions
diff --git a/internal/adler32/LICENSE b/internal/adler32/LICENSE
new file mode 100644
index 00000000..5cec357a
--- /dev/null
+++ b/internal/adler32/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2024, Michal Hruby
+Copyright (c) 2017 The Chromium Authors. All rights reserved.
+Copyright (c) 1995-2024 Mark Adler
+Copyright (c) 1995-2024 Jean-loup Gailly
+Copyright (c) 2022 Adam Stylinski
+
+BSD 2-Clause License
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/internal/adler32/LICENSE.ZLIB b/internal/adler32/LICENSE.ZLIB
new file mode 100644
index 00000000..c75c1568
--- /dev/null
+++ b/internal/adler32/LICENSE.ZLIB
@@ -0,0 +1,17 @@
+Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/internal/adler32/README b/internal/adler32/README
new file mode 100644
index 00000000..b80acd00
--- /dev/null
+++ b/internal/adler32/README
@@ -0,0 +1 @@
+This package was mostly copied from github.com/mhr3/adler32-simd.
diff --git a/internal/adler32/adler32_amd64.go b/internal/adler32/adler32_amd64.go
new file mode 100644
index 00000000..952283e2
--- /dev/null
+++ b/internal/adler32/adler32_amd64.go
@@ -0,0 +1,83 @@
+//go:build amd64 && !purego
+
+package adler32
+
+import (
+ "encoding/binary"
+ "errors"
+ "hash"
+ "hash/adler32"
+
+ "codeberg.org/lindenii/furgit/internal/cpu"
+)
+
+// digest represents the partial evaluation of a checksum.
+// The low 16 bits are s1, the high 16 bits are s2.
+type digest uint32
+
+func (d *digest) Reset() { *d = 1 }
+
+// New returns a new hash.Hash32 computing the Adler-32 checksum.
+func New() hash.Hash32 {
+ if !cpu.X86.HasAVX2 {
+ return adler32.New()
+ }
+
+ d := new(digest)
+ d.Reset()
+
+ return d
+}
+
+func (d *digest) MarshalBinary() ([]byte, error) {
+ b := make([]byte, 0, marshaledSize)
+ b = append(b, magic...)
+ b = binary.BigEndian.AppendUint32(b, uint32(*d))
+
+ return b, nil
+}
+
+func (d *digest) UnmarshalBinary(b []byte) error {
+ if len(b) < len(magic) || string(b[:len(magic)]) != magic {
+ return errors.New("internal/adler32: invalid hash state identifier")
+ }
+
+ if len(b) != marshaledSize {
+ return errors.New("internal/adler32: invalid hash state size")
+ }
+
+ *d = digest(binary.BigEndian.Uint32(b[len(magic):]))
+
+ return nil
+}
+
+func (d *digest) Size() int { return Size }
+
+func (d *digest) BlockSize() int { return 4 }
+
+func (d *digest) Write(data []byte) (nn int, err error) {
+ if cpu.X86.HasAVX2 && len(data) >= 64 {
+ h := adler32_avx2(uint32(*d), data)
+ *d = digest(h)
+ } else {
+ h := update(uint32(*d), data)
+ *d = digest(h)
+ }
+
+ return len(data), nil
+}
+
+func (d *digest) Sum32() uint32 { return uint32(*d) }
+
+func (d *digest) Sum(in []byte) []byte {
+ return binary.BigEndian.AppendUint32(in, uint32(*d))
+}
+
+// Checksum returns the Adler-32 checksum of data.
+func Checksum(data []byte) uint32 {
+ if cpu.X86.HasAVX2 && len(data) >= 64 {
+ return adler32_avx2(1, data)
+ }
+
+ return adler32.Checksum(data)
+}
diff --git a/internal/adler32/adler32_avx2.go b/internal/adler32/adler32_avx2.go
new file mode 100644
index 00000000..042812b8
--- /dev/null
+++ b/internal/adler32/adler32_avx2.go
@@ -0,0 +1,6 @@
+//go:build !purego && amd64
+
+package adler32
+
+//go:noescape
+func adler32_avx2(in uint32, buf []byte) uint32
diff --git a/internal/adler32/adler32_avx2.s b/internal/adler32/adler32_avx2.s
new file mode 100644
index 00000000..a883e357
--- /dev/null
+++ b/internal/adler32/adler32_avx2.s
@@ -0,0 +1,251 @@
+//go:build !purego && amd64
+
+#include "textflag.h"
+
+DATA adler32AVX2ByteWeights<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
+DATA adler32AVX2ByteWeights<>+0x08(SB)/8, $0x1112131415161718
+DATA adler32AVX2ByteWeights<>+0x10(SB)/8, $0x090a0b0c0d0e0f10
+DATA adler32AVX2ByteWeights<>+0x18(SB)/8, $0x0102030405060708
+GLOBL adler32AVX2ByteWeights<>(SB), (RODATA|NOPTR), $32
+
+DATA adler32AVX2WordOne<>+0x00(SB)/2, $0x0001
+GLOBL adler32AVX2WordOne<>(SB), (RODATA|NOPTR), $2
+
+TEXT ·adler32_avx2(SB), NOSPLIT, $0-36
+ MOVLQZX in+0(FP), DI
+ MOVQ buf_base+8(FP), SI
+ MOVQ buf_len+16(FP), DX
+ MOVQ buf_cap+24(FP), CX
+ TESTQ SI, SI
+ JE return_one
+ MOVL DI, AX
+ TESTQ DX, DX
+ JE return_current
+ MOVL AX, CX
+ SHRL $0x10, CX
+ MOVWLZX AX, AX
+ CMPQ DX, $0x20
+ JB scalar_unrolled16
+ MOVL $2147975281, DI
+ VPXOR X0, X0, X0
+ VMOVDQA adler32AVX2ByteWeights<>(SB), Y1
+ VPBROADCASTW adler32AVX2WordOne<>(SB), Y2
+ JMP vector_outer
+
+vector_tail_init:
+ VMOVDQA Y4, Y6
+ VPXOR X5, X5, X5
+
+vector_reduce_finalize_chunk:
+ SUBQ AX, DX
+ VPSLLD $0x05, Y5, Y4
+ VPADDD Y3, Y4, Y3
+ VEXTRACTI128 $0x1, Y6, X4
+ VSHUFPS $0x88, X4, X6, X5
+ VPSHUFD $0x88, X4, X4
+ VPADDD X4, X5, X4
+ VPSHUFD $0x55, X4, X5
+ VPADDD X4, X5, X4
+ VMOVD X4, AX
+ MOVQ AX, CX
+ IMULQ DI, CX
+ SHRQ $0x2f, CX
+ IMULL $0xfff1, CX
+ SUBL CX, AX
+ VEXTRACTI128 $0x1, Y3, X4
+ VPADDD X3, X4, X3
+ VPSHUFD $0xee, X3, X4
+ VPADDD X4, X3, X3
+ VPSHUFD $0x55, X3, X4
+ VPADDD X3, X4, X3
+ VMOVD X3, CX
+ MOVQ CX, R8
+ IMULQ DI, R8
+ SHRQ $0x2f, R8
+ IMULL $0xfff1, R8
+ SUBL R8, CX
+ CMPQ DX, $0x1f
+ JBE scalar_entry
+
+vector_outer:
+ VMOVD AX, X4
+ VMOVD CX, X3
+ CMPQ DX, $0x15b0
+ MOVL $0x15b0, R8
+ CMOVQCS DX, R8
+ MOVL R8, AX
+ ANDL $0x1fe0, AX
+ JE vector_tail_init
+ ADDQ $-0x20, R8
+ VPXOR X5, X5, X5
+ TESTL $0x20, R8
+ JNE vector_block32_check
+ VMOVDQU 0(SI), Y5
+ ADDQ $0x20, SI
+ LEAQ -0x20(AX), CX
+ VPSADBW Y0, Y5, Y6
+ VPADDD Y4, Y6, Y6
+ VPMADDUBSW Y1, Y5, Y5
+ VPMADDWD Y2, Y5, Y5
+ VPADDD Y3, Y5, Y3
+ VMOVDQA Y4, Y5
+ VMOVDQA Y6, Y4
+ CMPQ R8, $0x20
+ JAE vector_block64_loop
+ JMP vector_reduce_finalize_chunk
+
+vector_block32_check:
+ MOVQ AX, CX
+ CMPQ R8, $0x20
+ JB vector_reduce_finalize_chunk
+
+vector_block64_loop:
+ VMOVDQU 0(SI), Y6
+ VMOVDQU 0x20(SI), Y7
+ VPSADBW Y0, Y6, Y8
+ VPADDD Y4, Y8, Y8
+ VPADDD Y4, Y5, Y5
+ VPMADDUBSW Y1, Y6, Y4
+ VPMADDWD Y2, Y4, Y4
+ VPADDD Y3, Y4, Y3
+ ADDQ $0x40, SI
+ VPSADBW Y0, Y7, Y4
+ VPADDD Y4, Y8, Y4
+ VPADDD Y5, Y8, Y5
+ VPMADDUBSW Y1, Y7, Y6
+ VPMADDWD Y2, Y6, Y6
+ VPADDD Y3, Y6, Y3
+ ADDQ $-0x40, CX
+ JNE vector_block64_loop
+ VMOVDQA Y4, Y6
+ JMP vector_reduce_finalize_chunk
+
+return_one:
+ MOVL $0x1, AX
+
+return_current:
+ MOVL AX, ret+32(FP)
+ RET
+
+scalar_entry:
+ TESTQ DX, DX
+ JE return_final
+
+scalar_unrolled16:
+ CMPQ DX, $0x10
+ JB scalar_byte_prelude
+ MOVBLZX 0(SI), DI
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0x1(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0x2(SI), AX
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0x3(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0x4(SI), AX
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0x5(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0x6(SI), AX
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0x7(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0x8(SI), AX
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0x9(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0xa(SI), AX
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0xb(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0xc(SI), AX
+ ADDL DI, AX
+ ADDL AX, CX
+ MOVBLZX 0xd(SI), DI
+ ADDL AX, DI
+ ADDL DI, CX
+ MOVBLZX 0xe(SI), R8
+ ADDL DI, R8
+ ADDL R8, CX
+ MOVBLZX 0xf(SI), AX
+ ADDL R8, AX
+ ADDL AX, CX
+ ADDQ $-0x10, DX
+ JE scalar_finalize
+ ADDQ $0x10, SI
+
+scalar_byte_prelude:
+ LEAQ -0x1(DX), DI
+ MOVQ DX, R9
+ ANDQ $0x3, R9
+ JE scalar_dword_prelude
+ XORL R8, R8
+
+scalar_byte_prelude_loop:
+ MOVBLZX 0(SI)(R8*1), R10
+ ADDL R10, AX
+ ADDL AX, CX
+ INCQ R8
+ CMPQ R9, R8
+ JNE scalar_byte_prelude_loop
+ ADDQ R8, SI
+ SUBQ R8, DX
+
+scalar_dword_prelude:
+ CMPQ DI, $0x3
+ JB scalar_finalize
+ XORL DI, DI
+
+scalar_dword_loop:
+ MOVBLZX 0(SI)(DI*1), R8
+ ADDL AX, R8
+ ADDL R8, CX
+ MOVBLZX 0x1(SI)(DI*1), AX
+ ADDL R8, AX
+ ADDL AX, CX
+ MOVBLZX 0x2(SI)(DI*1), R8
+ ADDL AX, R8
+ ADDL R8, CX
+ MOVBLZX 0x3(SI)(DI*1), AX
+ ADDL R8, AX
+ ADDL AX, CX
+ ADDQ $0x4, DI
+ CMPQ DX, DI
+ JNE scalar_dword_loop
+
+scalar_finalize:
+ LEAL -0xfff1(AX), DX
+ CMPL AX, $0xfff1
+ CMOVLCS AX, DX
+ MOVL CX, AX
+ MOVL $2147975281, SI
+ IMULQ AX, SI
+ SHRQ $0x2f, SI
+ MOVL SI, AX
+ IMULL $0xfff1, AX
+ SUBL AX, CX
+ SHLL $0x10, CX
+ ORL DX, CX
+ MOVL CX, AX
+ VZEROUPPER
+ MOVL AX, ret+32(FP)
+ RET
+
+return_final:
+ SHLL $0x10, CX
+ ORL CX, AX
+ VZEROUPPER
+ MOVL AX, ret+32(FP)
+ RET
diff --git a/internal/adler32/adler32_fallback.go b/internal/adler32/adler32_fallback.go
new file mode 100644
index 00000000..47bd1e08
--- /dev/null
+++ b/internal/adler32/adler32_fallback.go
@@ -0,0 +1,16 @@
+//go:build !amd64 || purego
+
+package adler32
+
+import (
+ "hash"
+ "hash/adler32"
+)
+
+// New returns a new hash.Hash32 computing the Adler-32 checksum.
+func New() hash.Hash32 {
+ return adler32.New()
+}
+
+// Checksum returns the Adler-32 checksum of data.
+func Checksum(data []byte) uint32 { return adler32.Checksum(data) }
diff --git a/internal/adler32/adler32_generic.go b/internal/adler32/adler32_generic.go
new file mode 100644
index 00000000..08ab483f
--- /dev/null
+++ b/internal/adler32/adler32_generic.go
@@ -0,0 +1,52 @@
+package adler32
+
+const (
+ // Size of an Adler-32 checksum in bytes.
+ Size = 4
+
+ // mod is the largest prime that is less than 65536.
+ mod = 65521
+ // nmax is the largest n such that
+ // 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1.
+ // It is mentioned in RFC 1950 (search for "5552").
+ nmax = 5552
+
+ // binary representation compatible with standard library.
+ magic = "adl\x01"
+ marshaledSize = len(magic) + 4
+)
+
+// Add p to the running checksum d.
+func update(d uint32, p []byte) uint32 {
+ s1, s2 := d&0xffff, d>>16
+
+ for len(p) > 0 {
+ var q []byte
+ if len(p) > nmax {
+ p, q = p[:nmax], p[nmax:]
+ }
+
+ for len(p) >= 4 {
+ s1 += uint32(p[0])
+ s2 += s1
+ s1 += uint32(p[1])
+ s2 += s1
+ s1 += uint32(p[2])
+ s2 += s1
+ s1 += uint32(p[3])
+ s2 += s1
+ p = p[4:]
+ }
+
+ for _, x := range p {
+ s1 += uint32(x)
+ s2 += s1
+ }
+
+ s1 %= mod
+ s2 %= mod
+ p = q
+ }
+
+ return s2<<16 | s1
+}
diff --git a/internal/adler32/bench_test.go b/internal/adler32/bench_test.go
new file mode 100644
index 00000000..1161221a
--- /dev/null
+++ b/internal/adler32/bench_test.go
@@ -0,0 +1,26 @@
+package adler32_test
+
+import (
+ "testing"
+
+ "codeberg.org/lindenii/furgit/internal/adler32"
+)
+
+const benchmarkSize = 64 * 1024
+
+//nolint:gochecknoglobals
+var data = make([]byte, benchmarkSize)
+
+func init() { //nolint:gochecknoinits
+ for i := range benchmarkSize {
+ data[i] = byte(i % 256)
+ }
+}
+
+func BenchmarkChecksum(b *testing.B) {
+ b.ReportAllocs()
+
+ for b.Loop() {
+ adler32.Checksum(data)
+ }
+}
diff --git a/internal/adler32/doc.go b/internal/adler32/doc.go
new file mode 100644
index 00000000..add30867
--- /dev/null
+++ b/internal/adler32/doc.go
@@ -0,0 +1,2 @@
+// Package adler32 implements an SIMD-optimized Adler-32 checksum.
+package adler32