Mirror of https://github.com/cloudflare/cloudflared.git (synced 2025-07-29 15:39:58 +00:00)
RTG-1339 Support post-quantum hybrid key exchange
Func spec: https://wiki.cfops.it/x/ZcBKHw
Committed by Devin Carr
parent 3e0ff3a771 · commit 11cbff4ff7
149 vendor/github.com/cloudflare/circl/simd/keccakf1600/f1600x.go (generated, vendored, new file)
@@ -0,0 +1,149 @@
// Package keccakf1600 provides two- and four-way Keccak-f[1600] permutations in parallel.
//
// Keccak-f[1600] is the permutation underlying several algorithms such as
// Keccak, SHA3 and SHAKE. Running two or four permutations in parallel is
// useful in some scenarios like in hash-based signatures.
//
// # Limitations
//
// Note that not all architectures support SIMD instructions. This package
// uses AVX2 instructions that are available in some AMD64 architectures
// and NEON instructions that are available in some ARM64 architectures.
//
// For those systems not supporting these, the package still provides the
// expected functionality by means of a generic and slow implementation.
// The recommendation is to verify IsEnabledX4() and IsEnabledX2() beforehand
// to determine if the current system supports the SIMD implementation.
package keccakf1600

import (
	"unsafe"

	"github.com/cloudflare/circl/internal/sha3"
	"golang.org/x/sys/cpu"
)

// StateX4 contains state for the four-way permutation including the four
// interleaved [25]uint64 buffers. Call Initialize() before use to initialize
// and get a pointer to the interleaved buffer.
type StateX4 struct {
	// Go guarantees a to be aligned on 8 bytes, whereas we need it to be
	// aligned on 32 bytes for best performance. Thus we leave some headroom
	// to be able to move the start of the state.

	// 4 x 25 uint64s for the interleaved states and three uint64s headroom
	// to fix alignment.
	a [103]uint64

	// Offset into a that is 32 byte aligned.
	offset int
}

// StateX2 contains state for the two-way permutation including the two
// interleaved [25]uint64 buffers. Call Initialize() before use to initialize
// and get a pointer to the interleaved buffer.
type StateX2 struct {
	// Go guarantees a to be aligned on 8 bytes, whereas we need it to be
	// aligned on 32 bytes for best performance. Thus we leave some headroom
	// to be able to move the start of the state.

	// 2 x 25 uint64s for the interleaved states and three uint64s headroom
	// to fix alignment.
	a [53]uint64

	// Offset into a that is 32 byte aligned.
	offset int
}

// IsEnabledX4 returns true if the architecture supports a four-way SIMD
// implementation provided in this package.
func IsEnabledX4() bool { return cpu.X86.HasAVX2 }

// IsEnabledX2 returns true if the architecture supports a two-way SIMD
// implementation provided in this package.
func IsEnabledX2() bool {
	// After Go 1.16 the flag cpu.ARM64.HasSHA3 is no longer exposed.
	return false
}

// Initialize initializes the state and returns the buffer on which the four
// permutations will act: a uint64 slice of length 100. The first permutation
// will act on {a[0], a[4], ..., a[96]}, the second on {a[1], a[5], ..., a[97]}, etc.
func (s *StateX4) Initialize() []uint64 {
	rp := unsafe.Pointer(&s.a[0])

	// uint64s are always aligned by a multiple of 8. Compute the remainder
	// of the address modulo 32 divided by 8.
	rem := (int(uintptr(rp)&31) >> 3)

	if rem != 0 {
		s.offset = 4 - rem
	}

	// The slice we return will be aligned on a 32 byte boundary.
	return s.a[s.offset : s.offset+100]
}

// Initialize initializes the state and returns the buffer on which the two
// permutations will act: a uint64 slice of length 50. The first permutation
// will act on {a[0], a[2], ..., a[48]} and the second on {a[1], a[3], ..., a[49]}.
func (s *StateX2) Initialize() []uint64 {
	rp := unsafe.Pointer(&s.a[0])

	// uint64s are always aligned by a multiple of 8. Compute the remainder
	// of the address modulo 32 divided by 8.
	rem := (int(uintptr(rp)&31) >> 3)

	if rem != 0 {
		s.offset = 4 - rem
	}

	// The slice we return will be aligned on a 32 byte boundary.
	return s.a[s.offset : s.offset+50]
}

// Permute performs the four parallel Keccak-f[1600]s interleaved on the slice
// returned from Initialize().
func (s *StateX4) Permute() {
	if IsEnabledX4() {
		permuteSIMDx4(s.a[s.offset:])
	} else {
		permuteScalarX4(s.a[s.offset:]) // A slower generic implementation.
	}
}

// Permute performs the two parallel Keccak-f[1600]s interleaved on the slice
// returned from Initialize().
func (s *StateX2) Permute() {
	if IsEnabledX2() {
		permuteSIMDx2(s.a[s.offset:])
	} else {
		permuteScalarX2(s.a[s.offset:]) // A slower generic implementation.
	}
}

func permuteScalarX4(a []uint64) {
	var buf [25]uint64
	for i := 0; i < 4; i++ {
		for j := 0; j < 25; j++ {
			buf[j] = a[4*j+i]
		}
		sha3.KeccakF1600(&buf)
		for j := 0; j < 25; j++ {
			a[4*j+i] = buf[j]
		}
	}
}

func permuteScalarX2(a []uint64) {
	var buf [25]uint64
	for i := 0; i < 2; i++ {
		for j := 0; j < 25; j++ {
			buf[j] = a[2*j+i]
		}
		sha3.KeccakF1600(&buf)
		for j := 0; j < 25; j++ {
			a[2*j+i] = buf[j]
		}
	}
}
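A minimal usage sketch of the four-way API above (not part of the commit; the buffer layout and function names come straight from f1600x.go):

package main

import (
	"fmt"

	"github.com/cloudflare/circl/simd/keccakf1600"
)

func main() {
	var st keccakf1600.StateX4
	buf := st.Initialize() // 100 uint64s; lane j of permutation i lives at buf[4*j+i]

	// Give each of the four states a distinct lane 0.
	for i := 0; i < 4; i++ {
		buf[i] = uint64(i + 1)
	}

	// Uses the AVX2 path when IsEnabledX4() reports support,
	// otherwise the slower scalar fallback; the result is the same.
	st.Permute()

	fmt.Println(keccakf1600.IsEnabledX4(), buf[0:4])
}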
13 vendor/github.com/cloudflare/circl/simd/keccakf1600/f1600x2_arm64.go (generated, vendored, new file)
@@ -0,0 +1,13 @@
//go:build arm64 && go1.16
// +build arm64,go1.16

package keccakf1600

import "github.com/cloudflare/circl/internal/sha3"

func permuteSIMDx2(state []uint64) { f1600x2ARM(&state[0], &sha3.RC) }

func permuteSIMDx4(state []uint64) { permuteScalarX4(state) }

//go:noescape
func f1600x2ARM(state *uint64, rc *[24]uint64)
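For completeness, a small sketch of the two-way API (same import as the sketch above). Note that with IsEnabledX2() hard-wired to false in f1600x.go, StateX2.Permute always takes the scalar path in this vendored version, so the ARM assembly below is effectively unreachable:

func permuteTwoWay() [2]uint64 {
	var st keccakf1600.StateX2
	buf := st.Initialize() // 50 uint64s; lane j of permutation i lives at buf[2*j+i]
	buf[0], buf[1] = 1, 2  // lane 0 of each of the two states
	st.Permute()           // scalar fallback here, since IsEnabledX2() returns false
	return [2]uint64{buf[0], buf[1]}
}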
130 vendor/github.com/cloudflare/circl/simd/keccakf1600/f1600x2_arm64.s (generated, vendored, new file)
@@ -0,0 +1,130 @@
// +build arm64,go1.16

// Taken from https://github.com/bwesterb/armed-keccak

#include "textflag.h"

// func f1600x2ARM(state *uint64, rc *[24]uint64)
TEXT ·f1600x2ARM(SB), NOSPLIT, $0-16
	MOVD state+0(FP), R0
	MOVD rc+8(FP), R1
	MOVD R0, R2
	MOVD $24, R3

	VLD1.P 64(R0), [ V0.B16, V1.B16, V2.B16, V3.B16]
	VLD1.P 64(R0), [ V4.B16, V5.B16, V6.B16, V7.B16]
	VLD1.P 64(R0), [ V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	VLD1.P 64(R0), [V16.B16, V17.B16, V18.B16, V19.B16]
	VLD1.P 64(R0), [V20.B16, V21.B16, V22.B16, V23.B16]
	VLD1.P (R0), [V24.B16]

loop:
	// Execute theta, but without XORing into the state yet.
	VEOR3 V10.B16, V5.B16, V0.B16, V25.B16
	VEOR3 V11.B16, V6.B16, V1.B16, V26.B16
	VEOR3 V12.B16, V7.B16, V2.B16, V27.B16
	VEOR3 V13.B16, V8.B16, V3.B16, V28.B16
	VEOR3 V14.B16, V9.B16, V4.B16, V29.B16

	VEOR3 V20.B16, V15.B16, V25.B16, V25.B16
	VEOR3 V21.B16, V16.B16, V26.B16, V26.B16
	VEOR3 V22.B16, V17.B16, V27.B16, V27.B16
	VEOR3 V23.B16, V18.B16, V28.B16, V28.B16
	VEOR3 V24.B16, V19.B16, V29.B16, V29.B16

	// XOR parities from step theta into the state at the same time as
	// executing rho and pi.
	VRAX1 V26.D2, V29.D2, V30.D2
	VRAX1 V29.D2, V27.D2, V29.D2
	VRAX1 V27.D2, V25.D2, V27.D2
	VRAX1 V25.D2, V28.D2, V25.D2
	VRAX1 V28.D2, V26.D2, V28.D2

	VEOR V30.B16, V0.B16, V0.B16
	VMOV V1.B16, V31.B16

	VXAR $20, V27.D2, V6.D2, V1.D2
	VXAR $44, V25.D2, V9.D2, V6.D2
	VXAR $3 , V28.D2, V22.D2, V9.D2
	VXAR $25, V25.D2, V14.D2, V22.D2
	VXAR $46, V30.D2, V20.D2, V14.D2
	VXAR $2 , V28.D2, V2.D2, V20.D2
	VXAR $21, V28.D2, V12.D2, V2.D2
	VXAR $39, V29.D2, V13.D2, V12.D2
	VXAR $56, V25.D2, V19.D2, V13.D2
	VXAR $8 , V29.D2, V23.D2, V19.D2
	VXAR $23, V30.D2, V15.D2, V23.D2
	VXAR $37, V25.D2, V4.D2, V15.D2
	VXAR $50, V25.D2, V24.D2, V4.D2
	VXAR $62, V27.D2, V21.D2, V24.D2
	VXAR $9 , V29.D2, V8.D2, V21.D2
	VXAR $19, V27.D2, V16.D2, V8.D2
	VXAR $28, V30.D2, V5.D2, V16.D2
	VXAR $36, V29.D2, V3.D2, V5.D2
	VXAR $43, V29.D2, V18.D2, V3.D2
	VXAR $49, V28.D2, V17.D2, V18.D2
	VXAR $54, V27.D2, V11.D2, V17.D2
	VXAR $58, V28.D2, V7.D2, V11.D2
	VXAR $61, V30.D2, V10.D2, V7.D2
	VXAR $63, V27.D2, V31.D2, V10.D2

	// Chi
	VBCAX V1.B16, V2.B16, V0.B16, V25.B16
	VBCAX V2.B16, V3.B16, V1.B16, V26.B16
	VBCAX V3.B16, V4.B16, V2.B16, V2.B16
	VBCAX V4.B16, V0.B16, V3.B16, V3.B16
	VBCAX V0.B16, V1.B16, V4.B16, V4.B16
	VMOV V25.B16, V0.B16
	VMOV V26.B16, V1.B16

	VBCAX V6.B16, V7.B16, V5.B16, V25.B16
	VBCAX V7.B16, V8.B16, V6.B16, V26.B16
	VBCAX V8.B16, V9.B16, V7.B16, V7.B16
	VBCAX V9.B16, V5.B16, V8.B16, V8.B16
	VBCAX V5.B16, V6.B16, V9.B16, V9.B16
	VMOV V25.B16, V5.B16
	VMOV V26.B16, V6.B16

	VBCAX V11.B16, V12.B16, V10.B16, V25.B16
	VBCAX V12.B16, V13.B16, V11.B16, V26.B16
	VBCAX V13.B16, V14.B16, V12.B16, V12.B16
	VBCAX V14.B16, V10.B16, V13.B16, V13.B16
	VBCAX V10.B16, V11.B16, V14.B16, V14.B16
	VMOV V25.B16, V10.B16
	VMOV V26.B16, V11.B16

	VBCAX V16.B16, V17.B16, V15.B16, V25.B16
	VBCAX V17.B16, V18.B16, V16.B16, V26.B16
	VBCAX V18.B16, V19.B16, V17.B16, V17.B16
	VBCAX V19.B16, V15.B16, V18.B16, V18.B16
	VBCAX V15.B16, V16.B16, V19.B16, V19.B16
	VMOV V25.B16, V15.B16
	VMOV V26.B16, V16.B16

	VBCAX V21.B16, V22.B16, V20.B16, V25.B16
	VBCAX V22.B16, V23.B16, V21.B16, V26.B16
	VBCAX V23.B16, V24.B16, V22.B16, V22.B16
	VBCAX V24.B16, V20.B16, V23.B16, V23.B16
	VBCAX V20.B16, V21.B16, V24.B16, V24.B16
	VMOV V25.B16, V20.B16
	VMOV V26.B16, V21.B16

	// Iota
	VLD1R.P 8(R1), [V25.D2]
	VEOR V25.B16, V0.B16, V0.B16

	SUBS $1, R3, R3
	CBNZ R3, loop

	MOVD R2, R0

	VST1.P [ V0.B16, V1.B16, V2.B16, V3.B16], 64(R0)
	VST1.P [ V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
	VST1.P [ V8.B16, V9.B16, V10.B16, V11.B16], 64(R0)
	VST1.P [V12.B16, V13.B16, V14.B16, V15.B16], 64(R0)
	VST1.P [V16.B16, V17.B16, V18.B16, V19.B16], 64(R0)
	VST1.P [V20.B16, V21.B16, V22.B16, V23.B16], 64(R0)
	VST1.P [V24.B16], (R0)

	RET
7 vendor/github.com/cloudflare/circl/simd/keccakf1600/f1600x4_amd64.go (generated, vendored, new file)
@@ -0,0 +1,7 @@
package keccakf1600

import "github.com/cloudflare/circl/internal/sha3"

func permuteSIMDx4(state []uint64) { f1600x4AVX2(&state[0], &sha3.RC) }

func permuteSIMDx2(state []uint64) { permuteScalarX2(state) }
894 vendor/github.com/cloudflare/circl/simd/keccakf1600/f1600x4_amd64.s (generated, vendored, new file)
@@ -0,0 +1,894 @@
// Code generated by command: go run src.go -out ../../f1600x4_amd64.s -stubs ../../f1600x4stubs_amd64.go -pkg keccakf1600. DO NOT EDIT.

// +build amd64

#include "textflag.h"

// func f1600x4AVX2(state *uint64, rc *[24]uint64)
// Requires: AVX, AVX2
TEXT ·f1600x4AVX2(SB), NOSPLIT, $0-16
	MOVQ state+0(FP), AX
	MOVQ rc+8(FP), CX
	MOVQ $0x0000000000000006, DX

loop:
	VMOVDQA (AX), Y0
	VMOVDQA 32(AX), Y1
	VMOVDQA 64(AX), Y2
	VMOVDQA 96(AX), Y3
	VMOVDQA 128(AX), Y4
	VPXOR 160(AX), Y0, Y0
	VPXOR 192(AX), Y1, Y1
	VPXOR 224(AX), Y2, Y2
	VPXOR 256(AX), Y3, Y3
	VPXOR 288(AX), Y4, Y4
	VPXOR 320(AX), Y0, Y0
	VPXOR 352(AX), Y1, Y1
	VPXOR 384(AX), Y2, Y2
	VPXOR 416(AX), Y3, Y3
	VPXOR 448(AX), Y4, Y4
	VPXOR 480(AX), Y0, Y0
	VPXOR 512(AX), Y1, Y1
	VPXOR 544(AX), Y2, Y2
	VPXOR 576(AX), Y3, Y3
	VPXOR 608(AX), Y4, Y4
	VPXOR 640(AX), Y0, Y0
	VPXOR 672(AX), Y1, Y1
	VPXOR 704(AX), Y2, Y2
	VPXOR 736(AX), Y3, Y3
	VPXOR 768(AX), Y4, Y4
	VPSLLQ $0x01, Y1, Y5
	VPSLLQ $0x01, Y2, Y6
	VPSLLQ $0x01, Y3, Y7
	VPSLLQ $0x01, Y4, Y8
	VPSLLQ $0x01, Y0, Y9
	VPSRLQ $0x3f, Y1, Y10
	VPSRLQ $0x3f, Y2, Y11
	VPSRLQ $0x3f, Y3, Y12
	VPSRLQ $0x3f, Y4, Y13
	VPSRLQ $0x3f, Y0, Y14
	VPOR Y5, Y10, Y10
	VPOR Y6, Y11, Y11
	VPOR Y7, Y12, Y12
	VPOR Y8, Y13, Y13
	VPOR Y9, Y14, Y14
	VPXOR Y10, Y4, Y10
	VPXOR Y11, Y0, Y11
	VPXOR Y12, Y1, Y12
	VPXOR Y13, Y2, Y13
	VPXOR Y14, Y3, Y14
	VPXOR (AX), Y10, Y0
	VPXOR 192(AX), Y11, Y1
	VPXOR 384(AX), Y12, Y2
	VPXOR 576(AX), Y13, Y3
	VPXOR 768(AX), Y14, Y4
	VPSLLQ $0x2c, Y1, Y6
	VPSLLQ $0x2b, Y2, Y7
	VPSLLQ $0x15, Y3, Y8
	VPSLLQ $0x0e, Y4, Y9
	VPSRLQ $0x14, Y1, Y1
	VPSRLQ $0x15, Y2, Y2
	VPSRLQ $0x2b, Y3, Y3
	VPSRLQ $0x32, Y4, Y4
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VPBROADCASTQ (CX), Y0
	VPXOR Y0, Y5, Y5
	VMOVDQA Y5, (AX)
	VMOVDQA Y6, 192(AX)
	VMOVDQA Y7, 384(AX)
	VMOVDQA Y8, 576(AX)
	VMOVDQA Y9, 768(AX)
	VPXOR 96(AX), Y13, Y0
	VPXOR 288(AX), Y14, Y1
	VPXOR 320(AX), Y10, Y2
	VPXOR 512(AX), Y11, Y3
	VPXOR 704(AX), Y12, Y4
	VPSLLQ $0x1c, Y0, Y5
	VPSLLQ $0x14, Y1, Y6
	VPSLLQ $0x03, Y2, Y7
	VPSLLQ $0x2d, Y3, Y8
	VPSLLQ $0x3d, Y4, Y9
	VPSRLQ $0x24, Y0, Y0
	VPSRLQ $0x2c, Y1, Y1
	VPSRLQ $0x3d, Y2, Y2
	VPSRLQ $0x13, Y3, Y3
	VPSRLQ $0x03, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 320(AX)
	VMOVDQA Y6, 512(AX)
	VMOVDQA Y7, 704(AX)
	VMOVDQA Y8, 96(AX)
	VMOVDQA Y9, 288(AX)
	VPXOR 32(AX), Y11, Y0
	VPXOR 224(AX), Y12, Y1
	VPXOR 416(AX), Y13, Y2
	VPXOR 608(AX), Y14, Y3
	VPXOR 640(AX), Y10, Y4
	VPSLLQ $0x01, Y0, Y5
	VPSLLQ $0x06, Y1, Y6
	VPSLLQ $0x19, Y2, Y7
	VPSLLQ $0x08, Y3, Y8
	VPSLLQ $0x12, Y4, Y9
	VPSRLQ $0x3f, Y0, Y0
	VPSRLQ $0x3a, Y1, Y1
	VPSRLQ $0x27, Y2, Y2
	VPSRLQ $0x38, Y3, Y3
	VPSRLQ $0x2e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 640(AX)
	VMOVDQA Y6, 32(AX)
	VMOVDQA Y7, 224(AX)
	VMOVDQA Y8, 416(AX)
	VMOVDQA Y9, 608(AX)
	VPXOR 128(AX), Y14, Y0
	VPXOR 160(AX), Y10, Y1
	VPXOR 352(AX), Y11, Y2
	VPXOR 544(AX), Y12, Y3
	VPXOR 736(AX), Y13, Y4
	VPSLLQ $0x1b, Y0, Y5
	VPSLLQ $0x24, Y1, Y6
	VPSLLQ $0x0a, Y2, Y7
	VPSLLQ $0x0f, Y3, Y8
	VPSLLQ $0x38, Y4, Y9
	VPSRLQ $0x25, Y0, Y0
	VPSRLQ $0x1c, Y1, Y1
	VPSRLQ $0x36, Y2, Y2
	VPSRLQ $0x31, Y3, Y3
	VPSRLQ $0x08, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 160(AX)
	VMOVDQA Y6, 352(AX)
	VMOVDQA Y7, 544(AX)
	VMOVDQA Y8, 736(AX)
	VMOVDQA Y9, 128(AX)
	VPXOR 64(AX), Y12, Y0
	VPXOR 256(AX), Y13, Y1
	VPXOR 448(AX), Y14, Y2
	VPXOR 480(AX), Y10, Y3
	VPXOR 672(AX), Y11, Y4
	VPSLLQ $0x3e, Y0, Y5
	VPSLLQ $0x37, Y1, Y6
	VPSLLQ $0x27, Y2, Y7
	VPSLLQ $0x29, Y3, Y8
	VPSLLQ $0x02, Y4, Y9
	VPSRLQ $0x02, Y0, Y0
	VPSRLQ $0x09, Y1, Y1
	VPSRLQ $0x19, Y2, Y2
	VPSRLQ $0x17, Y3, Y3
	VPSRLQ $0x3e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 480(AX)
	VMOVDQA Y6, 672(AX)
	VMOVDQA Y7, 64(AX)
	VMOVDQA Y8, 256(AX)
	VMOVDQA Y9, 448(AX)
	VMOVDQA (AX), Y0
	VMOVDQA 32(AX), Y1
	VMOVDQA 64(AX), Y2
	VMOVDQA 96(AX), Y3
	VMOVDQA 128(AX), Y4
	VPXOR 160(AX), Y0, Y0
	VPXOR 192(AX), Y1, Y1
	VPXOR 224(AX), Y2, Y2
	VPXOR 256(AX), Y3, Y3
	VPXOR 288(AX), Y4, Y4
	VPXOR 320(AX), Y0, Y0
	VPXOR 352(AX), Y1, Y1
	VPXOR 384(AX), Y2, Y2
	VPXOR 416(AX), Y3, Y3
	VPXOR 448(AX), Y4, Y4
	VPXOR 480(AX), Y0, Y0
	VPXOR 512(AX), Y1, Y1
	VPXOR 544(AX), Y2, Y2
	VPXOR 576(AX), Y3, Y3
	VPXOR 608(AX), Y4, Y4
	VPXOR 640(AX), Y0, Y0
	VPXOR 672(AX), Y1, Y1
	VPXOR 704(AX), Y2, Y2
	VPXOR 736(AX), Y3, Y3
	VPXOR 768(AX), Y4, Y4
	VPSLLQ $0x01, Y1, Y5
	VPSLLQ $0x01, Y2, Y6
	VPSLLQ $0x01, Y3, Y7
	VPSLLQ $0x01, Y4, Y8
	VPSLLQ $0x01, Y0, Y9
	VPSRLQ $0x3f, Y1, Y10
	VPSRLQ $0x3f, Y2, Y11
	VPSRLQ $0x3f, Y3, Y12
	VPSRLQ $0x3f, Y4, Y13
	VPSRLQ $0x3f, Y0, Y14
	VPOR Y5, Y10, Y10
	VPOR Y6, Y11, Y11
	VPOR Y7, Y12, Y12
	VPOR Y8, Y13, Y13
	VPOR Y9, Y14, Y14
	VPXOR Y10, Y4, Y10
	VPXOR Y11, Y0, Y11
	VPXOR Y12, Y1, Y12
	VPXOR Y13, Y2, Y13
	VPXOR Y14, Y3, Y14
	VPXOR (AX), Y10, Y0
	VPXOR 512(AX), Y11, Y1
	VPXOR 224(AX), Y12, Y2
	VPXOR 736(AX), Y13, Y3
	VPXOR 448(AX), Y14, Y4
	VPSLLQ $0x2c, Y1, Y6
	VPSLLQ $0x2b, Y2, Y7
	VPSLLQ $0x15, Y3, Y8
	VPSLLQ $0x0e, Y4, Y9
	VPSRLQ $0x14, Y1, Y1
	VPSRLQ $0x15, Y2, Y2
	VPSRLQ $0x2b, Y3, Y3
	VPSRLQ $0x32, Y4, Y4
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VPBROADCASTQ 8(CX), Y0
	VPXOR Y0, Y5, Y5
	VMOVDQA Y5, (AX)
	VMOVDQA Y6, 512(AX)
	VMOVDQA Y7, 224(AX)
	VMOVDQA Y8, 736(AX)
	VMOVDQA Y9, 448(AX)
	VPXOR 576(AX), Y13, Y0
	VPXOR 288(AX), Y14, Y1
	VPXOR 640(AX), Y10, Y2
	VPXOR 352(AX), Y11, Y3
	VPXOR 64(AX), Y12, Y4
	VPSLLQ $0x1c, Y0, Y5
	VPSLLQ $0x14, Y1, Y6
	VPSLLQ $0x03, Y2, Y7
	VPSLLQ $0x2d, Y3, Y8
	VPSLLQ $0x3d, Y4, Y9
	VPSRLQ $0x24, Y0, Y0
	VPSRLQ $0x2c, Y1, Y1
	VPSRLQ $0x3d, Y2, Y2
	VPSRLQ $0x13, Y3, Y3
	VPSRLQ $0x03, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 640(AX)
	VMOVDQA Y6, 352(AX)
	VMOVDQA Y7, 64(AX)
	VMOVDQA Y8, 576(AX)
	VMOVDQA Y9, 288(AX)
	VPXOR 192(AX), Y11, Y0
	VPXOR 704(AX), Y12, Y1
	VPXOR 416(AX), Y13, Y2
	VPXOR 128(AX), Y14, Y3
	VPXOR 480(AX), Y10, Y4
	VPSLLQ $0x01, Y0, Y5
	VPSLLQ $0x06, Y1, Y6
	VPSLLQ $0x19, Y2, Y7
	VPSLLQ $0x08, Y3, Y8
	VPSLLQ $0x12, Y4, Y9
	VPSRLQ $0x3f, Y0, Y0
	VPSRLQ $0x3a, Y1, Y1
	VPSRLQ $0x27, Y2, Y2
	VPSRLQ $0x38, Y3, Y3
	VPSRLQ $0x2e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 480(AX)
	VMOVDQA Y6, 192(AX)
	VMOVDQA Y7, 704(AX)
	VMOVDQA Y8, 416(AX)
	VMOVDQA Y9, 128(AX)
	VPXOR 768(AX), Y14, Y0
	VPXOR 320(AX), Y10, Y1
	VPXOR 32(AX), Y11, Y2
	VPXOR 544(AX), Y12, Y3
	VPXOR 256(AX), Y13, Y4
	VPSLLQ $0x1b, Y0, Y5
	VPSLLQ $0x24, Y1, Y6
	VPSLLQ $0x0a, Y2, Y7
	VPSLLQ $0x0f, Y3, Y8
	VPSLLQ $0x38, Y4, Y9
	VPSRLQ $0x25, Y0, Y0
	VPSRLQ $0x1c, Y1, Y1
	VPSRLQ $0x36, Y2, Y2
	VPSRLQ $0x31, Y3, Y3
	VPSRLQ $0x08, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 320(AX)
	VMOVDQA Y6, 32(AX)
	VMOVDQA Y7, 544(AX)
	VMOVDQA Y8, 256(AX)
	VMOVDQA Y9, 768(AX)
	VPXOR 384(AX), Y12, Y0
	VPXOR 96(AX), Y13, Y1
	VPXOR 608(AX), Y14, Y2
	VPXOR 160(AX), Y10, Y3
	VPXOR 672(AX), Y11, Y4
	VPSLLQ $0x3e, Y0, Y5
	VPSLLQ $0x37, Y1, Y6
	VPSLLQ $0x27, Y2, Y7
	VPSLLQ $0x29, Y3, Y8
	VPSLLQ $0x02, Y4, Y9
	VPSRLQ $0x02, Y0, Y0
	VPSRLQ $0x09, Y1, Y1
	VPSRLQ $0x19, Y2, Y2
	VPSRLQ $0x17, Y3, Y3
	VPSRLQ $0x3e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 160(AX)
	VMOVDQA Y6, 672(AX)
	VMOVDQA Y7, 384(AX)
	VMOVDQA Y8, 96(AX)
	VMOVDQA Y9, 608(AX)
	VMOVDQA (AX), Y0
	VMOVDQA 32(AX), Y1
	VMOVDQA 64(AX), Y2
	VMOVDQA 96(AX), Y3
	VMOVDQA 128(AX), Y4
	VPXOR 160(AX), Y0, Y0
	VPXOR 192(AX), Y1, Y1
	VPXOR 224(AX), Y2, Y2
	VPXOR 256(AX), Y3, Y3
	VPXOR 288(AX), Y4, Y4
	VPXOR 320(AX), Y0, Y0
	VPXOR 352(AX), Y1, Y1
	VPXOR 384(AX), Y2, Y2
	VPXOR 416(AX), Y3, Y3
	VPXOR 448(AX), Y4, Y4
	VPXOR 480(AX), Y0, Y0
	VPXOR 512(AX), Y1, Y1
	VPXOR 544(AX), Y2, Y2
	VPXOR 576(AX), Y3, Y3
	VPXOR 608(AX), Y4, Y4
	VPXOR 640(AX), Y0, Y0
	VPXOR 672(AX), Y1, Y1
	VPXOR 704(AX), Y2, Y2
	VPXOR 736(AX), Y3, Y3
	VPXOR 768(AX), Y4, Y4
	VPSLLQ $0x01, Y1, Y5
	VPSLLQ $0x01, Y2, Y6
	VPSLLQ $0x01, Y3, Y7
	VPSLLQ $0x01, Y4, Y8
	VPSLLQ $0x01, Y0, Y9
	VPSRLQ $0x3f, Y1, Y10
	VPSRLQ $0x3f, Y2, Y11
	VPSRLQ $0x3f, Y3, Y12
	VPSRLQ $0x3f, Y4, Y13
	VPSRLQ $0x3f, Y0, Y14
	VPOR Y5, Y10, Y10
	VPOR Y6, Y11, Y11
	VPOR Y7, Y12, Y12
	VPOR Y8, Y13, Y13
	VPOR Y9, Y14, Y14
	VPXOR Y10, Y4, Y10
	VPXOR Y11, Y0, Y11
	VPXOR Y12, Y1, Y12
	VPXOR Y13, Y2, Y13
	VPXOR Y14, Y3, Y14
	VPXOR (AX), Y10, Y0
	VPXOR 352(AX), Y11, Y1
	VPXOR 704(AX), Y12, Y2
	VPXOR 256(AX), Y13, Y3
	VPXOR 608(AX), Y14, Y4
	VPSLLQ $0x2c, Y1, Y6
	VPSLLQ $0x2b, Y2, Y7
	VPSLLQ $0x15, Y3, Y8
	VPSLLQ $0x0e, Y4, Y9
	VPSRLQ $0x14, Y1, Y1
	VPSRLQ $0x15, Y2, Y2
	VPSRLQ $0x2b, Y3, Y3
	VPSRLQ $0x32, Y4, Y4
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VPBROADCASTQ 16(CX), Y0
	VPXOR Y0, Y5, Y5
	VMOVDQA Y5, (AX)
	VMOVDQA Y6, 352(AX)
	VMOVDQA Y7, 704(AX)
	VMOVDQA Y8, 256(AX)
	VMOVDQA Y9, 608(AX)
	VPXOR 736(AX), Y13, Y0
	VPXOR 288(AX), Y14, Y1
	VPXOR 480(AX), Y10, Y2
	VPXOR 32(AX), Y11, Y3
	VPXOR 384(AX), Y12, Y4
	VPSLLQ $0x1c, Y0, Y5
	VPSLLQ $0x14, Y1, Y6
	VPSLLQ $0x03, Y2, Y7
	VPSLLQ $0x2d, Y3, Y8
	VPSLLQ $0x3d, Y4, Y9
	VPSRLQ $0x24, Y0, Y0
	VPSRLQ $0x2c, Y1, Y1
	VPSRLQ $0x3d, Y2, Y2
	VPSRLQ $0x13, Y3, Y3
	VPSRLQ $0x03, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 480(AX)
	VMOVDQA Y6, 32(AX)
	VMOVDQA Y7, 384(AX)
	VMOVDQA Y8, 736(AX)
	VMOVDQA Y9, 288(AX)
	VPXOR 512(AX), Y11, Y0
	VPXOR 64(AX), Y12, Y1
	VPXOR 416(AX), Y13, Y2
	VPXOR 768(AX), Y14, Y3
	VPXOR 160(AX), Y10, Y4
	VPSLLQ $0x01, Y0, Y5
	VPSLLQ $0x06, Y1, Y6
	VPSLLQ $0x19, Y2, Y7
	VPSLLQ $0x08, Y3, Y8
	VPSLLQ $0x12, Y4, Y9
	VPSRLQ $0x3f, Y0, Y0
	VPSRLQ $0x3a, Y1, Y1
	VPSRLQ $0x27, Y2, Y2
	VPSRLQ $0x38, Y3, Y3
	VPSRLQ $0x2e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 160(AX)
	VMOVDQA Y6, 512(AX)
	VMOVDQA Y7, 64(AX)
	VMOVDQA Y8, 416(AX)
	VMOVDQA Y9, 768(AX)
	VPXOR 448(AX), Y14, Y0
	VPXOR 640(AX), Y10, Y1
	VPXOR 192(AX), Y11, Y2
	VPXOR 544(AX), Y12, Y3
	VPXOR 96(AX), Y13, Y4
	VPSLLQ $0x1b, Y0, Y5
	VPSLLQ $0x24, Y1, Y6
	VPSLLQ $0x0a, Y2, Y7
	VPSLLQ $0x0f, Y3, Y8
	VPSLLQ $0x38, Y4, Y9
	VPSRLQ $0x25, Y0, Y0
	VPSRLQ $0x1c, Y1, Y1
	VPSRLQ $0x36, Y2, Y2
	VPSRLQ $0x31, Y3, Y3
	VPSRLQ $0x08, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 640(AX)
	VMOVDQA Y6, 192(AX)
	VMOVDQA Y7, 544(AX)
	VMOVDQA Y8, 96(AX)
	VMOVDQA Y9, 448(AX)
	VPXOR 224(AX), Y12, Y0
	VPXOR 576(AX), Y13, Y1
	VPXOR 128(AX), Y14, Y2
	VPXOR 320(AX), Y10, Y3
	VPXOR 672(AX), Y11, Y4
	VPSLLQ $0x3e, Y0, Y5
	VPSLLQ $0x37, Y1, Y6
	VPSLLQ $0x27, Y2, Y7
	VPSLLQ $0x29, Y3, Y8
	VPSLLQ $0x02, Y4, Y9
	VPSRLQ $0x02, Y0, Y0
	VPSRLQ $0x09, Y1, Y1
	VPSRLQ $0x19, Y2, Y2
	VPSRLQ $0x17, Y3, Y3
	VPSRLQ $0x3e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 320(AX)
	VMOVDQA Y6, 672(AX)
	VMOVDQA Y7, 224(AX)
	VMOVDQA Y8, 576(AX)
	VMOVDQA Y9, 128(AX)
	VMOVDQA (AX), Y0
	VMOVDQA 32(AX), Y1
	VMOVDQA 64(AX), Y2
	VMOVDQA 96(AX), Y3
	VMOVDQA 128(AX), Y4
	VPXOR 160(AX), Y0, Y0
	VPXOR 192(AX), Y1, Y1
	VPXOR 224(AX), Y2, Y2
	VPXOR 256(AX), Y3, Y3
	VPXOR 288(AX), Y4, Y4
	VPXOR 320(AX), Y0, Y0
	VPXOR 352(AX), Y1, Y1
	VPXOR 384(AX), Y2, Y2
	VPXOR 416(AX), Y3, Y3
	VPXOR 448(AX), Y4, Y4
	VPXOR 480(AX), Y0, Y0
	VPXOR 512(AX), Y1, Y1
	VPXOR 544(AX), Y2, Y2
	VPXOR 576(AX), Y3, Y3
	VPXOR 608(AX), Y4, Y4
	VPXOR 640(AX), Y0, Y0
	VPXOR 672(AX), Y1, Y1
	VPXOR 704(AX), Y2, Y2
	VPXOR 736(AX), Y3, Y3
	VPXOR 768(AX), Y4, Y4
	VPSLLQ $0x01, Y1, Y5
	VPSLLQ $0x01, Y2, Y6
	VPSLLQ $0x01, Y3, Y7
	VPSLLQ $0x01, Y4, Y8
	VPSLLQ $0x01, Y0, Y9
	VPSRLQ $0x3f, Y1, Y10
	VPSRLQ $0x3f, Y2, Y11
	VPSRLQ $0x3f, Y3, Y12
	VPSRLQ $0x3f, Y4, Y13
	VPSRLQ $0x3f, Y0, Y14
	VPOR Y5, Y10, Y10
	VPOR Y6, Y11, Y11
	VPOR Y7, Y12, Y12
	VPOR Y8, Y13, Y13
	VPOR Y9, Y14, Y14
	VPXOR Y10, Y4, Y10
	VPXOR Y11, Y0, Y11
	VPXOR Y12, Y1, Y12
	VPXOR Y13, Y2, Y13
	VPXOR Y14, Y3, Y14
	VPXOR (AX), Y10, Y0
	VPXOR 32(AX), Y11, Y1
	VPXOR 64(AX), Y12, Y2
	VPXOR 96(AX), Y13, Y3
	VPXOR 128(AX), Y14, Y4
	VPSLLQ $0x2c, Y1, Y6
	VPSLLQ $0x2b, Y2, Y7
	VPSLLQ $0x15, Y3, Y8
	VPSLLQ $0x0e, Y4, Y9
	VPSRLQ $0x14, Y1, Y1
	VPSRLQ $0x15, Y2, Y2
	VPSRLQ $0x2b, Y3, Y3
	VPSRLQ $0x32, Y4, Y4
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VPBROADCASTQ 24(CX), Y0
	VPXOR Y0, Y5, Y5
	VMOVDQA Y5, (AX)
	VMOVDQA Y6, 32(AX)
	VMOVDQA Y7, 64(AX)
	VMOVDQA Y8, 96(AX)
	VMOVDQA Y9, 128(AX)
	VPXOR 256(AX), Y13, Y0
	VPXOR 288(AX), Y14, Y1
	VPXOR 160(AX), Y10, Y2
	VPXOR 192(AX), Y11, Y3
	VPXOR 224(AX), Y12, Y4
	VPSLLQ $0x1c, Y0, Y5
	VPSLLQ $0x14, Y1, Y6
	VPSLLQ $0x03, Y2, Y7
	VPSLLQ $0x2d, Y3, Y8
	VPSLLQ $0x3d, Y4, Y9
	VPSRLQ $0x24, Y0, Y0
	VPSRLQ $0x2c, Y1, Y1
	VPSRLQ $0x3d, Y2, Y2
	VPSRLQ $0x13, Y3, Y3
	VPSRLQ $0x03, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 160(AX)
	VMOVDQA Y6, 192(AX)
	VMOVDQA Y7, 224(AX)
	VMOVDQA Y8, 256(AX)
	VMOVDQA Y9, 288(AX)
	VPXOR 352(AX), Y11, Y0
	VPXOR 384(AX), Y12, Y1
	VPXOR 416(AX), Y13, Y2
	VPXOR 448(AX), Y14, Y3
	VPXOR 320(AX), Y10, Y4
	VPSLLQ $0x01, Y0, Y5
	VPSLLQ $0x06, Y1, Y6
	VPSLLQ $0x19, Y2, Y7
	VPSLLQ $0x08, Y3, Y8
	VPSLLQ $0x12, Y4, Y9
	VPSRLQ $0x3f, Y0, Y0
	VPSRLQ $0x3a, Y1, Y1
	VPSRLQ $0x27, Y2, Y2
	VPSRLQ $0x38, Y3, Y3
	VPSRLQ $0x2e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 320(AX)
	VMOVDQA Y6, 352(AX)
	VMOVDQA Y7, 384(AX)
	VMOVDQA Y8, 416(AX)
	VMOVDQA Y9, 448(AX)
	VPXOR 608(AX), Y14, Y0
	VPXOR 480(AX), Y10, Y1
	VPXOR 512(AX), Y11, Y2
	VPXOR 544(AX), Y12, Y3
	VPXOR 576(AX), Y13, Y4
	VPSLLQ $0x1b, Y0, Y5
	VPSLLQ $0x24, Y1, Y6
	VPSLLQ $0x0a, Y2, Y7
	VPSLLQ $0x0f, Y3, Y8
	VPSLLQ $0x38, Y4, Y9
	VPSRLQ $0x25, Y0, Y0
	VPSRLQ $0x1c, Y1, Y1
	VPSRLQ $0x36, Y2, Y2
	VPSRLQ $0x31, Y3, Y3
	VPSRLQ $0x08, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 480(AX)
	VMOVDQA Y6, 512(AX)
	VMOVDQA Y7, 544(AX)
	VMOVDQA Y8, 576(AX)
	VMOVDQA Y9, 608(AX)
	VPXOR 704(AX), Y12, Y0
	VPXOR 736(AX), Y13, Y1
	VPXOR 768(AX), Y14, Y2
	VPXOR 640(AX), Y10, Y3
	VPXOR 672(AX), Y11, Y4
	VPSLLQ $0x3e, Y0, Y5
	VPSLLQ $0x37, Y1, Y6
	VPSLLQ $0x27, Y2, Y7
	VPSLLQ $0x29, Y3, Y8
	VPSLLQ $0x02, Y4, Y9
	VPSRLQ $0x02, Y0, Y0
	VPSRLQ $0x09, Y1, Y1
	VPSRLQ $0x19, Y2, Y2
	VPSRLQ $0x17, Y3, Y3
	VPSRLQ $0x3e, Y4, Y4
	VPOR Y5, Y0, Y0
	VPOR Y6, Y1, Y1
	VPOR Y7, Y2, Y2
	VPOR Y8, Y3, Y3
	VPOR Y9, Y4, Y4
	VPANDN Y2, Y1, Y5
	VPANDN Y3, Y2, Y6
	VPANDN Y4, Y3, Y7
	VPANDN Y0, Y4, Y8
	VPANDN Y1, Y0, Y9
	VPXOR Y0, Y5, Y5
	VPXOR Y1, Y6, Y6
	VPXOR Y2, Y7, Y7
	VPXOR Y3, Y8, Y8
	VPXOR Y4, Y9, Y9
	VMOVDQA Y5, 640(AX)
	VMOVDQA Y6, 672(AX)
	VMOVDQA Y7, 704(AX)
	VMOVDQA Y8, 736(AX)
	VMOVDQA Y9, 768(AX)
	ADDQ $0x20, CX
	SUBQ $0x00000001, DX
	JNZ loop
	RET
9 vendor/github.com/cloudflare/circl/simd/keccakf1600/f1600x4stubs_amd64.go (generated, vendored, new file)
@@ -0,0 +1,9 @@
// Code generated by command: go run src.go -out ../../f1600x4_amd64.s -stubs ../../f1600x4stubs_amd64.go -pkg keccakf1600. DO NOT EDIT.

//go:build amd64
// +build amd64

package keccakf1600

//go:noescape
func f1600x4AVX2(state *uint64, rc *[24]uint64)
8 vendor/github.com/cloudflare/circl/simd/keccakf1600/fallback.go (generated, vendored, new file)
@@ -0,0 +1,8 @@
//go:build (!amd64 && !arm64) || (arm64 && !go1.16)
// +build !amd64,!arm64 arm64,!go1.16

package keccakf1600

func permuteSIMDx2(state []uint64) { permuteScalarX2(state) }

func permuteSIMDx4(state []uint64) { permuteScalarX4(state) }
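The scalar fallbacks make the interleaved layout explicit: lane j of permutation i sits at index 4*j+i (four-way) or 2*j+i (two-way). A hypothetical helper along these lines (extractState is not part of the package) recovers one flat Keccak state from the four-way buffer:

// extractState copies permutation i (0 <= i < 4) out of the interleaved
// buffer returned by StateX4.Initialize into a flat [25]uint64 state.
func extractState(buf []uint64, i int) [25]uint64 {
	var out [25]uint64
	for j := 0; j < 25; j++ {
		out[j] = buf[4*j+i]
	}
	return out
}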