ossl: Update the generated assembly files from OpenSSL 3.0.

Tested with:	cryptocheck -d ossl0 -a all -z on amd64
Reviewed by:	markj
Differential Revision:	https://reviews.freebsd.org/D41568
Author:	John Baldwin
Date:	2023-08-29 14:44:15 -07:00
Parent:	73653b72af
Commit:	c0855eaa3e

99 changed files with 37538 additions and 1959 deletions

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -58,6 +58,13 @@ _armv8_sha512_probe:
ret
.size _armv8_sha512_probe,.-_armv8_sha512_probe
.globl _armv8_cpuid_probe
.type _armv8_cpuid_probe,%function
_armv8_cpuid_probe:
mrs x0, midr_el1
ret
.size _armv8_cpuid_probe,.-_armv8_cpuid_probe
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,%function
.align 5


@@ -1,14 +1,32 @@
/* Do not modify. This file is auto-generated from armv8-mont.pl. */
#ifndef __KERNEL__
# include "arm_arch.h"
.hidden OPENSSL_armv8_rsa_neonized
#endif
.text
.globl bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
tst x5,#3
b.ne .Lmul_mont
cmp x5,#32
b.le .Lscalar_impl
#ifndef __KERNEL__
adrp x17,OPENSSL_armv8_rsa_neonized
ldr w17,[x17,#:lo12:OPENSSL_armv8_rsa_neonized]
cbnz w17, bn_mul8x_mont_neon
#endif
.Lscalar_impl:
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
b.eq __bn_mul4x_mont
.Lmul_mont:
stp x29,x30,[sp,#-64]!
add x29,sp,#0
@@ -132,7 +150,7 @@ bn_mul_mont:
mul x16,x14,x15 // np[j]*m1
adds x12,x12,x6
umulh x17,x14,x15
str x12,[x22,#-16] // tp[j-1]
stur x12,[x22,#-16] // tp[j-1]
cbnz x21,.Linner
.Linner_skip:
@@ -188,13 +206,13 @@ bn_mul_mont:
csel x14,x23,x8,lo // did it borrow?
ldr x23,[x22],#8
ldr x8,[x0],#8
str xzr,[x22,#-16] // wipe tp
str x14,[x0,#-16]
stur xzr,[x22,#-16] // wipe tp
stur x14,[x0,#-16]
cbnz x5,.Lcond_copy
csel x14,x23,x8,lo
str xzr,[x22,#-8] // wipe tp
str x14,[x0,#-8]
stur xzr,[x22,#-8] // wipe tp
stur x14,[x0,#-8]
ldp x19,x20,[x29,#16]
mov sp,x29
@@ -204,6 +222,704 @@ bn_mul_mont:
ldr x29,[sp],#64
ret
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
stp x29,x30,[sp,#-80]!
mov x16,sp
stp d8,d9,[sp,#16]
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
lsl x5,x5,#1
eor v14.16b,v14.16b,v14.16b
.align 4
.LNEON_8n:
eor v6.16b,v6.16b,v6.16b
sub x7,sp,#128
eor v7.16b,v7.16b,v7.16b
sub x7,x7,x5,lsl#4
eor v8.16b,v8.16b,v8.16b
and x7,x7,#-64
eor v9.16b,v9.16b,v9.16b
mov sp,x7 // alloca
eor v10.16b,v10.16b,v10.16b
add x7,x7,#256
eor v11.16b,v11.16b,v11.16b
sub x8,x5,#8
eor v12.16b,v12.16b,v12.16b
eor v13.16b,v13.16b,v13.16b
.LNEON_8n_init:
st1 {v6.2d,v7.2d},[x7],#32
subs x8,x8,#8
st1 {v8.2d,v9.2d},[x7],#32
st1 {v10.2d,v11.2d},[x7],#32
st1 {v12.2d,v13.2d},[x7],#32
bne .LNEON_8n_init
add x6,sp,#256
ld1 {v0.4s,v1.4s},[x1],#32
add x10,sp,#8
ldr s30,[x4],#4
mov x9,x5
b .LNEON_8n_outer
.align 4
.LNEON_8n_outer:
ldr s28,[x2],#4 // *b++
uxtl v28.4s,v28.4h
add x7,sp,#128
ld1 {v2.4s,v3.4s},[x3],#32
umlal v6.2d,v28.2s,v0.s[0]
umlal v7.2d,v28.2s,v0.s[1]
umlal v8.2d,v28.2s,v0.s[2]
shl v29.2d,v6.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v9.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v6.2d
umlal v10.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v11.2d,v28.2s,v1.s[1]
st1 {v28.2s},[sp] // put aside smashed b[8*i+0]
umlal v12.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v13.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v6.2d,v29.2s,v2.s[0]
umlal v7.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v8.2d,v29.2s,v2.s[2]
ushr v15.2d,v6.2d,#16
umlal v9.2d,v29.2s,v2.s[3]
umlal v10.2d,v29.2s,v3.s[0]
ext v6.16b,v6.16b,v6.16b,#8
add v6.2d,v6.2d,v15.2d
umlal v11.2d,v29.2s,v3.s[1]
ushr v6.2d,v6.2d,#16
umlal v12.2d,v29.2s,v3.s[2]
umlal v13.2d,v29.2s,v3.s[3]
add v16.2d,v7.2d,v6.2d
ins v7.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+0]
umlal v7.2d,v28.2s,v0.s[0]
ld1 {v6.2d},[x6],#16
umlal v8.2d,v28.2s,v0.s[1]
umlal v9.2d,v28.2s,v0.s[2]
shl v29.2d,v7.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v10.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v7.2d
umlal v11.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v12.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+1]
umlal v13.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v6.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v7.2d,v29.2s,v2.s[0]
umlal v8.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v9.2d,v29.2s,v2.s[2]
ushr v15.2d,v7.2d,#16
umlal v10.2d,v29.2s,v2.s[3]
umlal v11.2d,v29.2s,v3.s[0]
ext v7.16b,v7.16b,v7.16b,#8
add v7.2d,v7.2d,v15.2d
umlal v12.2d,v29.2s,v3.s[1]
ushr v7.2d,v7.2d,#16
umlal v13.2d,v29.2s,v3.s[2]
umlal v6.2d,v29.2s,v3.s[3]
add v16.2d,v8.2d,v7.2d
ins v8.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+1]
umlal v8.2d,v28.2s,v0.s[0]
ld1 {v7.2d},[x6],#16
umlal v9.2d,v28.2s,v0.s[1]
umlal v10.2d,v28.2s,v0.s[2]
shl v29.2d,v8.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v11.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v8.2d
umlal v12.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v13.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+2]
umlal v6.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v7.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v8.2d,v29.2s,v2.s[0]
umlal v9.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v10.2d,v29.2s,v2.s[2]
ushr v15.2d,v8.2d,#16
umlal v11.2d,v29.2s,v2.s[3]
umlal v12.2d,v29.2s,v3.s[0]
ext v8.16b,v8.16b,v8.16b,#8
add v8.2d,v8.2d,v15.2d
umlal v13.2d,v29.2s,v3.s[1]
ushr v8.2d,v8.2d,#16
umlal v6.2d,v29.2s,v3.s[2]
umlal v7.2d,v29.2s,v3.s[3]
add v16.2d,v9.2d,v8.2d
ins v9.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+2]
umlal v9.2d,v28.2s,v0.s[0]
ld1 {v8.2d},[x6],#16
umlal v10.2d,v28.2s,v0.s[1]
umlal v11.2d,v28.2s,v0.s[2]
shl v29.2d,v9.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v12.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v9.2d
umlal v13.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v6.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+3]
umlal v7.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v8.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v9.2d,v29.2s,v2.s[0]
umlal v10.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v11.2d,v29.2s,v2.s[2]
ushr v15.2d,v9.2d,#16
umlal v12.2d,v29.2s,v2.s[3]
umlal v13.2d,v29.2s,v3.s[0]
ext v9.16b,v9.16b,v9.16b,#8
add v9.2d,v9.2d,v15.2d
umlal v6.2d,v29.2s,v3.s[1]
ushr v9.2d,v9.2d,#16
umlal v7.2d,v29.2s,v3.s[2]
umlal v8.2d,v29.2s,v3.s[3]
add v16.2d,v10.2d,v9.2d
ins v10.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+3]
umlal v10.2d,v28.2s,v0.s[0]
ld1 {v9.2d},[x6],#16
umlal v11.2d,v28.2s,v0.s[1]
umlal v12.2d,v28.2s,v0.s[2]
shl v29.2d,v10.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v13.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v10.2d
umlal v6.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v7.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+4]
umlal v8.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v9.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v10.2d,v29.2s,v2.s[0]
umlal v11.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v12.2d,v29.2s,v2.s[2]
ushr v15.2d,v10.2d,#16
umlal v13.2d,v29.2s,v2.s[3]
umlal v6.2d,v29.2s,v3.s[0]
ext v10.16b,v10.16b,v10.16b,#8
add v10.2d,v10.2d,v15.2d
umlal v7.2d,v29.2s,v3.s[1]
ushr v10.2d,v10.2d,#16
umlal v8.2d,v29.2s,v3.s[2]
umlal v9.2d,v29.2s,v3.s[3]
add v16.2d,v11.2d,v10.2d
ins v11.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+4]
umlal v11.2d,v28.2s,v0.s[0]
ld1 {v10.2d},[x6],#16
umlal v12.2d,v28.2s,v0.s[1]
umlal v13.2d,v28.2s,v0.s[2]
shl v29.2d,v11.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v6.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v11.2d
umlal v7.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v8.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+5]
umlal v9.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v10.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v11.2d,v29.2s,v2.s[0]
umlal v12.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v13.2d,v29.2s,v2.s[2]
ushr v15.2d,v11.2d,#16
umlal v6.2d,v29.2s,v2.s[3]
umlal v7.2d,v29.2s,v3.s[0]
ext v11.16b,v11.16b,v11.16b,#8
add v11.2d,v11.2d,v15.2d
umlal v8.2d,v29.2s,v3.s[1]
ushr v11.2d,v11.2d,#16
umlal v9.2d,v29.2s,v3.s[2]
umlal v10.2d,v29.2s,v3.s[3]
add v16.2d,v12.2d,v11.2d
ins v12.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+5]
umlal v12.2d,v28.2s,v0.s[0]
ld1 {v11.2d},[x6],#16
umlal v13.2d,v28.2s,v0.s[1]
umlal v6.2d,v28.2s,v0.s[2]
shl v29.2d,v12.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v7.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v12.2d
umlal v8.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v9.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+6]
umlal v10.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v11.2d,v28.2s,v1.s[3]
ldr s28,[x2],#4 // *b++
umlal v12.2d,v29.2s,v2.s[0]
umlal v13.2d,v29.2s,v2.s[1]
uxtl v28.4s,v28.4h
umlal v6.2d,v29.2s,v2.s[2]
ushr v15.2d,v12.2d,#16
umlal v7.2d,v29.2s,v2.s[3]
umlal v8.2d,v29.2s,v3.s[0]
ext v12.16b,v12.16b,v12.16b,#8
add v12.2d,v12.2d,v15.2d
umlal v9.2d,v29.2s,v3.s[1]
ushr v12.2d,v12.2d,#16
umlal v10.2d,v29.2s,v3.s[2]
umlal v11.2d,v29.2s,v3.s[3]
add v16.2d,v13.2d,v12.2d
ins v13.d[0],v16.d[0]
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+6]
umlal v13.2d,v28.2s,v0.s[0]
ld1 {v12.2d},[x6],#16
umlal v6.2d,v28.2s,v0.s[1]
umlal v7.2d,v28.2s,v0.s[2]
shl v29.2d,v13.2d,#16
ext v29.16b,v29.16b,v29.16b,#8
umlal v8.2d,v28.2s,v0.s[3]
add v29.2d,v29.2d,v13.2d
umlal v9.2d,v28.2s,v1.s[0]
mul v29.2s,v29.2s,v30.2s
umlal v10.2d,v28.2s,v1.s[1]
st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+7]
umlal v11.2d,v28.2s,v1.s[2]
uxtl v29.4s,v29.4h
umlal v12.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
umlal v13.2d,v29.2s,v2.s[0]
ld1 {v0.4s,v1.4s},[x1],#32
umlal v6.2d,v29.2s,v2.s[1]
umlal v7.2d,v29.2s,v2.s[2]
mov v5.16b,v13.16b
ushr v5.2d,v5.2d,#16
ext v13.16b,v13.16b,v13.16b,#8
umlal v8.2d,v29.2s,v2.s[3]
umlal v9.2d,v29.2s,v3.s[0]
add v13.2d,v13.2d,v5.2d
umlal v10.2d,v29.2s,v3.s[1]
ushr v13.2d,v13.2d,#16
eor v15.16b,v15.16b,v15.16b
ins v13.d[1],v15.d[0]
umlal v11.2d,v29.2s,v3.s[2]
umlal v12.2d,v29.2s,v3.s[3]
add v6.2d,v6.2d,v13.2d
st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+7]
add x10,sp,#8 // rewind
sub x8,x5,#8
b .LNEON_8n_inner
.align 4
.LNEON_8n_inner:
subs x8,x8,#8
umlal v6.2d,v28.2s,v0.s[0]
ld1 {v13.2d},[x6]
umlal v7.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+0]
umlal v8.2d,v28.2s,v0.s[2]
ld1 {v2.4s,v3.4s},[x3],#32
umlal v9.2d,v28.2s,v0.s[3]
b.eq .LInner_jump
add x6,x6,#16 // don't advance in last iteration
.LInner_jump:
umlal v10.2d,v28.2s,v1.s[0]
umlal v11.2d,v28.2s,v1.s[1]
umlal v12.2d,v28.2s,v1.s[2]
umlal v13.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+1]
umlal v6.2d,v29.2s,v2.s[0]
umlal v7.2d,v29.2s,v2.s[1]
umlal v8.2d,v29.2s,v2.s[2]
umlal v9.2d,v29.2s,v2.s[3]
umlal v10.2d,v29.2s,v3.s[0]
umlal v11.2d,v29.2s,v3.s[1]
umlal v12.2d,v29.2s,v3.s[2]
umlal v13.2d,v29.2s,v3.s[3]
st1 {v6.2d},[x7],#16
umlal v7.2d,v28.2s,v0.s[0]
ld1 {v6.2d},[x6]
umlal v8.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+1]
umlal v9.2d,v28.2s,v0.s[2]
b.eq .LInner_jump1
add x6,x6,#16 // don't advance in last iteration
.LInner_jump1:
umlal v10.2d,v28.2s,v0.s[3]
umlal v11.2d,v28.2s,v1.s[0]
umlal v12.2d,v28.2s,v1.s[1]
umlal v13.2d,v28.2s,v1.s[2]
umlal v6.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+2]
umlal v7.2d,v29.2s,v2.s[0]
umlal v8.2d,v29.2s,v2.s[1]
umlal v9.2d,v29.2s,v2.s[2]
umlal v10.2d,v29.2s,v2.s[3]
umlal v11.2d,v29.2s,v3.s[0]
umlal v12.2d,v29.2s,v3.s[1]
umlal v13.2d,v29.2s,v3.s[2]
umlal v6.2d,v29.2s,v3.s[3]
st1 {v7.2d},[x7],#16
umlal v8.2d,v28.2s,v0.s[0]
ld1 {v7.2d},[x6]
umlal v9.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+2]
umlal v10.2d,v28.2s,v0.s[2]
b.eq .LInner_jump2
add x6,x6,#16 // don't advance in last iteration
.LInner_jump2:
umlal v11.2d,v28.2s,v0.s[3]
umlal v12.2d,v28.2s,v1.s[0]
umlal v13.2d,v28.2s,v1.s[1]
umlal v6.2d,v28.2s,v1.s[2]
umlal v7.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+3]
umlal v8.2d,v29.2s,v2.s[0]
umlal v9.2d,v29.2s,v2.s[1]
umlal v10.2d,v29.2s,v2.s[2]
umlal v11.2d,v29.2s,v2.s[3]
umlal v12.2d,v29.2s,v3.s[0]
umlal v13.2d,v29.2s,v3.s[1]
umlal v6.2d,v29.2s,v3.s[2]
umlal v7.2d,v29.2s,v3.s[3]
st1 {v8.2d},[x7],#16
umlal v9.2d,v28.2s,v0.s[0]
ld1 {v8.2d},[x6]
umlal v10.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+3]
umlal v11.2d,v28.2s,v0.s[2]
b.eq .LInner_jump3
add x6,x6,#16 // don't advance in last iteration
.LInner_jump3:
umlal v12.2d,v28.2s,v0.s[3]
umlal v13.2d,v28.2s,v1.s[0]
umlal v6.2d,v28.2s,v1.s[1]
umlal v7.2d,v28.2s,v1.s[2]
umlal v8.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+4]
umlal v9.2d,v29.2s,v2.s[0]
umlal v10.2d,v29.2s,v2.s[1]
umlal v11.2d,v29.2s,v2.s[2]
umlal v12.2d,v29.2s,v2.s[3]
umlal v13.2d,v29.2s,v3.s[0]
umlal v6.2d,v29.2s,v3.s[1]
umlal v7.2d,v29.2s,v3.s[2]
umlal v8.2d,v29.2s,v3.s[3]
st1 {v9.2d},[x7],#16
umlal v10.2d,v28.2s,v0.s[0]
ld1 {v9.2d},[x6]
umlal v11.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+4]
umlal v12.2d,v28.2s,v0.s[2]
b.eq .LInner_jump4
add x6,x6,#16 // don't advance in last iteration
.LInner_jump4:
umlal v13.2d,v28.2s,v0.s[3]
umlal v6.2d,v28.2s,v1.s[0]
umlal v7.2d,v28.2s,v1.s[1]
umlal v8.2d,v28.2s,v1.s[2]
umlal v9.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+5]
umlal v10.2d,v29.2s,v2.s[0]
umlal v11.2d,v29.2s,v2.s[1]
umlal v12.2d,v29.2s,v2.s[2]
umlal v13.2d,v29.2s,v2.s[3]
umlal v6.2d,v29.2s,v3.s[0]
umlal v7.2d,v29.2s,v3.s[1]
umlal v8.2d,v29.2s,v3.s[2]
umlal v9.2d,v29.2s,v3.s[3]
st1 {v10.2d},[x7],#16
umlal v11.2d,v28.2s,v0.s[0]
ld1 {v10.2d},[x6]
umlal v12.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+5]
umlal v13.2d,v28.2s,v0.s[2]
b.eq .LInner_jump5
add x6,x6,#16 // don't advance in last iteration
.LInner_jump5:
umlal v6.2d,v28.2s,v0.s[3]
umlal v7.2d,v28.2s,v1.s[0]
umlal v8.2d,v28.2s,v1.s[1]
umlal v9.2d,v28.2s,v1.s[2]
umlal v10.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+6]
umlal v11.2d,v29.2s,v2.s[0]
umlal v12.2d,v29.2s,v2.s[1]
umlal v13.2d,v29.2s,v2.s[2]
umlal v6.2d,v29.2s,v2.s[3]
umlal v7.2d,v29.2s,v3.s[0]
umlal v8.2d,v29.2s,v3.s[1]
umlal v9.2d,v29.2s,v3.s[2]
umlal v10.2d,v29.2s,v3.s[3]
st1 {v11.2d},[x7],#16
umlal v12.2d,v28.2s,v0.s[0]
ld1 {v11.2d},[x6]
umlal v13.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+6]
umlal v6.2d,v28.2s,v0.s[2]
b.eq .LInner_jump6
add x6,x6,#16 // don't advance in last iteration
.LInner_jump6:
umlal v7.2d,v28.2s,v0.s[3]
umlal v8.2d,v28.2s,v1.s[0]
umlal v9.2d,v28.2s,v1.s[1]
umlal v10.2d,v28.2s,v1.s[2]
umlal v11.2d,v28.2s,v1.s[3]
ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+7]
umlal v12.2d,v29.2s,v2.s[0]
umlal v13.2d,v29.2s,v2.s[1]
umlal v6.2d,v29.2s,v2.s[2]
umlal v7.2d,v29.2s,v2.s[3]
umlal v8.2d,v29.2s,v3.s[0]
umlal v9.2d,v29.2s,v3.s[1]
umlal v10.2d,v29.2s,v3.s[2]
umlal v11.2d,v29.2s,v3.s[3]
st1 {v12.2d},[x7],#16
umlal v13.2d,v28.2s,v0.s[0]
ld1 {v12.2d},[x6]
umlal v6.2d,v28.2s,v0.s[1]
ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+7]
umlal v7.2d,v28.2s,v0.s[2]
b.eq .LInner_jump7
add x6,x6,#16 // don't advance in last iteration
.LInner_jump7:
umlal v8.2d,v28.2s,v0.s[3]
umlal v9.2d,v28.2s,v1.s[0]
umlal v10.2d,v28.2s,v1.s[1]
umlal v11.2d,v28.2s,v1.s[2]
umlal v12.2d,v28.2s,v1.s[3]
b.ne .LInner_after_rewind8
sub x1,x1,x5,lsl#2 // rewind
.LInner_after_rewind8:
umlal v13.2d,v29.2s,v2.s[0]
ld1 {v28.2s},[sp] // pull smashed b[8*i+0]
umlal v6.2d,v29.2s,v2.s[1]
ld1 {v0.4s,v1.4s},[x1],#32
umlal v7.2d,v29.2s,v2.s[2]
add x10,sp,#8 // rewind
umlal v8.2d,v29.2s,v2.s[3]
umlal v9.2d,v29.2s,v3.s[0]
umlal v10.2d,v29.2s,v3.s[1]
umlal v11.2d,v29.2s,v3.s[2]
st1 {v13.2d},[x7],#16
umlal v12.2d,v29.2s,v3.s[3]
bne .LNEON_8n_inner
add x6,sp,#128
st1 {v6.2d,v7.2d},[x7],#32
eor v2.16b,v2.16b,v2.16b // v2
st1 {v8.2d,v9.2d},[x7],#32
eor v3.16b,v3.16b,v3.16b // v3
st1 {v10.2d,v11.2d},[x7],#32
st1 {v12.2d},[x7]
subs x9,x9,#8
ld1 {v6.2d,v7.2d},[x6],#32
ld1 {v8.2d,v9.2d},[x6],#32
ld1 {v10.2d,v11.2d},[x6],#32
ld1 {v12.2d,v13.2d},[x6],#32
b.eq .LInner_8n_jump_2steps
sub x3,x3,x5,lsl#2 // rewind
b .LNEON_8n_outer
.LInner_8n_jump_2steps:
add x7,sp,#128
st1 {v2.2d,v3.2d}, [sp],#32 // start wiping stack frame
mov v5.16b,v6.16b
ushr v15.2d,v6.2d,#16
ext v6.16b,v6.16b,v6.16b,#8
st1 {v2.2d,v3.2d}, [sp],#32
add v6.2d,v6.2d,v15.2d
st1 {v2.2d,v3.2d}, [sp],#32
ushr v15.2d,v6.2d,#16
st1 {v2.2d,v3.2d}, [sp],#32
zip1 v6.4h,v5.4h,v6.4h
ins v15.d[1],v14.d[0]
mov x8,x5
b .LNEON_tail_entry
.align 4
.LNEON_tail:
add v6.2d,v6.2d,v15.2d
mov v5.16b,v6.16b
ushr v15.2d,v6.2d,#16
ext v6.16b,v6.16b,v6.16b,#8
ld1 {v8.2d,v9.2d}, [x6],#32
add v6.2d,v6.2d,v15.2d
ld1 {v10.2d,v11.2d}, [x6],#32
ushr v15.2d,v6.2d,#16
ld1 {v12.2d,v13.2d}, [x6],#32
zip1 v6.4h,v5.4h,v6.4h
ins v15.d[1],v14.d[0]
.LNEON_tail_entry:
add v7.2d,v7.2d,v15.2d
st1 {v6.s}[0], [x7],#4
ushr v15.2d,v7.2d,#16
mov v5.16b,v7.16b
ext v7.16b,v7.16b,v7.16b,#8
add v7.2d,v7.2d,v15.2d
ushr v15.2d,v7.2d,#16
zip1 v7.4h,v5.4h,v7.4h
ins v15.d[1],v14.d[0]
add v8.2d,v8.2d,v15.2d
st1 {v7.s}[0], [x7],#4
ushr v15.2d,v8.2d,#16
mov v5.16b,v8.16b
ext v8.16b,v8.16b,v8.16b,#8
add v8.2d,v8.2d,v15.2d
ushr v15.2d,v8.2d,#16
zip1 v8.4h,v5.4h,v8.4h
ins v15.d[1],v14.d[0]
add v9.2d,v9.2d,v15.2d
st1 {v8.s}[0], [x7],#4
ushr v15.2d,v9.2d,#16
mov v5.16b,v9.16b
ext v9.16b,v9.16b,v9.16b,#8
add v9.2d,v9.2d,v15.2d
ushr v15.2d,v9.2d,#16
zip1 v9.4h,v5.4h,v9.4h
ins v15.d[1],v14.d[0]
add v10.2d,v10.2d,v15.2d
st1 {v9.s}[0], [x7],#4
ushr v15.2d,v10.2d,#16
mov v5.16b,v10.16b
ext v10.16b,v10.16b,v10.16b,#8
add v10.2d,v10.2d,v15.2d
ushr v15.2d,v10.2d,#16
zip1 v10.4h,v5.4h,v10.4h
ins v15.d[1],v14.d[0]
add v11.2d,v11.2d,v15.2d
st1 {v10.s}[0], [x7],#4
ushr v15.2d,v11.2d,#16
mov v5.16b,v11.16b
ext v11.16b,v11.16b,v11.16b,#8
add v11.2d,v11.2d,v15.2d
ushr v15.2d,v11.2d,#16
zip1 v11.4h,v5.4h,v11.4h
ins v15.d[1],v14.d[0]
add v12.2d,v12.2d,v15.2d
st1 {v11.s}[0], [x7],#4
ushr v15.2d,v12.2d,#16
mov v5.16b,v12.16b
ext v12.16b,v12.16b,v12.16b,#8
add v12.2d,v12.2d,v15.2d
ushr v15.2d,v12.2d,#16
zip1 v12.4h,v5.4h,v12.4h
ins v15.d[1],v14.d[0]
add v13.2d,v13.2d,v15.2d
st1 {v12.s}[0], [x7],#4
ushr v15.2d,v13.2d,#16
mov v5.16b,v13.16b
ext v13.16b,v13.16b,v13.16b,#8
add v13.2d,v13.2d,v15.2d
ushr v15.2d,v13.2d,#16
zip1 v13.4h,v5.4h,v13.4h
ins v15.d[1],v14.d[0]
ld1 {v6.2d,v7.2d}, [x6],#32
subs x8,x8,#8
st1 {v13.s}[0], [x7],#4
bne .LNEON_tail
st1 {v15.s}[0], [x7],#4 // top-most bit
sub x3,x3,x5,lsl#2 // rewind x3
subs x1,sp,#0 // clear carry flag
add x2,sp,x5,lsl#2
.LNEON_sub:
ldp w4,w5,[x1],#8
ldp w6,w7,[x1],#8
ldp w8,w9,[x3],#8
ldp w10,w11,[x3],#8
sbcs w8,w4,w8
sbcs w9,w5,w9
sbcs w10,w6,w10
sbcs w11,w7,w11
sub x17,x2,x1
stp w8,w9,[x0],#8
stp w10,w11,[x0],#8
cbnz x17,.LNEON_sub
ldr w10, [x1] // load top-most bit
mov x11,sp
eor v0.16b,v0.16b,v0.16b
sub x11,x2,x11 // this is num*4
eor v1.16b,v1.16b,v1.16b
mov x1,sp
sub x0,x0,x11 // rewind x0
mov x3,x2 // second 3/4th of frame
sbcs w10,w10,wzr // result is carry flag
.LNEON_copy_n_zap:
ldp w4,w5,[x1],#8
ldp w6,w7,[x1],#8
ldp w8,w9,[x0],#8
ldp w10,w11,[x0]
sub x0,x0,#8
b.cs .LCopy_1
mov w8,w4
mov w9,w5
mov w10,w6
mov w11,w7
.LCopy_1:
st1 {v0.2d,v1.2d}, [x3],#32 // wipe
st1 {v0.2d,v1.2d}, [x3],#32 // wipe
ldp w4,w5,[x1],#8
ldp w6,w7,[x1],#8
stp w8,w9,[x0],#8
stp w10,w11,[x0],#8
sub x1,x1,#32
ldp w8,w9,[x0],#8
ldp w10,w11,[x0]
sub x0,x0,#8
b.cs .LCopy_2
mov w8, w4
mov w9, w5
mov w10, w6
mov w11, w7
.LCopy_2:
st1 {v0.2d,v1.2d}, [x1],#32 // wipe
st1 {v0.2d,v1.2d}, [x3],#32 // wipe
sub x17,x2,x1 // preserves carry
stp w8,w9,[x0],#8
stp w10,w11,[x0],#8
cbnz x17,.LNEON_copy_n_zap
mov sp,x16
ldp d14,d15,[sp,#64]
ldp d12,d13,[sp,#48]
ldp d10,d11,[sp,#32]
ldp d8,d9,[sp,#16]
ldr x29,[sp],#80
ret // bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
.type __bn_sqr8x_mont,%function
.align 5
__bn_sqr8x_mont:
@@ -519,7 +1235,7 @@ __bn_sqr8x_mont:
ldp x10,x11,[x2,#8*4]
ldp x12,x13,[x2,#8*6]
adds x19,x19,x6
ldr x4,[x0,#-8*8]
ldur x4,[x0,#-8*8]
adcs x20,x20,x7
ldp x6,x7,[x1,#8*0]
adcs x21,x21,x8
@@ -714,7 +1430,7 @@ __bn_sqr8x_mont:
//adc x28,xzr,xzr // moved below
cbz x27,.Lsqr8x8_post_condition
ldr x4,[x2,#-8*8]
ldur x4,[x2,#-8*8]
ldp x6,x7,[x1,#8*0]
ldp x8,x9,[x1,#8*2]
ldp x10,x11,[x1,#8*4]
@@ -772,7 +1488,7 @@ __bn_sqr8x_mont:
ldp x12,x13,[x2,#8*6]
cbz x27,.Lsqr8x_tail_break
ldr x4,[x0,#-8*8]
ldur x4,[x0,#-8*8]
adds x19,x19,x6
adcs x20,x20,x7
ldp x6,x7,[x1,#8*0]
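
A note on the restructured entry point: the new bn_mul_mont prologue (first hunk of this file) is now a dispatcher. An annotated reading, assuming the usual bn_mul_mont(rp, ap, bp, np, n0, num) argument order so that x5 carries the limb count, and that OPENSSL_armv8_rsa_neonized is a flag set at runtime by the armcap probe:

tst x5,#3 // num not a multiple of 4:
b.ne .Lmul_mont // -> generic one-limb scalar loop
cmp x5,#32
b.le .Lscalar_impl // small inputs stay scalar
adrp x17,OPENSSL_armv8_rsa_neonized // runtime flag from armcap
ldr w17,[x17,#:lo12:OPENSSL_armv8_rsa_neonized]
cbnz w17, bn_mul8x_mont_neon // flag set -> new 8x NEON kernel
.Lscalar_impl:
tst x5,#7
b.eq __bn_sqr8x_mont // num % 8 == 0 -> 8x scalar kernel
tst x5,#3
b.eq __bn_mul4x_mont // num % 4 == 0 -> 4x scalar kernel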

File diff suppressed because it is too large.


@@ -3823,7 +3823,7 @@ ecp_nistz256_ord_mul_mont:
////////////////////////////////////////////////////////////////////////
// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4],
// int rep);
// uint64_t rep);
.globl ecp_nistz256_ord_sqr_mont
.type ecp_nistz256_ord_sqr_mont,%function
.align 4
@@ -4023,7 +4023,7 @@ ecp_nistz256_scatter_w5:
ldp x4,x5,[x1] // X
ldp x6,x7,[x1,#16]
str w4,[x0,#64*0-4]
stur w4,[x0,#64*0-4]
lsr x4,x4,#32
str w5,[x0,#64*1-4]
lsr x5,x5,#32
@@ -4039,7 +4039,7 @@ ecp_nistz256_scatter_w5:
ldp x4,x5,[x1,#32] // Y
ldp x6,x7,[x1,#48]
str w4,[x0,#64*0-4]
stur w4,[x0,#64*0-4]
lsr x4,x4,#32
str w5,[x0,#64*1-4]
lsr x5,x5,#32
@@ -4055,7 +4055,7 @@ ecp_nistz256_scatter_w5:
ldp x4,x5,[x1,#64] // Z
ldp x6,x7,[x1,#80]
str w4,[x0,#64*0-4]
stur w4,[x0,#64*0-4]
lsr x4,x4,#32
str w5,[x0,#64*1-4]
lsr x5,x5,#32
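
The str to stur substitutions in this hunk (and in the bn_mul_mont, poly1305, and sha1 hunks) are encoding-driven: the plain str/ldr immediate form takes an unsigned 12-bit offset scaled by the access size, so a negative displacement such as #64*0-4 (i.e. #-4) is only encodable in the unscaled, signed 9-bit stur/ldur form. GNU as quietly rewrites the first spelling into the second; writing it explicitly avoids depending on that. A minimal illustration:

str w4,[x0,#4] // ok: unsigned, scaled 12-bit offset
stur w4,[x0,#-4] // negative offset: unscaled signed 9-bit form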


@@ -2,6 +2,7 @@
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=7
.arch armv8-a+crypto
.text
.globl gcm_init_v8
.type gcm_init_v8,%function


@@ -574,22 +574,22 @@ SHA3_squeeze:
.type KeccakF1600_ce,%function
.align 5
KeccakF1600_ce:
mov x9,#12
mov x9,#24
adr x10,iotas
b .Loop_ce
.align 4
.Loop_ce:
////////////////////////////////////////////////// Theta
.inst 0xce052819 //eor3 v25.16b,v0.16b,v5.16b,v10.16b
.inst 0xce062c3a //eor3 v26.16b,v1.16b,v6.16b,v11.16b
.inst 0xce07305b //eor3 v27.16b,v2.16b,v7.16b,v12.16b
.inst 0xce08347c //eor3 v28.16b,v3.16b,v8.16b,v13.16b
.inst 0xce09389d //eor3 v29.16b,v4.16b,v9.16b,v14.16b
.inst 0xce0f5339 //eor3 v25.16b,v25.16b, v15.16b,v20.16b
.inst 0xce10575a //eor3 v26.16b,v26.16b, v16.16b,v21.16b
.inst 0xce115b7b //eor3 v27.16b,v27.16b, v17.16b,v22.16b
.inst 0xce125f9c //eor3 v28.16b,v28.16b, v18.16b,v23.16b
.inst 0xce1363bd //eor3 v29.16b,v29.16b, v19.16b,v24.16b
.inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b
.inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b
.inst 0xce1132db //eor3 v27.16b,v22.16b,v17.16b,v12.16b
.inst 0xce1236fc //eor3 v28.16b,v23.16b,v18.16b,v13.16b
.inst 0xce133b1d //eor3 v29.16b,v24.16b,v19.16b,v14.16b
.inst 0xce050339 //eor3 v25.16b,v25.16b, v5.16b,v0.16b
.inst 0xce06075a //eor3 v26.16b,v26.16b, v6.16b,v1.16b
.inst 0xce070b7b //eor3 v27.16b,v27.16b, v7.16b,v2.16b
.inst 0xce080f9c //eor3 v28.16b,v28.16b, v8.16b,v3.16b
.inst 0xce0913bd //eor3 v29.16b,v29.16b, v9.16b,v4.16b
.inst 0xce7b8f3e //rax1 v30.16b,v25.16b,v27.16b // D[1]
.inst 0xce7c8f5f //rax1 v31.16b,v26.16b,v28.16b // D[2]
@@ -598,13 +598,15 @@ KeccakF1600_ce:
.inst 0xce7a8fbd //rax1 v29.16b,v29.16b,v26.16b // D[0]
////////////////////////////////////////////////// Theta+Rho+Pi
.inst 0xce9e50d9 //xar v25.16b, v6.16b,v30.16b,#64-44 // C[0]=A[0][1]
.inst 0xce9efc39 //xar v25.16b, v1.16b,v30.16b,#64-1 // C[0]=A[2][0]
.inst 0xce9e50c1 //xar v1.16b,v6.16b,v30.16b,#64-44
.inst 0xce9cb126 //xar v6.16b,v9.16b,v28.16b,#64-20
.inst 0xce9f0ec9 //xar v9.16b,v22.16b,v31.16b,#64-61
.inst 0xce9c65d6 //xar v22.16b,v14.16b,v28.16b,#64-39
.inst 0xce9dba8e //xar v14.16b,v20.16b,v29.16b,#64-18
.inst 0xce9f0854 //xar v20.16b,v2.16b,v31.16b,#64-62
.inst 0xce9f085a //xar v26.16b, v2.16b,v31.16b,#64-62 // C[1]=A[4][0]
.inst 0xce9f5582 //xar v2.16b,v12.16b,v31.16b,#64-43
.inst 0xce9b9dac //xar v12.16b,v13.16b,v27.16b,#64-25
@@ -614,145 +616,57 @@ KeccakF1600_ce:
.inst 0xce9c948f //xar v15.16b,v4.16b,v28.16b,#64-27
eor v0.16b,v0.16b,v29.16b
ldr x11,[x10],#8
.inst 0xce9bae5a //xar v26.16b, v18.16b,v27.16b,#64-21 // C[1]=A[0][3]
.inst 0xce9fc632 //xar v18.16b,v17.16b,v31.16b,#64-15
.inst 0xce9ed971 //xar v17.16b,v11.16b,v30.16b,#64-10
.inst 0xce9fe8eb //xar v11.16b,v7.16b,v31.16b,#64-6
.inst 0xce9df547 //xar v7.16b,v10.16b,v29.16b,#64-3
.inst 0xce9efc2a //xar v10.16b,v1.16b,v30.16b,#64-1 // *
.inst 0xce9ccb04 //xar v4.16b,v24.16b,v28.16b,#64-14
.inst 0xce9ccb1c //xar v28.16b, v24.16b,v28.16b,#64-14 // D[4]=A[0][4]
.inst 0xce9efab8 //xar v24.16b,v21.16b,v30.16b,#64-2
.inst 0xce9b2515 //xar v21.16b,v8.16b,v27.16b,#64-55
.inst 0xce9e4e08 //xar v8.16b,v16.16b,v30.16b,#64-45
.inst 0xce9b2508 //xar v8.16b,v8.16b,v27.16b,#64-55 // A[1][3]=A[4][1]
.inst 0xce9e4e04 //xar v4.16b,v16.16b,v30.16b,#64-45 // A[0][4]=A[1][3]
.inst 0xce9d70b0 //xar v16.16b,v5.16b,v29.16b,#64-36
.inst 0xce9b907b //xar v27.16b, v3.16b,v27.16b,#64-28 // C[2]=A[1][0]
.inst 0xce9b9065 //xar v5.16b,v3.16b,v27.16b,#64-28
eor v0.16b,v0.16b,v29.16b
.inst 0xce9bae5b //xar v27.16b, v18.16b,v27.16b,#64-21 // D[3]=A[0][3]
.inst 0xce9fc623 //xar v3.16b,v17.16b,v31.16b,#64-15 // A[0][3]=A[3][3]
.inst 0xce9ed97e //xar v30.16b, v11.16b,v30.16b,#64-10 // D[1]=A[3][2]
.inst 0xce9fe8ff //xar v31.16b, v7.16b,v31.16b,#64-6 // D[2]=A[2][1]
.inst 0xce9df55d //xar v29.16b, v10.16b,v29.16b,#64-3 // D[0]=A[1][2]
////////////////////////////////////////////////// Chi+Iota
dup v31.2d,x11 // borrow C[6]
.inst 0xce22641c //bcax v28.16b, v0.16b,v2.16b,v25.16b // *
.inst 0xce3a0b21 //bcax v1.16b,v25.16b, v26.16b, v2.16b // *
.inst 0xce246842 //bcax v2.16b,v2.16b,v4.16b,v26.16b
.inst 0xce201343 //bcax v3.16b,v26.16b, v0.16b,v4.16b
.inst 0xce390084 //bcax v4.16b,v4.16b,v25.16b, v0.16b
.inst 0xce362354 //bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1]
.inst 0xce375915 //bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1]
.inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b
.inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1]
.inst 0xce271b65 //bcax v5.16b,v27.16b, v7.16b,v6.16b // *
.inst 0xce281cd9 //bcax v25.16b, v6.16b,v8.16b,v7.16b // *
.inst 0xce2920e7 //bcax v7.16b,v7.16b,v9.16b,v8.16b
.inst 0xce3b2508 //bcax v8.16b,v8.16b,v27.16b, v9.16b
.inst 0xce266d29 //bcax v9.16b,v9.16b,v6.16b,v27.16b
ld1r {v26.2d},[x10],#8
eor v0.16b,v28.16b,v31.16b // Iota
.inst 0xce2c2d5a //bcax v26.16b, v10.16b,v12.16b,v11.16b // *
.inst 0xce2d317b //bcax v27.16b, v11.16b,v13.16b,v12.16b // *
.inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b
.inst 0xce2a39ad //bcax v13.16b,v13.16b,v10.16b,v14.16b
.inst 0xce2b29ce //bcax v14.16b,v14.16b,v11.16b,v10.16b
.inst 0xce3141fc //bcax v28.16b, v15.16b,v17.16b,v16.16b // *
.inst 0xce32461d //bcax v29.16b, v16.16b,v18.16b,v17.16b // *
.inst 0xce334a31 //bcax v17.16b,v17.16b,v19.16b,v18.16b
.inst 0xce2f4e52 //bcax v18.16b,v18.16b,v15.16b,v19.16b
.inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3]
.inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3]
.inst 0xce303e73 //bcax v19.16b,v19.16b,v16.16b,v15.16b
.inst 0xce3e41ef //bcax v15.16b,v15.16b,v30.16b, v16.16b
.inst 0xce237a10 //bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3]
.inst 0xce36569e //bcax v30.16b, v20.16b,v22.16b,v21.16b // *
.inst 0xce375abf //bcax v31.16b, v21.16b,v23.16b,v22.16b // *
.inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst 0xce3462f7 //bcax v23.16b,v23.16b,v20.16b,v24.16b
.inst 0xce355318 //bcax v24.16b,v24.16b,v21.16b,v20.16b
////////////////////////////////////////////////// Theta
.inst 0xce056806 //eor3 v6.16b,v0.16b,v5.16b,v26.16b
.inst 0xce196c2a //eor3 v10.16b,v1.16b,v25.16b,v27.16b
.inst 0xce07304b //eor3 v11.16b,v2.16b,v7.16b,v12.16b
.inst 0xce08346f //eor3 v15.16b,v3.16b,v8.16b,v13.16b
.inst 0xce093890 //eor3 v16.16b,v4.16b,v9.16b,v14.16b
.inst 0xce1c78c6 //eor3 v6.16b,v6.16b, v28.16b,v30.16b
.inst 0xce1d7d4a //eor3 v10.16b,v10.16b, v29.16b,v31.16b
.inst 0xce11596b //eor3 v11.16b,v11.16b, v17.16b,v22.16b
.inst 0xce125def //eor3 v15.16b,v15.16b, v18.16b,v23.16b
.inst 0xce136210 //eor3 v16.16b,v16.16b, v19.16b,v24.16b
.inst 0xce6b8cd4 //rax1 v20.16b,v6.16b,v11.16b // D[1]
.inst 0xce6f8d55 //rax1 v21.16b,v10.16b,v15.16b // D[2]
.inst 0xce708d6b //rax1 v11.16b,v11.16b,v16.16b // D[3]
.inst 0xce668def //rax1 v15.16b,v15.16b,v6.16b // D[4]
.inst 0xce6a8e10 //rax1 v16.16b,v16.16b,v10.16b // D[0]
////////////////////////////////////////////////// Theta+Rho+Pi
.inst 0xce945326 //xar v6.16b, v25.16b,v20.16b,#64-44 // C[0]=A[0][1]
.inst 0xce8fb139 //xar v25.16b,v9.16b,v15.16b,#64-20
.inst 0xce950ec9 //xar v9.16b,v22.16b,v21.16b,#64-61
.inst 0xce8f65d6 //xar v22.16b,v14.16b,v15.16b,#64-39
.inst 0xce90bbce //xar v14.16b,v30.16b,v16.16b,#64-18
.inst 0xce95085e //xar v30.16b,v2.16b,v21.16b,#64-62
.inst 0xce955582 //xar v2.16b,v12.16b,v21.16b,#64-43
.inst 0xce8b9dac //xar v12.16b,v13.16b,v11.16b,#64-25
.inst 0xce8fe26d //xar v13.16b,v19.16b,v15.16b,#64-8
.inst 0xce8b22f3 //xar v19.16b,v23.16b,v11.16b,#64-56
.inst 0xce905f97 //xar v23.16b,v28.16b,v16.16b,#64-41
.inst 0xce8f949c //xar v28.16b,v4.16b,v15.16b,#64-27
eor v0.16b,v0.16b,v16.16b
ldr x11,[x10],#8
.inst 0xce8bae4a //xar v10.16b, v18.16b,v11.16b,#64-21 // C[1]=A[0][3]
.inst 0xce95c632 //xar v18.16b,v17.16b,v21.16b,#64-15
.inst 0xce94db71 //xar v17.16b,v27.16b,v20.16b,#64-10
.inst 0xce95e8fb //xar v27.16b,v7.16b,v21.16b,#64-6
.inst 0xce90f747 //xar v7.16b,v26.16b,v16.16b,#64-3
.inst 0xce94fc3a //xar v26.16b,v1.16b,v20.16b,#64-1 // *
.inst 0xce8fcb04 //xar v4.16b,v24.16b,v15.16b,#64-14
.inst 0xce94fbf8 //xar v24.16b,v31.16b,v20.16b,#64-2
.inst 0xce8b251f //xar v31.16b,v8.16b,v11.16b,#64-55
.inst 0xce944fa8 //xar v8.16b,v29.16b,v20.16b,#64-45
.inst 0xce9070bd //xar v29.16b,v5.16b,v16.16b,#64-36
.inst 0xce8b906b //xar v11.16b, v3.16b,v11.16b,#64-28 // C[2]=A[1][0]
////////////////////////////////////////////////// Chi+Iota
dup v21.2d,x11 // borrow C[6]
.inst 0xce22180f //bcax v15.16b, v0.16b,v2.16b,v6.16b // *
.inst 0xce2a08c1 //bcax v1.16b,v6.16b, v10.16b, v2.16b // *
.inst 0xce242842 //bcax v2.16b,v2.16b,v4.16b,v10.16b
.inst 0xce201143 //bcax v3.16b,v10.16b, v0.16b,v4.16b
.inst 0xce260084 //bcax v4.16b,v4.16b,v6.16b, v0.16b
.inst 0xce276565 //bcax v5.16b,v11.16b, v7.16b,v25.16b // *
.inst 0xce281f26 //bcax v6.16b, v25.16b,v8.16b,v7.16b // *
.inst 0xce2920e7 //bcax v7.16b,v7.16b,v9.16b,v8.16b
.inst 0xce2b2508 //bcax v8.16b,v8.16b,v11.16b, v9.16b
.inst 0xce392d29 //bcax v9.16b,v9.16b,v25.16b,v11.16b
eor v0.16b,v15.16b,v21.16b // Iota
.inst 0xce2c6f4a //bcax v10.16b, v26.16b,v12.16b,v27.16b // *
.inst 0xce2d336b //bcax v11.16b, v27.16b,v13.16b,v12.16b // *
.inst 0xce2c7f2a //bcax v10.16b,v25.16b, v12.16b,v31.16b
.inst 0xce2d33eb //bcax v11.16b,v31.16b, v13.16b,v12.16b
.inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b
.inst 0xce3a39ad //bcax v13.16b,v13.16b,v26.16b,v14.16b
.inst 0xce3b69ce //bcax v14.16b,v14.16b,v27.16b,v26.16b
.inst 0xce3939ad //bcax v13.16b,v13.16b,v25.16b, v14.16b
.inst 0xce3f65ce //bcax v14.16b,v14.16b,v31.16b, v25.16b
.inst 0xce31778f //bcax v15.16b, v28.16b,v17.16b,v29.16b // *
.inst 0xce3247b0 //bcax v16.16b, v29.16b,v18.16b,v17.16b // *
.inst 0xce334a31 //bcax v17.16b,v17.16b,v19.16b,v18.16b
.inst 0xce3c4e52 //bcax v18.16b,v18.16b,v28.16b,v19.16b
.inst 0xce3d7273 //bcax v19.16b,v19.16b,v29.16b,v28.16b
.inst 0xce2913a7 //bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3]
.inst 0xce252488 //bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3]
.inst 0xce261529 //bcax v9.16b,v9.16b,v6.16b,v5.16b
.inst 0xce3d18a5 //bcax v5.16b,v5.16b,v29.16b, v6.16b
.inst 0xce2474c6 //bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3]
.inst 0xce207363 //bcax v3.16b,v27.16b, v0.16b,v28.16b
.inst 0xce210384 //bcax v4.16b,v28.16b, v1.16b,v0.16b
.inst 0xce220400 //bcax v0.16b,v0.16b,v2.16b,v1.16b
.inst 0xce3b0821 //bcax v1.16b,v1.16b,v27.16b, v2.16b
.inst 0xce3c6c42 //bcax v2.16b,v2.16b,v28.16b, v27.16b
eor v0.16b,v0.16b,v26.16b
.inst 0xce367fd4 //bcax v20.16b, v30.16b,v22.16b,v31.16b // *
.inst 0xce375bf5 //bcax v21.16b, v31.16b,v23.16b,v22.16b // *
.inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b
.inst 0xce3e62f7 //bcax v23.16b,v23.16b,v30.16b,v24.16b
.inst 0xce3f7b18 //bcax v24.16b,v24.16b,v31.16b,v30.16b
subs x9,x9,#1
bne .Loop_ce
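
KeccakF1600_ce is emitted as raw .inst words because EOR3, RAX1, XAR, and BCAX belong to the ARMv8.2 SHA3 extension, which many assemblers cannot encode from mnemonics; the trailing comments name the intended instruction. With a SHA3-aware assembler the first Theta line of the new loop body could equivalently be written as:

eor3 v25.16b,v20.16b,v15.16b,v10.16b // == .inst 0xce0f2a99 (three-way XOR)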


@@ -23,17 +23,12 @@ poly1305_init:
csel x0,xzr,x0,eq
b.eq .Lno_key
#ifdef __ILP32__
ldrsw x11,.LOPENSSL_armcap_P
#else
ldr x11,.LOPENSSL_armcap_P
#endif
adr x10,.LOPENSSL_armcap_P
adrp x17,OPENSSL_armcap_P
ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
ldp x7,x8,[x1] // load key
mov x9,#0xfffffffc0fffffff
movk x9,#0x0fff,lsl#48
ldr w17,[x10,x11]
#ifdef __ARMEB__
rev x7,x7 // flip bytes
rev x8,x8
@@ -45,10 +40,10 @@ poly1305_init:
tst w17,#ARMV7_NEON
adr x12,poly1305_blocks
adr x7,poly1305_blocks_neon
adr x13,poly1305_emit
adr x8,poly1305_emit_neon
adr x12,.Lpoly1305_blocks
adr x7,.Lpoly1305_blocks_neon
adr x13,.Lpoly1305_emit
adr x8,.Lpoly1305_emit_neon
csel x12,x12,x7,eq
csel x13,x13,x8,eq
@@ -67,6 +62,7 @@ poly1305_init:
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
.Lpoly1305_blocks:
ands x2,x2,#-16
b.eq .Lno_data
@@ -131,6 +127,7 @@ poly1305_blocks:
.type poly1305_emit,%function
.align 5
poly1305_emit:
.Lpoly1305_emit:
ldp x4,x5,[x0] // load hash base 2^64
ldr x6,[x0,#16]
ldp x10,x11,[x2] // load nonce
@@ -225,10 +222,11 @@ poly1305_splat:
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
.Lpoly1305_blocks_neon:
ldr x17,[x0,#24]
cmp x2,#128
b.hs .Lblocks_neon
cbz x17,poly1305_blocks
cbz x17,.Lpoly1305_blocks
.Lblocks_neon:
.inst 0xd503233f // paciasp
@@ -371,7 +369,7 @@ poly1305_blocks_neon:
csel x16,x17,x16,lo
mov x4,#1
str x4,[x0,#-24] // set is_base2_26
stur x4,[x0,#-24] // set is_base2_26
sub x0,x0,#48 // restore original x0
b .Ldo_neon
@@ -808,6 +806,7 @@ poly1305_blocks_neon:
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
.Lpoly1305_emit_neon:
ldr x17,[x0,#24]
cbz x17,poly1305_emit
@@ -860,12 +859,6 @@ poly1305_emit_neon:
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
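
poly1305_init now reaches OPENSSL_armcap_P through an adrp/ldr pair instead of the PC-relative literal pool (the .LOPENSSL_armcap_P block deleted at the end of this file); the sha1, sha256, and sha512 modules below get the same treatment. The pattern resolves a global symbol without keeping an address-sized datum in .text:

adrp x17,OPENSSL_armcap_P // x17 = 4KiB page containing the symbol
ldr w17,[x17,#:lo12:OPENSSL_armcap_P] // add the low 12 bits, load the word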


@@ -1,22 +1,18 @@
/* Do not modify. This file is auto-generated from sha1-armv8.pl. */
#include "arm_arch.h"
#ifndef __KERNEL__
# include "arm_arch.h"
.hidden OPENSSL_armcap_P
#endif
.text
.hidden OPENSSL_armcap_P
.globl sha1_block_data_order
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
#ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
#else
ldr x16,.LOPENSSL_armcap_P
#endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
adrp x16,OPENSSL_armcap_P
ldr w16,[x16,#:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA1
b.ne .Lv8_entry
@@ -37,7 +33,7 @@ sha1_block_data_order:
movz w28,#0x7999
sub x2,x2,#1
movk w28,#0x5a82,lsl#16
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x3,x3,#32
#else
rev32 x3,x3
@@ -45,7 +41,7 @@ sha1_block_data_order:
add w24,w24,w28 // warm it up
add w24,w24,w3
lsr x4,x3,#32
ldr x5,[x1,#-56]
ldur x5,[x1,#-56]
bic w25,w23,w21
and w26,w22,w21
ror w27,w20,#27
@@ -55,7 +51,7 @@ sha1_block_data_order:
ror w21,w21,#2
add w23,w23,w4 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x5,x5,#32
#else
rev32 x5,x5
@@ -70,7 +66,7 @@ sha1_block_data_order:
add w22,w22,w5 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
lsr x6,x5,#32
ldr x7,[x1,#-48]
ldur x7,[x1,#-48]
bic w25,w21,w24
and w26,w20,w24
ror w27,w23,#27
@@ -80,7 +76,7 @@ sha1_block_data_order:
ror w24,w24,#2
add w21,w21,w6 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x7,x7,#32
#else
rev32 x7,x7
@@ -95,7 +91,7 @@ sha1_block_data_order:
add w20,w20,w7 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
lsr x8,x7,#32
ldr x9,[x1,#-40]
ldur x9,[x1,#-40]
bic w25,w24,w22
and w26,w23,w22
ror w27,w21,#27
@@ -105,7 +101,7 @@ sha1_block_data_order:
ror w22,w22,#2
add w24,w24,w8 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x9,x9,#32
#else
rev32 x9,x9
@@ -120,7 +116,7 @@ sha1_block_data_order:
add w23,w23,w9 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
lsr x10,x9,#32
ldr x11,[x1,#-32]
ldur x11,[x1,#-32]
bic w25,w22,w20
and w26,w21,w20
ror w27,w24,#27
@@ -130,7 +126,7 @@ sha1_block_data_order:
ror w20,w20,#2
add w22,w22,w10 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x11,x11,#32
#else
rev32 x11,x11
@@ -145,7 +141,7 @@ sha1_block_data_order:
add w21,w21,w11 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
lsr x12,x11,#32
ldr x13,[x1,#-24]
ldur x13,[x1,#-24]
bic w25,w20,w23
and w26,w24,w23
ror w27,w22,#27
@@ -155,7 +151,7 @@ sha1_block_data_order:
ror w23,w23,#2
add w20,w20,w12 // future e+=X[i]
add w21,w21,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x13,x13,#32
#else
rev32 x13,x13
@@ -170,7 +166,7 @@ sha1_block_data_order:
add w24,w24,w13 // future e+=X[i]
add w20,w20,w25 // e+=F(b,c,d)
lsr x14,x13,#32
ldr x15,[x1,#-16]
ldur x15,[x1,#-16]
bic w25,w23,w21
and w26,w22,w21
ror w27,w20,#27
@@ -180,7 +176,7 @@ sha1_block_data_order:
ror w21,w21,#2
add w23,w23,w14 // future e+=X[i]
add w24,w24,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x15,x15,#32
#else
rev32 x15,x15
@@ -195,7 +191,7 @@ sha1_block_data_order:
add w22,w22,w15 // future e+=X[i]
add w23,w23,w25 // e+=F(b,c,d)
lsr x16,x15,#32
ldr x17,[x1,#-8]
ldur x17,[x1,#-8]
bic w25,w21,w24
and w26,w20,w24
ror w27,w23,#27
@@ -205,7 +201,7 @@ sha1_block_data_order:
ror w24,w24,#2
add w21,w21,w16 // future e+=X[i]
add w22,w22,w25 // e+=F(b,c,d)
#ifdef __ARMEB__
#ifdef __AARCH64EB__
ror x17,x17,#32
#else
rev32 x17,x17
@@ -1211,12 +1207,6 @@ sha1_block_armv8:
.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
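
Two mechanical changes run through the sha1 body: the endianness guard becomes __AARCH64EB__, the macro compilers predefine for big-endian AArch64 targets (__ARMEB__ is its 32-bit Arm counterpart), and the negative-offset message loads become ldur for the encoding reason noted above. The guard in context:

#ifdef __AARCH64EB__ // big-endian: bytes already in order,
ror x3,x3,#32 // just swap the two 32-bit halves
#else
rev32 x3,x3 // little-endian: byte-swap each 32-bit word
#endif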


@@ -1,7 +1,7 @@
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
@@ -28,6 +28,7 @@
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
// ThunderX2 2.54 13.2 (+40%) 8.40 (+18%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.
@@ -53,27 +54,23 @@
// deliver much less improvement, likely *negative* on Cortex-A5x.
// Which is why NEON support is limited to SHA256.]
// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"
.hidden OPENSSL_armcap_P
#endif
.text
.hidden OPENSSL_armcap_P
.globl sha256_block_data_order
.type sha256_block_data_order,%function
.align 6
sha256_block_data_order:
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
# else
ldr x16,.LOPENSSL_armcap_P
# endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
adrp x16,OPENSSL_armcap_P
ldr w16,[x16,#:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA256
b.ne .Lv8_entry
tst w16,#ARMV7_NEON
@@ -1064,15 +1061,6 @@ sha256_block_data_order:
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0 //terminator
.size .LK256,.-.LK256
#ifndef __KERNEL__
.align 3
.LOPENSSL_armcap_P:
# ifdef __ILP32__
.long OPENSSL_armcap_P-.
# else
.quad OPENSSL_armcap_P-.
# endif
#endif
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2


@@ -1,7 +1,7 @@
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
@@ -28,6 +28,7 @@
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
// ThunderX2 2.54 13.2 (+40%) 8.40 (+18%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.
@@ -53,27 +54,23 @@
// deliver much less improvement, likely *negative* on Cortex-A5x.
// Which is why NEON support is limited to SHA256.]
// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"
.hidden OPENSSL_armcap_P
#endif
.text
.hidden OPENSSL_armcap_P
.globl sha512_block_data_order
.type sha512_block_data_order,%function
.align 6
sha512_block_data_order:
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
# else
ldr x16,.LOPENSSL_armcap_P
# endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
adrp x16,OPENSSL_armcap_P
ldr w16,[x16,#:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA512
b.ne .Lv8_entry
#endif
@@ -1086,15 +1083,6 @@ sha512_block_data_order:
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0 // terminator
.size .LK512,.-.LK512
#ifndef __KERNEL__
.align 3
.LOPENSSL_armcap_P:
# ifdef __ILP32__
.long OPENSSL_armcap_P-.
# else
.quad OPENSSL_armcap_P-.
# endif
#endif
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2


@@ -91,12 +91,12 @@ _vpaes_consts:
.align 2
.size _vpaes_consts,.-_vpaes_consts
.align 6
##
## _aes_preheat
##
## Fills register %r10 -> .aes_consts (so you can -fPIC)
## and %xmm9-%xmm15 as specified below.
##
//
// _aes_preheat
//
// Fills register %r10 -> .aes_consts (so you can -fPIC)
// and %xmm9-%xmm15 as specified below.
//
.type _vpaes_encrypt_preheat,%function
.align 4
_vpaes_encrypt_preheat:
@@ -108,21 +108,21 @@ _vpaes_encrypt_preheat:
ret
.size _vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat
##
## _aes_encrypt_core
##
## AES-encrypt %xmm0.
##
## Inputs:
## %xmm0 = input
## %xmm9-%xmm15 as in _vpaes_preheat
## (%rdx) = scheduled keys
##
## Output in %xmm0
## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax
## Preserves %xmm6 - %xmm8 so you get some local vectors
##
##
//
// _aes_encrypt_core
//
// AES-encrypt %xmm0.
//
// Inputs:
// %xmm0 = input
// %xmm9-%xmm15 as in _vpaes_preheat
// (%rdx) = scheduled keys
//
// Output in %xmm0
// Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax
// Preserves %xmm6 - %xmm8 so you get some local vectors
//
//
.type _vpaes_encrypt_core,%function
.align 4
_vpaes_encrypt_core:
@@ -328,11 +328,11 @@ _vpaes_decrypt_preheat:
ret
.size _vpaes_decrypt_preheat,.-_vpaes_decrypt_preheat
##
## Decryption core
##
## Same API as encryption core.
##
//
// Decryption core
//
// Same API as encryption core.
//
.type _vpaes_decrypt_core,%function
.align 4
_vpaes_decrypt_core:
@@ -577,11 +577,11 @@ _vpaes_decrypt_2x:
tbl v1.16b, {v8.16b},v2.16b
ret
.size _vpaes_decrypt_2x,.-_vpaes_decrypt_2x
########################################################
## ##
## AES key schedule ##
## ##
########################################################
////////////////////////////////////////////////////////
// //
// AES key schedule //
// //
////////////////////////////////////////////////////////
.type _vpaes_key_preheat,%function
.align 4
_vpaes_key_preheat:
@@ -637,14 +637,14 @@ _vpaes_schedule_core:
b.eq .Lschedule_192
// 128: fall though
##
## .schedule_128
##
## 128-bit specific part of key schedule.
##
## This schedule is really simple, because all its parts
## are accomplished by the subroutines.
##
//
// .schedule_128
//
// 128-bit specific part of key schedule.
//
// This schedule is really simple, because all its parts
// are accomplished by the subroutines.
//
.Lschedule_128:
mov x0, #10 // mov $10, %esi
@@ -655,21 +655,21 @@ _vpaes_schedule_core:
bl _vpaes_schedule_mangle // write output
b .Loop_schedule_128
##
## .aes_schedule_192
##
## 192-bit specific part of key schedule.
##
## The main body of this schedule is the same as the 128-bit
## schedule, but with more smearing. The long, high side is
## stored in %xmm7 as before, and the short, low side is in
## the high bits of %xmm6.
##
## This schedule is somewhat nastier, however, because each
## round produces 192 bits of key material, or 1.5 round keys.
## Therefore, on each cycle we do 2 rounds and produce 3 round
## keys.
##
//
// .aes_schedule_192
//
// 192-bit specific part of key schedule.
//
// The main body of this schedule is the same as the 128-bit
// schedule, but with more smearing. The long, high side is
// stored in %xmm7 as before, and the short, low side is in
// the high bits of %xmm6.
//
// This schedule is somewhat nastier, however, because each
// round produces 192 bits of key material, or 1.5 round keys.
// Therefore, on each cycle we do 2 rounds and produce 3 round
// keys.
//
.align 4
.Lschedule_192:
sub x0, x0, #8
@@ -693,16 +693,16 @@ _vpaes_schedule_core:
bl _vpaes_schedule_192_smear
b .Loop_schedule_192
##
## .aes_schedule_256
##
## 256-bit specific part of key schedule.
##
## The structure here is very similar to the 128-bit
## schedule, but with an additional "low side" in
## %xmm6. The low side's rounds are the same as the
## high side's, except no rcon and no rotation.
##
//
// .aes_schedule_256
//
// 256-bit specific part of key schedule.
//
// The structure here is very similar to the 128-bit
// schedule, but with an additional "low side" in
// %xmm6. The low side's rounds are the same as the
// high side's, except no rcon and no rotation.
//
.align 4
.Lschedule_256:
ld1 {v0.16b}, [x0] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
@@ -729,16 +729,16 @@ _vpaes_schedule_core:
b .Loop_schedule_256
##
## .aes_schedule_mangle_last
##
## Mangler for last round of key schedule
## Mangles %xmm0
## when encrypting, outputs out(%xmm0) ^ 63
## when decrypting, outputs unskew(%xmm0)
##
## Always called right before return... jumps to cleanup and exits
##
//
// .aes_schedule_mangle_last
//
// Mangler for last round of key schedule
// Mangles %xmm0
// when encrypting, outputs out(%xmm0) ^ 63
// when decrypting, outputs unskew(%xmm0)
//
// Always called right before return... jumps to cleanup and exits
//
.align 4
.Lschedule_mangle_last:
// schedule last round key from xmm0
@@ -772,20 +772,20 @@ _vpaes_schedule_core:
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
##
## .aes_schedule_192_smear
##
## Smear the short, low side in the 192-bit key schedule.
##
## Inputs:
## %xmm7: high side, b a x y
## %xmm6: low side, d c 0 0
## %xmm13: 0
##
## Outputs:
## %xmm6: b+c+d b+c 0 0
## %xmm0: b+c+d b+c b a
##
//
// .aes_schedule_192_smear
//
// Smear the short, low side in the 192-bit key schedule.
//
// Inputs:
// %xmm7: high side, b a x y
// %xmm6: low side, d c 0 0
// %xmm13: 0
//
// Outputs:
// %xmm6: b+c+d b+c 0 0
// %xmm0: b+c+d b+c b a
//
.type _vpaes_schedule_192_smear,%function
.align 4
_vpaes_schedule_192_smear:
@@ -801,24 +801,24 @@ _vpaes_schedule_192_smear:
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
##
## .aes_schedule_round
##
## Runs one main round of the key schedule on %xmm0, %xmm7
##
## Specifically, runs subbytes on the high dword of %xmm0
## then rotates it by one byte and xors into the low dword of
## %xmm7.
##
## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
## next rcon.
##
## Smears the dwords of %xmm7 by xoring the low into the
## second low, result into third, result into highest.
##
## Returns results in %xmm7 = %xmm0.
## Clobbers %xmm1-%xmm4, %r11.
##
//
// .aes_schedule_round
//
// Runs one main round of the key schedule on %xmm0, %xmm7
//
// Specifically, runs subbytes on the high dword of %xmm0
// then rotates it by one byte and xors into the low dword of
// %xmm7.
//
// Adds rcon from low byte of %xmm8, then rotates %xmm8 for
// next rcon.
//
// Smears the dwords of %xmm7 by xoring the low into the
// second low, result into third, result into highest.
//
// Returns results in %xmm7 = %xmm0.
// Clobbers %xmm1-%xmm4, %r11.
//
.type _vpaes_schedule_round,%function
.align 4
_vpaes_schedule_round:
@@ -866,15 +866,15 @@ _vpaes_schedule_low_round:
ret
.size _vpaes_schedule_round,.-_vpaes_schedule_round
##
## .aes_schedule_transform
##
## Linear-transform %xmm0 according to tables at (%r11)
##
## Requires that %xmm9 = 0x0F0F... as in preheat
## Output in %xmm0
## Clobbers %xmm1, %xmm2
##
//
// .aes_schedule_transform
//
// Linear-transform %xmm0 according to tables at (%r11)
//
// Requires that %xmm9 = 0x0F0F... as in preheat
// Output in %xmm0
// Clobbers %xmm1, %xmm2
//
.type _vpaes_schedule_transform,%function
.align 4
_vpaes_schedule_transform:
@@ -888,29 +888,29 @@ _vpaes_schedule_transform:
ret
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
##
## .aes_schedule_mangle
##
## Mangle xmm0 from (basis-transformed) standard version
## to our version.
##
## On encrypt,
## xor with 0x63
## multiply by circulant 0,1,1,1
## apply shiftrows transform
##
## On decrypt,
## xor with 0x63
## multiply by "inverse mixcolumns" circulant E,B,D,9
## deskew
## apply shiftrows transform
##
##
## Writes out to (%rdx), and increments or decrements it
## Keeps track of round number mod 4 in %r8
## Preserves xmm0
## Clobbers xmm1-xmm5
##
//
// .aes_schedule_mangle
//
// Mangle xmm0 from (basis-transformed) standard version
// to our version.
//
// On encrypt,
// xor with 0x63
// multiply by circulant 0,1,1,1
// apply shiftrows transform
//
// On decrypt,
// xor with 0x63
// multiply by "inverse mixcolumns" circulant E,B,D,9
// deskew
// apply shiftrows transform
//
//
// Writes out to (%rdx), and increments or decrements it
// Keeps track of round number mod 4 in %r8
// Preserves xmm0
// Clobbers xmm1-xmm5
//
.type _vpaes_schedule_mangle,%function
.align 4
_vpaes_schedule_mangle:

File diff suppressed because it is too large.


@@ -788,3 +788,24 @@ aesni_gcm_encrypt:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
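
The trailer added to each x86-64 module is an ELF NT_GNU_PROPERTY_TYPE_0 note (n_type 5, owner "GNU"); property type 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND, and the 4-byte payload of 3 sets bit 0 (IBT) and bit 1 (SHSTK), marking the object as CET-compatible. An annotated equivalent, using .asciz where the generated files spell the owner out in .byte for the sake of Solaris's assembler:

.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f # n_namesz: size of the owner name ("GNU\0" = 4)
.long 4f - 1f # n_descsz: size of the descriptor
.long 5 # n_type: NT_GNU_PROPERTY_TYPE_0
0: .asciz "GNU" # owner
1: .p2align 3 # descriptor is 8-byte aligned
.long 0xc0000002 # GNU_PROPERTY_X86_FEATURE_1_AND
.long 3f - 2f # size of this property's data (4)
2: .long 3 # bit 0 = IBT, bit 1 = SHSTK
3: .p2align 3 # pad to 8
4: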


@@ -49,39 +49,47 @@ aesni_multi_cbc_encrypt:
.Lenc4x_loop_grande:
movl %edx,24(%rsp)
xorl %edx,%edx
movl -64(%rdi),%ecx
movq -80(%rdi),%r8
cmpl %edx,%ecx
movq -72(%rdi),%r12
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu -56(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
movl -24(%rdi),%ecx
movq -40(%rdi),%r9
cmpl %edx,%ecx
movq -32(%rdi),%r13
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu -16(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
movl 16(%rdi),%ecx
movq 0(%rdi),%r10
cmpl %edx,%ecx
movq 8(%rdi),%r14
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu 24(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
movl 56(%rdi),%ecx
movq 40(%rdi),%r11
cmpl %edx,%ecx
movq 48(%rdi),%r15
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu 64(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
@@ -261,6 +269,7 @@ aesni_multi_cbc_encrypt:
leaq 160(%rdi),%rdi
decl %edx
jnz .Lenc4x_loop_grande
@@ -331,39 +340,47 @@ aesni_multi_cbc_decrypt:
.Ldec4x_loop_grande:
movl %edx,24(%rsp)
xorl %edx,%edx
movl -64(%rdi),%ecx
movq -80(%rdi),%r8
cmpl %edx,%ecx
movq -72(%rdi),%r12
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu -56(%rdi),%xmm6
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
movl -24(%rdi),%ecx
movq -40(%rdi),%r9
cmpl %edx,%ecx
movq -32(%rdi),%r13
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu -16(%rdi),%xmm7
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
movl 16(%rdi),%ecx
movq 0(%rdi),%r10
cmpl %edx,%ecx
movq 8(%rdi),%r14
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu 24(%rdi),%xmm8
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
movl 56(%rdi),%ecx
movq 40(%rdi),%r11
cmpl %edx,%ecx
movq 48(%rdi),%r15
cmovgl %ecx,%edx
testl %ecx,%ecx
movdqu 64(%rdi),%xmm9
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
@@ -599,89 +616,121 @@ _avx_cbc_enc_shortcut:
.Lenc8x_loop_grande:
xorl %edx,%edx
movl -144(%rdi),%ecx
movq -160(%rdi),%r8
cmpl %edx,%ecx
movq -152(%rdi),%rbx
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -136(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
subq %r8,%rbx
movq %rbx,64(%rsp)
movl -104(%rdi),%ecx
movq -120(%rdi),%r9
cmpl %edx,%ecx
movq -112(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -96(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
subq %r9,%rbp
movq %rbp,72(%rsp)
movl -64(%rdi),%ecx
movq -80(%rdi),%r10
cmpl %edx,%ecx
movq -72(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -56(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
subq %r10,%rbp
movq %rbp,80(%rsp)
movl -24(%rdi),%ecx
movq -40(%rdi),%r11
cmpl %edx,%ecx
movq -32(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -16(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
subq %r11,%rbp
movq %rbp,88(%rsp)
movl 16(%rdi),%ecx
movq 0(%rdi),%r12
cmpl %edx,%ecx
movq 8(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 24(%rdi),%xmm6
movl %ecx,48(%rsp)
cmovleq %rsp,%r12
subq %r12,%rbp
movq %rbp,96(%rsp)
movl 56(%rdi),%ecx
movq 40(%rdi),%r13
cmpl %edx,%ecx
movq 48(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 64(%rdi),%xmm7
movl %ecx,52(%rsp)
cmovleq %rsp,%r13
subq %r13,%rbp
movq %rbp,104(%rsp)
movl 96(%rdi),%ecx
movq 80(%rdi),%r14
cmpl %edx,%ecx
movq 88(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 104(%rdi),%xmm8
movl %ecx,56(%rsp)
cmovleq %rsp,%r14
subq %r14,%rbp
movq %rbp,112(%rsp)
movl 136(%rdi),%ecx
movq 120(%rdi),%r15
cmpl %edx,%ecx
movq 128(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 144(%rdi),%xmm9
movl %ecx,60(%rsp)
cmovleq %rsp,%r15
@@ -1056,96 +1105,128 @@ _avx_cbc_dec_shortcut:
.Ldec8x_loop_grande:
xorl %edx,%edx
movl -144(%rdi),%ecx
movq -160(%rdi),%r8
cmpl %edx,%ecx
movq -152(%rdi),%rbx
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -136(%rdi),%xmm2
movl %ecx,32(%rsp)
cmovleq %rsp,%r8
subq %r8,%rbx
movq %rbx,64(%rsp)
vmovdqu %xmm2,192(%rsp)
movl -104(%rdi),%ecx
movq -120(%rdi),%r9
cmpl %edx,%ecx
movq -112(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -96(%rdi),%xmm3
movl %ecx,36(%rsp)
cmovleq %rsp,%r9
subq %r9,%rbp
movq %rbp,72(%rsp)
vmovdqu %xmm3,208(%rsp)
movl -64(%rdi),%ecx
movq -80(%rdi),%r10
cmpl %edx,%ecx
movq -72(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -56(%rdi),%xmm4
movl %ecx,40(%rsp)
cmovleq %rsp,%r10
subq %r10,%rbp
movq %rbp,80(%rsp)
vmovdqu %xmm4,224(%rsp)
movl -24(%rdi),%ecx
movq -40(%rdi),%r11
cmpl %edx,%ecx
movq -32(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu -16(%rdi),%xmm5
movl %ecx,44(%rsp)
cmovleq %rsp,%r11
subq %r11,%rbp
movq %rbp,88(%rsp)
vmovdqu %xmm5,240(%rsp)
movl 16(%rdi),%ecx
movq 0(%rdi),%r12
cmpl %edx,%ecx
movq 8(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 24(%rdi),%xmm6
movl %ecx,48(%rsp)
cmovleq %rsp,%r12
subq %r12,%rbp
movq %rbp,96(%rsp)
vmovdqu %xmm6,256(%rsp)
movl 56(%rdi),%ecx
movq 40(%rdi),%r13
cmpl %edx,%ecx
movq 48(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 64(%rdi),%xmm7
movl %ecx,52(%rsp)
cmovleq %rsp,%r13
subq %r13,%rbp
movq %rbp,104(%rsp)
vmovdqu %xmm7,272(%rsp)
movl 96(%rdi),%ecx
movq 80(%rdi),%r14
cmpl %edx,%ecx
movq 88(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 104(%rdi),%xmm8
movl %ecx,56(%rsp)
cmovleq %rsp,%r14
subq %r14,%rbp
movq %rbp,112(%rsp)
vmovdqu %xmm8,288(%rsp)
movl 136(%rdi),%ecx
movq 120(%rdi),%r15
cmpl %edx,%ecx
movq 128(%rdi),%rbp
cmovgl %ecx,%edx
testl %ecx,%ecx
vmovdqu 144(%rdi),%xmm9
movl %ecx,60(%rsp)
cmovleq %rsp,%r15
@@ -1506,3 +1587,24 @@ _avx_cbc_dec_shortcut:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
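
A reading of the testl %ecx,%ecx lines threaded through the multi-block loaders above, assuming (since the rendered diff carries no +/- markers) that the testl lines are the additions: they re-derive the flags from each lane's own length immediately before the cmovleq, so the dummy pointer is selected exactly when that length is non-positive, instead of reusing the flags of the earlier cmpl %edx,%ecx against the running maximum. Per-lane pattern:

movl -64(%rdi),%ecx # this lane's length
cmpl %edx,%ecx # compare against the running maximum ...
cmovgl %ecx,%edx # ... edx = max(edx, ecx)
testl %ecx,%ecx # re-test the length itself: SF/ZF from ecx
cmovleq %rsp,%r8 # length <= 0 -> park the lane on a dummy pointer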


@@ -3034,3 +3034,24 @@ aesni_cbc_sha1_enc_shaext:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -4434,3 +4434,24 @@ aesni_cbc_sha256_enc_shaext:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -6,6 +6,7 @@
.align 16
aesni_encrypt:
.cfi_startproc
.byte 243,15,30,250
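# 243,15,30,250 is "endbr64", the Intel CET indirect-branch landing pad,
# written as raw bytes so that assemblers without CET support still accept it.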
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
@ -32,6 +33,7 @@ aesni_encrypt:
.align 16
aesni_decrypt:
.cfi_startproc
.byte 243,15,30,250
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
@ -519,6 +521,7 @@ _aesni_decrypt8:
.align 16
aesni_ecb_encrypt:
.cfi_startproc
.byte 243,15,30,250
andq $-16,%rdx
jz .Lecb_ret
@ -863,6 +866,7 @@ aesni_ecb_encrypt:
.align 16
aesni_ccm64_encrypt_blocks:
.cfi_startproc
.byte 243,15,30,250
movl 240(%rcx),%eax
movdqu (%r8),%xmm6
movdqa .Lincrement64(%rip),%xmm9
@ -928,6 +932,7 @@ aesni_ccm64_encrypt_blocks:
.align 16
aesni_ccm64_decrypt_blocks:
.cfi_startproc
.byte 243,15,30,250
movl 240(%rcx),%eax
movups (%r8),%xmm6
movdqu (%r9),%xmm3
@ -1027,6 +1032,7 @@ aesni_ccm64_decrypt_blocks:
.align 16
aesni_ctr32_encrypt_blocks:
.cfi_startproc
.byte 243,15,30,250
cmpq $1,%rdx
jne .Lctr32_bulk
@ -1605,6 +1611,7 @@ aesni_ctr32_encrypt_blocks:
.align 16
aesni_xts_encrypt:
.cfi_startproc
.byte 243,15,30,250
leaq (%rsp),%r11
.cfi_def_cfa_register %r11
pushq %rbp
@ -2075,6 +2082,7 @@ aesni_xts_encrypt:
.align 16
aesni_xts_decrypt:
.cfi_startproc
.byte 243,15,30,250
leaq (%rsp),%r11
.cfi_def_cfa_register %r11
pushq %rbp
@ -2582,6 +2590,7 @@ aesni_xts_decrypt:
.align 32
aesni_ocb_encrypt:
.cfi_startproc
.byte 243,15,30,250
leaq (%rsp),%rax
pushq %rbx
.cfi_adjust_cfa_offset 8
@ -3009,6 +3018,7 @@ __ocb_encrypt1:
.align 32
aesni_ocb_decrypt:
.cfi_startproc
.byte 243,15,30,250
leaq (%rsp),%rax
pushq %rbx
.cfi_adjust_cfa_offset 8
@ -3446,6 +3456,7 @@ __ocb_decrypt1:
.align 16
aesni_cbc_encrypt:
.cfi_startproc
.byte 243,15,30,250
testq %rdx,%rdx
jz .Lcbc_ret
@ -4473,3 +4484,24 @@ __aesni_set_encrypt_key:
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:

File diff suppressed because it is too large

@ -2192,3 +2192,24 @@ ChaCha20_8x:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -1669,6 +1669,7 @@ Camellia_Ekeygen:
.align 16
Camellia_cbc_encrypt:
.cfi_startproc
.byte 243,15,30,250
cmpq $0,%rdx
je .Lcbc_abort
pushq %rbx
@ -1923,3 +1924,24 @@ Camellia_cbc_encrypt:
.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -1036,3 +1036,24 @@ padlock_ctr32_encrypt:
.align 8
.Lpadlock_saved_context:
.quad 0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -7342,3 +7342,24 @@ ecp_nistz256_point_add_affinex:
.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -7,6 +7,7 @@
.align 16
gcm_gmult_4bit:
.cfi_startproc
.byte 243,15,30,250
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
@ -118,6 +119,7 @@ gcm_gmult_4bit:
.align 16
gcm_ghash_4bit:
.cfi_startproc
.byte 243,15,30,250
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
@ -865,6 +867,7 @@ gcm_init_clmul:
.align 16
gcm_gmult_clmul:
.cfi_startproc
.byte 243,15,30,250
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@ -918,6 +921,7 @@ gcm_gmult_clmul:
.align 32
gcm_ghash_clmul:
.cfi_startproc
.byte 243,15,30,250
.L_ghash_clmul:
movdqa .Lbswap_mask(%rip),%xmm10
@ -1412,6 +1416,7 @@ gcm_init_avx:
.align 32
gcm_gmult_avx:
.cfi_startproc
.byte 243,15,30,250
jmp .L_gmult_clmul
.cfi_endproc
.size gcm_gmult_avx,.-gcm_gmult_avx
@ -1420,6 +1425,7 @@ gcm_gmult_avx:
.align 32
gcm_ghash_avx:
.cfi_startproc
.byte 243,15,30,250
vzeroupper
vmovdqu (%rdi),%xmm10
@ -1846,3 +1852,24 @@ gcm_ghash_avx:
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -523,3 +523,24 @@ iotas:
.quad 0x8000000080008008
.size iotas,.-iotas
.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -2,9 +2,9 @@
.text
.align 16
.globl md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
.globl ossl_md5_block_asm_data_order
.type ossl_md5_block_asm_data_order,@function
ossl_md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
@ -681,4 +681,25 @@ md5_block_asm_data_order:
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
.size ossl_md5_block_asm_data_order,.-ossl_md5_block_asm_data_order
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -2067,3 +2067,24 @@ xor128_decrypt_n_pad:
.byte 0xf3,0xc3
.cfi_endproc
.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -1280,3 +1280,24 @@ rc4_md5_enc:
.byte 0xf3,0xc3
.cfi_endproc
.size rc4_md5_enc,.-rc4_md5_enc
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -7,6 +7,7 @@
.align 16
RC4:
.cfi_startproc
.byte 243,15,30,250
orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
@ -536,6 +537,7 @@ RC4:
.align 16
RC4_set_key:
.cfi_startproc
.byte 243,15,30,250
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
@ -610,6 +612,7 @@ RC4_set_key:
.align 16
RC4_options:
.cfi_startproc
.byte 243,15,30,250
leaq .Lopts(%rip),%rax
movl OPENSSL_ia32cap_P(%rip),%edx
btl $20,%edx
@ -631,3 +634,24 @@ RC4_options:
.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-RC4_options
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -1743,3 +1743,24 @@ rsaz_avx2_eligible:
.long 2,2,2,2, 3,3,3,3
.long 4,4,4,4, 4,4,4,4
.align 64
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -0,0 +1,902 @@
/* Do not modify. This file is auto-generated from rsaz-avx512.pl. */
.globl ossl_rsaz_avx512ifma_eligible
.type ossl_rsaz_avx512ifma_eligible,@function
.align 32
ossl_rsaz_avx512ifma_eligible:
movl OPENSSL_ia32cap_P+8(%rip),%ecx
xorl %eax,%eax
andl $2149777408,%ecx
cmpl $2149777408,%ecx
cmovel %ecx,%eax
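# 2149777408 = 0x80230000, the AVX512F, AVX512DQ, AVX512_IFMA and AVX512VL
# feature bits of the CPUID.(EAX=7):EBX word cached at OPENSSL_ia32cap_P+8;
# the routine returns non-zero only when all four are present.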
.byte 0xf3,0xc3
.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
.text
.globl ossl_rsaz_amm52x20_x1_256
.type ossl_rsaz_amm52x20_x1_256,@function
.align 32
ossl_rsaz_amm52x20_x1_256:
.cfi_startproc
.byte 243,15,30,250
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lrsaz_amm52x20_x1_256_body:
vpxord %ymm0,%ymm0,%ymm0
vmovdqa64 %ymm0,%ymm1
vmovdqa64 %ymm0,%ymm16
vmovdqa64 %ymm0,%ymm17
vmovdqa64 %ymm0,%ymm18
vmovdqa64 %ymm0,%ymm19
xorl %r9d,%r9d
movq %rdx,%r11
movq $0xfffffffffffff,%rax
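# 0xfffffffffffff is a 52-bit mask: the operands are kept in radix-2^52
# limbs so that vpmadd52luq/vpmadd52huq can accumulate partial products
# without intermediate carries.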
movl $5,%ebx
.align 32
.Lloop5:
movq 0(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
movq 8(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
movq 16(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
movq 24(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq %r8,%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
leaq 32(%r11),%r11
decl %ebx
jne .Lloop5
vmovdqa64 .Lmask52x4(%rip),%ymm4
vpbroadcastq %r9,%ymm3
vpblendd $3,%ymm3,%ymm1,%ymm1
vpsrlq $52,%ymm1,%ymm24
vpsrlq $52,%ymm16,%ymm25
vpsrlq $52,%ymm17,%ymm26
vpsrlq $52,%ymm18,%ymm27
vpsrlq $52,%ymm19,%ymm28
valignq $3,%ymm27,%ymm28,%ymm28
valignq $3,%ymm26,%ymm27,%ymm27
valignq $3,%ymm25,%ymm26,%ymm26
valignq $3,%ymm24,%ymm25,%ymm25
valignq $3,%ymm0,%ymm24,%ymm24
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
vpaddq %ymm24,%ymm1,%ymm1
vpaddq %ymm25,%ymm16,%ymm16
vpaddq %ymm26,%ymm17,%ymm17
vpaddq %ymm27,%ymm18,%ymm18
vpaddq %ymm28,%ymm19,%ymm19
vpcmpuq $1,%ymm1,%ymm4,%k1
vpcmpuq $1,%ymm16,%ymm4,%k2
vpcmpuq $1,%ymm17,%ymm4,%k3
vpcmpuq $1,%ymm18,%ymm4,%k4
vpcmpuq $1,%ymm19,%ymm4,%k5
kmovb %k1,%r14d
kmovb %k2,%r13d
kmovb %k3,%r12d
kmovb %k4,%r11d
kmovb %k5,%r10d
vpcmpuq $0,%ymm1,%ymm4,%k1
vpcmpuq $0,%ymm16,%ymm4,%k2
vpcmpuq $0,%ymm17,%ymm4,%k3
vpcmpuq $0,%ymm18,%ymm4,%k4
vpcmpuq $0,%ymm19,%ymm4,%k5
kmovb %k1,%r9d
kmovb %k2,%r8d
kmovb %k3,%ebx
kmovb %k4,%ecx
kmovb %k5,%edx
shlb $4,%r13b
orb %r13b,%r14b
shlb $4,%r11b
orb %r11b,%r12b
addb %r14b,%r14b
adcb %r12b,%r12b
adcb %r10b,%r10b
shlb $4,%r8b
orb %r8b,%r9b
shlb $4,%cl
orb %cl,%bl
addb %r9b,%r14b
adcb %bl,%r12b
adcb %dl,%r10b
xorb %r9b,%r14b
xorb %bl,%r12b
xorb %dl,%r10b
kmovb %r14d,%k1
shrb $4,%r14b
kmovb %r14d,%k2
kmovb %r12d,%k3
shrb $4,%r12b
kmovb %r12d,%k4
kmovb %r10d,%k5
vpsubq %ymm4,%ymm1,%ymm1{%k1}
vpsubq %ymm4,%ymm16,%ymm16{%k2}
vpsubq %ymm4,%ymm17,%ymm17{%k3}
vpsubq %ymm4,%ymm18,%ymm18{%k4}
vpsubq %ymm4,%ymm19,%ymm19{%k5}
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
vmovdqu64 %ymm1,(%rdi)
vmovdqu64 %ymm16,32(%rdi)
vmovdqu64 %ymm17,64(%rdi)
vmovdqu64 %ymm18,96(%rdi)
vmovdqu64 %ymm19,128(%rdi)
vzeroupper
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%r12
.cfi_restore %r12
movq 32(%rsp),%rbp
.cfi_restore %rbp
movq 40(%rsp),%rbx
.cfi_restore %rbx
leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lrsaz_amm52x20_x1_256_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_rsaz_amm52x20_x1_256, .-ossl_rsaz_amm52x20_x1_256
.data
.align 32
.Lmask52x4:
.quad 0xfffffffffffff
.quad 0xfffffffffffff
.quad 0xfffffffffffff
.quad 0xfffffffffffff
.text
.globl ossl_rsaz_amm52x20_x2_256
.type ossl_rsaz_amm52x20_x2_256,@function
.align 32
ossl_rsaz_amm52x20_x2_256:
.cfi_startproc
.byte 243,15,30,250
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lrsaz_amm52x20_x2_256_body:
vpxord %ymm0,%ymm0,%ymm0
vmovdqa64 %ymm0,%ymm1
vmovdqa64 %ymm0,%ymm16
vmovdqa64 %ymm0,%ymm17
vmovdqa64 %ymm0,%ymm18
vmovdqa64 %ymm0,%ymm19
vmovdqa64 %ymm0,%ymm2
vmovdqa64 %ymm0,%ymm20
vmovdqa64 %ymm0,%ymm21
vmovdqa64 %ymm0,%ymm22
vmovdqa64 %ymm0,%ymm23
xorl %r9d,%r9d
xorl %r15d,%r15d
movq %rdx,%r11
movq $0xfffffffffffff,%rax
movl $20,%ebx
.align 32
.Lloop20:
movq 0(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 0(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
movq %r12,%r10
adcq $0,%r10
movq (%r8),%r13
imulq %r9,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 0(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r9
adcq %r12,%r10
shrq $52,%r9
salq $12,%r10
orq %r10,%r9
vpmadd52luq 0(%rsi),%ymm3,%ymm1
vpmadd52luq 32(%rsi),%ymm3,%ymm16
vpmadd52luq 64(%rsi),%ymm3,%ymm17
vpmadd52luq 96(%rsi),%ymm3,%ymm18
vpmadd52luq 128(%rsi),%ymm3,%ymm19
vpmadd52luq 0(%rcx),%ymm4,%ymm1
vpmadd52luq 32(%rcx),%ymm4,%ymm16
vpmadd52luq 64(%rcx),%ymm4,%ymm17
vpmadd52luq 96(%rcx),%ymm4,%ymm18
vpmadd52luq 128(%rcx),%ymm4,%ymm19
valignq $1,%ymm1,%ymm16,%ymm1
valignq $1,%ymm16,%ymm17,%ymm16
valignq $1,%ymm17,%ymm18,%ymm17
valignq $1,%ymm18,%ymm19,%ymm18
valignq $1,%ymm19,%ymm0,%ymm19
vmovq %xmm1,%r13
addq %r13,%r9
vpmadd52huq 0(%rsi),%ymm3,%ymm1
vpmadd52huq 32(%rsi),%ymm3,%ymm16
vpmadd52huq 64(%rsi),%ymm3,%ymm17
vpmadd52huq 96(%rsi),%ymm3,%ymm18
vpmadd52huq 128(%rsi),%ymm3,%ymm19
vpmadd52huq 0(%rcx),%ymm4,%ymm1
vpmadd52huq 32(%rcx),%ymm4,%ymm16
vpmadd52huq 64(%rcx),%ymm4,%ymm17
vpmadd52huq 96(%rcx),%ymm4,%ymm18
vpmadd52huq 128(%rcx),%ymm4,%ymm19
movq 160(%r11),%r13
vpbroadcastq %r13,%ymm3
movq 160(%rsi),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r15
movq %r12,%r10
adcq $0,%r10
movq 8(%r8),%r13
imulq %r15,%r13
andq %rax,%r13
vpbroadcastq %r13,%ymm4
movq 160(%rcx),%rdx
mulxq %r13,%r13,%r12
addq %r13,%r15
adcq %r12,%r10
shrq $52,%r15
salq $12,%r10
orq %r10,%r15
vpmadd52luq 160(%rsi),%ymm3,%ymm2
vpmadd52luq 192(%rsi),%ymm3,%ymm20
vpmadd52luq 224(%rsi),%ymm3,%ymm21
vpmadd52luq 256(%rsi),%ymm3,%ymm22
vpmadd52luq 288(%rsi),%ymm3,%ymm23
vpmadd52luq 160(%rcx),%ymm4,%ymm2
vpmadd52luq 192(%rcx),%ymm4,%ymm20
vpmadd52luq 224(%rcx),%ymm4,%ymm21
vpmadd52luq 256(%rcx),%ymm4,%ymm22
vpmadd52luq 288(%rcx),%ymm4,%ymm23
valignq $1,%ymm2,%ymm20,%ymm2
valignq $1,%ymm20,%ymm21,%ymm20
valignq $1,%ymm21,%ymm22,%ymm21
valignq $1,%ymm22,%ymm23,%ymm22
valignq $1,%ymm23,%ymm0,%ymm23
vmovq %xmm2,%r13
addq %r13,%r15
vpmadd52huq 160(%rsi),%ymm3,%ymm2
vpmadd52huq 192(%rsi),%ymm3,%ymm20
vpmadd52huq 224(%rsi),%ymm3,%ymm21
vpmadd52huq 256(%rsi),%ymm3,%ymm22
vpmadd52huq 288(%rsi),%ymm3,%ymm23
vpmadd52huq 160(%rcx),%ymm4,%ymm2
vpmadd52huq 192(%rcx),%ymm4,%ymm20
vpmadd52huq 224(%rcx),%ymm4,%ymm21
vpmadd52huq 256(%rcx),%ymm4,%ymm22
vpmadd52huq 288(%rcx),%ymm4,%ymm23
leaq 8(%r11),%r11
decl %ebx
jne .Lloop20
vmovdqa64 .Lmask52x4(%rip),%ymm4
vpbroadcastq %r9,%ymm3
vpblendd $3,%ymm3,%ymm1,%ymm1
vpsrlq $52,%ymm1,%ymm24
vpsrlq $52,%ymm16,%ymm25
vpsrlq $52,%ymm17,%ymm26
vpsrlq $52,%ymm18,%ymm27
vpsrlq $52,%ymm19,%ymm28
valignq $3,%ymm27,%ymm28,%ymm28
valignq $3,%ymm26,%ymm27,%ymm27
valignq $3,%ymm25,%ymm26,%ymm26
valignq $3,%ymm24,%ymm25,%ymm25
valignq $3,%ymm0,%ymm24,%ymm24
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
vpaddq %ymm24,%ymm1,%ymm1
vpaddq %ymm25,%ymm16,%ymm16
vpaddq %ymm26,%ymm17,%ymm17
vpaddq %ymm27,%ymm18,%ymm18
vpaddq %ymm28,%ymm19,%ymm19
vpcmpuq $1,%ymm1,%ymm4,%k1
vpcmpuq $1,%ymm16,%ymm4,%k2
vpcmpuq $1,%ymm17,%ymm4,%k3
vpcmpuq $1,%ymm18,%ymm4,%k4
vpcmpuq $1,%ymm19,%ymm4,%k5
kmovb %k1,%r14d
kmovb %k2,%r13d
kmovb %k3,%r12d
kmovb %k4,%r11d
kmovb %k5,%r10d
vpcmpuq $0,%ymm1,%ymm4,%k1
vpcmpuq $0,%ymm16,%ymm4,%k2
vpcmpuq $0,%ymm17,%ymm4,%k3
vpcmpuq $0,%ymm18,%ymm4,%k4
vpcmpuq $0,%ymm19,%ymm4,%k5
kmovb %k1,%r9d
kmovb %k2,%r8d
kmovb %k3,%ebx
kmovb %k4,%ecx
kmovb %k5,%edx
shlb $4,%r13b
orb %r13b,%r14b
shlb $4,%r11b
orb %r11b,%r12b
addb %r14b,%r14b
adcb %r12b,%r12b
adcb %r10b,%r10b
shlb $4,%r8b
orb %r8b,%r9b
shlb $4,%cl
orb %cl,%bl
addb %r9b,%r14b
adcb %bl,%r12b
adcb %dl,%r10b
xorb %r9b,%r14b
xorb %bl,%r12b
xorb %dl,%r10b
kmovb %r14d,%k1
shrb $4,%r14b
kmovb %r14d,%k2
kmovb %r12d,%k3
shrb $4,%r12b
kmovb %r12d,%k4
kmovb %r10d,%k5
vpsubq %ymm4,%ymm1,%ymm1{%k1}
vpsubq %ymm4,%ymm16,%ymm16{%k2}
vpsubq %ymm4,%ymm17,%ymm17{%k3}
vpsubq %ymm4,%ymm18,%ymm18{%k4}
vpsubq %ymm4,%ymm19,%ymm19{%k5}
vpandq %ymm4,%ymm1,%ymm1
vpandq %ymm4,%ymm16,%ymm16
vpandq %ymm4,%ymm17,%ymm17
vpandq %ymm4,%ymm18,%ymm18
vpandq %ymm4,%ymm19,%ymm19
vpbroadcastq %r15,%ymm3
vpblendd $3,%ymm3,%ymm2,%ymm2
vpsrlq $52,%ymm2,%ymm24
vpsrlq $52,%ymm20,%ymm25
vpsrlq $52,%ymm21,%ymm26
vpsrlq $52,%ymm22,%ymm27
vpsrlq $52,%ymm23,%ymm28
valignq $3,%ymm27,%ymm28,%ymm28
valignq $3,%ymm26,%ymm27,%ymm27
valignq $3,%ymm25,%ymm26,%ymm26
valignq $3,%ymm24,%ymm25,%ymm25
valignq $3,%ymm0,%ymm24,%ymm24
vpandq %ymm4,%ymm2,%ymm2
vpandq %ymm4,%ymm20,%ymm20
vpandq %ymm4,%ymm21,%ymm21
vpandq %ymm4,%ymm22,%ymm22
vpandq %ymm4,%ymm23,%ymm23
vpaddq %ymm24,%ymm2,%ymm2
vpaddq %ymm25,%ymm20,%ymm20
vpaddq %ymm26,%ymm21,%ymm21
vpaddq %ymm27,%ymm22,%ymm22
vpaddq %ymm28,%ymm23,%ymm23
vpcmpuq $1,%ymm2,%ymm4,%k1
vpcmpuq $1,%ymm20,%ymm4,%k2
vpcmpuq $1,%ymm21,%ymm4,%k3
vpcmpuq $1,%ymm22,%ymm4,%k4
vpcmpuq $1,%ymm23,%ymm4,%k5
kmovb %k1,%r14d
kmovb %k2,%r13d
kmovb %k3,%r12d
kmovb %k4,%r11d
kmovb %k5,%r10d
vpcmpuq $0,%ymm2,%ymm4,%k1
vpcmpuq $0,%ymm20,%ymm4,%k2
vpcmpuq $0,%ymm21,%ymm4,%k3
vpcmpuq $0,%ymm22,%ymm4,%k4
vpcmpuq $0,%ymm23,%ymm4,%k5
kmovb %k1,%r9d
kmovb %k2,%r8d
kmovb %k3,%ebx
kmovb %k4,%ecx
kmovb %k5,%edx
shlb $4,%r13b
orb %r13b,%r14b
shlb $4,%r11b
orb %r11b,%r12b
addb %r14b,%r14b
adcb %r12b,%r12b
adcb %r10b,%r10b
shlb $4,%r8b
orb %r8b,%r9b
shlb $4,%cl
orb %cl,%bl
addb %r9b,%r14b
adcb %bl,%r12b
adcb %dl,%r10b
xorb %r9b,%r14b
xorb %bl,%r12b
xorb %dl,%r10b
kmovb %r14d,%k1
shrb $4,%r14b
kmovb %r14d,%k2
kmovb %r12d,%k3
shrb $4,%r12b
kmovb %r12d,%k4
kmovb %r10d,%k5
vpsubq %ymm4,%ymm2,%ymm2{%k1}
vpsubq %ymm4,%ymm20,%ymm20{%k2}
vpsubq %ymm4,%ymm21,%ymm21{%k3}
vpsubq %ymm4,%ymm22,%ymm22{%k4}
vpsubq %ymm4,%ymm23,%ymm23{%k5}
vpandq %ymm4,%ymm2,%ymm2
vpandq %ymm4,%ymm20,%ymm20
vpandq %ymm4,%ymm21,%ymm21
vpandq %ymm4,%ymm22,%ymm22
vpandq %ymm4,%ymm23,%ymm23
vmovdqu64 %ymm1,(%rdi)
vmovdqu64 %ymm16,32(%rdi)
vmovdqu64 %ymm17,64(%rdi)
vmovdqu64 %ymm18,96(%rdi)
vmovdqu64 %ymm19,128(%rdi)
vmovdqu64 %ymm2,160(%rdi)
vmovdqu64 %ymm20,192(%rdi)
vmovdqu64 %ymm21,224(%rdi)
vmovdqu64 %ymm22,256(%rdi)
vmovdqu64 %ymm23,288(%rdi)
vzeroupper
movq 0(%rsp),%r15
.cfi_restore %r15
movq 8(%rsp),%r14
.cfi_restore %r14
movq 16(%rsp),%r13
.cfi_restore %r13
movq 24(%rsp),%r12
.cfi_restore %r12
movq 32(%rsp),%rbp
.cfi_restore %rbp
movq 40(%rsp),%rbx
.cfi_restore %rbx
leaq 48(%rsp),%rsp
.cfi_adjust_cfa_offset -48
.Lrsaz_amm52x20_x2_256_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_rsaz_amm52x20_x2_256, .-ossl_rsaz_amm52x20_x2_256
.text
.align 32
.globl ossl_extract_multiplier_2x20_win5
.type ossl_extract_multiplier_2x20_win5,@function
ossl_extract_multiplier_2x20_win5:
.cfi_startproc
.byte 243,15,30,250
leaq (%rcx,%rcx,4),%rax
salq $5,%rax
addq %rax,%rsi
vmovdqa64 .Lones(%rip),%ymm23
vpbroadcastq %rdx,%ymm22
leaq 10240(%rsi),%rax
vpxor %xmm4,%xmm4,%xmm4
vmovdqa64 %ymm4,%ymm3
vmovdqa64 %ymm4,%ymm2
vmovdqa64 %ymm4,%ymm1
vmovdqa64 %ymm4,%ymm0
vmovdqa64 %ymm4,%ymm21
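# Constant-time gather: the table holds 32 window-5 entries of 320 bytes
# each (two 20-limb multipliers), and the loop below reads every entry,
# keeping the one whose index matches via vpblendmq, so no load address
# depends on the secret window value.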
.align 32
.Lloop:
vpcmpq $0,%ymm21,%ymm22,%k1
addq $320,%rsi
vpaddq %ymm23,%ymm21,%ymm21
vmovdqu64 -320(%rsi),%ymm16
vmovdqu64 -288(%rsi),%ymm17
vmovdqu64 -256(%rsi),%ymm18
vmovdqu64 -224(%rsi),%ymm19
vmovdqu64 -192(%rsi),%ymm20
vpblendmq %ymm16,%ymm0,%ymm0{%k1}
vpblendmq %ymm17,%ymm1,%ymm1{%k1}
vpblendmq %ymm18,%ymm2,%ymm2{%k1}
vpblendmq %ymm19,%ymm3,%ymm3{%k1}
vpblendmq %ymm20,%ymm4,%ymm4{%k1}
cmpq %rsi,%rax
jne .Lloop
vmovdqu64 %ymm0,(%rdi)
vmovdqu64 %ymm1,32(%rdi)
vmovdqu64 %ymm2,64(%rdi)
vmovdqu64 %ymm3,96(%rdi)
vmovdqu64 %ymm4,128(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size ossl_extract_multiplier_2x20_win5, .-ossl_extract_multiplier_2x20_win5
.data
.align 32
.Lones:
.quad 1,1,1,1
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -2014,3 +2014,24 @@ rsaz_512_gather4:
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -30,28 +30,36 @@ sha1_multi_block:
.Loop_grande:
movl %edx,280(%rsp)
xorl %edx,%edx
movq 0(%rsi),%r8
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rbp,%r8
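# One lane of the multi-block scheduler: fetch the lane's data pointer and
# length, track the maximum length in %edx, record the length for later,
# and redirect exhausted lanes (length <= 0) to the dummy buffer in %rbp.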
movq 16(%rsi),%r9
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,4(%rbx)
cmovleq %rbp,%r9
movq 32(%rsi),%r10
movl 40(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,8(%rbx)
cmovleq %rbp,%r10
movq 48(%rsi),%r11
movl 56(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -2586,14 +2594,18 @@ _shaext_shortcut:
.Loop_grande_shaext:
movl %edx,280(%rsp)
xorl %edx,%edx
movq 0(%rsi),%r8
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rsp,%r8
movq 16(%rsi),%r9
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -2969,28 +2981,36 @@ _avx_shortcut:
.Loop_grande_avx:
movl %edx,280(%rsp)
xorl %edx,%edx
movq 0(%rsi),%r8
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rbp,%r8
movq 16(%rsi),%r9
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,4(%rbx)
cmovleq %rbp,%r9
movq 32(%rsi),%r10
movl 40(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,8(%rbx)
cmovleq %rbp,%r10
movq 48(%rsi),%r11
movl 56(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -5054,56 +5074,72 @@ _avx2_shortcut:
movl %edx,552(%rsp)
xorl %edx,%edx
leaq 512(%rsp),%rbx
movq 0(%rsi),%r12
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rbp,%r12
movq 16(%rsi),%r13
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,4(%rbx)
cmovleq %rbp,%r13
movq 32(%rsi),%r14
movl 40(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,8(%rbx)
cmovleq %rbp,%r14
movq 48(%rsi),%r15
movl 56(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,12(%rbx)
cmovleq %rbp,%r15
movq 64(%rsi),%r8
movl 72(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,16(%rbx)
cmovleq %rbp,%r8
movq 80(%rsi),%r9
movl 88(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,20(%rbx)
cmovleq %rbp,%r9
movq 96(%rsi),%r10
movl 104(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,24(%rbx)
cmovleq %rbp,%r10
movq 112(%rsi),%r11
movl 120(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -7266,3 +7302,24 @@ K_XX_XX:
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -5449,3 +5449,24 @@ K_XX_XX:
.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -31,28 +31,36 @@ sha256_multi_block:
.Loop_grande:
movl %edx,280(%rsp)
xorl %edx,%edx
movq 0(%rsi),%r8
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rbp,%r8
movq 16(%rsi),%r9
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,4(%rbx)
cmovleq %rbp,%r9
movq 32(%rsi),%r10
movl 40(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,8(%rbx)
cmovleq %rbp,%r10
movq 48(%rsi),%r11
movl 56(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -2655,14 +2663,18 @@ _shaext_shortcut:
.Loop_grande_shaext:
movl %edx,280(%rsp)
xorl %edx,%edx
movq 0(%rsi),%r8
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rsp,%r8
movq 16(%rsi),%r9
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -3157,28 +3169,36 @@ _avx_shortcut:
.Loop_grande_avx:
movl %edx,280(%rsp)
xorl %edx,%edx
movq 0(%rsi),%r8
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rbp,%r8
movq 16(%rsi),%r9
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,4(%rbx)
cmovleq %rbp,%r9
movq 32(%rsi),%r10
movl 40(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,8(%rbx)
cmovleq %rbp,%r10
movq 48(%rsi),%r11
movl 56(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -5420,56 +5440,72 @@ _avx2_shortcut:
movl %edx,552(%rsp)
xorl %edx,%edx
leaq 512(%rsp),%rbx
movq 0(%rsi),%r12
movl 8(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,0(%rbx)
cmovleq %rbp,%r12
movq 16(%rsi),%r13
movl 24(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,4(%rbx)
cmovleq %rbp,%r13
movq 32(%rsi),%r14
movl 40(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,8(%rbx)
cmovleq %rbp,%r14
movq 48(%rsi),%r15
movl 56(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,12(%rbx)
cmovleq %rbp,%r15
movq 64(%rsi),%r8
movl 72(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,16(%rbx)
cmovleq %rbp,%r8
movq 80(%rsi),%r9
movl 88(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,20(%rbx)
cmovleq %rbp,%r9
movq 96(%rsi),%r10
movl 104(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
testl %ecx,%ecx
movl %ecx,24(%rbx)
cmovleq %rbp,%r10
movq 112(%rsi),%r11
movl 120(%rsi),%ecx
cmpl %edx,%ecx
cmovgl %ecx,%edx
@ -7947,3 +7983,24 @@ K256_shaext:
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -5455,3 +5455,24 @@ sha256_block_data_order_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -5460,3 +5460,24 @@ sha512_block_data_order_avx2:
.byte 0xf3,0xc3
.cfi_endproc
.size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -624,6 +624,7 @@ _vpaes_schedule_mangle:
.align 16
vpaes_set_encrypt_key:
.cfi_startproc
.byte 243,15,30,250
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@ -642,6 +643,7 @@ vpaes_set_encrypt_key:
.align 16
vpaes_set_decrypt_key:
.cfi_startproc
.byte 243,15,30,250
movl %esi,%eax
shrl $5,%eax
addl $5,%eax
@ -665,6 +667,7 @@ vpaes_set_decrypt_key:
.align 16
vpaes_encrypt:
.cfi_startproc
.byte 243,15,30,250
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_encrypt_core
@ -678,6 +681,7 @@ vpaes_encrypt:
.align 16
vpaes_decrypt:
.cfi_startproc
.byte 243,15,30,250
movdqu (%rdi),%xmm0
call _vpaes_preheat
call _vpaes_decrypt_core
@ -690,6 +694,7 @@ vpaes_decrypt:
.align 16
vpaes_cbc_encrypt:
.cfi_startproc
.byte 243,15,30,250
xchgq %rcx,%rdx
subq $16,%rcx
jc .Lcbc_abort
@ -852,3 +857,24 @@ _vpaes_consts:
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -878,3 +878,24 @@ whirlpool_block:
.byte 228,39,65,139,167,125,149,216
.byte 251,238,124,102,221,23,71,158
.byte 202,45,191,7,173,90,131,51
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -801,3 +801,24 @@ x25519_fe64_tobytes:
.cfi_endproc
.size x25519_fe64_tobytes,.-x25519_fe64_tobytes
.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -310,3 +310,24 @@ bn_GF2m_mul_2x2:
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -1238,3 +1238,24 @@ bn_mulx4x_mont:
.size bn_mulx4x_mont,.-bn_mulx4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -3602,3 +3602,24 @@ bn_gather5:
.long 0,0, 1,1
.long 2,2, 2,2
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -13,6 +13,8 @@
.type OPENSSL_atomic_add,@function
.align 16
OPENSSL_atomic_add:
.cfi_startproc
.byte 243,15,30,250
movl (%rdi),%eax
.Lspin: leaq (%rsi,%rax,1),%r8
.byte 0xf0
@ -21,16 +23,20 @@ OPENSSL_atomic_add:
movl %r8d,%eax
.byte 0x48,0x98
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
.globl OPENSSL_rdtsc
.type OPENSSL_rdtsc,@function
.align 16
OPENSSL_rdtsc:
.cfi_startproc
.byte 243,15,30,250
rdtsc
shlq $32,%rdx
orq %rdx,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc
.globl OPENSSL_ia32_cpuid
@ -38,6 +44,7 @@ OPENSSL_rdtsc:
.align 16
OPENSSL_ia32_cpuid:
.cfi_startproc
.byte 243,15,30,250
movq %rbx,%r8
.cfi_register %rbx,%r8
@ -206,6 +213,8 @@ OPENSSL_ia32_cpuid:
.type OPENSSL_cleanse,@function
.align 16
OPENSSL_cleanse:
.cfi_startproc
.byte 243,15,30,250
xorq %rax,%rax
cmpq $15,%rsi
jae .Lot
@ -235,12 +244,15 @@ OPENSSL_cleanse:
cmpq $0,%rsi
jne .Little
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,@function
.align 16
CRYPTO_memcmp:
.cfi_startproc
.byte 243,15,30,250
xorq %rax,%rax
xorq %r10,%r10
cmpq $0,%rdx
@ -269,11 +281,14 @@ CRYPTO_memcmp:
shrq $63,%rax
.Lno_data:
.byte 0xf3,0xc3
.cfi_endproc
.size CRYPTO_memcmp,.-CRYPTO_memcmp
.globl OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,@function
.align 16
OPENSSL_wipe_cpu:
.cfi_startproc
.byte 243,15,30,250
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
@ -300,11 +315,14 @@ OPENSSL_wipe_cpu:
xorq %r11,%r11
leaq 8(%rsp),%rax
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,@function
.align 16
OPENSSL_instrument_bus:
.cfi_startproc
.byte 243,15,30,250
movq %rdi,%r10
movq %rsi,%rcx
movq %rsi,%r11
@ -331,12 +349,15 @@ OPENSSL_instrument_bus:
movq %r11,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
.globl OPENSSL_instrument_bus2
.type OPENSSL_instrument_bus2,@function
.align 16
OPENSSL_instrument_bus2:
.cfi_startproc
.byte 243,15,30,250
movq %rdi,%r10
movq %rsi,%rcx
movq %rdx,%r11
@ -379,11 +400,14 @@ OPENSSL_instrument_bus2:
movq 8(%rsp),%rax
subq %rcx,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.globl OPENSSL_ia32_rdrand_bytes
.type OPENSSL_ia32_rdrand_bytes,@function
.align 16
OPENSSL_ia32_rdrand_bytes:
.cfi_startproc
.byte 243,15,30,250
xorq %rax,%rax
cmpq $0,%rsi
je .Ldone_rdrand_bytes
@ -420,11 +444,14 @@ OPENSSL_ia32_rdrand_bytes:
.Ldone_rdrand_bytes:
xorq %r10,%r10
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
.globl OPENSSL_ia32_rdseed_bytes
.type OPENSSL_ia32_rdseed_bytes,@function
.align 16
OPENSSL_ia32_rdseed_bytes:
.cfi_startproc
.byte 243,15,30,250
xorq %rax,%rax
cmpq $0,%rsi
je .Ldone_rdseed_bytes
@ -461,4 +488,26 @@ OPENSSL_ia32_rdseed_bytes:
.Ldone_rdseed_bytes:
xorq %r10,%r10
.byte 0xf3,0xc3
.cfi_endproc
.size OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:


@ -1,7 +1,7 @@
/* Do not modify. This file is auto-generated from aes-armv4.pl. */
@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ Licensed under the Apache License 2.0 (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ -39,13 +39,14 @@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~21.5 cycles per byte.
@ $output is the last argument if it looks like a file (it has an extension)
@ $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
#endif
.text
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
.thumb
@ -54,6 +55,8 @@
#undef __thumb2__
#endif
.text
.type AES_Te,%object
.align 5
AES_Te:

File diff suppressed because it is too large

@ -1,13 +1,14 @@
/* Do not modify. This file is auto-generated from armv4-gf2m.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
.text
.type mul_1x1_ialu,%function
.align 5
mul_1x1_ialu:
@ -100,11 +101,13 @@ bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
stmdb sp!,{r10,lr}
ldr r12,.LOPENSSL_armcap
# if !defined(_WIN32)
adr r10,.LOPENSSL_armcap
ldr r12,[r12,r10]
#ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
#endif
# endif
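@ Capability lookup: outside _WIN32 the literal holds a PC-relative offset,
@ so the first ldr reaches the armcap slot via the adr base; on Apple and
@ Windows an extra load follows, since there the fetched value is a pointer
@ to OPENSSL_armcap_P rather than the capability word itself.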
tst r12,#ARMV7_NEON
itt ne
ldrne r10,[sp],#8
@ -218,7 +221,11 @@ bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.
# endif
#endif
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2


@ -1,7 +1,6 @@
/* Do not modify. This file is auto-generated from armv4-mont.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__)
.syntax unified
.thumb
@ -9,10 +8,16 @@
.code 32
#endif
.text
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.Lbn_mul_mont
# endif
#endif
.globl bn_mul_mont
@ -26,12 +31,14 @@ bn_mul_mont:
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,.LOPENSSL_armcap
#if !defined(_WIN32)
adr r2,.Lbn_mul_mont
ldr r0,[r0,r2]
#ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r0,[r0]
#endif
# endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu


@ -1,7 +1,6 @@
/* Do not modify. This file is auto-generated from armv4cpuid.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__) && !defined(__APPLE__)
.syntax unified
.thumb
@ -10,6 +9,8 @@
#undef __thumb2__
#endif
.text
.align 5
.globl OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function


@ -1,7 +1,7 @@
/* Do not modify. This file is auto-generated from bsaes-armv7.pl. */
@ Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
@ Copyright 2012-2023 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ Licensed under the Apache License 2.0 (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ -14,7 +14,7 @@
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
@ of Linaro. Permission to use under GPL terms is granted.
@ of Linaro.
@ ====================================================================
@ Bit-sliced AES for ARM NEON
@ -50,6 +50,8 @@
@ April-August 2013
@ Add CBC, CTR and XTS subroutines and adapt for kernel use; courtesy of Ard.
@ $output is the last argument if it looks like a file (it has an extension)
@ $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"
@ -74,7 +76,6 @@
.arch armv7-a
.fpu neon
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
#if defined(__thumb2__) && !defined(__APPLE__)
.thumb
@ -83,6 +84,8 @@
# undef __thumb2__
#endif
.text
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:
@ -1071,18 +1074,18 @@ _bsaes_key_convert:
.globl bsaes_cbc_encrypt
.type bsaes_cbc_encrypt,%function
.globl ossl_bsaes_cbc_encrypt
.type ossl_bsaes_cbc_encrypt,%function
.align 5
bsaes_cbc_encrypt:
ossl_bsaes_cbc_encrypt:
#ifndef __KERNEL__
cmp r2, #128
#ifndef __thumb__
blo AES_cbc_encrypt
#else
bhs 1f
bhs .Lcbc_do_bsaes
b AES_cbc_encrypt
1:
.Lcbc_do_bsaes:
#endif
#endif
@ -1336,12 +1339,12 @@ bsaes_cbc_encrypt:
vst1.8 {q15}, [r8] @ return IV
VFP_ABI_POP
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc}
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
.size ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt
.globl bsaes_ctr32_encrypt_blocks
.type bsaes_ctr32_encrypt_blocks,%function
.globl ossl_bsaes_ctr32_encrypt_blocks
.type ossl_bsaes_ctr32_encrypt_blocks,%function
.align 5
bsaes_ctr32_encrypt_blocks:
ossl_bsaes_ctr32_encrypt_blocks:
cmp r2, #8 @ use plain AES for
blo .Lctr_enc_short @ small sizes
@ -1564,11 +1567,11 @@ bsaes_ctr32_encrypt_blocks:
vstmia sp!, {q0,q1}
ldmia sp!, {r4,r5,r6,r7,r8, pc}
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
.globl bsaes_xts_encrypt
.type bsaes_xts_encrypt,%function
.size ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks
.globl ossl_bsaes_xts_encrypt
.type ossl_bsaes_xts_encrypt,%function
.align 4
bsaes_xts_encrypt:
ossl_bsaes_xts_encrypt:
mov ip, sp
stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} @ 0x20
VFP_ABI_PUSH
@ -2043,12 +2046,12 @@ bsaes_xts_encrypt:
VFP_ABI_POP
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return
.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
.size ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt
.globl bsaes_xts_decrypt
.type bsaes_xts_decrypt,%function
.globl ossl_bsaes_xts_decrypt
.type ossl_bsaes_xts_decrypt,%function
.align 4
bsaes_xts_decrypt:
ossl_bsaes_xts_decrypt:
mov ip, sp
stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} @ 0x20
VFP_ABI_PUSH
@ -2554,5 +2557,5 @@ bsaes_xts_decrypt:
VFP_ABI_POP
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return
.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
.size ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt
#endif


@ -1,7 +1,6 @@
/* Do not modify. This file is auto-generated from chacha-armv4.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#endif
@ -15,6 +14,8 @@
#define ldrhsb ldrbhs
#endif
.text
.align 5
.Lsigma:
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral
@ -22,7 +23,11 @@
.long 1,0,0,0
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.LChaCha20_ctr32
# endif
#else
.word -1
#endif
@ -49,8 +54,10 @@ ChaCha20_ctr32:
cmp r2,#192 @ test len
bls .Lshort
ldr r4,[r14,#-32]
# if !defined(_WIN32)
ldr r4,[r14,r4]
# ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r4,[r4]
# endif
tst r4,#ARMV7_NEON


@ -1,13 +1,13 @@
/* Do not modify. This file is auto-generated from ecp_nistz256-armv4.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
.section .rodata
.globl ecp_nistz256_precomputed
.type ecp_nistz256_precomputed,%object
.align 12
@ -2381,6 +2381,8 @@ ecp_nistz256_precomputed:
.byte 0xec,0xf0,0x42,0x88,0xd0,0x81,0x51,0xf9,0x1b,0xbc,0x43,0xa4,0x37,0xf1,0xd7,0x90,0x21,0x7e,0xa0,0x3e,0x63,0xfb,0x21,0xfa,0x12,0xfb,0xde,0xc7,0xbf,0xb3,0x58,0xe7,0x76,0x42,0x20,0x01,0x3d,0x66,0x80,0xf1,0xb8,0xaf,0xfa,0x7d,0x96,0x89,0x36,0x48,0x95,0xd9,0x6e,0x6d,0xe6,0x4f,0xff,0x2a,0x47,0x61,0xf2,0x04,0xb7,0x83,0x14,0xce
.byte 0x0a,0x3c,0x73,0x17,0x50,0x88,0x03,0x25,0x4a,0xe3,0x13,0x55,0x8b,0x7e,0x50,0x38,0xfc,0x14,0x0b,0x04,0x8e,0xa8,0x5b,0xd6,0x72,0x20,0x60,0xe9,0xaa,0x22,0x82,0x11,0xc6,0xc4,0xd7,0xb9,0xc8,0x0c,0x7e,0x05,0xfb,0x90,0xe4,0x9c,0x28,0x89,0x29,0x99,0x63,0x4d,0xec,0x7b,0x50,0xbd,0xd8,0xa3,0x5b,0x50,0x77,0x19,0x81,0x92,0xce,0x82
.size ecp_nistz256_precomputed,.-ecp_nistz256_precomputed
.text
.align 5
.LRR:@ 2^512 mod P precomputed for NIST P256 polynomial
.long 0x00000003, 0x00000000, 0xffffffff, 0xfffffffb


@ -1,7 +1,6 @@
/* Do not modify. This file is auto-generated from ghash-armv4.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
@ -13,6 +12,8 @@
.code 32
#endif
.text
.type rem_4bit,%object
.align 5
rem_4bit:


@ -2,10 +2,17 @@
#include "arm_arch.h"
#if __ARM_MAX_ARCH__>=7
.text
.fpu neon
#ifdef __thumb2__
.syntax unified
.thumb
# define INST(a,b,c,d) .byte c,0xef,a,b
#else
.code 32
#undef __thumb2__
# define INST(a,b,c,d) .byte a,b,c,0xf2
#endif
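@ INST() wraps the hand-assembled pmull/pmull2 opcodes: the Thumb-2
@ encoding leads with the 0xef halfword while the A32 encoding ends with
@ 0xf2, so the macro emits the same four bytes in the order each mode
@ expects.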
.text
.globl gcm_init_v8
.type gcm_init_v8,%function
.align 4
@ -29,23 +36,23 @@ gcm_init_v8:
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
INST(0xa8,0x0e,0xa8,0xf2) @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
INST(0xa9,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q12
INST(0xa0,0x2e,0xa0,0xf2) @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
@ -68,23 +75,23 @@ gcm_gmult_v8:
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
INST(0x86,0x0e,0xa8,0xf2) @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
INST(0x87,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
INST(0xa2,0x2e,0xaa,0xf2) @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
@ -120,6 +127,7 @@ gcm_ghash_v8:
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
it eq
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
@ -136,26 +144,28 @@ gcm_ghash_v8:
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
INST(0x8e,0x8e,0xa8,0xf2) @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
INST(0x8f,0xce,0xa9,0xf2) @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
INST(0x86,0x0e,0xac,0xf2) @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
it lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
INST(0xa2,0xae,0xaa,0xf2) @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
INST(0x87,0x4e,0xad,0xf2) @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
INST(0xa5,0x2e,0xab,0xf2) @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
it eq
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
@ -167,7 +177,7 @@ gcm_ghash_v8:
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
@ -177,15 +187,15 @@ gcm_ghash_v8:
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
INST(0x8e,0x8e,0xa8,0xf2) @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
INST(0x8f,0xce,0xa9,0xf2) @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
@ -198,23 +208,23 @@ gcm_ghash_v8:
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
INST(0x86,0x0e,0xa8,0xf2) @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
INST(0x87,0x4e,0xa9,0xf2) @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
INST(0xa2,0x2e,0xaa,0xf2) @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
INST(0x26,0x4e,0xe0,0xf2) @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
INST(0x26,0x0e,0xa0,0xf2) @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10


@ -1,8 +1,6 @@
/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__)
.syntax unified
.thumb
@ -10,6 +8,8 @@
.code 32
#endif
.text
.type iotas32, %object
.align 5
iotas32:
@ -1826,7 +1826,14 @@ KeccakF1600_enter:
#endif
blo .Lround2x
#if __ARM_ARCH__>=5
ldr pc,[sp,#440]
#else
ldr lr,[sp,#440]
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size KeccakF1600_int,.-KeccakF1600_int
.type KeccakF1600, %function
@ -1865,7 +1872,14 @@ KeccakF1600:
stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
add sp,sp,#440+20
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size KeccakF1600,.-KeccakF1600
.globl SHA3_absorb
.type SHA3_absorb,%function
@ -2011,7 +2025,14 @@ SHA3_absorb:
.Labsorb_abort:
add sp,sp,#456+32
mov r0,r12 @ return value
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size SHA3_absorb,.-SHA3_absorb
.globl SHA3_squeeze
.type SHA3_squeeze,%function
@ -2156,7 +2177,14 @@ SHA3_squeeze:
.align 4
.Lsqueeze_done:
add sp,sp,#24
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size SHA3_squeeze,.-SHA3_squeeze
#if __ARM_MAX_ARCH__>=7
.fpu neon
@ -2362,7 +2390,7 @@ KeccakF1600_neon:
subs r3, r3, #1
bne .Loop_neon
.word 0xe12fff1e
bx lr
.size KeccakF1600_neon,.-KeccakF1600_neon
.globl SHA3_absorb_neon
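
Each return in this file now branches through lr explicitly: on plain ARMv4, which lacks BX, `moveq pc,lr` returns when bit 0 of lr is clear, and the `.word 0xe12fff1e` literal is the raw ARM encoding of `bx lr`, emitted as data so a pre-v4T assembler still accepts the file while v4T+ cores get Thumb interworking. A small standalone C sanity check of that decode (field layout per the ARM instruction encoding; not OpenSSL code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t w = 0xe12fff1e;                     /* the literal above   */
    assert((w >> 28) == 0xeu);                   /* condition field: AL */
    assert(((w >> 4) & 0xffffffu) == 0x12fff1u); /* BX opcode pattern   */
    assert((w & 0xfu) == 14u);                   /* Rm = r14, so bx lr  */
    return 0;
}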


@ -1,7 +1,6 @@
/* Do not modify. This file is auto-generated from poly1305-armv4.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__)
.syntax unified
.thumb
@ -9,6 +8,8 @@
.code 32
#endif
.text
.globl poly1305_emit
.globl poly1305_blocks
.globl poly1305_init
@ -53,8 +54,10 @@ poly1305_init:
and r4,r4,r10
#if __ARM_MAX_ARCH__>=7
# if !defined(_WIN32)
ldr r12,[r11,r12] @ OPENSSL_armcap_P
# ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
# endif
#endif
@ -69,32 +72,22 @@ poly1305_init:
#if __ARM_MAX_ARCH__>=7
tst r12,#ARMV7_NEON @ check for NEON
# ifdef __APPLE__
adr r9,poly1305_blocks_neon
adr r11,poly1305_blocks
# ifdef __thumb2__
it ne
# endif
# ifdef __thumb2__
adr r9,.Lpoly1305_blocks_neon
adr r11,.Lpoly1305_blocks
adr r12,.Lpoly1305_emit
adr r10,.Lpoly1305_emit_neon
itt ne
movne r11,r9
adr r12,poly1305_emit
adr r10,poly1305_emit_neon
# ifdef __thumb2__
it ne
# endif
movne r12,r10
orr r11,r11,#1 @ thumb-ify address
orr r12,r12,#1
# else
# ifdef __thumb2__
itete eq
# endif
addeq r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
addne r12,r11,#(.Lpoly1305_emit_neon-.Lpoly1305_init)
addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
# endif
# ifdef __thumb2__
orr r12,r12,#1 @ thumb-ify address
orr r11,r11,#1
# endif
#endif
ldrb r9,[r1,#11]
orr r6,r6,r7,lsl#8
@ -1162,7 +1155,11 @@ poly1305_emit_neon:
.Lzeros:
.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.Lpoly1305_init
# endif
#endif
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
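
The init path above picks scalar or NEON blocks/emit routines at run time and, when assembled as Thumb-2, sets bit 0 of each address ("thumb-ify") so indirect calls interwork. A hedged C rendering of the selection (the struct and names are illustrative only):

#include <stdint.h>

typedef struct {
    uintptr_t blocks;                 /* entry point; bit 0 = Thumb */
    uintptr_t emit;
} poly1305_funcs;

static void poly1305_pick(poly1305_funcs *tbl, int have_neon, int thumb,
                          uintptr_t blocks_c, uintptr_t blocks_neon,
                          uintptr_t emit_c, uintptr_t emit_neon)
{
    tbl->blocks = have_neon ? blocks_neon : blocks_c;
    tbl->emit   = have_neon ? emit_neon   : emit_c;
    if (thumb) {                      /* "thumb-ify address"        */
        tbl->blocks |= 1;
        tbl->emit   |= 1;
    }
}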


@ -1,7 +1,6 @@
/* Do not modify. This file is auto-generated from sha1-armv4-large.pl. */
#include "arm_arch.h"
.text
#if defined(__thumb2__)
.syntax unified
.thumb
@ -9,6 +8,8 @@
.code 32
#endif
.text
.globl sha1_block_data_order
.type sha1_block_data_order,%function
@ -16,12 +17,14 @@
sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
.Lsha1_block:
adr r3,.Lsha1_block
ldr r12,.LOPENSSL_armcap
# if !defined(_WIN32)
adr r3,.Lsha1_block
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
#endif
# endif
tst r12,#ARMV8_SHA1
bne .LARMv8
tst r12,#ARMV7_NEON
@ -486,7 +489,11 @@ sha1_block_data_order:
.LK_60_79:.word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.Lsha1_block
# endif
#endif
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
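
The capability load above now distinguishes targets: outside _WIN32 the literal pool stores OPENSSL_armcap_P as a PC-relative offset from .Lsha1_block, while on _WIN32 (and __APPLE__) it stores the symbol's address and the code dereferences it once more. Once the word is loaded, the dispatch reduces to a bit test; a minimal C sketch, assuming the ARMV8_SHA1 bit value from OpenSSL's arm_arch.h:

extern unsigned int OPENSSL_armcap_P;

#define ARMV8_SHA1 (1 << 3)           /* assumption: per arm_arch.h */

static int sha1_want_armv8(void)
{
    return (OPENSSL_armcap_P & ARMV8_SHA1) != 0;
}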


@ -1,7 +1,7 @@
/* Do not modify. This file is auto-generated from sha256-armv4.pl. */
@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ Licensed under the Apache License 2.0 (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ -44,6 +44,8 @@
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
@ $output is the last argument if it looks like a file (it has an extension)
@ $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"
#else
@ -51,7 +53,6 @@
# define __ARM_MAX_ARCH__ 7
#endif
.text
#if defined(__thumb2__)
.syntax unified
.thumb
@ -59,6 +60,8 @@
.code 32
#endif
.text
.type K256,%object
.align 5
K256:
@ -82,7 +85,11 @@ K256:
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.Lsha256_block_data_order
# endif
#endif
.align 5
@ -97,10 +104,12 @@ sha256_block_data_order:
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
# if !defined(_WIN32)
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
#endif
# endif
tst r12,#ARMV8_SHA256
bne .LARMv8
tst r12,#ARMV7_NEON


@ -1,7 +1,7 @@
/* Do not modify. This file is auto-generated from sha512-armv4.pl. */
@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ Licensed under the Apache License 2.0 (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html
@ -74,7 +74,6 @@
# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
#endif
.text
#if defined(__thumb2__)
.syntax unified
.thumb
@ -83,6 +82,8 @@
.code 32
#endif
.text
.type K512,%object
.align 5
K512:
@ -129,7 +130,11 @@ K512:
.size K512,.-K512
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
# ifdef _WIN32
.word OPENSSL_armcap_P
# else
.word OPENSSL_armcap_P-.Lsha512_block_data_order
# endif
.skip 32-4
#else
.skip 32
@ -146,10 +151,12 @@ sha512_block_data_order:
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
# if !defined(_WIN32)
ldr r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
# endif
# if defined(__APPLE__) || defined(_WIN32)
ldr r12,[r12]
#endif
# endif
tst r12,#ARMV7_NEON
bne .LNEON
#endif

File diff suppressed because it is too large


@ -6,6 +6,11 @@
.align 16
aesni_encrypt:
.L_aesni_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 12(%esp),%edx
movups (%eax),%xmm2
@ -33,6 +38,11 @@ aesni_encrypt:
.align 16
aesni_decrypt:
.L_aesni_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 12(%esp),%edx
movups (%eax),%xmm2
@ -58,6 +68,11 @@ aesni_decrypt:
.type _aesni_encrypt2,@function
.align 16
_aesni_encrypt2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -85,6 +100,11 @@ _aesni_encrypt2:
.type _aesni_decrypt2,@function
.align 16
_aesni_decrypt2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -112,6 +132,11 @@ _aesni_decrypt2:
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -144,6 +169,11 @@ _aesni_encrypt3:
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -176,6 +206,11 @@ _aesni_decrypt3:
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
movups 16(%edx),%xmm1
shll $4,%ecx
@ -214,6 +249,11 @@ _aesni_encrypt4:
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
movups 16(%edx),%xmm1
shll $4,%ecx
@ -252,6 +292,11 @@ _aesni_decrypt4:
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -306,6 +351,11 @@ _aesni_encrypt6:
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -362,6 +412,11 @@ _aesni_decrypt6:
.align 16
aesni_ecb_encrypt:
.L_aesni_ecb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -597,6 +652,11 @@ aesni_ecb_encrypt:
.align 16
aesni_ccm64_encrypt_blocks:
.L_aesni_ccm64_encrypt_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -685,6 +745,11 @@ aesni_ccm64_encrypt_blocks:
.align 16
aesni_ccm64_decrypt_blocks:
.L_aesni_ccm64_decrypt_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -808,6 +873,11 @@ aesni_ccm64_decrypt_blocks:
.align 16
aesni_ctr32_encrypt_blocks:
.L_aesni_ctr32_encrypt_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1046,6 +1116,11 @@ aesni_ctr32_encrypt_blocks:
.align 16
aesni_xts_encrypt:
.L_aesni_xts_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1406,6 +1481,11 @@ aesni_xts_encrypt:
.align 16
aesni_xts_decrypt:
.L_aesni_xts_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1796,6 +1876,11 @@ aesni_xts_decrypt:
.align 16
aesni_ocb_encrypt:
.L_aesni_ocb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2191,6 +2276,11 @@ aesni_ocb_encrypt:
.align 16
aesni_ocb_decrypt:
.L_aesni_ocb_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2586,6 +2676,11 @@ aesni_ocb_decrypt:
.align 16
aesni_cbc_encrypt:
.L_aesni_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2845,6 +2940,11 @@ aesni_cbc_encrypt:
.type _aesni_set_encrypt_key,@function
.align 16
_aesni_set_encrypt_key:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
testl %eax,%eax
@ -3180,6 +3280,11 @@ _aesni_set_encrypt_key:
.align 16
aesni_set_encrypt_key:
.L_aesni_set_encrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
@ -3191,6 +3296,11 @@ aesni_set_encrypt_key:
.align 16
aesni_set_decrypt_key:
.L_aesni_set_decrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
@ -3237,6 +3347,23 @@ aesni_set_decrypt_key:
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl aesni_encrypt
@ -3244,6 +3371,11 @@ aesni_set_decrypt_key:
.align 16
aesni_encrypt:
.L_aesni_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 12(%esp),%edx
movups (%eax),%xmm2
@ -3271,6 +3403,11 @@ aesni_encrypt:
.align 16
aesni_decrypt:
.L_aesni_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 12(%esp),%edx
movups (%eax),%xmm2
@ -3296,6 +3433,11 @@ aesni_decrypt:
.type _aesni_encrypt2,@function
.align 16
_aesni_encrypt2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -3323,6 +3465,11 @@ _aesni_encrypt2:
.type _aesni_decrypt2,@function
.align 16
_aesni_decrypt2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -3350,6 +3497,11 @@ _aesni_decrypt2:
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -3382,6 +3534,11 @@ _aesni_encrypt3:
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -3414,6 +3571,11 @@ _aesni_decrypt3:
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
movups 16(%edx),%xmm1
shll $4,%ecx
@ -3452,6 +3614,11 @@ _aesni_encrypt4:
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
movups 16(%edx),%xmm1
shll $4,%ecx
@ -3490,6 +3657,11 @@ _aesni_decrypt4:
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -3544,6 +3716,11 @@ _aesni_encrypt6:
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
@ -3600,6 +3777,11 @@ _aesni_decrypt6:
.align 16
aesni_ecb_encrypt:
.L_aesni_ecb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3835,6 +4017,11 @@ aesni_ecb_encrypt:
.align 16
aesni_ccm64_encrypt_blocks:
.L_aesni_ccm64_encrypt_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3923,6 +4110,11 @@ aesni_ccm64_encrypt_blocks:
.align 16
aesni_ccm64_decrypt_blocks:
.L_aesni_ccm64_decrypt_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4046,6 +4238,11 @@ aesni_ccm64_decrypt_blocks:
.align 16
aesni_ctr32_encrypt_blocks:
.L_aesni_ctr32_encrypt_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4284,6 +4481,11 @@ aesni_ctr32_encrypt_blocks:
.align 16
aesni_xts_encrypt:
.L_aesni_xts_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4644,6 +4846,11 @@ aesni_xts_encrypt:
.align 16
aesni_xts_decrypt:
.L_aesni_xts_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5034,6 +5241,11 @@ aesni_xts_decrypt:
.align 16
aesni_ocb_encrypt:
.L_aesni_ocb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5429,6 +5641,11 @@ aesni_ocb_encrypt:
.align 16
aesni_ocb_decrypt:
.L_aesni_ocb_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5824,6 +6041,11 @@ aesni_ocb_decrypt:
.align 16
aesni_cbc_encrypt:
.L_aesni_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -6083,6 +6305,11 @@ aesni_cbc_encrypt:
.type _aesni_set_encrypt_key,@function
.align 16
_aesni_set_encrypt_key:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
testl %eax,%eax
@ -6418,6 +6645,11 @@ _aesni_set_encrypt_key:
.align 16
aesni_set_encrypt_key:
.L_aesni_set_encrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
@ -6429,6 +6661,11 @@ aesni_set_encrypt_key:
.align 16
aesni_set_decrypt_key:
.L_aesni_set_decrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 8(%esp),%ecx
movl 12(%esp),%edx
@ -6475,4 +6712,21 @@ aesni_set_decrypt_key:
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
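
Two CET-related additions recur through the i386 files: `.byte 243,15,30,251` is 0xf3 0x0f 0x1e 0xfb, the endbr32 instruction marking each entry point as a valid indirect-branch target, and the `.note.gnu.property` section tells the loader the object was built for IBT and shadow stack (type 5 is NT_GNU_PROPERTY_TYPE_0, 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND, and the value 3 is IBT|SHSTK). A C struct mirroring the bytes those directives emit on a 32-bit target (for reference only):

#include <stdint.h>

struct gnu_property_note {
    uint32_t namesz;                  /* 4: "GNU" plus its NUL         */
    uint32_t descsz;                  /* 12: one property entry        */
    uint32_t type;                    /* 5: NT_GNU_PROPERTY_TYPE_0     */
    char     name[4];                 /* "GNU\0"                       */
    uint32_t pr_type;                 /* 0xc0000002: X86_FEATURE_1_AND */
    uint32_t pr_datasz;               /* 4                             */
    uint32_t pr_data;                 /* 3: IBT (1) | SHSTK (2)        */
};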


@ -6,6 +6,11 @@
.align 16
BF_encrypt:
.L_BF_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -356,6 +361,11 @@ BF_encrypt:
.align 16
BF_decrypt:
.L_BF_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -706,6 +716,11 @@ BF_decrypt:
.align 16
BF_cbc_encrypt:
.L_BF_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -769,21 +784,56 @@ BF_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L006ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L007ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L008ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L009ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L010ejend
.L011ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L012ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L013ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L010ejend:
xorl %ecx,%eax
@ -895,6 +945,23 @@ BF_cbc_encrypt:
.long .L006ej7-.L004PIC_point
.align 64
.size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl BF_encrypt
@ -902,6 +969,11 @@ BF_cbc_encrypt:
.align 16
BF_encrypt:
.L_BF_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1252,6 +1324,11 @@ BF_encrypt:
.align 16
BF_decrypt:
.L_BF_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1602,6 +1679,11 @@ BF_decrypt:
.align 16
BF_cbc_encrypt:
.L_BF_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1665,21 +1747,56 @@ BF_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L006ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L007ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L008ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L009ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L010ejend
.L011ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L012ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L013ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L010ejend:
xorl %ecx,%eax
@ -1791,4 +1908,21 @@ BF_cbc_encrypt:
.long .L006ej7-.L004PIC_point
.align 64
.size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
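
The .L0xxej labels are reached through the computed `jmp *%ebp` that loads a trailing 1-7 byte partial block, which is why every one of them now needs its own endbr32 landing pad under IBT. The ladder's effect, sketched in C (the function name and interface are illustrative):

#include <stddef.h>
#include <stdint.h>

static void load_tail(const unsigned char *in, size_t n,
                      uint32_t *lo_w, uint32_t *hi_w)
{
    uint32_t lo = 0, hi = 0;

    switch (n) {                      /* mirrors .L006ej7 .. .L013ej1 */
    case 7: hi |= (uint32_t)in[6] << 16;          /* fallthrough */
    case 6: hi |= (uint32_t)in[5] << 8;           /* fallthrough */
    case 5: hi |= (uint32_t)in[4];                /* fallthrough */
    case 4: lo  = (uint32_t)in[0]       | (uint32_t)in[1] << 8 |
                  (uint32_t)in[2] << 16 | (uint32_t)in[3] << 24;
            break;                    /* ej4 loads a whole dword      */
    case 3: lo |= (uint32_t)in[2] << 16;          /* fallthrough */
    case 2: lo |= (uint32_t)in[1] << 8;           /* fallthrough */
    case 1: lo |= (uint32_t)in[0];
    }
    *lo_w = lo;                       /* then xored into the IV       */
    *hi_w = hi;
}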


@ -6,6 +6,11 @@
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L000PIC_me_up
.L000PIC_me_up:
popl %eax
@ -289,6 +294,11 @@ bn_mul_add_words:
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L010PIC_me_up
.L010PIC_me_up:
popl %eax
@ -471,6 +481,11 @@ bn_mul_words:
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L017PIC_me_up
.L017PIC_me_up:
popl %eax
@ -612,6 +627,11 @@ bn_sqr_words:
.align 16
bn_div_words:
.L_bn_div_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%eax
movl 12(%esp),%ecx
@ -623,6 +643,11 @@ bn_div_words:
.align 16
bn_add_words:
.L_bn_add_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -805,6 +830,11 @@ bn_add_words:
.align 16
bn_sub_words:
.L_bn_sub_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -987,6 +1017,11 @@ bn_sub_words:
.align 16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1529,6 +1564,23 @@ bn_sub_part_words:
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl bn_mul_add_words
@ -1536,6 +1588,11 @@ bn_sub_part_words:
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L000maw_non_sse2
@ -1816,6 +1873,11 @@ bn_mul_add_words:
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L009mw_non_sse2
@ -1995,6 +2057,11 @@ bn_mul_words:
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc .L015sqr_non_sse2
@ -2133,6 +2200,11 @@ bn_sqr_words:
.align 16
bn_div_words:
.L_bn_div_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%eax
movl 12(%esp),%ecx
@ -2144,6 +2216,11 @@ bn_div_words:
.align 16
bn_add_words:
.L_bn_add_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2326,6 +2403,11 @@ bn_add_words:
.align 16
bn_sub_words:
.L_bn_sub_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2508,6 +2590,11 @@ bn_sub_words:
.align 16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3050,4 +3137,21 @@ bn_sub_part_words:
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
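
bn_mul_add_words and friends gate their SSE2 paths on `btl $26,(%eax)`: bit 26 of the first word of OPENSSL_ia32cap_P mirrors CPUID.1:EDX bit 26, the SSE2 flag. In C terms, roughly:

extern unsigned int OPENSSL_ia32cap_P[4];   /* matches .comm ...,16,4 */

static int bn_have_sse2(void)
{
    return (OPENSSL_ia32cap_P[0] >> 26) & 1;
}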


@ -6,6 +6,11 @@
.align 16
CAST_encrypt:
.L_CAST_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -376,6 +381,11 @@ CAST_encrypt:
.align 16
CAST_decrypt:
.L_CAST_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -743,6 +753,11 @@ CAST_decrypt:
.align 16
CAST_cbc_encrypt:
.L_CAST_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -806,21 +821,56 @@ CAST_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L008ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L009ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L010ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L011ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L012ejend
.L013ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L014ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L015ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L012ejend:
xorl %ecx,%eax
@ -932,6 +982,23 @@ CAST_cbc_encrypt:
.long .L008ej7-.L006PIC_point
.align 64
.size CAST_cbc_encrypt,.-.L_CAST_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl CAST_encrypt
@ -939,6 +1006,11 @@ CAST_cbc_encrypt:
.align 16
CAST_encrypt:
.L_CAST_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1309,6 +1381,11 @@ CAST_encrypt:
.align 16
CAST_decrypt:
.L_CAST_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1676,6 +1753,11 @@ CAST_decrypt:
.align 16
CAST_cbc_encrypt:
.L_CAST_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1739,21 +1821,56 @@ CAST_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L008ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L009ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L010ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L011ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L012ejend
.L013ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L014ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L015ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L012ejend:
xorl %ecx,%eax
@ -1865,4 +1982,21 @@ CAST_cbc_encrypt:
.long .L008ej7-.L006PIC_point
.align 64
.size CAST_cbc_encrypt,.-.L_CAST_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -6,6 +6,11 @@
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -379,6 +384,11 @@ ChaCha20_ctr32:
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -534,6 +544,11 @@ ChaCha20_ssse3:
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1008,6 +1023,23 @@ ChaCha20_xop:
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl ChaCha20_ctr32
@ -1015,6 +1047,11 @@ ChaCha20_xop:
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1388,6 +1425,11 @@ ChaCha20_ctr32:
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1543,6 +1585,11 @@ ChaCha20_ssse3:
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2017,4 +2064,21 @@ ChaCha20_xop:
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -6,6 +6,11 @@
.align 16
Camellia_EncryptBlock_Rounds:
.L_Camellia_EncryptBlock_Rounds_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -60,6 +65,11 @@ Camellia_EncryptBlock_Rounds:
.align 16
Camellia_EncryptBlock:
.L_Camellia_EncryptBlock_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $128,%eax
subl 4(%esp),%eax
movl $3,%eax
@ -72,6 +82,11 @@ Camellia_EncryptBlock:
.align 16
Camellia_encrypt:
.L_Camellia_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -124,6 +139,11 @@ Camellia_encrypt:
.type _x86_Camellia_encrypt,@function
.align 16
_x86_Camellia_encrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl (%edi),%eax
xorl 4(%edi),%ebx
xorl 8(%edi),%ecx
@ -354,6 +374,11 @@ _x86_Camellia_encrypt:
.align 16
Camellia_DecryptBlock_Rounds:
.L_Camellia_DecryptBlock_Rounds_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -408,6 +433,11 @@ Camellia_DecryptBlock_Rounds:
.align 16
Camellia_DecryptBlock:
.L_Camellia_DecryptBlock_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $128,%eax
subl 4(%esp),%eax
movl $3,%eax
@ -420,6 +450,11 @@ Camellia_DecryptBlock:
.align 16
Camellia_decrypt:
.L_Camellia_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -472,6 +507,11 @@ Camellia_decrypt:
.type _x86_Camellia_decrypt,@function
.align 16
_x86_Camellia_decrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl (%edi),%eax
xorl 4(%edi),%ebx
xorl 8(%edi),%ecx
@ -702,6 +742,11 @@ _x86_Camellia_decrypt:
.align 16
Camellia_Ekeygen:
.L_Camellia_Ekeygen_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1543,6 +1588,11 @@ Camellia_Ekeygen:
.align 16
Camellia_set_key:
.L_Camellia_set_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
movl 8(%esp),%ecx
movl 12(%esp),%ebx
@ -2095,6 +2145,11 @@ Camellia_set_key:
.align 16
Camellia_cbc_encrypt:
.L_Camellia_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2374,6 +2429,23 @@ Camellia_cbc_encrypt:
.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl Camellia_EncryptBlock_Rounds
@ -2381,6 +2453,11 @@ Camellia_cbc_encrypt:
.align 16
Camellia_EncryptBlock_Rounds:
.L_Camellia_EncryptBlock_Rounds_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2435,6 +2512,11 @@ Camellia_EncryptBlock_Rounds:
.align 16
Camellia_EncryptBlock:
.L_Camellia_EncryptBlock_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $128,%eax
subl 4(%esp),%eax
movl $3,%eax
@ -2447,6 +2529,11 @@ Camellia_EncryptBlock:
.align 16
Camellia_encrypt:
.L_Camellia_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2499,6 +2586,11 @@ Camellia_encrypt:
.type _x86_Camellia_encrypt,@function
.align 16
_x86_Camellia_encrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl (%edi),%eax
xorl 4(%edi),%ebx
xorl 8(%edi),%ecx
@ -2729,6 +2821,11 @@ _x86_Camellia_encrypt:
.align 16
Camellia_DecryptBlock_Rounds:
.L_Camellia_DecryptBlock_Rounds_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2783,6 +2880,11 @@ Camellia_DecryptBlock_Rounds:
.align 16
Camellia_DecryptBlock:
.L_Camellia_DecryptBlock_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $128,%eax
subl 4(%esp),%eax
movl $3,%eax
@ -2795,6 +2897,11 @@ Camellia_DecryptBlock:
.align 16
Camellia_decrypt:
.L_Camellia_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2847,6 +2954,11 @@ Camellia_decrypt:
.type _x86_Camellia_decrypt,@function
.align 16
_x86_Camellia_decrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl (%edi),%eax
xorl 4(%edi),%ebx
xorl 8(%edi),%ecx
@ -3077,6 +3189,11 @@ _x86_Camellia_decrypt:
.align 16
Camellia_Ekeygen:
.L_Camellia_Ekeygen_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3918,6 +4035,11 @@ Camellia_Ekeygen:
.align 16
Camellia_set_key:
.L_Camellia_set_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
movl 8(%esp),%ecx
movl 12(%esp),%ebx
@ -4470,6 +4592,11 @@ Camellia_set_key:
.align 16
Camellia_cbc_encrypt:
.L_Camellia_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4749,4 +4876,21 @@ Camellia_cbc_encrypt:
.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -6,6 +6,11 @@
.align 16
bn_mul_comba8:
.L_bn_mul_comba8_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
movl 12(%esp),%esi
pushl %edi
@ -550,6 +555,11 @@ bn_mul_comba8:
.align 16
bn_mul_comba4:
.L_bn_mul_comba4_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
movl 12(%esp),%esi
pushl %edi
@ -718,6 +728,11 @@ bn_mul_comba4:
.align 16
bn_sqr_comba8:
.L_bn_sqr_comba8_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
pushl %ebp
@ -1126,6 +1141,11 @@ bn_sqr_comba8:
.align 16
bn_sqr_comba4:
.L_bn_sqr_comba4_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
pushl %ebp
@ -1253,6 +1273,23 @@ bn_sqr_comba4:
popl %esi
ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl bn_mul_comba8
@ -1260,6 +1297,11 @@ bn_sqr_comba4:
.align 16
bn_mul_comba8:
.L_bn_mul_comba8_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
movl 12(%esp),%esi
pushl %edi
@ -1804,6 +1846,11 @@ bn_mul_comba8:
.align 16
bn_mul_comba4:
.L_bn_mul_comba4_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
movl 12(%esp),%esi
pushl %edi
@ -1972,6 +2019,11 @@ bn_mul_comba4:
.align 16
bn_sqr_comba8:
.L_bn_sqr_comba8_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
pushl %ebp
@ -2380,6 +2432,11 @@ bn_sqr_comba8:
.align 16
bn_sqr_comba4:
.L_bn_sqr_comba4_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
pushl %ebp
@ -2507,4 +2564,21 @@ bn_sqr_comba4:
popl %esi
ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -6,6 +6,11 @@
.align 16
fcrypt_body:
.L_fcrypt_body_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -878,6 +883,23 @@ fcrypt_body:
popl %ebp
ret
.size fcrypt_body,.-.L_fcrypt_body_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl fcrypt_body
@ -885,6 +907,11 @@ fcrypt_body:
.align 16
fcrypt_body:
.L_fcrypt_body_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1753,4 +1780,21 @@ fcrypt_body:
popl %ebp
ret
.size fcrypt_body,.-.L_fcrypt_body_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -5,6 +5,11 @@
.type _x86_DES_encrypt,@function
.align 16
_x86_DES_encrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ecx
movl (%ecx),%eax
@ -476,6 +481,11 @@ _x86_DES_encrypt:
.type _x86_DES_decrypt,@function
.align 16
_x86_DES_decrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ecx
movl 120(%ecx),%eax
@ -949,6 +959,11 @@ _x86_DES_decrypt:
.align 16
DES_encrypt1:
.L_DES_encrypt1_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
@ -1062,6 +1077,11 @@ DES_encrypt1:
.align 16
DES_encrypt2:
.L_DES_encrypt2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
@ -1105,6 +1125,11 @@ DES_encrypt2:
.align 16
DES_encrypt3:
.L_DES_encrypt3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
movl 8(%esp),%ebx
pushl %ebp
@ -1226,6 +1251,11 @@ DES_encrypt3:
.align 16
DES_decrypt3:
.L_DES_decrypt3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
movl 8(%esp),%ebx
pushl %ebp
@ -1347,6 +1377,11 @@ DES_decrypt3:
.align 16
DES_ncbc_encrypt:
.L_DES_ncbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1408,21 +1443,56 @@ DES_ncbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L012ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L013ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L014ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L015ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L016ejend
.L017ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L018ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L019ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L016ejend:
xorl %ecx,%eax
@ -1527,6 +1597,11 @@ DES_ncbc_encrypt:
.align 16
DES_ede3_cbc_encrypt:
.L_DES_ede3_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1592,21 +1667,56 @@ DES_ede3_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L036ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L037ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L038ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L039ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L040ejend
.L041ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L042ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L043ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L040ejend:
xorl %ecx,%eax
@ -1837,12 +1947,34 @@ DES_SPtrans:
.long 8519680,131200,537002112,545259520
.long 128,545390592,8519808,0
.long 536870912,545259648,131072,8519808
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl DES_SPtrans
.type _x86_DES_encrypt,@function
.align 16
_x86_DES_encrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ecx
movl (%ecx),%eax
@ -2314,6 +2446,11 @@ _x86_DES_encrypt:
.type _x86_DES_decrypt,@function
.align 16
_x86_DES_decrypt:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ecx
movl 120(%ecx),%eax
@ -2787,6 +2924,11 @@ _x86_DES_decrypt:
.align 16
DES_encrypt1:
.L_DES_encrypt1_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
@ -2900,6 +3042,11 @@ DES_encrypt1:
.align 16
DES_encrypt2:
.L_DES_encrypt2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
@ -2943,6 +3090,11 @@ DES_encrypt2:
.align 16
DES_encrypt3:
.L_DES_encrypt3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
movl 8(%esp),%ebx
pushl %ebp
@ -3064,6 +3216,11 @@ DES_encrypt3:
.align 16
DES_decrypt3:
.L_DES_decrypt3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
movl 8(%esp),%ebx
pushl %ebp
@ -3185,6 +3342,11 @@ DES_decrypt3:
.align 16
DES_ncbc_encrypt:
.L_DES_ncbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -3246,21 +3408,56 @@ DES_ncbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L012ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L013ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L014ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L015ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L016ejend
.L017ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L018ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L019ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L016ejend:
xorl %ecx,%eax
@ -3365,6 +3562,11 @@ DES_ncbc_encrypt:
.align 16
DES_ede3_cbc_encrypt:
.L_DES_ede3_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -3430,21 +3632,56 @@ DES_ede3_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L036ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L037ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L038ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L039ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L040ejend
.L041ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L042ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L043ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L040ejend:
xorl %ecx,%eax
@ -3675,4 +3912,21 @@ DES_SPtrans:
.long 8519680,131200,537002112,545259520
.long 128,545390592,8519808,0
.long 536870912,545259648,131072,8519808
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -6,6 +6,11 @@
.align 16
padlock_capability:
.L_padlock_capability_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
pushfl
popl %eax
@ -66,6 +71,11 @@ padlock_capability:
.align 16
padlock_key_bswap:
.L_padlock_key_bswap_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 240(%edx),%ecx
incl %ecx
@ -84,6 +94,11 @@ padlock_key_bswap:
.align 16
padlock_verify_context:
.L_padlock_verify_context_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
leal .Lpadlock_saved_context-.L004verify_pic_point,%eax
pushfl
@ -95,6 +110,11 @@ padlock_verify_context:
.type _padlock_verify_ctx,@function
.align 16
_padlock_verify_ctx:
#ifdef __CET__
.byte 243,15,30,251
#endif
addl (%esp),%eax
btl $30,4(%esp)
jnc .L005verified
@ -111,6 +131,11 @@ _padlock_verify_ctx:
.align 16
padlock_reload_key:
.L_padlock_reload_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushfl
popfl
ret
@ -120,6 +145,11 @@ padlock_reload_key:
.align 16
padlock_aes_block:
.L_padlock_aes_block_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
pushl %ebx
@ -140,6 +170,11 @@ padlock_aes_block:
.align 16
padlock_ecb_encrypt:
.L_padlock_ecb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -319,6 +354,11 @@ padlock_ecb_encrypt:
.align 16
padlock_cbc_encrypt:
.L_padlock_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -502,6 +542,11 @@ padlock_cbc_encrypt:
.align 16
padlock_cfb_encrypt:
.L_padlock_cfb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -624,6 +669,11 @@ padlock_cfb_encrypt:
.align 16
padlock_ofb_encrypt:
.L_padlock_ofb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -746,6 +796,11 @@ padlock_ofb_encrypt:
.align 16
padlock_ctr32_encrypt:
.L_padlock_ctr32_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -853,6 +908,11 @@ padlock_ctr32_encrypt:
.align 16
padlock_xstore:
.L_padlock_xstore_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
movl 8(%esp),%edi
movl 12(%esp),%edx
@ -863,6 +923,11 @@ padlock_xstore:
.type _win32_segv_handler,@function
.align 16
_win32_segv_handler:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $1,%eax
movl 4(%esp),%edx
movl 12(%esp),%ecx
@ -878,6 +943,11 @@ _win32_segv_handler:
.align 16
padlock_sha1_oneshot:
.L_padlock_sha1_oneshot_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
xorl %eax,%eax
@ -909,6 +979,11 @@ padlock_sha1_oneshot:
.align 16
padlock_sha1_blocks:
.L_padlock_sha1_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
movl 12(%esp),%edi
@ -939,6 +1014,11 @@ padlock_sha1_blocks:
.align 16
padlock_sha256_oneshot:
.L_padlock_sha256_oneshot_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
xorl %eax,%eax
@ -970,6 +1050,11 @@ padlock_sha256_oneshot:
.align 16
padlock_sha256_blocks:
.L_padlock_sha256_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
movl 12(%esp),%edi
@ -1000,6 +1085,11 @@ padlock_sha256_blocks:
.align 16
padlock_sha512_blocks:
.L_padlock_sha512_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
movl 12(%esp),%edi
@ -1041,6 +1131,23 @@ padlock_sha512_blocks:
.align 4
.Lpadlock_saved_context:
.long 0
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl padlock_capability
@ -1048,6 +1155,11 @@ padlock_sha512_blocks:
.align 16
padlock_capability:
.L_padlock_capability_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebx
pushfl
popl %eax
@ -1108,6 +1220,11 @@ padlock_capability:
.align 16
padlock_key_bswap:
.L_padlock_key_bswap_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 240(%edx),%ecx
incl %ecx
@ -1126,6 +1243,11 @@ padlock_key_bswap:
.align 16
padlock_verify_context:
.L_padlock_verify_context_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
leal .Lpadlock_saved_context-.L004verify_pic_point,%eax
pushfl
@ -1137,6 +1259,11 @@ padlock_verify_context:
.type _padlock_verify_ctx,@function
.align 16
_padlock_verify_ctx:
#ifdef __CET__
.byte 243,15,30,251
#endif
addl (%esp),%eax
btl $30,4(%esp)
jnc .L005verified
@ -1153,6 +1280,11 @@ _padlock_verify_ctx:
.align 16
padlock_reload_key:
.L_padlock_reload_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushfl
popfl
ret
@ -1162,6 +1294,11 @@ padlock_reload_key:
.align 16
padlock_aes_block:
.L_padlock_aes_block_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
pushl %ebx
@ -1182,6 +1319,11 @@ padlock_aes_block:
.align 16
padlock_ecb_encrypt:
.L_padlock_ecb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1361,6 +1503,11 @@ padlock_ecb_encrypt:
.align 16
padlock_cbc_encrypt:
.L_padlock_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1544,6 +1691,11 @@ padlock_cbc_encrypt:
.align 16
padlock_cfb_encrypt:
.L_padlock_cfb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1666,6 +1818,11 @@ padlock_cfb_encrypt:
.align 16
padlock_ofb_encrypt:
.L_padlock_ofb_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1788,6 +1945,11 @@ padlock_ofb_encrypt:
.align 16
padlock_ctr32_encrypt:
.L_padlock_ctr32_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1895,6 +2057,11 @@ padlock_ctr32_encrypt:
.align 16
padlock_xstore:
.L_padlock_xstore_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
movl 8(%esp),%edi
movl 12(%esp),%edx
@ -1905,6 +2072,11 @@ padlock_xstore:
.type _win32_segv_handler,@function
.align 16
_win32_segv_handler:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $1,%eax
movl 4(%esp),%edx
movl 12(%esp),%ecx
@ -1920,6 +2092,11 @@ _win32_segv_handler:
.align 16
padlock_sha1_oneshot:
.L_padlock_sha1_oneshot_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
xorl %eax,%eax
@ -1951,6 +2128,11 @@ padlock_sha1_oneshot:
.align 16
padlock_sha1_blocks:
.L_padlock_sha1_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
movl 12(%esp),%edi
@ -1981,6 +2163,11 @@ padlock_sha1_blocks:
.align 16
padlock_sha256_oneshot:
.L_padlock_sha256_oneshot_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
xorl %eax,%eax
@ -2012,6 +2199,11 @@ padlock_sha256_oneshot:
.align 16
padlock_sha256_blocks:
.L_padlock_sha256_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
movl 12(%esp),%edi
@ -2042,6 +2234,11 @@ padlock_sha256_blocks:
.align 16
padlock_sha512_blocks:
.L_padlock_sha512_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %esi
movl 12(%esp),%edi
@ -2083,4 +2280,21 @@ padlock_sha512_blocks:
.align 4
.Lpadlock_saved_context:
.long 0
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif


@ -2389,6 +2389,11 @@ ecp_nistz256_precomputed:
.align 16
ecp_nistz256_mul_by_2:
.L_ecp_nistz256_mul_by_2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2408,6 +2413,11 @@ ecp_nistz256_mul_by_2:
.align 16
ecp_nistz256_mul_by_3:
.L_ecp_nistz256_mul_by_3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2433,6 +2443,11 @@ ecp_nistz256_mul_by_3:
.align 16
ecp_nistz256_div_by_2:
.L_ecp_nistz256_div_by_2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2449,6 +2464,11 @@ ecp_nistz256_div_by_2:
.type _ecp_nistz256_div_by_2,@function
.align 16
_ecp_nistz256_div_by_2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ebp
xorl %edx,%edx
movl 4(%esi),%ebx
@ -2532,6 +2552,11 @@ _ecp_nistz256_div_by_2:
.align 16
ecp_nistz256_add:
.L_ecp_nistz256_add_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2549,6 +2574,11 @@ ecp_nistz256_add:
.type _ecp_nistz256_add,@function
.align 16
_ecp_nistz256_add:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -2626,6 +2656,11 @@ _ecp_nistz256_add:
.align 16
ecp_nistz256_sub:
.L_ecp_nistz256_sub_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2643,6 +2678,11 @@ ecp_nistz256_sub:
.type _ecp_nistz256_sub,@function
.align 16
_ecp_nistz256_sub:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -2701,6 +2741,11 @@ _ecp_nistz256_sub:
.align 16
ecp_nistz256_neg:
.L_ecp_nistz256_neg_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2729,6 +2774,11 @@ ecp_nistz256_neg:
.type _picup_eax,@function
.align 16
_picup_eax:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esp),%eax
ret
.size _picup_eax,.-_picup_eax
@ -2737,6 +2787,11 @@ _picup_eax:
.align 16
ecp_nistz256_to_mont:
.L_ecp_nistz256_to_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2760,6 +2815,11 @@ ecp_nistz256_to_mont:
.align 16
ecp_nistz256_from_mont:
.L_ecp_nistz256_from_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2783,6 +2843,11 @@ ecp_nistz256_from_mont:
.align 16
ecp_nistz256_mul_mont:
.L_ecp_nistz256_mul_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2806,6 +2871,11 @@ ecp_nistz256_mul_mont:
.align 16
ecp_nistz256_sqr_mont:
.L_ecp_nistz256_sqr_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2827,6 +2897,11 @@ ecp_nistz256_sqr_mont:
.type _ecp_nistz256_mul_mont,@function
.align 16
_ecp_nistz256_mul_mont:
#ifdef __CET__
.byte 243,15,30,251
#endif
andl $83886080,%eax
cmpl $83886080,%eax
jne .L004mul_mont_ialu
@ -3724,6 +3799,11 @@ _ecp_nistz256_mul_mont:
.align 16
ecp_nistz256_scatter_w5:
.L_ecp_nistz256_scatter_w5_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3757,6 +3837,11 @@ ecp_nistz256_scatter_w5:
.align 16
ecp_nistz256_gather_w5:
.L_ecp_nistz256_gather_w5_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3851,6 +3936,11 @@ ecp_nistz256_gather_w5:
.align 16
ecp_nistz256_scatter_w7:
.L_ecp_nistz256_scatter_w7_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3882,6 +3972,11 @@ ecp_nistz256_scatter_w7:
.align 16
ecp_nistz256_gather_w7:
.L_ecp_nistz256_gather_w7_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4096,6 +4191,11 @@ ecp_nistz256_gather_w7:
.align 16
ecp_nistz256_point_double:
.L_ecp_nistz256_point_double_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4222,6 +4322,11 @@ ecp_nistz256_point_double:
.align 16
ecp_nistz256_point_add:
.L_ecp_nistz256_point_add_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -4735,6 +4840,11 @@ ecp_nistz256_point_add:
.align 16
ecp_nistz256_point_add_affine:
.L_ecp_nistz256_point_add_affine_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5163,6 +5273,23 @@ ecp_nistz256_point_add_affine:
ret
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl ecp_nistz256_precomputed
@ -7553,6 +7680,11 @@ ecp_nistz256_precomputed:
.align 16
ecp_nistz256_mul_by_2:
.L_ecp_nistz256_mul_by_2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7572,6 +7704,11 @@ ecp_nistz256_mul_by_2:
.align 16
ecp_nistz256_mul_by_3:
.L_ecp_nistz256_mul_by_3_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7597,6 +7734,11 @@ ecp_nistz256_mul_by_3:
.align 16
ecp_nistz256_div_by_2:
.L_ecp_nistz256_div_by_2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7613,6 +7755,11 @@ ecp_nistz256_div_by_2:
.type _ecp_nistz256_div_by_2,@function
.align 16
_ecp_nistz256_div_by_2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ebp
xorl %edx,%edx
movl 4(%esi),%ebx
@ -7696,6 +7843,11 @@ _ecp_nistz256_div_by_2:
.align 16
ecp_nistz256_add:
.L_ecp_nistz256_add_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7713,6 +7865,11 @@ ecp_nistz256_add:
.type _ecp_nistz256_add,@function
.align 16
_ecp_nistz256_add:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -7790,6 +7947,11 @@ _ecp_nistz256_add:
.align 16
ecp_nistz256_sub:
.L_ecp_nistz256_sub_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7807,6 +7969,11 @@ ecp_nistz256_sub:
.type _ecp_nistz256_sub,@function
.align 16
_ecp_nistz256_sub:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
@ -7865,6 +8032,11 @@ _ecp_nistz256_sub:
.align 16
ecp_nistz256_neg:
.L_ecp_nistz256_neg_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7893,6 +8065,11 @@ ecp_nistz256_neg:
.type _picup_eax,@function
.align 16
_picup_eax:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esp),%eax
ret
.size _picup_eax,.-_picup_eax
@ -7901,6 +8078,11 @@ _picup_eax:
.align 16
ecp_nistz256_to_mont:
.L_ecp_nistz256_to_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7924,6 +8106,11 @@ ecp_nistz256_to_mont:
.align 16
ecp_nistz256_from_mont:
.L_ecp_nistz256_from_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7947,6 +8134,11 @@ ecp_nistz256_from_mont:
.align 16
ecp_nistz256_mul_mont:
.L_ecp_nistz256_mul_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7970,6 +8162,11 @@ ecp_nistz256_mul_mont:
.align 16
ecp_nistz256_sqr_mont:
.L_ecp_nistz256_sqr_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7991,6 +8188,11 @@ ecp_nistz256_sqr_mont:
.type _ecp_nistz256_mul_mont,@function
.align 16
_ecp_nistz256_mul_mont:
#ifdef __CET__
.byte 243,15,30,251
#endif
andl $83886080,%eax
cmpl $83886080,%eax
jne .L004mul_mont_ialu
@ -8888,6 +9090,11 @@ _ecp_nistz256_mul_mont:
.align 16
ecp_nistz256_scatter_w5:
.L_ecp_nistz256_scatter_w5_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -8921,6 +9128,11 @@ ecp_nistz256_scatter_w5:
.align 16
ecp_nistz256_gather_w5:
.L_ecp_nistz256_gather_w5_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -9015,6 +9227,11 @@ ecp_nistz256_gather_w5:
.align 16
ecp_nistz256_scatter_w7:
.L_ecp_nistz256_scatter_w7_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -9046,6 +9263,11 @@ ecp_nistz256_scatter_w7:
.align 16
ecp_nistz256_gather_w7:
.L_ecp_nistz256_gather_w7_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -9260,6 +9482,11 @@ ecp_nistz256_gather_w7:
.align 16
ecp_nistz256_point_double:
.L_ecp_nistz256_point_double_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -9386,6 +9613,11 @@ ecp_nistz256_point_double:
.align 16
ecp_nistz256_point_add:
.L_ecp_nistz256_point_add_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -9899,6 +10131,11 @@ ecp_nistz256_point_add:
.align 16
ecp_nistz256_point_add_affine:
.L_ecp_nistz256_point_add_affine_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -10327,4 +10564,21 @@ ecp_nistz256_point_add_affine:
ret
.size ecp_nistz256_point_add_affine,.-.L_ecp_nistz256_point_add_affine_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif
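
_ecp_nistz256_mul_mont tests `andl $83886080,%eax / cmpl $83886080,%eax` before choosing its code path: 83886080 is 0x05000000, CPUID.1:EDX bits 24 (FXSR) and 26 (SSE2), so the SSE2 Montgomery multiply runs only when both are present and the code falls back to .L004mul_mont_ialu otherwise. A hedged C rendering (the helper name is illustrative):

static int nistz256_use_sse2(unsigned int cap0)
{
    const unsigned int mask = (1u << 24) | (1u << 26);  /* 0x05000000 */
    return (cap0 & mask) == mask;
}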


@ -6,6 +6,11 @@
.align 16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -100,6 +105,11 @@ gcm_gmult_4bit_x86:
.align 16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -209,6 +219,11 @@ gcm_ghash_4bit_x86:
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -308,6 +323,11 @@ gcm_gmult_4bit_mmx:
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -912,6 +932,11 @@ gcm_ghash_4bit_mmx:
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L010pic
@ -981,6 +1006,11 @@ gcm_init_clmul:
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L011pic
@ -1034,6 +1064,11 @@ gcm_gmult_clmul:
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1264,6 +1299,23 @@ gcm_ghash_clmul:
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl gcm_gmult_4bit_x86
@ -1271,6 +1323,11 @@ gcm_ghash_clmul:
.align 16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1365,6 +1422,11 @@ gcm_gmult_4bit_x86:
.align 16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1474,6 +1536,11 @@ gcm_ghash_4bit_x86:
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1573,6 +1640,11 @@ gcm_gmult_4bit_mmx:
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2177,6 +2249,11 @@ gcm_ghash_4bit_mmx:
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L010pic
@ -2246,6 +2323,11 @@ gcm_init_clmul:
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L011pic
@ -2299,6 +2381,11 @@ gcm_gmult_clmul:
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2529,4 +2616,21 @@ gcm_ghash_clmul:
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -1,11 +1,16 @@
/* Do not modify. This file is auto-generated from md5-586.pl. */
#ifdef PIC
.text
.globl md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.globl ossl_md5_block_asm_data_order
.type ossl_md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
ossl_md5_block_asm_data_order:
.L_ossl_md5_block_asm_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
movl 12(%esp),%edi
@ -677,14 +682,36 @@ md5_block_asm_data_order:
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
.size ossl_md5_block_asm_data_order,.-.L_ossl_md5_block_asm_data_order_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.globl ossl_md5_block_asm_data_order
.type ossl_md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
ossl_md5_block_asm_data_order:
.L_ossl_md5_block_asm_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
movl 12(%esp),%edi
@ -1356,5 +1383,22 @@ md5_block_asm_data_order:
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
.size ossl_md5_block_asm_data_order,.-.L_ossl_md5_block_asm_data_order_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -7,6 +7,11 @@
.align 16
poly1305_init:
.L_poly1305_init_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -68,6 +73,11 @@ poly1305_init:
.align 16
poly1305_blocks:
.L_poly1305_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -236,6 +246,11 @@ poly1305_blocks:
.align 16
poly1305_emit:
.L_poly1305_emit_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -295,6 +310,11 @@ poly1305_emit:
.type _poly1305_init_sse2,@function
.align 16
_poly1305_init_sse2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqu 24(%edi),%xmm4
leal 48(%edi),%edi
movl %esp,%ebp
@ -497,6 +517,11 @@ _poly1305_init_sse2:
.type _poly1305_blocks_sse2,@function
.align 16
_poly1305_blocks_sse2:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1258,6 +1283,11 @@ _poly1305_blocks_sse2:
.type _poly1305_emit_sse2,@function
.align 16
_poly1305_emit_sse2:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1351,6 +1381,11 @@ _poly1305_emit_sse2:
.type _poly1305_init_avx2,@function
.align 16
_poly1305_init_avx2:
#ifdef __CET__
.byte 243,15,30,251
#endif
vmovdqu 24(%edi),%xmm4
leal 48(%edi),%edi
movl %esp,%ebp
@ -1522,6 +1557,11 @@ _poly1305_init_avx2:
.type _poly1305_blocks_avx2,@function
.align 16
_poly1305_blocks_avx2:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1910,6 +1950,23 @@ _poly1305_blocks_avx2:
.byte 114,103,62,0
.align 4
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.align 64
@ -1918,6 +1975,11 @@ _poly1305_blocks_avx2:
.align 16
poly1305_init:
.L_poly1305_init_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1979,6 +2041,11 @@ poly1305_init:
.align 16
poly1305_blocks:
.L_poly1305_blocks_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2147,6 +2214,11 @@ poly1305_blocks:
.align 16
poly1305_emit:
.L_poly1305_emit_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2206,6 +2278,11 @@ poly1305_emit:
.type _poly1305_init_sse2,@function
.align 16
_poly1305_init_sse2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqu 24(%edi),%xmm4
leal 48(%edi),%edi
movl %esp,%ebp
@ -2408,6 +2485,11 @@ _poly1305_init_sse2:
.type _poly1305_blocks_sse2,@function
.align 16
_poly1305_blocks_sse2:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3169,6 +3251,11 @@ _poly1305_blocks_sse2:
.type _poly1305_emit_sse2,@function
.align 16
_poly1305_emit_sse2:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3262,6 +3349,11 @@ _poly1305_emit_sse2:
.type _poly1305_init_avx2,@function
.align 16
_poly1305_init_avx2:
#ifdef __CET__
.byte 243,15,30,251
#endif
vmovdqu 24(%edi),%xmm4
leal 48(%edi),%edi
movl %esp,%ebp
@ -3433,6 +3525,11 @@ _poly1305_init_avx2:
.type _poly1305_blocks_avx2,@function
.align 16
_poly1305_blocks_avx2:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3821,4 +3918,21 @@ _poly1305_blocks_avx2:
.byte 114,103,62,0
.align 4
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
RC4:
.L_RC4_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -272,6 +277,11 @@ RC4:
.align 16
RC4_set_key:
.L_RC4_set_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -350,6 +360,11 @@ RC4_set_key:
.align 16
RC4_options:
.L_RC4_options_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L018pic_point
.L018pic_point:
popl %eax
@ -380,6 +395,23 @@ RC4_options:
.align 64
.size RC4_options,.-.L_RC4_options_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl RC4
@ -387,6 +419,11 @@ RC4_options:
.align 16
RC4:
.L_RC4_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -650,6 +687,11 @@ RC4:
.align 16
RC4_set_key:
.L_RC4_set_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -725,6 +767,11 @@ RC4_set_key:
.align 16
RC4_options:
.L_RC4_options_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L016pic_point
.L016pic_point:
popl %eax
@ -752,4 +799,21 @@ RC4_options:
.align 64
.size RC4_options,.-.L_RC4_options_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
RC5_32_encrypt:
.L_RC5_32_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %esi
@ -197,6 +202,11 @@ RC5_32_encrypt:
.align 16
RC5_32_decrypt:
.L_RC5_32_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %esi
@ -390,6 +400,11 @@ RC5_32_decrypt:
.align 16
RC5_32_cbc_encrypt:
.L_RC5_32_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -449,21 +464,56 @@ RC5_32_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L010ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L011ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L012ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L013ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L014ejend
.L015ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L016ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L017ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L014ejend:
xorl %ecx,%eax
@ -563,6 +613,23 @@ RC5_32_cbc_encrypt:
.long .L010ej7-.L008PIC_point
.align 64
.size RC5_32_cbc_encrypt,.-.L_RC5_32_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl RC5_32_encrypt
@ -570,6 +637,11 @@ RC5_32_cbc_encrypt:
.align 16
RC5_32_encrypt:
.L_RC5_32_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %esi
@ -761,6 +833,11 @@ RC5_32_encrypt:
.align 16
RC5_32_decrypt:
.L_RC5_32_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %esi
@ -954,6 +1031,11 @@ RC5_32_decrypt:
.align 16
RC5_32_cbc_encrypt:
.L_RC5_32_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
@ -1013,21 +1095,56 @@ RC5_32_cbc_encrypt:
xorl %edx,%edx
jmp *%ebp
.L010ej7:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 6(%esi),%dh
shll $8,%edx
.L011ej6:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 5(%esi),%dh
.L012ej5:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 4(%esi),%dl
.L013ej4:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl (%esi),%ecx
jmp .L014ejend
.L015ej3:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 2(%esi),%ch
shll $8,%ecx
.L016ej2:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb 1(%esi),%ch
.L017ej1:
#ifdef __CET__
.byte 243,15,30,251
#endif
movb (%esi),%cl
.L014ejend:
xorl %ecx,%eax
@ -1127,4 +1244,21 @@ RC5_32_cbc_encrypt:
.long .L010ej7-.L008PIC_point
.align 64
.size RC5_32_cbc_encrypt,.-.L_RC5_32_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
ripemd160_block_asm_data_order:
.L_ripemd160_block_asm_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%eax
pushl %esi
@ -1964,6 +1969,23 @@ ripemd160_block_asm_data_order:
popl %esi
ret
.size ripemd160_block_asm_data_order,.-.L_ripemd160_block_asm_data_order_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl ripemd160_block_asm_data_order
@ -1971,6 +1993,11 @@ ripemd160_block_asm_data_order:
.align 16
ripemd160_block_asm_data_order:
.L_ripemd160_block_asm_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%eax
pushl %esi
@ -3929,4 +3956,21 @@ ripemd160_block_asm_data_order:
popl %esi
ret
.size ripemd160_block_asm_data_order,.-.L_ripemd160_block_asm_data_order_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
sha1_block_data_order:
.L_sha1_block_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1400,6 +1405,11 @@ sha1_block_data_order:
.type _sha1_block_data_order_shaext,@function
.align 16
_sha1_block_data_order_shaext:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1570,6 +1580,11 @@ _sha1_block_data_order_shaext:
.type _sha1_block_data_order_ssse3,@function
.align 16
_sha1_block_data_order_ssse3:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2789,6 +2804,11 @@ _sha1_block_data_order_ssse3:
.type _sha1_block_data_order_avx,@function
.align 16
_sha1_block_data_order_avx:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -3969,6 +3989,23 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl sha1_block_data_order
@ -3976,6 +4013,11 @@ _sha1_block_data_order_avx:
.align 16
sha1_block_data_order:
.L_sha1_block_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5370,6 +5412,11 @@ sha1_block_data_order:
.type _sha1_block_data_order_shaext,@function
.align 16
_sha1_block_data_order_shaext:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5540,6 +5587,11 @@ _sha1_block_data_order_shaext:
.type _sha1_block_data_order_ssse3,@function
.align 16
_sha1_block_data_order_ssse3:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -6759,6 +6811,11 @@ _sha1_block_data_order_ssse3:
.type _sha1_block_data_order_avx,@function
.align 16
_sha1_block_data_order_avx:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -7939,4 +7996,21 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
sha256_block_data_order:
.L_sha256_block_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -6782,6 +6787,23 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl sha256_block_data_order
@ -6789,6 +6811,11 @@ sha256_block_data_order:
.align 16
sha256_block_data_order:
.L_sha256_block_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -13565,4 +13592,21 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
sha512_block_data_order:
.L_sha512_block_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2828,6 +2833,23 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl sha512_block_data_order
@ -2835,6 +2857,11 @@ sha512_block_data_order:
.align 16
sha512_block_data_order:
.L_sha512_block_data_order_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -5657,4 +5684,21 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -61,6 +61,11 @@
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
#ifdef __CET__
.byte 243,15,30,251
#endif
addl (%esp),%ebp
movdqa -48(%ebp),%xmm7
movdqa -16(%ebp),%xmm6
@ -69,6 +74,11 @@ _vpaes_preheat:
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $16,%ecx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
@ -146,6 +156,11 @@ _vpaes_encrypt_core:
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal 608(%ebp),%ebx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
@ -234,6 +249,11 @@ _vpaes_decrypt_core:
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
#ifdef __CET__
.byte 243,15,30,251
#endif
addl (%esp),%ebp
movdqu (%esi),%xmm0
movdqa 320(%ebp),%xmm2
@ -328,6 +348,11 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
#ifdef __CET__
.byte 243,15,30,251
#endif
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
@ -340,6 +365,11 @@ _vpaes_schedule_192_smear:
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqa 8(%esp),%xmm2
pxor %xmm1,%xmm1
.byte 102,15,58,15,202,15
@ -389,6 +419,11 @@ _vpaes_schedule_round:
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqa -16(%ebp),%xmm2
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
@ -404,6 +439,11 @@ _vpaes_schedule_transform:
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqa %xmm0,%xmm4
movdqa 128(%ebp),%xmm5
testl %edi,%edi
@ -465,6 +505,11 @@ _vpaes_schedule_mangle:
.align 16
vpaes_set_encrypt_key:
.L_vpaes_set_encrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -498,6 +543,11 @@ vpaes_set_encrypt_key:
.align 16
vpaes_set_decrypt_key:
.L_vpaes_set_decrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -536,6 +586,11 @@ vpaes_set_decrypt_key:
.align 16
vpaes_encrypt:
.L_vpaes_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -565,6 +620,11 @@ vpaes_encrypt:
.align 16
vpaes_decrypt:
.L_vpaes_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -594,6 +654,11 @@ vpaes_decrypt:
.align 16
vpaes_cbc_encrypt:
.L_vpaes_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -660,6 +725,23 @@ vpaes_cbc_encrypt:
popl %ebp
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.align 64
@ -722,6 +804,11 @@ vpaes_cbc_encrypt:
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
#ifdef __CET__
.byte 243,15,30,251
#endif
addl (%esp),%ebp
movdqa -48(%ebp),%xmm7
movdqa -16(%ebp),%xmm6
@ -730,6 +817,11 @@ _vpaes_preheat:
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl $16,%ecx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
@ -807,6 +899,11 @@ _vpaes_encrypt_core:
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal 608(%ebp),%ebx
movl 240(%edx),%eax
movdqa %xmm6,%xmm1
@ -895,6 +992,11 @@ _vpaes_decrypt_core:
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
#ifdef __CET__
.byte 243,15,30,251
#endif
addl (%esp),%ebp
movdqu (%esi),%xmm0
movdqa 320(%ebp),%xmm2
@ -989,6 +1091,11 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
#ifdef __CET__
.byte 243,15,30,251
#endif
pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
pxor %xmm1,%xmm6
@ -1001,6 +1108,11 @@ _vpaes_schedule_192_smear:
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqa 8(%esp),%xmm2
pxor %xmm1,%xmm1
.byte 102,15,58,15,202,15
@ -1050,6 +1162,11 @@ _vpaes_schedule_round:
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqa -16(%ebp),%xmm2
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
@ -1065,6 +1182,11 @@ _vpaes_schedule_transform:
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
#ifdef __CET__
.byte 243,15,30,251
#endif
movdqa %xmm0,%xmm4
movdqa 128(%ebp),%xmm5
testl %edi,%edi
@ -1126,6 +1248,11 @@ _vpaes_schedule_mangle:
.align 16
vpaes_set_encrypt_key:
.L_vpaes_set_encrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1159,6 +1286,11 @@ vpaes_set_encrypt_key:
.align 16
vpaes_set_decrypt_key:
.L_vpaes_set_decrypt_key_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1197,6 +1329,11 @@ vpaes_set_decrypt_key:
.align 16
vpaes_encrypt:
.L_vpaes_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1226,6 +1363,11 @@ vpaes_encrypt:
.align 16
vpaes_decrypt:
.L_vpaes_decrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1255,6 +1397,11 @@ vpaes_decrypt:
.align 16
vpaes_cbc_encrypt:
.L_vpaes_cbc_encrypt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1321,4 +1468,21 @@ vpaes_cbc_encrypt:
popl %ebp
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
whirlpool_block_mmx:
.L_whirlpool_block_mmx_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -1106,6 +1111,23 @@ whirlpool_block_mmx:
.byte 251,238,124,102,221,23,71,158
.byte 202,45,191,7,173,90,131,51
.size whirlpool_block_mmx,.-.L_whirlpool_block_mmx_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl whirlpool_block_mmx
@ -1113,6 +1135,11 @@ whirlpool_block_mmx:
.align 16
whirlpool_block_mmx:
.L_whirlpool_block_mmx_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -2213,4 +2240,21 @@ whirlpool_block_mmx:
.byte 251,238,124,102,221,23,71,158
.byte 202,45,191,7,173,90,131,51
.size whirlpool_block_mmx,.-.L_whirlpool_block_mmx_begin
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -4,6 +4,11 @@
.type _mul_1x1_mmx,@function
.align 16
_mul_1x1_mmx:
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
movl %eax,%ecx
leal (%eax,%eax,1),%edx
@ -107,6 +112,11 @@ _mul_1x1_mmx:
.type _mul_1x1_ialu,@function
.align 16
_mul_1x1_ialu:
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
movl %eax,%ecx
leal (%eax,%eax,1),%edx
@ -241,6 +251,11 @@ _mul_1x1_ialu:
.align 16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L000PIC_me_up
.L000PIC_me_up:
popl %edx
@ -345,11 +360,33 @@ bn_GF2m_mul_2x2:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.type _mul_1x1_mmx,@function
.align 16
_mul_1x1_mmx:
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
movl %eax,%ecx
leal (%eax,%eax,1),%edx
@ -453,6 +490,11 @@ _mul_1x1_mmx:
.type _mul_1x1_ialu,@function
.align 16
_mul_1x1_ialu:
#ifdef __CET__
.byte 243,15,30,251
#endif
subl $36,%esp
movl %eax,%ecx
leal (%eax,%eax,1),%edx
@ -587,6 +629,11 @@ _mul_1x1_ialu:
.align 16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal OPENSSL_ia32cap_P,%edx
movl (%edx),%eax
movl 4(%edx),%edx
@ -688,4 +735,21 @@ bn_GF2m_mul_2x2:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
bn_mul_mont:
.L_bn_mul_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -475,6 +480,23 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl bn_mul_mont
@ -482,6 +504,11 @@ bn_mul_mont:
.align 16
bn_mul_mont:
.L_bn_mul_mont_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -948,4 +975,21 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.comm OPENSSL_ia32cap_P,16,4
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

@ -6,6 +6,11 @@
.align 16
OPENSSL_ia32_cpuid:
.L_OPENSSL_ia32_cpuid_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -150,6 +155,11 @@ OPENSSL_ia32_cpuid:
.align 16
OPENSSL_rdtsc:
.L_OPENSSL_rdtsc_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl %eax,%eax
xorl %edx,%edx
call .L009PIC_me_up
@ -167,6 +177,11 @@ OPENSSL_rdtsc:
.align 16
OPENSSL_instrument_halt:
.L_OPENSSL_instrument_halt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
call .L011PIC_me_up
.L011PIC_me_up:
popl %ecx
@ -199,6 +214,11 @@ OPENSSL_instrument_halt:
.align 16
OPENSSL_far_spin:
.L_OPENSSL_far_spin_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushfl
popl %eax
btl $9,%eax
@ -226,6 +246,11 @@ OPENSSL_far_spin:
.align 16
OPENSSL_wipe_cpu:
.L_OPENSSL_wipe_cpu_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl %eax,%eax
xorl %edx,%edx
call .L015PIC_me_up
@ -257,6 +282,11 @@ OPENSSL_wipe_cpu:
.align 16
OPENSSL_atomic_add:
.L_OPENSSL_atomic_add_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%ecx
pushl %ebx
@ -276,6 +306,11 @@ OPENSSL_atomic_add:
.align 16
OPENSSL_cleanse:
.L_OPENSSL_cleanse_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%ecx
xorl %eax,%eax
@ -313,6 +348,11 @@ OPENSSL_cleanse:
.align 16
CRYPTO_memcmp:
.L_CRYPTO_memcmp_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
movl 12(%esp),%esi
@ -342,6 +382,11 @@ CRYPTO_memcmp:
.align 16
OPENSSL_instrument_bus:
.L_OPENSSL_instrument_bus_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -390,6 +435,11 @@ OPENSSL_instrument_bus:
.align 16
OPENSSL_instrument_bus2:
.L_OPENSSL_instrument_bus2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -451,6 +501,11 @@ OPENSSL_instrument_bus2:
.align 16
OPENSSL_ia32_rdrand_bytes:
.L_OPENSSL_ia32_rdrand_bytes_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %ebx
xorl %eax,%eax
@ -494,6 +549,11 @@ OPENSSL_ia32_rdrand_bytes:
.align 16
OPENSSL_ia32_rdseed_bytes:
.L_OPENSSL_ia32_rdseed_bytes_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %ebx
xorl %eax,%eax
@ -537,6 +597,23 @@ OPENSSL_ia32_rdseed_bytes:
.comm OPENSSL_ia32cap_P,16,4
.section .init
call OPENSSL_cpuid_setup
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
.text
.globl OPENSSL_ia32_cpuid
@ -544,6 +621,11 @@ OPENSSL_ia32_rdseed_bytes:
.align 16
OPENSSL_ia32_cpuid:
.L_OPENSSL_ia32_cpuid_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -688,6 +770,11 @@ OPENSSL_ia32_cpuid:
.align 16
OPENSSL_rdtsc:
.L_OPENSSL_rdtsc_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl %eax,%eax
xorl %edx,%edx
leal OPENSSL_ia32cap_P,%ecx
@ -702,6 +789,11 @@ OPENSSL_rdtsc:
.align 16
OPENSSL_instrument_halt:
.L_OPENSSL_instrument_halt_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
leal OPENSSL_ia32cap_P,%ecx
btl $4,(%ecx)
jnc .L010nohalt
@ -731,6 +823,11 @@ OPENSSL_instrument_halt:
.align 16
OPENSSL_far_spin:
.L_OPENSSL_far_spin_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushfl
popl %eax
btl $9,%eax
@ -758,6 +855,11 @@ OPENSSL_far_spin:
.align 16
OPENSSL_wipe_cpu:
.L_OPENSSL_wipe_cpu_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
xorl %eax,%eax
xorl %edx,%edx
leal OPENSSL_ia32cap_P,%ecx
@ -786,6 +888,11 @@ OPENSSL_wipe_cpu:
.align 16
OPENSSL_atomic_add:
.L_OPENSSL_atomic_add_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%ecx
pushl %ebx
@ -805,6 +912,11 @@ OPENSSL_atomic_add:
.align 16
OPENSSL_cleanse:
.L_OPENSSL_cleanse_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
movl 4(%esp),%edx
movl 8(%esp),%ecx
xorl %eax,%eax
@ -842,6 +954,11 @@ OPENSSL_cleanse:
.align 16
CRYPTO_memcmp:
.L_CRYPTO_memcmp_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %esi
pushl %edi
movl 12(%esp),%esi
@ -871,6 +988,11 @@ CRYPTO_memcmp:
.align 16
OPENSSL_instrument_bus:
.L_OPENSSL_instrument_bus_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -916,6 +1038,11 @@ OPENSSL_instrument_bus:
.align 16
OPENSSL_instrument_bus2:
.L_OPENSSL_instrument_bus2_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %ebp
pushl %ebx
pushl %esi
@ -974,6 +1101,11 @@ OPENSSL_instrument_bus2:
.align 16
OPENSSL_ia32_rdrand_bytes:
.L_OPENSSL_ia32_rdrand_bytes_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %ebx
xorl %eax,%eax
@ -1017,6 +1149,11 @@ OPENSSL_ia32_rdrand_bytes:
.align 16
OPENSSL_ia32_rdseed_bytes:
.L_OPENSSL_ia32_rdseed_bytes_begin:
#ifdef __CET__
.byte 243,15,30,251
#endif
pushl %edi
pushl %ebx
xorl %eax,%eax
@ -1060,4 +1197,21 @@ OPENSSL_ia32_rdseed_bytes:
.comm OPENSSL_ia32cap_P,16,4
.section .init
call OPENSSL_cpuid_setup
.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -667,7 +667,7 @@ vpaes_cbc_encrypt:
vor 24,0,0
sub. 30, 30, 0
vperm 0, 0, 0, 29
vsel 1, 28, 0, 30
vsel 1,28,0,30
vor 28,0,0
stvx 1, 0, 4
addi 4, 4, 16
@ -719,7 +719,7 @@ vpaes_cbc_encrypt:
vor 24,25,25
sub. 30, 30, 0
vperm 0, 0, 0, 29
vsel 1, 28, 0, 30
vsel 1,28,0,30
vor 28,0,0
stvx 1, 0, 4
addi 4, 4, 16
@ -1037,7 +1037,7 @@ _vpaes_schedule_core:
vperm 0, 0, 0, 29
li 10, 4
vsel 2, 28, 0, 30
vsel 2,28,0,30
li 11, 8
stvx 2, 0, 5
li 12, 12
@ -1059,7 +1059,7 @@ _vpaes_schedule_core:
addi 9, 5, -15
vperm 0, 0, 0, 29
li 10, 4
vsel 2, 28, 0, 30
vsel 2,28,0,30
li 11, 8
stvx 2, 0, 5
li 12, 12
@ -1150,7 +1150,7 @@ _vpaes_schedule_low_round:
vsldoi 1, 9, 7, 12
vxor 7, 7, 1
vspltisb 1, 0x0f
vspltisb 1,0x0f
vsldoi 4, 9, 7, 8
@ -1246,7 +1246,7 @@ _vpaes_schedule_mangle:
vperm 1, 3, 3, 29
vsel 2, 28, 1, 30
vsel 2,28,1,30
vor 28,1,1
stvx 2, 0, 5
blr
@ -1297,7 +1297,7 @@ _vpaes_schedule_mangle:
vperm 1, 3, 3, 29
vsel 2, 28, 1, 30
vsel 2,28,1,30
vor 28,1,1
stvx 2, 0, 5
blr

File diff suppressed because it is too large Load diff

@ -0,0 +1,354 @@
/* Do not modify. This file is auto-generated from ecp_nistp521-ppc64.pl. */
.machine "any"
.abiversion 2
.text
.globl p521_felem_mul
.type p521_felem_mul,@function
.align 5
p521_felem_mul:
.localentry p521_felem_mul,0
mr 12,1
stdu 1,-16*13(1)
stxv 52,-16*12(12)
stxv 53,-16*11(12)
stxv 54,-16*10(12)
stxv 55,-16*9(12)
stxv 56,-16*8(12)
stxv 57,-16*7(12)
stxv 58,-16*6(12)
stxv 59,-16*5(12)
stxv 60,-16*4(12)
stxv 61,-16*3(12)
stxv 62,-16*2(12)
stxv 63,-16*1(12)
vspltisw 0,0
lxsd 13,0(4)
lxsd 14,8(4)
lxsd 15,16(4)
lxsd 16,24(4)
lxsd 17,32(4)
lxsd 18,40(4)
lxsd 19,48(4)
lxsd 20,56(4)
lxsd 21,64(4)
lxsd 3,0(5)
lxsd 4,8(5)
lxsd 5,16(5)
lxsd 6,24(5)
lxsd 7,32(5)
lxsd 8,40(5)
lxsd 9,48(5)
lxsd 10,56(5)
lxsd 11,64(5)
.long 0x12ED1823
xxpermdi 33,45,46,0b00
xxpermdi 34,36,35,0b00
.long 0x13011023
xxpermdi 34,37,36,0b00
.long 0x13211023
.long 0x132F1E63
xxpermdi 34,38,37,0b00
.long 0x13411023
xxpermdi 44,47,48,0b00
xxpermdi 54,36,35,0b00
.long 0x134CB6A3
xxpermdi 34,39,38,0b00
.long 0x13611023
xxpermdi 54,37,36,0b00
.long 0x136CB6E3
.long 0x13711EE3
xxpermdi 34,40,39,0b00
.long 0x13811023
xxpermdi 54,38,37,0b00
.long 0x138CB723
xxpermdi 34,41,40,0b00
.long 0x13A11023
xxpermdi 54,39,38,0b00
.long 0x13ACB763
xxpermdi 34,42,41,0b00
.long 0x13C11023
xxpermdi 54,40,39,0b00
.long 0x13CCB7A3
xxpermdi 34,43,42,0b00
.long 0x13E11023
xxpermdi 54,41,40,0b00
.long 0x13ECB7E3
xxpermdi 33,49,50,0b00
xxpermdi 34,36,35,0b00
.long 0x13811723
xxpermdi 34,37,36,0b00
.long 0x13A11763
.long 0x13B31F63
xxpermdi 34,38,37,0b00
.long 0x13C117A3
xxpermdi 44,51,52,0b00
xxpermdi 54,36,35,0b00
.long 0x13CCB7A3
xxpermdi 34,39,38,0b00
.long 0x13E117E3
xxpermdi 54,37,36,0b00
.long 0x13ECB7E3
.long 0x13F51FE3
li 8,0
li 9,1
mtvsrdd 33,9,8
.long 0x10630DC4
.long 0x10840DC4
.long 0x10A50DC4
.long 0x10C60DC4
.long 0x10E70DC4
.long 0x11080DC4
.long 0x11290DC4
.long 0x114A0DC4
.long 0x116B0DC4
.long 0x13D55FA3
xxpermdi 34,43,42,0b00
xxpermdi 33,52,53,0b00
.long 0x13A11763
xxpermdi 33,51,52,0b00
.long 0x13811723
.long 0x13954F23
xxpermdi 33,50,51,0b00
.long 0x136116E3
xxpermdi 54,41,40,0b00
xxpermdi 44,52,53,0b00
.long 0x136CB6E3
xxpermdi 33,49,50,0b00
.long 0x134116A3
xxpermdi 44,51,52,0b00
.long 0x134CB6A3
.long 0x13553EA3
xxpermdi 33,48,49,0b00
.long 0x13211663
xxpermdi 44,50,51,0b00
.long 0x132CB663
xxpermdi 33,47,48,0b00
.long 0x13011623
xxpermdi 44,49,50,0b00
.long 0x130CB623
xxpermdi 33,46,47,0b00
.long 0x12E115E3
xxpermdi 44,48,49,0b00
.long 0x12ECB5E3
xxpermdi 34,39,38,0b00
xxpermdi 33,52,53,0b00
.long 0x13211663
xxpermdi 33,51,52,0b00
.long 0x13011623
.long 0x13152E23
xxpermdi 33,50,51,0b00
.long 0x12E115E3
xxpermdi 54,37,36,0b00
xxpermdi 44,52,53,0b00
.long 0x12ECB5E3
stxv 55,0(3)
stxv 56,16(3)
stxv 57,32(3)
stxv 58,48(3)
stxv 59,64(3)
stxv 60,80(3)
stxv 61,96(3)
stxv 62,112(3)
stxv 63,128(3)
ld 12,0(1)
lxv 52,-16*12(12)
lxv 53,-16*11(12)
lxv 54,-16*10(12)
lxv 55,-16*9(12)
lxv 56,-16*8(12)
lxv 57,-16*7(12)
lxv 58,-16*6(12)
lxv 59,-16*5(12)
lxv 60,-16*4(12)
lxv 61,-16*3(12)
lxv 62,-16*2(12)
lxv 63,-16*1(12)
mr 1,12
blr
.size p521_felem_mul,.-p521_felem_mul
.globl p521_felem_square
.type p521_felem_square,@function
.align 5
p521_felem_square:
.localentry p521_felem_square,0
mr 12,1
stdu 1,-16*13(1)
stxv 52,-16*12(12)
stxv 53,-16*11(12)
stxv 54,-16*10(12)
stxv 55,-16*9(12)
stxv 56,-16*8(12)
stxv 57,-16*7(12)
stxv 58,-16*6(12)
stxv 59,-16*5(12)
stxv 60,-16*4(12)
stxv 61,-16*3(12)
stxv 62,-16*2(12)
stxv 63,-16*1(12)
vspltisw 0,0
lxsd 13,0(4)
lxsd 14,8(4)
lxsd 15,16(4)
lxsd 16,24(4)
lxsd 17,32(4)
lxsd 18,40(4)
lxsd 19,48(4)
lxsd 20,56(4)
lxsd 21,64(4)
li 8,0
li 9,1
mtvsrdd 33,9,8
.long 0x106D0DC4
.long 0x108E0DC4
.long 0x10AF0DC4
.long 0x10D00DC4
.long 0x10F10DC4
.long 0x11120DC4
.long 0x11330DC4
.long 0x11540DC4
.long 0x11750DC4
.long 0x12ED6823
.long 0x130D2023
xxpermdi 33,45,46,0b00
xxpermdi 34,37,46,0b00
.long 0x13211023
xxpermdi 34,38,37,0b00
.long 0x13411023
xxpermdi 34,39,38,0b00
.long 0x13611023
.long 0x136F7EE3
xxpermdi 34,40,39,0b00
.long 0x13811023
.long 0x138F3723
xxpermdi 34,41,40,0b00
.long 0x13A11023
xxpermdi 44,47,48,0b00
xxpermdi 54,39,48,0b00
.long 0x13ACB763
xxpermdi 34,42,41,0b00
.long 0x13C11023
xxpermdi 54,40,39,0b00
.long 0x13CCB7A3
xxpermdi 34,43,42,0b00
.long 0x13E11023
xxpermdi 54,41,40,0b00
.long 0x13ECB7E3
.long 0x13F18FE3
.long 0x13124623
.long 0x13534EA3
.long 0x13945723
.long 0x13D55FA3
mtvsrdd 33,9,8
.long 0x11080DC4
.long 0x11290DC4
.long 0x114A0DC4
.long 0x116B0DC4
.long 0x13B45F63
.long 0x13935F23
xxpermdi 34,43,42,0b00
xxpermdi 33,50,51,0b00
.long 0x136116E3
xxpermdi 33,49,50,0b00
.long 0x134116A3
xxpermdi 33,48,49,0b00
.long 0x13211663
.long 0x13324E63
xxpermdi 33,47,48,0b00
.long 0x13011623
.long 0x13114E23
xxpermdi 33,46,47,0b00
.long 0x12E115E3
xxpermdi 34,41,40,0b00
xxpermdi 33,48,49,0b00
.long 0x12E115E3
stxv 55,0(3)
stxv 56,16(3)
stxv 57,32(3)
stxv 58,48(3)
stxv 59,64(3)
stxv 60,80(3)
stxv 61,96(3)
stxv 62,112(3)
stxv 63,128(3)
ld 12,0(1)
lxv 52,-16*12(12)
lxv 53,-16*11(12)
lxv 54,-16*10(12)
lxv 55,-16*9(12)
lxv 56,-16*8(12)
lxv 57,-16*7(12)
lxv 58,-16*6(12)
lxv 59,-16*5(12)
lxv 60,-16*4(12)
lxv 61,-16*3(12)
lxv 62,-16*2(12)
lxv 63,-16*1(12)
mr 1,12
blr
.size p521_felem_square,.-p521_felem_square

@ -304,19 +304,19 @@ KeccakF1600:
dword_le_load:
.localentry dword_le_load,0
lbzu 0,1(3)
lbzu 4,1(3)
lbzu 5,1(3)
lbz 0,1(3)
lbz 4,2(3)
lbz 5,3(3)
insrdi 0,4,8,48
lbzu 4,1(3)
lbz 4,4(3)
insrdi 0,5,8,40
lbzu 5,1(3)
lbz 5,5(3)
insrdi 0,4,8,32
lbzu 4,1(3)
lbz 4,6(3)
insrdi 0,5,8,24
lbzu 5,1(3)
lbz 5,7(3)
insrdi 0,4,8,16
lbzu 4,1(3)
lbzu 4,8(3)
insrdi 0,5,8,8
insrdi 0,4,8,0
blr
@ -579,21 +579,21 @@ SHA3_squeeze:
cmpldi 30,8
blt .Lsqueeze_tail
stbu 0,1(29)
stb 0,1(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,2(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,3(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,4(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,5(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,6(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,7(29)
srdi 0,0,8
stbu 0,1(29)
stbu 0,8(29)
subic. 30,30,8
beq .Lsqueeze_done

File diff suppressed because it is too large

@ -674,7 +674,7 @@ vpaes_cbc_encrypt:
vor 24,0,0
sub. 30, 30, 0
vperm 0, 0, 0, 29
vsel 1, 28, 0, 30
vsel 1,28,0,30
vor 28,0,0
stvx 1, 0, 4
addi 4, 4, 16
@ -726,7 +726,7 @@ vpaes_cbc_encrypt:
vor 24,25,25
sub. 30, 30, 0
vperm 0, 0, 0, 29
vsel 1, 28, 0, 30
vsel 1,28,0,30
vor 28,0,0
stvx 1, 0, 4
addi 4, 4, 16
@ -1044,7 +1044,7 @@ _vpaes_schedule_core:
vperm 0, 0, 0, 29
li 10, 4
vsel 2, 28, 0, 30
vsel 2,28,0,30
li 11, 8
stvx 2, 0, 5
li 12, 12
@ -1066,7 +1066,7 @@ _vpaes_schedule_core:
addi 9, 5, -15
vperm 0, 0, 0, 29
li 10, 4
vsel 2, 28, 0, 30
vsel 2,28,0,30
li 11, 8
stvx 2, 0, 5
li 12, 12
@ -1157,7 +1157,7 @@ _vpaes_schedule_low_round:
vsldoi 1, 9, 7, 12
vxor 7, 7, 1
vspltisb 1, 0x0f
vspltisb 1,0x0f
vsldoi 4, 9, 7, 8
@ -1253,7 +1253,7 @@ _vpaes_schedule_mangle:
vperm 1, 3, 3, 29
vsel 2, 28, 1, 30
vsel 2,28,1,30
vor 28,1,1
stvx 2, 0, 5
blr
@ -1304,7 +1304,7 @@ _vpaes_schedule_mangle:
vperm 1, 3, 3, 29
vsel 2, 28, 1, 30
vsel 2,28,1,30
vor 28,1,1
stvx 2, 0, 5
blr

File diff suppressed because it is too large

@ -0,0 +1,354 @@
/* Do not modify. This file is auto-generated from ecp_nistp521-ppc64.pl. */
.machine "any"
.abiversion 2
.text
.globl p521_felem_mul
.type p521_felem_mul,@function
.align 5
p521_felem_mul:
.localentry p521_felem_mul,0
mr 12,1
stdu 1,-16*13(1)
stxv 52,-16*12(12)
stxv 53,-16*11(12)
stxv 54,-16*10(12)
stxv 55,-16*9(12)
stxv 56,-16*8(12)
stxv 57,-16*7(12)
stxv 58,-16*6(12)
stxv 59,-16*5(12)
stxv 60,-16*4(12)
stxv 61,-16*3(12)
stxv 62,-16*2(12)
stxv 63,-16*1(12)
vspltisw 0,0
lxsd 13,0(4)
lxsd 14,8(4)
lxsd 15,16(4)
lxsd 16,24(4)
lxsd 17,32(4)
lxsd 18,40(4)
lxsd 19,48(4)
lxsd 20,56(4)
lxsd 21,64(4)
lxsd 3,0(5)
lxsd 4,8(5)
lxsd 5,16(5)
lxsd 6,24(5)
lxsd 7,32(5)
lxsd 8,40(5)
lxsd 9,48(5)
lxsd 10,56(5)
lxsd 11,64(5)
.long 0x12ED1823
xxpermdi 33,45,46,0b00
xxpermdi 34,36,35,0b00
.long 0x13011023
xxpermdi 34,37,36,0b00
.long 0x13211023
.long 0x132F1E63
xxpermdi 34,38,37,0b00
.long 0x13411023
xxpermdi 44,47,48,0b00
xxpermdi 54,36,35,0b00
.long 0x134CB6A3
xxpermdi 34,39,38,0b00
.long 0x13611023
xxpermdi 54,37,36,0b00
.long 0x136CB6E3
.long 0x13711EE3
xxpermdi 34,40,39,0b00
.long 0x13811023
xxpermdi 54,38,37,0b00
.long 0x138CB723
xxpermdi 34,41,40,0b00
.long 0x13A11023
xxpermdi 54,39,38,0b00
.long 0x13ACB763
xxpermdi 34,42,41,0b00
.long 0x13C11023
xxpermdi 54,40,39,0b00
.long 0x13CCB7A3
xxpermdi 34,43,42,0b00
.long 0x13E11023
xxpermdi 54,41,40,0b00
.long 0x13ECB7E3
xxpermdi 33,49,50,0b00
xxpermdi 34,36,35,0b00
.long 0x13811723
xxpermdi 34,37,36,0b00
.long 0x13A11763
.long 0x13B31F63
xxpermdi 34,38,37,0b00
.long 0x13C117A3
xxpermdi 44,51,52,0b00
xxpermdi 54,36,35,0b00
.long 0x13CCB7A3
xxpermdi 34,39,38,0b00
.long 0x13E117E3
xxpermdi 54,37,36,0b00
.long 0x13ECB7E3
.long 0x13F51FE3
li 8,0
li 9,1
mtvsrdd 33,9,8
.long 0x10630DC4
.long 0x10840DC4
.long 0x10A50DC4
.long 0x10C60DC4
.long 0x10E70DC4
.long 0x11080DC4
.long 0x11290DC4
.long 0x114A0DC4
.long 0x116B0DC4
.long 0x13D55FA3
xxpermdi 34,43,42,0b00
xxpermdi 33,52,53,0b00
.long 0x13A11763
xxpermdi 33,51,52,0b00
.long 0x13811723
.long 0x13954F23
xxpermdi 33,50,51,0b00
.long 0x136116E3
xxpermdi 54,41,40,0b00
xxpermdi 44,52,53,0b00
.long 0x136CB6E3
xxpermdi 33,49,50,0b00
.long 0x134116A3
xxpermdi 44,51,52,0b00
.long 0x134CB6A3
.long 0x13553EA3
xxpermdi 33,48,49,0b00
.long 0x13211663
xxpermdi 44,50,51,0b00
.long 0x132CB663
xxpermdi 33,47,48,0b00
.long 0x13011623
xxpermdi 44,49,50,0b00
.long 0x130CB623
xxpermdi 33,46,47,0b00
.long 0x12E115E3
xxpermdi 44,48,49,0b00
.long 0x12ECB5E3
xxpermdi 34,39,38,0b00
xxpermdi 33,52,53,0b00
.long 0x13211663
xxpermdi 33,51,52,0b00
.long 0x13011623
.long 0x13152E23
xxpermdi 33,50,51,0b00
.long 0x12E115E3
xxpermdi 54,37,36,0b00
xxpermdi 44,52,53,0b00
.long 0x12ECB5E3
stxv 55,0(3)
stxv 56,16(3)
stxv 57,32(3)
stxv 58,48(3)
stxv 59,64(3)
stxv 60,80(3)
stxv 61,96(3)
stxv 62,112(3)
stxv 63,128(3)
ld 12,0(1)
lxv 52,-16*12(12)
lxv 53,-16*11(12)
lxv 54,-16*10(12)
lxv 55,-16*9(12)
lxv 56,-16*8(12)
lxv 57,-16*7(12)
lxv 58,-16*6(12)
lxv 59,-16*5(12)
lxv 60,-16*4(12)
lxv 61,-16*3(12)
lxv 62,-16*2(12)
lxv 63,-16*1(12)
mr 1,12
blr
.size p521_felem_mul,.-p521_felem_mul
.globl p521_felem_square
.type p521_felem_square,@function
.align 5
p521_felem_square:
.localentry p521_felem_square,0
mr 12,1
stdu 1,-16*13(1)
stxv 52,-16*12(12)
stxv 53,-16*11(12)
stxv 54,-16*10(12)
stxv 55,-16*9(12)
stxv 56,-16*8(12)
stxv 57,-16*7(12)
stxv 58,-16*6(12)
stxv 59,-16*5(12)
stxv 60,-16*4(12)
stxv 61,-16*3(12)
stxv 62,-16*2(12)
stxv 63,-16*1(12)
vspltisw 0,0
lxsd 13,0(4)
lxsd 14,8(4)
lxsd 15,16(4)
lxsd 16,24(4)
lxsd 17,32(4)
lxsd 18,40(4)
lxsd 19,48(4)
lxsd 20,56(4)
lxsd 21,64(4)
li 8,0
li 9,1
mtvsrdd 33,9,8
.long 0x106D0DC4
.long 0x108E0DC4
.long 0x10AF0DC4
.long 0x10D00DC4
.long 0x10F10DC4
.long 0x11120DC4
.long 0x11330DC4
.long 0x11540DC4
.long 0x11750DC4
.long 0x12ED6823
.long 0x130D2023
xxpermdi 33,45,46,0b00
xxpermdi 34,37,46,0b00
.long 0x13211023
xxpermdi 34,38,37,0b00
.long 0x13411023
xxpermdi 34,39,38,0b00
.long 0x13611023
.long 0x136F7EE3
xxpermdi 34,40,39,0b00
.long 0x13811023
.long 0x138F3723
xxpermdi 34,41,40,0b00
.long 0x13A11023
xxpermdi 44,47,48,0b00
xxpermdi 54,39,48,0b00
.long 0x13ACB763
xxpermdi 34,42,41,0b00
.long 0x13C11023
xxpermdi 54,40,39,0b00
.long 0x13CCB7A3
xxpermdi 34,43,42,0b00
.long 0x13E11023
xxpermdi 54,41,40,0b00
.long 0x13ECB7E3
.long 0x13F18FE3
.long 0x13124623
.long 0x13534EA3
.long 0x13945723
.long 0x13D55FA3
mtvsrdd 33,9,8
.long 0x11080DC4
.long 0x11290DC4
.long 0x114A0DC4
.long 0x116B0DC4
.long 0x13B45F63
.long 0x13935F23
xxpermdi 34,43,42,0b00
xxpermdi 33,50,51,0b00
.long 0x136116E3
xxpermdi 33,49,50,0b00
.long 0x134116A3
xxpermdi 33,48,49,0b00
.long 0x13211663
.long 0x13324E63
xxpermdi 33,47,48,0b00
.long 0x13011623
.long 0x13114E23
xxpermdi 33,46,47,0b00
.long 0x12E115E3
xxpermdi 34,41,40,0b00
xxpermdi 33,48,49,0b00
.long 0x12E115E3
stxv 55,0(3)
stxv 56,16(3)
stxv 57,32(3)
stxv 58,48(3)
stxv 59,64(3)
stxv 60,80(3)
stxv 61,96(3)
stxv 62,112(3)
stxv 63,128(3)
ld 12,0(1)
lxv 52,-16*12(12)
lxv 53,-16*11(12)
lxv 54,-16*10(12)
lxv 55,-16*9(12)
lxv 56,-16*8(12)
lxv 57,-16*7(12)
lxv 58,-16*6(12)
lxv 59,-16*5(12)
lxv 60,-16*4(12)
lxv 61,-16*3(12)
lxv 62,-16*2(12)
lxv 63,-16*1(12)
mr 1,12
blr
.size p521_felem_square,.-p521_felem_square

@ -304,19 +304,19 @@ KeccakF1600:
dword_le_load:
.localentry dword_le_load,0
lbzu 0,1(3)
lbzu 4,1(3)
lbzu 5,1(3)
lbz 0,1(3)
lbz 4,2(3)
lbz 5,3(3)
insrdi 0,4,8,48
lbzu 4,1(3)
lbz 4,4(3)
insrdi 0,5,8,40
lbzu 5,1(3)
lbz 5,5(3)
insrdi 0,4,8,32
lbzu 4,1(3)
lbz 4,6(3)
insrdi 0,5,8,24
lbzu 5,1(3)
lbz 5,7(3)
insrdi 0,4,8,16
lbzu 4,1(3)
lbzu 4,8(3)
insrdi 0,5,8,8
insrdi 0,4,8,0
blr
@ -579,21 +579,21 @@ SHA3_squeeze:
cmpldi 30,8
blt .Lsqueeze_tail
stbu 0,1(29)
stb 0,1(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,2(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,3(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,4(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,5(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,6(29)
srdi 0,0,8
stbu 0,1(29)
stb 0,7(29)
srdi 0,0,8
stbu 0,1(29)
stbu 0,8(29)
subic. 30,30,8
beq .Lsqueeze_done

File diff suppressed because it is too large

@ -674,7 +674,7 @@ vpaes_cbc_encrypt:
vor 24,0,0
sub. 30, 30, 0
vperm 0, 0, 0, 29
vsel 1, 28, 0, 30
vsel 1,28,0,30
vor 28,0,0
stvx 1, 0, 4
addi 4, 4, 16
@ -726,7 +726,7 @@ vpaes_cbc_encrypt:
vor 24,25,25
sub. 30, 30, 0
vperm 0, 0, 0, 29
vsel 1, 28, 0, 30
vsel 1,28,0,30
vor 28,0,0
stvx 1, 0, 4
addi 4, 4, 16
@ -1044,7 +1044,7 @@ _vpaes_schedule_core:
vperm 0, 0, 0, 29
li 10, 4
vsel 2, 28, 0, 30
vsel 2,28,0,30
li 11, 8
stvx 2, 0, 5
li 12, 12
@ -1066,7 +1066,7 @@ _vpaes_schedule_core:
addi 9, 5, -15
vperm 0, 0, 0, 29
li 10, 4
vsel 2, 28, 0, 30
vsel 2,28,0,30
li 11, 8
stvx 2, 0, 5
li 12, 12
@ -1157,7 +1157,7 @@ _vpaes_schedule_low_round:
vsldoi 1, 7, 9, 16-12
vxor 7, 7, 1
vspltisb 1, 0x0f
vspltisb 1,0x0f
vsldoi 4, 7, 9, 16-8
@ -1253,7 +1253,7 @@ _vpaes_schedule_mangle:
vperm 1, 3, 3, 29
vsel 2, 28, 1, 30
vsel 2,28,1,30
vor 28,1,1
stvx 2, 0, 5
blr
@ -1304,7 +1304,7 @@ _vpaes_schedule_mangle:
vperm 1, 3, 3, 29
vsel 2, 28, 1, 30
vsel 2,28,1,30
vor 28,1,1
stvx 2, 0, 5
blr