/* RC4 for x86_64: GNU as, AT&T syntax, ELF; run through the C preprocessor (includes x86_arch.h). */
|
#include "x86_arch.h"
|
||
|
.text
|
||
|
|
||
|
.hidden OPENSSL_ia32cap_P
|
||
|
|
||
|
/*
 * RC4 -- XOR a buffer with the RC4 keystream and advance the key state.
 *
 * Arguments are taken from registers:
 *   %rdi  key state: dword x at offset 0, dword y at offset 4,
 *         permutation table starting at offset 8
 *   %rsi  number of bytes to process (returns immediately when zero)
 *   %rdx  input buffer
 *   %rcx  output buffer
 *
 * No return value is set up in %rax.
 *
 * Table layout is detected at run time: if the dword at table offset 256
 * equals -1 the table holds one byte per entry (.LRC4_CHAR path),
 * otherwise each entry is a dword (scale-4 addressing).
 *
 * Register roles once the prologue has run:
 *   %rdi  table base (key + 8)
 *   %r10  x + 1, kept in the low byte (decremented again at .Lexit)
 *   %cl   y
 *   %eax  table entry at index %r10, loaded ahead of the step using it
 *   %r11  bytes still to process
 *   %r12  input pointer
 *   %r13  dword-table paths: (output - input), so (%r13,%r12,1) is the
 *         output address; byte-table path: the output pointer itself
 *
 * Saved and restored: %rbx, %r12, %r13.
 * Modified without saving: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11,
 * %xmm0-%xmm2 (16x path only), flags.
 */
.globl	RC4
.type	RC4,@function
.align	16
RC4:
	orq	%rsi,%rsi		/* len == 0: nothing to do */
	jne	.Lentry
	retq
.Lentry:
	pushq	%rbx
	pushq	%r12
	pushq	%r13
.Lprologue:
	movq	%rsi,%r11
	movq	%rdx,%r12
	movq	%rcx,%r13
	xorq	%r10,%r10
	xorq	%rcx,%rcx

	leaq	8(%rdi),%rdi		/* %rdi -> table; x and y now at -8 and -4 */
	movb	-8(%rdi),%r10b
	movb	-4(%rdi),%cl
	cmpl	$-1,256(%rdi)		/* marker stored by RC4_set_key for the byte table */
	je	.LRC4_CHAR
	movl	OPENSSL_ia32cap_P(%rip),%r8d
	xorq	%rbx,%rbx
	incb	%r10b
	subq	%r10,%rbx		/* %rbx = -(x + 1), masked below to a warm-up count */
	subq	%r12,%r13		/* %r13 = output - input */
	movl	(%rdi,%r10,4),%eax
	testq	$-16,%r11		/* fewer than 16 bytes: single-byte loop only */
	jz	.Lloop1
	btl	$IA32CAP_BIT0_INTEL,%r8d
	jc	.Lintel
	andq	$7,%rbx			/* steps needed until %r10 is a multiple of 8 */
	leaq	1(%r10),%rsi		/* leaq keeps the flags from andq for the jz below */
	jz	.Loop8
	subq	%rbx,%r11

/* Single-byte steps until %r10 is a multiple of 8. */
.Loop8_warmup:
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	%edx,(%rdi,%r10,4)
	addb	%dl,%al
	incb	%r10b
	movl	(%rdi,%rax,4),%edx
	movl	(%rdi,%r10,4),%eax
	xorb	(%r12),%dl
	movb	%dl,(%r13,%r12,1)
	leaq	1(%r12),%r12
	decq	%rbx
	jnz	.Loop8_warmup

	leaq	1(%r10),%rsi
	jmp	.Loop8

/*
 * Eight steps per iteration on the dword table.  Each keystream byte is
 * moved into %r8b with a rorq $8 in between, so after the final rotate
 * %r8 holds the eight bytes in memory order; they are XORed with eight
 * input bytes in one go.  %rsi is %r10 + 1 and indexes the entry that
 * follows the one being swapped.
 */
.align	16
.Loop8:
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	0(%rdi,%rsi,4),%ebx
	rorq	$8,%r8
	movl	%edx,0(%rdi,%r10,4)
	addb	%al,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	%bl,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	movl	4(%rdi,%rsi,4),%eax
	rorq	$8,%r8
	movl	%edx,4(%rdi,%r10,4)
	addb	%bl,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	8(%rdi,%rsi,4),%ebx
	rorq	$8,%r8
	movl	%edx,8(%rdi,%r10,4)
	addb	%al,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	%bl,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	movl	12(%rdi,%rsi,4),%eax
	rorq	$8,%r8
	movl	%edx,12(%rdi,%r10,4)
	addb	%bl,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	16(%rdi,%rsi,4),%ebx
	rorq	$8,%r8
	movl	%edx,16(%rdi,%r10,4)
	addb	%al,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	%bl,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	movl	20(%rdi,%rsi,4),%eax
	rorq	$8,%r8
	movl	%edx,20(%rdi,%r10,4)
	addb	%bl,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	24(%rdi,%rsi,4),%ebx
	rorq	$8,%r8
	movl	%edx,24(%rdi,%r10,4)
	addb	%al,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	$8,%sil			/* byte add: the look-ahead index wraps at 256 */
	addb	%bl,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	movl	-4(%rdi,%rsi,4),%eax	/* entry for the next iteration (%rsi already advanced) */
	rorq	$8,%r8
	movl	%edx,28(%rdi,%r10,4)
	addb	%bl,%dl
	movb	(%rdi,%rdx,4),%r8b
	addb	$8,%r10b
	rorq	$8,%r8
	subq	$8,%r11

	xorq	(%r12),%r8
	movq	%r8,(%r13,%r12,1)
	leaq	8(%r12),%r12

	testq	$-8,%r11		/* at least 8 bytes left? */
	jnz	.Loop8
	testq	%r11,%r11		/* 1..7 bytes left: finish one byte at a time */
	jnz	.Lloop1
	jmp	.Lexit

/*
 * Path taken when IA32CAP_BIT0_INTEL is set: 16 bytes per iteration,
 * used only when at least 32 bytes are to be processed.
 */
.align	16
.Lintel:
	testq	$-32,%r11
	jz	.Lloop1
	andq	$15,%rbx		/* steps needed until %r10 is a multiple of 16 */
	jz	.Loop16_is_hot
	subq	%rbx,%r11

/* Single-byte steps until %r10 is a multiple of 16. */
.Loop16_warmup:
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	%edx,(%rdi,%r10,4)
	addb	%dl,%al
	incb	%r10b
	movl	(%rdi,%rax,4),%edx
	movl	(%rdi,%r10,4),%eax
	xorb	(%r12),%dl
	movb	%dl,(%r13,%r12,1)
	leaq	1(%r12),%r12
	decq	%rbx
	jnz	.Loop16_warmup

	/* Rebuild %rcx from its low byte; value unchanged (presumably avoids a partial-register dependency). */
	movq	%rcx,%rbx
	xorq	%rcx,%rcx
	movb	%bl,%cl

/*
 * First step of the first 16-byte group, then join the loop body at
 * .Loop16_enter.  %rsi points at table entry %r10, so the 16 entries of
 * the group are reached with fixed displacements 0..60.
 */
.Loop16_is_hot:
	leaq	(%rdi,%r10,4),%rsi
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	pxor	%xmm0,%xmm0
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	4(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,0(%rsi)
	addb	%bl,%cl
	pinsrw	$0,(%rdi,%rax,4),%xmm0
	jmp	.Loop16_enter

/*
 * Sixteen steps per iteration.  Keystream bytes for even positions are
 * inserted as words into %xmm0 and those for odd positions into %xmm1;
 * %xmm1 is shifted left by 8 bits and both are XORed into %xmm2, which
 * holds 16 input bytes.  The combine-and-store of one group is done at
 * the top of the next iteration (or after the loop for the last group).
 */
.align	16
.Loop16:
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	pxor	%xmm0,%xmm2
	psllq	$8,%xmm1
	pxor	%xmm0,%xmm0
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	4(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,0(%rsi)
	pxor	%xmm1,%xmm2
	addb	%bl,%cl
	pinsrw	$0,(%rdi,%rax,4),%xmm0
	movdqu	%xmm2,(%r13,%r12,1)	/* store the previous group */
	leaq	16(%r12),%r12
.Loop16_enter:
	movl	(%rdi,%rcx,4),%edx
	pxor	%xmm1,%xmm1
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	8(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,4(%rsi)
	addb	%al,%cl
	pinsrw	$0,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	12(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,8(%rsi)
	addb	%bl,%cl
	pinsrw	$1,(%rdi,%rax,4),%xmm0
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	16(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,12(%rsi)
	addb	%al,%cl
	pinsrw	$1,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	20(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,16(%rsi)
	addb	%bl,%cl
	pinsrw	$2,(%rdi,%rax,4),%xmm0
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	24(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,20(%rsi)
	addb	%al,%cl
	pinsrw	$2,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	28(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,24(%rsi)
	addb	%bl,%cl
	pinsrw	$3,(%rdi,%rax,4),%xmm0
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	32(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,28(%rsi)
	addb	%al,%cl
	pinsrw	$3,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	36(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,32(%rsi)
	addb	%bl,%cl
	pinsrw	$4,(%rdi,%rax,4),%xmm0
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	40(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,36(%rsi)
	addb	%al,%cl
	pinsrw	$4,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	44(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,40(%rsi)
	addb	%bl,%cl
	pinsrw	$5,(%rdi,%rax,4),%xmm0
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	48(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,44(%rsi)
	addb	%al,%cl
	pinsrw	$5,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	52(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,48(%rsi)
	addb	%bl,%cl
	pinsrw	$6,(%rdi,%rax,4),%xmm0
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movl	56(%rsi),%eax
	movzbl	%bl,%ebx
	movl	%edx,52(%rsi)
	addb	%al,%cl
	pinsrw	$6,(%rdi,%rbx,4),%xmm1
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	addb	%dl,%al
	movl	60(%rsi),%ebx
	movzbl	%al,%eax
	movl	%edx,56(%rsi)
	addb	%bl,%cl
	pinsrw	$7,(%rdi,%rax,4),%xmm0
	addb	$16,%r10b
	movdqu	(%r12),%xmm2		/* 16 input bytes for this group */
	movl	(%rdi,%rcx,4),%edx
	movl	%ebx,(%rdi,%rcx,4)
	addb	%dl,%bl
	movzbl	%bl,%ebx
	movl	%edx,60(%rsi)
	leaq	(%rdi,%r10,4),%rsi	/* base of the next group */
	pinsrw	$7,(%rdi,%rbx,4),%xmm1
	movl	(%rsi),%eax
	movq	%rcx,%rbx
	xorq	%rcx,%rcx
	subq	$16,%r11
	movb	%bl,%cl
	testq	$-16,%r11		/* at least 16 bytes left? */
	jnz	.Loop16

	/* Combine and store the last group produced by the loop. */
	psllq	$8,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm1,%xmm2
	movdqu	%xmm2,(%r13,%r12,1)
	leaq	16(%r12),%r12

	testq	%r11,%r11		/* 1..15 bytes left: finish one byte at a time */
	jnz	.Lloop1
	jmp	.Lexit

/* One step per iteration on the dword table; also used for tails. */
.align	16
.Lloop1:
	addb	%al,%cl
	movl	(%rdi,%rcx,4),%edx
	movl	%eax,(%rdi,%rcx,4)
	movl	%edx,(%rdi,%r10,4)
	addb	%dl,%al
	incb	%r10b
	movl	(%rdi,%rax,4),%edx
	movl	(%rdi,%r10,4),%eax
	xorb	(%r12),%dl
	movb	%dl,(%r13,%r12,1)
	leaq	1(%r12),%r12
	decq	%r11
	jnz	.Lloop1
	jmp	.Lexit

/*
 * Byte-table path.  %r13 is the plain output pointer here, because this
 * path is entered before %r13 is turned into (output - input).
 */
.align	16
.LRC4_CHAR:
	addb	$1,%r10b
	movzbl	(%rdi,%r10,1),%eax
	testq	$-8,%r11		/* fewer than 8 bytes: single-byte loop only */
	jz	.Lcloop1
	jmp	.Lcloop8

/*
 * Eight steps per iteration on the byte table.  Input is loaded as two
 * dwords (%r8d, %r9d); each keystream byte is XORed into the low byte,
 * followed by a rotate by 8, so after four steps the dword is back in
 * memory order.  The index registers %r10 and %rsi alternate between
 * "current" and "next".  Each step loads the next entry before the swap
 * store; when the swap wrote to that same slot (the cmpq/jne pairs) the
 * stale loaded value is replaced by the value just stored.
 */
.align	16
.Lcloop8:
	movl	(%r12),%r8d
	movl	4(%r12),%r9d
	addb	%al,%cl
	leaq	1(%r10),%rsi
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%sil,%esi
	movzbl	(%rdi,%rsi,1),%ebx
	movb	%al,(%rdi,%rcx,1)
	cmpq	%rsi,%rcx
	movb	%dl,(%rdi,%r10,1)
	jne	.Lcmov0
	movq	%rax,%rbx
.Lcmov0:
	addb	%al,%dl
	xorb	(%rdi,%rdx,1),%r8b
	rorl	$8,%r8d
	addb	%bl,%cl
	leaq	1(%rsi),%r10
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%r10b,%r10d
	movzbl	(%rdi,%r10,1),%eax
	movb	%bl,(%rdi,%rcx,1)
	cmpq	%r10,%rcx
	movb	%dl,(%rdi,%rsi,1)
	jne	.Lcmov1
	movq	%rbx,%rax
.Lcmov1:
	addb	%bl,%dl
	xorb	(%rdi,%rdx,1),%r8b
	rorl	$8,%r8d
	addb	%al,%cl
	leaq	1(%r10),%rsi
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%sil,%esi
	movzbl	(%rdi,%rsi,1),%ebx
	movb	%al,(%rdi,%rcx,1)
	cmpq	%rsi,%rcx
	movb	%dl,(%rdi,%r10,1)
	jne	.Lcmov2
	movq	%rax,%rbx
.Lcmov2:
	addb	%al,%dl
	xorb	(%rdi,%rdx,1),%r8b
	rorl	$8,%r8d
	addb	%bl,%cl
	leaq	1(%rsi),%r10
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%r10b,%r10d
	movzbl	(%rdi,%r10,1),%eax
	movb	%bl,(%rdi,%rcx,1)
	cmpq	%r10,%rcx
	movb	%dl,(%rdi,%rsi,1)
	jne	.Lcmov3
	movq	%rbx,%rax
.Lcmov3:
	addb	%bl,%dl
	xorb	(%rdi,%rdx,1),%r8b
	rorl	$8,%r8d
	addb	%al,%cl
	leaq	1(%r10),%rsi
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%sil,%esi
	movzbl	(%rdi,%rsi,1),%ebx
	movb	%al,(%rdi,%rcx,1)
	cmpq	%rsi,%rcx
	movb	%dl,(%rdi,%r10,1)
	jne	.Lcmov4
	movq	%rax,%rbx
.Lcmov4:
	addb	%al,%dl
	xorb	(%rdi,%rdx,1),%r9b
	rorl	$8,%r9d
	addb	%bl,%cl
	leaq	1(%rsi),%r10
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%r10b,%r10d
	movzbl	(%rdi,%r10,1),%eax
	movb	%bl,(%rdi,%rcx,1)
	cmpq	%r10,%rcx
	movb	%dl,(%rdi,%rsi,1)
	jne	.Lcmov5
	movq	%rbx,%rax
.Lcmov5:
	addb	%bl,%dl
	xorb	(%rdi,%rdx,1),%r9b
	rorl	$8,%r9d
	addb	%al,%cl
	leaq	1(%r10),%rsi
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%sil,%esi
	movzbl	(%rdi,%rsi,1),%ebx
	movb	%al,(%rdi,%rcx,1)
	cmpq	%rsi,%rcx
	movb	%dl,(%rdi,%r10,1)
	jne	.Lcmov6
	movq	%rax,%rbx
.Lcmov6:
	addb	%al,%dl
	xorb	(%rdi,%rdx,1),%r9b
	rorl	$8,%r9d
	addb	%bl,%cl
	leaq	1(%rsi),%r10
	movzbl	(%rdi,%rcx,1),%edx
	movzbl	%r10b,%r10d
	movzbl	(%rdi,%r10,1),%eax
	movb	%bl,(%rdi,%rcx,1)
	cmpq	%r10,%rcx
	movb	%dl,(%rdi,%rsi,1)
	jne	.Lcmov7
	movq	%rbx,%rax
.Lcmov7:
	addb	%bl,%dl
	xorb	(%rdi,%rdx,1),%r9b
	rorl	$8,%r9d
	leaq	-8(%r11),%r11
	movl	%r8d,(%r13)
	leaq	8(%r12),%r12
	movl	%r9d,4(%r13)
	leaq	8(%r13),%r13

	testq	$-8,%r11		/* at least 8 bytes left? */
	jnz	.Lcloop8
	testq	%r11,%r11		/* 1..7 bytes left: finish one byte at a time */
	jnz	.Lcloop1
	jmp	.Lexit

/* One step per iteration on the byte table; also used for tails. */
.align	16
.Lcloop1:
	addb	%al,%cl
	movzbl	%cl,%ecx
	movzbl	(%rdi,%rcx,1),%edx
	movb	%al,(%rdi,%rcx,1)
	movb	%dl,(%rdi,%r10,1)
	addb	%al,%dl
	addb	$1,%r10b
	movzbl	%dl,%edx
	movzbl	%r10b,%r10d
	movzbl	(%rdi,%rdx,1),%edx
	movzbl	(%rdi,%r10,1),%eax
	xorb	(%r12),%dl
	leaq	1(%r12),%r12
	movb	%dl,(%r13)
	leaq	1(%r13),%r13
	subq	$1,%r11
	jnz	.Lcloop1
	jmp	.Lexit

/* Common exit: write x and y back, restore saved registers. */
.align	16
.Lexit:
	subb	$1,%r10b		/* undo the +1 bias kept on x during the loops */
	movl	%r10d,-8(%rdi)
	movl	%ecx,-4(%rdi)

	movq	(%rsp),%r13
	movq	8(%rsp),%r12
	movq	16(%rsp),%rbx
	addq	$24,%rsp
.Lepilogue:
	retq
.size	RC4,.-RC4
|
||
|
/*
 * RC4_set_key -- initialise the RC4 permutation table from a key.
 *
 * Arguments are taken from registers:
 *   %rdi  key state to fill: dword x at offset 0, dword y at offset 4,
 *         permutation table starting at offset 8
 *   %rsi  key length in bytes
 *   %rdx  key bytes
 *
 * Two table layouts are produced, chosen by IA32CAP_BIT0_INTELP4 in
 * OPENSSL_ia32cap_P:
 *   bit clear  256 dword entries (.Lw* loops)
 *   bit set    256 byte entries, followed by the dword marker -1 at
 *              table offset 256 (.Lc* loops)
 *
 * Both layouts start from the identity table and then run 256 swap
 * steps: j += table[i] + key[k]; swap(table[i], table[j]).
 *
 * The key is addressed as (key_end + negative_index): %rdx points one
 * past the last key byte and %rsi counts from -len up to 0, at which
 * point it is reset to -len (%rcx).  With len == 0 the index starts at 0
 * and is never reset, so the loops read 256 bytes starting at %rdx.
 *
 * x and y are cleared on exit; %eax is zero on return.
 * Modified: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.
 */
.globl	RC4_set_key
.type	RC4_set_key,@function
.align	16
RC4_set_key:
	leaq	8(%rdi),%rdi		/* %rdi -> table; x and y now at -8 and -4 */
	leaq	(%rdx,%rsi,1),%rdx	/* %rdx = one past the last key byte */
	negq	%rsi			/* %rsi = -len, the running key index */
	movq	%rsi,%rcx		/* %rcx = -len, the reload value on wrap */
	xorl	%eax,%eax
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11

	movl	OPENSSL_ia32cap_P(%rip),%r8d
	btl	$IA32CAP_BIT0_INTELP4,%r8d
	jc	.Lc1stloop
	jmp	.Lw1stloop

/* Dword layout: table[i] = i for i = 0..255 (loop ends when %al carries). */
.align	16
.Lw1stloop:
	movl	%eax,(%rdi,%rax,4)
	addb	$1,%al
	jnc	.Lw1stloop

	xorq	%r9,%r9			/* i */
	xorq	%r8,%r8			/* j */
/* Dword layout: 256 key-mixing swap steps. */
.align	16
.Lw2ndloop:
	movl	(%rdi,%r9,4),%r10d
	addb	(%rdx,%rsi,1),%r8b
	addb	%r10b,%r8b
	addq	$1,%rsi			/* sets ZF when the key index reaches the end */
	movl	(%rdi,%r8,4),%r11d	/* mov leaves ZF intact for the cmov below */
	cmovzq	%rcx,%rsi
	movl	%r10d,(%rdi,%r8,4)
	movl	%r11d,(%rdi,%r9,4)
	addb	$1,%r9b
	jnc	.Lw2ndloop
	jmp	.Lexit_key

/* Byte layout: table[i] = i for i = 0..255. */
.align	16
.Lc1stloop:
	movb	%al,(%rdi,%rax,1)
	addb	$1,%al
	jnc	.Lc1stloop

	xorq	%r9,%r9			/* i */
	xorq	%r8,%r8			/* j */
/* Byte layout: 256 key-mixing swap steps. */
.align	16
.Lc2ndloop:
	movb	(%rdi,%r9,1),%r10b
	addb	(%rdx,%rsi,1),%r8b
	addb	%r10b,%r8b
	addq	$1,%rsi			/* sets ZF when the key index reaches the end */
	movb	(%rdi,%r8,1),%r11b	/* mov leaves ZF intact for the jnz below */
	jnz	.Lcnowrap
	movq	%rcx,%rsi
.Lcnowrap:
	movb	%r10b,(%rdi,%r8,1)
	movb	%r11b,(%rdi,%r9,1)
	addb	$1,%r9b
	jnc	.Lc2ndloop
	movl	$-1,256(%rdi)		/* marker RC4 tests to select its byte-table path */

.align	16
.Lexit_key:
	xorl	%eax,%eax
	movl	%eax,-8(%rdi)		/* x = 0 */
	movl	%eax,-4(%rdi)		/* y = 0 */
	retq
.size	RC4_set_key,.-RC4_set_key
|
||
|
|
||
|
/*
 * RC4_options -- return a pointer to a NUL-terminated string naming the
 * code path selected by OPENSSL_ia32cap_P.
 *
 * Takes no arguments.
 * Out (%rax):
 *   IA32CAP_BIT0_INTELP4 set          -> "rc4(8x,char)"
 *   else IA32CAP_BIT0_INTEL set       -> "rc4(16x,int)"
 *   otherwise                         -> "rc4(8x,int)"
 * Modified: %rax, %rdx, flags.
 *
 * Each string has its own label and is addressed directly, so the
 * selection does not depend on hard-coded byte offsets into the table.
 * The strings are emitted in the same order and with the same bytes as
 * before, starting at .Lopts.
 */
.globl	RC4_options
.type	RC4_options,@function
.align	16
RC4_options:
	leaq	.Lopts_8x_int(%rip),%rax	/* default answer */
	movl	OPENSSL_ia32cap_P(%rip),%edx
	btl	$IA32CAP_BIT0_INTELP4,%edx
	jc	.L8xchar
	btl	$IA32CAP_BIT0_INTEL,%edx
	jnc	.Ldone
	leaq	.Lopts_16x_int(%rip),%rax
	retq
.L8xchar:
	leaq	.Lopts_8x_char(%rip),%rax
.Ldone:
	retq

.align	64
.Lopts:
.Lopts_8x_int:
.asciz	"rc4(8x,int)"
.Lopts_8x_char:
.asciz	"rc4(8x,char)"
.Lopts_16x_int:
.asciz	"rc4(16x,int)"
/* Attribution string; not referenced by the code in this function. */
.asciz	"RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.align	64
.size	RC4_options,.-RC4_options
|
||
|
#if defined(HAVE_GNU_STACK)
|
||
|
.section .note.GNU-stack,"",%progbits
|
||
|
#endif
|