# ftu/blst/elf/ct_inverse_mod_256-x86_64.s

.text
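# ct_inverse_mod_256: constant-time modular inversion of a 256-bit
# value, seemingly in the divsteps style of Bernstein-Yang (eprint
# 2019/266): 15 unrolled outer rounds of 31 approximate steps plus a
# 47-step exact tail, 15*31 + 47 = 512 divsteps in total, so the
# 512-bit result is presumably the inverse scaled by 2^512, left for
# the caller to normalize.
#
# SysV ABI arguments: %rdi = ret (512 bits), %rsi = inp (256 bits),
# %rdx = mod (256 bits), %rcx = pointer to the 256-bit value used in
# the final correction (apparently |modx| in blst's prototype).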
.globl ct_inverse_mod_256
.type ct_inverse_mod_256,@function
.align 32
ct_inverse_mod_256:
.cfi_startproc
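# the raw bytes below encode endbr64 (CET branch-target landing pad)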
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $1072,%rsp
.cfi_adjust_cfa_offset 1072
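# carve a 512-byte-aligned scratch area out of the 1072-byte frame; it
# holds two ping-pong halves of the working state (|a|, |b| at offset 0,
# the |u|, |v| accumulators from offset 64 up in each half)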
leaq 48+511(%rsp),%rax
andq $-512,%rax
movq %rdi,32(%rsp)
movq %rcx,40(%rsp)
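# copy |inp| and |mod| into the aligned scratch as the initial 4-limb
# |a| (offset 0) and |b| (offset 32)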
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 0(%rdx),%r12
movq 8(%rdx),%r13
movq 16(%rdx),%r14
movq 24(%rdx),%r15
movq %r8,0(%rax)
movq %r9,8(%rax)
movq %r10,16(%rax)
movq %r11,24(%rax)
movq %r12,32(%rax)
movq %r13,40(%rax)
movq %r14,48(%rax)
movq %r15,56(%rax)
movq %rax,%rsi
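# round 1: condense |a|, |b| to 31-bit approximations and compute the
# transition factors; f0,g0 come back in %rdx,%rcx and f1,g1 in %r12,%r13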
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
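# destination is the other 256-byte half: xor-ing the 512-byte-aligned
# pointer with 256 flips between the two scratch halves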
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,64(%rdi)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,72(%rdi)
xorq $256,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
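# round 2: |u| and |v| are still single-limb (written at offsets 64 and
# 104 above), so the accumulator update is done inline with imulq and
# the products sign-extended to 5 limbs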
movq 64(%rsi),%r8
movq 104(%rsi),%r12
movq %r8,%r9
imulq 0(%rsp),%r8
movq %r12,%r13
imulq 8(%rsp),%r12
addq %r12,%r8
movq %r8,32(%rdi)
sarq $63,%r8
movq %r8,40(%rdi)
movq %r8,48(%rdi)
movq %r8,56(%rdi)
movq %r8,64(%rdi)
leaq 64(%rsi),%rsi
imulq %rdx,%r9
imulq %rcx,%r13
addq %r13,%r9
movq %r9,72(%rdi)
sarq $63,%r9
movq %r9,80(%rdi)
movq %r9,88(%rdi)
movq %r9,96(%rdi)
movq %r9,104(%rdi)
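# rounds 3..15 all follow the same shape: xor with 256+64 flips to the
# other scratch half and rewinds from the |u|/|v| area back to |a|/|b|;
# each round shifts |a|,|b| down 31 bits and applies the factors to
# |u|,|v|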
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_256x63
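# the accumulators outgrow 256 bits here: sign-extend the top limb of
# |v| (returned in %rbp) so later rounds can use the 512-bit update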
sarq $63,%rbp
movq %rbp,40(%rdi)
movq %rbp,48(%rdi)
movq %rbp,56(%rdi)
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
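# |v| is updated with the 512-bit variant from now on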
call __smulq_512x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_512x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_512x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_512x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_512x63
xorq $256+64,%rsi
movl $31,%edx
call __ab_approximation_31_256
movq %r12,16(%rsp)
movq %r13,24(%rsp)
movq $256,%rdi
xorq %rsi,%rdi
call __smulq_256_n_shift_by_31
movq %rdx,0(%rsp)
movq %rcx,8(%rsp)
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 32(%rdi),%rdi
call __smulq_256_n_shift_by_31
movq %rdx,16(%rsp)
movq %rcx,24(%rsp)
movq 0(%rsp),%rdx
movq 8(%rsp),%rcx
leaq 64(%rsi),%rsi
leaq 32(%rdi),%rdi
call __smulq_256x63
movq 16(%rsp),%rdx
movq 24(%rsp),%rcx
leaq 40(%rdi),%rdi
call __smulq_512x63
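# 15 rounds x 31 = 465 approximate divsteps done; finish with 47 exact
# steps (512 in total) on what remains of |a| and |b|, of which only
# the low limbs at 0(%rsi) and 32(%rsi) are still needed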
xorq $256+64,%rsi
movl $47,%edx
movq 0(%rsi),%r8
movq 32(%rsi),%r10
call __inner_loop_62_256
leaq 64(%rsi),%rsi
movq %r12,%rdx
movq %r13,%rcx
movq 32(%rsp),%rdi
call __smulq_512x63
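# the final |u| update writes straight into the caller's ret buffer
# (reloaded from 32(%rsp)); the tail below folds the top-limb sign in
# with a masked add of the 4th-argument modulus (from 40(%rsp)), then
# applies a second signed +/-mod correction, the top limb seemingly
# being confined to {-1, 0, +1} by that point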
adcq %rbp,%rdx
movq 40(%rsp),%rsi
movq %rdx,%rax
sarq $63,%rdx
movq %rdx,%r8
movq %rdx,%r9
andq 0(%rsi),%r8
movq %rdx,%r10
andq 8(%rsi),%r9
andq 16(%rsi),%r10
andq 24(%rsi),%rdx
addq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
adcq %rdx,%r15
adcq $0,%rax
movq %rax,%rdx
negq %rax
orq %rax,%rdx
sarq $63,%rax
movq %rdx,%r8
movq %rdx,%r9
andq 0(%rsi),%r8
movq %rdx,%r10
andq 8(%rsi),%r9
andq 16(%rsi),%r10
andq 24(%rsi),%rdx
xorq %rax,%r8
xorq %rcx,%rcx
xorq %rax,%r9
subq %rax,%rcx
xorq %rax,%r10
xorq %rax,%rdx
addq %rcx,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%rdx
addq %r8,%r12
adcq %r9,%r13
adcq %r10,%r14
adcq %rdx,%r15
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
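# epilogue: restore the six callee-saved registers; 0xf3,0xc3 below is
# rep ret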
leaq 1072(%rsp),%r8
movq 0(%r8),%r15
.cfi_restore %r15
movq 8(%r8),%r14
.cfi_restore %r14
movq 16(%r8),%r13
.cfi_restore %r13
movq 24(%r8),%r12
.cfi_restore %r12
movq 32(%r8),%rbx
.cfi_restore %rbx
movq 40(%r8),%rbp
.cfi_restore %rbp
leaq 48(%r8),%rsp
.cfi_adjust_cfa_offset -1072-8*6
.byte 0xf3,0xc3
.cfi_endproc
.size ct_inverse_mod_256,.-ct_inverse_mod_256
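# __smulq_512x63: %rdi[0..7] = |u| * f + |v| * g, with the 5-limb |u|
# at 0(%rsi) (its top limb apparently a sign word), the 8-limb |v| at
# 40(%rsi), and signed factors f = %rdx, g = %rcx; operands are
# conditionally negated up front so the mulq chain runs on
# non-negative values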
.type __smulq_512x63,@function
.align 32
__smulq_512x63:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%rbp
movq %rdx,%rbx
sarq $63,%rdx
xorq %rax,%rax
subq %rdx,%rax
xorq %rdx,%rbx
addq %rax,%rbx
xorq %rdx,%r8
xorq %rdx,%r9
xorq %rdx,%r10
xorq %rdx,%r11
xorq %rdx,%rbp
addq %r8,%rax
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
adcq $0,%rbp
mulq %rbx
movq %rax,0(%rdi)
movq %r9,%rax
movq %rdx,%r9
mulq %rbx
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
movq %r9,8(%rdi)
movq %rdx,%r10
mulq %rbx
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
movq %r10,16(%rdi)
movq %rdx,%r11
andq %rbx,%rbp
negq %rbp
mulq %rbx
addq %rax,%r11
adcq %rdx,%rbp
movq %r11,24(%rdi)
movq 40(%rsi),%r8
movq 48(%rsi),%r9
movq 56(%rsi),%r10
movq 64(%rsi),%r11
movq 72(%rsi),%r12
movq 80(%rsi),%r13
movq 88(%rsi),%r14
movq 96(%rsi),%r15
movq %rcx,%rdx
sarq $63,%rdx
xorq %rax,%rax
subq %rdx,%rax
xorq %rdx,%rcx
addq %rax,%rcx
xorq %rdx,%r8
xorq %rdx,%r9
xorq %rdx,%r10
xorq %rdx,%r11
xorq %rdx,%r12
xorq %rdx,%r13
xorq %rdx,%r14
xorq %rdx,%r15
addq %r8,%rax
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
adcq $0,%r12
adcq $0,%r13
adcq $0,%r14
adcq $0,%r15
mulq %rcx
movq %rax,%r8
movq %r9,%rax
movq %rdx,%r9
mulq %rcx
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rcx
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rcx
addq %rax,%r11
movq %r12,%rax
adcq $0,%rdx
movq %rdx,%r12
mulq %rcx
addq %rax,%r12
movq %r13,%rax
adcq $0,%rdx
movq %rdx,%r13
mulq %rcx
addq %rax,%r13
movq %r14,%rax
adcq $0,%rdx
movq %rdx,%r14
mulq %rcx
addq %rax,%r14
movq %r15,%rax
adcq $0,%rdx
movq %rdx,%r15
imulq %rcx
addq %rax,%r15
adcq $0,%rdx
movq %rbp,%rbx
sarq $63,%rbp
addq 0(%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq %rbx,%r12
adcq %rbp,%r13
adcq %rbp,%r14
adcq %rbp,%r15
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size __smulq_512x63,.-__smulq_512x63
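# __smulq_256x63: 320-bit flavour of the same multiply-accumulate:
# %rdi[0..4] = |u| * f + |v| * g with the 5-limb |u| at 0(%rsi), the
# 5-limb |v| at 40(%rsi), and f = %rdx, g = %rcx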
.type __smulq_256x63,@function
.align 32
__smulq_256x63:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq 0+0(%rsi),%r8
movq 0+8(%rsi),%r9
movq 0+16(%rsi),%r10
movq 0+24(%rsi),%r11
movq 0+32(%rsi),%rbp
movq %rdx,%rbx
sarq $63,%rdx
xorq %rax,%rax
subq %rdx,%rax
xorq %rdx,%rbx
addq %rax,%rbx
xorq %rdx,%r8
xorq %rdx,%r9
xorq %rdx,%r10
xorq %rdx,%r11
xorq %rdx,%rbp
addq %r8,%rax
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
adcq $0,%rbp
mulq %rbx
movq %rax,%r8
movq %r9,%rax
movq %rdx,%r9
mulq %rbx
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbx
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
movq %rdx,%r11
andq %rbx,%rbp
negq %rbp
mulq %rbx
addq %rax,%r11
adcq %rdx,%rbp
movq %rcx,%rdx
movq 40+0(%rsi),%r12
movq 40+8(%rsi),%r13
movq 40+16(%rsi),%r14
movq 40+24(%rsi),%r15
movq 40+32(%rsi),%rcx
movq %rdx,%rbx
sarq $63,%rdx
xorq %rax,%rax
subq %rdx,%rax
xorq %rdx,%rbx
addq %rax,%rbx
xorq %rdx,%r12
xorq %rdx,%r13
xorq %rdx,%r14
xorq %rdx,%r15
xorq %rdx,%rcx
addq %r12,%rax
adcq $0,%r13
adcq $0,%r14
adcq $0,%r15
adcq $0,%rcx
mulq %rbx
movq %rax,%r12
movq %r13,%rax
movq %rdx,%r13
mulq %rbx
addq %rax,%r13
movq %r14,%rax
adcq $0,%rdx
movq %rdx,%r14
mulq %rbx
addq %rax,%r14
movq %r15,%rax
adcq $0,%rdx
movq %rdx,%r15
andq %rbx,%rcx
negq %rcx
mulq %rbx
addq %rax,%r15
adcq %rdx,%rcx
addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
adcq %rcx,%rbp
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %rbp,32(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size __smulq_256x63,.-__smulq_256x63
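# __smulq_256_n_shift_by_31: %rdi[0..3] = (|a| * f + |b| * g) >> 31,
# with the 4-limb |a| at 0(%rsi) and |b| at 32(%rsi); f, g arrive in
# %rdx, %rcx and are passed back in the same registers, negated when
# the shifted sum came out negative (the stored result is made
# non-negative accordingly)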
.type __smulq_256_n_shift_by_31,@function
.align 32
__smulq_256_n_shift_by_31:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq %rdx,0(%rdi)
movq %rcx,8(%rdi)
movq %rdx,%rbp
movq 0+0(%rsi),%r8
movq 0+8(%rsi),%r9
movq 0+16(%rsi),%r10
movq 0+24(%rsi),%r11
movq %rbp,%rbx
sarq $63,%rbp
xorq %rax,%rax
subq %rbp,%rax
xorq %rbp,%rbx
addq %rax,%rbx
xorq %rbp,%r8
xorq %rbp,%r9
xorq %rbp,%r10
xorq %rbp,%r11
addq %r8,%rax
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
mulq %rbx
movq %rax,%r8
movq %r9,%rax
andq %rbx,%rbp
negq %rbp
movq %rdx,%r9
mulq %rbx
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
movq %rdx,%r10
mulq %rbx
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbx
addq %rax,%r11
adcq %rdx,%rbp
movq 32+0(%rsi),%r12
movq 32+8(%rsi),%r13
movq 32+16(%rsi),%r14
movq 32+24(%rsi),%r15
movq %rcx,%rbx
sarq $63,%rcx
xorq %rax,%rax
subq %rcx,%rax
xorq %rcx,%rbx
addq %rax,%rbx
xorq %rcx,%r12
xorq %rcx,%r13
xorq %rcx,%r14
xorq %rcx,%r15
addq %r12,%rax
adcq $0,%r13
adcq $0,%r14
adcq $0,%r15
mulq %rbx
movq %rax,%r12
movq %r13,%rax
andq %rbx,%rcx
negq %rcx
movq %rdx,%r13
mulq %rbx
addq %rax,%r13
movq %r14,%rax
adcq $0,%rdx
movq %rdx,%r14
mulq %rbx
addq %rax,%r14
movq %r15,%rax
adcq $0,%rdx
movq %rdx,%r15
mulq %rbx
addq %rax,%r15
adcq %rdx,%rcx
addq %r12,%r8
adcq %r13,%r9
adcq %r14,%r10
adcq %r15,%r11
adcq %rcx,%rbp
movq 0(%rdi),%rdx
movq 8(%rdi),%rcx
shrdq $31,%r9,%r8
shrdq $31,%r10,%r9
shrdq $31,%r11,%r10
shrdq $31,%rbp,%r11
sarq $63,%rbp
xorq %rax,%rax
subq %rbp,%rax
xorq %rbp,%r8
xorq %rbp,%r9
xorq %rbp,%r10
xorq %rbp,%r11
addq %rax,%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
xorq %rbp,%rdx
xorq %rbp,%rcx
addq %rax,%rdx
addq %rax,%rcx
.byte 0xf3,0xc3
.cfi_endproc
.size __smulq_256_n_shift_by_31,.-__smulq_256_n_shift_by_31
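# __ab_approximation_31_256: condense the 256-bit |a| and |b| into
# 64-bit approximations, gluing the low 31 bits of limb 0 to the top
# 33 bits taken from the highest limb pair that is non-zero in either
# value, then fall through into the 31-step inner loop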
.type __ab_approximation_31_256,@function
.align 32
__ab_approximation_31_256:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq 24(%rsi),%r9
movq 56(%rsi),%r11
movq 16(%rsi),%rbx
movq 48(%rsi),%rbp
movq 8(%rsi),%r8
movq 40(%rsi),%r10
movq %r9,%rax
orq %r11,%rax
cmovzq %rbx,%r9
cmovzq %rbp,%r11
cmovzq %r8,%rbx
movq 0(%rsi),%r8
cmovzq %r10,%rbp
movq 32(%rsi),%r10
movq %r9,%rax
orq %r11,%rax
cmovzq %rbx,%r9
cmovzq %rbp,%r11
cmovzq %r8,%rbx
cmovzq %r10,%rbp
movq %r9,%rax
orq %r11,%rax
bsrq %rax,%rcx
leaq 1(%rcx),%rcx
cmovzq %r8,%r9
cmovzq %r10,%r11
cmovzq %rax,%rcx
negq %rcx
shldq %cl,%rbx,%r9
shldq %cl,%rbp,%r11
movl $0x7FFFFFFF,%eax
andq %rax,%r8
andq %rax,%r10
notq %rax
andq %rax,%r9
andq %rax,%r11
orq %r9,%r8
orq %r11,%r10
jmp __inner_loop_31_256
.byte 0xf3,0xc3
.cfi_endproc
.size __ab_approximation_31_256,.-__ab_approximation_31_256
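# __inner_loop_31_256: %edx constant-time divstep iterations on the
# 64-bit approximations; both factor pairs stay packed two-per-register
# (biased by 0x7FFFFFFF so they remain non-negative) and are unpacked
# into f0 = %rdx, g0 = %rcx, f1 = %r12, g1 = %r13 at the end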
.type __inner_loop_31_256,@function
.align 32
__inner_loop_31_256:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq $0x7FFFFFFF80000000,%rcx
movq $0x800000007FFFFFFF,%r13
movq $0x7FFFFFFF7FFFFFFF,%r15
.Loop_31_256:
cmpq %r10,%r8
movq %r8,%rax
movq %r10,%rbx
movq %rcx,%rbp
movq %r13,%r14
cmovbq %r10,%r8
cmovbq %rax,%r10
cmovbq %r13,%rcx
cmovbq %rbp,%r13
subq %r10,%r8
subq %r13,%rcx
addq %r15,%rcx
testq $1,%rax
cmovzq %rax,%r8
cmovzq %rbx,%r10
cmovzq %rbp,%rcx
cmovzq %r14,%r13
shrq $1,%r8
addq %r13,%r13
subq %r15,%r13
subl $1,%edx
jnz .Loop_31_256
shrq $32,%r15
movl %ecx,%edx
movl %r13d,%r12d
shrq $32,%rcx
shrq $32,%r13
subq %r15,%rdx
subq %r15,%rcx
subq %r15,%r12
subq %r15,%r13
.byte 0xf3,0xc3
.cfi_endproc
.size __inner_loop_31_256,.-__inner_loop_31_256
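# __inner_loop_62_256: %edx divstep iterations on full 64-bit words,
# starting from the identity transition f0,g0,f1,g1 = 1,0,0,1 in
# %rdx, %rcx, %r12, %r13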
.type __inner_loop_62_256,@function
.align 32
__inner_loop_62_256:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movl %edx,%r15d
movq $1,%rdx
xorq %rcx,%rcx
xorq %r12,%r12
movq %rdx,%r13
movq %rdx,%r14
.Loop_62_256:
xorq %rax,%rax
testq %r14,%r8
movq %r10,%rbx
cmovnzq %r10,%rax
subq %r8,%rbx
movq %r8,%rbp
subq %rax,%r8
cmovcq %rbx,%r8
cmovcq %rbp,%r10
movq %rdx,%rax
cmovcq %r12,%rdx
cmovcq %rax,%r12
movq %rcx,%rbx
cmovcq %r13,%rcx
cmovcq %rbx,%r13
xorq %rax,%rax
xorq %rbx,%rbx
shrq $1,%r8
testq %r14,%rbp
cmovnzq %r12,%rax
cmovnzq %r13,%rbx
addq %r12,%r12
addq %r13,%r13
subq %rax,%rdx
subq %rbx,%rcx
subl $1,%r15d
jnz .Loop_62_256
.byte 0xf3,0xc3
.cfi_endproc
.size __inner_loop_62_256,.-__inner_loop_62_256
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a",@note
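# GNU property note: GNU_PROPERTY_X86_FEATURE_1_AND = IBT | SHSTK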
.long 4,2f-1f,5
.byte 0x47,0x4E,0x55,0
1: .long 0xc0000002,4,3
.align 8
2: