# ftu/blst/elf/add_mod_384-x86_64.s
.text
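# add_mod_384(ret, a, b, p): ret = (a + b) mod p for 384-bit operands stored as
# six 64-bit limbs, least-significant limb first.  SysV args: %rdi=ret, %rsi=a,
# %rdx=b, %rcx=p.  Every entry point below opens with endbr64 emitted as raw
# bytes (.byte 0xf3,0x0f,0x1e,0xfa) and returns via .byte 0xf3,0xc3 (rep ret).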
.globl add_mod_384
.hidden add_mod_384
.type add_mod_384,@function
.align 32
add_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $8,%rsp
.cfi_adjust_cfa_offset 8
call __add_mod_384
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size add_mod_384,.-add_mod_384
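# Internal helper: loads a from (%rsi) (or enters at __add_mod_384_a_is_loaded
# with a already in %r8-%r13), adds b at (%rdx), then conditionally subtracts
# the modulus at (%rcx) via cmov, so the result lands in [0, p) without a
# data-dependent branch; the six result limbs are written to (%rdi).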
.type __add_mod_384,@function
.align 32
__add_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
__add_mod_384_a_is_loaded:
addq 0(%rdx),%r8
adcq 8(%rdx),%r9
adcq 16(%rdx),%r10
movq %r8,%r14
adcq 24(%rdx),%r11
movq %r9,%r15
adcq 32(%rdx),%r12
movq %r10,%rax
adcq 40(%rdx),%r13
movq %r11,%rbx
sbbq %rdx,%rdx
subq 0(%rcx),%r8
sbbq 8(%rcx),%r9
movq %r12,%rbp
sbbq 16(%rcx),%r10
sbbq 24(%rcx),%r11
sbbq 32(%rcx),%r12
movq %r13,%rsi
sbbq 40(%rcx),%r13
sbbq $0,%rdx
cmovcq %r14,%r8
cmovcq %r15,%r9
cmovcq %rax,%r10
movq %r8,0(%rdi)
cmovcq %rbx,%r11
movq %r9,8(%rdi)
cmovcq %rbp,%r12
movq %r10,16(%rdi)
cmovcq %rsi,%r13
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size __add_mod_384,.-__add_mod_384
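# add_mod_384x: same as add_mod_384, applied to a pair of 384-bit values stored
# back to back (an Fp2-style element, 96 bytes); the half at offset 48 is
# processed first, then the half at offset 0.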
.globl add_mod_384x
.hidden add_mod_384x
.type add_mod_384x,@function
.align 32
add_mod_384x:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $24,%rsp
.cfi_adjust_cfa_offset 24
movq %rsi,0(%rsp)
movq %rdx,8(%rsp)
leaq 48(%rsi),%rsi
leaq 48(%rdx),%rdx
leaq 48(%rdi),%rdi
call __add_mod_384
movq 0(%rsp),%rsi
movq 8(%rsp),%rdx
leaq -48(%rdi),%rdi
call __add_mod_384
movq 24+0(%rsp),%r15
.cfi_restore %r15
movq 24+8(%rsp),%r14
.cfi_restore %r14
movq 24+16(%rsp),%r13
.cfi_restore %r13
movq 24+24(%rsp),%r12
.cfi_restore %r12
movq 24+32(%rsp),%rbx
.cfi_restore %rbx
movq 24+40(%rsp),%rbp
.cfi_restore %rbp
leaq 24+48(%rsp),%rsp
.cfi_adjust_cfa_offset -24-8*6
.byte 0xf3,0xc3
.cfi_endproc
.size add_mod_384x,.-add_mod_384x
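# rshift_mod_384(ret, a, count, p): divide a by 2^count modulo p.
# %rdi=ret, %rsi=a, %edx=count (the loop runs at least once), %rcx=p.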
.globl rshift_mod_384
.hidden rshift_mod_384
.type rshift_mod_384,@function
.align 32
rshift_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
.Loop_rshift_mod_384:
call __rshift_mod_384
decl %edx
jnz .Loop_rshift_mod_384
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size rshift_mod_384,.-rshift_mod_384
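# Internal helper: one modular halving of the value in %r8-%r13.  If the value
# is odd, the (odd) modulus at (%rcx) is added first so the 385-bit sum is
# even, then everything is shifted right by one bit.  Result stays in %r8-%r13.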
.type __rshift_mod_384,@function
.align 32
__rshift_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq $1,%rsi
movq 0(%rcx),%r14
andq %r8,%rsi
movq 8(%rcx),%r15
negq %rsi
movq 16(%rcx),%rax
andq %rsi,%r14
movq 24(%rcx),%rbx
andq %rsi,%r15
movq 32(%rcx),%rbp
andq %rsi,%rax
andq %rsi,%rbx
andq %rsi,%rbp
andq 40(%rcx),%rsi
addq %r8,%r14
adcq %r9,%r15
adcq %r10,%rax
adcq %r11,%rbx
adcq %r12,%rbp
adcq %r13,%rsi
sbbq %r13,%r13
shrq $1,%r14
movq %r15,%r8
shrq $1,%r15
movq %rax,%r9
shrq $1,%rax
movq %rbx,%r10
shrq $1,%rbx
movq %rbp,%r11
shrq $1,%rbp
movq %rsi,%r12
shrq $1,%rsi
shlq $63,%r8
shlq $63,%r9
orq %r14,%r8
shlq $63,%r10
orq %r15,%r9
shlq $63,%r11
orq %rax,%r10
shlq $63,%r12
orq %rbx,%r11
shlq $63,%r13
orq %rbp,%r12
orq %rsi,%r13
.byte 0xf3,0xc3
.cfi_endproc
.size __rshift_mod_384,.-__rshift_mod_384
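# div_by_2_mod_384(ret, a, p): a single modular halving; %rdi=ret, %rsi=a, %rdx=p.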
.globl div_by_2_mod_384
.hidden div_by_2_mod_384
.type div_by_2_mod_384,@function
.align 32
div_by_2_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq %rdx,%rcx
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
call __rshift_mod_384
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size div_by_2_mod_384,.-div_by_2_mod_384
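# lshift_mod_384(ret, a, count, p): multiply a by 2^count modulo p, one
# double-and-conditionally-subtract step per iteration.
# %rdi=ret, %rsi=a, %edx=count (the loop runs at least once), %rcx=p.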
.globl lshift_mod_384
.hidden lshift_mod_384
.type lshift_mod_384,@function
.align 32
lshift_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdi
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
.Loop_lshift_mod_384:
addq %r8,%r8
adcq %r9,%r9
adcq %r10,%r10
movq %r8,%r14
adcq %r11,%r11
movq %r9,%r15
adcq %r12,%r12
movq %r10,%rax
adcq %r13,%r13
movq %r11,%rbx
sbbq %rdi,%rdi
subq 0(%rcx),%r8
sbbq 8(%rcx),%r9
movq %r12,%rbp
sbbq 16(%rcx),%r10
sbbq 24(%rcx),%r11
sbbq 32(%rcx),%r12
movq %r13,%rsi
sbbq 40(%rcx),%r13
sbbq $0,%rdi
movq (%rsp),%rdi
cmovcq %r14,%r8
cmovcq %r15,%r9
cmovcq %rax,%r10
cmovcq %rbx,%r11
cmovcq %rbp,%r12
cmovcq %rsi,%r13
decl %edx
jnz .Loop_lshift_mod_384
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size lshift_mod_384,.-lshift_mod_384
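# Internal helper: double the value in %r8-%r13 modulo the prime at (%rcx),
# leaving the result in the same registers (clobbers %rdx and %rsi).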
.type __lshift_mod_384,@function
.align 32
__lshift_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
addq %r8,%r8
adcq %r9,%r9
adcq %r10,%r10
movq %r8,%r14
adcq %r11,%r11
movq %r9,%r15
adcq %r12,%r12
movq %r10,%rax
adcq %r13,%r13
movq %r11,%rbx
sbbq %rdx,%rdx
subq 0(%rcx),%r8
sbbq 8(%rcx),%r9
movq %r12,%rbp
sbbq 16(%rcx),%r10
sbbq 24(%rcx),%r11
sbbq 32(%rcx),%r12
movq %r13,%rsi
sbbq 40(%rcx),%r13
sbbq $0,%rdx
cmovcq %r14,%r8
cmovcq %r15,%r9
cmovcq %rax,%r10
cmovcq %rbx,%r11
cmovcq %rbp,%r12
cmovcq %rsi,%r13
.byte 0xf3,0xc3
.cfi_endproc
.size __lshift_mod_384,.-__lshift_mod_384
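# mul_by_3_mod_384(ret, a, p): ret = 3*a mod p, computed as (2*a mod p) + a.
# %rdi=ret, %rsi=a, %rdx=p (moved into %rcx for the helpers).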
.globl mul_by_3_mod_384
.hidden mul_by_3_mod_384
.type mul_by_3_mod_384,@function
.align 32
mul_by_3_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rsi
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq %rdx,%rcx
call __lshift_mod_384
movq (%rsp),%rdx
call __add_mod_384_a_is_loaded
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size mul_by_3_mod_384,.-mul_by_3_mod_384
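# mul_by_8_mod_384(ret, a, p): ret = 8*a mod p via three modular doublings.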
.globl mul_by_8_mod_384
.hidden mul_by_8_mod_384
.type mul_by_8_mod_384,@function
.align 32
mul_by_8_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $8,%rsp
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq %rdx,%rcx
call __lshift_mod_384
call __lshift_mod_384
call __lshift_mod_384
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size mul_by_8_mod_384,.-mul_by_8_mod_384
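# mul_by_3_mod_384x: as mul_by_3_mod_384, applied to both 384-bit halves of a
# 96-byte (Fp2-style) element.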
.globl mul_by_3_mod_384x
.hidden mul_by_3_mod_384x
.type mul_by_3_mod_384x,@function
.align 32
mul_by_3_mod_384x:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rsi
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq %rdx,%rcx
call __lshift_mod_384
movq (%rsp),%rdx
call __add_mod_384_a_is_loaded
movq (%rsp),%rsi
leaq 48(%rdi),%rdi
movq 48(%rsi),%r8
movq 56(%rsi),%r9
movq 64(%rsi),%r10
movq 72(%rsi),%r11
movq 80(%rsi),%r12
movq 88(%rsi),%r13
call __lshift_mod_384
movq $48,%rdx
addq (%rsp),%rdx
call __add_mod_384_a_is_loaded
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size mul_by_3_mod_384x,.-mul_by_3_mod_384x
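# mul_by_8_mod_384x: as mul_by_8_mod_384, applied to both halves of a 96-byte
# element.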
.globl mul_by_8_mod_384x
.hidden mul_by_8_mod_384x
.type mul_by_8_mod_384x,@function
.align 32
mul_by_8_mod_384x:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rsi
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq %rdx,%rcx
call __lshift_mod_384
call __lshift_mod_384
call __lshift_mod_384
movq (%rsp),%rsi
movq %r8,0(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq 48+0(%rsi),%r8
movq 48+8(%rsi),%r9
movq 48+16(%rsi),%r10
movq 48+24(%rsi),%r11
movq 48+32(%rsi),%r12
movq 48+40(%rsi),%r13
call __lshift_mod_384
call __lshift_mod_384
call __lshift_mod_384
movq %r8,48+0(%rdi)
movq %r9,48+8(%rdi)
movq %r10,48+16(%rdi)
movq %r11,48+24(%rdi)
movq %r12,48+32(%rdi)
movq %r13,48+40(%rdi)
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size mul_by_8_mod_384x,.-mul_by_8_mod_384x
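# cneg_mod_384(ret, a, flag, p): constant-time conditional negation,
# ret = flag ? (p - a) mod p : a.  %rdi=ret, %rsi=a, %rdx=flag, %rcx=p.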
.globl cneg_mod_384
.hidden cneg_mod_384
.type cneg_mod_384,@function
.align 32
cneg_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
pushq %rdx
.cfi_adjust_cfa_offset 8
movq 0(%rsi),%rdx
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq %rdx,%r8
movq 24(%rsi),%r11
orq %r9,%rdx
movq 32(%rsi),%r12
orq %r10,%rdx
movq 40(%rsi),%r13
orq %r11,%rdx
movq $-1,%rsi
orq %r12,%rdx
orq %r13,%rdx
movq 0(%rcx),%r14
cmovnzq %rsi,%rdx
movq 8(%rcx),%r15
movq 16(%rcx),%rax
andq %rdx,%r14
movq 24(%rcx),%rbx
andq %rdx,%r15
movq 32(%rcx),%rbp
andq %rdx,%rax
movq 40(%rcx),%rsi
andq %rdx,%rbx
movq 0(%rsp),%rcx
andq %rdx,%rbp
andq %rdx,%rsi
subq %r8,%r14
sbbq %r9,%r15
sbbq %r10,%rax
sbbq %r11,%rbx
sbbq %r12,%rbp
sbbq %r13,%rsi
orq %rcx,%rcx
cmovzq %r8,%r14
cmovzq %r9,%r15
cmovzq %r10,%rax
movq %r14,0(%rdi)
cmovzq %r11,%rbx
movq %r15,8(%rdi)
cmovzq %r12,%rbp
movq %rax,16(%rdi)
cmovzq %r13,%rsi
movq %rbx,24(%rdi)
movq %rbp,32(%rdi)
movq %rsi,40(%rdi)
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size cneg_mod_384,.-cneg_mod_384
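# sub_mod_384(ret, a, b, p): ret = (a - b) mod p; %rdi=ret, %rsi=a, %rdx=b, %rcx=p.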
.globl sub_mod_384
.hidden sub_mod_384
.type sub_mod_384,@function
.align 32
sub_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $8,%rsp
.cfi_adjust_cfa_offset 8
call __sub_mod_384
movq 8(%rsp),%r15
.cfi_restore %r15
movq 16(%rsp),%r14
.cfi_restore %r14
movq 24(%rsp),%r13
.cfi_restore %r13
movq 32(%rsp),%r12
.cfi_restore %r12
movq 40(%rsp),%rbx
.cfi_restore %rbx
movq 48(%rsp),%rbp
.cfi_restore %rbp
leaq 56(%rsp),%rsp
.cfi_adjust_cfa_offset -56
.byte 0xf3,0xc3
.cfi_endproc
.size sub_mod_384,.-sub_mod_384
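# Internal helper: a - b with borrow, then the modulus (masked by the borrow)
# is added back, reducing the result without a data-dependent branch.
# The six result limbs are written to (%rdi).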
.type __sub_mod_384,@function
.align 32
__sub_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
subq 0(%rdx),%r8
movq 0(%rcx),%r14
sbbq 8(%rdx),%r9
movq 8(%rcx),%r15
sbbq 16(%rdx),%r10
movq 16(%rcx),%rax
sbbq 24(%rdx),%r11
movq 24(%rcx),%rbx
sbbq 32(%rdx),%r12
movq 32(%rcx),%rbp
sbbq 40(%rdx),%r13
movq 40(%rcx),%rsi
sbbq %rdx,%rdx
andq %rdx,%r14
andq %rdx,%r15
andq %rdx,%rax
andq %rdx,%rbx
andq %rdx,%rbp
andq %rdx,%rsi
addq %r14,%r8
adcq %r15,%r9
movq %r8,0(%rdi)
adcq %rax,%r10
movq %r9,8(%rdi)
adcq %rbx,%r11
movq %r10,16(%rdi)
adcq %rbp,%r12
movq %r11,24(%rdi)
adcq %rsi,%r13
movq %r12,32(%rdi)
movq %r13,40(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size __sub_mod_384,.-__sub_mod_384
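# sub_mod_384x: component-wise sub_mod_384 on both halves of a 96-byte element.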
.globl sub_mod_384x
.hidden sub_mod_384x
.type sub_mod_384x,@function
.align 32
sub_mod_384x:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $24,%rsp
.cfi_adjust_cfa_offset 24
movq %rsi,0(%rsp)
movq %rdx,8(%rsp)
leaq 48(%rsi),%rsi
leaq 48(%rdx),%rdx
leaq 48(%rdi),%rdi
call __sub_mod_384
movq 0(%rsp),%rsi
movq 8(%rsp),%rdx
leaq -48(%rdi),%rdi
call __sub_mod_384
movq 24+0(%rsp),%r15
.cfi_restore %r15
movq 24+8(%rsp),%r14
.cfi_restore %r14
movq 24+16(%rsp),%r13
.cfi_restore %r13
movq 24+24(%rsp),%r12
.cfi_restore %r12
movq 24+32(%rsp),%rbx
.cfi_restore %rbx
movq 24+40(%rsp),%rbp
.cfi_restore %rbp
leaq 24+48(%rsp),%rsp
.cfi_adjust_cfa_offset -24-8*6
.byte 0xf3,0xc3
.cfi_endproc
.size sub_mod_384x,.-sub_mod_384x
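# mul_by_1_plus_i_mod_384x(ret, a, p): multiply the Fp2-style element
# a = re + im*i (re at offset 0, im at offset 48) by (1 + i):
# ret_re = (re - im) mod p, ret_im = (re + im) mod p.
# %rdi=ret, %rsi=a, %rdx=p.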
.globl mul_by_1_plus_i_mod_384x
.hidden mul_by_1_plus_i_mod_384x
.type mul_by_1_plus_i_mod_384x,@function
.align 32
mul_by_1_plus_i_mod_384x:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $56,%rsp
.cfi_adjust_cfa_offset 56
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq %r8,%r14
addq 48(%rsi),%r8
movq %r9,%r15
adcq 56(%rsi),%r9
movq %r10,%rax
adcq 64(%rsi),%r10
movq %r11,%rbx
adcq 72(%rsi),%r11
movq %r12,%rcx
adcq 80(%rsi),%r12
movq %r13,%rbp
adcq 88(%rsi),%r13
movq %rdi,48(%rsp)
sbbq %rdi,%rdi
subq 48(%rsi),%r14
sbbq 56(%rsi),%r15
sbbq 64(%rsi),%rax
sbbq 72(%rsi),%rbx
sbbq 80(%rsi),%rcx
sbbq 88(%rsi),%rbp
sbbq %rsi,%rsi
movq %r8,0(%rsp)
movq 0(%rdx),%r8
movq %r9,8(%rsp)
movq 8(%rdx),%r9
movq %r10,16(%rsp)
movq 16(%rdx),%r10
movq %r11,24(%rsp)
movq 24(%rdx),%r11
movq %r12,32(%rsp)
andq %rsi,%r8
movq 32(%rdx),%r12
movq %r13,40(%rsp)
andq %rsi,%r9
movq 40(%rdx),%r13
andq %rsi,%r10
andq %rsi,%r11
andq %rsi,%r12
andq %rsi,%r13
movq 48(%rsp),%rsi
addq %r8,%r14
movq 0(%rsp),%r8
adcq %r9,%r15
movq 8(%rsp),%r9
adcq %r10,%rax
movq 16(%rsp),%r10
adcq %r11,%rbx
movq 24(%rsp),%r11
adcq %r12,%rcx
movq 32(%rsp),%r12
adcq %r13,%rbp
movq 40(%rsp),%r13
movq %r14,0(%rsi)
movq %r8,%r14
movq %r15,8(%rsi)
movq %rax,16(%rsi)
movq %r9,%r15
movq %rbx,24(%rsi)
movq %rcx,32(%rsi)
movq %r10,%rax
movq %rbp,40(%rsi)
subq 0(%rdx),%r8
movq %r11,%rbx
sbbq 8(%rdx),%r9
sbbq 16(%rdx),%r10
movq %r12,%rcx
sbbq 24(%rdx),%r11
sbbq 32(%rdx),%r12
movq %r13,%rbp
sbbq 40(%rdx),%r13
sbbq $0,%rdi
cmovcq %r14,%r8
cmovcq %r15,%r9
cmovcq %rax,%r10
movq %r8,48(%rsi)
cmovcq %rbx,%r11
movq %r9,56(%rsi)
cmovcq %rcx,%r12
movq %r10,64(%rsi)
cmovcq %rbp,%r13
movq %r11,72(%rsi)
movq %r12,80(%rsi)
movq %r13,88(%rsi)
movq 56+0(%rsp),%r15
.cfi_restore %r15
movq 56+8(%rsp),%r14
.cfi_restore %r14
movq 56+16(%rsp),%r13
.cfi_restore %r13
movq 56+24(%rsp),%r12
.cfi_restore %r12
movq 56+32(%rsp),%rbx
.cfi_restore %rbx
movq 56+40(%rsp),%rbp
.cfi_restore %rbp
leaq 56+48(%rsp),%rsp
.cfi_adjust_cfa_offset -56-8*6
.byte 0xf3,0xc3
.cfi_endproc
.size mul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x
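# sgn0_pty_mod_384(a, p): returns a 2-bit value in %rax; bit 0 is the parity
# of a (a mod 2) and bit 1 is set when 2*a >= p, i.e. when a lies in the upper
# half of the field.  %rdi=a, %rsi=p.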
.globl sgn0_pty_mod_384
.hidden sgn0_pty_mod_384
.type sgn0_pty_mod_384,@function
.align 32
sgn0_pty_mod_384:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movq 0(%rdi),%r8
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%rcx
movq 40(%rdi),%rdx
xorq %rax,%rax
movq %r8,%rdi
addq %r8,%r8
adcq %r9,%r9
adcq %r10,%r10
adcq %r11,%r11
adcq %rcx,%rcx
adcq %rdx,%rdx
adcq $0,%rax
subq 0(%rsi),%r8
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%rcx
sbbq 40(%rsi),%rdx
sbbq $0,%rax
notq %rax
andq $1,%rdi
andq $2,%rax
orq %rdi,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size sgn0_pty_mod_384,.-sgn0_pty_mod_384
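# sgn0_pty_mod_384x(a, p): same bits for a two-component (Fp2-style) element at
# %rdi: the parity bit comes from the real half unless it is zero (then the
# imaginary half), and the upper-half bit comes from the imaginary half unless
# it is zero (then the real half).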
.globl sgn0_pty_mod_384x
.hidden sgn0_pty_mod_384x
.type sgn0_pty_mod_384x,@function
.align 32
sgn0_pty_mod_384x:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
subq $8,%rsp
.cfi_adjust_cfa_offset 8
movq 48(%rdi),%r8
movq 56(%rdi),%r9
movq 64(%rdi),%r10
movq 72(%rdi),%r11
movq 80(%rdi),%rcx
movq 88(%rdi),%rdx
movq %r8,%rbx
orq %r9,%r8
orq %r10,%r8
orq %r11,%r8
orq %rcx,%r8
orq %rdx,%r8
leaq 0(%rdi),%rax
xorq %rdi,%rdi
movq %rbx,%rbp
addq %rbx,%rbx
adcq %r9,%r9
adcq %r10,%r10
adcq %r11,%r11
adcq %rcx,%rcx
adcq %rdx,%rdx
adcq $0,%rdi
subq 0(%rsi),%rbx
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%rcx
sbbq 40(%rsi),%rdx
sbbq $0,%rdi
movq %r8,0(%rsp)
notq %rdi
andq $1,%rbp
andq $2,%rdi
orq %rbp,%rdi
movq 0(%rax),%r8
movq 8(%rax),%r9
movq 16(%rax),%r10
movq 24(%rax),%r11
movq 32(%rax),%rcx
movq 40(%rax),%rdx
movq %r8,%rbx
orq %r9,%r8
orq %r10,%r8
orq %r11,%r8
orq %rcx,%r8
orq %rdx,%r8
xorq %rax,%rax
movq %rbx,%rbp
addq %rbx,%rbx
adcq %r9,%r9
adcq %r10,%r10
adcq %r11,%r11
adcq %rcx,%rcx
adcq %rdx,%rdx
adcq $0,%rax
subq 0(%rsi),%rbx
sbbq 8(%rsi),%r9
sbbq 16(%rsi),%r10
sbbq 24(%rsi),%r11
sbbq 32(%rsi),%rcx
sbbq 40(%rsi),%rdx
sbbq $0,%rax
movq 0(%rsp),%rbx
notq %rax
testq %r8,%r8
cmovzq %rdi,%rbp
testq %rbx,%rbx
cmovnzq %rdi,%rax
andq $1,%rbp
andq $2,%rax
orq %rbp,%rax
movq 8(%rsp),%rbx
.cfi_restore %rbx
movq 16(%rsp),%rbp
.cfi_restore %rbp
leaq 24(%rsp),%rsp
.cfi_adjust_cfa_offset -24
.byte 0xf3,0xc3
.cfi_endproc
.size sgn0_pty_mod_384x,.-sgn0_pty_mod_384x
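# vec_select_48(ret, a, b, sel): constant-time copy of 48 bytes,
# ret = sel ? a : b, implemented with SSE2 masks so no data-dependent branch
# is taken.  %rdi=ret, %rsi=a, %rdx=b, %ecx=sel.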
.globl vec_select_48
.hidden vec_select_48
.type vec_select_48,@function
.align 32
vec_select_48:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movd %ecx,%xmm5
pxor %xmm4,%xmm4
pshufd $0,%xmm5,%xmm5
movdqu (%rsi),%xmm0
leaq 24(%rsi),%rsi
pcmpeqd %xmm4,%xmm5
movdqu (%rdx),%xmm1
leaq 24(%rdx),%rdx
pcmpeqd %xmm5,%xmm4
leaq 24(%rdi),%rdi
pand %xmm4,%xmm0
movdqu 0+16-24(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 0+16-24(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,0-24(%rdi)
pand %xmm4,%xmm2
movdqu 16+16-24(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 16+16-24(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,16-24(%rdi)
pand %xmm4,%xmm0
pand %xmm5,%xmm1
por %xmm1,%xmm0
movdqu %xmm0,32-24(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size vec_select_48,.-vec_select_48
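# vec_select_96: same constant-time selection for 96 bytes.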
.globl vec_select_96
.hidden vec_select_96
.type vec_select_96,@function
.align 32
vec_select_96:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movd %ecx,%xmm5
pxor %xmm4,%xmm4
pshufd $0,%xmm5,%xmm5
movdqu (%rsi),%xmm0
leaq 48(%rsi),%rsi
pcmpeqd %xmm4,%xmm5
movdqu (%rdx),%xmm1
leaq 48(%rdx),%rdx
pcmpeqd %xmm5,%xmm4
leaq 48(%rdi),%rdi
pand %xmm4,%xmm0
movdqu 0+16-48(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 0+16-48(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,0-48(%rdi)
pand %xmm4,%xmm2
movdqu 16+16-48(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 16+16-48(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,16-48(%rdi)
pand %xmm4,%xmm0
movdqu 32+16-48(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 32+16-48(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,32-48(%rdi)
pand %xmm4,%xmm2
movdqu 48+16-48(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 48+16-48(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,48-48(%rdi)
pand %xmm4,%xmm0
movdqu 64+16-48(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 64+16-48(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,64-48(%rdi)
pand %xmm4,%xmm2
pand %xmm5,%xmm3
por %xmm3,%xmm2
movdqu %xmm2,80-48(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size vec_select_96,.-vec_select_96
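# vec_select_192: same constant-time selection for 192 bytes.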
.globl vec_select_192
.hidden vec_select_192
.type vec_select_192,@function
.align 32
vec_select_192:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movd %ecx,%xmm5
pxor %xmm4,%xmm4
pshufd $0,%xmm5,%xmm5
movdqu (%rsi),%xmm0
leaq 96(%rsi),%rsi
pcmpeqd %xmm4,%xmm5
movdqu (%rdx),%xmm1
leaq 96(%rdx),%rdx
pcmpeqd %xmm5,%xmm4
leaq 96(%rdi),%rdi
pand %xmm4,%xmm0
movdqu 0+16-96(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 0+16-96(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,0-96(%rdi)
pand %xmm4,%xmm2
movdqu 16+16-96(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 16+16-96(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,16-96(%rdi)
pand %xmm4,%xmm0
movdqu 32+16-96(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 32+16-96(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,32-96(%rdi)
pand %xmm4,%xmm2
movdqu 48+16-96(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 48+16-96(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,48-96(%rdi)
pand %xmm4,%xmm0
movdqu 64+16-96(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 64+16-96(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,64-96(%rdi)
pand %xmm4,%xmm2
movdqu 80+16-96(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 80+16-96(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,80-96(%rdi)
pand %xmm4,%xmm0
movdqu 96+16-96(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 96+16-96(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,96-96(%rdi)
pand %xmm4,%xmm2
movdqu 112+16-96(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 112+16-96(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,112-96(%rdi)
pand %xmm4,%xmm0
movdqu 128+16-96(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 128+16-96(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,128-96(%rdi)
pand %xmm4,%xmm2
movdqu 144+16-96(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 144+16-96(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,144-96(%rdi)
pand %xmm4,%xmm0
movdqu 160+16-96(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 160+16-96(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,160-96(%rdi)
pand %xmm4,%xmm2
pand %xmm5,%xmm3
por %xmm3,%xmm2
movdqu %xmm2,176-96(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size vec_select_192,.-vec_select_192
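# vec_select_144: same constant-time selection for 144 bytes.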
.globl vec_select_144
.hidden vec_select_144
.type vec_select_144,@function
.align 32
vec_select_144:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movd %ecx,%xmm5
pxor %xmm4,%xmm4
pshufd $0,%xmm5,%xmm5
movdqu (%rsi),%xmm0
leaq 72(%rsi),%rsi
pcmpeqd %xmm4,%xmm5
movdqu (%rdx),%xmm1
leaq 72(%rdx),%rdx
pcmpeqd %xmm5,%xmm4
leaq 72(%rdi),%rdi
pand %xmm4,%xmm0
movdqu 0+16-72(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 0+16-72(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,0-72(%rdi)
pand %xmm4,%xmm2
movdqu 16+16-72(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 16+16-72(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,16-72(%rdi)
pand %xmm4,%xmm0
movdqu 32+16-72(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 32+16-72(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,32-72(%rdi)
pand %xmm4,%xmm2
movdqu 48+16-72(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 48+16-72(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,48-72(%rdi)
pand %xmm4,%xmm0
movdqu 64+16-72(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 64+16-72(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,64-72(%rdi)
pand %xmm4,%xmm2
movdqu 80+16-72(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 80+16-72(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,80-72(%rdi)
pand %xmm4,%xmm0
movdqu 96+16-72(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 96+16-72(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,96-72(%rdi)
pand %xmm4,%xmm2
movdqu 112+16-72(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 112+16-72(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,112-72(%rdi)
pand %xmm4,%xmm0
pand %xmm5,%xmm1
por %xmm1,%xmm0
movdqu %xmm0,128-72(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size vec_select_144,.-vec_select_144
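# vec_select_288: same constant-time selection for 288 bytes.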
.globl vec_select_288
.hidden vec_select_288
.type vec_select_288,@function
.align 32
vec_select_288:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
movd %ecx,%xmm5
pxor %xmm4,%xmm4
pshufd $0,%xmm5,%xmm5
movdqu (%rsi),%xmm0
leaq 144(%rsi),%rsi
pcmpeqd %xmm4,%xmm5
movdqu (%rdx),%xmm1
leaq 144(%rdx),%rdx
pcmpeqd %xmm5,%xmm4
leaq 144(%rdi),%rdi
pand %xmm4,%xmm0
movdqu 0+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 0+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,0-144(%rdi)
pand %xmm4,%xmm2
movdqu 16+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 16+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,16-144(%rdi)
pand %xmm4,%xmm0
movdqu 32+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 32+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,32-144(%rdi)
pand %xmm4,%xmm2
movdqu 48+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 48+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,48-144(%rdi)
pand %xmm4,%xmm0
movdqu 64+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 64+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,64-144(%rdi)
pand %xmm4,%xmm2
movdqu 80+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 80+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,80-144(%rdi)
pand %xmm4,%xmm0
movdqu 96+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 96+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,96-144(%rdi)
pand %xmm4,%xmm2
movdqu 112+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 112+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,112-144(%rdi)
pand %xmm4,%xmm0
movdqu 128+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 128+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,128-144(%rdi)
pand %xmm4,%xmm2
movdqu 144+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 144+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,144-144(%rdi)
pand %xmm4,%xmm0
movdqu 160+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 160+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,160-144(%rdi)
pand %xmm4,%xmm2
movdqu 176+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 176+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,176-144(%rdi)
pand %xmm4,%xmm0
movdqu 192+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 192+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,192-144(%rdi)
pand %xmm4,%xmm2
movdqu 208+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 208+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,208-144(%rdi)
pand %xmm4,%xmm0
movdqu 224+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 224+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,224-144(%rdi)
pand %xmm4,%xmm2
movdqu 240+16-144(%rsi),%xmm0
pand %xmm5,%xmm3
movdqu 240+16-144(%rdx),%xmm1
por %xmm3,%xmm2
movdqu %xmm2,240-144(%rdi)
pand %xmm4,%xmm0
movdqu 256+16-144(%rsi),%xmm2
pand %xmm5,%xmm1
movdqu 256+16-144(%rdx),%xmm3
por %xmm1,%xmm0
movdqu %xmm0,256-144(%rdi)
pand %xmm4,%xmm2
pand %xmm5,%xmm3
por %xmm3,%xmm2
movdqu %xmm2,272-144(%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size vec_select_288,.-vec_select_288
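# vec_prefetch(ptr, len): issues up to seven prefetchnta hints covering
# [ptr, ptr+len), stepping 64 bytes at a time and clamping to the last byte so
# it never prefetches past the buffer.  %rdi=ptr, %rsi=len.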
.globl vec_prefetch
.hidden vec_prefetch
.type vec_prefetch,@function
.align 32
vec_prefetch:
.cfi_startproc
.byte 0xf3,0x0f,0x1e,0xfa
leaq -1(%rdi,%rsi,1),%rsi
movq $64,%rax
xorq %r8,%r8
prefetchnta (%rdi)
leaq (%rdi,%rax,1),%rdi
cmpq %rsi,%rdi
cmovaq %rsi,%rdi
cmovaq %r8,%rax
prefetchnta (%rdi)
leaq (%rdi,%rax,1),%rdi
cmpq %rsi,%rdi
cmovaq %rsi,%rdi
cmovaq %r8,%rax
prefetchnta (%rdi)
leaq (%rdi,%rax,1),%rdi
cmpq %rsi,%rdi
cmovaq %rsi,%rdi
cmovaq %r8,%rax
prefetchnta (%rdi)
leaq (%rdi,%rax,1),%rdi
cmpq %rsi,%rdi
cmovaq %rsi,%rdi
cmovaq %r8,%rax
prefetchnta (%rdi)
leaq (%rdi,%rax,1),%rdi
cmpq %rsi,%rdi
cmovaq %rsi,%rdi
cmovaq %r8,%rax
prefetchnta (%rdi)
leaq (%rdi,%rax,1),%rdi
cmpq %rsi,%rdi
cmovaq %rsi,%rdi
prefetchnta (%rdi)
.byte 0xf3,0xc3
.cfi_endproc
.size vec_prefetch,.-vec_prefetch
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a",@note
.long 4,2f-1f,5
.byte 0x47,0x4E,0x55,0
1: .long 0xc0000002,4,3
.align 8
2: