#!/usr/bin/env perl
#
# Copyright Supranational LLC
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

$flavour = shift;
$output  = shift;

if ($flavour && $flavour ne "void") {
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
    die "can't locate arm-xlate.pl";

    open STDOUT,"| \"$^X\" $xlate $flavour \"$output\"";
} else {
    open STDOUT,">$output";
}
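
# AAPCS64 argument registers: x0 is the result pointer; the roles of
# x1..x3 vary per entry point and are noted before each function below.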
($r_ptr,$a_ptr,$b_ptr,$n_ptr) = map("x$_", 0..3);
@mod=map("x$_",(4..7));
@a=map("x$_",(8..11));
@b=map("x$_",(12..15));
@t=map("x$_",(16,17,1..3));
$code.=<<___;
.text
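
// ret = (a + b) mod n over four 64-bit little-endian limbs. A single
// conditional subtraction fully reduces the result whenever a + b < 2*n.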
.globl	add_mod_256
.hidden	add_mod_256
.type	add_mod_256,%function
.align	5
add_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@b[0],@b[1],[$b_ptr]

	ldp	@a[2],@a[3],[$a_ptr,#16]
	adds	@a[0],@a[0],@b[0]
	ldp	@b[2],@b[3],[$b_ptr,#16]
	adcs	@a[1],@a[1],@b[1]
	ldp	@mod[0],@mod[1],[$n_ptr]
	adcs	@a[2],@a[2],@b[2]
	ldp	@mod[2],@mod[3],[$n_ptr,#16]
	adcs	@a[3],@a[3],@b[3]
	adc	@t[4],xzr,xzr		// capture the carry in a fifth limb

	subs	@t[0],@a[0],@mod[0]	// tentatively subtract the modulus
	sbcs	@t[1],@a[1],@mod[1]
	sbcs	@t[2],@a[2],@mod[2]
	sbcs	@t[3],@a[3],@mod[3]
	sbcs	xzr,@t[4],xzr

	csel	@a[0],@a[0],@t[0],lo	// keep the raw sum if it was below the modulus
	csel	@a[1],@a[1],@t[1],lo
	csel	@a[2],@a[2],@t[2],lo
	stp	@a[0],@a[1],[$r_ptr]
	csel	@a[3],@a[3],@t[3],lo
	stp	@a[2],@a[3],[$r_ptr,#16]

	ret
.size	add_mod_256,.-add_mod_256
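
// ret = 3*a mod n, computed as ((2*a mod n) + a) mod n. Note that the
// modulus pointer arrives in x2 here, since the signature is
// mul_by_3_mod_256(ret, a, n).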
.globl	mul_by_3_mod_256
.hidden	mul_by_3_mod_256
.type	mul_by_3_mod_256,%function
.align	5
mul_by_3_mod_256:
	ldp	@b[0],@b[1],[$a_ptr]
	ldp	@b[2],@b[3],[$a_ptr,#16]

	adds	@a[0],@b[0],@b[0]
	ldp	@mod[0],@mod[1],[$b_ptr]
	adcs	@a[1],@b[1],@b[1]
	ldp	@mod[2],@mod[3],[$b_ptr,#16]
	adcs	@a[2],@b[2],@b[2]
	adcs	@a[3],@b[3],@b[3]
	adc	@t[4],xzr,xzr

	subs	@t[0],@a[0],@mod[0]
	sbcs	@t[1],@a[1],@mod[1]
	sbcs	@t[2],@a[2],@mod[2]
	sbcs	@t[3],@a[3],@mod[3]
	sbcs	xzr,@t[4],xzr

	csel	@a[0],@a[0],@t[0],lo
	csel	@a[1],@a[1],@t[1],lo
	csel	@a[2],@a[2],@t[2],lo
	csel	@a[3],@a[3],@t[3],lo

	adds	@a[0],@a[0],@b[0]
	adcs	@a[1],@a[1],@b[1]
	adcs	@a[2],@a[2],@b[2]
	adcs	@a[3],@a[3],@b[3]
	adc	@t[4],xzr,xzr

	subs	@t[0],@a[0],@mod[0]
	sbcs	@t[1],@a[1],@mod[1]
	sbcs	@t[2],@a[2],@mod[2]
	sbcs	@t[3],@a[3],@mod[3]
	sbcs	xzr,@t[4],xzr

	csel	@a[0],@a[0],@t[0],lo
	csel	@a[1],@a[1],@t[1],lo
	csel	@a[2],@a[2],@t[2],lo
	stp	@a[0],@a[1],[$r_ptr]
	csel	@a[3],@a[3],@t[3],lo
	stp	@a[2],@a[3],[$r_ptr,#16]

	ret
.size	mul_by_3_mod_256,.-mul_by_3_mod_256
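
// ret = (a << count) mod n, i.e. count iterations of modular doubling.
// The iteration count arrives in x2; each pass doubles the value and
// conditionally subtracts the modulus.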
.globl	lshift_mod_256
.hidden	lshift_mod_256
.type	lshift_mod_256,%function
.align	5
lshift_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@a[2],@a[3],[$a_ptr,#16]
	ldp	@mod[0],@mod[1],[$n_ptr]
	ldp	@mod[2],@mod[3],[$n_ptr,#16]

.Loop_lshift_mod_256:
	adds	@a[0],@a[0],@a[0]
	sub	$b_ptr,$b_ptr,#1
	adcs	@a[1],@a[1],@a[1]
	adcs	@a[2],@a[2],@a[2]
	adcs	@a[3],@a[3],@a[3]
	adc	@t[4],xzr,xzr

	subs	@b[0],@a[0],@mod[0]
	sbcs	@b[1],@a[1],@mod[1]
	sbcs	@b[2],@a[2],@mod[2]
	sbcs	@b[3],@a[3],@mod[3]
	sbcs	xzr,@t[4],xzr

	csel	@a[0],@a[0],@b[0],lo
	csel	@a[1],@a[1],@b[1],lo
	csel	@a[2],@a[2],@b[2],lo
	csel	@a[3],@a[3],@b[3],lo

	cbnz	$b_ptr,.Loop_lshift_mod_256

	stp	@a[0],@a[1],[$r_ptr]
	stp	@a[2],@a[3],[$r_ptr,#16]
	ret
.size	lshift_mod_256,.-lshift_mod_256
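
// ret = (a >> count) mod n, i.e. count iterations of modular halving,
// with the count in x2. When the value is odd, the modulus is added
// first to make it even, so the right shift never drops a set bit;
// this requires an odd modulus, which holds for the primes used here.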
.globl	rshift_mod_256
.hidden	rshift_mod_256
.type	rshift_mod_256,%function
.align	5
rshift_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@a[2],@a[3],[$a_ptr,#16]
	ldp	@mod[0],@mod[1],[$n_ptr]
	ldp	@mod[2],@mod[3],[$n_ptr,#16]

.Loop_rshift:
	adds	@b[0],@a[0],@mod[0]
	sub	$b_ptr,$b_ptr,#1
	adcs	@b[1],@a[1],@mod[1]
	adcs	@b[2],@a[2],@mod[2]
	adcs	@b[3],@a[3],@mod[3]
	adc	@t[4],xzr,xzr		// the sum can spill into a fifth limb

	tst	@a[0],#1		// is the value odd?
	csel	@b[0],@b[0],@a[0],ne	// pick the sum if odd, the value itself if even
	csel	@b[1],@b[1],@a[1],ne
	csel	@b[2],@b[2],@a[2],ne
	csel	@b[3],@b[3],@a[3],ne
	csel	@t[4],@t[4],xzr,ne

	extr	@a[0],@b[1],@b[0],#1	// funnel-shift the five-limb value right by one
	extr	@a[1],@b[2],@b[1],#1
	extr	@a[2],@b[3],@b[2],#1
	extr	@a[3],@t[4],@b[3],#1

	cbnz	$b_ptr,.Loop_rshift

	stp	@a[0],@a[1],[$r_ptr]
	stp	@a[2],@a[3],[$r_ptr,#16]
	ret
.size	rshift_mod_256,.-rshift_mod_256
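
// Conditional negation: ret = flag ? n - a : a, with the flag in x2.
// A zero input stays zero regardless of the flag, so the result
// remains fully reduced.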
.globl	cneg_mod_256
.hidden	cneg_mod_256
.type	cneg_mod_256,%function
.align	5
cneg_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@mod[0],@mod[1],[$n_ptr]

	ldp	@a[2],@a[3],[$a_ptr,#16]
	subs	@b[0],@mod[0],@a[0]	// compute the negation, n - a
	ldp	@mod[2],@mod[3],[$n_ptr,#16]
	orr	@mod[0],@a[0],@a[1]	// accumulate limbs to test for zero
	sbcs	@b[1],@mod[1],@a[1]
	orr	@mod[1],@a[2],@a[3]
	sbcs	@b[2],@mod[2],@a[2]
	orr	@t[4],@mod[0],@mod[1]
	sbc	@b[3],@mod[3],@a[3]

	cmp	@t[4],#0
	csetm	@t[4],ne		// all-ones mask if the input is non-zero
	ands	$b_ptr,$b_ptr,@t[4]	// negate only if flag and input are both non-zero

	csel	@a[0],@a[0],@b[0],eq
	csel	@a[1],@a[1],@b[1],eq
	csel	@a[2],@a[2],@b[2],eq
	stp	@a[0],@a[1],[$r_ptr]
	csel	@a[3],@a[3],@b[3],eq
	stp	@a[2],@a[3],[$r_ptr,#16]

	ret
.size	cneg_mod_256,.-cneg_mod_256
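
// ret = (a - b) mod n. The borrow from the raw subtraction is widened
// into an all-ones mask that decides whether the modulus is added back.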
.globl	sub_mod_256
.hidden	sub_mod_256
.type	sub_mod_256,%function
.align	5
sub_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@b[0],@b[1],[$b_ptr]

	ldp	@a[2],@a[3],[$a_ptr,#16]
	subs	@a[0],@a[0],@b[0]
	ldp	@b[2],@b[3],[$b_ptr,#16]
	sbcs	@a[1],@a[1],@b[1]
	ldp	@mod[0],@mod[1],[$n_ptr]
	sbcs	@a[2],@a[2],@b[2]
	ldp	@mod[2],@mod[3],[$n_ptr,#16]
	sbcs	@a[3],@a[3],@b[3]
	sbc	@t[4],xzr,xzr		// zero or all-ones borrow mask

	and	@mod[0],@mod[0],@t[4]
	and	@mod[1],@mod[1],@t[4]
	adds	@a[0],@a[0],@mod[0]
	and	@mod[2],@mod[2],@t[4]
	adcs	@a[1],@a[1],@mod[1]
	and	@mod[3],@mod[3],@t[4]
	adcs	@a[2],@a[2],@mod[2]
	stp	@a[0],@a[1],[$r_ptr]
	adc	@a[3],@a[3],@mod[3]
	stp	@a[2],@a[3],[$r_ptr,#16]

	ret
.size	sub_mod_256,.-sub_mod_256
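
// bool check_mod_256(a, n): returns 1 if a is non-zero and fully
// reduced, i.e. 0 < a < n, and 0 otherwise. Note that the operand
// pointer arrives in x0 and the modulus pointer in x1.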
.globl	check_mod_256
.hidden	check_mod_256
.type	check_mod_256,%function
.align	5
check_mod_256:
	ldp	@a[0],@a[1],[$r_ptr]
	ldp	@a[2],@a[3],[$r_ptr,#16]
	ldp	@mod[0],@mod[1],[$a_ptr]
	ldp	@mod[2],@mod[3],[$a_ptr,#16]

#ifdef	__AARCH64EB__
	rev	@a[0],@a[0]
	rev	@a[1],@a[1]
	rev	@a[2],@a[2]
	rev	@a[3],@a[3]
#endif

	subs	xzr,@a[0],@mod[0]	// borrow iff the value is below the modulus
	sbcs	xzr,@a[1],@mod[1]
	orr	@a[0],@a[0],@a[1]	// accumulate limbs to test for zero
	sbcs	xzr,@a[2],@mod[2]
	orr	@a[0],@a[0],@a[2]
	sbcs	xzr,@a[3],@mod[3]
	orr	@a[0],@a[0],@a[3]
	sbc	$a_ptr,xzr,xzr		// all-ones iff the value is below the modulus

	cmp	@a[0],#0
	mov	x0,#1
	csel	x0,x0,xzr,ne		// one iff the value is non-zero...
	and	x0,x0,$a_ptr		// ...and below the modulus

	ret
.size	check_mod_256,.-check_mod_256
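
// ret = (a + b) mod n, with the limbs byte-swapped on big-endian
// targets so that inputs and outputs are always little-endian words.
// Returns 1 if the reduced sum is non-zero, 0 if it is zero.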
.globl	add_n_check_mod_256
.hidden	add_n_check_mod_256
.type	add_n_check_mod_256,%function
.align	5
add_n_check_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@b[0],@b[1],[$b_ptr]
	ldp	@a[2],@a[3],[$a_ptr,#16]
	ldp	@b[2],@b[3],[$b_ptr,#16]

#ifdef	__AARCH64EB__
	rev	@a[0],@a[0]
	rev	@b[0],@b[0]
	rev	@a[1],@a[1]
	rev	@b[1],@b[1]
	rev	@a[2],@a[2]
	rev	@b[2],@b[2]
	rev	@a[3],@a[3]
	rev	@b[3],@b[3]
#endif

	adds	@a[0],@a[0],@b[0]
	ldp	@mod[0],@mod[1],[$n_ptr]
	adcs	@a[1],@a[1],@b[1]
	ldp	@mod[2],@mod[3],[$n_ptr,#16]
	adcs	@a[2],@a[2],@b[2]
	adcs	@a[3],@a[3],@b[3]
	adc	@t[4],xzr,xzr

	subs	@t[0],@a[0],@mod[0]
	sbcs	@t[1],@a[1],@mod[1]
	sbcs	@t[2],@a[2],@mod[2]
	sbcs	@t[3],@a[3],@mod[3]
	sbcs	xzr,@t[4],xzr

	csel	@a[0],@a[0],@t[0],lo
	csel	@a[1],@a[1],@t[1],lo
	csel	@a[2],@a[2],@t[2],lo
	csel	@a[3],@a[3],@t[3],lo

	orr	@t[0], @a[0], @a[1]
	orr	@t[1], @a[2], @a[3]
	orr	@t[0], @t[0], @t[1]

#ifdef	__AARCH64EB__
	rev	@a[0],@a[0]
	rev	@a[1],@a[1]
	rev	@a[2],@a[2]
	rev	@a[3],@a[3]
#endif

	stp	@a[0],@a[1],[$r_ptr]
	stp	@a[2],@a[3],[$r_ptr,#16]

	mov	@t[1], #1
	cmp	@t[0], #0
	csel	x0, @t[1], xzr, ne

	ret
.size	add_n_check_mod_256,.-add_n_check_mod_256
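
// ret = (a - b) mod n, with the same big-endian byte swaps as above.
// Returns 1 if the reduced difference is non-zero, 0 if it is zero.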
.globl	sub_n_check_mod_256
.hidden	sub_n_check_mod_256
.type	sub_n_check_mod_256,%function
.align	5
sub_n_check_mod_256:
	ldp	@a[0],@a[1],[$a_ptr]
	ldp	@b[0],@b[1],[$b_ptr]
	ldp	@a[2],@a[3],[$a_ptr,#16]
	ldp	@b[2],@b[3],[$b_ptr,#16]

#ifdef	__AARCH64EB__
	rev	@a[0],@a[0]
	rev	@b[0],@b[0]
	rev	@a[1],@a[1]
	rev	@b[1],@b[1]
	rev	@a[2],@a[2]
	rev	@b[2],@b[2]
	rev	@a[3],@a[3]
	rev	@b[3],@b[3]
#endif

	subs	@a[0],@a[0],@b[0]
	sbcs	@a[1],@a[1],@b[1]
	ldp	@mod[0],@mod[1],[$n_ptr]
	sbcs	@a[2],@a[2],@b[2]
	ldp	@mod[2],@mod[3],[$n_ptr,#16]
	sbcs	@a[3],@a[3],@b[3]
	sbc	@t[4],xzr,xzr

	and	@mod[0],@mod[0],@t[4]
	and	@mod[1],@mod[1],@t[4]
	adds	@a[0],@a[0],@mod[0]
	and	@mod[2],@mod[2],@t[4]
	adcs	@a[1],@a[1],@mod[1]
	and	@mod[3],@mod[3],@t[4]
	adcs	@a[2],@a[2],@mod[2]
	adc	@a[3],@a[3],@mod[3]

	orr	@t[0], @a[0], @a[1]
	orr	@t[1], @a[2], @a[3]
	orr	@t[0], @t[0], @t[1]

#ifdef	__AARCH64EB__
	rev	@a[0],@a[0]
	rev	@a[1],@a[1]
	rev	@a[2],@a[2]
	rev	@a[3],@a[3]
#endif

	stp	@a[0],@a[1],[$r_ptr]
	stp	@a[2],@a[3],[$r_ptr,#16]

	mov	@t[1], #1
	cmp	@t[0], #0
	csel	x0, @t[1], xzr, ne

	ret
.size	sub_n_check_mod_256,.-sub_n_check_mod_256
___
print $code;

close STDOUT;