413 lines
8.1 KiB
Raku
Executable file
413 lines
8.1 KiB
Raku
Executable file
#!/usr/bin/env perl
|
|
#
|
|
# Copyright Supranational LLC
|
|
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
$flavour = shift;
|
|
$output = shift;
|
|
|
|
if ($flavour && $flavour ne "void") {
|
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
|
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
|
die "can't locate arm-xlate.pl";
|
|
|
|
open STDOUT,"| \"$^X\" $xlate $flavour $output";
|
|
} else {
|
|
open STDOUT,">$output";
|
|
}
|
|
|
|
($r_ptr,$a_ptr,$b_ptr,$n_ptr) = map("x$_", 0..3);
|
|
|
|
@mod=map("x$_",(4..7));
|
|
@a=map("x$_",(8..11));
|
|
@b=map("x$_",(12..15));
|
|
@t=map("x$_",(16,17,1..3));
|
|
|
|
$code.=<<___;
|
|
.text
|
|
|
|
.globl add_mod_256
|
|
.hidden add_mod_256
|
|
.type add_mod_256,%function
|
|
.align 5
|
|
add_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @b[0],@b[1],[$b_ptr]
|
|
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
adds @a[0],@a[0],@b[0]
|
|
ldp @b[2],@b[3],[$b_ptr,#16]
|
|
adcs @a[1],@a[1],@b[1]
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
adcs @a[2],@a[2],@b[2]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
adcs @a[3],@a[3],@b[3]
|
|
adc @t[4],xzr,xzr
|
|
|
|
subs @t[0],@a[0],@mod[0]
|
|
sbcs @t[1],@a[1],@mod[1]
|
|
sbcs @t[2],@a[2],@mod[2]
|
|
sbcs @t[3],@a[3],@mod[3]
|
|
sbcs xzr,@t[4],xzr
|
|
|
|
csel @a[0],@a[0],@t[0],lo
|
|
csel @a[1],@a[1],@t[1],lo
|
|
csel @a[2],@a[2],@t[2],lo
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
csel @a[3],@a[3],@t[3],lo
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
ret
|
|
.size add_mod_256,.-add_mod_256
|
|
|
|
.globl mul_by_3_mod_256
|
|
.hidden mul_by_3_mod_256
|
|
.type mul_by_3_mod_256,%function
|
|
.align 5
|
|
mul_by_3_mod_256:
|
|
ldp @b[0],@b[1],[$a_ptr]
|
|
ldp @b[2],@b[3],[$a_ptr,#16]
|
|
|
|
adds @a[0],@b[0],@b[0]
|
|
ldp @mod[0],@mod[1],[$b_ptr]
|
|
adcs @a[1],@b[1],@b[1]
|
|
ldp @mod[2],@mod[3],[$b_ptr,#16]
|
|
adcs @a[2],@b[2],@b[2]
|
|
adcs @a[3],@b[3],@b[3]
|
|
adc @t[4],xzr,xzr
|
|
|
|
subs @t[0],@a[0],@mod[0]
|
|
sbcs @t[1],@a[1],@mod[1]
|
|
sbcs @t[2],@a[2],@mod[2]
|
|
sbcs @t[3],@a[3],@mod[3]
|
|
sbcs xzr,@t[4],xzr
|
|
|
|
csel @a[0],@a[0],@t[0],lo
|
|
csel @a[1],@a[1],@t[1],lo
|
|
csel @a[2],@a[2],@t[2],lo
|
|
csel @a[3],@a[3],@t[3],lo
|
|
|
|
adds @a[0],@a[0],@b[0]
|
|
adcs @a[1],@a[1],@b[1]
|
|
adcs @a[2],@a[2],@b[2]
|
|
adcs @a[3],@a[3],@b[3]
|
|
adc @t[4],xzr,xzr
|
|
|
|
subs @t[0],@a[0],@mod[0]
|
|
sbcs @t[1],@a[1],@mod[1]
|
|
sbcs @t[2],@a[2],@mod[2]
|
|
sbcs @t[3],@a[3],@mod[3]
|
|
sbcs xzr,@t[4],xzr
|
|
|
|
csel @a[0],@a[0],@t[0],lo
|
|
csel @a[1],@a[1],@t[1],lo
|
|
csel @a[2],@a[2],@t[2],lo
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
csel @a[3],@a[3],@t[3],lo
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
ret
|
|
.size mul_by_3_mod_256,.-mul_by_3_mod_256
|
|
|
|
.globl lshift_mod_256
|
|
.hidden lshift_mod_256
|
|
.type lshift_mod_256,%function
|
|
.align 5
|
|
lshift_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
|
|
.Loop_lshift_mod_256:
|
|
adds @a[0],@a[0],@a[0]
|
|
sub $b_ptr,$b_ptr,#1
|
|
adcs @a[1],@a[1],@a[1]
|
|
adcs @a[2],@a[2],@a[2]
|
|
adcs @a[3],@a[3],@a[3]
|
|
adc @t[4],xzr,xzr
|
|
|
|
subs @b[0],@a[0],@mod[0]
|
|
sbcs @b[1],@a[1],@mod[1]
|
|
sbcs @b[2],@a[2],@mod[2]
|
|
sbcs @b[3],@a[3],@mod[3]
|
|
sbcs xzr,@t[4],xzr
|
|
|
|
csel @a[0],@a[0],@b[0],lo
|
|
csel @a[1],@a[1],@b[1],lo
|
|
csel @a[2],@a[2],@b[2],lo
|
|
csel @a[3],@a[3],@b[3],lo
|
|
|
|
cbnz $b_ptr,.Loop_lshift_mod_256
|
|
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
ret
|
|
.size lshift_mod_256,.-lshift_mod_256
|
|
|
|
.globl rshift_mod_256
|
|
.hidden rshift_mod_256
|
|
.type rshift_mod_256,%function
|
|
.align 5
|
|
rshift_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
|
|
.Loop_rshift:
|
|
adds @b[0],@a[0],@mod[0]
|
|
sub $b_ptr,$b_ptr,#1
|
|
adcs @b[1],@a[1],@mod[1]
|
|
adcs @b[2],@a[2],@mod[2]
|
|
adcs @b[3],@a[3],@mod[3]
|
|
adc @t[4],xzr,xzr
|
|
tst @a[0],#1
|
|
|
|
csel @b[0],@b[0],@a[0],ne
|
|
csel @b[1],@b[1],@a[1],ne
|
|
csel @b[2],@b[2],@a[2],ne
|
|
csel @b[3],@b[3],@a[3],ne
|
|
csel @t[4],@t[4],xzr,ne
|
|
|
|
extr @a[0],@b[1],@b[0],#1
|
|
extr @a[1],@b[2],@b[1],#1
|
|
extr @a[2],@b[3],@b[2],#1
|
|
extr @a[3],@t[4],@b[3],#1
|
|
|
|
cbnz $b_ptr,.Loop_rshift
|
|
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
ret
|
|
.size rshift_mod_256,.-rshift_mod_256
|
|
|
|
.globl cneg_mod_256
|
|
.hidden cneg_mod_256
|
|
.type cneg_mod_256,%function
|
|
.align 5
|
|
cneg_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
subs @b[0],@mod[0],@a[0]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
orr @mod[0],@a[0],@a[1]
|
|
sbcs @b[1],@mod[1],@a[1]
|
|
orr @mod[1],@a[2],@a[3]
|
|
sbcs @b[2],@mod[2],@a[2]
|
|
orr @t[4],@mod[0],@mod[1]
|
|
sbc @b[3],@mod[3],@a[3]
|
|
|
|
cmp @t[4],#0
|
|
csetm @t[4],ne
|
|
ands $b_ptr,$b_ptr,@t[4]
|
|
|
|
csel @a[0],@a[0],@b[0],eq
|
|
csel @a[1],@a[1],@b[1],eq
|
|
csel @a[2],@a[2],@b[2],eq
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
csel @a[3],@a[3],@b[3],eq
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
ret
|
|
.size cneg_mod_256,.-cneg_mod_256
|
|
|
|
.globl sub_mod_256
|
|
.hidden sub_mod_256
|
|
.type sub_mod_256,%function
|
|
.align 5
|
|
sub_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @b[0],@b[1],[$b_ptr]
|
|
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
subs @a[0],@a[0],@b[0]
|
|
ldp @b[2],@b[3],[$b_ptr,#16]
|
|
sbcs @a[1],@a[1],@b[1]
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
sbcs @a[2],@a[2],@b[2]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
sbcs @a[3],@a[3],@b[3]
|
|
sbc @t[4],xzr,xzr
|
|
|
|
and @mod[0],@mod[0],@t[4]
|
|
and @mod[1],@mod[1],@t[4]
|
|
adds @a[0],@a[0],@mod[0]
|
|
and @mod[2],@mod[2],@t[4]
|
|
adcs @a[1],@a[1],@mod[1]
|
|
and @mod[3],@mod[3],@t[4]
|
|
adcs @a[2],@a[2],@mod[2]
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
adc @a[3],@a[3],@mod[3]
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
ret
|
|
.size sub_mod_256,.-sub_mod_256
|
|
|
|
.globl check_mod_256
|
|
.hidden check_mod_256
|
|
.type check_mod_256,%function
|
|
.align 5
|
|
check_mod_256:
|
|
ldp @a[0],@a[1],[$r_ptr]
|
|
ldp @a[2],@a[3],[$r_ptr,#16]
|
|
ldp @mod[0],@mod[1],[$a_ptr]
|
|
ldp @mod[2],@mod[3],[$a_ptr,#16]
|
|
|
|
#ifdef __AARCH64EB__
|
|
rev @a[0],@a[0]
|
|
rev @a[1],@a[1]
|
|
rev @a[2],@a[2]
|
|
rev @a[3],@a[3]
|
|
#endif
|
|
|
|
subs xzr,@a[0],@mod[0]
|
|
sbcs xzr,@a[1],@mod[1]
|
|
orr @a[0],@a[0],@a[1]
|
|
sbcs xzr,@a[2],@mod[2]
|
|
orr @a[0],@a[0],@a[2]
|
|
sbcs xzr,@a[3],@mod[3]
|
|
orr @a[0],@a[0],@a[3]
|
|
sbc $a_ptr,xzr,xzr
|
|
|
|
cmp @a[0],#0
|
|
mov x0,#1
|
|
csel x0,x0,xzr,ne
|
|
and x0,x0,$a_ptr
|
|
|
|
ret
|
|
.size check_mod_256,.-check_mod_256
|
|
|
|
.globl add_n_check_mod_256
|
|
.hidden add_n_check_mod_256
|
|
.type add_n_check_mod_256,%function
|
|
.align 5
|
|
add_n_check_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @b[0],@b[1],[$b_ptr]
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
ldp @b[2],@b[3],[$b_ptr,#16]
|
|
|
|
#ifdef __AARCH64EB__
|
|
rev @a[0],@a[0]
|
|
rev @b[0],@b[0]
|
|
rev @a[1],@a[1]
|
|
rev @b[1],@b[1]
|
|
rev @a[2],@a[2]
|
|
rev @b[2],@b[2]
|
|
rev @a[3],@a[3]
|
|
rev @b[3],@b[3]
|
|
#endif
|
|
|
|
adds @a[0],@a[0],@b[0]
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
adcs @a[1],@a[1],@b[1]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
adcs @a[2],@a[2],@b[2]
|
|
adcs @a[3],@a[3],@b[3]
|
|
adc @t[4],xzr,xzr
|
|
|
|
subs @t[0],@a[0],@mod[0]
|
|
sbcs @t[1],@a[1],@mod[1]
|
|
sbcs @t[2],@a[2],@mod[2]
|
|
sbcs @t[3],@a[3],@mod[3]
|
|
sbcs xzr,@t[4],xzr
|
|
|
|
csel @a[0],@a[0],@t[0],lo
|
|
csel @a[1],@a[1],@t[1],lo
|
|
csel @a[2],@a[2],@t[2],lo
|
|
csel @a[3],@a[3],@t[3],lo
|
|
|
|
orr @t[0], @a[0], @a[1]
|
|
orr @t[1], @a[2], @a[3]
|
|
orr @t[0], @t[0], @t[1]
|
|
|
|
#ifdef __AARCH64EB__
|
|
rev @a[0],@a[0]
|
|
rev @a[1],@a[1]
|
|
rev @a[2],@a[2]
|
|
rev @a[3],@a[3]
|
|
#endif
|
|
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
mov @t[1], #1
|
|
cmp @t[0], #0
|
|
csel x0, @t[1], xzr, ne
|
|
|
|
ret
|
|
.size add_n_check_mod_256,.-add_n_check_mod_256
|
|
|
|
.globl sub_n_check_mod_256
|
|
.hidden sub_n_check_mod_256
|
|
.type sub_n_check_mod_256,%function
|
|
.align 5
|
|
sub_n_check_mod_256:
|
|
ldp @a[0],@a[1],[$a_ptr]
|
|
ldp @b[0],@b[1],[$b_ptr]
|
|
ldp @a[2],@a[3],[$a_ptr,#16]
|
|
ldp @b[2],@b[3],[$b_ptr,#16]
|
|
|
|
#ifdef __AARCH64EB__
|
|
rev @a[0],@a[0]
|
|
rev @b[0],@b[0]
|
|
rev @a[1],@a[1]
|
|
rev @b[1],@b[1]
|
|
rev @a[2],@a[2]
|
|
rev @b[2],@b[2]
|
|
rev @a[3],@a[3]
|
|
rev @b[3],@b[3]
|
|
#endif
|
|
|
|
subs @a[0],@a[0],@b[0]
|
|
sbcs @a[1],@a[1],@b[1]
|
|
ldp @mod[0],@mod[1],[$n_ptr]
|
|
sbcs @a[2],@a[2],@b[2]
|
|
ldp @mod[2],@mod[3],[$n_ptr,#16]
|
|
sbcs @a[3],@a[3],@b[3]
|
|
sbc @t[4],xzr,xzr
|
|
|
|
and @mod[0],@mod[0],@t[4]
|
|
and @mod[1],@mod[1],@t[4]
|
|
adds @a[0],@a[0],@mod[0]
|
|
and @mod[2],@mod[2],@t[4]
|
|
adcs @a[1],@a[1],@mod[1]
|
|
and @mod[3],@mod[3],@t[4]
|
|
adcs @a[2],@a[2],@mod[2]
|
|
adc @a[3],@a[3],@mod[3]
|
|
|
|
orr @t[0], @a[0], @a[1]
|
|
orr @t[1], @a[2], @a[3]
|
|
orr @t[0], @t[0], @t[1]
|
|
|
|
#ifdef __AARCH64EB__
|
|
rev @a[0],@a[0]
|
|
rev @a[1],@a[1]
|
|
rev @a[2],@a[2]
|
|
rev @a[3],@a[3]
|
|
#endif
|
|
|
|
stp @a[0],@a[1],[$r_ptr]
|
|
stp @a[2],@a[3],[$r_ptr,#16]
|
|
|
|
mov @t[1], #1
|
|
cmp @t[0], #0
|
|
csel x0, @t[1], xzr, ne
|
|
|
|
ret
|
|
.size sub_n_check_mod_256,.-sub_n_check_mod_256
|
|
___
|
|
|
|
print $code;
|
|
|
|
close STDOUT;
|