140 lines
4.1 KiB
C
140 lines
4.1 KiB
C
|
/*
|
||
|
* Copyright Supranational LLC
|
||
|
* Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||
|
* SPDX-License-Identifier: Apache-2.0
|
||
|
*/
|
||
|
|
||
|
#include "fields.h"
|
||
|
|
||
|
#ifdef __OPTIMIZE_SIZE__
|
||
|
/*
|
||
|
* 608 multiplications for scalar inversion modulo BLS12-381 prime, 32%
|
||
|
* more than corresponding optimal addition-chain, plus mispredicted
|
||
|
* branch penalties on top of that... The addition chain below was
|
||
|
* measured to be >50% faster.
|
||
|
*/
|
||
|
/*
 * Size-optimized reciprocal in Fp: |out| = |inp|^(P-2).
 *
 * The exponent table below holds the BLS12-381 prime minus two, least
 * significant 64-bit word first, so by Fermat's little theorem the
 * exponentiation yields the multiplicative inverse of a non-zero |inp|.
 * The work itself is delegated to exp_mont_384().
 */
static void flt_reciprocal_fp(vec384 out, const vec384 inp)
{
    /* P - 2, serialized with TO_BYTES() */
    static const byte p_minus_2[] = {
        TO_BYTES(0xb9feffffffffaaa9),
        TO_BYTES(0x1eabfffeb153ffff),
        TO_BYTES(0x6730d2a0f6b0f624),
        TO_BYTES(0x64774b84f38512bf),
        TO_BYTES(0x4b1ba7b6434bacd7),
        TO_BYTES(0x1a0111ea397fe69a)
    };

    /* NOTE(review): 381 is presumably the exponent's bit length - confirm */
    exp_mont_384(out, inp, p_minus_2, 381, BLS12_381_P, p0);
}
|
||
|
#else
|
||
|
/*
 * Map the generic operation names onto their Fp implementations for the
 * duration of the include below; presumably these are the names the
 * RECIPROCAL_MOD_BLS12_381_P() macro is written against - TODO confirm
 * in "recip-addchain.h".
 */
# define sqr(ret,a)             sqr_fp(ret,a)
# define mul(ret,a,b)           mul_fp(ret,a,b)
# define sqr_n_mul(ret,a,n,b)   sqr_n_mul_fp(ret,a,n,b)

# include "recip-addchain.h"

/*
 * Default reciprocal in Fp: |out| = 1/|inp|, evaluated by expanding the
 * RECIPROCAL_MOD_BLS12_381_P() macro with vec384 as the working type.
 */
static void flt_reciprocal_fp(vec384 out, const vec384 inp)
{
    RECIPROCAL_MOD_BLS12_381_P(out, inp, vec384);
}

/* The helper macros are needed only by the function above. */
# undef sqr
# undef mul
# undef sqr_n_mul
# undef RECIPROCAL_MOD_BLS12_381_P
|
||
|
#endif
|
||
|
|
||
|
/*
 * Reciprocal in Fp2 built on flt_reciprocal_fp().
 *
 * For inp = a + b*i the result is
 *
 *     1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i
 *
 * so a single Fp inversion of a^2+b^2 is shared by both output halves.
 */
static void flt_reciprocal_fp2(vec384x out, const vec384x inp)
{
    vec384 norm, inv;

    sqr_fp(norm, inp[0]);               /* a^2                  */
    sqr_fp(inv, inp[1]);                /* b^2 (parked in inv)  */
    add_fp(norm, norm, inv);            /* a^2 + b^2            */

    flt_reciprocal_fp(inv, norm);       /* 1/(a^2 + b^2)        */

    mul_fp(out[0], inp[0], inv);        /* real part:  a/(a^2+b^2) */
    mul_fp(out[1], inp[1], inv);        /* b/(a^2+b^2) ...         */
    neg_fp(out[1], out[1]);             /* ... negated for the imaginary part */
}
|
||
|
|
||
|
/*
 * Reciprocal in Fp: |out| = 1/|inp|.
 *
 * A candidate is produced by ct_inverse_mod_383(), passed through
 * redc_mont_384() and then multiplied by RRx4.  Unless this is a fuzzing
 * build, the candidate is cross-checked: it is multiplied by |inp| and
 * accepted only if the product equals BLS12_381_Rx.p or is zero;
 * otherwise the answer is recomputed with flt_reciprocal_fp().
 */
static void reciprocal_fp(vec384 out, const vec384 inp)
{
    static const vec384 Px8 = {    /* left-aligned value of the modulus */
        TO_LIMB_T(0xcff7fffffffd5558), TO_LIMB_T(0xf55ffff58a9ffffd),
        TO_LIMB_T(0x39869507b587b120), TO_LIMB_T(0x23ba5c279c2895fb),
        TO_LIMB_T(0x58dd3db21a5d66bb), TO_LIMB_T(0xd0088f51cbff34d2)
    };
#ifndef __BLST_NO_ASM__
    static const vec384 RRx4 = {   /* (4<<768)%P */
        TO_LIMB_T(0x5f7e7cd070d107c2), TO_LIMB_T(0xec839a9ac49c13c8),
        TO_LIMB_T(0x6933786f44f4ef0b), TO_LIMB_T(0xd6bf8b9c676be983),
        TO_LIMB_T(0xd3adaaaa4dcefb06), TO_LIMB_T(0x12601bc1d82bc175)
    };
#else
    /* without assembly the plain BLS12_381_RR constant is used instead */
# define RRx4 BLS12_381_RR
#endif
    /* one 768-bit buffer, also addressable as two 384-bit halves */
    union { vec768 wide; vec384 half[2]; } scratch;

    ct_inverse_mod_383(scratch.wide, inp, BLS12_381_P, Px8);
    redc_mont_384(scratch.half[0], scratch.wide, BLS12_381_P, p0);
    mul_mont_384(scratch.half[0], scratch.half[0], RRx4, BLS12_381_P, p0);

#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    /* fuzzing builds take the candidate as is, without the cross-check */
    vec_copy(out, scratch.half[0], sizeof(vec384));
#else
    /* sign goes straight to flt_reciprocal */
    mul_mont_384(scratch.half[1], scratch.half[0], inp, BLS12_381_P, p0);

    /* bitwise |, not ||: both comparisons are always evaluated */
    if (vec_is_equal(scratch.half[1], BLS12_381_Rx.p, sizeof(vec384)) |
        vec_is_zero(scratch.half[1], sizeof(vec384))) {
        vec_copy(out, scratch.half[0], sizeof(vec384));
    } else {
        flt_reciprocal_fp(out, inp);
    }
#endif
#undef RRx4
}
|
||
|
|
||
|
/*
 * Externally visible Fp inversion; stores the reciprocal of |inp| in
 * |out| by forwarding to reciprocal_fp().
 */
void blst_fp_inverse(vec384 out, const vec384 inp)
{
    reciprocal_fp(out, inp);
}
|
||
|
|
||
|
/*
 * Externally visible Fp inversion under the "eucl" name; it forwards to
 * the very same reciprocal_fp() routine as blst_fp_inverse().
 */
void blst_fp_eucl_inverse(vec384 ret, const vec384 a)
{
    reciprocal_fp(ret, a);
}
|
||
|
|
||
|
/*
 * Reciprocal in Fp2 built on reciprocal_fp().
 *
 * For inp = a + b*i the result is
 *
 *     1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i
 *
 * so a single Fp inversion of a^2+b^2 is shared by both output halves.
 */
static void reciprocal_fp2(vec384x out, const vec384x inp)
{
    vec384 norm, inv;

    sqr_fp(norm, inp[0]);               /* a^2                  */
    sqr_fp(inv, inp[1]);                /* b^2 (parked in inv)  */
    add_fp(norm, norm, inv);            /* a^2 + b^2            */

    reciprocal_fp(inv, norm);           /* 1/(a^2 + b^2)        */

    mul_fp(out[0], inp[0], inv);        /* real part:  a/(a^2+b^2) */
    mul_fp(out[1], inp[1], inv);        /* b/(a^2+b^2) ...         */
    neg_fp(out[1], out[1]);             /* ... negated for the imaginary part */
}
|
||
|
|
||
|
/*
 * Externally visible Fp2 inversion; stores the reciprocal of |inp| in
 * |out| by forwarding to reciprocal_fp2().
 */
void blst_fp2_inverse(vec384x out, const vec384x inp)
{
    reciprocal_fp2(out, inp);
}
|
||
|
|
||
|
/*
 * Externally visible Fp2 inversion under the "eucl" name; it forwards to
 * the very same reciprocal_fp2() routine as blst_fp2_inverse().
 */
void blst_fp2_eucl_inverse(vec384x out, const vec384x inp)
{
    reciprocal_fp2(out, inp);
}
|
||
|
|
||
|
/*
 * Reciprocal in the scalar field: |out| = 1/|inp| modulo BLS12_381_r.
 *
 * ct_inverse_mod_256() fills a 512-bit scratch value, which is then
 * brought down to 256 bits by redc_mont_256() and finally multiplied by
 * BLS12_381_rRR.
 * NOTE(review): the last step presumably restores the Montgomery
 * representation - confirm against the helpers' definitions.
 */
static void reciprocal_fr(vec256 out, const vec256 inp)
{
    static const vec256 rx2 = {    /* left-aligned value of the modulus */
        TO_LIMB_T(0xfffffffe00000002), TO_LIMB_T(0xa77b4805fffcb7fd),
        TO_LIMB_T(0x6673b0101343b00a), TO_LIMB_T(0xe7db4ea6533afa90)
    };
    vec512 wide;

    ct_inverse_mod_256(wide, inp, BLS12_381_r, rx2);
    redc_mont_256(out, wide, BLS12_381_r, r0);
    mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0);
}
|
||
|
|
||
|
/*
 * Externally visible scalar-field inversion; stores the reciprocal of
 * |inp| in |out| by forwarding to reciprocal_fr().
 */
void blst_fr_inverse(vec256 out, const vec256 inp)
{
    reciprocal_fr(out, inp);
}
|
||
|
|
||
|
/*
 * Externally visible scalar-field inversion under the "eucl" name; it
 * forwards to the very same reciprocal_fr() routine as blst_fr_inverse().
 */
void blst_fr_eucl_inverse(vec256 out, const vec256 inp)
{
    reciprocal_fr(out, inp);
}
|