From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/brootinv.c | 159 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/brootinv.c (limited to 'gmp-6.3.0/mpn/generic/brootinv.c') diff --git a/gmp-6.3.0/mpn/generic/brootinv.c b/gmp-6.3.0/mpn/generic/brootinv.c new file mode 100644 index 0000000..e91b597 --- /dev/null +++ b/gmp-6.3.0/mpn/generic/brootinv.c @@ -0,0 +1,159 @@ +/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b). + + Contributed to the GNU project by Martin Boij (as part of perfpow.c). + +Copyright 2009, 2010, 2012, 2013, 2018 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" + +/* Computes a^2e (mod B). Uses right-to-left binary algorithm, since + typical use will have e small. */ +static mp_limb_t +powsquaredlimb (mp_limb_t a, mp_limb_t e) +{ + mp_limb_t r; + + r = 1; + /* if (LIKELY (e != 0)) */ + do { + a *= a; + if (e & 1) + r *= a; + e >>= 1; + } while (e != 0); + + return r; +} + +/* Compute r such that r^k * y = 1 (mod B^n). + + Iterates + r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b) + using Hensel lifting, each time doubling the number of known bits in r. + + Works just for odd k. Else the Hensel lifting degenerates. + + FIXME: + + (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows). + + (2) Rewrite iteration as + r' <-- r - k^{-1} r (r^k y - 1) + and take advantage of the zero low part of r^k y - 1. + + (3) Use wrap-around trick. + + (4) Use a small table to get starting value. + + Scratch need: bn + (((bn + 1) >> 1) + 1) + scratch for mpn_powlo + Currently mpn_powlo requires 3*bn + so that 5*bn is surely enough, where bn = ceil (bnb / GMP_NUMB_BITS). +*/ + +void +mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp) +{ + mp_ptr tp2, tp3; + mp_limb_t kinv, k2, r0, y0; + mp_size_t order[GMP_LIMB_BITS + 1]; + int d; + + ASSERT (bn > 0); + ASSERT ((k & 1) != 0); + + tp2 = tp + bn; + tp3 = tp + bn + ((bn + 3) >> 1); + k2 = (k >> 1) + 1; /* (k + 1) / 2 , but avoid k+1 overflow */ + + binvert_limb (kinv, k); + + /* 4-bit initial approximation: + + y%16 | 1 3 5 7 9 11 13 15, + k%4 +-------------------------+k2%2 + 1 | 1 11 13 7 9 3 5 15 | 1 + 3 | 1 3 5 7 9 11 13 15 | 0 + + */ + y0 = yp[0]; + + r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 3) & 8); /* 4 bits */ + r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3f)); /* 8 bits */ + r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2 & 0x3fff)); /* 16 bits */ +#if GMP_NUMB_BITS > 16 + { + unsigned prec = 16; + do + { + r0 = kinv * (k2 * r0 * 2 - y0 * powsquaredlimb(r0, k2)); + prec *= 2; + } + while (prec < GMP_NUMB_BITS); + } +#endif + + rp[0] = r0; + if (bn == 1) + return; + + d = 0; + for (; bn != 2; bn = (bn + 1) >> 1) + order[d++] = bn; + + order[d] = 2; + bn = 1; + + do + { + mpn_sqr (tp, rp, bn); /* Result may overlap tp2 */ + tp2[bn] = mpn_mul_1 (tp2, rp, bn, k2 << 1); + + bn = order[d]; + + mpn_powlo (rp, tp, &k2, 1, bn, tp3); + mpn_mullo_n (tp, yp, rp, bn); + + /* mpn_sub (tp, tp2, ((bn + 1) >> 1) + 1, tp, bn); */ + /* The function above is not handled, ((bn + 1) >> 1) + 1 <= bn*/ + { + mp_size_t pbn = (bn + 3) >> 1; /* Size of tp2 */ + int borrow; + borrow = mpn_sub_n (tp, tp2, tp, pbn) != 0; + if (bn > pbn) /* 3 < bn */ + { + if (borrow) + mpn_com (tp + pbn, tp + pbn, bn - pbn); + else + mpn_neg (tp + pbn, tp + pbn, bn - pbn); + } + } + mpn_pi1_bdiv_q_1 (rp, tp, bn, k, kinv, 0); + } + while (--d >= 0); +} -- cgit v1.2.3