From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/broot.c | 195 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/broot.c (limited to 'gmp-6.3.0/mpn/generic/broot.c') diff --git a/gmp-6.3.0/mpn/generic/broot.c b/gmp-6.3.0/mpn/generic/broot.c new file mode 100644 index 0000000..02fe75a --- /dev/null +++ b/gmp-6.3.0/mpn/generic/broot.c @@ -0,0 +1,195 @@ +/* mpn_broot -- Compute hensel sqrt + + Contributed to the GNU project by Niels Möller + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. + +Copyright 2012 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" + +/* Computes a^e (mod B). Uses right-to-left binary algorithm, since + typical use will have e small. */ +static mp_limb_t +powlimb (mp_limb_t a, mp_limb_t e) +{ + mp_limb_t r = 1; + mp_limb_t s = a; + + for (r = 1, s = a; e > 0; e >>= 1, s *= s) + if (e & 1) + r *= s; + + return r; +} + +/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd. + + Iterates + + r' <-- r - r * (a^{k-1} r^k - 1) / n + + If + + a^{k-1} r^k = 1 (mod 2^m), + + then + + a^{k-1} r'^k = 1 (mod 2^{2m}), + + Compute the update term as + + r' = r - (a^{k-1} r^{k+1} - r) / k + + where we still have cancellation of low limbs. + + */ +void +mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k) +{ + mp_size_t sizes[GMP_LIMB_BITS * 2]; + mp_ptr akm1, tp, rnp, ep; + mp_limb_t a0, r0, km1, kp1h, kinv; + mp_size_t rn; + unsigned i; + + TMP_DECL; + + ASSERT (n > 0); + ASSERT (ap[0] & 1); + ASSERT (k & 1); + ASSERT (k >= 3); + + TMP_MARK; + + akm1 = TMP_ALLOC_LIMBS (4*n); + tp = akm1 + n; + + km1 = k-1; + /* FIXME: Could arrange the iteration so we don't need to compute + this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note + that we can use wraparound also for a*r, since the low half is + unchanged from the previous iteration. Or possibly mulmid. Also, + a r = a^{1/k}, so we get that value too, for free? */ + mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */ + + a0 = ap[0]; + binvert_limb (kinv, k); + + /* 4 bits: a^{1/k - 1} (mod 16): + + a % 8 + 1 3 5 7 + k%4 +------- + 1 |1 1 1 1 + 3 |1 9 9 1 + */ + r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8); + r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */ + r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */ + r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */ +#if GMP_NUMB_BITS > 32 + { + unsigned prec = 32; + do + { + r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); + prec *= 2; + } + while (prec < GMP_NUMB_BITS); + } +#endif + + rp[0] = r0; + if (n == 1) + { + TMP_FREE; + return; + } + + /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */ + kp1h = k/2 + 1; + + /* FIXME: Special case for two limb iteration. */ + rnp = TMP_ALLOC_LIMBS (2*n + 1); + ep = rnp + n; + + /* FIXME: Possible to this on the fly with some bit fiddling. */ + for (i = 0; n > 1; n = (n + 1)/2) + sizes[i++] = n; + + rn = 1; + + while (i-- > 0) + { + /* Compute x^{k+1}. */ + mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the + final iteration. */ + mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp); + + /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */ + + mpn_mullo_n (ep, rnp, akm1, sizes[i]); + ASSERT (mpn_cmp (ep, rp, rn) == 0); + + ASSERT (sizes[i] <= 2*rn); + mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0); + mpn_neg (rp + rn, rp + rn, sizes[i] - rn); + rn = sizes[i]; + } + TMP_FREE; +} + +/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */ +void +mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k) +{ + mp_ptr tp; + TMP_DECL; + + ASSERT (n > 0); + ASSERT (ap[0] & 1); + ASSERT (k & 1); + + if (k == 1) + { + MPN_COPY (rp, ap, n); + return; + } + + TMP_MARK; + tp = TMP_ALLOC_LIMBS (n); + + mpn_broot_invm1 (tp, ap, n, k); + mpn_mullo_n (rp, tp, ap, n); + + TMP_FREE; +} -- cgit v1.2.3