From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/generic/hgcd2.c | 283 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 283 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/generic/hgcd2.c

diff --git a/gmp-6.3.0/mpn/generic/hgcd2.c b/gmp-6.3.0/mpn/generic/hgcd2.c
new file mode 100644
index 0000000..43d4d48
--- /dev/null
+++ b/gmp-6.3.0/mpn/generic/hgcd2.c
@@ -0,0 +1,283 @@
+/* hgcd2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000-2004, 2008, 2012, 2019 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/generic/hgcd2-div.h"
+
+#if GMP_NAIL_BITS != 0
+#error Nails not implemented
+#endif
+
+/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit.  Constructs
+   matrix M.  Returns 1 if we make progress, i.e. can perform at least
+   one subtraction.  Otherwise returns zero. */
+
+/* FIXME: Possible optimizations:
+
+   The div2 function starts with checking the most significant bit of
+   the numerator.  We could maintain normalized operands here, and call
+   hgcd with normalized operands only, which should make the code
+   simpler and possibly faster.
+
+   Experiment with table lookups on the most significant bits.
+
+   This function is also a candidate for assembler implementation. */
+int
+mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+	   struct hgcd_matrix1 *M)
+{
+  mp_limb_t u00, u01, u10, u11;
+
+  if (ah < 2 || bh < 2)
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+      if (ah < 2)
+	return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+      if (bh < 2)
+	return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+	goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2)) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2)) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  break;
+	}
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+	 1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+	goto done;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, ah, al, bh, bl);
+	  al = r[0]; ah = r[1];
+	  if (ah < 2)
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      goto done;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	}
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+	goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+	{
+	  ah = (ah << (GMP_LIMB_BITS / 2)) + (al >> (GMP_LIMB_BITS / 2));
+	  bh = (bh << (GMP_LIMB_BITS / 2)) + (bl >> (GMP_LIMB_BITS / 2));
+
+	  goto subtract_a1;
+	}
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+	 1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+	goto done;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	}
+      else
+	{
+	  mp_limb_t r[2];
+	  mp_limb_t q = div2 (r, bh, bl, ah, al);
+	  bl = r[0]; bh = r[1];
+	  if (bh < 2)
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      goto done;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	}
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't get a
+     truly maximal M (corresponding to |a - b| < 2^{GMP_LIMB_BITS + 1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (ah <= bh)
+	{
+	  /* Use q = 1 */
+	  u01 += u00;
+	  u11 += u10;
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (ah, bh);
+	  mp_limb_t q = rq.d1;
+	  ah = rq.d0;
+
+	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* A is too small, but q is correct. */
+	      u01 += q * u00;
+	      u11 += q * u10;
+	      break;
+	    }
+	  q++;
+	  u01 += q * u00;
+	  u11 += q * u10;
+	}
+    subtract_a1:
+      ASSERT (bh >= ah);
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+	break;
+
+      if (bh <= ah)
+	{
+	  /* Use q = 1 */
+	  u00 += u01;
+	  u10 += u11;
+	}
+      else
+	{
+	  mp_double_limb_t rq = div1 (bh, ah);
+	  mp_limb_t q = rq.d1;
+	  bh = rq.d0;
+
+	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+	    {
+	      /* B is too small, but q is correct. */
+	      u00 += q * u01;
+	      u10 += q * u11;
+	      break;
+	    }
+	  q++;
+	  u00 += q * u01;
+	  u10 += q * u11;
+	}
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+
+  return 1;
+}
+
+/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11).  Vectors must
+ * have space for n + 1 limbs.  Uses three buffers to avoid a copy. */
+mp_size_t
+mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
+			     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t ah, bh;
+
+  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
+
+     r  = u00 * a
+     r += u10 * b
+     b *= u11
+     b += u01 * a
+  */
+
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
+  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
+#else
+  ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
+  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
+
+  bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
+  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
+#endif
+  rp[n] = ah;
+  bp[n] = bh;
+
+  n += (ah | bh) > 0;
+  return n;
+}
--
cgit v1.2.3
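
For orientation, the following is a minimal standalone sketch, not GMP code, of the
quotient and matrix bookkeeping the patch above performs.  It assumes plain uint64_t
arithmetic and ordinary C division in place of mp_limb_t and GMP's internal div1/div2
helpers, and the inputs and the 2^33 cutoff are arbitrary choices for illustration.
The invariant it checks is the one mpn_hgcd2's column updates maintain: the original
pair (a0; b0) equals M times the reduced pair (a; b), where M accumulates a right
factor (1 q; 0 1) for each step a -= q b and (1 0; q 1) for each step b -= q a.

/* Standalone illustration (not GMP code): the same column updates as
   mpn_hgcd2's single-precision loop, with ordinary division standing in
   for div1/div2, and a final check of the invariant (a0; b0) = M (a; b). */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t a = UINT64_C (0x9e3779b97f4a7c15);
  uint64_t b = UINT64_C (0x6a09e667f3bcc908);
  const uint64_t a0 = a, b0 = b;

  /* M starts as the identity matrix. */
  uint64_t u00 = 1, u01 = 0, u10 = 0, u11 = 1;

  /* Stop once either value drops below roughly half-limb size; GMP's
     cutoff is 2^(GMP_LIMB_BITS/2 + 1) in the same spirit. */
  const uint64_t limit = UINT64_C (1) << 33;

  while (a >= limit && b >= limit)
    {
      if (a >= b)
	{
	  uint64_t q = a / b;	/* role played by div1/div2 in GMP */
	  a -= q * b;		/* a -= q b */
	  u01 += q * u00;	/* M *= (1 q; 0 1): second column */
	  u11 += q * u10;
	}
      else
	{
	  uint64_t q = b / a;
	  b -= q * a;		/* b -= q a */
	  u00 += q * u01;	/* M *= (1 0; q 1): first column */
	  u10 += q * u11;
	}
    }

  /* The original operands are recovered from the reduced pair. */
  assert (a0 == u00 * a + u01 * b);
  assert (b0 == u10 * a + u11 * b);

  printf ("a = 0x%" PRIx64 ", b = 0x%" PRIx64 "\n", a, b);
  printf ("M = (%" PRIu64 " %" PRIu64 "; %" PRIu64 " %" PRIu64 ")\n",
	  u00, u01, u10, u11);
  return 0;
}

Note how each quotient step touches only one column of M, which is why the updates in
the patch come in u00/u10 and u01/u11 pairs.  mpn_hgcd2 additionally subtracts once
before considering a division, then divides the already-reduced value and uses q + 1,
so the common q = 1 case never pays for a division.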