From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/gcd.c | 266 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/gcd.c (limited to 'gmp-6.3.0/mpn/generic/gcd.c') diff --git a/gmp-6.3.0/mpn/generic/gcd.c b/gmp-6.3.0/mpn/generic/gcd.c new file mode 100644 index 0000000..3f92cbf --- /dev/null +++ b/gmp-6.3.0/mpn/generic/gcd.c @@ -0,0 +1,266 @@ +/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. + +Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" +#include "longlong.h" + +/* Uses the HGCD operation described in + + N. Möller, On Schönhage's algorithm and subquadratic integer gcd + computation, Math. Comp. 77 (2008), 589-607. + + to reduce inputs until they are of size below GCD_DC_THRESHOLD, and + then uses Lehmer's algorithm. +*/ + +/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + + * 2)/3, which gives a balanced multiplication in + * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better + * performance. The matrix-vector multiplication is then + * 4:1-unbalanced, with matrix elements of size n/6, and vector + * elements of size p = 2n/3. */ + +/* From analysis of the theoretical running time, it appears that when + * multiplication takes time O(n^alpha), p should be chosen so that + * the ratio of the time for the mpn_hgcd call, and the time for the + * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - + * 1). */ +#ifdef TUNE_GCD_P +#define P_TABLE_SIZE 10000 +mp_size_t p_table[P_TABLE_SIZE]; +#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) +#else +#define CHOOSE_P(n) (2*(n) / 3) +#endif + +struct gcd_ctx +{ + mp_ptr gp; + mp_size_t gn; +}; + +static void +gcd_hook (void *p, mp_srcptr gp, mp_size_t gn, + mp_srcptr qp, mp_size_t qn, int d) +{ + struct gcd_ctx *ctx = (struct gcd_ctx *) p; + MPN_COPY (ctx->gp, gp, gn); + ctx->gn = gn; +} + +mp_size_t +mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) +{ + mp_size_t talloc; + mp_size_t scratch; + mp_size_t matrix_scratch; + + struct gcd_ctx ctx; + mp_ptr tp; + TMP_DECL; + + ASSERT (usize >= n); + ASSERT (n > 0); + ASSERT (vp[n-1] > 0); + + /* FIXME: Check for small sizes first, before setting up temporary + storage etc. */ + talloc = MPN_GCD_SUBDIV_STEP_ITCH(n); + + /* For initial division */ + scratch = usize - n + 1; + if (scratch > talloc) + talloc = scratch; + +#if TUNE_GCD_P + if (CHOOSE_P (n) > 0) +#else + if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) +#endif + { + mp_size_t hgcd_scratch; + mp_size_t update_scratch; + mp_size_t p = CHOOSE_P (n); + mp_size_t scratch; +#if TUNE_GCD_P + /* Worst case, since we don't guarantee that n - CHOOSE_P(n) + is increasing */ + matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); + hgcd_scratch = mpn_hgcd_itch (n); + update_scratch = 2*(n - 1); +#else + matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); + hgcd_scratch = mpn_hgcd_itch (n - p); + update_scratch = p + n - 1; +#endif + scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); + if (scratch > talloc) + talloc = scratch; + } + + TMP_MARK; + tp = TMP_ALLOC_LIMBS(talloc); + + if (usize > n) + { + mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); + + if (mpn_zero_p (up, n)) + { + MPN_COPY (gp, vp, n); + ctx.gn = n; + goto done; + } + } + + ctx.gp = gp; + +#if TUNE_GCD_P + while (CHOOSE_P (n) > 0) +#else + while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) +#endif + { + struct hgcd_matrix M; + mp_size_t p = CHOOSE_P (n); + mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); + mp_size_t nn; + mpn_hgcd_matrix_init (&M, n - p, tp); + nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); + if (nn > 0) + { + ASSERT (M.n <= (n - p - 1)/2); + ASSERT (M.n + p <= (p + n - 1) / 2); + /* Temporary storage 2 (p + M->n) <= p + n - 1. */ + n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); + } + else + { + /* Temporary storage n */ + n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp); + if (n == 0) + goto done; + } + } + + while (n > 2) + { + struct hgcd_matrix1 M; + mp_limb_t uh, ul, vh, vl; + mp_limb_t mask; + + mask = up[n-1] | vp[n-1]; + ASSERT (mask > 0); + + if (mask & GMP_NUMB_HIGHBIT) + { + uh = up[n-1]; ul = up[n-2]; + vh = vp[n-1]; vl = vp[n-2]; + } + else + { + int shift; + + count_leading_zeros (shift, mask); + uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]); + ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]); + vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]); + vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]); + } + + /* Try an mpn_hgcd2 step */ + if (mpn_hgcd2 (uh, ul, vh, vl, &M)) + { + n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n); + MP_PTR_SWAP (up, tp); + } + else + { + /* mpn_hgcd2 has failed. Then either one of a or b is very + small, or the difference is very small. Perform one + subtraction followed by one division. */ + + /* Temporary storage n */ + n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp); + if (n == 0) + goto done; + } + } + + ASSERT(up[n-1] | vp[n-1]); + + /* Due to the calling convention for mpn_gcd, at most one can be even. */ + if ((up[0] & 1) == 0) + MP_PTR_SWAP (up, vp); + ASSERT ((up[0] & 1) != 0); + + { + mp_limb_t u0, u1, v0, v1; + mp_double_limb_t g; + + u0 = up[0]; + v0 = vp[0]; + + if (n == 1) + { + int cnt; + count_trailing_zeros (cnt, v0); + *gp = mpn_gcd_11 (u0, v0 >> cnt); + ctx.gn = 1; + goto done; + } + + v1 = vp[1]; + if (UNLIKELY (v0 == 0)) + { + v0 = v1; + v1 = 0; + /* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could + when this situation occurs internally. */ + } + if ((v0 & 1) == 0) + { + int cnt; + count_trailing_zeros (cnt, v0); + v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt); + v1 >>= cnt; + } + + u1 = up[1]; + g = mpn_gcd_22 (u1, u0, v1, v0); + gp[0] = g.d0; + gp[1] = g.d1; + ctx.gn = 1 + (g.d1 > 0); + } +done: + TMP_FREE; + return ctx.gn; +} -- cgit v1.2.3