From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/perfsqr.c | 238 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/perfsqr.c (limited to 'gmp-6.3.0/mpn/generic/perfsqr.c') diff --git a/gmp-6.3.0/mpn/generic/perfsqr.c b/gmp-6.3.0/mpn/generic/perfsqr.c new file mode 100644 index 0000000..1ea5c84 --- /dev/null +++ b/gmp-6.3.0/mpn/generic/perfsqr.c @@ -0,0 +1,238 @@ +/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square, + zero otherwise. + +Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software +Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include /* for NULL */ +#include "gmp-impl.h" +#include "longlong.h" + +#include "perfsqr.h" + + +/* change this to "#define TRACE(x) x" for diagnostics */ +#define TRACE(x) + + + +/* PERFSQR_MOD_* detects non-squares using residue tests. + + A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h. It takes + {up,usize} modulo a selected modulus to get a remainder r. For 32-bit or + 64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34, + or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP + used. PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this + case too. + + PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or + PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table + data indicating residues and non-residues modulo those divisors. The + table data is in 1 or 2 limbs worth of bits respectively, per the size of + each d. + + A "modexact" style remainder is taken to reduce r modulo d. + PERFSQR_MOD_IDX implements this, producing an index "idx" for use with + the table data. Notice there's just one multiplication by a constant + "inv", for each d. + + The modexact doesn't produce a true r%d remainder, instead idx satisfies + "-(idx<> MOD34_BITS); \ + } while (0) + +/* FIXME: The %= here isn't good, and might destroy any savings from keeping + the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm). + Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor + and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our + normal case, so lets not worry too much about mod_1. */ +#define PERFSQR_MOD_PP(r, up, usize) \ + do { \ + if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) \ + { \ + (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM, \ + PERFSQR_PP_INVERTED); \ + (r) %= PERFSQR_PP; \ + } \ + else \ + { \ + (r) = mpn_mod_1 (up, usize, PERFSQR_PP); \ + } \ + } while (0) + +#define PERFSQR_MOD_IDX(idx, r, d, inv) \ + do { \ + mp_limb_t q; \ + ASSERT ((r) <= PERFSQR_MOD_MASK); \ + ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1); \ + ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK); \ + \ + q = ((r) * (inv)) & PERFSQR_MOD_MASK; \ + ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK)); \ + (idx) = (q * (d)) >> PERFSQR_MOD_BITS; \ + } while (0) + +#define PERFSQR_MOD_1(r, d, inv, mask) \ + do { \ + unsigned idx; \ + ASSERT ((d) <= GMP_LIMB_BITS); \ + PERFSQR_MOD_IDX(idx, r, d, inv); \ + TRACE (printf (" PERFSQR_MOD_1 d=%u r=%lu idx=%u\n", \ + d, r%d, idx)); \ + if ((((mask) >> idx) & 1) == 0) \ + { \ + TRACE (printf (" non-square\n")); \ + return 0; \ + } \ + } while (0) + +/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the + sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */ +#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) \ + do { \ + mp_limb_t m; \ + unsigned idx; \ + ASSERT ((d) <= 2*GMP_LIMB_BITS); \ + \ + PERFSQR_MOD_IDX (idx, r, d, inv); \ + TRACE (printf (" PERFSQR_MOD_2 d=%u r=%lu idx=%u\n", \ + d, r%d, idx)); \ + m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi)); \ + idx %= GMP_LIMB_BITS; \ + if (((m >> idx) & 1) == 0) \ + { \ + TRACE (printf (" non-square\n")); \ + return 0; \ + } \ + } while (0) + + +int +mpn_perfect_square_p (mp_srcptr up, mp_size_t usize) +{ + ASSERT (usize >= 1); + + TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize)); + + /* The first test excludes 212/256 (82.8%) of the perfect square candidates + in O(1) time. */ + { + unsigned idx = up[0] % 0x100; + if (((sq_res_0x100[idx / GMP_LIMB_BITS] + >> (idx % GMP_LIMB_BITS)) & 1) == 0) + return 0; + } + +#if 0 + /* Check that we have even multiplicity of 2, and then check that the rest is + a possible perfect square. Leave disabled until we can determine this + really is an improvement. If it is, it could completely replace the + simple probe above, since this should throw out more non-squares, but at + the expense of somewhat more cycles. */ + { + mp_limb_t lo; + int cnt; + lo = up[0]; + while (lo == 0) + up++, lo = up[0], usize--; + count_trailing_zeros (cnt, lo); + if ((cnt & 1) != 0) + return 0; /* return of not even multiplicity of 2 */ + lo >>= cnt; /* shift down to align lowest non-zero bit */ + if ((lo & 6) != 0) + return 0; + } +#endif + + + /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares + according to their residues modulo small primes (or powers of + primes). See perfsqr.h. */ + PERFSQR_MOD_TEST (up, usize); + + + /* For the third and last test, we finally compute the square root, + to make sure we've really got a perfect square. */ + { + mp_ptr root_ptr; + int res; + TMP_DECL; + + TMP_MARK; + root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2); + + /* Iff mpn_sqrtrem returns zero, the square is perfect. */ + res = ! mpn_sqrtrem (root_ptr, NULL, up, usize); + TMP_FREE; + + return res; + } +} -- cgit v1.2.3