From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/compute_powtab.c | 373 +++++++++++++++++++++++++++++++++ 1 file changed, 373 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/compute_powtab.c (limited to 'gmp-6.3.0/mpn/generic/compute_powtab.c') diff --git a/gmp-6.3.0/mpn/generic/compute_powtab.c b/gmp-6.3.0/mpn/generic/compute_powtab.c new file mode 100644 index 0000000..f4fbc64 --- /dev/null +++ b/gmp-6.3.0/mpn/generic/compute_powtab.c @@ -0,0 +1,373 @@ +/* mpn_compute_powtab. + + Contributed to the GNU project by Torbjorn Granlund. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 1991-2017 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +/* + CAVEATS: + * The exptab and powtab vectors are in opposite orders. Probably OK. + * Consider getting rid of exptab, doing bit ops on the un argument instead. + * Consider rounding greatest power slightly upwards to save adjustments. + * In powtab_decide, consider computing cost from just the 2-3 largest + operands, since smaller operand contribute little. This makes most sense + if exptab is suppressed. +*/ + +#include "gmp-impl.h" + +#ifndef DIV_1_VS_MUL_1_PERCENT +#define DIV_1_VS_MUL_1_PERCENT 150 +#endif + +#define SET_powers_t(dest, ptr, size, dib, b, sh) \ + do { \ + dest.p = ptr; \ + dest.n = size; \ + dest.digits_in_base = dib; \ + dest.base = b; \ + dest.shift = sh; \ + } while (0) + +#if DIV_1_VS_MUL_1_PERCENT > 120 +#define HAVE_mpn_compute_powtab_mul 1 +static void +mpn_compute_powtab_mul (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, + int base, const size_t *exptab, size_t n_pows) +{ + mp_size_t n; + mp_ptr p, t; + mp_limb_t cy; + long start_idx; + int c; + + mp_limb_t big_base = mp_bases[base].big_base; + int chars_per_limb = mp_bases[base].chars_per_limb; + + mp_ptr powtab_mem_ptr = powtab_mem; + + size_t digits_in_base = chars_per_limb; + + powers_t *pt = powtab; + + p = powtab_mem_ptr; + powtab_mem_ptr += 1; + p[0] = big_base; + + SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); + pt++; + + t = powtab_mem_ptr; + powtab_mem_ptr += 2; + t[1] = mpn_mul_1 (t, p, 1, big_base); + n = 2; + + digits_in_base *= 2; + + c = t[0] == 0; + t += c; + n -= c; + mp_size_t shift = c; + + SET_powers_t (pt[0], t, n, digits_in_base, base, shift); + p = t; + pt++; + + if (exptab[0] == ((size_t) chars_per_limb << n_pows)) + { + start_idx = n_pows - 2; + } + else + { + if (((digits_in_base + chars_per_limb) << (n_pows-2)) <= exptab[0]) + { + /* 3, sometimes adjusted to 4. */ + t = powtab_mem_ptr; + powtab_mem_ptr += 4; + t[n] = cy = mpn_mul_1 (t, p, n, big_base); + n += cy != 0;; + + digits_in_base += chars_per_limb; + + c = t[0] == 0; + t += c; + n -= c; + shift += c; + } + else + { + /* 2 copy, will always become 3 with back-multiplication. */ + t = powtab_mem_ptr; + powtab_mem_ptr += 3; + t[0] = p[0]; + t[1] = p[1]; + } + + SET_powers_t (pt[0], t, n, digits_in_base, base, shift); + p = t; + pt++; + start_idx = n_pows - 3; + } + + for (long pi = start_idx; pi >= 0; pi--) + { + t = powtab_mem_ptr; + powtab_mem_ptr += 2 * n + 2; + + ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); + + mpn_sqr (t, p, n); + + digits_in_base *= 2; + n *= 2; + n -= t[n - 1] == 0; + shift *= 2; + + c = t[0] == 0; + t += c; + n -= c; + shift += c; + + /* Adjust new value if it is too small as input to the next squaring. */ + if (((digits_in_base + chars_per_limb) << pi) <= exptab[0]) + { + t[n] = cy = mpn_mul_1 (t, t, n, big_base); + n += cy != 0; + + digits_in_base += chars_per_limb; + + c = t[0] == 0; + t += c; + n -= c; + shift += c; + } + + SET_powers_t (pt[0], t, n, digits_in_base, base, shift); + + /* Adjust previous value if it is not at its target power. */ + if (pt[-1].digits_in_base < exptab[pi + 1]) + { + mp_size_t n = pt[-1].n; + mp_ptr p = pt[-1].p; + p[n] = cy = mpn_mul_1 (p, p, n, big_base); + n += cy != 0; + + ASSERT (pt[-1].digits_in_base + chars_per_limb == exptab[pi + 1]); + pt[-1].digits_in_base = exptab[pi + 1]; + + c = p[0] == 0; + pt[-1].p = p + c; + pt[-1].n = n - c; + pt[-1].shift += c; + } + + p = t; + pt++; + } +} +#endif + +#if DIV_1_VS_MUL_1_PERCENT < 275 +#define HAVE_mpn_compute_powtab_div 1 +static void +mpn_compute_powtab_div (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, + int base, const size_t *exptab, size_t n_pows) +{ + mp_ptr p, t; + + mp_limb_t big_base = mp_bases[base].big_base; + int chars_per_limb = mp_bases[base].chars_per_limb; + + mp_ptr powtab_mem_ptr = powtab_mem; + + size_t digits_in_base = chars_per_limb; + + powers_t *pt = powtab; + + p = powtab_mem_ptr; + powtab_mem_ptr += 1; + p[0] = big_base; + + SET_powers_t (pt[0], p, 1, digits_in_base, base, 0); + pt++; + + mp_size_t n = 1; + mp_size_t shift = 0; + for (long pi = n_pows - 1; pi >= 0; pi--) + { + t = powtab_mem_ptr; + powtab_mem_ptr += 2 * n; + + ASSERT (powtab_mem_ptr < powtab_mem + mpn_str_powtab_alloc (un)); + + mpn_sqr (t, p, n); + n = 2 * n - 1; n += t[n] != 0; + digits_in_base *= 2; + + if (digits_in_base != exptab[pi]) /* if ((((un - 1) >> pi) & 2) == 0) */ + { +#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1 + if (__GMP_LIKELY (base == 10)) + mpn_pi1_bdiv_q_1 (t, t, n, big_base >> MP_BASES_BIG_BASE_CTZ_10, + MP_BASES_BIG_BASE_BINVERTED_10, + MP_BASES_BIG_BASE_CTZ_10); + else +#endif + /* FIXME: We could use _pi1 here if we add big_base_binverted and + big_base_ctz fields to struct bases. That would add about 2 KiB + to mp_bases.c. + FIXME: Use mpn_bdiv_q_1 here when mpn_divexact_1 is converted to + mpn_bdiv_q_1 for more machines. */ + mpn_divexact_1 (t, t, n, big_base); + + n -= t[n - 1] == 0; + digits_in_base -= chars_per_limb; + } + + shift *= 2; + /* Strip low zero limbs, but be careful to keep the result divisible by + big_base. */ + while (t[0] == 0 && (t[1] & ((big_base & -big_base) - 1)) == 0) + { + t++; + n--; + shift++; + } + p = t; + + SET_powers_t (pt[0], p, n, digits_in_base, base, shift); + pt++; + } + + /* Strip any remaining low zero limbs. */ + pt -= n_pows + 1; + for (long pi = n_pows; pi >= 0; pi--) + { + mp_ptr t = pt[pi].p; + mp_size_t shift = pt[pi].shift; + mp_size_t n = pt[pi].n; + int c; + c = t[0] == 0; + t += c; + n -= c; + shift += c; + pt[pi].p = t; + pt[pi].shift = shift; + pt[pi].n = n; + } +} +#endif + +static long +powtab_decide (size_t *exptab, size_t un, int base) +{ + int chars_per_limb = mp_bases[base].chars_per_limb; + long n_pows = 0; + for (size_t pn = (un + 1) >> 1; pn != 1; pn = (pn + 1) >> 1) + { + exptab[n_pows] = pn * chars_per_limb; + n_pows++; + } + exptab[n_pows] = chars_per_limb; + +#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div + size_t pn = un - 1; + size_t xn = (un + 1) >> 1; + unsigned mcost = 1; + unsigned dcost = 1; + for (long i = n_pows - 2; i >= 0; i--) + { + size_t pow = (pn >> (i + 1)) + 1; + + if (pow & 1) + dcost += pow; + + if (xn != (pow << i)) + { + if (pow > 2 && (pow & 1) == 0) + mcost += 2 * pow; + else + mcost += pow; + } + else + { + if (pow & 1) + mcost += pow; + } + } + + dcost = dcost * DIV_1_VS_MUL_1_PERCENT / 100; + + if (mcost <= dcost) + return n_pows; + else + return -n_pows; +#elif HAVE_mpn_compute_powtab_mul + return n_pows; +#elif HAVE_mpn_compute_powtab_div + return -n_pows; +#else +#error "no powtab function available" +#endif +} + +size_t +mpn_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) +{ + size_t exptab[GMP_LIMB_BITS]; + + long n_pows = powtab_decide (exptab, un, base); + +#if HAVE_mpn_compute_powtab_mul && HAVE_mpn_compute_powtab_div + if (n_pows >= 0) + { + mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); + return n_pows; + } + else + { + mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); + return -n_pows; + } +#elif HAVE_mpn_compute_powtab_mul + ASSERT (n_pows > 0); + mpn_compute_powtab_mul (powtab, powtab_mem, un, base, exptab, n_pows); + return n_pows; +#elif HAVE_mpn_compute_powtab_div + ASSERT (n_pows < 0); + mpn_compute_powtab_div (powtab, powtab_mem, un, base, exptab, -n_pows); + return -n_pows; +#else +#error "no powtab function available" +#endif +} -- cgit v1.2.3