From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpz/oddfac_1.c | 435 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100644 gmp-6.3.0/mpz/oddfac_1.c (limited to 'gmp-6.3.0/mpz/oddfac_1.c') diff --git a/gmp-6.3.0/mpz/oddfac_1.c b/gmp-6.3.0/mpz/oddfac_1.c new file mode 100644 index 0000000..3f4c085 --- /dev/null +++ b/gmp-6.3.0/mpz/oddfac_1.c @@ -0,0 +1,435 @@ +/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!. + +Contributed to the GNU project by Marco Bodrato. + +THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. +IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. +IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR +DISAPPEAR IN A FUTURE GNU MP RELEASE. + +Copyright 2010-2012, 2015-2017, 2020, 2021 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#include "gmp-impl.h" +#include "longlong.h" + +/* TODO: + - split this file in smaller parts with functions that can be recycled for different computations. + */ + +/**************************************************************/ +/* Section macros: common macros, for mswing/fac/bin (&sieve) */ +/**************************************************************/ + +#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ + if ((PR) > (MAX_PR)) { \ + (VEC)[(I)++] = (PR); \ + (PR) = 1; \ + } + +#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ + do { \ + if ((PR) > (MAX_PR)) { \ + (VEC)[(I)++] = (PR); \ + (PR) = (P); \ + } else \ + (PR) *= (P); \ + } while (0) + +#define LOOP_ON_SIEVE_CONTINUE(prime,end) \ + __max_i = (end); \ + \ + do { \ + ++__i; \ + if ((*__sieve & __mask) == 0) \ + { \ + mp_limb_t prime; \ + prime = id_to_n(__i) + +#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ + do { \ + mp_limb_t __mask, *__sieve, __max_i, __i; \ + \ + __i = (start)-(off); \ + __sieve = (sieve) + __i / GMP_LIMB_BITS; \ + __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ + __i += (off); \ + \ + LOOP_ON_SIEVE_CONTINUE(prime,end) + +#define LOOP_ON_SIEVE_STOP \ + } \ + __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ + __sieve += __mask & 1; \ + } while (__i <= __max_i) + +#define LOOP_ON_SIEVE_END \ + LOOP_ON_SIEVE_STOP; \ + } while (0) + +/*********************************************************/ +/* Section sieve: sieving functions and tools for primes */ +/*********************************************************/ + +#if WANT_ASSERT +static mp_limb_t +bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } +#endif + +/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ +static mp_limb_t +id_to_n (mp_limb_t id) { return id*3+1+(id&1); } + +/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ +static mp_limb_t +n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } + +#if WANT_ASSERT +static mp_size_t +primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } +#endif + +/*********************************************************/ +/* Section mswing: 2-multiswing factorial */ +/*********************************************************/ + +/* Returns an approximation of the sqare root of x. + * It gives: + * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2 + * or + * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8 + */ +static mp_limb_t +limb_apprsqrt (mp_limb_t x) +{ + int s; + + ASSERT (x > 2); + count_leading_zeros (s, x); + s = (GMP_LIMB_BITS - s) >> 1; + return ((CNST_LIMB(1) << (s - 1)) + (x >> 1 >> s)); +} + +#if 0 +/* A count-then-exponentiate variant for SWING_A_PRIME */ +#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __q, __prime; \ + int __exp; \ + __prime = (P); \ + __exp = 0; \ + __q = (N); \ + do { \ + __q /= __prime; \ + __exp += __q & 1; \ + } while (__q >= __prime); \ + if (__exp) { /* Store $prime^{exp}$ */ \ + for (__q = __prime; --__exp; __q *= __prime); \ + FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \ + }; \ + } while (0) +#else +#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __q, __prime; \ + __prime = (P); \ + FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ + __q = (N); \ + do { \ + __q /= __prime; \ + if ((__q & 1) != 0) (PR) *= __prime; \ + } while (__q >= __prime); \ + } while (0) +#endif + +#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ + do { \ + mp_limb_t __prime; \ + __prime = (P); \ + if ((((N) / __prime) & 1) != 0) \ + FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \ + } while (0) + +/* mpz_2multiswing_1 computes the odd part of the 2-multiswing + factorial of the parameter n. The result x is an odd positive + integer so that multiswing(n,2) = x 2^a. + + Uses the algorithm described by Peter Luschny in "Divide, Swing and + Conquer the Factorial!". + + The pointer sieve points to primesieve_size(n) limbs containing a + bit-array where primes are marked as 0. + Enough (FIXME: explain :-) limbs must be pointed by factors. + */ + +static void +mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors) +{ + mp_limb_t prod, max_prod; + mp_size_t j; + + ASSERT (n > 25); + + j = 0; + prod = -(n & 1); + n &= ~ CNST_LIMB(1); /* n-1, if n is odd */ + + prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */ + max_prod = GMP_NUMB_MAX / (n-1); + + /* Handle prime = 3 separately. */ + SWING_A_PRIME (3, n, prod, max_prod, factors, j); + + /* Swing primes from 5 to n/3 */ + { + mp_limb_t s, l_max_prod; + + s = limb_apprsqrt(n); + ASSERT (s >= 5); + s = n_to_bit (s); + ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n); + ASSERT (s < n_to_bit (n / 3)); + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); + SWING_A_PRIME (prime, n, prod, max_prod, factors, j); + LOOP_ON_SIEVE_STOP; + + ASSERT (max_prod <= GMP_NUMB_MAX / 3); + + l_max_prod = max_prod * 3; + + LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n/3)); + SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j); + LOOP_ON_SIEVE_END; + } + + /* Store primes from (n+1)/2 to n */ + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve); + FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); + LOOP_ON_SIEVE_END; + + if (LIKELY (j != 0)) + { + factors[j++] = prod; + mpz_prodlimbs (x, factors, j); + } + else + { + ASSERT (ALLOC (x) > 0); + PTR (x)[0] = prod; + SIZ (x) = 1; + } +} + +#undef SWING_A_PRIME +#undef SH_SWING_A_PRIME +#undef LOOP_ON_SIEVE_END +#undef LOOP_ON_SIEVE_STOP +#undef LOOP_ON_SIEVE_BEGIN +#undef LOOP_ON_SIEVE_CONTINUE +#undef FACTOR_LIST_APPEND + +/*********************************************************/ +/* Section oddfac: odd factorial, needed also by binomial*/ +/*********************************************************/ + +/* FIXME: refine che following estimate. */ + +#if TUNE_PROGRAM_BUILD +#define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD_LIMIT*FAC_DSC_THRESHOLD_LIMIT-1)+1) - 1) +#else +#define FACTORS_PER_LIMB (GMP_NUMB_BITS * 2 / (LOG2C(FAC_DSC_THRESHOLD*FAC_DSC_THRESHOLD-1)+1) - 1) +#endif + +/* mpz_oddfac_1 computes the odd part of the factorial of the + parameter n. I.e. n! = x 2^a, where x is the returned value: an + odd positive integer. + + If flag != 0 a square is skipped in the DSC part, e.g. + if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!. + + If n is too small, flag is ignored, and an ASSERT can be triggered. + + TODO: FAC_DSC_THRESHOLD is used here with two different roles: + - to decide when prime factorisation is needed, + - to stop the recursion, once sieving is done. + Maybe two thresholds can do a better job. + */ +void +mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) +{ + ASSERT (n <= GMP_NUMB_MAX); + ASSERT (flag == 0 || (flag == 1 && n > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1 && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD))); + + if (n <= ODD_FACTORIAL_TABLE_LIMIT) + { + MPZ_NEWALLOC (x, 1)[0] = __gmp_oddfac_table[n]; + SIZ (x) = 1; + } + else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1) + { + mp_ptr px; + + px = MPZ_NEWALLOC (x, 2); + umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]); + SIZ (x) = 2; + } + else + { + unsigned s; + mp_ptr factors; + + s = 0; + { + mp_limb_t tn; + mp_limb_t prod, max_prod; + mp_size_t j; + TMP_SDECL; + +#if TUNE_PROGRAM_BUILD + ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD); + ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2)); +#endif + + /* Compute the number of recursive steps for the DSC algorithm. */ + for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++) + tn >>= 1; + + j = 0; + + TMP_SMARK; + factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB); + ASSERT (tn >= FACTORS_PER_LIMB); + + prod = 1; +#if TUNE_PROGRAM_BUILD + max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD_LIMIT * FAC_DSC_THRESHOLD_LIMIT); +#else + max_prod = GMP_NUMB_MAX / (FAC_DSC_THRESHOLD * FAC_DSC_THRESHOLD); +#endif + + ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); + do { + factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX; + mp_limb_t diff = (tn - ODD_DOUBLEFACTORIAL_TABLE_LIMIT) & -CNST_LIMB (2); + if ((diff & 2) != 0) + { + FACTOR_LIST_STORE (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff, prod, max_prod, factors, j); + diff -= 2; + } + if (diff != 0) + { + mp_limb_t fac = (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2) * + (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + diff); + do { + FACTOR_LIST_STORE (fac, prod, max_prod, factors, j); + diff -= 4; + fac += diff * 2; + } while (diff != 0); + } + max_prod <<= 2; + tn >>= 1; + } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); + + factors[j++] = prod; + factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1]; + factors[j++] = __gmp_oddfac_table[tn >> 1]; + mpz_prodlimbs (x, factors, j); + + TMP_SFREE; + } + + if (s != 0) + /* Use the algorithm described by Peter Luschny in "Divide, + Swing and Conquer the Factorial!". + + Improvement: there are two temporary buffers, factors and + square, that are never used together; with a good estimate + of the maximal needed size, they could share a single + allocation. + */ + { + mpz_t mswing; + mp_ptr sieve; + mp_size_t size; + TMP_DECL; + + TMP_MARK; + + flag--; + size = n / GMP_NUMB_BITS + 4; + ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1)); + /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS); + one more can be overwritten by mul, another for the sieve */ + MPZ_TMP_INIT (mswing, size); + /* Initialize size, so that ASSERT can check it correctly. */ + ASSERT_CODE (SIZ (mswing) = 0); + + /* Put the sieve on the second half, it will be overwritten by the last mswing. */ + sieve = PTR (mswing) + size / 2 + 1; + + size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1; + + factors = TMP_ALLOC_LIMBS (size); + do { + mp_ptr square, px; + mp_size_t nx, ns; + mp_limb_t cy; + TMP_DECL; + + s--; + ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */ + mpz_2multiswing_1 (mswing, n >> s, sieve, factors); + + TMP_MARK; + nx = SIZ (x); + if (s == flag) { + size = nx; + square = TMP_ALLOC_LIMBS (size); + MPN_COPY (square, PTR (x), nx); + } else { + size = nx << 1; + square = TMP_ALLOC_LIMBS (size); + mpn_sqr (square, PTR (x), nx); + size -= (square[size - 1] == 0); + } + ns = SIZ (mswing); + nx = size + ns; + px = MPZ_NEWALLOC (x, nx); + ASSERT (ns <= size); + cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */ + + SIZ(x) = nx - (cy == 0); + TMP_FREE; + } while (s != 0); + TMP_FREE; + } + } +} + +#undef FACTORS_PER_LIMB +#undef FACTOR_LIST_STORE -- cgit v1.2.3