From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/mu_bdiv_qr.c | 312 +++++++++++++++++++++++++++++++++++++ 1 file changed, 312 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/mu_bdiv_qr.c (limited to 'gmp-6.3.0/mpn/generic/mu_bdiv_qr.c') diff --git a/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c b/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c new file mode 100644 index 0000000..540ad73 --- /dev/null +++ b/gmp-6.3.0/mpn/generic/mu_bdiv_qr.c @@ -0,0 +1,312 @@ +/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn, + where qn = nn-dn, storing the result in {qp,qn}. Overlap allowed between Q + and N; all other overlap disallowed. + + Contributed to the GNU project by Torbjorn Granlund. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. + +Copyright 2005-2007, 2009, 2010, 2012, 2017 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + + +/* + The idea of the algorithm used herein is to compute a smaller inverted value + than used in the standard Barrett algorithm, and thus save time in the + Newton iterations, and pay just a small price when using the inverted value + for developing quotient bits. This algorithm was presented at ICMS 2006. +*/ + +#include "gmp-impl.h" + + +/* N = {np,nn} + D = {dp,dn} + + Requirements: N >= D + D >= 1 + D odd + dn >= 2 + nn >= 2 + scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn). + + Write quotient to Q = {qp,nn-dn}. + + FIXME: When iterating, perhaps do the small step before loop, not after. + FIXME: Try to avoid the scalar divisions when computing inverse size. + FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible. In + particular, when dn==in, tp and rp could use the same space. +*/ +static mp_limb_t +mpn_mu_bdiv_qr_old (mp_ptr qp, + mp_ptr rp, + mp_srcptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + mp_ptr scratch) +{ + mp_size_t qn; + mp_size_t in; + mp_limb_t cy, c0; + mp_size_t tn, wn; + + qn = nn - dn; + + ASSERT (dn >= 2); + ASSERT (qn >= 2); + + if (qn > dn) + { + mp_size_t b; + + /* |_______________________| dividend + |________| divisor */ + +#define ip scratch /* in */ +#define tp (scratch + in) /* dn+in or next_size(dn) or rest >= binvert_itch(in) */ +#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */ + + /* Compute an inverse size that is a nice partition of the quotient. */ + b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */ + in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */ + + /* Some notes on allocation: + + When in = dn, R dies when mpn_mullo returns, if in < dn the low in + limbs of R dies at that point. We could save memory by letting T live + just under R, and let the upper part of T expand into R. These changes + should reduce itch to perhaps 3dn. + */ + + mpn_binvert (ip, dp, in, tp); + + MPN_COPY (rp, np, dn); + np += dn; + cy = 0; + + while (qn > in) + { + mpn_mullo_n (qp, rp, ip, in); + + if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD)) + mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[dn+in-1...in] */ + else + { + tn = mpn_mulmod_bnm1_next_size (dn); + mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out); + wn = dn + in - tn; /* number of wrapped limbs */ + if (wn > 0) + { + c0 = mpn_sub_n (tp + tn, tp, rp, wn); + mpn_decr_u (tp + wn, c0); + } + } + + qp += in; + qn -= in; + + if (dn != in) + { + /* Subtract tp[dn-1...in] from partial remainder. */ + cy += mpn_sub_n (rp, rp + in, tp + in, dn - in); + if (cy == 2) + { + mpn_incr_u (tp + dn, 1); + cy = 1; + } + } + /* Subtract tp[dn+in-1...dn] from dividend. */ + cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy); + np += in; + } + + /* Generate last qn limbs. */ + mpn_mullo_n (qp, rp, ip, qn); + + if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD)) + mpn_mul (tp, dp, dn, qp, qn); /* mulhi, need tp[qn+in-1...in] */ + else + { + tn = mpn_mulmod_bnm1_next_size (dn); + mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out); + wn = dn + qn - tn; /* number of wrapped limbs */ + if (wn > 0) + { + c0 = mpn_sub_n (tp + tn, tp, rp, wn); + mpn_decr_u (tp + wn, c0); + } + } + + if (dn != qn) + { + cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn); + if (cy == 2) + { + mpn_incr_u (tp + dn, 1); + cy = 1; + } + } + return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy); + +#undef ip +#undef tp +#undef scratch_out + } + else + { + /* |_______________________| dividend + |________________| divisor */ + +#define ip scratch /* in */ +#define tp (scratch + in) /* dn+in or next_size(dn) or rest >= binvert_itch(in) */ +#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */ + + /* Compute half-sized inverse. */ + in = qn - (qn >> 1); + + mpn_binvert (ip, dp, in, tp); + + mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */ + + if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD)) + mpn_mul (tp, dp, dn, qp, in); /* mulhigh */ + else + { + tn = mpn_mulmod_bnm1_next_size (dn); + mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out); + wn = dn + in - tn; /* number of wrapped limbs */ + if (wn > 0) + { + c0 = mpn_sub_n (tp + tn, tp, np, wn); + mpn_decr_u (tp + wn, c0); + } + } + + qp += in; + qn -= in; + + cy = mpn_sub_n (rp, np + in, tp + in, dn); + mpn_mullo_n (qp, rp, ip, qn); /* high qn quotient limbs */ + + if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD)) + mpn_mul (tp, dp, dn, qp, qn); /* mulhigh */ + else + { + tn = mpn_mulmod_bnm1_next_size (dn); + mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out); + wn = dn + qn - tn; /* number of wrapped limbs */ + if (wn > 0) + { + c0 = mpn_sub_n (tp + tn, tp, rp, wn); + mpn_decr_u (tp + wn, c0); + } + } + + cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn); + if (cy == 2) + { + mpn_incr_u (tp + dn, 1); + cy = 1; + } + return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy); + +#undef ip +#undef tp +#undef scratch_out + } +} + +mp_limb_t +mpn_mu_bdiv_qr (mp_ptr qp, + mp_ptr rp, + mp_srcptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + mp_ptr scratch) +{ + mp_limb_t cy = mpn_mu_bdiv_qr_old (qp, rp, np, nn, dp, dn, scratch); + + /* R' B^{qn} = U - Q' D + * + * Q = B^{qn} - Q' (assuming Q' != 0) + * + * R B^{qn} = U + Q D = U + B^{qn} D - Q' D + * = B^{qn} D + R' + */ + + if (UNLIKELY (!mpn_neg (qp, qp, nn - dn))) + { + /* Zero quotient. */ + ASSERT (cy == 0); + return 0; + } + else + { + mp_limb_t cy2 = mpn_add_n (rp, rp, dp, dn); + ASSERT (cy2 >= cy); + + return cy2 - cy; + } +} + + +mp_size_t +mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn) +{ + mp_size_t qn, in, tn, itch_binvert, itch_out, itches; + mp_size_t b; + + ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD); + + qn = nn - dn; + + if (qn > dn) + { + b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */ + in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */ + } + else + { + in = qn - (qn >> 1); + } + + if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD)) + { + tn = dn + in; + itch_out = 0; + } + else + { + tn = mpn_mulmod_bnm1_next_size (dn); + itch_out = mpn_mulmod_bnm1_itch (tn, dn, in); + } + + itch_binvert = mpn_binvert_itch (in); + itches = tn + itch_out; + return in + MAX (itches, itch_binvert); +} -- cgit v1.2.3