From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/generic/mu_div_q.c | 184 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 gmp-6.3.0/mpn/generic/mu_div_q.c (limited to 'gmp-6.3.0/mpn/generic/mu_div_q.c') diff --git a/gmp-6.3.0/mpn/generic/mu_div_q.c b/gmp-6.3.0/mpn/generic/mu_div_q.c new file mode 100644 index 0000000..44cfb40 --- /dev/null +++ b/gmp-6.3.0/mpn/generic/mu_div_q.c @@ -0,0 +1,184 @@ +/* mpn_mu_div_q. + + Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. + + THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY + SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST + GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. + +Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + + +/* + The idea of the algorithm used herein is to compute a smaller inverted value + than used in the standard Barrett algorithm, and thus save time in the + Newton iterations, and pay just a small price when using the inverted value + for developing quotient bits. This algorithm was presented at ICMS 2006. +*/ + +/* + Things to work on: + + 1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is + probably close to optimal, except when mpn_mu_divappr_q fails. + + 2. We used to fall back to mpn_mu_div_qr when we detect a possible + mpn_mu_divappr_q rounding problem, now we multiply and compare. + Unfortunately, since mpn_mu_divappr_q does not return the partial + remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could + solve that. + + 3. The allocations done here should be made from the scratch area, which + then would need to be amended. +*/ + +#include /* for NULL */ +#include "gmp-impl.h" + + +mp_limb_t +mpn_mu_div_q (mp_ptr qp, + mp_srcptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + mp_ptr scratch) +{ + mp_ptr tp, rp; + mp_size_t qn; + mp_limb_t cy, qh; + TMP_DECL; + + TMP_MARK; + + qn = nn - dn; + + tp = TMP_BALLOC_LIMBS (qn + 1); + + if (qn >= dn) /* nn >= 2*dn + 1 */ + { + /* |_______________________| dividend + |________| divisor */ + + rp = TMP_BALLOC_LIMBS (nn + 1); + MPN_COPY (rp + 1, np, nn); + rp[0] = 0; + + qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0; + if (qh != 0) + mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn); + + cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch); + + if (UNLIKELY (cy != 0)) + { + /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was + canonically reduced, replace the returned value of B^(qn-dn)+eps + by the largest possible value. */ + mp_size_t i; + for (i = 0; i < qn + 1; i++) + tp[i] = GMP_NUMB_MAX; + } + + /* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is + smaller than the max error, we cannot trust the quotient. */ + if (tp[0] > 4) + { + MPN_COPY (qp, tp + 1, qn); + } + else + { + mp_limb_t cy; + mp_ptr pp; + + pp = rp; + mpn_mul (pp, tp + 1, qn, dp, dn); + + cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0; + + if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */ + qh -= mpn_sub_1 (qp, tp + 1, qn, 1); + else /* Same as above */ + MPN_COPY (qp, tp + 1, qn); + } + } + else + { + /* |_______________________| dividend + |________________| divisor */ + + /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed + here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only + the most significant dn-1 limbs will actually be read, but it is not + pretty. */ + + qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2, + dp + dn - (qn + 1), qn + 1, scratch); + + /* The max error of mpn_mu_divappr_q is +4, but we get an additional + error from the divisor truncation. */ + if (tp[0] > 6) + { + MPN_COPY (qp, tp + 1, qn); + } + else + { + mp_limb_t cy; + + /* FIXME: a shorter product should be enough; we may use already + allocated space... */ + rp = TMP_BALLOC_LIMBS (nn); + mpn_mul (rp, dp, dn, tp + 1, qn); + + cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0; + + if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */ + qh -= mpn_sub_1 (qp, tp + 1, qn, 1); + else /* Same as above */ + MPN_COPY (qp, tp + 1, qn); + } + } + + TMP_FREE; + return qh; +} + +mp_size_t +mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k) +{ + mp_size_t qn; + + qn = nn - dn; + if (qn >= dn) + { + return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k); + } + else + { + return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k); + } +} -- cgit v1.2.3