From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie <antigravityd@gmail.com>
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/generic/mu_div_q.c | 184 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/generic/mu_div_q.c

(limited to 'gmp-6.3.0/mpn/generic/mu_div_q.c')

diff --git a/gmp-6.3.0/mpn/generic/mu_div_q.c b/gmp-6.3.0/mpn/generic/mu_div_q.c
new file mode 100644
index 0000000..44cfb40
--- /dev/null
+++ b/gmp-6.3.0/mpn/generic/mu_div_q.c
@@ -0,0 +1,184 @@
+/* mpn_mu_div_q.
+
+   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+
+/*
+   The idea of the algorithm used herein is to compute a smaller inverted value
+   than used in the standard Barrett algorithm, and thus save time in the
+   Newton iterations, and pay just a small price when using the inverted value
+   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+*/
+
+/*
+  Things to work on:
+
+  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
+     probably close to optimal, except when mpn_mu_divappr_q fails.
+
+  2. We used to fall back to mpn_mu_div_qr when we detect a possible
+     mpn_mu_divappr_q rounding problem, now we multiply and compare.
+     Unfortunately, since mpn_mu_divappr_q does not return the partial
+     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
+     solve that.
+
+  3. The allocations done here should be made from the scratch area, which
+     then would need to be amended.
+*/
+
+#include <stdlib.h>		/* for NULL */
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_mu_div_q (mp_ptr qp,
+	      mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn,
+	      mp_ptr scratch)
+{
+  mp_ptr tp, rp;
+  mp_size_t qn;
+  mp_limb_t cy, qh;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  qn = nn - dn;
+
+  tp = TMP_BALLOC_LIMBS (qn + 1);
+
+  if (qn >= dn)			/* nn >= 2*dn + 1 */
+    {
+       /* |_______________________|   dividend
+			 |________|   divisor  */
+
+      rp = TMP_BALLOC_LIMBS (nn + 1);
+      MPN_COPY (rp + 1, np, nn);
+      rp[0] = 0;
+
+      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
+      if (qh != 0)
+	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
+
+      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
+
+      if (UNLIKELY (cy != 0))
+	{
+	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
+	     canonically reduced, replace the returned value of B^(qn-dn)+eps
+	     by the largest possible value.  */
+	  mp_size_t i;
+	  for (i = 0; i < qn + 1; i++)
+	    tp[i] = GMP_NUMB_MAX;
+	}
+
+      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
+	 smaller than the max error, we cannot trust the quotient.  */
+      if (tp[0] > 4)
+	{
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+      else
+	{
+	  mp_limb_t cy;
+	  mp_ptr pp;
+
+	  pp = rp;
+	  mpn_mul (pp, tp + 1, qn, dp, dn);
+
+	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
+	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+	  else /* Same as above */
+	    MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+  else
+    {
+       /* |_______________________|   dividend
+		 |________________|   divisor  */
+
+      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
+	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
+	 the most significant dn-1 limbs will actually be read, but it is not
+	 pretty.  */
+
+      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
+			     dp + dn - (qn + 1), qn + 1, scratch);
+
+      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
+         error from the divisor truncation.  */
+      if (tp[0] > 6)
+	{
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+      else
+	{
+	  mp_limb_t cy;
+
+	  /* FIXME: a shorter product should be enough; we may use already
+	     allocated space... */
+	  rp = TMP_BALLOC_LIMBS (nn);
+	  mpn_mul (rp, dp, dn, tp + 1, qn);
+
+	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
+
+	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
+	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
+	  else /* Same as above */
+	    MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
+
+mp_size_t
+mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
+{
+  mp_size_t qn;
+
+  qn = nn - dn;
+  if (qn >= dn)
+    {
+      return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
+    }
+  else
+    {
+      return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
+    }
+}
-- 
cgit v1.2.3