From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/tests/refmpn.c | 2630 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2630 insertions(+)
 create mode 100644 gmp-6.3.0/tests/refmpn.c

diff --git a/gmp-6.3.0/tests/refmpn.c b/gmp-6.3.0/tests/refmpn.c
new file mode 100644
index 0000000..42bb411
--- /dev/null
+++ b/gmp-6.3.0/tests/refmpn.c
@@ -0,0 +1,2630 @@
+/* Reference mpn functions, designed to be simple, portable and independent
+   of the normal gmp code.  Speed isn't a consideration.
+
+Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+
+/* Most routines have assertions representing what the mpn routines are
+   supposed to accept.  Many of these reference routines do sensible things
+   outside these ranges (eg. for size==0), but the assertions are present to
+   pick up bad parameters passed here that are about to be passed the same
+   to a real mpn routine being compared.  */
+
+/* always do assertion checking */
+#define WANT_ASSERT 1
+
+#include <stdio.h>  /* for NULL */
+#include <stdlib.h> /* for malloc */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+
+
+/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
+   in bytes.  */
+int
+byte_overlap_p (const void *v_xp, mp_size_t xsize,
+                const void *v_yp, mp_size_t ysize)
+{
+  const char *xp = (const char *) v_xp;
+  const char *yp = (const char *) v_yp;
+
+  ASSERT (xsize >= 0);
+  ASSERT (ysize >= 0);
+
+  /* no wraparounds */
+  ASSERT (xp+xsize >= xp);
+  ASSERT (yp+ysize >= yp);
+
+  if (xp + xsize <= yp)
+    return 0;
+
+  if (yp + ysize <= xp)
+    return 0;
+
+  return 1;
+}
+
+/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap.  */
+int
+refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
+{
+  return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
+                         yp, ysize * GMP_LIMB_BYTES);
+}
+
+/* Check overlap for a routine defined to work low to high.  */
+int
+refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
+}
+
+/* Check overlap for a routine defined to work high to low.  */
+int
+refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
+}
+
+/* Check overlap for a standard routine requiring equal or separate.  */
+int
+refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
+{
+  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
+}
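+
+/* "Fullonly" expresses the standard mpn overlap rule: a destination may
+   coincide exactly with a source operand, or be entirely separate from it,
+   but a partial overlap is never allowed.  */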
+int
+refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
+                               mp_size_t size)
+{
+  return (refmpn_overlap_fullonly_p (dst, src1, size)
+          && refmpn_overlap_fullonly_p (dst, src2, size));
+}
+
+
+mp_ptr
+refmpn_malloc_limbs (mp_size_t size)
+{
+  mp_ptr  p;
+  ASSERT (size >= 0);
+  if (size == 0)
+    size = 1;
+  p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
+  ASSERT (p != NULL);
+  return p;
+}
+
+/* Free limbs allocated by refmpn_malloc_limbs.  NOTE: Can't free
+ * memory allocated by refmpn_malloc_limbs_aligned.  */
+void
+refmpn_free_limbs (mp_ptr p)
+{
+  free (p);
+}
+
+mp_ptr
+refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
+{
+  mp_ptr  p;
+  p = refmpn_malloc_limbs (size);
+  refmpn_copyi (p, ptr, size);
+  return p;
+}
+
+/* malloc n limbs on a boundary that is a multiple of m bytes */
+mp_ptr
+refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
+{
+  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
+}
+
+
+void
+refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
+{
+  mp_size_t  i;
+  ASSERT (size >= 0);
+  for (i = 0; i < size; i++)
+    ptr[i] = value;
+}
+
+void
+refmpn_zero (mp_ptr ptr, mp_size_t size)
+{
+  refmpn_fill (ptr, size, CNST_LIMB(0));
+}
+
+void
+refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
+{
+  ASSERT (newsize >= oldsize);
+  refmpn_zero (ptr+oldsize, newsize-oldsize);
+}
+
+int
+refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
+{
+  mp_size_t  i;
+  for (i = 0; i < size; i++)
+    if (ptr[i] != 0)
+      return 0;
+  return 1;
+}
+
+mp_size_t
+refmpn_normalize (mp_srcptr ptr, mp_size_t size)
+{
+  ASSERT (size >= 0);
+  while (size > 0 && ptr[size-1] == 0)
+    size--;
+  return size;
+}
+
+/* the highest one bit in x */
+mp_limb_t
+refmpn_msbone (mp_limb_t x)
+{
+  mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
+
+  while (n != 0)
+    {
+      if (x & n)
+        break;
+      n >>= 1;
+    }
+  return n;
+}
+
+/* a mask of the highest one bit plus all bits below */
+mp_limb_t
+refmpn_msbone_mask (mp_limb_t x)
+{
+  if (x == 0)
+    return 0;
+
+  return (refmpn_msbone (x) << 1) - 1;
+}
+
+/* How many digits in the given base will fit in a limb.
+   Notice that the product b is allowed to be equal to the limit
+   2^GMP_NUMB_BITS; this ensures the result for base==2 will be
+   GMP_NUMB_BITS (and similarly for other powers of 2).  */
+int
+refmpn_chars_per_limb (int base)
+{
+  mp_limb_t  limit[2], b[2];
+  int        chars_per_limb;
+
+  ASSERT (base >= 2);
+
+  limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
+  limit[1] = 1;
+  b[0] = 1;      /* b = 1 */
+  b[1] = 0;
+
+  chars_per_limb = 0;
+  for (;;)
+    {
+      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
+        break;
+      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
+        break;
+      chars_per_limb++;
+    }
+  return chars_per_limb;
+}
+
+/* The biggest value base**n which fits in GMP_NUMB_BITS bits.
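+   For example, with 64-bit limbs and no nails, chars_per_limb is 19 for
+   base 10 and big_base is 10^19, since 10^19 < 2^64 < 10^20.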
*/ +mp_limb_t +refmpn_big_base (int base) +{ + int chars_per_limb = refmpn_chars_per_limb (base); + int i; + mp_limb_t bb; + + ASSERT (base >= 2); + bb = 1; + for (i = 0; i < chars_per_limb; i++) + bb *= base; + return bb; +} + + +void +refmpn_setbit (mp_ptr ptr, unsigned long bit) +{ + ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS); +} + +void +refmpn_clrbit (mp_ptr ptr, unsigned long bit) +{ + ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS)); +} + +#define REFMPN_TSTBIT(ptr,bit) \ + (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0) + +int +refmpn_tstbit (mp_srcptr ptr, unsigned long bit) +{ + return REFMPN_TSTBIT (ptr, bit); +} + +unsigned long +refmpn_scan0 (mp_srcptr ptr, unsigned long bit) +{ + while (REFMPN_TSTBIT (ptr, bit) != 0) + bit++; + return bit; +} + +unsigned long +refmpn_scan1 (mp_srcptr ptr, unsigned long bit) +{ + while (REFMPN_TSTBIT (ptr, bit) == 0) + bit++; + return bit; +} + +void +refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size) +{ + ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); + refmpn_copyi (rp, sp, size); +} + +void +refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size) +{ + mp_size_t i; + + ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); + ASSERT (size >= 0); + + for (i = 0; i < size; i++) + rp[i] = sp[i]; +} + +void +refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size) +{ + mp_size_t i; + + ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); + ASSERT (size >= 0); + + for (i = size-1; i >= 0; i--) + rp[i] = sp[i]; +} + +/* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low + zeros to wsize. If x is longer, then copy just the high wsize limbs. */ +void +refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize) +{ + ASSERT (wsize >= 0); + ASSERT (xsize >= 0); + + /* high part of x if x bigger than w */ + if (xsize > wsize) + { + xp += xsize - wsize; + xsize = wsize; + } + + refmpn_copy (wp + wsize-xsize, xp, xsize); + refmpn_zero (wp, wsize-xsize); +} + +int +refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) +{ + mp_size_t i; + + ASSERT (size >= 1); + ASSERT_MPN (xp, size); + ASSERT_MPN (yp, size); + + for (i = size-1; i >= 0; i--) + { + if (xp[i] > yp[i]) return 1; + if (xp[i] < yp[i]) return -1; + } + return 0; +} + +int +refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size) +{ + if (size == 0) + return 0; + else + return refmpn_cmp (xp, yp, size); +} + +int +refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize, + mp_srcptr yp, mp_size_t ysize) +{ + int opp, cmp; + + ASSERT_MPN (xp, xsize); + ASSERT_MPN (yp, ysize); + + opp = (xsize < ysize); + if (opp) + MPN_SRCPTR_SWAP (xp,xsize, yp,ysize); + + if (! refmpn_zero_p (xp+ysize, xsize-ysize)) + cmp = 1; + else + cmp = refmpn_cmp (xp, yp, ysize); + + return (opp ? 
-cmp : cmp); +} + +int +refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size) +{ + mp_size_t i; + ASSERT (size >= 0); + + for (i = 0; i < size; i++) + if (xp[i] != yp[i]) + return 0; + return 1; +} + + +#define LOGOPS(operation) \ + { \ + mp_size_t i; \ + \ + ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ + ASSERT (size >= 1); \ + ASSERT_MPN (s1p, size); \ + ASSERT_MPN (s2p, size); \ + \ + for (i = 0; i < size; i++) \ + rp[i] = operation; \ + } + +void +refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS (s1p[i] & s2p[i]); +} +void +refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS (s1p[i] & ~s2p[i]); +} +void +refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK); +} +void +refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS (s1p[i] | s2p[i]); +} +void +refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK)); +} +void +refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK); +} +void +refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS (s1p[i] ^ s2p[i]); +} +void +refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK); +} + + +/* set *dh,*dl to mh:ml - sh:sl, in full limbs */ +void +refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl, + mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl) +{ + *dl = ml - sl; + *dh = mh - sh - (ml < sl); +} + + +/* set *w to x+y, return 0 or 1 carry */ +mp_limb_t +ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) +{ + mp_limb_t sum, cy; + + ASSERT_LIMB (x); + ASSERT_LIMB (y); + + sum = x + y; +#if GMP_NAIL_BITS == 0 + *w = sum; + cy = (sum < x); +#else + *w = sum & GMP_NUMB_MASK; + cy = (sum >> GMP_NUMB_BITS); +#endif + return cy; +} + +/* set *w to x-y, return 0 or 1 borrow */ +mp_limb_t +ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y) +{ + mp_limb_t diff, cy; + + ASSERT_LIMB (x); + ASSERT_LIMB (y); + + diff = x - y; +#if GMP_NAIL_BITS == 0 + *w = diff; + cy = (diff > x); +#else + *w = diff & GMP_NUMB_MASK; + cy = (diff >> GMP_NUMB_BITS) & 1; +#endif + return cy; +} + +/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */ +mp_limb_t +adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) +{ + mp_limb_t r; + + ASSERT_LIMB (x); + ASSERT_LIMB (y); + ASSERT (c == 0 || c == 1); + + r = ref_addc_limb (w, x, y); + return r + ref_addc_limb (w, *w, c); +} + +/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */ +mp_limb_t +sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) +{ + mp_limb_t r; + + ASSERT_LIMB (x); + ASSERT_LIMB (y); + ASSERT (c == 0 || c == 1); + + r = ref_subc_limb (w, x, y); + return r + ref_subc_limb (w, *w, c); +} + + +#define AORS_1(operation) \ + { \ + mp_size_t i; \ + \ + ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ + ASSERT (size >= 1); \ + ASSERT_MPN (sp, size); \ + ASSERT_LIMB (n); \ + \ + for (i = 0; i < size; i++) \ + n = operation (&rp[i], sp[i], n); \ + return n; \ + } + +mp_limb_t +refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) +{ + AORS_1 (ref_addc_limb); +} +mp_limb_t +refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) +{ + AORS_1 (ref_subc_limb); +} + +#define AORS_NC(operation) \ + { \ + mp_size_t i; \ + \ + ASSERT 
(refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ + ASSERT (carry == 0 || carry == 1); \ + ASSERT (size >= 1); \ + ASSERT_MPN (s1p, size); \ + ASSERT_MPN (s2p, size); \ + \ + for (i = 0; i < size; i++) \ + carry = operation (&rp[i], s1p[i], s2p[i], carry); \ + return carry; \ + } + +mp_limb_t +refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, + mp_limb_t carry) +{ + AORS_NC (adc); +} +mp_limb_t +refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, + mp_limb_t carry) +{ + AORS_NC (sbb); +} + + +mp_limb_t +refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0)); +} +mp_limb_t +refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0)); +} + +mp_limb_t +refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + if (cnd != 0) + return refmpn_add_n (rp, s1p, s2p, size); + else + { + refmpn_copyi (rp, s1p, size); + return 0; + } +} +mp_limb_t +refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + if (cnd != 0) + return refmpn_sub_n (rp, s1p, s2p, size); + else + { + refmpn_copyi (rp, s1p, size); + return 0; + } +} + + +#define AORS_ERR1_N(operation) \ + { \ + mp_size_t i; \ + mp_limb_t carry2; \ + \ + ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ + ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ + ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \ + ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \ + ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \ + \ + ASSERT (carry == 0 || carry == 1); \ + ASSERT (size >= 1); \ + ASSERT_MPN (s1p, size); \ + ASSERT_MPN (s2p, size); \ + ASSERT_MPN (yp, size); \ + \ + ep[0] = ep[1] = CNST_LIMB(0); \ + \ + for (i = 0; i < size; i++) \ + { \ + carry = operation (&rp[i], s1p[i], s2p[i], carry); \ + if (carry == 1) \ + { \ + carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \ + carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ + ASSERT (carry2 == 0); \ + } \ + } \ + return carry; \ + } + +mp_limb_t +refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, + mp_ptr ep, mp_srcptr yp, + mp_size_t size, mp_limb_t carry) +{ + AORS_ERR1_N (adc); +} +mp_limb_t +refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, + mp_ptr ep, mp_srcptr yp, + mp_size_t size, mp_limb_t carry) +{ + AORS_ERR1_N (sbb); +} + + +#define AORS_ERR2_N(operation) \ + { \ + mp_size_t i; \ + mp_limb_t carry2; \ + \ + ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ + ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ + ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ + ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \ + ASSERT (! 
refmpn_overlap_p (ep, 4, rp, size)); \ + \ + ASSERT (carry == 0 || carry == 1); \ + ASSERT (size >= 1); \ + ASSERT_MPN (s1p, size); \ + ASSERT_MPN (s2p, size); \ + ASSERT_MPN (y1p, size); \ + ASSERT_MPN (y2p, size); \ + \ + ep[0] = ep[1] = CNST_LIMB(0); \ + ep[2] = ep[3] = CNST_LIMB(0); \ + \ + for (i = 0; i < size; i++) \ + { \ + carry = operation (&rp[i], s1p[i], s2p[i], carry); \ + if (carry == 1) \ + { \ + carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ + carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ + ASSERT (carry2 == 0); \ + carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ + carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ + ASSERT (carry2 == 0); \ + } \ + } \ + return carry; \ + } + +mp_limb_t +refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, + mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, + mp_size_t size, mp_limb_t carry) +{ + AORS_ERR2_N (adc); +} +mp_limb_t +refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, + mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, + mp_size_t size, mp_limb_t carry) +{ + AORS_ERR2_N (sbb); +} + + +#define AORS_ERR3_N(operation) \ + { \ + mp_size_t i; \ + mp_limb_t carry2; \ + \ + ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \ + ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \ + ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \ + ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \ + ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \ + ASSERT (! refmpn_overlap_p (ep, 6, rp, size)); \ + \ + ASSERT (carry == 0 || carry == 1); \ + ASSERT (size >= 1); \ + ASSERT_MPN (s1p, size); \ + ASSERT_MPN (s2p, size); \ + ASSERT_MPN (y1p, size); \ + ASSERT_MPN (y2p, size); \ + ASSERT_MPN (y3p, size); \ + \ + ep[0] = ep[1] = CNST_LIMB(0); \ + ep[2] = ep[3] = CNST_LIMB(0); \ + ep[4] = ep[5] = CNST_LIMB(0); \ + \ + for (i = 0; i < size; i++) \ + { \ + carry = operation (&rp[i], s1p[i], s2p[i], carry); \ + if (carry == 1) \ + { \ + carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \ + carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \ + ASSERT (carry2 == 0); \ + carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \ + carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \ + ASSERT (carry2 == 0); \ + carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \ + carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \ + ASSERT (carry2 == 0); \ + } \ + } \ + return carry; \ + } + +mp_limb_t +refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, + mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, + mp_size_t size, mp_limb_t carry) +{ + AORS_ERR3_N (adc); +} +mp_limb_t +refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, + mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p, + mp_size_t size, mp_limb_t carry) +{ + AORS_ERR3_N (sbb); +} + + +mp_limb_t +refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, + mp_size_t n, unsigned int s) +{ + mp_limb_t cy; + mp_ptr tp; + + ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); + ASSERT (n >= 1); + ASSERT (0 < s && s < GMP_NUMB_BITS); + ASSERT_MPN (up, n); + ASSERT_MPN (vp, n); + + tp = refmpn_malloc_limbs (n); + cy = refmpn_lshift (tp, vp, n, s); + cy += refmpn_add_n (rp, up, tp, n); + free (tp); + return cy; +} +mp_limb_t +refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) 
+{ + return refmpn_addlsh_n (rp, up, vp, n, 1); +} +mp_limb_t +refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + return refmpn_addlsh_n (rp, up, vp, n, 2); +} +mp_limb_t +refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) +{ + return refmpn_addlsh_n (rp, rp, vp, n, s); +} +mp_limb_t +refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_addlsh_n (rp, rp, vp, n, 1); +} +mp_limb_t +refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_addlsh_n (rp, rp, vp, n, 2); +} +mp_limb_t +refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) +{ + return refmpn_addlsh_n (rp, vp, rp, n, s); +} +mp_limb_t +refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_addlsh_n (rp, vp, rp, n, 1); +} +mp_limb_t +refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_addlsh_n (rp, vp, rp, n, 2); +} +mp_limb_t +refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, + mp_size_t n, unsigned int s, mp_limb_t carry) +{ + mp_limb_t cy; + + ASSERT (carry <= (CNST_LIMB(1) << s)); + + cy = refmpn_addlsh_n (rp, up, vp, n, s); + cy += refmpn_add_1 (rp, rp, n, carry); + return cy; +} +mp_limb_t +refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) +{ + return refmpn_addlsh_nc (rp, up, vp, n, 1, carry); +} +mp_limb_t +refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) +{ + return refmpn_addlsh_nc (rp, up, vp, n, 2, carry); +} + +mp_limb_t +refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, + mp_size_t n, unsigned int s) +{ + mp_limb_t cy; + mp_ptr tp; + + ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); + ASSERT (n >= 1); + ASSERT (0 < s && s < GMP_NUMB_BITS); + ASSERT_MPN (up, n); + ASSERT_MPN (vp, n); + + tp = refmpn_malloc_limbs (n); + cy = mpn_lshift (tp, vp, n, s); + cy += mpn_sub_n (rp, up, tp, n); + free (tp); + return cy; +} +mp_limb_t +refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + return refmpn_sublsh_n (rp, up, vp, n, 1); +} +mp_limb_t +refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + return refmpn_sublsh_n (rp, up, vp, n, 2); +} +mp_limb_t +refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) +{ + return refmpn_sublsh_n (rp, rp, vp, n, s); +} +mp_limb_t +refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_sublsh_n (rp, rp, vp, n, 1); +} +mp_limb_t +refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_sublsh_n (rp, rp, vp, n, 2); +} +mp_limb_t +refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s) +{ + return refmpn_sublsh_n (rp, vp, rp, n, s); +} +mp_limb_t +refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_sublsh_n (rp, vp, rp, n, 1); +} +mp_limb_t +refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n) +{ + return refmpn_sublsh_n (rp, vp, rp, n, 2); +} +mp_limb_t +refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, + mp_size_t n, unsigned int s, mp_limb_t carry) +{ + mp_limb_t cy; + + ASSERT (carry <= (CNST_LIMB(1) << s)); + + cy = refmpn_sublsh_n (rp, up, vp, n, s); + cy += refmpn_sub_1 (rp, rp, n, carry); + return cy; +} +mp_limb_t +refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) +{ + return refmpn_sublsh_nc (rp, up, vp, n, 1, carry); +} +mp_limb_t +refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry) +{ 
+ return refmpn_sublsh_nc (rp, up, vp, n, 2, carry); +} + +mp_limb_signed_t +refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, + mp_size_t n, unsigned int s) +{ + mp_limb_signed_t cy; + mp_ptr tp; + + ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); + ASSERT (n >= 1); + ASSERT (0 < s && s < GMP_NUMB_BITS); + ASSERT_MPN (up, n); + ASSERT_MPN (vp, n); + + tp = refmpn_malloc_limbs (n); + cy = mpn_lshift (tp, vp, n, s); + cy -= mpn_sub_n (rp, tp, up, n); + free (tp); + return cy; +} +mp_limb_signed_t +refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + return refmpn_rsblsh_n (rp, up, vp, n, 1); +} +mp_limb_signed_t +refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + return refmpn_rsblsh_n (rp, up, vp, n, 2); +} +mp_limb_signed_t +refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, + mp_size_t n, unsigned int s, mp_limb_signed_t carry) +{ + mp_limb_signed_t cy; + + ASSERT (carry == -1 || (carry >> s) == 0); + + cy = refmpn_rsblsh_n (rp, up, vp, n, s); + if (carry > 0) + cy += refmpn_add_1 (rp, rp, n, carry); + else + cy -= refmpn_sub_1 (rp, rp, n, -carry); + return cy; +} +mp_limb_signed_t +refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) +{ + return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry); +} +mp_limb_signed_t +refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry) +{ + return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry); +} + +mp_limb_t +refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + mp_limb_t cya, cys; + + ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); + ASSERT (n >= 1); + ASSERT_MPN (up, n); + ASSERT_MPN (vp, n); + + cya = mpn_add_n (rp, up, vp, n); + cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); + rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); + return cys; +} +mp_limb_t +refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + mp_limb_t cya, cys; + + ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n)); + ASSERT (n >= 1); + ASSERT_MPN (up, n); + ASSERT_MPN (vp, n); + + cya = mpn_sub_n (rp, up, vp, n); + cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1); + rp[n - 1] |= cya << (GMP_NUMB_BITS - 1); + return cys; +} + +/* Twos complement, return borrow. */ +mp_limb_t +refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size) +{ + mp_ptr zeros; + mp_limb_t ret; + + ASSERT (size >= 1); + + zeros = refmpn_malloc_limbs (size); + refmpn_fill (zeros, size, CNST_LIMB(0)); + ret = refmpn_sub_n (dst, zeros, src, size); + free (zeros); + return ret; +} + + +#define AORS(aors_n, aors_1) \ + { \ + mp_limb_t c; \ + ASSERT (s1size >= s2size); \ + ASSERT (s2size >= 1); \ + c = aors_n (rp, s1p, s2p, s2size); \ + if (s1size-s2size != 0) \ + c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \ + return c; \ + } +mp_limb_t +refmpn_add (mp_ptr rp, + mp_srcptr s1p, mp_size_t s1size, + mp_srcptr s2p, mp_size_t s2size) +{ + AORS (refmpn_add_n, refmpn_add_1); +} +mp_limb_t +refmpn_sub (mp_ptr rp, + mp_srcptr s1p, mp_size_t s1size, + mp_srcptr s2p, mp_size_t s2size) +{ + AORS (refmpn_sub_n, refmpn_sub_1); +} + + +#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2) +#define SHIFTLOW(x) ((x) >> GMP_LIMB_BITS/2) + +#define LOWMASK (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1) +#define HIGHMASK SHIFTHIGH(LOWMASK) + +#define LOWPART(x) ((x) & LOWMASK) +#define HIGHPART(x) SHIFTLOW((x) & HIGHMASK) + +/* Set return:*lo to x*y, using full limbs not nails. 
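+   That is, the high limb is returned and the low limb goes to *lo.  The
+   product is built from four half-limb partial products: with
+   N = GMP_LIMB_BITS/2, write x = 2^N*xh + xl and y = 2^N*yh + yl, giving
+   x*y = 2^(2N)*xh*yh + 2^N*(xh*yl + xl*yh) + xl*yl.  Each partial product
+   fits in a limb, and carries out of *lo are propagated into the high limb.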
*/ +mp_limb_t +refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y) +{ + mp_limb_t hi, s; + + *lo = LOWPART(x) * LOWPART(y); + hi = HIGHPART(x) * HIGHPART(y); + + s = LOWPART(x) * HIGHPART(y); + hi += HIGHPART(s); + s = SHIFTHIGH(LOWPART(s)); + *lo += s; + hi += (*lo < s); + + s = HIGHPART(x) * LOWPART(y); + hi += HIGHPART(s); + s = SHIFTHIGH(LOWPART(s)); + *lo += s; + hi += (*lo < s); + + return hi; +} + +mp_limb_t +refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo) +{ + return refmpn_umul_ppmm (lo, x, y); +} + +mp_limb_t +refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, + mp_limb_t carry) +{ + mp_size_t i; + mp_limb_t hi, lo; + + ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); + ASSERT (size >= 1); + ASSERT_MPN (sp, size); + ASSERT_LIMB (multiplier); + ASSERT_LIMB (carry); + + multiplier <<= GMP_NAIL_BITS; + for (i = 0; i < size; i++) + { + hi = refmpn_umul_ppmm (&lo, sp[i], multiplier); + lo >>= GMP_NAIL_BITS; + ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry))); + rp[i] = lo; + carry = hi; + } + return carry; +} + +mp_limb_t +refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) +{ + return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); +} + + +mp_limb_t +refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, + mp_srcptr mult, mp_size_t msize) +{ + mp_ptr src_copy; + mp_limb_t ret; + mp_size_t i; + + ASSERT (refmpn_overlap_fullonly_p (dst, src, size)); + ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); + ASSERT (size >= msize); + ASSERT_MPN (mult, msize); + + /* in case dst==src */ + src_copy = refmpn_malloc_limbs (size); + refmpn_copyi (src_copy, src, size); + src = src_copy; + + dst[size] = refmpn_mul_1 (dst, src, size, mult[0]); + for (i = 1; i < msize-1; i++) + dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); + ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); + + free (src_copy); + return ret; +} + +mp_limb_t +refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2); +} +mp_limb_t +refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3); +} +mp_limb_t +refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4); +} +mp_limb_t +refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5); +} +mp_limb_t +refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6); +} + +#define AORSMUL_1C(operation_n) \ + { \ + mp_ptr p; \ + mp_limb_t ret; \ + \ + ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ + \ + p = refmpn_malloc_limbs (size); \ + ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \ + ret += operation_n (rp, rp, p, size); \ + \ + free (p); \ + return ret; \ + } + +mp_limb_t +refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, + mp_limb_t multiplier, mp_limb_t carry) +{ + AORSMUL_1C (refmpn_add_n); +} +mp_limb_t +refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, + mp_limb_t multiplier, mp_limb_t carry) +{ + AORSMUL_1C (refmpn_sub_n); +} + + +mp_limb_t +refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) +{ + return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); +} +mp_limb_t +refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t 
size, mp_limb_t multiplier) +{ + return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); +} + + +mp_limb_t +refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size, + mp_srcptr mult, mp_size_t msize) +{ + mp_ptr src_copy; + mp_limb_t ret; + mp_size_t i; + + ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size)); + ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize)); + ASSERT (size >= msize); + ASSERT_MPN (mult, msize); + + /* in case dst==src */ + src_copy = refmpn_malloc_limbs (size); + refmpn_copyi (src_copy, src, size); + src = src_copy; + + for (i = 0; i < msize-1; i++) + dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]); + ret = refmpn_addmul_1 (dst+i, src, size, mult[i]); + + free (src_copy); + return ret; +} + +mp_limb_t +refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2); +} +mp_limb_t +refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3); +} +mp_limb_t +refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4); +} +mp_limb_t +refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5); +} +mp_limb_t +refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6); +} +mp_limb_t +refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7); +} +mp_limb_t +refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult) +{ + return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8); +} + +mp_limb_t +refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p, + mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, + mp_limb_t carry) +{ + mp_ptr p; + mp_limb_t acy, scy; + + /* Destinations can't overlap. */ + ASSERT (! refmpn_overlap_p (r1p, size, r2p, size)); + ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size)); + ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size)); + ASSERT (size >= 1); + + /* in case r1p==s1p or r1p==s2p */ + p = refmpn_malloc_limbs (size); + + acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1); + scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1); + refmpn_copyi (r1p, p, size); + + free (p); + return 2 * acy + scy; +} + +mp_limb_t +refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, + mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0)); +} + + +/* Right shift hi,lo and return the low limb of the result. + Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */ +mp_limb_t +rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) +{ + ASSERT (shift < GMP_NUMB_BITS); + if (shift == 0) + return lo; + else + return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK; +} + +/* Left shift hi,lo and return the high limb of the result. + Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). 
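+   x86 masks the shift count to the operand width, so shifting a limb by
+   GMP_LIMB_BITS leaves it unchanged instead of giving zero; hence the
+   explicit shift==0 case here and in rshift_make above, rather than a
+   shift by GMP_NUMB_BITS.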
*/ +mp_limb_t +lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) +{ + ASSERT (shift < GMP_NUMB_BITS); + if (shift == 0) + return hi; + else + return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK; +} + + +mp_limb_t +refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) +{ + mp_limb_t ret; + mp_size_t i; + + ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); + ASSERT (size >= 1); + ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); + ASSERT_MPN (sp, size); + + ret = rshift_make (sp[0], CNST_LIMB(0), shift); + + for (i = 0; i < size-1; i++) + rp[i] = rshift_make (sp[i+1], sp[i], shift); + + rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift); + return ret; +} + +mp_limb_t +refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) +{ + mp_limb_t ret; + mp_size_t i; + + ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); + ASSERT (size >= 1); + ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); + ASSERT_MPN (sp, size); + + ret = lshift_make (CNST_LIMB(0), sp[size-1], shift); + + for (i = size-2; i >= 0; i--) + rp[i+1] = lshift_make (sp[i+1], sp[i], shift); + + rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift); + return ret; +} + +void +refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size) +{ + mp_size_t i; + + /* We work downwards since mpn_lshiftc needs that. */ + ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); + + for (i = size - 1; i >= 0; i--) + rp[i] = (~sp[i]) & GMP_NUMB_MASK; +} + +mp_limb_t +refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) +{ + mp_limb_t res; + + /* No asserts here, refmpn_lshift will assert what we need. */ + + res = refmpn_lshift (rp, sp, size, shift); + refmpn_com (rp, rp, size); + return res; +} + +/* accepting shift==0 and doing a plain copyi or copyd in that case */ +mp_limb_t +refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) +{ + if (shift == 0) + { + refmpn_copyi (rp, sp, size); + return 0; + } + else + { + return refmpn_rshift (rp, sp, size, shift); + } +} +mp_limb_t +refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) +{ + if (shift == 0) + { + refmpn_copyd (rp, sp, size); + return 0; + } + else + { + return refmpn_lshift (rp, sp, size, shift); + } +} + +/* accepting size==0 too */ +mp_limb_t +refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, + unsigned shift) +{ + return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift)); +} +mp_limb_t +refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size, + unsigned shift) +{ + return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift)); +} + +/* Divide h,l by d, return quotient, store remainder to *rp. + Operates on full limbs, not nails. + Must have h < d. + __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */ +mp_limb_t +refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d) +{ + mp_limb_t q, r; + int n; + + ASSERT (d != 0); + ASSERT (h < d); + +#if 0 + udiv_qrnnd (q, r, h, l, d); + *rp = r; + return q; +#endif + + n = refmpn_count_leading_zeros (d); + d <<= n; + + if (n != 0) + { + h = (h << n) | (l >> (GMP_LIMB_BITS - n)); + l <<= n; + } + + __udiv_qrnnd_c (q, r, h, l, d); + r >>= n; + *rp = r; + return q; +} + +mp_limb_t +refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp) +{ + return refmpn_udiv_qrnnd (rp, h, l, d); +} + +/* This little subroutine avoids some bad code generation from i386 gcc 3.0 + -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). 
*/
+static mp_limb_t
+refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                             mp_limb_t divisor, mp_limb_t carry)
+{
+  mp_size_t  i;
+  mp_limb_t rem[1];
+  for (i = size-1; i >= 0; i--)
+    {
+      rp[i] = refmpn_udiv_qrnnd (rem, carry,
+                                 sp[i] << GMP_NAIL_BITS,
+                                 divisor << GMP_NAIL_BITS);
+      carry = *rem >> GMP_NAIL_BITS;
+    }
+  return carry;
+}
+
+mp_limb_t
+refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
+                  mp_limb_t divisor, mp_limb_t carry)
+{
+  mp_ptr     sp_orig;
+  mp_ptr     prod;
+  mp_limb_t  carry_orig;
+
+  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
+  ASSERT (size >= 0);
+  ASSERT (carry < divisor);
+  ASSERT_MPN (sp, size);
+  ASSERT_LIMB (divisor);
+  ASSERT_LIMB (carry);
+
+  if (size == 0)
+    return carry;
+
+  sp_orig = refmpn_memdup_limbs (sp, size);
+  prod = refmpn_malloc_limbs (size);
+  carry_orig = carry;
+
+  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
+
+  /* check by multiplying back */
+#if 0
+  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
+          size, divisor, carry_orig, carry);
+  mpn_trace("s",sp_orig,size);
+  mpn_trace("r",rp,size);
+  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
+  mpn_trace("p",prod,size);
+#endif
+  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
+  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
+  free (sp_orig);
+  free (prod);
+
+  return carry;
+}
+
+mp_limb_t
+refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
+}
+
+
+mp_limb_t
+refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+               mp_limb_t carry)
+{
+  mp_ptr  p = refmpn_malloc_limbs (size);
+  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
+  free (p);
+  return carry;
+}
+
+mp_limb_t
+refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
+{
+  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
+}
+
+mp_limb_t
+refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
+                     mp_limb_t inverse)
+{
+  ASSERT (divisor & GMP_NUMB_HIGHBIT);
+  ASSERT (inverse == refmpn_invert_limb (divisor));
+  return refmpn_mod_1 (sp, size, divisor);
+}
+
+/* This implementation will be rather slow, but has the advantage of being
+   in a different style from the libgmp versions.
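+   The real mpn_mod_34lsub1 returns a value merely congruent to {p,n}
+   modulo 2^(3*GMP_NUMB_BITS/4)-1, computed by folding the operand in
+   3/4-limb fields; this reference version instead computes the fully
+   reduced remainder directly with mpn_mod_1 against that same modulus.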
*/ +mp_limb_t +refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) +{ + ASSERT ((GMP_NUMB_BITS % 4) == 0); + return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1); +} + + +mp_limb_t +refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize, + mp_srcptr sp, mp_size_t size, mp_limb_t divisor, + mp_limb_t carry) +{ + mp_ptr z; + + z = refmpn_malloc_limbs (xsize); + refmpn_fill (z, xsize, CNST_LIMB(0)); + + carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry); + carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry); + + free (z); + return carry; +} + +mp_limb_t +refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize, + mp_srcptr sp, mp_size_t size, mp_limb_t divisor) +{ + return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0)); +} + +mp_limb_t +refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize, + mp_srcptr sp, mp_size_t size, + mp_limb_t divisor, mp_limb_t inverse, unsigned shift) +{ + ASSERT (size >= 0); + ASSERT (shift == refmpn_count_leading_zeros (divisor)); + ASSERT (inverse == refmpn_invert_limb (divisor << shift)); + + return refmpn_divrem_1 (rp, xsize, sp, size, divisor); +} + +mp_limb_t +refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn, + mp_ptr np, mp_size_t nn, + mp_srcptr dp) +{ + mp_ptr tp; + mp_limb_t qh; + + tp = refmpn_malloc_limbs (nn + qxn); + refmpn_zero (tp, qxn); + refmpn_copyi (tp + qxn, np, nn); + qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2); + refmpn_copyi (np, tp, 2); + free (tp); + return qh; +} + +/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers + paper, figure 8.1 m', where b=2^GMP_LIMB_BITS. Note that -d-1 < d + since d has the high bit set. */ + +mp_limb_t +refmpn_invert_limb (mp_limb_t d) +{ + mp_limb_t r; + ASSERT (d & GMP_LIMB_HIGHBIT); + return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d); +} + +void +refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) +{ + mp_ptr qp, tp; + TMP_DECL; + TMP_MARK; + + tp = TMP_ALLOC_LIMBS (2 * n); + qp = TMP_ALLOC_LIMBS (n + 1); + + MPN_ZERO (tp, 2 * n); mpn_sub_1 (tp, tp, 2 * n, 1); + + refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n); + refmpn_copyi (rp, qp, n); + + TMP_FREE; +} + +void +refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) +{ + mp_ptr tp; + mp_limb_t binv; + TMP_DECL; + TMP_MARK; + + /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing + code. To make up for it, we check that the inverse is correct using a + multiply. */ + + tp = TMP_ALLOC_LIMBS (2 * n); + + MPN_ZERO (tp, n); + tp[0] = 1; + binvert_limb (binv, up[0]); + mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv); + + refmpn_mul_n (tp, rp, up, n); + ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1)); + + TMP_FREE; +} + +/* The aim is to produce a dst quotient and return a remainder c, satisfying + c*b^n + src-i == 3*dst, where i is the incoming carry. + + Some value c==0, c==1 or c==2 will satisfy, so just try each. + + If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero + remainder from the first division attempt determines the correct + remainder (3-c), but don't bother with that, since we can't guarantee + anything about GMP_NUMB_BITS when using nails. + + If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos + complement negative, ie. b^n+a-i, and the calculation produces c1 + satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This + means it's enough to just add any borrow back at the end. 
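+
+   To see that some c in the range 0 to 2 always exists: b is a power of 2,
+   so b^n is 1 or 2 mod 3, and c*b^n then covers every residue mod 3 as c
+   runs over 0, 1, 2; exactly one choice makes c*b^n + src - i divisible
+   by 3.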
+ + A borrow only occurs when a==0 or a==1, and, by the same reasoning as in + mpn/generic/diveby3.c, the c1 that results in those cases will only be 0 + or 1 respectively, so with 1 added the final return value is still in the + prescribed range 0 to 2. */ + +mp_limb_t +refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry) +{ + mp_ptr spcopy; + mp_limb_t c, cs; + + ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); + ASSERT (size >= 1); + ASSERT (carry <= 2); + ASSERT_MPN (sp, size); + + spcopy = refmpn_malloc_limbs (size); + cs = refmpn_sub_1 (spcopy, sp, size, carry); + + for (c = 0; c <= 2; c++) + if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0) + goto done; + ASSERT_FAIL (no value of c satisfies); + + done: + c += cs; + ASSERT (c <= 2); + + free (spcopy); + return c; +} + +mp_limb_t +refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size) +{ + return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0)); +} + + +/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */ +void +refmpn_mul_basecase (mp_ptr prodp, + mp_srcptr up, mp_size_t usize, + mp_srcptr vp, mp_size_t vsize) +{ + mp_size_t i; + + ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); + ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); + ASSERT (usize >= vsize); + ASSERT (vsize >= 1); + ASSERT_MPN (up, usize); + ASSERT_MPN (vp, vsize); + + prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]); + for (i = 1; i < vsize; i++) + prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]); +} + + +/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */ +void +refmpn_mulmid_basecase (mp_ptr rp, + mp_srcptr up, mp_size_t un, + mp_srcptr vp, mp_size_t vn) +{ + mp_limb_t cy; + mp_size_t i; + + ASSERT (un >= vn); + ASSERT (vn >= 1); + ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un)); + ASSERT (! 
refmpn_overlap_p (rp, un - vn + 3, vp, vn)); + ASSERT_MPN (up, un); + ASSERT_MPN (vp, vn); + + rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]); + rp[un - vn + 2] = CNST_LIMB (0); + for (i = 1; i < vn; i++) + { + cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]); + cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy); + cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy); + ASSERT (cy == 0); + } +} + +void +refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, + mp_ptr scratch) +{ + refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); +} + +void +refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + /* FIXME: this could be made faster by using refmpn_mul and then subtracting + off products near the middle product region boundary */ + refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n); +} + +void +refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un, + mp_srcptr vp, mp_size_t vn) +{ + /* FIXME: this could be made faster by using refmpn_mul and then subtracting + off products near the middle product region boundary */ + refmpn_mulmid_basecase (rp, up, un, vp, vn); +} + + + +#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD)) +#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD)) +#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD)) +#if WANT_FFT +#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD)) +#else +#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */ +#endif + +void +refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) +{ + mp_ptr tp, rp; + mp_size_t tn; + + if (vn < TOOM3_THRESHOLD) + { + /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */ + if (vn != 0) + refmpn_mul_basecase (wp, up, un, vp, vn); + else + MPN_ZERO (wp, un); + return; + } + + MPN_ZERO (wp, vn); + rp = refmpn_malloc_limbs (2 * vn); + + if (vn < TOOM4_THRESHOLD) + tn = mpn_toom22_mul_itch (vn, vn); + else if (vn < TOOM6_THRESHOLD) + tn = mpn_toom33_mul_itch (vn, vn); + else if (vn < FFT_THRESHOLD) + tn = mpn_toom44_mul_itch (vn, vn); + else + tn = mpn_toom6h_mul_itch (vn, vn); + tp = refmpn_malloc_limbs (tn); + + while (un >= vn) + { + if (vn < TOOM4_THRESHOLD) + /* In the toom3 range, use mpn_toom22_mul. */ + mpn_toom22_mul (rp, up, vn, vp, vn, tp); + else if (vn < TOOM6_THRESHOLD) + /* In the toom4 range, use mpn_toom33_mul. */ + mpn_toom33_mul (rp, up, vn, vp, vn, tp); + else if (vn < FFT_THRESHOLD) + /* In the toom6 range, use mpn_toom44_mul. */ + mpn_toom44_mul (rp, up, vn, vp, vn, tp); + else + /* For the largest operands, use mpn_toom6h_mul. 
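+           The loop multiplies successive vn-limb chunks of u by the whole
+           of v and accumulates each balanced product into wp at increasing
+           offsets; the leftover un mod vn limbs are handled by the
+           recursive refmpn_mul call after the loop.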
*/
+        mpn_toom6h_mul (rp, up, vn, vp, vn, tp);
+
+      ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
+      wp += vn;
+
+      up += vn;
+      un -= vn;
+    }
+
+  free (tp);
+
+  if (un != 0)
+    {
+      refmpn_mul (rp, vp, vn, up, un);
+      ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
+    }
+  free (rp);
+}
+
+void
+refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+{
+  refmpn_mul (prodp, up, size, vp, size);
+}
+
+void
+refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+{
+  mp_ptr tp = refmpn_malloc_limbs (2*size);
+  refmpn_mul (tp, up, size, vp, size);
+  refmpn_copyi (prodp, tp, size);
+  free (tp);
+}
+
+void
+refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  refmpn_mul (dst, src, size, src, size);
+}
+
+void
+refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+  refmpn_mullo_n (dst, src, src, size);
+}
+
+/* Allowing usize<vsize, usize==0 or vsize==0.  */
+void
+refmpn_mul_any (mp_ptr prodp,
+                mp_srcptr up, mp_size_t usize,
+                mp_srcptr vp, mp_size_t vsize)
+{
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
+  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
+  ASSERT (usize >= 0);
+  ASSERT (vsize >= 0);
+  ASSERT_MPN (up, usize);
+  ASSERT_MPN (vp, vsize);
+
+  if (usize == 0)
+    {
+      refmpn_fill (prodp, vsize, CNST_LIMB(0));
+      return;
+    }
+
+  if (vsize == 0)
+    {
+      refmpn_fill (prodp, usize, CNST_LIMB(0));
+      return;
+    }
+
+  if (usize >= vsize)
+    refmpn_mul (prodp, up, usize, vp, vsize);
+  else
+    refmpn_mul (prodp, vp, vsize, up, usize);
+}
+
+
+mp_limb_t
+refmpn_gcd_11 (mp_limb_t x, mp_limb_t y)
+{
+  /* The non-ref function also requires input operands to be odd, but
+     below refmpn_gcd_1 doesn't guarantee that.  */
+  ASSERT (x > 0);
+  ASSERT (y > 0);
+  do
+    {
+      while ((x & 1) == 0)  x >>= 1;
+      while ((y & 1) == 0)  y >>= 1;
+
+      if (x < y)
+        MP_LIMB_T_SWAP (x, y);
+
+      x -= y;
+    }
+  while (x != 0);
+
+  return y;
+}
+
+mp_double_limb_t
+refmpn_gcd_22 (mp_limb_t x1, mp_limb_t x0, mp_limb_t y1, mp_limb_t y0)
+{
+  ASSERT ((x0 & 1) != 0);
+  ASSERT ((y0 & 1) != 0);
+  mp_double_limb_t g;
+  mp_limb_t cy;
+
+  do
+    {
+      while ((x0 & 1) == 0)
+        {
+          x0 = (x1 << (GMP_NUMB_BITS - 1)) | (x0 >> 1);
+          x1 >>= 1;
+        }
+      while ((y0 & 1) == 0)
+        {
+          y0 = (y1 << (GMP_NUMB_BITS - 1)) | (y0 >> 1);
+          y1 >>= 1;
+        }
+
+
+      if (x1 < y1 || (x1 == y1 && x0 < y0))
+        {
+          mp_limb_t t;
+          t = x1; x1 = y1; y1 = t;
+          t = x0; x0 = y0; y0 = t;
+        }
+
+      cy = (x0 < y0);
+      x0 -= y0;
+      x1 -= y1 + cy;
+    }
+  while ((x1 | x0) != 0);
+
+  g.d1 = y1;
+  g.d0 = y0;
+  return g;
+}
+
+mp_limb_t
+refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
+{
+  mp_limb_t  x;
+  int  twos;
+
+  ASSERT (y != 0);
+  ASSERT (! refmpn_zero_p (xp, xsize));
+  ASSERT_MPN (xp, xsize);
+  ASSERT_LIMB (y);
+
+  x = refmpn_mod_1 (xp, xsize, y);
+  if (x == 0)
+    return y;
+
+  twos = 0;
+  while ((x & 1) == 0 && (y & 1) == 0)
+    {
+      x >>= 1;
+      y >>= 1;
+      twos++;
+    }
+
+  return refmpn_gcd_11 (x, y) << twos;
+}
+
+
+/* Based on the full limb x, not nails.  */
+unsigned
+refmpn_count_leading_zeros (mp_limb_t x)
+{
+  unsigned  n = 0;
+
+  ASSERT (x != 0);
+
+  while ((x & GMP_LIMB_HIGHBIT) == 0)
+    {
+      x <<= 1;
+      n++;
+    }
+  return n;
+}
+
+/* Full limbs allowed, not limited to nails.  */
+unsigned
+refmpn_count_trailing_zeros (mp_limb_t x)
+{
+  unsigned  n = 0;
+
+  ASSERT (x != 0);
+  ASSERT_LIMB (x);
+
+  while ((x & 1) == 0)
+    {
+      x >>= 1;
+      n++;
+    }
+  return n;
+}
+
+/* Strip factors of two (low zero bits) from {p,size} by right shifting.
+   The return value is the number of twos stripped.  */
+mp_size_t
+refmpn_strip_twos (mp_ptr p, mp_size_t size)
+{
+  mp_size_t  limbs;
+  unsigned   shift;
+
+  ASSERT (size >= 1);
+  ASSERT (!
refmpn_zero_p (p, size)); + ASSERT_MPN (p, size); + + for (limbs = 0; p[0] == 0; limbs++) + { + refmpn_copyi (p, p+1, size-1); + p[size-1] = 0; + } + + shift = refmpn_count_trailing_zeros (p[0]); + if (shift) + refmpn_rshift (p, p, size, shift); + + return limbs*GMP_NUMB_BITS + shift; +} + +mp_limb_t +refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize) +{ + int cmp; + + ASSERT (ysize >= 1); + ASSERT (xsize >= ysize); + ASSERT ((xp[0] & 1) != 0); + ASSERT ((yp[0] & 1) != 0); + /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */ + ASSERT (yp[ysize-1] != 0); + ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize)); + ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize)); + ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize)); + if (xsize == ysize) + ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1])); + ASSERT_MPN (xp, xsize); + ASSERT_MPN (yp, ysize); + + refmpn_strip_twos (xp, xsize); + MPN_NORMALIZE (xp, xsize); + MPN_NORMALIZE (yp, ysize); + + for (;;) + { + cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize); + if (cmp == 0) + break; + if (cmp < 0) + MPN_PTR_SWAP (xp,xsize, yp,ysize); + + ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize)); + + refmpn_strip_twos (xp, xsize); + MPN_NORMALIZE (xp, xsize); + } + + refmpn_copyi (gp, xp, xsize); + return xsize; +} + +unsigned long +ref_popc_limb (mp_limb_t src) +{ + unsigned long count; + int i; + + count = 0; + for (i = 0; i < GMP_LIMB_BITS; i++) + { + count += (src & 1); + src >>= 1; + } + return count; +} + +unsigned long +refmpn_popcount (mp_srcptr sp, mp_size_t size) +{ + unsigned long count = 0; + mp_size_t i; + + ASSERT (size >= 0); + ASSERT_MPN (sp, size); + + for (i = 0; i < size; i++) + count += ref_popc_limb (sp[i]); + return count; +} + +unsigned long +refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) +{ + mp_ptr d; + unsigned long count; + + ASSERT (size >= 0); + ASSERT_MPN (s1p, size); + ASSERT_MPN (s2p, size); + + if (size == 0) + return 0; + + d = refmpn_malloc_limbs (size); + refmpn_xor_n (d, s1p, s2p, size); + count = refmpn_popcount (d, size); + free (d); + return count; +} + + +/* set r to a%d */ +void +refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2]) +{ + mp_limb_t D[2]; + int n; + + ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2)); + ASSERT_MPN (a, 2); + ASSERT_MPN (d, 2); + + D[1] = d[1], D[0] = d[0]; + r[1] = a[1], r[0] = a[0]; + n = 0; + + for (;;) + { + if (D[1] & GMP_NUMB_HIGHBIT) + break; + if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0) + break; + refmpn_lshift (D, D, (mp_size_t) 2, 1); + n++; + ASSERT (n <= GMP_NUMB_BITS); + } + + while (n >= 0) + { + if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0) + ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2)); + refmpn_rshift (D, D, (mp_size_t) 2, 1); + n--; + } + + ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0); +} + + + +/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in + particular the trial quotient is allowed to be 2 too big. */ +mp_limb_t +refmpn_sb_div_qr (mp_ptr qp, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +{ + mp_limb_t retval = 0; + mp_size_t i; + mp_limb_t d1 = dp[dsize-1]; + mp_ptr np_orig = refmpn_memdup_limbs (np, nsize); + + ASSERT (nsize >= dsize); + /* ASSERT (dsize > 2); */ + ASSERT (dsize >= 2); + ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT); + ASSERT (! 
refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np); + ASSERT_MPN (np, nsize); + ASSERT_MPN (dp, dsize); + + i = nsize-dsize; + if (refmpn_cmp (np+i, dp, dsize) >= 0) + { + ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize)); + retval = 1; + } + + for (i--; i >= 0; i--) + { + mp_limb_t n0 = np[i+dsize]; + mp_limb_t n1 = np[i+dsize-1]; + mp_limb_t q, dummy_r; + + ASSERT (n0 <= d1); + if (n0 == d1) + q = GMP_NUMB_MAX; + else + q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS, + d1 << GMP_NAIL_BITS); + + n0 -= refmpn_submul_1 (np+i, dp, dsize, q); + ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX); + if (n0) + { + q--; + if (! refmpn_add_n (np+i, np+i, dp, dsize)) + { + q--; + ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize)); + } + } + np[i+dsize] = 0; + + qp[i] = q; + } + + /* remainder < divisor */ +#if 0 /* ASSERT triggers gcc 4.2.1 bug */ + ASSERT (refmpn_cmp (np, dp, dsize) < 0); +#endif + + /* multiply back to original */ + { + mp_ptr mp = refmpn_malloc_limbs (nsize); + + refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize); + if (retval) + ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize)); + ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize)); + ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0); + + free (mp); + } + + free (np_orig); + return retval; +} + +/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in + particular the trial quotient is allowed to be 2 too big. */ +void +refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +{ + ASSERT (qxn == 0); + ASSERT_MPN (np, nsize); + ASSERT_MPN (dp, dsize); + ASSERT (dsize > 0); + ASSERT (dp[dsize-1] != 0); + + if (dsize == 1) + { + rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]); + return; + } + else + { + mp_ptr n2p = refmpn_malloc_limbs (nsize+1); + mp_ptr d2p = refmpn_malloc_limbs (dsize); + int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS; + + n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm); + ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm)); + + refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize); + refmpn_rshift_or_copy (rp, n2p, dsize, norm); + + /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */ + free (n2p); + free (d2p); + } +} + +mp_limb_t +refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) +{ + mp_size_t j; + mp_limb_t cy; + + ASSERT_MPN (up, 2*n); + /* ASSERT about directed overlap rp, up */ + /* ASSERT about overlap rp, mp */ + /* ASSERT about overlap up, mp */ + + for (j = n - 1; j >= 0; j--) + { + up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); + up++; + } + cy = mpn_add_n (rp, up, up - n, n); + return cy; +} + +size_t +refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size) +{ + unsigned char *d; + size_t dsize; + + ASSERT (size >= 0); + ASSERT (base >= 2); + ASSERT (base < numberof (mp_bases)); + ASSERT (size == 0 || src[size-1] != 0); + ASSERT_MPN (src, size); + + MPN_SIZEINBASE (dsize, src, size, base); + ASSERT (dsize >= 1); + ASSERT (! 
byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES)); + + if (size == 0) + { + dst[0] = 0; + return 1; + } + + /* don't clobber input for power of 2 bases */ + if (POW2_P (base)) + src = refmpn_memdup_limbs (src, size); + + d = dst + dsize; + do + { + d--; + ASSERT (d >= dst); + *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base); + size -= (src[size-1] == 0); + } + while (size != 0); + + /* Move result back and decrement dsize if we didn't generate + the maximum possible digits. */ + if (d != dst) + { + size_t i; + dsize -= d - dst; + for (i = 0; i < dsize; i++) + dst[i] = d[i]; + } + + if (POW2_P (base)) + free (src); + + return dsize; +} + + +mp_limb_t +ref_bswap_limb (mp_limb_t src) +{ + mp_limb_t dst; + int i; + + dst = 0; + for (i = 0; i < GMP_LIMB_BYTES; i++) + { + dst = (dst << 8) + (src & 0xFF); + src >>= 8; + } + return dst; +} + + +/* These random functions are mostly for transitional purposes while adding + nail support, since they're independent of the normal mpn routines. They + can probably be removed when those normal routines are reliable, though + perhaps something independent would still be useful at times. */ + +#if GMP_LIMB_BITS == 32 +#define RAND_A CNST_LIMB(0x29CF535) +#endif +#if GMP_LIMB_BITS == 64 +#define RAND_A CNST_LIMB(0xBAECD515DAF0B49D) +#endif + +mp_limb_t refmpn_random_seed; + +mp_limb_t +refmpn_random_half (void) +{ + refmpn_random_seed = refmpn_random_seed * RAND_A + 1; + return (refmpn_random_seed >> GMP_LIMB_BITS/2); +} + +mp_limb_t +refmpn_random_limb (void) +{ + return ((refmpn_random_half () << (GMP_LIMB_BITS/2)) + | refmpn_random_half ()) & GMP_NUMB_MASK; +} + +void +refmpn_random (mp_ptr ptr, mp_size_t size) +{ + mp_size_t i; + if (GMP_NAIL_BITS == 0) + { + mpn_random (ptr, size); + return; + } + + for (i = 0; i < size; i++) + ptr[i] = refmpn_random_limb (); +} + +void +refmpn_random2 (mp_ptr ptr, mp_size_t size) +{ + mp_size_t i; + mp_limb_t bit, mask, limb; + int run; + + if (GMP_NAIL_BITS == 0) + { + mpn_random2 (ptr, size); + return; + } + +#define RUN_MODULUS 32 + + /* start with ones at a random pos in the high limb */ + bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS); + mask = 0; + run = 0; + + for (i = size-1; i >= 0; i--) + { + limb = 0; + do + { + if (run == 0) + { + run = (refmpn_random_half () % RUN_MODULUS) + 1; + mask = ~mask; + } + + limb |= (bit & mask); + bit >>= 1; + run--; + } + while (bit != 0); + + ptr[i] = limb; + bit = GMP_NUMB_HIGHBIT; + } +} + +/* This is a simple bitwise algorithm working high to low across "s" and + testing each time whether setting the bit would make s^2 exceed n. */ +mp_size_t +refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize) +{ + mp_ptr tp, dp; + mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs; + unsigned ibit; + long i; + mp_limb_t c; + + ASSERT (nsize >= 0); + + /* If n==0, then s=0 and r=0. */ + if (nsize == 0) + return 0; + + ASSERT (np[nsize - 1] != 0); + ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize)); + ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize)); + ASSERT (! 
MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize)); + + /* root */ + ssize = (nsize+1)/2; + refmpn_zero (sp, ssize); + + /* the remainder so far */ + dp = refmpn_memdup_limbs (np, nsize); + dsize = nsize; + + /* temporary */ + talloc = 2*ssize + 1; + tp = refmpn_malloc_limbs (talloc); + + for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--) + { + /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i + is added to it */ + + ilimbs = (i+1) / GMP_NUMB_BITS; + ibit = (i+1) % GMP_NUMB_BITS; + refmpn_zero (tp, ilimbs); + c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit); + tsize = ilimbs + ssize; + tp[tsize] = c; + tsize += (c != 0); + + ilimbs = (2*i) / GMP_NUMB_BITS; + ibit = (2*i) % GMP_NUMB_BITS; + if (ilimbs + 1 > tsize) + { + refmpn_zero_extend (tp, tsize, ilimbs + 1); + tsize = ilimbs + 1; + } + c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs, + CNST_LIMB(1) << ibit); + ASSERT (tsize < talloc); + tp[tsize] = c; + tsize += (c != 0); + + if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0) + { + /* set this bit in s and subtract from the remainder */ + refmpn_setbit (sp, i); + + ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize)); + dsize = refmpn_normalize (dp, dsize); + } + } + + if (rp == NULL) + { + ret = ! refmpn_zero_p (dp, dsize); + } + else + { + ASSERT (dsize == 0 || dp[dsize-1] != 0); + refmpn_copy (rp, dp, dsize); + ret = dsize; + } + + free (dp); + free (tp); + return ret; +} -- cgit v1.2.3