aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/tune
diff options
context:
space:
mode:
authorDuncan Wilkie <antigravityd@gmail.com>2023-11-18 06:11:09 -0600
committerDuncan Wilkie <antigravityd@gmail.com>2023-11-18 06:11:09 -0600
commit11da511c784eca003deb90c23570f0873954e0de (patch)
treee14fdd3d5d6345956d67e79ae771d0633d28362b /gmp-6.3.0/tune
Initial commit.
Diffstat (limited to 'gmp-6.3.0/tune')
-rw-r--r--gmp-6.3.0/tune/Makefile957
-rw-r--r--gmp-6.3.0/tune/Makefile.am187
-rw-r--r--gmp-6.3.0/tune/Makefile.in957
-rw-r--r--gmp-6.3.0/tune/README501
-rw-r--r--gmp-6.3.0/tune/alpha.asm59
-rw-r--r--gmp-6.3.0/tune/common.c2945
-rw-r--r--gmp-6.3.0/tune/div_qr_1_tune.c50
-rw-r--r--gmp-6.3.0/tune/div_qr_1n_pi1_1.c38
-rw-r--r--gmp-6.3.0/tune/div_qr_1n_pi1_2.c38
-rw-r--r--gmp-6.3.0/tune/div_qr_1n_pi1_3.c38
-rw-r--r--gmp-6.3.0/tune/div_qr_1n_pi1_4.c38
-rw-r--r--gmp-6.3.0/tune/divrem1div.c41
-rw-r--r--gmp-6.3.0/tune/divrem1inv.c41
-rw-r--r--gmp-6.3.0/tune/divrem2div.c40
-rw-r--r--gmp-6.3.0/tune/divrem2inv.c40
-rw-r--r--gmp-6.3.0/tune/freq.c893
-rw-r--r--gmp-6.3.0/tune/gcdext_double.c38
-rw-r--r--gmp-6.3.0/tune/gcdext_single.c38
-rw-r--r--gmp-6.3.0/tune/gcdextod.c39
-rw-r--r--gmp-6.3.0/tune/gcdextos.c39
-rw-r--r--gmp-6.3.0/tune/hgcd2-1.c39
-rw-r--r--gmp-6.3.0/tune/hgcd2-2.c39
-rw-r--r--gmp-6.3.0/tune/hgcd2-3.c39
-rw-r--r--gmp-6.3.0/tune/hgcd2-4.c39
-rw-r--r--gmp-6.3.0/tune/hgcd2-5.c39
-rw-r--r--gmp-6.3.0/tune/hgcd2.c49
-rw-r--r--gmp-6.3.0/tune/hgcd_appr_lehmer.c39
-rw-r--r--gmp-6.3.0/tune/hgcd_lehmer.c39
-rw-r--r--gmp-6.3.0/tune/hgcd_reduce_1.c40
-rw-r--r--gmp-6.3.0/tune/hgcd_reduce_2.c39
-rw-r--r--gmp-6.3.0/tune/hppa.asm42
-rw-r--r--gmp-6.3.0/tune/hppa2.asm44
-rw-r--r--gmp-6.3.0/tune/hppa2w.asm44
-rw-r--r--gmp-6.3.0/tune/ia64.asm47
-rw-r--r--gmp-6.3.0/tune/jacbase1.c37
-rw-r--r--gmp-6.3.0/tune/jacbase2.c37
-rw-r--r--gmp-6.3.0/tune/jacbase3.c37
-rw-r--r--gmp-6.3.0/tune/jacbase4.c37
-rw-r--r--gmp-6.3.0/tune/many.pl1334
-rw-r--r--gmp-6.3.0/tune/mod_1_1-1.c40
-rw-r--r--gmp-6.3.0/tune/mod_1_1-2.c40
-rw-r--r--gmp-6.3.0/tune/mod_1_div.c45
-rw-r--r--gmp-6.3.0/tune/mod_1_inv.c45
-rw-r--r--gmp-6.3.0/tune/modlinv.c177
-rw-r--r--gmp-6.3.0/tune/noop.c67
-rw-r--r--gmp-6.3.0/tune/pentium.asm60
-rw-r--r--gmp-6.3.0/tune/powerpc.asm53
-rw-r--r--gmp-6.3.0/tune/powerpc64.asm49
-rw-r--r--gmp-6.3.0/tune/powm_mod.c38
-rw-r--r--gmp-6.3.0/tune/powm_redc.c40
-rw-r--r--gmp-6.3.0/tune/pre_divrem_1.c40
-rw-r--r--gmp-6.3.0/tune/set_strb.c46
-rw-r--r--gmp-6.3.0/tune/set_strp.c42
-rw-r--r--gmp-6.3.0/tune/set_strs.c42
-rw-r--r--gmp-6.3.0/tune/sparcv9.asm45
-rw-r--r--gmp-6.3.0/tune/speed-ext.c233
-rw-r--r--gmp-6.3.0/tune/speed.c1419
-rw-r--r--gmp-6.3.0/tune/speed.h3981
-rw-r--r--gmp-6.3.0/tune/sqr_basecase.c2
-rw-r--r--gmp-6.3.0/tune/time.c1598
-rw-r--r--gmp-6.3.0/tune/tune-gcd-p.c225
-rw-r--r--gmp-6.3.0/tune/tuneup.c3072
-rw-r--r--gmp-6.3.0/tune/x86_64.asm55
63 files changed, 20531 insertions, 0 deletions
diff --git a/gmp-6.3.0/tune/Makefile b/gmp-6.3.0/tune/Makefile
new file mode 100644
index 0000000..24a6e9c
--- /dev/null
+++ b/gmp-6.3.0/tune/Makefile
@@ -0,0 +1,957 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# tune/Makefile. Generated from Makefile.in by configure.
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+
+# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+# * the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# or
+#
+# * the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+# Copyright 1996, 1998-2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+# * the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# or
+#
+# * the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/gmp
+pkgincludedir = $(includedir)/gmp
+pkglibdir = $(libdir)/gmp
+pkglibexecdir = $(libexecdir)/gmp
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = pentiumm-pc-linux-gnu
+host_triplet = pentiumm-pc-linux-gnu
+EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
+ speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT)
+subdir = tune
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
+ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+am_libspeed_la_OBJECTS = common.lo divrem1div.lo divrem1inv.lo \
+ divrem2div.lo divrem2inv.lo div_qr_1n_pi1_1.lo \
+ div_qr_1n_pi1_2.lo div_qr_1n_pi1_3.lo div_qr_1n_pi1_4.lo \
+ div_qr_1_tune.lo freq.lo gcdext_single.lo gcdext_double.lo \
+ gcdextod.lo gcdextos.lo hgcd_lehmer.lo hgcd_appr_lehmer.lo \
+ hgcd_reduce_1.lo hgcd_reduce_2.lo jacbase1.lo jacbase2.lo \
+ jacbase3.lo jacbase4.lo hgcd2-1.lo hgcd2-2.lo hgcd2-3.lo \
+ hgcd2-4.lo hgcd2-5.lo mod_1_div.lo mod_1_inv.lo mod_1_1-1.lo \
+ mod_1_1-2.lo modlinv.lo noop.lo powm_mod.lo powm_redc.lo \
+ pre_divrem_1.lo set_strb.lo set_strs.lo set_strp.lo time.lo
+libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
+AM_V_lt = $(am__v_lt_$(V))
+am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
+am__v_lt_0 = --silent
+am__v_lt_1 =
+libspeed_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
+am_speed_OBJECTS = speed.$(OBJEXT)
+speed_OBJECTS = $(am_speed_OBJECTS)
+speed_LDADD = $(LDADD)
+speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+speed_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(speed_LDFLAGS) $(LDFLAGS) -o $@
+am_speed_dynamic_OBJECTS = speed.$(OBJEXT)
+speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
+speed_dynamic_LDADD = $(LDADD)
+speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+am_speed_ext_OBJECTS = speed-ext.$(OBJEXT)
+speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
+speed_ext_LDADD = $(LDADD)
+speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+speed_ext_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
+am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT)
+tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS)
+tune_gcd_p_LDADD = $(LDADD)
+tune_gcd_p_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@
+am_tuneup_OBJECTS = tuneup.$(OBJEXT) hgcd2.$(OBJEXT)
+am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \
+ dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \
+ dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \
+ invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \
+ divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
+ get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
+ hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
+ mul_n.$(OBJEXT) sqr.$(OBJEXT) sec_powm.$(OBJEXT) \
+ mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \
+ tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \
+ mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
+ sqrlo.$(OBJEXT) sqrlo_basecase.$(OBJEXT) \
+ nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \
+ toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \
+ toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \
+ toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \
+ $(am__objects_2)
+tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
+am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
+tuneup_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(tuneup_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_$(V))
+am__v_P_ = $(am__v_P_$(AM_DEFAULT_VERBOSITY))
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_$(V))
+am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY))
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_$(V))
+am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY))
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I. -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_$(V))
+am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY))
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_$(V))
+am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY))
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
+ $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
+ $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \
+ $(nodist_tuneup_SOURCES)
+DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
+ $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
+ $(tune_gcd_p_SOURCES) $(tuneup_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+HEADERS = $(noinst_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/../mpn/Makeasm.am $(srcdir)/Makefile.in \
+ README
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = 32
+ACLOCAL = ${SHELL} /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/missing aclocal-1.15
+AMTAR = $${TAR-tar}
+AM_DEFAULT_VERBOSITY = 1
+AR = ar
+AS = as
+ASMFLAGS = -Wa,--noexecstack
+AUTOCONF = ${SHELL} /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/missing autoconf
+AUTOHEADER = ${SHELL} /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/missing autoheader
+AUTOMAKE = ${SHELL} /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/missing automake-1.15
+AWK = gawk
+CALLING_CONVENTIONS_OBJS = x86call.lo x86check$U.lo
+CC = gcc
+CCAS = gcc -c
+CC_FOR_BUILD = gcc
+CFLAGS = -m32 -O2 -pedantic -fomit-frame-pointer -mtune=pentium3 -march=pentium3
+CPP = gcc -E
+CPPFLAGS =
+CPP_FOR_BUILD = gcc -E
+CXX =
+CXXCPP =
+CXXFLAGS =
+CYGPATH_W = echo
+DEFN_LONG_LONG_LIMB = /* #undef _LONG_LONG_LIMB */
+DEFS = -DHAVE_CONFIG_H
+DLLTOOL = dlltool
+DSYMUTIL =
+DUMPBIN =
+ECHO_C =
+ECHO_N = -n
+ECHO_T =
+EGREP = /usr/bin/grep -E
+EXEEXT =
+EXEEXT_FOR_BUILD =
+FGREP = /usr/bin/grep -F
+GMP_LDFLAGS =
+GMP_LIMB_BITS = 32
+GMP_NAIL_BITS = 0
+GREP = /usr/bin/grep
+HAVE_CLOCK_01 = 1
+HAVE_CPUTIME_01 = 0
+HAVE_GETRUSAGE_01 = 1
+HAVE_GETTIMEOFDAY_01 = 1
+HAVE_HOST_CPU_FAMILY_power = 0
+HAVE_HOST_CPU_FAMILY_powerpc = 0
+HAVE_SIGACTION_01 = 1
+HAVE_SIGALTSTACK_01 = 1
+HAVE_SIGSTACK_01 = 1
+HAVE_STACK_T_01 = 1
+HAVE_SYS_RESOURCE_H_01 = 1
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
+LD = /usr/bin/ld
+LDFLAGS =
+LEX = flex
+LEXLIB = -lfl
+LEX_OUTPUT_ROOT = lex.yy
+LIBCURSES = -lncurses
+LIBGMPXX_LDFLAGS =
+LIBGMP_DLL = 0
+LIBGMP_LDFLAGS =
+LIBM = -lm
+LIBM_FOR_BUILD = -lm
+LIBOBJS =
+LIBREADLINE = -lreadline
+LIBS =
+LIBTOOL = $(SHELL) $(top_builddir)/libtool
+LIPO =
+LN_S = ln -s
+LTLIBOBJS =
+LT_SYS_LIBRARY_PATH =
+M4 = m4
+MAINT = #
+MAKEINFO = ${SHELL} /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/missing makeinfo
+MANIFEST_TOOL = :
+MKDIR_P = /usr/bin/mkdir -p
+NM = /usr/bin/nm -B
+NMEDIT =
+OBJDUMP = objdump
+OBJEXT = o
+OTOOL =
+OTOOL64 =
+PACKAGE = gmp
+PACKAGE_BUGREPORT = gmp-bugs@gmplib.org (see https://gmplib.org/manual/Reporting-Bugs.html)
+PACKAGE_NAME = GNU MP
+PACKAGE_STRING = GNU MP 6.3.0
+PACKAGE_TARNAME = gmp
+PACKAGE_URL = http://www.gnu.org/software/gmp/
+PACKAGE_VERSION = 6.3.0
+PATH_SEPARATOR = :
+RANLIB = ranlib
+SED = /usr/bin/sed
+SET_MAKE =
+SHELL = /bin/sh
+SPEED_CYCLECOUNTER_OBJ = pentium.lo
+STRIP = strip
+TAL_OBJECT = tal-reent.lo
+TUNE_LIBS =
+TUNE_SQR_OBJ =
+U_FOR_BUILD =
+VERSION = 6.3.0
+WITH_READLINE_01 = 1
+YACC = bison -y
+YFLAGS =
+abs_builddir = /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/tune
+abs_srcdir = /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/tune
+abs_top_builddir = /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0
+abs_top_srcdir = /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0
+ac_ct_AR = ar
+ac_ct_CC = gcc
+ac_ct_CXX =
+ac_ct_DUMPBIN =
+am__leading_dot = .
+am__tar = $${TAR-tar} chof - "$$tardir"
+am__untar = $${TAR-tar} xf -
+bindir = ${exec_prefix}/bin
+build = pentiumm-pc-linux-gnu
+build_alias =
+build_cpu = pentiumm
+build_os = linux-gnu
+build_vendor = pc
+builddir = .
+datadir = ${datarootdir}
+datarootdir = ${prefix}/share
+docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
+dvidir = ${docdir}
+exec_prefix = ${prefix}
+gmp_srclinks = mpn/add.c mpn/add_1.c mpn/add_n.asm mpn/sub.c mpn/sub_1.c mpn/sub_n.asm mpn/cnd_add_n.asm mpn/cnd_sub_n.asm mpn/cnd_swap.c mpn/neg.c mpn/com.c mpn/mul_1.asm mpn/addmul_1.asm mpn/submul_1.asm mpn/add_err1_n.c mpn/add_err2_n.c mpn/add_err3_n.c mpn/sub_err1_n.c mpn/sub_err2_n.c mpn/sub_err3_n.c mpn/lshift.asm mpn/rshift.asm mpn/dive_1.asm mpn/diveby3.c mpn/divis.c mpn/divrem.c mpn/divrem_1.asm mpn/divrem_2.asm mpn/fib2_ui.c mpn/fib2m.c mpn/mod_1.c mpn/mod_34lsub1.asm mpn/mode1o.asm mpn/pre_mod_1.c mpn/dump.c mpn/mod_1_1.asm mpn/mod_1_2.c mpn/mod_1_3.c mpn/mod_1_4.asm mpn/lshiftc.c mpn/mul.c mpn/mul_fft.c mpn/mul_n.c mpn/sqr.c mpn/mul_basecase.asm mpn/sqr_basecase.asm mpn/nussbaumer_mul.c mpn/mulmid_basecase.c mpn/toom42_mulmid.c mpn/mulmid_n.c mpn/mulmid.c mpn/random.c mpn/random2.c mpn/pow_1.c mpn/rootrem.c mpn/sqrtrem.c mpn/sizeinbase.c mpn/get_str.c mpn/set_str.c mpn/compute_powtab.c mpn/scan0.c mpn/scan1.c mpn/popcount.asm mpn/hamdist.asm mpn/cmp.c mpn/zero_p.c mpn/perfsqr.c mpn/perfpow.c mpn/strongfibo.c mpn/gcd_11.asm mpn/gcd_22.c mpn/gcd_1.c mpn/gcd.c mpn/gcdext_1.c mpn/gcdext.c mpn/gcd_subdiv_step.c mpn/gcdext_lehmer.c mpn/div_q.c mpn/tdiv_qr.c mpn/jacbase.c mpn/jacobi_2.c mpn/jacobi.c mpn/get_d.c mpn/matrix22_mul.c mpn/matrix22_mul1_inverse_vector.c mpn/hgcd_matrix.c mpn/hgcd2.c mpn/hgcd_step.c mpn/hgcd_reduce.c mpn/hgcd.c mpn/hgcd_appr.c mpn/hgcd2_jacobi.c mpn/hgcd_jacobi.c mpn/mullo_n.c mpn/mullo_basecase.c mpn/sqrlo.c mpn/sqrlo_basecase.c mpn/toom22_mul.c mpn/toom32_mul.c mpn/toom42_mul.c mpn/toom52_mul.c mpn/toom62_mul.c mpn/toom33_mul.c mpn/toom43_mul.c mpn/toom53_mul.c mpn/toom54_mul.c mpn/toom63_mul.c mpn/toom44_mul.c mpn/toom6h_mul.c mpn/toom6_sqr.c mpn/toom8h_mul.c mpn/toom8_sqr.c mpn/toom_couple_handling.c mpn/toom2_sqr.c mpn/toom3_sqr.c mpn/toom4_sqr.c mpn/toom_eval_dgr3_pm1.c mpn/toom_eval_dgr3_pm2.c mpn/toom_eval_pm1.c mpn/toom_eval_pm2.c mpn/toom_eval_pm2exp.c mpn/toom_eval_pm2rexp.c mpn/toom_interpolate_5pts.c 
mpn/toom_interpolate_6pts.c mpn/toom_interpolate_7pts.c mpn/toom_interpolate_8pts.c mpn/toom_interpolate_12pts.c mpn/toom_interpolate_16pts.c mpn/invertappr.c mpn/invert.c mpn/binvert.c mpn/mulmod_bnm1.c mpn/sqrmod_bnm1.c mpn/mulmod_bknp1.c mpn/div_qr_1.c mpn/div_qr_1n_pi1.c mpn/div_qr_2.c mpn/div_qr_2n_pi1.c mpn/div_qr_2u_pi1.c mpn/sbpi1_div_q.c mpn/sbpi1_div_qr.c mpn/sbpi1_divappr_q.c mpn/dcpi1_div_q.c mpn/dcpi1_div_qr.c mpn/dcpi1_divappr_q.c mpn/mu_div_qr.c mpn/mu_divappr_q.c mpn/mu_div_q.c mpn/bdiv_q_1.asm mpn/sbpi1_bdiv_q.c mpn/sbpi1_bdiv_qr.c mpn/sbpi1_bdiv_r.c mpn/dcpi1_bdiv_q.c mpn/dcpi1_bdiv_qr.c mpn/mu_bdiv_q.c mpn/mu_bdiv_qr.c mpn/bdiv_q.c mpn/bdiv_qr.c mpn/broot.c mpn/brootinv.c mpn/bsqrt.c mpn/bsqrtinv.c mpn/divexact.c mpn/bdiv_dbm1c.asm mpn/redc_1.c mpn/redc_2.c mpn/redc_n.c mpn/powm.c mpn/powlo.c mpn/sec_powm.c mpn/sec_mul.c mpn/sec_sqr.c mpn/sec_div_qr.c mpn/sec_div_r.c mpn/sec_pi1_div_qr.c mpn/sec_pi1_div_r.c mpn/sec_add_1.c mpn/sec_sub_1.c mpn/sec_invert.c mpn/trialdiv.c mpn/remove.c mpn/and_n.c mpn/andn_n.c mpn/nand_n.c mpn/ior_n.c mpn/iorn_n.c mpn/nior_n.c mpn/xor_n.c mpn/xnor_n.c mpn/copyi.asm mpn/copyd.asm mpn/zero.c mpn/sec_tabselect.asm mpn/comb_tables.c mpn/umul.asm mpn/udiv.asm mpn/add_n_sub_n.c gmp-mparam.h
+host = pentiumm-pc-linux-gnu
+host_alias =
+host_cpu = pentiumm
+host_os = linux-gnu
+host_vendor = pc
+htmldir = ${docdir}
+includedir = ${prefix}/include
+infodir = ${datarootdir}/info
+install_sh = ${SHELL} /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/install-sh
+libdir = ${exec_prefix}/lib
+libexecdir = ${exec_prefix}/libexec
+localedir = ${datarootdir}/locale
+localstatedir = ${prefix}/var
+mandir = ${datarootdir}/man
+mkdir_p = $(MKDIR_P)
+mpn_objects = add$U.lo add_1$U.lo add_n.lo sub$U.lo sub_1$U.lo sub_n.lo cnd_add_n.lo cnd_sub_n.lo cnd_swap$U.lo neg$U.lo com$U.lo mul_1.lo addmul_1.lo submul_1.lo add_err1_n$U.lo add_err2_n$U.lo add_err3_n$U.lo sub_err1_n$U.lo sub_err2_n$U.lo sub_err3_n$U.lo lshift.lo rshift.lo dive_1.lo diveby3$U.lo divis$U.lo divrem$U.lo divrem_1.lo divrem_2.lo fib2_ui$U.lo fib2m$U.lo mod_1$U.lo mod_34lsub1.lo mode1o.lo pre_mod_1$U.lo dump$U.lo mod_1_1.lo mod_1_2$U.lo mod_1_3$U.lo mod_1_4.lo lshiftc$U.lo mul$U.lo mul_fft$U.lo mul_n$U.lo sqr$U.lo mul_basecase.lo sqr_basecase.lo nussbaumer_mul$U.lo mulmid_basecase$U.lo toom42_mulmid$U.lo mulmid_n$U.lo mulmid$U.lo random$U.lo random2$U.lo pow_1$U.lo rootrem$U.lo sqrtrem$U.lo sizeinbase$U.lo get_str$U.lo set_str$U.lo compute_powtab$U.lo scan0$U.lo scan1$U.lo popcount.lo hamdist.lo cmp$U.lo zero_p$U.lo perfsqr$U.lo perfpow$U.lo strongfibo$U.lo gcd_11.lo gcd_22$U.lo gcd_1$U.lo gcd$U.lo gcdext_1$U.lo gcdext$U.lo gcd_subdiv_step$U.lo gcdext_lehmer$U.lo div_q$U.lo tdiv_qr$U.lo jacbase$U.lo jacobi_2$U.lo jacobi$U.lo get_d$U.lo matrix22_mul$U.lo matrix22_mul1_inverse_vector$U.lo hgcd_matrix$U.lo hgcd2$U.lo hgcd_step$U.lo hgcd_reduce$U.lo hgcd$U.lo hgcd_appr$U.lo hgcd2_jacobi$U.lo hgcd_jacobi$U.lo mullo_n$U.lo mullo_basecase$U.lo sqrlo$U.lo sqrlo_basecase$U.lo toom22_mul$U.lo toom32_mul$U.lo toom42_mul$U.lo toom52_mul$U.lo toom62_mul$U.lo toom33_mul$U.lo toom43_mul$U.lo toom53_mul$U.lo toom54_mul$U.lo toom63_mul$U.lo toom44_mul$U.lo toom6h_mul$U.lo toom6_sqr$U.lo toom8h_mul$U.lo toom8_sqr$U.lo toom_couple_handling$U.lo toom2_sqr$U.lo toom3_sqr$U.lo toom4_sqr$U.lo toom_eval_dgr3_pm1$U.lo toom_eval_dgr3_pm2$U.lo toom_eval_pm1$U.lo toom_eval_pm2$U.lo toom_eval_pm2exp$U.lo toom_eval_pm2rexp$U.lo toom_interpolate_5pts$U.lo toom_interpolate_6pts$U.lo toom_interpolate_7pts$U.lo toom_interpolate_8pts$U.lo toom_interpolate_12pts$U.lo toom_interpolate_16pts$U.lo invertappr$U.lo invert$U.lo binvert$U.lo mulmod_bnm1$U.lo sqrmod_bnm1$U.lo 
mulmod_bknp1$U.lo div_qr_1$U.lo div_qr_1n_pi1$U.lo div_qr_2$U.lo div_qr_2n_pi1$U.lo div_qr_2u_pi1$U.lo sbpi1_div_q$U.lo sbpi1_div_qr$U.lo sbpi1_divappr_q$U.lo dcpi1_div_q$U.lo dcpi1_div_qr$U.lo dcpi1_divappr_q$U.lo mu_div_qr$U.lo mu_divappr_q$U.lo mu_div_q$U.lo bdiv_q_1.lo sbpi1_bdiv_q$U.lo sbpi1_bdiv_qr$U.lo sbpi1_bdiv_r$U.lo dcpi1_bdiv_q$U.lo dcpi1_bdiv_qr$U.lo mu_bdiv_q$U.lo mu_bdiv_qr$U.lo bdiv_q$U.lo bdiv_qr$U.lo broot$U.lo brootinv$U.lo bsqrt$U.lo bsqrtinv$U.lo divexact$U.lo bdiv_dbm1c.lo redc_1$U.lo redc_2$U.lo redc_n$U.lo powm$U.lo powlo$U.lo sec_powm$U.lo sec_mul$U.lo sec_sqr$U.lo sec_div_qr$U.lo sec_div_r$U.lo sec_pi1_div_qr$U.lo sec_pi1_div_r$U.lo sec_add_1$U.lo sec_sub_1$U.lo sec_invert$U.lo trialdiv$U.lo remove$U.lo and_n$U.lo andn_n$U.lo nand_n$U.lo ior_n$U.lo iorn_n$U.lo nior_n$U.lo xor_n$U.lo xnor_n$U.lo copyi.lo copyd.lo zero$U.lo sec_tabselect.lo comb_tables$U.lo umul.lo udiv.lo add_n_sub_n$U.lo
+mpn_objs_in_libgmp = mpn/add$U.lo mpn/add_1$U.lo mpn/add_n.lo mpn/sub$U.lo mpn/sub_1$U.lo mpn/sub_n.lo mpn/cnd_add_n.lo mpn/cnd_sub_n.lo mpn/cnd_swap$U.lo mpn/neg$U.lo mpn/com$U.lo mpn/mul_1.lo mpn/addmul_1.lo mpn/submul_1.lo mpn/add_err1_n$U.lo mpn/add_err2_n$U.lo mpn/add_err3_n$U.lo mpn/sub_err1_n$U.lo mpn/sub_err2_n$U.lo mpn/sub_err3_n$U.lo mpn/lshift.lo mpn/rshift.lo mpn/dive_1.lo mpn/diveby3$U.lo mpn/divis$U.lo mpn/divrem$U.lo mpn/divrem_1.lo mpn/divrem_2.lo mpn/fib2_ui$U.lo mpn/fib2m$U.lo mpn/mod_1$U.lo mpn/mod_34lsub1.lo mpn/mode1o.lo mpn/pre_mod_1$U.lo mpn/dump$U.lo mpn/mod_1_1.lo mpn/mod_1_2$U.lo mpn/mod_1_3$U.lo mpn/mod_1_4.lo mpn/lshiftc$U.lo mpn/mul$U.lo mpn/mul_fft$U.lo mpn/mul_n$U.lo mpn/sqr$U.lo mpn/mul_basecase.lo mpn/sqr_basecase.lo mpn/nussbaumer_mul$U.lo mpn/mulmid_basecase$U.lo mpn/toom42_mulmid$U.lo mpn/mulmid_n$U.lo mpn/mulmid$U.lo mpn/random$U.lo mpn/random2$U.lo mpn/pow_1$U.lo mpn/rootrem$U.lo mpn/sqrtrem$U.lo mpn/sizeinbase$U.lo mpn/get_str$U.lo mpn/set_str$U.lo mpn/compute_powtab$U.lo mpn/scan0$U.lo mpn/scan1$U.lo mpn/popcount.lo mpn/hamdist.lo mpn/cmp$U.lo mpn/zero_p$U.lo mpn/perfsqr$U.lo mpn/perfpow$U.lo mpn/strongfibo$U.lo mpn/gcd_11.lo mpn/gcd_22$U.lo mpn/gcd_1$U.lo mpn/gcd$U.lo mpn/gcdext_1$U.lo mpn/gcdext$U.lo mpn/gcd_subdiv_step$U.lo mpn/gcdext_lehmer$U.lo mpn/div_q$U.lo mpn/tdiv_qr$U.lo mpn/jacbase$U.lo mpn/jacobi_2$U.lo mpn/jacobi$U.lo mpn/get_d$U.lo mpn/matrix22_mul$U.lo mpn/matrix22_mul1_inverse_vector$U.lo mpn/hgcd_matrix$U.lo mpn/hgcd2$U.lo mpn/hgcd_step$U.lo mpn/hgcd_reduce$U.lo mpn/hgcd$U.lo mpn/hgcd_appr$U.lo mpn/hgcd2_jacobi$U.lo mpn/hgcd_jacobi$U.lo mpn/mullo_n$U.lo mpn/mullo_basecase$U.lo mpn/sqrlo$U.lo mpn/sqrlo_basecase$U.lo mpn/toom22_mul$U.lo mpn/toom32_mul$U.lo mpn/toom42_mul$U.lo mpn/toom52_mul$U.lo mpn/toom62_mul$U.lo mpn/toom33_mul$U.lo mpn/toom43_mul$U.lo mpn/toom53_mul$U.lo mpn/toom54_mul$U.lo mpn/toom63_mul$U.lo mpn/toom44_mul$U.lo mpn/toom6h_mul$U.lo mpn/toom6_sqr$U.lo mpn/toom8h_mul$U.lo mpn/toom8_sqr$U.lo 
mpn/toom_couple_handling$U.lo mpn/toom2_sqr$U.lo mpn/toom3_sqr$U.lo mpn/toom4_sqr$U.lo mpn/toom_eval_dgr3_pm1$U.lo mpn/toom_eval_dgr3_pm2$U.lo mpn/toom_eval_pm1$U.lo mpn/toom_eval_pm2$U.lo mpn/toom_eval_pm2exp$U.lo mpn/toom_eval_pm2rexp$U.lo mpn/toom_interpolate_5pts$U.lo mpn/toom_interpolate_6pts$U.lo mpn/toom_interpolate_7pts$U.lo mpn/toom_interpolate_8pts$U.lo mpn/toom_interpolate_12pts$U.lo mpn/toom_interpolate_16pts$U.lo mpn/invertappr$U.lo mpn/invert$U.lo mpn/binvert$U.lo mpn/mulmod_bnm1$U.lo mpn/sqrmod_bnm1$U.lo mpn/mulmod_bknp1$U.lo mpn/div_qr_1$U.lo mpn/div_qr_1n_pi1$U.lo mpn/div_qr_2$U.lo mpn/div_qr_2n_pi1$U.lo mpn/div_qr_2u_pi1$U.lo mpn/sbpi1_div_q$U.lo mpn/sbpi1_div_qr$U.lo mpn/sbpi1_divappr_q$U.lo mpn/dcpi1_div_q$U.lo mpn/dcpi1_div_qr$U.lo mpn/dcpi1_divappr_q$U.lo mpn/mu_div_qr$U.lo mpn/mu_divappr_q$U.lo mpn/mu_div_q$U.lo mpn/bdiv_q_1.lo mpn/sbpi1_bdiv_q$U.lo mpn/sbpi1_bdiv_qr$U.lo mpn/sbpi1_bdiv_r$U.lo mpn/dcpi1_bdiv_q$U.lo mpn/dcpi1_bdiv_qr$U.lo mpn/mu_bdiv_q$U.lo mpn/mu_bdiv_qr$U.lo mpn/bdiv_q$U.lo mpn/bdiv_qr$U.lo mpn/broot$U.lo mpn/brootinv$U.lo mpn/bsqrt$U.lo mpn/bsqrtinv$U.lo mpn/divexact$U.lo mpn/bdiv_dbm1c.lo mpn/redc_1$U.lo mpn/redc_2$U.lo mpn/redc_n$U.lo mpn/powm$U.lo mpn/powlo$U.lo mpn/sec_powm$U.lo mpn/sec_mul$U.lo mpn/sec_sqr$U.lo mpn/sec_div_qr$U.lo mpn/sec_div_r$U.lo mpn/sec_pi1_div_qr$U.lo mpn/sec_pi1_div_r$U.lo mpn/sec_add_1$U.lo mpn/sec_sub_1$U.lo mpn/sec_invert$U.lo mpn/trialdiv$U.lo mpn/remove$U.lo mpn/and_n$U.lo mpn/andn_n$U.lo mpn/nand_n$U.lo mpn/ior_n$U.lo mpn/iorn_n$U.lo mpn/nior_n$U.lo mpn/xor_n$U.lo mpn/xnor_n$U.lo mpn/copyi.lo mpn/copyd.lo mpn/zero$U.lo mpn/sec_tabselect.lo mpn/comb_tables$U.lo mpn/umul.lo mpn/udiv.lo mpn/add_n_sub_n$U.lo
+oldincludedir = /usr/include
+pdfdir = ${docdir}
+prefix = /home/dnw/Code/ERA-calc/c-src/gmp-6.3.0/bin
+program_transform_name = s,x,x,
+psdir = ${docdir}
+sbindir = ${exec_prefix}/sbin
+sharedstatedir = ${prefix}/com
+srcdir = .
+sysconfdir = ${prefix}/etc
+target_alias =
+top_build_prefix = ../
+top_builddir = ..
+top_srcdir = ..
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests
+AM_LDFLAGS = -no-install
+EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
+ ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
+
+noinst_HEADERS = speed.h
+#STATIC =
+
+# Prefer -static on the speed and tune programs, since that can avoid
+# overheads of shared library linkages on some systems. Libtool tends to
+# botch -static if configured with --disable-static, perhaps reasonably
+# enough. In any event under --disable-static the only choice is a dynamic
+# link so there's no point in -static.
+#
+STATIC = -static
+EXTRA_LTLIBRARIES = libspeed.la
+libspeed_la_SOURCES = \
+ common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
+ div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1n_pi1_3.c \
+ div_qr_1n_pi1_4.c div_qr_1_tune.c \
+ freq.c \
+ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
+ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ hgcd2-1.c hgcd2-2.c hgcd2-3.c hgcd2-4.c hgcd2-5.c \
+ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
+ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
+ set_strb.c set_strs.c set_strp.c time.c
+
+libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
+ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
+libspeed_la_LDFLAGS = $(STATIC)
+DEPENDENCIES = libspeed.la
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+speed_SOURCES = speed.c
+speed_LDFLAGS = $(STATIC)
+speed_dynamic_SOURCES = speed.c
+speed_ext_SOURCES = speed-ext.c
+speed_ext_LDFLAGS = $(STATIC)
+tuneup_SOURCES = tuneup.c hgcd2.c
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDFLAGS = $(STATIC)
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
+
+# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
+ $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+ stg.gnuplot stg.data \
+ mtg.gnuplot mtg.data \
+ fibg.gnuplot fibg.data \
+ graph.gnuplot graph.data \
+ $(MANY_CLEAN)
+
+DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN)
+
+# Generating these little files at build time seems better than including
+# them in the distribution, since the list can be changed more easily.
+#
+# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1
+# and 2 limb divisors, which are never used during tuning, so it doesn't
+# matter whether it picks up a tuned or untuned version of those.
+#
+# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking
+# problem. If a native divrem_1 provides an mpn_divrem_1c entrypoint then
+# common.c will want that, but the generic divrem_1 doesn't provide it,
+# likewise for mod_1. The simplest way around this is to have the tune
+# build versions renamed suitably.
+#
+# FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the
+# recompiled object will be rebuilt if that file changes.
+TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
+ dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
+ invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c sec_powm.c \
+ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \
+ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
+ toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+
+
+# COMPILE minus CC.
+#
+COMPILE_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(ASMFLAGS)
+
+
+# Flags used for preprocessing (in ansi2knr rules).
+#
+PREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS)
+
+
+# Recent versions of automake (1.5 and up for instance) append automake
+# generated suffixes to this $(SUFFIXES) list. This is essential for us,
+# since .c must come after .s, .S and .asm. If .c is before .s, for
+# instance, then in the mpn directory "make" will see add_n.c mentioned in
+# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,
+# even if add_n.c doesn't exist but add_n.s does. See GNU make
+# documentation "(make)Implicit Rule Search", part 5c.
+#
+# On IRIX 6 native make this doesn't work properly though. Somehow .c
+# remains ahead of .s, perhaps because .c.s is a builtin rule. .asm works
+# fine though, and mpn/mips3 uses this.
+#
+SUFFIXES = .s .S .asm
+
+# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
+RM_TMP = rm -f
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .s .S .asm .c .lo .o .obj
+# Regenerate Makefile.in.  The prerequisite list is commented out on the
+# target line below, so $? only has content if those prerequisites are
+# restored.  For each out-of-date prerequisite that appears in
+# $(am__configure_deps), run the top-level am--refresh target and stop
+# (success if Makefile.in now exists); otherwise rerun automake for
+# tune/Makefile from $(top_srcdir).
+$(srcdir)/Makefile.in: # $(srcdir)/Makefile.am $(srcdir)/../mpn/Makeasm.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu --ignore-deps tune/Makefile
+# Rebuild this Makefile.  If config.status is among the out-of-date
+# prerequisites ($?), defer to the top-level am--refresh target; otherwise
+# ask config.status to regenerate just $(subdir)/Makefile.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(srcdir)/../mpn/Makeasm.am $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: # $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): # $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Link rules.  Each program is removed before it is relinked.  libspeed.la
+# and the speed, speed-ext, tune-gcd-p and tuneup programs are linked with
+# their own per-target *_LINK command; speed-dynamic is the only one linked
+# with the generic $(LINK) command.
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libspeed_la_LINK) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
+
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES)
+ @rm -f speed$(EXEEXT)
+ $(AM_V_CCLD)$(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
+
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES)
+ @rm -f speed-dynamic$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
+
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES)
+ @rm -f speed-ext$(EXEEXT)
+ $(AM_V_CCLD)$(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
+
+tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES)
+ @rm -f tune-gcd-p$(EXEEXT)
+ $(AM_V_CCLD)$(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS)
+
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES)
+ @rm -f tuneup$(EXEEXT)
+ $(AM_V_CCLD)$(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+.c.o:
+ $(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+ $(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+ $(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+# Run $(ETAGS) over the de-duplicated list of tagged files ($unique).
+# "set x" followed by "shift" leaves the positional parameters empty, and
+# nothing in this recipe adds to them, so the "$# -gt 0" branch is not taken
+# here.  $(ETAGS) is skipped entirely when $(ETAGS_ARGS), the positional
+# parameters and $unique are all empty.
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+# Run $(CTAGS) over the de-duplicated list of tagged files; do nothing when
+# both $(CTAGS_ARGS) and that list are empty.
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+# Append the path of every tagged file to $(top_builddir)/cscope.files.
+# A file that exists in the current (build) directory is listed as
+# $(subdir)/file; any other file is listed under the source directory,
+# which is used as is when $(srcdir) is absolute and prefixed with
+# $(subdir)/ otherwise.
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Copy every entry of $(DISTFILES) into $(distdir).
+#
+# Entry names first have a leading $(srcdir)/ stripped and a leading
+# $(top_srcdir)/ rewritten to $(top_builddir)/, and any subdirectories they
+# need under $(distdir) are created.  Each entry is then taken from the
+# build directory when it exists there, otherwise from $(srcdir).
+# Directories are copied recursively, with existing destination directories
+# first made user-readable/writable/searchable; plain files are copied only
+# when not already present in $(distdir).  Any failed copy aborts the rule.
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run "make install" with INSTALL_PROGRAM and install_sh_PROGRAM set to
+# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s.  When $(STRIP) is
+# non-empty it is additionally handed over as STRIPPROG through
+# INSTALL_PROGRAM_ENV.
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool cscopelist-am ctags ctags-am distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Build libtests.la by recursing into the tests directory.  The commands
+# are joined with && so that $(MAKE) is never run in the wrong directory
+# if the cd fails.
+$(top_builddir)/tests/libtests.la:
+ cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Convenience target: build the tuneup program, then run it from the
+# current directory.
+# NOTE(review): "tune" is not listed in .PHONY, and the run step invokes
+# ./tuneup without $(EXEEXT).
+tune:
+ $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
+ ./tuneup
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# Each of these files is a two-line stub that recompiles the mpn/generic
+# source of the same name with TUNE_PROGRAM_BUILD defined.  Only the file
+# being requested ($@) is written, so building one stub never rewrites the
+# others.
+$(TUNE_MPN_SRCS_BASIC):
+ echo "#define TUNE_PROGRAM_BUILD 1" >$@
+ echo "#include \"mpn/generic/$@\"" >>$@
+
+# Stub that recompiles mpn/generic/divrem_1.c with TUNE_PROGRAM_BUILD
+# defined and __gmpn_divrem_1 renamed to mpn_divrem_1_tune.  The rule has
+# no prerequisites, so the stub is only written when it does not exist.
+divrem_1.c:
+ echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c
+ echo "#define __gmpn_divrem_1 mpn_divrem_1_tune" >>divrem_1.c
+ echo "#include \"mpn/generic/divrem_1.c\"" >>divrem_1.c
+
+# Stub that recompiles mpn/generic/mod_1.c with TUNE_PROGRAM_BUILD defined
+# and __gmpn_mod_1 renamed to mpn_mod_1_tune.  The rule has no
+# prerequisites, so the stub is only written when it does not exist.
+mod_1.c:
+ echo "#define TUNE_PROGRAM_BUILD 1" >mod_1.c
+ echo "#define __gmpn_mod_1 mpn_mod_1_tune" >>mod_1.c
+ echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c
+
+# Generate an m4 wrapper that defines SQR_TOOM2_THRESHOLD_OVERRIDE as
+# SQR_TOOM2_THRESHOLD_MAX and then includes ../mpn/sqr_basecase.asm.  It is
+# regenerated whenever $(top_builddir)/mpn/sqr_basecase.asm is newer.
+sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
+ echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+ echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
+
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+# Stub that pulls mpz/oddfac_1.c and mpz/fac_ui.c into one translation unit
+# with TUNE_PROGRAM_BUILD defined and __gmpz_fac_ui / __gmpz_oddfac_1
+# renamed to their *_tune names.  Only mpz/fac_ui.c is listed as a
+# prerequisite, although mpz/oddfac_1.c is included as well.
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+ echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
+ echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+ echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+ echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
+ echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
+
+# .s assembler, no preprocessing.
+#
+# The backquoted test prefixes $(srcdir)/ to the source name when the file
+# is not found in the current directory; the .obj variant passes the chosen
+# path through $(CYGPATH_W).
+# NOTE(review): no "-o $@" is given in these rules, unlike the .S and .asm
+# rules -- presumably the output name is left to $(CCAS)'s default; confirm.
+.s.o:
+ $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+.s.obj:
+ $(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+.s.lo:
+ $(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .S assembler, preprocessed with cpp.
+#
+# It's necessary to run $(CPP) separately, since it seems not all compilers
+# recognise .S files, in particular "cc" on HP-UX 10 and 11 doesn't (and
+# will silently do nothing if given a .S).
+#
+# For .lo we need a helper script, as described below for .asm.lo.
+#
+# .o/.obj: run $(CPP) on the source, drop the lines beginning with '#' from
+# its output, assemble the resulting tmp-<stem>.s into $@, then delete the
+# temporary with $(RM_TMP) (which can be overridden to keep it).
+# .lo: hand the same job to the mpn/cpp-ccas helper under libtool.
+.S.o:
+ $(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+.S.obj:
+ $(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+.S.lo:
+ $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp="$(CPP) $(PREPROCESS_FLAGS)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .asm assembler, preprocessed with m4.
+#
+# .o and .obj are non-PIC and just need m4 followed by a compile.
+#
+# .lo is a bit tricky. Libtool (as of version 1.5) has foo.lo as a little
+# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,
+# respectively. It'd be asking for lots of trouble to try to create foo.lo
+# ourselves, so instead arrange to invoke libtool like a --mode=compile, but
+# with a special m4-ccas script which first m4 preprocesses, then compiles.
+# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.
+#
+# Libtool adds -DPIC when building a shared object and the .asm files look
+# for that. But it should be noted that the other PIC flags are on occasion
+# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before
+# it accepts PIC constructs like @GOT, and gcc adds that flag only under
+# -fPIC. (Later versions of gas are happy to accept PIC stuff any time.)
+#
+# .o/.obj: run $(M4) with -DOPERATION_<stem> on the source, assemble the
+# resulting tmp-<stem>.s into $@, then delete the temporary with $(RM_TMP).
+# .lo: hand the same job to the mpn/m4-ccas helper under libtool.
+.asm.o:
+ $(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+.asm.obj:
+ $(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+.asm.lo:
+ $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+.NOTPARALLEL:
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/gmp-6.3.0/tune/Makefile.am b/gmp-6.3.0/tune/Makefile.am
new file mode 100644
index 0000000..0f564ed
--- /dev/null
+++ b/gmp-6.3.0/tune/Makefile.am
@@ -0,0 +1,187 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+# * the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# or
+#
+# * the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests
+AM_LDFLAGS = -no-install
+
+EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
+ ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
+noinst_HEADERS = speed.h
+
+# Prefer -static on the speed and tune programs, since that can avoid
+# overheads of shared library linkages on some systems. Libtool tends to
+# botch -static if configured with --disable-static, perhaps reasonably
+# enough. In any event under --disable-static the only choice is a dynamic
+# link so there's no point in -static.
+#
+if ENABLE_STATIC
+STATIC = -static
+else
+STATIC =
+endif
+
+
+EXTRA_LTLIBRARIES = libspeed.la
+
+libspeed_la_SOURCES = \
+ common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
+ div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1n_pi1_3.c \
+ div_qr_1n_pi1_4.c div_qr_1_tune.c \
+ freq.c \
+ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
+ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ hgcd2-1.c hgcd2-2.c hgcd2-3.c hgcd2-4.c hgcd2-5.c \
+ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
+ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
+ set_strb.c set_strs.c set_strp.c time.c
+
+libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
+ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
+libspeed_la_LDFLAGS = $(STATIC)
+
+# Build libtests.la by recursing into the tests directory.  The commands
+# are joined with && so that $(MAKE) is never run in the wrong directory
+# if the cd fails.
+$(top_builddir)/tests/libtests.la:
+ cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+
+# The library code is faster static than shared on some systems, so do
+# tuning and measuring with static, since users who care about maximizing
+# speed will be using that. speed-dynamic exists to show the difference.
+#
+# On Solaris 8, gcc 2.95.2 -static is somehow broken (it creates executables
+# that immediately seg fault), so -all-static is not used. The only thing
+# -all-static does is make libc static linked as well as libgmp, and that
+# makes a difference only when measuring malloc and friends in the speed
+# program. This can always be forced with "make speed_LDFLAGS=-all-static
+# ..." if desired, see tune/README.
+
+EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup tune-gcd-p
+
+DEPENDENCIES = libspeed.la
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+
+speed_SOURCES = speed.c
+speed_LDFLAGS = $(STATIC)
+
+speed_dynamic_SOURCES = speed.c
+
+speed_ext_SOURCES = speed-ext.c
+speed_ext_LDFLAGS = $(STATIC)
+
+tuneup_SOURCES = tuneup.c hgcd2.c
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDFLAGS = $(STATIC)
+
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
+
+
+# Convenience target: build the tuneup program, then run it from the
+# current directory.
+# NOTE(review): the run step invokes ./tuneup without $(EXEEXT).
+tune:
+ $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
+ ./tuneup
+
+allprogs: $(EXTRA_PROGRAMS)
+
+# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
+ $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+ stg.gnuplot stg.data \
+ mtg.gnuplot mtg.data \
+ fibg.gnuplot fibg.data \
+ graph.gnuplot graph.data \
+ $(MANY_CLEAN)
+DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN)
+
+
+# Generating these little files at build time seems better than including
+# them in the distribution, since the list can be changed more easily.
+#
+# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1
+# and 2 limb divisors, which are never used during tuning, so it doesn't
+# matter whether it picks up a tuned or untuned version of those.
+#
+# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking
+# problem. If a native divrem_1 provides an mpn_divrem_1c entrypoint then
+# common.c will want that, but the generic divrem_1 doesn't provide it,
+# likewise for mod_1. The simplest way around this is to have the tune
+# build versions renamed suitably.
+#
+# FIXME: We would like, say, mul_n.c to depend on $(top_builddir)/mul_n.c so
+# the recompiled object will be rebuilt if that file changes.
+
+TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
+ dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
+ invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c sec_powm.c \
+ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \
+ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
+ toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+
+# Each of these files is a two-line stub that recompiles the mpn/generic
+# source of the same name with TUNE_PROGRAM_BUILD defined.  Only the file
+# being requested ($@) is written, so building one stub never rewrites the
+# others.
+$(TUNE_MPN_SRCS_BASIC):
+ echo "#define TUNE_PROGRAM_BUILD 1" >$@
+ echo "#include \"mpn/generic/$@\"" >>$@
+
+# Stub that recompiles mpn/generic/divrem_1.c with TUNE_PROGRAM_BUILD
+# defined and __gmpn_divrem_1 renamed to mpn_divrem_1_tune.  The rule has
+# no prerequisites, so the stub is only written when it does not exist.
+divrem_1.c:
+ echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c
+ echo "#define __gmpn_divrem_1 mpn_divrem_1_tune" >>divrem_1.c
+ echo "#include \"mpn/generic/divrem_1.c\"" >>divrem_1.c
+
+# Stub that recompiles mpn/generic/mod_1.c with TUNE_PROGRAM_BUILD defined
+# and __gmpn_mod_1 renamed to mpn_mod_1_tune.  The rule has no
+# prerequisites, so the stub is only written when it does not exist.
+mod_1.c:
+ echo "#define TUNE_PROGRAM_BUILD 1" >mod_1.c
+ echo "#define __gmpn_mod_1 mpn_mod_1_tune" >>mod_1.c
+ echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c
+
+# Generate an m4 wrapper that defines SQR_TOOM2_THRESHOLD_OVERRIDE as
+# SQR_TOOM2_THRESHOLD_MAX and then includes ../mpn/sqr_basecase.asm.  It is
+# regenerated whenever $(top_builddir)/mpn/sqr_basecase.asm is newer.
+sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
+ echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+ echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
+
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+# Stub that pulls mpz/oddfac_1.c and mpz/fac_ui.c into one translation unit
+# with TUNE_PROGRAM_BUILD defined and __gmpz_fac_ui / __gmpz_oddfac_1
+# renamed to their *_tune names.  Only mpz/fac_ui.c is listed as a
+# prerequisite, although mpz/oddfac_1.c is included as well.
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+ echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
+ echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+ echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+ echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
+ echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
+
+include ../mpn/Makeasm.am
+
+.NOTPARALLEL:
+
diff --git a/gmp-6.3.0/tune/Makefile.in b/gmp-6.3.0/tune/Makefile.in
new file mode 100644
index 0000000..7db531a
--- /dev/null
+++ b/gmp-6.3.0/tune/Makefile.in
@@ -0,0 +1,957 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+# * the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# or
+#
+# * the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+# Copyright 1996, 1998-2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+# * the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# or
+#
+# * the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+# Shell snippet used as a heuristic test for GNU make.  It fails when
+# $(MAKELEVEL) is empty; otherwise it succeeds if $(MAKE_HOST) is set, or
+# if both $(MAKE_VERSION) and $(CURDIR) are set.
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+# Shell snippet that succeeds when make was invoked with the single-letter
+# option named in the shell variable $target_option (anything other than
+# exactly one character is reported as an internal error).
+#
+# The flags examined are $MFLAGS when am__is_gnu_make succeeds, otherwise
+# $MAKEFLAGS with backslash-escaped whitespace removed.  While scanning,
+# variable assignments and long options are skipped, the arguments of
+# -I, -O and -l are stripped or skipped, and the word following a lone
+# -d/-E/-D/-m/-J/-T is skipped, so that option arguments are not mistaken
+# for option letters.
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
+ speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT)
+subdir = tune
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
+ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+am_libspeed_la_OBJECTS = common.lo divrem1div.lo divrem1inv.lo \
+ divrem2div.lo divrem2inv.lo div_qr_1n_pi1_1.lo \
+ div_qr_1n_pi1_2.lo div_qr_1n_pi1_3.lo div_qr_1n_pi1_4.lo \
+ div_qr_1_tune.lo freq.lo gcdext_single.lo gcdext_double.lo \
+ gcdextod.lo gcdextos.lo hgcd_lehmer.lo hgcd_appr_lehmer.lo \
+ hgcd_reduce_1.lo hgcd_reduce_2.lo jacbase1.lo jacbase2.lo \
+ jacbase3.lo jacbase4.lo hgcd2-1.lo hgcd2-2.lo hgcd2-3.lo \
+ hgcd2-4.lo hgcd2-5.lo mod_1_div.lo mod_1_inv.lo mod_1_1-1.lo \
+ mod_1_1-2.lo modlinv.lo noop.lo powm_mod.lo powm_redc.lo \
+ pre_divrem_1.lo set_strb.lo set_strs.lo set_strp.lo time.lo
+libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+libspeed_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
+am_speed_OBJECTS = speed.$(OBJEXT)
+speed_OBJECTS = $(am_speed_OBJECTS)
+speed_LDADD = $(LDADD)
+speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+speed_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(speed_LDFLAGS) $(LDFLAGS) -o $@
+am_speed_dynamic_OBJECTS = speed.$(OBJEXT)
+speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
+speed_dynamic_LDADD = $(LDADD)
+speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+am_speed_ext_OBJECTS = speed-ext.$(OBJEXT)
+speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
+speed_ext_LDADD = $(LDADD)
+speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+speed_ext_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
+am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT)
+tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS)
+tune_gcd_p_LDADD = $(LDADD)
+tune_gcd_p_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@
+am_tuneup_OBJECTS = tuneup.$(OBJEXT) hgcd2.$(OBJEXT)
+am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \
+ dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \
+ dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \
+ invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \
+ divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
+ get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
+ hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
+ mul_n.$(OBJEXT) sqr.$(OBJEXT) sec_powm.$(OBJEXT) \
+ mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \
+ tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \
+ mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
+ sqrlo.$(OBJEXT) sqrlo_basecase.$(OBJEXT) \
+ nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \
+ toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \
+ toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \
+ toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \
+ $(am__objects_2)
+tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
+am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
+tuneup_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(tuneup_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
+ $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
+ $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \
+ $(nodist_tuneup_SOURCES)
+DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
+ $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
+ $(tune_gcd_p_SOURCES) $(tuneup_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+HEADERS = $(noinst_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/../mpn/Makeasm.am $(srcdir)/Makefile.in \
+ README
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/tests
+AM_LDFLAGS = -no-install
+EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
+ ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
+
+noinst_HEADERS = speed.h
+@ENABLE_STATIC_FALSE@STATIC =
+
+# Prefer -static on the speed and tune programs, since that can avoid
+# overheads of shared library linkages on some systems. Libtool tends to
+# botch -static if configured with --disable-static, perhaps reasonably
+# enough. In any event under --disable-static the only choice is a dynamic
+# link so there's no point in -static.
+#
+@ENABLE_STATIC_TRUE@STATIC = -static
+EXTRA_LTLIBRARIES = libspeed.la
+libspeed_la_SOURCES = \
+ common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
+ div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1n_pi1_3.c \
+ div_qr_1n_pi1_4.c div_qr_1_tune.c \
+ freq.c \
+ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
+ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ hgcd2-1.c hgcd2-2.c hgcd2-3.c hgcd2-4.c hgcd2-5.c \
+ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
+ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
+ set_strb.c set_strs.c set_strp.c time.c
+
+libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
+ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
+
+libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
+libspeed_la_LDFLAGS = $(STATIC)
+DEPENDENCIES = libspeed.la
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+speed_SOURCES = speed.c
+speed_LDFLAGS = $(STATIC)
+speed_dynamic_SOURCES = speed.c
+speed_ext_SOURCES = speed-ext.c
+speed_ext_LDFLAGS = $(STATIC)
+tuneup_SOURCES = tuneup.c hgcd2.c
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDFLAGS = $(STATIC)
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
+
+# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
+CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
+ $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+ stg.gnuplot stg.data \
+ mtg.gnuplot mtg.data \
+ fibg.gnuplot fibg.data \
+ graph.gnuplot graph.data \
+ $(MANY_CLEAN)
+
+DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN)
+
+# Generating these little files at build time seems better than including
+# them in the distribution, since the list can be changed more easily.
+#
+# mpn/generic/tdiv_qr.c uses mpn_divrem_1 and mpn_divrem_2, but only for 1
+# and 2 limb divisors, which are never used during tuning, so it doesn't
+# matter whether it picks up a tuned or untuned version of those.
+#
+# divrem_1 and mod_1 are recompiled renamed to "_tune" to avoid a linking
+# problem. If a native divrem_1 provides an mpn_divrem_1c entrypoint then
+# common.c will want that, but the generic divrem_1 doesn't provide it,
+# likewise for mod_1. The simplest way around this is to have the tune
+# build versions renamed suitably.
+#
+# FIXME: Would like, say, mul_n.c to depend on $(top_builddir)/mul_n.c so the
+# recompiled object will be rebuilt if that file changes.
+TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
+ dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
+ invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c sec_powm.c \
+ mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c sqrlo.c sqrlo_basecase.c \
+ nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
+ toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+
+
+# COMPILE minus CC.
+#
+COMPILE_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) $(ASMFLAGS)
+
+
+# Flags used for preprocessing (in ansi2knr rules).
+#
+PREPROCESS_FLAGS = $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS)
+
+
+# Recent versions of automake (1.5 and up for instance) append automake
+# generated suffixes to this $(SUFFIXES) list. This is essential for us,
+# since .c must come after .s, .S and .asm. If .c is before .s, for
+# instance, then in the mpn directory "make" will see add_n.c mentioned in
+# an explicit rule (the ansi2knr stuff) and decide it must have add_n.c,
+# even if add_n.c doesn't exist but add_n.s does. See GNU make
+# documentation "(make)Implicit Rule Search", part 5c.
+#
+# On IRIX 6 native make this doesn't work properly though. Somehow .c
+# remains ahead of .s, perhaps because .c.s is a builtin rule. .asm works
+# fine though, and mpn/mips3 uses this.
+#
+SUFFIXES = .s .S .asm
+
+# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
+RM_TMP = rm -f
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .s .S .asm .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/../mpn/Makeasm.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu --ignore-deps tune/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(srcdir)/../mpn/Makeasm.am $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Link libspeed.la from its objects, $(libspeed_la_LIBADD) and $(LIBS).
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libspeed_la_LINK) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
+
+# Link the speed program.  Any existing executable is removed first so a
+# failed link cannot leave a stale binary behind.
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES)
+ @rm -f speed$(EXEEXT)
+ $(AM_V_CCLD)$(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
+
+# Link speed-dynamic from the same speed.c source as "speed".  This rule uses
+# the generic $(LINK) command, whose link flags are $(AM_LDFLAGS)
+# (-no-install), rather than a per-program LDFLAGS variable.
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES)
+ @rm -f speed-dynamic$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
+
+# Link the speed-ext program (built from speed-ext.c); the old executable is
+# removed before linking.
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES)
+ @rm -f speed-ext$(EXEEXT)
+ $(AM_V_CCLD)$(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
+
+# Link the tune-gcd-p program (built from tune-gcd-p.c).  Its
+# $(tune_gcd_p_DEPENDENCIES) is ../mpn/gcd.c, so a change to that file
+# triggers a relink.
+tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES)
+ @rm -f tune-gcd-p$(EXEEXT)
+ $(AM_V_CCLD)$(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS)
+
+# Link the tuneup program with $(tuneup_LINK), which passes
+# $(tuneup_LDFLAGS).  $(tuneup_LDADD) supplies $(TUNE_SQR_OBJ), libspeed.la
+# and $(TUNE_LIBS).
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES)
+ @rm -f tuneup$(EXEEXT)
+ $(AM_V_CCLD)$(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+# Compile a C source to a .o object with $(COMPILE).
+.c.o:
+ $(AM_V_CC)$(COMPILE) -c -o $@ $<
+
+# As .c.o but for the .obj suffix; the source path is first converted with
+# $(CYGPATH_W).
+.c.obj:
+ $(AM_V_CC)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+# Compile a C source through libtool ($(LTCOMPILE)) to produce a .lo file.
+.c.lo:
+ $(AM_V_CC)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool cscopelist-am ctags ctags-am distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Build libtests.la by recursing into $(top_builddir)/tests.  "&&" is used so
+# that a failed cd stops the recipe instead of running $(MAKE) in the
+# current directory.
+$(top_builddir)/tests/libtests.la:
+ cd $(top_builddir)/tests && $(MAKE) $(AM_MAKEFLAGS) libtests.la
+
+# Build the tuneup program and then run it.  The program is run under the
+# same $(EXEEXT) suffix it is built with, so the name executed always matches
+# the target that was just made.
+tune:
+ $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
+ ./tuneup$(EXEEXT)
+
+# Convenience target: build every program listed in $(EXTRA_PROGRAMS).
+allprogs: $(EXTRA_PROGRAMS)
+
+# Generate the wrapper sources named in $(TUNE_MPN_SRCS_BASIC).  Each wrapper
+# defines TUNE_PROGRAM_BUILD and then includes the file of the same name from
+# mpn/generic.  Note that the loop writes every wrapper in the list each time
+# the recipe runs, not only the target that triggered it.
+$(TUNE_MPN_SRCS_BASIC):
+ for i in $(TUNE_MPN_SRCS_BASIC); do \
+ echo "#define TUNE_PROGRAM_BUILD 1" >$$i; \
+ echo "#include \"mpn/generic/$$i\"" >>$$i; \
+ done
+
+# Generate divrem_1.c: a wrapper that defines TUNE_PROGRAM_BUILD, renames
+# __gmpn_divrem_1 to mpn_divrem_1_tune, and then includes
+# mpn/generic/divrem_1.c.
+divrem_1.c:
+ echo "#define TUNE_PROGRAM_BUILD 1" >divrem_1.c
+ echo "#define __gmpn_divrem_1 mpn_divrem_1_tune" >>divrem_1.c
+ echo "#include \"mpn/generic/divrem_1.c\"" >>divrem_1.c
+
+# Generate mod_1.c: a wrapper that defines TUNE_PROGRAM_BUILD, renames
+# __gmpn_mod_1 to mpn_mod_1_tune, and then includes mpn/generic/mod_1.c.
+mod_1.c:
+ echo "#define TUNE_PROGRAM_BUILD 1" >mod_1.c
+ echo "#define __gmpn_mod_1 mpn_mod_1_tune" >>mod_1.c
+ echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c
+
+# Generate sqr_asm.asm: an m4 wrapper that defines
+# SQR_TOOM2_THRESHOLD_OVERRIDE as SQR_TOOM2_THRESHOLD_MAX and then includes
+# ../mpn/sqr_basecase.asm.  It is regenerated whenever that file changes.
+sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
+ echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+ echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
+
+# Generate fac_ui.c: a wrapper that defines TUNE_PROGRAM_BUILD, renames
+# __gmpz_fac_ui and __gmpz_oddfac_1 to their "_tune" names, and then includes
+# mpz/oddfac_1.c followed by mpz/fac_ui.c.
+# NOTE(review): mpz/oddfac_1.c is included but is not listed as a
+# prerequisite, so a change to that file alone does not trigger this rule.
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+ echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
+ echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+ echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+ echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
+ echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
+
+# .s assembler, no preprocessing.
+#
+# In each rule the backquoted "test -f" expression prefixes $(srcdir)/ to the
+# source name when the file is not found in the current directory.
+# .s -> .o: assemble directly with $(CCAS).
+.s.o:
+ $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+# .s -> .obj: as above, with the source path converted by $(CYGPATH_W).
+.s.obj:
+ $(CCAS) $(COMPILE_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi`
+# .s -> .lo: the same assembly step run under libtool --mode=compile.
+.s.lo:
+ $(LIBTOOL) --mode=compile --tag=CC $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .S assembler, preprocessed with cpp.
+#
+# It's necessary to run $(CPP) separately, since it seems not all compilers
+# recognise .S files, in particular "cc" on HP-UX 10 and 11 doesn't (and
+# will silently do nothing if given a .S).
+#
+# For .lo we need a helper script, as described below for .asm.lo.
+#
+# .S -> .o: preprocess with $(CPP), drop lines beginning with "#", assemble
+# the temporary tmp-$*.s with $(CCAS), then remove it with $(RM_TMP).
+.S.o:
+ $(CPP) $(PREPROCESS_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+# .S -> .obj: same steps, with the source path converted by $(CYGPATH_W).
+.S.obj:
+ $(CPP) $(PREPROCESS_FLAGS) `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+# .S -> .lo: libtool runs the mpn/cpp-ccas helper script, which is given the
+# preprocessor command via --cpp and the assembler command after it.
+.S.lo:
+ $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/cpp-ccas --cpp="$(CPP) $(PREPROCESS_FLAGS)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+# .asm assembler, preprocessed with m4.
+#
+# .o and .obj are non-PIC and just need m4 followed by a compile.
+#
+# .lo is a bit tricky. Libtool (as of version 1.5) has foo.lo as a little
+# text file, and .libs/foo.o and foo.o as the PIC and non-PIC objects,
+# respectively. It'd be asking for lots of trouble to try to create foo.lo
+# ourselves, so instead arrange to invoke libtool like a --mode=compile, but
+# with a special m4-ccas script which first m4 preprocesses, then compiles.
+# --tag=CC is necessary since foo.asm is otherwise unknown to libtool.
+#
+# Libtool adds -DPIC when building a shared object and the .asm files look
+# for that. But it should be noted that the other PIC flags are on occasion
+# important too, in particular FreeBSD 2.2.8 gas 1.92.3 requires -k before
+# it accepts PIC constructs like @GOT, and gcc adds that flag only under
+# -fPIC. (Later versions of gas are happy to accept PIC stuff any time.)
+#
+# .asm -> .o: run $(M4) with -DOPERATION_<stem> to produce tmp-$*.s, assemble
+# it with $(CCAS), then remove the temporary with $(RM_TMP).
+.asm.o:
+ $(M4) -DOPERATION_$* `test -f '$<' || echo '$(srcdir)/'`$< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+# .asm -> .obj: same steps, with the source path converted by $(CYGPATH_W).
+.asm.obj:
+ $(M4) -DOPERATION_$* `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(RM_TMP) tmp-$*.s
+# .asm -> .lo: libtool runs the mpn/m4-ccas helper script, which is given the
+# m4 command via --m4 and the assembler command after it.
+.asm.lo:
+ $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+
+.NOTPARALLEL:
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/gmp-6.3.0/tune/README b/gmp-6.3.0/tune/README
new file mode 100644
index 0000000..f76407f
--- /dev/null
+++ b/gmp-6.3.0/tune/README
@@ -0,0 +1,501 @@
+Copyright 2000-2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+
+ GMP SPEED MEASURING AND PARAMETER TUNING
+
+
+The programs in this directory are for knowledgeable users who want to
+measure GMP routines on their machine, and perhaps tweak some settings or
+identify things that can be improved.
+
+The programs here are tools, not ready-to-run solutions. Nothing is built
+in a normal "make all", but various Makefile targets described below exist.
+
+Relatively few systems and CPUs have been tested, so be sure to verify that
+results are sensible before relying on them.
+
+
+
+
+MISCELLANEOUS NOTES
+
+--enable-assert
+
+ Don't configure with --enable-assert, since the extra code added by
+ assertion checking may influence measurements.
+
+Direct mapped caches
+
+ Some effort has been made to accommodate CPUs with direct mapped caches,
+ by putting data blocks more or less contiguously on the stack. But this
+ will depend on TMP_ALLOC using alloca, and even then it may or may not
+ be enough.
+
+FreeBSD 4.2 i486 getrusage
+
+ This getrusage seems to be a bit doubtful: it looks like it's
+ microsecond accurate, but sometimes ru_utime remains unchanged after
+ many microseconds have elapsed. It'd be good to detect this in
+ the time.c initializations, but for now the suggestion is to pretend it
+ doesn't exist.
+
+ ./configure ac_cv_func_getrusage=no
+
+NetBSD 1.4.1 m68k macintosh time base
+
+ On this system it's been found getrusage often goes backwards, making it
+ unusable (time.c getrusage_backwards_p detects this). gettimeofday
+ sometimes doesn't update atomically when it crosses a 1 second boundary.
+ Not sure what to do about this. Expect possible intermittent failures.
+
+SCO OpenUNIX 8 /etc/hw
+
+ /etc/hw takes about a second to return the cpu frequency, which suggests
+ perhaps it's measuring each time it runs. If this is annoying when
+ running the speed program repeatedly then set a GMP_CPU_FREQUENCY
+ environment variable (see TIME BASE section below).
+
+Timing on GNU/Linux
+
+ On Linux, timing currently uses the cycle counter. This is unreliable,
+ since the counter is not saved and restored at context switches (unlike
+ FreeBSD and Solaris where the cycle counter is "virtualized").
+
+ Using the clock_gettime method with CLOCK_PROCESS_CPUTIME_ID (posix) or
+ CLOCK_VIRTUAL (BSD) should be more reliable. To get clock_gettime
+ with glibc, one has to link with -lrt (which also drags in the pthreads
+ threading library). configure.in must be hacked to detect this and
+ arrange proper linking. Something like
+
+ old_LIBS="$LIBS"
+ AC_SEARCH_LIBS(clock_gettime, rt, [AC_DEFINE(HAVE_CLOCK_GETTIME)])
+ TUNE_LIBS="$LIBS"
+ LIBS="$old_LIBS"
+
+ AC_SUBST(TUNE_LIBS)
+
+ might work.
+
+Low resolution timebase
+
+ Parameter tuning can be very time consuming if the only timebase
+ available is a 10 millisecond clock tick, to the point of being
+ unusable. This is currently the case on VAX and ARM systems.
+
+
+
+
+PARAMETER TUNING
+
+The "tuneup" program runs some tests designed to find the best settings for
+various thresholds, like MUL_TOOM22_THRESHOLD. Its output can be put
+into gmp-mparam.h. The program is built and run with
+
+ make tune
+
+If the thresholds indicated are grossly different from the values in the
+selected gmp-mparam.h then there may be a performance boost in applicable
+size ranges by changing gmp-mparam.h accordingly.
+
+Be sure to do a full reconfigure and rebuild to get any newly set thresholds
+to take effect. A partial rebuild is enough sometimes, but a fresh
+configure and make is certain to be correct.
+
+If a CPU has specific tuned parameters coming from a gmp-mparam.h in one of
+the mpn subdirectories then the values from "make tune" should be similar.
+But check that the configured CPU is right and there are no machine specific
+effects causing a difference.
+
+It's hoped the compiler and options used won't have too much effect on
+thresholds, since for most CPUs they ultimately come down to comparisons
+between assembler subroutines. Missing out on the longlong.h macros by not
+using gcc will probably have an effect.
+
+Some thresholds produced by the tune program are merely single values chosen
+from what's a range of sizes where two algorithms are pretty much the same
+speed. When this happens the program is likely to give somewhat different
+values on successive runs. This is noticeable on the toom3 thresholds for
+instance.
+
+
+
+
+SPEED PROGRAM
+
+The "speed" program can be used for measuring and comparing various
+routines, and producing tables of data or gnuplot graphs. Compile it with
+
+ make speed
+
+(Or on DOS systems "make speed.exe".)
+
+Here are some examples of how to use it. Check the code for all the
+options.
+
+Draw a graph of mpn_mul_n, stepping through sizes by 10 or a factor of 1.05
+(whichever is greater).
+
+ ./speed -s 10-5000 -t 10 -f 1.05 -P foo mpn_mul_n
+ gnuplot foo.gnuplot
+
+Compare mpn_add_n and an mpn_lshift by 1, showing times in cycles and
+showing under mpn_lshift the difference between it and mpn_add_n.
+
+ ./speed -s 1-40 -c -d mpn_add_n mpn_lshift.1
+
+Using option -c for times in cycles is interesting but normally only
+necessary when looking carefully at assembler subroutines. You might think
+it would always give an integer value, but this doesn't happen in practice,
+probably due to overheads in the time measurements.
+
+In the free-form output the "#" symbol against a measurement means the
+corresponding routine is fastest at that size. This is a convenient visual
+cue when comparing different routines. The graph data files <name>.data
+don't get this since it would upset gnuplot or other data viewers.
+
+
+
+
+TIME BASE
+
+The time measuring method is determined in time.c, based on what the
+configured host has available. A cycle counter is preferred, possibly
+supplemented by another method if the counter has a limited range. A
+microsecond accurate getrusage() or gettimeofday() will work quite well too.
+
+The cycle counters (except possibly on alpha) and gettimeofday() will depend
+on the machine being otherwise idle, or rather on other jobs not stealing
+CPU time from the measuring program. Short routines (those that complete
+within a timeslice) should work even on a busy machine.
+
+Some trouble is taken by speed_measure() in common.c to avoid ill effects
+from sporadic interrupts, or other intermittent things (like cron waking up
+every minute). But generally an idle machine will be necessary to be
+certain of consistent results.
+
+The CPU frequency is needed to convert between cycles and seconds, or for
+when a cycle counter is supplemented by getrusage() etc. The speed program
+will convert as necessary according to the output format requested. The
+tune program will work with either cycles or seconds.
+
+freq.c knows how to get the frequency on some systems, or can measure a
+cycle counter against gettimeofday() or getrusage(), but when that fails, or
+needs to be overridden, an environment variable GMP_CPU_FREQUENCY can be
+used (in Hertz). For example in "bash" on a 650 MHz machine,
+
+ export GMP_CPU_FREQUENCY=650e6
+
+A high precision time base makes it possible to get accurate measurements in
+a shorter time.
+
+
+
+
+EXAMPLE COMPARISONS - VARIOUS
+
+Here are some ideas for things that can be done with the speed program.
+
+There's always going to be a certain amount of overhead in the time
+measurements, due to reading the time base, and in the loop that runs a
+routine enough times to get a reading of the desired precision. Noop
+functions taking various arguments are available to measure this. The
+"overhead" printed by the speed program each time in its intro is the "noop"
+routine, but note that this is just for information; it isn't deducted from
+the times printed or anything.
+
+ ./speed -s 1 noop noop_wxs noop_wxys
+
+To see how many cycles per limb a routine is taking, look at the time
+increase when the size increments, using option -D. This avoids fixed
+overheads in the measuring. Also, remember many of the assembler routines
+have unrolled loops, so it might be necessary to compare times at, say, 16,
+32, 48, 64 etc to see what the unrolled part is taking, as opposed to any
+finishing off.
+
+ ./speed -s 16-64 -t 16 -C -D mpn_add_n
+
+The -C option on its own gives cycles per limb, but is really only useful at
+big sizes where fixed overheads are small compared to the code doing the
+real work. Remember of course memory caching and/or page swapping will
+affect results at large sizes.
+
+ ./speed -s 500000 -C mpn_add_n
+
+Once a calculation stops fitting in the CPU data cache, it's going to start
+taking longer. Exactly where this happens depends on the cache priming in
+the measuring routines, and on what sort of "least recently used" the
+hardware does. Here's an example for a CPU with a 16kbyte L1 data cache and
+32-bit limb, showing a suddenly steeper curve for mpn_add_n at about 2000
+limbs.
+
+ ./speed -s 1-4000 -t 5 -f 1.02 -P foo mpn_add_n
+ gnuplot foo.gnuplot
+
+When a routine has an unrolled loop for, say, multiples of 8 limbs and then
+an ordinary loop for the remainder, it can happen that it's actually faster
+to do an operation on, say, 8 limbs than it is on 7 limbs. The following
+draws a graph of mpn_sub_n, to see whether times smoothly increase with
+size.
+
+ ./speed -s 1-100 -c -P foo mpn_sub_n
+ gnuplot foo.gnuplot
+
+If mpn_lshift and mpn_rshift have special case code for shifts by 1, it
+ought to be faster (or at least not slower) than shifting by, say, 2 bits.
+
+ ./speed -s 1-200 -c mpn_rshift.1 mpn_rshift.2
+
+An mpn_lshift by 1 can be done by mpn_add_n adding a number to itself, and
+if the lshift isn't faster there's an obvious improvement that's possible.
+
+ ./speed -s 1-200 -c mpn_lshift.1 mpn_add_n_self
+
+On some CPUs (AMD K6 for example) an "in-place" mpn_add_n where the
+destination is one of the sources is faster than a separate destination.
+Here's an example to see this. ".1" selects dst==src1 for mpn_add_n (and
+mpn_sub_n), for other values see speed.h SPEED_ROUTINE_MPN_BINARY_N_CALL.
+
+ ./speed -s 1-200 -c mpn_add_n mpn_add_n.1
+
+The gmp manual points out that divisions by powers of two should be done
+using a right shift because it'll be significantly faster than an actual
+division. The following shows by what factor mpn_rshift is faster than
+mpn_divrem_1, using division by 32 as an example.
+
+ ./speed -s 10-20 -r mpn_rshift.5 mpn_divrem_1.32
+
+
+
+
+EXAMPLE COMPARISONS - MULTIPLICATION
+
+mul_basecase takes a ".<r>" parameter. If positive, it gives the second
+(smaller) operand size. For example to show speeds for 3x3 up to 20x3 in
+cycles,
+
+ ./speed -s 3-20 -c mpn_mul_basecase.3
+
+A negative ".<-r>" parameter fixes the size of the product to the absolute
+value r. For example to show speeds for 10x10 up to 19x1 in cycles,
+
+ ./speed -s 10-19 -c mpn_mul_basecase.-20
+
+mul_basecase with no parameter does an NxN multiply, so for example to show
+speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20,
+
+ ./speed -s 1-20 -c mpn_mul_basecase
+
+sqr_basecase is implemented by a "triangular" method on most CPUs, making it
+up to twice as fast as mul_basecase. In practice loop overheads and the
+products on the diagonal mean it falls short of this. Here's an example
+running the two and showing by what factor an NxN mul_basecase is slower
+than an NxN sqr_basecase. (Some versions of sqr_basecase only allow sizes
+below SQR_TOOM2_THRESHOLD, so if it crashes at that point don't worry.)
+
+ ./speed -s 1-20 -r mpn_sqr_basecase mpn_mul_basecase
+
+The technique described above with -CD for showing the time difference in
+cycles per limb between operations of two sizes can be done on an NxN
+mul_basecase using -E to change the basis for the size increment to N*N.
+For instance a 20x20 operation is taken to be doing 400 limbs, and a 16x16
+doing 256 limbs. The following therefore shows the per crossproduct speed
+of mul_basecase and sqr_basecase at around 20x20 limbs.
+
+ ./speed -s 16-20 -t 4 -CDE mpn_mul_basecase mpn_sqr_basecase
+
+Of course sqr_basecase isn't really doing NxN crossproducts, but it can be
+interesting to compare it to mul_basecase as if it was. For sqr_basecase
+the -F option can be used to base the deltas on N*(N+1)/2 operations, which
+is the triangular products sqr_basecase does. For example,
+
+ ./speed -s 16-20 -t 4 -CDF mpn_sqr_basecase
+
+Both -E and -F are preliminary and might change. A consistent approach to
+using them when claiming certain per crossproduct or per triangularproduct
+speeds hasn't really been established, but taking the increment over the
+range of sizes karatsuba will call seems sensible, that being k/2 to k. For
+instance, if the karatsuba threshold was 20 for the multiply and 30 for the
+square,
+
+ ./speed -s 10-20 -t 10 -CDE mpn_mul_basecase
+ ./speed -s 15-30 -t 15 -CDF mpn_sqr_basecase
+
+
+
+EXAMPLE COMPARISONS - MALLOC
+
+The gmp manual recommends application programs avoid excessive initializing
+and clearing of mpz_t variables (and mpq_t and mpf_t too). Every new
+variable will at a minimum go through an init, a realloc for its first
+store, and finally a clear. Quite how long that takes depends on the C
+library. The following compares an mpz_init/realloc/clear to a 10 limb
+mpz_add. Don't be surprised if the mallocing is quite slow.
+
+ ./speed -s 10 -c mpz_init_realloc_clear mpz_add
+
+On some systems malloc and free are much slower when dynamic linked. The
+speed-dynamic program can be used to see this. For example the following
+measures malloc/free, first static then dynamic.
+
+ ./speed -s 10 -c malloc_free
+ ./speed-dynamic -s 10 -c malloc_free
+
+Of course a real world program has big problems if it's doing so many
+mallocs and frees that it gets slowed down by a dynamic linked malloc.
+
+
+
+
+
+EXAMPLE COMPARISONS - STRING CONVERSIONS
+
+mpn_get_str does a binary to string conversion. The base is specified with
+a ".<r>" parameter, or decimal by default. Power of 2 bases are much faster
+than general bases. The following compares decimal and hex for instance.
+
+ ./speed -s 1-20 -c mpn_get_str mpn_get_str.16
+
+Smaller bases need more divisions to split a given size number, and so are
+slower. The following compares base 3 and base 9. On small operands 9 will
+be nearly twice as fast, though at bigger sizes this reduces since in the
+current implementation both divide repeatedly by 3^20 (or 3^40 for 64 bit
+limbs) and those divisions come to dominate.
+
+ ./speed -s 1-20 -cr mpn_get_str.3 mpn_get_str.9
+
+mpn_set_str does a string to binary conversion. The base is specified with
+a ".<r>" parameter, or decimal by default. Power of 2 bases are faster than
+general bases on large conversions.
+
+ ./speed -s 1-512 -f 2 -c mpn_set_str.8 mpn_set_str.10
+
+mpn_set_str also has some special case code for decimal which is a bit
+faster than the general case, basically by giving the compiler a chance to
+optimize some multiplications by 10.
+
+ ./speed -s 20-40 -c mpn_set_str.9 mpn_set_str.10 mpn_set_str.11
+
+
+
+
+EXAMPLE COMPARISONS - GCDs
+
+mpn_gcd_1 has a threshold for when to reduce using an initial x%y when both
+x and y are single limbs. This isn't tuned currently, but a value can be
+established by a measurement like
+
+ ./speed -s 10-32 mpn_gcd_1.10
+
+This runs src[0] from 10 to 32 bits, with y fixed at 10 bits. If the div
+threshold is high, say 31 so it's effectively disabled, then a 32x10 bit
+gcd is done by nibbling away at the 32-bit operand bit-by-bit. When the
+threshold is small, say 1 bit, then an initial x%y is done to reduce it to a
+10x10 bit operation.
+
+The threshold in mpn/generic/gcd_1.c or the various assembler
+implementations can be tweaked up or down until there are no more speedups
+on interesting combinations of sizes. Note that this affects only a 1x1 limb
+operation and so isn't very important. (An Nx1 limb operation always does
+an initial modular reduction, using mpn_mod_1 or mpn_modexact_1_odd.)
+
+
+
+
+SPEED PROGRAM EXTENSIONS
+
+Potentially lots of things could be made available in the program, but it's
+been left at only the things that have actually been wanted and are likely
+to be reasonably useful in the future.
+
+Extensions should be fairly easy to make though. speed-ext.c is an example,
+in a style that should suit one-off tests, or new code fragments under
+development.
+
+many.pl is a script for generating a new speed program supplemented with
+alternate versions of the standard routines. It can be used for measuring
+experimental code, or for comparing different implementations that exist
+within a CPU family.
+
+
+
+
+THRESHOLD EXAMINING
+
+The speed program can be used to examine the speeds of different algorithms
+to check the tune program has done the right thing. For example to examine
+the karatsuba multiply threshold,
+
+ ./speed -s 5-40 mpn_mul_basecase mpn_kara_mul_n
+
+When examining the toom3 threshold, remember it depends on the karatsuba
+threshold, so the right karatsuba threshold needs to be compiled into the
+library first. The tune program uses specially recompiled versions of
+mpn/mul_n.c etc for this reason, but the speed program simply uses the
+normal libgmp.la.
+
+Note further that the various routines may recurse into themselves on sizes
+far enough above applicable thresholds. For example, mpn_kara_mul_n will
+recurse into itself on sizes greater than twice the compiled-in
+MUL_TOOM22_THRESHOLD.
+
+When doing the above comparison between mul_basecase and kara_mul_n what's
+probably of interest is mul_basecase versus a kara_mul_n that does one level
+of Karatsuba then calls to mul_basecase, but this only happens on sizes less
+than twice the compiled MUL_TOOM22_THRESHOLD. A larger value for that
+setting can be compiled-in to avoid the problem if necessary. The same
+applies to toom3 and DC, though in a trickier fashion.
+
+There are some upper limits on some of the thresholds, arising from arrays
+dimensioned according to a threshold (mpn_mul_n), or asm code with certain
+sized displacements (some x86 versions of sqr_basecase). So putting huge
+values for the thresholds, even just for testing, may fail.
+
+
+
+
+FUTURE
+
+Make a program to check the time base is working properly, for small and
+large measurements. Make it able to test each available method, including
+perhaps the apparent resolution of each.
+
+Make a general mechanism for specifying operand overlap, and a syntax like
+maybe "mpn_add_n.dst=src2" to select it. Some measuring routines do this
+sort of thing with the "r" parameter currently.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/gmp-6.3.0/tune/alpha.asm b/gmp-6.3.0/tune/alpha.asm
new file mode 100644
index 0000000..888c77f
--- /dev/null
+++ b/gmp-6.3.0/tune/alpha.asm
@@ -0,0 +1,59 @@
+dnl Alpha time stamp counter access routine.
+
+dnl Copyright 2000, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned int p[2]);
+C
+
+C The rpcc instruction returns a 64-bit value split into two 32-bit fields.
+C The lower 32 bits are set by the hardware, and the upper 32 bits are set
+C by the operating system. The real per-process cycle count is the sum of
+C these halves.
+
+C Unfortunately, some operating systems don't get this right. NetBSD 1.3 is
+C known to sometimes put garbage in the upper half. Whether newer NetBSD
+C versions get it right is unknown to us.
+
+C rpcc measures cycles elapsed in the user program and hence should be very
+C accurate even on a busy system. Losing cache contents due to task
+C switching may have an effect though.
+
+ASM_START()
+PROLOGUE(speed_cyclecounter)
+ rpcc r0 C r0 = 64-bit counter value, two 32-bit fields
+ srl r0,32,r1 C r1 = upper 32-bit field
+ addq r1,r0,r0 C sum of the two fields ends up in the low 32 bits of r0
+ stl r0,0(r16) C store the low 32 bits as the lower return word
+ stl r31,4(r16) C zero upper return word
+ ret r31,(r26),1
+EPILOGUE(speed_cyclecounter)
+ASM_END()
diff --git a/gmp-6.3.0/tune/common.c b/gmp-6.3.0/tune/common.c
new file mode 100644
index 0000000..48da6c6
--- /dev/null
+++ b/gmp-6.3.0/tune/common.c
@@ -0,0 +1,2945 @@
+/* Shared speed subroutines.
+
+Copyright 1999-2006, 2008-2017, 2019-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __GMP_NO_ATTRIBUTE_CONST_PURE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h> /* for qsort */
+#include <string.h>
+#include <unistd.h>
+#if 0
+#include <sys/ioctl.h>
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+#include "speed.h"
+
+
+int speed_option_addrs = 0; /* set to 1 by "-o addrs"; makes speed_cache_fill() print operand addresses */
+int speed_option_verbose = 0; /* raised by "-o verbose" or set by "-o verbose=N"; at 3 or more speed_measure() prints every attempt */
+int speed_option_cycles_broken = 0; /* set to 1 by "-o cycles-broken"; not read in this part of the file */
+
+
+/* Provide __clz_tab even if it's not required, for the benefit of new code
+ being tested with many.pl. */
+#ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#include "mp_clz_tab.c"
+#undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+/* Hook called by speed_cache_fill() when s->cache is 1, presumably meant
+ to empty the CPU caches before a measurement. Both candidate
+ implementations below are disabled with "#if 0", so as compiled this
+ function does nothing. */
+void
+pentium_wbinvd(void)
+{
+#if 0 /* disabled: open /dev/wbinvd once and issue an ioctl on it each call */
+ {
+ static int fd = -2;
+
+ if (fd == -2)
+ {
+ fd = open ("/dev/wbinvd", O_RDWR);
+ if (fd == -1)
+ perror ("open /dev/wbinvd");
+ }
+
+ if (fd != -1)
+ ioctl (fd, 0, 0);
+ }
+#endif
+
+#if 0 /* disabled: read through a 2 Mbyte buffer, presumably to displace cache contents */
+#define WBINVDSIZE 1024*1024*2
+ {
+ static char *p = NULL;
+ int i, sum;
+
+ if (p == NULL)
+ p = malloc (WBINVDSIZE); /* result is not checked for NULL */
+
+#if 0
+ for (i = 0; i < WBINVDSIZE; i++)
+ p[i] = i & 0xFF;
+#endif
+
+ sum = 0;
+ for (i = 0; i < WBINVDSIZE; i++)
+ sum += p[i];
+
+ mpn_cache_fill_dummy (sum); /* use the sum so the read loop is not optimized away */
+ }
+#endif
+}
+
+
+/* Three-way comparison of the doubles that P and Q point to, for use as a
+   qsort comparison function: 1 if *p is the larger, -1 if it is the
+   smaller, 0 otherwise.  */
+int
+double_cmp_ptr (const double *p, const double *q)
+{
+  const double lhs = *p;
+  const double rhs = *q;
+
+  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1.  */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/* Measure the speed of a given routine.
+
+ The routine is run with enough repetitions to make it take at least
+ speed_precision * speed_unittime. This aims to minimize the effects of a
+ limited accuracy time base and the overhead of the measuring itself.
+
+ Measurements are made looking for 4 results within TOLERANCE of each
+ other (or 3 for routines taking longer than 2 seconds). This aims to get
+ an accurate reading even if some runs are bloated by interrupts or task
+ switches or whatever.
+
+ The given (*fun)() is expected to run its function "s->reps" many times
+ and return the total elapsed time measured using speed_starttime() and
+ speed_endtime(). If the function doesn't support the given s->size or
+ s->r, -1.0 should be returned. See the various base routines below. */
+
+double
+speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
+{
+#define TOLERANCE 1.01 /* 1% */
+ const int max_zeros = 10; /* abort once more than this many 0.0 readings have been seen */
+
+ struct speed_params s_dummy;
+ int i, j, e;
+ double t[30]; /* time per call for each attempt; sorted in place by the qsort below */
+ double t_unsorted[30]; /* the same readings in measurement order, kept for the failure report */
+ double reps_d;
+ int zeros = 0;
+
+ /* Use dummy parameters if caller doesn't provide any. Only a few special
+ "fun"s will cope with this, speed_noop() is one. */
+ if (s == NULL)
+ {
+ memset (&s_dummy, '\0', sizeof (s_dummy));
+ s = &s_dummy;
+ }
+
+ s->reps = 1; /* start with one repetition; the inner loop raises it as needed */
+ s->time_divisor = 1.0;
+ for (i = 0; i < numberof (t); i++)
+ {
+ for (;;) /* repeat until this attempt has run for long enough */
+ {
+ s->src_num = 0; /* empty the operand lists before each call of fun */
+ s->dst_num = 0;
+
+ t[i] = (*fun) (s);
+
+ if (speed_option_verbose >= 3)
+ gmp_printf("size=%ld reps=%u r=%Md attempt=%d %.9f\n",
+ (long) s->size, s->reps, s->r, i, t[i]);
+
+ if (t[i] == 0.0) /* nothing measurable: count it and retry, doubling reps while below 10000 */
+ {
+ zeros++;
+ if (zeros > max_zeros)
+ {
+ fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
+ abort ();
+ }
+ if (s->reps < 10000)
+ s->reps *= 2;
+
+ continue;
+ }
+
+ if (t[i] == -1.0) /* fun reports the size or r as unsupported; pass that on */
+ return -1.0;
+
+ if (t[i] >= speed_unittime * speed_precision) /* total time is long enough to accept */
+ break;
+
+ /* go to a value of reps to make t[i] >= precision */
+ reps_d = ceil (1.1 * s->reps
+ * speed_unittime * speed_precision
+ / MAX (t[i], speed_unittime));
+ if (reps_d > 2e9 || reps_d < 1.0)
+ {
+ fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
+ fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
+ s->reps, speed_unittime, speed_precision, t[i]);
+ abort ();
+ }
+ s->reps = (unsigned) reps_d;
+ }
+ t[i] /= s->reps; /* total elapsed time -> time per call */
+ t_unsorted[i] = t[i];
+
+ if (speed_precision == 0) /* precision 0: return the first reading as is; time_divisor is not applied on this path */
+ return t[i];
+
+ /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
+ if (t[0] >= 2.0)
+ e = 3;
+ else
+ e = 4;
+
+ /* Look for e many t[]'s within TOLERANCE of each other to consider a
+ valid measurement. Return smallest among them. */
+ if (i >= e)
+ {
+ qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
+ for (j = e-1; j < i; j++)
+ if (t[j] <= t[j-e+1] * TOLERANCE) /* t[j-e+1] .. t[j] are e consecutive sorted readings */
+ return t[j-e+1] / s->time_divisor;
+ }
+ }
+ /* All numberof (t) attempts were used without finding e readings that agree:
+ print the readings to stderr and return -1.0. */
+ fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
+ e, (TOLERANCE-1.0)*100.0);
+ fprintf (stderr, " unsorted sorted\n");
+ fprintf (stderr, " %.12f %.12f is about %.1f%%\n",
+ t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0),
+ 100*(TOLERANCE-1.0));
+ for (i = 0; i < numberof (t); i++)
+ fprintf (stderr, " %.09f %.09f\n", t_unsorted[i], t[i]);
+
+ return -1.0;
+}
+
+
+/* Read all of ptr,size to get it into the CPU memory cache.
+
+ A call to mpn_cache_fill_dummy() is used to make sure the compiler
+ doesn't optimize away the whole loop. Using "volatile mp_limb_t sum"
+ would work too, but the function call means we don't rely on every
+ compiler actually implementing volatile properly.
+
+ mpn_cache_fill_dummy() is in a separate source file to stop gcc thinking
+ it can inline it. */
+
+void
+mpn_cache_fill (mp_srcptr ptr, mp_size_t size)
+{
+  mp_limb_t total = 0;
+  mp_srcptr limb = ptr;
+  mp_size_t remaining = size;
+
+  /* Touch every limb of the operand; the running total only exists to
+     give the reads a use.  */
+  while (remaining > 0)
+    {
+      total += *limb++;
+      remaining--;
+    }
+
+  /* Hand the total to an external function so the loop must be kept.  */
+  mpn_cache_fill_dummy (total);
+}
+
+/* Cache priming for an operand that is going to be written, PTR,SIZE.
+ As compiled this only reads the limbs through mpn_cache_fill(); the two
+ alternatives that actually store to the operand are disabled with
+ "#if 0". */
+void
+mpn_cache_fill_write (mp_ptr ptr, mp_size_t size)
+{
+ mpn_cache_fill (ptr, size);
+
+#if 0 /* disabled: overwrite the operand using mpn_random */
+ mpn_random (ptr, size);
+#endif
+
+#if 0 /* disabled: store the index into each limb */
+ mp_size_t i;
+
+ for (i = 0; i < size; i++)
+ ptr[i] = i;
+#endif
+}
+
+
+/* Append the operand PTR,SIZE to the list kept in s->src[] and bump
+   s->src_num.  If the list is already full, a message is printed on
+   stderr and the program aborts.  */
+void
+speed_operand_src (struct speed_params *s, mp_ptr ptr, mp_size_t size)
+{
+  if (s->src_num < numberof (s->src))
+    {
+      s->src[s->src_num].ptr = ptr;
+      s->src[s->src_num].size = size;
+      s->src_num++;
+      return;
+    }
+
+  fprintf (stderr, "speed_operand_src: no room left in s->src[]\n");
+  abort ();
+}
+
+
+/* Append the operand PTR,SIZE to the list kept in s->dst[] and bump
+   s->dst_num.  If the list is already full, a message is printed on
+   stderr and the program aborts.  */
+void
+speed_operand_dst (struct speed_params *s, mp_ptr ptr, mp_size_t size)
+{
+  if (s->dst_num < numberof (s->dst))
+    {
+      s->dst[s->dst_num].ptr = ptr;
+      s->dst[s->dst_num].size = size;
+      s->dst_num++;
+      return;
+    }
+
+  fprintf (stderr, "speed_operand_dst: no room left in s->dst[]\n");
+  abort ();
+}
+
+
+/* Prime the caches for the operands registered in S, as selected by
+   s->cache, and optionally report the operand addresses.
+
+   When speed_option_addrs is set, the dst and src pointers are printed
+   each time the set of operands differs from the one seen on the previous
+   call, followed by the address of a local variable as a rough indication
+   of where the stack is.
+
+   s->cache == 0 reads every dst and src operand; s->cache == 1 calls
+   pentium_wbinvd(); any other value does nothing.  */
+void
+speed_cache_fill (struct speed_params *s)
+{
+  static struct speed_params prev;
+  int i;
+
+  if (speed_option_addrs)
+    {
+      int different;
+
+      different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num);
+      for (i = 0; i < s->dst_num; i++)
+        different |= (s->dst[i].ptr != prev.dst[i].ptr);
+      for (i = 0; i < s->src_num; i++)
+        different |= (s->src[i].ptr != prev.src[i].ptr);
+
+      if (different)
+        {
+          /* Pointers are printed with %p rather than being cast to
+             unsigned long, since long can be narrower than a pointer and
+             the cast would then lose the high bits.  */
+          if (s->dst_num != 0)
+            {
+              printf ("dst");
+              for (i = 0; i < s->dst_num; i++)
+                printf (" %p", (void *) s->dst[i].ptr);
+              printf (" ");
+            }
+
+          if (s->src_num != 0)
+            {
+              printf ("src");
+              for (i = 0; i < s->src_num; i++)
+                printf (" %p", (void *) s->src[i].ptr);
+              printf (" ");
+            }
+          printf (" (cf sp approx %p)\n", (void *) &different);
+        }
+
+      /* Remember this operand set for the comparison on the next call.  */
+      memcpy (&prev, s, sizeof (prev));
+    }
+
+  switch (s->cache)
+    {
+    case 0:
+      for (i = 0; i < s->dst_num; i++)
+        mpn_cache_fill_write (s->dst[i].ptr, s->dst[i].size);
+      for (i = 0; i < s->src_num; i++)
+        mpn_cache_fill (s->src[i].ptr, s->src[i].size);
+      break;
+    case 1:
+      pentium_wbinvd ();
+      break;
+    default:
+      /* other values: no cache priming */
+      break;
+    }
+}
+
+
+/* Miscellaneous options accepted by tune and speed programs under -o. */
+
+/* Apply one "-o" option string S.
+
+   "addrs" sets speed_option_addrs, "verbose" raises speed_option_verbose
+   by one, "verbose=N" sets it to N, and "cycles-broken" sets
+   speed_option_cycles_broken.  Any other string is reported on stderr and
+   the program exits with status 1.  */
+void
+speed_option_set (const char *s)
+{
+  int n;
+
+  if (strcmp (s, "addrs") == 0)
+    speed_option_addrs = 1;
+  else if (strcmp (s, "verbose") == 0)
+    speed_option_verbose++;
+  else if (sscanf (s, "verbose=%d", &n) == 1)
+    speed_option_verbose = n;
+  else if (strcmp (s, "cycles-broken") == 0)
+    speed_option_cycles_broken = 1;
+  else
+    {
+      /* This is a diagnostic, so it goes to stderr like the other error
+         reports in this file instead of being mixed into stdout.  */
+      fprintf (stderr, "Unrecognised -o option: %s\n", s);
+      exit (1);
+    }
+}
+
+
+/* The following are basic speed running routines for various gmp functions.
+ Many are very similar and use speed.h macros.
+
+ Each routine allocates its own destination space for the result of the
+ function, because only it can know what the function needs.
+
+ speed_starttime() and speed_endtime() are put tight around the code to be
+ measured. Any setups are done outside the timed portion.
+
+ Each routine is responsible for its own cache priming.
+ speed_cache_fill() is a good way to do this, see examples in speed.h.
+ One cache priming possibility, for CPUs with write-allocate cache, and
+ functions that don't take too long, is to do one dummy call before timing
+ so as to cache everything that gets used. But speed_measure() runs a
+ routine at least twice and will take the smaller time, so this might not
+ be necessary.
+
+ Data alignment will be important, for source, destination and temporary
+ workspace. A routine can align its destination and workspace. Programs
+ using the routines will ensure s->xp and s->yp are aligned. Aligning
+ onto a CACHE_LINE_SIZE boundary is suggested. s->align_wp and
+ s->align_wp2 should be respected where it makes sense to do so.
+ SPEED_TMP_ALLOC_LIMBS is a good way to do this.
+
+ A loop of the following form can be expected to turn into good assembler
+ code on most CPUs, thereby minimizing overhead in the measurement. It
+ can always be assumed s->reps >= 1.
+
+ i = s->reps;
+ do
+ foo();
+ while (--i != 0);
+
+ Additional parameters might be added to "struct speed_params" in the
+ future. Routines should ignore anything they don't use.
+
+ s->size can be used creatively, and s->xp and s->yp can be ignored. For
+ example, speed_mpz_fac_ui() uses s->size as n for the factorial. s->r is
+ just a user-supplied parameter. speed_mpn_lshift() uses it as a shift,
+ speed_mpn_mul_1() uses it as a multiplier. */
+
+/* Copy-style measurements. The whole body of each function is one
+ SPEED_ROUTINE_MPN_COPY (or _COPY_BYTES / _TABSELECT) macro call naming
+ the routine to time. speed_mpn_copyi and speed_mpn_copyd exist only
+ when the matching HAVE_NATIVE_* symbol is non-zero. */
+/* MPN_COPY etc can be macros, so the _CALL forms are necessary */
+double
+speed_MPN_COPY (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (MPN_COPY);
+}
+double
+speed_MPN_COPY_INCR (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR);
+}
+double
+speed_MPN_COPY_DECR (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR);
+}
+#if HAVE_NATIVE_mpn_copyi
+double
+speed_mpn_copyi (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_copyi);
+}
+#endif
+#if HAVE_NATIVE_mpn_copyd
+double
+speed_mpn_copyd (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_copyd);
+}
+#endif
+double
+speed_memcpy (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
+}
+double
+speed_mpn_com (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_com);
+}
+double
+speed_mpn_neg (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_neg);
+}
+double
+speed_mpn_sec_tabselect (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TABSELECT (mpn_sec_tabselect);
+}
+
+/* mpn_addmul_<n>, mpn_submul_1, mpn_addaddmul_1msb0 and mpn_mul_1. The
+ number in SPEED_ROUTINE_MPN_UNARY_<n> matches the _<n> suffix of the
+ routine being timed. The addmul_2 to addmul_8 and addaddmul_1msb0
+ wrappers are compiled only when the matching HAVE_NATIVE_* symbol is
+ non-zero. */
+double
+speed_mpn_addmul_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_addmul_1);
+}
+double
+speed_mpn_submul_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_submul_1);
+}
+
+#if HAVE_NATIVE_mpn_addmul_2
+double
+speed_mpn_addmul_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_2 (mpn_addmul_2);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_3
+double
+speed_mpn_addmul_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_3 (mpn_addmul_3);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_4
+double
+speed_mpn_addmul_4 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_4 (mpn_addmul_4);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_5
+double
+speed_mpn_addmul_5 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_5 (mpn_addmul_5);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_6
+double
+speed_mpn_addmul_6 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_6 (mpn_addmul_6);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_7
+double
+speed_mpn_addmul_7 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_7 (mpn_addmul_7);
+}
+#endif
+#if HAVE_NATIVE_mpn_addmul_8
+double
+speed_mpn_addmul_8 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_8 (mpn_addmul_8);
+}
+#endif
+
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+double
+speed_mpn_addaddmul_1msb0 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0 (mpn_addaddmul_1msb0);
+}
+#endif
+double
+speed_mpn_mul_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);
+}
+double
+speed_mpn_mul_1_inplace (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1);
+}
+/* mpn_mul_2 to mpn_mul_6, each compiled only when the matching
+ HAVE_NATIVE_* symbol is non-zero and timed with the
+ SPEED_ROUTINE_MPN_UNARY_<n> macro of the same number. */
+#if HAVE_NATIVE_mpn_mul_2
+double
+speed_mpn_mul_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_2 (mpn_mul_2);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_3
+double
+speed_mpn_mul_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_3 (mpn_mul_3);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_4
+double
+speed_mpn_mul_4 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_5
+double
+speed_mpn_mul_5 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+double
+speed_mpn_mul_6 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
+}
+#endif
+
+/* Shift routines, timed through the same SPEED_ROUTINE_MPN_UNARY_1 macro
+ as the multiply-by-one-limb routines. */
+double
+speed_mpn_lshift (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);
+}
+double
+speed_mpn_lshiftc (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);
+}
+double
+speed_mpn_rshift (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);
+}
+
+/* Division by a single limb. The wrappers with an "f" after the "1" use
+ the ..._1F / ..._1CF macros on the same routine as their plain
+ counterpart (presumably the fraction-limb case; see speed.h to confirm).
+ The _div and _inv names time mpn_divrem_1_div/_inv and
+ mpn_mod_1_div/_inv. The mpn_divrem_1c pair is compiled only when
+ HAVE_NATIVE_mpn_divrem_1c is non-zero. */
+/* The carry-in variants (if available) are good for measuring because they
+ won't skip a division if high<divisor. Alternately, use -1 as a divisor
+ with the plain _1 forms. */
+double
+speed_mpn_divrem_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1);
+}
+double
+speed_mpn_divrem_1f (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1);
+}
+#if HAVE_NATIVE_mpn_divrem_1c
+double
+speed_mpn_divrem_1c (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1C (mpn_divrem_1c);
+}
+double
+speed_mpn_divrem_1cf (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1CF (mpn_divrem_1c);
+}
+#endif
+
+double
+speed_mpn_divrem_1_div (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div);
+}
+double
+speed_mpn_divrem_1f_div (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div);
+}
+double
+speed_mpn_divrem_1_inv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv);
+}
+double
+speed_mpn_divrem_1f_inv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv);
+}
+double
+speed_mpn_mod_1_div (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div);
+}
+double
+speed_mpn_mod_1_inv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv);
+}
+
+double
+speed_mpn_preinv_divrem_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1);
+}
+double
+speed_mpn_preinv_divrem_1f (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1);
+}
+/* mpn_mod_34lsub1, compiled only when GMP_NUMB_BITS is a multiple of 4,
+ followed by mpn_divrem_2 and its _div and _inv variants. */
+#if GMP_NUMB_BITS % 4 == 0
+double
+speed_mpn_mod_34lsub1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1);
+}
+#endif
+
+double
+speed_mpn_divrem_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);
+}
+double
+speed_mpn_divrem_2_div (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div);
+}
+double
+speed_mpn_divrem_2_inv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
+}
+/* mpn_div_qr_1n_pi1 with its numbered alternatives _1 to _4, then
+ mpn_div_qr_1 and mpn_div_qr_2. The last two wrappers time the same
+ function and differ only in the second macro argument: 1 for
+ speed_mpn_div_qr_2n, 0 for speed_mpn_div_qr_2u (presumably normalized
+ versus unnormalized divisor; see speed.h to confirm). */
+double
+speed_mpn_div_qr_1n_pi1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1);
+}
+double
+speed_mpn_div_qr_1n_pi1_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_1);
+}
+double
+speed_mpn_div_qr_1n_pi1_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_2);
+}
+double
+speed_mpn_div_qr_1n_pi1_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_3);
+}
+double
+speed_mpn_div_qr_1n_pi1_4 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_4);
+}
+
+double
+speed_mpn_div_qr_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1);
+}
+
+double
+speed_mpn_div_qr_2n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
+}
+double
+speed_mpn_div_qr_2u (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
+}
+/* Remainder from division by a single limb. The mpn_mod_1_1p and
+ mpn_mod_1s_<k>p forms are passed to the macro together with their _cps
+ function (presumably the precomputation step; see speed.h to confirm),
+ and for the mod_1s forms the last macro argument equals k.
+ speed_mpn_mod_1c is compiled only when HAVE_NATIVE_mpn_mod_1c is
+ non-zero. */
+double
+speed_mpn_mod_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1);
+}
+#if HAVE_NATIVE_mpn_mod_1c
+double
+speed_mpn_mod_1c (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);
+}
+#endif
+double
+speed_mpn_preinv_mod_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
+}
+double
+speed_mpn_mod_1_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
+}
+double
+speed_mpn_mod_1_1_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
+}
+double
+speed_mpn_mod_1_1_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
+}
+double
+speed_mpn_mod_1_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
+}
+double
+speed_mpn_mod_1_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3);
+}
+double
+speed_mpn_mod_1_4 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4);
+}
+/* mpn_divexact_1, mpn_divexact_by3, the single-limb bdiv routines, the
+ modexact_1 pair and mpz_mod. mpn_divexact_by3 is timed through the
+ generic SPEED_ROUTINE_MPN_COPY macro. speed_mpn_modexact_1_odd is
+ compiled only when HAVE_NATIVE_mpn_modexact_1_odd is non-zero. */
+double
+speed_mpn_divexact_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1);
+}
+
+double
+speed_mpn_divexact_by3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3);
+}
+
+double
+speed_mpn_bdiv_dbm1c (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c);
+}
+
+double
+speed_mpn_bdiv_q_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1);
+}
+
+double
+speed_mpn_pi1_bdiv_q_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1);
+}
+
+#if HAVE_NATIVE_mpn_modexact_1_odd
+double
+speed_mpn_modexact_1_odd (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd);
+}
+#endif
+
+double
+speed_mpn_modexact_1c_odd (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
+}
+
+double
+speed_mpz_mod (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_MOD (mpz_mod);
+}
+/* Multi-limb division: the sbpi1, dcpi1 and mu families. The sbpi1
+ wrappers pass inv.inv32 and the dcpi1 wrappers pass &inv as the inverse
+ argument; "inv" is not declared here, so it presumably comes from the
+ macro expansion. The two trailing numbers are further parameters of
+ SPEED_ROUTINE_MPN_PI1_DIV whose meaning is defined in speed.h. The mu
+ wrappers pass the routine together with its _itch function (presumably
+ the scratch-size query). */
+double
+speed_mpn_sbpi1_div_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
+}
+double
+speed_mpn_dcpi1_div_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
+}
+double
+speed_mpn_sbpi1_divappr_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);
+}
+double
+speed_mpn_dcpi1_divappr_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3);
+}
+double
+speed_mpn_mu_div_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch);
+}
+double
+speed_mpn_mu_divappr_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch);
+}
+double
+speed_mpn_mu_div_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch);
+}
+double
+speed_mpn_mupi_div_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch);
+}
+
+/* Timing entry points for the bdiv routines (sbpi1, dcpi1 and mu variants
+   producing quotient and remainder, quotient only, or remainder only).
+   Each body is one SPEED_ROUTINE_* macro invocation defined outside this
+   section; the mu variants additionally pass their *_itch function.  */
+double
+speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
+}
+double
+speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
+}
+double
+speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
+}
+double
+speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
+}
+double
+speed_mpn_sbpi1_bdiv_r (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PI1_BDIV_R (mpn_sbpi1_bdiv_r);
+}
+double
+speed_mpn_mu_bdiv_q (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
+}
+double
+speed_mpn_mu_bdiv_qr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
+}
+
+/* Timing entry points for mpn_broot, mpn_broot_invm1 and mpn_brootinv.
+   The first two share SPEED_ROUTINE_MPN_BROOT.  speed_mpn_brootinv passes
+   5*s->size as the second macro argument -- presumably a scratch-space
+   size in limbs; TODO confirm against SPEED_ROUTINE_MPN_BROOTINV.  */
+double
+speed_mpn_broot (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOT (mpn_broot);
+}
+double
+speed_mpn_broot_invm1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
+}
+double
+speed_mpn_brootinv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+}
+
+/* Timing entry points for the inversion routines.  Each function hands the
+   routine under test and an *_itch function to a SPEED_ROUTINE_* macro
+   defined outside this section.  Note that speed_mpn_ni_invertappr times
+   mpn_ni_invertappr but passes mpn_invertappr_itch, the same itch function
+   used by speed_mpn_invertappr.  */
+double
+speed_mpn_binvert (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
+}
+
+double
+speed_mpn_invert (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch);
+}
+
+double
+speed_mpn_invertappr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch);
+}
+
+double
+speed_mpn_ni_invertappr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch);
+}
+
+double
+speed_mpn_sec_invert (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SEC_INVERT (mpn_sec_invert, mpn_sec_invert_itch);
+}
+
+/* Timing entry points for mpn_redc_1, mpn_redc_2 and mpn_redc_n, each
+   through its own SPEED_ROUTINE_REDC_* macro (defined outside this
+   section), which is expected to supply the timed loop and return value.  */
+double
+speed_mpn_redc_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_REDC_1 (mpn_redc_1);
+}
+double
+speed_mpn_redc_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_REDC_2 (mpn_redc_2);
+}
+double
+speed_mpn_redc_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_REDC_N (mpn_redc_n);
+}
+
+
+/* Timing entry points for mpn_popcount and mpn_hamdist.  The bodies are
+   single SPEED_ROUTINE_* macro invocations defined outside this section.  */
+double
+speed_mpn_popcount (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);
+}
+double
+speed_mpn_hamdist (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HAMDIST (mpn_hamdist);
+}
+
+
+/* Time mpn_add_n through SPEED_ROUTINE_MPN_BINARY_N.  The macro is defined
+   outside this section and is expected to supply the operands, the timed
+   loop and the return value.  */
+double
+speed_mpn_add_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_add_n);
+}
+/* Time mpn_sub_n through SPEED_ROUTINE_MPN_BINARY_N.  The macro is defined
+   outside this section and is expected to supply the operands, the timed
+   loop and the return value.  */
+double
+speed_mpn_sub_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
+}
+/* Timing entry points for mpn_add_1 and mpn_sub_1.  Each routine is timed
+   twice: once through SPEED_ROUTINE_MPN_UNARY_1 and once through
+   SPEED_ROUTINE_MPN_UNARY_1_INPLACE (presumably with source and
+   destination being the same operand -- confirm against the macro, which
+   is defined outside this section).  */
+double
+speed_mpn_add_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_add_1);
+}
+double
+speed_mpn_add_1_inplace (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_add_1);
+}
+double
+speed_mpn_sub_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1 (mpn_sub_1);
+}
+double
+speed_mpn_sub_1_inplace (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_sub_1);
+}
+
+/* Timing entry points for mpn_add_errK_n and mpn_sub_errK_n, K = 1, 2, 3.
+   The add and sub routine for a given K share one
+   SPEED_ROUTINE_MPN_BINARY_ERRK_N macro, defined outside this section.  */
+double
+speed_mpn_add_err1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
+}
+double
+speed_mpn_sub_err1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
+}
+double
+speed_mpn_add_err2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
+}
+double
+speed_mpn_sub_err2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
+}
+double
+speed_mpn_add_err3_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
+}
+double
+speed_mpn_sub_err3_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
+}
+
+
+/* Time mpn_add_n_sub_n; built only when HAVE_NATIVE_mpn_add_n_sub_n is
+   non-zero.  The full call expression is handed to
+   SPEED_ROUTINE_MPN_ADDSUB_N_CALL.  `ap' and `sp' are not declared here,
+   so they are presumably locals created by the macro expansion (the macro
+   is defined outside this section -- confirm there).  */
+#if HAVE_NATIVE_mpn_add_n_sub_n
+double
+speed_mpn_add_n_sub_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size));
+}
+#endif
+
+/* Timing entry points for the shift-by-1 combined routines, each built
+   only when the corresponding HAVE_NATIVE_* symbol allows it.  The
+   three-operand forms (addlsh1_n, sublsh1_n, rsblsh1_n) are guarded by
+   `== 1' and use SPEED_ROUTINE_MPN_BINARY_N; the _ip1/_ip2 forms are
+   guarded by a plain non-zero test and use SPEED_ROUTINE_MPN_COPY.
+   NOTE(review): the reason for `== 1' rather than non-zero is not visible
+   in this section; presumably other values mark a non-standard native
+   variant -- confirm before changing the guards.  */
+#if HAVE_NATIVE_mpn_addlsh1_n == 1
+double
+speed_mpn_addlsh1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n == 1
+double
+speed_mpn_sublsh1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+double
+speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+double
+speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+double
+speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_n == 1
+double
+speed_mpn_rsblsh1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n);
+}
+#endif
+/* Timing entry points for the shift-by-2 combined routines, mirroring the
+   shift-by-1 set: addlsh2_n, sublsh2_n and rsblsh2_n are guarded by
+   `== 1' and use SPEED_ROUTINE_MPN_BINARY_N, while the _ip1/_ip2 forms are
+   guarded by a non-zero test and use SPEED_ROUTINE_MPN_COPY.  Both macros
+   are defined outside this section.  */
+#if HAVE_NATIVE_mpn_addlsh2_n == 1
+double
+speed_mpn_addlsh2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n == 1
+double
+speed_mpn_sublsh2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+double
+speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+double
+speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+double
+speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_n == 1
+double
+speed_mpn_rsblsh2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
+}
+#endif
+/* Timing entry points for the variable-shift combined routines, each built
+   only when its HAVE_NATIVE_* symbol is non-zero.  Every call uses a fixed
+   shift count of 7.  The three-operand forms go through
+   SPEED_ROUTINE_MPN_BINARY_N_CALL and the in-place forms through
+   SPEED_ROUTINE_MPN_UNARY_1_CALL.  `wp', `xp' and `yp' are not declared in
+   these functions and are presumably supplied by the macro expansions
+   (defined outside this section).  */
+#if HAVE_NATIVE_mpn_addlsh_n
+double
+speed_mpn_addlsh_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+double
+speed_mpn_sublsh_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+double
+speed_mpn_addlsh_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+double
+speed_mpn_addlsh_n_ip2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+double
+speed_mpn_sublsh_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+double
+speed_mpn_rsblsh_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+/* Timing entry points for mpn_rsh1add_n and mpn_rsh1sub_n, each built only
+   when its HAVE_NATIVE_* symbol is non-zero.  Both use
+   SPEED_ROUTINE_MPN_BINARY_N, defined outside this section.  */
+#if HAVE_NATIVE_mpn_rsh1add_n
+double
+speed_mpn_rsh1add_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1add_n);
+}
+#endif
+#if HAVE_NATIVE_mpn_rsh1sub_n
+double
+speed_mpn_rsh1sub_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1sub_n);
+}
+#endif
+
+/* Timing entry points for mpn_cnd_add_n and mpn_cnd_sub_n.  Both are
+   always called with the condition argument fixed at 1.  `wp', `xp' and
+   `yp' are presumably provided by SPEED_ROUTINE_MPN_BINARY_N_CALL, which
+   is defined outside this section.  */
+double
+speed_mpn_cnd_add_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_add_n (1, wp, xp, yp, s->size));
+}
+double
+speed_mpn_cnd_sub_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_sub_n (1, wp, xp, yp, s->size));
+}
+
+/* Timing entry points for the limb-wise logical operations.  mpn_and_n and
+   friends can be macros rather than functions, so they cannot be passed by
+   name; instead the complete call expression is given to
+   SPEED_ROUTINE_MPN_BINARY_N_CALL.  `wp', `xp' and `yp' are presumably
+   supplied by that macro, which is defined outside this section.  */
+double
+speed_mpn_and_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_andn_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_nand_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_ior_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_iorn_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_nior_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_xor_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
+}
+double
+speed_mpn_xnor_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
+}
+
+
+/* Timing entry points for mpn_mul_n and mpn_sqr.  speed_mpn_mul_n_sqr
+   times mpn_mul_n used as a squaring: s->xp is passed as both source
+   operands, via SPEED_ROUTINE_MPN_SQR_CALL.  The macros are defined
+   outside this section.  */
+double
+speed_mpn_mul_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);
+}
+double
+speed_mpn_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR (mpn_sqr);
+}
+double
+speed_mpn_mul_n_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR_CALL (mpn_mul_n (wp, s->xp, s->xp, s->size));
+}
+
+/* Timing entry points for mpn_mul_basecase, mpn_mul and mpn_sqr_basecase.
+   The two multiplies share SPEED_ROUTINE_MPN_MUL; mpn_sqr_basecase is
+   timed with the generic SPEED_ROUTINE_MPN_SQR and, per the FIXME below,
+   does not apply any size restriction of its own.  */
+double
+speed_mpn_mul_basecase (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL(mpn_mul_basecase);
+}
+double
+speed_mpn_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL(mpn_mul);
+}
+double
+speed_mpn_sqr_basecase (struct speed_params *s)
+{
+ /* FIXME: size restrictions on some versions of sqr_basecase */
+ SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);
+}
+
+/* Timing entry points for mpn_sqr_diagonal and mpn_sqr_diag_addlsh1, each
+   built only when its HAVE_NATIVE_* symbol is non-zero.  In the second,
+   `wp' and `tp' are not declared here and are presumably supplied by
+   SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (defined outside this
+   section).  */
+#if HAVE_NATIVE_mpn_sqr_diagonal
+double
+speed_mpn_sqr_diagonal (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal);
+}
+#endif
+
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+double
+speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
+}
+#endif
+
+/* Timing entry points for the Toom squaring routines mpn_toom2_sqr,
+   mpn_toom3_sqr, mpn_toom4_sqr, mpn_toom6_sqr and mpn_toom8_sqr.  Each has
+   its own SPEED_ROUTINE_MPN_TOOMn_SQR macro, defined outside this
+   section.  */
+double
+speed_mpn_toom2_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr);
+}
+double
+speed_mpn_toom3_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr);
+}
+double
+speed_mpn_toom4_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
+}
+double
+speed_mpn_toom6_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
+}
+double
+speed_mpn_toom8_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr);
+}
+/* Timing entry points for mpn_toom22_mul, mpn_toom33_mul, mpn_toom44_mul,
+   mpn_toom6h_mul and mpn_toom8h_mul.  All use the *_MUL_N flavour of their
+   SPEED_ROUTINE macro (defined outside this section), which by its name
+   presumably times equal-sized operands -- confirm against the macros.  */
+double
+speed_mpn_toom22_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
+}
+double
+speed_mpn_toom33_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
+}
+double
+speed_mpn_toom44_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
+}
+double
+speed_mpn_toom6h_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
+}
+double
+speed_mpn_toom8h_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
+}
+
+/* Timing entry points for mpn_toom32_mul, mpn_toom42_mul, mpn_toom43_mul
+   and mpn_toom63_mul, each through its own SPEED_ROUTINE_MPN_TOOMxy_MUL
+   macro (defined outside this section).  */
+double
+speed_mpn_toom32_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom42_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
+}
+double
+speed_mpn_toom43_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom63_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
+}
+/* Paired "toomXY_for_toomZW" timing entry points.  In each function the
+   routine timed is mpn_toomXY_mul, handed to the macro
+   SPEED_ROUTINE_MPN_TOOMXY_FOR_TOOMZW_MUL.  The pairs come in both
+   directions (32/43, 32/53, 42/53, 43/54), which suggests they exist to
+   compare two algorithms on the operand shapes of the other -- the actual
+   operand sizes are chosen inside the macros, defined outside this
+   section; TODO confirm there.  */
+double
+speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
+}
+double
+speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
+}
+double
+speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
+}
+double
+speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
+}
+double
+speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
+}
+
+/* Timing entry points for mpn_nussbaumer_mul, as a multiply of s->xp by
+   s->yp and as a squaring of s->xp (both s->size limbs).  `wp' is
+   presumably supplied by the SPEED_ROUTINE_MPN_*_CALL macros, which are
+   defined outside this section.  */
+double
+speed_mpn_nussbaumer_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_N_CALL
+ (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
+}
+double
+speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR_CALL
+ (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
+}
+
+/* Timing entry points for mpn_mul_fft_full, as a multiply and as a
+   squaring of s->size-limb operands.  Built only when WANT_OLD_FFT_FULL is
+   non-zero.  `wp' is presumably supplied by the SPEED_ROUTINE_MPN_*_CALL
+   macros, which are defined outside this section.  */
+#if WANT_OLD_FFT_FULL
+double
+speed_mpn_mul_fft_full (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_N_CALL
+ (mpn_mul_fft_full (wp, s->xp, s->size, s->yp, s->size));
+}
+double
+speed_mpn_mul_fft_full_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR_CALL
+ (mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
+}
+#endif
+
+/* These are mod 2^N+1 multiplies and squares.  If s->r is supplied it's
+   used as k, otherwise the best k for the size is used.  If s->size isn't a
+   multiple of 2^k it's rounded up to make the effective operation size.
+
+   SPEED_ROUTINE_MPN_MUL_FFT_CALL(call, sqr) expands to a complete function
+   body operating on the enclosing function's `s':
+
+     call  expression evaluated s->reps times inside the timed region.  It
+	   may refer to the locals declared here: wp (destination, pl+1
+	   limbs), pl (size from mpn_fft_next_size) and k.
+     sqr   non-zero when timing a squaring.  It is forwarded to
+	   mpn_fft_best_k, and when it is zero s->yp is also registered as
+	   a source operand.
+
+   The expansion returns the elapsed time from speed_endtime.  `sqr' is
+   parenthesized at each use so that an expression argument cannot be
+   re-associated by the surrounding operators.  */
+
+#define SPEED_ROUTINE_MPN_MUL_FFT_CALL(call, sqr) \
+  { \
+    mp_ptr wp; \
+    mp_size_t pl; \
+    int k; \
+    unsigned i; \
+    double t; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    if (s->r != 0) \
+      k = s->r; \
+    else \
+      k = mpn_fft_best_k (s->size, (sqr)); \
+ \
+    TMP_MARK; \
+    pl = mpn_fft_next_size (s->size, k); \
+    SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    if (!(sqr)) \
+      speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, pl+1); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      call; \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Timing entry points for mpn_mul_fft, built on
+   SPEED_ROUTINE_MPN_MUL_FFT_CALL.  The macro declares wp, pl and k, which
+   the call expressions use.  The second macro argument is the `sqr' flag:
+   0 for the multiply of s->xp by s->yp, 1 for the squaring of s->xp.  */
+double
+speed_mpn_mul_fft (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_FFT_CALL
+ (mpn_mul_fft (wp, pl, s->xp, s->size, s->yp, s->size, k), 0);
+}
+
+double
+speed_mpn_mul_fft_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_FFT_CALL
+ (mpn_mul_fft (wp, pl, s->xp, s->size, s->xp, s->size, k), 1);
+}
+
+/* Timing entry points for mpn_fft_mul: speed_mpn_fft_mul multiplies s->xp
+   by s->yp, speed_mpn_fft_sqr passes s->xp as both operands.  `wp' is
+   presumably supplied by the SPEED_ROUTINE_MPN_*_CALL macros, which are
+   defined outside this section.  */
+double
+speed_mpn_fft_mul (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size));
+}
+
+double
+speed_mpn_fft_sqr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size));
+}
+
+/* Timing entry points for the low-half square and multiply routines.
+   speed_mpn_sqrlo_basecase additionally restricts the sizes it accepts:
+   s->size must be at or above MIN (3, SQRLO_BASECASE_THRESHOLD) and below
+   SQRLO_DC_THRESHOLD, checked with SPEED_RESTRICT_COND before the
+   measurement macro runs.  All macros used are defined outside this
+   section.  */
+double
+speed_mpn_sqrlo (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQRLO (mpn_sqrlo);
+}
+double
+speed_mpn_sqrlo_basecase (struct speed_params *s)
+{
+ SPEED_RESTRICT_COND (ABOVE_THRESHOLD (s->size, MIN (3, SQRLO_BASECASE_THRESHOLD))
+ && BELOW_THRESHOLD (s->size, SQRLO_DC_THRESHOLD));
+ SPEED_ROUTINE_MPN_SQRLO (mpn_sqrlo_basecase);
+}
+double
+speed_mpn_mullo_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n);
+}
+double
+speed_mpn_mullo_basecase (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
+}
+
+/* Timing entry points for the middle-product routines.
+   mpn_mulmid_basecase and mpn_mulmid share SPEED_ROUTINE_MPN_MULMID;
+   mpn_mulmid_n and mpn_toom42_mulmid have their own macros.  All are
+   defined outside this section.  */
+double
+speed_mpn_mulmid_basecase (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
+}
+
+double
+speed_mpn_mulmid (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
+}
+
+double
+speed_mpn_mulmid_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
+}
+
+double
+speed_mpn_toom42_mulmid (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
+}
+
+/* Timing entry points for the mulmod_bnm1 family.  mpn_mulmod_bnm1,
+   mpn_bc_mulmod_bnm1 and mpn_sqrmod_bnm1 are timed through
+   SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL with s->size-limb operands; `wp' and
+   `tp' are presumably supplied by that macro.  speed_mpn_mulmod_bnm1_rounded
+   times mpn_mulmod_bnm1 through a separate _ROUNDED macro whose size
+   handling is defined outside this section.  */
+double
+speed_mpn_mulmod_bnm1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp));
+}
+
+double
+speed_mpn_bc_mulmod_bnm1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp));
+}
+
+double
+speed_mpn_mulmod_bnm1_rounded (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1);
+}
+
+double
+speed_mpn_sqrmod_bnm1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp));
+}
+
+/* Timing entry points for mpn_mulmod_bknp1 and mpn_sqrmod_bknp1.  Both go
+   through SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL with 1 as the second macro
+   argument (the bnp1 base-case wrappers pass 0 instead).  `wp', `tp', `nk'
+   and `k' are not declared here and are presumably locals of the macro
+   expansion; the meaning of the flag is defined with the macro, outside
+   this section.  */
+double
+speed_mpn_mulmod_bknp1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_mulmod_bknp1 (wp, s->xp, s->yp, nk, k, tp),1);
+}
+
+double
+speed_mpn_sqrmod_bknp1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_sqrmod_bknp1 (wp, s->xp, nk, k, tp),1);
+}
+
+/* Multiply helper timed by speed_mpn_mulmod_bnp1.  When k > 2 the work is
+   passed straight to mpn_mulmod_bknp1.  Otherwise the operands are treated
+   as n*k limbs: mpn_mul_n writes the product to tp, and the upper n*k limbs
+   are then subtracted from the lower n*k limbs into rp.  The borrow
+   returned by mpn_sub_n is ignored.  */
+static void
+mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+		    unsigned k, mp_ptr tp)
+{
+  mp_size_t len;
+
+  if (k > 2)
+    {
+      mpn_mulmod_bknp1 (rp, ap, bp, n, k, tp);
+      return;
+    }
+
+  len = n * k;
+  mpn_mul_n (tp, ap, bp, len);
+  mpn_sub_n (rp, tp, tp + len, len);
+}
+
+/* Squaring helper timed by speed_mpn_sqrmod_bnp1.  When k > 2 the work is
+   passed straight to mpn_sqrmod_bknp1.  Otherwise the operand is treated
+   as n*k limbs: mpn_sqr writes the square to tp, and the upper n*k limbs
+   are then subtracted from the lower n*k limbs into rp.  The borrow
+   returned by mpn_sub_n is ignored.  */
+static void
+mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t n,
+		    unsigned k, mp_ptr tp)
+{
+  mp_size_t len;
+
+  if (k > 2)
+    {
+      mpn_sqrmod_bknp1 (rp, ap, n, k, tp);
+      return;
+    }
+
+  len = n * k;
+  mpn_sqr (tp, ap, len);
+  mpn_sub_n (rp, tp, tp + len, len);
+}
+
+/* Timing entry points for the file-local helpers mpn_bc_mulmod_bnp1 and
+   mpn_bc_sqrmod_bnp1.  Both go through SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL
+   with 0 as the second macro argument (the bknp1 wrappers pass 1).  `wp',
+   `tp', `nk' and `k' are presumably locals of the macro expansion, which
+   is defined outside this section.  */
+double
+speed_mpn_mulmod_bnp1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_bc_mulmod_bnp1 (wp, s->xp, s->yp, nk, k, tp),0);
+}
+
+double
+speed_mpn_sqrmod_bnp1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL (mpn_bc_sqrmod_bnp1 (wp, s->xp, nk, k, tp),0);
+}
+
+/* Time mpn_matrix22_mul on two 2x2 matrices whose entries are s->size
+   limbs each.  struct speed_params only carries two operands, so s->xp and
+   s->yp become the first entry of the left and right matrix and mpn_random
+   invents the other three entries of each.
+
+   Every timed iteration first copies the four left-hand entries into the
+   result area (at offsets 0, 2n+1, 4n+2 and 6n+3) and then calls
+   mpn_matrix22_mul with those result pointers as its first matrix, so the
+   copies are part of the measured time.  Returns the elapsed time from
+   speed_endtime.  */
+double
+speed_mpn_matrix22_mul (struct speed_params *s)
+{
+  mp_ptr left, right, result, scratch;
+  mp_size_t scratch_limbs;
+  unsigned remaining;
+  double elapsed;
+  TMP_DECL;
+
+  TMP_MARK;
+  SPEED_TMP_ALLOC_LIMBS (left, 4 * s->size, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (right, 4 * s->size, s->align_yp);
+  SPEED_TMP_ALLOC_LIMBS (result, 8 * s->size + 4, s->align_wp);
+
+  /* First entry from the caller's operand, the rest random.  */
+  MPN_COPY (left, s->xp, s->size);
+  mpn_random (left + s->size, 3 * s->size);
+  MPN_COPY (right, s->yp, s->size);
+  mpn_random (right + s->size, 3 * s->size);
+
+  scratch_limbs = mpn_matrix22_mul_itch (s->size, s->size);
+  SPEED_TMP_ALLOC_LIMBS (scratch, scratch_limbs, s->align_wp2);
+
+  speed_operand_src (s, left, 4 * s->size);
+  speed_operand_src (s, right, 4 * s->size);
+  speed_operand_dst (s, result, 8 * s->size + 4);
+  speed_operand_dst (s, scratch, scratch_limbs);
+  speed_cache_fill (s);
+
+  speed_starttime ();
+  remaining = s->reps;
+  do
+    {
+      mp_size_t n = s->size;
+      mp_ptr r0 = result;
+      mp_ptr r1 = result + 2 * n + 1;
+      mp_ptr r2 = result + 4 * n + 2;
+      mp_ptr r3 = result + 6 * n + 3;
+
+      MPN_COPY (r0, left, n);
+      MPN_COPY (r1, left + n, n);
+      MPN_COPY (r2, left + 2 * n, n);
+      MPN_COPY (r3, left + 3 * n, n);
+      mpn_matrix22_mul (r0, r1, r2, r3, n,
+			right, right + n, right + 2 * n, right + 3 * n, n,
+			scratch);
+    }
+  while (--remaining != 0);
+  elapsed = speed_endtime ();
+
+  TMP_FREE;
+  return elapsed;
+}
+
+/* Timing entry points for mpn_hgcd2 and its numbered variants mpn_hgcd2_1
+   to mpn_hgcd2_5.  All share SPEED_ROUTINE_MPN_HGCD2, defined outside this
+   section.  */
+double
+speed_mpn_hgcd2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2);
+}
+double
+speed_mpn_hgcd2_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_1);
+}
+double
+speed_mpn_hgcd2_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_2);
+}
+double
+speed_mpn_hgcd2_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_3);
+}
+double
+speed_mpn_hgcd2_4 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_4);
+}
+double
+speed_mpn_hgcd2_5 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2_5);
+}
+
+/* Timing entry points for mpn_hgcd, mpn_hgcd_lehmer, mpn_hgcd_appr and
+   mpn_hgcd_appr_lehmer.  Each is passed to SPEED_ROUTINE_MPN_HGCD_CALL
+   together with its own *_itch function.  The macro is defined outside
+   this section.  */
+double
+speed_mpn_hgcd (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
+}
+
+double
+speed_mpn_hgcd_lehmer (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
+}
+
+double
+speed_mpn_hgcd_appr (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
+}
+
+double
+speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+}
+
+/* Timing entry points for mpn_hgcd_reduce and its variants
+   mpn_hgcd_reduce_1 and mpn_hgcd_reduce_2.  Each is passed to
+   SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL with its own *_itch function.  The
+   macro is defined outside this section.  */
+double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+}
+
+/* Timing entry points for mpn_gcd and the mpn_gcdext variants.
+   mpn_gcdext, mpn_gcdext_single and mpn_gcdext_double share
+   SPEED_ROUTINE_MPN_GCDEXT; the _one_single and _one_double variants use
+   SPEED_ROUTINE_MPN_GCDEXT_ONE.  speed_mpn_gcdext_lehmer is disabled with
+   `#if 0' and is not compiled.  The macros are defined outside this
+   section.  */
+double
+speed_mpn_gcd (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD (mpn_gcd);
+}
+
+double
+speed_mpn_gcdext (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);
+}
+#if 0
+double
+speed_mpn_gcdext_lehmer (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCDEXT (__gmpn_gcdext_lehmer);
+}
+#endif
+double
+speed_mpn_gcdext_single (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single);
+}
+double
+speed_mpn_gcdext_double (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double);
+}
+double
+speed_mpn_gcdext_one_single (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single);
+}
+double
+speed_mpn_gcdext_one_double (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double);
+}
+/* Timing entry points for the small gcd routines.  Note that
+   speed_mpn_gcd_1 and speed_mpn_gcd_1N both time mpn_gcd_1, but through
+   different macros (SPEED_ROUTINE_MPN_GCD_1 and SPEED_ROUTINE_MPN_GCD_1N);
+   how the operands differ is defined by those macros, outside this
+   section.  mpn_gcd_11 and mpn_gcd_22 have their own macros.  */
+double
+speed_mpn_gcd_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);
+}
+double
+speed_mpn_gcd_11 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD_11 (mpn_gcd_11);
+}
+double
+speed_mpn_gcd_1N (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
+}
+double
+speed_mpn_gcd_22 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GCD_22 (mpn_gcd_22);
+}
+
+/* Timing entry points for gmp_primesieve, mpz_nextprime and mpz_prevprime.
+   The two prime-search functions are each timed twice: through
+   SPEED_ROUTINE_MPZ_NEXTPRIME and, in the *_1 functions, through the
+   generic SPEED_ROUTINE_MPZ_UNARY_1.  The difference in operand setup is
+   defined by those macros, outside this section.  */
+double
+speed_gmp_primesieve (struct speed_params *s)
+{
+ SPEED_ROUTINE_GMP_PRIMESIEVE (gmp_primesieve);
+}
+
+double
+speed_mpz_nextprime (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_NEXTPRIME (mpz_nextprime);
+}
+
+double
+speed_mpz_nextprime_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_UNARY_1 (mpz_nextprime);
+}
+
+double
+speed_mpz_prevprime (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_NEXTPRIME (mpz_prevprime);
+}
+
+double
+speed_mpz_prevprime_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_UNARY_1 (mpz_prevprime);
+}
+
+/* Timing entry points for mpz_jacobi and for mpn_jacobi_base with its
+   numbered variants mpn_jacobi_base_1 to mpn_jacobi_base_4.  The base
+   variants all share SPEED_ROUTINE_MPN_JACBASE, defined outside this
+   section.  */
+double
+speed_mpz_jacobi (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi);
+}
+double
+speed_mpn_jacobi_base (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base);
+}
+double
+speed_mpn_jacobi_base_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1);
+}
+double
+speed_mpn_jacobi_base_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2);
+}
+double
+speed_mpn_jacobi_base_3 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
+}
+double
+speed_mpn_jacobi_base_4 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);
+}
+
+
+/* Timing entry points for mpn_sqrtrem and mpn_rootrem, all through
+   SPEED_ROUTINE_MPN_SQRTROOT_CALL.  speed_mpn_sqrt and speed_mpn_root call
+   the same functions as speed_mpn_sqrtrem and speed_mpn_rootrem but pass
+   NULL where those pass wp2.  The root functions use s->r as the last
+   argument of mpn_rootrem.  `wp' and `wp2' are presumably supplied by the
+   macro, which is defined outside this section.  */
+double
+speed_mpn_sqrtrem (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_sqrtrem (wp, wp2, s->xp, s->size));
+}
+
+double
+speed_mpn_sqrt (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_sqrtrem (wp, NULL, s->xp, s->size));
+}
+
+double
+speed_mpn_rootrem (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_rootrem (wp, wp2, s->xp, s->size, s->r));
+}
+
+double
+speed_mpn_root (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_rootrem (wp, NULL, s->xp, s->size, s->r));
+}
+
+
+/* Timing entry points for mpn_perfect_power_p and mpn_perfect_square_p,
+   each through its own SPEED_ROUTINE_MPN_PERFECT_* macro (defined outside
+   this section).  */
+double
+speed_mpn_perfect_power_p (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PERFECT_POWER (mpn_perfect_power_p);
+}
+
+double
+speed_mpn_perfect_square_p (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_PERFECT_SQUARE (mpn_perfect_square_p);
+}
+
+
+/* Timing entry points for mpz_fac_ui, mpz_2fac_ui and mpz_primorial_ui.
+   mpz_fac_ui has a dedicated macro; the other two share the generic
+   SPEED_ROUTINE_MPZ_UI.  Both macros are defined outside this section.  */
+double
+speed_mpz_fac_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui);
+}
+
+double
+speed_mpz_2fac_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_UI (mpz_2fac_ui);
+}
+
+double
+speed_mpz_primorial_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_UI (mpz_primorial_ui);
+}
+
+
+/* Timing entry points for the Fibonacci and Lucas number routines
+   mpn_fib2_ui, mpz_fib_ui, mpz_fib2_ui, mpz_lucnum_ui and mpz_lucnum2_ui.
+   Each has its own SPEED_ROUTINE_* macro, defined outside this section.  */
+double
+speed_mpn_fib2_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui);
+}
+double
+speed_mpz_fib_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui);
+}
+double
+speed_mpz_fib2_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui);
+}
+double
+speed_mpz_lucnum_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui);
+}
+double
+speed_mpz_lucnum2_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui);
+}
+
+
+/* Timing entry points for the modular exponentiation routines.  mpz_powm,
+   mpz_powm_mod, mpz_powm_redc and mpz_powm_sec share
+   SPEED_ROUTINE_MPZ_POWM; mpz_powm_ui has its own macro.  Both are defined
+   outside this section.  */
+double
+speed_mpz_powm (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_POWM (mpz_powm);
+}
+double
+speed_mpz_powm_mod (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod);
+}
+double
+speed_mpz_powm_redc (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
+}
+double
+speed_mpz_powm_sec (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec);
+}
+double
+speed_mpz_powm_ui (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
+}
+
+
+/* Time binvert_limb through SPEED_ROUTINE_MODLIMB_INVERT.  The macro is
+   defined outside this section and is expected to supply the timed loop
+   and the return value.  */
+double
+speed_binvert_limb (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb);
+}
+
+
+/* Time s->reps calls to noop (), i.e. the cost of the measuring loop plus
+   an argument-less call.  Returns the elapsed time from speed_endtime.  */
+double
+speed_noop (struct speed_params *s)
+{
+  unsigned remaining;
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      noop ();
+      if (--remaining == 0)
+	break;
+    }
+  return speed_endtime ();
+}
+
+/* Time s->reps calls to noop_wxs with a one-limb temporary destination,
+   s->xp and s->size as arguments.  Returns the elapsed time from
+   speed_endtime.  */
+double
+speed_noop_wxs (struct speed_params *s)
+{
+  mp_ptr dst;
+  unsigned remaining;
+  double elapsed;
+  TMP_DECL;
+
+  TMP_MARK;
+  dst = TMP_ALLOC_LIMBS (1);
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      noop_wxs (dst, s->xp, s->size);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  TMP_FREE;
+  return elapsed;
+}
+
+/* Time s->reps calls to noop_wxys with a one-limb temporary destination,
+   s->xp, s->yp and s->size as arguments.  Returns the elapsed time from
+   speed_endtime.  */
+double
+speed_noop_wxys (struct speed_params *s)
+{
+  mp_ptr dst;
+  unsigned remaining;
+  double elapsed;
+  TMP_DECL;
+
+  TMP_MARK;
+  dst = TMP_ALLOC_LIMBS (1);
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      noop_wxys (dst, s->xp, s->yp, s->size);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  TMP_FREE;
+  return elapsed;
+}
+
+
+/* Function-body template for the allocate/free timings below.
+   `variables' is a declaration placed before the timed region (the
+   terminating semicolon is added here).  `calls' is one or more statements
+   executed s->reps times between speed_starttime and speed_endtime; the
+   final semicolon is likewise added here.  The expansion returns the
+   elapsed time.  */
+#define SPEED_ROUTINE_ALLOC_FREE(variables, calls) \
+ { \
+ unsigned i; \
+ variables; \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ calls; \
+ } \
+ while (--i != 0); \
+ return speed_endtime (); \
+ }
+
+
+/* Compare these to see how much malloc/free costs and then how much
+   __gmp_default_allocate/free and mpz_init/clear add.  mpz_init/clear or
+   mpq_init/clear will be doing a 1 limb allocate, so use that as the size
+   when including them in comparisons.
+
+   In all four functions the block size is s->size limbs
+   (s->size * GMP_LIMB_BYTES bytes).  The realloc variants first allocate
+   a single limb and then grow it to that size.  Results of the allocation
+   calls are not checked before being passed on.  */
+
+/* malloc followed by free.  */
+double
+speed_malloc_free (struct speed_params *s)
+{
+ size_t bytes = s->size * GMP_LIMB_BYTES;
+ SPEED_ROUTINE_ALLOC_FREE (void *p,
+ p = malloc (bytes);
+ free (p));
+}
+
+/* One-limb malloc, realloc to the full size, then free.  */
+double
+speed_malloc_realloc_free (struct speed_params *s)
+{
+ size_t bytes = s->size * GMP_LIMB_BYTES;
+ SPEED_ROUTINE_ALLOC_FREE (void *p,
+ p = malloc (GMP_LIMB_BYTES);
+ p = realloc (p, bytes);
+ free (p));
+}
+
+/* The same allocate/free pair through the __gmp_allocate_func and
+   __gmp_free_func pointers.  */
+double
+speed_gmp_allocate_free (struct speed_params *s)
+{
+ size_t bytes = s->size * GMP_LIMB_BYTES;
+ SPEED_ROUTINE_ALLOC_FREE (void *p,
+ p = (*__gmp_allocate_func) (bytes);
+ (*__gmp_free_func) (p, bytes));
+}
+
+/* One-limb allocate, reallocate to the full size, then free, all through
+   the __gmp_*_func pointers.  */
+double
+speed_gmp_allocate_reallocate_free (struct speed_params *s)
+{
+ size_t bytes = s->size * GMP_LIMB_BYTES;
+ SPEED_ROUTINE_ALLOC_FREE
+ (void *p,
+ p = (*__gmp_allocate_func) (GMP_LIMB_BYTES);
+ p = (*__gmp_reallocate_func) (p, bytes, GMP_LIMB_BYTES);
+ (*__gmp_free_func) (p, bytes));
+}
+
+/* Timings of init/clear pairs for mpz_t, mpq_t and mpf_t, built on
+   SPEED_ROUTINE_ALLOC_FREE.  speed_mpz_init_realloc_clear additionally
+   calls _mpz_realloc to s->size limbs between the init and the clear; the
+   other three do not use s->size.  */
+double
+speed_mpz_init_clear (struct speed_params *s)
+{
+ SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
+ mpz_init (z);
+ mpz_clear (z));
+}
+
+double
+speed_mpz_init_realloc_clear (struct speed_params *s)
+{
+ SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
+ mpz_init (z);
+ _mpz_realloc (z, s->size);
+ mpz_clear (z));
+}
+
+double
+speed_mpq_init_clear (struct speed_params *s)
+{
+ SPEED_ROUTINE_ALLOC_FREE (mpq_t q,
+ mpq_init (q);
+ mpq_clear (q));
+}
+
+double
+speed_mpf_init_clear (struct speed_params *s)
+{
+ SPEED_ROUTINE_ALLOC_FREE (mpf_t f,
+ mpf_init (f);
+ mpf_clear (f));
+}
+
+
+/* Time mpz_add on two operands of s->size limbs taken from s->xp and
+   s->yp.  Compare with mpn_add_n to see the overhead mpz_add adds.  The
+   same data is added on every repetition, so branch prediction inside
+   mpz_add gets an advantage.  One untimed mpz_add is done first,
+   presumably so the destination is already sized when timing starts.  */
+
+double
+speed_mpz_add (struct speed_params *s)
+{
+  mpz_t sum, lhs, rhs;
+  unsigned remaining;
+  double elapsed;
+
+  mpz_init (sum);
+  mpz_init (lhs);
+  mpz_init (rhs);
+
+  mpz_set_n (lhs, s->xp, s->size);
+  mpz_set_n (rhs, s->yp, s->size);
+  mpz_add (sum, lhs, rhs);
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      mpz_add (sum, lhs, rhs);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  mpz_clear (sum);
+  mpz_clear (lhs);
+  mpz_clear (rhs);
+  return elapsed;
+}
+
+
+/* Time mpz_invert of a k-limb value modulo an s->size-limb modulus.
+
+   The modulus comes from s->yp with bit 0 set to force it odd.  The value
+   comes from the low k limbs of s->xp, where k is chosen from s->r:
+     s->r == 0                  k = s->size/2
+     s->r <  GMP_LIMB_HIGHBIT   k = s->r
+     otherwise (s->r "negative")  k = s->size - (-s->r)
+   Sizes with k outside 1..s->size are rejected by SPEED_RESTRICT_COND.
+
+   Before timing, the value is incremented until mpz_invert reports that an
+   inverse exists, so the timed calls always succeed.  */
+
+double
+speed_mpz_invert (struct speed_params *s)
+{
+  mpz_t value, modulus, inverse;
+  mp_size_t k;
+  unsigned remaining;
+  double elapsed;
+
+  if (s->r >= GMP_LIMB_HIGHBIT)	/* s->r < 0 */
+    k = s->size - (-s->r);
+  else if (s->r != 0)
+    k = s->r;
+  else
+    k = s->size/2;
+
+  SPEED_RESTRICT_COND (k > 0 && k <= s->size);
+
+  mpz_init_set_n (modulus, s->yp, s->size);
+  mpz_setbit (modulus, 0);	/* force the modulus to odd */
+
+  mpz_init_set_n (value, s->xp, k);
+
+  mpz_init (inverse);
+  for (;;)
+    {
+      if (mpz_invert (inverse, value, modulus) != 0)
+	break;
+      mpz_add_ui (value, value, 1);
+    }
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      mpz_invert (inverse, value, modulus);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  mpz_clear (inverse);
+  mpz_clear (value);
+  mpz_clear (modulus);
+  return elapsed;
+}
+
+/* Time mpz_bin_uiui (w, s->size, k).  k is s->r when that is non-zero,
+   otherwise s->size/2; that is, binomial(size,r) or
+   binomial(size,size/2).  */
+
+double
+speed_mpz_bin_uiui (struct speed_params *s)
+{
+  mpz_t result;
+  unsigned long k;
+  unsigned remaining;
+  double elapsed;
+
+  mpz_init (result);
+  if (s->r == 0)
+    k = s->size/2;
+  else
+    k = s->r;
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      mpz_bin_uiui (result, s->size, k);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  mpz_clear (result);
+  return elapsed;
+}
+
+/* Time mpz_bin_ui (w, x, k) with x = 2^size, built by setting bit s->size
+   of a zero mpz.  k is s->r when that is non-zero, otherwise s->size; that
+   is, binomial(2^size,r) or binomial(2^size,size).  */
+
+double
+speed_mpz_bin_ui (struct speed_params *s)
+{
+  mpz_t result, top;
+  unsigned long k;
+  unsigned remaining;
+  double elapsed;
+
+  mpz_init (result);
+  mpz_init_set_ui (top, 0);
+
+  mpz_setbit (top, s->size);
+
+  if (s->r == 0)
+    k = s->size;
+  else
+    k = s->r;
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      mpz_bin_ui (result, top, k);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  mpz_clear (result);
+  mpz_clear (top);
+  return elapsed;
+}
+
+/* Time mpz_mfac_uiui (w, s->size, k).  k is s->r when that is non-zero.
+   Otherwise k is the smallest value >= 1 for which s->size >> k is zero,
+   which serves as log(size); that is, mfac(size,r) or
+   mfac(size,log(size)).  */
+
+double
+speed_mpz_mfac_uiui (struct speed_params *s)
+{
+  mpz_t result;
+  unsigned long k;
+  unsigned remaining;
+  double elapsed;
+
+  mpz_init (result);
+  if (s->r != 0)
+    k = s->r;
+  else
+    {
+      k = 1;
+      while (s->size >> k)
+	++k;
+    }
+
+  speed_starttime ();
+  remaining = s->reps;
+  for (;;)
+    {
+      mpz_mfac_uiui (result, s->size, k);
+      if (--remaining == 0)
+	break;
+    }
+  elapsed = speed_endtime ();
+
+  mpz_clear (result);
+  return elapsed;
+}
+
+/* The multiplies are successively dependent so the latency is measured, not
+   the issue rate.  There are only 10 per loop so the code doesn't get too
+   big, since umul_ppmm is several instructions on some cpus.
+
+   Putting the arguments as "h,l,l,h" gets slightly better code from gcc
+   2.95.2 on x86: it puts only one mov between each mul, not two.  That mov
+   will probably still show up as a bogus extra cycle.
+
+   The measuring function macros are split into three parts to avoid
+   overflowing preprocessor expansion space if umul_ppmm is big.  A caller
+   writes
+
+       SPEED_MACRO_UMUL_PPMM_A  { body used when s->r == 1 }
+       SPEED_MACRO_UMUL_PPMM_B  { body used otherwise }
+       SPEED_MACRO_UMUL_PPMM_C
+
+   and the three parts together form the complete function body.  The
+   braces opened in one part are closed in a later part, so the macros are
+   only meaningful when used in this order.
+
+   Limitations:
+
+   The default umul_ppmm doing h*l will be getting increasing numbers of
+   high zero bits in the calculation. CPUs with data-dependent multipliers
+   will want to use umul_ppmm.1 to get some randomization into the
+   calculation. The extra xors and fetches will be a slowdown of course. */
+
+/* Part A: declares h, l, i and t, sets s->time_divisor to 10 (matching the
+   10 multiplies per loop), loads h and l from s->xp[0] and s->yp[0], and
+   opens the timed do-loop taken when s->r == 1.  */
+#define SPEED_MACRO_UMUL_PPMM_A \
+ { \
+ mp_limb_t h, l; \
+ unsigned i; \
+ double t; \
+ \
+ s->time_divisor = 10; \
+ \
+ h = s->xp[0]; \
+ l = s->yp[0]; \
+ \
+ if (s->r == 1) \
+ { \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ {
+
+/* Part B: closes the s->r == 1 loop, records its time, and opens the timed
+   do-loop taken for every other value of s->r.  */
+#define SPEED_MACRO_UMUL_PPMM_B \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ } \
+ else \
+ { \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ {
+
+/* Part C: closes the second loop, records its time, passes h and l to
+   noop_1 so the results count as used, and returns t.  */
+#define SPEED_MACRO_UMUL_PPMM_C \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ } \
+ \
+ /* stop the compiler optimizing away the whole calculation! */ \
+ noop_1 (h); \
+ noop_1 (l); \
+ \
+ return t; \
+ }
+
+
+/* Measure the umul_ppmm macro with ten multiplies per loop iteration, each
+ taking the previous h and l as its operands.
+
+ The first body is the one timed when s->r == 1: after every multiply it
+ xors an entry of s->xp_block into h and of s->yp_block into l. The
+ second body, timed for any other s->r, is the plain back-to-back chain. */
+double
+speed_umul_ppmm (struct speed_params *s)
+{
+ SPEED_MACRO_UMUL_PPMM_A;
+ {
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[0]; l ^= s->yp_block[0];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[1]; l ^= s->yp_block[1];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[2]; l ^= s->yp_block[2];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[3]; l ^= s->yp_block[3];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[4]; l ^= s->yp_block[4];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[5]; l ^= s->yp_block[5];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[6]; l ^= s->yp_block[6];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[7]; l ^= s->yp_block[7];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[8]; l ^= s->yp_block[8];
+ umul_ppmm (h, l, l, h); h ^= s->xp_block[9]; l ^= s->yp_block[9];
+ }
+ SPEED_MACRO_UMUL_PPMM_B;
+ {
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ umul_ppmm (h, l, l, h);
+ }
+ SPEED_MACRO_UMUL_PPMM_C;
+}
+
+
+/* Same measurement as speed_umul_ppmm, but calling the function
+ mpn_umul_ppmm, which returns the new h and stores the new l through its
+ pointer argument. Only compiled when HAVE_NATIVE_mpn_umul_ppmm is
+ nonzero. The first body is timed for s->r == 1, the second otherwise. */
+#if HAVE_NATIVE_mpn_umul_ppmm
+double
+speed_mpn_umul_ppmm (struct speed_params *s)
+{
+ SPEED_MACRO_UMUL_PPMM_A;
+ {
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[0]; l ^= s->yp_block[0];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[1]; l ^= s->yp_block[1];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[2]; l ^= s->yp_block[2];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[3]; l ^= s->yp_block[3];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[4]; l ^= s->yp_block[4];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[5]; l ^= s->yp_block[5];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[6]; l ^= s->yp_block[6];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[7]; l ^= s->yp_block[7];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[8]; l ^= s->yp_block[8];
+ h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[9]; l ^= s->yp_block[9];
+ }
+ SPEED_MACRO_UMUL_PPMM_B;
+ {
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ h = mpn_umul_ppmm (&l, h, l);
+ }
+ SPEED_MACRO_UMUL_PPMM_C;
+}
+#endif
+
+/* Same measurement as speed_umul_ppmm, but calling mpn_umul_ppmm_r, the
+ variant taking the low-limb pointer as its last argument instead of its
+ first. Only compiled when HAVE_NATIVE_mpn_umul_ppmm_r is nonzero. The
+ first body is timed for s->r == 1, the second otherwise. */
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+double
+speed_mpn_umul_ppmm_r (struct speed_params *s)
+{
+ SPEED_MACRO_UMUL_PPMM_A;
+ {
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[0]; l ^= s->yp_block[0];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[1]; l ^= s->yp_block[1];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[2]; l ^= s->yp_block[2];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[3]; l ^= s->yp_block[3];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[4]; l ^= s->yp_block[4];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[5]; l ^= s->yp_block[5];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[6]; l ^= s->yp_block[6];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[7]; l ^= s->yp_block[7];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[8]; l ^= s->yp_block[8];
+ h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[9]; l ^= s->yp_block[9];
+ }
+ SPEED_MACRO_UMUL_PPMM_B;
+ {
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ h = mpn_umul_ppmm_r (h, l, &l);
+ }
+ SPEED_MACRO_UMUL_PPMM_C;
+}
+#endif
+
+
+/* The divisions are successively dependent so latency is measured, not
+ issue rate. There's only 10 per loop so the code doesn't get too big,
+ especially for udiv_qrnnd_preinv and preinv2norm, which are several
+ instructions each.
+
+ Note that it's only the division which is measured here, there's no data
+ fetching and no shifting if the divisor gets normalized.
+
+ In speed_udiv_qrnnd with gcc 2.95.2 on x86 the parameters "q,r,r,q,d"
+ generate x86 div instructions with nothing in between.
+
+ The measuring function macros are in two parts to avoid overflowing
+ preprocessor expansion space if udiv_qrnnd etc are big.
+
+ Limitations:
+
+ Don't blindly use this to set UDIV_TIME in gmp-mparam.h, check the code
+ generated first.
+
+ CPUs with data-dependent divisions may want more attention paid to the
+ randomness of the data used. Probably the measurement wanted is over
+ uniformly distributed numbers, but what's here might not be giving that. */
+
+/* Part A: opens a block, declares the locals and sets s->time_divisor to
+ 10. The divisor d comes from s->r, or mp_bases[10].big_base when that is
+ zero. If "normalize" is nonzero, d is shifted left by its leading zero
+ count and dinv is computed from it with invert_limb; otherwise dinv is
+ left unset. q starts as s->xp[0] and r as s->yp[0] % d, so r < d when
+ the timed loop begins. Ends by opening the timed do-loop, whose body
+ the user supplies between this part and part B. */
+#define SPEED_ROUTINE_UDIV_QRNND_A(normalize) \
+ { \
+ double t; \
+ unsigned i; \
+ mp_limb_t q, r, d; \
+ mp_limb_t dinv; \
+ \
+ s->time_divisor = 10; \
+ \
+ /* divisor from "r" parameter, or a default */ \
+ d = s->r; \
+ if (d == 0) \
+ d = mp_bases[10].big_base; \
+ \
+ if (normalize) \
+ { \
+ unsigned norm; \
+ count_leading_zeros (norm, d); \
+ d <<= norm; \
+ invert_limb (dinv, d); \
+ } \
+ \
+ q = s->xp[0]; \
+ r = s->yp[0] % d; \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ {
+
+/* Part B: closes the timed loop and records its time, hands q and r to
+ noop_1, returns t and closes the block opened by part A. */
+#define SPEED_ROUTINE_UDIV_QRNND_B \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ /* stop the compiler optimizing away the whole calculation! */ \
+ noop_1 (q); \
+ noop_1 (r); \
+ \
+ return t; \
+ }
+
+/* Measure the udiv_qrnnd macro with ten divisions per loop iteration, each
+ dividing the previous (r, q) pair by d. The divisor is normalized only
+ when UDIV_NEEDS_NORMALIZATION is nonzero. */
+double
+speed_udiv_qrnnd (struct speed_params *s)
+{
+ SPEED_ROUTINE_UDIV_QRNND_A (UDIV_NEEDS_NORMALIZATION);
+ {
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ udiv_qrnnd (q, r, r, q, d);
+ }
+ SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+/* Measure __udiv_qrnnd_c with ten divisions per loop iteration, each
+ dividing the previous (r, q) pair by d. The divisor is always
+ normalized here (the setup macro is given 1). */
+double
+speed_udiv_qrnnd_c (struct speed_params *s)
+{
+ SPEED_ROUTINE_UDIV_QRNND_A (1);
+ {
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ __udiv_qrnnd_c (q, r, r, q, d);
+ }
+ SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+/* Measure the function mpn_udiv_qrnnd, which returns the quotient and
+ stores the remainder through its first argument. Ten chained calls per
+ loop iteration, always with a normalized divisor. Only compiled when
+ HAVE_NATIVE_mpn_udiv_qrnnd is nonzero. */
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+double
+speed_mpn_udiv_qrnnd (struct speed_params *s)
+{
+ SPEED_ROUTINE_UDIV_QRNND_A (1);
+ {
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ q = mpn_udiv_qrnnd (&r, r, q, d);
+ }
+ SPEED_ROUTINE_UDIV_QRNND_B;
+}
+#endif
+
+/* Measure mpn_udiv_qrnnd_r, the variant taking the remainder pointer as
+ its last argument instead of its first. Ten chained calls per loop
+ iteration, always with a normalized divisor. Only compiled when
+ HAVE_NATIVE_mpn_udiv_qrnnd_r is nonzero. */
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+double
+speed_mpn_udiv_qrnnd_r (struct speed_params *s)
+{
+ SPEED_ROUTINE_UDIV_QRNND_A (1);
+ {
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ q = mpn_udiv_qrnnd_r (r, q, d, &r);
+ }
+ SPEED_ROUTINE_UDIV_QRNND_B;
+}
+#endif
+
+
+/* Measure invert_limb. The whole body comes from
+ SPEED_ROUTINE_INVERT_LIMB_CALL, which is defined outside this file;
+ NOTE(review): dinv and d are presumably locals that macro declares --
+ confirm against its definition. */
+double
+speed_invert_limb (struct speed_params *s)
+{
+ SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, d));
+}
+
+
+/* Measure the C "/" operator on mp_limb_t.
+
+ xp[0] might not be particularly random, but should give an indication how
+ "/" runs. Same for speed_operator_mod below.
+
+ Each loop iteration does ten divisions; every step xors x into the
+ previous quotient before dividing, so each division depends on the one
+ before it. */
+double
+speed_operator_div (struct speed_params *s)
+{
+ double t;
+ unsigned i;
+ mp_limb_t x, q, d;
+
+ /* ten divisions per loop iteration */
+ s->time_divisor = 10;
+
+ /* divisor from "r" parameter, or a default */
+ d = s->r;
+ if (d == 0)
+ d = mp_bases[10].big_base;
+
+ x = s->xp[0];
+ q = 0;
+
+ speed_starttime ();
+ i = s->reps;
+ do
+ {
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ q ^= x; q /= d;
+ }
+ while (--i != 0);
+ t = speed_endtime ();
+
+ /* stop the compiler optimizing away the whole calculation! */
+ noop_1 (q);
+
+ return t;
+}
+
+/* Measure the C "%" operator on mp_limb_t. Each loop iteration does ten
+ remainders; every step xors x (taken from s->xp[0]) into the previous
+ remainder before reducing, so each operation depends on the one before
+ it. */
+double
+speed_operator_mod (struct speed_params *s)
+{
+ double t;
+ unsigned i;
+ mp_limb_t x, r, d;
+
+ /* ten remainders per loop iteration */
+ s->time_divisor = 10;
+
+ /* divisor from "r" parameter, or a default */
+ d = s->r;
+ if (d == 0)
+ d = mp_bases[10].big_base;
+
+ x = s->xp[0];
+ r = 0;
+
+ speed_starttime ();
+ i = s->reps;
+ do
+ {
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ r ^= x; r %= d;
+ }
+ while (--i != 0);
+ t = speed_endtime ();
+
+ /* stop the compiler optimizing away the whole calculation! */
+ noop_1 (r);
+
+ return t;
+}
+
+
+/* r==0 measures on data with the values uniformly distributed. This will
+ be typical for count_trailing_zeros in a GCD etc.
+
+ r==1 measures on data with the resultant count uniformly distributed
+ between 0 and GMP_LIMB_BITS-1. This is probably sensible for
+ count_leading_zeros on the high limbs of divisors. */
+
+/* Fill xp[0..SPEED_BLOCK_SIZE-1] with test data for the count-zeros
+   measurements.
+
+   s->r selects the distribution: 0 copies s->xp_block (replacing zero limbs
+   when "zero" is false), 1 builds limbs with a single chosen bit plus
+   random bits on one side of it.  Any other s->r is rejected.
+
+   "leading" is nonzero for count_leading_zeros data, zero for
+   count_trailing_zeros data.
+
+   Returns 1 when xp has been filled, 0 when s->r is not supported.  */
+int
+speed_routine_count_zeros_setup (struct speed_params *s,
+                                 mp_ptr xp, int leading, int zero)
+{
+  int idx, prev;
+  mp_limb_t orig;
+
+  /* Only r==0 and r==1 are understood.  */
+  if (s->r != 0 && s->r != 1)
+    return 0;
+
+  if (s->r == 0)
+    {
+      /* Values uniformly distributed.  A zero limb that isn't permitted
+         becomes 1 for leading, or the high bit alone for trailing.  */
+      MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);
+      if (! zero)
+        {
+          for (idx = 0; idx < SPEED_BLOCK_SIZE; idx++)
+            {
+              if (xp[idx] == 0)
+                xp[idx] = leading ? 1 : GMP_LIMB_HIGHBIT;
+            }
+        }
+    }
+  else
+    {
+      /* Counts uniformly distributed.  One bit, picked from yp_block, is
+         forced on; for leading only the bits below it are kept from
+         xp_block, for trailing only the bits from it upwards.  */
+      for (idx = 0; idx < SPEED_BLOCK_SIZE; idx++)
+        {
+          mp_limb_t bit = CNST_LIMB(1) << (s->yp_block[idx] % GMP_LIMB_BITS);
+          mp_limb_t below = bit-1;
+          mp_limb_t mask = leading ? below : (MP_LIMB_T_MAX ^ below);
+          xp[idx] = (s->xp_block[idx] & mask) | bit;
+        }
+    }
+
+  /* Pre-xor each limb with the count of the limb before it, to account for
+     the effect of n^=c.  */
+  prev = 0;
+  for (idx = 0; idx < SPEED_BLOCK_SIZE; idx++)
+    {
+      orig = xp[idx];
+      xp[idx] = orig ^ prev;
+
+      if (leading)
+        count_leading_zeros (prev, orig);
+      else
+        count_trailing_zeros (prev, orig);
+    }
+
+  return 1;
+}
+
+/* Measure count_leading_zeros. The loop around the single call is supplied
+ by the SPEED_ROUTINE_COUNT_ZEROS_A/B macros, defined outside this file;
+ NOTE(review): c and n are presumably locals declared by part A, and its
+ two arguments presumably the "leading" and "zero" flags of
+ speed_routine_count_zeros_setup -- confirm against the macro. */
+double
+speed_count_leading_zeros (struct speed_params *s)
+{
+ /* A zero operand is allowed exactly when COUNT_LEADING_ZEROS_0 is
+ defined. */
+#ifdef COUNT_LEADING_ZEROS_0
+#define COUNT_LEADING_ZEROS_0_ALLOWED 1
+#else
+#define COUNT_LEADING_ZEROS_0_ALLOWED 0
+#endif
+
+ SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
+ count_leading_zeros (c, n);
+ SPEED_ROUTINE_COUNT_ZEROS_B ();
+}
+/* Measure count_trailing_zeros. Both arguments to
+ SPEED_ROUTINE_COUNT_ZEROS_A are 0 here; NOTE(review): presumably meaning
+ "not leading" and "zero operand not allowed" -- confirm against the
+ macro, which is defined outside this file. */
+double
+speed_count_trailing_zeros (struct speed_params *s)
+{
+ SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
+ count_trailing_zeros (c, n);
+ SPEED_ROUTINE_COUNT_ZEROS_B ();
+}
+
+
+/* Measure mpn_get_str. The whole body comes from
+ SPEED_ROUTINE_MPN_GET_STR, which is defined outside this file. */
+double
+speed_mpn_get_str (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_GET_STR (mpn_get_str);
+}
+
+/* Measure mpn_set_str on s->size input digits. The body comes from
+ SPEED_ROUTINE_MPN_SET_STR_CALL, defined outside this file;
+ NOTE(review): wp, xp and base are presumably locals that macro
+ declares -- confirm against its definition. */
+double
+speed_mpn_set_str (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_set_str (wp, xp, s->size, base));
+}
+/* Measure mpn_bc_set_str with the same harness and arguments as
+ speed_mpn_set_str uses for mpn_set_str. */
+double
+speed_mpn_bc_set_str (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_bc_set_str (wp, xp, s->size, base));
+}
+
+/* Measure the MPN_ZERO macro on s->size limbs. The body comes from
+ SPEED_ROUTINE_MPN_ZERO_CALL, defined outside this file; NOTE(review):
+ wp is presumably a destination that macro provides -- confirm. */
+double
+speed_MPN_ZERO (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size));
+}
+
+
+/* Initialize rstate with the random number algorithm selected by s->r:
+   0 for the default algorithm, 1 for Mersenne Twister, and any other value
+   as the size argument to gmp_randinit_lc_2exp_size.
+
+   Returns 1 for the first two cases, otherwise whatever
+   gmp_randinit_lc_2exp_size returns.  */
+int
+speed_randinit (struct speed_params *s, gmp_randstate_ptr rstate)
+{
+  if (s->r == 0)
+    {
+      gmp_randinit_default (rstate);
+      return 1;
+    }
+
+  if (s->r == 1)
+    {
+      gmp_randinit_mt (rstate);
+      return 1;
+    }
+
+  return gmp_randinit_lc_2exp_size (rstate, s->r);
+}
+
+/* Measure gmp_randseed with a seed of at most s->size bits, on the
+ generator speed_randinit selects from s->r.
+
+ NOTE(review): SPEED_RESTRICT_COND is defined outside this file;
+ presumably it makes the function return early when its condition is
+ false -- confirm against its definition. */
+double
+speed_gmp_randseed (struct speed_params *s)
+{
+ gmp_randstate_t rstate;
+ unsigned i;
+ double t;
+ mpz_t x;
+
+ SPEED_RESTRICT_COND (s->size >= 1);
+ SPEED_RESTRICT_COND (speed_randinit (s, rstate));
+
+ /* s->size bits of seed */
+ mpz_init_set_n (x, s->xp, s->size);
+ mpz_fdiv_r_2exp (x, x, (unsigned long) s->size);
+
+ /* cache priming */
+ gmp_randseed (rstate, x);
+
+ speed_starttime ();
+ i = s->reps;
+ do
+ gmp_randseed (rstate, x);
+ while (--i != 0);
+ t = speed_endtime ();
+
+ gmp_randclear (rstate);
+ mpz_clear (x);
+ return t;
+}
+
+/* Measure gmp_randseed_ui on the generator speed_randinit selects from
+ s->r. Successive calls take their seeds from successive entries of
+ s->xp_block, going back to entry 0 after SPEED_BLOCK_SIZE entries.
+
+ NOTE(review): SPEED_RESTRICT_COND is defined outside this file;
+ presumably it makes the function return early when its condition is
+ false -- confirm against its definition. */
+double
+speed_gmp_randseed_ui (struct speed_params *s)
+{
+ gmp_randstate_t rstate;
+ unsigned i, j;
+ double t;
+
+ SPEED_RESTRICT_COND (speed_randinit (s, rstate));
+
+ /* cache priming */
+ gmp_randseed_ui (rstate, 123L);
+
+ speed_starttime ();
+ i = s->reps;
+ j = 0;
+ do
+ {
+ gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]);
+ /* step to the next seed, wrapping at the end of the block */
+ j++;
+ if (j >= SPEED_BLOCK_SIZE)
+ j = 0;
+ }
+ while (--i != 0);
+ t = speed_endtime ();
+
+ gmp_randclear (rstate);
+ return t;
+}
+
+/* Measure mpz_urandomb generating s->size bits per call, on the generator
+ speed_randinit selects from s->r.
+
+ NOTE(review): SPEED_RESTRICT_COND is defined outside this file;
+ presumably it makes the function return early when its condition is
+ false -- confirm against its definition. */
+double
+speed_mpz_urandomb (struct speed_params *s)
+{
+ gmp_randstate_t rstate;
+ mpz_t z;
+ unsigned i;
+ double t;
+
+ SPEED_RESTRICT_COND (s->size >= 0);
+ SPEED_RESTRICT_COND (speed_randinit (s, rstate));
+
+ mpz_init (z);
+
+ /* cache priming */
+ mpz_urandomb (z, rstate, (unsigned long) s->size);
+ mpz_urandomb (z, rstate, (unsigned long) s->size);
+
+ speed_starttime ();
+ i = s->reps;
+ do
+ mpz_urandomb (z, rstate, (unsigned long) s->size);
+ while (--i != 0);
+ t = speed_endtime ();
+
+ mpz_clear (z);
+ gmp_randclear (rstate);
+ return t;
+}
diff --git a/gmp-6.3.0/tune/div_qr_1_tune.c b/gmp-6.3.0/tune/div_qr_1_tune.c
new file mode 100644
index 0000000..2a623f0
--- /dev/null
+++ b/gmp-6.3.0/tune/div_qr_1_tune.c
@@ -0,0 +1,50 @@
+/* mpn/generic/div_qr_1, using tuned threshold and method.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* Must be defined before gmp-impl.h is included. NOTE(review): presumably
+ this switches the header to the tuning program's variable thresholds and
+ methods -- confirm in gmp-impl.h. */
+#define TUNE_PROGRAM_BUILD 1
+
+#include "gmp-impl.h"
+
+/* The four method variants of mpn_div_qr_1n_pi1, all with the same
+ signature. */
+mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_3 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_4 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+/* Without a native mpn_div_qr_1n_pi1, replace __gmpn_div_qr_1n_pi1 by an
+ expression choosing a variant from div_qr_1n_pi1_method at run time: 1
+ selects _1, any other value <= 2 selects _2, 3 selects _3 and anything
+ else selects _4. */
+#if !HAVE_NATIVE_mpn_div_qr_1n_pi1
+#define __gmpn_div_qr_1n_pi1 \
+ (div_qr_1n_pi1_method <= 2 \
+ ? (div_qr_1n_pi1_method == 1 ? mpn_div_qr_1n_pi1_1 : mpn_div_qr_1n_pi1_2) \
+ : (div_qr_1n_pi1_method == 3 ? mpn_div_qr_1n_pi1_3 : mpn_div_qr_1n_pi1_4))
+#endif
+
+/* Compile the generic source below under the name mpn_div_qr_1_tune. */
+#undef mpn_div_qr_1
+#define mpn_div_qr_1 mpn_div_qr_1_tune
+
+#include "mpn/generic/div_qr_1.c"
diff --git a/gmp-6.3.0/tune/div_qr_1n_pi1_1.c b/gmp-6.3.0/tune/div_qr_1n_pi1_1.c
new file mode 100644
index 0000000..e64a3c7
--- /dev/null
+++ b/gmp-6.3.0/tune/div_qr_1n_pi1_1.c
@@ -0,0 +1,38 @@
+/* mpn/generic/div_qr_1n_pi1.c method 1.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Override any DIV_QR_1N_METHOD from the headers with method 1, and
+ compile the generic source below under the name mpn_div_qr_1n_pi1_1. */
+#undef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 1
+#undef mpn_div_qr_1n_pi1
+#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_1
+
+#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp-6.3.0/tune/div_qr_1n_pi1_2.c b/gmp-6.3.0/tune/div_qr_1n_pi1_2.c
new file mode 100644
index 0000000..c5432ea
--- /dev/null
+++ b/gmp-6.3.0/tune/div_qr_1n_pi1_2.c
@@ -0,0 +1,38 @@
+/* mpn/generic/div_qr_1n_pi1.c method 2.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Override any DIV_QR_1N_METHOD from the headers with method 2, and
+ compile the generic source below under the name mpn_div_qr_1n_pi1_2. */
+#undef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 2
+#undef mpn_div_qr_1n_pi1
+#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_2
+
+#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp-6.3.0/tune/div_qr_1n_pi1_3.c b/gmp-6.3.0/tune/div_qr_1n_pi1_3.c
new file mode 100644
index 0000000..826244c
--- /dev/null
+++ b/gmp-6.3.0/tune/div_qr_1n_pi1_3.c
@@ -0,0 +1,38 @@
+/* mpn/generic/div_qr_1n_pi1.c method 3.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Override any DIV_QR_1N_METHOD from the headers with method 3, and
+ compile the generic source below under the name mpn_div_qr_1n_pi1_3. */
+#undef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 3
+#undef mpn_div_qr_1n_pi1
+#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_3
+
+#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp-6.3.0/tune/div_qr_1n_pi1_4.c b/gmp-6.3.0/tune/div_qr_1n_pi1_4.c
new file mode 100644
index 0000000..0f69ea0
--- /dev/null
+++ b/gmp-6.3.0/tune/div_qr_1n_pi1_4.c
@@ -0,0 +1,38 @@
+/* mpn/generic/div_qr_1n_pi1.c method 4.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Override any DIV_QR_1N_METHOD from the headers with method 4, and
+ compile the generic source below under the name mpn_div_qr_1n_pi1_4. */
+#undef DIV_QR_1N_METHOD
+#define DIV_QR_1N_METHOD 4
+#undef mpn_div_qr_1n_pi1
+#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_4
+
+#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp-6.3.0/tune/divrem1div.c b/gmp-6.3.0/tune/divrem1div.c
new file mode 100644
index 0000000..0089971
--- /dev/null
+++ b/gmp-6.3.0/tune/divrem1div.c
@@ -0,0 +1,41 @@
+/* mpn/generic/divrem_1.c forced to use plain udiv_qrnnd.
+
+Copyright 2000, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* NOTE(review): presumably tells the headers / generic source which
+ operation is being built -- confirm where OPERATION_divrem_1 is tested. */
+#define OPERATION_divrem_1
+
+#include "gmp-impl.h"
+
+/* Replace both thresholds from the headers by the largest mp_size_t value,
+ which is how this file forces the plain udiv_qrnnd code, and compile the
+ generic source below under the name mpn_divrem_1_div. */
+#undef DIVREM_1_NORM_THRESHOLD
+#undef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX
+#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_divrem_1 mpn_divrem_1_div
+
+#include "mpn/generic/divrem_1.c"
diff --git a/gmp-6.3.0/tune/divrem1inv.c b/gmp-6.3.0/tune/divrem1inv.c
new file mode 100644
index 0000000..82c8528
--- /dev/null
+++ b/gmp-6.3.0/tune/divrem1inv.c
@@ -0,0 +1,41 @@
+/* mpn/generic/divrem_1.c forced to use mul-by-inverse udiv_qrnnd_preinv.
+
+Copyright 2000, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* NOTE(review): presumably tells the headers / generic source which
+ operation is being built -- confirm where OPERATION_divrem_1 is tested. */
+#define OPERATION_divrem_1
+
+#include "gmp-impl.h"
+
+/* Replace both thresholds from the headers by 0, which is how this file
+ forces the mul-by-inverse udiv_qrnnd_preinv code, and compile the generic
+ source below under the name mpn_divrem_1_inv. */
+#undef DIVREM_1_NORM_THRESHOLD
+#undef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD 0
+#define DIVREM_1_UNNORM_THRESHOLD 0
+#define __gmpn_divrem_1 mpn_divrem_1_inv
+
+#include "mpn/generic/divrem_1.c"
diff --git a/gmp-6.3.0/tune/divrem2div.c b/gmp-6.3.0/tune/divrem2div.c
new file mode 100644
index 0000000..8331d8f
--- /dev/null
+++ b/gmp-6.3.0/tune/divrem2div.c
@@ -0,0 +1,40 @@
+/* mpn/generic/divrem_2.c forced to use plain udiv_qrnnd. */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Replace any DIVREM_2_THRESHOLD from the headers by the largest mp_size_t
+ value, which is how this file forces the plain udiv_qrnnd code, and
+ compile the generic source below under the name mpn_divrem_2_div. */
+#ifdef DIVREM_2_THRESHOLD
+#undef DIVREM_2_THRESHOLD
+#endif
+#define DIVREM_2_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_divrem_2 mpn_divrem_2_div
+
+#include "mpn/generic/divrem_2.c"
diff --git a/gmp-6.3.0/tune/divrem2inv.c b/gmp-6.3.0/tune/divrem2inv.c
new file mode 100644
index 0000000..8ae87f5
--- /dev/null
+++ b/gmp-6.3.0/tune/divrem2inv.c
@@ -0,0 +1,40 @@
+/* mpn/generic/divrem_2.c forced to use udiv_qrnnd_preinv. */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Replace any DIVREM_2_THRESHOLD from the headers by 0, which is how this
+ file forces the udiv_qrnnd_preinv code, and compile the generic source
+ below under the name mpn_divrem_2_inv. */
+#ifdef DIVREM_2_THRESHOLD
+#undef DIVREM_2_THRESHOLD
+#endif
+#define DIVREM_2_THRESHOLD 0
+#define __gmpn_divrem_2 mpn_divrem_2_inv
+
+#include "mpn/generic/divrem_2.c"
diff --git a/gmp-6.3.0/tune/freq.c b/gmp-6.3.0/tune/freq.c
new file mode 100644
index 0000000..ee38506
--- /dev/null
+++ b/gmp-6.3.0/tune/freq.c
@@ -0,0 +1,893 @@
+/* CPU frequency determination.
+
+Copyright 1999-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* Currently we don't get a CPU frequency on the following systems,
+
+ alphaev5-cray-unicosmk2.0.6.X
+ times() has been seen at 13.33 ns (75 MHz), which is probably not the
+ cpu frequency. Measuring the cycle counter against that would be
+ possible though. But currently we don't use the cycle counter due to
+ unicos having int==8bytes where tune/alpha.asm assumes int==4bytes.
+
+ m68040-unknown-netbsd1.4.1
+ Not sure if the system even knows the cpu frequency. There's no
+ cycle counter to measure, though we could perhaps make a loop taking
+ a known number of cycles and measure that.
+
+ power-ibm-aix4.2.1.0
+ power2-ibm-aix4.3.1.0
+ powerpc604-ibm-aix4.3.1.0
+ powerpc604-ibm-aix4.3.3.0
+ powerpc630-ibm-aix4.3.3.0
+ powerpc-unknown-netbsd1.6
+ Don't know where any info hides on these. mftb is not related to the
+ cpu frequency so doesn't help.
+
+ sparc-unknown-linux-gnu [maybe]
+ Don't know where any info hides on this.
+
+ t90-cray-unicos10.0.X
+ The times() call seems to be for instance 2.22 nanoseconds, which
+ might be the cpu frequency (450 MHz), but that needs confirming.
+
+*/
+
+#include "config.h"
+
+#if HAVE_INVENT_H
+#include <invent.h> /* for IRIX invent_cpuinfo_t */
+#endif
+
+#include <stdio.h>
+#include <stdlib.h> /* for getenv, qsort */
+#include <string.h> /* for memcmp */
+
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for sysconf */
+#endif
+
+#include <sys/types.h>
+
+#if HAVE_SYS_ATTRIBUTES_H
+#include <sys/attributes.h> /* for IRIX attr_get(), needs sys/types.h */
+#endif
+
+#if HAVE_SYS_IOGRAPH_H
+#include <sys/iograph.h> /* for IRIX INFO_LBL_DETAIL_INVENT */
+#endif
+
+#if HAVE_SYS_PARAM_H /* for constants needed by NetBSD <sys/sysctl.h> */
+#include <sys/param.h> /* and needed by HPUX <sys/pstat.h> */
+#endif
+
+#if HAVE_SYS_PSTAT_H
+#include <sys/pstat.h> /* for HPUX pstat_getprocessor() */
+#endif
+
+#if HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h> /* for sysctlbyname() */
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h> /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h> /* for struct rusage */
+#endif
+
+#if HAVE_SYS_PROCESSOR_H
+#include <sys/processor.h> /* for solaris processor_info_t */
+#endif
+
+/* On AIX 5.1 with gcc 2.9-aix51-020209 in -maix64 mode, <sys/sysinfo.h>
+ gets an error about "fill" in "struct cpuinfo" having a negative size,
+ apparently due to __64BIT_KERNEL not being defined because _KERNEL is not
+ defined. Avoid this file if we don't actually need it, which we don't on
+ AIX since there's no getsysinfo there. */
+#if HAVE_SYS_SYSINFO_H && HAVE_GETSYSINFO
+#include <sys/sysinfo.h> /* for OSF getsysinfo */
+#endif
+
+#if HAVE_MACHINE_HAL_SYSINFO_H
+#include <machine/hal_sysinfo.h> /* for OSF GSI_CPU_INFO, struct cpu_info */
+#endif
+
+/* Remove definitions from NetBSD <sys/param.h>, to avoid conflicts with
+ gmp-impl.h. */
+#ifdef MIN
+#undef MIN
+#endif
+#ifdef MAX
+#undef MAX
+#endif
+
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+#define HELP(str) \
+ if (help) \
+ { \
+ printf (" - %s\n", str); \
+ return 0; \
+ }
+
+
+/* GMP_CPU_FREQUENCY environment variable. Should be in Hertz and can be
+ floating point, for example "450e6". */
+static int
+freq_environment (int help)
+{
+ char *e;
+
+ HELP ("environment variable GMP_CPU_FREQUENCY (in Hertz)");
+
+ e = getenv ("GMP_CPU_FREQUENCY");
+ if (e == NULL)
+ return 0;
+
+ speed_cycletime = 1.0 / atof (e);
+
+ if (speed_option_verbose)
+ printf ("Using GMP_CPU_FREQUENCY %.2f for cycle time %.3g\n",
+ atof (e), speed_cycletime);
+
+ return 1;
+}
+
+
+/* getsysinfo is available on OSF, or 4.0 and up at least.
+ The man page (on 4.0) suggests a 0 return indicates information not
+ available, but that seems to be the normal return for GSI_CPU_INFO. */
+static int
+freq_getsysinfo (int help)
+{
+#if HAVE_GETSYSINFO
+ struct cpu_info c;
+ int start;
+
+ HELP ("getsysinfo() GSI_CPU_INFO");
+
+ start = 0;
+ if (getsysinfo (GSI_CPU_INFO, (caddr_t) &c, sizeof (c),
+ &start, NULL, NULL) != -1)
+ {
+ speed_cycletime = 1e-6 / (double) c.mhz;
+ if (speed_option_verbose)
+ printf ("Using getsysinfo() GSI_CPU_INFO %u for cycle time %.3g\n",
+ c.mhz, speed_cycletime);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+
+/* In HPUX 10 and up, pstat_getprocessor() psp_iticksperclktick is the
+ number of CPU cycles (ie. the CR16 register) per CLK_TCK. HPUX 9 doesn't
+ have that field in pst_processor though, and has no apparent
+ equivalent. */
+
+static int
+freq_pstat_getprocessor (int help)
+{
+#if HAVE_PSTAT_GETPROCESSOR && HAVE_PSP_ITICKSPERCLKTICK
+ struct pst_processor p;
+
+ HELP ("pstat_getprocessor() psp_iticksperclktick");
+
+ if (pstat_getprocessor (&p, sizeof(p), 1, 0) != -1)
+ {
+ long c = clk_tck();
+ speed_cycletime = 1.0 / (c * p.psp_iticksperclktick);
+ if (speed_option_verbose)
+ printf ("Using pstat_getprocessor() psp_iticksperclktick %lu and clk_tck %ld for cycle time %.3g\n",
+ (unsigned long) p.psp_iticksperclktick, c,
+ speed_cycletime);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+
+/* i386 FreeBSD 2.2.8 sysctlbyname machdep.i586_freq is in Hertz.
+ There are no obvious defines available to get this from plain sysctl. */
+static int
+freq_sysctlbyname_i586_freq (int help)
+{
+#if HAVE_SYSCTLBYNAME
+ unsigned val;
+ size_t size;
+
+ HELP ("sysctlbyname() machdep.i586_freq");
+
+ size = sizeof(val);
+ if (sysctlbyname ("machdep.i586_freq", &val, &size, NULL, 0) == 0
+ && size == sizeof(val))
+ {
+ speed_cycletime = 1.0 / (double) val;
+ if (speed_option_verbose)
+ printf ("Using sysctlbyname() machdep.i586_freq %u for cycle time %.3g\n",
+ val, speed_cycletime);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+
+/* i386 FreeBSD 3.3 sysctlbyname machdep.tsc_freq is in Hertz.
+ There are no obvious defines to get this from plain sysctl. */
+
+static int
+freq_sysctlbyname_tsc_freq (int help)
+{
+#if HAVE_SYSCTLBYNAME
+ unsigned val;
+ size_t size;
+
+ HELP ("sysctlbyname() machdep.tsc_freq");
+
+ size = sizeof(val);
+ if (sysctlbyname ("machdep.tsc_freq", &val, &size, NULL, 0) == 0
+ && size == sizeof(val))
+ {
+ speed_cycletime = 1.0 / (double) val;
+ if (speed_option_verbose)
+ printf ("Using sysctlbyname() machdep.tsc_freq %u for cycle time %.3g\n",
+ val, speed_cycletime);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+
+/* Apple powerpc Darwin 1.3 sysctl hw.cpufrequency is in hertz. For some
+ reason only seems to be available from sysctl(), not sysctlbyname(). */
+
+static int
+freq_sysctl_hw_cpufrequency (int help)
+{
+#if HAVE_SYSCTL && defined (CTL_HW) && defined (HW_CPU_FREQ)
+ int mib[2];
+ unsigned val;
+ size_t size;
+
+ HELP ("sysctl() hw.cpufrequency");
+
+ mib[0] = CTL_HW;
+ mib[1] = HW_CPU_FREQ;
+ size = sizeof(val);
+ if (sysctl (mib, 2, &val, &size, NULL, 0) == 0)
+ {
+ speed_cycletime = 1.0 / (double) val;
+ if (speed_option_verbose)
+ printf ("Using sysctl() hw.cpufrequency %u for cycle time %.3g\n",
+ val, speed_cycletime);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+
+/* The following sysctl hw.model strings have been observed,
+
+ Alpha FreeBSD 4.1: Digital AlphaPC 164LX 599 MHz
+ NetBSD 1.4: Digital AlphaPC 164LX 599 MHz
+ NetBSD 1.6.1: CY7C601 @ 40 MHz, TMS390C602A FPU
+
+ NetBSD 1.4 doesn't seem to have sysctlbyname, so sysctl() is used. */
+
+static int
+freq_sysctl_hw_model (int help)
+{
+#if HAVE_SYSCTL && defined (CTL_HW) && defined (HW_MODEL)
+ int mib[2];
+ char str[128];
+ unsigned val;
+ size_t size;
+ char *p;
+ int end;
+
+ HELP ("sysctl() hw.model");
+
+ mib[0] = CTL_HW;
+ mib[1] = HW_MODEL;
+ size = sizeof(str);
+ if (sysctl (mib, 2, str, &size, NULL, 0) == 0)
+ {
+ for (p = str; *p != '\0'; p++)
+ {
+ end = 0;
+ if (sscanf (p, "%u MHz%n", &val, &end) == 1 && end != 0)
+ {
+ speed_cycletime = 1e-6 / (double) val;
+ if (speed_option_verbose)
+ printf ("Using sysctl() hw.model %u for cycle time %.3g\n",
+ val, speed_cycletime);
+ return 1;
+ }
+ }
+ }
+#endif
+ return 0;
+}
+
+
+/* /proc/cpuinfo for linux kernel.
+
+ Linux doesn't seem to have any system call to get the CPU frequency, at
+ least not in 2.0.x or 2.2.x, so it's necessary to read /proc/cpuinfo.
+
+ i386 2.0.36 - "bogomips" is the CPU frequency.
+
+ i386 2.2.13 - has both "cpu MHz" and "bogomips", and it's "cpu MHz" which
+ is the frequency.
+
+ alpha 2.2.5 - "cycle frequency [Hz]" seems to be right, "BogoMIPS" is
+ very slightly different.
+
+ alpha 2.2.18pre21 - "cycle frequency [Hz]" is 0 on at least one system,
+ "BogoMIPS" seems near enough.
+
+ powerpc 2.2.19 - "clock" is the frequency, bogomips is something weird
+ */
+
+static int
+freq_proc_cpuinfo (int help)
+{
+ FILE *fp;
+ char buf[128];
+ double val;
+ int ret = 0;
+ int end;
+
+ HELP ("linux kernel /proc/cpuinfo file, cpu MHz or bogomips");
+
+ if ((fp = fopen ("/proc/cpuinfo", "r")) != NULL)
+ {
+ while (fgets (buf, sizeof (buf), fp) != NULL)
+ {
+ if (sscanf (buf, "cycle frequency [Hz] : %lf", &val) == 1
+ && val != 0.0)
+ {
+ speed_cycletime = 1.0 / val;
+ if (speed_option_verbose)
+ printf ("Using /proc/cpuinfo \"cycle frequency\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ if (sscanf (buf, "cpu MHz : %lf\n", &val) == 1)
+ {
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using /proc/cpuinfo \"cpu MHz\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ end = 0;
+ if (sscanf (buf, "clock : %lfMHz\n%n", &val, &end) == 1 && end != 0)
+ {
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using /proc/cpuinfo \"clock\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ if (sscanf (buf, "bogomips : %lf\n", &val) == 1
+ || sscanf (buf, "BogoMIPS : %lf\n", &val) == 1)
+ {
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using /proc/cpuinfo \"bogomips\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ }
+ fclose (fp);
+ }
+ return ret;
+}
+
+
+/* /bin/sysinfo for SunOS 4.
+ Prints a line like: cpu0 is a "75 MHz TI,TMS390Z55" CPU */
+static int
+freq_sunos_sysinfo (int help)
+{
+ int ret = 0;
+#if HAVE_POPEN
+ FILE *fp;
+ char buf[128];
+ double val;
+ int end;
+
+ HELP ("SunOS /bin/sysinfo program output, cpu0");
+
+ /* Error messages are sent to /dev/null in case /bin/sysinfo doesn't
+ exist. The brackets are necessary for some shells. */
+ if ((fp = popen ("(/bin/sysinfo) 2>/dev/null", "r")) != NULL)
+ {
+ while (fgets (buf, sizeof (buf), fp) != NULL)
+ {
+ end = 0;
+ if (sscanf (buf, " cpu0 is a \"%lf MHz%n", &val, &end) == 1
+ && end != 0)
+ {
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using /bin/sysinfo \"cpu0 MHz\" %.2f for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ }
+ pclose (fp);
+ }
+#endif
+ return ret;
+}
+
+
+/* "/etc/hw -r cpu" for SCO OpenUnix 8, printing a line like
+ The speed of the CPU is approximately 450MHz
+ */
+static int
+freq_sco_etchw (int help)
+{
+ int ret = 0;
+#if HAVE_POPEN
+ FILE *fp;
+ char buf[128];
+ double val;
+ int end;
+
+ HELP ("SCO /etc/hw program output");
+
+ /* Error messages are sent to /dev/null in case /etc/hw doesn't exist.
+ The brackets are necessary for some shells. */
+ if ((fp = popen ("(/etc/hw -r cpu) 2>/dev/null", "r")) != NULL)
+ {
+ while (fgets (buf, sizeof (buf), fp) != NULL)
+ {
+ end = 0;
+ if (sscanf (buf, " The speed of the CPU is approximately %lfMHz%n",
+ &val, &end) == 1 && end != 0)
+ {
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using /etc/hw %.2f MHz, for cycle time %.3g\n",
+ val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ }
+ pclose (fp);
+ }
+#endif
+ return ret;
+}
+
+
+/* attr_get("/hw/cpunum/0",INFO_LBL_DETAIL_INVENT) ic_cpu_info.cpufq for
+ IRIX 6.5. Past versions don't have INFO_LBL_DETAIL_INVENT,
+ invent_cpuinfo_t, or /hw/cpunum/0.
+
+ The same information is available from the "hinv -c processor" command,
+ but it seems better to make a system call where possible. */
+
+static int
+freq_attr_get_invent (int help)
+{
+ int ret = 0;
+#if HAVE_ATTR_GET && HAVE_INVENT_H && defined (INFO_LBL_DETAIL_INVENT)
+ invent_cpuinfo_t inv;
+ int len, val;
+
+ HELP ("attr_get(\"/hw/cpunum/0\") ic_cpu_info.cpufq");
+
+ len = sizeof (inv);
+ if (attr_get ("/hw/cpunum/0", INFO_LBL_DETAIL_INVENT,
+ (char *) &inv, &len, 0) == 0
+ && len == sizeof (inv)
+ && inv.ic_gen.ig_invclass == INV_PROCESSOR)
+ {
+ val = inv.ic_cpu_info.cpufq;
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using attr_get(\"/hw/cpunum/0\") ic_cpu_info.cpufq %d MHz for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ }
+#endif
+ return ret;
+}
+
+
+/* FreeBSD on i386 gives a line like the following at bootup, and which can
+ be read back from /var/run/dmesg.boot.
+
+ CPU: AMD Athlon(tm) Processor (755.29-MHz 686-class CPU)
+ CPU: Pentium 4 (1707.56-MHz 686-class CPU)
+ CPU: i486 DX4 (486-class CPU)
+
+ This is useful on FreeBSD 4.x, where there's no sysctl machdep.tsc_freq
+ or machdep.i586_freq.
+
+ It's better to use /var/run/dmesg.boot than to run /sbin/dmesg, since the
+ latter prints the current system message buffer, which is a limited size
+ and can wrap around if the system is up for a long time. */
+
+static int
+freq_bsd_dmesg (int help)
+{
+ FILE *fp;
+ char buf[256], *p;
+ double val;
+ int ret = 0;
+ int end;
+
+ HELP ("BSD /var/run/dmesg.boot file");
+
+ if ((fp = fopen ("/var/run/dmesg.boot", "r")) != NULL)
+ {
+ while (fgets (buf, sizeof (buf), fp) != NULL)
+ {
+ if (memcmp (buf, "CPU:", 4) == 0)
+ {
+ for (p = buf; *p != '\0'; p++)
+ {
+ end = 0;
+ if (sscanf (p, "(%lf-MHz%n", &val, &end) == 1 && end != 0)
+ {
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using /var/run/dmesg.boot CPU: %.2f MHz for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ }
+ }
+ }
+ fclose (fp);
+ }
+ return ret;
+}
+
+
+/* "hinv -c processor" for IRIX. The following lines have been seen,
+
+ 1 150 MHZ IP20 Processor
+ 2 195 MHZ IP27 Processors
+ Processor 0: 500 MHZ IP35
+
+ This information is available from attr_get() on IRIX 6.5 (see above),
+ but on IRIX 6.2 it's not clear where to look, so fall back on
+ parsing. */
+
+static int
+freq_irix_hinv (int help)
+{
+ int ret = 0;
+#if HAVE_POPEN
+ FILE *fp;
+ char buf[128];
+ double val;
+ int nproc, end;
+
+ HELP ("IRIX \"hinv -c processor\" output");
+
+ /* Error messages are sent to /dev/null in case hinv doesn't exist. The
+ brackets are necessary for some shells. */
+ if ((fp = popen ("(hinv -c processor) 2>/dev/null", "r")) != NULL)
+ {
+ while (fgets (buf, sizeof (buf), fp) != NULL)
+ {
+ end = 0;
+ if (sscanf (buf, "Processor 0: %lf MHZ%n", &val, &end) == 1
+ && end != 0)
+ {
+ found:
+ speed_cycletime = 1e-6 / val;
+ if (speed_option_verbose)
+ printf ("Using hinv -c processor \"%.2f MHZ\" for cycle time %.3g\n", val, speed_cycletime);
+ ret = 1;
+ break;
+ }
+ end = 0;
+ if (sscanf (buf, "%d %lf MHZ%n", &nproc, &val, &end) == 2
+ && end != 0)
+ goto found;
+ }
+ pclose (fp);
+ }
+#endif
+ return ret;
+}
+
+
+/* processor_info() for Solaris. "psrinfo" is the command-line interface to
+ this. "prtconf -vp" gives similar information.
+
+ Apple Darwin has a processor_info, but in an incompatible style. It
+ doesn't have <sys/processor.h>, so test for that. */
+
+static int
+freq_processor_info (int help)
+{
+#if HAVE_PROCESSOR_INFO && HAVE_SYS_PROCESSOR_H
+ processor_info_t p;
+ int i, n, mhz = 0;
+
+ HELP ("processor_info() pi_clock");
+
+ n = sysconf (_SC_NPROCESSORS_CONF);
+ for (i = 0; i < n; i++)
+ {
+ if (processor_info (i, &p) != 0)
+ continue;
+ if (p.pi_state != P_ONLINE)
+ continue;
+
+ if (mhz != 0 && p.pi_clock != mhz)
+ {
+ fprintf (stderr,
+ "freq_processor_info(): There's more than one CPU and they have different clock speeds\n");
+ return 0;
+ }
+
+ mhz = p.pi_clock;
+ }
+
+ speed_cycletime = 1.0e-6 / (double) mhz;
+
+ if (speed_option_verbose)
+ printf ("Using processor_info() %d mhz for cycle time %.3g\n",
+ mhz, speed_cycletime);
+ return 1;
+
+#else
+ return 0;
+#endif
+}
+
+
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETTIMEOFDAY
+static double
+freq_measure_gettimeofday_one (void)
+{
+#define call_gettimeofday(t) gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t) ((t).tv_sec)
+#define timeval_tv_usec(t) ((t).tv_usec)
+ FREQ_MEASURE_ONE ("gettimeofday", struct timeval,
+ call_gettimeofday, speed_cyclecounter,
+ timeval_tv_sec, timeval_tv_usec);
+}
+#endif
+
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETRUSAGE
+static double
+freq_measure_getrusage_one (void)
+{
+#define call_getrusage(t) getrusage (0, &(t))
+#define rusage_tv_sec(t) ((t).ru_utime.tv_sec)
+#define rusage_tv_usec(t) ((t).ru_utime.tv_usec)
+ FREQ_MEASURE_ONE ("getrusage", struct rusage,
+ call_getrusage, speed_cyclecounter,
+ rusage_tv_sec, rusage_tv_usec);
+}
+#endif
+
+
+/* MEASURE_MATCH is how many readings within MEASURE_TOLERANCE of each other
+ are required. This must be at least 2. */
+#define MEASURE_MAX_ATTEMPTS 20
+#define MEASURE_TOLERANCE 1.005 /* 0.5% */
+#define MEASURE_MATCH 3
+
+double
+freq_measure (const char *name, double (*one) (void))
+{
+ double t[MEASURE_MAX_ATTEMPTS];
+ int i, j;
+
+ for (i = 0; i < numberof (t); i++)
+ {
+ t[i] = (*one) ();
+
+ qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
+ if (speed_option_verbose >= 3)
+ for (j = 0; j <= i; j++)
+ printf (" t[%d] is %.6g\n", j, t[j]);
+
+ for (j = 0; j+MEASURE_MATCH-1 <= i; j++)
+ {
+ if (t[j+MEASURE_MATCH-1] <= t[j] * MEASURE_TOLERANCE)
+ {
+ /* use the average of the range found */
+ return (t[j+MEASURE_MATCH-1] + t[j]) / 2.0;
+ }
+ }
+ }
+ return -1.0;
+}
+
+static int
+freq_measure_getrusage (int help)
+{
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETRUSAGE
+ double cycletime;
+
+ if (! getrusage_microseconds_p ())
+ return 0;
+ if (! cycles_works_p ())
+ return 0;
+
+ HELP ("cycle counter measured with microsecond getrusage()");
+
+ cycletime = freq_measure ("getrusage", freq_measure_getrusage_one);
+ if (cycletime == -1.0)
+ return 0;
+
+ speed_cycletime = cycletime;
+ if (speed_option_verbose)
+ printf ("Using getrusage() measured cycle counter %.4g (%.2f MHz)\n",
+ speed_cycletime, 1e-6/speed_cycletime);
+ return 1;
+
+#else
+ return 0;
+#endif
+}
+
+static int
+freq_measure_gettimeofday (int help)
+{
+#if HAVE_SPEED_CYCLECOUNTER && HAVE_GETTIMEOFDAY
+ double cycletime;
+
+ if (! gettimeofday_microseconds_p ())
+ return 0;
+ if (! cycles_works_p ())
+ return 0;
+
+ HELP ("cycle counter measured with microsecond gettimeofday()");
+
+ cycletime = freq_measure ("gettimeofday", freq_measure_gettimeofday_one);
+ if (cycletime == -1.0)
+ return 0;
+
+ speed_cycletime = cycletime;
+ if (speed_option_verbose)
+ printf ("Using gettimeofday() measured cycle counter %.4g (%.2f MHz)\n",
+ speed_cycletime, 1e-6/speed_cycletime);
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+
+/* Each function returns 1 if it succeeds in setting speed_cycletime, or 0
+ if not.
+
+ In general system call tests are first since they're fast, then file
+ tests, then tests running programs. Necessary exceptions to this rule
+ are noted. The measuring is last since it's time consuming, and rather
+ wasteful of cpu. */
+
+static int
+freq_all (int help)
+{
+ return
+ /* This should be first, so an environment variable can override
+ anything the system gives. */
+ freq_environment (help)
+
+ || freq_attr_get_invent (help)
+ || freq_getsysinfo (help)
+ || freq_pstat_getprocessor (help)
+ || freq_sysctl_hw_model (help)
+ || freq_sysctl_hw_cpufrequency (help)
+ || freq_sysctlbyname_i586_freq (help)
+ || freq_sysctlbyname_tsc_freq (help)
+
+ /* SCO openunix 8 puts a dummy pi_clock==16 in processor_info, so be
+ sure to check /etc/hw before that function. */
+ || freq_sco_etchw (help)
+
+ || freq_processor_info (help)
+ || freq_proc_cpuinfo (help)
+ || freq_bsd_dmesg (help)
+ || freq_irix_hinv (help)
+ || freq_sunos_sysinfo (help)
+ || freq_measure_getrusage (help)
+ || freq_measure_gettimeofday (help);
+}
+
+
+void
+speed_cycletime_init (void)
+{
+ static int attempted = 0;
+
+ if (attempted)
+ return;
+ attempted = 1;
+
+ if (freq_all (0))
+ return;
+
+ if (speed_option_verbose)
+ printf ("CPU frequency couldn't be determined\n");
+}
+
+
+void
+speed_cycletime_fail (const char *str)
+{
+ fprintf (stderr, "Measuring with: %s\n", speed_time_string);
+ fprintf (stderr, "%s,\n", str);
+ fprintf (stderr, "but none of the following are available,\n");
+ freq_all (1);
+ abort ();
+}
+
+/* speed_time_init leaves speed_cycletime set to either 0.0 or 1.0 when the
+ CPU frequency is unknown. 0.0 is when the time base is in seconds, so
+ that's no good if cycles are wanted. 1.0 is when the time base is in
+ cycles, which conversely is no good if seconds are wanted. */
+void
+speed_cycletime_need_cycles (void)
+{
+ speed_time_init ();
+ if (speed_cycletime == 0.0)
+ speed_cycletime_fail
+ ("Need to know CPU frequency to give times in cycles");
+}
+void
+speed_cycletime_need_seconds (void)
+{
+ speed_time_init ();
+ if (speed_cycletime == 1.0)
+ speed_cycletime_fail
+ ("Need to know CPU frequency to convert cycles to seconds");
+}
diff --git a/gmp-6.3.0/tune/gcdext_double.c b/gmp-6.3.0/tune/gcdext_double.c
new file mode 100644
index 0000000..2b2ba15
--- /dev/null
+++ b/gmp-6.3.0/tune/gcdext_double.c
@@ -0,0 +1,38 @@
+/* mpn/generic/gcdext.c forced to use double limb calculations. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 0
+#define __gmpn_gcdext mpn_gcdext_double
+
+#include "../mpn/generic/gcdext.c"
diff --git a/gmp-6.3.0/tune/gcdext_single.c b/gmp-6.3.0/tune/gcdext_single.c
new file mode 100644
index 0000000..3c1d28c
--- /dev/null
+++ b/gmp-6.3.0/tune/gcdext_single.c
@@ -0,0 +1,38 @@
+/* mpn/generic/gcdext.c forced to use single limb calculations. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_gcdext mpn_gcdext_single
+
+#include "../mpn/generic/gcdext.c"
diff --git a/gmp-6.3.0/tune/gcdextod.c b/gmp-6.3.0/tune/gcdextod.c
new file mode 100644
index 0000000..f40cae6
--- /dev/null
+++ b/gmp-6.3.0/tune/gcdextod.c
@@ -0,0 +1,39 @@
+/* mpn/generic/gcdext.c forced to one double limb step. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 0
+#define WANT_GCDEXT_ONE_STEP 1
+#define __gmpn_gcdext mpn_gcdext_one_double
+
+#include "../mpn/generic/gcdext.c"
diff --git a/gmp-6.3.0/tune/gcdextos.c b/gmp-6.3.0/tune/gcdextos.c
new file mode 100644
index 0000000..f51ff52
--- /dev/null
+++ b/gmp-6.3.0/tune/gcdextos.c
@@ -0,0 +1,39 @@
+/* mpn/generic/gcdext.c forced to one single limb step. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD MP_SIZE_T_MAX
+#define WANT_GCDEXT_ONE_STEP 1
+#define __gmpn_gcdext mpn_gcdext_one_single
+
+#include "../mpn/generic/gcdext.c"
diff --git a/gmp-6.3.0/tune/hgcd2-1.c b/gmp-6.3.0/tune/hgcd2-1.c
new file mode 100644
index 0000000..1e8948c
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd2-1.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 1.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 1
+#define __gmpn_hgcd2 mpn_hgcd2_1
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_1
+
+#include "mpn/generic/hgcd2.c"
diff --git a/gmp-6.3.0/tune/hgcd2-2.c b/gmp-6.3.0/tune/hgcd2-2.c
new file mode 100644
index 0000000..bbb123b
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd2-2.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 2.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 2
+#define __gmpn_hgcd2 mpn_hgcd2_2
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_2
+
+#include "mpn/generic/hgcd2.c"
diff --git a/gmp-6.3.0/tune/hgcd2-3.c b/gmp-6.3.0/tune/hgcd2-3.c
new file mode 100644
index 0000000..ac62108
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd2-3.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 3.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 3
+#define __gmpn_hgcd2 mpn_hgcd2_3
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_3
+
+#include "mpn/generic/hgcd2.c"
diff --git a/gmp-6.3.0/tune/hgcd2-4.c b/gmp-6.3.0/tune/hgcd2-4.c
new file mode 100644
index 0000000..ec7f927
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd2-4.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 4.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 4
+#define __gmpn_hgcd2 mpn_hgcd2_4
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_4
+
+#include "mpn/generic/hgcd2.c"
diff --git a/gmp-6.3.0/tune/hgcd2-5.c b/gmp-6.3.0/tune/hgcd2-5.c
new file mode 100644
index 0000000..ed66171
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd2-5.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd2.c method 5.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD2_DIV1_METHOD
+#define HGCD2_DIV1_METHOD 5
+#define __gmpn_hgcd2 mpn_hgcd2_5
+/* Not used, but renamed to not get duplicate definitions */
+#define __gmpn_hgcd_mul_matrix1_vector mpn_hgcd_mul_matrix1_vector_5
+
+#include "mpn/generic/hgcd2.c"
diff --git a/gmp-6.3.0/tune/hgcd2.c b/gmp-6.3.0/tune/hgcd2.c
new file mode 100644
index 0000000..146af72
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd2.c
@@ -0,0 +1,49 @@
+/* mpn/generic/hgcd2.c for tuning
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define TUNE_PROGRAM_BUILD 1
+
+#include "gmp-impl.h"
+
+hgcd2_func_t mpn_hgcd2_default; /* declared here; the #define and #include at the end of this file appear to provide its definition by renaming the generic mpn_hgcd2 */
+
+hgcd2_func_t *hgcd2_func = &mpn_hgcd2_default; /* implementation that mpn_hgcd2 calls; initially the default one */
+
+int
+mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+ struct hgcd_matrix1 *M)
+{
+ return hgcd2_func(ah, al, bh, bl, M); /* forward every argument unchanged through the pointer and return its result; presumably the tune program repoints hgcd2_func to compare variants - confirm against callers */
+}
+
+#undef mpn_hgcd2
+#define mpn_hgcd2 mpn_hgcd2_default
+
+#include "mpn/generic/hgcd2.c"
diff --git a/gmp-6.3.0/tune/hgcd_appr_lehmer.c b/gmp-6.3.0/tune/hgcd_appr_lehmer.c
new file mode 100644
index 0000000..aa43a07
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd_appr_lehmer.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_appr mpn_hgcd_appr_lehmer
+#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
+
+#include "../mpn/generic/hgcd_appr.c"
diff --git a/gmp-6.3.0/tune/hgcd_lehmer.c b/gmp-6.3.0/tune/hgcd_lehmer.c
new file mode 100644
index 0000000..364749d
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd_lehmer.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD_THRESHOLD
+#define HGCD_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd mpn_hgcd_lehmer
+#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch
+
+#include "../mpn/generic/hgcd.c"
diff --git a/gmp-6.3.0/tune/hgcd_reduce_1.c b/gmp-6.3.0/tune/hgcd_reduce_1.c
new file mode 100644
index 0000000..5052233
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd_reduce_1.c
@@ -0,0 +1,40 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/gmp-6.3.0/tune/hgcd_reduce_2.c b/gmp-6.3.0/tune/hgcd_reduce_2.c
new file mode 100644
index 0000000..5d802e0
--- /dev/null
+++ b/gmp-6.3.0/tune/hgcd_reduce_2.c
@@ -0,0 +1,39 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 0
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/gmp-6.3.0/tune/hppa.asm b/gmp-6.3.0/tune/hppa.asm
new file mode 100644
index 0000000..fc9d62e
--- /dev/null
+++ b/gmp-6.3.0/tune/hppa.asm
@@ -0,0 +1,42 @@
+dnl HPPA 32-bit time stamp counter access routine.
+
+dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl void speed_cyclecounter (unsigned p[2]);
+dnl
+dnl Get the HPPA interval timer.
+
+PROLOGUE(speed_cyclecounter)
+ mfctl %cr16,%r28 ; read the interval timer (cr16) into r28
+ stw %r28,0(0,%r26) ; low word
+ bv 0(%r2) ; branch back through r2 - the store below is still expected to execute
+ stw %r0,4(0,%r26) ; high word, written from r0 (presumably always zero - confirm)
+EPILOGUE(speed_cyclecounter)
diff --git a/gmp-6.3.0/tune/hppa2.asm b/gmp-6.3.0/tune/hppa2.asm
new file mode 100644
index 0000000..57ef4c4
--- /dev/null
+++ b/gmp-6.3.0/tune/hppa2.asm
@@ -0,0 +1,44 @@
+dnl HPPA 64-bit time stamp counter access routine.
+
+dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl void speed_cyclecounter (unsigned p[2]);
+dnl
+dnl Get the HPPA interval timer.
+
+ .level 2.0
+PROLOGUE(speed_cyclecounter)
+ mfctl %cr16,%r28 ; read the interval timer (cr16) into r28
+ stw %r28,0(0,%r26) ; low word
+ extrd,u %r28,31,32,%r28 ; unsigned extract of a 32 bit field - presumably the upper half of r28
+ bve (%r2) ; branch back through r2 - the store below is still expected to execute
+ stw %r28,4(0,%r26) ; high word
+EPILOGUE(speed_cyclecounter)
diff --git a/gmp-6.3.0/tune/hppa2w.asm b/gmp-6.3.0/tune/hppa2w.asm
new file mode 100644
index 0000000..215a0cc
--- /dev/null
+++ b/gmp-6.3.0/tune/hppa2w.asm
@@ -0,0 +1,44 @@
+dnl HPPA 64-bit time stamp counter access routine.
+
+dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl void speed_cyclecounter (unsigned p[2]);
+dnl
+dnl Get the HPPA interval timer.
+
+ .level 2.0w
+PROLOGUE(speed_cyclecounter)
+ mfctl %cr16,%r28 ; read the interval timer (cr16) into r28
+ stw %r28,0(0,%r26) ; low word
+ extrd,u %r28,31,32,%r28 ; unsigned extract of a 32 bit field - presumably the upper half of r28
+ bve (%r2) ; branch back through r2 - the store below is still expected to execute
+ stw %r28,4(0,%r26) ; high word
+EPILOGUE(speed_cyclecounter)
diff --git a/gmp-6.3.0/tune/ia64.asm b/gmp-6.3.0/tune/ia64.asm
new file mode 100644
index 0000000..0651111
--- /dev/null
+++ b/gmp-6.3.0/tune/ia64.asm
@@ -0,0 +1,47 @@
+dnl IA-64 time stamp counter access routine.
+
+dnl Copyright 2000, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned int p[2]);
+C
+
+ASM_START()
+PROLOGUE(speed_cyclecounter)
+ mov r14 = ar.itc C copy ar.itc (the counter being sampled) into r14
+ ;;
+ st4 [r32] = r14, 4 C 4-byte store of r14 through r32 (presumably p), then r32 advances by 4
+ shr.u r14 = r14, 32 C unsigned shift right by 32 leaves the upper half in the low bits
+ ;;
+ st4 [r32] = r14 C 4-byte store of that upper half at the advanced address
+ br.ret.sptk.many b0 C return through b0
+EPILOGUE(speed_cyclecounter)
+ASM_END()
diff --git a/gmp-6.3.0/tune/jacbase1.c b/gmp-6.3.0/tune/jacbase1.c
new file mode 100644
index 0000000..89a584d
--- /dev/null
+++ b/gmp-6.3.0/tune/jacbase1.c
@@ -0,0 +1,37 @@
+/* mpn/generic/jacbase.c method 1.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 1
+#define __gmpn_jacobi_base mpn_jacobi_base_1
+
+#include "mpn/generic/jacbase.c"
diff --git a/gmp-6.3.0/tune/jacbase2.c b/gmp-6.3.0/tune/jacbase2.c
new file mode 100644
index 0000000..253d835
--- /dev/null
+++ b/gmp-6.3.0/tune/jacbase2.c
@@ -0,0 +1,37 @@
+/* mpn/generic/jacbase.c method 2.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 2
+#define __gmpn_jacobi_base mpn_jacobi_base_2
+
+#include "mpn/generic/jacbase.c"
diff --git a/gmp-6.3.0/tune/jacbase3.c b/gmp-6.3.0/tune/jacbase3.c
new file mode 100644
index 0000000..4440f31
--- /dev/null
+++ b/gmp-6.3.0/tune/jacbase3.c
@@ -0,0 +1,37 @@
+/* mpn/generic/jacbase.c method 3.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 3
+#define __gmpn_jacobi_base mpn_jacobi_base_3
+
+#include "mpn/generic/jacbase.c"
diff --git a/gmp-6.3.0/tune/jacbase4.c b/gmp-6.3.0/tune/jacbase4.c
new file mode 100644
index 0000000..daea3bb
--- /dev/null
+++ b/gmp-6.3.0/tune/jacbase4.c
@@ -0,0 +1,37 @@
+/* mpn/generic/jacbase.c method 4.
+
+Copyright 2002, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 4
+#define __gmpn_jacobi_base mpn_jacobi_base_4
+
+#include "mpn/generic/jacbase.c"
diff --git a/gmp-6.3.0/tune/many.pl b/gmp-6.3.0/tune/many.pl
new file mode 100644
index 0000000..524a67d
--- /dev/null
+++ b/gmp-6.3.0/tune/many.pl
@@ -0,0 +1,1334 @@
+#! /usr/bin/perl -w
+
+# Copyright 2000-2002 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+# * the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# or
+#
+# * the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+
+# Usage: cd $builddir/tune
+# perl $srcdir/tune/many.pl [-t] <files/dirs>...
+#
+# Output: speed-many.c
+# try-many.c
+# Makefile.many
+#
+# Make alternate versions of various mpn routines available for measuring
+# and testing.
+#
+# The $srcdir and $builddir in the invocation above just means the script
+# lives in the tune source directory, but should be run in the tune build
+# directory. When not using a separate object directory this just becomes
+#
+# cd tune
+# perl many.pl [-t] <files/dirs>...
+#
+#
+# SINGLE FILES
+#
+# Suppose $HOME/newcode/mul_1_experiment.asm is a new implementation of
+# mpn_mul_1, then
+#
+# cd $builddir/tune
+# perl $srcdir/tune/many.pl $HOME/newcode/mul_1_experiment.asm
+#
+# will produce rules and renaming so that a speed program incorporating it
+# can be built,
+#
+# make -f Makefile.many speed-many
+#
+# then for example it can be compared to the standard mul_1,
+#
+# ./speed-many -s 1-30 mpn_mul_1 mpn_mul_1_experiment
+#
+# An expanded try program can be used to check correctness,
+#
+# make -f Makefile.many try-many
+#
+# and run
+#
+# ./try-many mpn_mul_1_experiment
+#
+# Files can be ".c", ".S" or ".asm". ".s" files can't be used because they
+# don't get any preprocessing so there's no way to do renaming of their
+# functions.
+#
+#
+# WHOLE DIRECTORIES
+#
+# If a directory is given, then all files in it will be made available.
+# For example,
+#
+# cd $builddir/tune
+# perl $srcdir/tune/many.pl $HOME/newcode
+#
+# Each file should have a suffix, like "_experiment" above.
+#
+#
+# MPN DIRECTORIES
+#
+# mpn directories from the GMP source tree can be included, and this is a
+# convenient way to compare multiple implementations suiting different chips
+# in a CPU family. For example the following would make all x86 routines
+# available,
+#
+# cd $builddir/tune
+# perl $srcdir/tune/many.pl `find $srcdir/mpn/x86 -type d`
+#
+# On a new x86 chip a comparison could then be made to see how existing code
+# runs. For example,
+#
+# make -f Makefile.many speed-many
+# ./speed-many -s 1-30 -c \
+# mpn_add_n_x86 mpn_add_n_pentium mpn_add_n_k6 mpn_add_n_k7
+#
+# Files in "mpn" subdirectories don't need the "_experiment" style suffix
+# described above, instead a suffix is constructed from the subdirectory.
+# For example "mpn/x86/k7/mmx/mod_1.asm" will generate a function
+# mpn_mod_1_k7_mmx. The rule is to take the last directory name after the
+# "mpn", or the last two if there's three or more. (Check the generated
+# speed-many.c if in doubt.)
+#
+#
+# GENERIC C
+#
+# The mpn/generic directory can be included too, just like any processor
+# specific directory. This is a good way to compare assembler and generic C
+# implementations. For example,
+#
+# cd $builddir/tune
+# perl $srcdir/tune/many.pl $srcdir/mpn/generic
+#
+# or if just a few routines are of interest, then for example
+#
+# cd $builddir/tune
+# perl $srcdir/tune/many.pl \
+# $srcdir/mpn/generic/lshift.c \
+# $srcdir/mpn/generic/mod_1.c \
+# $srcdir/mpn/generic/aorsmul_1.c
+#
+# giving mpn_lshift_generic etc.
+#
+#
+# TESTS/DEVEL PROGRAMS
+#
+# Makefile.many also has rules to build the tests/devel programs with suitable
+# renaming, and with some parameters for correctness or speed. This is less
+# convenient than the speed and try programs, but provides an independent
+# check. For example,
+#
+# make -f Makefile.many tests_mul_1_experimental
+# ./tests_mul_1_experimental
+#
+# and for speed
+#
+# make -f Makefile.many tests_mul_1_experimental_sp
+# ./tests_mul_1_experimental_sp
+#
+# Not all the programs support speed measuring, in which case only the
+# correctness test will be useful.
+#
+# The parameters for repetitions and host clock speed are -D defines. Some
+# defaults are provided at the end of Makefile.many, but probably these will
+# want to be overridden. For example,
+#
+# rm tests_mul_1_experimental.o
+# make -f Makefile.many \
+# CFLAGS_TESTS="-DSIZE=50 -DTIMES=1000 -DRANDOM -DCLOCK=175000000" \
+# tests_mul_1_experimental
+# ./tests_mul_1_experimental
+#
+#
+# OTHER NOTES
+#
+# The mappings of file names to functions, and the macros to then use for
+# speed measuring etc are driven by @table below. The scheme isn't
+# completely general, it's only got as many variations as have been needed
+# so far.
+#
+# Some functions are only made available in speed-many, or others only in
+# try-many. An @table entry speed=>none means no speed measuring is
+# available, or try=>none no try program testing. These can be removed
+# if/when the respective programs get the necessary support.
+#
+# If a file has "1c" or "nc" carry-in entrypoints, they're renamed and made
+# available too. These are recognised from PROLOGUE or MULFUNC_PROLOGUE in
+# .S and .asm files, or from a line starting with "mpn_foo_1c" in a .c file
+# (possibly via a #define), and on that basis are entirely optional. This
+# entrypoint matching is done for the standard entrypoints too, but it would
+# be very unusual to have for instance a mul_1c without a mul_1.
+#
+# Some mpz files are recognized. For example an experimental copy of
+# mpz/powm.c could be included as powm_new.c and would be called
+# mpz_powm_new. So far only speed measuring is available for these.
+#
+# For the ".S" and ".asm" files, both PIC and non-PIC objects are built.
+# The PIC functions have a "_pic" suffix, for example "mpn_mod_1_k7_mmx_pic".
+# This can be ignored for routines that don't differ for PIC, or for CPUs
+# where everything is PIC anyway.
+#
+# K&R compilers are supported via the same ansi2knr mechanism used by
+# automake, though it's hard to believe anyone will have much interest in
+# measuring a compiler so old that it doesn't even have an ANSI mode.
+#
+# The "-t" option can be used to print a trace of the files found and what's
+# done with them. A great deal of obscure output is produced, but it can
+# indicate where or why some files aren't being recognised etc. For
+# example,
+#
+# cd $builddir/tune
+# perl $srcdir/tune/many.pl -t $HOME/newcode/add_n_weird.asm
+#
+# In general, when including new code, all that's really necessary is that
+# it will compile or assemble under the current configuration. It's fine if
+# some code doesn't actually run due to bugs, or to needing a newer CPU or
+# whatever, simply don't ask for the offending routines when invoking
+# speed-many or try-many, or don't try to run them on sizes they don't yet
+# support, or whatever.
+#
+#
+# CPU SPECIFICS
+#
+# x86 - All the x86 code will assemble on any system, but code for newer
+# chips might not run on older chips. Expect SIGILLs from new
+# instructions on old chips.
+#
+# A few "new" instructions, like cmov for instance, are done as macros
+# and will generate some equivalent plain i386 code when HAVE_HOST_CPU
+# in config.m4 indicates an old CPU. It won't run fast, but it does
+# make it possible to test correctness.
+#
+#
+# INTERNALS
+#
+# The nonsense involving $ENV is some hooks used during development to add
+# additional functions temporarily.
+#
+#
+# FUTURE
+#
+# Maybe the C files should be compiled pic and non-pic too. Wait until
+# there's a difference that might be of interest.
+#
+# Warn if a file provides no functions.
+#
+# Allow mpz and mpn files of the same name. Currently the mpn fib2_ui
+# matching hides the mpz version of that. Will need to check the file
+# contents to see which it is. Would be worth allowing an "mpz_" or "mpn_"
+# prefix on the filenames to have working versions of both in one directory.
+#
+#
+# LIMITATIONS
+#
+# Some of the command lines can become very long when a lot of files are
+# included. If this is a problem on a given system the only suggestion is
+# to run many.pl for just those that are actually wanted at a particular
+# time.
+#
+# DOS 8.3 or SysV 14 char filesystems won't work, since the long filenames
+# generated will almost certainly fail to be unique.
+
+
+use strict;
+use File::Basename;
+use Getopt::Std;
+
+my %opt; # filled in by getopts below
+getopts('t', \%opt); # -t is the only option accepted; the usage notes above describe it as the trace option
+
+my @DIRECTORIES = @ARGV; # whatever is left after option parsing: the files/dirs to process
+if (defined $ENV{directories}) { push @DIRECTORIES, @{$ENV{directories}} } # development hook (see INTERNALS above). NOTE(review): this dereferences the value as an array ref; an environment string would fail under "use strict" - confirm intent
+
+
+# regexp - matched against the start of the filename. If a grouping "(...)"
+# is present then only the first such part is used.
+#
+# mulfunc - filenames to be generated from a multi-function file.
+#
+# funs - functions provided by the file, defaulting to the filename with an
+# mpn (or mpX) prefix.
+#
+# mpX - prefix like "mpz", defaulting to "mpn".
+#
+# ret - return value type.
+#
+# args, args_<fun> - arguments for the given function. If an args_<fun> is
+# set then it's used, otherwise plain args is used. "mp_limb_t
+# carry" is appended for carry-in variants.
+#
+# try - try.c TYPE_ to use, defaulting to TYPE_fun with the function name
+# in upper case. "C" is appended for carry-in variants. Can be
+# 'none' for no try program entry.
+#
+# speed - SPEED_ROUTINE_ to use, handled like "try".
+#
+# speed_flags - flags field for the SPEED_EXTRA_ROUTINES entry, eg. "FLAG_R",
+# defaulting to 0.
+
+
+my @table =
+ (
+ {
+ 'regexp'=> 'add_n|sub_n|addlsh1_n|sublsh1_n|rsh1add_n|rsh1sub_n',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ 'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ },
+ {
+ 'regexp'=> 'aors_n',
+ 'mulfunc'=> ['add_n','sub_n'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ 'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ },
+
+ {
+ 'regexp'=> 'addmul_1|submul_1',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+ 'speed_flags'=> 'FLAG_R',
+ },
+ {
+ 'regexp'=> 'aorsmul_1',
+ 'mulfunc'=> ['addmul_1','submul_1'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+ 'speed_flags'=> 'FLAG_R',
+ },
+
+ {
+ 'regexp'=> 'addmul_2|submul_2',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 2,
+ },
+ {
+ 'regexp'=> 'addmul_3|submul_3',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_3',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 3,
+ },
+ {
+ 'regexp'=> 'addmul_4|submul_4',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_4',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 4,
+ },
+ {
+ 'regexp'=> 'addmul_5|submul_5',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_5',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 5,
+ },
+ {
+ 'regexp'=> 'addmul_6|submul_6',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_6',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 6,
+ },
+ {
+ 'regexp'=> 'addmul_7|submul_7',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_7',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 7,
+ },
+ {
+ 'regexp'=> 'addmul_8|submul_8',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr yp',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_8',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try-minsize' => 8,
+ },
+
+ {
+ 'regexp'=> 'add_n_sub_n',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ },
+
+ {
+ 'regexp'=> 'com|copyi|copyd',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
+ 'speed' => 'SPEED_ROUTINE_MPN_COPY',
+ },
+
+ {
+ 'regexp'=> 'dive_1',
+ 'funs' => ['divexact_1'],
+ 'ret' => 'void',
+ 'args' => 'mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor',
+ 'speed_flags'=> 'FLAG_R',
+ },
+ {
+ 'regexp'=> 'diveby3',
+ 'funs' => ['divexact_by3c'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr dst, mp_srcptr src, mp_size_t size',
+ 'carrys'=> [''],
+ 'speed' => 'SPEED_ROUTINE_MPN_COPY',
+ },
+
+ # mpn_preinv_divrem_1 is an optional extra entrypoint
+ {
+ 'regexp'=> 'divrem_1',
+ 'funs' => ['divrem_1', 'preinv_divrem_1'],
+ 'ret' => 'mp_limb_t',
+ 'args_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor',
+ 'args_preinv_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift',
+ 'speed_flags'=> 'FLAG_R',
+ 'speed_suffixes' => ['f'],
+ },
+ {
+ 'regexp'=> 'pre_divrem_1',
+ 'funs' => ['preinv_divrem_1'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr qp, mp_size_t qxn, mp_srcptr ap, mp_size_t asize, mp_limb_t divisor, mp_limb_t inverse, int shift',
+ 'speed_flags' => 'FLAG_R',
+ },
+
+ {
+ 'regexp'=> 'divrem_2',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr qp, mp_size_t qxn, mp_srcptr np, mp_size_t nsize, mp_srcptr dp',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'sb_divrem_mn',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',
+ 'speed' => 'SPEED_ROUTINE_MPN_DC_DIVREM_SB',
+ 'try-minsize' => 3,
+ },
+ {
+ 'regexp'=> 'tdiv_qr',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize',
+ 'speed' => 'none',
+ },
+
+ {
+ 'regexp'=> 'get_str',
+ 'ret' => 'size_t',
+ 'args' => 'unsigned char *str, int base, mp_ptr mptr, mp_size_t msize',
+ 'speed_flags' => 'FLAG_R_OPTIONAL',
+ 'try' => 'none',
+ },
+ {
+ 'regexp'=> 'set_str',
+ 'ret' => 'mp_size_t',
+ 'args' => 'mp_ptr xp, const unsigned char *str, size_t str_len, int base',
+ 'speed_flags' => 'FLAG_R_OPTIONAL',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'fac_ui',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr r, unsigned long n',
+ 'speed_flags' => 'FLAG_NODATA',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'fib2_ui',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr fp, mp_ptr f1p, unsigned long n',
+ 'rename'=> ['__gmp_fib_table'],
+ 'speed_flags' => 'FLAG_NODATA',
+ 'try' => 'none',
+ },
+ {
+ 'regexp'=> 'fib_ui',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr fn, unsigned long n',
+ 'speed_flags' => 'FLAG_NODATA',
+ 'try' => 'none',
+ },
+ {
+ 'regexp'=> 'fib2_ui',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr fn, mpz_ptr fnsub1, unsigned long n',
+ 'speed_flags' => 'FLAG_NODATA',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'lucnum_ui',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr ln, unsigned long n',
+ 'speed_flags' => 'FLAG_NODATA',
+ 'try' => 'none',
+ },
+ {
+ 'regexp'=> 'lucnum2_ui',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr ln, mpz_ptr lnsub1, unsigned long n',
+ 'speed_flags' => 'FLAG_NODATA',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'gcd_1',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr xp, mp_size_t xsize, mp_limb_t y',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'speed_suffixes' => ['N'],
+ },
+ {
+ 'regexp'=> '(gcd)(?!(_1|ext|_finda))',
+ 'ret' => 'mp_size_t',
+ 'args' => 'mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize',
+ },
+ {
+ 'regexp'=> 'gcd_finda',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_srcptr cp',
+ },
+
+
+ {
+ 'regexp'=> 'jacobi',
+ 'funs' => ['jacobi', 'legendre', 'kronecker'],
+ 'mpX' => 'mpz',
+ 'ret' => 'int',
+ 'args' => 'mpz_srcptr a, mpz_srcptr b',
+ 'try-legendre' => 'TYPE_MPZ_JACOBI',
+ },
+ {
+ 'regexp'=> 'jacbase',
+ 'funs' => ['jacobi_base'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_limb_t a, mp_limb_t b, int bit1',
+ 'speed' => 'SPEED_ROUTINE_MPN_JACBASE',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'logops_n',
+ 'mulfunc'=> ['and_n','andn_n','nand_n','ior_n','iorn_n','nior_n','xor_n','xnor_n'],
+ 'ret' => 'void',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ 'speed' => 'SPEED_ROUTINE_MPN_BINARY_N',
+ },
+
+ {
+ 'regexp'=> '[lr]shift',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+ 'speed_flags'=> 'FLAG_R',
+ },
+
+ # mpn_preinv_mod_1 is an optional extra entrypoint
+ {
+ 'regexp'=> '(mod_1)(?!_rs)',
+ 'funs' => ['mod_1','preinv_mod_1'],
+ 'ret' => 'mp_limb_t',
+ 'args_mod_1' => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor',
+ 'args_preinv_mod_1'=> 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',
+ 'speed_flags'=> 'FLAG_R',
+ },
+ {
+ 'regexp'=> 'pre_mod_1',
+ 'funs' => ['preinv_mod_1'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse',
+ 'speed_flags'=> 'FLAG_R',
+ },
+ {
+ 'regexp'=> 'mod_34lsub1',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_srcptr src, mp_size_t len',
+ },
+ {
+ 'regexp'=> 'invert_limb',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_limb_t divisor',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try' => 'none',
+ },
+
+ {
+ # not for use with hppa reversed argument versions of mpn_umul_ppmm
+ 'regexp'=> 'udiv',
+ 'funs' => ['udiv_qrnnd','udiv_qrnnd_r'],
+ 'ret' => 'mp_limb_t',
+ 'args_udiv_qrnnd' => 'mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t',
+ 'args_udiv_qrnnd_r' => 'mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *',
+ 'speed' => 'none',
+ 'try-minsize' => 2,
+ },
+
+ {
+ 'regexp'=> 'mode1o',
+ 'funs' => ['modexact_1_odd'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_srcptr src, mp_size_t size, mp_limb_t divisor',
+ 'speed_flags'=> 'FLAG_R',
+ },
+ {
+ 'regexp'=> 'modlinv',
+ 'funs' => ['modlimb_invert'],
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_limb_t v',
+ 'carrys'=> [''],
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'mul_1',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1',
+ 'speed_flags'=> 'FLAG_R',
+ },
+ {
+ 'regexp'=> 'mul_2',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr mult',
+ 'speed' => 'SPEED_ROUTINE_MPN_UNARY_2',
+ 'speed_flags'=> 'FLAG_R',
+ },
+
+ {
+ 'regexp'=> 'mul_basecase',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize',
+ 'speed_flags' => 'FLAG_R_OPTIONAL | FLAG_RSIZE',
+ },
+ {
+ 'regexp'=> '(mul_n)[_.]',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ 'rename'=> ['kara_mul_n','kara_sqr_n','toom3_mul_n','toom3_sqr_n'],
+ },
+ {
+ 'regexp'=> 'umul',
+ 'funs' => ['umul_ppmm','umul_ppmm_r'],
+ 'ret' => 'mp_limb_t',
+ 'args_umul_ppmm' => 'mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2',
+ 'args_umul_ppmm_r' => 'mp_limb_t m1, mp_limb_t m2, mp_limb_t *lowptr',
+ 'speed' => 'none',
+ 'try-minsize' => 3,
+ },
+
+
+ {
+ 'regexp'=> 'popham',
+ 'mulfunc'=> ['popcount','hamdist'],
+ 'ret' => 'unsigned long',
+ 'args_popcount'=> 'mp_srcptr xp, mp_size_t size',
+ 'args_hamdist' => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ },
+ {
+ 'regexp'=> 'popcount',
+ 'ret' => 'unsigned long',
+ 'args' => 'mp_srcptr xp, mp_size_t size',
+ },
+ {
+ 'regexp'=> 'hamdist',
+ 'ret' => 'unsigned long',
+ 'args' => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size',
+ # extra renaming to support sharing a data table with mpn_popcount
+ 'rename'=> ['popcount'],
+ },
+
+ {
+ 'regexp'=> 'sqr_basecase',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
+ 'speed' => 'SPEED_ROUTINE_MPN_SQR',
+ 'try' => 'TYPE_SQR',
+ },
+ {
+ 'regexp'=> 'sqr_diagonal',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'sqrtrem',
+ 'ret' => 'mp_size_t',
+ 'args' => 'mp_ptr root, mp_ptr rem, mp_srcptr src, mp_size_t size',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'cntlz',
+ 'funs' => ['count_leading_zeros'],
+ 'ret' => 'unsigned',
+ 'args' => 'mp_limb_t',
+ 'macro-before' => "#undef COUNT_LEADING_ZEROS_0",
+ 'macro-speed' =>
+'#ifdef COUNT_LEADING_ZEROS_0
+#define COUNT_LEADING_ZEROS_0_ALLOWED 1
+#else
+#define COUNT_LEADING_ZEROS_0_ALLOWED 0
+#endif
+ SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
+ $fun (c, n);
+ SPEED_ROUTINE_COUNT_ZEROS_B ()',
+ 'speed_flags'=> 'FLAG_R_OPTIONAL',
+ 'try' => 'none',
+ },
+ {
+ 'regexp'=> 'cnttz',
+ 'funs' => ['count_trailing_zeros'],
+ 'ret' => 'unsigned',
+ 'args' => 'mp_limb_t',
+ 'macro-speed' => '
+ SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
+ $fun (c, n);
+ SPEED_ROUTINE_COUNT_ZEROS_B ()',
+ 'speed_flags' => 'FLAG_R_OPTIONAL',
+ 'try' => 'none',
+ },
+
+ {
+ 'regexp'=> 'zero',
+ 'ret' => 'void',
+ 'args' => 'mp_ptr ptr, mp_size_t size',
+ },
+
+ {
+ 'regexp'=> '(powm)(?!_ui)',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m',
+ 'try' => 'none',
+ },
+ {
+ 'regexp'=> 'powm_ui',
+ 'mpX' => 'mpz',
+ 'ret' => 'void',
+ 'args' => 'mpz_ptr r, mpz_srcptr b, unsigned long e, mpz_srcptr m',
+ 'try' => 'none',
+ },
+
+ # special for use during development
+ {
+ 'regexp'=> 'back',
+ 'funs' => ['back_to_back'],
+ 'ret' => 'void',
+ 'args' => 'void',
+ 'pic' => 'no',
+ 'try' => 'none',
+ 'speed_flags'=> 'FLAG_NODATA',
+ },
+ );
+
+if (defined $ENV{table2}) {
+ my @newtable = @{$ENV{table2}};
+ push @newtable, @table;
+ @table = @newtable;
+}
+
+
+my %pictable =
+ (
+ 'yes' => {
+ 'suffix' => '_pic',
+ 'asmflags'=> '$(ASMFLAGS_PIC)',
+ 'cflags' => '$(CFLAGS_PIC)',
+ },
+ 'no' => {
+ 'suffix' => '',
+ 'asmflags'=> '',
+ 'cflags' => '',
+ },
+ );
+
+
+# Locate the build tree.  $builddir comes from the environment and falls
+# back to the current directory; the top-level build directory is taken to
+# be its parent.
+my $builddir = (defined $ENV{builddir} ? $ENV{builddir} : ".");
+my $top_builddir = "${builddir}/..";
+
+
+# Pull the "srcdir" and "top_srcdir" settings out of the build Makefile.
+my ($srcdir, $top_srcdir);
+open(MAKEFILE, "<${builddir}/Makefile")
+  or die "Cannot open ${builddir}/Makefile: $!\n"
+       . "Is this a tune build directory?";
+while (<MAKEFILE>) {
+  $srcdir     = $1 if /^srcdir = (.*)/;
+  $top_srcdir = $1 if /^top_srcdir = (.*)/;
+}
+close(MAKEFILE);
+defined $srcdir     or die "Cannot find \$srcdir in Makefile\n";
+defined $top_srcdir or die "Cannot find \$top_srcdir in Makefile\n";
+if ($opt{'t'}) {
+  print "srcdir $srcdir\n";
+  print "top_srcdir $top_srcdir\n";
+}
+
+
+# Start the two generated C sources.  The macro bodies and the speed routine
+# code are accumulated in strings while the input files are scanned, and are
+# only written out after the scan.
+open(SPEED, ">speed-many.c")
+  or die "Cannot create speed-many.c: $!\n";
+print SPEED
+"/* speed-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */
+
+";
+my $SPEED_EXTRA_ROUTINES = "#define SPEED_EXTRA_ROUTINES \\\n";
+my $SPEED_EXTRA_PROTOS = "#define SPEED_EXTRA_PROTOS \\\n";
+my $SPEED_CODE = "";
+
+open(TRY, ">try-many.c")
+  or die "Cannot create try-many.c: $!\n";
+print TRY
+  "/* try-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\n" .
+  "\n";
+my $TRY_EXTRA_ROUTINES = "#define EXTRA_ROUTINES \\\n";
+my $TRY_EXTRA_PROTOS = "#define EXTRA_PROTOS \\\n";
+
+# Find the PIC option by scanning the libtool script in the top build
+# directory for its first pic_flag setting.
+open(FD,"<${top_builddir}/libtool") or die "Cannot open \"${top_builddir}/libtool\": $!\n";
+my $pic_flag;
+while (<FD>) {
+  next unless /^pic_flag="?([^"]*)"?$/;
+  $pic_flag = $1;
+  last;
+}
+close FD;
+defined $pic_flag
+  or die "Cannot find pic_flag in ${top_builddir}/libtool";
+
+# CFLAGS_PIC is the whole flag; ASMFLAGS_PIC keeps only its -D options, each
+# preceded by a space.
+my $CFLAGS_PIC = $pic_flag;
+my $ASMFLAGS_PIC = join "", map {" $_"} grep {/^-D/} split(/[ \t]/, $pic_flag);
+
+# Makefile.many starts with a verbatim copy of the build directory Makefile,
+# followed by the PIC flag settings found above.
+open(MAKEFILE, ">Makefile.many")
+  or die "Cannot create Makefile.many: $!\n";
+print MAKEFILE
+  "# Makefile.many generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST\n" .
+  "\n" .
+  "all: speed-many try-many\n" .
+  "\n" .
+  "#--------- begin included copy of basic Makefile ----------\n" .
+  "\n";
+open(FD,"<${builddir}/Makefile") or die "Cannot open \"${builddir}/Makefile\": $!\n";
+print MAKEFILE <FD>;
+close FD;
+print MAKEFILE
+  "\n" .
+  "#--------- end included copy of basic Makefile ----------\n" .
+  "\n" .
+  "CFLAGS_PIC = $CFLAGS_PIC\n" .
+  "ASMFLAGS_PIC = $ASMFLAGS_PIC\n" .
+  "\n";
+
+# Accumulated lists of extra files to clean and objects to link.
+my $CLEAN="";
+my $MANY_OBJS="";
+
+
+# Write a Makefile.many rule that creates "${base}_.c" by running $file
+# through $(CPP), sed and $(ANSI2KNR).
+#   $base     - basename of the output, which is written as "${base}_.c"
+#   $file     - input file, defaulting to "$base.c"
+#   $includes - extra preprocessor options, defaulting to none
+sub print_ansi2knr {
+  my ($base,$file,$includes) = @_;
+  if (! defined $file)     { $file = "$base.c"; }
+  if (! defined $includes) { $includes = ""; }
+
+  # The heredoc interpolates like a double-quoted string, so the sed group
+  # parentheses must be written "\\(" to reach the Makefile as "\(".  A
+  # plain "\(" loses its backslash and leaves "\1" with no group to refer to.
+  print MAKEFILE <<EOF;
+${base}_.c: $file \$(ANSI2KNR)
+ \$(CPP) \$(DEFS) \$(INCLUDES) $includes \$(AM_CPPFLAGS) \$(CPPFLAGS) $file | sed 's/^# \\([0-9]\\)/#line \\1/' | \$(ANSI2KNR) >${base}_.c
+
+EOF
+}
+
+
+# Gather the candidate source files.  Each entry of @DIRECTORIES is either a
+# plain file, taken as it stands, or a directory whose .c, .asm, .S and .h
+# entries are added.  readdir is used because spawning a glob is a touch
+# slow when there's lots of files.
+my @files = ();
+foreach my $dir (@DIRECTORIES) {
+  print "dir $dir\n" if $opt{'t'};
+  if (-f $dir) {
+    push @files,$dir;
+  } elsif (! opendir DD,$dir) {
+    # An unreadable directory is reported and skipped, not fatal.
+    print "Cannot open $dir: $!\n";
+  } else {
+    push @files, map {$_="$dir/$_"} grep /\.(c|asm|S|h)$/, readdir DD;
+    closedir DD;
+  }
+}
+@files = sort @files;
+# The trace label must not be written as "@files" inside the string, since
+# that interpolates the array and lists every file twice.
+print "files ",join(" ",@files),"\n" if $opt{'t'};
+
+my $count_files = 0;
+my $count_functions = 0;
+my %seen_obj;
+my %seen_file;
+
+foreach my $file_full (@files) {
+ if (! -f $file_full) {
+ print "Not a file: $file_full\n";
+ next;
+ }
+ if (defined $seen_file{$file_full}) {
+ print "Skipping duplicate file: $file_full\n";
+ next;
+ }
+ $seen_file{$file_full} = 1;
+
+ my ($FILE,$path,$lang) = fileparse($file_full,"\.[a-zA-Z]+");
+ $path =~ s/\/$//;
+ print "file $FILE path $path lang $lang\n" if $opt{'t'};
+
+ my @pic_choices;
+ if ($lang eq '.asm') { @pic_choices=('no','yes'); }
+ elsif ($lang eq '.c') { @pic_choices=('no'); }
+ elsif ($lang eq '.S') { @pic_choices=('no','yes'); }
+ elsif ($lang eq '.h') { @pic_choices=('no'); }
+ else { next };
+
+ my ($t, $file_match);
+ foreach my $p (@table) {
+ # print " ",$p->{'regexp'},"\n" if $opt{'t'};
+ if ($FILE =~ "^($p->{'regexp'})") {
+ $t = $p;
+ $file_match = $1;
+ $file_match = $2 if defined $2;
+ last;
+ }
+ }
+ next if ! defined $t;
+ print "match $t->{'regexp'} $FILE ($file_full)\n" if $opt{'t'};
+
+ if (! open FD,"<$file_full") { print "Can't open $file_full: $!\n"; next }
+ my @file_contents = <FD>;
+ close FD;
+
+ my $objs;
+ if (defined $t->{'mulfunc'}) { $objs = $t->{'mulfunc'}; }
+ else { $objs = [$file_match]; }
+ print "objs @$objs\n" if $opt{'t'};
+
+ my $ret = $t->{'ret'};
+ if (! defined $ret && $lang eq '.h') { $ret = ''; }
+ if (! defined $ret) { die "$FILE return type not defined\n" };
+ print "ret $ret\n" if $opt{'t'};
+
+ my $mpX = $t->{'mpX'};
+ if (! defined $mpX) { $mpX = ($lang eq '.h' ? '' : 'mpn'); }
+ $mpX = "${mpX}_" if $mpX ne '';
+ print "mpX $mpX\n" if $opt{'t'};
+
+ my $carrys;
+ if (defined $t->{'carrys'}) { $carrys = $t->{'carrys'}; }
+ else { $carrys = ['','c']; }
+ print "carrys $carrys @$carrys\n" if $opt{'t'};
+
+ # some restriction functions are implemented, but they're not very useful
+ my $restriction='';
+
+ my $suffix;
+ if ($FILE =~ ("${file_match}_(.+)")) {
+ $suffix = $1;
+ } elsif ($path =~ /\/mp[zn]\/(.*)$/) {
+ # derive the suffix from the path
+ $suffix = $1;
+ $suffix =~ s/\//_/g;
+ # use last directory name, or if there's 3 or more then the last two
+ if ($suffix =~ /([^_]*_)+([^_]+_[^_]+)$/) {
+ $suffix = $2;
+ } elsif ($suffix =~ /([^_]*_)*([^_]+)$/) {
+ $suffix = $2;
+ }
+ } else {
+ die "Can't determine suffix for: $file_full (path $path)\n";
+ }
+ print "suffix $suffix\n" if $opt{'t'};
+
+ $count_files++;
+
+ foreach my $obj (@{$objs}) {
+ print "obj $obj\n" if $opt{'t'};
+
+ my $obj_with_suffix = "${obj}_$suffix";
+ if (defined $seen_obj{$obj_with_suffix}) {
+ print "Skipping duplicate object: $obj_with_suffix\n";
+ print " first from: $seen_obj{$obj_with_suffix}\n";
+ print " now from: $file_full\n";
+ next;
+ }
+ $seen_obj{$obj_with_suffix} = $file_full;
+
+ my $funs = $t->{'funs'};
+ $funs = [$obj] if ! defined $funs;
+ print "funs @$funs\n" if $opt{'t'};
+
+ if (defined $t->{'pic'}) { @pic_choices = ('no'); }
+
+ foreach my $pic (map {$pictable{$_}} @pic_choices) {
+ print "pic $pic->{'suffix'}\n" if $opt{'t'};
+
+ my $objbase = "${obj}_$suffix$pic->{'suffix'}";
+ print "objbase $objbase\n" if $opt{'t'};
+
+ # NOTE(review): "." is applied as a regexp here, so it matches any
+ # character and this test can only succeed when $path contains no
+ # characters (other than newlines).  A string comparison such as
+ # $path ne "." may have been intended -- confirm before changing.
+ if ($path !~ "." && -f "${objbase}.c") {
+ die "Already have ${objbase}.c";
+ }
+
+ my $tmp_file = "tmp-$objbase.c";
+
+ my $renaming;
+ foreach my $fun (@{$funs}) {
+ if ($mpX eq 'mpn_' && $lang eq '.c') {
+ $renaming .= "\t\t-DHAVE_NATIVE_mpn_$fun=1 \\\n";
+ }
+
+ # The carry-in variant is with a "c" appended, unless there's a "_1"
+ # somewhere, eg. "modexact_1_odd", in which case that becomes "_1c".
+ my $fun_carry = $fun;
+ if (! ($fun_carry =~ s/_1/_1c/)) { $fun_carry = "${fun}c"; }
+
+ $renaming .=
+ "\t\t-D__g$mpX$fun=$mpX${fun}_$suffix$pic->{'suffix'} \\\n" .
+ "\t\t-D__g$mpX$fun_carry=$mpX${fun_carry}_$suffix$pic->{'suffix'} \\\n";
+ }
+ foreach my $r (@{$t->{'rename'}}) {
+ if ($r =~ /^__gmp/) {
+ $renaming .= "\\\n" .
+ "\t\t-D$r=${r}_$suffix$pic->{'suffix'}";
+ } else {
+ $renaming .= "\\\n" .
+ "\t\t-D__g$mpX$r=$mpX${r}_$suffix$pic->{'suffix'}";
+ }
+ }
+ print "renaming $renaming\n" if $opt{'t'};
+
+ print MAKEFILE "\n";
+ if ($lang eq '.asm') {
+ print MAKEFILE
+ "$objbase.o: $file_full \$(ASM_HEADERS)\n" .
+ " \$(M4) \$(M4FLAGS) -DOPERATION_$obj $pic->{'asmflags'} \\\n" .
+ "$renaming" .
+ " $file_full >tmp-$objbase.s\n" .
+ " \$(CCAS) \$(COMPILE_FLAGS) $pic->{'cflags'} tmp-$objbase.s -o $objbase.o\n" .
+ " \$(RM_TMP) tmp-$objbase.s\n";
+ $MANY_OBJS .= " $objbase.o";
+
+ } elsif ($lang eq '.c') {
+ print MAKEFILE
+ "$objbase.o: $file_full\n" .
+ " \$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" .
+ "$renaming" .
+ " -c $file_full -o $objbase.o\n";
+ print_ansi2knr($objbase,
+ $file_full,
+ " -DOPERATION_$obj\\\n$renaming\t\t");
+ $MANY_OBJS .= " $objbase\$U.o";
+
+ } elsif ($lang eq '.S') {
+ print MAKEFILE
+ "$objbase.o: $file_full\n" .
+ " \$(COMPILE) -g $pic->{'asmflags'} \\\n" .
+ "$renaming" .
+ " -c $file_full -o $objbase.o\n";
+ $MANY_OBJS .= " $objbase.o";
+
+ } elsif ($lang eq '.h') {
+ print MAKEFILE
+ "$objbase.o: tmp-$objbase.c $file_full\n" .
+ " \$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" .
+ "$renaming" .
+ " -c tmp-$objbase.c -o $objbase.o\n";
+ print_ansi2knr($objbase,
+ "tmp-$objbase.c",
+ " -DOPERATION_$obj\\\n$renaming\t\t");
+ $MANY_OBJS .= " $objbase\$U.o";
+
+ $CLEAN .= " tmp-$objbase.c";
+ open(TMP_C,">tmp-$objbase.c")
+ or die "Can't create tmp-$objbase.c: $!\n";
+ print TMP_C
+"/* tmp-$objbase.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */
+
+#include \"gmp.h\"
+#include \"gmp-impl.h\"
+#include \"longlong.h\"
+#include \"speed.h\"
+
+";
+ }
+
+ my $tests_program = "$top_srcdir/tests/devel/$obj.c";
+ if (-f $tests_program) {
+ $tests_program = "\$(top_srcdir)/tests/devel/$obj.c";
+ # The plain test program is compiled with $(CFLAGS_TESTS), so its
+ # ansi2knr preprocessing uses the same flags; only the "_sp" variant
+ # below uses $(CFLAGS_TESTS_SP).
+ print_ansi2knr("tests_${objbase}",
+                $tests_program,
+                "\\\n$renaming\t\t\$(CFLAGS_TESTS)");
+ print_ansi2knr("tests_${objbase}_sp",
+ $tests_program,
+ "\\\n$renaming\t\t\$(CFLAGS_TESTS_SP)");
+
+ print MAKEFILE <<EOF;
+tests_$objbase.o: $tests_program
+ \$(COMPILE) \$(CFLAGS_TESTS) \\
+$renaming -c $tests_program -o tests_$objbase.o
+
+tests_$objbase: $objbase\$U.o tests_$objbase\$U.o ../libgmp.la
+ \$(LINK) tests_$objbase\$U.o $objbase\$U.o ../libgmp.la -o tests_$objbase
+
+tests_${objbase}_sp.o: $tests_program
+ \$(COMPILE) \$(CFLAGS_TESTS_SP) \\
+$renaming -c $tests_program -o tests_${objbase}_sp.o
+
+tests_${objbase}_sp: $objbase\$U.o tests_${objbase}_sp\$U.o ../libgmp.la
+ \$(LINK) tests_${objbase}_sp\$U.o $objbase\$U.o ../libgmp.la -o tests_${objbase}_sp
+
+EOF
+ $CLEAN .= " tests_$objbase tests_${objbase}_sp";
+ }
+
+ foreach my $fun (@{$funs}) {
+ print "fun $fun\n" if $opt{'t'};
+
+ # For a .h file, add to the temporary C file the table's optional
+ # "macro-before" text, an #undef of the function name and an #include
+ # of the header itself.
+ if ($lang eq '.h') {
+   # The table spells this key "macro-before", with a hyphen like
+   # "macro-speed"; a lookup of "macro_before" never finds it.
+   my $macro_before = $t->{'macro-before'};
+   $macro_before = "" if ! defined $macro_before;
+   print TMP_C
+"$macro_before
+#undef $fun
+#include \"$file_full\"
+
+";
+ }
+
+ my $args = $t->{"args_$fun"};
+ if (! defined $args) { $args = $t->{'args'}; }
+ if (! defined $args) { die "Need args for $fun\n"; }
+ print "args $args\n" if $opt{'t'};
+
+ foreach my $carry (@$carrys) {
+ print "carry $carry\n" if $opt{'t'};
+
+ my $fun_carry = $fun;
+ if (! ($fun_carry =~ s/_1/_1$carry/)) { $fun_carry = "$fun$carry"; }
+ print "fun_carry $fun_carry\n" if $opt{'t'};
+
+ if ($lang =~ /\.(asm|S)/
+ && ! grep(m"PROLOGUE\((.* )?$mpX$fun_carry[ ,)]",@file_contents)) {
+ print "no PROLOGUE $mpX$fun_carry\n" if $opt{'t'};
+ next;
+ }
+ if ($lang eq '.c'
+ && ! grep(m"^(#define FUNCTION\s+)?$mpX$fun_carry\W", @file_contents)) {
+ print "no mention of $mpX$fun_carry\n" if $opt{'t'};
+ next;
+ }
+ if ($lang eq '.h'
+ && ! grep(m"^#define $fun_carry\W", @file_contents)) {
+ print "no mention of #define $fun_carry\n" if $opt{'t'};
+ next;
+ }
+
+ $count_functions++;
+
+ # Extra parameter text appended to $args in the prototype.  Plain
+ # variants get nothing.  Carry-in variants get the table's 'carryarg'
+ # text when one is given, otherwise a trailing "mp_limb_t carry".  The
+ # table value has to be tested in the same if/elsif chain, or the
+ # default assignment would always replace it.
+ my $carryarg;
+ if ($carry eq '')                { $carryarg = ''; }
+ elsif (defined $t->{'carryarg'}) { $carryarg = $t->{'carryarg'}; }
+ else                             { $carryarg = ', mp_limb_t carry'; }
+ print "carryarg $carryarg\n" if $opt{'t'};
+
+ my $funfull="$mpX${fun_carry}_$suffix$pic->{'suffix'}";
+ print "funfull $funfull\n" if $opt{'t'};
+
+ if ($lang ne '.h') {
+ my $proto = "$t->{'ret'} $funfull _PROTO (($args$carryarg)); \\\n";
+ $SPEED_EXTRA_PROTOS .= $proto;
+ $TRY_EXTRA_PROTOS .= $proto;
+ }
+
+ my $try_type = $t->{"try-$fun"};
+ $try_type = $t->{'try'} if ! defined $try_type;
+ if (! defined $try_type) {
+ if ($mpX eq 'mpn_') {
+ $try_type = "TYPE_\U$fun_carry";
+ } else {
+ $try_type = "TYPE_\U$mpX\U$fun_carry";
+ }
+ }
+ print "try_type $try_type\n" if $opt{'t'};
+
+ my $try_minsize = $t->{'try-minsize'};
+ if (defined $try_minsize) {
+ $try_minsize = ", " . $try_minsize;
+ } else {
+ $try_minsize = "";
+ }
+ print "try_minsize $try_minsize\n" if $opt{'t'};
+
+ if ($try_type ne 'none') {
+ $TRY_EXTRA_ROUTINES .=
+ " { TRY($mpX${fun_carry}_$suffix$pic->{'suffix'}), $try_type$try_minsize }, \\\n";
+ }
+
+ my $speed_flags = $t->{'speed_flags'};
+ $speed_flags = '0' if ! defined $speed_flags;
+ print "speed_flags $speed_flags\n" if $opt{'t'};
+
+ my $speed_routine = $t->{'speed'};
+ $speed_routine = "SPEED_ROUTINE_\U$mpX\U$fun"
+ if !defined $speed_routine;
+ if (! ($speed_routine =~ s/_1/_1\U$carry/)) {
+ $speed_routine = "$speed_routine\U$carry";
+ }
+ print "speed_routine $speed_routine\n" if $opt{'t'};
+
+ my @speed_suffixes = ();
+ push (@speed_suffixes, '') if $speed_routine ne 'none';
+ push (@speed_suffixes, @{$t->{'speed_suffixes'}})
+ if defined $t->{'speed_suffixes'};
+
+ my $macro_speed = $t->{'macro-speed'};
+ $macro_speed = "$speed_routine ($fun_carry)" if ! defined $macro_speed;
+ $macro_speed =~ s/\$fun/$fun_carry/g;
+
+ foreach my $S (@speed_suffixes) {
+ my $Sfunfull="$mpX${fun_carry}${S}_$suffix$pic->{'suffix'}";
+
+ $SPEED_EXTRA_PROTOS .=
+ "double speed_$Sfunfull _PROTO ((struct speed_params *s)); \\\n";
+ $SPEED_EXTRA_ROUTINES .=
+ " { \"$Sfunfull\", speed_$Sfunfull, $speed_flags }, \\\n";
+ if ($lang eq '.h') {
+ print TMP_C
+"double
+speed_$Sfunfull (struct speed_params *s)
+{
+$macro_speed
+}
+
+";
+ } else {
+ $SPEED_CODE .=
+ "double\n" .
+ "speed_$Sfunfull (struct speed_params *s)\n" .
+ "{\n" .
+ "$restriction" .
+ " $speed_routine\U$S\E ($funfull)\n" .
+ "}\n";
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+
+# Finish speed-many.c: the accumulated macros, any $ENV{speedinc} text, an
+# #include of speed.c, then the generated speed routines.
+print SPEED $SPEED_EXTRA_PROTOS . "\n";
+print SPEED $SPEED_EXTRA_ROUTINES . "\n";
+if (defined $ENV{speedinc}) { print SPEED $ENV{speedinc} . "\n"; }
+print SPEED
+  "#include \"speed.c\"\n" .
+  "\n";
+print SPEED $SPEED_CODE;
+# Close explicitly so that a failed write (eg. a full disk) is reported.
+close SPEED or die "Error writing speed-many.c: $!\n";
+
+# Finish try-many.c: the accumulated macros, an optional #include of the
+# file named by $ENV{tryinc}, then an #include of try.c.
+print TRY $TRY_EXTRA_ROUTINES . "\n";
+print TRY $TRY_EXTRA_PROTOS . "\n";
+my $tryinc = "";
+if (defined $ENV{tryinc}) {
+  $tryinc = $ENV{tryinc};
+  print TRY "#include \"$tryinc\"\n";
+}
+print "tryinc $tryinc\n" if $opt{'t'};
+print TRY
+  "#include \"try.c\"\n" .
+  "\n";
+# Close explicitly so that a failed write (eg. a full disk) is reported.
+close TRY or die "Error writing try-many.c: $!\n";
+
+my $extra_libraries = "";
+if (defined $ENV{extra_libraries}) { $extra_libraries = $ENV{extra_libraries};}
+
+my $trydeps = "";
+if (defined $ENV{trydeps}) { $trydeps = $ENV{trydeps}; }
+$trydeps .= " $tryinc";
+print "trydeps $trydeps\n" if $opt{'t'};
+
+print MAKEFILE <<EOF;
+
+MANY_OBJS = $MANY_OBJS
+MANY_CLEAN = \$(MANY_OBJS) \\
+ speed-many.c speed-many\$U.o speed-many\$(EXEEXT) \\
+ try-many.c try-many\$U.o try-many \\
+ $CLEAN
+MANY_DISTCLEAN = Makefile.many
+
+speed-many: \$(MANY_OBJS) speed-many\$U.o libspeed.la $extra_libraries
+ \$(LINK) \$(LDFLAGS) speed-many\$U.o \$(MANY_OBJS) \$(LDADD) \$(LIBS) $extra_libraries
+
+try-many: \$(MANY_OBJS) try-many\$U.o libspeed.la $extra_libraries
+ \$(LINK) \$(LDFLAGS) try-many\$U.o \$(MANY_OBJS) \$(LDADD) \$(LIBS) $extra_libraries
+
+try-many.o: try-many.c \$(top_srcdir)/tests/devel/try.c $trydeps
+ \$(COMPILE) -I\$(top_srcdir)/tests/devel -c try-many.c
+
+EOF
+
+print_ansi2knr("speed-many");
+print_ansi2knr("try-many",
+ "\$(top_srcdir)/tests/devel/try.c",
+ "-I\$(top_srcdir)/tests/devel");
+
+print MAKEFILE <<EOF;
+RM_TMP = rm -f
+CFLAGS_TESTS = -DSIZE=50 -DTIMES=1 -DRANDOM -DCLOCK=333000000
+CFLAGS_TESTS_SP = -DSIZE=1024 -DNOCHECK -DOPS=200000000 -DCLOCK=333000000
+EOF
+
+close MAKEFILE or die;
+
+print "Total $count_files files, $count_functions functions\n";
+
+
+
+# Local variables:
+# perl-indent-level: 2
+# End:
diff --git a/gmp-6.3.0/tune/mod_1_1-1.c b/gmp-6.3.0/tune/mod_1_1-1.c
new file mode 100644
index 0000000..7de1e42
--- /dev/null
+++ b/gmp-6.3.0/tune/mod_1_1-1.c
@@ -0,0 +1,40 @@
+/* mpn/generic/mod_1_1.c method 1.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Force method 1, replacing any existing setting.  */
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 1
+
+/* Give both entry points a "_1" suffix before the generic source is
+   included.  */
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p mpn_mod_1_1p_1
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
+
+#include "mpn/generic/mod_1_1.c"
diff --git a/gmp-6.3.0/tune/mod_1_1-2.c b/gmp-6.3.0/tune/mod_1_1-2.c
new file mode 100644
index 0000000..980a64f
--- /dev/null
+++ b/gmp-6.3.0/tune/mod_1_1-2.c
@@ -0,0 +1,40 @@
+/* mpn/generic/mod_1_1.c method 2.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* Force method 2, replacing any existing setting.  */
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 2
+
+/* Give both entry points a "_2" suffix before the generic source is
+   included.  */
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p mpn_mod_1_1p_2
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
+
+#include "mpn/generic/mod_1_1.c"
diff --git a/gmp-6.3.0/tune/mod_1_div.c b/gmp-6.3.0/tune/mod_1_div.c
new file mode 100644
index 0000000..6268d4e
--- /dev/null
+++ b/gmp-6.3.0/tune/mod_1_div.c
@@ -0,0 +1,45 @@
+/* mpn/generic/mod_1.c forced to use plain udiv_qrnnd.
+
+Copyright 2000, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define OPERATION_mod_1
+
+#include "gmp-impl.h"
+
+#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
+#undef MOD_1N_TO_MOD_1_1_THRESHOLD
+#undef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_mod_1 mpn_mod_1_div
+
+#include "mpn/generic/mod_1.c"
diff --git a/gmp-6.3.0/tune/mod_1_inv.c b/gmp-6.3.0/tune/mod_1_inv.c
new file mode 100644
index 0000000..3b42710
--- /dev/null
+++ b/gmp-6.3.0/tune/mod_1_inv.c
@@ -0,0 +1,45 @@
+/* mpn/generic/mod_1.c forced to use mul-by-inverse udiv_qrnnd_preinv.
+
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define OPERATION_mod_1
+
+#include "gmp-impl.h"
+
+#undef MOD_1_NORM_THRESHOLD
+#undef MOD_1_UNNORM_THRESHOLD
+#undef MOD_1N_TO_MOD_1_1_THRESHOLD
+#undef MOD_1U_TO_MOD_1_1_THRESHOLD
+#define MOD_1_NORM_THRESHOLD 0
+#define MOD_1_UNNORM_THRESHOLD 0
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_mod_1 mpn_mod_1_inv
+
+#include "mpn/generic/mod_1.c"
diff --git a/gmp-6.3.0/tune/modlinv.c b/gmp-6.3.0/tune/modlinv.c
new file mode 100644
index 0000000..42c583a
--- /dev/null
+++ b/gmp-6.3.0/tune/modlinv.c
@@ -0,0 +1,177 @@
+/* Alternate implementations of binvert_limb to compare speeds. */
+
+/*
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "speed.h"
+
+
+/* Like the standard version in gmp-impl.h, but with the expressions using a
+ "1-" form. This has the same number of steps, but "1-" is on the
+ dependent chain, whereas the "2*" in the standard version isn't.
+ Depending on the CPU this should be the same or a touch slower. */
+
+#if GMP_LIMB_BITS <= 32
+#define binvert_limb_mul1(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); \
+ __inv = binvert_limb_table[(__n&0xFF)/2]; /* 8 */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+#endif
+
+#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
+#define binvert_limb_mul1(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); \
+ __inv = binvert_limb_table[(__n&0xFF)/2]; /* 8 */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 64 */ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+#endif
+
+
+/* The loop based version used in GMP 3.0 and earlier. Usually slower than
+ multiplying, due to the number of steps that must be performed. Much
+ slower when the processor has a good multiply. */
+
+#define binvert_limb_loop(inv,n) \
+ do { \
+ mp_limb_t __v = (n); \
+ mp_limb_t __v_orig = __v; \
+ mp_limb_t __make_zero = 1; \
+ mp_limb_t __two_i = 1; \
+ mp_limb_t __v_inv = 0; \
+ \
+ ASSERT ((__v & 1) == 1); \
+ \
+ do \
+ { \
+ while ((__two_i & __make_zero) == 0) \
+ __two_i <<= 1, __v <<= 1; \
+ __v_inv += __two_i; \
+ __make_zero -= __v; \
+ } \
+ while (__make_zero); \
+ \
+ ASSERT (__v_orig * __v_inv == 1); \
+ (inv) = __v_inv; \
+ } while (0)
+
+
+/* Another loop based version with conditionals, but doing a fixed number of
+ steps. */
+
+#define binvert_limb_cond(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __rem = (1 - __n) >> 1; \
+ mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
+ int __count; \
+ \
+ ASSERT ((__n & 1) == 1); \
+ \
+ __count = GMP_LIMB_BITS-1; \
+ do \
+ { \
+ __inv >>= 1; \
+ if (__rem & 1) \
+ { \
+ __inv |= GMP_LIMB_HIGHBIT; \
+ __rem -= __n; \
+ } \
+ __rem >>= 1; \
+ } \
+ while (-- __count); \
+ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+
+
+/* Another loop based bitwise version, but purely arithmetic, no
+ conditionals. */
+
+#define binvert_limb_arith(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __rem = (1 - __n) >> 1; \
+ mp_limb_t __inv = GMP_LIMB_HIGHBIT; \
+ mp_limb_t __lowbit; \
+ int __count; \
+ \
+ ASSERT ((__n & 1) == 1); \
+ \
+ __count = GMP_LIMB_BITS-1; \
+ do \
+ { \
+ __lowbit = __rem & 1; \
+ __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1)); \
+ __rem = (__rem - (__n & -__lowbit)) >> 1; \
+ } \
+ while (-- __count); \
+ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+
+
+double
+speed_binvert_limb_mul1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
+}
+double
+speed_binvert_limb_loop (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
+}
+double
+speed_binvert_limb_cond (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
+}
+double
+speed_binvert_limb_arith (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
+}
diff --git a/gmp-6.3.0/tune/noop.c b/gmp-6.3.0/tune/noop.c
new file mode 100644
index 0000000..c127b73
--- /dev/null
+++ b/gmp-6.3.0/tune/noop.c
@@ -0,0 +1,67 @@
+/* Noop routines.
+
+ These are in a separate file to stop gcc recognising do-nothing functions
+ and optimizing away calls to them. */
+
+/*
+Copyright 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+void
+noop (void)
+{
+}
+
+/*ARGSUSED*/
+void
+noop_1 (mp_limb_t n)
+{
+}
+
+/*ARGSUSED*/
+void
+noop_wxs (mp_ptr wp, mp_srcptr xp, mp_size_t size)
+{
+}
+
+/*ARGSUSED*/
+void
+noop_wxys (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+}
+
+/*ARGSUSED*/
+void
+mpn_cache_fill_dummy (mp_limb_t n)
+{
+}
diff --git a/gmp-6.3.0/tune/pentium.asm b/gmp-6.3.0/tune/pentium.asm
new file mode 100644
index 0000000..fb1e833
--- /dev/null
+++ b/gmp-6.3.0/tune/pentium.asm
@@ -0,0 +1,60 @@
+dnl x86 pentium time stamp counter access routine.
+
+dnl Copyright 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned p[2]);
+C
+C Get the pentium rdtsc cycle counter, storing the least significant word in
+C p[0] and the most significant in p[1].
+C
+C cpuid is used to serialize execution. On big measurements this won't be
+C significant but it may help make small single measurements more accurate.
+
+ .text
+ ALIGN(8)
+
+defframe(PARAM_P,4)
+
+PROLOGUE(speed_cyclecounter)
+deflit(`FRAME',0)
+ pushl %ebx
+FRAME_pushl()
+ xorl %eax, %eax
+ cpuid
+ rdtsc
+ movl PARAM_P, %ebx
+ movl %eax, (%ebx)
+ movl %edx, 4(%ebx)
+ popl %ebx
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/tune/powerpc.asm b/gmp-6.3.0/tune/powerpc.asm
new file mode 100644
index 0000000..2f4ac27
--- /dev/null
+++ b/gmp-6.3.0/tune/powerpc.asm
@@ -0,0 +1,53 @@
+dnl PowerPC mftb_function -- read time base registers.
+
+dnl Copyright 2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void mftb_function (unsigned a[2]);
+C Store time base low word in a[0], high word in a[1]; retry if the high word changed.
+
+ASM_START()
+PROLOGUE(mftb_function)
+
+ C r3 a
+
+L(again):
+ mftbu r4
+ mftb r5
+ mftbu r6
+ cmpw cr0, r4, r6
+ bne L(again)
+
+ stw r5, 0(r3)
+ stw r4, 4(r3)
+ blr
+
+EPILOGUE()
diff --git a/gmp-6.3.0/tune/powerpc64.asm b/gmp-6.3.0/tune/powerpc64.asm
new file mode 100644
index 0000000..1ade996
--- /dev/null
+++ b/gmp-6.3.0/tune/powerpc64.asm
@@ -0,0 +1,49 @@
+dnl PowerPC mftb_function -- read time base registers, 64-bit integer.
+
+dnl Copyright 2002-2004 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void mftb_function (unsigned a[2]);
+C Store the low 32 bits of the time base in a[0] and the high 32 bits in a[1].
+
+ASM_START()
+PROLOGUE(mftb_function)
+
+ C r3 a
+
+ mftb r5
+
+ srdi r4, r5, 32
+ stw r5, 0(r3)
+ stw r4, 4(r3)
+ blr
+
+EPILOGUE()
diff --git a/gmp-6.3.0/tune/powm_mod.c b/gmp-6.3.0/tune/powm_mod.c
new file mode 100644
index 0000000..765fd7b
--- /dev/null
+++ b/gmp-6.3.0/tune/powm_mod.c
@@ -0,0 +1,38 @@
+/* mpz/powm.c forced to use division. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#undef POWM_THRESHOLD
+#define POWM_THRESHOLD 1
+#define __gmpz_powm mpz_powm_mod
+
+#include "../mpz/powm.c"
diff --git a/gmp-6.3.0/tune/powm_redc.c b/gmp-6.3.0/tune/powm_redc.c
new file mode 100644
index 0000000..8584614
--- /dev/null
+++ b/gmp-6.3.0/tune/powm_redc.c
@@ -0,0 +1,40 @@
+/* mpz/powm.c forced to use REDC. */
+
+/*
+Copyright 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+/* WANT_REDC_GLOBAL makes redc() available for speed and tune program use. */
+#undef POWM_THRESHOLD
+#define POWM_THRESHOLD MP_SIZE_T_MAX
+#define WANT_REDC_GLOBAL 1
+#define __gmpz_powm mpz_powm_redc
+
+#include "../mpz/powm.c"
diff --git a/gmp-6.3.0/tune/pre_divrem_1.c b/gmp-6.3.0/tune/pre_divrem_1.c
new file mode 100644
index 0000000..66d00da
--- /dev/null
+++ b/gmp-6.3.0/tune/pre_divrem_1.c
@@ -0,0 +1,40 @@
+/* mpn_preinv_divrem_1 -- if not already in libgmp.
+
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+
+#if ! USE_PREINV_DIVREM_1
+
+#undef USE_PREINV_DIVREM_1
+#define USE_PREINV_DIVREM_1 1
+
+#include "mpn/generic/pre_divrem_1.c"
+
+#endif
diff --git a/gmp-6.3.0/tune/set_strb.c b/gmp-6.3.0/tune/set_strb.c
new file mode 100644
index 0000000..128c41b
--- /dev/null
+++ b/gmp-6.3.0/tune/set_strb.c
@@ -0,0 +1,46 @@
+/* mpn_set_str_basecase -- mpn_set_str forced to its basecase.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __gmpn_set_str mpn_set_str_basecase
+#define __gmpn_bc_set_str mpn_bc_set_str_basecase
+#define __gmpn_dc_set_str mpn_dc_set_str_basecase
+
+#include "gmp-impl.h"
+
+#ifndef SIZE_T_MAX
+#define SIZE_T_MAX ((size_t) ULONG_MAX)
+#endif
+
+#undef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD SIZE_T_MAX /* always */
+#undef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD SIZE_T_MAX /* always */
+
+#include "mpn/generic/set_str.c"
diff --git a/gmp-6.3.0/tune/set_strp.c b/gmp-6.3.0/tune/set_strp.c
new file mode 100644
index 0000000..3053b60
--- /dev/null
+++ b/gmp-6.3.0/tune/set_strp.c
@@ -0,0 +1,42 @@
+/* mpn_pre_set_str -- set_str conversion using a caller-supplied powers table.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define TUNE_PROGRAM_BUILD 1 /* for gmp-impl.h */
+
+#include "gmp-impl.h"
+
+void
+mpn_pre_set_str (mp_ptr wp, unsigned char *str, size_t str_len, powers_t *powtab, mp_ptr tp)
+{
+ if (BELOW_THRESHOLD (str_len, set_str_dc_threshold)) /* short input: basecase */
+ mpn_bc_set_str (wp, str, str_len, powtab->base);
+ else /* otherwise the sub-quadratic (dc) routine, driven by powtab */
+ mpn_dc_set_str (wp, str, str_len, powtab, tp);
+}
diff --git a/gmp-6.3.0/tune/set_strs.c b/gmp-6.3.0/tune/set_strs.c
new file mode 100644
index 0000000..d2a9fc2
--- /dev/null
+++ b/gmp-6.3.0/tune/set_strs.c
@@ -0,0 +1,42 @@
+/* mpn_set_str_subquad -- mpn_set_str forced to the sub-quadratic case.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define __gmpn_set_str mpn_set_str_subquad
+#define __gmpn_bc_set_str mpn_bc_set_str_subquad
+#define __gmpn_dc_set_str mpn_dc_set_str_subquad
+
+#include "gmp-impl.h"
+
+#undef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD 2 /* never */
+#undef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD 2 /* never */
+
+#include "mpn/generic/set_str.c"
diff --git a/gmp-6.3.0/tune/sparcv9.asm b/gmp-6.3.0/tune/sparcv9.asm
new file mode 100644
index 0000000..f0981c7
--- /dev/null
+++ b/gmp-6.3.0/tune/sparcv9.asm
@@ -0,0 +1,45 @@
+dnl Sparc v9 32-bit time stamp counter access routine.
+
+dnl Copyright 2000, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned p[2]);
+C
+C Get the sparc v9 tick counter.
+
+ASM_START()
+PROLOGUE(speed_cyclecounter)
+ rd %tick,%g1
+ st %g1,[%o0] C low 32 bits
+ srlx %g1,32,%g4
+ retl
+ st %g4,[%o0+4] C high 32 bits
+EPILOGUE(speed_cyclecounter)
diff --git a/gmp-6.3.0/tune/speed-ext.c b/gmp-6.3.0/tune/speed-ext.c
new file mode 100644
index 0000000..e7fb8b9
--- /dev/null
+++ b/gmp-6.3.0/tune/speed-ext.c
@@ -0,0 +1,233 @@
+/* An example of extending the speed program to measure routines not in GMP.
+
+Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* The extension here is three versions of an mpn arithmetic mean. These
+ aren't meant to be particularly useful, just examples.
+
+ You can run something like the following to compare their speeds.
+
+ ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
+
+ On RISC chips, mean_open() might be fastest if the compiler is doing a
+ good job. On the register starved x86s, mean_calls will be fastest.
+
+
+ Notes:
+
+ SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
+ by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
+ files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
+
+ The advantage of this #include "speed.c" scheme is that there's no
+ editing of a copy of that file, and new features in new versions of it
+ will be immediately available.
+
+ In a real program the routines mean_calls() etc would probably be in
+ separate C or assembler source files, and just the measuring
+ speed_mean_calls() etc would be here. Linking against other libraries
+ for things to measure is perfectly possible too.
+
+ When attempting to compare two versions of the same named routine, say
+ like the generic and assembler versions of mpn_add_n(), creative use of
+ cc -D or #define is suggested, so one or both can be renamed and linked
+ into the same program. It'll be much easier to compare them side by side
+ than with separate programs for each.
+
+ common.c has notes on writing speed measuring routines.
+
+ Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
+ not using libtool) to get common.o and other objects needed by speed.c. */
+
+
+#define SPEED_EXTRA_PROTOS \
+ double speed_mean_calls (struct speed_params *s); \
+ double speed_mean_open (struct speed_params *s); \
+ double speed_mean_open2 (struct speed_params *s);
+
+#define SPEED_EXTRA_ROUTINES \
+ { "mean_calls", speed_mean_calls }, \
+ { "mean_open", speed_mean_open }, \
+ { "mean_open2", speed_mean_open2 },
+
+#include "speed.c"
+
+
+/* A straightforward implementation calling mpn subroutines.
+
+ wp,size is set to (xp,size + yp,size) / 2. The return value is the
+ remainder from the division (0 or 1). The other versions are the same. */
+
+mp_limb_t
+mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+ mp_limb_t c, ret;
+
+ ASSERT (size >= 1);
+
+ c = mpn_add_n (wp, xp, yp, size); /* c is the carry out of the sum */
+ ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1); /* the shifted-out low bit */
+ wp[size-1] |= (c << (GMP_LIMB_BITS-1)); /* the carry becomes the top bit */
+ return ret;
+}
+
+
+/* An open-coded version, making one pass over the data. The right shift is
+ done as the added limbs are produced. The addition code follows
+ mpn/generic/add_n.c. */
+
+mp_limb_t
+mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+ mp_limb_t w, wprev, x, y, c, ret;
+ mp_size_t i;
+
+ ASSERT (size >= 1);
+
+ x = xp[0];
+ y = yp[0];
+
+ wprev = x + y; /* low limb of the sum, stored once the next limb is known */
+ c = (wprev < x); /* unsigned wraparound means a carry out */
+ ret = (wprev & 1); /* the bit the right shift will drop */
+ /* RSHIFT gives lo shifted right one bit with the low bit of hi on top. */
+#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
+
+ for (i = 1; i < size; i++)
+ {
+ x = xp[i];
+ y = yp[i];
+
+ w = x + c; /* add the incoming carry first */
+ c = (w < x);
+ w += y;
+ c += (w < y); /* at most one of the two adds can carry */
+
+ wp[i-1] = RSHIFT (w, wprev); /* previous limb takes the low bit of this one */
+ wprev = w;
+ }
+
+ wp[i-1] = RSHIFT (c, wprev); /* the final carry supplies the top bit */
+
+ return ret;
+}
+
+
+/* Another one-pass version, but right shifting the source limbs rather than
+ the result limbs. There's not much chance of this being better than the
+ above, but it's an alternative at least. */
+
+mp_limb_t
+mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
+{
+ mp_limb_t w, x, y, xnext, ynext, c, ret;
+ mp_size_t i;
+
+ ASSERT (size >= 1);
+
+ x = xp[0];
+ y = yp[0];
+
+ /* ret is the low bit of x+y, c is the carry out of that low bit add */
+ ret = (x ^ y) & 1;
+ c = (x & y) & 1;
+
+ for (i = 0; i < size-1; i++)
+ {
+ xnext = xp[i+1];
+ ynext = yp[i+1];
+ x = RSHIFT (xnext, x); /* RSHIFT is the macro #defined inside mean_open */
+ y = RSHIFT (ynext, y);
+
+ w = x + c; /* add the pre-shifted limbs with the running carry */
+ c = (w < x);
+ w += y;
+ c += (w < y);
+ wp[i] = w;
+
+ x = xnext; /* keep the unshifted limbs for the next round */
+ y = ynext;
+ }
+
+ wp[i] = (x >> 1) + (y >> 1) + c; /* top limbs have no higher limb to shift in */
+
+ return ret;
+}
+
+
+/* The speed measuring routines are the same apart from which function they
+ run, so a macro is used. Actually this macro is the same as
+ SPEED_ROUTINE_MPN_BINARY_N. */
+
+#define SPEED_ROUTINE_MEAN(mean_fun) \
+ { \
+ unsigned i; \
+ mp_ptr wp; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ mean_fun (wp, s->xp, s->yp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+double
+speed_mean_calls (struct speed_params *s)
+{
+ SPEED_ROUTINE_MEAN (mean_calls);
+}
+
+double
+speed_mean_open (struct speed_params *s)
+{
+ SPEED_ROUTINE_MEAN (mean_open);
+}
+
+double
+speed_mean_open2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MEAN (mean_open2);
+}
diff --git a/gmp-6.3.0/tune/speed.c b/gmp-6.3.0/tune/speed.c
new file mode 100644
index 0000000..f8909bc
--- /dev/null
+++ b/gmp-6.3.0/tune/speed.c
@@ -0,0 +1,1419 @@
+/* Speed measuring program.
+
+Copyright 1999-2003, 2005, 2006, 2008-2022 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* Usage message is in the code below, run with no arguments to print it.
+ See README for interesting applications.
+
+ To add a new routine foo(), create a speed_foo() function in the style of
+ the existing ones and add an entry in the routine[] array. Put FLAG_R if
+ speed_foo() wants an "r" parameter.
+
+ The routines don't have help messages or descriptions, but most have
+ suggestive names. See the source code for full details.
+
+*/
+
+#include "config.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for getpid, R_OK */
+#endif
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h> /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h> /* for getrusage() */
+#endif
+
+
+#include "gmp-impl.h"
+#include "longlong.h" /* for the benefit of speed-many.c */
+#include "tests.h"
+#include "speed.h"
+
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+#if !HAVE_STRTOUL
+#define strtoul(p,e,b) (unsigned long) strtol(p,e,b)
+#endif
+
+#ifdef SPEED_EXTRA_PROTOS
+SPEED_EXTRA_PROTOS
+#endif
+#ifdef SPEED_EXTRA_PROTOS2
+SPEED_EXTRA_PROTOS2
+#endif
+
+/* A limb with the repeating bit pattern 0xAA..AA, masked by GMP_NUMB_MASK.
+   It is the fill value for DATA_AAS in data_fill and the result of the
+   "aas" form in r_string.  Only defined when limbs are 32 or 64 bits.  */
+#if GMP_LIMB_BITS == 32
+#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
+#endif
+#if GMP_LIMB_BITS == 64
+#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
+#endif
+
+/* How run_one presents each time: as measured, as a ratio to or a difference
+   from the first routine, or as a difference from the previous size.  */
+#define CMP_ABSOLUTE 1
+#define CMP_RATIO 2
+#define CMP_DIFFERENCE 3
+#define CMP_DIFFPREV 4
+int option_cmp = CMP_ABSOLUTE;
+
+#define UNIT_SECONDS 1
+#define UNIT_CYCLES 2
+#define UNIT_CYCLESPERLIMB 3
+int option_unit = UNIT_SECONDS; /* units run_one converts times to */
+
+#define DATA_RANDOM 1
+#define DATA_RANDOM2 2
+#define DATA_ZEROS 3
+#define DATA_AAS 4
+#define DATA_FFS 5
+#define DATA_2FD 6
+int option_data = DATA_RANDOM; /* operand pattern written by data_fill */
+
+int option_square = 0; /* picks the SIZE_TO_DIVISOR form: 1 is n*n, 2 is n*(n+1)/2, else n */
+double option_factor = 0.0; /* if non-zero, run_all grows the size by this factor */
+mp_size_t option_step = 1; /* smallest size increment run_all will take */
+int option_gnuplot = 0; /* non-zero makes run_one write gnuplot data lines */
+char *option_gnuplot_basename; /* run_gnuplot appends ".gnuplot" and ".data" to this */
+struct size_array_t {
+ mp_size_t start, end;
+} *size_array = NULL; /* size ranges, each stepped through start to end by run_all */
+mp_size_t size_num = 0; /* number of size_array entries run_all visits */
+mp_size_t size_allocnum = 0; /* presumably the allocated length of size_array, TODO confirm */
+int option_resource_usage = 0;
+long option_seed = 123456789;
+
+struct speed_params sp; /* the parameters run_all passes to run_one */
+
+#define COLUMN_WIDTH 13 /* for the free-form output */
+
+#define FLAG_R (1<<0) /* require ".r" */
+#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */
+#define FLAG_RSIZE (1<<2)
+#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */
+
+const struct routine_t {
+ /* constants: "name" is the string routine_find matches a command line
+    choice against, "fun" is the function run_one hands to speed_measure,
+    and "flag" is an OR of the FLAG_ bits (0 for the entries which leave
+    it out) */
+ const char *name;
+ speed_function_t fun;
+ int flag;
+} routine[] = {
+
+ { "noop", speed_noop },
+ { "noop_wxs", speed_noop_wxs },
+ { "noop_wxys", speed_noop_wxys },
+
+ { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL },
+ { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL },
+ { "mpn_add_1", speed_mpn_add_1, FLAG_R },
+ { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R },
+ { "mpn_sub_1", speed_mpn_sub_1, FLAG_R },
+ { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R },
+
+ { "mpn_add_err1_n", speed_mpn_add_err1_n },
+ { "mpn_add_err2_n", speed_mpn_add_err2_n },
+ { "mpn_add_err3_n", speed_mpn_add_err3_n },
+ { "mpn_sub_err1_n", speed_mpn_sub_err1_n },
+ { "mpn_sub_err2_n", speed_mpn_sub_err2_n },
+ { "mpn_sub_err3_n", speed_mpn_sub_err3_n },
+
+#if HAVE_NATIVE_mpn_add_n_sub_n
+ { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL },
+#endif
+
+ { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R },
+ { "mpn_submul_1", speed_mpn_submul_1, FLAG_R },
+#if HAVE_NATIVE_mpn_addmul_2
+ { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_3
+ { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_4
+ { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_5
+ { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_6
+ { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_7
+ { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addmul_8
+ { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addaddmul_1msb0
+ { "mpn_addaddmul_1msb0", speed_mpn_addaddmul_1msb0, FLAG_R_OPTIONAL },
+#endif
+ { "mpn_mul_1", speed_mpn_mul_1, FLAG_R },
+ { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
+#if HAVE_NATIVE_mpn_mul_2
+ { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_3
+ { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_4
+ { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_5
+ { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+ { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL },
+#endif
+
+ { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
+ { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
+#if HAVE_NATIVE_mpn_divrem_1c
+ { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R },
+ { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R },
+#endif
+ { "mpn_mod_1", speed_mpn_mod_1, FLAG_R },
+#if HAVE_NATIVE_mpn_mod_1c
+ { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R },
+#endif
+ { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R },
+ { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
+ { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R },
+
+ { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R },
+ { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R },
+ { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R },
+ { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R },
+ { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R },
+ { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R },
+
+ { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R },
+ { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R },
+ { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
+ { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
+ { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R },
+ { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R },
+
+ { "mpn_divrem_2", speed_mpn_divrem_2, },
+ { "mpn_divrem_2_div", speed_mpn_divrem_2_div, },
+ { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, },
+
+ { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R },
+ { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R },
+ { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R },
+ { "mpn_div_qr_1n_pi1_3",speed_mpn_div_qr_1n_pi1_3, FLAG_R },
+ { "mpn_div_qr_1n_pi1_4",speed_mpn_div_qr_1n_pi1_4, FLAG_R },
+ { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R },
+
+ { "mpn_div_qr_2n", speed_mpn_div_qr_2n, },
+ { "mpn_div_qr_2u", speed_mpn_div_qr_2u, },
+
+ { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R },
+ { "mpn_divexact_by3", speed_mpn_divexact_by3 },
+
+ { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R },
+ { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL },
+ { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL },
+
+#if HAVE_NATIVE_mpn_modexact_1_odd
+ { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R },
+#endif
+ { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
+
+#if GMP_NUMB_BITS % 4 == 0
+ { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 },
+#endif
+
+ { "mpn_lshift", speed_mpn_lshift, FLAG_R },
+ { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R },
+ { "mpn_rshift", speed_mpn_rshift, FLAG_R },
+
+ { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL },
+ { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL },
+ { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL },
+ { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL },
+ { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL },
+ { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL },
+ { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL },
+ { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL },
+ { "mpn_com", speed_mpn_com },
+ { "mpn_neg", speed_mpn_neg },
+
+ { "mpn_popcount", speed_mpn_popcount },
+ { "mpn_hamdist", speed_mpn_hamdist },
+
+ { "mpn_matrix22_mul", speed_mpn_matrix22_mul },
+
+ { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA },
+ { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA },
+ { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA },
+ { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA },
+ { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA },
+ { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA },
+ { "mpn_hgcd", speed_mpn_hgcd },
+ { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
+ { "mpn_hgcd_appr", speed_mpn_hgcd_appr },
+ { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
+
+ { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
+ { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
+ { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 },
+
+ { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
+ { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL },
+ { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
+ { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL },
+
+ { "mpn_gcd", speed_mpn_gcd },
+
+ { "mpn_gcdext", speed_mpn_gcdext },
+ { "mpn_gcdext_single", speed_mpn_gcdext_single },
+ { "mpn_gcdext_double", speed_mpn_gcdext_double },
+ { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
+ { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
+#if 0
+ { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer },
+#endif
+
+ { "gmp_primesieve", speed_gmp_primesieve, FLAG_NODATA },
+ { "mpz_nextprime", speed_mpz_nextprime },
+ { "mpz_nextprime_1", speed_mpz_nextprime_1, FLAG_R_OPTIONAL },
+ { "mpz_prevprime", speed_mpz_prevprime },
+ { "mpz_prevprime_1", speed_mpz_prevprime_1, FLAG_R_OPTIONAL },
+
+ { "mpz_jacobi", speed_mpz_jacobi },
+ { "mpn_jacobi_base", speed_mpn_jacobi_base },
+ { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 },
+ { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 },
+ { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 },
+ { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 },
+
+ { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL },
+ { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
+ { "mpn_sqr_basecase", speed_mpn_sqr_basecase },
+#if HAVE_NATIVE_mpn_sqr_diagonal
+ { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
+#endif
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+ { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
+#endif
+
+ { "mpn_mul_n", speed_mpn_mul_n },
+ { "mpn_sqr", speed_mpn_sqr },
+
+ { "mpn_toom2_sqr", speed_mpn_toom2_sqr },
+ { "mpn_toom3_sqr", speed_mpn_toom3_sqr },
+ { "mpn_toom4_sqr", speed_mpn_toom4_sqr },
+ { "mpn_toom6_sqr", speed_mpn_toom6_sqr },
+ { "mpn_toom8_sqr", speed_mpn_toom8_sqr },
+ { "mpn_toom22_mul", speed_mpn_toom22_mul },
+ { "mpn_toom33_mul", speed_mpn_toom33_mul },
+ { "mpn_toom44_mul", speed_mpn_toom44_mul },
+ { "mpn_toom6h_mul", speed_mpn_toom6h_mul },
+ { "mpn_toom8h_mul", speed_mpn_toom8h_mul },
+ { "mpn_toom32_mul", speed_mpn_toom32_mul },
+ { "mpn_toom42_mul", speed_mpn_toom42_mul },
+ { "mpn_toom43_mul", speed_mpn_toom43_mul },
+ { "mpn_toom63_mul", speed_mpn_toom63_mul },
+ { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul },
+ { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
+#if WANT_OLD_FFT_FULL
+ { "mpn_mul_fft_full", speed_mpn_mul_fft_full },
+ { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
+#endif
+ { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
+ { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
+
+ { "mpn_sqrlo", speed_mpn_sqrlo },
+ { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase },
+ { "mpn_mullo_n", speed_mpn_mullo_n },
+ { "mpn_mullo_basecase", speed_mpn_mullo_basecase },
+
+ { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
+ { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid },
+ { "mpn_mulmid_n", speed_mpn_mulmid_n },
+ { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL },
+
+ { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 },
+ { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 },
+ { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
+ { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 },
+
+ { "mpn_mulmod_bknp1", speed_mpn_mulmod_bknp1, FLAG_R_OPTIONAL },
+ { "mpn_sqrmod_bknp1", speed_mpn_sqrmod_bknp1, FLAG_R_OPTIONAL },
+ { "mpn_mulmod_bnp1", speed_mpn_mulmod_bnp1 },
+ { "mpn_sqrmod_bnp1", speed_mpn_sqrmod_bnp1 },
+
+ { "mpn_invert", speed_mpn_invert },
+ { "mpn_invertappr", speed_mpn_invertappr },
+ { "mpn_ni_invertappr", speed_mpn_ni_invertappr },
+ { "mpn_binvert", speed_mpn_binvert },
+ { "mpn_sec_invert", speed_mpn_sec_invert },
+
+ { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL},
+ { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
+ { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
+
+ { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr },
+ { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr },
+ { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q },
+ { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q },
+ { "mpn_sbpi1_bdiv_r", speed_mpn_sbpi1_bdiv_r },
+
+ { "mpn_broot", speed_mpn_broot, FLAG_R },
+ { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R },
+ { "mpn_brootinv", speed_mpn_brootinv, FLAG_R },
+
+ { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
+ { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
+ { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
+
+ { "mpn_sqrtrem", speed_mpn_sqrtrem },
+ { "mpn_rootrem", speed_mpn_rootrem, FLAG_R },
+ { "mpn_sqrt", speed_mpn_sqrt },
+ { "mpn_root", speed_mpn_root, FLAG_R },
+
+ { "mpn_perfect_power_p", speed_mpn_perfect_power_p, },
+ { "mpn_perfect_square_p", speed_mpn_perfect_square_p, },
+
+ { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA },
+ { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA },
+ { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA },
+ { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA },
+ { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA },
+
+ { "mpz_add", speed_mpz_add },
+ { "mpz_invert", speed_mpz_invert, FLAG_R_OPTIONAL },
+ { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
+ { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL },
+ { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA },
+ { "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA },
+ { "mpz_mfac_uiui", speed_mpz_mfac_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
+ { "mpz_primorial_ui", speed_mpz_primorial_ui, FLAG_NODATA },
+ { "mpz_powm", speed_mpz_powm, FLAG_R_OPTIONAL },
+ { "mpz_powm_mod", speed_mpz_powm_mod },
+ { "mpz_powm_redc", speed_mpz_powm_redc },
+ { "mpz_powm_sec", speed_mpz_powm_sec },
+ { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL },
+
+ { "mpz_mod", speed_mpz_mod },
+ { "mpn_redc_1", speed_mpn_redc_1 },
+ { "mpn_redc_2", speed_mpn_redc_2 },
+ { "mpn_redc_n", speed_mpn_redc_n },
+
+ { "MPN_COPY", speed_MPN_COPY },
+ { "MPN_COPY_INCR", speed_MPN_COPY_INCR },
+ { "MPN_COPY_DECR", speed_MPN_COPY_DECR },
+ { "memcpy", speed_memcpy },
+#if HAVE_NATIVE_mpn_copyi
+ { "mpn_copyi", speed_mpn_copyi },
+#endif
+#if HAVE_NATIVE_mpn_copyd
+ { "mpn_copyd", speed_mpn_copyd },
+#endif
+ { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_addlsh1_n == 1
+ { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n == 1
+ { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+ { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+ { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_n == 1
+ { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n == 1
+ { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n == 1
+ { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+ { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+ { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+ { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_n == 1
+ { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n
+ { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+ { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+ { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+ { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+ { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+ { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_rsh1add_n
+ { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_rsh1sub_n
+ { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
+#endif
+
+ { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
+ { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
+
+ { "MPN_ZERO", speed_MPN_ZERO },
+
+ { "binvert_limb", speed_binvert_limb, FLAG_NODATA },
+ { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA },
+ { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA },
+ { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA },
+ { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },
+
+ { "malloc_free", speed_malloc_free },
+ { "malloc_realloc_free", speed_malloc_realloc_free },
+ { "gmp_allocate_free", speed_gmp_allocate_free },
+ { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
+ { "mpz_init_clear", speed_mpz_init_clear },
+ { "mpq_init_clear", speed_mpq_init_clear },
+ { "mpf_init_clear", speed_mpf_init_clear },
+ { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear },
+
+ { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_umul_ppmm
+ { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_umul_ppmm_r
+ { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
+#endif
+
+ { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
+ { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
+
+ { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
+ { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_udiv_qrnnd
+ { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_udiv_qrnnd_r
+ { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL },
+#endif
+ { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL },
+
+ { "operator_div", speed_operator_div, FLAG_R_OPTIONAL },
+ { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL },
+
+ { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL },
+ { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
+ { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA },
+
+#ifdef SPEED_EXTRA_ROUTINES
+ SPEED_EXTRA_ROUTINES
+#endif
+#ifdef SPEED_EXTRA_ROUTINES2
+ SPEED_EXTRA_ROUTINES2
+#endif
+};
+
+/* One routine selected for measuring, as filled in by routine_find and
+   updated by run_one at each size.  */
+struct choice_t {
+ const struct routine_t *p; /* the routine[] entry that matched */
+ mp_limb_t r; /* value of the ".r" parameter, 0 when none was given */
+ double scale; /* multiplier applied to the measured time, 1.0 unless a "scale*" prefix was given */
+ double time; /* value to print for the current size */
+ int no_time; /* non-zero when there's no value to print for the current size */
+ double prev_time; /* scaled time from the previous run_one call, -1.0 if there was none, for CMP_DIFFPREV */
+ const char *name; /* the choice string as given, used as the gnuplot title */
+};
+struct choice_t *choice; /* array indexed 0 to num_choices-1 */
+int num_choices = 0;
+
+
+/* Write the operand pattern selected by option_data into the size limbs at
+   ptr.  DATA_2FD is the all-ones fill of DATA_FFS with 2 subtracted from the
+   lowest limb.  An unknown option_data value aborts.  */
+void
+data_fill (mp_ptr ptr, mp_size_t size)
+{
+  int pattern = option_data;
+
+  if (pattern == DATA_RANDOM)
+    {
+      mpn_random (ptr, size);
+    }
+  else if (pattern == DATA_RANDOM2)
+    {
+      mpn_random2 (ptr, size);
+    }
+  else if (pattern == DATA_ZEROS)
+    {
+      MPN_ZERO (ptr, size);
+    }
+  else if (pattern == DATA_AAS)
+    {
+      MPN_FILL (ptr, size, GMP_NUMB_0xAA);
+    }
+  else if (pattern == DATA_FFS || pattern == DATA_2FD)
+    {
+      /* both start from all ones, 2FD then adjusts the low limb */
+      MPN_FILL (ptr, size, GMP_NUMB_MAX);
+      if (pattern == DATA_2FD)
+        ptr[0] -= 2;
+    }
+  else
+    {
+      abort();
+      /*NOTREACHED*/
+    }
+}
+
+/* The code here handling the various combinations of output options isn't
+   too attractive, but it works and is fairly clean.  */
+
+#define SIZE_TO_DIVISOR(n) \
+  (option_square == 1 ? (n)*(n) \
+   : option_square == 2 ? (n)*((n)+1)/2 \
+   : (n))
+
+/* Measure every entry of choice[] at s->size and write one line of results
+   to fp.
+
+   prev_size is the size used by the previous call in the current size
+   range, or -1 for the first call of a range.  It only matters for
+   CMP_DIFFPREV.
+
+   Unless every chosen routine is FLAG_NODATA, s->xp and s->yp are allocated
+   here with s->size limbs each and filled by data_fill.  They are released
+   again before returning, so they must not be used by the caller.
+
+   A result of -1.0 from speed_measure is taken to mean no time is
+   available.  Such entries print as "n/a" in the free-form output and as 0
+   in the gnuplot data.  */
+
+void
+run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
+{
+  const char *first_open_fastest, *first_open_notfastest, *first_close;
+  int i, fastest, want_data;
+  double fastest_time;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  /* allocate data, unless all routines are NODATA */
+  want_data = 0;
+  for (i = 0; i < num_choices; i++)
+    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
+
+  if (want_data)
+    {
+      /* Allocate through s, which is also what data_fill and the measured
+         routines are given.  */
+      SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, s->align_xp);
+      SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, s->align_yp);
+
+      data_fill (s->xp, s->size);
+      data_fill (s->yp, s->size);
+
+      /* DATA_2FD has 2 as the most significant limb of x.  It has to be
+         stored here, after the fill, because the operands only exist for
+         the duration of this call.  */
+      if (option_data == DATA_2FD && s->size >= 2)
+        s->xp[s->size-1] = 2;
+    }
+  else
+    {
+      s->xp = NULL;
+      s->yp = NULL;
+    }
+
+  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
+    {
+      first_open_fastest = "(#";
+      first_open_notfastest = " (";
+      first_close = ")";
+    }
+  else
+    {
+      first_open_fastest = "#";
+      first_open_notfastest = " ";
+      first_close = "";
+    }
+
+  fastest = -1;
+  fastest_time = -1.0;
+  for (i = 0; i < num_choices; i++)
+    {
+      s->r = choice[i].r;
+      choice[i].time = speed_measure (choice[i].p->fun, s);
+      choice[i].no_time = (choice[i].time == -1.0);
+      if (! choice[i].no_time)
+        choice[i].time *= choice[i].scale;
+
+      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
+         is before any differences.  */
+      {
+        double t;
+        t = choice[i].time;
+        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
+          {
+            if (choice[i].prev_time == -1.0)
+              choice[i].no_time = 1;
+            else
+              choice[i].time = choice[i].time - choice[i].prev_time;
+          }
+        choice[i].prev_time = t;
+      }
+
+      if (choice[i].no_time)
+        continue;
+
+      /* Look for the fastest after CMP_DIFFPREV has been applied, but
+         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
+         if there's more than one routine.  */
+      if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
+        {
+          fastest = i;
+          fastest_time = choice[i].time;
+        }
+
+      if (option_cmp == CMP_DIFFPREV)
+        {
+          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV.  */
+          if (option_unit == UNIT_CYCLES)
+            choice[i].time /= speed_cycletime;
+          else if (option_unit == UNIT_CYCLESPERLIMB)
+            {
+              if (prev_size == -1)
+                choice[i].time /= speed_cycletime;
+              else
+                choice[i].time /= (speed_cycletime
+                                   * (SIZE_TO_DIVISOR(s->size)
+                                      - SIZE_TO_DIVISOR(prev_size)));
+            }
+        }
+      else
+        {
+          if (option_unit == UNIT_CYCLES)
+            choice[i].time /= speed_cycletime;
+          else if (option_unit == UNIT_CYCLESPERLIMB)
+            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
+
+          if (option_cmp == CMP_RATIO && i > 0)
+            {
+              /* A ratio isn't affected by the units chosen.  */
+              if (choice[0].no_time || choice[0].time == 0.0)
+                choice[i].no_time = 1;
+              else
+                choice[i].time /= choice[0].time;
+            }
+          else if (option_cmp == CMP_DIFFERENCE && i > 0)
+            {
+              if (choice[0].no_time)
+                {
+                  choice[i].no_time = 1;
+                  continue;
+                }
+              choice[i].time -= choice[0].time;
+            }
+        }
+    }
+
+  if (option_gnuplot)
+    {
+      /* In CMP_DIFFPREV, don't print anything for the first size, start
+         with the second where an actual difference is available.
+
+         In CMP_RATIO, print the first column as 1.0.
+
+         The 9 decimals printed is much more than the expected precision of
+         the measurements actually.  */
+
+      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
+        {
+          /* mp_size_t isn't necessarily long, so convert for %ld */
+          fprintf (fp, "%-6ld ", (long) s->size);
+          for (i = 0; i < num_choices; i++)
+            fprintf (fp, " %.9e",
+                     choice[i].no_time ? 0.0
+                     : (option_cmp == CMP_RATIO && i == 0) ? 1.0
+                     : choice[i].time);
+          fprintf (fp, "\n");
+        }
+    }
+  else
+    {
+      fprintf (fp, "%-6ld ", (long) s->size);
+      for (i = 0; i < num_choices; i++)
+        {
+          char buf[128];
+          int decimals;
+
+          if (choice[i].no_time)
+            {
+              fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
+            }
+          else
+            {
+              if (option_unit == UNIT_CYCLESPERLIMB
+                  || (option_cmp == CMP_RATIO && i > 0))
+                decimals = 4;
+              else if (option_unit == UNIT_CYCLES)
+                decimals = 2;
+              else
+                decimals = 9;
+
+              sprintf (buf, "%s%.*f%s",
+                       i == fastest ? first_open_fastest : first_open_notfastest,
+                       decimals, choice[i].time, first_close);
+              fprintf (fp, " %*s", COLUMN_WIDTH, buf);
+            }
+        }
+      fprintf (fp, "\n");
+    }
+
+  TMP_FREE;
+}
+
+/* Call run_one for every size of every range in size_array[], writing the
+   results to fp.
+
+   Within a range the size advances by the larger of option_step and, when
+   option_factor is non-zero, the increase that multiplying by option_factor
+   gives (at least 1).  prev_size restarts at -1 for each range.
+
+   sp.xp_block and sp.yp_block are allocated and filled here and stay valid
+   across all the run_one calls.  */
+
+void
+run_all (FILE *fp)
+{
+  mp_size_t prev_size;
+  int i;
+  TMP_DECL;
+
+  TMP_MARK;
+  SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
+  SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
+
+  data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
+  data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
+
+  for (i = 0; i < size_num; i++)
+    {
+      sp.size = size_array[i].start;
+      prev_size = -1;
+      for (;;)
+        {
+          mp_size_t step;
+
+          /* sp.xp and sp.yp are set up and released inside run_one, so
+             they're not valid to read or write at this level.  Any
+             adjustment of the operand data is made there.  */
+          run_one (fp, &sp, prev_size);
+          prev_size = sp.size;
+
+          if (option_factor != 0.0)
+            {
+              step = (mp_size_t) (sp.size * option_factor - sp.size);
+              if (step < 1)
+                step = 1;
+            }
+          else
+            step = 1;
+          if (step < option_step)
+            step = option_step;
+
+          sp.size += step;
+          if (sp.size > size_array[i].end)
+            break;
+        }
+    }
+
+  TMP_FREE;
+}
+
+
+/* Open filename for writing and return the stream.  If it can't be opened a
+   message goes to stderr and the program exits with status 1, so a NULL is
+   never returned.  */
+FILE *
+fopen_for_write (const char *filename)
+{
+  FILE *out = fopen (filename, "w");
+
+  if (out == NULL)
+    {
+      fprintf (stderr, "Cannot create %s\n", filename);
+      exit(1);
+    }
+
+  return out;
+}
+
+/* Close fp, which was being written as filename.  If the stream had an
+   error recorded on it, or the close itself fails, print a message to
+   stderr and exit with status 1.  */
+void
+fclose_written (FILE *fp, const char *filename)
+{
+  /* the error indicator must be read before the stream is closed */
+  int had_error = ferror (fp);
+  int close_failed = fclose (fp);
+
+  if (had_error == 0 && close_failed == 0)
+    return;
+
+  fprintf (stderr, "Error writing %s\n", filename);
+  exit(1);
+}
+
+
+/* Write "<basename>.gnuplot", a gnuplot script, and "<basename>.data", the
+   measurements it plots, where <basename> is option_gnuplot_basename.
+
+   argc and argv are only used to record the command line as a comment at
+   the top of the script.  The script plots one line per entry of choice[],
+   taking column 1 of the data file as x and column i+2 as y for choice i.
+   The measurements themselves come from run_all.
+
+   Failure to create or write either file exits the program, through
+   fopen_for_write and fclose_written.  */
+void
+run_gnuplot (int argc, char *argv[])
+{
+  char *plot_filename;
+  char *data_filename;
+  size_t filename_alloc;
+  FILE *fp;
+  int i;
+
+  /* 20 extra bytes is ample for either suffix and the terminator */
+  filename_alloc = strlen (option_gnuplot_basename) + 20;
+  plot_filename = (char *) (*__gmp_allocate_func) (filename_alloc);
+  data_filename = (char *) (*__gmp_allocate_func) (filename_alloc);
+
+  sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
+  sprintf (data_filename, "%s.data", option_gnuplot_basename);
+
+  fp = fopen_for_write (plot_filename);
+
+  fprintf (fp, "# Generated with:\n");
+  fprintf (fp, "#");
+  for (i = 0; i < argc; i++)
+    fprintf (fp, " %s", argv[i]);
+  fprintf (fp, "\n");
+  fprintf (fp, "\n");
+
+  fprintf (fp, "reset\n");
+
+  /* Putting the key at the top left is usually good, and you can change it
+     interactively if it's not.  */
+  fprintf (fp, "set key left\n");
+
+  /* write underscores, not subscripts */
+  fprintf (fp, "set termoption noenhanced\n");
+
+  /* designed to make it possible to see crossovers easily */
+  fprintf (fp, "set style data lines\n");
+
+  fprintf (fp, "plot ");
+  for (i = 0; i < num_choices; i++)
+    {
+      fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
+      fprintf (fp, " title \"%s\"", choice[i].name);
+
+      /* every plot item but the last is continued onto the next line */
+      if (i != num_choices-1)
+        fprintf (fp, ", \\");
+      fprintf (fp, "\n");
+    }
+
+  fprintf (fp, "load \"-\"\n");
+  fclose_written (fp, plot_filename);
+
+  fp = fopen_for_write (data_filename);
+
+  /* Unbuffered so you can see where the program was up to if it crashes or
+     you kill it.  */
+  setbuf (fp, NULL);
+
+  run_all (fp);
+  fclose_written (fp, data_filename);
+
+  /* The names were only needed for the messages above.  The free function
+     is given the same size as was allocated.  */
+  (*__gmp_free_func) (plot_filename, filename_alloc);
+  (*__gmp_free_func) (data_filename, filename_alloc);
+}
+
+
+/* Return a limb with n many one bits (starting from the least significant).
+   n must be in the range 0 to GMP_LIMB_BITS inclusive, anything else is an
+   invalid shift count.  */
+
+#define LIMB_ONES(n) \
+  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \
+   : (n) == 0 ? CNST_LIMB(0) \
+   : (CNST_LIMB(1) << (n)) - 1)
+
+/* Convert the text of an "r" parameter into a limb.  The forms accepted, in
+   the order they're tried, are
+
+     "aas"      the constant GMP_NUMB_0xAA
+     a number   anything mpz_set_str accepts with base 0.  The low limb is
+                used, negated if the number is negative, and a warning is
+                printed if the number is bigger than one limb.
+     "<N>bits"  a random limb with bit N-1 set and nothing above it, N
+                being 1 to GMP_LIMB_BITS
+     "<N>ones"  N one bits, N being 0 to GMP_LIMB_BITS
+
+   Anything else, or an N outside the stated range, prints a message to
+   stderr and exits with status 1.  */
+
+mp_limb_t
+r_string (const char *s)
+{
+  const char *s_orig = s;
+  long n;
+
+  if (strcmp (s, "aas") == 0)
+    return GMP_NUMB_0xAA;
+
+  {
+    mpz_t z;
+    mp_limb_t l;
+    int set, siz;
+
+    mpz_init (z);
+    set = mpz_set_str (z, s, 0);
+    siz = SIZ(z);
+    l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
+    mpz_clear (z);
+    if (set == 0)
+      {
+        if (siz > 1 || siz < -1)
+          printf ("Warning, r parameter %s truncated to %d bits\n",
+                  s_orig, GMP_LIMB_BITS);
+        return l;
+      }
+  }
+
+  /* Not a plain number, so look for a number followed by a suffix.  s is
+     left pointing at whatever follows the digits.  */
+  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
+    n = strtoul (s+2, (char **) &s, 16);
+  else
+    n = strtol (s, (char **) &s, 10);
+
+  if (strcmp (s, "bits") == 0)
+    {
+      mp_limb_t l;
+      if (n > GMP_LIMB_BITS)
+        {
+          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
+                   n, GMP_LIMB_BITS);
+          exit (1);
+        }
+      /* n-1 is used as a shift count below, so n must be at least 1 */
+      if (n < 1)
+        {
+          fprintf (stderr, "%ld bit parameter invalid (min 1 bit)\n", n);
+          exit (1);
+        }
+      mpn_random (&l, 1);
+      return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
+    }
+  else if (strcmp (s, "ones") == 0)
+    {
+      if (n > GMP_LIMB_BITS)
+        {
+          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
+                   n, GMP_LIMB_BITS);
+          exit (1);
+        }
+      /* a negative count would be a negative shift in LIMB_ONES */
+      if (n < 0)
+        {
+          fprintf (stderr, "%ld bit parameter invalid (min 0 bits)\n", n);
+          exit (1);
+        }
+      return LIMB_ONES (n);
+    }
+  else if (*s != '\0')
+    {
+      fprintf (stderr, "invalid r parameter: %s\n", s_orig);
+      exit (1);
+    }
+
+  return n;
+}
+
+
+/* Fill in *c from the routine choice string s_orig, which has the form
+
+       [scale*]name[.r]
+
+   c->name is set to s_orig itself.  c->scale is the number in front of the
+   "*" as read by atof, or 1.0 if there's no "*".  c->p is the routine[]
+   entry whose name matches exactly, and c->r is the r parameter as
+   converted by r_string, or 0 if there's none.
+
+   An unknown name, a ".r" on a routine which doesn't take one, or a missing
+   ".r" on a FLAG_R routine prints a message to stderr and exits with
+   status 1.  */
+void
+routine_find (struct choice_t *c, const char *s_orig)
+{
+  const char *s;
+  int i;
+  size_t nlen;
+
+  c->name = s_orig;
+  s = strchr (s_orig, '*');
+  if (s != NULL)
+    {
+      c->scale = atof(s_orig);
+      s++;
+    }
+  else
+    {
+      c->scale = 1.0;
+      s = s_orig;
+    }
+
+  for (i = 0; i < numberof (routine); i++)
+    {
+      nlen = strlen (routine[i].name);
+
+      /* strncmp rather than memcmp, since s can be shorter than the
+         routine name and mustn't be read beyond its terminator */
+      if (strncmp (s, routine[i].name, nlen) != 0)
+        continue;
+
+      if (s[nlen] == '.')
+        {
+          /* match, with a .r parameter */
+
+          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
+            {
+              fprintf (stderr,
+                       "Choice %s bad: doesn't take a \".<r>\" parameter\n",
+                       s_orig);
+              exit (1);
+            }
+
+          c->p = &routine[i];
+          c->r = r_string (s + nlen + 1);
+          return;
+        }
+
+      if (s[nlen] == '\0')
+        {
+          /* match, with no parameter */
+
+          if (routine[i].flag & FLAG_R)
+            {
+              fprintf (stderr,
+                       "Choice %s bad: needs a \".<r>\" parameter\n",
+                       s_orig);
+              exit (1);
+            }
+
+          c->p = &routine[i];
+          c->r = 0;
+          return;
+        }
+
+      /* otherwise the name is only a prefix of s, keep looking */
+    }
+
+  fprintf (stderr, "Choice %s unrecognised\n", s_orig);
+  exit (1);
+}
+
+
+/* Print the help text to stdout: the options, the name of every entry in
+   routine[] marked according to whether it takes a ".r" parameter, the
+   special forms of r, and speed_time_string.  speed_time_init is called
+   first so that string is ready to print.  */
+void
+usage (void)
+{
+  int i;
+
+  speed_time_init ();
+
+  printf ("Usage: speed [-options] -s size <routine>...\n");
+  printf ("Measure the speed of some routines.\n");
+  printf ("Times are in seconds, accuracy is shown.\n");
+  printf ("\n");
+  printf (" -p num set precision as number of time units each routine must run\n");
+  printf (" -s size[-end][,size[-end]]... sizes to measure\n");
+  printf (" single sizes or ranges, sep with comma or use multiple -s\n");
+  printf (" -t step step through sizes by given amount\n");
+  printf (" -f factor step through sizes by given factor (eg. 1.05)\n");
+  printf (" -r show times as ratios of the first routine\n");
+  printf (" -d show times as difference from the first routine\n");
+  printf (" -D show times as difference from previous size shown\n");
+  printf (" -c show times in CPU cycles\n");
+  printf (" -C show times in cycles per limb\n");
+  printf (" -u print resource usage (memory) at end\n");
+  printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n");
+  printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
+  printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n");
+  printf (" -o addrs print addresses of data blocks\n");
+  printf ("\n");
+  printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
+  printf ("is greater.\n");
+  printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
+  printf ("size and the previous size.\n");
+  printf ("\n");
+  printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
+  printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
+  printf ("a log/log plot).\n");
+  printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
+  printf ("when viewing more than one routine, it means same axis scales for all data).\n");
+  printf ("\n");
+  printf ("The available routines are as follows.\n");
+  printf ("\n");
+
+  for (i = 0; i < numberof (routine); i++)
+    {
+      if (routine[i].flag & FLAG_R)
+        printf ("\t%s.r\n", routine[i].name);
+      else if (routine[i].flag & FLAG_R_OPTIONAL)
+        printf ("\t%s (optional .r)\n", routine[i].name);
+      else
+        printf ("\t%s\n", routine[i].name);
+    }
+  printf ("\n");
+  printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
+  printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
+  printf ("\n");
+  printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
+  printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
+  printf ("\n");
+  /* this wording follows what run_one prints when it has no time */
+  printf ("Times for sizes out of the range accepted by a routine are shown as n/a (or as\n");
+  printf ("0 in the data file written by -P).\n");
+  printf ("The fastest routine at each size is marked with a # (free form output only).\n");
+  printf ("\n");
+  printf ("%s", speed_time_string);
+  printf ("\n");
+  printf ("Gnuplot home page http://www.gnuplot.info/\n");
+  printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
+}
+
+/* Validate an alignment given on the command line.  NAME is the option it
+   came from (used only in the error message) and ALIGN the requested value
+   in limbs.  Anything outside 0 to SPEED_TMP_ALLOC_ADJUST_MASK inclusive
+   is reported on stderr and ends the program with exit(1).  */
+void
+check_align_option (const char *name, mp_size_t align)
+{
+  if (align >= 0 && align <= SPEED_TMP_ALLOC_ADJUST_MASK)
+    return;
+
+  fprintf (stderr, "Alignment request out of range: %s %ld\n",
+           name, (long) align);
+  fprintf (stderr, " should be 0 to %d (limbs), inclusive\n",
+           SPEED_TMP_ALLOC_ADJUST_MASK);
+  exit (1);
+}
+
+/* Entry point of the "speed" program.
+
+   Parses the options, requires at least one routine name and at least one
+   -s size, seeds the random state with option_seed, resolves each routine
+   name with routine_find(), and then either produces gnuplot output (-P)
+   or prints a table of measurements on stdout.  With -u, resource usage
+   is reported at the end.
+
+   Besides the options described by usage(), this accepts -E and -F
+   (option_square 1 and 2), -R (seed from the current time) and -z (set
+   sp.cache).
+
+   Returns 0 when the run completes; command line errors end the program
+   through exit(1).  */
+int
+main (int argc, char *argv[])
+{
+  int  i;
+  int  opt;
+
+  /* Unbuffered so output goes straight out when directed to a pipe or file
+     and isn't lost on killing the program half way.  */
+  setbuf (stdout, NULL);
+
+  for (;;)
+    {
+      opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
+      if (opt == EOF)
+        break;
+
+      switch (opt) {
+      case 'a':
+        if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM;
+        else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
+        else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
+        else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS;
+        else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
+        else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
+        else
+          {
+            fprintf (stderr, "unrecognised data option: %s\n", optarg);
+            exit (1);
+          }
+        break;
+      case 'C':
+        /* -c and -C are mutually exclusive; the shared error message lives
+           in the -c case below.  */
+        if (option_unit != UNIT_SECONDS) goto bad_unit;
+        option_unit = UNIT_CYCLESPERLIMB;
+        break;
+      case 'c':
+        if (option_unit != UNIT_SECONDS)
+          {
+          bad_unit:
+            fprintf (stderr, "cannot use more than one of -c, -C\n");
+            exit (1);
+          }
+        option_unit = UNIT_CYCLES;
+        break;
+      case 'D':
+        /* -d, -D and -r are mutually exclusive; the shared error message
+           lives in the -d case below.  */
+        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
+        option_cmp = CMP_DIFFPREV;
+        break;
+      case 'd':
+        if (option_cmp != CMP_ABSOLUTE)
+          {
+          bad_cmp:
+            fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
+            exit (1);
+          }
+        option_cmp = CMP_DIFFERENCE;
+        break;
+      case 'E':
+        option_square = 1;
+        break;
+      case 'F':
+        option_square = 2;
+        break;
+      case 'f':
+        option_factor = atof (optarg);
+        if (option_factor <= 1.0)
+          {
+            fprintf (stderr, "-f factor must be > 1.0\n");
+            exit (1);
+          }
+        break;
+      case 'o':
+        speed_option_set (optarg);
+        break;
+      case 'P':
+        option_gnuplot = 1;
+        option_gnuplot_basename = optarg;
+        break;
+      case 'p':
+        speed_precision = atoi (optarg);
+        break;
+      case 'R':
+        option_seed = time (NULL);
+        break;
+      case 'r':
+        if (option_cmp != CMP_ABSOLUTE)
+          goto bad_cmp;
+        option_cmp = CMP_RATIO;
+        break;
+      case 's':
+        {
+          /* Comma separated list, each item either "size" or "start-end".
+             size_array grows by 10 entries whenever it fills up.  */
+          char  *s;
+          for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
+            {
+              if (size_num == size_allocnum)
+                {
+                  size_array = (struct size_array_t *)
+                    __gmp_allocate_or_reallocate
+                    (size_array,
+                     size_allocnum * sizeof(size_array[0]),
+                     (size_allocnum+10) * sizeof(size_array[0]));
+                  size_allocnum += 10;
+                }
+              if (sscanf (s, "%ld-%ld",
+                          &size_array[size_num].start,
+                          &size_array[size_num].end) != 2)
+                {
+                  /* Not a range, so a single size is both start and end.  */
+                  size_array[size_num].start = size_array[size_num].end
+                    = atol (s);
+                }
+
+              if (size_array[size_num].start < 0
+                  || size_array[size_num].end < 0
+                  || size_array[size_num].start > size_array[size_num].end)
+                {
+                  fprintf (stderr, "invalid size parameter: %s\n", s);
+                  exit (1);
+                }
+
+              size_num++;
+            }
+        }
+        break;
+      case 't':
+        option_step = atol (optarg);
+        if (option_step < 1)
+          {
+            fprintf (stderr, "-t step must be >= 1\n");
+            exit (1);
+          }
+        break;
+      case 'u':
+        option_resource_usage = 1;
+        break;
+      case 'z':
+        sp.cache = 1;
+        break;
+      case 'x':
+        sp.align_xp = atol (optarg);
+        check_align_option ("-x", sp.align_xp);
+        break;
+      case 'y':
+        sp.align_yp = atol (optarg);
+        check_align_option ("-y", sp.align_yp);
+        break;
+      case 'w':
+        sp.align_wp = atol (optarg);
+        check_align_option ("-w", sp.align_wp);
+        break;
+      case 'W':
+        sp.align_wp2 = atol (optarg);
+        check_align_option ("-W", sp.align_wp2);
+        break;
+      case '?':
+        exit(1);
+      }
+    }
+
+  /* No routine names given: show the help and fail.  */
+  if (optind >= argc)
+    {
+      usage ();
+      exit (1);
+    }
+
+  if (size_num == 0)
+    {
+      fprintf (stderr, "-s <size> must be specified\n");
+      exit (1);
+    }
+
+  gmp_randinit_default (__gmp_rands);
+  __gmp_rands_initialized = 1;
+  gmp_randseed_ui (__gmp_rands, option_seed);
+
+  /* One choice entry per remaining argument.  */
+  choice = (struct choice_t *) (*__gmp_allocate_func)
+    ((argc - optind) * sizeof(choice[0]));
+  for ( ; optind < argc; optind++)
+    {
+      struct choice_t  c;
+      routine_find (&c, argv[optind]);
+      choice[num_choices] = c;
+      num_choices++;
+    }
+
+  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
+      num_choices < 2)
+    {
+      fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
+    }
+
+  speed_time_init ();
+  if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
+    speed_cycletime_need_cycles ();
+  else
+    speed_cycletime_need_seconds ();
+
+  if (option_gnuplot)
+    {
+      run_gnuplot (argc, argv);
+    }
+  else
+    {
+      if (option_unit == UNIT_SECONDS)
+        printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
+      else
+        printf ("overhead %.2f cycles",
+                speed_measure (speed_noop, NULL) / speed_cycletime);
+      printf (", precision %d units of %.2e secs",
+              speed_precision, speed_unittime);
+
+      /* A cycle time of 1.0 or 0.0 is treated as "not known".  */
+      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
+        printf (", CPU freq unknown\n");
+      else
+        printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
+
+      printf (" ");
+      for (i = 0; i < num_choices; i++)
+        printf (" %*s", COLUMN_WIDTH, choice[i].name);
+      printf ("\n");
+
+      run_all (stdout);
+    }
+
+  if (option_resource_usage)
+    {
+#if HAVE_GETRUSAGE
+      {
+        /* This doesn't give data sizes on linux 2.0.x, only utime.
+           "maxresident" is ru_maxrss, the maximum resident set size;
+           ru_ixrss would be the integral shared memory size instead.  */
+        struct rusage  r;
+        if (getrusage (RUSAGE_SELF, &r) != 0)
+          perror ("getrusage");
+        else
+          printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
+                  (long) r.ru_utime.tv_sec, (long) r.ru_utime.tv_usec,
+                  (long) r.ru_idrss, (long) r.ru_isrss, (long) r.ru_maxrss);
+      }
+#else
+      printf ("getrusage() not available\n");
+#endif
+
+      /* Linux kernel. */
+      {
+        /* The pid is cast to int to match the %d conversion.  */
+        char  buf[128];
+        sprintf (buf, "/proc/%d/status", (int) getpid());
+        if (access (buf, R_OK) == 0)
+          {
+            sprintf (buf, "cat /proc/%d/status", (int) getpid());
+            system (buf);
+          }
+
+      }
+    }
+
+  return 0;
+}
diff --git a/gmp-6.3.0/tune/speed.h b/gmp-6.3.0/tune/speed.h
new file mode 100644
index 0000000..f09472c
--- /dev/null
+++ b/gmp-6.3.0/tune/speed.h
@@ -0,0 +1,3981 @@
+/* Header for speed and threshold things.
+
+Copyright 1999-2003, 2005, 2006, 2008-2017, 2019-2022 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#ifndef __SPEED_H__
+#define __SPEED_H__
+
+
+/* Pad ptr,oldsize with zero limbs (at the most significant end) to make it
+   newsize long.  newsize must be at least oldsize, which is ASSERTed.
+   oldsize and newsize each appear more than once in the expansion, so the
+   arguments should be free of side effects. */
+#define MPN_ZERO_EXTEND(ptr, oldsize, newsize) \
+ do { \
+ ASSERT ((newsize) >= (oldsize)); \
+ MPN_ZERO ((ptr)+(oldsize), (newsize)-(oldsize)); \
+ } while (0)
+
+/* A mask of the least significant n bits, for n from 0 up to and including
+   GMP_LIMB_BITS.  Shifting a limb by its full width is not defined in C
+   (and for instance 1<<32 doesn't give zero on x86 family CPUs), hence the
+   separate case for GMP_LIMB_BITS.  n appears twice in the expansion, so
+   it should be free of side effects. */
+#define MP_LIMB_T_LOWBITMASK(n) \
+ ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
+
+
+/* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice.
+
+   TMP_ALLOC_ALIGNED asks TMP_ALLOC for align-1 bytes more than requested
+   and hands the result to align_pointer (presumably rounding the address
+   up to a multiple of align; confirm against align_pointer's definition).
+   TMP_ALLOC_LIMBS_ALIGNED is the same thing with the size given in limbs
+   and the result cast to mp_ptr.  align appears twice in the expansion. */
+
+#define TMP_ALLOC_ALIGNED(bytes, align) \
+ align_pointer (TMP_ALLOC ((bytes) + (align)-1), (align))
+#define TMP_ALLOC_LIMBS_ALIGNED(limbs, align) \
+ ((mp_ptr) TMP_ALLOC_ALIGNED ((limbs)*sizeof(mp_limb_t), align))
+
+/* CACHE_LINE_SIZE is our default alignment for speed operands, and it sets
+   the limit on what s->align_xp etc can request as an offset from that
+   alignment. Maybe this should be an option of some sort, but in any case
+   here are some line sizes,
+
+ bytes
+ 32 pentium
+ 64 athlon
+ 64 itanium-2 L1
+ 128 itanium-2 L2
+*/
+#define CACHE_LINE_SIZE 64 /* bytes */
+
+/* One less than the number of limbs in a cache line.  This is the largest
+   alignment offset (in limbs) that check_align_option accepts, and
+   SPEED_TMP_ALLOC_LIMBS uses it as a bit mask on limb offsets. */
+#define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/GMP_LIMB_BYTES - 1)
+
+/* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
+   alignment.
+
+   SPEED_TMP_ALLOC_ADJUST_MASK extra limbs are allocated so the start of
+   the block can be advanced by up to that many limbs.  The result is
+   placed so that its address, counted in limbs, is congruent to align
+   modulo SPEED_TMP_ALLOC_ADJUST_MASK+1.
+
+   The limb position of the block is taken from the integer value of the
+   address rather than by subtracting a null pointer, since pointer
+   subtraction is only defined between pointers into the same array.  Only
+   the bits selected by SPEED_TMP_ALLOC_ADJUST_MASK are kept, so it doesn't
+   matter if the conversion to size_t drops high bits.
+
+   limbs and align should be free of side effects. */
+#define SPEED_TMP_ALLOC_LIMBS(ptr, limbs, align) \
+  do { \
+    mp_ptr __ptr; \
+    mp_size_t __ptr_align, __ptr_add; \
+    \
+    ASSERT ((CACHE_LINE_SIZE % GMP_LIMB_BYTES) == 0); \
+    __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK); \
+    __ptr_align = (mp_size_t) (((size_t) __ptr / sizeof (mp_limb_t)) \
+                               & SPEED_TMP_ALLOC_ADJUST_MASK); \
+    __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK; \
+    (ptr) = __ptr + __ptr_add; \
+  } while (0)
+
+
+/* This is the size for s->xp_block and s->yp_block, used in certain
+   routines that want to run across many different data values and use
+   s->size for a different purpose, eg. SPEED_ROUTINE_MPN_GCD_1.
+
+   With 4-byte limbs, 512 means 2kbytes of data for each of xp_block and
+   yp_block, making 4k total, which should fit easily in any L1 data cache.
+   With 8-byte limbs those figures double, to 4k each and 8k total. */
+
+#define SPEED_BLOCK_SIZE 512 /* limbs */
+
+
+/* Time measuring state and functions.
+
+   speed_unittime is the size of one time unit, in seconds, and
+   speed_precision is how many such units each routine must run for (the
+   -p option of the speed program).  speed_cycletime is the length of one
+   CPU cycle in seconds; the speed program treats 1.0 or 0.0 there as
+   "CPU freq unknown".  speed_time_string is text printed by the speed
+   program's usage(), after it has called speed_time_init(). */
+extern double speed_unittime;
+extern double speed_cycletime;
+extern int speed_precision;
+extern char speed_time_string[];
+void speed_time_init (void);
+void speed_cycletime_fail (const char *str);
+void speed_cycletime_init (void);
+void speed_cycletime_need_cycles (void);
+void speed_cycletime_need_seconds (void);
+void speed_starttime (void);
+double speed_endtime (void);
+
+
+/* Parameters passed to each speed measuring routine (see speed_function_t).
+
+   The align_* fields are in limbs.  The speed program sets them from its
+   -x, -y, -w and -W options and only accepts values from 0 to
+   SPEED_TMP_ALLOC_ADJUST_MASK inclusive. */
+struct speed_params {
+ unsigned reps; /* how many times to run the routine */
+ mp_ptr xp; /* first argument */
+ mp_ptr yp; /* second argument */
+ mp_size_t size; /* size of both arguments */
+ mp_limb_t r; /* user supplied parameter; presumably the ".r" suffix
+                 given with the routine name -- confirm in caller */
+ mp_size_t align_xp; /* alignment of xp */
+ mp_size_t align_yp; /* alignment of yp */
+ mp_size_t align_wp; /* intended alignment of wp */
+ mp_size_t align_wp2; /* intended alignment of wp2 */
+ mp_ptr xp_block; /* first special SPEED_BLOCK_SIZE block */
+ mp_ptr yp_block; /* second special SPEED_BLOCK_SIZE block */
+
+ double time_divisor; /* optionally set by the speed routine */
+
+ /* used by the cache priming things; the speed program sets cache to 1
+    for its -z option.  src and dst have room for 5 and 4 operands
+    respectively, with src_num and dst_num presumably counting the entries
+    in use (see speed_operand_src and speed_operand_dst). */
+ int cache;
+ unsigned src_num, dst_num;
+ struct {
+ mp_ptr ptr;
+ mp_size_t size;
+ } src[5], dst[4];
+};
+
+/* The signature shared by all the speed_xxx measuring routines declared
+   below: take the parameters, return a double. */
+typedef double (*speed_function_t) (struct speed_params *);
+
+/* Measure fun with the given parameters.  The speed program prints the
+   result as seconds, and divides it by speed_cycletime to show cycles.  It
+   also calls this with a NULL parameter pointer for speed_noop. */
+double speed_measure (speed_function_t fun, struct speed_params *);
+
+/* Prototypes for speed measuring routines */
+
+double speed_back_to_back (struct speed_params *);
+double speed_count_leading_zeros (struct speed_params *);
+double speed_count_trailing_zeros (struct speed_params *);
+double speed_find_a (struct speed_params *);
+double speed_gmp_allocate_free (struct speed_params *);
+double speed_gmp_allocate_reallocate_free (struct speed_params *);
+double speed_invert_limb (struct speed_params *);
+double speed_malloc_free (struct speed_params *);
+double speed_malloc_realloc_free (struct speed_params *);
+double speed_memcpy (struct speed_params *);
+double speed_binvert_limb (struct speed_params *);
+double speed_binvert_limb_mul1 (struct speed_params *);
+double speed_binvert_limb_loop (struct speed_params *);
+double speed_binvert_limb_cond (struct speed_params *);
+double speed_binvert_limb_arith (struct speed_params *);
+
+double speed_mpf_init_clear (struct speed_params *);
+
+double speed_mpn_add_n (struct speed_params *);
+double speed_mpn_add_1 (struct speed_params *);
+double speed_mpn_add_1_inplace (struct speed_params *);
+double speed_mpn_add_err1_n (struct speed_params *);
+double speed_mpn_add_err2_n (struct speed_params *);
+double speed_mpn_add_err3_n (struct speed_params *);
+double speed_mpn_addlsh_n (struct speed_params *);
+double speed_mpn_addlsh1_n (struct speed_params *);
+double speed_mpn_addlsh2_n (struct speed_params *);
+double speed_mpn_addlsh_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh1_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh2_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh_n_ip2 (struct speed_params *);
+double speed_mpn_addlsh1_n_ip2 (struct speed_params *);
+double speed_mpn_addlsh2_n_ip2 (struct speed_params *);
+double speed_mpn_add_n_sub_n (struct speed_params *);
+double speed_mpn_and_n (struct speed_params *);
+double speed_mpn_andn_n (struct speed_params *);
+double speed_mpn_addmul_1 (struct speed_params *);
+double speed_mpn_addmul_2 (struct speed_params *);
+double speed_mpn_addmul_3 (struct speed_params *);
+double speed_mpn_addmul_4 (struct speed_params *);
+double speed_mpn_addmul_5 (struct speed_params *);
+double speed_mpn_addmul_6 (struct speed_params *);
+double speed_mpn_addmul_7 (struct speed_params *);
+double speed_mpn_addmul_8 (struct speed_params *);
+double speed_mpn_addaddmul_1msb0 (struct speed_params *);
+double speed_mpn_cnd_add_n (struct speed_params *);
+double speed_mpn_cnd_sub_n (struct speed_params *);
+double speed_mpn_com (struct speed_params *);
+double speed_mpn_neg (struct speed_params *);
+double speed_mpn_copyd (struct speed_params *);
+double speed_mpn_copyi (struct speed_params *);
+double speed_MPN_COPY (struct speed_params *);
+double speed_MPN_COPY_DECR (struct speed_params *);
+double speed_MPN_COPY_INCR (struct speed_params *);
+double speed_mpn_sec_tabselect (struct speed_params *);
+double speed_mpn_divexact_1 (struct speed_params *);
+double speed_mpn_divexact_by3 (struct speed_params *);
+double speed_mpn_bdiv_q_1 (struct speed_params *);
+double speed_mpn_pi1_bdiv_q_1 (struct speed_params *);
+double speed_mpn_bdiv_dbm1c (struct speed_params *);
+double speed_mpn_divrem_1 (struct speed_params *);
+double speed_mpn_divrem_1f (struct speed_params *);
+double speed_mpn_divrem_1c (struct speed_params *);
+double speed_mpn_divrem_1cf (struct speed_params *);
+double speed_mpn_divrem_1_div (struct speed_params *);
+double speed_mpn_divrem_1f_div (struct speed_params *);
+double speed_mpn_divrem_1_inv (struct speed_params *);
+double speed_mpn_divrem_1f_inv (struct speed_params *);
+double speed_mpn_divrem_2 (struct speed_params *);
+double speed_mpn_divrem_2_div (struct speed_params *);
+double speed_mpn_divrem_2_inv (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1 (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1_3 (struct speed_params *);
+double speed_mpn_div_qr_1n_pi1_4 (struct speed_params *);
+double speed_mpn_div_qr_1 (struct speed_params *);
+double speed_mpn_div_qr_2n (struct speed_params *);
+double speed_mpn_div_qr_2u (struct speed_params *);
+double speed_mpn_fib2_ui (struct speed_params *);
+double speed_mpn_matrix22_mul (struct speed_params *);
+double speed_mpn_hgcd2 (struct speed_params *);
+double speed_mpn_hgcd2_1 (struct speed_params *);
+double speed_mpn_hgcd2_2 (struct speed_params *);
+double speed_mpn_hgcd2_3 (struct speed_params *);
+double speed_mpn_hgcd2_4 (struct speed_params *);
+double speed_mpn_hgcd2_5 (struct speed_params *);
+double speed_mpn_hgcd (struct speed_params *);
+double speed_mpn_hgcd_lehmer (struct speed_params *);
+double speed_mpn_hgcd_appr (struct speed_params *);
+double speed_mpn_hgcd_appr_lehmer (struct speed_params *);
+double speed_mpn_hgcd_reduce (struct speed_params *);
+double speed_mpn_hgcd_reduce_1 (struct speed_params *);
+double speed_mpn_hgcd_reduce_2 (struct speed_params *);
+double speed_mpn_gcd (struct speed_params *);
+double speed_mpn_gcd_1 (struct speed_params *);
+double speed_mpn_gcd_11 (struct speed_params *);
+double speed_mpn_gcd_1N (struct speed_params *);
+double speed_mpn_gcd_22 (struct speed_params *);
+double speed_mpn_gcdext (struct speed_params *);
+double speed_mpn_gcdext_double (struct speed_params *);
+double speed_mpn_gcdext_one_double (struct speed_params *);
+double speed_mpn_gcdext_one_single (struct speed_params *);
+double speed_mpn_gcdext_single (struct speed_params *);
+double speed_mpn_get_str (struct speed_params *);
+double speed_mpn_hamdist (struct speed_params *);
+double speed_mpn_ior_n (struct speed_params *);
+double speed_mpn_iorn_n (struct speed_params *);
+double speed_mpn_jacobi_base (struct speed_params *);
+double speed_mpn_jacobi_base_1 (struct speed_params *);
+double speed_mpn_jacobi_base_2 (struct speed_params *);
+double speed_mpn_jacobi_base_3 (struct speed_params *);
+double speed_mpn_jacobi_base_4 (struct speed_params *);
+double speed_mpn_lshift (struct speed_params *);
+double speed_mpn_lshiftc (struct speed_params *);
+double speed_mpn_mod_1 (struct speed_params *);
+double speed_mpn_mod_1c (struct speed_params *);
+double speed_mpn_mod_1_div (struct speed_params *);
+double speed_mpn_mod_1_inv (struct speed_params *);
+double speed_mpn_mod_1_1 (struct speed_params *);
+double speed_mpn_mod_1_1_1 (struct speed_params *);
+double speed_mpn_mod_1_1_2 (struct speed_params *);
+double speed_mpn_mod_1_2 (struct speed_params *);
+double speed_mpn_mod_1_3 (struct speed_params *);
+double speed_mpn_mod_1_4 (struct speed_params *);
+double speed_mpn_mod_34lsub1 (struct speed_params *);
+double speed_mpn_modexact_1_odd (struct speed_params *);
+double speed_mpn_modexact_1c_odd (struct speed_params *);
+double speed_mpn_mul_1 (struct speed_params *);
+double speed_mpn_mul_1_inplace (struct speed_params *);
+double speed_mpn_mul_2 (struct speed_params *);
+double speed_mpn_mul_3 (struct speed_params *);
+double speed_mpn_mul_4 (struct speed_params *);
+double speed_mpn_mul_5 (struct speed_params *);
+double speed_mpn_mul_6 (struct speed_params *);
+double speed_mpn_mul (struct speed_params *);
+double speed_mpn_mul_basecase (struct speed_params *);
+double speed_mpn_mulmid (struct speed_params *);
+double speed_mpn_mulmid_basecase (struct speed_params *);
+double speed_mpn_mul_fft (struct speed_params *);
+double speed_mpn_mul_fft_sqr (struct speed_params *);
+double speed_mpn_fft_mul (struct speed_params *);
+double speed_mpn_fft_sqr (struct speed_params *);
+#if WANT_OLD_FFT_FULL
+double speed_mpn_mul_fft_full (struct speed_params *);
+double speed_mpn_mul_fft_full_sqr (struct speed_params *);
+#endif
+double speed_mpn_nussbaumer_mul (struct speed_params *);
+double speed_mpn_nussbaumer_mul_sqr (struct speed_params *);
+double speed_mpn_mul_n (struct speed_params *);
+double speed_mpn_mul_n_sqr (struct speed_params *);
+double speed_mpn_mulmid_n (struct speed_params *);
+double speed_mpn_sqrlo (struct speed_params *);
+double speed_mpn_sqrlo_basecase (struct speed_params *);
+double speed_mpn_mullo_n (struct speed_params *);
+double speed_mpn_mullo_basecase (struct speed_params *);
+double speed_mpn_nand_n (struct speed_params *);
+double speed_mpn_nior_n (struct speed_params *);
+double speed_mpn_popcount (struct speed_params *);
+double speed_mpn_preinv_divrem_1 (struct speed_params *);
+double speed_mpn_preinv_divrem_1f (struct speed_params *);
+double speed_mpn_preinv_mod_1 (struct speed_params *);
+double speed_mpn_sbpi1_div_qr (struct speed_params *);
+double speed_mpn_dcpi1_div_qr (struct speed_params *);
+double speed_mpn_sbpi1_divappr_q (struct speed_params *);
+double speed_mpn_dcpi1_divappr_q (struct speed_params *);
+double speed_mpn_mu_div_qr (struct speed_params *);
+double speed_mpn_mu_divappr_q (struct speed_params *);
+double speed_mpn_mupi_div_qr (struct speed_params *);
+double speed_mpn_mu_div_q (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
+double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
+double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_r (struct speed_params *);
+double speed_mpn_mu_bdiv_q (struct speed_params *);
+double speed_mpn_mu_bdiv_qr (struct speed_params *);
+double speed_mpn_broot (struct speed_params *);
+double speed_mpn_broot_invm1 (struct speed_params *);
+double speed_mpn_brootinv (struct speed_params *);
+double speed_mpn_invert (struct speed_params *);
+double speed_mpn_invertappr (struct speed_params *);
+double speed_mpn_ni_invertappr (struct speed_params *);
+double speed_mpn_sec_invert (struct speed_params *s);
+double speed_mpn_binvert (struct speed_params *);
+double speed_mpn_redc_1 (struct speed_params *);
+double speed_mpn_redc_2 (struct speed_params *);
+double speed_mpn_redc_n (struct speed_params *);
+double speed_mpn_rsblsh_n (struct speed_params *);
+double speed_mpn_rsblsh1_n (struct speed_params *);
+double speed_mpn_rsblsh2_n (struct speed_params *);
+double speed_mpn_rsh1add_n (struct speed_params *);
+double speed_mpn_rsh1sub_n (struct speed_params *);
+double speed_mpn_rshift (struct speed_params *);
+double speed_mpn_sb_divrem_m3 (struct speed_params *);
+double speed_mpn_sb_divrem_m3_div (struct speed_params *);
+double speed_mpn_sb_divrem_m3_inv (struct speed_params *);
+double speed_mpn_set_str (struct speed_params *);
+double speed_mpn_bc_set_str (struct speed_params *);
+double speed_mpn_dc_set_str (struct speed_params *);
+double speed_mpn_set_str_pre (struct speed_params *);
+double speed_mpn_sqr_basecase (struct speed_params *);
+double speed_mpn_sqr_diag_addlsh1 (struct speed_params *);
+double speed_mpn_sqr_diagonal (struct speed_params *);
+double speed_mpn_sqr (struct speed_params *);
+double speed_mpn_sqrtrem (struct speed_params *);
+double speed_mpn_rootrem (struct speed_params *);
+double speed_mpn_sqrt (struct speed_params *);
+double speed_mpn_root (struct speed_params *);
+double speed_mpn_perfect_power_p (struct speed_params *);
+double speed_mpn_perfect_square_p (struct speed_params *);
+double speed_mpn_sub_n (struct speed_params *);
+double speed_mpn_sub_1 (struct speed_params *);
+double speed_mpn_sub_1_inplace (struct speed_params *);
+double speed_mpn_sub_err1_n (struct speed_params *);
+double speed_mpn_sub_err2_n (struct speed_params *);
+double speed_mpn_sub_err3_n (struct speed_params *);
+double speed_mpn_sublsh_n (struct speed_params *);
+double speed_mpn_sublsh1_n (struct speed_params *);
+double speed_mpn_sublsh2_n (struct speed_params *);
+double speed_mpn_sublsh_n_ip1 (struct speed_params *);
+double speed_mpn_sublsh1_n_ip1 (struct speed_params *);
+double speed_mpn_sublsh2_n_ip1 (struct speed_params *);
+double speed_mpn_submul_1 (struct speed_params *);
+double speed_mpn_toom2_sqr (struct speed_params *);
+double speed_mpn_toom3_sqr (struct speed_params *);
+double speed_mpn_toom4_sqr (struct speed_params *);
+double speed_mpn_toom6_sqr (struct speed_params *);
+double speed_mpn_toom8_sqr (struct speed_params *);
+double speed_mpn_toom22_mul (struct speed_params *);
+double speed_mpn_toom33_mul (struct speed_params *);
+double speed_mpn_toom44_mul (struct speed_params *);
+double speed_mpn_toom6h_mul (struct speed_params *);
+double speed_mpn_toom8h_mul (struct speed_params *);
+double speed_mpn_toom32_mul (struct speed_params *);
+double speed_mpn_toom42_mul (struct speed_params *);
+double speed_mpn_toom43_mul (struct speed_params *);
+double speed_mpn_toom63_mul (struct speed_params *);
+double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
+double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
+double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
+double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
+double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
+double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
+double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
+double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
+double speed_mpn_toom42_mulmid (struct speed_params *);
+double speed_mpn_mulmod_bnm1 (struct speed_params *);
+double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
+double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
+double speed_mpn_sqrmod_bnm1 (struct speed_params *);
+double speed_mpn_mulmod_bknp1 (struct speed_params *);
+double speed_mpn_sqrmod_bknp1 (struct speed_params *);
+double speed_mpn_mulmod_bnp1 (struct speed_params *);
+double speed_mpn_sqrmod_bnp1 (struct speed_params *);
+double speed_mpn_udiv_qrnnd (struct speed_params *);
+double speed_mpn_udiv_qrnnd_r (struct speed_params *);
+double speed_mpn_umul_ppmm (struct speed_params *);
+double speed_mpn_umul_ppmm_r (struct speed_params *);
+double speed_mpn_xnor_n (struct speed_params *);
+double speed_mpn_xor_n (struct speed_params *);
+double speed_MPN_ZERO (struct speed_params *);
+
+double speed_mpq_init_clear (struct speed_params *);
+
+double speed_mpz_add (struct speed_params *);
+double speed_mpz_invert (struct speed_params *);
+double speed_mpz_bin_uiui (struct speed_params *);
+double speed_mpz_bin_ui (struct speed_params *);
+double speed_mpz_fac_ui (struct speed_params *);
+double speed_mpz_2fac_ui (struct speed_params *);
+double speed_mpz_mfac_uiui (struct speed_params *);
+double speed_mpz_primorial_ui (struct speed_params *);
+double speed_mpz_fib_ui (struct speed_params *);
+double speed_mpz_fib2_ui (struct speed_params *);
+double speed_mpz_init_clear (struct speed_params *);
+double speed_mpz_init_realloc_clear (struct speed_params *);
+double speed_gmp_primesieve (struct speed_params *);
+double speed_mpz_nextprime (struct speed_params *);
+double speed_mpz_nextprime_1 (struct speed_params *);
+double speed_mpz_prevprime (struct speed_params *);
+double speed_mpz_prevprime_1 (struct speed_params *);
+double speed_mpz_jacobi (struct speed_params *);
+double speed_mpz_lucnum_ui (struct speed_params *);
+double speed_mpz_lucnum2_ui (struct speed_params *);
+double speed_mpz_mod (struct speed_params *);
+double speed_mpz_powm (struct speed_params *);
+double speed_mpz_powm_mod (struct speed_params *);
+double speed_mpz_powm_redc (struct speed_params *);
+double speed_mpz_powm_sec (struct speed_params *);
+double speed_mpz_powm_ui (struct speed_params *);
+double speed_mpz_urandomb (struct speed_params *);
+
+double speed_gmp_randseed (struct speed_params *);
+double speed_gmp_randseed_ui (struct speed_params *);
+
+double speed_noop (struct speed_params *);
+double speed_noop_wxs (struct speed_params *);
+double speed_noop_wxys (struct speed_params *);
+
+double speed_operator_div (struct speed_params *);
+double speed_operator_mod (struct speed_params *);
+
+double speed_udiv_qrnnd (struct speed_params *);
+double speed_udiv_qrnnd_preinv1 (struct speed_params *);
+double speed_udiv_qrnnd_preinv2 (struct speed_params *);
+double speed_udiv_qrnnd_preinv3 (struct speed_params *);
+double speed_udiv_qrnnd_c (struct speed_params *);
+double speed_umul_ppmm (struct speed_params *);
+
+/* Prototypes for other routines */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* low 32-bits in p[0], high 32-bits in p[1] */
+void speed_cyclecounter (unsigned p[2]);
+
+#if defined (__cplusplus)
+}
+#endif
+
+void mftb_function (unsigned p[2]);
+
+double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]);
+int gettimeofday_microseconds_p (void);
+int getrusage_microseconds_p (void);
+int cycles_works_p (void);
+long clk_tck (void);
+double freq_measure (const char *, double (*)(void));
+
+int double_cmp_ptr (const double *, const double *);
+void pentium_wbinvd (void);
+typedef int (*qsort_function_t) (const void *, const void *);
+
+void noop (void);
+void noop_1 (mp_limb_t);
+void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
+void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_cache_fill (mp_srcptr, mp_size_t);
+void mpn_cache_fill_dummy (mp_limb_t);
+void speed_cache_fill (struct speed_params *);
+void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t);
+void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t);
+
+extern int speed_option_addrs;
+extern int speed_option_verbose;
+extern int speed_option_cycles_broken;
+void speed_option_set (const char *);
+
+mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_3 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t mpn_div_qr_1n_pi1_4 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
+
+int mpn_hgcd2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_3 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_4 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+int mpn_hgcd2_5 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
+
+mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]);
+
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+
+mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
+
+int mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
+
+mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
+
+mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
+
+mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int);
+void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
+
+void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+
+int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
+
+
+/* "get" is called repeatedly until it ticks over, just in case on a fast
+ processor it takes less than a microsecond, though this is probably
+ unlikely if it's a system call.
+
+ speed_cyclecounter is called on the same side of the "get" for the start
+ and end measurements. It doesn't matter how long it takes from the "get"
+ sample to the cycles sample, since that period will cancel out in the
+ difference calculation (assuming it's the same each time).
+
+ Letting the test run for more than a process time slice is probably only
+ going to reduce accuracy, especially for getrusage when the cycle counter
+ is real time, or for gettimeofday if the cycle counter is in fact process
+ time. Use CLK_TCK/2 as a reasonable stop.
+
+ It'd be desirable to be quite accurate here. The default speed_precision
+ for a cycle counter is 10000 cycles, so to mix that with getrusage or
+ gettimeofday the frequency should be at least that accurate. But running
+ measurements for 10000 microseconds (or more) is too long. Be satisfied
+ with just a half clock tick (5000 microseconds usually). */
+
+#define FREQ_MEASURE_ONE(name, type, get, getc, sec, usec) \
+  do { \
+    /* st1/et1 are throwaway samples used only to detect "get" ticking over */ \
+    type st1, st, et1, et; \
+    unsigned sc[2], ec[2]; \
+    long dt, half_tick; \
+    double dc, cyc; \
+ \
+    /* half a clock tick, expressed in microseconds */ \
+    half_tick = (1000000L / clk_tck()) / 2; \
+ \
+    /* wait for "get" to tick over, then take the starting cycle sample */ \
+    get (st1); \
+    do { \
+      get (st); \
+    } while (usec(st) == usec(st1) && sec(st) == sec(st1)); \
+ \
+    getc (sc); \
+ \
+    /* repeat the end measurement until at least half_tick has elapsed */ \
+    for (;;) \
+      { \
+        get (et1); \
+        do { \
+          get (et); \
+        } while (usec(et) == usec(et1) && sec(et) == sec(et1)); \
+ \
+        getc (ec); \
+ \
+        dc = speed_cyclecounter_diff (ec, sc); \
+ \
+        /* allow secs to cancel before multiplying */ \
+        dt = sec(et) - sec(st); \
+        dt = dt * 1000000L + (usec(et) - usec(st)); \
+ \
+        if (dt >= half_tick) \
+          break; \
+      } \
+ \
+    /* elapsed seconds divided by the cycle counter difference */ \
+    cyc = dt * 1e-6 / dc; \
+ \
+    if (speed_option_verbose >= 2) \
+      printf ("freq_measure_%s_one() dc=%.6g dt=%ld cyc=%.6g\n", \
+              name, dc, dt, cyc); \
+ \
+    /* return the value just logged rather than recomputing it */ \
+    return cyc; \
+ \
+  } while (0)
+
+
+
+
+/* The measuring routines use these big macros to save duplication for
+ similar forms. They also get used for some automatically generated
+ measuring of new implementations of functions.
+
+ Having something like SPEED_ROUTINE_BINARY_N as a subroutine accepting a
+ function pointer is considered undesirable since it's not the way a
+ normal application will be calling, and some processors might do
+ different things with an indirect call, like not branch predicting, or
+ doing a full pipe flush. At least some of the "functions" measured are
+ actually macros too.
+
+ The net effect is to bloat the object code, possibly in a big way, but
+ only what's being measured is being run, so that doesn't matter.
+
+ The loop forms don't try to cope with __GMP_ATTRIBUTE_PURE or
+ ATTRIBUTE_CONST on the called functions. Adding a cast to a non-pure
+ function pointer doesn't work in gcc 3.2. Using an actual non-pure
+ function pointer variable works, but stands a real risk of a
+ non-optimizing compiler generating unnecessary overheads in the call.
+ Currently the best idea is not to use those attributes for a timing
+ program build. __GMP_NO_ATTRIBUTE_CONST_PURE will tell gmp.h and
+ gmp-impl.h to omit them from routines there. */
+
+#define SPEED_RESTRICT_COND(cond) if (!(cond)) return -1.0; /* makes the enclosing routine return -1.0 when cond is false; note the expansion already ends in a semicolon */
+
+/* Timing skeleton for mpn_copy or similar.  "call" is the statement to be
+   timed; it may refer to wp, an s->size limb destination allocated here.
+   Yields -1.0 when s->size is negative, otherwise the speed_endtime()
+   result. */
+#define SPEED_ROUTINE_MPN_COPY_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    /* timed region: the count is decremented before it is tested, so \
+       s->reps has to be nonzero */ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+#define SPEED_ROUTINE_MPN_COPY(function) \
+ SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size)) /* function (dst, src, limbs); wp is declared by the _CALL macro */
+
+/* Time function (wp, xp, s->size, s->r, s->r / 2), where xp holds
+   s->size * s->r limbs and wp holds s->size limbs.  The measured time is
+   divided by s->r before being returned. */
+#define SPEED_ROUTINE_MPN_TABSELECT(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr xp, wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+    /* default to a quadratic shape */ \
+    if (s->r == 0) \
+      { \
+        s->r = s->size; \
+      } \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (xp, s->size * s->r, s->align_xp); \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, xp, s->size * s->r); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, xp, s->size, s->r, (s->r) / 2); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime () / s->r; \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+
+/* Like the plain copy measurement, but "function" takes an extra final
+   argument, which is always passed as 0. */
+#define SPEED_ROUTINE_MPN_COPYC(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, s->xp, s->size, 0); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Copy measurement for a "function" whose length argument is in bytes.
+   s->size remains a limb count (and limbs are what get copied), so it is
+   scaled by GMP_LIMB_BYTES at the call. */
+#define SPEED_ROUTINE_MPN_COPY_BYTES(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, s->xp, s->size * GMP_LIMB_BYTES); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+
+/* For mpn_add_n, mpn_sub_n, or similar.  "call" may use wp (destination),
+   xp and yp (sources).  s->r selects the operand overlap:
+     0  xp = s->xp, yp = s->yp (no overlap)
+     1  xp = wp
+     2  yp = wp
+     3  xp = yp = wp
+     4  yp = xp
+   Any other s->r makes the routine return -1.0. */
+#define SPEED_ROUTINE_MPN_BINARY_N_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr xp, yp; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    xp = s->xp; \
+    yp = s->yp; \
+ \
+    switch (s->r) \
+      { \
+      case 0: \
+        break; \
+      case 1: \
+        xp = wp; \
+        break; \
+      case 2: \
+        yp = wp; \
+        break; \
+      case 3: \
+        xp = wp; \
+        yp = wp; \
+        break; \
+      case 4: \
+        yp = xp; \
+        break; \
+      default: \
+        TMP_FREE; \
+        return -1.0; \
+      } \
+ \
+    /* initialize wp if operand overlap */ \
+    if (xp == wp || yp == wp) \
+      { \
+        MPN_COPY (wp, s->xp, s->size); \
+      } \
+ \
+    speed_operand_src (s, xp, s->size); \
+    speed_operand_src (s, yp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+
+/* For mpn_aors_errK_n, where 1 <= K <= 3.  "call" may use wp, xp, yp, the
+   K extra source operands zp[0..K-1] (s->size limbs each) and the 2*K limb
+   array ep.  s->r selects operand overlap exactly as in
+   SPEED_ROUTINE_MPN_BINARY_N_CALL; unsupported values return -1.0.
+   K is parenthesized wherever it is used so that an expression argument
+   expands safely. */
+#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K) \
+  { \
+    mp_ptr wp; \
+    mp_ptr xp, yp; \
+    mp_ptr zp[(K)]; \
+    mp_limb_t ep[2*(K)]; \
+    unsigned i; \
+    double t; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    /* (don't have a mechanism to specify zp alignments) */ \
+    for (i = 0; i < (K); i++) \
+      SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0); \
+ \
+    xp = s->xp; \
+    yp = s->yp; \
+ \
+    if (s->r == 0) ; \
+    else if (s->r == 1) { xp = wp; } \
+    else if (s->r == 2) { yp = wp; } \
+    else if (s->r == 3) { xp = wp; yp = wp; } \
+    else if (s->r == 4) { yp = xp; } \
+    else { \
+      TMP_FREE; \
+      return -1.0; \
+    } \
+ \
+    /* initialize wp if operand overlap */ \
+    if (xp == wp || yp == wp) \
+      MPN_COPY (wp, s->xp, s->size); \
+ \
+    speed_operand_src (s, xp, s->size); \
+    speed_operand_src (s, yp, s->size); \
+    for (i = 0; i < (K); i++) \
+      speed_operand_src (s, zp[i], s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    /* i is reused as the repetition counter from here on */ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      call; \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1) /* K=1: one zp operand; last argument passed as 0 */
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2) /* K=2: two zp operands */
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3) /* K=3: three zp operands */
+
+
+/* For routines taking two sources and writing two s->size limb results,
+   ap and sp (presumably a combined add-and-subtract).  "call" may use ap,
+   sp, xp and yp.  The bits of s->r select operand overlap:
+     1  xp = ap      2  yp = ap      4  xp = sp      8  yp = sp
+   Having both low bits (3) or both high bits (12) set is rejected with
+   -1.0. */
+#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr xp, yp; \
+    mp_ptr ap, sp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp); \
+ \
+    /* reject combinations aliasing both sources to the same destination */ \
+    if ((s->r & 3) == 3 || (s->r & 12) == 12) \
+      { \
+        TMP_FREE; \
+        return -1.0; \
+      } \
+ \
+    xp = s->xp; \
+    yp = s->yp; \
+ \
+    if ((s->r & 1) != 0) \
+      xp = ap; \
+    if ((s->r & 2) != 0) \
+      yp = ap; \
+    if ((s->r & 4) != 0) \
+      xp = sp; \
+    if ((s->r & 8) != 0) \
+      yp = sp; \
+ \
+    /* initialize ap if operand overlap */ \
+    if (xp == ap || yp == ap) \
+      { \
+        MPN_COPY (ap, s->xp, s->size); \
+      } \
+    /* initialize sp if operand overlap */ \
+    if (xp == sp || yp == sp) \
+      { \
+        MPN_COPY (sp, s->xp, s->size); \
+      } \
+ \
+    speed_operand_src (s, xp, s->size); \
+    speed_operand_src (s, yp, s->size); \
+    speed_operand_dst (s, ap, s->size); \
+    speed_operand_dst (s, sp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_BINARY_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size)) /* wp, xp, yp come from the _CALL macro body */
+
+#define SPEED_ROUTINE_MPN_BINARY_NC(function) \
+ SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size, 0)) /* as above with an extra final argument of 0 */
+
+
+/* For mpn_lshift, mpn_rshift, mpn_mul_1, with r, or similar.  "call" is
+   timed s->reps times and may use wp, an s->size limb destination allocated
+   here; note that wp is not initialized before the timed loop.  Returns
+   -1.0 unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_UNARY_1_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_UNARY_1(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r)) /* s->r is passed through as the single-limb operand */
+
+#define SPEED_ROUTINE_MPN_UNARY_1C(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0)) /* extra final argument passed as 0 */
+
+/* FIXME: wp is uninitialized here, should start it off from xp.  wp is both
+   source and destination, and SPEED_ROUTINE_MPN_UNARY_1_CALL only allocates
+   it, so the routine is timed on whatever the allocation happens to hold. */
+#define SPEED_ROUTINE_MPN_UNARY_1_INPLACE(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, wp, s->size, s->r))
+
+#define SPEED_ROUTINE_MPN_DIVEXACT_1(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r)) /* same call shape as SPEED_ROUTINE_MPN_UNARY_1 */
+
+#define SPEED_ROUTINE_MPN_BDIV_Q_1(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r)) /* same call shape as SPEED_ROUTINE_MPN_UNARY_1 */
+
+/* Precompute shift (trailing zero count of s->r) and dinv (binvert_limb of
+   s->r with those zeros shifted out), then time "call" through
+   SPEED_ROUTINE_MPN_UNARY_1_CALL.  "call" may use wp, dinv and shift.
+   Returns -1.0 unless s->size > 0 and s->r != 0. */
+#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call) \
+  { \
+    mp_limb_t dinv; \
+    unsigned shift; \
+ \
+    SPEED_RESTRICT_COND (s->size > 0); \
+    SPEED_RESTRICT_COND (s->r != 0); \
+ \
+    count_trailing_zeros (shift, s->r); \
+    binvert_limb (dinv, s->r >> shift); \
+ \
+    /* the nested block contains the return statements */ \
+    SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \
+  }
+#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function) \
+ SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL \
+ ((*function) (wp, s->xp, s->size, s->r, dinv, shift)) /* dinv and shift are computed by the _CALL macro */
+
+#define SPEED_ROUTINE_MPN_BDIV_DBM1C(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0)) /* final argument passed as 0 */
+
+#define SPEED_ROUTINE_MPN_DIVREM_1(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r)) /* second argument 0, s->size source limbs */
+
+#define SPEED_ROUTINE_MPN_DIVREM_1C(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0, s->xp, s->size, s->r, 0)) /* as DIVREM_1 with an extra final 0 */
+
+#define SPEED_ROUTINE_MPN_DIVREM_1F(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r)) /* second argument s->size, zero source limbs */
+
+#define SPEED_ROUTINE_MPN_DIVREM_1CF(function) \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0, s->r, 0)) /* as DIVREM_1F with an extra final 0 */
+
+
+/* Precompute shift (leading zero count of s->r) and dinv (invert_limb of
+   s->r shifted left by that amount), then time "call" through
+   SPEED_ROUTINE_MPN_UNARY_1_CALL.  "call" may use wp, dinv and shift.
+   Returns -1.0 when s->r is 0; the nested macro additionally requires
+   s->size >= 1.  The macro ends at the closing brace, with no line
+   continuation after it. */
+#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL(call) \
+  { \
+    unsigned shift; \
+    mp_limb_t dinv; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+    SPEED_RESTRICT_COND (s->r != 0); \
+ \
+    count_leading_zeros (shift, s->r); \
+    invert_limb (dinv, s->r << shift); \
+ \
+    SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \
+  }
+
+#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1(function) \
+ SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL \
+ ((*function) (wp, 0, s->xp, s->size, s->r, dinv, shift)) /* dinv and shift are computed by the _CALL macro */
+
+/* s->size limbs worth of fraction part: s->size is passed as the second
+   argument and the source limb count is 0 */
+#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1F(function) \
+ SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL \
+ ((*function) (wp, s->size, s->xp, 0, s->r, dinv, shift))
+
+
+/* Time function (wp, s->xp, s->size, yp), where yp is an N limb operand.
+   s->r is duplicated to form the multiplier, defaulting to
+   MP_BASES_BIG_BASE_10.  Not sure if that's particularly useful, but at
+   least it provides some control.  The destination wp has s->size + N - 1
+   limbs.  Returns -1.0 unless s->size >= N.  N is parenthesized in every
+   expression so that a non-literal argument expands safely. */
+#define SPEED_ROUTINE_MPN_UNARY_N(function,N) \
+  { \
+    mp_ptr wp; \
+    mp_size_t wn; \
+    unsigned i; \
+    double t; \
+    mp_limb_t yp[(N)]; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= (N)); \
+ \
+    TMP_MARK; \
+    wn = s->size + (N) - 1; \
+    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
+    for (i = 0; i < (N); i++) \
+      yp[i] = (s->r != 0 ? s->r : MP_BASES_BIG_BASE_10); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, yp, (mp_size_t) (N)); \
+    speed_operand_dst (s, wp, wn); \
+    speed_cache_fill (s); \
+ \
+    /* i is reused as the repetition counter from here on */ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      function (wp, s->xp, s->size, yp); \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_UNARY_2(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 2) /* 2 limb yp operand */
+#define SPEED_ROUTINE_MPN_UNARY_3(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 3) /* 3 limb yp operand */
+#define SPEED_ROUTINE_MPN_UNARY_4(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 4) /* 4 limb yp operand */
+#define SPEED_ROUTINE_MPN_UNARY_5(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 5) /* 5 limb yp operand */
+#define SPEED_ROUTINE_MPN_UNARY_6(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 6) /* 6 limb yp operand */
+#define SPEED_ROUTINE_MPN_UNARY_7(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 7) /* 7 limb yp operand */
+#define SPEED_ROUTINE_MPN_UNARY_8(function) \
+ SPEED_ROUTINE_MPN_UNARY_N (function, 8) /* 8 limb yp operand */
+
+/* Time function (wp, s->xp, s->yp, s->size, r, r).  r is s->r, or
+   MP_BASES_BIG_BASE_10 when s->r is 0, with GMP_NUMB_HIGHBIT cleared. */
+#define SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_limb_t r; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    /* pick the multiplier and force its most significant bit to zero */ \
+    if (s->r != 0) \
+      r = s->r; \
+    else \
+      r = MP_BASES_BIG_BASE_10; \
+    r &= ~GMP_NUMB_HIGHBIT; \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, s->xp, s->yp, s->size, r, r); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* For mpn_mul, mpn_mul_basecase.  The first operand s->xp has s->size
+   limbs; the second operand s->yp has size1 limbs, where size1 is s->r, or
+   s->size when s->r is 0.  If s->r is negative when taken as an mp_size_t,
+   size1 becomes -size1 - s->size.  Returns -1.0 unless
+   1 <= size1 <= s->size. */
+#define SPEED_ROUTINE_MPN_MUL(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_size_t size1; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    if (s->r == 0) \
+      size1 = s->size; \
+    else \
+      size1 = s->r; \
+    if (size1 < 0) \
+      { \
+        size1 = -size1 - s->size; \
+      } \
+ \
+    SPEED_RESTRICT_COND (size1 >= 1); \
+    SPEED_RESTRICT_COND (s->size >= size1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, size1); \
+    speed_operand_dst (s, wp, size1 + s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, s->xp, s->size, s->yp, size1); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+
+/* Timing skeleton for products of two s->size limb operands.  "call" may
+   use wp, a 2*s->size limb destination allocated here.  Returns -1.0
+   unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_MUL_N_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Time function (wp, s->xp, s->yp, s->size).  No semicolon is supplied
+   here; the expansion is a complete block, matching wrappers such as
+   SPEED_ROUTINE_MPN_SQR. */
+#define SPEED_ROUTINE_MPN_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size))
+
+/* Timing skeleton for routines producing an s->size limb result from two
+   s->size limb operands.  "call" may use wp, the s->size limb destination
+   allocated here.  Returns -1.0 unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Both wrappers time function (wp, s->xp, s->yp, s->size) and expand to a
+   complete block, so no semicolon is supplied by the macros themselves. */
+#define SPEED_ROUTINE_MPN_MULLO_N(function) \
+ SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size))
+
+#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function) \
+ SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size))
+
+/* Time function (wp, s->xp, s->size) with an s->size limb destination.
+   Returns -1.0 unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_SQRLO(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, s->xp, s->size); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* For mpn_mulmid, mpn_mulmid_basecase.  The first operand is a locally
+   allocated xp of size1 limbs, size1 being s->r or 2*s->size - 1 when s->r
+   is 0; the second operand is s->yp with s->size limbs.  The destination
+   has size1 - s->size + 3 limbs.  Returns -1.0 unless s->size >= 1 and
+   size1 >= s->size. */
+#define SPEED_ROUTINE_MPN_MULMID(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_size_t size1; \
+    mp_ptr wp, xp; \
+    TMP_DECL; \
+ \
+    size1 = (s->r == 0 ? (2 * s->size - 1) : s->r); \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+    SPEED_RESTRICT_COND (size1 >= s->size); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+ \
+    speed_operand_src (s, xp, size1); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, size1 - s->size + 3); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, xp, size1, s->yp, s->size); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Time function (wp, xp, s->yp, s->size), where xp is a locally allocated
+   2*s->size - 1 limb operand and wp has s->size + 2 limbs (computed as
+   size1 - s->size + 3).  Returns -1.0 unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_MULMID_N(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_size_t size1; \
+    mp_ptr wp, xp; \
+    TMP_DECL; \
+ \
+    size1 = 2 * s->size - 1; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+ \
+    speed_operand_src (s, xp, size1); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, size1 - s->size + 3); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, xp, s->yp, s->size); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Time function (wp, xp, s->yp, s->size, scratch).  Operand sizes are as
+   in SPEED_ROUTINE_MPN_MULMID_N; the scratch area has
+   mpn_toom42_mulmid_itch (s->size) limbs and is not registered as a timing
+   operand.  Returns -1.0 unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_size_t size1, scratch_size; \
+    mp_ptr wp, xp, scratch; \
+    TMP_DECL; \
+ \
+    size1 = 2 * s->size - 1; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+    scratch_size = mpn_toom42_mulmid_itch (s->size); \
+    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0); \
+ \
+    speed_operand_src (s, xp, size1); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, size1 - s->size + 3); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, xp, s->yp, s->size, scratch); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Timing skeleton for mulmod_bnm1 style routines.  "call" may use wp
+   (2*s->size limbs) and tp, a scratch area of
+   mpn_mulmod_bnm1_itch (s->size, s->size, s->size) limbs.  Returns -1.0
+   unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_size_t itch; \
+    mp_ptr wp, tp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, 2 * s->size); \
+    speed_operand_dst (s, tp, itch); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+/* Time function (wp, size, s->xp, s->size, s->yp, s->size, tp), where size
+   is mpn_mulmod_bnm1_next_size (s->size).  wp has size limbs and tp has
+   mpn_mulmod_bnm1_itch (size, size, size) limbs.  Returns -1.0 unless
+   s->size >= 1. */
+#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_size_t size, itch; \
+    mp_ptr wp, tp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    size = mpn_mulmod_bnm1_next_size (s->size); \
+    itch = mpn_mulmod_bnm1_itch (size, size, size); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, size); \
+    speed_operand_dst (s, tp, itch); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        function (wp, size, s->xp, s->size, s->yp, s->size, tp); \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#ifndef MOD_BKNP1_USE11
+#define MOD_BKNP1_USE11 0 /* fallback when not already defined; gates the k == 11 cases in SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL */
+#endif
+#ifndef MOD_BKNP1_ONLY3
+#define MOD_BKNP1_ONLY3 0 /* fallback when not already defined; nonzero forces k = 3 in SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL */
+#endif
+
+/* Timing skeleton for mulmod_bknp1 style routines.  "call" may use wp
+   (2*s->size + 2 limbs), tp (mpn_mulmod_bknp1_itch (s->size) limbs), k and
+   nk, where s->size == k * nk.
+
+   When use_r is nonzero, s->r must be 0, 3, 5, 7, 13 or 17 (or 11 if
+   MOD_BKNP1_USE11) and, if at least 2, is taken as k.  Otherwise k is the
+   first of 3, 5, 7, 11, 13, 17 dividing s->size, or 1 if none does.
+   MOD_BKNP1_ONLY3 overrides k with 3.  Returns -1.0 when any restriction
+   fails.
+
+   Limb s->size of both s->xp and s->yp is masked down to its low bit, so
+   the source operands are modified.  use_r is parenthesized so that an
+   expression argument expands safely. */
+#define SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL(call,use_r) \
+  { \
+    mp_ptr wp, tp; \
+    unsigned i, k; \
+    double t; \
+    mp_size_t itch, nk; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+    SPEED_RESTRICT_COND (!(use_r) || (s->r == 0) || \
+                         (s->r == 3) || (s->r == 5) || (s->r == 7) || \
+                         (s->r == 13) || (s->r == 17) || \
+                         ((MOD_BKNP1_USE11) && (s->r == 11))); \
+ \
+    /* choose k and the per-part size nk = s->size / k */ \
+    if (!(use_r) || (s->r < 2)) \
+      { \
+        if (s->size % 3 == 0) {nk = s->size / (k = 3);} \
+        else if (s->size % 5 == 0) {nk = s->size / (k = 5);} \
+        else if (s->size % 7 == 0) {nk = s->size / (k = 7);} \
+        else if (s->size % 11 == 0) {nk = s->size / (k = 11);} \
+        else if (s->size % 13 == 0) {nk = s->size / (k = 13);} \
+        else if (s->size % 17 == 0) {nk = s->size / (k = 17);} \
+        else nk = s->size / (k = 1); \
+      } \
+    else nk = s->size / (k = s->r); \
+ \
+    if (MOD_BKNP1_ONLY3) \
+      k = 3; \
+    /* s->size must split exactly into k parts of nk limbs */ \
+    SPEED_RESTRICT_COND ((!(use_r) || (k > 2)) && (s->size == k * nk)); \
+    SPEED_RESTRICT_COND ((GMP_NUMB_MAX % k == 0) || (nk % 3 != 0) || \
+                         ((MOD_BKNP1_USE11) && (k == 11))); \
+ \
+    itch = mpn_mulmod_bknp1_itch (s->size); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size + 2, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
+ \
+    /* the extra top limb of each source is reduced to 0 or 1 */ \
+    s->xp [s->size] &= 1; \
+    s->yp [s->size] &= 1; \
+    speed_operand_src (s, s->xp, s->size + 1); \
+    speed_operand_src (s, s->yp, s->size + 1); \
+    speed_operand_dst (s, wp, 2 * s->size + 2); \
+    speed_operand_dst (s, tp, itch); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      call; \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Timing skeleton for multiplication routines needing scratch space.
+   "call" may use wp (2*s->size limbs) and tspace (tsize limbs).  Returns
+   -1.0 unless s->size >= minsize.  Note tsize is expanded three times. */
+#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp, tspace; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= minsize); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, s->yp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_operand_dst (s, tspace, tsize); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+ mpn_toom22_mul_itch (s->size, s->size), \
+ MPN_TOOM22_MUL_MINSIZE) /* both operands s->size limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+ mpn_toom33_mul_itch (s->size, s->size), \
+ MPN_TOOM33_MUL_MINSIZE) /* both operands s->size limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+ mpn_toom44_mul_itch (s->size, s->size), \
+ MPN_TOOM44_MUL_MINSIZE) /* both operands s->size limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+ mpn_toom6h_mul_itch (s->size, s->size), \
+ MPN_TOOM6H_MUL_MINSIZE) /* both operands s->size limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
+ mpn_toom8h_mul_itch (s->size, s->size), \
+ MPN_TOOM8H_MUL_MINSIZE) /* both operands s->size limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \
+ mpn_toom32_mul_itch (s->size, 2*s->size/3), \
+ MPN_TOOM32_MUL_MINSIZE) /* second operand 2*s->size/3 limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
+ mpn_toom42_mul_itch (s->size, s->size/2), \
+ MPN_TOOM42_MUL_MINSIZE) /* second operand s->size/2 limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \
+ mpn_toom43_mul_itch (s->size, s->size*3/4), \
+ MPN_TOOM43_MUL_MINSIZE) /* second operand s->size*3/4 limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
+ mpn_toom63_mul_itch (s->size, s->size/2), \
+ MPN_TOOM63_MUL_MINSIZE) /* second operand s->size/2 limbs */
+
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
+ mpn_toom32_mul_itch (s->size, 17*s->size/24), \
+ MPN_TOOM32_MUL_MINSIZE) /* second operand 17*s->size/24 limbs, toom32 scratch */
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
+ mpn_toom43_mul_itch (s->size, 17*s->size/24), \
+ MPN_TOOM43_MUL_MINSIZE) /* same operand sizes as the macro above, toom43 scratch */
+
+#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
+ mpn_toom32_mul_itch (s->size, 19*s->size/30), \
+ MPN_TOOM32_MUL_MINSIZE) /* second operand 19*s->size/30 limbs, toom32 scratch */
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
+ mpn_toom53_mul_itch (s->size, 19*s->size/30), \
+ MPN_TOOM53_MUL_MINSIZE) /* same operand sizes as the macro above, toom53 scratch */
+
+#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
+ mpn_toom42_mul_itch (s->size, 11*s->size/20), \
+ MPN_TOOM42_MUL_MINSIZE) /* second operand 11*s->size/20 limbs, toom42 scratch */
+#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
+ mpn_toom53_mul_itch (s->size, 11*s->size/20), \
+ MPN_TOOM53_MUL_MINSIZE) /* same operand sizes as the macro above, toom53 scratch */
+
+/* Time a toom43 style "function" on operands of s->size and 5*s->size/6
+   limbs.  The scratch size comes from mpn_toom43_mul_itch, matching the
+   routine being timed.  The size restriction is MPN_TOOM54_MUL_MINSIZE,
+   the same as SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (presumably so that
+   both are measured over the same sizes). */
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
+ mpn_toom43_mul_itch (s->size, 5*s->size/6), \
+ MPN_TOOM54_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function) \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
+ mpn_toom54_mul_itch (s->size, 5*s->size/6), \
+ MPN_TOOM54_MUL_MINSIZE) /* second operand 5*s->size/6 limbs, toom54 scratch */
+
+
+
+/* Timing skeleton for squaring.  "call" may use wp, a 2*s->size limb
+   destination allocated here; the only source operand is s->xp.  Returns
+   -1.0 unless s->size >= 1. */
+#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size)) /* wp is the 2*s->size limb destination from the _CALL macro */
+
+/* Timing skeleton where "call" may use wp (2*s->size limb destination) and
+   tp (2*s->size limb source, allocated here and left uninitialized) in
+   addition to s->xp.  The measured time is halved before being returned.
+   Returns -1.0 unless s->size >= 2. */
+#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp, tp; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_src (s, tp, 2 * s->size); \
+    speed_operand_dst (s, wp, 2 * s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime () / 2; \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+/* Timing skeleton for squaring routines needing scratch space.  "call" may
+   use wp (2*s->size limbs) and tspace (tsize limbs).  Returns -1.0 unless
+   s->size >= minsize.  Note tsize is expanded twice. */
+#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \
+  { \
+    double t; \
+    unsigned i; \
+    mp_ptr wp, tspace; \
+    TMP_DECL; \
+ \
+    SPEED_RESTRICT_COND (s->size >= minsize); \
+ \
+    TMP_MARK; \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2*s->size, s->align_wp); \
+    SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_operand_dst (s, wp, 2*s->size); \
+    speed_operand_dst (s, tspace, tsize); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+    t = speed_endtime (); \
+ \
+    TMP_FREE; \
+    return t; \
+  }
+
+#define SPEED_ROUTINE_MPN_TOOM2_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
+ mpn_toom2_sqr_itch (s->size), \
+ MPN_TOOM2_SQR_MINSIZE) /* scratch from mpn_toom2_sqr_itch */
+
+#define SPEED_ROUTINE_MPN_TOOM3_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
+ mpn_toom3_sqr_itch (s->size), \
+ MPN_TOOM3_SQR_MINSIZE) /* scratch from mpn_toom3_sqr_itch */
+
+
+#define SPEED_ROUTINE_MPN_TOOM4_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
+ mpn_toom4_sqr_itch (s->size), \
+ MPN_TOOM4_SQR_MINSIZE) /* scratch from mpn_toom4_sqr_itch */
+
+#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
+ mpn_toom6_sqr_itch (s->size), \
+ MPN_TOOM6_SQR_MINSIZE) /* scratch from mpn_toom6_sqr_itch */
+
+#define SPEED_ROUTINE_MPN_TOOM8_SQR(function) \
+ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
+ mpn_toom8_sqr_itch (s->size), \
+ MPN_TOOM8_SQR_MINSIZE) /* scratch from mpn_toom8_sqr_itch */
+
+/* Timing skeleton for routines that only read s->xp (s->size limbs) and
+   have no destination operand.  Returns -1.0 when s->size is negative,
+   otherwise the speed_endtime() result. */
+#define SPEED_ROUTINE_MPN_MOD_CALL(call) \
+  { \
+    unsigned i; \
+ \
+    SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+    speed_operand_src (s, s->xp, s->size); \
+    speed_cache_fill (s); \
+ \
+    speed_starttime (); \
+    i = s->reps; \
+    do \
+      { \
+        call; \
+      } \
+    while (--i != 0); \
+ \
+    return speed_endtime (); \
+  }
+
+#define SPEED_ROUTINE_MPN_MOD_1(function) /* Time (*function) (s->xp, s->size, s->r) through SPEED_ROUTINE_MPN_MOD_CALL. */ \
+ SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size, s->r))
+
+#define SPEED_ROUTINE_MPN_MOD_1C(function) /* As SPEED_ROUTINE_MPN_MOD_1, with CNST_LIMB(0) passed as an extra final argument. */ \
+ SPEED_ROUTINE_MPN_MOD_CALL ((*function)(s->xp, s->size, s->r, CNST_LIMB(0)))
+
+/* Time function (s->xp, s->size, s->r) through SPEED_ROUTINE_MPN_MOD_CALL.
+   The definition carries no trailing semicolon, so the expansion is a plain
+   block like the sibling SPEED_ROUTINE_MPN_MOD_1 wrappers.  */
+#define SPEED_ROUTINE_MPN_MODEXACT_1_ODD(function) \
+ SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r))
+
+/* Time function (s->xp, s->size, s->r, CNST_LIMB(0)) through
+   SPEED_ROUTINE_MPN_MOD_CALL.  The definition carries no trailing semicolon,
+   so the expansion is a plain block like the sibling
+   SPEED_ROUTINE_MPN_MOD_1C wrapper.  */
+#define SPEED_ROUTINE_MPN_MODEXACT_1C_ODD(function) \
+ SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r, CNST_LIMB(0)))
+
+#define SPEED_ROUTINE_MPN_MOD_34LSUB1(function) /* Time (*function) (s->xp, s->size) through SPEED_ROUTINE_MPN_MOD_CALL; s->r is not used. */ \
+ SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size))
+
+#define SPEED_ROUTINE_MPN_PREINV_MOD_1(function) \
+ { /* Function body: time (*function) (s->xp, s->size, s->r, inv), with inv obtained from s->r by invert_limb. */ \
+ unsigned i; \
+ mp_limb_t inv; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ SPEED_RESTRICT_COND (s->r & GMP_LIMB_HIGHBIT); /* only divisors with the high bit set are measured */ \
+ \
+ invert_limb (inv, s->r); /* computed once, outside the timed loop */ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do /* s->reps must be at least 1: the counter is decremented before it is tested */ \
+ (*function) (s->xp, s->size, s->r, inv); \
+ while (--i != 0); \
+ \
+ return speed_endtime (); \
+ }
+
+#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc) \
+ { /* Function body: time the pair pfunc (inv, s->r) plus function (s->xp, s->size, s->r << inv[1], inv); both calls are inside the timed loop. */ \
+ unsigned i; \
+ mp_limb_t inv[4]; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ /* inv is filled once here with mpn_mod_1_1p_cps, then again by pfunc on every iteration */ \
+ mpn_mod_1_1p_cps (inv, s->r); \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ pfunc (inv, s->r); \
+ function (s->xp, s->size, s->r << inv[1], inv); /* inv[1] is used as a shift count for the divisor */ \
+ } while (--i != 0); \
+ \
+ return speed_endtime (); \
+ }
+/* Time the pair pfunc (inv, s->r) plus function (s->xp, s->size, s->r, inv),
+   both inside the timed loop.  inv has N+3 limbs and only divisors with
+   s->r <= ~(mp_limb_t)0 / N are measured.  N is parenthesized wherever it is
+   used so that an expression argument cannot change the arithmetic.  */
+#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N) \
+ { \
+ unsigned i; \
+ mp_limb_t inv[(N)+3]; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / (N)); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ pfunc (inv, s->r); \
+ function (s->xp, s->size, s->r, inv); \
+ } while (--i != 0); \
+ \
+ return speed_endtime (); \
+ }
+
+
+/* A division of 2*s->size by s->size limbs.
+   Times s->reps evaluations of call, which may refer to the locals a
+   (dividend, 2*s->size limbs), d (divisor, s->size limbs), q (s->size+1
+   limbs), r (s->size limbs) and dinv (set by invert_pi1 from the two high
+   divisor limbs).  The divisor is s->yp with its high bit forced on; the
+   dividend is s->xp twice over, with its top limb set to one less than the
+   top divisor limb.  At least two limbs are required because d[s->size-2]
+   is read when dinv is computed.  */
+
+#define SPEED_ROUTINE_MPN_DC_DIVREM_CALL(call) \
+ { \
+ unsigned i; \
+ mp_ptr a, d, q, r; \
+ double t; \
+ gmp_pi1_t dinv; \
+ TMP_DECL; \
+ \
+ /* two divisor limbs are needed for invert_pi1 below */ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (a, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (d, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (q, s->size+1, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (r, s->size, s->align_wp2); \
+ \
+ MPN_COPY (a, s->xp, s->size); \
+ MPN_COPY (a+s->size, s->xp, s->size); \
+ \
+ MPN_COPY (d, s->yp, s->size); \
+ \
+ /* normalize the data */ \
+ d[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ a[2*s->size-1] = d[s->size-1] - 1; \
+ \
+ invert_pi1 (dinv, d[s->size-1], d[s->size-2]); \
+ \
+ speed_operand_src (s, a, 2*s->size); \
+ speed_operand_src (s, d, s->size); \
+ speed_operand_dst (s, q, s->size+1); \
+ speed_operand_dst (s, r, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
+/* A remainder of 2*s->size by s->size limbs.
+   Times function (r, a, d).  d is built from s->yp.  a is built so that its
+   high half is (s->xp mod d), hence less than d, and its low half is a copy
+   of s->xp.  All three mpz_t values are cleared before returning so that
+   repeated measurements do not accumulate allocations.  */
+
+#define SPEED_ROUTINE_MPZ_MOD(function) \
+ { \
+ unsigned i; \
+ mpz_t a, d, r; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ mpz_init_set_n (d, s->yp, s->size); \
+ \
+ /* high part less than d, low part a duplicate copied in */ \
+ mpz_init_set_n (a, s->xp, s->size); \
+ mpz_mod (a, a, d); \
+ mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size); \
+ MPN_COPY (PTR(a), s->xp, s->size); \
+ \
+ mpz_init (r); \
+ \
+ speed_operand_src (s, PTR(a), SIZ(a)); \
+ speed_operand_src (s, PTR(d), SIZ(d)); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (r, a, d); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ /* release the storage taken by the three initializations above */ \
+ mpz_clear (r); \
+ mpz_clear (a); \
+ mpz_clear (d); \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN) \
+ { /* Function body: time function (qp, tp, size1, dp, s->size, INV), a size1 by s->size limb division; INV is pasted in as written and can name the local inv. */ \
+ unsigned i; \
+ mp_ptr dp, tp, ap, qp; \
+ gmp_pi1_t inv; \
+ double t; \
+ mp_size_t size1; \
+ TMP_DECL; \
+ /* dividend size: s->r when nonzero, otherwise twice the divisor size */ \
+ size1 = (s->r == 0 ? 2 * s->size : s->r); \
+ \
+ SPEED_RESTRICT_COND (s->size >= DMIN); \
+ SPEED_RESTRICT_COND (size1 - s->size >= QMIN); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \
+ \
+ /* we don't fill in dividend completely when size1 > s->size */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap + size1 - s->size, s->xp, s->size); \
+ \
+ MPN_COPY (dp, s->yp, s->size); \
+ \
+ /* normalize the data */ \
+ dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ ap[size1 - 1] = dp[s->size - 1] - 1; \
+ /* NOTE(review): dp[s->size-2] is read here, so DMIN presumably has to be at least 2 */ \
+ invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \
+ \
+ speed_operand_src (s, ap, size1); \
+ speed_operand_dst (s, tp, size1); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, qp, size1 - s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, size1); /* tp is refreshed from ap on every iteration, inside the timed region */ \
+ function (qp, tp, size1, dp, s->size, INV); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+/* Time function (qp, tp, 2*s->size, dp, s->size, scratch): a quotient-only
+   division of a 2*s->size limb dividend by an s->size limb divisor.
+   itchfn (2*s->size, s->size, 0) gives the scratch size in limbs.  The
+   dividend is s->xp twice over; the divisor is a copy of s->yp with its high
+   bit forced on, and the top dividend limb is set to one less than the top
+   divisor limb.  */
+#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn) \
+ { \
+ unsigned i; \
+ mp_ptr dp, tp, qp, scratch; \
+ double t; \
+ mp_size_t itch; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ itch = itchfn (2 * s->size, s->size, 0); \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
+ \
+ MPN_COPY (tp, s->xp, s->size); \
+ MPN_COPY (tp+s->size, s->xp, s->size); \
+ \
+ /* the divisor needs real data before it is normalized and read below */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ \
+ /* normalize the data */ \
+ dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ tp[2*s->size-1] = dp[s->size-1] - 1; \
+ \
+ speed_operand_dst (s, qp, s->size); \
+ speed_operand_src (s, tp, 2 * s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, scratch, itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ function (qp, tp, 2 * s->size, dp, s->size, scratch); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn) \
+ { /* Function body: time function (qp, rp, tp, size1, dp, s->size, scratch), a size1 by s->size limb division with scratch of itchfn (size1, s->size, 0) limbs. */ \
+ unsigned i; \
+ mp_ptr dp, tp, qp, rp, scratch; \
+ double t; \
+ mp_size_t size1, itch; \
+ TMP_DECL; \
+ /* dividend size: s->r when nonzero, otherwise twice the divisor size */ \
+ size1 = (s->r == 0 ? 2 * s->size : s->r); \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
+ \
+ itch = itchfn (size1, s->size, 0); \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
+ SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
+ \
+ /* we don't fill in dividend completely when size1 > s->size */ \
+ MPN_COPY (tp, s->xp, s->size); \
+ MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
+ \
+ MPN_COPY (dp, s->yp, s->size); \
+ \
+ /* normalize the data */ \
+ dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ tp[size1 - 1] = dp[s->size - 1] - 1; \
+ \
+ speed_operand_dst (s, qp, size1 - s->size); \
+ speed_operand_dst (s, rp, s->size); \
+ speed_operand_src (s, tp, size1); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, scratch, itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { /* tp is passed directly each time; no per-iteration copy is made here */ \
+ function (qp, rp, tp, size1, dp, s->size, scratch); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn) \
+ { /* Function body: time function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch), where ip is computed from dp by mpn_invert before timing starts. */ \
+ unsigned i; \
+ mp_ptr dp, tp, qp, rp, ip, scratch, tmp; \
+ double t; \
+ mp_size_t size1, itch; \
+ TMP_DECL; \
+ /* dividend size: s->r when nonzero, otherwise twice the divisor size */ \
+ size1 = (s->r == 0 ? 2 * s->size : s->r); \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
+ \
+ itch = itchfn (size1, s->size, s->size); \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
+ SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
+ SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */ \
+ \
+ /* we don't fill in dividend completely when size1 > s->size */ \
+ MPN_COPY (tp, s->xp, s->size); \
+ MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
+ \
+ MPN_COPY (dp, s->yp, s->size); \
+ \
+ /* normalize the data */ \
+ dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ tp[size1 - 1] = dp[s->size-1] - 1; \
+ /* the s->size limb inverse is prepared once, outside the timed loop */ \
+ tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size)); \
+ mpn_invert (ip, dp, s->size, tmp); \
+ \
+ speed_operand_dst (s, qp, size1 - s->size); \
+ speed_operand_dst (s, rp, s->size); \
+ speed_operand_src (s, tp, size1); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_src (s, ip, s->size); \
+ speed_operand_dst (s, scratch, itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function) \
+ { /* Function body: time function (qp, tp, 2*s->size, dp, s->size, inv) with an odd divisor dp and inv derived from dp[0]. */ \
+ unsigned i; \
+ mp_ptr dp, tp, ap, qp; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
+ /* ap holds s->xp twice over and stays untouched as the master copy of the dividend */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ binvert_limb (inv, dp[0]); \
+ inv = -inv; /* the negated value is what gets passed to function */ \
+ \
+ speed_operand_src (s, ap, 2*s->size); \
+ speed_operand_dst (s, tp, 2*s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, qp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); /* tp is refreshed from ap on every iteration, inside the timed region */ \
+ function (qp, tp, 2*s->size, dp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function) \
+ { /* Function body: time function (qp, tp, s->size, dp, s->size, inv) with an odd divisor dp and inv derived from dp[0]. */ \
+ unsigned i; \
+ mp_ptr dp, tp, qp; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ binvert_limb (inv, dp[0]); \
+ inv = -inv; /* the negated value is what gets passed to function */ \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, tp, s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, qp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, s->xp, s->size); /* tp is refreshed from s->xp on every iteration, inside the timed region */ \
+ function (qp, tp, s->size, dp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_MPN_PI1_BDIV_R(function) \
+ { /* Function body: time function (tp, 2*s->size, dp, s->size, inv) with an odd divisor dp; no quotient area is passed. */ \
+ unsigned i; \
+ mp_ptr dp, tp, ap; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
+ /* ap holds s->xp twice over and stays untouched as the master copy of the dividend */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ binvert_limb (inv, dp[0]); \
+ inv = -inv; /* the negated value is what gets passed to function */ \
+ \
+ speed_operand_src (s, ap, 2*s->size); \
+ speed_operand_dst (s, tp, 2*s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); /* tp is refreshed from ap on every iteration, inside the timed region */ \
+ function (tp, 2*s->size, dp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn) \
+ { /* Function body: time function (qp, s->xp, s->size, dp, s->size, scratch) with an odd divisor dp and scratch of itchfn (s->size, s->size) limbs. */ \
+ unsigned i; \
+ mp_ptr dp, qp, scratch; \
+ double t; \
+ mp_size_t itch; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ itch = itchfn (s->size, s->size); \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ \
+ speed_operand_dst (s, qp, s->size); \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, scratch, itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { /* the dividend s->xp is passed directly; no per-iteration copy is made here */ \
+ function (qp, s->xp, s->size, dp, s->size, scratch); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn) \
+ { /* Function body: time function (qp, rp, tp, 2*s->size, dp, s->size, scratch) with an odd divisor dp and scratch of itchfn (2*s->size, s->size) limbs. */ \
+ unsigned i; \
+ mp_ptr dp, tp, qp, rp, scratch; \
+ double t; \
+ mp_size_t itch; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ itch = itchfn (2 * s->size, s->size); \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
+ SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
+ /* the dividend tp is s->xp twice over */ \
+ MPN_COPY (tp, s->xp, s->size); \
+ MPN_COPY (tp+s->size, s->xp, s->size); \
+ \
+ /* divisor must be odd */ \
+ MPN_COPY (dp, s->yp, s->size); \
+ dp[0] |= 1; \
+ \
+ speed_operand_dst (s, qp, s->size); \
+ speed_operand_dst (s, rp, s->size); \
+ speed_operand_src (s, tp, 2 * s->size); \
+ speed_operand_src (s, dp, s->size); \
+ speed_operand_dst (s, scratch, itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ function (qp, rp, tp, 2 * s->size, dp, s->size, scratch); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_BROOT(function) \
+ { /* Function body: time (*function) (wp, s->xp, s->size, s->r) through SPEED_ROUTINE_MPN_UNARY_1_CALL, which is defined outside this section. */ \
+ SPEED_RESTRICT_COND (s->r & 1); /* only odd s->r is measured */ \
+ s->xp[0] |= 1; /* forces the operand odd; note this writes to s->xp itself */ \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL \
+ ((*function) (wp, s->xp, s->size, s->r)); \
+ }
+
+/* Time (*function) (wp, s->xp, s->size, s->r, tp), with wp of s->size limbs
+   and tp of itch limbs.  Only odd s->r is measured, and s->xp[0] is forced
+   odd, which writes to s->xp itself.  The restrictions are checked before
+   TMP_MARK, as in the other routines here, so that a rejected measurement
+   cannot leave a mark without its matching TMP_FREE.  */
+#define SPEED_ROUTINE_MPN_BROOTINV(function, itch) \
+ { \
+ mp_ptr wp, tp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (s->r & 1); \
+ \
+ TMP_MARK; \
+ wp = TMP_ALLOC_LIMBS (s->size); \
+ tp = TMP_ALLOC_LIMBS ( (itch)); \
+ s->xp[0] |= 1; \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ (*function) (wp, s->xp, s->size, s->r, tp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_INVERT(function,itchfn) \
+ { /* Function body: time function (ip, up, s->size, tp), where up is s->xp with its high bit forced on and tp has itchfn (s->size) limbs. */ \
+ long i; \
+ mp_ptr up, tp, ip; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ \
+ MPN_COPY (up, s->xp, s->size); \
+ \
+ /* normalize the data */ \
+ up[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ \
+ speed_operand_src (s, up, s->size); \
+ speed_operand_dst (s, tp, s->size); /* only s->size limbs of tp are registered, not the full itchfn (s->size) */ \
+ speed_operand_dst (s, ip, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (ip, up, s->size, tp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn) \
+ { /* Function body: same harness as SPEED_ROUTINE_MPN_INVERT; times function (ip, up, s->size, tp) with up normalized and tp of itchfn (s->size) limbs. */ \
+ long i; \
+ mp_ptr up, tp, ip; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ \
+ MPN_COPY (up, s->xp, s->size); \
+ \
+ /* normalize the data */ \
+ up[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ \
+ speed_operand_src (s, up, s->size); \
+ speed_operand_dst (s, tp, s->size); /* only s->size limbs of tp are registered, not the full itchfn (s->size) */ \
+ speed_operand_dst (s, ip, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (ip, up, s->size, tp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn) \
+ { /* Function body: same harness as SPEED_ROUTINE_MPN_INVERTAPPR, except that only sizes of 3 limbs or more are measured. */ \
+ long i; \
+ mp_ptr up, tp, ip; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 3); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ \
+ MPN_COPY (up, s->xp, s->size); \
+ \
+ /* normalize the data */ \
+ up[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ \
+ speed_operand_src (s, up, s->size); \
+ speed_operand_dst (s, tp, s->size); /* only s->size limbs of tp are registered, not the full itchfn (s->size) */ \
+ speed_operand_dst (s, ip, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (ip, up, s->size, tp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn) \
+ { /* Function body: time function (ip, up, s->size, tp), where up is s->xp with its low bit forced on and tp has itchfn (s->size) limbs. */ \
+ long i; \
+ mp_ptr up, tp, ip; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ \
+ MPN_COPY (up, s->xp, s->size); \
+ \
+ /* normalize the data: here that means making the operand odd */ \
+ up[0] |= 1; \
+ \
+ speed_operand_src (s, up, s->size); \
+ speed_operand_dst (s, tp, s->size); /* only s->size limbs of tp are registered, not the full itchfn (s->size) */ \
+ speed_operand_dst (s, ip, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (ip, up, s->size, tp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+/* Time function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp).  up is
+   refreshed from s->xp on every iteration inside the timed loop, and mp is
+   s->yp with its low bit forced on.  Both inputs are given their data
+   before they are registered, so speed_cache_fill never sees an
+   uninitialized source, and mp is registered like the other operands.  */
+#define SPEED_ROUTINE_MPN_SEC_INVERT(function,itchfn) \
+ { \
+ long i; \
+ mp_ptr up, mp, tp, ip; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ \
+ /* give both inputs their data before registering them */ \
+ MPN_COPY (up, s->xp, s->size); \
+ MPN_COPY (mp, s->yp, s->size); \
+ /* Must be odd */ \
+ mp[0] |= 1; \
+ \
+ speed_operand_src (s, up, s->size); \
+ speed_operand_src (s, mp, s->size); \
+ speed_operand_dst (s, tp, s->size); \
+ speed_operand_dst (s, ip, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (up, s->xp, s->size); \
+ function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_REDC_1(function) \
+ { /* Function body: time function (cp, tp, mp, s->size, inv) with an odd modulus mp and a single-limb inv derived from mp[0]. */ \
+ unsigned i; \
+ mp_ptr cp, mp, tp, ap; \
+ mp_limb_t inv; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
+ /* only the low 2*s->size limbs of ap are filled; the extra top limb is left as allocated */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* modulus must be odd */ \
+ MPN_COPY (mp, s->yp, s->size); \
+ mp[0] |= 1; \
+ binvert_limb (inv, mp[0]); \
+ inv = -inv; /* the negated value is what gets passed to function */ \
+ \
+ speed_operand_src (s, ap, 2*s->size+1); \
+ speed_operand_dst (s, tp, 2*s->size+1); \
+ speed_operand_src (s, mp, s->size); \
+ speed_operand_dst (s, cp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); /* tp is refreshed from ap on every iteration, inside the timed region */ \
+ function (cp, tp, mp, s->size, inv); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_REDC_2(function) \
+ { /* Function body: time function (cp, tp, mp, s->size, invp) with an odd modulus mp and a two-limb invp derived from the low limbs of mp. */ \
+ unsigned i; \
+ mp_ptr cp, mp, tp, ap; \
+ mp_limb_t invp[2]; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
+ /* only the low 2*s->size limbs of ap are filled; the extra top limb is left as allocated */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* modulus must be odd */ \
+ MPN_COPY (mp, s->yp, s->size); \
+ mp[0] |= 1; \
+ mpn_binvert (invp, mp, 2, tp); /* tp doubles as scratch here. NOTE(review): a size of 2 is passed although s->size may be 1 -- confirm mp always has two readable limbs */ \
+ invp[0] = -invp[0]; invp[1] = ~invp[1]; /* two-limb negation, written assuming invp[0] is nonzero */ \
+ \
+ speed_operand_src (s, ap, 2*s->size+1); \
+ speed_operand_dst (s, tp, 2*s->size+1); \
+ speed_operand_src (s, mp, s->size); \
+ speed_operand_dst (s, cp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); /* tp is refreshed from ap on every iteration, inside the timed region */ \
+ function (cp, tp, mp, s->size, invp); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+#define SPEED_ROUTINE_REDC_N(function) \
+ { /* Function body: time function (cp, tp, mp, s->size, invp) with an odd modulus mp and a full s->size limb invp from mpn_binvert. */ \
+ unsigned i; \
+ mp_ptr cp, mp, tp, ap, invp; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size > 8); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
+ SPEED_TMP_ALLOC_LIMBS (invp, s->size, s->align_wp2); /* align? */ \
+ /* only the low 2*s->size limbs of ap are filled; the extra top limb is left as allocated */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (ap+s->size, s->xp, s->size); \
+ \
+ /* modulus must be odd */ \
+ MPN_COPY (mp, s->yp, s->size); \
+ mp[0] |= 1; \
+ mpn_binvert (invp, mp, s->size, tp); /* tp doubles as scratch here; unlike REDC_1 and REDC_2 the result is not negated */ \
+ \
+ speed_operand_src (s, ap, 2*s->size+1); \
+ speed_operand_dst (s, tp, 2*s->size+1); \
+ speed_operand_src (s, mp, s->size); \
+ speed_operand_dst (s, cp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do { \
+ MPN_COPY (tp, ap, 2*s->size); /* tp is refreshed from ap on every iteration, inside the timed region */ \
+ function (cp, tp, mp, s->size, invp); \
+ } while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
+#define SPEED_ROUTINE_MPN_POPCOUNT(function) \
+ { /* Function body: time function (s->xp, s->size); the value it returns is discarded. */ \
+ unsigned i; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do /* s->reps must be at least 1: the counter is decremented before it is tested */ \
+ function (s->xp, s->size); \
+ while (--i != 0); \
+ \
+ return speed_endtime (); \
+ }
+
+#define SPEED_ROUTINE_MPN_HAMDIST(function) \
+ { /* Function body: time function (s->xp, s->yp, s->size); the value it returns is discarded. */ \
+ unsigned i; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do /* s->reps must be at least 1: the counter is decremented before it is tested */ \
+ function (s->xp, s->yp, s->size); \
+ while (--i != 0); \
+ \
+ return speed_endtime (); \
+ }
+
+
+#define SPEED_ROUTINE_MPZ_UI(function) \
+ { /* Function body: time function (z, s->size); here s->size is the numeric argument itself, not a limb count. */ \
+ mpz_t z; \
+ unsigned i; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+ mpz_init (z); \
+ /* no operands are registered and speed_cache_fill is not called in this routine */ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (z, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (z); \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPZ_FAC_UI(function) SPEED_ROUTINE_MPZ_UI(function) /* alias: same harness as SPEED_ROUTINE_MPZ_UI */
+#define SPEED_ROUTINE_MPZ_FIB_UI(function) SPEED_ROUTINE_MPZ_UI(function) /* alias: same harness as SPEED_ROUTINE_MPZ_UI */
+#define SPEED_ROUTINE_MPZ_LUCNUM_UI(function) SPEED_ROUTINE_MPZ_UI(function) /* alias: same harness as SPEED_ROUTINE_MPZ_UI */
+
+
+#define SPEED_ROUTINE_MPZ_UNARY_1(function) \
+ { /* Function body: time function (z, a) where a is the single-limb value s->size; with s->r >= 2, time chains of s->r in-place applications function (z, z) instead. */ \
+ mpz_t z, a; \
+ unsigned i; \
+ mp_limb_t ls; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+ mpz_init (z); \
+ ls = s->size; \
+ mpz_roinit_n (a, &ls, s->size != 0); /* a aliases the local ls; its limb count is 0 when s->size is 0, else 1 */ \
+ \
+ if (s->r < 2) \
+ { \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (z, a); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ } \
+ else \
+ { \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ int j = s->r; \
+ mpz_set (z, a); /* restart each chain from a; this copy is inside the timed region */ \
+ do \
+ { \
+ function (z, z); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ s->time_divisor = s->r; /* records that each repetition performed s->r calls */ \
+ } \
+ /* only z is cleared; a was set up with mpz_roinit_n over ls */ \
+ mpz_clear (z); \
+ return t; \
+ }
+
+
+#define SPEED_ROUTINE_MPZ_2_UI(function) \
+ { /* Function body: time function (z, z2, s->size); here s->size is the numeric argument itself, not a limb count. */ \
+ mpz_t z, z2; \
+ unsigned i; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+ mpz_init (z); \
+ mpz_init (z2); \
+ /* no operands are registered and speed_cache_fill is not called in this routine */ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (z, z2, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (z); \
+ mpz_clear (z2); \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPZ_FIB2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function) /* alias: same harness as SPEED_ROUTINE_MPZ_2_UI */
+#define SPEED_ROUTINE_MPZ_LUCNUM2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function) /* alias: same harness as SPEED_ROUTINE_MPZ_2_UI */
+
+
+#define SPEED_ROUTINE_MPN_FIB2_UI(function) \
+ { /* Function body: time function (fp, f1p, s->size), with both result areas sized by MPN_FIB2_SIZE (s->size) limbs. */ \
+ mp_ptr fp, f1p; \
+ mp_size_t alloc; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+ TMP_MARK; \
+ alloc = MPN_FIB2_SIZE (s->size); \
+ SPEED_TMP_ALLOC_LIMBS (fp, alloc, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (f1p, alloc, s->align_yp); \
+ /* no operands are registered and speed_cache_fill is not called in this routine */ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (fp, f1p, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
+
+/* Calculate b^e mod m for random b and m of s->size limbs and random e of 6
+ limbs. m is forced to odd so that redc can be used. e is limited in
+ size so the calculation doesn't take too long. */
+#define SPEED_ROUTINE_MPZ_POWM(function) \
+ { /* Function body: time function (r, b, e, m); b is s->xp unless s->r >= 2, in which case b is the small value s->r. */ \
+ mpz_t r, b, e, m; \
+ unsigned i; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ mpz_init (r); \
+ if (s->r < 2) \
+ mpz_init_set_n (b, s->xp, s->size); \
+ else \
+ mpz_init_set_ui (b, s->r); \
+ mpz_init_set_n (m, s->yp, s->size); \
+ mpz_setbit (m, 0); /* force m to odd */ \
+ mpz_init_set_n (e, s->xp_block, 6); /* the exponent is always 6 limbs taken from s->xp_block, whatever s->size is */ \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (r, b, e, m); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (r); \
+ mpz_clear (b); \
+ mpz_clear (e); \
+ mpz_clear (m); \
+ return t; \
+ }
+
+/* (m-2)^e mod m, where e is s->r when that is nonzero and otherwise
+   ~0UL/3, i.e. 0x5555...5555.  */
+#define SPEED_ROUTINE_MPZ_POWM_UI(function) \
+ { /* Function body: time function (r, b, e, m) with m = s->xp forced odd, b = m - 2 and an unsigned long exponent e. */ \
+ mpz_t r, b, m; \
+ unsigned long e; \
+ unsigned i; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ mpz_init (r); \
+ \
+ /* force m to odd */ \
+ mpz_init (m); \
+ mpz_set_n (m, s->xp, s->size); \
+ PTR(m)[0] |= 1; \
+ /* default exponent is all-ones divided by 3 (alternating bit pattern); a nonzero s->r overrides it */ \
+ e = (~ (unsigned long) 0) / 3; \
+ if (s->r != 0) \
+ e = s->r; \
+ \
+ mpz_init_set (b, m); \
+ mpz_sub_ui (b, b, 2); \
+/* printf ("%X\n", mpz_get_ui(m)); */ \
+ i = s->reps; \
+ speed_starttime (); \
+ do \
+ function (r, b, e, m); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (r); \
+ mpz_clear (b); \
+ mpz_clear (m); \
+ return t; \
+ }
+
+
+#define SPEED_ROUTINE_MPN_ADDSUB_CALL(call) \
+ { /* Function body: time s->reps evaluations of call, which may use the locals wp, wp2, xp and yp; s->r in 0..4 selects how the inputs overlap the outputs. */ \
+ mp_ptr wp, wp2, xp, yp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2); \
+ xp = s->xp; \
+ yp = s->yp; \
+ /* r=0 no overlap, r=1 xp is wp, r=2 yp is wp2, r=3 both, r=4 both but crossed; anything else is rejected with -1.0 */ \
+ if (s->r == 0) ; \
+ else if (s->r == 1) { xp = wp; } \
+ else if (s->r == 2) { yp = wp2; } \
+ else if (s->r == 3) { xp = wp; yp = wp2; } \
+ else if (s->r == 4) { xp = wp2; yp = wp; } \
+ else { \
+ TMP_FREE; \
+ return -1.0; \
+ } \
+ if (xp != s->xp) MPN_COPY (xp, s->xp, s->size); /* an aliased input gets the data of the operand it replaces */ \
+ if (yp != s->yp) MPN_COPY (yp, s->yp, s->size); \
+ \
+ speed_operand_src (s, xp, s->size); \
+ speed_operand_src (s, yp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_operand_dst (s, wp2, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do /* aliased inputs are not refreshed between iterations */ \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+/* Time function (wp, wp2, xp, yp, s->size) through
+   SPEED_ROUTINE_MPN_ADDSUB_CALL, which selects the operand overlap from
+   s->r.  The definition carries no trailing semicolon, so the expansion is
+   a plain block like the other wrapper macros here.  */
+#define SPEED_ROUTINE_MPN_ADDSUB_N(function) \
+ SPEED_ROUTINE_MPN_ADDSUB_CALL \
+ (function (wp, wp2, xp, yp, s->size))
+
+/* Time function (wp, wp2, xp, yp, s->size, 0) through
+   SPEED_ROUTINE_MPN_ADDSUB_CALL, which selects the operand overlap from
+   s->r.  The definition carries no trailing semicolon, so the expansion is
+   a plain block like the other wrapper macros here.  */
+#define SPEED_ROUTINE_MPN_ADDSUB_NC(function) \
+ SPEED_ROUTINE_MPN_ADDSUB_CALL \
+ (function (wp, wp2, xp, yp, s->size, 0))
+
+
+/* Doing an Nx1 gcd with the given r.
+   Times function (xp, s->size, s->r), where xp is a private copy of s->xp
+   whose low limb is set to 1 if the whole operand would otherwise be zero.
+   s->r must be nonzero.  The copy xp, being what the timed call actually
+   reads, is the operand registered for speed_cache_fill.  */
+#define SPEED_ROUTINE_MPN_GCD_1N(function) \
+ { \
+ mp_ptr xp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (s->r != 0); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp); \
+ MPN_COPY (xp, s->xp, s->size); \
+ xp[0] |= refmpn_zero_p (xp, s->size); \
+ \
+ /* register the buffer the measured call reads, not the original */ \
+ speed_operand_src (s, xp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (xp, s->size, s->r); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
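+/* SPEED_BLOCK_SIZE many single-limb GCDs per repetition.  The x operands
+   are masked to s->size bits and the y operands to s->r bits (or s->size
+   bits when s->r is 0).  */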
+
+#define SPEED_ROUTINE_MPN_GCD_1_CALL(setup, call) \
+ { /* Function body: setup runs once per element with index i while the data is prepared; call runs in the timed loop with index j going from SPEED_BLOCK_SIZE down to 1. */ \
+ unsigned i, j; \
+ mp_ptr px, py; \
+ mp_limb_t x_mask, y_mask; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (s->size <= mp_bits_per_limb); /* s->size is a bit count here, not a limb count */ \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (px, SPEED_BLOCK_SIZE, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (py, SPEED_BLOCK_SIZE, s->align_yp); \
+ MPN_COPY (px, s->xp_block, SPEED_BLOCK_SIZE); \
+ MPN_COPY (py, s->yp_block, SPEED_BLOCK_SIZE); \
+ /* NOTE(review): s->r is used as a bit count for y_mask without a range check like the one on s->size -- confirm callers keep it within a limb */ \
+ x_mask = MP_LIMB_T_LOWBITMASK (s->size); \
+ y_mask = MP_LIMB_T_LOWBITMASK (s->r != 0 ? s->r : s->size); \
+ for (i = 0; i < SPEED_BLOCK_SIZE; i++) \
+ { \
+ px[i] &= x_mask; px[i] += (px[i] == 0); /* mask to width, then turn a zero into 1 */ \
+ py[i] &= y_mask; py[i] += (py[i] == 0); \
+ setup; \
+ } \
+ \
+ speed_operand_src (s, px, SPEED_BLOCK_SIZE); \
+ speed_operand_src (s, py, SPEED_BLOCK_SIZE); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = SPEED_BLOCK_SIZE; \
+ do \
+ { \
+ call; \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ /* records that each repetition performed SPEED_BLOCK_SIZE calls */ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_GCD_1(function) /* Time function (&px[j-1], 1, py[j-1]) over the block, with no extra per-element setup. */ \
+ SPEED_ROUTINE_MPN_GCD_1_CALL( , function (&px[j-1], 1, py[j-1]))
+
+#define SPEED_ROUTINE_MPN_GCD_11(function) /* Time function (px[j-1], py[j-1]) over the block, after forcing every px[i] and py[i] odd. */ \
+ SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \
+ function (px[j-1], py[j-1]))
+
+/* Multiply limbs by (B+1). Then we get a gcd exceeding one limb, so
+ we can measure gcd_22 loop only, without gcd_11. */
+#define SPEED_ROUTINE_MPN_GCD_22(function) /* Each odd limb is passed twice, as both the first and second argument of its pair. */ \
+ SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1, py[i] |= 1), \
+ function (px[j-1], px[j-1], py[j-1], py[j-1]))
+
+#define SPEED_ROUTINE_MPN_JACBASE(function) /* Time function (px[j-1], py[j-1], 0) over the block, after the per-element adjustment below. */ \
+ SPEED_ROUTINE_MPN_GCD_1_CALL \
+ ({ \
+ /* require x<y, y odd, y!=1 */ \
+ px[i] %= py[i]; \
+ px[i] |= 1; /* NOTE(review): when px[i] is py[i]-1 and py[i] is odd this makes px[i] equal to py[i] -- confirm whether strict x<y is needed */ \
+ py[i] |= 1; \
+ if (py[i]==1) py[i]=3; \
+ }, \
+ function (px[j-1], py[j-1], 0))
+
+#define SPEED_ROUTINE_MPN_HGCD2(function) \
+ { /* Function body: time SPEED_BLOCK_SIZE/2 calls of function per repetition, on limb pairs taken from s->xp_block and s->yp_block. */ \
+ unsigned i, j; \
+ struct hgcd_matrix1 m = {{{0,0},{0,0}}}; \
+ double t; \
+ /* no size restriction is applied: s->size is not used by this routine */ \
+ speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE); \
+ speed_operand_src (s, s->yp_block, SPEED_BLOCK_SIZE); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ mp_limb_t chain = 0; /* carries a value from each call into the next call's input */ \
+ do \
+ { \
+ for (j = 0; j < SPEED_BLOCK_SIZE; j+= 2) \
+ { \
+ /* randomized but successively dependent */ \
+ function (s->xp_block[j] | GMP_NUMB_HIGHBIT, \
+ s->xp_block[j+1] + chain, \
+ s->yp_block[j] | GMP_NUMB_HIGHBIT, \
+ s->yp_block[j+1], &m); \
+ chain += m.u[0][0]; \
+ } \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ /* make sure the compiler won't optimize away chain */ \
+ noop_1 (chain); \
+ /* records that each repetition performed SPEED_BLOCK_SIZE / 2 calls */ \
+ s->time_divisor = SPEED_BLOCK_SIZE / 2; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) \
+ { /* Function body: time func (ap, bp, s->size, &hgcd, wp) on fresh copies of s->xp and s->yp; the copies and the matrix init are inside the timed loop. */ \
+ mp_size_t hgcd_init_itch, hgcd_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ int res; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ /* sizes below 2 are rejected with -1 directly rather than through SPEED_RESTRICT_COND */ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ /* makes the top limbs nonzero; note this writes to s->xp and s->yp themselves */ \
+ s->xp[s->size - 1] |= 1; \
+ s->yp[s->size - 1] |= 1; \
+ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_itch = itchfunc (s->size); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (ap, bp, s->size, &hgcd, wp); /* res is stored but not read afterwards */ \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \
+ { /* times func (&hgcd, ap, bp, s->size, p, wp) with p = s->size/2 */ \
+ mp_size_t hgcd_init_itch, hgcd_step_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ mp_size_t p = s->size/2; \
+ int res; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ /* sizes below 2 are not measured */ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ /* make the top limbs non-zero; note this writes to s->xp and s->yp */ \
+ s->xp[s->size - 1] |= 1; \
+ s->yp[s->size - 1] |= 1; \
+ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_step_itch = itchfunc (s->size, p); \
+ /* tmp1 is the storage handed to mpn_hgcd_matrix_init, wp is passed to func */ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_step_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { /* the copies and the matrix init are inside the timed region */ \
+ MPN_COPY (ap, s->xp, s->size); \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (&hgcd, ap, bp, s->size, p, wp); /* res is not read afterwards */ \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
+/* Run some GCDs of s->size limbs each. The number of different data values
+ is decreased as s->size**2, since GCD is a quadratic algorithm.
+ SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
+ though, because the plain gcd is about twice as fast as gcdext. */
+
+#define SPEED_ROUTINE_MPN_GCD_CALL(datafactor, call) \
+ { /* "call" is executed once per data piece with operands in xtmp and ytmp */ \
+ unsigned i; \
+ mp_size_t j, pieces, psize; \
+ mp_ptr wp, wp2, xtmp, ytmp, px, py; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size+1, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2); \
+ /* pieces falls with s->size squared, is capped at what one block holds, and is at least 1 */ \
+ pieces = SPEED_BLOCK_SIZE * datafactor / s->size / s->size; \
+ pieces = MIN (pieces, SPEED_BLOCK_SIZE / s->size); \
+ pieces = MAX (pieces, 1); \
+ /* a single piece is taken from s->xp/s->yp, several pieces from the blocks */ \
+ psize = pieces * s->size; \
+ px = TMP_ALLOC_LIMBS (psize); \
+ py = TMP_ALLOC_LIMBS (psize); \
+ MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize); \
+ MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize); \
+ \
+ /* Requirements: x >= y, y must be odd, high limbs != 0. \
+ No need to ensure random numbers are really great. */ \
+ for (j = 0; j < pieces; j++) \
+ { \
+ mp_ptr x = px + j * s->size; \
+ mp_ptr y = py + j * s->size; \
+ if (x[s->size - 1] == 0) x[s->size - 1] = 1; \
+ if (y[s->size - 1] == 0) y[s->size - 1] = 1; \
+ /* order by high limb; equal high limbs are replaced by 2 and 1 */ \
+ if (x[s->size - 1] < y[s->size - 1]) \
+ MP_LIMB_T_SWAP (x[s->size - 1], y[s->size - 1]); \
+ else if (x[s->size - 1] == y[s->size - 1]) \
+ { \
+ x[s->size - 1] = 2; \
+ y[s->size - 1] = 1; \
+ } \
+ y[0] |= 1; \
+ } \
+ \
+ speed_operand_src (s, px, psize); \
+ speed_operand_src (s, py, psize); \
+ speed_operand_dst (s, xtmp, s->size); \
+ speed_operand_dst (s, ytmp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ /* timed region: each rep walks the pieces from last to first */ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = pieces; \
+ do \
+ { \
+ MPN_COPY (xtmp, px+(j - 1)*s->size, s->size); \
+ MPN_COPY (ytmp, py+(j - 1)*s->size, s->size); \
+ call; \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ /* "call" ran pieces times per rep */ \
+ s->time_divisor = pieces; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_GCD(function) /* datafactor 8, twice that of the GCDEXT variant */ \
+ SPEED_ROUTINE_MPN_GCD_CALL (8, function (wp, xtmp, s->size, ytmp, s->size))
+
+#define SPEED_ROUTINE_MPN_GCDEXT(function) /* datafactor 4, half that of the plain GCD variant */ \
+ SPEED_ROUTINE_MPN_GCD_CALL \
+ (4, { mp_size_t wp2size; /* passed by address to function; not read afterwards */ \
+ function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); })
+
+
+#define SPEED_ROUTINE_MPN_GCDEXT_ONE(function) \
+ { /* times function on s->size limb operands whose top three limbs vary per piece */ \
+ unsigned i; \
+ mp_size_t j, pieces, psize, wp2size; \
+ mp_ptr wp, wp2, xtmp, ytmp, px, py; \
+ double t; \
+ TMP_DECL; \
+ /* the timed loop stores into xtmp[s->size-3 .. s->size-1], so 3 limbs are needed */ \
+ SPEED_RESTRICT_COND (s->size >= 3); \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1, s->align_yp); \
+ MPN_COPY (xtmp, s->xp, s->size); \
+ MPN_COPY (ytmp, s->yp, s->size); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size+1, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1, s->align_wp2); \
+ /* as many 3-limb pieces as fit in one block */ \
+ pieces = SPEED_BLOCK_SIZE / 3; \
+ psize = 3 * pieces; \
+ px = TMP_ALLOC_LIMBS (psize); \
+ py = TMP_ALLOC_LIMBS (psize); \
+ MPN_COPY (px, s->xp_block, psize); \
+ MPN_COPY (py, s->yp_block, psize); \
+ \
+ /* x must have at least as many bits as y, \
+ high limbs must be non-zero */ \
+ for (j = 0; j < pieces; j++) \
+ { \
+ mp_ptr x = px+3*j; \
+ mp_ptr y = py+3*j; \
+ x[2] += (x[2] == 0); \
+ y[2] += (y[2] == 0); \
+ if (x[2] < y[2]) \
+ MP_LIMB_T_SWAP (x[2], y[2]); \
+ } \
+ \
+ speed_operand_src (s, px, psize); \
+ speed_operand_src (s, py, psize); \
+ speed_operand_dst (s, xtmp, s->size); \
+ speed_operand_dst (s, ytmp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ mp_ptr x = px; \
+ mp_ptr y = py; \
+ mp_ptr xth = &xtmp[s->size-3]; \
+ mp_ptr yth = &ytmp[s->size-3]; \
+ j = pieces; \
+ do \
+ { /* overwrite the top three limbs with the next piece */ \
+ xth[0] = x[0], xth[1] = x[1], xth[2] = x[2]; \
+ yth[0] = y[0], yth[1] = y[1], yth[2] = y[2]; \
+ \
+ ytmp[0] |= 1; /* y must be odd, */ \
+ \
+ function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); \
+ \
+ x += 3; \
+ y += 3; \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ /* function ran pieces times per rep */ \
+ s->time_divisor = pieces; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_GMP_PRIMESIEVE(function) \
+{ /* times function (wp, a) with an s->size limb destination */ \
+ mp_ptr wp; \
+ unsigned i; \
+ double t; \
+ mp_limb_t a = s->size * GMP_LIMB_BITS * 3; /* presumably the sieve bound matching an s->size limb wp -- TODO confirm */ \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, a); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+}
+
+
+/* Calculate nextprime(n) for random n of s->size bits (not limbs). */
+#define SPEED_ROUTINE_MPZ_NEXTPRIME(function) \
+ { \
+ unsigned i, j; \
+ mpz_t wp, n; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 10); \
+ \
+ mpz_init (wp); \
+ mpz_init_set_n (n, s->xp, s->size); \
+ /* limit to s->size bits, as this function is very slow */ \
+ mpz_tdiv_r_2exp (n, n, s->size); \
+ /* set high bits so operand and result are general s->size bits */ \
+ mpz_setbit (n, s->size - 1); \
+ mpz_clrbit (n, s->size - 2); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ /* nextprime timing is variable, so average over many calls */ \
+ j = SPEED_BLOCK_SIZE - 1; \
+ /* starts on random, after measures prime to next prime */ \
+ function (wp, n); \
+ do \
+ { \
+ function (wp, wp); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (wp); \
+ mpz_clear (n); \
+ /* one call on n plus SPEED_BLOCK_SIZE-1 chained calls per rep */ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPZ_JACOBI(function) \
+ { /* times function (a, b) on mpz operands aliased onto s->size limb pieces */ \
+ mpz_t a, b; \
+ unsigned i; \
+ mp_size_t j, pieces, psize; \
+ mp_ptr px, py; \
+ double t; \
+ TMP_DECL; \
+ \
+ /* the conditioning loop below touches x[s->size-1] and y[s->size-1] */ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1); \
+ pieces = MAX (pieces, 1); \
+ s->time_divisor = pieces; \
+ /* a single piece is taken from s->xp/s->yp, several pieces from the blocks */ \
+ psize = pieces * s->size; \
+ px = TMP_ALLOC_LIMBS (psize); \
+ py = TMP_ALLOC_LIMBS (psize); \
+ MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize); \
+ MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize); \
+ \
+ for (j = 0; j < pieces; j++) \
+ { \
+ mp_ptr x = px+j*s->size; \
+ mp_ptr y = py+j*s->size; \
+ \
+ /* y odd */ \
+ y[0] |= 1; \
+ \
+ /* high limbs non-zero */ \
+ if (x[s->size-1] == 0) x[s->size-1] = 1; \
+ if (y[s->size-1] == 0) y[s->size-1] = 1; \
+ } \
+ /* only the size and pointer fields of a and b are set; no mpz_init is done */ \
+ SIZ(a) = s->size; \
+ SIZ(b) = s->size; \
+ \
+ speed_operand_src (s, px, psize); \
+ speed_operand_src (s, py, psize); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = pieces; \
+ do \
+ { /* point a and b at piece j-1 */ \
+ PTR(a) = px+(j-1)*s->size; \
+ PTR(b) = py+(j-1)*s->size; \
+ function (a, b); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_DIVREM_2(function) \
+ { /* times function (wp, 0, xp, s->size, yp) with a two-limb divisor yp */ \
+ mp_ptr wp, xp; \
+ mp_limb_t yp[2]; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ /* source is destroyed */ \
+ MPN_COPY (xp, s->xp, s->size); /* copied once, before the timed loop */ \
+ \
+ /* divisor must be normalized */ \
+ MPN_COPY (yp, s->yp_block, 2); \
+ yp[1] |= GMP_NUMB_HIGHBIT; \
+ \
+ speed_operand_src (s, xp, s->size); \
+ speed_operand_src (s, yp, 2); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, 0, xp, s->size, yp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_DIV_QR_1(function) \
+ { /* times function (wp, wp+s->size-1, s->xp, s->size, d) with d taken from s->r */ \
+ mp_ptr wp, xp; /* NOTE(review): xp is not used in this macro */ \
+ mp_limb_t d; \
+ mp_limb_t r; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ /* a zero s->r is replaced by a divisor of 1 */ \
+ d = s->r; \
+ if (d == 0) \
+ d = 1; \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ r = function (wp, wp+s->size-1, s->xp, s->size, d); /* r is not read afterwards */ \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function) \
+ { /* times function (wp, s->xp, s->size, 0, d, dinv) with divisor d = s->r */ \
+ mp_ptr wp; \
+ mp_limb_t d, dinv; \
+ mp_limb_t r; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ d = s->r; \
+ /* divisor must be normalized; this is checked before TMP_MARK so \
+ that a rejected measurement does not leave without TMP_FREE */ \
+ SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ invert_limb (dinv, d); \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ r = function (wp, s->xp, s->size, 0, d, dinv); /* r is not read afterwards */ \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm) \
+ { /* times function (wp, rp, s->xp, s->size, yp) with a two-limb divisor yp */ \
+ mp_ptr wp, xp; /* NOTE(review): xp is not used in this macro */ \
+ mp_limb_t yp[2]; \
+ mp_limb_t rp[2]; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ /* norm selects a normalized divisor (high bit set); otherwise the \
+ high bit is cleared and the high limb kept non-zero */ \
+ MPN_COPY (yp, s->yp_block, 2); \
+ if (norm) \
+ yp[1] |= GMP_NUMB_HIGHBIT; \
+ else \
+ { \
+ yp[1] &= ~GMP_NUMB_HIGHBIT; \
+ if (yp[1] == 0) \
+ yp[1] = 1; \
+ } \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, yp, 2); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_operand_dst (s, rp, 2); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, rp, s->xp, s->size, yp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MODLIMB_INVERT(function) \
+ { /* times function (n, n) on a chain of values seeded from the x block */ \
+ unsigned i, j; \
+ mp_ptr xp; \
+ mp_limb_t n = 1; \
+ double t; \
+ /* biased by one so that xp[j], j = SPEED_BLOCK_SIZE..1, indexes block elements SPEED_BLOCK_SIZE-1..0 */ \
+ xp = s->xp_block-1; \
+ \
+ speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = SPEED_BLOCK_SIZE; \
+ do \
+ { \
+ /* randomized but successively dependent */ \
+ n += (xp[j] << 1); /* an even addend, so the parity of n is unchanged */ \
+ \
+ function (n, n); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ /* make sure the compiler won't optimize away n */ \
+ noop_1 (n); \
+ /* function ran SPEED_BLOCK_SIZE times per rep */ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ return t; \
+ }
+
+
+#define SPEED_ROUTINE_MPN_SQRTROOT_CALL(call) \
+ { /* times "call", which may use s->xp as source and wp, wp2 as s->size limb destinations */ \
+ mp_ptr wp, wp2; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_operand_dst (s, wp2, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
+/* Calculate worst case for perfect_power
+ Worst case is multiple prime factors larger than trial div limit.
+ The operand built is (2^17-1)^(power-1) * (2^16+1), with
+ power = s->size * GMP_NUMB_BITS / 17. */
+#define SPEED_ROUTINE_MPN_PERFECT_POWER(function) \
+ { \
+ mpz_t r; \
+ unsigned i, power; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 10); \
+ \
+ mpz_init (r); \
+ power = s->size * GMP_NUMB_BITS / 17; \
+ mpz_ui_pow_ui(r, (1 << 17) - 1, power - 1); \
+ mpz_mul_ui(r, r, (1 << 16) + 1); /* larger than 1000th prime */ \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (PTR(r), SIZ(r)); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (r); \
+ return t; \
+ }
+
+/* Calculate worst case (larger prime) for perfect_square. The operand
+ is the square of the low s->size/2 limbs of s->xp with bit
+ s->size * GMP_NUMB_BITS / 2 - 1 set. */
+#define SPEED_ROUTINE_MPN_PERFECT_SQUARE(function) \
+ { \
+ mpz_t r; \
+ unsigned i; \
+ double t; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ mpz_init_set_n (r, s->xp, s->size / 2); \
+ mpz_setbit (r, s->size * GMP_NUMB_BITS / 2 - 1); \
+ mpz_mul (r, r, r); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (PTR(r), SIZ(r)); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ mpz_clear (r); \
+ return t; \
+ }
+
+
+/* s->size controls the number of limbs in the input, s->r is the base, or
+ decimal by default. Bases outside 2..256 are rejected. */
+#define SPEED_ROUTINE_MPN_GET_STR(function) \
+ { \
+ unsigned char *wp; \
+ mp_size_t wn; \
+ mp_ptr xp; \
+ int base; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ base = s->r == 0 ? 10 : s->r; \
+ SPEED_RESTRICT_COND (base >= 2 && base <= 256); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (xp, s->size + 1, s->align_xp); \
+ /* wn bytes of output space, as computed by MPN_SIZEINBASE */ \
+ MPN_SIZEINBASE (wn, s->xp, s->size, base); \
+ wp = (unsigned char *) TMP_ALLOC (wn); \
+ \
+ /* use this during development to guard against overflowing wp */ \
+ /* \
+ MPN_COPY (xp, s->xp, s->size); \
+ ASSERT_ALWAYS (mpn_get_str (wp, base, xp, s->size) <= wn); \
+ */ \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, xp, s->size); \
+ speed_operand_dst (s, (mp_ptr) wp, wn/GMP_LIMB_BYTES); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { /* the copy into xp is inside the timed region */ \
+ MPN_COPY (xp, s->xp, s->size); \
+ function (wp, base, xp, s->size); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+/* s->size controls the number of digits in the input, s->r is the base, or
+ decimal by default. Bases outside 2..256 are rejected. "call" can use
+ xp (the digit bytes), wp (the destination limbs) and base. */
+#define SPEED_ROUTINE_MPN_SET_STR_CALL(call) \
+ { \
+ unsigned char *xp; \
+ mp_ptr wp; \
+ mp_size_t wn; \
+ unsigned i; \
+ int base; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ base = s->r == 0 ? 10 : s->r; \
+ SPEED_RESTRICT_COND (base >= 2 && base <= 256); \
+ \
+ TMP_MARK; \
+ /* one digit per byte, each reduced from a limb of s->xp into 0..base-1 */ \
+ xp = (unsigned char *) TMP_ALLOC (s->size); \
+ for (i = 0; i < s->size; i++) \
+ xp[i] = s->xp[i] % base; \
+ \
+ LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base); \
+ SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
+ \
+ /* use this during development to check wn is big enough */ \
+ /* \
+ ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn); \
+ */ \
+ \
+ speed_operand_src (s, (mp_ptr) xp, s->size/GMP_LIMB_BYTES); \
+ speed_operand_dst (s, wp, wn); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+
+/* Run an accel gcd find_a() function over various data values. A set of
+ values is used in case some run particularly fast or slow. The size
+ parameter is ignored, the amount of data tested is fixed. */
+
+#define SPEED_ROUTINE_MPN_GCD_FINDA(function) \
+ { \
+ unsigned i, j; \
+ mp_limb_t cp[SPEED_BLOCK_SIZE][2]; /* one {low, high} pair per call */ \
+ double t; \
+ TMP_DECL; \
+ \
+ TMP_MARK; \
+ \
+ /* low must be odd, high must be non-zero */ \
+ for (i = 0; i < SPEED_BLOCK_SIZE; i++) \
+ { \
+ cp[i][0] = s->xp_block[i] | 1; \
+ cp[i][1] = s->yp_block[i] + (s->yp_block[i] == 0); \
+ } \
+ \
+ speed_operand_src (s, &cp[0][0], 2*SPEED_BLOCK_SIZE); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = SPEED_BLOCK_SIZE; \
+ do \
+ { \
+ function (cp[j-1]); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ /* function ran SPEED_BLOCK_SIZE times per rep */ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ return t; \
+ }
+
+
+/* "call" should do "count_foo_zeros(c,n)".
+ Give leading=1 if foo is leading zeros, leading=0 for trailing.
+ Give zero=1 if n=0 is allowed in the call, zero=0 if not. */
+
+#define SPEED_ROUTINE_COUNT_ZEROS_A(leading, zero) \
+ { /* first half of the routine; the braces opened here are closed by SPEED_ROUTINE_COUNT_ZEROS_B */ \
+ mp_ptr xp; \
+ int i, c; \
+ unsigned j; \
+ mp_limb_t n; \
+ double t; \
+ TMP_DECL; \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (xp, SPEED_BLOCK_SIZE, s->align_xp); \
+ /* release the temporary allocation before giving up on a failed setup */ \
+ if (! speed_routine_count_zeros_setup (s, xp, leading, zero)) \
+ { \
+ TMP_FREE; \
+ return -1.0; \
+ } \
+ speed_operand_src (s, xp, SPEED_BLOCK_SIZE); \
+ speed_cache_fill (s); \
+ \
+ c = 0; \
+ speed_starttime (); \
+ j = s->reps; \
+ do { \
+ for (i = 0; i < SPEED_BLOCK_SIZE; i++) \
+ { \
+ n = xp[i]; \
+ n ^= c; /* mixes the previous count into the next operand */ \
+
+#define SPEED_ROUTINE_COUNT_ZEROS_B() /* second half: closes the loops and block opened by SPEED_ROUTINE_COUNT_ZEROS_A */ \
+ } \
+ } while (--j != 0); \
+ t = speed_endtime (); \
+ \
+ /* don't let c go dead */ \
+ noop_1 (c); \
+ /* the inner loop ran SPEED_BLOCK_SIZE times per rep */ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ \
+ TMP_FREE; \
+ return t; \
+ } \
+
+#define SPEED_ROUTINE_COUNT_ZEROS_C(call, leading, zero) /* places "call" in the loop body between the A and B halves */ \
+ do { \
+ SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero); \
+ call; \
+ SPEED_ROUTINE_COUNT_ZEROS_B (); \
+ } while (0) \
+
+#define SPEED_ROUTINE_COUNT_LEADING_ZEROS_C(call,zero) /* leading=1, caller supplies the statement */ \
+ SPEED_ROUTINE_COUNT_ZEROS_C (call, 1, zero)
+#define SPEED_ROUTINE_COUNT_LEADING_ZEROS(fun) /* leading=1, zero=0, statement is fun (c, n) */ \
+ SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 1, 0)
+
+#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS_C(call,zero) /* leading=0, caller supplies the statement */ \
+ SPEED_ROUTINE_COUNT_ZEROS_C (call, 0, zero)
+/* The parameter is named fun, as in SPEED_ROUTINE_COUNT_LEADING_ZEROS, so
+ that the macro argument is what gets called as fun (c, n). */
+#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS(fun) \
+ SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 0, 0)
+
+
+#define SPEED_ROUTINE_INVERT_LIMB_CALL(call) \
+ { /* times "call", which is expected to use d and dinv */ \
+ unsigned i, j; \
+ mp_limb_t d, dinv=0; \
+ mp_ptr xp = s->xp_block - 1; /* biased by one so xp[j], j = SPEED_BLOCK_SIZE..1, stays inside the block */ \
+ \
+ s->time_divisor = SPEED_BLOCK_SIZE; \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ j = SPEED_BLOCK_SIZE; \
+ do \
+ { /* d depends on the previous dinv and always has its high bit set */ \
+ d = dinv ^ xp[j]; \
+ d |= GMP_LIMB_HIGHBIT; \
+ do { call; } while (0); \
+ } \
+ while (--j != 0); \
+ } \
+ while (--i != 0); \
+ \
+ /* don't let the compiler optimize everything away */ \
+ noop_1 (dinv); \
+ /* NOTE(review): the noop_1 call above falls inside the timed region */ \
+ return speed_endtime(); \
+ }
+
+
+#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function) \
+ { /* times s->reps consecutive calls of function () with no arguments */ \
+ unsigned i; \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (); \
+ while (--i != 0); \
+ return speed_endtime (); \
+ }
+
+
+#define SPEED_ROUTINE_MPN_ZERO_CALL(call) \
+ { /* times "call", which may use the s->size limb destination wp */ \
+ mp_ptr wp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ /* a size of zero is accepted here */ \
+ SPEED_RESTRICT_COND (s->size >= 0); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_ZERO(function) /* the timed statement is function (wp, s->size) */ \
+ SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
+
+
+#endif
diff --git a/gmp-6.3.0/tune/sqr_basecase.c b/gmp-6.3.0/tune/sqr_basecase.c
new file mode 100644
index 0000000..93adac5
--- /dev/null
+++ b/gmp-6.3.0/tune/sqr_basecase.c
@@ -0,0 +1,2 @@
+/* not sure that an empty file can compile, so put in a dummy */
+int sqr_basecase_dummy;
diff --git a/gmp-6.3.0/tune/time.c b/gmp-6.3.0/tune/time.c
new file mode 100644
index 0000000..5ba482b
--- /dev/null
+++ b/gmp-6.3.0/tune/time.c
@@ -0,0 +1,1598 @@
+/* Time routines for speed measurements.
+
+Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* Usage:
+
+ The code in this file implements the lowest level of time measuring,
+ simple one-time measuring of time between two points.
+
+ void speed_starttime (void)
+ double speed_endtime (void)
+ Call speed_starttime to start measuring, and then call speed_endtime
+ when done.
+
+ speed_endtime returns the time taken, in seconds. Or if the timebase
+ is in CPU cycles and the CPU frequency is unknown then speed_endtime
+ returns cycles. Applications can identify the cycles return by
+ checking for speed_cycletime (described below) equal to 1.0.
+
+ If some sort of temporary glitch occurs then speed_endtime returns
+ 0.0. Currently this is for various cases where a negative time has
+ occurred. This unfortunately occurs with getrusage on some systems,
+ and with the hppa cycle counter on hpux.
+
+ double speed_cycletime
+ The time in seconds for each CPU cycle. For example on a 100 MHz CPU
+ this would be 1.0e-8.
+
+ If the CPU frequency is unknown, then speed_cycletime is either 0.0
+ or 1.0. It's 0.0 when speed_endtime is returning seconds, or it's
+ 1.0 when speed_endtime is returning cycles.
+
+ It may be noted that "speed_endtime() / speed_cycletime" gives a
+ measured time in cycles, irrespective of whether speed_endtime is
+ returning cycles or seconds. (Assuming cycles can be had, ie. it's
+ either cycles already or the cpu frequency is known. See also
+ speed_cycletime_need_cycles below.)
+
+ double speed_unittime
+ The unit of time measurement accuracy for the timing method in use.
+ This is in seconds or cycles, as per speed_endtime.
+
+ char speed_time_string[]
+ A null-terminated string describing the time method in use.
+
+ void speed_time_init (void)
+ Initialize time measuring. speed_starttime() does this
+ automatically, so it's only needed if an application wants to inspect
+ the above global variables before making a measurement.
+
+ int speed_precision
+ The intended accuracy of time measurements. speed_measure() in
+ common.c for instance runs target routines with enough repetitions so
+ it takes at least "speed_unittime * speed_precision" (this expression
+ works for both cycles or seconds from speed_endtime).
+
+ A program can provide an option so the user can set speed_precision.
+ If speed_precision is zero when speed_time_init or speed_starttime
+ first run then it gets a default based on the measuring method
+ chosen. (More precision for higher accuracy methods.)
+
+ void speed_cycletime_need_seconds (void)
+ Call this to demand that speed_endtime will return seconds, and not
+ cycles. If only cycles are available then an error is printed and
+ the program exits.
+
+ void speed_cycletime_need_cycles (void)
+ Call this to demand that speed_cycletime is non-zero, so that
+ "speed_endtime() / speed_cycletime" will give times in cycles.
+
+
+
+ Notes:
+
+ Various combinations of cycle counter, read_real_time(), getrusage(),
+ gettimeofday() and times() can arise, according to which are available
+ and their precision.
+
+
+ Allowing speed_endtime() to return either seconds or cycles is only a
+ slight complication and makes it possible for the speed program to do
+ some sensible things without demanding the CPU frequency. If seconds are
+ being measured then it can always print seconds, and if cycles are being
+ measured then it can always print them without needing to know how long
+ they are. Also the tune program doesn't care at all what the units are.
+
+ GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
+ fail. This will be needed if times in seconds are wanted but a cycle
+ counter is being used, or if times in cycles are wanted but getrusage or
+ another seconds based timer is in use.
+
+ If the measuring method uses a cycle counter but supplements it with
+ getrusage or the like, then knowing the CPU frequency is mandatory since
+ the code compares values from the two.
+
+
+ Not done:
+
+ Solaris gethrtime() seems no more than a slow way to access the Sparc V9
+ cycle counter. gethrvtime() seems to be relevant only to light weight
+ processes, it doesn't for instance give nanosecond virtual time. So
+ neither of these are used.
+
+
+ Bugs:
+
+ getrusage_microseconds_p is fundamentally flawed: getrusage and
+ gettimeofday can have resolutions other than clock ticks or microseconds;
+ for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
+
+
+ Enhancements:
+
+ The SGI hardware counter has 64 bits on some machines, which could be
+ used when available. But perhaps 32 bits is enough range, and then rely
+ on the getrusage supplement.
+
+ Maybe getrusage (or times) should be used as a supplement for any
+ wall-clock measuring method. Currently a wall clock with a good range
+ (eg. a 64-bit cycle counter) is used without a supplement.
+
+ On PowerPC the timebase registers could be used, but would have to do
+ something to find out the speed. On 6xx chips it's normally 1/4 bus
+ speed, on 4xx chips it's either that or an external clock. Measuring
+ against gettimeofday might be ok. */
+
+
+#include "config.h"
+
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h> /* for getenv() */
+
+#if HAVE_FCNTL_H
+#include <fcntl.h> /* for open() */
+#endif
+
+#if HAVE_STDINT_H
+#include <stdint.h> /* for uint64_t */
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for sysconf() */
+#endif
+
+#include <sys/types.h>
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h> /* for struct timeval */
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h> /* for mmap() */
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h> /* for struct rusage */
+#endif
+
+#if HAVE_SYS_SYSSGI_H
+#include <sys/syssgi.h> /* for syssgi() */
+#endif
+
+#if HAVE_SYS_SYSTEMCFG_H
+#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
+#endif
+
+#if HAVE_SYS_TIMES_H
+#include <sys/times.h> /* for times() and struct tms */
+#endif
+
+#include "gmp-impl.h"
+
+#include "speed.h"
+
+
+/* strerror is only used for some stuff on newish systems, no need to have a
+ proper replacement */
+#if ! HAVE_STRERROR
+#define strerror(n) "<strerror not available>"
+#endif
+
+
+char speed_time_string[256]; /* description of the time method in use */
+int speed_precision = 0; /* zero means a default is chosen when timing is initialized */
+double speed_unittime; /* accuracy unit of the method in use, in seconds or cycles */
+double speed_cycletime = 0.0; /* seconds per CPU cycle; 0.0 or 1.0 when the frequency is unknown */
+
+
+/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
+ native cc */
+#define M_2POWU (((double) INT_MAX + 1.0) * 2.0) /* twice (INT_MAX+1), computed in double */
+
+#define M_2POW32 4294967296.0 /* 2^32 */
+#define M_2POW64 (M_2POW32 * M_2POW32) /* 2^64 */
+
+
+/* Conditionals for the time functions available are done with normal C
+ code, which is a lot easier than wildly nested preprocessor directives.
+
+ The choice of what to use is partly made at run-time, according to
+ whether the cycle counter works and the measured accuracy of getrusage
+ and gettimeofday.
+
+ A routine that's not available won't be called, but it is defined as an
+ abort() so that a mistaken call is caught.
+
+ It can be assumed that if a function exists then its data type will, but
+ if the function doesn't then the data type might or might not exist, so
+ the type can't be used unconditionally. The "struct_rusage" etc macros
+ provide dummies when the respective function doesn't exist. */
+
+
+#if HAVE_SPEED_CYCLECOUNTER
+static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
+#else /* no cycle counter: any use of speed_cyclecounter is an assertion failure */
+static const int have_cycles = 0;
+#define speed_cyclecounter(p) ASSERT_FAIL (speed_cyclecounter not available)
+#endif /* HAVE_SPEED_CYCLECOUNTER */
+
+/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
+ microseconds. Same #ifdefs here as in longlong.h. */
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+ && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
+static const int have_stck = 1;
+static const int use_stck = 1; /* always use when available */
+typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */
+#define STCK(timestamp) \
+ do { \
+ asm ("stck %0" : "=Q" (timestamp)); \
+ } while (0)
+#else /* stck not available: STCK is an assertion failure */
+static const int have_stck = 0;
+static const int use_stck = 0;
+typedef unsigned long stck_t; /* dummy */
+#define STCK(timestamp) ASSERT_FAIL (stck instruction not available)
+#endif /* stck availability */
+#define STCK_PERIOD (1.0 / 4096e6) /* 2^-12 microseconds */
+
+/* mftb
+ Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
+ and a loop (see powerpc64.asm). */
+#if HAVE_HOST_CPU_FAMILY_powerpc
+static const int have_mftb = 1;
+#if defined (__GNUC__) && ! defined (NO_ASM)
+#define MFTB(a) \
+ do { \
+ unsigned __h1, __l, __h2; \
+ do { \
+ asm volatile ("mftbu %0\n" \
+ "mftb %1\n" \
+ "mftbu %2" \
+ : "=r" (__h1), \
+ "=r" (__l), \
+ "=r" (__h2)); \
+ } while (__h1 != __h2); /* read again if the two mftbu results differ */ \
+ a[0] = __l; /* low word */ \
+ a[1] = __h1; /* high word */ \
+ } while (0)
+#else /* no gcc inline asm */
+#define MFTB(a) mftb_function (a)
+#endif
+#else /* ! powerpc */
+static const int have_mftb = 0;
+#define MFTB(a) \
+ do { \
+ a[0] = 0; \
+ a[1] = 0; \
+ ASSERT_FAIL (mftb not available); \
+ } while (0)
+#endif /* HAVE_HOST_CPU_FAMILY_powerpc */
+
+/* Unicos 10.X has syssgi(), but not mmap(). */
+#if HAVE_SYSSGI && HAVE_MMAP
+static const int have_sgi = 1;
+#else /* one of syssgi() and mmap() is missing */
+static const int have_sgi = 0;
+#endif /* HAVE_SYSSGI && HAVE_MMAP */
+
+#if HAVE_READ_REAL_TIME
+static const int have_rrt = 1;
+#else /* no read_real_time: provide assertion-failure stubs and stand-in definitions */
+static const int have_rrt = 0;
+#define read_real_time(t,s) ASSERT_FAIL (read_real_time not available)
+#define time_base_to_time(t,s) ASSERT_FAIL (time_base_to_time not available)
+#define RTC_POWER 1
+#define RTC_POWER_PC 2
+#define timebasestruct_t struct timebasestruct_dummy
+struct timebasestruct_dummy { /* stand-in so that timebasestruct_t variables still compile */
+ int flag;
+ unsigned int tb_high;
+ unsigned int tb_low;
+};
+#endif /* HAVE_READ_REAL_TIME */
+
+#if HAVE_CLOCK_GETTIME
+static const int have_cgt = 1;
+#define struct_timespec struct timespec
+#else /* no clock_gettime: stubs fail an assertion and evaluate to -1 */
+static const int have_cgt = 0;
+#define struct_timespec struct timespec_dummy
+#define clock_gettime(id,ts) (ASSERT_FAIL (clock_gettime not available), -1)
+#define clock_getres(id,ts) (ASSERT_FAIL (clock_getres not available), -1)
+#endif /* HAVE_CLOCK_GETTIME */
+
+#if HAVE_GETRUSAGE
+static const int have_grus = 1;
+#define struct_rusage struct rusage
+#else
+static const int have_grus = 0;
+#define getrusage(n,ru) ASSERT_FAIL (getrusage not available)
+#define struct_rusage struct rusage_dummy
+#endif
+
+#if HAVE_GETTIMEOFDAY
+static const int have_gtod = 1;
+#define struct_timeval struct timeval
+#else
+static const int have_gtod = 0;
+#define gettimeofday(tv,tz) ASSERT_FAIL (gettimeofday not available)
+#define struct_timeval struct timeval_dummy
+#endif
+
+#if HAVE_TIMES
+static const int have_times = 1;
+#define struct_tms struct tms
+#else
+static const int have_times = 0;
+#define times(tms) ASSERT_FAIL (times not available)
+#define struct_tms struct tms_dummy
+#endif
+
+/* Stand-in structures selected by the struct_xxx macros when the real
+ system types are not available. They are defined unconditionally and
+ declare only the fields this file accesses. */
+struct tms_dummy {
+ long tms_utime;
+};
+struct timeval_dummy {
+ long tv_sec;
+ long tv_usec;
+};
+struct rusage_dummy {
+ struct_timeval ru_utime; /* real or dummy timeval, per struct_timeval */
+};
+struct timespec_dummy {
+ long tv_sec;
+ long tv_nsec;
+};
+
+/* Which measuring methods are in use; set by speed_time_init(). */
+static int use_cycles;
+static int use_mftb;
+static int use_sgi;
+static int use_rrt;
+static int use_cgt;
+static int use_gtod;
+static int use_grus;
+static int use_times;
+static int use_tick_boundary; /* wait for a clock tick before starting */
+
+/* Samples taken at the start of a measurement, compared against fresh
+ samples in speed_endtime(). */
+static unsigned start_cycles[2];
+static stck_t start_stck;
+static unsigned start_mftb[2];
+static unsigned start_sgi;
+static timebasestruct_t start_rrt;
+static struct_timespec start_cgt;
+static struct_rusage start_grus;
+static struct_timeval start_gtod;
+static struct_tms start_times;
+
+/* cycles_limit is in seconds and is compared in speed_endtime() against
+ times from the supplementary methods; the xxx_unittime values are the
+ length in seconds of one unit of the respective method. */
+static double cycles_limit = 1e100;
+static double mftb_unittime;
+static double sgi_unittime;
+static double cgt_unittime;
+static double grus_unittime;
+static double gtod_unittime;
+static double times_unittime;
+
+/* for RTC_POWER format, ie. seconds and nanoseconds */
+#define TIMEBASESTRUCT_SECS(t) ((t)->tb_high + (t)->tb_low * 1e-9)
+
+
+/* Return a string representing a time in seconds, nicely formatted with a
+   ns, us, ms or s suffix and roughly 4 significant figures.
+   Eg. "10.25ms".
+
+   The return value points to a static buffer which is overwritten by the
+   next call, so it must be used or copied before calling again.  */
+char *
+unittime_string (double t)
+{
+  /* "%.1f" of the largest finite double is 309 integer digits; allow for
+     that plus sign, decimals, unit and terminator so that nothing is
+     truncated.  snprintf below bounds the write regardless.  */
+  static char buf[320];
+
+  const char *unit;
+  int prec;
+
+  /* choose units and scale */
+  if (t < 1e-6)
+    t *= 1e9, unit = "ns";
+  else if (t < 1e-3)
+    t *= 1e6, unit = "us";
+  else if (t < 1.0)
+    t *= 1e3, unit = "ms";
+  else
+    unit = "s";
+
+  /* want 4 significant figures */
+  if (t < 1.0)
+    prec = 4;
+  else if (t < 10.0)
+    prec = 3;
+  else if (t < 100.0)
+    prec = 2;
+  else
+    prec = 1;
+
+  /* A plain sprintf into the former 128 byte buffer overran it for very
+     large values, since "%f" prints every integer digit.  */
+  snprintf (buf, sizeof (buf), "%.*f%s", prec, t, unit);
+  return buf;
+}
+
+
+/* Jump buffer and SIGILL handler used by cycles_works_p(): the handler
+ longjmps back to the setjmp in cycles_works_p() with value 1. */
+static jmp_buf cycles_works_buf;
+
+static RETSIGTYPE
+cycles_works_handler (int sig)
+{
+ longjmp (cycles_works_buf, 1);
+}
+
+/* Return 1 if speed_cyclecounter() can be called, or 0 if calling it raises
+   SIGILL.  The test is made on the first call only and the answer cached.
+   When SIGILL is not defined, or a handler cannot be installed, the cycle
+   counter is assumed to work.  */
+int
+cycles_works_p (void)
+{
+  static int result = -1;
+
+  if (result != -1)
+    goto done;
+
+  /* FIXME: On linux, the cycle counter is not saved and restored over
+   * context switches, making it almost useless for precise cputime
+   * measurements. When available, it's better to use clock_gettime,
+   * which seems to have reasonable accuracy (tested on x86_32,
+   * linux-2.6.26, glibc-2.7). However, there are also some linux
+   * systems where clock_gettime is broken in one way or the other,
+   * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
+   * kind-of implemented but broken (needs code to detect that), and
+   * on those systems a wall-clock cycle counter is the least bad
+   * fallback.
+   *
+   * So we need some code to disable the cycle counter on some but not
+   * all linux systems. */
+#ifdef SIGILL
+  {
+    RETSIGTYPE (*old_handler) (int);
+    unsigned cycles[2];
+
+    old_handler = signal (SIGILL, cycles_works_handler);
+    if (old_handler == SIG_ERR)
+      {
+        if (speed_option_verbose)
+          printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
+        goto yes;
+      }
+    if (setjmp (cycles_works_buf))
+      {
+        /* Arrived here by longjmp from cycles_works_handler.  Put the
+           previous handler back, otherwise a later SIGILL elsewhere in
+           the program would longjmp into this function's dead frame.
+           old_handler is not modified after setjmp, so it's still valid.  */
+        signal (SIGILL, old_handler);
+        if (speed_option_verbose)
+          printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
+        result = 0;
+        goto done;
+      }
+    speed_cyclecounter (cycles);
+    signal (SIGILL, old_handler);
+    if (speed_option_verbose)
+      printf ("cycles_works_p(): speed_cyclecounter() works\n");
+  }
+#else
+
+  if (speed_option_verbose)
+    printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
+  goto yes;
+#endif
+
+ yes:
+  result = 1;
+
+ done:
+  return result;
+}
+
+
+/* Clock ticks per second.  sysconf(_SC_CLK_TCK) is preferred where
+   sysconf exists, with the CLK_TCK macro as the fallback.  The value is
+   established on the first call and remembered for later calls.  If
+   neither source is usable the program is aborted.  */
+long
+clk_tck (void)
+{
+  static long cached_ticks = -1L;
+
+  if (cached_ticks == -1L)
+    {
+#if HAVE_SYSCONF
+      cached_ticks = sysconf (_SC_CLK_TCK);
+      if (cached_ticks != -1L)
+        {
+          if (speed_option_verbose)
+            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", cached_ticks);
+          return cached_ticks;
+        }
+
+      fprintf (stderr,
+               "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
+#endif
+
+#ifdef CLK_TCK
+      cached_ticks = CLK_TCK;
+      if (speed_option_verbose)
+        printf ("CLK_TCK is %ld per second\n", cached_ticks);
+#else
+      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
+      abort ();
+#endif
+    }
+
+  return cached_ticks;
+}
+
+
+/* If two times can be observed less than half a clock tick apart, then
+ assume "get" is microsecond accurate.
+
+ Two times only 1 microsecond apart are not believed, since some kernels
+ take it upon themselves to ensure gettimeofday doesn't return the same
+ value twice, for the benefit of applications using it for a timestamp.
+ This is obviously very stupid given the speed of CPUs these days.
+
+ Making "reps" many calls to noop_1() is designed to waste some CPU, with
+ a view to getting measurements 2 microseconds (or more) apart. "reps" is
+ increased progressively until such a period is seen.
+
+ The outer loop "attempts" are just to allow for any random nonsense or
+ system load upsetting the measurements (ie. making two successive calls
+ to "get" come out as a longer interval than normal).
+
+ The macro expands to a complete function body returning int, with the
+ answer kept in a static so the test runs only once. Parameters:
+
+ name string used in the verbose messages
+ type structure type that "get" fills in
+ get get(t) takes a sample into variable t
+ sec sec(t) gives the seconds field of sample t
+ usec usec(t) gives the microseconds field of sample t
+
+ Bugs:
+
+ The assumption that any interval less than a half tick implies
+ microsecond resolution is obviously fairly rash, the true resolution
+ could be anything between a microsecond and that half tick. Perhaps
+ something special would have to be done on a system where this is the
+ case, since there's no obvious reliable way to detect it
+ automatically. */
+
+#define MICROSECONDS_P(name, type, get, sec, usec) \
+ { \
+ static int result = -1; \
+ type st, et; \
+ long dt, half_tick; \
+ unsigned attempt, reps, i, j; \
+ \
+ if (result != -1) \
+ return result; \
+ \
+ result = 0; \
+ half_tick = (1000000L / clk_tck ()) / 2; /* in microseconds */ \
+ \
+ for (attempt = 0; attempt < 5; attempt++) \
+ { \
+ reps = 0; \
+ for (;;) \
+ { \
+ get (st); \
+ for (i = 0; i < reps; i++) \
+ for (j = 0; j < 100; j++) \
+ noop_1 (CNST_LIMB(0)); \
+ get (et); \
+ \
+ dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st); \
+ \
+ if (speed_option_verbose >= 2) \
+ printf ("%s attempt=%u, reps=%u, dt=%ld\n", \
+ name, attempt, reps, dt); \
+ \
+ if (dt >= 2) \
+ break; \
+ \
+ reps = (reps == 0 ? 1 : 2*reps); \
+ if (reps == 0) \
+ break; /* uint overflow, not normal */ \
+ } \
+ \
+ if (dt < half_tick) \
+ { \
+ result = 1; \
+ break; \
+ } \
+ } \
+ \
+ if (speed_option_verbose) \
+ { \
+ if (result) \
+ printf ("%s is microsecond accurate\n", name); \
+ else \
+ printf ("%s is only %s clock tick accurate\n", \
+ name, unittime_string (1.0/clk_tck())); \
+ } \
+ return result; \
+ }
+
+
+/* Return non-zero if gettimeofday() looks microsecond accurate, as judged
+ by MICROSECONDS_P. The helper macros adapt gettimeofday and struct
+ timeval to the get/sec/usec parameters of MICROSECONDS_P. */
+int
+gettimeofday_microseconds_p (void)
+{
+#define call_gettimeofday(t) gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t) ((t).tv_sec)
+#define timeval_tv_usec(t) ((t).tv_usec)
+ MICROSECONDS_P ("gettimeofday", struct_timeval,
+ call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
+}
+
+/* Return non-zero if the ru_utime field from getrusage() looks microsecond
+ accurate, as judged by MICROSECONDS_P. */
+int
+getrusage_microseconds_p (void)
+{
+#define call_getrusage(t) getrusage (0, &(t))
+#define rusage_tv_sec(t) ((t).ru_utime.tv_sec)
+#define rusage_tv_usec(t) ((t).ru_utime.tv_usec)
+ MICROSECONDS_P ("getrusage", struct_rusage,
+ call_getrusage, rusage_tv_sec, rusage_tv_usec);
+}
+
+/* Test whether getrusage goes backwards, return non-zero if it does
+   (suggesting it's flawed).  The test is run on the first call only and
+   the answer cached.
+
+   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
+   microsecond accurate, but has been seen remaining unchanged after many
+   microseconds have elapsed.  It also regularly goes backwards by 1000 to
+   5000 usecs, this has been seen after between 500 and 4000 attempts taking
+   perhaps 0.03 seconds.  We consider this too broken for good measuring.
+   We used to have configure pretend getrusage didn't exist on this system,
+   but a runtime test should be more reliable, since we imagine the problem
+   is not confined to just this exact system tuple.  */
+
+int
+getrusage_backwards_p (void)
+{
+  static int result = -1;
+  /* struct_rusage rather than struct rusage, like the rest of the file, so
+     this still compiles against the dummy type when getrusage is absent.  */
+  struct_rusage start, prev, next;
+  long d;
+  int i;
+
+  if (result != -1)
+    return result;
+
+  getrusage (0, &start);
+  next = start;
+
+  result = 0;
+  i = 0;
+  for (;;)
+    {
+      prev = next;
+      getrusage (0, &next);
+
+      if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
+          || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
+              && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
+        {
+          if (speed_option_verbose)
+            printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
+                    i,
+                    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
+                    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
+          result = 1;
+          break;
+        }
+
+      /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
+         attempts, whichever comes first */
+      d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
+        + (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
+      i++;
+      if (i > 50000 || (i > 1000 && d > 100000))
+        break;
+    }
+
+  return result;
+}
+
+/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
+ of glibc (some time post 2.2).
+
+ CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
+ defined, but returning -1 for an error).
+
+ CGT_ID is the clock id passed to clock_gettime and clock_getres, with
+ CLOCK_PROCESS_CPUTIME_ID preferred over CLOCK_VIRTUAL. have_cgt_id says
+ whether either was defined; if not, CGT_ID is a stub invoking
+ ASSERT_FAIL. */
+
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+# define CGT_ID CLOCK_PROCESS_CPUTIME_ID
+#else
+# ifdef CLOCK_VIRTUAL
+# define CGT_ID CLOCK_VIRTUAL
+# endif
+#endif
+#ifdef CGT_ID
+const int have_cgt_id = 1;
+#else
+const int have_cgt_id = 0;
+# define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1)
+#endif
+
+#define CGT_DELAY_COUNT 1000 /* iterations of the delay loop in cgt_works_p */
+
+/* Return 1 if clock_gettime (CGT_ID) is usable for measuring, 0 if not.
+   On success cgt_unittime is set to the resolution reported by
+   clock_getres, in seconds.
+
+   A reported resolution finer than 10ns is cross-checked by timing a short
+   delay loop; if that loop appears to take under 100ns the clock is not
+   believed and 0 is returned.  The outcome of the probing is cached.  */
+int
+cgt_works_p (void)
+{
+  static int result = -1;
+  struct_timespec unit;
+
+  if (! have_cgt)
+    return 0;
+
+  if (! have_cgt_id)
+    {
+      if (speed_option_verbose)
+        printf ("clock_gettime don't know what ID to use\n");
+      result = 0;
+      return result;
+    }
+
+  if (result != -1)
+    return result;
+
+  /* trial run to see if it works */
+  if (clock_gettime (CGT_ID, &unit) != 0)
+    {
+      if (speed_option_verbose)
+        printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  /* get the resolution */
+  if (clock_getres (CGT_ID, &unit) != 0)
+    {
+      if (speed_option_verbose)
+        printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
+  if (speed_option_verbose)
+    printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
+
+  if (cgt_unittime < 10e-9)
+    {
+      /* Do we believe this? */
+      /* struct_timespec, not struct timespec, so this compiles against the
+         dummy type too when clock_gettime is not available.  */
+      struct_timespec start, end;
+      static volatile int counter;
+      double duration;
+      if (clock_gettime (CGT_ID, &start))
+        {
+          if (speed_option_verbose)
+            printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+          result = 0;
+          return result;
+        }
+      /* Loop of at least 1000 memory accesses, ought to take at
+         least 100 ns.  The counter is volatile so the loop can't be
+         optimized away.  */
+      for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
+        ;
+      if (clock_gettime (CGT_ID, &end))
+        {
+          if (speed_option_verbose)
+            printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+          result = 0;
+          return result;
+        }
+      duration = (end.tv_sec + end.tv_nsec * 1e-9
+                  - start.tv_sec - start.tv_nsec * 1e-9);
+      if (speed_option_verbose)
+        printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
+                CGT_DELAY_COUNT, unittime_string (duration));
+      if (duration < 100e-9)
+        {
+          if (speed_option_verbose)
+            printf ("clock_gettime id=%d not believable\n", CGT_ID);
+          result = 0;
+          return result;
+        }
+    }
+  result = 1;
+  return result;
+}
+
+
+/* Measurement routine passed to freq_measure() by mftb_works_p(). The
+ body comes from FREQ_MEASURE_ONE, given gettimeofday and MFTB as the two
+ sampling operations. The three helper macros repeat, with identical
+ text, those defined in gettimeofday_microseconds_p. */
+static double
+freq_measure_mftb_one (void)
+{
+#define call_gettimeofday(t) gettimeofday (&(t), NULL)
+#define timeval_tv_sec(t) ((t).tv_sec)
+#define timeval_tv_usec(t) ((t).tv_usec)
+ FREQ_MEASURE_ONE ("mftb", struct_timeval,
+ call_gettimeofday, MFTB,
+ timeval_tv_sec, timeval_tv_usec);
+}
+
+
+/* Jump buffer and SIGILL handler used by mftb_works_p(): the handler
+ longjmps back to the setjmp in mftb_works_p() with value 1. */
+static jmp_buf mftb_works_buf;
+
+static RETSIGTYPE
+mftb_works_handler (int sig)
+{
+ longjmp (mftb_works_buf, 1);
+}
+
+/* Return 1 if the mftb time base can be read and its period measured, in
+   which case mftb_unittime is set to that period.  Return 0 if mftb is not
+   available, raises SIGILL, there's no gettimeofday to measure it against,
+   or freq_measure() fails.  */
+int
+mftb_works_p (void)
+{
+  unsigned a[2];
+  RETSIGTYPE (*old_handler) (int);
+  double cycletime;
+
+  /* suppress a warning about a[] unused */
+  a[0] = 0;
+
+  if (! have_mftb)
+    return 0;
+
+#ifdef SIGILL
+  old_handler = signal (SIGILL, mftb_works_handler);
+  if (old_handler == SIG_ERR)
+    {
+      if (speed_option_verbose)
+        printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
+      return 1;
+    }
+  if (setjmp (mftb_works_buf))
+    {
+      /* Arrived here by longjmp from mftb_works_handler.  Put the previous
+         handler back, otherwise a later SIGILL elsewhere in the program
+         would longjmp into this function's dead frame.  old_handler is not
+         modified after setjmp, so it's still valid.  */
+      signal (SIGILL, old_handler);
+      if (speed_option_verbose)
+        printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
+      return 0;
+    }
+  MFTB (a);
+  signal (SIGILL, old_handler);
+  if (speed_option_verbose)
+    printf ("mftb_works_p(): mftb works\n");
+#else
+
+  if (speed_option_verbose)
+    printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
+#endif
+
+#if ! HAVE_GETTIMEOFDAY
+  if (speed_option_verbose)
+    printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
+  return 0;
+#endif
+
+  /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
+     other chips it can be driven from an external clock. */
+  cycletime = freq_measure ("mftb", freq_measure_mftb_one);
+  if (cycletime == -1.0)
+    {
+      if (speed_option_verbose)
+        printf ("mftb_works_p(): cannot measure mftb period\n");
+      return 0;
+    }
+
+  mftb_unittime = cycletime;
+  return 1;
+}
+
+
+/* Address of the least significant 32 bits of the mapped syssgi() cycle
+   counter, set up by sgi_works_p().  */
+volatile unsigned *sgi_addr;
+
+/* Return 1 if the syssgi() hardware cycle counter has been mapped
+   successfully, setting sgi_addr and sgi_unittime (seconds per count), or 0
+   if it's not available.  The setup is attempted on the first call only
+   and the answer cached.  Always 0 without both syssgi() and mmap().  */
+int
+sgi_works_p (void)
+{
+#if HAVE_SYSSGI && HAVE_MMAP
+  static int result = -1;
+
+  size_t pagesize, offset;
+  __psunsigned_t phys, physpage;
+  void *virtpage;
+  unsigned period_picoseconds;
+  int size, fd;
+
+  if (result != -1)
+    return result;
+
+  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
+  if (phys == (__psunsigned_t) -1)
+    {
+      /* ENODEV is the error when a counter is not available */
+      if (speed_option_verbose)
+        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
+      result = 0;
+      return result;
+    }
+  sgi_unittime = period_picoseconds * 1e-12;
+
+  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
+     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
+     obvious way to identify that without SGI_CYCLECNTR_SIZE. */
+#ifdef SGI_CYCLECNTR_SIZE
+  size = syssgi (SGI_CYCLECNTR_SIZE);
+  if (size == -1)
+    {
+      if (speed_option_verbose)
+        {
+          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
+          /* size is in bits here, so the fallback is 32 (the message used
+             to say 4) */
+          printf ("    will assume size==32\n");
+        }
+      size = 32;
+    }
+#else
+  size = 32;
+#endif
+
+  if (size < 32)
+    {
+      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
+      result = 0;
+      return result;
+    }
+
+  pagesize = getpagesize();
+  offset = (size_t) phys & (pagesize-1);
+  physpage = phys - offset;
+
+  /* shouldn't cross over a page boundary */
+  ASSERT_ALWAYS (offset + size/8 <= pagesize);
+
+  fd = open("/dev/mmem", O_RDONLY);
+  if (fd == -1)
+    {
+      if (speed_option_verbose)
+        printf ("open /dev/mmem: %s\n", strerror (errno));
+      result = 0;
+      return result;
+    }
+
+  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
+  if (virtpage == (void *) -1)
+    {
+      /* report before close(), which might change errno */
+      if (speed_option_verbose)
+        printf ("mmap /dev/mmem: %s\n", strerror (errno));
+      close (fd);
+      result = 0;
+      return result;
+    }
+
+  /* The mapping holds its own reference to the device, so the descriptor
+     is no longer needed and would otherwise be leaked.  */
+  close (fd);
+
+  /* address of least significant 4 bytes, knowing mips is big endian */
+  sgi_addr = (unsigned *) ((char *) virtpage + offset
+                           + size/8 - sizeof(unsigned));
+  result = 1;
+  return result;
+
+#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
+  return 0;
+#endif
+}
+
+
+/* Set var to n if var is currently zero, ie. leave alone a value that has
+ already been given. */
+#define DEFAULT(var,n) \
+ do { \
+ if (! (var)) \
+ (var) = (n); \
+ } while (0)
+
+/* Choose the time measuring method(s), setting the use_xxx flags,
+   speed_unittime, speed_time_string and, when still zero, speed_precision.
+   Only the first call does anything.
+
+   Methods are tried in the following order, the first usable one wins:
+
+     - CPU cycle counter (unless speed_option_cycles_broken), supplemented
+       by microsecond getrusage if that's good, or for a 32-bit counter
+       (have_cycles == 1) by gettimeofday, getrusage or times
+     - stck
+     - mftb
+     - syssgi() mmapped counter, supplemented by getrusage
+     - read_real_time
+     - clock_gettime with resolution under 1.5 microseconds
+     - times, if it has more than 1000000 ticks per second
+     - microsecond getrusage
+     - microsecond gettimeofday
+     - clock_gettime with resolution under 1.5 clock ticks
+     - clock tick times, getrusage, gettimeofday
+
+   If nothing is available the program is aborted.  */
+void
+speed_time_init (void)
+{
+  double supplement_unittime = 0.0;
+
+  static int speed_time_initialized = 0;
+  if (speed_time_initialized)
+    return;
+  speed_time_initialized = 1;
+
+  speed_cycletime_init ();
+
+  if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
+    {
+      use_cycles = 1;
+      DEFAULT (speed_cycletime, 1.0);
+      speed_unittime = speed_cycletime;
+      DEFAULT (speed_precision, 10000);
+      strcpy (speed_time_string, "CPU cycle counter");
+
+      /* only used if a supplementary method is chosen below */
+      cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
+        * speed_cycletime;
+
+      if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
+        {
+          /* this is a good combination */
+          use_grus = 1;
+          supplement_unittime = grus_unittime = 1.0e-6;
+          strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
+        }
+      else if (have_cycles == 1)
+        {
+          /* When speed_cyclecounter has a limited range, look for something
+             to supplement it. */
+          if (have_gtod && gettimeofday_microseconds_p())
+            {
+              use_gtod = 1;
+              supplement_unittime = gtod_unittime = 1.0e-6;
+              strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
+            }
+          else if (have_grus)
+            {
+              use_grus = 1;
+              supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
+              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
+            }
+          else if (have_times)
+            {
+              use_times = 1;
+              supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
+              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
+            }
+          else if (have_gtod)
+            {
+              use_gtod = 1;
+              supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
+              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
+            }
+          else
+            {
+              fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
+              fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
+            }
+        }
+
+      if (use_grus || use_times || use_gtod)
+        {
+          /* must know cycle period to compare cycles to other measuring
+             (via cycles_limit) */
+          speed_cycletime_need_seconds ();
+
+          if (speed_precision * supplement_unittime > cycles_limit)
+            {
+              fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
+              fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
+              fprintf (stderr, "    (%s)\n", speed_time_string);
+            }
+        }
+    }
+  else if (have_stck)
+    {
+      strcpy (speed_time_string, "STCK timestamp");
+      /* stck is in units of 2^-12 microseconds, which is very likely higher
+         resolution than a cpu cycle */
+      if (speed_cycletime == 0.0)
+        speed_cycletime_fail
+          ("Need to know CPU frequency for effective stck unit");
+      speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
+      DEFAULT (speed_precision, 10000);
+    }
+  else if (have_mftb && mftb_works_p ())
+    {
+      use_mftb = 1;
+      DEFAULT (speed_precision, 10000);
+      speed_unittime = mftb_unittime;
+      sprintf (speed_time_string, "mftb counter (%s)",
+               unittime_string (speed_unittime));
+    }
+  else if (have_sgi && sgi_works_p ())
+    {
+      use_sgi = 1;
+      DEFAULT (speed_precision, 10000);
+      speed_unittime = sgi_unittime;
+      sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
+               unittime_string (speed_unittime));
+      /* supplemented with getrusage, which we assume to have 1ms resolution */
+      use_grus = 1;
+      supplement_unittime = 1e-3;
+    }
+  else if (have_rrt)
+    {
+      timebasestruct_t t;
+      use_rrt = 1;
+      DEFAULT (speed_precision, 10000);
+      read_real_time (&t, sizeof(t));
+      switch (t.flag) {
+      case RTC_POWER:
+        /* FIXME: What's the actual RTC resolution? */
+        speed_unittime = 1e-7;
+        strcpy (speed_time_string, "read_real_time() power nanoseconds");
+        break;
+      case RTC_POWER_PC:
+        /* convert a value of exactly 2^32 ticks to seconds, to find the
+           length of one tick */
+        t.tb_high = 1;
+        t.tb_low = 0;
+        time_base_to_time (&t, sizeof(t));
+        speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
+        sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
+                 unittime_string (speed_unittime));
+        break;
+      default:
+        fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
+                 t.flag);
+        abort ();
+      }
+    }
+  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
+    {
+      /* use clock_gettime if microsecond or better resolution */
+    choose_cgt:
+      use_cgt = 1;
+      speed_unittime = cgt_unittime;
+      DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
+      strcpy (speed_time_string, "microsecond accurate clock_gettime()");
+    }
+  else if (have_times && clk_tck() > 1000000)
+    {
+      /* Cray vector systems have times() which is clock cycle resolution
+         (eg. 450 MHz).  */
+      DEFAULT (speed_precision, 10000);
+      goto choose_times;
+    }
+  else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
+    {
+      use_grus = 1;
+      speed_unittime = grus_unittime = 1.0e-6;
+      DEFAULT (speed_precision, 1000);
+      strcpy (speed_time_string, "microsecond accurate getrusage()");
+    }
+  else if (have_gtod && gettimeofday_microseconds_p())
+    {
+      use_gtod = 1;
+      speed_unittime = gtod_unittime = 1.0e-6;
+      DEFAULT (speed_precision, 1000);
+      strcpy (speed_time_string, "microsecond accurate gettimeofday()");
+    }
+  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
+    {
+      /* use clock_gettime if 1 tick or better resolution */
+      goto choose_cgt;
+    }
+  else if (have_times)
+    {
+      use_tick_boundary = 1;
+      DEFAULT (speed_precision, 200);
+      /* the high resolution times() case above jumps in here, skipping the
+         tick boundary setting */
+    choose_times:
+      use_times = 1;
+      speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
+      sprintf (speed_time_string, "%s clock tick times()",
+               unittime_string (speed_unittime));
+    }
+  else if (have_grus)
+    {
+      use_grus = 1;
+      use_tick_boundary = 1;
+      speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
+      DEFAULT (speed_precision, 200);
+      /* no newline on the end, same as all the other method strings; one
+         is added when speed_time_string is printed */
+      sprintf (speed_time_string, "%s clock tick getrusage()",
+               unittime_string (speed_unittime));
+    }
+  else if (have_gtod)
+    {
+      use_gtod = 1;
+      use_tick_boundary = 1;
+      speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
+      DEFAULT (speed_precision, 200);
+      sprintf (speed_time_string, "%s clock tick gettimeofday()",
+               unittime_string (speed_unittime));
+    }
+  else
+    {
+      fprintf (stderr, "No time measuring method available\n");
+      fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
+      abort ();
+    }
+
+  if (speed_option_verbose)
+    {
+      printf ("speed_time_init: %s\n", speed_time_string);
+      printf ("    speed_precision     %d\n", speed_precision);
+      printf ("    speed_unittime      %.2g\n", speed_unittime);
+      if (supplement_unittime)
+        printf ("    supplement_unittime %.2g\n", supplement_unittime);
+      printf ("    use_tick_boundary   %d\n", use_tick_boundary);
+      if (have_cycles)
+        printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
+    }
+}
+
+
+
+/* Spin until the clock is seen to tick over, so a measurement starts right
+   at a tick boundary.  Each of these routines leaves the first sample
+   taken after the change in the matching "start_foo" variable.  */
+
+void
+grus_tick_boundary (void)
+{
+  struct_rusage before;
+
+  getrusage (0, &before);
+  for (;;)
+    {
+      getrusage (0, &start_grus);
+      if (start_grus.ru_utime.tv_usec != before.ru_utime.tv_usec)
+        break;
+    }
+}
+
+/* Call gettimeofday repeatedly until its microseconds field changes,
+   leaving the first changed sample in start_gtod.  */
+void
+gtod_tick_boundary (void)
+{
+  struct_timeval before;
+
+  gettimeofday (&before, NULL);
+  for (;;)
+    {
+      gettimeofday (&start_gtod, NULL);
+      if (start_gtod.tv_usec != before.tv_usec)
+        break;
+    }
+}
+
+/* Call times repeatedly until tms_utime changes, leaving the first changed
+   sample in start_times.  */
+void
+times_tick_boundary (void)
+{
+  struct_tms before;
+
+  times (&before);
+  for (;;)
+    {
+      times (&start_times);
+      if (start_times.tms_utime != before.tms_utime)
+        break;
+    }
+}
+
+
+/* "have_" values are tested to let unused code go dead.
+
+ Start a measurement: make sure a method has been chosen, then take a
+ starting sample for every method in use. With use_tick_boundary set,
+ getrusage, gettimeofday and times first wait for a tick to pass. The
+ order of sampling is deliberate, with the cycle counter read last. */
+
+void
+speed_starttime (void)
+{
+ speed_time_init ();
+
+ if (have_grus && use_grus)
+ {
+ if (use_tick_boundary)
+ grus_tick_boundary ();
+ else
+ getrusage (0, &start_grus);
+ }
+
+ if (have_gtod && use_gtod)
+ {
+ if (use_tick_boundary)
+ gtod_tick_boundary ();
+ else
+ gettimeofday (&start_gtod, NULL);
+ }
+
+ if (have_times && use_times)
+ {
+ if (use_tick_boundary)
+ times_tick_boundary ();
+ else
+ times (&start_times);
+ }
+
+ if (have_cgt && use_cgt)
+ clock_gettime (CGT_ID, &start_cgt);
+
+ if (have_rrt && use_rrt)
+ read_real_time (&start_rrt, sizeof(start_rrt));
+
+ if (have_sgi && use_sgi)
+ start_sgi = *sgi_addr;
+
+ if (have_mftb && use_mftb)
+ MFTB (start_mftb);
+
+ if (have_stck && use_stck)
+ STCK (start_stck);
+
+ /* Cycles sampled last for maximum accuracy. */
+ if (have_cycles && use_cycles)
+ speed_cyclecounter (start_cycles);
+}
+
+
+/* Return end minus start for two speed_cyclecounter() samples, as a count
+   of cycles in a "double".
+
+   The subtraction is done on the integer words first, so the result is
+   good even when the raw counter values have more than the 53 bits a
+   double normally holds.
+
+   With have_cycles == 1 only the low word is used.  Otherwise the low
+   word may carry more than 32 bits (a borrow is allowed for with M_2POWU),
+   while the high word is always weighted by 2**32.  */
+
+double
+speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
+{
+  unsigned low_diff;
+  double cycles;
+
+  if (have_cycles == 1)
+    return (end[0] - start[0]);
+
+  low_diff = end[0] - start[0];
+  cycles = low_diff;
+  if (low_diff > end[0])
+    cycles -= M_2POWU;   /* low word borrowed */
+  cycles += (end[1] - start[1]) * M_2POW32;
+  return cycles;
+}
+
+
+/* Return end minus start for two MFTB samples as a "double" count of time
+   base ticks.  Element 0 is the low word and element 1 the high word,
+   which is weighted by 2**32; a borrow out of the low word is allowed
+   for.  */
+double
+speed_mftb_diff (const unsigned end[2], const unsigned start[2])
+{
+  unsigned low_diff = end[0] - start[0];
+  double ticks = (double) low_diff;
+
+  if (low_diff > end[0])
+    ticks -= M_2POW32;   /* low word borrowed */
+  ticks += (end[1] - start[1]) * M_2POW32;
+  return ticks;
+}
+
+
+/* Calculate the difference between "start" and "end" using fields "sec" and
+ "psec", where each "psec" is a "punit" of a second.
+
+ The macro expands to a complete function body, and expects the enclosing
+ function to have pointer parameters called "end" and "start". The value
+ returned is a double, in seconds.
+
+ The seconds parts are allowed to cancel before being combined with the
+ psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
+ double.
+
+ Total time is only calculated in a "double" since an integer count of
+ psecs might overflow. 2^32 microseconds is only a bit over an hour, or
+ 2^32 nanoseconds only about 4 seconds.
+
+ The casts to "long" are for the benefit of timebasestruct_t, where the
+ fields are only "unsigned int", but we want a signed difference. */
+
+#define DIFF_SECS_ROUTINE(sec, psec, punit) \
+ { \
+ long sec_diff, psec_diff; \
+ sec_diff = (long) end->sec - (long) start->sec; \
+ psec_diff = (long) end->psec - (long) start->psec; \
+ return (double) sec_diff + punit * (double) psec_diff; \
+ }
+
+/* Return end minus start in seconds, for two timeval samples (seconds and
+ microseconds). */
+double
+timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
+{
+ DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
+}
+
+/* Return end minus start in seconds, using the ru_utime field of two
+ rusage samples. */
+double
+rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
+{
+ DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
+}
+
+/* Return end minus start in seconds, for two timespec samples (seconds and
+ nanoseconds). */
+double
+timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
+{
+ DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
+}
+
+/* This is for use after time_base_to_time, ie. for seconds and nanoseconds.
+ Return end minus start in seconds, with tb_high taken as seconds and
+ tb_low as nanoseconds. */
+double
+timebasestruct_diff_secs (const timebasestruct_t *end,
+ const timebasestruct_t *start)
+{
+ DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
+}
+
+
+/* End a measurement begun with speed_starttime() and return the elapsed
+ time in seconds, according to whichever method is in use. Every method
+ in use is sampled first, then the result is picked by the tests below in
+ the order written. A negative result is returned as 0.0. If no method
+ at all is in use the program is aborted. */
+double
+speed_endtime (void)
+{
+/* The three END_ macros differ only in the verbose message printed; each
+ stores "value" in result and jumps to "done". */
+#define END_USE(name,value) \
+ do { \
+ if (speed_option_verbose >= 3) \
+ printf ("speed_endtime(): used %s\n", name); \
+ result = value; \
+ goto done; \
+ } while (0)
+
+#define END_ENOUGH(name,value) \
+ do { \
+ if (speed_option_verbose >= 3) \
+ printf ("speed_endtime(): %s gives enough precision\n", name); \
+ result = value; \
+ goto done; \
+ } while (0)
+
+#define END_EXCEED(name,value) \
+ do { \
+ if (speed_option_verbose >= 3) \
+ printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
+ name); \
+ result = value; \
+ goto done; \
+ } while (0)
+
+ unsigned end_cycles[2];
+ stck_t end_stck;
+ unsigned end_mftb[2];
+ unsigned end_sgi;
+ timebasestruct_t end_rrt;
+ struct_timespec end_cgt;
+ struct_timeval end_gtod;
+ struct_rusage end_grus;
+ struct_tms end_times;
+ double t_gtod, t_grus, t_times, t_cgt;
+ double t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
+ double result;
+
+ /* Cycles sampled first for maximum accuracy.
+ "have_" values tested to let unused code go dead. */
+
+ if (have_cycles && use_cycles) speed_cyclecounter (end_cycles);
+ if (have_stck && use_stck) STCK (end_stck);
+ if (have_mftb && use_mftb) MFTB (end_mftb);
+ if (have_sgi && use_sgi) end_sgi = *sgi_addr;
+ if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt));
+ if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt);
+ if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL);
+ if (have_grus && use_grus) getrusage (0, &end_grus);
+ if (have_times && use_times) times (&end_times);
+
+ result = -1.0;
+
+ /* at verbose level 4, show the raw start and end samples */
+ if (speed_option_verbose >= 4)
+ {
+ printf ("speed_endtime():\n");
+ if (use_cycles)
+ printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n",
+ start_cycles[1], start_cycles[0],
+ end_cycles[1], end_cycles[0]);
+
+ if (use_stck)
+ printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck);
+
+ if (use_mftb)
+ printf (" mftb 0x%X,%08X -> 0x%X,%08X\n",
+ start_mftb[1], start_mftb[0],
+ end_mftb[1], end_mftb[0]);
+
+ if (use_sgi)
+ printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi);
+
+ if (use_rrt)
+ printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n",
+ start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
+ end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
+
+ if (use_cgt)
+ printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n",
+ (long) start_cgt.tv_sec, (long) start_cgt.tv_nsec,
+ (long) end_cgt.tv_sec, (long) end_cgt.tv_nsec);
+
+ if (use_gtod)
+ printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n",
+ (long) start_gtod.tv_sec, (long) start_gtod.tv_usec,
+ (long) end_gtod.tv_sec, (long) end_gtod.tv_usec);
+
+ if (use_grus)
+ printf (" getrusage %ld.%06ld -> %ld.%06ld\n",
+ (long) start_grus.ru_utime.tv_sec,
+ (long) start_grus.ru_utime.tv_usec,
+ (long) end_grus.ru_utime.tv_sec,
+ (long) end_grus.ru_utime.tv_usec);
+
+ if (use_times)
+ printf (" times %ld -> %ld\n",
+ start_times.tms_utime, end_times.tms_utime);
+ }
+
+ if (use_rrt)
+ {
+ time_base_to_time (&start_rrt, sizeof(start_rrt));
+ time_base_to_time (&end_rrt, sizeof(end_rrt));
+ t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
+ END_USE ("read_real_time()", t_rrt);
+ }
+
+ if (use_cgt)
+ {
+ t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
+ END_USE ("clock_gettime()", t_cgt);
+ }
+
+ if (use_grus)
+ {
+ t_grus = rusage_diff_secs (&end_grus, &start_grus);
+
+ /* Use getrusage() if the cycle counter limit would be exceeded, or if
+ it provides enough accuracy already. */
+ if (use_cycles)
+ {
+ if (t_grus >= speed_precision*grus_unittime)
+ END_ENOUGH ("getrusage()", t_grus);
+ if (t_grus >= cycles_limit)
+ END_EXCEED ("getrusage()", t_grus);
+ }
+ }
+
+ if (use_times)
+ {
+ t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
+
+ /* Use times() if the cycle counter limit would be exceeded, or if
+ it provides enough accuracy already. */
+ if (use_cycles)
+ {
+ if (t_times >= speed_precision*times_unittime)
+ END_ENOUGH ("times()", t_times);
+ if (t_times >= cycles_limit)
+ END_EXCEED ("times()", t_times);
+ }
+ }
+
+ if (use_gtod)
+ {
+ t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
+
+ /* Use gettimeofday() if it measured a value bigger than the cycle
+ counter can handle. */
+ if (use_cycles)
+ {
+ if (t_gtod >= cycles_limit)
+ END_EXCEED ("gettimeofday()", t_gtod);
+ }
+ }
+
+ if (use_mftb)
+ {
+ t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
+ END_USE ("mftb", t_mftb);
+ }
+
+ if (use_stck)
+ {
+ t_stck = (end_stck - start_stck) * STCK_PERIOD;
+ END_USE ("stck", t_stck);
+ }
+
+ if (use_sgi)
+ {
+ t_sgi = (end_sgi - start_sgi) * sgi_unittime;
+ END_USE ("SGI hardware counter", t_sgi);
+ }
+
+ if (use_cycles)
+ {
+ t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
+ * speed_cycletime;
+ END_USE ("cycle counter", t_cycles);
+ }
+
+ /* No counter in use; fall back on the system call times computed above,
+ microsecond accurate ones first. */
+ if (use_grus && getrusage_microseconds_p())
+ END_USE ("getrusage()", t_grus);
+
+ if (use_gtod && gettimeofday_microseconds_p())
+ END_USE ("gettimeofday()", t_gtod);
+
+ if (use_times) END_USE ("times()", t_times);
+ if (use_grus) END_USE ("getrusage()", t_grus);
+ if (use_gtod) END_USE ("gettimeofday()", t_gtod);
+
+ fprintf (stderr, "speed_endtime(): oops, no time method available\n");
+ abort ();
+
+ done:
+ if (result < 0.0)
+ {
+ if (speed_option_verbose >= 2)
+ fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
+ result = 0.0;
+ }
+ return result;
+}
diff --git a/gmp-6.3.0/tune/tune-gcd-p.c b/gmp-6.3.0/tune/tune-gcd-p.c
new file mode 100644
index 0000000..3b5a4a8
--- /dev/null
+++ b/gmp-6.3.0/tune/tune-gcd-p.c
@@ -0,0 +1,225 @@
+/* tune-gcd-p
+
+ Tune the choice for splitting p in divide-and-conquer gcd.
+
+Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define TUNE_GCD_P 1
+
+#include "../mpn/gcd.c"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "speed.h"
+
+/* Look for a minimum of F over the integers START..END by repeatedly
+   cutting the interval into thirds.  CTX is handed to F unchanged.  The
+   smallest value among the four points of the final round is stored in
+   *MINP and the argument that produced it is returned.
+   FIXME: Implement golden-section / fibonacci search.  */
+static int
+search (double *minp, double (*f)(void *, int), void *ctx, int start, int end)
+{
+  int pos[4];
+  double val[4];
+  int best;
+
+  /* The end points are measured once; the interior points every round.  */
+  pos[0] = start;
+  pos[3] = end;
+  val[0] = f (ctx, pos[0]);
+  val[3] = f (ctx, pos[3]);
+
+  for (;;)
+    {
+      int k;
+      int span = pos[3] - pos[0];
+
+      pos[1] = pos[0] + span / 3;
+      pos[2] = pos[0] + 2 * span / 3;
+
+      val[1] = f (ctx, pos[1]);
+      val[2] = f (ctx, pos[2]);
+
+#if 0
+      printf("%d: %f, %d: %f, %d:, %f %d: %f\n",
+             pos[0], val[0], pos[1], val[1], pos[2], val[2], pos[3], val[3]);
+#endif
+      /* First index holding the smallest value.  */
+      best = 0;
+      for (k = 1; k < 4; k++)
+        {
+          if (val[k] < val[best])
+            best = k;
+        }
+
+      if (span <= 4)
+        break;
+
+      if (best < 2)
+        {
+          /* Minimum is in the lower part: drop the upper third.  */
+          pos[3] = pos[2];
+          val[3] = val[2];
+        }
+      else
+        {
+          /* Minimum is in the upper part: drop the lower third.  */
+          pos[0] = pos[1];
+          val[0] = val[1];
+        }
+    }
+
+  *minp = val[best];
+  return pos[best];
+}
+
+/* qsort() comparison callback for doubles, ascending: returns -1, 0 or 1
+   as *AP is less than, equal to, or greater than *BP.  */
+static int
+compare_double(const void *ap, const void *bp)
+{
+  const double lhs = * (const double *) ap;
+  const double rhs = * (const double *) bp;
+
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Sort the N values of V into ascending order, in place, and return the
+   element at index N/2 (the median for odd N).  */
+static double
+median (double *v, size_t n)
+{
+  size_t mid = n / 2;
+
+  qsort (v, n, sizeof v[0], compare_double);
+  return v[mid];
+}
+
+/* Execute CODE five times, bracketing each run with speed_starttime() and
+ speed_endtime(), and assign the median of the five timings to RES. The
+ locals are prefixed with time_ so they do not collide with names used
+ inside CODE. */
+#define TIME(res, code) do { \
+ double time_measurement[5]; \
+ unsigned time_i; \
+ \
+ for (time_i = 0; time_i < 5; time_i++) \
+ { \
+ speed_starttime(); \
+ code; \
+ time_measurement[time_i] = speed_endtime(); \
+ } \
+ res = median(time_measurement, 5); \
+} while (0)
+
+/* Operands and work areas shared between main() and bench_gcd(). n is the
+ operand size in limbs. ap and bp hold the source operands; before each
+ mpn_gcd call they are copied into up and vp, which are what mpn_gcd is
+ given as inputs, and gp receives the result. */
+struct bench_data
+{
+ mp_size_t n;
+ mp_ptr ap;
+ mp_ptr bp;
+ mp_ptr up;
+ mp_ptr vp;
+ mp_ptr gp;
+};
+
+/* Callback for search(): set p_table[n] to P for the operand size held in
+   CTX (a struct bench_data) and return the median time, as produced by
+   TIME, of an n-limb mpn_gcd on fresh copies of the operands.  */
+static double
+bench_gcd (void *ctx, int p)
+{
+  struct bench_data *bd = (struct bench_data *) ctx;
+  double elapsed;
+
+  p_table[bd->n] = p;
+
+  /* The sources are recopied before every call, presumably because
+     mpn_gcd overwrites its inputs.  */
+  TIME (elapsed,
+        {
+          MPN_COPY (bd->up, bd->ap, bd->n);
+          MPN_COPY (bd->vp, bd->bp, bd->n);
+          mpn_gcd (bd->gp, bd->up, bd->n, bd->vp, bd->n);
+        });
+
+  return elapsed;
+}
+
+/* For every operand size n from 100 up to P_TABLE_SIZE-1: time mpn_gcd
+   with p_table[n] = 0 (the baseline, called lehmer_time below), let
+   search() find the best p in [n/5, 4n/5], print one result line
+   "n best_p best_p/n [speedup%]", and store the winner in p_table[n].
+   A p that is not faster than the baseline, or that gains less than 1%,
+   is recorded as 0.  ARGC and ARGV are not used.  */
+int
+main(int argc, char **argv)
+{
+  gmp_randstate_t rands;
+  struct bench_data data;
+  mp_size_t n;
+
+  TMP_DECL;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  gmp_randinit_default (rands);
+
+  TMP_MARK;
+
+  data.ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+
+  mpn_random (data.ap, P_TABLE_SIZE);
+  mpn_random (data.bp, P_TABLE_SIZE);
+
+  memset (p_table, 0, sizeof(p_table));
+
+  for (n = 100; n < P_TABLE_SIZE; n++)
+    {
+      mp_size_t best_p;
+      double best_time;
+      double lehmer_time;
+
+      /* Keep the high limb of each n-limb operand nonzero.  */
+      if (data.ap[n-1] == 0)
+        data.ap[n-1] = 1;
+
+      if (data.bp[n-1] == 0)
+        data.bp[n-1] = 1;
+
+      data.n = n;
+
+      lehmer_time = bench_gcd (&data, 0);
+
+      best_p = search (&best_time, bench_gcd, &data, n/5, 4*n/5);
+      if (best_time > lehmer_time)
+        best_p = 0;
+
+      /* mp_size_t is a signed type whose width varies between ABIs, so
+         print it through long (as tuneup.c does) rather than with %zu.  */
+      printf("%6ld %6ld %5.3g", (long) n, (long) best_p, (double) best_p / n);
+      if (best_p > 0)
+        {
+          double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+          printf(" %5.3g%%", speedup);
+          if (speedup < 1.0)
+            {
+              printf(" (ignored)");
+              best_p = 0;
+            }
+        }
+      printf("\n");
+
+      p_table[n] = best_p;
+    }
+  TMP_FREE;
+  gmp_randclear(rands);
+  return 0;
+}
diff --git a/gmp-6.3.0/tune/tuneup.c b/gmp-6.3.0/tune/tuneup.c
new file mode 100644
index 0000000..8ae211e
--- /dev/null
+++ b/gmp-6.3.0/tune/tuneup.c
@@ -0,0 +1,3072 @@
+/* Create tuned thresholds for various algorithms.
+
+Copyright 1999-2003, 2005, 2006, 2008-2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+/* Usage: tuneup [-t] [-t] [-p precision]
+
+ -t turns on some diagnostic traces, a second -t turns on more traces.
+
+ Notes:
+
+ The code here isn't a vision of loveliness, mainly because it's subject
+ to ongoing changes according to new things wanting to be tuned, and
+ practical requirements of systems tested.
+
+ Sometimes running the program twice produces slightly different results.
+ This is probably because there's so little separating algorithms near
+ their crossover, and on that basis it should make little or no difference
+ to the final speed of the relevant routines, but nothing has been done to
+ check that carefully.
+
+ Algorithm:
+
+ The thresholds are determined as follows. A crossover may not be a
+ single size but rather a range where it oscillates between method A or
+ method B faster. If the threshold is set making B used where A is faster
+ (or vice versa) that's bad. Badness is the percentage time lost and
+ total badness is the sum of this over all sizes measured. The threshold
+ is set to minimize total badness.
+
+ Suppose, as sizes increase, method B becomes faster than method A. The
+ effect of the rule is that, as you look at increasing sizes, isolated
+ points where B is faster are ignored, but when it's consistently faster,
+ or faster on balance, then the threshold is set there. The same result
+ is obtained thinking in the other direction of A becoming faster at
+ smaller sizes.
+
+ In practice the thresholds tend to be chosen to bring on the next
+ algorithm fairly quickly.
+
+ This rule is attractive because it's got a basis in reason and is fairly
+ easy to implement, but no work has been done to actually compare it in
+ absolute terms to other possibilities.
+
+ Implementation:
+
+ In a normal library build the thresholds are constants. To tune them
+ selected objects are recompiled with the thresholds as global variables
+ instead. #define TUNE_PROGRAM_BUILD does this, with help from code at
+ the end of gmp-impl.h, and rules in tune/Makefile.am.
+
+ MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n. The
+ threshold is set to "size+1" to avoid karatsuba, or to "size" to use one
+ level, but recurse into the basecase.
+
+ MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value.
+ Other routines in turn will make use of both of those. Naturally the
+ thresholds that others depend on must be tuned first.
+
+ In a couple of cases, like DIVEXACT_1_THRESHOLD, there's no recompiling,
+ just a threshold based on comparing two routines (mpn_divrem_1 and
+ mpn_divexact_1), and no further use of the value determined.
+
+ Flags like USE_PREINV_MOD_1 or JACOBI_BASE_METHOD are even simpler, being
+ just comparisons between certain routines on representative data.
+
+ Shortcuts are applied when native (assembler) versions of routines exist.
+ For instance a native mpn_sqr_basecase is assumed to be always faster
+ than mpn_mul_basecase, with no measuring.
+
+ No attempt is made to tune within assembler routines, for instance
+ DIVREM_1_NORM_THRESHOLD. An assembler mpn_divrem_1 is expected to be
+ written and tuned all by hand. Assembler routines that might have hard
+ limits are recompiled though, to make them accept a bigger range of sizes
+ than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr.
+
+ Limitations:
+
+ The FFTs aren't subject to the same badness rule as the other thresholds,
+ so each k is probably being brought on a touch early. This isn't likely
+ to make a difference, and the simpler probing means fewer tests.
+
+*/
+
+#define TUNE_PROGRAM_BUILD 1 /* for gmp-impl.h */
+
+#include "config.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "tests.h"
+#include "speed.h"
+
+#if !HAVE_DECL_OPTARG
+extern char *optarg;
+extern int optind, opterr;
+#endif
+
+
+/* Upper bound on the sizes measured by one() when a param_t leaves
+ max_size at zero. */
+#define DEFAULT_MAX_SIZE 1000 /* limbs */
+
+#if WANT_FFT
+mp_size_t option_fft_max_size = 50000; /* limbs */
+#else
+mp_size_t option_fft_max_size = 0;
+#endif
+/* Trace level: code in this file prints extra diagnostics at >= 1 and
+ more at >= 2. */
+int option_trace = 0;
+int option_fft_trace = 0;
+/* Shared measurement parameters: one() sets s.size and passes &s to
+ tuneup_measure(), which fills in s.xp and s.yp. */
+struct speed_params s;
+
+/* Measurements accumulated by add_dat() and examined by analyze_dat().
+ size is the operand size measured and d the relative time difference
+ computed in one(): d >= 0 when method B was the slower one at that size,
+ d < 0 when method B was faster. ndat is the number of entries in use and
+ allocdat the number of entries allocated. */
+struct dat_t {
+ mp_size_t size;
+ double d;
+} *dat = NULL;
+int ndat = 0;
+int allocdat = 0;
+
+/* This is not defined if mpn_sqr_basecase doesn't declare a limit. In that
+ case use zero here, which for params.max_size means no limit. */
+#ifndef TUNE_SQR_TOOM2_MAX
+#define TUNE_SQR_TOOM2_MAX 0
+#endif
+
+/* The thresholds being tuned, as global variables (see "Implementation" in
+ the comment at the top of this file: in a normal build they are
+ constants). one() overwrites the variable it is handed while measuring
+ and stores the final result there; the values below are only the
+ starting points. */
+mp_size_t mul_toom22_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom33_threshold = MUL_TOOM33_THRESHOLD_LIMIT;
+mp_size_t mul_toom44_threshold = MUL_TOOM44_THRESHOLD_LIMIT;
+mp_size_t mul_toom6h_threshold = MUL_TOOM6H_THRESHOLD_LIMIT;
+mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT;
+mp_size_t mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_fft_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX;
+mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX;
+mp_size_t sqr_toom2_threshold
+ = (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
+mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
+mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
+mp_size_t sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
+mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
+mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX;
+mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
+mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
+mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrlo_basecase_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrlo_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrlo_sqr_threshold = MP_SIZE_T_MAX;
+mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX;
+mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
+mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX;
+mp_size_t div_qr_2_pi2_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_divappr_q_threshold = MP_SIZE_T_MAX;
+mp_size_t mu_div_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t mu_divappr_q_threshold = MP_SIZE_T_MAX;
+mp_size_t mupi_div_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t mu_div_q_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t dc_bdiv_q_threshold = MP_SIZE_T_MAX;
+mp_size_t mu_bdiv_qr_threshold = MP_SIZE_T_MAX;
+mp_size_t mu_bdiv_q_threshold = MP_SIZE_T_MAX;
+mp_size_t inv_mulmod_bnm1_threshold = MP_SIZE_T_MAX;
+mp_size_t inv_newton_threshold = MP_SIZE_T_MAX;
+mp_size_t inv_appr_threshold = MP_SIZE_T_MAX;
+mp_size_t binv_newton_threshold = MP_SIZE_T_MAX;
+mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX;
+mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX;
+mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX;
+mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX;
+mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX;
+int div_qr_1n_pi1_method = 0;
+mp_size_t div_qr_1_norm_threshold = MP_SIZE_T_MAX;
+mp_size_t div_qr_1_unnorm_threshold = MP_SIZE_T_MAX;
+mp_size_t divrem_1_norm_threshold = MP_SIZE_T_MAX;
+mp_size_t divrem_1_unnorm_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_norm_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_unnorm_threshold = MP_SIZE_T_MAX;
+int mod_1_1p_method = 0;
+mp_size_t mod_1n_to_mod_1_1_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1u_to_mod_1_1_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX;
+mp_size_t preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX;
+mp_size_t divrem_2_threshold = MP_SIZE_T_MAX;
+mp_size_t get_str_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t get_str_precompute_threshold = MP_SIZE_T_MAX;
+mp_size_t set_str_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t set_str_precompute_threshold = MP_SIZE_T_MAX;
+mp_size_t fac_odd_threshold = 0;
+mp_size_t fac_dsc_threshold = FAC_DSC_THRESHOLD_LIMIT;
+
+mp_size_t fft_modf_sqr_threshold = MP_SIZE_T_MAX;
+mp_size_t fft_modf_mul_threshold = MP_SIZE_T_MAX;
+
+/* Parameters for one threshold search by one(). Fields left zero are
+ given defaults by one() where noted.
+
+ name printed as the name of the #define
+ function timed with the threshold set to size+1 (method A)
+ function2 timed with the threshold set to size (method B);
+ defaults to function
+ step_factor, step the size advances by the larger of
+ floor(size*step_factor) and step; defaults 0.01, 1
+ function_fudge multiplies the time measured for function;
+ default 1.0
+ stop_since_change stop once the implied threshold has been unchanged
+ for more than this many measurements; default 80
+ stop_factor stop once method A takes at least this many times
+ as long as method B; default 1.2
+ min_size first size measured; default 10
+ min_is_always if nonzero, a threshold equal to min_size is
+ reported as 0
+ max_size measuring stays below this size; default
+ DEFAULT_MAX_SIZE
+ check_size if nonzero, both methods are first compared at this
+ size and the threshold becomes MP_SIZE_T_MAX unless
+ function2 is at least 4% faster there
+ size_extra added to s->size by tuneup_measure() for the
+ duration of a measurement
+ data_high DATA_HIGH_LT_R reduces the high limbs of the
+ operands modulo s->r, DATA_HIGH_GE_R ORs s->r into
+ them; zero leaves them as generated
+ noprint if nonzero, the #define is printed only when
+ tracing is on */
+struct param_t {
+ const char *name;
+ speed_function_t function;
+ speed_function_t function2;
+ double step_factor; /* how much to step relatively */
+ int step; /* how much to step absolutely */
+ double function_fudge; /* multiplier for "function" speeds */
+ int stop_since_change;
+ double stop_factor;
+ mp_size_t min_size;
+ int min_is_always;
+ mp_size_t max_size;
+ mp_size_t check_size;
+ mp_size_t size_extra;
+
+#define DATA_HIGH_LT_R 1
+#define DATA_HIGH_GE_R 2
+ int data_high;
+
+ int noprint;
+};
+
+
+/* These are normally undefined when false, which suits "#if" fine.
+ But give them zero values so they can be used in plain C "if"s. */
+#ifndef UDIV_PREINV_ALWAYS
+#define UDIV_PREINV_ALWAYS 0
+#endif
+#ifndef HAVE_NATIVE_mpn_divexact_1
+#define HAVE_NATIVE_mpn_divexact_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_div_qr_1n_pi1
+#define HAVE_NATIVE_mpn_div_qr_1n_pi1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_divrem_1
+#define HAVE_NATIVE_mpn_divrem_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_divrem_2
+#define HAVE_NATIVE_mpn_divrem_2 0
+#endif
+#ifndef HAVE_NATIVE_mpn_mod_1
+#define HAVE_NATIVE_mpn_mod_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_mod_1_1p
+#define HAVE_NATIVE_mpn_mod_1_1p 0
+#endif
+#ifndef HAVE_NATIVE_mpn_modexact_1_odd
+#define HAVE_NATIVE_mpn_modexact_1_odd 0
+#endif
+#ifndef HAVE_NATIVE_mpn_preinv_divrem_1
+#define HAVE_NATIVE_mpn_preinv_divrem_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_preinv_mod_1
+#define HAVE_NATIVE_mpn_preinv_mod_1 0
+#endif
+#ifndef HAVE_NATIVE_mpn_sqr_basecase
+#define HAVE_NATIVE_mpn_sqr_basecase 0
+#endif
+
+
+#define MAX3(a,b,c) MAX (MAX (a, b), c)
+
+/* Return a random limb from mpn_random with GMP_NUMB_HIGHBIT forced on.  */
+mp_limb_t
+randlimb_norm (void)
+{
+  mp_limb_t limb;
+
+  mpn_random (&limb, 1);
+  return limb | GMP_NUMB_HIGHBIT;
+}
+
+#define GMP_NUMB_HALFMASK ((CNST_LIMB(1) << (GMP_NUMB_BITS/2)) - 1)
+
+/* Return a random nonzero limb restricted to the bits of
+   GMP_NUMB_HALFMASK; a zero draw is replaced by 1.  */
+mp_limb_t
+randlimb_half (void)
+{
+  mp_limb_t limb;
+
+  mpn_random (&limb, 1);
+  limb &= GMP_NUMB_HALFMASK;
+  if (limb == 0)
+    limb = 1;
+  return limb;
+}
+
+
+/* Append the pair (SIZE, D) to dat[].  When the array is full it is first
+   grown by ALLOCDAT_STEP entries.  */
+void
+add_dat (mp_size_t size, double d)
+{
+#define ALLOCDAT_STEP  500
+  struct dat_t *slot;
+
+  ASSERT_ALWAYS (ndat <= allocdat);
+
+  if (ndat == allocdat)
+    {
+      size_t old_bytes = allocdat * sizeof(dat[0]);
+      size_t new_bytes = (allocdat+ALLOCDAT_STEP) * sizeof(dat[0]);
+
+      dat = (struct dat_t *)
+        __gmp_allocate_or_reallocate (dat, old_bytes, new_bytes);
+      allocdat += ALLOCDAT_STEP;
+    }
+
+  slot = &dat[ndat];
+  slot->size = size;
+  slot->d = d;
+  ndat++;
+}
+
+
+/* Return the index into dat[] of the entry at which the threshold should
+   be placed, based on the data accumulated so far; callers turn it into a
+   size with dat[index].size.  The index chosen minimizes the total
+   "badness" x described below; the first such index wins ties.  Returns 0
+   when dat[] is empty.
+
+   If FINAL is nonzero and option_trace >= 2, the running totals are
+   printed.  */
+mp_size_t
+analyze_dat (int final)
+{
+  double x, min_x;
+  int j, min_j;
+
+  /* If the threshold is set at dat[0].size, any positive values are bad. */
+  x = 0.0;
+  for (j = 0; j < ndat; j++)
+    if (dat[j].d > 0.0)
+      x += dat[j].d;
+
+  if (option_trace >= 2 && final)
+    {
+      printf ("\n");
+      printf ("x is the sum of the badness from setting thresh at given size\n");
+      printf (" (minimum x is sought)\n");
+      /* j equals ndat after the loop above, so dat[j] would be one past
+         the last entry; the total just computed belongs to dat[0].  */
+      if (ndat > 0)
+        printf ("size=%ld first x=%.4f\n", (long) dat[0].size, x);
+    }
+
+  min_x = x;
+  min_j = 0;
+
+
+  /* When stepping to the next dat[j].size, positive values are no longer
+     bad (so subtracted), negative values become bad (so add the absolute
+     value, meaning subtract). */
+  for (j = 0; j < ndat; x -= dat[j].d, j++)
+    {
+      if (option_trace >= 2 && final)
+        printf ("size=%ld x=%.4f\n", (long) dat[j].size, x);
+
+      if (x < min_x)
+        {
+          min_x = x;
+          min_j = j;
+        }
+    }
+
+  return min_j;
+}
+
+
+/* Measuring for recompiled mpn/generic/div_qr_1.c,
+ * mpn/generic/divrem_1.c, mpn/generic/mod_1.c and mpz/fac_ui.c */
+
+mp_limb_t mpn_div_qr_1_tune (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
+void mpz_fac_ui_tune (mpz_ptr, unsigned long);
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* Speed routine for mpn_mod_1_tune; the whole body, including the return,
+ is supplied by the SPEED_ROUTINE_MPN_MOD_1 macro. The parameter s
+ shadows the file-scope s. */
+double
+speed_mpn_mod_1_tune (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_tune);
+}
+/* Speed routine for mpn_divrem_1_tune; the whole body, including the
+ return, is supplied by the SPEED_ROUTINE_MPN_DIVREM_1 macro. */
+double
+speed_mpn_divrem_1_tune (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
+}
+/* Speed routine for mpz_fac_ui_tune; the whole body, including the return,
+ is supplied by the SPEED_ROUTINE_MPZ_FAC_UI macro. */
+double
+speed_mpz_fac_ui_tune (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);
+}
+/* Speed routine for mpn_div_qr_1_tune; the whole body, including the
+ return, is supplied by the SPEED_ROUTINE_MPN_DIV_QR_1 macro. */
+double
+speed_mpn_div_qr_1_tune (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1_tune);
+}
+
+/* Time FUN with speed_measure() on freshly generated random operands and
+ return the result (callers in this file treat -1.0 as "could not be
+ measured").
+
+ s->size is increased by param->size_extra for the duration of the call
+ and restored before returning. s->xp and s->yp are allocated from the
+ TMP area, filled by mpn_random, and have their high limbs adjusted
+ according to param->data_high. They are released by TMP_FREE before
+ returning, so the pointers left in S must not be used afterwards. A NULL
+ PARAM is treated as an all-zero param_t. */
+double
+tuneup_measure (speed_function_t fun,
+ const struct param_t *param,
+ struct speed_params *s)
+{
+ static struct param_t dummy;
+ double t;
+ TMP_DECL;
+
+ if (! param)
+ param = &dummy;
+
+ s->size += param->size_extra;
+
+ TMP_MARK;
+ SPEED_TMP_ALLOC_LIMBS (s->xp, s->size, 0);
+ SPEED_TMP_ALLOC_LIMBS (s->yp, s->size, 0);
+
+ mpn_random (s->xp, s->size);
+ mpn_random (s->yp, s->size);
+
+ /* Optionally force the high limbs below s->r, or to have s->r's bits set. */
+ switch (param->data_high) {
+ case DATA_HIGH_LT_R:
+ s->xp[s->size-1] %= s->r;
+ s->yp[s->size-1] %= s->r;
+ break;
+ case DATA_HIGH_GE_R:
+ s->xp[s->size-1] |= s->r;
+ s->yp[s->size-1] |= s->r;
+ break;
+ }
+
+ t = speed_measure (fun, s);
+
+ s->size -= param->size_extra;
+
+ TMP_FREE;
+ return t;
+}
+
+
+#define PRINT_WIDTH 31
+
+/* Print the opening of a #define line for NAME, padded to PRINT_WIDTH.
+ When tracing is on the line is ended with "..." straight away;
+ print_define_end_remark() then prints the #define prefix again before
+ the value. */
+void
+print_define_start (const char *name)
+{
+ printf ("#define %-*s ", PRINT_WIDTH, name);
+ if (option_trace)
+ printf ("...\n");
+}
+
+/* Finish a #define line: print VALUE (or the text MP_SIZE_T_MAX when it has
+   that value), then REMARK as a trailing C comment if it is not NULL,
+   then a newline, and flush stdout.  When tracing is on the
+   "#define NAME" prefix is printed again first.  */
+void
+print_define_end_remark (const char *name, mp_size_t value, const char *remark)
+{
+  if (option_trace)
+    printf ("#define %-*s ", PRINT_WIDTH, name);
+
+  if (value != MP_SIZE_T_MAX)
+    printf ("%5ld", (long) value);
+  else
+    printf ("MP_SIZE_T_MAX");
+
+  if (remark != NULL)
+    printf (" /* %s */", remark);
+
+  printf ("\n");
+  fflush (stdout);
+}
+
+/* Finish a #define line for NAME with VALUE, adding the remark "never"
+   when VALUE is MP_SIZE_T_MAX, "always" when it is 0, and no remark
+   otherwise.  */
+void
+print_define_end (const char *name, mp_size_t value)
+{
+  const char *remark = NULL;
+
+  if (value == MP_SIZE_T_MAX)
+    remark = "never";
+  else if (value == 0)
+    remark = "always";
+
+  print_define_end_remark (name, value, remark);
+}
+
+/* Print a complete "#define NAME VALUE" line, with the automatic
+ "never"/"always" remark chosen by print_define_end(). */
+void
+print_define (const char *name, mp_size_t value)
+{
+ print_define_start (name);
+ print_define_end (name, value);
+}
+
+/* Print a complete "#define NAME VALUE" line followed by REMARK as a
+ comment; a NULL REMARK prints no comment. */
+void
+print_define_remark (const char *name, mp_size_t value, const char *remark)
+{
+ print_define_start (name);
+ print_define_end_remark (name, value, remark);
+}
+
+/* Print "#define NAME VALUE" with a remark of the form
+   "X% faster than RUNNER_UP".  SPEEDUP is the ratio of the runner-up's
+   time to the winner's time, so 1.25 is reported as 25.00% faster.  */
+void
+print_define_with_speedup (const char *name, mp_size_t value,
+                           mp_size_t runner_up, double speedup)
+{
+  char buf[100];
+
+  /* mp_size_t need not be long, so convert explicitly for %ld, as the
+     other print routines in this file do.  */
+  snprintf (buf, sizeof(buf), "%.2f%% faster than %ld",
+            100.0 * (speedup - 1), (long) runner_up);
+  print_define_remark (name, value, buf);
+}
+
+/* Find the crossover between param->function (method A) and
+ param->function2 (method B) and store it in *THRESHOLD.
+
+ For each size from param->min_size up to, but not including,
+ param->max_size, both methods are timed -- A with *THRESHOLD set to
+ size+1 and B with *THRESHOLD set to size -- and their relative difference
+ is recorded with add_dat(). analyze_dat() then selects the entry giving
+ the least total badness, and that entry's size becomes the threshold.
+ Measuring ends early when any of the stop conditions below is met.
+
+ Zero fields of PARAM are replaced by defaults, so PARAM is modified.
+ The result is printed as a #define unless param->noprint is set and
+ tracing is off. Aborts if either method cannot be measured. */
+void
+one (mp_size_t *threshold, struct param_t *param)
+{
+ int since_positive, since_thresh_change;
+ int thresh_idx, new_thresh_idx;
+
+#define DEFAULT(x,n) do { if (! (x)) (x) = (n); } while (0)
+
+ DEFAULT (param->function_fudge, 1.0);
+ DEFAULT (param->function2, param->function);
+ DEFAULT (param->step_factor, 0.01); /* small steps by default */
+ DEFAULT (param->step, 1); /* small steps by default */
+ DEFAULT (param->stop_since_change, 80);
+ DEFAULT (param->stop_factor, 1.2);
+ DEFAULT (param->min_size, 10);
+ DEFAULT (param->max_size, DEFAULT_MAX_SIZE);
+
+ /* Optional quick check: if method B is not clearly faster at check_size,
+ report "never" without running the full search. */
+ if (param->check_size != 0)
+ {
+ double t1, t2;
+ s.size = param->check_size;
+
+ *threshold = s.size+1;
+ t1 = tuneup_measure (param->function, param, &s);
+
+ *threshold = s.size;
+ t2 = tuneup_measure (param->function2, param, &s);
+ if (t1 == -1.0 || t2 == -1.0)
+ {
+ printf ("Oops, can't run both functions at size %ld\n",
+ (long) s.size);
+ abort ();
+ }
+ t1 *= param->function_fudge;
+
+ /* ask that t2 is at least 4% below t1 */
+ if (t1 < t2*1.04)
+ {
+ if (option_trace)
+ printf ("function2 never enough faster: t1=%.9f t2=%.9f\n", t1, t2);
+ *threshold = MP_SIZE_T_MAX;
+ if (! param->noprint)
+ print_define (param->name, *threshold);
+ return;
+ }
+
+ if (option_trace >= 2)
+ printf ("function2 enough faster at size=%ld: t1=%.9f t2=%.9f\n",
+ (long) s.size, t1, t2);
+ }
+
+ if (! param->noprint || option_trace)
+ print_define_start (param->name);
+
+ ndat = 0;
+ since_positive = 0;
+ since_thresh_change = 0;
+ thresh_idx = 0;
+
+ if (option_trace >= 2)
+ {
+ printf (" algorithm-A algorithm-B ratio possible\n");
+ printf (" (seconds) (seconds) diff thresh\n");
+ }
+
+ for (s.size = param->min_size;
+ s.size < param->max_size;
+ s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step))
+ {
+ double ti, tiplus1, d;
+
+ /*
+ FIXME: check minimum size requirements are met, possibly by just
+ checking for the -1 returns from the speed functions.
+ */
+
+ /* using method A at this size */
+ *threshold = s.size+1;
+ ti = tuneup_measure (param->function, param, &s);
+ if (ti == -1.0)
+ abort ();
+ ti *= param->function_fudge;
+
+ /* using method B at this size */
+ *threshold = s.size;
+ tiplus1 = tuneup_measure (param->function2, param, &s);
+ if (tiplus1 == -1.0)
+ abort ();
+
+ /* Calculate the fraction by which the one or the other routine is
+ slower. */
+ if (tiplus1 >= ti)
+ d = (tiplus1 - ti) / tiplus1; /* >= 0: method B is the slower one */
+ else
+ d = (tiplus1 - ti) / ti; /* < 0: method B is faster */
+
+ add_dat (s.size, d);
+
+ new_thresh_idx = analyze_dat (0);
+
+ if (option_trace >= 2)
+ printf ("size=%ld %.9f %.9f % .4f %c %ld\n",
+ (long) s.size, ti, tiplus1, d,
+ ti > tiplus1 ? '#' : ' ',
+ (long) dat[new_thresh_idx].size);
+
+ /* Stop if the last time method i was faster was more than a
+ certain number of measurements ago. */
+#define STOP_SINCE_POSITIVE 200
+ if (d >= 0)
+ since_positive = 0;
+ else
+ if (++since_positive > STOP_SINCE_POSITIVE)
+ {
+ if (option_trace >= 1)
+ printf ("stopped due to since_positive (%d)\n",
+ STOP_SINCE_POSITIVE);
+ break;
+ }
+
+ /* Stop if method A has become slower by a certain factor. */
+ if (ti >= tiplus1 * param->stop_factor)
+ {
+ if (option_trace >= 1)
+ printf ("stopped due to ti >= tiplus1 * factor (%.1f)\n",
+ param->stop_factor);
+ break;
+ }
+
+ /* Stop if the threshold implied hasn't changed in a certain
+ number of measurements. (It's this condition that usually
+ stops the loop.) */
+ if (thresh_idx != new_thresh_idx)
+ since_thresh_change = 0, thresh_idx = new_thresh_idx;
+ else
+ if (++since_thresh_change > param->stop_since_change)
+ {
+ if (option_trace >= 1)
+ printf ("stopped due to since_thresh_change (%d)\n",
+ param->stop_since_change);
+ break;
+ }
+
+ /* Stop if the threshold implied is more than a certain number of
+ measurements ago. */
+#define STOP_SINCE_AFTER 500
+ if (ndat - thresh_idx > STOP_SINCE_AFTER)
+ {
+ if (option_trace >= 1)
+ printf ("stopped due to ndat - thresh_idx > amount (%d)\n",
+ STOP_SINCE_AFTER);
+ break;
+ }
+
+ /* Stop when the size limit is reached before the end of the
+ crossover, but only show this as an error for >= the default max
+ size. FIXME: Maybe should make it a param choice whether this is
+ an error. */
+ /* NOTE(review): the loop condition guarantees s.size < param->max_size
+ inside the body, so this test looks unreachable as written --
+ confirm. */
+ if (s.size >= param->max_size && param->max_size >= DEFAULT_MAX_SIZE)
+ {
+ fprintf (stderr, "%s\n", param->name);
+ fprintf (stderr, "sizes %ld to %ld total %d measurements\n",
+ (long) dat[0].size, (long) dat[ndat-1].size, ndat);
+ fprintf (stderr, " max size reached before end of crossover\n");
+ break;
+ }
+ }
+
+ /* From here on dat[0] is read, so at least one measurement is assumed,
+ i.e. param->min_size < param->max_size. */
+ if (option_trace >= 1)
+ printf ("sizes %ld to %ld total %d measurements\n",
+ (long) dat[0].size, (long) dat[ndat-1].size, ndat);
+
+ *threshold = dat[analyze_dat (1)].size;
+
+ if (param->min_is_always)
+ {
+ if (*threshold == param->min_size)
+ *threshold = 0;
+ }
+
+ if (! param->noprint || option_trace)
+ print_define_end (param->name, *threshold);
+}
+
+/* Measure each of the N entries of FUNCTIONS with tuneup_measure(), using
+   PARAM and the current global s, and pick the fastest.  Since *_METHOD
+   defines number from one, DEFINE is printed with the value index+1,
+   together with a remark giving the speedup over the second fastest
+   function.  NAME is used only in trace and error output.  Aborts if any
+   function cannot be measured.  N must be at least 2, because a runner-up
+   is always looked up.
+
+   Returns the zero-based index of the fastest function.  */
+int
+one_method (int n, speed_function_t *functions,
+            const char *name, const char *define,
+            const struct param_t *param)
+{
+  double *times;
+  int k;
+  int fastest;
+  int second;
+
+  TMP_DECL;
+  TMP_MARK;
+  times = (double*) TMP_ALLOC (n * sizeof (*times));
+
+  for (k = 0; k < n; k++)
+    {
+      times[k] = tuneup_measure (functions[k], param, &s);
+      if (option_trace >= 1)
+        printf ("size=%ld, %s, method %d %.9f\n",
+                (long) s.size, name, k + 1, times[k]);
+      if (times[k] == -1.0)
+        {
+          printf ("Oops, can't measure all %s methods\n", name);
+          abort ();
+        }
+    }
+
+  fastest = 0;
+  for (k = 1; k < n; k++)
+    {
+      if (times[k] < times[fastest])
+        fastest = k;
+    }
+
+  /* Start the runner-up at some index other than the winner.  */
+  second = (fastest == 0) ? 1 : 0;
+  for (k = 0; k < n; k++)
+    {
+      if (k == fastest)
+        continue;
+      if (times[k] < times[second])
+        second = k;
+    }
+
+  print_define_with_speedup (define, fastest + 1, second + 1,
+                             times[second] / times[fastest]);
+
+  TMP_FREE;
+  return fastest;
+}
+
+
+/* Special probing for the fft thresholds. The size restrictions on the
+ FFTs mean the graph of time vs size has a step effect. See this for
+ example using
+
+ ./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9
+ gnuplot foo.gnuplot
+
+ The current approach is to compare routines at the midpoint of relevant
+ steps. Arguably a more sophisticated system of threshold data is wanted
+ if this step effect remains. */
+
+/* Parameters for one FFT tuning run. fftmes() uses sqr to select the row
+ of mpn_fft_table3[] to fill and to choose between the SQR_ and MUL_
+ FFT_MODF_THRESHOLD constants, and prints table_name as the name of the
+ generated table define.
+ NOTE(review): the remaining fields are not used by the table-building
+ loop at the start of fftmes(); presumably they drive the FFT threshold
+ search elsewhere in this file -- confirm before relying on them. */
+struct fft_param_t {
+ const char *table_name;
+ const char *threshold_name;
+ const char *modf_threshold_name;
+ mp_size_t *p_threshold;
+ mp_size_t *p_modf_threshold;
+ mp_size_t first_size;
+ mp_size_t max_size;
+ speed_function_t function;
+ speed_function_t mul_modf_function;
+ speed_function_t mul_function;
+ mp_size_t sqr;
+};
+
+
+/* Return the spacing, in limbs, between sizes usable with FFT parameter K:
+   2^K times max (2^(K-1), GMP_LIMB_BITS) / GMP_LIMB_BITS.
+
+   mpn_mul_fft requires pl a multiple of 2^k limbs, but with
+   N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
+   of 2^(k-1) bits.
+
+   Aborts with a message if the computed step is not positive.  */
+mp_size_t
+fft_step_size (int k)
+{
+  mp_size_t half_bits = (mp_size_t) 1 << (k-1);
+  mp_size_t step;
+
+  step = MAX (half_bits, GMP_LIMB_BITS) / GMP_LIMB_BITS;
+  step *= (mp_size_t) 1 << k;
+
+  if (step <= 0)
+    {
+      printf ("Can't handle k=%d\n", k);
+      abort ();
+    }
+
+  return step;
+}
+
+/* Round PL up to a multiple of fft_step_size (K).  A PL that is already a
+   nonzero multiple is returned unchanged; 0 becomes one full step.  The
+   mask arithmetic assumes the step is a power of two.  */
+mp_size_t
+fft_next_size (mp_size_t pl, int k)
+{
+  mp_size_t low_mask = fft_step_size (k) - 1;
+
+  if (pl != 0 && (pl & low_mask) == 0)
+    return pl;
+
+  return (pl | low_mask) + 1;
+}
+
+#define NMAX_DEFAULT 1000000
+#define MAX_REPS 25
+#define MIN_REPS 5
+
+/* Return lcm (A, 2^K): up to K factors of two are stripped from A and the
+   remainder is multiplied by 2^K.  */
+static inline size_t
+mpn_mul_fft_lcm (size_t a, unsigned int k)
+{
+  unsigned int left;
+
+  for (left = k; left > 0 && a % 2 == 0; left--)
+    a >>= 1;
+
+  return a << k;
+}
+
+/* For an FFT product of PL limbs with parameter K, compute Nprime, the
+ size in bits that the pieces are padded to, and return it. SQR selects
+ the squaring rather than the multiplication threshold and is passed on
+ to mpn_fft_best_k. fftmes() uses the result as the denominator of its
+ efficiency estimate.
+ NOTE(review): the rounding here presumably mirrors what mpn_mul_fft does
+ internally -- confirm against mpn/generic/mul_fft.c.
+ Asserts that the padded piece size in limbs is smaller than PL. */
+mp_size_t
+fftfill (mp_size_t pl, int k, int sqr)
+{
+ mp_size_t maxLK;
+ mp_bitcnt_t N, Nprime, nprime, M;
+
+ N = pl * GMP_NUMB_BITS;
+ M = N >> k;
+
+ maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k);
+
+ /* Smallest multiple of maxLK strictly greater than 2*M + k + 2. */
+ Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
+ nprime = Nprime / GMP_NUMB_BITS;
+ if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+ {
+ size_t K2;
+ /* Round nprime up until it is a multiple of 2^(best k for nprime). */
+ for (;;)
+ {
+ K2 = 1L << mpn_fft_best_k (nprime, sqr);
+ if ((nprime & (K2 - 1)) == 0)
+ break;
+ nprime = (nprime + K2 - 1) & -K2;
+ Nprime = nprime * GMP_LIMB_BITS;
+ }
+ }
+ ASSERT_ALWAYS (nprime < pl);
+
+ return Nprime;
+}
+
+/* qsort() comparison callback ordering doubles ascending: the result is
+   -1, 0 or 1 as *AP is below, equal to, or above *BP.  */
+static int
+compare_double (const void *ap, const void *bp)
+{
+  const double *lhs = (const double *) ap;
+  const double *rhs = (const double *) bp;
+
+  if (*lhs < *rhs)
+    return -1;
+  return (*lhs > *rhs) ? 1 : 0;
+}
+
+/* Sort the N entries of TIMES into ascending order, in place, and return
+   the entry at index N/2 (the median for odd N).  */
+double
+median (double *times, int n)
+{
+  int mid = n / 2;
+
+  qsort (times, n, sizeof (times[0]), compare_double);
+  return times[mid];
+}
+
+/* Cache used by cached_measure(), one slot per FFT parameter k: n is the
+ size most recently measured with that k and time the result. fftmes()
+ resets every n to 0 before it starts measuring. */
+#define FFT_CACHE_SIZE 25
+typedef struct fft_cache
+{
+ mp_size_t n;
+ double time;
+} fft_cache_t;
+
+fft_cache_t fft_cache[FFT_CACHE_SIZE];
+
+/* Return the median time of N_MEASUREMENTS runs of mpn_mul_fft on N-limb
+   operands AP and BP with parameter K, writing the product to RP.  If
+   fft_cache[K] already holds a timing for size N it is returned without
+   measuring; otherwise the new timing replaces that cache slot.
+   N_MEASUREMENTS must not exceed MAX_REPS and K must be below
+   FFT_CACHE_SIZE; neither is checked here.  */
+double
+cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k,
+                int n_measurements)
+{
+  fft_cache_t *slot = &fft_cache[k];
+  double samples[MAX_REPS];
+  int rep;
+
+  if (slot->n == n)
+    return slot->time;
+
+  for (rep = 0; rep < n_measurements; rep++)
+    {
+      speed_starttime ();
+      mpn_mul_fft (rp, n, ap, n, bp, n, k);
+      samples[rep] = speed_endtime ();
+    }
+
+  slot->n = n;
+  slot->time = median (samples, n_measurements);
+  return slot->time;
+}
+
+/* Store {NVAL, KVAL} in entry IDX of the array named fft_tab in the
+   caller's scope, and write the end-of-table sentinel into entry IDX+1.
+   Every parameter is parenthesized so that any expression may be passed;
+   note that IDX is evaluated four times.  */
+#define INSERT_FFTTAB(idx, nval, kval)                                    \
+  do {                                                                    \
+    fft_tab[(idx)].n = (nval);                                            \
+    fft_tab[(idx)].k = (kval);                                            \
+    fft_tab[(idx)+1].n = (1 << 27) - 1; /* sentinel, 27b wide field */    \
+    fft_tab[(idx)+1].k = (1 << 5) - 1;                                    \
+  } while (0)
+
+/* Measure FFT multiplication (squaring when p->sqr is non-zero) for sizes
+   from nmin up to nmax and record in mpn_fft_table3[p->sqr] every point at
+   which the fastest k changes.
+
+   nmin       smallest size to consider; raised to the MODF threshold if lower.
+   nmax       measuring stops once the current size reaches this value.
+   initial_k  k assumed best at the starting size.
+   p          FFT parameters; this function reads p->sqr and p->table_name.
+   idx        first table index to fill; when 0 the entry {nmin, initial_k}
+              is inserted first.
+   print      non-zero to print the table as a #define while it is built.
+
+   Returns the index one past the last table entry written.  Aborts if the
+   table fills up or if memory cannot be allocated.  */
+int
+fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
+{
+  mp_size_t n, n1, prev_n1;
+  int k, best_k, last_best_k, kmax;
+  int eff, prev_eff;
+  double t0, t1;
+  int n_measurements;
+  mp_limb_t *ap, *bp, *rp;
+  mp_size_t alloc;
+  struct fft_table_nk *fft_tab;
+
+  fft_tab = mpn_fft_table3[p->sqr];
+
+  /* Reset the n field of every fft_cache entry (presumably this invalidates
+     whatever cached_measure stored earlier -- confirm against its
+     definition).  */
+  for (k = 0; k < FFT_CACHE_SIZE; k++)
+    fft_cache[k].n = 0;
+
+  if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
+    {
+      nmin = (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD);
+    }
+
+  if (print)
+    printf ("#define %s%*s", p->table_name, 38, "");
+
+  if (idx == 0)
+    {
+      INSERT_FFTTAB (0, nmin, initial_k);
+
+      if (print)
+        {
+          printf ("\\\n { ");
+          /* Bit-fields promote to int; cast so the argument matches %u.  */
+          printf ("{%7u,%2u}", (unsigned) fft_tab[0].n, (unsigned) fft_tab[0].k);
+        }
+
+      idx = 1;
+    }
+
+  /* Start with one-limb buffers; they are grown on demand in the loop.  When
+     squaring, bp aliases ap.  */
+  ap = (mp_ptr) malloc (sizeof (mp_limb_t));
+  if (p->sqr)
+    bp = ap;
+  else
+    bp = (mp_ptr) malloc (sizeof (mp_limb_t));
+  rp = (mp_ptr) malloc (sizeof (mp_limb_t));
+  ASSERT_ALWAYS (ap != NULL && bp != NULL && rp != NULL);
+  alloc = 1;
+
+  /* Round n to comply to initial k value */
+  n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k);
+
+  /* Odd repetition count, clamped to [MIN_REPS, MAX_REPS].  */
+  n_measurements = (18 - initial_k) | 1;
+  n_measurements = MAX (n_measurements, MIN_REPS);
+  n_measurements = MIN (n_measurements, MAX_REPS);
+
+  last_best_k = initial_k;
+  best_k = initial_k;
+
+  while (n < nmax)
+    {
+      int start_k, end_k;
+
+      /* Assume the current best k is best until we hit its next FFT step. */
+      t0 = 99999;
+
+      prev_n1 = n + 1;
+
+      /* Try the k values within 4 of the current best, limited to 4..24.  */
+      start_k = MAX (4, best_k - 4);
+      end_k = MIN (24, best_k + 4);
+      for (k = start_k; k <= end_k; k++)
+        {
+          n1 = mpn_fft_next_size (prev_n1, k);
+
+          eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr);
+
+          if (eff < 70) /* avoid measuring too slow fft:s */
+            continue;
+
+          if (n1 > alloc)
+            {
+              alloc = n1;
+              /* Each buffer is first shrunk to a single limb and then grown
+                 (presumably so that realloc has little old data to copy --
+                 NOTE(review): confirm the intent).  */
+              if (p->sqr)
+                {
+                  ap = (mp_ptr) realloc (ap, sizeof (mp_limb_t));
+                  rp = (mp_ptr) realloc (rp, sizeof (mp_limb_t));
+                  ap = bp = (mp_ptr) realloc (ap, alloc * sizeof (mp_limb_t));
+                  ASSERT_ALWAYS (ap != NULL);
+                  mpn_random (ap, alloc);
+                  rp = (mp_ptr) realloc (rp, alloc * sizeof (mp_limb_t));
+                  ASSERT_ALWAYS (rp != NULL);
+                }
+              else
+                {
+                  ap = (mp_ptr) realloc (ap, sizeof (mp_limb_t));
+                  bp = (mp_ptr) realloc (bp, sizeof (mp_limb_t));
+                  rp = (mp_ptr) realloc (rp, sizeof (mp_limb_t));
+                  ap = (mp_ptr) realloc (ap, alloc * sizeof (mp_limb_t));
+                  ASSERT_ALWAYS (ap != NULL);
+                  mpn_random (ap, alloc);
+                  bp = (mp_ptr) realloc (bp, alloc * sizeof (mp_limb_t));
+                  ASSERT_ALWAYS (bp != NULL);
+                  mpn_random (bp, alloc);
+                  rp = (mp_ptr) realloc (rp, alloc * sizeof (mp_limb_t));
+                  ASSERT_ALWAYS (rp != NULL);
+                }
+            }
+
+          t1 = cached_measure (rp, ap, bp, n1, k, n_measurements);
+
+          /* Use fewer repetitions once a batch takes more than 0.3.  */
+          if (t1 * n_measurements > 0.3)
+            n_measurements -= 2;
+          n_measurements = MAX (n_measurements, MIN_REPS);
+
+          if (t1 < t0)
+            {
+              best_k = k;
+              t0 = t1;
+            }
+        }
+
+      n1 = mpn_fft_next_size (prev_n1, best_k);
+
+      if (last_best_k != best_k)
+        {
+          ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1);
+
+          /* NOTE(review): INSERT_FFTTAB also writes the sentinel at idx+1;
+             confirm the table has room for it when idx is
+             FFT_TABLE3_SIZE-1.  */
+          if (idx >= FFT_TABLE3_SIZE)
+            {
+              printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
+              abort ();
+            }
+          INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
+
+          if (print)
+            {
+              printf (", ");
+              if (idx % 4 == 0)
+                printf ("\\\n ");
+              printf ("{%7u,%2u}", (unsigned) fft_tab[idx].n, (unsigned) fft_tab[idx].k);
+            }
+
+          if (option_trace >= 2)
+            {
+              /* Casts make the arguments match %lu and %u exactly; mp_size_t
+                 need not be unsigned long.  */
+              printf ("{%lu,%u}\n", (unsigned long) prev_n1, (unsigned) best_k);
+              fflush (stdout);
+            }
+
+          last_best_k = best_k;
+          idx++;
+        }
+
+      /* Step through FFT sizes for best_k until fftfill changes; the last
+         size with the old value becomes the new n.  */
+      for (;;)
+        {
+          prev_n1 = n1;
+          prev_eff = fftfill (prev_n1, best_k, p->sqr);
+          n1 = mpn_fft_next_size (prev_n1 + 1, best_k);
+          eff = fftfill (n1, best_k, p->sqr);
+
+          if (eff != prev_eff)
+            break;
+        }
+
+      n = prev_n1;
+    }
+
+  /* Append computed (unmeasured) entries for every larger k up to kmax.  */
+  kmax = sizeof (mp_size_t) * 4; /* GMP_MP_SIZE_T_BITS / 2 */
+  kmax = MIN (kmax, 25-1);
+  for (k = last_best_k + 1; k <= kmax; k++)
+    {
+      if (idx >= FFT_TABLE3_SIZE)
+        {
+          printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
+          abort ();
+        }
+      INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
+
+      if (print)
+        {
+          printf (", ");
+          if (idx % 4 == 0)
+            printf ("\\\n ");
+          printf ("{%7u,%2u}", (unsigned) fft_tab[idx].n, (unsigned) fft_tab[idx].k);
+        }
+
+      idx++;
+    }
+
+  if (print)
+    printf (" }\n");
+
+  free (ap);
+  if (! p->sqr)
+    free (bp);  /* when squaring, bp is the same buffer as ap */
+  free (rp);
+
+  return idx;
+}
+
+/* Tune the FFT parameters described by p, in three stages:
+     1. the MODF threshold (p->modf_threshold_name), trying k = 5, 6, 7 and
+        keeping the smallest threshold found;
+     2. the FFT table, built by fftmes and followed by its _SIZE define;
+     3. the FFT threshold (p->threshold_name), comparing p->function against
+        p->mul_function at increasing sizes up to p->max_size.
+   Results are stored through p->p_modf_threshold and p->p_threshold and
+   printed as #defines.  Aborts if a measurement returns -1.0 or if stage 1
+   selects no k.  */
+void
+fft (struct fft_param_t *p)
+{
+  mp_size_t size;
+  int k, idx, initial_k;
+
+  /*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/
+
+#if 1
+  {
+    /* Use plain one() mechanism, for some reasonable initial values of k.  The
+       advantage is that we don't depend on mpn_fft_table3, which can therefore
+       leave it completely uninitialized.  */
+
+    static struct param_t param;
+    mp_size_t thres, best_thres;
+    int best_k;
+    char buf[20];
+
+    best_thres = MP_SIZE_T_MAX;
+    best_k = -1;
+
+    for (k = 5; k <= 7; k++)
+      {
+        param.name = p->modf_threshold_name;
+        param.min_size = 100;
+        param.max_size = 2000;
+        param.function = p->mul_function;
+        param.step_factor = 0.0;
+        param.step = 4;
+        param.function2 = p->mul_modf_function;
+        param.noprint = 1;
+        s.r = k;
+        one (&thres, &param);
+        if (thres < best_thres)
+          {
+            best_thres = thres;
+            best_k = k;
+          }
+      }
+
+    /* best_k is later used as a shift count in fftmes, so it must have been
+       set by at least one iteration above.  */
+    ASSERT_ALWAYS (best_k >= 0);
+
+    *(p->p_modf_threshold) = best_thres;
+    sprintf (buf, "k = %d", best_k);
+    print_define_remark (p->modf_threshold_name, best_thres, buf);
+    initial_k = best_k;
+  }
+#else
+  /* Disabled alternative.  It does not compile as it stands: initial_k is
+     only a placeholder.  */
+  size = p->first_size;
+  for (;;)
+    {
+      double tk, tm;
+
+      size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr));
+      k = mpn_fft_best_k (size, p->sqr);
+
+      if (size >= p->max_size)
+        break;
+
+      s.size = size + fft_step_size (k) / 2;
+      s.r = k;
+      tk = tuneup_measure (p->mul_modf_function, NULL, &s);
+      if (tk == -1.0)
+        abort ();
+
+      tm = tuneup_measure (p->mul_function, NULL, &s);
+      if (tm == -1.0)
+        abort ();
+
+      if (option_trace >= 2)
+        printf ("at %ld size=%ld k=%d %.9f size=%ld modf %.9f\n",
+                (long) size,
+                (long) size + fft_step_size (k) / 2, k, tk,
+                (long) s.size, tm);
+
+      if (tk < tm)
+        {
+          *p->p_modf_threshold = s.size;
+          print_define (p->modf_threshold_name, *p->p_modf_threshold);
+          break;
+        }
+    }
+  initial_k = ?;
+#endif
+
+  /*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/
+
+  idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1);
+  printf ("#define %s_SIZE %d\n", p->table_name, idx);
+
+  /*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/
+
+  size = 2 * *p->p_modf_threshold; /* OK? */
+  for (;;)
+    {
+      double tk, tm;
+      mp_size_t mulmod_size, mul_size;
+
+      if (size >= p->max_size)
+        break;
+
+      mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2;
+      mul_size = (size + mulmod_size) / 2; /* middle of step */
+
+      s.size = mulmod_size;
+      tk = tuneup_measure (p->function, NULL, &s);
+      if (tk == -1.0)
+        abort ();
+
+      s.size = mul_size;
+      tm = tuneup_measure (p->mul_function, NULL, &s);
+      if (tm == -1.0)
+        abort ();
+
+      if (option_trace >= 2)
+        printf ("at %ld size=%ld %.9f size=%ld mul %.9f\n",
+                (long) size,
+                (long) mulmod_size, tk,
+                (long) mul_size, tm);
+
+      size = mulmod_size;
+
+      if (tk < tm)
+        {
+          /* s.size still holds mul_size here, the middle of the step.  */
+          *p->p_threshold = s.size;
+          print_define (p->threshold_name, *p->p_threshold);
+          break;
+        }
+    }
+}
+
+/* Time mpn_mul_1 against the fast exact single-limb division available
+   (currently mpn_divexact_1 or mpn_pi1_bdiv_q_1, chosen at compile time; it
+   is expected to become mpn_bdiv_1_qr_pi2 or somesuch) and print the ratio
+   as DIV_1_VS_MUL_1_PERCENT.  This is used in get_str and set_str.
+
+   Both operations are timed for every size from max_opsize down to 2, and
+   the per-limb times are summed before the ratio is taken.  */
+void
+relspeed_div_1_vs_mul_1 (void)
+{
+  const size_t max_opsize = 100;
+  mp_limb_t rp[max_opsize];
+  mp_limb_t ap[max_opsize];
+  double multime = 0;
+  double divtime = 0;
+  mp_size_t n;
+  long reps;
+
+  mpn_random (ap, max_opsize);
+
+  /* Multiplication: one untimed call, then speed_precision timed calls.  */
+  for (n = max_opsize; n > 1; n--)
+    {
+      mpn_mul_1 (rp, ap, n, MP_BASES_BIG_BASE_10);
+      speed_starttime ();
+      reps = speed_precision;
+      while (reps != 0)
+        {
+          mpn_mul_1 (rp, ap, n, MP_BASES_BIG_BASE_10);
+          reps--;
+        }
+      multime += speed_endtime () / n;
+    }
+
+  /* Division, measured the same way.  */
+  for (n = max_opsize; n > 1; n--)
+    {
+      /* Make input divisible for good measure.  */
+      ap[n - 1] = mpn_mul_1 (ap, ap, n - 1, MP_BASES_BIG_BASE_10);
+
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
+      mpn_pi1_bdiv_q_1 (rp, ap, n, MP_BASES_BIG_BASE_10,
+                        MP_BASES_BIG_BASE_BINVERTED_10,
+                        MP_BASES_BIG_BASE_CTZ_10);
+#else
+      mpn_divexact_1 (rp, ap, n, MP_BASES_BIG_BASE_10);
+#endif
+      speed_starttime ();
+      reps = speed_precision;
+      while (reps != 0)
+        {
+#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 || ! HAVE_NATIVE_mpn_divexact_1
+          mpn_pi1_bdiv_q_1 (rp, ap, n, MP_BASES_BIG_BASE_10,
+                            MP_BASES_BIG_BASE_BINVERTED_10,
+                            MP_BASES_BIG_BASE_CTZ_10);
+#else
+          mpn_divexact_1 (rp, ap, n, MP_BASES_BIG_BASE_10);
+#endif
+          reps--;
+        }
+      divtime += speed_endtime () / n;
+    }
+
+  print_define ("DIV_1_VS_MUL_1_PERCENT", (int) (100 * divtime/multime));
+}
+
+
+/* Tune the balanced-multiplication thresholds: MUL_TOOM22_THRESHOLD first,
+   then the TOOM33, TOOM44, TOOM6H and TOOM8H thresholds as a sequence.
+
+   Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
+   giving wrong results.  */
+void
+tune_mul_n (void)
+{
+  static struct param_t param;
+  mp_size_t floor_size;   /* start of the range for the next algorithm */
+  int retune;
+
+  param.function = speed_mpn_mul_n;
+
+  param.name = "MUL_TOOM22_THRESHOLD";
+  param.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE);
+  param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
+  one (&mul_toom22_threshold, &param);
+
+  param.noprint = 1;
+
+  /* Threshold sequence loop.  An algorithm whose threshold comes out within
+     5% of the start of its range is disabled by zeroing its threshold, and
+     the whole sequence is then measured again.  */
+  do
+    {
+      retune = 0;
+
+      floor_size = mul_toom22_threshold;
+
+      if (mul_toom33_threshold != 0)
+        {
+          param.name = "MUL_TOOM33_THRESHOLD";
+          param.min_size = MAX (floor_size, MPN_TOOM33_MUL_MINSIZE);
+          param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
+          one (&mul_toom33_threshold, &param);
+
+          if (mul_toom33_threshold <= floor_size * 1.05)
+            {
+              mul_toom33_threshold = 0;
+              retune = 1;
+            }
+        }
+
+      floor_size = MAX (floor_size, mul_toom33_threshold);
+
+      if (mul_toom44_threshold != 0)
+        {
+          param.name = "MUL_TOOM44_THRESHOLD";
+          param.min_size = MAX (floor_size, MPN_TOOM44_MUL_MINSIZE);
+          param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
+          one (&mul_toom44_threshold, &param);
+
+          if (mul_toom44_threshold <= floor_size * 1.05)
+            {
+              mul_toom44_threshold = 0;
+              retune = 1;
+            }
+        }
+
+      floor_size = MAX (floor_size, mul_toom44_threshold);
+
+      if (mul_toom6h_threshold != 0)
+        {
+          param.name = "MUL_TOOM6H_THRESHOLD";
+          param.min_size = MAX (floor_size, MPN_TOOM6H_MUL_MINSIZE);
+          param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
+          one (&mul_toom6h_threshold, &param);
+
+          if (mul_toom6h_threshold <= floor_size * 1.05)
+            {
+              mul_toom6h_threshold = 0;
+              retune = 1;
+            }
+        }
+
+      floor_size = MAX (floor_size, mul_toom6h_threshold);
+
+      if (mul_toom8h_threshold != 0)
+        {
+          param.name = "MUL_TOOM8H_THRESHOLD";
+          param.min_size = MAX (floor_size, MPN_TOOM8H_MUL_MINSIZE);
+          param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
+          one (&mul_toom8h_threshold, &param);
+
+          if (mul_toom8h_threshold <= floor_size * 1.05)
+            {
+              mul_toom8h_threshold = 0;
+              retune = 1;
+            }
+        }
+    }
+  while (retune);
+
+  print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
+  print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
+  print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
+  print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
+
+  /* disabled until tuned */
+  MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
+}
+
+/* Tune the crossover points between the unbalanced Toom multiplication
+   variants.  Each pair is measured through one() with a minimum size scaled
+   up by a fixed ratio, and the size found is scaled back down by the same
+   ratio before it is stored and printed.  */
+void
+tune_mul (void)
+{
+  static struct param_t param;
+  mp_size_t cutoff;
+
+  param.noprint = 1;
+
+  /* toom32 vs toom43, ratio 24/17 */
+  param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
+  param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
+  param.function = speed_mpn_toom32_for_toom43_mul;
+  param.function2 = speed_mpn_toom43_for_toom32_mul;
+  one (&cutoff, &param);
+  mul_toom32_to_toom43_threshold = cutoff * 17 / 24;
+  print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
+
+  /* toom32 vs toom53, ratio 30/19 */
+  param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
+  param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
+  param.function = speed_mpn_toom32_for_toom53_mul;
+  param.function2 = speed_mpn_toom53_for_toom32_mul;
+  one (&cutoff, &param);
+  mul_toom32_to_toom53_threshold = cutoff * 19 / 30;
+  print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
+
+  /* toom42 vs toom53, ratio 20/11 */
+  param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
+  param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
+  param.function = speed_mpn_toom42_for_toom53_mul;
+  param.function2 = speed_mpn_toom53_for_toom42_mul;
+  one (&cutoff, &param);
+  mul_toom42_to_toom53_threshold = cutoff * 11 / 20;
+  print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
+
+  /* toom42 vs toom63, ratio 2 */
+  param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
+  param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
+  param.function = speed_mpn_toom42_mul;
+  param.function2 = speed_mpn_toom63_mul;
+  one (&cutoff, &param);
+  mul_toom42_to_toom63_threshold = cutoff / 2;
+  print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
+
+  /* toom43 vs toom54.  Use ratio 5/6 when measuring, the middle of the range
+     2/3 to 1.  */
+  param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
+  param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
+  param.function = speed_mpn_toom43_for_toom54_mul;
+  param.function2 = speed_mpn_toom54_for_toom43_mul;
+  one (&cutoff, &param);
+  mul_toom43_to_toom54_threshold = cutoff * 5 / 6;
+  print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
+}
+
+
+/* Tune the three mullo thresholds: MULLO_BASECASE_THRESHOLD,
+   MULLO_DC_THRESHOLD and MULLO_MUL_N_THRESHOLD.  The last one is measured
+   only when FFT is wanted and mul_fft_threshold is below MP_SIZE_T_MAX / 2;
+   otherwise it is printed as MP_SIZE_T_MAX.  */
+void
+tune_mullo (void)
+{
+  static struct param_t param;
+
+  param.function = speed_mpn_mullo_n;
+
+  param.name = "MULLO_BASECASE_THRESHOLD";
+  param.noprint = 1;
+  param.stop_factor = 1.5;
+  param.min_is_always = 1;
+  param.min_size = 2;
+  param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1;
+  one (&mullo_basecase_threshold, &param);
+
+  param.name = "MULLO_DC_THRESHOLD";
+  param.min_is_always = 0;
+  param.min_size = 8;
+  param.max_size = 1000;
+  one (&mullo_dc_threshold, &param);
+
+  if (mullo_basecase_threshold < mullo_dc_threshold)
+    {
+      print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold);
+      print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold);
+    }
+  else
+    {
+      /* The basecase threshold is not below the DC threshold: print the DC
+         value under the basecase name and 0 for the DC threshold.  */
+      print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold);
+      print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase");
+    }
+
+  if (! WANT_FFT || mul_fft_threshold >= MP_SIZE_T_MAX / 2)
+    print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX,
+                         "without FFT use mullo forever");
+  else
+    {
+      param.name = "MULLO_MUL_N_THRESHOLD";
+      param.noprint = 0;
+      param.step_factor = 0.03;
+      param.min_size = mullo_dc_threshold;
+      param.max_size = 2 * mul_fft_threshold;
+      one (&mullo_mul_n_threshold, &param);
+    }
+}
+
+/* Tune the three sqrlo thresholds: SQRLO_BASECASE_THRESHOLD,
+   SQRLO_DC_THRESHOLD and SQRLO_SQR_THRESHOLD.  The last one is measured only
+   when FFT is wanted and sqr_fft_threshold is below MP_SIZE_T_MAX / 2;
+   otherwise it is printed as MP_SIZE_T_MAX.  */
+void
+tune_sqrlo (void)
+{
+  static struct param_t param;
+
+  param.function = speed_mpn_sqrlo;
+
+  param.name = "SQRLO_BASECASE_THRESHOLD";
+  param.noprint = 1;
+  param.stop_factor = 1.5;
+  param.min_is_always = 1;
+  param.min_size = 2;
+  param.max_size = SQRLO_BASECASE_THRESHOLD_LIMIT-1;
+  one (&sqrlo_basecase_threshold, &param);
+
+  param.name = "SQRLO_DC_THRESHOLD";
+  param.min_is_always = 0;
+  param.min_size = 8;
+  param.max_size = SQRLO_DC_THRESHOLD_LIMIT-1;
+  one (&sqrlo_dc_threshold, &param);
+
+  if (sqrlo_basecase_threshold < sqrlo_dc_threshold)
+    {
+      print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_basecase_threshold);
+      print_define ("SQRLO_DC_THRESHOLD", sqrlo_dc_threshold);
+    }
+  else
+    {
+      /* The basecase threshold is not below the DC threshold: print the DC
+         value under the basecase name and 0 for the DC threshold.  */
+      print_define ("SQRLO_BASECASE_THRESHOLD", sqrlo_dc_threshold);
+      print_define_remark ("SQRLO_DC_THRESHOLD", 0, "never mpn_sqrlo_basecase");
+    }
+
+  if (! WANT_FFT || sqr_fft_threshold >= MP_SIZE_T_MAX / 2)
+    print_define_remark ("SQRLO_SQR_THRESHOLD", MP_SIZE_T_MAX,
+                         "without FFT use sqrlo forever");
+  else
+    {
+      param.name = "SQRLO_SQR_THRESHOLD";
+      param.noprint = 0;
+      param.step_factor = 0.03;
+      param.min_size = sqrlo_dc_threshold;
+      param.max_size = 2 * sqr_fft_threshold;
+      one (&sqrlo_sqr_threshold, &param);
+    }
+}
+
+/* Tune MULMID_TOOM42_THRESHOLD with speed_mpn_mulmid_n over sizes 4..100.  */
+void
+tune_mulmid (void)
+{
+  static struct param_t param;
+
+  param.function = speed_mpn_mulmid_n;
+  param.name = "MULMID_TOOM42_THRESHOLD";
+  param.max_size = 100;
+  param.min_size = 4;
+  one (&mulmid_toom42_threshold, &param);
+}
+
+/* Tune MULMOD_BNM1_THRESHOLD with speed_mpn_mulmod_bnm1 over sizes 4..100.  */
+void
+tune_mulmod_bnm1 (void)
+{
+  static struct param_t param;
+
+  param.function = speed_mpn_mulmod_bnm1;
+  param.name = "MULMOD_BNM1_THRESHOLD";
+  param.max_size = 100;
+  param.min_size = 4;
+  one (&mulmod_bnm1_threshold, &param);
+}
+
+/* Tune SQRMOD_BNM1_THRESHOLD with speed_mpn_sqrmod_bnm1 over sizes 4..100.  */
+void
+tune_sqrmod_bnm1 (void)
+{
+  static struct param_t param;
+
+  param.function = speed_mpn_sqrmod_bnm1;
+  param.name = "SQRMOD_BNM1_THRESHOLD";
+  param.max_size = 100;
+  param.min_size = 4;
+  one (&sqrmod_bnm1_threshold, &param);
+}
+
+
+/* Tune the squaring thresholds: SQR_BASECASE_THRESHOLD, SQR_TOOM2_THRESHOLD
+ and then the TOOM3, TOOM4, TOOM6 and TOOM8 thresholds as a sequence.
+
+ Start the basecase from 3, since 1 is a special case, and if mul_basecase
+ is faster only at size==2 then we don't want to bother with extra code
+ just for that. Start karatsuba from 4 same as MUL above. */
+
+void
+tune_sqr (void)
+{
+ /* disabled until tuned */
+ SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
+
+ /* Stage 1: basecase threshold, measured only without a native
+ mpn_sqr_basecase. Nothing is printed here in the non-native case
+ (noprint is set); stage 2 prints it. */
+ if (HAVE_NATIVE_mpn_sqr_basecase)
+ {
+ print_define_remark ("SQR_BASECASE_THRESHOLD", 0, "always (native)");
+ sqr_basecase_threshold = 0;
+ }
+ else
+ {
+ static struct param_t param;
+ param.name = "SQR_BASECASE_THRESHOLD";
+ param.function = speed_mpn_sqr;
+ param.min_size = 3;
+ param.min_is_always = 1;
+ param.max_size = TUNE_SQR_TOOM2_MAX;
+ param.noprint = 1;
+ one (&sqr_basecase_threshold, &param);
+ }
+
+ /* Stage 2: toom2 threshold, then print both stage 1 and stage 2 values. */
+ {
+ static struct param_t param;
+ param.name = "SQR_TOOM2_THRESHOLD";
+ param.function = speed_mpn_sqr;
+ param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE);
+ param.max_size = TUNE_SQR_TOOM2_MAX;
+ param.noprint = 1;
+ one (&sqr_toom2_threshold, &param);
+
+ if (! HAVE_NATIVE_mpn_sqr_basecase
+ && sqr_toom2_threshold < sqr_basecase_threshold)
+ {
+ /* Karatsuba becomes faster than mul_basecase before
+ sqr_basecase does. Arrange for the expression
+ "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD)" which
+ selects mpn_sqr_basecase in mpn_sqr to be false, by setting
+ SQR_TOOM2_THRESHOLD to zero, making
+ SQR_BASECASE_THRESHOLD the toom2 threshold. */
+
+ /* Order matters: copy the toom2 value before zeroing it. */
+ sqr_basecase_threshold = SQR_TOOM2_THRESHOLD;
+ SQR_TOOM2_THRESHOLD = 0;
+
+ print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold,
+ "toom2");
+ print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD,
+ "never sqr_basecase");
+ }
+ else
+ {
+ /* In the native case SQR_BASECASE_THRESHOLD was already printed in
+ stage 1. */
+ if (! HAVE_NATIVE_mpn_sqr_basecase)
+ print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold);
+ print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD);
+ }
+ }
+
+ /* Stage 3: the higher Toom thresholds. */
+ {
+ static struct param_t param;
+ mp_size_t next_toom_start;
+ int something_changed;
+
+ param.function = speed_mpn_sqr;
+ param.noprint = 1;
+
+ /* Threshold sequence loop. Disable functions that would be used in a very
+ narrow range, re-measuring things when that happens. */
+ something_changed = 1;
+ while (something_changed)
+ {
+ something_changed = 0;
+
+ /* One of the two may be zero after stage 2, hence the MAX. */
+ next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
+
+ /* toom3 is measured on every pass and is never disabled. Its
+ threshold is reset to the limit before measuring. */
+ sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
+ param.name = "SQR_TOOM3_THRESHOLD";
+ param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
+ param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
+ one (&sqr_toom3_threshold, &param);
+
+ next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
+
+ /* From here on, a zero threshold means the algorithm was disabled on
+ an earlier pass and is skipped. */
+ if (sqr_toom4_threshold != 0)
+ {
+ param.name = "SQR_TOOM4_THRESHOLD";
+ sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
+ param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
+ param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
+ one (&sqr_toom4_threshold, &param);
+
+ /* Within 5% of the start of its range: disable and re-measure. */
+ if (next_toom_start * 1.05 >= sqr_toom4_threshold)
+ {
+ sqr_toom4_threshold = 0;
+ something_changed = 1;
+ }
+ }
+
+ next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
+
+ if (sqr_toom6_threshold != 0)
+ {
+ param.name = "SQR_TOOM6_THRESHOLD";
+ sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
+ param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
+ param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
+ one (&sqr_toom6_threshold, &param);
+
+ if (next_toom_start * 1.05 >= sqr_toom6_threshold)
+ {
+ sqr_toom6_threshold = 0;
+ something_changed = 1;
+ }
+ }
+
+ next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
+
+ if (sqr_toom8_threshold != 0)
+ {
+ param.name = "SQR_TOOM8_THRESHOLD";
+ sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
+ param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
+ param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
+ one (&sqr_toom8_threshold, &param);
+
+ if (next_toom_start * 1.05 >= sqr_toom8_threshold)
+ {
+ sqr_toom8_threshold = 0;
+ something_changed = 1;
+ }
+ }
+ }
+
+ print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
+ print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
+ print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
+ print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
+ }
+}
+
+
+/* Tune DC_DIV_QR_THRESHOLD and DC_DIVAPPR_Q_THRESHOLD, each by comparing the
+   sbpi1 function against its dcpi1 counterpart from size 6 upwards.  */
+void
+tune_dc_div (void)
+{
+  s.r = 0; /* clear to make speed function do 2n/n */
+
+  {
+    static struct param_t param;
+    param.min_size = 6;
+    param.function2 = speed_mpn_dcpi1_div_qr;
+    param.function = speed_mpn_sbpi1_div_qr;
+    param.name = "DC_DIV_QR_THRESHOLD";
+    one (&dc_div_qr_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+    param.min_size = 6;
+    param.function2 = speed_mpn_dcpi1_divappr_q;
+    param.function = speed_mpn_sbpi1_divappr_q;
+    param.name = "DC_DIVAPPR_Q_THRESHOLD";
+    one (&dc_divappr_q_threshold, &param);
+  }
+}
+
+/* Speed function that dispatches on size: below DC_DIV_QR_THRESHOLD it
+   returns the speed_mpn_sbpi1_div_qr measurement, otherwise the
+   speed_mpn_dcpi1_div_qr one.  */
+static double
+speed_mpn_sbordcpi1_div_qr (struct speed_params *s)
+{
+  return s->size < DC_DIV_QR_THRESHOLD
+    ? speed_mpn_sbpi1_div_qr (s)
+    : speed_mpn_dcpi1_div_qr (s);
+}
+
+/* Tune MU_DIV_QR_THRESHOLD, MU_DIVAPPR_Q_THRESHOLD and
+   MUPI_DIV_QR_THRESHOLD.  The first two start at mul_toom22_threshold and
+   compare dcpi1 against mu; the third compares the sb/dc dispatcher against
+   mupi from size 6.  */
+void
+tune_mu_div (void)
+{
+  s.r = 0; /* clear to make speed function do 2n/n */
+
+  {
+    static struct param_t param;
+    param.step_factor = 0.02;
+    param.max_size = 5000;
+    param.min_size = mul_toom22_threshold;
+    param.function2 = speed_mpn_mu_div_qr;
+    param.function = speed_mpn_dcpi1_div_qr;
+    param.name = "MU_DIV_QR_THRESHOLD";
+    one (&mu_div_qr_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+    param.step_factor = 0.02;
+    param.max_size = 5000;
+    param.min_size = mul_toom22_threshold;
+    param.function2 = speed_mpn_mu_divappr_q;
+    param.function = speed_mpn_dcpi1_divappr_q;
+    param.name = "MU_DIVAPPR_Q_THRESHOLD";
+    one (&mu_divappr_q_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+    param.step_factor = 0.02;
+    param.max_size = 1000;
+    param.min_is_always = 1;
+    param.min_size = 6;
+    param.function2 = speed_mpn_mupi_div_qr;
+    param.function = speed_mpn_sbordcpi1_div_qr;
+    param.name = "MUPI_DIV_QR_THRESHOLD";
+    one (&mupi_div_qr_threshold, &param);
+  }
+}
+
+/* Tune DC_BDIV_QR_THRESHOLD and DC_BDIV_Q_THRESHOLD, each by comparing the
+   sbpi1 function against its dcpi1 counterpart from size 4 upwards.  */
+void
+tune_dc_bdiv (void)
+{
+  s.r = 0; /* clear to make speed function do 2n/n*/
+
+  {
+    static struct param_t param;
+    param.min_size = 4;
+    param.function2 = speed_mpn_dcpi1_bdiv_qr;
+    param.function = speed_mpn_sbpi1_bdiv_qr;
+    param.name = "DC_BDIV_QR_THRESHOLD";
+    one (&dc_bdiv_qr_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+    param.min_size = 4;
+    param.function2 = speed_mpn_dcpi1_bdiv_q;
+    param.function = speed_mpn_sbpi1_bdiv_q;
+    param.name = "DC_BDIV_Q_THRESHOLD";
+    one (&dc_bdiv_q_threshold, &param);
+  }
+}
+
+/* Tune MU_BDIV_QR_THRESHOLD and MU_BDIV_Q_THRESHOLD by comparing the dcpi1
+   function against its mu counterpart, starting at the corresponding DC
+   threshold and going up to 5000.  */
+void
+tune_mu_bdiv (void)
+{
+  s.r = 0; /* clear to make speed function do 2n/n*/
+
+  {
+    static struct param_t param;
+    param.step_factor = 0.02;
+    param.max_size = 5000;
+    param.min_size = dc_bdiv_qr_threshold;
+    param.function2 = speed_mpn_mu_bdiv_qr;
+    param.function = speed_mpn_dcpi1_bdiv_qr;
+    param.name = "MU_BDIV_QR_THRESHOLD";
+    one (&mu_bdiv_qr_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+    param.step_factor = 0.02;
+    param.max_size = 5000;
+    param.min_size = dc_bdiv_q_threshold;
+    param.function2 = speed_mpn_mu_bdiv_q;
+    param.function = speed_mpn_dcpi1_bdiv_q;
+    param.name = "MU_BDIV_Q_THRESHOLD";
+    one (&mu_bdiv_q_threshold, &param);
+  }
+}
+
+/* Tune INV_MULMOD_BNM1_THRESHOLD (with speed_mpn_ni_invertappr) and then
+   INV_NEWTON_THRESHOLD (with speed_mpn_invertappr), both from size 5.  The
+   same static param is reused for the second measurement.  */
+void
+tune_invertappr (void)
+{
+  static struct param_t param;
+
+  param.name = "INV_MULMOD_BNM1_THRESHOLD";
+  param.min_size = 5;
+  param.function = speed_mpn_ni_invertappr;
+  one (&inv_mulmod_bnm1_threshold, &param);
+
+  param.name = "INV_NEWTON_THRESHOLD";
+  param.min_size = 5;
+  param.function = speed_mpn_invertappr;
+  one (&inv_newton_threshold, &param);
+}
+
+/* Tune INV_APPR_THRESHOLD with speed_mpn_invert, from size 5.  */
+void
+tune_invert (void)
+{
+  static struct param_t param;
+
+  param.name = "INV_APPR_THRESHOLD";
+  param.min_size = 5;
+  param.function = speed_mpn_invert;
+  one (&inv_appr_threshold, &param);
+}
+
+/* Tune BINV_NEWTON_THRESHOLD with speed_mpn_binvert.  */
+void
+tune_binvert (void)
+{
+  static struct param_t param;
+
+  param.name = "BINV_NEWTON_THRESHOLD";
+  param.min_size = 8; /* pointless with smaller operands */
+  param.function = speed_mpn_binvert;
+  one (&binv_newton_threshold, &param);
+}
+
+/* Tune the REDC thresholds. When a native mpn_addmul_2 or mpn_redc_2 is
+ available, REDC_1_TO_REDC_2_THRESHOLD and REDC_2_TO_REDC_N_THRESHOLD are
+ measured and printed; otherwise only REDC_1_TO_REDC_N_THRESHOLD is
+ measured. */
+void
+tune_redc (void)
+{
+ /* Both macros are defined inside the function body but, being preprocessor
+ definitions, remain visible for the rest of the file. */
+#define TUNE_REDC_2_MAX 100
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
+
+#if WANT_REDC_2
+ /* redc_1 vs redc_2, sizes 1..TUNE_REDC_2_MAX. */
+ {
+ static struct param_t param;
+ param.name = "REDC_1_TO_REDC_2_THRESHOLD";
+ param.function = speed_mpn_redc_1;
+ param.function2 = speed_mpn_redc_2;
+ param.min_size = 1;
+ param.min_is_always = 1;
+ param.max_size = TUNE_REDC_2_MAX;
+ param.noprint = 1;
+ param.stop_factor = 1.5;
+ one (&redc_1_to_redc_2_threshold, &param);
+ }
+ /* redc_2 vs redc_n, from size 16. */
+ {
+ static struct param_t param;
+ param.name = "REDC_2_TO_REDC_N_THRESHOLD";
+ param.function = speed_mpn_redc_2;
+ param.function2 = speed_mpn_redc_n;
+ param.min_size = 16;
+ param.noprint = 1;
+ one (&redc_2_to_redc_n_threshold, &param);
+ }
+ /* If redc_2 would have no range of its own, drop it and re-measure. */
+ if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
+ {
+ redc_2_to_redc_n_threshold = 0; /* disable redc_2 */
+
+ /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as
+ REDC_1_TO_REDC_2_THRESHOLD. */
+ {
+ static struct param_t param;
+ param.name = "REDC_1_TO_REDC_2_THRESHOLD";
+ param.function = speed_mpn_redc_1;
+ param.function2 = speed_mpn_redc_n;
+ param.min_size = 16;
+ param.noprint = 1;
+ one (&redc_1_to_redc_2_threshold, &param);
+ }
+ }
+ /* All measurements above ran with noprint set; print the final values. */
+ print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
+ print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
+#else
+ /* Without redc_2: a single redc_1 vs redc_n cutoff, from size 16. */
+ {
+ static struct param_t param;
+ param.name = "REDC_1_TO_REDC_N_THRESHOLD";
+ param.function = speed_mpn_redc_1;
+ param.function2 = speed_mpn_redc_n;
+ param.min_size = 16;
+ one (&redc_1_to_redc_n_threshold, &param);
+ }
+#endif
+}
+
+/* Tune MATRIX22_STRASSEN_THRESHOLD with speed_mpn_matrix22_mul, from
+   size 2.  */
+void
+tune_matrix22_mul (void)
+{
+  static struct param_t param;
+
+  param.min_size = 2;
+  param.function = speed_mpn_matrix22_mul;
+  param.name = "MATRIX22_STRASSEN_THRESHOLD";
+  one (&matrix22_strassen_threshold, &param);
+}
+
+/* Let one_method choose among the five mpn_hgcd2 variants (reported under
+   the names "mpn_hgcd2" and "HGCD2_DIV1_METHOD") and install the chosen
+   function in hgcd2_func.  The two tables below must stay in the same
+   order, since the index returned for speed_f is used to index f.  */
+void
+tune_hgcd2 (void)
+{
+  static struct param_t param;
+  hgcd2_func_t *f[5] =
+    { mpn_hgcd2_1,
+      mpn_hgcd2_2,
+      mpn_hgcd2_3,
+      mpn_hgcd2_4,
+      mpn_hgcd2_5 };
+  speed_function_t speed_f[5] =
+    { speed_mpn_hgcd2_1,
+      speed_mpn_hgcd2_2,
+      speed_mpn_hgcd2_3,
+      speed_mpn_hgcd2_4,
+      speed_mpn_hgcd2_5 };
+  int best;
+
+  s.size = 1;
+  /* Take the count from the table itself, as tune_div_qr_1 does.  */
+  best = one_method (numberof (speed_f), speed_f, "mpn_hgcd2", "HGCD2_DIV1_METHOD", &param);
+
+  /* The result indexes f[], so it has to be a valid index.  */
+  ASSERT_ALWAYS (best >= 0 && best < (int) numberof (f));
+
+  /* Use selected function when tuning hgcd and gcd */
+  hgcd2_func = f[best];
+}
+
+/* Tune HGCD_THRESHOLD with speed_mpn_hgcd.  */
+void
+tune_hgcd (void)
+{
+  static struct param_t param;
+
+  /* We seem to get strange results for small sizes */
+  param.min_size = 30;
+  param.function = speed_mpn_hgcd;
+  param.name = "HGCD_THRESHOLD";
+  one (&hgcd_threshold, &param);
+}
+
+/* Tune HGCD_APPR_THRESHOLD with speed_mpn_hgcd_appr.  */
+void
+tune_hgcd_appr (void)
+{
+  static struct param_t param;
+
+  /* We seem to get strange results for small sizes */
+  param.min_size = 50;
+  param.stop_since_change = 150;
+  param.function = speed_mpn_hgcd_appr;
+  param.name = "HGCD_APPR_THRESHOLD";
+  one (&hgcd_appr_threshold, &param);
+}
+
+/* Tune HGCD_REDUCE_THRESHOLD with speed_mpn_hgcd_reduce over sizes
+   30..7000.  */
+void
+tune_hgcd_reduce (void)
+{
+  static struct param_t param;
+
+  param.step_factor = 0.04;
+  param.max_size = 7000;
+  param.min_size = 30;
+  param.function = speed_mpn_hgcd_reduce;
+  param.name = "HGCD_REDUCE_THRESHOLD";
+  one (&hgcd_reduce_threshold, &param);
+}
+
+/* Tune GCD_DC_THRESHOLD with speed_mpn_gcd, from hgcd_threshold up to
+   3000.  */
+void
+tune_gcd_dc (void)
+{
+  static struct param_t param;
+
+  param.step_factor = 0.02;
+  param.max_size = 3000;
+  param.min_size = hgcd_threshold;
+  param.function = speed_mpn_gcd;
+  param.name = "GCD_DC_THRESHOLD";
+  one (&gcd_dc_threshold, &param);
+}
+
+/* Tune GCDEXT_DC_THRESHOLD with speed_mpn_gcdext, from hgcd_threshold up to
+   3000.  */
+void
+tune_gcdext_dc (void)
+{
+  static struct param_t param;
+
+  param.step_factor = 0.02;
+  param.max_size = 3000;
+  param.min_size = hgcd_threshold;
+  param.function = speed_mpn_gcdext;
+  param.name = "GCDEXT_DC_THRESHOLD";
+  one (&gcdext_dc_threshold, &param);
+}
+
+/* In tune_powm_sec we compute the table used by the win_size function.  The
+   cutoff points are in exponent bits, disregarding other operand sizes.  It
+   is not possible to use the one() framework for this, since it currently
+   works with a granularity of full limbs.  */
+
+/* This win_size replaces the variant in the powm code.  Instead of deriving
+   a window size from the exponent bit count, it returns whatever the global
+   winsize currently holds, allowing us to control k in the k-ary
+   algorithms.  */
+int winsize;
+int
+win_size (mp_bitcnt_t eb)
+{
+  (void) eb; /* the bit count is not used */
+  return winsize;
+}
+
+/* Print POWM_SEC_TABLE: the exponent bit counts at which mpn_sec_powm
+   should move from window size k to k+1.
+
+   For a growing sequence of exponent sizes nbits, mpn_sec_powm is timed
+   with window size k and with k+1 (through the global winsize), taking the
+   median of n_measurements runs for each.  When k+1 is faster at two
+   consecutive sizes, the cutoff remembered from the first of them is
+   printed and k is incremented.  */
+void
+tune_powm_sec (void)
+{
+  mp_size_t n;
+  int k, i;
+  mp_size_t itch;
+  mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
+  const int n_max = 3000 / GMP_NUMB_BITS;
+  const int n_measurements = 5;
+  mp_ptr rp, bp, ep, mp, tp;
+  double ttab[n_measurements], tk, tkp1;
+  TMP_DECL;
+  TMP_MARK;
+
+  possible_nbits_cutoff = 0;
+
+  k = 1;
+
+  winsize = 10; /* the itch function needs this */
+  itch = mpn_sec_powm_itch (n_max, n_max * GMP_NUMB_BITS, n_max);
+
+  rp = TMP_ALLOC_LIMBS (n_max);
+  bp = TMP_ALLOC_LIMBS (n_max);
+  ep = TMP_ALLOC_LIMBS (n_max);
+  mp = TMP_ALLOC_LIMBS (n_max);
+  tp = TMP_ALLOC_LIMBS (itch);
+
+  mpn_random (bp, n_max);
+  mpn_random (mp, n_max);
+  mp[0] |= 1; /* force the modulus to be odd */
+
+/* How about taking the M operand size into account?
+
+   An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
+   B = O(M)).
+
+   Using k-ary and no sliding window, the precomputation will need time
+   O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
+   O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
+
+   An operation R=powm_sec(B,E,N) will take time like powm.
+
+   Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
+   main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
+   O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
+   table reads, respectively.  */
+
+  printf ("#define POWM_SEC_TABLE ");
+
+  /* For nbits == 1, we should always use k == 1, so no need to tune
+     that.  Starting with nbits == 2 also ensure that nbits always is
+     larger than the windowsize k+1.  */
+  for (nbits = 2; nbits <= n_max * GMP_NUMB_BITS; )
+    {
+      n = (nbits - 1) / GMP_NUMB_BITS + 1;
+
+      /* Generate E such that sliding-window for k and k+1 works equally
+         well/poorly (but sliding is not used in powm_sec, of course).  */
+      for (i = 0; i < n; i++)
+        ep[i] = ~CNST_LIMB(0);
+
+      /* Median time with window size k.  */
+      winsize = k;
+      for (i = 0; i < n_measurements; i++)
+        {
+          speed_starttime ();
+          mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
+          ttab[i] = speed_endtime ();
+        }
+      tk = median (ttab, n_measurements);
+
+      /* Median time with window size k+1, measured exactly like the k case;
+         every run starts its own timer inside the loop.  */
+      winsize = k + 1;
+      for (i = 0; i < n_measurements; i++)
+        {
+          speed_starttime ();
+          mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
+          ttab[i] = speed_endtime ();
+        }
+      tkp1 = median (ttab, n_measurements);
+/*
+      printf ("testing: %lu, %d", (unsigned long) nbits, k);
+      printf (" %10.5f %10.5f\n", tk, tkp1);
+*/
+      if (tkp1 < tk)
+        {
+          if (possible_nbits_cutoff)
+            {
+              /* Two consecutive sizes indicate k increase, obey.  */
+
+              /* Must always have x[k] >= k */
+              ASSERT_ALWAYS (possible_nbits_cutoff >= k);
+
+              if (k > 1)
+                printf (",");
+              printf ("%ld", (long) possible_nbits_cutoff);
+              k++;
+              possible_nbits_cutoff = 0;
+            }
+          else
+            {
+              /* One measurement indicate k increase, save nbits for further
+                 consideration.  */
+              /* The new larger k gets used for sizes > the cutoff
+                 value, hence the cutoff should be one less than the
+                 smallest size where it gives a speedup.  */
+              possible_nbits_cutoff = nbits - 1;
+            }
+        }
+      else
+        possible_nbits_cutoff = 0;
+
+      /* Grow nbits by a factor 65/64, but always by at least one bit.  */
+      nbits_next = nbits * 65 / 64;
+      nbits = nbits_next + (nbits_next == nbits);
+    }
+  printf ("\n");
+  TMP_FREE;
+}
+
+
+/* Common param settings for the single-limb division thresholds.
+
+ size_extra==1 reflects the fact that with high<divisor one division is
+ always skipped. Forcing high<divisor while testing ensures consistency
+ while stepping through sizes, ie. that size-1 divides will be done each
+ time.
+
+ min_size==2 and min_is_always are used so that if plain division is only
+ better at size==1 then don't bother including that code just for that
+ case, instead go with preinv always and get a size saving.
+
+ The macro expands to plain assignments on a variable named param, which
+ must be in scope where it is used. It is not wrapped in do { } while (0)
+ and already ends in a semicolon, so it should only be used as a complete
+ statement inside a braced block, never as the unbraced body of an if or
+ loop. */
+
+#define DIV_1_PARAMS \
+ param.check_size = 256; \
+ param.min_size = 2; \
+ param.min_is_always = 1; \
+ param.data_high = DATA_HIGH_LT_R; \
+ param.size_extra = 1; \
+ param.stop_factor = 2.0;
+
+
+/* Speed function for mpn_divrem_1 as selected by tune_divrem_1.  */
+double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
+
+/* Tune DIVREM_1_NORM_THRESHOLD and DIVREM_1_UNNORM_THRESHOLD, and set
+   tuned_speed_mpn_divrem_1.  Nothing is measured when mpn_divrem_1 is
+   native, when nails are in use, or when UDIV_PREINV_ALWAYS is set; in
+   those cases the pointer stays at the plain speed_mpn_divrem_1.  */
+void
+tune_divrem_1 (void)
+{
+  /* plain version by default */
+  tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1;
+
+  if (HAVE_NATIVE_mpn_divrem_1)
+    {
+      /* No support for tuning native assembler code, do that by hand and put
+         the results in the .asm file, there's no need for such thresholds to
+         appear in gmp-mparam.h.  */
+    }
+  else if (GMP_NAIL_BITS != 0)
+    {
+      print_define_remark ("DIVREM_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      print_define_remark ("DIVREM_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+    }
+  else if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("DIVREM_1_NORM_THRESHOLD", 0L, "preinv always");
+      print_define ("DIVREM_1_UNNORM_THRESHOLD", 0L);
+    }
+  else
+    {
+      tuned_speed_mpn_divrem_1 = speed_mpn_divrem_1_tune;
+
+      /* Tune for the integer part of mpn_divrem_1.  This will very possibly
+         be a bit out for the fractional part, but that's too bad, the
+         integer part is more important.  */
+      {
+        static struct param_t param;
+        s.r = randlimb_norm ();
+        DIV_1_PARAMS;
+        param.function = speed_mpn_divrem_1_tune;
+        param.name = "DIVREM_1_NORM_THRESHOLD";
+        one (&divrem_1_norm_threshold, &param);
+      }
+      {
+        static struct param_t param;
+        s.r = randlimb_half ();
+        DIV_1_PARAMS;
+        param.function = speed_mpn_divrem_1_tune;
+        param.name = "DIVREM_1_UNNORM_THRESHOLD";
+        one (&divrem_1_unnorm_threshold, &param);
+      }
+    }
+}
+
+/* Tune mpn_div_qr_1: unless mpn_div_qr_1n_pi1 is native, first let
+   one_method choose among its four variants (DIV_QR_1N_PI1_METHOD); then
+   measure DIV_QR_1_NORM_THRESHOLD and DIV_QR_1_UNNORM_THRESHOLD.  */
+void
+tune_div_qr_1 (void)
+{
+  if (!HAVE_NATIVE_mpn_div_qr_1n_pi1)
+    {
+      static struct param_t param;
+      speed_function_t variants[] =
+        {
+          speed_mpn_div_qr_1n_pi1_1,
+          speed_mpn_div_qr_1n_pi1_2,
+          speed_mpn_div_qr_1n_pi1_3,
+          speed_mpn_div_qr_1n_pi1_4,
+        };
+
+      s.size = 10;
+      s.r = randlimb_norm ();
+
+      one_method (numberof(variants), variants, "mpn_div_qr_1n_pi1", "DIV_QR_1N_PI1_METHOD", &param);
+    }
+
+  {
+    static struct param_t param;
+    s.r = randlimb_norm ();
+    DIV_1_PARAMS;
+    /* Override two of the DIV_1_PARAMS settings.  */
+    param.min_is_always = 0;
+    param.min_size = 1;
+    param.function = speed_mpn_div_qr_1_tune;
+    param.name = "DIV_QR_1_NORM_THRESHOLD";
+    one (&div_qr_1_norm_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+    s.r = randlimb_half();
+    DIV_1_PARAMS;
+    /* Override two of the DIV_1_PARAMS settings.  */
+    param.min_is_always = 0;
+    param.min_size = 1;
+    param.function = speed_mpn_div_qr_1_tune;
+    param.name = "DIV_QR_1_UNNORM_THRESHOLD";
+    one (&div_qr_1_unnorm_threshold, &param);
+  }
+}
+
+
+/* Tune the mpn_mod_1 family.  In order: MOD_1_1P_METHOD (unless
+   HAVE_NATIVE_mpn_mod_1_1p), MOD_1_NORM_THRESHOLD and
+   MOD_1_UNNORM_THRESHOLD, MOD_1N_TO_MOD_1_1_THRESHOLD, the
+   MOD_1U_TO_MOD_1_1 / MOD_1_1_TO_MOD_1_2 / MOD_1_2_TO_MOD_1_4 chain, and
+   finally PREINV_MOD_1_TO_MOD_1_THRESHOLD.  Nothing is measured when
+   HAVE_NATIVE_mpn_mod_1 is set or when nails are in use.  */
+void
+tune_mod_1 (void)
+{
+ /* No support for tuning native assembler code, do that by hand and put
+ the results in the .asm file, there's no need for such thresholds to
+ appear in gmp-mparam.h. */
+ if (HAVE_NATIVE_mpn_mod_1)
+ return;
+
+ if (GMP_NAIL_BITS != 0)
+ {
+ print_define_remark ("MOD_1_NORM_THRESHOLD", MP_SIZE_T_MAX,
+ "no preinv with nails");
+ print_define_remark ("MOD_1_UNNORM_THRESHOLD", MP_SIZE_T_MAX,
+ "no preinv with nails");
+ return;
+ }
+
+ if (!HAVE_NATIVE_mpn_mod_1_1p)
+ {
+ static struct param_t param;
+ speed_function_t f[2] =
+ {
+ speed_mpn_mod_1_1_1,
+ speed_mpn_mod_1_1_2,
+ };
+
+ /* the two candidates are compared at this one fixed size */
+ s.size = 10;
+ s.r = randlimb_half ();
+ one_method (2, f, "mpn_mod_1_1", "MOD_1_1P_METHOD", &param);
+ }
+
+ if (UDIV_PREINV_ALWAYS)
+ {
+ print_define ("MOD_1_NORM_THRESHOLD", 0L);
+ print_define ("MOD_1_UNNORM_THRESHOLD", 0L);
+ }
+ else
+ {
+ {
+ static struct param_t param;
+ param.name = "MOD_1_NORM_THRESHOLD";
+ DIV_1_PARAMS;
+ s.r = randlimb_norm ();
+ param.function = speed_mpn_mod_1_tune;
+ one (&mod_1_norm_threshold, &param);
+ }
+ {
+ static struct param_t param;
+ param.name = "MOD_1_UNNORM_THRESHOLD";
+ DIV_1_PARAMS;
+ s.r = randlimb_half ();
+ param.function = speed_mpn_mod_1_tune;
+ one (&mod_1_unnorm_threshold, &param);
+ }
+ }
+ {
+ static struct param_t param;
+
+ param.check_size = 256;
+
+ s.r = randlimb_norm ();
+ param.function = speed_mpn_mod_1_tune;
+
+ param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD";
+ param.min_size = 2;
+ one (&mod_1n_to_mod_1_1_threshold, &param);
+ }
+
+ {
+ /* One param is reused for every measurement in this block; the fields
+ that differ are reassigned before each call to one(). */
+ static struct param_t param;
+
+ param.check_size = 256;
+ s.r = randlimb_half ();
+ /* The three thresholds are adjusted after measuring and printed by hand
+ at the end of this block; noprint presumably keeps one() from
+ printing them itself -- TODO confirm. */
+ param.noprint = 1;
+
+ param.function = speed_mpn_mod_1_1;
+ param.function2 = speed_mpn_mod_1_2;
+ param.min_is_always = 1;
+ param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD";
+ param.min_size = 2;
+ one (&mod_1_1_to_mod_1_2_threshold, &param);
+
+ param.function = speed_mpn_mod_1_2;
+ param.function2 = speed_mpn_mod_1_4;
+ param.min_is_always = 1;
+ param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD";
+ param.min_size = 1;
+ one (&mod_1_2_to_mod_1_4_threshold, &param);
+
+ if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold)
+ {
+ /* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */
+ mod_1_2_to_mod_1_4_threshold = 0;
+
+ /* The result is stored in mod_1_1_to_mod_1_2_threshold; the "fake"
+ name below only labels this measurement. */
+ param.function = speed_mpn_mod_1_1;
+ param.function2 = speed_mpn_mod_1_4;
+ param.min_is_always = 1;
+ param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake";
+ param.min_size = 2;
+ one (&mod_1_1_to_mod_1_2_threshold, &param);
+ }
+
+ param.function = speed_mpn_mod_1_tune;
+ param.function2 = NULL;
+ param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD";
+ param.min_size = 2;
+ param.min_is_always = 0;
+ one (&mod_1u_to_mod_1_1_threshold, &param);
+
+ /* A later threshold that is not above MOD_1U_TO_MOD_1_1 is reset to 0;
+ the remarks printed below then read "never ...". */
+ if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold)
+ mod_1_1_to_mod_1_2_threshold = 0;
+ if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold)
+ mod_1_2_to_mod_1_4_threshold = 0;
+
+ print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL);
+ print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold,
+ mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL);
+ print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold,
+ mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL);
+ }
+
+ {
+ static struct param_t param;
+
+ param.check_size = 256;
+
+ param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD";
+ s.r = randlimb_norm ();
+ param.function = speed_mpn_preinv_mod_1;
+ param.function2 = speed_mpn_mod_1_tune;
+ param.min_size = 1;
+ one (&preinv_mod_1_to_mod_1_threshold, &param);
+ }
+}
+
+
+/* A non-zero DIVREM_1_UNNORM_THRESHOLD (or DIVREM_1_NORM_THRESHOLD) would
+ imply that udiv_qrnnd_preinv is worth using, but it seems most
+ straightforward to compare mpn_preinv_divrem_1 and mpn_divrem_1_div
+ directly.
+
+ Prints USE_PREINV_DIVREM_1.  When a measurement is needed it is a single
+ timing of both routines at size 200; the value printed is 1 if
+ mpn_preinv_divrem_1 was strictly faster, 0 otherwise.  Aborts if either
+ routine cannot be measured. */
+
+void
+tune_preinv_divrem_1 (void)
+{
+ static struct param_t param;
+ speed_function_t divrem_1;
+ const char *divrem_1_name;
+ double t1, t2;
+
+ if (GMP_NAIL_BITS != 0)
+ {
+ print_define_remark ("USE_PREINV_DIVREM_1", 0, "no preinv with nails");
+ return;
+ }
+
+ /* Any native version of mpn_preinv_divrem_1 is assumed to exist because
+ it's faster than mpn_divrem_1. */
+ if (HAVE_NATIVE_mpn_preinv_divrem_1)
+ {
+ print_define_remark ("USE_PREINV_DIVREM_1", 1, "native");
+ return;
+ }
+
+ /* If udiv_qrnnd_preinv is the only division method then of course
+ mpn_preinv_divrem_1 should be used. */
+ if (UDIV_PREINV_ALWAYS)
+ {
+ print_define_remark ("USE_PREINV_DIVREM_1", 1, "preinv always");
+ return;
+ }
+
+ /* If we've got an assembler version of mpn_divrem_1, then compare against
+ that, not the mpn_divrem_1_div generic C. */
+ if (HAVE_NATIVE_mpn_divrem_1)
+ {
+ divrem_1 = speed_mpn_divrem_1;
+ divrem_1_name = "mpn_divrem_1";
+ }
+ else
+ {
+ divrem_1 = speed_mpn_divrem_1_div;
+ divrem_1_name = "mpn_divrem_1_div";
+ }
+
+ param.data_high = DATA_HIGH_LT_R; /* allow skip one division */
+ s.size = 200; /* generous but not too big */
+ /* Divisor, nonzero. Unnormalized so as to exercise the shift!=0 case,
+ since in general that's probably most common, though in fact for a
+ 64-bit limb mp_bases[10].big_base is normalized. */
+ s.r = urandom() & (GMP_NUMB_MASK >> 4);
+ if (s.r == 0) s.r = 123; /* any nonzero value will do */
+
+ /* t1 is the preinv routine, t2 the routine it is compared against;
+ -1.0 is treated as "could not be measured" */
+ t1 = tuneup_measure (speed_mpn_preinv_divrem_1, &param, &s);
+ t2 = tuneup_measure (divrem_1, &param, &s);
+ if (t1 == -1.0 || t2 == -1.0)
+ {
+ printf ("Oops, can't measure mpn_preinv_divrem_1 and %s at %ld\n",
+ divrem_1_name, (long) s.size);
+ abort ();
+ }
+ if (option_trace >= 1)
+ printf ("size=%ld, mpn_preinv_divrem_1 %.9f, %s %.9f\n",
+ (long) s.size, t1, divrem_1_name, t2);
+
+ print_define_remark ("USE_PREINV_DIVREM_1", (mp_size_t) (t1 < t2), NULL);
+}
+
+
+
+/* Tune DIVREM_2_THRESHOLD.  Nothing is printed for a native mpn_divrem_2;
+   with nails or with UDIV_PREINV_ALWAYS a fixed value is printed instead of
+   measuring.  */
+void
+tune_divrem_2 (void)
+{
+  static struct param_t param;
+
+  /* Native assembler code is tuned by hand with the results kept in the
+     .asm file, so no threshold needs to appear in gmp-mparam.h.  */
+  if (HAVE_NATIVE_mpn_divrem_2)
+    return;
+
+  if (GMP_NAIL_BITS != 0)
+    {
+      print_define_remark ("DIVREM_2_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+    }
+  else if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("DIVREM_2_THRESHOLD", 0L, "preinv always");
+    }
+  else
+    {
+      /* The integer part of mpn_divrem_2 is what gets tuned.  The result
+         may well be slightly off for the fractional part, but the integer
+         part matters more.
+
+         nsize>=2 is required, so min_size has to be at least 2; it is set
+         to 4 so that no code space is spent on plain division when that
+         only wins at size==2 or size==3.  */
+      s.r = randlimb_norm ();
+
+      param.function = speed_mpn_divrem_2;
+      param.name = "DIVREM_2_THRESHOLD";
+      param.stop_factor = 2.0;
+      param.size_extra = 2;     /* does qsize==nsize-2 divisions */
+      param.min_is_always = 1;
+      param.min_size = 4;
+      param.check_size = 256;
+
+      one (&divrem_2_threshold, &param);
+    }
+}
+
+/* Tune DIV_QR_2_PI2_THRESHOLD by measuring speed_mpn_div_qr_2n, starting
+   from size 4.  */
+void
+tune_div_qr_2 (void)
+{
+  static struct param_t param;
+
+  param.min_size = 4;
+  param.check_size = 500;
+  param.function = speed_mpn_div_qr_2n;
+  param.name = "DIV_QR_2_PI2_THRESHOLD";
+
+  one (&div_qr_2_pi2_threshold, &param);
+}
+
+/* mpn_divexact_1 is mostly expected to see smallish divisors, so that is
+   what gets tuned.  Odd and even divisors can run at different speeds, so a
+   threshold is measured for each and the two are averaged.
+
+   mpn_divrem_1 may vary with high<divisor while mpn_divexact_1 might not,
+   but that is not tested since high<divisor is not expected to be common
+   with small divisors.  */
+
+void
+tune_divexact_1 (void)
+{
+  static struct param_t param;
+  mp_size_t thresh[2], average;
+  int parity, k, never;
+
+  /* A native mpn_divexact_1 is assumed to have all the speed of a full
+     mpn_divrem_1, so it is used unconditionally.  */
+  if (HAVE_NATIVE_mpn_divexact_1)
+    {
+      print_define_remark ("DIVEXACT_1_THRESHOLD", 0, "always (native)");
+      return;
+    }
+
+  ASSERT_ALWAYS (tuned_speed_mpn_divrem_1 != NULL);
+
+  param.noprint = 1;
+  param.function2 = speed_mpn_divexact_1;
+  param.function = tuned_speed_mpn_divrem_1;
+  param.stop_factor = 1.5;
+  param.min_size = 2;
+  param.check_size = 256;
+  param.data_high = DATA_HIGH_GE_R;
+  param.name = "DIVEXACT_1_THRESHOLD";
+
+  print_define_start (param.name);
+
+  /* parity 0 measures an odd divisor, parity 1 a divisor with its low three
+     bits clear.  Measuring stops at the first MP_SIZE_T_MAX result, which
+     then becomes the value printed.  */
+  never = 0;
+  for (parity = 0; parity <= 1 && ! never; parity++)
+    {
+      s.r = randlimb_half();
+      if (parity != 0)
+        s.r &= ~CNST_LIMB(7);
+      else
+        s.r |= 1;
+
+      one (&thresh[parity], &param);
+      if (option_trace)
+        printf ("low=%d thresh %ld\n", parity, (long) thresh[parity]);
+
+      never = (thresh[parity] == MP_SIZE_T_MAX);
+    }
+
+  if (never)
+    average = MP_SIZE_T_MAX;
+  else
+    {
+      if (option_trace)
+        {
+          printf ("average of:");
+          for (k = 0; k < numberof(thresh); k++)
+            printf (" %ld", (long) thresh[k]);
+          printf ("\n");
+        }
+
+      average = 0;
+      for (k = 0; k < numberof(thresh); k++)
+        average += thresh[k];
+      average /= numberof(thresh);
+
+      /* When divexact already wins at 3 limbs or fewer, use it always;
+         that saves code size and conditional jumps.  */
+      if (average <= 3)
+        average = 0;
+    }
+
+  print_define_end (param.name, average);
+}
+
+
+/* Tune BMOD_1_TO_MOD_1_THRESHOLD by comparing speed_mpn_modexact_1c_odd
+ against speed_mpn_mod_1_tune with an odd divisor.
+
+ The generic mpn_modexact_1_odd skips a divide step if high<divisor, the
+ same as mpn_mod_1, but this might not be true of an assembler
+ implementation. The threshold used is an average based on data where a
+ divide can be skipped and where it can't.
+
+ If modexact turns out to be better as early as 3 limbs, then use it
+ always, so as to reduce code size and conditional jumps. */
+
+void
+tune_modexact_1_odd (void)
+{
+ static struct param_t param;
+ mp_size_t thresh_lt, thresh_ge, average;
+
+#if 0
+ /* Any native mpn_modexact_1_odd is assumed to incorporate all the speed
+ of a full mpn_mod_1. */
+ if (HAVE_NATIVE_mpn_modexact_1_odd)
+ {
+ print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1");
+ return;
+ }
+#endif
+
+ param.name = "BMOD_1_TO_MOD_1_THRESHOLD";
+ param.check_size = 256;
+ param.min_size = 2;
+ param.stop_factor = 1.5;
+ param.function = speed_mpn_modexact_1c_odd;
+ param.function2 = speed_mpn_mod_1_tune;
+ param.noprint = 1;
+ s.r = randlimb_half () | 1; /* force the divisor odd */
+
+ print_define_start (param.name);
+
+ /* first measurement: data with high<divisor */
+ param.data_high = DATA_HIGH_LT_R;
+ one (&thresh_lt, &param);
+ if (option_trace)
+ printf ("lt thresh %ld\n", (long) thresh_lt);
+
+ /* thresh_lt is the result unless the second measurement below also
+ produces a finite value */
+ average = thresh_lt;
+ if (thresh_lt != MP_SIZE_T_MAX)
+ {
+ /* second measurement: data with high>=divisor */
+ param.data_high = DATA_HIGH_GE_R;
+ one (&thresh_ge, &param);
+ if (option_trace)
+ printf ("ge thresh %ld\n", (long) thresh_ge);
+
+ if (thresh_ge != MP_SIZE_T_MAX)
+ {
+ average = (thresh_ge + thresh_lt) / 2;
+ /* NOTE(review): the "use it always" cut-off tests thresh_ge, not
+ the average as tune_divexact_1 does -- confirm this is
+ intended. */
+ if (thresh_ge <= 3)
+ average = 0;
+ }
+ }
+
+ print_define_end (param.name, average);
+}
+
+
+/* Choose JACOBI_BASE_METHOD by handing the candidate mpn_jacobi_base speed
+   routines to one_method, with s.size set to three quarters of
+   GMP_LIMB_BITS.  */
+void
+tune_jacobi_base (void)
+{
+  static struct param_t param;
+  speed_function_t f[] =
+    {
+      speed_mpn_jacobi_base_1,
+      speed_mpn_jacobi_base_2,
+      speed_mpn_jacobi_base_3,
+      speed_mpn_jacobi_base_4,
+    };
+
+  s.size = GMP_LIMB_BITS * 3 / 4;
+
+  /* The count is derived from the table, so adding or removing a candidate
+     cannot leave it stale.  */
+  one_method (numberof(f), f, "mpn_jacobi_base", "JACOBI_BASE_METHOD", &param);
+}
+
+
+/* Tune GET_STR_DC_THRESHOLD and then GET_STR_PRECOMPUTE_THRESHOLD.  The
+   second search starts at the value found by the first.  */
+void
+tune_get_str (void)
+{
+  /* Decimal is the most common base, so tune for it.  Rough testing
+     suggests other bases differ, though not by much.  */
+  s.r = 10;
+
+  {
+    static struct param_t param;
+
+    GET_STR_PRECOMPUTE_THRESHOLD = 0;
+
+    param.max_size = GET_STR_THRESHOLD_LIMIT;
+    param.min_size = 4;
+    param.function = speed_mpn_get_str;
+    param.name = "GET_STR_DC_THRESHOLD";
+    one (&get_str_dc_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+
+    param.max_size = GET_STR_THRESHOLD_LIMIT;
+    param.min_size = GET_STR_DC_THRESHOLD;
+    param.function = speed_mpn_get_str;
+    param.name = "GET_STR_PRECOMPUTE_THRESHOLD";
+    one (&get_str_precompute_threshold, &param);
+  }
+}
+
+
+/* Measure mpn_pre_set_str converting s->size digits in base s->r (0 means
+   10) to limbs.  The digit string is derived from s->xp.  The power table
+   and scratch space are set up before the timer is started, so only the
+   conversion itself is timed.  Returns the value from speed_endtime, or
+   whatever SPEED_RESTRICT_COND yields when the size or base is rejected.  */
+double
+speed_mpn_pre_set_str (struct speed_params *s)
+{
+  unsigned char *str;
+  mp_ptr wp;
+  mp_size_t wn;
+  mp_size_t j;
+  unsigned i;
+  int base;
+  double t;
+  mp_ptr powtab_mem, tp;
+  powers_t powtab[GMP_LIMB_BITS];
+  powers_t *pt;
+  size_t n_pows;
+  mp_size_t un;
+  int chars_per_limb;
+  TMP_DECL;
+
+  SPEED_RESTRICT_COND (s->size >= 1);
+
+  /* Range-check s->r itself, before it is narrowed to int, so an oversized
+     value cannot wrap into the accepted 2..256 range.  */
+  SPEED_RESTRICT_COND (s->r == 0 || (s->r >= 2 && s->r <= 256));
+  base = s->r == 0 ? 10 : (int) s->r;
+
+  TMP_MARK;
+
+  /* One digit per byte, each reduced into the range 0..base-1.  The index
+     has the same signedness as s->size (presumably mp_size_t -- TODO
+     confirm).  */
+  str = (unsigned char *) TMP_ALLOC (s->size);
+  for (j = 0; j < s->size; j++)
+    str[j] = s->xp[j] % base;
+
+  LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
+  SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
+
+  /* use this during development to check wn is big enough */
+  /*
+  ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);
+  */
+
+  speed_operand_src (s, (mp_ptr) str, s->size/GMP_LIMB_BYTES);
+  speed_operand_dst (s, wp, wn);
+  speed_cache_fill (s);
+
+  /* Set up, outside the timed region, the power table and scratch space
+     that mpn_pre_set_str takes as arguments.  */
+  chars_per_limb = mp_bases[base].chars_per_limb;
+  un = s->size / chars_per_limb + 1;
+  powtab_mem = TMP_BALLOC_LIMBS (mpn_str_powtab_alloc (un));
+  n_pows = mpn_compute_powtab (powtab, powtab_mem, un, base);
+  pt = powtab + n_pows;
+  tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      mpn_pre_set_str (wp, str, s->size, pt, tp);
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+
+  TMP_FREE;
+  return t;
+}
+
+/* Tune SET_STR_DC_THRESHOLD and then SET_STR_PRECOMPUTE_THRESHOLD, both for
+   base 10.  The second search starts at the value found by the first.  */
+void
+tune_set_str (void)
+{
+  s.r = 10;  /* decimal */
+
+  {
+    static struct param_t param;
+
+    SET_STR_PRECOMPUTE_THRESHOLD = 0;
+
+    param.max_size = 50000;
+    param.min_size = 100;
+    param.function = speed_mpn_pre_set_str;
+    param.name = "SET_STR_DC_THRESHOLD";
+    param.step_factor = 0.01;
+    one (&set_str_dc_threshold, &param);
+  }
+
+  {
+    static struct param_t param;
+
+    param.max_size = 100000;
+    param.min_size = SET_STR_DC_THRESHOLD;
+    param.function = speed_mpn_set_str;
+    param.name = "SET_STR_PRECOMPUTE_THRESHOLD";
+    param.step_factor = 0.02;
+    one (&set_str_precompute_threshold, &param);
+  }
+}
+
+
+/* Run the FFT tuner for multiplication, producing MUL_FFT_TABLE3,
+   MUL_FFT_THRESHOLD and MUL_FFT_MODF_THRESHOLD.  Does nothing when
+   option_fft_max_size is zero.  */
+void
+tune_fft_mul (void)
+{
+  static struct fft_param_t param;
+
+  if (option_fft_max_size != 0)
+    {
+      param.sqr = 0;
+      param.function = speed_mpn_fft_mul;
+      param.mul_function = speed_mpn_mul_n;
+      param.mul_modf_function = speed_mpn_mul_fft;
+      param.first_size = MUL_TOOM33_THRESHOLD / 2;
+      param.max_size = option_fft_max_size;
+      param.threshold_name = "MUL_FFT_THRESHOLD";
+      param.p_threshold = &mul_fft_threshold;
+      param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
+      param.p_modf_threshold = &mul_fft_modf_threshold;
+      param.table_name = "MUL_FFT_TABLE3";
+
+      fft (&param);
+    }
+}
+
+
+/* Run the FFT tuner for squaring, producing SQR_FFT_TABLE3,
+   SQR_FFT_THRESHOLD and SQR_FFT_MODF_THRESHOLD.  Does nothing when
+   option_fft_max_size is zero.  */
+void
+tune_fft_sqr (void)
+{
+  static struct fft_param_t param;
+
+  if (option_fft_max_size != 0)
+    {
+      param.sqr = 1;
+      param.function = speed_mpn_fft_sqr;
+      param.mul_function = speed_mpn_sqr;
+      param.mul_modf_function = speed_mpn_mul_fft_sqr;
+      param.first_size = SQR_TOOM3_THRESHOLD / 2;
+      param.max_size = option_fft_max_size;
+      param.threshold_name = "SQR_FFT_THRESHOLD";
+      param.p_threshold = &sqr_fft_threshold;
+      param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
+      param.p_modf_threshold = &sqr_fft_modf_threshold;
+      param.table_name = "SQR_FFT_TABLE3";
+
+      fft (&param);
+    }
+}
+
+/* Tune FAC_DSC_THRESHOLD and then FAC_ODD_THRESHOLD, both measured with
+   speed_mpz_fac_ui_tune.  A single param is used for both searches, so any
+   field not reassigned for the second one keeps its earlier value.  */
+void
+tune_fac_ui (void)
+{
+  static struct param_t param;
+
+  param.function = speed_mpz_fac_ui_tune;
+
+  /* First search.  */
+  param.max_size = FAC_DSC_THRESHOLD_LIMIT;
+  param.min_size = 70;
+  param.name = "FAC_DSC_THRESHOLD";
+  one (&fac_dsc_threshold, &param);
+
+  /* Second search.  */
+  param.min_is_always = 1;
+  param.stop_factor = 1.7;
+  param.min_size = 22;
+  param.name = "FAC_ODD_THRESHOLD";
+  one (&fac_odd_threshold, &param);
+}
+
+/* Run every tuning step in order.  Progress and timing-method information
+   goes to stderr; the generated parameters, wrapped between a "Generated
+   by" header comment and a "Tuneup completed" trailer comment, go to
+   stdout.  The random operand blocks s.xp_block and s.yp_block are
+   allocated and filled here, before any tune_* routine runs.  */
+void
+all (void)
+{
+  time_t  start_time, end_time;
+  TMP_DECL;
+
+  TMP_MARK;
+  SPEED_TMP_ALLOC_LIMBS (s.xp_block, SPEED_BLOCK_SIZE, 0);
+  SPEED_TMP_ALLOC_LIMBS (s.yp_block, SPEED_BLOCK_SIZE, 0);
+
+  mpn_random (s.xp_block, SPEED_BLOCK_SIZE);
+  mpn_random (s.yp_block, SPEED_BLOCK_SIZE);
+
+  fprintf (stderr, "Parameters for %s\n", GMP_MPARAM_H_SUGGEST);
+
+  speed_time_init ();
+  fprintf (stderr, "Using: %s\n", speed_time_string);
+
+  fprintf (stderr, "speed_precision %d", speed_precision);
+  if (speed_unittime == 1.0)
+    fprintf (stderr, ", speed_unittime 1 cycle");
+  else
+    fprintf (stderr, ", speed_unittime %.2e secs", speed_unittime);
+  if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
+    fprintf (stderr, ", CPU freq unknown\n");
+  else
+    fprintf (stderr, ", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
+
+  fprintf (stderr, "DEFAULT_MAX_SIZE %d, fft_max_size %ld\n",
+           DEFAULT_MAX_SIZE, (long) option_fft_max_size);
+  fprintf (stderr, "\n");
+
+  time (&start_time);
+  {
+    struct tm  *tp;
+    tp = localtime (&start_time);
+
+    /* localtime returns NULL on failure; keep the header comment well
+       formed instead of dereferencing a null pointer.  */
+    if (tp != NULL)
+      printf ("/* Generated by tuneup.c, %d-%02d-%02d, ",
+              tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
+    else
+      printf ("/* Generated by tuneup.c, unknown date, ");
+
+    /* Each branch below finishes the header comment with the compiler
+       identification.  */
+#ifdef __GNUC__
+    /* gcc sub-minor version doesn't seem to come through as a define */
+    printf ("gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__);
+#define PRINTED_COMPILER
+#endif
+#if defined (__SUNPRO_C)
+    printf ("Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100);
+#define PRINTED_COMPILER
+#endif
+#if ! defined (__GNUC__) && defined (__sgi) && defined (_COMPILER_VERSION)
+    /* gcc defines __sgi and _COMPILER_VERSION on irix 6, avoid that */
+    printf ("MIPSpro C %d.%d.%d */\n",
+            _COMPILER_VERSION / 100,
+            _COMPILER_VERSION / 10 % 10,
+            _COMPILER_VERSION % 10);
+#define PRINTED_COMPILER
+#endif
+#if defined (__DECC) && defined (__DECC_VER)
+    printf ("DEC C %d */\n", __DECC_VER);
+#define PRINTED_COMPILER
+#endif
+#if ! defined (PRINTED_COMPILER)
+    printf ("system compiler */\n");
+#endif
+  }
+  printf ("\n");
+
+  /* tune_divrem_1 sets tuned_speed_mpn_divrem_1, which tune_divexact_1
+     asserts to be non-NULL, so it has to come first.  */
+  tune_divrem_1 ();
+  tune_mod_1 ();
+  tune_preinv_divrem_1 ();
+  tune_div_qr_1 ();
+#if 0
+  tune_divrem_2 ();
+#endif
+  tune_div_qr_2 ();
+  tune_divexact_1 ();
+  tune_modexact_1_odd ();
+  printf("\n");
+
+  relspeed_div_1_vs_mul_1 ();
+  printf("\n");
+
+  tune_mul_n ();
+  printf("\n");
+
+  tune_mul ();
+  printf("\n");
+
+  tune_sqr ();
+  printf("\n");
+
+  tune_mulmid ();
+  printf("\n");
+
+  tune_mulmod_bnm1 ();
+  tune_sqrmod_bnm1 ();
+  printf("\n");
+
+  tune_fft_mul ();
+  printf("\n");
+
+  tune_fft_sqr ();
+  printf ("\n");
+
+  tune_mullo ();
+  tune_sqrlo ();
+  printf("\n");
+
+  tune_dc_div ();
+  tune_dc_bdiv ();
+
+  printf("\n");
+  tune_invertappr ();
+  tune_invert ();
+  printf("\n");
+
+  tune_binvert ();
+  tune_redc ();
+  printf("\n");
+
+  tune_mu_div ();
+  tune_mu_bdiv ();
+  printf("\n");
+
+  tune_powm_sec ();
+  printf("\n");
+
+  tune_get_str ();
+  tune_set_str ();
+  printf("\n");
+
+  tune_fac_ui ();
+  printf("\n");
+
+  tune_matrix22_mul ();
+  tune_hgcd2 ();
+  tune_hgcd ();
+  tune_hgcd_appr ();
+  tune_hgcd_reduce();
+  tune_gcd_dc ();
+  tune_gcdext_dc ();
+  tune_jacobi_base ();
+  printf("\n");
+
+  time (&end_time);
+  printf ("/* Tuneup completed successfully, took %ld seconds */\n",
+          (long) (end_time - start_time));
+
+  TMP_FREE;
+}
+
+
+/* Convert the argument of option -OPT to a long.  The whole of ARG must be
+   a decimal integer; otherwise a message is printed on stderr and the
+   program exits with status 1, rather than quietly running with 0 the way
+   atol/atoi would.  */
+static long
+tuneup_option_long (int opt, const char *arg)
+{
+  char  *end;
+  long  val;
+
+  val = strtol (arg, &end, 10);
+  if (end == arg || *end != '\0')
+    {
+      fprintf (stderr, "tuneup: invalid number for -%c: %s\n", opt, arg);
+      exit (1);
+    }
+  return val;
+}
+
+/* Program entry point.  Options:
+     -f N     set option_fft_max_size to N
+     -f t...  set option_fft_trace to 2 (any argument starting with 't')
+     -o STR   hand STR to speed_option_set
+     -p N     set speed_precision to N
+     -t       increase option_trace by one, may be repeated
+   After option parsing, all() is run and the program exits with status 0.
+   An unrecognised option or a malformed number exits with status 1.  */
+int
+main (int argc, char *argv[])
+{
+  int  opt;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  /* getopt is specified to return -1 once the options are exhausted; EOF is
+     only guaranteed to be some negative value.  */
+  while ((opt = getopt(argc, argv, "f:o:p:t")) != -1)
+    {
+      switch (opt) {
+      case 'f':
+        if (optarg[0] == 't')
+          option_fft_trace = 2;
+        else
+          option_fft_max_size = tuneup_option_long (opt, optarg);
+        break;
+      case 'o':
+        speed_option_set (optarg);
+        break;
+      case 'p':
+        speed_precision = (int) tuneup_option_long (opt, optarg);
+        break;
+      case 't':
+        option_trace++;
+        break;
+      case '?':
+      default:
+        /* getopt has already printed its own diagnostic for '?' */
+        exit(1);
+      }
+    }
+
+  all ();
+  exit (0);
+}
diff --git a/gmp-6.3.0/tune/x86_64.asm b/gmp-6.3.0/tune/x86_64.asm
new file mode 100644
index 0000000..b7ec44c
--- /dev/null
+++ b/gmp-6.3.0/tune/x86_64.asm
@@ -0,0 +1,55 @@
+dnl x86-64 time stamp counter (rdtsc) access routine.
+
+dnl Copyright 1999, 2000, 2003-2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+
+C void speed_cyclecounter (unsigned p[2]);
+C
+C Get the pentium rdtsc cycle counter, storing the least significant word in
+C p[0] and the most significant in p[1].
+C
+C cpuid is used to serialize execution. On big measurements this won't be
+C significant but it may help make small single measurements more accurate.
+
+PROLOGUE(speed_cyclecounter)
+
+ C rdi p
+
+ movq %rbx, %r10 C cpuid overwrites ebx, so keep the caller rbx in r10
+ xorl %eax, %eax C eax = 0 selects cpuid leaf 0
+ cpuid C run only to serialize, its outputs are not used
+ rdtsc C counter arrives as edx:eax
+ movl %eax, (%rdi) C p[0] = low 32 bits
+ movl %edx, 4(%rdi) C p[1] = high 32 bits
+ movq %r10, %rbx C restore rbx
+ ret
+EPILOGUE()