aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/sparc64
diff options
context:
space:
mode:
authorDuncan Wilkie <antigravityd@gmail.com>2023-11-18 06:11:09 -0600
committerDuncan Wilkie <antigravityd@gmail.com>2023-11-18 06:11:09 -0600
commit11da511c784eca003deb90c23570f0873954e0de (patch)
treee14fdd3d5d6345956d67e79ae771d0633d28362b /gmp-6.3.0/mpn/sparc64
Initial commit.
Diffstat (limited to 'gmp-6.3.0/mpn/sparc64')
-rw-r--r--gmp-6.3.0/mpn/sparc64/README125
-rw-r--r--gmp-6.3.0/mpn/sparc64/copyd.asm89
-rw-r--r--gmp-6.3.0/mpn/sparc64/copyi.asm86
-rw-r--r--gmp-6.3.0/mpn/sparc64/dive_1.c161
-rw-r--r--gmp-6.3.0/mpn/sparc64/divrem_1.c242
-rw-r--r--gmp-6.3.0/mpn/sparc64/gcd_11.asm87
-rw-r--r--gmp-6.3.0/mpn/sparc64/gmp-mparam.h139
-rw-r--r--gmp-6.3.0/mpn/sparc64/lshift.asm140
-rw-r--r--gmp-6.3.0/mpn/sparc64/lshiftc.asm147
-rw-r--r--gmp-6.3.0/mpn/sparc64/mod_1.c238
-rw-r--r--gmp-6.3.0/mpn/sparc64/mod_1_4.c235
-rw-r--r--gmp-6.3.0/mpn/sparc64/mode1o.c196
-rw-r--r--gmp-6.3.0/mpn/sparc64/rshift.asm142
-rw-r--r--gmp-6.3.0/mpn/sparc64/sec_tabselect.asm162
-rw-r--r--gmp-6.3.0/mpn/sparc64/sparc64.h217
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/add_n.asm241
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_1.asm606
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_2.asm551
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/lshiftc.asm165
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/mul_1.asm580
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm342
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/sub_n.asm241
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc1234/submul_1.asm68
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparc34/gmp-mparam.h222
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/add_n.asm68
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh1_n.asm41
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh2_n.asm41
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/addlshC_n.asm69
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/addmul_1.asm86
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/gmp-mparam.h154
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/mul_1.asm82
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh1_n.asm41
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh2_n.asm41
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblshC_n.asm69
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/sub_n.asm68
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh1_n.asm41
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh2_n.asm41
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/sublshC_n.asm69
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct1/submul_1.asm86
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm126
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm182
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm228
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm219
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm147
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm147
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm137
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm145
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm129
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm78
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm92
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm77
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m488
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm233
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm117
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm82
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm174
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm70
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm93
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm144
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm170
-rw-r--r--gmp-6.3.0/mpn/sparc64/ultrasparct45/gmp-mparam.h174
61 files changed, 9471 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/sparc64/README b/gmp-6.3.0/mpn/sparc64/README
new file mode 100644
index 0000000..e2c051a
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/README
@@ -0,0 +1,125 @@
+Copyright 1997, 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions for 64-bit V9 SPARC
+
+RELEVANT OPTIMIZATION ISSUES
+
+Notation:
+ IANY = shift/add/sub/logical/sethi
+ IADDLOG = add/sub/logical/sethi
+ MEM = ld*/st*
+ FA = fadd*/fsub*/f*to*/fmov*
+ FM = fmul*
+
+UltraSPARC can issue four instructions per cycle, with these restrictions:
+* Two IANY instructions, but only one of these may be a shift. If there is a
+ shift and an IANY instruction, the shift must precede the IANY instruction.
+* One FA.
+* One FM.
+* One branch.
+* One MEM.
+* IANY/IADDLOG/MEM must be insn 1, 2, or 3 in an issue bundle. Taken branches
+ should not be in slot 4, since that makes the delay insn come from separate
+ bundle.
+* If two IANY/IADDLOG instructions are to be executed in the same cycle and one
+ of these is setting the condition codes, that instruction must be the second
+ one.
+
+To summarize, ignoring branches, these are the bundles that can reach the peak
+execution speed:
+
+insn1 iany iany mem iany iany mem iany iany mem
+insn2 iaddlog mem iany mem iaddlog iany mem iaddlog iany
+insn3 mem iaddlog iaddlog fa fa fa fm fm fm
+insn4 fa/fm fa/fm fa/fm fm fm fm fa fa fa
+
+The 64-bit integer multiply instruction mulx takes from 5 cycles to 35 cycles,
+depending on the position of the most significant bit of the first source
+operand. When used for 32x32->64 multiplication, it needs 20 cycles.
+Furthermore, it stalls the processor while executing. We stay away from that
+instruction, and instead use floating-point operations.
+
+Floating-point add and multiply units are fully pipelined. The latency for
+UltraSPARC-1/2 is 3 cycles and for UltraSPARC-3 it is 4 cycles.
+
+Integer conditional move instructions cannot dual-issue with other integer
+instructions. No conditional move can issue 1-5 cycles after a load. (This
+might have been fixed for UltraSPARC-3.)
+
+The UltraSPARC-3 pipeline is very simular to the one of UltraSPARC-1/2 , but is
+somewhat slower. Branches execute slower, and there may be other new stalls.
+But integer multiply doesn't stall the entire CPU and also has a much lower
+latency. But it's still not pipelined, and thus useless for our needs.
+
+STATUS
+
+* mpn_lshift, mpn_rshift: The current code runs at 2.0 cycles/limb on
+ UltraSPARC-1/2 and 2.65 on UltraSPARC-3. For UltraSPARC-1/2, the IEU0
+ functional unit is saturated with shifts.
+
+* mpn_add_n, mpn_sub_n: The current code runs at 4 cycles/limb on
+ UltraSPARC-1/2 and 4.5 cycles/limb on UltraSPARC-3. The 4 instruction
+ recurrency is the speed limiter.
+
+* mpn_addmul_1: The current code runs at 14 cycles/limb asymptotically on
+ UltraSPARC-1/2 and 17.5 cycles/limb on UltraSPARC-3. On UltraSPARC-1/2, the
+ code sustains 4 instructions/cycle. It might be possible to invent a better
+ way of summing the intermediate 49-bit operands, but it is unlikely that it
+ will save enough instructions to save an entire cycle.
+
+ The load-use of the u operand is not enough scheduled for good L2 cache
+ performance. The UltraSPARC-1/2 L1 cache is direct mapped, and since we use
+ temporary stack slots that will conflict with the u and r operands, we miss
+ to L2 very often. The load-use of the std/ldx pairs via the stack are
+ perhaps over-scheduled.
+
+ It would be possible to save two instructions: (1) The mov could be avoided
+ if the std/ldx were less scheduled. (2) The ldx of the r operand could be
+ split into two ld instructions, saving the shifts/masks.
+
+ It should be possible to reach 14 cycles/limb for UltraSPARC-3 if the fp
+ operations where rescheduled for this processor's 4-cycle latency.
+
+* mpn_mul_1: The current code is a straightforward edit of the mpn_addmul_1
+ code. It would be possible to shave one or two cycles from it, with some
+ labour.
+
+* mpn_submul_1: Simpleminded code just calling mpn_mul_1 + mpn_sub_n. This
+ means that it runs at 18 cycles/limb on UltraSPARC-1/2 and 23 cycles/limb on
+ UltraSPARC-3. It would be possible to either match the mpn_addmul_1
+ performance, or in the worst case use one more instruction group.
+
+* US1/US2 cache conflict resolving. The direct mapped L1 date cache of US1/US2
+ is a problem for mul_1, addmul_1 (and a prospective submul_1). We should
+ allocate a larger cache area, and put the stack temp area in a place that
+ doesn't cause cache conflicts.
diff --git a/gmp-6.3.0/mpn/sparc64/copyd.asm b/gmp-6.3.0/mpn/sparc64/copyd.asm
new file mode 100644
index 0000000..ab105d3
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/copyd.asm
@@ -0,0 +1,89 @@
+dnl SPARC v9 mpn_copyd -- Copy a limb vector, decrementing.
+
+dnl Copyright 1999-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+C UltraSPARC T1: 17
+C UltraSPARC T3: 6
+C UltraSPARC T4/T5: 2
+
+C INPUT PARAMETERS
+C rptr %o0
+C sptr %o1
+C n %o2
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_copyd)
+ sllx %o2,3,%g1
+ add %g1,%o0,%o0
+ add %g1,%o1,%o1
+ addcc %o2,-8,%o2
+ bl,pt %xcc,L(end01234567)
+ nop
+L(loop1):
+ ldx [%o1-8],%g1
+ ldx [%o1-16],%g2
+ ldx [%o1-24],%g3
+ ldx [%o1-32],%g4
+ ldx [%o1-40],%g5
+ ldx [%o1-48],%o3
+ ldx [%o1-56],%o4
+ ldx [%o1-64],%o5
+ add %o1,-64,%o1
+ stx %g1,[%o0-8]
+ stx %g2,[%o0-16]
+ stx %g3,[%o0-24]
+ stx %g4,[%o0-32]
+ stx %g5,[%o0-40]
+ stx %o3,[%o0-48]
+ stx %o4,[%o0-56]
+ stx %o5,[%o0-64]
+ addcc %o2,-8,%o2
+ bge,pt %xcc,L(loop1)
+ add %o0,-64,%o0
+L(end01234567):
+ addcc %o2,8,%o2
+ bz,pn %xcc,L(end)
+ nop
+L(loop2):
+ ldx [%o1-8],%g1
+ add %o1,-8,%o1
+ addcc %o2,-1,%o2
+ stx %g1,[%o0-8]
+ bg,pt %xcc,L(loop2)
+ add %o0,-8,%o0
+L(end): retl
+ nop
+EPILOGUE(mpn_copyd)
diff --git a/gmp-6.3.0/mpn/sparc64/copyi.asm b/gmp-6.3.0/mpn/sparc64/copyi.asm
new file mode 100644
index 0000000..45663dc
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/copyi.asm
@@ -0,0 +1,86 @@
+dnl SPARC v9 mpn_copyi -- Copy a limb vector, incrementing.
+
+dnl Copyright 1999-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+C UltraSPARC T1: 17
+C UltraSPARC T3: 6
+C UltraSPARC T4/T5: 2
+
+C INPUT PARAMETERS
+C rptr %o0
+C sptr %o1
+C n %o2
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_copyi)
+ addcc %o2,-8,%o2
+ bl,pt %xcc,L(end01234567)
+ nop
+L(loop1):
+ ldx [%o1+0],%g1
+ ldx [%o1+8],%g2
+ ldx [%o1+16],%g3
+ ldx [%o1+24],%g4
+ ldx [%o1+32],%g5
+ ldx [%o1+40],%o3
+ ldx [%o1+48],%o4
+ ldx [%o1+56],%o5
+ add %o1,64,%o1
+ stx %g1,[%o0+0]
+ stx %g2,[%o0+8]
+ stx %g3,[%o0+16]
+ stx %g4,[%o0+24]
+ stx %g5,[%o0+32]
+ stx %o3,[%o0+40]
+ stx %o4,[%o0+48]
+ stx %o5,[%o0+56]
+ addcc %o2,-8,%o2
+ bge,pt %xcc,L(loop1)
+ add %o0,64,%o0
+L(end01234567):
+ addcc %o2,8,%o2
+ bz,pn %xcc,L(end)
+ nop
+L(loop2):
+ ldx [%o1+0],%g1
+ add %o1,8,%o1
+ addcc %o2,-1,%o2
+ stx %g1,[%o0+0]
+ bg,pt %xcc,L(loop2)
+ add %o0,8,%o0
+L(end): retl
+ nop
+EPILOGUE(mpn_copyi)
diff --git a/gmp-6.3.0/mpn/sparc64/dive_1.c b/gmp-6.3.0/mpn/sparc64/dive_1.c
new file mode 100644
index 0000000..4264f29
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/dive_1.c
@@ -0,0 +1,161 @@
+/* UltraSPARC 64 mpn_divexact_1 -- mpn by limb exact division.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000, 2001, 2003, 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/* 64-bit divisor 32-bit divisor
+ cycles/limb cycles/limb
+ (approx) (approx)
+ Ultrasparc 2i: 110 70
+*/
+
+
+/* There are two key ideas here to reduce mulx's. Firstly when the divisor
+ is 32-bits the high of q*d can be calculated without the two 32x32->64
+ cross-products involving the high 32-bits of the divisor, that being zero
+ of course. Secondly umul_ppmm_lowequal and umul_ppmm_half_lowequal save
+ one mulx (each) knowing the low of q*d is equal to the input limb l.
+
+ For size==1, a simple udivx is used. This is faster than calculating an
+ inverse.
+
+ For a 32-bit divisor and small sizes, an attempt was made at a simple
+ udivx loop (two per 64-bit limb), but it turned out to be slower than
+ mul-by-inverse. At size==2 the inverse is about 260 cycles total
+ compared to a udivx at 291. Perhaps the latter would suit when size==2
+ but the high 32-bits of the second limb is zero (saving one udivx), but
+ it doesn't seem worth a special case just for that. */
+
+void
+mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+ mp_limb_t inverse, s, s_next, c, l, ls, q;
+ unsigned rshift, lshift;
+ mp_limb_t lshift_mask;
+ mp_limb_t divisor_h;
+
+ ASSERT (size >= 1);
+ ASSERT (divisor != 0);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
+ ASSERT_MPN (src, size);
+ ASSERT_LIMB (divisor);
+
+ s = *src++; /* src low limb */
+ size--;
+ if (size == 0)
+ {
+ *dst = s / divisor;
+ return;
+ }
+
+ if ((divisor & 1) == 0)
+ {
+ count_trailing_zeros (rshift, divisor);
+ divisor >>= rshift;
+ lshift = 64 - rshift;
+
+ lshift_mask = MP_LIMB_T_MAX;
+ }
+ else
+ {
+ rshift = 0;
+
+ /* rshift==0 means no shift, so must mask out other part in this case */
+ lshift = 0;
+ lshift_mask = 0;
+ }
+
+ binvert_limb (inverse, divisor);
+
+ c = 0;
+ divisor_h = HIGH32 (divisor);
+
+ if (divisor_h == 0)
+ {
+ /* 32-bit divisor */
+ do
+ {
+ s_next = *src++;
+ ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);
+ s = s_next;
+
+ SUBC_LIMB (c, l, ls, c);
+
+ q = l * inverse;
+ *dst++ = q;
+
+ umul_ppmm_half_lowequal (l, q, divisor, l);
+ c += l;
+
+ size--;
+ }
+ while (size != 0);
+
+ ls = s >> rshift;
+ l = ls - c;
+ q = l * inverse;
+ *dst = q;
+ }
+ else
+ {
+ /* 64-bit divisor */
+ mp_limb_t divisor_l = LOW32 (divisor);
+ do
+ {
+ s_next = *src++;
+ ls = (s >> rshift) | ((s_next << lshift) & lshift_mask);
+ s = s_next;
+
+ SUBC_LIMB (c, l, ls, c);
+
+ q = l * inverse;
+ *dst++ = q;
+
+ umul_ppmm_lowequal (l, q, divisor, divisor_h, divisor_l, l);
+ c += l;
+
+ size--;
+ }
+ while (size != 0);
+
+ ls = s >> rshift;
+ l = ls - c;
+ q = l * inverse;
+ *dst = q;
+ }
+}
diff --git a/gmp-6.3.0/mpn/sparc64/divrem_1.c b/gmp-6.3.0/mpn/sparc64/divrem_1.c
new file mode 100644
index 0000000..ac94565
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/divrem_1.c
@@ -0,0 +1,242 @@
+/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
+
+Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/* 64-bit divisor 32-bit divisor
+ cycles/limb cycles/limb
+ (approx) (approx)
+ integer fraction integer fraction
+ Ultrasparc 2i: 160 160 122 96
+*/
+
+
+/* 32-bit divisors are treated in special case code. This requires 4 mulx
+ per limb instead of 8 in the general case.
+
+ For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
+ addressing, to get the two halves of each limb read in the correct order.
+ This is kept in an adj variable. Doing that measures about 4 c/l faster
+ than just writing HALF_ENDIAN_ADJ(i) in the integer loop. The latter
+ shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
+ (on gcc 3.2.1 at least). The fraction loop doesn't seem affected, but we
+ still use a variable since that ought to work out best. */
+
+mp_limb_t
+mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
+ mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+{
+ mp_size_t total_size_limbs;
+ mp_size_t i;
+
+ ASSERT (xsize_limbs >= 0);
+ ASSERT (size_limbs >= 0);
+ ASSERT (d_limb != 0);
+ /* FIXME: What's the correct overlap rule when xsize!=0? */
+ ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
+ ap_limbptr, size_limbs));
+
+ total_size_limbs = size_limbs + xsize_limbs;
+ if (UNLIKELY (total_size_limbs == 0))
+ return 0;
+
+ /* udivx is good for total_size==1, and no need to bother checking
+ limb<divisor, since if that's likely the caller should check */
+ if (UNLIKELY (total_size_limbs == 1))
+ {
+ mp_limb_t a, q;
+ a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
+ q = a / d_limb;
+ qp_limbptr[0] = q;
+ return a - q*d_limb;
+ }
+
+ if (d_limb <= CNST_LIMB(0xFFFFFFFF))
+ {
+ mp_size_t size, xsize, total_size, adj;
+ unsigned *qp, n1, n0, q, r, nshift, norm_rmask;
+ mp_limb_t dinv_limb;
+ const unsigned *ap;
+ int norm, norm_rshift;
+
+ size = 2 * size_limbs;
+ xsize = 2 * xsize_limbs;
+ total_size = size + xsize;
+
+ ap = (unsigned *) ap_limbptr;
+ qp = (unsigned *) qp_limbptr;
+
+ qp += xsize;
+ r = 0; /* initial remainder */
+
+ if (LIKELY (size != 0))
+ {
+ n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
+
+ /* If the length of the source is uniformly distributed, then
+ there's a 50% chance of the high 32-bits being zero, which we
+ can skip. */
+ if (n1 == 0)
+ {
+ n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
+ total_size--;
+ size--;
+ ASSERT (size > 0); /* because always even */
+ qp[size + HALF_ENDIAN_ADJ(1)] = 0;
+ }
+
+ /* Skip a division if high < divisor (high quotient 0). Testing
+ here before before normalizing will still skip as often as
+ possible. */
+ if (n1 < d_limb)
+ {
+ r = n1;
+ size--;
+ qp[size + HALF_ENDIAN_ADJ(size)] = 0;
+ total_size--;
+ if (total_size == 0)
+ return r;
+ }
+ }
+
+ count_leading_zeros_32 (norm, d_limb);
+ norm -= 32;
+ d_limb <<= norm;
+ r <<= norm;
+
+ norm_rshift = 32 - norm;
+ norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
+
+ invert_half_limb (dinv_limb, d_limb);
+
+ if (LIKELY (size != 0))
+ {
+ i = size - 1;
+ adj = HALF_ENDIAN_ADJ (i);
+ n1 = ap[i + adj];
+ adj = -adj;
+ r |= ((n1 >> norm_rshift) & norm_rmask);
+ for ( ; i > 0; i--)
+ {
+ n0 = ap[i-1 + adj];
+ adj = -adj;
+ nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+ udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
+ qp[i + adj] = q;
+ n1 = n0;
+ }
+ nshift = n1 << norm;
+ udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
+ qp[0 + HALF_ENDIAN_ADJ(0)] = q;
+ }
+ qp -= xsize;
+ adj = HALF_ENDIAN_ADJ (0);
+ for (i = xsize-1; i >= 0; i--)
+ {
+ udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
+ adj = -adj;
+ qp[i + adj] = q;
+ }
+
+ return r >> norm;
+ }
+ else
+ {
+ mp_srcptr ap;
+ mp_ptr qp;
+ mp_size_t size, xsize, total_size;
+ mp_limb_t d, n1, n0, q, r, dinv, nshift, norm_rmask;
+ int norm, norm_rshift;
+
+ ap = ap_limbptr;
+ qp = qp_limbptr;
+ size = size_limbs;
+ xsize = xsize_limbs;
+ total_size = total_size_limbs;
+ d = d_limb;
+
+ qp += total_size; /* above high limb */
+ r = 0; /* initial remainder */
+
+ if (LIKELY (size != 0))
+ {
+ /* Skip a division if high < divisor (high quotient 0). Testing
+ here before before normalizing will still skip as often as
+ possible. */
+ n1 = ap[size-1];
+ if (n1 < d)
+ {
+ r = n1;
+ *--qp = 0;
+ total_size--;
+ if (total_size == 0)
+ return r;
+ size--;
+ }
+ }
+
+ count_leading_zeros (norm, d);
+ d <<= norm;
+ r <<= norm;
+
+ norm_rshift = GMP_LIMB_BITS - norm;
+ norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
+
+ invert_limb (dinv, d);
+
+ if (LIKELY (size != 0))
+ {
+ n1 = ap[size-1];
+ r |= ((n1 >> norm_rshift) & norm_rmask);
+ for (i = size-2; i >= 0; i--)
+ {
+ n0 = ap[i];
+ nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+ udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
+ *--qp = q;
+ n1 = n0;
+ }
+ nshift = n1 << norm;
+ udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
+ *--qp = q;
+ }
+ for (i = 0; i < xsize; i++)
+ {
+ udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
+ *--qp = q;
+ }
+ return r >> norm;
+ }
+}
diff --git a/gmp-6.3.0/mpn/sparc64/gcd_11.asm b/gmp-6.3.0/mpn/sparc64/gcd_11.asm
new file mode 100644
index 0000000..2dd200d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/gcd_11.asm
@@ -0,0 +1,87 @@
+dnl SPARC64 mpn_gcd_11.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for SPARC by Torbjörn
+dnl Granlund.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011-2013, 2021 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C UltraSPARC 1&2: 5.1
+C UltraSPARC 3: 5.0
+C UltraSPARC T1: 11.4
+C UltraSPARC T3: 10
+C UltraSPARC T4: 6
+C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+ RODATA
+ TYPE(ctz_table,object)
+ctz_table:
+ .byte MAXSHIFT
+forloop(i,1,MASK,
+` .byte m4_count_trailing_zeros(i)
+')
+ SIZE(ctz_table,.-ctz_table)
+
+define(`u0', `%o0')
+define(`v0', `%o1')
+
+ASM_START()
+PROLOGUE(mpn_gcd_11)
+ LEA64(ctz_table, o5, g4)
+ b L(odd)
+ mov u0, %o4
+
+ ALIGN(16)
+L(top): movcc %xcc, %o4, v0 C v = min(u,v)
+ movcc %xcc, %o2, %o0 C u = |v - u]
+L(mid): ldub [%o5+%g1], %g5 C
+ brz,pn %g1, L(shift_alot) C
+ srlx %o0, %g5, %o4 C new u, odd
+L(odd): subcc v0, %o4, %o2 C v - u, set flags for branch and movcc
+ sub %o4, v0, %o0 C u - v
+ bnz,pt %xcc, L(top) C
+ and %o2, MASK, %g1 C extract low MAXSHIFT bits from (v-u)
+
+ retl
+ mov v0, %o0
+
+L(shift_alot):
+ mov %o4, %o0
+ b L(mid)
+ and %o4, MASK, %g1 C
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/gmp-mparam.h b/gmp-6.3.0/mpn/sparc64/gmp-mparam.h
new file mode 100644
index 0000000..5ac2c46
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/gmp-mparam.h
@@ -0,0 +1,139 @@
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2002, 2004, 2006, 2008-2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 500 MHz ultrasparc2 running GNU/Linux */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 22
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 27
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define USE_PREINV_DIVREM_1 1
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 30
+#define MUL_TOOM33_THRESHOLD 187
+#define MUL_TOOM44_THRESHOLD 278
+#define MUL_TOOM6H_THRESHOLD 278
+#define MUL_TOOM8H_THRESHOLD 357
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 201
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 199
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 154
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 107
+
+#define SQR_BASECASE_THRESHOLD 13
+#define SQR_TOOM2_THRESHOLD 69
+#define SQR_TOOM3_THRESHOLD 116
+#define SQR_TOOM4_THRESHOLD 336
+#define SQR_TOOM6_THRESHOLD 336
+#define SQR_TOOM8_THRESHOLD 454
+
+#define MULMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 23
+
+#define MUL_FFT_MODF_THRESHOLD 248 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 248, 5}, { 9, 4}, { 19, 6}, { 5, 5}, \
+ { 15, 6}, { 8, 5}, { 17, 6}, { 21, 7}, \
+ { 19, 8}, { 11, 7}, { 25, 8}, { 15, 7}, \
+ { 31, 8}, { 27, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255,10}, { 71, 9}, { 143, 8}, { 287,10}, \
+ { 79,11}, { 47,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 50
+#define MUL_FFT_THRESHOLD 1984
+
+#define SQR_FFT_MODF_THRESHOLD 236 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 236, 5}, { 8, 4}, { 17, 5}, { 19, 6}, \
+ { 10, 5}, { 21, 6}, { 19, 7}, { 10, 6}, \
+ { 21, 7}, { 21, 8}, { 21, 9}, { 11, 8}, \
+ { 23, 9}, { 19, 8}, { 43, 9}, { 23,10}, \
+ { 15, 9}, { 43,10}, { 23,11}, { 15,10}, \
+ { 31, 9}, { 63,10}, { 47, 8}, { 191,11}, \
+ { 31,10}, { 63, 8}, { 255, 7}, { 511, 9}, \
+ { 135, 8}, { 271,10}, { 71, 9}, { 143, 8}, \
+ { 287, 7}, { 575,11}, { 47, 9}, { 191, 8}, \
+ { 383,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 49
+#define SQR_FFT_THRESHOLD 1120
+
+#define MULLO_BASECASE_THRESHOLD 16
+#define MULLO_DC_THRESHOLD 41
+#define MULLO_MUL_N_THRESHOLD 3791
+
+#define DC_DIV_QR_THRESHOLD 27
+#define DC_DIVAPPR_Q_THRESHOLD 100
+#define DC_BDIV_QR_THRESHOLD 47
+#define DC_BDIV_Q_THRESHOLD 174
+
+#define INV_MULMOD_BNM1_THRESHOLD 58
+#define INV_NEWTON_THRESHOLD 13
+#define INV_APPR_THRESHOLD 9
+
+#define BINV_NEWTON_THRESHOLD 187
+#define REDC_1_TO_REDC_2_THRESHOLD 10
+#define REDC_2_TO_REDC_N_THRESHOLD 115
+
+#define MU_DIV_QR_THRESHOLD 680
+#define MU_DIVAPPR_Q_THRESHOLD 618
+#define MUPI_DIV_QR_THRESHOLD 0 /* always */
+#define MU_BDIV_QR_THRESHOLD 748
+#define MU_BDIV_Q_THRESHOLD 889
+
+#define MATRIX22_STRASSEN_THRESHOLD 13
+#define HGCD_THRESHOLD 53
+#define GCD_DC_THRESHOLD 283
+#define GCDEXT_DC_THRESHOLD 186
+#define JACOBI_BASE_METHOD 2
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 16
+#define SET_STR_DC_THRESHOLD 390
+#define SET_STR_PRECOMPUTE_THRESHOLD 1665
diff --git a/gmp-6.3.0/mpn/sparc64/lshift.asm b/gmp-6.3.0/mpn/sparc64/lshift.asm
new file mode 100644
index 0000000..90bbb45
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/lshift.asm
@@ -0,0 +1,140 @@
+dnl SPARC v9 mpn_lshift
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+C UltraSPARC T1: 17.5
+C UltraSPARC T3: 8
+C UltraSPARC T4: 3
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt', `%i3')
+
+define(`tcnt', `%i4')
+define(`retval', `%i5')
+define(`u0', `%l0')
+define(`u1', `%l1')
+define(`r0', `%l6')
+define(`r1', `%l7')
+define(`u0_off', `%o0')
+define(`u1_off', `%o1')
+define(`r0_off', `%o2')
+define(`r1_off', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+ save %sp, -176, %sp
+
+ sllx n, 3, n
+ sub %g0, cnt, tcnt
+
+ sub up, 8, u1_off
+ add rp, (5 * 8), r1_off
+
+ ldx [n + u1_off], u1 C WAS: up - 8
+ add u1_off, (3 * 8), u1_off
+
+ sub r1_off, 8, r0_off
+ sub u1_off, 8, u0_off
+
+ subcc n, (3 * 8), n
+ srlx u1, tcnt, retval
+
+ bl,pn %xcc, L(end12)
+ sllx u1, cnt, %l3
+
+ ldx [n + u0_off], u0 C WAS: up - 16
+ subcc n, (2 * 8), n
+
+ ldx [n + u1_off], u1 C WAS: up - 24
+
+ bl,pn %xcc, L(end34)
+ srlx u0, tcnt, %l4
+
+ b,a L(top)
+ ALIGN(16)
+L(top):
+ sllx u0, cnt, %l2
+ or %l4, %l3, r0
+
+ ldx [n + u0_off], u0 C WAS: up - 16
+ srlx u1, tcnt, %l5
+
+ stx r0, [n + r0_off] C WAS: rp - 8
+ subcc n, (2 * 8), n
+
+ sllx u1, cnt, %l3
+ or %l2, %l5, r1
+
+ ldx [n + u1_off], u1 C WAS: up - 24
+ srlx u0, tcnt, %l4
+
+ bge,pt %xcc, L(top)
+ stx r1, [n + r1_off] C WAS: rp - 16
+
+L(end34):
+ sllx u0, cnt, %l2
+ or %l4, %l3, r0
+
+ srlx u1, tcnt, %l5
+ stx r0, [n + r0_off] C WAS: rp - 8
+
+ or %l2, %l5, r1
+ sub n, (2 * 8), %o5
+
+ sllx u1, cnt, %l3
+ stx r1, [%o5 + r1_off] C WAS: rp - 16
+
+L(end12):
+ andcc n, 8, %g0
+ bz,pn %xcc, L(done)
+ nop
+
+ ldx [n + u0_off], u1
+ srlx u1, tcnt, %l4
+ or %l4, %l3, r0
+ stx r0, [r0_off - 24]
+ sllx u1, cnt, %l3
+L(done):
+ stx %l3, [r0_off - 32]
+
+ ret
+ restore retval, 0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/lshiftc.asm b/gmp-6.3.0/mpn/sparc64/lshiftc.asm
new file mode 100644
index 0000000..4a0f0a3
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/lshiftc.asm
@@ -0,0 +1,147 @@
+dnl SPARC v9 mpn_lshiftc
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 3
+C UltraSPARC 3: 3
+C UltraSPARC T1: 17
+C UltraSPARC T3: 10
+C UltraSPARC T4: 3.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt', `%i3')
+
+define(`tcnt', `%i4')
+define(`retval', `%i5')
+define(`u0', `%l0')
+define(`u1', `%l1')
+define(`r0', `%l6')
+define(`r1', `%l7')
+define(`u0_off', `%o0')
+define(`u1_off', `%o1')
+define(`r0_off', `%o2')
+define(`r1_off', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+ save %sp, -176, %sp
+
+ sllx n, 3, n
+ sub %g0, cnt, tcnt
+
+ sub up, 8, u1_off
+ add rp, (5 * 8), r1_off
+
+ ldx [n + u1_off], u1 C WAS: up - 8
+ add u1_off, (3 * 8), u1_off
+
+ sub r1_off, 8, r0_off
+ sub u1_off, 8, u0_off
+
+ subcc n, (3 * 8), n
+ srlx u1, tcnt, retval
+
+ bl,pn %xcc, L(end12)
+ sllx u1, cnt, %l3
+
+ ldx [n + u0_off], u0 C WAS: up - 16
+ subcc n, (2 * 8), n
+
+ ldx [n + u1_off], u1 C WAS: up - 24
+
+ bl,pn %xcc, L(end34)
+ srlx u0, tcnt, %l4
+
+ b,a L(top)
+ ALIGN(16)
+L(top):
+ not %l3, %l3
+ sllx u0, cnt, %l2
+
+ andn %l3, %l4, r0
+ ldx [n + u0_off], u0 C WAS: up - 16
+
+ srlx u1, tcnt, %l5
+ stx r0, [n + r0_off] C WAS: rp - 8
+
+ subcc n, (2 * 8), n
+ not %l2, %l2
+
+ sllx u1, cnt, %l3
+ andn %l2, %l5, r1
+
+ ldx [n + u1_off], u1 C WAS: up - 24
+ srlx u0, tcnt, %l4
+
+ bge,pt %xcc, L(top)
+ stx r1, [n + r1_off] C WAS: rp - 16
+
+L(end34):
+ not %l3, %l3
+ sllx u0, cnt, %l2
+
+ andn %l3, %l4, r0
+ srlx u1, tcnt, %l5
+
+ stx r0, [n + r0_off] C WAS: rp - 8
+ not %l2, %l2
+
+ andn %l2, %l5, r1
+ sub n, (2 * 8), %o5
+
+ sllx u1, cnt, %l3
+ stx r1, [%o5 + r1_off] C WAS: rp - 16
+
+L(end12):
+ andcc n, 8, %g0
+ bz %xcc, L(done)+4
+ not %l3, %l3
+
+ ldx [n + u0_off], u1
+ srlx u1, tcnt, %l4
+ andn %l3, %l4, r0
+ stx r0, [r0_off - 24]
+ sllx u1, cnt, %l3
+L(done):
+ not %l3, %l3
+ stx %l3, [r0_off - 32]
+
+ ret
+ restore retval, 0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/mod_1.c b/gmp-6.3.0/mpn/sparc64/mod_1.c
new file mode 100644
index 0000000..ab53f9d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/mod_1.c
@@ -0,0 +1,238 @@
+/* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
+
+Copyright 1991, 1993, 1994, 1999-2001, 2003, 2010 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/* 64-bit divisor 32-bit divisor
+ cycles/limb cycles/limb
+ (approx) (approx)
+ Ultrasparc 2i: 160 120
+*/
+
+
+/* 32-bit divisors are treated in special case code. This requires 4 mulx
+ per limb instead of 8 in the general case.
+
+ For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
+ addressing, to get the two halves of each limb read in the correct order.
+ This is kept in an adj variable. Doing that measures about 6 c/l faster
+ than just writing HALF_ENDIAN_ADJ(i) in the loop. The latter shouldn't
+ be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc
+ 3.2.1 at least).
+
+ A simple udivx/umulx loop for the 32-bit case was attempted for small
+ sizes, but at size==2 it was only about the same speed and at size==3 was
+ slower. */
+
+static mp_limb_t
+mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+{
+ int norm, norm_rshift;
+ mp_limb_t src_high_limb;
+ mp_size_t i;
+
+ ASSERT (size_limbs >= 0);
+ ASSERT (d_limb != 0);
+
+ if (UNLIKELY (size_limbs == 0))
+ return 0;
+
+ src_high_limb = src_limbptr[size_limbs-1];
+
+ /* udivx is good for size==1, and no need to bother checking limb<divisor,
+ since if that's likely the caller should check */
+ if (UNLIKELY (size_limbs == 1))
+ return src_high_limb % d_limb;
+
+ if (d_limb <= CNST_LIMB(0xFFFFFFFF))
+ {
+ unsigned *src, n1, n0, r, dummy_q, nshift, norm_rmask;
+ mp_size_t size, adj;
+ mp_limb_t dinv_limb;
+
+ size = 2 * size_limbs; /* halfwords */
+ src = (unsigned *) src_limbptr;
+
+ /* prospective initial remainder, if < d */
+ r = src_high_limb >> 32;
+
+ /* If the length of the source is uniformly distributed, then there's
+ a 50% chance of the high 32-bits being zero, which we can skip. */
+ if (r == 0)
+ {
+ r = (unsigned) src_high_limb;
+ size--;
+ ASSERT (size > 0); /* because always even */
+ }
+
+ /* Skip a division if high < divisor. Having the test here before
+ normalizing will still skip as often as possible. */
+ if (r < d_limb)
+ {
+ size--;
+ ASSERT (size > 0); /* because size==1 handled above */
+ }
+ else
+ r = 0;
+
+ count_leading_zeros_32 (norm, d_limb);
+ norm -= 32;
+ d_limb <<= norm;
+
+ norm_rshift = 32 - norm;
+ norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
+ i = size-1;
+ adj = HALF_ENDIAN_ADJ (i);
+ n1 = src [i + adj];
+ r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
+
+ invert_half_limb (dinv_limb, d_limb);
+ adj = -adj;
+
+ for (i--; i >= 0; i--)
+ {
+ n0 = src [i + adj];
+ adj = -adj;
+ nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+ udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
+ n1 = n0;
+ }
+
+ /* same as loop, but without n0 */
+ nshift = n1 << norm;
+ udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
+
+ ASSERT ((r & ((1 << norm) - 1)) == 0);
+ return r >> norm;
+ }
+ else
+ {
+ mp_srcptr src;
+ mp_size_t size;
+ mp_limb_t n1, n0, r, dinv, dummy_q, nshift, norm_rmask;
+
+ src = src_limbptr;
+ size = size_limbs;
+ r = src_high_limb; /* initial remainder */
+
+ /* Skip a division if high < divisor. Having the test here before
+ normalizing will still skip as often as possible. */
+ if (r < d_limb)
+ {
+ size--;
+ ASSERT (size > 0); /* because size==1 handled above */
+ }
+ else
+ r = 0;
+
+ count_leading_zeros (norm, d_limb);
+ d_limb <<= norm;
+
+ norm_rshift = GMP_LIMB_BITS - norm;
+ norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
+
+ src += size;
+ n1 = *--src;
+ r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
+
+ invert_limb (dinv, d_limb);
+
+ for (i = size-2; i >= 0; i--)
+ {
+ n0 = *--src;
+ nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
+ udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
+ n1 = n0;
+ }
+
+ /* same as loop, but without n0 */
+ nshift = n1 << norm;
+ udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
+
+ ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);
+ return r >> norm;
+ }
+}
+
+mp_limb_t
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+ ASSERT (n >= 0);
+ ASSERT (b != 0);
+
+ /* Should this be handled at all? Rely on callers? Note un==0 is currently
+ required by mpz/fdiv_r_ui.c and possibly other places. */
+ if (n == 0)
+ return 0;
+
+ if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
+ {
+ if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_anynorm (ap, n, b);
+ }
+ else
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1_1p_cps (pre, b);
+ return mpn_mod_1_1p (ap, n, b, pre);
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_anynorm (ap, n, b);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1_1p_cps (pre, b);
+ return mpn_mod_1_1p (ap, n, b << pre[1], pre);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+ {
+ mp_limb_t pre[5];
+ mpn_mod_1s_2p_cps (pre, b);
+ return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+ }
+ else
+ {
+ mp_limb_t pre[7];
+ mpn_mod_1s_4p_cps (pre, b);
+ return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+ }
+ }
+}
diff --git a/gmp-6.3.0/mpn/sparc64/mod_1_4.c b/gmp-6.3.0/mpn/sparc64/mod_1_4.c
new file mode 100644
index 0000000..735a402
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/mod_1_4.c
@@ -0,0 +1,235 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+ Divide (ap,,n) by b. Return the single-limb remainder.
+ Requires that d < B / 4.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008-2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ int cnt;
+
+ ASSERT (b <= (~(mp_limb_t) 0) / 4);
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt;
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
+ cps[4] = B3modb >> cnt;
+
+ udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
+ cps[5] = B4modb >> cnt;
+
+ udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
+ cps[6] = B5modb >> cnt;
+
+#if WANT_ASSERT
+ {
+ int i;
+ b = cps[2];
+ for (i = 3; i <= 6; i++)
+ {
+ b += cps[i];
+ ASSERT (b >= cps[i]);
+ }
+ }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+{
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ mp_size_t i;
+ int cnt;
+
+ ASSERT (n >= 1);
+
+ B1modb = cps[2];
+ B2modb = cps[3];
+ B3modb = cps[4];
+ B4modb = cps[5];
+ B5modb = cps[6];
+
+ if ((b >> 32) == 0)
+ {
+ switch (n & 3)
+ {
+ case 0:
+ umul_ppmm_s (ph, pl, ap[n - 3], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
+ umul_ppmm_s (ch, cl, ap[n - 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+ umul_ppmm_s (rh, rl, ap[n - 1], B3modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 4;
+ break;
+ case 1:
+ rh = 0;
+ rl = ap[n - 1];
+ n -= 1;
+ break;
+ case 2:
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ n -= 2;
+ break;
+ case 3:
+ umul_ppmm_s (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
+ umul_ppmm_s (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 3;
+ break;
+ }
+
+ for (i = n - 4; i >= 0; i -= 4)
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
+ + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm_s (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+
+ umul_ppmm_s (ch, cl, ap[i + 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm_s (ch, cl, ap[i + 3], B3modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm_s (ch, cl, rl, B4modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm_s (rh, rl, rh, B5modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm_s (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+ }
+ else
+ {
+ switch (n & 3)
+ {
+ case 0:
+ umul_ppmm (ph, pl, ap[n - 3], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+ umul_ppmm (ch, cl, ap[n - 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+ umul_ppmm (rh, rl, ap[n - 1], B3modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 4;
+ break;
+ case 1:
+ rh = 0;
+ rl = ap[n - 1];
+ n -= 1;
+ break;
+ case 2:
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ n -= 2;
+ break;
+ case 3:
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+ umul_ppmm (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 3;
+ break;
+ }
+
+ for (i = n - 4; i >= 0; i -= 4)
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
+ + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+ umul_ppmm (ch, cl, ap[i + 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, ap[i + 3], B3modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, rl, B4modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (rh, rl, rh, B5modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, 0, cl);
+ }
+
+ bi = cps[0];
+ cnt = cps[1];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+ return r >> cnt;
+}
diff --git a/gmp-6.3.0/mpn/sparc64/mode1o.c b/gmp-6.3.0/mpn/sparc64/mode1o.c
new file mode 100644
index 0000000..771c999
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/mode1o.c
@@ -0,0 +1,196 @@
+/* UltraSPARC 64 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2000-2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+
+/* 64-bit divisor 32-bit divisor
+ cycles/limb cycles/limb
+ (approx) (approx)
+ Ultrasparc 2i: ? ?
+*/
+
+
+/* This implementation reduces the number of multiplies done, knowing that
+ on ultrasparc 1 and 2 the mulx instruction stalls the whole chip.
+
+ The key idea is to use the fact that the low limb of q*d equals l, this
+ being the whole purpose of the q calculated. It means there's no need to
+ calculate the lowest 32x32->64 part of the q*d, instead it can be
+ inferred from l and the other three 32x32->64 parts. See sparc64.h for
+ details.
+
+ When d is 32-bits, the same applies, but in this case there's only one
+ other 32x32->64 part (ie. HIGH(q)*d).
+
+ The net effect is that for 64-bit divisor each limb is 4 mulx, or for
+ 32-bit divisor each is 2 mulx.
+
+ Enhancements:
+
+ No doubt this could be done in assembler, if that helped the scheduling,
+ or perhaps guaranteed good code irrespective of the compiler.
+
+ Alternatives:
+
+ It might be possibly to use floating point. The loop is dominated by
+ multiply latency, so not sure if floats would improve that. One
+ possibility would be to take two limbs at a time, with a 128 bit inverse,
+ if there's enough registers, which could effectively use float throughput
+ to reduce total latency across two limbs. */
+
+#define ASSERT_RETVAL(r) \
+ ASSERT (orig_c < d ? r < d : r <= d)
+
+mp_limb_t
+mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
+{
+ mp_limb_t c = orig_c;
+ mp_limb_t s, l, q, h, inverse;
+
+ ASSERT (size >= 1);
+ ASSERT (d & 1);
+ ASSERT_MPN (src, size);
+ ASSERT_LIMB (d);
+ ASSERT_LIMB (c);
+
+ /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
+ if (size == 1)
+ {
+ s = src[0];
+ if (s > c)
+ {
+ l = s-c;
+ h = l % d;
+ if (h != 0)
+ h = d - h;
+ }
+ else
+ {
+ l = c-s;
+ h = l % d;
+ }
+ return h;
+ }
+
+ binvert_limb (inverse, d);
+
+ if (d <= 0xFFFFFFFF)
+ {
+ s = *src++;
+ size--;
+ do
+ {
+ SUBC_LIMB (c, l, s, c);
+ s = *src++;
+ q = l * inverse;
+ umul_ppmm_half_lowequal (h, q, d, l);
+ c += h;
+ size--;
+ }
+ while (size != 0);
+
+ if (s <= d)
+ {
+ /* With high s <= d the final step can be a subtract and addback.
+ If c==0 then the addback will restore to l>=0. If c==d then
+ will get l==d if s==0, but that's ok per the function
+ definition. */
+
+ l = c - s;
+ l += (l > c ? d : 0);
+
+ ASSERT_RETVAL (l);
+ return l;
+ }
+ else
+ {
+ /* Can't skip a divide, just do the loop code once more. */
+ SUBC_LIMB (c, l, s, c);
+ q = l * inverse;
+ umul_ppmm_half_lowequal (h, q, d, l);
+ c += h;
+
+ ASSERT_RETVAL (c);
+ return c;
+ }
+ }
+ else
+ {
+ mp_limb_t dl = LOW32 (d);
+ mp_limb_t dh = HIGH32 (d);
+ long i;
+
+ s = *src++;
+ size--;
+ do
+ {
+ SUBC_LIMB (c, l, s, c);
+ s = *src++;
+ q = l * inverse;
+ umul_ppmm_lowequal (h, q, d, dh, dl, l);
+ c += h;
+ size--;
+ }
+ while (size != 0);
+
+ if (s <= d)
+ {
+ /* With high s <= d the final step can be a subtract and addback.
+ If c==0 then the addback will restore to l>=0. If c==d then
+ will get l==d if s==0, but that's ok per the function
+ definition. */
+
+ l = c - s;
+ l += (l > c ? d : 0);
+
+ ASSERT_RETVAL (l);
+ return l;
+ }
+ else
+ {
+ /* Can't skip a divide, just do the loop code once more. */
+ SUBC_LIMB (c, l, s, c);
+ q = l * inverse;
+ umul_ppmm_lowequal (h, q, d, dh, dl, l);
+ c += h;
+
+ ASSERT_RETVAL (c);
+ return c;
+ }
+ }
+}
diff --git a/gmp-6.3.0/mpn/sparc64/rshift.asm b/gmp-6.3.0/mpn/sparc64/rshift.asm
new file mode 100644
index 0000000..3f8e11f
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/rshift.asm
@@ -0,0 +1,142 @@
+dnl SPARC v9 mpn_rshift
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+C UltraSPARC T1: 17.5
+C UltraSPARC T3: 8
+C UltraSPARC T4: 3
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt', `%i3')
+
+define(`tcnt', `%i4')
+define(`retval', `%i5')
+define(`u0', `%l0')
+define(`u1', `%l1')
+define(`r0', `%l6')
+define(`r1', `%l7')
+define(`u0_off', `%o0')
+define(`u1_off', `%o1')
+define(`r0_off', `%o2')
+define(`r1_off', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_rshift)
+ save %sp, -176, %sp
+
+ sllx n, 3, n
+ sub %g0, cnt, tcnt
+
+ add up, n, up
+ add rp, n, rp
+
+ neg n, n
+ sub up, (2 * 8), u0_off
+ sub rp, (5 * 8), r0_off
+
+ ldx [n + up], u1 C WAS: up + 0
+ sub u0_off, (1 * 8), u1_off
+ sub r0_off, (1 * 8), r1_off
+
+ subcc n, -(3 * 8), n
+ sllx u1, tcnt, retval
+
+ bg,pn %xcc, L(end12)
+ srlx u1, cnt, %l3
+
+ ldx [n + u0_off], u0 C WAS: up + 0
+ subcc n, -(2 * 8), n
+
+ ldx [n + u1_off], u1 C WAS: up + 8
+
+ bg,pn %xcc, L(end34)
+ sllx u0, tcnt, %l4
+
+ b,a L(top)
+ ALIGN(16)
+L(top):
+ srlx u0, cnt, %l2
+ or %l3, %l4, r0
+
+ ldx [n + u0_off], u0 C WAS: up + 0
+ sllx u1, tcnt, %l5
+
+ stx r0, [n + r0_off] C WAS: rp + 0
+ subcc n, -(2 * 8), n
+
+ srlx u1, cnt, %l3
+ or %l2, %l5, r1
+
+ ldx [n + u1_off], u1 C WAS: up + 8
+ sllx u0, tcnt, %l4
+
+ ble,pt %xcc, L(top)
+ stx r1, [n + r1_off] C WAS: rp + 8
+
+L(end34):
+ srlx u0, cnt, %l2
+ or %l3, %l4, r0
+
+ sllx u1, tcnt, %l5
+ stx r0, [n + r0_off] C WAS: rp + 0
+
+ or %l2, %l5, r1
+ sub n, -(2 * 8), %o5
+
+ srlx u1, cnt, %l3
+ stx r1, [%o5 + r1_off] C WAS: rp + 8
+
+L(end12):
+ andcc n, 8, %g0
+ bz,pn %xcc, L(done)
+ nop
+
+ ldx [n + u0_off], u1
+ sllx u1, tcnt, %l4
+ or %l3, %l4, r0
+ stx r0, [r0_off + 24]
+ srlx u1, cnt, %l3
+L(done):
+ stx %l3, [r0_off + 32]
+
+ ret
+ restore retval, 0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/sec_tabselect.asm b/gmp-6.3.0/mpn/sparc64/sec_tabselect.asm
new file mode 100644
index 0000000..22e0dc5
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/sec_tabselect.asm
@@ -0,0 +1,162 @@
+dnl SPARC v9 mpn_sec_tabselect.
+
+dnl Contributed to the GNU project by Torbjörn Granlund and David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2 hopefully
+C UltraSPARC 3: 3
+C UltraSPARC T1: 17
+C UltraSPARC T3: ?
+C UltraSPARC T4/T5: 2.25 hopefully
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`tp', `%i1')
+define(`n', `%i2')
+define(`nents', `%i3')
+define(`which', `%i4')
+
+define(`i', `%g1')
+define(`j', `%g3')
+define(`stride', `%g4')
+define(`tporig', `%g5')
+define(`mask', `%o0')
+
+define(`data0', `%l0')
+define(`data1', `%l1')
+define(`data2', `%l2')
+define(`data3', `%l3')
+define(`t0', `%l4')
+define(`t1', `%l5')
+define(`t2', `%l6')
+define(`t3', `%l7')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sec_tabselect)
+ save %sp, -176, %sp
+
+ sllx n, 3, stride
+ sub n, 4, j
+ brlz j, L(outer_end)
+ mov tp, tporig
+
+L(outer_loop):
+ clr data0
+ clr data1
+ clr data2
+ clr data3
+ mov tporig, tp
+ mov nents, i
+ mov which, %o1
+
+L(top): subcc %o1, 1, %o1 C set carry iff o1 = 0
+ ldx [tp + 0], t0
+ subc %g0, %g0, mask
+ ldx [tp + 8], t1
+ sub i, 1, i
+ ldx [tp + 16], t2
+ ldx [tp + 24], t3
+ add tp, stride, tp
+ and t0, mask, t0
+ and t1, mask, t1
+ or t0, data0, data0
+ and t2, mask, t2
+ or t1, data1, data1
+ and t3, mask, t3
+ or t2, data2, data2
+ brnz i, L(top)
+ or t3, data3, data3
+
+ stx data0, [rp + 0]
+ subcc j, 4, j
+ stx data1, [rp + 8]
+ stx data2, [rp + 16]
+ stx data3, [rp + 24]
+ add tporig, (4 * 8), tporig
+
+ brgez j, L(outer_loop)
+ add rp, (4 * 8), rp
+L(outer_end):
+
+
+ andcc n, 2, %g0
+ be L(b0x)
+ nop
+L(b1x): clr data0
+ clr data1
+ mov tporig, tp
+ mov nents, i
+ mov which, %o1
+
+L(tp2): subcc %o1, 1, %o1
+ ldx [tp + 0], t0
+ subc %g0, %g0, mask
+ ldx [tp + 8], t1
+ sub i, 1, i
+ add tp, stride, tp
+ and t0, mask, t0
+ and t1, mask, t1
+ or t0, data0, data0
+ brnz i, L(tp2)
+ or t1, data1, data1
+
+ stx data0, [rp + 0]
+ stx data1, [rp + 8]
+ add tporig, (2 * 8), tporig
+ add rp, (2 * 8), rp
+
+
+L(b0x): andcc n, 1, %g0
+ be L(b00)
+ nop
+L(b01): clr data0
+ mov tporig, tp
+ mov nents, i
+ mov which, %o1
+
+L(tp1): subcc %o1, 1, %o1
+ ldx [tp + 0], t0
+ subc %g0, %g0, mask
+ sub i, 1, i
+ add tp, stride, tp
+ and t0, mask, t0
+ brnz i, L(tp1)
+ or t0, data0, data0
+
+ stx data0, [rp + 0]
+
+L(b00): ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/sparc64.h b/gmp-6.3.0/mpn/sparc64/sparc64.h
new file mode 100644
index 0000000..8698a82
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/sparc64.h
@@ -0,0 +1,217 @@
+/* UltraSPARC 64 support macros.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#define LOW32(x) ((x) & 0xFFFFFFFF)
+#define HIGH32(x) ((x) >> 32)
+
+
+/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
+ Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
+ effect of swapping the two halves in this case. */
+#if HAVE_LIMB_BIG_ENDIAN
+#define HALF_ENDIAN_ADJ(i) (1 - (((i) & 1) << 1)) /* +1 even, -1 odd */
+#endif
+#if HAVE_LIMB_LITTLE_ENDIAN
+#define HALF_ENDIAN_ADJ(i) 0 /* no adjust */
+#endif
+#ifndef HALF_ENDIAN_ADJ
+Error, error, unknown limb endianness;
+#endif
+
+
+/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
+ of that product is equal to l. dh and dl are the 32-bit halves of d.
+
+ |-----high----||----low-----|
+ +------+------+
+ | | ph = qh * dh
+ +------+------+
+ +------+------+
+ | | pm1 = ql * dh
+ +------+------+
+ +------+------+
+ | | pm2 = qh * dl
+ +------+------+
+ +------+------+
+ | | pl = ql * dl (not calculated)
+ +------+------+
+
+ Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
+ + HIGH(pl) == HIGH(l). The only thing we need from those product parts
+ is whether they produce a carry into the high.
+
+ pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
+ time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
+ HIGH(l). pl is never actually calculated. */
+
+#define umul_ppmm_lowequal(h, q, d, dh, dl, l) \
+ do { \
+ mp_limb_t ql, qh, ph, pm1, pm2, pm_l; \
+ ASSERT (dh == HIGH32(d)); \
+ ASSERT (dl == LOW32(d)); \
+ ASSERT (q*d == l); \
+ \
+ ql = LOW32 (q); \
+ qh = HIGH32 (q); \
+ \
+ pm1 = ql * dh; \
+ pm2 = qh * dl; \
+ ph = qh * dh; \
+ \
+ pm_l = LOW32 (pm1) + LOW32 (pm2); \
+ \
+ (h) = ph + HIGH32 (pm1) + HIGH32 (pm2) \
+ + HIGH32 (pm_l) + ((pm_l << 32) > l); \
+ \
+ ASSERT_HIGH_PRODUCT (h, q, d); \
+ } while (0)
+
+
+/* Set h to the high of q*d, assuming the low limb of that product is equal
+ to l, and that d fits in 32-bits.
+
+ |-----high----||----low-----|
+ +------+------+
+ | | pm = qh * dl
+ +------+------+
+ +------+------+
+ | | pl = ql * dl (not calculated)
+ +------+------+
+
+ Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
+ time there's a carry from that sum is when LOW(pm) > HIGH(l). There's no
+ need to calculate pl to determine this. */
+
+#define umul_ppmm_half_lowequal(h, q, d, l) \
+ do { \
+ mp_limb_t pm; \
+ ASSERT (q*d == l); \
+ ASSERT (HIGH32(d) == 0); \
+ \
+ pm = HIGH32(q) * d; \
+ (h) = HIGH32(pm) + ((pm << 32) > l); \
+ ASSERT_HIGH_PRODUCT (h, q, d); \
+ } while (0)
+
+
+/* check that h is the high limb of x*y */
+#if WANT_ASSERT
+#define ASSERT_HIGH_PRODUCT(h, x, y) \
+ do { \
+ mp_limb_t want_h, dummy; \
+ umul_ppmm (want_h, dummy, x, y); \
+ ASSERT (h == want_h); \
+ } while (0)
+#else
+#define ASSERT_HIGH_PRODUCT(h, q, d) \
+ do { } while (0)
+#endif
+
+
+/* Multiply u anv v, where v < 2^32. */
+#define umul_ppmm_s(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x2; \
+ UWtype __ul, __vl, __uh; \
+ UWtype __u = (u), __v = (v); \
+ \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x2 = (UWtype) __uh * __vl; \
+ \
+ (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2; \
+ (w0) = (__x2 << W_TYPE_SIZE/2) + __x0; \
+ } while (0)
+
+/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
+ The count returned will be in the range 32 to 63.
+ This is the 32-bit generic C count_leading_zeros from longlong.h. */
+#define count_leading_zeros_32(count, x) \
+ do { \
+ mp_limb_t __xr = (x); \
+ unsigned __a; \
+ ASSERT ((x) != 0); \
+ ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF)); \
+ __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1) \
+ : (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1); \
+ \
+ (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
+ } while (0)
+
+
+/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
+ 32 bits and is normalized (high bit set). */
+#define invert_half_limb(inv, d) \
+ do { \
+ mp_limb_t _n; \
+ ASSERT ((d) <= 0xFFFFFFFF); \
+ ASSERT ((d) & 0x80000000); \
+ _n = (((mp_limb_t) -(d)) << 32) - 1; \
+ (inv) = (mp_limb_t) (unsigned) (_n / (d)); \
+ } while (0)
+
+
+/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
+ q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
+ dinv_limb is similarly a 32-bit inverse but in an mp_limb_t. */
+
+#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb) \
+ do { \
+ unsigned _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q; \
+ mp_limb_t _n, _x; \
+ ASSERT (d_limb <= 0xFFFFFFFF); \
+ ASSERT (dinv_limb <= 0xFFFFFFFF); \
+ ASSERT (d_limb & 0x80000000); \
+ ASSERT (nh < d_limb); \
+ _n10 = (nl); \
+ _n2 = (nh); \
+ _n1 = (int) _n10 >> 31; \
+ _nadj = _n10 + (_n1 & d_limb); \
+ _x = dinv_limb * (_n2 - _n1) + _nadj; \
+ _q11n = ~(_n2 + HIGH32 (_x)); /* -q1-1 */ \
+ _n = ((mp_limb_t) _n2 << 32) + _n10; \
+ _x = _n + d_limb * _q11n; /* n-q1*d-d */ \
+ _xh = HIGH32 (_x) - d_limb; /* high(n-q1*d-d) */ \
+ ASSERT (_xh == 0 || _xh == ~0); \
+ _r = _x + (d_limb & _xh); /* addback */ \
+ _q = _xh - _q11n; /* q1+1-addback */ \
+ ASSERT (_r < d_limb); \
+ ASSERT (d_limb * _q + _r == _n); \
+ (r) = _r; \
+ (q) = _q; \
+ } while (0)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/add_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/add_n.asm
new file mode 100644
index 0000000..92374d2
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/add_n.asm
@@ -0,0 +1,241 @@
+dnl SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright 2001-2003, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 4
+C UltraSPARC 3: 4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u+v+carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
+C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`vp', `%i2')
+define(`n', `%i3')
+
+define(`u0', `%l0')
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+define(`v0', `%l1')
+define(`v1', `%l3')
+define(`v2', `%l5')
+define(`v3', `%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_nc)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ nop
+ b,a L(com)
+EPILOGUE()
+
+PROLOGUE(mpn_add_n)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ mov 0,cy
+L(com):
+ ldx [up+0],u0
+ ldx [vp+0],v0
+ add up,32,up
+ ldx [up-24],u1
+ ldx [vp+8],v1
+ add vp,32,vp
+ ldx [up-16],u2
+ ldx [vp-16],v2
+ ldx [up-8],u3
+ ldx [vp-8],v3
+ subcc n,8,n
+ add u0,v0,%g1 C main add
+ add %g1,cy,%g5 C carry add
+ or u0,v0,%g2
+ bl,pn %xcc,.Lend4567
+ fanop
+ b,a .Loop
+
+ .align 16
+C START MAIN LOOP
+.Loop: andn %g2,%g5,%g2
+ and u0,v0,%g3
+ ldx [up+0],u0
+ fanop
+C --
+ or %g3,%g2,%g2
+ ldx [vp+0],v0
+ add up,32,up
+ fanop
+C --
+ srlx %g2,63,cy
+ add u1,v1,%g1
+ stx %g5,[rp+0]
+ fanop
+C --
+ add %g1,cy,%g5
+ or u1,v1,%g2
+ fmnop
+ fanop
+C --
+ andn %g2,%g5,%g2
+ and u1,v1,%g3
+ ldx [up-24],u1
+ fanop
+C --
+ or %g3,%g2,%g2
+ ldx [vp+8],v1
+ add vp,32,vp
+ fanop
+C --
+ srlx %g2,63,cy
+ add u2,v2,%g1
+ stx %g5,[rp+8]
+ fanop
+C --
+ add %g1,cy,%g5
+ or u2,v2,%g2
+ fmnop
+ fanop
+C --
+ andn %g2,%g5,%g2
+ and u2,v2,%g3
+ ldx [up-16],u2
+ fanop
+C --
+ or %g3,%g2,%g2
+ ldx [vp-16],v2
+ add rp,32,rp
+ fanop
+C --
+ srlx %g2,63,cy
+ add u3,v3,%g1
+ stx %g5,[rp-16]
+ fanop
+C --
+ add %g1,cy,%g5
+ or u3,v3,%g2
+ fmnop
+ fanop
+C --
+ andn %g2,%g5,%g2
+ and u3,v3,%g3
+ ldx [up-8],u3
+ fanop
+C --
+ or %g3,%g2,%g2
+ subcc n,4,n
+ ldx [vp-8],v3
+ fanop
+C --
+ srlx %g2,63,cy
+ add u0,v0,%g1
+ stx %g5,[rp-8]
+ fanop
+C --
+ add %g1,cy,%g5
+ or u0,v0,%g2
+ bge,pt %xcc,.Loop
+ fanop
+C END MAIN LOOP
+.Lend4567:
+ andn %g2,%g5,%g2
+ and u0,v0,%g3
+ or %g3,%g2,%g2
+ srlx %g2,63,cy
+ add u1,v1,%g1
+ stx %g5,[rp+0]
+ add %g1,cy,%g5
+ or u1,v1,%g2
+ andn %g2,%g5,%g2
+ and u1,v1,%g3
+ or %g3,%g2,%g2
+ srlx %g2,63,cy
+ add u2,v2,%g1
+ stx %g5,[rp+8]
+ add %g1,cy,%g5
+ or u2,v2,%g2
+ andn %g2,%g5,%g2
+ and u2,v2,%g3
+ or %g3,%g2,%g2
+ add rp,32,rp
+ srlx %g2,63,cy
+ add u3,v3,%g1
+ stx %g5,[rp-16]
+ add %g1,cy,%g5
+ or u3,v3,%g2
+ andn %g2,%g5,%g2
+ and u3,v3,%g3
+ or %g3,%g2,%g2
+ srlx %g2,63,cy
+ stx %g5,[rp-8]
+
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+
+.Loop0: ldx [up],u0
+ add up,8,up
+ ldx [vp],v0
+ add vp,8,vp
+ add rp,8,rp
+ subcc n,1,n
+ add u0,v0,%g1
+ or u0,v0,%g2
+ add %g1,cy,%g5
+ and u0,v0,%g3
+ andn %g2,%g5,%g2
+ stx %g5,[rp-8]
+ or %g3,%g2,%g2
+ bnz,pt %xcc,.Loop0
+ srlx %g2,63,cy
+
+.Lret: mov cy,%i0
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_1.asm
new file mode 100644
index 0000000..48a9414
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_1.asm
@@ -0,0 +1,606 @@
+dnl SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright 1998, 2000-2004 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 14
+C UltraSPARC 3: 17.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the up operand split
+C into 32-bit pieces. We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C 0. Rewrite to use algorithm of mpn_addmul_2.
+C 1. Align the stack area where we transfer the four 49-bit product-sums
+C to a 32-byte boundary. That would minimize the cache collision.
+C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
+C be to align the area to map to the area immediately before up?)
+C 2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C develop mpn_addmul_2. This would save many integer instructions.
+C 3. Unrolling. Questionable if it is worth the code expansion, given that
+C it could only save 1 cycle/limb.
+C 4. Specialize for particular v values. If its upper 32 bits are zero, we
+C could save many operations, in the FPU (fmuld), but more so in the IEU
+C since we'll be summing 48-bit quantities, which might be simpler.
+C 5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C the i00,i16,i32,i48 RAW less apart. The latter apart-scheduling should
+C not be greater than needed for L2 cache latency, and also not so great
+C that i16 needs to be copied.
+C 6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C to get high IEU bandwidth. (12 of the 14 cycles will be free for 2 IEU
+C ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C 8 FM
+C 10 FA
+C 12 MEM
+C 10 ISHIFT + 14 IADDLOG
+C 1 BRANCH
+C 55 insns totally (plus one mov insn that should be optimized out)
+
+C The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e we
+C sustain the peak execution rate of 4 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+PROLOGUE(mpn_addmul_1)
+
+C Initialization. (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
+C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+ save %sp, -256, %sp
+ mov -1, %g4
+ srlx %g4, 48, xffff C store mask in register `xffff'
+ and %i3, xffff, %g2
+ stx %g2, [%sp+2223+0]
+ srlx %i3, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+8]
+ srlx %i3, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+16]
+ srlx %i3, 48, %g3
+ stx %g3, [%sp+2223+24]
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+
+ sllx %i2, 3, %i2
+ mov 0, cy C clear cy
+ add %i0, %i2, %i0
+ add %i1, %i2, %i1
+ neg %i2
+ add %i1, 4, %i5
+ add %i0, -32, %i4
+ add %i0, -16, %i0
+
+ ldd [%sp+2223+0], v00
+ ldd [%sp+2223+8], v16
+ ldd [%sp+2223+16], v32
+ ldd [%sp+2223+24], v48
+ ld [%sp+2223+0],%f2 C zero f2
+ ld [%sp+2223+0],%f4 C zero f4
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fxtod v00, v00
+ fxtod v16, v16
+ fxtod v32, v32
+ fxtod v48, v48
+
+C Start real work. (We sneakingly read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fmuld u00, v00, a00
+ fmuld u00, v16, a16
+ fmuld u00, v32, p32
+ fmuld u32, v00, r32
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_two_or_more
+ fmuld u32, v16, r48
+
+.L_one:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ ldx [%i0+%i2], rlimb C read rp[i]
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ add %i2, 8, %i2
+
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ add i00, %g5, %g5 C i00+ now in g5
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_1
+ add %i2, 8, %i2
+
+.L_two_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_three_or_more
+ fmuld u32, v16, r48
+
+.L_two:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ add i00, %g5, %g5 C i00+ now in g5
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_2
+ add %i2, 8, %i2
+
+.L_three_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_four_or_more
+ fmuld u32, v16, r48
+
+.L_three:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_3
+ add %i2, 8, %i2
+
+.L_four_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48
+
+.L_four:
+ b,a .L_out_4
+
+C BEGIN MAIN LOOP
+ .align 16
+.Loop:
+C 00
+ srlx %o4, 16, %o5 C (x >> 16)
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+C 01
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+C 02
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+C 03
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+C 04
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+C 05
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+C 06
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+C 07
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+C 08
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+C 09
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+C 10
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+C 11
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+C 12
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+C 13
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48
+C END MAIN LOOP
+
+.L_out_4:
+ srlx %o4, 16, %o5 C (x >> 16)
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox a00, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_3:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox r64, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ fdtox r80, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_2:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_1:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ or %i3, %o5, %o5
+ stx %o5, [%i4+%i2]
+
+ sllx i00, 0, %g2
+ add %g2, cy, cy
+ sllx i16, 16, %g3
+ add %g3, cy, cy
+
+ return %i7+8
+ mov cy, %o0
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_2.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_2.asm
new file mode 100644
index 0000000..37674d7
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/addmul_2.asm
@@ -0,0 +1,551 @@
+dnl SPARC v9 64-bit mpn_addmul_2 -- Multiply an n limb number with 2-limb
+dnl number and add the result to a n limb vector.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 9
+C UltraSPARC 3: 10
+
+C Algorithm: We use 16 floating-point multiplies per limb product, with the
+C 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand
+C split into 32-bit pieces. We sum four 48-bit partial products using
+C floating-point add, then convert the resulting four 50-bit quantities and
+C transfer them to the integer unit.
+
+C Possible optimizations:
+C 1. Align the stack area where we transfer the four 50-bit product-sums
+C to a 32-byte boundary. That would minimize the cache collision.
+C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
+C be to align the area to map to the area immediately before up?)
+C 2. Perform two of the fp->int conversions with integer instructions. We
+C can get almost ten free IEU slots, if we clean up bookkeeping and the
+C silly carry-limb code.
+C 3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb
+C code.
+
+C OSP (Overlapping software pipeline) version of mpn_mul_basecase:
+C Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.
+C FI = 20
+C L = 9 x un * vn
+C WDFI = 10 x vn / 2
+C WD = 4
+
+C Instruction classification (as per UltraSPARC functional units).
+C Assuming silly carry code is fixed. Includes bookkeeping.
+C
+C mpn_addmul_X mpn_mul_X
+C 1 2 1 2
+C ========== ==========
+C FM 8 16 8 16
+C FA 10 18 10 18
+C MEM 12 12 10 10
+C ISHIFT 6 6 6 6
+C IADDLOG 11 11 10 10
+C BRANCH 1 1 1 1
+C
+C TOTAL IEU 17 17 16 16
+C TOTAL 48 64 45 61
+C
+C IEU cycles 8.5 8.5 8 8
+C MEM cycles 12 12 10 10
+C ISSUE cycles 12 16 11.25 15.25
+C FPU cycles 10 18 10 18
+C cycles/loop 12 18 12 18
+C cycles/limb 12 9 12 9
+
+
+C INPUT PARAMETERS
+C rp[n + 1] i0
+C up[n] i1
+C n i2
+C vp[2] i3
+
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+
+C Combine registers:
+C u00_hi= u32_hi
+C u00_lo= u32_lo
+C a000 = out000
+C a016 = out016
+C Free: f52 f54
+
+
+define(`p000', `%f8') define(`p016',`%f10')
+define(`p032',`%f12') define(`p048',`%f14')
+define(`p064',`%f16') define(`p080',`%f18')
+define(`p096a',`%f20') define(`p112a',`%f22')
+define(`p096b',`%f56') define(`p112b',`%f58')
+
+define(`out000',`%f0') define(`out016',`%f6')
+
+define(`v000',`%f24') define(`v016',`%f26')
+define(`v032',`%f28') define(`v048',`%f30')
+define(`v064',`%f44') define(`v080',`%f46')
+define(`v096',`%f48') define(`v112',`%f50')
+
+define(`u00',`%f32') define(`u32', `%f34')
+
+define(`a000',`%f36') define(`a016',`%f38')
+define(`a032',`%f40') define(`a048',`%f42')
+define(`a064',`%f60') define(`a080',`%f62')
+
+define(`u00_hi',`%f2') define(`u32_hi',`%f4')
+define(`u00_lo',`%f3') define(`u32_lo',`%f5')
+
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1')
+define(`r00',`%l2') define(`r32',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+
+PROLOGUE(mpn_addmul_2)
+
+C Initialization. (1) Split v operand into eight 16-bit chunks and store them
+C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
+C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+C This code could be better scheduled.
+
+ save %sp, -256, %sp
+
+ifdef(`HAVE_VIS',
+` mov -1, %g4
+ wr %g0, 0xD2, %asi
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+ ldda [%i3+6] %asi, v000
+ ldda [%i3+4] %asi, v016
+ ldda [%i3+2] %asi, v032
+ ldda [%i3+0] %asi, v048
+ fxtod v000, v000
+ ldda [%i3+14] %asi, v064
+ fxtod v016, v016
+ ldda [%i3+12] %asi, v080
+ fxtod v032, v032
+ ldda [%i3+10] %asi, v096
+ fxtod v048, v048
+ ldda [%i3+8] %asi, v112
+ fxtod v064, v064
+ fxtod v080, v080
+ fxtod v096, v096
+ fxtod v112, v112
+ fzero u00_hi
+ fzero u32_hi
+',
+` mov -1, %g4
+ ldx [%i3+0], %l0 C vp[0]
+ srlx %g4, 48, xffff C store mask in register `xffff'
+ ldx [%i3+8], %l1 C vp[1]
+
+ and %l0, xffff, %g2
+ stx %g2, [%sp+2223+0]
+ srlx %l0, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+8]
+ srlx %l0, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+16]
+ srlx %l0, 48, %g3
+ stx %g3, [%sp+2223+24]
+ and %l1, xffff, %g2
+ stx %g2, [%sp+2223+32]
+ srlx %l1, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+40]
+ srlx %l1, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+48]
+ srlx %l1, 48, %g3
+ stx %g3, [%sp+2223+56]
+
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+
+ ldd [%sp+2223+0], v000
+ ldd [%sp+2223+8], v016
+ ldd [%sp+2223+16], v032
+ ldd [%sp+2223+24], v048
+ fxtod v000, v000
+ ldd [%sp+2223+32], v064
+ fxtod v016, v016
+ ldd [%sp+2223+40], v080
+ fxtod v032, v032
+ ldd [%sp+2223+48], v096
+ fxtod v048, v048
+ ldd [%sp+2223+56], v112
+ fxtod v064, v064
+ ld [%sp+2223+0], u00_hi C zero u00_hi
+ fxtod v080, v080
+ ld [%sp+2223+0], u32_hi C zero u32_hi
+ fxtod v096, v096
+ fxtod v112, v112
+')
+C Initialization done.
+ mov 0, %g2
+ mov 0, rlimb
+ mov 0, %g4
+ add %i0, -8, %i0 C BOOKKEEPING
+
+C Start software pipeline.
+
+ ld [%i1+4], u00_lo C read low 32 bits of up[i]
+ fxtod u00_hi, u00
+C mid
+ ld [%i1+0], u32_lo C read high 32 bits of up[i]
+ fmuld u00, v000, a000
+ fmuld u00, v016, a016
+ fmuld u00, v032, a032
+ fmuld u00, v048, a048
+ add %i2, -1, %i2 C BOOKKEEPING
+ fmuld u00, v064, p064
+ add %i1, 8, %i1 C BOOKKEEPING
+ fxtod u32_hi, u32
+ fmuld u00, v080, p080
+ fmuld u00, v096, p096a
+ brnz,pt %i2, .L_2_or_more
+ fmuld u00, v112, p112a
+
+.L1: fdtox a000, out000
+ fmuld u32, v000, p000
+ fdtox a016, out016
+ fmuld u32, v016, p016
+ fmovd p064, a064
+ fmuld u32, v032, p032
+ fmovd p080, a080
+ fmuld u32, v048, p048
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+ std out016, [%sp+2223+24]
+ fxtod u00_hi, u00
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C mid
+ fdtox a000, out000
+ fdtox a016, out016
+ faddd p064, p096a, a064
+ faddd p080, p112a, a080
+ std out000, [%sp+2223+0]
+ b .L_wd2
+ std out016, [%sp+2223+8]
+
+.L_2_or_more:
+ ld [%i1+4], u00_lo C read low 32 bits of up[i]
+ fdtox a000, out000
+ fmuld u32, v000, p000
+ fdtox a016, out016
+ fmuld u32, v016, p016
+ fmovd p064, a064
+ fmuld u32, v032, p032
+ fmovd p080, a080
+ fmuld u32, v048, p048
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+ std out016, [%sp+2223+24]
+ fxtod u00_hi, u00
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C mid
+ ld [%i1+0], u32_lo C read high 32 bits of up[i]
+ fdtox a000, out000
+ fmuld u00, v000, p000
+ fdtox a016, out016
+ fmuld u00, v016, p016
+ faddd p064, p096a, a064
+ fmuld u00, v032, p032
+ faddd p080, p112a, a080
+ fmuld u00, v048, p048
+ add %i2, -1, %i2 C BOOKKEEPING
+ std out000, [%sp+2223+0]
+ faddd p000, a032, a000
+ fmuld u00, v064, p064
+ add %i1, 8, %i1 C BOOKKEEPING
+ std out016, [%sp+2223+8]
+ fxtod u32_hi, u32
+ faddd p016, a048, a016
+ fmuld u00, v080, p080
+ faddd p032, a064, a032
+ fmuld u00, v096, p096a
+ faddd p048, a080, a048
+ brnz,pt %i2, .L_3_or_more
+ fmuld u00, v112, p112a
+
+ b .Lend
+ nop
+
+C 64 32 0
+C . . .
+C . |__rXXX_| 32
+C . |___cy___| 34
+C . |_______i00__| 50
+C |_______i16__| . 50
+
+
+C BEGIN MAIN LOOP
+ .align 16
+.L_3_or_more:
+.Loop: ld [%i1+4], u00_lo C read low 32 bits of up[i]
+ and %g2, xffffffff, %g2
+ fdtox a000, out000
+ fmuld u32, v000, p000
+C
+ lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ fmuld u32, v016, p016
+C
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ faddd p064, p096b, a064
+ fmuld u32, v032, p032
+C
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ faddd p080, p112b, a080
+ fmuld u32, v048, p048
+C
+ nop
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+C
+ add i00, r00, rlimb
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ fxtod u00_hi, u00
+C
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+C
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+C
+ stw %l5, [%i0+4]
+ nop
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C midloop
+ ld [%i1+0], u32_lo C read high 32 bits of up[i]
+ and %g2, xffffffff, %g2
+ fdtox a000, out000
+ fmuld u00, v000, p000
+C
+ lduw [%i0+0], r32 C read high 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ fmuld u00, v016, p016
+C
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], i00
+ faddd p064, p096a, a064
+ fmuld u00, v032, p032
+C
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ faddd p080, p112a, a080
+ fmuld u00, v048, p048
+C
+ add %i2, -1, %i2 C BOOKKEEPING
+ std out000, [%sp+2223+0]
+ faddd p000, a032, a000
+ fmuld u00, v064, p064
+C
+ add i00, r32, rlimb
+ add %i1, 8, %i1 C BOOKKEEPING
+ std out016, [%sp+2223+8]
+ fxtod u32_hi, u32
+C
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ faddd p016, a048, a016
+ fmuld u00, v080, p080
+C
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ faddd p032, a064, a032
+ fmuld u00, v096, p096a
+C
+ stw %l5, [%i0+0]
+ faddd p048, a080, a048
+ brnz,pt %i2, .Loop
+ fmuld u00, v112, p112a
+C END MAIN LOOP
+
+C WIND-DOWN PHASE 1
+.Lend: and %g2, xffffffff, %g2
+ fdtox a000, out000
+ fmuld u32, v000, p000
+ lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ fmuld u32, v016, p016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ faddd p064, p096b, a064
+ fmuld u32, v032, p032
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ faddd p080, p112b, a080
+ fmuld u32, v048, p048
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+ add i00, r00, rlimb
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+ stw %l5, [%i0+4]
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C mid
+ and %g2, xffffffff, %g2
+ fdtox a000, out000
+ lduw [%i0+0], r32 C read high 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], i00
+ faddd p064, p096a, a064
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ faddd p080, p112a, a080
+ std out000, [%sp+2223+0]
+ add i00, r32, rlimb
+ std out016, [%sp+2223+8]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+0]
+
+C WIND-DOWN PHASE 2
+.L_wd2: and %g2, xffffffff, %g2
+ fdtox a032, out000
+ lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a048, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ std out000, [%sp+2223+16]
+ add i00, r00, rlimb
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+4]
+C mid
+ and %g2, xffffffff, %g2
+ fdtox a064, out000
+ lduw [%i0+0], r32 C read high 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a080, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], i00
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ std out000, [%sp+2223+0]
+ add i00, r32, rlimb
+ std out016, [%sp+2223+8]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+0]
+
+C WIND-DOWN PHASE 3
+.L_wd3: and %g2, xffffffff, %g2
+ fdtox p096b, out000
+ add %g2, rlimb, %l5
+ fdtox p112b, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], rlimb
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ std out000, [%sp+2223+16]
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+4]
+C mid
+ and %g2, xffffffff, %g2
+ add %g2, rlimb, %l5
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], rlimb
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+0]
+
+ and %g2, xffffffff, %g2
+ add %g2, rlimb, %l5
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+
+ sllx i16, 16, %g2
+ add i00, cy, cy
+ return %i7+8
+ add %g2, cy, %o0
+EPILOGUE(mpn_addmul_2)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/lshiftc.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/lshiftc.asm
new file mode 100644
index 0000000..47286d5
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/lshiftc.asm
@@ -0,0 +1,165 @@
+dnl SPARC v9 mpn_lshiftc
+
+dnl Copyright 1996, 2000-2003, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 3
+C UltraSPARC 3: 2.67
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt',`%i3')
+
+define(`u0', `%l0')
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+ save %sp,-160,%sp
+
+ sllx n,3,%g1
+ sub %g0,cnt,tnc C negate shift count
+ add up,%g1,up C make %o1 point at end of src
+ add rp,%g1,rp C make %o0 point at end of res
+ ldx [up-8],u3 C load first limb
+ subcc n,5,n
+ srlx u3,tnc,%i5 C compute function result
+ bl,pn %xcc,.Lend1234
+ sllx u3,cnt,%g3
+
+ subcc n,4,n
+ ldx [up-16],u0
+ ldx [up-24],u1
+ add up,-32,up
+ ldx [up-0],u2
+ ldx [up-8],u3
+ srlx u0,tnc,%g2
+ bl,pn %xcc,.Lend5678
+ not %g3, %g3
+
+ b,a .Loop
+ ALIGN(16)
+.Loop:
+ sllx u0,cnt,%g1
+ andn %g3,%g2,%g3
+ ldx [up-16],u0
+ fanop
+C --
+ srlx u1,tnc,%g2
+ subcc n,4,n
+ stx %g3,[rp-8]
+ not %g1, %g1
+C --
+ sllx u1,cnt,%g3
+ andn %g1,%g2,%g1
+ ldx [up-24],u1
+ fanop
+C --
+ srlx u2,tnc,%g2
+ stx %g1,[rp-16]
+ add up,-32,up
+ not %g3, %g3
+C --
+ sllx u2,cnt,%g1
+ andn %g3,%g2,%g3
+ ldx [up-0],u2
+ fanop
+C --
+ srlx u3,tnc,%g2
+ stx %g3,[rp-24]
+ add rp,-32,rp
+ not %g1, %g1
+C --
+ sllx u3,cnt,%g3
+ andn %g1,%g2,%g1
+ ldx [up-8],u3
+ fanop
+C --
+ srlx u0,tnc,%g2
+ stx %g1,[rp-0]
+ bge,pt %xcc,.Loop
+ not %g3, %g3
+C --
+.Lend5678:
+ sllx u0,cnt,%g1
+ andn %g3,%g2,%g3
+ srlx u1,tnc,%g2
+ stx %g3,[rp-8]
+ not %g1, %g1
+ sllx u1,cnt,%g3
+ andn %g1,%g2,%g1
+ srlx u2,tnc,%g2
+ stx %g1,[rp-16]
+ not %g3, %g3
+ sllx u2,cnt,%g1
+ andn %g3,%g2,%g3
+ srlx u3,tnc,%g2
+ stx %g3,[rp-24]
+ add rp,-32,rp
+ not %g1, %g1
+ sllx u3,cnt,%g3 C carry...
+ andn %g1,%g2,%g1
+ stx %g1,[rp-0]
+
+.Lend1234:
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+.Loop0:
+ add rp,-8,rp
+ subcc n,1,n
+ ldx [up-16],u3
+ add up,-8,up
+ srlx u3,tnc,%g2
+ not %g3, %g3
+ andn %g3,%g2,%g3
+ stx %g3,[rp]
+ sllx u3,cnt,%g3
+ bnz,pt %xcc,.Loop0
+ fanop
+.Lret:
+ not %g3, %g3
+ stx %g3,[rp-8]
+ mov %i5,%i0
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/mul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/mul_1.asm
new file mode 100644
index 0000000..871d562
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/mul_1.asm
@@ -0,0 +1,580 @@
+dnl SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright 1998, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 14
+C UltraSPARC 3: 18.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the s1 operand split
+C into 32-bit pieces. We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C 1. Align the stack area where we transfer the four 49-bit product-sums
+C to a 32-byte boundary. That would minimize the cache collision.
+C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
+C be to align the area to map to the area immediately before s1?)
+C 2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C develop mpn_addmul_2. This would save many integer instructions.
+C 3. Unrolling. Questionable if it is worth the code expansion, given that
+C it could only save 1 cycle/limb.
+C 4. Specialize for particular v values. If its upper 32 bits are zero, we
+C could save many operations, in the FPU (fmuld), but more so in the IEU
+C since we'll be summing 48-bit quantities, which might be simpler.
+C 5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C the i00,i16,i32,i48 RAW less apart. The latter apart-scheduling should
+C not be greater than needed for L2 cache latency, and also not so great
+C that i16 needs to be copied.
+C 6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C to get high IEU bandwidth. (12 of the 14 cycles will be free for 2 IEU
+C ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C 8 FM
+C 10 FA
+C 11 MEM
+C 9 ISHIFT + 10? IADDLOG
+C 1 BRANCH
+C 49 insns totally (plus three mov insns that should be optimized out)
+
+C The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e we
+C sustain 3.79 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+PROLOGUE(mpn_mul_1)
+
+C Initialization. (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
+C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+ save %sp, -256, %sp
+ mov -1, %g4
+ srlx %g4, 48, xffff C store mask in register `xffff'
+ and %i3, xffff, %g2
+ stx %g2, [%sp+2223+0]
+ srlx %i3, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+8]
+ srlx %i3, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+16]
+ srlx %i3, 48, %g3
+ stx %g3, [%sp+2223+24]
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+
+ sllx %i2, 3, %i2
+ mov 0, cy C clear cy
+ add %i0, %i2, %i0
+ add %i1, %i2, %i1
+ neg %i2
+ add %i1, 4, %i5
+ add %i0, -32, %i4
+ add %i0, -16, %i0
+
+ ldd [%sp+2223+0], v00
+ ldd [%sp+2223+8], v16
+ ldd [%sp+2223+16], v32
+ ldd [%sp+2223+24], v48
+ ld [%sp+2223+0],%f2 C zero f2
+ ld [%sp+2223+0],%f4 C zero f4
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fxtod v00, v00
+ fxtod v16, v16
+ fxtod v32, v32
+ fxtod v48, v48
+
+C Start real work. (We sneakingly read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fmuld u00, v00, a00
+ fmuld u00, v16, a16
+ fmuld u00, v32, p32
+ fmuld u32, v00, r32
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_two_or_more
+ fmuld u32, v16, r48
+
+.L_one:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ add %i2, 8, %i2
+
+ mov i00, %g5 C i00+ now in g5
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_1
+ add %i2, 8, %i2
+
+.L_two_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_three_or_more
+ fmuld u32, v16, r48
+
+.L_two:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ mov i00, %g5 C i00+ now in g5
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_2
+ add %i2, 8, %i2
+
+.L_three_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_four_or_more
+ fmuld u32, v16, r48
+
+.L_three:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_3
+ add %i2, 8, %i2
+
+.L_four_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48
+
+.L_four:
+ b,a .L_out_4
+
+C BEGIN MAIN LOOP
+ .align 16
+.Loop:
+C 00
+ srlx %o4, 16, %o5 C (x >> 16)
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+C 01
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+C 02
+ faddd p48, r48, a48
+C 03
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+C 04
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+C 05
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+C 06
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+C 07
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+C 08
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+C 09
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+C 10
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+C 11
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+C 12
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+C 13
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48
+C END MAIN LOOP
+
+.L_out_4:
+ srlx %o4, 16, %o5 C (x >> 16)
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox a00, a00
+ faddd p48, r48, a48
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_3:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox r64, a00
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ fdtox r80, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_2:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_1:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ or %i3, %o5, %o5
+ stx %o5, [%i4+%i2]
+
+ sllx i00, 0, %g2
+ add %g2, cy, cy
+ sllx i16, 16, %g3
+ add %g3, cy, cy
+
+ return %i7+8
+ mov cy, %o0
+EPILOGUE(mpn_mul_1)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm
new file mode 100644
index 0000000..43c69d3
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm
@@ -0,0 +1,342 @@
+dnl SPARC v9 64-bit mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 22
+C UltraSPARC 3: 36
+
+C This was generated by the Sun C compiler. It runs at 22 cycles/limb on the
+C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
+C code using the same algorithm. For 1-3 limbs, a special loop was generated,
+C which causes performance problems in particular for 2 and 3 limbs.
+C Ultimately, this should be replaced by hand-written code in the same software
+C pipeline style as e.g., addmul_1.asm.
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sqr_diagonal)
+ save %sp, -240, %sp
+
+ sethi %hi(0x1ffc00), %o0
+ sethi %hi(0x3ffc00), %o1
+ add %o0, 1023, %o7
+ cmp %i2, 4
+ add %o1, 1023, %o4
+ or %g0, %i1, %g1
+ or %g0, %i0, %o0
+ bl,pn %xcc, .Lsmall
+ or %g0, 0, %g2
+
+ ldx [%i1], %o1
+ add %i1, 24, %g1
+ or %g0, 3, %g2
+ srlx %o1, 42, %g3
+ stx %g3, [%sp+2279]
+ and %o1, %o7, %o2
+ stx %o2, [%sp+2263]
+ srlx %o1, 21, %o1
+ ldd [%sp+2279], %f0
+ and %o1, %o7, %o1
+ stx %o1, [%sp+2271]
+ ldx [%i1+8], %o2
+ fxtod %f0, %f12
+ srlx %o2, 21, %o1
+ and %o2, %o7, %g3
+ ldd [%sp+2263], %f2
+ fmuld %f12, %f12, %f10
+ srlx %o2, 42, %o2
+ ldd [%sp+2271], %f0
+ and %o1, %o7, %o1
+ fxtod %f2, %f8
+ stx %o2, [%sp+2279]
+ stx %o1, [%sp+2271]
+ fxtod %f0, %f0
+ stx %g3, [%sp+2263]
+ fdtox %f10, %f14
+ fmuld %f12, %f8, %f6
+ ldx [%i1+16], %o2
+ std %f14, [%sp+2255]
+ fmuld %f0, %f0, %f2
+ fmuld %f8, %f8, %f10
+ srlx %o2, 42, %o1
+ faddd %f6, %f6, %f6
+ fmuld %f12, %f0, %f12
+ fmuld %f0, %f8, %f8
+ ldd [%sp+2279], %f0
+ ldd [%sp+2263], %f4
+ fdtox %f10, %f10
+ std %f10, [%sp+2239]
+ faddd %f2, %f6, %f6
+ ldd [%sp+2271], %f2
+ fdtox %f12, %f12
+ std %f12, [%sp+2247]
+ fdtox %f8, %f8
+ std %f8, [%sp+2231]
+ fdtox %f6, %f6
+ std %f6, [%sp+2223]
+
+.Loop: srlx %o2, 21, %g3
+ stx %o1, [%sp+2279]
+ add %g2, 1, %g2
+ and %g3, %o7, %o1
+ ldx [%sp+2255], %g4
+ cmp %g2, %i2
+ stx %o1, [%sp+2271]
+ add %g1, 8, %g1
+ add %o0, 16, %o0
+ ldx [%sp+2239], %o1
+ fxtod %f0, %f10
+ fxtod %f4, %f14
+ ldx [%sp+2231], %i0
+ ldx [%sp+2223], %g5
+ ldx [%sp+2247], %g3
+ and %o2, %o7, %o2
+ fxtod %f2, %f8
+ fmuld %f10, %f10, %f0
+ stx %o2, [%sp+2263]
+ fmuld %f10, %f14, %f6
+ ldx [%g1-8], %o2
+ fmuld %f10, %f8, %f12
+ fdtox %f0, %f2
+ ldd [%sp+2279], %f0
+ fmuld %f8, %f8, %f4
+ faddd %f6, %f6, %f6
+ fmuld %f14, %f14, %f10
+ std %f2, [%sp+2255]
+ sllx %g4, 20, %g4
+ ldd [%sp+2271], %f2
+ fmuld %f8, %f14, %f8
+ sllx %i0, 22, %i1
+ fdtox %f12, %f12
+ std %f12, [%sp+2247]
+ sllx %g5, 42, %i0
+ add %o1, %i1, %o1
+ faddd %f4, %f6, %f6
+ ldd [%sp+2263], %f4
+ add %o1, %i0, %o1
+ add %g3, %g4, %g3
+ fdtox %f10, %f10
+ std %f10, [%sp+2239]
+ srlx %o1, 42, %g4
+ and %g5, %o4, %i0
+ fdtox %f8, %f8
+ std %f8, [%sp+2231]
+ srlx %g5, 22, %g5
+ sub %g4, %i0, %g4
+ fdtox %f6, %f6
+ std %f6, [%sp+2223]
+ srlx %g4, 63, %g4
+ add %g3, %g5, %g3
+ add %g3, %g4, %g3
+ stx %o1, [%o0-16]
+ srlx %o2, 42, %o1
+ bl,pt %xcc, .Loop
+ stx %g3, [%o0-8]
+
+ stx %o1, [%sp+2279]
+ srlx %o2, 21, %o1
+ fxtod %f0, %f16
+ ldx [%sp+2223], %g3
+ fxtod %f4, %f6
+ and %o2, %o7, %o3
+ stx %o3, [%sp+2263]
+ fxtod %f2, %f4
+ and %o1, %o7, %o1
+ ldx [%sp+2231], %o2
+ sllx %g3, 42, %g4
+ fmuld %f16, %f16, %f14
+ stx %o1, [%sp+2271]
+ fmuld %f16, %f6, %f8
+ add %o0, 48, %o0
+ ldx [%sp+2239], %o1
+ sllx %o2, 22, %o2
+ fmuld %f4, %f4, %f10
+ ldx [%sp+2255], %o3
+ fdtox %f14, %f14
+ fmuld %f4, %f6, %f2
+ std %f14, [%sp+2255]
+ faddd %f8, %f8, %f12
+ add %o1, %o2, %o2
+ fmuld %f16, %f4, %f4
+ ldd [%sp+2279], %f0
+ sllx %o3, 20, %g5
+ add %o2, %g4, %o2
+ fmuld %f6, %f6, %f6
+ srlx %o2, 42, %o3
+ and %g3, %o4, %g4
+ srlx %g3, 22, %g3
+ faddd %f10, %f12, %f16
+ ldd [%sp+2271], %f12
+ ldd [%sp+2263], %f8
+ fxtod %f0, %f0
+ sub %o3, %g4, %o3
+ ldx [%sp+2247], %o1
+ srlx %o3, 63, %o3
+ fdtox %f2, %f10
+ fxtod %f8, %f8
+ std %f10, [%sp+2231]
+ fdtox %f6, %f6
+ std %f6, [%sp+2239]
+ add %o1, %g5, %o1
+ fmuld %f0, %f0, %f2
+ fdtox %f16, %f16
+ std %f16, [%sp+2223]
+ add %o1, %g3, %o1
+ fdtox %f4, %f4
+ std %f4, [%sp+2247]
+ fmuld %f0, %f8, %f10
+ fxtod %f12, %f12
+ add %o1, %o3, %o1
+ stx %o2, [%o0-48]
+ fmuld %f8, %f8, %f6
+ stx %o1, [%o0-40]
+ fdtox %f2, %f2
+ ldx [%sp+2231], %o2
+ faddd %f10, %f10, %f10
+ ldx [%sp+2223], %g3
+ fmuld %f12, %f12, %f4
+ fdtox %f6, %f6
+ ldx [%sp+2239], %o1
+ sllx %o2, 22, %o2
+ fmuld %f12, %f8, %f8
+ sllx %g3, 42, %g5
+ ldx [%sp+2255], %o3
+ fmuld %f0, %f12, %f0
+ add %o1, %o2, %o2
+ faddd %f4, %f10, %f4
+ ldx [%sp+2247], %o1
+ add %o2, %g5, %o2
+ and %g3, %o4, %g4
+ fdtox %f8, %f8
+ sllx %o3, 20, %g5
+ std %f8, [%sp+2231]
+ fdtox %f0, %f0
+ srlx %o2, 42, %o3
+ add %o1, %g5, %o1
+ fdtox %f4, %f4
+ srlx %g3, 22, %g3
+ sub %o3, %g4, %o3
+ std %f6, [%sp+2239]
+ std %f4, [%sp+2223]
+ srlx %o3, 63, %o3
+ add %o1, %g3, %o1
+ std %f2, [%sp+2255]
+ add %o1, %o3, %o1
+ std %f0, [%sp+2247]
+ stx %o2, [%o0-32]
+ stx %o1, [%o0-24]
+ ldx [%sp+2231], %o2
+ ldx [%sp+2223], %o3
+ ldx [%sp+2239], %o1
+ sllx %o2, 22, %o2
+ sllx %o3, 42, %g5
+ ldx [%sp+2255], %g4
+ and %o3, %o4, %g3
+ add %o1, %o2, %o2
+ ldx [%sp+2247], %o1
+ add %o2, %g5, %o2
+ stx %o2, [%o0-16]
+ sllx %g4, 20, %g4
+ srlx %o2, 42, %o2
+ add %o1, %g4, %o1
+ srlx %o3, 22, %o3
+ sub %o2, %g3, %o2
+ srlx %o2, 63, %o2
+ add %o1, %o3, %o1
+ add %o1, %o2, %o1
+ stx %o1, [%o0-8]
+ ret
+ restore %g0, %g0, %g0
+.Lsmall:
+ ldx [%g1], %o2
+.Loop0:
+ and %o2, %o7, %o1
+ stx %o1, [%sp+2263]
+ add %g2, 1, %g2
+ srlx %o2, 21, %o1
+ add %g1, 8, %g1
+ srlx %o2, 42, %o2
+ stx %o2, [%sp+2279]
+ and %o1, %o7, %o1
+ ldd [%sp+2263], %f0
+ cmp %g2, %i2
+ stx %o1, [%sp+2271]
+ fxtod %f0, %f6
+ ldd [%sp+2279], %f0
+ ldd [%sp+2271], %f4
+ fxtod %f0, %f2
+ fmuld %f6, %f6, %f0
+ fxtod %f4, %f10
+ fmuld %f2, %f6, %f4
+ fdtox %f0, %f0
+ std %f0, [%sp+2239]
+ fmuld %f10, %f6, %f8
+ fmuld %f10, %f10, %f0
+ faddd %f4, %f4, %f6
+ fmuld %f2, %f2, %f4
+ fdtox %f8, %f8
+ std %f8, [%sp+2231]
+ fmuld %f2, %f10, %f2
+ faddd %f0, %f6, %f0
+ fdtox %f4, %f4
+ std %f4, [%sp+2255]
+ fdtox %f2, %f2
+ std %f2, [%sp+2247]
+ fdtox %f0, %f0
+ std %f0, [%sp+2223]
+ ldx [%sp+2239], %o1
+ ldx [%sp+2255], %g4
+ ldx [%sp+2231], %o2
+ sllx %g4, 20, %g4
+ ldx [%sp+2223], %o3
+ sllx %o2, 22, %o2
+ sllx %o3, 42, %g5
+ add %o1, %o2, %o2
+ ldx [%sp+2247], %o1
+ add %o2, %g5, %o2
+ stx %o2, [%o0]
+ and %o3, %o4, %g3
+ srlx %o2, 42, %o2
+ add %o1, %g4, %o1
+ srlx %o3, 22, %o3
+ sub %o2, %g3, %o2
+ srlx %o2, 63, %o2
+ add %o1, %o3, %o1
+ add %o1, %o2, %o1
+ stx %o1, [%o0+8]
+ add %o0, 16, %o0
+ bl,a,pt %xcc, .Loop0
+ ldx [%g1], %o2
+ ret
+ restore %g0, %g0, %g0
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/sub_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/sub_n.asm
new file mode 100644
index 0000000..9fb7f70
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/sub_n.asm
@@ -0,0 +1,241 @@
+dnl SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright 2001-2003, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 4
+C UltraSPARC 3: 4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u-v-carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
+C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sub_nc)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ nop
+ b,a L(com)
+EPILOGUE()
+
+PROLOGUE(mpn_sub_n)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ mov 0,cy
+L(com):
+ ldx [up+0],u0
+ ldx [vp+0],v0
+ add up,32,up
+ ldx [up-24],u1
+ ldx [vp+8],v1
+ add vp,32,vp
+ ldx [up-16],u2
+ ldx [vp-16],v2
+ ldx [up-8],u3
+ ldx [vp-8],v3
+ subcc n,8,n
+ sub u0,v0,%g1 C main sub
+ sub %g1,cy,%g5 C carry sub
+ orn u0,v0,%g2
+ bl,pn %xcc,.Lend4567
+ fanop
+ b,a .Loop
+
+ .align 16
+C START MAIN LOOP
+.Loop: orn %g5,%g2,%g2
+ andn u0,v0,%g3
+ ldx [up+0],u0
+ fanop
+C --
+ andn %g2,%g3,%g2
+ ldx [vp+0],v0
+ add up,32,up
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u1,v1,%g1
+ stx %g5,[rp+0]
+ fanop
+C --
+ sub %g1,cy,%g5
+ orn u1,v1,%g2
+ fmnop
+ fanop
+C --
+ orn %g5,%g2,%g2
+ andn u1,v1,%g3
+ ldx [up-24],u1
+ fanop
+C --
+ andn %g2,%g3,%g2
+ ldx [vp+8],v1
+ add vp,32,vp
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u2,v2,%g1
+ stx %g5,[rp+8]
+ fanop
+C --
+ sub %g1,cy,%g5
+ orn u2,v2,%g2
+ fmnop
+ fanop
+C --
+ orn %g5,%g2,%g2
+ andn u2,v2,%g3
+ ldx [up-16],u2
+ fanop
+C --
+ andn %g2,%g3,%g2
+ ldx [vp-16],v2
+ add rp,32,rp
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u3,v3,%g1
+ stx %g5,[rp-16]
+ fanop
+C --
+ sub %g1,cy,%g5
+ orn u3,v3,%g2
+ fmnop
+ fanop
+C --
+ orn %g5,%g2,%g2
+ andn u3,v3,%g3
+ ldx [up-8],u3
+ fanop
+C --
+ andn %g2,%g3,%g2
+ subcc n,4,n
+ ldx [vp-8],v3
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u0,v0,%g1
+ stx %g5,[rp-8]
+ fanop
+C --
+ sub %g1,cy,%g5
+ orn u0,v0,%g2
+ bge,pt %xcc,.Loop
+ fanop
+C END MAIN LOOP
+.Lend4567:
+ orn %g5,%g2,%g2
+ andn u0,v0,%g3
+ andn %g2,%g3,%g2
+ srlx %g2,63,cy
+ sub u1,v1,%g1
+ stx %g5,[rp+0]
+ sub %g1,cy,%g5
+ orn u1,v1,%g2
+ orn %g5,%g2,%g2
+ andn u1,v1,%g3
+ andn %g2,%g3,%g2
+ srlx %g2,63,cy
+ sub u2,v2,%g1
+ stx %g5,[rp+8]
+ sub %g1,cy,%g5
+ orn u2,v2,%g2
+ orn %g5,%g2,%g2
+ andn u2,v2,%g3
+ andn %g2,%g3,%g2
+ add rp,32,rp
+ srlx %g2,63,cy
+ sub u3,v3,%g1
+ stx %g5,[rp-16]
+ sub %g1,cy,%g5
+ orn u3,v3,%g2
+ orn %g5,%g2,%g2
+ andn u3,v3,%g3
+ andn %g2,%g3,%g2
+ srlx %g2,63,cy
+ stx %g5,[rp-8]
+
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+
+.Loop0: ldx [up],u0
+ add up,8,up
+ ldx [vp],v0
+ add vp,8,vp
+ add rp,8,rp
+ subcc n,1,n
+ sub u0,v0,%g1
+ orn u0,v0,%g2
+ sub %g1,cy,%g5
+ andn u0,v0,%g3
+ orn %g5,%g2,%g2
+ stx %g5,[rp-8]
+ andn %g2,%g3,%g2
+ bnz,pt %xcc,.Loop0
+ srlx %g2,63,cy
+
+.Lret: mov cy,%i0
+ ret
+ restore
+EPILOGUE(mpn_sub_n)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc1234/submul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/submul_1.asm
new file mode 100644
index 0000000..0bdb566
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc1234/submul_1.asm
@@ -0,0 +1,68 @@
+dnl SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright 2001-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 18
+C UltraSPARC 3: 23
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+
+PROLOGUE(mpn_submul_1)
+ save %sp,-176,%sp
+
+ sllx %i2, 3, %g2
+ or %g0, %i1, %o1
+ add %g2, 15, %o0
+ or %g0, %i2, %o2
+ and %o0, -16, %o0
+ sub %sp, %o0, %sp
+ add %sp, 2223, %o0
+ or %g0, %o0, %l0
+ call mpn_mul_1
+ or %g0, %i3, %o3
+ or %g0, %o0, %l1 C preserve carry value from mpn_mul_1
+ or %g0, %i0, %o0
+ or %g0, %i0, %o1
+ or %g0, %l0, %o2
+ call mpn_sub_n
+ or %g0, %i2, %o3
+ ret
+ restore %l1, %o0, %o0 C sum carry values
+EPILOGUE(mpn_submul_1)
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparc34/gmp-mparam.h b/gmp-6.3.0/mpn/sparc64/ultrasparc34/gmp-mparam.h
new file mode 100644
index 0000000..c88e680
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparc34/gmp-mparam.h
@@ -0,0 +1,222 @@
+/* ultrasparc3/4 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2002, 2004, 2006, 2008-2010, 2014, 2015 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1593 MHz ultrasparc3 running Solaris 10 (swift.nada.kth.se) */
+/* FFT tuning limit = 100 M */
+/* Generated by tuneup.c, 2015-10-09, gcc 3.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 22
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 29
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1_NORM_THRESHOLD 2
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 93
+#define MUL_TOOM44_THRESHOLD 142
+#define MUL_TOOM6H_THRESHOLD 165
+#define MUL_TOOM8H_THRESHOLD 278
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 88
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 50
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 67
+
+#define SQR_BASECASE_THRESHOLD 7
+#define SQR_TOOM2_THRESHOLD 70
+#define SQR_TOOM3_THRESHOLD 101
+#define SQR_TOOM4_THRESHOLD 184
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 339
+
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 14
+#define SQRMOD_BNM1_THRESHOLD 9
+
+#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 212, 5}, { 13, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 23, 8}, { 47, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
+ { 31, 9}, { 63, 8}, { 127, 7}, { 255, 9}, \
+ { 67,10}, { 39, 9}, { 79, 8}, { 159, 7}, \
+ { 319, 9}, { 83,10}, { 47, 9}, { 95, 8}, \
+ { 191, 7}, { 383,10}, { 55,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 7}, { 511,10}, \
+ { 71, 9}, { 143, 8}, { 287,10}, { 79, 9}, \
+ { 159, 8}, { 319, 9}, { 175, 8}, { 351,11}, \
+ { 47,10}, { 95, 9}, { 191, 8}, { 383, 7}, \
+ { 767,10}, { 103,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95,10}, { 207, 9}, { 415,11}, \
+ { 111,10}, { 223, 9}, { 479,12}, { 63,11}, \
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,10}, { 319, 9}, { 639,11}, { 175,10}, \
+ { 351,11}, { 191,10}, { 383,11}, { 207,10}, \
+ { 415,11}, { 223,10}, { 447,13}, { 63,12}, \
+ { 127,11}, { 287,10}, { 575,11}, { 319,10}, \
+ { 703,12}, { 191,11}, { 383,12}, { 223,11}, \
+ { 447,13}, { 127,12}, { 287,11}, { 575,12}, \
+ { 351,13}, { 191,12}, { 479,14}, { 127,13}, \
+ { 255,12}, { 575,13}, { 319,12}, { 703,13}, \
+ { 383,12}, { 767,13}, { 447,12}, { 895,14}, \
+ { 255,13}, { 511,12}, { 1023,13}, { 575,12}, \
+ { 1151,13}, { 703,14}, { 383,13}, { 831,12}, \
+ { 1663,13}, { 895,15}, { 255,14}, { 511,13}, \
+ { 1151,14}, { 639,13}, { 1407,12}, { 2815,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1791,15}, \
+ { 511,14}, { 1023,13}, { 2047,14}, { 1151,13}, \
+ { 2303,14}, { 1407,13}, { 2815,15}, { 767,14}, \
+ { 1791,16}, { 511,15}, { 1023,14}, { 2303,15}, \
+ { 1279,14}, { 2815,15}, { 1535,14}, { 3199,15}, \
+ { 1791,16}, { 1023,15}, { 2047,14}, { 4223,15}, \
+ { 2303,14}, { 4863,15}, { 2815,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 171
+#define MUL_FFT_THRESHOLD 2240
+
+#define SQR_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 244, 5}, { 8, 4}, { 17, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 31, 9}, { 19, 8}, { 39, 9}, \
+ { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
+ { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95,10}, { 55,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
+ { 287, 7}, { 575,10}, { 79, 9}, { 159,11}, \
+ { 47, 9}, { 191, 8}, { 383, 7}, { 767, 9}, \
+ { 207,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511,10}, { 135, 9}, { 271,10}, \
+ { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351, 8}, \
+ { 703, 7}, { 1407,11}, { 95,10}, { 191, 9}, \
+ { 383, 8}, { 767,10}, { 207, 9}, { 415,10}, \
+ { 223, 9}, { 447,12}, { 63,11}, { 127,10}, \
+ { 271, 9}, { 543,10}, { 287, 9}, { 575, 8}, \
+ { 1151,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 351, 9}, { 703, 8}, { 1407, 7}, { 2815,11}, \
+ { 207,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,13}, { 63,11}, { 271,10}, \
+ { 543,11}, { 287,12}, { 159,11}, { 351,10}, \
+ { 703,12}, { 191,11}, { 415,10}, { 831,12}, \
+ { 223,13}, { 127,12}, { 255,11}, { 511,10}, \
+ { 1023,11}, { 543,12}, { 287,11}, { 607,12}, \
+ { 319,11}, { 639,12}, { 415,11}, { 895,12}, \
+ { 479,14}, { 127,13}, { 255,12}, { 543,11}, \
+ { 1087,12}, { 575,11}, { 1151,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 703,10}, { 2815,12}, \
+ { 831,11}, { 1663,13}, { 447,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 703,12}, \
+ { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
+ { 895,15}, { 255,14}, { 511,13}, { 1215,14}, \
+ { 639,13}, { 1279,14}, { 767,13}, { 1663,14}, \
+ { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \
+ { 2175,14}, { 1151,13}, { 2431,14}, { 1407,15}, \
+ { 767,14}, { 1791,16}, { 511,15}, { 1023,14}, \
+ { 2303,15}, { 1279,14}, { 2815,15}, { 1535,14}, \
+ { 3199,15}, { 1791,16}, { 1023,15}, { 2047,14}, \
+ { 4351,15}, { 2303,14}, { 4863,15}, { 2815,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 184
+#define SQR_FFT_THRESHOLD 1728
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 29
+#define MULLO_MUL_N_THRESHOLD 4392
+#define SQRLO_BASECASE_THRESHOLD 2
+#define SQRLO_DC_THRESHOLD 63
+#define SQRLO_SQR_THRESHOLD 3176
+
+#define DC_DIV_QR_THRESHOLD 16
+#define DC_DIVAPPR_Q_THRESHOLD 64
+#define DC_BDIV_QR_THRESHOLD 30
+#define DC_BDIV_Q_THRESHOLD 86
+
+#define INV_MULMOD_BNM1_THRESHOLD 58
+#define INV_NEWTON_THRESHOLD 17
+#define INV_APPR_THRESHOLD 15
+
+#define BINV_NEWTON_THRESHOLD 109
+#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD 117
+
+#define MU_DIV_QR_THRESHOLD 618
+#define MU_DIVAPPR_Q_THRESHOLD 618
+#define MUPI_DIV_QR_THRESHOLD 0 /* always */
+#define MU_BDIV_QR_THRESHOLD 680
+#define MU_BDIV_Q_THRESHOLD 807
+
+#define POWM_SEC_TABLE 3,22,102,579,1555
+
+#define GET_STR_DC_THRESHOLD 20
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 381
+#define SET_STR_PRECOMPUTE_THRESHOLD 1042
+
+#define FAC_DSC_THRESHOLD 462
+#define FAC_ODD_THRESHOLD 0 /* always */
+
+#define MATRIX22_STRASSEN_THRESHOLD 12
+#define HGCD_THRESHOLD 45
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 1094
+#define GCD_DC_THRESHOLD 126
+#define GCDEXT_DC_THRESHOLD 132
+#define JACOBI_BASE_METHOD 4
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/add_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/add_n.asm
new file mode 100644
index 0000000..954c7f6
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/add_n.asm
@@ -0,0 +1,68 @@
+dnl SPARC v9 mpn_add_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: ?
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_nc)
+ b,a L(ent)
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+ mov 0, cy
+L(ent): cmp %g0, cy
+L(top): ldx [up+0], %o4
+ add up, 8, up
+ ldx [vp+0], %o5
+ add vp, 8, vp
+ add rp, 8, rp
+ add n, -1, n
+ srlx %o4, 32, %g1
+ srlx %o5, 32, %g2
+ addccc %o4, %o5, %g3
+ addccc %g1, %g2, %g0
+ brgz n, L(top)
+ stx %g3, [rp-8]
+
+ retl
+ addc %g0, %g0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh1_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh1_n.asm
new file mode 100644
index 0000000..3134797
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh1_n.asm
@@ -0,0 +1,41 @@
+dnl SPARC v9 mpn_addlsh1_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 1)
+define(RSH, 63)
+
+define(func, mpn_addlsh1_n)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n)
+
+include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh2_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh2_n.asm
new file mode 100644
index 0000000..ee1afd0
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlsh2_n.asm
@@ -0,0 +1,41 @@
+dnl SPARC v9 mpn_addlsh2_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 2)
+define(RSH, 62)
+
+define(func, mpn_addlsh2_n)
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n)
+
+include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlshC_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlshC_n.asm
new file mode 100644
index 0000000..5be9a0d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addlshC_n.asm
@@ -0,0 +1,69 @@
+dnl SPARC v9 mpn_addlshC_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+C cycles/limb
+C UltraSPARC T1: 21
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ mov 0, cy
+ mov 0, %g5
+ cmp %g0, cy
+L(top): ldx [up+0], %o4
+ add up, 8, up
+ ldx [vp+0], %o5
+ add vp, 8, vp
+ add rp, 8, rp
+
+ sllx %o5, LSH, %g4
+ add n, -1, n
+ or %g5, %g4, %g4
+ srlx %o5, RSH, %g5
+
+ srlx %o4, 32, %g1
+ srlx %g4, 32, %g2
+ addccc %o4, %g4, %g3
+ addccc %g1, %g2, %g0
+ brgz n, L(top)
+ stx %g3, [rp-8]
+
+ retl
+ addc %g5, %g0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/addmul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addmul_1.asm
new file mode 100644
index 0000000..29dba96
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/addmul_1.asm
@@ -0,0 +1,86 @@
+dnl SPARC v9 mpn_addmul_1 for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 74
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_addmul_1)
+ save %sp, -176, %sp
+ mov 1, %o2
+ mov %i0, %g2
+ srlx %i3, 32, %o4
+ sllx %o2, 32, %o2
+ srl %i3, 0, %i3
+ mov 0, %g3
+ mov 0, %i0
+
+L(top): ldx [%i1+%g3], %g1
+ srl %g1, 0, %g4
+ mulx %g4, %i3, %o5
+ srlx %g1, 32, %g1
+ mulx %g1, %i3, %g5
+ mulx %g4, %o4, %g4
+ mulx %g1, %o4, %g1
+ srlx %o5, 32, %o1
+ add %g5, %o1, %o1
+ addcc %o1, %g4, %g4
+ srl %o5, 0, %o0
+ ldx [%g2+%g3], %o5
+ sllx %g4, 32, %o1
+ add %g1, %o2, %l1
+ movlu %xcc, %l1, %g1
+ add %o1, %o0, %l0
+ addcc %l0, %i0, %g5
+ srlx %g4, 32, %i0
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0
+ addcc %o5, %g5, %g5
+ stx %g5, [%g2+%g3]
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0
+ add %i2, -1, %i2
+ add %i0, %g1, %i0
+ brnz,pt %i2, L(top)
+ add %g3, 8, %g3
+ return %i7+8
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/gmp-mparam.h b/gmp-6.3.0/mpn/sparc64/ultrasparct1/gmp-mparam.h
new file mode 100644
index 0000000..99db78a
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/gmp-mparam.h
@@ -0,0 +1,154 @@
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2002, 2004, 2006, 2008-2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1000 MHz ultrasparc t1 running GNU/Linux */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 13
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 34
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 8
+#define MUL_TOOM33_THRESHOLD 50
+#define MUL_TOOM44_THRESHOLD 99
+#define MUL_TOOM6H_THRESHOLD 125
+#define MUL_TOOM8H_THRESHOLD 187
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 77
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 50
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 34
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 14
+#define SQR_TOOM3_THRESHOLD 57
+#define SQR_TOOM4_THRESHOLD 133
+#define SQR_TOOM6_THRESHOLD 156
+#define SQR_TOOM8_THRESHOLD 260
+
+#define MULMID_TOOM42_THRESHOLD 12
+
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 7
+
+#define MUL_FFT_MODF_THRESHOLD 176 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 176, 5}, { 7, 6}, { 4, 5}, { 9, 6}, \
+ { 5, 5}, { 11, 6}, { 11, 7}, { 6, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 9, 8}, \
+ { 5, 7}, { 13, 8}, { 7, 7}, { 15, 6}, \
+ { 32, 7}, { 24, 8}, { 21, 9}, { 11, 8}, \
+ { 23,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 43,10}, { 23,11}, { 15,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 67,10}, { 39, 9}, \
+ { 79, 8}, { 159,10}, { 47, 9}, { 95,11}, \
+ { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 53
+#define MUL_FFT_THRESHOLD 1728
+
+
+#define SQR_FFT_MODF_THRESHOLD 148 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 148, 5}, { 7, 6}, { 4, 5}, { 9, 6}, \
+ { 5, 5}, { 11, 6}, { 11, 7}, { 6, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 13, 8}, \
+ { 7, 7}, { 16, 8}, { 9, 6}, { 38, 7}, \
+ { 20, 8}, { 11, 7}, { 24, 8}, { 13, 9}, \
+ { 7, 7}, { 30, 8}, { 19, 9}, { 11, 8}, \
+ { 25,10}, { 7, 9}, { 15, 8}, { 31, 9}, \
+ { 19, 8}, { 39, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47, 8}, { 95, 9}, \
+ { 51,11}, { 15,10}, { 31, 8}, { 127,10}, \
+ { 39, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 58
+#define SQR_FFT_THRESHOLD 1344
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 28
+#define MULLO_MUL_N_THRESHOLD 3176
+
+#define DC_DIV_QR_THRESHOLD 27
+#define DC_DIVAPPR_Q_THRESHOLD 106
+#define DC_BDIV_QR_THRESHOLD 27
+#define DC_BDIV_Q_THRESHOLD 62
+
+#define INV_MULMOD_BNM1_THRESHOLD 14
+#define INV_NEWTON_THRESHOLD 163
+#define INV_APPR_THRESHOLD 117
+
+#define BINV_NEWTON_THRESHOLD 166
+#define REDC_1_TO_REDC_N_THRESHOLD 31
+
+#define MU_DIV_QR_THRESHOLD 734
+#define MU_DIVAPPR_Q_THRESHOLD 748
+#define MUPI_DIV_QR_THRESHOLD 67
+#define MU_BDIV_QR_THRESHOLD 562
+#define MU_BDIV_Q_THRESHOLD 734
+
+#define POWM_SEC_TABLE 4,29,188,643,2741
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 58
+#define HGCD_APPR_THRESHOLD 55
+#define HGCD_REDUCE_THRESHOLD 637
+#define GCD_DC_THRESHOLD 186
+#define GCDEXT_DC_THRESHOLD 140
+#define JACOBI_BASE_METHOD 3
+
+#define GET_STR_DC_THRESHOLD 20
+#define GET_STR_PRECOMPUTE_THRESHOLD 33
+#define SET_STR_DC_THRESHOLD 268
+#define SET_STR_PRECOMPUTE_THRESHOLD 960
+
+#define FAC_DSC_THRESHOLD 268
+#define FAC_ODD_THRESHOLD 0 /* always */
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/mul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/mul_1.asm
new file mode 100644
index 0000000..1fea2a1
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/mul_1.asm
@@ -0,0 +1,82 @@
+dnl SPARC v9 mpn_mul_1 for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 68
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mul_1)
+ save %sp, -176, %sp
+ mov 1, %o2
+ mov %i0, %g2
+ srlx %i3, 32, %o4
+ sllx %o2, 32, %o2
+ srl %i3, 0, %i3
+ mov 0, %g3
+ mov 0, %i0
+
+L(top): ldx [%i1+%g3], %g1
+ srl %g1, 0, %g4
+ mulx %g4, %i3, %o5
+ srlx %g1, 32, %g1
+ mulx %g1, %i3, %g5
+ mulx %g4, %o4, %g4
+ mulx %g1, %o4, %g1
+ srlx %o5, 32, %o1
+ add %g5, %o1, %o1
+ addcc %o1, %g4, %g4
+ srl %o5, 0, %o0
+ sllx %g4, 32, %o1
+ add %g1, %o2, %l1
+ movlu %xcc, %l1, %g1
+ add %o1, %o0, %l0
+ addcc %l0, %i0, %g5
+ srlx %g4, 32, %i0
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0
+ stx %g5, [%g2+%g3]
+ add %i2, -1, %i2
+ add %i0, %g1, %i0
+ brnz,pt %i2, L(top)
+ add %g3, 8, %g3
+ return %i7+8
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh1_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh1_n.asm
new file mode 100644
index 0000000..51bd4ab
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh1_n.asm
@@ -0,0 +1,41 @@
+dnl SPARC v9 mpn_rsblsh1_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 1)
+define(RSH, 63)
+
+define(func, mpn_rsblsh1_n)
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n)
+
+include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh2_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh2_n.asm
new file mode 100644
index 0000000..f0d208e
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblsh2_n.asm
@@ -0,0 +1,41 @@
+dnl SPARC v9 mpn_rsblsh2_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 2)
+define(RSH, 62)
+
+define(func, mpn_rsblsh2_n)
+
+MULFUNC_PROLOGUE(mpn_rsblsh2_n)
+
+include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblshC_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblshC_n.asm
new file mode 100644
index 0000000..7c03e9f
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/rsblshC_n.asm
@@ -0,0 +1,69 @@
+dnl SPARC v9 mpn_rsblshC_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+C cycles/limb
+C UltraSPARC T1: 21
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ mov 0, cy
+ mov 0, %g5
+ cmp %g0, cy
+L(top): ldx [up+0], %o4
+ add up, 8, up
+ ldx [vp+0], %o5
+ add vp, 8, vp
+ add rp, 8, rp
+
+ sllx %o5, LSH, %g4
+ add n, -1, n
+ or %g5, %g4, %g4
+ srlx %o5, RSH, %g5
+
+ srlx %o4, 32, %g1
+ srlx %g4, 32, %g2
+ subccc %g4, %o4, %g3
+ subccc %g2, %g1, %g0
+ brgz n, L(top)
+ stx %g3, [rp-8]
+
+ retl
+ subc %g5, %g0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/sub_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sub_n.asm
new file mode 100644
index 0000000..c2af89f
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sub_n.asm
@@ -0,0 +1,68 @@
+dnl SPARC v9 mpn_sub_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: ?
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sub_nc)
+ b,a L(ent)
+EPILOGUE()
+PROLOGUE(mpn_sub_n)
+ mov 0, cy
+L(ent): cmp %g0, cy
+L(top): ldx [up+0], %o4
+ add up, 8, up
+ ldx [vp+0], %o5
+ add vp, 8, vp
+ add rp, 8, rp
+ add n, -1, n
+ srlx %o4, 32, %g1
+ srlx %o5, 32, %g2
+ subccc %o4, %o5, %g3
+ subccc %g1, %g2, %g0
+ brgz n, L(top)
+ stx %g3, [rp-8]
+
+ retl
+ addc %g0, %g0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh1_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh1_n.asm
new file mode 100644
index 0000000..8c8fa80
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh1_n.asm
@@ -0,0 +1,41 @@
+dnl SPARC v9 mpn_sublsh1_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 1)
+define(RSH, 63)
+
+define(func, mpn_sublsh1_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n)
+
+include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh2_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh2_n.asm
new file mode 100644
index 0000000..2fd5eee
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublsh2_n.asm
@@ -0,0 +1,41 @@
+dnl SPARC v9 mpn_sublsh2_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 2)
+define(RSH, 62)
+
+define(func, mpn_sublsh2_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh2_n)
+
+include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublshC_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublshC_n.asm
new file mode 100644
index 0000000..01eafef
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/sublshC_n.asm
@@ -0,0 +1,69 @@
+dnl SPARC v9 mpn_sublshC_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+C cycles/limb
+C UltraSPARC T1: 21
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ mov 0, cy
+ mov 0, %g5
+ cmp %g0, cy
+L(top): ldx [up+0], %o4
+ add up, 8, up
+ ldx [vp+0], %o5
+ add vp, 8, vp
+ add rp, 8, rp
+
+ sllx %o5, LSH, %g4
+ add n, -1, n
+ or %g5, %g4, %g4
+ srlx %o5, RSH, %g5
+
+ srlx %o4, 32, %g1
+ srlx %g4, 32, %g2
+ subccc %o4, %g4, %g3
+ subccc %g1, %g2, %g0
+ brgz n, L(top)
+ stx %g3, [rp-8]
+
+ retl
+ addc %g5, %g0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct1/submul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct1/submul_1.asm
new file mode 100644
index 0000000..4f553a8
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct1/submul_1.asm
@@ -0,0 +1,86 @@
+dnl SPARC v9 mpn_submul_1 for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 74
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_submul_1)
+ save %sp, -176, %sp
+ mov 1, %o2
+ mov %i0, %g2
+ srlx %i3, 32, %o4
+ sllx %o2, 32, %o2
+ srl %i3, 0, %i3
+ mov 0, %g3
+ mov 0, %i0
+
+L(top): ldx [%i1+%g3], %g1
+ srl %g1, 0, %g4
+ mulx %g4, %i3, %o5
+ srlx %g1, 32, %g1
+ mulx %g1, %i3, %g5
+ mulx %g4, %o4, %g4
+ mulx %g1, %o4, %g1
+ srlx %o5, 32, %o1
+ add %g5, %o1, %o1
+ addcc %o1, %g4, %g4
+ srl %o5, 0, %o0
+ ldx [%g2+%g3], %o5
+ sllx %g4, 32, %o1
+ add %g1, %o2, %l1
+ movlu %xcc, %l1, %g1
+ add %o1, %o0, %l0
+ addcc %l0, %i0, %g5
+ srlx %g4, 32, %i0
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0
+ subcc %o5, %g5, %g5
+ stx %g5, [%g2+%g3]
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0
+ add %i2, -1, %i2
+ add %i0, %g1, %i0
+ brnz,pt %i2, L(top)
+ add %g3, 8, %g3
+ return %i7+8
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm
new file mode 100644
index 0000000..0170746
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/add_n.asm
@@ -0,0 +1,126 @@
+dnl SPARC v9 mpn_add_n for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 8
+C UltraSPARC T4: 3
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`vp', `%i2')
+define(`n', `%i3')
+define(`cy', `%i4')
+
+define(`u0_off', `%l2')
+define(`u1_off', `%l3')
+define(`loop_n', `%l6')
+define(`tmp', `%l7')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_nc)
+ save %sp, -176, %sp
+ b,a L(ent)
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+ save %sp, -176, %sp
+
+ mov 0, cy
+L(ent):
+ subcc n, 1, n
+ be L(final_one)
+ cmp %g0, cy
+
+ ldx [up + 0], %o4
+ sllx n, 3, tmp
+
+ ldx [vp + 0], %o5
+ add up, tmp, u0_off
+
+ ldx [up + 8], %g5
+ neg tmp, loop_n
+
+ ldx [vp + 8], %g1
+ add u0_off, 8, u1_off
+
+ sub loop_n, -(2 * 8), loop_n
+
+ brgez,pn loop_n, L(loop_tail)
+ add vp, (2 * 8), vp
+
+ b,a L(top)
+ ALIGN(16)
+L(top):
+ addxccc(%o4, %o5, tmp)
+ ldx [vp + 0], %o5
+
+ add rp, (2 * 8), rp
+ ldx [loop_n + u0_off], %o4
+
+ add vp, (2 * 8), vp
+ stx tmp, [rp - 16]
+
+ addxccc(%g1, %g5, tmp)
+ ldx [vp - 8], %g1
+
+ ldx [loop_n + u1_off], %g5
+ sub loop_n, -(2 * 8), loop_n
+
+ brlz loop_n, L(top)
+ stx tmp, [rp - 8]
+
+L(loop_tail):
+ addxccc(%o4, %o5, %g3)
+ add loop_n, u0_off, up
+
+ addxccc(%g1, %g5, %g5)
+ stx %g3, [rp + 0]
+
+ brgz,pt loop_n, L(done)
+ stx %g5, [rp + 8]
+
+ add rp, (2 * 8), rp
+L(final_one):
+ ldx [up+0], %o4
+ ldx [vp+0], %o5
+ addxccc(%o4, %o5, %g3)
+ stx %g3, [rp+0]
+
+L(done):
+ addxc(%g0, %g0, %i0)
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm
new file mode 100644
index 0000000..939811e
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/addmul_1.asm
@@ -0,0 +1,182 @@
+dnl SPARC v9 mpn_addmul_1 for T3/T4/T5.
+
+dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 26
+C UltraSPARC T4: 4.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+define(`u0', `%l0')
+define(`u1', `%l1')
+define(`u2', `%l2')
+define(`u3', `%l3')
+define(`r0', `%l4')
+define(`r1', `%l5')
+define(`r2', `%l6')
+define(`r3', `%l7')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_addmul_1)
+ save %sp, -176, %sp
+ ldx [up+0], %g1
+
+ and n, 3, %g3
+ brz %g3, L(b0)
+ addcc %g0, %g0, %g5 C clear carry limb, flag
+ cmp %g3, 2
+ bcs %xcc, L(b01)
+ nop
+ be %xcc, L(b10)
+ ldx [up+8], %g5
+
+L(b11): ldx [up+16], u3
+ mulx %g1, v0, %o2
+ umulxhi(%g1, v0, %o3)
+ ldx [rp+0], r1
+ mulx %g5, v0, %o4
+ ldx [rp+8], r2
+ umulxhi(%g5, v0, %o5)
+ ldx [rp+16], r3
+ mulx u3, v0, %g4
+ umulxhi(u3, v0, %g5)
+ addcc %o3, %o4, %o4
+ addxccc(%o5, %g4, %g4)
+ addxc( %g0, %g5, %g5)
+ addcc r1, %o2, r1
+ stx r1, [rp+0]
+ addxccc(r2, %o4, r2)
+ stx r2, [rp+8]
+ addxccc(r3, %g4, r3)
+ stx r3, [rp+16]
+ add n, -3, n
+ add up, 24, up
+ brz n, L(xit)
+ add rp, 24, rp
+ b L(com)
+ nop
+
+L(b10): mulx %g1, v0, %o4
+ ldx [rp+0], r2
+ umulxhi(%g1, v0, %o5)
+ ldx [rp+8], r3
+ mulx %g5, v0, %g4
+ umulxhi(%g5, v0, %g5)
+ addcc %o5, %g4, %g4
+ addxc( %g0, %g5, %g5)
+ addcc r2, %o4, r2
+ stx r2, [rp+0]
+ addxccc(r3, %g4, r3)
+ stx r3, [rp+8]
+ add n, -2, n
+ add up, 16, up
+ brz n, L(xit)
+ add rp, 16, rp
+ b L(com)
+ nop
+
+L(b01): ldx [rp+0], r3
+ mulx %g1, v0, %g4
+ umulxhi(%g1, v0, %g5)
+ addcc r3, %g4, r3
+ stx r3, [rp+0]
+ add n, -1, n
+ add up, 8, up
+ brz n, L(xit)
+ add rp, 8, rp
+
+L(com): ldx [up+0], %g1
+L(b0): ldx [up+8], u1
+ ldx [up+16], u2
+ ldx [up+24], u3
+ mulx %g1, v0, %o0
+ umulxhi(%g1, v0, %o1)
+ b L(lo0)
+ nop
+
+ ALIGN(16)
+L(top): ldx [up+0], u0
+ addxc( %g0, %g5, %g5) C propagate carry into carry limb
+ ldx [up+8], u1
+ addcc r0, %o0, r0
+ ldx [up+16], u2
+ addxccc(r1, %o2, r1)
+ ldx [up+24], u3
+ addxccc(r2, %o4, r2)
+ stx r0, [rp-32]
+ addxccc(r3, %g4, r3)
+ stx r1, [rp-24]
+ mulx u0, v0, %o0
+ stx r2, [rp-16]
+ umulxhi(u0, v0, %o1)
+ stx r3, [rp-8]
+L(lo0): mulx u1, v0, %o2
+ ldx [rp+0], r0
+ umulxhi(u1, v0, %o3)
+ ldx [rp+8], r1
+ mulx u2, v0, %o4
+ ldx [rp+16], r2
+ umulxhi(u2, v0, %o5)
+ ldx [rp+24], r3
+ mulx u3, v0, %g4
+ addxccc(%g5, %o0, %o0)
+ umulxhi(u3, v0, %g5)
+ add up, 32, up
+ addxccc(%o1, %o2, %o2)
+ add rp, 32, rp
+ addxccc(%o3, %o4, %o4)
+ add n, -4, n
+ addxccc(%o5, %g4, %g4)
+ brgz n, L(top)
+ nop
+
+ addxc( %g0, %g5, %g5)
+ addcc r0, %o0, r0
+ stx r0, [rp-32]
+ addxccc(r1, %o2, r1)
+ stx r1, [rp-24]
+ addxccc(r2, %o4, r2)
+ stx r2, [rp-16]
+ addxccc(r3, %g4, r3)
+ stx r3, [rp-8]
+L(xit): addxc( %g0, %g5, %i0)
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm
new file mode 100644
index 0000000..ccc6a44
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_2.asm
@@ -0,0 +1,228 @@
+dnl SPARC v9 mpn_mul_2 and mpn_addmul_2 for T3/T4/T5.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb
+C mul_2 addmul_2
+C UltraSPARC T3: 22.5 23.5
+C UltraSPARC T4: 3.25 3.75
+
+
+C The code is reasonably scheduled but also relies on OoO. There was hope that
+C this could run at around 3.0 and 3.5 c/l respectively, on T4. Two cycles per
+C iteration needs to be removed.
+C
+C We could almost use 2-way unrolling, but currently the wN registers live too
+C long. By changing add x,w1,w1 to add x,w1,w0, i.e. migrate the values down-
+C wards, 2-way unrolling should become possible. With n-indexed addressing it
+C should run no slower.
+C
+C The rp loads to g1/g3 are very much over-scheduled. Presumably, they could
+C be postponed a full way, and then just one register could be used.
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`vp', `%i3')
+
+define(`v0', `%o0')
+define(`v1', `%o1')
+
+define(`w0', `%o2')
+define(`w1', `%o3')
+define(`w2', `%o4')
+define(`w3', `%o5')
+
+ifdef(`OPERATION_mul_2',`
+ define(`AM2', `')
+ define(`ADDX', `addcc`'$1')
+ define(`func', `mpn_mul_2')
+')
+ifdef(`OPERATION_addmul_2',`
+ define(`AM2', `$1')
+ define(`ADDX', `addxccc($1,$2,$3)')
+ define(`func', `mpn_addmul_2')
+')
+
+
+MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2)
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ save %sp, -176, %sp
+
+ ldx [vp+0], v0 C load v0
+ and n, 3, %g5
+ ldx [vp+8], v1 C load v1
+ add n, -6, n
+ ldx [up+0], %g4
+ brz %g5, L(b0)
+ cmp %g5, 2
+ bcs L(b1)
+ nop
+ be L(b2)
+ nop
+
+L(b3):
+AM2(` ldx [rp+0], %g1')
+ mulx %g4, v0, w2
+ umulxhi(%g4, v0, w3)
+ ldx [up+8], %i5
+ mulx %g4, v1, %l3
+ umulxhi(%g4, v1, %l7)
+AM2(` ldx [rp+8], %g3')
+ add up, -8, up
+ add rp, -8, rp
+ b L(lo3)
+ mov 0, w0
+
+L(b2):
+AM2(` ldx [rp+0], %g3')
+ mulx %g4, v0, w3
+ umulxhi(%g4, v0, w0)
+ ldx [up+8], %i4
+ mulx %g4, v1, %l1
+ umulxhi(%g4, v1, %l5)
+AM2(` ldx [rp+8], %g1')
+ add rp, 16, rp
+ brlz n, L(end)
+ mov 0, w1
+ ba L(top)
+ add up, 16, up
+
+L(b1):
+AM2(` ldx [rp+0], %g1')
+ mulx %g4, v0, w0
+ umulxhi(%g4, v0, w1)
+ ldx [up+8], %i5
+ mulx %g4, v1, %l3
+ umulxhi(%g4, v1, %l7)
+AM2(` ldx [rp+8], %g3')
+ add up, 8, up
+ add rp, 8, rp
+ b L(lo1)
+ mov 0, w2
+
+L(b0):
+AM2(` ldx [rp+0], %g3')
+ mulx %g4, v0, w1
+ umulxhi(%g4, v0, w2)
+ ldx [up+8], %i4
+ mulx %g4, v1, %l1
+ umulxhi(%g4, v1, %l5)
+AM2(` ldx [rp+8], %g1')
+ b L(lo0)
+ mov 0, w3
+
+ ALIGN(16) C cycle
+L(top): mulx %i4, v0, %l2 C 0->5
+ umulxhi(%i4, v0, %l6) C 0->5
+ ldx [up+0], %i5 C 1->6
+AM2(` addcc w3, %g3, w3') C 1
+ stx w3, [rp-16] C 2
+ ADDX(` %l1, w0, w0') C 2
+ addxccc(%l5, w1, w1) C 3
+ mulx %i4, v1, %l3 C 3->9
+ umulxhi(%i4, v1, %l7) C 4->9
+AM2(` ldx [rp+0], %g3') C 4
+ addcc %l2, w0, w0 C 5
+ addxccc(%l6, w1, w1) C 5
+ addxc( %g0, %g0, w2) C 6
+L(lo1): mulx %i5, v0, %l0 C 6
+ umulxhi(%i5, v0, %l4) C 7
+ ldx [up+8], %i4 C 7
+AM2(` addcc w0, %g1, w0') C 8
+ stx w0, [rp-8] C 8
+ ADDX(` %l3, w1, w1') C 9
+ addxccc(%l7, w2, w2) C 9
+ mulx %i5, v1, %l1 C 10
+ umulxhi(%i5, v1, %l5) C 10
+AM2(` ldx [rp+8], %g1') C 11
+ addcc %l0, w1, w1 C 11
+ addxccc(%l4, w2, w2) C 12
+ addxc( %g0, %g0, w3) C 12
+L(lo0): mulx %i4, v0, %l2 C 13
+ umulxhi(%i4, v0, %l6) C 13
+ ldx [up+16], %i5 C 14
+AM2(` addcc w1, %g3, w1') C 14
+ stx w1, [rp+0] C 15
+ ADDX(` %l1, w2, w2') C 15
+ addxccc(%l5, w3, w3) C 16
+ mulx %i4, v1, %l3 C 16
+ umulxhi(%i4, v1, %l7) C 17
+AM2(` ldx [rp+16], %g3') C 17
+ addcc %l2, w2, w2 C 18
+ addxccc(%l6, w3, w3) C 18
+ addxc( %g0, %g0, w0) C 19
+L(lo3): mulx %i5, v0, %l0 C 19
+ umulxhi(%i5, v0, %l4) C 20
+ ldx [up+24], %i4 C 20
+AM2(` addcc w2, %g1, w2') C 21
+ stx w2, [rp+8] C 21
+ ADDX(` %l3, w3, w3') C 22
+ addxccc(%l7, w0, w0) C 22
+ mulx %i5, v1, %l1 C 23
+ umulxhi(%i5, v1, %l5) C 23
+AM2(` ldx [rp+24], %g1') C 24
+ addcc %l0, w3, w3 C 24
+ addxccc(%l4, w0, w0) C 25
+ addxc( %g0, %g0, w1) C 25
+ add up, 32, up
+ add rp, 32, rp
+ brgz n, L(top)
+ add n, -4, n
+
+L(end): mulx %i4, v0, %l2
+ umulxhi(%i4, v0, %l6)
+AM2(` addcc w3, %g3, w3')
+ stx w3, [rp-16]
+ ADDX(` %l1, w0, w0')
+ addxccc(%l5, w1, w1)
+ mulx %i4, v1, %l3
+ umulxhi(%i4, v1, %l7)
+ addcc %l2, w0, w0
+ addxccc(%l6, w1, w1)
+ addxc( %g0, %g0, w2)
+AM2(` addcc w0, %g1, w0')
+ stx w0, [rp-8]
+ ADDX(` %l3, w1, w1')
+ stx w1, [rp+0]
+ addxc(%l7, w2, %i0)
+
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm
new file mode 100644
index 0000000..845f6d6
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/aormul_4.asm
@@ -0,0 +1,219 @@
+dnl SPARC v9 mpn_mul_4 and mpn_addmul_4 for T3/T4/T5.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb
+C mul_4 addmul_4
+C UltraSPARC T3: 21.5 22.0
+C UltraSPARC T4: 2.625 2.75
+
+
+C The code is well-scheduled and relies on OoO very little. There is hope that
+C this will run at around 2.5 and 2.75 c/l respectively, on T4.
+
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`vp', `%i3')
+
+define(`v0', `%g1')
+define(`v1', `%o7')
+define(`v2', `%g2')
+define(`v3', `%i3')
+
+define(`w0', `%o0')
+define(`w1', `%o1')
+define(`w2', `%o2')
+define(`w3', `%o3')
+define(`w4', `%o4')
+
+define(`r0', `%o5')
+
+define(`u0', `%i4')
+define(`u1', `%i5')
+
+define(`rp0', `rp')
+define(`rp1', `%g3')
+define(`rp2', `%g4')
+define(`up0', `up')
+define(`up1', `%g5')
+
+ifdef(`OPERATION_mul_4',`
+ define(`AM4', `')
+ define(`ADDX', `addcc`'$1')
+ define(`func', `mpn_mul_4')
+')
+ifdef(`OPERATION_addmul_4',`
+ define(`AM4', `$1')
+ define(`ADDX', `addxccc($1,$2,$3)')
+ define(`func', `mpn_addmul_4')
+')
+
+
+MULFUNC_PROLOGUE(mpn_mul_4 mpn_addmul_4)
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ save %sp, -176, %sp
+
+ ldx [up + 0], u1 C load up[0] early
+ andcc n, 1, %g0 C is n odd?
+ ldx [vp + 0], v0
+ sllx n, 3, n
+ ldx [vp + 8], v1
+ add n, -28, n
+ ldx [vp + 16], v2
+ add rp, -16, rp
+ ldx [vp + 24], v3
+ add up, n, up0
+ add rp, n, rp0
+ add up0, 8, up1
+ add rp0, 8, rp1
+ add rp0, 16, rp2
+ mulx u1, v0, %l0
+ mov 0, w0
+ mulx u1, v1, %l1
+ mov 0, w1
+ mulx u1, v2, %l2
+ mov 0, w2
+ mulx u1, v3, %l3
+ mov 0, w3
+
+ be L(evn)
+ neg n, n
+
+L(odd): mov u1, u0
+ ldx [up1 + n], u1
+AM4(` ldx [rp2 + n], r0')
+ umulxhi(u0, v0, %l4)
+ umulxhi(u0, v1, %l5)
+ umulxhi(u0, v2, %l6)
+ umulxhi(u0, v3, %l7)
+ b L(mid)
+ add n, 8, n
+
+L(evn): ldx [up1 + n], u0
+AM4(` ldx [rp2 + n], r0')
+ umulxhi(u1, v0, %l4)
+ umulxhi(u1, v1, %l5)
+ umulxhi(u1, v2, %l6)
+ umulxhi(u1, v3, %l7)
+ add n, 16, n
+
+ ALIGN(16)
+L(top): addcc %l0, w0, w0
+ mulx u0, v0, %l0 C w 0
+ addxccc(%l1, w1, w1)
+ mulx u0, v1, %l1 C w 1
+ addxccc(%l2, w2, w2)
+ mulx u0, v2, %l2 C w 2
+ addxccc(%l3, w3, w3)
+ mulx u0, v3, %l3 C w 3
+ ldx [up0 + n], u1
+ addxc( %g0, %g0, w4)
+AM4(` addcc r0, w0, w0')
+ stx w0, [rp0 + n]
+ ADDX(` %l4, w1, w0')
+ umulxhi(u0, v0, %l4) C w 1
+AM4(` ldx [rp1 + n], r0')
+ addxccc(%l5, w2, w1)
+ umulxhi(u0, v1, %l5) C w 2
+ addxccc(%l6, w3, w2)
+ umulxhi(u0, v2, %l6) C w 3
+ addxc( %l7, w4, w3)
+ umulxhi(u0, v3, %l7) C w 4
+L(mid): addcc %l0, w0, w0
+ mulx u1, v0, %l0 C w 1
+ addxccc(%l1, w1, w1)
+ mulx u1, v1, %l1 C w 2
+ addxccc(%l2, w2, w2)
+ mulx u1, v2, %l2 C w 3
+ addxccc(%l3, w3, w3)
+ mulx u1, v3, %l3 C w 4
+ ldx [up1 + n], u0
+ addxc( %g0, %g0, w4)
+AM4(` addcc r0, w0, w0')
+ stx w0, [rp1 + n]
+ ADDX(` %l4, w1, w0')
+ umulxhi(u1, v0, %l4) C w 2
+AM4(` ldx [rp2 + n], r0')
+ addxccc(%l5, w2, w1)
+ umulxhi(u1, v1, %l5) C w 3
+ addxccc(%l6, w3, w2)
+ umulxhi(u1, v2, %l6) C w 4
+ addxc( %l7, w4, w3)
+ umulxhi(u1, v3, %l7) C w 5
+ brlz n, L(top)
+ add n, 16, n
+
+L(end): addcc %l0, w0, w0
+ mulx u0, v0, %l0
+ addxccc(%l1, w1, w1)
+ mulx u0, v1, %l1
+ addxccc(%l2, w2, w2)
+ mulx u0, v2, %l2
+ addxccc(%l3, w3, w3)
+ mulx u0, v3, %l3
+ addxc( %g0, %g0, w4)
+AM4(` addcc r0, w0, w0')
+ stx w0, [rp0 + n]
+ ADDX(` %l4, w1, w0')
+ umulxhi(u0, v0, %l4)
+AM4(` ldx [rp1 + n], r0')
+ addxccc(%l5, w2, w1)
+ umulxhi(u0, v1, %l5)
+ addxccc(%l6, w3, w2)
+ umulxhi(u0, v2, %l6)
+ addxc( %l7, w4, w3)
+ umulxhi(u0, v3, %l7)
+ addcc %l0, w0, w0
+ addxccc(%l1, w1, w1)
+ addxccc(%l2, w2, w2)
+ addxccc(%l3, w3, w3)
+ addxc( %g0, %g0, w4)
+AM4(` addcc r0, w0, w0')
+ stx w0, [rp1 + n]
+ ADDX(` %l4, w1, w0')
+ addxccc(%l5, w2, w1)
+ addxccc(%l6, w3, w2)
+ stx w0, [rp2 + n]
+ add n, 16, n
+ stx w1, [rp1 + n]
+ stx w2, [rp2 + n]
+ addxc( %l7, w4, %i0)
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm
new file mode 100644
index 0000000..1014b1b
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/aorslsh_n.asm
@@ -0,0 +1,147 @@
+dnl SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 11
+C UltraSPARC T4: 4
+
+C For sublsh_n we combine the two shifted limbs using xnor, using the identity
+C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) =
+C 0 as it is in our usage. This gives us the ones complement for free.
+C Unfortunately, the same trick will not work for rsblsh_n, which will instead
+C require a separate negation.
+C
+C FIXME: Add rsblsh_n to this file.
+
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`vp', `%i2')
+define(`n', `%i3')
+define(`cnt',`%i4')
+
+define(`tnc',`%o5')
+
+ifdef(`OPERATION_addlsh_n',`
+ define(`INITCY', `subcc %g0, 0, %g0')
+ define(`MERGE', `or')
+ define(`func', `mpn_addlsh_n')
+')
+ifdef(`OPERATION_sublsh_n',`
+ define(`INITCY', `subcc %g0, 1, %g0')
+ define(`MERGE', `xnor')
+ define(`func', `mpn_sublsh_n')
+')
+
+define(`rp0', `rp')
+define(`rp1', `%o2')
+define(`up0', `up')
+define(`up1', `%o3')
+define(`vp0', `vp')
+define(`vp1', `%o4')
+
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n)
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ save %sp, -176, %sp
+ mov 64, tnc
+ sub tnc, cnt, tnc
+
+ andcc n, 1, %g0
+ sllx n, 3, n
+ add n, -16, n
+ add up, n, up0
+ add vp, n, vp0
+ add rp, n, rp0
+ add up0, 8, up1
+ add vp0, 8, vp1
+ add rp0, -8, rp1
+ add rp0, -16, rp0
+ neg n, n
+ be L(evn)
+ INITCY
+
+L(odd): ldx [vp0 + n], %l1
+ mov 0, %l2
+ ldx [up0 + n], %l5
+ sllx %l1, cnt, %g3
+ brgez n, L(wd1)
+ add n, 8, n
+ ldx [vp0 + n], %l0
+ b L(lo1)
+ sllx %l1, cnt, %g3
+
+L(evn): ldx [vp0 + n], %l0
+ mov 0, %l3
+ ldx [up0 + n], %l4
+ ldx [vp1 + n], %l1
+ b L(lo0)
+ sllx %l0, cnt, %g1
+
+L(top): addxccc(%l6, %l4, %o0)
+ ldx [vp0 + n], %l0
+ sllx %l1, cnt, %g3
+ stx %o0, [rp0 + n]
+L(lo1): srlx %l1, tnc, %l3
+ MERGE %l2, %g3, %l7
+ ldx [up0 + n], %l4
+ addxccc(%l7, %l5, %o1)
+ ldx [vp1 + n], %l1
+ sllx %l0, cnt, %g1
+ stx %o1, [rp1 + n]
+L(lo0): srlx %l0, tnc, %l2
+ MERGE %l3, %g1, %l6
+ ldx [up1 + n], %l5
+ brlz,pt n, L(top)
+ add n, 16, n
+
+ addxccc(%l6, %l4, %o0)
+ sllx %l1, cnt, %g3
+ stx %o0, [rp0 + n]
+L(wd1): srlx %l1, tnc, %l3
+ MERGE %l2, %g3, %l7
+ addxccc(%l7, %l5, %o1)
+ stx %o1, [rp1 + n]
+
+ifdef(`OPERATION_addlsh_n',
+` addxc( %l3, %g0, %i0)')
+ifdef(`OPERATION_sublsh_n',
+` addxc( %g0, %g0, %g1)
+ add %g1, -1, %g1
+ sub %l3, %g1, %i0')
+
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm
new file mode 100644
index 0000000..550860d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm
@@ -0,0 +1,147 @@
+dnl SPARC T3/T4/T5 mpn_bdiv_dbm1c.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 25
+C UltraSPARC T4/T5: 4
+
+C INPUT PARAMETERS
+define(`qp', `%i0')
+define(`ap', `%i1')
+define(`n', `%i2')
+define(`bd', `%i3')
+define(`h', `%i4')
+
+define(`plo0',`%g4') define(`plo1',`%g5')
+define(`phi0',`%l0') define(`phi1',`%l1')
+define(`a0', `%g1') define(`a1', `%g3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_bdiv_dbm1c)
+ save %sp, -176, %sp
+
+ and n, 3, %g5
+ ldx [ap + 0], %g2
+ add n, -5, n
+ brz %g5, L(b0)
+ cmp %g5, 2
+ bcs %xcc, L(b1)
+ nop
+ be %xcc, L(b2)
+ nop
+
+L(b3): ldx [ap + 8], a0
+ mulx bd, %g2, plo1
+ umulxhi(bd, %g2, phi1)
+ ldx [ap + 16], a1
+ add qp, -24, qp
+ b L(lo3)
+ add ap, -8, ap
+
+L(b2): ldx [ap + 8], a1
+ mulx bd, %g2, plo0
+ umulxhi(bd, %g2, phi0)
+ brlz,pt n, L(wd2)
+ nop
+L(gt2): ldx [ap + 16], a0
+ add ap, 16, ap
+ b L(lo2)
+ add n, -1, n
+
+L(b1): mulx bd, %g2, plo1
+ umulxhi(bd, %g2, phi1)
+ brlz,pn n, L(wd1)
+ add qp, -8, qp
+L(gt1): ldx [ap + 8], a0
+ ldx [ap + 16], a1
+ b L(lo1)
+ add ap, 8, ap
+
+L(b0): ldx [ap + 8], a1
+ mulx bd, %g2, plo0
+ umulxhi(bd, %g2, phi0)
+ ldx [ap + 16], a0
+ b L(lo0)
+ add qp, -16, qp
+
+L(top): ldx [ap + 0], a0
+ sub h, phi1, h
+L(lo2): mulx bd, a1, plo1
+ umulxhi(bd, a1, phi1)
+ subcc h, plo0, h
+ addxc( phi0, %g0, phi0)
+ stx h, [qp + 0]
+ ldx [ap + 8], a1
+ sub h, phi0, h
+L(lo1): mulx bd, a0, plo0
+ umulxhi(bd, a0, phi0)
+ subcc h, plo1, h
+ addxc( phi1, %g0, phi1)
+ stx h, [qp + 8]
+ ldx [ap + 16], a0
+ sub h, phi1, h
+L(lo0): mulx bd, a1, plo1
+ umulxhi(bd, a1, phi1)
+ subcc h, plo0, h
+ addxc( phi0, %g0, phi0)
+ stx h, [qp + 16]
+ ldx [ap + 24], a1
+ sub h, phi0, h
+L(lo3): mulx bd, a0, plo0
+ umulxhi(bd, a0, phi0)
+ subcc h, plo1, h
+ addxc( phi1, %g0, phi1)
+ stx h, [qp + 24]
+ add ap, 32, ap
+ add qp, 32, qp
+ brgz,pt n, L(top)
+ add n, -4, n
+
+L(end): sub h, phi1, h
+L(wd2): mulx bd, a1, plo1
+ umulxhi(bd, a1, phi1)
+ subcc h, plo0, h
+ addxc( phi0, %g0, phi0)
+ stx h, [qp + 0]
+ sub h, phi0, h
+L(wd1): subcc h, plo1, h
+ addxc( phi1, %g0, phi1)
+ stx h, [qp + 8]
+ sub h, phi1, %i0
+
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm
new file mode 100644
index 0000000..9847047
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/bdiv_q_1.asm
@@ -0,0 +1,137 @@
+dnl SPARC T3/T4/T5 mpn_bdiv_q_1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 31
+C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops
+
+C INPUT PARAMETERS
+define(`qp', `%i0')
+define(`ap', `%i1')
+define(`n', `%i2')
+define(`d', `%i3')
+define(`dinv',`%i4')
+define(`cnt', `%i5')
+
+define(`tnc', `%o2')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_bdiv_q_1)
+ save %sp, -176, %sp
+ ldx [ap], %o5
+ add d, -1, %g1
+ andn %g1, d, %g1
+ popc %g1, cnt
+
+ srlx d, cnt, d
+ srlx d, 1, %g1
+ and %g1, 127, %g1
+ LEA64(binvert_limb_table, g2, g4)
+ ldub [%g2+%g1], %g1
+ add %g1, %g1, %g2
+ mulx %g1, %g1, %g1
+ mulx %g1, d, %g1
+ sub %g2, %g1, %g2
+ add %g2, %g2, %g1
+ mulx %g2, %g2, %g2
+ mulx %g2, d, %g2
+ sub %g1, %g2, %g1
+ add %g1, %g1, %o7
+ mulx %g1, %g1, %g1
+ mulx %g1, d, %g1
+ add n, -2, n
+ brz,pt cnt, L(norm)
+ sub %o7, %g1, dinv
+
+ brlz,pt n, L(edu)
+ srlx %o5, cnt, %o5
+ b L(eee)
+ mov 0, %g4
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+ save %sp, -176, %sp
+ ldx [ap], %o5
+
+ brz,pt cnt, L(norm)
+ add n, -2, n
+
+L(unorm):
+ brlz,pt n, L(edu)
+ srlx %o5, cnt, %o5
+ mov 0, %g4
+L(eee): sub %g0, cnt, tnc
+
+L(tpu): ldx [ap+8], %g3
+ add ap, 8, ap
+ sllx %g3, tnc, %g5
+ or %g5, %o5, %g5
+ srlx %g3, cnt, %o5
+ subcc %g5, %g4, %g4
+ mulx %g4, dinv, %g1
+ stx %g1, [qp]
+ add qp, 8, qp
+ umulxhi(d, %g1, %g1)
+ addxc( %g1, %g0, %g4)
+ brgz,pt n, L(tpu)
+ add n, -1, n
+
+ sub %o5, %g4, %o5
+L(edu): mulx %o5, dinv, %g1
+ return %i7+8
+ stx %g1, [%o0]
+
+L(norm):
+ mulx dinv, %o5, %g1
+ brlz,pt n, L(edn)
+ stx %g1, [qp]
+ add qp, 8, qp
+ addcc %g0, 0, %g4
+
+L(tpn): umulxhi(d, %g1, %g1)
+ ldx [ap+8], %g5
+ add ap, 8, ap
+ addxc( %g1, %g0, %g1)
+ subcc %g5, %g1, %g1
+ mulx %g1, dinv, %g1
+ stx %g1, [qp]
+ add qp, 8, qp
+ brgz,pt n, L(tpn)
+ add n, -1, n
+
+L(edn): return %i7+8
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm
new file mode 100644
index 0000000..49ccaec
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm
@@ -0,0 +1,145 @@
+dnl SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5.
+
+dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
+
+dnl Copyright 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 8.5
+C UltraSPARC T4: 3
+
+C We use a double-pointer trick to allow indexed addressing. Its setup
+C cost might be a problem in these functions, since we don't expect huge n
+C arguments.
+C
+C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
+C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn.
+
+C INPUT PARAMETERS
+define(`cnd', `%i0')
+define(`rp', `%i1')
+define(`up', `%i2')
+define(`vp', `%i3')
+define(`n', `%i4')
+
+define(`mask', `cnd')
+define(`up0', `%l0') define(`up1', `%l1')
+define(`vp0', `%l2') define(`vp1', `%l3')
+define(`rp0', `%g4') define(`rp1', `%g5')
+define(`u0', `%l4') define(`u1', `%l5')
+define(`v0', `%l6') define(`v1', `%l7')
+define(`x0', `%g1') define(`x1', `%g3')
+define(`w0', `%g1') define(`w1', `%g3')
+
+ifdef(`OPERATION_cnd_add_n',`
+ define(`LOGOP', `and $1, $2, $3')
+ define(`MAKEMASK',`cmp %g0, $1
+ addxc( %g0, %g0, $2)
+ neg $2, $2')
+ define(`INITCY', `addcc %g0, 0, %g0')
+ define(`RETVAL', `addxc( %g0, %g0, %i0)')
+ define(`func', `mpn_cnd_add_n')
+')
+ifdef(`OPERATION_cnd_sub_n',`
+ define(`LOGOP', `orn $2, $1, $3')
+ define(`MAKEMASK',`cmp $1, 1
+ addxc( %g0, %g0, $2)
+ neg $2, $2')
+ define(`INITCY', `subcc %g0, 1, %g0')
+ define(`RETVAL', `addxc( %g0, %g0, %i0)
+ xor %i0, 1, %i0')
+ define(`func', `mpn_cnd_sub_n')
+')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(func)
+ save %sp, -176, %sp
+
+ MAKEMASK(cnd,mask)
+
+ andcc n, 1, %g0
+ sllx n, 3, n
+ add n, -16, n
+ add vp, n, vp0
+ add up, n, up0
+ add rp, n, rp0
+ neg n, n
+ be L(evn)
+ INITCY
+
+L(odd): ldx [vp0 + n], v1
+ ldx [up0 + n], u1
+ LOGOP( v1, mask, x1)
+ addxccc(u1, x1, w1)
+ stx w1, [rp0 + n]
+ add n, 8, n
+ brgz n, L(rtn)
+ nop
+
+L(evn): add vp0, 8, vp1
+ add up0, 8, up1
+ add rp0, -24, rp1
+ ldx [vp0 + n], v0
+ ldx [vp1 + n], v1
+ ldx [up0 + n], u0
+ ldx [up1 + n], u1
+ add n, 16, n
+ brgz n, L(end)
+ add rp0, -16, rp0
+
+L(top): LOGOP( v0, mask, x0)
+ ldx [vp0 + n], v0
+ LOGOP( v1, mask, x1)
+ ldx [vp1 + n], v1
+ addxccc(u0, x0, w0)
+ ldx [up0 + n], u0
+ addxccc(u1, x1, w1)
+ ldx [up1 + n], u1
+ stx w0, [rp0 + n]
+ add n, 16, n
+ brlez n, L(top)
+ stx w1, [rp1 + n]
+
+L(end): LOGOP( v0, mask, x0)
+ LOGOP( v1, mask, x1)
+ addxccc(u0, x0, w0)
+ addxccc(u1, x1, w1)
+ stx w0, [rp0 + n]
+ stx w1, [rp1 + 32]
+
+L(rtn): RETVAL
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm
new file mode 100644
index 0000000..d7dbdf9
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm
@@ -0,0 +1,129 @@
+dnl SPARC T3/T4/T5 mpn_divexact_1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 31
+C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops
+
+C INPUT PARAMETERS
+define(`qp', `%i0')
+define(`ap', `%i1')
+define(`n', `%i2')
+define(`d', `%i3')
+
+define(`dinv',`%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_divexact_1)
+ save %sp, -176, %sp
+ cmp n, 1
+ bne,pt %xcc, L(gt1)
+ ldx [ap], %o5
+ udivx %o5, d, %g1
+ stx %g1, [qp]
+ return %i7+8
+ nop
+
+L(gt1): add d, -1, %g1
+ andn %g1, d, %g1
+ popc %g1, %i4 C i4 = count_trailing_zeros(d)
+
+ srlx d, %i4, d
+ srlx d, 1, %g1
+ and %g1, 127, %g1
+
+ LEA64(binvert_limb_table, g2, g4)
+ ldub [%g2+%g1], %g1
+ add %g1, %g1, %g2
+ mulx %g1, %g1, %g1
+ mulx %g1, d, %g1
+ sub %g2, %g1, %g2
+ add %g2, %g2, %g1
+ mulx %g2, %g2, %g2
+ mulx %g2, d, %g2
+ sub %g1, %g2, %g1
+ add %g1, %g1, %o7
+ mulx %g1, %g1, %g1
+ mulx %g1, d, %g1
+ add n, -2, n
+ brz,pt %i4, L(norm)
+ sub %o7, %g1, dinv
+
+L(unnorm):
+ mov 0, %g4
+ sub %g0, %i4, %o2
+ srlx %o5, %i4, %o5
+L(top_unnorm):
+ ldx [ap+8], %g3
+ add ap, 8, ap
+ sllx %g3, %o2, %g5
+ or %g5, %o5, %g5
+ srlx %g3, %i4, %o5
+ subcc %g5, %g4, %g4
+ mulx %g4, dinv, %g1
+ stx %g1, [qp]
+ add qp, 8, qp
+ umulxhi(d, %g1, %g1)
+ addxc( %g1, %g0, %g4)
+ brgz,pt n, L(top_unnorm)
+ add n, -1, n
+
+ sub %o5, %g4, %g4
+ mulx %g4, dinv, %g1
+ stx %g1, [qp]
+ return %i7+8
+ nop
+
+L(norm):
+ mulx dinv, %o5, %g1
+ stx %g1, [qp]
+ add qp, 8, qp
+ addcc %g0, 0, %g4
+L(top_norm):
+ umulxhi(d, %g1, %g1)
+ ldx [ap+8], %g5
+ add ap, 8, ap
+ addxc( %g1, %g0, %g1)
+ subcc %g5, %g1, %g1
+ mulx %g1, dinv, %g1
+ stx %g1, [qp]
+ add qp, 8, qp
+ brgz,pt n, L(top_norm)
+ add n, -1, n
+
+ return %i7+8
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm
new file mode 100644
index 0000000..20ed8bf
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/hamdist.asm
@@ -0,0 +1,78 @@
+dnl SPARC v9 mpn_hamdist for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 18
+C UltraSPARC T4: 3.5
+
+C INPUT PARAMETERS
+define(`up', `%o0')
+define(`vp', `%o1')
+define(`n', `%o2')
+define(`pcnt', `%o5')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_hamdist)
+ subcc n, 1, n
+ be L(final_one)
+ clr pcnt
+L(top):
+ ldx [up + 0], %g1
+ ldx [vp + 0], %g2
+ ldx [up + 8], %o4
+ ldx [vp + 8], %g3
+ sub n, 2, n
+ xor %g1, %g2, %g1
+ add up, 16, up
+ popc %g1, %g2
+ add vp, 16, vp
+ xor %o4, %g3, %o4
+ add pcnt, %g2, pcnt
+ popc %o4, %g3
+ brgz n, L(top)
+ add pcnt, %g3, pcnt
+ brlz,pt n, L(done)
+ nop
+L(final_one):
+ ldx [up + 0], %g1
+ ldx [vp + 0], %g2
+ xor %g1,%g2, %g1
+ popc %g1, %g2
+ add pcnt, %g2, pcnt
+L(done):
+ retl
+ mov pcnt, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm
new file mode 100644
index 0000000..4da49cf
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/invert_limb.asm
@@ -0,0 +1,92 @@
+dnl SPARC T3/T4/T5 mpn_invert_limb.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: ?
+C UltraSPARC T4/T5: ?
+
+C INPUT PARAMETERS
+define(`d', `%o0')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_invert_limb)
+ srlx d, 54, %g1
+ LEA64(approx_tab, g2, g3)
+ and %g1, 0x1fe, %g1
+ srlx d, 24, %g4
+ lduh [%g2+%g1], %g3
+ add %g4, 1, %g4
+ sllx %g3, 11, %g2
+ add %g2, -1, %g2
+ mulx %g3, %g3, %g3
+ mulx %g3, %g4, %g3
+ srlx %g3, 40, %g3
+ sub %g2, %g3, %g2
+ sllx %g2, 60, %g1
+ mulx %g2, %g2, %g3
+ mulx %g3, %g4, %g4
+ sub %g1, %g4, %g1
+ srlx %g1, 47, %g1
+ sllx %g2, 13, %g2
+ add %g1, %g2, %g1
+ and d, 1, %g2
+ srlx %g1, 1, %g4
+ sub %g0, %g2, %g3
+ and %g4, %g3, %g3
+ srlx d, 1, %g4
+ add %g4, %g2, %g2
+ mulx %g1, %g2, %g2
+ sub %g3, %g2, %g2
+ umulxhi(%g1, %g2, %g2)
+ srlx %g2, 1, %g2
+ sllx %g1, 31, %g1
+ add %g2, %g1, %g1
+ mulx %g1, d, %g3
+ umulxhi(d, %g1, %g4)
+ addcc %g3, d, %g0
+ addxc( %g4, d, %o0)
+ jmp %o7+8
+ sub %g1, %o0, %o0
+EPILOGUE()
+
+ RODATA
+ ALIGN(2)
+ TYPE( approx_tab, object)
+ SIZE( approx_tab, 512)
+approx_tab:
+forloop(i,256,512-1,dnl
+` .half eval(0x7fd00/i)
+')dnl
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm
new file mode 100644
index 0000000..c79032d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.asm
@@ -0,0 +1,77 @@
+dnl SPARC v9-2011 simulation support.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(__gmpn_umulh)
+ save %sp, -176, %sp
+ ldx [%sp+2047+176+256], %o0
+ ldx [%sp+2047+176+256+8], %o1
+ rd %ccr, %o4
+ srl %o0, 0, %l4
+ srl %o1, 0, %l1
+ srlx %o1, 32, %o1
+ mulx %o1, %l4, %l2
+ srlx %o0, 32, %o0
+ mulx %o0, %l1, %l3
+ mulx %l1, %l4, %l1
+ srlx %l1, 32, %l1
+ add %l2, %l1, %l2
+ addcc %l2, %l3, %l2
+ mulx %o1, %o0, %o1
+ mov 0, %l1
+ movcs %xcc, 1, %l1
+ sllx %l1, 32, %l1
+ add %o1, %l1, %o1
+ srlx %l2, 32, %o0
+ add %o1, %o0, %o0
+ stx %o0, [%sp+2047+176+256]
+ wr %o4, 0, %ccr
+ ret
+ restore
+EPILOGUE()
+
+PROLOGUE(__gmpn_lzcnt)
+ save %sp, -176, %sp
+ ldx [%sp+2047+176+256], %o0
+ brz,a %o0, 2f
+ mov 64, %o1
+ brlz %o0, 2f
+ mov 0, %o1
+1: sllx %o0, 1, %o0
+ brgz %o0, 1b
+ add %o1, 1, %o1
+ stx %o1, [%sp+2047+176+256]
+2: ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4 b/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4
new file mode 100644
index 0000000..e5d6d8e
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/missing.m4
@@ -0,0 +1,88 @@
+dnl SPARC v9-2011 simulation support.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl Usage addxccc(r1,r2,r3, t1)
+dnl 64-bit add with carry-in and carry-out
+dnl FIXME: Register g2 must not be destination
+
+define(`addxccc',`dnl
+ add %sp, -512, %sp
+ stx %g2, [%sp+2047+256+16]
+ mov 0, %g2
+ movcs %xcc, -1, %g2
+ addcc %g2, 1, %g0
+ addccc $1, $2, $3
+ ldx [%sp+2047+256+16], %g2
+ sub %sp, -512, %sp
+')
+
+
+dnl Usage addxc(r1,r2,r3, t1,t2)
+dnl 64-bit add with carry-in
+
+define(`addxc',`dnl
+ bcc %xcc, 1f
+ add $1, $2, $3
+ add $3, 1, $3
+1:
+')
+
+
+dnl Usage umulxhi(r1,r2,r3)
+dnl 64-bit multiply returning upper 64 bits
+dnl Calls __gmpn_umulh using a non-standard calling convention
+
+define(`umulxhi',`dnl
+ add %sp, -512, %sp
+ stx $1, [%sp+2047+256]
+ stx $2, [%sp+2047+256+8]
+ stx %o7, [%sp+2047+256+16]
+ call __gmpn_umulh
+ nop
+ ldx [%sp+2047+256+16], %o7
+ ldx [%sp+2047+256], $3
+ sub %sp, -512, %sp
+')
+dnl Usage lzcnt(r1,r2)
+dnl Plain count leading zeros
+dnl Calls __gmpn_lzcnt using a non-standard calling convention
+
+define(`lzcnt',`dnl
+ add %sp, -512, %sp
+ stx %o7, [%sp+2047+256+16]
+ call __gmpn_lzcnt
+ stx $1, [%sp+2047+256]
+ ldx [%sp+2047+256+16], %o7
+ ldx [%sp+2047+256], $2
+ sub %sp, -512, %sp
+')
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm
new file mode 100644
index 0000000..08facbd
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_1_4.asm
@@ -0,0 +1,233 @@
+dnl SPARC T3/T4/T5 mpn_mod_1s_4p.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 30
+C UltraSPARC T4/T5: 4
+
+C INPUT PARAMETERS
+define(`ap', `%o0')
+define(`n', `%o1')
+define(`d', `%o2')
+define(`cps', `%o3')
+
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mod_1s_4p)
+ save %sp, -176, %sp
+ ldx [%i3+16], %o4
+ ldx [%i3+24], %o3
+ ldx [%i3+32], %o2
+ ldx [%i3+40], %o1
+ ldx [%i3+48], %o0
+
+ and %i1, 3, %g3
+ sllx %i1, 3, %g1
+ add %i0, %g1, %i0
+ brz %g3, L(b00)
+ cmp %g3, 2
+ bcs %xcc, L(b01)
+ nop
+ be %xcc, L(b10)
+ nop
+
+L(b11): ldx [%i0-16], %g2
+ mulx %g2, %o4, %g5
+ umulxhi(%g2, %o4, %g3)
+ ldx [%i0-24], %g4
+ addcc %g5, %g4, %g5
+ addxc( %g3, %g0, %g4)
+ ldx [%i0-8], %g2
+ mulx %g2, %o3, %g1
+ umulxhi(%g2, %o3, %g3)
+ addcc %g1, %g5, %g1
+ addxc( %g3, %g4, %g2)
+ ba,pt %xcc, .L8
+ add %i0, -32, %i0
+
+L(b00): ldx [%i0-24], %g3
+ mulx %g3, %o4, %g2
+ umulxhi(%g3, %o4, %g5)
+ ldx [%i0-32], %g4
+ addcc %g2, %g4, %g2
+ addxc( %g5, %g0, %g3)
+ ldx [%i0-16], %g4
+ mulx %g4, %o3, %g5
+ umulxhi(%g4, %o3, %i5)
+ addcc %g2, %g5, %g5
+ addxc( %g3, %i5, %g4)
+ ldx [%i0-8], %g2
+ mulx %g2, %o2, %g1
+ umulxhi(%g2, %o2, %g3)
+ addcc %g1, %g5, %g1
+ addxc( %g3, %g4, %g2)
+ ba,pt %xcc, .L8
+ add %i0, -40, %i0
+
+L(b01): ldx [%i0-8], %g1
+ mov 0, %g2
+ ba,pt %xcc, .L8
+ add %i0, -16, %i0
+
+L(b10): ldx [%i0-8], %g2
+ ldx [%i0-16], %g1
+ add %i0, -24, %i0
+
+.L8: add %i1, -5, %g3
+ brlz,pn %g3, L(end)
+ nop
+
+L(top): ldx [%i0-16], %i4
+ mulx %i4, %o4, %o5
+ umulxhi(%i4, %o4, %i1)
+ ldx [%i0-24], %i5
+ addcc %o5, %i5, %o5
+ addxc( %i1, %g0, %i4)
+ ldx [%i0-8], %i5
+ mulx %i5, %o3, %o7
+ umulxhi(%i5, %o3, %i1)
+ addcc %o5, %o7, %o7
+ addxc( %i4, %i1, %i5)
+ ldx [%i0+0], %g4
+ mulx %g4, %o2, %i1
+ umulxhi(%g4, %o2, %i4)
+ addcc %o7, %i1, %i1
+ addxc( %i5, %i4, %g4)
+ mulx %g1, %o1, %i5
+ umulxhi(%g1, %o1, %i4)
+ addcc %i1, %i5, %i5
+ addxc( %g4, %i4, %g5)
+ mulx %g2, %o0, %g1
+ umulxhi(%g2, %o0, %g4)
+ addcc %g1, %i5, %g1
+ addxc( %g4, %g5, %g2)
+ add %g3, -4, %g3
+ brgez,pt %g3, L(top)
+ add %i0, -32, %i0
+
+L(end): mulx %g2, %o4, %g5
+ umulxhi(%g2, %o4, %g3)
+ addcc %g1, %g5, %g5
+ addxc( %g3, %g0, %g2)
+ ldx [%i3+8], %i0
+ ldx [%i3], %g4
+ sub %g0, %i0, %i5
+ srlx %g5, %i5, %i5
+ sllx %g2, %i0, %g2
+ or %i5, %g2, %g1
+ mulx %g1, %g4, %l7
+ umulxhi(%g1, %g4, %g3)
+ sllx %g5, %i0, %g2
+ add %g1, 1, %g1
+ addcc %l7, %g2, %g5
+ addxc( %g3, %g1, %g1)
+ mulx %g1, %i2, %g1
+ sub %g2, %g1, %g2
+ cmp %g2, %g5
+ add %i2, %g2, %g1
+ movlu %xcc, %g2, %g1
+ subcc %g1, %i2, %g2
+ movgeu %xcc, %g2, %g1
+ return %i7+8
+ srlx %g1, %o0, %o0
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_4p_cps)
+ save %sp, -176, %sp
+ lzcnt( %i1, %i5)
+ sllx %i1, %i5, %i1
+ call mpn_invert_limb, 0
+ mov %i1, %o0
+ stx %o0, [%i0]
+ sra %i5, 0, %g1
+ stx %g1, [%i0+8]
+ sub %g0, %i5, %g2
+ srlx %o0, %g2, %g2
+ mov 1, %g1
+ sllx %g1, %i5, %g1
+ or %g2, %g1, %g2
+ sub %g0, %i1, %g1
+ mulx %g2, %g1, %g2
+ srlx %g2, %i5, %g1
+ stx %g1, [%i0+16]
+
+ umulxhi(%o0, %g2, %g3)
+ add %g2, %g3, %g3
+ xnor %g0, %g3, %g3
+ mulx %g3, %i1, %g3
+ mulx %g2, %o0, %g2
+ cmp %g2, %g3
+ add %i1, %g3, %g1
+ movgeu %xcc, %g3, %g1
+ srlx %g1, %i5, %g2
+ stx %g2, [%i0+24]
+
+ umulxhi(%o0, %g1, %g3)
+ add %g1, %g3, %g3
+ xnor %g0, %g3, %g3
+ mulx %g3, %i1, %g3
+ mulx %g1, %o0, %g1
+ cmp %g1, %g3
+ add %i1, %g3, %g2
+ movgeu %xcc, %g3, %g2
+ srlx %g2, %i5, %g1
+ stx %g1, [%i0+32]
+
+ umulxhi(%o0, %g2, %g3)
+ add %g2, %g3, %g3
+ xnor %g0, %g3, %g3
+ mulx %g3, %i1, %g3
+ mulx %g2, %o0, %g2
+ cmp %g2, %g3
+ add %i1, %g3, %g1
+ movgeu %xcc, %g3, %g1
+ srlx %g1, %i5, %g2
+ stx %g2, [%i0+40]
+
+ umulxhi(%o0, %g1, %g2)
+ add %g1, %g2, %g2
+ xnor %g0, %g2, %g2
+ mulx %g2, %i1, %g2
+ mulx %g1, %o0, %o0
+ cmp %o0, %g2
+ add %i1, %g2, %g3
+ movgeu %xcc, %g2, %g3
+ srlx %g3, %i5, %i5
+ stx %i5, [%i0+48]
+
+ return %i7+8
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
new file mode 100644
index 0000000..8744280
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
@@ -0,0 +1,117 @@
+dnl SPARC v9 mpn_mod_34lsub1 for T3/T4/T5.
+
+dnl Copyright 2005, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: -
+C UltraSPARC T3: 5
+C UltraSPARC T4: 1.57
+
+C This is based on the powerpc64/mode64 code.
+
+C INPUT PARAMETERS
+define(`up', `%i0')
+define(`n', `%i1')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mod_34lsub1)
+ save %sp, -176, %sp
+
+ mov 0, %g1
+ mov 0, %g3
+ mov 0, %g4
+ addcc %g0, 0, %g5
+
+ add n, -3, n
+ brlz n, L(lt3)
+ nop
+
+ add n, -3, n
+ ldx [up+0], %l5
+ ldx [up+8], %l6
+ ldx [up+16], %l7
+ brlz n, L(end)
+ add up, 24, up
+
+ ALIGN(16)
+L(top): addxccc(%g1, %l5, %g1)
+ ldx [up+0], %l5
+ addxccc(%g3, %l6, %g3)
+ ldx [up+8], %l6
+ addxccc(%g4, %l7, %g4)
+ ldx [up+16], %l7
+ add n, -3, n
+ brgez n, L(top)
+ add up, 24, up
+
+L(end): addxccc( %g1, %l5, %g1)
+ addxccc(%g3, %l6, %g3)
+ addxccc(%g4, %l7, %g4)
+ addxc( %g5, %g0, %g5)
+
+L(lt3): cmp n, -2
+ blt L(2)
+ nop
+
+ ldx [up+0], %l5
+ mov 0, %l6
+ beq L(1)
+ addcc %g1, %l5, %g1
+
+ ldx [up+8], %l6
+L(1): addxccc(%g3, %l6, %g3)
+ addxccc(%g4, %g0, %g4)
+ addxc( %g5, %g0, %g5)
+
+L(2): sllx %g1, 16, %l0
+ srlx %l0, 16, %l0 C %l0 = %g1 mod 2^48
+ srlx %g1, 48, %l3 C %l3 = %g1 div 2^48
+ srl %g3, 0, %g1
+ sllx %g1, 16, %l4 C %l4 = (%g3 mod 2^32) << 16
+ srlx %g3, 32, %l5 C %l5 = %g3 div 2^32
+ sethi %hi(0xffff0000), %g1
+ andn %g4, %g1, %g1
+ sllx %g1, 32, %l6 C %l6 = (%g4 mod 2^16) << 32
+ srlx %g4, 16, %l7 C %l7 = %g4 div 2^16
+
+ add %l0, %l3, %l0
+ add %l4, %l5, %l4
+ add %l6, %l7, %l6
+
+ add %l0, %l4, %l0
+ add %l6, %g5, %l6
+
+ add %l0, %l6, %i0
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm
new file mode 100644
index 0000000..494e1d3
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mode1o.asm
@@ -0,0 +1,82 @@
+dnl SPARC T3/T4/T5 mpn_modexact_1c_odd.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 30
+C UltraSPARC T4/T5: 26
+
+C INPUT PARAMETERS
+define(`ap', `%o0')
+define(`n', `%o1')
+define(`d', `%o2')
+define(`cy', `%o3')
+
+define(`dinv',`%o5')
+define(`a0', `%g1')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_modexact_1c_odd)
+ srlx d, 1, %g1
+ and %g1, 127, %g1
+
+ LEA64(binvert_limb_table, g2, g4)
+ ldub [%g2+%g1], %g1
+ add %g1, %g1, %g2
+ mulx %g1, %g1, %g1
+ mulx %g1, d, %g1
+ sub %g2, %g1, %g2
+ add %g2, %g2, %g1
+ mulx %g2, %g2, %g2
+ mulx %g2, d, %g2
+ sub %g1, %g2, %g1
+ add %g1, %g1, %o5
+ mulx %g1, %g1, %g1
+ mulx %g1, d, %g1
+ sub %o5, %g1, dinv
+ add n, -1, n
+
+L(top): ldx [ap], a0
+ add ap, 8, ap
+ subcc a0, cy, %g3
+ mulx %g3, dinv, %g5
+ umulxhi(d, %g5, %g5)
+ addxc( %g5, %g0, cy)
+ brnz,pt n, L(top)
+ add n, -1, n
+
+ retl
+ mov cy, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm
new file mode 100644
index 0000000..af05d62
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/mul_1.asm
@@ -0,0 +1,174 @@
+dnl SPARC v9 mpn_mul_1 for T3/T4/T5.
+
+dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 23
+C UltraSPARC T4: 3
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mul_1)
+ save %sp, -176, %sp
+
+ and n, 3, %g5
+ add n, -4, n
+ brz %g5, L(b0)
+ cmp %g5, 2
+ bcs %xcc, L(b1)
+ nop
+ be %xcc, L(b2)
+ nop
+
+L(b3): addcc %g0, %g0, %i5
+ ldx [up+0], %l0
+ ldx [up+8], %l1
+ ldx [up+16], %l2
+ mulx %l0, v0, %o0
+ umulxhi(%l0, v0, %o1)
+ brgz n, L(gt3)
+ add rp, -8, rp
+ mulx %l1, v0, %o2
+ umulxhi(%l1, v0, %o3)
+ b L(wd3)
+ nop
+L(gt3): ldx [up+24], %l3
+ mulx %l1, v0, %o2
+ umulxhi(%l1, v0, %o3)
+ add up, 24, up
+ b L(lo3)
+ add n, -3, n
+
+L(b2): addcc %g0, %g0, %o1
+ ldx [up+0], %l1
+ ldx [up+8], %l2
+ brgz n, L(gt2)
+ add rp, -16, rp
+ mulx %l1, v0, %o2
+ umulxhi(%l1, v0, %o3)
+ mulx %l2, v0, %o4
+ umulxhi(%l2, v0, %o5)
+ b L(wd2)
+ nop
+L(gt2): ldx [up+16], %l3
+ mulx %l1, v0, %o2
+ umulxhi(%l1, v0, %o3)
+ ldx [up+24], %l0
+ mulx %l2, v0, %o4
+ umulxhi(%l2, v0, %o5)
+ add up, 16, up
+ b L(lo2)
+ add n, -2, n
+
+L(b1): addcc %g0, %g0, %o3
+ ldx [up+0], %l2
+ brgz n, L(gt1)
+ nop
+ mulx %l2, v0, %o4
+ stx %o4, [rp+0]
+ umulxhi(%l2, v0, %i0)
+ ret
+ restore
+L(gt1): ldx [up+8], %l3
+ ldx [up+16], %l0
+ mulx %l2, v0, %o4
+ umulxhi(%l2, v0, %o5)
+ ldx [up+24], %l1
+ mulx %l3, v0, %i4
+ umulxhi(%l3, v0, %i5)
+ add rp, -24, rp
+ add up, 8, up
+ b L(lo1)
+ add n, -1, n
+
+L(b0): addcc %g0, %g0, %o5
+ ldx [up+0], %l3
+ ldx [up+8], %l0
+ ldx [up+16], %l1
+ mulx %l3, v0, %i4
+ umulxhi(%l3, v0, %i5)
+ ldx [up+24], %l2
+ mulx %l0, v0, %o0
+ umulxhi(%l0, v0, %o1)
+ b L(lo0)
+ nop
+
+ ALIGN(16)
+L(top): ldx [up+0], %l3 C 0
+ addxccc(%i4, %o5, %i4) C 0
+ mulx %l1, v0, %o2 C 1
+ stx %i4, [rp+0] C 1
+ umulxhi(%l1, v0, %o3) C 2
+L(lo3): ldx [up+8], %l0 C 2
+ addxccc(%o0, %i5, %o0) C 3
+ mulx %l2, v0, %o4 C 3
+ stx %o0, [rp+8] C 4
+ umulxhi(%l2, v0, %o5) C 4
+L(lo2): ldx [up+16], %l1 C 5
+ addxccc(%o2, %o1, %o2) C 5
+ mulx %l3, v0, %i4 C 6
+ stx %o2, [rp+16] C 6
+ umulxhi(%l3, v0, %i5) C 7
+L(lo1): ldx [up+24], %l2 C 7
+ addxccc(%o4, %o3, %o4) C 8
+ mulx %l0, v0, %o0 C 8
+ stx %o4, [rp+24] C 9
+ umulxhi(%l0, v0, %o1) C 9
+ add rp, 32, rp C 10
+L(lo0): add up, 32, up C 10
+ brgz n, L(top) C 11
+ add n, -4, n C 11
+
+L(end): addxccc(%i4, %o5, %i4)
+ mulx %l1, v0, %o2
+ stx %i4, [rp+0]
+ umulxhi(%l1, v0, %o3)
+ addxccc(%o0, %i5, %o0)
+L(wd3): mulx %l2, v0, %o4
+ stx %o0, [rp+8]
+ umulxhi(%l2, v0, %o5)
+ addxccc(%o2, %o1, %o2)
+L(wd2): stx %o2, [rp+16]
+ addxccc(%o4, %o3, %o4)
+ stx %o4, [rp+24]
+ addxc( %g0, %o5, %i0)
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm
new file mode 100644
index 0000000..de80f3c
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/popcount.asm
@@ -0,0 +1,70 @@
+dnl SPARC v9 mpn_popcount for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 15
+C UltraSPARC T4: 2.5
+
+C INPUT PARAMETERS
+define(`up', `%o0')
+define(`n', `%o1')
+define(`pcnt', `%o5')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_popcount)
+ subcc n, 1, n
+ be L(final_one)
+ clr pcnt
+L(top):
+ ldx [up + 0], %g1
+ sub n, 2, n
+ ldx [up + 8], %o4
+ add up, 16, up
+ popc %g1, %g2
+ popc %o4, %g3
+ add pcnt, %g2, pcnt
+ brgz n, L(top)
+ add pcnt, %g3, pcnt
+ brlz,pt n, L(done)
+ nop
+L(final_one):
+ ldx [up + 0], %g1
+ popc %g1, %g2
+ add pcnt, %g2, pcnt
+L(done):
+ retl
+ mov pcnt, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm
new file mode 100644
index 0000000..d46499f
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm
@@ -0,0 +1,93 @@
+dnl SPARC v9 mpn_sqr_diag_addlsh1 for T3/T4/T5.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: ?
+C UltraSPARC T4: >= 4.5
+
+
+define(`rp', `%i0')
+define(`tp', `%i1')
+define(`up', `%i2')
+define(`n', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sqr_diag_addlsh1)
+ save %sp, -176, %sp
+
+ ldx [up+0], %g1
+ mulx %g1, %g1, %o0
+ umulxhi(%g1, %g1, %g2)
+ stx %o0, [rp+0]
+
+ ldx [up+8], %g1
+ ldx [tp+0], %g4
+ ldx [tp+8], %g5
+ mulx %g1, %g1, %o0
+ orcc %g0, %g0, %o5
+ b L(dm)
+ add n, -2, n
+
+ ALIGN(16)
+L(top): ldx [up+8], %g1
+ addcc %g4, %o2, %o2
+ addxccc(%g5, %o0, %g3)
+ ldx [tp+16], %g4
+ ldx [tp+24], %g5
+ mulx %g1, %g1, %o0
+ stx %o2, [rp+8]
+ stx %g3, [rp+16]
+ add rp, 16, rp
+ add tp, 16, tp
+L(dm): add %g2, %o5, %o2
+ umulxhi(%g1, %g1, %g2)
+ addxccc(%g4, %g4, %g4)
+ addxccc(%g5, %g5, %g5)
+ add up, 8, up
+ addxc( %g0, %g0, %o5)
+ brnz n, L(top)
+ add n, -1, n
+
+ addcc %o2, %g4, %g4
+ addxccc(%o0, %g5, %g5)
+ stx %g4, [rp+8]
+ stx %g5, [rp+16]
+ addxc( %o5, %g2, %g2)
+ stx %g2, [rp+24]
+
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm
new file mode 100644
index 0000000..0e4bc93
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/sub_n.asm
@@ -0,0 +1,144 @@
+dnl SPARC v9 mpn_sub_n for T3/T4.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 8
+C UltraSPARC T4: 3
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`vp', `%i2')
+define(`n', `%i3')
+define(`cy', `%i4')
+
+define(`u0_off', `%l0')
+define(`u1_off', `%l1')
+define(`v0_off', `%l2')
+define(`v1_off', `%l3')
+define(`r0_off', `%l4')
+define(`r1_off', `%l5')
+define(`loop_n', `%l6')
+define(`tmp', `%l7')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sub_nc)
+ save %sp, -176, %sp
+ ba,pt %xcc, L(ent)
+ xor cy, 1, cy
+EPILOGUE()
+PROLOGUE(mpn_sub_n)
+ save %sp, -176, %sp
+ mov 1, cy
+L(ent):
+ subcc n, 1, n
+ be L(final_one)
+ cmp %g0, cy
+
+ ldx [up + 0], %o4
+ sllx n, 3, tmp
+
+ ldx [vp + 0], %o5
+ add up, tmp, u0_off
+
+ ldx [up + 8], %g5
+ add vp, tmp, v0_off
+
+ ldx [vp + 8], %g1
+ add rp, tmp, r0_off
+
+ neg tmp, loop_n
+ add u0_off, 8, u1_off
+
+ add v0_off, 8, v1_off
+ sub loop_n, -(2 * 8), loop_n
+
+ sub r0_off, 16, r0_off
+ brgez,pn loop_n, L(loop_tail)
+ sub r0_off, 8, r1_off
+
+ b,a L(top)
+ ALIGN(16)
+L(top):
+ xnor %o5, 0, tmp
+ ldx [loop_n + v0_off], %o5
+
+ addxccc(%o4, tmp, %g3)
+ ldx [loop_n + u0_off], %o4
+
+ xnor %g1, 0, %g1
+ stx %g3, [loop_n + r0_off]
+
+ addxccc(%g5, %g1, tmp)
+ ldx [loop_n + v1_off], %g1
+
+ ldx [loop_n + u1_off], %g5
+ sub loop_n, -(2 * 8), loop_n
+
+ brlz loop_n, L(top)
+ stx tmp, [loop_n + r1_off]
+
+L(loop_tail):
+ xnor %o5, 0, tmp
+ xnor %g1, 0, %g1
+
+ addxccc(%o4, tmp, %g3)
+ add loop_n, u0_off, up
+
+ addxccc(%g5, %g1, %g5)
+ add loop_n, r0_off, rp
+
+ stx %g3, [rp + 0]
+ add loop_n, v0_off, vp
+
+ brgz,pt loop_n, L(done)
+ stx %g5, [rp + 8]
+
+ add rp, (2 * 8), rp
+
+L(final_one):
+ ldx [up+0], %o4
+ ldx [vp+0], %o5
+ xnor %o5, %g0, %o5
+ addxccc(%o4, %o5, %g3)
+ stx %g3, [rp+0]
+
+L(done):
+ clr %i0
+ movcc %xcc, 1, %i0
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm
new file mode 100644
index 0000000..5635d1b
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/submul_1.asm
@@ -0,0 +1,170 @@
+dnl SPARC v9 mpn_submul_1 for T3/T4/T5.
+
+dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T3: 26
+C UltraSPARC T4: 4.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_submul_1)
+ save %sp, -176, %sp
+ ldx [up+0], %g1
+
+ and n, 3, %g5
+ add n, -4, n
+ brz %g5, L(b00)
+ cmp %g5, 2
+ bcs %xcc, L(b01)
+ nop
+ bne %xcc, L(b11)
+ ldx [up+8], %g4
+
+L(b10): add up, 16, up
+ addcc %g0, 0, %g3
+ mulx %g1, v0, %l4
+ umulxhi(%g1, v0, %l5)
+ ldx [rp+0], %o2
+ mulx %g4, v0, %l6
+ umulxhi(%g4, v0, %l7)
+ brlz n, L(wd2)
+ nop
+L(gt2): ldx [up+0], %o0
+ b L(lo2)
+ nop
+
+L(b00): add rp, -16, rp
+ addcc %g0, 0, %g3
+ ldx [up+8], %o1
+ mulx %g1, v0, %l0
+ umulxhi(%g1, v0, %l1)
+ ldx [up+16], %o0
+ ldx [rp+16], %o2
+ mulx %o1, v0, %l2
+ umulxhi(%o1, v0, %l3)
+ b L(lo0)
+ nop
+
+L(b01): add up, 8, up
+ add rp, -8, rp
+ addcc %g0, 0, %g3
+ ldx [rp+8], %o3
+ mulx %g1, v0, %l6
+ umulxhi(%g1, v0, %l7)
+ brlz n, L(wd1)
+ nop
+ ldx [up+0], %o0
+ ldx [up+8], %o1
+ mulx %o0, v0, %l0
+ umulxhi(%o0, v0, %l1)
+ b L(lo1)
+ nop
+
+L(b11): add up, 24, up
+ add rp, 8, rp
+ addcc %g0, 0, %g3
+ mulx %g1, v0, %l2
+ umulxhi(%g1, v0, %l3)
+ ldx [up-8], %o1
+ ldx [rp-8], %o3
+ mulx %g4, v0, %l4
+ umulxhi(%g4, v0, %l5)
+ brlz n, L(end)
+ nop
+
+ ALIGN(16)
+L(top): ldx [up+0], %o0
+ addxccc(%g3, %l2, %g1)
+ ldx [rp+0], %o2
+ addxc( %g0, %l3, %g3)
+ mulx %o1, v0, %l6
+ subcc %o3, %g1, %g4
+ umulxhi(%o1, v0, %l7)
+ stx %g4, [rp-8]
+L(lo2): ldx [up+8], %o1
+ addxccc(%g3, %l4, %g1)
+ ldx [rp+8], %o3
+ addxc( %g0, %l5, %g3)
+ mulx %o0, v0, %l0
+ subcc %o2, %g1, %g4
+ umulxhi(%o0, v0, %l1)
+ stx %g4, [rp+0]
+L(lo1): ldx [up+16], %o0
+ addxccc(%g3, %l6, %g1)
+ ldx [rp+16], %o2
+ addxc( %g0, %l7, %g3)
+ mulx %o1, v0, %l2
+ subcc %o3, %g1, %g4
+ umulxhi(%o1, v0, %l3)
+ stx %g4, [rp+8]
+L(lo0): ldx [up+24], %o1
+ addxccc(%g3, %l0, %g1)
+ ldx [rp+24], %o3
+ addxc( %g0, %l1, %g3)
+ mulx %o0, v0, %l4
+ subcc %o2, %g1, %g4
+ umulxhi(%o0, v0, %l5)
+ stx %g4, [rp+16]
+ add n, -4, n
+ add up, 32, up
+ brgez n, L(top)
+ add rp, 32, rp
+
+L(end): addxccc(%g3, %l2, %g1)
+ ldx [rp+0], %o2
+ addxc( %g0, %l3, %g3)
+ mulx %o1, v0, %l6
+ subcc %o3, %g1, %g4
+ umulxhi(%o1, v0, %l7)
+ stx %g4, [rp-8]
+L(wd2): addxccc(%g3, %l4, %g1)
+ ldx [rp+8], %o3
+ addxc( %g0, %l5, %g3)
+ subcc %o2, %g1, %g4
+ stx %g4, [rp+0]
+L(wd1): addxccc(%g3, %l6, %g1)
+ addxc( %g0, %l7, %g3)
+ subcc %o3, %g1, %g4
+ stx %g4, [rp+8]
+ addxc( %g0, %g3, %i0)
+ ret
+ restore
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct45/gmp-mparam.h b/gmp-6.3.0/mpn/sparc64/ultrasparct45/gmp-mparam.h
new file mode 100644
index 0000000..c10fd0d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct45/gmp-mparam.h
@@ -0,0 +1,174 @@
+/* Sparc64 T4-T5 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 3600 MHz ultrasparct5 running GNU/Linux */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-10-01, gcc 7.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 3
+#define MOD_1_1P_METHOD 2 /* 0.34% faster than 1 */
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
+#define USE_PREINV_DIVREM_1 1
+/* From gcc105.fsffrance.org, 2023-07-25 */
+#define DIV_QR_1N_PI1_METHOD 4 /* 7.06% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 3
+#define DIV_QR_1_UNNORM_THRESHOLD 2
+#define DIV_QR_2_PI2_THRESHOLD 5
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 19
+
+#define DIV_1_VS_MUL_1_PERCENT 654
+
+#define MUL_TOOM22_THRESHOLD 40
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 372
+#define MUL_TOOM6H_THRESHOLD 494
+#define MUL_TOOM8H_THRESHOLD 656
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 126
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 247
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 225
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 219
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 188
+
+#define SQR_BASECASE_THRESHOLD 20
+#define SQR_TOOM2_THRESHOLD 59
+#define SQR_TOOM3_THRESHOLD 107
+#define SQR_TOOM4_THRESHOLD 298
+#define SQR_TOOM6_THRESHOLD 399
+#define SQR_TOOM8_THRESHOLD 562
+
+#define MULMID_TOOM42_THRESHOLD 48
+
+#define MULMOD_BNM1_THRESHOLD 25
+#define SQRMOD_BNM1_THRESHOLD 23
+
+#define MUL_FFT_MODF_THRESHOLD 555 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 555, 5}, { 29, 6}, { 31, 7}, { 31, 8}, \
+ { 17, 7}, { 36, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 7}, { 43, 8}, { 29, 9}, { 15, 8}, \
+ { 31, 7}, { 63, 8}, { 35, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 57,10}, { 15, 8}, { 61, 9}, { 31, 8}, \
+ { 67, 9}, { 35, 8}, { 71, 9}, { 39, 8}, \
+ { 81, 9}, { 43,10}, { 23, 9}, { 59,11}, \
+ { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 87,10}, { 47, 9}, { 99,10}, { 55, 9}, \
+ { 115,11}, { 31,10}, { 63, 9}, { 131,10}, \
+ { 87,11}, { 47,10}, { 111, 9}, { 223,12}, \
+ { 31,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 159,11}, { 95,10}, { 191,11}, { 111,12}, \
+ { 63,11}, { 143,10}, { 287,11}, { 159,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 75
+#define MUL_FFT_THRESHOLD 5760
+
+#define SQR_FFT_MODF_THRESHOLD 372 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 372, 5}, { 23, 6}, { 12, 5}, { 25, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 25, 8}, \
+ { 13, 7}, { 28, 8}, { 15, 7}, { 31, 8}, \
+ { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
+ { 31,10}, { 79,11}, { 47,10}, { 95,12}, \
+ { 31,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
+ { 383,11}, { 111,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271,11}, { 143,10}, \
+ { 287, 9}, { 575,10}, { 303, 9}, { 607,11}, \
+ { 159,10}, { 319, 9}, { 639,12}, { 95,11}, \
+ { 191,10}, { 383, 9}, { 767,11}, { 207,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 75
+#define SQR_FFT_THRESHOLD 3776
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 35
+#define MULLO_MUL_N_THRESHOLD 11278
+#define SQRLO_BASECASE_THRESHOLD 0 /* always */
+#define SQRLO_DC_THRESHOLD 168
+#define SQRLO_SQR_THRESHOLD 7511
+
+#define DC_DIV_QR_THRESHOLD 36
+#define DC_DIVAPPR_Q_THRESHOLD 103
+#define DC_BDIV_QR_THRESHOLD 28
+#define DC_BDIV_Q_THRESHOLD 88
+
+#define INV_MULMOD_BNM1_THRESHOLD 78
+#define INV_NEWTON_THRESHOLD 181
+#define INV_APPR_THRESHOLD 118
+
+#define BINV_NEWTON_THRESHOLD 296
+#define REDC_1_TO_REDC_2_THRESHOLD 4
+#define REDC_2_TO_REDC_N_THRESHOLD 79
+
+#define MU_DIV_QR_THRESHOLD 1970
+#define MU_DIVAPPR_Q_THRESHOLD 1970
+#define MUPI_DIV_QR_THRESHOLD 82
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 1970
+
+#define POWM_SEC_TABLE 1,58,102,1509
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 29
+#define SET_STR_DC_THRESHOLD 686
+#define SET_STR_PRECOMPUTE_THRESHOLD 2717
+
+#define FAC_DSC_THRESHOLD 336
+#define FAC_ODD_THRESHOLD 24
+
+#define MATRIX22_STRASSEN_THRESHOLD 32
+#define HGCD2_DIV1_METHOD 1 /* 0.66% faster than 3 */
+#define HGCD_THRESHOLD 57
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 386
+#define GCDEXT_DC_THRESHOLD 288
+#define JACOBI_BASE_METHOD 4 /* 2.50% faster than 3 */