aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/sparc32
diff options
context:
space:
mode:
Diffstat (limited to 'gmp-6.3.0/mpn/sparc32')
-rw-r--r--gmp-6.3.0/mpn/sparc32/README71
-rw-r--r--gmp-6.3.0/mpn/sparc32/add_n.asm245
-rw-r--r--gmp-6.3.0/mpn/sparc32/addmul_1.asm155
-rw-r--r--gmp-6.3.0/mpn/sparc32/gmp-mparam.h67
-rw-r--r--gmp-6.3.0/mpn/sparc32/lshift.asm105
-rw-r--r--gmp-6.3.0/mpn/sparc32/mul_1.asm146
-rw-r--r--gmp-6.3.0/mpn/sparc32/rshift.asm102
-rw-r--r--gmp-6.3.0/mpn/sparc32/sparc-defs.m497
-rw-r--r--gmp-6.3.0/mpn/sparc32/sub_n.asm335
-rw-r--r--gmp-6.3.0/mpn/sparc32/submul_1.asm155
-rw-r--r--gmp-6.3.0/mpn/sparc32/udiv.asm147
-rw-r--r--gmp-6.3.0/mpn/sparc32/udiv_nfp.asm202
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/add_n.asm70
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/addmul_1.asm90
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/gmp-mparam.h153
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/mul_1.asm83
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/sqr_diagonal.asm55
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/sub_n.asm70
-rw-r--r--gmp-6.3.0/mpn/sparc32/ultrasparct1/submul_1.asm91
-rw-r--r--gmp-6.3.0/mpn/sparc32/umul.asm77
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/addmul_1.asm109
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/gmp-mparam.h73
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/mul_1.asm93
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/submul_1.asm67
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/supersparc/gmp-mparam.h73
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/supersparc/udiv.asm131
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/udiv.asm131
-rw-r--r--gmp-6.3.0/mpn/sparc32/v8/umul.asm40
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/README4
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/add_n.asm129
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/addmul_1.asm306
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/gmp-mparam.h204
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/mul_1.asm287
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/sqr_diagonal.asm462
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/sub_n.asm129
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/submul_1.asm316
-rw-r--r--gmp-6.3.0/mpn/sparc32/v9/udiv.asm52
37 files changed, 5122 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/sparc32/README b/gmp-6.3.0/mpn/sparc32/README
new file mode 100644
index 0000000..f2dd116
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/README
@@ -0,0 +1,71 @@
+Copyright 1996, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/.
+
+
+
+
+
+This directory contains mpn functions for various SPARC chips. Code that
+runs only on version 8 SPARC implementations, is in the v8 subdirectory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+ Load and Store timing
+
+On most early SPARC implementations, the ST instructions takes multiple
+cycles, while a STD takes just a single cycle more than an ST. For the CPUs
+in SPARCstation I and II, the times are 3 and 4 cycles, respectively.
+Therefore, combining two ST instructions into a STD when possible is a
+significant optimization.
+
+Later SPARC implementations have single cycle ST.
+
+For SuperSPARC, we can perform just one memory instruction per cycle, even
+if up to two integer instructions can be executed in its pipeline. For
+programs that perform so many memory operations that there are not enough
+non-memory operations to issue in parallel with all memory operations, using
+LDD and STD when possible helps.
+
+UltraSPARC-1/2 has very slow integer multiplication. In the v9 subdirectory,
+we therefore use floating-point multiplication.
+
+STATUS
+
+1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5
+ cycles/limb asymptotically. We could optimize speed for special counts
+ by using ADDXCC.
+
+2. On a SuperSPARC, mpn_add_n and mpn_sub_n runs at 2.5 cycles/limb, or 2
+ cycles/limb asymptotically.
+
+3. mpn_mul_1 runs at what is believed to be optimal speed.
+
+4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a
+ cycle by avoiding one of the add instructions. See a29k/addmul_1.
+
+The speed of the code for other SPARC implementations is uncertain.
diff --git a/gmp-6.3.0/mpn/sparc32/add_n.asm b/gmp-6.3.0/mpn/sparc32/add_n.asm
new file mode 100644
index 0000000..8549195
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/add_n.asm
@@ -0,0 +1,245 @@
+dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl sum in a third limb vector.
+
+dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) C branch if alignment differs
+ nop
+C ** V1a **
+L(0): andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc n,-10,n
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1):
+ addcc n,8-2,n
+ blt L(end1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1):
+ addxcc %g4,%g2,%o4
+ addxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+C ** V1b **
+ mov s2_ptr,%g1
+ mov s1_ptr,s2_ptr
+ b L(0)
+ mov %g1,s1_ptr
+
+C ** V2 **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ. Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2): cmp n,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
+ be L(v2) C if no, branch
+ nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ blt L(fin2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin2):
+ addcc n,8-2,n
+ blt L(end2)
+ subcc %g0,%o4,%g0 C restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 C restore cy
+L(end2):
+ andcc n,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+EPILOGUE(mpn_add_n)
diff --git a/gmp-6.3.0/mpn/sparc32/addmul_1.asm b/gmp-6.3.0/mpn/sparc32/addmul_1.asm
new file mode 100644
index 0000000..92d5d78
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/addmul_1.asm
@@ -0,0 +1,155 @@
+dnl SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ addcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ addcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/gmp-mparam.h b/gmp-6.3.0/mpn/sparc32/gmp-mparam.h
new file mode 100644
index 0000000..a3bc612
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/gmp-mparam.h
@@ -0,0 +1,67 @@
+/* SPARC v7 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* Generated by tuneup.c, 2002-03-13, gcc 2.95, Weitek 8701 */
+
+#define MUL_TOOM22_THRESHOLD 8
+#define MUL_TOOM33_THRESHOLD 466
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 16
+#define SQR_TOOM3_THRESHOLD 258
+
+#define DIV_SB_PREINV_THRESHOLD 4
+#define DIV_DC_THRESHOLD 28
+#define POWM_THRESHOLD 28
+
+#define GCD_ACCEL_THRESHOLD 3
+#define JACOBI_BASE_METHOD 2
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define USE_PREINV_DIVREM_1 1
+#define USE_PREINV_MOD_1 1
+#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIVEXACT_1_THRESHOLD 120
+#define MODEXACT_1_ODD_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define GET_STR_DC_THRESHOLD 21
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_THRESHOLD 1012
+
+#define MUL_FFT_TABLE { 272, 672, 1152, 3584, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD 264
+#define MUL_FFT_THRESHOLD 2304
+
+#define SQR_FFT_TABLE { 304, 736, 1152, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD 248
+#define SQR_FFT_THRESHOLD 2304
diff --git a/gmp-6.3.0/mpn/sparc32/lshift.asm b/gmp-6.3.0/mpn/sparc32/lshift.asm
new file mode 100644
index 0000000..8321343
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/lshift.asm
@@ -0,0 +1,105 @@
+dnl SPARC mpn_lshift -- Shift a number left.
+
+dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr %o0
+C src_ptr %o1
+C size %o2
+C cnt %o3
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ sll %o2,2,%g1
+ add %o1,%g1,%o1 C make %o1 point at end of src
+ ld [%o1-4],%g2 C load first limb
+ sub %g0,%o3,%o5 C negate shift count
+ add %o0,%g1,%o0 C make %o0 point at end of res
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 C number of limbs in first loop
+ srl %g2,%o5,%g1 C compute function result
+ be L(0) C if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 C adjust count for main loop
+
+L(loop0):
+ ld [%o1-8],%g3
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ addcc %g4,-1,%g4
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0+0]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1-8],%g3
+ add %o0,-16,%o0
+ addcc %o2,-4,%o2
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+
+ ld [%o1-12],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+12]
+ srl %g2,%o5,%g1
+
+ ld [%o1-16],%g3
+ sll %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0+8]
+ srl %g3,%o5,%g1
+
+ ld [%o1-20],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+4]
+ srl %g2,%o5,%g1
+
+ add %o1,-16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0+0]
+
+L(end): sll %g2,%o3,%g2
+ st %g2,[%o0-4]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_lshift)
diff --git a/gmp-6.3.0/mpn/sparc32/mul_1.asm b/gmp-6.3.0/mpn/sparc32/mul_1.asm
new file mode 100644
index 0000000..42b4168
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/mul_1.asm
@@ -0,0 +1,146 @@
+dnl SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne,a L(loop0)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g1,[%o4+%o2]
+
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2 C g2 = S1_LIMB iff S2_LIMB < 0, else 0
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne,a L(loop)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_mul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/rshift.asm b/gmp-6.3.0/mpn/sparc32/rshift.asm
new file mode 100644
index 0000000..e155476
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/rshift.asm
@@ -0,0 +1,102 @@
+dnl SPARC mpn_rshift -- Shift a number right.
+
+dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr %o0
+C src_ptr %o1
+C size %o2
+C cnt %o3
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ld [%o1],%g2 C load first limb
+ sub %g0,%o3,%o5 C negate shift count
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 C number of limbs in first loop
+ sll %g2,%o5,%g1 C compute function result
+ be L(0) C if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 C adjust count for main loop
+
+L(loop0):
+ ld [%o1+4],%g3
+ add %o0,4,%o0
+ add %o1,4,%o1
+ addcc %g4,-1,%g4
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0-4]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1+4],%g3
+ add %o0,16,%o0
+ addcc %o2,-4,%o2
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+
+ ld [%o1+8],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-16]
+ sll %g2,%o5,%g1
+
+ ld [%o1+12],%g3
+ srl %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0-12]
+ sll %g3,%o5,%g1
+
+ ld [%o1+16],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-8]
+ sll %g2,%o5,%g1
+
+ add %o1,16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0-4]
+
+L(end): srl %g2,%o3,%g2
+ st %g2,[%o0-0]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_rshift)
diff --git a/gmp-6.3.0/mpn/sparc32/sparc-defs.m4 b/gmp-6.3.0/mpn/sparc32/sparc-defs.m4
new file mode 100644
index 0000000..fff0ff8
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/sparc-defs.m4
@@ -0,0 +1,97 @@
+divert(-1)
+
+dnl m4 macros for SPARC assembler (32 and 64 bit).
+
+
+dnl Copyright 2002, 2011, 2013, 2017, 2021 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+changecom(;) dnl cannot use default # since that's used in REGISTER decls
+
+
+dnl Usage: REGISTER(reg,attr)
+dnl
+dnl Give a ".register reg,attr" directive, if the assembler supports it.
+dnl HAVE_REGISTER comes from the GMP_ASM_SPARC_REGISTER configure test.
+
+define(REGISTER,
+m4_assert_numargs(2)
+m4_assert_defined(`HAVE_REGISTER')
+`ifelse(HAVE_REGISTER,yes,
+`.register `$1',`$2'')')
+
+
+C Testing mechanism for running newer code on older processors
+ifdef(`FAKE_T3',`
+ include_mpn(`sparc64/ultrasparct3/missing.m4')
+',`
+ define(`addxccc', ``addxccc' $1, $2, $3')
+ define(`addxc', ``addxc' $1, $2, $3')
+ define(`umulxhi', ``umulxhi' $1, $2, $3')
+ define(`lzcnt', ``lzd' $1, $2')
+')
+
+dnl Usage: LEA64(symbol,reg,pic_reg)
+dnl
+dnl Use whatever 64-bit code sequence is appropriate to load "symbol" into
+dnl register "reg", potentially using register "pic_reg" to perform the
+dnl calculations.
+dnl
+dnl Caveat: We used to use the setx pseudo insn here, but some GNU/Linux
+dnl releases causes invalid code or relocs for that.
+dnl
+dnl Optimisation 1: Use thunk call instead of RDPC which causes pipeline
+dnl replay for some sparcs.
+dnl
+dnl Optimisation 2: Do the two symbol building sequences in parallel instead
+dnl of one after the other. That might need one more scratch register.
+
+define(LEA64,
+m4_assert_numargs(3)
+`ifdef(`PIC',`
+ rd %pc, %`$2'
+ sethi %hi(_GLOBAL_OFFSET_TABLE_+4), %`$3'
+ add %`$3', %lo(_GLOBAL_OFFSET_TABLE_+8), %`$3'
+ add %`$2', %`$3', %`$3'
+ifelse(HAVE_GDOP,yes,`
+ sethi %gdop_hix22(`$1'), %`$2'
+ xor %`$2', %gdop_lox10(`$1'), %`$2'
+ ldx [%`$3' + %`$2'], %`$2', %gdop(`$1')
+',`
+ sethi %hi(`$1'), %`$2'
+ or %`$2', %lo(`$1'), %`$2'
+ ldx [%`$3' + %`$2'], %`$2'
+')',`
+ sethi %h44(`$1'), %`$2'
+ or %`$2', %m44(`$1'), %`$2'
+ sllx %`$2', 12, %`$2'
+ or %`$2', %l44(`$1'), %$2
+')')
+
+divert
diff --git a/gmp-6.3.0/mpn/sparc32/sub_n.asm b/gmp-6.3.0/mpn/sparc32/sub_n.asm
new file mode 100644
index 0000000..24a576d
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/sub_n.asm
@@ -0,0 +1,335 @@
+dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) C branch if alignment differs
+ nop
+C ** V1a **
+ andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc n,-10,n
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1):
+ addcc n,8-2,n
+ blt L(end1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1):
+ subxcc %g4,%g2,%o4
+ subxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+C ** V1b **
+ andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1b) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s1_ptr
+ ld [s2_ptr],%g4
+ add s2_ptr,4,s2_ptr
+ ld [s1_ptr],%g2
+ add s1_ptr,4,s1_ptr
+ add n,-1,n
+ subcc %g2,%g4,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1b): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s2_ptr+0],%g4
+ addcc n,-10,n
+ ld [s2_ptr+4],%g1
+ ldd [s1_ptr+0],%g2
+ blt L(fin1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+16],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+20],%g1
+ ldd [s1_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+24],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+28],%g1
+ ldd [s1_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+32],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+36],%g1
+ ldd [s1_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1b)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1b):
+ addcc n,8-2,n
+ blt L(end1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1b)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1b):
+ subxcc %g2,%g4,%o4
+ subxcc %g3,%g1,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s2_ptr+8],%g4
+ ld [s1_ptr+8],%g2
+ subxcc %g2,%g4,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1b):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+C ** V2 **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ. Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2): cmp n,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
+ be L(v2) C if no, branch
+ nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ blt L(fin2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin2):
+ addcc n,8-2,n
+ blt L(end2)
+ subcc %g0,%o4,%g0 C restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 C restore cy
+L(end2):
+ andcc n,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+EPILOGUE(mpn_sub_n)
diff --git a/gmp-6.3.0/mpn/sparc32/submul_1.asm b/gmp-6.3.0/mpn/sparc32/submul_1.asm
new file mode 100644
index 0000000..73f9377
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/submul_1.asm
@@ -0,0 +1,155 @@
+dnl SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ subcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ subcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_submul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/udiv.asm b/gmp-6.3.0/mpn/sparc32/udiv.asm
new file mode 100644
index 0000000..cbc24b1
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/udiv.asm
@@ -0,0 +1,147 @@
+dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl This is for v7 CPUs with a floating-point unit.
+
+dnl Copyright 1993, 1994, 1996, 2000, 2021 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ sethi %hi(0x80000000),%g1
+
+ sethi %hi(0x41e00000),%i4
+ mov 0,%i5
+ std %i4,[%fp-8]
+ ldd [%fp-8],%f12 C 0r2147483648
+ faddd %f12,%f12,%f8 C 0r4294967296
+
+ mov %i0,%i5
+
+ sub %i1,%g1,%l0
+ sub %i2,%g1,%l1
+ std %l0,[%fp-8]
+ ldd [%fp-8],%f10
+
+ fitod %f10,%f4
+ faddd %f4,%f12,%f4
+
+ fitod %f11,%f2
+ faddd %f2,%f12,%f2
+
+ fmuld %f4,%f8,%f6
+
+ sub %i3,%g1,%l2
+ st %l2,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ fitod %f10,%f4
+ faddd %f4,%f12,%f4
+
+ fdivd %f2,%f4,%f2
+ fcmped %f2,%f12
+ nop
+ fbge,a L(1)
+ fsubd %f2,%f12,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(2)
+ ld [%fp-8],%i4
+L(1):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ add %i4,%g1,%i4
+L(2):
+ wr %g0,%i4,%y
+ sra %i3,31,%g2
+ and %i4,%g2,%g2
+ andcc %g0,0,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,0,%g1
+ add %g1,%g2,%i0
+ rd %y,%g3
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(3)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(3):
+ blu L(4)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(4):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/gmp-6.3.0/mpn/sparc32/udiv_nfp.asm b/gmp-6.3.0/mpn/sparc32/udiv_nfp.asm
new file mode 100644
index 0000000..ebbb820
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/udiv_nfp.asm
@@ -0,0 +1,202 @@
+dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl This is for v7 CPUs without a floating-point unit.
+
+dnl Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr o0
+C n1 o1
+C n0 o2
+C d o3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+ tst %o3
+ bneg L(largedivisor)
+ mov 8,%g1
+
+ b L(p1)
+ addxcc %o2,%o2,%o2
+
+L(plop):
+ bcc L(n1)
+ addxcc %o2,%o2,%o2
+L(p1): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n2)
+ addxcc %o2,%o2,%o2
+L(p2): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n3)
+ addxcc %o2,%o2,%o2
+L(p3): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n4)
+ addxcc %o2,%o2,%o2
+L(p4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(plop)
+ subcc %o1,%o3,%o4
+ bcc L(n5)
+ addxcc %o2,%o2,%o2
+L(p5): st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(nlop):
+ bcc L(p1)
+ addxcc %o2,%o2,%o2
+L(n1): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p2)
+ addxcc %o2,%o2,%o2
+L(n2): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p3)
+ addxcc %o2,%o2,%o2
+L(n3): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p4)
+ addxcc %o2,%o2,%o2
+L(n4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(nlop)
+ subcc %o4,%o3,%o1
+ bcc L(p5)
+ addxcc %o2,%o2,%o2
+L(n5): st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(largedivisor):
+ and %o2,1,%o5 C %o5 = n0 & 1
+
+ srl %o2,1,%o2
+ sll %o1,31,%g2
+ or %g2,%o2,%o2 C %o2 = lo(n1n0 >> 1)
+ srl %o1,1,%o1 C %o1 = hi(n1n0 >> 1)
+
+ and %o3,1,%g2
+ srl %o3,1,%g3 C %g3 = floor(d / 2)
+ add %g3,%g2,%g3 C %g3 = ceil(d / 2)
+
+ b L(Lp1)
+ addxcc %o2,%o2,%o2
+
+L(Lplop):
+ bcc L(Ln1)
+ addxcc %o2,%o2,%o2
+L(Lp1): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln2)
+ addxcc %o2,%o2,%o2
+L(Lp2): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln3)
+ addxcc %o2,%o2,%o2
+L(Lp3): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln4)
+ addxcc %o2,%o2,%o2
+L(Lp4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(Lplop)
+ subcc %o1,%g3,%o4
+ bcc L(Ln5)
+ addxcc %o2,%o2,%o2
+L(Lp5): add %o1,%o1,%o1 C << 1
+ tst %g2
+ bne L(oddp)
+ add %o5,%o1,%o1
+ st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(Lnlop):
+ bcc L(Lp1)
+ addxcc %o2,%o2,%o2
+L(Ln1): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp2)
+ addxcc %o2,%o2,%o2
+L(Ln2): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp3)
+ addxcc %o2,%o2,%o2
+L(Ln3): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp4)
+ addxcc %o2,%o2,%o2
+L(Ln4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(Lnlop)
+ subcc %o4,%g3,%o1
+ bcc L(Lp5)
+ addxcc %o2,%o2,%o2
+L(Ln5): add %o4,%o4,%o4 C << 1
+ tst %g2
+ bne L(oddn)
+ add %o5,%o4,%o4
+ st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(oddp):
+ xnor %g0,%o2,%o2
+ C q' in %o2. r' in %o1
+ addcc %o1,%o2,%o1
+ bcc L(Lp6)
+ addx %o2,0,%o2
+ sub %o1,%o3,%o1
+L(Lp6): subcc %o1,%o3,%g0
+ bcs L(Lp7)
+ subx %o2,-1,%o2
+ sub %o1,%o3,%o1
+L(Lp7): st %o1,[%o0]
+ retl
+ mov %o2,%o0
+
+L(oddn):
+ xnor %g0,%o2,%o2
+ C q' in %o2. r' in %o4
+ addcc %o4,%o2,%o4
+ bcc L(Ln6)
+ addx %o2,0,%o2
+ sub %o4,%o3,%o4
+L(Ln6): subcc %o4,%o3,%g0
+ bcs L(Ln7)
+ subx %o2,-1,%o2
+ sub %o4,%o3,%o4
+L(Ln7): st %o4,[%o0]
+ retl
+ mov %o2,%o0
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/add_n.asm b/gmp-6.3.0/mpn/sparc32/ultrasparct1/add_n.asm
new file mode 100644
index 0000000..c781596
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/add_n.asm
@@ -0,0 +1,70 @@
+dnl SPARC T1 32-bit mpn_add_n.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', %o0)
+define(`ap', %o1)
+define(`bp', %o2)
+define(`n', %o3)
+define(`cy', %o4)
+
+define(`i', %o3)
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc)
+
+ASM_START()
+PROLOGUE(mpn_add_nc)
+ b L(ent)
+ srl cy, 0, cy C strip any bogus high bits
+EPILOGUE()
+
+PROLOGUE(mpn_add_n)
+ mov 0, cy
+L(ent): srl n, 0, n C strip any bogus high bits
+ sll n, 2, n
+ add ap, n, ap
+ add bp, n, bp
+ add rp, n, rp
+ neg n, i
+
+L(top): lduw [ap+i], %g1
+ lduw [bp+i], %g2
+ add %g1, %g2, %g3
+ add %g3, cy, %g3
+ stw %g3, [rp+i]
+ add i, 4, i
+ brnz i, L(top)
+ srlx %g3, 32, cy
+
+ retl
+ mov cy, %o0 C return value
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/addmul_1.asm b/gmp-6.3.0/mpn/sparc32/ultrasparct1/addmul_1.asm
new file mode 100644
index 0000000..89da186
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/addmul_1.asm
@@ -0,0 +1,90 @@
+dnl SPARC T1 32-bit mpn_addmul_1.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 24
+C UltraSPARC T2: 19
+C UltraSPARC T3: 19
+C UltraSPARC T4: 5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ save %sp, -96, %sp
+ srl n, 0, %o4
+ srl v0, 0, %g1
+ subcc %o4, 1, %o4
+ be L(final_one)
+ clr %o5
+
+L(top): lduw [up+0], %l0
+ lduw [rp+0], %l2
+ lduw [up+4], %l1
+ lduw [rp+4], %l3
+ mulx %l0, %g1, %g3
+ add up, 8, up
+ mulx %l1, %g1, %o3
+ sub %o4, 2, %o4
+ add rp, 8, rp
+ add %l2, %g3, %g3
+ add %o5, %g3, %g3
+ stw %g3, [rp-8]
+ srlx %g3, 32, %o5
+ add %l3, %o3, %o3
+ add %o5, %o3, %o3
+ stw %o3, [rp-4]
+ brgz %o4, L(top)
+ srlx %o3, 32, %o5
+
+ brlz,pt %o4, L(done)
+ nop
+
+L(final_one):
+ lduw [up+0], %l0
+ lduw [rp+0], %l2
+ mulx %l0, %g1, %g3
+ add %l2, %g3, %g3
+ add %o5, %g3, %g3
+ stw %g3, [rp+0]
+ srlx %g3, 32, %o5
+
+L(done):
+ ret
+ restore %o5, 0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/gmp-mparam.h b/gmp-6.3.0/mpn/sparc32/ultrasparct1/gmp-mparam.h
new file mode 100644
index 0000000..6f9d5a4
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/gmp-mparam.h
@@ -0,0 +1,153 @@
+/* UltraSPARC T 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 3
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 21
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 35
+
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 98
+#define MUL_TOOM44_THRESHOLD 166
+#define MUL_TOOM6H_THRESHOLD 226
+#define MUL_TOOM8H_THRESHOLD 333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 139
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 98
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 120
+
+#define SQR_BASECASE_THRESHOLD 6
+#define SQR_TOOM2_THRESHOLD 34
+#define SQR_TOOM3_THRESHOLD 110
+#define SQR_TOOM4_THRESHOLD 178
+#define SQR_TOOM6_THRESHOLD 240
+#define SQR_TOOM8_THRESHOLD 333
+
+#define MULMID_TOOM42_THRESHOLD 22
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 13
+
+#define MUL_FFT_MODF_THRESHOLD 280 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 280, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
+ { 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 21, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 19, 7}, { 41, 8}, { 23, 7}, \
+ { 49, 8}, { 27, 9}, { 15, 8}, { 31, 7}, \
+ { 63, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255,10}, { 79, 9}, { 159, 8}, { 319,10}, \
+ { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \
+ { 159, 9}, { 319,10}, { 175,11}, { 95,10}, \
+ { 191, 9}, { 383,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 66
+#define MUL_FFT_THRESHOLD 3712
+
+#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 240, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
+ { 20, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \
+ { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
+ { 39, 8}, { 23, 7}, { 47, 8}, { 27, 9}, \
+ { 15, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 143,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 175,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 143, 9}, { 287,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 70
+#define SQR_FFT_THRESHOLD 2624
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 51
+#define MULLO_MUL_N_THRESHOLD 6633
+
+#define DC_DIV_QR_THRESHOLD 51
+#define DC_DIVAPPR_Q_THRESHOLD 202
+#define DC_BDIV_QR_THRESHOLD 47
+#define DC_BDIV_Q_THRESHOLD 124
+
+#define INV_MULMOD_BNM1_THRESHOLD 26
+#define INV_NEWTON_THRESHOLD 266
+#define INV_APPR_THRESHOLD 222
+
+#define BINV_NEWTON_THRESHOLD 296
+#define REDC_1_TO_REDC_N_THRESHOLD 59
+
+#define MU_DIV_QR_THRESHOLD 1334
+#define MU_DIVAPPR_Q_THRESHOLD 1499
+#define MUPI_DIV_QR_THRESHOLD 116
+#define MU_BDIV_QR_THRESHOLD 1057
+#define MU_BDIV_Q_THRESHOLD 1334
+
+#define POWM_SEC_TABLE 6,35,213,724,2618
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 84
+#define HGCD_APPR_THRESHOLD 101
+#define HGCD_REDUCE_THRESHOLD 1437
+#define GCD_DC_THRESHOLD 372
+#define GCDEXT_DC_THRESHOLD 253
+#define JACOBI_BASE_METHOD 2
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 399
+#define SET_STR_PRECOMPUTE_THRESHOLD 885
+
+#define FAC_DSC_THRESHOLD 179
+#define FAC_ODD_THRESHOLD 29
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/mul_1.asm b/gmp-6.3.0/mpn/sparc32/ultrasparct1/mul_1.asm
new file mode 100644
index 0000000..0239cd2
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/mul_1.asm
@@ -0,0 +1,83 @@
+dnl SPARC T1 32-bit mpn_mul_1.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 20
+C UltraSPARC T2: 18
+C UltraSPARC T3: 18
+C UltraSPARC T4: 4
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`v0', `%o3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ srl n, 0, n
+ srl v0, 0, v0
+ subcc n, 1, n
+ be L(final_one)
+ clr %o5
+
+L(top): lduw [up+0], %g1
+ lduw [up+4], %g2
+ mulx %g1, v0, %g3
+ add up, 8, up
+ mulx %g2, v0, %o4
+ sub n, 2, n
+ add rp, 8, rp
+ add %o5, %g3, %g3
+ stw %g3, [rp-8]
+ srlx %g3, 32, %o5
+ add %o5, %o4, %o4
+ stw %o4, [rp-4]
+ brgz n, L(top)
+ srlx %o4, 32, %o5
+
+ brlz,pt n, L(done)
+ nop
+
+L(final_one):
+ lduw [up+0], %g1
+ mulx %g1, v0, %g3
+ add %o5, %g3, %g3
+ stw %g3, [rp+0]
+ srlx %g3, 32, %o5
+
+L(done):
+ retl
+ mov %o5, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/sqr_diagonal.asm b/gmp-6.3.0/mpn/sparc32/ultrasparct1/sqr_diagonal.asm
new file mode 100644
index 0000000..3b906ef
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/sqr_diagonal.asm
@@ -0,0 +1,55 @@
+dnl SPARC T1 32-bit mpn_sqr_diagonal.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+ deccc n C n--
+ nop
+
+L(top): lduw [up+0], %g1
+ add up, 4, up C up++
+ mulx %g1, %g1, %g3
+ stw %g3, [rp+0]
+ srlx %g3, 32, %g4
+ stw %g4, [rp+4]
+ add rp, 8, rp C rp += 2
+ bnz %icc, L(top)
+ deccc n C n--
+
+ retl
+ nop
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/sub_n.asm b/gmp-6.3.0/mpn/sparc32/ultrasparct1/sub_n.asm
new file mode 100644
index 0000000..946bc3f
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/sub_n.asm
@@ -0,0 +1,70 @@
+dnl SPARC T1 32-bit mpn_sub_n.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', %o0)
+define(`ap', %o1)
+define(`bp', %o2)
+define(`n', %o3)
+define(`cy', %o4)
+
+define(`i', %o3)
+
+MULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(mpn_sub_nc)
+ b L(ent)
+ srl cy, 0, cy C strip any bogus high bits
+EPILOGUE()
+
+PROLOGUE(mpn_sub_n)
+ mov 0, cy
+L(ent): srl n, 0, n C strip any bogus high bits
+ sll n, 2, n
+ add ap, n, ap
+ add bp, n, bp
+ add rp, n, rp
+ neg n, i
+
+L(top): lduw [ap+i], %g1
+ lduw [bp+i], %g2
+ sub %g1, %g2, %g3
+ sub %g3, cy, %g3
+ stw %g3, [rp+i]
+ add i, 4, i
+ brnz i, L(top)
+ srlx %g3, 63, cy
+
+ retl
+ mov cy, %o0 C return value
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc32/ultrasparct1/submul_1.asm b/gmp-6.3.0/mpn/sparc32/ultrasparct1/submul_1.asm
new file mode 100644
index 0000000..8920070
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/ultrasparct1/submul_1.asm
@@ -0,0 +1,91 @@
+dnl SPARC T1 32-bit mpn_submul_1.
+
+dnl Contributed to the GNU project by David Miller.
+
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 24
+C UltraSPARC T2: 19
+C UltraSPARC T3: 19
+C UltraSPARC T4: 5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ save %sp, -96, %sp
+ srl n, 0, %o4
+ srl v0, 0, %g1
+ subcc %o4, 1, %o4
+ be L(final_one)
+ subcc %g0, 0, %o5
+
+L(top): lduw [up+0], %l0
+ lduw [rp+0], %l2
+ lduw [up+4], %l1
+ lduw [rp+4], %l3
+ mulx %l0, %g1, %g3
+ add up, 8, up
+ mulx %l1, %g1, %o3
+ sub %o4, 2, %o4
+ add rp, 8, rp
+ addx %o5, %g3, %g3
+ srlx %g3, 32, %o5
+ subcc %l2, %g3, %g3
+ stw %g3, [rp-8]
+ addx %o5, %o3, %o3
+ srlx %o3, 32, %o5
+ subcc %l3, %o3, %o3
+ brgz %o4, L(top)
+ stw %o3, [rp-4]
+
+ brlz,pt %o4, L(done)
+ nop
+
+L(final_one):
+ lduw [up+0], %l0
+ lduw [rp+0], %l2
+ mulx %l0, %g1, %g3
+ addx %o5, %g3, %g3
+ srlx %g3, 32, %o5
+ subcc %l2, %g3, %g3
+ stw %g3, [rp+0]
+
+L(done):
+ addx %o5, 0, %o5
+ ret
+ restore %o5, 0, %o0
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/sparc32/umul.asm b/gmp-6.3.0/mpn/sparc32/umul.asm
new file mode 100644
index 0000000..3a20b95
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/umul.asm
@@ -0,0 +1,77 @@
+dnl SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ wr %g0,%o1,%y
+ sra %o2,31,%g2 C Don't move this insn
+ and %o1,%g2,%g2 C Don't move this insn
+ andcc %g0,0,%g1 C Don't move this insn
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,0,%g1
+ rd %y,%g3
+ st %g3,[%o0]
+ retl
+ add %g1,%g2,%o0
+EPILOGUE(mpn_umul_ppmm)
diff --git a/gmp-6.3.0/mpn/sparc32/v8/addmul_1.asm b/gmp-6.3.0/mpn/sparc32/v8/addmul_1.asm
new file mode 100644
index 0000000..0bf1b24
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/addmul_1.asm
@@ -0,0 +1,109 @@
+dnl SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright 1992-1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ ld [%o1+0],%o4
+ andcc %o2,1,%g0
+ be L(bx0)
+ andcc %o2,2,%g0
+L(bx1): be L(01)
+ orcc %g0,%g0,%g2
+L(b11): add %o0,-8,%o0
+ b L(11)
+ add %o1,-8,%o1
+L(bx0): be L(b00)
+ orcc %g0,%g0,%g2
+L(b10): add %o0,-12,%o0
+ b L(10)
+ add %o1,4,%o1
+L(b00): add %o0,-4,%o0
+ b L(00)
+ add %o1,-4,%o1
+
+L(top): addcc %g3,%g2,%g3 C 1
+ ld [%o1+4],%o4 C 2
+ rd %y,%g2 C 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] C 1
+L(00): umul %o4,%o3,%g3 C 2
+ ld [%o0+4],%g1 C 2
+ addxcc %g3,%g2,%g3 C 2
+ ld [%o1+8],%o4 C 3
+ rd %y,%g2 C 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] C 2
+L(11): umul %o4,%o3,%g3 C 3
+ addxcc %g3,%g2,%g3 C 3
+ ld [%o1+12],%o4 C 4
+ rd %y,%g2 C 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] C 3
+L(10): umul %o4,%o3,%g3 C 4
+ addxcc %g3,%g2,%g3 C 4
+ ld [%o1+0],%o4 C 1
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] C 4
+ add %o0,16,%o0
+ addx %g0,%g2,%g2
+L(01): addcc %o2,-4,%o2
+ bg L(top)
+ umul %o4,%o3,%g3 C 1
+
+ addcc %g3,%g2,%g3 C 4
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] C 4
+
+ retl
+ addx %g0,%g2,%o0
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/v8/gmp-mparam.h b/gmp-6.3.0/mpn/sparc32/v8/gmp-mparam.h
new file mode 100644
index 0000000..e57897b
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/gmp-mparam.h
@@ -0,0 +1,73 @@
+/* SPARC v8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* Generated by tuneup.c, 2004-02-07, gcc 2.95 */
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 65
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 65
+
+#define DIV_SB_PREINV_THRESHOLD 5
+#define DIV_DC_THRESHOLD 24
+#define POWM_THRESHOLD 38
+
+#define HGCD_THRESHOLD 69
+#define GCD_ACCEL_THRESHOLD 3
+#define GCD_DC_THRESHOLD 498
+#define JACOBI_BASE_METHOD 2
+
+#define DIVREM_1_NORM_THRESHOLD 6
+#define DIVREM_1_UNNORM_THRESHOLD 11
+#define MOD_1_NORM_THRESHOLD 5
+#define MOD_1_UNNORM_THRESHOLD 9
+#define USE_PREINV_DIVREM_1 1
+#define USE_PREINV_MOD_1 1
+#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 4
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 23
+#define SET_STR_THRESHOLD 1679
+
+#define MUL_FFT_TABLE { 272, 672, 1152, 2560, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD 264
+#define MUL_FFT_THRESHOLD 1792
+
+#define SQR_FFT_TABLE { 304, 672, 1152, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD 264
+#define SQR_FFT_THRESHOLD 1728
diff --git a/gmp-6.3.0/mpn/sparc32/v8/mul_1.asm b/gmp-6.3.0/mpn/sparc32/v8/mul_1.asm
new file mode 100644
index 0000000..d03a0e6
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/mul_1.asm
@@ -0,0 +1,93 @@
+dnl SPARC v8 mpn_mul_1 -- Multiply a limb vector with a single limb and
+dnl store the product in a second limb vector.
+
+dnl Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ ld [%o1+0],%o4
+ andcc %o2,1,%g0
+ be L(bx0)
+ andcc %o2,2,%g0
+L(bx1): be L(01)
+ orcc %g0,%g0,%g2
+L(b11): add %o0,-8,%o0
+ b L(11)
+ add %o1,-8,%o1
+L(bx0): be L(b00)
+ orcc %g0,%g0,%g2
+L(b10): add %o0,-12,%o0
+ b L(10)
+ add %o1,4,%o1
+L(b00): add %o0,-4,%o0
+ b L(00)
+ add %o1,-4,%o1
+
+L(top): addcc %g3,%g2,%g3 C 1
+ ld [%o1+4],%o4 C 2
+ st %g3,[%o0+0] C 1
+ rd %y,%g2 C 1
+L(00): umul %o4,%o3,%g3 C 2
+ addxcc %g3,%g2,%g3 C 2
+ ld [%o1+8],%o4 C 3
+ st %g3,[%o0+4] C 2
+ rd %y,%g2 C 2
+L(11): umul %o4,%o3,%g3 C 3
+ addxcc %g3,%g2,%g3 C 3
+ ld [%o1+12],%o4 C 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] C 3
+ rd %y,%g2 C 3
+L(10): umul %o4,%o3,%g3 C 4
+ addxcc %g3,%g2,%g3 C 4
+ ld [%o1+0],%o4 C 1
+ st %g3,[%o0+12] C 4
+ add %o0,16,%o0
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+L(01): addcc %o2,-4,%o2
+ bg L(top)
+ umul %o4,%o3,%g3 C 1
+
+ addcc %g3,%g2,%g3 C 4
+ st %g3,[%o0+0] C 4
+ rd %y,%g2 C 4
+
+ retl
+ addx %g0,%g2,%o0
+EPILOGUE(mpn_mul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/v8/submul_1.asm b/gmp-6.3.0/mpn/sparc32/v8/submul_1.asm
new file mode 100644
index 0000000..187314e
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/submul_1.asm
@@ -0,0 +1,67 @@
+dnl SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ sub %g0,%o2,%o2 C negate ...
+ sll %o2,2,%o2 C ... and scale size
+ sub %o1,%o2,%o1 C o1 is offset s1_ptr
+ sub %o0,%o2,%g1 C g1 is offset res_ptr
+
+ mov 0,%o0 C clear cy_limb
+
+L(loop):
+ ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+ addcc %o2,4,%o2
+ bne L(loop)
+ nop
+
+ retl
+ nop
+EPILOGUE(mpn_submul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/v8/supersparc/gmp-mparam.h b/gmp-6.3.0/mpn/sparc32/v8/supersparc/gmp-mparam.h
new file mode 100644
index 0000000..1ac9239
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/supersparc/gmp-mparam.h
@@ -0,0 +1,73 @@
+/* SuperSPARC gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* Generated by tuneup.c, 2004-02-10, gcc 3.3 */
+
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 81
+
+#define SQR_BASECASE_THRESHOLD 5
+#define SQR_TOOM2_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 86
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 26
+#define POWM_THRESHOLD 79
+
+#define HGCD_THRESHOLD 97
+#define GCD_ACCEL_THRESHOLD 3
+#define GCD_DC_THRESHOLD 470
+#define JACOBI_BASE_METHOD 2
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 3
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 3
+#define USE_PREINV_DIVREM_1 1
+#define USE_PREINV_MOD_1 1
+#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 19
+#define GET_STR_PRECOMPUTE_THRESHOLD 34
+#define SET_STR_THRESHOLD 3524
+
+#define MUL_FFT_TABLE { 304, 800, 1408, 3584, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD 264
+#define MUL_FFT_THRESHOLD 2304
+
+#define SQR_FFT_TABLE { 336, 800, 1408, 3584, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD 280
+#define SQR_FFT_THRESHOLD 2304
diff --git a/gmp-6.3.0/mpn/sparc32/v8/supersparc/udiv.asm b/gmp-6.3.0/mpn/sparc32/v8/supersparc/udiv.asm
new file mode 100644
index 0000000..12f66ce
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/supersparc/udiv.asm
@@ -0,0 +1,131 @@
+dnl SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h.
+dnl This is for SuperSPARC only, to compensate for its semi-functional
+dnl udiv instruction.
+
+dnl Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+
+ifdef(`PIC',
+` TEXT
+L(getpc):
+ retl
+ nop')
+
+ TEXT
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc): call L(getpc) C put address of this insn in %o7
+ ldd [%o7+L(C0)-L(pc)],%f8',
+` sethi %hi(L(C0)),%o7
+ ldd [%o7+%lo(L(C0))],%f8')
+
+ fitod %f10,%f4
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+
+ifdef(`PIC',
+` ldd [%o7+L(C1)-L(pc)],%f4',
+` sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4')
+
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ umul %i3,%i4,%g3
+ rd %y,%i0
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/gmp-6.3.0/mpn/sparc32/v8/udiv.asm b/gmp-6.3.0/mpn/sparc32/v8/udiv.asm
new file mode 100644
index 0000000..12f66ce
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/udiv.asm
@@ -0,0 +1,131 @@
+dnl SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h.
+dnl This is for SuperSPARC only, to compensate for its semi-functional
+dnl udiv instruction.
+
+dnl Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+
+ifdef(`PIC',
+` TEXT
+L(getpc):
+ retl
+ nop')
+
+ TEXT
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc): call L(getpc) C put address of this insn in %o7
+ ldd [%o7+L(C0)-L(pc)],%f8',
+` sethi %hi(L(C0)),%o7
+ ldd [%o7+%lo(L(C0))],%f8')
+
+ fitod %f10,%f4
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+
+ifdef(`PIC',
+` ldd [%o7+L(C1)-L(pc)],%f4',
+` sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4')
+
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ umul %i3,%i4,%g3
+ rd %y,%i0
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/gmp-6.3.0/mpn/sparc32/v8/umul.asm b/gmp-6.3.0/mpn/sparc32/v8/umul.asm
new file mode 100644
index 0000000..1a2e84b
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v8/umul.asm
@@ -0,0 +1,40 @@
+dnl SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ umul %o1,%o2,%g2
+ st %g2,[%o0]
+ retl
+ rd %y,%o0
+EPILOGUE(mpn_umul_ppmm)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/README b/gmp-6.3.0/mpn/sparc32/v9/README
new file mode 100644
index 0000000..9b39713
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/README
@@ -0,0 +1,4 @@
+Code for SPARC processors implementing version 9 of the SPARC architecture.
+This code is for systems that doesn't preserve the full 64-bit contents of
+integer register at context switch. For other systems (such as Solaris 7 or
+later) use the code in ../../sparc64.
diff --git a/gmp-6.3.0/mpn/sparc32/v9/add_n.asm b/gmp-6.3.0/mpn/sparc32/v9/add_n.asm
new file mode 100644
index 0000000..7bd5974
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/add_n.asm
@@ -0,0 +1,129 @@
+dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl sum in a third limb vector.
+
+dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(rp,%o0)
+define(s1p,%o1)
+define(s2p,%o2)
+define(n,%o3)
+define(cy,%g1)
+
+C This code uses 64-bit operations on `o' and `g' registers. It doesn't
+C require that `o' registers' upper 32 bits are preserved by the operating
+C system, but if they are not, they must be zeroed. That is indeed what
+C happens at least on Slowaris 2.5 and 2.6.
+
+C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
+C about 10 cycles/limb from the Ecache.
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ lduw [s1p+0],%o4
+ lduw [s2p+0],%o5
+ addcc n,-2,n
+ bl,pn %icc,L(end1)
+ lduw [s1p+4],%g2
+ lduw [s2p+4],%g3
+ be,pn %icc,L(end2)
+ mov 0,cy
+
+ .align 16
+L(loop):
+ add %o4,%o5,%g4
+ add rp,8,rp
+ lduw [s1p+8],%o4
+ fitod %f0,%f2
+C ---
+ add cy,%g4,%g4
+ addcc n,-1,n
+ lduw [s2p+8],%o5
+ fitod %f0,%f2
+C ---
+ srlx %g4,32,cy
+ add s2p,8,s2p
+ stw %g4,[rp-8]
+ be,pn %icc,L(exito)+4
+C ---
+ add %g2,%g3,%g4
+ addcc n,-1,n
+ lduw [s1p+12],%g2
+ fitod %f0,%f2
+C ---
+ add cy,%g4,%g4
+ add s1p,8,s1p
+ lduw [s2p+4],%g3
+ fitod %f0,%f2
+C ---
+ srlx %g4,32,cy
+ bne,pt %icc,L(loop)
+ stw %g4,[rp-4]
+C ---
+L(exite):
+ add %o4,%o5,%g4
+ add cy,%g4,%g4
+ srlx %g4,32,cy
+ stw %g4,[rp+0]
+ add %g2,%g3,%g4
+ add cy,%g4,%g4
+ stw %g4,[rp+4]
+ retl
+ srlx %g4,32,%o0
+
+L(exito):
+ add %g2,%g3,%g4
+ add cy,%g4,%g4
+ srlx %g4,32,cy
+ stw %g4,[rp-4]
+ add %o4,%o5,%g4
+ add cy,%g4,%g4
+ stw %g4,[rp+0]
+ retl
+ srlx %g4,32,%o0
+
+L(end1):
+ add %o4,%o5,%g4
+ stw %g4,[rp+0]
+ retl
+ srlx %g4,32,%o0
+
+L(end2):
+ add %o4,%o5,%g4
+ srlx %g4,32,cy
+ stw %g4,[rp+0]
+ add %g2,%g3,%g4
+ add cy,%g4,%g4
+ stw %g4,[rp+4]
+ retl
+ srlx %g4,32,%o0
+EPILOGUE(mpn_add_n)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/addmul_1.asm b/gmp-6.3.0/mpn/sparc32/v9/addmul_1.asm
new file mode 100644
index 0000000..2adf7a8
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/addmul_1.asm
@@ -0,0 +1,306 @@
+dnl SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Algorithm: We use two floating-point multiplies per limb product, with the
+C invariant v operand split into two 16-bit pieces, and the u operand split
+C into 32-bit pieces. We convert the two 48-bit products and transfer them to
+C the integer unit.
+
+C cycles/limb
+C UltraSPARC 1&2: 6.5
+C UltraSPARC 3: ?
+
+C Possible optimizations:
+C 1. Combine 32-bit memory operations into 64-bit operations. Since we're
+C memory bandwidth limited, this could save 1.5 cycles/limb.
+C 2. Unroll the inner loop. Since we already use alternate temporary areas,
+C it is very straightforward to unroll, using an exit branch midways.
+C Unrolling would allow deeper scheduling which could improve speed for L2
+C cache case.
+C 3. For mpn_mul_1: Use more alternating temp areas. The std'es and ldx'es
+C aren't sufficiently apart-scheduled with just two temp areas.
+C 4. Specialize for particular v values. If its upper 16 bits are zero, we
+C could save many operations.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+define(`FSIZE',224)
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ add %sp, -FSIZE, %sp
+ sethi %hi(0xffff), %g1
+ srl %o3, 16, %g2
+ or %g1, %lo(0xffff), %g1
+ and %o3, %g1, %g1
+ stx %g1, [%sp+104]
+ stx %g2, [%sp+112]
+ ldd [%sp+104], %f6
+ ldd [%sp+112], %f8
+ fxtod %f6, %f6
+ fxtod %f8, %f8
+ ld [%sp+104], %f10 C zero f10
+
+ mov 0, %g3 C cy = 0
+
+define(`fanop', `fitod %f18, %f0') C A quasi nop running in the FA pipe
+
+ add %sp, 160, %o5 C point in scratch area
+ and %o5, -32, %o5 C align at 0 (mod 32) in scratch area
+
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_two_or_more
+ fxtod %f10, %f2
+
+ fmuld %f2, %f8, %f16
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ std %f12, [%o5+24]
+ ldx [%o5+16], %g2 C p16
+ ldx [%o5+24], %g1 C p0
+ lduw [%o0], %g5 C read rp[i]
+ b .L1
+ add %o0, -16, %o0
+
+ .align 16
+.L_two_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fmuld %f2, %f8, %f16
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_three_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ std %f12, [%o5+8]
+ lduw [%o0], %g5 C read rp[i]
+ ldx [%o5+16], %g2 C p16
+ ldx [%o5+24], %g1 C p0
+ b .L2
+ add %o0, -12, %o0
+
+ .align 16
+.L_three_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_four_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ std %f12, [%o5+24]
+ lduw [%o0], %g5 C read rp[i]
+ b .L3
+ add %o0, -8, %o0
+
+ .align 16
+.L_four_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_five_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ lduw [%o0], %g5 C read rp[i]
+ b .L4
+ add %o0, -4, %o0
+
+ .align 16
+.L_five_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ lduw [%o0], %g5 C read rp[i]
+ bne,pt %icc, .Loop
+ fxtod %f10, %f2
+ b,a .L5
+
+C BEGIN MAIN LOOP
+ .align 16
+C -- 0
+.Loop: nop
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+C -- 1
+ sllx %g2, 16, %g4 C (p16 << 16)
+ add %o0, 4, %o0 C rp++
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+C -- 2
+ nop
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ fanop
+C -- 3
+ nop
+ add %g3, %g4, %g4 C p += cy
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+C -- 4
+ nop
+ add %g5, %g4, %g4 C p += rp[i]
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+C -- 5
+ xor %o5, 16, %o5 C alternate scratch variables
+ add %o1, 4, %o1 C up++
+ stw %g4, [%o0-4]
+ fanop
+C -- 6
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0], %g5 C read rp[i]
+ bne,pt %icc, .Loop
+ fxtod %f10, %f2
+C END MAIN LOOP
+
+.L5: fdtox %f16, %f14
+ sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g4, %g3, %g4 C p += cy
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ add %g5, %g4, %g4 C p += rp[i]
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ xor %o5, 16, %o5
+ stw %g4, [%o0+0]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+4], %g5 C read rp[i]
+
+.L4: fdtox %f16, %f14
+ sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ std %f14, [%o5+0]
+ add %g5, %g4, %g4 C p += rp[i]
+ std %f12, [%o5+8]
+ xor %o5, 16, %o5
+ stw %g4, [%o0+4]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+8], %g5 C read rp[i]
+
+.L3: sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ add %g5, %g4, %g4 C p += rp[i]
+ xor %o5, 16, %o5
+ stw %g4, [%o0+8]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+12], %g5 C read rp[i]
+
+.L2: sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ add %g5, %g4, %g4 C p += rp[i]
+ stw %g4, [%o0+12]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+16], %g5 C read rp[i]
+
+.L1: sllx %g2, 16, %g4 C (p16 << 16)
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ add %g3, %g4, %g4 C p += cy
+ add %g5, %g4, %g4 C p += rp[i]
+ stw %g4, [%o0+16]
+ srlx %g4, 32, %g3 C new cy
+
+ mov %g3, %o0
+ retl
+ sub %sp, -FSIZE, %sp
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/gmp-mparam.h b/gmp-6.3.0/mpn/sparc32/v9/gmp-mparam.h
new file mode 100644
index 0000000..f909e2c
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/gmp-mparam.h
@@ -0,0 +1,204 @@
+/* SPARC v9 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2002, 2004, 2009-2011, 2014 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1593 MHz ultrasparc3 running Solaris 10 (swift.nada.kth.se) */
+/* FFT tuning limit = 25000000 */
+/* Generated by tuneup.c, 2014-03-16, gcc 3.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 13
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 12
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 32
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1_NORM_THRESHOLD 4
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 43
+#define MUL_TOOM44_THRESHOLD 126
+#define MUL_TOOM6H_THRESHOLD 161
+#define MUL_TOOM8H_THRESHOLD 208
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 80
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 85
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 55
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 72
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 64
+#define SQR_TOOM3_THRESHOLD 85
+#define SQR_TOOM4_THRESHOLD 152
+#define SQR_TOOM6_THRESHOLD 185
+#define SQR_TOOM8_THRESHOLD 324
+
+#define MULMID_TOOM42_THRESHOLD 64
+
+#define MULMOD_BNM1_THRESHOLD 12
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define MUL_FFT_MODF_THRESHOLD 288 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 288, 5}, { 9, 4}, { 19, 5}, { 11, 6}, \
+ { 6, 5}, { 14, 6}, { 8, 5}, { 17, 6}, \
+ { 9, 5}, { 20, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 15, 6}, \
+ { 31, 7}, { 19, 8}, { 11, 7}, { 23, 9}, \
+ { 7, 8}, { 15, 7}, { 31, 8}, { 19, 7}, \
+ { 39, 8}, { 27, 9}, { 15, 8}, { 31, 7}, \
+ { 63, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 71, 8}, \
+ { 143, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 135, 8}, { 271, 9}, \
+ { 143, 8}, { 287,10}, { 79, 9}, { 175,10}, \
+ { 95, 9}, { 191, 8}, { 383,10}, { 111,11}, \
+ { 63,10}, { 143, 9}, { 287, 8}, { 575,10}, \
+ { 175,11}, { 95,10}, { 191, 9}, { 415, 8}, \
+ { 831,12}, { 63,11}, { 127,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895, 8}, { 1791,12}, { 127,11}, \
+ { 287,10}, { 607, 9}, { 1215, 8}, { 2431,11}, \
+ { 319, 9}, { 1279,11}, { 351,12}, { 191,11}, \
+ { 415,10}, { 831,11}, { 447,10}, { 895, 9}, \
+ { 1791,11}, { 479,13}, { 127,12}, { 255,11}, \
+ { 575,10}, { 1151,11}, { 607,12}, { 319,11}, \
+ { 703,12}, { 383,11}, { 831,12}, { 447,11}, \
+ { 895,10}, { 1791,11}, { 959,13}, { 255,12}, \
+ { 575,11}, { 1215,10}, { 2431,12}, { 703,13}, \
+ { 383,12}, { 959,14}, { 255,13}, { 511,12}, \
+ { 1087,11}, { 2175,12}, { 1215,11}, { 2431,13}, \
+ { 639,12}, { 1407,11}, { 2943,13}, { 895,12}, \
+ { 1919,14}, { 511,13}, { 1151,12}, { 2431,13}, \
+ { 1407,14}, { 767,13}, { 1791,15}, { 511,14}, \
+ { 1023,13}, { 2431,14}, { 1279,13}, { 2943,12}, \
+ { 5887,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 143
+#define MUL_FFT_THRESHOLD 2240
+
+#define SQR_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 244, 5}, { 8, 4}, { 17, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 17, 7}, { 9, 6}, \
+ { 20, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \
+ { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
+ { 39, 8}, { 23, 9}, { 15, 8}, { 39, 9}, \
+ { 23,10}, { 15, 9}, { 31, 8}, { 63, 9}, \
+ { 47,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
+ { 71, 8}, { 143, 7}, { 287, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 143, 8}, { 287,10}, { 79, 9}, \
+ { 159, 8}, { 319, 9}, { 175, 8}, { 351, 7}, \
+ { 703,10}, { 95, 9}, { 191, 8}, { 383, 9}, \
+ { 207, 8}, { 415, 9}, { 223,11}, { 63,10}, \
+ { 127, 9}, { 271,10}, { 143, 9}, { 287, 8}, \
+ { 575,10}, { 159, 9}, { 319,10}, { 175, 9}, \
+ { 351, 8}, { 703,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415, 8}, { 831,10}, \
+ { 223,12}, { 63,11}, { 127,10}, { 271, 9}, \
+ { 543,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 319, 9}, { 639,10}, { 351, 9}, { 703, 8}, \
+ { 1407,11}, { 191,10}, { 415, 9}, { 831,11}, \
+ { 223,10}, { 447, 9}, { 895,10}, { 479,12}, \
+ { 127,11}, { 255,10}, { 543,11}, { 287,10}, \
+ { 575,11}, { 319,10}, { 639,11}, { 351,10}, \
+ { 703,12}, { 191,11}, { 415,10}, { 831,11}, \
+ { 447,10}, { 895, 9}, { 1791,13}, { 127,12}, \
+ { 255,11}, { 575,12}, { 319,11}, { 703,10}, \
+ { 1407,12}, { 383,11}, { 831,12}, { 447,11}, \
+ { 959,10}, { 1919, 9}, { 3839,13}, { 255,12}, \
+ { 575,11}, { 1151,12}, { 703,11}, { 1407,13}, \
+ { 383,12}, { 959,14}, { 255,13}, { 511,12}, \
+ { 1215,11}, { 2431,13}, { 639,12}, { 1407,13}, \
+ { 767,12}, { 1599,13}, { 895,12}, { 1919,14}, \
+ { 511,13}, { 1151,12}, { 2431,13}, { 1407,12}, \
+ { 2815,14}, { 767,13}, { 1535,12}, { 3071,13}, \
+ { 1919,15}, { 511,14}, { 1023,13}, { 2431,14}, \
+ { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \
+ { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 153
+#define SQR_FFT_THRESHOLD 2112
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 144
+#define MULLO_MUL_N_THRESHOLD 4292
+
+#define DC_DIV_QR_THRESHOLD 74
+#define DC_DIVAPPR_Q_THRESHOLD 406
+#define DC_BDIV_QR_THRESHOLD 63
+#define DC_BDIV_Q_THRESHOLD 363
+
+#define INV_MULMOD_BNM1_THRESHOLD 108
+#define INV_NEWTON_THRESHOLD 351
+#define INV_APPR_THRESHOLD 303
+
+#define BINV_NEWTON_THRESHOLD 354
+#define REDC_1_TO_REDC_N_THRESHOLD 61
+
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 1099
+#define MUPI_DIV_QR_THRESHOLD 118
+#define MU_BDIV_QR_THRESHOLD 807
+#define MU_BDIV_Q_THRESHOLD 979
+
+#define POWM_SEC_TABLE 3,22,127,624,779,2351
+
+#define MATRIX22_STRASSEN_THRESHOLD 7
+#define HGCD_THRESHOLD 90
+#define HGCD_APPR_THRESHOLD 123
+#define HGCD_REDUCE_THRESHOLD 1494
+#define GCD_DC_THRESHOLD 283
+#define GCDEXT_DC_THRESHOLD 192
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 290
+#define SET_STR_PRECOMPUTE_THRESHOLD 634
+
+#define FAC_DSC_THRESHOLD 156
+#define FAC_ODD_THRESHOLD 25
diff --git a/gmp-6.3.0/mpn/sparc32/v9/mul_1.asm b/gmp-6.3.0/mpn/sparc32/v9/mul_1.asm
new file mode 100644
index 0000000..40aeffa
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/mul_1.asm
@@ -0,0 +1,287 @@
+dnl SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Algorithm: We use two floating-point multiplies per limb product, with the
+C invariant v operand split into two 16-bit pieces, and the u operand split
+C into 32-bit pieces. We convert the two 48-bit products and transfer them to
+C the integer unit.
+
+C cycles/limb
+C UltraSPARC 1&2: 6.5
+C UltraSPARC 3: ?
+
+C Possible optimizations:
+C 1. Combine 32-bit memory operations into 64-bit operations. Since we're
+C memory bandwidth limited, this could save 1.5 cycles/limb.
+C 2. Unroll the inner loop. Since we already use alternate temporary areas,
+C it is very straightforward to unroll, using an exit branch midways.
+C Unrolling would allow deeper scheduling which could improve speed for L2
+C cache case.
+C 3. For mpn_mul_1: Use more alternating temp areas. The std'es and ldx'es
+C aren't sufficiently apart-scheduled with just two temp areas.
+C 4. Specialize for particular v values. If its upper 16 bits are zero, we
+C could save many operations.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+define(`FSIZE',224)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ add %sp, -FSIZE, %sp
+ sethi %hi(0xffff), %g1
+ srl %o3, 16, %g2
+ or %g1, %lo(0xffff), %g1
+ and %o3, %g1, %g1
+ stx %g1, [%sp+104]
+ stx %g2, [%sp+112]
+ ldd [%sp+104], %f6
+ ldd [%sp+112], %f8
+ fxtod %f6, %f6
+ fxtod %f8, %f8
+ ld [%sp+104], %f10 C zero f10
+
+ mov 0, %g3 C cy = 0
+
+define(`fanop', `fitod %f18, %f0') C A quasi nop running in the FA pipe
+
+ add %sp, 160, %o5 C point in scratch area
+ and %o5, -32, %o5 C align at 0 (mod 32) in scratch area
+
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_two_or_more
+ fxtod %f10, %f2
+
+ fmuld %f2, %f8, %f16
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ std %f12, [%o5+24]
+ ldx [%o5+16], %g2 C p16
+ ldx [%o5+24], %g1 C p0
+ b .L1
+ add %o0, -16, %o0
+
+ .align 16
+.L_two_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fmuld %f2, %f8, %f16
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_three_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ std %f12, [%o5+8]
+ ldx [%o5+16], %g2 C p16
+ ldx [%o5+24], %g1 C p0
+ b .L2
+ add %o0, -12, %o0
+
+ .align 16
+.L_three_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_four_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ std %f12, [%o5+24]
+ b .L3
+ add %o0, -8, %o0
+
+ .align 16
+.L_four_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_five_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ b .L4
+ add %o0, -4, %o0
+
+ .align 16
+.L_five_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .Loop
+ fxtod %f10, %f2
+ b,a .L5
+
+C BEGIN MAIN LOOP
+ .align 16
+C -- 0
+.Loop: nop
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+C -- 1
+ sllx %g2, 16, %g4 C (p16 << 16)
+ add %o0, 4, %o0 C rp++
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+C -- 2
+ nop
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ fanop
+C -- 3
+ nop
+ add %g3, %g4, %g4 C p += cy
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+C -- 4
+ srlx %g4, 32, %g3 C new cy
+ add %o1, 4, %o1 C up++
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+C -- 5
+ xor %o5, 16, %o5 C alternate scratch variables
+ stw %g4, [%o0-4]
+ bne,pt %icc, .Loop
+ fxtod %f10, %f2
+C END MAIN LOOP
+
+.L5: fdtox %f16, %f14
+ sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g4, %g3, %g4 C p += cy
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ xor %o5, 16, %o5
+ stw %g4, [%o0+0]
+ srlx %g4, 32, %g3 C new cy
+
+.L4: fdtox %f16, %f14
+ sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ std %f14, [%o5+0]
+ std %f12, [%o5+8]
+ xor %o5, 16, %o5
+ stw %g4, [%o0+4]
+ srlx %g4, 32, %g3 C new cy
+
+.L3: sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ xor %o5, 16, %o5
+ stw %g4, [%o0+8]
+ srlx %g4, 32, %g3 C new cy
+
+.L2: sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ stw %g4, [%o0+12]
+ srlx %g4, 32, %g3 C new cy
+
+.L1: sllx %g2, 16, %g4 C (p16 << 16)
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ add %g3, %g4, %g4 C p += cy
+ stw %g4, [%o0+16]
+ srlx %g4, 32, %g3 C new cy
+
+ mov %g3, %o0
+ retl
+ sub %sp, -FSIZE, %sp
+EPILOGUE(mpn_mul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/sqr_diagonal.asm b/gmp-6.3.0/mpn/sparc32/v9/sqr_diagonal.asm
new file mode 100644
index 0000000..e024279
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/sqr_diagonal.asm
@@ -0,0 +1,462 @@
+dnl SPARC v9 32-bit mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+
+C This code uses a very deep software pipeline, due to the need for moving data
+C forth and back between the integer registers and floating-point registers.
+C
+C A VIS variant of this code would make the pipeline less deep, since the
+C masking now done in the integer unit could take place in the floating-point
+C unit using the FAND instruction. It would be possible to save several cycles
+C too.
+C
+C On UltraSPARC 1 and 2, this code runs at 11 cycles/limb from the Dcache and
+C not much slower from the Ecache. It would perhaps be possible to shave off
+C one cycle, but not easily. We cannot do better than 10 cycles/limb with the
+C used instructions, since we have 10 memory operations per limb. But a VIS
+C variant could run three cycles faster than the corresponding non-VIS code.
+
+C This is non-pipelined code showing the algorithm:
+C
+C .Loop:
+C lduw [up+0],%g4 C 00000000hhhhllll
+C sllx %g4,16,%g3 C 0000hhhhllll0000
+C or %g3,%g4,%g2 C 0000hhhhXXXXllll
+C andn %g2,%g5,%g2 C 0000hhhh0000llll
+C stx %g2,[%fp+80]
+C ldd [%fp+80],%f0
+C fitod %f0,%f4 C hi16
+C fitod %f1,%f6 C lo16
+C ld [up+0],%f9
+C fxtod %f8,%f2
+C fmuld %f2,%f4,%f4
+C fmuld %f2,%f6,%f6
+C fdtox %f4,%f4
+C fdtox %f6,%f6
+C std %f4,[%fp-24]
+C std %f6,[%fp-16]
+C ldx [%fp-24],%g2
+C ldx [%fp-16],%g1
+C sllx %g2,16,%g2
+C add %g2,%g1,%g1
+C stw %g1,[rp+0]
+C srlx %g1,32,%l0
+C stw %l0,[rp+4]
+C add up,4,up
+C subcc n,1,n
+C bne,pt %icc,.Loop
+C add rp,8,rp
+
+define(`fanop',`fitod %f12,%f10') dnl A quasi nop running in the FA pipe
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+.Lnoll:
+ .word 0
+
+PROLOGUE(mpn_sqr_diagonal)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`.Lpc: rd %pc,%o7
+ ld [%o7+.Lnoll-.Lpc],%f8',
+` sethi %hi(.Lnoll),%g1
+ ld [%g1+%lo(.Lnoll)],%f8')
+
+ sethi %hi(0xffff0000),%g5
+ add %i1,-8,%i1
+
+ lduw [%i1+8],%g4
+ add %i1,4,%i1 C s1_ptr++
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ bne,pt %icc,.L_grt_1
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+
+ add %i1,4,%i1 C s1_ptr++
+ stx %g2,[%fp+80]
+ ld [%i1],%f9
+ ldd [%fp+80],%f0
+ fxtod %f8,%f2
+ fitod %f0,%f4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ fmuld %f2,%f6,%f6
+ fdtox %f4,%f4
+ fdtox %f6,%f6
+ std %f4,[%fp-24]
+ std %f6,[%fp-16]
+
+ add %fp, 80, %l3
+ add %fp, -24, %l4
+ add %fp, 72, %l5
+ b .L1
+ add %fp, -40, %l6
+
+.L_grt_1:
+ stx %g2,[%fp+80]
+ lduw [%i1+8],%g4
+ add %i1,4,%i1 C s1_ptr++
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ bne,pt %icc,.L_grt_2
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+
+ stx %g2,[%fp+72]
+ ld [%i1],%f9
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+80],%f0
+ fxtod %f8,%f2
+ fitod %f0,%f4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ ldd [%fp+72],%f0
+ fdtox %f4,%f4
+ fdtox %f6,%f6
+ std %f4,[%fp-24]
+ fxtod %f8,%f2
+ std %f6,[%fp-16]
+ fitod %f0,%f4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ fmuld %f2,%f6,%f6
+ fdtox %f4,%f4
+
+ add %fp, 72, %l3
+ add %fp, -40, %l4
+ add %fp, 80, %l5
+ b .L2
+ add %fp, -24, %l6
+
+.L_grt_2:
+ stx %g2,[%fp+72]
+ lduw [%i1+8],%g4
+ ld [%i1],%f9
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+80],%f0
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ fxtod %f8,%f2
+ bne,pt %icc,.L_grt_3
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+
+ stx %g2,[%fp+80]
+ fitod %f0,%f4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+72],%f0
+ fdtox %f4,%f4
+ fdtox %f6,%f6
+ std %f4,[%fp-24]
+ fxtod %f8,%f2
+ std %f6,[%fp-16]
+ fitod %f0,%f4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ ld [%i1],%f9
+ add %fp, 80, %l3
+ fmuld %f2,%f6,%f6
+ add %fp, -24, %l4
+ ldd [%fp+80],%f0
+ add %fp, 72, %l5
+ fdtox %f4,%f4
+ b .L3
+ add %fp, -40, %l6
+
+.L_grt_3:
+ stx %g2,[%fp+80]
+ fitod %f0,%f4
+ lduw [%i1+8],%g4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+72],%f0
+ fdtox %f4,%f4
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ fdtox %f6,%f6
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ std %f4,[%fp-24]
+ fxtod %f8,%f2
+ std %f6,[%fp-16]
+ bne,pt %icc,.L_grt_4
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+
+ stx %g2,[%fp+72]
+ fitod %f0,%f4
+ fitod %f1,%f6
+ add %fp, 72, %l3
+ fmuld %f2,%f4,%f4
+ add %fp, -40, %l4
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+80],%f0
+ add %fp, 80, %l5
+ fdtox %f4,%f4
+ b .L4
+ add %fp, -24, %l6
+
+.L_grt_4:
+ stx %g2,[%fp+72]
+ fitod %f0,%f4
+ lduw [%i1+8],%g4
+ fitod %f1,%f6
+ fmuld %f2,%f4,%f4
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+80],%f0
+ fdtox %f4,%f4
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ fdtox %f6,%f6
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ std %f4,[%fp-40]
+ fxtod %f8,%f2
+ std %f6,[%fp-32]
+ be,pn %icc,.L5
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+
+ b,a .Loop
+
+ .align 16
+C --- LOOP BEGIN
+.Loop: nop
+ nop
+ stx %g2,[%fp+80]
+ fitod %f0,%f4
+C ---
+ nop
+ nop
+ lduw [%i1+8],%g4
+ fitod %f1,%f6
+C ---
+ nop
+ nop
+ ldx [%fp-24],%g2 C p16
+ fanop
+C ---
+ nop
+ nop
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f4,%f4
+C ---
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+C ---
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+72],%f0
+ fanop
+C ---
+ srlx %g1,32,%l0
+ nop
+ stw %g1,[%i0-8]
+ fdtox %f4,%f4
+C ---
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ nop
+ stw %l0,[%i0-4]
+ fdtox %f6,%f6
+C ---
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ std %f4,[%fp-24]
+ fxtod %f8,%f2
+C ---
+ std %f6,[%fp-16]
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+ be,pn %icc,.Lend
+ fanop
+C --- LOOP MIDDLE
+ nop
+ nop
+ stx %g2,[%fp+72]
+ fitod %f0,%f4
+C ---
+ nop
+ nop
+ lduw [%i1+8],%g4
+ fitod %f1,%f6
+C ---
+ nop
+ nop
+ ldx [%fp-40],%g2 C p16
+ fanop
+C ---
+ nop
+ nop
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f4,%f4
+C ---
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+C ---
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%fp+80],%f0
+ fanop
+C ---
+ srlx %g1,32,%l0
+ nop
+ stw %g1,[%i0-8]
+ fdtox %f4,%f4
+C ---
+ sllx %g4,16,%g3 C 0000hhhhllll0000
+ nop
+ stw %l0,[%i0-4]
+ fdtox %f6,%f6
+C ---
+ or %g3,%g4,%g2 C 0000hhhhXXXXllll
+ subcc %i2,1,%i2
+ std %f4,[%fp-40]
+ fxtod %f8,%f2
+C ---
+ std %f6,[%fp-32]
+ andn %g2,%g5,%g2 C 0000hhhh0000llll
+ bne,pt %icc,.Loop
+ fanop
+C --- LOOP END
+
+.L5: add %fp, 80, %l3
+ add %fp, -24, %l4
+ add %fp, 72, %l5
+ b .Ltail
+ add %fp, -40, %l6
+
+.Lend: add %fp, 72, %l3
+ add %fp, -40, %l4
+ add %fp, 80, %l5
+ add %fp, -24, %l6
+.Ltail: stx %g2,[%l3]
+ fitod %f0,%f4
+ fitod %f1,%f6
+ ldx [%l4],%g2 C p16
+ ldx [%l4+8],%g1 C p0
+ fmuld %f2,%f4,%f4
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i1,4,%i1 C s1_ptr++
+ ldd [%l5],%f0
+ srlx %g1,32,%l0
+ stw %g1,[%i0-8]
+ fdtox %f4,%f4
+ stw %l0,[%i0-4]
+.L4: fdtox %f6,%f6
+ std %f4,[%l4]
+ fxtod %f8,%f2
+ std %f6,[%l4+8]
+
+ fitod %f0,%f4
+ fitod %f1,%f6
+ ldx [%l6],%g2 C p16
+ ldx [%l6+8],%g1 C p0
+ fmuld %f2,%f4,%f4
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ ld [%i1],%f9
+ fmuld %f2,%f6,%f6
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ ldd [%l3],%f0
+ srlx %g1,32,%l0
+ stw %g1,[%i0-8]
+ fdtox %f4,%f4
+ stw %l0,[%i0-4]
+.L3: fdtox %f6,%f6
+ std %f4,[%l6]
+ fxtod %f8,%f2
+ std %f6,[%l6+8]
+
+ fitod %f0,%f4
+ fitod %f1,%f6
+ ldx [%l4],%g2 C p16
+ ldx [%l4+8],%g1 C p0
+ fmuld %f2,%f4,%f4
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ fmuld %f2,%f6,%f6
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ srlx %g1,32,%l0
+ stw %g1,[%i0-8]
+ fdtox %f4,%f4
+ stw %l0,[%i0-4]
+.L2: fdtox %f6,%f6
+ std %f4,[%l4]
+ std %f6,[%l4+8]
+
+ ldx [%l6],%g2 C p16
+ ldx [%l6+8],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ srlx %g1,32,%l0
+ stw %g1,[%i0-8]
+ stw %l0,[%i0-4]
+
+.L1: ldx [%l4],%g2 C p16
+ ldx [%l4+8],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %i0,8,%i0 C res_ptr++
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ srlx %g1,32,%l0
+ stw %g1,[%i0-8]
+ stw %l0,[%i0-4]
+
+ ret
+ restore %g0,%g0,%o0
+
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/sub_n.asm b/gmp-6.3.0/mpn/sparc32/v9/sub_n.asm
new file mode 100644
index 0000000..636c73b
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/sub_n.asm
@@ -0,0 +1,129 @@
+dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(rp,%o0)
+define(s1p,%o1)
+define(s2p,%o2)
+define(n,%o3)
+define(cy,%g1)
+
+C This code uses 64-bit operations on `o' and `g' registers. It doesn't
+C require that `o' registers' upper 32 bits are preserved by the operating
+C system, but if they are not, they must be zeroed. That is indeed what
+C happens at least on Slowaris 2.5 and 2.6.
+
+C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
+C about 10 cycles/limb from the Ecache.
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ lduw [s1p+0],%o4
+ lduw [s2p+0],%o5
+ addcc n,-2,n
+ bl,pn %icc,L(end1)
+ lduw [s1p+4],%g2
+ lduw [s2p+4],%g3
+ be,pn %icc,L(end2)
+ mov 0,cy
+
+ .align 16
+L(loop):
+ sub %o4,%o5,%g4
+ add rp,8,rp
+ lduw [s1p+8],%o4
+ fitod %f0,%f2
+C ---
+ sub %g4,cy,%g4
+ addcc n,-1,n
+ lduw [s2p+8],%o5
+ fitod %f0,%f2
+C ---
+ srlx %g4,63,cy
+ add s2p,8,s2p
+ stw %g4,[rp-8]
+ be,pn %icc,L(exito)+4
+C ---
+ sub %g2,%g3,%g4
+ addcc n,-1,n
+ lduw [s1p+12],%g2
+ fitod %f0,%f2
+C ---
+ sub %g4,cy,%g4
+ add s1p,8,s1p
+ lduw [s2p+4],%g3
+ fitod %f0,%f2
+C ---
+ srlx %g4,63,cy
+ bne,pt %icc,L(loop)
+ stw %g4,[rp-4]
+C ---
+L(exite):
+ sub %o4,%o5,%g4
+ sub %g4,cy,%g4
+ srlx %g4,63,cy
+ stw %g4,[rp+0]
+ sub %g2,%g3,%g4
+ sub %g4,cy,%g4
+ stw %g4,[rp+4]
+ retl
+ srlx %g4,63,%o0
+
+L(exito):
+ sub %g2,%g3,%g4
+ sub %g4,cy,%g4
+ srlx %g4,63,cy
+ stw %g4,[rp-4]
+ sub %o4,%o5,%g4
+ sub %g4,cy,%g4
+ stw %g4,[rp+0]
+ retl
+ srlx %g4,63,%o0
+
+L(end1):
+ sub %o4,%o5,%g4
+ stw %g4,[rp+0]
+ retl
+ srlx %g4,63,%o0
+
+L(end2):
+ sub %o4,%o5,%g4
+ srlx %g4,63,cy
+ stw %g4,[rp+0]
+ sub %g2,%g3,%g4
+ sub %g4,cy,%g4
+ stw %g4,[rp+4]
+ retl
+ srlx %g4,63,%o0
+EPILOGUE(mpn_sub_n)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/submul_1.asm b/gmp-6.3.0/mpn/sparc32/v9/submul_1.asm
new file mode 100644
index 0000000..92d0ce7
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/submul_1.asm
@@ -0,0 +1,316 @@
+dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Algorithm: We use two floating-point multiplies per limb product, with the
+C invariant v operand split into two 16-bit pieces, and the u operand split
+C into 32-bit pieces. We convert the two 48-bit products and transfer them to
+C the integer unit.
+
+C cycles/limb
+C UltraSPARC 1&2: 6.5
+C UltraSPARC 3: ?
+
+C Possible optimizations:
+C 1. Combine 32-bit memory operations into 64-bit operations. Since we're
+C memory bandwidth limited, this could save 1.5 cycles/limb.
+C 2. Unroll the inner loop. Since we already use alternate temporary areas,
+C it is very straightforward to unroll, using an exit branch midways.
+C Unrolling would allow deeper scheduling which could improve speed for L2
+C cache case.
+C 3. For mpn_mul_1: Use more alternating temp areas. The std'es and ldx'es
+C aren't sufficiently apart-scheduled with just two temp areas.
+C 4. Specialize for particular v values. If its upper 16 bits are zero, we
+C could save many operations.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+define(`FSIZE',224)
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ add %sp, -FSIZE, %sp
+ sethi %hi(0xffff), %g1
+ srl %o3, 16, %g2
+ or %g1, %lo(0xffff), %g1
+ and %o3, %g1, %g1
+ stx %g1, [%sp+104]
+ stx %g2, [%sp+112]
+ ldd [%sp+104], %f6
+ ldd [%sp+112], %f8
+ fxtod %f6, %f6
+ fxtod %f8, %f8
+ ld [%sp+104], %f10 C zero f10
+
+ mov 0, %g3 C cy = 0
+
+define(`fanop', `fitod %f18, %f0') C A quasi nop running in the FA pipe
+
+ add %sp, 160, %o5 C point in scratch area
+ and %o5, -32, %o5 C align at 0 (mod 32) in scratch area
+
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_two_or_more
+ fxtod %f10, %f2
+
+ fmuld %f2, %f8, %f16
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ std %f12, [%o5+24]
+ ldx [%o5+16], %g2 C p16
+ ldx [%o5+24], %g1 C p0
+ lduw [%o0], %g5 C read rp[i]
+ b .L1
+ add %o0, -16, %o0
+
+ .align 16
+.L_two_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fmuld %f2, %f8, %f16
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_three_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ std %f12, [%o5+8]
+ lduw [%o0], %g5 C read rp[i]
+ ldx [%o5+16], %g2 C p16
+ ldx [%o5+24], %g1 C p0
+ b .L2
+ add %o0, -12, %o0
+
+ .align 16
+.L_three_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_four_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ std %f12, [%o5+24]
+ lduw [%o0], %g5 C read rp[i]
+ b .L3
+ add %o0, -8, %o0
+
+ .align 16
+.L_four_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ fdtox %f4, %f12
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ bne,pt %icc, .L_five_or_more
+ fxtod %f10, %f2
+
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ lduw [%o0], %g5 C read rp[i]
+ b .L4
+ add %o0, -4, %o0
+
+ .align 16
+.L_five_or_more:
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+ ldx [%o5+16], %g2 C p16
+ fdtox %f4, %f12
+ ldx [%o5+24], %g1 C p0
+ std %f14, [%o5+16]
+ fmuld %f2, %f8, %f16
+ std %f12, [%o5+24]
+ fmuld %f2, %f6, %f4
+ add %o1, 4, %o1 C up++
+ lduw [%o0], %g5 C read rp[i]
+ bne,pt %icc, .Loop
+ fxtod %f10, %f2
+ b,a .L5
+
+C BEGIN MAIN LOOP
+ .align 16
+C -- 0
+.Loop: sub %g0, %g3, %g3
+ subcc %o2, 1, %o2
+ ld [%o1], %f11 C read up[i]
+ fdtox %f16, %f14
+C -- 1
+ sllx %g2, 16, %g4 C (p16 << 16)
+ add %o0, 4, %o0 C rp++
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+C -- 2
+ srl %g3, 0, %g3 C zero most significant 32 bits
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ fanop
+C -- 3
+ nop
+ add %g3, %g4, %g4 C p += cy
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+C -- 4
+ nop
+ sub %g5, %g4, %g4 C p += rp[i]
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+C -- 5
+ xor %o5, 16, %o5 C alternate scratch variables
+ add %o1, 4, %o1 C up++
+ stw %g4, [%o0-4]
+ fanop
+C -- 6
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0], %g5 C read rp[i]
+ bne,pt %icc, .Loop
+ fxtod %f10, %f2
+C END MAIN LOOP
+
+.L5: sub %g0, %g3, %g3
+ fdtox %f16, %f14
+ sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+ srl %g3, 0, %g3 C zero most significant 32 bits
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g4, %g3, %g4 C p += cy
+ std %f14, [%o5+0]
+ fmuld %f2, %f8, %f16
+ sub %g5, %g4, %g4 C p += rp[i]
+ std %f12, [%o5+8]
+ fmuld %f2, %f6, %f4
+ xor %o5, 16, %o5
+ stw %g4, [%o0+0]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+4], %g5 C read rp[i]
+
+ sub %g0, %g3, %g3
+.L4: fdtox %f16, %f14
+ sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ fdtox %f4, %f12
+ srl %g3, 0, %g3 C zero most significant 32 bits
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ std %f14, [%o5+0]
+ sub %g5, %g4, %g4 C p += rp[i]
+ std %f12, [%o5+8]
+ xor %o5, 16, %o5
+ stw %g4, [%o0+4]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+8], %g5 C read rp[i]
+
+ sub %g0, %g3, %g3
+.L3: sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ srl %g3, 0, %g3 C zero most significant 32 bits
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ sub %g5, %g4, %g4 C p += rp[i]
+ xor %o5, 16, %o5
+ stw %g4, [%o0+8]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+12], %g5 C read rp[i]
+
+ sub %g0, %g3, %g3
+.L2: sllx %g2, 16, %g4 C (p16 << 16)
+ ldx [%o5+0], %g2 C p16
+ srl %g3, 0, %g3 C zero most significant 32 bits
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ ldx [%o5+8], %g1 C p0
+ add %g3, %g4, %g4 C p += cy
+ sub %g5, %g4, %g4 C p += rp[i]
+ stw %g4, [%o0+12]
+ srlx %g4, 32, %g3 C new cy
+ lduw [%o0+16], %g5 C read rp[i]
+
+ sub %g0, %g3, %g3
+.L1: sllx %g2, 16, %g4 C (p16 << 16)
+ srl %g3, 0, %g3 C zero most significant 32 bits
+ add %g1, %g4, %g4 C p = p0 + (p16 << 16)
+ add %g3, %g4, %g4 C p += cy
+ sub %g5, %g4, %g4 C p += rp[i]
+ stw %g4, [%o0+16]
+ srlx %g4, 32, %g3 C new cy
+
+ sub %g0, %g3, %o0
+ retl
+ sub %sp, -FSIZE, %sp
+EPILOGUE(mpn_submul_1)
diff --git a/gmp-6.3.0/mpn/sparc32/v9/udiv.asm b/gmp-6.3.0/mpn/sparc32/v9/udiv.asm
new file mode 100644
index 0000000..61dde97
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/udiv.asm
@@ -0,0 +1,52 @@
+dnl SPARC v9 32-bit mpn_udiv_qrnnd - division support for longlong.h.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr o0
+C n1 o1
+C n0 o2
+C d o3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+ sllx %o1, 32, %g1 C shift upper dividend limb
+ srl %o2, 0, %g2 C zero extend lower dividend limb
+ srl %o3, 0, %g3 C zero extend divisor
+ or %g2, %g1, %g1 C assemble 64-bit dividend
+ udivx %g1, %g3, %g1
+ mulx %g1, %g3, %g4
+ sub %g2, %g4, %g2
+ st %g2, [%o0] C store remainder
+ retl
+ mov %g1, %o0 C return quotient
+EPILOGUE(mpn_udiv_qrnnd)