aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/pa32/hppa1_1
diff options
context:
space:
mode:
Diffstat (limited to 'gmp-6.3.0/mpn/pa32/hppa1_1')
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm106
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h72
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm102
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm83
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm201
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm95
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm92
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm84
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm207
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm60
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm115
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm102
-rw-r--r--gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm47
13 files changed, 1366 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm
new file mode 100644
index 0000000..ec2f219
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/addmul_1.asm
@@ -0,0 +1,106 @@
+dnl HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s1_ptr r25
+C size r24
+C s2_limb r23
+
+C This runs at 11 cycles/limb on a PA7000. With the used instructions, it can
+C not become faster due to data cache contention after a store. On the PA7100
+C it runs at 10 cycles/limb.
+
+C There are some ideas described in mul_1.asm that applies to this code too.
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C .callinfo frame=64,no_calls
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) C move s2_limb ...
+ addib,= -1,%r24,L(just_one_limb)
+ fldws -16(%r30),%fr4 C ... into fr4
+ add %r0,%r0,%r0 C clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 C least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L(end)
+ ldw -12(%r30),%r1
+
+C Main loop
+LDEF(loop)
+ ldws 0(%r26),%r29
+ fldws,ma 4(%r25),%fr5
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addc %r0,%r28,%r28
+ addib,<> -1,%r24,L(loop)
+ ldw -12(%r30),%r1
+
+LDEF(end)
+ ldw 0(%r26),%r29
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ ldws 0(%r26),%r29
+ addc %r0,%r28,%r28
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+LDEF(just_one_limb)
+ xmpyu %fr4,%fr5,%fr6
+ ldw 0(%r26),%r29
+ fstds %fr6,-16(%r30)
+ ldw -12(%r30),%r1
+ ldw -16(%r30),%r28
+ add %r29,%r1,%r19
+ stw %r19,0(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h b/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h
new file mode 100644
index 0000000..1261b24
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/gmp-mparam.h
@@ -0,0 +1,72 @@
+/* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* Generated by tuneup.c, 2004-02-07, gcc 2.8 (pa7100/100MHz) */
+
+#define MUL_TOOM22_THRESHOLD 30
+#define MUL_TOOM33_THRESHOLD 89
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 55
+#define SQR_TOOM3_THRESHOLD 101
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 84
+#define POWM_THRESHOLD 166
+
+#define HGCD_THRESHOLD 231
+#define GCD_ACCEL_THRESHOLD 3
+#define GCD_DC_THRESHOLD 823
+#define JACOBI_BASE_METHOD 2
+
+#define DIVREM_1_NORM_THRESHOLD 5
+#define DIVREM_1_UNNORM_THRESHOLD 11
+#define MOD_1_NORM_THRESHOLD 5
+#define MOD_1_UNNORM_THRESHOLD 10
+#define USE_PREINV_DIVREM_1 1
+#define USE_PREINV_MOD_1 1
+#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 23
+#define SET_STR_THRESHOLD 6589
+
+#define MUL_FFT_TABLE { 464, 928, 1920, 4608, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD 480
+#define MUL_FFT_THRESHOLD 3328
+
+#define SQR_FFT_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD 520
+#define SQR_FFT_THRESHOLD 3328
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm
new file mode 100644
index 0000000..6e60c2f
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/mul_1.asm
@@ -0,0 +1,102 @@
+dnl HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl result in a second limb vector.
+
+dnl Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s1_ptr r25
+C size r24
+C s2_limb r23
+
+C This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
+C not become faster due to data cache contention after a store. On the PA7100
+C it runs at 7 cycles/limb.
+
+C We could use fldds to read two limbs at a time from the S1 array, and that
+C could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
+C PA7100, respectively. We don't do that since it does not seem worth the
+C (alignment) troubles...
+
+C At least the PA7100 is rumored to be able to deal with cache-misses without
+C stalling instruction issue. If this is true, and the cache is actually also
+C lockup-free, we should use a deeper software pipeline, and load from S1 very
+C early! (The loads and stores to -12(sp) will surely be in the cache.)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+C .callinfo frame=64,no_calls
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) C move s2_limb ...
+ addib,= -1,%r24,L(just_one_limb)
+ fldws -16(%r30),%fr4 C ... into fr4
+ add %r0,%r0,%r0 C clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 C least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L(end)
+ ldw -12(%r30),%r1
+
+C Main loop
+LDEF(loop)
+ fldws,ma 4(%r25),%fr5
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addib,<> -1,%r24,L(loop)
+ ldw -12(%r30),%r1
+
+LDEF(end)
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ stws,ma %r19,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+LDEF(just_one_limb)
+ xmpyu %fr4,%fr5,%fr6
+ fstds %fr6,-16(%r30)
+ ldw -16(%r30),%r28
+ ldo -64(%r30),%r30
+ bv 0(%r2)
+ fstws %fr6R,0(%r26)
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm
new file mode 100644
index 0000000..b96d403
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/add_n.asm
@@ -0,0 +1,83 @@
+dnl HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl sum in a third limb vector. Optimized for the PA7100, where is runs at
+dnl 4.25 cycles/limb.
+
+dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s1_ptr r25
+C s2_ptr r24
+C size r23
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,<= -5,%r23,L(rest)
+ add %r20,%r19,%r28 C add first limbs ignoring cy
+
+LDEF(loop)
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -4,%r23,L(loop)
+ addc %r20,%r19,%r28
+
+LDEF(rest)
+ addib,= 4,%r23,L(end)
+ nop
+
+LDEF(eloop)
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -1,%r23,L(eloop)
+ addc %r20,%r19,%r28
+
+LDEF(end)
+ stws %r28,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r0,%r28
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
new file mode 100644
index 0000000..fb16100
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
@@ -0,0 +1,201 @@
+dnl HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright 1995, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`res_ptr',`%r26')
+define(`s1_ptr',`%r25')
+define(`size_param',`%r24')
+define(`s2_limb',`%r23')
+
+define(`cylimb',`%r28')
+define(`s0',`%r19')
+define(`s1',`%r20')
+define(`s2',`%r3')
+define(`s3',`%r4')
+define(`lo0',`%r21')
+define(`lo1',`%r5')
+define(`lo2',`%r6')
+define(`lo3',`%r7')
+define(`hi0',`%r22')
+define(`hi1',`%r23') C safe to reuse
+define(`hi2',`%r29')
+define(`hi3',`%r1')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+C .callinfo frame=128,no_calls
+
+ ldo 128(%r30),%r30
+ stws s2_limb,-16(%r30)
+ add %r0,%r0,cylimb C clear cy and cylimb
+ addib,< -4,size_param,L(few_limbs)
+ fldws -16(%r30),%fr31R
+
+ ldo -112(%r30),%r31
+ stw %r3,-96(%r30)
+ stw %r4,-92(%r30)
+ stw %r5,-88(%r30)
+ stw %r6,-84(%r30)
+ stw %r7,-80(%r30)
+
+ bb,>=,n s1_ptr,29,L(0)
+
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r31)
+ ldws -16(%r31),cylimb
+ ldws -12(%r31),lo0
+ add s0,lo0,s0
+ addib,< -1,size_param,L(few_limbs)
+ stws,ma s0,4(res_ptr)
+
+C start software pipeline ----------------------------------------------------
+LDEF(0)
+ fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ xmpyu %fr4L,%fr31R,%fr5
+ xmpyu %fr4R,%fr31R,%fr6
+ xmpyu %fr8L,%fr31R,%fr9
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ fstds %fr6,-8(%r31)
+ fstds %fr9,0(%r31)
+ fstds %fr10,8(%r31)
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ addc lo1,hi0,lo1
+ addc lo2,hi1,lo2
+ addc lo3,hi2,lo3
+
+ addib,< -4,size_param,L(end)
+ addc %r0,hi3,cylimb C propagate carry into cylimb
+C main loop ------------------------------------------------------------------
+LDEF(loop)
+ fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ ldws 0(res_ptr),s0
+ xmpyu %fr4L,%fr31R,%fr5
+ ldws 4(res_ptr),s1
+ xmpyu %fr4R,%fr31R,%fr6
+ ldws 8(res_ptr),s2
+ xmpyu %fr8L,%fr31R,%fr9
+ ldws 12(res_ptr),s3
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ add s0,lo0,s0
+ fstds %fr6,-8(%r31)
+ addc s1,lo1,s1
+ fstds %fr9,0(%r31)
+ addc s2,lo2,s2
+ fstds %fr10,8(%r31)
+ addc s3,lo3,s3
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ stws,ma s0,4(res_ptr)
+ addc lo1,hi0,lo1
+ stws,ma s1,4(res_ptr)
+ addc lo2,hi1,lo2
+ stws,ma s2,4(res_ptr)
+ addc lo3,hi2,lo3
+ stws,ma s3,4(res_ptr)
+
+ addib,>= -4,size_param,L(loop)
+ addc %r0,hi3,cylimb C propagate carry into cylimb
+C finish software pipeline ---------------------------------------------------
+LDEF(end)
+ ldws 0(res_ptr),s0
+ ldws 4(res_ptr),s1
+ ldws 8(res_ptr),s2
+ ldws 12(res_ptr),s3
+
+ add s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ addc s1,lo1,s1
+ stws,ma s1,4(res_ptr)
+ addc s2,lo2,s2
+ stws,ma s2,4(res_ptr)
+ addc s3,lo3,s3
+ stws,ma s3,4(res_ptr)
+
+C restore callee-saves registers ---------------------------------------------
+ ldw -96(%r30),%r3
+ ldw -92(%r30),%r4
+ ldw -88(%r30),%r5
+ ldw -84(%r30),%r6
+ ldw -80(%r30),%r7
+
+LDEF(few_limbs)
+ addib,=,n 4,size_param,L(ret)
+
+LDEF(loop2)
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r30)
+ ldws -16(%r30),hi0
+ ldws -12(%r30),lo0
+ addc lo0,cylimb,lo0
+ addc %r0,hi0,cylimb
+ add s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ addib,<> -1,size_param,L(loop2)
+ nop
+
+LDEF(ret)
+ addc %r0,cylimb,cylimb
+ bv 0(%r2)
+ ldo -128(%r30),%r30
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm
new file mode 100644
index 0000000..d65db2a
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/lshift.asm
@@ -0,0 +1,95 @@
+dnl HP-PA mpn_lshift -- Shift a number left.
+dnl Optimized for the PA7100, where is runs at 3.25 cycles/limb.
+
+dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s_ptr r25
+C size r24
+C cnt r23
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ sh2add %r24,%r25,%r25
+ sh2add %r24,%r26,%r26
+ ldws,mb -4(0,%r25),%r22
+ subi 32,%r23,%r1
+ mtsar %r1
+ addib,= -1,%r24,L(0004)
+ vshd %r0,%r22,%r28 C compute carry out limb
+ ldws,mb -4(0,%r25),%r29
+ addib,<= -5,%r24,L(rest)
+ vshd %r22,%r29,%r20
+
+LDEF(loop)
+ ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,> -4,%r24,L(loop)
+ vshd %r22,%r29,%r20
+
+LDEF(rest)
+ addib,= 4,%r24,L(end1)
+ nop
+
+LDEF(eloop)
+ ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ addib,<= -1,%r24,L(end2)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,> -1,%r24,L(eloop)
+ vshd %r22,%r29,%r20
+
+LDEF(end1)
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+
+LDEF(end2)
+ stws,mb %r20,-4(0,%r26)
+
+LDEF(0004)
+ vshd %r22,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm
new file mode 100644
index 0000000..f7896fc
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/rshift.asm
@@ -0,0 +1,92 @@
+dnl HP-PA mpn_rshift -- Shift a number right.
+dnl Optimized for the PA7100, where is runs at 3.25 cycles/limb.
+
+dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s_ptr r25
+C size r24
+C cnt r23
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L(0004)
+ vshd %r22,%r0,%r28 C compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,<= -5,%r24,L(rest)
+ vshd %r29,%r22,%r20
+
+LDEF(loop)
+ ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -4,%r24,L(loop)
+ vshd %r29,%r22,%r20
+
+LDEF(rest)
+ addib,= 4,%r24,L(end1)
+ nop
+
+LDEF(eloop)
+ ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,<= -1,%r24,L(end2)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -1,%r24,L(eloop)
+ vshd %r29,%r22,%r20
+
+LDEF(end1)
+ stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+LDEF(end2)
+ stws,ma %r20,4(0,%r26)
+
+LDEF(0004)
+ vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm
new file mode 100644
index 0000000..df3f6e8
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/sub_n.asm
@@ -0,0 +1,84 @@
+dnl HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector. Optimized for the PA7100, where
+dnl is runs at 4.25 cycles/limb.
+
+dnl Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s1_ptr r25
+C s2_ptr r24
+C size r23
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,<= -5,%r23,L(rest)
+ sub %r20,%r19,%r28 C subtract first limbs ignoring cy
+
+LDEF(loop)
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -4,%r23,L(loop)
+ subb %r20,%r19,%r28
+
+LDEF(rest)
+ addib,= 4,%r23,L(end)
+ nop
+
+LDEF(eloop)
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -1,%r23,L(eloop)
+ subb %r20,%r19,%r28
+
+LDEF(end)
+ stws %r28,0(0,%r26)
+ addc %r0,%r0,%r28
+ bv 0(%r2)
+ subi 1,%r28,%r28
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm
new file mode 100644
index 0000000..5ea08cb
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/pa7100/submul_1.asm
@@ -0,0 +1,207 @@
+dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright 1995, 2000-2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`res_ptr',`%r26')
+define(`s1_ptr',`%r25')
+define(`size_param',`%r24')
+define(`s2_limb',`%r23')
+
+define(`cylimb',`%r28')
+define(`s0',`%r19')
+define(`s1',`%r20')
+define(`s2',`%r3')
+define(`s3',`%r4')
+define(`lo0',`%r21')
+define(`lo1',`%r5')
+define(`lo2',`%r6')
+define(`lo3',`%r7')
+define(`hi0',`%r22')
+define(`hi1',`%r23') C safe to reuse
+define(`hi2',`%r29')
+define(`hi3',`%r1')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C .callinfo frame=128,no_calls
+
+ ldo 128(%r30),%r30
+ stws s2_limb,-16(%r30)
+ add %r0,%r0,cylimb C clear cy and cylimb
+ addib,< -4,size_param,L(few_limbs)
+ fldws -16(%r30),%fr31R
+
+ ldo -112(%r30),%r31
+ stw %r3,-96(%r30)
+ stw %r4,-92(%r30)
+ stw %r5,-88(%r30)
+ stw %r6,-84(%r30)
+ stw %r7,-80(%r30)
+
+ bb,>=,n s1_ptr,29,L(0)
+
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r31)
+ ldws -16(%r31),cylimb
+ ldws -12(%r31),lo0
+ sub s0,lo0,s0
+ add s0,lo0,%r0 C invert cy
+ addib,< -1,size_param,L(few_limbs)
+ stws,ma s0,4(res_ptr)
+
+C start software pipeline ----------------------------------------------------
+LDEF(0)
+ fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ xmpyu %fr4L,%fr31R,%fr5
+ xmpyu %fr4R,%fr31R,%fr6
+ xmpyu %fr8L,%fr31R,%fr9
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ fstds %fr6,-8(%r31)
+ fstds %fr9,0(%r31)
+ fstds %fr10,8(%r31)
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ addc lo1,hi0,lo1
+ addc lo2,hi1,lo2
+ addc lo3,hi2,lo3
+
+ addib,< -4,size_param,L(end)
+ addc %r0,hi3,cylimb C propagate carry into cylimb
+C main loop ------------------------------------------------------------------
+LDEF(loop)
+ fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ ldws 0(res_ptr),s0
+ xmpyu %fr4L,%fr31R,%fr5
+ ldws 4(res_ptr),s1
+ xmpyu %fr4R,%fr31R,%fr6
+ ldws 8(res_ptr),s2
+ xmpyu %fr8L,%fr31R,%fr9
+ ldws 12(res_ptr),s3
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ sub s0,lo0,s0
+ fstds %fr6,-8(%r31)
+ subb s1,lo1,s1
+ fstds %fr9,0(%r31)
+ subb s2,lo2,s2
+ fstds %fr10,8(%r31)
+ subb s3,lo3,s3
+ subb %r0,%r0,lo0 C these two insns ...
+ add lo0,lo0,%r0 C ... just invert cy
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ stws,ma s0,4(res_ptr)
+ addc lo1,hi0,lo1
+ stws,ma s1,4(res_ptr)
+ addc lo2,hi1,lo2
+ stws,ma s2,4(res_ptr)
+ addc lo3,hi2,lo3
+ stws,ma s3,4(res_ptr)
+
+ addib,>= -4,size_param,L(loop)
+ addc %r0,hi3,cylimb C propagate carry into cylimb
+C finish software pipeline ---------------------------------------------------
+LDEF(end)
+ ldws 0(res_ptr),s0
+ ldws 4(res_ptr),s1
+ ldws 8(res_ptr),s2
+ ldws 12(res_ptr),s3
+
+ sub s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ subb s1,lo1,s1
+ stws,ma s1,4(res_ptr)
+ subb s2,lo2,s2
+ stws,ma s2,4(res_ptr)
+ subb s3,lo3,s3
+ stws,ma s3,4(res_ptr)
+ subb %r0,%r0,lo0 C these two insns ...
+ add lo0,lo0,%r0 C ... invert cy
+
+C restore callee-saves registers ---------------------------------------------
+ ldw -96(%r30),%r3
+ ldw -92(%r30),%r4
+ ldw -88(%r30),%r5
+ ldw -84(%r30),%r6
+ ldw -80(%r30),%r7
+
+LDEF(few_limbs)
+ addib,=,n 4,size_param,L(ret)
+
+LDEF(loop2)
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r30)
+ ldws -16(%r30),hi0
+ ldws -12(%r30),lo0
+ addc lo0,cylimb,lo0
+ addc %r0,hi0,cylimb
+ sub s0,lo0,s0
+ add s0,lo0,%r0 C invert cy
+ stws,ma s0,4(res_ptr)
+ addib,<> -1,size_param,L(loop2)
+ nop
+
+LDEF(ret)
+ addc %r0,cylimb,cylimb
+ bv 0(%r2)
+ ldo -128(%r30),%r30
+EPILOGUE(mpn_submul_1)
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm
new file mode 100644
index 0000000..1c7a18e
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/sqr_diagonal.asm
@@ -0,0 +1,60 @@
+dnl HP-PA 1.1 32-bit mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 6 cycles/limb on the PA7100 and 2.5 cycles/limb on PA8x00.
+C 2-way unrolling wouldn't help the PA7100; it could however bring times down
+C to 2.0 cycles/limb for the PA8x00.
+
+C INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`n',`%r24')
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+ ldo 4(rp),rp
+ fldws,ma 4(up),%fr4r
+ addib,= -1,n,L(exit)
+ xmpyu %fr4r,%fr4r,%fr5
+
+LDEF(loop)
+ fldws,ma 4(up),%fr4r
+ fstws %fr5r,-4(rp)
+ fstws,ma %fr5l,8(rp)
+ addib,<> -1,n,L(loop)
+ xmpyu %fr4r,%fr4r,%fr5
+
+LDEF(exit)
+ fstws %fr5r,-4(rp)
+ bv 0(%r2)
+ fstws %fr5l,0(rp)
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm
new file mode 100644
index 0000000..a9b11d2
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/submul_1.asm
@@ -0,0 +1,115 @@
+dnl HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr r26
+C s1_ptr r25
+C size r24
+C s2_limb r23
+
+C This runs at 12 cycles/limb on a PA7000. With the used instructions, it can
+C not become faster due to data cache contention after a store. On the PA7100
+C it runs at 11 cycles/limb.
+
+C There are some ideas described in mul_1.asm that applies to this code too.
+
+C It seems possible to make this run as fast as mpn_addmul_1, if we use
+C sub,>>= %r29,%r19,%r22
+C addi 1,%r28,%r28
+C but that requires reworking the hairy software pipeline...
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+C .callinfo frame=64,no_calls
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) C move s2_limb ...
+ addib,= -1,%r24,L(just_one_limb)
+ fldws -16(%r30),%fr4 C ... into fr4
+ add %r0,%r0,%r0 C clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 C least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L(end)
+ ldw -12(%r30),%r1
+
+C Main loop
+LDEF(loop)
+ ldws 0(%r26),%r29
+ fldws,ma 4(%r25),%fr5
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addc %r0,%r28,%r28
+ addib,<> -1,%r24,L(loop)
+ ldw -12(%r30),%r1
+
+LDEF(end)
+ ldw 0(%r26),%r29
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ ldws 0(%r26),%r29
+ addc %r0,%r28,%r28
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+LDEF(just_one_limb)
+ xmpyu %fr4,%fr5,%fr6
+ ldw 0(%r26),%r29
+ fstds %fr6,-16(%r30)
+ ldw -12(%r30),%r1
+ ldw -16(%r30),%r28
+ sub %r29,%r1,%r22
+ add %r22,%r1,%r0
+ stw %r22,0(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm
new file mode 100644
index 0000000..626ecd2
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/udiv.asm
@@ -0,0 +1,102 @@
+dnl HP-PA __udiv_qrnnd division support, used from longlong.h.
+dnl This version runs fast on PA 7000 and later.
+
+dnl Copyright 1993, 1994, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr gr26
+C n1 gr25
+C n0 gr24
+C d gr23
+
+C This file has caused a lot of trouble, since it demands PIC reference to
+C static data, which triggers bugs in gas (at least version 2.7 through
+C 2.11.2). When the bug is triggered, many bogus relocs are generated. The
+C current solution is to stuff data right into the code, and refer it using
+C absolute offsets. Fragile to be sure, but nothing else seems to work.
+
+ASM_START()
+ifdef(`PIC',`',
+` RODATA
+ INT64(0000, 0x43f00000, 0x0) C 2^64
+')
+
+PROLOGUE(mpn_udiv_qrnnd)
+C .callinfo frame=64,no_calls
+
+ ldo 64(%r30),%r30
+
+ stws %r25,-16(0,%r30) C n_hi
+ stws %r24,-12(0,%r30) C n_lo
+
+ifdef(`PIC',
+` bl .+20,%r31
+ dep %r0,31,2,%r31
+ .word 0x0 C padding for alignment
+ .word 0x43f00000, 0x0 C 2^64
+ ldo 4(%r31),%r31',
+` ldil `L'%L(0000),%r31
+ ldo R%L(0000)(%r31),%r31')
+
+ fldds -16(0,%r30),%fr5
+ stws %r23,-12(0,%r30)
+ comib,<= 0,%r25,L(1)
+ fcnvxf,dbl,dbl %fr5,%fr5
+ fldds 0(0,%r31),%fr4
+ fadd,dbl %fr4,%fr5,%fr5
+
+LDEF(1)
+ fcpy,sgl %fr0,%fr6L
+ fldws -12(0,%r30),%fr6R
+ fcnvxf,dbl,dbl %fr6,%fr4
+
+ fdiv,dbl %fr5,%fr4,%fr5
+
+ fcnvfx,dbl,dbl %fr5,%fr4
+ fstws %fr4R,-16(%r30)
+ xmpyu %fr4R,%fr6R,%fr6
+ ldws -16(%r30),%r28
+ fstds %fr6,-16(0,%r30)
+ ldws -12(0,%r30),%r21
+ ldws -16(0,%r30),%r20
+ sub %r24,%r21,%r22
+ subb %r25,%r20,%r20
+ comib,= 0,%r20,L(2)
+ ldo -64(%r30),%r30
+
+ add %r22,%r23,%r22
+ ldo -1(%r28),%r28
+
+LDEF(2)
+ bv 0(%r2)
+ stws %r22,0(0,%r26)
+
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm b/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm
new file mode 100644
index 0000000..18b923c
--- /dev/null
+++ b/gmp-6.3.0/mpn/pa32/hppa1_1/umul.asm
@@ -0,0 +1,47 @@
+dnl Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+C .callinfo frame=64,no_calls
+
+ ldo 64(%r30),%r30
+ stw %r25,-16(0,%r30)
+ fldws -16(0,%r30),%fr22R
+ stw %r24,-16(0,%r30)
+ fldws -16(0,%r30),%fr22L
+ xmpyu %fr22R,%fr22L,%fr22
+ fstds %fr22,-16(0,%r30)
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ stw %r29,0(0,%r26)
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+EPILOGUE()