aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/s390_32
diff options
context:
space:
mode:
Diffstat (limited to 'gmp-6.3.0/mpn/s390_32')
-rw-r--r--gmp-6.3.0/mpn/s390_32/README37
-rw-r--r--gmp-6.3.0/mpn/s390_32/addmul_1.asm93
-rw-r--r--gmp-6.3.0/mpn/s390_32/copyd.asm145
-rw-r--r--gmp-6.3.0/mpn/s390_32/copyi.asm69
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm72
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/aors_n.asm137
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm173
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm65
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h177
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/mul_1.asm66
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm130
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm203
-rw-r--r--gmp-6.3.0/mpn/s390_32/esame/submul_1.asm70
-rw-r--r--gmp-6.3.0/mpn/s390_32/gmp-mparam.h138
-rw-r--r--gmp-6.3.0/mpn/s390_32/logops_n.asm295
-rw-r--r--gmp-6.3.0/mpn/s390_32/lshift.asm144
-rw-r--r--gmp-6.3.0/mpn/s390_32/lshiftc.asm156
-rw-r--r--gmp-6.3.0/mpn/s390_32/mul_1.asm85
-rw-r--r--gmp-6.3.0/mpn/s390_32/rshift.asm138
-rw-r--r--gmp-6.3.0/mpn/s390_32/sec_tabselect.asm140
-rw-r--r--gmp-6.3.0/mpn/s390_32/submul_1.asm93
21 files changed, 2626 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/s390_32/README b/gmp-6.3.0/mpn/s390_32/README
new file mode 100644
index 0000000..59519ba
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/README
@@ -0,0 +1,37 @@
+All current (2001) S/390 and z/Architecture machines are single-issue,
+but some newer machines have a deep pipeline. Software-pipelining is
+therefore beneficial.
+
+* mpn_add_n, mpn_sub_n: Use code along the lines below. Two-way unrolling
+ would be adequate.
+
+ mp_limb_t
+ mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+ {
+ mp_limb_t a, b, r, cy;
+ mp_size_t i;
+ mp_limb_t mm = -1;
+
+ cy = 0;
+ up += n;
+ vp += n;
+ rp += n;
+ i = -n;
+ do
+ {
+ a = up[i];
+ b = vp[i];
+ r = a + b + cy;
+ rp[i] = r;
+ cy = (((a & b) | ((a | b) & (r ^ mm)))) >> 31;
+ i++;
+ }
+ while (i < 0);
+ return cy;
+ }
+
+* mpn_lshift, mpn_rshift: Use SLDL/SRDL, and two-way unrolling.
+
+* mpn_mul_1, mpn_addmul_1, mpn_submul_1: For machines with just signed
+ multiply (MR), use two loops, similar to the corresponding VAX or
+ POWER functions. Handle carry like for mpn_add_n.
diff --git a/gmp-6.3.0/mpn/s390_32/addmul_1.asm b/gmp-6.3.0/mpn/s390_32/addmul_1.asm
new file mode 100644
index 0000000..97189a8
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/addmul_1.asm
@@ -0,0 +1,93 @@
+dnl S/390 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(`rp',2)
+define(`up',3)
+define(`n',4)
+define(`vlimb',5)
+define(`cylimb',7)
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ stm 6,7,24(15)
+ slr cylimb,cylimb # clear cylimb
+ ltr vlimb,vlimb
+ jnl .Loopp
+
+.Loopn: l 1,0(up) # load from u
+ lr 6,1 #
+ mr 0,vlimb # multiply signed
+ alr 0,6 # add vlimb to phi
+ sra 6,31 # make mask
+ nr 6,vlimb # 0 or vlimb
+ alr 0,6 # conditionally add vlimb to phi
+ alr 1,cylimb # add carry limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ l 6,0(rp) # load r limb
+ alr 6,1 # add u limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ lr cylimb,0 # new cylimb
+ st 6,0(rp) # store
+ la up,4(,up)
+ la rp,4(,rp)
+ brct n,.Loopn
+
+ lr 2,cylimb
+ lm 6,7,24(15)
+ br 14
+
+.Loopp: l 1,0(up) # load from u
+ lr 6,1 #
+ mr 0,vlimb # multiply signed
+ sra 6,31 # make mask
+ nr 6,vlimb # 0 or vlimb
+ alr 0,6 # conditionally add vlimb to phi
+ alr 1,cylimb # add carry limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ l 6,0(rp) # load r limb
+ alr 6,1 # add u limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ lr cylimb,0 # new cylimb
+ st 6,0(rp) # store
+ la up,4(,up)
+ la rp,4(,rp)
+ brct n,.Loopp
+
+ lr 2,cylimb
+ lm 6,7,24(15)
+ br 14
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/s390_32/copyd.asm b/gmp-6.3.0/mpn/s390_32/copyd.asm
new file mode 100644
index 0000000..ff252bc
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/copyd.asm
@@ -0,0 +1,145 @@
+dnl S/390-32 mpn_copyd
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C cycles/limb
+C z900 1.65
+C z990 1.125
+C z9 ?
+C z10 ?
+C z196 ?
+
+C FIXME:
+C * Avoid saving/restoring callee-saves registers for n < 3. This could be
+C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
+C We could then use r3...r10 in main loop.
+
+C INPUT PARAMETERS
+define(`rp_param', `%r2')
+define(`up_param', `%r3')
+define(`n', `%r4')
+
+define(`rp', `%r8')
+define(`up', `%r9')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+ stm %r6, %r11, 24(%r15)
+
+ lr %r1, n
+ sll %r1, 2
+ la %r10, 8(n)
+ ahi %r1, -32
+ srl %r10, 3
+ lhi %r11, -32
+
+ la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later
+ la up, 0(%r1,up_param) C FIXME use lay on z990 and later
+
+ lhi %r7, 7
+ nr %r7, n C n mod 8
+ chi %r7, 2
+ jh L(b34567)
+ chi %r7, 1
+ je L(b1)
+ jh L(b2)
+
+L(b0): brct %r10, L(top)
+ j L(end)
+
+L(b1): l %r0, 28(up)
+ ahi up, -4
+ st %r0, 28(rp)
+ ahi rp, -4
+ brct %r10, L(top)
+ j L(end)
+
+L(b2): lm %r0, %r1, 24(up)
+ ahi up, -8
+ stm %r0, %r1, 24(rp)
+ ahi rp, -8
+ brct %r10, L(top)
+ j L(end)
+
+L(b34567):
+ chi %r7, 4
+ jl L(b3)
+ je L(b4)
+ chi %r7, 6
+ je L(b6)
+ jh L(b7)
+
+L(b5): lm %r0, %r4, 12(up)
+ ahi up, -20
+ stm %r0, %r4, 12(rp)
+ ahi rp, -20
+ brct %r10, L(top)
+ j L(end)
+
+L(b3): lm %r0, %r2, 20(up)
+ ahi up, -12
+ stm %r0, %r2, 20(rp)
+ ahi rp, -12
+ brct %r10, L(top)
+ j L(end)
+
+L(b4): lm %r0, %r3, 16(up)
+ ahi up, -16
+ stm %r0, %r3, 16(rp)
+ ahi rp, -16
+ brct %r10, L(top)
+ j L(end)
+
+L(b6): lm %r0, %r5, 8(up)
+ ahi up, -24
+ stm %r0, %r5, 8(rp)
+ ahi rp, -24
+ brct %r10, L(top)
+ j L(end)
+
+L(b7): lm %r0, %r6, 4(up)
+ ahi up, -28
+ stm %r0, %r6, 4(rp)
+ ahi rp, -28
+ brct %r10, L(top)
+ j L(end)
+
+L(top): lm %r0, %r7, 0(up)
+ la up, 0(%r11,up)
+ stm %r0, %r7, 0(rp)
+ la rp, 0(%r11,rp)
+ brct %r10, L(top)
+
+L(end): lm %r6, %r11, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/copyi.asm b/gmp-6.3.0/mpn/s390_32/copyi.asm
new file mode 100644
index 0000000..1df32f1
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/copyi.asm
@@ -0,0 +1,69 @@
+dnl S/390-32 mpn_copyi
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 0.75
+C z990 0.375
+C z9 ?
+C z10 ?
+C z196 ?
+
+C NOTE
+C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+ ltr %r4, %r4
+ sll %r4, 2
+ je L(rtn)
+ ahi %r4, -1
+ lr %r5, %r4
+ srl %r5, 8
+ ltr %r5, %r5 C < 256 bytes to copy?
+ je L(1)
+
+L(top): mvc 0(256, rp), 0(up)
+ la rp, 256(rp)
+ la up, 256(up)
+ brct %r5, L(top)
+
+L(1): bras %r5, L(2) C make r5 point to mvc insn
+ mvc 0(1, rp), 0(up)
+L(2): ex %r4, 0(%r5) C execute mvc with length ((n-1) mod 256)+1
+L(rtn): br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm b/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm
new file mode 100644
index 0000000..4375b74
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm
@@ -0,0 +1,72 @@
+dnl S/390-32 mpn_addmul_1 for systems with MLR instruction
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 18.5
+C z990 10
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+define(`z', `%r9')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ stm %r9, %r12, 36(%r15)
+ lhi %r12, 0 C zero index register
+ ahi %r12, 0 C clear carry fla
+ lhi %r11, 0 C clear carry limb
+ lhi z, 0 C clear carry limb
+
+L(top): l %r1, 0(%r12,up)
+ l %r10, 0(%r12,rp)
+ mlr %r0, v0
+ alcr %r1, %r10
+ alcr %r0, z
+ alr %r1, %r11
+ lr %r11, %r0
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct n, L(top)
+
+ lhi %r2, 0
+ alcr %r2, %r11
+
+ lm %r9, %r12, 36(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm b/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm
new file mode 100644
index 0000000..98b0dbc
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm
@@ -0,0 +1,137 @@
+dnl S/390-32 mpn_add_n and mpn_sub_n.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 2.75-3 (fast for even n, slow for odd n)
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Use r0 and save/restore one less register
+C * Using logops_n's v1 inner loop operand order make the loop about 20%
+C faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_add_n', `
+ define(ADSB, al)
+ define(ADSBCR, alcr)
+ define(ADSBC, alc)
+ define(RETVAL,`dnl
+ lhi %r2, 0
+ alcr %r2, %r2')
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADSB, sl)
+ define(ADSBCR, slbr)
+ define(ADSBC, slb)
+ define(RETVAL,`dnl
+ slbr %r2, %r2
+ lcr %r2, %r2')
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ stm %r6, %r8, 24(%r15)
+
+ ahi n, 3
+ lhi %r7, 3
+ lr %r1, n
+ srl %r1, 2
+ nr %r7, n C n mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(b0)
+
+L(b3): lm %r5, %r7, 0(up)
+ la up, 12(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+ ADSBC %r7, 8(vp)
+ la vp, 12(vp)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b0): lm %r5, %r8, 0(up) C This redundant insns is no mistake,
+ la up, 16(up) C it is needed to make main loop run
+ ADSB %r5, 0(vp) C fast for n = 0 (mod 4).
+ ADSBC %r6, 4(vp)
+ j L(m0)
+
+L(b1): l %r5, 0(up)
+ la up, 4(up)
+ ADSB %r5, 0(vp)
+ la vp, 4(vp)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up)
+ la up, 8(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+ la vp, 8(vp)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(top): lm %r5, %r8, 0(up)
+ la up, 16(up)
+ ADSBC %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+L(m0): ADSBC %r7, 8(vp)
+ ADSBC %r8, 12(vp)
+ la vp, 16(vp)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ brct %r1, L(top)
+
+L(end): RETVAL
+ lm %r6, %r8, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm b/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm
new file mode 100644
index 0000000..f2b222b
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm
@@ -0,0 +1,173 @@
+dnl S/390-32 mpn_addlsh1_n
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 9.25
+C z990 5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADDSUBC, alr)
+ define(ADDSUBE, alcr)
+ define(INITCY, `lhi %r13, -1')
+ define(RETVAL, `alr %r1, %r13
+ lhi %r2, 2
+ alr %r2, %r1')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+ define(ADDSUBC, slr)
+ define(ADDSUBE, slbr)
+ define(INITCY, `lhi %r13, 0')
+ define(RETVAL, `slr %r1, %r13
+ lhi %r2, 1
+ alr %r2, %r1')
+ define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+ stm %r6, %r13, 24(%r15)
+
+ la %r0, 3(n)
+ lhi %r7, 3
+ srl %r0, 2
+ nr %r7, n C n mod 4
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+L(b3): lm %r5, %r7, 0(up)
+ la up, 12(up)
+ lm %r9, %r11, 0(vp)
+ la vp, 12(vp)
+
+ alr %r9, %r9
+ alcr %r10, %r10
+ alcr %r11, %r11
+ slbr %r1, %r1
+
+ ADDSUBC %r5, %r9
+ ADDSUBE %r6, %r10
+ ADDSUBE %r7, %r11
+ slbr %r13, %r13
+
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r0, L(top)
+ j L(end)
+
+L(b0): lhi %r1, -1
+ INITCY
+ j L(top)
+
+L(b1): l %r5, 0(up)
+ la up, 4(up)
+ l %r9, 0(vp)
+ la vp, 4(vp)
+
+ alr %r9, %r9
+ slbr %r1, %r1
+ ADDSUBC %r5, %r9
+ slbr %r13, %r13
+
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r0, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up)
+ la up, 8(up)
+ lm %r9, %r10, 0(vp)
+ la vp, 8(vp)
+
+ alr %r9, %r9
+ alcr %r10, %r10
+ slbr %r1, %r1
+
+ ADDSUBC %r5, %r9
+ ADDSUBE %r6, %r10
+ slbr %r13, %r13
+
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ brct %r0, L(top)
+ j L(end)
+
+L(top): lm %r9, %r12, 0(vp)
+ la vp, 16(vp)
+
+ ahi %r1, 1 C restore carry
+
+ alcr %r9, %r9
+ alcr %r10, %r10
+ alcr %r11, %r11
+ alcr %r12, %r12
+
+ slbr %r1, %r1 C save carry
+
+ lm %r5, %r8, 0(up)
+ la up, 16(up)
+
+ ahi %r13, 1 C restore carry
+
+ ADDSUBE %r5, %r9
+ ADDSUBE %r6, %r10
+ ADDSUBE %r7, %r11
+ ADDSUBE %r8, %r12
+
+ slbr %r13, %r13
+
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ brct %r0, L(top)
+
+L(end):
+ RETVAL
+ lm %r6, %r13, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm b/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm
new file mode 100644
index 0000000..568a2a4
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm
@@ -0,0 +1,65 @@
+dnl S/390-32 mpn_bdiv_dbm1c for systems with MLR instruction.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 14
+C z990 10
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`qp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`bd', `%r5')
+define(`cy', `%r6')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+ stm %r6, %r7, 24(%r15)
+ lhi %r7, 0 C zero index register
+
+L(top): l %r1, 0(%r7,up)
+ mlr %r0, bd
+ slr %r6, %r1
+ st %r6, 0(%r7,qp)
+ slbr %r6, %r0
+ la %r7, 4(%r7)
+ brct n, L(top)
+
+ lr %r2, %r6
+ lm %r6, %r7, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h b/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h
new file mode 100644
index 0000000..c0e5046
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h
@@ -0,0 +1,177 @@
+/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2008-2011, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 4400 MHz IBM z196 running in 32-bit mode */
+/* FFT tuning limit = 0.5M */
+/* Generated by tuneup.c, 2017-01-02, gcc 4.9 */
+
+#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 45
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 18
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 3
+#define USE_PREINV_DIVREM_1 0
+#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 6
+#define BMOD_1_TO_MOD_1_THRESHOLD 0 /* always */
+
+#define DIV_1_VS_MUL_1_PERCENT 320
+
+#define MUL_TOOM22_THRESHOLD 12
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 173
+#define MUL_TOOM8H_THRESHOLD 260
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 83
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 86
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 69
+#define SQR_TOOM4_THRESHOLD 178
+#define SQR_TOOM6_THRESHOLD 254
+#define SQR_TOOM8_THRESHOLD 406
+
+#define MULMID_TOOM42_THRESHOLD 30
+
+#define MULMOD_BNM1_THRESHOLD 12
+#define SQRMOD_BNM1_THRESHOLD 7
+
+#define MUL_FFT_MODF_THRESHOLD 276 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 276, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
+ { 17, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 8}, { 15, 7}, { 31, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 9}, { 15, 8}, \
+ { 39, 9}, { 23,10}, { 15, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47,10}, \
+ { 31, 9}, { 71, 8}, { 143, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 7}, { 511, 9}, { 143,10}, { 79, 9}, \
+ { 159, 8}, { 319, 9}, { 175, 8}, { 351,10}, \
+ { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \
+ { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
+ { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 351, 9}, \
+ { 703, 8}, { 1407,11}, { 191,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 479, 9}, { 959, 8}, \
+ { 1919,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 89
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 240, 5}, { 17, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 9}, { 15, 8}, { 39, 9}, { 23,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 47,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \
+ { 143,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 7}, { 511, 9}, { 143,10}, \
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175, 8}, \
+ { 351, 7}, { 703,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207, 8}, { 415,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \
+ { 175, 9}, { 351, 8}, { 703, 7}, { 1407,11}, \
+ { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \
+ { 415,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 351, 9}, { 703, 8}, { 1407,11}, { 191,10}, \
+ { 415, 9}, { 831,11}, { 223,10}, { 479,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 84
+#define SQR_FFT_THRESHOLD 1856
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 27
+#define MULLO_MUL_N_THRESHOLD 5240
+#define SQRLO_BASECASE_THRESHOLD 0 /* always */
+#define SQRLO_DC_THRESHOLD 65
+#define SQRLO_SQR_THRESHOLD 3470
+
+#define DC_DIV_QR_THRESHOLD 32
+#define DC_DIVAPPR_Q_THRESHOLD 135
+#define DC_BDIV_QR_THRESHOLD 32
+#define DC_BDIV_Q_THRESHOLD 80
+
+#define INV_MULMOD_BNM1_THRESHOLD 42
+#define INV_NEWTON_THRESHOLD 177
+#define INV_APPR_THRESHOLD 139
+
+#define BINV_NEWTON_THRESHOLD 179
+#define REDC_1_TO_REDC_N_THRESHOLD 39
+
+#define MU_DIV_QR_THRESHOLD 872
+#define MU_DIVAPPR_Q_THRESHOLD 998
+#define MUPI_DIV_QR_THRESHOLD 66
+#define MU_BDIV_QR_THRESHOLD 748
+#define MU_BDIV_Q_THRESHOLD 906
+
+#define POWM_SEC_TABLE 9,34,257,946,2913
+
+#define GET_STR_DC_THRESHOLD 10
+#define GET_STR_PRECOMPUTE_THRESHOLD 16
+#define SET_STR_DC_THRESHOLD 1045
+#define SET_STR_PRECOMPUTE_THRESHOLD 1800
+
+#define FAC_DSC_THRESHOLD 77
+#define FAC_ODD_THRESHOLD 24
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 121
+#define HGCD_APPR_THRESHOLD 142
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 389
+#define GCDEXT_DC_THRESHOLD 285
+#define JACOBI_BASE_METHOD 4
diff --git a/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm b/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm
new file mode 100644
index 0000000..04be963
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm
@@ -0,0 +1,66 @@
+dnl S/390-32 mpn_mul_1 for systems with MLR instruction
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 14
+C z990 9
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ stm %r11, %r12, 44(%r15)
+ lhi %r12, 0 C zero index register
+ ahi %r12, 0 C clear carry flag
+ lhi %r11, 0 C clear carry limb
+
+L(top): l %r1, 0(%r12,up)
+ mlr %r0, v0
+ alcr %r1, %r11
+ lr %r11, %r0 C copy high part to carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct n, L(top)
+
+ lhi %r2, 0
+ alcr %r2, %r11
+
+ lm %r11, %r12, 44(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm b/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm
new file mode 100644
index 0000000..2c8138d
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm
@@ -0,0 +1,130 @@
+dnl S/390-32/esame mpn_mul_basecase.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 ?
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Perhaps add special case for un <= 2.
+C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped
+C up by about 10%.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`un', `%r4')
+define(`vp', `%r5')
+define(`vn', `%r6')
+
+define(`zero', `%r8')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+ chi un, 2
+ jhe L(ge2)
+
+C un = vn = 1
+ l %r1, 0(vp)
+ ml %r0, 0(up)
+ st %r1, 0(rp)
+ st %r0, 4(rp)
+ br %r14
+
+L(ge2): C jne L(gen)
+
+
+L(gen):
+C mul_1 =======================================================================
+
+ stm %r6, %r12, 24(%r15)
+ lhi zero, 0
+ ahi un, -1
+
+ l %r7, 0(vp)
+ l %r11, 0(up)
+ lhi %r12, 4 C init index register
+ mlr %r10, %r7
+ lr %r9, un
+ st %r11, 0(rp)
+ cr %r15, %r15 C clear carry flag
+
+L(tm): l %r1, 0(%r12,up)
+ mlr %r0, %r7
+ alcr %r1, %r10
+ lr %r10, %r0 C copy high part to carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r9, L(tm)
+
+ alcr %r0, zero
+ st %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+ ahi vn, -1
+ je L(outer_end)
+L(outer_loop):
+
+ la rp, 4(rp) C rp += 1
+ la vp, 4(vp) C up += 1
+ l %r7, 0(vp)
+ l %r11, 0(up)
+ lhi %r12, 4 C init index register
+ mlr %r10, %r7
+ lr %r9, un
+ al %r11, 0(rp)
+ st %r11, 0(rp)
+
+L(tam): l %r1, 0(%r12,up)
+ l %r11, 0(%r12,rp)
+ mlr %r0, %r7
+ alcr %r1, %r11
+ alcr %r0, zero
+ alr %r1, %r10
+ lr %r10, %r0
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r9, L(tam)
+
+ alcr %r0, zero
+ st %r0, 0(%r12,rp)
+
+ brct vn, L(outer_loop)
+L(outer_end):
+
+ lm %r6, %r12, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm b/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm
new file mode 100644
index 0000000..f45f87a
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm
@@ -0,0 +1,203 @@
+dnl S/390-32 mpn_sqr_basecase.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Clean up.
+C * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
+C This will ask for basecase handling of n = 3.
+C * Update counters and pointers more straightforwardly, possibly lowering
+C register usage.
+C * Should we use this allocation-free style for more sqr_basecase asm
+C implementations? The only disadvantage is that it requires R != U.
+C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped
+C up by about 10%. The sqr_diag_addlsh1 loop could probably be sped up even
+C more.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+define(`zero', `%r8')
+define(`rp_saved', `%r9')
+define(`up_saved', `%r13')
+define(`n_saved', `%r14')
+
+ASM_START()
+PROLOGUE(mpn_sqr_basecase)
+ ahi n, -2
+ jhe L(ge2)
+
+C n = 1
+ l %r5, 0(up)
+ mlr %r4, %r5
+ st %r5, 0(rp)
+ st %r4, 4(rp)
+ br %r14
+
+L(ge2): jne L(gen)
+
+C n = 2
+ stm %r6, %r8, 24(%r15)
+ lhi zero, 0
+
+ l %r5, 0(up)
+ mlr %r4, %r5 C u0 * u0
+ l %r1, 4(up)
+ mlr %r0, %r1 C u1 * u1
+ st %r5, 0(rp)
+
+ l %r7, 0(up)
+ ml %r6, 4(up) C u0 * u1
+ alr %r7, %r7
+ alcr %r6, %r6
+ alcr %r0, zero
+
+ alr %r4, %r7
+ alcr %r1, %r6
+ alcr %r0, zero
+ st %r4, 4(rp)
+ st %r1, 8(rp)
+ st %r0, 12(rp)
+
+ lm %r6, %r8, 24(%r15)
+ br %r14
+
+L(gen):
+C mul_1 =======================================================================
+
+ stm %r6, %r14, 24(%r15)
+ lhi zero, 0
+ lr up_saved, up
+ lr rp_saved, rp
+ lr n_saved, n
+
+ l %r6, 0(up)
+ l %r11, 4(up)
+ lhi %r12, 8 C init index register
+ mlr %r10, %r6
+ lr %r5, n
+ st %r11, 4(rp)
+ cr %r15, %r15 C clear carry flag
+
+L(tm): l %r1, 0(%r12,up)
+ mlr %r0, %r6
+ alcr %r1, %r10
+ lr %r10, %r0 C copy high part to carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r5, L(tm)
+
+ alcr %r0, zero
+ st %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+ ahi n, -1
+ je L(outer_end)
+L(outer_loop):
+
+ la rp, 8(rp) C rp += 2
+ la up, 4(up) C up += 1
+ l %r6, 0(up)
+ l %r11, 4(up)
+ lhi %r12, 8 C init index register
+ mlr %r10, %r6
+ lr %r5, n
+ al %r11, 4(rp)
+ st %r11, 4(rp)
+
+L(tam): l %r1, 0(%r12,up)
+ l %r7, 0(%r12,rp)
+ mlr %r0, %r6
+ alcr %r1, %r7
+ alcr %r0, zero
+ alr %r1, %r10
+ lr %r10, %r0
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r5, L(tam)
+
+ alcr %r0, zero
+ st %r0, 0(%r12,rp)
+
+ brct n, L(outer_loop)
+L(outer_end):
+
+ l %r6, 4(up)
+ l %r1, 8(up)
+ lr %r7, %r0 C Same as: l %r7, 12(,rp)
+ mlr %r0, %r6
+ alr %r1, %r7
+ alcr %r0, zero
+ st %r1, 12(rp)
+ st %r0, 16(rp)
+
+C sqr_diag_addlsh1 ============================================================
+
+define(`up', `up_saved')
+define(`rp', `rp_saved')
+ la n, 1(n_saved)
+
+ l %r1, 0(up)
+ mlr %r0, %r1
+ st %r1, 0(rp)
+C clr %r15, %r15 C clear carry (already clear per above)
+
+L(top): l %r11, 4(up)
+ la up, 4(up)
+ l %r6, 4(rp)
+ l %r7, 8(rp)
+ mlr %r10, %r11
+ alcr %r6, %r6
+ alcr %r7, %r7
+ alcr %r10, zero C propagate carry to high product limb
+ alr %r6, %r0
+ alcr %r7, %r11
+ stm %r6, %r7, 4(rp)
+ la rp, 8(rp)
+ lr %r0, %r10 C copy carry limb
+ brct n, L(top)
+
+ alcr %r0, zero
+ st %r0, 4(rp)
+
+ lm %r6, %r14, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm b/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm
new file mode 100644
index 0000000..a71e57e
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm
@@ -0,0 +1,70 @@
+dnl S/390-32 mpn_submul_1 for systems with MLR instruction.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 20
+C z990 11
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ stm %r9, %r12, 36(%r15)
+ lhi %r12, 0
+ slr %r11, %r11
+
+L(top): l %r1, 0(%r12, up)
+ l %r10, 0(%r12, rp)
+ mlr %r0, v0
+ slbr %r10, %r1
+ slbr %r9, %r9
+ slr %r0, %r9 C conditional incr
+ slr %r10, %r11
+ lr %r11, %r0
+ st %r10, 0(%r12, rp)
+ la %r12, 4(%r12)
+ brct %r4, L(top)
+
+ lr %r2, %r11
+ slbr %r9, %r9
+ slr %r2, %r9
+
+ lm %r9, %r12, 36(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/gmp-mparam.h b/gmp-6.3.0/mpn/s390_32/gmp-mparam.h
new file mode 100644
index 0000000..1aca74a
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/gmp-mparam.h
@@ -0,0 +1,138 @@
+/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 770 MHz IBM z900 running in 32-bit mode, using just traditional insns */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 5
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 5
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 15
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 30
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 19
+#define MUL_TOOM33_THRESHOLD 114
+#define MUL_TOOM44_THRESHOLD 166
+#define MUL_TOOM6H_THRESHOLD 226
+#define MUL_TOOM8H_THRESHOLD 333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 106
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113
+
+#define SQR_BASECASE_THRESHOLD 7
+#define SQR_TOOM2_THRESHOLD 40
+#define SQR_TOOM3_THRESHOLD 126
+#define SQR_TOOM4_THRESHOLD 192
+#define SQR_TOOM6_THRESHOLD 246
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMID_TOOM42_THRESHOLD 28
+
+#define MULMOD_BNM1_THRESHOLD 12
+#define SQRMOD_BNM1_THRESHOLD 18
+
+#define MUL_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 244, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 19, 7}, { 39, 8}, { 23, 7}, \
+ { 47, 8}, { 27, 9}, { 15, 8}, { 39, 9}, \
+ { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
+ { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \
+ { 143, 9}, { 79,10}, { 47,11}, { 2048,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 48
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 216 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 216, 5}, { 7, 4}, { 15, 5}, { 17, 6}, \
+ { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
+ { 20, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \
+ { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
+ { 39, 8}, { 23, 9}, { 15, 8}, { 39, 9}, \
+ { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
+ { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \
+ { 143, 9}, { 79,10}, { 47,11}, { 2048,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 44
+#define SQR_FFT_THRESHOLD 1856
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 61
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 70
+#define DC_DIVAPPR_Q_THRESHOLD 234
+#define DC_BDIV_QR_THRESHOLD 59
+#define DC_BDIV_Q_THRESHOLD 137
+
+#define INV_MULMOD_BNM1_THRESHOLD 36
+#define INV_NEWTON_THRESHOLD 327
+#define INV_APPR_THRESHOLD 268
+
+#define BINV_NEWTON_THRESHOLD 324
+#define REDC_1_TO_REDC_N_THRESHOLD 63
+
+#define MU_DIV_QR_THRESHOLD 1099
+#define MU_DIVAPPR_Q_THRESHOLD 1360
+#define MUPI_DIV_QR_THRESHOLD 138
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 1234
+
+#define MATRIX22_STRASSEN_THRESHOLD 18
+#define HGCD_THRESHOLD 167
+#define GCD_DC_THRESHOLD 518
+#define GCDEXT_DC_THRESHOLD 378
+#define JACOBI_BASE_METHOD 2
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_DC_THRESHOLD 577
+#define SET_STR_PRECOMPUTE_THRESHOLD 1217
diff --git a/gmp-6.3.0/mpn/s390_32/logops_n.asm b/gmp-6.3.0/mpn/s390_32/logops_n.asm
new file mode 100644
index 0000000..1f2cd2a
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/logops_n.asm
@@ -0,0 +1,295 @@
+dnl S/390-32 logops.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb variant 1 variant 2 variant 3
+C rp!=up rp=up
+C z900 ? ? ? ?
+C z990 2.5 1 2.75 2.75
+C z9 ? ? ?
+C z10 ? ? ?
+C z196 ? ? ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`nn', `%r5')
+
+ifdef(`OPERATION_and_n',`
+ define(`func',`mpn_and_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`nc')
+ define(`LOGOP',`n')')
+ifdef(`OPERATION_andn_n',`
+ define(`func',`mpn_andn_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`n')')
+ifdef(`OPERATION_nand_n',`
+ define(`func',`mpn_nand_n')
+ define(`VARIANT_3')
+ define(`LOGOP',`n')')
+ifdef(`OPERATION_ior_n',`
+ define(`func',`mpn_ior_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`oc')
+ define(`LOGOP',`o')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func',`mpn_iorn_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`o')')
+ifdef(`OPERATION_nior_n',`
+ define(`func',`mpn_nior_n')
+ define(`VARIANT_3')
+ define(`LOGOP',`o')')
+ifdef(`OPERATION_xor_n',`
+ define(`func',`mpn_xor_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`xc')
+ define(`LOGOP',`x')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func',`mpn_xnor_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`x')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+ifdef(`VARIANT_1',`
+ cr rp, up
+ jne L(normal)
+
+ sll nn, 2
+ ahi nn, -1
+ lr %r1, nn
+ srl %r1, 8
+ ltr %r1, %r1 C < 256 bytes to copy?
+ je L(1)
+
+L(tp): LOGOPC 0(256, rp), 0(vp)
+ la rp, 256(rp)
+ la vp, 256(vp)
+ brct %r1, L(tp)
+
+L(1): bras %r1, L(2) C make r1 point to mvc insn
+ LOGOPC 0(1, rp), 0(vp)
+L(2): ex nn, 0(%r1) C execute mvc with length ((nn-1) mod 256)+1
+L(rtn): br %r14
+
+
+L(normal):
+ stm %r6, %r8, 12(%r15)
+ ahi nn, 3
+ lhi %r7, 3
+ lr %r0, nn
+ srl %r0, 2
+ nr %r7, nn C nn mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(top)
+
+L(b3): lm %r5, %r7, 0(up)
+ la up, 12(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 4(vp)
+ LOGOP %r7, 8(vp)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ la vp, 12(vp)
+ j L(mid)
+
+L(b1): l %r5, 0(up)
+ la up, 4(up)
+ LOGOP %r5, 0(vp)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ la vp, 4(vp)
+ j L(mid)
+
+L(b2): lm %r5, %r6, 0(up)
+ la up, 8(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 4(vp)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ la vp, 8(vp)
+ j L(mid)
+
+L(top): lm %r5, %r8, 0(up)
+ la up, 16(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 4(vp)
+ LOGOP %r7, 8(vp)
+ LOGOP %r8, 12(vp)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ la vp, 16(vp)
+L(mid): brct %r0, L(top)
+
+ lm %r6, %r8, 12(%r15)
+ br %r14
+')
+
+ifdef(`VARIANT_2',`
+ stm %r6, %r8, 12(%r15)
+ lhi %r1, -1
+
+ ahi nn, 3
+ lhi %r7, 3
+ lr %r0, nn
+ srl %r0, 2
+ nr %r7, nn C nn mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(top)
+
+L(b3): lm %r5, %r7, 0(vp)
+ la vp, 12(vp)
+ xr %r5, %r1
+ xr %r6, %r1
+ xr %r7, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ LOGOP %r7, 8(up)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ la up, 12(up)
+ j L(mid)
+
+L(b1): l %r5, 0(vp)
+ la vp, 4(vp)
+ xr %r5, %r1
+ LOGOP %r5, 0(up)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ la up, 4(up)
+ j L(mid)
+
+L(b2): lm %r5, %r6, 0(vp)
+ la vp, 8(vp)
+ xr %r5, %r1
+ xr %r6, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ la up, 8(up)
+ j L(mid)
+
+L(top): lm %r5, %r8, 0(vp)
+ la vp, 16(vp)
+ xr %r5, %r1
+ xr %r6, %r1
+ xr %r7, %r1
+ xr %r8, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ LOGOP %r7, 8(up)
+ LOGOP %r8, 12(up)
+ la up, 16(up)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+L(mid): brct %r0, L(top)
+
+ lm %r6, %r8, 12(%r15)
+ br %r14
+')
+
+ifdef(`VARIANT_3',`
+ stm %r6, %r8, 12(%r15)
+ lhi %r1, -1
+
+ ahi nn, 3
+ lhi %r7, 3
+ lr %r0, nn
+ srl %r0, 2
+ nr %r7, nn C nn mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(top)
+
+L(b3): lm %r5, %r7, 0(vp)
+ la vp, 12(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ xr %r5, %r1
+ xr %r6, %r1
+ LOGOP %r7, 8(up)
+ xr %r7, %r1
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ la up, 12(up)
+ j L(mid)
+
+L(b1): l %r5, 0(vp)
+ la vp, 4(vp)
+ LOGOP %r5, 0(up)
+ xr %r5, %r1
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ la up, 4(up)
+ j L(mid)
+
+L(b2): lm %r5, %r6, 0(vp)
+ la vp, 8(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ xr %r5, %r1
+ xr %r6, %r1
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ la up, 8(up)
+ j L(mid)
+
+L(top): lm %r5, %r8, 0(vp)
+ la vp, 16(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ xr %r5, %r1
+ xr %r6, %r1
+ LOGOP %r7, 8(up)
+ LOGOP %r8, 12(up)
+ xr %r7, %r1
+ xr %r8, %r1
+ stm %r5, %r8, 0(rp)
+ la up, 16(up)
+ la rp, 16(rp)
+L(mid): brct %r0, L(top)
+
+ lm %r6, %r8, 12(%r15)
+ br %r14
+')
+
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/lshift.asm b/gmp-6.3.0/mpn/s390_32/lshift.asm
new file mode 100644
index 0000000..da7d76e
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/lshift.asm
@@ -0,0 +1,144 @@
+dnl S/390-32 mpn_lshift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 6
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C *
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ lr %r1, n
+ sll %r1, 2
+ stm %r6, %r12, 24(%r15)
+ la up, 0(%r1,up) C put up near end of U
+ la rp, 0(%r1,rp) C put rp near end of R
+ ahi up, -20
+ ahi rp, -16
+ lhi %r8, 32
+ sr %r8, cnt
+ l %r12, 16(up)
+ srl %r12, 0(%r8) C return value
+ lhi %r7, 3
+ nr %r7, n
+ srl n, 2
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+L(b3): l %r10, 16(up)
+ l %r11, 12(up)
+ l %r9, 8(up)
+ ahi up, -8
+ lr %r8, %r11
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ ahi rp, -8
+ ltr n, n
+ je L(end)
+ j L(top)
+
+L(b2): l %r10, 16(up)
+ l %r11, 12(up)
+ ahi up, -4
+ sldl %r10, 0(cnt)
+ st %r10, 12(rp)
+ ahi rp, -4
+ ltr n, n
+ je L(end)
+ j L(top)
+
+L(b1): ltr n, n
+ je L(end)
+ j L(top)
+
+L(b0): l %r10,16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ ahi up, -12
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ sldl %r10,0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ ahi rp, -12
+ ahi n, -1
+ je L(end)
+
+ ALIGN(8)
+L(top): l %r10, 16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ l %r1, 0(up)
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ ahi up, -16
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ sldl %r0, 0(cnt)
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ st %r0, 0(rp)
+ ahi rp, -16
+ brct n, L(top)
+
+L(end): l %r10, 16(up)
+ sll %r10, 0(cnt)
+ st %r10, 12(rp)
+
+ lr %r2, %r12
+ lm %r6, %r12, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/lshiftc.asm b/gmp-6.3.0/mpn/s390_32/lshiftc.asm
new file mode 100644
index 0000000..f601673
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/lshiftc.asm
@@ -0,0 +1,156 @@
+dnl S/390-32 mpn_lshiftc.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 7
+C z990 3.375
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C *
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ lr %r1, n
+ sll %r1, 2
+ stm %r6, %r13, 24(%r15)
+ la up, 0(%r1,up) C put up near end of U
+ la rp, 0(%r1,rp) C put rp near end of R
+ ahi up, -20
+ ahi rp, -16
+ lhi %r8, 32
+ sr %r8, cnt
+ l %r12, 16(up)
+ srl %r12, 0(%r8) C return value
+ lhi %r13, -1
+ lhi %r7, 3
+ nr %r7, n
+ srl n, 2
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+L(b3): l %r10, 16(up)
+ l %r11, 12(up)
+ l %r9, 8(up)
+ ahi up, -8
+ lr %r8, %r11
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ xr %r10, %r13
+ xr %r8, %r13
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ ahi rp, -8
+ ltr n, n
+ je L(end)
+ j L(top)
+
+L(b2): l %r10, 16(up)
+ l %r11, 12(up)
+ ahi up, -4
+ sldl %r10, 0(cnt)
+ xr %r10, %r13
+ st %r10, 12(rp)
+ ahi rp, -4
+ ltr n, n
+ je L(end)
+ j L(top)
+
+L(b1): ltr n, n
+ je L(end)
+ j L(top)
+
+L(b0): l %r10,16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ ahi up, -12
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ sldl %r10,0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ xr %r10, %r13
+ xr %r8, %r13
+ xr %r6, %r13
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ ahi rp, -12
+ ahi n, -1
+ je L(end)
+
+ ALIGN(8)
+L(top): l %r10, 16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ l %r1, 0(up)
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ ahi up, -16
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ sldl %r0, 0(cnt)
+ xr %r10, %r13
+ xr %r8, %r13
+ xr %r6, %r13
+ xr %r0, %r13
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ st %r0, 0(rp)
+ ahi rp, -16
+ brct n, L(top)
+
+L(end): l %r10, 16(up)
+ sll %r10, 0(cnt)
+ xr %r10, %r13
+ st %r10, 12(rp)
+
+ lr %r2, %r12
+ lm %r6, %r13, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/mul_1.asm b/gmp-6.3.0/mpn/s390_32/mul_1.asm
new file mode 100644
index 0000000..e3ad0c5
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/mul_1.asm
@@ -0,0 +1,85 @@
+dnl S/390 mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl result in a second limb vector.
+
+dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(`rp',2)
+define(`up',3)
+define(`n',4)
+define(`vlimb',5)
+define(`cylimb',7)
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ stm 6,7,24(15)
+ slr cylimb,cylimb # clear cylimb
+ ltr vlimb,vlimb
+ jnl .Loopp
+
+.Loopn: l 1,0(up) # load from u
+ lr 6,1 #
+ mr 0,vlimb # multiply signed
+ alr 0,6 # add vlimb to phi
+ sra 6,31 # make mask
+ nr 6,vlimb # 0 or vlimb
+ alr 0,6 # conditionally add vlimb to phi
+ alr 1,cylimb # add carry limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ lr cylimb,0 # new cylimb
+ st 1,0(rp) # store
+ la up,4(,up)
+ la rp,4(,rp)
+ brct n,.Loopn
+
+ lr 2,cylimb
+ lm 6,7,24(15)
+ br 14
+
+.Loopp: l 1,0(up) # load from u
+ lr 6,1 #
+ mr 0,vlimb # multiply signed
+ sra 6,31 # make mask
+ nr 6,vlimb # 0 or vlimb
+ alr 0,6 # conditionally add vlimb to phi
+ alr 1,cylimb # add carry limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ lr cylimb,0 # new cylimb
+ st 1,0(rp) # store
+ la up,4(,up)
+ la rp,4(,rp)
+ brct n,.Loopp
+
+ lr 2,cylimb
+ lm 6,7,24(15)
+ br 14
+EPILOGUE(mpn_mul_1)
diff --git a/gmp-6.3.0/mpn/s390_32/rshift.asm b/gmp-6.3.0/mpn/s390_32/rshift.asm
new file mode 100644
index 0000000..5f2cf37
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/rshift.asm
@@ -0,0 +1,138 @@
+dnl S/390-32 mpn_rshift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 6
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C *
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ stm %r6, %r12, 24(%r15)
+ lhi %r8, 32
+ sr %r8, cnt
+ l %r12, 0(up)
+ sll %r12, 0(%r8) C return value
+ lhi %r7, 3
+ nr %r7, n
+ srl n, 2
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+L(b3): l %r11, 0(up)
+ l %r10, 4(up)
+ l %r8, 8(up)
+ ahi up, 8
+ lr %r9, %r10
+ srdl %r10, 0(cnt)
+ srdl %r8, 0(cnt)
+ st %r11, 0(rp)
+ st %r9, 4(rp)
+ ahi rp, 8
+ ltr n, n
+ je L(end)
+ j L(top)
+
+L(b2): l %r11, 0(up)
+ l %r10, 4(up)
+ ahi up, 4
+ srdl %r10, 0(cnt)
+ st %r11, 0(rp)
+ ahi rp, 4
+ ltr n, n
+ je L(end)
+ j L(top)
+
+L(b1): ltr n, n
+ je L(end)
+ j L(top)
+
+L(b0): l %r11, 0(up)
+ l %r9, 4(up)
+ l %r7, 8(up)
+ l %r1, 12(up)
+ ahi up, 12
+ lr %r10, %r9
+ lr %r8, %r7
+ lr %r6, %r1
+ srdl %r10, 0(cnt)
+ srdl %r8, 0(cnt)
+ srdl %r6, 0(cnt)
+ st %r11, 0(rp)
+ st %r9, 4(rp)
+ st %r7, 8(rp)
+ ahi rp, 12
+ ahi n, -1
+ je L(end)
+
+ ALIGN(8)
+L(top): l %r11, 0(up)
+ l %r9, 4(up)
+ l %r7, 8(up)
+ l %r1, 12(up)
+ l %r0, 16(up)
+ lr %r10, %r9
+ lr %r8, %r7
+ lr %r6, %r1
+ ahi up, 16
+ srdl %r10, 0(cnt)
+ srdl %r8, 0(cnt)
+ srdl %r6, 0(cnt)
+ srdl %r0, 0(cnt)
+ st %r11, 0(rp)
+ st %r9, 4(rp)
+ st %r7, 8(rp)
+ st %r1, 12(rp)
+ ahi rp, 16
+ brct n, L(top)
+
+L(end): l %r11, 0(up)
+ srl %r11, 0(cnt)
+ st %r11, 0(rp)
+
+ lr %r2, %r12
+ lm %r6, %r12, 24(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/sec_tabselect.asm b/gmp-6.3.0/mpn/s390_32/sec_tabselect.asm
new file mode 100644
index 0000000..c8aa25e
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/sec_tabselect.asm
@@ -0,0 +1,140 @@
+dnl S/390-64 mpn_sec_tabselect
+
+dnl Copyright 2021 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 ?
+C z9 ?
+C z10 ?
+C z196 ?
+C z13 ?
+C z14 ?
+C z15 ?
+
+dnl void
+dnl mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
+dnl mp_size_t n, mp_size_t nents, mp_size_t which)
+
+define(`rp', `%r2')
+define(`tp', `%r3')
+define(`n', `%r4')
+define(`nents', `%r5')
+define(`which_arg',`%r6') C magicked to stack
+
+dnl r0 r1 r2 r3 r4 r5 r6 r7
+dnl r8 r9 r10 r11 r12 r13 r14 r15
+
+define(`mask', `%r14')
+define(`k', `%r1')
+define(`which', `%r0')
+
+define(`FRAME', 32)
+
+ASM_START()
+PROLOGUE(mpn_sec_tabselect)
+ stm %r5, %r15, 20(%r15)
+ ahi %r15, -FRAME
+
+ sll n, 2
+ msr %r5, n
+ st %r5, 16(%r15) C nents * n * LIMB_BYTES
+
+ lr %r5, n
+ srl %r5, 2+2
+ nr %r5, %r5
+ je L(end4)
+L(outer):
+ l which, eval(24+FRAME)(%r15)
+ l k, eval(20+FRAME)(%r15) C nents
+ lhi %r6, 0
+ lhi %r7, 0
+ lhi %r8, 0
+ lhi %r9, 0
+L(tp4): lhi mask, 1
+ slr which, mask
+ slbr mask, mask
+ lm %r10, %r13, 0(tp)
+ nr %r10, mask
+ nr %r11, mask
+ nr %r12, mask
+ nr %r13, mask
+ ar %r6, %r10
+ ar %r7, %r11
+ ar %r8, %r12
+ ar %r9, %r13
+ ar tp, n
+ brct k, L(tp4)
+ stm %r6, %r9, 0(rp)
+ ahi rp, 16
+ sl tp, 16(%r15)
+ ahi tp, eval(4*4)
+ brct %r5, L(outer)
+L(end4):
+ tmll n, 8
+ je L(end2)
+ l which, eval(24+FRAME)(%r15)
+ l k, eval(20+FRAME)(%r15) C nents
+ lhi %r6, 0
+ lhi %r7, 0
+L(tp2): lhi mask, 1
+ slr which, mask
+ slbr mask, mask
+ lm %r10, %r11, 0(tp)
+ nr %r10, mask
+ nr %r11, mask
+ ar %r6, %r10
+ ar %r7, %r11
+ ar tp, n
+ brct k, L(tp2)
+ stm %r6, %r7, 0(rp)
+ ahi rp, 8
+ sl tp, 16(%r15)
+ ahi tp, eval(2*4)
+L(end2):
+ tmll n, 4
+ je L(end1)
+ l which, eval(24+FRAME)(%r15)
+ l k, eval(20+FRAME)(%r15) C nents
+ lhi %r6, 0
+L(tp1): lhi mask, 1
+ slr which, mask
+ slbr mask, mask
+ l %r10, 0(tp)
+ nr %r10, mask
+ ar %r6, %r10
+ ar tp, n
+ brct k, L(tp1)
+ st %r6, 0(rp)
+L(end1):
+ lm %r5, %r15, eval(20+FRAME)(%r15)
+ br %r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/s390_32/submul_1.asm b/gmp-6.3.0/mpn/s390_32/submul_1.asm
new file mode 100644
index 0000000..da7d849
--- /dev/null
+++ b/gmp-6.3.0/mpn/s390_32/submul_1.asm
@@ -0,0 +1,93 @@
+dnl S/390 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl result from a second limb vector.
+
+dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(`rp',2)
+define(`up',3)
+define(`n',4)
+define(`vlimb',5)
+define(`cylimb',7)
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ stm 6,7,24(15)
+ slr cylimb,cylimb # clear cylimb
+ ltr vlimb,vlimb
+ jnl .Loopp
+
+.Loopn: l 1,0(up) # load from u
+ lr 6,1 #
+ mr 0,vlimb # multiply signed
+ alr 0,6 # add vlimb to phi
+ sra 6,31 # make mask
+ nr 6,vlimb # 0 or vlimb
+ alr 0,6 # conditionally add vlimb to phi
+ alr 1,cylimb # add carry limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ l 6,0(rp) # load r limb
+ slr 6,1 # add u limb to plo
+ brc 2+1,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ lr cylimb,0 # new cylimb
+ st 6,0(rp) # store
+ la up,4(,up)
+ la rp,4(,rp)
+ brct n,.Loopn
+
+ lr 2,cylimb
+ lm 6,7,24(15)
+ br 14
+
+.Loopp: l 1,0(up) # load from u
+ lr 6,1 #
+ mr 0,vlimb # multiply signed
+ sra 6,31 # make mask
+ nr 6,vlimb # 0 or vlimb
+ alr 0,6 # conditionally add vlimb to phi
+ alr 1,cylimb # add carry limb to plo
+ brc 8+4,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ l 6,0(rp) # load r limb
+ slr 6,1 # add u limb to plo
+ brc 2+1,+8 # branch if not carry
+ ahi 0,1 # increment phi
+ lr cylimb,0 # new cylimb
+ st 6,0(rp) # store
+ la up,4(,up)
+ la rp,4(,rp)
+ brct n,.Loopp
+
+ lr 2,cylimb
+ lm 6,7,24(15)
+ br 14
+EPILOGUE(mpn_submul_1)