From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/s390_32/README | 37 ++++ gmp-6.3.0/mpn/s390_32/addmul_1.asm | 93 +++++++++ gmp-6.3.0/mpn/s390_32/copyd.asm | 145 +++++++++++++ gmp-6.3.0/mpn/s390_32/copyi.asm | 69 +++++++ gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm | 72 +++++++ gmp-6.3.0/mpn/s390_32/esame/aors_n.asm | 137 +++++++++++++ gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm | 173 ++++++++++++++++ gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm | 65 ++++++ gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h | 177 ++++++++++++++++ gmp-6.3.0/mpn/s390_32/esame/mul_1.asm | 66 ++++++ gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm | 130 ++++++++++++ gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm | 203 ++++++++++++++++++ gmp-6.3.0/mpn/s390_32/esame/submul_1.asm | 70 +++++++ gmp-6.3.0/mpn/s390_32/gmp-mparam.h | 138 +++++++++++++ gmp-6.3.0/mpn/s390_32/logops_n.asm | 295 +++++++++++++++++++++++++++ gmp-6.3.0/mpn/s390_32/lshift.asm | 144 +++++++++++++ gmp-6.3.0/mpn/s390_32/lshiftc.asm | 156 ++++++++++++++ gmp-6.3.0/mpn/s390_32/mul_1.asm | 85 ++++++++ gmp-6.3.0/mpn/s390_32/rshift.asm | 138 +++++++++++++ gmp-6.3.0/mpn/s390_32/sec_tabselect.asm | 140 +++++++++++++ gmp-6.3.0/mpn/s390_32/submul_1.asm | 93 +++++++++ 21 files changed, 2626 insertions(+) create mode 100644 gmp-6.3.0/mpn/s390_32/README create mode 100644 gmp-6.3.0/mpn/s390_32/addmul_1.asm create mode 100644 gmp-6.3.0/mpn/s390_32/copyd.asm create mode 100644 gmp-6.3.0/mpn/s390_32/copyi.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/aors_n.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h create mode 100644 gmp-6.3.0/mpn/s390_32/esame/mul_1.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm create mode 
100644 gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm create mode 100644 gmp-6.3.0/mpn/s390_32/esame/submul_1.asm create mode 100644 gmp-6.3.0/mpn/s390_32/gmp-mparam.h create mode 100644 gmp-6.3.0/mpn/s390_32/logops_n.asm create mode 100644 gmp-6.3.0/mpn/s390_32/lshift.asm create mode 100644 gmp-6.3.0/mpn/s390_32/lshiftc.asm create mode 100644 gmp-6.3.0/mpn/s390_32/mul_1.asm create mode 100644 gmp-6.3.0/mpn/s390_32/rshift.asm create mode 100644 gmp-6.3.0/mpn/s390_32/sec_tabselect.asm create mode 100644 gmp-6.3.0/mpn/s390_32/submul_1.asm (limited to 'gmp-6.3.0/mpn/s390_32') diff --git a/gmp-6.3.0/mpn/s390_32/README b/gmp-6.3.0/mpn/s390_32/README new file mode 100644 index 0000000..59519ba --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/README @@ -0,0 +1,37 @@ +All current (2001) S/390 and z/Architecture machines are single-issue, +but some newer machines have a deep pipeline. Software-pipelining is +therefore beneficial. + +* mpn_add_n, mpn_sub_n: Use code along the lines below. Two-way unrolling + would be adequate. + + mp_limb_t + mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) + { + mp_limb_t a, b, r, cy; + mp_size_t i; + mp_limb_t mm = -1; + + cy = 0; + up += n; + vp += n; + rp += n; + i = -n; + do + { + a = up[i]; + b = vp[i]; + r = a + b + cy; + rp[i] = r; + cy = (((a & b) | ((a | b) & (r ^ mm)))) >> 31; + i++; + } + while (i < 0); + return cy; + } + +* mpn_lshift, mpn_rshift: Use SLDL/SRDL, and two-way unrolling. + +* mpn_mul_1, mpn_addmul_1, mpn_submul_1: For machines with just signed + multiply (MR), use two loops, similar to the corresponding VAX or + POWER functions. Handle carry like for mpn_add_n. diff --git a/gmp-6.3.0/mpn/s390_32/addmul_1.asm b/gmp-6.3.0/mpn/s390_32/addmul_1.asm new file mode 100644 index 0000000..97189a8 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/addmul_1.asm @@ -0,0 +1,93 @@ +dnl S/390 mpn_addmul_1 -- Multiply a limb vector with a limb and add the +dnl result to a second limb vector. 
+ +dnl Copyright 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +define(`rp',2) +define(`up',3) +define(`n',4) +define(`vlimb',5) +define(`cylimb',7) + +ASM_START() +PROLOGUE(mpn_addmul_1) + stm 6,7,24(15) + slr cylimb,cylimb # clear cylimb + ltr vlimb,vlimb + jnl .Loopp + +.Loopn: l 1,0(up) # load from u + lr 6,1 # copy u limb + mr 0,vlimb # multiply signed + alr 0,6 # add u limb to phi (vlimb < 0 as signed) + sra 6,31 # make mask + nr 6,vlimb # 0 or vlimb + alr 0,6 # conditionally add vlimb to phi + alr 1,cylimb # add carry limb to plo + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + l 6,0(rp) # load r limb + alr 6,1 # add plo to r limb + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + lr cylimb,0 # new cylimb + st 6,0(rp) # store + la up,4(,up) + la rp,4(,rp) + brct n,.Loopn + + lr 2,cylimb + lm 6,7,24(15) + br 14 + +.Loopp: l 1,0(up) # load from u + lr 6,1 # copy u limb + mr 0,vlimb # multiply signed + sra 6,31 # make mask + nr 6,vlimb # 0 or vlimb + alr 0,6 # conditionally add vlimb to phi + alr 1,cylimb # add carry limb to plo + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + l 6,0(rp) # load r limb + alr 6,1 # add plo to r limb + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + lr cylimb,0 # new cylimb + st 6,0(rp) # store + la up,4(,up) + la rp,4(,rp) + brct n,.Loopp + + lr 2,cylimb + lm 6,7,24(15) + br 14 +EPILOGUE(mpn_addmul_1) diff --git a/gmp-6.3.0/mpn/s390_32/copyd.asm b/gmp-6.3.0/mpn/s390_32/copyd.asm new file mode 100644 index 0000000..ff252bc --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/copyd.asm @@ -0,0 +1,145 @@ +dnl S/390-32 mpn_copyd + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + + +include(`../config.m4') + +C cycles/limb +C cycles/limb +C z900 1.65 +C z990 1.125 +C z9 ? +C z10 ? +C z196 ? + +C FIXME: +C * Avoid saving/restoring callee-saves registers for n < 3. This could be +C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs. +C We could then use r3...r10 in main loop. 
+ +C INPUT PARAMETERS +define(`rp_param', `%r2') +define(`up_param', `%r3') +define(`n', `%r4') + +define(`rp', `%r8') +define(`up', `%r9') + +ASM_START() +PROLOGUE(mpn_copyd) + stm %r6, %r11, 24(%r15) + + lr %r1, n + sll %r1, 2 + la %r10, 8(n) + ahi %r1, -32 + srl %r10, 3 + lhi %r11, -32 + + la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later + la up, 0(%r1,up_param) C FIXME use lay on z990 and later + + lhi %r7, 7 + nr %r7, n C n mod 8 + chi %r7, 2 + jh L(b34567) + chi %r7, 1 + je L(b1) + jh L(b2) + +L(b0): brct %r10, L(top) + j L(end) + +L(b1): l %r0, 28(up) + ahi up, -4 + st %r0, 28(rp) + ahi rp, -4 + brct %r10, L(top) + j L(end) + +L(b2): lm %r0, %r1, 24(up) + ahi up, -8 + stm %r0, %r1, 24(rp) + ahi rp, -8 + brct %r10, L(top) + j L(end) + +L(b34567): + chi %r7, 4 + jl L(b3) + je L(b4) + chi %r7, 6 + je L(b6) + jh L(b7) + +L(b5): lm %r0, %r4, 12(up) + ahi up, -20 + stm %r0, %r4, 12(rp) + ahi rp, -20 + brct %r10, L(top) + j L(end) + +L(b3): lm %r0, %r2, 20(up) + ahi up, -12 + stm %r0, %r2, 20(rp) + ahi rp, -12 + brct %r10, L(top) + j L(end) + +L(b4): lm %r0, %r3, 16(up) + ahi up, -16 + stm %r0, %r3, 16(rp) + ahi rp, -16 + brct %r10, L(top) + j L(end) + +L(b6): lm %r0, %r5, 8(up) + ahi up, -24 + stm %r0, %r5, 8(rp) + ahi rp, -24 + brct %r10, L(top) + j L(end) + +L(b7): lm %r0, %r6, 4(up) + ahi up, -28 + stm %r0, %r6, 4(rp) + ahi rp, -28 + brct %r10, L(top) + j L(end) + +L(top): lm %r0, %r7, 0(up) + la up, 0(%r11,up) + stm %r0, %r7, 0(rp) + la rp, 0(%r11,rp) + brct %r10, L(top) + +L(end): lm %r6, %r11, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/copyi.asm b/gmp-6.3.0/mpn/s390_32/copyi.asm new file mode 100644 index 0000000..1df32f1 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/copyi.asm @@ -0,0 +1,69 @@ +dnl S/390-32 mpn_copyi + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + + +include(`../config.m4') + +C cycles/limb +C z900 0.75 +C z990 0.375 +C z9 ? +C z10 ? +C z196 ? + +C NOTE +C * This is based on GNU libc memcpy which was written by Martin Schwidefsky. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') + +ASM_START() +PROLOGUE(mpn_copyi) + ltr %r4, %r4 + sll %r4, 2 + je L(rtn) + ahi %r4, -1 + lr %r5, %r4 + srl %r5, 8 + ltr %r5, %r5 C < 256 bytes to copy? 
+ je L(1) + +L(top): mvc 0(256, rp), 0(up) + la rp, 256(rp) + la up, 256(up) + brct %r5, L(top) + +L(1): bras %r5, L(2) C make r5 point to mvc insn + mvc 0(1, rp), 0(up) +L(2): ex %r4, 0(%r5) C execute mvc with length ((n-1) mod 256)+1 +L(rtn): br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm b/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm new file mode 100644 index 0000000..4375b74 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/addmul_1.asm @@ -0,0 +1,72 @@ +dnl S/390-32 mpn_addmul_1 for systems with MLR instruction + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 18.5 +C z990 10 +C z9 ? +C z10 ? +C z196 ? 
+ +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`v0', `%r5') + +define(`z', `%r9') + +ASM_START() +PROLOGUE(mpn_addmul_1) + stm %r9, %r12, 36(%r15) + lhi %r12, 0 C zero index register + ahi %r12, 0 C clear carry flag + lhi %r11, 0 C clear carry limb + lhi z, 0 C zero operand for carry propagation + +L(top): l %r1, 0(%r12,up) + l %r10, 0(%r12,rp) + mlr %r0, v0 + alcr %r1, %r10 + alcr %r0, z + alr %r1, %r11 + lr %r11, %r0 + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct n, L(top) + + lhi %r2, 0 + alcr %r2, %r11 + + lm %r9, %r12, 36(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm b/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm new file mode 100644 index 0000000..98b0dbc --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/aors_n.asm @@ -0,0 +1,137 @@ +dnl S/390-32 mpn_add_n and mpn_sub_n. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 ? 
+C z990 2.75-3 (fast for even n, slow for odd n) +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Optimise for small n +C * Use r0 and save/restore one less register +C * Using logops_n's v1 inner loop operand order makes the loop about 20% +C faster, at the expense of highly alignment-dependent performance. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`vp', `%r4') +define(`n', `%r5') + +ifdef(`OPERATION_add_n', ` + define(ADSB, al) + define(ADSBCR, alcr) + define(ADSBC, alc) + define(RETVAL,`dnl + lhi %r2, 0 + alcr %r2, %r2') + define(func, mpn_add_n) + define(func_nc, mpn_add_nc)') +ifdef(`OPERATION_sub_n', ` + define(ADSB, sl) + define(ADSBCR, slbr) + define(ADSBC, slb) + define(RETVAL,`dnl + slbr %r2, %r2 + lcr %r2, %r2') + define(func, mpn_sub_n) + define(func_nc, mpn_sub_nc)') + +MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n) + +ASM_START() +PROLOGUE(func) + stm %r6, %r8, 24(%r15) + + ahi n, 3 + lhi %r7, 3 + lr %r1, n + srl %r1, 2 + nr %r7, n C (n + 3) mod 4, n was biased by 3 above + je L(b1) + chi %r7, 2 + jl L(b2) + jne L(b0) + +L(b3): lm %r5, %r7, 0(up) + la up, 12(up) + ADSB %r5, 0(vp) + ADSBC %r6, 4(vp) + ADSBC %r7, 8(vp) + la vp, 12(vp) + stm %r5, %r7, 0(rp) + la rp, 12(rp) + brct %r1, L(top) + j L(end) + +L(b0): lm %r5, %r8, 0(up) C These redundant insns are no mistake; + la up, 16(up) C they are needed to make main loop run + ADSB %r5, 0(vp) C fast for n = 0 (mod 4). 
+ ADSBC %r6, 4(vp) + j L(m0) + +L(b1): l %r5, 0(up) + la up, 4(up) + ADSB %r5, 0(vp) + la vp, 4(vp) + st %r5, 0(rp) + la rp, 4(rp) + brct %r1, L(top) + j L(end) + +L(b2): lm %r5, %r6, 0(up) + la up, 8(up) + ADSB %r5, 0(vp) + ADSBC %r6, 4(vp) + la vp, 8(vp) + stm %r5, %r6, 0(rp) + la rp, 8(rp) + brct %r1, L(top) + j L(end) + +L(top): lm %r5, %r8, 0(up) + la up, 16(up) + ADSBC %r5, 0(vp) + ADSBC %r6, 4(vp) +L(m0): ADSBC %r7, 8(vp) + ADSBC %r8, 12(vp) + la vp, 16(vp) + stm %r5, %r8, 0(rp) + la rp, 16(rp) + brct %r1, L(top) + +L(end): RETVAL + lm %r6, %r8, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm b/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm new file mode 100644 index 0000000..f2b222b --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/aorslsh1_n.asm @@ -0,0 +1,173 @@ +dnl S/390-32 mpn_addlsh1_n + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +C cycles/limb +C z900 9.25 +C z990 5 +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Optimise for small n +C * Compute RETVAL for sublsh1_n less stupidly + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`vp', `%r4') +define(`n', `%r5') + +ifdef(`OPERATION_addlsh1_n',` + define(ADDSUBC, alr) + define(ADDSUBE, alcr) + define(INITCY, `lhi %r13, -1') + define(RETVAL, `alr %r1, %r13 + lhi %r2, 2 + alr %r2, %r1') + define(func, mpn_addlsh1_n) +') +ifdef(`OPERATION_sublsh1_n',` + define(ADDSUBC, slr) + define(ADDSUBE, slbr) + define(INITCY, `lhi %r13, 0') + define(RETVAL, `slr %r1, %r13 + lhi %r2, 1 + alr %r2, %r1') + define(func, mpn_sublsh1_n) +') + +MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) + +ASM_START() +PROLOGUE(func) + stm %r6, %r13, 24(%r15) + + la %r0, 3(n) + lhi %r7, 3 + srl %r0, 2 + nr %r7, n C n mod 4 + je L(b0) + chi %r7, 2 + jl L(b1) + je L(b2) + +L(b3): lm %r5, %r7, 0(up) + la up, 12(up) + lm %r9, %r11, 0(vp) + la vp, 12(vp) + + alr %r9, %r9 + alcr %r10, %r10 + alcr %r11, %r11 + slbr %r1, %r1 + + ADDSUBC %r5, %r9 + ADDSUBE %r6, %r10 + ADDSUBE %r7, %r11 + slbr %r13, %r13 + + stm %r5, %r7, 0(rp) + la rp, 12(rp) + brct %r0, L(top) + j L(end) + +L(b0): lhi %r1, -1 + INITCY + j L(top) + +L(b1): l %r5, 0(up) + la up, 4(up) + l %r9, 0(vp) + la vp, 4(vp) + + alr %r9, %r9 + slbr %r1, %r1 + ADDSUBC %r5, %r9 + slbr %r13, %r13 + + st %r5, 0(rp) + la rp, 4(rp) + brct %r0, L(top) + j L(end) + +L(b2): lm %r5, %r6, 0(up) + la up, 8(up) + lm %r9, %r10, 0(vp) + la vp, 8(vp) + + alr %r9, %r9 + alcr %r10, %r10 + slbr %r1, %r1 + + ADDSUBC %r5, %r9 + ADDSUBE %r6, %r10 + slbr %r13, %r13 + + stm %r5, %r6, 0(rp) + la rp, 8(rp) + brct %r0, L(top) + j L(end) + +L(top): lm %r9, %r12, 0(vp) + la vp, 16(vp) + + ahi %r1, 1 C restore carry + + alcr %r9, %r9 + alcr %r10, %r10 + alcr %r11, %r11 + alcr %r12, %r12 + + slbr %r1, %r1 C save carry + + lm %r5, %r8, 0(up) + la up, 16(up) + + ahi %r13, 1 C restore carry + + ADDSUBE %r5, %r9 + 
ADDSUBE %r6, %r10 + ADDSUBE %r7, %r11 + ADDSUBE %r8, %r12 + + slbr %r13, %r13 + + stm %r5, %r8, 0(rp) + la rp, 16(rp) + brct %r0, L(top) + +L(end): + RETVAL + lm %r6, %r13, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm b/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm new file mode 100644 index 0000000..568a2a4 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/bdiv_dbm1c.asm @@ -0,0 +1,65 @@ +dnl S/390-32 mpn_bdiv_dbm1c for systems with MLR instruction. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 14 +C z990 10 +C z9 ? +C z10 ? +C z196 ? 
+ +C INPUT PARAMETERS +define(`qp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`bd', `%r5') +define(`cy', `%r6') + +ASM_START() + TEXT + ALIGN(16) +PROLOGUE(mpn_bdiv_dbm1c) + stm %r6, %r7, 24(%r15) + lhi %r7, 0 C zero index register + +L(top): l %r1, 0(%r7,up) + mlr %r0, bd + slr %r6, %r1 + st %r6, 0(%r7,qp) + slbr %r6, %r0 + la %r7, 4(%r7) + brct n, L(top) + + lr %r2, %r6 + lm %r6, %r7, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h b/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h new file mode 100644 index 0000000..c0e5046 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/gmp-mparam.h @@ -0,0 +1,177 @@ +/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 2000-2008-2011, 2014 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. 
*/ + +#define GMP_LIMB_BITS 32 +#define GMP_LIMB_BYTES 4 + +/* 4400 MHz IBM z196 running in 32-bit mode */ +/* FFT tuning limit = 0.5M */ +/* Generated by tuneup.c, 2017-01-02, gcc 4.9 */ + +#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_1P_METHOD 2 +#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1N_TO_MOD_1_1_THRESHOLD 45 +#define MOD_1U_TO_MOD_1_1_THRESHOLD 18 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ +#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 3 +#define USE_PREINV_DIVREM_1 0 +#define DIV_QR_1N_PI1_METHOD 1 +#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVEXACT_1_THRESHOLD 6 +#define BMOD_1_TO_MOD_1_THRESHOLD 0 /* always */ + +#define DIV_1_VS_MUL_1_PERCENT 320 + +#define MUL_TOOM22_THRESHOLD 12 +#define MUL_TOOM33_THRESHOLD 81 +#define MUL_TOOM44_THRESHOLD 130 +#define MUL_TOOM6H_THRESHOLD 173 +#define MUL_TOOM8H_THRESHOLD 260 + +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 83 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 86 +#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_TOOM2_THRESHOLD 18 +#define SQR_TOOM3_THRESHOLD 69 +#define SQR_TOOM4_THRESHOLD 178 +#define SQR_TOOM6_THRESHOLD 254 +#define SQR_TOOM8_THRESHOLD 406 + +#define MULMID_TOOM42_THRESHOLD 30 + +#define MULMOD_BNM1_THRESHOLD 12 +#define SQRMOD_BNM1_THRESHOLD 7 + +#define MUL_FFT_MODF_THRESHOLD 276 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 276, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \ + { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \ + { 17, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \ + { 23, 7}, { 13, 8}, { 
7, 7}, { 19, 8}, \ + { 11, 7}, { 25, 8}, { 15, 7}, { 31, 8}, \ + { 19, 7}, { 39, 8}, { 23, 9}, { 15, 8}, \ + { 39, 9}, { 23,10}, { 15, 9}, { 31, 8}, \ + { 67, 9}, { 39, 8}, { 79, 9}, { 47,10}, \ + { 31, 9}, { 71, 8}, { 143, 9}, { 79,10}, \ + { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \ + { 255, 7}, { 511, 9}, { 143,10}, { 79, 9}, \ + { 159, 8}, { 319, 9}, { 175, 8}, { 351,10}, \ + { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ + { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \ + { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \ + { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \ + { 287, 9}, { 575,11}, { 159,10}, { 351, 9}, \ + { 703, 8}, { 1407,11}, { 191,10}, { 415, 9}, \ + { 831,11}, { 223,10}, { 479, 9}, { 959, 8}, \ + { 1919,12}, { 4096,13}, { 8192,14}, { 16384,15}, \ + { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 89 +#define MUL_FFT_THRESHOLD 2688 + +#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 240, 5}, { 17, 6}, { 17, 7}, { 9, 6}, \ + { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \ + { 7, 7}, { 19, 8}, { 11, 7}, { 25, 8}, \ + { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \ + { 23, 9}, { 15, 8}, { 39, 9}, { 23,10}, \ + { 15, 9}, { 31, 8}, { 63, 9}, { 47,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \ + { 143,10}, { 47,11}, { 31,10}, { 63, 9}, \ + { 127, 8}, { 255, 7}, { 511, 9}, { 143,10}, \ + { 79, 9}, { 159, 8}, { 319, 9}, { 175, 8}, \ + { 351, 7}, { 703,10}, { 95, 9}, { 191, 8}, \ + { 383, 9}, { 207, 8}, { 415,11}, { 63,10}, \ + { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \ + { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \ + { 175, 9}, { 351, 8}, { 703, 7}, { 1407,11}, \ + { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \ + { 415,12}, { 63,11}, { 127,10}, { 255, 9}, \ + { 511,10}, { 287, 9}, { 575,11}, { 159,10}, \ + { 351, 9}, { 703, 8}, { 1407,11}, { 191,10}, \ + { 415, 9}, { 831,11}, { 223,10}, { 479,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } 
+#define SQR_FFT_TABLE3_SIZE 84 +#define SQR_FFT_THRESHOLD 1856 + +#define MULLO_BASECASE_THRESHOLD 0 /* always */ +#define MULLO_DC_THRESHOLD 27 +#define MULLO_MUL_N_THRESHOLD 5240 +#define SQRLO_BASECASE_THRESHOLD 0 /* always */ +#define SQRLO_DC_THRESHOLD 65 +#define SQRLO_SQR_THRESHOLD 3470 + +#define DC_DIV_QR_THRESHOLD 32 +#define DC_DIVAPPR_Q_THRESHOLD 135 +#define DC_BDIV_QR_THRESHOLD 32 +#define DC_BDIV_Q_THRESHOLD 80 + +#define INV_MULMOD_BNM1_THRESHOLD 42 +#define INV_NEWTON_THRESHOLD 177 +#define INV_APPR_THRESHOLD 139 + +#define BINV_NEWTON_THRESHOLD 179 +#define REDC_1_TO_REDC_N_THRESHOLD 39 + +#define MU_DIV_QR_THRESHOLD 872 +#define MU_DIVAPPR_Q_THRESHOLD 998 +#define MUPI_DIV_QR_THRESHOLD 66 +#define MU_BDIV_QR_THRESHOLD 748 +#define MU_BDIV_Q_THRESHOLD 906 + +#define POWM_SEC_TABLE 9,34,257,946,2913 + +#define GET_STR_DC_THRESHOLD 10 +#define GET_STR_PRECOMPUTE_THRESHOLD 16 +#define SET_STR_DC_THRESHOLD 1045 +#define SET_STR_PRECOMPUTE_THRESHOLD 1800 + +#define FAC_DSC_THRESHOLD 77 +#define FAC_ODD_THRESHOLD 24 + +#define MATRIX22_STRASSEN_THRESHOLD 15 +#define HGCD_THRESHOLD 121 +#define HGCD_APPR_THRESHOLD 142 +#define HGCD_REDUCE_THRESHOLD 1679 +#define GCD_DC_THRESHOLD 389 +#define GCDEXT_DC_THRESHOLD 285 +#define JACOBI_BASE_METHOD 4 diff --git a/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm b/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm new file mode 100644 index 0000000..04be963 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/mul_1.asm @@ -0,0 +1,66 @@ +dnl S/390-32 mpn_mul_1 for systems with MLR instruction + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 14 +C z990 9 +C z9 ? +C z10 ? +C z196 ? + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`v0', `%r5') + +ASM_START() +PROLOGUE(mpn_mul_1) + stm %r11, %r12, 44(%r15) + lhi %r12, 0 C zero index register + ahi %r12, 0 C clear carry flag + lhi %r11, 0 C clear carry limb + +L(top): l %r1, 0(%r12,up) + mlr %r0, v0 + alcr %r1, %r11 + lr %r11, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct n, L(top) + + lhi %r2, 0 + alcr %r2, %r11 + + lm %r11, %r12, 44(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm b/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm new file mode 100644 index 0000000..2c8138d --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/mul_basecase.asm @@ -0,0 +1,130 @@ +dnl S/390-32/esame mpn_mul_basecase. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. 
+dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 ? +C z990 ? +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Perhaps add special case for un <= 2. +C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped +C up by about 10%. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`un', `%r4') +define(`vp', `%r5') +define(`vn', `%r6') + +define(`zero', `%r8') + +ASM_START() +PROLOGUE(mpn_mul_basecase) + chi un, 2 + jhe L(ge2) + +C un = vn = 1 + l %r1, 0(vp) + ml %r0, 0(up) + st %r1, 0(rp) + st %r0, 4(rp) + br %r14 + +L(ge2): C jne L(gen) + + +L(gen): +C mul_1 ======================================================================= + + stm %r6, %r12, 24(%r15) + lhi zero, 0 + ahi un, -1 + + l %r7, 0(vp) + l %r11, 0(up) + lhi %r12, 4 C init index register + mlr %r10, %r7 + lr %r9, un + st %r11, 0(rp) + cr %r15, %r15 C clear carry flag + +L(tm): l %r1, 0(%r12,up) + mlr %r0, %r7 + alcr %r1, %r10 + lr %r10, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r9, L(tm) + + alcr %r0, zero + st %r0, 0(%r12,rp) + +C addmul_1 loop =============================================================== + + ahi vn, -1 + je L(outer_end) +L(outer_loop): + + la rp, 4(rp) C rp += 1 + la vp, 4(vp) C vp += 1 + l %r7, 0(vp) + l %r11, 0(up) + lhi %r12, 4 C 
init index register + mlr %r10, %r7 + lr %r9, un + al %r11, 0(rp) + st %r11, 0(rp) + +L(tam): l %r1, 0(%r12,up) + l %r11, 0(%r12,rp) + mlr %r0, %r7 + alcr %r1, %r11 + alcr %r0, zero + alr %r1, %r10 + lr %r10, %r0 + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r9, L(tam) + + alcr %r0, zero + st %r0, 0(%r12,rp) + + brct vn, L(outer_loop) +L(outer_end): + + lm %r6, %r12, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm b/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm new file mode 100644 index 0000000..f45f87a --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/sqr_basecase.asm @@ -0,0 +1,203 @@ +dnl S/390-32 mpn_sqr_basecase. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 ? +C z990 23 +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * Clean up. +C * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail. +C This will ask for basecase handling of n = 3. 
+C * Update counters and pointers more straightforwardly, possibly lowering +C register usage. +C * Should we use this allocation-free style for more sqr_basecase asm +C implementations? The only disadvantage is that it requires R != U. +C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped +C up by about 10%. The sqr_diag_addlsh1 loop could probably be sped up even +C more. + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') + +define(`zero', `%r8') +define(`rp_saved', `%r9') +define(`up_saved', `%r13') +define(`n_saved', `%r14') + +C Flow for n >= 3: mul_1 stores u0 * u[1...] starting at rp[1]; the addmul_1 +C passes and the tail at outer_end add the remaining off-diagonal products; +C sqr_diag_addlsh1 then doubles that sum while adding the squares of each limb. +C n = 1 and n = 2 are handled by straight-line code. + +ASM_START() +PROLOGUE(mpn_sqr_basecase) + ahi n, -2 C n -= 2; condition code is tested twice below + jhe L(ge2) + +C n = 1 + l %r5, 0(up) + mlr %r4, %r5 + st %r5, 0(rp) + st %r4, 4(rp) + br %r14 + +L(ge2): jne L(gen) + +C n = 2 + stm %r6, %r8, 24(%r15) + lhi zero, 0 + + l %r5, 0(up) + mlr %r4, %r5 C u0 * u0 + l %r1, 4(up) + mlr %r0, %r1 C u1 * u1 + st %r5, 0(rp) + + l %r7, 0(up) + ml %r6, 4(up) C u0 * u1 + alr %r7, %r7 C double the cross product r6:r7 ... + alcr %r6, %r6 + alcr %r0, zero C ... its top bit goes into the high limb + + alr %r4, %r7 + alcr %r1, %r6 + alcr %r0, zero + st %r4, 4(rp) + st %r1, 8(rp) + st %r0, 12(rp) + + lm %r6, %r8, 24(%r15) + br %r14 + +L(gen): +C mul_1 ======================================================================= + + stm %r6, %r14, 24(%r15) + lhi zero, 0 + lr up_saved, up + lr rp_saved, rp + lr n_saved, n C n_saved is r14; the lm at exit reloads it + + l %r6, 0(up) + l %r11, 4(up) + lhi %r12, 8 C init index register + mlr %r10, %r6 C r10:r11 = u0 * u1 + lr %r5, n + st %r11, 4(rp) + cr %r15, %r15 C clear carry flag + +L(tm): l %r1, 0(%r12,up) + mlr %r0, %r6 + alcr %r1, %r10 + lr %r10, %r0 C copy high part to carry limb + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r5, L(tm) + + alcr %r0, zero + st %r0, 0(%r12,rp) + +C addmul_1 loop =============================================================== + + ahi n, -1 + je L(outer_end) +L(outer_loop): + + la rp, 8(rp) C rp += 2 + la up, 4(up) C up += 1 + l %r6, 0(up) + l %r11, 4(up) + lhi %r12, 8 C init index register + mlr %r10, %r6 + lr %r5, n + al %r11, 4(rp) + st %r11, 4(rp) + 
+L(tam): l %r1, 0(%r12,up) + l %r7, 0(%r12,rp) + mlr %r0, %r6 + alcr %r1, %r7 C low product + rp limb + pending carry flag + alcr %r0, zero + alr %r1, %r10 C add carry limb; its carry is consumed next pass + lr %r10, %r0 + st %r1, 0(%r12,rp) + la %r12, 4(%r12) + brct %r5, L(tam) + + alcr %r0, zero + st %r0, 0(%r12,rp) + + brct n, L(outer_loop) +L(outer_end): + + l %r6, 4(up) + l %r1, 8(up) + lr %r7, %r0 C Same as: l %r7, 12(,rp) + mlr %r0, %r6 C last off-diagonal product + alr %r1, %r7 + alcr %r0, zero + st %r1, 12(rp) + st %r0, 16(rp) + +C sqr_diag_addlsh1 ============================================================ + +define(`up', `up_saved') +define(`rp', `rp_saved') + la n, 1(n_saved) C loop count = n_saved + 1 + + l %r1, 0(up) + mlr %r0, %r1 C r0:r1 = square of the lowest limb + st %r1, 0(rp) +C clr %r15, %r15 C clear carry (already clear per above) + +L(top): l %r11, 4(up) + la up, 4(up) + l %r6, 4(rp) + l %r7, 8(rp) + mlr %r10, %r11 C r10:r11 = square of the current limb + alcr %r6, %r6 C double two result limbs, carrying in and out + alcr %r7, %r7 + alcr %r10, zero C propagate carry to high product limb + alr %r6, %r0 + alcr %r7, %r11 + stm %r6, %r7, 4(rp) + la rp, 8(rp) + lr %r0, %r10 C copy carry limb + brct n, L(top) + + alcr %r0, zero + st %r0, 4(rp) + + lm %r6, %r14, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm b/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm new file mode 100644 index 0000000..a71e57e --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/esame/submul_1.asm @@ -0,0 +1,70 @@ +dnl S/390-32 mpn_submul_1 for systems with MLR instruction. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 20 +C z990 11 +C z9 ? +C z10 ? +C z196 ? + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`v0', `%r5') + +C Subtracts up[i] * v0 from rp[i] for n limbs and returns the final borrow +C limb in r2. r11 carries the high product limb between iterations; the +C borrow flag left by the last slr of one pass is consumed by the first slbr +C of the next pass. + +ASM_START() +PROLOGUE(mpn_submul_1) + stm %r9, %r12, 36(%r15) C save r9-r12, reloaded by the lm at exit + lhi %r12, 0 C byte index + slr %r11, %r11 C carry limb = 0, borrow flag cleared + +L(top): l %r1, 0(%r12, up) + l %r10, 0(%r12, rp) + mlr %r0, v0 C r0:r1 = up[i] * v0 + slbr %r10, %r1 C rp limb - low product - pending borrow + slbr %r9, %r9 C r9 = 0, or -1 if that borrowed + slr %r0, %r9 C conditional incr + slr %r10, %r11 C subtract carry limb; borrow goes to next pass + lr %r11, %r0 + st %r10, 0(%r12, rp) + la %r12, 4(%r12) + brct %r4, L(top) + + lr %r2, %r11 + slbr %r9, %r9 C fold the last borrow into the return value + slr %r2, %r9 + + lm %r9, %r12, 36(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/gmp-mparam.h b/gmp-6.3.0/mpn/s390_32/gmp-mparam.h new file mode 100644 index 0000000..1aca74a --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/gmp-mparam.h @@ -0,0 +1,138 @@ +/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file. + +Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + +or + + * the GNU General Public License as published by the Free Software + Foundation; either version 2 of the License, or (at your option) any + later version. + +or both in parallel, as here. 
+ +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received copies of the GNU General Public License and the +GNU Lesser General Public License along with the GNU MP Library. If not, +see https://www.gnu.org/licenses/. */ + +#define GMP_LIMB_BITS 32 +#define GMP_LIMB_BYTES 4 + +/* 770 MHz IBM z900 running in 32-bit mode, using just traditional insns */ + +#define DIVREM_1_NORM_THRESHOLD 0 /* always */ +#define DIVREM_1_UNNORM_THRESHOLD 5 +#define MOD_1_1P_METHOD 2 +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 5 +#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1U_TO_MOD_1_1_THRESHOLD 15 +#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */ +#define MOD_1_2_TO_MOD_1_4_THRESHOLD 30 +#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */ +#define USE_PREINV_DIVREM_1 1 +#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVEXACT_1_THRESHOLD 0 /* always */ +#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */ + +#define MUL_TOOM22_THRESHOLD 19 +#define MUL_TOOM33_THRESHOLD 114 +#define MUL_TOOM44_THRESHOLD 166 +#define MUL_TOOM6H_THRESHOLD 226 +#define MUL_TOOM8H_THRESHOLD 333 + +#define MUL_TOOM32_TO_TOOM43_THRESHOLD 106 +#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122 +#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105 +#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113 + +#define SQR_BASECASE_THRESHOLD 7 +#define SQR_TOOM2_THRESHOLD 40 +#define SQR_TOOM3_THRESHOLD 126 +#define SQR_TOOM4_THRESHOLD 192 +#define SQR_TOOM6_THRESHOLD 246 +#define SQR_TOOM8_THRESHOLD 357 + +#define MULMID_TOOM42_THRESHOLD 28 + +#define MULMOD_BNM1_THRESHOLD 12 +#define SQRMOD_BNM1_THRESHOLD 18 + +#define MUL_FFT_MODF_THRESHOLD 244 /* k = 5 */ +#define MUL_FFT_TABLE3 \ + { { 244, 5}, { 13, 6}, { 7, 5}, 
{ 15, 6}, \ + { 8, 5}, { 17, 6}, { 13, 7}, { 7, 6}, \ + { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \ + { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \ + { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \ + { 33, 8}, { 19, 7}, { 39, 8}, { 23, 7}, \ + { 47, 8}, { 27, 9}, { 15, 8}, { 39, 9}, \ + { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \ + { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \ + { 143, 9}, { 79,10}, { 47,11}, { 2048,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define MUL_FFT_TABLE3_SIZE 48 +#define MUL_FFT_THRESHOLD 2688 + +#define SQR_FFT_MODF_THRESHOLD 216 /* k = 5 */ +#define SQR_FFT_TABLE3 \ + { { 216, 5}, { 7, 4}, { 15, 5}, { 17, 6}, \ + { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \ + { 20, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \ + { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \ + { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \ + { 39, 8}, { 23, 9}, { 15, 8}, { 39, 9}, \ + { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \ + { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \ + { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \ + { 143, 9}, { 79,10}, { 47,11}, { 2048,12}, \ + { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} } +#define SQR_FFT_TABLE3_SIZE 44 +#define SQR_FFT_THRESHOLD 1856 + +#define MULLO_BASECASE_THRESHOLD 0 /* always */ +#define MULLO_DC_THRESHOLD 61 +#define MULLO_MUL_N_THRESHOLD 5240 + +#define DC_DIV_QR_THRESHOLD 70 +#define DC_DIVAPPR_Q_THRESHOLD 234 +#define DC_BDIV_QR_THRESHOLD 59 +#define DC_BDIV_Q_THRESHOLD 137 + +#define INV_MULMOD_BNM1_THRESHOLD 36 +#define INV_NEWTON_THRESHOLD 327 +#define INV_APPR_THRESHOLD 268 + +#define BINV_NEWTON_THRESHOLD 324 +#define REDC_1_TO_REDC_N_THRESHOLD 63 + +#define MU_DIV_QR_THRESHOLD 1099 +#define MU_DIVAPPR_Q_THRESHOLD 1360 +#define MUPI_DIV_QR_THRESHOLD 138 +#define MU_BDIV_QR_THRESHOLD 889 +#define MU_BDIV_Q_THRESHOLD 1234 + +#define MATRIX22_STRASSEN_THRESHOLD 18 +#define HGCD_THRESHOLD 167 +#define GCD_DC_THRESHOLD 518 +#define GCDEXT_DC_THRESHOLD 378 +#define JACOBI_BASE_METHOD 2 + +#define 
GET_STR_DC_THRESHOLD 14 +#define GET_STR_PRECOMPUTE_THRESHOLD 25 +#define SET_STR_DC_THRESHOLD 577 +#define SET_STR_PRECOMPUTE_THRESHOLD 1217 diff --git a/gmp-6.3.0/mpn/s390_32/logops_n.asm b/gmp-6.3.0/mpn/s390_32/logops_n.asm new file mode 100644 index 0000000..1f2cd2a --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/logops_n.asm @@ -0,0 +1,295 @@ +dnl S/390-32 logops. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb variant 1 variant 2 variant 3 +C rp!=up rp=up +C z900 ? ? ? ? +C z990 2.5 1 2.75 2.75 +C z9 ? ? ? +C z10 ? ? ? +C z196 ? ? ? 
+ +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`vp', `%r4') +define(`nn', `%r5') + +C Each OPERATION_xxx block below picks the function name, the register-memory +C LOGOP mnemonic and one of three code variants: +C VARIANT_1 rp = up LOGOP vp +C VARIANT_2 rp = up LOGOP (NOT vp) +C VARIANT_3 rp = NOT (up LOGOP vp) +C VARIANT_1 additionally defines LOGOPC, the storage-to-storage form used +C when rp and up are the same pointer. + +ifdef(`OPERATION_and_n',` + define(`func',`mpn_and_n') + define(`VARIANT_1') + define(`LOGOPC',`nc') + define(`LOGOP',`n')') +ifdef(`OPERATION_andn_n',` + define(`func',`mpn_andn_n') + define(`VARIANT_2') + define(`LOGOP',`n')') +ifdef(`OPERATION_nand_n',` + define(`func',`mpn_nand_n') + define(`VARIANT_3') + define(`LOGOP',`n')') +ifdef(`OPERATION_ior_n',` + define(`func',`mpn_ior_n') + define(`VARIANT_1') + define(`LOGOPC',`oc') + define(`LOGOP',`o')') +ifdef(`OPERATION_iorn_n',` + define(`func',`mpn_iorn_n') + define(`VARIANT_2') + define(`LOGOP',`o')') +ifdef(`OPERATION_nior_n',` + define(`func',`mpn_nior_n') + define(`VARIANT_3') + define(`LOGOP',`o')') +ifdef(`OPERATION_xor_n',` + define(`func',`mpn_xor_n') + define(`VARIANT_1') + define(`LOGOPC',`xc') + define(`LOGOP',`x')') +ifdef(`OPERATION_xnor_n',` + define(`func',`mpn_xnor_n') + define(`VARIANT_2') + define(`LOGOP',`x')') + +MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n) + +ASM_START() +PROLOGUE(func) +ifdef(`VARIANT_1',` + cr rp, up + jne L(normal) C rp != up: limb-wise code below + + sll nn, 2 C nn = byte count + ahi nn, -1 C byte count - 1, the length encoding LOGOPC uses + lr %r1, nn + srl %r1, 8 + ltr %r1, %r1 C < 256 bytes to process? 
+ je L(1) + +L(tp): LOGOPC 0(256, rp), 0(vp) + la rp, 256(rp) + la vp, 256(vp) + brct %r1, L(tp) + +L(1): bras %r1, L(2) C make r1 point to LOGOPC insn + LOGOPC 0(1, rp), 0(vp) +L(2): ex nn, 0(%r1) C execute LOGOPC with length ((nn-1) mod 256)+1 +L(rtn): br %r14 + + +L(normal): + stm %r6, %r8, 12(%r15) + ahi nn, 3 + lhi %r7, 3 + lr %r0, nn + srl %r0, 2 C r0 = (nn + 3) / 4, the trip count for L(mid) + nr %r7, nn C nn mod 4 + je L(b1) + chi %r7, 2 + jl L(b2) + jne L(top) + +L(b3): lm %r5, %r7, 0(up) + la up, 12(up) + LOGOP %r5, 0(vp) + LOGOP %r6, 4(vp) + LOGOP %r7, 8(vp) + stm %r5, %r7, 0(rp) + la rp, 12(rp) + la vp, 12(vp) + j L(mid) + +L(b1): l %r5, 0(up) + la up, 4(up) + LOGOP %r5, 0(vp) + st %r5, 0(rp) + la rp, 4(rp) + la vp, 4(vp) + j L(mid) + +L(b2): lm %r5, %r6, 0(up) + la up, 8(up) + LOGOP %r5, 0(vp) + LOGOP %r6, 4(vp) + stm %r5, %r6, 0(rp) + la rp, 8(rp) + la vp, 8(vp) + j L(mid) + +L(top): lm %r5, %r8, 0(up) + la up, 16(up) + LOGOP %r5, 0(vp) + LOGOP %r6, 4(vp) + LOGOP %r7, 8(vp) + LOGOP %r8, 12(vp) + stm %r5, %r8, 0(rp) + la rp, 16(rp) + la vp, 16(vp) +L(mid): brct %r0, L(top) + + lm %r6, %r8, 12(%r15) + br %r14 +') + +ifdef(`VARIANT_2',` + stm %r6, %r8, 12(%r15) + lhi %r1, -1 C all-ones mask: xr with it complements a limb + + ahi nn, 3 + lhi %r7, 3 + lr %r0, nn + srl %r0, 2 + nr %r7, nn C nn mod 4 + je L(b1) + chi %r7, 2 + jl L(b2) + jne L(top) + +L(b3): lm %r5, %r7, 0(vp) + la vp, 12(vp) + xr %r5, %r1 + xr %r6, %r1 + xr %r7, %r1 + LOGOP %r5, 0(up) + LOGOP %r6, 4(up) + LOGOP %r7, 8(up) + stm %r5, %r7, 0(rp) + la rp, 12(rp) + la up, 12(up) + j L(mid) + +L(b1): l %r5, 0(vp) + la vp, 4(vp) + xr %r5, %r1 + LOGOP %r5, 0(up) + st %r5, 0(rp) + la rp, 4(rp) + la up, 4(up) + j L(mid) + +L(b2): lm %r5, %r6, 0(vp) + la vp, 8(vp) + xr %r5, %r1 + xr %r6, %r1 + LOGOP %r5, 0(up) + LOGOP %r6, 4(up) + stm %r5, %r6, 0(rp) + la rp, 8(rp) + la up, 8(up) + j L(mid) + +L(top): lm %r5, %r8, 0(vp) + la vp, 16(vp) + xr %r5, %r1 + xr %r6, %r1 + xr %r7, %r1 + xr %r8, %r1 + LOGOP %r5, 0(up) + LOGOP %r6, 4(up) + LOGOP %r7, 8(up) + LOGOP %r8, 12(up) + la up, 16(up) + stm %r5, %r8, 
0(rp) + la rp, 16(rp) +L(mid): brct %r0, L(top) + + lm %r6, %r8, 12(%r15) + br %r14 +') + +ifdef(`VARIANT_3',` + stm %r6, %r8, 12(%r15) + lhi %r1, -1 C all-ones mask: xr with it complements a limb + + ahi nn, 3 + lhi %r7, 3 + lr %r0, nn + srl %r0, 2 + nr %r7, nn C nn mod 4 + je L(b1) + chi %r7, 2 + jl L(b2) + jne L(top) + +L(b3): lm %r5, %r7, 0(vp) + la vp, 12(vp) + LOGOP %r5, 0(up) + LOGOP %r6, 4(up) + xr %r5, %r1 + xr %r6, %r1 + LOGOP %r7, 8(up) + xr %r7, %r1 + stm %r5, %r7, 0(rp) + la rp, 12(rp) + la up, 12(up) + j L(mid) + +L(b1): l %r5, 0(vp) + la vp, 4(vp) + LOGOP %r5, 0(up) + xr %r5, %r1 + st %r5, 0(rp) + la rp, 4(rp) + la up, 4(up) + j L(mid) + +L(b2): lm %r5, %r6, 0(vp) + la vp, 8(vp) + LOGOP %r5, 0(up) + LOGOP %r6, 4(up) + xr %r5, %r1 + xr %r6, %r1 + stm %r5, %r6, 0(rp) + la rp, 8(rp) + la up, 8(up) + j L(mid) + +L(top): lm %r5, %r8, 0(vp) + la vp, 16(vp) + LOGOP %r5, 0(up) + LOGOP %r6, 4(up) + xr %r5, %r1 + xr %r6, %r1 + LOGOP %r7, 8(up) + LOGOP %r8, 12(up) + xr %r7, %r1 + xr %r8, %r1 + stm %r5, %r8, 0(rp) + la up, 16(up) + la rp, 16(rp) +L(mid): brct %r0, L(top) + + lm %r6, %r8, 12(%r15) + br %r14 +') + +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/lshift.asm b/gmp-6.3.0/mpn/s390_32/lshift.asm new file mode 100644 index 0000000..da7d76e --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/lshift.asm @@ -0,0 +1,144 @@ +dnl S/390-32 mpn_lshift. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 6 +C z990 3 +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`cnt', `%r5') + +C Shifts the n limbs at up left by cnt bits into rp, working from the most +C significant limb downwards, and returns in r2 the bits shifted out of the +C top limb. Each sldl shifts an even/odd register pair holding two adjacent +C source limbs, so the even register ends up with one finished result limb. + +ASM_START() +PROLOGUE(mpn_lshift) + lr %r1, n + sll %r1, 2 C r1 = n * 4 bytes + stm %r6, %r12, 24(%r15) + la up, 0(%r1,up) C put up near end of U + la rp, 0(%r1,rp) C put rp near end of R + ahi up, -20 + ahi rp, -16 + lhi %r8, 32 + sr %r8, cnt C r8 = 32 - cnt + l %r12, 16(up) C most significant source limb + srl %r12, 0(%r8) C return value + lhi %r7, 3 + nr %r7, n C r7 = n mod 4, sets the condition code + srl n, 2 C n = n / 4 + je L(b0) + chi %r7, 2 + jl L(b1) + je L(b2) + +L(b3): l %r10, 16(up) + l %r11, 12(up) + l %r9, 8(up) + ahi up, -8 + lr %r8, %r11 + sldl %r10, 0(cnt) + sldl %r8, 0(cnt) + st %r10, 12(rp) + st %r8, 8(rp) + ahi rp, -8 + ltr n, n + je L(end) + j L(top) + +L(b2): l %r10, 16(up) + l %r11, 12(up) + ahi up, -4 + sldl %r10, 0(cnt) + st %r10, 12(rp) + ahi rp, -4 + ltr n, n + je L(end) + j L(top) + +L(b1): ltr n, n + je L(end) + j L(top) + +L(b0): l %r10,16(up) + l %r8, 12(up) + l %r6, 8(up) + l %r0, 4(up) + ahi up, -12 + lr %r11, %r8 + lr %r9, %r6 + lr %r7, %r0 + sldl %r10,0(cnt) + sldl %r8, 0(cnt) + sldl %r6, 0(cnt) + st %r10, 12(rp) + st %r8, 8(rp) + st %r6, 4(rp) + ahi rp, -12 + ahi n, -1 + je L(end) + + ALIGN(8) +L(top): l %r10, 16(up) + l %r8, 12(up) + l %r6, 8(up) + l %r0, 4(up) + l %r1, 0(up) + lr %r11, %r8 + lr %r9, %r6 + lr %r7, %r0 + ahi up, -16 + sldl %r10, 0(cnt) + sldl %r8, 0(cnt) + sldl %r6, 0(cnt) + sldl %r0, 0(cnt) + st %r10, 12(rp) + st %r8, 8(rp) + st %r6, 4(rp) + st %r0, 0(rp) + 
ahi rp, -16 + brct n, L(top) + +L(end): l %r10, 16(up) C last remaining limb: single-register shift + sll %r10, 0(cnt) + st %r10, 12(rp) + + lr %r2, %r12 C return the bits saved at entry + lm %r6, %r12, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/lshiftc.asm b/gmp-6.3.0/mpn/s390_32/lshiftc.asm new file mode 100644 index 0000000..f601673 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/lshiftc.asm @@ -0,0 +1,156 @@ +dnl S/390-32 mpn_lshiftc. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 7 +C z990 3.375 +C z9 ? +C z10 ? +C z196 ? 
+ +C TODO +C * + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`cnt', `%r5') + +C Same limb walk as a plain left shift by cnt bits, except that every result +C limb is complemented (xr with the all-ones r13) before it is stored. The +C value returned in r2 is computed before r13 is set up and is not complemented. + +ASM_START() +PROLOGUE(mpn_lshiftc) + lr %r1, n + sll %r1, 2 C r1 = n * 4 bytes + stm %r6, %r13, 24(%r15) + la up, 0(%r1,up) C put up near end of U + la rp, 0(%r1,rp) C put rp near end of R + ahi up, -20 + ahi rp, -16 + lhi %r8, 32 + sr %r8, cnt C r8 = 32 - cnt + l %r12, 16(up) C most significant source limb + srl %r12, 0(%r8) C return value + lhi %r13, -1 C all-ones mask used for complementing + lhi %r7, 3 + nr %r7, n C r7 = n mod 4, sets the condition code + srl n, 2 C n = n / 4 + je L(b0) + chi %r7, 2 + jl L(b1) + je L(b2) + +L(b3): l %r10, 16(up) + l %r11, 12(up) + l %r9, 8(up) + ahi up, -8 + lr %r8, %r11 + sldl %r10, 0(cnt) + sldl %r8, 0(cnt) + xr %r10, %r13 + xr %r8, %r13 + st %r10, 12(rp) + st %r8, 8(rp) + ahi rp, -8 + ltr n, n + je L(end) + j L(top) + +L(b2): l %r10, 16(up) + l %r11, 12(up) + ahi up, -4 + sldl %r10, 0(cnt) + xr %r10, %r13 + st %r10, 12(rp) + ahi rp, -4 + ltr n, n + je L(end) + j L(top) + +L(b1): ltr n, n + je L(end) + j L(top) + +L(b0): l %r10,16(up) + l %r8, 12(up) + l %r6, 8(up) + l %r0, 4(up) + ahi up, -12 + lr %r11, %r8 + lr %r9, %r6 + lr %r7, %r0 + sldl %r10,0(cnt) + sldl %r8, 0(cnt) + sldl %r6, 0(cnt) + xr %r10, %r13 + xr %r8, %r13 + xr %r6, %r13 + st %r10, 12(rp) + st %r8, 8(rp) + st %r6, 4(rp) + ahi rp, -12 + ahi n, -1 + je L(end) + + ALIGN(8) +L(top): l %r10, 16(up) + l %r8, 12(up) + l %r6, 8(up) + l %r0, 4(up) + l %r1, 0(up) + lr %r11, %r8 + lr %r9, %r6 + lr %r7, %r0 + ahi up, -16 + sldl %r10, 0(cnt) + sldl %r8, 0(cnt) + sldl %r6, 0(cnt) + sldl %r0, 0(cnt) + xr %r10, %r13 + xr %r8, %r13 + xr %r6, %r13 + xr %r0, %r13 + st %r10, 12(rp) + st %r8, 8(rp) + st %r6, 4(rp) + st %r0, 0(rp) + ahi rp, -16 + brct n, L(top) + +L(end): l %r10, 16(up) C last remaining limb: single-register shift + sll %r10, 0(cnt) + xr %r10, %r13 + st %r10, 12(rp) + + lr %r2, %r12 C return the bits saved at entry + lm %r6, %r13, 24(%r15) + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/mul_1.asm b/gmp-6.3.0/mpn/s390_32/mul_1.asm new file mode 100644 index 0000000..e3ad0c5 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/mul_1.asm @@ -0,0 +1,85 @@ +dnl S/390 mpn_mul_1 -- 
Multiply a limb vector with a limb and store the +dnl result in a second limb vector. + +dnl Copyright 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +define(`rp',2) +define(`up',3) +define(`n',4) +define(`vlimb',5) +define(`cylimb',7) + +dnl The loops use the signed multiply MR and then correct the high product +dnl word to the unsigned result: the other operand is added to the high word +dnl for each operand whose sign bit is set. .Loopn runs when vlimb has its +dnl sign bit set (u limb always added), .Loopp otherwise. The final carry +dnl limb is returned in register 2. + +ASM_START() +PROLOGUE(mpn_mul_1) + stm 6,7,24(15) + slr cylimb,cylimb # clear cylimb + ltr vlimb,vlimb + jnl .Loopp + +.Loopn: l 1,0(up) # load from u + lr 6,1 # keep a copy of the u limb + mr 0,vlimb # multiply signed + alr 0,6 # add u limb to phi (vlimb is negative here) + sra 6,31 # make mask + nr 6,vlimb # 0 or vlimb + alr 0,6 # conditionally add vlimb to phi + alr 1,cylimb # add carry limb to plo + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + lr cylimb,0 # new cylimb + st 1,0(rp) # store + la up,4(,up) + la rp,4(,rp) + brct n,.Loopn + + lr 2,cylimb + lm 6,7,24(15) + br 14 + +.Loopp: l 1,0(up) # load from u + lr 6,1 # keep a copy of the u limb + mr 0,vlimb # multiply signed + sra 6,31 # make mask + nr 6,vlimb # 0 or vlimb + alr 0,6 # conditionally add vlimb to phi + alr 1,cylimb # add carry limb to plo + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + lr cylimb,0 # new cylimb + st 1,0(rp) # store + la up,4(,up) + la rp,4(,rp) + brct n,.Loopp + + lr 2,cylimb + lm 6,7,24(15) + br 14 +EPILOGUE(mpn_mul_1) diff --git a/gmp-6.3.0/mpn/s390_32/rshift.asm b/gmp-6.3.0/mpn/s390_32/rshift.asm new file mode 100644 index 0000000..5f2cf37 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/rshift.asm @@ -0,0 +1,138 @@ +dnl S/390-32 mpn_rshift. + +dnl Copyright 2011 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. 
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 6 +C z990 3 +C z9 ? +C z10 ? +C z196 ? + +C TODO +C * + +C INPUT PARAMETERS +define(`rp', `%r2') +define(`up', `%r3') +define(`n', `%r4') +define(`cnt', `%r5') + +C Shifts the n limbs at up right by cnt bits into rp, working from the least +C significant limb upwards, and returns in r2 the bits shifted out of the +C lowest limb, moved to the top of the word. Each srdl shifts an even/odd +C register pair holding two adjacent source limbs, so the odd register ends +C up with one finished result limb. + +ASM_START() +PROLOGUE(mpn_rshift) + stm %r6, %r12, 24(%r15) + lhi %r8, 32 + sr %r8, cnt C r8 = 32 - cnt + l %r12, 0(up) C least significant source limb + sll %r12, 0(%r8) C return value + lhi %r7, 3 + nr %r7, n C r7 = n mod 4, sets the condition code + srl n, 2 C n = n / 4 + je L(b0) + chi %r7, 2 + jl L(b1) + je L(b2) + +L(b3): l %r11, 0(up) + l %r10, 4(up) + l %r8, 8(up) + ahi up, 8 + lr %r9, %r10 + srdl %r10, 0(cnt) + srdl %r8, 0(cnt) + st %r11, 0(rp) + st %r9, 4(rp) + ahi rp, 8 + ltr n, n + je L(end) + j L(top) + +L(b2): l %r11, 0(up) + l %r10, 4(up) + ahi up, 4 + srdl %r10, 0(cnt) + st %r11, 0(rp) + ahi rp, 4 + ltr n, n + je L(end) + j L(top) + +L(b1): ltr n, n + je L(end) + j L(top) + +L(b0): l %r11, 0(up) + l %r9, 4(up) + l %r7, 8(up) + l %r1, 12(up) + ahi up, 12 + lr %r10, %r9 + lr %r8, %r7 + lr %r6, %r1 + srdl %r10, 0(cnt) + srdl %r8, 0(cnt) + srdl %r6, 0(cnt) + st %r11, 0(rp) + st %r9, 4(rp) + st %r7, 8(rp) + ahi rp, 12 + ahi n, -1 + je L(end) + + ALIGN(8) +L(top): l %r11, 0(up) + l %r9, 4(up) + l %r7, 8(up) + l %r1, 12(up) + l %r0, 16(up) + lr %r10, %r9 + lr %r8, %r7 + lr %r6, %r1 + ahi up, 16 + srdl %r10, 0(cnt) + srdl %r8, 0(cnt) + srdl %r6, 0(cnt) + srdl %r0, 0(cnt) + st %r11, 0(rp) + st %r9, 4(rp) + st %r7, 8(rp) + st %r1, 12(rp) + ahi rp, 16 + brct n, L(top) + +L(end): l %r11, 0(up) C last remaining limb: single-register shift + srl %r11, 0(cnt) + st %r11, 0(rp) + + lr %r2, %r12 C return the bits saved at entry + lm %r6, %r12, 24(%r15) + br %r14 +EPILOGUE() 
diff --git a/gmp-6.3.0/mpn/s390_32/sec_tabselect.asm b/gmp-6.3.0/mpn/s390_32/sec_tabselect.asm new file mode 100644 index 0000000..c8aa25e --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/sec_tabselect.asm @@ -0,0 +1,140 @@ +dnl S/390-32 mpn_sec_tabselect + +dnl Copyright 2021 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C z900 ? +C z990 ? +C z9 ? +C z10 ? +C z196 ? +C z13 ? +C z14 ? +C z15 ? 
+ +dnl void +dnl mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab, +dnl mp_size_t n, mp_size_t nents, mp_size_t which) + +define(`rp', `%r2') +define(`tp', `%r3') +define(`n', `%r4') +define(`nents', `%r5') +define(`which_arg',`%r6') C magicked to stack + +dnl r0 r1 r2 r3 r4 r5 r6 r7 +dnl r8 r9 r10 r11 r12 r13 r14 r15 + +define(`mask', `%r14') +define(`k', `%r1') +define(`which', `%r0') + +define(`FRAME', 32) + +C Every table entry is read for every output limb. For each entry a mask is +C built that is all ones only when the down-counting copy of which was zero +C before the decrement; the masked limbs are summed, so only the selected +C entry contributes. The work is done in column blocks of 4, 2 and 1 limbs. + +ASM_START() +PROLOGUE(mpn_sec_tabselect) + stm %r5, %r15, 20(%r15) C r5 (nents) lands at 20, r6 (which) at 24 + ahi %r15, -FRAME + + sll n, 2 C n = bytes per table entry + msr %r5, n + st %r5, 16(%r15) C nents * n * LIMB_BYTES + + lr %r5, n + srl %r5, 2+2 C number of 4-limb blocks + nr %r5, %r5 C set condition code on the block count + je L(end4) +L(outer): + l which, eval(24+FRAME)(%r15) + l k, eval(20+FRAME)(%r15) C nents + lhi %r6, 0 + lhi %r7, 0 + lhi %r8, 0 + lhi %r9, 0 +L(tp4): lhi mask, 1 + slr which, mask C which -= 1, borrows only when which was 0 + slbr mask, mask C mask = all ones on borrow, else 0 + lm %r10, %r13, 0(tp) + nr %r10, mask + nr %r11, mask + nr %r12, mask + nr %r13, mask + ar %r6, %r10 + ar %r7, %r11 + ar %r8, %r12 + ar %r9, %r13 + ar tp, n C same limbs of the next entry + brct k, L(tp4) + stm %r6, %r9, 0(rp) + ahi rp, 16 + sl tp, 16(%r15) C rewind tp by the whole table size + ahi tp, eval(4*4) C then step to the next 4-limb block + brct %r5, L(outer) +L(end4): + tmll n, 8 C two-limb block left? + je L(end2) + l which, eval(24+FRAME)(%r15) + l k, eval(20+FRAME)(%r15) C nents + lhi %r6, 0 + lhi %r7, 0 +L(tp2): lhi mask, 1 + slr which, mask + slbr mask, mask + lm %r10, %r11, 0(tp) + nr %r10, mask + nr %r11, mask + ar %r6, %r10 + ar %r7, %r11 + ar tp, n + brct k, L(tp2) + stm %r6, %r7, 0(rp) + ahi rp, 8 + sl tp, 16(%r15) + ahi tp, eval(2*4) +L(end2): + tmll n, 4 C one limb left? + je L(end1) + l which, eval(24+FRAME)(%r15) + l k, eval(20+FRAME)(%r15) C nents + lhi %r6, 0 +L(tp1): lhi mask, 1 + slr which, mask + slbr mask, mask + l %r10, 0(tp) + nr %r10, mask + ar %r6, %r10 + ar tp, n + brct k, L(tp1) + st %r6, 0(rp) +L(end1): + lm %r5, %r15, eval(20+FRAME)(%r15) C reloads r15 too, undoing the frame + br %r14 +EPILOGUE() diff --git a/gmp-6.3.0/mpn/s390_32/submul_1.asm b/gmp-6.3.0/mpn/s390_32/submul_1.asm new file mode 100644 index 0000000..da7d849 --- /dev/null +++ b/gmp-6.3.0/mpn/s390_32/submul_1.asm @@ 
-0,0 +1,93 @@ +dnl S/390 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the +dnl result from a second limb vector. + +dnl Copyright 2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+ +include(`../config.m4') + +define(`rp',2) +define(`up',3) +define(`n',4) +define(`vlimb',5) +define(`cylimb',7) + +ASM_START() +PROLOGUE(mpn_submul_1) + stm 6,7,24(15) # save r6 and r7 + slr cylimb,cylimb # clear cylimb + ltr vlimb,vlimb # set condition code from sign of vlimb + jnl .Loopp # vlimb not negative: use the loop without the u correction + +.Loopn: l 1,0(up) # load from u + lr 6,1 # keep a copy of the u limb + mr 0,vlimb # multiply signed + alr 0,6 # add u limb to phi (vlimb is negative in this loop) + sra 6,31 # make mask from sign of u limb + nr 6,vlimb # 0 or vlimb + alr 0,6 # conditionally add vlimb to phi + alr 1,cylimb # add carry limb to plo + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + l 6,0(rp) # load r limb + slr 6,1 # subtract plo from r limb + brc 2+1,+8 # branch if no borrow + ahi 0,1 # increment phi + lr cylimb,0 # new cylimb + st 6,0(rp) # store + la up,4(,up) # advance u pointer by one limb + la rp,4(,rp) # advance r pointer by one limb + brct n,.Loopn # decrement n and loop while nonzero + + lr 2,cylimb # final cylimb to r2 for return + lm 6,7,24(15) # restore r6 and r7 + br 14 # return + +.Loopp: l 1,0(up) # load from u + lr 6,1 # keep a copy of the u limb + mr 0,vlimb # multiply signed + sra 6,31 # make mask from sign of u limb + nr 6,vlimb # 0 or vlimb + alr 0,6 # conditionally add vlimb to phi + alr 1,cylimb # add carry limb to plo + brc 8+4,+8 # branch if not carry + ahi 0,1 # increment phi + l 6,0(rp) # load r limb + slr 6,1 # subtract plo from r limb + brc 2+1,+8 # branch if no borrow + ahi 0,1 # increment phi + lr cylimb,0 # new cylimb + st 6,0(rp) # store + la up,4(,up) # advance u pointer by one limb + la rp,4(,rp) # advance r pointer by one limb + brct n,.Loopp # decrement n and loop while nonzero + + lr 2,cylimb # final cylimb to r2 for return + lm 6,7,24(15) # restore r6 and r7 + br 14 # return +EPILOGUE(mpn_submul_1) -- cgit v1.2.3