From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/power/add_n.asm | 83 +++++++++++++++++++++++++ gmp-6.3.0/mpn/power/addmul_1.asm | 126 +++++++++++++++++++++++++++++++++++++ gmp-6.3.0/mpn/power/gmp-mparam.h | 69 +++++++++++++++++++++ gmp-6.3.0/mpn/power/lshift.asm | 61 ++++++++++++++++++ gmp-6.3.0/mpn/power/mul_1.asm | 113 +++++++++++++++++++++++++++++++++ gmp-6.3.0/mpn/power/rshift.asm | 59 ++++++++++++++++++ gmp-6.3.0/mpn/power/sdiv.asm | 39 ++++++++++++ gmp-6.3.0/mpn/power/sub_n.asm | 85 +++++++++++++++++++++++++ gmp-6.3.0/mpn/power/submul_1.asm | 131 +++++++++++++++++++++++++++++++++++++++ gmp-6.3.0/mpn/power/umul.asm | 43 +++++++++++++ 10 files changed, 809 insertions(+) create mode 100644 gmp-6.3.0/mpn/power/add_n.asm create mode 100644 gmp-6.3.0/mpn/power/addmul_1.asm create mode 100644 gmp-6.3.0/mpn/power/gmp-mparam.h create mode 100644 gmp-6.3.0/mpn/power/lshift.asm create mode 100644 gmp-6.3.0/mpn/power/mul_1.asm create mode 100644 gmp-6.3.0/mpn/power/rshift.asm create mode 100644 gmp-6.3.0/mpn/power/sdiv.asm create mode 100644 gmp-6.3.0/mpn/power/sub_n.asm create mode 100644 gmp-6.3.0/mpn/power/submul_1.asm create mode 100644 gmp-6.3.0/mpn/power/umul.asm (limited to 'gmp-6.3.0/mpn/power') diff --git a/gmp-6.3.0/mpn/power/add_n.asm b/gmp-6.3.0/mpn/power/add_n.asm new file mode 100644 index 0000000..6d6ca73 --- /dev/null +++ b/gmp-6.3.0/mpn/power/add_n.asm @@ -0,0 +1,83 @@ +dnl IBM POWER mpn_add_n -- Add two limb vectors of equal, non-zero length. + +dnl Copyright 1992, 1994-1996, 1999-2001, 2005 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+        andil.  10,6,1          C odd or even number of limbs?
+        l       8,0(4)          C load least significant s1 limb
+        l       0,0(5)          C load least significant s2 limb
+        cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+        sri     10,6,1          C count for unrolled loop (two limbs per pass)
+        a       7,0,8           C add least significant limbs, set cy
+        mtctr   10              C copy count into CTR
+        beq     0,Leven         C branch if even # of limbs (# of limbs >= 2)
+
+C We have an odd # of limbs. Add the first limbs separately.
+        cmpi    1,10,0          C is count for unrolled loop zero?
+        bc      4,6,L1          C bne cr1,L1 (misassembled by gas)
+        st      7,4(3)          C size is 1: store the only result limb
+        aze     3,10            C return cy; use the fact that r10 is zero...
+        br                      C return
+
+C We added the least significant limbs. Now reload the next limbs to enter loop.
+L1:     lu      8,4(4)          C load s1 limb and update s1_ptr
+        lu      0,4(5)          C load s2 limb and update s2_ptr
+        stu     7,4(3)          C store first result limb and update res_ptr
+        ae      7,0,8           C add limbs, set cy
+Leven:  lu      9,4(4)          C load s1 limb and update s1_ptr
+        lu      10,4(5)         C load s2 limb and update s2_ptr
+        bdz     Lend            C If done, skip loop
+
+Loop:   lu      8,4(4)          C load s1 limb and update s1_ptr
+        lu      0,4(5)          C load s2 limb and update s2_ptr
+        ae      11,10,9         C add previous limbs with cy, set cy
+        stu     7,4(3)          C store older result limb and update res_ptr
+        lu      9,4(4)          C load s1 limb and update s1_ptr
+        lu      10,4(5)         C load s2 limb and update s2_ptr
+        ae      7,0,8           C add previous limbs with cy, set cy
+        stu     11,4(3)         C store older result limb and update res_ptr
+        bdn     Loop            C decrement CTR and loop back
+
+Lend:   ae      11,10,9         C add limbs with cy, set cy
+        st      7,4(3)          C store 2:nd most significant result limb
+        st      11,8(3)         C store most significant result limb
+        lil     3,0             C load cy into ...
+        aze     3,3             C ... return value register
+        br                      C return, cy is in r3
+EPILOGUE(mpn_add_n)
diff --git a/gmp-6.3.0/mpn/power/addmul_1.asm b/gmp-6.3.0/mpn/power/addmul_1.asm
new file mode 100644
index 0000000..76d8df3
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/addmul_1.asm
@@ -0,0 +1,126 @@
+dnl IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl The POWER architecture has no unsigned 32x32->64 bit multiplication +dnl instruction. To obtain that operation, we have to use the 32x32->64 +dnl signed multiplication instruction, and add the appropriate compensation to +dnl the high limb of the result. We add the multiplicand if the multiplier +dnl has its most significant bit set, and we add the multiplier if the +dnl multiplicand has its most significant bit set. We need to preserve the +dnl carry flag between each iteration, so we have to compute the compensation +dnl carefully (the natural, srai+and doesn't work). Since all POWER can +dnl branch in zero cycles, we use conditional branches for the compensation. 
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+        cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+        l       0,0(4)          C load least significant s1 limb
+        cmpi    0,6,0           C test sign of s2_limb, for the blt below
+        mtctr   5               C copy size into CTR
+        mul     9,0,6           C signed multiply: high limb in r9, low limb in MQ
+        srai    7,0,31          C r7 = all ones if s1 limb is negative, else 0
+        and     7,7,6           C r7 = s2_limb if s1 limb is negative, else 0
+        mfmq    8               C fetch low limb of product
+        cax     9,9,7           C adjust high limb for negative limb from s1
+        l       7,4(3)          C load res_limb
+        a       8,8,7           C add res_limb
+        blt     Lneg            C s2_limb negative: use the Lnloop variant
+Lpos:   bdz     Lend            C if just one limb, skip loop
+
+Lploop: lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     10,0,6          C high limb in r10, low limb in MQ
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      8,0,9           C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        aze     10,10           C propagate cy to new cy_limb
+        a       8,8,7           C add res_limb
+        bge     Lp0             C no adjustment if s1 limb is non-negative
+        cax     10,10,6         C adjust high limb for negative limb from s1
+Lp0:    bdz     Lend0           C done, with cy_limb in r10
+        lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     9,0,6           C high limb in r9, low limb in MQ
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      8,0,10          C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        aze     9,9             C propagate cy to new cy_limb
+        a       8,8,7           C add res_limb
+        bge     Lp1             C no adjustment if s1 limb is non-negative
+        cax     9,9,6           C adjust high limb for negative limb from s1
+Lp1:    bdn     Lploop          C decrement CTR and loop back
+
+        b       Lend            C done, with cy_limb in r9
+
+Lneg:   cax     9,9,0           C adjust high limb for negative s2_limb
+        bdz     Lend            C if just one limb, skip loop
+Lnloop: lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     10,0,6          C high limb in r10, low limb in MQ
+        mfmq    7               C fetch low limb into r7, r0 is still needed
+        ae      8,7,9           C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        ae      10,10,0         C propagate cy to new cy_limb, and add s1 limb for negative s2_limb
+        a       8,8,7           C add res_limb
+        bge     Ln0             C no adjustment if s1 limb is non-negative
+        cax     10,10,6         C adjust high limb for negative limb from s1
+Ln0:    bdz     Lend0           C done, with cy_limb in r10
+        lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     9,0,6           C high limb in r9, low limb in MQ
+        mfmq    7               C fetch low limb into r7, r0 is still needed
+        ae      8,7,10          C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        ae      9,9,0           C propagate cy to new cy_limb, and add s1 limb for negative s2_limb
+        a       8,8,7           C add res_limb
+        bge     Ln1             C no adjustment if s1 limb is non-negative
+        cax     9,9,6           C adjust high limb for negative limb from s1
+Ln1:    bdn     Lnloop          C decrement CTR and loop back
+        b       Lend            C done, with cy_limb in r9
+
+Lend0:  cal     9,0(10)         C move cy_limb from r10 to r9, where Lend expects it
+Lend:   st      8,4(3)          C store most significant result limb
+        aze     3,9             C return value = cy_limb + cy
+        br                      C return
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp-6.3.0/mpn/power/gmp-mparam.h b/gmp-6.3.0/mpn/power/gmp-mparam.h
new file mode 100644
index 0000000..7cb36f9
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/gmp-mparam.h
@@ -0,0 +1,69 @@
+/* POWER gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2002-2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* Generated by tuneup.c, 2003-02-10, gcc 3.2, POWER2 66.7MHz */
+
+#define MUL_TOOM22_THRESHOLD 12
+#define MUL_TOOM33_THRESHOLD 75
+
+#define SQR_BASECASE_THRESHOLD 7
+#define SQR_TOOM2_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 86
+
+#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_DC_THRESHOLD 36
+#define POWM_THRESHOLD 69
+
+#define HGCD_THRESHOLD 97
+#define GCD_ACCEL_THRESHOLD 3
+#define GCD_DC_THRESHOLD 590
+#define JACOBI_BASE_METHOD 2 /* NOTE(review): presumably selects one of several implementations -- confirm */
+
+#define DIVREM_1_NORM_THRESHOLD 12
+#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1_NORM_THRESHOLD 10
+#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define USE_PREINV_DIVREM_1 0 /* NOTE(review): looks like a 0/1 off/on switch -- confirm */
+#define USE_PREINV_MOD_1 1 /* NOTE(review): looks like a 0/1 off/on switch -- confirm */
+#define DIVREM_2_THRESHOLD 11
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 10
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_THRESHOLD 2899
+
+#define MUL_FFT_TABLE { 336, 800, 1408, 3584, 10240, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD 296
+#define MUL_FFT_THRESHOLD 2304
+
+#define SQR_FFT_TABLE { 336, 800, 1408, 3584, 10240, 24576, 0 } /* same values as MUL_FFT_TABLE */
+#define SQR_FFT_MODF_THRESHOLD 296 /* same value as MUL_FFT_MODF_THRESHOLD */
+#define SQR_FFT_THRESHOLD 2304 /* same value as MUL_FFT_THRESHOLD */
diff --git a/gmp-6.3.0/mpn/power/lshift.asm b/gmp-6.3.0/mpn/power/lshift.asm
new file mode 100644
index 0000000..efa2105
--- /dev/null
+++
b/gmp-6.3.0/mpn/power/lshift.asm @@ -0,0 +1,61 @@ +dnl IBM POWER mpn_lshift -- Shift a number left. + +dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. 
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+        sli     0,5,2           C r0 = size * 4, the vector length in bytes
+        cax     9,3,0           C r9 = res_ptr + r0, just past the top result limb
+        cax     4,4,0           C s_ptr += r0, just past the top source limb
+        sfi     8,6,32          C r8 = 32 - cnt
+        mtctr   5               C put limb count in CTR loop register
+        lu      0,-4(4)         C read most significant limb
+        sre     3,0,8           C compute carry out limb, and init MQ register
+        bdz     Lend2           C if just one limb, skip loop
+        lu      0,-4(4)         C read 2:nd most significant limb
+        sreq    7,0,8           C compute most significant limb of result
+        bdz     Lend            C if just two limbs, skip loop
+Loop:   lu      0,-4(4)         C load next lower limb
+        stu     7,-4(9)         C store previous result during read latency
+        sreq    7,0,8           C compute result limb
+        bdn     Loop            C loop back until CTR is zero
+Lend:   stu     7,-4(9)         C store 2:nd least significant limb
+Lend2:  sle     7,0,6           C compute least significant limb
+        st      7,-4(9)         C store it
+        br                      C return, carry out limb is in r3
+EPILOGUE(mpn_lshift)
diff --git a/gmp-6.3.0/mpn/power/mul_1.asm b/gmp-6.3.0/mpn/power/mul_1.asm
new file mode 100644
index 0000000..38b7b66
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/mul_1.asm
@@ -0,0 +1,113 @@
+dnl IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
+dnl result in a second limb vector.
+
+dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + + +dnl INPUT PARAMETERS +dnl res_ptr r3 +dnl s1_ptr r4 +dnl size r5 +dnl s2_limb r6 + +dnl The POWER architecture has no unsigned 32x32->64 bit multiplication +dnl instruction. To obtain that operation, we have to use the 32x32->64 +dnl signed multiplication instruction, and add the appropriate compensation to +dnl the high limb of the result. We add the multiplicand if the multiplier +dnl has its most significant bit set, and we add the multiplier if the +dnl multiplicand has its most significant bit set. We need to preserve the +dnl carry flag between each iteration, so we have to compute the compensation +dnl carefully (the natural, srai+and doesn't work). Since all POWER can +dnl branch in zero cycles, we use conditional branches for the compensation. 
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+        cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+        l       0,0(4)          C load least significant s1 limb
+        cmpi    0,6,0           C test sign of s2_limb, for the blt below
+        mtctr   5               C copy size into CTR
+        mul     9,0,6           C signed multiply: high limb in r9, low limb in MQ
+        srai    7,0,31          C r7 = all ones if s1 limb is negative, else 0
+        and     7,7,6           C r7 = s2_limb if s1 limb is negative, else 0
+        mfmq    8               C fetch low limb of product
+        ai      0,0,0           C reset carry
+        cax     9,9,7           C adjust high limb for negative limb from s1
+        blt     Lneg            C s2_limb negative: use the Lnloop variant
+Lpos:   bdz     Lend            C if just one limb, skip loop
+Lploop: lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     10,0,6          C high limb in r10, low limb in MQ
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      8,0,9           C low limb + old_cy_limb + old cy
+        bge     Lp0             C no adjustment if s1 limb is non-negative
+        cax     10,10,6         C adjust high limb for negative limb from s1
+Lp0:    bdz     Lend0           C done, with cy_limb in r10
+        lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     9,0,6           C high limb in r9, low limb in MQ
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      8,0,10          C low limb + old_cy_limb + old cy
+        bge     Lp1             C no adjustment if s1 limb is non-negative
+        cax     9,9,6           C adjust high limb for negative limb from s1
+Lp1:    bdn     Lploop          C decrement CTR and loop back
+        b       Lend            C done, with cy_limb in r9
+
+Lneg:   cax     9,9,0           C adjust high limb for negative s2_limb
+        bdz     Lend            C if just one limb, skip loop
+Lnloop: lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     10,0,6          C high limb in r10, low limb in MQ
+        cax     10,10,0         C adjust high limb for negative s2_limb
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      8,0,9           C low limb + old_cy_limb + old cy
+        bge     Ln0             C no adjustment if s1 limb is non-negative
+        cax     10,10,6         C adjust high limb for negative limb from s1
+Ln0:    bdz     Lend0           C done, with cy_limb in r10
+        lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     9,0,6           C high limb in r9, low limb in MQ
+        cax     9,9,0           C adjust high limb for negative s2_limb
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      8,0,10          C low limb + old_cy_limb + old cy
+        bge     Ln1             C no adjustment if s1 limb is non-negative
+        cax     9,9,6           C adjust high limb for negative limb from s1
+Ln1:    bdn     Lnloop          C decrement CTR and loop back
+        b       Lend            C done, with cy_limb in r9
+
+Lend0:  cal     9,0(10)         C move cy_limb from r10 to r9, where Lend expects it
+Lend:   st      8,4(3)          C store most significant result limb
+        aze     3,9             C return value = cy_limb + cy
+        br                      C return
+EPILOGUE(mpn_mul_1)
diff --git a/gmp-6.3.0/mpn/power/rshift.asm b/gmp-6.3.0/mpn/power/rshift.asm
new file mode 100644
index 0000000..1d1815c
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/rshift.asm
@@ -0,0 +1,59 @@
+dnl IBM POWER mpn_rshift -- Shift a number right.
+
+dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+        sfi     8,6,32          C r8 = 32 - cnt
+        mtctr   5               C put limb count in CTR loop register
+        l       0,0(4)          C read least significant limb
+        ai      9,3,-4          C adjust res_ptr since it's offset in the stu:s
+        sle     3,0,8           C compute carry limb, and init MQ register
+        bdz     Lend2           C if just one limb, skip loop
+        lu      0,4(4)          C read 2:nd least significant limb
+        sleq    7,0,8           C compute least significant limb of result
+        bdz     Lend            C if just two limbs, skip loop
+Loop:   lu      0,4(4)          C load next higher limb
+        stu     7,4(9)          C store previous result during read latency
+        sleq    7,0,8           C compute result limb
+        bdn     Loop            C loop back until CTR is zero
+Lend:   stu     7,4(9)          C store 2:nd most significant limb
+Lend2:  sre     7,0,6           C compute most significant limb
+        st      7,4(9)          C store it
+        br                      C return, carry limb is in r3
+EPILOGUE(mpn_rshift)
diff --git a/gmp-6.3.0/mpn/power/sdiv.asm b/gmp-6.3.0/mpn/power/sdiv.asm
new file mode 100644
index 0000000..4a9ed14
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/sdiv.asm
@@ -0,0 +1,39 @@
+dnl Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sdiv_qrnnd)
+        mtmq    5               C MQ = r5 (NOTE(review): presumably the low dividend word, confirm)
+        div     0,4,6           C signed divide of r4:MQ by r6: quotient in r0, remainder in MQ
+        mfmq    9               C fetch remainder
+        st      9,0(3)          C store remainder through the pointer in r3
+        mr      3,0             C return quotient in r3
+        br                      C return
+EPILOGUE(mpn_sdiv_qrnnd)
diff --git a/gmp-6.3.0/mpn/power/sub_n.asm b/gmp-6.3.0/mpn/power/sub_n.asm
new file mode 100644
index 0000000..390c802
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/sub_n.asm
@@ -0,0 +1,85 @@
+dnl IBM POWER mpn_sub_n -- Subtract two limb vectors of equal, non-zero
+dnl length.
+
+dnl Copyright 1992, 1994-1996, 1999-2001, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+        andil.  10,6,1          C odd or even number of limbs?
+        l       8,0(4)          C load least significant s1 limb
+        l       0,0(5)          C load least significant s2 limb
+        cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+        sri     10,6,1          C count for unrolled loop (two limbs per pass)
+        sf      7,0,8           C subtract least significant limbs, set cy
+        mtctr   10              C copy count into CTR
+        beq     0,Leven         C branch if even # of limbs (# of limbs >= 2)
+
+C We have an odd # of limbs. Subtract the first limbs separately.
+        cmpi    1,10,0          C is count for unrolled loop zero?
+        bc      4,6,L1          C bne cr1,L1 (misassembled by gas)
+        st      7,4(3)          C size is 1: store the only result limb
+        sfe     3,0,0           C load !cy into ...
+        sfi     3,3,0           C ... return value register
+        br                      C return
+
+C We subtracted the least significant limbs. Now reload the next limbs to enter loop.
+L1:     lu      8,4(4)          C load s1 limb and update s1_ptr
+        lu      0,4(5)          C load s2 limb and update s2_ptr
+        stu     7,4(3)          C store first result limb and update res_ptr
+        sfe     7,0,8           C subtract limbs, set cy
+Leven:  lu      9,4(4)          C load s1 limb and update s1_ptr
+        lu      10,4(5)         C load s2 limb and update s2_ptr
+        bdz     Lend            C If done, skip loop
+
+Loop:   lu      8,4(4)          C load s1 limb and update s1_ptr
+        lu      0,4(5)          C load s2 limb and update s2_ptr
+        sfe     11,10,9         C subtract previous limbs with cy, set cy
+        stu     7,4(3)          C store older result limb and update res_ptr
+        lu      9,4(4)          C load s1 limb and update s1_ptr
+        lu      10,4(5)         C load s2 limb and update s2_ptr
+        sfe     7,0,8           C subtract previous limbs with cy, set cy
+        stu     11,4(3)         C store older result limb and update res_ptr
+        bdn     Loop            C decrement CTR and loop back
+
+Lend:   sfe     11,10,9         C subtract limbs with cy, set cy
+        st      7,4(3)          C store 2:nd most significant result limb
+        st      11,8(3)         C store most significant result limb
+        sfe     3,0,0           C load !cy into ...
+        sfi     3,3,0           C ... return value register
+        br                      C return, !cy is in r3
+EPILOGUE(mpn_sub_n)
diff --git a/gmp-6.3.0/mpn/power/submul_1.asm b/gmp-6.3.0/mpn/power/submul_1.asm
new file mode 100644
index 0000000..1788e0d
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/submul_1.asm
@@ -0,0 +1,131 @@
+dnl IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl The POWER architecture has no unsigned 32x32->64 bit multiplication
+dnl instruction. To obtain that operation, we have to use the 32x32->64
+dnl signed multiplication instruction, and add the appropriate compensation to
+dnl the high limb of the result. We add the multiplicand if the multiplier
+dnl has its most significant bit set, and we add the multiplier if the
+dnl multiplicand has its most significant bit set.
We need to preserve the
+dnl carry flag between each iteration, so we have to compute the compensation
+dnl carefully (the natural, srai+and doesn't work). Since all POWER can
+dnl branch in zero cycles, we use conditional branches for the compensation.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+        cal     3,-4(3)         C offset res_ptr, it's updated before it's used
+        l       0,0(4)          C load least significant s1 limb
+        cmpi    0,6,0           C test sign of s2_limb, for the blt below
+        mtctr   5               C copy size into CTR
+        mul     9,0,6           C signed multiply: high limb in r9, low limb in MQ
+        srai    7,0,31          C r7 = all ones if s1 limb is negative, else 0
+        and     7,7,6           C r7 = s2_limb if s1 limb is negative, else 0
+        mfmq    11              C fetch low limb of product
+        cax     9,9,7           C adjust high limb for negative limb from s1
+        l       7,4(3)          C load res_limb
+        sf      8,11,7          C subtract low limb from res_limb
+        a       11,8,11         C invert cy (r11 is junk)
+        blt     Lneg            C s2_limb negative: use the Lnloop variant
+Lpos:   bdz     Lend            C if just one limb, skip loop
+
+Lploop: lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     10,0,6          C high limb in r10, low limb in MQ
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      11,0,9          C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        aze     10,10           C propagate cy to new cy_limb
+        sf      8,11,7          C subtract the sum in r11 from res_limb
+        a       11,8,11         C invert cy (r11 is junk)
+        bge     Lp0             C no adjustment if s1 limb is non-negative
+        cax     10,10,6         C adjust high limb for negative limb from s1
+Lp0:    bdz     Lend0           C done, with cy_limb in r10
+        lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     9,0,6           C high limb in r9, low limb in MQ
+        mfmq    0               C fetch low limb (overwrites the s1 limb in r0)
+        ae      11,0,10         C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        aze     9,9             C propagate cy to new cy_limb
+        sf      8,11,7          C subtract the sum in r11 from res_limb
+        a       11,8,11         C invert cy (r11 is junk)
+        bge     Lp1             C no adjustment if s1 limb is non-negative
+        cax     9,9,6           C adjust high limb for negative limb from s1
+Lp1:    bdn     Lploop          C decrement CTR and loop back
+
+        b       Lend            C done, with cy_limb in r9
+
+Lneg:   cax     9,9,0           C adjust high limb for negative s2_limb
+        bdz     Lend            C if just one limb, skip loop
+Lnloop: lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     10,0,6          C high limb in r10, low limb in MQ
+        mfmq    7               C fetch low limb into r7, r0 is still needed
+        ae      11,7,9          C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        ae      10,10,0         C propagate cy to new cy_limb, and add s1 limb for negative s2_limb
+        sf      8,11,7          C subtract the sum in r11 from res_limb
+        a       11,8,11         C invert cy (r11 is junk)
+        bge     Ln0             C no adjustment if s1 limb is non-negative
+        cax     10,10,6         C adjust high limb for negative limb from s1
+Ln0:    bdz     Lend0           C done, with cy_limb in r10
+        lu      0,4(4)          C load s1 limb and update s1_ptr
+        stu     8,4(3)          C store previous result limb and update res_ptr
+        cmpi    0,0,0           C test sign of s1 limb, for the bge below
+        mul     9,0,6           C high limb in r9, low limb in MQ
+        mfmq    7               C fetch low limb into r7, r0 is still needed
+        ae      11,7,10         C low limb + old_cy_limb + old cy
+        l       7,4(3)          C load res_limb
+        ae      9,9,0           C propagate cy to new cy_limb, and add s1 limb for negative s2_limb
+        sf      8,11,7          C subtract the sum in r11 from res_limb
+        a       11,8,11         C invert cy (r11 is junk)
+        bge     Ln1             C no adjustment if s1 limb is non-negative
+        cax     9,9,6           C adjust high limb for negative limb from s1
+Ln1:    bdn     Lnloop          C decrement CTR and loop back
+        b       Lend            C done, with cy_limb in r9
+
+Lend0:  cal     9,0(10)         C move cy_limb from r10 to r9, where Lend expects it
+Lend:   st      8,4(3)          C store most significant result limb
+        aze     3,9             C return value = cy_limb + cy
+        br                      C return
+EPILOGUE(mpn_submul_1)
diff --git a/gmp-6.3.0/mpn/power/umul.asm b/gmp-6.3.0/mpn/power/umul.asm
new file mode 100644
index 0000000..5a0599e
--- /dev/null
+++ b/gmp-6.3.0/mpn/power/umul.asm
@@ -0,0 +1,43 @@
+dnl Copyright 1999, 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+        mul     9,4,5           C signed r4 * r5: high word in r9, low word in MQ
+        srai    0,4,31          C r0 = all ones if r4 is negative, else 0
+        and     0,0,5           C r0 = r5 if r4 is negative, else 0
+        srai    5,5,31          C r5 = all ones if r5 is negative, else 0
+        and     5,5,4           C r5 = r4 if the original r5 was negative, else 0
+        cax     0,0,5           C r0 = total compensation for the high word
+        mfmq    11              C fetch low word of product
+        st      11,0(3)         C store low word through the pointer in r3
+        cax     3,9,0           C return compensated (unsigned) high word in r3
+        br                      C return
+EPILOGUE(mpn_umul_ppmm)
-- 
cgit v1.2.3