From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm | 129 ++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm (limited to 'gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm') diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm new file mode 100644 index 0000000..d7dbdf9 --- /dev/null +++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/dive_1.asm @@ -0,0 +1,129 @@ +dnl SPARC T3/T4/T5 mpn_divexact_1. + +dnl Contributed to the GNU project by Torbjörn Granlund. + +dnl Copyright 2013 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C UltraSPARC T3: 31 +C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops + +C INPUT PARAMETERS +define(`qp', `%i0') +define(`ap', `%i1') +define(`n', `%i2') +define(`d', `%i3') + +define(`dinv',`%o4') + +ASM_START() + REGISTER(%g2,#scratch) + REGISTER(%g3,#scratch) +PROLOGUE(mpn_divexact_1) + save %sp, -176, %sp + cmp n, 1 + bne,pt %xcc, L(gt1) + ldx [ap], %o5 + udivx %o5, d, %g1 + stx %g1, [qp] + return %i7+8 + nop + +L(gt1): add d, -1, %g1 + andn %g1, d, %g1 + popc %g1, %i4 C i4 = count_trailing_zeros(d) + + srlx d, %i4, d + srlx d, 1, %g1 + and %g1, 127, %g1 + + LEA64(binvert_limb_table, g2, g4) + ldub [%g2+%g1], %g1 + add %g1, %g1, %g2 + mulx %g1, %g1, %g1 + mulx %g1, d, %g1 + sub %g2, %g1, %g2 + add %g2, %g2, %g1 + mulx %g2, %g2, %g2 + mulx %g2, d, %g2 + sub %g1, %g2, %g1 + add %g1, %g1, %o7 + mulx %g1, %g1, %g1 + mulx %g1, d, %g1 + add n, -2, n + brz,pt %i4, L(norm) + sub %o7, %g1, dinv + +L(unnorm): + mov 0, %g4 + sub %g0, %i4, %o2 + srlx %o5, %i4, %o5 +L(top_unnorm): + ldx [ap+8], %g3 + add ap, 8, ap + sllx %g3, %o2, %g5 + or %g5, %o5, %g5 + srlx %g3, %i4, %o5 + subcc %g5, %g4, %g4 + mulx %g4, dinv, %g1 + stx %g1, [qp] + add qp, 8, qp + umulxhi(d, %g1, %g1) + addxc( %g1, %g0, %g4) + brgz,pt n, L(top_unnorm) + add n, -1, n + + sub %o5, %g4, %g4 + mulx %g4, dinv, %g1 + stx %g1, [qp] + return %i7+8 + nop + +L(norm): + mulx dinv, %o5, %g1 + stx %g1, [qp] + add qp, 8, qp + addcc %g0, 0, %g4 +L(top_norm): + umulxhi(d, %g1, %g1) + ldx [ap+8], %g5 + add ap, 8, ap + addxc( %g1, %g0, %g1) + subcc %g5, %g1, %g1 + mulx %g1, dinv, %g1 + stx %g1, [qp] + add qp, 8, qp + brgz,pt n, L(top_norm) + add n, -1, n + + return %i7+8 + nop +EPILOGUE() -- cgit v1.2.3