From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/sparc32/v9/add_n.asm | 129 +++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/sparc32/v9/add_n.asm

(limited to 'gmp-6.3.0/mpn/sparc32/v9/add_n.asm')

diff --git a/gmp-6.3.0/mpn/sparc32/v9/add_n.asm b/gmp-6.3.0/mpn/sparc32/v9/add_n.asm
new file mode 100644
index 0000000..7bd5974
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc32/v9/add_n.asm
@@ -0,0 +1,129 @@
+dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl sum in a third limb vector.
+
+dnl Copyright 2001 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(rp,%o0)		C base address the sum limbs are stored to
+define(s1p,%o1)		C base address the first source limbs are loaded from
+define(s2p,%o2)		C base address the second source limbs are loaded from
+define(n,%o3)		C number of limbs in each vector, must be > 0
+define(cy,%g1)		C not an input: scratch register carrying bit 32 of one sum into the next
+
+C This code uses 64-bit operations on `o' and `g' registers.
It doesn't
+C require that `o' registers' upper 32 bits are preserved by the operating
+C system, but if they are not, they must be zeroed. That is indeed what
+C happens at least on Slowaris 2.5 and 2.6.
+
+C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
+C about 10 cycles/limb from the Ecache.
+
+C mpn_add_n: add the n 32-bit limbs at s1p to the n limbs at s2p, store the n
+C sum limbs at rp, and leave the carry out of the last limb in %o0 on return.
+C
+C Method: each limb is loaded with lduw and the two limbs plus the incoming
+C carry are summed with 64-bit adds.  stw stores the low 32 bits of the sum and
+C srlx by 32 moves the bits above them into cy for the next limb.
+C
+C Register use: %o4/%o5 and %g2/%g3 each hold one pending pair of source
+C limbs, %g4 holds the sum being formed, cy holds the carry between limbs.
+C
+C Paths: n == 1 goes to L(end1), n == 2 to L(end2).  Larger n runs L(loop),
+C which retires two limbs per pass and leaves through L(exite) (two pairs still
+C pending) or L(exito)+4 (three limbs still to store).
+C
+C NOTE(review): the lduw [s1p+4] after the first branch sits in a delay slot
+C that is not annulled, so it is also executed when n == 1 and then reads one
+C word past the single s1p limb.  The value is unused on that path -- confirm
+C that callers tolerate the extra read.
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	lduw	[s1p+0],%o4		C first limb of s1
+	lduw	[s2p+0],%o5		C first limb of s2
+	addcc	n,-2,n			C n -= 2; the flags select the path below
+	bl,pn	%icc,L(end1)		C negative: n was 1
+	lduw	[s1p+4],%g2		C (delay slot) second limb of s1
+	lduw	[s2p+4],%g3		C second limb of s2
+	be,pn	%icc,L(end2)		C flags still from the addcc; zero: n was 2
+	mov	0,cy			C (delay slot) no carry into the first limb
+
+	.align	16
+C On entry to each pass: %o4/%o5 = pair to add first, %g2/%g3 = the pair after
+C it, cy = carry in, n = number of limbs per vector not yet loaded (non-zero).
+L(loop):
+	add	%o4,%o5,%g4		C 64-bit sum of the first pending pair
+	add	rp,8,rp			C this pass's two limbs go to rp-8 and rp-4
+	lduw	[s1p+8],%o4		C fetch the s1 limb two ahead
+	fitod	%f0,%f2			C %f2 is not read in this file; presumably a scheduling filler -- TODO confirm
+C ---
+	add	cy,%g4,%g4		C add the carry in
+	addcc	n,-1,n			C count the limb being fetched; tested by the be below
+	lduw	[s2p+8],%o5		C fetch the s2 limb two ahead
+	fitod	%f0,%f2
+C ---
+	srlx	%g4,32,cy		C cy = sum >> 32, the carry out
+	add	s2p,8,s2p
+	stw	%g4,[rp-8]		C store the low 32 bits of the sum
+	be,pn	%icc,L(exito)+4		C n reached 0.  The next instruction equals the first one of L(exito), so enter 4 bytes past the label
+C ---
+	add	%g2,%g3,%g4		C sum of the second pending pair (also the delay slot of the be above)
+	addcc	n,-1,n			C count the limb being fetched; tested by the bne below
+	lduw	[s1p+12],%g2		C fetch the s1 limb three ahead
+	fitod	%f0,%f2
+C ---
+	add	cy,%g4,%g4		C add the carry in
+	add	s1p,8,s1p
+	lduw	[s2p+4],%g3		C s2p was already advanced by 8, so this is the s2 limb matching [s1p+12] above
+	fitod	%f0,%f2
+C ---
+	srlx	%g4,32,cy		C cy = sum >> 32, the carry out
+	bne,pt	%icc,L(loop)		C more limbs to load: run another pass
+	stw	%g4,[rp-4]		C (delay slot) store the second limb of this pass
+C ---
+C Reached by falling out of the loop.  Both %o4/%o5 and %g2/%g3 still hold a
+C loaded pair; they are added and stored at rp+0 and rp+4.
+L(exite):
+	add	%o4,%o5,%g4
+	add	cy,%g4,%g4
+	srlx	%g4,32,cy
+	stw	%g4,[rp+0]
+	add	%g2,%g3,%g4
+	add	cy,%g4,%g4
+	stw	%g4,[rp+4]
+	retl
+	srlx	%g4,32,%o0		C (delay slot) carry out of the last limb
+
+C Entered at L(exito)+4 from the loop, after the %g2+%g3 add has already been
+C done in the branch delay slot.  That sum is stored at rp-4, then the pair in
+C %o4/%o5 is added and stored at rp+0.
+L(exito):
+	add	%g2,%g3,%g4		C skipped by the loop's branch, which has executed the same add
+	add	cy,%g4,%g4
+	srlx	%g4,32,cy
+	stw	%g4,[rp-4]
+	add	%o4,%o5,%g4
+	add	cy,%g4,%g4
+	stw	%g4,[rp+0]
+	retl
+	srlx	%g4,32,%o0		C (delay slot) carry out of the last limb
+
+C n == 1: a single limb with no carry in.
+L(end1):
+	add	%o4,%o5,%g4
+	stw	%g4,[rp+0]
+	retl
+	srlx	%g4,32,%o0		C (delay slot) carry out of the only limb
+
+C n == 2: both pairs were loaded before the branch here.
+L(end2):
+	add	%o4,%o5,%g4
+	srlx	%g4,32,cy		C carry from the first limb into the second
+	stw	%g4,[rp+0]
+	add	%g2,%g3,%g4
+	add	cy,%g4,%g4
+	stw	%g4,[rp+4]
+	retl
+	srlx	%g4,32,%o0		C (delay slot) carry out of the second limb
+EPILOGUE(mpn_add_n)
-- cgit v1.2.3