diff options
author | Duncan Wilkie <antigravityd@gmail.com> | 2023-11-18 06:11:09 -0600 |
---|---|---|
committer | Duncan Wilkie <antigravityd@gmail.com> | 2023-11-18 06:11:09 -0600 |
commit | 11da511c784eca003deb90c23570f0873954e0de (patch) | |
tree | e14fdd3d5d6345956d67e79ae771d0633d28362b /gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm |
Initial commit.
Diffstat (limited to 'gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm')
-rw-r--r-- | gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm b/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm new file mode 100644 index 0000000..ade30e4 --- /dev/null +++ b/gmp-6.3.0/mpn/powerpc64/mode64/p7/gcd_22.asm @@ -0,0 +1,146 @@ +dnl PowerPC-64 mpn_gcd_22 optimised for POWER7 and POWER8. + +dnl Copyright 2000-2002, 2005, 2009, 2011-2013, 2019 Free Software Foundation, +dnl Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/bit (approx) +C POWER3/PPC630 - +C POWER4/PPC970 - +C POWER5 - +C POWER6 - +C POWER7 12.3 +C POWER8 13.4 +C POWER9 10.6 + +C We define SLOW if this target uses a slow struct return mechanism, with +C r3 as an implicit parameter for the struct pointer. +undefine(`SLOW')dnl +ifdef(`AIX',`define(`SLOW',`due to AIX')',` + ifdef(`DARWIN',,` + ifdef(`ELFv2_ABI',,`define(`SLOW',`due to ELFv1')')dnl + ') +') + +ifdef(`SLOW',` +define(`IFSLOW', `$1') +define(`u1', `r4') +define(`u0', `r5') +define(`v1', `r6') +define(`v0', `r7') +',` +define(`IFSLOW', `') +define(`u1', `r3') +define(`u0', `r4') +define(`v1', `r5') +define(`v0', `r6') +') + +define(`tmp', `r0') +define(`t0', `r8') +define(`t1', `r9') +define(`s0', `r10') +define(`s1', `r11') +define(`cnt', `r12') + +ASM_START() +PROLOGUE(mpn_gcd_22) +L(top): subfc. t0, v0, u0 C 0 12 + beq cr0, L(lowz) + subfe t1, v1, u1 C 2 14 + subfe. tmp, tmp, tmp C 4 set cr0 from the carry bit + subfc s0, u0, v0 C 0 + subfe s1, u1, v1 C 2 + +L(bck): and tmp, s0, t0 C 2 + cntlzd cnt, tmp C 4 + addi tmp, cnt, 1 C 6 + subfic cnt, cnt, 63 C 6 + + isel v0, v0, u0, 2 C 6 use condition set by subfe + isel v1, v1, u1, 2 C 6 + isel u0, t0, s0, 2 C 6 + isel u1, t1, s1, 2 C 6 + + srd u0, u0, cnt C 8 + sld tmp, u1, tmp C 8 + srd u1, u1, cnt C 8 + or u0, u0, tmp C 10 + + or. r0, u1, v1 C 10 + bne L(top) + + + li r0, 63 + b L(odd) + ALIGN(16) +L(top1):isel v0, u0, v0, 29 C v = min(u,v) + isel u0, r10, r11, 29 C u = |u - v| + subf cnt, cnt, r0 C cnt = 63-cnt + srd u0, u0, cnt +L(odd): subf r10, u0, v0 C r10 = v - u + subf r11, v0, u0 C r11 = u - v + cmpld cr7, v0, u0 + and r8, r11, r10 C isolate lsb + cntlzd cnt, r8 + bne cr7, L(top1) + +ifdef(`SLOW',` + std v0, 0(r3) + std r10, 8(r3) C zero +',` + mr r3, v0 + li r4, 0 +') + blr + + +L(lowz):C We come here when v0 - u0 = 0 + C 1. If v1 - u1 = 0, then gcd is u = v. + C 2. Else compute gcd_21({v1,v0}, |u1-v1|) + subfc. t0, v1, u1 C 2 8 + beq L(end) + li t1, 0 + subfe. tmp, tmp, tmp C 4 set cr0 from the carry bit + subf s0, u1, v1 C 2 + li s1, 0 + b L(bck) + +L(end): +ifdef(`SLOW',` + std v0, 0(r3) + std v1, 8(r3) + blr +',` + mr r3, v0 + mr r4, v1 + blr +') +EPILOGUE() |