From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/arm64/gcd_22.asm | 112 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 gmp-6.3.0/mpn/arm64/gcd_22.asm (limited to 'gmp-6.3.0/mpn/arm64/gcd_22.asm') diff --git a/gmp-6.3.0/mpn/arm64/gcd_22.asm b/gmp-6.3.0/mpn/arm64/gcd_22.asm new file mode 100644 index 0000000..5367fea --- /dev/null +++ b/gmp-6.3.0/mpn/arm64/gcd_22.asm @@ -0,0 +1,112 @@ +dnl ARM v8a mpn_gcd_22. + +dnl Copyright 2019 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + +changecom(blah) + +C cycles/bit (approx) +C Cortex-A35 ? +C Cortex-A53 7.26 +C Cortex-A55 ? +C Cortex-A57 ? +C Cortex-A72 5.72 +C Cortex-A73 6.43 +C Cortex-A75 ? +C Cortex-A76 ? +C Cortex-A77 ? 
+
+
+C mpn_gcd_22 -- gcd of the two-limb values U = {u1,u0} and V = {v1,v0}.
+C
+C Inputs:   x0 = u1 (high limb of U)    x1 = u0 (low limb of U)
+C           x2 = v1 (high limb of V)    x3 = v0 (low limb of V)
+C Result:   x0 = low limb of the gcd,   x1 = high limb of the gcd
+C Writes:   x0-x8, x11, x12, x14 and the condition flags.  No loads, stores
+C           or stack accesses are made.
+C
+C Method: repeatedly replace (U, V) by (|U - V| with its trailing zero bits
+C shifted out, min(U, V)).  A two-limb loop runs while either high limb is
+C nonzero, after which a single-limb loop finishes on the low limbs.
+C
+C NOTE(review): in the two-limb loop the low limb of U - V is nonzero and its
+C trailing-zero count must be 1..63 for the shift recombination to be right.
+C That holds when U and V are both odd -- presumably a caller-side
+C precondition; confirm against the C-level contract.
+C
+C Bracketed numbers, e.g. [0 6], are scheduling annotations kept with the
+C instructions (presumably cycle slots on the dependency chain; they were not
+C re-measured).
+
+define(`u1',     `x0')
+define(`u0',     `x1')
+define(`v1',     `x2')
+define(`v0',     `x3')
+
+define(`dlo',    `x5')          C low limb of U - V, later of |U - V|
+define(`dhi',    `x6')          C high limb of U - V, later of |U - V|
+define(`tz',     `x7')          C trailing-zero count of dlo
+define(`ntz',    `x8')          C -tz, used as a left-shift count
+define(`spill',  `x14')         C bits of dhi that move down into the low limb
+define(`hiany',  `x11')         C u1 | v1
+define(`diff',   `x4')          C single-limb difference u0 - v0
+define(`tz1',    `x12')         C trailing-zero count of diff
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)
+
+C ---- Two-limb loop ---------------------------------------------------------
+        ALIGN(16)
+L(loop22):
+        subs    dlo, u0, v0             C [0 6] dlo = u0 - v0, sets carry/borrow
+        cbz     dlo, L(loeq)            C       equal low limbs; flags untouched
+        sbcs    dhi, u1, v1             C [1 7] dhi = u1 - v1 - borrow; carry set iff U >= V
+
+        rbit    tz, dlo                 C [1]   bit-reverse, so clz below counts trailing zeros
+
+        cneg    dlo, dlo, cc            C [2]   if U < V, negate the difference:
+        cinv    dhi, dhi, cc            C [2]   dlo != 0, so dhi only needs inverting
+L(reduce):
+        csel    v0, v0, u0, cs          C [2]
+        csel    v1, v1, u1, cs          C [2]   V = min(U, V)
+
+        clz     tz, tz                  C [2]   tz = number of trailing zeros of the low limb
+        sub     ntz, xzr, tz            C [3]   ntz = -tz (register shifts use the count mod 64)
+
+        lsr     u0, dlo, tz             C [3]
+        lsl     spill, dhi, ntz         C [4]   low tz bits of dhi, moved to the top
+        lsr     u1, dhi, tz             C [3]
+        orr     u0, u0, spill           C [5]   U = |U - V| >> tz
+
+        orr     hiany, u1, v1
+        cbnz    hiany, L(loop22)        C       repeat while a high limb is nonzero
+
+
+C ---- Single-limb loop (u1 = v1 = 0) ----------------------------------------
+        subs    diff, u0, v0            C [0]
+        b.eq    L(ret1)                 C       u0 = v0: finished
+
+        ALIGN(16)
+L(loop11):
+        rbit    tz1, diff               C [1,5]
+        clz     tz1, tz1                C [2]   tz1 = trailing zeros of u0 - v0
+        csneg   diff, diff, diff, cs    C [1]   diff = |u0 - v0|
+        csel    u0, v0, u0, cs          C [1]   u0 = min(u0, v0)
+        lsr     v0, diff, tz1           C [3]   v0 = |u0 - v0| >> tz1
+        subs    diff, u0, v0            C [4]
+        b.ne    L(loop11)               C       until u0 = v0
+L(ret1):
+        mov     x0, u0                  C       low limb of result
+        mov     x1, #0                  C       high limb of result is zero
+        ret
+
+L(loeq):
+        C Reached when u0 - v0 = 0.
+        C  1. If u1 - v1 = 0 as well, then U = V and that value is the gcd.
+        C  2. Otherwise carry on with {v1,v0} and the single limb |u1 - v1|:
+        C     the high limb is set to zero and L(reduce) strips the trailing
+        C     zeros of |u1 - v1| and selects the smaller high limb for V.
+        subs    dlo, u1, v1
+        b.eq    L(ret2)
+        mov     dhi, #0
+        rbit    tz, dlo                 C [1]
+        cneg    dlo, dlo, cc            C [2]   dlo = |u1 - v1|
+        b       L(reduce)               C FIXME: make conditional
+
+L(ret2):
+        mov     x0, v0                  C       low limb of result
+        mov     x1, v1                  C       high limb of result
+        ret
+EPILOGUE()
+-- 
+cgit v1.2.3