From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/x86_64/lshiftc.asm | 182 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 gmp-6.3.0/mpn/x86_64/lshiftc.asm (limited to 'gmp-6.3.0/mpn/x86_64/lshiftc.asm') diff --git a/gmp-6.3.0/mpn/x86_64/lshiftc.asm b/gmp-6.3.0/mpn/x86_64/lshiftc.asm new file mode 100644 index 0000000..c4ba04a --- /dev/null +++ b/gmp-6.3.0/mpn/x86_64/lshiftc.asm @@ -0,0 +1,182 @@ +dnl AMD64 mpn_lshiftc -- mpn left shift with complement. + +dnl Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/limb +C AMD K8,K9 2.75 +C AMD K10 2.75 +C Intel P4 ? +C Intel core2 ? +C Intel corei ? +C Intel atom ? +C VIA nano 3.75 + + +C INPUT PARAMETERS +define(`rp', `%rdi') +define(`up', `%rsi') +define(`n', `%rdx') +define(`cnt', `%rcx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(STD64) + +ASM_START() + TEXT + ALIGN(32) +PROLOGUE(mpn_lshiftc) + FUNC_ENTRY(4) + neg R32(%rcx) C put rsh count in cl + mov -8(up,n,8), %rax + shr R8(%rcx), %rax C function return value + + neg R32(%rcx) C put lsh count in cl + lea 1(n), R32(%r8) + and $3, R32(%r8) + je L(rlx) C jump for n = 3, 7, 11, ... + + dec R32(%r8) + jne L(1) +C n = 4, 8, 12, ... + mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + neg R32(%rcx) C put rsh count in cl + mov -16(up,n,8), %r8 + shr R8(%rcx), %r8 + or %r8, %r10 + not %r10 + mov %r10, -8(rp,n,8) + dec n + jmp L(rll) + +L(1): dec R32(%r8) + je L(1x) C jump for n = 1, 5, 9, 13, ... +C n = 2, 6, 10, 16, ... + mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + neg R32(%rcx) C put rsh count in cl + mov -16(up,n,8), %r8 + shr R8(%rcx), %r8 + or %r8, %r10 + not %r10 + mov %r10, -8(rp,n,8) + dec n + neg R32(%rcx) C put lsh count in cl +L(1x): + cmp $1, n + je L(ast) + mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + mov -16(up,n,8), %r11 + shl R8(%rcx), %r11 + neg R32(%rcx) C put rsh count in cl + mov -16(up,n,8), %r8 + mov -24(up,n,8), %r9 + shr R8(%rcx), %r8 + or %r8, %r10 + shr R8(%rcx), %r9 + or %r9, %r11 + not %r10 + not %r11 + mov %r10, -8(rp,n,8) + mov %r11, -16(rp,n,8) + sub $2, n + +L(rll): neg R32(%rcx) C put lsh count in cl +L(rlx): mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + mov -16(up,n,8), %r11 + shl R8(%rcx), %r11 + + sub $4, n C 4 + jb L(end) C 2 + ALIGN(16) +L(top): + C finish stuff from lsh block + neg R32(%rcx) C put rsh count in cl + mov 16(up,n,8), %r8 + mov 8(up,n,8), %r9 + shr R8(%rcx), %r8 + or %r8, %r10 + shr R8(%rcx), %r9 + or %r9, %r11 + not %r10 + not %r11 + mov %r10, 24(rp,n,8) + mov %r11, 16(rp,n,8) + C start two new rsh + mov 0(up,n,8), %r8 + mov -8(up,n,8), %r9 + shr R8(%rcx), %r8 + shr R8(%rcx), %r9 + + C finish stuff from rsh block + neg R32(%rcx) C put lsh count in cl + mov 8(up,n,8), %r10 + mov 0(up,n,8), %r11 + shl R8(%rcx), %r10 + or %r10, %r8 + shl R8(%rcx), %r11 + or %r11, %r9 + not %r8 + not %r9 + mov %r8, 8(rp,n,8) + mov %r9, 0(rp,n,8) + C start two new lsh + mov -8(up,n,8), %r10 + mov -16(up,n,8), %r11 + shl R8(%rcx), %r10 + shl R8(%rcx), %r11 + + sub $4, n + jae L(top) C 2 +L(end): + neg R32(%rcx) C put rsh count in cl + mov 8(up), %r8 + shr R8(%rcx), %r8 + or %r8, %r10 + mov (up), %r9 + shr R8(%rcx), %r9 + or %r9, %r11 + not %r10 + not %r11 + mov %r10, 16(rp) + mov %r11, 8(rp) + + neg R32(%rcx) C put lsh count in cl +L(ast): mov (up), %r10 + shl R8(%rcx), %r10 + not %r10 + mov %r10, (rp) + FUNC_EXIT() + ret +EPILOGUE() -- cgit v1.2.3