From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- gmp-6.3.0/mpn/x86_64/lshift.asm | 172 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 gmp-6.3.0/mpn/x86_64/lshift.asm (limited to 'gmp-6.3.0/mpn/x86_64/lshift.asm') diff --git a/gmp-6.3.0/mpn/x86_64/lshift.asm b/gmp-6.3.0/mpn/x86_64/lshift.asm new file mode 100644 index 0000000..fff3152 --- /dev/null +++ b/gmp-6.3.0/mpn/x86_64/lshift.asm @@ -0,0 +1,172 @@ +dnl AMD64 mpn_lshift -- mpn left shift. + +dnl Copyright 2003, 2005, 2007, 2009, 2011, 2012, 2018 Free Software +dnl Foundation, Inc. + +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of either: +dnl +dnl * the GNU Lesser General Public License as published by the Free +dnl Software Foundation; either version 3 of the License, or (at your +dnl option) any later version. +dnl +dnl or +dnl +dnl * the GNU General Public License as published by the Free Software +dnl Foundation; either version 2 of the License, or (at your option) any +dnl later version. +dnl +dnl or both in parallel, as here. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl for more details. +dnl +dnl You should have received copies of the GNU General Public License and the +dnl GNU Lesser General Public License along with the GNU MP Library. If not, +dnl see https://www.gnu.org/licenses/. + +include(`../config.m4') + + +C cycles/limb cycles/limb cnt=1 +C AMD K8,K9 2.375 1.375 +C AMD K10 2.375 1.375 +C Intel P4 8 10.5 +C Intel core2 2.11 4.28 +C Intel corei ? ? +C Intel atom 5.75 3.5 +C VIA nano 3.5 2.25 + + +C INPUT PARAMETERS +define(`rp', `%rdi') +define(`up', `%rsi') +define(`n', `%rdx') +define(`cnt', `%rcx') + +ABI_SUPPORT(DOS64) +ABI_SUPPORT(STD64) + +ASM_START() + TEXT + ALIGN(32) +PROLOGUE(mpn_lshift) + FUNC_ENTRY(4) + neg R32(%rcx) C put rsh count in cl + mov -8(up,n,8), %rax + shr R8(%rcx), %rax C function return value + + neg R32(%rcx) C put lsh count in cl + lea 1(n), R32(%r8) + and $3, R32(%r8) + je L(rlx) C jump for n = 3, 7, 11, ... + + dec R32(%r8) + jne L(1) +C n = 4, 8, 12, ... + mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + neg R32(%rcx) C put rsh count in cl + mov -16(up,n,8), %r8 + shr R8(%rcx), %r8 + or %r8, %r10 + mov %r10, -8(rp,n,8) + dec n + jmp L(rll) + +L(1): dec R32(%r8) + je L(1x) C jump for n = 1, 5, 9, 13, ... +C n = 2, 6, 10, 16, ... + mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + neg R32(%rcx) C put rsh count in cl + mov -16(up,n,8), %r8 + shr R8(%rcx), %r8 + or %r8, %r10 + mov %r10, -8(rp,n,8) + dec n + neg R32(%rcx) C put lsh count in cl +L(1x): + cmp $1, n + je L(ast) + mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + mov -16(up,n,8), %r11 + shl R8(%rcx), %r11 + neg R32(%rcx) C put rsh count in cl + mov -16(up,n,8), %r8 + mov -24(up,n,8), %r9 + shr R8(%rcx), %r8 + or %r8, %r10 + shr R8(%rcx), %r9 + or %r9, %r11 + mov %r10, -8(rp,n,8) + mov %r11, -16(rp,n,8) + sub $2, n + +L(rll): neg R32(%rcx) C put lsh count in cl +L(rlx): mov -8(up,n,8), %r10 + shl R8(%rcx), %r10 + mov -16(up,n,8), %r11 + shl R8(%rcx), %r11 + + sub $4, n C 4 + jb L(end) C 2 + ALIGN(16) +L(top): + C finish stuff from lsh block + neg R32(%rcx) C put rsh count in cl + mov 16(up,n,8), %r8 + mov 8(up,n,8), %r9 + shr R8(%rcx), %r8 + or %r8, %r10 + shr R8(%rcx), %r9 + or %r9, %r11 + mov %r10, 24(rp,n,8) + mov %r11, 16(rp,n,8) + C start two new rsh + mov 0(up,n,8), %r8 + mov -8(up,n,8), %r9 + shr R8(%rcx), %r8 + shr R8(%rcx), %r9 + + C finish stuff from rsh block + neg R32(%rcx) C put lsh count in cl + mov 8(up,n,8), %r10 + mov 0(up,n,8), %r11 + shl R8(%rcx), %r10 + or %r10, %r8 + shl R8(%rcx), %r11 + or %r11, %r9 + mov %r8, 8(rp,n,8) + mov %r9, 0(rp,n,8) + C start two new lsh + mov -8(up,n,8), %r10 + mov -16(up,n,8), %r11 + shl R8(%rcx), %r10 + shl R8(%rcx), %r11 + + sub $4, n + jae L(top) C 2 +L(end): + neg R32(%rcx) C put rsh count in cl + mov 8(up), %r8 + shr R8(%rcx), %r8 + or %r8, %r10 + mov (up), %r9 + shr R8(%rcx), %r9 + or %r9, %r11 + mov %r10, 16(rp) + mov %r11, 8(rp) + + neg R32(%rcx) C put lsh count in cl +L(ast): mov (up), %r10 + shl R8(%rcx), %r10 + mov %r10, (rp) + FUNC_EXIT() + ret +EPILOGUE() -- cgit v1.2.3