From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/x86_64/divrem_1.asm | 314 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/x86_64/divrem_1.asm
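
Note on the method: the file implements mpn by limb division without a
hardware divide instruction in the main loops.  mpn_invert_limb computes
dinv = floor((B^2 - 1)/d) - B for a normalized divisor d (B = 2^64, high
bit of d set), and the L(ntop), L(utop) and L(ftop) loops then extract one
quotient limb per iteration from a multiply by dinv, a multiply by d, and
two conditional corrections, following the same pattern as GMP's
udiv_qrnnd_preinv macro (Möller and Granlund, "Improved Division by
Invariant Integers").  The C sketch below illustrates one such 2-by-1 step
under those assumptions; the name div_2by1_preinv and the test values are
illustrative only, not GMP's, and unsigned __int128 support (GCC/Clang on
64-bit targets) is assumed.

#include <stdint.h>
#include <stdio.h>

/* One 2-by-1 division step with a precomputed inverse, mirroring the
   mul dinv / imul d / cmovc / adc $-1 sequence of the L(ntop) loop.
   Preconditions: d has its high bit set (normalized), u1 < d, and
   dinv = floor((2^128 - 1) / d) - 2^64, as mpn_invert_limb returns.
   Returns the quotient limb; stores the remainder through *rem.  */
static uint64_t
div_2by1_preinv (uint64_t *rem, uint64_t u1, uint64_t u0,
                 uint64_t d, uint64_t dinv)
{
  unsigned __int128 p = (unsigned __int128) dinv * u1;
  p += ((unsigned __int128) u1 << 64) | u0;   /* cannot overflow: u1 < d */
  uint64_t q1 = (uint64_t) (p >> 64) + 1;     /* candidate quotient (mod 2^64) */
  uint64_t q0 = (uint64_t) p;
  uint64_t r = u0 - q1 * d;                   /* candidate remainder (mod 2^64) */
  if (r > q0)     /* candidate one too large; the asm tests r >= q0 instead,
                     and either comparison is valid here */
    {
      q1--;
      r += d;
    }
  if (r >= d)     /* rare: candidate one too small (the L(nfx) fixup path) */
    {
      q1++;
      r -= d;
    }
  *rem = r;
  return q1;
}

int
main (void)
{
  uint64_t d = 0x8000000000000001ull;   /* normalized divisor */
  /* Runtime shortcut for the inverse; the point of the asm is that this
     expensive division happens once, not once per limb.  */
  uint64_t dinv = (uint64_t) (~(unsigned __int128) 0 / d);
  uint64_t u1 = 0x0123456789abcdefull, u0 = 42, r;
  uint64_t q = div_2by1_preinv (&r, u1, u0, d, dinv);
  /* Invariant: q * d + r == u1 * 2^64 + u0, with 0 <= r < d.  */
  printf ("q = %#llx, r = %#llx\n",
          (unsigned long long) q, (unsigned long long) r);
  return 0;
}

In the two entry points, mpn_divrem_1 divides {np, nn} by d, writing the
nn integral quotient limbs at qp+fn plus fn low "fraction" limbs at qp
(the L(frac) loop, which runs this step with u0 = 0), and returns the
remainder; mpn_preinv_divrem_1 takes dinv and the shift count cnt
ready-made.  For an unnormalized d, the code finds the shift with bsr,
left-shifts d and the numerator limbs on the fly (the L(utop) loop), and
unshifts the remainder just before L(ret).
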
diff --git a/gmp-6.3.0/mpn/x86_64/divrem_1.asm b/gmp-6.3.0/mpn/x86_64/divrem_1.asm
new file mode 100644
index 0000000..d4d61ad
--- /dev/null
+++ b/gmp-6.3.0/mpn/x86_64/divrem_1.asm
@@ -0,0 +1,314 @@
+dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C		norm	unorm	frac
+C AMD K8,K9	13	13	12
+C AMD K10	13	13	12
+C Intel P4	43	44	43
+C Intel core2	24.5	24.5	19.5
+C Intel corei	20.5	19.5	18
+C Intel atom	43	46	36
+C VIA nano	25.5	25.5	24
+
+C mp_limb_t
+C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
+
+C mp_limb_t
+C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
+C                      mp_limb_t dinv, int cnt)
+
+C INPUT PARAMETERS
+define(`qp',		`%rdi')
+define(`fn_param',	`%rsi')
+define(`up_param',	`%rdx')
+define(`un_param',	`%rcx')
+define(`d',		`%r8')
+define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
+C shift passed on stack			C only for mpn_preinv_divrem_1
+
+define(`cnt', `%rcx')
+define(`up', `%rsi')
+define(`fn', `%r12')
+define(`un', `%rbx')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
+C         cnt             qp      d   dinv
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF', `40($1)')')
+IFDOS(`define(`CNTOFF', `104($1)')')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_preinv_divrem_1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+IFDOS(`	mov	64(%rsp), %r9	')
+	xor	R32(%rax), R32(%rax)
+	push	%r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+
+	mov	fn_param, fn
+	mov	un_param, un
+	add	fn_param, un_param
+	mov	up_param, up
+
+	lea	-8(qp,un_param,8), qp
+
+	test	d, d
+	js	L(nent)
+
+	mov	CNTOFF(%rsp), R8(cnt)
+	shl	R8(cnt), d
+	jmp	L(uent)
+EPILOGUE()
+
+	ALIGN(16)
+PROLOGUE(mpn_divrem_1)
+	FUNC_ENTRY(4)
+IFDOS(`	mov	56(%rsp), %r8	')
+	xor	R32(%rax), R32(%rax)
+	push	%r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+
+	mov	fn_param, fn
+	mov	un_param, un
+	add	fn_param, un_param
+	mov	up_param, up
+	je	L(ret)
+
+	lea	-8(qp,un_param,8), qp
+	xor	R32(%rbp), R32(%rbp)
+
+	test	d, d
+	jns	L(unnormalized)
+
+L(normalized):
+	test	un, un
+	je	L(8)			C un == 0
+	mov	-8(up,un,8), %rbp
+	dec	un
+	mov	%rbp, %rax
+	sub	d, %rbp
+	cmovc	%rax, %rbp
+	sbb	R32(%rax), R32(%rax)
+	inc	R32(%rax)
+	mov	%rax, (qp)
+	lea	-8(qp), qp
+L(8):
+IFSTD(`	push	%rdi		')
+IFSTD(`	push	%rsi		')
+	push	%r8
+IFSTD(`	mov	d, %rdi		')
+IFDOS(`	sub	$32, %rsp	')
+IFDOS(`	mov	d, %rcx		')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFDOS(`	add	$32, %rsp	')
+	pop	%r8
+IFSTD(`	pop	%rsi		')
+IFSTD(`	pop	%rdi		')
+
+	mov	%rax, dinv
+	mov	%rbp, %rax
+	jmp	L(nent)
+
+	ALIGN(16)
+L(ntop):mov	(up,un,8), %r10		C	K8-K10	P6-CNR	P6-NHM	P4
+	mul	dinv			C	0,13	0,20	0,18	0,45
+	add	%r10, %rax		C	4	8	3	12
+	adc	%rbp, %rdx		C	5	9	10	13
+	mov	%rax, %rbp		C	5	9	4	13
+	mov	%rdx, %r13		C	6	11	12	23
+	imul	d, %rdx			C	6	11	11	23
+	sub	%rdx, %r10		C	10	16	14	33
+	mov	d, %rax			C
+	add	%r10, %rax		C	11	17	15	34
+	cmp	%rbp, %r10		C	11	17	15	34
+	cmovc	%r10, %rax		C	12	18	16	35
+	adc	$-1, %r13		C
+	cmp	d, %rax			C
+	jae	L(nfx)			C
+L(nok):	mov	%r13, (qp)		C
+	sub	$8, qp			C
+L(nent):lea	1(%rax), %rbp		C
+	dec	un			C
+	jns	L(ntop)			C
+
+	xor	R32(%rcx), R32(%rcx)
+	jmp	L(frac)
+
+L(nfx):	sub	d, %rax
+	inc	%r13
+	jmp	L(nok)
+
+L(unnormalized):
+	test	un, un
+	je	L(44)
+	mov	-8(up,un,8), %rax
+	cmp	d, %rax
+	jae	L(44)
+	mov	%rbp, (qp)
+	mov	%rax, %rbp
+	lea	-8(qp), qp
+	je	L(ret)
+	dec	un
+L(44):
+	bsr	d, %rcx
+	not	R32(%rcx)
+	shl	R8(%rcx), d
+	shl	R8(%rcx), %rbp
+
+	push	%rcx
+IFSTD(`	push	%rdi		')
+IFSTD(`	push	%rsi		')
+	push	%r8
+IFSTD(`	sub	$8, %rsp	')
+IFSTD(`	mov	d, %rdi		')
+IFDOS(`	sub	$40, %rsp	')
+IFDOS(`	mov	d, %rcx		')
+	ASSERT(nz, `test $15, %rsp')
+	CALL(	mpn_invert_limb)
+IFSTD(`	add	$8, %rsp	')
+IFDOS(`	add	$40, %rsp	')
+	pop	%r8
+IFSTD(`	pop	%rsi		')
+IFSTD(`	pop	%rdi		')
+	pop	%rcx
+
+	mov	%rax, dinv
+	mov	%rbp, %rax
+	test	un, un
+	je	L(frac)
+
+L(uent):dec	un
+	mov	(up,un,8), %rbp
+	neg	R32(%rcx)
+	shr	R8(%rcx), %rbp
+	neg	R32(%rcx)
+	or	%rbp, %rax
+	jmp	L(ent)
+
+	ALIGN(16)
+L(utop):mov	(up,un,8), %r10
+	shl	R8(%rcx), %rbp
+	neg	R32(%rcx)
+	shr	R8(%rcx), %r10
+	neg	R32(%rcx)
+	or	%r10, %rbp
+	mul	dinv
+	add	%rbp, %rax
+	adc	%r11, %rdx
+	mov	%rax, %r11
+	mov	%rdx, %r13
+	imul	d, %rdx
+	sub	%rdx, %rbp
+	mov	d, %rax
+	add	%rbp, %rax
+	cmp	%r11, %rbp
+	cmovc	%rbp, %rax
+	adc	$-1, %r13
+	cmp	d, %rax
+	jae	L(ufx)
+L(uok):	mov	%r13, (qp)
+	sub	$8, qp
+L(ent):	mov	(up,un,8), %rbp
+	dec	un
+	lea	1(%rax), %r11
+	jns	L(utop)
+
+L(uend):shl	R8(%rcx), %rbp
+	mul	dinv
+	add	%rbp, %rax
+	adc	%r11, %rdx
+	mov	%rax, %r11
+	mov	%rdx, %r13
+	imul	d, %rdx
+	sub	%rdx, %rbp
+	mov	d, %rax
+	add	%rbp, %rax
+	cmp	%r11, %rbp
+	cmovc	%rbp, %rax
+	adc	$-1, %r13
+	cmp	d, %rax
+	jae	L(efx)
+L(eok):	mov	%r13, (qp)
+	sub	$8, qp
+	jmp	L(frac)
+
+L(ufx):	sub	d, %rax
+	inc	%r13
+	jmp	L(uok)
+L(efx):	sub	d, %rax
+	inc	%r13
+	jmp	L(eok)
+
+L(frac):mov	d, %rbp
+	neg	%rbp
+	jmp	L(fent)
+
+	ALIGN(16)			C	K8-K10	P6-CNR	P6-NHM	P4
+L(ftop):mul	dinv			C	0,12	0,17	0,17
+	add	%r11, %rdx		C	5	8	10
+	mov	%rax, %r11		C	4	8	3
+	mov	%rdx, %r13		C	6	9	11
+	imul	%rbp, %rdx		C	6	9	11
+	mov	d, %rax			C
+	add	%rdx, %rax		C	10	14	14
+	cmp	%r11, %rdx		C	10	14	14
+	cmovc	%rdx, %rax		C	11	15	15
+	adc	$-1, %r13		C
+	mov	%r13, (qp)		C
+	sub	$8, qp			C
+L(fent):lea	1(%rax), %r11		C
+	dec	fn			C
+	jns	L(ftop)			C
+
+	shr	R8(%rcx), %rax
+L(ret):	pop	%rbx
+	pop	%rbp
+	pop	%r12
+	pop	%r13
+	FUNC_EXIT()
+	ret
+EPILOGUE()
-- 
cgit v1.2.3