aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/x86_64/core2/divrem_1.asm
diff options
context:
space:
mode:
Diffstat (limited to 'gmp-6.3.0/mpn/x86_64/core2/divrem_1.asm')
-rw-r--r--gmp-6.3.0/mpn/x86_64/core2/divrem_1.asm243
1 files changed, 243 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/x86_64/core2/divrem_1.asm b/gmp-6.3.0/mpn/x86_64/core2/divrem_1.asm
new file mode 100644
index 0000000..1b3f139
--- /dev/null
+++ b/gmp-6.3.0/mpn/x86_64/core2/divrem_1.asm
@@ -0,0 +1,243 @@
+dnl x86-64 mpn_divrem_1 -- mpn by limb division.
+
+dnl Copyright 2004, 2005, 2007-2010, 2012, 2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C norm unorm frac
+C AMD K8,K9 15 15 12
+C AMD K10 15 15 12
+C Intel P4 44 44 43
+C Intel core2 24 24 19.5
+C Intel corei 19 19 18
+C Intel atom 51 51 36
+C VIA nano 46 44 22.5
+
+C mp_limb_t
+C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+C mp_srcptr np, mp_size_t nn, mp_limb_t d)
+
+C mp_limb_t
+C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+C mp_srcptr np, mp_size_t nn, mp_limb_t d,
+C mp_limb_t dinv, int cnt)
+
+C INPUT PARAMETERS
+define(`qp', `%rdi')
+define(`fn_param', `%rsi')
+define(`up_param', `%rdx')
+define(`un_param', `%rcx')
+define(`d', `%r8')
+define(`dinv', `%r9') C only for mpn_preinv_divrem_1
+C shift passed on stack C only for mpn_preinv_divrem_1
+
+define(`cnt', `%rcx')
+define(`up', `%rsi')
+define(`fn', `%r12')
+define(`un', `%rbx')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+C cnt qp d dinv
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF', `40($1)')')
+IFDOS(`define(`CNTOFF', `104($1)')')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_preinv_divrem_1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+IFDOS(` mov 64(%rsp), %r9 ')
+ xor R32(%rax), R32(%rax)
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov fn_param, fn
+ mov un_param, un
+ add fn_param, un_param
+ mov up_param, up
+
+ lea -8(qp,un_param,8), qp
+
+ mov CNTOFF(%rsp), R8(cnt)
+ shl R8(cnt), d
+ jmp L(ent)
+EPILOGUE()
+
+ ALIGN(16)
+PROLOGUE(mpn_divrem_1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ xor R32(%rax), R32(%rax)
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov fn_param, fn
+ mov un_param, un
+ add fn_param, un_param
+ mov up_param, up
+ je L(ret)
+
+ lea -8(qp,un_param,8), qp
+ xor R32(%rbp), R32(%rbp)
+
+L(unnormalized):
+ test un, un
+ je L(44)
+ mov -8(up,un,8), %rax
+ cmp d, %rax
+ jae L(44)
+ mov %rbp, (qp)
+ mov %rax, %rbp
+ lea -8(qp), qp
+ je L(ret)
+ dec un
+L(44):
+ bsr d, %rcx
+ not R32(%rcx)
+ sal R8(%rcx), d
+ sal R8(%rcx), %rbp
+
+ push %rcx
+IFSTD(` push %rdi ')
+IFSTD(` push %rsi ')
+ push %r8
+IFSTD(` sub $8, %rsp ')
+IFSTD(` mov d, %rdi ')
+IFDOS(` sub $40, %rsp ')
+IFDOS(` mov d, %rcx ')
+ ASSERT(nz, `test $15, %rsp')
+ CALL( mpn_invert_limb)
+IFSTD(` add $8, %rsp ')
+IFDOS(` add $40, %rsp ')
+ pop %r8
+IFSTD(` pop %rsi ')
+IFSTD(` pop %rdi ')
+ pop %rcx
+
+ mov %rax, dinv
+ mov %rbp, %rax
+ test un, un
+ je L(frac)
+
+L(ent): mov -8(up,un,8), %rbp
+ shr R8(%rcx), %rax
+ shld R8(%rcx), %rbp, %rax
+ sub $2, un
+ js L(end)
+
+ ALIGN(16)
+L(top): lea 1(%rax), %r11
+ mul dinv
+ mov (up,un,8), %r10
+ shld R8(%rcx), %r10, %rbp
+ mov %rbp, %r13
+ add %rax, %r13
+ adc %r11, %rdx
+ mov %rdx, %r11
+ imul d, %rdx
+ sub %rdx, %rbp
+ lea (d,%rbp), %rax
+ sub $8, qp
+ cmp %r13, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r11
+ cmp d, %rax
+ jae L(ufx)
+L(uok): dec un
+ mov %r11, 8(qp)
+ mov %r10, %rbp
+ jns L(top)
+
+L(end): lea 1(%rax), %r11
+ sal R8(%rcx), %rbp
+ mul dinv
+ add %rbp, %rax
+ adc %r11, %rdx
+ mov %rax, %r11
+ mov %rdx, %r13
+ imul d, %rdx
+ sub %rdx, %rbp
+ mov d, %rax
+ add %rbp, %rax
+ cmp %r11, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r13
+ cmp d, %rax
+ jae L(efx)
+L(eok): mov %r13, (qp)
+ sub $8, qp
+ jmp L(frac)
+
+L(ufx): sub d, %rax
+ inc %r11
+ jmp L(uok)
+L(efx): sub d, %rax
+ inc %r13
+ jmp L(eok)
+
+L(frac):mov d, %rbp
+ neg %rbp
+ jmp L(fent)
+
+ ALIGN(16) C K8-K10 P6-CNR P6-NHM P4
+L(ftop):mul dinv C 0,12 0,17 0,17
+ add %r11, %rdx C 5 8 10
+ mov %rax, %r11 C 4 8 3
+ mov %rdx, %r13 C 6 9 11
+ imul %rbp, %rdx C 6 9 11
+ mov d, %rax C
+ add %rdx, %rax C 10 14 14
+ cmp %r11, %rdx C 10 14 14
+ cmovc %rdx, %rax C 11 15 15
+ adc $-1, %r13 C
+ mov %r13, (qp) C
+ sub $8, qp C
+L(fent):lea 1(%rax), %r11 C
+ dec fn C
+ jns L(ftop) C
+
+ shr R8(%rcx), %rax
+L(ret): pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+ FUNC_EXIT()
+ ret
+EPILOGUE()