aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/arm64
diff options
context:
space:
mode:
author Duncan Wilkie <antigravityd@gmail.com> 2023-11-18 06:11:09 -0600
committer Duncan Wilkie <antigravityd@gmail.com> 2023-11-18 06:11:09 -0600
commit 11da511c784eca003deb90c23570f0873954e0de (patch)
tree e14fdd3d5d6345956d67e79ae771d0633d28362b /gmp-6.3.0/mpn/arm64
Initial commit.
Diffstat (limited to 'gmp-6.3.0/mpn/arm64')
-rw-r--r--gmp-6.3.0/mpn/arm64/aors_n.asm125
-rw-r--r--gmp-6.3.0/mpn/arm64/aorsmul_1.asm145
-rw-r--r--gmp-6.3.0/mpn/arm64/aorsorrlsh1_n.asm43
-rw-r--r--gmp-6.3.0/mpn/arm64/aorsorrlsh2_n.asm43
-rw-r--r--gmp-6.3.0/mpn/arm64/aorsorrlshC_n.asm139
-rw-r--r--gmp-6.3.0/mpn/arm64/applem1/addaddmul_1msb0.asm92
-rw-r--r--gmp-6.3.0/mpn/arm64/applem1/aorsmul_1.asm161
-rw-r--r--gmp-6.3.0/mpn/arm64/applem1/gmp-mparam.h187
-rw-r--r--gmp-6.3.0/mpn/arm64/applem1/sqr_basecase.asm318
-rw-r--r--gmp-6.3.0/mpn/arm64/arm64-defs.m453
-rw-r--r--gmp-6.3.0/mpn/arm64/bdiv_dbm1c.asm111
-rw-r--r--gmp-6.3.0/mpn/arm64/bdiv_q_1.asm122
-rw-r--r--gmp-6.3.0/mpn/arm64/cnd_aors_n.asm129
-rw-r--r--gmp-6.3.0/mpn/arm64/com.asm92
-rw-r--r--gmp-6.3.0/mpn/arm64/copyd.asm85
-rw-r--r--gmp-6.3.0/mpn/arm64/copyi.asm82
-rw-r--r--gmp-6.3.0/mpn/arm64/cora53/cnd_aors_n.asm99
-rw-r--r--gmp-6.3.0/mpn/arm64/cora53/gmp-mparam.h242
-rw-r--r--gmp-6.3.0/mpn/arm64/cora57/gmp-mparam.h188
-rw-r--r--gmp-6.3.0/mpn/arm64/cora72/gmp-mparam.h242
-rw-r--r--gmp-6.3.0/mpn/arm64/cora73/gmp-mparam.h225
-rw-r--r--gmp-6.3.0/mpn/arm64/darwin.m450
-rw-r--r--gmp-6.3.0/mpn/arm64/divrem_1.asm231
-rw-r--r--gmp-6.3.0/mpn/arm64/gcd_11.asm70
-rw-r--r--gmp-6.3.0/mpn/arm64/gcd_22.asm112
-rw-r--r--gmp-6.3.0/mpn/arm64/gmp-mparam.h192
-rw-r--r--gmp-6.3.0/mpn/arm64/hamdist.asm181
-rw-r--r--gmp-6.3.0/mpn/arm64/invert_limb.asm83
-rw-r--r--gmp-6.3.0/mpn/arm64/logops_n.asm139
-rw-r--r--gmp-6.3.0/mpn/arm64/lshift.asm138
-rw-r--r--gmp-6.3.0/mpn/arm64/lshiftc.asm141
-rw-r--r--gmp-6.3.0/mpn/arm64/mod_34lsub1.asm124
-rw-r--r--gmp-6.3.0/mpn/arm64/mul_1.asm128
-rw-r--r--gmp-6.3.0/mpn/arm64/popcount.asm157
-rw-r--r--gmp-6.3.0/mpn/arm64/rsh1aors_n.asm168
-rw-r--r--gmp-6.3.0/mpn/arm64/rshift.asm136
-rw-r--r--gmp-6.3.0/mpn/arm64/sec_tabselect.asm122
-rw-r--r--gmp-6.3.0/mpn/arm64/sqr_diag_addlsh1.asm102
-rw-r--r--gmp-6.3.0/mpn/arm64/xgene1/gmp-mparam.h182
39 files changed, 5379 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/arm64/aors_n.asm b/gmp-6.3.0/mpn/arm64/aors_n.asm
new file mode 100644
index 0000000..b4a6da6
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/aors_n.asm
@@ -0,0 +1,125 @@
+dnl ARM64 mpn_add_n and mpn_sub_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 2.75-3.25
+C Cortex-A57 1.5
+C X-Gene 2.0
+
+changecom(blah)
+
+define(`rp', `x0') C destination limb pointer
+define(`up', `x1') C first source pointer, the minuend in the sub variant
+define(`vp', `x2') C second source pointer, the subtrahend in the sub variant
+define(`n', `x3') C number of 64-bit limbs
+
+ifdef(`OPERATION_add_n', `
+ define(`ADDSUBC', adcs)
+ define(`CLRCY', `cmn xzr, xzr')
+ define(`SETCY', `cmp $1, #1')
+ define(`RETVAL', `cset x0, cs')
+ define(`func_n', mpn_add_n)
+ define(`func_nc', mpn_add_nc)') C add: C flag set means carry; SETCY sets it when its argument is nonzero
+ifdef(`OPERATION_sub_n', `
+ define(`ADDSUBC', sbcs)
+ define(`CLRCY', `cmp xzr, xzr')
+ define(`SETCY', `cmp xzr, $1')
+ define(`RETVAL', `cset x0, cc')
+ define(`func_n', mpn_sub_n)
+ define(`func_nc', mpn_sub_nc)') C sub: C flag clear means borrow; SETCY clears it when its argument is nonzero
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc) C carry-in entry point: same operation as func_n, with a carry or borrow input in x4
+ SETCY( x4) C turn x4 into the initial state of the C flag consumed by the first ADDSUBC
+ b L(ent) C continue in the shared body inside func_n
+EPILOGUE()
+PROLOGUE(func_n) C rp[i] = up[i] +/- vp[i] over n limbs with carry propagation; returns the carry or borrow out in x0
+ CLRCY C no carry-in: clears C for add, sets C (meaning no borrow) for sub
+L(ent): lsr x17, n, #2 C x17 = n / 4 = loop trip count; func_nc branches here with C already set up
+ tbz n, #0, L(bx0) C even n: no single limb to peel
+
+L(bx1): ldr x7, [up] C odd n: handle limb 0 on its own
+ ldr x11, [vp]
+ ADDSUBC x13, x7, x11
+ str x13, [rp],#8
+ tbnz n, #1, L(b11) C n mod 4 == 3
+
+L(b01): cbz x17, L(ret) C n mod 4 == 1; for n == 1 the work is already done
+ ldp x4, x5, [up,#8]
+ ldp x8, x9, [vp,#8]
+ sub up, up, #8 C bias so the pre-indexed #32 loads at L(mid) land on the next limb pair
+ sub vp, vp, #8
+ b L(mid)
+
+L(b11): ldp x6, x7, [up,#8]
+ ldp x10, x11, [vp,#8]
+ add up, up, #8 C step past the peeled limb so the #16 loads at L(top) land on the next pair
+ add vp, vp, #8
+ cbz x17, L(end) C n == 3: only the pair just loaded remains
+ b L(top)
+
+L(bx0): tbnz n, #1, L(b10) C n mod 4 == 2
+
+L(b00): ldp x4, x5, [up] C n mod 4 == 0. NOTE(review): n == 0 also lands here and would wrap x17 at L(mid), so n >= 1 is assumed
+ ldp x8, x9, [vp]
+ sub up, up, #16 C bias for the pre-indexed #32 loads at L(mid)
+ sub vp, vp, #16
+ b L(mid)
+
+L(b10): ldp x6, x7, [up]
+ ldp x10, x11, [vp]
+ cbz x17, L(end) C n == 2: only the pair just loaded remains
+
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#16] C loop: 4 limbs per pass, loads run one limb pair ahead of the arithmetic
+ ldp x8, x9, [vp,#16]
+ ADDSUBC x12, x6, x10
+ ADDSUBC x13, x7, x11
+ stp x12, x13, [rp],#16
+L(mid): ldp x6, x7, [up,#32]! C pre-index write-back advances up and vp by four limbs per pass
+ ldp x10, x11, [vp,#32]!
+ ADDSUBC x12, x4, x8
+ ADDSUBC x13, x5, x9
+ stp x12, x13, [rp],#16
+ sub x17, x17, #1 C sub and cbnz do not write the flags, so the carry chain stays intact
+ cbnz x17, L(top)
+
+L(end): ADDSUBC x12, x6, x10 C final limb pair, already loaded by the loop or the feed-in code
+ ADDSUBC x13, x7, x11
+ stp x12, x13, [rp]
+L(ret): RETVAL C x0 = 1 on carry out (add) or borrow out (sub), otherwise 0
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/aorsmul_1.asm b/gmp-6.3.0/mpn/arm64/aorsmul_1.asm
new file mode 100644
index 0000000..81ec1da
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/aorsmul_1.asm
@@ -0,0 +1,145 @@
+dnl ARM64 mpn_addmul_1 and mpn_submul_1
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013, 2015, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C addmul_1 submul_1
+C cycles/limb cycles/limb
+C Cortex-A53 9.3-9.8 9.3-9.8
+C Cortex-A55 9.0-9.5 9.3-9.8
+C Cortex-A57 7 7
+C Cortex-A72
+C Cortex-A73 6 6
+C X-Gene 5 5
+C Apple M1 1.75 1.75
+
+C NOTES
+C * It is possible to keep the carry chain alive between the addition blocks
+C and thus avoid csinc, but only for addmul_1. Since that saves no time
+C on the tested pipelines, we keep addmul_1 and submul_1 similar.
+C * We could separate feed-in into 4 blocks, one for each residue (mod 4).
+C That is likely to save a few cycles.
+
+changecom(blah)
+
+define(`rp', `x0') C destination limb pointer, read and written
+define(`up', `x1') C source limb pointer
+define(`n', `x2') C number of 64-bit limbs
+define(`v0', `x3') C single-limb multiplier
+
+ifdef(`OPERATION_addmul_1', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`COND', `cc')
+ define(`func', mpn_addmul_1)') C addmul: csinc with cc adds 1 exactly when the C flag is set, i.e. on carry
+ifdef(`OPERATION_submul_1', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`COND', `cs')
+ define(`func', mpn_submul_1)') C submul: csinc with cs adds 1 exactly when the C flag is clear, i.e. on borrow
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+PROLOGUE(func) C rp[] = rp[] +/- up[] * v0 over n limbs; returns the most significant carry or borrow limb in x0
+ adds x15, xzr, xzr C x15 = running carry limb = 0; also leaves the C flag clear
+
+ tbz n, #0, L(1) C bit 0 of n clear: skip the one-limb step
+
+ ldr x4, [up],#8
+ mul x8, x4, v0 C x12:x8 = 128-bit product of the source limb and v0
+ umulh x12, x4, v0
+ ldr x4, [rp]
+ ADDSUB x8, x4, x8
+ csinc x15, x12, x12, COND C carry limb = high product word, plus 1 if the add carried or the sub borrowed
+ str x8, [rp],#8
+
+L(1): tbz n, #1, L(2) C bit 1 of n clear: skip the two-limb step
+
+ ldp x4, x5, [up],#16
+ mul x8, x4, v0
+ umulh x12, x4, v0
+ mul x9, x5, v0
+ umulh x13, x5, v0
+ adds x8, x8, x15 C first fold the incoming carry limb and the high words into the product
+ adcs x9, x9, x12
+ ldp x4, x5, [rp]
+ adc x15, x13, xzr
+ ADDSUB x8, x4, x8 C then apply the product limbs to rp
+ ADDSUBC x9, x5, x9
+ csinc x15, x15, x15, COND
+ stp x8, x9, [rp],#16
+
+L(2): lsr n, n, #2 C n = number of remaining 4-limb blocks
+ cbz n, L(le3) C original n was 3 or less, everything is already done
+ ldp x4, x5, [up],#32
+ ldp x6, x7, [up,#-16]
+ b L(mid)
+L(le3): mov x0, x15 C return the carry limb
+ ret
+
+ ALIGN(16)
+L(top): ldp x4, x5, [up],#32 C fetch the next four source limbs
+ ldp x6, x7, [up,#-16]
+ ADDSUB x8, x16, x8 C apply the products of the previous block to the rp limbs loaded at L(mid)
+ ADDSUBC x9, x17, x9
+ stp x8, x9, [rp],#32
+ ADDSUBC x10, x12, x10
+ ADDSUBC x11, x13, x11
+ stp x10, x11, [rp,#-16]
+ csinc x15, x15, x15, COND C stp does not write the flags, so this still sees the end of the chain above
+L(mid): sub n, n, #1
+ mul x8, x4, v0
+ umulh x12, x4, v0
+ mul x9, x5, v0
+ umulh x13, x5, v0
+ adds x8, x8, x15 C fold the carry limb and each high word into the next low word
+ mul x10, x6, v0
+ umulh x14, x6, v0
+ adcs x9, x9, x12
+ mul x11, x7, v0
+ umulh x15, x7, v0
+ adcs x10, x10, x13
+ ldp x16, x17, [rp] C rp limbs that this block will be combined with
+ adcs x11, x11, x14
+ ldp x12, x13, [rp,#16] C x12 and x13 are free again, their high words were consumed by the adcs above
+ adc x15, x15, xzr
+ cbnz n, L(top)
+
+ ADDSUB x8, x16, x8 C last block: same apply step as at L(top), without further loads
+ ADDSUBC x9, x17, x9
+ ADDSUBC x10, x12, x10
+ ADDSUBC x11, x13, x11
+ stp x8, x9, [rp]
+ stp x10, x11, [rp,#16]
+ csinc x0, x15, x15, COND C return the carry limb, plus 1 on a final carry or borrow
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/aorsorrlsh1_n.asm b/gmp-6.3.0/mpn/arm64/aorsorrlsh1_n.asm
new file mode 100644
index 0000000..c617a67
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/aorsorrlsh1_n.asm
@@ -0,0 +1,43 @@
+dnl ARM64 mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 1) C left shift count used by the included body
+define(RSH, 63) C 64 - LSH, used there to fetch the bits shifted out of the neighbouring limb
+
+ifdef(`OPERATION_addlsh1_n',`define(`DO_add')') C each DO_ symbol selects one macro set in the included body
+ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
+
+include_mpn(`arm64/aorsorrlshC_n.asm') C shared implementation, parameterised by LSH, RSH and the DO_ symbol
diff --git a/gmp-6.3.0/mpn/arm64/aorsorrlsh2_n.asm b/gmp-6.3.0/mpn/arm64/aorsorrlsh2_n.asm
new file mode 100644
index 0000000..852d117
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/aorsorrlsh2_n.asm
@@ -0,0 +1,43 @@
+dnl ARM64 mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 2) C left shift count used by the included body
+define(RSH, 62) C 64 - LSH, used there to fetch the bits shifted out of the neighbouring limb
+
+ifdef(`OPERATION_addlsh2_n',`define(`DO_add')') C each DO_ symbol selects one macro set in the included body
+ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
+
+include_mpn(`arm64/aorsorrlshC_n.asm') C shared implementation, parameterised by LSH, RSH and the DO_ symbol
diff --git a/gmp-6.3.0/mpn/arm64/aorsorrlshC_n.asm b/gmp-6.3.0/mpn/arm64/aorsorrlshC_n.asm
new file mode 100644
index 0000000..1718b77
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/aorsorrlshC_n.asm
@@ -0,0 +1,139 @@
+dnl ARM64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 3.25-3.75
+C Cortex-A57 2.18
+C X-Gene 2.5
+
+changecom(blah)
+
+define(`rp', `x0') C destination limb pointer
+define(`up', `x1') C pointer to the unshifted operand
+define(`vp', `x2') C pointer to the operand that is shifted left by LSH bits
+define(`n', `x3') C number of 64-bit limbs
+
+ifdef(`DO_add', `
+ define(`ADDSUB', `adds $1, $2, $3')
+ define(`ADDSUBC', `adcs $1, $2, $3')
+ define(`CLRRCY', `adds $1, xzr, xzr')
+ define(`RETVAL', `adc x0, $1, xzr')
+ define(`func_n', mpn_addlsh`'LSH`'_n)') C add variant: result = second operand + third operand
+ifdef(`DO_sub', `
+ define(`ADDSUB', `subs $1, $3, $2')
+ define(`ADDSUBC', `sbcs $1, $3, $2')
+ define(`CLRRCY', `subs $1, xzr, xzr')
+ define(`RETVAL', `cinc x0, $1, cc')
+ define(`func_n', mpn_sublsh`'LSH`'_n)') C sub variant: operands swapped, result = third operand - second operand
+ifdef(`DO_rsb', `
+ define(`ADDSUB', `subs $1, $2, $3')
+ define(`ADDSUBC', `sbcs $1, $2, $3')
+ define(`CLRRCY', `subs $1, xzr, xzr')
+ define(`RETVAL', `sbc x0, $1, xzr')
+ define(`func_n', mpn_rsblsh`'LSH`'_n)') C rsb variant: result = second operand - third operand
+
+ASM_START()
+PROLOGUE(func_n) C combines up[] with vp[] shifted left by LSH bits over n limbs, stores to rp[], returns the high limb in x0
+ lsr x6, n, #2 C x6 = n / 4 = loop trip count
+ tbz n, #0, L(bx0) C even n
+
+L(bx1): ldr x5, [up] C odd n: limb 0 is handled before the loop
+ tbnz n, #1, L(b11) C n mod 4 == 3
+
+L(b01): ldr x11, [vp] C n mod 4 == 1
+ cbz x6, L(1) C n == 1
+ ldp x8, x9, [vp,#8]
+ lsl x13, x11, #LSH C the lowest limb has no lower neighbour, so a plain shift is enough
+ ADDSUB( x15, x13, x5) C call order is always: destination, shifted vp limb, up limb
+ str x15, [rp],#8
+ sub up, up, #24 C bias for the pre-indexed #32 loads at L(mid)
+ sub vp, vp, #8
+ b L(mid)
+
+L(1): lsl x13, x11, #LSH C single-limb case
+ ADDSUB( x15, x13, x5)
+ str x15, [rp]
+ lsr x0, x11, RSH C bits shifted out of the top of the limb
+ RETVAL( x0, x1) C fold in the carry or borrow; the second argument is not used by any RETVAL definition
+ ret
+
+L(b11): ldr x9, [vp]
+ ldp x10, x11, [vp,#8]! C pre-index write-back: vp now points at limb 1
+ lsl x13, x9, #LSH
+ ADDSUB( x17, x13, x5)
+ str x17, [rp],#8
+ sub up, up, #8 C bias for the #16 loads at L(top) and L(end)
+ cbz x6, L(end) C n == 3
+ b L(top)
+
+L(bx0): tbnz n, #1, L(b10) C n mod 4 == 2
+
+L(b00): CLRRCY( x11) C n mod 4 == 0: zero the previous-limb register and start with no carry or borrow
+ ldp x8, x9, [vp],#-16 C post-index bias for the pre-indexed #32 loads at L(mid)
+ sub up, up, #32
+ b L(mid)
+
+L(b10): CLRRCY( x9) C zero the previous-limb register and start with no carry or borrow
+ ldp x10, x11, [vp]
+ sub up, up, #16 C bias for the #16 loads at L(top) and L(end)
+ cbz x6, L(end) C n == 2
+
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#16]
+ extr x12, x10, x9, #RSH C x12 = x10 shifted left by LSH with the top LSH bits of x9 shifted in below
+ ldp x8, x9, [vp,#16]
+ extr x13, x11, x10, #RSH
+ ADDSUBC(x14, x12, x4)
+ ADDSUBC(x15, x13, x5)
+ stp x14, x15, [rp],#16
+L(mid): ldp x4, x5, [up,#32]! C pre-index write-back advances up and vp by four limbs per pass
+ extr x12, x8, x11, #RSH
+ ldp x10, x11, [vp,#32]!
+ extr x13, x9, x8, #RSH
+ ADDSUBC(x16, x12, x4)
+ ADDSUBC(x17, x13, x5)
+ stp x16, x17, [rp],#16
+ sub x6, x6, #1 C sub and cbnz do not write the flags, so the carry chain stays intact
+ cbnz x6, L(top)
+
+L(end): ldp x4, x5, [up,#16] C final two limbs; their vp limbs are already in x10 and x11
+ extr x12, x10, x9, #RSH
+ extr x13, x11, x10, #RSH
+ ADDSUBC(x14, x12, x4)
+ ADDSUBC(x15, x13, x5)
+ stp x14, x15, [rp]
+ lsr x0, x11, RSH C bits shifted out of the top limb of vp
+ RETVAL( x0, x1) C fold in the final carry or borrow; the second argument is not used by any RETVAL definition
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/applem1/addaddmul_1msb0.asm b/gmp-6.3.0/mpn/arm64/applem1/addaddmul_1msb0.asm
new file mode 100644
index 0000000..03cbf97
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/applem1/addaddmul_1msb0.asm
@@ -0,0 +1,92 @@
+dnl ARM64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
+
+dnl Copyright 2021 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53
+C Cortex-A55
+C Cortex-A57
+C Cortex-A72
+C Cortex-A73
+C X-Gene
+C Apple M1 2.0
+
+changecom(blah)
+
+define(`rp', x0) C destination limb pointer
+define(`ap', x1) C source limbs multiplied by u0
+define(`bp', x2) C source limbs multiplied by v0
+define(`n', x3) C number of 64-bit limbs
+define(`u0', x4) C multiplier for ap[]
+define(`v0', x5) C multiplier for bp[]
+
+C TODO
+C * Use fewer distinct registers, should be trivial.
+
+PROLOGUE(mpn_addaddmul_1msb0) C rp[] = ap[] * u0 + bp[] * v0 over n limbs; returns the high limb in x0
+ lsr x7, n, #1 C x7 = n / 2 = loop trip count, two limbs per pass
+ adds x6, xzr, xzr C x6 = carry limb = 0; also clears the C flag read by the first adcs at L(top)
+ tbz n, #0, L(top) C even n: straight to the loop. NOTE(review): n == 0 would wrap x7, so n >= 1 is assumed
+
+ ldr x11, [ap], #8 C 0
+ ldr x15, [bp], #8 C 0
+ mul x10, x11, u0 C 0
+ umulh x11, x11, u0 C 1
+ mul x14, x15, v0 C 0
+ umulh x15, x15, v0 C 1
+ adds x10, x10, x14 C 0
+ adcs x6, x11, x15 C 1 becomes the carry limb for the next word
+ str x10, [rp], #8 C 0
+ cbz x7, L(end) C n == 1
+
+L(top): ldp x11, x13, [ap], #16 C 0 1
+ ldp x15, x17, [bp], #16 C 0 1
+ mul x10, x11, u0 C 0
+ umulh x11, x11, u0 C 1
+ mul x14, x15, v0 C 0
+ umulh x15, x15, v0 C 1
+ adcs x10, x10, x14 C 0 consumes the C flag left by the previous pass or the feed-in code
+ adc x11, x11, x15 C 1 presumably cannot overflow because u0,v0 < 2^63 per the file header
+ adds x10, x10, x6 C 0 add the incoming carry limb
+ mul x12, x13, u0 C 1
+ umulh x13, x13, u0 C 2
+ mul x14, x17, v0 C 1
+ umulh x17, x17, v0 C 2
+ adcs x12, x12, x14 C 1
+ adc x6, x13, x17 C 2 new carry limb
+ adds x11, x12, x11 C 1
+ stp x10, x11, [rp], #16 C 0 1
+ sub x7, x7, #1 C sub and cbnz do not write the flags; the C flag from the adds above is used next
+ cbnz x7, L(top)
+
+L(end): adc x0, x6, xzr C return the carry limb plus the pending C flag
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/applem1/aorsmul_1.asm b/gmp-6.3.0/mpn/arm64/applem1/aorsmul_1.asm
new file mode 100644
index 0000000..aa87c2a
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/applem1/aorsmul_1.asm
@@ -0,0 +1,161 @@
+dnl ARM64 mpn_addmul_1 and mpn_submul_1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53
+C Cortex-A55
+C Cortex-A57
+C Cortex-A72
+C Cortex-A73
+C X-Gene
+C Apple M1 1.25
+
+changecom(blah)
+
+define(`rp', x0) C destination limb pointer, read and written
+define(`up', x1) C source limb pointer
+define(`n', x2) C number of 64-bit limbs
+define(`v0', x3) C single-limb multiplier
+define(`cin',x4) C carry-in limb, read only by the mpn_addmul_1c entry
+
+define(`CY',x17) C running carry limb, also the source of the return value
+
+ifdef(`OPERATION_addmul_1', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`COND', `cc')
+ define(`func', mpn_addmul_1)') C addmul: csinc with cc adds 1 exactly when the C flag is set, i.e. on carry
+ifdef(`OPERATION_submul_1', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`COND', `cs')
+ define(`func', mpn_submul_1)') C submul: csinc with cs adds 1 exactly when the C flag is clear, i.e. on borrow
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1 mpn_addmul_1c)
+
+ifdef(`OPERATION_addmul_1', `
+PROLOGUE(mpn_addmul_1c) C addmul_1 with an explicit carry-in limb, emitted only in the addmul build
+ mov CY, cin C seed the running carry limb from the fifth argument
+ b L(ent) C join the shared body inside func
+EPILOGUE()
+')
+
+PROLOGUE(func) C rp[] = rp[] +/- up[] * v0 over n limbs; returns the carry or borrow limb in x0
+ mov CY, #0 C W0
+L(ent): lsr x16, n, #2 C x16 = n / 4 = loop trip count; the mpn_addmul_1c entry branches here with CY preset
+ tbz n, #0, L(bx0) C even n
+
+L(bx1): ldr x4, [up], #8 C odd n: one or three limbs are handled before the loop
+ mul x8, x4, v0
+ umulh x4, x4, v0
+ tbz n, #1, L(b01) C n mod 4 == 1
+
+L(b11): ldp x5,x6, [up], #16 C n mod 4 == 3
+ ldp x12,x13, [rp]
+ ldr x14, [rp,#16]
+ mul x9, x5, v0
+ umulh x5, x5, v0
+ mul x10, x6, v0
+ umulh x6, x6, v0
+ ADDSUB x8, x12, x8 C first chain: rp limbs combined with the low word of limb 0 and the high words
+ ADDSUBC x4, x13, x4
+ ADDSUBC x5, x14, x5
+ csinc x6, x6, x6, COND
+ ADDSUB x8, x8, CY C second chain: carry-in limb and the remaining low product words
+ ADDSUBC x4, x4, x9
+ ADDSUBC x5, x5, x10
+ csinc CY, x6, x6, COND C new carry limb
+ stp x8, x4, [rp], #16
+ str x5, [rp], #8
+ cbnz x16, L(top)
+ mov x0, CY C n == 3: return the carry limb
+ ret
+
+L(b01): ldr x12, [rp]
+ ADDSUB x8, x12, x8
+ csinc x4, x4, x4, COND C high product word, plus 1 on carry or borrow
+ ADDSUB x8, x8, CY C then apply the carry-in limb
+ csinc CY, x4, x4, COND
+ str x8, [rp], #8
+ cbnz x16, L(top)
+ mov x0, CY C n == 1: return the carry limb
+ ret
+
+L(bx0): ldp x4,x5, [up], #16
+ tbz n, #1, L(top)+4 C n mod 4 == 0: enter the loop one 4-byte instruction in, skipping its first ldp since x4,x5 are already loaded
+
+L(b10): ldp x12,x13, [rp] C n mod 4 == 2
+ mul x8, x4, v0
+ umulh x4, x4, v0
+ mul x9, x5, v0
+ umulh x5, x5, v0
+ ADDSUB x8, x12, x8
+ ADDSUBC x4, x13, x4
+ csinc x5, x5, x5, COND
+ ADDSUB x8, x8, CY
+ ADDSUBC x4, x4, x9
+ csinc CY, x5, x5, COND
+ stp x8, x4, [rp], #16
+ cbz x16, L(done) C n == 2
+
+L(top): ldp x4,x5, [up], #16 C W0 W1
+ ldp x6,x7, [up], #16 C W2 W3
+ ldp x12,x13, [rp] C W0 W1
+ ldp x14,x15, [rp,#16] C W2 W3
+ mul x8, x4, v0 C W0
+ umulh x4, x4, v0 C W1
+ mul x9, x5, v0 C W1
+ umulh x5, x5, v0 C W2
+ mul x10, x6, v0 C W2
+ umulh x6, x6, v0 C W3
+ mul x11, x7, v0 C W3
+ umulh x7, x7, v0 C W4
+ ADDSUB x8, x12, x8 C W0
+ ADDSUBC x4, x13, x4 C W1
+ ADDSUBC x5, x14, x5 C W2
+ ADDSUBC x6, x15, x6 C W3
+ csinc x7, x7, x7, COND C W4
+ ADDSUB x8, x8, CY C W0 carry-in
+ ADDSUBC x4, x4, x9 C W1
+ ADDSUBC x5, x5, x10 C W2
+ ADDSUBC x6, x6, x11 C W3
+ csinc CY, x7, x7, COND C W4 carry-out
+ stp x8, x4, [rp], #16
+ stp x5, x6, [rp], #16
+ sub x16, x16, #1 C the carry travels in CY, not in the flags, from here on
+ cbnz x16, L(top)
+
+L(done):mov x0, CY C return the carry limb
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/applem1/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/applem1/gmp-mparam.h
new file mode 100644
index 0000000..d08262f
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/applem1/gmp-mparam.h
@@ -0,0 +1,187 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2020 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64 /* limb width in bits */
+#define GMP_LIMB_BYTES 8 /* GMP_LIMB_BITS / 8 */
+
+/* 3200 MHz Apple M1 */
+/* FFT tuning limit = 1 M */
+/* Generated by tuneup.c, 2020-12-25, gcc 4.2 */
+
+#define MOD_1_1P_METHOD 2 /* 42.96% faster than 1 */
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define USE_PREINV_DIVREM_1 1 /* native */
+/* From m1.gmplib.org, 2023-07-21 */
+#define DIV_QR_1N_PI1_METHOD 3 /* 13.35% faster than 1 */
+#define DIV_QR_1_NORM_THRESHOLD 2
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD 9
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 28
+
+#define DIV_1_VS_MUL_1_PERCENT 659
+
+#define MUL_TOOM22_THRESHOLD 26
+#define MUL_TOOM33_THRESHOLD 77
+#define MUL_TOOM44_THRESHOLD 153
+#define MUL_TOOM6H_THRESHOLD 446
+#define MUL_TOOM8H_THRESHOLD 626
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 94
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 81
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 41
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 99
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 133
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 47
+#define SQR_TOOM3_THRESHOLD 74
+#define SQR_TOOM4_THRESHOLD 372
+#define SQR_TOOM6_THRESHOLD 462
+#define SQR_TOOM8_THRESHOLD 592
+
+#define MULMID_TOOM42_THRESHOLD 44
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 11
+
+#define MUL_FFT_MODF_THRESHOLD 216 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 216, 5}, { 7, 4}, { 19, 5}, { 19, 6}, \
+ { 10, 5}, { 21, 6}, { 21, 7}, { 11, 6}, \
+ { 23, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 33, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
+ { 63,10}, { 39, 9}, { 79,10}, { 55,11}, \
+ { 31,10}, { 79,11}, { 47,12}, { 31,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511,11}, \
+ { 79,10}, { 159, 9}, { 319, 8}, { 639,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511, 8}, { 1023,10}, \
+ { 271, 9}, { 543, 8}, { 1087,11}, { 143,10}, \
+ { 287, 9}, { 575, 8}, { 1151,11}, { 159,10}, \
+ { 319, 9}, { 639,12}, { 95,11}, { 191,10}, \
+ { 383,13}, { 63,12}, { 127,11}, { 255,10}, \
+ { 511, 9}, { 1023,11}, { 271,10}, { 543, 9}, \
+ { 1087, 8}, { 2175,11}, { 287,10}, { 575, 9}, \
+ { 1151,12}, { 159,11}, { 319,10}, { 639, 9}, \
+ { 1279,11}, { 351,10}, { 703, 9}, { 1407,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,12}, \
+ { 223,11}, { 447,10}, { 895,11}, { 479,10}, \
+ { 959,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 104
+#define MUL_FFT_THRESHOLD 2368
+
+#define SQR_FFT_MODF_THRESHOLD 304 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 304, 5}, { 10, 4}, { 21, 5}, { 11, 4}, \
+ { 23, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 15, 7}, { 31, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 33, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
+ { 63,10}, { 39, 9}, { 79,10}, { 47,11}, \
+ { 31,10}, { 79,11}, { 47,12}, { 31,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511,11}, \
+ { 79,10}, { 159, 9}, { 319, 8}, { 639,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,10}, \
+ { 255, 9}, { 511, 8}, { 1023,10}, { 271, 9}, \
+ { 543, 8}, { 1087,10}, { 287, 9}, { 575, 8}, \
+ { 1151,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,12}, { 95,11}, { 191,10}, { 383, 9}, \
+ { 767,13}, { 63,12}, { 127,11}, { 255,10}, \
+ { 511, 9}, { 1023,11}, { 271,10}, { 543, 9}, \
+ { 1087, 8}, { 2175,10}, { 575, 9}, { 1151,11}, \
+ { 303,12}, { 159,11}, { 319,10}, { 639, 9}, \
+ { 1279,11}, { 351,10}, { 703, 9}, { 1407,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831, 9}, { 1663,12}, { 223,11}, { 447,10}, \
+ { 895,11}, { 479,10}, { 959, 9}, { 1919,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 111
+#define SQR_FFT_THRESHOLD 1856
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 76
+#define MULLO_MUL_N_THRESHOLD 4292
+#define SQRLO_BASECASE_THRESHOLD 6
+#define SQRLO_DC_THRESHOLD 186
+#define SQRLO_SQR_THRESHOLD 3688
+
+#define DC_DIV_QR_THRESHOLD 67
+#define DC_DIVAPPR_Q_THRESHOLD 242
+#define DC_BDIV_QR_THRESHOLD 68
+#define DC_BDIV_Q_THRESHOLD 129
+
+#define INV_MULMOD_BNM1_THRESHOLD 82
+#define INV_NEWTON_THRESHOLD 157
+#define INV_APPR_THRESHOLD 157
+
+#define BINV_NEWTON_THRESHOLD 99
+#define REDC_1_TO_REDC_N_THRESHOLD 68
+
+#define MU_DIV_QR_THRESHOLD 979
+#define MU_DIVAPPR_Q_THRESHOLD 1210
+#define MUPI_DIV_QR_THRESHOLD 76
+#define MU_BDIV_QR_THRESHOLD 942
+#define MU_BDIV_Q_THRESHOLD 1341
+
+#define POWM_SEC_TABLE 11,75,137,712,2177
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 18
+#define SET_STR_DC_THRESHOLD 632
+#define SET_STR_PRECOMPUTE_THRESHOLD 1215
+
+#define FAC_DSC_THRESHOLD 252
+#define FAC_ODD_THRESHOLD 0 /* always */
+
+#define MATRIX22_STRASSEN_THRESHOLD 9
+#define HGCD2_DIV1_METHOD 1 /* 8.52% faster than 3 */
+#define HGCD_THRESHOLD 131
+#define HGCD_APPR_THRESHOLD 144
+#define HGCD_REDUCE_THRESHOLD 1962
+#define GCD_DC_THRESHOLD 435
+#define GCDEXT_DC_THRESHOLD 199
+#define JACOBI_BASE_METHOD 4 /* 0.80% faster than 1 */
diff --git a/gmp-6.3.0/mpn/arm64/applem1/sqr_basecase.asm b/gmp-6.3.0/mpn/arm64/applem1/sqr_basecase.asm
new file mode 100644
index 0000000..22246cf
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/applem1/sqr_basecase.asm
@@ -0,0 +1,318 @@
+dnl ARM64 mpn_sqr_basecase
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl TODO
+dnl * Replace the mul_1 code with less scheduled and thus simpler code. If
+dnl we base it on the addmul_1 loop, the corner code could benefit from
+dnl similar incoming register state, which could eliminate some loads.
+dnl * Handle n = 4 early.
+dnl * Duplicate addmul loop into 4 loops which fall into each other. Perhaps
+dnl stick to one mul_1 loop, but do the (mod 4) stuff at its end instead of
+dnl its beginning.
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+
+define(`v0', `x3')
+define(`CY', `x17')
+
+C mpn_sqr_basecase: write the square of the n-limb operand at up to rp.
+C Register roles come from the defines above: rp = x0, up = x1, n = x2,
+C v0 = x3 (multiplier limb of the current pass), CY = x17 (carry limb).
+C Structure: n <= 3 is handled by straight-line code at L(le3).  Otherwise a
+C first pass multiplies the limbs above up[0] by the doubled up[0], then
+C L(outer) runs one multiply-accumulate pass per remaining limb, and L(cor2)
+C finishes the last two limbs.
+PROLOGUE(mpn_sqr_basecase)
+ cmp n, #3
+ b.ls L(le3)  C unsigned n <= 3
+
+ ldr v0, [up],#8  C v0 = lowest limb, up advances past it
+ sub n, n, #1  C limbs left for the first pass
+ mul x6, v0, v0  C low half of the lowest limb squared
+ umulh x4, v0, v0  C high half, folded into the first pass
+ str x6, [rp],#8
+ lsl v0, v0, 1  C doubled multiplier; the shifted-out top bit is
+C accounted for in L(outer) via the asr 63 mask and the adds/adc pair
+ lsl n, n, #3  C n is a byte count from here on
+ lsr x16, n, #5  C x16 = number of 4-limb groups
+ tbnz n, #3, L(mbx1)  C odd limb count
+
+L(mbx0):adds x11, x4, xzr C move and clear cy
+ tbz n, #4, L(mb00)
+
+L(mb10):ldp x4, x5, [up],#16
+ mul x8, x4, v0
+ umulh x10, x4, v0
+ cbz x16, L(m2e)  C no full group: only the last pair remains
+ ldp x6, x7, [up],#16
+ mul x9, x5, v0
+ b L(mmid)-8  C enter two instructions before L(mmid)
+
+L(mbx1):ldr x7, [up],#8  C odd count: do one limb up front
+ mul x9, x7, v0
+ umulh x11, x7, v0
+ adds x9, x9, x4  C add high half of the first square, start carry chain
+ str x9, [rp],#8
+ tbnz n, #4, L(mb10)
+L(mb00):ldp x6, x7, [up],#16
+ mul x8, x6, v0
+ umulh x10, x6, v0
+ ldp x4, x5, [up],#16
+ mul x9, x7, v0
+ adcs x12, x8, x11
+ umulh x11, x7, v0
+ sub x16, x16, #1
+ cbz x16, L(mend)
+
+C First-pass loop: four limbs per iteration, products times v0 are chained
+C through the carry flag (adcs) and stored pairwise.  sub and cbnz are used
+C for the counter because they leave the carry flag untouched.
+ ALIGN(16)
+L(mtop):mul x8, x4, v0
+ ldp x6, x7, [up],#16
+ adcs x13, x9, x10
+ umulh x10, x4, v0
+ mul x9, x5, v0
+ stp x12, x13, [rp],#16
+ adcs x12, x8, x11
+ umulh x11, x5, v0
+L(mmid):mul x8, x6, v0
+ ldp x4, x5, [up],#16
+ adcs x13, x9, x10
+ umulh x10, x6, v0
+ mul x9, x7, v0
+ stp x12, x13, [rp],#16
+ adcs x12, x8, x11
+ umulh x11, x7, v0
+ sub x16, x16, #1
+ cbnz x16, L(mtop)
+
+L(mend):mul x8, x4, v0
+ adcs x13, x9, x10
+ umulh x10, x4, v0
+ stp x12, x13, [rp],#16
+L(m2e): mul x9, x5, v0
+ adcs x12, x8, x11
+ umulh x11, x5, v0
+ adcs x13, x9, x10
+ stp x12, x13, [rp],#16
+ adc x11, x11, xzr  C fold final carry into the top limb
+ str x11, [rp],#8
+
+C One pass per remaining limb.  x6 is the limb handled by the previous pass
+C and x7 the limb squared in this one.
+L(outer):
+ sub n, n, #8  C one limb fewer per pass
+ sub rp, rp, n  C rewind both pointers by n bytes
+ sub up, up, n
+ ldp x6, x7, [up,#-16]
+ ldr v0, [rp,#-8]
+ and x8, x7, x6, asr 63  C x8 = x7 if the top bit of x6 is set, else 0
+ mul x9, x7, x7
+ adds v0, v0, x8
+ umulh x4, x7, x7
+ adc x4, x4, xzr
+ adds v0, v0, x9
+ str v0, [rp,#-8]
+ adc CY, x4, xzr  C CY = carry limb into this pass
+ adds xzr, x6, x6  C carry flag = top bit of x6
+ adc v0, x7, x7  C v0 = 2*x7 + that bit, truncated to 64 bits
+ cmp n, #16
+ beq L(cor2)  C two limbs (16 bytes) left: finish in L(cor2)
+
+ lsr x16, n, #5  C x16 = number of 4-limb groups
+ tbz n, #3, L(bx0)
+
+L(bx1): ldr x4, [up],#8
+ mul x8, x4, v0
+ umulh x4, x4, v0
+ tbz n, #4, L(b01)
+
+L(b11): ldp x5, x6, [up],#16  C three leading limbs
+ ldp x12, x13, [rp]
+ ldr x14, [rp,#16]
+ mul x9, x5, v0
+ umulh x5, x5, v0
+ mul x10, x6, v0
+ umulh x6, x6, v0
+ adds x8, x12, x8
+ adcs x4, x13, x4
+ adcs x5, x14, x5
+ adc x6, x6, xzr
+ adds x8, x8, CY
+ adcs x4, x4, x9
+ adcs x5, x5, x10
+ adc CY, x6, xzr
+ stp x8, x4, [rp],#16
+ str x5, [rp],#8
+ cbnz x16, L(top)
+ b L(end)
+
+L(b01): ldr x12, [rp]  C one leading limb
+ adds x8, x12, x8
+ adc x4, x4, xzr
+ adds x8, x8, CY
+ adc CY, x4, xzr
+ str x8, [rp],#8
+ b L(top)
+
+L(bx0): ldp x4, x5, [up],#16
+ tbz n, #4, L(top)+4  C skip the first ldp of L(top): x4, x5 already loaded
+
+L(b10): ldp x12, x13, [rp]  C two leading limbs
+ mul x8, x4, v0
+ umulh x4, x4, v0
+ mul x9, x5, v0
+ umulh x5, x5, v0
+ adds x8, x12, x8
+ adcs x4, x13, x4
+ adc x5, x5, xzr
+ adds x8, x8, CY
+ adcs x4, x4, x9
+ adc CY, x5, xzr
+ stp x8, x4, [rp],#16
+
+C Multiply-accumulate loop: four limbs per iteration.  Two carry chains are
+C used, one adding the shifted product halves to the limbs read from rp and
+C one adding CY plus the remaining product halves; CY receives the top limb.
+ ALIGN(16)
+L(top): ldp x4, x5, [up],#16
+ ldp x6, x7, [up],#16
+ ldp x12, x13, [rp]
+ ldp x14, x15, [rp,#16]
+ mul x8, x4, v0
+ umulh x4, x4, v0
+ mul x9, x5, v0
+ umulh x5, x5, v0
+ mul x10, x6, v0
+ umulh x6, x6, v0
+ mul x11, x7, v0
+ umulh x7, x7, v0
+ adds x8, x12, x8
+ adcs x4, x13, x4
+ adcs x5, x14, x5
+ adcs x6, x15, x6
+ adc x7, x7, xzr
+ adds x8, x8, CY
+ adcs x4, x4, x9
+ adcs x5, x5, x10
+ adcs x6, x6, x11
+ adc CY, x7, xzr
+ stp x8, x4, [rp],#16
+ stp x5, x6, [rp],#16
+ sub x16, x16, #1
+ cbnz x16, L(top)
+
+L(end): str CY, [rp],#8  C top limb of this pass
+ b L(outer)
+
+C Tail: x10 and x11 are the last two limbs.  Their products with v0, the
+C square of x10, the product of x11 with the doubled x10, and the square of
+C x11 are accumulated without looping, then the routine returns.
+L(cor2):ldp x10, x11, [up]
+ ldp x12, x13, [rp]
+ mul x8, x10, v0
+ umulh x4, x10, v0
+ mul x9, x11, v0
+ umulh x5, x11, v0
+ adds x8, x12, x8
+ adcs x4, x13, x4
+ adc x5, x5, xzr
+ adds x8, x8, CY
+ adcs x13, x4, x9
+ adc x12, x5, xzr
+ str x8, [rp]
+ and x8, x10, x7, asr 63  C same top-bit correction as in L(outer)
+ mul x9, x10, x10
+ adds x13, x13, x8
+ umulh x4, x10, x10
+ adc x4, x4, xzr
+ adds x13, x13, x9
+ adc CY, x4, xzr
+ adds xzr, x7, x7
+ adc v0, x10, x10
+ mul x8, x11, v0
+ umulh x4, x11, v0
+ adds x8, x12, x8
+ adc x4, x4, xzr
+ adds x8, x8, CY
+ adc v0, x4, xzr
+ stp x13, x8, [rp,#8]
+ and x2, x11, x10, asr 63  C x2 (n) is reused as scratch here
+ mul x5, x11, x11
+ adds v0, v0, x2
+ umulh x4, x11, x11
+ adc x4, x4, xzr
+ adds v0, v0, x5
+ adc x4, x4, xzr
+ stp v0, x4, [rp,#24]
+ ret
+
+C Small sizes.  The W comments name the result limb a value contributes to.
+L(le3): ldr v0, [up]
+ mul x4, v0, v0 C W0
+ umulh x5, v0, v0 C W1
+ cmp n, #2
+ b.hs L(2o3)  C n >= 2
+ stp x4, x5, [rp]  C single limb: two result limbs
+ ret
+
+L(2o3): ldr x6, [up,#8]
+ mul x7, x6, x6 C W2
+ umulh x8, x6, x6 C W3
+ mul x9, v0, x6 C W1+1/64
+ umulh x10, v0, x6 C W2+1/64
+ b.hi L(3)  C flags still from cmp n, #2 above: n > 2
+ adds x5, x5, x9 C W1
+ adcs x7, x7, x10 C W2
+ adc x8, x8, xzr C W3
+ adds x5, x5, x9 C W1
+ adcs x7, x7, x10 C W2
+ adc x8, x8, xzr C W3
+ stp x4, x5, [rp]
+ stp x7, x8, [rp,#16]
+ ret
+
+C Three limbs: each cross product is added twice instead of being doubled.
+L(3): ldr x11, [up,#16]
+ mul x12, x11, x11 C W4
+ umulh x13, x11, x11 C W5
+ mul x14, v0, x11 C W2+1/64
+ umulh x15, v0, x11 C W3+1/64
+ mul x16, x6, x11 C W3+1/64
+ umulh x17, x6, x11 C W4+1/64
+ adds x5, x5, x9
+ adcs x7, x7, x10
+ adcs x8, x8, x15
+ adcs x12, x12, x17
+ adc x13, x13, xzr
+ adds x5, x5, x9
+ adcs x7, x7, x10
+ adcs x8, x8, x15
+ adcs x12, x12, x17
+ adc x13, x13, xzr
+ adds x7, x7, x14
+ adcs x8, x8, x16
+ adcs x12, x12, xzr
+ adc x13, x13, xzr
+ adds x7, x7, x14
+ adcs x8, x8, x16
+ adcs x12, x12, xzr
+ adc x13, x13, xzr
+ stp x4, x5, [rp]
+ stp x7, x8, [rp,#16]
+ stp x12, x13, [rp,#32]
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/arm64-defs.m4 b/gmp-6.3.0/mpn/arm64/arm64-defs.m4
new file mode 100644
index 0000000..46149f7
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/arm64-defs.m4
@@ -0,0 +1,53 @@
+divert(-1)
+
+dnl m4 macros for ARM64 ELF assembler.
+
+dnl Copyright 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl Standard commenting is with //, the default m4 # is for constants and we
+dnl don't want to disable macro expansions in or after them.
+
+changecom
+
+
+dnl LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
+dnl
+dnl Load the address of gmp_symbol into a register. We split this into two
+dnl parts to allow separation for manual insn scheduling.
+dnl
+dnl With PIC defined, LEA_HI emits adrp on the :got: page of the symbol and
+dnl LEA_LO loads the address through a :got_lo12: offset from that page.
+dnl Otherwise LEA_HI emits adrp on the symbol itself and LEA_LO adds the
+dnl :lo12: offset. In both variants LEA_LO reads and overwrites the register
+dnl written by LEA_HI, so the two must be given the same reg, and that
+dnl register must not be modified between them.
+
+ifdef(`PIC',`dnl
+define(`LEA_HI', `adrp $1, :got:$2')dnl
+define(`LEA_LO', `ldr $1, [$1, #:got_lo12:$2]')dnl
+',`dnl
+define(`LEA_HI', `adrp $1, $2')dnl
+define(`LEA_LO', `add $1, $1, :lo12:$2')dnl
+')dnl
+
+divert`'dnl
diff --git a/gmp-6.3.0/mpn/arm64/bdiv_dbm1c.asm b/gmp-6.3.0/mpn/arm64/bdiv_dbm1c.asm
new file mode 100644
index 0000000..78984b4
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/bdiv_dbm1c.asm
@@ -0,0 +1,111 @@
+dnl ARM64 mpn_bdiv_dbm1c.
+
+dnl Copyright 2008, 2011, 2012, 2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 8
+C Cortex-A57 7
+C X-Gene 4.25
+
+define(`qp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`bd', `x3')
+define(`cy', `x4')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_bdiv_dbm1c: for every limb u read from up, subtract the low half of
+C u*bd from the running value in x4, store x4 to qp, then subtract the high
+C half of u*bd together with the borrow.  The final running value is
+C returned in x0.  x4 is never initialised here, so it is the incoming fifth
+C argument (named cy by the defines above).
+C The loop is unrolled four ways; the entry point is selected by n mod 4.
+C Products alternate between the register pairs x10/x11 and x12/x13 so that
+C the next product is computed while the previous one is being subtracted.
+PROLOGUE(mpn_bdiv_dbm1c)
+ ldr x5, [up], #8  C first limb
+ ands x6, n, #3  C x6 = n mod 4
+ b.eq L(fi0)
+ cmp x6, #2
+ b.cc L(fi1)  C n mod 4 = 1
+ b.eq L(fi2)  C n mod 4 = 2, otherwise fall through for 3
+
+L(fi3): mul x12, x5, bd
+ umulh x13, x5, bd
+ ldr x5, [up], #8
+ b L(lo3)
+
+L(fi0): mul x10, x5, bd
+ umulh x11, x5, bd
+ ldr x5, [up], #8
+ b L(lo0)
+
+L(fi1): subs n, n, #1
+ mul x12, x5, bd
+ umulh x13, x5, bd
+ b.ls L(wd1)  C n was 1: only the final step remains
+ ldr x5, [up], #8
+ b L(lo1)
+
+L(fi2): mul x10, x5, bd
+ umulh x11, x5, bd
+ ldr x5, [up], #8
+ b L(lo2)
+
+L(top): ldr x5, [up], #8
+ subs x4, x4, x10  C subtract low half, borrow into the flags
+ str x4, [qp], #8
+ sbc x4, x4, x11  C subtract high half and the borrow
+L(lo1): mul x10, x5, bd
+ umulh x11, x5, bd
+ ldr x5, [up], #8
+ subs x4, x4, x12
+ str x4, [qp], #8
+ sbc x4, x4, x13
+L(lo0): mul x12, x5, bd
+ umulh x13, x5, bd
+ ldr x5, [up], #8
+ subs x4, x4, x10
+ str x4, [qp], #8
+ sbc x4, x4, x11
+L(lo3): mul x10, x5, bd
+ umulh x11, x5, bd
+ ldr x5, [up], #8
+ subs x4, x4, x12
+ str x4, [qp], #8
+ sbc x4, x4, x13
+L(lo2): subs n, n, #4  C four limbs per trip round the loop
+ mul x12, x5, bd
+ umulh x13, x5, bd
+ b.hi L(top)
+
+C Wind down: the last two products are still pending in x10/x11 and x12/x13.
+L(wd2): subs x4, x4, x10
+ str x4, [qp], #8
+ sbc x4, x4, x11
+L(wd1): subs x4, x4, x12
+ str x4, [qp]
+ sbc x0, x4, x13  C return the final running value
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/bdiv_q_1.asm b/gmp-6.3.0/mpn/arm64/bdiv_q_1.asm
new file mode 100644
index 0000000..7fffc93
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/bdiv_q_1.asm
@@ -0,0 +1,122 @@
+dnl ARM64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C norm unorm
+C Cortex-A53 12 15
+C Cortex-A57 12 12
+C Cortex-A72
+C Cortex-A73
+C X-Gene 11 11
+
+C TODO
+C * Scheduling of umulh later in the unorm loop brings A53 time to 12 c/l.
+C Unfortunately, that requires software pipelining.
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`d', `x3')
+define(`di', `x4') C just mpn_pi1_bdiv_q_1
+define(`cnt', `x5') C just mpn_pi1_bdiv_q_1
+
+define(`cy', `r7') C NOTE(review): never referenced in this file, and r7 is not an AArch64 register name
+define(`tnc', `x8') C negated shift count used in the unnormalized loop
+
+ASM_START()
+C mpn_bdiv_q_1: set up cnt and di from the divisor d, then continue in
+C mpn_pi1_bdiv_q_1 with rp, up and n untouched.
+C   cnt = number of trailing zero bits of d, which are shifted out of d
+C   di  = result of a table lookup refined by three steps of the form
+C         x = 2*x - x*x*d, evaluated on the shifted d
+C binvert_limb_table is defined elsewhere; presumably it holds 8-bit
+C inverses indexed by bits 1 to 7 of an odd d -- TODO confirm.
+PROLOGUE(mpn_bdiv_q_1)
+
+ rbit x6, d
+ clz cnt, x6  C leading zeros of the bit-reversed d = trailing zeros of d
+ lsr d, d, cnt
+
+ LEA_HI( x7, binvert_limb_table)
+ ubfx x6, d, 1, 7  C table index: bits 1 to 7 of the shifted d
+ LEA_LO( x7, binvert_limb_table)
+ ldrb w6, [x7, x6]
+ ubfiz x7, x6, 1, 8  C x7 = 2*x
+ umull x6, w6, w6  C x6 = x*x
+ msub x6, x6, d, x7  C x = 2*x - x*x*d
+ lsl x7, x6, 1
+ mul x6, x6, x6
+ msub x6, x6, d, x7  C second step
+ lsl x7, x6, 1
+ mul x6, x6, x6
+ msub di, x6, d, x7  C third step, result goes to di
+
+ b GSYM_PREFIX`'mpn_pi1_bdiv_q_1  C tail branch, no return to here
+EPILOGUE()
+
+C mpn_pi1_bdiv_q_1: produce n quotient limbs at rp from the limbs at up,
+C using the divisor d, the precomputed multiplier di and the shift count cnt.
+C Each quotient limb is (limb - carry - borrow) * di; the next carry is the
+C high half of quotient * d.  When cnt is nonzero every source limb is first
+C shifted right by cnt bits, pulling in the low bits of the following limb.
+C The loop counters use sub and cbnz so the borrow in the C flag survives
+C from one sbcs to the next.
+PROLOGUE(mpn_pi1_bdiv_q_1)
+ sub n, n, #1  C the loops below run n-1 times
+ subs x6, x6, x6 C x6 = 0 and C flag set, meaning no borrow for the first sbcs
+ ldr x9, [up],#8
+ cbz cnt, L(norm)
+
+L(unorm):
+ lsr x12, x9, cnt  C low part of the first shifted limb
+ cbz n, L(eu1)
+ sub tnc, xzr, cnt  C register shifts use the count modulo 64, so this acts as 64-cnt
+
+L(tpu): ldr x9, [up],#8
+ lsl x7, x9, tnc  C high part comes from the next limb
+ orr x7, x7, x12
+ sbcs x6, x7, x6  C shifted limb - carry - borrow
+ mul x7, x6, di  C quotient limb
+ str x7, [rp],#8
+ lsr x12, x9, cnt
+ umulh x6, x7, d  C carry for the next limb
+ sub n, n, #1
+ cbnz n, L(tpu)
+
+L(eu1): sbcs x6, x12, x6  C last limb has no higher limb to pull bits from
+ mul x6, x6, di
+ str x6, [rp]
+ ret
+
+C cnt is zero on this path, so x5 (cnt) is reused to hold the quotient limb.
+L(norm):
+ mul x5, x9, di
+ str x5, [rp],#8
+ cbz n, L(en1)
+
+L(tpn): ldr x9, [up],#8
+ umulh x5, x5, d  C carry from the previous quotient limb
+ sbcs x5, x9, x5
+ mul x5, x5, di
+ str x5, [rp],#8
+ sub n, n, #1
+ cbnz n, L(tpn)
+
+L(en1): ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/cnd_aors_n.asm b/gmp-6.3.0/mpn/arm64/cnd_aors_n.asm
new file mode 100644
index 0000000..397aa51
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/cnd_aors_n.asm
@@ -0,0 +1,129 @@
+dnl ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 3.87-4.37
+C Cortex-A57 1.75
+C X-Gene 2.0
+
+changecom(blah)
+
+define(`cnd', `x0')
+define(`rp', `x1')
+define(`up', `x2')
+define(`vp', `x3')
+define(`n', `x4')
+
+ifdef(`OPERATION_cnd_add_n', `
+ define(`ADDSUBC', adcs)
+ define(`CLRCY', `cmn xzr, xzr')
+ define(`RETVAL', `cset x0, cs')
+ define(`func', mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+ define(`ADDSUBC', sbcs)
+ define(`CLRCY', `cmp xzr, xzr')
+ define(`RETVAL', `cset x0, cc')
+ define(`func', mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+C func is mpn_cnd_add_n or mpn_cnd_sub_n, selected by the OPERATION_ ifdefs
+C above together with ADDSUBC, CLRCY and RETVAL.
+C For n limbs: rp = up ADDSUBC (vp with all bits cleared when cnd is zero).
+C The same instructions execute whatever the value of cnd; only the mask
+C differs.  RETVAL converts the final C flag into the value returned in x0.
+C The main loop handles four limbs per iteration and is entered at L(top) or
+C L(mid) depending on n mod 4; x17 counts the 4-limb groups.
+PROLOGUE(func)
+ cmp cnd, #1  C C flag set when cnd is nonzero
+ sbc cnd, cnd, cnd  C cnd = 0 if it was nonzero, all ones if it was zero
+
+ CLRCY
+
+ lsr x17, n, #2  C x17 = number of 4-limb groups
+ tbz n, #0, L(bx0)
+
+L(bx1): ldr x13, [vp]  C odd n: do one limb up front
+ ldr x11, [up]
+ bic x7, x13, cnd  C keep the vp limb only when the mask is zero
+ ADDSUBC x9, x11, x7
+ str x9, [rp]
+ tbnz n, #1, L(b11)
+
+L(b01): cbz x17, L(rt)  C n = 1: done
+ ldp x12, x13, [vp,#8]
+ ldp x10, x11, [up,#8]
+ sub up, up, #8  C bias the pointers for the pre-indexed accesses at L(mid)
+ sub vp, vp, #8
+ sub rp, rp, #24
+ b L(mid)
+
+L(b11): ldp x12, x13, [vp,#8]!
+ ldp x10, x11, [up,#8]!
+ sub rp, rp, #8
+ cbz x17, L(end)
+ b L(top)
+
+L(bx0): ldp x12, x13, [vp]
+ ldp x10, x11, [up]
+ tbnz n, #1, L(b10)
+
+L(b00): sub up, up, #16
+ sub vp, vp, #16
+ sub rp, rp, #32
+ b L(mid)
+
+L(b10): sub rp, rp, #16
+ cbz x17, L(end)
+
+C Each half of the loop masks and combines the pair loaded previously while
+C loading the next pair.  Only the second half advances the pointers, using
+C pre-indexed addressing with writeback.
+ ALIGN(16)
+L(top): bic x6, x12, cnd
+ bic x7, x13, cnd
+ ldp x12, x13, [vp,#16]
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ ldp x10, x11, [up,#16]
+ stp x8, x9, [rp,#16]
+L(mid): bic x6, x12, cnd
+ bic x7, x13, cnd
+ ldp x12, x13, [vp,#32]!
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ ldp x10, x11, [up,#32]!
+ stp x8, x9, [rp,#32]!
+ sub x17, x17, #1  C sub and cbnz leave the C flag intact
+ cbnz x17, L(top)
+
+L(end): bic x6, x12, cnd  C last pair, already loaded
+ bic x7, x13, cnd
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ stp x8, x9, [rp,#16]
+L(rt): RETVAL
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/com.asm b/gmp-6.3.0/mpn/arm64/com.asm
new file mode 100644
index 0000000..d594943
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/com.asm
@@ -0,0 +1,92 @@
+dnl ARM64 mpn_com.
+
+dnl Copyright 2013, 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53
+C Cortex-A55
+C Cortex-A57
+C Cortex-A72
+C Cortex-A73
+C X-Gene
+C Apple M1
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+
+ASM_START()
+C mpn_com: store the bitwise complement of the n limbs at up to rp.
+C Sizes up to 3 go straight to the tail code at L(bc).  Larger sizes first
+C align rp to 16 bytes, then run a loop handling four limbs per iteration
+C with one pair always loaded ahead.
+PROLOGUE(mpn_com)
+ cmp n, #3
+ b.le L(bc)  C signed n <= 3
+
+C Complement one limb if needed so that rp becomes 128-bit aligned
+ tbz rp, #3, L(al2)
+ ldr x4, [up],#8
+ sub n, n, #1
+ mvn x4, x4
+ str x4, [rp],#8
+
+L(al2): ldp x4,x5, [up],#16  C pair loaded ahead of the loop
+ sub n, n, #6  C 2 limbs just loaded plus 4 for one loop iteration
+ tbnz n, #63, L(end)  C negative: not enough limbs for the loop
+
+ ALIGN(16)
+L(top): ldp x6,x7, [up],#32
+ mvn x4, x4
+ mvn x5, x5
+ stp x4,x5, [rp],#32
+ ldp x4,x5, [up,#-16]
+ mvn x6, x6
+ mvn x7, x7
+ stp x6,x7, [rp,#-16]
+ sub n, n, #4
+ tbz n, #63, L(top)  C loop while n is non-negative
+
+L(end): mvn x4, x4  C store the pair that was loaded ahead
+ mvn x5, x5
+ stp x4,x5, [rp],#16
+
+C Complement last 0-3 limbs. Note that rp is aligned after loop, but not when
+C we arrive here via L(bc).  The two low bits of n still equal the number of
+C limbs left, because n was only reduced by the pair just stored and by
+C multiples of 4.
+L(bc): tbz n, #1, L(tl1)
+ ldp x4,x5, [up],#16
+ mvn x4, x4
+ mvn x5, x5
+ stp x4,x5, [rp],#16
+L(tl1): tbz n, #0, L(tl2)
+ ldr x4, [up]
+ mvn x4, x4
+ str x4, [rp]
+L(tl2): ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/copyd.asm b/gmp-6.3.0/mpn/arm64/copyd.asm
new file mode 100644
index 0000000..d542970
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/copyd.asm
@@ -0,0 +1,85 @@
+dnl ARM64 mpn_copyd.
+
+dnl Copyright 2013, 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 1.8
+C Cortex-A55 1.28
+C Cortex-A57
+C Cortex-A72 1
+C Cortex-A73 1.1-1.35 (alignment dependent)
+C X-Gene 1
+C Apple M1 0.31
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+
+ASM_START()
+C mpn_copyd: copy n limbs from up to rp, starting at the highest address and
+C working downwards.  Both pointers are first moved past the end of their
+C operands and then only pre-decremented.
+C Sizes up to 3 use the tail code at L(bc).  Larger sizes align rp to 16
+C bytes and run a loop copying four limbs per iteration with one pair always
+C loaded ahead.
+PROLOGUE(mpn_copyd)
+ add rp, rp, n, lsl #3  C rp and up now point just past the last limb
+ add up, up, n, lsl #3
+
+ cmp n, #3
+ b.le L(bc)  C signed n <= 3
+
+C Copy until rp is 128-bit aligned
+ tbz rp, #3, L(al2)
+ ldr x4, [up,#-8]!
+ sub n, n, #1
+ str x4, [rp,#-8]!
+
+L(al2): ldp x4,x5, [up,#-16]!  C pair loaded ahead of the loop
+ sub n, n, #6  C 2 limbs just loaded plus 4 for one loop iteration
+ tbnz n, #63, L(end)  C negative: not enough limbs for the loop
+
+ ALIGN(16)
+L(top): ldp x6,x7, [up,#-16]
+ stp x4,x5, [rp,#-16]
+ ldp x4,x5, [up,#-32]!
+ stp x6,x7, [rp,#-32]!
+ sub n, n, #4
+ tbz n, #63, L(top)  C loop while n is non-negative
+
+L(end): stp x4,x5, [rp,#-16]!  C store the pair that was loaded ahead
+
+C Copy last 0-3 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc).  The two low bits of n still equal the number of
+C limbs left.
+L(bc): tbz n, #1, L(tl1)
+ ldp x4,x5, [up,#-16]!
+ stp x4,x5, [rp,#-16]!
+L(tl1): tbz n, #0, L(tl2)
+ ldr x4, [up,#-8]
+ str x4, [rp,#-8]
+L(tl2): ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/copyi.asm b/gmp-6.3.0/mpn/arm64/copyi.asm
new file mode 100644
index 0000000..0de40c5
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/copyi.asm
@@ -0,0 +1,82 @@
+dnl ARM64 mpn_copyi.
+
+dnl Copyright 2013, 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 1.8
+C Cortex-A55 1.28
+C Cortex-A57
+C Cortex-A72 1
+C Cortex-A73 1.1-1.35 (alignment dependent)
+C X-Gene 1
+C Apple M1 0.31
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+
+ASM_START()
+C mpn_copyi: copy n limbs from up to rp, starting at the lowest address and
+C working upwards.
+C Sizes up to 3 use the tail code at L(bc).  Larger sizes align rp to 16
+C bytes and run a loop copying four limbs per iteration with one pair always
+C loaded ahead.
+PROLOGUE(mpn_copyi)
+ cmp n, #3
+ b.le L(bc)  C signed n <= 3
+
+C Copy until rp is 128-bit aligned
+ tbz rp, #3, L(al2)
+ ldr x4, [up],#8
+ sub n, n, #1
+ str x4, [rp],#8
+
+L(al2): ldp x4,x5, [up],#16  C pair loaded ahead of the loop
+ sub n, n, #6  C 2 limbs just loaded plus 4 for one loop iteration
+ tbnz n, #63, L(end)  C negative: not enough limbs for the loop
+
+ ALIGN(16)
+L(top): ldp x6,x7, [up],#32
+ stp x4,x5, [rp],#32
+ ldp x4,x5, [up,#-16]
+ stp x6,x7, [rp,#-16]
+ sub n, n, #4
+ tbz n, #63, L(top)  C loop while n is non-negative
+
+L(end): stp x4,x5, [rp],#16  C store the pair that was loaded ahead
+
+C Copy last 0-3 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc).  The two low bits of n still equal the number of
+C limbs left.
+L(bc): tbz n, #1, L(tl1)
+ ldp x4,x5, [up],#16
+ stp x4,x5, [rp],#16
+L(tl1): tbz n, #0, L(tl2)
+ ldr x4, [up]
+ str x4, [rp]
+L(tl2): ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/cora53/cnd_aors_n.asm b/gmp-6.3.0/mpn/arm64/cora53/cnd_aors_n.asm
new file mode 100644
index 0000000..1b227da
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/cora53/cnd_aors_n.asm
@@ -0,0 +1,99 @@
+dnl ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 3.5-4
+C Cortex-A57 2.25
+C X-Gene 3.5
+
+changecom(blah)
+
+define(`cnd', `x0')
+define(`rp', `x1')
+define(`up', `x2')
+define(`vp', `x3')
+define(`n', `x4')
+
+ifdef(`OPERATION_cnd_add_n', `
+ define(`ADDSUBC', adcs)
+ define(`CLRCY', `cmn xzr, xzr')
+ define(`RETVAL', `cset x0, cs')
+ define(`func', mpn_cnd_add_n)')
+ifdef(`OPERATION_cnd_sub_n', `
+ define(`ADDSUBC', sbcs)
+ define(`CLRCY', `cmp xzr, xzr')
+ define(`RETVAL', `cset x0, cc')
+ define(`func', mpn_cnd_sub_n)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+C func is mpn_cnd_add_n or mpn_cnd_sub_n, selected by the OPERATION_ ifdefs
+C above together with ADDSUBC, CLRCY and RETVAL.
+C For n limbs: rp = up ADDSUBC (vp with all bits cleared when cnd is zero).
+C The same instructions execute whatever the value of cnd; only the mask
+C differs.  RETVAL converts the final C flag into the value returned in x0.
+C An odd leading limb is handled first, then the loop processes two limbs
+C per iteration with the next pair always loaded ahead.
+PROLOGUE(func)
+ cmp cnd, #1  C C flag set when cnd is nonzero
+ sbc cnd, cnd, cnd  C cnd = 0 if it was nonzero, all ones if it was zero
+
+ CLRCY C really only needed for n = 0 (mod 4)
+
+ tbz n, #0, L(1)
+ ldr x10, [up], #8  C odd n: do one limb up front
+ ldr x12, [vp], #8
+ bic x6, x12, cnd  C keep the vp limb only when the mask is zero
+ ADDSUBC x8, x10, x6
+ sub n, n, #1
+ str x8, [rp], #8
+ cbz n, L(rt)  C n was 1: done
+
+L(1): ldp x10, x11, [up], #16  C first pair, loaded ahead
+ ldp x12, x13, [vp], #16
+ sub n, n, #2
+ cbz n, L(end)
+
+L(top): bic x6, x12, cnd
+ bic x7, x13, cnd
+ ldp x12, x13, [vp], #16
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ ldp x10, x11, [up], #16
+ sub n, n, #2  C sub and cbnz leave the C flag intact
+ stp x8, x9, [rp], #16
+ cbnz n, L(top)
+
+L(end): bic x6, x12, cnd  C last pair, already loaded
+ bic x7, x13, cnd
+ ADDSUBC x8, x10, x6
+ ADDSUBC x9, x11, x7
+ stp x8, x9, [rp]
+L(rt): RETVAL
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/cora53/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/cora53/gmp-mparam.h
new file mode 100644
index 0000000..f4e258d
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/cora53/gmp-mparam.h
@@ -0,0 +1,242 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file for a53.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1536 MHz Cortex-A53 */
+/* FFT tuning limit = 21,583,800 */
+/* Generated by tuneup.c, 2019-10-22, gcc 5.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 2 /* 4.84% faster than 1 */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1 /* 39.05% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 21
+#define DIV_QR_1_UNNORM_THRESHOLD 21
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 38
+
+#define DIV_1_VS_MUL_1_PERCENT 161
+
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 49
+#define MUL_TOOM44_THRESHOLD 73
+#define MUL_TOOM6H_THRESHOLD 173
+#define MUL_TOOM8H_THRESHOLD 236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 77
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 65
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 68
+#define SQR_TOOM4_THRESHOLD 183
+#define SQR_TOOM6_THRESHOLD 230
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMID_TOOM42_THRESHOLD 23
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 11
+
+#define MUL_FFT_MODF_THRESHOLD 316 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 316, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \
+ { 17, 7}, { 9, 6}, { 19, 7}, { 17, 8}, \
+ { 9, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 83,10}, { 47, 9}, { 99,10}, { 55,11}, \
+ { 31,10}, { 63, 8}, { 255,10}, { 71, 8}, \
+ { 287,10}, { 79, 9}, { 159, 8}, { 319,10}, \
+ { 87,11}, { 47,10}, { 95, 9}, { 191, 8}, \
+ { 383,10}, { 103, 9}, { 207, 8}, { 415,10}, \
+ { 111, 9}, { 223,12}, { 31,11}, { 63, 9}, \
+ { 255, 8}, { 511,10}, { 135, 9}, { 287, 8}, \
+ { 575,11}, { 79,10}, { 159, 9}, { 319, 8}, \
+ { 639,10}, { 175, 9}, { 351, 8}, { 703,11}, \
+ { 95,10}, { 191, 9}, { 383, 8}, { 767,10}, \
+ { 207, 9}, { 415, 8}, { 831,10}, { 223, 9}, \
+ { 447,12}, { 63,10}, { 255, 9}, { 511, 8}, \
+ { 1023, 9}, { 543,10}, { 287, 9}, { 575, 8}, \
+ { 1151,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,10}, { 351, 9}, { 703, 8}, { 1407,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 207,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447,13}, { 63,11}, { 255,10}, { 543,11}, \
+ { 287,10}, { 575, 9}, { 1151,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \
+ { 1407, 8}, { 2815,12}, { 191,11}, { 383,10}, \
+ { 767,11}, { 415,10}, { 831,12}, { 223,11}, \
+ { 447,10}, { 895,11}, { 479,10}, { 959, 9}, \
+ { 1919,12}, { 255,11}, { 511,10}, { 1023,11}, \
+ { 543,10}, { 1087,12}, { 287,11}, { 575,10}, \
+ { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \
+ { 703,10}, { 1407, 9}, { 2815,13}, { 191,12}, \
+ { 383,11}, { 767,12}, { 415,11}, { 831,10}, \
+ { 1663,12}, { 447,11}, { 895,10}, { 1791,12}, \
+ { 479,11}, { 959,13}, { 255,12}, { 511,11}, \
+ { 1023,12}, { 543,11}, { 1087,12}, { 575,11}, \
+ { 1151,13}, { 319,12}, { 703,11}, { 1407,10}, \
+ { 2815,13}, { 383,12}, { 831,11}, { 1663,13}, \
+ { 447,12}, { 895,11}, { 1791,12}, { 959,11}, \
+ { 1919,14}, { 255,13}, { 511,12}, { 1087,13}, \
+ { 575,12}, { 1151,13}, { 703,12}, { 1407,11}, \
+ { 2815,14}, { 383,13}, { 831,12}, { 1663,13}, \
+ { 895,12}, { 1791,13}, { 959,12}, { 1919,15}, \
+ { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \
+ { 1215,14}, { 639,13}, { 1407,12}, { 2815,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1919,12}, \
+ { 3839,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,13}, { 2431,12}, { 4863,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 217
+#define MUL_FFT_THRESHOLD 3200
+
+#define SQR_FFT_MODF_THRESHOLD 276 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 276, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 17, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 15, 7}, { 31, 8}, { 19, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95, 8}, { 191,10}, \
+ { 55,11}, { 31,10}, { 63, 8}, { 255,10}, \
+ { 71, 9}, { 143, 8}, { 287,10}, { 79, 9}, \
+ { 159,11}, { 47,10}, { 95, 9}, { 191, 8}, \
+ { 383, 7}, { 767,10}, { 103,12}, { 31,11}, \
+ { 63, 9}, { 255, 8}, { 511, 7}, { 1023,10}, \
+ { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95,10}, { 191, 9}, { 383, 8}, \
+ { 767,10}, { 207, 9}, { 415, 8}, { 831,10}, \
+ { 223, 9}, { 447,12}, { 63,10}, { 255, 9}, \
+ { 511, 8}, { 1023,11}, { 143,10}, { 287, 9}, \
+ { 575, 8}, { 1151,11}, { 159,10}, { 319, 9}, \
+ { 639,11}, { 175,10}, { 351, 9}, { 703,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 207,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447,13}, { 63,11}, { 255,10}, { 511, 9}, \
+ { 1023,11}, { 287,10}, { 575, 9}, { 1151,12}, \
+ { 159,11}, { 319,10}, { 639,11}, { 351,10}, \
+ { 703, 9}, { 1407,12}, { 191,11}, { 383,10}, \
+ { 767,11}, { 415,10}, { 831,12}, { 223,11}, \
+ { 447,10}, { 895,11}, { 479,10}, { 959,12}, \
+ { 255,11}, { 511,10}, { 1023,12}, { 287,11}, \
+ { 575,10}, { 1151,12}, { 319,11}, { 639,12}, \
+ { 351,11}, { 703,10}, { 1407,13}, { 191,12}, \
+ { 383,11}, { 767,12}, { 415,11}, { 831,10}, \
+ { 1663,12}, { 447,11}, { 895,12}, { 479,11}, \
+ { 959,10}, { 1919,13}, { 255,12}, { 511,11}, \
+ { 1023,12}, { 543,11}, { 1087,12}, { 575,11}, \
+ { 1151,13}, { 319,12}, { 703,11}, { 1407,10}, \
+ { 2815,13}, { 383,12}, { 831,11}, { 1663,13}, \
+ { 447,12}, { 895,11}, { 1791,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
+ { 1151,13}, { 703,12}, { 1407,11}, { 2815,14}, \
+ { 383,13}, { 831,12}, { 1663,13}, { 895,12}, \
+ { 1791,13}, { 959,12}, { 1919,15}, { 255,14}, \
+ { 511,13}, { 1087,12}, { 2175,13}, { 1151,14}, \
+ { 639,13}, { 1407,12}, { 2815,14}, { 767,13}, \
+ { 1663,14}, { 895,13}, { 1919,12}, { 3839,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,12}, { 4863,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 204
+#define SQR_FFT_THRESHOLD 2688
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 38
+#define MULLO_MUL_N_THRESHOLD 6253
+#define SQRLO_BASECASE_THRESHOLD 4
+#define SQRLO_DC_THRESHOLD 67
+#define SQRLO_SQR_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 43
+#define DC_DIVAPPR_Q_THRESHOLD 155
+#define DC_BDIV_QR_THRESHOLD 39
+#define DC_BDIV_Q_THRESHOLD 89
+
+#define INV_MULMOD_BNM1_THRESHOLD 34
+#define INV_NEWTON_THRESHOLD 163
+#define INV_APPR_THRESHOLD 161
+
+#define BINV_NEWTON_THRESHOLD 196
+#define REDC_1_TO_REDC_N_THRESHOLD 43
+
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 998
+#define MUPI_DIV_QR_THRESHOLD 91
+#define MU_BDIV_QR_THRESHOLD 807
+#define MU_BDIV_Q_THRESHOLD 924
+
+#define POWM_SEC_TABLE 6,30,125,579,1730
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 30
+#define SET_STR_DC_THRESHOLD 802
+#define SET_STR_PRECOMPUTE_THRESHOLD 1815
+
+#define FAC_DSC_THRESHOLD 258
+#define FAC_ODD_THRESHOLD 24
+
+#define MATRIX22_STRASSEN_THRESHOLD 10
+#define HGCD2_DIV1_METHOD 1 /* 7.05% faster than 3 */
+#define HGCD_THRESHOLD 107
+#define HGCD_APPR_THRESHOLD 112
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 324
+#define GCDEXT_DC_THRESHOLD 242
+#define JACOBI_BASE_METHOD 4 /* 22.41% faster than 1 */
+
+/* Tuneup completed successfully, took 66624 seconds */
diff --git a/gmp-6.3.0/mpn/arm64/cora57/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/cora57/gmp-mparam.h
new file mode 100644
index 0000000..e034f02
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/cora57/gmp-mparam.h
@@ -0,0 +1,188 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file for a57, a72-a75.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1800 MHz Cortex-A72 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-10-02, gcc 7.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 1 /* 2.21% faster than 2 */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 42
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define USE_PREINV_DIVREM_1 1
+/* From gcc117.osuosl.org, 2023-07-27 */
+#define DIV_QR_1N_PI1_METHOD 4 /* 8.57% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 5
+#define DIV_QR_1_UNNORM_THRESHOLD 5
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 33
+
+#define DIV_1_VS_MUL_1_PERCENT 168
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 41
+#define MUL_TOOM44_THRESHOLD 99
+#define MUL_TOOM6H_THRESHOLD 142
+#define MUL_TOOM8H_THRESHOLD 199
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 63
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 55
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 65
+#define SQR_TOOM4_THRESHOLD 166
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 309
+
+#define MULMID_TOOM42_THRESHOLD 22
+
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 12
+
+#define MUL_FFT_MODF_THRESHOLD 276 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 276, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \
+ { 17, 7}, { 9, 6}, { 19, 7}, { 13, 8}, \
+ { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 21, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 23, 8}, { 49, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 51,11}, { 15,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 99,10}, { 55,11}, { 31,10}, \
+ { 63, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
+ { 287,10}, { 79, 9}, { 159, 8}, { 319,11}, \
+ { 47,10}, { 95, 9}, { 191,10}, { 103,12}, \
+ { 31,11}, { 63, 9}, { 255, 8}, { 511,10}, \
+ { 143, 8}, { 575,11}, { 79,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351, 8}, { 703,11}, \
+ { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \
+ { 415,10}, { 223, 9}, { 447, 8}, { 895,12}, \
+ { 63,10}, { 255, 9}, { 511, 8}, { 1023, 9}, \
+ { 543,11}, { 143,10}, { 287, 9}, { 575, 8}, \
+ { 1151,10}, { 319, 9}, { 639,11}, { 175,10}, \
+ { 351, 9}, { 703,12}, { 95,10}, { 383, 9}, \
+ { 767,11}, { 207, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 109
+#define MUL_FFT_THRESHOLD 3200
+
+#define SQR_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 244, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
+ { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 79,10}, { 47, 9}, { 99,10}, \
+ { 55,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255,10}, { 71, 8}, { 287, 7}, { 575, 9}, \
+ { 159, 8}, { 319,11}, { 47,10}, { 95, 9}, \
+ { 191, 8}, { 383,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 143, 9}, { 287, 8}, \
+ { 575,11}, { 79,10}, { 159, 9}, { 319, 8}, \
+ { 639, 9}, { 351,10}, { 191, 9}, { 383,10}, \
+ { 207, 9}, { 415,10}, { 239,12}, { 63,10}, \
+ { 255, 9}, { 511,10}, { 271,11}, { 143,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 351, 9}, { 703,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 97
+#define SQR_FFT_THRESHOLD 2496
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 39
+#define MULLO_MUL_N_THRESHOLD 6253
+#define SQRLO_BASECASE_THRESHOLD 4
+#define SQRLO_DC_THRESHOLD 56
+#define SQRLO_SQR_THRESHOLD 4940
+
+#define DC_DIV_QR_THRESHOLD 41
+#define DC_DIVAPPR_Q_THRESHOLD 136
+#define DC_BDIV_QR_THRESHOLD 39
+#define DC_BDIV_Q_THRESHOLD 89
+
+#define INV_MULMOD_BNM1_THRESHOLD 22
+#define INV_NEWTON_THRESHOLD 154
+#define INV_APPR_THRESHOLD 141
+
+#define BINV_NEWTON_THRESHOLD 182
+#define REDC_1_TO_REDC_N_THRESHOLD 39
+
+#define MU_DIV_QR_THRESHOLD 979
+#define MU_DIVAPPR_Q_THRESHOLD 1078
+#define MUPI_DIV_QR_THRESHOLD 75
+#define MU_BDIV_QR_THRESHOLD 872
+#define MU_BDIV_Q_THRESHOLD 942
+
+#define POWM_SEC_TABLE 1,19,117,539,1730
+
+#define GET_STR_DC_THRESHOLD 10
+#define GET_STR_PRECOMPUTE_THRESHOLD 21
+#define SET_STR_DC_THRESHOLD 572
+#define SET_STR_PRECOMPUTE_THRESHOLD 1036
+
+#define FAC_DSC_THRESHOLD 142
+#define FAC_ODD_THRESHOLD 23
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD2_DIV1_METHOD 1 /* 8.83% faster than 3 */
+#define HGCD_THRESHOLD 80
+#define HGCD_APPR_THRESHOLD 70
+#define HGCD_REDUCE_THRESHOLD 1962
+#define GCD_DC_THRESHOLD 273
+#define GCDEXT_DC_THRESHOLD 198
+#define JACOBI_BASE_METHOD 1 /* 7.49% faster than 4 */
diff --git a/gmp-6.3.0/mpn/arm64/cora72/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/cora72/gmp-mparam.h
new file mode 100644
index 0000000..fc66fd3
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/cora72/gmp-mparam.h
@@ -0,0 +1,242 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file for a72.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1800 MHz Cortex-A72 */
+/* FFT tuning limit = 50,811,960 */
+/* Generated by tuneup.c, 2019-10-22, gcc 7.3 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 3
+#define MOD_1_1P_METHOD 2 /* 12.09% faster than 1 */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 26
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1 /* 13.42% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 4
+#define DIV_QR_1_UNNORM_THRESHOLD 4
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 38
+
+#define DIV_1_VS_MUL_1_PERCENT 168
+
+#define MUL_TOOM22_THRESHOLD 8
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 153
+#define MUL_TOOM6H_THRESHOLD 222
+#define MUL_TOOM8H_THRESHOLD 333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 57
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 104
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 82
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 16
+#define SQR_TOOM3_THRESHOLD 73
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 206
+#define SQR_TOOM8_THRESHOLD 333
+
+#define MULMID_TOOM42_THRESHOLD 18
+
+#define MULMOD_BNM1_THRESHOLD 8
+#define SQRMOD_BNM1_THRESHOLD 10
+
+#define MUL_FFT_MODF_THRESHOLD 268 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 268, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \
+ { 15, 7}, { 13, 8}, { 7, 7}, { 16, 8}, \
+ { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \
+ { 19, 9}, { 11, 8}, { 27,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
+ { 51,11}, { 15,10}, { 31, 9}, { 71,10}, \
+ { 39, 9}, { 79, 8}, { 159, 7}, { 319, 9}, \
+ { 83,10}, { 47, 9}, { 95, 7}, { 383, 9}, \
+ { 99,10}, { 55,11}, { 31,10}, { 63, 8}, \
+ { 255, 7}, { 511, 9}, { 131,10}, { 71, 9}, \
+ { 143, 8}, { 287, 7}, { 575, 6}, { 1151,10}, \
+ { 79, 8}, { 319, 7}, { 639,10}, { 87, 8}, \
+ { 351,11}, { 47,10}, { 95, 8}, { 383, 7}, \
+ { 767,10}, { 103, 8}, { 415, 7}, { 831, 6}, \
+ { 1663, 9}, { 223, 8}, { 447,12}, { 31,11}, \
+ { 63, 9}, { 255, 8}, { 511, 7}, { 1023, 9}, \
+ { 287, 8}, { 575, 7}, { 1151, 6}, { 2303, 7}, \
+ { 1215,11}, { 79, 9}, { 319, 8}, { 639, 7}, \
+ { 1279, 9}, { 351, 8}, { 703, 7}, { 1407, 6}, \
+ { 2815, 9}, { 383, 8}, { 831, 7}, { 1663, 9}, \
+ { 447, 8}, { 895, 7}, { 1791, 6}, { 3583, 8}, \
+ { 959, 6}, { 3839, 5}, { 7679, 9}, { 511, 8}, \
+ { 1023, 7}, { 2175, 9}, { 575, 8}, { 1151, 7}, \
+ { 2303, 8}, { 1215,10}, { 351, 9}, { 703, 7}, \
+ { 3071, 8}, { 1663, 9}, { 895, 8}, { 1791, 7}, \
+ { 3583, 8}, { 1919, 6}, { 7679, 7}, { 3967, 9}, \
+ { 1023,10}, { 575, 9}, { 1151, 8}, { 2559,10}, \
+ { 703, 8}, { 2815, 9}, { 1471, 7}, { 5887,10}, \
+ { 767,11}, { 415, 9}, { 1791, 8}, { 3583,11}, \
+ { 479,10}, { 959, 8}, { 3967,11}, { 511, 9}, \
+ { 2175,10}, { 1151, 8}, { 4607, 9}, { 2815,10}, \
+ { 1471, 9}, { 2943,11}, { 767,10}, { 1535,11}, \
+ { 831,10}, { 1791,11}, { 959,10}, { 1919, 9}, \
+ { 3839, 8}, { 7679,10}, { 1983,12}, { 511,10}, \
+ { 2047,11}, { 1215,12}, { 639,11}, { 1407,10}, \
+ { 2815,11}, { 1471,12}, { 767,11}, { 1663,12}, \
+ { 895,11}, { 1791,12}, { 959,11}, { 1919,10}, \
+ { 3839,14}, { 255,13}, { 511,12}, { 1023,11}, \
+ { 2047,12}, { 1215,13}, { 639,12}, { 1279,13}, \
+ { 703,12}, { 1407,11}, { 2815,13}, { 767,12}, \
+ { 1535,13}, { 831,12}, { 1663,13}, { 895,12}, \
+ { 1791,11}, { 3583,13}, { 959,12}, { 1919,11}, \
+ { 3839,14}, { 511,13}, { 1023,12}, { 2047,13}, \
+ { 1215,12}, { 2431,14}, { 639,13}, { 1407,12}, \
+ { 2815,13}, { 1471,12}, { 2943,14}, { 767,13}, \
+ { 1535,12}, { 3071,13}, { 1791,12}, { 3583,13}, \
+ { 1919,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,14}, { 1279,13}, { 2559,15}, { 767,14}, \
+ { 1791,13}, { 3839,15}, { 1023,14}, { 2431,13}, \
+ { 4863,15}, { 1279,14}, { 2943,15}, { 1535,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 218
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 236 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 236, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 13, 8}, \
+ { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
+ { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95, 8}, { 191, 7}, { 383,10}, { 55,11}, \
+ { 31,10}, { 63, 9}, { 127, 8}, { 255, 7}, \
+ { 511,10}, { 71, 9}, { 143, 8}, { 287, 7}, \
+ { 575,10}, { 79, 8}, { 319, 7}, { 639,11}, \
+ { 47,10}, { 95, 8}, { 383, 7}, { 767, 8}, \
+ { 415,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 543, 9}, { 287, 8}, { 575, 7}, \
+ { 1151, 9}, { 319, 8}, { 639, 9}, { 351, 8}, \
+ { 703, 7}, { 1407, 6}, { 2815,10}, { 191, 9}, \
+ { 383, 8}, { 767, 9}, { 415, 8}, { 831, 7}, \
+ { 1663,10}, { 223, 9}, { 447, 8}, { 895, 7}, \
+ { 1791, 9}, { 479, 8}, { 959,12}, { 63,11}, \
+ { 127, 9}, { 543, 8}, { 1087,10}, { 287, 9}, \
+ { 575, 8}, { 1151,10}, { 319, 9}, { 639,10}, \
+ { 351, 9}, { 703, 8}, { 1407, 7}, { 2815, 8}, \
+ { 1471, 5}, { 11775, 9}, { 767, 8}, { 1535,10}, \
+ { 415, 9}, { 895, 8}, { 1919, 6}, { 7679, 7}, \
+ { 3967,11}, { 255,10}, { 543, 9}, { 1087, 8}, \
+ { 2175,10}, { 575, 9}, { 1151, 8}, { 2431,10}, \
+ { 639, 9}, { 1279,10}, { 703, 9}, { 1407, 8}, \
+ { 2943,11}, { 383,10}, { 767,11}, { 447,10}, \
+ { 895,11}, { 479,10}, { 959, 9}, { 1919, 8}, \
+ { 3839,10}, { 1023, 9}, { 2175,10}, { 1215, 9}, \
+ { 2431,11}, { 703, 9}, { 2815,10}, { 1471,11}, \
+ { 767,10}, { 1663,11}, { 895,10}, { 1791,11}, \
+ { 959, 9}, { 3839,12}, { 511,11}, { 1087,10}, \
+ { 2175,11}, { 1215,10}, { 2431,12}, { 639,11}, \
+ { 1279,12}, { 703,11}, { 1471,12}, { 767,11}, \
+ { 1663,12}, { 895,11}, { 1919,10}, { 3839,13}, \
+ { 511,12}, { 1087,11}, { 2175,12}, { 1215,11}, \
+ { 2431,13}, { 639,12}, { 1279,13}, { 703,12}, \
+ { 1407,13}, { 767,12}, { 1535,13}, { 831,12}, \
+ { 1791,13}, { 1151,12}, { 2303,13}, { 1215,14}, \
+ { 639,12}, { 2559,13}, { 1407,14}, { 767,12}, \
+ { 3071,14}, { 895,13}, { 1919,12}, { 3839,14}, \
+ { 1023,13}, { 2175,14}, { 1151,12}, { 4607,14}, \
+ { 1279,13}, { 2559,14}, { 1407,13}, { 2943,15}, \
+ { 767,14}, { 1663,13}, { 3583,14}, { 1919,15}, \
+ { 1023,14}, { 2047,13}, { 4095,14}, { 2943,15}, \
+ { 1535,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD 2176
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 5240
+#define SQRLO_BASECASE_THRESHOLD 6
+#define SQRLO_DC_THRESHOLD 45
+#define SQRLO_SQR_THRESHOLD 4265
+
+#define DC_DIV_QR_THRESHOLD 38
+#define DC_DIVAPPR_Q_THRESHOLD 108
+#define DC_BDIV_QR_THRESHOLD 36
+#define DC_BDIV_Q_THRESHOLD 71
+
+#define INV_MULMOD_BNM1_THRESHOLD 14
+#define INV_NEWTON_THRESHOLD 132
+#define INV_APPR_THRESHOLD 124
+
+#define BINV_NEWTON_THRESHOLD 199
+#define REDC_1_TO_REDC_N_THRESHOLD 34
+
+#define MU_DIV_QR_THRESHOLD 979
+#define MU_DIVAPPR_Q_THRESHOLD 979
+#define MUPI_DIV_QR_THRESHOLD 61
+#define MU_BDIV_QR_THRESHOLD 734
+#define MU_BDIV_Q_THRESHOLD 942
+
+#define POWM_SEC_TABLE 6,30,110,579,1730
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 19
+#define SET_STR_DC_THRESHOLD 458
+#define SET_STR_PRECOMPUTE_THRESHOLD 875
+
+#define FAC_DSC_THRESHOLD 153
+#define FAC_ODD_THRESHOLD 24
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD2_DIV1_METHOD 1 /* 8.41% faster than 3 */
+#define HGCD_THRESHOLD 81
+#define HGCD_APPR_THRESHOLD 80
+#define HGCD_REDUCE_THRESHOLD 1494
+#define GCD_DC_THRESHOLD 268
+#define GCDEXT_DC_THRESHOLD 189
+#define JACOBI_BASE_METHOD 1 /* 10.80% faster than 4 */
+
+/* Tuneup completed successfully, took 96906 seconds */
diff --git a/gmp-6.3.0/mpn/arm64/cora73/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/cora73/gmp-mparam.h
new file mode 100644
index 0000000..7fc7f4e
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/cora73/gmp-mparam.h
@@ -0,0 +1,225 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file for a73.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1800 MHz Cortex-A72 (NOTE: header says a73 -- TODO confirm tuned CPU) */
+/* FFT tuning limit = 48,820,337 */
+/* Generated by tuneup.c, 2019-10-22, gcc 7.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 3
+#define MOD_1_1P_METHOD 1 /* 2.28% faster than 2 */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 44
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1 /* 35.13% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 5
+#define DIV_QR_1_UNNORM_THRESHOLD 5
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 33
+
+#define DIV_1_VS_MUL_1_PERCENT 168
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 89
+#define MUL_TOOM6H_THRESHOLD 141
+#define MUL_TOOM8H_THRESHOLD 199
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 61
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 58
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 62
+#define SQR_TOOM4_THRESHOLD 166
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 309
+
+#define MULMID_TOOM42_THRESHOLD 22
+
+#define MULMOD_BNM1_THRESHOLD 8
+#define SQRMOD_BNM1_THRESHOLD 11
+
+#define MUL_FFT_MODF_THRESHOLD 276 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 276, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \
+ { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
+ { 27,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23, 8}, { 47, 9}, \
+ { 27,10}, { 15, 9}, { 43,10}, { 23, 9}, \
+ { 51,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 83,10}, { 47, 9}, { 99,10}, \
+ { 55,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 131,10}, { 71, 9}, { 143, 8}, \
+ { 287,10}, { 79, 9}, { 159, 8}, { 319,11}, \
+ { 47, 9}, { 191, 8}, { 383, 7}, { 767, 8}, \
+ { 415,12}, { 31,11}, { 63, 9}, { 255, 8}, \
+ { 511,10}, { 143, 9}, { 287, 8}, { 575,11}, \
+ { 79,10}, { 159, 9}, { 319,10}, { 175, 9}, \
+ { 351, 8}, { 703,11}, { 95,10}, { 191, 9}, \
+ { 383, 8}, { 767,10}, { 207, 9}, { 415,10}, \
+ { 223, 9}, { 447,12}, { 63,10}, { 255, 9}, \
+ { 511, 8}, { 1023, 9}, { 543,11}, { 143, 9}, \
+ { 575,10}, { 319, 9}, { 639,10}, { 351, 9}, \
+ { 703,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 207,10}, { 415,11}, { 223,10}, { 447, 9}, \
+ { 895,13}, { 63,11}, { 255,10}, { 511,11}, \
+ { 287,10}, { 575,12}, { 159,11}, { 319,10}, \
+ { 639,11}, { 351,10}, { 703, 9}, { 1407,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,12}, \
+ { 223,11}, { 447,10}, { 895,11}, { 479,10}, \
+ { 959,12}, { 255,11}, { 543,10}, { 1087,11}, \
+ { 575,12}, { 319,11}, { 639,12}, { 351,11}, \
+ { 703,13}, { 191,12}, { 383,11}, { 767,12}, \
+ { 415,11}, { 831,12}, { 447,11}, { 895,12}, \
+ { 479,13}, { 255,12}, { 511,11}, { 1023,12}, \
+ { 575,13}, { 319,12}, { 703,13}, { 383,12}, \
+ { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \
+ { 703,12}, { 1407,14}, { 383,13}, { 831,12}, \
+ { 1663,13}, { 959,15}, { 255,14}, { 511,13}, \
+ { 1151,14}, { 639,13}, { 1407,14}, { 767,13}, \
+ { 1663,14}, { 895,13}, { 1791,15}, { 511,14}, \
+ { 1023,13}, { 2047,14}, { 1151,13}, { 2431,14}, \
+ { 1407,15}, { 767,14}, { 1791,16}, { 511,15}, \
+ { 1023,14}, { 2431,15}, { 1279,14}, { 2815,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD 3200
+
+#define SQR_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 244, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 17, 7}, { 9, 6}, { 19, 7}, { 17, 8}, \
+ { 9, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
+ { 25,10}, { 7, 9}, { 15, 8}, { 31, 9}, \
+ { 19, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
+ { 15,10}, { 31, 9}, { 63,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
+ { 31,10}, { 63, 8}, { 255,10}, { 71, 9}, \
+ { 143, 8}, { 287,10}, { 79, 9}, { 159, 8}, \
+ { 319,11}, { 47,10}, { 95, 9}, { 191, 8}, \
+ { 383,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 287, 8}, { 575,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95, 9}, { 383, 8}, { 767,10}, \
+ { 207, 9}, { 415,10}, { 223, 8}, { 895,10}, \
+ { 239,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 319, 9}, { 639,11}, { 175,10}, { 351, 9}, \
+ { 703,11}, { 191,10}, { 383,11}, { 207,10}, \
+ { 415,11}, { 223,10}, { 479,11}, { 255,10}, \
+ { 511,11}, { 287,10}, { 575,12}, { 159,11}, \
+ { 351,12}, { 191,11}, { 383,10}, { 767,12}, \
+ { 223,11}, { 447,10}, { 895,11}, { 479,13}, \
+ { 127,12}, { 255,11}, { 511,12}, { 287,10}, \
+ { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \
+ { 703,13}, { 191,12}, { 383,11}, { 767,12}, \
+ { 415,11}, { 831,12}, { 447,11}, { 895,12}, \
+ { 479,11}, { 959,12}, { 511,11}, { 1023,12}, \
+ { 575,11}, { 1151,13}, { 319,12}, { 639,11}, \
+ { 1279,13}, { 383,12}, { 831,13}, { 447,12}, \
+ { 895,14}, { 255,13}, { 511,12}, { 1023,13}, \
+ { 703,14}, { 383,13}, { 831,12}, { 1663,13}, \
+ { 895,15}, { 255,14}, { 511,13}, { 1151,14}, \
+ { 639,13}, { 1407,14}, { 767,13}, { 1535,14}, \
+ { 895,15}, { 511,14}, { 1151,13}, { 2431,14}, \
+ { 1407,15}, { 767,14}, { 1791,16}, { 511,15}, \
+ { 1023,14}, { 2431,15}, { 1279,14}, { 2815,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 165
+#define SQR_FFT_THRESHOLD 2496
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 39
+#define MULLO_MUL_N_THRESHOLD 6253
+#define SQRLO_BASECASE_THRESHOLD 4
+#define SQRLO_DC_THRESHOLD 56
+#define SQRLO_SQR_THRESHOLD 4940
+
+#define DC_DIV_QR_THRESHOLD 36
+#define DC_DIVAPPR_Q_THRESHOLD 136
+#define DC_BDIV_QR_THRESHOLD 35
+#define DC_BDIV_Q_THRESHOLD 88
+
+#define INV_MULMOD_BNM1_THRESHOLD 30
+#define INV_NEWTON_THRESHOLD 149
+#define INV_APPR_THRESHOLD 139
+
+#define BINV_NEWTON_THRESHOLD 166
+#define REDC_1_TO_REDC_N_THRESHOLD 38
+
+#define MU_DIV_QR_THRESHOLD 1120
+#define MU_DIVAPPR_Q_THRESHOLD 1078
+#define MUPI_DIV_QR_THRESHOLD 68
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 942
+
+#define POWM_SEC_TABLE 4,22,102,473,1730
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 22
+#define SET_STR_DC_THRESHOLD 381
+#define SET_STR_PRECOMPUTE_THRESHOLD 1042
+
+#define FAC_DSC_THRESHOLD 140
+#define FAC_ODD_THRESHOLD 23
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD2_DIV1_METHOD 1 /* 7.84% faster than 3 */
+#define HGCD_THRESHOLD 80
+#define HGCD_APPR_THRESHOLD 80
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 273
+#define GCDEXT_DC_THRESHOLD 201
+#define JACOBI_BASE_METHOD 1 /* 1.03% faster than 4 */
+
+/* Tuneup completed successfully, took 64972 seconds */
diff --git a/gmp-6.3.0/mpn/arm64/darwin.m4 b/gmp-6.3.0/mpn/arm64/darwin.m4
new file mode 100644
index 0000000..36e72fe
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/darwin.m4
@@ -0,0 +1,50 @@
+divert(-1)
+
+dnl m4 macros for ARM64 Darwin assembler.
+
+dnl Copyright 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+dnl Turn off m4 comments: the default m4 # is for constants and we don't want
+dnl to disable macro expansions in or after them. (@ is no comment char here.)
+
+changecom
+
+
+dnl LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
+dnl
+dnl Load the address of gmp_symbol into reg from its GOT slot: LEA_HI is the
+dnl adrp (@GOTPAGE), LEA_LO the ldr (@GOTPAGEOFF) based on that same reg. Two
+dnl macros allow separation for manual insn scheduling. TODO: Darwin allows
+dnl relaxing them into an adr and a nop, but that requires the .loh pseudo.
+
+define(`LEA_HI',`adrp $1, $2@GOTPAGE')dnl
+define(`LEA_LO',`ldr $1, [$1, $2@GOTPAGEOFF]')dnl
+
+divert`'dnl
diff --git a/gmp-6.3.0/mpn/arm64/divrem_1.asm b/gmp-6.3.0/mpn/arm64/divrem_1.asm
new file mode 100644
index 0000000..9d5bb59
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/divrem_1.asm
@@ -0,0 +1,231 @@
+dnl ARM64 mpn_divrem_1 and mpn_preinv_divrem_1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2020 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl TODO
+dnl * Handle the most significant quotient limb for the unnormalised case
+dnl specially, just like in the C code. (It is very often 0.)
+
+define(`qp_arg', x0)  C incoming arguments, in the order of the prototypes below
+define(`fn_arg', x1)
+define(`np_arg', x2)
+define(`n_arg', x3)
+define(`d_arg', x4)
+define(`dinv_arg', x5)  C read only by mpn_preinv_divrem_1
+define(`cnt_arg', x6)  C read only by mpn_preinv_divrem_1
+
+define(`qp', x19)  C working copies; x19-x24 are saved in the frame by both entry points
+define(`np', x20)
+define(`n', x21)
+define(`d', x22)
+define(`fn', x24)
+define(`dinv', x0)  C same register as qp_arg, and the register read after the invert_limb calls
+define(`cnt', x23)
+define(`tnc', x8)  C set to -cnt at L(uentry)
+
+dnl mp_limb_t
+dnl mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+dnl mp_srcptr np, mp_size_t n,
+dnl mp_limb_t d_unnorm)
+
+dnl mp_limb_t
+dnl mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+dnl mp_srcptr np, mp_size_t n,
+dnl mp_limb_t d_unnorm, mp_limb_t dinv, int cnt)
+
+ASM_START()
+
+PROLOGUE(mpn_preinv_divrem_1)  C entry taking a caller-supplied dinv and cnt; shares the loops of mpn_divrem_1
+ cbz n_arg, L(fz)  C n = 0: only zero-fill the fn fraction limbs, no frame needed
+ stp x29, x30, [sp, #-80]!  C 80-byte frame, same layout as in mpn_divrem_1
+ mov x29, sp
+ stp x19, x20, [sp, #16]
+ stp x21, x22, [sp, #32]
+ stp x23, x24, [sp, #48]
+
+ sub n, n_arg, #1  C n = limbs left after the top one
+ add x7, n, fn_arg
+ add np, np_arg, n, lsl #3  C np -> most significant dividend limb
+ add qp, qp_arg, x7, lsl #3  C qp -> most significant quotient limb
+ mov fn, fn_arg
+ mov d, d_arg
+ mov dinv, dinv_arg  C dinv is x0, so this has to come after the last read of qp_arg
+ tbnz d_arg, #63, L(nentry)  C top bit of d set: join the normalised path past its invert_limb call
+ mov cnt, cnt_arg  C otherwise take the shift count from the caller
+ b L(uentry)  C and join the unnormalised path past its invert_limb call
+EPILOGUE()
+
+PROLOGUE(mpn_divrem_1)  C entry that obtains dinv itself by calling invert_limb
+ cbz n_arg, L(fz)  C n = 0: only zero-fill the fn fraction limbs, no frame needed
+ stp x29, x30, [sp, #-80]!  C 80-byte frame: x29/x30 at 0, x19-x24 at 16..63
+ mov x29, sp
+ stp x19, x20, [sp, #16]
+ stp x21, x22, [sp, #32]
+ stp x23, x24, [sp, #48]
+
+ sub n, n_arg, #1  C n = limbs left after the top one
+ add x7, n, fn_arg
+ add np, np_arg, n, lsl #3  C np -> most significant dividend limb
+ add qp, qp_arg, x7, lsl #3  C qp -> most significant quotient limb
+ mov fn, fn_arg
+ mov d, d_arg
+ tbnz d_arg, #63, L(normalised)  C top bit of d set: no shifting needed
+
+L(unnorm):
+ clz cnt, d  C cnt = number of leading zero bits in d
+ lsl x0, d, cnt  C argument for invert_limb: d with its top bit moved to bit 63
+ bl GSYM_PREFIX`'MPN(invert_limb)  C result in x0 is used as dinv below
+L(uentry):
+ lsl d, d, cnt  C from here on d is the shifted divisor
+ ldr x7, [np], #-8  C top dividend limb
+ sub tnc, xzr, cnt  C tnc = -cnt
+ lsr x11, x7, tnc C r
+ lsl x1, x7, cnt  C x1 = low part of the shifted top limb
+ cbz n, L(uend)  C single-limb dividend: only the final step remains
+
+L(utop):ldr x7, [np], #-8  C next lower dividend limb
+ add x2, x11, #1  C q1 starts as r + 1
+ mul x10, x11, dinv  C low half of r * dinv
+ umulh x17, x11, dinv  C high half of r * dinv
+ lsr x9, x7, tnc  C high bits of the new limb
+ orr x1, x1, x9  C x1 = complete shifted dividend limb for this step
+ adds x10, x1, x10  C q0 = low half + limb
+ adc x2, x2, x17  C q1 += high half + carry
+ msub x11, d, x2, x1  C r = limb - q1 * d
+ lsl x1, x7, cnt  C start forming the limb for the next step
+ cmp x10, x11  C carry clear when q0 < r
+ add x14, x11, d
+ csel x11, x14, x11, cc  C in that case r += d
+ sbc x2, x2, xzr  C and q1 -= 1, since sbc subtracts the inverted carry
+ cmp x11, d
+ bcs L(ufx)  C r >= d: one more adjustment, done out of line
+L(uok): str x2, [qp], #-8  C store quotient limb, step to the next lower one
+ sub n, n, #1
+ cbnz n, L(utop)
+
+L(uend):add x2, x11, #1  C last integer step: x1 has no further limb to merge
+ mul x10, x11, dinv
+ umulh x17, x11, dinv
+ adds x10, x1, x10
+ adc x2, x2, x17
+ msub x11, d, x2, x1
+ cmp x10, x11
+ add x14, x11, d
+ csel x11, x14, x11, cc
+ sbc x2, x2, xzr
+ subs x14, x11, d  C here the r >= d adjustment is done without a branch
+ adc x2, x2, xzr  C q1 += 1 when r >= d
+ csel x11, x14, x11, cs  C r -= d when r >= d
+ str x2, [qp], #-8
+
+ cbnz fn, L(ftop)  C fraction limbs requested: continue in the shared loop
+ lsr x0, x11, cnt  C return the remainder shifted back down by cnt
+ ldp x19, x20, [sp, #16]
+ ldp x21, x22, [sp, #32]
+ ldp x23, x24, [sp, #48]
+ ldp x29, x30, [sp], #80
+ ret
+
+L(ufx): add x2, x2, #1  C out-of-line adjustment for L(utop): q1 += 1
+ sub x11, x11, d  C r -= d
+ b L(uok)
+
+
+L(normalised):
+ mov x0, d  C argument for invert_limb
+ bl GSYM_PREFIX`'MPN(invert_limb)  C result in x0 is used as dinv below
+L(nentry):
+ ldr x7, [np], #-8  C top dividend limb
+ subs x14, x7, d  C carry set when the top limb is >= d
+ adc x2, xzr, xzr C hi q limb
+ csel x11, x14, x7, cs  C r = top limb, reduced by d if it was >= d
+ b L(nok)  C store the top quotient limb, then loop over the rest
+
+L(ntop):ldr x1, [np], #-8  C next lower dividend limb, used as is
+ add x2, x11, #1  C same step as in L(utop), without the shifting
+ mul x10, x11, dinv
+ umulh x17, x11, dinv
+ adds x10, x1, x10
+ adc x2, x2, x17
+ msub x11, d, x2, x1
+ cmp x10, x11
+ add x14, x11, d
+ csel x11, x14, x11, cc C remainder
+ sbc x2, x2, xzr
+ cmp x11, d
+ bcs L(nfx)  C r >= d: one more adjustment, done out of line
+L(nok): str x2, [qp], #-8  C store quotient limb, step to the next lower one
+ sub n, n, #1
+ tbz n, #63, L(ntop)  C loop until n goes negative
+
+L(nend):cbnz fn, L(frac)  C fraction limbs requested
+ mov x0, x11  C return the remainder
+ ldp x19, x20, [sp, #16]
+ ldp x21, x22, [sp, #32]
+ ldp x23, x24, [sp, #48]
+ ldp x29, x30, [sp], #80
+ ret
+
+L(nfx): add x2, x2, #1  C out-of-line adjustment for L(ntop): q1 += 1
+ sub x11, x11, d  C r -= d
+ b L(nok)
+
+L(frac):mov cnt, #0  C normalised path: the final lsr by cnt must not shift
+L(ftop):add x2, x11, #1  C fraction step: same scheme with a zero dividend limb
+ mul x10, x11, dinv
+ umulh x17, x11, dinv
+ add x2, x2, x17  C nothing is added to x10 here, so there is no carry to fold in
+ msub x11, d, x2, xzr  C r = 0 - q1 * d
+ cmp x10, x11
+ add x14, x11, d
+ csel x11, x14, x11, cc C remainder
+ sbc x2, x2, xzr
+ str x2, [qp], #-8
+ sub fn, fn, #1
+ cbnz fn, L(ftop)
+
+ lsr x0, x11, cnt  C cnt is 0 when coming from L(frac), the shift count otherwise
+ ldp x19, x20, [sp, #16]
+ ldp x21, x22, [sp, #32]
+ ldp x23, x24, [sp, #48]
+ ldp x29, x30, [sp], #80
+ ret
+
+C Block zero. We need this for the degenerated case of n = 0, fn != 0.
+L(fz): cbz fn_arg, L(zend)  C reached before any frame is set up, so it works on the argument registers
+L(ztop):str xzr, [qp_arg], #8  C write fn zero limbs upwards from qp
+ sub fn_arg, fn_arg, #1
+ cbnz fn_arg, L(ztop)
+L(zend):mov x0, #0  C returned remainder is 0
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/gcd_11.asm b/gmp-6.3.0/mpn/arm64/gcd_11.asm
new file mode 100644
index 0000000..d8cc3e2
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/gcd_11.asm
@@ -0,0 +1,70 @@
+dnl ARM v8a mpn_gcd_11.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+changecom(blah)
+
+C cycles/bit (approx)
+C Cortex-A35 ?
+C Cortex-A53 ?
+C Cortex-A55 ?
+C Cortex-A57 ?
+C Cortex-A72 ?
+C Cortex-A73 ?
+C Cortex-A75 ?
+C Cortex-A76 ?
+C Cortex-A77 ?
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+define(`u0', `x0')
+define(`v0', `x1')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_11)  C single-limb gcd of u0 (x0) and v0 (x1), result returned in x0
+ subs x3, u0, v0 C 0
+ b.eq L(end) C operands already equal: return u0 unchanged
+
+ ALIGN(16)
+L(top): rbit x12, x3 C 1,5
+ clz x12, x12 C 2
+ csneg x3, x3, x3, cs C v = abs(u-v), even 1
+ csel u0, v0, u0, cs C u = min(u,v) 1
+ lsr v0, x3, x12 C 3
+ subs x3, u0, v0 C 4
+ b.ne L(top) C loop until u0 == v0
+
+L(end): ret  C u0 is x0, so the result is already in the return register
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/gcd_22.asm b/gmp-6.3.0/mpn/arm64/gcd_22.asm
new file mode 100644
index 0000000..5367fea
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/gcd_22.asm
@@ -0,0 +1,112 @@
+dnl ARM v8a mpn_gcd_22.
+
+dnl Copyright 2019 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+changecom(blah)
+
+C cycles/bit (approx)
+C Cortex-A35 ?
+C Cortex-A53 7.26
+C Cortex-A55 ?
+C Cortex-A57 ?
+C Cortex-A72 5.72
+C Cortex-A73 6.43
+C Cortex-A75 ?
+C Cortex-A76 ?
+C Cortex-A77 ?
+
+
+define(`u1', `x0')
+define(`u0', `x1')
+define(`v1', `x2')
+define(`v0', `x3')
+
+define(`t0', `x5')
+define(`t1', `x6')
+define(`cnt', `x7')
+define(`tnc', `x8')
+
+ASM_START()
+PROLOGUE(mpn_gcd_22)  C two-limb gcd of u1:u0 and v1:v0; result returned in x0 (low limb) and x1 (high limb)
+
+ ALIGN(16)
+L(top): subs t0, u0, v0 C 0 6
+ cbz t0, L(lowz)  C low limbs equal: handled at L(lowz)
+ sbcs t1, u1, v1 C 1 7
+
+ rbit cnt, t0 C 1
+
+ cneg t0, t0, cc C 2
+ cinv t1, t1, cc C 2 u = |u - v|
+L(bck): csel v0, v0, u0, cs C 2
+ csel v1, v1, u1, cs C 2 v = min(u,v)
+
+ clz cnt, cnt C 2
+ sub tnc, xzr, cnt C 3
+
+ lsr u0, t0, cnt C 3
+ lsl x14, t1, tnc C 4
+ lsr u1, t1, cnt C 3
+ orr u0, u0, x14 C 5
+
+ orr x11, u1, v1  C zero only when both high limbs are zero
+ cbnz x11, L(top)  C otherwise keep iterating on two limbs
+
+
+ subs x4, u0, v0 C 0
+ b.eq L(end1) C low limbs equal: done
+
+ ALIGN(16)
+L(top1):rbit x12, x4 C 1,5
+ clz x12, x12 C 2
+ csneg x4, x4, x4, cs C v = abs(u-v), even 1
+ csel u0, v0, u0, cs C u = min(u,v) 1
+ lsr v0, x4, x12 C 3
+ subs x4, u0, v0 C 4
+ b.ne L(top1) C loop until u0 == v0
+L(end1):mov x0, u0  C single-limb result: low limb in x0
+ mov x1, #0  C high limb of the result is zero
+ ret
+
+L(lowz):C We come here when v0 - u0 = 0
+ C 1. If v1 - u1 = 0, then gcd is u = v.
+ C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
+ subs t0, u1, v1
+ b.eq L(end)
+ mov t1, #0  C the pair t1:t0 becomes 0:|u1-v1|
+ rbit cnt, t0 C 1
+ cneg t0, t0, cc C 2
+ b L(bck) C FIXME: make conditional
+
+L(end): mov x0, v0  C u == v: return v, low limb in x0
+ mov x1, v1  C and high limb in x1
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/gmp-mparam.h
new file mode 100644
index 0000000..7c0c193
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/gmp-mparam.h
@@ -0,0 +1,192 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 1536 MHz Cortex-A53 */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-09-29, gcc 5.4 */
+
+#define DIVREM_1_NORM_THRESHOLD 3
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 2 /* 2.08% faster than 1 */
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 21
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_1N_PI1_METHOD 1 /* 38.26% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD 13
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 40
+
+#define DIV_1_VS_MUL_1_PERCENT 159
+
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 49
+#define MUL_TOOM44_THRESHOLD 82
+#define MUL_TOOM6H_THRESHOLD 173
+#define MUL_TOOM8H_THRESHOLD 236
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 76
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 74
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 18
+#define SQR_TOOM3_THRESHOLD 67
+#define SQR_TOOM4_THRESHOLD 166
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 333
+
+#define MULMID_TOOM42_THRESHOLD 20
+
+#define MULMOD_BNM1_THRESHOLD 10
+#define SQRMOD_BNM1_THRESHOLD 11
+
+#define MUL_FFT_MODF_THRESHOLD 316 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 316, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \
+ { 17, 7}, { 9, 6}, { 19, 7}, { 17, 8}, \
+ { 9, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 83,10}, { 47, 9}, { 99,10}, { 55,11}, \
+ { 31,10}, { 63, 9}, { 127, 8}, { 255, 9}, \
+ { 131,10}, { 71, 8}, { 287,10}, { 79, 9}, \
+ { 159, 8}, { 319,10}, { 87,11}, { 47,10}, \
+ { 95, 9}, { 191, 8}, { 383,10}, { 103, 9}, \
+ { 207, 8}, { 415,10}, { 111, 9}, { 223,12}, \
+ { 31,11}, { 63, 9}, { 255, 8}, { 511,10}, \
+ { 135, 9}, { 287, 8}, { 575,11}, { 79,10}, \
+ { 159, 9}, { 319, 8}, { 639,10}, { 175, 9}, \
+ { 351, 8}, { 703,11}, { 95,10}, { 191, 9}, \
+ { 383, 8}, { 767,10}, { 207, 9}, { 415,11}, \
+ { 111,10}, { 223, 9}, { 447,12}, { 63,10}, \
+ { 255, 9}, { 511, 8}, { 1023, 9}, { 543,10}, \
+ { 287, 9}, { 575, 8}, { 1151,11}, { 159,10}, \
+ { 319, 9}, { 639,11}, { 175,10}, { 351, 9}, \
+ { 703, 8}, { 1407,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 118
+#define MUL_FFT_THRESHOLD 3200
+
+#define SQR_FFT_MODF_THRESHOLD 272 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 272, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 17, 7}, { 17, 8}, \
+ { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \
+ { 19, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 33, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
+ { 95, 8}, { 191,10}, { 55,11}, { 31,10}, \
+ { 63, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
+ { 287,10}, { 79, 9}, { 159,11}, { 47,10}, \
+ { 95, 9}, { 191, 8}, { 383, 7}, { 767,10}, \
+ { 103, 9}, { 207,12}, { 31,11}, { 63, 9}, \
+ { 255, 8}, { 511, 7}, { 1023, 9}, { 271,10}, \
+ { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95,10}, { 191, 9}, { 383, 8}, \
+ { 767,10}, { 207, 9}, { 415, 8}, { 831,10}, \
+ { 223,12}, { 63,10}, { 255, 9}, { 511, 8}, \
+ { 1023,10}, { 271,11}, { 143,10}, { 287, 9}, \
+ { 575, 8}, { 1151,11}, { 159,10}, { 319, 9}, \
+ { 639,11}, { 175,10}, { 351, 9}, { 703,12}, \
+ { 95,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 207,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 112
+#define SQR_FFT_THRESHOLD 2688
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 38
+#define MULLO_MUL_N_THRESHOLD 6253
+#define SQRLO_BASECASE_THRESHOLD 4
+#define SQRLO_DC_THRESHOLD 67
+#define SQRLO_SQR_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 42
+#define DC_DIVAPPR_Q_THRESHOLD 152
+#define DC_BDIV_QR_THRESHOLD 39
+#define DC_BDIV_Q_THRESHOLD 93
+
+#define INV_MULMOD_BNM1_THRESHOLD 37
+#define INV_NEWTON_THRESHOLD 163
+#define INV_APPR_THRESHOLD 162
+
+#define BINV_NEWTON_THRESHOLD 194
+#define REDC_1_TO_REDC_N_THRESHOLD 43
+
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 998
+#define MUPI_DIV_QR_THRESHOLD 98
+#define MU_BDIV_QR_THRESHOLD 807
+#define MU_BDIV_Q_THRESHOLD 924
+
+#define POWM_SEC_TABLE 6,30,194,579,1730
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 29
+#define SET_STR_DC_THRESHOLD 788
+#define SET_STR_PRECOMPUTE_THRESHOLD 1816
+
+#define FAC_DSC_THRESHOLD 236
+#define FAC_ODD_THRESHOLD 24
+
+#define MATRIX22_STRASSEN_THRESHOLD 10
+#define HGCD2_DIV1_METHOD 1 /* 7.05% faster than 3 */
+#define HGCD_THRESHOLD 101
+#define HGCD_APPR_THRESHOLD 104
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 330
+#define GCDEXT_DC_THRESHOLD 242
+#define JACOBI_BASE_METHOD 4 /* 20.00% faster than 1 */
diff --git a/gmp-6.3.0/mpn/arm64/hamdist.asm b/gmp-6.3.0/mpn/arm64/hamdist.asm
new file mode 100644
index 0000000..c72ca55
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/hamdist.asm
@@ -0,0 +1,181 @@
+dnl ARM64 Neon mpn_hamdist -- mpn bit hamming distance.
+
+dnl Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 4.5
+C Cortex-A57 1.9
+C X-Gene 4.36
+
+C TODO
+C * Consider greater unrolling.
+C * Arrange to align the pointer, if that helps performance. Use the same
+C read-and-mask trick we use on PCs, for simplicity and performance. (Sorry
+C valgrind!)
+C * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+changecom(blah)
+
+C INPUT PARAMETERS
+define(`ap', x0)
+define(`bp', x1)
+define(`n', x2)
+
+C We sum into 16 16-bit counters in v4,v5, but at the end we sum them and end
+C up with 8 16-bit counters. Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/64 = 0x1fff limbs. We use a chunksize close to that, but which
+C allows the huge count code to jump deep into the code (at L(chu)).
+
+define(`maxsize', 0x1fff)
+define(`chunksize',0x1ff0)
+
+ASM_START()
+PROLOGUE(mpn_hamdist)  C x0 returns the summed bit counts of ap[i] xor bp[i] over n limbs
+
+ mov x11, #maxsize  C x11 is compared against again in the L(gt8k) loop
+ cmp n, x11
+ b.hi L(gt8k)  C more than maxsize limbs: process in chunks
+
+L(lt8k):
+ movi v4.16b, #0 C clear summation register
+ movi v5.16b, #0 C clear summation register
+
+ tbz n, #0, L(xx0)  C bit 0 of n set: take one limb first
+ sub n, n, #1
+ ld1 {v0.1d}, [ap], #8 C load 1 limb
+ ld1 {v16.1d}, [bp], #8 C load 1 limb
+ eor v0.16b, v0.16b, v16.16b
+ cnt v6.16b, v0.16b
+ uadalp v4.8h, v6.16b C could also splat
+
+L(xx0): tbz n, #1, L(x00)  C bit 1 of n set: take two limbs
+ sub n, n, #2
+ ld1 {v0.2d}, [ap], #16 C load 2 limbs
+ ld1 {v16.2d}, [bp], #16 C load 2 limbs
+ eor v0.16b, v0.16b, v16.16b
+ cnt v6.16b, v0.16b
+ uadalp v4.8h, v6.16b
+
+L(x00): tbz n, #2, L(000)  C bit 2 of n set: preload four limbs
+ subs n, n, #4
+ ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
+ ld1 {v16.2d,v17.2d}, [bp], #32 C load 4 limbs
+ b.ls L(sum)  C nothing left beyond these four: finish at L(sum)
+
+L(gt4): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+ ld1 {v18.2d,v19.2d}, [bp], #32 C load 4 limbs
+ eor v0.16b, v0.16b, v16.16b
+ eor v1.16b, v1.16b, v17.16b
+ sub n, n, #4
+ cnt v6.16b, v0.16b
+ cnt v7.16b, v1.16b
+ b L(mid)  C enter the main loop at its second half
+
+L(000): subs n, n, #8
+ b.lo L(e0)  C fewer than 8 limbs left: skip to the final reduction of v4
+
+L(chu): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+ ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
+ ld1 {v18.2d,v19.2d}, [bp], #32 C load 4 limbs
+ ld1 {v16.2d,v17.2d}, [bp], #32 C load 4 limbs
+ eor v2.16b, v2.16b, v18.16b
+ eor v3.16b, v3.16b, v19.16b
+ cnt v6.16b, v2.16b
+ cnt v7.16b, v3.16b
+ subs n, n, #8
+ b.lo L(end)  C these 8 limbs were the last
+
+L(top): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+ ld1 {v18.2d,v19.2d}, [bp], #32 C load 4 limbs
+ eor v0.16b, v0.16b, v16.16b
+ eor v1.16b, v1.16b, v17.16b
+ uadalp v4.8h, v6.16b
+ cnt v6.16b, v0.16b
+ uadalp v5.8h, v7.16b
+ cnt v7.16b, v1.16b
+L(mid): ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
+ ld1 {v16.2d,v17.2d}, [bp], #32 C load 4 limbs
+ eor v2.16b, v2.16b, v18.16b
+ eor v3.16b, v3.16b, v19.16b
+ subs n, n, #8
+ uadalp v4.8h, v6.16b
+ cnt v6.16b, v2.16b
+ uadalp v5.8h, v7.16b
+ cnt v7.16b, v3.16b
+ b.hs L(top)  C no borrow: at least 8 more limbs to process
+
+L(end): uadalp v4.8h, v6.16b  C fold in the counts still pending in v6/v7
+ uadalp v5.8h, v7.16b
+L(sum): eor v0.16b, v0.16b, v16.16b  C last four limbs, already loaded in v0/v1 and v16/v17
+ eor v1.16b, v1.16b, v17.16b
+ cnt v6.16b, v0.16b
+ cnt v7.16b, v1.16b
+ uadalp v4.8h, v6.16b
+ uadalp v5.8h, v7.16b
+ add v4.8h, v4.8h, v5.8h
+ C we have 8 16-bit counts
+L(e0): uaddlp v4.4s, v4.8h C we have 4 32-bit counts
+ uaddlp v4.2d, v4.4s C we have 2 64-bit counts
+ mov x0, v4.d[0]
+ mov x1, v4.d[1]
+ add x0, x0, x1  C total in x0
+ ret  C also the return point for the bl calls made from L(gt8k)
+
+C Code for count > maxsize. Splits operand and calls above code.
+define(`ap2', x5) C caller-saves reg not used above
+define(`bp2', x6) C caller-saves reg not used above
+L(gt8k):
+ mov x8, x30  C keep the return address; the bl instructions below overwrite x30
+ mov x7, n C full count (caller-saves reg not used above)
+ mov x4, #0 C total sum (caller-saves reg not used above)
+ mov x9, #chunksize*8 C caller-saves reg not used above
+ mov x10, #chunksize C caller-saves reg not used above
+
+1: add ap2, ap, x9 C point at subsequent block
+ add bp2, bp, x9 C point at subsequent block
+ mov n, #chunksize-8 C count for this invocation, adjusted for entry pt
+ movi v4.16b, #0 C clear chunk summation register
+ movi v5.16b, #0 C clear chunk summation register
+ bl L(chu) C jump deep inside code
+ add x4, x4, x0  C accumulate the chunk result
+ mov ap, ap2 C put chunk pointer in place for calls
+ mov bp, bp2 C put chunk pointer in place for calls
+ sub x7, x7, x10
+ cmp x7, x11  C x11 still holds maxsize from the function entry
+ b.hi 1b  C loop while the remaining count exceeds maxsize
+
+ mov n, x7 C count for final invocation
+ bl L(lt8k)
+ add x0, x4, x0  C grand total
+ mov x30, x8  C restore the return address saved at L(gt8k)
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/invert_limb.asm b/gmp-6.3.0/mpn/arm64/invert_limb.asm
new file mode 100644
index 0000000..6a99bf0
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/invert_limb.asm
@@ -0,0 +1,83 @@
+dnl ARM64 mpn_invert_limb -- Invert a normalized limb.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 ?
+C Cortex-A57 ?
+
+C Compiler generated, mildly edited. Could surely be further optimised.
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)  C x0 = limb to invert on entry, x0 = result on return; uses only x0-x4
+ lsr x2, x0, #54
+ LEA_HI( x1, approx_tab)
+ and x2, x2, #0x1fe  C x2 = even byte offset into approx_tab, taken from bits 55-62 of the input
+ LEA_LO( x1, approx_tab)  C x1 = address of approx_tab
+ ldrh w3, [x1,x2]  C 16-bit table entry selected by those bits
+ lsr x4, x0, #24
+ add x4, x4, #1  C x4 = (input >> 24) + 1
+ ubfiz x2, x3, #11, #16  C x2 = table entry << 11
+ umull x3, w3, w3  C x3 = table entry squared
+ mul x3, x3, x4
+ sub x2, x2, #1
+ sub x2, x2, x3, lsr #40
+ lsl x3, x2, #60
+ mul x1, x2, x2
+ msub x1, x1, x4, x3
+ lsl x2, x2, #13
+ add x1, x2, x1, lsr #47
+ and x2, x0, #1  C x2 = lowest bit of the input
+ neg x3, x2  C x3 = all ones if that bit is set, else zero
+ and x3, x3, x1, lsr #1
+ add x2, x2, x0, lsr #1  C x2 = (input >> 1) + lowest bit
+ msub x2, x1, x2, x3
+ umulh x2, x2, x1
+ lsl x1, x1, #31
+ add x1, x1, x2, lsr #1
+ mul x3, x1, x0  C low half of x1 * input
+ umulh x2, x1, x0  C high half of x1 * input
+ adds x4, x3, x0  C only the carry of low half + input is used
+ adc x0, x2, x0  C x0 = high half + input + carry
+ sub x0, x1, x0  C result = x1 minus that sum
+ ret
+EPILOGUE()
+
+ RODATA
+ ALIGN(2)
+ TYPE( approx_tab, object)
+ SIZE( approx_tab, 512)  C 256 halfword entries, two bytes each
+approx_tab:  C entry k holds 0x7fd00/(256+k), generated by the forloop below
+forloop(i,256,512-1,dnl
+` .hword eval(0x7fd00/i)
+')dnl
diff --git a/gmp-6.3.0/mpn/arm64/logops_n.asm b/gmp-6.3.0/mpn/arm64/logops_n.asm
new file mode 100644
index 0000000..e959abc
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/logops_n.asm
@@ -0,0 +1,139 @@
+dnl ARM64 mpn_and_n, mpn_andn_n, mpn_nand_n, etc.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb
+C nand,nior all other
+C Cortex-A53 3.25-3.5 2.75-3
+C Cortex-A57 2.0 1.5
+C X-Gene 2.14 2.0
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`vp', `x2')
+define(`n', `x3')
+
+define(`POSTOP', `dnl')
+
+ifdef(`OPERATION_and_n',`
+ define(`func', `mpn_and_n')
+ define(`LOGOP', `and $1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+ define(`func', `mpn_andn_n')
+ define(`LOGOP', `bic $1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+ define(`func', `mpn_nand_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `and $1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+ define(`func', `mpn_ior_n')
+ define(`LOGOP', `orr $1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func', `mpn_iorn_n')
+ define(`LOGOP', `orn $1, $2, $3')')
+ifdef(`OPERATION_nior_n',`
+ define(`func', `mpn_nior_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `orr $1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+ define(`func', `mpn_xor_n')
+ define(`LOGOP', `eor $1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func', `mpn_xnor_n')
+ define(`LOGOP', `eon $1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)  C rp[i] = LOGOP(up[i], vp[i]) followed by POSTOP, for n limbs; the operation is chosen by the OPERATION_* defines
+ lsr x17, n, #2  C x17 = n / 4, the loop counter
+ tbz n, #0, L(bx0)  C dispatch on the two low bits of n
+
+L(bx1): ldr x7, [up]  C odd n: handle one limb up front
+ ldr x11, [vp]
+ LOGOP( x15, x7, x11)
+ POSTOP( x15)
+ str x15, [rp],#8
+ tbnz n, #1, L(b11)
+
+L(b01): cbz x17, L(ret)  C n = 1: done
+ ldp x4, x5, [up,#8]
+ ldp x8, x9, [vp,#8]
+ sub up, up, #8  C bias the pointers for the pre-indexed loads at L(mid)
+ sub vp, vp, #8
+ b L(mid)
+
+L(b11): ldp x6, x7, [up,#8]
+ ldp x10, x11, [vp,#8]
+ add up, up, #8
+ add vp, vp, #8
+ cbz x17, L(end)  C n = 3: only the final pair remains
+ b L(top)
+
+L(bx0): tbnz n, #1, L(b10)
+
+L(b00): ldp x4, x5, [up],#-16  C post-decrement biases the pointers for L(mid)
+ ldp x8, x9, [vp],#-16
+ b L(mid)
+
+L(b10): ldp x6, x7, [up]
+ ldp x10, x11, [vp]
+ cbz x17, L(end)  C n = 2: only the final pair remains
+
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#16]  C four limbs per iteration, loads running one pair ahead of the stores
+ ldp x8, x9, [vp,#16]
+ LOGOP( x12, x6, x10)
+ LOGOP( x13, x7, x11)
+ POSTOP( x12)
+ POSTOP( x13)
+ stp x12, x13, [rp],#16
+L(mid): ldp x6, x7, [up,#32]!  C pre-index: up and vp advance by 32 bytes here
+ ldp x10, x11, [vp,#32]!
+ LOGOP( x12, x4, x8)
+ LOGOP( x13, x5, x9)
+ POSTOP( x12)
+ POSTOP( x13)
+ stp x12, x13, [rp],#16
+ sub x17, x17, #1
+ cbnz x17, L(top)
+
+L(end): LOGOP( x12, x6, x10)  C wind-down: the last pair is already loaded
+ LOGOP( x13, x7, x11)
+ POSTOP( x12)
+ POSTOP( x13)
+ stp x12, x13, [rp]
+L(ret): ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/lshift.asm b/gmp-6.3.0/mpn/arm64/lshift.asm
new file mode 100644
index 0000000..fe8a1aa
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/lshift.asm
@@ -0,0 +1,138 @@
+dnl ARM64 mpn_lshift.
+
+dnl Copyright 2013, 2014, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb assumed optimal c/l
+C Cortex-A53 3.5-4.0 3.25
+C Cortex-A57 2.0 2.0
+C X-Gene 2.67 2.5
+
+C TODO
+C * The feed-in code used 1 ldr for odd sized and 2 ldr for even sizes. These
+C numbers should be 1 and 0, respectively. The str in wind-down should also
+C go.
+C * Using extr and with 63 separate loops we might reach 1.25 c/l on A57.
+C * A53's speed depends on alignment, tune/speed -w1 gives 3.5, -w0 gives 4.0.
+
+changecom(blah)
+
+define(`rp_arg', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`cnt', `x3')
+
+define(`rp', `x16')
+
+define(`tnc',`x8')
+
+define(`PSHIFT', lsl)
+define(`NSHIFT', lsr)
+
+ASM_START()
+PROLOGUE(mpn_lshift)  C shift the n limbs at up left by cnt bits into rp, working from the top limb downwards
+ add rp, rp_arg, n, lsl #3  C rp = one past the top result limb
+ add up, up, n, lsl #3  C up = one past the top source limb
+ sub tnc, xzr, cnt  C tnc = -cnt, the count used for the NSHIFT halves
+ lsr x17, n, #2  C x17 = n / 4, the loop counter
+ tbz n, #0, L(bx0)  C dispatch on the two low bits of n
+
+L(bx1): ldr x4, [up,#-8]  C top source limb
+ tbnz n, #1, L(b11)
+
+L(b01): NSHIFT x0, x4, tnc  C x0 = top limb shifted right by tnc; x0 is not written again before ret
+ PSHIFT x2, x4, cnt
+ cbnz x17, L(gt1)
+ str x2, [rp,#-8]  C n = 1: the single result limb
+ ret
+L(gt1): ldp x4, x5, [up,#-24]
+ sub up, up, #8
+ add rp, rp, #16
+ b L(lo2)
+
+L(b11): NSHIFT x0, x4, tnc  C x0 = top limb shifted right by tnc
+ PSHIFT x2, x4, cnt
+ ldp x6, x7, [up,#-24]!
+ b L(lo3)
+
+L(bx0): ldp x4, x5, [up,#-16]  C x5 = top source limb, x4 = the one below it
+ tbz n, #1, L(b00)
+
+L(b10): NSHIFT x0, x5, tnc  C x0 = top limb shifted right by tnc
+ PSHIFT x13, x5, cnt
+ NSHIFT x10, x4, tnc
+ PSHIFT x2, x4, cnt
+ cbnz x17, L(gt2)
+ orr x10, x10, x13
+ stp x2, x10, [rp,#-16]  C n = 2: both result limbs
+ ret
+L(gt2): ldp x4, x5, [up,#-32]
+ orr x10, x10, x13
+ str x10, [rp,#-8]
+ sub up, up, #16
+ add rp, rp, #8
+ b L(lo2)
+
+L(b00): NSHIFT x0, x5, tnc  C x0 = top limb shifted right by tnc
+ PSHIFT x13, x5, cnt
+ NSHIFT x10, x4, tnc
+ PSHIFT x2, x4, cnt
+ ldp x6, x7, [up,#-32]!
+ orr x10, x10, x13
+ str x10, [rp,#-8]!
+ b L(lo0)
+
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#-16]  C four limbs per iteration
+ orr x10, x10, x13
+ orr x11, x12, x2
+ stp x10, x11, [rp,#-16]
+ PSHIFT x2, x6, cnt
+L(lo2): NSHIFT x10, x4, tnc
+ PSHIFT x13, x5, cnt
+ NSHIFT x12, x5, tnc
+ ldp x6, x7, [up,#-32]!  C pre-index: up moves down by 32 bytes here
+ orr x10, x10, x13
+ orr x11, x12, x2
+ stp x10, x11, [rp,#-32]!  C pre-index: rp moves down by 32 bytes here
+ PSHIFT x2, x4, cnt
+L(lo0): sub x17, x17, #1
+L(lo3): NSHIFT x10, x6, tnc
+ PSHIFT x13, x7, cnt
+ NSHIFT x12, x7, tnc
+ cbnz x17, L(top)
+
+L(end): orr x10, x10, x13  C wind-down: combine and store the last pending pair
+ orr x11, x12, x2
+ PSHIFT x2, x6, cnt  C last limb stored gets only the PSHIFT of x6
+ stp x10, x11, [rp,#-16]
+ str x2, [rp,#-24]
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/lshiftc.asm b/gmp-6.3.0/mpn/arm64/lshiftc.asm
new file mode 100644
index 0000000..6bf5844
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/lshiftc.asm
@@ -0,0 +1,141 @@
+dnl ARM64 mpn_lshiftc.
+
+dnl Copyright 2013, 2014, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb assumed optimal c/l
+C Cortex-A53 3.5-4.0 3.25
+C Cortex-A57 2.0 2.0
+C X-Gene 2.67 2.5
+
+C TODO
+C * The feed-in code used 1 ldr for odd sizes and 2 ldr for even sizes. These
+C numbers should be 1 and 0, respectively. The str in wind-down should also
+C go.
+C * Using extr and with 63 separate loops we might reach 1.5 c/l on A57.
+C * A53's speed depends on alignment, tune/speed -w1 gives 3.5, -w0 gives 4.0.
+
+changecom(blah)
+
+define(`rp_arg', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`cnt', `x3')
+
+define(`rp', `x16')
+
+define(`tnc',`x8')
+
+define(`PSHIFT', lsl)
+define(`NSHIFT', lsr)
+
+ASM_START()
+C mpn_lshiftc: shift the n-limb operand at up left by cnt bits and store the
+C one's complement of the n result limbs at rp_arg.  The value returned in x0
+C is the bits shifted out of the most significant limb; it is not
+C complemented.  Limbs are processed from the most significant end downwards,
+C four per loop iteration.
+C
+C The two shifted pieces of a result limb are merged with eon, which yields
+C NOT(a XOR b).  That equals NOT(a OR b) as long as the pieces share no set
+C bits -- assumes 1 <= cnt <= 63; TODO confirm against the caller contract.
+C Limbs made of a single piece are complemented with mvn.
+PROLOGUE(mpn_lshiftc)
+C Point rp and up just past the top limb; all accesses below use negative
+C offsets from there.
+ add rp, rp_arg, n, lsl #3
+ add up, up, n, lsl #3
+C tnc = -cnt.  A register-specified shift uses its amount modulo 64, so
+C shifting by tnc is the same as shifting by 64-cnt.
+ sub tnc, xzr, cnt
+C x17 = n/4 is the loop counter; bits 0 and 1 of n select the feed-in path.
+ lsr x17, n, #2
+ tbz n, #0, L(bx0)
+
+C n is odd: fetch the top limb on its own.
+L(bx1): ldr x4, [up,#-8]
+ tbnz n, #1, L(b11)
+
+C n mod 4 == 1.  x0 = return value, x2 = top limb << cnt.
+L(b01): NSHIFT x0, x4, tnc
+ PSHIFT x2, x4, cnt
+ cbnz x17, L(gt1)
+C n == 1: complement and store the only result limb.
+ mvn x2, x2
+ str x2, [rp,#-8]
+ ret
+C n > 1: load the next two limbs and bias up/rp to suit the L(lo2) entry.
+L(gt1): ldp x4, x5, [up,#-24]
+ sub up, up, #8
+ add rp, rp, #16
+ b L(lo2)
+
+C n mod 4 == 3.  Enters the loop at L(lo3), after the counter decrement.
+L(b11): NSHIFT x0, x4, tnc
+ PSHIFT x2, x4, cnt
+ ldp x6, x7, [up,#-24]!
+ b L(lo3)
+
+C n is even: fetch the top two limbs (x5 is the more significant one).
+L(bx0): ldp x4, x5, [up,#-16]
+ tbz n, #1, L(b00)
+
+C n mod 4 == 2.  x0 = return value, x10 and x13 form the top result limb.
+L(b10): NSHIFT x0, x5, tnc
+ PSHIFT x13, x5, cnt
+ NSHIFT x10, x4, tnc
+ PSHIFT x2, x4, cnt
+ cbnz x17, L(gt2)
+C n == 2: complement and store both result limbs, then return.
+ eon x10, x10, x13
+ mvn x2, x2
+ stp x2, x10, [rp,#-16]
+ ret
+C n > 2: store the top result limb, then bias up/rp for the L(lo2) entry.
+L(gt2): ldp x4, x5, [up,#-32]
+ eon x10, x10, x13
+ str x10, [rp,#-8]
+ sub up, up, #16
+ add rp, rp, #8
+ b L(lo2)
+
+C n mod 4 == 0.  Store the top result limb and enter at L(lo0), which
+C performs the counter decrement for this first pass.
+L(b00): NSHIFT x0, x5, tnc
+ PSHIFT x13, x5, cnt
+ NSHIFT x10, x4, tnc
+ PSHIFT x2, x4, cnt
+ ldp x6, x7, [up,#-32]!
+ eon x10, x10, x13
+ str x10, [rp,#-8]!
+ b L(lo0)
+
+C Main loop: two ldp/stp pairs, i.e. four limbs, per iteration.  x2 carries
+C the left-shifted part of the limb whose lower neighbour is not loaded yet.
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#-16]
+ eon x10, x10, x13
+ eon x11, x12, x2
+ stp x10, x11, [rp,#-16]
+ PSHIFT x2, x6, cnt
+L(lo2): NSHIFT x10, x4, tnc
+ PSHIFT x13, x5, cnt
+ NSHIFT x12, x5, tnc
+ ldp x6, x7, [up,#-32]!
+ eon x10, x10, x13
+ eon x11, x12, x2
+ stp x10, x11, [rp,#-32]!
+ PSHIFT x2, x4, cnt
+L(lo0): sub x17, x17, #1
+L(lo3): NSHIFT x10, x6, tnc
+ PSHIFT x13, x7, cnt
+ NSHIFT x12, x7, tnc
+ cbnz x17, L(top)
+
+C Wind-down: store the last two combined limbs, then the least significant
+C result limb, which is a lone shifted piece and so is complemented with mvn.
+L(end): eon x10, x10, x13
+ eon x11, x12, x2
+ PSHIFT x2, x6, cnt
+ stp x10, x11, [rp,#-16]
+ mvn x2, x2
+ str x2, [rp,#-24]
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/mod_34lsub1.asm b/gmp-6.3.0/mpn/arm64/mod_34lsub1.asm
new file mode 100644
index 0000000..7945fe7
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/mod_34lsub1.asm
@@ -0,0 +1,124 @@
+dnl ARM64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl Copyright 2012-2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 2
+C Cortex-A57 1
+C X-Gene 1.45
+
+define(`ap', x0)
+define(`n', x1)
+
+changecom(blah)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C * An alternative inner loop which could run at 0.722 c/l on A57:
+C adds x8, x8, x2
+C adcs x9, x9, x3
+C ldp x2, x3, [ap, #-32]
+C adcs x10, x10, x4
+C adc x12, x12, xzr
+C adds x8, x8, x5
+C ldp x4, x5, [ap, #-16]
+C sub n, n, #6
+C adcs x9, x9, x6
+C adcs x10, x10, x7
+C ldp x6, x7, [ap], #48
+C adc x12, x12, xzr
+C tbz n, #63, L(top)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+C mpn_mod_34lsub1: return in x0 a value congruent modulo 2^48-1 to the n-limb
+C operand at ap.  The result is a plain sum of pieces of at most 48 bits and
+C is not reduced any further here.
+C
+C Limbs are summed into three accumulators x2, x3, x4 according to their
+C index mod 3.  Because 2^192 = (2^48)^4 is congruent to 1, the carry out of
+C x4 wraps around into x2; carries that are left over are counted in x8.
+PROLOGUE(mpn_mod_34lsub1)
+ subs n, n, #3
+ mov x8, #0
+ b.lt L(le2) C n <= 2
+
+C Prime the three accumulators with the first three limbs.
+ ldp x2, x3, [ap, #0]
+ ldr x4, [ap, #16]
+ add ap, ap, #24
+ subs n, n, #3
+ b.lt L(sum) C n <= 5
+ cmn x0, #0 C clear carry
+
+C Three limbs per iteration.  The counter is updated with sub and tested with
+C tbz, neither of which touches the flags, so the carry out of x4 feeds the
+C adcs into x2 on the next iteration.
+L(top): ldp x5, x6, [ap, #0]
+ ldr x7, [ap, #16]
+ add ap, ap, #24
+ sub n, n, #3
+ adcs x2, x2, x5
+ adcs x3, x3, x6
+ adcs x4, x4, x7
+ tbz n, #63, L(top)
+
+ adc x8, xzr, xzr C x8 <= 1
+
+C Here n is -3, -2 or -1, meaning 0, 1 or 2 limbs remain.  cmn sets the flags
+C for n+2 once; mov and ldr leave them alone, so both branches test the same
+C comparison: b.lo skips the first load when n = -3, b.ls skips the second
+C load when n <= -2.
+L(sum): cmn n, #2
+ mov x5, #0
+ b.lo 1f
+ ldr x5, [ap], #8
+1: mov x6, #0
+ b.ls 1f
+ ldr x6, [ap], #8
+1: adds x2, x2, x5
+ adcs x3, x3, x6
+ adcs x4, x4, xzr
+ adc x8, x8, xzr C x8 <= 2
+
+C Fold the accumulators into x0, using 2^48 == 1 (mod 2^48-1).
+C x2 has weight 1: its low 48 bits plus the 16 bits above them.  The carry
+C count x8 has weight 2^192 == 1 and is added in directly.
+L(sum2):
+ and x0, x2, #0xffffffffffff
+ add x0, x0, x2, lsr #48
+ add x0, x0, x8
+
+C x3 has weight 2^64 == 2^16: its low 32 bits enter at bit 16, its high 32
+C bits wrap around to bit 0.
+ lsl x8, x3, #16
+ and x1, x8, #0xffffffffffff
+ add x0, x0, x1
+ add x0, x0, x3, lsr #32
+
+C x4 has weight 2^128 == 2^32: its low 16 bits enter at bit 32, its high 48
+C bits wrap around to bit 0.
+ lsl x8, x4, #32
+ and x1, x8, #0xffffffffffff
+ add x0, x0, x1
+ add x0, x0, x4, lsr #16
+ ret
+
+C Short operands.  n holds the original count minus 3, so n == -1 means two
+C limbs: load them, zero the third accumulator (x8 is already 0) and fold.
+L(le2): cmn n, #1
+ b.ne L(1)
+ ldp x2, x3, [ap]
+ mov x4, #0
+ b L(sum2)
+C Remaining case: a single limb is read and folded on its own (this is the
+C one-limb case, assuming the caller passes n >= 1).
+L(1): ldr x2, [ap]
+ and x0, x2, #0xffffffffffff
+ add x0, x0, x2, lsr #48
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/mul_1.asm b/gmp-6.3.0/mpn/arm64/mul_1.asm
new file mode 100644
index 0000000..fb965ef
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/mul_1.asm
@@ -0,0 +1,128 @@
+dnl ARM64 mpn_mul_1
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013, 2015, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 7.5-8
+C Cortex-A57 7
+C Cortex-A72
+C X-Gene 4
+C Apple M1 1
+
+C TODO
+C * Start first multiply earlier.
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`v0', `x3')
+
+
+C mpn_mul_1c: alternate entry that leaves x4 as passed by the caller, so that
+C value is added into the bottom of the product (presumably the carry-in
+C argument -- confirm against the C prototype).  It only clears the carry
+C flag and joins mpn_mul_1 at L(com).
+PROLOGUE(mpn_mul_1c)
+ adds xzr, xzr, xzr C clear cy flag
+ b L(com)
+EPILOGUE()
+
+C mpn_mul_1: multiply the n-limb operand at up by the single limb v0, store
+C the n low limbs of the product at rp and return the most significant limb
+C in x0.  x4 is zeroed on this entry, so nothing extra is added in.
+C
+C Register roles: x8/x9 low product halves (mul), x10/x11 high product halves
+C (umulh), x12/x13 finished result limbs, x17 = n/4 loop counter.
+C The carry flag stays live through the whole routine: the only flag-setting
+C instructions are the adds/adcs of the addition chain itself, and the loop
+C counter is handled with sub + cbz/cbnz, which leave the flags alone.
+PROLOGUE(mpn_mul_1)
+ adds x4, xzr, xzr C clear register and cy flag
+C Bits 0 and 1 of n select the feed-in path.
+L(com): lsr x17, n, #2
+ tbnz n, #0, L(bx1)
+
+C n is even: x11 stands in for the previous high half, so the addend in x4
+C enters through the first adcs.
+L(bx0): mov x11, x4
+ tbz n, #1, L(b00)
+
+C Two limbs to handle ahead of the four-limb pattern (n mod 4 is 2, or 3 when
+C arriving from L(bx1)).
+L(b10): ldp x4, x5, [up]
+ mul x8, x4, v0
+ umulh x10, x4, v0
+ cbz x17, L(2)
+ ldp x6, x7, [up,#16]!
+ mul x9, x5, v0
+C Enter the loop two instructions (8 bytes) before L(mid), i.e. at the
+C adcs/umulh pair that closes the first half of the loop body.
+ b L(mid)-8
+
+C No full four-limb group follows: finish in the tail at L(2e).
+L(2): mul x9, x5, v0
+ b L(2e)
+
+C n is odd: multiply and store the lowest limb separately, adding in x4.
+C The adds starts the carry chain; x11 holds the high half for the next limb.
+L(bx1): ldr x7, [up],#8
+ mul x9, x7, v0
+ umulh x11, x7, v0
+ adds x9, x9, x4
+ str x9, [rp],#8
+ tbnz n, #1, L(b10)
+
+C n mod 4 == 1: when x17 is 0 that single limb was everything.
+L(b01): cbz x17, L(1)
+
+C A whole number of four-limb groups remains.  Start the first group and
+C pre-load the second pair of limbs.
+L(b00): ldp x6, x7, [up]
+ mul x8, x6, v0
+ umulh x10, x6, v0
+ ldp x4, x5, [up,#16]
+ mul x9, x7, v0
+ adcs x12, x8, x11
+ umulh x11, x7, v0
+ add rp, rp, #16
+ sub x17, x17, #1
+ cbz x17, L(end)
+
+C Main loop: four limbs per iteration, in two symmetrical halves.
+ ALIGN(16)
+L(top): mul x8, x4, v0
+ ldp x6, x7, [up,#32]!
+ adcs x13, x9, x10
+ umulh x10, x4, v0
+ mul x9, x5, v0
+ stp x12, x13, [rp,#-16]
+ adcs x12, x8, x11
+ umulh x11, x5, v0
+L(mid): mul x8, x6, v0
+ ldp x4, x5, [up,#16]
+ adcs x13, x9, x10
+ umulh x10, x6, v0
+ mul x9, x7, v0
+ stp x12, x13, [rp],#32
+ adcs x12, x8, x11
+ umulh x11, x7, v0
+ sub x17, x17, #1
+ cbnz x17, L(top)
+
+C Wind-down: finish the last four limbs without loading any more input.
+L(end): mul x8, x4, v0
+ adcs x13, x9, x10
+ umulh x10, x4, v0
+ mul x9, x5, v0
+ stp x12, x13, [rp,#-16]
+L(2e): adcs x12, x8, x11
+ umulh x11, x5, v0
+ adcs x13, x9, x10
+ stp x12, x13, [rp]
+C Return value: the last high half plus the final carry.
+L(1): adc x0, x11, xzr
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/popcount.asm b/gmp-6.3.0/mpn/arm64/popcount.asm
new file mode 100644
index 0000000..74de3fc
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/popcount.asm
@@ -0,0 +1,157 @@
+dnl ARM64 Neon mpn_popcount -- mpn bit population count.
+
+dnl Copyright 2013, 2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 2.5
+C Cortex-A57 1.14
+C X-Gene 3
+
+C TODO
+C * Consider greater unrolling.
+C * Arrange to align the pointer, if that helps performance. Use the same
+C read-and-mask trick we use on PCs, for simplicity and performance. (Sorry
+C valgrind!)
+C * Explore if explicit align directives, e.g., "[ptr:128]" help.
+C * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
+
+changecom(blah)
+
+C INPUT PARAMETERS
+define(`ap', x0)
+define(`n', x1)
+
+C We sum into 16 16-bit counters in v4,v5, but at the end we sum them and end
+C up with 8 16-bit counters. Therefore, we can sum to 8(2^16-1) bits, or
+C (8*2^16-1)/64 = 0x1fff limbs. We use a chunksize close to that, but which
+C allows the huge count code to jump deep into the code (at L(chu)).
+
+define(`maxsize', 0x1fff)
+define(`chunksize',0x1ff0)
+
+ASM_START()
+C mpn_popcount: return in x0 the number of set bits in the n-limb operand at
+C ap.  Per-byte counts (cnt) are accumulated pairwise into the 16-bit lanes of
+C v4 and v5 (uadalp) and summed across lanes at the end.  Operands of more
+C than maxsize limbs are handled at L(gt8k) by running this same code on one
+C chunk at a time.
+PROLOGUE(mpn_popcount)
+
+C x11 keeps maxsize for the whole call; L(gt8k) compares against it again.
+ mov x11, #maxsize
+ cmp n, x11
+ b.hi L(gt8k)
+
+L(lt8k):
+ movi v4.16b, #0 C clear summation register
+ movi v5.16b, #0 C clear summation register
+
+C Peel off 1 and then 2 limbs according to bits 0 and 1 of n.
+ tbz n, #0, L(xx0)
+ sub n, n, #1
+ ld1 {v0.1d}, [ap], #8 C load 1 limb
+ cnt v6.16b, v0.16b
+ uadalp v4.8h, v6.16b C could also splat
+
+L(xx0): tbz n, #1, L(x00)
+ sub n, n, #2
+ ld1 {v0.2d}, [ap], #16 C load 2 limbs
+ cnt v6.16b, v0.16b
+ uadalp v4.8h, v6.16b
+
+C Bit 2 of n set: load a group of 4 limbs.  If nothing is left after it
+C (n is now 0) they are counted at L(sum); otherwise enter the loop at L(mid).
+L(x00): tbz n, #2, L(000)
+ subs n, n, #4
+ ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
+ b.ls L(sum)
+
+L(gt4): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+ sub n, n, #4
+ cnt v6.16b, v0.16b
+ cnt v7.16b, v1.16b
+ b L(mid)
+
+C n is now a multiple of 8.  Fewer than 8 limbs left means none are left, so
+C go straight to the final summation.
+L(000): subs n, n, #8
+ b.lo L(e0)
+
+C L(chu) is also the entry used by the large-operand code, which arrives with
+C n already reduced by 8 and v4/v5 cleared.
+L(chu): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+ ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
+ cnt v6.16b, v2.16b
+ cnt v7.16b, v3.16b
+ subs n, n, #8
+ b.lo L(end)
+
+C Main loop: 8 limbs per iteration; accumulating the previous group's counts
+C is interleaved with loading and counting the next group.
+L(top): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+ uadalp v4.8h, v6.16b
+ cnt v6.16b, v0.16b
+ uadalp v5.8h, v7.16b
+ cnt v7.16b, v1.16b
+L(mid): ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
+ subs n, n, #8
+ uadalp v4.8h, v6.16b
+ cnt v6.16b, v2.16b
+ uadalp v5.8h, v7.16b
+ cnt v7.16b, v3.16b
+ b.hs L(top)
+
+C Wind-down: accumulate the pending counts, count the last four limbs still
+C held in v0/v1, then merge v5 into v4.
+L(end): uadalp v4.8h, v6.16b
+ uadalp v5.8h, v7.16b
+L(sum): cnt v6.16b, v0.16b
+ cnt v7.16b, v1.16b
+ uadalp v4.8h, v6.16b
+ uadalp v5.8h, v7.16b
+ add v4.8h, v4.8h, v5.8h
+ C we have 8 16-bit counts
+L(e0): uaddlp v4.4s, v4.8h C we have 4 32-bit counts
+ uaddlp v4.2d, v4.4s C we have 2 64-bit counts
+ mov x0, v4.d[0]
+ mov x1, v4.d[1]
+ add x0, x0, x1
+ ret
+
+C Code for count > maxsize. Splits operand and calls above code.
+C The code above is reached with bl and comes back through its own ret, so
+C the original return address in x30 is parked in x8 for the duration.
+define(`ap2', x5) C caller-saves reg not used above
+L(gt8k):
+ mov x8, x30
+ mov x7, n C full count (caller-saves reg not used above)
+ mov x4, #0 C total sum (caller-saves reg not used above)
+ mov x9, #chunksize*8 C caller-saves reg not used above
+ mov x10, #chunksize C caller-saves reg not used above
+
+C Count one chunk of chunksize limbs per pass for as long as more than
+C maxsize limbs remain.  L(chu) is entered past the "subs n, n, #8" at
+C L(000), hence the count of chunksize-8.
+1: add ap2, ap, x9 C point at subsequent block
+ mov n, #chunksize-8 C count for this invocation, adjusted for entry pt
+ movi v4.16b, #0 C clear chunk summation register
+ movi v5.16b, #0 C clear chunk summation register
+ bl L(chu) C jump deep inside code
+ add x4, x4, x0
+ mov ap, ap2 C put chunk pointer in place for calls
+ sub x7, x7, x10
+ cmp x7, x11
+ b.hi 1b
+
+C The remainder is at most maxsize limbs: count it through the normal entry
+C and add the running total.
+ mov n, x7 C count for final invocation
+ bl L(lt8k)
+ add x0, x4, x0
+ mov x30, x8
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/rsh1aors_n.asm b/gmp-6.3.0/mpn/arm64/rsh1aors_n.asm
new file mode 100644
index 0000000..afd3d5b
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/rsh1aors_n.asm
@@ -0,0 +1,168 @@
+dnl ARM64 mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb assumed optimal c/l
+C Cortex-A53 3.25-3.75 3.0 steady
+C Cortex-A57 2.15 1.75
+C X-Gene 2.75 2.5
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`up', `x1')
+define(`vp', `x2')
+define(`n', `x3')
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`COND', `cs')
+ define(`func_n', mpn_rsh1add_n)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`COND', `cc')
+ define(`func_n', mpn_rsh1sub_n)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+C func_n (mpn_rsh1add_n or mpn_rsh1sub_n, chosen by the OPERATION_* define):
+C add or subtract the n-limb operands at up and vp, shift the result right by
+C one bit and store n limbs at rp.  The bit shifted out at the bottom (bit 0
+C of the lowest sum/difference limb) is returned in x0; the carry or borrow
+C out of the top limb becomes the top bit of the stored result.
+C
+C Register roles: x12-x15 hold consecutive sum limbs, x16/x17 shifted result
+C limbs, x10 the return bit, x6 = n/4 the loop counter.
+C extr xd, xh, xl, #1 yields (xh << 63) | (xl >> 1), i.e. one shifted limb.
+C The carry flag stays live from the first ADDSUB to the final cset: "and"
+C (not ands), extr, the loads/stores and sub + cbz/cbnz do not alter it.
+PROLOGUE(func_n)
+ lsr x6, n, #2
+
+C Bits 0 and 1 of n select the feed-in path.
+ tbz n, #0, L(bx0)
+
+C n is odd: fetch the lowest limb of each operand.
+L(bx1): ldr x5, [up],#8
+ ldr x9, [vp],#8
+ tbnz n, #1, L(b11)
+
+C n mod 4 == 1.
+L(b01): ADDSUB x13, x5, x9
+ and x10, x13, #1
+ cbz x6, L(1)
+ ldp x4, x5, [up],#48
+ ldp x8, x9, [vp],#48
+ ADDSUBC x14, x4, x8
+ ADDSUBC x15, x5, x9
+ ldp x4, x5, [up,#-32]
+ ldp x8, x9, [vp,#-32]
+ extr x17, x14, x13, #1
+ ADDSUBC x12, x4, x8
+ ADDSUBC x13, x5, x9
+ str x17, [rp], #24
+ sub x6, x6, #1
+ cbz x6, L(end)
+ b L(top)
+
+C n == 1: the carry/borrow (x14) supplies the top bit of the only limb.
+L(1): cset x14, COND
+ extr x17, x14, x13, #1
+ str x17, [rp]
+ mov x0, x10
+ ret
+
+C n mod 4 == 3.
+L(b11): ADDSUB x15, x5, x9
+ and x10, x15, #1
+
+ ldp x4, x5, [up],#32
+ ldp x8, x9, [vp],#32
+ ADDSUBC x12, x4, x8
+ ADDSUBC x13, x5, x9
+ cbz x6, L(3)
+ ldp x4, x5, [up,#-16]
+ ldp x8, x9, [vp,#-16]
+ extr x17, x12, x15, #1
+ ADDSUBC x14, x4, x8
+ ADDSUBC x15, x5, x9
+ str x17, [rp], #8
+ b L(mid)
+
+C n == 3: store the lowest result limb, then finish the top two at L(2).
+L(3): extr x17, x12, x15, #1
+ str x17, [rp], #8
+ b L(2)
+
+L(bx0): tbz n, #1, L(b00)
+
+C n mod 4 == 2.  When x6 is 0 (n == 2) only the tail at L(2) is needed.
+L(b10): ldp x4, x5, [up],#32
+ ldp x8, x9, [vp],#32
+ ADDSUB x12, x4, x8
+ ADDSUBC x13, x5, x9
+ and x10, x12, #1
+ cbz x6, L(2)
+ ldp x4, x5, [up,#-16]
+ ldp x8, x9, [vp,#-16]
+ ADDSUBC x14, x4, x8
+ ADDSUBC x15, x5, x9
+ b L(mid)
+
+C n mod 4 == 0.
+L(b00): ldp x4, x5, [up],#48
+ ldp x8, x9, [vp],#48
+ ADDSUB x14, x4, x8
+ ADDSUBC x15, x5, x9
+ and x10, x14, #1
+ ldp x4, x5, [up,#-32]
+ ldp x8, x9, [vp,#-32]
+ ADDSUBC x12, x4, x8
+ ADDSUBC x13, x5, x9
+ add rp, rp, #16
+ sub x6, x6, #1
+ cbz x6, L(end)
+
+C Main loop: four limbs per iteration.  Each half shifts and stores two limbs
+C that were summed earlier while summing the next two.
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#-16]
+ ldp x8, x9, [vp,#-16]
+ extr x16, x15, x14, #1
+ extr x17, x12, x15, #1
+ ADDSUBC x14, x4, x8
+ ADDSUBC x15, x5, x9
+ stp x16, x17, [rp,#-16]
+L(mid): ldp x4, x5, [up],#32
+ ldp x8, x9, [vp],#32
+ extr x16, x13, x12, #1
+ extr x17, x14, x13, #1
+ ADDSUBC x12, x4, x8
+ ADDSUBC x13, x5, x9
+ stp x16, x17, [rp],#32
+ sub x6, x6, #1
+ cbnz x6, L(top)
+
+C Wind-down: store the limbs still in flight.  At L(2) the final carry/borrow
+C is turned into x14 and shifted in as the top bit of the top limb.
+L(end): extr x16, x15, x14, #1
+ extr x17, x12, x15, #1
+ stp x16, x17, [rp,#-16]
+L(2): cset x14, COND
+ extr x16, x13, x12, #1
+ extr x17, x14, x13, #1
+ stp x16, x17, [rp]
+
+L(ret): mov x0, x10
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/rshift.asm b/gmp-6.3.0/mpn/arm64/rshift.asm
new file mode 100644
index 0000000..90187ad
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/rshift.asm
@@ -0,0 +1,136 @@
+dnl ARM64 mpn_rshift.
+
+dnl Copyright 2013, 2014, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb assumed optimal c/l
+C Cortex-A53 3.5-4.0 3.25
+C Cortex-A57 2.0 2.0
+C X-Gene 2.67 2.5
+
+C TODO
+C * The feed-in code used 1 ldr for odd sizes and 2 ldr for even sizes. These
+C numbers should be 1 and 0, respectively. The str in wind-down should also
+C go.
+C * Using extr and with 63 separate loops we might reach 1.25 c/l on A57.
+C * A53's speed depends on alignment, but not as simply as for lshift/lshiftc.
+
+changecom(blah)
+
+define(`rp_arg', `x0')
+define(`up', `x1')
+define(`n', `x2')
+define(`cnt', `x3')
+
+define(`rp', `x16')
+
+define(`tnc',`x8')
+
+define(`PSHIFT', lsr)
+define(`NSHIFT', lsl)
+
+ASM_START()
+C mpn_rshift: shift the n-limb operand at up right by cnt bits, store the n
+C result limbs at rp_arg, and return in x0 the bits shifted out of the least
+C significant limb, positioned at the top of the returned limb.  Limbs are
+C processed from the least significant end upwards, four per loop iteration.
+C
+C Each result limb is (limb >> cnt) | (next higher limb << (64-cnt)).
+PROLOGUE(mpn_rshift)
+C x0 doubles as rp_arg and as the return register, so the destination
+C pointer is moved to rp (x16).
+ mov rp, rp_arg
+C tnc = -cnt.  A register-specified shift uses its amount modulo 64, so
+C shifting by tnc is the same as shifting by 64-cnt.
+ sub tnc, xzr, cnt
+C x17 = n/4 is the loop counter; bits 0 and 1 of n select the feed-in path.
+ lsr x17, n, #2
+ tbz n, #0, L(bx0)
+
+C n is odd: fetch the lowest limb on its own.
+L(bx1): ldr x5, [up]
+ tbnz n, #1, L(b11)
+
+C n mod 4 == 1.  x0 = return value, x2 = lowest limb >> cnt.
+L(b01): NSHIFT x0, x5, tnc
+ PSHIFT x2, x5, cnt
+ cbnz x17, L(gt1)
+C n == 1: the only result limb is the shifted input limb.
+ str x2, [rp]
+ ret
+C n > 1: load the next two limbs and bias up/rp to suit the L(lo2) entry.
+L(gt1): ldp x4, x5, [up,#8]
+ sub up, up, #8
+ sub rp, rp, #32
+ b L(lo2)
+
+C n mod 4 == 3.  Enters the loop at L(lo3), after the counter decrement.
+L(b11): NSHIFT x0, x5, tnc
+ PSHIFT x2, x5, cnt
+ ldp x6, x7, [up,#8]!
+ sub rp, rp, #16
+ b L(lo3)
+
+C n is even: fetch the lowest two limbs (x4 is the less significant one).
+L(bx0): ldp x4, x5, [up]
+ tbz n, #1, L(b00)
+
+C n mod 4 == 2.  x0 = return value, x10|x13 = lowest result limb.
+L(b10): NSHIFT x0, x4, tnc
+ PSHIFT x13, x4, cnt
+ NSHIFT x10, x5, tnc
+ PSHIFT x2, x5, cnt
+ cbnz x17, L(gt2)
+C n == 2: store both result limbs and return.
+ orr x10, x10, x13
+ stp x10, x2, [rp]
+ ret
+C n > 2: store the lowest result limb; the post-index biases rp for L(lo2).
+L(gt2): ldp x4, x5, [up,#16]
+ orr x10, x10, x13
+ str x10, [rp],#-24
+ b L(lo2)
+
+C n mod 4 == 0.  Store the lowest result limb and enter at L(lo0), which
+C performs the counter decrement for this first pass.
+L(b00): NSHIFT x0, x4, tnc
+ PSHIFT x13, x4, cnt
+ NSHIFT x10, x5, tnc
+ PSHIFT x2, x5, cnt
+ ldp x6, x7, [up,#16]!
+ orr x10, x10, x13
+ str x10, [rp],#-8
+ b L(lo0)
+
+C Main loop: two ldp/stp pairs, i.e. four limbs, per iteration.  x2 carries
+C the right-shifted part of the limb whose upper neighbour is not loaded yet.
+ ALIGN(16)
+L(top): ldp x4, x5, [up,#16]
+ orr x10, x10, x13
+ orr x11, x12, x2
+ stp x11, x10, [rp,#16]
+ PSHIFT x2, x7, cnt
+L(lo2): NSHIFT x10, x5, tnc
+ NSHIFT x12, x4, tnc
+ PSHIFT x13, x4, cnt
+ ldp x6, x7, [up,#32]!
+ orr x10, x10, x13
+ orr x11, x12, x2
+ stp x11, x10, [rp,#32]!
+ PSHIFT x2, x5, cnt
+L(lo0): sub x17, x17, #1
+L(lo3): NSHIFT x10, x7, tnc
+ NSHIFT x12, x6, tnc
+ PSHIFT x13, x6, cnt
+ cbnz x17, L(top)
+
+C Wind-down: store the last two combined limbs, then the most significant
+C result limb, which only has zeros shifted in from above.
+L(end): orr x10, x10, x13
+ orr x11, x12, x2
+ PSHIFT x2, x7, cnt
+ stp x11, x10, [rp,#16]
+ str x2, [rp,#32]
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/sec_tabselect.asm b/gmp-6.3.0/mpn/arm64/sec_tabselect.asm
new file mode 100644
index 0000000..18a268a
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/sec_tabselect.asm
@@ -0,0 +1,122 @@
+dnl ARM64 Neon mpn_sec_tabselect.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2011-2014 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C Cortex-A53 2.25
+C Cortex-A57 1.33
+C X-Gene 2
+
+C void
+C mpn_sec_tabselect (mp_ptr rp, mp_srcptr *tab,
+C mp_size_t n, mp_size_t nents, mp_size_t which)
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`tp', `x1')
+define(`n', `x2')
+define(`nents', `x3')
+define(`which', `x4')
+
+define(`i', `x5')
+define(`j', `x6')
+
+define(`maskq', `v4')
+
+ASM_START()
+C mpn_sec_tabselect: copy to rp the n-limb table entry whose index equals
+C which.  The table at tp is walked as nents entries spaced n limbs apart.
+C Every entry is loaded, whatever the value of which; the wanted one is
+C picked with a compare mask (cmeq) and a bitwise insert (bit), with no
+C branch or address depending on which.
+C
+C The work is done in column slices: 4 limbs wide in the outer loop, followed
+C by a 2-limb and a 1-limb slice according to bits 1 and 0 of n.
+C Register roles: v7 = which in both lanes, v6 = 1 in both lanes, v5 = running
+C entry index, maskq = all-ones lanes when v5 equals v7, v2/v3 = selected data,
+C i = entries left in the current slice, j = limbs left for the 4-wide loop.
+PROLOGUE(mpn_sec_tabselect)
+ dup v7.2d, x4 C 2 `which' copies
+
+ mov x10, #1
+ dup v6.2d, x10 C 2 copies of 1
+
+C Skip the 4-wide loop altogether when n < 4.
+ subs j, n, #4
+ b.mi L(outer_end)
+
+L(outer_top):
+ mov i, nents
+ mov x12, tp C preserve tp
+ movi v5.16b, #0 C zero 2 counter copies
+ movi v2.16b, #0
+ movi v3.16b, #0
+ ALIGN(16)
+C Inner loop: one table entry per pass.  tp advances by n limbs (n*8 bytes) to
+C reach the same slice of the next entry.
+L(tp4): cmeq maskq.2d, v5.2d, v7.2d C compare idx copies to `which' copies
+ ld1 {v0.2d,v1.2d}, [tp]
+ add v5.2d, v5.2d, v6.2d
+ bit v2.16b, v0.16b, maskq.16b
+ bit v3.16b, v1.16b, maskq.16b
+ add tp, tp, n, lsl #3
+ sub i, i, #1
+ cbnz i, L(tp4)
+ st1 {v2.2d,v3.2d}, [rp], #32
+ add tp, x12, #32 C restore tp, point to next slice
+ subs j, j, #4
+ b.pl L(outer_top)
+L(outer_end):
+
+C 2-limb slice, taken when bit 1 of n is set.
+ tbz n, #1, L(b0x)
+ mov i, nents
+ mov x12, tp
+ movi v5.16b, #0 C zero 2 counter copies
+ movi v2.16b, #0
+ ALIGN(16)
+L(tp2): cmeq maskq.2d, v5.2d, v7.2d
+ ld1 {v0.2d}, [tp]
+ add v5.2d, v5.2d, v6.2d
+ bit v2.16b, v0.16b, maskq.16b
+ add tp, tp, n, lsl #3
+ sub i, i, #1
+ cbnz i, L(tp2)
+ st1 {v2.2d}, [rp], #16
+ add tp, x12, #16
+
+C 1-limb slice, taken when bit 0 of n is set.  Only the low 64 bits of the
+C vectors are loaded, inserted and stored here.
+L(b0x): tbz n, #0, L(b00)
+ mov i, nents
+ mov x12, tp
+ movi v5.16b, #0 C zero 2 counter copies
+ movi v2.16b, #0
+ ALIGN(16)
+L(tp1): cmeq maskq.2d, v5.2d, v7.2d
+ ld1 {v0.1d}, [tp]
+ add v5.2d, v5.2d, v6.2d C FIXME size should be `1d'
+ bit v2.8b, v0.8b, maskq.8b
+ add tp, tp, n, lsl #3
+ sub i, i, #1
+ cbnz i, L(tp1)
+ st1 {v2.1d}, [rp], #8
+ add tp, x12, #8
+
+L(b00): ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/sqr_diag_addlsh1.asm b/gmp-6.3.0/mpn/arm64/sqr_diag_addlsh1.asm
new file mode 100644
index 0000000..39f1cb1
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/sqr_diag_addlsh1.asm
@@ -0,0 +1,102 @@
+dnl ARM64 mpn_sqr_diag_addlsh1.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2016, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Cortex-A53 5.65
+C Cortex-A57 3.5
+C X-Gene 3.38
+
+changecom(blah)
+
+define(`rp', `x0')
+define(`tp', `x1')
+define(`up', `x2')
+define(`n', `x3')
+
+ASM_START()
+C mpn_sqr_diag_addlsh1: form the squares of the n limbs at up (each square
+C fills two result limbs), add to them the operand at tp shifted left by one
+C bit and placed one limb up, and store the 2n result limbs at rp.  The lowest
+C result limb is therefore the low half of up[0]^2 on its own.
+C
+C Register roles: x15/x16/x17 limbs of up being squared, x10/x11 low/high
+C halves of the current square, x4-x7 limbs of tp, x8/x9 limbs of tp shifted
+C left by one (extr ..., #63 yields (hi << 1) | (lo >> 63)), x12/x13 finished
+C result limbs, x14 = n/2 loop counter.
+C The carry flag stays live across the loop: the counter is handled with
+C sub + cbz/cbnz, which do not touch the flags.
+C NOTE(review): with n == 1 the odd path reaches L(mid) with x14 == 0 and the
+C decrement would wrap -- presumably callers guarantee n >= 2; confirm.
+PROLOGUE(mpn_sqr_diag_addlsh1)
+ ldr x15, [up],#8
+ lsr x14, n, #1
+ tbz n, #0, L(bx0)
+
+C n is odd.  The adds clears both the carry flag and x7; x7 then serves as the
+C zero limb below tp[0] for the first extr at L(mid).
+L(bx1): adds x7, xzr, xzr
+ mul x12, x15, x15
+ ldr x16, [up],#8
+ ldp x4, x5, [tp],#16
+ umulh x11, x15, x15
+ b L(mid)
+
+C n is even.  The adds clears both the carry flag and x5; x5 then serves as
+C the zero limb below tp[0] for the first extr at L(top) or L(end).
+L(bx0): adds x5, xzr, xzr
+ mul x12, x15, x15
+ ldr x17, [up],#16
+ ldp x6, x7, [tp],#32
+ umulh x11, x15, x15
+ sub x14, x14, #1
+ cbz x14, L(end)
+
+C Main loop: two limbs of up, four limbs of tp and four result limbs per
+C iteration, in two symmetrical halves.
+ ALIGN(16)
+L(top): extr x9, x6, x5, #63
+ mul x10, x17, x17
+ ldr x16, [up,#-8]
+ adcs x13, x9, x11
+ ldp x4, x5, [tp,#-16]
+ umulh x11, x17, x17
+ extr x8, x7, x6, #63
+ stp x12, x13, [rp],#16
+ adcs x12, x8, x10
+L(mid): extr x9, x4, x7, #63
+ mul x10, x16, x16
+ ldr x17, [up],#16
+ adcs x13, x9, x11
+ ldp x6, x7, [tp],#32
+ umulh x11, x16, x16
+ extr x8, x5, x4, #63
+ stp x12, x13, [rp],#16
+ adcs x12, x8, x10
+ sub x14, x14, #1
+ cbnz x14, L(top)
+
+C Wind-down: square the last limb of up and store the final four result
+C limbs.  The extr against xzr leaves just the top bit of the last tp limb,
+C which is added to the high half of the last square.
+L(end): extr x9, x6, x5, #63
+ mul x10, x17, x17
+ adcs x13, x9, x11
+ umulh x11, x17, x17
+ extr x8, x7, x6, #63
+ stp x12, x13, [rp]
+ adcs x12, x8, x10
+ extr x9, xzr, x7, #63
+ adcs x13, x9, x11
+ stp x12, x13, [rp,#16]
+
+ ret
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm64/xgene1/gmp-mparam.h b/gmp-6.3.0/mpn/arm64/xgene1/gmp-mparam.h
new file mode 100644
index 0000000..c8020bb
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm64/xgene1/gmp-mparam.h
@@ -0,0 +1,182 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+/* Limb geometry: 8 bytes of 8 bits each, i.e. a 64-bit limb.  */
+#define GMP_LIMB_BITS 64
+#define GMP_LIMB_BYTES 8
+
+/* 2400 MHz AppliedMicro X-Gene */
+/* FFT tuning limit = 0.5 M */
+/* Generated by tuneup.c, 2019-09-28, gcc 4.8 */
+/* NOTE(review): per the line above these values are tuner output; presumably
+   they should be regenerated with tuneup.c rather than edited by hand.  */
+
+/* Single-limb division and remainder tuning.  Where a value is 0, the inline
+   annotation on that line says how the tuner meant it to be read.  */
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1 /* 2.00% faster than 2 */
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
+#define USE_PREINV_DIVREM_1 1
+/* NOTE(review): the next comment names a host and a date later than the 2019
+   tuneup run recorded above; how many of the following defines it applies to
+   is not stated here -- confirm before re-tuning.  */
+/* From gcc185.osuosl.org, 2023-07-26 */
+#define DIV_QR_1N_PI1_METHOD 3 /* 5.60% faster than 4 */
+#define DIV_QR_1_NORM_THRESHOLD 1
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD 14
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 27
+
+#define DIV_1_VS_MUL_1_PERCENT 249
+
+/* Thresholds for the Toom multiplication variants named in each macro;
+   presumably operand sizes in limbs -- TODO confirm against the GMP tuning
+   documentation.  */
+#define MUL_TOOM22_THRESHOLD 18
+#define MUL_TOOM33_THRESHOLD 61
+#define MUL_TOOM44_THRESHOLD 112
+#define MUL_TOOM6H_THRESHOLD 242
+#define MUL_TOOM8H_THRESHOLD 321
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 109
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 72
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106
+
+/* Squaring counterparts of the Toom thresholds above (same hedge on units).  */
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 214
+#define SQR_TOOM8_THRESHOLD 284
+
+#define MULMID_TOOM42_THRESHOLD 46
+
+#define MULMOD_BNM1_THRESHOLD 11
+#define SQRMOD_BNM1_THRESHOLD 13
+
+/* FFT multiplication tuning.
+   MUL_FFT_TABLE3 is a brace-enclosed list of 98 two-element entries (24 rows
+   of four plus a final row of two), which agrees with MUL_FFT_TABLE3_SIZE.
+   Its first entry, { 412, 5}, repeats MUL_FFT_MODF_THRESHOLD and the k = 5
+   annotation on that line.  From { 8192,14} to the end, each entry doubles
+   the first field and adds one to the second.
+   NOTE(review): presumably each entry is { size, k } and the doubling tail is
+   filler beyond the tuning limit -- confirm against the code that consumes
+   the table.  */
+#define MUL_FFT_MODF_THRESHOLD 412 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 412, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 19, 7}, { 12, 6}, { 25, 7}, { 17, 8}, \
+ { 9, 7}, { 20, 8}, { 11, 7}, { 25, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 17, 7}, { 35, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 39,10}, \
+ { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \
+ { 71,10}, { 39, 9}, { 83,10}, { 47, 9}, \
+ { 99,10}, { 55,11}, { 31,10}, { 63, 9}, \
+ { 127,10}, { 71, 9}, { 143,10}, { 79,11}, \
+ { 47,10}, { 103,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 135, 9}, { 271,10}, \
+ { 143,11}, { 79, 9}, { 319,10}, { 167, 9}, \
+ { 351,11}, { 95, 9}, { 383, 8}, { 767,10}, \
+ { 207, 9}, { 415,11}, { 111,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,11}, { 143,10}, { 287, 9}, { 575,10}, \
+ { 319, 9}, { 639,10}, { 351,12}, { 95,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 98
+#define MUL_FFT_THRESHOLD 4736
+
+/* FFT squaring tuning.
+   SQR_FFT_TABLE3 is a brace-enclosed list of 87 two-element entries (21 rows
+   of four plus a final row of three), which agrees with SQR_FFT_TABLE3_SIZE.
+   Its first entry, { 340, 5}, repeats SQR_FFT_MODF_THRESHOLD and the k = 5
+   annotation on that line.  From { 8192,14} to the end, each entry doubles
+   the first field and adds one to the second.
+   NOTE(review): presumably each entry is { size, k } and the doubling tail is
+   filler beyond the tuning limit -- confirm against the code that consumes
+   the table.  */
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 340, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 19, 7}, { 10, 6}, { 21, 7}, { 11, 6}, \
+ { 23, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
+ { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 39,10}, \
+ { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
+ { 95,10}, { 55,11}, { 31,10}, { 79,11}, \
+ { 47,10}, { 95, 9}, { 191,12}, { 31,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511,10}, \
+ { 135, 9}, { 271,11}, { 79, 9}, { 319, 8}, \
+ { 639,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \
+ { 287, 9}, { 575,10}, { 319, 9}, { 639,11}, \
+ { 175,10}, { 351,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 87
+#define SQR_FFT_THRESHOLD 3264
+
+/* Remaining tuner output, grouped by routine family as emitted.  Values
+   annotated "always" are 0; METHOD macros carry the tuner's own note naming
+   the alternative they were measured against.
+   NOTE(review): units are presumably limbs, as for the other thresholds in
+   this file -- TODO confirm.  */
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 45
+#define MULLO_MUL_N_THRESHOLD 8648
+#define SQRLO_BASECASE_THRESHOLD 0 /* always */
+#define SQRLO_DC_THRESHOLD 108
+#define SQRLO_SQR_THRESHOLD 6461
+
+#define DC_DIV_QR_THRESHOLD 64
+#define DC_DIVAPPR_Q_THRESHOLD 222
+#define DC_BDIV_QR_THRESHOLD 63
+#define DC_BDIV_Q_THRESHOLD 132
+
+#define INV_MULMOD_BNM1_THRESHOLD 38
+#define INV_NEWTON_THRESHOLD 242
+#define INV_APPR_THRESHOLD 222
+
+#define BINV_NEWTON_THRESHOLD 254
+#define REDC_1_TO_REDC_N_THRESHOLD 66
+
+#define MU_DIV_QR_THRESHOLD 1234
+#define MU_DIVAPPR_Q_THRESHOLD 1234
+#define MUPI_DIV_QR_THRESHOLD 122
+#define MU_BDIV_QR_THRESHOLD 1210
+#define MU_BDIV_Q_THRESHOLD 1234
+
+/* Expands to a bare comma-separated list of five values, so it can only be
+   used where such a list is valid (for example inside an initializer).  */
+#define POWM_SEC_TABLE 3,23,194,712,2499
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 22
+#define SET_STR_DC_THRESHOLD 381
+#define SET_STR_PRECOMPUTE_THRESHOLD 2503
+
+#define FAC_DSC_THRESHOLD 216
+#define FAC_ODD_THRESHOLD 26
+
+#define MATRIX22_STRASSEN_THRESHOLD 14
+#define HGCD2_DIV1_METHOD 5 /* 2.01% faster than 3 */
+#define HGCD_THRESHOLD 122
+#define HGCD_APPR_THRESHOLD 171
+#define HGCD_REDUCE_THRESHOLD 2479
+#define GCD_DC_THRESHOLD 541
+#define GCDEXT_DC_THRESHOLD 386
+#define JACOBI_BASE_METHOD 4 /* 7.46% faster than 1 */