aboutsummaryrefslogtreecommitdiff
path: root/gmp-6.3.0/mpn/arm/v7a
diff options
context:
space:
mode:
Diffstat (limited to 'gmp-6.3.0/mpn/arm/v7a')
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/addmul_1.asm145
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/aors_n.asm162
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/bdiv_q_1.asm36
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/cnd_aors_n.asm158
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/com.asm180
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/gmp-mparam.h212
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/logops_n.asm253
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/mul_1.asm104
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm43
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm43
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm144
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/com.asm97
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyd.asm110
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm90
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm177
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora15/submul_1.asm159
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora17/addmul_1.asm34
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora17/gmp-mparam.h233
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora17/mod_34lsub1.asm121
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora17/mul_1.asm34
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora17/submul_1.asm34
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora5/gmp-mparam.h205
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora7/gmp-mparam.h202
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora8/bdiv_q_1.asm158
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora8/gmp-mparam.h207
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora9/bdiv_q_1.asm36
-rw-r--r--gmp-6.3.0/mpn/arm/v7a/cora9/gmp-mparam.h211
27 files changed, 3588 insertions, 0 deletions
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/addmul_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/addmul_1.asm
new file mode 100644
index 0000000..c2277b3
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/addmul_1.asm
@@ -0,0 +1,145 @@
+dnl ARM mpn_addmul_1 optimised for A15.
+
+dnl Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 6 3.25
+C Cortex-A15 2 this
+
+C This code uses umlal for adding in the rp[] data, keeping the recurrency path
+C separate from any multiply instructions. It performs well on A15, at umlal's
+C bandwidth.
+C
+C An A9 variant should perhaps stick to 3-way unrolling, and use ldm and stm
+C for all loads and stores. Alternatively, it could do 2-way or 4-way, but
+C then alignment aware code will be necessary (adding O(1) bookkeeping
+C overhead).
+C
+C We don't use r12 due to ldrd and strd limitations.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`v0', `r3')
+
+define(`w0', `r10') define(`w1', `r11')
+define(`u0', `r8') define(`u1', `r9')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ push { r4-r11 }
+
+ ands r6, n, #3
+ sub n, n, #3
+ beq L(b00)
+ cmp r6, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): mov r6, #0
+ cmn r13, #0 C carry clear
+ ldr u1, [up], #-4
+ ldr w1, [rp], #-4
+ mov r7, #0
+ b L(mid)
+
+L(b00): ldrd u0, u1, [up]
+ ldrd w0, w1, [rp]
+ mov r6, #0
+ umlal w0, r6, u0, v0
+ cmn r13, #0 C carry clear
+ mov r7, #0
+ str w0, [rp]
+ b L(mid)
+
+L(b10): ldrd u0, u1, [up], #8
+ ldrd w0, w1, [rp]
+ mov r4, #0
+ umlal w0, r4, u0, v0
+ cmn r13, #0 C carry clear
+ mov r5, #0
+ str w0, [rp], #8
+ umlal w1, r5, u1, v0
+ tst n, n
+ bmi L(end)
+ b L(top)
+
+L(b01): mov r4, #0
+ ldr u1, [up], #4
+ ldr w1, [rp], #4
+ mov r5, #0
+ umlal w1, r5, u1, v0
+ tst n, n
+ bmi L(end)
+
+ ALIGN(16)
+L(top): ldrd u0, u1, [up, #0]
+ adcs r4, r4, w1
+ ldrd w0, w1, [rp, #0]
+ mov r6, #0
+ umlal w0, r6, u0, v0 C 1 2
+ adcs r5, r5, w0
+ mov r7, #0
+ strd r4, r5, [rp, #-4]
+L(mid): umlal w1, r7, u1, v0 C 2 3
+ ldrd u0, u1, [up, #8]
+ adcs r6, r6, w1
+ ldrd w0, w1, [rp, #8]
+ mov r4, #0
+ umlal w0, r4, u0, v0 C 3 4
+ adcs r7, r7, w0
+ mov r5, #0
+ strd r6, r7, [rp, #4]
+ umlal w1, r5, u1, v0 C 0 1
+ sub n, n, #4
+ add up, up, #16
+ add rp, rp, #16
+ tst n, n
+ bpl L(top)
+
+L(end): adcs r4, r4, w1
+ str r4, [rp, #-4]
+ adc r0, r5, #0
+ pop { r4-r11 }
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/aors_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/aors_n.asm
new file mode 100644
index 0000000..dc3f839
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/aors_n.asm
@@ -0,0 +1,162 @@
+dnl ARM mpn_add_n/mpn_sub_n optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 3.55 2.5
+C Cortex-A15 1.27 this
+
+C This was a major improvement compared to the code we had before, but it might
+C not be the best 8-way code possible. We've tried some permutations of auto-
+C increments and separate pointer updates, but they all ran at the same speed
+C on A15.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`OPERATION_add_n', `
+ define(`ADDSUBC', adcs)
+ define(`IFADD', `$1')
+ define(`SETCY', `cmp $1, #1')
+ define(`RETVAL', `adc r0, n, #0')
+ define(`RETVAL2', `adc r0, n, #1')
+ define(`func', mpn_add_n)
+ define(`func_nc', mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(`ADDSUBC', sbcs)
+ define(`IFADD', `')
+ define(`SETCY', `rsbs $1, $1, #0')
+ define(`RETVAL', `sbc r0, r0, r0
+ and r0, r0, #1')
+ define(`RETVAL2', `RETVAL')
+ define(`func', mpn_sub_n)
+ define(`func_nc', mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+ ldr r12, [sp]
+ b L(ent)
+EPILOGUE()
+PROLOGUE(func)
+ mov r12, #0
+L(ent): push { r4-r9 }
+
+ ands r6, n, #3
+ mov n, n, lsr #2
+ beq L(b00)
+ cmp r6, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): ldr r5, [up], #4
+ ldr r7, [vp], #4
+ SETCY( r12)
+ ADDSUBC r9, r5, r7
+ ldrd r4, r5, [up, #0]
+ ldrd r6, r7, [vp, #0]
+ str r9, [rp], #-4
+ b L(lo)
+
+L(b00): ldrd r4, r5, [up], #-8
+ ldrd r6, r7, [vp], #-8
+ SETCY( r12)
+ sub rp, rp, #16
+ b L(mid)
+
+L(b01): ldr r5, [up], #-4
+ ldr r7, [vp], #-4
+ SETCY( r12)
+ ADDSUBC r9, r5, r7
+ str r9, [rp], #-12
+ tst n, n
+ beq L(wd1)
+L(gt1): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ b L(mid)
+
+L(b10): ldrd r4, r5, [up]
+ ldrd r6, r7, [vp]
+ SETCY( r12)
+ sub rp, rp, #8
+ b L(lo)
+
+ ALIGN(16)
+L(top): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ strd r8, r9, [rp, #8]
+L(mid): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ ldrd r4, r5, [up, #16]
+ ldrd r6, r7, [vp, #16]
+ strd r8, r9, [rp, #16]
+ ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ sub n, n, #2
+ tst n, n
+ bmi L(dne)
+ ldrd r4, r5, [up, #24]
+ ldrd r6, r7, [vp, #24]
+ strd r8, r9, [rp, #24]
+ ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ ldrd r4, r5, [up, #32]!
+ ldrd r6, r7, [vp, #32]!
+ strd r8, r9, [rp, #32]!
+L(lo): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ tst n, n
+ bne L(top)
+
+L(end): strd r8, r9, [rp, #8]
+L(wd1): RETVAL
+ pop { r4-r9 }
+ bx r14
+L(dne): strd r8, r9, [rp, #24]
+ RETVAL2
+ pop { r4-r9 }
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/bdiv_q_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/bdiv_q_1.asm
new file mode 100644
index 0000000..245b371
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/bdiv_q_1.asm
@@ -0,0 +1,36 @@
+dnl ARM mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`arm/v7a/cora8/bdiv_q_1.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/cnd_aors_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/cnd_aors_n.asm
new file mode 100644
index 0000000..b9e5cd3
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/cnd_aors_n.asm
@@ -0,0 +1,158 @@
+dnl ARM mpn_cnd_add_n/mpn_cnd_sub_n optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 3.75 3
+C Cortex-A15 1.78 this
+
+C This code does not run as well as one could have hoped, since 1.5 c/l seems
+C realistic for this insn mix.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`cnd',`r0')
+define(`rp', `r1')
+define(`up', `r2')
+define(`vp', `r3')
+define(`n', `r12')
+
+ifdef(`OPERATION_cnd_add_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`IFADD', `$1')
+ define(`INITCY', `cmn r0, #0')
+ define(`RETVAL', `adc r0, n, #0')
+ define(`RETVAL2', `adc r0, n, #1')
+ define(`func', mpn_cnd_add_n)
+ define(`func_nc', mpn_add_nc)')
+ifdef(`OPERATION_cnd_sub_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`IFADD', `')
+ define(`INITCY', `cmp r0, #0')
+ define(`RETVAL', `sbc r0, r0, r0
+ and r0, r0, #1')
+ define(`RETVAL2', `RETVAL')
+ define(`func', mpn_cnd_sub_n)
+ define(`func_nc', mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ ldr n, [sp]
+ push { r4-r9 }
+
+ cmp cnd, #1
+ sbc cnd, cnd, cnd C conditionally set to 0xffffffff
+
+ ands r6, n, #3
+ mov n, n, lsr #2
+ beq L(b00)
+ cmp r6, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): ldr r5, [up], #4
+ ldr r7, [vp], #4
+ bic r7, r7, cnd
+ ADDSUB r9, r5, r7
+ ldrd r4, r5, [up, #0]
+ ldrd r6, r7, [vp, #0]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ str r9, [rp], #-4
+ b L(lo)
+
+L(b00): ldrd r4, r5, [up], #-8
+ ldrd r6, r7, [vp], #-8
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ INITCY
+ sub rp, rp, #16
+ b L(mid)
+
+L(b01): ldr r5, [up], #-4
+ ldr r7, [vp], #-4
+ bic r7, r7, cnd
+ ADDSUB r9, r5, r7
+ str r9, [rp], #-12
+ tst n, n
+ beq L(wd1)
+L(gt1): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ b L(mid)
+
+L(b10): ldrd r4, r5, [up]
+ ldrd r6, r7, [vp]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ INITCY
+ sub rp, rp, #8
+ b L(lo)
+
+ ALIGN(16)
+L(top): ldrd r6, r7, [vp, #8]
+ ldrd r4, r5, [up, #8]
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ strd r8, r9, [rp, #8]
+L(mid): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ ldrd r6, r7, [vp, #16]!
+ ldrd r4, r5, [up, #16]!
+ bic r6, r6, cnd
+ bic r7, r7, cnd
+ sub n, n, #1
+ strd r8, r9, [rp, #16]!
+L(lo): ADDSUBC r8, r4, r6
+ ADDSUBC r9, r5, r7
+ tst n, n
+ bne L(top)
+
+L(end): strd r8, r9, [rp, #8]
+L(wd1): RETVAL
+ pop { r4-r9 }
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/com.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/com.asm
new file mode 100644
index 0000000..a258afe
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/com.asm
@@ -0,0 +1,180 @@
+dnl ARM mpn_com optimised for A15.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 2.5
+C Cortex-A15 1.0
+
+C This is great A15 core register code, but it is a bit large.
+C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`FEEDIN_VARIANT', 1) C alternatives: 0 1 2
+define(`UNROLL', 4x2) C alternatives: 4 4x2
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+ push { r4-r5,r8-r9 }
+
+ifelse(FEEDIN_VARIANT,0,`
+ ands r12, n, #3
+ mov n, n, lsr #2
+ beq L(b00a)
+ tst r12, #1
+ beq L(bx0)
+ ldr r5, [up], #4
+ mvn r9, r5
+ str r9, [rp], #4
+ tst r12, #2
+ beq L(b00)
+L(bx0): ldrd r4, r5, [up, #0]
+ sub rp, rp, #8
+ b L(lo)
+L(b00): tst n, n
+ beq L(wd1)
+L(b00a):ldrd r4, r5, [up], #-8
+ sub rp, rp, #16
+ b L(mid)
+')
+ifelse(FEEDIN_VARIANT,1,`
+ and r12, n, #3
+ mov n, n, lsr #2
+ tst r12, #1
+ beq L(bx0)
+ ldr r5, [up], #4
+ mvn r9, r5
+ str r9, [rp], #4
+L(bx0): tst r12, #2
+ beq L(b00)
+ ldrd r4, r5, [up, #0]
+ sub rp, rp, #8
+ b L(lo)
+L(b00): tst n, n
+ beq L(wd1)
+ ldrd r4, r5, [up], #-8
+ sub rp, rp, #16
+ b L(mid)
+')
+ifelse(FEEDIN_VARIANT,2,`
+ ands r12, n, #3
+ mov n, n, lsr #2
+ beq L(b00)
+ cmp r12, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): ldr r5, [up], #4
+ mvn r9, r5
+ ldrd r4, r5, [up, #0]
+ str r9, [rp], #-4
+ b L(lo)
+
+L(b00): ldrd r4, r5, [up], #-8
+ sub rp, rp, #16
+ b L(mid)
+
+L(b01): ldr r5, [up], #-4
+ mvn r9, r5
+ str r9, [rp], #-12
+ tst n, n
+ beq L(wd1)
+L(gt1): ldrd r4, r5, [up, #8]
+ b L(mid)
+
+L(b10): ldrd r4, r5, [up]
+ sub rp, rp, #8
+ b L(lo)
+')
+ ALIGN(16)
+ifelse(UNROLL,4,`
+L(top): ldrd r4, r5, [up, #8]
+ strd r8, r9, [rp, #8]
+L(mid): mvn r8, r4
+ mvn r9, r5
+ ldrd r4, r5, [up, #16]!
+ strd r8, r9, [rp, #16]!
+ sub n, n, #1
+L(lo): mvn r8, r4
+ mvn r9, r5
+ tst n, n
+ bne L(top)
+')
+ifelse(UNROLL,4x2,`
+L(top): ldrd r4, r5, [up, #8]
+ strd r8, r9, [rp, #8]
+L(mid): mvn r8, r4
+ mvn r9, r5
+ ldrd r4, r5, [up, #16]
+ strd r8, r9, [rp, #16]
+ mvn r8, r4
+ mvn r9, r5
+ sub n, n, #2
+ tst n, n
+ bmi L(dne)
+ ldrd r4, r5, [up, #24]
+ strd r8, r9, [rp, #24]
+ mvn r8, r4
+ mvn r9, r5
+ ldrd r4, r5, [up, #32]!
+ strd r8, r9, [rp, #32]!
+L(lo): mvn r8, r4
+ mvn r9, r5
+ tst n, n
+ bne L(top)
+')
+
+L(end): strd r8, r9, [rp, #8]
+L(wd1): pop { r4-r5,r8-r9 }
+ bx r14
+ifelse(UNROLL,4x2,`
+L(dne): strd r8, r9, [rp, #24]
+ pop { r4-r5,r8-r9 }
+ bx r14
+')
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/gmp-mparam.h b/gmp-6.3.0/mpn/arm/v7a/cora15/gmp-mparam.h
new file mode 100644
index 0000000..409cbbb
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/gmp-mparam.h
@@ -0,0 +1,212 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 2000 MHz Cortex-A15 with Neon (in spite of file position) */
+/* FFT tuning limit = 50,736,668 */
+/* Generated by tuneup.c, 2019-10-22, gcc 5.4 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1N_PI1_METHOD 1 /* 49.14% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 17
+
+#define DIV_1_VS_MUL_1_PERCENT 267
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 114
+#define MUL_TOOM44_THRESHOLD 178
+#define MUL_TOOM6H_THRESHOLD 238
+#define MUL_TOOM8H_THRESHOLD 597
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 113
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 115
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 115
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 154
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 38
+#define SQR_TOOM3_THRESHOLD 126
+#define SQR_TOOM4_THRESHOLD 336
+#define SQR_TOOM6_THRESHOLD 446
+#define SQR_TOOM8_THRESHOLD 650
+
+#define MULMID_TOOM42_THRESHOLD 52
+
+#define MULMOD_BNM1_THRESHOLD 23
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define MUL_FFT_MODF_THRESHOLD 575 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 575, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 19, 6}, \
+ { 39, 7}, { 25, 6}, { 51, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 51, 8}, { 27, 9}, { 15, 8}, \
+ { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
+ { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 71, 8}, { 143, 9}, \
+ { 87,10}, { 47, 9}, { 111,11}, { 31,10}, \
+ { 63, 9}, { 143,10}, { 79, 9}, { 159,10}, \
+ { 95,11}, { 63,10}, { 143, 9}, { 287,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287,11}, { 159,10}, { 335, 9}, \
+ { 671,10}, { 367, 9}, { 735,11}, { 191,10}, \
+ { 383, 9}, { 799,10}, { 415,11}, { 223,12}, \
+ { 127,10}, { 543,11}, { 287,10}, { 575,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,11}, { 447,13}, { 127,12}, { 255,11}, \
+ { 543,10}, { 1087,11}, { 575,12}, { 319,11}, \
+ { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \
+ { 831,12}, { 447,11}, { 959,13}, { 255,12}, \
+ { 511,11}, { 1087,12}, { 575,11}, { 1151,12}, \
+ { 639,11}, { 1343,12}, { 703,13}, { 383,12}, \
+ { 767,11}, { 1599,12}, { 895,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 639,12}, { 1407,13}, \
+ { 767,12}, { 1599,13}, { 895,14}, { 511,13}, \
+ { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
+ { 1279,12}, { 2559,13}, { 1407,14}, { 767,13}, \
+ { 1535,12}, { 3135,13}, { 1663,15}, { 511,14}, \
+ { 1023,13}, { 2303,14}, { 1279,13}, { 2559,12}, \
+ { 5119,13}, { 2687,14}, { 1535,13}, { 3071,12}, \
+ { 6143,13}, { 3199,12}, { 6399,14}, { 1791,15}, \
+ { 1023,14}, { 2047,13}, { 4095,14}, { 2303,13}, \
+ { 4607,12}, { 9215,13}, { 4863,12}, { 9727,14}, \
+ { 2559,13}, { 5119,15}, { 1535,14}, { 3071,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 155
+#define MUL_FFT_THRESHOLD 5760
+
+#define SQR_FFT_MODF_THRESHOLD 525 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 525, 5}, { 25, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 25, 6}, { 51, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 51, 8}, { 27, 7}, { 55, 9}, \
+ { 15, 8}, { 31, 7}, { 63, 8}, { 39, 9}, \
+ { 23, 8}, { 51,10}, { 15, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 99, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 95,11}, { 63,10}, { 143, 9}, \
+ { 287, 8}, { 575, 9}, { 303,10}, { 159,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \
+ { 191,10}, { 399, 9}, { 799,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,12}, { 127,10}, \
+ { 543,11}, { 287,10}, { 575,11}, { 319,10}, \
+ { 639,11}, { 351,10}, { 703,12}, { 191,11}, \
+ { 383,10}, { 799,11}, { 415,10}, { 831,11}, \
+ { 447,13}, { 127,11}, { 543,10}, { 1087,11}, \
+ { 607,12}, { 319,11}, { 735,12}, { 383,11}, \
+ { 831,12}, { 447,11}, { 959,12}, { 511,11}, \
+ { 1023,12}, { 575,11}, { 1151,12}, { 639,11}, \
+ { 1279,12}, { 703,13}, { 383,12}, { 767,11}, \
+ { 1535,12}, { 831,11}, { 1663,12}, { 895,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 639,12}, \
+ { 1343,13}, { 767,12}, { 1599,13}, { 895,14}, \
+ { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
+ { 2303,13}, { 1279,14}, { 767,13}, { 1535,12}, \
+ { 3135,13}, { 1663,15}, { 511,14}, { 1023,13}, \
+ { 2047,12}, { 4095,13}, { 2303,14}, { 1279,13}, \
+ { 2559,12}, { 5119,14}, { 1535,13}, { 3071,12}, \
+ { 6143,13}, { 3199,12}, { 6399,14}, { 1791,15}, \
+ { 1023,14}, { 2047,13}, { 4095,14}, { 2303,13}, \
+ { 4607,12}, { 9215,13}, { 4863,12}, { 9727,14}, \
+ { 2559,15}, { 1535,14}, { 3071,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 154
+#define SQR_FFT_THRESHOLD 5312
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 38
+#define MULLO_MUL_N_THRESHOLD 10950
+#define SQRLO_BASECASE_THRESHOLD 10
+#define SQRLO_DC_THRESHOLD 35
+#define SQRLO_SQR_THRESHOLD 10323
+
+#define DC_DIV_QR_THRESHOLD 57
+#define DC_DIVAPPR_Q_THRESHOLD 254
+#define DC_BDIV_QR_THRESHOLD 48
+#define DC_BDIV_Q_THRESHOLD 286
+
+#define INV_MULMOD_BNM1_THRESHOLD 55
+#define INV_NEWTON_THRESHOLD 252
+#define INV_APPR_THRESHOLD 252
+
+#define BINV_NEWTON_THRESHOLD 372
+#define REDC_1_TO_REDC_2_THRESHOLD 61
+#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */
+
+#define MU_DIV_QR_THRESHOLD 1858
+#define MU_DIVAPPR_Q_THRESHOLD 1787
+#define MUPI_DIV_QR_THRESHOLD 122
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 1836
+
+#define POWM_SEC_TABLE 1,14,200,480,1532
+
+#define GET_STR_DC_THRESHOLD 16
+#define GET_STR_PRECOMPUTE_THRESHOLD 33
+#define SET_STR_DC_THRESHOLD 104
+#define SET_STR_PRECOMPUTE_THRESHOLD 1120
+
+#define FAC_DSC_THRESHOLD 164
+#define FAC_ODD_THRESHOLD 27
+
+#define MATRIX22_STRASSEN_THRESHOLD 19
+#define HGCD2_DIV1_METHOD 1 /* 3.70% faster than 3 */
+#define HGCD_THRESHOLD 137
+#define HGCD_APPR_THRESHOLD 157
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 610
+#define GCDEXT_DC_THRESHOLD 443
+#define JACOBI_BASE_METHOD 4 /* 12.66% faster than 1 */
+
+/* Tuneup completed successfully, took 69757 seconds */
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/logops_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/logops_n.asm
new file mode 100644
index 0000000..0602614
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/logops_n.asm
@@ -0,0 +1,253 @@
+dnl ARM mpn_and_n, mpn_andn_n. mpn_nand_n, etc, optimised for A15.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb
+C and andn ior xor nand iorn nior xnor
+C StrongARM ? ?
+C XScale ? ?
+C Cortex-A7 ? ?
+C Cortex-A8 ? ?
+C Cortex-A9 3.5 3.56
+C Cortex-A15 1.27 1.64
+
+C This is great A15 core register code, but it is a bit large.
+C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 -
+C v6t2 -
+C v7a -
+
+define(`FEEDIN_VARIANT', 1) C alternatives: 0 1 2
+define(`UNROLL', 4x2) C alternatives: 4 4x2
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+ define(`func', `mpn_and_n')
+ define(`LOGOP', `and $1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+ define(`func', `mpn_andn_n')
+ define(`LOGOP', `bic $1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+ define(`func', `mpn_nand_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `and $1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+ define(`func', `mpn_ior_n')
+ define(`LOGOP', `orr $1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func', `mpn_iorn_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `bic $1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+ define(`func', `mpn_nior_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `orr $1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+ define(`func', `mpn_xor_n')
+ define(`LOGOP', `eor $1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func', `mpn_xnor_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `eor $1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+ push { r4-r9 }
+
+ifelse(FEEDIN_VARIANT,0,`
+ ands r6, n, #3
+ mov n, n, lsr #2
+ beq L(b00a)
+ tst r6, #1
+ beq L(bx0)
+ ldr r5, [up], #4
+ ldr r7, [vp], #4
+ LOGOP( r9, r5, r7)
+ POSTOP( r9)
+ str r9, [rp], #4
+ tst r6, #2
+ beq L(b00)
+L(bx0): ldrd r4, r5, [up, #0]
+ ldrd r6, r7, [vp, #0]
+ sub rp, rp, #8
+ b L(lo)
+L(b00): tst n, n
+ beq L(wd1)
+L(b00a):ldrd r4, r5, [up], #-8
+ ldrd r6, r7, [vp], #-8
+ sub rp, rp, #16
+ b L(mid)
+')
+ifelse(FEEDIN_VARIANT,1,`
+ and r6, n, #3
+ mov n, n, lsr #2
+ tst r6, #1
+ beq L(bx0)
+ ldr r5, [up], #4
+ ldr r7, [vp], #4
+ LOGOP( r9, r5, r7)
+ POSTOP( r9)
+ str r9, [rp], #4
+L(bx0): tst r6, #2
+ beq L(b00)
+ ldrd r4, r5, [up, #0]
+ ldrd r6, r7, [vp, #0]
+ sub rp, rp, #8
+ b L(lo)
+L(b00): tst n, n
+ beq L(wd1)
+ ldrd r4, r5, [up], #-8
+ ldrd r6, r7, [vp], #-8
+ sub rp, rp, #16
+ b L(mid)
+')
+ifelse(FEEDIN_VARIANT,2,`
+ ands r6, n, #3
+ mov n, n, lsr #2
+ beq L(b00)
+ cmp r6, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): ldr r5, [up], #4
+ ldr r7, [vp], #4
+ LOGOP( r9, r5, r7)
+ ldrd r4, r5, [up, #0]
+ ldrd r6, r7, [vp, #0]
+ POSTOP( r9)
+ str r9, [rp], #-4
+ b L(lo)
+
+L(b00): ldrd r4, r5, [up], #-8
+ ldrd r6, r7, [vp], #-8
+ sub rp, rp, #16
+ b L(mid)
+
+L(b01): ldr r5, [up], #-4
+ ldr r7, [vp], #-4
+ LOGOP( r9, r5, r7)
+ POSTOP( r9)
+ str r9, [rp], #-12
+ tst n, n
+ beq L(wd1)
+L(gt1): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ b L(mid)
+
+L(b10): ldrd r4, r5, [up]
+ ldrd r6, r7, [vp]
+ sub rp, rp, #8
+ b L(lo)
+')
+ ALIGN(16)
+ifelse(UNROLL,4,`
+L(top): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #8]
+L(mid): LOGOP( r8, r4, r6)
+ LOGOP( r9, r5, r7)
+ ldrd r4, r5, [up, #16]!
+ ldrd r6, r7, [vp, #16]!
+ POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #16]!
+ sub n, n, #1
+L(lo): LOGOP( r8, r4, r6)
+ LOGOP( r9, r5, r7)
+ tst n, n
+ bne L(top)
+')
+ifelse(UNROLL,4x2,`
+L(top): ldrd r4, r5, [up, #8]
+ ldrd r6, r7, [vp, #8]
+ POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #8]
+L(mid): LOGOP( r8, r4, r6)
+ LOGOP( r9, r5, r7)
+ ldrd r4, r5, [up, #16]
+ ldrd r6, r7, [vp, #16]
+ POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #16]
+ LOGOP( r8, r4, r6)
+ LOGOP( r9, r5, r7)
+ sub n, n, #2
+ tst n, n
+ bmi L(dne)
+ ldrd r4, r5, [up, #24]
+ ldrd r6, r7, [vp, #24]
+ POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #24]
+ LOGOP( r8, r4, r6)
+ LOGOP( r9, r5, r7)
+ ldrd r4, r5, [up, #32]!
+ ldrd r6, r7, [vp, #32]!
+ POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #32]!
+L(lo): LOGOP( r8, r4, r6)
+ LOGOP( r9, r5, r7)
+ tst n, n
+ bne L(top)
+')
+
+L(end): POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #8]
+L(wd1): pop { r4-r9 }
+ bx r14
+ifelse(UNROLL,4x2,`
+L(dne): POSTOP( r8)
+ POSTOP( r9)
+ strd r8, r9, [rp, #24]
+ pop { r4-r9 }
+ bx r14
+')
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/mul_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/mul_1.asm
new file mode 100644
index 0000000..766ba5c
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/mul_1.asm
@@ -0,0 +1,104 @@
+dnl ARM mpn_mul_1 optimised for A15.
+
+dnl Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 5.25 3.25
+C Cortex-A15 2.25 this
+
+
+C This runs well on A15 but very poorly on A9. By scheduling loads and adds
+C it is possible to get good A9 performance as well, but at the cost of using
+C many more (callee-saves) registers.
+
+C This is armv5 code, optimized for the armv7a cpu A15. Its location in the
+C GMP file structure might be misleading.
+
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`v0', `r3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1c)
+ ldr r12, [sp]
+ b L(ent)
+EPILOGUE()
+PROLOGUE(mpn_mul_1)
+ mov r12, #0
+L(ent): push {r4-r7}
+
+ ldr r6, [up], #4
+ tst n, #1
+ beq L(bx0)
+
+L(bx1): umull r4, r7, r6, v0
+ adds r4, r4, r12
+ tst n, #2
+ beq L(lo1)
+ b L(lo3)
+
+L(bx0): umull r4, r5, r6, v0
+ adds r4, r4, r12
+ tst n, #2
+ beq L(lo0)
+ b L(lo2)
+
+L(top): ldr r6, [up], #4
+ str r4, [rp], #4
+ umull r4, r5, r6, v0
+ adds r4, r4, r7
+L(lo0): ldr r6, [up], #4
+ str r4, [rp], #4
+ umull r4, r7, r6, v0
+ adcs r4, r4, r5
+L(lo3): ldr r6, [up], #4
+ str r4, [rp], #4
+ umull r4, r5, r6, v0
+ adcs r4, r4, r7
+L(lo2): ldr r6, [up], #4
+ str r4, [rp], #4
+ umull r4, r7, r6, v0
+ adcs r4, r4, r5
+L(lo1): adc r7, r7, #0
+ subs n, n, #4
+ bgt L(top)
+
+ str r4, [rp]
+ mov r0, r7
+ pop {r4-r7}
+ bx lr
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
new file mode 100644
index 0000000..d8cfe3f
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
@@ -0,0 +1,43 @@
+dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+
+ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
+
+include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
new file mode 100644
index 0000000..b48204d
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
@@ -0,0 +1,43 @@
+dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+
+ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
+ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
+ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
+
+include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
new file mode 100644
index 0000000..51f93c1
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
@@ -0,0 +1,144 @@
+dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+
+C cycles/limb
+C StrongARM -
+C XScale -
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 5.25
+C Cortex-A15 2.25
+
+C TODO
+C * Consider using 4-way feed-in code.
+C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
+C insufficiently for A7 and A8.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`DO_add', `
+ define(`ADCSBCS', `adcs $1, $2, $3')
+ define(`CLRCY', `cmn r13, #1')
+ define(`RETVAL', `adc r0, $1, #0')
+ define(`func', mpn_addlsh`'LSH`'_n)')
+ifdef(`DO_sub', `
+ define(`ADCSBCS', `sbcs $1, $2, $3')
+ define(`CLRCY', `cmp r13, #0')
+ define(`RETVAL', `sbc $2, $2, $2
+ cmn $2, #1
+ adc r0, $1, #0')
+ define(`func', mpn_sublsh`'LSH`'_n)')
+ifdef(`DO_rsb', `
+ define(`ADCSBCS', `sbcs $1, $3, $2')
+ define(`CLRCY', `cmp r13, #0')
+ define(`RETVAL', `sbc r0, $1, #0')
+ define(`func', mpn_rsblsh`'LSH`'_n)')
+
+
+ASM_START()
+PROLOGUE(func)
+ push {r4-r10}
+ vmov.i8 d0, #0 C could feed carry through here
+ CLRCY
+ tst n, #1
+ beq L(bb0)
+
+L(bb1): vld1.32 {d3[0]}, [vp]!
+ vsli.u32 d0, d3, #LSH
+ ldr r12, [up], #4
+ vmov.32 r5, d0[0]
+ vshr.u32 d0, d3, #32-LSH
+ ADCSBCS( r12, r12, r5)
+ str r12, [rp], #4
+ bics n, n, #1
+ beq L(rtn)
+
+L(bb0): tst n, #2
+ beq L(b00)
+
+L(b10): vld1.32 {d3}, [vp]!
+ vsli.u64 d0, d3, #LSH
+ ldmia up!, {r10,r12}
+ vmov r4, r5, d0
+ vshr.u64 d0, d3, #64-LSH
+ ADCSBCS( r10, r10, r4)
+ ADCSBCS( r12, r12, r5)
+ stmia rp!, {r10,r12}
+ bics n, n, #2
+ beq L(rtn)
+
+L(b00): vld1.32 {d2}, [vp]!
+ vsli.u64 d0, d2, #LSH
+ vshr.u64 d1, d2, #64-LSH
+ vld1.32 {d3}, [vp]!
+ vsli.u64 d1, d3, #LSH
+ vmov r6, r7, d0
+ vshr.u64 d0, d3, #64-LSH
+ sub n, n, #4
+ tst n, n
+ beq L(end)
+
+ ALIGN(16)
+L(top): ldmia up!, {r8,r9,r10,r12}
+ vld1.32 {d2}, [vp]!
+ vsli.u64 d0, d2, #LSH
+ vmov r4, r5, d1
+ vshr.u64 d1, d2, #64-LSH
+ ADCSBCS( r8, r8, r6)
+ ADCSBCS( r9, r9, r7)
+ vld1.32 {d3}, [vp]!
+ vsli.u64 d1, d3, #LSH
+ vmov r6, r7, d0
+ vshr.u64 d0, d3, #64-LSH
+ ADCSBCS( r10, r10, r4)
+ ADCSBCS( r12, r12, r5)
+ stmia rp!, {r8,r9,r10,r12}
+ sub n, n, #4
+ tst n, n
+ bne L(top)
+
+L(end): ldmia up!, {r8,r9,r10,r12}
+ vmov r4, r5, d1
+ ADCSBCS( r8, r8, r6)
+ ADCSBCS( r9, r9, r7)
+ ADCSBCS( r10, r10, r4)
+ ADCSBCS( r12, r12, r5)
+ stmia rp!, {r8,r9,r10,r12}
+L(rtn): vmov.32 r0, d0[0]
+ RETVAL( r0, r1)
+ pop {r4-r10}
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/com.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/com.asm
new file mode 100644
index 0000000..9e7a629
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/com.asm
@@ -0,0 +1,97 @@
+dnl ARM Neon mpn_com optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 2.1
+C Cortex-A15 0.65
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)
+ cmp n, #7
+ ble L(bc)
+
+C Perform a few initial operation until rp is 128-bit aligned
+ tst rp, #4
+ beq L(al1)
+ vld1.32 {d0[0]}, [up]!
+ sub n, n, #1
+ vmvn d0, d0
+ vst1.32 {d0[0]}, [rp]!
+L(al1): tst rp, #8
+ beq L(al2)
+ vld1.32 {d0}, [up]!
+ sub n, n, #2
+ vmvn d0, d0
+ vst1.32 {d0}, [rp:64]!
+L(al2): vld1.32 {q2}, [up]!
+ subs n, n, #12
+ blt L(end)
+
+ ALIGN(16)
+L(top): vld1.32 {q0}, [up]!
+ vmvn q2, q2
+ subs n, n, #8
+ vst1.32 {q2}, [rp:128]!
+ vld1.32 {q2}, [up]!
+ vmvn q0, q0
+ vst1.32 {q0}, [rp:128]!
+ bge L(top)
+
+L(end): vmvn q2, q2
+ vst1.32 {q2}, [rp:128]!
+
+C Handle last 0-7 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc): tst n, #4
+ beq L(tl1)
+ vld1.32 {q0}, [up]!
+ vmvn q0, q0
+ vst1.32 {q0}, [rp]!
+L(tl1): tst n, #2
+ beq L(tl2)
+ vld1.32 {d0}, [up]!
+ vmvn d0, d0
+ vst1.32 {d0}, [rp]!
+L(tl2): tst n, #1
+ beq L(tl3)
+ vld1.32 {d0[0]}, [up]
+ vmvn d0, d0
+ vst1.32 {d0[0]}, [rp]
+L(tl3): bx lr
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyd.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyd.asm
new file mode 100644
index 0000000..98fe535
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyd.asm
@@ -0,0 +1,110 @@
+dnl ARM Neon mpn_copyd optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM -
+C XScale -
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 1.75 slower than core register code
+C Cortex-A15 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+ add rp, rp, n, lsl #2
+ add up, up, n, lsl #2
+
+ cmp n, #7
+ ble L(bc)
+
+C Copy until rp is 128-bit aligned
+ tst rp, #4
+ beq L(al1)
+ sub up, up, #4
+ vld1.32 {d22[0]}, [up]
+ sub n, n, #1
+ sub rp, rp, #4
+ vst1.32 {d22[0]}, [rp]
+L(al1): tst rp, #8
+ beq L(al2)
+ sub up, up, #8
+ vld1.32 {d22}, [up]
+ sub n, n, #2
+ sub rp, rp, #8
+ vst1.32 {d22}, [rp:64]
+L(al2): sub up, up, #16
+ vld1.32 {d26-d27}, [up]
+ subs n, n, #12
+ sub rp, rp, #16 C offset rp for loop
+ blt L(end)
+
+ sub up, up, #16 C offset up for loop
+ mov r12, #-16
+
+ ALIGN(16)
+L(top): vld1.32 {d22-d23}, [up], r12
+ vst1.32 {d26-d27}, [rp:128], r12
+ vld1.32 {d26-d27}, [up], r12
+ vst1.32 {d22-d23}, [rp:128], r12
+ subs n, n, #8
+ bge L(top)
+
+ add up, up, #16 C undo up offset
+ C rp offset undoing folded
+L(end): vst1.32 {d26-d27}, [rp:128]
+
+C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc): tst n, #4
+ beq L(tl1)
+ sub up, up, #16
+ vld1.32 {d22-d23}, [up]
+ sub rp, rp, #16
+ vst1.32 {d22-d23}, [rp]
+L(tl1): tst n, #2
+ beq L(tl2)
+ sub up, up, #8
+ vld1.32 {d22}, [up]
+ sub rp, rp, #8
+ vst1.32 {d22}, [rp]
+L(tl2): tst n, #1
+ beq L(tl3)
+ sub up, up, #4
+ vld1.32 {d22[0]}, [up]
+ sub rp, rp, #4
+ vst1.32 {d22[0]}, [rp]
+L(tl3): bx lr
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm
new file mode 100644
index 0000000..2e05afe
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm
@@ -0,0 +1,90 @@
+dnl ARM Neon mpn_copyi optimised for A15.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM -
+C XScale -
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 1.75 slower than core register code
+C Cortex-A15 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+ cmp n, #7
+ ble L(bc)
+
+C Copy until rp is 128-bit aligned
+ tst rp, #4
+ beq L(al1)
+ vld1.32 {d22[0]}, [up]!
+ sub n, n, #1
+ vst1.32 {d22[0]}, [rp]!
+L(al1): tst rp, #8
+ beq L(al2)
+ vld1.32 {d22}, [up]!
+ sub n, n, #2
+ vst1.32 {d22}, [rp:64]!
+L(al2): vld1.32 {d26-d27}, [up]!
+ subs n, n, #12
+ blt L(end)
+
+ ALIGN(16)
+L(top): vld1.32 {d22-d23}, [up]!
+ vst1.32 {d26-d27}, [rp:128]!
+ vld1.32 {d26-d27}, [up]!
+ vst1.32 {d22-d23}, [rp:128]!
+ subs n, n, #8
+ bge L(top)
+
+L(end): vst1.32 {d26-d27}, [rp:128]!
+
+C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc): tst n, #4
+ beq L(tl1)
+ vld1.32 {d22-d23}, [up]!
+ vst1.32 {d22-d23}, [rp]!
+L(tl1): tst n, #2
+ beq L(tl2)
+ vld1.32 {d22}, [up]!
+ vst1.32 {d22}, [rp]!
+L(tl2): tst n, #1
+ beq L(tl3)
+ vld1.32 {d22[0]}, [up]
+ vst1.32 {d22[0]}, [rp]
+L(tl3): bx lr
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
new file mode 100644
index 0000000..2c11d6d
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
@@ -0,0 +1,177 @@
+dnl ARM Neon mpn_rsh1add_n, mpn_rsh1sub_n.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM -
+C XScale -
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 4-5
+C Cortex-A15 2.5
+
+C TODO
+C * Try to make this smaller, its size (384 bytes) is excessive.
+C * Try to reach 2.25 c/l on A15, to match the addlsh_1 family.
+C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
+C insufficiently for A7 and A8.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(`ADDSUBS', `adds $1, $2, $3')
+ define(`ADCSBCS', `adcs $1, $2, $3')
+ define(`IFADD', `$1')
+ define(`IFSUB', `')
+ define(`func', mpn_rsh1add_n)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(`ADDSUBS', `subs $1, $2, $3')
+ define(`ADCSBCS', `sbcs $1, $2, $3')
+ define(`IFADD', `')
+ define(`IFSUB', `$1')
+ define(`func', mpn_rsh1sub_n)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+ push {r4-r10}
+
+ ands r4, n, #3
+ beq L(b00)
+ cmp r4, #2
+ blo L(b01)
+ beq L(b10)
+
+L(b11): ldmia up!, {r9,r10,r12}
+ ldmia vp!, {r5,r6,r7}
+ ADDSUBS( r9, r9, r5)
+ vmov d4, r9, r9
+ ADCSBCS( r10, r10, r6)
+ ADCSBCS( r12, r12, r7)
+ vshr.u64 d3, d4, #1
+ vmov d1, r10, r12
+ vsli.u64 d3, d1, #31
+ vshr.u64 d2, d1, #1
+ vst1.32 d3[0], [rp]!
+ bics n, n, #3
+ beq L(wd2)
+L(gt3): ldmia up!, {r8,r9,r10,r12}
+ ldmia vp!, {r4,r5,r6,r7}
+ b L(mi0)
+
+L(b10): ldmia up!, {r10,r12}
+ ldmia vp!, {r6,r7}
+ ADDSUBS( r10, r10, r6)
+ ADCSBCS( r12, r12, r7)
+ vmov d4, r10, r12
+ bics n, n, #2
+ vshr.u64 d2, d4, #1
+ beq L(wd2)
+L(gt2): ldmia up!, {r8,r9,r10,r12}
+ ldmia vp!, {r4,r5,r6,r7}
+ b L(mi0)
+
+L(b01): ldr r12, [up], #4
+ ldr r7, [vp], #4
+ ADDSUBS( r12, r12, r7)
+ vmov d4, r12, r12
+ bics n, n, #1
+ bne L(gt1)
+ mov r5, r12, lsr #1
+IFADD(` adc r1, n, #0')
+IFSUB(` adc r1, n, #1')
+ bfi r5, r1, #31, #1
+ str r5, [rp]
+ and r0, r12, #1
+ pop {r4-r10}
+ bx r14
+L(gt1): ldmia up!, {r8,r9,r10,r12}
+ ldmia vp!, {r4,r5,r6,r7}
+ vshr.u64 d2, d4, #1
+ ADCSBCS( r8, r8, r4)
+ ADCSBCS( r9, r9, r5)
+ vmov d0, r8, r9
+ ADCSBCS( r10, r10, r6)
+ ADCSBCS( r12, r12, r7)
+ vsli.u64 d2, d0, #31
+ vshr.u64 d3, d0, #1
+ vst1.32 d2[0], [rp]!
+ b L(mi1)
+
+L(b00): ldmia up!, {r8,r9,r10,r12}
+ ldmia vp!, {r4,r5,r6,r7}
+ ADDSUBS( r8, r8, r4)
+ ADCSBCS( r9, r9, r5)
+ vmov d4, r8, r9
+ ADCSBCS( r10, r10, r6)
+ ADCSBCS( r12, r12, r7)
+ vshr.u64 d3, d4, #1
+ b L(mi1)
+
+ ALIGN(16)
+L(top): ldmia up!, {r8,r9,r10,r12}
+ ldmia vp!, {r4,r5,r6,r7}
+ vsli.u64 d3, d1, #63
+ vshr.u64 d2, d1, #1
+ vst1.32 d3, [rp]!
+L(mi0): ADCSBCS( r8, r8, r4)
+ ADCSBCS( r9, r9, r5)
+ vmov d0, r8, r9
+ ADCSBCS( r10, r10, r6)
+ ADCSBCS( r12, r12, r7)
+ vsli.u64 d2, d0, #63
+ vshr.u64 d3, d0, #1
+ vst1.32 d2, [rp]!
+L(mi1): vmov d1, r10, r12
+ sub n, n, #4
+ tst n, n
+ bne L(top)
+
+L(end): vsli.u64 d3, d1, #63
+ vshr.u64 d2, d1, #1
+ vst1.32 d3, [rp]!
+L(wd2): vmov r4, r5, d2
+IFADD(` adc r1, n, #0')
+IFSUB(` adc r1, n, #1')
+ bfi r5, r1, #31, #1
+ stm rp, {r4,r5}
+
+L(rtn): vmov.32 r0, d4[0]
+ and r0, r0, #1
+ pop {r4-r10}
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/submul_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/submul_1.asm
new file mode 100644
index 0000000..ed7bfe8
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/submul_1.asm
@@ -0,0 +1,159 @@
+dnl ARM mpn_submul_1 optimised for A15.
+
+dnl Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb best
+C StrongARM: -
+C XScale ?
+C Cortex-A7 ?
+C Cortex-A8 ?
+C Cortex-A9 5.75 3.75
+C Cortex-A15 2.32 this
+
+C This code uses umlal and umaal for adding in the rp[] data, keeping the
+C recurrency path separate from any multiply instructions. It performs well on
+C A15, but not quite at the multiply bandwidth like the corresponding addmul_1
+C code.
+C
+C We don't use r12 due to ldrd and strd limitations.
+C
+C This loop complements U on the fly,
+C U' = B^n - 1 - U
+C and then uses that
+C R - U*v = R + U'*v + v - B^n v
+
+C Architecture requirements:
+C v5 -
+C v5t -
+C v5te ldrd strd
+C v6 umaal
+C v6t2 -
+C v7a -
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`v0', `r3')
+
+define(`w0', `r10') define(`w1', `r11')
+define(`u0', `r8') define(`u1', `r9')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ sub sp, sp, #32
+ strd r10, r11, [sp, #24]
+ strd r8, r9, [sp, #16]
+ strd r6, r7, [sp, #8]
+ strd r4, r5, [sp, #0]
+C push { r4-r11 }
+
+ ands r6, n, #3
+ sub n, n, #3
+ beq L(b00)
+ cmp r6, #2
+ bcc L(b01)
+ beq L(b10)
+
+L(b11): mov r6, #0
+ ldr u1, [up], #-4
+ ldr w1, [rp], #-16
+ mvn u1, u1
+ adds r7, v0, #0
+ b L(mid)
+
+L(b00): ldrd u0, u1, [up]
+ ldrd w0, w1, [rp], #-12
+ mvn u0, u0
+ mvn u1, u1
+ mov r6, v0
+ umaal w0, r6, u0, v0
+ cmn r13, #0 C carry clear
+ mov r7, #0
+ str w0, [rp, #12]
+ b L(mid)
+
+L(b10): ldrd u0, u1, [up], #8
+ ldrd w0, w1, [rp]
+ mvn u0, u0
+ mvn u1, u1
+ mov r4, v0
+ umaal w0, r4, u0, v0
+ mov r5, #0
+ str w0, [rp], #-4
+ umlal w1, r5, u1, v0
+ adds n, n, #0
+ bmi L(end)
+ b L(top)
+
+L(b01): ldr u1, [up], #4
+ ldr w1, [rp], #-8
+ mvn u1, u1
+ mov r5, v0
+ mov r4, #0
+ umaal w1, r5, u1, v0
+ tst n, n
+ bmi L(end)
+
+C ALIGN(16)
+L(top): ldrd u0, u1, [up, #0]
+ adcs r4, r4, w1
+ mvn u0, u0
+ ldrd w0, w1, [rp, #12]
+ mvn u1, u1
+ mov r6, #0
+ umlal w0, r6, u0, v0 C 1 2
+ adcs r5, r5, w0
+ mov r7, #0
+ strd r4, r5, [rp, #8]
+L(mid): umaal w1, r7, u1, v0 C 2 3
+ ldrd u0, u1, [up, #8]
+ add up, up, #16
+ adcs r6, r6, w1
+ mvn u0, u0
+ ldrd w0, w1, [rp, #20]
+ mvn u1, u1
+ mov r4, #0
+ umlal w0, r4, u0, v0 C 3 4
+ adcs r7, r7, w0
+ mov r5, #0
+ strd r6, r7, [rp, #16]!
+ sub n, n, #4
+ umlal w1, r5, u1, v0 C 0 1
+ tst n, n
+ bpl L(top)
+
+L(end): adcs r4, r4, w1
+ str r4, [rp, #8]
+ adc r0, r5, #0
+ sub r0, v0, r0
+ pop { r4-r11 }
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora17/addmul_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora17/addmul_1.asm
new file mode 100644
index 0000000..c11ed47
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora17/addmul_1.asm
@@ -0,0 +1,34 @@
+dnl ARM mpn_addmul_1
+
+dnl Copyright 2018 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_addmul_1)
+include_mpn(`arm/v6/addmul_1.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora17/gmp-mparam.h b/gmp-6.3.0/mpn/arm/v7a/cora17/gmp-mparam.h
new file mode 100644
index 0000000..143d4bc
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora17/gmp-mparam.h
@@ -0,0 +1,233 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1800 MHz Cortex-A17 with Neon (in spite of file position) */
+/* FFT tuning limit = 51243975 */
+/* Generated by tuneup.c, 2019-10-29, gcc 6.3 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1N_PI1_METHOD 1 /* 54.08% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 45
+
+#define DIV_1_VS_MUL_1_PERCENT 248
+
+#define MUL_TOOM22_THRESHOLD 38
+#define MUL_TOOM33_THRESHOLD 132
+#define MUL_TOOM44_THRESHOLD 200
+#define MUL_TOOM6H_THRESHOLD 303
+#define MUL_TOOM8H_THRESHOLD 478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 137
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 179
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 145
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 191
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 62
+#define SQR_TOOM3_THRESHOLD 189
+#define SQR_TOOM4_THRESHOLD 354
+#define SQR_TOOM6_THRESHOLD 426
+#define SQR_TOOM8_THRESHOLD 608
+
+#define MULMID_TOOM42_THRESHOLD 62
+
+#define MULMOD_BNM1_THRESHOLD 21
+#define SQRMOD_BNM1_THRESHOLD 29
+
+#define MUL_FFT_MODF_THRESHOLD 595 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 595, 5}, { 29, 6}, { 15, 5}, { 31, 6}, \
+ { 16, 5}, { 33, 6}, { 29, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \
+ { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 7}, { 55, 9}, { 15, 8}, { 31, 7}, \
+ { 63, 8}, { 43, 9}, { 23, 8}, { 55, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 83, 9}, \
+ { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \
+ { 143, 8}, { 575,10}, { 159,11}, { 95,10}, \
+ { 191, 9}, { 383, 8}, { 767, 9}, { 399, 8}, \
+ { 799,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511, 8}, { 1023, 9}, { 543, 8}, { 1087, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \
+ { 703,10}, { 367, 9}, { 735,11}, { 191,10}, \
+ { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \
+ { 415, 9}, { 831,10}, { 431, 9}, { 863,11}, \
+ { 223,10}, { 447,12}, { 127,10}, { 511, 9}, \
+ { 1023,10}, { 543, 9}, { 1087,10}, { 607, 9}, \
+ { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \
+ { 351,10}, { 735,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,10}, { 863,11}, { 447,10}, \
+ { 895,13}, { 127,11}, { 511,10}, { 1023,11}, \
+ { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
+ { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
+ { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
+ { 863,10}, { 1727,12}, { 447,11}, { 991,10}, \
+ { 1983,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \
+ { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \
+ { 1599,12}, { 831,11}, { 1727,12}, { 959,11}, \
+ { 1983,13}, { 511,12}, { 1087,11}, { 2239,12}, \
+ { 1215,11}, { 2431,13}, { 639,12}, { 1471,11}, \
+ { 2943,13}, { 767,12}, { 1727,13}, { 895,12}, \
+ { 1983,14}, { 511,13}, { 1023,12}, { 2239,13}, \
+ { 1151,12}, { 2495,13}, { 1279,12}, { 2623,13}, \
+ { 1407,12}, { 2943,14}, { 767,13}, { 1535,12}, \
+ { 3135,13}, { 1663,12}, { 3455,13}, { 1919,12}, \
+ { 3839,15}, { 511,14}, { 1023,13}, { 2175,12}, \
+ { 4479,13}, { 2431,14}, { 1279,13}, { 2943,12}, \
+ { 5887,14}, { 1535,13}, { 3455,14}, { 1791,13}, \
+ { 3967,15}, { 1023,14}, { 2047,13}, { 4479,14}, \
+ { 2303,13}, { 4991,12}, { 9983,14}, { 2559,13}, \
+ { 5247,14}, { 2815,13}, { 5887,15}, { 1535,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 194
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 500 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 500, 5}, { 29, 6}, { 15, 5}, { 31, 6}, \
+ { 16, 5}, { 33, 6}, { 29, 7}, { 15, 6}, \
+ { 32, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \
+ { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55,10}, { 15, 9}, \
+ { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \
+ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
+ { 383, 8}, { 767, 9}, { 399,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,10}, { 351, 9}, { 703,10}, { 367, 9}, \
+ { 735,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 399, 9}, { 799,10}, { 415, 9}, { 831,10}, \
+ { 431, 9}, { 863,10}, { 447,12}, { 127,11}, \
+ { 255,10}, { 511, 9}, { 1023,10}, { 543, 9}, \
+ { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \
+ { 319,10}, { 671,11}, { 351,10}, { 735,12}, \
+ { 191,11}, { 383,10}, { 799,11}, { 415,10}, \
+ { 863,11}, { 447,10}, { 895,13}, { 127,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
+ { 1087,11}, { 607,10}, { 1215,12}, { 319,11}, \
+ { 671,10}, { 1343,11}, { 735,10}, { 1471,12}, \
+ { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \
+ { 447,11}, { 959,10}, { 1919,11}, { 991,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \
+ { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \
+ { 1599,12}, { 831,11}, { 1727,12}, { 959,11}, \
+ { 1919,14}, { 255,13}, { 511,12}, { 1087,11}, \
+ { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \
+ { 1471,11}, { 2943,13}, { 767,12}, { 1727,13}, \
+ { 895,12}, { 1983,14}, { 511,13}, { 1023,12}, \
+ { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \
+ { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \
+ { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \
+ { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \
+ { 2175,12}, { 4479,13}, { 2431,14}, { 1279,13}, \
+ { 2943,12}, { 5887,14}, { 1535,13}, { 3455,14}, \
+ { 1791,13}, { 3967,15}, { 1023,14}, { 2047,13}, \
+ { 4479,14}, { 2303,13}, { 4991,12}, { 9983,14}, \
+ { 2559,13}, { 5119,14}, { 2815,13}, { 5887,15}, \
+ { 1535,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 199
+#define SQR_FFT_THRESHOLD 4736
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 27
+#define MULLO_MUL_N_THRESHOLD 13463
+#define SQRLO_BASECASE_THRESHOLD 0 /* always */
+#define SQRLO_DC_THRESHOLD 26
+#define SQRLO_SQR_THRESHOLD 8907
+
+#define DC_DIV_QR_THRESHOLD 38
+#define DC_DIVAPPR_Q_THRESHOLD 103
+#define DC_BDIV_QR_THRESHOLD 44
+#define DC_BDIV_Q_THRESHOLD 98
+
+#define INV_MULMOD_BNM1_THRESHOLD 78
+#define INV_NEWTON_THRESHOLD 165
+#define INV_APPR_THRESHOLD 115
+
+#define BINV_NEWTON_THRESHOLD 296
+#define REDC_1_TO_REDC_2_THRESHOLD 2
+#define REDC_2_TO_REDC_N_THRESHOLD 147
+
+#define MU_DIV_QR_THRESHOLD 2089
+#define MU_DIVAPPR_Q_THRESHOLD 2089
+#define MUPI_DIV_QR_THRESHOLD 70
+#define MU_BDIV_QR_THRESHOLD 1718
+#define MU_BDIV_Q_THRESHOLD 2089
+
+#define POWM_SEC_TABLE 7,19,107,480,1486
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 29
+#define SET_STR_DC_THRESHOLD 126
+#define SET_STR_PRECOMPUTE_THRESHOLD 541
+
+#define FAC_DSC_THRESHOLD 132
+#define FAC_ODD_THRESHOLD 29
+
+#define MATRIX22_STRASSEN_THRESHOLD 30
+#define HGCD2_DIV1_METHOD 1 /* 6.55% faster than 3 */
+#define HGCD_THRESHOLD 54
+#define HGCD_APPR_THRESHOLD 52
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 303
+#define GCDEXT_DC_THRESHOLD 225
+#define JACOBI_BASE_METHOD 4 /* 9.73% faster than 1 */
+
+/* Tuneup completed successfully, took 111418 seconds */
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora17/mod_34lsub1.asm b/gmp-6.3.0/mpn/arm/v7a/cora17/mod_34lsub1.asm
new file mode 100644
index 0000000..39e5a15
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora17/mod_34lsub1.asm
@@ -0,0 +1,121 @@
+dnl ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl Copyright 2012, 2013, 2018 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A5 2.67
+C Cortex-A7 2.37
+C Cortex-A8 2.34
+C Cortex-A9 ?
+C Cortex-A15 1.39
+C Cortex-A17 1.60
+C Cortex-A53 2.51
+
+define(`ap', r0)
+define(`n', r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C * Write cleverer summation code.
+C * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+ push { r4, r5, r6, r7 }
+
+ subs n, n, #3
+ mov r7, #0
+ blt L(le2) C n <= 2
+
+ ldmia ap!, { r2, r3, r12 }
+ subs n, n, #3
+ blt L(sum) C n <= 5
+ mov r7, #0
+ b L(mid)
+
+L(top): adds r2, r2, r4
+ adcs r3, r3, r5
+ adcs r12, r12, r6
+ adc r7, r7, #0
+L(mid): ldmia ap!, { r4, r5, r6 }
+ subs n, n, #3
+ bpl L(top)
+
+ adds r2, r2, r4
+ adcs r3, r3, r5
+ adcs r12, r12, r6
+ adc r7, r7, #0 C r7 <= 1
+
+L(sum): cmn n, #2
+ movlo r4, #0
+ ldrhs r4, [ap], #4
+ movls r5, #0
+ ldrhi r5, [ap], #4
+
+ adds r2, r2, r4
+ adcs r3, r3, r5
+ adcs r12, r12, #0
+ adc r7, r7, #0 C r7 <= 2
+
+L(sum2):
+ bic r0, r2, #0xff000000
+ add r0, r0, r2, lsr #24
+ add r0, r0, r7
+
+ mov r7, r3, lsl #8
+ bic r2, r7, #0xff000000
+ add r0, r0, r2
+ add r0, r0, r3, lsr #16
+
+ mov r2, r12, lsl #16
+ bic r1, r2, #0xff000000
+ add r0, r0, r1
+ add r0, r0, r12, lsr #8
+
+ pop { r4, r5, r6, r7 }
+ return lr
+
+L(le2): cmn n, #1
+ bne L(1)
+ ldmia ap!, { r2, r3 }
+ mov r12, #0
+ b L(sum2)
+L(1): ldr r2, [ap]
+ bic r0, r2, #0xff000000
+ add r0, r0, r2, lsr #24
+ pop { r4, r5, r6, r7 }
+ return lr
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora17/mul_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora17/mul_1.asm
new file mode 100644
index 0000000..d9b6042
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora17/mul_1.asm
@@ -0,0 +1,34 @@
+dnl ARM mpn_mul_1
+
+dnl Copyright 2018 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mul_1)
+include_mpn(`arm/v6/mul_1.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora17/submul_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora17/submul_1.asm
new file mode 100644
index 0000000..f3e8139
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora17/submul_1.asm
@@ -0,0 +1,34 @@
+dnl ARM mpn_submul_1
+
+dnl Copyright 2018 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_submul_1)
+include_mpn(`arm/v6/submul_1.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora5/gmp-mparam.h b/gmp-6.3.0/mpn/arm/v7a/cora5/gmp-mparam.h
new file mode 100644
index 0000000..e3564e0
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora5/gmp-mparam.h
@@ -0,0 +1,205 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1500 MHz Cortex-A5 (odroid c1) */
+/* FFT tuning limit = 18,235,562 */
+/* Generated by tuneup.c, 2019-10-22, gcc 4.9 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 23
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1N_PI1_METHOD 1 /* 132.79% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 52
+
+#define DIV_1_VS_MUL_1_PERCENT 213
+
+#define MUL_TOOM22_THRESHOLD 48
+#define MUL_TOOM33_THRESHOLD 143
+#define MUL_TOOM44_THRESHOLD 262
+#define MUL_TOOM6H_THRESHOLD 414
+#define MUL_TOOM8H_THRESHOLD 527
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 153
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 168
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 152
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 180
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 226
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 66
+#define SQR_TOOM3_THRESHOLD 149
+#define SQR_TOOM4_THRESHOLD 348
+#define SQR_TOOM6_THRESHOLD 517
+#define SQR_TOOM8_THRESHOLD 608
+
+#define MULMID_TOOM42_THRESHOLD 70
+
+#define MULMOD_BNM1_THRESHOLD 26
+#define SQRMOD_BNM1_THRESHOLD 28
+
+#define MUL_FFT_MODF_THRESHOLD 660 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 660, 5}, { 29, 6}, { 15, 5}, { 33, 6}, \
+ { 17, 5}, { 35, 6}, { 29, 7}, { 15, 6}, \
+ { 37, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \
+ { 43, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \
+ { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \
+ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
+ { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \
+ { 83, 9}, { 47, 8}, { 99, 9}, { 55,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 399, 9}, { 799,10}, { 415,11}, { 223,12}, \
+ { 127,11}, { 255,10}, { 511, 9}, { 1023,10}, \
+ { 543,11}, { 287,10}, { 607,11}, { 319,10}, \
+ { 671,11}, { 351,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
+ { 1087,11}, { 575,10}, { 1151,11}, { 607,12}, \
+ { 319,11}, { 703,12}, { 383,11}, { 831,12}, \
+ { 447,11}, { 895,13}, { 255,12}, { 511,11}, \
+ { 1087,12}, { 575,11}, { 1183,12}, { 639,11}, \
+ { 1279,12}, { 703,13}, { 383,12}, { 767,11}, \
+ { 1535,12}, { 895,14}, { 255,13}, { 511,12}, \
+ { 1151,13}, { 639,12}, { 1407,13}, { 767,12}, \
+ { 1599,13}, { 895,12}, { 1791,14}, { 511,13}, \
+ { 1023,12}, { 2111,13}, { 1151,12}, { 2367,13}, \
+ { 1279,12}, { 2559,13}, { 1407,14}, { 767,13}, \
+ { 1535,12}, { 3071,13}, { 1663,12}, { 3327,13}, \
+ { 1791,15}, { 511,14}, { 1023,13}, { 2175,12}, \
+ { 4351,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 140
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 590 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 590, 5}, { 33, 6}, { 17, 5}, { 35, 6}, \
+ { 36, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \
+ { 43, 7}, { 23, 6}, { 47, 7}, { 37, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671,10}, { 351,11}, { 191,10}, \
+ { 383, 9}, { 767,10}, { 415,12}, { 127,11}, \
+ { 255,10}, { 511, 9}, { 1023,10}, { 543, 9}, \
+ { 1087,11}, { 287,10}, { 575, 9}, { 1151,10}, \
+ { 607,11}, { 319,10}, { 671,11}, { 351,12}, \
+ { 191,11}, { 383,10}, { 799,11}, { 415,10}, \
+ { 831,13}, { 127,12}, { 255,11}, { 511,10}, \
+ { 1023,11}, { 543,10}, { 1087,11}, { 575,10}, \
+ { 1151,11}, { 607,12}, { 319,11}, { 735,12}, \
+ { 383,11}, { 831,12}, { 447,11}, { 927,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1151,12}, { 639,11}, { 1279,12}, { 703,13}, \
+ { 383,12}, { 767,11}, { 1535,12}, { 831,11}, \
+ { 1663,12}, { 895,11}, { 1791,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1023,11}, { 2047,12}, \
+ { 1151,13}, { 639,12}, { 1407,13}, { 767,12}, \
+ { 1599,13}, { 895,12}, { 1791,14}, { 511,13}, \
+ { 1023,12}, { 2111,13}, { 1151,12}, { 2367,13}, \
+ { 1279,12}, { 2559,13}, { 1407,14}, { 767,13}, \
+ { 1535,12}, { 3071,13}, { 1663,12}, { 3327,13}, \
+ { 1791,15}, { 511,14}, { 1023,13}, { 2175,12}, \
+ { 4351,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 144
+#define SQR_FFT_THRESHOLD 5760
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 39
+#define MULLO_MUL_N_THRESHOLD 14709
+#define SQRLO_BASECASE_THRESHOLD 8
+#define SQRLO_DC_THRESHOLD 33
+#define SQRLO_SQR_THRESHOLD 11278
+
+#define DC_DIV_QR_THRESHOLD 36
+#define DC_DIVAPPR_Q_THRESHOLD 116
+#define DC_BDIV_QR_THRESHOLD 48
+#define DC_BDIV_Q_THRESHOLD 140
+
+#define INV_MULMOD_BNM1_THRESHOLD 95
+#define INV_NEWTON_THRESHOLD 181
+#define INV_APPR_THRESHOLD 125
+
+#define BINV_NEWTON_THRESHOLD 327
+#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD 152
+
+#define MU_DIV_QR_THRESHOLD 2350
+#define MU_DIVAPPR_Q_THRESHOLD 2130
+#define MUPI_DIV_QR_THRESHOLD 98
+#define MU_BDIV_QR_THRESHOLD 1970
+#define MU_BDIV_Q_THRESHOLD 2172
+
+#define POWM_SEC_TABLE 6,37,108,624,2351
+
+#define GET_STR_DC_THRESHOLD 28
+#define GET_STR_PRECOMPUTE_THRESHOLD 44
+#define SET_STR_DC_THRESHOLD 309
+#define SET_STR_PRECOMPUTE_THRESHOLD 762
+
+#define FAC_DSC_THRESHOLD 236
+#define FAC_ODD_THRESHOLD 29
+
+#define MATRIX22_STRASSEN_THRESHOLD 25
+#define HGCD2_DIV1_METHOD 5 /* 2.92% faster than 3 */
+#define HGCD_THRESHOLD 70
+#define HGCD_APPR_THRESHOLD 59
+#define HGCD_REDUCE_THRESHOLD 4120
+#define GCD_DC_THRESHOLD 229
+#define GCDEXT_DC_THRESHOLD 233
+#define JACOBI_BASE_METHOD 1 /* 17.07% faster than 4 */
+
+/* Tuneup completed successfully, took 47845 seconds */
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora7/gmp-mparam.h b/gmp-6.3.0/mpn/arm/v7a/cora7/gmp-mparam.h
new file mode 100644
index 0000000..78de045
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora7/gmp-mparam.h
@@ -0,0 +1,202 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 900 MHz Cortex-A7 (raspberry pi2) */
+/* FFT tuning limit = 21,559,921 */
+/* Generated by tuneup.c, 2019-10-22, gcc 8.3 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1N_PI1_METHOD 1 /* 64.16% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 48
+
+#define DIV_1_VS_MUL_1_PERCENT 216
+
+#define MUL_TOOM22_THRESHOLD 39
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 196
+#define MUL_TOOM6H_THRESHOLD 327
+#define MUL_TOOM8H_THRESHOLD 478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 129
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 183
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 144
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 190
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 52
+#define SQR_TOOM3_THRESHOLD 162
+#define SQR_TOOM4_THRESHOLD 268
+#define SQR_TOOM6_THRESHOLD 399
+#define SQR_TOOM8_THRESHOLD 547
+
+#define MULMID_TOOM42_THRESHOLD 50
+
+#define MULMOD_BNM1_THRESHOLD 21
+#define SQRMOD_BNM1_THRESHOLD 25
+
+#define MUL_FFT_MODF_THRESHOLD 636 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 636, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
+ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 43, 8}, \
+ { 23, 7}, { 49, 8}, { 27, 7}, { 55, 8}, \
+ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
+ { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 83, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
+ { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \
+ { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
+ { 799,10}, { 415,11}, { 223,12}, { 127,11}, \
+ { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \
+ { 287,10}, { 607,11}, { 319,10}, { 671,11}, \
+ { 351,12}, { 191,11}, { 383,10}, { 799,11}, \
+ { 415,10}, { 831,13}, { 127,12}, { 255,11}, \
+ { 511,10}, { 1023,11}, { 543,10}, { 1087,11}, \
+ { 607,12}, { 319,11}, { 735,12}, { 383,11}, \
+ { 863,12}, { 447,11}, { 959,13}, { 255,12}, \
+ { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \
+ { 639,11}, { 1279,12}, { 703,13}, { 383,12}, \
+ { 767,11}, { 1599,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1215,13}, { 639,12}, { 1471,13}, \
+ { 767,12}, { 1663,13}, { 895,12}, { 1855,14}, \
+ { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
+ { 2431,13}, { 1407,14}, { 767,13}, { 1663,12}, \
+ { 3327,13}, { 1791,15}, { 511,14}, { 1023,13}, \
+ { 2431,14}, { 1279,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 133
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 535 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 535, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \
+ { 15, 5}, { 31, 6}, { 29, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \
+ { 39, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 143, 9}, \
+ { 287,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271, 9}, { 543,10}, { 287,11}, { 159,10}, \
+ { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
+ { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 399, 9}, { 799,10}, { 415, 9}, { 831,11}, \
+ { 223,12}, { 127,10}, { 543,11}, { 287,10}, \
+ { 607,11}, { 319,10}, { 671,11}, { 351,10}, \
+ { 703,12}, { 191,11}, { 383,10}, { 799,11}, \
+ { 415,10}, { 831,13}, { 127,11}, { 511,10}, \
+ { 1023,11}, { 543,10}, { 1087,11}, { 607,12}, \
+ { 319,11}, { 735,12}, { 383,11}, { 863,12}, \
+ { 447,11}, { 991,12}, { 511,11}, { 1087,12}, \
+ { 575,11}, { 1215,12}, { 639,11}, { 1279,12}, \
+ { 703,13}, { 383,12}, { 767,11}, { 1535,12}, \
+ { 831,11}, { 1663,12}, { 959,13}, { 511,12}, \
+ { 1215,13}, { 639,12}, { 1471,13}, { 767,12}, \
+ { 1663,13}, { 895,12}, { 1855,14}, { 511,13}, \
+ { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
+ { 1407,14}, { 767,13}, { 1791,15}, { 511,14}, \
+ { 1023,13}, { 2431,14}, { 1279,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 134
+#define SQR_FFT_THRESHOLD 4736
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 27
+#define MULLO_MUL_N_THRESHOLD 13463
+#define SQRLO_BASECASE_THRESHOLD 5
+#define SQRLO_DC_THRESHOLD 31
+#define SQRLO_SQR_THRESHOLD 9449
+
+#define DC_DIV_QR_THRESHOLD 28
+#define DC_DIVAPPR_Q_THRESHOLD 90
+#define DC_BDIV_QR_THRESHOLD 32
+#define DC_BDIV_Q_THRESHOLD 110
+
+#define INV_MULMOD_BNM1_THRESHOLD 78
+#define INV_NEWTON_THRESHOLD 134
+#define INV_APPR_THRESHOLD 98
+
+#define BINV_NEWTON_THRESHOLD 278
+#define REDC_1_TO_REDC_2_THRESHOLD 4
+#define REDC_2_TO_REDC_N_THRESHOLD 123
+
+#define MU_DIV_QR_THRESHOLD 1718
+#define MU_DIVAPPR_Q_THRESHOLD 1685
+#define MUPI_DIV_QR_THRESHOLD 62
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 1718
+
+#define POWM_SEC_TABLE 1,22,95,563,1955
+
+#define GET_STR_DC_THRESHOLD 28
+#define GET_STR_PRECOMPUTE_THRESHOLD 51
+#define SET_STR_DC_THRESHOLD 182
+#define SET_STR_PRECOMPUTE_THRESHOLD 638
+
+#define FAC_DSC_THRESHOLD 153
+#define FAC_ODD_THRESHOLD 56
+
+#define MATRIX22_STRASSEN_THRESHOLD 25
+#define HGCD2_DIV1_METHOD 1 /* 5.04% faster than 3 */
+#define HGCD_THRESHOLD 55
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 153
+#define GCDEXT_DC_THRESHOLD 180
+#define JACOBI_BASE_METHOD 1 /* 30.60% faster than 4 */
+
+/* Tuneup completed successfully, took 75202 seconds */
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora8/bdiv_q_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora8/bdiv_q_1.asm
new file mode 100644
index 0000000..e74b260
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora8/bdiv_q_1.asm
@@ -0,0 +1,158 @@
+dnl ARM v6 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+dnl This is v6 code but it runs well on just the v7a Cortex-A8, A9, and A15.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C norm unorm
+C 1176 - -
+C Cortex-A5 9 13
+C Cortex-A7 12 18
+C Cortex-A8 13 14
+C Cortex-A9 9 10 not measured since latest edits
+C Cortex-A15 7 7
+C Cortex-A53 16 24
+
+C Architecture requirements:
+C v5 -
+C v5t clz
+C v5te -
+C v6 umaal
+C v6t2 -
+C v7a -
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`d', `r3')
+define(`di_arg', `sp[0]') C just mpn_pi1_bdiv_q_1
+define(`cnt_arg', `sp[4]') C just mpn_pi1_bdiv_q_1
+
+define(`cy', `r7')
+define(`cnt', `r6')
+define(`tnc', `r4')
+
+ASM_START()
+PROLOGUE(mpn_bdiv_q_1)
+ push {r6-r11}
+
+ rsb r10, d, #0
+ and r10, r10, d
+ clz r10, r10
+ rsbs cnt, r10, #31 C count_trailing_zeros
+ mov d, d, lsr cnt
+
+C binvert limb
+ LEA( r10, binvert_limb_table)
+ and r12, d, #254
+ ldrb r10, [r10, r12, lsr #1]
+ mul r12, r10, r10
+ mul r12, d, r12
+ rsb r12, r12, r10, lsl #1
+ mul r10, r12, r12
+ mul r10, d, r10
+ rsb r10, r10, r12, lsl #1 C r10 = inverse
+ b L(pi1)
+EPILOGUE()
+
+PROLOGUE(mpn_pi1_bdiv_q_1)
+ push {r6-r11}
+
+ ldr cnt, [sp, #28]
+ ldr r10, [sp, #24]
+ cmp cnt, #0
+
+L(pi1): ldr r11, [up], #4 C up[0]
+ mov cy, #0
+ rsb r8, r10, #0 C r8 = -inverse
+ bne L(unorm)
+
+L(norm):
+ subs n, n, #1
+ mul r11, r11, r10
+ beq L(edn)
+
+ ALIGN(16)
+L(tpn): ldr r9, [up], #4
+ mov r12, #0
+ str r11, [rp], #4
+ umaal r12, cy, r11, d
+ mul r11, r9, r10
+ mla r11, cy, r8, r11
+ subs n, n, #1
+ bne L(tpn)
+
+L(edn): str r11, [rp]
+ pop {r6-r11}
+ bx r14
+
+L(unorm):
+ push {r4-r5}
+ rsb tnc, cnt, #32
+ mov r5, r11, lsr cnt
+ subs n, n, #1
+ beq L(ed1)
+
+ ldr r12, [up], #4
+ orr r9, r5, r12, lsl tnc
+ mov r5, r12, lsr cnt
+ mul r11, r9, r10
+ subs n, n, #1
+ beq L(edu)
+
+ ALIGN(16)
+L(tpu): ldr r12, [up], #4
+ orr r9, r5, r12, lsl tnc
+ mov r5, r12, lsr cnt
+ mov r12, #0
+ str r11, [rp], #4
+ umaal r12, cy, r11, d
+ mul r11, r9, r10
+ mla r11, cy, r8, r11
+ subs n, n, #1
+ bne L(tpu)
+
+L(edu): str r11, [rp], #4
+ mov r12, #0
+ umaal r12, cy, r11, d
+ mul r11, r5, r10
+ mla r11, cy, r8, r11
+ str r11, [rp]
+ pop {r4-r11}
+ bx r14
+
+L(ed1): mul r11, r5, r10
+ str r11, [rp]
+ pop {r4-r11}
+ bx r14
+EPILOGUE()
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora8/gmp-mparam.h b/gmp-6.3.0/mpn/arm/v7a/cora8/gmp-mparam.h
new file mode 100644
index 0000000..5864841
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora8/gmp-mparam.h
@@ -0,0 +1,207 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1000 MHz Cortex-A8 (beaglebone black) */
+/* FFT tuning limit = 9,464,348 */
+/* Generated by tuneup.c, 2019-10-23, gcc 6.3 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1N_PI1_METHOD 1 /* 50.65% faster than 2 */
+#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 31
+
+#define DIV_1_VS_MUL_1_PERCENT 192
+
+#define MUL_TOOM22_THRESHOLD 39
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 226
+#define MUL_TOOM6H_THRESHOLD 366
+#define MUL_TOOM8H_THRESHOLD 620
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 141
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 183
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 154
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 160
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 193
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 46
+#define SQR_TOOM3_THRESHOLD 145
+#define SQR_TOOM4_THRESHOLD 375
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 547
+
+#define MULMID_TOOM42_THRESHOLD 38
+
+#define MULMOD_BNM1_THRESHOLD 22
+#define SQRMOD_BNM1_THRESHOLD 23
+
+#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 476, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
+ { 28, 7}, { 15, 6}, { 33, 7}, { 19, 6}, \
+ { 39, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
+ { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 71, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \
+ { 99, 9}, { 55,10}, { 31, 9}, { 87,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \
+ { 199,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 143, 9}, { 287, 8}, { 575,10}, \
+ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
+ { 383, 8}, { 767, 9}, { 399,10}, { 207,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271, 9}, { 543,10}, { 287, 9}, { 575,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,10}, { 351, 9}, { 703,10}, { 367,11}, \
+ { 191,10}, { 399, 9}, { 799,10}, { 415,11}, \
+ { 223,12}, { 127,11}, { 255,10}, { 543,11}, \
+ { 287,10}, { 607, 9}, { 1215,11}, { 319,10}, \
+ { 671,11}, { 351,10}, { 703,12}, { 191,11}, \
+ { 383,10}, { 799,11}, { 415,10}, { 863,11}, \
+ { 447,13}, { 127,12}, { 255,11}, { 543,10}, \
+ { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \
+ { 1343,11}, { 735,12}, { 383,11}, { 799,10}, \
+ { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,12}, { 639,11}, { 1343,12}, { 703,13}, \
+ { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
+ { 1663,12}, { 959,14}, { 255,13}, { 511,12}, \
+ { 1215,13}, { 639,12}, { 1407,13}, { 767,12}, \
+ { 1663,13}, { 895,12}, { 1791,14}, { 511,13}, \
+ { 1023,12}, { 2111,13}, { 1151,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 139
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 436 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 436, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
+ { 43, 9}, { 23, 8}, { 55,10}, { 15, 9}, \
+ { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 167,10}, { 95, 9}, { 191,10}, \
+ { 111,11}, { 63,10}, { 127, 9}, { 255, 8}, \
+ { 511, 9}, { 271,10}, { 143, 9}, { 287, 8}, \
+ { 575, 9}, { 303,10}, { 159, 9}, { 319,11}, \
+ { 95,10}, { 191, 9}, { 383, 8}, { 767, 9}, \
+ { 399,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \
+ { 703,10}, { 367,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,12}, { 127,11}, \
+ { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \
+ { 287,10}, { 607,11}, { 319,10}, { 671,11}, \
+ { 351,10}, { 735,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,10}, { 863,11}, { 447,10}, \
+ { 895,13}, { 127,12}, { 255,11}, { 511,10}, \
+ { 1023,11}, { 543,10}, { 1087,11}, { 607,12}, \
+ { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \
+ { 383,11}, { 863,12}, { 447,11}, { 959,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \
+ { 1407,13}, { 383,12}, { 767,11}, { 1599,12}, \
+ { 831,11}, { 1663,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1215,13}, { 639,12}, { 1471,13}, \
+ { 767,12}, { 1663,13}, { 895,12}, { 1855,14}, \
+ { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 152
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 21
+#define MULLO_DC_THRESHOLD 0 /* never mpn_mullo_basecase */
+#define MULLO_MUL_N_THRESHOLD 13463
+#define SQRLO_BASECASE_THRESHOLD 9
+#define SQRLO_DC_THRESHOLD 17
+#define SQRLO_SQR_THRESHOLD 7246
+
+#define DC_DIV_QR_THRESHOLD 27
+#define DC_DIVAPPR_Q_THRESHOLD 74
+#define DC_BDIV_QR_THRESHOLD 21
+#define DC_BDIV_Q_THRESHOLD 64
+
+#define INV_MULMOD_BNM1_THRESHOLD 78
+#define INV_NEWTON_THRESHOLD 31
+#define INV_APPR_THRESHOLD 37
+
+#define BINV_NEWTON_THRESHOLD 167
+#define REDC_1_TO_REDC_2_THRESHOLD 4
+#define REDC_2_TO_REDC_N_THRESHOLD 198
+
+#define MU_DIV_QR_THRESHOLD 1858
+#define MU_DIVAPPR_Q_THRESHOLD 1685
+#define MUPI_DIV_QR_THRESHOLD 43
+#define MU_BDIV_QR_THRESHOLD 1589
+#define MU_BDIV_Q_THRESHOLD 1685
+
+#define POWM_SEC_TABLE 1,13,96,487,1378
+
+#define GET_STR_DC_THRESHOLD 18
+#define GET_STR_PRECOMPUTE_THRESHOLD 36
+#define SET_STR_DC_THRESHOLD 145
+#define SET_STR_PRECOMPUTE_THRESHOLD 505
+
+#define FAC_DSC_THRESHOLD 137
+#define FAC_ODD_THRESHOLD 29
+
+#define MATRIX22_STRASSEN_THRESHOLD 24
+#define HGCD2_DIV1_METHOD 5 /* 4.29% faster than 4 */
+#define HGCD_THRESHOLD 39
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 116
+#define GCDEXT_DC_THRESHOLD 124
+#define JACOBI_BASE_METHOD 4 /* 5.89% faster than 1 */
+
+/* Tuneup completed successfully, took 48230 seconds */
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora9/bdiv_q_1.asm b/gmp-6.3.0/mpn/arm/v7a/cora9/bdiv_q_1.asm
new file mode 100644
index 0000000..245b371
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora9/bdiv_q_1.asm
@@ -0,0 +1,36 @@
+dnl ARM mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb divisor.
+
+dnl Contributed to the GNU project by Torbjörn Granlund.
+
+dnl Copyright 2012, 2013, 2017 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of either:
+dnl
+dnl * the GNU Lesser General Public License as published by the Free
+dnl Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+dnl
+dnl or
+dnl
+dnl * the GNU General Public License as published by the Free Software
+dnl Foundation; either version 2 of the License, or (at your option) any
+dnl later version.
+dnl
+dnl or both in parallel, as here.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received copies of the GNU General Public License and the
+dnl GNU Lesser General Public License along with the GNU MP Library. If not,
+dnl see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`arm/v7a/cora8/bdiv_q_1.asm')
diff --git a/gmp-6.3.0/mpn/arm/v7a/cora9/gmp-mparam.h b/gmp-6.3.0/mpn/arm/v7a/cora9/gmp-mparam.h
new file mode 100644
index 0000000..5c54012
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora9/gmp-mparam.h
@@ -0,0 +1,211 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012-2015 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+or
+
+ * the GNU General Public License as published by the Free Software
+ Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library. If not,
+see https://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1000 MHz Cortex-A9 */
+/* FFT tuning limit = 25 M */
+/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_1N_PI1_METHOD 1
+#define DIV_QR_1_NORM_THRESHOLD 5
+#define DIV_QR_1_UNNORM_THRESHOLD 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 20
+
+#define DIV_1_VS_MUL_1_PERCENT 190
+
+#define MUL_TOOM22_THRESHOLD 45
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 387
+#define MUL_TOOM6H_THRESHOLD 537
+#define MUL_TOOM8H_THRESHOLD 774
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 141
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 237
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 141
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 258
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 211
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 64
+#define SQR_TOOM3_THRESHOLD 189
+#define SQR_TOOM4_THRESHOLD 517
+#define SQR_TOOM6_THRESHOLD 656
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 62
+
+#define MULMOD_BNM1_THRESHOLD 23
+#define SQRMOD_BNM1_THRESHOLD 28
+
+#define MUL_FFT_MODF_THRESHOLD 630 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 630, 5}, { 29, 6}, { 15, 5}, { 33, 6}, \
+ { 17, 5}, { 35, 6}, { 36, 7}, { 19, 6}, \
+ { 40, 7}, { 21, 6}, { 43, 7}, { 23, 6}, \
+ { 47, 7}, { 25, 6}, { 51, 7}, { 27, 6}, \
+ { 55, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
+ { 27, 7}, { 57, 9}, { 15, 8}, { 31, 7}, \
+ { 65, 8}, { 35, 7}, { 71, 8}, { 43, 9}, \
+ { 23, 8}, { 55, 9}, { 31, 8}, { 71, 9}, \
+ { 39, 8}, { 83, 9}, { 47, 8}, { 99, 9}, \
+ { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \
+ { 111,11}, { 63,10}, { 159,11}, { 95,10}, \
+ { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 399, 9}, { 799,10}, { 415,11}, { 223,12}, \
+ { 127,11}, { 255,10}, { 511, 9}, { 1023,10}, \
+ { 543,11}, { 287,10}, { 607,11}, { 319,10}, \
+ { 671,11}, { 351,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
+ { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
+ { 1087,11}, { 607,12}, { 319,11}, { 735,12}, \
+ { 383,11}, { 831,12}, { 447,11}, { 927,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,12}, { 639,11}, { 1343,12}, { 703,13}, \
+ { 383,12}, { 767,11}, { 1535,12}, { 831,11}, \
+ { 1663,12}, { 895,14}, { 255,13}, { 511,12}, \
+ { 1023,11}, { 2047,12}, { 1151,13}, { 639,12}, \
+ { 1407,13}, { 767,12}, { 1663,13}, { 895,12}, \
+ { 1791,14}, { 511,13}, { 1023,12}, { 2111,13}, \
+ { 1151,12}, { 2431,13}, { 1279,12}, { 2559,13}, \
+ { 1407,14}, { 767,13}, { 1535,12}, { 3071,13}, \
+ { 1663,12}, { 3455,13}, { 1791,15}, { 511,14}, \
+ { 1023,13}, { 2047,12}, { 4095,13}, { 2175,12}, \
+ { 4479,13}, { 2431,14}, { 1279,13}, { 2559,12}, \
+ { 5119,13}, { 2815,12}, { 5631,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 157
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 565 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 565, 5}, { 19, 4}, { 40, 5}, { 21, 4}, \
+ { 43, 5}, { 28, 6}, { 15, 5}, { 35, 6}, \
+ { 29, 7}, { 15, 6}, { 37, 7}, { 19, 6}, \
+ { 39, 7}, { 21, 6}, { 43, 7}, { 23, 6}, \
+ { 47, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
+ { 27, 7}, { 55, 9}, { 15, 8}, { 31, 7}, \
+ { 65, 8}, { 35, 7}, { 71, 8}, { 43, 9}, \
+ { 23, 8}, { 55,10}, { 15, 9}, { 31, 8}, \
+ { 71, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511, 8}, { 1023, 9}, \
+ { 527,10}, { 271, 9}, { 543,10}, { 287,11}, \
+ { 159, 9}, { 639,10}, { 335, 9}, { 671,10}, \
+ { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 399, 9}, { 799,10}, { 415,11}, { 223,12}, \
+ { 127,11}, { 255,10}, { 511, 9}, { 1023,10}, \
+ { 543,11}, { 287,10}, { 671,11}, { 351,12}, \
+ { 191,11}, { 383,10}, { 799,11}, { 415,10}, \
+ { 831,13}, { 127,12}, { 255,11}, { 511,10}, \
+ { 1023,11}, { 543,10}, { 1087,11}, { 735,12}, \
+ { 383,11}, { 831,12}, { 447,11}, { 927,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1151,12}, { 639,11}, { 1343,12}, { 703,13}, \
+ { 383,12}, { 767,11}, { 1535,12}, { 831,11}, \
+ { 1663,12}, { 959,13}, { 511,12}, { 1023,11}, \
+ { 2047,12}, { 1151,13}, { 639,12}, { 1407,13}, \
+ { 767,12}, { 1599,13}, { 895,12}, { 1791,14}, \
+ { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
+ { 2431,13}, { 1279,12}, { 2559,13}, { 1407,14}, \
+ { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \
+ { 3455,13}, { 1791,15}, { 511,14}, { 1023,13}, \
+ { 2047,12}, { 4095,13}, { 2175,12}, { 4479,13}, \
+ { 2303,14}, { 1279,13}, { 2559,12}, { 5119,13}, \
+ { 2815,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 155
+#define SQR_FFT_THRESHOLD 5568
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 37
+#define MULLO_MUL_N_THRESHOLD 13463
+#define SQRLO_BASECASE_THRESHOLD 12
+#define SQRLO_DC_THRESHOLD 22
+#define SQRLO_SQR_THRESHOLD 10950
+
+#define DC_DIV_QR_THRESHOLD 32
+#define DC_DIVAPPR_Q_THRESHOLD 99
+#define DC_BDIV_QR_THRESHOLD 43
+#define DC_BDIV_Q_THRESHOLD 102
+
+#define INV_MULMOD_BNM1_THRESHOLD 88
+#define INV_NEWTON_THRESHOLD 141
+#define INV_APPR_THRESHOLD 111
+
+#define BINV_NEWTON_THRESHOLD 312
+#define REDC_1_TO_REDC_2_THRESHOLD 6
+#define REDC_2_TO_REDC_N_THRESHOLD 140
+
+#define MU_DIV_QR_THRESHOLD 2492
+#define MU_DIVAPPR_Q_THRESHOLD 2130
+#define MUPI_DIV_QR_THRESHOLD 55
+#define MU_BDIV_QR_THRESHOLD 2130
+#define MU_BDIV_Q_THRESHOLD 2172
+
+#define POWM_SEC_TABLE 40,53,56,71,1985
+
+#define GET_STR_DC_THRESHOLD 16
+#define GET_STR_PRECOMPUTE_THRESHOLD 33
+#define SET_STR_DC_THRESHOLD 172
+#define SET_STR_PRECOMPUTE_THRESHOLD 671
+
+#define FAC_DSC_THRESHOLD 309
+#define FAC_ODD_THRESHOLD 29
+
+#define MATRIX22_STRASSEN_THRESHOLD 24
+#define HGCD_THRESHOLD 61
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 4120
+#define GCD_DC_THRESHOLD 408
+#define GCDEXT_DC_THRESHOLD 303
+#define JACOBI_BASE_METHOD 4