From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie <antigravityd@gmail.com>
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/arm/mod_34lsub1.asm | 124 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/arm/mod_34lsub1.asm

(limited to 'gmp-6.3.0/mpn/arm/mod_34lsub1.asm')

diff --git a/gmp-6.3.0/mpn/arm/mod_34lsub1.asm b/gmp-6.3.0/mpn/arm/mod_34lsub1.asm
new file mode 100644
index 0000000..596cd3c
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/mod_34lsub1.asm
@@ -0,0 +1,124 @@
+dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 ?
+C XScale	 ?
+C Cortex-A5	 2.67
+C Cortex-A7	 2.35
+C Cortex-A8	 2.0
+C Cortex-A9	 1.33
+C Cortex-A15	 1.33
+C Cortex-A17	 3.34
+C Cortex-A53	 2.0
+
+define(`ap',	r0)
+define(`n',	r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C  * Write cleverer summation code.
+C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+	push	{ r4, r5, r6, r7 }
+
+	subs	n, n, #3
+	mov	r7, #0
+	blt	L(le2)			C n <= 2
+
+	ldmia	ap!, { r2, r3, r12 }
+	subs	n, n, #3
+	blt	L(sum)			C n <= 5
+	cmn	r0, #0			C clear carry
+	sub	n, n, #3
+	b	L(mid)
+
+L(top):	adcs	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+L(mid):	ldmia	ap!, { r4, r5, r6 }
+	tst	n, n
+	sub	n, n, #3
+	bpl	L(top)
+
+	add	n, n, #3
+
+	adcs	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, r6
+	movcs	r7, #1			C r7 <= 1
+
+L(sum):	cmn	n, #2
+	movlo	r4, #0
+	ldrhs	r4, [ap], #4
+	movls	r5, #0
+	ldrhi	r5, [ap], #4
+
+	adds	r2, r2, r4
+	adcs	r3, r3, r5
+	adcs	r12, r12, #0
+	adc	r7, r7, #0		C r7 <= 2
+
+L(sum2):
+	bic	r0, r2, #0xff000000
+	add	r0, r0, r2, lsr #24
+	add	r0, r0, r7
+
+	mov	r7, r3, lsl #8
+	bic	r1, r7, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r3, lsr #16
+
+	mov	r7, r12, lsl #16
+	bic	r1, r7, #0xff000000
+	add	r0, r0, r1
+	add	r0, r0, r12, lsr #8
+
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+
+L(le2):	cmn	n, #1
+	bne	L(1)
+	ldmia	ap!, { r2, r3 }
+	mov	r12, #0
+	b	L(sum2)
+L(1):	ldr	r2, [ap]
+	bic	r0, r2, #0xff000000
+	add	r0, r0, r2, lsr #24
+	pop	{ r4, r5, r6, r7 }
+	return	lr
+EPILOGUE()
-- 
cgit v1.2.3