From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie <antigravityd@gmail.com>
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm | 90 +++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm

(limited to 'gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm')

diff --git a/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm
new file mode 100644
index 0000000..2e05afe
--- /dev/null
+++ b/gmp-6.3.0/mpn/arm/v7a/cora15/neon/copyi.asm
@@ -0,0 +1,90 @@
+dnl  ARM Neon mpn_copyi optimised for A15.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb
+C StrongARM	 -
+C XScale	 -
+C Cortex-A7	 ?
+C Cortex-A8	 ?
+C Cortex-A9	 1.75		slower than core register code
+C Cortex-A15	 0.52
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+	cmp	n, #7
+	ble	L(bc)
+
+C Copy until rp is 128-bit aligned
+	tst	rp, #4
+	beq	L(al1)
+	vld1.32	{d22[0]}, [up]!
+	sub	n, n, #1
+	vst1.32	{d22[0]}, [rp]!
+L(al1):	tst	rp, #8
+	beq	L(al2)
+	vld1.32	{d22}, [up]!
+	sub	n, n, #2
+	vst1.32	{d22}, [rp:64]!
+L(al2):	vld1.32	{d26-d27}, [up]!
+	subs	n, n, #12
+	blt	L(end)
+
+	ALIGN(16)
+L(top):	vld1.32	{d22-d23}, [up]!
+	vst1.32	{d26-d27}, [rp:128]!
+	vld1.32	{d26-d27}, [up]!
+	vst1.32	{d22-d23}, [rp:128]!
+	subs	n, n, #8
+	bge	L(top)
+
+L(end):	vst1.32	{d26-d27}, [rp:128]!
+
+C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
+C arrive here via L(bc)
+L(bc):	tst	n, #4
+	beq	L(tl1)
+	vld1.32	{d22-d23}, [up]!
+	vst1.32	{d22-d23}, [rp]!
+L(tl1):	tst	n, #2
+	beq	L(tl2)
+	vld1.32	{d22}, [up]!
+	vst1.32	{d22}, [rp]!
+L(tl2):	tst	n, #1
+	beq	L(tl3)
+	vld1.32	{d22[0]}, [up]
+	vst1.32	{d22[0]}, [rp]
+L(tl3):	bx	lr
+EPILOGUE()
-- 
cgit v1.2.3