From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001
From: Duncan Wilkie <antigravityd@gmail.com>
Date: Sat, 18 Nov 2023 06:11:09 -0600
Subject: Initial commit.

---
 gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm | 145 ++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm

(limited to 'gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm')

diff --git a/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm b/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm
new file mode 100644
index 0000000..49ccaec
--- /dev/null
+++ b/gmp-6.3.0/mpn/sparc64/ultrasparct3/cnd_aors_n.asm
@@ -0,0 +1,145 @@
+dnl  SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5.
+
+dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
+
+dnl  Copyright 2013, 2017 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		   cycles/limb
+C UltraSPARC T3:	 8.5
+C UltraSPARC T4:	 3
+
+C We use a double-pointer trick to allow indexed addressing.  Its setup
+C cost might be a problem in these functions, since we don't expect huge n
+C arguments.
+C
+C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
+C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn.
+
+C INPUT PARAMETERS
+define(`cnd', `%i0')
+define(`rp',  `%i1')
+define(`up',  `%i2')
+define(`vp',  `%i3')
+define(`n',   `%i4')
+
+define(`mask',   `cnd')
+define(`up0', `%l0')  define(`up1', `%l1')
+define(`vp0', `%l2')  define(`vp1', `%l3')
+define(`rp0', `%g4')  define(`rp1', `%g5')
+define(`u0',  `%l4')  define(`u1',  `%l5')
+define(`v0',  `%l6')  define(`v1',  `%l7')
+define(`x0',  `%g1')  define(`x1',  `%g3')
+define(`w0',  `%g1')  define(`w1',  `%g3')
+
+ifdef(`OPERATION_cnd_add_n',`
+  define(`LOGOP',   `and	$1, $2, $3')
+  define(`MAKEMASK',`cmp	%g0, $1
+		     addxc(	%g0, %g0, $2)
+		     neg	$2, $2')
+  define(`INITCY',  `addcc	%g0, 0, %g0')
+  define(`RETVAL',  `addxc(	%g0, %g0, %i0)')
+  define(`func',    `mpn_cnd_add_n')
+')
+ifdef(`OPERATION_cnd_sub_n',`
+  define(`LOGOP',   `orn	$2, $1, $3')
+  define(`MAKEMASK',`cmp	$1, 1
+		     addxc(	%g0, %g0, $2)
+		     neg	$2, $2')
+  define(`INITCY',  `subcc	%g0, 1, %g0')
+  define(`RETVAL',  `addxc(	%g0, %g0, %i0)
+		     xor	%i0, 1, %i0')
+  define(`func',    `mpn_cnd_sub_n')
+')
+
+MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+
+ASM_START()
+	REGISTER(%g2,#scratch)
+	REGISTER(%g3,#scratch)
+PROLOGUE(func)
+	save	%sp, -176, %sp
+
+	MAKEMASK(cnd,mask)
+
+	andcc	n, 1, %g0
+	sllx	n, 3, n
+	add	n, -16, n
+	add	vp, n, vp0
+	add	up, n, up0
+	add	rp, n, rp0
+	neg	n, n
+	be	L(evn)
+	 INITCY
+
+L(odd):	ldx	[vp0 + n], v1
+	ldx	[up0 + n], u1
+	LOGOP(	v1, mask, x1)
+	addxccc(u1, x1, w1)
+	stx	w1, [rp0 + n]
+	add	n, 8, n
+	brgz	n, L(rtn)
+	 nop
+
+L(evn):	add	vp0, 8, vp1
+	add	up0, 8, up1
+	add	rp0, -24, rp1
+	ldx	[vp0 + n], v0
+	ldx	[vp1 + n], v1
+	ldx	[up0 + n], u0
+	ldx	[up1 + n], u1
+	add	n, 16, n
+	brgz	n, L(end)
+	 add	rp0, -16, rp0
+
+L(top):	LOGOP(	v0, mask, x0)
+	ldx	[vp0 + n], v0
+	LOGOP(	v1, mask, x1)
+	ldx	[vp1 + n], v1
+	addxccc(u0, x0, w0)
+	ldx	[up0 + n], u0
+	addxccc(u1, x1, w1)
+	ldx	[up1 + n], u1
+	stx	w0, [rp0 + n]
+	add	n, 16, n
+	brlez	n, L(top)
+	 stx	w1, [rp1 + n]
+
+L(end):	LOGOP(	v0, mask, x0)
+	LOGOP(	v1, mask, x1)
+	addxccc(u0, x0, w0)
+	addxccc(u1, x1, w1)
+	stx	w0, [rp0 + n]
+	stx	w1, [rp1 + 32]
+
+L(rtn):	RETVAL
+	ret
+	 restore
+EPILOGUE()
-- 
cgit v1.2.3