github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/ultrasparct3/cnd_aors_n.asm (about)

     1  dnl  SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5.
     2  
     3  dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
     4  
     5  dnl  Copyright 2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C		   cycles/limb
    36  C UltraSPARC T3:	 8.5
    37  C UltraSPARC T4:	 3
    38  
    39  C We use a double-pointer trick to allow indexed addressing.  Its setup
    40  C cost might be a problem in these functions, since we don't expect huge n
    41  C arguments.
    42  C
    43  C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
    44  C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn.
    45  
    46  C INPUT PARAMETERS
        C The five arguments, read from the in-registers %i0-%i4 (the function
        C body opens with a save).  n arrives as a limb count; the code below
        C rescales it into a byte index that runs upward from a negative value.
    47  define(`cnd', `%i0')
    48  define(`rp',  `%i1')
    49  define(`up',  `%i2')
    50  define(`vp',  `%i3')
    51  define(`n',   `%i4')
    52  
        C mask is an alias of cnd: MAKEMASK(cnd,mask) overwrites the condition
        C in place with the mask derived from it.
    53  define(`mask',   `cnd')
        C Base pointers used with the index n.  Each *1 pointer is set up at a
        C fixed offset from its *0 partner, so one index register addresses two
        C consecutive limbs per iteration.
    54  define(`up0', `%l0')  define(`up1', `%l1')
    55  define(`vp0', `%l2')  define(`vp1', `%l3')
    56  define(`rp0', `%g4')  define(`rp1', `%g5')
        C Limbs loaded from up (u0,u1) and from vp (v0,v1).
    57  define(`u0',  `%l4')  define(`u1',  `%l5')
    58  define(`v0',  `%l6')  define(`v1',  `%l7')
        C x0,x1 hold the masked vp limb and w0,w1 the result limb.  They share
        C %g1 and %g3 on purpose: each x value is consumed by the very add that
        C produces the matching w value.
    59  define(`x0',  `%g1')  define(`x1',  `%g3')
    60  define(`w0',  `%g1')  define(`w1',  `%g3')
    61  
        C Operation-specific pieces, selected by which OPERATION_* symbol is
        C defined when this file is processed:
        C
        C   LOGOP(v, mask, dst)  dst = addend actually fed to the adder.
        C                        add: v AND mask.   sub: mask OR NOT v.
        C   MAKEMASK(cnd, dst)   dst = all ones or zero, derived from cnd.
        C                        add: all ones when cnd is nonzero.
        C                        sub: all ones when cnd is zero (complemented
        C                        mask, so that orn can be used in LOGOP).
        C   INITCY               initial carry: clear for add, set for sub
        C                        (u + NOT v + 1 = u - v).
        C   RETVAL               carry out into %i0; sub flips it into a borrow.
        C   func                 name of the function being assembled.
        C
        C MAKEMASK has to judge cnd as a full 64-bit value.  cmp sets both the
        C 32-bit (icc) and the 64-bit (xcc) carry, but subc consumes only the icc
        C carry, so a nonzero cnd whose low 32 bits are all zero would be taken
        C for zero.  addxc adds the xcc carry instead, giving 0 or 1, and neg
        C turns that into 0 or all ones.
    62  ifdef(`OPERATION_cnd_add_n',`
    63    define(`LOGOP',   `and	$1, $2, $3')
    64    define(`MAKEMASK',`cmp	%g0, $1
    65  		     addxc(	%g0, %g0, $2)
        		     neg	$2, $2')
    66    define(`INITCY',  `addcc	%g0, 0, %g0')
    67    define(`RETVAL',  `addxc(	%g0, %g0, %i0)')
    68    define(`func',    `mpn_cnd_add_n')
    69  ')
    70  ifdef(`OPERATION_cnd_sub_n',`
    71    define(`LOGOP',   `orn	$2, $1, $3')
    72    define(`MAKEMASK',`cmp	$1, 1
    73  		     addxc(	%g0, %g0, $2)
        		     neg	$2, $2')
    74    define(`INITCY',  `subcc	%g0, 1, %g0')
    75    define(`RETVAL',  `addxc(	%g0, %g0, %i0)
    76  		     xor	%i0, 1, %i0')
    77    define(`func',    `mpn_cnd_sub_n')
    78  ')
    79  
    80  MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
    81  
        C func(cnd, rp, up, vp, n)
        C
        C Walks n 64-bit limbs from low to high address.  Each result limb is
        C up[i] plus the LOGOP-filtered vp[i] plus the running carry, stored to
        C rp[i].  The mask built from cnd decides whether vp contributes; no
        C branch depends on cnd.  The final carry (add) or borrow (sub) is
        C returned through %i0, which the closing restore hands to the caller.
        C
        C NOTE(review): there is no path for n = 0 (the even path always handles
        C two limbs), so callers presumably must pass n >= 1 -- confirm.
    82  ASM_START()
        C REGISTER is defined outside this file; presumably it declares the
        C global register as scratch to the assembler.  Of the two, only %g3 is
        C referenced here (as x1/w1).
    83  	REGISTER(%g2,#scratch)
    84  	REGISTER(%g3,#scratch)
    85  PROLOGUE(func)
        C New register window with a 176-byte stack frame.
    86  	save	%sp, -176, %sp
    87  
        C Overwrite cnd with the mask (mask and cnd are the same register).
    88  	MAKEMASK(cnd,mask)
    89  
        C Record the parity of n in the condition codes for the be below; the
        C sllx/add/neg in between do not write condition codes.
        C Then n = 8*n - 16 bytes, the *0 pointers are set to base + n (the
        C second-to-last limb), and n is negated so that pointer + n addresses
        C limb 0 and counts upward.
    90  	andcc	n, 1, %g0
    91  	sllx	n, 3, n
    92  	add	n, -16, n
    93  	add	vp, n, vp0
    94  	add	up, n, up0
    95  	add	rp, n, rp0
    96  	neg	n, n
        C Even count: skip the single-limb step.  INITCY sits in the delay slot
        C and so runs on both paths; it sets the carry the chain starts from.
    97  	be	L(evn)
    98  	 INITCY
    99  
        C Odd count: handle one limb so an even number remains.  If the index
        C is positive after advancing, that was the only limb.
   100  L(odd):	ldx	[vp0 + n], v1
   101  	ldx	[up0 + n], u1
   102  	LOGOP(	v1, mask, x1)
   103  	addxccc(u1, x1, w1)
   104  	stx	w1, [rp0 + n]
   105  	add	n, 8, n
   106  	brgz	n, L(rtn)
   107  	 nop
   108  
        C Set up the second pointer of each pair one limb (8 bytes) ahead and
        C preload the first two limbs from vp and up.  The index is advanced
        C before the matching stores happen, so the store bases are biased to
        C compensate: rp0 by -16 (done in the delay slot, on both paths) and
        C rp1 by -24, since rp1 is used after a further +16 in the loop.
        C A positive index means the preloaded pair is the last one.
   109  L(evn):	add	vp0, 8, vp1
   110  	add	up0, 8, up1
   111  	add	rp0, -24, rp1
   112  	ldx	[vp0 + n], v0
   113  	ldx	[vp1 + n], v1
   114  	ldx	[up0 + n], u0
   115  	ldx	[up1 + n], u1
   116  	add	n, 16, n
   117  	brgz	n, L(end)
   118  	 add	rp0, -16, rp0
   119  
        C Main loop, two limbs per pass, software pipelined: each operand is
        C consumed just before the register is reloaded with the next pair.
        C Only the addxccc instructions write the carry here (add, ldx, stx and
        C brlez leave it alone), so the chain survives across iterations.
        C The w1 store is in the delay slot, after the index was advanced.
   120  L(top):	LOGOP(	v0, mask, x0)
   121  	ldx	[vp0 + n], v0
   122  	LOGOP(	v1, mask, x1)
   123  	ldx	[vp1 + n], v1
   124  	addxccc(u0, x0, w0)
   125  	ldx	[up0 + n], u0
   126  	addxccc(u1, x1, w1)
   127  	ldx	[up1 + n], u1
   128  	stx	w0, [rp0 + n]
   129  	add	n, 16, n
   130  	brlez	n, L(top)
   131  	 stx	w1, [rp1 + n]
   132  
        C Wind-down: combine and store the last loaded pair.  The index is 16
        C on arrival, so rp1 + 32 is the slot the loop would have reached with
        C rp1 + n after one more increment.
   133  L(end):	LOGOP(	v0, mask, x0)
   134  	LOGOP(	v1, mask, x1)
   135  	addxccc(u0, x0, w0)
   136  	addxccc(u1, x1, w1)
   137  	stx	w0, [rp0 + n]
   138  	stx	w1, [rp1 + 32]
   139  
        C Turn the final carry into the return value in %i0, then return; the
        C restore in the delay slot pops the register window.
   140  L(rtn):	RETVAL
   141  	ret
   142  	 restore
   143  EPILOGUE()