github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/ia64/cnd_aors_n.asm (about)

     1  dnl  IA-64 mpn_cnd_add_n/mpn_cnd_sub_n.
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund.
     4  
     5  dnl  Copyright 2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C           cycles/limb
    36  C Itanium:      ?
    37  C Itanium 2:    1.5
    38  
    39  C INPUT PARAMETERS
        C   cnd  condition word; only tested against zero, then turned into a mask
        C   rp   destination pointer, result limbs are stored here
        C   up   first source pointer, limbs are used unmodified
        C   vp   second source pointer, limbs are ANDed with the mask before use
        C   n    limb count; n mod 4 picks the entry code, (n-1)/4 the loop count
    40  define(`cnd', `r32')
    41  define(`rp',  `r33')
    42  define(`up',  `r34')
    43  define(`vp',  `r35')
    44  define(`n',   `r36')
    45  
    46  ifdef(`OPERATION_cnd_add_n',`
        C Addition.  ADDSUB is the limb operation, CND the unsigned compare that
        C detects a carry out of w = u + x (w below u), INCR the carry-in
        C adjustment and LIM the limb value that wraps when INCR is applied.
        C Names and values are quoted so that an earlier definition of any of
        C them cannot be expanded inside these define calls.
    47    define(`ADDSUB',	`add')
    48    define(`CND',		`ltu')
    49    define(`INCR',	`1')
    50    define(`LIM',		`-1')
    51    define(`func',	`mpn_cnd_add_n')
    52  ')
    53  ifdef(`OPERATION_cnd_sub_n',`
        C Subtraction.  Same roles as above: a borrow out of w = u - x shows as
        C w above u, the borrow-in adjustment is -1, and a limb equal to 0 wraps
        C when that adjustment is applied.
    54    define(`ADDSUB',	`sub')
    55    define(`CND',		`gtu')
    56    define(`INCR',	`-1')
    57    define(`LIM',		`0')
    58    define(`func',	`mpn_cnd_sub_n')
    59  ')
    60  
    61  define(`PFDIST', `160')
        C PFDIST is the prefetch distance in bytes: the two lfetch pointers start
        C this far beyond up and vp.  Quoted like the other definitions in this
        C file so the name cannot be expanded by an earlier definition.
    62  
    63  C Some useful aliases for registers we use
        C   u0-u3  limbs loaded from up        x0-x3  v limbs ANDed with cnd
        C   v0-v3  limbs loaded from vp        w0-w3  result limbs
        C   up1/vp1/rp1 reuse the incoming pointer registers; up2/vp2/rp2 are set
        C   8 bytes ahead of them; upadv/vpadv are the lfetch (prefetch) pointers.
        C   up2 shares r8 with the return value, which is written only after the
        C   last load through up2.
        C
        C upadv is deliberately kept out of r1.  r1 is gp, which the Itanium
        C software conventions require a procedure to preserve from entry to
        C exit, and this routine overwrites and post-increments upadv without
        C restoring it.  r18 is a scratch register with no other use in this file.
    64  define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
    65  define(`x0',`r20') define(`x1',`r21') define(`x2',`r22') define(`x3',`r23')
    66  define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
    67  define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
    68  define(`up1',`up') define(`up2',`r8') define(`upadv',`r18')
    69  define(`vp1',`vp') define(`vp2',`r9') define(`vpadv',`r11')
    70  define(`rp1',`rp') define(`rp2',`r10')
    71  
    72  MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
    73  
    74  ASM_START()
        C func -- mpn_cnd_add_n or mpn_cnd_sub_n, whichever OPERATION_* is defined.
        C
        C Stores n limbs at rp, each computed as
        C     u ADDSUB (v AND mask)
        C from the limbs at up and vp, with the carry (add) or borrow (sub)
        C propagated from limb to limb.  mask is -1 when cnd is nonzero and 0 when
        C cnd is zero, so with cnd = 0 the up limbs are stored unchanged.  cnd is
        C used only to form the mask; no branch in this routine depends on it.
        C
        C Returns in r8: 1 if the last limb produced a carry/borrow, else 0.
        C
        C Limbs are handled four per main loop pass, two at a time through the
        C pointer pairs up1/up2, vp1/vp2 and rp1/rp2 (the second of each pair is
        C 8 bytes ahead and both step by 16).  Entry code selected by n mod 4
        C handles the leftover limbs and then joins the loop or the exit code.
        C
        C Carry predicates: p6, p7, p8, p9 hold the carry out of w0, w1, w2, w3
        C respectively, so p9 feeds w0, p6 feeds w1, p7 feeds w2 and p8 feeds w3.
        C A carry out is first taken from the plain result (cmp.CND w, u).  When
        C a carry in is applied, the predicated cmpeqor also flags w = LIM, the
        C value that wraps when INCR is added; it sits in the same instruction
        C group as the add of INCR and so tests w before the adjustment.
        C NOTE(review): cmpeqor is not defined in this file; presumably the
        C included config maps it to an OR-type compare (cmp.eq.or) -- confirm.
        C
        C NOTE(review): n = 0 is not handled (n - 1 is shifted as unsigned to
        C form the loop count); presumably callers guarantee n >= 1 -- confirm.
    75  PROLOGUE(func)
        C NOTE(review): ar.lc is copied to r2 only after .body, while the .save
        C annotation sits in an empty prologue region -- confirm this is intended.
    76  	.prologue
    77  	.save	ar.lc, r2
    78  	.body
        C 32-bit ABI only: extend the three pointers with addp4 and zero-extend n.
    79  ifdef(`HAVE_ABI_32',`
    80  	addp4	rp = 0, rp		C				M I
    81  	addp4	up = 0, up		C				M I
    82  	nop.i	0
    83  	addp4	vp = 0, vp		C				M I
    84  	nop.m	0
    85  	zxt4	n = n			C				I
    86  	;;
    87  ')
        C Setup: r3 = n mod 4 (entry selector), n = (n - 1) >> 2 (loop count),
        C r2 = saved ar.lc, cnd = -1 or 0 mask, the +8 companion pointers, and
        C the two prefetch pointers PFDIST bytes beyond up and vp.
    88   {.mmi;	and	r3 = 3, n		C				M I
    89  	add	n = -1, n		C				M I
    90  	mov	r2 = ar.lc		C				I0
    91  }{.mmi;	cmp.ne	p6, p7 = 0, cnd		C				M I
    92  	add	vp2 = 8, vp		C				M I
    93  	add	up2 = 8, up		C				M I
    94  	;;
    95  }{.mmi;	add	upadv = PFDIST, up	C				M I
    96  	add	vpadv = PFDIST, vp	C				M I
    97  	shr.u	n = n, 2		C				I0
    98  	.pred.rel "mutex", p6, p7
    99  }{.mmi;	add	rp2 = 8, rp		C				M I
   100     (p6)	mov	cnd = -1		C				M I
   101     (p7)	mov	cnd = 0			C				M I
   102  	;;
        C Dispatch on n mod 4; the value 0 falls through to b0.  These compares
        C also leave p7 clear on the b0 path and p9 clear on the b2 path, which
        C is the zero carry-in those entries start from.
   103  }	cmp.eq	p9, p0 = 1, r3		C				M I
   104  	cmp.eq	p7, p0 = 2, r3		C				M I
   105  	cmp.eq	p8, p0 = 3, r3		C				M I
   106     (p9)	br	L(b1)			C				B
   107     (p7)	br	L(b2)			C				B
   108     (p8)	br	L(b3)			C				B
   109  	;;
        C n = 0 mod 4 (also reached from b1).  Set the loop count, compute the
        C first pair into w2/w3 with their carry-outs in p8/p9, load the next v
        C pair and enter the loop at lo0.  p7 holds the carry into w2.
   110  L(b0):
   111   {.mmi;	ld8	v2 = [vp1], 16		C				M01
   112  	ld8	v3 = [vp2], 16		C				M01
   113  	mov	ar.lc = n		C				I0
   114  	;;
   115  }	ld8	u2 = [up1], 16		C				M01
   116  	ld8	u3 = [up2], 16		C				M01
   117  	and	x2 = v2, cnd		C				M I
   118  	and	x3 = v3, cnd		C				M I
   119  	;;
   120  	ADDSUB	w2 = u2, x2		C				M I
   121  	ADDSUB	w3 = u3, x3		C				M I
   122  	;;
   123  	ld8	v0 = [vp1], 16		C				M01
   124  	ld8	v1 = [vp2], 16		C				M01
   125  	cmp.CND	p8, p0 = w2, u2		C				M I
   126  	cmp.CND	p9, p0 = w3, u3		C				M I
   127  	br	L(lo0)
   128  
        C n = 1 mod 4.  Process one limb through the *1 pointers (stepping 8) and
        C bump the *2 pointers by 8 so they stay one limb ahead.  If the loop
        C count is nonzero, continue at b0 with the count reduced by one and the
        C carry in p7; otherwise clear r8 and return via e1.  ar.lc has not been
        C written on that return path, so it needs no restore there.
   129  L(b1):	ld8	v1 = [vp1], 8		C				M01
   130  	add	vp2 = 8, vp2		C				M I
   131  	add	rp2 = 8, rp2		C				M I
   132  	;;
   133  	ld8	u1 = [up1], 8		C				M01
   134  	add	up2 = 8, up2		C				M I
   135  	and	x1 = v1, cnd		C				M I
   136  	;;
   137  	ADDSUB	w1 = u1, x1		C				M I
   138  	cmp.ne	p10, p0 = 0, n
   139  	add	n = -1, n
   140  	;;
   141  	cmp.CND	p7, p0 = w1, u1		C				M I
   142  	st8	[rp1] = w1, 8		C				M23
   143    (p10)	br	L(b0)
   144  	;;
   145  	mov	r8 = 0			C				M I
   146  	br	L(e1)
   147  
        C n = 3 mod 4.  Process one limb as in b1, leaving its carry in p9, then
        C fall through to b2 for the next two limbs.
   148  L(b3):	ld8	v3 = [vp1], 8		C				M01
   149  	add	vp2 = 8, vp2		C				M I
   150  	add	rp2 = 8, rp2		C				M I
   151  	;;
   152  	ld8	u3 = [up1], 8		C				M01
   153  	add	up2 = 8, up2		C				M I
   154  	and	x3 = v3, cnd		C				M I
   155  	;;
   156  	ADDSUB	w3 = u3, x3		C				M I
   157  	;;
   158  	cmp.CND	p9, p0 = w3, u3		C				M I
   159  	st8	[rp1] = w3, 8		C				M23
   160  	C fall through
   161  
        C n = 2 mod 4 (also reached from b3).  Set the loop count and compute one
        C pair into w0/w1; p9 is the carry into w0.  If the loop count is zero
        C finish at e2, otherwise (gt2) load the next v pair, take the plain
        C carry-outs of w0/w1 and enter the loop at lo2.
   162  L(b2):
   163   {.mmi;	ld8	v0 = [vp1], 16		C				M01
   164  	ld8	v1 = [vp2], 16		C				M01
   165  	mov	ar.lc = n		C				I0
   166  	;;
   167  }	ld8	u0 = [up1], 16		C				M01
   168  	ld8	u1 = [up2], 16		C				M01
   169  	and	x0 = v0, cnd		C				M I
   170  	and	x1 = v1, cnd		C				M I
   171  	;;
   172  	ADDSUB	w0 = u0, x0		C				M I
   173  	ADDSUB	w1 = u1, x1		C				M I
   174  	br.cloop.dptk	L(gt2)		C				B
   175  	;;
   176  	cmp.CND	p6, p0 = w0, u0		C				M I
   177  	br		L(e2)		C				B
   178  L(gt2):
   179  	ld8	v2 = [vp1], 16		C				M01
   180  	ld8	v3 = [vp2], 16		C				M01
   181  	cmp.CND	p6, p0 = w0, u0		C				M I
   182  	cmp.CND	p7, p0 = w1, u1		C				M I
   183  	br		L(lo2)		C				B
   184  
   185  
   186  C *** MAIN LOOP START ***
   187  C	ALIGN(32)
        C top: store the finished w2/w3 pair, load the next v2/v3 and take the
        C plain carry-outs of w0/w1.
   188  L(top):
   189   {.mmi;	ld8	v2 = [vp1], 16		C				M01
   190  	ld8	v3 = [vp2], 16		C				M01
   191  	cmp.CND	p6, p0 = w0, u0		C				M I
   192  }{.mmi;	st8	[rp1] = w2, 16		C				M23
   193  	st8	[rp2] = w3, 16		C				M23
   194  	cmp.CND	p7, p0 = w1, u1		C				M I
   195  	;;
   196  }
        C lo2: apply carry p9 to w0 and then p6 to w1, compute the raw w2/w3,
        C prefetch ahead on up, then store w0/w1 and load the next v0/v1.
   197  L(lo2):
   198   {.mmi;	ld8	u2 = [up1], 16		C				M01
   199  	ld8	u3 = [up2], 16		C				M01
   200     (p9)	cmpeqor	p6, p0 = LIM, w0	C				M I
   201  }{.mmi;	and	x2 = v2, cnd		C				M I
   202  	and	x3 = v3, cnd		C				M I
   203     (p9)	add	w0 = INCR, w0		C				M I
   204  	;;
   205  }{.mmi;	ADDSUB	w2 = u2, x2		C				M I
   206     (p6)	cmpeqor	p7, p0 = LIM, w1	C				M I
   207     (p6)	add	w1 = INCR, w1		C				M I
   208  }{.mmi;	ADDSUB	w3 = u3, x3		C				M I
   209  	lfetch	[upadv], 32
   210  	nop	0
   211  	;;
   212  }{.mmi;	ld8	v0 = [vp1], 16		C				M01
   213  	ld8	v1 = [vp2], 16		C				M01
   214  	cmp.CND	p8, p0 = w2, u2		C				M I
   215  }{.mmi;	st8	[rp1] = w0, 16		C				M23
   216  	st8	[rp2] = w1, 16		C				M23
   217  	cmp.CND	p9, p0 = w3, u3		C				M I
   218  	;;
   219  }
        C lo0: the mirror of lo2.  Apply carry p7 to w2 and then p8 to w3,
        C compute the raw w0/w1, prefetch ahead on vp and loop while ar.lc
        C is nonzero.
   220  L(lo0):
   221   {.mmi;	ld8	u0 = [up1], 16		C				M01
   222  	ld8	u1 = [up2], 16		C				M01
   223     (p7)	cmpeqor	p8, p0 = LIM, w2	C				M I
   224  }{.mmi;	and	x0 = v0, cnd		C				M I
   225  	and	x1 = v1, cnd		C				M I
   226     (p7)	add	w2 = INCR, w2		C				M I
   227  	;;
   228  }{.mmi;	ADDSUB	w0 = u0, x0		C				M I
   229     (p8)	cmpeqor	p9, p0 = LIM, w3	C				M I
   230     (p8)	add	w3 = INCR, w3		C				M I
   231  }{.mmb;	ADDSUB	w1 = u1, x1		C				M I
   232  	lfetch	[vpadv], 32
   233  	br.cloop.dptk	L(top)		C				B
   234  	;;
   235  }
   236  C *** MAIN LOOP END ***
   237  
   238  
        C Wind-down: store the last finished w2/w3 pair and take the plain
        C carry-out of w0.
   239  L(end):
   240   {.mmi;	st8	[rp1] = w2, 16		C				M23
   241  	st8	[rp2] = w3, 16		C				M23
   242  	cmp.CND	p6, p0 = w0, u0		C				M I
   243  	;;
   244  }
        C e2: final pair.  Apply carry p9 to w0 and then p6 to w1, clear r8,
        C store w0/w1 and restore ar.lc from r2.  p7 ends up as the carry out
        C of the last limb.
   245  L(e2):
   246   {.mmi;	cmp.CND	p7, p0 = w1, u1		C				M I
   247     (p9)	cmpeqor	p6, p0 = LIM, w0	C				M I
   248     (p9)	add	w0 = INCR, w0		C				M I
   249  	;;
   250  }{.mmi;	mov	r8 = 0			C				M I
   251     (p6)	cmpeqor	p7, p0 = LIM, w1	C				M I
   252     (p6)	add	w1 = INCR, w1		C				M I
   253  	;;
   254  }{.mmi;	st8	[rp1] = w0, 16		C				M23
   255  	st8	[rp2] = w1, 16		C				M23
   256  	mov	ar.lc = r2		C				I0
   257  }
        C e1: r8 was cleared on both paths that reach here; set it to 1 when
        C the last limb carried (p7), then return.
   258  L(e1):
   259   {.mmb;	nop	0
   260     (p7)	mov	r8 = 1			C				M I
   261  	br.ret.sptk.many b0		C				B
   262  }
   263  EPILOGUE()
   264  ASM_END()