github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/aorslsh2_n.asm (about)

     1  dnl  Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
     2  
     3  dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C      cycles/limb
    34  C EV4:     ?
    35  C EV5:     6
    36  C EV6:     3.75
    37  
    38  C TODO
    39  C  * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.
    40  
    41  define(`rp',`r16')	C destination limbs, written with stq
    42  define(`up',`r17')	C source limbs that are added to or subtracted from
    43  define(`vp',`r18')	C source limbs that are shifted left by 2 first
    44  define(`n', `r19')	C limb count, consumed 4 per loop iteration
    45  
    46  define(`u0', `r8')	C u limb, register set 0
    47  define(`u1', `r1')	C u limb, register set 1
    48  define(`v0', `r4')	C v limb, register set 0
    49  define(`v1', `r5')	C v limb, register set 1
    50  
    51  define(`cy0', `r0')	C carry out of a u0/v0 step, same register as the final result
    52  define(`cy1', `r20')	C carry out of a u1/v1 step
    53  define(`cy', `r22')	C scratch for the second carry test of a step
    54  define(`rr', `r24')	C finished result limb, about to be stored
    55  define(`ps', `r25')	C partial result before the incoming carry is applied
    56  define(`sl', `r28')	C v limb << 2 with the top 2 bits of the previous v limb added in
    57  	C ADDSUB, CARRY and func below depend on which OPERATION_ symbol is defined
    58  ifdef(`OPERATION_addlsh2_n',`
    59    define(ADDSUB,       addq)
    60    define(CARRY,       `cmpult $1,$2,$3')
    61    define(func, mpn_addlsh2_n)
    62  ')
    63  ifdef(`OPERATION_sublsh2_n',`
    64    define(ADDSUB,       subq)
    65    define(CARRY,       `cmpult $2,$1,$3')
    66    define(func, mpn_sublsh2_n)
    67  ')
    68  	C CARRY(res, src, c) sets c to 1 on wraparound: res < src for add, src < res for sub
    69  MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
    70  
    71  ASM_START()
    72  PROLOGUE(func)	C rp[] = up[] +- (vp[] << 2) over n limbs, result word left in r0
    73  	and	n, 2, cy0	C cy0 = bit 1 of n, used only to pick the entry point
    74  	blbs	n, L(bx1)	C low bit of n set: odd limb count
    75  L(bx0):	ldq	v1, 0(vp)	C even n: first v limb goes in register set 1
    76  	ldq	u1, 0(up)	C first u limb
    77  	bis	r31, r31, r2	C r2 = 0, no bits shifted in below the first limb
    78  	bne	cy0, L(b10)	C bit 1 set: n mod 4 = 2
    79  
    80  L(b00):	lda	vp, 48(vp)	C n mod 4 = 0: bias pointers so the offsets at lo0 hit limb 0/1
    81  	lda	up, -16(up)
    82  	lda	rp, -8(rp)
    83  	s4addq	v1, r31, sl	C sl = v1 << 2, the step top performs just before lo0
    84  	br	r31, L(lo0)	C cy0 is already 0 here since bit 1 of n was clear
    85  
    86  L(b10):	lda	vp, 32(vp)	C n mod 4 = 2: bias pointers for entry at lo2
    87  	lda	rp, 8(rp)
    88  	lda	cy0, 0(r31)	C clear cy0, it still held n & 2
    89  	br	r31, L(lo2)
    90  
    91  L(bx1):	ldq	v0, 0(vp)	C odd n: first v limb goes in register set 0
    92  	ldq	u0, 0(up)	C first u limb
    93  	lda	cy1, 0(r31)	C no incoming carry
    94  	bis	r31, r31, r3	C r3 = 0, no bits shifted in below the first limb
    95  	nop
    96  	beq	cy0, L(b01)	C bit 1 clear: n mod 4 = 1
    97  
    98  L(b11):	lda	vp, 40(vp)	C n mod 4 = 3: bias pointers for entry at lo3
    99  	lda	up, -24(up)
   100  	lda	rp, 16(rp)
   101  	br	r31, L(lo3)
   102  
   103  L(b01):	lda	n, -4(n)	C n mod 4 = 1: take 4 off the count before testing it
   104  	ble	n, L(end)	C nothing for the loop, only the final limb remains
   105  	lda	vp, 24(vp)	C bias pointers for entry at top
   106  	lda	up, -8(up)
   107  
   108  	ALIGN(16)
   109  L(top):	s4addq	v0, r3, sl	C combined vlimb
   110  	ldq	v1, -16(vp)
   111  	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 2)
   112  	ldq	u1, 16(up)
   113  	srl	v0, 62, r2	C high v bits
   114  	ADDSUB	ps, cy1, rr	C consume carry from previous operation
   115  	CARRY(	ps, u0, cy0)	C carry out #2
   116  	stq	rr, 0(rp)
   117  	CARRY(	rr, ps, cy)	C carry out #3
   118  	lda	vp, 32(vp)	C bookkeeping
   119  	addq	cy, cy0, cy0	C final carry out
   120  	s4addq	v1, r2, sl	C combined vlimb for the lo0 step
   121  L(lo0):	ldq	v0, -40(vp)	C same step as top using u1/v1, carry in cy0, carry out cy1
   122  	ADDSUB	u1, sl, ps
   123  	ldq	u0, 24(up)
   124  	srl	v1, 62, r3	C high v bits for the next step
   125  	ADDSUB	ps, cy0, rr
   126  	CARRY(	ps, u1, cy1)
   127  	stq	rr, 8(rp)
   128  	CARRY(	rr, ps, cy)
   129  	lda	rp, 32(rp)	C bookkeeping
   130  	addq	cy, cy1, cy1
   131  L(lo3):	s4addq	v0, r3, sl	C same step using u0/v0, carry in cy1, carry out cy0
   132  	ldq	v1, -32(vp)
   133  	ADDSUB	u0, sl, ps
   134  	ldq	u1, 32(up)
   135  	srl	v0, 62, r2	C high v bits for the next step
   136  	ADDSUB	ps, cy1, rr
   137  	CARRY(	ps, u0, cy0)
   138  	stq	rr, -16(rp)	C negative offset because rp was already advanced by 32
   139  	CARRY(	rr, ps, cy)
   140  	lda	up, 32(up)	C bookkeeping
   141  	addq	cy, cy0, cy0
   142  L(lo2):	s4addq	v1, r2, sl	C same step using u1/v1, carry in cy0, carry out cy1
   143  	ldq	v0, -24(vp)
   144  	ADDSUB	u1, sl, ps
   145  	ldq	u0, 8(up)
   146  	srl	v1, 62, r3	C high v bits for the next step
   147  	ADDSUB	ps, cy0, rr
   148  	CARRY(	ps, u1, cy1)
   149  	stq	rr, -8(rp)
   150  	CARRY(	rr, ps, cy)
   151  	lda	n, -4(n)	C bookkeeping
   152  	addq	cy, cy1, cy1
   153  	bgt	n, L(top)	C loop while limbs beyond the final one remain
   154  
   155  L(end):	s4addq	v0, r3, sl	C final limb, already loaded in u0/v0, no further loads
   156  	ADDSUB	u0, sl, ps
   157  	srl	v0, 62, r2	C top 2 bits shifted out of the last v limb
   158  	ADDSUB	ps, cy1, rr
   159  	CARRY(	ps, u0, cy0)
   160  	stq	rr, 0(rp)
   161  	CARRY(	rr, ps, cy)
   162  	addq	cy, cy0, cy0
   163  	addq	cy0, r2, r0	C r0 = last carry or borrow + the 2 bits shifted out
   164  
   165  	ret	r31,(r26),1	C return through the address held in r26
   166  EPILOGUE()
   167  ASM_END()