github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/ev6/nails/addmul_3.asm (about)

     1  dnl  Alpha ev6 nails mpn_addmul_3.
     2  
     3  dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C Runs at 3.0 cycles/limb.
    34  
    35  C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).
    36  
    37  
    38  C  INPUT PARAMETERS
        C  rp is both loaded from and stored to; up and vp are only loaded from;
        C  n is the limb count driving the loop.  Once the three vp limbs have
        C  been loaded, r19 is reused as a temporary (see the temps note below).
    39  define(`rp',`r16')
    40  define(`up',`r17')
    41  define(`n',`r18')
    42  define(`vp',`r19')
    43  
    44  C  Useful register aliases
        C  numb_mask: all-ones shifted right by NAIL_BITS, used to mask each sum
        C  before it is stored.  ulimb: limb most recently loaded from up.
        C  rlimb: limb most recently loaded from rp.
    45  define(`numb_mask',`r24')
    46  define(`ulimb',`r25')
    47  define(`rlimb',`r27')
    48  
        C  mKa/mKb hold the mulq and umulh results of vK times ulimb.
        C  m0a (r0) is also written with the final sum just before the ret.
    49  define(`m0a',`r0')
    50  define(`m0b',`r1')
    51  define(`m1a',`r2')
    52  define(`m1b',`r3')
    53  define(`m2a',`r20')
    54  define(`m2b',`r21')
    55  
        C  Running partial sums carried from one up limb to the next.
    56  define(`acc0',`r4')
    57  define(`acc1',`r5')
    58  define(`acc2',`r22')
    59  
        C  The three limbs loaded from vp, each shifted left by NAIL_BITS.
    60  define(`v0',`r6')
    61  define(`v1',`r7')
    62  define(`v2',`r23')
    63  
    64  C Used for temps: r8 r19 r28
        C  r8:  a mulq result shifted right by NAIL_BITS.
        C  r19: the sum for the current rp slot, then its bits above NUMB_BITS
        C       (the carry into the next slot).  Same register as vp.
        C  r28: the masked value about to be stored.
    65  
        C  Short names for the configured nail and numb widths.
    66  define(`NAIL_BITS',`GMP_NAIL_BITS')
    67  define(`NUMB_BITS',`GMP_NUMB_BITS')
    68  
    69  C  This declaration is munged by configure
    70  NAILS_SUPPORT(3-63)
    71  
    72  ASM_START()
        C mpn_addmul_3, nails variant.
        C
        C For each of the n limbs at up, forms the products with the three limbs
        C loaded from vp and adds the resulting columns into the limbs at rp.
        C rp[0..n-1] are read and rewritten.  rp[n] and rp[n+1] are stored without
        C being read.  The remaining top part of the sum is left in m0a (r0) just
        C before the ret.  NOTE(review): r0 is presumably the return-value
        C register -- confirm against the Alpha calling standard.
        C
        C The count n is decremented before it is first tested, so a zero count
        C on entry is not handled.
        C
        C Nail handling: v0-v2 are pre-shifted left by NAIL_BITS.  umulh then
        C gives the product shifted right by 64-NAIL_BITS, and mulq followed by
        C srl NAIL_BITS gives the product bits below that point.  NOTE(review):
        C this is a split at NUMB_BITS only if NUMB_BITS + NAIL_BITS = 64, which
        C this file does not itself define -- confirm.
    73  PROLOGUE(mpn_addmul_3)
    74  	lda	numb_mask,-1(r31)		C	all ones
    75  	srl	numb_mask,NAIL_BITS,numb_mask	C	clear the top NAIL_BITS bits
    76  
    77  	ldq	v0,	0(vp)
    78  	ldq	v1,	8(vp)
    79  	ldq	v2,	16(vp)
    80  
    81  	bis	r31,	r31,	acc0		C	zero acc0
    82  	sll	v0,NAIL_BITS,	v0
    83  	bis	r31,	r31,	acc1		C	zero acc1
    84  	sll	v1,NAIL_BITS,	v1
    85  	bis	r31,	r31,	acc2		C	zero acc2
    86  	sll	v2,NAIL_BITS,	v2
    87  	bis	r31,	r31,	r19		C	zero the carry temp (vp is not read again)
    88  
        C Start the six products for the first up limb before entering the loop.
    89  	ldq	ulimb,	0(up)
    90  	lda	up,	8(up)
    91  	mulq	v0,	ulimb,	m0a		C U1
    92  	umulh	v0,	ulimb,	m0b		C U1
    93  	mulq	v1,	ulimb,	m1a		C U1
    94  	umulh	v1,	ulimb,	m1b		C U1
    95  	lda	n,	-1(n)
    96  	mulq	v2,	ulimb,	m2a		C U1
    97  	umulh	v2,	ulimb,	m2b		C U1
    98  	beq	n,	L(end)			C U0	only one limb: skip the loop
    99  
   100  	ALIGN(16)
        C Main loop, one up limb per pass.  The six products consumed here were
        C started on the previous pass (or in the setup above).  Each m register
        C is read before the multiply for the next up limb overwrites it, so the
        C instruction order within a pass matters.
   101  L(top):	ldq	rlimb,	0(rp)			C L1
   102  	ldq	ulimb,	0(up)			C L0
   103  	bis	r31,	r31,	r31		C U0	nop
   104  	addq	r19,	acc0,	acc0		C U1	propagate nail
   105  
   106  	lda	rp,	8(rp)			C L1
   107  	srl	m0a,NAIL_BITS,	r8		C U0
   108  	lda	up,	8(up)			C L0
   109  	mulq	v0,	ulimb,	m0a		C U1
   110  
   111  	addq	r8,	acc0,	r19		C U0
   112  	addq	m0b,	acc1,	acc0		C L1
   113  	umulh	v0,	ulimb,	m0b		C U1
   114  	bis	r31,	r31,	r31		C L0	nop
   115  
   116  	addq	rlimb,	r19,	r19		C L1
   117  	srl	m1a,NAIL_BITS,	r8		C U0
   118  	bis	r31,	r31,	r31		C L0	nop
   119  	mulq	v1,	ulimb,	m1a		C U1
   120  
   121  	addq	r8,	acc0,	acc0		C U0
   122  	addq	m1b,	acc2,	acc1		C L1
   123  	umulh	v1,	ulimb,	m1b		C U1
   124  	and	r19,numb_mask,	r28		C L0	extract numb part
   125  
   126  	bis	r31,	r31,	r31		C L1	nop
   127  	srl	m2a,NAIL_BITS,	r8		C U0
   128  	lda	n,	-1(n)			C L0
   129  	mulq	v2,	ulimb,	m2a		C U1
   130  
   131  	addq	r8,	acc1,	acc1		C L0
   132  	bis	r31,	m2b,	acc2		C L1
   133  	umulh	v2,	ulimb,	m2b		C U1
   134  	srl	r19,NUMB_BITS,	r19		C U0	extract nail part
   135  
        C rp was already advanced above, so -8(rp) is the slot rlimb came from.
   136  	stq	r28,	-8(rp)			C L
   137  	bne	n,	L(top)			C U0
   138  
        C Last up limb: the same steps as one loop pass, but with no further up
        C load and no new products started.
   139  L(end):	ldq	rlimb,	0(rp)
   140  	addq	r19,	acc0,	acc0		C	propagate nail
   141  	lda	rp,	8(rp)
   142  	srl	m0a,NAIL_BITS,	r8		C U0
   143  	addq	r8,	acc0,	r19
   144  	addq	m0b,	acc1,	acc0
   145  	addq	rlimb,	r19,	r19
   146  	srl	m1a,NAIL_BITS,	r8		C U0
   147  	addq	r8,	acc0,	acc0
   148  	addq	m1b,	acc2,	acc1
   149  	and	r19,numb_mask,	r28		C extract limb
   150  	srl	m2a,NAIL_BITS,	r8		C U0
   151  	addq	r8,	acc1,	acc1
   152  	bis	r31,	m2b,	acc2
   153  	srl	r19,NUMB_BITS,	r19		C extract nail
   154  	stq	r28,	-8(rp)
   155  
        C Flush the accumulators.  acc0 and acc1 are masked and stored to the
        C next two rp slots without reading them; the bits above NUMB_BITS of
        C each are carried into the next accumulator.
   156  	addq	r19,	acc0,	acc0		C propagate nail
   157  	and	acc0,numb_mask,	r28
   158  	stq	r28,	0(rp)
   159  	srl	acc0,NUMB_BITS,	r19
   160  	addq	r19,	acc1,	acc1
   161  
   162  	and	acc1,numb_mask,	r28
   163  	stq	r28,	8(rp)
   164  	srl	acc1,NUMB_BITS,	r19
        C Final sum goes to m0a (r0) unmasked.
   165  	addq	r19,	acc2,	m0a
   166  
   167  	ret	r31,	(r26),	1
   168  EPILOGUE()
   169  ASM_END()