github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/ev6/nails/addmul_4.asm (about)

     1  dnl  Alpha ev6 nails mpn_addmul_4.
     2  
     3  dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C Runs at 2.5 cycles/limb.
    34  
    35  C We should go for 2-way unrolling over 17 cycles, for 2.125 c/l corresponding
    36  C to 3.24 insn/cycle.
    37  
    38  
    39  C  INPUT PARAMETERS
        C  Each alias names the register the routine body reads for that operand.
    40  define(`rp',`r16')		C limb pointer that is loaded from and stored back to
    41  define(`up',`r17')		C limb pointer that is only loaded from
    42  define(`n',`r18')		C number of up limbs still to process
    43  define(`vp',`r19')		C pointer to four limbs, read at offsets 0, 8, 16 and 24
    44  
    45  C  Useful register aliases
    46  define(`numb_mask',`r24')	C all ones shifted right by NAIL_BITS
    47  define(`ulimb',`r25')		C limb most recently loaded from up
    48  define(`rlimb',`r27')		C limb most recently loaded from rp
    49  
        C  Product halves of vK times ulimb: mKa is written by mulq (low 64 bits)
        C  and mKb by umulh (high 64 bits).
    50  define(`m0a',`r0')
    51  define(`m0b',`r1')
    52  define(`m1a',`r2')
    53  define(`m1b',`r3')
    54  define(`m2a',`r20')
    55  define(`m2b',`r21')
    56  define(`m3a',`r12')
    57  define(`m3b',`r13')
    58  
        C  Running sums for the four output columns that are still open.
    59  define(`acc0',`r4')
    60  define(`acc1',`r5')
    61  define(`acc2',`r22')
    62  define(`acc3',`r14')
    63  
        C  The four limbs loaded from vp; the routine shifts each one left by
        C  NAIL_BITS immediately after loading it.
    64  define(`v0',`r6')
    65  define(`v1',`r7')
    66  define(`v2',`r23')
    67  define(`v3',`r15')
    68  
        C  r12-r15 (m3a, m3b, acc3, v3) are saved to the stack on entry to the
        C  routine and reloaded before it returns.
    69  C Used for temps: r8 r19 r28
        C  r8 holds a low product half shifted right by NAIL_BITS, r28 holds the
        C  masked limb about to be stored.  r19 is the same register as vp: it is
        C  cleared once v0-v3 have been loaded and from then on carries the column
        C  sum and the nail bits shifted out of it.
    70  
    71  define(`NAIL_BITS',`GMP_NAIL_BITS')
    72  define(`NUMB_BITS',`GMP_NUMB_BITS')
    73  
    74  C  This declaration is munged by configure
        C  NOTE(review): presumably the range of GMP_NAIL_BITS values this file
        C  may be built with; confirm against configure.
    75  NAILS_SUPPORT(4-63)
    76  
    77  ASM_START()
        C mpn_addmul_4 (rp, up, n, vp)
        C
        C Multiplies the n limbs at up by the four limbs at vp and adds the
        C products to the limbs at rp.  The low n limbs at rp are read and
        C rewritten, three further limbs rp[n], rp[n+1], rp[n+2] are written
        C without being read, and the most significant limb is returned in r0.
        C
        C n is decremented before it is first tested, so the code assumes n >= 1.
        C
        C Every limb carries NAIL_BITS zero bits on top of NUMB_BITS data bits.
        C v0-v3 are pre-shifted left by NAIL_BITS, so for each product
        C   umulh                      gives  (v * u) >> NUMB_BITS
        C   mulq, then srl NAIL_BITS   gives  (v * u) mod 2^NUMB_BITS
        C Column sums are kept unnormalised in acc0-acc3; the bits above
        C NUMB_BITS of each finished column are carried into the next one via r19.
        C
        C The U0/U1/L0/L1 tags on the instructions are the original issue-slot
        C annotations and are kept as written.
    78  PROLOGUE(mpn_addmul_4)
        C Allocate a 240 byte frame; only offsets 32..56 are used, to hold
        C r12-r15 which back m3a, m3b, acc3 and v3.
    79  	lda	r30,	-240(r30)
    80  	stq	r12,	32(r30)
    81  	stq	r13,	40(r30)
    82  	stq	r14,	48(r30)
    83  	stq	r15,	56(r30)
    84  
        C numb_mask = -1 >> NAIL_BITS
    85  	lda	numb_mask,-1(r31)
    86  	srl	numb_mask,NAIL_BITS,numb_mask
    87  
    88  	ldq	v0,	0(vp)
    89  	ldq	v1,	8(vp)
    90  	ldq	v2,	16(vp)
    91  	ldq	v3,	24(vp)
    92  
    93  	bis	r31,	r31,	acc0		C	zero acc0
    94  	sll	v0,NAIL_BITS,	v0
    95  	bis	r31,	r31,	acc1		C	zero acc1
    96  	sll	v1,NAIL_BITS,	v1
    97  	bis	r31,	r31,	acc2		C	zero acc2
    98  	sll	v2,NAIL_BITS,	v2
    99  	bis	r31,	r31,	acc3		C	zero acc3
   100  	sll	v3,NAIL_BITS,	v3
   101  	bis	r31,	r31,	r19		C	vp is dead; r19 becomes the nail carry
   102  
        C Start the eight product halves for the first up limb before entering
        C the loop; each loop pass sums the products started by the previous one.
   103  	ldq	ulimb,	0(up)
   104  	lda	up,	8(up)
   105  	mulq	v0,	ulimb,	m0a		C U1
   106  	umulh	v0,	ulimb,	m0b		C U1
   107  	mulq	v1,	ulimb,	m1a		C U1
   108  	umulh	v1,	ulimb,	m1b		C U1
   109  	lda	n,	-1(n)
   110  	mulq	v2,	ulimb,	m2a		C U1
   111  	umulh	v2,	ulimb,	m2b		C U1
   112  	mulq	v3,	ulimb,	m3a		C U1
   113  	umulh	v3,	ulimb,	m3b		C U1
   114  	beq	n,	L(end)			C U0
   115  
        C Main loop: one up limb consumed and one rp limb stored per pass.
        C The bis r31,r31,r31 instructions are nops.
   116  	ALIGN(16)
   117  L(top):	bis	r31,	r31,	r31		C U1	nop
   118  	ldq	rlimb,	0(rp)			C L0
   119  	ldq	ulimb,	0(up)			C L1
   120  	addq	r19,	acc0,	acc0		C U0	propagate nail
   121  
   122  	bis	r31,	r31,	r31		C L0	nop
   123  	bis	r31,	r31,	r31		C U1	nop
   124  	bis	r31,	r31,	r31		C L1	nop
   125  	bis	r31,	r31,	r31		C U0	nop
   126  
   127  	lda	rp,	8(rp)			C L0
   128  	srl	m0a,NAIL_BITS,	r8		C U0
   129  	lda	up,	8(up)			C L1
   130  	mulq	v0,	ulimb,	m0a		C U1
   131  
   132  	addq	r8,	acc0,	r19		C U0
   133  	addq	m0b,	acc1,	acc0		C L0
   134  	umulh	v0,	ulimb,	m0b		C U1
   135  	bis	r31,	r31,	r31		C L1	nop
   136  
   137  	addq	rlimb,	r19,	r19		C L0
   138  	srl	m1a,NAIL_BITS,	r8		C U0
   139  	bis	r31,	r31,	r31		C L1	nop
   140  	mulq	v1,	ulimb,	m1a		C U1
   141  
   142  	addq	r8,	acc0,	acc0		C U0
   143  	addq	m1b,	acc2,	acc1		C L0
   144  	umulh	v1,	ulimb,	m1b		C U1
   145  	and	r19,numb_mask,	r28		C L1	extract numb part
   146  
   147  	bis	r31,	r31,	r31		C L0	nop
   148  	srl	m2a,NAIL_BITS,	r8		C U0
   149  	lda	n,	-1(n)			C L1
   150  	mulq	v2,	ulimb,	m2a		C U1
   151  
   152  	addq	r8,	acc1,	acc1		C L1
   153  	addq	m2b,	acc3,	acc2		C L0
   154  	umulh	v2,	ulimb,	m2b		C U1
   155  	srl	r19,NUMB_BITS,	r19		C U0	extract nail part
   156  
   157  	bis	r31,	r31,	r31		C L0	nop
   158  	srl	m3a,NAIL_BITS,	r8		C U0
   159  	stq	r28,	-8(rp)			C L1
   160  	mulq	v3,	ulimb,	m3a		C U1
   161  
   162  	addq	r8,	acc2,	acc2		C L0
   163  	bis	r31,	m3b,	acc3		C L1
   164  	umulh	v3,	ulimb,	m3b		C U1
   165  	bne	n,	L(top)			C U0
   166  
        C Wind-down: sum the products of the last up limb, then flush the four
        C open columns.  rp is no longer advanced here; it stays on the limb
        C loaded at L(end) and the four stores use displacements 0, 8, 16, 24.
   167  L(end):	ldq	rlimb,	0(rp)
   168  	addq	r19,	acc0,	acc0		C	propagate nail
   170  	srl	m0a,NAIL_BITS,	r8		C U0
   171  	addq	r8,	acc0,	r19
   172  	addq	m0b,	acc1,	acc0
   173  	addq	rlimb,	r19,	r19
   174  	srl	m1a,NAIL_BITS,	r8		C U0
   175  	addq	r8,	acc0,	acc0
   176  	addq	m1b,	acc2,	acc1
   177  	and	r19,numb_mask,	r28		C extract limb
   178  	srl	m2a,NAIL_BITS,	r8		C U0
   179  	addq	r8,	acc1,	acc1
   180  	addq	m2b,	acc3,	acc2
   181  	srl	r19,NUMB_BITS,	r19		C extract nail
   182  	srl	m3a,NAIL_BITS,	r8		C U0
   183  	stq	r28,	0(rp)			C last limb that also had an rp addend
   184  	addq	r8,	acc2,	acc2
   185  	bis	r31,	m3b,	acc3
   186  
   187  	addq	r19,	acc0,	acc0		C propagate nail
   188  	and	acc0,numb_mask,	r28
   189  	stq	r28,	8(rp)
   190  	srl	acc0,NUMB_BITS,	r19
   191  	addq	r19,	acc1,	acc1
   192  
   193  	and	acc1,numb_mask,	r28
   194  	stq	r28,	16(rp)
   195  	srl	acc1,NUMB_BITS,	r19
   196  	addq	r19,	acc2,	acc2
   197  
   198  	and	acc2,numb_mask,	r28
   199  	stq	r28,	24(rp)
   200  	srl	acc2,NUMB_BITS,	r19
   201  	addq	r19,	acc3,	r0		C return value: top column plus carry
   202  
        C Reload r12-r15, release the frame and return through r26.
   203  	ldq	r12,	32(r30)
   204  	ldq	r13,	40(r30)
   205  	ldq	r14,	48(r30)
   206  	ldq	r15,	56(r30)
   207  	lda	r30,	240(r30)
   208  	ret	r31,	(r26),	1
   209  EPILOGUE()
   210  ASM_END()