github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/ev6/nails/aors_n.asm (about)

     1  dnl  Alpha ev6 nails mpn_add_n and mpn_sub_n.
     2  
     3  dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  
    32  dnl  Runs at 2.5 cycles/limb.  It would be possible to reach 2.0 cycles/limb
    33  dnl  with 8-way unrolling.
    34  
    35  include(`../config.m4')
    36  
    37  dnl  INPUT PARAMETERS
        dnl  rp is the destination limb pointer (the code only stores through it),
        dnl  up and vp are the two source limb pointers (only loaded from), and n
        dnl  is the limb count.
    38  define(`rp',`r16')
    39  define(`up',`r17')
    40  define(`vp',`r18')
    41  define(`n',`r19')
    42  
        dnl  rl0-rl3 hold the raw (unmasked) result limbs of a group of four.
        dnl  rl0 is r0, which is also where the final carry or borrow bit is
        dnl  placed at L(ret).
    43  define(`rl0',`r0')
    44  define(`rl1',`r1')
    45  define(`rl2',`r2')
    46  define(`rl3',`r3')
    47  
        dnl  ul0-ul3 hold the limbs loaded from up for the current group.
    48  define(`ul0',`r4')
    49  define(`ul1',`r5')
    50  define(`ul2',`r6')
    51  define(`ul3',`r7')
    52  
        dnl  vl0-vl3 hold the limbs loaded from vp for the current group.  vl3 is
        dnl  r25, which the function body also uses directly as the counter of
        dnl  the one-limb-at-a-time loop L(lp0); that loop has finished before
        dnl  vl3 is first loaded at L(ge4).
    53  define(`vl0',`r22')
    54  define(`vl1',`r23')
    55  define(`vl2',`r24')
    56  define(`vl3',`r25')
    57  
        dnl  numb_mask is set at function entry to all ones shifted right by
        dnl  NAIL_BITS and is ANDed into every result limb before it is stored.
    58  define(`numb_mask',`r21')
    59  
        dnl  NAIL_BITS is a local alias for GMP_NAIL_BITS.  CYSH is the right
        dnl  shift count that turns a raw result limb into the carry fed to the
        dnl  next limb.  The value given here is only a default: the
        dnl  OPERATION_add_n and OPERATION_sub_n blocks further down each define
        dnl  CYSH again (GMP_NUMB_BITS for add, 63 for sub).
    60  define(`NAIL_BITS',`GMP_NAIL_BITS')
    61  define(`CYSH',`GMP_NUMB_BITS')
    62  
    63  dnl  This declaration is munged by configure
        dnl  NOTE(review): the range starts at 1, presumably because the carry is
        dnl  read out of the 64-bit result register itself (see CYSH), which
        dnl  needs at least one spare bit above the numb field - confirm.
    64  NAILS_SUPPORT(1-63)
    65  
        dnl  Select the operation this file is assembled as.
        dnl    OP    instruction used both for the limb operation and for
        dnl          applying the incoming carry or borrow (addq or subq).
        dnl    CYSH  shift count that extracts the outgoing carry or borrow from
        dnl          the raw result: GMP_NUMB_BITS for addq, so the bits above
        dnl          the numb field are taken, and 63 for subq, so the top bit
        dnl          of the 64-bit result is taken.
        dnl    func  name of the function defined by PROLOGUE below.
        dnl  NOTE(review): both carry schemes assume the nail bits of the input
        dnl  limbs are zero - confirm against the nails convention.
    66  ifdef(`OPERATION_add_n', `
    67  	define(`OP',        addq)
    68  	define(`CYSH',`GMP_NUMB_BITS')
    69  	define(`func',  mpn_add_n)')
    70  ifdef(`OPERATION_sub_n', `
    71  	define(`OP',        subq)
    72  	define(`CYSH',63)
    73  	define(`func',  mpn_sub_n)')
    74  
        dnl  Names the two functions this source can be built as; which one is
        dnl  produced depends on the OPERATION_* symbol tested above.
    75  MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
    76  
    77  ASM_START()
    78  PROLOGUE(func)
        C  func is mpn_add_n or mpn_sub_n, depending on the OPERATION_* symbol.
        C  For each of the n limbs this computes up[i] OP vp[i], then applies
        C  the carry with a second OP, stores the result ANDed with numb_mask to
        C  rp[i], and derives the next carry by shifting the unmasked result
        C  right by CYSH.  The carry is kept in r20; its low bit is left in r0
        C  at L(ret).
        C
        C  The n & 3 leftover limbs are done first, one per iteration, in
        C  L(lp0).  The remaining limbs are done in groups of four, with the
        C  loads for the next group overlapped with the arithmetic and stores
        C  of the current one.
    79  	lda	numb_mask, -1(r31)	C numb_mask = -1 (r31 reads as zero)
    80  	srl	numb_mask, NAIL_BITS, numb_mask	C clear the top NAIL_BITS bits
    81  	bis	r31,	r31,	r20	C carry in = 0
    82  
    83  	and	n,	3,	r25	C r25 = n & 3, limbs to do singly
    84  	lda	n,	-4(n)	C n -= 4, so n < 0 means no full group of 4
    85  	beq	r25,	L(ge4)	C no leftover limbs, go straight to the groups
    86  
        C  One limb per iteration, r25 counts down to zero.  The pointers are
        C  advanced before the store, hence the -8(rp).
    87  L(lp0):	ldq	ul0,	0(up)
    88  	lda	up,	8(up)
    89  	ldq	vl0,	0(vp)
    90  	lda	vp,	8(vp)
    91  	lda	rp,	8(rp)
    92  	lda	r25,	-1(r25)
    93  	OP	ul0,	vl0,	rl0
    94  	OP	rl0,	r20,	rl0
    95  	and	rl0, numb_mask,	r28
    96  	stq	r28,	-8(rp)
    97  	srl	rl0,	CYSH,	r20
    98  	bne	r25,	L(lp0)
    99  
   100  	blt	n,	L(ret)	C original n was below 4, nothing left to do
   101  
        C  Load the first group of four limb pairs.  n is reduced by a further
        C  4: if it is still >= 0 another group follows and L(ge8) is used,
        C  which overlaps this group with loading the next.  Otherwise this is
        C  the only group: limbs 0 and 1 are started here and the group is
        C  finished at L(cj0), with rp still pointing at the start of the group.
   102  L(ge4):	ldq	ul0,	0(up)
   103  	ldq	vl0,	0(vp)
   104  	ldq	ul1,	8(up)
   105  	ldq	vl1,	8(vp)
   106  	ldq	ul2,	16(up)
   107  	ldq	vl2,	16(vp)
   108  	ldq	ul3,	24(up)
   109  	ldq	vl3,	24(vp)
   110  	lda	up,	32(up)
   111  	lda	vp,	32(vp)
   112  	lda	n,	-4(n)
   113  	bge	n,	L(ge8)
   114  
   115  	OP	ul0,	vl0,	rl0	C		main-add 0
   116  	OP	rl0,	r20,	rl0	C		cy-add 0
   117  	OP	ul1,	vl1,	rl1	C		main-add 1
   118  	srl	rl0,	CYSH,	r20	C		gen cy 0
   119  	OP	rl1,	r20,	rl1	C		cy-add 1
   120  	and	rl0,numb_mask,	r27
   121  	br	r31,	L(cj0)
   122  
        C  First group when at least one more follows.  Each ul/vl register is
        C  reloaded with the next group as soon as its current value has been
        C  consumed.  Limbs 0 and 1 are stored here; on exit r27 holds masked
        C  limb 2 and rl3 the raw limb 3, which are finished by L(top) or
        C  L(end) after rp has been advanced (hence their negative offsets).
   123  L(ge8):	OP	ul0,	vl0,	rl0	C		main-add 0
   124  	ldq	ul0,	0(up)
   125  	ldq	vl0,	0(vp)
   126  	OP	rl0,	r20,	rl0	C		cy-add 0
   127  	OP	ul1,	vl1,	rl1	C		main-add 1
   128  	srl	rl0,	CYSH,	r20	C		gen cy 0
   129  	ldq	ul1,	8(up)
   130  	ldq	vl1,	8(vp)
   131  	OP	rl1,	r20,	rl1	C		cy-add 1
   132  	and	rl0,numb_mask,	r27
   133  	OP	ul2,	vl2,	rl2	C		main-add 2
   134  	srl	rl1,	CYSH,	r20	C		gen cy 1
   135  	ldq	ul2,	16(up)
   136  	ldq	vl2,	16(vp)
   137  	OP	rl2,	r20,	rl2	C		cy-add 2
   138  	and	rl1,numb_mask,	r28
   139  	stq	r27,	0(rp)
   140  	OP	ul3,	vl3,	rl3	C		main-add 3
   141  	srl	rl2,	CYSH,	r20	C		gen cy 2
   142  	ldq	ul3,	24(up)
   143  	ldq	vl3,	24(vp)
   144  	OP	rl3,	r20,	rl3	C		cy-add 3
   145  	and	rl2,numb_mask,	r27
   146  	stq	r28,	8(rp)
   147  	lda	rp,	32(rp)
   148  	lda	up,	32(up)
   149  	lda	vp,	32(vp)
   150  	lda	n,	-4(n)
   151  	blt	n,	L(end)	C the group just loaded is the last one
   152  
        C  Steady state, one group of four per iteration.  On entry r27 is the
        C  masked limb 2 and rl3 the raw limb 3 of the previous group, and
        C  ul0-ul3/vl0-vl3 hold the current group.  The iteration stores the
        C  previous limbs 2 and 3 at -16(rp) and -8(rp), computes the current
        C  group, stores its limbs 0 and 1 at 0(rp) and 8(rp), and loads the
        C  following group.
        C  The bis r31,r31,r31 instructions write only r31 and so change no
        C  state.  NOTE(review): presumably filler to control ev6 instruction
        C  slotting - confirm.
   153  	ALIGN(32)
   154  L(top):	OP	ul0,	vl0,	rl0	C		main-add 0
   155  	srl	rl3,	CYSH,	r20	C		gen cy 3
   156  	ldq	ul0,	0(up)
   157  	ldq	vl0,	0(vp)
   158  
   159  	OP	rl0,	r20,	rl0	C		cy-add 0
   160  	and	rl3,numb_mask,	r28
   161  	stq	r27,	-16(rp)
   162  	bis	r31,	r31,	r31
   163  
   164  	OP	ul1,	vl1,	rl1	C		main-add 1
   165  	srl	rl0,	CYSH,	r20	C		gen cy 0
   166  	ldq	ul1,	8(up)
   167  	ldq	vl1,	8(vp)
   168  
   169  	OP	rl1,	r20,	rl1	C		cy-add 1
   170  	and	rl0,numb_mask,	r27
   171  	stq	r28,	-8(rp)
   172  	bis	r31,	r31,	r31
   173  
   174  	OP	ul2,	vl2,	rl2	C		main-add 2
   175  	srl	rl1,	CYSH,	r20	C		gen cy 1
   176  	ldq	ul2,	16(up)
   177  	ldq	vl2,	16(vp)
   178  
   179  	OP	rl2,	r20,	rl2	C		cy-add 2
   180  	and	rl1,numb_mask,	r28
   181  	stq	r27,	0(rp)
   182  	bis	r31,	r31,	r31
   183  
   184  	OP	ul3,	vl3,	rl3	C		main-add 3
   185  	srl	rl2,	CYSH,	r20	C		gen cy 2
   186  	ldq	ul3,	24(up)
   187  	ldq	vl3,	24(vp)
   188  
   189  	OP	rl3,	r20,	rl3	C		cy-add 3
   190  	and	rl2,numb_mask,	r27
   191  	stq	r28,	8(rp)
   192  	bis	r31,	r31,	r31
   193  
   194  	bis	r31,	r31,	r31
   195  	lda	n,	-4(n)
   196  	lda	up,	32(up)
   197  	lda	vp,	32(vp)
   198  
   199  	bis	r31,	r31,	r31
   200  	bis	r31,	r31,	r31
   201  	lda	rp,	32(rp)
   202  	bge	n,	L(top)
   203  
        C  Drain: finish limbs 2 and 3 of the previous group and process the
        C  last loaded group, with no further loads.
   204  L(end):	OP	ul0,	vl0,	rl0	C		main-add 0
   205  	srl	rl3,	CYSH,	r20	C		gen cy 3
   206  	OP	rl0,	r20,	rl0	C		cy-add 0
   207  	and	rl3,numb_mask,	r28
   208  	stq	r27,	-16(rp)
   209  	OP	ul1,	vl1,	rl1	C		main-add 1
   210  	srl	rl0,	CYSH,	r20	C		gen cy 0
   211  	OP	rl1,	r20,	rl1	C		cy-add 1
   212  	and	rl0,numb_mask,	r27
   213  	stq	r28,	-8(rp)
        C  L(cj0) is also entered from the single-group path after L(ge4).  In
        C  both cases r27 is masked limb 0, rl1 is raw limb 1 with its carry
        C  already applied, and 0(rp) is where limb 0 of the last group goes.
   214  L(cj0):	OP	ul2,	vl2,	rl2	C		main-add 2
   215  	srl	rl1,	CYSH,	r20	C		gen cy 1
   216  	OP	rl2,	r20,	rl2	C		cy-add 2
   217  	and	rl1,numb_mask,	r28
   218  	stq	r27,	0(rp)
   219  	OP	ul3,	vl3,	rl3	C		main-add 3
   220  	srl	rl2,	CYSH,	r20	C		gen cy 2
   221  	OP	rl3,	r20,	rl3	C		cy-add 3
   222  	and	rl2,numb_mask,	r27
   223  	stq	r28,	8(rp)
   224  
   225  	srl	rl3,	CYSH,	r20	C		gen cy 3
   226  	and	rl3,numb_mask,	r28
   227  	stq	r27,	16(rp)
   228  	stq	r28,	24(rp)
   229  
        C  Leave bit 0 of the final carry or borrow in r0 and return through
        C  the address in r26.
   230  L(ret):	and	r20,	1,	r0
   231  	ret	r31,	(r26),	1
   232  EPILOGUE()
   233  ASM_END()