github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/add_n.asm (about)

     1  dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
     2  dnl  store sum in a third limb vector.
     3  
     4  dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  C      cycles/limb
    35  C EV4:     ?
    36  C EV5:     4.75
    37  C EV6:     3
    38  
    39  dnl  INPUT PARAMETERS
    40  dnl  res_ptr	r16
    41  dnl  s1_ptr	r17
    42  dnl  s2_ptr	r18
    43  dnl  size	r19
    44  dnl  carry-in	r20	(mpn_add_nc only)
    45  ASM_START()
    46  PROLOGUE(mpn_add_nc)		C same addition as mpn_add_n, but cy starts as the value in r20
    47  	bis	r20,r31,r25		C cy = carry-in taken from r20
    48  	br	L(com)			C join mpn_add_n just past its carry clear
    49  EPILOGUE()
    50  PROLOGUE(mpn_add_n)		C add r19 limbs at r17 and r18, store sum at r16, return cy in r0
    51  	bis	r31,r31,r25		C clear cy (r25 holds cy throughout)
    52  L(com):	subq	r19,4,r19		C decr loop cnt; mpn_add_nc enters here with cy preset
    53  	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
    54  C Start software pipeline for 1st loop
    55  	ldq	r0,0(r18)		C r0-r3 receive s2 limbs
    56  	ldq	r4,0(r17)		C r4-r7 receive s1 limbs
    57  	ldq	r1,8(r18)
    58  	ldq	r5,8(r17)
    59  	addq	r17,32,r17		C update s1_ptr (later s1 loads use negative offsets)
    60  	addq	r0,r4,r28		C 1st main add (r28 = sum before cy)
    61  	ldq	r2,16(r18)
    62  	addq	r25,r28,r20		C 1st carry add (r20 = 1st result limb)
    63  	ldq	r3,24(r18)
    64  	cmpult	r28,r4,r8		C compute cy from main add
    65  	ldq	r6,-16(r17)
    66  	cmpult	r20,r28,r25		C compute cy from carry add
    67  	ldq	r7,-8(r17)
    68  	bis	r8,r25,r25		C combine cy from the two adds
    69  	subq	r19,4,r19		C decr loop cnt
    70  	addq	r1,r5,r28		C 2nd main add
    71  	addq	r18,32,r18		C update s2_ptr
    72  	addq	r28,r25,r21		C 2nd carry add (r21 = 2nd result limb)
    73  	cmpult	r28,r5,r8		C compute cy from main add
    74  	blt	r19,$Lend1		C if less than 4 limbs remain, jump
    75  C 1st loop handles groups of 4 limbs in a software pipeline: each pass finishes adds 3-4 of one group, then does adds 1-2 of the next
    76  	ALIGN(16)
    77  $Loop:	cmpult	r21,r28,r25		C compute cy from 2nd carry add
    78  	ldq	r0,0(r18)		C start loading the next group of 4 limbs
    79  	bis	r8,r25,r25		C combine cy from the two adds
    80  	ldq	r1,8(r18)
    81  	addq	r2,r6,r28		C 3rd main add
    82  	ldq	r4,0(r17)
    83  	addq	r28,r25,r22		C 3rd carry add (r22 = 3rd result limb)
    84  	ldq	r5,8(r17)
    85  	cmpult	r28,r6,r8		C compute cy from main add
    86  	cmpult	r22,r28,r25		C compute cy from carry add
    87  	stq	r20,0(r16)		C store 1st result limb
    88  	bis	r8,r25,r25		C combine cy from the two adds
    89  	stq	r21,8(r16)		C store 2nd result limb
    90  	addq	r3,r7,r28		C 4th main add
    91  	addq	r28,r25,r23		C 4th carry add (r23 = 4th result limb)
    92  	cmpult	r28,r7,r8		C compute cy from main add
    93  	cmpult	r23,r28,r25		C compute cy from carry add
    94  		addq	r17,32,r17		C update s1_ptr
    95  	bis	r8,r25,r25		C combine cy from the two adds
    96  		addq	r16,32,r16		C update res_ptr
    97  	addq	r0,r4,r28		C 1st main add (next group)
    98  	ldq	r2,16(r18)
    99  	addq	r25,r28,r20		C 1st carry add
   100  	ldq	r3,24(r18)
   101  	cmpult	r28,r4,r8		C compute cy from main add
   102  	ldq	r6,-16(r17)
   103  	cmpult	r20,r28,r25		C compute cy from carry add
   104  	ldq	r7,-8(r17)
   105  	bis	r8,r25,r25		C combine cy from the two adds
   106  	subq	r19,4,r19		C decr loop cnt
   107  	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
   108  	addq	r1,r5,r28		C 2nd main add
   109  	stq	r23,-8(r16)		C store 4th result limb
   110  	addq	r25,r28,r21		C 2nd carry add
   111  		addq	r18,32,r18		C update s2_ptr
   112  	cmpult	r28,r5,r8		C compute cy from main add
   113  	bge	r19,$Loop		C loop while another full group of 4 limbs remains
   114  C Finish software pipeline for 1st loop
   115  $Lend1:	cmpult	r21,r28,r25		C compute cy from 2nd carry add
   116  	bis	r8,r25,r25		C combine cy from the two adds
   117  	addq	r2,r6,r28		C 3rd main add
   118  	addq	r28,r25,r22		C 3rd carry add
   119  	cmpult	r28,r6,r8		C compute cy from main add
   120  	cmpult	r22,r28,r25		C compute cy from carry add
   121  	stq	r20,0(r16)		C store 1st result limb
   122  	bis	r8,r25,r25		C combine cy from the two adds
   123  	stq	r21,8(r16)		C store 2nd result limb
   124  	addq	r3,r7,r28		C 4th main add
   125  	addq	r28,r25,r23		C 4th carry add
   126  	cmpult	r28,r7,r8		C compute cy from main add
   127  	cmpult	r23,r28,r25		C compute cy from carry add
   128  	bis	r8,r25,r25		C combine cy from the two adds
   129  	addq	r16,32,r16		C update res_ptr
   130  	stq	r22,-16(r16)		C store 3rd result limb
   131  	stq	r23,-8(r16)		C store 4th result limb
   132  $Lend2:	addq	r19,4,r19		C restore loop cnt (r19 = limbs still to do)
   133  	beq	r19,$Lret		C nothing left, return cy
   134  C Start software pipeline for 2nd loop
   135  	ldq	r0,0(r18)		C load first remaining s2 limb
   136  	ldq	r4,0(r17)		C load first remaining s1 limb
   137  	subq	r19,1,r19		C r19 = limbs left after the one just loaded
   138  	beq	r19,$Lend0		C only one limb left, skip the loop
   139  C 2nd loop handles remaining 1-3 limbs; the last of them is finished after the loop
   140  	ALIGN(16)
   141  $Loop0:	addq	r0,r4,r28		C main add
   142  	ldq	r0,8(r18)		C preload next s2 limb
   143  	cmpult	r28,r4,r8		C compute cy from main add
   144  	ldq	r4,8(r17)		C preload next s1 limb
   145  	addq	r28,r25,r20		C carry add
   146  	addq	r18,8,r18		C update s2_ptr
   147  	addq	r17,8,r17		C update s1_ptr
   148  	stq	r20,0(r16)		C store result limb
   149  	cmpult	r20,r28,r25		C compute cy from carry add
   150  	subq	r19,1,r19		C decr loop cnt
   151  	bis	r8,r25,r25		C combine cy from the two adds
   152  	addq	r16,8,r16		C update res_ptr
   153  	bne	r19,$Loop0		C repeat until only the preloaded limb is left
   154  $Lend0:	addq	r0,r4,r28		C main add
   155  	addq	r28,r25,r20		C carry add
   156  	cmpult	r28,r4,r8		C compute cy from main add
   157  	cmpult	r20,r28,r25		C compute cy from carry add
   158  	stq	r20,0(r16)		C store last result limb
   159  	bis	r8,r25,r25		C combine cy from the two adds
   160  
   161  $Lret:	bis	r25,r31,r0		C return cy
   162  	ret	r31,(r26),1		C return through the address in r26
   163  EPILOGUE()
   164  ASM_END()