github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/sub_n.asm (about)

     1  dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
     2  dnl  and store difference in a third limb vector.
     3  
     4  dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  C      cycles/limb
    35  C EV4:     ?
    36  C EV5:     4.75
    37  C EV6:     3
    38  
    39  dnl  INPUT PARAMETERS
    40  dnl  res_ptr	r16
    41  dnl  s1_ptr	r17
    42  dnl  s2_ptr	r18
    43  dnl  size	r19
    44  
    45  ASM_START()
    46  PROLOGUE(mpn_sub_nc)
        C Entry point that starts the subtraction with a borrow already set.
        C It copies r20 into r25, the register the shared code uses as the
        C running borrow, and then branches to L(com) inside mpn_sub_n.
        C NOTE(review): r20 is presumably the fifth argument, a carry-in of
        C 0 or 1; it is not listed under INPUT PARAMETERS -- confirm against
        C the C prototype.
    47  	bis	r31,r20,r25		C r25 = r20 (r31 reads as zero)
    48  	br	L(com)			C continue in the shared body of mpn_sub_n
    49  EPILOGUE()
    50  PROLOGUE(mpn_sub_n)
        C Subtracts the size limbs at s2_ptr (r18) from the size limbs at
        C s1_ptr (r17), stores the difference at res_ptr (r16) and returns
        C the final borrow (0 or 1) in r0.
        C
        C Register use in the code below:
        C   r25      running borrow between limbs
        C   r28, r8  per-limb temporaries (partial difference, borrow bit)
        C   r0-r3    limbs loaded from s2, r4-r7 limbs loaded from s1
        C   r20-r23  result limbs waiting to be stored
        C   r19      limb count, kept negatively biased while the 4-limb
        C            loop runs and restored at $Lend2
        C
        C Each limb is handled as two subtracts: a main subtract s1 - s2 and
        C a carry subtract that removes the incoming borrow.  Each of the two
        C can wrap, so each gets its own cmpult and the two bits are ORed.
        C
        C Layout: a software-pipelined loop for groups of 4 limbs ($Loop)
        C followed by a one-limb loop ($Loop0) for the remaining 0-3 limbs.
    51  	bis	r31,r31,r25		C no borrow-in: clear cy
    52  L(com):	subq	r19,4,r19		C decr loop cnt; mpn_sub_nc joins here with r25 preset
    53  	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
    54  C Start software pipeline for 1st loop
    55  	ldq	r0,0(r18)		C s2 limb 0
    56  	ldq	r4,0(r17)		C s1 limb 0
    57  	ldq	r1,8(r18)		C s2 limb 1
    58  	ldq	r5,8(r17)		C s1 limb 1
    59  	addq	r17,32,r17		C update s1_ptr
    60  	subq	r4,r0,r28		C 1st main subtract
    61  	ldq	r2,16(r18)		C s2 limb 2
    62  	subq	r28,r25,r20		C 1st carry subtract
    63  	ldq	r3,24(r18)		C s2 limb 3
    64  	cmpult	r4,r0,r8		C borrow out of 1st main subtract
    65  	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already advanced)
    66  	cmpult	r28,r25,r25		C borrow out of 1st carry subtract
    67  	ldq	r7,-8(r17)		C s1 limb 3 (s1_ptr already advanced)
    68  	bis	r8,r25,r25		C combine cy from the two subtracts
    69  	subq	r19,4,r19		C decr loop cnt
    70  	subq	r5,r1,r28		C 2nd main subtract
    71  	addq	r18,32,r18		C update s2_ptr
    72  	subq	r28,r25,r21		C 2nd carry subtract
    73  	cmpult	r5,r1,r8		C borrow out of 2nd main subtract
    74  	blt	r19,$Lend1		C if less than 4 limbs remain, jump
    75  C 1st loop handles groups of 4 limbs in a software pipeline
        C On entry to each iteration limbs 1 and 2 of the current group are
        C already computed (r20, r21) and limbs 3 and 4 are loaded (r2/r6,
        C r3/r7).  The iteration finishes this group and starts the next one.
    76  	ALIGN(16)
    77  $Loop:	cmpult	r28,r25,r25		C borrow out of 2nd carry subtract
    78  	ldq	r0,0(r18)		C next group: s2 limb 0
    79  	bis	r8,r25,r25		C combine cy from the two subtracts
    80  	ldq	r1,8(r18)		C next group: s2 limb 1
    81  	subq	r6,r2,r28		C 3rd main subtract
    82  	ldq	r4,0(r17)		C next group: s1 limb 0
    83  	subq	r28,r25,r22		C 3rd carry subtract
    84  	ldq	r5,8(r17)		C next group: s1 limb 1
    85  	cmpult	r6,r2,r8		C borrow out of 3rd main subtract
    86  	cmpult	r28,r25,r25		C borrow out of 3rd carry subtract
    87  	stq	r20,0(r16)		C store 1st result limb
    88  	bis	r8,r25,r25		C combine cy from the two subtracts
    89  	stq	r21,8(r16)		C store 2nd result limb
    90  	subq	r7,r3,r28		C 4th main subtract
    91  	subq	r28,r25,r23		C 4th carry subtract
    92  	cmpult	r7,r3,r8		C borrow out of 4th main subtract
    93  	cmpult	r28,r25,r25		C borrow out of 4th carry subtract
    94  		addq	r17,32,r17		C update s1_ptr
    95  	bis	r8,r25,r25		C combine cy from the two subtracts
    96  		addq	r16,32,r16		C update res_ptr
    97  	subq	r4,r0,r28		C 1st main subtract (next group)
    98  	ldq	r2,16(r18)		C next group: s2 limb 2
    99  	subq	r28,r25,r20		C 1st carry subtract
   100  	ldq	r3,24(r18)		C next group: s2 limb 3
   101  	cmpult	r4,r0,r8		C borrow out of 1st main subtract
   102  	ldq	r6,-16(r17)		C next group: s1 limb 2 (s1_ptr already advanced)
   103  	cmpult	r28,r25,r25		C borrow out of 1st carry subtract
   104  	ldq	r7,-8(r17)		C next group: s1 limb 3 (s1_ptr already advanced)
   105  	bis	r8,r25,r25		C combine cy from the two subtracts
   106  	subq	r19,4,r19		C decr loop cnt
   107  	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
   108  	subq	r5,r1,r28		C 2nd main subtract
   109  	stq	r23,-8(r16)		C store 4th result limb (res_ptr already advanced)
   110  	subq	r28,r25,r21		C 2nd carry subtract
   111  		addq	r18,32,r18		C update s2_ptr
   112  	cmpult	r5,r1,r8		C borrow out of 2nd main subtract
   113  	bge	r19,$Loop		C another full group of 4 is available
   114  C Finish software pipeline for 1st loop
        C Same steps as the first half of $Loop but without loading a new
        C group: finish limbs 3 and 4 and store all four results.
   115  $Lend1:	cmpult	r28,r25,r25		C borrow out of 2nd carry subtract
   116  	bis	r8,r25,r25		C combine cy from the two subtracts
   117  	subq	r6,r2,r28		C 3rd main subtract
   118  	subq	r28,r25,r22		C 3rd carry subtract
   119  	cmpult	r6,r2,r8		C borrow out of 3rd main subtract
   120  	cmpult	r28,r25,r25		C borrow out of 3rd carry subtract
   121  	stq	r20,0(r16)		C store 1st result limb
   122  	bis	r8,r25,r25		C combine cy from the two subtracts
   123  	stq	r21,8(r16)		C store 2nd result limb
   124  	subq	r7,r3,r28		C 4th main subtract
   125  	subq	r28,r25,r23		C 4th carry subtract
   126  	cmpult	r7,r3,r8		C borrow out of 4th main subtract
   127  	cmpult	r28,r25,r25		C borrow out of 4th carry subtract
   128  	bis	r8,r25,r25		C combine cy from the two subtracts
   129  	addq	r16,32,r16		C update res_ptr
   130  	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
   131  	stq	r23,-8(r16)		C store 4th result limb (res_ptr already advanced)
   132  $Lend2:	addq	r19,4,r19		C restore loop cnt: r19 = limbs still to do (0-3)
   133  	beq	r19,$Lret		C nothing left, return the borrow
   134  C Start software pipeline for 2nd loop
   135  	ldq	r0,0(r18)		C s2 limb
   136  	ldq	r4,0(r17)		C s1 limb
   137  	subq	r19,1,r19		C the last limb is handled at $Lend0, not in the loop
   138  	beq	r19,$Lend0		C exactly one limb left
   139  C 2nd loop handles remaining 1-3 limbs
   140  	ALIGN(16)
   141  $Loop0:	subq	r4,r0,r28		C main subtract
   142  	cmpult	r4,r0,r8		C borrow out of main subtract
   143  	ldq	r0,8(r18)		C preload s2 limb for the next pass
   144  	ldq	r4,8(r17)		C preload s1 limb for the next pass
   145  	subq	r28,r25,r20		C carry subtract
   146  	addq	r18,8,r18		C update s2_ptr
   147  	addq	r17,8,r17		C update s1_ptr
   148  	stq	r20,0(r16)		C store result limb
   149  	cmpult	r28,r25,r25		C borrow out of carry subtract
   150  	subq	r19,1,r19		C decr loop cnt
   151  	bis	r8,r25,r25		C combine cy from the two subtracts
   152  	addq	r16,8,r16		C update res_ptr
   153  	bne	r19,$Loop0		C loop until only the preloaded limb remains
   154  $Lend0:	subq	r4,r0,r28		C main subtract
   155  	subq	r28,r25,r20		C carry subtract
   156  	cmpult	r4,r0,r8		C borrow out of main subtract
   157  	cmpult	r28,r25,r25		C borrow out of carry subtract
   158  	stq	r20,0(r16)		C store last result limb
   159  	bis	r8,r25,r25		C combine cy from the two subtracts
   160  
   161  $Lret:	bis	r25,r31,r0		C return cy
   162  	ret	r31,(r26),1		C return through the address in r26
   163  EPILOGUE()
   164  ASM_END()