github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/mod_34lsub1.asm (about)

     1  dnl Alpha mpn_mod_34lsub1.
     2  
     3  dnl  Copyright 2002 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C      cycles/limb
    34  C EV4:     4 (?)
    35  C EV5:     2.67
    36  C EV6:     1.67
    37  
    38  
    39  dnl  INPUT PARAMETERS
    40  dnl  up		r16
    41  dnl  n		r17
    42  
    43  define(`l0',`r18')		C l0-l2: the three limbs most recently loaded from up
    44  define(`l1',`r19')		C        (l0 pairs with a0, l1 with a1, l2 with a2)
    45  define(`l2',`r20')
    46  define(`a0',`r21')		C a0-a2: running 64-bit sums, one per limb index mod 3
    47  define(`a1',`r22')
    48  define(`a2',`r23')
    49  define(`c0',`r24')		C c0-c2: number of times a0-a2 wrapped past 64 bits
    50  define(`c1',`r5')
    51  define(`c2',`r6')
    52  
    53  ASM_START()
        C mpn_mod_34lsub1 -- fold the n limbs at up into one value that is
        C congruent to the whole operand modulo 2^48-1.
        C
        C In:   r16 = up (address of the first 64-bit limb), r17 = n (limb count)
        C Out:  r0  = folded sum; it is congruent to the operand modulo 2^48-1
        C             but is not reduced below 2^48-1
        C
        C Because 2^48 is congruent to 1, a bit at position k weighs the same as
        C a bit at position k mod 48.  With 64-bit limbs, limb i therefore has
        C weight 2^0, 2^16 or 2^32 according to i mod 3.  Each of the three
        C classes is summed in its own register (a0, a1, a2), the carries out of
        C those sums are counted in c0, c1, c2, and all six values are realigned
        C and added together once at the end.
    54  PROLOGUE(mpn_mod_34lsub1)
    55  	bis	r31, r31, c0		C carry counters start at zero (r31 reads as zero)
    56  	bis	r31, r31, c1
    57  	bis	r31, r31, c2
    58  
    59  	lda	r17, -3(r17)		C r17 = n - 3
    60  	bge	r17, $L_3_or_more
    61  	bis	r31, r31, a0		C n < 3: clear the sums, the tail code adds the 0-2 limbs
    62  	bis	r31, r31, a1
    63  	bis	r31, r31, a2
    64  	br	r31, $L_012
    65  
    66  $L_3_or_more:
    67  	ldq	a0, 0(r16)		C the first three limbs initialise the sums directly
    68  	ldq	a1, 8(r16)
    69  	ldq	a2, 16(r16)
    70  	lda	r16, 24(r16)		C advance up by three limbs
    71  	lda	r17, -3(r17)		C from here on r17 = (limbs not yet loaded) - 3
    72  	blt	r17, $L_012		C fewer than 3 left: only the tail remains
    73  
    74  $L_6_or_more:
    75  	ldq	l0, 0(r16)		C load the next three limbs
    76  	ldq	l1, 8(r16)
    77  	ldq	l2, 16(r16)
    78  	addq	l0, a0, a0		C start the a0 add; its carry is tested at $Loop or $L_end
    79  
    80  	lda	r16, 24(r16)
    81  	lda	r17, -3(r17)
    82  	blt	r17, $L_end		C fewer than 3 left: finish this triple, then the tail
    83  
    84  	ALIGN(16)
    85  C Main loop
        C Each pass finishes adding the triple already held in l0..l2 while
        C loading the next triple into the same registers.  A carry is detected
        C with cmpult: after sum = sum + limb, sum < limb exactly when the add
        C wrapped.  Each compare is placed before the reload of its limb register.
    86  $L_9_or_more:
    87  $Loop:	cmpult	a0, l0, r0		C r0 = 1 if the earlier a0 += l0 wrapped
    88  	ldq	l0, 0(r16)		C l0 no longer needed: load the next limb into it
    89  	addq	r0, c0, c0
    90  	addq	l1, a1, a1
    91  	cmpult	a1, l1, r0		C carry out of a1
    92  	ldq	l1, 8(r16)
    93  	addq	r0, c1, c1
    94  	addq	l2, a2, a2
    95  	cmpult	a2, l2, r0		C carry out of a2
    96  	ldq	l2, 16(r16)
    97  	addq	r0, c2, c2
    98  	addq	l0, a0, a0		C add the new l0; carry is tested next pass or at $L_end
    99  	lda	r16, 24(r16)
   100  	lda	r17, -3(r17)
   101  	bge	r17, $Loop		C repeat while at least 3 limbs are still unloaded
   102  
   103  $L_end:	cmpult	a0, l0, r0		C finish the triple still held in l0..l2
   104  	addq	r0, c0, c0
   105  	addq	l1, a1, a1
   106  	cmpult	a1, l1, r0
   107  	addq	r0, c1, c1
   108  	addq	l2, a2, a2
   109  	cmpult	a2, l2, r0
   110  	addq	r0, c2, c2
   111  
   112  C Handle the last (n mod 3) limbs
   113  $L_012:	lda	r17, 2(r17)		C r17 = (limbs not yet loaded) - 1
   114  	blt	r17, $L_0		C none left
   115  	ldq	l0, 0(r16)		C first leftover limb goes to a0
   116  	addq	l0, a0, a0
   117  	cmpult	a0, l0, r0
   118  	addq	r0, c0, c0
   119  	beq	r17, $L_0		C exactly one was left
   120  	ldq	l1, 8(r16)		C second leftover limb goes to a1
   121  	addq	l1, a1, a1
   122  	cmpult	a1, l1, r0
   123  	addq	r0, c1, c1
   124  
   125  C Align and sum our 3 main accumulators and 3 carry accumulators
        C Weights modulo 2^48-1: a0 has 2^0, a1 has 2^16, a2 has 2^32; a carry
        C out of a0 is worth 2^64 = 2^16, out of a1 2^128 = 2^32, and out of a2
        C 2^192 = 2^0.  Each value is split in two so that, once shifted to its
        C weight, no piece extends past bit 47; all pieces are then added into r0.
        C In each ifdef below both branches produce the value named in the
        C comment; the second branch masks bytes with zapnot and then shifts.
   126  $L_0:	srl	a0, 48, r2		C a0 >> 48 (bits 48-63 wrap round to bit 0)
   127  	srl	a1, 32, r4		C a1 >> 32 (bits 32-63 at weight 2^16 wrap to bit 0)
   128  ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
   129  `	insll	a1, 2, r1',		C (a1 & 0xffffffff) << 16
   130  `	zapnot	a1, 15, r25
   131  	sll	r25, 16, r1')
   132  	zapnot	a0, 63, r0		C a0 & 0xffffffffffff
   133  	srl	a2, 16, a1		C a2 >> 16 (a1 already consumed, reused as scratch)
   134  ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
   135  `	inswl	a2, 4, r3',		C (a2 & 0xffff) << 32
   136  `	zapnot	a2, 3, r25
   137  	sll	r25, 32, r3')
   138  	addq	r1, r4, r1		C r1 = both pieces of a1
   139  	addq	r0, r2, r0		C r0 = both pieces of a0
   140  	srl	c0, 32, a2		C c0 >> 32 (a2 already consumed, reused as scratch)
   141  ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
   142  `	insll	c0, 2, r4',		C (c0 & 0xffffffff) << 16
   143  `	zapnot	c0, 15, r25
   144  	sll	r25, 16, r4')
   145  	addq	r0, r1, r0		C r0 = a0 pieces + a1 pieces
   146  	addq	r3, a1, r3		C r3 = both pieces of a2
   147  	addq	r0, r3, r0		C r0 += a2 pieces
   148  	srl	c1, 16, c0		C c1 >> 16 (c0 already consumed, reused as scratch)
   149  ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
   150  `	inswl	c1, 4, r2',		C (c1 & 0xffff) << 32
   151  `	zapnot	c1, 3, r25
   152  	sll	r25, 32, r2')
   153  	addq	r4, a2, r4		C r4 = both pieces of c0
   154  C	srl	c2, 48, r3		C This will be 0 in practice
   155  	zapnot	c2, 63, r1		C r1 = c2 & 0xffffffffffff
   156  	addq	r0, r4, r0		C r0 += c0 pieces
   157  	addq	r2, c0, r2		C r2 = both pieces of c1
   158  	addq	r0, r2, r0		C r0 += c1 pieces
   159  C	addq	r1, r3, r1
   160  	addq	r0, r1, r0		C r0 += low 48 bits of c2
   161  
   162  	ret	r31, (r26), 1		C return through r26 with the folded sum in r0
   163  EPILOGUE(mpn_mod_34lsub1)
   164  ASM_END()