github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/lshift.asm (about)

     1  dnl  Alpha mpn_lshift -- Shift a number left.
     2  
     3  dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C      cycles/limb
    34  C EV4:     ?
    35  C EV5:     3.25
    36  C EV6:     1.75
    37  
    38  C  INPUT PARAMETERS
    39  C  rp	r16
    40  C  up	r17
    41  C  n	r18
    42  C  cnt	r19
    43  
    44  
    45  ASM_START()
    46  PROLOGUE(mpn_lshift)
        C Shift the n-limb number at up left by cnt bits and store the n result
        C limbs at rp.  r0 returns the bits shifted out of the most significant
        C limb.  Limbs are processed from the most significant end downwards.
        C NOTE(review): the first limb is loaded unconditionally and the two
        C shifted halves are simply OR-ed together, so this appears to assume
        C n >= 1 and 1 <= cnt <= 63 -- confirm against callers.
    47  	s8addq	r18,r17,r17	C make r17 point at end of s1 (r17 = up + 8*n)
    48  	ldq	r4,-8(r17)	C load first limb (the most significant one)
    49  	subq	r31,r19,r20	C r20 = -cnt; shifts use only the low 6 bits, so srl by r20 is a shift by 64-cnt
    50  	s8addq	r18,r16,r16	C make r16 point at end of RES (r16 = rp + 8*n)
    51  	subq	r18,1,r18	C r18 = n-1, the limbs still to be fetched
    52  	and	r18,4-1,r28	C number of limbs in first loop, (n-1) mod 4
    53  	srl	r4,r20,r0	C compute function result: bits shifted out of the top limb
    54  
    55  	beq	r28,L(L0)	C n-1 is a multiple of 4, skip the one-limb loop
    56  	subq	r18,r28,r18	C r18 = limbs left for the 4-way code, a multiple of 4
    57  
        C One-limb loop: produces one result limb per pass, run (n-1) mod 4 times.
        C r4 holds the current source limb on entry to each pass.
    58  	ALIGN(8)
    59  L(top0):
    60  	ldq	r3,-16(r17)	C fetch the next lower source limb
    61  	subq	r16,8,r16	C step the result pointer down one limb
    62  	sll	r4,r19,r5	C high part: current limb << cnt
    63  	subq	r17,8,r17	C step the source pointer down one limb
    64  	subq	r28,1,r28	C one pass fewer to go
    65  	srl	r3,r20,r6	C low part: next limb >> (64-cnt)
    66  	bis	r3,r3,r4	C r4 = r3, the next limb becomes the current one
    67  	bis	r5,r6,r8	C combine both parts into the result limb
    68  	stq	r8,0(r16)	C store the result limb
    69  	bne	r28,L(top0)
    70  
    71  L(L0):	sll	r4,r19,r24	C r24 = current limb << cnt, high part of the next result limb
    72  	beq	r18,L(end)	C nothing left to fetch, only r24 remains to be stored
    73  C warm up phase 1: load the next four source limbs
    74  	ldq	r1,-16(r17)
    75  	subq	r18,4,r18	C account for the four limbs just being loaded
    76  	ldq	r2,-24(r17)
    77  	ldq	r3,-32(r17)
    78  	ldq	r4,-40(r17)
    79  C warm up phase 2: start shifting, and load four more limbs if any remain
    80  	srl	r1,r20,r7
    81  	sll	r1,r19,r21
    82  	srl	r2,r20,r8
    83  	beq	r18,L(end1)	C exactly four limbs were left, finish without further loads
    84  	ldq	r1,-48(r17)
    85  	sll	r2,r19,r22
    86  	ldq	r2,-56(r17)
    87  	srl	r3,r20,r5
    88  	bis	r7,r24,r7	C r7 = first completed result limb
    89  	sll	r3,r19,r23
    90  	bis	r8,r21,r8	C r8 = second completed result limb
    91  	srl	r4,r20,r6
    92  	ldq	r3,-64(r17)
    93  	sll	r4,r19,r24
    94  	ldq	r4,-72(r17)
    95  	subq	r18,4,r18	C account for the second group of four loads
    96  	beq	r18,L(end2)	C exactly eight limbs were left, skip the main loop
    97  	ALIGN(16)
    98  C main loop: each pass stores four result limbs and loads four source limbs.
        C On entry r7,r8 are finished result limbs, r5,r6 are low parts still to be
        C OR-ed with r22,r23, r24 is a pending high part and r1-r4 are unshifted limbs.
    99  L(top):	stq	r7,-8(r16)
   100  	bis	r5,r22,r5
   101  	stq	r8,-16(r16)
   102  	bis	r6,r23,r6
   103  
   104  	srl	r1,r20,r7
   105  	subq	r18,4,r18	C four more limbs are consumed by this pass
   106  	sll	r1,r19,r21
   107  	unop	C ldq	r31,-96(r17)  (the load is commented out, only the no-op is assembled)
   108  
   109  	srl	r2,r20,r8
   110  	ldq	r1,-80(r17)
   111  	sll	r2,r19,r22
   112  	ldq	r2,-88(r17)
   113  
   114  	stq	r5,-24(r16)
   115  	bis	r7,r24,r7
   116  	stq	r6,-32(r16)
   117  	bis	r8,r21,r8
   118  
   119  	srl	r3,r20,r5
   120  	unop	C ldq	r31,-96(r17)  (the load is commented out, only the no-op is assembled)
   121  	sll	r3,r19,r23
   122  	subq	r16,32,r16	C move the result pointer down four limbs
   123  
   124  	srl	r4,r20,r6
   125  	ldq	r3,-96(r17)
   126  	sll	r4,r19,r24
   127  	ldq	r4,-104(r17)
   128  
   129  	subq	r17,32,r17	C move the source pointer down four limbs
   130  	bne	r18,L(top)
   131  C cool down phase 2/1: no more loads; store four limbs and shift the last four loaded limbs
   132  L(end2):
   133  	stq	r7,-8(r16)
   134  	bis	r5,r22,r5
   135  	stq	r8,-16(r16)
   136  	bis	r6,r23,r6
   137  	srl	r1,r20,r7
   138  	sll	r1,r19,r21
   139  	srl	r2,r20,r8
   140  	sll	r2,r19,r22
   141  	stq	r5,-24(r16)
   142  	bis	r7,r24,r7
   143  	stq	r6,-32(r16)
   144  	bis	r8,r21,r8
   145  	srl	r3,r20,r5
   146  	sll	r3,r19,r23
   147  	srl	r4,r20,r6
   148  	sll	r4,r19,r24
   149  C cool down phase 2/2: store the four limbs computed above
   150  	stq	r7,-40(r16)
   151  	bis	r5,r22,r5
   152  	stq	r8,-48(r16)
   153  	bis	r6,r23,r6
   154  	stq	r5,-56(r16)
   155  	stq	r6,-64(r16)
   156  C cool down phase 2/3: final store, the last loaded limb << cnt with nothing OR-ed in
   157  	stq	r24,-72(r16)
   158  	ret	r31,(r26),1	C return through r26, result in r0
   159  
   160  C cool down phase 1/1: reached when only four limbs were loaded; finish shifting them
   161  L(end1):
   162  	sll	r2,r19,r22
   163  	srl	r3,r20,r5
   164  	bis	r7,r24,r7
   165  	sll	r3,r19,r23
   166  	bis	r8,r21,r8
   167  	srl	r4,r20,r6
   168  	sll	r4,r19,r24
   169  C cool down phase 1/2: store the four combined limbs, then the final limb from r24
   170  	stq	r7,-8(r16)
   171  	bis	r5,r22,r5
   172  	stq	r8,-16(r16)
   173  	bis	r6,r23,r6
   174  	stq	r5,-24(r16)
   175  	stq	r6,-32(r16)
   176  	stq	r24,-40(r16)
   177  	ret	r31,(r26),1	C return through r26, result in r0
   178  
   179  L(end):	stq	r24,-8(r16)	C store the one remaining limb, the last source limb << cnt
   180  	ret	r31,(r26),1	C return through r26, result in r0
   181  EPILOGUE(mpn_lshift)
   182  ASM_END()