github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/alpha/rshift.asm (about)

     1  dnl  Alpha mpn_rshift -- Shift a number right.
     2  
     3  dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C      cycles/limb
    34  C EV4:     ?
    35  C EV5:     3.25
    36  C EV6:     1.75
    37  
    38  C  INPUT PARAMETERS
    39  C  rp	r16
    40  C  up	r17
    41  C  n	r18
    42  C  cnt	r19
    43  C mpn_rshift: write the n-limb number at up, shifted right by cnt bits, to rp.
    44  C The bits shifted out of the lowest limb are returned in r0, left-justified.
    45  ASM_START()
    46  PROLOGUE(mpn_rshift)
    47  	ldq	r4,0(r17)	C load first limb
    48  	subq	r31,r19,r20	C r20 = -cnt, the left-shift count; shifts take the count mod 64, so it acts as 64-cnt. NOTE(review): relies on cnt being 1..63, confirm with callers
    49  	subq	r18,1,r18	C r18 = n-1, limbs after the first. NOTE(review): relies on n >= 1, confirm with callers
    50  	and	r18,4-1,r28	C number of limbs in first loop, (n-1) mod 4
    51  	sll	r4,r20,r0	C compute function result: first limb shifted left by r20
    52  
    53  	beq	r28,L(L0)	C nothing for the first loop to do
    54  	subq	r18,r28,r18	C r18 is now a multiple of 4, the limbs left for the unrolled code
    55  C first loop: one result limb per pass, r4 holds the current source limb
    56  	ALIGN(8)
    57  L(top0):
    58  	ldq	r3,8(r17)	C r3 = next source limb
    59  	addq	r16,8,r16	C advance rp now; the store below uses offset -8
    60  	srl	r4,r19,r5	C r5 = current limb >> cnt
    61  	addq	r17,8,r17	C advance up
    62  	subq	r28,1,r28	C one limb fewer for this loop
    63  	sll	r3,r20,r6	C r6 = next limb << r20, the bits that move down into this limb
    64  	bis	r3,r3,r4	C r4 = r3, next limb becomes the current one
    65  	bis	r5,r6,r8	C r8 = r5 | r6, the finished result limb
    66  	stq	r8,-8(r16)
    67  	bne	r28,L(top0)
    68  
    69  L(L0):	srl	r4,r19,r24	C r24 = current limb >> cnt, still missing bits from the limb above
    70  	beq	r18,L(end)	C no limbs left: r24 is the top result limb
    71  C warm up phase 1: load the next four source limbs into r1-r4
    72  	ldq	r1,8(r17)
    73  	subq	r18,4,r18	C r18 = limbs remaining beyond these four
    74  	ldq	r2,16(r17)
    75  	ldq	r3,24(r17)
    76  	ldq	r4,32(r17)
    77  C warm up phase 2: start shifting the first group and, if more limbs remain, load a second group
    78  	sll	r1,r20,r7
    79  	srl	r1,r19,r21
    80  	sll	r2,r20,r8
    81  	beq	r18,L(end1)	C only these four limbs were left: finish without the main loop
    82  	ldq	r1,40(r17)
    83  	srl	r2,r19,r22
    84  	ldq	r2,48(r17)
    85  	sll	r3,r20,r5
    86  	bis	r7,r24,r7	C r7 = finished result limb, stored at 0(r16) later
    87  	srl	r3,r19,r23
    88  	bis	r8,r21,r8	C r8 = finished result limb, stored at 8(r16) later
    89  	sll	r4,r20,r6
    90  	ldq	r3,56(r17)
    91  	srl	r4,r19,r24	C r24 = low part carried over to the next group of four
    92  	ldq	r4,64(r17)
    93  	subq	r18,4,r18
    94  	beq	r18,L(end2)	C only the two loaded groups were left: skip the main loop
    95  	ALIGN(16)
    96  C main loop: each pass stores four result limbs, shifts the limbs in r1-r4 and reloads r1-r4
    97  L(top):	stq	r7,0(r16)
    98  	bis	r5,r22,r5
    99  	stq	r8,8(r16)
   100  	bis	r6,r23,r6
   101  
   102  	sll	r1,r20,r7
   103  	subq	r18,4,r18	C four more limbs consumed
   104  	srl	r1,r19,r21
   105  	unop	C ldq	r31,-96(r17)
   106  
   107  	sll	r2,r20,r8
   108  	ldq	r1,72(r17)
   109  	srl	r2,r19,r22
   110  	ldq	r2,80(r17)
   111  
   112  	stq	r5,16(r16)
   113  	bis	r7,r24,r7
   114  	stq	r6,24(r16)
   115  	bis	r8,r21,r8
   116  
   117  	sll	r3,r20,r5
   118  	unop	C ldq	r31,-96(r17)
   119  	srl	r3,r19,r23
   120  	addq	r16,32,r16	C advance rp by four limbs
   121  
   122  	sll	r4,r20,r6
   123  	ldq	r3,88(r17)
   124  	srl	r4,r19,r24
   125  	ldq	r4,96(r17)
   126  
   127  	addq	r17,32,r17	C advance up by four limbs
   128  	bne	r18,L(top)
   129  C cool down phase 2/1: no more loads; store the group in progress and shift the group already in r1-r4
   130  L(end2):
   131  	stq	r7,0(r16)
   132  	bis	r5,r22,r5
   133  	stq	r8,8(r16)
   134  	bis	r6,r23,r6
   135  	sll	r1,r20,r7
   136  	srl	r1,r19,r21
   137  	sll	r2,r20,r8
   138  	srl	r2,r19,r22
   139  	stq	r5,16(r16)
   140  	bis	r7,r24,r7
   141  	stq	r6,24(r16)
   142  	bis	r8,r21,r8
   143  	sll	r3,r20,r5
   144  	srl	r3,r19,r23
   145  	sll	r4,r20,r6
   146  	srl	r4,r19,r24
   147  C cool down phase 2/2: store the last four combined limbs
   148  	stq	r7,32(r16)
   149  	bis	r5,r22,r5
   150  	stq	r8,40(r16)
   151  	bis	r6,r23,r6
   152  	stq	r5,48(r16)
   153  	stq	r6,56(r16)
   154  C cool down phase 2/3: the top result limb is just the last source limb >> cnt
   155  	stq	r24,64(r16)
   156  	ret	r31,(r26),1
   157  
   158  C cool down phase 1/1: only the four limbs loaded during warm up remain
   159  L(end1):
   160  	srl	r2,r19,r22
   161  	sll	r3,r20,r5
   162  	bis	r7,r24,r7
   163  	srl	r3,r19,r23
   164  	bis	r8,r21,r8
   165  	sll	r4,r20,r6
   166  	srl	r4,r19,r24
   167  C cool down phase 1/2: store four combined limbs, then the top limb from r24
   168  	stq	r7,0(r16)
   169  	bis	r5,r22,r5
   170  	stq	r8,8(r16)
   171  	bis	r6,r23,r6
   172  	stq	r5,16(r16)
   173  	stq	r6,24(r16)
   174  	stq	r24,32(r16)
   175  	ret	r31,(r26),1
   176  
   177  L(end):	stq	r24,0(r16)	C store the top result limb
   178  	ret	r31,(r26),1
   179  EPILOGUE(mpn_rshift)
   180  ASM_END()