github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/p7/gcd_1.asm (about)

     1  dnl  PowerPC-64 mpn_gcd_1.
     2  
     3  dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C		    cycles/bit (approx)
    34  C POWER3/PPC630		 -
    35  C POWER4/PPC970		 -
    36  C POWER5		 -
    37  C POWER6		 -
    38  C POWER7		 7.6
    39  C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
    40  
    41  C INPUT PARAMETERS
    42  define(`up',    `r3')		C address of the U limbs; low limb is read from 0(up)
    43  define(`n',     `r4')		C size of U; compared with BMOD_1_TO_MOD_1_THRESHOLD
    44  define(`v0',    `r5')		C single-limb operand V
    45  
    46  EXTERN_FUNC(mpn_mod_1)			C reduction used when n >= BMOD_1_TO_MOD_1_THRESHOLD
    47  EXTERN_FUNC(mpn_modexact_1c_odd)	C reduction used when n < threshold, called with r6 = 0
    48  
    49  ASM_START()
        C mpn_gcd_1: GCD of the multi-limb operand at up with the single limb v0.
        C
        C In:   up (r3) = address of the U limbs
        C       n  (r4) = size of U, passed through unchanged to the reduction call
        C       v0 (r5) = single-limb operand V
        C Out:  r3      = value the subtract-and-shift loop converges on, shifted
        C                 left by the number of trailing zero bits common to V and
        C                 the low limb of U
        C
        C Register roles inside the function:
        C       r30 = y, starts as v0 with its trailing zero bits removed
        C       r31 = count of common trailing zero bits ("common twos")
        C       r3  = x, the reduced U returned by the mod routine
        C       r9  = cnt, bit-scan scratch;  r12 = constant 63
        C
        C NOTE(review): the bit scans compute 63 - cntlzd(lowest set bit) and so
        C assume a non-zero input; presumably callers guarantee v0 != 0 -- confirm
        C against the documented mpn_gcd_1 contract.
    50  PROLOGUE(mpn_gcd_1,toc)
    51  	mflr	r0			C r0 = return address (LR)
    52  	std	r30, -16(r1)		C save r30, reused below for y
    53  	std	r31, -8(r1)		C save r31, reused below for the common twos count
    54  	std	r0, 16(r1)		C save LR so it survives the CALLs below
    55  	stdu	r1, -128(r1)		C allocate a 128-byte stack frame
    56  
    57  	ld	r7, 0(up)		C U low limb
    58  	or	r0, r5, r7		C x | y
    59  
    60  	neg	r6, r0			C r6 = -(x | y)
    61  	and	r6, r6, r0		C r6 = lowest set bit of x | y
    62  	cntlzd	r31, r6			C common twos
    63  	subfic	r31, r31, 63		C r31 = 63 - clz = index of that bit
    64  
    65  	neg	r6, r5			C same bit scan, now on v0 alone
    66  	and	r6, r6, r5		C r6 = lowest set bit of v0
    67  	cntlzd	r8, r6
    68  	subfic	r8, r8, 63		C r8 = trailing zero count of v0
    69  	srd	r5, r5, r8		C v0 >>= r8, removing its trailing zero bits
    70  	mr	r30, r5			C v0 saved
    71  
        C Reduce U modulo the shifted v0.  r3 and r4 still hold up and n as passed
        C in; r5 holds the shifted v0.  The result comes back in r3.
    72  	cmpdi	r4, BMOD_1_TO_MOD_1_THRESHOLD
    73  	blt	L(bmod)			C n below threshold: take the modexact path
    74  	CALL(	mpn_mod_1)
    75  	b	L(reduced)
    76  L(bmod):
    77  	li	r6, 0			C fourth argument = 0 (presumably the carry-in; confirm)
    78  	CALL(	mpn_modexact_1c_odd)
    79  L(reduced):
    80  
        C cnt is the scratch register for the bit scans in the loop.
    81  define(`cnt',  `r9')dnl
    82  
    83  	neg.	r6, r3			C r6 = -x; record form sets cr0 for the bne below
    84  	and	r6, r6, r3		C r6 = lowest set bit of x
    85  	cntlzd	cnt, r6
    86  	li	r12, 63			C constant used for cnt = 63 - clz at L(mid)
    87  	bne	L(mid)			C x != 0: enter the loop at its midpoint
    88  	b	L(end)			C x == 0: y is already the result before scaling
    89  
        C Main loop.  CR bit 29 tested by isel is the gt bit of cr7, which the
        C cmpld below sets when y > x (unsigned).  On entry to L(mid), cnt holds
        C cntlzd of the lowest set bit of the value about to be placed in x.
    90  	ALIGN(16)
    91  L(top):	isel	r30, r3, r30, 29	C y = min(x,y)
    92  	isel	r3, r10, r11, 29	C x = |y - x|
    93  L(mid):	subf	cnt, cnt, r12		C cnt = 63-cnt
    94  	srd	r3, r3, cnt		C shift the trailing zero bits out of x
    95  	subf	r10, r3, r30		C r10 = y - x
    96  	subf	r11, r30, r3		C r11 = x - y
    97  	cmpld	cr7, r30, r3		C unsigned compare of y with x
    98  	and	r8, r11, r10		C isolate lsb
    99  	cntlzd	cnt, r8			C bit scan of the difference, used next iteration
   100  	bne	cr7, L(top)		C repeat until x == y
   101  
   102  L(end):	sld	r3, r30, r31		C return value = y << common twos
   103  
   104  	addi	r1, r1, 128		C release the 128-byte frame
   105  	ld	r0, 16(r1)		C reload the saved LR
   106  	ld	r30, -16(r1)		C restore r30
   107  	ld	r31, -8(r1)		C restore r31
   108  	mtlr	r0
   109  	blr
   110  EPILOGUE()