github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/gcd_1.asm (about)

     1  dnl  PowerPC-64 mpn_gcd_1.
     2  
     3  dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C		    cycles/bit (approx)
    34  C POWER3/PPC630		 ?
    35  C POWER4/PPC970		 8.5
    36  C POWER5		 ?
    37  C POWER6		10.1
    38  C POWER7		 9.4
    39  C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
    40  
    41  C INPUT PARAMETERS
        C   up (r3): pointer to the multi-limb operand U; mpn_gcd_1 reads its low
        C            limb from offset 0.
        C   n  (r4): size of U (presumably a limb count - confirm against the
        C            caller); compared with BMOD_1_TO_MOD_1_THRESHOLD.
        C   v0 (r5): the single-limb operand.
    42  define(`up',    `r3')
    43  define(`n',     `r4')
    44  define(`v0',    `r5')
    45  
        C Size cut-over used by mpn_gcd_1: below it U is reduced by calling
        C mpn_modexact_1c_odd, at or above it by calling mpn_mod_1.  The value 30
        C is only a fallback for builds that have not defined the threshold.
    46  ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
    47    `define(`BMOD_1_TO_MOD_1_THRESHOLD',30)')
    48  
        C The two external routines that mpn_gcd_1 reaches through CALL.
    49  EXTERN_FUNC(mpn_mod_1)
    50  EXTERN_FUNC(mpn_modexact_1c_odd)
    51  
    52  ASM_START()
    53  PROLOGUE(mpn_gcd_1,toc)
        C mpn_gcd_1: greatest common divisor of the operand at up (size n) and the
        C single limb v0.  The result is returned in r3.
        C
        C Outline:
        C   1. r31 = number of trailing zero bits of (v0 | low limb of U), i.e. the
        C      power of two shared by both operands.
        C   2. Strip the trailing zero bits from v0 and keep the result in r30.
        C   3. Call mpn_mod_1 or mpn_modexact_1c_odd with (up, n, stripped v0); the
        C      value that comes back in r3 is used as the one-limb stand-in for U
        C      (presumably a residue with the same gcd - see those routines).
        C   4. Run a branch-free subtract-and-shift loop on r3 (a) and r30 (d).
        C   5. Return r30 shifted left by r31.
        C
        C r30 and r31 are saved on entry and restored on exit; the code relies on
        C both still holding their values after the CALL.
    54  	mflr	r0			C r0 = return address
    55  	std	r30, -16(r1)		C save r30 (will hold d)
    56  	std	r31, -8(r1)		C save r31 (will hold common twos)
    57  	std	r0, 16(r1)		C save return address
    58  	stdu	r1, -128(r1)		C open a 128-byte frame
    59  
    60  	ld	r7, 0(up)		C U low limb
    61  	or	r0, r5, r7		C x | y
    62  
        C x & -x isolates the lowest set bit; 63 - cntlzd of that single bit is
        C its bit index, which equals the number of trailing zero bits of x.
    63  	neg	r6, r0
    64  	and	r6, r6, r0
    65  	cntlzd	r31, r6			C common twos
    66  	subfic	r31, r31, 63
    67  
        C Same trick on v0 alone, then shift its trailing zero bits out.
    68  	neg	r6, r5
    69  	and	r6, r6, r5
    70  	cntlzd	r8, r6
    71  	subfic	r8, r8, 63
    72  	srd	r5, r5, r8
    73  	mr	r30, r5			C v0 saved
    74  
        C n < threshold takes the mpn_modexact_1c_odd path, otherwise mpn_mod_1.
        C r3 and r4 still hold up and n; r5 holds the stripped v0.
    75  	cmpdi	r4, BMOD_1_TO_MOD_1_THRESHOLD
    76  	blt	L(bmod)
    77  	CALL(	mpn_mod_1)
    78  	b	L(reduced)
    79  L(bmod):
    80  	li	r6, 0			C extra argument = 0 (presumably the carry-in - confirm)
    81  	CALL(	mpn_modexact_1c_odd)
    82  L(reduced):
    83  
        C Register names for the loop.  mask reuses r0, and a1/a2/d1/d2 reuse
        C r4-r7, none of which hold anything still needed at this point.
    84  define(`mask', `r0')dnl
    85  define(`a1',   `r4')dnl
    86  define(`a2',   `r5')dnl
    87  define(`d1',   `r6')dnl
    88  define(`d2',   `r7')dnl
    89  define(`cnt',  `r9')dnl
    90  
        C a = r3 (value returned by the call), d = r30.  The record form neg. sets
        C CR0, and nothing before the bne below changes CR0, so the bne is taken
        C exactly when r3 is non-zero.  If r3 is zero the answer is d itself.
    91  	neg.	r6, r3
    92  	and	r6, r6, r3
    93  	cntlzd	cnt, r6
    94  	subfic	cnt, cnt, 63		C cnt = trailing zero bits of a
    95  	li	r12, 63			C constant for 63 - cntlzd in the loop
    96  	bne	L(mid)
    97  	b	L(end)
    98  
        C Loop body.  mask is all ones when a < d and zero when a >= d, so the
        C and/andc pairs select without branching:
        C   new a = |a - d|,  new d = min(a, d).
    99  	ALIGN(16)
   100  L(top):
   101  	and	a1, r10, mask		C d - a
   102  	andc	a2, r11,  mask		C a - d
   103  	and	d1, r3, mask		C a
   104  	andc	d2, r30, mask		C d
   105  	or	r3, a1, a2		C new a
   106  	subf	cnt, cnt, r12		C cnt = 63 - cntlzd = trailing zero bits of d - a
   107  	or	r30, d1, d2		C new d
        C Loop entry.  Shift the trailing zero bits out of a, then form both
        C differences.  sub. sets CR0 for the bne at the bottom (loop ends when
        C a == d); subc sets the carry, which is 1 when a >= d (no borrow).
        C subfe of a register with itself gives carry - 1, i.e. 0 or all ones.
   108  L(mid):	srd	r3, r3, cnt
   109  	sub.	r10, r30, r3		C r10 = d - a
   110  	subc	r11, r3, r30		C r11 = a - d
   111  	neg	r8, r10
   112  	and	r8, r8, r10		C lowest set bit of d - a
   113  	subfe	mask, mask, mask
   114  	cntlzd	cnt, r8
   115  	bne	L(top)
   116  
        C Put the common power of two back onto d; that is the return value.
   117  L(end):	sld	r3, r30, r31
   118  
   119  	addi	r1, r1, 128		C close the frame
   120  	ld	r0, 16(r1)		C reload return address
   121  	ld	r30, -16(r1)		C restore r30
   122  	ld	r31, -8(r1)		C restore r31
   123  	mtlr	r0
   124  	blr
   125  EPILOGUE()