github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/gcd_1.asm (about)

     1  dnl  SPARC64 mpn_gcd_1.
     2  
     3  dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for SPARC by Torbjörn
     4  dnl  Granlund.
     5  
     6  dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
     7  
     8  dnl  This file is part of the GNU MP Library.
     9  dnl
    10  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    11  dnl  it under the terms of either:
    12  dnl
    13  dnl    * the GNU Lesser General Public License as published by the Free
    14  dnl      Software Foundation; either version 3 of the License, or (at your
    15  dnl      option) any later version.
    16  dnl
    17  dnl  or
    18  dnl
    19  dnl    * the GNU General Public License as published by the Free Software
    20  dnl      Foundation; either version 2 of the License, or (at your option) any
    21  dnl      later version.
    22  dnl
    23  dnl  or both in parallel, as here.
    24  dnl
    25  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    26  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    27  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    28  dnl  for more details.
    29  dnl
    30  dnl  You should have received copies of the GNU General Public License and the
    31  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    32  dnl  see https://www.gnu.org/licenses/.
    33  
    34  include(`../config.m4')
    35  
    36  
    37  C		  cycles/bit (approx)
    38  C UltraSPARC 1&2:	 5.1
    39  C UltraSPARC 3:		 5.0
    40  C UltraSPARC T1:	11.4
    41  C UltraSPARC T3:	10
    42  C UltraSPARC T4:	 6
    43  C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
    44  
    45  C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
        C The table holds MASK+1 byte entries, one per possible value of the low
        C MAXSHIFT bits of a limb.  mpn_gcd_1 indexes it with (limb & MASK) to get
        C a shift count; index 0 means all MAXSHIFT low bits were zero.
    46  
    47  deflit(MAXSHIFT, 7)		C number of low bits used as the table index
    48  deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))	C (1 << MAXSHIFT) - 1
    49  
    50  	RODATA
    51  	TYPE(ctz_table,object)
    52  ctz_table:
    53  	.byte	MAXSHIFT		C entry for n == 0
    54  forloop(i,1,MASK,
    55  `	.byte	m4_count_trailing_zeros(i)
    56  ')
    57  	SIZE(ctz_table,.-ctz_table)
    58  
    59  C Threshold of when to call bmod when U is one limb.  Should be about
    60  C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
        C It is applied as a shift count: in the single-limb case bmod is called
        C only when (u0 >> BMOD_THRES_LOG2) > v0, compared as unsigned values.
    61  define(`BMOD_THRES_LOG2', 14)
    62  
    63  C INPUT PARAMETERS
    64  define(`up',    `%i0')	C pointer to the limbs of U, low limb at [up+0]
    65  define(`n',     `%i1')	C number of limbs in U
    66  define(`v0',    `%i2')	C the single limb V; updated in place as v
    67  
    68  
    69  ASM_START()
    70  	REGISTER(%g2,#scratch)
    71  	REGISTER(%g3,#scratch)
        C mpn_gcd_1: GCD of the n-limb operand at up (U) and the single limb v0 (V).
        C
        C Outline, as implemented below:
        C   1. Count the trailing zero bits of (u0 | v0) into %i4, where u0 is
        C      the low limb of U.
        C   2. Shift v0 right until it is odd.
        C   3. If n != 1, or (u0 >> BMOD_THRES_LOG2) > v0, replace U by the
        C      one-limb value returned by mpn_modexact_1c_odd(up, n, v0, 0).
        C   4. Run a subtract-and-shift loop on u (%o0) and v (v0), using
        C      ctz_table to find how far to shift u right on each step.
        C   5. Return v << %i4.
        C
        C Register use: %g1 = u0, %i4 = twos count, %i5 = ctz_table address,
        C %g3 = table index, %g5 = shift count, %l4 = odd u, %l2 = v - u,
        C %o0 = u, %g2 = scratch.
        C
        C NOTE(review): the loops at L(twos) and L(divide_strip_y) never exit
        C when v0 == 0, so callers presumably guarantee v0 != 0 -- confirm.
    72  PROLOGUE(mpn_gcd_1)
    73  	save	%sp, -192, %sp		C new register window and stack frame
    74  	ldx	[up+0], %g1		C U low limb
    75  	mov	-1, %i4			C twos count; incremented before each test
    76  	or	v0, %g1, %g2		C x | y
    77  
    78  L(twos):
    79  	inc	%i4
    80  	andcc	%g2, 1, %g0		C test the low bit of x | y
    81  	bz,a	%xcc, L(twos)		C loop while it is zero
    82  	 srlx	%g2, 1, %g2		C annulled slot: runs only when looping
    83  
    84  L(divide_strip_y):
    85  	andcc	v0, 1, %g0		C test the low bit of v0
    86  	bz,a	%xcc, L(divide_strip_y)	C loop until v0 is odd
    87  	 srlx	v0, 1, v0		C annulled slot: runs only when looping
    88  
    89  	cmp	n, 1			C if n > 1 we need
    90  	bnz	%xcc, L(bmod)		C to call bmod_1
    91  	 nop
    92  
    93  C Both U and V are single limbs, reduce with bmod if u0 >> v0.
    94  	srlx	%g1, BMOD_THRES_LOG2, %g2	C u0 >> BMOD_THRES_LOG2
    95  	cmp	%g2, v0
    96  	bleu	%xcc, L(noreduce)	C skip bmod when that is <= v0 (unsigned)
    97  	 mov	%g1, %o0		C slot always runs: u = u0
    98  
    99  L(bmod):
   100  	mov	up, %o0
   101  	mov	n, %o1
   102  	mov	v0, %o2
   103  	call	mpn_modexact_1c_odd	C args (up, n, v0, 0); result read from %o0
   104  	 mov	0, %o3			C slot: fourth argument is zero
   105  
   106  L(noreduce):
   107  
   108  	LEA64(ctz_table, i5, g4)	C presumably %i5 = address of ctz_table, %g4 scratch -- confirm
   109  
   110  	cmp	%o0, 0
   111  	bnz	%xcc, L(mid)		C u != 0: enter the loop
   112  	 and	%o0, MASK, %g3		C slot: table index = low MAXSHIFT bits of u
   113  
        C u == 0: the result is v << twos.  The delay slot of return executes
        C after the caller register window has been restored, so %o2 and %o4
        C there name what this function calls %i2 (v0) and %i4 (twos count),
        C and %o0 is the register in which the caller receives the result.
   114  	return	%i7+8
   115  	 sllx	%o2, %o4, %o0		C CAUTION: v0 alias for o2
   116  
        C Main loop.  The flags tested at L(top) were set by the subcc below:
        C carry clear means v >= u (unsigned), so v takes the old u and u takes
        C v - u; otherwise v is kept and u stays u - v as computed by the sub.
        C v - u and u - v have the same number of trailing zero bits, so the
        C table index taken from v - u is valid in both cases.
   117  	ALIGN(16)
   118  L(top):	movcc	%xcc, %l4, v0		C v = min(u,v)
   119  	movcc	%xcc, %l2, %o0		C u = |v - u|
   120  L(mid):	ldub	[%i5+%g3], %g5		C shift count = ctz_table[index]
   121  	brz,a,pn %g3, L(shift_alot)	C index 0: low MAXSHIFT bits of u all zero
   122  	 srlx	%o0, MAXSHIFT, %o0	C annulled slot: runs only when branch taken
   123  	srlx	%o0, %g5, %l4		C new u, odd
   124  	subcc	v0, %l4, %l2		C v - u, set flags for branch and movcc
   125  	sub	%l4, v0, %o0		C u - v
   126  	bnz,pt	%xcc, L(top)		C loop until u == v
   127  	 and	%l2, MASK, %g3		C extract low MAXSHIFT bits from (v-u)
   128  
        C u == v: the result is v << twos, formed in the caller window as above.
   129  	return	%i7+8
   130  	 sllx	%o2, %o4, %o0		C CAUTION: v0 alias for o2
   131  
   132  L(shift_alot):
   133  	b	L(mid)			C u was already shifted right by MAXSHIFT
   134  	 and	%o0, MASK, %g3		C slot: recompute the table index from u
   135  EPILOGUE()