github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/ultrasparct3/mul_1.asm (about)

     1  dnl  SPARC v9 mpn_mul_1 for T3/T4/T5.
     2  
     3  dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
     4  
     5  dnl  Copyright 2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C		   cycles/limb
    36  C UltraSPARC T3:	23
    37  C UltraSPARC T4:	 3
    38  
    39  C INPUT PARAMETERS
    40  define(`rp', `%i0')	C destination pointer: result limbs are written here with stx
    41  define(`up', `%i1')	C source pointer: limbs are read from here with ldx
    42  define(`n',  `%i2')	C limb count; also reused as the loop counter
    43  define(`v0', `%i3')	C multiplier applied to every source limb
    44  
    45  ASM_START()	C macro defined outside this file; presumably emits the assembler preamble -- TODO confirm
    46  	REGISTER(%g2,#scratch)	C REGISTER is defined outside this file; presumably declares the global register as scratch -- TODO confirm
    47  	REGISTER(%g3,#scratch)	C note: the code visible below uses %g5 but never names %g2 or %g3 directly
    48  PROLOGUE(mpn_mul_1)	C multiply the n limbs at up by v0, store n limbs at rp, leave the carry-out limb in %i0
    49  	save	%sp, -176, %sp	C new register window with a 176-byte stack frame
    50  	C Dispatch on n mod 4 to choose the entry point into the 4-way unrolled loop.  umulxhi, addxccc and addxc are macros defined outside this file; presumably high half of the 64x64 product and add-with-carry forms -- TODO confirm.
    51  	and	n, 3, %g5	C %g5 = n mod 4
    52  	add	n, -4, n	C n -= 4; the brgz tests below branch on the sign of this value
    53  	brz	%g5, L(b0)	C n mod 4 == 0
    54  	 cmp	%g5, 2	C (delay slot) sets the condition codes for the two branches below
    55  	bcs	%xcc, L(b1)	C %g5 < 2 unsigned, i.e. n mod 4 == 1
    56  	 nop
    57  	be	%xcc, L(b2)	C n mod 4 == 2
    58  	 nop
    59  	C n mod 4 == 3 falls through.  Each entry block zeroes one register that stands in for the missing previous high half and clears carry, then preloads limbs and products.
    60  L(b3):	addcc	%g0, %g0, %i5	C %i5 = 0 and carry cleared
    61  	ldx	[up+0], %l0	C first three source limbs
    62  	ldx	[up+8], %l1
    63  	ldx	[up+16], %l2
    64  	mulx	%l0, v0, %o0	C low 64 bits of up[0]*v0
    65  	umulxhi(%l0, v0, %o1)	C matching high half (per the macro name)
    66  	brgz	n, L(gt3)	C n-4 > 0: limbs remain beyond these three
    67  	 add	rp, -8, rp	C (delay slot) bias rp so the store at [rp+8] lands on the original rp[0]
    68  	mulx	%l1, v0, %o2
    69  	umulxhi(%l1, v0, %o3)
    70  	b	L(wd3)	C no further limbs: finish in the wind-down code
    71  	 nop
    72  L(gt3):	ldx	[up+24], %l3	C fourth limb, consumed after entering at lo3
    73  	mulx	%l1, v0, %o2
    74  	umulxhi(%l1, v0, %o3)
    75  	add	up, 24, up	C rebase up for the offsets used from lo3 onward
    76  	b	L(lo3)
    77  	 add	n, -3, n	C (delay slot) n = limbs still to be loaded by later full passes
    78  	C Entry for n mod 4 == 2.
    79  L(b2):	addcc	%g0, %g0, %o1	C %o1 = 0 and carry cleared
    80  	ldx	[up+0], %l1	C first two source limbs
    81  	ldx	[up+8], %l2
    82  	brgz	n, L(gt2)	C n-4 > 0: limbs remain beyond these two
    83  	 add	rp, -16, rp	C (delay slot) bias rp so the store at [rp+16] lands on the original rp[0]
    84  	mulx	%l1, v0, %o2
    85  	umulxhi(%l1, v0, %o3)
    86  	mulx	%l2, v0, %o4
    87  	umulxhi(%l2, v0, %o5)
    88  	b	L(wd2)	C no further limbs: finish in the wind-down code
    89  	 nop
    90  L(gt2):	ldx	[up+16], %l3
    91  	mulx	%l1, v0, %o2
    92  	umulxhi(%l1, v0, %o3)
    93  	ldx	[up+24], %l0
    94  	mulx	%l2, v0, %o4
    95  	umulxhi(%l2, v0, %o5)
    96  	add	up, 16, up	C rebase up for the offsets used from lo2 onward
    97  	b	L(lo2)
    98  	 add	n, -2, n	C (delay slot) n = limbs still to be loaded by later full passes
    99  	C Entry for n mod 4 == 1.
   100  L(b1):	addcc	%g0, %g0, %o3	C %o3 = 0 and carry cleared
   101  	ldx	[up+0], %l2
   102  	brgz	n, L(gt1)	C n-4 > 0: more than this one limb
   103  	nop	C (delay slot)
   104  	mulx	%l2, v0, %o4	C single-limb case: low half goes to rp[0]
   105  	stx	%o4, [rp+0]
   106  	umulxhi(%l2, v0, %i0)	C result written straight into the return register %i0
   107  	ret
   108  	 restore	C (delay slot) pop the register window
   109  L(gt1):	ldx	[up+8], %l3
   110  	ldx	[up+16], %l0
   111  	mulx	%l2, v0, %o4
   112  	umulxhi(%l2, v0, %o5)
   113  	ldx	[up+24], %l1
   114  	mulx	%l3, v0, %i4
   115  	umulxhi(%l3, v0, %i5)
   116  	add	rp, -24, rp	C bias rp so the store at [rp+24] lands on the original rp[0]
   117  	add	up, 8, up	C rebase up for the offset used at lo1
   118  	b	L(lo1)
   119  	 add	n, -1, n	C (delay slot) n = limbs still to be loaded by later full passes
   120  	C Entry for n mod 4 == 0.  NOTE(review): n == 0 would also arrive here and read four limbs; presumably callers guarantee n >= 1 -- confirm.
   121  L(b0):	addcc	%g0, %g0, %o5	C %o5 = 0 and carry cleared
   122  	ldx	[up+0], %l3	C first four source limbs are loaded in this block
   123  	ldx	[up+8], %l0
   124  	ldx	[up+16], %l1
   125  	mulx	%l3, v0, %i4
   126  	umulxhi(%l3, v0, %i5)
   127  	ldx	[up+24], %l2
   128  	mulx	%l0, v0, %o0
   129  	umulxhi(%l0, v0, %o1)
   130  	b	L(lo0)	C rp is not biased here: the first store is the [rp+0] one at top or end
   131  	 nop
   132  	C Main loop, four limbs per pass.  Product pairs (low,high) rotate through (%i4,%i5) (%o0,%o1) (%o2,%o3) (%o4,%o5); each stored limb is a low half combined by addxccc with the previous high half, so the carry is expected to chain from one limb to the next.
   133  	ALIGN(16)	C the trailing numbers on the loop lines look like issue-cycle slots -- TODO confirm
   134  L(top):	ldx	[up+0], %l3	C 0
   135  	addxccc(%i4, %o5, %i4)	C 0
   136  	mulx	%l1, v0, %o2	C 1
   137  	stx	%i4, [rp+0]	C 1
   138  	umulxhi(%l1, v0, %o3)	C 2
   139  L(lo3):	ldx	[up+8], %l0	C 2
   140  	addxccc(%o0, %i5, %o0)	C 3
   141  	mulx	%l2, v0, %o4	C 3
   142  	stx	%o0, [rp+8]	C 4
   143  	umulxhi(%l2, v0, %o5)	C 4
   144  L(lo2):	ldx	[up+16], %l1	C 5
   145  	addxccc(%o2, %o1, %o2)	C 5
   146  	mulx	%l3, v0, %i4	C 6
   147  	stx	%o2, [rp+16]	C 6
   148  	umulxhi(%l3, v0, %i5)	C 7
   149  L(lo1):	ldx	[up+24], %l2	C 7
   150  	addxccc(%o4, %o3, %o4)	C 8
   151  	mulx	%l0, v0, %o0	C 8
   152  	stx	%o4, [rp+24]	C 9
   153  	umulxhi(%l0, v0, %o1)	C 9
   154  	add	rp, 32, rp	C 10
   155  L(lo0):	add	up, 32, up	C 10
   156  	brgz	n, L(top)	C 11
   157  	 add	n, -4, n	C 11
   158  	C Wind-down: no more loads, only the products still in flight are drained.  wd3 and wd2 are also the exits taken from b3 and b2 when no limbs remain past the preloaded ones.
   159  L(end):	addxccc(%i4, %o5, %i4)
   160  	mulx	%l1, v0, %o2
   161  	stx	%i4, [rp+0]
   162  	umulxhi(%l1, v0, %o3)
   163  	addxccc(%o0, %i5, %o0)
   164  L(wd3):	mulx	%l2, v0, %o4	C from b3: %i5 is 0 and carry is clear, so skipping the add above leaves %o0 unchanged
   165  	stx	%o0, [rp+8]
   166  	umulxhi(%l2, v0, %o5)
   167  	addxccc(%o2, %o1, %o2)
   168  L(wd2):	stx	%o2, [rp+16]	C from b2: %o1 is 0 and carry is clear, so skipping the add above leaves %o2 unchanged
   169  	addxccc(%o4, %o3, %o4)
   170  	stx	%o4, [rp+24]	C last result limb
   171  	addxc(	%g0, %o5, %i0)	C return value: last high half plus the final carry (assuming addxc adds the carry bit)
   172  	ret
   173  	 restore	C (delay slot) pop the register window
   174  EPILOGUE()