github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/ultrasparct3/addmul_1.asm (about)

     1  dnl  SPARC v9 mpn_addmul_1 for T3/T4/T5.
     2  
     3  dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
     4  
     5  dnl  Copyright 2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C		   cycles/limb
    36  C UltraSPARC T3:	26
    37  C UltraSPARC T4:	4.5
    38  
    39  C INPUT PARAMETERS
    40  define(`rp', `%i0')		C destination limbs: loaded, updated and stored back in place
    41  define(`up', `%i1')		C source limbs: only ever loaded
    42  define(`n',  `%i2')		C limb count: counted down as limbs are consumed
    43  define(`v0', `%i3')		C single-limb multiplier: used as a source operand only
    44  
        C Working registers.  u0-u3 receive limbs loaded from up and r0-r3 receive
        C limbs loaded from rp; the main loop handles four limbs per pass, one
        C (uN, rN) pair per limb.  %i0 (rp) also carries the return value at exit.
    45  define(`u0',  `%l0')
    46  define(`u1',  `%l1')
    47  define(`u2',  `%l2')
    48  define(`u3',  `%l3')
    49  define(`r0',  `%l4')
    50  define(`r1',  `%l5')
    51  define(`r2',  `%l6')
    52  define(`r3',  `%l7')
    53  
    54  ASM_START()
        C NOTE(review): REGISTER is defined outside this file; presumably it declares
        C %g2 and %g3 as scratch to the assembler.  %g3 is used below, %g2 is not.
    55  	REGISTER(%g2,#scratch)
    56  	REGISTER(%g3,#scratch)
        C
        C mpn_addmul_1 (rp, up, n, v0)
        C
        C Multiplies the n 64-bit limbs at up by the limb v0, adds the products to the
        C n limbs at rp in place, and returns the final carry limb in %i0 (which is
        C the caller's %o0 after the closing restore).
        C
        C n must be at least 1: n = 0 would take the L(b0) path and process four
        C limbs.
        C
        C Structure: n mod 4 leftover limbs (1, 2 or 3) are handled first by
        C L(b01), L(b10) or L(b11); the remaining multiple of four is handled by the
        C loop at L(top)/L(lo0), four limbs per pass.
        C
        C Carry handling: two things are carried between limb groups.
        C   %g5         carry limb = high half of the last product of the group
        C   carry flag  carry out of the last addition into an rp limb
        C Both are folded in by the first addxccc at L(lo0), or by L(xit) when no
        C limbs remain.  Only the cc-suffixed adds are meant to change the flag, so
        C no other flag-setting instruction may be placed between the chains.
        C
        C Product registers inside the loop (high:low halves):
        C   u0 -> %o1:%o0   u1 -> %o3:%o2   u2 -> %o5:%o4   u3 -> %g5:%g4
        C
        C NOTE(review): umulxhi, addxccc and addxc are macros defined outside this
        C file.  From their use here they are taken to mean
        C   umulxhi(a,b,d)  d = high 64 bits of the unsigned product a*b
        C   addxccc(a,b,d)  d = a + b + carry flag, updating the flags
        C   addxc(a,b,d)    d = a + b + carry flag, flags left unchanged
        C Confirm against the macro definitions.
        C
        C The instruction interleaving looks tuned for the CPUs in the cycle table
        C at the top of the file; do not reorder without re-measuring.
    57  PROLOGUE(mpn_addmul_1)
    58  	save	%sp, -176, %sp		C new register window; the frame itself is never accessed
    59  	ldx	[up+0], %g1		C %g1 = up[0], needed by every entry path
    60  
    61  	and	n, 3, %g3		C %g3 = n mod 4 selects the entry path
    62  	brz	%g3, L(b0)		C multiple of 4: go straight to the 4-limb loop
    63  	 addcc	%g0, %g0, %g5			C clear carry limb, flag (delay slot, runs on both paths)
    64  	cmp	%g3, 2
    65  	bcs	%xcc, L(b01)		C n mod 4 = 1
    66  	 nop
    67  	be	%xcc, L(b10)		C n mod 4 = 2, else fall through for 3
    68  	 ldx	[up+8], %g5		C delay slot: %g5 = up[1] for both L(b10) and L(b11)
    69  
        C n mod 4 = 3: handle three limbs.  %g1 = up[0], %g5 = up[1].
    70  L(b11):	ldx	[up+16], u3		C u3 = up[2]
    71  	mulx	%g1, v0, %o2		C %o3:%o2 = up[0] * v0
    72  	umulxhi(%g1, v0, %o3)
    73  	ldx	[rp+0], r1
    74  	mulx	%g5, v0, %o4		C %o5:%o4 = up[1] * v0
    75  	ldx	[rp+8], r2
    76  	umulxhi(%g5, v0, %o5)
    77  	ldx	[rp+16], r3
    78  	mulx	u3, v0, %g4		C %g5:%g4 = up[2] * v0
    79  	umulxhi(u3, v0, %g5)
    80  	addcc	%o3, %o4, %o4		C chain: each high half is added into the next low half
    81  	addxccc(%o5, %g4, %g4)
    82  	addxc(	%g0, %g5, %g5)		C carry limb = last high half + chain carry
    83  	addcc	r1, %o2, r1		C add the three chained words into rp[0..2]
    84  	stx	r1, [rp+0]
    85  	addxccc(r2, %o4, r2)
    86  	stx	r2, [rp+8]
    87  	addxccc(r3, %g4, r3)		C carry flag stays live for L(lo0) or L(xit)
    88  	stx	r3, [rp+16]
    89  	add	n, -3, n		C plain add: does not disturb the carry flag
    90  	add	up, 24, up
    91  	brz	n, L(xit)		C n was exactly 3: finish
    92  	 add	rp, 24, rp		C delay slot: advance rp on both paths
    93  	b	L(com)
    94  	 nop
    95  
        C n mod 4 = 2: handle two limbs.  %g1 = up[0], %g5 = up[1].
    96  L(b10):	mulx	%g1, v0, %o4		C %o5:%o4 = up[0] * v0
    97  	ldx	[rp+0], r2
    98  	umulxhi(%g1, v0, %o5)
    99  	ldx	[rp+8], r3
   100  	mulx	%g5, v0, %g4		C %g5:%g4 = up[1] * v0
   101  	umulxhi(%g5, v0, %g5)
   102  	addcc	%o5, %g4, %g4		C high half of limb 0 into low half of limb 1
   103  	addxc(	%g0, %g5, %g5)		C carry limb = last high half + chain carry
   104  	addcc	r2, %o4, r2		C add the two chained words into rp[0..1]
   105  	stx	r2, [rp+0]
   106  	addxccc(r3, %g4, r3)		C carry flag stays live for L(lo0) or L(xit)
   107  	stx	r3, [rp+8]
   108  	add	n, -2, n
   109  	add	up, 16, up
   110  	brz	n, L(xit)		C n was exactly 2: finish
   111  	 add	rp, 16, rp		C delay slot: advance rp on both paths
   112  	b	L(com)
   113  	 nop
   114  
        C n mod 4 = 1: handle one limb.  %g1 = up[0].
   115  L(b01):	ldx	[rp+0], r3
   116  	mulx	%g1, v0, %g4		C %g5:%g4 = up[0] * v0, %g5 is the carry limb
   117  	umulxhi(%g1, v0, %g5)
   118  	addcc	r3, %g4, r3		C carry flag stays live for L(lo0) or L(xit)
   119  	stx	r3, [rp+0]
   120  	add	n, -1, n
   121  	add	up, 8, up
   122  	brz	n, L(xit)		C n was exactly 1: finish
   123  	 add	rp, 8, rp		C delay slot: advance rp on both paths
   124  
        C Loop entry.  L(com) reloads %g1 after a leftover path has advanced up;
        C L(b0) is entered directly when n is a multiple of 4, with %g1 = up[0]
        C already loaded and carry limb and flag cleared.
   125  L(com):	ldx	[up+0], %g1
   126  L(b0):	ldx	[up+8], u1
   127  	ldx	[up+16], u2
   128  	ldx	[up+24], u3
   129  	mulx	%g1, v0, %o0		C %o1:%o0 = first limb * v0 (%g1 stands in for u0)
   130  	umulxhi(%g1, v0, %o1)
   131  	b	L(lo0)			C enter the loop in the middle
   132  	 nop
   133  
        C Main loop, four limbs per pass, software pipelined in two halves.
        C   L(lo0) to brgz: load four rp limbs, form the remaining products, and
        C     chain carry limb and high halves into the low halves.
        C   L(top) to L(lo0): add the chained words into the rp limbs loaded in
        C     the previous half and store them.  rp was already advanced by 32,
        C     hence the negative offsets.  The next four up limbs are loaded and
        C     the first product of the next group is started here as well.
   134  	ALIGN(16)
   135  L(top):	ldx	[up+0], u0
   136  	addxc(	%g0, %g5, %g5)		C propagate carry into carry limb
   137  	ldx	[up+8], u1
   138  	addcc	r0, %o0, r0		C start of the add-into-rp carry chain
   139  	ldx	[up+16], u2
   140  	addxccc(r1, %o2, r1)
   141  	ldx	[up+24], u3
   142  	addxccc(r2, %o4, r2)
   143  	stx	r0, [rp-32]
   144  	addxccc(r3, %g4, r3)		C carry out is consumed by the addxccc at L(lo0)
   145  	stx	r1, [rp-24]
   146  	mulx	u0, v0, %o0		C %o1:%o0 = u0 * v0 for the next group
   147  	stx	r2, [rp-16]
   148  	umulxhi(u0, v0, %o1)
   149  	stx	r3, [rp-8]
   150  L(lo0):	mulx	u1, v0, %o2		C %o3:%o2 = u1 * v0
   151  	ldx	[rp+0], r0
   152  	umulxhi(u1, v0, %o3)
   153  	ldx	[rp+8], r1
   154  	mulx	u2, v0, %o4		C %o5:%o4 = u2 * v0
   155  	ldx	[rp+16], r2
   156  	umulxhi(u2, v0, %o5)
   157  	ldx	[rp+24], r3
   158  	mulx	u3, v0, %g4		C low half of u3 * v0
   159  	addxccc(%g5, %o0, %o0)		C old carry limb + pending carry flag into first low half
   160  	umulxhi(u3, v0, %g5)		C high half of u3 * v0 becomes the new carry limb
   161  	add	up, 32, up
   162  	addxccc(%o1, %o2, %o2)		C chain each high half into the next low half
   163  	add	rp, 32, rp
   164  	addxccc(%o3, %o4, %o4)
   165  	add	n, -4, n
   166  	addxccc(%o5, %g4, %g4)		C carry out goes to the addxc at L(top) or below
   167  	brgz	n, L(top)		C loop while limbs remain
   168  	 nop
   169  
        C Wind-down: the same rp update as the first half of the loop, without the
        C loads and multiply for a further group.
   170  	addxc(	%g0, %g5, %g5)		C carry limb = last high half + chain carry
   171  	addcc	r0, %o0, r0
   172  	stx	r0, [rp-32]
   173  	addxccc(r1, %o2, r1)
   174  	stx	r1, [rp-24]
   175  	addxccc(r2, %o4, r2)
   176  	stx	r2, [rp-16]
   177  	addxccc(r3, %g4, r3)
   178  	stx	r3, [rp-8]
   179  L(xit):	addxc(	%g0, %g5, %i0)		C return value = carry limb + carry of the last rp addition
   180  	ret
   181  	 restore			C delay slot: pop the window, %i0 becomes the caller's %o0
   182  EPILOGUE()