github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc32/sub_n.asm (about)

     1  dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
     2  dnl  store difference in a third limb vector.
     3  
     4  dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  include(`../config.m4')
    34  
    35  C INPUT PARAMETERS
    36  define(res_ptr,%o0)
    37  define(s1_ptr,%o1)
    38  define(s2_ptr,%o2)
    39  define(n,%o3)
    40  
    41  ASM_START()
    42  PROLOGUE(mpn_sub_n)
    43  	xor	s2_ptr,res_ptr,%g1
    44  	andcc	%g1,4,%g0
    45  	bne	L(1)			C branch if alignment differs
    46  	nop				C delay slot
    47  C **  V1a  **
    48  	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
    49  	be	L(v1)			C if no, branch
    50  	nop				C delay slot
    51  C Subtract least significant limb separately to align res_ptr and s2_ptr
    52  	ld	[s1_ptr],%g4
    53  	add	s1_ptr,4,s1_ptr
    54  	ld	[s2_ptr],%g2
    55  	add	s2_ptr,4,s2_ptr
    56  	add	n,-1,n
    57  	subcc	%g4,%g2,%o4
    58  	st	%o4,[res_ptr]
    59  	add	res_ptr,4,res_ptr
    60  L(v1):	addx	%g0,%g0,%o4		C save cy in register
    61  	cmp	n,2			C if n < 2 ...
    62  	bl	L(end2)			C ... branch to tail code
    63  	subcc	%g0,%o4,%g0		C restore cy
    64  
    65  	ld	[s1_ptr+0],%g4
    66  	addcc	n,-10,n			C n -= 10 = 8 per iteration + 2 limbs in flight
    67  	ld	[s1_ptr+4],%g1
    68  	ldd	[s2_ptr+0],%g2		C ldd fills even-odd pair %g2 and %g3
    69  	blt	L(fin1)
    70  	subcc	%g0,%o4,%g0		C restore cy
    71  C Subtract blocks of 8 limbs until less than 8 limbs remain
    72  L(loop1):
    73  	subxcc	%g4,%g2,%o4
    74  	ld	[s1_ptr+8],%g4
    75  	subxcc	%g1,%g3,%o5
    76  	ld	[s1_ptr+12],%g1
    77  	ldd	[s2_ptr+8],%g2
    78  	std	%o4,[res_ptr+0]
    79  	subxcc	%g4,%g2,%o4
    80  	ld	[s1_ptr+16],%g4
    81  	subxcc	%g1,%g3,%o5
    82  	ld	[s1_ptr+20],%g1
    83  	ldd	[s2_ptr+16],%g2
    84  	std	%o4,[res_ptr+8]
    85  	subxcc	%g4,%g2,%o4
    86  	ld	[s1_ptr+24],%g4
    87  	subxcc	%g1,%g3,%o5
    88  	ld	[s1_ptr+28],%g1
    89  	ldd	[s2_ptr+24],%g2
    90  	std	%o4,[res_ptr+16]
    91  	subxcc	%g4,%g2,%o4
    92  	ld	[s1_ptr+32],%g4
    93  	subxcc	%g1,%g3,%o5
    94  	ld	[s1_ptr+36],%g1
    95  	ldd	[s2_ptr+32],%g2
    96  	std	%o4,[res_ptr+24]
    97  	addx	%g0,%g0,%o4		C save cy in register
    98  	addcc	n,-8,n
    99  	add	s1_ptr,32,s1_ptr
   100  	add	s2_ptr,32,s2_ptr
   101  	add	res_ptr,32,res_ptr
   102  	bge	L(loop1)
   103  	subcc	%g0,%o4,%g0		C restore cy
   104  
   105  L(fin1):
   106  	addcc	n,8-2,n
   107  	blt	L(end1)
   108  	subcc	%g0,%o4,%g0		C restore cy
   109  C Subtract blocks of 2 limbs until less than 2 limbs remain
   110  L(loope1):
   111  	subxcc	%g4,%g2,%o4
   112  	ld	[s1_ptr+8],%g4
   113  	subxcc	%g1,%g3,%o5
   114  	ld	[s1_ptr+12],%g1
   115  	ldd	[s2_ptr+8],%g2
   116  	std	%o4,[res_ptr+0]
   117  	addx	%g0,%g0,%o4		C save cy in register
   118  	addcc	n,-2,n
   119  	add	s1_ptr,8,s1_ptr
   120  	add	s2_ptr,8,s2_ptr
   121  	add	res_ptr,8,res_ptr
   122  	bge	L(loope1)
   123  	subcc	%g0,%o4,%g0		C restore cy
   124  L(end1):
   125  	subxcc	%g4,%g2,%o4
   126  	subxcc	%g1,%g3,%o5
   127  	std	%o4,[res_ptr+0]
   128  	addx	%g0,%g0,%o4		C save cy in register
   129  
   130  	andcc	n,1,%g0
   131  	be	L(ret1)
   132  	subcc	%g0,%o4,%g0		C restore cy
   133  C Subtract last limb
   134  	ld	[s1_ptr+8],%g4
   135  	ld	[s2_ptr+8],%g2
   136  	subxcc	%g4,%g2,%o4
   137  	st	%o4,[res_ptr+8]
   138  
   139  L(ret1):
   140  	retl
   141  	addx	%g0,%g0,%o0	C return carry-out from most sign. limb
   142  
   143  L(1):	xor	s1_ptr,res_ptr,%g1
   144  	andcc	%g1,4,%g0
   145  	bne	L(2)
   146  	nop				C delay slot
   147  C **  V1b  **
   148  	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
   149  	be	L(v1b)			C if no, branch
   150  	nop				C delay slot
   151  C Subtract least significant limb separately to align res_ptr and s1_ptr
   152  	ld	[s2_ptr],%g4
   153  	add	s2_ptr,4,s2_ptr
   154  	ld	[s1_ptr],%g2
   155  	add	s1_ptr,4,s1_ptr
   156  	add	n,-1,n
   157  	subcc	%g2,%g4,%o4
   158  	st	%o4,[res_ptr]
   159  	add	res_ptr,4,res_ptr
   160  L(v1b):	addx	%g0,%g0,%o4		C save cy in register
   161  	cmp	n,2			C if n < 2 ...
   162  	bl	L(end2)			C ... branch to tail code
   163  	subcc	%g0,%o4,%g0		C restore cy
   164  
   165  	ld	[s2_ptr+0],%g4
   166  	addcc	n,-10,n			C n -= 10 = 8 per iteration + 2 limbs in flight
   167  	ld	[s2_ptr+4],%g1
   168  	ldd	[s1_ptr+0],%g2		C ldd fills even-odd pair %g2 and %g3
   169  	blt	L(fin1b)
   170  	subcc	%g0,%o4,%g0		C restore cy
   171  C Subtract blocks of 8 limbs until less than 8 limbs remain
   172  L(loop1b):
   173  	subxcc	%g2,%g4,%o4
   174  	ld	[s2_ptr+8],%g4
   175  	subxcc	%g3,%g1,%o5
   176  	ld	[s2_ptr+12],%g1
   177  	ldd	[s1_ptr+8],%g2
   178  	std	%o4,[res_ptr+0]
   179  	subxcc	%g2,%g4,%o4
   180  	ld	[s2_ptr+16],%g4
   181  	subxcc	%g3,%g1,%o5
   182  	ld	[s2_ptr+20],%g1
   183  	ldd	[s1_ptr+16],%g2
   184  	std	%o4,[res_ptr+8]
   185  	subxcc	%g2,%g4,%o4
   186  	ld	[s2_ptr+24],%g4
   187  	subxcc	%g3,%g1,%o5
   188  	ld	[s2_ptr+28],%g1
   189  	ldd	[s1_ptr+24],%g2
   190  	std	%o4,[res_ptr+16]
   191  	subxcc	%g2,%g4,%o4
   192  	ld	[s2_ptr+32],%g4
   193  	subxcc	%g3,%g1,%o5
   194  	ld	[s2_ptr+36],%g1
   195  	ldd	[s1_ptr+32],%g2
   196  	std	%o4,[res_ptr+24]
   197  	addx	%g0,%g0,%o4		C save cy in register
   198  	addcc	n,-8,n
   199  	add	s1_ptr,32,s1_ptr
   200  	add	s2_ptr,32,s2_ptr
   201  	add	res_ptr,32,res_ptr
   202  	bge	L(loop1b)
   203  	subcc	%g0,%o4,%g0		C restore cy
   204  
   205  L(fin1b):
   206  	addcc	n,8-2,n
   207  	blt	L(end1b)
   208  	subcc	%g0,%o4,%g0		C restore cy
   209  C Subtract blocks of 2 limbs until less than 2 limbs remain
   210  L(loope1b):
   211  	subxcc	%g2,%g4,%o4
   212  	ld	[s2_ptr+8],%g4
   213  	subxcc	%g3,%g1,%o5
   214  	ld	[s2_ptr+12],%g1
   215  	ldd	[s1_ptr+8],%g2
   216  	std	%o4,[res_ptr+0]
   217  	addx	%g0,%g0,%o4		C save cy in register
   218  	addcc	n,-2,n
   219  	add	s1_ptr,8,s1_ptr
   220  	add	s2_ptr,8,s2_ptr
   221  	add	res_ptr,8,res_ptr
   222  	bge	L(loope1b)
   223  	subcc	%g0,%o4,%g0		C restore cy
   224  L(end1b):
   225  	subxcc	%g2,%g4,%o4
   226  	subxcc	%g3,%g1,%o5
   227  	std	%o4,[res_ptr+0]
   228  	addx	%g0,%g0,%o4		C save cy in register
   229  
   230  	andcc	n,1,%g0
   231  	be	L(ret1b)
   232  	subcc	%g0,%o4,%g0		C restore cy
   233  C Subtract last limb
   234  	ld	[s2_ptr+8],%g4
   235  	ld	[s1_ptr+8],%g2
   236  	subxcc	%g2,%g4,%o4
   237  	st	%o4,[res_ptr+8]
   238  
   239  L(ret1b):
   240  	retl
   241  	addx	%g0,%g0,%o0		C return carry-out from most sign. limb
   242  
   243  C **  V2  **
   244  C If we come here, the alignment of s1_ptr and res_ptr as well as the
   245  C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   246  C things can be aligned (that we care about) we now know that the alignment
   247  C of s1_ptr and s2_ptr are the same.
   248  
   249  L(2):	cmp	n,1
   250  	be	L(jone)
   251  	nop				C delay slot
   252  	andcc	s1_ptr,4,%g0		C s1_ptr unaligned? Side effect: cy=0
   253  	be	L(v2)			C if no, branch
   254  	nop				C delay slot
   255  C Subtract least significant limb separately to align s1_ptr and s2_ptr
   256  	ld	[s1_ptr],%g4
   257  	add	s1_ptr,4,s1_ptr
   258  	ld	[s2_ptr],%g2
   259  	add	s2_ptr,4,s2_ptr
   260  	add	n,-1,n
   261  	subcc	%g4,%g2,%o4
   262  	st	%o4,[res_ptr]
   263  	add	res_ptr,4,res_ptr
   264  
   265  L(v2):	addx	%g0,%g0,%o4		C save cy in register
   266  	addcc	n,-8,n
   267  	blt	L(fin2)
   268  	subcc	%g0,%o4,%g0		C restore cy
   269  C Subtract blocks of 8 limbs until less than 8 limbs remain
   270  L(loop2):
   271  	ldd	[s1_ptr+0],%g2
   272  	ldd	[s2_ptr+0],%o4
   273  	subxcc	%g2,%o4,%g2
   274  	st	%g2,[res_ptr+0]
   275  	subxcc	%g3,%o5,%g3
   276  	st	%g3,[res_ptr+4]
   277  	ldd	[s1_ptr+8],%g2
   278  	ldd	[s2_ptr+8],%o4
   279  	subxcc	%g2,%o4,%g2
   280  	st	%g2,[res_ptr+8]
   281  	subxcc	%g3,%o5,%g3
   282  	st	%g3,[res_ptr+12]
   283  	ldd	[s1_ptr+16],%g2
   284  	ldd	[s2_ptr+16],%o4
   285  	subxcc	%g2,%o4,%g2
   286  	st	%g2,[res_ptr+16]
   287  	subxcc	%g3,%o5,%g3
   288  	st	%g3,[res_ptr+20]
   289  	ldd	[s1_ptr+24],%g2
   290  	ldd	[s2_ptr+24],%o4
   291  	subxcc	%g2,%o4,%g2
   292  	st	%g2,[res_ptr+24]
   293  	subxcc	%g3,%o5,%g3
   294  	st	%g3,[res_ptr+28]
   295  	addx	%g0,%g0,%o4		C save cy in register
   296  	addcc	n,-8,n
   297  	add	s1_ptr,32,s1_ptr
   298  	add	s2_ptr,32,s2_ptr
   299  	add	res_ptr,32,res_ptr
   300  	bge	L(loop2)
   301  	subcc	%g0,%o4,%g0		C restore cy
   302  
   303  L(fin2):
   304  	addcc	n,8-2,n
   305  	blt	L(end2)
   306  	subcc	%g0,%o4,%g0		C restore cy
   307  C Subtract blocks of 2 limbs until less than 2 limbs remain
   308  L(loope2):
   309  	ldd	[s1_ptr+0],%g2
   310  	ldd	[s2_ptr+0],%o4
   311  	subxcc	%g2,%o4,%g2
   312  	st	%g2,[res_ptr+0]
   313  	subxcc	%g3,%o5,%g3
   314  	st	%g3,[res_ptr+4]
   315  	addx	%g0,%g0,%o4		C save cy in register
   316  	addcc	n,-2,n
   317  	add	s1_ptr,8,s1_ptr
   318  	add	s2_ptr,8,s2_ptr
   319  	add	res_ptr,8,res_ptr
   320  	bge	L(loope2)
   321  	subcc	%g0,%o4,%g0		C restore cy
   322  L(end2):
   323  	andcc	n,1,%g0
   324  	be	L(ret2)
   325  	subcc	%g0,%o4,%g0		C restore cy
   326  C Subtract last limb
   327  L(jone):
   328  	ld	[s1_ptr],%g4
   329  	ld	[s2_ptr],%g2
   330  	subxcc	%g4,%g2,%o4
   331  	st	%o4,[res_ptr]
   332  
   333  L(ret2):
   334  	retl
   335  	addx	%g0,%g0,%o0		C return carry-out from most sign. limb
   336  EPILOGUE(mpn_sub_n)