github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc32/add_n.asm (about)

     1  dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
     2  dnl  sum in a third limb vector.
     3  
     4  dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  
    33  include(`../config.m4')
    34  
    35  C INPUT PARAMETERS
C The SPARC V8 ABI passes the first arguments in %o0..%o5; symbolic names
C are bound here so the code below reads like the C prototype
C   mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr,
C                        mp_srcptr s2_ptr, mp_size_t n)
C
C res_ptr  destination limb vector (written)
C s1_ptr   first source limb vector (read)
C s2_ptr   second source limb vector (read)
C n        number of limbs; the file header requires n > 0
    36  define(res_ptr,%o0)
    37  define(s1_ptr,%o1)
    38  define(s2_ptr,%o2)
    39  define(n,%o3)
    40  
    41  ASM_START()
C  mpn_add_n: res[0..n-1] = s1[0..n-1] + s2[0..n-1]; returns carry-out of
C  the most significant limb in %o0.
C
C  The routine picks one of three paths based on which pointers share the
C  same word alignment modulo 8 (bit 2 of the address), so the inner loops
C  can use 64-bit ldd/std register-pair accesses, which on SPARC require a
C  doubleword-aligned address:
C    V1a: s2_ptr and res_ptr share alignment -- ldd from s2, std to res
C    V1b: s1_ptr and res_ptr share alignment -- swap s1/s2, reuse V1a
C    V2:  s1_ptr and s2_ptr share alignment  -- ldd from both, word st's
C
C  Carry handling across bookkeeping instructions (which clobber the
C  condition codes):
C    addx  %g0,%g0,reg   copies the carry flag into reg (reg = 0 + 0 + C)
C    subcc %g0,reg,%g0   computes 0 - reg, which sets the carry (borrow)
C                        flag iff reg != 0, thereby restoring C
C  Remember that SPARC branches have one delay slot: the instruction
C  following a branch executes regardless of the branch outcome, which is
C  why the carry-restoring subcc usually sits right after a branch.
    42  PROLOGUE(mpn_add_n)
C  Bit 2 of (s2_ptr XOR res_ptr) is set iff the two pointers differ in
C  alignment modulo 8.
    43  	xor	s2_ptr,res_ptr,%g1
    44  	andcc	%g1,4,%g0
    45  	bne	L(1)			C branch if alignment differs
    46  	nop
    47  C **  V1a  **
    48  L(0):	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
    49  	be	L(v1)			C if no, branch
    50  	nop
    51  C Add least significant limb separately to align res_ptr and s2_ptr
    52  	ld	[s1_ptr],%g4
    53  	add	s1_ptr,4,s1_ptr
    54  	ld	[s2_ptr],%g2
    55  	add	s2_ptr,4,s2_ptr
    56  	add	n,-1,n
    57  	addcc	%g4,%g2,%o4
    58  	st	%o4,[res_ptr]
    59  	add	res_ptr,4,res_ptr
    60  L(v1):	addx	%g0,%g0,%o4		C save cy in register
C  Fewer than 2 limbs left: fall through to the shared tail code of the V2
C  path (L(end2)), which is correct here because the pointers have not yet
C  been advanced past the remaining limb.
    61  	cmp	n,2			C if n < 2 ...
    62  	bl	L(end2)			C ... branch to tail code
    63  	subcc	%g0,%o4,%g0		C restore cy
    64  
C  Software-pipelined preload: fetch the first two limbs of each operand
C  before entering the loop.  ldd loads the even/odd register pair, so
C  "ldd [s2_ptr+0],%g2" fills both %g2 and %g3.  n is biased by -10:
C  -8 for the unrolled iteration about to run plus -2 for the limbs
C  fetched here ahead of time (L(fin1) adds back 8-2 accordingly).
    65  	ld	[s1_ptr+0],%g4
    66  	addcc	n,-10,n
    67  	ld	[s1_ptr+4],%g1
    68  	ldd	[s2_ptr+0],%g2
    69  	blt	L(fin1)
    70  	subcc	%g0,%o4,%g0		C restore cy
    71  C Add blocks of 8 limbs until less than 8 limbs remain
C  Each 6-instruction group adds the 2 limbs loaded previously while
C  fetching the next 2, then std stores the %o4/%o5 result pair; the
C  addxcc chain keeps the carry flowing limb to limb.
    72  L(loop1):
    73  	addxcc	%g4,%g2,%o4
    74  	ld	[s1_ptr+8],%g4
    75  	addxcc	%g1,%g3,%o5
    76  	ld	[s1_ptr+12],%g1
    77  	ldd	[s2_ptr+8],%g2
    78  	std	%o4,[res_ptr+0]
    79  	addxcc	%g4,%g2,%o4
    80  	ld	[s1_ptr+16],%g4
    81  	addxcc	%g1,%g3,%o5
    82  	ld	[s1_ptr+20],%g1
    83  	ldd	[s2_ptr+16],%g2
    84  	std	%o4,[res_ptr+8]
    85  	addxcc	%g4,%g2,%o4
    86  	ld	[s1_ptr+24],%g4
    87  	addxcc	%g1,%g3,%o5
    88  	ld	[s1_ptr+28],%g1
    89  	ldd	[s2_ptr+24],%g2
    90  	std	%o4,[res_ptr+16]
    91  	addxcc	%g4,%g2,%o4
    92  	ld	[s1_ptr+32],%g4
    93  	addxcc	%g1,%g3,%o5
    94  	ld	[s1_ptr+36],%g1
    95  	ldd	[s2_ptr+32],%g2
    96  	std	%o4,[res_ptr+24]
    97  	addx	%g0,%g0,%o4		C save cy in register
    98  	addcc	n,-8,n
    99  	add	s1_ptr,32,s1_ptr
   100  	add	s2_ptr,32,s2_ptr
   101  	add	res_ptr,32,res_ptr
   102  	bge	L(loop1)
   103  	subcc	%g0,%o4,%g0		C restore cy
   104  
C  Undo the loop bias: +8 for the iteration that did not run, keeping the
C  -2 for the pair of limbs that is still preloaded in %g4/%g1 and %g2/%g3.
   105  L(fin1):
   106  	addcc	n,8-2,n
   107  	blt	L(end1)
   108  	subcc	%g0,%o4,%g0		C restore cy
   109  C Add blocks of 2 limbs until less than 2 limbs remain
   110  L(loope1):
   111  	addxcc	%g4,%g2,%o4
   112  	ld	[s1_ptr+8],%g4
   113  	addxcc	%g1,%g3,%o5
   114  	ld	[s1_ptr+12],%g1
   115  	ldd	[s2_ptr+8],%g2
   116  	std	%o4,[res_ptr+0]
   117  	addx	%g0,%g0,%o4		C save cy in register
   118  	addcc	n,-2,n
   119  	add	s1_ptr,8,s1_ptr
   120  	add	s2_ptr,8,s2_ptr
   121  	add	res_ptr,8,res_ptr
   122  	bge	L(loope1)
   123  	subcc	%g0,%o4,%g0		C restore cy
C  Drain the pipeline: add the final preloaded limb pair.
   124  L(end1):
   125  	addxcc	%g4,%g2,%o4
   126  	addxcc	%g1,%g3,%o5
   127  	std	%o4,[res_ptr+0]
   128  	addx	%g0,%g0,%o4		C save cy in register
   129  
C  If n was odd, one limb remains beyond the pair just stored.
   130  	andcc	n,1,%g0
   131  	be	L(ret1)
   132  	subcc	%g0,%o4,%g0		C restore cy
   133  C Add last limb
   134  	ld	[s1_ptr+8],%g4
   135  	ld	[s2_ptr+8],%g2
   136  	addxcc	%g4,%g2,%o4
   137  	st	%o4,[res_ptr+8]
   138  
C  retl's delay slot materializes the final carry into the return register.
   139  L(ret1):
   140  	retl
   141  	addx	%g0,%g0,%o0	C return carry-out from most sign. limb
   142  
C  s2/res alignments differ; test whether s1 and res share alignment.
   143  L(1):	xor	s1_ptr,res_ptr,%g1
   144  	andcc	%g1,4,%g0
   145  	bne	L(2)
   146  	nop
   147  C **  V1b  **
C  Addition is commutative: swap s1_ptr and s2_ptr (the swap's last mov
C  sits in the branch delay slot) and run the V1a code.
   148  	mov	s2_ptr,%g1
   149  	mov	s1_ptr,s2_ptr
   150  	b	L(0)
   151  	mov	%g1,s1_ptr
   152  
   153  C **  V2  **
   154  C If we come here, the alignment of s1_ptr and res_ptr as well as the
   155  C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   156  C things can be aligned (that we care about) we now know that the alignment
   157  C of s1_ptr and s2_ptr are the same.
   158  
C  Single-limb case: jump straight to L(jone).  cmp is subcc, and n-1 with
C  n==1 produces no borrow, so the carry consumed by addxcc there is clear.
   159  L(2):	cmp	n,1
   160  	be	L(jone)
   161  	nop
   162  	andcc	s1_ptr,4,%g0		C s1_ptr unaligned? Side effect: cy=0
   163  	be	L(v2)			C if no, branch
   164  	nop
   165  C Add least significant limb separately to align s1_ptr and s2_ptr
   166  	ld	[s1_ptr],%g4
   167  	add	s1_ptr,4,s1_ptr
   168  	ld	[s2_ptr],%g2
   169  	add	s2_ptr,4,s2_ptr
   170  	add	n,-1,n
   171  	addcc	%g4,%g2,%o4
   172  	st	%o4,[res_ptr]
   173  	add	res_ptr,4,res_ptr
   174  
C  No preload here (res_ptr takes word stores), so n is biased by -8 only.
   175  L(v2):	addx	%g0,%g0,%o4		C save cy in register
   176  	addcc	n,-8,n
   177  	blt	L(fin2)
   178  	subcc	%g0,%o4,%g0		C restore cy
   179  C Add blocks of 8 limbs until less than 8 limbs remain
C  Both sources are doubleword-aligned: ldd pulls limb pairs into %g2/%g3
C  and %o4/%o5, while the unaligned res_ptr is written with single-word st.
   180  L(loop2):
   181  	ldd	[s1_ptr+0],%g2
   182  	ldd	[s2_ptr+0],%o4
   183  	addxcc	%g2,%o4,%g2
   184  	st	%g2,[res_ptr+0]
   185  	addxcc	%g3,%o5,%g3
   186  	st	%g3,[res_ptr+4]
   187  	ldd	[s1_ptr+8],%g2
   188  	ldd	[s2_ptr+8],%o4
   189  	addxcc	%g2,%o4,%g2
   190  	st	%g2,[res_ptr+8]
   191  	addxcc	%g3,%o5,%g3
   192  	st	%g3,[res_ptr+12]
   193  	ldd	[s1_ptr+16],%g2
   194  	ldd	[s2_ptr+16],%o4
   195  	addxcc	%g2,%o4,%g2
   196  	st	%g2,[res_ptr+16]
   197  	addxcc	%g3,%o5,%g3
   198  	st	%g3,[res_ptr+20]
   199  	ldd	[s1_ptr+24],%g2
   200  	ldd	[s2_ptr+24],%o4
   201  	addxcc	%g2,%o4,%g2
   202  	st	%g2,[res_ptr+24]
   203  	addxcc	%g3,%o5,%g3
   204  	st	%g3,[res_ptr+28]
   205  	addx	%g0,%g0,%o4		C save cy in register
   206  	addcc	n,-8,n
   207  	add	s1_ptr,32,s1_ptr
   208  	add	s2_ptr,32,s2_ptr
   209  	add	res_ptr,32,res_ptr
   210  	bge	L(loop2)
   211  	subcc	%g0,%o4,%g0		C restore cy
   212  
C  Undo the -8 loop bias, leaving a -2 bias for the 2-limb cleanup loop.
   213  L(fin2):
   214  	addcc	n,8-2,n
   215  	blt	L(end2)
   216  	subcc	%g0,%o4,%g0		C restore cy
   217  L(loope2):
   218  	ldd	[s1_ptr+0],%g2
   219  	ldd	[s2_ptr+0],%o4
   220  	addxcc	%g2,%o4,%g2
   221  	st	%g2,[res_ptr+0]
   222  	addxcc	%g3,%o5,%g3
   223  	st	%g3,[res_ptr+4]
   224  	addx	%g0,%g0,%o4		C save cy in register
   225  	addcc	n,-2,n
   226  	add	s1_ptr,8,s1_ptr
   227  	add	s2_ptr,8,s2_ptr
   228  	add	res_ptr,8,res_ptr
   229  	bge	L(loope2)
   230  	subcc	%g0,%o4,%g0		C restore cy
C  Shared tail (also reached from the V1 n<2 case): handle an odd final
C  limb at the current, already-advanced pointers.
   231  L(end2):
   232  	andcc	n,1,%g0
   233  	be	L(ret2)
   234  	subcc	%g0,%o4,%g0		C restore cy
   235  C Add last limb
   236  L(jone):
   237  	ld	[s1_ptr],%g4
   238  	ld	[s2_ptr],%g2
   239  	addxcc	%g4,%g2,%o4
   240  	st	%o4,[res_ptr]
   241  
   242  L(ret2):
   243  	retl
   244  	addx	%g0,%g0,%o0	C return carry-out from most sign. limb
   245  EPILOGUE(mpn_add_n)