github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/m88k/mc88110/add_n.S (about)

     1  ; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
     2  ; sum in a third limb vector.
     3  
     4  ; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
     5  
     6  ;  This file is part of the GNU MP Library.
     7  ;
     8  ;  The GNU MP Library is free software; you can redistribute it and/or modify
     9  ;  it under the terms of either:
    10  ;
    11  ;    * the GNU Lesser General Public License as published by the Free
    12  ;      Software Foundation; either version 3 of the License, or (at your
    13  ;      option) any later version.
    14  ;
    15  ;  or
    16  ;
    17  ;    * the GNU General Public License as published by the Free Software
    18  ;      Foundation; either version 2 of the License, or (at your option) any
    19  ;      later version.
    20  ;
    21  ;  or both in parallel, as here.
    22  ;
    23  ;  The GNU MP Library is distributed in the hope that it will be useful, but
    24  ;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  ;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  ;  for more details.
    27  ;
    28  ;  You should have received copies of the GNU General Public License and the
    29  ;  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  ;  see https://www.gnu.org/licenses/.
    31  
    32  
    33  ; INPUT PARAMETERS
        ; Symbolic names for the four argument registers used by __gmpn_add_n.
        ; Limbs are handled as 4-byte words: the pointers step by 4 per limb.
        ;
        ; res_ptr: base of the limb vector that receives the sum.  The same
        ;          register (r2) is overwritten with the carry-out on return.
    34  #define res_ptr	r2
        ; s1_ptr: base of the first source limb vector (only loaded from).
    35  #define s1_ptr	r3
        ; s2_ptr: base of the second source limb vector (only loaded from).
    36  #define s2_ptr	r4
        ; size: number of limbs to add; the file header requires size > 0.
    37  #define size	r5
    38  
    39  #include "sysdep.h"
    40  
    41  	text
    42  	align	16
    43  	global	C_SYMBOL_NAME(__gmpn_add_n)
        ; __gmpn_add_n: add the size-limb vectors at s1_ptr and s2_ptr, store the
        ; size-limb sum at res_ptr and return the final carry (0 or 1) in r2.
        ;
        ; One of three variants is chosen from bit 2 of the pointers, i.e. from
        ; whether each pointer is 8-byte aligned, so that double-word accesses
        ; (ld.d / st.d) are only issued on pointers with bit 2 clear:
        ;   V1a  s2_ptr and res_ptr agree in bit 2: ld.d from s2, st.d to res,
        ;        single-word loads from s1.
        ;   V1b  s1_ptr and res_ptr agree instead: swap s1_ptr/s2_ptr, then V1a.
        ;   V2   res_ptr disagrees with both sources (so the sources agree):
        ;        ld.d from both sources, single-word stores to res.
        ; NOTE(review): assumes all three pointers are at least 4-byte aligned
        ; -- confirm against callers.
        ;
        ; Scratch registers: r6-r10 and r12 (ld.d/st.d use the pairs r8,r9 and
        ; r6,r7).  The carry bit links every addu.cio on a path; the code relies
        ; on the plain addu/subu, loads, stores, cmp and branches in between
        ; leaving the carry bit unchanged.
    44  C_SYMBOL_NAME(__gmpn_add_n):
    45  	addu.co	 r0,r0,r0		; clear cy flag
        ; Bit 2 of r12 is set iff s2_ptr and res_ptr differ in 8-byte alignment.
    46  	xor	 r12,s2_ptr,res_ptr
    47  	bb1	 2,r12,L1
    48  ; **  V1a  **
        ; s2_ptr and res_ptr share alignment (V1b also enters here after its swap).
    49  L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is 8-byte aligned (bit 2 clear)
    50  /* Add least significant limb separately to align res_ptr and s2_ptr */
    51  	ld	 r10,s1_ptr,0
    52  	addu	 s1_ptr,s1_ptr,4
    53  	ld	 r8,s2_ptr,0
    54  	addu	 s2_ptr,s2_ptr,4
    55  	subu	 size,size,1
        ; The carry was cleared at entry, so only the carry-out is recorded here.
    56  	addu.co	 r6,r10,r8
    57  	st	 r6,res_ptr,0
    58  	addu	 res_ptr,res_ptr,4
        ; With fewer than 2 limbs left, reuse the V2 tail: it adds the one
        ; remaining limb if bit 0 of size is set, then returns the carry.
    59  L_v1:	cmp	 r12,size,2
    60  	bb1	 lt,r12,Lend2
    61  
        ; Preload the first pair: r10,r12 = two s1 limbs, r8,r9 = two s2 limbs.
    62  	ld	 r10,s1_ptr,0
    63  	ld	 r12,s1_ptr,4
    64  	ld.d	 r8,s2_ptr,0
        ; size -= 10: 8 limbs for one Loop1 pass plus the pair that stays preloaded.
    65  	subu	 size,size,10
    66  	bcnd	 lt0,size,Lfin1
    67  /* Add blocks of 8 limbs until less than 8 limbs remain */
        ; Software pipelined: each addu.cio pair consumes the limbs loaded by the
        ; previous step while the loads fetch the next pair, so on exit one pair
        ; is still pending in r10,r12 and r8,r9.
    68  	align	 8
    69  Loop1:	subu	 size,size,8
    70  	addu.cio r6,r10,r8
    71  	ld	 r10,s1_ptr,8
    72  	addu.cio r7,r12,r9
    73  	ld	 r12,s1_ptr,12
    74  	ld.d	 r8,s2_ptr,8
    75  	st.d	 r6,res_ptr,0
    76  	addu.cio r6,r10,r8
    77  	ld	 r10,s1_ptr,16
    78  	addu.cio r7,r12,r9
    79  	ld	 r12,s1_ptr,20
    80  	ld.d	 r8,s2_ptr,16
    81  	st.d	 r6,res_ptr,8
    82  	addu.cio r6,r10,r8
    83  	ld	 r10,s1_ptr,24
    84  	addu.cio r7,r12,r9
    85  	ld	 r12,s1_ptr,28
    86  	ld.d	 r8,s2_ptr,24
    87  	st.d	 r6,res_ptr,16
    88  	addu.cio r6,r10,r8
    89  	ld	 r10,s1_ptr,32
    90  	addu.cio r7,r12,r9
    91  	ld	 r12,s1_ptr,36
    92  	addu	 s1_ptr,s1_ptr,32
    93  	ld.d	 r8,s2_ptr,32
    94  	addu	 s2_ptr,s2_ptr,32
    95  	st.d	 r6,res_ptr,24
    96  	addu	 res_ptr,res_ptr,32
    97  	bcnd	 ge0,size,Loop1
    98  
        ; size is in -8..-1 here.  Adding 6 leaves (limbs still to add) - 4, where
        ; the count includes the pending pair; a negative result means only that
        ; pair plus at most one more limb remain.
    99  Lfin1:	addu	 size,size,8-2
   100  	bcnd	 lt0,size,Lend1
   101  /* Add blocks of 2 limbs until less than 2 limbs remain */
        ; Same pipelining as Loop1, one pair per pass.
   102  Loope1:	addu.cio r6,r10,r8
   103  	ld	 r10,s1_ptr,8
   104  	addu.cio r7,r12,r9
   105  	ld	 r12,s1_ptr,12
   106  	ld.d	 r8,s2_ptr,8
   107  	st.d	 r6,res_ptr,0
   108  	subu	 size,size,2
   109  	addu	 s1_ptr,s1_ptr,8
   110  	addu	 s2_ptr,s2_ptr,8
   111  	addu	 res_ptr,res_ptr,8
   112  	bcnd	 ge0,size,Loope1
        ; Add and store the last pending pair.
   113  Lend1:	addu.cio r6,r10,r8
   114  	addu.cio r7,r12,r9
   115  	st.d	 r6,res_ptr,0
   116  
        ; size is -2 or -1 here; bit 0 set (-1) means one odd limb is left.
   117  	bb0	 0,size,Lret1
   118  /* Add last limb */
        ; Offset 8 because the pointers were not advanced past the pair just stored.
   119  	ld	 r10,s1_ptr,8
   120  	ld	 r8,s2_ptr,8
   121  	addu.cio r6,r10,r8
   122  	st	 r6,res_ptr,8
   123  
        ; The addu.ci occupies the delay slot of jmp.n: r2 = 0 + 0 + carry.
   124  Lret1:	jmp.n	 r1
   125  	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
   126  
        ; s2_ptr and res_ptr differ in alignment; now compare s1_ptr with res_ptr.
   127  L1:	xor	 r12,s1_ptr,res_ptr
   128  	bb1	 2,r12,L2
   129  ; **  V1b  **
        ; s1_ptr shares alignment with res_ptr: exchange the two source pointers
        ; (r12 as temporary) so that s2_ptr is the matching one, then run V1a.
        ; Swapping the operands does not change the sum.
   130  	or	 r12,r0,s2_ptr
   131  	or	 s2_ptr,r0,s1_ptr
   132  	or	 s1_ptr,r0,r12
   133  	br	 L0
   134  
   135  ; **  V2  **
   136  /* If we come here, the alignment of s1_ptr and res_ptr as well as the
   137     alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   138     things can be aligned (that we care about) we now know that the alignment
   139     of s1_ptr and s2_ptr are the same.  */
   140  
        ; A single limb needs no alignment work: go straight to the one-limb tail.
   141  L2:	cmp	 r12,size,1
   142  	bb1	 eq,r12,Ljone
   143  	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is 8-byte aligned (bit 2 clear)
   144  /* Add least significant limb separately to align s1_ptr and s2_ptr */
   145  	ld	 r10,s1_ptr,0
   146  	addu	 s1_ptr,s1_ptr,4
   147  	ld	 r8,s2_ptr,0
   148  	addu	 s2_ptr,s2_ptr,4
   149  	subu	 size,size,1
        ; The carry was cleared at entry, so only the carry-out is recorded here.
   150  	addu.co	 r6,r10,r8
   151  	st	 r6,res_ptr,0
   152  	addu	 res_ptr,res_ptr,4
   153  
        ; From here s1_ptr and s2_ptr have bit 2 clear while res_ptr has it set,
        ; so both sources are read with ld.d and results are stored word by word.
        ; size -= 8 up front: Loop2 runs while at least 8 limbs remain.
   154  L_v2:	subu	 size,size,8
   155  	bcnd	 lt0,size,Lfin2
   156  /* Add blocks of 8 limbs until less than 8 limbs remain */
   157  	align	 8
   158  Loop2:	subu	 size,size,8
   159  	ld.d	 r8,s1_ptr,0
   160  	ld.d	 r6,s2_ptr,0
   161  	addu.cio r8,r8,r6
   162  	st	 r8,res_ptr,0
   163  	addu.cio r9,r9,r7
   164  	st	 r9,res_ptr,4
   165  	ld.d	 r8,s1_ptr,8
   166  	ld.d	 r6,s2_ptr,8
   167  	addu.cio r8,r8,r6
   168  	st	 r8,res_ptr,8
   169  	addu.cio r9,r9,r7
   170  	st	 r9,res_ptr,12
   171  	ld.d	 r8,s1_ptr,16
   172  	ld.d	 r6,s2_ptr,16
   173  	addu.cio r8,r8,r6
   174  	st	 r8,res_ptr,16
   175  	addu.cio r9,r9,r7
   176  	st	 r9,res_ptr,20
   177  	ld.d	 r8,s1_ptr,24
   178  	ld.d	 r6,s2_ptr,24
   179  	addu.cio r8,r8,r6
   180  	st	 r8,res_ptr,24
   181  	addu.cio r9,r9,r7
   182  	st	 r9,res_ptr,28
   183  	addu	 s1_ptr,s1_ptr,32
   184  	addu	 s2_ptr,s2_ptr,32
   185  	addu	 res_ptr,res_ptr,32
   186  	bcnd	 ge0,size,Loop2
   187  
        ; size is in -8..-1 here.  Adding 6 leaves (limbs still to add) - 2; a
        ; negative result means at most one limb remains.
   188  Lfin2:	addu	 size,size,8-2
   189  	bcnd	 lt0,size,Lend2
        ; Add blocks of 2 limbs until fewer than 2 limbs remain.
   190  Loope2:	ld.d	 r8,s1_ptr,0
   191  	ld.d	 r6,s2_ptr,0
   192  	addu.cio r8,r8,r6
   193  	st	 r8,res_ptr,0
   194  	addu.cio r9,r9,r7
   195  	st	 r9,res_ptr,4
   196  	subu	 size,size,2
   197  	addu	 s1_ptr,s1_ptr,8
   198  	addu	 s2_ptr,s2_ptr,8
   199  	addu	 res_ptr,res_ptr,8
   200  	bcnd	 ge0,size,Loope2
        ; Shared tail (also reached from L_v1 with size 0 or 1): bit 0 of size
        ; set means exactly one limb is left, at offset 0 of each pointer.
   201  Lend2:	bb0	 0,size,Lret2
   202  /* Add last limb */
   203  Ljone:	ld	 r10,s1_ptr,0
   204  	ld	 r8,s2_ptr,0
   205  	addu.cio r6,r10,r8
   206  	st	 r6,res_ptr,0
   207  
        ; The addu.ci occupies the delay slot of jmp.n: r2 = 0 + 0 + carry.
   208  Lret2:	jmp.n	 r1
   209  	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb