github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode32/add_n.asm (about)

     1  dnl  PowerPC-64/mode32 mpn_add_n -- Add two limb vectors of the same length > 0
     2  dnl  and store sum in a third limb vector.
     3  
     4  dnl  Copyright 1999-2001, 2003, 2005 Free Software Foundation, Inc.
     5  
     6  dnl  This file is part of the GNU MP Library.
     7  dnl
     8  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     9  dnl  it under the terms of either:
    10  dnl
    11  dnl    * the GNU Lesser General Public License as published by the Free
    12  dnl      Software Foundation; either version 3 of the License, or (at your
    13  dnl      option) any later version.
    14  dnl
    15  dnl  or
    16  dnl
    17  dnl    * the GNU General Public License as published by the Free Software
    18  dnl      Foundation; either version 2 of the License, or (at your option) any
    19  dnl      later version.
    20  dnl
    21  dnl  or both in parallel, as here.
    22  dnl
    23  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    24  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  dnl  for more details.
    27  dnl
    28  dnl  You should have received copies of the GNU General Public License and the
    29  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  dnl  see https://www.gnu.org/licenses/.
    31  
    32  include(`../config.m4')
    33  
    34  C		cycles/limb
    35  C POWER3/PPC630:     ?
    36  C POWER4/PPC970:     4.25
    37  
    38  C INPUT PARAMETERS
    39  C rp	r3
    40  C up	r4
    41  C vp	r5
    42  C n	r6
    43  
    44  ASM_START()
        C mpn_add_n: add the n-limb vectors at up and vp and store the n-limb
        C sum at rp (registers as listed under INPUT PARAMETERS above).
        C
        C Structure: the loop body handles two limbs per pass.  The operands
        C for the following limb are loaded before the current limb is added,
        C so the "current" pair alternates between r0/r8 and r10/r9.
        C L(end) finishes a pending r0/r8 pair; L(exit) finishes a pending
        C r10/r9 pair.
        C
        C Carry handling: each limb uses two adde instructions.  The first
        C adds the full registers; its result (r7) is what gets stored.  The
        C second adds the upper 32 bits of both operands; its result (r6) is
        C never read, so only its effect on the carry bit matters.
        C NOTE(review): presumably required because in mode32 the carry bit is
        C taken at the 32-bit boundary -- confirm against the Power ISA.
        C
        C The carry is handed from each adde to the next one, so the relative
        C order of the adde instructions must be preserved.
    45  PROLOGUE(mpn_add_n)
    46  	mtctr	r6		C copy size into CTR
    47  	addic	r0, r0, 0	C clear cy (r0 is reloaded two lines below)
    48  	ld	r8, 0(r4)	C load least significant s1 limb
    49  	ld	r0, 0(r5)	C load least significant s2 limb
    50  	addi	r3, r3, -8	C bias res_ptr by -8; stores use offset 8 or 16
    51  	bdz	L(end)		C decrement CTR; if zero (n = 1), skip loop
    52  
    53  L(oop):	ld	r9, 8(r4)	C load next s1 limb
    54  	ld	r10, 8(r5)	C load next s2 limb
    55  	adde	r7, r0, r8	C add limbs with cy, set cy
    56  	srdi	r6, r0, 32	C r6 = upper 32 bits of s2 limb
    57  	srdi	r11, r8, 32	C r11 = upper 32 bits of s1 limb
    58  	adde	r6, r6, r11	C add high limb parts, set cy (r6 is discarded)
    59  	std	r7, 8(r3)	C store result limb
    60  	bdz	L(exit)		C decrement CTR and exit if done
    61  	ldu	r8, 16(r4)	C load s1 limb and update s1_ptr
    62  	ldu	r0, 16(r5)	C load s2 limb and update s2_ptr
    63  	adde	r7, r10, r9	C add limbs with cy, set cy
    64  	srdi	r6, r10, 32	C r6 = upper 32 bits of s2 limb
    65  	srdi	r11, r9, 32	C r11 = upper 32 bits of s1 limb
    66  	adde	r6, r6, r11	C add high limb parts, set cy (r6 is discarded)
    67  	stdu	r7, 16(r3)	C store result limb and update res_ptr
    68  	bdnz	L(oop)		C decrement CTR and loop back
    69  
    70  L(end):	adde	r7, r0, r8	C add final limbs (r0/r8 pair) with cy, set cy
    71  	srdi	r6, r0, 32	C r6 = upper 32 bits of s2 limb
    72  	srdi	r11, r8, 32	C r11 = upper 32 bits of s1 limb
    73  	adde	r6, r6, r11	C add high limb parts, set cy (r6 is discarded)
    74  	std	r7, 8(r3)	C store ultimate result limb
    75  	li	r3, 0		C r3 = 0
    76  	addze	r4, r3		C r4 = 0 + cy, i.e. the carry-out of the whole add
        C NOTE(review): presumably r3:r4 form the returned limb (high:low
        C words) under the 32-bit ABI -- confirm.
    77  	blr
    78  L(exit):	adde	r7, r10, r9	C add final limbs (r10/r9 pair) with cy, set cy
    79  	srdi	r6, r10, 32	C r6 = upper 32 bits of s2 limb
    80  	srdi	r11, r9, 32	C r11 = upper 32 bits of s1 limb
    81  	adde	r6, r6, r11	C add high limb parts, set cy (r6 is discarded)
    82  	std	r7, 16(r3)	C store ultimate result limb (res_ptr not yet advanced)
    83  	li	r3, 0		C r3 = 0
    84  	addze	r4, r3		C r4 = 0 + cy, i.e. the carry-out of the whole add
        C NOTE(review): presumably r3:r4 form the returned limb (high:low
        C words) under the 32-bit ABI -- confirm.
    85  	blr
    86  EPILOGUE()