github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/aors_n.asm (about)

     1  dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
     2  
     3  dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                   cycles/limb
    34  C POWER3/PPC630          1.5
    35  C POWER4/PPC970          2
    36  C POWER5                 2
    37  C POWER6                 2.63
    38  C POWER7               2.25-2.87
    39  
    40  C This code is a little bit slower for POWER3/PPC630 than the simple code used
    41  C previously, but it is much faster for POWER4/PPC970.  The reason for the
    42  C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4
    43  C registers.
    44  
    45  C INPUT PARAMETERS
    46  C rp	r3
    47  C up	r4
    48  C vp	r5
    49  C n	r6
    50  C cy	r7	carry/borrow-in, read only by the mpn_add_nc/mpn_sub_nc entry
    51  ifdef(`OPERATION_add_n',`
    52    define(ADDSUBC,	adde)	C limb op: rD = rA + rB + CA, carry-out written back to CA
    53    define(ADDSUB,	addc)	C add that ignores incoming CA; not used by the code visible in this file
    54    define(func,		mpn_add_n)	C entry without carry-in
    55    define(func_nc,	mpn_add_nc)	C entry that takes a carry-in in r7
    56    define(GENRVAL,	`addi	r3, r3, 1')	C r3 is CA - 1 when this runs, so adding 1 yields the carry-out as 0 or 1
    57    define(SETCBR,	`addic	r0, $1, -1')	C argument + all-ones carries unless the argument is 0, so CA = 1 iff argument is nonzero; r0 is scratch
    58    define(CLRCB,		`addic	r0, r0, 0')	C adding 0 can never carry, so this leaves CA = 0
    59  ')
    60  ifdef(`OPERATION_sub_n',`
    61    define(ADDSUBC,	subfe)	C limb op: rD = rB - rA - borrow, evaluated as ~rA + rB + CA, so borrow = 1 - CA
    62    define(ADDSUB,	subfc)	C subtract that ignores incoming CA; not used by the code visible in this file
    63    define(func,		mpn_sub_n)	C entry without borrow-in
    64    define(func_nc,	mpn_sub_nc)	C entry that takes a borrow-in in r7
    65    define(GENRVAL,	`neg	r3, r3')	C r3 is CA - 1 when this runs, i.e. 0 or -1; negating yields the borrow-out as 0 or 1
    66    define(SETCBR,	`subfic	r0, $1, 0')	C computes 0 - argument: CA = 1 iff argument is 0, i.e. CA is cleared by a nonzero borrow-in; r0 is scratch
    67    define(CLRCB,		`addic	r0, r1, -1')	C CA = 1 iff r1 is nonzero; r1 is the stack base used by the body, presumably never 0, so this selects no borrow
    68  ')
    69  
    70  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
    71  C The line above names all four entry points; a single build emits only the func/func_nc pair selected by OPERATION_add_n or OPERATION_sub_n.
    72  ASM_START()	C defined outside this file; presumably emits per-target assembler setup -- TODO confirm
    73  PROLOGUE(func_nc)
    74  	SETCBR(r7)	C seed CA from the carry/borrow-in argument in r7; overwrites r0
    75  	b	L(ent)		C enter the shared body inside func, bypassing its CLRCB
    76  EPILOGUE()
    77  
    78  PROLOGUE(func)
    79  	CLRCB		C plain entry: no carry/borrow-in, so put CA in its neutral state
    80  L(ent):	std	r31, -8(r1)	C shared body, also reached from func_nc with CA seeded from r7
    81  	std	r30, -16(r1)	C r28-r31 are saved below r1 without moving r1 (no frame is built);
    82  	std	r29, -24(r1)	C presumably these are callee-saved and the area below r1 is
    83  	std	r28, -32(r1)	C protected by the ABI for leaf code -- TODO confirm per target ABI
    84  C Peel n mod 4 leading limbs so every later pass handles exactly four.  Nothing between entry and the first ADDSUBC writes CA.
    85  	rldicl.	r0, r6, 0,62	C r0 = n & 3, set cr0
    86  	cmpdi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
    87  	addi	r6, r6, 3	C compute count...
    88  	srdi	r6, r6, 2	C ...for ctr
    89  	mtctr	r6		C ctr = (n + 3) >> 2 passes: the peeled head, if any, plus one per 4 limbs
    90  	beq	cr0, L(b00)	C n & 3 == 0: nothing to peel
    91  	blt	cr6, L(b01)	C n & 3 == 1: peel one limb
    92  	beq	cr6, L(b10)	C n & 3 == 2: peel two limbs
    93  C n & 3 == 3 falls through: peel three limbs.
    94  L(b11):	ld	r8, 0(r4)	C load s1 limb
    95  	ld	r9, 0(r5)	C load s2 limb
    96  	ld	r10, 8(r4)	C load s1 limb
    97  	ld	r11, 8(r5)	C load s2 limb
    98  	ld	r12, 16(r4)	C load s1 limb
    99  	addi	r4, r4, 24	C advance s1 pointer past the 3 peeled limbs (addi leaves CA alone)
   100  	ld	r0, 16(r5)	C load s2 limb
   101  	addi	r5, r5, 24	C advance s2 pointer likewise
   102  	ADDSUBC	r29, r9, r8	C lowest limb first; operand order (s2, s1) makes the sub build compute s1 - s2
   103  	ADDSUBC	r30, r11, r10	C CA ripples from each limb op into the next
   104  	ADDSUBC	r31, r0, r12
   105  	std	r29, 0(r3)
   106  	std	r30, 8(r3)
   107  	std	r31, 16(r3)
   108  	addi	r3, r3, 24	C advance result pointer
   109  	bdnz	L(go)		C ctr still nonzero after decrement: groups of four remain
   110  	b	L(ret)		C n was 3: done
   111  
   112  L(b01):	ld	r12, 0(r4)	C load s1 limb
   113  	addi	r4, r4, 8
   114  	ld	r0, 0(r5)	C load s2 limb
   115  	addi	r5, r5, 8
   116  	ADDSUBC	r31, r0, r12	C add or subtract the single peeled limb, using the entry CA
   117  	std	r31, 0(r3)
   118  	addi	r3, r3, 8
   119  	bdnz	L(go)		C groups of four remain
   120  	b	L(ret)		C n was 1: done
   121  
   122  L(b10):	ld	r10, 0(r4)	C load s1 limb
   123  	ld	r11, 0(r5)	C load s2 limb
   124  	ld	r12, 8(r4)	C load s1 limb
   125  	addi	r4, r4, 16
   126  	ld	r0, 8(r5)	C load s2 limb
   127  	addi	r5, r5, 16
   128  	ADDSUBC	r30, r11, r10	C low peeled limb, using the entry CA
   129  	ADDSUBC	r31, r0, r12	C high peeled limb, using CA from the low one
   130  	std	r30, 0(r3)
   131  	std	r31, 8(r3)
   132  	addi	r3, r3, 16
   133  	bdnz	L(go)		C groups of four remain
   134  	b	L(ret)		C n was 2: done
   135  C From here on the remaining limb count is a multiple of 4 and ctr counts the groups left.
   136  L(b00):	C n & 3 == 0: nothing was peeled; CA still holds the value set at entry
   137  L(go):	ld	r6, 0(r4)	C load s1 limb (r6 and r7 are free: n now lives in ctr, carry-in in CA)
   138  	ld	r7, 0(r5)	C load s2 limb
   139  	ld	r8, 8(r4)	C load s1 limb
   140  	ld	r9, 8(r5)	C load s2 limb
   141  	ld	r10, 16(r4)	C load s1 limb
   142  	ld	r11, 16(r5)	C load s2 limb
   143  	ld	r12, 24(r4)	C load s1 limb
   144  	ld	r0, 24(r5)	C load s2 limb
   145  	bdz	L(end)		C this was the last group: skip the loop and finish in the tail
   146  
   147  	addi	r4, r4, 32	C point s1 at the next group so the loop can load at offsets 0..24
   148  	addi	r5, r5, 32	C same for s2
   149  C Main loop: each pass combines the four limb pairs loaded on the previous pass while loading the next four pairs.
   150  	ALIGN(16)
   151  L(top):	ADDSUBC	r28, r7, r6	C limb 0 of the current group; r6/r7 may be reloaded right after
   152  	ld	r6, 0(r4)	C load s1 limb
   153  	ld	r7, 0(r5)	C load s2 limb
   154  	ADDSUBC	r29, r9, r8	C limb 1
   155  	ld	r8, 8(r4)	C load s1 limb
   156  	ld	r9, 8(r5)	C load s2 limb
   157  	ADDSUBC	r30, r11, r10	C limb 2
   158  	ld	r10, 16(r4)	C load s1 limb
   159  	ld	r11, 16(r5)	C load s2 limb
   160  	ADDSUBC	r31, r0, r12	C limb 3
   161  	ld	r12, 24(r4)	C load s1 limb
   162  	ld	r0, 24(r5)	C load s2 limb
   163  	std	r28, 0(r3)	C results are stored only after all loads for the next group were issued
   164  	addi	r4, r4, 32
   165  	std	r29, 8(r3)
   166  	addi	r5, r5, 32
   167  	std	r30, 16(r3)
   168  	std	r31, 24(r3)
   169  	addi	r3, r3, 32
   170  	bdnz	L(top)		C decrement ctr and loop back
   171  C Tail: the final four limb pairs are already in registers; combine and store them.
   172  L(end):	ADDSUBC	r28, r7, r6
   173  	ADDSUBC	r29, r9, r8
   174  	ADDSUBC	r30, r11, r10
   175  	ADDSUBC	r31, r0, r12	C CA now holds the carry state out of the most significant limb
   176  	std	r28, 0(r3)
   177  	std	r29, 8(r3)
   178  	std	r30, 16(r3)
   179  	std	r31, 24(r3)
   180  C Common exit for every path; CA is still live and the loads below do not change it.
   181  L(ret):	ld	r31, -8(r1)	C restore the registers saved at L(ent)
   182  	ld	r30, -16(r1)
   183  	ld	r29, -24(r1)
   184  	ld	r28, -32(r1)
   185  
   186  	subfe	r3, r0, r0	C r3 = ~r0 + r0 + CA = CA - 1, i.e. 0 or -1 whatever r0 holds
   187  	GENRVAL		C convert that to the 0/1 return value: carry-out for add, borrow-out for sub
   188  	blr		C return with the result in r3
   189  EPILOGUE()