github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/aorsmul_1.asm (about)

     1  dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1.
     2  
     3  dnl  Copyright 1999-2001, 2003-2006, 2010-2012 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                   mpn_addmul_1    mpn_submul_1
    34  C                   cycles/limb     cycles/limb
    35  C POWER3/PPC630		6-18		6-18
    36  C POWER4/PPC970		 8		 8.3
    37  C POWER5		 8		 8.25
    38  C POWER6		16.25		16.75
    39  C POWER7		 3.77		 4.9
    40  
    41  C TODO
    42  C  * Try to reduce the number of needed live registers
    43  C  * Add support for _1c entry points
    44  
    45  C INPUT PARAMETERS
    46  define(`rp', `r3')	C pointer to the limbs that are loaded, updated and stored back
    47  define(`up', `r4')	C pointer to the source limbs; only loaded from
    48  define(`n',  `r5')	C limb count; r5 is reused as scratch once the count is in ctr
    49  define(`vl', `r6')	C multiplier limb; the instructions below name r6 directly
    50  
    51  ifdef(`OPERATION_addmul_1',`
    52    define(ADDSUBC,	adde)	C combine a product limb with an rp limb, with carry-in
    53    define(ADDSUB,	addc)	C same, but starting a fresh carry chain
    54    define(func,		mpn_addmul_1)
    55    define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported; not referenced in the code visible here
    56    define(SM,		`')	C SM(x) expands to nothing: no borrow fix-up for addmul
    57  ')
    58  ifdef(`OPERATION_submul_1',`
    59    define(ADDSUBC,	subfe)	C rp limb minus product limb, continuing the chain
    60    define(ADDSUB,	subfc)	C rp limb minus product limb, starting a fresh chain
    61    define(func,		mpn_submul_1)
    62    define(func_nc,	mpn_submul_1c)	C FIXME: not really supported; not referenced in the code visible here
    63    define(SM,		`$1')	C SM(x) expands to x: emits the submul-only borrow fix-up
    64  ')
    65  
    66  MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
    67  
    68  ASM_START()
        C func(rp, up, n, vl) is mpn_addmul_1 or mpn_submul_1, depending on which
        C OPERATION_* symbol selected the macros defined earlier in this file.
        C
        C For i = 0 .. n-1 the product up[i] * vl plus the carry limb from the
        C previous position is added to rp[i] (addmul) or subtracted from rp[i]
        C (submul), and the result is stored back to rp[i].  The final carry
        C (or borrow) limb is returned in r3.
        C
        C n is assumed to be >= 1 (TODO confirm against callers): for n = 0 the
        C dispatch below takes the b00 path, which always touches two limbs.
        C
        C Layout: ctr = (n + 3) / 4.  The entry code selected by n mod 4 handles
        C 2 (b00), 1 or 3 (b01), 0 (b10) or 1 (b11) limbs, L(top) then handles
        C four limbs per pass, and L(end) always handles the last two.  The
        C n = 1 case returns directly from the b01 code.
        C
        C Register use: r9 and r27 hold the next two up[] limbs, loaded ahead of
        C the code that multiplies them.  r12 is the carry limb passed from one
        C group of limbs to the next.  r28-r31 receive rp[] limbs in the
        C multi-limb paths.  r5 (alias n) is scratch once ctr has been set.
        C
        C Carry: the CA bit links the adde/addze chain over the products and the
        C ADDSUB/ADDSUBC chain against rp[].  For submul only, the SM() pair
        C "subfe r11,r11,r11" then "addic r11,r11,1" inverts CA, so that a borrow
        C from the subtract chain enters the next adde/addze as a carry of 1.
        C
        C r27-r31 are stored at negative offsets from r1 without moving r1;
        C presumably this relies on the ABI area below the stack pointer -- confirm.
    69  PROLOGUE(func)
    70  	std	r31, -8(r1)	C save r27-r31 (stores are interleaved with the setup below)
    71  	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
    72  	std	r30, -16(r1)
    73  	cmpdi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
    74  	std	r29, -24(r1)
    75  	addi	n, n, 3		C compute count...
    76  	std	r28, -32(r1)
    77  	srdi	n, n, 2		C ...for ctr
    78  	std	r27, -40(r1)
    79  	mtctr	n		C copy count into ctr
    80  	beq	cr0, L(b00)	C n & 3 == 0
    81  	blt	cr6, L(b01)	C n & 3 == 1
    82  	beq	cr6, L(b10)	C n & 3 == 2
    83  
    84  L(b11):	ld	r9, 0(up)	C n & 3 == 3: one limb is handled here
    85  	ld	r28, 0(rp)
    86  	mulld	r0, r9, r6	C low limb of up[0] * vl
    87  	mulhdu	r12, r9, r6	C high limb becomes the carry limb
    88  	ADDSUB	r0, r0, r28	C combine with rp[0], starting a new CA chain
    89  	std	r0, 0(rp)
    90  	addi	rp, rp, 8
    91  	ld	r9, 8(up)	C preload the next two up[] limbs
    92  	ld	r27, 16(up)
    93  	addi	up, up, 24
    94  SM(`	subfe	r11, r11, r11 ')	C submul: r11 = CA - 1, that is 0 or -1
    95  	b	L(bot)
    96  
    97  	ALIGN(16)
    98  L(b00):	ld	r9, 0(up)	C n & 3 == 0: two limbs are handled here
    99  	ld	r27, 8(up)
   100  	ld	r28, 0(rp)
   101  	ld	r29, 8(rp)
   102  	mulld	r0, r9, r6
   103  	mulhdu	r5, r9, r6
   104  	mulld	r7, r27, r6
   105  	mulhdu	r8, r27, r6
   106  	addc	r7, r7, r5	C low(1) + high(0)
   107  	addze	r12, r8		C carry limb = high(1) + CA
   108  	ADDSUB	r0, r0, r28	C combine with rp[0], starting a new CA chain
   109  	std	r0, 0(rp)
   110  	ADDSUBC	r7, r7, r29	C combine with rp[1], continuing the chain
   111  	std	r7, 8(rp)
   112  	addi	rp, rp, 16
   113  	ld	r9, 16(up)	C preload the next two up[] limbs
   114  	ld	r27, 24(up)
   115  	addi	up, up, 32
   116  SM(`	subfe	r11, r11, r11 ')	C submul: r11 = CA - 1, that is 0 or -1
   117  	b	L(bot)
   118  
   119  	ALIGN(16)
   120  L(b01):	bdnz	L(gt1)		C n & 3 == 1: decrement ctr, branch if n > 1
   121  	ld	r9, 0(up)	C n == 1: single limb, no loop
   122  	ld	r11, 0(rp)
   123  	mulld	r0, r9, r6
   124  	mulhdu	r8, r9, r6
   125  	ADDSUB	r0, r0, r11
   126  	std	r0, 0(rp)
   127  SM(`	subfe	r11, r11, r11 ')	C submul: r11 = CA - 1, that is 0 or -1
   128  SM(`	addic	r11, r11, 1 ')	C submul: CA = 1 exactly when the subtract borrowed
   129  	addze	r3, r8		C return value = high limb + CA
   130  	blr			C r27-r31 were saved but not modified on this path
   131  L(gt1):	ld	r9, 0(up)	C n > 1: three limbs are handled here
   132  	ld	r27, 8(up)
   133  	mulld	r0, r9, r6
   134  	mulhdu	r5, r9, r6
   135  	mulld	r7, r27, r6
   136  	mulhdu	r8, r27, r6
   137  	ld	r9, 16(up)
   138  	ld	r28, 0(rp)
   139  	ld	r29, 8(rp)
   140  	ld	r30, 16(rp)
   141  	mulld	r11, r9, r6
   142  	mulhdu	r10, r9, r6
   143  	addc	r7, r7, r5	C low(1) + high(0)
   144  	adde	r11, r11, r8	C low(2) + high(1) + CA
   145  	addze	r12, r10	C carry limb = high(2) + CA
   146  	ADDSUB	r0, r0, r28	C combine with rp[0], starting a new CA chain
   147  	std	r0, 0(rp)
   148  	ADDSUBC	r7, r7, r29
   149  	std	r7, 8(rp)
   150  	ADDSUBC	r11, r11, r30
   151  	std	r11, 16(rp)
   152  	addi	rp, rp, 24
   153  	ld	r9, 24(up)	C preload the next two up[] limbs
   154  	ld	r27, 32(up)
   155  	addi	up, up, 40
   156  SM(`	subfe	r11, r11, r11 ')	C submul: r11 = CA - 1, that is 0 or -1
   157  	b	L(bot)
   158  
   159  L(b10):	addic	r0, r0, 0	C n & 3 == 2: clear CA (adding 0 never carries)
   160  	li	r12, 0		C cy_limb = 0
   161  	ld	r9, 0(up)	C preload the first two up[] limbs
   162  	ld	r27, 8(up)
   163  	bdz	L(end)		C n == 2: only the two-limb tail is needed
   164  	addi	up, up, 16
   165  
   166  	ALIGN(16)
   167  L(top):	mulld	r0, r9, r6	C main loop: four limbs per pass
   168  	mulhdu	r5, r9, r6	C 9
   169  	mulld	r7, r27, r6
   170  	mulhdu	r8, r27, r6	C 27
   171  	ld	r9, 0(up)
   172  	ld	r28, 0(rp)
   173  	ld	r27, 8(up)
   174  	ld	r29, 8(rp)
   175  	adde	r0, r0, r12	C 0 12
   176  	adde	r7, r7, r5	C 5 7
   177  	mulld	r5, r9, r6
   178  	mulhdu	r10, r9, r6	C 9
   179  	mulld	r11, r27, r6
   180  	mulhdu	r12, r27, r6	C 27
   181  	ld	r9, 16(up)
   182  	ld	r30, 16(rp)
   183  	ld	r27, 24(up)
   184  	ld	r31, 24(rp)
   185  	adde	r5, r5, r8	C 8 5
   186  	adde	r11, r11, r10	C 10 11
   187  	addze	r12, r12	C 12
   188  	ADDSUB	r0, r0, r28	C 0 28
   189  	std	r0, 0(rp)	C 0
   190  	ADDSUBC	r7, r7, r29	C 7 29
   191  	std	r7, 8(rp)	C 7
   192  	ADDSUBC	r5, r5, r30	C 5 30
   193  	std	r5, 16(rp)	C 5
   194  	ADDSUBC	r11, r11, r31	C 11 31
   195  	std	r11, 24(rp)	C 11
   196  	addi	up, up, 32
   197  SM(`	subfe	r11, r11, r11 ')	C submul: r11 = CA - 1, that is 0 or -1
   198  	addi	rp, rp, 32
   199  L(bot):				C also entered from the b11, b00 and gt1 code
   200  SM(`	addic	r11, r11, 1 ')	C submul: CA = 1 exactly when the subtract chain borrowed
   201  	bdnz	L(top)
   202  
   203  L(end):	mulld	r0, r9, r6	C tail: the last two limbs, already held in r9 and r27
   204  	mulhdu	r5, r9, r6
   205  	mulld	r7, r27, r6
   206  	mulhdu	r8, r27, r6
   207  	ld	r28, 0(rp)
   208  	ld	r29, 8(rp)
   209  	adde	r0, r0, r12	C low(0) + carry limb + CA
   210  	adde	r7, r7, r5	C low(1) + high(0) + CA
   211  	addze	r8, r8		C final high limb + CA
   212  	ADDSUB	r0, r0, r28
   213  	std	r0, 0(rp)
   214  	ADDSUBC	r7, r7, r29
   215  	std	r7, 8(rp)
   216  SM(`	subfe	r11, r11, r11 ')	C submul: r11 = CA - 1, that is 0 or -1
   217  SM(`	addic	r11, r11, 1 ')	C submul: CA = 1 exactly when the subtract chain borrowed
   218  	addze	r3, r8		C return value = final high limb + CA
   219  	ld	r31, -8(r1)	C restore r27-r31 from where the entry code stored them
   220  	ld	r30, -16(r1)
   221  	ld	r29, -24(r1)
   222  	ld	r28, -32(r1)
   223  	ld	r27, -40(r1)
   224  	blr
   225  EPILOGUE()