github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/mul_basecase.asm (about)

     1  dnl  PowerPC-64 mpn_mul_basecase.
     2  
     3  dnl  Copyright 1999-2001, 2003-2006, 2008 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                  cycles/limb
    34  C POWER3/PPC630         6-18
    35  C POWER4/PPC970          8
    36  C POWER5                 8
    37  C POWER6                24
    38  
    39  C INPUT PARAMETERS
    40  define(`rp', `r3')
    41  define(`up', `r4')
    42  define(`un', `r5')
    43  define(`vp', `r6')
    44  define(`vn', `r7')
    45  
    46  define(`v0',	   `r25')
    47  define(`outer_rp', `r22')
    48  define(`outer_up', `r23')
    49  
    50  ASM_START()
    51  PROLOGUE(mpn_mul_basecase)
        C mpn_mul_basecase(rp, up, un, vp, vn):
        C   rp[0 .. un+vn-1] = up[0 .. un-1] * vp[0 .. vn-1]
        C GMP's mpn convention requires un >= vn >= 1; rp must not overlap
        C the inputs.
    52  
    53  C Special code for un <= 2, for efficiency of these important cases,
    54  C and since it simplifies the default code.
        C Fast path: cr0 = "un cmp 2", cr6 = "vn cmp 1".  No registers are
        C saved here -- only volatile regs are used.
    55  	cmpdi	cr0, un, 2
    56  	bgt	cr0, L(un_gt2)
    57  	cmpdi	cr6, vn, 1
    58  	ld	r7, 0(vp)
    59  	ld	r5, 0(up)
    60  	mulld	r8, r5, r7	C weight 0
    61  	mulhdu	r9, r5, r7	C weight 1
    62  	std	r8, 0(rp)
    63  	beq	cr0, L(2x)
        C un == 1 (hence vn == 1): store the high limb and return.
    64  	std	r9, 8(rp)
    65  	blr
    66  	ALIGN(16)
        C un == 2: fold in up[1] * vp[0].
    67  L(2x):	ld	r0, 8(up)
    68  	mulld	r8, r0, r7	C weight 1
    69  	mulhdu	r10, r0, r7	C weight 2
    70  	addc	r9, r9, r8
    71  	addze	r10, r10
    72  	bne	cr6, L(2x2)
        C vn == 1: the 2x1 product is complete.
    73  	std	r9, 8(rp)
    74  	std	r10, 16(rp)
    75  	blr
    76  	ALIGN(16)
        C vn == 2: add the second row, up[0..1] * vp[1].
    77  L(2x2):	ld	r6, 8(vp)
    78  	nop
    79  	mulld	r8, r5, r6	C weight 1
    80  	mulhdu	r11, r5, r6	C weight 2
    81  	addc	r9, r9, r8
    82  	std	r9, 8(rp)
    83  	adde	r11, r11, r10
    84  	mulld	r12, r0, r6	C weight 2
    85  	mulhdu	r0, r0, r6	C weight 3
    86  	addze	r0, r0
    87  	addc	r11, r11, r12
    88  	addze	r0, r0
    89  	std	r11, 16(rp)
    90  	std	r0, 24(rp)
    91  	blr
    92  
        C General case, un > 2.  Save callee-saved r22..r31 below the stack
        C pointer (scratch area under r1 per the PowerPC64 ELF ABI -- no
        C stack frame is created; TODO(review): confirm for the target ABI
        C variant).
    93  L(un_gt2):
    94  	std	r31, -8(r1)
    95  	std	r30, -16(r1)
    96  	std	r29, -24(r1)
    97  	std	r28, -32(r1)
    98  	std	r27, -40(r1)
    99  	std	r26, -48(r1)
   100  	std	r25, -56(r1)
   101  	std	r24, -64(r1)
   102  	std	r23, -72(r1)
   103  	std	r22, -80(r1)
   104  
        C Keep the original rp/up so each outer (per-v-limb) pass can
        C restart from them.
   105  	mr	outer_rp, rp
   106  	mr	outer_up, up
   107  
   108  	ld	v0, 0(vp)	C new v limb
   109  	addi	vp, vp, 8
   110  	ld	r26, 0(up)
   111  
        C First pass is a mul_1: rp[0..un-1] = up[] * vp[0].  The inner
        C loops are 4-way unrolled, so dispatch on un mod 4 to an entry
        C point that peels 0..3 limbs; ctr = (un+1)/4 unrolled iterations.
        C beq cr0 -> un mod 4 == 0; blt cr6 -> == 1; beq cr6 -> == 2;
        C fall through -> == 3.
   112  	rldicl.	r0, un, 0,62	C r0 = n & 3, set cr0
   113  	cmpdi	cr6, r0, 2
   114  	addi	un, un, 1	C compute count...
   115  	srdi	un, un, 2	C ...for ctr
   116  	mtctr	un		C copy inner loop count into ctr
   117  	beq	cr0, L(b0)
   118  	blt	cr6, L(b1)
   119  	beq	cr6, L(b2)
   120  
   121  
   122  	ALIGN(16)
        C un mod 4 == 3: peel one limb; addic with 0 clears CA so the
        C loop's adde chain starts clean.  r12 carries the running high
        C limb between iterations.
   123  L(b3):	mulld	r0, r26, v0
   124  	mulhdu	r12, r26, v0
   125  	addic	r0, r0, 0
   126  	std	r0, 0(rp)
   127  	ld	r26, 8(up)
   128  	ld	r27, 16(up)
   129  	bdz	L(end_m_3)
   130  
   131  	ALIGN(16)
        C mul_1 core: four limbs per iteration; products chained through
        C CA and r12, with up[] loads software-pipelined two ahead.
   132  L(lo_m_3):
   133  	mulld	r0, r26, v0
   134  	mulhdu	r31, r26, v0
   135  	ld	r26, 24(up)
   136  	nop
   137  	mulld	r24, r27, v0
   138  	mulhdu	r8, r27, v0
   139  	ld	r27, 32(up)
   140  	nop
   141  	adde	r0, r0, r12
   142  	adde	r24, r24, r31
   143  	mulld	r9, r26, v0
   144  	mulhdu	r10, r26, v0
   145  	ld	r26, 40(up)
   146  	nop
   147  	mulld	r11, r27, v0
   148  	mulhdu	r12, r27, v0
   149  	ld	r27, 48(up)
   150  	std	r0, 8(rp)
   151  	adde	r9, r9, r8
   152  	std	r24, 16(rp)
   153  	adde	r11, r11, r10
   154  	std	r9, 24(rp)
   155  	addi	up, up, 32
   156  	std	r11, 32(rp)
   157  	addi	rp, rp, 32
   158  	bdnz	L(lo_m_3)
   159  
   160  	ALIGN(16)
        C Wind-down: last two limbs plus the final carry limb, then fall
        C into the outer loop if v limbs remain (addic. decrements vn and
        C sets cr0).
   161  L(end_m_3):
   162  	mulld	r0, r26, v0
   163  	mulhdu	r31, r26, v0
   164  
   165  	mulld	r24, r27, v0
   166  	mulhdu	r8, r27, v0
   167  
   168  	adde	r0, r0, r12
   169  	adde	r24, r24, r31
   170  
   171  	std	r0, 8(rp)
   172  	std	r24, 16(rp)
   173  	addze	r8, r8
   174  	std	r8, 24(rp)
   175  	addic.	vn, vn, -1
   176  	beq	L(ret)
   177  
   178  	ALIGN(16)
        C Outer loop, un mod 4 == 3 alignment: one addmul_1 pass per
        C remaining v limb, rp[] += up[] * v0, restarted one limb further
        C into rp each time.
   179  L(outer_lo_3):
   180  	mtctr	un		C copy inner loop count into ctr
   181  	addi	rp, outer_rp, 8
   182  	mr	up, outer_up
   183  	addi	outer_rp, outer_rp, 8
   184  	ld	v0, 0(vp)	C new v limb
   185  	addi	vp, vp, 8
   186  	ld	r26, 0(up)
   187  	ld	r28, 0(rp)
   188  	mulld	r0, r26, v0
   189  	mulhdu	r12, r26, v0
   190  	addc	r0, r0, r28
   191  	std	r0, 0(rp)
   192  	ld	r26, 8(up)
   193  	ld	r27, 16(up)
   194  	bdz	L(end_3)
   195  
   196  	ALIGN(16)		C registers dying
        C addmul_1 core: the product chain (adde .. addze r12) and the
        C rp[]-accumulation chain (addc .. adde) alternate through the
        C single CA bit; the trailing adde's carry feeds the next
        C iteration's product chain.
   197  L(lo_3):
   198  	mulld	r0, r26, v0	C
   199  	mulhdu	r10, r26, v0	C 26
   200  	ld	r26, 24(up)	C
   201  	ld	r28, 8(rp)	C
   202  	mulld	r24, r27, v0	C
   203  	mulhdu	r8, r27, v0	C 27
   204  	ld	r27, 32(up)	C
   205  	ld	r29, 16(rp)	C
   206  	adde	r0, r0, r12	C 0 12
   207  	adde	r24, r24, r10	C 24 10
   208  	mulld	r9, r26, v0	C
   209  	mulhdu	r10, r26, v0	C 26
   210  	ld	r26, 40(up)	C
   211  	ld	r30, 24(rp)	C
   212  	mulld	r11, r27, v0	C
   213  	mulhdu	r12, r27, v0	C 27
   214  	ld	r27, 48(up)	C
   215  	ld	r31, 32(rp)	C
   216  	adde	r9, r9, r8	C 8 9
   217  	adde	r11, r11, r10	C 10 11
   218  	addze	r12, r12	C 12
   219  	addc	r0, r0, r28	C 0 28
   220  	std	r0, 8(rp)	C 0
   221  	adde	r24, r24, r29	C 7 29
   222  	std	r24, 16(rp)	C 7
   223  	adde	r9, r9, r30	C 9 30
   224  	std	r9, 24(rp)	C 9
   225  	adde	r11, r11, r31	C 11 31
   226  	std	r11, 32(rp)	C 11
   227  	addi	up, up, 32	C
   228  	addi	rp, rp, 32	C
   229  	bdnz	L(lo_3)	C
   230  
   231  	ALIGN(16)
        C addmul_1 wind-down for this alignment; loop back while v limbs
        C remain.
   232  L(end_3):
   233  	mulld	r0, r26, v0
   234  	mulhdu	r10, r26, v0
   235  	ld	r28, 8(rp)
   236  	nop
   237  	mulld	r24, r27, v0
   238  	mulhdu	r8, r27, v0
   239  	ld	r29, 16(rp)
   240  	nop
   241  	adde	r0, r0, r12
   242  	adde	r24, r24, r10
   243  	addze	r8, r8
   244  	addc	r0, r0, r28
   245  	std	r0, 8(rp)
   246  	adde	r24, r24, r29
   247  	std	r24, 16(rp)
   248  	addze	r8, r8
   249  	std	r8, 24(rp)
   250  
   251  	addic.	vn, vn, -1
   252  	bne	L(outer_lo_3)
   253  	b	L(ret)
   254  
   255  
   256  	ALIGN(16)
        C un mod 4 == 0: peel two limbs; addc starts the carry chain and
        C addze captures the carry-in high limb in r12.
   257  L(b0):	ld	r27, 8(up)
   258  	addi	up, up, 8
   259  	mulld	r0, r26, v0
   260  	mulhdu	r10, r26, v0
   261  	mulld	r24, r27, v0
   262  	mulhdu	r8, r27, v0
   263  	addc	r24, r24, r10
   264  	addze	r12, r8
   265  	std	r0, 0(rp)
   266  	std	r24, 8(rp)
   267  	addi	rp, rp, 8
   268  	ld	r26, 8(up)
   269  	ld	r27, 16(up)
   270  	bdz	L(end_m_0)
   271  
   272  	ALIGN(16)
        C Same 4-way mul_1 core as L(lo_m_3).
   273  L(lo_m_0):
   274  	mulld	r0, r26, v0
   275  	mulhdu	r31, r26, v0
   276  	ld	r26, 24(up)
   277  	nop
   278  	mulld	r24, r27, v0
   279  	mulhdu	r8, r27, v0
   280  	ld	r27, 32(up)
   281  	nop
   282  	adde	r0, r0, r12
   283  	adde	r24, r24, r31
   284  	mulld	r9, r26, v0
   285  	mulhdu	r10, r26, v0
   286  	ld	r26, 40(up)
   287  	nop
   288  	mulld	r11, r27, v0
   289  	mulhdu	r12, r27, v0
   290  	ld	r27, 48(up)
   291  	std	r0, 8(rp)
   292  	adde	r9, r9, r8
   293  	std	r24, 16(rp)
   294  	adde	r11, r11, r10
   295  	std	r9, 24(rp)
   296  	addi	up, up, 32
   297  	std	r11, 32(rp)
   298  	addi	rp, rp, 32
   299  	bdnz	L(lo_m_0)
   300  
   301  	ALIGN(16)
        C mul_1 wind-down, == 0 alignment.
   302  L(end_m_0):
   303  	mulld	r0, r26, v0
   304  	mulhdu	r31, r26, v0
   305  
   306  	mulld	r24, r27, v0
   307  	mulhdu	r8, r27, v0
   308  
   309  	adde	r0, r0, r12
   310  	adde	r24, r24, r31
   311  
   312  	std	r0, 8(rp)
   313  	addze	r8, r8
   314  	std	r24, 16(rp)
   315  	addic.	vn, vn, -1
   316  	std	r8, 24(rp)
   317  	nop
   318  	beq	L(ret)
   319  
   320  	ALIGN(16)
        C addmul_1 restart, == 0 alignment: peel two limbs (note the
        C negative offsets relative to the biased up/rp).
   321  L(outer_lo_0):
   322  	mtctr	un		C copy inner loop count into ctr
   323  	addi	rp, outer_rp, 16
   324  	addi	up, outer_up, 8
   325  	addi	outer_rp, outer_rp, 8
   326  	ld	v0, 0(vp)	C new v limb
   327  	addi	vp, vp, 8
   328  	ld	r26, -8(up)
   329  	ld	r27, 0(up)
   330  	ld	r28, -8(rp)
   331  	ld	r29, 0(rp)
   332  	nop
   333  	nop
   334  	mulld	r0, r26, v0
   335  	mulhdu	r10, r26, v0
   336  	mulld	r24, r27, v0
   337  	mulhdu	r8, r27, v0
   338  	addc	r24, r24, r10
   339  	addze	r12, r8
   340  	addc	r0, r0, r28
   341  	std	r0, -8(rp)
   342  	adde	r24, r24, r29
   343  	std	r24, 0(rp)
   344  	ld	r26, 8(up)
   345  	ld	r27, 16(up)
   346  	bdz	L(end_0)
   347  
   348  	ALIGN(16)		C registers dying
        C Same 4-way addmul_1 core as L(lo_3).
   349  L(lo_0):
   350  	mulld	r0, r26, v0	C
   351  	mulhdu	r10, r26, v0	C 26
   352  	ld	r26, 24(up)	C
   353  	ld	r28, 8(rp)	C
   354  	mulld	r24, r27, v0	C
   355  	mulhdu	r8, r27, v0	C 27
   356  	ld	r27, 32(up)	C
   357  	ld	r29, 16(rp)	C
   358  	adde	r0, r0, r12	C 0 12
   359  	adde	r24, r24, r10	C 24 10
   360  	mulld	r9, r26, v0	C
   361  	mulhdu	r10, r26, v0	C 26
   362  	ld	r26, 40(up)	C
   363  	ld	r30, 24(rp)	C
   364  	mulld	r11, r27, v0	C
   365  	mulhdu	r12, r27, v0	C 27
   366  	ld	r27, 48(up)	C
   367  	ld	r31, 32(rp)	C
   368  	adde	r9, r9, r8	C 8 9
   369  	adde	r11, r11, r10	C 10 11
   370  	addze	r12, r12	C 12
   371  	addc	r0, r0, r28	C 0 28
   372  	std	r0, 8(rp)	C 0
   373  	adde	r24, r24, r29	C 7 29
   374  	std	r24, 16(rp)	C 7
   375  	adde	r9, r9, r30	C 9 30
   376  	std	r9, 24(rp)	C 9
   377  	adde	r11, r11, r31	C 11 31
   378  	std	r11, 32(rp)	C 11
   379  	addi	up, up, 32	C
   380  	addi	rp, rp, 32	C
   381  	bdnz	L(lo_0)	C
   382  
   383  	ALIGN(16)
        C addmul_1 wind-down, == 0 alignment.
   384  L(end_0):
   385  	mulld	r0, r26, v0
   386  	mulhdu	r10, r26, v0
   387  	ld	r28, 8(rp)
   388  	nop
   389  	mulld	r24, r27, v0
   390  	mulhdu	r8, r27, v0
   391  	ld	r29, 16(rp)
   392  	nop
   393  	adde	r0, r0, r12
   394  	adde	r24, r24, r10
   395  	addze	r8, r8
   396  	addic.	vn, vn, -1
   397  	addc	r0, r0, r28
   398  	std	r0, 8(rp)
   399  	adde	r24, r24, r29
   400  	std	r24, 16(rp)
   401  	addze	r8, r8
   402  	std	r8, 24(rp)
   403  	bne	L(outer_lo_0)
   404  	b	L(ret)
   405  
   406  
   407  	ALIGN(16)
        C un mod 4 == 1: peel three limbs before entering the loop.
   408  L(b1):	ld	r27, 8(up)
   409  	nop
   410  	mulld	r0, r26, v0
   411  	mulhdu	r31, r26, v0
   412  	ld	r26, 16(up)
   413  	mulld	r24, r27, v0
   414  	mulhdu	r8, r27, v0
   415  	mulld	r9, r26, v0
   416  	mulhdu	r10, r26, v0
   417  	addc	r24, r24, r31
   418  	adde	r9, r9, r8
   419  	addze	r12, r10
   420  	std	r0, 0(rp)
   421  	std	r24, 8(rp)
   422  	std	r9, 16(rp)
   423  	addi	up, up, 16
   424  	addi	rp, rp, 16
   425  	ld	r26, 8(up)
   426  	ld	r27, 16(up)
   427  	bdz	L(end_m_1)
   428  
   429  	ALIGN(16)
        C Same 4-way mul_1 core as L(lo_m_3).
   430  L(lo_m_1):
   431  	mulld	r0, r26, v0
   432  	mulhdu	r31, r26, v0
   433  	ld	r26, 24(up)
   434  	nop
   435  	mulld	r24, r27, v0
   436  	mulhdu	r8, r27, v0
   437  	ld	r27, 32(up)
   438  	nop
   439  	adde	r0, r0, r12
   440  	adde	r24, r24, r31
   441  	mulld	r9, r26, v0
   442  	mulhdu	r10, r26, v0
   443  	ld	r26, 40(up)
   444  	nop
   445  	mulld	r11, r27, v0
   446  	mulhdu	r12, r27, v0
   447  	ld	r27, 48(up)
   448  	std	r0, 8(rp)
   449  	adde	r9, r9, r8
   450  	std	r24, 16(rp)
   451  	adde	r11, r11, r10
   452  	std	r9, 24(rp)
   453  	addi	up, up, 32
   454  	std	r11, 32(rp)
   455  	addi	rp, rp, 32
   456  	bdnz	L(lo_m_1)
   457  
   458  	ALIGN(16)
        C mul_1 wind-down, == 1 alignment.
   459  L(end_m_1):
   460  	mulld	r0, r26, v0
   461  	mulhdu	r31, r26, v0
   462  
   463  	mulld	r24, r27, v0
   464  	mulhdu	r8, r27, v0
   465  
   466  	adde	r0, r0, r12
   467  	adde	r24, r24, r31
   468  
   469  	std	r0, 8(rp)
   470  	addze	r8, r8
   471  	std	r24, 16(rp)
   472  	addic.	vn, vn, -1
   473  	std	r8, 24(rp)
   474  	nop
   475  	beq	L(ret)
   476  
   477  	ALIGN(16)
        C addmul_1 restart, == 1 alignment: peel three limbs.
   478  L(outer_lo_1):
   479  	mtctr	un		C copy inner loop count into ctr
   480  	addi	rp, outer_rp, 24
   481  	addi	up, outer_up, 16
   482  	addi	outer_rp, outer_rp, 8
   483  	ld	v0, 0(vp)	C new v limb
   484  	addi	vp, vp, 8
   485  	ld	r26, -16(up)
   486  	ld	r27, -8(up)
   487  	mulld	r0, r26, v0
   488  	mulhdu	r31, r26, v0
   489  	ld	r26, 0(up)
   490  	ld	r28, -16(rp)
   491  	mulld	r24, r27, v0
   492  	mulhdu	r8, r27, v0
   493  	ld	r29, -8(rp)
   494  	ld	r30, 0(rp)
   495  	mulld	r9, r26, v0
   496  	mulhdu	r10, r26, v0
   497  	addc	r24, r24, r31
   498  	adde	r9, r9, r8
   499  	addze	r12, r10
   500  	addc	r0, r0, r28
   501  	std	r0, -16(rp)
   502  	adde	r24, r24, r29
   503  	std	r24, -8(rp)
   504  	adde	r9, r9, r30
   505  	std	r9, 0(rp)
   506  	ld	r26, 8(up)
   507  	ld	r27, 16(up)
   508  	bdz	L(end_1)
   509  
   510  	ALIGN(16)		C registers dying
        C Same 4-way addmul_1 core as L(lo_3).
   511  L(lo_1):
   512  	mulld	r0, r26, v0	C
   513  	mulhdu	r10, r26, v0	C 26
   514  	ld	r26, 24(up)	C
   515  	ld	r28, 8(rp)	C
   516  	mulld	r24, r27, v0	C
   517  	mulhdu	r8, r27, v0	C 27
   518  	ld	r27, 32(up)	C
   519  	ld	r29, 16(rp)	C
   520  	adde	r0, r0, r12	C 0 12
   521  	adde	r24, r24, r10	C 24 10
   522  	mulld	r9, r26, v0	C
   523  	mulhdu	r10, r26, v0	C 26
   524  	ld	r26, 40(up)	C
   525  	ld	r30, 24(rp)	C
   526  	mulld	r11, r27, v0	C
   527  	mulhdu	r12, r27, v0	C 27
   528  	ld	r27, 48(up)	C
   529  	ld	r31, 32(rp)	C
   530  	adde	r9, r9, r8	C 8 9
   531  	adde	r11, r11, r10	C 10 11
   532  	addze	r12, r12	C 12
   533  	addc	r0, r0, r28	C 0 28
   534  	std	r0, 8(rp)	C 0
   535  	adde	r24, r24, r29	C 7 29
   536  	std	r24, 16(rp)	C 7
   537  	adde	r9, r9, r30	C 9 30
   538  	std	r9, 24(rp)	C 9
   539  	adde	r11, r11, r31	C 11 31
   540  	std	r11, 32(rp)	C 11
   541  	addi	up, up, 32	C
   542  	addi	rp, rp, 32	C
   543  	bdnz	L(lo_1)	C
   544  
   545  	ALIGN(16)
        C addmul_1 wind-down, == 1 alignment.
   546  L(end_1):
   547  	mulld	r0, r26, v0
   548  	mulhdu	r10, r26, v0
   549  	ld	r28, 8(rp)
   550  	nop
   551  	mulld	r24, r27, v0
   552  	mulhdu	r8, r27, v0
   553  	ld	r29, 16(rp)
   554  	nop
   555  	adde	r0, r0, r12
   556  	adde	r24, r24, r10
   557  	addze	r8, r8
   558  	addic.	vn, vn, -1
   559  	addc	r0, r0, r28
   560  	std	r0, 8(rp)
   561  	adde	r24, r24, r29
   562  	std	r24, 16(rp)
   563  	addze	r8, r8
   564  	std	r8, 24(rp)
   565  	bne	L(outer_lo_1)
   566  	b	L(ret)
   567  
   568  
   569  	ALIGN(16)
        C un mod 4 == 2: peel nothing.  Bias up/rp down one limb so the
        C unrolled loop's fixed offsets line up, clear the carry-in limb
        C r12, and clear CA (addic with 0), then fall into the loop.
   570  L(b2):	ld	r27, 8(up)
   571  	addi	up, up, -8
   572  	addi	rp, rp, -8
   573  	li	r12, 0
   574  	addic	r12, r12, 0
   575  
   576  	ALIGN(16)
        C Same 4-way mul_1 core as L(lo_m_3).
   577  L(lo_m_2):
   578  	mulld	r0, r26, v0
   579  	mulhdu	r31, r26, v0
   580  	ld	r26, 24(up)
   581  	nop
   582  	mulld	r24, r27, v0
   583  	mulhdu	r8, r27, v0
   584  	ld	r27, 32(up)
   585  	nop
   586  	adde	r0, r0, r12
   587  	adde	r24, r24, r31
   588  	mulld	r9, r26, v0
   589  	mulhdu	r10, r26, v0
   590  	ld	r26, 40(up)
   591  	nop
   592  	mulld	r11, r27, v0
   593  	mulhdu	r12, r27, v0
   594  	ld	r27, 48(up)
   595  	std	r0, 8(rp)
   596  	adde	r9, r9, r8
   597  	std	r24, 16(rp)
   598  	adde	r11, r11, r10
   599  	std	r9, 24(rp)
   600  	addi	up, up, 32
   601  	std	r11, 32(rp)
   602  
   603  	addi	rp, rp, 32
   604  	bdnz	L(lo_m_2)
   605  
   606  	ALIGN(16)
        C mul_1 wind-down, == 2 alignment.
   607  L(end_m_2):
   608  	mulld	r0, r26, v0
   609  	mulhdu	r31, r26, v0
   610  
   611  	mulld	r24, r27, v0
   612  	mulhdu	r8, r27, v0
   613  
   614  	adde	r0, r0, r12
   615  	adde	r24, r24, r31
   616  
   617  	std	r0, 8(rp)
   618  	addze	r8, r8
   619  	std	r24, 16(rp)
   620  	addic.	vn, vn, -1
   621  	std	r8, 24(rp)
   622  	nop
   623  	beq	L(ret)
   624  
   625  	ALIGN(16)
        C addmul_1 restart, == 2 alignment: bias pointers and clear
        C r12/CA exactly as L(b2) did.
   626  L(outer_lo_2):
   627  	mtctr	un		C copy inner loop count into ctr
   628  	addi	rp, outer_rp, 0
   629  	addi	up, outer_up, -8
   630  	addi	outer_rp, outer_rp, 8
   631  	ld	v0, 0(vp)	C new v limb
   632  	addi	vp, vp, 8
   633  	ld	r26, 8(up)
   634  	ld	r27, 16(up)
   635  	li	r12, 0
   636  	addic	r12, r12, 0
   637  
   638  	ALIGN(16)		C registers dying
        C Same 4-way addmul_1 core as L(lo_3).
   639  L(lo_2):
   640  	mulld	r0, r26, v0	C
   641  	mulhdu	r10, r26, v0	C 26
   642  	ld	r26, 24(up)	C
   643  	ld	r28, 8(rp)	C
   644  	mulld	r24, r27, v0	C
   645  	mulhdu	r8, r27, v0	C 27
   646  	ld	r27, 32(up)	C
   647  	ld	r29, 16(rp)	C
   648  	adde	r0, r0, r12	C 0 12
   649  	adde	r24, r24, r10	C 24 10
   650  	mulld	r9, r26, v0	C
   651  	mulhdu	r10, r26, v0	C 26
   652  	ld	r26, 40(up)	C
   653  	ld	r30, 24(rp)	C
   654  	mulld	r11, r27, v0	C
   655  	mulhdu	r12, r27, v0	C 27
   656  	ld	r27, 48(up)	C
   657  	ld	r31, 32(rp)	C
   658  	adde	r9, r9, r8	C 8 9
   659  	adde	r11, r11, r10	C 10 11
   660  	addze	r12, r12	C 12
   661  	addc	r0, r0, r28	C 0 28
   662  	std	r0, 8(rp)	C 0
   663  	adde	r24, r24, r29	C 7 29
   664  	std	r24, 16(rp)	C 7
   665  	adde	r9, r9, r30	C 9 30
   666  	std	r9, 24(rp)	C 9
   667  	adde	r11, r11, r31	C 11 31
   668  	std	r11, 32(rp)	C 11
   669  	addi	up, up, 32	C
   670  	addi	rp, rp, 32	C
   671  	bdnz	L(lo_2)	C
   672  
   673  	ALIGN(16)
        C addmul_1 wind-down, == 2 alignment.
   674  L(end_2):
   675  	mulld	r0, r26, v0
   676  	mulhdu	r10, r26, v0
   677  	ld	r28, 8(rp)
   678  	nop
   679  	mulld	r24, r27, v0
   680  	mulhdu	r8, r27, v0
   681  	ld	r29, 16(rp)
   682  	nop
   683  	adde	r0, r0, r12
   684  	adde	r24, r24, r10
   685  	addze	r8, r8
   686  	addic.	vn, vn, -1
   687  	addc	r0, r0, r28
   688  	std	r0, 8(rp)
   689  	adde	r24, r24, r29
   690  	std	r24, 16(rp)
   691  	addze	r8, r8
   692  	std	r8, 24(rp)
   693  	bne	L(outer_lo_2)
   694  	b	L(ret)
   695  
   696  
        C Common exit: restore callee-saved r22..r31 from below r1 and
        C return.
   697  L(ret):	ld	r31, -8(r1)
   698  	ld	r30, -16(r1)
   699  	ld	r29, -24(r1)
   700  	ld	r28, -32(r1)
   701  	ld	r27, -40(r1)
   702  	ld	r26, -48(r1)
   703  	ld	r25, -56(r1)
   704  	ld	r24, -64(r1)
   705  	ld	r23, -72(r1)
   706  	ld	r22, -80(r1)
   707  	blr
   708  EPILOGUE()