github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/mod_1_1.asm (about)

     1  dnl  PowerPC-64 mpn_mod_1_1p
     2  
     3  dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                   cycles/limb
    34  C POWER3/PPC630          ?
    35  C POWER4/PPC970         17
    36  C POWER5                16
    37  C POWER6                30
    38  C POWER7                10.2
    39  
    40  C TODO
    41  C  * Optimise, in particular the cps function.  This was compiler-generated and
    42  C    then hand optimised.
    43  
    44  C INPUT PARAMETERS (register roles on entry to mpn_mod_1_1p; the code below spells the registers directly)
    45  define(`ap',  `r3')	C address of the limb array; limbs are read from the highest address downwards
    46  define(`n',   `r4')	C number of limbs; also becomes the loop counter
    47  define(`d',   `r5')	C divisor used in the final reduction
    48  define(`cps', `r6')	C address of the four-word table written by mpn_mod_1_1p_cps
    49  
    50  ASM_START()
    51  
    52  EXTERN_FUNC(mpn_invert_limb)
    53  
    54  PROLOGUE(mpn_mod_1_1p)
        C Reduces the n-limb array at r3 to a single limb with respect to r5,
        C using the four-word table at r6 and no divide instruction.
        C
        C Registers on entry, as the code uses them:
        C   r3  address of the limb array; limbs are read from the top down
        C   r4  limb count n
        C   r5  divisor d
        C   r6  address of the table: +0 inverse, +8 shift count cnt,
        C       +16 B1modb, +24 B2modb (the layout mpn_mod_1_1p_cps stores)
        C Result: r3.
        C
        C NOTE(review): ap[n-1] and ap[n-2] are loaded before any count check,
        C so n >= 2 looks required - confirm against callers.
        C NOTE(review): r5 is never shifted here although the folded value is
        C shifted left by cnt; presumably callers pass d already shifted - confirm.
    55  	sldi	r10, r4, 3		C r10 = 8*n, size of the limb array in bytes
    56  	addi	r4, r4, -1		C r4 = n-1, moved to CTR below
    57  	add	r3, r3, r10		C r3 = address just past the last limb
    58  	ld	r0, 16(r6)		C r0 = B1modb (table word at offset 16)
    59  	ld	r12, 24(r6)		C r12 = B2modb (table word at offset 24)
    60  	ld	r9, -8(r3)		C r9 = ap[n-1]
    61  	ld	r10, -16(r3)		C r10 = ap[n-2]
    62  	mtctr	r4			C CTR = n-1
    63  	mulhdu	r8, r9, r0		C r8 = high 64 bits of ap[n-1] * B1modb
    64  	mulld	r7, r9, r0		C r7 = low 64 bits of ap[n-1] * B1modb
    65  	addc	r11, r7, r10		C r11 = low word + ap[n-2], carry recorded
    66  	addze	r9, r8			C r9 = high word + carry; r9:r11 is the running two-limb value
    67  	bdz	L(end)			C CTR becomes n-2; skip the loop when it reaches zero (n = 2)
    68  
    69  	ALIGN(16)
    70  L(top):	ld	r4, -24(r3)		C r4 = next lower limb
    71  	addi	r3, r3, -8		C step the pointer down one limb
    72  	nop				C no operation; presumably scheduling padding - confirm
    73  	mulld	r10, r11, r0		C r10 = low 64 bits of (low word * B1modb)
    74  	mulld	r8, r9, r12		C r8 = low 64 bits of (high word * B2modb)
    75  	mulhdu	r11, r11, r0		C r11 = high 64 bits of (low word * B1modb)
    76  	mulhdu	r9, r9, r12		C r9 = high 64 bits of (high word * B2modb)
    77  	addc	r7, r10, r4		C r7 = r10 + new limb, carry recorded
    78  	addze	r10, r11		C r10 = r11 + carry
    79  	addc	r11, r8, r7		C r11 = new low word, carry recorded
    80  	adde	r9, r9, r10		C r9 = new high word, including the carry
    81  	bdnz	L(top)			C repeat while CTR is nonzero after the decrement
    82  
    83  L(end):				C r9:r11 = folded value; r0 next receives the low 32 bits of the table word at offset 8 (cnt)
    84  ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
    85  `	lwz	r0, 8(r6)',
    86  `	lwz	r0, 12(r6)')
    87  	ld	r3, 0(r6)		C r3 = table word at offset 0 (the inverse stored by mpn_mod_1_1p_cps)
    88  	cmpdi	cr7, r0, 0		C is cnt zero?
    89  	beq-	cr7, L(4)		C cnt = 0: nothing to shift into the high word
    90  	subfic	r10, r0, 64		C r10 = 64 - cnt
    91  	sld	r9, r9, r0		C high word shifted left by cnt
    92  	srd	r10, r11, r10		C r10 = top cnt bits of the low word
    93  	or	r9, r10, r9		C r9 = high word of the value shifted left by cnt
    94  L(4):	subfc	r10, r5, r9		C r10 = r9 - d; carry is set when r9 >= d
    95  	subfe	r10, r10, r10		C r10 = carry - 1: zero when r9 >= d, else all ones
    96  	nand	r10, r10, r10		C invert: all ones when r9 >= d, else zero
    97  	sld	r11, r11, r0		C low word shifted left by cnt
    98  	and	r10, r10, r5		C r10 = d when r9 >= d, else zero
    99  	subf	r9, r10, r9		C subtract d from the high word once if it was >= d
   100  	mulhdu	r10, r9, r3		C r10 = high 64 bits of r9 * inverse
   101  	mulld	r3, r9, r3		C r3 = low 64 bits of r9 * inverse
   102  	addi	r9, r9, 1		C r9 = high word + 1
   103  	addc	r8, r3, r11		C r8 = low product word + low word, carry recorded
   104  	adde	r3, r10, r9		C r3 = quotient estimate: r10 + high word + 1 + carry
   105  	mulld	r3, r3, r5		C r3 = low 64 bits of estimate * d
   106  	subf	r3, r3, r11		C r3 = low word - estimate * d, the candidate remainder
   107  	cmpld	cr7, r8, r3		C unsigned compare of r8 with the candidate
   108  	bge	cr7, L(5)		C FIXME: Make branch-less
   109  	add	r3, r3, r5		C r8 < candidate: add d back
   110  L(5):	cmpld	cr7, r3, r5		C unsigned compare of the candidate with d
   111  	bge-	cr7, L(10)		C candidate >= d: take the extra subtraction path
   112  	srd	r3, r3, r0		C shift right by cnt to form the result
   113  	blr				C return, result in r3
   114  
   115  L(10):	subf	r3, r5, r3		C r3 = candidate - d
   116  	srd	r3, r3, r0		C shift right by cnt to form the result
   117  	blr				C return, result in r3
   118  EPILOGUE()
   119  
   120  PROLOGUE(mpn_mod_1_1p_cps,toc)
        C Fills the four-word table that mpn_mod_1_1p reads through r6.
        C On entry r3 = address of the table, r4 = divisor b.
        C Stores: +0  the value returned by mpn_invert_limb for b << cnt
        C         +8  cnt, the leading-zero count of b
        C         +16 and +24  two constants, each shifted right by cnt
        C              (loaded by mpn_mod_1_1p as B1modb and B2modb)
        C r29-r31 hold the table address, b << cnt and cnt across the call, so
        C they are saved and restored here, as is the link register.
        C NOTE(review): b = 0 would give cnt = 64; presumably b is nonzero - confirm.
   121  	mflr	r0			C r0 = return address
   122  	std	r29, -24(r1)		C save r29; it will hold the table address
   123  	std	r30, -16(r1)		C save r30; it will hold b << cnt
   124  	std	r31, -8(r1)		C save r31; it will hold cnt
   125  	cntlzd	r31, r4			C r31 = cnt = number of leading zero bits in b
   126  	std	r0, 16(r1)		C save the return address at 16(r1)
   127  	extsw	r31, r31		C sign-extend the low 32 bits of cnt; presumably left over from compiler output - confirm
   128  	mr	r29, r3			C r29 = table address, kept across the call
   129  	stdu	r1, -144(r1)		C allocate a 144-byte frame, storing the old r1 at its base
   130  	sld	r30, r4, r31		C r30 = b << cnt
   131  	mr	r3, r30			C argument for the call: b << cnt
   132  	CALL(	mpn_invert_limb)	C r3 = returned inverse
   133  	cmpdi	cr7, r31, 0		C is cnt zero?
   134  	neg	r0, r30			C r0 = -(b << cnt), the first constant when cnt = 0
   135  	beq-	cr7, L(13)		C cnt = 0: keep r0 as is
   136  	subfic	r11, r31, 64		C r11 = 64 - cnt
   137  	li	r0, 1			C r0 = 1
   138  	neg	r9, r30			C r9 = -(b << cnt)
   139  	srd	r11, r3, r11		C r11 = inverse >> (64 - cnt)
   140  	sld	r0, r0, r31		C r0 = 1 << cnt
   141  	or	r0, r11, r0		C r0 = (inverse >> (64 - cnt)) | (1 << cnt)
   142  	mulld	r0, r0, r9		C r0 = low 64 bits of r0 * -(b << cnt), the first constant
   143  L(13):	mulhdu	r9, r0, r3		C r9 = high 64 bits of first constant * inverse
   144  	mulld	r11, r0, r3		C r11 = low 64 bits of first constant * inverse
   145  	add	r9, r0, r9		C r9 = first constant + high product word
   146  	nor	r9, r9, r9		C r9 = bitwise complement of that sum
   147  	mulld	r9, r9, r30		C r9 = low 64 bits of r9 * (b << cnt), candidate second constant
   148  	cmpld	cr7, r11, r9		C unsigned compare of the low product word with the candidate
   149  	bge	cr7, L(14)		C low product word >= candidate: keep the candidate
   150  	add	r9, r9, r30		C otherwise add b << cnt
   151  L(14):	addi	r1, r1, 144		C release the 144-byte frame
   152  	srd	r0, r0, r31		C first constant >> cnt
   153  	std	r31, 8(r29)		C table +8 = cnt
   154  	std	r3, 0(r29)		C table +0 = inverse
   155  	std	r0, 16(r29)		C table +16 = first constant >> cnt
   156  	ld	r0, 16(r1)		C reload the saved return address
   157  	srd	r9, r9, r31		C second constant >> cnt
   158  	ld	r30, -16(r1)		C restore r30
   159  	ld	r31, -8(r1)		C restore r31
   160  	std	r9, 24(r29)		C table +24 = second constant >> cnt
   161  	ld	r29, -24(r1)		C restore r29 (after its last use as the table address)
   162  	mtlr	r0			C put the return address back in the link register
   163  	blr				C return
   164  EPILOGUE()