github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/invert_limb.asm (about)

     1  dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
     2  
     3  dnl  Copyright 2004-2006, 2008, 2010, 2013 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                  cycles/limb (approximate)
    34  C POWER3/PPC630         80
    35  C POWER4/PPC970         86
    36  C POWER5                86
    37  C POWER6               170
    38  C POWER7                66
    39  
    40  ASM_START()
    41  PROLOGUE(mpn_invert_limb,toc)
        C mpn_invert_limb: table lookup followed by three refinement steps
        C (v0 -> v1 -> v2 -> v3) and one final correction, all straight-line.
        C
        C Register use, as read off the code below:
        C   r3       d on entry (only read until the last adde); result at blr
        C   r12      base address of approx_tab
        C   r9       table byte offset, later scratch / v1 / v2 << 31 / high product
        C   r10      d40 = (d >> 24) + 1, later v2 >> 1 and a discarded low sum
        C   r11      2^60, later low halves of v2*d63 and v3*d
        C   r7, r8   d63 = ceil(d/2) and mask = -(d mod 2)
        C   r0, r6   v0 .. v3 and shifted copies
        C
        C The table index uses bits 55..62 of d only, and the table covers
        C i = 256..511, so bit 63 of d is presumably assumed set (a "normalized"
        C limb, as in the file title) -- NOTE(review): confirm against callers.
        C NOTE(review): the result is presumably floor((2^128 - 1) / d) - 2^64;
        C that contract is not stated in this file -- confirm.
    42  	LEAL(	r12, approx_tab)	C r12 = &approx_tab (LEAL is defined outside this file; presumably a TOC-based address load)
    43  	srdi	r9, r3, 32		C high 32 bits of d
    44  	rlwinm	r9, r9, 10, 23, 30	C ((d >> 55) & 0xff) << 1 = (d >> 54) & 0x1fe, byte offset into approx_tab
    45  	srdi	r10, r3, 24		C d >> 24
    46  	lis	r11, 0x1000		C 0x1000 << 16 = 2^28
    47  	rldicl	r8, r3, 0, 63		C d mod 2
    48  	addi	r10, r10, 1		C d40
    49  	sldi	r11, r11, 32		C 2^60
    50  	srdi	r7, r3, 1		C d/2
    51  	add	r7, r7, r8		C d63 = ceil(d/2)
    52  	neg	r8, r8			C mask = -(d mod 2)
    53  	lhzx	r0, r9, r12		C v0 = zero-extended 16-bit table entry at approx_tab + offset
    54  	mullw	r9, r0, r0		C v0*v0
    55  	sldi	r6, r0, 11		C v0 << 11
    56  	addi	r0, r6, -1		C (v0 << 11) - 1
    57  	mulld	r9, r9, r10		C v0*v0*d40
    58  	srdi	r9, r9, 40		C v0*v0*d40 >> 40
    59  	subf	r9, r9, r0		C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
    60  	mulld	r0, r9, r10		C v1*d40
    61  	sldi	r6, r9, 13		C v1 << 13
    62  	subf	r0, r0, r11		C 2^60 - v1*d40
    63  	mulld	r0, r0, r9		C v1 * (2^60 - v1*d40)
    64  	srdi	r0, r0, 47		C v1 * (2^60 - v1*d40) >> 47
    65  	add	r0, r0, r6		C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
    66  	mulld	r11, r0, r7		C v2 * d63
    67  	srdi	r10, r0, 1		C v2 >> 1
    68  	sldi	r9, r0, 31		C v2 << 31
    69  	and	r8, r10, r8		C (v2 >> 1) & mask
    70  	subf	r8, r11, r8		C ((v2 >> 1) & mask) - v2 * d63
    71  	mulhdu	r0, r8, r0		C p1 = high 64 bits of v2 * (((v2 >> 1) & mask) - v2 * d63)
    72  	srdi	r0, r0, 1		C p1 >> 1
    73  	add	r0, r0, r9		C v3 = (v2 << 31) + (p1 >> 1)
    74  	nop				C no operation; NOTE(review): presumably scheduling padding -- confirm
    75  	mulld	r11, r0, r3		C low 64 bits of v3 * d
    76  	mulhdu	r9, r0, r3		C high 64 bits of v3 * d
    77  	addc	r10, r11, r3		C low + d; r10 is never read, only the carry out matters
    78  	adde	r3, r9, r3		C high + d + carry
    79  	subf	r3, r3, r0		C result = v3 - (high + d + carry)
    80  	blr				C return; result is in r3
    81  EPILOGUE()
    82  
    83  DEF_OBJECT(approx_tab)
        C Lookup table of initial approximations v0 read by mpn_invert_limb.
        C One .short is emitted per value of i, holding 0x7fd00/i as evaluated
        C by m4 eval (integer division).  Assuming forloop runs i over 256..511
        C inclusive (the macro is defined outside this file), that is 256
        C entries, from 0x7fd00/256 = 2045 down to 0x7fd00/511 = 1024.
        C mpn_invert_limb fetches an entry with a halfword load at byte offset
        C ((d >> 55) & 0xff) * 2, i.e. entry number (d >> 55) & 0xff.
    84  forloop(i,256,512-1,dnl
    85  `	.short	eval(0x7fd00/i)
    86  ')dnl
    87  END_OBJECT(approx_tab)
    88  ASM_END()