github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/powerpc64/mode64/divrem_2.asm (about)

     1  dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
     2  
     3  dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  C                       cycles/limb
    34  C                       norm    frac
    35  C POWER3/PPC630
    36  C POWER4/PPC970         ?       ?
    37  C POWER5                37      ?
    38  C POWER6                62      ?
    39  C POWER6                30.5    ?
    40  
    41  C INPUT PARAMETERS
    42  C qp  = r3
    43  C fn  = r4
    44  C up  = r5
    45  C un  = r6
    46  C dp  = r7
    47  
    48  
    49  ifdef(`DARWIN',,`
    50  define(`r2',`r31')')		C FIXME!
    51  C Effect of the conditional above: when DARWIN is not defined, the name r2 is redefined to expand to r31.  The reason is not stated beyond the FIXME.
    52  ASM_START()
    53  C mpn_invert_limb is declared external here; mpn_divrem_2 below calls it through the CALL macro.
    54  EXTERN_FUNC(mpn_invert_limb)
    55  
    56  PROLOGUE(mpn_divrem_2,toc)		C divide the un limbs at up, extended by fn zero limbs, by the 2-limb divisor at dp; quotient limbs go to qp, the remainder is written back into up, and the top quotient limb is returned in r3
    57  	mflr	r0		C r0 = return address taken from LR
    58  	std	r23, -72(r1)		C save r23-r31 just below the incoming stack pointer
    59  	std	r24, -64(r1)
    60  	std	r25, -56(r1)
    61  	std	r26, -48(r1)
    62  	std	r27, -40(r1)
    63  	std	r28, -32(r1)
    64  	std	r29, -24(r1)
    65  	std	r30, -16(r1)
    66  	std	r31, -8(r1)
    67  	std	r0, 16(r1)		C save the return address at offset 16 from the incoming stack pointer
    68  	stdu	r1, -192(r1)		C open a 192-byte frame and store the old r1 at its base
    69  	mr	r24, r3		C r24 = qp
    70  	mr	r25, r4		C r25 = fn
    71  	sldi	r0, r6, 3		C r0 = 8*un
    72  	add	r26, r5, r0		C r26 = up + 8*un
    73  	addi	r26, r26, -24		C r26 = address of up[un-3], the dividend read pointer
    74  	ld	r30, 8(r7)		C r30 = dp[1], high divisor limb
    75  	ld	r28, 0(r7)		C r28 = dp[0], low divisor limb
    76  	ld	r29, 16(r26)		C r29 = up[un-1], high limb of the running remainder
    77  	ld	r31, 8(r26)		C r31 = up[un-2], low limb of the running remainder
    78  C Initial step: compare the top two dividend limbs with the divisor and subtract it once if they are not smaller.  The m4 conditional below tests 0 against 1, so only its second body is assembled.
    79  ifelse(0,1,`
    80  	li	r23, 0
    81  	cmpld	cr7, r29, r30
    82  	blt	cr7, L(8)
    83  	bgt	cr7, L(9)
    84  	cmpld	cr0, r31, r28
    85  	blt	cr0, L(8)
    86  L(9):	subfc	r31, r28, r31
    87  	subfe	r29, r30, r29
    88  	li	r23, 1
    89  ',`
    90  	li	r23, 0		C r23 = most significant quotient limb, moved to r3 at exit
    91  	cmpld	cr7, r29, r30		C unsigned compare of the high limbs
    92  	blt	cr7, L(8)		C r29 < r30: top limbs are below the divisor, keep r23 = 0
    93  	mfcr	r0		C copy the condition register into r0
    94  	rlwinm	r0, r0, 30, 1		C rotate and mask down to a single CR bit; by the rotate count this is the cr7 GT bit, so r0 = 1 when r29 > r30
    95  	subfc	r9, r28, r31		C r9 is scratch, only the carry is wanted: CA = 1 when r31 >= r28
    96  	addze.	r0, r0		C r0 += CA; r0 stays zero only when r29 = r30 and r31 < r28
    97  	nop		C no operation; purpose not stated, presumably scheduling padding
    98  	beq	cr0, L(8)		C top two limbs are below the divisor, nothing to subtract
    99  	subfc	r31, r28, r31		C subtract the divisor from the top two limbs, low limb first
   100  	subfe	r29, r30, r29		C high limb with the borrow from the low limb
   101  	li	r23, 1		C and record a most significant quotient limb of 1
   102  ')
   103  
   104  L(8):
   105  	add	r27, r25, r6		C r27 = fn + un
   106  	addic.	r27, r27, -3		C r27 = fn + un - 3, index of the highest quotient limb to store; also sets cr0
   107  	blt	cr0, L(18)		C negative index: no quotient limbs to develop, go write back the remainder
   108  	mr	r3, r30		C argument for the call = high divisor limb
   109  	CALL(	mpn_invert_limb)		C r3 = value returned by mpn_invert_limb, used below as the reciprocal estimate
   110  	mulld	r10, r3, r30		C r10 = low 64 bits of r3 * r30
   111  	mulhdu	r0, r3, r28		C r0 = high 64 bits of r3 * r28
   112  	addc	r8, r10, r28		C r8 = r10 + r28, carry out in CA
   113  	subfe	r11, r1, r1		C r11 = CA - 1: 0 after a carry, -1 otherwise; r1 is only a dummy operand and is not modified
   114  	addc	r10, r8, r0		C r10 = r8 + r0, carry out in CA
   115  	addze.	r11, r11		C r11 += CA; cr0 reflects the sign of r11
   116  	blt	cr0, L(91)		C r11 < 0: neither addition carried, r3 is used unadjusted
   117  L(40):		C adjustment loop: lowers r3 by 1 per pass, so that it accounts for the low divisor limb as well
   118  	subfc	r10, r30, r10		C r10 -= r30, CA = 1 when there is no borrow
   119  	addme.	r11, r11		C r11 = r11 + CA - 1, so r11 drops by 1 on a borrow; sets cr0
   120  	addi	r3, r3, -1		C r3 -= 1
   121  	bge	cr0, L(40)		C cr0 comes from the addme above: repeat while r11 >= 0
   122  L(91):
   123  	addi	r5, r27,  1		C r5 = fn + un - 2, the number of quotient limbs to develop
   124  	mtctr	r5		C CTR = loop count consumed by bdnz
   125  	sldi	r0, r27, 3		C r0 = 8 * index of the highest quotient limb
   126  	add	r24, r24, r0		C r24 = address of qp[fn+un-3], the first quotient limb stored
   127  	ALIGN(16)
   128  L(loop):		C per pass: r29,r31 = remainder high,low; r3 = adjusted reciprocal; one quotient limb is produced in r8
   129  	mulhdu	r8, r29, r3		C r8 = high 64 bits of r29 * r3
   130  	mulld	r6, r29, r3		C r6 = low 64 bits of r29 * r3
   131  	addc	r6, r6, r31		C add r29,r31 to r8,r6 as a 128-bit quantity, low half first
   132  	adde	r8, r8, r29		C r8 = candidate quotient limb q, r6 = the low word kept for the later comparison
   133  	cmpd	cr7, r27, r25		C compare the limb index with fn; the result is consumed by the blt further down
   134  	mulld	r0, r30, r8		C r0 = low 64 bits of q * r30
   135  	mulhdu	r11, r28, r8		C r11 = high 64 bits of q * r28
   136  	mulld	r10, r28, r8		C r10 = low 64 bits of q * r28
   137  	subf	r31, r0, r31		C r31 -= low 64 bits of q * r30
   138  	li	r7, 0		C next dividend limb defaults to 0
   139  	blt	cr7, L(60)		C index < fn: fraction part, keep r7 = 0
   140  	ld	r7, 0(r26)		C otherwise fetch the next lower limb of up
   141  	addi	r26, r26, -8		C and step the dividend pointer down one limb
   142  	nop		C no operation; purpose not stated, presumably scheduling padding
   143  L(60):	subfc	r7, r28, r7		C subtract the divisor once from r31,r7, low limb first
   144  	subfe	r31, r30, r31		C high limb with borrow
   145  	subfc	r7, r10, r7		C then subtract q * r28, low word first
   146  	subfe	r4, r11, r31		C r4,r7 = candidate remainder, the one that goes with quotient q + 1
   147  	subfc	r9, r6, r4		C r9 is scratch, only the carry is wanted: CA = 1 when r4 >= r6, unsigned
   148  	subfe	r9, r1, r1		C r9 = CA - 1: 0 when r4 >= r6, all ones otherwise; r1 is only a dummy operand
   149  	andc	r6, r28, r9		C r6 = r28 when r9 = 0, else 0
   150  	andc	r0, r30, r9		C r0 = r30 when r9 = 0, else 0
   151  	addc	r31, r7, r6		C new remainder low limb = r7 plus the masked low divisor limb
   152  	adde	r29, r4, r0		C new remainder high limb = r4 plus the masked high divisor limb plus carry
   153  	subf	r8, r9, r8		C r8 = q - r9: q + 1 when r9 is all ones, q when the divisor was added back
   154  	cmpld	cr7, r29, r30		C unsigned compare of remainder high limb with divisor high limb
   155  	bge-	cr7, L(fix)		C hinted as unlikely: the remainder may still be >= the divisor, handled out of line
   156  L(bck):	std	r8, 0(r24)		C store the quotient limb
   157  	addi	r24, r24, -8		C quotient pointer moves down one limb
   158  	addi	r27, r27, -1		C limb index moves down by one
   159  	bdnz	L(loop)		C decrement CTR and loop while it is non-zero
   160  L(18):		C exit path: write back the remainder, restore registers, return
   161  	std	r31, 8(r26)		C low remainder limb; for un >= 2 the pointer arithmetic above makes this up[0]
   162  	std	r29, 16(r26)		C high remainder limb, one limb above the low one
   163  	mr	r3, r23		C return value = most significant quotient limb, 0 or 1
   164  	addi	r1, r1, 192		C release the 192-byte frame
   165  	ld	r0, 16(r1)		C reload the saved return address
   166  	mtlr	r0		C and put it back into LR
   167  	ld	r23, -72(r1)		C restore r23-r31 from the slots written at entry
   168  	ld	r24, -64(r1)
   169  	ld	r25, -56(r1)
   170  	ld	r26, -48(r1)
   171  	ld	r27, -40(r1)
   172  	ld	r28, -32(r1)
   173  	ld	r29, -24(r1)
   174  	ld	r30, -16(r1)
   175  	ld	r31, -8(r1)
   176  	blr		C return to the caller
   177  L(fix):		C out of line: reached when r29 >= r30 after a quotient step; cr7 still holds that compare
   178  	mfcr	r0		C copy the condition register into r0
   179  	rlwinm	r0, r0, 30, 1		C same bit extraction as in the initial step: r0 = 1 when r29 > r30
   180  	subfc	r9, r28, r31		C r9 is scratch, only the carry is wanted: CA = 1 when r31 >= r28
   181  	addze.	r0, r0		C r0 += CA; zero only when r29 = r30 and r31 < r28
   182  	beq	cr0, L(bck)		C remainder is below the divisor after all, store q unchanged
   183  	subfc	r31, r28, r31		C subtract the divisor from the remainder, low limb first
   184  	subfe	r29, r30, r29		C high limb with borrow
   185  	addi	r8, r8, 1		C and raise the quotient limb by one
   186  	b	L(bck)		C rejoin the loop at the quotient store
   187  EPILOGUE()