github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/div_qr_2n_pi1.asm (about)

     1  dnl  x86-64 mpn_div_qr_2n_pi1
     2  dnl  -- Divide an mpn number by a normalized 2-limb number,
     3  dnl     using a single-limb inverse.
     4  
     5  dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  
    36  C		c/l
    37  C INPUT PARAMETERS
    38  define(`qp',		`%rdi')	C STD64 arg 1: quotient limbs are stored at (qp, un, 8)
    39  define(`rp',		`%rsi')	C STD64 arg 2: remainder limbs are stored at (rp) and 8(rp)
    40  define(`up_param',	`%rdx')	C STD64 arg 3: dividend pointer, copied to up at entry
    41  define(`un',		`%rcx')	C STD64 arg 4: dividend limb count, reused as the loop index
    42  define(`d1',		`%r8')	C STD64 arg 5: high limb of the divisor
    43  define(`d0',		`%r9')	C STD64 arg 6: low limb of the divisor
    44  define(`di_param',	`8(%rsp)')	C STD64 arg 7: single-limb inverse, first stack slot at entry
    45  
    46  define(`di',		`%r10')	C register copy of di_param
    47  define(`up',		`%r11')	C register copy of up_param; %rdx is scratch in the loop
    48  define(`u2',		`%rbx')	C high limb of the running remainder
    49  define(`u1',		`%r12')	C low limb of the running remainder
    50  define(`t1',		`%r13')	C scratch; holds the quotient limb q inside the loop
    51  define(`t0',		`%r14')	C scratch; holds q0 inside the loop
    52  define(`md1',		`%r15')	C -d1, set up once before the loop
    53  
    54  C TODO
    55  C * Store qh in the same stack slot as di_param, instead of pushing
    56  C   it. (we could put it in register %rbp, but then we would need to
    57  C   save and restore that instead, which doesn't seem like a win).
    58  
    59  ABI_SUPPORT(DOS64)
    60  ABI_SUPPORT(STD64)
    61  C mpn_div_qr_2n_pi1: divide the un limbs at up by d1:d0 using inverse di; returns the top quotient limb qh
    62  ASM_START()
    63  	TEXT
    64  	ALIGN(16)
    65  PROLOGUE(mpn_div_qr_2n_pi1)
    66  	FUNC_ENTRY(4)	C NOTE(review): defined outside this file; presumably maps 4 DOS64 register args to the STD64 registers - confirm
    67  IFDOS(`	mov	56(%rsp), %r8	')	C DOS64 only: d1 is loaded from the stack
    68  IFDOS(`	mov	64(%rsp), %r9	')	C DOS64 only: d0 is loaded from the stack
    69  IFDOS(`define(`di_param', `72(%rsp)')')	C DOS64 only: di sits at a different stack offset
    70  	mov	di_param, di	C rsp-relative operand, so it is read before the pushes below
    71  	mov	up_param, up	C frees %rdx, which the loop uses as scratch
    72  	push	%r15	C save the callee-saved registers used for md1, t0, t1, u1, u2
    73  	push	%r14
    74  	push	%r13
    75  	push	%r12
    76  	push	%rbx
    77  
    78  	mov	-16(up, un, 8), u1	C u1 = up[un-2]
    79  	mov	-8(up, un, 8), u2	C u2 = up[un-1], the most significant limb
    80  
    81  	mov	u1, t0
    82  	mov	u2, t1
    83  	sub	d0, t0	C t1:t0 = u2:u1 - d1:d0
    84  	sbb	d1, t1	C carry set iff u2:u1 < d1:d0
    85  	cmovnc  t0, u1	C no borrow: keep the reduced value in u2:u1
    86  	cmovnc	t1, u2
    87  	C push qh which is !carry
    88  	sbb	%rax, %rax	C %rax = -carry; the cmovs left the flags untouched
    89  	inc	%rax	C %rax = 1 - carry = qh
    90  	push	%rax
    91  	lea	-2(un), un	C un -= 2: number of quotient limbs still to produce
    92  	mov	d1, md1
    93  	neg	md1	C md1 = -d1, multiplier for the imul in the loop
    94  
    95  	jmp	L(next)	C enter at the loop test so that un = 0 runs no iteration
    96  
    97  	ALIGN(16)
    98  L(loop):
    99  	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
   100  	C Based on the optimized divrem_2.asm code.
   101  	C On entry u2:u1 is the running remainder and up[un] is the next dividend limb n0.
   102  	mov	di, %rax
   103  	mul	u2	C %rdx:%rax = di * u2
   104  	mov	u1, t0
   105  	add	%rax, t0	C q0 in t0: t0 = u1 + low limb of di * u2
   106  	adc	u2, %rdx	C %rdx = high limb of di * u2 + u2 + carry
   107  	mov	%rdx, t1	C q in t1
   108  	imul	md1, %rdx	C %rdx = -d1 * q (low 64 bits)
   109  	mov	d0, %rax
   110  	lea	(%rdx, u1), u2	C u2 = u1 - d1 * q (mod 2^64)
   111  	mul	t1	C %rdx:%rax = d0 * q
   112  	mov	(up, un, 8), u1	C u1 = up[un], the next dividend limb
   113  	sub	d0, u1	C u2:u1 -= d1:d0
   114  	sbb	d1, u2
   115  	sub	%rax, u1	C u2:u1 -= d0 * q
   116  	sbb	%rdx, u2
   117  	xor	R32(%rax), R32(%rax)	C add-back value in %rdx:%rax defaults to 0 (R32 presumably names the 32-bit register - confirm)
   118  	xor	R32(%rdx), R32(%rdx)
   119  	cmp	t0, u2	C carry set iff u2 < t0 (q0)
   120  	cmovnc	d0, %rax	C u2 >= t0: add-back value = d1:d0
   121  	cmovnc	d1, %rdx
   122  	adc	$0, t1	C u2 < t0: q += 1; carry is still the one from the cmp
   123  	nop	C NOTE(review): purpose not stated; presumably scheduling or alignment padding
   124  	add	%rax, u1	C u2:u1 += add-back value
   125  	adc	%rdx, u2
   126  	cmp	d1, u2
   127  	jae	L(fix)	C u2 >= d1: the remainder may still be >= d1:d0
   128  L(bck):
   129  	mov	t1, (qp, un, 8)	C qp[un] = q
   130  L(next):
   131  	sub	$1, un	C sub rather than dec: the borrow must reach the carry flag
   132  	jnc	L(loop)	C loop until un wraps below 0
   133  L(end):
   134  	mov	u2, 8(rp)	C rp[1] = high remainder limb
   135  	mov	u1, (rp)	C rp[0] = low remainder limb
   136  
   137  	C qh on stack
   138  	pop	%rax	C return value
   139  
   140  	pop	%rbx	C restore in reverse push order
   141  	pop	%r12
   142  	pop	%r13
   143  	pop	%r14
   144  	pop	%r15
   145  	FUNC_EXIT()	C NOTE(review): defined outside this file; presumably the DOS64 counterpart of FUNC_ENTRY - confirm
   146  	ret
   147  
   148  L(fix):	C Unlikely update. u2 >= d1
   149  	seta	%dl	C %dl = (u2 > d1), flags from the cmp before the jae
   150  	cmp	d0, u1
   151  	setae	%al	C %al = (u1 >= d0)
   152  	orb	%dl, %al		C "orb" form to placate Sun tools
   153  	je	L(bck)	C both zero: u2 = d1 and u1 < d0, no adjustment needed
   154  	inc	t1	C q += 1
   155  	sub	d0, u1	C u2:u1 -= d1:d0
   156  	sbb	d1, u2
   157  	jmp	L(bck)
   158  EPILOGUE()