github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/divrem_2.asm (about)

     1  dnl  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
     2  
     3  dnl  Copyright 2007, 2008, 2010, 2014 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C	     cycles/limb	best
    35  C AMD K8,K9	18
    36  C AMD K10	18
    37  C AMD bull
    38  C AMD pile
    39  C AMD bobcat
    40  C AMD jaguar
    41  C Intel P4	68
    42  C Intel core	34
    43  C Intel NHM	30.25
    44  C Intel SBR	21.3
    45  C Intel IBR	21.4
    46  C Intel HWL	20.6
    47  C Intel BWL
    48  C Intel atom	73
    49  C VIA nano	33
    50  
    51  
    52  C INPUT PARAMETERS
        C Symbolic names for the five arguments, in argument order, bound to the
        C registers that carry them.  In this file the names appear only in
        C comments; the instructions below spell the registers out directly.
        C   qp        where the quotient limbs are written
        C   fn        number of extra low-order zero limbs to divide through
        C   up_param  the dividend limbs (the remainder is written back here)
        C   un_param  number of dividend limbs
        C   dp        the two divisor limbs
    53  define(`qp',		`%rdi')
    54  define(`fn',		`%rsi')
    55  define(`up_param',	`%rdx')
    56  define(`un_param',	`%rcx')
    57  define(`dp',		`%r8')
    58  
        C Both the DOS64 and STD64 ABIs are declared as supported; the IFDOS and
        C IFSTD lines inside the function select the ABI-specific instructions.
    59  ABI_SUPPORT(DOS64)
    60  ABI_SUPPORT(STD64)
    61  
        C NOTE(review): ASM_START, TEXT and ALIGN are macros defined outside this
        C file; presumably they open the assembler output, select the code
        C section and align the entry point to 16 bytes -- confirm.
    62  ASM_START()
    63  	TEXT
    64  	ALIGN(16)
    65  PROLOGUE(mpn_divrem_2)
        C mpn_divrem_2 -- divide a limb vector by a normalized two-limb divisor.
        C
        C Arguments, in the registers named by the defines above:
        C   rdi  qp        destination for the quotient limbs
        C   rsi  fn        number of extra low-order zero limbs (fraction limbs)
        C   rdx  up_param  dividend, 8-byte limbs, least significant first
        C   rcx  un_param  dividend limb count; up[un-1] and up[un-2] are read
        C                  unconditionally, so un >= 2 is assumed
        C   r8   dp        divisor: d0 at offset 0, d1 at offset 8
        C
        C Results:
        C   qp[un+fn-3] down to qp[0]  quotient limbs, stored highest index first
        C   up[0], up[1]               remainder, low limb then high limb
        C   rax                        0 or 1, the extra most significant
        C                              quotient limb (kept in r15)
        C
        C rbx, rbp and r12-r15 are pushed on entry and popped again at L(end).
    66  	FUNC_ENTRY(4)
        C NOTE(review): for DOS64 the fifth argument is fetched from the stack
        C into r8; presumably FUNC_ENTRY(4) only remaps the four register
        C arguments -- confirm against the macro definitions.
    67  IFDOS(`	mov	56(%rsp), %r8	')
    68  	push	%r15
    69  	push	%r14
    70  	push	%r13
    71  	push	%r12
    72  	lea	-24(%rdx,%rcx,8), %r12	C r12 = &up[un-3]
    73  	mov	%rsi, %r13		C r13 = fn
    74  	push	%rbp
    75  	mov	%rdi, %rbp		C rbp = qp
    76  	push	%rbx
    77  	mov	8(%r8), %r11		C d1
    78  	mov	16(%r12), %rbx		C n2 = up[un-1]
    79  	mov	(%r8), %r8		C d0
    80  	mov	8(%r12), %r10		C n1 = up[un-2]
    81  
        C Compare the top two dividend limbs n2:n1 with d1:d0.  When
        C n2:n1 >= d1:d0 the most significant quotient limb r15 becomes 1 and
        C the divisor is subtracted once; otherwise r15 stays 0.
    82  	xor	R32(%r15), R32(%r15)
    83  	cmp	%rbx, %r11
    84  	ja	L(2)			C d1 > n2: nothing to subtract
    85  	setb	%dl			C dl = (d1 < n2)
    86  	cmp	%r10, %r8
    87  	setbe	%al			C al = (d0 <= n1)
    88  	orb	%al, %dl		C "orb" form to placate Sun tools
    89  	je	L(2)			C d1 == n2 and d0 > n1: nothing to subtract
    90  	inc	R32(%r15)
    91  	sub	%r8, %r10
    92  	sbb	%r11, %rbx
    93  L(2):
    94  	lea	-3(%rcx,%r13), %r14	C un + fn - 3
        C r14 is the index of the highest quotient limb still to be produced.
        C A negative value means there are none: only the remainder is stored.
    95  	test	%r14, %r14
    96  	js	L(end)
    97  
        C r8, r10 and r11 are caller-saved registers, so they are kept on the
        C stack across the call.  d1 is passed in the first argument register
        C of whichever ABI is in use.
        C NOTE(review): mpn_invert_limb is defined elsewhere; presumably it
        C returns in rax the reciprocal of d1 that the code below refines --
        C confirm.
    98  	push	%r8
    99  	push	%r10
   100  	push	%r11
   101  IFSTD(`	mov	%r11, %rdi	')
   102  IFDOS(`	mov	%r11, %rcx	')
        C NOTE(review): on STD64, assuming FUNC_ENTRY pushes nothing, nine
        C pushes since entry leave rsp 16-byte aligned here, yet the asserted
        C condition is nz.  Presumably the ASSERT macro pushes the flags before
        C running the test -- confirm against its definition.
   103  	ASSERT(nz, `test $15, %rsp')
   104  	CALL(	mpn_invert_limb)
   105  	pop	%r11
   106  	pop	%r10
   107  	pop	%r8
   108  
        C Adjust the value v returned in rax into dinv (rdi) for the full
        C two-limb divisor.  rcx:r9 accumulates low(d1*v) + d0 + high(v*d0),
        C with rcx starting at -1 so that it is negative when neither addition
        C carried.  While rcx:r9 is non-negative, dinv is decremented and d1 is
        C subtracted from rcx:r9.
   109  	mov	%r11, %rdx
   110  	mov	%rax, %rdi		C dinv = v
   111  	imul	%rax, %rdx		C rdx = low 64 bits of d1 * v
   112  	mov	%rdx, %r9
   113  	mul	%r8			C rdx:rax = v * d0
   114  	xor	R32(%rcx), R32(%rcx)
   115  	add	%r8, %r9
   116  	adc	$-1, %rcx		C rcx = carry - 1
   117  	add	%rdx, %r9
   118  	adc	$0, %rcx
   119  	js	2f			C no carry out: dinv needs no adjustment
   120  1:	dec	%rdi
   121  	sub	%r11, %r9
   122  	sbb	$0, %rcx
   123  	jns	1b
   124  2:
   125  
   126  	lea	(%rbp,%r14,8), %rbp	C rbp = &qp[un+fn-3]
   127  	mov	%r11, %rsi
   128  	neg	%rsi			C -d1
   129  
   130  C rax rbx rcx rdx rsi rdi  rbp r8 r9 r10 r11 r12 r13 r14 r15
   131  C     n2  un      -d1 dinv qp  d0 q0     d1  up  fn      msl
        C Also live in the loop: r10 = low limb of the running remainder (n1),
        C r14 = index of the quotient limb being produced, r9 = that quotient
        C limb.  rcx is scratch here; it was overwritten above and no longer
        C holds un.
   132  
        C One quotient limb per iteration:
        C   rdx:rcx  = dinv * n2 + n2:n1; the candidate q is rdx, kept in r9
        C   rbx      = n1 - q * d1, low 64 bits
        C   r10      = next dividend limb from (r12), or 0 once r14 < fn
        C   rbx:r10 -= d1:d0, then -= q * d0, giving the remainder for q + 1
        C   if rbx < rcx (unsigned) the carry from the cmp makes q = q + 1,
        C   otherwise d1:d0 is added back and q is kept; cmovnc leaves the
        C   flags alone, so the adc still sees the carry from the cmp
        C   if rbx >= d1 afterwards, L(fix) applies one more correction
        C NOTE(review): the lone nop presumably exists for instruction
        C scheduling or alignment -- confirm before removing it.
   133  	ALIGN(16)
   134  L(top):	mov	%rdi, %rax		C di		ncp
   135  	mul	%rbx			C		0, 17
   136  	mov	%r10, %rcx		C
   137  	add	%rax, %rcx		C		4
   138  	adc	%rbx, %rdx		C		5
   139  	mov	%rdx, %r9		C q		6
   140  	imul	%rsi, %rdx		C		6
   141  	mov	%r8, %rax		C		ncp
   142  	lea	(%rdx, %r10), %rbx	C n1 -= ...	10
   143  	xor	R32(%r10), R32(%r10)	C
   144  	mul	%r9			C		7
   145  	cmp	%r14, %r13		C
   146  	jg	L(19)			C
   147  	mov	(%r12), %r10		C
   148  	sub	$8, %r12		C
   149  L(19):	sub	%r8, %r10		C		ncp
   150  	sbb	%r11, %rbx		C		11
   151  	sub	%rax, %r10		C		11
   152  	sbb	%rdx, %rbx		C		12
   153  	xor	R32(%rax), R32(%rax)	C
   154  	xor	R32(%rdx), R32(%rdx)	C
   155  	cmp	%rcx, %rbx		C		13
   156  	cmovnc	%r8, %rax		C		14
   157  	cmovnc	%r11, %rdx		C		14
   158  	adc	$0, %r9			C adjust q	14
   159  	nop
   160  	add	%rax, %r10		C		15
   161  	adc	%rdx, %rbx		C		16
   162  	cmp	%r11, %rbx		C
   163  	jae	L(fix)			C
        C Store the quotient limb, step the quotient pointer down one limb and
        C continue while the index r14 is still non-negative.
   164  L(bck):	mov	%r9, (%rbp)		C
   165  	sub	$8, %rbp		C
   166  	dec	%r14
   167  	jns	L(top)
   168  
        C Store the remainder.  r12 started at &up[un-3] and moved down one
        C limb for each of the un-2 dividend limbs consumed, so it now points
        C at &up[-1] and the two stores land on up[0] and up[1].  Registers are
        C then restored in the reverse order of the pushes at entry, and the
        C return value is copied out of r15 before r15 itself is restored.
   169  L(end):	mov	%r10, 8(%r12)
   170  	mov	%rbx, 16(%r12)
   171  	pop	%rbx
   172  	pop	%rbp
   173  	pop	%r12
   174  	pop	%r13
   175  	pop	%r14
   176  	mov	%r15, %rax
   177  	pop	%r15
   178  	FUNC_EXIT()
   179  	ret
   180  
        C Out-of-line correction, entered with rbx >= d1 and the flags of that
        C compare still valid.  When rbx:r10 >= d1:d0, that is rbx > d1 or
        C r10 >= d0, the quotient limb is incremented and the divisor is
        C subtracted once more; either way control returns to L(bck).
   181  L(fix):	seta	%dl
   182  	cmp	%r8, %r10
   183  	setae	%al
   184  	orb	%dl, %al		C "orb" form to placate Sun tools
   185  	je	L(bck)
   186  	inc	%r9
   187  	sub	%r8, %r10
   188  	sbb	%r11, %rbx
   189  	jmp	L(bck)
   190  EPILOGUE()