github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/divrem_1.asm (about)

     1  dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
     2  
     3  dnl  Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C		norm	unorm	frac
    35  C AMD K8,K9	13	13	12
    36  C AMD K10	13	13	12
    37  C Intel P4	43	44	43
    38  C Intel core2	24.5	24.5	19.5
    39  C Intel corei	20.5	19.5	18
    40  C Intel atom	43	46	36
    41  C VIA nano	25.5	25.5	24
    42  
    43  C mp_limb_t
    44  C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
    45  C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
    46  
    47  C mp_limb_t
    48  C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
    49  C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
    50  C                      mp_limb_t dinv, int cnt)
    51  
    52  C INPUT PARAMETERS
    53  define(`qp',		`%rdi')		C qp argument: quotient pointer
    54  define(`fn_param',	`%rsi')		C fn argument as passed in
    55  define(`up_param',	`%rdx')		C np argument of the prototypes above
    56  define(`un_param',	`%rcx')		C nn argument of the prototypes above
    57  define(`d',		`%r8')		C divisor limb
    58  define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
    59  C       shift passed on stack		C only for mpn_preinv_divrem_1
    60  
    61  define(`cnt',		`%rcx')		C shift count; same register as un_param
    62  define(`up',		`%rsi')		C working numerator pointer; same register as fn_param
    63  define(`fn',		`%r12')		C working copy of fn_param
    64  define(`un',		`%rbx')		C working copy of un_param
    65  
    66  
    67  C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
    68  C         cnt         qp      d  dinv
    69  
    70  ABI_SUPPORT(DOS64)
    71  ABI_SUPPORT(STD64)
    72  
        C CNTOFF gives the offset from its base register argument to the shift
        C count that mpn_preinv_divrem_1 receives on the stack, as seen after the
        C four register pushes in that function.
        C STD64: four 8-byte pushes plus the 8-byte return address give 40.
        C DOS64: 104.  Presumably this also covers the 32-byte shadow area, the
        C two stack slots read at 56 and 64 by that function, and whatever
        C FUNC_ENTRY pushes; confirm against the FUNC_ENTRY definition.
    73  IFSTD(`define(`CNTOFF',		`40($1)')')
    74  IFDOS(`define(`CNTOFF',		`104($1)')')
    75  
    76  ASM_START()
    77  	TEXT
    78  	ALIGN(16)
    79  PROLOGUE(mpn_preinv_divrem_1)
        C Entry point for callers that supply the inverse (dinv in r9) and the
        C shift count (on the stack) themselves.  It performs the same register
        C setup as mpn_divrem_1 and then jumps to L(nent) or L(uent), both of
        C which live inside mpn_divrem_1.  This block contains no ret of its own.
    80  	FUNC_ENTRY(4)			C ABI entry glue; macro defined outside this file
    81  IFDOS(`	mov	56(%rsp), %r8	')	C DOS64 only: load d from the stack
    82  IFDOS(`	mov	64(%rsp), %r9	')	C DOS64 only: load dinv from the stack
    83  	xor	R32(%rax), R32(%rax)	C rax = 0: initial remainder
    84  	push	%r13			C save r13 r12 rbp rbx; popped again at L(ret)
    85  	push	%r12
    86  	push	%rbp
    87  	push	%rbx
    88  
    89  	mov	fn_param, fn		C fn (r12) = fn_param; rsi is reused for up below
    90  	mov	un_param, un		C un (rbx) = un_param; rcx is reused for cnt below
    91  	add	fn_param, un_param	C rcx = fn + un = number of quotient limbs
    92  	mov	up_param, up		C up (rsi) = numerator pointer
    93  
    94  	lea	-8(qp,un_param,8), qp	C qp = address of quotient limb fn+un-1
    95  
    96  	test	d, d			C sign flag = top bit of d
    97  	js	L(nent)			C top bit set: enter the normalized loop with the given dinv
    98  
    99  	mov	CNTOFF(%rsp), R8(cnt)	C cl = shift count read from the stack
   100  	shl	R8(cnt), d		C shift d left by cnt
        C NOTE(review): L(uent) decrements un and loads (up,un,8) without testing
        C un for zero, and nothing in this block tests it either.  Presumably
        C callers pass nn of at least 1 on this path; confirm.
   101  	jmp	L(uent)
   102  EPILOGUE()
   103  
   104  	ALIGN(16)
        C mpn_divrem_1: divide the un-limb number at up by the single limb d and
        C additionally develop fn fraction limbs of the quotient.  Quotient limbs
        C are stored downwards starting at limb index fn+un-1 of qp.  The
        C remainder is returned in rax.  The inverse used by the loops is obtained
        C by calling mpn_invert_limb on the divisor after it has been shifted so
        C that its top bit is set.
        C
        C Register use inside the loops:
        C   rax  running remainder          rcx  shift count cnt
        C   r13  quotient limb being built  r9   dinv
        C   rbp r10 r11  scratch, see the notes at each loop
   105  PROLOGUE(mpn_divrem_1)
   106  	FUNC_ENTRY(4)			C ABI entry glue; macro defined outside this file
   107  IFDOS(`	mov	56(%rsp), %r8	')	C DOS64 only: load d from the stack
   108  	xor	R32(%rax), R32(%rax)	C rax = 0: return value when there is nothing to do
   109  	push	%r13			C save r13 r12 rbp rbx; popped again at L(ret)
   110  	push	%r12
   111  	push	%rbp
   112  	push	%rbx
   113  
   114  	mov	fn_param, fn		C fn (r12) = fn_param; rsi is reused for up below
   115  	mov	un_param, un		C un (rbx) = un_param; rcx is reused for cnt later
   116  	add	fn_param, un_param	C rcx = fn + un = number of quotient limbs
   117  	mov	up_param, up		C up (rsi) = numerator pointer; mov keeps the flags of the add
   118  	je	L(ret)			C fn + un is zero: return 0 without storing anything
   119  
   120  	lea	-8(qp,un_param,8), qp	C qp = address of quotient limb fn+un-1
   121  	xor	R32(%rbp), R32(%rbp)	C rbp = 0
   122  
   123  	test	d, d			C sign flag = top bit of d
   124  	jns	L(unnormalized)		C top bit clear: d has to be shifted first
   125  
   126  L(normalized):
   127  	test	un, un
   128  	je	L(8)			C un == 0
   129  	mov	-8(up,un,8), %rbp	C rbp = most significant numerator limb
   130  	dec	un
   131  	mov	%rbp, %rax		C keep a copy of that limb
   132  	sub	d, %rbp			C rbp = limb - d; carry set when limb is below d
   133  	cmovc	%rax, %rbp		C carry: undo the subtraction
   134  	sbb	R32(%rax), R32(%rax)	C eax = 0 - carry; cmovc left the flags alone
   135  	inc	R32(%rax)		C rax = 0 when limb was below d, else 1
   136  	mov	%rax, (qp)		C store that as the top quotient limb
   137  	lea	-8(qp), qp		C step qp down one limb
   138  L(8):
        C Call mpn_invert_limb on d, keeping the live registers on the stack.
   139  IFSTD(`	push	%rdi		')	C STD64: qp is live across the call
   140  IFSTD(`	push	%rsi		')	C STD64: up is live across the call
   141  	push	%r8			C d is live across the call
   142  IFSTD(`	mov	d, %rdi		')	C STD64: argument register for the call
   143  IFDOS(`	sub	$32, %rsp	')	C DOS64: 32 bytes of shadow space for the callee
   144  IFDOS(`	mov	d, %rcx		')	C DOS64: argument register for the call
        C ASSERT is defined outside this file; here it is given a test of the low
        C four bits of rsp.  NOTE(review): confirm the intended sense of nz.
   145  	ASSERT(nz, `test $15, %rsp')
   146  	CALL(	mpn_invert_limb)
   147  IFDOS(`	add	$32, %rsp	')
   148  	pop	%r8
   149  IFSTD(`	pop	%rsi		')
   150  IFSTD(`	pop	%rdi		')
   151  
   152  	mov	%rax, dinv		C dinv (r9) = value returned by mpn_invert_limb
   153  	mov	%rbp, %rax		C rax = running remainder
   154  	jmp	L(nent)
   155  
        C Loop for a divisor whose top bit is set.  Each pass consumes numerator
        C limb r10 = up[un] and the remainder in rax, with rbp = rax + 1 set up at
        C L(nent), and produces quotient limb r13 plus the new remainder in rax:
        C   rdx:rax = rax * dinv + rbp:r10
        C   rbp = low half, r13 = high half = candidate quotient limb
        C   r10 = r10 - r13 * d                 candidate remainder
        C   if r10 is below rbp (unsigned) then rax = r10
        C   else rax = r10 + d and r13 is decremented
        C   if rax is not below d then L(nfx) subtracts d and increments r13
   156  	ALIGN(16)
   157  L(ntop):mov	(up,un,8), %r10		C	    K8-K10  P6-CNR P6-NHM  P4
   158  	mul	dinv			C	      0,13   0,20   0,18   0,45
   159  	add	%r10, %rax		C	      4      8      3     12
   160  	adc	%rbp, %rdx		C	      5      9     10     13
   161  	mov	%rax, %rbp		C	      5      9      4     13
   162  	mov	%rdx, %r13		C	      6     11     12     23
   163  	imul	d, %rdx			C	      6     11     11     23
   164  	sub	%rdx, %r10		C	     10     16     14     33
   165  	mov	d, %rax			C
   166  	add	%r10, %rax		C	     11     17     15     34
   167  	cmp	%rbp, %r10		C	     11     17     15     34
   168  	cmovc	%r10, %rax		C	     12     18     16     35
   169  	adc	$-1, %r13		C
   170  	cmp	d, %rax			C
   171  	jae	L(nfx)			C
   172  L(nok):	mov	%r13, (qp)		C
   173  	sub	$8, qp			C
   174  L(nent):lea	1(%rax), %rbp		C
   175  	dec	un			C
   176  	jns	L(ntop)			C
   177  
   178  	xor	R32(%rcx), R32(%rcx)	C cnt = 0: the final shift of the remainder does nothing
   179  	jmp	L(frac)
   180  
   181  L(nfx):	sub	d, %rax			C remainder was not below d: subtract d once more
   182  	inc	%r13			C and increment the quotient limb
   183  	jmp	L(nok)
   184  
   185  L(unnormalized):
   186  	test	un, un
   187  	je	L(44)			C no integer limbs: rbp stays 0
   188  	mov	-8(up,un,8), %rax	C rax = most significant numerator limb
   189  	cmp	d, %rax
   190  	jae	L(44)			C limb not below d: leave it for the loop, rbp stays 0
   191  	mov	%rbp, (qp)		C limb below d: top quotient limb is rbp, which is 0 here
   192  	mov	%rax, %rbp		C that limb becomes the initial remainder
   193  	lea	-8(qp), qp		C step qp down one limb
        C NOTE(review): mov and lea leave the flags alone, so ZF below still comes
        C from the cmp above.  Since the jae fell through, this je looks like it
        C can never be taken; confirm.
   194  	je	L(ret)
   195  	dec	un
   196  L(44):
   197  	bsr	d, %rcx			C rcx = index of the highest set bit of d
   198  	not	R32(%rcx)		C low six bits of cl = 63 - index
   199  	shl	R8(%rcx), d		C shift d left by cnt so its top bit is set
   200  	shl	R8(%rcx), %rbp		C shift the initial remainder by the same amount
   201  
        C Call mpn_invert_limb on the shifted d, keeping the live registers.
   202  	push	%rcx			C cnt is live across the call
   203  IFSTD(`	push	%rdi		')	C STD64: qp is live across the call
   204  IFSTD(`	push	%rsi		')	C STD64: up is live across the call
   205  	push	%r8			C d is live across the call
   206  IFSTD(`	sub	$8, %rsp	')	C STD64: presumably padding for stack alignment at the call
   207  IFSTD(`	mov	d, %rdi		')	C STD64: argument register for the call
   208  IFDOS(`	sub	$40, %rsp	')	C DOS64: 32 bytes of shadow space plus 8 more
   209  IFDOS(`	mov	d, %rcx		')	C DOS64: argument register for the call
   210  	ASSERT(nz, `test $15, %rsp')
   211  	CALL(	mpn_invert_limb)
   212  IFSTD(`	add	$8, %rsp	')
   213  IFDOS(`	add	$40, %rsp	')
   214  	pop	%r8
   215  IFSTD(`	pop	%rsi		')
   216  IFSTD(`	pop	%rdi		')
   217  	pop	%rcx
   218  
   219  	mov	%rax, dinv		C dinv (r9) = value returned by mpn_invert_limb
   220  	mov	%rbp, %rax		C rax = shifted initial remainder
   221  	test	un, un
   222  	je	L(frac)			C no integer limbs left
   223  
        C Entry into the shifted loop, also reached from mpn_preinv_divrem_1.
   224  L(uent):dec	un
   225  	mov	(up,un,8), %rbp		C rbp = top remaining numerator limb
   226  	neg	R32(%rcx)		C cl = 0 - cnt
   227  	shr	R8(%rcx), %rbp		C shift right by 64 - cnt when cnt is 1..63
   228  	neg	R32(%rcx)		C cl = cnt again
   229  	or	%rbp, %rax		C merge those bits into the shifted remainder
   230  	jmp	L(ent)
   231  
        C Loop for a divisor that had to be shifted.  Same quotient step as the
        C normalized loop, but each numerator limb is first assembled from two
        C neighbouring source limbs shifted by cnt.  r11 plays the role rbp has
        C in the normalized loop.
   232  	ALIGN(16)
   233  L(utop):mov	(up,un,8), %r10		C r10 = next lower source limb
   234  	shl	R8(%rcx), %rbp		C rbp = current source limb shifted left by cnt
   235  	neg	R32(%rcx)		C cl = 0 - cnt
   236  	shr	R8(%rcx), %r10		C r10 = bits of the lower limb that move up
   237  	neg	R32(%rcx)		C cl = cnt again
   238  	or	%r10, %rbp		C rbp = shifted numerator limb
   239  	mul	dinv			C rdx:rax = rax * dinv
   240  	add	%rbp, %rax
   241  	adc	%r11, %rdx		C rdx:rax += r11:rbp, r11 being old remainder + 1
   242  	mov	%rax, %r11		C r11 = low half
   243  	mov	%rdx, %r13		C r13 = candidate quotient limb
   244  	imul	d, %rdx
   245  	sub	%rdx, %rbp		C rbp = limb - r13 * d: candidate remainder
   246  	mov	d, %rax
   247  	add	%rbp, %rax		C rax = candidate remainder + d
   248  	cmp	%r11, %rbp		C carry when candidate remainder is below the low half
   249  	cmovc	%rbp, %rax		C carry: take the candidate without adding d
   250  	adc	$-1, %r13		C no carry: decrement the quotient limb
   251  	cmp	d, %rax
   252  	jae	L(ufx)			C remainder not below d: one more correction
   253  L(uok):	mov	%r13, (qp)		C store the quotient limb
   254  	sub	$8, qp
   255  L(ent):	mov	(up,un,8), %rbp		C rbp = source limb at index un, read before the dec
   256  	dec	un
   257  	lea	1(%rax), %r11		C r11 = remainder + 1; lea keeps the flags of the dec
   258  	jns	L(utop)
   259  
        C Last integer limb: there is no lower source limb to take bits from.
   260  L(uend):shl	R8(%rcx), %rbp		C rbp = last source limb shifted left by cnt
   261  	mul	dinv			C rdx:rax = rax * dinv
   262  	add	%rbp, %rax
   263  	adc	%r11, %rdx		C rdx:rax += r11:rbp
   264  	mov	%rax, %r11		C r11 = low half
   265  	mov	%rdx, %r13		C r13 = candidate quotient limb
   266  	imul	d, %rdx
   267  	sub	%rdx, %rbp		C rbp = candidate remainder
   268  	mov	d, %rax
   269  	add	%rbp, %rax		C rax = candidate remainder + d
   270  	cmp	%r11, %rbp
   271  	cmovc	%rbp, %rax		C carry: take the candidate without adding d
   272  	adc	$-1, %r13		C no carry: decrement the quotient limb
   273  	cmp	d, %rax
   274  	jae	L(efx)			C remainder not below d: one more correction
   275  L(eok):	mov	%r13, (qp)		C store the quotient limb
   276  	sub	$8, qp
   277  	jmp	L(frac)
   278  
   279  L(ufx):	sub	d, %rax			C correction for L(utop): subtract d once more
   280  	inc	%r13			C and increment the quotient limb
   281  	jmp	L(uok)
   282  L(efx):	sub	d, %rax			C same correction for L(uend)
   283  	inc	%r13
   284  	jmp	L(eok)
   285  
   286  L(frac):mov	d, %rbp
   287  	neg	%rbp			C rbp = 0 - d, used as the multiplier in L(ftop)
   288  	jmp	L(fent)
   289  
        C Fraction loop: runs fn times and loads no numerator limb.
        C   rdx:rax = rax * dinv, then rdx += r11 where r11 = remainder + 1
        C   r11 = low half, r13 = candidate quotient limb
        C   rdx = r13 * rbp                     candidate remainder, rbp = 0 - d
        C   if rdx is below r11 (unsigned) then rax = rdx
        C   else rax = rdx + d and r13 is decremented
        C This loop has no second correction step.
   290  	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
   291  L(ftop):mul	dinv			C	      0,12   0,17   0,17
   292  	add	%r11, %rdx		C	      5      8     10
   293  	mov	%rax, %r11		C	      4      8      3
   294  	mov	%rdx, %r13		C	      6      9     11
   295  	imul	%rbp, %rdx		C	      6      9     11
   296  	mov	d, %rax			C
   297  	add	%rdx, %rax		C	     10     14     14
   298  	cmp	%r11, %rdx		C	     10     14     14
   299  	cmovc	%rdx, %rax		C	     11     15     15
   300  	adc	$-1, %r13		C
   301  	mov	%r13, (qp)		C
   302  	sub	$8, qp			C
   303  L(fent):lea	1(%rax), %r11		C
   304  	dec	fn			C
   305  	jns	L(ftop)			C
   306  
   307  	shr	R8(%rcx), %rax		C shift the remainder back right by cnt
   308  L(ret):	pop	%rbx			C restore saved registers in reverse push order
   309  	pop	%rbp
   310  	pop	%r12
   311  	pop	%r13
   312  	FUNC_EXIT()			C ABI exit glue; macro defined outside this file
   313  	ret				C return value is in rax
   314  EPILOGUE()