github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm (about)

     1  dnl  Intel Atom  mpn_bdiv_dbm1c.
     2  
     3  dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
     4  
     5  dnl  Copyright 2011 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C			    cycles/limb
    36  C			    cycles/limb
    37  C P5				 -
    38  C P6 model 0-8,10-12		 -
    39  C P6 model 9  (Banias)		 9.75
    40  C P6 model 13 (Dothan)
    41  C P4 model 0  (Willamette)
    42  C P4 model 1  (?)
    43  C P4 model 2  (Northwood)	 8.25
    44  C P4 model 3  (Prescott)
    45  C P4 model 4  (Nocona)
    46  C Intel Atom			 8
    47  C AMD K6			 -
    48  C AMD K7			 -
    49  C AMD K8
    50  C AMD K10
    51  
    52  C TODO: This code was optimised for atom-32; consider moving it back to the atom
    53  C	dir (atom currently grabs this code), and writing a 4-way version (7 c/l).
    54  
    55  defframe(PARAM_CARRY,20)
    56  defframe(PARAM_MUL,  16)
    57  defframe(PARAM_SIZE, 12)
    58  defframe(PARAM_SRC,  8)
    59  defframe(PARAM_DST,  4)
        C The defframe entries above give names to the five stack arguments.
        C NOTE(review): the numbers look like byte offsets from the stack pointer
        C at entry, with FRAME being 0 -- confirm against the defframe macro.
    60  
    61  dnl  re-use parameter space
        C SAVE_RP and SAVE_UP are aliases for the PARAM_MUL and PARAM_SIZE slots.
        C The routine parks the incoming %edi and %esi there and reloads them
        C just before returning, so PARAM_MUL and PARAM_SIZE must be read before
        C the matching save is executed.
    62  define(SAVE_RP,`PARAM_MUL')
    63  define(SAVE_UP,`PARAM_SIZE')
    64  
        C Register roles used by the routine:
        C   rp   destination pointer, loaded from PARAM_DST
        C   up   source pointer, loaded from PARAM_SRC
        C   n    limb count from PARAM_SIZE, halved early and then used as the
        C        loop counter
        C   reg  scratch for the low or high 32 bits of the current product
        C   cy   running value, starts as PARAM_CARRY and ends as the result
    65  define(`rp', `%edi')
    66  define(`up', `%esi')
    67  define(`n',  `%ecx')
    68  define(`reg', `%edx')
    69  define(`cy', `%eax')	C contains the return value
    70  
    71  ASM_START()
    72  	TEXT
    73  	ALIGN(16)
    74  deflit(`FRAME',0)
    75  
        C mpn_bdiv_dbm1c
        C
        C Stack arguments, by their defframe names: PARAM_DST, PARAM_SRC,
        C PARAM_SIZE, PARAM_MUL, PARAM_CARRY.  The result is returned in cy.
        C
        C Work done per source limb, with h kept in cy and starting out as
        C PARAM_CARRY:
        C	p  = limb * PARAM_MUL		64-bit product from pmuludq
        C	h -= low 32 bits of p		sub, leaves the borrow in the carry flag
        C	store h at the destination
        C	h -= high 32 bits of p, minus the borrow	sbb
        C The h left after the last limb is the return value.
        C
        C Structure: the size is halved up front.  The loop at top handles two
        C limbs per pass, the code at eq1 always handles the final limb, and an
        C even size gets one extra limb handled ahead of the loop.
        C
        C Flag discipline: every sbb consumes the carry flag produced by the
        C preceding sub.  Between the two there are only mov, lea, dec, jumps
        C and MMX instructions.  mov, lea and dec leave the carry flag alone and
        C the MMX instructions do not write flags, so the borrow survives.  Do
        C not insert a flag-writing instruction between a sub and its sbb.
        C
        C %esi and %edi are saved in the argument area on entry and reloaded
        C before ret.  mm0, mm1 and mm7 are used as scratch.
    76  PROLOGUE(mpn_bdiv_dbm1c)
        C Fetch the arguments.  PARAM_SIZE and PARAM_MUL are read before SAVE_UP
        C and SAVE_RP overwrite those same stack slots.
    77  	mov	PARAM_SIZE, n		C size
    78  	mov	up, SAVE_UP
    79  	mov	PARAM_SRC, up
    80  	movd	PARAM_MUL, %mm7
    81  	mov	rp, SAVE_RP
    82  	mov	PARAM_DST, rp
    83  
        C mm0 = first limb times the multiplier.  shr halves the size: carry set
        C means the size was odd, zero means there is no full pair, so jump to
        C the tail.  The load of PARAM_CARRY sits between shr and jz but is a
        C mov, so the flags from shr are still valid at both jz and jc.
        C NOTE(review): a size of 0 also takes the jz path and would still
        C process one limb; presumably callers guarantee size >= 1 -- confirm.
    84  	movd	(up), %mm0
    85  	pmuludq	%mm7, %mm0
    86  	shr	n
    87  	mov	PARAM_CARRY, cy
    88  	jz	L(eq1)
    89  
        C At least two limbs.  Preload the second limb into mm1.  An odd size
        C enters the loop at odd, where mm1 gets multiplied.
    90  	movd	4(up), %mm1
    91  	jc	L(odd)
    92  
        C Even size: handle the first limb here.  Subtract the low half of its
        C product and store the result, leave the high half in reg for the sbb
        C at top or end, and move the second product into mm0.  After dec, n is
        C the number of loop passes; zero means only the tail limb is left.
    93  	lea	4(up), up
    94  	pmuludq	%mm7, %mm1
    95  	movd	%mm0, reg
    96  	psrlq	$32, %mm0
    97  	sub	reg, cy
    98  	movd	%mm0, reg
    99  	movq	%mm1, %mm0
   100  	dec	n
   101  	mov	cy, (rp)
   102  	lea	4(rp), rp
   103  	jz	L(end)
   104  
        C Main loop, two limbs per pass.  On entry at top, reg holds the high
        C half of the previous product and the carry flag holds the borrow of
        C the previous sub; the sbb at top finishes that limb.  mm0 holds the
        C product for the first limb of the pass.  mm1 is loaded and multiplied
        C for the second limb.  mm0 is then reloaded and multiplied for the next
        C pass or for the tail.  Entry at odd skips the load and the sbb because
        C mm1 is already loaded and no limb is pending.
   105  C	ALIGN(16)
   106  L(top):	movd	4(up), %mm1
   107  	sbb	reg, cy
   108  L(odd):	movd	%mm0, reg
   109  	psrlq	$32, %mm0
   110  	pmuludq	%mm7, %mm1
   111  	sub	reg, cy
   112  	lea	8(up), up
   113  	movd	%mm0, reg
   114  	movd	(up), %mm0
   115  	mov	cy, (rp)
   116  	sbb	reg, cy
   117  	movd	%mm1, reg
   118  	psrlq	$32, %mm1
   119  	sub	reg, cy
   120  	movd	%mm1, reg
   121  	pmuludq	%mm7, %mm0
   122  	dec	n
   123  	mov	cy, 4(rp)
   124  	lea	8(rp), rp
   125  	jnz	L(top)
   126  
        C Finish the limb handled last: subtract its high half and the borrow.
   127  L(end):	sbb	reg, cy
   128  
        C Tail: the final limb, whose product is already in mm0.  The saved %esi
        C is reloaded here and the saved %edi after the last store.  emms comes
        C after the last use of an mm register.  The final sbb leaves the return
        C value in cy.
   129  L(eq1):	movd	%mm0, reg
   130  	psrlq	$32, %mm0
   131  	mov	SAVE_UP, up
   132  	sub	reg, cy
   133  	movd	%mm0, reg
   134  	emms
   135  	mov	cy, (rp)
   136  	sbb	reg, cy
   137  
   138  	mov	SAVE_RP, rp
   139  	ret
   140  EPILOGUE()
   141  ASM_END()