github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/sec_tabselect.asm (about)

     1  dnl  x86 mpn_sec_tabselect.
     2  
     3  dnl  Copyright 2011 Free Software Foundation, Inc.
     4  
     5  dnl  This file is part of the GNU MP Library.
     6  dnl
     7  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     8  dnl  it under the terms of either:
     9  dnl
    10  dnl    * the GNU Lesser General Public License as published by the Free
    11  dnl      Software Foundation; either version 3 of the License, or (at your
    12  dnl      option) any later version.
    13  dnl
    14  dnl  or
    15  dnl
    16  dnl    * the GNU General Public License as published by the Free Software
    17  dnl      Foundation; either version 2 of the License, or (at your option) any
    18  dnl      later version.
    19  dnl
    20  dnl  or both in parallel, as here.
    21  dnl
    22  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    23  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  dnl  for more details.
    26  dnl
    27  dnl  You should have received copies of the GNU General Public License and the
    28  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  dnl  see https://www.gnu.org/licenses/.
    30  
    31  include(`../config.m4')
    32  
    33  
    34  C			    cycles/limb
    35  C P5				 ?
    36  C P6 model 0-8,10-12		 ?
    37  C P6 model 9  (Banias)		 ?
    38  C P6 model 13 (Dothan)		 ?
    39  C P4 model 0  (Willamette)	 ?
    40  C P4 model 1  (?)		 ?
    41  C P4 model 2  (Northwood)	 4.5
    42  C P4 model 3  (Prescott)	 ?
    43  C P4 model 4  (Nocona)		 ?
    44  C Intel Atom			 ?
    45  C AMD K6			 ?
    46  C AMD K7			 3.4
    47  C AMD K8			 ?
    48  C AMD K10			 ?
    49  
    50  C NOTES
    51  C  * This has not been tuned for any specific processor.  Its speed should not
    52  C    be too bad, though.
    53  C  * Using SSE2 could result in many-fold speedup.
    54  
    55  C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
        C
        C Copy table entry number which (counting from 0) into the n limbs at rp.
        C The table at tp holds nents entries of n limbs each.  Every entry is read
        C and every limb of rp is rewritten on every pass; the choice is applied
        C with AND/OR masks, so no branch and no address depends on which.
        C If which never equals a pass index, rp ends up with its original contents.
        C
        C Stack after the four pushes in the prologue:
        C    0(%esp) saved ebp    4(%esp) saved ebx    8(%esp) saved esi   12(%esp) saved edi
        C   16(%esp) presumably the return address
        C   20(%esp) rp   24(%esp) tp   28(%esp) n   32(%esp) nents   36(%esp) which
        C
        C NOTE(review): both loops test at the bottom, so n = 0 or nents = 0 would
        C still run a pass; presumably callers guarantee n >= 1 and nents >= 1 -
        C confirm against the callers.
    56  define(`rp',     `%edi')	C destination pointer, moved to one past its last limb
    57  define(`tp',     `%esi')	C table pointer, kept one past the last limb of the current entry
    58  define(`n',      `%ebx')	C limbs per entry
    59  define(`nents',  `%ecx')	C entries still to visit, counts down to 0
    60  define(`which',  `36(%esp)')	C stays in its argument slot; biased by -nents before the loop
    61  
    62  define(`i',      `%ebp')	C negative limb index, runs from -n up to 0
    63  define(`maskp',  `20(%esp)')	C all ones on the selected pass; reuses the rp argument slot
    64  define(`maskn',  `32(%esp)')	C all ones on every other pass; reuses the nents argument slot
    65  
    66  ASM_START()
    67  	TEXT
    68  	ALIGN(16)
    69  PROLOGUE(mpn_sec_tabselect)
    70  	push	%edi			C save the four registers restored by the pops at the end
    71  	push	%esi
    72  	push	%ebx
    73  	push	%ebp
    74  	mov	20(%esp), rp		C load the arguments; slots 20 and 32 are reused as masks later
    75  	mov	24(%esp), tp
    76  	mov	28(%esp), n
    77  	mov	32(%esp), nents
    78  
    79  	lea	(rp,n,4), rp		C rp = one past the last destination limb
    80  	lea	(tp,n,4), tp		C tp = one past the last limb of entry 0
    81  	sub	nents, which		C which -= nents; which + nents is then 0 only on pass number which
    82  L(outer):
    83  	mov	which, %eax
    84  	add	nents, %eax		C eax = which - k, where k is the index of the current entry
    85  	neg	%eax			C set CF iff 'which' != k
    86  	sbb	%eax, %eax		C eax = -CF: 0 for the selected entry, all ones otherwise
    87  	mov	%eax, maskn		C mask that keeps the current contents of rp
    88  	not	%eax
    89  	mov	%eax, maskp		C complementary mask that lets the table limb through
    90  
    91  	mov	n, i
    92  	neg	i			C i = -n, indexing backwards from the end pointers
    93  
    94  	ALIGN(16)
    95  L(top):	mov	(tp,i,4), %eax		C table limb of the current entry
    96  	and	maskp, %eax		C survives only on the selected pass
    97  	mov	(rp,i,4), %edx		C limb currently in the destination
    98  	and	maskn, %edx		C survives on every other pass
    99  	or	%edx, %eax		C exactly one of the two operands is nonzero-capable
   100  	mov	%eax, (rp,i,4)		C stored unconditionally on every pass
   101  	inc	i
   102  	js	L(top)			C repeat while i is still negative
   103  
   104  L(end):	mov	n, %eax
   105  	lea	(tp,%eax,4), tp		C advance tp by n limbs, to the end of the next entry
   106  	dec	nents
   107  	jne	L(outer)		C repeat until nents has counted down to 0
   108  
   109  L(outer_end):
   110  	pop	%ebp			C restore saved registers in reverse push order
   111  	pop	%ebx
   112  	pop	%esi
   113  	pop	%edi
   114  	ret
   115  EPILOGUE()