github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/mmx/sec_tabselect.asm (about)

     1  dnl  X86 MMX mpn_sec_tabselect.
     2  
     3  dnl  Contributed to the GNU project by Torbjörn Granlund.
     4  
     5  dnl  Copyright 2011-2013 Free Software Foundation, Inc.
     6  
     7  dnl  This file is part of the GNU MP Library.
     8  dnl
     9  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    10  dnl  it under the terms of either:
    11  dnl
    12  dnl    * the GNU Lesser General Public License as published by the Free
    13  dnl      Software Foundation; either version 3 of the License, or (at your
    14  dnl      option) any later version.
    15  dnl
    16  dnl  or
    17  dnl
    18  dnl    * the GNU General Public License as published by the Free Software
    19  dnl      Foundation; either version 2 of the License, or (at your option) any
    20  dnl      later version.
    21  dnl
    22  dnl  or both in parallel, as here.
    23  dnl
    24  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
    25  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  dnl  for more details.
    28  dnl
    29  dnl  You should have received copies of the GNU General Public License and the
    30  dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  dnl  see https://www.gnu.org/licenses/.
    32  
    33  include(`../config.m4')
    34  
    35  C			     cycles/limb     cycles/limb
    36  C			      ali,evn n	     unal,evn n
    37  C P5
    38  C P6 model 0-8,10-12
    39  C P6 model 9  (Banias)
    40  C P6 model 13 (Dothan)		 1.33		 1.87
    41  C P4 model 0  (Willamette)
    42  C P4 model 1  (?)
    43  C P4 model 2  (Northwood)	 2.1		 2.63
    44  C P4 model 3  (Prescott)
    45  C P4 model 4  (Nocona)		 1.7		 2.57
    46  C Intel Atom			 1.85		 2.7
    47  C AMD K6
    48  C AMD K7			 1.33		 1.33
    49  C AMD K8
    50  C AMD K10
    51  
    52  define(`rp',     `%edi')	C destination pointer (1st stack argument)
    53  define(`tp',     `%esi')	C table read pointer (2nd stack argument)
    54  define(`n',      `%edx')	C 3rd stack argument; n*4 bytes is the stride between entries
    55  define(`nents',  `%ecx')	C 4th stack argument; start value of the inner loop counter
    56  define(`which',  `')		C 5th stack argument; expands to nothing, the value is loaded into %mm6
    57  
    58  define(`i',      `%ebp')	C inner loop counter, counts down from nents to 0
    59  define(`j',      `%ebx')	C outer loop counter, starts at n - 4 and drops by 4; %ebx first carries the constant 1
    60  
    61  ASM_START()
    62  	TEXT
    63  	ALIGN(16)
        C mpn_sec_tabselect -- copy one entry of a table to rp while reading every
        C entry of the table.
        C
        C Stack arguments, in order: rp, tp, n, nents, which.
        C   rp     destination; n*4 bytes are written
        C   tp     table base; consecutive entries are n*4 bytes apart
        C   n      number of 4-byte elements per entry
        C   nents  number of entries scanned.  Must be at least 1: the inner loops
        C          run their body before testing the counter, so nents = 0 would
        C          wrap the counter instead of skipping the loop.
        C   which  index of the entry that ends up at rp.  If no scanned entry has
        C          this index, zeros are stored.
        C
        C Method: the entry is produced in column blocks of 16 bytes, then 8 bytes
        C if bit 1 of n is set, then 4 bytes if bit 0 of n is set.  For each block a
        C counter in %mm1 runs 0, 1, 2, ... over the entries.  Comparing it with
        C `which' gives an all-ones mask for the matching entry and zero for the
        C others; each entry is ANDed with the mask and ORed into an accumulator.
        C Branches depend only on n and nents, and load addresses only on tp and n,
        C never on `which'.
        C
        C Registers: %ebx, %esi, %edi and %ebp are saved and restored.  %eax, %ecx,
        C %edx and %mm0-%mm7 are overwritten.
    64  PROLOGUE(mpn_sec_tabselect)
    65  	push	%ebx			C save the four registers restored at L(b00)
    66  	push	%esi
    67  	push	%edi
    68  	push	%ebp
    69  
    70  	mov	20(%esp), rp		C 16 bytes of pushes + return address lie below the 1st argument
    71  	mov	24(%esp), tp
    72  	mov	28(%esp), n
    73  	mov	32(%esp), nents
    74  
    75  	movd	36(%esp), %mm6		C 5th argument into the low half of mm6
    76  	punpckldq %mm6, %mm6		C 2 copies of `which'
    77  
    78  	mov	$1, %ebx		C %ebx is free here; it becomes j just below
    79  	movd	%ebx, %mm7
    80  	punpckldq %mm7, %mm7		C 2 copies of 1
    81  
    82  	mov	n, j
    83  	add	$-4, j			C j = n - 4
    84  	js	L(outer_end)		C negative: skip the 16-byte loop, only the tails run
    85  
    86  L(outer_top):				C one pass per 16 bytes (4 elements) of the entry
    87  	mov	nents, i		C i = entries still to scan
    88  	mov	tp, %eax		C remember where this column starts in the first entry
    89  	pxor	%mm1, %mm1		C entry counter = 0 in both halves
    90  	pxor	%mm4, %mm4		C accumulator for bytes 0-7
    91  	pxor	%mm5, %mm5		C accumulator for bytes 8-15
    92  	ALIGN(16)
    93  L(top):	movq	%mm6, %mm0
    94  	pcmpeqd	%mm1, %mm0		C mm0 = all ones if counter equals `which', else 0
    95  	paddd	%mm7, %mm1		C counter += 1
    96  	movq	(tp), %mm2		C read 16 bytes of the current entry
    97  	movq	8(tp), %mm3
    98  	pand	%mm0, %mm2		C keep them only under the mask
    99  	pand	%mm0, %mm3
   100  	por	%mm2, %mm4		C merge into the accumulators
   101  	por	%mm3, %mm5
   102  	lea	(tp,n,4), tp		C same column of the next entry, n*4 bytes further on
   103  	add	$-1, i
   104  	jne	L(top)			C until i reaches 0
   105  
   106  	movq	%mm4, (rp)		C store the selected 16 bytes
   107  	movq	%mm5, 8(rp)
   108  
   109  	lea	16(%eax), tp		C back to the first entry, next 16-byte column
   110  	lea	16(rp), rp
   111  	add	$-4, j
   112  	jns	L(outer_top)		C repeat while another 4 elements remain
   113  L(outer_end):
   114  
   115  	test	$2, %dl			C %dl is the low byte of n; bit 1 set: 8 more bytes to do
   116  	jz	L(b0x)
   117  
   118  L(b1x):	mov	nents, i		C same scan as L(top), one 8-byte column
   119  	mov	tp, %eax		C remember the column start for the final tail
   120  	pxor	%mm1, %mm1		C entry counter = 0
   121  	pxor	%mm4, %mm4		C accumulator = 0
   122  	ALIGN(16)
   123  L(tp2):	movq	%mm6, %mm0
   124  	pcmpeqd	%mm1, %mm0		C mask for this entry
   125  	paddd	%mm7, %mm1		C counter += 1
   126  	movq	(tp), %mm2
   127  	pand	%mm0, %mm2
   128  	por	%mm2, %mm4
   129  	lea	(tp,n,4), tp		C next entry
   130  	add	$-1, i
   131  	jne	L(tp2)
   132  
   133  	movq	%mm4, (rp)		C store the selected 8 bytes
   134  
   135  	lea	8(%eax), tp		C back to the first entry, 8 bytes further on
   136  	lea	8(rp), rp
   137  
   138  L(b0x):	test	$1, %dl			C bit 0 of n set: one last 4-byte element
   139  	jz	L(b00)
   140  
   141  L(b01):	mov	nents, i		C tp is not saved here, no pass follows this one
   142  	pxor	%mm1, %mm1		C entry counter = 0
   143  	pxor	%mm4, %mm4		C accumulator = 0
   144  	ALIGN(16)
   145  L(tp1):	movq	%mm6, %mm0
   146  	pcmpeqd	%mm1, %mm0		C mask for this entry
   147  	paddd	%mm7, %mm1		C counter += 1
   148  	movd	(tp), %mm2		C 4-byte load into the low half of mm2
   149  	pand	%mm0, %mm2
   150  	por	%mm2, %mm4
   151  	lea	(tp,n,4), tp		C next entry
   152  	add	$-1, i
   153  	jne	L(tp1)
   154  
   155  	movd	%mm4, (rp)		C store the low 4 bytes only
   156  
   157  L(b00):	pop	%ebp			C restore in the reverse order of the pushes
   158  	pop	%edi
   159  	pop	%esi
   160  	pop	%ebx
   161  	emms				C leave MMX state before returning to the caller
   162  	ret
   163  EPILOGUE()