dnl  Source path: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/mmx/sec_tabselect.asm

dnl  X86 MMX mpn_sec_tabselect.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C                               cycles/limb     cycles/limb
C                               ali,evn n       unal,evn n
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)          1.33            1.87
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)       2.1             2.63
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)          1.7             2.57
C Intel Atom                    1.85            2.7
C AMD K6
C AMD K7                        1.33            1.33
C AMD K8
C AMD K10

C ---------------------------------------------------------------------------
C mpn_sec_tabselect (rp, tab, n, nents, which)
C
C C prototype as listed in the GMP manual (confirm against gmp.h):
C   void mpn_sec_tabselect (mp_limb_t *rp, const mp_limb_t *tab,
C                           mp_size_t n, mp_size_t nents, mp_size_t which)
C
C Syntax: AT&T, run through m4 with the macros from config.m4.
C ABI:    32-bit x86, all arguments on the stack.
C
C The table at tab holds nents entries of n limbs each, a limb being 4 bytes
C here (the entry stride is n*4 bytes).  The n limbs of entry number which are
C written to rp.  Every entry is loaded and ANDed with a mask that is all ones
C only when the running entry counter equals which, then ORed into an
C accumulator.  No branch and no load address depends on which.
C
C In (after the four pushes below, so the first argument sits at 20(%esp)):
C   20(%esp) rp     destination, n limbs
C   24(%esp) tab    table base
C   28(%esp) n      limbs per entry
C   32(%esp) nents  number of entries; the entry loops test the count at the
C                   bottom, so nents is assumed to be at least 1
C   36(%esp) which  index of the entry to extract
C Out:    n limbs stored at rp; no return value is set up.
C Saved:  ebx, esi, edi, ebp are pushed on entry and popped on exit.
C Clobb:  eax, ecx, edx, flags, mm0-mm7.  emms is executed before ret.
C
C MMX register roles:
C   mm6      which in both dwords
C   mm7      1 in both dwords
C   mm1      entry counter in both dwords, restarted at 0 for every column
C   mm0      per-entry select mask
C   mm2,mm3  limbs loaded from the current entry
C   mm4,mm5  accumulated result limbs
C ---------------------------------------------------------------------------

define(`rp',    `%edi')
define(`tp',    `%esi')
define(`n',     `%edx')
define(`nents', `%ecx')
define(`which', `')             C never kept in an integer register

define(`i',     `%ebp')         C entries left in the current column pass
define(`j',     `%ebx')         C limbs left for the 4-limb loop, minus 4

ASM_START()
        TEXT
        ALIGN(16)
PROLOGUE(mpn_sec_tabselect)
        push      %ebx
        push      %esi
        push      %edi
        push      %ebp

        mov       20(%esp), rp
        mov       24(%esp), tp
        mov       28(%esp), n
        mov       32(%esp), nents

        movd      36(%esp), %mm6
        punpckldq %mm6, %mm6            C mm6 = which in both dwords

        mov       $1, %ebx              C ebx is free here, j is set below
        movd      %ebx, %mm7
        punpckldq %mm7, %mm7            C mm7 = 1 in both dwords

        mov       n, j
        add       $-4, j                C j = n - 4
        js        L(outer_end)          C fewer than 4 limbs: tails only

C Main part: one pass over all entries per group of 4 limbs (16 bytes).
L(outer_top):
        mov       nents, i
        mov       tp, %eax              C remember the start of this column group
        pxor      %mm1, %mm1            C entry counter = 0
        pxor      %mm4, %mm4            C clear accumulator, limbs 0-1
        pxor      %mm5, %mm5            C clear accumulator, limbs 2-3
        ALIGN(16)
L(top):
        movq      %mm6, %mm0
        pcmpeqd   %mm1, %mm0            C mask = all ones iff counter == which
        paddd     %mm7, %mm1            C counter++
        movq      (tp), %mm2
        movq      8(tp), %mm3
        pand      %mm0, %mm2
        pand      %mm0, %mm3
        por       %mm2, %mm4
        por       %mm3, %mm5
        lea       (tp,n,4), tp          C same column, next entry (n limbs on)
        add       $-1, i
        jne       L(top)

        movq      %mm4, (rp)
        movq      %mm5, 8(rp)

        lea       16(%eax), tp          C back to the first entry, next 4 limbs
        lea       16(rp), rp
        add       $-4, j
        jns       L(outer_top)
L(outer_end):

C Tail: n mod 4 limbs remain, selected by the two low bits of n (dl is the
C low byte of edx, which holds n).
        test      $2, %dl
        jz        L(b0x)

C Two-limb tail, same scheme with a single 8-byte accumulator.
L(b1x):
        mov       nents, i
        mov       tp, %eax              C kept so tp can be rewound afterwards
        pxor      %mm1, %mm1
        pxor      %mm4, %mm4
        ALIGN(16)
L(tp2):
        movq      %mm6, %mm0
        pcmpeqd   %mm1, %mm0
        paddd     %mm7, %mm1
        movq      (tp), %mm2
        pand      %mm0, %mm2
        por       %mm2, %mm4
        lea       (tp,n,4), tp
        add       $-1, i
        jne       L(tp2)

        movq      %mm4, (rp)

        lea       8(%eax), tp
        lea       8(rp), rp

L(b0x):
        test      $1, %dl
        jz        L(b00)

C One-limb tail: 4-byte loads and store.  tp is not rewound, nothing follows.
L(b01):
        mov       nents, i
        pxor      %mm1, %mm1
        pxor      %mm4, %mm4
        ALIGN(16)
L(tp1):
        movq      %mm6, %mm0
        pcmpeqd   %mm1, %mm0
        paddd     %mm7, %mm1
        movd      (tp), %mm2
        pand      %mm0, %mm2
        por       %mm2, %mm4
        lea       (tp,n,4), tp
        add       $-1, i
        jne       L(tp1)

        movd      %mm4, (rp)

L(b00):
        pop       %ebp
        pop       %edi
        pop       %esi
        pop       %ebx
        emms                            C leave MMX state before returning
        ret
EPILOGUE()