dnl  Source listing: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/pentium4/lshiftc.asm

dnl  x86-64 mpn_lshiftc optimized for Pentium 4.

dnl  Copyright 2003, 2005, 2007, 2008, 2010, 2012 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 ?
C AMD K10	 ?
C Intel P4	 4.15
C Intel core2	 ?
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 ?

C mpn_lshiftc -- shift {up,n} left by cnt bits and store the bitwise
C complement of the shifted value at {rp,n}.
C
C With rsh = (-cnt) & 63 the code computes
C
C   rp[i] = ~((up[i] << cnt) | (up[i-1] >> rsh))     for i = n-1 ... 1
C   rp[0] = ~(up[0] << cnt)
C   rax   = up[n-1] >> rsh                           (not complemented)
C
C Limbs are produced from the most significant end downwards.
C
C NOTE(review): the code appears to assume n >= 1 and 1 <= cnt <= 63.
C Neither is checked here -- confirm against the callers.
C
C Register usage:
C   mm4      left shift count, taken from ecx as passed in
C   mm5      right shift count, rsh
C   mm6      all ones, the xor mask used for complementing
C   mm0-mm3  limbs in flight
C   r8d      (n+1) mod 4, selects the entry path into the 4-way unrolled loop
C   rax      return value
C
C FUNC_ENTRY and FUNC_EXIT come from the included m4 configuration.
C Presumably they map the DOS64 argument registers onto the registers named
C below -- see their definitions.

C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`n',`%rdx')
define(`cnt',`%cl')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
	mov	-8(up,n,8), %rax	C rax = up[n-1], the top limb
	pcmpeqd	%mm6, %mm6		C 0xffff...fff
	movd	R32(%rcx), %mm4		C mm4 = left shift count
	neg	R32(%rcx)		C put rsh count in cl
	and	$63, R32(%rcx)
	movd	R32(%rcx), %mm5		C mm5 = right shift count

	lea	1(n), R32(%r8)		C r8d = n+1, reduced mod 4 below

	shr	R8(%rcx), %rax		C function return value

	and	$3, R32(%r8)
	je	L(rol)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel off the top limb so that n = 3 mod 4 when the loop setup is reached.
	movq	-8(up,n,8), %mm2
	psllq	%mm4, %mm2
	movq	-16(up,n,8), %mm0
	pxor	%mm6, %mm2		C mm2 = ~(up[n-1] << cnt)
	psrlq	%mm5, %mm0
	pandn	%mm2, %mm0		C mm0 = ~mm0 & mm2, the complemented limb
	movq	%mm0, -8(rp,n,8)
	dec	n
	jmp	L(rol)

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel off the top limb, leaving n = 1 mod 4 for the code at 1x.
	movq	-8(up,n,8), %mm2
	psllq	%mm4, %mm2
	movq	-16(up,n,8), %mm0
	pxor	%mm6, %mm2
	psrlq	%mm5, %mm0
	pandn	%mm2, %mm0
	movq	%mm0, -8(rp,n,8)
	dec	n
L(1x):
	cmp	$1, n
	je	L(ast)			C only the lowest limb is left
C	Peel off two more limbs, leaving n = 3 mod 4.
	movq	-8(up,n,8), %mm2
	psllq	%mm4, %mm2
	movq	-16(up,n,8), %mm3
	psllq	%mm4, %mm3
	movq	-16(up,n,8), %mm0
	movq	-24(up,n,8), %mm1
	pxor	%mm6, %mm2
	psrlq	%mm5, %mm0
	pandn	%mm2, %mm0
	pxor	%mm6, %mm3
	psrlq	%mm5, %mm1
	pandn	%mm3, %mm1
	movq	%mm0, -8(rp,n,8)
	movq	%mm1, -16(rp,n,8)
	sub	$2, n

C	Loop setup, n = 3 mod 4 here.  Start the left shifts of the two top
C	remaining limbs, they are finished in the loop or at the end label.
L(rol):	movq	-8(up,n,8), %mm2
	psllq	%mm4, %mm2
	movq	-16(up,n,8), %mm3
	psllq	%mm4, %mm3

	sub	$4, n
	jb	L(end)			C three limbs left, skip the loop
	ALIGN(32)
C	On entry to each iteration mm2 and mm3 hold up[n+3] and up[n+2]
C	already shifted left.  Four limbs, rp[n+3] ... rp[n], are stored per
C	iteration.
L(top):
	C finish stuff from lsh block
	movq	16(up,n,8), %mm0
	pxor	%mm6, %mm2
	movq	8(up,n,8), %mm1
	psrlq	%mm5, %mm0
	psrlq	%mm5, %mm1
	pandn	%mm2, %mm0
	pxor	%mm6, %mm3
	movq	%mm0, 24(rp,n,8)
	movq	(up,n,8), %mm0
	pandn	%mm3, %mm1
	movq	%mm1, 16(rp,n,8)
	movq	-8(up,n,8), %mm1
	C start two new rsh
	psrlq	%mm5, %mm0
	psrlq	%mm5, %mm1

	C finish stuff from rsh block
	movq	8(up,n,8), %mm2
	pxor	%mm6, %mm0
	movq	(up,n,8), %mm3
	psllq	%mm4, %mm2
	psllq	%mm4, %mm3
	pandn	%mm0, %mm2
	pxor	%mm6, %mm1
	movq	%mm2, 8(rp,n,8)
	movq	-8(up,n,8), %mm2
	pandn	%mm1, %mm3
	movq	%mm3, (rp,n,8)
	movq	-16(up,n,8), %mm3
	C start two new lsh
	sub	$4, n			C sets the flags tested by jae below
	psllq	%mm4, %mm2		C MMX shifts leave the flags untouched
	psllq	%mm4, %mm3

	jae	L(top)

C	Wind down: finish rp[2] and rp[1] from the pending left shifts in mm2
C	and mm3.
L(end):	pxor	%mm6, %mm2
	movq	8(up), %mm0
	psrlq	%mm5, %mm0
	pandn	%mm2, %mm0
	pxor	%mm6, %mm3
	movq	(up), %mm1
	psrlq	%mm5, %mm1
	pandn	%mm3, %mm1
	movq	%mm0, 16(rp)
	movq	%mm1, 8(rp)

C	Lowest limb: nothing is shifted in from below, so after complementing
C	the low cnt bits of rp[0] are ones.
L(ast):	movq	(up), %mm2
	psllq	%mm4, %mm2
	pxor	%mm6, %mm2
	movq	%mm2, (rp)
	emms				C leave MMX state before returning
	FUNC_EXIT()
	ret
EPILOGUE()