dnl  x86-64 mpn_lshiftc optimized for "Core 2".

dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 ?
C AMD K10	 ?
C Intel P4	 ?
C Intel core2	 1.5
C Intel NHM	 2.25	(up to about n = 260, then 1.875)
C Intel SBR	 2.25
C Intel atom	 ?
C VIA nano	 ?


C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

C mpn_lshiftc (rp, up, n, cnt)
C
C Shift the n-limb operand at up left by cnt bits and store the ones
C complement of every result limb at rp.  The bits shifted out of the most
C significant limb are returned in rax; the return value is not complemented.
C
C Limbs are processed from the most significant one downwards.  The low bits
C vacated in the least significant limb are zero before the complement, so
C they end up stored as ones.
C
C Requirements that follow from the code below; none of them is checked:
C   n >= 1.  With n = 0 the code enters at L(b00) and loads from below up.
C   cnt is used only through its low byte as the shld/shl count.  Presumably
C   1 <= cnt <= 63 as for the other mpn shift functions -- TODO confirm
C   against the callers.
C
C Registers written in this file: rax, rdx, rsi, rdi, r8-r11.  No stack
C accesses appear here.  FUNC_ENTRY and FUNC_EXIT come from config.m4 and are
C not visible in this file.

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
	lea	-8(rp,n,8), rp		C rp -> most significant result limb
	lea	-8(up,n,8), up		C up -> most significant source limb

	mov	R32(n), R32(%rax)
	and	$3, R32(%rax)		C n mod 4 selects the loop entry point
	jne	L(nb00)
L(b00):	C n = 4, 8, 12, ...
	mov	(up), %r10
	mov	-8(up), %r11
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r10, %rax	C return value: bits shifted out at the top
	mov	-16(up), %r8
	lea	24(rp), rp		C bias rp so that -24(rp) at L(00) is the top limb
	sub	$4, n
	jmp	L(00)

L(nb00):C n mod 4 is 1, 2 or 3
	cmp	$2, R32(%rax)
	jae	L(nb01)
L(b01):	C n = 1, 5, 9, ...
	mov	(up), %r9
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r9, %rax	C return value: bits shifted out at the top
	sub	$2, n
	jb	L(le1)			C borrow means n was 1
	mov	-8(up), %r10
	mov	-16(up), %r11
	lea	-8(up), up
	lea	16(rp), rp		C bias rp so that -16(rp) at L(01) is the top limb
	jmp	L(01)
L(le1):	C n = 1: the single limb is both the top and the bottom limb
	shl	R8(cnt), %r9
	not	%r9
	mov	%r9, (rp)
	FUNC_EXIT()
	ret

L(nb01):C flags are still those of the cmp with 2 above; not equal means n mod 4 = 3
	jne	L(b11)
L(b10):	C n = 2, 6, 10, ...
	mov	(up), %r8
	mov	-8(up), %r9
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r8, %rax	C return value: bits shifted out at the top
	sub	$3, n
	jb	L(le2)			C borrow means n was 2
	mov	-16(up), %r10
	lea	-16(up), up
	lea	8(rp), rp		C bias rp so that -8(rp) at L(10) is the top limb
	jmp	L(10)
L(le2):	C n = 2: finish both limbs without entering the loop
	shld	R8(cnt), %r9, %r8
	not	%r8
	mov	%r8, (rp)
	shl	R8(cnt), %r9		C bottom limb: zeros shifted in, stored as ones
	not	%r9
	mov	%r9, -8(rp)
	FUNC_EXIT()
	ret

	ALIGN(16)			C performance critical!
L(b11):	C n = 3, 7, 11, ...
	mov	(up), %r11
	mov	-8(up), %r8
	xor	R32(%rax), R32(%rax)
	shld	R8(cnt), %r11, %rax	C return value: bits shifted out at the top
	mov	-16(up), %r9
	lea	-24(up), up
	sub	$4, n
	jb	L(end)			C borrow means n was 3: only the tail is left

C Main loop, four limbs per pass.  It is entered at L(top), L(10), L(01) or
C L(00) depending on n mod 4.  Each stage merges one limb with the top bits
C of the next lower limb using shld, loads a limb from further down,
C complements the merged limb and stores it.  The limbs rotate through
C r11, r8, r9 and r10 in that order.
	ALIGN(16)
L(top):	shld	R8(cnt), %r8, %r11
	mov	(up), %r10
	not	%r11
	mov	%r11, (rp)
L(10):	shld	R8(cnt), %r9, %r8
	mov	-8(up), %r11
	not	%r8
	mov	%r8, -8(rp)
L(01):	shld	R8(cnt), %r10, %r9
	mov	-16(up), %r8
	not	%r9
	mov	%r9, -16(rp)
L(00):	shld	R8(cnt), %r11, %r10
	mov	-24(up), %r9
	not	%r10
	mov	%r10, -24(rp)
	add	$-32, up
	lea	-32(rp), rp
	sub	$4, n
	jnc	L(top)			C a borrow out of n ends the loop

C Tail: three limbs are still held in r11, r8 and r9, from high to low.
L(end):	shld	R8(cnt), %r8, %r11
	not	%r11
	mov	%r11, (rp)
	shld	R8(cnt), %r9, %r8
	not	%r8
	mov	%r8, -8(rp)
	shl	R8(cnt), %r9		C bottom limb: zeros shifted in, stored as ones
	not	%r9
	mov	%r9, -16(rp)
	FUNC_EXIT()
	ret
EPILOGUE()