dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/com.asm

dnl  Intel Pentium mpn_com -- mpn ones complement.

dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 1.75 cycles/limb


NAILS_SUPPORT(0-31)


C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C This code is similar to mpn_copyi, basically there's just some "xorl
C $GMP_NUMB_MASK"s inserted.
C
C Alternatives:
C
C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
C are the same alignment mod 8, but it doesn't seem worth the trouble for
C just that case (there'd need to be some plain integer available too for
C the unaligned case).
C mpn_com: write the complement of each of the size limbs at src to dst.
C Every limb is XORed with GMP_NUMB_MASK.  The bulk is done eight limbs
C per loop pass, then a 4-limb, a 2-limb and a 1-limb step mop up the rest.
C
C Argument slots for dst, src and size.
defframe(PARAM_DST, 4)
defframe(PARAM_SRC, 8)
defframe(PARAM_SIZE,12)

        TEXT
        ALIGN(8)
PROLOGUE(mpn_com)
deflit(`FRAME',0)

        movl    PARAM_SRC, %eax
        movl    PARAM_SIZE, %ecx

        C esi and edi are used as scratch, so save them; they are popped
        C again on both exit paths.
        pushl   %esi    FRAME_pushl()
        pushl   %edi    FRAME_pushl()

        leal    (%eax,%ecx,4), %eax     C eax = &src[size]
        xorl    $-1, %ecx               C ecx = -size-1

        movl    PARAM_DST, %edx
        addl    $8, %ecx                C ecx = -size+7

        jns     L(tail)                 C fewer than 8 limbs, skip the loop

        movl    (%edx), %esi            C touch the first destination cache line
        nop                             C filler (presumably for instruction pairing)

L(unroll8):
        C Main loop, eight limbs per pass.
        C   eax       &src[size], fixed
        C   ecx       negative limb counter, stepped by 8
        C   edx       dst write pointer, stepped by 32 bytes
        C   esi, edi  scratch holding one limb pair at a time

        movl    28(%edx), %esi          C read ahead in dst, value is discarded
        addl    $32, %edx

        C limbs 0 and 1 of this group
        movl    -28(%eax,%ecx,4), %esi
        movl    -24(%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, -32(%edx)
        movl    %edi, -28(%edx)

        C limbs 2 and 3
        movl    -20(%eax,%ecx,4), %esi
        movl    -16(%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, -24(%edx)
        movl    %edi, -20(%edx)

        C limbs 4 and 5
        movl    -12(%eax,%ecx,4), %esi
        movl    -8(%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, -16(%edx)
        movl    %edi, -12(%edx)

        C limbs 6 and 7
        movl    -4(%eax,%ecx,4), %esi
        movl    (%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, -8(%edx)
        movl    %edi, -4(%edx)

        addl    $8, %ecx
        js      L(unroll8)              C still at least 8 limbs to go


L(tail):
        C Leftovers.
        C   eax  &src[size]
        C   ecx  0..7, standing for 7..0 limbs still to do
        C   edx  next dst location to store

        subl    $4, %ecx
        nop

        jns     L(skip4)                C under 4 limbs left

        C four limbs
        movl    -12(%eax,%ecx,4), %esi
        movl    -8(%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, (%edx)
        movl    %edi, 4(%edx)

        movl    -4(%eax,%ecx,4), %esi
        movl    (%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, 8(%edx)
        movl    %edi, 12(%edx)

        addl    $16, %edx
        addl    $4, %ecx                C back to 0..3, standing for 3..0 limbs
L(skip4):

        subl    $2, %ecx
        nop

        jns     L(skip2)                C under 2 limbs left

        C two limbs
        movl    -4(%eax,%ecx,4), %esi
        movl    (%eax,%ecx,4), %edi
        xorl    $GMP_NUMB_MASK, %esi
        xorl    $GMP_NUMB_MASK, %edi
        movl    %esi, (%edx)
        movl    %edi, 4(%edx)

        addl    $8, %edx
        addl    $2, %ecx                C back to 0..1, standing for 1..0 limbs
L(skip2):

        C The zero flag tested below was set by the last subl or addl on ecx;
        C popl does not modify flags.  ecx == 0 means one limb is left.
        popl    %edi
        jnz     L(finish)

        movl    -4(%eax), %ecx          C last limb, src[size-1]

        xorl    $GMP_NUMB_MASK, %ecx
        popl    %esi

        movl    %ecx, (%edx)
        ret

L(finish):
        popl    %esi
        ret

EPILOGUE()