github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/copyd.asm (about) 1 dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing. 2 3 dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 30 31 include(`../config.m4') 32 33 34 C P5: 1.25 cycles/limb 35 36 37 C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); 38 C 39 C See comments in copyi.asm. 
C mpn_copyd -- copy size limbs from src to dst, starting at the highest
C address and working down to the lowest.  Limbs are 4 bytes, as shown by
C the scale-4 addressing used throughout.
C
C The three parameters are read through the PARAM_ names declared with
C defframe just below.
C NOTE(review): defframe, deflit, FRAME_pushl, PROLOGUE and EPILOGUE are
C macros defined outside this file; the offsets 4, 8, 12 look like the
C usual 32-bit stack argument slots for dst, src, size -- confirm against
C the macro definitions.
C
C Register use: eax, ecx and edx are working registers and are not
C restored.  esi and edi are pushed on entry and popped before ret.
C
C Structure: an 8-limb unrolled main loop, then a tail that handles the
C remaining 0 to 7 limbs as an optional 4, an optional 2 and an optional 1.

defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_copyd)
deflit(`FRAME',0)

	movl	PARAM_SRC, %eax
	movl	PARAM_SIZE, %ecx

	C Save the two scratch registers.  FRAME_pushl presumably keeps the
	C PARAM_ offsets valid after each push, since PARAM_DST is still
	C read below -- confirm against the macro definition.
	pushl	%esi	FRAME_pushl()
	pushl	%edi	FRAME_pushl()

	leal	-4(%eax,%ecx,4), %eax		C &src[size-1]
	movl	PARAM_DST, %edx

	C Bias the counter by 7 so that a positive value means at least
	C 8 limbs remain, i.e. one full pass of the unrolled loop.
	subl	$7, %ecx			C size-7
	jle	L(end)				C size <= 7, tail only

	C Dummy load from dst[size-1]; the value in esi is never used, it is
	C overwritten by the first load in the loop.
	movl	28-4(%edx,%ecx,4), %esi		C prefetch cache, dst[size-1]
	nop					C purpose not stated here; presumably scheduling or alignment padding

L(top):
	C eax	src, decrementing
	C ebx
	C ecx	counter, limbs
	C edx	dst
	C esi	scratch
	C edi	scratch
	C ebp
	C
	C On entry to each pass ecx = remaining-7 and eax = &src[remaining-1].
	C The pass copies src[remaining-1] down to src[remaining-8].

	C Dummy load from dst[ecx-1] = dst[remaining-8], the lowest limb
	C this pass will store.  The loaded value is discarded.
	movl	28-32(%edx,%ecx,4), %esi	C prefetch dst cache line
	subl	$8, %ecx			C sets the flags tested by jg at the loop bottom

	C After the subtract, dst[old remaining-1] is at 56(%edx,%ecx,4) and
	C each following store is 4 bytes lower, ending at 28(%edx,%ecx,4).
	movl	(%eax), %esi			C read words pairwise
	movl	-4(%eax), %edi
	movl	%esi, 56(%edx,%ecx,4)		C store words pairwise
	movl	%edi, 52(%edx,%ecx,4)

	movl	-8(%eax), %esi
	movl	-12(%eax), %edi
	movl	%esi, 48(%edx,%ecx,4)
	movl	%edi, 44(%edx,%ecx,4)

	movl	-16(%eax), %esi
	movl	-20(%eax), %edi
	movl	%esi, 40(%edx,%ecx,4)
	movl	%edi, 36(%edx,%ecx,4)

	movl	-24(%eax), %esi
	movl	-28(%eax), %edi
	movl	%esi, 32(%edx,%ecx,4)
	movl	%edi, 28(%edx,%ecx,4)

	C leal is used for the pointer step because it leaves the flags from
	C subl $8 intact; movl does not change flags either, so jg still sees
	C the result of that subtract.  Do not reorder or replace with subl.
	leal	-32(%eax), %eax			C src -= 8 limbs
	jg	L(top)				C loop while new ecx > 0, i.e. 8 or more limbs remain


L(end):
	C ecx	-7 to 0, representing respectively 0 to 7 limbs remaining
	C eax	src end
	C edx	dst, next location to store
	C
	C In other words ecx = remaining-7 and eax = &src[remaining-1].

	addl	$4, %ecx			C ecx = remaining-3
	jle	L(no4)				C fewer than 4 remain

	C 4 to 7 limbs remain: copy the top four.  dst[remaining-1] is at
	C 8(%edx,%ecx,4).
	movl	(%eax), %esi
	movl	-4(%eax), %edi
	movl	%esi, 8(%edx,%ecx,4)
	movl	%edi, 4(%edx,%ecx,4)

	movl	-8(%eax), %esi
	movl	-12(%eax), %edi
	movl	%esi, (%edx,%ecx,4)
	movl	%edi, -4(%edx,%ecx,4)

	subl	$16, %eax			C src -= 4 limbs
	subl	$4, %ecx			C ecx = remaining-3 for the reduced count, same as on the jle path
L(no4):

	addl	$2, %ecx			C ecx = remaining-1
	jle	L(no2)				C fewer than 2 remain

	C 2 or 3 limbs remain: copy the top two.  dst[remaining-1] is at
	C (%edx,%ecx,4).
	movl	(%eax), %esi
	movl	-4(%eax), %edi
	movl	%esi, (%edx,%ecx,4)
	movl	%edi, -4(%edx,%ecx,4)

	subl	$8, %eax			C src -= 2 limbs
	subl	$2, %ecx			C ecx = remaining-1 again; also sets the flags for jnz below
L(no2):

	C Here ecx = remaining-1, which is 0 if one limb is left and -1 if
	C none.  The zero flag comes from addl $2 on the jle path, or from
	C subl $2 on the fall-through path.
	jnz	L(done)

	C Exactly one limb left: src[0] to dst[0], using ecx as scratch.
	movl	(%eax), %ecx
	movl	%ecx, (%edx)	C risk of cache bank clash here

L(done):
	C Restore the saved registers in reverse order of the pushes.
	popl	%edi
	popl	%esi

	ret

EPILOGUE()