dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/k7/mmx/copyd.asm

dnl  AMD K7 mpn_copyd -- copy limb vector, decrementing.

dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C        alignment dst/src, A=0mod8 N=4mod8
C           A/A    A/N    N/A    N/N
C K7        0.75   1.0    1.0    0.75


C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The various comments in mpn/x86/k7/copyi.asm apply here too.
C mpn_copyd: copy size limbs (4 bytes each) from src to dst, working from
C the highest-addressed limb down to the lowest.
C
C Stack parameters, as declared by the defframe lines below:
C       PARAM_DST   dst pointer
C       PARAM_SRC   src pointer
C       PARAM_SIZE  limb count
C
C Registers: eax, ecx, edx, mm0 and mm1 are overwritten.  ebx and esi are
C used as scratch but saved and restored.  No push/pop is used; the saved
C values live in the incoming parameter slots.

defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)
deflit(`FRAME',0)

dnl  parameter space reused
dnl  (each slot is written only after its parameter has been loaded into
dnl  a register)
define(SAVE_EBX,`PARAM_SIZE')
define(SAVE_ESI,`PARAM_SRC')

dnl  minimum 5 since the unrolled code can't handle less than 5
deflit(UNROLL_THRESHOLD, 5)

        TEXT
        ALIGN(32)
PROLOGUE(mpn_copyd)

        movl    PARAM_SIZE, %ecx
        movl    %ebx, SAVE_EBX          C size is in ecx now, its slot is free

        movl    PARAM_SRC, %eax
        movl    PARAM_DST, %edx

        cmpl    $UNROLL_THRESHOLD, %ecx
        jae     L(unroll)

        orl     %ecx, %ecx              C size == 0, nothing to copy
        jz      L(simple_done)

L(simple):
        C eax   src
        C ebx   scratch
        C ecx   counter
        C edx   dst
        C
        C this loop is 2 cycles/limb
        C
        C ecx runs from size down to 1, so limb ecx-1 is copied each pass

        movl    -4(%eax,%ecx,4), %ebx
        movl    %ebx, -4(%edx,%ecx,4)
        decl    %ecx
        jnz     L(simple)

L(simple_done):
        movl    SAVE_EBX, %ebx
        ret


L(unroll):
        C size >= UNROLL_THRESHOLD here
        C
        C ebx = (src + 4*size) AND (dst + 4*size); bit 2 of the result is
        C set only when both end addresses have bit 2 set.  esi is needed
        C just for this computation and is restored straight away.

        movl    %esi, SAVE_ESI
        leal    (%eax,%ecx,4), %ebx
        leal    (%edx,%ecx,4), %esi

        andl    %esi, %ebx
        movl    SAVE_ESI, %esi
        subl    $4, %ecx                C size-4

        testl   $4, %ebx   C testl to pad code closer to 16 bytes for L(top)
        jz      L(aligned)

        C both src and dst unaligned, process one limb to align them
        C (offset 12 with ecx = size-4 addresses the topmost limb)
        movl    12(%eax,%ecx,4), %ebx
        movl    %ebx, 12(%edx,%ecx,4)
        decl    %ecx
L(aligned):


        ALIGN(16)
L(top):
        C eax   src
        C ebx
        C ecx   counter, limbs
        C edx   dst
        C
        C Four limbs per pass: two 8-byte loads, then two 8-byte stores.
        C The stores come after the subl, so their displacements carry an
        C extra 16 to address the same limbs as the loads.  movq leaves
        C the flags alone, so jns tests the result of the subl.

        movq    8(%eax,%ecx,4), %mm0
        movq    (%eax,%ecx,4), %mm1
        subl    $4, %ecx
        movq    %mm0, 16+8(%edx,%ecx,4)
        movq    %mm1, 16(%edx,%ecx,4)
        jns     L(top)


        C now %ecx is -4 to -1 representing respectively 0 to 3 limbs remaining

        testb   $2, %cl                 C set for -2 and -1, i.e. 2 or 3 left
        jz      L(finish_not_two)

        movq    8(%eax,%ecx,4), %mm0
        movq    %mm0, 8(%edx,%ecx,4)
L(finish_not_two):

        testb   $1, %cl                 C set for -3 and -1, i.e. 1 or 3 left
        jz      L(done)

        C the one limb still outstanding is the lowest one
        movl    (%eax), %ebx
        movl    %ebx, (%edx)

L(done):
        movl    SAVE_EBX, %ebx
        emms                            C mm0/mm1 were used, clear MMX state
        ret


EPILOGUE()