dnl  github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/logops_n.asm

dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C P5: 3.0 c/l  and, ior, xor
C     3.5 c/l  andn, iorn, nand, nior, xnor


C M4_choose_op(op, want_post, insn, want_pre)
C
C When the symbol OPERATION_<op> is defined, select that variant:
C   M4_function   becomes mpn_<op>, the name given to PROLOGUE
C   M4op          becomes insn, the instruction that combines two limbs
C   M4_want_pre   becomes want_pre ("yes" or empty)
C   M4_want_post  becomes want_post ("yes" or empty)
C When OPERATION_<op> is not defined the call expands to nothing.
C
C Note the argument order: the post flag is the second argument and the pre
C flag is the fourth.

define(M4_choose_op,
`ifdef(`OPERATION_$1',`
define(`M4_function', `mpn_$1')
define(`M4_want_pre', `$4')
define(`M4op',        `$3')
define(`M4_want_post',`$2')
')')

C M4pre(text) and M4post(text) expand to text when the matching M4_want_
C flag is "yes", and to nothing otherwise.  In the function body M4pre wraps
C a complement of the limbs loaded from yp, applied before M4op, and M4post
C wraps a complement of the result, applied after M4op.

define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
define(M4post,`ifelse(M4_want_post,yes,`$1')')

C            op      post  insn  pre
M4_choose_op( and_n,     , andl,    )
M4_choose_op( andn_n,    , andl, yes)
M4_choose_op( nand_n, yes, andl,    )
M4_choose_op( ior_n,     ,  orl,    )
M4_choose_op( iorn_n,    ,  orl, yes)
M4_choose_op( nior_n, yes,  orl,    )
M4_choose_op( xor_n,     , xorl,    )
M4_choose_op( xnor_n, yes, xorl,    )

C None of the eight OPERATION_ symbols was defined, so there is nothing to
C build: stop with an error.

ifdef(`M4_function',,
`m4_error(`Unrecognised or undefined OPERATION symbol
')')

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

NAILS_SUPPORT(0-31)


C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
C
C Nothing complicated here, just some care to avoid data cache bank clashes
C and AGIs.
C
C We're one register short of being able to do a simple 4 loads, 2 ops, 2
C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
C pairings as intended.  An in-place operation would free up a register, for
C a 0.5 c/l speedup, if that's worth bothering with.
C
C This code seems best for P55 too.  Data alignment is a big problem for MMX
C and the pairing restrictions on movq and integer instructions make life
C difficult.

C Stack parameters, in the order of the C prototype
C   void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);

defframe(PARAM_SIZE,16)
defframe(PARAM_YP,  12)
defframe(PARAM_XP,   8)
defframe(PARAM_WP,   4)

	TEXT
	ALIGN(8)

C Limbs are processed from the high end of the operands downwards, two per
C loop iteration.  When size is odd the top limb is handled on its own
C first, so the loop only ever sees whole pairs.
C
C size is expected to be at least 1; a zero size is not checked for.
C
C %ebx, %esi, %edi and %ebp are pushed on entry and popped in reverse order
C at L(done).  %eax, %ecx and %edx are used as scratch and not preserved.
C
C Instructions are laid out two to a group, with nops as padding, to keep
C the intended pairing; do not reorder without re-checking the schedule.

PROLOGUE(M4_function)
deflit(`FRAME',0)

	pushl	%ebx	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	pushl	%edi	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_SIZE, %ecx
	movl	PARAM_XP, %ebx

	movl	PARAM_YP, %esi
	movl	PARAM_WP, %edi

	shrl	%ecx			C ecx = size/2 pairs, carry = low bit of size
	jnc	L(entry)		C even size: whole pairs only


	C Odd size: the unpaired limb is the top one, at index 2*ecx.

	movl	(%ebx,%ecx,8), %eax	C risk of data cache bank clash here
	movl	(%esi,%ecx,8), %edx

M4pre(`	notl_or_xorl_GMP_NUMB_MASK(%edx)')

	M4op	%edx, %eax

M4post(`xorl	$GMP_NUMB_MASK, %eax')
	orl	%ecx, %ecx		C set ZF from the pair count

	movl	%eax, (%edi,%ecx,8)	C movl leaves the flags from orl intact
	jz	L(done)			C size was 1, nothing left to do

	jmp	L(entry)


L(top):
	C eax	high limb of the pair, M4op already applied, post not yet
	C ebx	xp
	C ecx	counter, limb pairs, decrementing (already decremented here)
	C edx	low limb of the pair from yp, pre already applied
	C esi	yp
	C edi	wp
	C ebp	low limb of the pair from xp

	M4op	%ebp, %edx
	nop

M4post(`xorl	$GMP_NUMB_MASK, %eax')
M4post(`xorl	$GMP_NUMB_MASK, %edx')

	movl	%eax, 4(%edi,%ecx,8)
	movl	%edx, (%edi,%ecx,8)

L(entry):
	C ecx counts the pairs still to do, so the next pair sits at byte
	C offsets -4 (high limb) and -8 (low limb) from base + 8*ecx.

	movl	-4(%ebx,%ecx,8), %ebp
	nop

	movl	-4(%esi,%ecx,8), %eax
	movl	-8(%esi,%ecx,8), %edx

M4pre(`	xorl	$GMP_NUMB_MASK, %eax')
M4pre(`	xorl	$GMP_NUMB_MASK, %edx')

	M4op	%ebp, %eax
	movl	-8(%ebx,%ecx,8), %ebp	C reuse ebp for the low xp limb

	decl	%ecx
	jnz	L(top)


	C Last pair: same finishing steps as at L(top), with ecx now zero.

	M4op	%ebp, %edx
	nop

M4post(`xorl	$GMP_NUMB_MASK, %eax')
M4post(`xorl	$GMP_NUMB_MASK, %edx')

	movl	%eax, 4(%edi,%ecx,8)
	movl	%edx, (%edi,%ecx,8)


L(done):
	popl	%ebp
	popl	%edi

	popl	%esi
	popl	%ebx

	ret

EPILOGUE()