dnl  Intel P5 mpn_popcount -- mpn bit population count.

dnl  Path: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/pentium/popcount.asm

dnl  Copyright 2001, 2002, 2014, 2015 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 8.0 cycles/limb


C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C
C An arithmetic approach has been found to be slower than the table lookup,
C due to needing too many instructions.

C TABLE_NAME is the symbol of the byte lookup table used by this routine.
C Its text is GSYM_PREFIX followed by the mpn_popcount name and a _table
C suffix; m4_assert_defined is applied to GSYM_PREFIX first.
C
C The slightly strange quoting here helps the renaming done by tune/many.pl.
deflit(TABLE_NAME,
m4_assert_defined(`GSYM_PREFIX')
GSYM_PREFIX`'mpn_popcount``'_table')

C FIXME: exporting the table to hamdist is incorrect as it hurts incremental
C linking.

C Lookup table: 256 one-byte entries, entry i holding m4_popcount(i).
C The code below indexes it with the value of each source byte.
C GLOBL makes the symbol visible outside this object file.

	RODATA
	ALIGN(8)
	GLOBL	TABLE_NAME
TABLE_NAME:
forloop(i,0,255,
`	.byte	m4_popcount(i)
')

C Stack arguments: src at offset 4 and size at offset 8.
C NOTE(review): presumably these offsets are relative to the entry stack
C pointer and are corrected through FRAME after each FRAME_pushl -- confirm
C against the defframe definition.
defframe(PARAM_SIZE,8)
defframe(PARAM_SRC, 4)

	TEXT
	ALIGN(8)

C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C
C Returns in eax the sum of the table entries selected by every byte of the
C size limbs at src.  The counter is 2*size byte pairs, so a limb is treated
C as 4 bytes.
C
C Registers: eax running total, ebx and edx source byte then its table
C entry, ecx byte pair counter, esi src, ebp table address in PIC builds.
C esi and ebx, plus ebp in PIC builds, are pushed on entry and popped
C before ret.
C
C NOTE(review): the loop body runs before the counter is tested, so a size
C of zero is not handled here; presumably callers guarantee size >= 1.

PROLOGUE(mpn_popcount)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	pushl	%esi	FRAME_pushl()

ifdef(`PIC',`
	pushl	%ebx	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()
ifdef(`DARWIN',`
	shll	%ecx		C size in byte pairs
	LEA(	TABLE_NAME, %ebp)
	movl	PARAM_SRC, %esi
	xorl	%eax, %eax	C total
	xorl	%ebx, %ebx	C byte
	xorl	%edx, %edx	C byte
',`
	C call/pop puts the address of L(here) in ebp, which the addl below
	C turns into the GOT address
	call	L(here)
L(here):
	popl	%ebp
	shll	%ecx		C size in byte pairs

	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
	movl	PARAM_SRC, %esi

	xorl	%eax, %eax	C total
	xorl	%ebx, %ebx	C byte

	movl	TABLE_NAME@GOT(%ebp), %ebp	C table address from its GOT slot
	xorl	%edx, %edx	C byte
')
define(TABLE,`(%ebp,$1)')
',`
dnl non-PIC
	shll	%ecx		C size in byte pairs
	movl	PARAM_SRC, %esi

	pushl	%ebx	FRAME_pushl()
	xorl	%eax, %eax	C total

	xorl	%ebx, %ebx	C byte
	xorl	%edx, %edx	C byte

define(TABLE,`TABLE_NAME`'($1)')
')


	ALIGN(8)	C necessary on P55 for claimed speed
L(top):
	C eax	total
	C ebx	byte
	C ecx	counter, 2*size to 1
	C edx	byte
	C esi	src
	C edi
	C ebp	[PIC] table
	C
	C Each pass handles one byte pair, working from the high end of src
	C down to byte 0.  The two addl instructions add the table entries
	C fetched on the previous pass (zero on the first pass); the entries
	C from the final pass are added after the loop.
	C
	C Only bl and dl are written after the xorl instructions, so the upper
	C bits of ebx and edx stay zero and both serve directly as indices.

	addl	%ebx, %eax
	movb	-1(%esi,%ecx,2), %bl

	addl	%edx, %eax
	movb	-2(%esi,%ecx,2), %dl

	movb	TABLE(%ebx), %bl
	decl	%ecx

	movb	TABLE(%edx), %dl	C movb leaves the flags from decl intact
	jnz	L(top)


ifdef(`PIC',`
	popl	%ebp
')
	addl	%ebx, %eax	C entries fetched on the final pass
	popl	%ebx

	addl	%edx, %eax
	popl	%esi

	ret

EPILOGUE()
ASM_END()