github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/fat/fat_entry.asm (about) 1 dnl x86 fat binary entrypoints. 2 3 dnl Contributed to the GNU project by Kevin Ryde (original x86_32 code) and 4 dnl Torbjorn Granlund (port to x86_64) 5 6 dnl Copyright 2003, 2009, 2011-2014, 2016 Free Software Foundation, Inc. 7 8 dnl This file is part of the GNU MP Library. 9 dnl 10 dnl The GNU MP Library is free software; you can redistribute it and/or modify 11 dnl it under the terms of either: 12 dnl 13 dnl * the GNU Lesser General Public License as published by the Free 14 dnl Software Foundation; either version 3 of the License, or (at your 15 dnl option) any later version. 16 dnl 17 dnl or 18 dnl 19 dnl * the GNU General Public License as published by the Free Software 20 dnl Foundation; either version 2 of the License, or (at your option) any 21 dnl later version. 22 dnl 23 dnl or both in parallel, as here. 24 dnl 25 dnl The GNU MP Library is distributed in the hope that it will be useful, but 26 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 27 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 28 dnl for more details. 29 dnl 30 dnl You should have received copies of the GNU General Public License and the 31 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 32 dnl see https://www.gnu.org/licenses/. 33 34 include(`../config.m4') 35 36 37 dnl Forcibly disable profiling. 38 dnl 39 dnl The entrypoints and inits are small enough not to worry about, the real 40 dnl routines arrived at will have any profiling. Also, the way the code 41 dnl here ends with a jump means we won't work properly with the 42 dnl "instrument" profiling scheme anyway. 43 44 define(`WANT_PROFILING',no) 45 46 47 dnl We define PRETEND_PIC as a helper symbol, then use it for suppressing 48 dnl normal, fast call code, since that triggers problems on Darwin, OpenBSD 49 dnl and some versions of GNU/Linux. 
dnl  This will go away when symbol hiding is
dnl  finished.
dnl
dnl  PRETEND_PIC is defined when any one of DARWIN, OPENBSD, LINUX or PIC is
dnl  defined.  Later in this file it selects the form that first loads the
dnl  address of __gmpn_cpuvec with the LEA macro and then jumps through a
dnl  register, instead of jumping through an absolute address.

ifdef(`DARWIN',
`define(`PRETEND_PIC')')
ifdef(`OPENBSD',
`define(`PRETEND_PIC')')
ifdef(`LINUX',
`define(`PRETEND_PIC')')
ifdef(`PIC',
`define(`PRETEND_PIC')')

dnl  Both x86_64 ABIs are declared as supported by this file.

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

	TEXT

dnl  Usage: FAT_ENTRY(name, offset)
dnl
dnl  Emit a fat binary entrypoint function of the given name.  This is the
dnl  normal entry for applications, eg. __gmpn_add_n.
dnl
dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
dnl  the given "offset" (in bytes).
dnl
dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
dnl  fine for all x86s.
dnl
dnl  For ELF/DARWIN PIC, the jumps are 20 bytes each, and are best aligned to
dnl  16 to ensure at least the first two instructions don't cross a cache line
dnl  boundary.
dnl
dnl  For DOS64, the jumps are 6 bytes.  The same form works also for GNU/Linux
dnl  (at least with certain assembler/linkers) but FreeBSD 8.2 crashes.  Not
dnl  tested on Darwin, Slowaris, NetBSD, etc.
dnl
dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
dnl  grepping in configure, stopping that code trying to eval something with
dnl  $1 in it.

dnl  FAT_ENTRY expands to one of three jump forms, chosen at m4 time:
dnl    HOST_DOS64   - align 8, RIP-relative indirect jmp via __gmpn_cpuvec+offset
dnl    PRETEND_PIC  - LEA macro puts __gmpn_cpuvec in %rax, then jmp *offset(%rax)
dnl    otherwise    - absolute indirect jmp via __gmpn_cpuvec+offset
dnl  The two non-DOS64 forms are aligned to 16 when PIC is defined, else 8.
dnl  The PRETEND_PIC form overwrites %rax before jumping.

define(FAT_ENTRY,
m4_assert_numargs(2)
`ifdef(`HOST_DOS64',
`	ALIGN(8)
`'PROLOGUE($1)
	jmp	*$2+GSYM_PREFIX`'__gmpn_cpuvec(%rip)
EPILOGUE()
',
`	ALIGN(ifdef(`PIC',16,8))
`'PROLOGUE($1)
ifdef(`PRETEND_PIC',
`	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %rax)
	jmp	*$2(%rax)
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
')
EPILOGUE()
')')


dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
dnl
dnl  CPUVEC_offset starts at 0 and advances by 8 per list member, so each
dnl  entrypoint jumps through its own 8-byte slot of __gmpn_cpuvec.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_ENTRY(MPN(i),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
CPUVEC_FUNCS_LIST)


dnl  Usage: FAT_INIT(name, offset)
dnl
dnl  Emit a fat binary initializer function of the given name.  These
dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
dnl
dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
dnl  the __gmpn_cpuvec pointer selected by "offset".  Unlike FAT_ENTRY, the
dnl  offset here is a pointer index rather than a byte count: the foreach
dnl  that instantiates FAT_INIT steps it by 1, and fat_init scales it by 8
dnl  when indexing the table.
dnl  __gmpn_cpuvec_init will have stored the address of the selected
dnl  implementation there.
dnl
dnl  Only one of these routines will be executed, and only once, since after
dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
dnl  need for anything special here, just something small and simple.  To
dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
dnl  with the index in %al.  %al is used since the movb instruction is 2
dnl  bytes where %eax would be 4.
dnl
dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
dnl  something with $1 in it.
dnl
dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call
dnl
dnl  Each FAT_INIT stub only loads its table index into %al and jumps to the
dnl  shared fat_init code; it does not touch the stack or any other register.

define(FAT_INIT,
m4_assert_numargs(2)
`PROLOGUE($1)
	mov	$`'$2, %al
	jmp	L(fat_init)
EPILOGUE()
')

dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
dnl
dnl  CPUVEC_offset is a pointer index here (stepped by 1, not by 8); fat_init
dnl  multiplies it by 8 when it indexes __gmpn_cpuvec.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
CPUVEC_FUNCS_LIST)

C Shared tail of every FAT_INIT stub.
C
C In:   al = index of the __gmpn_cpuvec pointer to jump through (not a byte
C            offset, it is scaled by 8 below)
C       parameter registers hold the arguments meant for the real routine
C
C The integer parameter registers and the index are saved around the call to
C __gmpn_cpuvec_init, then control is transferred (jmp, not call) through the
C freshly filled table slot, so the real routine returns straight to the
C original caller.
C
C Stack alignment at the call: this code is reached only by jumps, so the
C return address is the only thing on the stack on arrival.  STD64 pushes 7
C registers (56 bytes), DOS64 pushes 5 (40 bytes) plus 32 bytes of shadow
C space; both leave the stack pointer 16-byte aligned at the call.
C
C Clobbers: rax, and r10 in the PRETEND_PIC form, plus whatever
C __gmpn_cpuvec_init clobbers beyond the registers saved here.

L(fat_init):
	C al	__gmpn_cpuvec pointer index

	movzbl	%al, %eax
IFSTD(`	push	%rdi	')
IFSTD(`	push	%rsi	')
	push	%rdx
	push	%rcx
	push	%r8
	push	%r9
	push	%rax
C The DOS64 ABI lets the callee store its register parameters in the 32
C bytes above its return address.  Reserve that area so such stores cannot
C overwrite the registers just pushed.
ifdef(`HOST_DOS64',`
	sub	$32, %rsp
')
	CALL(	__gmpn_cpuvec_init)
ifdef(`HOST_DOS64',`
	add	$32, %rsp
')
	pop	%rax
	pop	%r9
	pop	%r8
	pop	%rcx
	pop	%rdx
IFSTD(`	pop	%rsi	')
IFSTD(`	pop	%rdi	')
ifdef(`PRETEND_PIC',`
	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %r10)
	jmp	*(%r10,%rax,8)
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
')


C long __gmpn_cpuid (char dst[12], int id);
C
C This is called only 3 times, so just something simple and compact is fine.
C
C The rcx/ecx zeroing here is needed for the BMI2 check.
C
C In:   rp  (rdi) = dst, receives 12 bytes
C       idx (rsi) = cpuid leaf; only the low 32 bits are used
C       (FUNC_ENTRY(2) is presumably what maps the DOS64 argument registers
C       onto rdi/rsi; confirm against its definition.)
C Out:  dst[0..3] = ebx, dst[4..7] = edx, dst[8..11] = ecx, as left by cpuid
C       rax = eax as left by cpuid
C
C rbx is callee-saved but overwritten by cpuid, so it is parked in r8 for
C the duration instead of being pushed.

define(`rp',  `%rdi')
define(`idx', `%rsi')

PROLOGUE(__gmpn_cpuid)
	FUNC_ENTRY(2)
	mov	%rbx, %r8
	mov	R32(idx), R32(%rax)
	xor	%ecx, %ecx
	cpuid
	mov	%ebx, (rp)
	mov	%edx, 4(rp)
	mov	%ecx, 8(rp)
	mov	%r8, %rbx
	FUNC_EXIT()
	ret
EPILOGUE()