divert(-1)

dnl  Origin: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86_64/x86_64-defs.m4

dnl  m4 macros for amd64 assembler.

dnl  Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.
define(`CPUVEC_FUNCS_LIST',
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_1',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')


dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  Emits GLOBL and TYPE(...,`function') for the given symbol, followed by
dnl  the symbol's label.  No section or alignment directive is emitted here.
dnl
dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl                  ...
dnl          EPILOGUE()

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
`	GLOBL	$1
	TYPE($1,`function')
$1:
')


dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpq %rax, %rbx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushfq/popfq is added for
dnl  convenience to preserve the flags, but the instructions themselves must
dnl  preserve any registers that matter.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
dnl
dnl  A failed assertion executes ud2.  Each checked use consumes one value
dnl  of ASSERT_counter to build a unique L(ASSERT_okN) label.

define(`ASSERT',
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
`	$2',
`ifelse(`$2',,,
`	pushfq')
	$2
	`j$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popfq')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(`ASSERT_counter',1)


dnl  Usage: LEA(symbol,reg)
dnl
dnl  Load the address of `symbol' into `reg'.  When PIC is defined the
dnl  address is fetched through symbol@GOTPCREL(%rip), otherwise a movabs of
dnl  the absolute address is emitted.

define(`LEA',`dnl
ifdef(`PIC',
	`mov	$1@GOTPCREL(%rip), $2'
,
	`movabs	`$'$1, $2')
')


dnl  Usage: DEF_OBJECT(name[,align])
dnl
dnl  Switch to RODATA, emit ALIGN(align) and then the label `name'.  With a
dnl  single argument the value passed to ALIGN is 2.

define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
`	RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')


dnl  Usage: END_OBJECT(name)
dnl
dnl  Emit SIZE(name,.-name), i.e. the distance from the label to here.

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')


dnl  Usage: R32(reg64)
dnl         R8(reg64)
dnl
dnl  Expand to the 32-bit, respectively 8-bit, name of the given 64-bit
dnl  register.  %rsp is not in either table, and there is no default
dnl  branch, so an argument that is not listed expands to nothing.

define(`R32',
	`ifelse($1,`%rax',`%eax',
		$1,`%rbx',`%ebx',
		$1,`%rcx',`%ecx',
		$1,`%rdx',`%edx',
		$1,`%rsi',`%esi',
		$1,`%rdi',`%edi',
		$1,`%rbp',`%ebp',
		$1,`%r8',`%r8d',
		$1,`%r9',`%r9d',
		$1,`%r10',`%r10d',
		$1,`%r11',`%r11d',
		$1,`%r12',`%r12d',
		$1,`%r13',`%r13d',
		$1,`%r14',`%r14d',
		$1,`%r15',`%r15d')')
define(`R8',
	`ifelse($1,`%rax',`%al',
		$1,`%rbx',`%bl',
		$1,`%rcx',`%cl',
		$1,`%rdx',`%dl',
		$1,`%rsi',`%sil',
		$1,`%rdi',`%dil',
		$1,`%rbp',`%bpl',
		$1,`%r8',`%r8b',
		$1,`%r9',`%r9b',
		$1,`%r10',`%r10b',
		$1,`%r11',`%r11b',
		$1,`%r12',`%r12b',
		$1,`%r13',`%r13b',
		$1,`%r14',`%r14b',
		$1,`%r15',`%r15b')')


dnl  Usage: CALL(funcname)
dnl
dnl  Emit a call to GSYM_PREFIX`'funcname, with an @PLT suffix when PIC is
dnl  defined.

define(`CALL',`dnl
ifdef(`PIC',
	`call	GSYM_PREFIX`'$1@PLT'
,
	`call	GSYM_PREFIX`'$1'
)')


dnl  Section directive used for jump tables.

define(`JUMPTABSECT', `.section	.data.rel.ro.local,"aw",@progbits')


dnl  Usage: JMPENT(targlabel,tablabel)
dnl
dnl  Emit one jump table entry: a .long holding targlabel-tablabel when PIC
dnl  is defined, otherwise a .quad holding targlabel.

define(`JMPENT',`dnl
ifdef(`PIC',
	`.long	$1-$2'dnl
,
	`.quad	$1'dnl
)')


dnl  These macros are defined just for DOS64, where they provide calling
dnl  sequence glue code.  In this file both expand to nothing.

define(`FUNC_ENTRY',`')
define(`FUNC_EXIT',`')


dnl  Target ABI macros.  Here IFDOS discards its argument while IFSTD and
dnl  IFELF expand to their argument.

define(`IFDOS',   `')
define(`IFSTD',   `$1')
define(`IFELF',   `$1')


dnl  Usage: PROTECT(symbol)
dnl
dnl  Used for private GMP symbols that should never be overridden by users.
dnl  This can save reloc entries and improve shlib sharing as well as
dnl  application startup times

define(`PROTECT',  `.hidden $1')


dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.  The search recurses with the first key,value
dnl  pair dropped until fewer than three arguments remain.

define(`x86_lookup',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')


dnl  Usage: x86_opcode_regxmm(reg)
dnl
dnl  Validate the given xmm register, and return its number, 0 to 15.

define(`x86_opcode_regxmm',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regxmm_list)')

define(`x86_opcode_regxmm_list',
``%xmm0',0,
`%xmm1',1,
`%xmm2',2,
`%xmm3',3,
`%xmm4',4,
`%xmm5',5,
`%xmm6',6,
`%xmm7',7,
`%xmm8',8,
`%xmm9',9,
`%xmm10',10,
`%xmm11',11,
`%xmm12',12,
`%xmm13',13,
`%xmm14',14,
`%xmm15',15')

dnl  Usage: palignr($imm,%srcreg,%dstreg)
dnl
dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
dnl  still distributed versions of gas don't know SSSE3 instructions.
define(`palignr',
m4_assert_numargs(3)
`.byte	0x66,dnl
ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
0x0f,0x3a,0x0f,dnl
eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
substr($1,1)')

dnl  Notes on the palignr byte sequence above: the extra prefix byte
dnl  (0x40 plus 4 for a destination numbered 8 or more, plus 1 for a source
dnl  numbered 8 or more) is emitted only when either register is %xmm8 or
dnl  above.  The final byte is the immediate with its first character (the
dnl  `$') removed by substr.


dnl  Usage
dnl
dnl      regnum(op)   raw operand index (so slightly misnamed)
dnl      regnumh(op)  high bit of register operand number
dnl      ix(op)       0 for reg operand, 1 for plain pointer operand.
dnl
dnl  regnum looks `op' up in oplist, where the sixteen registers map to
dnl  0..15 and the same registers in parentheses map to 16..31.

define(`regnum',`x86_lookup(`$1',oplist)')
define(`regnumh',`eval(regnum($1)/8 & 1)')
define(`ix',`eval(regnum($1)/16)')
define(`oplist',
``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
 `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
 `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
 `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
 `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
 `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
 `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
 `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')


dnl  Usage
dnl
dnl     mulx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     mulx((reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     mulx(off,(reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
define(`mulx',`dnl
.byte	0xc4`'dnl
ifelse(`$#',3,`dnl
,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
,eval(0xfb-8*regnum($2))`'dnl
,0xf6`'dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
',`$#',4,`dnl
,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
,eval(0xfb-8*regnum($3))`'dnl
,0xf6`'dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')

dnl  Notes on the mulx byte sequence above: the macro selects its form from
dnl  the argument count.  With three arguments the last byte starts from
dnl  0xc0, and the 0xc0 is subtracted again when the first operand is a
dnl  parenthesised pointer (ix = 1).  With four arguments the last two bytes
dnl  are 0x40-based followed by the offset reduced modulo 256.  Any other
dnl  argument count emits only the leading 0xc4 byte.


dnl  Usage
dnl
dnl     adcx(reg1,reg2)
dnl     adox(reg1,reg2)
dnl
dnl  or
dnl
dnl     adcx((reg1),reg2)
dnl     adox((reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     adcx(off,(reg1),reg2)
dnl     adox(off,(reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  adx_helper emits the four bytes shared by both instructions (a
dnl  0x48-based byte carrying the high register bits, then 0x0f,0x38,0xf6).
dnl  adx appends the operand bytes, and adcx/adox differ only in their first
dnl  byte, 0x66 versus 0xf3.

define(`adx_helper',`dnl
,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
,0x0f`'dnl
,0x38`'dnl
,0xf6`'dnl
')

define(`adx',`dnl
ifelse(`$#',2,`dnl
adx_helper($1,$2)dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
',`$#',3,`dnl
adx_helper($2,$3)dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')

define(`adcx',`dnl
.byte	0x66`'dnl
adx($@)')

define(`adox',`dnl
.byte	0xf3`'dnl
adx($@)')

divert`'dnl