github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/x86/x86-defs.m4 (about) 1 divert(-1) 2 3 dnl m4 macros for x86 assembler. 4 5 dnl Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc. 6 7 dnl This file is part of the GNU MP Library. 8 dnl 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 dnl it under the terms of either: 11 dnl 12 dnl * the GNU Lesser General Public License as published by the Free 13 dnl Software Foundation; either version 3 of the License, or (at your 14 dnl option) any later version. 15 dnl 16 dnl or 17 dnl 18 dnl * the GNU General Public License as published by the Free Software 19 dnl Foundation; either version 2 of the License, or (at your option) any 20 dnl later version. 21 dnl 22 dnl or both in parallel, as here. 23 dnl 24 dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 dnl for more details. 28 dnl 29 dnl You should have received copies of the GNU General Public License and the 30 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 dnl see https://www.gnu.org/licenses/. 32 33 34 dnl Notes: 35 dnl 36 dnl m4 isn't perfect for processing BSD style x86 assembler code, the main 37 dnl problems are, 38 dnl 39 dnl 1. Doing define(foo,123) and then using foo in an addressing mode like 40 dnl foo(%ebx) expands as a macro rather than a constant. This is worked 41 dnl around by using deflit() from asm-defs.m4, instead of define(). 42 dnl 43 dnl 2. Immediates in macro definitions need a space or `' to stop the $ 44 dnl looking like a macro parameter. For example, 45 dnl 46 dnl define(foo, `mov $ 123, %eax') 47 dnl 48 dnl This is only a problem in macro definitions, not in ordinary text, 49 dnl and not in macro parameters like text passed to forloop() or ifdef(). 
dnl  Number of bytes in a limb, as used by the x86 code.

deflit(GMP_LIMB_BYTES, 4)


dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
dnl  undefine PIC since we don't need to be position independent in this
dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.

ifdef(`DLL_EXPORT',`undefine(`PIC')')


dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.

define(CPUVEC_FUNCS_LIST,
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_1',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')


dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl          ...
dnl          EPILOGUE()
dnl
dnl  The expansion is the GLOBL, TYPE and COFF_TYPE declarations, the label
dnl  itself, and then a profiling hook chosen by WANT_PROFILING: call_mcount
dnl  for `prof' and `gprof', call_instrument(enter) for `instrument'.

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
m4_assert_defined(`WANT_PROFILING')
`	GLOBL	$1
	TYPE($1,`function')
	COFF_TYPE($1)
$1:
ifelse(WANT_PROFILING,`prof',      `	call_mcount')
ifelse(WANT_PROFILING,`gprof',     `	call_mcount')
ifelse(WANT_PROFILING,`instrument',`	call_instrument(enter)')
')


dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
dnl
dnl  Emit COFF style ".def ... .endef" type information for a function, when
dnl  supported.  The argument should include any GSYM_PREFIX.
dnl
dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
dnl
dnl  Nothing is emitted unless HAVE_COFF_TYPE is `yes'.

define(COFF_TYPE,
m4_assert_numargs(1)
m4_assert_defined(`HAVE_COFF_TYPE')
`ifelse(HAVE_COFF_TYPE,yes,
	`.def	$1
	.scl	2
	.type	32
	.endef')')


dnl  Usage: call_mcount
dnl
dnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of
dnl  the assembler routines use %ebp this way, so it's done only for the
dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
dnl  gets the current function from (%esp) and the parent from 4(%ebp).
dnl
dnl  For `prof' style profiling gcc generates mcount calls without setting
dnl  up %ebp, and the same is done here.
dnl
dnl  A 4-byte data word L(mcount_data_N) is emitted only when the register
dnl  selected for the current mode (MCOUNT_PIC_REG or MCOUNT_NONPIC_REG) is
dnl  non-empty; its address is then loaded into that register before the
dnl  MCOUNT_PIC_CALL or MCOUNT_NONPIC_CALL text.  In the PIC case %ebx is
dnl  saved, pointed at the GOT, and restored around the call.

define(`call_mcount',
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
m4_assert_defined(`MCOUNT_PIC_REG')
m4_assert_defined(`MCOUNT_NONPIC_REG')
m4_assert_defined(`MCOUNT_PIC_CALL')
m4_assert_defined(`MCOUNT_NONPIC_CALL')
`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
`	DATA
	ALIGN(4)
L(mcount_data_`'mcount_counter):
	W32	0
	TEXT
')dnl
ifelse(WANT_PROFILING,`gprof',
`	pushl	%ebp
	movl	%esp, %ebp
')dnl
ifdef(`PIC',
`	pushl	%ebx
	call_movl_eip_to_ebx
L(mcount_here_`'mcount_counter):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
ifelse(MCOUNT_PIC_REG,,,
`	leal	L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
MCOUNT_PIC_CALL
	popl	%ebx
',`dnl non-PIC
ifelse(MCOUNT_NONPIC_REG,,,
`	movl	`$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
')dnl
MCOUNT_NONPIC_CALL
')dnl
ifelse(WANT_PROFILING,`gprof',
`	popl	%ebp
')
define(`mcount_counter',incr(mcount_counter))
')

dnl  Sequence number used in the L(mcount_data_N) and L(mcount_here_N)
dnl  labels; call_mcount increments it on every expansion.

define(mcount_counter,1)


dnl  Usage: call_instrument(enter|exit)
dnl
dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
dnl
dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
dnl  so %ebx is just setup for these calls.  It's a bit wasteful to repeat
dnl  the setup for the exit call having done it earlier for the enter, but
dnl  there's nowhere very convenient to hold %ebx through the length of a
dnl  routine, in general.
dnl
dnl  For PIC, because instrument_current_function will be within the current
dnl  object file we can get it just as an offset from %eip, there's no need
dnl  to use the GOT.
dnl
dnl  No attempt is made to maintain the stack alignment gcc generates with
dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
dnl  unlikely the instrumenting functions would be doing anything that'd
dnl  benefit from alignment, in particular they're unlikely to be using
dnl  doubles or long doubles on the stack.
dnl
dnl  The FRAME scheme is used to conveniently account for the register saves
dnl  before accessing the return address.  Any previous value is saved and
dnl  restored, since plenty of code keeps a value across a "ret" in the
dnl  middle of a routine.
dnl
dnl  For the exit call %eax is pushed beforehand and popped afterwards, so
dnl  the function's return value is preserved across the call.

define(call_instrument,
m4_assert_numargs(1)
`	pushdef(`FRAME',0)
ifelse($1,exit,
`	pushl	%eax	FRAME_pushl()	C return value
')
ifdef(`PIC',
`	pushl	%ebx	FRAME_pushl()
	call_movl_eip_to_ebx
L(instrument_here_`'instrument_count):
	movl	%ebx, %ecx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
	addl	$instrument_current_function-L(instrument_here_`'instrument_count), %ecx
	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
	pushl	%ecx				FRAME_pushl()	C this function
	call	GSYM_PREFIX`'__cyg_profile_func_$1@PLT
	addl	$`'8, %esp
	popl	%ebx
',
`	C non-PIC
	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
	pushl	$instrument_current_function	FRAME_pushl()	C this function
	call	GSYM_PREFIX`'__cyg_profile_func_$1
	addl	$`'8, %esp
')
ifelse($1,exit,
`	popl	%eax			C return value
')
popdef(`FRAME')
define(`instrument_count',incr(instrument_count))
')
dnl  Sequence number used in the L(instrument_here_N) labels;
dnl  call_instrument increments it on every expansion.
define(instrument_count,1)


dnl  Usage: instrument_current_function
dnl
dnl  Return the current function name for instrumenting purposes.  This is
dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
dnl
dnl  Sticking to the first name seen ensures that multiple-entrypoint
dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
dnl  giving the same function address.
dnl
dnl  The first name is remembered in instrument_current_function_seen.

define(instrument_current_function,
m4_assert_numargs(-1)
`ifdef(`instrument_current_function_seen',
`instrument_current_function_seen',
`define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
PROLOGUE_current_function')')


dnl  Usage: call_movl_eip_to_ebx
dnl
dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
dnl  routine.
dnl  The need is recorded by defining movl_eip_to_ebx_needed to 1, which
dnl  generate_movl_eip_to_ebx tests.

define(call_movl_eip_to_ebx,
m4_assert_numargs(-1)
`call	L(movl_eip_to_ebx)
define(`movl_eip_to_ebx_needed',1)')

dnl  Usage: generate_movl_eip_to_ebx
dnl
dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
dnl
dnl  The routine loads the word at (%esp), its own return address, into
dnl  %ebx.  movl_eip_to_ebx_done is set to 1 once it has been emitted.

define(generate_movl_eip_to_ebx,
m4_assert_numargs(-1)
`ifelse(movl_eip_to_ebx_needed,1,
`ifelse(movl_eip_to_ebx_done,1,,
`L(movl_eip_to_ebx):
	movl	(%esp), %ebx
	ret_internal
define(`movl_eip_to_ebx_done',1)
')')')


dnl  Usage: ret
dnl
dnl  Generate a "ret", but if doing instrumented profiling then call
dnl  __cyg_profile_func_exit first.
dnl
dnl  generate_movl_eip_to_ebx is expanded straight after the return, so a
dnl  pending L(movl_eip_to_ebx) routine gets emitted at that point.

define(ret,
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
`ifelse(WANT_PROFILING,instrument,
`ret_instrument',
`ret_internal')
generate_movl_eip_to_ebx
')


dnl  Usage: ret_internal
dnl
dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
dnl  used for a return which is internal to some function, such as when
dnl  getting %eip for PIC.
dnl
dnl  The doubled quotes leave a quoted `ret' in the expansion, so it comes
dnl  out as the literal instruction rather than the ret macro above.

define(ret_internal,
m4_assert_numargs(-1)
``ret'')


dnl  Usage: ret_instrument
dnl
dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
dnl  already been seen from this function then jump to that chunk of code,
dnl  rather than emitting it again.
dnl
dnl  "Already seen" is tracked per function with a macro named
dnl  ret_instrument_seen_<function>.

define(ret_instrument,
m4_assert_numargs(-1)
`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
`jmp	L(instrument_exit_`'instrument_current_function)',
`define(ret_instrument_seen_`'instrument_current_function,1)
L(instrument_exit_`'instrument_current_function):
call_instrument(exit)
	ret_internal')')


dnl  Usage: _GLOBAL_OFFSET_TABLE_
dnl
dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
dnl  work with systems requiring an extra underscore such as OpenBSD.
dnl
dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
dnl  out right, though that form doesn't work properly in gas (see
dnl  mpn/x86/README).
dnl
dnl  The prefix comes from GOT_GSYM_PREFIX, which must be defined.

deflit(_GLOBAL_OFFSET_TABLE_,
m4_assert_defined(`GOT_GSYM_PREFIX')
`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')


dnl  --------------------------------------------------------------------------
dnl  Various x86 macros.
dnl


dnl  Usage: ALIGN_OFFSET(bytes,offset)
dnl
dnl  Align to `offset' away from a multiple of `bytes'.
dnl
dnl  This is useful for testing, for example align to something very strict
dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
dnl
dnl  Generally you wouldn't execute across the padding, but it's done with
dnl  nop's so it'll work.

define(ALIGN_OFFSET,
m4_assert_numargs(2)
`ALIGN($1)
forloop(`i',1,$2,`	nop
')')


dnl  Usage: defframe(name,offset)
dnl
dnl  Make a definition like the following with which to access a parameter
dnl  or variable on the stack.
dnl
dnl         define(name,`FRAME+offset(%esp)')
dnl
dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
dnl  zero offset is wanted.
dnl
dnl  The new macro also gets a check that when it's used FRAME is actually
dnl  defined, and that the final %esp offset isn't negative, which would
dnl  mean an attempt to access something below the current %esp.
dnl
dnl  deflit() is used rather than a plain define(), so the new macro won't
dnl  delete any following parenthesized expression.  name(%edi) will come
dnl  out say as 16(%esp)(%edi).
dnl  This isn't valid assembler and should
dnl  provoke an error, which is better than silently giving just 16(%esp).
dnl
dnl  See README for more on the suggested way to access the stack frame.

define(defframe,
m4_assert_numargs(2)
`deflit(`$1',
m4_assert_defined(`FRAME')
`defframe_check_notbelow(`$1',$2,FRAME)dnl
defframe_empty_if_zero(FRAME+($2))(%esp)')')

dnl  Called: defframe_empty_if_zero(expression)
dnl
dnl  Expands to eval(expression) when defframe_empty_if_zero_disabled is 1,
dnl  and to m4_empty_if_zero(expression) otherwise.
define(defframe_empty_if_zero,
m4_assert_numargs(1)
`ifelse(defframe_empty_if_zero_disabled,1,
`eval($1)',
`m4_empty_if_zero($1)')')

dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
dnl
dnl  Raises an m4_error when FRAME+offset is negative.
define(defframe_check_notbelow,
m4_assert_numargs(3)
`ifelse(eval(($3)+($2)<0),1,
`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
')')')


dnl  Usage: FRAME_pushl()
dnl         FRAME_popl()
dnl         FRAME_addl_esp(n)
dnl         FRAME_subl_esp(n)
dnl
dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
dnl  %esp of n bytes.
dnl
dnl  Using these macros is completely optional.  Sometimes it makes more
dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
dnl  jumps and different sequences of FRAME values need to be used in
dnl  different places.
dnl
dnl  pushl and subl grow FRAME (by 4 and n); popl and addl shrink it.

define(FRAME_pushl,
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME+4))')

define(FRAME_popl,
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME-4))')

define(FRAME_addl_esp,
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME-($1)))')

define(FRAME_subl_esp,
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',eval(FRAME+($1)))')


dnl  Usage: defframe_pushl(name)
dnl
dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
dnl  location just pushed.
dnl  This should come after a pushl instruction.
dnl  Putting it on the same line works and avoids lengthening the code.  For
dnl  example,
dnl
dnl          pushl   %eax     defframe_pushl(VAR_COUNTER)
dnl
dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
dnl  current value without tracking future changes.

define(defframe_pushl,
m4_assert_numargs(1)
`FRAME_pushl()defframe(`$1',-FRAME)')


dnl  --------------------------------------------------------------------------
dnl  Assembler instruction macros.
dnl


dnl  Usage: emms_or_femms
dnl         femms_available_p
dnl
dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
dnl  femms instruction is available.  emms_or_femms expands to femms if
dnl  available, or emms if not.
dnl
dnl  emms_or_femms is meant for use in the K6 directory where plain K6
dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
dnl  supported together.
dnl
dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
dnl  emms may as well be used.
dnl
dnl  Availability is decided from the HAVE_HOST_CPU_k62, HAVE_HOST_CPU_k63
dnl  and HAVE_HOST_CPU_athlon defines.

define(femms_available_p,
m4_assert_numargs(-1)
`m4_ifdef_anyof_p(
	`HAVE_HOST_CPU_k62',
	`HAVE_HOST_CPU_k63',
	`HAVE_HOST_CPU_athlon')')

define(emms_or_femms,
m4_assert_numargs(-1)
`ifelse(femms_available_p,1,`femms',`emms')')


dnl  Usage: femms
dnl
dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
dnl  following is a replacement using .byte.

define(femms,
m4_assert_numargs(-1)
`.byte	15,14	C AMD 3DNow femms')


dnl  Usage: jadcl0(op)
dnl
dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
dnl  exact replacement, since it doesn't set the flags like adcl does.
dnl
dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
dnl  misprediction penalty is small, and the multiply algorithm used leads
dnl  to a carry bit on average only 1/4 of the time.
dnl
dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
dnl  for comparison.  For example,
dnl
dnl		define(`jadcl0_disabled',1)
dnl
dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
dnl  the same size as an adcl.  This makes it possible to use the exact same
dnl  computed jump code when testing the relative speed of the two.

define(jadcl0,
m4_assert_numargs(1)
`ifelse(jadcl0_disabled,1,
	`adcl	$`'0, $1',
	`jnc	L(jadcl0_`'jadcl0_counter)
	incl	$1
L(jadcl0_`'jadcl0_counter):
define(`jadcl0_counter',incr(jadcl0_counter))')')

dnl  Sequence number used in the L(jadcl0_N) labels; incremented by each
dnl  jnc/incl expansion of jadcl0.

define(jadcl0_counter,1)


dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl         x86_lookup_p(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
dnl
dnl  Both recurse, dropping one key,value pair per step, and stop with
dnl  "not found" once fewer than three arguments remain.

define(x86_lookup,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')

define(x86_lookup_p,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1, `0',
`ifelse(`$1',`$2',    `1',
`x86_lookup_p(`$1',shift(shift(shift($@))))')')')


dnl  Usage: x86_opcode_reg32(reg)
dnl         x86_opcode_reg32_p(reg)
dnl
dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
dnl  32-bit register, eg. `%ebp' turns into 5.
dnl
dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
dnl  if not.
dnl
dnl  Both look the register up in x86_opcode_reg32_list.

define(x86_opcode_reg32,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_reg32_list)')

define(x86_opcode_reg32_p,
m4_assert_onearg()
`x86_lookup_p(`$1',x86_opcode_reg32_list)')

define(x86_opcode_reg32_list,
``%eax',0,
`%ecx',1,
`%edx',2,
`%ebx',3,
`%esp',4,
`%ebp',5,
`%esi',6,
`%edi',7')


dnl  Usage: x86_opcode_tttn(cond)
dnl
dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
dnl  condition (like `c', `ae', etc).
dnl
dnl  The lookup table is x86_opcode_tttn_list below.  The table name must be
dnl  spelt exactly: an undefined name is passed to x86_lookup as plain text,
dnl  leaving it with no key,value pairs, so every condition would be
dnl  reported as unrecognised.

define(x86_opcode_tttn,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_tttn_list)')

define(x86_opcode_tttn_list,
``o',  0,
`no',  1,
`b',   2, `c',  2, `nae',2,
`nb',  3, `nc', 3, `ae', 3,
`e',   4, `z',  4,
`ne',  5, `nz', 5,
`be',  6, `na', 6,
`nbe', 7, `a',  7,
`s',   8,
`ns',  9,
`p',  10, `pe', 10, `npo',10,
`np', 11, `npe',11, `po', 11,
`l',  12, `nge',12,
`nl', 13, `ge', 13,
`le', 14, `ng', 14,
`nle',15, `g',  15')


dnl  Usage: cmovCC(%srcreg,%dstreg)
dnl
dnl  Emit a cmov instruction, using a .byte sequence, since various past
dnl  versions of gas don't know cmov.  For example,
dnl
dnl         cmovz(  %eax, %ebx)
dnl
dnl  The source operand can only be a plain register.  (m4 code implementing
dnl  full memory addressing modes exists, believe it or not, but isn't
dnl  currently needed and isn't included.)
dnl
dnl  All the standard conditions are defined.  Attempting to use one without
dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
dnl  an error.  This protects against writing something old gas wouldn't
dnl  understand.

dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
dnl  Recurses over the cond,tttn pairs, calling define_cmov on each, and
dnl  stops at the first empty cond.
define(define_cmov_many,
`ifelse(m4_length(`$1'),0,,
`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')

dnl  Called: define_cmov(cond,tttn)
dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
define(define_cmov,
m4_assert_numargs(2)
`define(`cmov$1',
m4_instruction_wrapper()
m4_assert_numargs(2)
`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
m4_doublequote($`'1),m4_doublequote($`'2)))')

dnl  Define one cmov<cond> macro for every entry of x86_opcode_tttn_list.
define_cmov_many(x86_opcode_tttn_list)

dnl  Called: cmov_internal(name,tttn,src,dst)
dnl
dnl  Emits three bytes: 15, then 64+tttn, then 192 + 8*dst + src using the
dnl  x86_opcode_reg32 register numbers.  The instruction text is appended
dnl  as a C comment.
define(cmov_internal,
m4_assert_numargs(4)
`.byte	dnl
15, dnl
eval(64+$2), dnl
eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
	C `$1 $3, $4'')


dnl  Usage: x86_opcode_regmmx(reg)
dnl
dnl  Validate the given mmx register, and return its number, 0 to 7.

define(x86_opcode_regmmx,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regmmx_list)')

define(x86_opcode_regmmx_list,
``%mm0',0,
`%mm1',1,
`%mm2',2,
`%mm3',3,
`%mm4',4,
`%mm5',5,
`%mm6',6,
`%mm7',7')


dnl  Usage: psadbw(%srcreg,%dstreg)
dnl
dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
dnl  example,
dnl
dnl         psadbw( %mm1, %mm2)
dnl
dnl  Only register->register forms are supported here, which suffices for
dnl  the current code.
dnl
dnl  The bytes emitted are 0x0f, 0xf6 and then 192 + 8*dst + src using the
dnl  x86_opcode_regmmx register numbers.

define(psadbw,
m4_instruction_wrapper()
m4_assert_numargs(2)
`.byte 0x0f,0xf6,dnl
eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
	C `psadbw $1, $2'')


dnl  Usage: Zdisp(inst,op,op,op)
dnl
dnl  Generate explicit .byte sequences if necessary to force a byte-sized
dnl  zero displacement on an instruction.
dnl  For example,
dnl
dnl         Zdisp(  movl,   0,(%esi), %eax)
dnl
dnl  expands to
dnl
dnl                 .byte   139,70,0  C movl 0(%esi), %eax
dnl
dnl  If the displacement given isn't 0, then normal assembler code is
dnl  generated.  For example,
dnl
dnl         Zdisp(  movl,   4,(%esi), %eax)
dnl
dnl  expands to
dnl
dnl                 movl    4(%esi), %eax
dnl
dnl  This means a single Zdisp() form can be used with an expression for the
dnl  displacement, and .byte will be used only if necessary.  The
dnl  displacement argument is eval()ed.
dnl
dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
dnl  implemented with a table of instructions and encodings.  A new entry is
dnl  needed for any different operation or registers.  The table is split
dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.
dnl
dnl  Zdisp_found starts at 0 and is set to 1 by whichever table entry
dnl  matches; if none does, an m4_error is raised.

define(Zdisp,
m4_assert_numargs(4)
`define(`Zdisp_found',0)dnl
Zdisp_1($@)dnl
Zdisp_2($@)dnl
Zdisp_3($@)dnl
Zdisp_4($@)dnl
ifelse(Zdisp_found,0,
`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
')')')

dnl  Table part 1: adcl, addl, sbbl, subl, movzbl, adc and sbb forms.
define(Zdisp_1,`dnl
Zdisp_match( adcl, 0,(%edx), %eax, `0x13,0x42,0x00', $@)`'dnl
Zdisp_match( adcl, 0,(%edx), %ebx, `0x13,0x5a,0x00', $@)`'dnl
Zdisp_match( adcl, 0,(%edx), %esi, `0x13,0x72,0x00', $@)`'dnl
Zdisp_match( addl, %ebx, 0,(%edi), `0x01,0x5f,0x00', $@)`'dnl
Zdisp_match( addl, %ecx, 0,(%edi), `0x01,0x4f,0x00', $@)`'dnl
Zdisp_match( addl, %esi, 0,(%edi), `0x01,0x77,0x00', $@)`'dnl
Zdisp_match( sbbl, 0,(%edx), %eax, `0x1b,0x42,0x00', $@)`'dnl
Zdisp_match( sbbl, 0,(%edx), %esi, `0x1b,0x72,0x00', $@)`'dnl
Zdisp_match( subl, %ecx, 0,(%edi), `0x29,0x4f,0x00', $@)`'dnl
Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax, `0x13,0x44,0x8b,0x00', $@)`'dnl
Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax, `0x1b,0x44,0x8b,0x00', $@)`'dnl
')
dnl  Table part 2: movl and mov forms.
define(Zdisp_2,`dnl
Zdisp_match( movl, %eax, 0,(%edi), `0x89,0x47,0x00', $@)`'dnl
Zdisp_match( movl, %ebx, 0,(%edi), `0x89,0x5f,0x00', $@)`'dnl
Zdisp_match( movl, %esi, 0,(%edi), `0x89,0x77,0x00', $@)`'dnl
Zdisp_match( movl, 0,(%ebx), %eax, `0x8b,0x43,0x00', $@)`'dnl
Zdisp_match( movl, 0,(%ebx), %esi, `0x8b,0x73,0x00', $@)`'dnl
Zdisp_match( movl, 0,(%edx), %eax, `0x8b,0x42,0x00', $@)`'dnl
Zdisp_match( movl, 0,(%esi), %eax, `0x8b,0x46,0x00', $@)`'dnl
Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
Zdisp_match( mov, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
Zdisp_match( mov, %eax, 0,(%edi,%ecx,4), `0x89,0x44,0x8f,0x00', $@)`'dnl
')
dnl  Table part 3: movq forms.
define(Zdisp_3,`dnl
Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%edx), %mm0, `0x0f,0x6f,0x42,0x00', $@)`'dnl
Zdisp_match( movq, 0,(%esi), %mm0, `0x0f,0x6f,0x46,0x00', $@)`'dnl
Zdisp_match( movq, %mm0, 0,(%edi), `0x0f,0x7f,0x47,0x00', $@)`'dnl
Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
')
dnl  Table part 4: movd forms.
define(Zdisp_4,`dnl
Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
')

dnl  Called: Zdisp_match(inst,op,op,op, bytes, inst,op,op,op)
dnl
dnl  The first four arguments are a table entry, the fifth its .byte
dnl  encoding, and the last four the arguments given to Zdisp.  The table
dnl  entry has a literal 0 where the displacement goes, either as its
dnl  second argument (memory source) or its third (memory destination).
dnl  On a match Zdisp_found is set to 1; the .byte form is emitted if the
dnl  caller's displacement eval()s to 0, and the plain instruction if not.

define(Zdisp_match,
m4_assert_numargs(9)
`ifelse(eval(m4_stringequal_p(`$1',`$6')
	&& m4_stringequal_p(`$2',0)
	&& m4_stringequal_p(`$3',`$8')
	&& m4_stringequal_p(`$4',`$9')),1,
`define(`Zdisp_found',1)dnl
ifelse(eval(`$7'),0,
`	.byte	$5  C `$1 0$3, $4'',
`	$6	$7$8, $9')',

`ifelse(eval(m4_stringequal_p(`$1',`$6')
	&& m4_stringequal_p(`$2',`$7')
	&& m4_stringequal_p(`$3',0)
	&& m4_stringequal_p(`$4',`$9')),1,
`define(`Zdisp_found',1)dnl
ifelse(eval(`$8'),0,
`	.byte	$5  C `$1 $2, 0$4'',
`	$6	$7, $8$9')')')')


dnl  Usage: shldl(count,src,dst)
dnl         shrdl(count,src,dst)
dnl         shldw(count,src,dst)
dnl         shrdw(count,src,dst)
dnl
dnl  Generate a double-shift instruction, possibly omitting a %cl count
dnl  parameter if that's what the assembler requires, as indicated by
dnl  WANT_SHLDL_CL in config.m4.  For example,
dnl
dnl         shldl(  %cl, %eax, %ebx)
dnl
dnl  turns into either
dnl
dnl         shldl   %cl, %eax, %ebx
dnl  or
dnl         shldl   %eax, %ebx
dnl
dnl  Immediate counts are always passed through unchanged.  For example,
dnl
dnl         shrdl(  $2, %esi, %edi)
dnl  becomes
dnl         shrdl   $2, %esi, %edi
dnl
dnl
dnl  If you forget to use the macro form "shldl( ...)" and instead write
dnl  just a plain "shldl ...", an error results.  This ensures the necessary
dnl  variant treatment of %cl isn't accidentally bypassed.
dnl  Called: define_shd_instruction(name)
dnl
dnl  Define `name' as a three-argument instruction macro that forwards its
dnl  own name and arguments to shd_instruction.

define(define_shd_instruction,
m4_assert_numargs(1)
`define($1,
m4_instruction_wrapper()
m4_assert_numargs(3)
`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
m4_doublequote($`'2),m4_doublequote($`'3)))')

dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
define_shd_instruction(shldl)
define_shd_instruction(shrdl)
define_shd_instruction(shldw)
define_shd_instruction(shrdw)

dnl  Called: shd_instruction(op,count,src,dst)
dnl
dnl  The count operand is dropped only when it is exactly %cl and
dnl  WANT_SHLDL_CL is 0; otherwise all three operands are emitted.
define(shd_instruction,
m4_assert_numargs(4)
m4_assert_defined(`WANT_SHLDL_CL')
`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
``$1'	`$3', `$4'',
``$1'	`$2', `$3', `$4'')')


dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpl %eax, %ebx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
dnl  flags, but the instructions themselves must preserve any registers that
dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
dnl  given can use defframe() stack variables.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
dnl
dnl  A failed assertion falls through to a ud2 instruction; a satisfied one
dnl  jumps over it to L(ASSERT_okN).

define(ASSERT,
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
	`$2',
	`C ASSERT
ifelse(`$2',,,`	pushf	ifdef(`FRAME',`FRAME_pushl()')')
	$2
	j`$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popf	ifdef(`FRAME',`FRAME_popl()')')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

dnl  Sequence number used in the L(ASSERT_okN) labels; incremented by each
dnl  ASSERT that checks a condition.

define(ASSERT_counter,1)


dnl  Usage: movl_text_address(label,register)
dnl
dnl  Get the address of a text segment label, using either a plain movl or a
dnl  position-independent calculation, as necessary.  For example,
dnl
dnl         movl_text_address(L(foo),%eax)
dnl
dnl  This macro is only meant for use in ASSERT()s or when testing, since
dnl  the PIC sequence it generates will want to be done with a ret balancing
dnl  the call on CPUs with return address branch prediction.
dnl
dnl  The addl generated here has a backward reference to the label, and so
dnl  won't suffer from the two forwards references bug in old gas (described
dnl  in mpn/x86/README).

define(movl_text_address,
m4_assert_numargs(2)
`ifdef(`PIC',
	`call	L(movl_text_address_`'movl_text_address_counter)
L(movl_text_address_`'movl_text_address_counter):
	popl	$2	C %eip
	addl	`$'$1-L(movl_text_address_`'movl_text_address_counter), $2
define(`movl_text_address_counter',incr(movl_text_address_counter))',
	`movl	`$'$1, $2')')

dnl  Sequence number used in the L(movl_text_address_N) labels; incremented
dnl  by each PIC expansion of movl_text_address.

define(movl_text_address_counter,1)


dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
dnl
dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
dnl  appropriate for nails in use or not.
dnl  notl is used when GMP_NAIL_BITS is 0, xorl otherwise.

define(notl_or_xorl_GMP_NUMB_MASK,
m4_assert_numargs(1)
`ifelse(GMP_NAIL_BITS,0,
`notl	`$1'',
`xorl	$GMP_NUMB_MASK, `$1'')')


dnl  Usage: LEA(symbol,reg)
dnl  Usage: LEAL(symbol_local_to_file,reg)
dnl
dnl  Without PIC both expand to a plain "movl $symbol, reg".
dnl
dnl  With PIC, reg is first loaded by calling a small routine
dnl  L(movl_eip_<reg>), which copies the word at (%esp) into reg, and
dnl  $_GLOBAL_OFFSET_TABLE_ is then added to it.  LEA finishes with a
dnl  "movl symbol@GOT(reg), reg", LEAL with a "leal symbol@GOTOFF(reg), reg".
dnl
dnl  The L(movl_eip_<reg>) routine is not emitted in place.  Its text is
dnl  appended to load_eip, once per register (the index() test skips a
dnl  register already present there), and comes out where ASM_END is
dnl  expanded.  <reg> is the register name with its first character
dnl  removed by substr.

define(`LEA',
m4_assert_numargs(2)
`ifdef(`PIC',`dnl
ifelse(index(defn(`load_eip'), `$2'),-1,
`m4append(`load_eip',
`	TEXT
	ALIGN(16)
L(movl_eip_`'substr($2,1)):
	movl	(%esp), $2
	ret_internal
')')dnl
	call	L(movl_eip_`'substr($2,1))
	addl	$_GLOBAL_OFFSET_TABLE_, $2
	movl	$1@GOT($2), $2
',`
	movl	`$'$1, $2
')')

define(`LEAL',
m4_assert_numargs(2)
`ifdef(`PIC',`dnl
ifelse(index(defn(`load_eip'), `$2'),-1,
`m4append(`load_eip',
`	TEXT
	ALIGN(16)
L(movl_eip_`'substr($2,1)):
	movl	(%esp), $2
	ret_internal
')')dnl
	call	L(movl_eip_`'substr($2,1))
	addl	$_GLOBAL_OFFSET_TABLE_, $2
	leal	$1@GOTOFF($2), $2
',`
	movl	`$'$1, $2
')')

dnl  Usage: ASM_END
dnl
dnl  Expands to load_eip, the routines accumulated by LEA and LEAL.

define(`ASM_END',`load_eip')

define(`load_eip', `')		dnl updated in LEA/LEAL


dnl  Usage: DEF_OBJECT(name[,alignment])
dnl         END_OBJECT(name)
dnl
dnl  DEF_OBJECT switches to RODATA, emits an ALIGN and then the label
dnl  `name:'.  The alignment argument defaults to 2 when only the name is
dnl  given.  END_OBJECT emits SIZE(name,.-name).

define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
`	RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')

dnl  Usage: CALL(funcname)
dnl
dnl  Call GSYM_PREFIX`'funcname, through @PLT when PIC is defined.

define(`CALL',
m4_assert_numargs(1)
`ifdef(`PIC',
  `call	GSYM_PREFIX`'$1@PLT',
  `call	GSYM_PREFIX`'$1')')

dnl  PIC_WITH_EBX is defined (with an empty value) exactly when PIC is.

ifdef(`PIC',
`define(`PIC_WITH_EBX')',
`undefine(`PIC_WITH_EBX')')

divert`'dnl