github.com/bir3/gocompiler@v0.3.205/src/cmd/internal/obj/x86/asm6.go (about) 1 // Inferno utils/6l/span.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"github.com/bir3/gocompiler/src/cmd/internal/obj"
	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
	"github.com/bir3/gocompiler/src/cmd/internal/sys"
	"encoding/binary"
	"fmt"
	"github.com/bir3/gocompiler/src/internal/buildcfg"
	"log"
	"strings"
)

var (
	// NOTE(review): presumably a Plan 9 specific symbol; its use is not
	// visible in this part of the file — confirm before relying on it.
	plan9privates *obj.LSym
)

// Instruction layout.

// Loop alignment constants:
// want to align loop entry to loopAlign-byte boundary,
// and willing to insert at most maxLoopPad bytes of NOP to do so.
// We define a loop entry as the target of a backward jump.
//
// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
// and it aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
// is very slight but negative, so the alignment is disabled by
// setting maxLoopPad = 0. The code is here for reference and
// for future experiments.
const (
	loopAlign  = 16
	maxLoopPad = 0
)

// Bit flags that are used to express jump target properties.
const (
	// branchBackwards marks targets that are located behind.
	// Used to express jumps to loop headers.
	branchBackwards = (1 << iota)
	// branchShort marks branches whose target is close,
	// with the offset in the -128..127 range.
	branchShort
	// branchLoopHead marks loop entry.
	// Used to insert padding for misaligned loops.
	branchLoopHead
)

// opBytes holds optab encoding bytes.
// Each ytab reserves fixed amount of bytes in this array.
//
// The size should be the minimal number of bytes that
// are enough to hold biggest optab op lines.
type opBytes [31]uint8

// Optab is one row of the optab instruction table: the instruction (as),
// the list of operand forms it accepts (ytab), the prefix selector
// (prefix, one of the P* constants below) and the encoding bytes (op)
// that the ytab rows index into.
type Optab struct {
	as     obj.As
	ytab   []ytab
	prefix uint8
	op     opBytes
}

// NOTE(review): movtab presumably describes a special-case move encoding
// (ft/f3t/tt look like operand classes, op like encoding bytes); the table
// and code that use it are not visible in this chunk — confirm there.
type movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}

// Operand classes ("Ytypes"). The ytab rows below list the operand classes
// an instruction form accepts; ycover records which classes also count as
// other, more general classes.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax
)

// Encoding cases ("Ztypes"). Each ytab row names one of these as its first
// value; together with the opcode bytes it selects how the instruction
// bytes are laid down.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax
)

// Prefix selectors (P*), used as the prefix value of an optab row to
// control which prefix bytes are emitted, followed by the REX bit
// masks (Rx*).
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic: exact value doesn't matter

	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
	Rxw     = 1 << 3 // =1, 64-bit operand size
	Rxr     = 1 << 2 // extend modrm reg
	Rxx     = 1 << 1 // extend sib index
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
)

const (
	// Encoding for VEX prefix in tables.
	// The P, L, and W fields are chosen to match
	// their eventual locations in the VEX prefix bytes.

	// Using spare bit to make leading [E]VEX encoding byte different from
	// 0x0f even if all other VEX fields are 0.
	avxEscape = 1 << 6

	// P field - 2 bits
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
	// L field - 1 bit
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
	// W field - 1 bit
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
	// M field - 5 bits, but mostly reserved; we can store up to 3
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)

// ycover is a flattened Ymax x Ymax table: ycover[a*Ymax+b] = 1 means that
// an operand of class a also counts as class b.
var ycover [Ymax * Ymax]uint8

// NOTE(review): reg and regrex are per-register lookup tables indexed by
// register number; their contents are filled in outside this chunk.
var reg [MAXREG]int

var regrex [MAXREG + 1]int

// The ytab tables below list the operand forms accepted by instructions.
// Each row is written positionally as
//
//	{Ztype, zoffset, argList{operand classes...}}
//
// where zoffset is the number of opcode bytes the row consumes from the
// owning Optab's op array.

var ynone = []ytab{
	{Zlit, 1, argList{}},
}

var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

// NOTE(review): the Yiauto, Yml and Yrf rows appear twice, and Yxr appears
// once with zoffset 0 and once with zoffset 1. Kept exactly as found —
// confirm whether the repeats are intentional.
var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}

var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}

var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}},  // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}},  // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},  // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}},  // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}

var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}

var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

// Operand-form tables, continued. Each row is written positionally as
//
//	{Ztype, zoffset, argList{operand classes...}}
//
// where zoffset is the number of opcode bytes the row consumes from the
// owning Optab's op array.

var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}

var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}

// It should never have more than 1 entry,
// because some optab entries have opcode sequences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}

// You are doasm, holding in your hand a *obj.Prog
// with p.As set to, say,
// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
// to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// argList values match the Ytypes of the p.From and p.To operands. The
// function oclass computes the specific Ytype of an operand and then the set
// of more general Ytypes that it satisfies is implied by the ycover table, set
// up in instinit. For example, oclass distinguishes the constants 0 and 1
// from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the 2nd entry in the line (the zoffset). When a matching line is
// found, that z pointer has the extra data to use in laying down the
// instruction bytes. The actual bytes laid down are a function of the 1st
// entry in the line (that is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL. The optab line says:
//
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml  -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_,   z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml  -> Zr_m,   z+2+1+2 (0x01)
//	Yml, Yrl  -> Zm_r,   z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit. That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
921 var optab = 922 // as, ytab, andproto, opcode 923 [...]Optab{ 924 {obj.AXXX, nil, 0, opBytes{}}, 925 {AAAA, ynone, P32, opBytes{0x37}}, 926 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 927 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 928 {AAAS, ynone, P32, opBytes{0x3f}}, 929 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 930 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 933 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 934 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 935 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 936 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 937 {AADDPD, yxm, Pq, opBytes{0x58}}, 938 {AADDPS, yxm, Pm, opBytes{0x58}}, 939 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 940 {AADDSD, yxm, Pf2, opBytes{0x58}}, 941 {AADDSS, yxm, Pf3, opBytes{0x58}}, 942 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 943 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 944 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 945 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 946 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 947 {AADJSP, nil, 0, opBytes{}}, 948 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 949 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 950 {AANDNPD, yxm, Pq, opBytes{0x55}}, 951 {AANDNPS, yxm, Pm, opBytes{0x55}}, 952 {AANDPD, yxm, Pq, opBytes{0x54}}, 953 {AANDPS, yxm, Pm, opBytes{0x54}}, 954 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 956 {AARPL, yrl_ml, P32, opBytes{0x63}}, 957 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 958 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 959 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 960 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 961 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 962 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 963 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 964 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 965 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 966 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 967 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 968 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 969 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 970 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 971 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 972 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 973 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 974 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 975 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 976 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 977 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 978 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 979 {ABYTE, ybyte, Px, opBytes{1}}, 980 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 981 {ACBW, ynone, Pe, opBytes{0x98}}, 982 {ACDQ, ynone, Px, opBytes{0x99}}, 983 {ACDQE, ynone, Pw, opBytes{0x98}}, 984 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 985 {ACLC, ynone, Px, opBytes{0xf8}}, 986 {ACLD, ynone, Px, opBytes{0xfc}}, 987 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 988 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 989 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 990 {ACLI, ynone, Px, opBytes{0xfa}}, 991 {ACLTS, ynone, Pm, opBytes{0x06}}, 992 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 993 {ACMC, ynone, Px, opBytes{0xf5}}, 994 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 995 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 996 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 997 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 998 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 999 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 1000 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1001 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1002 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1003 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1004 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1005 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1006 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1007 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1008 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1009 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1010 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1011 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1012 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1013 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1014 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1015 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1016 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1017 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1018 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1019 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1020 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1021 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1022 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1023 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1024 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1025 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1026 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1027 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1028 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1029 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1030 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1031 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1032 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1033 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1034 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1035 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1036 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1037 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1038 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1039 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1040 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1041 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1042 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1043 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1044 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1045 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1046 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1047 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1048 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1049 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1050 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1051 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1052 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1053 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1054 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1055 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1056 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1057 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1058 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1059 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1060 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1061 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1062 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1063 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1064 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1065 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1066 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1067 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1068 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1069 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1070 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1071 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1072 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1073 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1074 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1075 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1076 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1077 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1078 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1079 {ACWD, ynone, Pe, opBytes{0x99}}, 1080 {ACWDE, ynone, Px, opBytes{0x98}}, 1081 {ACQO, ynone, Pw, opBytes{0x99}}, 1082 {ADAA, ynone, P32, opBytes{0x27}}, 1083 {ADAS, ynone, P32, opBytes{0x2f}}, 1084 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1085 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1086 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1087 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1088 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1089 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1090 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1091 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1092 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1093 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1094 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1095 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1096 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1097 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1098 {AEMMS, ynone, Pm, opBytes{0x77}}, 1099 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1100 {AENTER, nil, 0, opBytes{}}, // botch 1101 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1102 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1103 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1104 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1105 {AHLT, ynone, Px, opBytes{0xf4}}, 1106 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1107 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1108 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1109 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1110 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1111 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1114 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1117 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1118 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1119 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1120 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1121 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1122 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1123 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1124 {AINSB, ynone, Pb, opBytes{0x6c}}, 1125 {AINSL, ynone, Px, opBytes{0x6d}}, 1126 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1127 {AINSW, ynone, Pe, opBytes{0x6d}}, 1128 {AICEBP, ynone, Px, opBytes{0xf1}}, 1129 {AINT, yint, Px, opBytes{0xcd}}, 1130 {AINTO, ynone, P32, opBytes{0xce}}, 1131 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1132 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1133 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1134 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1135 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1136 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1137 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1138 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1139 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1140 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1141 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1142 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1143 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1144 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1145 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1146 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1147 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1148 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1149 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1150 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1151 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1152 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1153 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1154 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1155 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1156 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1157 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1158 {ALAHF, ynone, Px, opBytes{0x9f}}, 1159 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1160 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1161 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1162 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1163 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1164 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1165 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1166 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1167 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1168 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1169 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1170 {ALOCK, ynone, Px, opBytes{0xf0}}, 1171 {ALODSB, ynone, Pb, opBytes{0xac}}, 1172 {ALODSL, ynone, Px, opBytes{0xad}}, 1173 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1174 {ALODSW, ynone, Pe, opBytes{0xad}}, 1175 {ALONG, ybyte, Px, opBytes{4}}, 1176 {ALOOP, yloop, Px, opBytes{0xe2}}, 1177 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1178 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1179 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1180 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1181 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1182 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1183 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1184 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1185 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1186 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1187 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1188 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1189 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1190 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1191 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1192 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1193 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1194 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1195 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1196 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1197 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1198 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1199 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1200 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1201 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1202 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1203 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1204 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1205 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1206 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1207 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1208 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1209 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1210 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1211 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1212 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1213 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1214 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1215 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1216 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1217 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1218 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1219 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1220 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1221 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1222 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1223 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1224 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1225 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1226 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1227 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1228 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1229 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1230 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1231 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1232 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1233 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1234 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1235 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1236 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1237 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1238 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1239 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1240 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1241 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1242 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1243 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1244 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1245 {AMULPD, yxm, Pe, opBytes{0x59}}, 1246 {AMULPS, yxm, Ym, opBytes{0x59}}, 1247 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1248 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1249 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1250 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1251 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1252 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1253 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1254 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1255 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1256 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1257 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1258 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1259 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1260 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1261 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1262 {AORPD, yxm, Pq, opBytes{0x56}}, 1263 {AORPS, yxm, Pm, opBytes{0x56}}, 1264 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1265 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1266 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1267 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1268 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1269 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1270 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1271 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1272 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1273 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1274 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1275 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1276 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1277 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1278 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1279 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1280 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1281 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1282 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1283 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1284 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1285 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1286 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1287 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1288 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1289 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1290 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1291 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1292 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1293 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1294 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1295 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1296 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1297 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1298 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1299 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1300 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1301 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1302 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1303 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1304 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1305 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1306 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1307 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1308 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1309 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1310 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1311 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1312 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1313 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1314 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1315 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1316 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1317 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1318 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1319 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1320 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1321 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1322 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1323 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1324 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1325 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1326 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1327 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1328 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1329 {APMINSW, yxm, Pe, opBytes{0xea}}, 1330 {APMINUB, yxm, Pe, opBytes{0xda}}, 1331 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1332 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1333 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1334 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1335 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1336 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1337 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1338 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1339 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1340 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1341 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1342 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1343 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1344 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1345 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1346 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1347 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1348 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1349 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1350 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1351 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1352 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1353 {APOPAL, ynone, P32, opBytes{0x61}}, 1354 {APOPAW, ynone, Pe, opBytes{0x61}}, 1355 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1356 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1357 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1358 {APOPFL, ynone, P32, opBytes{0x9d}}, 1359 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1360 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1361 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1362 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1363 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1364 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1365 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1366 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1367 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1368 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1369 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1370 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1371 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1372 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1373 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1374 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1375 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1376 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1377 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1378 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1379 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1380 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1381 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1382 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1383 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1384 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1385 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1386 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1387 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1388 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1389 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1390 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1391 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1392 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1393 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1394 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1395 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1396 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1397 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1398 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1399 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1400 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1401 {APUSHAL, ynone, P32, opBytes{0x60}}, 1402 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1403 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1404 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1405 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1406 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1409 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1410 {AQUAD, ybyte, Px, opBytes{8}}, 1411 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1412 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1415 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1416 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1417 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1418 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1421 {AREP, ynone, Px, opBytes{0xf3}}, 1422 {AREPN, ynone, Px, opBytes{0xf2}}, 1423 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1424 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1425 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1426 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1427 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1428 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1431 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1432 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1435 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1436 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1437 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1438 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1439 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1442 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1443 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1446 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1447 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1450 {ASCASB, ynone, Pb, opBytes{0xae}}, 1451 {ASCASL, ynone, Px, opBytes{0xaf}}, 1452 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1453 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1454 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1455 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1456 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1457 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1458 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1459 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1460 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1461 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1462 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1463 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1464 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1465 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1466 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1467 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1468 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1469 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1470 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1471 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1474 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1475 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1478 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1479 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1480 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1481 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1482 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1483 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1484 {ASTC, ynone, Px, opBytes{0xf9}}, 1485 {ASTD, ynone, Px, opBytes{0xfd}}, 1486 {ASTI, ynone, Px, opBytes{0xfb}}, 1487 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1488 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1489 {ASTOSL, ynone, Px, opBytes{0xab}}, 1490 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1491 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1492 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1493 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1494 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1495 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1496 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1497 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1498 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1499 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1500 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1501 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1502 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1503 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1506 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1507 {obj.ATEXT, ytext, Px, opBytes{}}, 1508 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1509 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1510 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1511 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1512 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1513 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1514 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1515 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1516 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1517 {AWAIT, ynone, Px, opBytes{0x9b}}, 1518 {AWORD, ybyte, Px, opBytes{2}}, 1519 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1520 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1521 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1523 {AXLAT, ynone, Px, opBytes{0xd7}}, 1524 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1525 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1526 {AXORPD, yxm, Pe, opBytes{0x57}}, 1527 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1528 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1529 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1530 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1531 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1532 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1533 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1534 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1535 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1536 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1537 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1538 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1539 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1540 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1541 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1542 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1543 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1544 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1545 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1546 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1547 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1548 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1549 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1550 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1551 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1552 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1553 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1554 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1555 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1556 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1557 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1558 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1559 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1560 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1561 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1562 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1563 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1564 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1565 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1566 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1567 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1568 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1569 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1570 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1571 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1572 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1573 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1574 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1575 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1576 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1577 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1578 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1579 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1580 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1581 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1582 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1583 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1584 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1585 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1586 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1587 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1588 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1589 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1590 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1591 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1592 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1593 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1594 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1595 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1596 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1597 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1598 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1599 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1600 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1601 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1602 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1603 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1604 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1605 {AFFREE, nil, 0, opBytes{}}, 1606 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1607 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1608 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1609 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1610 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1611 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1612 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1613 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1614 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1615 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1616 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1617 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1618 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1619 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1620 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1621 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1622 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1623 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1624 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1625 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1626 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1627 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1628 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1629 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1630 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1631 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1632 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1633 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1634 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1635 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1636 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1637 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1638 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1639 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1640 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1641 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1642 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1643 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1644 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1645 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1646 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1647 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1649 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1650 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1651 {AINVD, ynone, Pm, opBytes{0x08}}, 1652 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1653 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1654 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1655 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1656 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1657 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1658 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1659 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1660 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1661 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1662 {ARSM, ynone, Pm, opBytes{0xaa}}, 1663 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1664 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1665 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1666 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1667 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1668 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1669 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1670 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1671 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1672 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1673 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1674 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1676 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1677 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1678 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1679 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1680 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1681 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1682 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1683 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1684 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1685 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1686 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1687 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1688 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1689 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1690 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1691 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1692 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1693 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1694 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1695 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1696 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1697 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1698 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1699 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1700 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1701 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1702 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1703 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1704 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1705 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1706 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1707 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1708 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1709 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1710 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1711 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1712 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1713 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1715 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1716 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1717 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1718 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1719 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1720 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1721 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1722 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1723 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1724 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1725 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1726 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1727 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1728 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1729 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1730 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1732 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1733 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1734 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1735 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1736 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1737 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1738 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1739 {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1740 {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1741 {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1742 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1743 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1744 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1745 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1746 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1747 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1748 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1749 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1750 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1751 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1752 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1753 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1754 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1755 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1756 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1757 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1758 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1759 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1760 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1761 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1762 {ARDGSBASEL, 
yrdrand, Pf3, opBytes{0xae, 01}}, 1763 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1764 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1765 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1766 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1767 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1768 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1769 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1770 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1771 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1772 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1773 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1774 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1775 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1776 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1777 1778 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1779 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1780 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1781 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1782 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1783 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1784 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1785 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1786 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1787 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1788 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1789 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1790 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1791 1792 {obj.AEND, nil, 0, opBytes{}}, 1793 {0, nil, 0, opBytes{}}, 1794 } 1795 1796 var opindex [(ALAST + 1) & obj.AMask]*Optab 1797 1798 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1799 // This happens on systems like Solaris that call .so functions instead of system calls. 1800 // It does not seem to be necessary for any other systems. This is probably working 1801 // around a Solaris-specific bug that should be fixed differently, but we don't know 1802 // what that bug is. And this does fix it. 
1803 func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1804 if ctxt.Headtype == objabi.Hsolaris { 1805 // All the Solaris dynamic imports from libc.so begin with "libc_". 1806 return strings.HasPrefix(s.Name, "libc_") 1807 } 1808 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1809 } 1810 1811 // single-instruction no-ops of various lengths. 1812 // constructed by hand and disassembled with gdb to verify. 1813 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1814 var nop = [][16]uint8{ 1815 {0x90}, 1816 {0x66, 0x90}, 1817 {0x0F, 0x1F, 0x00}, 1818 {0x0F, 0x1F, 0x40, 0x00}, 1819 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1820 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1821 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1822 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1823 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1824 } 1825 1826 // Native Client rejects the repeated 0x66 prefix. 1827 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1828 func fillnop(p []byte, n int) { 1829 var m int 1830 1831 for n > 0 { 1832 m = n 1833 if m > len(nop) { 1834 m = len(nop) 1835 } 1836 copy(p[:m], nop[m-1][:m]) 1837 p = p[m:] 1838 n -= m 1839 } 1840 } 1841 1842 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1843 s.Grow(int64(c) + int64(pad)) 1844 fillnop(s.P[c:], int(pad)) 1845 return c + pad 1846 } 1847 1848 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1849 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1850 return l 1851 } 1852 return q 1853 } 1854 1855 // isJump returns whether p is a jump instruction. 1856 // It is used to ensure that no standalone or macro-fused jump will straddle 1857 // or end on a 32 byte boundary by inserting NOPs before the jumps. 
1858 func isJump(p *obj.Prog) bool { 1859 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || 1860 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO 1861 } 1862 1863 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional 1864 // jump. Otherwise, nil is returned. 1865 func lookForJCC(p *obj.Prog) *obj.Prog { 1866 // Skip any PCDATA, FUNCDATA or NOP instructions 1867 var q *obj.Prog 1868 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { 1869 } 1870 1871 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { 1872 return nil 1873 } 1874 1875 switch q.As { 1876 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, 1877 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: 1878 default: 1879 return nil 1880 } 1881 1882 return q 1883 } 1884 1885 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction. 1886 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false. 1887 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. 1888 func fusedJump(p *obj.Prog) (bool, uint8) { 1889 var fusedSize uint8 1890 1891 // The first instruction in a macro fused pair may be preceded by the LOCK prefix, 1892 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we 1893 // need to be careful to insert any padding before the locks rather than directly after them. 
1894 1895 if p.As == AXRELEASE || p.As == AXACQUIRE { 1896 fusedSize += p.Isize 1897 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1898 } 1899 if p == nil { 1900 return false, 0 1901 } 1902 } 1903 if p.As == ALOCK { 1904 fusedSize += p.Isize 1905 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1906 } 1907 if p == nil { 1908 return false, 0 1909 } 1910 } 1911 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW 1912 1913 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || 1914 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp 1915 1916 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || 1917 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW 1918 1919 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || 1920 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW 1921 1922 if !cmpAddSub && !testAnd && !incDec { 1923 return false, 0 1924 } 1925 1926 if !incDec { 1927 var argOne obj.AddrType 1928 var argTwo obj.AddrType 1929 if cmp { 1930 argOne = p.From.Type 1931 argTwo = p.To.Type 1932 } else { 1933 argOne = p.To.Type 1934 argTwo = p.From.Type 1935 } 1936 if argOne == obj.TYPE_REG { 1937 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { 1938 return false, 0 1939 } 1940 } else if argOne == obj.TYPE_MEM { 1941 if argTwo != obj.TYPE_REG { 1942 return false, 0 1943 } 1944 } else { 1945 return false, 0 1946 } 1947 } 1948 1949 fusedSize += p.Isize 1950 jmp := lookForJCC(p) 1951 if jmp == nil { 1952 return false, 0 1953 } 1954 1955 fusedSize += jmp.Isize 1956 1957 if testAnd { 1958 return true, fusedSize 1959 } 1960 1961 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || 1962 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { 1963 return false, 0 1964 } 1965 1966 if cmpAddSub { 1967 return true, fusedSize 
1968 } 1969 1970 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { 1971 return false, 0 1972 } 1973 1974 return true, fusedSize 1975 } 1976 1977 type padJumpsCtx int32 1978 1979 func makePjcCtx(ctxt *obj.Link) padJumpsCtx { 1980 // Disable jump padding on 32 bit builds by settting 1981 // padJumps to 0. 1982 if ctxt.Arch.Family == sys.I386 { 1983 return padJumpsCtx(0) 1984 } 1985 1986 // Disable jump padding for hand written assembly code. 1987 if ctxt.IsAsm { 1988 return padJumpsCtx(0) 1989 } 1990 1991 return padJumpsCtx(32) 1992 } 1993 1994 // padJump detects whether the instruction being assembled is a standalone or a macro-fused 1995 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does 1996 // not cross or end on a 32 byte boundary. 1997 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { 1998 if pjc == 0 { 1999 return c 2000 } 2001 2002 var toPad int32 2003 fj, fjSize := fusedJump(p) 2004 mask := int32(pjc - 1) 2005 if fj { 2006 if (c&mask)+int32(fjSize) >= int32(pjc) { 2007 toPad = int32(pjc) - (c & mask) 2008 } 2009 } else if isJump(p) { 2010 if (c&mask)+int32(p.Isize) >= int32(pjc) { 2011 toPad = int32(pjc) - (c & mask) 2012 } 2013 } 2014 if toPad <= 0 { 2015 return c 2016 } 2017 2018 return noppad(ctxt, s, c, toPad) 2019 } 2020 2021 // reAssemble is called if an instruction's size changes during assembly. If 2022 // it does and the instruction is a standalone or a macro-fused jump we need to 2023 // reassemble. 
func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
	// With padding disabled a size change never forces another pass here.
	if pjc == 0 {
		return false
	}

	fj, _ := fusedJump(p)
	return fj || isJump(p)
}

// nopPad records padding that was emitted during one assembly pass so that
// it can later be spliced into the Prog list as an explicit NOP.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}

// span6 assembles the function symbol s into machine code.
//
// It first rewrites ADJSP and (under -spectre=ret) indirect CALL/JMP
// instructions, then repeatedly lays out and encodes every Prog reachable
// from s.Func().Text into s.P until a pass completes without requesting
// re-assembly. Afterwards it splices the recorded jump padding into the
// Prog list as NOPs, sets s.Size, marks the two-instruction TLS sequence as
// unsafe for preemption where applicable and writes the jump table entries.
//
// The function returns immediately if s.P is already non-nil.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			// Redirect the register-indirect transfer to the
			// runtime.retpoline<REG> symbol for that register.
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort has been visited by
		// this loop, i.e. it precedes p: the branch goes backwards.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int
	var c int32
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			c0 := c // offset before any padding for p
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Displacement does not fit in a signed byte:
						// switch q to the long form and run another pass.
						reAssemble = true
						q.Back ^= branchShort
					}

					// JCXZL and XBEGIN keep their displacement one byte
					// further into the instruction than the other short forms.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the
					// last four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop iterating once this function has produced new diagnostics.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}

// instinit fills the package-level assembler tables: opindex (from avxOptab
// and optab), the ycover operand-class compatibility matrix, and the reg and
// regrex register encoding tables. On Plan 9 it also looks up the
// "_privates" symbol. Calling it again after ycover[0] has been set is a no-op.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both opcode tables by opcode; a slot that is already taken
	// means two table entries share an opcode.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to the entry whose opcode is 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover[a*Ymax+b] == 1 records that operand class a is accepted
	// where class b is expected. Every class covers itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// reg[i] receives the low three bits of register i's number (-1 for
	// anything that is not handled below); regrex[i] receives the extra
	// prefix bits that register needs.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}

// isAndroid reports whether the build configuration targets GOOS=android.
var isAndroid = buildcfg.GOOS == "android"

// prefixof returns the segment-override prefix byte needed to encode the
// address a, or 0 when no prefix is required. Unknown TLS configurations
// terminate the program via log.Fatalf.
func prefixof(ctxt *obj.Link, a *obj.Addr) int {
	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
		return 0
	}
	// Segment (or TLS) register used as the base of a plain memory operand.
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		case REG_CS:
			return 0x2e

		case REG_DS:
			return 0x3e

		case REG_ES:
			return 0x26

		case REG_FS:
			return 0x64

		case REG_GS:
			return 0x65

		case REG_TLS:
			// NOTE: Systems listed here should be only systems that
			// support direct TLS references like 8(TLS) implemented as
			// direct references from FS or GS. Systems that require
			// the initial-exec model, where you load the TLS base into
			// a register and then index from that register, do not reach
			// this code and should not be listed.
			if ctxt.Arch.Family == sys.I386 {
				switch ctxt.Headtype {
				default:
					if isAndroid {
						return 0x65 // GS
					}
					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

				case objabi.Hdarwin,
					objabi.Hdragonfly,
					objabi.Hfreebsd,
					objabi.Hnetbsd,
					objabi.Hopenbsd:
					return 0x65 // GS
				}
			}

			switch ctxt.Headtype {
			default:
				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

			case objabi.Hlinux:
				if isAndroid {
					return 0x64 // FS
				}

				if ctxt.Flag_shared {
					log.Fatalf("unknown TLS base register for linux with -shared")
				} else {
					return 0x64 // FS
				}

			case objabi.Hdragonfly,
				objabi.Hfreebsd,
				objabi.Hnetbsd,
				objabi.Hopenbsd,
				objabi.Hsolaris:
				return 0x64 // FS

			case objabi.Hdarwin:
				return 0x65 // GS
			}
		}
	}

	if ctxt.Arch.Family == sys.I386 {
		if a.Index == REG_TLS && ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOVL off(CX)(TLS*1), AX
			// becomes
			//     mov %gs:off(%ecx), %eax
			// which assumes that the correct TLS offset has been loaded into %ecx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction it becomes
			//     mov 0x0(%ecx), %eax
			// and a R_TLS_LE relocation, and so does not require a prefix.
			return 0x65 // GS
		}
		return 0
	}

	// Segment (or TLS) register used as the index.
	switch a.Index {
	case REG_CS:
		return 0x2e

	case REG_DS:
		return 0x3e

	case REG_ES:
		return 0x26

	case REG_TLS:
		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOV off(CX)(TLS*1), AX
			// becomes
			//     mov %fs:off(%rcx), %rax
			// which assumes that the correct TLS offset has been loaded into %rcx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction does not require a prefix.
			return 0x64
		}

	case REG_FS:
		return 0x64

	case REG_GS:
		return 0x65
	}

	return 0
}

// oclassRegList returns multisource operand class for addr.
// Only ranges whose register indexes differ by exactly 3 and whose two ends
// are both X, both Y or both Z registers are classified; everything else,
// and any index of 8 or more on 386, yields Yxxx.
func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
	// TODO(quasilyte): when oclass register case is refactored into
	// lookup table, use it here to get register kind more easily.
	// Helper functions like regIsXmm should go away too (they will become redundant).

	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }

	reg0, reg1 := decodeRegisterRange(addr.Offset)
	low := regIndex(int16(reg0))
	high := regIndex(int16(reg1))

	if ctxt.Arch.Family == sys.I386 {
		if low >= 8 || high >= 8 {
			return Yxxx
		}
	}

	switch high - low {
	case 3:
		switch {
		case regIsXmm(reg0) && regIsXmm(reg1):
			return YxrEvexMulti4
		case regIsYmm(reg0) && regIsYmm(reg1):
			return YyrEvexMulti4
		case regIsZmm(reg0) && regIsZmm(reg1):
			return YzrMulti4
		default:
			return Yxxx
		}
	default:
		return Yxxx
	}
}

// oclassVMem returns V-mem (vector memory with VSIB) operand class.
// For addr that is not V-mem returns (Yxxx, false).
// On 386 an index register numbered 8 or above yields (Yxxx, true).
func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
	switch addr.Index {
	case REG_X0 + 0,
		REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7:
		return Yxvm, true
	case REG_X8 + 0,
		REG_X8 + 1,
		REG_X8 + 2,
		REG_X8 + 3,
		REG_X8 + 4,
		REG_X8 + 5,
		REG_X8 + 6,
		REG_X8 + 7:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yxvm, true
	case REG_X16 + 0,
		REG_X16 + 1,
		REG_X16 + 2,
		REG_X16 + 3,
		REG_X16 + 4,
		REG_X16 + 5,
		REG_X16 + 6,
		REG_X16 + 7,
		REG_X16 + 8,
		REG_X16 + 9,
		REG_X16 + 10,
		REG_X16 + 11,
		REG_X16 + 12,
		REG_X16 + 13,
		REG_X16 + 14,
		REG_X16 + 15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return YxvmEvex, true

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7:
		return Yyvm, true
	case REG_Y8 + 0,
		REG_Y8 + 1,
		REG_Y8 + 2,
		REG_Y8 + 3,
		REG_Y8 + 4,
		REG_Y8 + 5,
		REG_Y8 + 6,
		REG_Y8 + 7:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yyvm, true
	case REG_Y16 + 0,
		REG_Y16 + 1,
		REG_Y16 + 2,
		REG_Y16 + 3,
		REG_Y16 + 4,
		REG_Y16 + 5,
		REG_Y16 + 6,
		REG_Y16 + 7,
		REG_Y16 + 8,
		REG_Y16 + 9,
		REG_Y16 + 10,
		REG_Y16 + 11,
		REG_Y16 + 12,
		REG_Y16 + 13,
		REG_Y16 + 14,
		REG_Y16 + 15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return YyvmEvex, true

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzvm, true
	case REG_Z8 + 0,
		REG_Z8 + 1,
		REG_Z8 + 2,
		REG_Z8 + 3,
		REG_Z8 + 4,
		REG_Z8 + 5,
		REG_Z8 + 6,
		REG_Z8 + 7,
		REG_Z8 + 8,
		REG_Z8 + 9,
		REG_Z8 + 10,
		REG_Z8 + 11,
		REG_Z8 + 12,
		REG_Z8 + 13,
		REG_Z8 + 14,
		REG_Z8 + 15,
		REG_Z8 + 16,
		REG_Z8 + 17,
		REG_Z8 + 18,
		REG_Z8 + 19,
		REG_Z8 + 20,
		REG_Z8 + 21,
		REG_Z8 + 22,
		REG_Z8 + 23:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yzvm, true
	}

	return Yxxx, false
}

// oclass returns the operand class (one of the Y* constants) of the address a.
// p is only used to format a in diagnostics. Operands that cannot be
// classified yield Yxxx.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// A nameless address is classified like a constant.
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			v = int64(int32(v))
		}
		// The cases run from the narrowest class to the widest, so the
		// first match is the smallest class that holds v.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every remaining operand type must be a register.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	return Yxxx
}

// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
// The Put* methods write at offset off and advance it; none of them checks
// that the data fits into buf.
type AsmBuf struct {
	buf      [100]byte
	off      int
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}

// Put1 appends one byte to the end of the buffer.
func (ab *AsmBuf) Put1(x byte) {
	ab.buf[ab.off] = x
	ab.off++
}

// Put2 appends two bytes to the end of the buffer.
func (ab *AsmBuf) Put2(x, y byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.off += 2
}

// Put3 appends three bytes to the end of the buffer.
func (ab *AsmBuf) Put3(x, y, z byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.buf[ab.off+2] = z
	ab.off += 3
}

// Put4 appends four bytes to the end of the buffer.
func (ab *AsmBuf) Put4(x, y, z, w byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.buf[ab.off+2] = z
	ab.buf[ab.off+3] = w
	ab.off += 4
}

// PutInt16 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt16(v int16) {
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.off += 2
}

// PutInt32 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt32(v int32) {
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.off += 4
}

// PutInt64 writes v into the buffer using little-endian encoding.
3249 func (ab *AsmBuf) PutInt64(v int64) { 3250 ab.buf[ab.off+0] = byte(v) 3251 ab.buf[ab.off+1] = byte(v >> 8) 3252 ab.buf[ab.off+2] = byte(v >> 16) 3253 ab.buf[ab.off+3] = byte(v >> 24) 3254 ab.buf[ab.off+4] = byte(v >> 32) 3255 ab.buf[ab.off+5] = byte(v >> 40) 3256 ab.buf[ab.off+6] = byte(v >> 48) 3257 ab.buf[ab.off+7] = byte(v >> 56) 3258 ab.off += 8 3259 } 3260 3261 // Put copies b into the buffer. 3262 func (ab *AsmBuf) Put(b []byte) { 3263 copy(ab.buf[ab.off:], b) 3264 ab.off += len(b) 3265 } 3266 3267 // PutOpBytesLit writes zero terminated sequence of bytes from op, 3268 // starting at specified offset (e.g. z counter value). 3269 // Trailing 0 is not written. 3270 // 3271 // Intended to be used for literal Z cases. 3272 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r). 3273 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) { 3274 for int(op[offset]) != 0 { 3275 ab.Put1(byte(op[offset])) 3276 offset++ 3277 } 3278 } 3279 3280 // Insert inserts b at offset i. 3281 func (ab *AsmBuf) Insert(i int, b byte) { 3282 ab.off++ 3283 copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1]) 3284 ab.buf[i] = b 3285 } 3286 3287 // Last returns the byte at the end of the buffer. 3288 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] } 3289 3290 // Len returns the length of the buffer. 3291 func (ab *AsmBuf) Len() int { return ab.off } 3292 3293 // Bytes returns the contents of the buffer. 3294 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] } 3295 3296 // Reset empties the buffer. 3297 func (ab *AsmBuf) Reset() { ab.off = 0 } 3298 3299 // At returns the byte at offset i. 3300 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] } 3301 3302 // asmidx emits SIB byte. 3303 func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) { 3304 var i int 3305 3306 // X/Y index register is used in VSIB. 
3307 switch index { 3308 default: 3309 goto bad 3310 3311 case REG_NONE: 3312 i = 4 << 3 3313 goto bas 3314 3315 case REG_R8, 3316 REG_R9, 3317 REG_R10, 3318 REG_R11, 3319 REG_R12, 3320 REG_R13, 3321 REG_R14, 3322 REG_R15, 3323 REG_X8, 3324 REG_X9, 3325 REG_X10, 3326 REG_X11, 3327 REG_X12, 3328 REG_X13, 3329 REG_X14, 3330 REG_X15, 3331 REG_X16, 3332 REG_X17, 3333 REG_X18, 3334 REG_X19, 3335 REG_X20, 3336 REG_X21, 3337 REG_X22, 3338 REG_X23, 3339 REG_X24, 3340 REG_X25, 3341 REG_X26, 3342 REG_X27, 3343 REG_X28, 3344 REG_X29, 3345 REG_X30, 3346 REG_X31, 3347 REG_Y8, 3348 REG_Y9, 3349 REG_Y10, 3350 REG_Y11, 3351 REG_Y12, 3352 REG_Y13, 3353 REG_Y14, 3354 REG_Y15, 3355 REG_Y16, 3356 REG_Y17, 3357 REG_Y18, 3358 REG_Y19, 3359 REG_Y20, 3360 REG_Y21, 3361 REG_Y22, 3362 REG_Y23, 3363 REG_Y24, 3364 REG_Y25, 3365 REG_Y26, 3366 REG_Y27, 3367 REG_Y28, 3368 REG_Y29, 3369 REG_Y30, 3370 REG_Y31, 3371 REG_Z8, 3372 REG_Z9, 3373 REG_Z10, 3374 REG_Z11, 3375 REG_Z12, 3376 REG_Z13, 3377 REG_Z14, 3378 REG_Z15, 3379 REG_Z16, 3380 REG_Z17, 3381 REG_Z18, 3382 REG_Z19, 3383 REG_Z20, 3384 REG_Z21, 3385 REG_Z22, 3386 REG_Z23, 3387 REG_Z24, 3388 REG_Z25, 3389 REG_Z26, 3390 REG_Z27, 3391 REG_Z28, 3392 REG_Z29, 3393 REG_Z30, 3394 REG_Z31: 3395 if ctxt.Arch.Family == sys.I386 { 3396 goto bad 3397 } 3398 fallthrough 3399 3400 case REG_AX, 3401 REG_CX, 3402 REG_DX, 3403 REG_BX, 3404 REG_BP, 3405 REG_SI, 3406 REG_DI, 3407 REG_X0, 3408 REG_X1, 3409 REG_X2, 3410 REG_X3, 3411 REG_X4, 3412 REG_X5, 3413 REG_X6, 3414 REG_X7, 3415 REG_Y0, 3416 REG_Y1, 3417 REG_Y2, 3418 REG_Y3, 3419 REG_Y4, 3420 REG_Y5, 3421 REG_Y6, 3422 REG_Y7, 3423 REG_Z0, 3424 REG_Z1, 3425 REG_Z2, 3426 REG_Z3, 3427 REG_Z4, 3428 REG_Z5, 3429 REG_Z6, 3430 REG_Z7: 3431 i = reg[index] << 3 3432 } 3433 3434 switch scale { 3435 default: 3436 goto bad 3437 3438 case 1: 3439 break 3440 3441 case 2: 3442 i |= 1 << 6 3443 3444 case 4: 3445 i |= 2 << 6 3446 3447 case 8: 3448 i |= 3 << 6 3449 } 3450 3451 bas: 3452 switch base { 3453 default: 3454 goto 
bad 3455 3456 case REG_NONE: // must be mod=00 3457 i |= 5 3458 3459 case REG_R8, 3460 REG_R9, 3461 REG_R10, 3462 REG_R11, 3463 REG_R12, 3464 REG_R13, 3465 REG_R14, 3466 REG_R15: 3467 if ctxt.Arch.Family == sys.I386 { 3468 goto bad 3469 } 3470 fallthrough 3471 3472 case REG_AX, 3473 REG_CX, 3474 REG_DX, 3475 REG_BX, 3476 REG_SP, 3477 REG_BP, 3478 REG_SI, 3479 REG_DI: 3480 i |= reg[base] 3481 } 3482 3483 ab.Put1(byte(i)) 3484 return 3485 3486 bad: 3487 ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base) 3488 ab.Put1(0) 3489 } 3490 3491 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) { 3492 var rel obj.Reloc 3493 3494 v := vaddr(ctxt, p, a, &rel) 3495 if rel.Siz != 0 { 3496 if rel.Siz != 4 { 3497 ctxt.Diag("bad reloc") 3498 } 3499 r := obj.Addrel(cursym) 3500 *r = rel 3501 r.Off = int32(p.Pc + int64(ab.Len())) 3502 } 3503 3504 ab.PutInt32(int32(v)) 3505 } 3506 3507 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 { 3508 if r != nil { 3509 *r = obj.Reloc{} 3510 } 3511 3512 switch a.Name { 3513 case obj.NAME_STATIC, 3514 obj.NAME_GOTREF, 3515 obj.NAME_EXTERN: 3516 s := a.Sym 3517 if r == nil { 3518 ctxt.Diag("need reloc for %v", obj.Dconv(p, a)) 3519 log.Fatalf("reloc") 3520 } 3521 3522 if a.Name == obj.NAME_GOTREF { 3523 r.Siz = 4 3524 r.Type = objabi.R_GOTPCREL 3525 } else if useAbs(ctxt, s) { 3526 r.Siz = 4 3527 r.Type = objabi.R_ADDR 3528 } else { 3529 r.Siz = 4 3530 r.Type = objabi.R_PCREL 3531 } 3532 3533 r.Off = -1 // caller must fill in 3534 r.Sym = s 3535 r.Add = a.Offset 3536 3537 return 0 3538 } 3539 3540 if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS { 3541 if r == nil { 3542 ctxt.Diag("need reloc for %v", obj.Dconv(p, a)) 3543 log.Fatalf("reloc") 3544 } 3545 3546 if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin { 3547 r.Type = objabi.R_TLS_LE 3548 r.Siz = 4 3549 r.Off = -1 // caller must fill in 3550 r.Add = a.Offset 3551 } 3552 return 0 
3553 } 3554 3555 return a.Offset 3556 } 3557 3558 func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) { 3559 var base int 3560 var rel obj.Reloc 3561 3562 rex &= 0x40 | Rxr 3563 if a.Offset != int64(int32(a.Offset)) { 3564 // The rules are slightly different for 386 and AMD64, 3565 // mostly for historical reasons. We may unify them later, 3566 // but it must be discussed beforehand. 3567 // 3568 // For 64bit mode only LEAL is allowed to overflow. 3569 // It's how https://golang.org/cl/59630 made it. 3570 // crypto/sha1/sha1block_amd64.s depends on this feature. 3571 // 3572 // For 32bit mode rules are more permissive. 3573 // If offset fits uint32, it's permitted. 3574 // This is allowed for assembly that wants to use 32-bit hex 3575 // constants, e.g. LEAL 0x99999999(AX), AX. 3576 overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) || 3577 (ctxt.Arch.Family != sys.AMD64 && 3578 int64(uint32(a.Offset)) == a.Offset && 3579 ab.rexflag&Rxw == 0) 3580 if !overflowOK { 3581 ctxt.Diag("offset too large in %s", p) 3582 } 3583 } 3584 v := int32(a.Offset) 3585 rel.Siz = 0 3586 3587 switch a.Type { 3588 case obj.TYPE_ADDR: 3589 if a.Name == obj.NAME_NONE { 3590 ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE") 3591 } 3592 if a.Index == REG_TLS { 3593 ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS") 3594 } 3595 goto bad 3596 3597 case obj.TYPE_REG: 3598 const regFirst = REG_AL 3599 const regLast = REG_Z31 3600 if a.Reg < regFirst || regLast < a.Reg { 3601 goto bad 3602 } 3603 if v != 0 { 3604 goto bad 3605 } 3606 ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3)) 3607 ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex 3608 return 3609 } 3610 3611 if a.Type != obj.TYPE_MEM { 3612 goto bad 3613 } 3614 3615 if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) { 3616 base := int(a.Reg) 3617 switch a.Name { 3618 case obj.NAME_EXTERN, 3619 obj.NAME_GOTREF, 3620 obj.NAME_STATIC: 3621 
if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 { 3622 goto bad 3623 } 3624 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared { 3625 // The base register has already been set. It holds the PC 3626 // of this instruction returned by a PC-reading thunk. 3627 // See obj6.go:rewriteToPcrel. 3628 } else { 3629 base = REG_NONE 3630 } 3631 v = int32(vaddr(ctxt, p, a, &rel)) 3632 3633 case obj.NAME_AUTO, 3634 obj.NAME_PARAM: 3635 base = REG_SP 3636 } 3637 3638 ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex 3639 if base == REG_NONE { 3640 ab.Put1(byte(0<<6 | 4<<0 | r<<3)) 3641 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3642 goto putrelv 3643 } 3644 3645 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 { 3646 ab.Put1(byte(0<<6 | 4<<0 | r<<3)) 3647 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3648 return 3649 } 3650 3651 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 { 3652 ab.Put1(byte(1<<6 | 4<<0 | r<<3)) 3653 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3654 ab.Put1(disp8) 3655 return 3656 } 3657 3658 ab.Put1(byte(2<<6 | 4<<0 | r<<3)) 3659 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3660 goto putrelv 3661 } 3662 3663 base = int(a.Reg) 3664 switch a.Name { 3665 case obj.NAME_STATIC, 3666 obj.NAME_GOTREF, 3667 obj.NAME_EXTERN: 3668 if a.Sym == nil { 3669 ctxt.Diag("bad addr: %v", p) 3670 } 3671 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared { 3672 // The base register has already been set. It holds the PC 3673 // of this instruction returned by a PC-reading thunk. 3674 // See obj6.go:rewriteToPcrel. 
3675 } else { 3676 base = REG_NONE 3677 } 3678 v = int32(vaddr(ctxt, p, a, &rel)) 3679 3680 case obj.NAME_AUTO, 3681 obj.NAME_PARAM: 3682 base = REG_SP 3683 } 3684 3685 if base == REG_TLS { 3686 v = int32(vaddr(ctxt, p, a, &rel)) 3687 } 3688 3689 ab.rexflag |= regrex[base]&Rxb | rex 3690 if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS { 3691 if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 { 3692 if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) { 3693 ctxt.Diag("%v has offset against gotref", p) 3694 } 3695 ab.Put1(byte(0<<6 | 5<<0 | r<<3)) 3696 goto putrelv 3697 } 3698 3699 // temporary 3700 ab.Put2( 3701 byte(0<<6|4<<0|r<<3), // sib present 3702 0<<6|4<<3|5<<0, // DS:d32 3703 ) 3704 goto putrelv 3705 } 3706 3707 if base == REG_SP || base == REG_R12 { 3708 if v == 0 { 3709 ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3)) 3710 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) 3711 return 3712 } 3713 3714 if disp8, ok := toDisp8(v, p, ab); ok { 3715 ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3)) 3716 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) 3717 ab.Put1(disp8) 3718 return 3719 } 3720 3721 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3)) 3722 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) 3723 goto putrelv 3724 } 3725 3726 if REG_AX <= base && base <= REG_R15 { 3727 if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid && 3728 !(ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64) { 3729 rel = obj.Reloc{} 3730 rel.Type = objabi.R_TLS_LE 3731 rel.Siz = 4 3732 rel.Sym = nil 3733 rel.Add = int64(v) 3734 v = 0 3735 } 3736 3737 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 { 3738 ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3)) 3739 return 3740 } 3741 3742 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 { 3743 ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8) 3744 return 
3745 } 3746 3747 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3)) 3748 goto putrelv 3749 } 3750 3751 goto bad 3752 3753 putrelv: 3754 if rel.Siz != 0 { 3755 if rel.Siz != 4 { 3756 ctxt.Diag("bad rel") 3757 goto bad 3758 } 3759 3760 r := obj.Addrel(cursym) 3761 *r = rel 3762 r.Off = int32(p.Pc + int64(ab.Len())) 3763 } 3764 3765 ab.PutInt32(v) 3766 return 3767 3768 bad: 3769 ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a)) 3770 } 3771 3772 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) { 3773 ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0) 3774 } 3775 3776 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) { 3777 ab.asmandsz(ctxt, cursym, p, a, o, 0, 0) 3778 } 3779 3780 func bytereg(a *obj.Addr, t *uint8) { 3781 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) { 3782 a.Reg += REG_AL - REG_AX 3783 *t = 0 3784 } 3785 } 3786 3787 func unbytereg(a *obj.Addr, t *uint8) { 3788 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) { 3789 a.Reg += REG_AX - REG_AL 3790 *t = 0 3791 } 3792 } 3793 3794 const ( 3795 movLit uint8 = iota // Like Zlit 3796 movRegMem 3797 movMemReg 3798 movRegMem2op 3799 movMemReg2op 3800 movFullPtr // Load full pointer, trash heap (unsupported) 3801 movDoubleShift 3802 movTLSReg 3803 ) 3804 3805 var ymovtab = []movtab{ 3806 // push 3807 {APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}}, 3808 {APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}}, 3809 {APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}}, 3810 {APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}}, 3811 {APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, 3812 {APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, 3813 {APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, 3814 {APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, 3815 {APUSHW, Ycs, Ynone, Ynone, 
movLit, [4]uint8{Pe, 0x0e, 0}}, 3816 {APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}}, 3817 {APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}}, 3818 {APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}}, 3819 {APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}}, 3820 {APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}}, 3821 3822 // pop 3823 {APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}}, 3824 {APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}}, 3825 {APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}}, 3826 {APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}}, 3827 {APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}}, 3828 {APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}}, 3829 {APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}}, 3830 {APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}}, 3831 {APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}}, 3832 {APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}}, 3833 {APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}}, 3834 {APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}}, 3835 3836 // mov seg 3837 {AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}}, 3838 {AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}}, 3839 {AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}}, 3840 {AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}}, 3841 {AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}}, 3842 {AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}}, 3843 {AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}}, 3844 {AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}}, 3845 {AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}}, 3846 {AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}}, 3847 {AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}}, 3848 {AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}}, 3849 3850 // 
mov cr 3851 {AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}}, 3852 {AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}}, 3853 {AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}}, 3854 {AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}}, 3855 {AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}}, 3856 {AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}}, 3857 {AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}}, 3858 {AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}}, 3859 {AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}}, 3860 {AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}}, 3861 {AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}}, 3862 {AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}}, 3863 {AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}}, 3864 {AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}}, 3865 {AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}}, 3866 {AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}}, 3867 {AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}}, 3868 {AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}}, 3869 {AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}}, 3870 {AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}}, 3871 3872 // mov dr 3873 {AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}}, 3874 {AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}}, 3875 {AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}}, 3876 {AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}}, 3877 {AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}}, 3878 {AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}}, 3879 {AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, 
[4]uint8{0x0f, 0x21, 6, 0}}, 3880 {AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}}, 3881 {AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}}, 3882 {AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}}, 3883 {AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}}, 3884 {AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}}, 3885 {AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}}, 3886 {AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}}, 3887 {AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}}, 3888 {AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}}, 3889 3890 // mov tr 3891 {AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}}, 3892 {AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}}, 3893 {AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}}, 3894 {AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}}, 3895 3896 // lgdt, sgdt, lidt, sidt 3897 {AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}}, 3898 {AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}}, 3899 {AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}}, 3900 {AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}}, 3901 {AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}}, 3902 {AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}}, 3903 {AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}}, 3904 {AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}}, 3905 3906 // lldt, sldt 3907 {AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}}, 3908 {AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}}, 3909 3910 // lmsw, smsw 3911 {AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}}, 3912 {AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}}, 3913 
3914 // ltr, str 3915 {AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}}, 3916 {AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}}, 3917 3918 /* load full pointer - unsupported 3919 {AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}}, 3920 {AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}}, 3921 */ 3922 3923 // double shift 3924 {ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3925 {ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3926 {ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3927 {ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3928 {ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3929 {ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3930 {ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3931 {ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3932 {ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3933 {ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3934 {ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3935 {ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3936 {ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3937 {ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3938 {ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3939 {ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3940 {ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3941 {ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3942 3943 // load TLS base 3944 {AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, 3945 {AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, 3946 {0, 0, 0, 0, 0, [4]uint8{}}, 3947 } 3948 3949 func isax(a *obj.Addr) bool { 3950 switch a.Reg { 3951 case REG_AX, REG_AL, REG_AH: 3952 return true 
// subreg replaces every use of register from with register to in the
// From and To operands of p (both the Reg and the Index fields).
// The matching Ft/Tt field is zeroed for each operand that is changed.
func subreg(p *obj.Prog, from int, to int) {
	if false { /* debug['Q'] */
		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
	}

	if int(p.From.Reg) == from {
		p.From.Reg = int16(to)
		p.Ft = 0
	}

	if int(p.To.Reg) == from {
		p.To.Reg = int16(to)
		p.Tt = 0
	}

	if int(p.From.Index) == from {
		p.From.Index = int16(to)
		p.Ft = 0
	}

	if int(p.To.Index) == from {
		p.To.Index = int16(to)
		p.Tt = 0
	}

	if false { /* debug['Q'] */
		fmt.Printf("%v\n", p)
	}
}

// mediaop emits the escape bytes and the opcode byte for op and returns
// the (possibly advanced) index z into o.op.
//
// When op is one of Pm, Pe, Pf2, Pf3 and osize != 1, op itself is emitted
// first (unless it is Pm), then Pm, and the opcode byte is taken from the
// next entry of o.op. In every other case a Pm byte is emitted unless the
// buffer already ends in one.
func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
	switch op {
	case Pm, Pe, Pf2, Pf3:
		if osize != 1 {
			if op != Pm {
				ab.Put1(byte(op))
			}
			ab.Put1(Pm)
			z++
			op = int(o.op[z])
			break
		}
		fallthrough

	default:
		if ab.Len() == 0 || ab.Last() != Pm {
			ab.Put1(Pm)
		}
	}

	ab.Put1(byte(op))
	return z
}

// bpduff1 holds the pre-encoded machine code for the two instructions
// named in the line comments.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 holds the pre-encoded machine code for the instruction
// named in the line comment.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}

// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Any of rm, v, r and k may be nil; a nil operand contributes no
// register bits to the prefix.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The register extension bits start out as 1 and are cleared when the
	// corresponding regrex bit is set, i.e. they are written inverted.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, X is driven by the RxrEvex bit of
		// rm.Reg; otherwise by the Rxx bit of the index register.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	vexV ^= 0x0F // vvvv is stored inverted.
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	// The instruction suffix (p.Scond) selects zeroing and at most one of
	// rounding, broadcast or SAE; the latter three all set the b bit.
	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// With rounding, the L'L field carries the rounding mode
		// instead of evex.L().
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared (it is stored inverted, like the bits above) by the
	// RxrEvex bit of the index register if set, else by that of v.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}

// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
// Any of rm, v and r may be nil, in which case it contributes no
// register bits.
// For details about vex prefix see:
// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
	ab.vexflag = true
	rexR := 0
	if r != nil {
		rexR = regrex[r.Reg] & Rxr
	}
	rexB := 0
	rexX := 0
	if rm != nil {
		rexB = regrex[rm.Reg] & Rxb
		rexX = regrex[rm.Index] & Rxx
	}
	// Split the packed vex argument: bits 3-5 are the opcode map (m),
	// the bits selected by the 0x87 mask are copied to the prefix as is.
	vexM := (vex >> 3) & 0x7
	vexWLP := vex & 0x87
	vexV := byte(0)
	if v != nil {
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	vexV ^= 0xF // vvvv is stored inverted.
	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
		// Can use 2-byte encoding.
		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
	} else {
		// Must use 3-byte encoding.
		ab.Put3(0xc4,
			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
			vexV<<3|vexWLP,
		)
	}
	ab.Put1(opcode)
}
//
//	R         : 3 bit | legacy instructions     | N/A
//	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
//	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
//
// Examples:
//
//	REG_Z30 => 30
//	REG_X15 => 15
//	REG_R9  => 9
//	REG_AX  => 0
func regIndex(r int16) int {
	lower3bits := reg[r]
	// In Go, & and << share the same precedence and associate left to right,
	// so the next two expressions parse as (regrex[r] & Rxr) << 1 and
	// (regrex[r] & RxrEvex) << 0.
	high4bit := regrex[r] & Rxr << 1
	high5bit := regrex[r] & RxrEvex << 0
	return lower3bits | high4bit | high5bit
}

// avx2gatherValid reports whether p satisfies AVX2 gather constraints.
// Reports errors via ctxt.
func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// If any pair of the index, mask, or destination registers
	// are the same, illegal instruction trap (#UD) is triggered.
	index := regIndex(p.GetFrom3().Index)
	mask := regIndex(p.From.Reg)
	dest := regIndex(p.To.Reg)
	if dest == mask || dest == index || mask == index {
		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// avx512gatherValid reports whether p satisfies AVX512 gather constraints.
// Reports errors via ctxt.
func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// Illegal instruction trap (#UD) is triggered if the destination vector
	// register is the same as index vector in VSIB.
	index := regIndex(p.From.Index)
	dest := regIndex(p.To.Reg)
	if dest == index {
		ctxt.Diag("index and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// doasm appends the encoding of the single instruction p to ab and adds
// any relocations it needs to cursym (via obj.Addrel).
//
// The steps, in order:
//
//   - look up the optab entry for p.As in opindex; a missing entry is
//     diagnosed and nothing is emitted;
//   - emit whatever prefix byte prefixof reports for the From and To operands;
//   - reject gather instructions that fail avx2gatherValid/avx512gatherValid;
//   - classify the operands with oclass (storing the From/To classes in
//     p.Ft and p.Tt) and scan o.ytab for a matching operand pattern, then
//     emit the bytes selected by that entry's zcase;
//   - if no ytab entry was used, try the special cases listed in ymovtab;
//   - otherwise fall through to the bad label, which on non-AMD64 targets
//     retries the instruction with an operand exchanged into another
//     register, and finally reports "invalid instruction".
//
// REX bits are only accumulated in ab.rexflag here; asmins inserts the
// actual prefix byte after doasm returns. Errors are reported through
// ctxt.Diag, and a few conditions terminate the process via log.Fatalf.
func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]

	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}

	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}

	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}

	// A zero Ft/Tt is treated as "not classified yet"; compute and store it.
	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}

	// Operand classes are pre-multiplied by Ymax so they can be used
	// directly as row offsets into ycover (see the ymovtab loop below).
	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax

	// xo is 1 when the op bytes start with the 0x0f escape. Every skipped
	// ytab entry then accounts for one extra byte in o.op.
	xo := obj.Bool2int(o.op[0] == 0x0f)
	// z is the running index into o.op for the ytab entry being examined.
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64

	// args holds the operand classes in the order From, RestArgs..., To.
	// From and To are omitted when their class is Ynone.
	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}

	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}

			// Emit (or record in ab.rexflag) whatever the optab prefix
			// field demands before the opcode bytes themselves.
			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)

			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)

			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)

			case Pef3:
				ab.Put3(Pe, Pf3, Pm)

			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)

			case Pm: // opcode escape
				ab.Put1(Pm)

			case Pe: // 16 bit escape
				ab.Put1(Pe)

			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw

			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}

			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old cmd/internal/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}

			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}

			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}

			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			// A leading 0x0f escape byte is emitted immediately; op then
			// becomes the byte that follows it in the table.
			op = int(o.op[z])
			if op == 0x0f {
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}

			// yt.zcase selects how the opcode bytes and operands are laid out.
			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return

			case Zpseudo:
				break

			case Zlit:
				ab.PutOpBytesLit(z, &o.op)

			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough

			case Zm_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Z_m_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zm2_r:
				ab.Put2(byte(op), o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_i_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
				ab.Put1(byte(p.To.Offset))

			case Zibm_r, Zibr_m:
				ab.PutOpBytesLit(z, &o.op)
				if yt.zcase == Zibr_m {
					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				} else {
					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				}
				// The immediate is 1 byte unless the first ytab argument is
				// Yi32, in which case it is 2 bytes under the Pe prefix and
				// 4 bytes otherwise.
				switch {
				default:
					ab.Put1(byte(p.From.Offset))
				case yt.args[0] == Yi32 && o.prefix == Pe:
					ab.PutInt16(int16(p.From.Offset))
				case yt.args[0] == Yi32:
					ab.PutInt32(int32(p.From.Offset))
				}

			case Zaut_r:
				ab.Put1(0x8d) // leal
				if p.From.Type != obj.TYPE_ADDR {
					ctxt.Diag("asmins: Zaut sb type ADDR")
				}
				// Temporarily present the operand as a memory reference for
				// asmand, then restore its type.
				p.From.Type = obj.TYPE_MEM
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				p.From.Type = obj.TYPE_ADDR

			case Zm_o:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))

			case Zr_m:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])

			case Zvex_rm_v_r:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zvex_rm_v_ro:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_rm_vo:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_r_v:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				// Only the low 3 bits of the register number (relative to
				// X0 or Y0) are OR-ed into the byte taken from o.op[z+2].
				regnum := byte(0x7)
				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
					regnum &= byte(p.GetFrom3().Reg - REG_X0)
				} else {
					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
				}
				ab.Put1(o.op[z+2] | regnum)
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zvex_i_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zvex_v_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zvex_r_v_rm:
				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex_rm_r_vo:
				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_r_rm:
				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zvex_hr_rm_v_r:
				hr, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				// The extra register operand is carried in the upper four
				// bits of a trailing byte.
				ab.Put1(byte(regIndex(hr.Reg) << 4))

			case Zevex_k_rmo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))

			case Zevex_i_rm_vo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_vo:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, to, nil, kmask)
				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
				ab.Put1(byte(imm.Offset))

			case Zevex_i_r_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_r_k_rm:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, nil, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_r:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, nil, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, nil)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_k_r:
				imm, from, from3, kmask, to := unpackOps5(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_r_v_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_rm_k_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_r_k_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_k_r:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)

			case Zevex_r_v_k_rm:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, from3, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)

			case Zr_m_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zr_m_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))

			case Zcallindreg:
				// Zero-size R_CALLIND relocation at the instruction's own
				// offset, followed by the same encoding as Zo_m64.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc)
				r.Type = objabi.R_CALLIND
				r.Siz = 0
				fallthrough

			case Zo_m64:
				ab.Put1(byte(op))
				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)

			case Zm_ibo:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))

			case Zibo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Zibo_m_xm:
				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_ib, Zib_:
				if yt.zcase == Zib_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if p.As == AXABORT {
					ab.Put1(o.op[z+1])
				}
				ab.Put1(byte(vaddr(ctxt, p, a, nil)))

			case Zib_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))

			case Zil_rp:
				ab.rexflag |= regrex[p.To.Reg] & Rxb
				ab.Put1(byte(op + reg[p.To.Reg]))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Zo_iw:
				ab.Put1(byte(op))
				if p.From.Type != obj.TYPE_NONE {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				}

			case Ziq_rp:
				// Three encodings, chosen from the upper 32 bits of the
				// value (l) and the relocation size reported by vaddr.
				v = vaddr(ctxt, p, &p.From, &rel)
				l = int(v >> 32)
				if l == 0 && rel.Siz != 8 {
					// Upper half is zero and no 8-byte relocation is needed:
					// emit 0xb8+reg with a 4-byte immediate, clearing REX.W.
					ab.rexflag &^= (0x40 | Rxw)

					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt32(int32(v))
				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
					ab.Put1(0xc7)
					ab.asmando(ctxt, cursym, p, &p.To, 0)

					ab.PutInt32(int32(v)) // need all 8
				} else {
					// General case: full 8-byte immediate.
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(op + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt64(v)
				}

			case Zib_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_il, Zil_:
				if yt.zcase == Zil_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zm_ilo, Zilo_m:
				ab.Put1(byte(op))
				if yt.zcase == Zilo_m {
					a = &p.From
					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				} else {
					a = &p.To
					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				}

				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zil_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Z_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.To.Reg]))

			case Zrp_:
				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.From.Reg]))

			case Zcallcon, Zjmpcon:
				if yt.zcase == Zcallcon {
					ab.Put1(byte(op))
				} else {
					ab.Put1(o.op[z+1])
				}
				// 4-byte placeholder with an R_PCREL relocation; note that
				// no symbol is set here, only the addend.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Type = objabi.R_PCREL
				r.Siz = 4
				r.Add = p.To.Offset
				ab.PutInt32(0)

			case Zcallind:
				ab.Put2(byte(op), o.op[z+1])
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				if ctxt.Arch.Family == sys.AMD64 {
					r.Type = objabi.R_PCREL
				} else {
					r.Type = objabi.R_ADDR
				}
				r.Siz = 4
				r.Add = p.To.Offset
				r.Sym = p.To.Sym
				ab.PutInt32(0)

			case Zcall, Zcallduff:
				if p.To.Sym == nil {
					ctxt.Diag("call without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
					ctxt.Diag("directly calling duff when dynamically linking Go")
				}

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Maintain BP around call, since duffcopy/duffzero can't do it
					// (the call jumps into the middle of the function).
					// This makes it possible to see call sites for duffcopy/duffzero in
					// BP-based profiling tools like Linux perf (which is the
					// whole point of maintaining frame pointers in Go).
					// MOVQ BP, -16(SP)
					// LEAQ -16(SP), BP
					ab.Put(bpduff1)
				}
				ab.Put1(byte(op))
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Sym = p.To.Sym
				r.Add = p.To.Offset
				r.Type = objabi.R_CALL
				r.Siz = 4
				ab.PutInt32(0)

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Pop BP pushed above.
					// MOVQ 0(BP), BP
					ab.Put(bpduff2)
				}

			// TODO: jump across functions needs reloc
			case Zbr, Zjmp, Zloop:
				if p.As == AXBEGIN {
					ab.Put1(byte(op))
				}
				if p.To.Sym != nil {
					// Branch to a symbol: only allowed for Zjmp, encoded as
					// the long form with a relocation.
					if yt.zcase != Zjmp {
						ctxt.Diag("branch to ATEXT")
						ctxt.DiagFlush()
						log.Fatalf("bad code")
					}

					ab.Put1(o.op[z+1])
					r = obj.Addrel(cursym)
					r.Off = int32(p.Pc + int64(ab.Len()))
					r.Sym = p.To.Sym
					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
					// it can point to a trampoline instead of the destination itself.
					r.Type = objabi.R_CALL
					r.Siz = 4
					ab.PutInt32(0)
					break
				}

				// Assumes q is in this function.
				// TODO: Check in input, preserve in brchain.

				// Fill in backward jump now.
				q = p.To.Target()

				if q == nil {
					ctxt.Diag("jmp/branch/loop without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if p.Back&branchBackwards != 0 {
					// Displacement for the 2-byte short form (opcode + rel8).
					v = q.Pc - (p.Pc + 2)
					if v >= -128 && p.As != AXBEGIN {
						if p.As == AJCXZL {
							ab.Put1(0x67)
						}
						ab.Put2(byte(op), byte(v))
					} else if yt.zcase == Zloop {
						ctxt.Diag("loop too far: %v", p)
					} else {
						// Long form: re-base the displacement from the 2-byte
						// encoding to the 5-byte one, plus one more byte for
						// each extra byte emitted (XBEGIN opcode, 0x0f escape).
						v -= 5 - 2
						if p.As == AXBEGIN {
							v--
						}
						if yt.zcase == Zbr {
							ab.Put1(0x0f)
							v--
						}

						ab.Put1(o.op[z+1])
						ab.PutInt32(int32(v))
					}

					break
				}

				// Annotate target; will fill in later.
				// (p is pushed onto the list headed by q.Rel and linked
				// through Forwd; the displacement bytes below are placeholders.)
				p.Forwd = q.Rel

				q.Rel = p
				if p.Back&branchShort != 0 && p.As != AXBEGIN {
					if p.As == AJCXZL {
						ab.Put1(0x67)
					}
					ab.Put2(byte(op), 0)
				} else if yt.zcase == Zloop {
					ctxt.Diag("loop too far: %v", p)
				} else {
					if yt.zcase == Zbr {
						ab.Put1(0x0f)
					}
					ab.Put1(o.op[z+1])
					ab.PutInt32(0)
				}

			case Zbyte:
				// Raw data: op is the number of bytes to emit (1, 2, 4 or 8),
				// written least-significant byte first.
				v = vaddr(ctxt, p, &p.From, &rel)
				if rel.Siz != 0 {
					rel.Siz = uint8(op)
					r = obj.Addrel(cursym)
					*r = rel
					r.Off = int32(p.Pc + int64(ab.Len()))
				}

				ab.Put1(byte(v))
				if op > 1 {
					ab.Put1(byte(v >> 8))
					if op > 2 {
						ab.PutInt16(int16(v >> 16))
						if op > 4 {
							ab.PutInt32(int32(v >> 32))
						}
					}
				}
			}

			// A ytab entry matched and has been encoded.
			return
		}
	}
	// No ytab entry produced an encoding. Try the special cases in ymovtab,
	// a table terminated by an entry whose as field is zero.
	f3t = Ynone * Ymax
	if p.GetFrom3() != nil {
		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
	}
	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
		var pp obj.Prog
		var t []byte
		if p.As == mo[0].as {
			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
				t = mo[0].op[:]
				switch mo[0].code {
				default:
					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)

				case movLit:
					// Emit the table bytes verbatim up to the first zero byte.
					for z = 0; t[z] != 0; z++ {
						ab.Put1(t[z])
					}

				case movRegMem:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))

				case movMemReg:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))

				case movRegMem2op: // r,m - 2op
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)

				case movMemReg2op:
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)

				case movFullPtr:
					if t[0] != 0 {
						ab.Put1(t[0])
					}
					// The opcode is chosen by the segment register found in
					// p.To.Index.
					switch p.To.Index {
					default:
						goto bad

					case REG_DS:
						ab.Put1(0xc5)

					case REG_SS:
						ab.Put2(0x0f, 0xb2)

					case REG_ES:
						ab.Put1(0xc4)

					case REG_FS:
						ab.Put2(0x0f, 0xb4)

					case REG_GS:
						ab.Put2(0x0f, 0xb5)
					}

					ab.asmand(ctxt, cursym, p, &p.From, &p.To)

				case movDoubleShift:
					// A leading Pw or Pe in the table is a prefix marker, not
					// an opcode byte; consume it before reading t[0]/t[1].
					if t[0] == Pw {
						if ctxt.Arch.Family != sys.AMD64 {
							ctxt.Diag("asmins: illegal 64: %v", p)
						}
						ab.rexflag |= Pw
						t = t[1:]
					} else if t[0] == Pe {
						ab.Put1(Pe)
						t = t[1:]
					}

					switch p.From.Type {
					default:
						goto bad

					case obj.TYPE_CONST:
						ab.Put2(0x0f, t[0])
						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						ab.Put1(byte(p.From.Offset))

					case obj.TYPE_REG:
						switch p.From.Reg {
						default:
							goto bad

						case REG_CL, REG_CX:
							ab.Put2(0x0f, t[1])
							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						}
					}

				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
				// where you load the TLS base register into a register and then index off that
				// register to access the actual TLS variables. Systems that allow direct TLS access
				// are handled in prefixof above and should not be listed here.
				case movTLSReg:
					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
						ctxt.Diag("invalid load of TLS: %v", p)
					}

					if ctxt.Arch.Family == sys.I386 {
						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
						// where you load the TLS base register into a register and then index off that
						// register to access the actual TLS variables. Systems that allow direct TLS access
						// are handled in prefixof above and should not be listed here.
						switch ctxt.Headtype {
						default:
							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

						case objabi.Hlinux, objabi.Hfreebsd:
							if ctxt.Flag_shared {
								// Note that this is not generating the same insns as the other cases.
								//     MOV TLS, dst
								// becomes
								//     call __x86.get_pc_thunk.dst
								//     movl (gotpc + g@gotntpoff)(dst), dst
								// which is encoded as
								//     call __x86.get_pc_thunk.dst
								//     movq 0(dst), dst
								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
								// is g, which we can't check here, but will when we assemble the second
								// instruction.
								dst := p.To.Reg
								ab.Put1(0xe8)
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_CALL
								r.Siz = 4
								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
								ab.PutInt32(0)

								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_TLS_IE
								r.Siz = 4
								r.Add = 2
								ab.PutInt32(0)
							} else {
								// ELF TLS base is 0(GS).
								pp.From = p.From

								pp.From.Type = obj.TYPE_MEM
								pp.From.Reg = REG_GS
								pp.From.Offset = 0
								pp.From.Index = REG_NONE
								pp.From.Scale = 0
								ab.Put2(0x65, // GS
									0x8B)
								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
							}
						case objabi.Hplan9:
							pp.From = obj.Addr{}
							pp.From.Type = obj.TYPE_MEM
							pp.From.Name = obj.NAME_EXTERN
							pp.From.Sym = plan9privates
							pp.From.Offset = 0
							pp.From.Index = REG_NONE
							ab.Put1(0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

						case objabi.Hwindows:
							// Windows TLS base is always 0x14(FS).
							pp.From = p.From

							pp.From.Type = obj.TYPE_MEM
							pp.From.Reg = REG_FS
							pp.From.Offset = 0x14
							pp.From.Index = REG_NONE
							pp.From.Scale = 0
							ab.Put2(0x64, // FS
								0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						}
						// 386 handled; skip the switch below.
						break
					}

					// Every architecture other than I386 reaches this switch.
					switch ctxt.Headtype {
					default:
						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

					case objabi.Hlinux, objabi.Hfreebsd:
						if !ctxt.Flag_shared {
							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
						}
						// Note that this is not generating the same insn as the other cases.
						//     MOV TLS, R_to
						// becomes
						//     movq g@gottpoff(%rip), R_to
						// which is encoded as
						//     movq 0(%rip), R_to
						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
						// is g, which we can't check here, but will when we assemble the second
						// instruction.
						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)

						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
						r = obj.Addrel(cursym)
						r.Off = int32(p.Pc + int64(ab.Len()))
						r.Type = objabi.R_TLS_IE
						r.Siz = 4
						r.Add = -4
						ab.PutInt32(0)

					case objabi.Hplan9:
						pp.From = obj.Addr{}
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_EXTERN
						pp.From.Sym = plan9privates
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						ab.rexflag |= Pw
						ab.Put1(0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
						// TLS base is 0(FS).
						pp.From = p.From

						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_NONE
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x64, // FS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					}
				}
				return
			}
		}
	}
	goto bad

bad:
	if ctxt.Arch.Family != sys.AMD64 {
		// here, the assembly has failed.
		// if it's a byte instruction that has
		// unaddressable registers, try to
		// exchange registers and reissue the
		// instruction with the operands renamed.
		//
		// pp is a scratch copy of p; its operands are rewritten and it is
		// fed back through doasm between a pair of XCHG instructions.
		pp := *p

		unbytereg(&pp.From, &pp.Ft)
		unbytereg(&pp.To, &pp.Tt)

		z := int(p.From.Reg)
		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.To)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				}
				return
			}

			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
			}
			return
		}

		z = int(p.To.Reg)
		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.From)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				}
				return
			}

			if isax(&p.From) {
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
			}
			return
		}
	}

	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
}

// byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
// which is not referenced in a.
// If a is empty, it returns BX to account for MULB-like instructions
// that might use DX and AX.
5315 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5316 cana, canb, canc, cand := true, true, true, true 5317 if a.Type == obj.TYPE_NONE { 5318 cana, cand = false, false 5319 } 5320 5321 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5322 switch a.Reg { 5323 case REG_NONE: 5324 cana, cand = false, false 5325 case REG_AX, REG_AL, REG_AH: 5326 cana = false 5327 case REG_BX, REG_BL, REG_BH: 5328 canb = false 5329 case REG_CX, REG_CL, REG_CH: 5330 canc = false 5331 case REG_DX, REG_DL, REG_DH: 5332 cand = false 5333 } 5334 } 5335 5336 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5337 switch a.Index { 5338 case REG_AX: 5339 cana = false 5340 case REG_BX: 5341 canb = false 5342 case REG_CX: 5343 canc = false 5344 case REG_DX: 5345 cand = false 5346 } 5347 } 5348 5349 switch { 5350 case cana: 5351 return REG_AX 5352 case canb: 5353 return REG_BX 5354 case canc: 5355 return REG_CX 5356 case cand: 5357 return REG_DX 5358 default: 5359 ctxt.Diag("impossible byte register") 5360 ctxt.DiagFlush() 5361 log.Fatalf("bad code") 5362 return 0 5363 } 5364 } 5365 5366 func isbadbyte(a *obj.Addr) bool { 5367 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5368 } 5369 5370 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5371 ab.Reset() 5372 5373 ab.rexflag = 0 5374 ab.vexflag = false 5375 ab.evexflag = false 5376 mark := ab.Len() 5377 ab.doasm(ctxt, cursym, p) 5378 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5379 // as befits the whole approach of the architecture, 5380 // the rex prefix must appear before the first opcode byte 5381 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5382 // before the 0f opcode escape!), or it might be ignored. 5383 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5384 if ctxt.Arch.Family != sys.AMD64 { 5385 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5386 } 5387 n := ab.Len() 5388 var np int 5389 for np = mark; np < n; np++ { 5390 c := ab.At(np) 5391 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5392 break 5393 } 5394 } 5395 ab.Insert(np, byte(0x40|ab.rexflag)) 5396 } 5397 5398 n := ab.Len() 5399 for i := len(cursym.R) - 1; i >= 0; i-- { 5400 r := &cursym.R[i] 5401 if int64(r.Off) < p.Pc { 5402 break 5403 } 5404 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5405 r.Off++ 5406 } 5407 if r.Type == objabi.R_PCREL { 5408 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5409 // PC-relative addressing is relative to the end of the instruction, 5410 // but the relocations applied by the linker are relative to the end 5411 // of the relocation. Because immediate instruction 5412 // arguments can follow the PC-relative memory reference in the 5413 // instruction encoding, the two may not coincide. In this case, 5414 // adjust addend so that linker can keep relocating relative to the 5415 // end of the relocation. 5416 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5417 } else if ctxt.Arch.Family == sys.I386 { 5418 // On 386 PC-relative addressing (for non-call/jmp instructions) 5419 // assumes that the previous instruction loaded the PC of the end 5420 // of that instruction into CX, so the adjustment is relative to 5421 // that. 5422 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5423 } 5424 } 5425 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5426 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5427 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5428 } 5429 5430 } 5431 } 5432 5433 // unpackOps4 extracts 4 operands from p. 
5434 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5435 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To 5436 } 5437 5438 // unpackOps5 extracts 5 operands from p. 5439 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5440 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To 5441 }