github.com/bir3/gocompiler@v0.9.2202/src/cmd/internal/obj/x86/asm6.go (about) 1 // Inferno utils/6l/span.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
30 31 package x86 32 33 import ( 34 "github.com/bir3/gocompiler/src/cmd/internal/obj" 35 "github.com/bir3/gocompiler/src/cmd/internal/objabi" 36 "github.com/bir3/gocompiler/src/cmd/internal/sys" 37 "encoding/binary" 38 "fmt" 39 "github.com/bir3/gocompiler/src/internal/buildcfg" 40 "log" 41 "strings" 42 ) 43 44 var ( 45 plan9privates *obj.LSym 46 ) 47 48 // Instruction layout. 49 50 // Loop alignment constants: 51 // want to align loop entry to loopAlign-byte boundary, 52 // and willing to insert at most maxLoopPad bytes of NOP to do so. 53 // We define a loop entry as the target of a backward jump. 54 // 55 // gcc uses maxLoopPad = 10 for its 'generic x86-64' config, 56 // and it aligns all jump targets, not just backward jump targets. 57 // 58 // As of 6/1/2012, the effect of setting maxLoopPad = 10 here 59 // is very slight but negative, so the alignment is disabled by 60 // setting MaxLoopPad = 0. The code is here for reference and 61 // for future experiments. 62 const ( 63 loopAlign = 16 64 maxLoopPad = 0 65 ) 66 67 // Bit flags that are used to express jump target properties. 68 const ( 69 // branchBackwards marks targets that are located behind. 70 // Used to express jumps to loop headers. 71 branchBackwards = (1 << iota) 72 // branchShort marks branches those target is close, 73 // with offset is in -128..127 range. 74 branchShort 75 // branchLoopHead marks loop entry. 76 // Used to insert padding for misaligned loops. 77 branchLoopHead 78 ) 79 80 // opBytes holds optab encoding bytes. 81 // Each ytab reserves fixed amount of bytes in this array. 82 // 83 // The size should be the minimal number of bytes that 84 // are enough to hold biggest optab op lines. 
85 type opBytes [31]uint8 86 87 type Optab struct { 88 as obj.As 89 ytab []ytab 90 prefix uint8 91 op opBytes 92 } 93 94 type movtab struct { 95 as obj.As 96 ft uint8 97 f3t uint8 98 tt uint8 99 code uint8 100 op [4]uint8 101 } 102 103 const ( 104 Yxxx = iota 105 Ynone 106 Yi0 // $0 107 Yi1 // $1 108 Yu2 // $x, x fits in uint2 109 Yi8 // $x, x fits in int8 110 Yu8 // $x, x fits in uint8 111 Yu7 // $x, x in 0..127 (fits in both int8 and uint8) 112 Ys32 113 Yi32 114 Yi64 115 Yiauto 116 Yal 117 Ycl 118 Yax 119 Ycx 120 Yrb 121 Yrl 122 Yrl32 // Yrl on 32-bit system 123 Yrf 124 Yf0 125 Yrx 126 Ymb 127 Yml 128 Ym 129 Ybr 130 Ycs 131 Yss 132 Yds 133 Yes 134 Yfs 135 Ygs 136 Ygdtr 137 Yidtr 138 Yldtr 139 Ymsw 140 Ytask 141 Ycr0 142 Ycr1 143 Ycr2 144 Ycr3 145 Ycr4 146 Ycr5 147 Ycr6 148 Ycr7 149 Ycr8 150 Ydr0 151 Ydr1 152 Ydr2 153 Ydr3 154 Ydr4 155 Ydr5 156 Ydr6 157 Ydr7 158 Ytr0 159 Ytr1 160 Ytr2 161 Ytr3 162 Ytr4 163 Ytr5 164 Ytr6 165 Ytr7 166 Ymr 167 Ymm 168 Yxr0 // X0 only. "<XMM0>" notation in Intel manual. 169 YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex 170 Yxr // X0..X15 171 YxrEvex // X0..X31 172 Yxm 173 YxmEvex // YxrEvex+Ym 174 Yxvm // VSIB vector array; vm32x/vm64x 175 YxvmEvex // Yxvm which permits High-16 X register as index. 176 YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex 177 Yyr // Y0..Y15 178 YyrEvex // Y0..Y31 179 Yym 180 YymEvex // YyrEvex+Ym 181 Yyvm // VSIB vector array; vm32y/vm64y 182 YyvmEvex // Yyvm which permits High-16 Y register as index. 
183 YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex 184 Yzr // Z0..Z31 185 Yzm // Yzr+Ym 186 Yzvm // VSIB vector array; vm32z/vm64z 187 Yk0 // K0 188 Yknot0 // K1..K7; write mask 189 Yk // K0..K7; used for KOP 190 Ykm // Yk+Ym; used for KOP 191 Ytls 192 Ytextsize 193 Yindir 194 Ymax 195 ) 196 197 const ( 198 Zxxx = iota 199 Zlit 200 Zlitm_r 201 Zlitr_m 202 Zlit_m_r 203 Z_rp 204 Zbr 205 Zcall 206 Zcallcon 207 Zcallduff 208 Zcallind 209 Zcallindreg 210 Zib_ 211 Zib_rp 212 Zibo_m 213 Zibo_m_xm 214 Zil_ 215 Zil_rp 216 Ziq_rp 217 Zilo_m 218 Zjmp 219 Zjmpcon 220 Zloop 221 Zo_iw 222 Zm_o 223 Zm_r 224 Z_m_r 225 Zm2_r 226 Zm_r_xm 227 Zm_r_i_xm 228 Zm_r_xm_nr 229 Zr_m_xm_nr 230 Zibm_r // mmx1,mmx2/mem64,imm8 231 Zibr_m 232 Zmb_r 233 Zaut_r 234 Zo_m 235 Zo_m64 236 Zpseudo 237 Zr_m 238 Zr_m_xm 239 Zrp_ 240 Z_ib 241 Z_il 242 Zm_ibo 243 Zm_ilo 244 Zib_rr 245 Zil_rr 246 Zbyte 247 248 Zvex_rm_v_r 249 Zvex_rm_v_ro 250 Zvex_r_v_rm 251 Zvex_i_rm_vo 252 Zvex_v_rm_r 253 Zvex_i_rm_r 254 Zvex_i_r_v 255 Zvex_i_rm_v_r 256 Zvex 257 Zvex_rm_r_vo 258 Zvex_i_r_rm 259 Zvex_hr_rm_v_r 260 261 Zevex_first 262 Zevex_i_r_k_rm 263 Zevex_i_r_rm 264 Zevex_i_rm_k_r 265 Zevex_i_rm_k_vo 266 Zevex_i_rm_r 267 Zevex_i_rm_v_k_r 268 Zevex_i_rm_v_r 269 Zevex_i_rm_vo 270 Zevex_k_rmo 271 Zevex_r_k_rm 272 Zevex_r_v_k_rm 273 Zevex_r_v_rm 274 Zevex_rm_k_r 275 Zevex_rm_v_k_r 276 Zevex_rm_v_r 277 Zevex_last 278 279 Zmax 280 ) 281 282 const ( 283 Px = 0 284 Px1 = 1 // symbolic; exact value doesn't matter 285 P32 = 0x32 // 32-bit only 286 Pe = 0x66 // operand escape 287 Pm = 0x0f // 2byte opcode escape 288 Pq = 0xff // both escapes: 66 0f 289 Pb = 0xfe // byte operands 290 Pf2 = 0xf2 // xmm escape 1: f2 0f 291 Pf3 = 0xf3 // xmm escape 2: f3 0f 292 Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f 293 Pq3 = 0x67 // xmm escape 3: 66 48 0f 294 Pq4 = 0x68 // xmm escape 4: 66 0F 38 295 Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38 296 Pq5 = 0x6a // xmm escape 5: F3 0F 38 297 Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38 298 Pfw 
= 0xf4 // Pf3 with Rex.w: f3 48 0f 299 Pw = 0x48 // Rex.w 300 Pw8 = 0x90 // symbolic; exact value doesn't matter 301 Py = 0x80 // defaults to 64-bit mode 302 Py1 = 0x81 // symbolic; exact value doesn't matter 303 Py3 = 0x83 // symbolic; exact value doesn't matter 304 Pavx = 0x84 // symbolic; exact value doesn't matter 305 306 RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R 307 Rxw = 1 << 3 // =1, 64-bit operand size 308 Rxr = 1 << 2 // extend modrm reg 309 Rxx = 1 << 1 // extend sib index 310 Rxb = 1 << 0 // extend modrm r/m, sib base, or opcode reg 311 ) 312 313 const ( 314 // Encoding for VEX prefix in tables. 315 // The P, L, and W fields are chosen to match 316 // their eventual locations in the VEX prefix bytes. 317 318 // Encoding for VEX prefix in tables. 319 // The P, L, and W fields are chosen to match 320 // their eventual locations in the VEX prefix bytes. 321 322 // Using spare bit to make leading [E]VEX encoding byte different from 323 // 0x0f even if all other VEX fields are 0. 
324 avxEscape = 1 << 6 325 326 // P field - 2 bits 327 vex66 = 1 << 0 328 vexF3 = 2 << 0 329 vexF2 = 3 << 0 330 // L field - 1 bit 331 vexLZ = 0 << 2 332 vexLIG = 0 << 2 333 vex128 = 0 << 2 334 vex256 = 1 << 2 335 // W field - 1 bit 336 vexWIG = 0 << 7 337 vexW0 = 0 << 7 338 vexW1 = 1 << 7 339 // M field - 5 bits, but mostly reserved; we can store up to 3 340 vex0F = 1 << 3 341 vex0F38 = 2 << 3 342 vex0F3A = 3 << 3 343 ) 344 345 var ycover [Ymax * Ymax]uint8 346 347 var reg [MAXREG]int 348 349 var regrex [MAXREG + 1]int 350 351 var ynone = []ytab{ 352 {Zlit, 1, argList{}}, 353 } 354 355 var ytext = []ytab{ 356 {Zpseudo, 0, argList{Ymb, Ytextsize}}, 357 {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}}, 358 } 359 360 var ynop = []ytab{ 361 {Zpseudo, 0, argList{}}, 362 {Zpseudo, 0, argList{Yiauto}}, 363 {Zpseudo, 0, argList{Yml}}, 364 {Zpseudo, 0, argList{Yrf}}, 365 {Zpseudo, 0, argList{Yxr}}, 366 {Zpseudo, 0, argList{Yiauto}}, 367 {Zpseudo, 0, argList{Yml}}, 368 {Zpseudo, 0, argList{Yrf}}, 369 {Zpseudo, 1, argList{Yxr}}, 370 } 371 372 var yfuncdata = []ytab{ 373 {Zpseudo, 0, argList{Yi32, Ym}}, 374 } 375 376 var ypcdata = []ytab{ 377 {Zpseudo, 0, argList{Yi32, Yi32}}, 378 } 379 380 var yxorb = []ytab{ 381 {Zib_, 1, argList{Yi32, Yal}}, 382 {Zibo_m, 2, argList{Yi32, Ymb}}, 383 {Zr_m, 1, argList{Yrb, Ymb}}, 384 {Zm_r, 1, argList{Ymb, Yrb}}, 385 } 386 387 var yaddl = []ytab{ 388 {Zibo_m, 2, argList{Yi8, Yml}}, 389 {Zil_, 1, argList{Yi32, Yax}}, 390 {Zilo_m, 2, argList{Yi32, Yml}}, 391 {Zr_m, 1, argList{Yrl, Yml}}, 392 {Zm_r, 1, argList{Yml, Yrl}}, 393 } 394 395 var yincl = []ytab{ 396 {Z_rp, 1, argList{Yrl}}, 397 {Zo_m, 2, argList{Yml}}, 398 } 399 400 var yincq = []ytab{ 401 {Zo_m, 2, argList{Yml}}, 402 } 403 404 var ycmpb = []ytab{ 405 {Z_ib, 1, argList{Yal, Yi32}}, 406 {Zm_ibo, 2, argList{Ymb, Yi32}}, 407 {Zm_r, 1, argList{Ymb, Yrb}}, 408 {Zr_m, 1, argList{Yrb, Ymb}}, 409 } 410 411 var ycmpl = []ytab{ 412 {Zm_ibo, 2, argList{Yml, Yi8}}, 413 {Z_il, 1, argList{Yax, Yi32}}, 
414 {Zm_ilo, 2, argList{Yml, Yi32}}, 415 {Zm_r, 1, argList{Yml, Yrl}}, 416 {Zr_m, 1, argList{Yrl, Yml}}, 417 } 418 419 var yshb = []ytab{ 420 {Zo_m, 2, argList{Yi1, Ymb}}, 421 {Zibo_m, 2, argList{Yu8, Ymb}}, 422 {Zo_m, 2, argList{Ycx, Ymb}}, 423 } 424 425 var yshl = []ytab{ 426 {Zo_m, 2, argList{Yi1, Yml}}, 427 {Zibo_m, 2, argList{Yu8, Yml}}, 428 {Zo_m, 2, argList{Ycl, Yml}}, 429 {Zo_m, 2, argList{Ycx, Yml}}, 430 } 431 432 var ytestl = []ytab{ 433 {Zil_, 1, argList{Yi32, Yax}}, 434 {Zilo_m, 2, argList{Yi32, Yml}}, 435 {Zr_m, 1, argList{Yrl, Yml}}, 436 {Zm_r, 1, argList{Yml, Yrl}}, 437 } 438 439 var ymovb = []ytab{ 440 {Zr_m, 1, argList{Yrb, Ymb}}, 441 {Zm_r, 1, argList{Ymb, Yrb}}, 442 {Zib_rp, 1, argList{Yi32, Yrb}}, 443 {Zibo_m, 2, argList{Yi32, Ymb}}, 444 } 445 446 var ybtl = []ytab{ 447 {Zibo_m, 2, argList{Yi8, Yml}}, 448 {Zr_m, 1, argList{Yrl, Yml}}, 449 } 450 451 var ymovw = []ytab{ 452 {Zr_m, 1, argList{Yrl, Yml}}, 453 {Zm_r, 1, argList{Yml, Yrl}}, 454 {Zil_rp, 1, argList{Yi32, Yrl}}, 455 {Zilo_m, 2, argList{Yi32, Yml}}, 456 {Zaut_r, 2, argList{Yiauto, Yrl}}, 457 } 458 459 var ymovl = []ytab{ 460 {Zr_m, 1, argList{Yrl, Yml}}, 461 {Zm_r, 1, argList{Yml, Yrl}}, 462 {Zil_rp, 1, argList{Yi32, Yrl}}, 463 {Zilo_m, 2, argList{Yi32, Yml}}, 464 {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD 465 {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD 466 {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit) 467 {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit) 468 {Zaut_r, 2, argList{Yiauto, Yrl}}, 469 } 470 471 var yret = []ytab{ 472 {Zo_iw, 1, argList{}}, 473 {Zo_iw, 1, argList{Yi32}}, 474 } 475 476 var ymovq = []ytab{ 477 // valid in 32-bit mode 478 {Zm_r_xm_nr, 1, argList{Ym, Ymr}}, // 0x6f MMX MOVQ (shorter encoding) 479 {Zr_m_xm_nr, 1, argList{Ymr, Ym}}, // 0x7f MMX MOVQ 480 {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q 481 {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2 482 {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> 
xmm2/m64 483 484 // valid only in 64-bit mode, usually with 64-bit prefix 485 {Zr_m, 1, argList{Yrl, Yml}}, // 0x89 486 {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b 487 {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0) 488 {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate 489 {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0) 490 {Zm_r_xm, 1, argList{Ymm, Ymr}}, // 0x6e MMX MOVD 491 {Zr_m_xm, 1, argList{Ymr, Ymm}}, // 0x7e MMX MOVD 492 {Zm_r_xm, 2, argList{Yml, Yxr}}, // Pe, 0x6e MOVD xmm load 493 {Zr_m_xm, 2, argList{Yxr, Yml}}, // Pe, 0x7e MOVD xmm store 494 {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ 495 } 496 497 var ymovbe = []ytab{ 498 {Zlitm_r, 3, argList{Ym, Yrl}}, 499 {Zlitr_m, 3, argList{Yrl, Ym}}, 500 } 501 502 var ym_rl = []ytab{ 503 {Zm_r, 1, argList{Ym, Yrl}}, 504 } 505 506 var yrl_m = []ytab{ 507 {Zr_m, 1, argList{Yrl, Ym}}, 508 } 509 510 var ymb_rl = []ytab{ 511 {Zmb_r, 1, argList{Ymb, Yrl}}, 512 } 513 514 var yml_rl = []ytab{ 515 {Zm_r, 1, argList{Yml, Yrl}}, 516 } 517 518 var yrl_ml = []ytab{ 519 {Zr_m, 1, argList{Yrl, Yml}}, 520 } 521 522 var yml_mb = []ytab{ 523 {Zr_m, 1, argList{Yrb, Ymb}}, 524 {Zm_r, 1, argList{Ymb, Yrb}}, 525 } 526 527 var yrb_mb = []ytab{ 528 {Zr_m, 1, argList{Yrb, Ymb}}, 529 } 530 531 var yxchg = []ytab{ 532 {Z_rp, 1, argList{Yax, Yrl}}, 533 {Zrp_, 1, argList{Yrl, Yax}}, 534 {Zr_m, 1, argList{Yrl, Yml}}, 535 {Zm_r, 1, argList{Yml, Yrl}}, 536 } 537 538 var ydivl = []ytab{ 539 {Zm_o, 2, argList{Yml}}, 540 } 541 542 var ydivb = []ytab{ 543 {Zm_o, 2, argList{Ymb}}, 544 } 545 546 var yimul = []ytab{ 547 {Zm_o, 2, argList{Yml}}, 548 {Zib_rr, 1, argList{Yi8, Yrl}}, 549 {Zil_rr, 1, argList{Yi32, Yrl}}, 550 {Zm_r, 2, argList{Yml, Yrl}}, 551 } 552 553 var yimul3 = []ytab{ 554 {Zibm_r, 2, argList{Yi8, Yml, Yrl}}, 555 {Zibm_r, 2, argList{Yi32, Yml, Yrl}}, 556 } 557 558 var ybyte = []ytab{ 559 {Zbyte, 1, argList{Yi64}}, 560 } 561 562 var yin = []ytab{ 563 {Zib_, 1, argList{Yi32}}, 564 {Zlit, 1, argList{}}, 565 } 566 
567 var yint = []ytab{ 568 {Zib_, 1, argList{Yi32}}, 569 } 570 571 var ypushl = []ytab{ 572 {Zrp_, 1, argList{Yrl}}, 573 {Zm_o, 2, argList{Ym}}, 574 {Zib_, 1, argList{Yi8}}, 575 {Zil_, 1, argList{Yi32}}, 576 } 577 578 var ypopl = []ytab{ 579 {Z_rp, 1, argList{Yrl}}, 580 {Zo_m, 2, argList{Ym}}, 581 } 582 583 var ywrfsbase = []ytab{ 584 {Zm_o, 2, argList{Yrl}}, 585 } 586 587 var yrdrand = []ytab{ 588 {Zo_m, 2, argList{Yrl}}, 589 } 590 591 var yclflush = []ytab{ 592 {Zo_m, 2, argList{Ym}}, 593 } 594 595 var ybswap = []ytab{ 596 {Z_rp, 2, argList{Yrl}}, 597 } 598 599 var yscond = []ytab{ 600 {Zo_m, 2, argList{Ymb}}, 601 } 602 603 var yjcond = []ytab{ 604 {Zbr, 0, argList{Ybr}}, 605 {Zbr, 0, argList{Yi0, Ybr}}, 606 {Zbr, 1, argList{Yi1, Ybr}}, 607 } 608 609 var yloop = []ytab{ 610 {Zloop, 1, argList{Ybr}}, 611 } 612 613 var ycall = []ytab{ 614 {Zcallindreg, 0, argList{Yml}}, 615 {Zcallindreg, 2, argList{Yrx, Yrx}}, 616 {Zcallind, 2, argList{Yindir}}, 617 {Zcall, 0, argList{Ybr}}, 618 {Zcallcon, 1, argList{Yi32}}, 619 } 620 621 var yduff = []ytab{ 622 {Zcallduff, 1, argList{Yi32}}, 623 } 624 625 var yjmp = []ytab{ 626 {Zo_m64, 2, argList{Yml}}, 627 {Zjmp, 0, argList{Ybr}}, 628 {Zjmpcon, 1, argList{Yi32}}, 629 } 630 631 var yfmvd = []ytab{ 632 {Zm_o, 2, argList{Ym, Yf0}}, 633 {Zo_m, 2, argList{Yf0, Ym}}, 634 {Zm_o, 2, argList{Yrf, Yf0}}, 635 {Zo_m, 2, argList{Yf0, Yrf}}, 636 } 637 638 var yfmvdp = []ytab{ 639 {Zo_m, 2, argList{Yf0, Ym}}, 640 {Zo_m, 2, argList{Yf0, Yrf}}, 641 } 642 643 var yfmvf = []ytab{ 644 {Zm_o, 2, argList{Ym, Yf0}}, 645 {Zo_m, 2, argList{Yf0, Ym}}, 646 } 647 648 var yfmvx = []ytab{ 649 {Zm_o, 2, argList{Ym, Yf0}}, 650 } 651 652 var yfmvp = []ytab{ 653 {Zo_m, 2, argList{Yf0, Ym}}, 654 } 655 656 var yfcmv = []ytab{ 657 {Zm_o, 2, argList{Yrf, Yf0}}, 658 } 659 660 var yfadd = []ytab{ 661 {Zm_o, 2, argList{Ym, Yf0}}, 662 {Zm_o, 2, argList{Yrf, Yf0}}, 663 {Zo_m, 2, argList{Yf0, Yrf}}, 664 } 665 666 var yfxch = []ytab{ 667 {Zo_m, 2, argList{Yf0, Yrf}}, 668 
{Zm_o, 2, argList{Yrf, Yf0}}, 669 } 670 671 var ycompp = []ytab{ 672 {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1 673 } 674 675 var ystsw = []ytab{ 676 {Zo_m, 2, argList{Ym}}, 677 {Zlit, 1, argList{Yax}}, 678 } 679 680 var ysvrs_mo = []ytab{ 681 {Zm_o, 2, argList{Ym}}, 682 } 683 684 // unaryDst version of "ysvrs_mo". 685 var ysvrs_om = []ytab{ 686 {Zo_m, 2, argList{Ym}}, 687 } 688 689 var ymm = []ytab{ 690 {Zm_r_xm, 1, argList{Ymm, Ymr}}, 691 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 692 } 693 694 var yxm = []ytab{ 695 {Zm_r_xm, 1, argList{Yxm, Yxr}}, 696 } 697 698 var yxm_q4 = []ytab{ 699 {Zm_r, 1, argList{Yxm, Yxr}}, 700 } 701 702 var yxcvm1 = []ytab{ 703 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 704 {Zm_r_xm, 2, argList{Yxm, Ymr}}, 705 } 706 707 var yxcvm2 = []ytab{ 708 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 709 {Zm_r_xm, 2, argList{Ymm, Yxr}}, 710 } 711 712 var yxr = []ytab{ 713 {Zm_r_xm, 1, argList{Yxr, Yxr}}, 714 } 715 716 var yxr_ml = []ytab{ 717 {Zr_m_xm, 1, argList{Yxr, Yml}}, 718 } 719 720 var ymr = []ytab{ 721 {Zm_r, 1, argList{Ymr, Ymr}}, 722 } 723 724 var ymr_ml = []ytab{ 725 {Zr_m_xm, 1, argList{Ymr, Yml}}, 726 } 727 728 var yxcmpi = []ytab{ 729 {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}}, 730 } 731 732 var yxmov = []ytab{ 733 {Zm_r_xm, 1, argList{Yxm, Yxr}}, 734 {Zr_m_xm, 1, argList{Yxr, Yxm}}, 735 } 736 737 var yxcvfl = []ytab{ 738 {Zm_r_xm, 1, argList{Yxm, Yrl}}, 739 } 740 741 var yxcvlf = []ytab{ 742 {Zm_r_xm, 1, argList{Yml, Yxr}}, 743 } 744 745 var yxcvfq = []ytab{ 746 {Zm_r_xm, 2, argList{Yxm, Yrl}}, 747 } 748 749 var yxcvqf = []ytab{ 750 {Zm_r_xm, 2, argList{Yml, Yxr}}, 751 } 752 753 var yps = []ytab{ 754 {Zm_r_xm, 1, argList{Ymm, Ymr}}, 755 {Zibo_m_xm, 2, argList{Yi8, Ymr}}, 756 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 757 {Zibo_m_xm, 3, argList{Yi8, Yxr}}, 758 } 759 760 var yxrrl = []ytab{ 761 {Zm_r, 1, argList{Yxr, Yrl}}, 762 } 763 764 var ymrxr = []ytab{ 765 {Zm_r, 1, argList{Ymr, Yxr}}, 766 {Zm_r_xm, 1, argList{Yxm, Yxr}}, 767 } 768 769 var ymshuf = []ytab{ 770 
{Zibm_r, 2, argList{Yi8, Ymm, Ymr}}, 771 } 772 773 var ymshufb = []ytab{ 774 {Zm2_r, 2, argList{Yxm, Yxr}}, 775 } 776 777 // It should never have more than 1 entry, 778 // because some optab entries have opcode sequences that 779 // are longer than 2 bytes (zoffset=2 here), 780 // ROUNDPD and ROUNDPS and recently added BLENDPD, 781 // to name a few. 782 var yxshuf = []ytab{ 783 {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, 784 } 785 786 var yextrw = []ytab{ 787 {Zibm_r, 2, argList{Yu8, Yxr, Yrl}}, 788 {Zibr_m, 2, argList{Yu8, Yxr, Yml}}, 789 } 790 791 var yextr = []ytab{ 792 {Zibr_m, 3, argList{Yu8, Yxr, Ymm}}, 793 } 794 795 var yinsrw = []ytab{ 796 {Zibm_r, 2, argList{Yu8, Yml, Yxr}}, 797 } 798 799 var yinsr = []ytab{ 800 {Zibm_r, 3, argList{Yu8, Ymm, Yxr}}, 801 } 802 803 var ypsdq = []ytab{ 804 {Zibo_m, 2, argList{Yi8, Yxr}}, 805 } 806 807 var ymskb = []ytab{ 808 {Zm_r_xm, 2, argList{Yxr, Yrl}}, 809 {Zm_r_xm, 1, argList{Ymr, Yrl}}, 810 } 811 812 var ycrc32l = []ytab{ 813 {Zlitm_r, 0, argList{Yml, Yrl}}, 814 } 815 816 var ycrc32b = []ytab{ 817 {Zlitm_r, 0, argList{Ymb, Yrl}}, 818 } 819 820 var yprefetch = []ytab{ 821 {Zm_o, 2, argList{Ym}}, 822 } 823 824 var yaes = []ytab{ 825 {Zlitm_r, 2, argList{Yxm, Yxr}}, 826 } 827 828 var yxbegin = []ytab{ 829 {Zjmp, 1, argList{Ybr}}, 830 } 831 832 var yxabort = []ytab{ 833 {Zib_, 1, argList{Yu8}}, 834 } 835 836 var ylddqu = []ytab{ 837 {Zm_r, 1, argList{Ym, Yxr}}, 838 } 839 840 var ypalignr = []ytab{ 841 {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, 842 } 843 844 var ysha256rnds2 = []ytab{ 845 {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}}, 846 } 847 848 var yblendvpd = []ytab{ 849 {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}}, 850 } 851 852 var ymmxmm0f38 = []ytab{ 853 {Zlitm_r, 3, argList{Ymm, Ymr}}, 854 {Zlitm_r, 5, argList{Yxm, Yxr}}, 855 } 856 857 var yextractps = []ytab{ 858 {Zibr_m, 2, argList{Yu2, Yxr, Yml}}, 859 } 860 861 var ysha1rnds4 = []ytab{ 862 {Zibm_r, 2, argList{Yu2, Yxm, Yxr}}, 863 } 864 865 // You are doasm, holding in your hand a *obj.Prog 
// with p.As set to, say, ACRC32, and p.From and p.To as operands (obj.Addr).
// The linker scans optab to find the entry with the given p.As and then looks
// through the ytable for that instruction (the second field in the optab
// struct) for a line whose argList matches the Ytypes of the operands (p.From
// and p.To in the two-operand case). The
// function oclass computes the specific Ytype of an operand and then the set
// of more general Ytypes that it satisfies is implied by the ycover table, set
// up in instinit. For example, oclass distinguishes the constants 0 and 1
// from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the 2nd entry in the line (the zoffset). When a matching line
// is found, that z pointer has the extra data to use in laying down the
// instruction bytes. The actual bytes laid down are a function of the 1st
// entry in the line (that is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL. The optab line says:
//
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_, z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
//	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit. That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
921 var optab = 922 // as, ytab, andproto, opcode 923 [...]Optab{ 924 {obj.AXXX, nil, 0, opBytes{}}, 925 {AAAA, ynone, P32, opBytes{0x37}}, 926 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 927 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 928 {AAAS, ynone, P32, opBytes{0x3f}}, 929 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 930 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 933 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 934 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 935 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 936 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 937 {AADDPD, yxm, Pq, opBytes{0x58}}, 938 {AADDPS, yxm, Pm, opBytes{0x58}}, 939 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 940 {AADDSD, yxm, Pf2, opBytes{0x58}}, 941 {AADDSS, yxm, Pf3, opBytes{0x58}}, 942 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 943 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 944 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 945 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 946 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 947 {AADJSP, nil, 0, opBytes{}}, 948 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 949 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 950 {AANDNPD, yxm, Pq, opBytes{0x55}}, 951 {AANDNPS, yxm, Pm, opBytes{0x55}}, 952 {AANDPD, yxm, Pq, opBytes{0x54}}, 953 {AANDPS, yxm, Pm, opBytes{0x54}}, 954 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 956 {AARPL, yrl_ml, P32, opBytes{0x63}}, 957 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 958 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 959 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 960 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 961 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 962 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 963 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 964 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 965 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 966 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 967 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 968 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 969 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 970 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 971 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 972 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 973 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 974 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 975 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 976 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 977 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 978 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 979 {ABYTE, ybyte, Px, opBytes{1}}, 980 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 981 {ACBW, ynone, Pe, opBytes{0x98}}, 982 {ACDQ, ynone, Px, opBytes{0x99}}, 983 {ACDQE, ynone, Pw, opBytes{0x98}}, 984 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 985 {ACLC, ynone, Px, opBytes{0xf8}}, 986 {ACLD, ynone, Px, opBytes{0xfc}}, 987 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 988 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 989 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 990 {ACLI, ynone, Px, opBytes{0xfa}}, 991 {ACLTS, ynone, Pm, opBytes{0x06}}, 992 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 993 {ACMC, ynone, Px, opBytes{0xf5}}, 994 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 995 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 996 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 997 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 998 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 999 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 1000 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1001 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1002 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1003 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1004 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1005 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1006 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1007 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1008 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1009 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1010 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1011 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1012 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1013 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1014 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1015 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1016 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1017 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1018 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1019 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1020 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1021 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1022 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1023 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1024 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1025 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1026 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1027 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1028 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1029 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1030 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1031 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1032 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1033 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1034 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1035 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1036 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1037 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1038 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1039 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1040 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1041 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1042 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1043 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1044 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1045 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1046 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1047 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1048 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1049 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1050 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1051 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1052 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1053 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1054 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1055 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1056 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1057 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1058 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1059 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1060 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1061 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1062 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1063 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1064 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1065 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1066 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1067 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1068 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1069 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1070 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1071 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1072 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1073 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1074 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1075 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1076 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1077 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1078 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1079 {ACWD, ynone, Pe, opBytes{0x99}}, 1080 {ACWDE, ynone, Px, opBytes{0x98}}, 1081 {ACQO, ynone, Pw, opBytes{0x99}}, 1082 {ADAA, ynone, P32, opBytes{0x27}}, 1083 {ADAS, ynone, P32, opBytes{0x2f}}, 1084 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1085 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1086 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1087 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1088 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1089 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1090 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1091 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1092 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1093 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1094 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1095 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1096 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1097 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1098 {AEMMS, ynone, Pm, opBytes{0x77}}, 1099 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1100 {AENTER, nil, 0, opBytes{}}, // botch 1101 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1102 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1103 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1104 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1105 {AHLT, ynone, Px, opBytes{0xf4}}, 1106 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1107 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1108 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1109 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1110 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1111 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1114 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1117 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1118 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1119 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1120 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1121 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1122 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1123 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1124 {AINSB, ynone, Pb, opBytes{0x6c}}, 1125 {AINSL, ynone, Px, opBytes{0x6d}}, 1126 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1127 {AINSW, ynone, Pe, opBytes{0x6d}}, 1128 {AICEBP, ynone, Px, opBytes{0xf1}}, 1129 {AINT, yint, Px, opBytes{0xcd}}, 1130 {AINTO, ynone, P32, opBytes{0xce}}, 1131 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1132 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1133 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1134 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1135 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1136 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1137 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1138 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1139 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1140 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1141 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1142 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1143 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1144 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1145 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1146 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1147 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1148 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1149 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1150 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1151 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1152 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1153 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1154 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1155 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1156 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1157 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1158 {ALAHF, ynone, Px, opBytes{0x9f}}, 1159 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1160 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1161 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1162 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1163 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1164 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1165 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1166 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1167 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1168 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1169 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1170 {ALOCK, ynone, Px, opBytes{0xf0}}, 1171 {ALODSB, ynone, Pb, opBytes{0xac}}, 1172 {ALODSL, ynone, Px, opBytes{0xad}}, 1173 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1174 {ALODSW, ynone, Pe, opBytes{0xad}}, 1175 {ALONG, ybyte, Px, opBytes{4}}, 1176 {ALOOP, yloop, Px, opBytes{0xe2}}, 1177 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1178 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1179 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1180 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1181 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1182 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1183 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1184 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1185 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1186 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1187 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1188 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1189 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1190 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1191 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1192 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1193 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1194 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1195 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1196 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1197 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1198 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1199 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1200 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1201 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1202 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1203 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1204 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1205 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1206 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1207 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1208 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1209 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1210 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1211 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1212 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1213 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1214 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1215 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1216 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1217 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1218 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1219 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1220 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1221 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1222 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1223 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1224 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1225 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1226 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1227 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1228 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1229 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1230 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1231 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1232 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1233 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1234 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1235 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1236 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1237 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1238 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1239 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1240 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1241 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1242 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1243 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1244 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1245 {AMULPD, yxm, Pe, opBytes{0x59}}, 1246 {AMULPS, yxm, Ym, opBytes{0x59}}, 1247 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1248 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1249 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1250 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1251 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1252 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1253 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1254 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1255 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1256 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1257 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1258 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1259 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1260 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1261 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1262 {AORPD, yxm, Pq, opBytes{0x56}}, 1263 {AORPS, yxm, Pm, opBytes{0x56}}, 1264 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1265 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1266 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1267 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1268 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1269 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1270 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1271 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1272 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1273 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1274 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1275 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1276 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1277 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1278 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1279 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1280 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1281 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1282 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1283 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1284 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1285 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1286 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1287 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1288 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1289 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1290 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1291 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1292 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1293 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1294 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1295 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1296 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1297 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1298 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1299 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1300 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1301 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1302 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1303 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1304 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1305 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1306 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1307 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1308 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1309 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1310 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1311 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1312 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1313 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1314 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1315 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1316 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1317 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1318 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1319 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1320 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1321 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1322 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1323 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1324 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1325 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1326 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1327 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1328 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1329 {APMINSW, yxm, Pe, opBytes{0xea}}, 1330 {APMINUB, yxm, Pe, opBytes{0xda}}, 1331 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1332 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1333 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1334 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1335 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1336 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1337 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1338 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1339 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1340 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1341 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1342 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1343 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1344 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1345 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1346 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1347 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1348 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1349 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1350 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1351 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1352 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1353 {APOPAL, ynone, P32, opBytes{0x61}}, 1354 {APOPAW, ynone, Pe, opBytes{0x61}}, 1355 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1356 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1357 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1358 {APOPFL, ynone, P32, opBytes{0x9d}}, 1359 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1360 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1361 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1362 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1363 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1364 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1365 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1366 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1367 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1368 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1369 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1370 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1371 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1372 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1373 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1374 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1375 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1376 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1377 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1378 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1379 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1380 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1381 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1382 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1383 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1384 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1385 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1386 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1387 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1388 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1389 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1390 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1391 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1392 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1393 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1394 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1395 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1396 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1397 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1398 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1399 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1400 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1401 {APUSHAL, ynone, P32, opBytes{0x60}}, 1402 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1403 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1404 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1405 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1406 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1409 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1410 {AQUAD, ybyte, Px, opBytes{8}}, 1411 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1412 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1415 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1416 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1417 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1418 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1421 {AREP, ynone, Px, opBytes{0xf3}}, 1422 {AREPN, ynone, Px, opBytes{0xf2}}, 1423 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1424 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1425 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1426 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1427 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1428 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1431 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1432 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1435 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1436 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1437 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1438 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1439 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1442 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1443 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1446 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1447 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1450 {ASCASB, ynone, Pb, opBytes{0xae}}, 1451 {ASCASL, ynone, Px, opBytes{0xaf}}, 1452 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1453 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1454 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1455 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1456 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1457 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1458 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1459 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1460 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1461 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1462 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1463 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1464 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1465 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1466 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1467 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1468 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1469 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1470 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1471 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1474 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1475 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1478 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1479 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1480 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1481 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1482 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1483 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1484 {ASTC, ynone, Px, opBytes{0xf9}}, 1485 {ASTD, ynone, Px, opBytes{0xfd}}, 1486 {ASTI, ynone, Px, opBytes{0xfb}}, 1487 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1488 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1489 {ASTOSL, ynone, Px, opBytes{0xab}}, 1490 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1491 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1492 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1493 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1494 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1495 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1496 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1497 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1498 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1499 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1500 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1501 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1502 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1503 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1506 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1507 {obj.ATEXT, ytext, Px, opBytes{}}, 1508 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1509 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1510 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1511 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1512 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1513 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1514 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1515 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1516 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1517 {AWAIT, ynone, Px, opBytes{0x9b}}, 1518 {AWORD, ybyte, Px, opBytes{2}}, 1519 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1520 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1521 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1523 {AXLAT, ynone, Px, opBytes{0xd7}}, 1524 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1525 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1526 {AXORPD, yxm, Pe, opBytes{0x57}}, 1527 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1528 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1529 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1530 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1531 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1532 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1533 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1534 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1535 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1536 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1537 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1538 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1539 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1540 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1541 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1542 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1543 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1544 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1545 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1546 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1547 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1548 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1549 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1550 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1551 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1552 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1553 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1554 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1555 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1556 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1557 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1558 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1559 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1560 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1561 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1562 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1563 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1564 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1565 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1566 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1567 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1568 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1569 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1570 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1571 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1572 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1573 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1574 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1575 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1576 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1577 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1578 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1579 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1580 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1581 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1582 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1583 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1584 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1585 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1586 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1587 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1588 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1589 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1590 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1591 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1592 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1593 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1594 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1595 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1596 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1597 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1598 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1599 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1600 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1601 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1602 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1603 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1604 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1605 {AFFREE, nil, 0, opBytes{}}, 1606 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1607 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1608 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1609 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1610 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1611 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1612 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1613 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1614 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1615 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1616 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1617 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1618 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1619 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1620 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1621 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1622 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1623 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1624 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1625 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1626 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1627 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1628 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1629 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1630 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1631 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1632 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1633 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1634 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1635 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1636 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1637 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1638 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1639 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1640 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1641 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1642 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1643 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1644 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1645 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1646 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1647 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1649 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1650 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1651 {AINVD, ynone, Pm, opBytes{0x08}}, 1652 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1653 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1654 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1655 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1656 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1657 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1658 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1659 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1660 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1661 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1662 {ARSM, ynone, Pm, opBytes{0xaa}}, 1663 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1664 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1665 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1666 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1667 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1668 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1669 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1670 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1671 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1672 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1673 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1674 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1676 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1677 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1678 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1679 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1680 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1681 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1682 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1683 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1684 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1685 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1686 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1687 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1688 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1689 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1690 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1691 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1692 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1693 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1694 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1695 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1696 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1697 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1698 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1699 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1700 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1701 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1702 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1703 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1704 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1705 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1706 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1707 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1708 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1709 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1710 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1711 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1712 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1713 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1715 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1716 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1717 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1718 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1719 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1720 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1721 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1722 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1723 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1724 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1725 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1726 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1727 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1728 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1729 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1730 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1732 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1733 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1734 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1735 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1736 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1737 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1738 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1739 {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1740 {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1741 {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1742 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1743 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1744 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1745 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1746 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1747 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1748 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1749 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1750 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1751 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1752 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1753 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1754 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1755 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1756 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1757 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1758 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1759 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1760 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1761 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1762 {ARDGSBASEL, 
yrdrand, Pf3, opBytes{0xae, 01}}, 1763 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1764 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1765 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1766 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1767 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1768 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1769 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1770 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1771 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1772 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1773 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1774 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1775 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1776 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1777 {ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}}, 1778 1779 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1780 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1781 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1782 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1783 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1784 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1785 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1786 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1787 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1788 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1789 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1790 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1791 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1792 1793 {obj.AEND, nil, 0, opBytes{}}, 1794 {0, nil, 0, opBytes{}}, 1795 } 1796 1797 var opindex [(ALAST + 1) & obj.AMask]*Optab 1798 1799 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1800 // This happens on systems like Solaris that call .so functions instead of system calls. 1801 // It does not seem to be necessary for any other systems. This is probably working 1802 // around a Solaris-specific bug that should be fixed differently, but we don't know 1803 // what that bug is. And this does fix it. 
1804 func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1805 if ctxt.Headtype == objabi.Hsolaris { 1806 // All the Solaris dynamic imports from libc.so begin with "libc_". 1807 return strings.HasPrefix(s.Name, "libc_") 1808 } 1809 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1810 } 1811 1812 // single-instruction no-ops of various lengths. 1813 // constructed by hand and disassembled with gdb to verify. 1814 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1815 var nop = [][16]uint8{ 1816 {0x90}, 1817 {0x66, 0x90}, 1818 {0x0F, 0x1F, 0x00}, 1819 {0x0F, 0x1F, 0x40, 0x00}, 1820 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1821 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1822 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1823 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1824 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1825 } 1826 1827 // Native Client rejects the repeated 0x66 prefix. 1828 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1829 func fillnop(p []byte, n int) { 1830 var m int 1831 1832 for n > 0 { 1833 m = n 1834 if m > len(nop) { 1835 m = len(nop) 1836 } 1837 copy(p[:m], nop[m-1][:m]) 1838 p = p[m:] 1839 n -= m 1840 } 1841 } 1842 1843 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1844 s.Grow(int64(c) + int64(pad)) 1845 fillnop(s.P[c:], int(pad)) 1846 return c + pad 1847 } 1848 1849 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1850 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1851 return l 1852 } 1853 return q 1854 } 1855 1856 // isJump returns whether p is a jump instruction. 1857 // It is used to ensure that no standalone or macro-fused jump will straddle 1858 // or end on a 32 byte boundary by inserting NOPs before the jumps. 
1859 func isJump(p *obj.Prog) bool { 1860 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || 1861 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO 1862 } 1863 1864 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional 1865 // jump. Otherwise, nil is returned. 1866 func lookForJCC(p *obj.Prog) *obj.Prog { 1867 // Skip any PCDATA, FUNCDATA or NOP instructions 1868 var q *obj.Prog 1869 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { 1870 } 1871 1872 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { 1873 return nil 1874 } 1875 1876 switch q.As { 1877 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, 1878 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: 1879 default: 1880 return nil 1881 } 1882 1883 return q 1884 } 1885 1886 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction. 1887 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false. 1888 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. 1889 func fusedJump(p *obj.Prog) (bool, uint8) { 1890 var fusedSize uint8 1891 1892 // The first instruction in a macro fused pair may be preceded by the LOCK prefix, 1893 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we 1894 // need to be careful to insert any padding before the locks rather than directly after them. 
1895 1896 if p.As == AXRELEASE || p.As == AXACQUIRE { 1897 fusedSize += p.Isize 1898 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1899 } 1900 if p == nil { 1901 return false, 0 1902 } 1903 } 1904 if p.As == ALOCK { 1905 fusedSize += p.Isize 1906 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1907 } 1908 if p == nil { 1909 return false, 0 1910 } 1911 } 1912 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW 1913 1914 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || 1915 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp 1916 1917 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || 1918 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW 1919 1920 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || 1921 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW 1922 1923 if !cmpAddSub && !testAnd && !incDec { 1924 return false, 0 1925 } 1926 1927 if !incDec { 1928 var argOne obj.AddrType 1929 var argTwo obj.AddrType 1930 if cmp { 1931 argOne = p.From.Type 1932 argTwo = p.To.Type 1933 } else { 1934 argOne = p.To.Type 1935 argTwo = p.From.Type 1936 } 1937 if argOne == obj.TYPE_REG { 1938 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { 1939 return false, 0 1940 } 1941 } else if argOne == obj.TYPE_MEM { 1942 if argTwo != obj.TYPE_REG { 1943 return false, 0 1944 } 1945 } else { 1946 return false, 0 1947 } 1948 } 1949 1950 fusedSize += p.Isize 1951 jmp := lookForJCC(p) 1952 if jmp == nil { 1953 return false, 0 1954 } 1955 1956 fusedSize += jmp.Isize 1957 1958 if testAnd { 1959 return true, fusedSize 1960 } 1961 1962 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || 1963 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { 1964 return false, 0 1965 } 1966 1967 if cmpAddSub { 1968 return true, fusedSize 
1969 } 1970 1971 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { 1972 return false, 0 1973 } 1974 1975 return true, fusedSize 1976 } 1977 1978 type padJumpsCtx int32 1979 1980 func makePjcCtx(ctxt *obj.Link) padJumpsCtx { 1981 // Disable jump padding on 32 bit builds by setting 1982 // padJumps to 0. 1983 if ctxt.Arch.Family == sys.I386 { 1984 return padJumpsCtx(0) 1985 } 1986 1987 // Disable jump padding for hand written assembly code. 1988 if ctxt.IsAsm { 1989 return padJumpsCtx(0) 1990 } 1991 1992 return padJumpsCtx(32) 1993 } 1994 1995 // padJump detects whether the instruction being assembled is a standalone or a macro-fused 1996 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does 1997 // not cross or end on a 32 byte boundary. 1998 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { 1999 if pjc == 0 { 2000 return c 2001 } 2002 2003 var toPad int32 2004 fj, fjSize := fusedJump(p) 2005 mask := int32(pjc - 1) 2006 if fj { 2007 if (c&mask)+int32(fjSize) >= int32(pjc) { 2008 toPad = int32(pjc) - (c & mask) 2009 } 2010 } else if isJump(p) { 2011 if (c&mask)+int32(p.Isize) >= int32(pjc) { 2012 toPad = int32(pjc) - (c & mask) 2013 } 2014 } 2015 if toPad <= 0 { 2016 return c 2017 } 2018 2019 return noppad(ctxt, s, c, toPad) 2020 } 2021 2022 // reAssemble is called if an instruction's size changes during assembly. If 2023 // it does and the instruction is a standalone or a macro-fused jump we need to 2024 // reassemble. 2025 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool { 2026 if pjc == 0 { 2027 return false 2028 } 2029 2030 fj, _ := fusedJump(p) 2031 return fj || isJump(p) 2032 } 2033 2034 type nopPad struct { 2035 p *obj.Prog // Instruction before the pad 2036 n int32 // Size of the pad 2037 } 2038 2039 // Padding bytes to add to align code as requested. 2040 // Alignment is restricted to powers of 2 between 8 and 2048 inclusive. 
2041 // 2042 // pc: current offset in function, in bytes 2043 // a: requested alignment, in bytes 2044 // cursym: current function being assembled 2045 // returns number of bytes of padding needed 2046 func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { 2047 if !((a&(a-1) == 0) && 8 <= a && a <= 2048) { 2048 ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a) 2049 return 0 2050 } 2051 2052 // By default function alignment is 32 bytes for amd64 2053 if cursym.Func().Align < int32(a) { 2054 cursym.Func().Align = int32(a) 2055 } 2056 2057 if pc&(a-1) != 0 { 2058 return int(a - (pc & (a - 1))) 2059 } 2060 2061 return 0 2062 } 2063 2064 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { 2065 if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 { 2066 ctxt.Diag("-spectre=ret not supported on 386") 2067 ctxt.Retpoline = false // don't keep printing 2068 } 2069 2070 pjc := makePjcCtx(ctxt) 2071 2072 if s.P != nil { 2073 return 2074 } 2075 2076 if ycover[0] == 0 { 2077 ctxt.Diag("x86 tables not initialized, call x86.instinit first") 2078 } 2079 2080 for p := s.Func().Text; p != nil; p = p.Link { 2081 if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { 2082 p.To.SetTarget(p) 2083 } 2084 if p.As == AADJSP { 2085 p.To.Type = obj.TYPE_REG 2086 p.To.Reg = REG_SP 2087 // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive. 2088 // One exception: It is smaller to encode $-0x80 than $0x80. 2089 // For that case, flip the sign and the op: 2090 // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'. 
2091 switch v := p.From.Offset; { 2092 case v == 0: 2093 p.As = obj.ANOP 2094 case v == 0x80 || (v < 0 && v != -0x80): 2095 p.As = spadjop(ctxt, AADDL, AADDQ) 2096 p.From.Offset *= -1 2097 default: 2098 p.As = spadjop(ctxt, ASUBL, ASUBQ) 2099 } 2100 } 2101 if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) { 2102 if p.To.Type != obj.TYPE_REG { 2103 ctxt.Diag("non-retpoline-compatible: %v", p) 2104 continue 2105 } 2106 p.To.Type = obj.TYPE_BRANCH 2107 p.To.Name = obj.NAME_EXTERN 2108 p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg))) 2109 p.To.Reg = 0 2110 p.To.Offset = 0 2111 } 2112 } 2113 2114 var count int64 // rough count of number of instructions 2115 for p := s.Func().Text; p != nil; p = p.Link { 2116 count++ 2117 p.Back = branchShort // use short branches first time through 2118 if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { 2119 p.Back |= branchBackwards 2120 q.Back |= branchLoopHead 2121 } 2122 } 2123 s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction 2124 2125 var ab AsmBuf 2126 var n int 2127 var c int32 2128 errors := ctxt.Errors 2129 var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) 2130 nrelocs0 := len(s.R) 2131 for { 2132 // This loop continues while there are reasons to re-assemble 2133 // whole block, like the presence of long forward jumps. 
2134 reAssemble := false 2135 for i := range s.R[nrelocs0:] { 2136 s.R[nrelocs0+i] = obj.Reloc{} 2137 } 2138 s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler 2139 s.P = s.P[:0] 2140 c = 0 2141 var pPrev *obj.Prog 2142 nops = nops[:0] 2143 for p := s.Func().Text; p != nil; p = p.Link { 2144 c0 := c 2145 c = pjc.padJump(ctxt, s, p, c) 2146 2147 if p.As == obj.APCALIGN { 2148 aln := p.From.Offset 2149 v := addpad(int64(c), aln, ctxt, s) 2150 if v > 0 { 2151 s.Grow(int64(c) + int64(v)) 2152 fillnop(s.P[c:], int(v)) 2153 } 2154 2155 c += int32(v) 2156 pPrev = p 2157 continue 2158 } 2159 2160 if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 { 2161 // pad with NOPs 2162 v := -c & (loopAlign - 1) 2163 2164 if v <= maxLoopPad { 2165 s.Grow(int64(c) + int64(v)) 2166 fillnop(s.P[c:], int(v)) 2167 c += v 2168 } 2169 } 2170 2171 p.Pc = int64(c) 2172 2173 // process forward jumps to p 2174 for q := p.Rel; q != nil; q = q.Forwd { 2175 v := int32(p.Pc - (q.Pc + int64(q.Isize))) 2176 if q.Back&branchShort != 0 { 2177 if v > 127 { 2178 reAssemble = true 2179 q.Back ^= branchShort 2180 } 2181 2182 if q.As == AJCXZL || q.As == AXBEGIN { 2183 s.P[q.Pc+2] = byte(v) 2184 } else { 2185 s.P[q.Pc+1] = byte(v) 2186 } 2187 } else { 2188 binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v)) 2189 } 2190 } 2191 2192 p.Rel = nil 2193 2194 p.Pc = int64(c) 2195 ab.asmins(ctxt, s, p) 2196 m := ab.Len() 2197 if int(p.Isize) != m { 2198 p.Isize = uint8(m) 2199 if pjc.reAssemble(p) { 2200 // We need to re-assemble here to check for jumps and fused jumps 2201 // that span or end on 32 byte boundaries. 2202 reAssemble = true 2203 } 2204 } 2205 2206 s.Grow(p.Pc + int64(m)) 2207 copy(s.P[p.Pc:], ab.Bytes()) 2208 // If there was padding, remember it. 
2209 if pPrev != nil && !ctxt.IsAsm && c > c0 { 2210 nops = append(nops, nopPad{p: pPrev, n: c - c0}) 2211 } 2212 c += int32(m) 2213 pPrev = p 2214 } 2215 2216 n++ 2217 if n > 1000 { 2218 ctxt.Diag("span must be looping") 2219 log.Fatalf("loop") 2220 } 2221 if !reAssemble { 2222 break 2223 } 2224 if ctxt.Errors > errors { 2225 return 2226 } 2227 } 2228 // splice padding nops into Progs 2229 for _, n := range nops { 2230 pp := n.p 2231 np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)} 2232 pp.Link = np 2233 } 2234 2235 s.Size = int64(c) 2236 2237 if false { /* debug['a'] > 1 */ 2238 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0) 2239 var i int 2240 for i = 0; i < len(s.P); i++ { 2241 fmt.Printf(" %.2x", s.P[i]) 2242 if i%16 == 15 { 2243 fmt.Printf("\n %.6x", uint(i+1)) 2244 } 2245 } 2246 2247 if i%16 != 0 { 2248 fmt.Printf("\n") 2249 } 2250 2251 for i := 0; i < len(s.R); i++ { 2252 r := &s.R[i] 2253 fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add) 2254 } 2255 } 2256 2257 // Mark nonpreemptible instruction sequences. 2258 // The 2-instruction TLS access sequence 2259 // MOVQ TLS, BX 2260 // MOVQ 0(BX)(TLS*1), BX 2261 // is not async preemptible, as if it is preempted and resumed on 2262 // a different thread, the TLS address may become invalid. 2263 if !CanUse1InsnTLS(ctxt) { 2264 useTLS := func(p *obj.Prog) bool { 2265 // Only need to mark the second instruction, which has 2266 // REG_TLS as Index. (It is okay to interrupt and restart 2267 // the first instruction.) 2268 return p.From.Index == REG_TLS 2269 } 2270 obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) 2271 } 2272 2273 // Now that we know byte offsets, we can generate jump table entries. 2274 // TODO: could this live in obj instead of obj/$ARCH? 
2275 for _, jt := range s.Func().JumpTables { 2276 for i, p := range jt.Targets { 2277 // The ith jumptable entry points to the p.Pc'th 2278 // byte in the function symbol s. 2279 jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc) 2280 } 2281 } 2282 } 2283 2284 func instinit(ctxt *obj.Link) { 2285 if ycover[0] != 0 { 2286 // Already initialized; stop now. 2287 // This happens in the cmd/asm tests, 2288 // each of which re-initializes the arch. 2289 return 2290 } 2291 2292 switch ctxt.Headtype { 2293 case objabi.Hplan9: 2294 plan9privates = ctxt.Lookup("_privates") 2295 } 2296 2297 for i := range avxOptab { 2298 c := avxOptab[i].as 2299 if opindex[c&obj.AMask] != nil { 2300 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) 2301 } 2302 opindex[c&obj.AMask] = &avxOptab[i] 2303 } 2304 for i := 1; optab[i].as != 0; i++ { 2305 c := optab[i].as 2306 if opindex[c&obj.AMask] != nil { 2307 ctxt.Diag("phase error in optab: %d (%v)", i, c) 2308 } 2309 opindex[c&obj.AMask] = &optab[i] 2310 } 2311 2312 for i := 0; i < Ymax; i++ { 2313 ycover[i*Ymax+i] = 1 2314 } 2315 2316 ycover[Yi0*Ymax+Yu2] = 1 2317 ycover[Yi1*Ymax+Yu2] = 1 2318 2319 ycover[Yi0*Ymax+Yi8] = 1 2320 ycover[Yi1*Ymax+Yi8] = 1 2321 ycover[Yu2*Ymax+Yi8] = 1 2322 ycover[Yu7*Ymax+Yi8] = 1 2323 2324 ycover[Yi0*Ymax+Yu7] = 1 2325 ycover[Yi1*Ymax+Yu7] = 1 2326 ycover[Yu2*Ymax+Yu7] = 1 2327 2328 ycover[Yi0*Ymax+Yu8] = 1 2329 ycover[Yi1*Ymax+Yu8] = 1 2330 ycover[Yu2*Ymax+Yu8] = 1 2331 ycover[Yu7*Ymax+Yu8] = 1 2332 2333 ycover[Yi0*Ymax+Ys32] = 1 2334 ycover[Yi1*Ymax+Ys32] = 1 2335 ycover[Yu2*Ymax+Ys32] = 1 2336 ycover[Yu7*Ymax+Ys32] = 1 2337 ycover[Yu8*Ymax+Ys32] = 1 2338 ycover[Yi8*Ymax+Ys32] = 1 2339 2340 ycover[Yi0*Ymax+Yi32] = 1 2341 ycover[Yi1*Ymax+Yi32] = 1 2342 ycover[Yu2*Ymax+Yi32] = 1 2343 ycover[Yu7*Ymax+Yi32] = 1 2344 ycover[Yu8*Ymax+Yi32] = 1 2345 ycover[Yi8*Ymax+Yi32] = 1 2346 ycover[Ys32*Ymax+Yi32] = 1 2347 2348 ycover[Yi0*Ymax+Yi64] = 1 2349 ycover[Yi1*Ymax+Yi64] = 1 2350 ycover[Yu7*Ymax+Yi64] = 1 2351 
ycover[Yu2*Ymax+Yi64] = 1 2352 ycover[Yu8*Ymax+Yi64] = 1 2353 ycover[Yi8*Ymax+Yi64] = 1 2354 ycover[Ys32*Ymax+Yi64] = 1 2355 ycover[Yi32*Ymax+Yi64] = 1 2356 2357 ycover[Yal*Ymax+Yrb] = 1 2358 ycover[Ycl*Ymax+Yrb] = 1 2359 ycover[Yax*Ymax+Yrb] = 1 2360 ycover[Ycx*Ymax+Yrb] = 1 2361 ycover[Yrx*Ymax+Yrb] = 1 2362 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 2363 2364 ycover[Ycl*Ymax+Ycx] = 1 2365 2366 ycover[Yax*Ymax+Yrx] = 1 2367 ycover[Ycx*Ymax+Yrx] = 1 2368 2369 ycover[Yax*Ymax+Yrl] = 1 2370 ycover[Ycx*Ymax+Yrl] = 1 2371 ycover[Yrx*Ymax+Yrl] = 1 2372 ycover[Yrl32*Ymax+Yrl] = 1 2373 2374 ycover[Yf0*Ymax+Yrf] = 1 2375 2376 ycover[Yal*Ymax+Ymb] = 1 2377 ycover[Ycl*Ymax+Ymb] = 1 2378 ycover[Yax*Ymax+Ymb] = 1 2379 ycover[Ycx*Ymax+Ymb] = 1 2380 ycover[Yrx*Ymax+Ymb] = 1 2381 ycover[Yrb*Ymax+Ymb] = 1 2382 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 2383 ycover[Ym*Ymax+Ymb] = 1 2384 2385 ycover[Yax*Ymax+Yml] = 1 2386 ycover[Ycx*Ymax+Yml] = 1 2387 ycover[Yrx*Ymax+Yml] = 1 2388 ycover[Yrl*Ymax+Yml] = 1 2389 ycover[Yrl32*Ymax+Yml] = 1 2390 ycover[Ym*Ymax+Yml] = 1 2391 2392 ycover[Yax*Ymax+Ymm] = 1 2393 ycover[Ycx*Ymax+Ymm] = 1 2394 ycover[Yrx*Ymax+Ymm] = 1 2395 ycover[Yrl*Ymax+Ymm] = 1 2396 ycover[Yrl32*Ymax+Ymm] = 1 2397 ycover[Ym*Ymax+Ymm] = 1 2398 ycover[Ymr*Ymax+Ymm] = 1 2399 2400 ycover[Yxr0*Ymax+Yxr] = 1 2401 2402 ycover[Ym*Ymax+Yxm] = 1 2403 ycover[Yxr0*Ymax+Yxm] = 1 2404 ycover[Yxr*Ymax+Yxm] = 1 2405 2406 ycover[Ym*Ymax+Yym] = 1 2407 ycover[Yyr*Ymax+Yym] = 1 2408 2409 ycover[Yxr0*Ymax+YxrEvex] = 1 2410 ycover[Yxr*Ymax+YxrEvex] = 1 2411 2412 ycover[Ym*Ymax+YxmEvex] = 1 2413 ycover[Yxr0*Ymax+YxmEvex] = 1 2414 ycover[Yxr*Ymax+YxmEvex] = 1 2415 ycover[YxrEvex*Ymax+YxmEvex] = 1 2416 2417 ycover[Yyr*Ymax+YyrEvex] = 1 2418 2419 ycover[Ym*Ymax+YymEvex] = 1 2420 ycover[Yyr*Ymax+YymEvex] = 1 2421 ycover[YyrEvex*Ymax+YymEvex] = 1 2422 2423 ycover[Ym*Ymax+Yzm] = 1 2424 ycover[Yzr*Ymax+Yzm] = 1 2425 2426 ycover[Yk0*Ymax+Yk] = 1 2427 ycover[Yknot0*Ymax+Yk] = 1 2428 2429 
ycover[Yk0*Ymax+Ykm] = 1 2430 ycover[Yknot0*Ymax+Ykm] = 1 2431 ycover[Yk*Ymax+Ykm] = 1 2432 ycover[Ym*Ymax+Ykm] = 1 2433 2434 ycover[Yxvm*Ymax+YxvmEvex] = 1 2435 2436 ycover[Yyvm*Ymax+YyvmEvex] = 1 2437 2438 for i := 0; i < MAXREG; i++ { 2439 reg[i] = -1 2440 if i >= REG_AL && i <= REG_R15B { 2441 reg[i] = (i - REG_AL) & 7 2442 if i >= REG_SPB && i <= REG_DIB { 2443 regrex[i] = 0x40 2444 } 2445 if i >= REG_R8B && i <= REG_R15B { 2446 regrex[i] = Rxr | Rxx | Rxb 2447 } 2448 } 2449 2450 if i >= REG_AH && i <= REG_BH { 2451 reg[i] = 4 + ((i - REG_AH) & 7) 2452 } 2453 if i >= REG_AX && i <= REG_R15 { 2454 reg[i] = (i - REG_AX) & 7 2455 if i >= REG_R8 { 2456 regrex[i] = Rxr | Rxx | Rxb 2457 } 2458 } 2459 2460 if i >= REG_F0 && i <= REG_F0+7 { 2461 reg[i] = (i - REG_F0) & 7 2462 } 2463 if i >= REG_M0 && i <= REG_M0+7 { 2464 reg[i] = (i - REG_M0) & 7 2465 } 2466 if i >= REG_K0 && i <= REG_K0+7 { 2467 reg[i] = (i - REG_K0) & 7 2468 } 2469 if i >= REG_X0 && i <= REG_X0+15 { 2470 reg[i] = (i - REG_X0) & 7 2471 if i >= REG_X0+8 { 2472 regrex[i] = Rxr | Rxx | Rxb 2473 } 2474 } 2475 if i >= REG_X16 && i <= REG_X16+15 { 2476 reg[i] = (i - REG_X16) & 7 2477 if i >= REG_X16+8 { 2478 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2479 } else { 2480 regrex[i] = RxrEvex 2481 } 2482 } 2483 if i >= REG_Y0 && i <= REG_Y0+15 { 2484 reg[i] = (i - REG_Y0) & 7 2485 if i >= REG_Y0+8 { 2486 regrex[i] = Rxr | Rxx | Rxb 2487 } 2488 } 2489 if i >= REG_Y16 && i <= REG_Y16+15 { 2490 reg[i] = (i - REG_Y16) & 7 2491 if i >= REG_Y16+8 { 2492 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2493 } else { 2494 regrex[i] = RxrEvex 2495 } 2496 } 2497 if i >= REG_Z0 && i <= REG_Z0+15 { 2498 reg[i] = (i - REG_Z0) & 7 2499 if i > REG_Z0+7 { 2500 regrex[i] = Rxr | Rxx | Rxb 2501 } 2502 } 2503 if i >= REG_Z16 && i <= REG_Z16+15 { 2504 reg[i] = (i - REG_Z16) & 7 2505 if i >= REG_Z16+8 { 2506 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2507 } else { 2508 regrex[i] = RxrEvex 2509 } 2510 } 2511 2512 if i >= REG_CR+8 && i <= REG_CR+15 { 
2513 regrex[i] = Rxr 2514 } 2515 } 2516 } 2517 2518 var isAndroid = buildcfg.GOOS == "android" 2519 2520 func prefixof(ctxt *obj.Link, a *obj.Addr) int { 2521 if a.Reg < REG_CS && a.Index < REG_CS { // fast path 2522 return 0 2523 } 2524 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 2525 switch a.Reg { 2526 case REG_CS: 2527 return 0x2e 2528 2529 case REG_DS: 2530 return 0x3e 2531 2532 case REG_ES: 2533 return 0x26 2534 2535 case REG_FS: 2536 return 0x64 2537 2538 case REG_GS: 2539 return 0x65 2540 2541 case REG_TLS: 2542 // NOTE: Systems listed here should be only systems that 2543 // support direct TLS references like 8(TLS) implemented as 2544 // direct references from FS or GS. Systems that require 2545 // the initial-exec model, where you load the TLS base into 2546 // a register and then index from that register, do not reach 2547 // this code and should not be listed. 2548 if ctxt.Arch.Family == sys.I386 { 2549 switch ctxt.Headtype { 2550 default: 2551 if isAndroid { 2552 return 0x65 // GS 2553 } 2554 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2555 2556 case objabi.Hdarwin, 2557 objabi.Hdragonfly, 2558 objabi.Hfreebsd, 2559 objabi.Hnetbsd, 2560 objabi.Hopenbsd: 2561 return 0x65 // GS 2562 } 2563 } 2564 2565 switch ctxt.Headtype { 2566 default: 2567 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2568 2569 case objabi.Hlinux: 2570 if isAndroid { 2571 return 0x64 // FS 2572 } 2573 2574 if ctxt.Flag_shared { 2575 log.Fatalf("unknown TLS base register for linux with -shared") 2576 } else { 2577 return 0x64 // FS 2578 } 2579 2580 case objabi.Hdragonfly, 2581 objabi.Hfreebsd, 2582 objabi.Hnetbsd, 2583 objabi.Hopenbsd, 2584 objabi.Hsolaris: 2585 return 0x64 // FS 2586 2587 case objabi.Hdarwin: 2588 return 0x65 // GS 2589 } 2590 } 2591 } 2592 2593 switch a.Index { 2594 case REG_CS: 2595 return 0x2e 2596 2597 case REG_DS: 2598 return 0x3e 2599 2600 case REG_ES: 2601 return 0x26 2602 2603 case REG_TLS: 2604 if ctxt.Flag_shared 
&& ctxt.Headtype != objabi.Hwindows { 2605 // When building for inclusion into a shared library, an instruction of the form 2606 // MOV off(CX)(TLS*1), AX 2607 // becomes 2608 // mov %gs:off(%ecx), %eax // on i386 2609 // mov %fs:off(%rcx), %rax // on amd64 2610 // which assumes that the correct TLS offset has been loaded into CX (today 2611 // there is only one TLS variable -- g -- so this is OK). When not building for 2612 // a shared library the instruction it becomes 2613 // mov 0x0(%ecx), %eax // on i386 2614 // mov 0x0(%rcx), %rax // on amd64 2615 // and a R_TLS_LE relocation, and so does not require a prefix. 2616 if ctxt.Arch.Family == sys.I386 { 2617 return 0x65 // GS 2618 } 2619 return 0x64 // FS 2620 } 2621 2622 case REG_FS: 2623 return 0x64 2624 2625 case REG_GS: 2626 return 0x65 2627 } 2628 2629 return 0 2630 } 2631 2632 // oclassRegList returns multisource operand class for addr. 2633 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { 2634 // TODO(quasilyte): when oclass register case is refactored into 2635 // lookup table, use it here to get register kind more easily. 2636 // Helper functions like regIsXmm should go away too (they will become redundant). 
2637 2638 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } 2639 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } 2640 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } 2641 2642 reg0, reg1 := decodeRegisterRange(addr.Offset) 2643 low := regIndex(int16(reg0)) 2644 high := regIndex(int16(reg1)) 2645 2646 if ctxt.Arch.Family == sys.I386 { 2647 if low >= 8 || high >= 8 { 2648 return Yxxx 2649 } 2650 } 2651 2652 switch high - low { 2653 case 3: 2654 switch { 2655 case regIsXmm(reg0) && regIsXmm(reg1): 2656 return YxrEvexMulti4 2657 case regIsYmm(reg0) && regIsYmm(reg1): 2658 return YyrEvexMulti4 2659 case regIsZmm(reg0) && regIsZmm(reg1): 2660 return YzrMulti4 2661 default: 2662 return Yxxx 2663 } 2664 default: 2665 return Yxxx 2666 } 2667 } 2668 2669 // oclassVMem returns V-mem (vector memory with VSIB) operand class. 2670 // For addr that is not V-mem returns (Yxxx, false). 2671 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { 2672 switch addr.Index { 2673 case REG_X0 + 0, 2674 REG_X0 + 1, 2675 REG_X0 + 2, 2676 REG_X0 + 3, 2677 REG_X0 + 4, 2678 REG_X0 + 5, 2679 REG_X0 + 6, 2680 REG_X0 + 7: 2681 return Yxvm, true 2682 case REG_X8 + 0, 2683 REG_X8 + 1, 2684 REG_X8 + 2, 2685 REG_X8 + 3, 2686 REG_X8 + 4, 2687 REG_X8 + 5, 2688 REG_X8 + 6, 2689 REG_X8 + 7: 2690 if ctxt.Arch.Family == sys.I386 { 2691 return Yxxx, true 2692 } 2693 return Yxvm, true 2694 case REG_X16 + 0, 2695 REG_X16 + 1, 2696 REG_X16 + 2, 2697 REG_X16 + 3, 2698 REG_X16 + 4, 2699 REG_X16 + 5, 2700 REG_X16 + 6, 2701 REG_X16 + 7, 2702 REG_X16 + 8, 2703 REG_X16 + 9, 2704 REG_X16 + 10, 2705 REG_X16 + 11, 2706 REG_X16 + 12, 2707 REG_X16 + 13, 2708 REG_X16 + 14, 2709 REG_X16 + 15: 2710 if ctxt.Arch.Family == sys.I386 { 2711 return Yxxx, true 2712 } 2713 return YxvmEvex, true 2714 2715 case REG_Y0 + 0, 2716 REG_Y0 + 1, 2717 REG_Y0 + 2, 2718 REG_Y0 + 3, 2719 REG_Y0 + 4, 2720 REG_Y0 + 5, 2721 REG_Y0 + 6, 2722 REG_Y0 + 7: 2723 return Yyvm, true 2724 
case REG_Y8 + 0, 2725 REG_Y8 + 1, 2726 REG_Y8 + 2, 2727 REG_Y8 + 3, 2728 REG_Y8 + 4, 2729 REG_Y8 + 5, 2730 REG_Y8 + 6, 2731 REG_Y8 + 7: 2732 if ctxt.Arch.Family == sys.I386 { 2733 return Yxxx, true 2734 } 2735 return Yyvm, true 2736 case REG_Y16 + 0, 2737 REG_Y16 + 1, 2738 REG_Y16 + 2, 2739 REG_Y16 + 3, 2740 REG_Y16 + 4, 2741 REG_Y16 + 5, 2742 REG_Y16 + 6, 2743 REG_Y16 + 7, 2744 REG_Y16 + 8, 2745 REG_Y16 + 9, 2746 REG_Y16 + 10, 2747 REG_Y16 + 11, 2748 REG_Y16 + 12, 2749 REG_Y16 + 13, 2750 REG_Y16 + 14, 2751 REG_Y16 + 15: 2752 if ctxt.Arch.Family == sys.I386 { 2753 return Yxxx, true 2754 } 2755 return YyvmEvex, true 2756 2757 case REG_Z0 + 0, 2758 REG_Z0 + 1, 2759 REG_Z0 + 2, 2760 REG_Z0 + 3, 2761 REG_Z0 + 4, 2762 REG_Z0 + 5, 2763 REG_Z0 + 6, 2764 REG_Z0 + 7: 2765 return Yzvm, true 2766 case REG_Z8 + 0, 2767 REG_Z8 + 1, 2768 REG_Z8 + 2, 2769 REG_Z8 + 3, 2770 REG_Z8 + 4, 2771 REG_Z8 + 5, 2772 REG_Z8 + 6, 2773 REG_Z8 + 7, 2774 REG_Z8 + 8, 2775 REG_Z8 + 9, 2776 REG_Z8 + 10, 2777 REG_Z8 + 11, 2778 REG_Z8 + 12, 2779 REG_Z8 + 13, 2780 REG_Z8 + 14, 2781 REG_Z8 + 15, 2782 REG_Z8 + 16, 2783 REG_Z8 + 17, 2784 REG_Z8 + 18, 2785 REG_Z8 + 19, 2786 REG_Z8 + 20, 2787 REG_Z8 + 21, 2788 REG_Z8 + 22, 2789 REG_Z8 + 23: 2790 if ctxt.Arch.Family == sys.I386 { 2791 return Yxxx, true 2792 } 2793 return Yzvm, true 2794 } 2795 2796 return Yxxx, false 2797 } 2798 2799 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { 2800 switch a.Type { 2801 case obj.TYPE_REGLIST: 2802 return oclassRegList(ctxt, a) 2803 2804 case obj.TYPE_NONE: 2805 return Ynone 2806 2807 case obj.TYPE_BRANCH: 2808 return Ybr 2809 2810 case obj.TYPE_INDIR: 2811 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { 2812 return Yindir 2813 } 2814 return Yxxx 2815 2816 case obj.TYPE_MEM: 2817 // Pseudo registers have negative index, but SP is 2818 // not pseudo on x86, hence REG_SP check is not redundant. 
2819 if a.Index == REG_SP || a.Index < 0 { 2820 // Can't use FP/SB/PC/SP as the index register. 2821 return Yxxx 2822 } 2823 2824 if vmem, ok := oclassVMem(ctxt, a); ok { 2825 return vmem 2826 } 2827 2828 if ctxt.Arch.Family == sys.AMD64 { 2829 switch a.Name { 2830 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: 2831 // Global variables can't use index registers and their 2832 // base register is %rip (%rip is encoded as REG_NONE). 2833 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { 2834 return Yxxx 2835 } 2836 case obj.NAME_AUTO, obj.NAME_PARAM: 2837 // These names must have a base of SP. The old compiler 2838 // uses 0 for the base register. SSA uses REG_SP. 2839 if a.Reg != REG_SP && a.Reg != 0 { 2840 return Yxxx 2841 } 2842 case obj.NAME_NONE: 2843 // everything is ok 2844 default: 2845 // unknown name 2846 return Yxxx 2847 } 2848 } 2849 return Ym 2850 2851 case obj.TYPE_ADDR: 2852 switch a.Name { 2853 case obj.NAME_GOTREF: 2854 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") 2855 return Yxxx 2856 2857 case obj.NAME_EXTERN, 2858 obj.NAME_STATIC: 2859 if a.Sym != nil && useAbs(ctxt, a.Sym) { 2860 return Yi32 2861 } 2862 return Yiauto // use pc-relative addressing 2863 2864 case obj.NAME_AUTO, 2865 obj.NAME_PARAM: 2866 return Yiauto 2867 } 2868 2869 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index 2870 // and got Yi32 in an earlier version of this code. 2871 // Keep doing that until we fix yduff etc. 
2872 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { 2873 return Yi32 2874 } 2875 2876 if a.Sym != nil || a.Name != obj.NAME_NONE { 2877 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) 2878 } 2879 fallthrough 2880 2881 case obj.TYPE_CONST: 2882 if a.Sym != nil { 2883 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) 2884 } 2885 2886 v := a.Offset 2887 if ctxt.Arch.Family == sys.I386 { 2888 v = int64(int32(v)) 2889 } 2890 switch { 2891 case v == 0: 2892 return Yi0 2893 case v == 1: 2894 return Yi1 2895 case v >= 0 && v <= 3: 2896 return Yu2 2897 case v >= 0 && v <= 127: 2898 return Yu7 2899 case v >= 0 && v <= 255: 2900 return Yu8 2901 case v >= -128 && v <= 127: 2902 return Yi8 2903 } 2904 if ctxt.Arch.Family == sys.I386 { 2905 return Yi32 2906 } 2907 l := int32(v) 2908 if int64(l) == v { 2909 return Ys32 // can sign extend 2910 } 2911 if v>>32 == 0 { 2912 return Yi32 // unsigned 2913 } 2914 return Yi64 2915 2916 case obj.TYPE_TEXTSIZE: 2917 return Ytextsize 2918 } 2919 2920 if a.Type != obj.TYPE_REG { 2921 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) 2922 return Yxxx 2923 } 2924 2925 switch a.Reg { 2926 case REG_AL: 2927 return Yal 2928 2929 case REG_AX: 2930 return Yax 2931 2932 /* 2933 case REG_SPB: 2934 */ 2935 case REG_BPB, 2936 REG_SIB, 2937 REG_DIB, 2938 REG_R8B, 2939 REG_R9B, 2940 REG_R10B, 2941 REG_R11B, 2942 REG_R12B, 2943 REG_R13B, 2944 REG_R14B, 2945 REG_R15B: 2946 if ctxt.Arch.Family == sys.I386 { 2947 return Yxxx 2948 } 2949 fallthrough 2950 2951 case REG_DL, 2952 REG_BL, 2953 REG_AH, 2954 REG_CH, 2955 REG_DH, 2956 REG_BH: 2957 return Yrb 2958 2959 case REG_CL: 2960 return Ycl 2961 2962 case REG_CX: 2963 return Ycx 2964 2965 case REG_DX, REG_BX: 2966 return Yrx 2967 2968 case REG_R8, // not really Yrl 2969 REG_R9, 2970 REG_R10, 2971 REG_R11, 2972 REG_R12, 2973 REG_R13, 2974 REG_R14, 2975 REG_R15: 2976 if ctxt.Arch.Family == sys.I386 { 2977 return Yxxx 2978 } 2979 fallthrough 2980 2981 case REG_SP, 
REG_BP, REG_SI, REG_DI: 2982 if ctxt.Arch.Family == sys.I386 { 2983 return Yrl32 2984 } 2985 return Yrl 2986 2987 case REG_F0 + 0: 2988 return Yf0 2989 2990 case REG_F0 + 1, 2991 REG_F0 + 2, 2992 REG_F0 + 3, 2993 REG_F0 + 4, 2994 REG_F0 + 5, 2995 REG_F0 + 6, 2996 REG_F0 + 7: 2997 return Yrf 2998 2999 case REG_M0 + 0, 3000 REG_M0 + 1, 3001 REG_M0 + 2, 3002 REG_M0 + 3, 3003 REG_M0 + 4, 3004 REG_M0 + 5, 3005 REG_M0 + 6, 3006 REG_M0 + 7: 3007 return Ymr 3008 3009 case REG_X0: 3010 return Yxr0 3011 3012 case REG_X0 + 1, 3013 REG_X0 + 2, 3014 REG_X0 + 3, 3015 REG_X0 + 4, 3016 REG_X0 + 5, 3017 REG_X0 + 6, 3018 REG_X0 + 7, 3019 REG_X0 + 8, 3020 REG_X0 + 9, 3021 REG_X0 + 10, 3022 REG_X0 + 11, 3023 REG_X0 + 12, 3024 REG_X0 + 13, 3025 REG_X0 + 14, 3026 REG_X0 + 15: 3027 return Yxr 3028 3029 case REG_X0 + 16, 3030 REG_X0 + 17, 3031 REG_X0 + 18, 3032 REG_X0 + 19, 3033 REG_X0 + 20, 3034 REG_X0 + 21, 3035 REG_X0 + 22, 3036 REG_X0 + 23, 3037 REG_X0 + 24, 3038 REG_X0 + 25, 3039 REG_X0 + 26, 3040 REG_X0 + 27, 3041 REG_X0 + 28, 3042 REG_X0 + 29, 3043 REG_X0 + 30, 3044 REG_X0 + 31: 3045 return YxrEvex 3046 3047 case REG_Y0 + 0, 3048 REG_Y0 + 1, 3049 REG_Y0 + 2, 3050 REG_Y0 + 3, 3051 REG_Y0 + 4, 3052 REG_Y0 + 5, 3053 REG_Y0 + 6, 3054 REG_Y0 + 7, 3055 REG_Y0 + 8, 3056 REG_Y0 + 9, 3057 REG_Y0 + 10, 3058 REG_Y0 + 11, 3059 REG_Y0 + 12, 3060 REG_Y0 + 13, 3061 REG_Y0 + 14, 3062 REG_Y0 + 15: 3063 return Yyr 3064 3065 case REG_Y0 + 16, 3066 REG_Y0 + 17, 3067 REG_Y0 + 18, 3068 REG_Y0 + 19, 3069 REG_Y0 + 20, 3070 REG_Y0 + 21, 3071 REG_Y0 + 22, 3072 REG_Y0 + 23, 3073 REG_Y0 + 24, 3074 REG_Y0 + 25, 3075 REG_Y0 + 26, 3076 REG_Y0 + 27, 3077 REG_Y0 + 28, 3078 REG_Y0 + 29, 3079 REG_Y0 + 30, 3080 REG_Y0 + 31: 3081 return YyrEvex 3082 3083 case REG_Z0 + 0, 3084 REG_Z0 + 1, 3085 REG_Z0 + 2, 3086 REG_Z0 + 3, 3087 REG_Z0 + 4, 3088 REG_Z0 + 5, 3089 REG_Z0 + 6, 3090 REG_Z0 + 7: 3091 return Yzr 3092 3093 case REG_Z0 + 8, 3094 REG_Z0 + 9, 3095 REG_Z0 + 10, 3096 REG_Z0 + 11, 3097 REG_Z0 + 12, 3098 REG_Z0 + 13, 
3099 REG_Z0 + 14, 3100 REG_Z0 + 15, 3101 REG_Z0 + 16, 3102 REG_Z0 + 17, 3103 REG_Z0 + 18, 3104 REG_Z0 + 19, 3105 REG_Z0 + 20, 3106 REG_Z0 + 21, 3107 REG_Z0 + 22, 3108 REG_Z0 + 23, 3109 REG_Z0 + 24, 3110 REG_Z0 + 25, 3111 REG_Z0 + 26, 3112 REG_Z0 + 27, 3113 REG_Z0 + 28, 3114 REG_Z0 + 29, 3115 REG_Z0 + 30, 3116 REG_Z0 + 31: 3117 if ctxt.Arch.Family == sys.I386 { 3118 return Yxxx 3119 } 3120 return Yzr 3121 3122 case REG_K0: 3123 return Yk0 3124 3125 case REG_K0 + 1, 3126 REG_K0 + 2, 3127 REG_K0 + 3, 3128 REG_K0 + 4, 3129 REG_K0 + 5, 3130 REG_K0 + 6, 3131 REG_K0 + 7: 3132 return Yknot0 3133 3134 case REG_CS: 3135 return Ycs 3136 case REG_SS: 3137 return Yss 3138 case REG_DS: 3139 return Yds 3140 case REG_ES: 3141 return Yes 3142 case REG_FS: 3143 return Yfs 3144 case REG_GS: 3145 return Ygs 3146 case REG_TLS: 3147 return Ytls 3148 3149 case REG_GDTR: 3150 return Ygdtr 3151 case REG_IDTR: 3152 return Yidtr 3153 case REG_LDTR: 3154 return Yldtr 3155 case REG_MSW: 3156 return Ymsw 3157 case REG_TASK: 3158 return Ytask 3159 3160 case REG_CR + 0: 3161 return Ycr0 3162 case REG_CR + 1: 3163 return Ycr1 3164 case REG_CR + 2: 3165 return Ycr2 3166 case REG_CR + 3: 3167 return Ycr3 3168 case REG_CR + 4: 3169 return Ycr4 3170 case REG_CR + 5: 3171 return Ycr5 3172 case REG_CR + 6: 3173 return Ycr6 3174 case REG_CR + 7: 3175 return Ycr7 3176 case REG_CR + 8: 3177 return Ycr8 3178 3179 case REG_DR + 0: 3180 return Ydr0 3181 case REG_DR + 1: 3182 return Ydr1 3183 case REG_DR + 2: 3184 return Ydr2 3185 case REG_DR + 3: 3186 return Ydr3 3187 case REG_DR + 4: 3188 return Ydr4 3189 case REG_DR + 5: 3190 return Ydr5 3191 case REG_DR + 6: 3192 return Ydr6 3193 case REG_DR + 7: 3194 return Ydr7 3195 3196 case REG_TR + 0: 3197 return Ytr0 3198 case REG_TR + 1: 3199 return Ytr1 3200 case REG_TR + 2: 3201 return Ytr2 3202 case REG_TR + 3: 3203 return Ytr3 3204 case REG_TR + 4: 3205 return Ytr4 3206 case REG_TR + 5: 3207 return Ytr5 3208 case REG_TR + 6: 3209 return Ytr6 3210 case REG_TR + 
7:
		return Ytr7
	}

	return Yxxx
}

// AsmBuf accumulates the bytes of one variable-length x86 instruction and
// carries the assembly state that belongs to that instruction.
type AsmBuf struct {
	buf      [100]byte // backing storage; buf[:off] holds the bytes written so far
	off      int       // number of bytes written
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}

// Put1 writes the single byte x at the end of the buffer.
func (ab *AsmBuf) Put1(x byte) {
	end := ab.off
	ab.buf[end] = x
	ab.off = end + 1
}

// Put2 writes the bytes x, y (in that order) at the end of the buffer.
func (ab *AsmBuf) Put2(x, y byte) {
	end := ab.off
	ab.buf[end], ab.buf[end+1] = x, y
	ab.off = end + 2
}

// Put3 writes the bytes x, y, z (in that order) at the end of the buffer.
func (ab *AsmBuf) Put3(x, y, z byte) {
	end := ab.off
	ab.buf[end], ab.buf[end+1], ab.buf[end+2] = x, y, z
	ab.off = end + 3
}

// Put4 writes the bytes x, y, z, w (in that order) at the end of the buffer.
func (ab *AsmBuf) Put4(x, y, z, w byte) {
	end := ab.off
	ab.buf[end], ab.buf[end+1], ab.buf[end+2], ab.buf[end+3] = x, y, z, w
	ab.off = end + 4
}

// PutInt16 appends the two bytes of v, least-significant byte first.
func (ab *AsmBuf) PutInt16(v int16) {
	binary.LittleEndian.PutUint16(ab.buf[ab.off:], uint16(v))
	ab.off += 2
}

// PutInt32 appends the four bytes of v, least-significant byte first.
func (ab *AsmBuf) PutInt32(v int32) {
	binary.LittleEndian.PutUint32(ab.buf[ab.off:], uint32(v))
	ab.off += 4
}

// PutInt64 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt64(v int64) {
	// Bytes are stored least-significant first.
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.buf[ab.off+4] = byte(v >> 32)
	ab.buf[ab.off+5] = byte(v >> 40)
	ab.buf[ab.off+6] = byte(v >> 48)
	ab.buf[ab.off+7] = byte(v >> 56)
	ab.off += 8
}

// Put copies b into the buffer.
//
// copy stops at the end of the backing array, but off is still advanced by
// len(b), so b must fit in the space that remains.
func (ab *AsmBuf) Put(b []byte) {
	copy(ab.buf[ab.off:], b)
	ab.off += len(b)
}

// PutOpBytesLit writes a zero-terminated sequence of bytes from op,
// starting at the specified offset (e.g. z counter value).
// The trailing 0 is not written.
//
// Intended to be used for literal Z cases.
// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
	for int(op[offset]) != 0 {
		ab.Put1(byte(op[offset]))
		offset++
	}
}

// Insert inserts b at offset i. The bytes previously stored at buf[i:off]
// are moved one position towards the end and the buffer grows by one byte.
func (ab *AsmBuf) Insert(i int, b byte) {
	ab.off++
	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
	ab.buf[i] = b
}

// Last returns the byte at the end of the buffer.
// It indexes buf[off-1], so the buffer must not be empty.
func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }

// Len returns the length of the buffer.
func (ab *AsmBuf) Len() int { return ab.off }

// Bytes returns the contents of the buffer.
// The result is a slice of the internal array, not a copy.
func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }

// Reset empties the buffer.
// Only the write offset is cleared; the flag fields are left untouched.
func (ab *AsmBuf) Reset() { ab.off = 0 }

// At returns the byte at offset i.
func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }

// asmidx emits SIB byte.
//
// The byte is assembled in i from three parts: reg[index]<<3, a scale code
// (0, 1, 2, 3 for scale 1, 2, 4, 8) shifted left by 6, and reg[base] OR-ed
// into the low bits. An unsupported scale, index or base is reported through
// ctxt.Diag and a zero byte is emitted in its place.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: use index code 4 and skip the scale switch.
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// These registers are rejected as index on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Scale is only examined when an index register is present.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected as base on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	// Still emit one byte so the output keeps the length a SIB byte would take.
	ab.Put1(0)
}

// relput4 emits the 4-byte value of a as computed by vaddr. If vaddr filled
// in a relocation, a copy of it is appended to cursym with its offset set to
// the position (p.Pc plus the bytes already in ab) where those 4 bytes land.
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the value to encode for a and, when a needs a relocation,
// describes that relocation in *r. A non-nil r is first reset to the zero
// Reloc.
//
// For static, extern and GOT-referenced names it fills in a 4-byte
// relocation (R_GOTPCREL, R_ADDR or R_PCREL) against a.Sym with addend
// a.Offset and returns 0. For memory or address operands based on REG_TLS it
// returns 0 and, when not building with Flag_shared, or on Android or
// Darwin, fills in an R_TLS_LE relocation. In both cases r.Off is left at -1
// for the caller to fill in, and a nil r is fatal.
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
	}

	// No relocation needed: the operand's value is its plain offset.
	return a.Offset
}

// asmandsz emits the ModR/M byte for operand a, followed by whatever SIB
// byte and displacement the chosen addressing form needs.
//
// r is shifted into the reg field of the ModR/M byte (r<<3). rex is masked
// down to 0x40|Rxr and merged into ab.rexflag together with the extension
// bits of the registers that a uses. m64 is not referenced by this function.
// A relocation produced for a is appended to cursym at the position of the
// 32-bit displacement. Operands that cannot be encoded are reported through
// ctxt.Diag.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// v is the displacement; it is truncated to 32 bits even when the
	// overflow check above reported a diagnostic.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks
		// only add a more specific diagnostic before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		// Register operand: mod field is 3, no SIB or displacement follows.
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (not TLS, not a segment
	// register): always encoded with a SIB byte (r/m field is 4).
	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
		// NOTE: this base shadows the function-level base for the rest of
		// this branch.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// Index without base: mod=0 and a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement at all. BP and R13 are excluded, presumably
		// because their base code with mod=0 is the no-base form used
		// above (see the REG_NONE case in asmidx) — TODO confirm.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// One-byte displacement (mod=1), only without a relocation.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Four-byte displacement (mod=2).
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// From here on there is no real index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No base register, or a segment/TLS pseudo base: only a 32-bit
	// displacement is encoded.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through asmidx for a SIB byte with no
	// index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
			ctxt.Headtype != objabi.Hwindows {
			// The displacement moves into the addend of an R_TLS_LE
			// relocation; the encoded value itself becomes 0.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, recording the pending relocation (if
	// any) at the position where the displacement is written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}

// asmand encodes operand a with register operand ra supplying the ModR/M
// reg field (reg[ra.Reg]) and its extension bits (regrex[ra.Reg]).
func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
}

// asmando encodes operand a with the constant o placed in the ModR/M reg
// field and no extension bits.
func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
}

// bytereg rewrites a plain register operand in the range AX..R15 to the
// register at the same distance from AL, and zeroes *t.
// NOTE(review): *t is presumably the cached operand class (p.Ft/p.Tt), where
// 0 means "classify again" — confirm against the callers.
func bytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
		a.Reg += REG_AL - REG_AX
		*t = 0
	}
}

// unbytereg is the inverse of bytereg: a plain register operand in the
// range AL..R15B is rewritten to the register at the same distance from AX,
// and *t is zeroed.
func unbytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
		a.Reg += REG_AX - REG_AL
		*t = 0
	}
}

// Encoding kinds used in the fifth column of the ymovtab rows.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)

// ymovtab lists the special-case move-like encodings. Each row holds the
// instruction, three operand classes, one of the mov* encoding kinds above
// and up to four encoding bytes.
// NOTE(review): the operand classes are presumably from / from3 / to —
// confirm against the movtab definition.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero row; presumably the end-of-table marker — TODO confirm
	// against the code that scans ymovtab.
	{0, 0, 0, 0, 0, [4]uint8{}},
}

// isax reports whether a names AX, AL or AH as its register, or uses AX as
// its index register.
func isax(a *obj.Addr) bool {
	switch a.Reg {
	case REG_AX, REG_AL, REG_AH:
		return true
	}

	return a.Index ==
		REG_AX
}

// subreg replaces every use of register from in p's From and To operands
// (both the Reg and the Index fields) with register to. The cached operand
// class (p.Ft or p.Tt) of a changed operand is zeroed, which makes doasm
// classify that operand again.
func subreg(p *obj.Prog, from int, to int) {
	if false { /* debug['Q'] */
		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
	}

	if int(p.From.Reg) == from {
		p.From.Reg = int16(to)
		p.Ft = 0
	}

	if int(p.To.Reg) == from {
		p.To.Reg = int16(to)
		p.Tt = 0
	}

	if int(p.From.Index) == from {
		p.From.Index = int16(to)
		p.Ft = 0
	}

	if int(p.To.Index) == from {
		p.To.Index = int16(to)
		p.Tt = 0
	}

	if false { /* debug['Q'] */
		fmt.Printf("%v\n", p)
	}
}

// mediaop emits the escape/prefix bytes that precede opcode byte op, then
// the opcode byte itself, and returns z (advanced by one when an extra byte
// of o.op was consumed).
//
// When op is one of Pm, Pe, Pf2, Pf3 and osize != 1, op is treated as a
// prefix: it is emitted (unless it is Pm), followed by Pm, and the real
// opcode is taken from o.op[z+1]. In every other case Pm is emitted unless
// it is already the last byte in the buffer.
func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
	switch op {
	case Pm, Pe, Pf2, Pf3:
		if osize != 1 {
			if op != Pm {
				ab.Put1(byte(op))
			}
			ab.Put1(Pm)
			z++
			op = int(o.op[z])
			break
		}
		fallthrough

	default:
		if ab.Len() == 0 || ab.Last() != Pm {
			ab.Put1(Pm)
		}
	}

	ab.Put1(byte(op))
	return z
}

// bpduff1 holds the machine code of the two instructions named on its lines.
// NOTE(review): presumably emitted before a Duff's-device call to set up
// BP — confirm at the use site.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 holds the machine code of the instruction named on its line.
// NOTE(review): presumably the matching BP restore for bpduff1 — confirm at
// the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}

// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Expects asmbuf.evex to be properly initialized.
//
// Any of rm, v, r and k may be nil. Suffix-related problems (zeroing,
// rounding, broadcast, SAE) are reported through ctxt.Diag; the prefix and
// opcode bytes are emitted regardless.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// These four bits default to 1 and are cleared when the corresponding
	// register extension bit is needed.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, the X bit carries the 5th selector
		// bit of rm.Reg instead.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits flipped.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		if k == nil {
			// When you request zeroing you must specify a mask register.
			// See issue 57952.
			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
		} else if k.Reg == REG_K0 {
			// The mask register must not be K0. That restriction is already
			// handled by the Yknot0 restriction in the opcode tables, so we
			// won't ever reach here. But put something sensible here just in case.
			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; the first matching
	// case wins.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// With rounding, the L'L field carries the rounding mode.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared for the 5th selector bit of rm's index register, or
	// otherwise of v.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}

// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
// For details about vex prefix see:
// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
	ab.vexflag = true
	rexR := 0
	if r != nil {
		rexR = regrex[r.Reg] & Rxr
	}
	rexB := 0
	rexX := 0
	if rm != nil {
		rexB = regrex[rm.Reg] & Rxb
		rexX = regrex[rm.Index] & Rxx
	}
	vexM := (vex >> 3) & 0x7
	vexWLP := vex & 0x87
	vexV := byte(0)
	if v != nil {
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// Flip the four register-selector bits before they are placed in the prefix.
	vexV ^= 0xF
	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
		// Can use 2-byte encoding.
		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
	} else {
		// Must use 3-byte encoding.
		ab.Put3(0xc4,
			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
			vexV<<3|vexWLP,
		)
	}
	ab.Put1(opcode)
}

// regIndex returns register index that fits in 5 bits.
//
//	R         : 3 bit | legacy instructions     | N/A
//	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
//	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
//
// Examples:
//
//	REG_Z30 => 30
//	REG_X15 => 15
//	REG_R9  => 9
//	REG_AX  => 0
func regIndex(r int16) int {
	lower3bits := reg[r]
	// In Go, & and << share precedence and associate left to right,
	// so each flag is masked first and shifted afterwards.
	high4bit := regrex[r] & Rxr << 1
	high5bit := regrex[r] & RxrEvex << 0
	return lower3bits | high4bit | high5bit
}

// avx2gatherValid reports whether p satisfies AVX2 gather constraints.
// Reports errors via ctxt.
func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// If any pair of the index, mask, or destination registers
	// are the same, illegal instruction trap (#UD) is triggered.
	index := regIndex(p.GetFrom3().Index)
	mask := regIndex(p.From.Reg)
	dest := regIndex(p.To.Reg)
	if dest == mask || dest == index || mask == index {
		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// avx512gatherValid reports whether p satisfies AVX512 gather constraints.
// Reports errors via ctxt.
func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// Illegal instruction trap (#UD) is triggered if the destination vector
	// register is the same as index vector in VSIB.
	index := regIndex(p.From.Index)
	dest := regIndex(p.To.Reg)
	if dest == index {
		ctxt.Diag("index and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// doasm appends the encoding of p to ab.
//
// It proceeds in the following order:
//   - look up the optab entry for p.As in opindex; a missing entry is
//     reported via ctxt.Diag and nothing is emitted;
//   - emit any non-zero prefix byte that prefixof reports for p.From and p.To;
//   - reject gather instructions whose operands fail the AVX2/AVX512
//     distinct-register checks;
//   - walk o.ytab looking for an entry that matches the operand classes,
//     emit the prefix selected by o.prefix and then the bytes selected by
//     the entry's zcase, and return;
//   - if no ytab entry produced an encoding, try the ymovtab special cases;
//   - otherwise fall through to the "bad" label, which on non-AMD64 targets
//     retries byte instructions with exchanged registers and, failing that,
//     reports "invalid instruction".
//
// Relocations needed by the emitted bytes are added to cursym.
func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]

	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}

	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}

	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		if p.GetFrom3() == nil {
			// gathers need a 3rd arg. See issue 58822.
			ctxt.Diag("need a third arg for gather instruction: %v", p)
			return
		}
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}

	// Classify the operands once and cache the result on p.
	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}

	// Operand classes are pre-multiplied by Ymax; the ymovtab lookup
	// below uses them as row offsets into ycover.
	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax

	// xo is 1 when the opcode bytes start with the 0x0f escape, else 0.
	xo := obj.Bool2int(o.op[0] == 0x0f)
	// z is the running offset into o.op for the ytab entry being considered.
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64

	// Build the operand-class list: From, the RestArgs in order, then To,
	// leaving out From/To when their class is Ynone.
	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}

	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}

			// Emit (or record in ab.rexflag) the prefix requested by the optab entry.
			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)

			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)

			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)

			case Pef3:
				ab.Put3(Pe, Pf3, Pm)

			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)

			case Pm: // opcode escape
				ab.Put1(Pm)

			case Pe: // 16 bit escape
				ab.Put1(Pe)

			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw

			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}

			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old cmd/internal/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}

			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}

			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}

			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			op = int(o.op[z])
			if op == 0x0f {
				// Emit the 0x0f escape and step to the opcode byte that follows it.
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}

			// Emit the opcode and operand bytes in the layout named by the zcase.
			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return

			case Zpseudo:
				break

			case Zlit:
				ab.PutOpBytesLit(z, &o.op)

			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough

			case Zm_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Z_m_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zm2_r:
				ab.Put2(byte(op), o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_i_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
				ab.Put1(byte(p.To.Offset))

			case Zibm_r, Zibr_m:
				ab.PutOpBytesLit(z, &o.op)
				if yt.zcase == Zibr_m {
					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				} else {
					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				}
				// The immediate is 1 byte by default, 2 bytes for Yi32 under
				// the Pe prefix, and 4 bytes for any other Yi32.
				switch {
				default:
					ab.Put1(byte(p.From.Offset))
				case yt.args[0] == Yi32 && o.prefix == Pe:
					ab.PutInt16(int16(p.From.Offset))
				case yt.args[0] == Yi32:
					ab.PutInt32(int32(p.From.Offset))
				}

			case Zaut_r:
				ab.Put1(0x8d) // leal
				if p.From.Type != obj.TYPE_ADDR {
					ctxt.Diag("asmins: Zaut sb type ADDR")
				}
				// Temporarily present the operand as a memory reference
				// for asmand, then restore its type.
				p.From.Type = obj.TYPE_MEM
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				p.From.Type = obj.TYPE_ADDR

			case Zm_o:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))

			case Zr_m:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])

			case Zvex_rm_v_r:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zvex_rm_v_ro:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_rm_vo:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_r_v:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				// Keep only the low 3 bits of the X or Y register number.
				regnum := byte(0x7)
				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
					regnum &= byte(p.GetFrom3().Reg - REG_X0)
				} else {
					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
				}
				ab.Put1(o.op[z+2] | regnum)
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zvex_i_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zvex_v_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zvex_r_v_rm:
				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex_rm_r_vo:
				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_r_rm:
				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zvex_hr_rm_v_r:
				hr, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				// The register index of hr goes in the upper four bits of the trailing byte.
				ab.Put1(byte(regIndex(hr.Reg) << 4))

			case Zevex_k_rmo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))

			case Zevex_i_rm_vo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_vo:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, to, nil, kmask)
				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
				ab.Put1(byte(imm.Offset))

			case Zevex_i_r_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_r_k_rm:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, nil, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_r:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, nil, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, nil)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_k_r:
				imm, from, from3, kmask, to := unpackOps5(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_r_v_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_rm_k_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_r_k_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_k_r:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)

			case Zevex_r_v_k_rm:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, from3, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)

			case Zr_m_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zr_m_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))

			case Zcallindreg:
				// Record a zero-size R_CALLIND relocation at the start of
				// the instruction, then encode it like Zo_m64.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc)
				r.Type = objabi.R_CALLIND
				r.Siz = 0
				fallthrough

			case Zo_m64:
				ab.Put1(byte(op))
				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)

			case Zm_ibo:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))

			case Zibo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Zibo_m_xm:
				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_ib, Zib_:
				if yt.zcase == Zib_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if p.As == AXABORT {
					ab.Put1(o.op[z+1])
				}
				ab.Put1(byte(vaddr(ctxt, p, a, nil)))

			case Zib_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))

			case Zil_rp:
				ab.rexflag |= regrex[p.To.Reg] & Rxb
				ab.Put1(byte(op + reg[p.To.Reg]))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Zo_iw:
				ab.Put1(byte(op))
				if p.From.Type != obj.TYPE_NONE {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				}

			case Ziq_rp:
				v = vaddr(ctxt, p, &p.From, &rel)
				// l holds the upper 32 bits of the constant and selects
				// among the three encodings below.
				l = int(v >> 32)
				if l == 0 && rel.Siz != 8 {
					ab.rexflag &^= (0x40 | Rxw)

					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt32(int32(v))
				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
					ab.Put1(0xc7)
					ab.asmando(ctxt, cursym, p, &p.To, 0)

					ab.PutInt32(int32(v)) // need all 8
				} else {
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(op + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt64(v)
				}

			case Zib_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_il, Zil_:
				if yt.zcase == Zil_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zm_ilo, Zilo_m:
				ab.Put1(byte(op))
				if yt.zcase == Zilo_m {
					a = &p.From
					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				} else {
					a = &p.To
					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				}

				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zil_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Z_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.To.Reg]))

			case Zrp_:
				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.From.Reg]))

			case Zcallcon, Zjmpcon:
				if yt.zcase == Zcallcon {
					ab.Put1(byte(op))
				} else {
					ab.Put1(o.op[z+1])
				}
				// Emit a 4-byte placeholder covered by an R_PCREL relocation.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Type = objabi.R_PCREL
				r.Siz = 4
				r.Add = p.To.Offset
				ab.PutInt32(0)

			case Zcallind:
				ab.Put2(byte(op), o.op[z+1])
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				if ctxt.Arch.Family == sys.AMD64 {
					r.Type = objabi.R_PCREL
				} else {
					r.Type = objabi.R_ADDR
				}
				r.Siz = 4
				r.Add = p.To.Offset
				r.Sym = p.To.Sym
				ab.PutInt32(0)

			case Zcall, Zcallduff:
				if p.To.Sym == nil {
					ctxt.Diag("call without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
					ctxt.Diag("directly calling duff when dynamically linking Go")
				}

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Maintain BP around call, since duffcopy/duffzero can't do it
					// (the call jumps into the middle of the function).
					// This makes it possible to see call sites for duffcopy/duffzero in
					// BP-based profiling tools like Linux perf (which is the
					// whole point of maintaining frame pointers in Go).
					// MOVQ BP, -16(SP)
					// LEAQ -16(SP), BP
					ab.Put(bpduff1)
				}
				ab.Put1(byte(op))
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Sym = p.To.Sym
				r.Add = p.To.Offset
				r.Type = objabi.R_CALL
				r.Siz = 4
				ab.PutInt32(0)

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Pop BP pushed above.
					// MOVQ 0(BP), BP
					ab.Put(bpduff2)
				}

			// TODO: jump across functions needs reloc
			case Zbr, Zjmp, Zloop:
				if p.As == AXBEGIN {
					ab.Put1(byte(op))
				}
				if p.To.Sym != nil {
					if yt.zcase != Zjmp {
						ctxt.Diag("branch to ATEXT")
						ctxt.DiagFlush()
						log.Fatalf("bad code")
					}

					ab.Put1(o.op[z+1])
					r = obj.Addrel(cursym)
					r.Off = int32(p.Pc + int64(ab.Len()))
					r.Sym = p.To.Sym
					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
					// it can point to a trampoline instead of the destination itself.
					r.Type = objabi.R_CALL
					r.Siz = 4
					ab.PutInt32(0)
					break
				}

				// Assumes q is in this function.
				// TODO: Check in input, preserve in brchain.

				// Fill in backward jump now.
				q = p.To.Target()

				if q == nil {
					ctxt.Diag("jmp/branch/loop without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if p.Back&branchBackwards != 0 {
					// Displacement as seen from the end of a 2-byte instruction.
					v = q.Pc - (p.Pc + 2)
					if v >= -128 && p.As != AXBEGIN {
						if p.As == AJCXZL {
							ab.Put1(0x67)
						}
						ab.Put2(byte(op), byte(v))
					} else if yt.zcase == Zloop {
						ctxt.Diag("loop too far: %v", p)
					} else {
						// Long form: adjust the displacement for the larger
						// instruction (5 bytes instead of 2, plus one for
						// each extra byte emitted below).
						v -= 5 - 2
						if p.As == AXBEGIN {
							v--
						}
						if yt.zcase == Zbr {
							ab.Put1(0x0f)
							v--
						}

						ab.Put1(o.op[z+1])
						ab.PutInt32(int32(v))
					}

					break
				}

				// Annotate target; will fill in later.
				p.Forwd = q.Rel

				q.Rel = p
				if p.Back&branchShort != 0 && p.As != AXBEGIN {
					if p.As == AJCXZL {
						ab.Put1(0x67)
					}
					ab.Put2(byte(op), 0)
				} else if yt.zcase == Zloop {
					ctxt.Diag("loop too far: %v", p)
				} else {
					if yt.zcase == Zbr {
						ab.Put1(0x0f)
					}
					ab.Put1(o.op[z+1])
					ab.PutInt32(0)
				}

			case Zbyte:
				v = vaddr(ctxt, p, &p.From, &rel)
				if rel.Siz != 0 {
					rel.Siz = uint8(op)
					r = obj.Addrel(cursym)
					*r = rel
					r.Off = int32(p.Pc + int64(ab.Len()))
				}

				// Here op is a byte count: emit 1 byte, 2 when op > 1,
				// 4 when op > 2, and 8 when op > 4, low byte first.
				ab.Put1(byte(v))
				if op > 1 {
					ab.Put1(byte(v >> 8))
					if op > 2 {
						ab.PutInt16(int16(v >> 16))
						if op > 4 {
							ab.PutInt32(int32(v >> 32))
						}
					}
				}
			}

			return
		}
	}
	// No ytab entry produced an encoding; try the special cases in ymovtab.
	f3t = Ynone * Ymax
	if p.GetFrom3() != nil {
		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
	}
	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
		var pp obj.Prog
		var t []byte
		if p.As == mo[0].as {
			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
				t = mo[0].op[:]
				switch mo[0].code {
				default:
					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)

				case movLit:
					// Emit the op bytes up to, but not including, the first zero byte.
					for z = 0; t[z] != 0; z++ {
						ab.Put1(t[z])
					}

				case movRegMem:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))

				case movMemReg:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))

				case movRegMem2op: // r,m - 2op
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)

				case movMemReg2op:
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)

				case movFullPtr:
					if t[0] != 0 {
						ab.Put1(t[0])
					}
					// The opcode is selected by the segment register held in p.To.Index.
					switch p.To.Index {
					default:
						goto bad

					case REG_DS:
						ab.Put1(0xc5)

					case REG_SS:
						ab.Put2(0x0f, 0xb2)

					case REG_ES:
						ab.Put1(0xc4)

					case REG_FS:
						ab.Put2(0x0f, 0xb4)

					case REG_GS:
						ab.Put2(0x0f, 0xb5)
					}

					ab.asmand(ctxt, cursym, p, &p.From, &p.To)

				case movDoubleShift:
					// A leading Pw or Pe in the op bytes is a prefix marker; consume it.
					if t[0] == Pw {
						if ctxt.Arch.Family != sys.AMD64 {
							ctxt.Diag("asmins: illegal 64: %v", p)
						}
						ab.rexflag |= Pw
						t = t[1:]
					} else if t[0] == Pe {
						ab.Put1(Pe)
						t = t[1:]
					}

					switch p.From.Type {
					default:
						goto bad

					case obj.TYPE_CONST:
						ab.Put2(0x0f, t[0])
						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						ab.Put1(byte(p.From.Offset))

					case obj.TYPE_REG:
						switch p.From.Reg {
						default:
							goto bad

						case REG_CL, REG_CX:
							ab.Put2(0x0f, t[1])
							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						}
					}

				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
				// where you load the TLS base register into a register and then index off that
				// register to access the actual TLS variables. Systems that allow direct TLS access
				// are handled in prefixof above and should not be listed here.
				case movTLSReg:
					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
						ctxt.Diag("invalid load of TLS: %v", p)
					}

					if ctxt.Arch.Family == sys.I386 {
						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
						// where you load the TLS base register into a register and then index off that
						// register to access the actual TLS variables. Systems that allow direct TLS access
						// are handled in prefixof above and should not be listed here.
						switch ctxt.Headtype {
						default:
							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

						case objabi.Hlinux, objabi.Hfreebsd:
							if ctxt.Flag_shared {
								// Note that this is not generating the same insns as the other cases.
								//     MOV TLS, dst
								// becomes
								//     call __x86.get_pc_thunk.dst
								//     movl (gotpc + g@gotntpoff)(dst), dst
								// which is encoded as
								//     call __x86.get_pc_thunk.dst
								//     movq 0(dst), dst
								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
								// is g, which we can't check here, but will when we assemble the second
								// instruction.
								dst := p.To.Reg
								ab.Put1(0xe8)
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_CALL
								r.Siz = 4
								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
								ab.PutInt32(0)

								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_TLS_IE
								r.Siz = 4
								r.Add = 2
								ab.PutInt32(0)
							} else {
								// ELF TLS base is 0(GS).
								pp.From = p.From

								pp.From.Type = obj.TYPE_MEM
								pp.From.Reg = REG_GS
								pp.From.Offset = 0
								pp.From.Index = REG_NONE
								pp.From.Scale = 0
								ab.Put2(0x65, // GS
									0x8B)
								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
							}
						case objabi.Hplan9:
							pp.From = obj.Addr{}
							pp.From.Type = obj.TYPE_MEM
							pp.From.Name = obj.NAME_EXTERN
							pp.From.Sym = plan9privates
							pp.From.Offset = 0
							pp.From.Index = REG_NONE
							ab.Put1(0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						}
						break
					}

					// Not I386: the cases below set REX.W, so this is the 64-bit path.
					switch ctxt.Headtype {
					default:
						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

					case objabi.Hlinux, objabi.Hfreebsd:
						if !ctxt.Flag_shared {
							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
						}
						// Note that this is not generating the same insn as the other cases.
						//     MOV TLS, R_to
						// becomes
						//     movq g@gottpoff(%rip), R_to
						// which is encoded as
						//     movq 0(%rip), R_to
						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
						// is g, which we can't check here, but will when we assemble the second
						// instruction.
						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)

						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
						r = obj.Addrel(cursym)
						r.Off = int32(p.Pc + int64(ab.Len()))
						r.Type = objabi.R_TLS_IE
						r.Siz = 4
						r.Add = -4
						ab.PutInt32(0)

					case objabi.Hplan9:
						pp.From = obj.Addr{}
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_EXTERN
						pp.From.Sym = plan9privates
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						ab.rexflag |= Pw
						ab.Put1(0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
						// TLS base is 0(FS).
						pp.From = p.From

						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_NONE
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x64, // FS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					}
				}
				return
			}
		}
	}
	goto bad

bad:
	if ctxt.Arch.Family != sys.AMD64 {
		// here, the assembly has failed.
		// if it's a byte instruction that has
		// unaddressable registers, try to
		// exchange registers and reissue the
		// instruction with the operands renamed.
		pp := *p

		unbytereg(&pp.From, &pp.Ft)
		unbytereg(&pp.To, &pp.Tt)

		z := int(p.From.Reg)
		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.To)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				}
				return
			}

			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
			}
			return
		}

		z = int(p.To.Reg)
		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.From)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				}
				return
			}

			if isax(&p.From) {
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
			}
			return
		}
	}

	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
}

// byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
// which is not referenced in a.
// If a is empty, it returns BX to account for MULB-like instructions
// that might use DX and AX.
5347 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5348 cana, canb, canc, cand := true, true, true, true 5349 if a.Type == obj.TYPE_NONE { 5350 cana, cand = false, false 5351 } 5352 5353 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5354 switch a.Reg { 5355 case REG_NONE: 5356 cana, cand = false, false 5357 case REG_AX, REG_AL, REG_AH: 5358 cana = false 5359 case REG_BX, REG_BL, REG_BH: 5360 canb = false 5361 case REG_CX, REG_CL, REG_CH: 5362 canc = false 5363 case REG_DX, REG_DL, REG_DH: 5364 cand = false 5365 } 5366 } 5367 5368 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5369 switch a.Index { 5370 case REG_AX: 5371 cana = false 5372 case REG_BX: 5373 canb = false 5374 case REG_CX: 5375 canc = false 5376 case REG_DX: 5377 cand = false 5378 } 5379 } 5380 5381 switch { 5382 case cana: 5383 return REG_AX 5384 case canb: 5385 return REG_BX 5386 case canc: 5387 return REG_CX 5388 case cand: 5389 return REG_DX 5390 default: 5391 ctxt.Diag("impossible byte register") 5392 ctxt.DiagFlush() 5393 log.Fatalf("bad code") 5394 return 0 5395 } 5396 } 5397 5398 func isbadbyte(a *obj.Addr) bool { 5399 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5400 } 5401 5402 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5403 ab.Reset() 5404 5405 ab.rexflag = 0 5406 ab.vexflag = false 5407 ab.evexflag = false 5408 mark := ab.Len() 5409 ab.doasm(ctxt, cursym, p) 5410 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5411 // as befits the whole approach of the architecture, 5412 // the rex prefix must appear before the first opcode byte 5413 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5414 // before the 0f opcode escape!), or it might be ignored. 5415 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5416 if ctxt.Arch.Family != sys.AMD64 { 5417 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5418 } 5419 n := ab.Len() 5420 var np int 5421 for np = mark; np < n; np++ { 5422 c := ab.At(np) 5423 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5424 break 5425 } 5426 } 5427 ab.Insert(np, byte(0x40|ab.rexflag)) 5428 } 5429 5430 n := ab.Len() 5431 for i := len(cursym.R) - 1; i >= 0; i-- { 5432 r := &cursym.R[i] 5433 if int64(r.Off) < p.Pc { 5434 break 5435 } 5436 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5437 r.Off++ 5438 } 5439 if r.Type == objabi.R_PCREL { 5440 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5441 // PC-relative addressing is relative to the end of the instruction, 5442 // but the relocations applied by the linker are relative to the end 5443 // of the relocation. Because immediate instruction 5444 // arguments can follow the PC-relative memory reference in the 5445 // instruction encoding, the two may not coincide. In this case, 5446 // adjust addend so that linker can keep relocating relative to the 5447 // end of the relocation. 5448 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5449 } else if ctxt.Arch.Family == sys.I386 { 5450 // On 386 PC-relative addressing (for non-call/jmp instructions) 5451 // assumes that the previous instruction loaded the PC of the end 5452 // of that instruction into CX, so the adjustment is relative to 5453 // that. 5454 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5455 } 5456 } 5457 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5458 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5459 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5460 } 5461 5462 } 5463 } 5464 5465 // unpackOps4 extracts 4 operands from p. 
5466 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5467 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To 5468 } 5469 5470 // unpackOps5 extracts 5 operands from p. 5471 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5472 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To 5473 }