github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/obj/x86/asm6.go (about) 1 // Inferno utils/6l/span.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"encoding/binary"
	"fmt"
	"log"
	"strings"

	"github.com/go-asm/go/buildcfg"
	"github.com/go-asm/go/cmd/obj"
	"github.com/go-asm/go/cmd/objabi"
	"github.com/go-asm/go/cmd/sys"
)

var (
	plan9privates *obj.LSym
)

// Instruction layout.

// Loop alignment constants:
// want to align loop entry to loopAlign-byte boundary,
// and willing to insert at most maxLoopPad bytes of NOP to do so.
// We define a loop entry as the target of a backward jump.
//
// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
// and it aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
// is very slight but negative, so the alignment is disabled by
// setting maxLoopPad = 0. The code is here for reference and
// for future experiments.
const (
	loopAlign  = 16
	maxLoopPad = 0
)

// Bit flags that are used to express jump target properties.
const (
	// branchBackwards marks targets that are located behind.
	// Used to express jumps to loop headers.
	branchBackwards = (1 << iota)
	// branchShort marks branches whose target is close,
	// with an offset in the -128..127 range.
	branchShort
	// branchLoopHead marks loop entry.
	// Used to insert padding for misaligned loops.
	branchLoopHead
)

// opBytes holds optab encoding bytes.
// Each ytab reserves fixed amount of bytes in this array.
//
// The size should be the minimal number of bytes that
// are enough to hold biggest optab op lines.
type opBytes [31]uint8

// Optab is one row of the optab instruction table: the instruction (as),
// the operand patterns it accepts (ytab), a prefix selector (prefix, one of
// the P* constants) and the opcode bytes that the patterns consume (op).
type Optab struct {
	as     obj.As
	ytab   []ytab
	prefix uint8
	op     opBytes
}

// movtab is a table row keyed by instruction (as).
// NOTE(review): presumably describes special-case moves; the meaning of
// ft, f3t, tt and code is not visible in this part of the file — confirm
// against the table that uses this type.
type movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}

// Operand classes (Ytypes). The ytab lines below list, per instruction
// form, the classes each operand must satisfy. Ymax is the number of
// classes and sizes the ycover table.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax
)

// Encoding forms (Ztypes). The first field of every ytab line is one of
// these values. Zevex_first and Zevex_last bracket the EVEX forms, and
// Zmax is the number of forms.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax
)

// Prefix selectors (P*), used in the prefix column of optab and, in some
// rows, inside the opcode bytes themselves; followed by the Rx* bit masks.
// Several P* values are purely symbolic, as noted on each line.
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic; exact value doesn't matter

	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
	Rxw     = 1 << 3 // =1, 64-bit operand size
	Rxr     = 1 << 2 // extend modrm reg
	Rxx     = 1 << 1 // extend sib index
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
)

const (
	// Encoding for VEX prefix in tables.
	// The P, L, and W fields are chosen to match
	// their eventual locations in the VEX prefix bytes.

	// Using spare bit to make leading [E]VEX encoding byte different from
	// 0x0f even if all other VEX fields are 0.
	avxEscape = 1 << 6

	// P field - 2 bits
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
	// L field - 1 bit
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
	// W field - 1 bit
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
	// M field - 5 bits, but mostly reserved; we can store up to 3
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)

// ycover is a Ymax-by-Ymax table stored as a flat array:
// ycover[a*Ymax+b] = 1 means an operand of class a also counts as class b
// (for example ycover[Yi8*Ymax+Ys32] = 1). It is set up in instinit.
var ycover [Ymax * Ymax]uint8

var reg [MAXREG]int

var regrex [MAXREG + 1]int

// Operand pattern tables (ytabs).
//
// Each line has the form {Ztype, zoffset, argList{operand classes...}}:
// the encoding form to use, the number of opcode bytes this line owns in
// the instruction's Optab.op array, and the operand classes it accepts.
// The order of lines matters: the opcode bytes of an Optab row are laid
// out in the same order as the lines of its ytab.

var ynone = []ytab{
	{Zlit, 1, argList{}},
}

var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

// NOTE(review): the Yiauto/Yml/Yrf/Yxr patterns appear twice below;
// presumably a leftover from an older ytab layout — confirm whether the
// second group can still be matched before changing it.
var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}

var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}

var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}

var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}

var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

// Operand pattern tables (ytabs), continued. Each line has the form
// {Ztype, zoffset, argList{operand classes...}}.

var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}

var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}

// It should never have more than 1 entry,
// because some optab entries have opcode sequences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}

// You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
// to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// operand classes (the argList) match the Ytypes of the p.From and p.To
// operands. The function oclass computes the specific Ytype of an operand and
// then the set of more general Ytypes that it satisfies is implied by the
// ycover table, set up in instinit. For example, oclass distinguishes the
// constants 0 and 1 from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the 2nd entry in the line (the zoffset). When a matching line
// is found, that z pointer has the extra data to use in laying down the
// instruction bytes. The actual bytes laid down are a function of the 1st
// entry in the line (that is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL. The optab line says:
//
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml  -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_, z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml  -> Zr_m, z+2+1+2 (0x01)
//	Yml, Yrl  -> Zm_r, z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit. That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
922 var optab = 923 // as, ytab, andproto, opcode 924 [...]Optab{ 925 {obj.AXXX, nil, 0, opBytes{}}, 926 {AAAA, ynone, P32, opBytes{0x37}}, 927 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 928 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 929 {AAAS, ynone, P32, opBytes{0x3f}}, 930 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 931 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 933 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 934 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 935 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 936 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 937 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 938 {AADDPD, yxm, Pq, opBytes{0x58}}, 939 {AADDPS, yxm, Pm, opBytes{0x58}}, 940 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 941 {AADDSD, yxm, Pf2, opBytes{0x58}}, 942 {AADDSS, yxm, Pf3, opBytes{0x58}}, 943 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 944 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 945 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 946 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 947 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 948 {AADJSP, nil, 0, opBytes{}}, 949 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 950 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 951 {AANDNPD, yxm, Pq, opBytes{0x55}}, 952 {AANDNPS, yxm, Pm, opBytes{0x55}}, 953 {AANDPD, yxm, Pq, opBytes{0x54}}, 954 {AANDPS, yxm, Pm, opBytes{0x54}}, 955 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 956 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 957 {AARPL, yrl_ml, P32, opBytes{0x63}}, 958 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 959 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 960 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 961 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 962 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 963 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 964 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 965 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 966 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 967 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 968 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 969 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 970 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 971 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 972 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 973 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 974 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 975 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 976 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 977 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 978 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 979 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 980 {ABYTE, ybyte, Px, opBytes{1}}, 981 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 982 {ACBW, ynone, Pe, opBytes{0x98}}, 983 {ACDQ, ynone, Px, opBytes{0x99}}, 984 {ACDQE, ynone, Pw, opBytes{0x98}}, 985 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 986 {ACLC, ynone, Px, opBytes{0xf8}}, 987 {ACLD, ynone, Px, opBytes{0xfc}}, 988 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 989 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 990 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 991 {ACLI, ynone, Px, opBytes{0xfa}}, 992 {ACLTS, ynone, Pm, opBytes{0x06}}, 993 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 994 {ACMC, ynone, Px, opBytes{0xf5}}, 995 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 996 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 997 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 998 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 999 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 1000 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 1001 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1002 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1003 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1004 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1005 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1006 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1007 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1008 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1009 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1010 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1011 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1012 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1013 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1014 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1015 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1016 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1017 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1018 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1019 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1020 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1021 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1022 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1023 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1024 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1025 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1026 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1027 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1028 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1029 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1030 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1031 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1032 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1033 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1034 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1035 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1036 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1037 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1038 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1039 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1040 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1041 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1042 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1043 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1044 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1045 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1046 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1047 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1048 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1049 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1050 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1051 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1052 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1053 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1054 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1055 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1056 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1057 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1058 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1059 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1060 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1061 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1062 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1063 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1064 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1065 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1066 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1067 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1068 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1069 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1070 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1071 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1072 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1073 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1074 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1075 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1076 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1077 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1078 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1079 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1080 {ACWD, ynone, Pe, opBytes{0x99}}, 1081 {ACWDE, ynone, Px, opBytes{0x98}}, 1082 {ACQO, ynone, Pw, opBytes{0x99}}, 1083 {ADAA, ynone, P32, opBytes{0x27}}, 1084 {ADAS, ynone, P32, opBytes{0x2f}}, 1085 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1086 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1087 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1088 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1089 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1090 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1091 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1092 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1093 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1094 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1095 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1096 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1097 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1098 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1099 {AEMMS, ynone, Pm, opBytes{0x77}}, 1100 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1101 {AENTER, nil, 0, opBytes{}}, // botch 1102 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1103 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1104 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1105 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1106 {AHLT, ynone, Px, opBytes{0xf4}}, 1107 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1108 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1109 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1110 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1111 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1112 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1114 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1115 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1117 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1118 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1119 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1120 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1121 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1122 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1123 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1124 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1125 {AINSB, ynone, Pb, opBytes{0x6c}}, 1126 {AINSL, ynone, Px, opBytes{0x6d}}, 1127 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1128 {AINSW, ynone, Pe, opBytes{0x6d}}, 1129 {AICEBP, ynone, Px, opBytes{0xf1}}, 1130 {AINT, yint, Px, opBytes{0xcd}}, 1131 {AINTO, ynone, P32, opBytes{0xce}}, 1132 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1133 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1134 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1135 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1136 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1137 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1138 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1139 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1140 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1141 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1142 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1143 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1144 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1145 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1146 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1147 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1148 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1149 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1150 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1151 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1152 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1153 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1154 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1155 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1156 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1157 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1158 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1159 {ALAHF, ynone, Px, opBytes{0x9f}}, 1160 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1161 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1162 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1163 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1164 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1165 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1166 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1167 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1168 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1169 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1170 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1171 {ALOCK, ynone, Px, opBytes{0xf0}}, 1172 {ALODSB, ynone, Pb, opBytes{0xac}}, 1173 {ALODSL, ynone, Px, opBytes{0xad}}, 1174 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1175 {ALODSW, ynone, Pe, opBytes{0xad}}, 1176 {ALONG, ybyte, Px, opBytes{4}}, 1177 {ALOOP, yloop, Px, opBytes{0xe2}}, 1178 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1179 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1180 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1181 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1182 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1183 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1184 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1185 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1186 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1187 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1188 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1189 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1190 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1191 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1192 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1193 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1194 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1195 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1196 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1197 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1198 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1199 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1200 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1201 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1202 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1203 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1204 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1205 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1206 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1207 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1208 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1209 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1210 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1211 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1212 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1213 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1214 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1215 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1216 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1217 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1218 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1219 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1220 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1221 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1222 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1223 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1224 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1225 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1226 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1227 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1228 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1229 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1230 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1231 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1232 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1233 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1234 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1235 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1236 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1237 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1238 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1239 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1240 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1241 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1242 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1243 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1244 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1245 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1246 {AMULPD, yxm, Pe, opBytes{0x59}}, 1247 {AMULPS, yxm, Ym, opBytes{0x59}}, 1248 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1249 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1250 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1251 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1252 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1253 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1254 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1255 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1256 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1257 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1258 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1259 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1260 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1261 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1262 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1263 {AORPD, yxm, Pq, opBytes{0x56}}, 1264 {AORPS, yxm, Pm, opBytes{0x56}}, 1265 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1266 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1267 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1268 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1269 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1270 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1271 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1272 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1273 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1274 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1275 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1276 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1277 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1278 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1279 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1280 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1281 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1282 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1283 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1284 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1285 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1286 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1287 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1288 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1289 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1290 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1291 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1292 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1293 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1294 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1295 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1296 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1297 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1298 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1299 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1300 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1301 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1302 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1303 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1304 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1305 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1306 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1307 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1308 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1309 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1310 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1311 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1312 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1313 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1314 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1315 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1316 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1317 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1318 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1319 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1320 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1321 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1322 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1323 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1324 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1325 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1326 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1327 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1328 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1329 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1330 {APMINSW, yxm, Pe, opBytes{0xea}}, 1331 {APMINUB, yxm, Pe, opBytes{0xda}}, 1332 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1333 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1334 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1335 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1336 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1337 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1338 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1339 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1340 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1341 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1342 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1343 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1344 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1345 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1346 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1347 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1348 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1349 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1350 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1351 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1352 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1353 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1354 {APOPAL, ynone, P32, opBytes{0x61}}, 1355 {APOPAW, ynone, Pe, opBytes{0x61}}, 1356 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1357 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1358 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1359 {APOPFL, ynone, P32, opBytes{0x9d}}, 1360 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1361 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1362 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1363 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1364 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1365 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1366 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1367 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1368 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1369 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1370 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1371 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1372 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1373 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1374 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1375 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1376 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1377 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1378 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1379 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1380 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1381 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1382 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1383 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1384 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1385 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1386 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1387 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1388 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1389 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1390 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1391 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1392 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1393 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1394 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1395 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1396 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1397 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1398 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1399 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1400 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1401 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1402 {APUSHAL, ynone, P32, opBytes{0x60}}, 1403 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1404 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1405 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1406 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1407 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1409 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1410 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1411 {AQUAD, ybyte, Px, opBytes{8}}, 1412 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1413 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1415 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1416 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1417 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1418 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1419 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1421 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1422 {AREP, ynone, Px, opBytes{0xf3}}, 1423 {AREPN, ynone, Px, opBytes{0xf2}}, 1424 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1425 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1426 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1427 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1428 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1429 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1431 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1432 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1433 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1435 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1436 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1437 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1438 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1439 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1440 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1442 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1443 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1444 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1446 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1447 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1448 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1450 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1451 {ASCASB, ynone, Pb, opBytes{0xae}}, 1452 {ASCASL, ynone, Px, opBytes{0xaf}}, 1453 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1454 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1455 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1456 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1457 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1458 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1459 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1460 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1461 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1462 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1463 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1464 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1465 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1466 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1467 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1468 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1469 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1470 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1471 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1472 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1474 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1475 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1476 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1478 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1479 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1480 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1481 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1482 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1483 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1484 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1485 {ASTC, ynone, Px, opBytes{0xf9}}, 1486 {ASTD, ynone, Px, opBytes{0xfd}}, 1487 {ASTI, ynone, Px, opBytes{0xfb}}, 1488 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1489 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1490 {ASTOSL, ynone, Px, opBytes{0xab}}, 1491 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1492 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1493 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1494 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1495 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1496 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1497 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1498 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1499 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1500 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1501 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1502 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1503 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1504 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1506 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1507 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1508 {obj.ATEXT, ytext, Px, opBytes{}}, 1509 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1510 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1511 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1512 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1513 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1514 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1515 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1516 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1517 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1518 {AWAIT, ynone, Px, opBytes{0x9b}}, 1519 {AWORD, ybyte, Px, opBytes{2}}, 1520 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1521 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1523 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1524 {AXLAT, ynone, Px, opBytes{0xd7}}, 1525 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1526 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1527 {AXORPD, yxm, Pe, opBytes{0x57}}, 1528 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1529 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1530 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1531 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1532 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1533 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1534 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1535 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1536 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1537 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1538 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1539 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1540 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1541 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1542 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1543 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1544 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1545 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1546 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1547 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1548 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1549 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1550 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1551 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1552 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1553 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1554 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1555 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1556 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1557 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1558 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1559 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1560 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1561 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1562 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1563 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1564 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1565 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1566 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1567 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1568 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1569 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1570 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1571 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1572 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1573 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1574 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1575 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1576 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1577 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1578 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1579 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1580 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1581 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1582 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1583 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1584 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1585 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1586 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1587 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1588 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1589 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1590 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1591 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1592 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1593 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1594 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1595 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1596 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1597 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1598 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1599 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1600 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1601 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1602 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1603 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1604 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1605 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1606 {AFFREE, nil, 0, opBytes{}}, 1607 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1608 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1609 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1610 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1611 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1612 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1613 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1614 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1615 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1616 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1617 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1618 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1619 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1620 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1621 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1622 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1623 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1624 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1625 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1626 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1627 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1628 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1629 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1630 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1631 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1632 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1633 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1634 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1635 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1636 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1637 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1638 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1639 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1640 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1641 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1642 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1643 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1644 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1645 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1646 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1647 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1649 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1650 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1651 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1652 {AINVD, ynone, Pm, opBytes{0x08}}, 1653 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1654 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1655 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1656 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1657 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1658 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1659 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1660 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1661 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1662 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1663 {ARSM, ynone, Pm, opBytes{0xaa}}, 1664 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1665 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1666 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1667 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1668 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1669 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1670 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1671 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1672 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1673 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1674 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1676 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1677 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1678 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1679 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1680 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1681 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1682 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1683 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1684 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1685 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1686 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1687 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1688 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1689 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1690 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1691 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1692 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1693 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1694 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1695 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1696 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1697 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1698 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1699 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1700 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1701 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1702 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1703 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1704 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1705 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1706 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1707 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1708 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1709 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1710 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1711 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1712 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1713 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1715 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1716 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1717 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1718 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1719 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1720 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1721 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1722 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1723 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1724 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1725 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1726 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1727 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1728 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1729 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1730 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1732 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1733 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1734 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1735 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1736 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1737 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1738 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1739 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1740 {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1741 {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1742 {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1743 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1744 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1745 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1746 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1747 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1748 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1749 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1750 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1751 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1752 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1753 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1754 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1755 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1756 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1757 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1758 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1759 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1760 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1761 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1762 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1763 {ARDGSBASEL, 
yrdrand, Pf3, opBytes{0xae, 01}}, 1764 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1765 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1766 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1767 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1768 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1769 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1770 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1771 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1772 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1773 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1774 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1775 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1776 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1777 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1778 {ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}}, 1779 1780 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1781 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1782 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1783 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1784 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1785 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1786 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1787 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1788 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1789 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1790 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1791 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1792 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1793 1794 {obj.AEND, nil, 0, opBytes{}}, 1795 {0, nil, 0, opBytes{}}, 1796 } 1797 1798 var opindex [(ALAST + 1) & obj.AMask]*Optab 1799 1800 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1801 // This happens on systems like Solaris that call .so functions instead of system calls. 1802 // It does not seem to be necessary for any other systems. This is probably working 1803 // around a Solaris-specific bug that should be fixed differently, but we don't know 1804 // what that bug is. And this does fix it. 
1805 func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1806 if ctxt.Headtype == objabi.Hsolaris { 1807 // All the Solaris dynamic imports from libc.so begin with "libc_". 1808 return strings.HasPrefix(s.Name, "libc_") 1809 } 1810 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1811 } 1812 1813 // single-instruction no-ops of various lengths. 1814 // constructed by hand and disassembled with gdb to verify. 1815 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1816 var nop = [][16]uint8{ 1817 {0x90}, 1818 {0x66, 0x90}, 1819 {0x0F, 0x1F, 0x00}, 1820 {0x0F, 0x1F, 0x40, 0x00}, 1821 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1822 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1823 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1824 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1825 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1826 } 1827 1828 // Native Client rejects the repeated 0x66 prefix. 1829 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1830 func fillnop(p []byte, n int) { 1831 var m int 1832 1833 for n > 0 { 1834 m = n 1835 if m > len(nop) { 1836 m = len(nop) 1837 } 1838 copy(p[:m], nop[m-1][:m]) 1839 p = p[m:] 1840 n -= m 1841 } 1842 } 1843 1844 func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1845 s.Grow(int64(c) + int64(pad)) 1846 fillnop(s.P[c:], int(pad)) 1847 return c + pad 1848 } 1849 1850 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1851 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1852 return l 1853 } 1854 return q 1855 } 1856 1857 // isJump returns whether p is a jump instruction. 1858 // It is used to ensure that no standalone or macro-fused jump will straddle 1859 // or end on a 32 byte boundary by inserting NOPs before the jumps. 
1860 func isJump(p *obj.Prog) bool { 1861 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || 1862 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO 1863 } 1864 1865 // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional 1866 // jump. Otherwise, nil is returned. 1867 func lookForJCC(p *obj.Prog) *obj.Prog { 1868 // Skip any PCDATA, FUNCDATA or NOP instructions 1869 var q *obj.Prog 1870 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { 1871 } 1872 1873 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { 1874 return nil 1875 } 1876 1877 switch q.As { 1878 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, 1879 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: 1880 default: 1881 return nil 1882 } 1883 1884 return q 1885 } 1886 1887 // fusedJump determines whether p can be fused with a subsequent conditional jump instruction. 1888 // If it can, we return true followed by the total size of the fused jump. If it can't, we return false. 1889 // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. 1890 func fusedJump(p *obj.Prog) (bool, uint8) { 1891 var fusedSize uint8 1892 1893 // The first instruction in a macro fused pair may be preceded by the LOCK prefix, 1894 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we 1895 // need to be careful to insert any padding before the locks rather than directly after them. 
1896 1897 if p.As == AXRELEASE || p.As == AXACQUIRE { 1898 fusedSize += p.Isize 1899 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1900 } 1901 if p == nil { 1902 return false, 0 1903 } 1904 } 1905 if p.As == ALOCK { 1906 fusedSize += p.Isize 1907 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1908 } 1909 if p == nil { 1910 return false, 0 1911 } 1912 } 1913 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW 1914 1915 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || 1916 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp 1917 1918 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || 1919 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW 1920 1921 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || 1922 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW 1923 1924 if !cmpAddSub && !testAnd && !incDec { 1925 return false, 0 1926 } 1927 1928 if !incDec { 1929 var argOne obj.AddrType 1930 var argTwo obj.AddrType 1931 if cmp { 1932 argOne = p.From.Type 1933 argTwo = p.To.Type 1934 } else { 1935 argOne = p.To.Type 1936 argTwo = p.From.Type 1937 } 1938 if argOne == obj.TYPE_REG { 1939 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { 1940 return false, 0 1941 } 1942 } else if argOne == obj.TYPE_MEM { 1943 if argTwo != obj.TYPE_REG { 1944 return false, 0 1945 } 1946 } else { 1947 return false, 0 1948 } 1949 } 1950 1951 fusedSize += p.Isize 1952 jmp := lookForJCC(p) 1953 if jmp == nil { 1954 return false, 0 1955 } 1956 1957 fusedSize += jmp.Isize 1958 1959 if testAnd { 1960 return true, fusedSize 1961 } 1962 1963 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || 1964 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { 1965 return false, 0 1966 } 1967 1968 if cmpAddSub { 1969 return true, fusedSize 
1970 } 1971 1972 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { 1973 return false, 0 1974 } 1975 1976 return true, fusedSize 1977 } 1978 1979 type padJumpsCtx int32 1980 1981 func makePjcCtx(ctxt *obj.Link) padJumpsCtx { 1982 // Disable jump padding on 32 bit builds by setting 1983 // padJumps to 0. 1984 if ctxt.Arch.Family == sys.I386 { 1985 return padJumpsCtx(0) 1986 } 1987 1988 // Disable jump padding for hand written assembly code. 1989 if ctxt.IsAsm { 1990 return padJumpsCtx(0) 1991 } 1992 1993 return padJumpsCtx(32) 1994 } 1995 1996 // padJump detects whether the instruction being assembled is a standalone or a macro-fused 1997 // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does 1998 // not cross or end on a 32 byte boundary. 1999 func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { 2000 if pjc == 0 { 2001 return c 2002 } 2003 2004 var toPad int32 2005 fj, fjSize := fusedJump(p) 2006 mask := int32(pjc - 1) 2007 if fj { 2008 if (c&mask)+int32(fjSize) >= int32(pjc) { 2009 toPad = int32(pjc) - (c & mask) 2010 } 2011 } else if isJump(p) { 2012 if (c&mask)+int32(p.Isize) >= int32(pjc) { 2013 toPad = int32(pjc) - (c & mask) 2014 } 2015 } 2016 if toPad <= 0 { 2017 return c 2018 } 2019 2020 return noppad(ctxt, s, c, toPad) 2021 } 2022 2023 // reAssemble is called if an instruction's size changes during assembly. If 2024 // it does and the instruction is a standalone or a macro-fused jump we need to 2025 // reassemble. 2026 func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool { 2027 if pjc == 0 { 2028 return false 2029 } 2030 2031 fj, _ := fusedJump(p) 2032 return fj || isJump(p) 2033 } 2034 2035 type nopPad struct { 2036 p *obj.Prog // Instruction before the pad 2037 n int32 // Size of the pad 2038 } 2039 2040 // Padding bytes to add to align code as requested. 2041 // Alignment is restricted to powers of 2 between 8 and 2048 inclusive. 
2042 // 2043 // pc: current offset in function, in bytes 2044 // a: requested alignment, in bytes 2045 // cursym: current function being assembled 2046 // returns number of bytes of padding needed 2047 func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { 2048 if !((a&(a-1) == 0) && 8 <= a && a <= 2048) { 2049 ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a) 2050 return 0 2051 } 2052 2053 // By default function alignment is 32 bytes for amd64 2054 if cursym.Func().Align < int32(a) { 2055 cursym.Func().Align = int32(a) 2056 } 2057 2058 if pc&(a-1) != 0 { 2059 return int(a - (pc & (a - 1))) 2060 } 2061 2062 return 0 2063 } 2064 2065 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { 2066 if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 { 2067 ctxt.Diag("-spectre=ret not supported on 386") 2068 ctxt.Retpoline = false // don't keep printing 2069 } 2070 2071 pjc := makePjcCtx(ctxt) 2072 2073 if s.P != nil { 2074 return 2075 } 2076 2077 if ycover[0] == 0 { 2078 ctxt.Diag("x86 tables not initialized, call x86.instinit first") 2079 } 2080 2081 for p := s.Func().Text; p != nil; p = p.Link { 2082 if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { 2083 p.To.SetTarget(p) 2084 } 2085 if p.As == AADJSP { 2086 p.To.Type = obj.TYPE_REG 2087 p.To.Reg = REG_SP 2088 // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive. 2089 // One exception: It is smaller to encode $-0x80 than $0x80. 2090 // For that case, flip the sign and the op: 2091 // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'. 
2092 switch v := p.From.Offset; { 2093 case v == 0: 2094 p.As = obj.ANOP 2095 case v == 0x80 || (v < 0 && v != -0x80): 2096 p.As = spadjop(ctxt, AADDL, AADDQ) 2097 p.From.Offset *= -1 2098 default: 2099 p.As = spadjop(ctxt, ASUBL, ASUBQ) 2100 } 2101 } 2102 if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) { 2103 if p.To.Type != obj.TYPE_REG { 2104 ctxt.Diag("non-retpoline-compatible: %v", p) 2105 continue 2106 } 2107 p.To.Type = obj.TYPE_BRANCH 2108 p.To.Name = obj.NAME_EXTERN 2109 p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg))) 2110 p.To.Reg = 0 2111 p.To.Offset = 0 2112 } 2113 } 2114 2115 var count int64 // rough count of number of instructions 2116 for p := s.Func().Text; p != nil; p = p.Link { 2117 count++ 2118 p.Back = branchShort // use short branches first time through 2119 if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { 2120 p.Back |= branchBackwards 2121 q.Back |= branchLoopHead 2122 } 2123 } 2124 s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction 2125 2126 var ab AsmBuf 2127 var n int 2128 var c int32 2129 errors := ctxt.Errors 2130 var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) 2131 nrelocs0 := len(s.R) 2132 for { 2133 // This loop continues while there are reasons to re-assemble 2134 // whole block, like the presence of long forward jumps. 
2135 reAssemble := false 2136 for i := range s.R[nrelocs0:] { 2137 s.R[nrelocs0+i] = obj.Reloc{} 2138 } 2139 s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler 2140 s.P = s.P[:0] 2141 c = 0 2142 var pPrev *obj.Prog 2143 nops = nops[:0] 2144 for p := s.Func().Text; p != nil; p = p.Link { 2145 c0 := c 2146 c = pjc.padJump(ctxt, s, p, c) 2147 2148 if p.As == obj.APCALIGN { 2149 aln := p.From.Offset 2150 v := addpad(int64(c), aln, ctxt, s) 2151 if v > 0 { 2152 s.Grow(int64(c) + int64(v)) 2153 fillnop(s.P[c:], int(v)) 2154 } 2155 2156 c += int32(v) 2157 pPrev = p 2158 continue 2159 } 2160 2161 if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 { 2162 // pad with NOPs 2163 v := -c & (loopAlign - 1) 2164 2165 if v <= maxLoopPad { 2166 s.Grow(int64(c) + int64(v)) 2167 fillnop(s.P[c:], int(v)) 2168 c += v 2169 } 2170 } 2171 2172 p.Pc = int64(c) 2173 2174 // process forward jumps to p 2175 for q := p.Rel; q != nil; q = q.Forwd { 2176 v := int32(p.Pc - (q.Pc + int64(q.Isize))) 2177 if q.Back&branchShort != 0 { 2178 if v > 127 { 2179 reAssemble = true 2180 q.Back ^= branchShort 2181 } 2182 2183 if q.As == AJCXZL || q.As == AXBEGIN { 2184 s.P[q.Pc+2] = byte(v) 2185 } else { 2186 s.P[q.Pc+1] = byte(v) 2187 } 2188 } else { 2189 binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v)) 2190 } 2191 } 2192 2193 p.Rel = nil 2194 2195 p.Pc = int64(c) 2196 ab.asmins(ctxt, s, p) 2197 m := ab.Len() 2198 if int(p.Isize) != m { 2199 p.Isize = uint8(m) 2200 if pjc.reAssemble(p) { 2201 // We need to re-assemble here to check for jumps and fused jumps 2202 // that span or end on 32 byte boundaries. 2203 reAssemble = true 2204 } 2205 } 2206 2207 s.Grow(p.Pc + int64(m)) 2208 copy(s.P[p.Pc:], ab.Bytes()) 2209 // If there was padding, remember it. 
2210 if pPrev != nil && !ctxt.IsAsm && c > c0 { 2211 nops = append(nops, nopPad{p: pPrev, n: c - c0}) 2212 } 2213 c += int32(m) 2214 pPrev = p 2215 } 2216 2217 n++ 2218 if n > 1000 { 2219 ctxt.Diag("span must be looping") 2220 log.Fatalf("loop") 2221 } 2222 if !reAssemble { 2223 break 2224 } 2225 if ctxt.Errors > errors { 2226 return 2227 } 2228 } 2229 // splice padding nops into Progs 2230 for _, n := range nops { 2231 pp := n.p 2232 np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)} 2233 pp.Link = np 2234 } 2235 2236 s.Size = int64(c) 2237 2238 if false { /* debug['a'] > 1 */ 2239 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0) 2240 var i int 2241 for i = 0; i < len(s.P); i++ { 2242 fmt.Printf(" %.2x", s.P[i]) 2243 if i%16 == 15 { 2244 fmt.Printf("\n %.6x", uint(i+1)) 2245 } 2246 } 2247 2248 if i%16 != 0 { 2249 fmt.Printf("\n") 2250 } 2251 2252 for i := 0; i < len(s.R); i++ { 2253 r := &s.R[i] 2254 fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add) 2255 } 2256 } 2257 2258 // Mark nonpreemptible instruction sequences. 2259 // The 2-instruction TLS access sequence 2260 // MOVQ TLS, BX 2261 // MOVQ 0(BX)(TLS*1), BX 2262 // is not async preemptible, as if it is preempted and resumed on 2263 // a different thread, the TLS address may become invalid. 2264 if !CanUse1InsnTLS(ctxt) { 2265 useTLS := func(p *obj.Prog) bool { 2266 // Only need to mark the second instruction, which has 2267 // REG_TLS as Index. (It is okay to interrupt and restart 2268 // the first instruction.) 2269 return p.From.Index == REG_TLS 2270 } 2271 obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) 2272 } 2273 2274 // Now that we know byte offsets, we can generate jump table entries. 2275 // TODO: could this live in obj instead of obj/$ARCH? 
2276 for _, jt := range s.Func().JumpTables { 2277 for i, p := range jt.Targets { 2278 // The ith jumptable entry points to the p.Pc'th 2279 // byte in the function symbol s. 2280 jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc) 2281 } 2282 } 2283 } 2284 2285 func instinit(ctxt *obj.Link) { 2286 if ycover[0] != 0 { 2287 // Already initialized; stop now. 2288 // This happens in the cmd/asm tests, 2289 // each of which re-initializes the arch. 2290 return 2291 } 2292 2293 switch ctxt.Headtype { 2294 case objabi.Hplan9: 2295 plan9privates = ctxt.Lookup("_privates") 2296 } 2297 2298 for i := range avxOptab { 2299 c := avxOptab[i].as 2300 if opindex[c&obj.AMask] != nil { 2301 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) 2302 } 2303 opindex[c&obj.AMask] = &avxOptab[i] 2304 } 2305 for i := 1; optab[i].as != 0; i++ { 2306 c := optab[i].as 2307 if opindex[c&obj.AMask] != nil { 2308 ctxt.Diag("phase error in optab: %d (%v)", i, c) 2309 } 2310 opindex[c&obj.AMask] = &optab[i] 2311 } 2312 2313 for i := 0; i < Ymax; i++ { 2314 ycover[i*Ymax+i] = 1 2315 } 2316 2317 ycover[Yi0*Ymax+Yu2] = 1 2318 ycover[Yi1*Ymax+Yu2] = 1 2319 2320 ycover[Yi0*Ymax+Yi8] = 1 2321 ycover[Yi1*Ymax+Yi8] = 1 2322 ycover[Yu2*Ymax+Yi8] = 1 2323 ycover[Yu7*Ymax+Yi8] = 1 2324 2325 ycover[Yi0*Ymax+Yu7] = 1 2326 ycover[Yi1*Ymax+Yu7] = 1 2327 ycover[Yu2*Ymax+Yu7] = 1 2328 2329 ycover[Yi0*Ymax+Yu8] = 1 2330 ycover[Yi1*Ymax+Yu8] = 1 2331 ycover[Yu2*Ymax+Yu8] = 1 2332 ycover[Yu7*Ymax+Yu8] = 1 2333 2334 ycover[Yi0*Ymax+Ys32] = 1 2335 ycover[Yi1*Ymax+Ys32] = 1 2336 ycover[Yu2*Ymax+Ys32] = 1 2337 ycover[Yu7*Ymax+Ys32] = 1 2338 ycover[Yu8*Ymax+Ys32] = 1 2339 ycover[Yi8*Ymax+Ys32] = 1 2340 2341 ycover[Yi0*Ymax+Yi32] = 1 2342 ycover[Yi1*Ymax+Yi32] = 1 2343 ycover[Yu2*Ymax+Yi32] = 1 2344 ycover[Yu7*Ymax+Yi32] = 1 2345 ycover[Yu8*Ymax+Yi32] = 1 2346 ycover[Yi8*Ymax+Yi32] = 1 2347 ycover[Ys32*Ymax+Yi32] = 1 2348 2349 ycover[Yi0*Ymax+Yi64] = 1 2350 ycover[Yi1*Ymax+Yi64] = 1 2351 ycover[Yu7*Ymax+Yi64] = 1 2352 
ycover[Yu2*Ymax+Yi64] = 1 2353 ycover[Yu8*Ymax+Yi64] = 1 2354 ycover[Yi8*Ymax+Yi64] = 1 2355 ycover[Ys32*Ymax+Yi64] = 1 2356 ycover[Yi32*Ymax+Yi64] = 1 2357 2358 ycover[Yal*Ymax+Yrb] = 1 2359 ycover[Ycl*Ymax+Yrb] = 1 2360 ycover[Yax*Ymax+Yrb] = 1 2361 ycover[Ycx*Ymax+Yrb] = 1 2362 ycover[Yrx*Ymax+Yrb] = 1 2363 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 2364 2365 ycover[Ycl*Ymax+Ycx] = 1 2366 2367 ycover[Yax*Ymax+Yrx] = 1 2368 ycover[Ycx*Ymax+Yrx] = 1 2369 2370 ycover[Yax*Ymax+Yrl] = 1 2371 ycover[Ycx*Ymax+Yrl] = 1 2372 ycover[Yrx*Ymax+Yrl] = 1 2373 ycover[Yrl32*Ymax+Yrl] = 1 2374 2375 ycover[Yf0*Ymax+Yrf] = 1 2376 2377 ycover[Yal*Ymax+Ymb] = 1 2378 ycover[Ycl*Ymax+Ymb] = 1 2379 ycover[Yax*Ymax+Ymb] = 1 2380 ycover[Ycx*Ymax+Ymb] = 1 2381 ycover[Yrx*Ymax+Ymb] = 1 2382 ycover[Yrb*Ymax+Ymb] = 1 2383 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 2384 ycover[Ym*Ymax+Ymb] = 1 2385 2386 ycover[Yax*Ymax+Yml] = 1 2387 ycover[Ycx*Ymax+Yml] = 1 2388 ycover[Yrx*Ymax+Yml] = 1 2389 ycover[Yrl*Ymax+Yml] = 1 2390 ycover[Yrl32*Ymax+Yml] = 1 2391 ycover[Ym*Ymax+Yml] = 1 2392 2393 ycover[Yax*Ymax+Ymm] = 1 2394 ycover[Ycx*Ymax+Ymm] = 1 2395 ycover[Yrx*Ymax+Ymm] = 1 2396 ycover[Yrl*Ymax+Ymm] = 1 2397 ycover[Yrl32*Ymax+Ymm] = 1 2398 ycover[Ym*Ymax+Ymm] = 1 2399 ycover[Ymr*Ymax+Ymm] = 1 2400 2401 ycover[Yxr0*Ymax+Yxr] = 1 2402 2403 ycover[Ym*Ymax+Yxm] = 1 2404 ycover[Yxr0*Ymax+Yxm] = 1 2405 ycover[Yxr*Ymax+Yxm] = 1 2406 2407 ycover[Ym*Ymax+Yym] = 1 2408 ycover[Yyr*Ymax+Yym] = 1 2409 2410 ycover[Yxr0*Ymax+YxrEvex] = 1 2411 ycover[Yxr*Ymax+YxrEvex] = 1 2412 2413 ycover[Ym*Ymax+YxmEvex] = 1 2414 ycover[Yxr0*Ymax+YxmEvex] = 1 2415 ycover[Yxr*Ymax+YxmEvex] = 1 2416 ycover[YxrEvex*Ymax+YxmEvex] = 1 2417 2418 ycover[Yyr*Ymax+YyrEvex] = 1 2419 2420 ycover[Ym*Ymax+YymEvex] = 1 2421 ycover[Yyr*Ymax+YymEvex] = 1 2422 ycover[YyrEvex*Ymax+YymEvex] = 1 2423 2424 ycover[Ym*Ymax+Yzm] = 1 2425 ycover[Yzr*Ymax+Yzm] = 1 2426 2427 ycover[Yk0*Ymax+Yk] = 1 2428 ycover[Yknot0*Ymax+Yk] = 1 2429 2430 
ycover[Yk0*Ymax+Ykm] = 1 2431 ycover[Yknot0*Ymax+Ykm] = 1 2432 ycover[Yk*Ymax+Ykm] = 1 2433 ycover[Ym*Ymax+Ykm] = 1 2434 2435 ycover[Yxvm*Ymax+YxvmEvex] = 1 2436 2437 ycover[Yyvm*Ymax+YyvmEvex] = 1 2438 2439 for i := 0; i < MAXREG; i++ { 2440 reg[i] = -1 2441 if i >= REG_AL && i <= REG_R15B { 2442 reg[i] = (i - REG_AL) & 7 2443 if i >= REG_SPB && i <= REG_DIB { 2444 regrex[i] = 0x40 2445 } 2446 if i >= REG_R8B && i <= REG_R15B { 2447 regrex[i] = Rxr | Rxx | Rxb 2448 } 2449 } 2450 2451 if i >= REG_AH && i <= REG_BH { 2452 reg[i] = 4 + ((i - REG_AH) & 7) 2453 } 2454 if i >= REG_AX && i <= REG_R15 { 2455 reg[i] = (i - REG_AX) & 7 2456 if i >= REG_R8 { 2457 regrex[i] = Rxr | Rxx | Rxb 2458 } 2459 } 2460 2461 if i >= REG_F0 && i <= REG_F0+7 { 2462 reg[i] = (i - REG_F0) & 7 2463 } 2464 if i >= REG_M0 && i <= REG_M0+7 { 2465 reg[i] = (i - REG_M0) & 7 2466 } 2467 if i >= REG_K0 && i <= REG_K0+7 { 2468 reg[i] = (i - REG_K0) & 7 2469 } 2470 if i >= REG_X0 && i <= REG_X0+15 { 2471 reg[i] = (i - REG_X0) & 7 2472 if i >= REG_X0+8 { 2473 regrex[i] = Rxr | Rxx | Rxb 2474 } 2475 } 2476 if i >= REG_X16 && i <= REG_X16+15 { 2477 reg[i] = (i - REG_X16) & 7 2478 if i >= REG_X16+8 { 2479 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2480 } else { 2481 regrex[i] = RxrEvex 2482 } 2483 } 2484 if i >= REG_Y0 && i <= REG_Y0+15 { 2485 reg[i] = (i - REG_Y0) & 7 2486 if i >= REG_Y0+8 { 2487 regrex[i] = Rxr | Rxx | Rxb 2488 } 2489 } 2490 if i >= REG_Y16 && i <= REG_Y16+15 { 2491 reg[i] = (i - REG_Y16) & 7 2492 if i >= REG_Y16+8 { 2493 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2494 } else { 2495 regrex[i] = RxrEvex 2496 } 2497 } 2498 if i >= REG_Z0 && i <= REG_Z0+15 { 2499 reg[i] = (i - REG_Z0) & 7 2500 if i > REG_Z0+7 { 2501 regrex[i] = Rxr | Rxx | Rxb 2502 } 2503 } 2504 if i >= REG_Z16 && i <= REG_Z16+15 { 2505 reg[i] = (i - REG_Z16) & 7 2506 if i >= REG_Z16+8 { 2507 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2508 } else { 2509 regrex[i] = RxrEvex 2510 } 2511 } 2512 2513 if i >= REG_CR+8 && i <= REG_CR+15 { 
2514 regrex[i] = Rxr 2515 } 2516 } 2517 } 2518 2519 var isAndroid = buildcfg.GOOS == "android" 2520 2521 func prefixof(ctxt *obj.Link, a *obj.Addr) int { 2522 if a.Reg < REG_CS && a.Index < REG_CS { // fast path 2523 return 0 2524 } 2525 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 2526 switch a.Reg { 2527 case REG_CS: 2528 return 0x2e 2529 2530 case REG_DS: 2531 return 0x3e 2532 2533 case REG_ES: 2534 return 0x26 2535 2536 case REG_FS: 2537 return 0x64 2538 2539 case REG_GS: 2540 return 0x65 2541 2542 case REG_TLS: 2543 // NOTE: Systems listed here should be only systems that 2544 // support direct TLS references like 8(TLS) implemented as 2545 // direct references from FS or GS. Systems that require 2546 // the initial-exec model, where you load the TLS base into 2547 // a register and then index from that register, do not reach 2548 // this code and should not be listed. 2549 if ctxt.Arch.Family == sys.I386 { 2550 switch ctxt.Headtype { 2551 default: 2552 if isAndroid { 2553 return 0x65 // GS 2554 } 2555 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2556 2557 case objabi.Hdarwin, 2558 objabi.Hdragonfly, 2559 objabi.Hfreebsd, 2560 objabi.Hnetbsd, 2561 objabi.Hopenbsd: 2562 return 0x65 // GS 2563 } 2564 } 2565 2566 switch ctxt.Headtype { 2567 default: 2568 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2569 2570 case objabi.Hlinux: 2571 if isAndroid { 2572 return 0x64 // FS 2573 } 2574 2575 if ctxt.Flag_shared { 2576 log.Fatalf("unknown TLS base register for linux with -shared") 2577 } else { 2578 return 0x64 // FS 2579 } 2580 2581 case objabi.Hdragonfly, 2582 objabi.Hfreebsd, 2583 objabi.Hnetbsd, 2584 objabi.Hopenbsd, 2585 objabi.Hsolaris: 2586 return 0x64 // FS 2587 2588 case objabi.Hdarwin: 2589 return 0x65 // GS 2590 } 2591 } 2592 } 2593 2594 switch a.Index { 2595 case REG_CS: 2596 return 0x2e 2597 2598 case REG_DS: 2599 return 0x3e 2600 2601 case REG_ES: 2602 return 0x26 2603 2604 case REG_TLS: 2605 if ctxt.Flag_shared 
&& ctxt.Headtype != objabi.Hwindows { 2606 // When building for inclusion into a shared library, an instruction of the form 2607 // MOV off(CX)(TLS*1), AX 2608 // becomes 2609 // mov %gs:off(%ecx), %eax // on i386 2610 // mov %fs:off(%rcx), %rax // on amd64 2611 // which assumes that the correct TLS offset has been loaded into CX (today 2612 // there is only one TLS variable -- g -- so this is OK). When not building for 2613 // a shared library the instruction it becomes 2614 // mov 0x0(%ecx), %eax // on i386 2615 // mov 0x0(%rcx), %rax // on amd64 2616 // and a R_TLS_LE relocation, and so does not require a prefix. 2617 if ctxt.Arch.Family == sys.I386 { 2618 return 0x65 // GS 2619 } 2620 return 0x64 // FS 2621 } 2622 2623 case REG_FS: 2624 return 0x64 2625 2626 case REG_GS: 2627 return 0x65 2628 } 2629 2630 return 0 2631 } 2632 2633 // oclassRegList returns multisource operand class for addr. 2634 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { 2635 // TODO(quasilyte): when oclass register case is refactored into 2636 // lookup table, use it here to get register kind more easily. 2637 // Helper functions like regIsXmm should go away too (they will become redundant). 
2638 2639 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } 2640 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } 2641 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } 2642 2643 reg0, reg1 := decodeRegisterRange(addr.Offset) 2644 low := regIndex(int16(reg0)) 2645 high := regIndex(int16(reg1)) 2646 2647 if ctxt.Arch.Family == sys.I386 { 2648 if low >= 8 || high >= 8 { 2649 return Yxxx 2650 } 2651 } 2652 2653 switch high - low { 2654 case 3: 2655 switch { 2656 case regIsXmm(reg0) && regIsXmm(reg1): 2657 return YxrEvexMulti4 2658 case regIsYmm(reg0) && regIsYmm(reg1): 2659 return YyrEvexMulti4 2660 case regIsZmm(reg0) && regIsZmm(reg1): 2661 return YzrMulti4 2662 default: 2663 return Yxxx 2664 } 2665 default: 2666 return Yxxx 2667 } 2668 } 2669 2670 // oclassVMem returns V-mem (vector memory with VSIB) operand class. 2671 // For addr that is not V-mem returns (Yxxx, false). 2672 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { 2673 switch addr.Index { 2674 case REG_X0 + 0, 2675 REG_X0 + 1, 2676 REG_X0 + 2, 2677 REG_X0 + 3, 2678 REG_X0 + 4, 2679 REG_X0 + 5, 2680 REG_X0 + 6, 2681 REG_X0 + 7: 2682 return Yxvm, true 2683 case REG_X8 + 0, 2684 REG_X8 + 1, 2685 REG_X8 + 2, 2686 REG_X8 + 3, 2687 REG_X8 + 4, 2688 REG_X8 + 5, 2689 REG_X8 + 6, 2690 REG_X8 + 7: 2691 if ctxt.Arch.Family == sys.I386 { 2692 return Yxxx, true 2693 } 2694 return Yxvm, true 2695 case REG_X16 + 0, 2696 REG_X16 + 1, 2697 REG_X16 + 2, 2698 REG_X16 + 3, 2699 REG_X16 + 4, 2700 REG_X16 + 5, 2701 REG_X16 + 6, 2702 REG_X16 + 7, 2703 REG_X16 + 8, 2704 REG_X16 + 9, 2705 REG_X16 + 10, 2706 REG_X16 + 11, 2707 REG_X16 + 12, 2708 REG_X16 + 13, 2709 REG_X16 + 14, 2710 REG_X16 + 15: 2711 if ctxt.Arch.Family == sys.I386 { 2712 return Yxxx, true 2713 } 2714 return YxvmEvex, true 2715 2716 case REG_Y0 + 0, 2717 REG_Y0 + 1, 2718 REG_Y0 + 2, 2719 REG_Y0 + 3, 2720 REG_Y0 + 4, 2721 REG_Y0 + 5, 2722 REG_Y0 + 6, 2723 REG_Y0 + 7: 2724 return Yyvm, true 2725 
case REG_Y8 + 0, 2726 REG_Y8 + 1, 2727 REG_Y8 + 2, 2728 REG_Y8 + 3, 2729 REG_Y8 + 4, 2730 REG_Y8 + 5, 2731 REG_Y8 + 6, 2732 REG_Y8 + 7: 2733 if ctxt.Arch.Family == sys.I386 { 2734 return Yxxx, true 2735 } 2736 return Yyvm, true 2737 case REG_Y16 + 0, 2738 REG_Y16 + 1, 2739 REG_Y16 + 2, 2740 REG_Y16 + 3, 2741 REG_Y16 + 4, 2742 REG_Y16 + 5, 2743 REG_Y16 + 6, 2744 REG_Y16 + 7, 2745 REG_Y16 + 8, 2746 REG_Y16 + 9, 2747 REG_Y16 + 10, 2748 REG_Y16 + 11, 2749 REG_Y16 + 12, 2750 REG_Y16 + 13, 2751 REG_Y16 + 14, 2752 REG_Y16 + 15: 2753 if ctxt.Arch.Family == sys.I386 { 2754 return Yxxx, true 2755 } 2756 return YyvmEvex, true 2757 2758 case REG_Z0 + 0, 2759 REG_Z0 + 1, 2760 REG_Z0 + 2, 2761 REG_Z0 + 3, 2762 REG_Z0 + 4, 2763 REG_Z0 + 5, 2764 REG_Z0 + 6, 2765 REG_Z0 + 7: 2766 return Yzvm, true 2767 case REG_Z8 + 0, 2768 REG_Z8 + 1, 2769 REG_Z8 + 2, 2770 REG_Z8 + 3, 2771 REG_Z8 + 4, 2772 REG_Z8 + 5, 2773 REG_Z8 + 6, 2774 REG_Z8 + 7, 2775 REG_Z8 + 8, 2776 REG_Z8 + 9, 2777 REG_Z8 + 10, 2778 REG_Z8 + 11, 2779 REG_Z8 + 12, 2780 REG_Z8 + 13, 2781 REG_Z8 + 14, 2782 REG_Z8 + 15, 2783 REG_Z8 + 16, 2784 REG_Z8 + 17, 2785 REG_Z8 + 18, 2786 REG_Z8 + 19, 2787 REG_Z8 + 20, 2788 REG_Z8 + 21, 2789 REG_Z8 + 22, 2790 REG_Z8 + 23: 2791 if ctxt.Arch.Family == sys.I386 { 2792 return Yxxx, true 2793 } 2794 return Yzvm, true 2795 } 2796 2797 return Yxxx, false 2798 } 2799 2800 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { 2801 switch a.Type { 2802 case obj.TYPE_REGLIST: 2803 return oclassRegList(ctxt, a) 2804 2805 case obj.TYPE_NONE: 2806 return Ynone 2807 2808 case obj.TYPE_BRANCH: 2809 return Ybr 2810 2811 case obj.TYPE_INDIR: 2812 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { 2813 return Yindir 2814 } 2815 return Yxxx 2816 2817 case obj.TYPE_MEM: 2818 // Pseudo registers have negative index, but SP is 2819 // not pseudo on x86, hence REG_SP check is not redundant. 
2820 if a.Index == REG_SP || a.Index < 0 { 2821 // Can't use FP/SB/PC/SP as the index register. 2822 return Yxxx 2823 } 2824 2825 if vmem, ok := oclassVMem(ctxt, a); ok { 2826 return vmem 2827 } 2828 2829 if ctxt.Arch.Family == sys.AMD64 { 2830 switch a.Name { 2831 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: 2832 // Global variables can't use index registers and their 2833 // base register is %rip (%rip is encoded as REG_NONE). 2834 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { 2835 return Yxxx 2836 } 2837 case obj.NAME_AUTO, obj.NAME_PARAM: 2838 // These names must have a base of SP. The old compiler 2839 // uses 0 for the base register. SSA uses REG_SP. 2840 if a.Reg != REG_SP && a.Reg != 0 { 2841 return Yxxx 2842 } 2843 case obj.NAME_NONE: 2844 // everything is ok 2845 default: 2846 // unknown name 2847 return Yxxx 2848 } 2849 } 2850 return Ym 2851 2852 case obj.TYPE_ADDR: 2853 switch a.Name { 2854 case obj.NAME_GOTREF: 2855 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") 2856 return Yxxx 2857 2858 case obj.NAME_EXTERN, 2859 obj.NAME_STATIC: 2860 if a.Sym != nil && useAbs(ctxt, a.Sym) { 2861 return Yi32 2862 } 2863 return Yiauto // use pc-relative addressing 2864 2865 case obj.NAME_AUTO, 2866 obj.NAME_PARAM: 2867 return Yiauto 2868 } 2869 2870 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index 2871 // and got Yi32 in an earlier version of this code. 2872 // Keep doing that until we fix yduff etc. 
2873 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { 2874 return Yi32 2875 } 2876 2877 if a.Sym != nil || a.Name != obj.NAME_NONE { 2878 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) 2879 } 2880 fallthrough 2881 2882 case obj.TYPE_CONST: 2883 if a.Sym != nil { 2884 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) 2885 } 2886 2887 v := a.Offset 2888 if ctxt.Arch.Family == sys.I386 { 2889 v = int64(int32(v)) 2890 } 2891 switch { 2892 case v == 0: 2893 return Yi0 2894 case v == 1: 2895 return Yi1 2896 case v >= 0 && v <= 3: 2897 return Yu2 2898 case v >= 0 && v <= 127: 2899 return Yu7 2900 case v >= 0 && v <= 255: 2901 return Yu8 2902 case v >= -128 && v <= 127: 2903 return Yi8 2904 } 2905 if ctxt.Arch.Family == sys.I386 { 2906 return Yi32 2907 } 2908 l := int32(v) 2909 if int64(l) == v { 2910 return Ys32 // can sign extend 2911 } 2912 if v>>32 == 0 { 2913 return Yi32 // unsigned 2914 } 2915 return Yi64 2916 2917 case obj.TYPE_TEXTSIZE: 2918 return Ytextsize 2919 } 2920 2921 if a.Type != obj.TYPE_REG { 2922 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) 2923 return Yxxx 2924 } 2925 2926 switch a.Reg { 2927 case REG_AL: 2928 return Yal 2929 2930 case REG_AX: 2931 return Yax 2932 2933 /* 2934 case REG_SPB: 2935 */ 2936 case REG_BPB, 2937 REG_SIB, 2938 REG_DIB, 2939 REG_R8B, 2940 REG_R9B, 2941 REG_R10B, 2942 REG_R11B, 2943 REG_R12B, 2944 REG_R13B, 2945 REG_R14B, 2946 REG_R15B: 2947 if ctxt.Arch.Family == sys.I386 { 2948 return Yxxx 2949 } 2950 fallthrough 2951 2952 case REG_DL, 2953 REG_BL, 2954 REG_AH, 2955 REG_CH, 2956 REG_DH, 2957 REG_BH: 2958 return Yrb 2959 2960 case REG_CL: 2961 return Ycl 2962 2963 case REG_CX: 2964 return Ycx 2965 2966 case REG_DX, REG_BX: 2967 return Yrx 2968 2969 case REG_R8, // not really Yrl 2970 REG_R9, 2971 REG_R10, 2972 REG_R11, 2973 REG_R12, 2974 REG_R13, 2975 REG_R14, 2976 REG_R15: 2977 if ctxt.Arch.Family == sys.I386 { 2978 return Yxxx 2979 } 2980 fallthrough 2981 2982 case REG_SP, 
REG_BP, REG_SI, REG_DI: 2983 if ctxt.Arch.Family == sys.I386 { 2984 return Yrl32 2985 } 2986 return Yrl 2987 2988 case REG_F0 + 0: 2989 return Yf0 2990 2991 case REG_F0 + 1, 2992 REG_F0 + 2, 2993 REG_F0 + 3, 2994 REG_F0 + 4, 2995 REG_F0 + 5, 2996 REG_F0 + 6, 2997 REG_F0 + 7: 2998 return Yrf 2999 3000 case REG_M0 + 0, 3001 REG_M0 + 1, 3002 REG_M0 + 2, 3003 REG_M0 + 3, 3004 REG_M0 + 4, 3005 REG_M0 + 5, 3006 REG_M0 + 6, 3007 REG_M0 + 7: 3008 return Ymr 3009 3010 case REG_X0: 3011 return Yxr0 3012 3013 case REG_X0 + 1, 3014 REG_X0 + 2, 3015 REG_X0 + 3, 3016 REG_X0 + 4, 3017 REG_X0 + 5, 3018 REG_X0 + 6, 3019 REG_X0 + 7, 3020 REG_X0 + 8, 3021 REG_X0 + 9, 3022 REG_X0 + 10, 3023 REG_X0 + 11, 3024 REG_X0 + 12, 3025 REG_X0 + 13, 3026 REG_X0 + 14, 3027 REG_X0 + 15: 3028 return Yxr 3029 3030 case REG_X0 + 16, 3031 REG_X0 + 17, 3032 REG_X0 + 18, 3033 REG_X0 + 19, 3034 REG_X0 + 20, 3035 REG_X0 + 21, 3036 REG_X0 + 22, 3037 REG_X0 + 23, 3038 REG_X0 + 24, 3039 REG_X0 + 25, 3040 REG_X0 + 26, 3041 REG_X0 + 27, 3042 REG_X0 + 28, 3043 REG_X0 + 29, 3044 REG_X0 + 30, 3045 REG_X0 + 31: 3046 return YxrEvex 3047 3048 case REG_Y0 + 0, 3049 REG_Y0 + 1, 3050 REG_Y0 + 2, 3051 REG_Y0 + 3, 3052 REG_Y0 + 4, 3053 REG_Y0 + 5, 3054 REG_Y0 + 6, 3055 REG_Y0 + 7, 3056 REG_Y0 + 8, 3057 REG_Y0 + 9, 3058 REG_Y0 + 10, 3059 REG_Y0 + 11, 3060 REG_Y0 + 12, 3061 REG_Y0 + 13, 3062 REG_Y0 + 14, 3063 REG_Y0 + 15: 3064 return Yyr 3065 3066 case REG_Y0 + 16, 3067 REG_Y0 + 17, 3068 REG_Y0 + 18, 3069 REG_Y0 + 19, 3070 REG_Y0 + 20, 3071 REG_Y0 + 21, 3072 REG_Y0 + 22, 3073 REG_Y0 + 23, 3074 REG_Y0 + 24, 3075 REG_Y0 + 25, 3076 REG_Y0 + 26, 3077 REG_Y0 + 27, 3078 REG_Y0 + 28, 3079 REG_Y0 + 29, 3080 REG_Y0 + 30, 3081 REG_Y0 + 31: 3082 return YyrEvex 3083 3084 case REG_Z0 + 0, 3085 REG_Z0 + 1, 3086 REG_Z0 + 2, 3087 REG_Z0 + 3, 3088 REG_Z0 + 4, 3089 REG_Z0 + 5, 3090 REG_Z0 + 6, 3091 REG_Z0 + 7: 3092 return Yzr 3093 3094 case REG_Z0 + 8, 3095 REG_Z0 + 9, 3096 REG_Z0 + 10, 3097 REG_Z0 + 11, 3098 REG_Z0 + 12, 3099 REG_Z0 + 13, 
3100 REG_Z0 + 14, 3101 REG_Z0 + 15, 3102 REG_Z0 + 16, 3103 REG_Z0 + 17, 3104 REG_Z0 + 18, 3105 REG_Z0 + 19, 3106 REG_Z0 + 20, 3107 REG_Z0 + 21, 3108 REG_Z0 + 22, 3109 REG_Z0 + 23, 3110 REG_Z0 + 24, 3111 REG_Z0 + 25, 3112 REG_Z0 + 26, 3113 REG_Z0 + 27, 3114 REG_Z0 + 28, 3115 REG_Z0 + 29, 3116 REG_Z0 + 30, 3117 REG_Z0 + 31: 3118 if ctxt.Arch.Family == sys.I386 { 3119 return Yxxx 3120 } 3121 return Yzr 3122 3123 case REG_K0: 3124 return Yk0 3125 3126 case REG_K0 + 1, 3127 REG_K0 + 2, 3128 REG_K0 + 3, 3129 REG_K0 + 4, 3130 REG_K0 + 5, 3131 REG_K0 + 6, 3132 REG_K0 + 7: 3133 return Yknot0 3134 3135 case REG_CS: 3136 return Ycs 3137 case REG_SS: 3138 return Yss 3139 case REG_DS: 3140 return Yds 3141 case REG_ES: 3142 return Yes 3143 case REG_FS: 3144 return Yfs 3145 case REG_GS: 3146 return Ygs 3147 case REG_TLS: 3148 return Ytls 3149 3150 case REG_GDTR: 3151 return Ygdtr 3152 case REG_IDTR: 3153 return Yidtr 3154 case REG_LDTR: 3155 return Yldtr 3156 case REG_MSW: 3157 return Ymsw 3158 case REG_TASK: 3159 return Ytask 3160 3161 case REG_CR + 0: 3162 return Ycr0 3163 case REG_CR + 1: 3164 return Ycr1 3165 case REG_CR + 2: 3166 return Ycr2 3167 case REG_CR + 3: 3168 return Ycr3 3169 case REG_CR + 4: 3170 return Ycr4 3171 case REG_CR + 5: 3172 return Ycr5 3173 case REG_CR + 6: 3174 return Ycr6 3175 case REG_CR + 7: 3176 return Ycr7 3177 case REG_CR + 8: 3178 return Ycr8 3179 3180 case REG_DR + 0: 3181 return Ydr0 3182 case REG_DR + 1: 3183 return Ydr1 3184 case REG_DR + 2: 3185 return Ydr2 3186 case REG_DR + 3: 3187 return Ydr3 3188 case REG_DR + 4: 3189 return Ydr4 3190 case REG_DR + 5: 3191 return Ydr5 3192 case REG_DR + 6: 3193 return Ydr6 3194 case REG_DR + 7: 3195 return Ydr7 3196 3197 case REG_TR + 0: 3198 return Ytr0 3199 case REG_TR + 1: 3200 return Ytr1 3201 case REG_TR + 2: 3202 return Ytr2 3203 case REG_TR + 3: 3204 return Ytr3 3205 case REG_TR + 4: 3206 return Ytr4 3207 case REG_TR + 5: 3208 return Ytr5 3209 case REG_TR + 6: 3210 return Ytr6 3211 case REG_TR + 
7: 3212 return Ytr7 3213 } 3214 3215 return Yxxx 3216 } 3217 3218 // AsmBuf is a simple buffer to assemble variable-length x86 instructions into 3219 // and hold assembly state. 3220 type AsmBuf struct { 3221 buf [100]byte 3222 off int 3223 rexflag int 3224 vexflag bool // Per inst: true for VEX-encoded 3225 evexflag bool // Per inst: true for EVEX-encoded 3226 rep bool 3227 repn bool 3228 lock bool 3229 3230 evex evexBits // Initialized when evexflag is true 3231 } 3232 3233 // Put1 appends one byte to the end of the buffer. 3234 func (ab *AsmBuf) Put1(x byte) { 3235 ab.buf[ab.off] = x 3236 ab.off++ 3237 } 3238 3239 // Put2 appends two bytes to the end of the buffer. 3240 func (ab *AsmBuf) Put2(x, y byte) { 3241 ab.buf[ab.off+0] = x 3242 ab.buf[ab.off+1] = y 3243 ab.off += 2 3244 } 3245 3246 // Put3 appends three bytes to the end of the buffer. 3247 func (ab *AsmBuf) Put3(x, y, z byte) { 3248 ab.buf[ab.off+0] = x 3249 ab.buf[ab.off+1] = y 3250 ab.buf[ab.off+2] = z 3251 ab.off += 3 3252 } 3253 3254 // Put4 appends four bytes to the end of the buffer. 3255 func (ab *AsmBuf) Put4(x, y, z, w byte) { 3256 ab.buf[ab.off+0] = x 3257 ab.buf[ab.off+1] = y 3258 ab.buf[ab.off+2] = z 3259 ab.buf[ab.off+3] = w 3260 ab.off += 4 3261 } 3262 3263 // PutInt16 writes v into the buffer using little-endian encoding. 3264 func (ab *AsmBuf) PutInt16(v int16) { 3265 ab.buf[ab.off+0] = byte(v) 3266 ab.buf[ab.off+1] = byte(v >> 8) 3267 ab.off += 2 3268 } 3269 3270 // PutInt32 writes v into the buffer using little-endian encoding. 3271 func (ab *AsmBuf) PutInt32(v int32) { 3272 ab.buf[ab.off+0] = byte(v) 3273 ab.buf[ab.off+1] = byte(v >> 8) 3274 ab.buf[ab.off+2] = byte(v >> 16) 3275 ab.buf[ab.off+3] = byte(v >> 24) 3276 ab.off += 4 3277 } 3278 3279 // PutInt64 writes v into the buffer using little-endian encoding. 
func (ab *AsmBuf) PutInt64(v int64) {
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.buf[ab.off+4] = byte(v >> 32)
	ab.buf[ab.off+5] = byte(v >> 40)
	ab.buf[ab.off+6] = byte(v >> 48)
	ab.buf[ab.off+7] = byte(v >> 56)
	ab.off += 8
}

// Put copies b into the buffer.
//
// NOTE(review): copy stops at the end of ab.buf, but ab.off is always
// advanced by len(b); this presumably relies on callers never passing
// more bytes than remain in the buffer — confirm.
func (ab *AsmBuf) Put(b []byte) {
	copy(ab.buf[ab.off:], b)
	ab.off += len(b)
}

// PutOpBytesLit writes zero terminated sequence of bytes from op,
// starting at specified offset (e.g. z counter value).
// Trailing 0 is not written.
//
// Intended to be used for literal Z cases.
// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
	for int(op[offset]) != 0 {
		ab.Put1(byte(op[offset]))
		offset++
	}
}

// Insert inserts b at offset i.
// The bytes previously stored at [i, off) are shifted one position
// towards the end and the buffer length grows by one.
func (ab *AsmBuf) Insert(i int, b byte) {
	ab.off++
	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
	ab.buf[i] = b
}

// Last returns the byte at the end of the buffer.
func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }

// Len returns the length of the buffer.
func (ab *AsmBuf) Len() int { return ab.off }

// Bytes returns the contents of the buffer.
func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }

// Reset empties the buffer.
func (ab *AsmBuf) Reset() { ab.off = 0 }

// At returns the byte at offset i.
func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }

// asmidx emits SIB byte.
//
// The byte is assembled in i: the index register number goes into
// bits 3..5, the scale (1, 2, 4 or 8) selects bits 6..7 and the base
// register number goes into bits 0..2.
// Any register or scale that is not listed below is reported via
// ctxt.Diag and a zero byte is emitted in place of the SIB byte.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index: use index field value 4 and skip the scale check.
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// These index registers are rejected when assembling for 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// These base registers are rejected when assembling for 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}

// relput4 appends the 32-bit little-endian value of address a to the buffer.
// When vaddr produces a relocation for a, that relocation is added to cursym
// with its offset set to the position of the emitted value (p.Pc plus the
// current buffer length). A relocation size other than 4 is reported via
// ctxt.Diag.
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the immediate value to encode for address a and, when the
// value must be resolved later, describes the needed relocation in r.
//
// If r is non-nil it is first reset to the zero Reloc.
// For NAME_STATIC, NAME_GOTREF and NAME_EXTERN operands a 4-byte relocation
// against a.Sym is stored in r and 0 is returned.
// For TYPE_MEM/TYPE_ADDR operands based on REG_TLS, 0 is returned and r is
// filled with an R_TLS_LE relocation unless the shared-flag condition below
// rules it out.
// In both of these cases a nil r is a fatal error (log.Fatalf).
// Otherwise a.Offset is returned and r is left zeroed.
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
	}

	return a.Offset
}

// asmandsz emits the addressing bytes for operand a: the mod/reg/rm byte,
// an optional SIB byte (via asmidx) and an optional 8-bit or 32-bit
// displacement, adding a relocation to cursym when the displacement is
// symbolic.
//
// r is the value placed in the reg field (bits 3..5) of the first byte.
// rex is masked with 0x40|Rxr and merged into ab.rexflag together with the
// extension bits required by the base and index registers.
// m64 is not referenced in this function body.
//
// Operands that cannot be encoded are reported via ctxt.Diag
// ("asmand: bad address ...") and nothing further is emitted for them.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks
		// only add a more specific diagnostic before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: a single byte with mod=3.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (not TLS, not a segment
	// register): always encoded with a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
		// This base shadows the function-level base; every path through
		// this block returns or jumps to putrelv/bad.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base: mod=0 with a 32-bit displacement following the SIB.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement at all; not available for BP/R13 bases.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// 8-bit displacement when the value fits and needs no relocation.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Otherwise a 32-bit displacement.
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a general-purpose index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 bases are always emitted with a SIB byte (no index).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
			ctxt.Headtype != objabi.Hwindows {
			// Move the offset into an R_TLS_LE relocation addend and
			// emit a zero displacement for it to be applied to.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement; not available for BP/R13 bases.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, recording the pending relocation
	// (if any) at the position where the displacement is written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}

// asmand emits the addressing bytes for a via asmandsz, taking the reg field
// and the REX bits from the register operand ra.
func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
}

// asmando emits the addressing bytes for a via asmandsz, using the constant
// o as the reg field and no additional REX bits.
func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
}

// bytereg rewrites a register operand in the REG_AX..REG_R15 range to the
// register at the same position in the range starting at REG_AL, and zeroes
// *t. Operands that are not plain registers in that range are left untouched.
func bytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
		a.Reg += REG_AL - REG_AX
		*t = 0
	}
}

// unbytereg is the inverse of bytereg: it rewrites a register operand in the
// REG_AL..REG_R15B range to the register at the same position in the range
// starting at REG_AX, and zeroes *t.
func unbytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
		a.Reg += REG_AX - REG_AL
		*t = 0
	}
}

// Codes used in the fifth field of ymovtab entries.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)

// ymovtab lists special-case instruction forms.
// The table ends with an all-zero entry.
// NOTE(review): the movtab type is declared elsewhere; each entry appears to
// hold the instruction, three operand classes, one of the mov* codes above
// and up to four encoding bytes — confirm against the type definition.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}

// isax reports whether a refers to the AX register: either a.Reg is one of
// REG_AX, REG_AL or REG_AH, or a.Index is REG_AX.
func isax(a *obj.Addr) bool {
	switch a.Reg {
	case REG_AX, REG_AL, REG_AH:
		return true
	}

	return a.Index == REG_AX
}

// subreg replaces every use of register from in the From and To operands of p
// (both as base register and as index register) with register to.
// The cached operand class (p.Ft or p.Tt) of a modified operand is reset to 0.
func subreg(p *obj.Prog, from int, to int) {
	if false { /* debug['Q'] */
		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
	}

	if int(p.From.Reg) == from {
		p.From.Reg = int16(to)
		p.Ft = 0
	}

	if int(p.To.Reg) == from {
		p.To.Reg = int16(to)
		p.Tt = 0
	}

	if int(p.From.Index) == from {
		p.From.Index = int16(to)
		p.Ft = 0
	}

	if int(p.To.Index) == from {
		p.To.Index = int16(to)
		p.Tt = 0
	}

	if false { /* debug['Q'] */
		fmt.Printf("%v\n", p)
	}
}

// mediaop emits the prefix/escape bytes and the opcode byte op for o.
//
// When op is one of Pm, Pe, Pf2, Pf3 and osize is not 1, op is treated as a
// prefix: it is emitted (unless it is Pm itself), followed by Pm, and the
// actual opcode is taken from the next byte of o.op. Otherwise a Pm byte is
// emitted first unless the buffer already ends with one.
// It returns z, advanced by one if an extra byte of o.op was consumed.
// ctxt is not referenced in this function body.
func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
	switch op {
	case Pm, Pe, Pf2, Pf3:
		if osize != 1 {
			if op != Pm {
				ab.Put1(byte(op))
			}
			ab.Put1(Pm)
			z++
			op = int(o.op[z])
			break
		}
		fallthrough

	default:
		if ab.Len() == 0 || ab.Last() != Pm {
			ab.Put1(Pm)
		}
	}

	ab.Put1(byte(op))
	return z
}

// bpduff1 holds the pre-encoded bytes of the two instructions named in the
// per-line comments.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 holds the pre-encoded bytes of the instruction named in the
// per-line comment.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}

// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// These bits start at 1 and are cleared when the corresponding
	// register extension bit is needed.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The register index is stored with all four bits flipped.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		if k == nil {
			// When you request zeroing you must specify a mask register.
			// See issue 57952.
			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
		} else if k.Reg == REG_K0 {
			// The mask register must not be K0. That restriction is already
			// handled by the Yknot0 restriction in the opcode tables, so we
			// won't ever reach here. But put something sensible here just in case.
			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
		}
		evexZ = 1
	}
	// At most one of rounding, broadcast and SAE is applied; each sets the
	// b bit, and rounding additionally replaces the L'L field.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}

// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
// For details about vex prefix see:
// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
	// Record that a VEX prefix was emitted for this instruction.
	ab.vexflag = true
	rexR := 0
	if r != nil {
		rexR = regrex[r.Reg] & Rxr
	}
	rexB := 0
	rexX := 0
	if rm != nil {
		rexB = regrex[rm.Reg] & Rxb
		rexX = regrex[rm.Index] & Rxx
	}
	// Split the packed vex argument: bits 3-5 go to vexM,
	// bit 7 and bits 0-2 are kept in place in vexWLP.
	vexM := (vex >> 3) & 0x7
	vexWLP := vex & 0x87
	vexV := byte(0)
	if v != nil {
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The 4-bit register selector is emitted with all of its bits inverted.
	vexV ^= 0xF
	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
		// Can use 2-byte encoding.
		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
	} else {
		// Must use 3-byte encoding.
		ab.Put3(0xc4,
			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
			vexV<<3|vexWLP,
		)
	}
	ab.Put1(opcode)
}

// regIndex returns register index that fits in 5 bits.
//
//	R         : 3 bit | legacy instructions     | N/A
//	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
//	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
//
// Examples:
//
//	REG_Z30 => 30
//	REG_X15 => 15
//	REG_R9  => 9
//	REG_AX  => 0
func regIndex(r int16) int {
	lower3bits := reg[r]
	// In Go, & and << share the same precedence and associate to the left,
	// so the two expressions below parse as (regrex[r] & mask) << shift.
	high4bit := regrex[r] & Rxr << 1
	high5bit := regrex[r] & RxrEvex << 0
	return lower3bits | high4bit | high5bit
}

// avx2gatherValid reports whether p satisfies AVX2 gather constraints.
// Reports errors via ctxt.
func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// If any pair of the index, mask, or destination registers
	// are the same, illegal instruction trap (#UD) is triggered.
	index := regIndex(p.GetFrom3().Index)
	mask := regIndex(p.From.Reg)
	dest := regIndex(p.To.Reg)
	if dest == mask || dest == index || mask == index {
		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// avx512gatherValid reports whether p satisfies AVX512 gather constraints.
// Reports errors via ctxt.
func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// Illegal instruction trap (#UD) is triggered if the destination vector
	// register is the same as index vector in VSIB.
	index := regIndex(p.From.Index)
	dest := regIndex(p.To.Reg)
	if dest == index {
		ctxt.Diag("index and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// doasm appends the encoding of the single instruction p to ab.
//
// It proceeds in stages:
//
//  1. Look up the optab entry for p.As in opindex; a missing entry is
//     reported through ctxt.Diag.
//  2. Emit any non-zero prefix byte that prefixof returns for p.From and p.To.
//  3. Reject gather instructions whose register operands collide.
//  4. Classify the operands with oclass (the results for From and To are
//     stored in p.Ft and p.Tt) and walk o.ytab looking for an entry whose
//     argument pattern matches. z tracks the offset into o.op that belongs
//     to the entry being examined. On a match the prefix selected by o.prefix
//     and the bytes selected by yt.zcase are emitted and doasm returns.
//  5. If no ytab entry matched, try the entries of ymovtab for p.As.
//  6. Otherwise control reaches the "bad" label. On non-AMD64 targets the
//     instruction is reissued with a register operand in the REG_BP..REG_DI
//     range exchanged for another register; if that does not apply either,
//     an "invalid instruction" diagnostic is reported.
//
// Relocations produced while encoding are appended to cursym.
func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]

	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}

	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}

	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		if p.GetFrom3() == nil {
			// gathers need a 3rd arg. See issue 58822.
			ctxt.Diag("need a third arg for gather instruction: %v", p)
			return
		}
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}

	// Operand classes are computed at most once per Prog and kept in p.Ft/p.Tt.
	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}

	// Classes are pre-multiplied by Ymax so they can be added to a second
	// class to index ycover (see the ymovtab loop below).
	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax

	// xo is 1 when the first opcode byte of the optab entry is the 0x0f escape.
	xo := obj.Bool2int(o.op[0] == 0x0f)
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64

	// Build the list of operand classes that is matched against each ytab entry.
	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}

	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}

			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)

			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)

			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)

			case Pef3:
				ab.Put3(Pe, Pf3, Pm)

			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)

			case Pm: // opcode escape
				ab.Put1(Pm)

			case Pe: // 16 bit escape
				ab.Put1(Pe)

			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw

			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}

			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old github.com/go-asm/go/cmd/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}

			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}

			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}

			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			op = int(o.op[z])
			if op == 0x0f {
				// Emit the 0x0f escape now and step to the opcode byte that follows it.
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}

			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return

			case Zpseudo:
				break

			case Zlit:
				ab.PutOpBytesLit(z, &o.op)

			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough

			case Zm_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Z_m_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zm2_r:
				ab.Put2(byte(op), o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_i_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
				ab.Put1(byte(p.To.Offset))

			case Zibm_r, Zibr_m:
				ab.PutOpBytesLit(z, &o.op)
				if yt.zcase == Zibr_m {
					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				} else {
					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				}
				// The immediate width depends on the matched argument class
				// and on whether the 16-bit prefix is in effect.
				switch {
				default:
					ab.Put1(byte(p.From.Offset))
				case yt.args[0] == Yi32 && o.prefix == Pe:
					ab.PutInt16(int16(p.From.Offset))
				case yt.args[0] == Yi32:
					ab.PutInt32(int32(p.From.Offset))
				}

			case Zaut_r:
				ab.Put1(0x8d) // leal
				if p.From.Type != obj.TYPE_ADDR {
					ctxt.Diag("asmins: Zaut sb type ADDR")
				}
				// Temporarily present the operand as a memory reference
				// while it is encoded, then restore its original type.
				p.From.Type = obj.TYPE_MEM
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				p.From.Type = obj.TYPE_ADDR

			case Zm_o:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))

			case Zr_m:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])

			case Zvex_rm_v_r:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zvex_rm_v_ro:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_rm_vo:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_r_v:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				// Only the low three bits of the register number are merged
				// into the byte taken from o.op[z+2].
				regnum := byte(0x7)
				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
					regnum &= byte(p.GetFrom3().Reg - REG_X0)
				} else {
					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
				}
				ab.Put1(o.op[z+2] | regnum)
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zvex_i_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zvex_v_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zvex_r_v_rm:
				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex_rm_r_vo:
				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_r_rm:
				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zvex_hr_rm_v_r:
				hr, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				// The first operand's register index goes into the upper
				// four bits of the trailing byte.
				ab.Put1(byte(regIndex(hr.Reg) << 4))

			case Zevex_k_rmo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))

			case Zevex_i_rm_vo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_vo:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, to, nil, kmask)
				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
				ab.Put1(byte(imm.Offset))

			case Zevex_i_r_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_r_k_rm:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, nil, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_r:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, nil, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, nil)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_k_r:
				imm, from, from3, kmask, to := unpackOps5(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_r_v_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_rm_k_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_r_k_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_k_r:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)

			case Zevex_r_v_k_rm:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, from3, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)

			case Zr_m_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zr_m_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))

			case Zcallindreg:
				// Record a zero-sized R_CALLIND relocation at the start of
				// the instruction, then encode like Zo_m64.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc)
				r.Type = objabi.R_CALLIND
				r.Siz = 0
				fallthrough

			case Zo_m64:
				ab.Put1(byte(op))
				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)

			case Zm_ibo:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))

			case Zibo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Zibo_m_xm:
				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_ib, Zib_:
				if yt.zcase == Zib_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if p.As == AXABORT {
					ab.Put1(o.op[z+1])
				}
				ab.Put1(byte(vaddr(ctxt, p, a, nil)))

			case Zib_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))

			case Zil_rp:
				ab.rexflag |= regrex[p.To.Reg] & Rxb
				ab.Put1(byte(op + reg[p.To.Reg]))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Zo_iw:
				ab.Put1(byte(op))
				if p.From.Type != obj.TYPE_NONE {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				}

			case Ziq_rp:
				v = vaddr(ctxt, p, &p.From, &rel)
				// l holds the upper 32 bits of the immediate and selects
				// between the 32-bit, sign-extended 32-bit and full 64-bit forms.
				l = int(v >> 32)
				if l == 0 && rel.Siz != 8 {
					ab.rexflag &^= (0x40 | Rxw)

					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt32(int32(v))
				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
					ab.Put1(0xc7)
					ab.asmando(ctxt, cursym, p, &p.To, 0)

					ab.PutInt32(int32(v)) // need all 8
				} else {
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(op + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt64(v)
				}

			case Zib_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_il, Zil_:
				if yt.zcase == Zil_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zm_ilo, Zilo_m:
				ab.Put1(byte(op))
				if yt.zcase == Zilo_m {
					a = &p.From
					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				} else {
					a = &p.To
					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				}

				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zil_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Z_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.To.Reg]))

			case Zrp_:
				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.From.Reg]))

			case Zcallcon, Zjmpcon:
				if yt.zcase == Zcallcon {
					ab.Put1(byte(op))
				} else {
					ab.Put1(o.op[z+1])
				}
				// Emit a 4-byte placeholder covered by an R_PCREL relocation.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Type = objabi.R_PCREL
				r.Siz = 4
				r.Add = p.To.Offset
				ab.PutInt32(0)

			case Zcallind:
				ab.Put2(byte(op), o.op[z+1])
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				if ctxt.Arch.Family == sys.AMD64 {
					r.Type = objabi.R_PCREL
				} else {
					r.Type = objabi.R_ADDR
				}
				r.Siz = 4
				r.Add = p.To.Offset
				r.Sym = p.To.Sym
				ab.PutInt32(0)

			case Zcall, Zcallduff:
				if p.To.Sym == nil {
					ctxt.Diag("call without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
					ctxt.Diag("directly calling duff when dynamically linking Go")
				}

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Maintain BP around call, since duffcopy/duffzero can't do it
					// (the call jumps into the middle of the function).
					// This makes it possible to see call sites for duffcopy/duffzero in
					// BP-based profiling tools like Linux perf (which is the
					// whole point of maintaining frame pointers in Go).
					//	MOVQ BP, -16(SP)
					//	LEAQ -16(SP), BP
					ab.Put(bpduff1)
				}
				ab.Put1(byte(op))
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Sym = p.To.Sym
				r.Add = p.To.Offset
				r.Type = objabi.R_CALL
				r.Siz = 4
				ab.PutInt32(0)

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Pop BP pushed above.
					//	MOVQ 0(BP), BP
					ab.Put(bpduff2)
				}

			// TODO: jump across functions needs reloc
			case Zbr, Zjmp, Zloop:
				if p.As == AXBEGIN {
					ab.Put1(byte(op))
				}
				if p.To.Sym != nil {
					if yt.zcase != Zjmp {
						ctxt.Diag("branch to ATEXT")
						ctxt.DiagFlush()
						log.Fatalf("bad code")
					}

					ab.Put1(o.op[z+1])
					r = obj.Addrel(cursym)
					r.Off = int32(p.Pc + int64(ab.Len()))
					r.Sym = p.To.Sym
					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
					// it can point to a trampoline instead of the destination itself.
					r.Type = objabi.R_CALL
					r.Siz = 4
					ab.PutInt32(0)
					break
				}

				// Assumes q is in this function.
				// TODO: Check in input, preserve in brchain.

				// Fill in backward jump now.
				q = p.To.Target()

				if q == nil {
					ctxt.Diag("jmp/branch/loop without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if p.Back&branchBackwards != 0 {
					// Displacement measured from the end of a 2-byte (short) branch.
					v = q.Pc - (p.Pc + 2)
					if v >= -128 && p.As != AXBEGIN {
						if p.As == AJCXZL {
							ab.Put1(0x67)
						}
						ab.Put2(byte(op), byte(v))
					} else if yt.zcase == Zloop {
						ctxt.Diag("loop too far: %v", p)
					} else {
						// Re-base the displacement for the longer encoding:
						// 5 bytes instead of 2, plus one more byte for each
						// extra byte emitted below.
						v -= 5 - 2
						if p.As == AXBEGIN {
							v--
						}
						if yt.zcase == Zbr {
							ab.Put1(0x0f)
							v--
						}

						ab.Put1(o.op[z+1])
						ab.PutInt32(int32(v))
					}

					break
				}

				// Annotate target; will fill in later.
				p.Forwd = q.Rel

				q.Rel = p
				if p.Back&branchShort != 0 && p.As != AXBEGIN {
					if p.As == AJCXZL {
						ab.Put1(0x67)
					}
					ab.Put2(byte(op), 0)
				} else if yt.zcase == Zloop {
					ctxt.Diag("loop too far: %v", p)
				} else {
					if yt.zcase == Zbr {
						ab.Put1(0x0f)
					}
					ab.Put1(o.op[z+1])
					ab.PutInt32(0)
				}

			case Zbyte:
				v = vaddr(ctxt, p, &p.From, &rel)
				if rel.Siz != 0 {
					rel.Siz = uint8(op)
					r = obj.Addrel(cursym)
					*r = rel
					r.Off = int32(p.Pc + int64(ab.Len()))
				}

				// Emit the low op bytes of v (1, 2, 4 or 8), least significant first.
				ab.Put1(byte(v))
				if op > 1 {
					ab.Put1(byte(v >> 8))
					if op > 2 {
						ab.PutInt16(int16(v >> 16))
						if op > 4 {
							ab.PutInt32(int32(v >> 32))
						}
					}
				}
			}

			// A ytab entry matched and has been handled.
			return
		}
	}
	// No ytab entry matched: try the ymovtab entries for this opcode.
	f3t = Ynone * Ymax
	if p.GetFrom3() != nil {
		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
	}
	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
		var pp obj.Prog
		var t []byte
		if p.As == mo[0].as {
			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
				t = mo[0].op[:]
				switch mo[0].code {
				default:
					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)

				case movLit:
					// Copy literal bytes up to the first zero byte.
					for z = 0; t[z] != 0; z++ {
						ab.Put1(t[z])
					}

				case movRegMem:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))

				case movMemReg:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))

				case movRegMem2op: // r,m - 2op
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)

				case movMemReg2op:
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)

				case movFullPtr:
					if t[0] != 0 {
						ab.Put1(t[0])
					}
					switch p.To.Index {
					default:
						goto bad

					case REG_DS:
						ab.Put1(0xc5)

					case REG_SS:
						ab.Put2(0x0f, 0xb2)

					case REG_ES:
						ab.Put1(0xc4)

					case REG_FS:
						ab.Put2(0x0f, 0xb4)

					case REG_GS:
						ab.Put2(0x0f, 0xb5)
					}

					ab.asmand(ctxt, cursym, p, &p.From, &p.To)

				case movDoubleShift:
					// A leading Pw or Pe entry in t is a prefix marker, not an opcode byte.
					if t[0] == Pw {
						if ctxt.Arch.Family != sys.AMD64 {
							ctxt.Diag("asmins: illegal 64: %v", p)
						}
						ab.rexflag |= Pw
						t = t[1:]
					} else if t[0] == Pe {
						ab.Put1(Pe)
						t = t[1:]
					}

					switch p.From.Type {
					default:
						goto bad

					case obj.TYPE_CONST:
						ab.Put2(0x0f, t[0])
						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						ab.Put1(byte(p.From.Offset))

					case obj.TYPE_REG:
						switch p.From.Reg {
						default:
							goto bad

						case REG_CL, REG_CX:
							ab.Put2(0x0f, t[1])
							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						}
					}

				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
				// where you load the TLS base register into a register and then index off that
				// register to access the actual TLS variables. Systems that allow direct TLS access
				// are handled in prefixof above and should not be listed here.
				case movTLSReg:
					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
						ctxt.Diag("invalid load of TLS: %v", p)
					}

					if ctxt.Arch.Family == sys.I386 {
						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
						// where you load the TLS base register into a register and then index off that
						// register to access the actual TLS variables. Systems that allow direct TLS access
						// are handled in prefixof above and should not be listed here.
						switch ctxt.Headtype {
						default:
							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

						case objabi.Hlinux, objabi.Hfreebsd:
							if ctxt.Flag_shared {
								// Note that this is not generating the same insns as the other cases.
								//     MOV TLS, dst
								// becomes
								//     call __x86.get_pc_thunk.dst
								//     movl (gotpc + g@gotntpoff)(dst), dst
								// which is encoded as
								//     call __x86.get_pc_thunk.dst
								//     movq 0(dst), dst
								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
								// is g, which we can't check here, but will when we assemble the second
								// instruction.
								dst := p.To.Reg
								ab.Put1(0xe8)
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_CALL
								r.Siz = 4
								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
								ab.PutInt32(0)

								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_TLS_IE
								r.Siz = 4
								r.Add = 2
								ab.PutInt32(0)
							} else {
								// ELF TLS base is 0(GS).
								pp.From = p.From

								pp.From.Type = obj.TYPE_MEM
								pp.From.Reg = REG_GS
								pp.From.Offset = 0
								pp.From.Index = REG_NONE
								pp.From.Scale = 0
								ab.Put2(0x65, // GS
									0x8B)
								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
							}
						case objabi.Hplan9:
							pp.From = obj.Addr{}
							pp.From.Type = obj.TYPE_MEM
							pp.From.Name = obj.NAME_EXTERN
							pp.From.Sym = plan9privates
							pp.From.Offset = 0
							pp.From.Index = REG_NONE
							ab.Put1(0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						}
						// The 386 case is done; skip the switch below, which
						// handles the remaining (non-386) targets.
						break
					}

					switch ctxt.Headtype {
					default:
						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

					case objabi.Hlinux, objabi.Hfreebsd:
						if !ctxt.Flag_shared {
							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
						}
						// Note that this is not generating the same insn as the other cases.
						//     MOV TLS, R_to
						// becomes
						//     movq g@gottpoff(%rip), R_to
						// which is encoded as
						//     movq 0(%rip), R_to
						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
						// is g, which we can't check here, but will when we assemble the second
						// instruction.
						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)

						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
						r = obj.Addrel(cursym)
						r.Off = int32(p.Pc + int64(ab.Len()))
						r.Type = objabi.R_TLS_IE
						r.Siz = 4
						r.Add = -4
						ab.PutInt32(0)

					case objabi.Hplan9:
						pp.From = obj.Addr{}
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_EXTERN
						pp.From.Sym = plan9privates
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						ab.rexflag |= Pw
						ab.Put1(0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
						// TLS base is 0(FS).
						pp.From = p.From

						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_NONE
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x64, // FS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					}
				}
				return
			}
		}
	}
	// Neither table produced an encoding.
	goto bad

bad:
	if ctxt.Arch.Family != sys.AMD64 {
		// here, the assembly has failed.
		// if it's a byte instruction that has
		// unaddressable registers, try to
		// exchange registers and reissue the
		// instruction with the operands renamed.
		pp := *p

		unbytereg(&pp.From, &pp.Ft)
		unbytereg(&pp.To, &pp.Tt)

		z := int(p.From.Reg)
		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.To)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				}
				return
			}

			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
			}
			return
		}

		z = int(p.To.Reg)
		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.From)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				}
				return
			}

			if isax(&p.From) {
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
			}
			return
		}
	}

	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
}

// byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
// which is not referenced in a.
// If a is empty, it returns BX to account for MULB-like instructions
// that might use DX and AX.
5348 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5349 cana, canb, canc, cand := true, true, true, true 5350 if a.Type == obj.TYPE_NONE { 5351 cana, cand = false, false 5352 } 5353 5354 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5355 switch a.Reg { 5356 case REG_NONE: 5357 cana, cand = false, false 5358 case REG_AX, REG_AL, REG_AH: 5359 cana = false 5360 case REG_BX, REG_BL, REG_BH: 5361 canb = false 5362 case REG_CX, REG_CL, REG_CH: 5363 canc = false 5364 case REG_DX, REG_DL, REG_DH: 5365 cand = false 5366 } 5367 } 5368 5369 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5370 switch a.Index { 5371 case REG_AX: 5372 cana = false 5373 case REG_BX: 5374 canb = false 5375 case REG_CX: 5376 canc = false 5377 case REG_DX: 5378 cand = false 5379 } 5380 } 5381 5382 switch { 5383 case cana: 5384 return REG_AX 5385 case canb: 5386 return REG_BX 5387 case canc: 5388 return REG_CX 5389 case cand: 5390 return REG_DX 5391 default: 5392 ctxt.Diag("impossible byte register") 5393 ctxt.DiagFlush() 5394 log.Fatalf("bad code") 5395 return 0 5396 } 5397 } 5398 5399 func isbadbyte(a *obj.Addr) bool { 5400 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5401 } 5402 5403 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5404 ab.Reset() 5405 5406 ab.rexflag = 0 5407 ab.vexflag = false 5408 ab.evexflag = false 5409 mark := ab.Len() 5410 ab.doasm(ctxt, cursym, p) 5411 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5412 // as befits the whole approach of the architecture, 5413 // the rex prefix must appear before the first opcode byte 5414 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5415 // before the 0f opcode escape!), or it might be ignored. 5416 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5417 if ctxt.Arch.Family != sys.AMD64 { 5418 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5419 } 5420 n := ab.Len() 5421 var np int 5422 for np = mark; np < n; np++ { 5423 c := ab.At(np) 5424 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5425 break 5426 } 5427 } 5428 ab.Insert(np, byte(0x40|ab.rexflag)) 5429 } 5430 5431 n := ab.Len() 5432 for i := len(cursym.R) - 1; i >= 0; i-- { 5433 r := &cursym.R[i] 5434 if int64(r.Off) < p.Pc { 5435 break 5436 } 5437 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5438 r.Off++ 5439 } 5440 if r.Type == objabi.R_PCREL { 5441 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5442 // PC-relative addressing is relative to the end of the instruction, 5443 // but the relocations applied by the linker are relative to the end 5444 // of the relocation. Because immediate instruction 5445 // arguments can follow the PC-relative memory reference in the 5446 // instruction encoding, the two may not coincide. In this case, 5447 // adjust addend so that linker can keep relocating relative to the 5448 // end of the relocation. 5449 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5450 } else if ctxt.Arch.Family == sys.I386 { 5451 // On 386 PC-relative addressing (for non-call/jmp instructions) 5452 // assumes that the previous instruction loaded the PC of the end 5453 // of that instruction into CX, so the adjustment is relative to 5454 // that. 5455 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5456 } 5457 } 5458 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5459 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5460 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5461 } 5462 5463 } 5464 } 5465 5466 // unpackOps4 extracts 4 operands from p. 
5467 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5468 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To 5469 } 5470 5471 // unpackOps5 extracts 5 operands from p. 5472 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5473 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To 5474 }