github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/internal/obj/x86/asm6.go (about) 1 // Inferno utils/6l/span.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"github.com/gagliardetto/golang-go/cmd/internal/obj"
	"github.com/gagliardetto/golang-go/cmd/internal/objabi"
	"github.com/gagliardetto/golang-go/cmd/internal/sys"
	"encoding/binary"
	"fmt"
	"log"
	"strings"
)

var (
	plan9privates *obj.LSym
	deferreturn   *obj.LSym
)

// Instruction layout.

// Loop alignment constants:
// want to align loop entry to loopAlign-byte boundary,
// and willing to insert at most maxLoopPad bytes of NOP to do so.
// We define a loop entry as the target of a backward jump.
//
// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
// and it aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
// is very slight but negative, so the alignment is disabled by
// setting maxLoopPad = 0. The code is here for reference and
// for future experiments.
//
const (
	loopAlign  = 16
	maxLoopPad = 0
)

// Bit flags that are used to express jump target properties.
// Each flag occupies its own bit (1 << iota), so they can be OR-ed together.
const (
	// branchBackwards marks targets that are located behind.
	// Used to express jumps to loop headers.
	branchBackwards = (1 << iota)
	// branchShort marks branches whose target is close,
	// with an offset in the -128..127 range.
	branchShort
	// branchLoopHead marks loop entry.
	// Used to insert padding for misaligned loops.
	branchLoopHead
)

// opBytes holds optab encoding bytes.
// Each ytab reserves fixed amount of bytes in this array.
//
// The size should be the minimal number of bytes that
// are enough to hold biggest optab op lines.
type opBytes [31]uint8

// Optab is one row of the optab instruction table.
//
// as is the instruction, ytab is the list of operand patterns accepted for
// it, prefix is one of the P* constants and controls the prefix bytes to
// emit, and op holds the encoding bytes that the ytab rows index into.
type Optab struct {
	as     obj.As
	ytab   []ytab
	prefix uint8
	op     opBytes
}

// NOTE(review): the table built from movtab is not visible in this part of
// the file; the field meanings below are not documented here — confirm
// against the code that consumes it.
type movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}

// Operand classes ("Ytypes").
//
// oclass computes the specific Ytype of an operand; the set of more general
// Ytypes that it also satisfies is given by the ycover table.
// The values are assigned by iota, so the order of this list is significant;
// Ymax must stay last because it sizes ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax
)

// Encoding cases ("Ztypes").
//
// Each ytab row names one of these; the switch on yt.zcase in doasm
// implements them. Together with the z bytes taken from the optab entry,
// the Ztype determines the instruction bytes that are laid down.
// The values are assigned by iota, so the order of this list is significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	// Zevex_first and Zevex_last bracket the EVEX cases.
	// NOTE(review): presumably used as range markers; the code that tests
	// them is outside this part of the file — confirm.
	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax
)

// Prefix selectors (the P* values stored in Optab.prefix, and also embedded
// in some opBytes lists) followed by the REX extension bits (Rx*).
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic: exact value doesn't matter

	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
	Rxw     = 1 << 3 // =1, 64-bit operand size
	Rxr     = 1 << 2 // extend modrm reg
	Rxx     = 1 << 1 // extend sib index
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
)

const (
	// Encoding for VEX prefix in tables.
	// The P, L, and W fields are chosen to match
	// their eventual locations in the VEX prefix bytes.

	// Using spare bit to make leading [E]VEX encoding byte different from
	// 0x0f even if all other VEX fields are 0.
	avxEscape = 1 << 6

	// P field - 2 bits
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
	// L field - 1 bit
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
	// W field - 1 bit
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
	// M field - 5 bits, but mostly reserved; we can store up to 3
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)

// ycover is a flattened Ymax x Ymax table: a non-zero ycover[a*Ymax+b]
// means that an operand of class a also counts as class b.
// It is filled in by instinit.
var ycover [Ymax * Ymax]uint8

var reg [MAXREG]int

var regrex [MAXREG + 1]int

// Operand-pattern tables ("ytabs") referenced from optab.
//
// Every row is written as {Ztype, n, argList{operand Ytypes...}}: the Ztype
// selects the encoding case, n is the number of optab encoding bytes the
// row consumes (the z pointer advances by n when the row is skipped), and
// argList lists the operand classes the row accepts.
// Row order matters because the z offsets accumulate row by row.
// NOTE(review): the ytab and argList types are declared outside this part
// of the file.

var ynone = []ytab{
	{Zlit, 1, argList{}},
}

var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}

var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}

var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}

var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}

var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}

var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}

// It should never have more than 1 entry,
// because some optab entries have opcode sequences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}

// You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
// to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// argList matches the Ytypes of the instruction's operands (p.From and p.To
// in the two-operand case). The
// function oclass computes the specific Ytype of an operand and then the set
// of more general Ytypes that it satisfies is implied by the ycover table, set
// up in instinit. For example, oclass distinguishes the constants 0 and 1
// from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the 2nd entry in the line. When a matching line is found, that
// z pointer has the extra data to use in laying down the instruction bytes.
// The actual bytes laid down are a function of the 1st entry in the line (that
// is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL. The optab line says:
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_, z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
//	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit. That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
920 var optab = 921 // as, ytab, andproto, opcode 922 [...]Optab{ 923 {obj.AXXX, nil, 0, opBytes{}}, 924 {AAAA, ynone, P32, opBytes{0x37}}, 925 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 926 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 927 {AAAS, ynone, P32, opBytes{0x3f}}, 928 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 929 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 930 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 933 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 934 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 935 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 936 {AADDPD, yxm, Pq, opBytes{0x58}}, 937 {AADDPS, yxm, Pm, opBytes{0x58}}, 938 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 939 {AADDSD, yxm, Pf2, opBytes{0x58}}, 940 {AADDSS, yxm, Pf3, opBytes{0x58}}, 941 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 942 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 943 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 944 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 945 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 946 {AADJSP, nil, 0, opBytes{}}, 947 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 948 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 949 {AANDNPD, yxm, Pq, opBytes{0x55}}, 950 {AANDNPS, yxm, Pm, opBytes{0x55}}, 951 {AANDPD, yxm, Pq, opBytes{0x54}}, 952 {AANDPS, yxm, Pm, opBytes{0x54}}, 953 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 954 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AARPL, yrl_ml, P32, opBytes{0x63}}, 956 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 957 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 958 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 959 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 960 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 961 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 962 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 963 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 964 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 965 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 966 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 967 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 968 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 969 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 970 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 971 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 972 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 973 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 974 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 975 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 976 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 977 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 978 {ABYTE, ybyte, Px, opBytes{1}}, 979 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 980 {ACBW, ynone, Pe, opBytes{0x98}}, 981 {ACDQ, ynone, Px, opBytes{0x99}}, 982 {ACDQE, ynone, Pw, opBytes{0x98}}, 983 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 984 {ACLC, ynone, Px, opBytes{0xf8}}, 985 {ACLD, ynone, Px, opBytes{0xfc}}, 986 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 987 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 988 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 989 {ACLI, ynone, Px, opBytes{0xfa}}, 990 {ACLTS, ynone, Pm, opBytes{0x06}}, 991 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 992 {ACMC, ynone, Px, opBytes{0xf5}}, 993 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 994 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 995 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 996 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 997 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 998 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 999 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1000 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1001 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1002 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1003 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1004 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1005 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1006 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1007 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1008 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1009 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1010 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1011 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1012 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1013 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1014 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1015 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1016 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1017 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1018 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1019 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1020 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1021 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1022 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1023 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1024 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1025 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1026 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1027 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1028 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1029 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1030 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1031 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1032 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1033 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1034 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1035 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1036 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1037 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1038 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1039 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1040 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1041 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1042 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1043 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1044 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1045 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1046 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1047 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1048 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1049 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1050 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1051 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1052 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1053 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1054 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1055 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1056 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1057 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1058 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1059 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1060 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1061 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1062 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1063 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1064 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1065 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1066 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1067 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1068 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1069 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1070 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1071 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1072 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1073 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1074 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1075 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1076 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1077 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1078 {ACWD, ynone, Pe, opBytes{0x99}}, 1079 {ACWDE, ynone, Px, opBytes{0x98}}, 1080 {ACQO, ynone, Pw, opBytes{0x99}}, 1081 {ADAA, ynone, P32, opBytes{0x27}}, 1082 {ADAS, ynone, P32, opBytes{0x2f}}, 1083 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1084 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1085 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1086 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1087 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1088 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1089 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1090 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1091 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1092 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1093 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1094 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1095 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1096 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1097 {AEMMS, ynone, Pm, opBytes{0x77}}, 1098 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1099 {AENTER, nil, 0, opBytes{}}, // botch 1100 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1101 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1102 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1103 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1104 {AHLT, ynone, Px, opBytes{0xf4}}, 1105 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1106 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1107 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1108 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1109 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1110 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1111 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1114 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1117 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1118 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1119 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1120 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1121 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1122 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1123 {AINSB, ynone, Pb, opBytes{0x6c}}, 1124 {AINSL, ynone, Px, opBytes{0x6d}}, 1125 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1126 {AINSW, ynone, Pe, opBytes{0x6d}}, 1127 {AICEBP, ynone, Px, opBytes{0xf1}}, 1128 {AINT, yint, Px, opBytes{0xcd}}, 1129 {AINTO, ynone, P32, opBytes{0xce}}, 1130 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1131 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1132 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1133 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1134 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1135 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1136 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1137 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1138 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1139 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1140 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1141 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1142 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1143 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1144 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1145 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1146 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1147 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1148 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1149 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1150 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1151 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1152 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1153 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1154 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1155 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1156 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1157 {ALAHF, ynone, Px, opBytes{0x9f}}, 1158 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1159 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1160 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1161 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1162 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1163 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1164 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1165 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1166 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1167 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1168 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1169 {ALOCK, ynone, Px, opBytes{0xf0}}, 1170 {ALODSB, ynone, Pb, opBytes{0xac}}, 1171 {ALODSL, ynone, Px, opBytes{0xad}}, 1172 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1173 {ALODSW, ynone, Pe, opBytes{0xad}}, 1174 {ALONG, ybyte, Px, opBytes{4}}, 1175 {ALOOP, yloop, Px, opBytes{0xe2}}, 1176 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1177 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1178 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1179 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1180 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1181 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1182 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1183 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1184 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1185 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1186 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1187 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1188 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1189 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1190 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1191 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1192 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1193 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1194 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1195 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1196 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1197 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1198 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1199 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1200 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1201 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1202 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1203 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1204 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1205 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1206 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1207 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1208 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1209 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1210 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1211 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1212 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1213 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1214 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1215 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1216 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1217 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1218 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1219 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1220 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1221 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1222 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1223 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1224 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1225 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1226 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1227 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1228 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1229 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1230 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1231 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1232 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1233 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1234 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1235 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1236 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1237 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1238 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1239 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1240 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1241 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1242 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1243 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1244 {AMULPD, yxm, Pe, opBytes{0x59}}, 1245 {AMULPS, yxm, Ym, opBytes{0x59}}, 1246 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1247 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1248 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1249 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1250 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1251 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1252 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1253 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1254 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1255 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1256 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1257 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1258 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1259 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1260 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1261 {AORPD, yxm, Pq, opBytes{0x56}}, 1262 {AORPS, yxm, Pm, opBytes{0x56}}, 1263 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1264 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1265 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1266 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1267 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1268 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1269 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1270 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1271 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1272 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1273 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1274 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1275 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1276 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1277 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1278 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1279 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1280 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1281 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1282 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1283 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1284 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1285 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1286 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1287 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1288 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1289 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1290 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1291 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1292 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1293 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1294 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1295 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1296 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1297 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1298 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1299 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1300 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1301 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1302 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1303 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1304 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1305 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1306 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1307 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1308 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1309 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1310 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1311 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1312 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1313 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1314 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1315 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1316 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1317 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1318 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1319 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1320 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1321 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1322 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1323 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1324 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1325 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1326 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1327 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1328 {APMINSW, yxm, Pe, opBytes{0xea}}, 1329 {APMINUB, yxm, Pe, opBytes{0xda}}, 1330 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1331 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1332 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1333 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1334 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1335 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1336 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1337 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1338 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1339 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1340 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1341 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1342 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1343 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1344 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1345 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1346 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1347 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1348 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1349 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1350 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1351 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1352 {APOPAL, ynone, P32, opBytes{0x61}}, 1353 {APOPAW, ynone, Pe, opBytes{0x61}}, 1354 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1355 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1356 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1357 {APOPFL, ynone, P32, opBytes{0x9d}}, 1358 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1359 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1360 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1361 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1362 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1363 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1364 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1365 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1366 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1367 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1368 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1369 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1370 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1371 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1372 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1373 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1374 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1375 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1376 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1377 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1378 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1379 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1380 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1381 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1382 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1383 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1384 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1385 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1386 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1387 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1388 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1389 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1390 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1391 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1392 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1393 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1394 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1395 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1396 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1397 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1398 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1399 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1400 {APUSHAL, ynone, P32, opBytes{0x60}}, 1401 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1402 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1403 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1404 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1405 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1406 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1409 {AQUAD, ybyte, Px, opBytes{8}}, 1410 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1411 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1412 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1415 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1416 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1417 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1418 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {AREP, ynone, Px, opBytes{0xf3}}, 1421 {AREPN, ynone, Px, opBytes{0xf2}}, 1422 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1423 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1424 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1425 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1426 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1427 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1428 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1431 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1432 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1435 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1436 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1437 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1438 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1439 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1442 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1443 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1446 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1447 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASCASB, ynone, Pb, opBytes{0xae}}, 1450 {ASCASL, ynone, Px, opBytes{0xaf}}, 1451 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1452 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1453 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1454 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1455 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1456 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1457 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1458 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1459 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1460 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1461 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1462 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1463 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1464 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1465 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1466 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1467 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1468 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1469 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1470 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1471 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1474 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1475 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1478 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1479 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1480 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1481 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1482 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1483 {ASTC, ynone, Px, opBytes{0xf9}}, 1484 {ASTD, ynone, Px, opBytes{0xfd}}, 1485 {ASTI, ynone, Px, opBytes{0xfb}}, 1486 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1487 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1488 {ASTOSL, ynone, Px, opBytes{0xab}}, 1489 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1490 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1491 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1492 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1493 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1494 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1495 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1496 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1497 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1498 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1499 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1500 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1501 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1502 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1503 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1506 {obj.ATEXT, ytext, Px, opBytes{}}, 1507 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1508 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1509 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1510 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1511 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1512 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1513 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1514 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1515 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1516 {AWAIT, ynone, Px, opBytes{0x9b}}, 1517 {AWORD, ybyte, Px, opBytes{2}}, 1518 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1519 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1520 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1521 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXLAT, ynone, Px, opBytes{0xd7}}, 1523 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1524 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1525 {AXORPD, yxm, Pe, opBytes{0x57}}, 1526 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1527 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1528 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1529 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1530 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1531 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1532 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1533 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1534 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1535 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1536 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1537 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1538 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1539 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1540 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1541 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1542 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1543 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1544 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1545 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1546 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1547 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1548 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1549 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1550 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1551 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1552 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1553 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1554 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1555 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1556 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1557 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1558 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1559 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1560 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1561 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1562 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1563 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1564 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1565 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1566 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1567 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1568 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1569 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1570 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1571 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1572 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1573 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1574 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1575 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1576 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1577 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1578 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1579 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1580 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1581 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1582 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1583 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1584 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1585 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1586 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1587 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1588 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1589 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1590 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1591 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1592 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1593 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1594 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1595 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1596 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1597 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1598 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1599 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1600 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1601 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1602 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1603 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1604 {AFFREE, nil, 0, opBytes{}}, 1605 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1606 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1607 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1608 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1609 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1610 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1611 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1612 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1613 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1614 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1615 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1616 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1617 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1618 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1619 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1620 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1621 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1622 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1623 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1624 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1625 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1626 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1627 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1628 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1629 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1630 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1631 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1632 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1633 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1634 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1635 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1636 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1637 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1638 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1639 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1640 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1641 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1642 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1643 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1644 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1645 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1646 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1647 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1649 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1650 {AINVD, ynone, Pm, opBytes{0x08}}, 1651 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1652 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1653 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1654 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1655 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1656 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1657 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1658 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1659 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1660 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1661 {ARSM, ynone, Pm, opBytes{0xaa}}, 1662 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1663 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1664 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1665 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1666 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1667 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1668 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1669 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1670 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1671 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1672 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1673 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1674 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1676 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1677 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1678 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1679 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1680 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1681 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1682 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1683 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1684 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1685 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1686 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1687 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1688 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1689 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1690 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1691 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1692 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1693 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1694 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1695 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1696 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1697 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1698 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1699 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1700 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1701 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1702 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1703 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1704 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1705 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1706 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1707 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1708 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1709 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1710 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1711 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1712 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1713 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1715 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1716 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1717 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1718 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1719 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1720 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1721 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1722 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1723 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1724 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1725 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1726 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1727 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1728 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1729 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1730 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1732 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1733 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1734 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1735 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1736 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1737 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1738 {AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1739 {AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1740 {AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1741 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1742 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1743 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1744 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1745 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1746 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1747 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1748 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1749 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1750 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1751 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1752 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1753 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1754 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1755 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1756 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1757 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1758 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1759 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1760 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1761 
{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}}, 1762 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1763 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1764 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1765 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1766 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1767 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1768 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1769 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1770 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1771 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1772 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1773 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1774 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1775 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1776 1777 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1778 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1779 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1780 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1781 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1782 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1783 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1784 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1785 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1786 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1787 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1788 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1789 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1790 1791 {obj.AEND, nil, 0, opBytes{}}, 1792 {0, nil, 0, opBytes{}}, 1793 } 1794 1795 var opindex [(ALAST + 1) & obj.AMask]*Optab 1796 1797 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1798 // This happens on systems like Solaris that call .so functions instead of system calls. 1799 // It does not seem to be necessary for any other systems. This is probably working 1800 // around a Solaris-specific bug that should be fixed differently, but we don't know 1801 // what that bug is. And this does fix it. 
func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
	if ctxt.Headtype == objabi.Hsolaris {
		// All the Solaris dynamic imports from libc.so begin with "libc_".
		return strings.HasPrefix(s.Name, "libc_")
	}
	// On every other head type, absolute addressing is used only for 386
	// targets when ctxt.Flag_shared is not set.
	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
}

// single-instruction no-ops of various lengths.
// constructed by hand and disassembled with gdb to verify.
// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
//
// nop[i] holds the encoding of an (i+1)-byte NOP; fillnop indexes the
// table by length-1 and copies only the first length bytes of the entry.
var nop = [][16]uint8{
	{0x90},
	{0x66, 0x90},
	{0x0F, 0x1F, 0x00},
	{0x0F, 0x1F, 0x40, 0x00},
	{0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}

// Native Client rejects the repeated 0x66 prefix.
// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},

// fillnop writes n bytes of NOP instructions to the start of p.
// It repeatedly emits the longest NOP available in the nop table
// (at most len(nop) bytes at a time) until n bytes have been written.
// p must be at least n bytes long.
func fillnop(p []byte, n int) {
	var m int

	for n > 0 {
		// m is the length of the next NOP: the remaining byte count,
		// capped at the longest encoding in the table.
		m = n
		if m > len(nop) {
			m = len(nop)
		}
		copy(p[:m], nop[m-1][:m])
		p = p[m:]
		n -= m
	}
}

// spadjop selects between two variants of a stack-adjusting opcode.
// It returns q only when the target family is AMD64 and the pointer size
// is not 4; in every other case it returns l.
func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
		return l
	}
	return q
}

// span6 assembles the instruction list s.Func.Text into s.P and stores the
// resulting byte count in s.Size. It returns immediately if s.P is already
// non-nil.
//
// The work is done in three steps:
//  1. Branches without a target are pointed at themselves and AADJSP
//     pseudo-instructions are rewritten into an ADD/SUB on SP (or a NOP).
//  2. Every instruction is marked branchShort; branches whose target is
//     already marked are flagged branchBackwards and the target is flagged
//     branchLoopHead.
//  3. The whole list is assembled repeatedly. A pass is repeated when a
//     short forward branch turns out to need an offset above 127, in which
//     case the branch loses its branchShort flag for the next pass.
//
// Finally, when CanUse1InsnTLS reports false, instructions that use REG_TLS
// as the From index are marked as unsafe points via obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if s.P != nil {
		return
	}

	// NOTE(review): the diagnostic is reported but assembly continues.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch with no resolved target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.Pcond == nil {
			p.Pcond = p
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort was visited earlier in
		// this loop (or is p itself), so the branch goes backwards.
		// NOTE(review): this presumes Back has no branchShort bit on entry.
		if q := p.Pcond; q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes performed so far
	var c int32 // current output offset within s.P
	errors := ctxt.Errors
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the relocations and bytes produced by the previous pass.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		for p := s.Func.Text; p != nil; p = p.Link {

			if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				// v is at least 1 here, so with maxLoopPad == 0 no
				// padding is ever inserted.
				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of the branch q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Does not fit in a signed byte: clear the short
						// flag and redo the whole function.
						reAssemble = true
						q.Back ^= branchShort
					}

					// The one-byte displacement sits at offset 2 of the
					// instruction for JCXZL and XBEGIN, at offset 1 otherwise.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the last
					// four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			// Record the encoded size; a size change alone does not
			// request another pass.
			if int(p.Isize) != m {
				p.Isize = uint8(m)
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			c += int32(m)
		}

		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Give up (leaving s.Size unset) if this pass reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}

	s.Size = int64(c)

	// Disabled debug dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS)
	}
}

// instinit fills the package-level lookup tables used by the assembler:
// opindex (opcode -> Optab entry, built from avxOptab and optab), ycover
// (operand class relation) and reg/regrex (per-register encoding number and
// prefix bits). It is a no-op when ycover[0] is already non-zero.
// On Plan 9 it also looks up the "_privates" symbol.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	}

	// Index both opcode tables by opcode; a slot that is already occupied
	// means the same opcode appears twice.
	for i := range avxOptab {
		c := avxOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &avxOptab[i]
	}
	// optab is scanned from index 1 up to the terminating entry whose as is 0.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// ycover is a flattened Ymax x Ymax matrix.
	// NOTE(review): ycover[a*Ymax+b] == 1 presumably means an operand of
	// class a is accepted where class b is required; confirm at the use site.
	// Every class covers itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	ycover[Yxr0*Ymax+Yxr] = 1

	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr0*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	ycover[Yxr0*Ymax+YxrEvex] = 1
	ycover[Yxr*Ymax+YxrEvex] = 1

	ycover[Ym*Ymax+YxmEvex] = 1
	ycover[Yxr0*Ymax+YxmEvex] = 1
	ycover[Yxr*Ymax+YxmEvex] = 1
	ycover[YxrEvex*Ymax+YxmEvex] = 1

	ycover[Yyr*Ymax+YyrEvex] = 1

	ycover[Ym*Ymax+YymEvex] = 1
	ycover[Yyr*Ymax+YymEvex] = 1
	ycover[YyrEvex*Ymax+YymEvex] = 1

	ycover[Ym*Ymax+Yzm] = 1
	ycover[Yzr*Ymax+Yzm] = 1

	ycover[Yk0*Ymax+Yk] = 1
	ycover[Yknot0*Ymax+Yk] = 1

	ycover[Yk0*Ymax+Ykm] = 1
	ycover[Yknot0*Ymax+Ykm] = 1
	ycover[Yk*Ymax+Ykm] = 1
	ycover[Ym*Ymax+Ykm] = 1

	ycover[Yxvm*Ymax+YxvmEvex] = 1

	ycover[Yyvm*Ymax+YyvmEvex] = 1

	// reg[i] is the low three bits of register i's number within its
	// register file (-1 when i is not covered by any range below);
	// regrex[i] holds the extra prefix bits the register requires.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_K0 && i <= REG_K0+7 {
			reg[i] = (i - REG_K0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_X16 && i <= REG_X16+15 {
			reg[i] = (i - REG_X16) & 7
			if i >= REG_X16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y16 && i <= REG_Y16+15 {
			reg[i] = (i - REG_Y16) & 7
			if i >= REG_Y16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}
		if i >= REG_Z0 && i <= REG_Z0+15 {
			reg[i] = (i - REG_Z0) & 7
			// Same bound as the ">= REG_X0+8" / ">= REG_Y0+8" tests above.
			if i > REG_Z0+7 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Z16 && i <= REG_Z16+15 {
			reg[i] = (i - REG_Z16) & 7
			if i >= REG_Z16+8 {
				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
			} else {
				regrex[i] = RxrEvex
			}
		}

		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}

// isAndroid records whether objabi.GOOS is "android".
var isAndroid = objabi.GOOS == "android"

// prefixof returns the segment-override prefix byte needed to encode the
// operand a, or 0 when no prefix is required. Explicit segment registers
// used as base or index map to their fixed prefix byte; REG_TLS maps to the
// FS (0x64) or GS (0x65) prefix depending on architecture and head type.
// Unsupported TLS configurations terminate the program via log.Fatalf.
func prefixof(ctxt *obj.Link, a *obj.Addr) int {
	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
		return 0
	}
	// Segment register (or TLS) used as the base of a plain memory operand.
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		case REG_CS:
			return 0x2e

		case REG_DS:
			return 0x3e

		case REG_ES:
			return 0x26

		case REG_FS:
			return 0x64

		case REG_GS:
			return 0x65

		case REG_TLS:
			// NOTE: Systems listed here should be only systems that
			// support direct TLS references like 8(TLS) implemented as
			// direct references from FS or GS. Systems that require
			// the initial-exec model, where you load the TLS base into
			// a register and then index from that register, do not reach
			// this code and should not be listed.
			if ctxt.Arch.Family == sys.I386 {
				switch ctxt.Headtype {
				default:
					if isAndroid {
						return 0x65 // GS
					}
					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

				case objabi.Hdarwin,
					objabi.Hdragonfly,
					objabi.Hfreebsd,
					objabi.Hnetbsd,
					objabi.Hopenbsd:
					return 0x65 // GS
				}
			}

			switch ctxt.Headtype {
			default:
				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

			case objabi.Hlinux:
				if isAndroid {
					return 0x64 // FS
				}

				if ctxt.Flag_shared {
					log.Fatalf("unknown TLS base register for linux with -shared")
				} else {
					return 0x64 // FS
				}

			case objabi.Hdragonfly,
				objabi.Hfreebsd,
				objabi.Hnetbsd,
				objabi.Hopenbsd,
				objabi.Hsolaris:
				return 0x64 // FS

			case objabi.Hdarwin:
				return 0x65 // GS
			}
		}
	}

	// From here on only the index register can demand a prefix.
	if ctxt.Arch.Family == sys.I386 {
		if a.Index == REG_TLS && ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOVL off(CX)(TLS*1), AX
			// becomes
			//     mov %gs:off(%ecx), %eax
			// which assumes that the correct TLS offset has been loaded into %ecx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction it becomes
			//     mov 0x0(%ecx), %eax
			// and a R_TLS_LE relocation, and so does not require a prefix.
			return 0x65 // GS
		}
		return 0
	}

	switch a.Index {
	case REG_CS:
		return 0x2e

	case REG_DS:
		return 0x3e

	case REG_ES:
		return 0x26

	case REG_TLS:
		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOV off(CX)(TLS*1), AX
			// becomes
			//     mov %fs:off(%rcx), %rax
			// which assumes that the correct TLS offset has been loaded into %rcx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction does not require a prefix.
			return 0x64
		}

	case REG_FS:
		return 0x64

	case REG_GS:
		return 0x65
	}

	return 0
}

// oclassRegList returns multisource operand class for addr.
// The register range is decoded from addr.Offset. Only ranges whose
// register indexes differ by exactly 3 and whose ends belong to the same
// vector register file are accepted; everything else yields Yxxx.
// On 386 any register index of 8 or more is rejected.
func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
	// TODO(quasilyte): when oclass register case is refactored into
	// lookup table, use it here to get register kind more easily.
	// Helper functions like regIsXmm should go away too (they will become redundant).

	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }

	reg0, reg1 := decodeRegisterRange(addr.Offset)
	low := regIndex(int16(reg0))
	high := regIndex(int16(reg1))

	if ctxt.Arch.Family == sys.I386 {
		if low >= 8 || high >= 8 {
			return Yxxx
		}
	}

	switch high - low {
	case 3:
		switch {
		case regIsXmm(reg0) && regIsXmm(reg1):
			return YxrEvexMulti4
		case regIsYmm(reg0) && regIsYmm(reg1):
			return YyrEvexMulti4
		case regIsZmm(reg0) && regIsZmm(reg1):
			return YzrMulti4
		default:
			return Yxxx
		}
	default:
		return Yxxx
	}
}

// oclassVMem returns V-mem (vector memory with VSIB) operand class.
// For addr that is not V-mem returns (Yxxx, false).
// When the index is a vector register numbered 8 or above and the target is
// 386, it returns (Yxxx, true): the operand is V-mem but not encodable.
func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
	switch addr.Index {
	case REG_X0 + 0,
		REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7:
		return Yxvm, true
	case REG_X8 + 0,
		REG_X8 + 1,
		REG_X8 + 2,
		REG_X8 + 3,
		REG_X8 + 4,
		REG_X8 + 5,
		REG_X8 + 6,
		REG_X8 + 7:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yxvm, true
	case REG_X16 + 0,
		REG_X16 + 1,
		REG_X16 + 2,
		REG_X16 + 3,
		REG_X16 + 4,
		REG_X16 + 5,
		REG_X16 + 6,
		REG_X16 + 7,
		REG_X16 + 8,
		REG_X16 + 9,
		REG_X16 + 10,
		REG_X16 + 11,
		REG_X16 + 12,
		REG_X16 + 13,
		REG_X16 + 14,
		REG_X16 + 15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return YxvmEvex, true

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7:
		return Yyvm, true
	case REG_Y8 + 0,
		REG_Y8 + 1,
		REG_Y8 + 2,
		REG_Y8 + 3,
		REG_Y8 + 4,
		REG_Y8 + 5,
		REG_Y8 + 6,
		REG_Y8 + 7:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yyvm, true
	case REG_Y16 + 0,
		REG_Y16 + 1,
		REG_Y16 + 2,
		REG_Y16 + 3,
		REG_Y16 + 4,
		REG_Y16 + 5,
		REG_Y16 + 6,
		REG_Y16 + 7,
		REG_Y16 + 8,
		REG_Y16 + 9,
		REG_Y16 + 10,
		REG_Y16 + 11,
		REG_Y16 + 12,
		REG_Y16 + 13,
		REG_Y16 + 14,
		REG_Y16 + 15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return YyvmEvex, true

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzvm, true
	case REG_Z8 + 0,
		REG_Z8 + 1,
		REG_Z8 + 2,
		REG_Z8 + 3,
		REG_Z8 + 4,
		REG_Z8 + 5,
		REG_Z8 + 6,
		REG_Z8 + 7,
		REG_Z8 + 8,
		REG_Z8 + 9,
		REG_Z8 + 10,
		REG_Z8 + 11,
		REG_Z8 + 12,
		REG_Z8 + 13,
		REG_Z8 + 14,
		REG_Z8 + 15,
		REG_Z8 + 16,
		REG_Z8 + 17,
		REG_Z8 + 18,
		REG_Z8 + 19,
		REG_Z8 + 20,
		REG_Z8 + 21,
		REG_Z8 + 22,
		REG_Z8 + 23:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx, true
		}
		return Yzvm, true
	}

	return Yxxx, false
}

// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p, or Yxxx when the operand is not acceptable. p is used
// only to format diagnostics. Non-register operands are classified by
// a.Type first; register operands are classified by a.Reg.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_REGLIST:
		return oclassRegList(ctxt, a)

	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		// Pseudo registers have negative index, but SP is
		// not pseudo on x86, hence REG_SP check is not redundant.
		if a.Index == REG_SP || a.Index < 0 {
			// Can't use FP/SB/PC/SP as the index register.
			return Yxxx
		}

		if vmem, ok := oclassVMem(ctxt, a); ok {
			return vmem
		}

		if ctxt.Arch.Family == sys.AMD64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// A symbol-less address is classified like a constant.
		fallthrough

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			// On 386 only the low 32 bits matter; sign-extend them.
			v = int64(int32(v))
		}
		// Pick the narrowest class that holds v; the case order matters.
		switch {
		case v == 0:
			return Yi0
		case v == 1:
			return Yi1
		case v >= 0 && v <= 3:
			return Yu2
		case v >= 0 && v <= 127:
			return Yu7
		case v >= 0 && v <= 255:
			return Yu8
		case v >= -128 && v <= 127:
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 // can sign extend
		}
		if v>>32 == 0 {
			return Yi32 // unsigned
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		// These byte registers are rejected on 386.
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	case REG_R8, // not really Yrl
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0:
		return Yxr0

	case REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_X0 + 16,
		REG_X0 + 17,
		REG_X0 + 18,
		REG_X0 + 19,
		REG_X0 + 20,
		REG_X0 + 21,
		REG_X0 + 22,
		REG_X0 + 23,
		REG_X0 + 24,
		REG_X0 + 25,
		REG_X0 + 26,
		REG_X0 + 27,
		REG_X0 + 28,
		REG_X0 + 29,
		REG_X0 + 30,
		REG_X0 + 31:
		return YxrEvex

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_Y0 + 16,
		REG_Y0 + 17,
		REG_Y0 + 18,
		REG_Y0 + 19,
		REG_Y0 + 20,
		REG_Y0 + 21,
		REG_Y0 + 22,
		REG_Y0 + 23,
		REG_Y0 + 24,
		REG_Y0 + 25,
		REG_Y0 + 26,
		REG_Y0 + 27,
		REG_Y0 + 28,
		REG_Y0 + 29,
		REG_Y0 + 30,
		REG_Y0 + 31:
		return YyrEvex

	case REG_Z0 + 0,
		REG_Z0 + 1,
		REG_Z0 + 2,
		REG_Z0 + 3,
		REG_Z0 + 4,
		REG_Z0 + 5,
		REG_Z0 + 6,
		REG_Z0 + 7:
		return Yzr

	case REG_Z0 + 8,
		REG_Z0 + 9,
		REG_Z0 + 10,
		REG_Z0 + 11,
		REG_Z0 + 12,
		REG_Z0 + 13,
		REG_Z0 + 14,
		REG_Z0 + 15,
		REG_Z0 + 16,
		REG_Z0 + 17,
		REG_Z0 + 18,
		REG_Z0 + 19,
		REG_Z0 + 20,
		REG_Z0 + 21,
		REG_Z0 + 22,
		REG_Z0 + 23,
		REG_Z0 + 24,
		REG_Z0 + 25,
		REG_Z0 + 26,
		REG_Z0 + 27,
		REG_Z0 + 28,
		REG_Z0 + 29,
		REG_Z0 + 30,
		REG_Z0 + 31:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		return Yzr

	case REG_K0:
		return Yk0

	case REG_K0 + 1,
		REG_K0 + 2,
		REG_K0 + 3,
		REG_K0 + 4,
		REG_K0 + 5,
		REG_K0 + 6,
		REG_K0 + 7:
		return Yknot0

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	return Yxxx
}

// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// The Put* methods write into the fixed-size buf at off without checking
// for space, so writing past the end of buf panics with an index error.
type AsmBuf struct {
	buf      [100]byte
	off      int // number of bytes currently held in buf
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}

// Put1 appends one byte to the end of the buffer.
func (ab *AsmBuf) Put1(x byte) {
	ab.buf[ab.off] = x
	ab.off++
}

// Put2 appends two bytes to the end of the buffer.
func (ab *AsmBuf) Put2(x, y byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.off += 2
}

// Put3 appends three bytes to the end of the buffer.
func (ab *AsmBuf) Put3(x, y, z byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.buf[ab.off+2] = z
	ab.off += 3
}

// Put4 appends four bytes to the end of the buffer.
func (ab *AsmBuf) Put4(x, y, z, w byte) {
	ab.buf[ab.off+0] = x
	ab.buf[ab.off+1] = y
	ab.buf[ab.off+2] = z
	ab.buf[ab.off+3] = w
	ab.off += 4
}

// PutInt16 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt16(v int16) {
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.off += 2
}

// PutInt32 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt32(v int32) {
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.off += 4
}

// PutInt64 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt64(v int64) {
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.buf[ab.off+4] = byte(v >> 32)
	ab.buf[ab.off+5] = byte(v >> 40)
	ab.buf[ab.off+6] = byte(v >> 48)
	ab.buf[ab.off+7] = byte(v >> 56)
	ab.off += 8
}

// Put copies b into the buffer.
// The write offset advances by len(b) even if copy had room for fewer bytes.
func (ab *AsmBuf) Put(b []byte) {
	copy(ab.buf[ab.off:], b)
	ab.off += len(b)
}

// PutOpBytesLit writes zero terminated sequence of bytes from op,
// starting at specified offset (e.g. z counter value).
// Trailing 0 is not written.
//
// Intended to be used for literal Z cases.
// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
//
// If no zero byte follows offset, indexing op out of range panics.
func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
	for int(op[offset]) != 0 {
		ab.Put1(byte(op[offset]))
		offset++
	}
}

// Insert inserts b at offset i.
// The bytes from i up to the old end of the buffer are shifted up by one.
func (ab *AsmBuf) Insert(i int, b byte) {
	ab.off++
	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
	ab.buf[i] = b
}

// Last returns the byte at the end of the buffer.
// It must not be called on an empty buffer (index -1 panics).
func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }

// Len returns the length of the buffer.
func (ab *AsmBuf) Len() int { return ab.off }

// Bytes returns the contents of the buffer.
// The result aliases the buffer's storage; it is not a copy.
func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }

// Reset empties the buffer.
// Only the write offset is reset; the other fields are left untouched.
func (ab *AsmBuf) Reset() { ab.off = 0 }

// At returns the byte at offset i.
// i is checked against the capacity of buf only, not against Len.
func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }

// asmidx emits SIB byte.
//
// The byte is assembled as scale (bits 6-7), index (bits 3-5) and base
// (bits 0-2), using reg[] for the register numbers. An index of REG_NONE
// is encoded as 4 and skips the scale check; a base of REG_NONE is encoded
// as 5. Registers numbered 8 and above are rejected on 386. Any other
// scale, index or base reports a diagnostic and emits a zero byte instead.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	// Scale factor 1, 2, 4 or 8 is stored as 0..3 in the top two bits.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}

// relput4 appends the 32-bit value of operand a to the buffer.
// If vaddr reports a relocation for a, the relocation is added to cursym
// with its offset set to the position the four bytes are written at
// (p.Pc plus the current buffer length). A relocation size other than 4
// is reported as "bad reloc".
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the value to encode for operand a and, when the operand
// needs one, describes the relocation in *r (which is zeroed first when
// non-nil).
//
// For static, extern and GOT references it returns 0 and fills *r with a
// 4-byte R_GOTPCREL, R_ADDR (when useAbs reports true) or R_PCREL
// relocation carrying a.Offset as the addend. For memory/address operands
// based on REG_TLS it returns 0 and, unless building shared for a target
// that is neither Android nor Darwin, fills *r with a 4-byte R_TLS_LE
// relocation. In both cases r.Off is set to -1 for the caller to fill in,
// and a nil r is fatal. Otherwise it returns a.Offset.
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		// In the remaining (shared, non-Android, non-Darwin) case *r stays
		// zeroed, so callers see rel.Siz == 0 and add no relocation.
		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
	}

	return a.Offset
}

func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	if a.Index != REG_NONE && a.Index != REG_TLS {
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family ==
// asmandsz encodes operand a into ab: it emits the addressing byte whose
// middle three bits carry r, followed where required by the index byte
// produced by asmidx and by a 1-byte or 4-byte displacement. Register
// extension bits needed by the operand are accumulated into ab.rexflag
// together with the caller-supplied rex (masked to 0x40|Rxr).
//
// When a relocation is required it is attached to cursym at the position of
// the 4-byte displacement. Operands that cannot be encoded are reported via
// ctxt.Diag ("asmand: bad address"). m64 is not referenced in the body.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// v is the displacement as it will be written (truncated to 32 bits).
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks only
		// add more specific diagnostics before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register operand: a single byte with the top two bits set to 3,
		// no displacement. A non-zero offset on a register is rejected.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: the low three bits are
	// always 4 and asmidx emits the scale/index/base byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base shadows the function-level base for this branch.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: always followed by a 4-byte displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement, no relocation: nothing follows the index byte.
		// BP and R13 are excluded from this short form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement representable in one byte (as decided by toDisp8).
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Otherwise a 4-byte displacement (possibly relocated).
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS): the
	// operand is a bare 4-byte displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Single-byte form (low bits 5): used for symbol references that are
		// not absolute, and always outside AMD64.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through asmidx (with no index register).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
			// Move the displacement into an R_TLS_LE relocation addend and
			// write zero in the instruction bytes.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 4-byte displacement, recording the relocation (if any) at the
	// position where those bytes will land.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}
3508 } 3509 3510 goto bad 3511 3512 putrelv: 3513 if rel.Siz != 0 { 3514 if rel.Siz != 4 { 3515 ctxt.Diag("bad rel") 3516 goto bad 3517 } 3518 3519 r := obj.Addrel(cursym) 3520 *r = rel 3521 r.Off = int32(p.Pc + int64(ab.Len())) 3522 } 3523 3524 ab.PutInt32(v) 3525 return 3526 3527 bad: 3528 ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a)) 3529 } 3530 3531 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) { 3532 ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0) 3533 } 3534 3535 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) { 3536 ab.asmandsz(ctxt, cursym, p, a, o, 0, 0) 3537 } 3538 3539 func bytereg(a *obj.Addr, t *uint8) { 3540 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) { 3541 a.Reg += REG_AL - REG_AX 3542 *t = 0 3543 } 3544 } 3545 3546 func unbytereg(a *obj.Addr, t *uint8) { 3547 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) { 3548 a.Reg += REG_AX - REG_AL 3549 *t = 0 3550 } 3551 } 3552 3553 const ( 3554 movLit uint8 = iota // Like Zlit 3555 movRegMem 3556 movMemReg 3557 movRegMem2op 3558 movMemReg2op 3559 movFullPtr // Load full pointer, trash heap (unsupported) 3560 movDoubleShift 3561 movTLSReg 3562 ) 3563 3564 var ymovtab = []movtab{ 3565 // push 3566 {APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}}, 3567 {APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}}, 3568 {APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}}, 3569 {APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}}, 3570 {APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, 3571 {APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, 3572 {APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, 3573 {APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, 3574 {APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}}, 3575 {APUSHW, Yss, Ynone, Ynone, movLit, 
// ymovtab lists special-case encodings. Each entry gives an instruction,
// three operand classes (Ynone where a position is unused), one of the mov*
// codes, and up to four encoding bytes. The table ends with an all-zero
// entry.
// NOTE(review): the movtab field names and the lookup loop are not visible
// here; the all-zero entry is presumably its terminator — confirm.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
0x00, 3, 0}}, 3675 {AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}}, 3676 3677 /* load full pointer - unsupported 3678 {AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}}, 3679 {AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}}, 3680 */ 3681 3682 // double shift 3683 {ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3684 {ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3685 {ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3686 {ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3687 {ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3688 {ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3689 {ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3690 {ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3691 {ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3692 {ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3693 {ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3694 {ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3695 {ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3696 {ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3697 {ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3698 {ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3699 {ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3700 {ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3701 3702 // load TLS base 3703 {AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, 3704 {AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, 3705 {0, 0, 0, 0, 0, [4]uint8{}}, 3706 } 3707 3708 func isax(a *obj.Addr) bool { 3709 switch a.Reg { 3710 case REG_AX, REG_AL, REG_AH: 3711 return true 3712 } 3713 3714 if a.Index == REG_AX { 3715 return true 3716 } 3717 return 
false 3718 } 3719 3720 func subreg(p *obj.Prog, from int, to int) { 3721 if false { /* debug['Q'] */ 3722 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to)) 3723 } 3724 3725 if int(p.From.Reg) == from { 3726 p.From.Reg = int16(to) 3727 p.Ft = 0 3728 } 3729 3730 if int(p.To.Reg) == from { 3731 p.To.Reg = int16(to) 3732 p.Tt = 0 3733 } 3734 3735 if int(p.From.Index) == from { 3736 p.From.Index = int16(to) 3737 p.Ft = 0 3738 } 3739 3740 if int(p.To.Index) == from { 3741 p.To.Index = int16(to) 3742 p.Tt = 0 3743 } 3744 3745 if false { /* debug['Q'] */ 3746 fmt.Printf("%v\n", p) 3747 } 3748 } 3749 3750 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int { 3751 switch op { 3752 case Pm, Pe, Pf2, Pf3: 3753 if osize != 1 { 3754 if op != Pm { 3755 ab.Put1(byte(op)) 3756 } 3757 ab.Put1(Pm) 3758 z++ 3759 op = int(o.op[z]) 3760 break 3761 } 3762 fallthrough 3763 3764 default: 3765 if ab.Len() == 0 || ab.Last() != Pm { 3766 ab.Put1(Pm) 3767 } 3768 } 3769 3770 ab.Put1(byte(op)) 3771 return z 3772 } 3773 3774 var bpduff1 = []byte{ 3775 0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP) 3776 0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP 3777 } 3778 3779 var bpduff2 = []byte{ 3780 0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP 3781 } 3782 3783 // asmevex emits EVEX pregis and opcode byte. 3784 // In addition to asmvex r/m, vvvv and reg fields also requires optional 3785 // K-masking register. 3786 // 3787 // Expects asmbuf.evex to be properly initialized. 3788 func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) { 3789 ab.evexflag = true 3790 evex := ab.evex 3791 3792 rexR := byte(1) 3793 evexR := byte(1) 3794 rexX := byte(1) 3795 rexB := byte(1) 3796 if r != nil { 3797 if regrex[r.Reg]&Rxr != 0 { 3798 rexR = 0 // "ModR/M.reg" selector 4th bit. 3799 } 3800 if regrex[r.Reg]&RxrEvex != 0 { 3801 evexR = 0 // "ModR/M.reg" selector 5th bit. 
// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Five bytes are written: the 0x62 escape byte, the three payload bytes
// P0..P2 assembled below, and evex.opcode. Any of rm, v, r and k may be nil,
// in which case the corresponding fields keep their defaults. Suffix-related
// errors (zeroing, rounding, broadcast, SAE) are reported via ctxt.Diag but
// do not stop the bytes from being emitted.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, R', X and B bits are stored inverted: they default to 1 and are
	// cleared when the corresponding register-extension flag is set.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// With no index register, the X bit carries rm.Reg's RxrEvex flag;
		// otherwise it carries the index register's Rxx flag.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// vvvv is stored inverted as well.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	// The instruction suffix (looked up by p.Scond) selects zeroing and at
	// most one of rounding, broadcast or SAE.
	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// The rounding mode replaces the vector-length bits.
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' (inverted): taken from the index register of rm if it has the
	// RxrEvex flag, otherwise from v.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
// Any of rm, v and r may be nil; the corresponding bits then stay zero
// before inversion. The 2-byte form (0xc5) is chosen when possible,
// otherwise the 3-byte form (0xc4) is written.
// For details about vex prefix see:
// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
	ab.vexflag = true
	rexR := 0
	if r != nil {
		rexR = regrex[r.Reg] & Rxr
	}
	rexB := 0
	rexX := 0
	if rm != nil {
		rexB = regrex[rm.Reg] & Rxb
		rexX = regrex[rm.Index] & Rxx
	}
	// Split the packed vex argument: bits 3..5 go to vexM, bits 7 and 0..2
	// are passed through unchanged into the last prefix byte.
	vexM := (vex >> 3) & 0x7
	vexWLP := vex & 0x87
	vexV := byte(0)
	if v != nil {
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// vvvv is stored inverted.
	vexV ^= 0xF
	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
		// Can use 2-byte encoding.
		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
	} else {
		// Must use 3-byte encoding.
		ab.Put3(0xc4,
			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
			vexV<<3|vexWLP,
		)
	}
	ab.Put1(opcode)
}
3931 // 3932 // R : 3 bit | legacy instructions | N/A 3933 // [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr 3934 // EVEX.R : 1 bit | EVEX extension bit | RxrEvex 3935 // 3936 // Examples: 3937 // REG_Z30 => 30 3938 // REG_X15 => 15 3939 // REG_R9 => 9 3940 // REG_AX => 0 3941 // 3942 func regIndex(r int16) int { 3943 lower3bits := reg[r] 3944 high4bit := regrex[r] & Rxr << 1 3945 high5bit := regrex[r] & RxrEvex << 0 3946 return lower3bits | high4bit | high5bit 3947 } 3948 3949 // avx2gatherValid reports whether p satisfies AVX2 gather constraints. 3950 // Reports errors via ctxt. 3951 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool { 3952 // If any pair of the index, mask, or destination registers 3953 // are the same, illegal instruction trap (#UD) is triggered. 3954 index := regIndex(p.GetFrom3().Index) 3955 mask := regIndex(p.From.Reg) 3956 dest := regIndex(p.To.Reg) 3957 if dest == mask || dest == index || mask == index { 3958 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p) 3959 return false 3960 } 3961 3962 return true 3963 } 3964 3965 // avx512gatherValid reports whether p satisfies AVX512 gather constraints. 3966 // Reports errors via ctxt. 3967 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool { 3968 // Illegal instruction trap (#UD) is triggered if the destination vector 3969 // register is the same as index vector in VSIB. 
3970 index := regIndex(p.From.Index) 3971 dest := regIndex(p.To.Reg) 3972 if dest == index { 3973 ctxt.Diag("index and destination registers should be distinct: %v", p) 3974 return false 3975 } 3976 3977 return true 3978 } 3979 3980 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 3981 o := opindex[p.As&obj.AMask] 3982 3983 if o == nil { 3984 ctxt.Diag("asmins: missing op %v", p) 3985 return 3986 } 3987 3988 if pre := prefixof(ctxt, &p.From); pre != 0 { 3989 ab.Put1(byte(pre)) 3990 } 3991 if pre := prefixof(ctxt, &p.To); pre != 0 { 3992 ab.Put1(byte(pre)) 3993 } 3994 3995 // Checks to warn about instruction/arguments combinations that 3996 // will unconditionally trigger illegal instruction trap (#UD). 3997 switch p.As { 3998 case AVGATHERDPD, 3999 AVGATHERQPD, 4000 AVGATHERDPS, 4001 AVGATHERQPS, 4002 AVPGATHERDD, 4003 AVPGATHERQD, 4004 AVPGATHERDQ, 4005 AVPGATHERQQ: 4006 // AVX512 gather requires explicit K mask. 4007 if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 { 4008 if !avx512gatherValid(ctxt, p) { 4009 return 4010 } 4011 } else { 4012 if !avx2gatherValid(ctxt, p) { 4013 return 4014 } 4015 } 4016 } 4017 4018 if p.Ft == 0 { 4019 p.Ft = uint8(oclass(ctxt, p, &p.From)) 4020 } 4021 if p.Tt == 0 { 4022 p.Tt = uint8(oclass(ctxt, p, &p.To)) 4023 } 4024 4025 ft := int(p.Ft) * Ymax 4026 var f3t int 4027 tt := int(p.Tt) * Ymax 4028 4029 xo := obj.Bool2int(o.op[0] == 0x0f) 4030 z := 0 4031 var a *obj.Addr 4032 var l int 4033 var op int 4034 var q *obj.Prog 4035 var r *obj.Reloc 4036 var rel obj.Reloc 4037 var v int64 4038 4039 args := make([]int, 0, argListMax) 4040 if ft != Ynone*Ymax { 4041 args = append(args, ft) 4042 } 4043 for i := range p.RestArgs { 4044 args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax) 4045 } 4046 if tt != Ynone*Ymax { 4047 args = append(args, tt) 4048 } 4049 4050 for _, yt := range o.ytab { 4051 // ytab matching is purely args-based, 4052 // but AVX512 suffixes like "Z" or "RU_SAE" will 4053 // add 
EVEX-only filter that will reject non-EVEX matches. 4054 // 4055 // Consider "VADDPD.BCST 2032(DX), X0, X0". 4056 // Without this rule, operands will lead to VEX-encoded form 4057 // and produce "c5b15813" encoding. 4058 if !yt.match(args) { 4059 // "xo" is always zero for VEX/EVEX encoded insts. 4060 z += int(yt.zoffset) + xo 4061 } else { 4062 if p.Scond != 0 && !evexZcase(yt.zcase) { 4063 // Do not signal error and continue to search 4064 // for matching EVEX-encoded form. 4065 z += int(yt.zoffset) 4066 continue 4067 } 4068 4069 switch o.prefix { 4070 case Px1: // first option valid only in 32-bit mode 4071 if ctxt.Arch.Family == sys.AMD64 && z == 0 { 4072 z += int(yt.zoffset) + xo 4073 continue 4074 } 4075 case Pq: // 16 bit escape and opcode escape 4076 ab.Put2(Pe, Pm) 4077 4078 case Pq3: // 16 bit escape and opcode escape + REX.W 4079 ab.rexflag |= Pw 4080 ab.Put2(Pe, Pm) 4081 4082 case Pq4: // 66 0F 38 4083 ab.Put3(0x66, 0x0F, 0x38) 4084 4085 case Pq4w: // 66 0F 38 + REX.W 4086 ab.rexflag |= Pw 4087 ab.Put3(0x66, 0x0F, 0x38) 4088 4089 case Pq5: // F3 0F 38 4090 ab.Put3(0xF3, 0x0F, 0x38) 4091 4092 case Pq5w: // F3 0F 38 + REX.W 4093 ab.rexflag |= Pw 4094 ab.Put3(0xF3, 0x0F, 0x38) 4095 4096 case Pf2, // xmm opcode escape 4097 Pf3: 4098 ab.Put2(o.prefix, Pm) 4099 4100 case Pef3: 4101 ab.Put3(Pe, Pf3, Pm) 4102 4103 case Pfw: // xmm opcode escape + REX.W 4104 ab.rexflag |= Pw 4105 ab.Put2(Pf3, Pm) 4106 4107 case Pm: // opcode escape 4108 ab.Put1(Pm) 4109 4110 case Pe: // 16 bit escape 4111 ab.Put1(Pe) 4112 4113 case Pw: // 64-bit escape 4114 if ctxt.Arch.Family != sys.AMD64 { 4115 ctxt.Diag("asmins: illegal 64: %v", p) 4116 } 4117 ab.rexflag |= Pw 4118 4119 case Pw8: // 64-bit escape if z >= 8 4120 if z >= 8 { 4121 if ctxt.Arch.Family != sys.AMD64 { 4122 ctxt.Diag("asmins: illegal 64: %v", p) 4123 } 4124 ab.rexflag |= Pw 4125 } 4126 4127 case Pb: // botch 4128 if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) { 4129 goto bad 4130 } 4131 
// NOTE(rsc): This is probably safe to do always, 4132 // but when enabled it chooses different encodings 4133 // than the old cmd/internal/obj/i386 code did, 4134 // which breaks our "same bits out" checks. 4135 // In particular, CMPB AX, $0 encodes as 80 f8 00 4136 // in the original obj/i386, and it would encode 4137 // (using a valid, shorter form) as 3c 00 if we enabled 4138 // the call to bytereg here. 4139 if ctxt.Arch.Family == sys.AMD64 { 4140 bytereg(&p.From, &p.Ft) 4141 bytereg(&p.To, &p.Tt) 4142 } 4143 4144 case P32: // 32 bit but illegal if 64-bit mode 4145 if ctxt.Arch.Family == sys.AMD64 { 4146 ctxt.Diag("asmins: illegal in 64-bit mode: %v", p) 4147 } 4148 4149 case Py: // 64-bit only, no prefix 4150 if ctxt.Arch.Family != sys.AMD64 { 4151 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4152 } 4153 4154 case Py1: // 64-bit only if z < 1, no prefix 4155 if z < 1 && ctxt.Arch.Family != sys.AMD64 { 4156 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4157 } 4158 4159 case Py3: // 64-bit only if z < 3, no prefix 4160 if z < 3 && ctxt.Arch.Family != sys.AMD64 { 4161 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4162 } 4163 } 4164 4165 if z >= len(o.op) { 4166 log.Fatalf("asmins bad table %v", p) 4167 } 4168 op = int(o.op[z]) 4169 if op == 0x0f { 4170 ab.Put1(byte(op)) 4171 z++ 4172 op = int(o.op[z]) 4173 } 4174 4175 switch yt.zcase { 4176 default: 4177 ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p) 4178 return 4179 4180 case Zpseudo: 4181 break 4182 4183 case Zlit: 4184 ab.PutOpBytesLit(z, &o.op) 4185 4186 case Zlitr_m: 4187 ab.PutOpBytesLit(z, &o.op) 4188 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4189 4190 case Zlitm_r: 4191 ab.PutOpBytesLit(z, &o.op) 4192 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4193 4194 case Zlit_m_r: 4195 ab.PutOpBytesLit(z, &o.op) 4196 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4197 4198 case Zmb_r: 4199 bytereg(&p.From, &p.Ft) 4200 fallthrough 4201 
4202 case Zm_r: 4203 ab.Put1(byte(op)) 4204 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4205 4206 case Z_m_r: 4207 ab.Put1(byte(op)) 4208 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4209 4210 case Zm2_r: 4211 ab.Put2(byte(op), o.op[z+1]) 4212 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4213 4214 case Zm_r_xm: 4215 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4216 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4217 4218 case Zm_r_xm_nr: 4219 ab.rexflag = 0 4220 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4221 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4222 4223 case Zm_r_i_xm: 4224 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4225 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3()) 4226 ab.Put1(byte(p.To.Offset)) 4227 4228 case Zibm_r, Zibr_m: 4229 ab.PutOpBytesLit(z, &o.op) 4230 if yt.zcase == Zibr_m { 4231 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4232 } else { 4233 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4234 } 4235 switch { 4236 default: 4237 ab.Put1(byte(p.From.Offset)) 4238 case yt.args[0] == Yi32 && o.prefix == Pe: 4239 ab.PutInt16(int16(p.From.Offset)) 4240 case yt.args[0] == Yi32: 4241 ab.PutInt32(int32(p.From.Offset)) 4242 } 4243 4244 case Zaut_r: 4245 ab.Put1(0x8d) // leal 4246 if p.From.Type != obj.TYPE_ADDR { 4247 ctxt.Diag("asmins: Zaut sb type ADDR") 4248 } 4249 p.From.Type = obj.TYPE_MEM 4250 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4251 p.From.Type = obj.TYPE_ADDR 4252 4253 case Zm_o: 4254 ab.Put1(byte(op)) 4255 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4256 4257 case Zr_m: 4258 ab.Put1(byte(op)) 4259 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4260 4261 case Zvex: 4262 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4263 4264 case Zvex_rm_v_r: 4265 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4266 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4267 4268 case Zvex_rm_v_ro: 4269 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4270 ab.asmando(ctxt, cursym, p, &p.From, 
int(o.op[z+2])) 4271 4272 case Zvex_i_rm_vo: 4273 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4274 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2])) 4275 ab.Put1(byte(p.From.Offset)) 4276 4277 case Zvex_i_r_v: 4278 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4279 regnum := byte(0x7) 4280 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 { 4281 regnum &= byte(p.GetFrom3().Reg - REG_X0) 4282 } else { 4283 regnum &= byte(p.GetFrom3().Reg - REG_Y0) 4284 } 4285 ab.Put1(o.op[z+2] | regnum) 4286 ab.Put1(byte(p.From.Offset)) 4287 4288 case Zvex_i_rm_v_r: 4289 imm, from, from3, to := unpackOps4(p) 4290 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4291 ab.asmand(ctxt, cursym, p, from, to) 4292 ab.Put1(byte(imm.Offset)) 4293 4294 case Zvex_i_rm_r: 4295 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1]) 4296 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4297 ab.Put1(byte(p.From.Offset)) 4298 4299 case Zvex_v_rm_r: 4300 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1]) 4301 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4302 4303 case Zvex_r_v_rm: 4304 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) 4305 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4306 4307 case Zvex_rm_r_vo: 4308 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) 4309 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) 4310 4311 case Zvex_i_r_rm: 4312 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) 4313 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4314 ab.Put1(byte(p.From.Offset)) 4315 4316 case Zvex_hr_rm_v_r: 4317 hr, from, from3, to := unpackOps4(p) 4318 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4319 ab.asmand(ctxt, cursym, p, from, to) 4320 ab.Put1(byte(regIndex(hr.Reg) << 4)) 4321 4322 case Zevex_k_rmo: 4323 ab.evex = newEVEXBits(z, &o.op) 4324 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From) 4325 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3])) 4326 4327 
case Zevex_i_rm_vo: 4328 ab.evex = newEVEXBits(z, &o.op) 4329 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil) 4330 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3])) 4331 ab.Put1(byte(p.From.Offset)) 4332 4333 case Zevex_i_rm_k_vo: 4334 imm, from, kmask, to := unpackOps4(p) 4335 ab.evex = newEVEXBits(z, &o.op) 4336 ab.asmevex(ctxt, p, from, to, nil, kmask) 4337 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3])) 4338 ab.Put1(byte(imm.Offset)) 4339 4340 case Zevex_i_r_rm: 4341 ab.evex = newEVEXBits(z, &o.op) 4342 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil) 4343 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4344 ab.Put1(byte(p.From.Offset)) 4345 4346 case Zevex_i_r_k_rm: 4347 imm, from, kmask, to := unpackOps4(p) 4348 ab.evex = newEVEXBits(z, &o.op) 4349 ab.asmevex(ctxt, p, to, nil, from, kmask) 4350 ab.asmand(ctxt, cursym, p, to, from) 4351 ab.Put1(byte(imm.Offset)) 4352 4353 case Zevex_i_rm_r: 4354 ab.evex = newEVEXBits(z, &o.op) 4355 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil) 4356 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4357 ab.Put1(byte(p.From.Offset)) 4358 4359 case Zevex_i_rm_k_r: 4360 imm, from, kmask, to := unpackOps4(p) 4361 ab.evex = newEVEXBits(z, &o.op) 4362 ab.asmevex(ctxt, p, from, nil, to, kmask) 4363 ab.asmand(ctxt, cursym, p, from, to) 4364 ab.Put1(byte(imm.Offset)) 4365 4366 case Zevex_i_rm_v_r: 4367 imm, from, from3, to := unpackOps4(p) 4368 ab.evex = newEVEXBits(z, &o.op) 4369 ab.asmevex(ctxt, p, from, from3, to, nil) 4370 ab.asmand(ctxt, cursym, p, from, to) 4371 ab.Put1(byte(imm.Offset)) 4372 4373 case Zevex_i_rm_v_k_r: 4374 imm, from, from3, kmask, to := unpackOps5(p) 4375 ab.evex = newEVEXBits(z, &o.op) 4376 ab.asmevex(ctxt, p, from, from3, to, kmask) 4377 ab.asmand(ctxt, cursym, p, from, to) 4378 ab.Put1(byte(imm.Offset)) 4379 4380 case Zevex_r_v_rm: 4381 ab.evex = newEVEXBits(z, &o.op) 4382 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil) 4383 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4384 4385 case 
Zevex_rm_v_r: 4386 ab.evex = newEVEXBits(z, &o.op) 4387 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil) 4388 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4389 4390 case Zevex_rm_k_r: 4391 ab.evex = newEVEXBits(z, &o.op) 4392 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3()) 4393 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4394 4395 case Zevex_r_k_rm: 4396 ab.evex = newEVEXBits(z, &o.op) 4397 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3()) 4398 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4399 4400 case Zevex_rm_v_k_r: 4401 from, from3, kmask, to := unpackOps4(p) 4402 ab.evex = newEVEXBits(z, &o.op) 4403 ab.asmevex(ctxt, p, from, from3, to, kmask) 4404 ab.asmand(ctxt, cursym, p, from, to) 4405 4406 case Zevex_r_v_k_rm: 4407 from, from3, kmask, to := unpackOps4(p) 4408 ab.evex = newEVEXBits(z, &o.op) 4409 ab.asmevex(ctxt, p, to, from3, from, kmask) 4410 ab.asmand(ctxt, cursym, p, to, from) 4411 4412 case Zr_m_xm: 4413 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4414 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4415 4416 case Zr_m_xm_nr: 4417 ab.rexflag = 0 4418 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4419 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4420 4421 case Zo_m: 4422 ab.Put1(byte(op)) 4423 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4424 4425 case Zcallindreg: 4426 r = obj.Addrel(cursym) 4427 r.Off = int32(p.Pc) 4428 r.Type = objabi.R_CALLIND 4429 r.Siz = 0 4430 fallthrough 4431 4432 case Zo_m64: 4433 ab.Put1(byte(op)) 4434 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1) 4435 4436 case Zm_ibo: 4437 ab.Put1(byte(op)) 4438 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4439 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil))) 4440 4441 case Zibo_m: 4442 ab.Put1(byte(op)) 4443 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4444 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4445 4446 case Zibo_m_xm: 4447 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4448 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4449 ab.Put1(byte(vaddr(ctxt, 
p, &p.From, nil))) 4450 4451 case Z_ib, Zib_: 4452 if yt.zcase == Zib_ { 4453 a = &p.From 4454 } else { 4455 a = &p.To 4456 } 4457 ab.Put1(byte(op)) 4458 if p.As == AXABORT { 4459 ab.Put1(o.op[z+1]) 4460 } 4461 ab.Put1(byte(vaddr(ctxt, p, a, nil))) 4462 4463 case Zib_rp: 4464 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4465 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil))) 4466 4467 case Zil_rp: 4468 ab.rexflag |= regrex[p.To.Reg] & Rxb 4469 ab.Put1(byte(op + reg[p.To.Reg])) 4470 if o.prefix == Pe { 4471 v = vaddr(ctxt, p, &p.From, nil) 4472 ab.PutInt16(int16(v)) 4473 } else { 4474 ab.relput4(ctxt, cursym, p, &p.From) 4475 } 4476 4477 case Zo_iw: 4478 ab.Put1(byte(op)) 4479 if p.From.Type != obj.TYPE_NONE { 4480 v = vaddr(ctxt, p, &p.From, nil) 4481 ab.PutInt16(int16(v)) 4482 } 4483 4484 case Ziq_rp: 4485 v = vaddr(ctxt, p, &p.From, &rel) 4486 l = int(v >> 32) 4487 if l == 0 && rel.Siz != 8 { 4488 ab.rexflag &^= (0x40 | Rxw) 4489 4490 ab.rexflag |= regrex[p.To.Reg] & Rxb 4491 ab.Put1(byte(0xb8 + reg[p.To.Reg])) 4492 if rel.Type != 0 { 4493 r = obj.Addrel(cursym) 4494 *r = rel 4495 r.Off = int32(p.Pc + int64(ab.Len())) 4496 } 4497 4498 ab.PutInt32(int32(v)) 4499 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend 4500 ab.Put1(0xc7) 4501 ab.asmando(ctxt, cursym, p, &p.To, 0) 4502 4503 ab.PutInt32(int32(v)) // need all 8 4504 } else { 4505 ab.rexflag |= regrex[p.To.Reg] & Rxb 4506 ab.Put1(byte(op + reg[p.To.Reg])) 4507 if rel.Type != 0 { 4508 r = obj.Addrel(cursym) 4509 *r = rel 4510 r.Off = int32(p.Pc + int64(ab.Len())) 4511 } 4512 4513 ab.PutInt64(v) 4514 } 4515 4516 case Zib_rr: 4517 ab.Put1(byte(op)) 4518 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4519 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4520 4521 case Z_il, Zil_: 4522 if yt.zcase == Zil_ { 4523 a = &p.From 4524 } else { 4525 a = &p.To 4526 } 4527 ab.Put1(byte(op)) 4528 if o.prefix == Pe { 4529 v = vaddr(ctxt, p, a, nil) 4530 ab.PutInt16(int16(v)) 4531 } else { 4532 
ab.relput4(ctxt, cursym, p, a) 4533 } 4534 4535 case Zm_ilo, Zilo_m: 4536 ab.Put1(byte(op)) 4537 if yt.zcase == Zilo_m { 4538 a = &p.From 4539 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4540 } else { 4541 a = &p.To 4542 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4543 } 4544 4545 if o.prefix == Pe { 4546 v = vaddr(ctxt, p, a, nil) 4547 ab.PutInt16(int16(v)) 4548 } else { 4549 ab.relput4(ctxt, cursym, p, a) 4550 } 4551 4552 case Zil_rr: 4553 ab.Put1(byte(op)) 4554 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4555 if o.prefix == Pe { 4556 v = vaddr(ctxt, p, &p.From, nil) 4557 ab.PutInt16(int16(v)) 4558 } else { 4559 ab.relput4(ctxt, cursym, p, &p.From) 4560 } 4561 4562 case Z_rp: 4563 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4564 ab.Put1(byte(op + reg[p.To.Reg])) 4565 4566 case Zrp_: 4567 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) 4568 ab.Put1(byte(op + reg[p.From.Reg])) 4569 4570 case Zcallcon, Zjmpcon: 4571 if yt.zcase == Zcallcon { 4572 ab.Put1(byte(op)) 4573 } else { 4574 ab.Put1(o.op[z+1]) 4575 } 4576 r = obj.Addrel(cursym) 4577 r.Off = int32(p.Pc + int64(ab.Len())) 4578 r.Type = objabi.R_PCREL 4579 r.Siz = 4 4580 r.Add = p.To.Offset 4581 ab.PutInt32(0) 4582 4583 case Zcallind: 4584 ab.Put2(byte(op), o.op[z+1]) 4585 r = obj.Addrel(cursym) 4586 r.Off = int32(p.Pc + int64(ab.Len())) 4587 if ctxt.Arch.Family == sys.AMD64 { 4588 r.Type = objabi.R_PCREL 4589 } else { 4590 r.Type = objabi.R_ADDR 4591 } 4592 r.Siz = 4 4593 r.Add = p.To.Offset 4594 r.Sym = p.To.Sym 4595 ab.PutInt32(0) 4596 4597 case Zcall, Zcallduff: 4598 if p.To.Sym == nil { 4599 ctxt.Diag("call without target") 4600 ctxt.DiagFlush() 4601 log.Fatalf("bad code") 4602 } 4603 4604 if yt.zcase == Zcallduff && ctxt.Flag_dynlink { 4605 ctxt.Diag("directly calling duff when dynamically linking Go") 4606 } 4607 4608 if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4609 // Maintain BP around call, since duffcopy/duffzero can't do it 4610 // (the 
call jumps into the middle of the function). 4611 // This makes it possible to see call sites for duffcopy/duffzero in 4612 // BP-based profiling tools like Linux perf (which is the 4613 // whole point of obj.Framepointer_enabled). 4614 // MOVQ BP, -16(SP) 4615 // LEAQ -16(SP), BP 4616 ab.Put(bpduff1) 4617 } 4618 ab.Put1(byte(op)) 4619 r = obj.Addrel(cursym) 4620 r.Off = int32(p.Pc + int64(ab.Len())) 4621 r.Sym = p.To.Sym 4622 r.Add = p.To.Offset 4623 r.Type = objabi.R_CALL 4624 r.Siz = 4 4625 ab.PutInt32(0) 4626 4627 if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4628 // Pop BP pushed above. 4629 // MOVQ 0(BP), BP 4630 ab.Put(bpduff2) 4631 } 4632 4633 // TODO: jump across functions needs reloc 4634 case Zbr, Zjmp, Zloop: 4635 if p.As == AXBEGIN { 4636 ab.Put1(byte(op)) 4637 } 4638 if p.To.Sym != nil { 4639 if yt.zcase != Zjmp { 4640 ctxt.Diag("branch to ATEXT") 4641 ctxt.DiagFlush() 4642 log.Fatalf("bad code") 4643 } 4644 4645 ab.Put1(o.op[z+1]) 4646 r = obj.Addrel(cursym) 4647 r.Off = int32(p.Pc + int64(ab.Len())) 4648 r.Sym = p.To.Sym 4649 // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that 4650 // it can point to a trampoline instead of the destination itself. 4651 r.Type = objabi.R_CALL 4652 r.Siz = 4 4653 ab.PutInt32(0) 4654 break 4655 } 4656 4657 // Assumes q is in this function. 4658 // TODO: Check in input, preserve in brchain. 4659 4660 // Fill in backward jump now. 
4661 q = p.Pcond 4662 4663 if q == nil { 4664 ctxt.Diag("jmp/branch/loop without target") 4665 ctxt.DiagFlush() 4666 log.Fatalf("bad code") 4667 } 4668 4669 if p.Back&branchBackwards != 0 { 4670 v = q.Pc - (p.Pc + 2) 4671 if v >= -128 && p.As != AXBEGIN { 4672 if p.As == AJCXZL { 4673 ab.Put1(0x67) 4674 } 4675 ab.Put2(byte(op), byte(v)) 4676 } else if yt.zcase == Zloop { 4677 ctxt.Diag("loop too far: %v", p) 4678 } else { 4679 v -= 5 - 2 4680 if p.As == AXBEGIN { 4681 v-- 4682 } 4683 if yt.zcase == Zbr { 4684 ab.Put1(0x0f) 4685 v-- 4686 } 4687 4688 ab.Put1(o.op[z+1]) 4689 ab.PutInt32(int32(v)) 4690 } 4691 4692 break 4693 } 4694 4695 // Annotate target; will fill in later. 4696 p.Forwd = q.Rel 4697 4698 q.Rel = p 4699 if p.Back&branchShort != 0 && p.As != AXBEGIN { 4700 if p.As == AJCXZL { 4701 ab.Put1(0x67) 4702 } 4703 ab.Put2(byte(op), 0) 4704 } else if yt.zcase == Zloop { 4705 ctxt.Diag("loop too far: %v", p) 4706 } else { 4707 if yt.zcase == Zbr { 4708 ab.Put1(0x0f) 4709 } 4710 ab.Put1(o.op[z+1]) 4711 ab.PutInt32(0) 4712 } 4713 4714 case Zbyte: 4715 v = vaddr(ctxt, p, &p.From, &rel) 4716 if rel.Siz != 0 { 4717 rel.Siz = uint8(op) 4718 r = obj.Addrel(cursym) 4719 *r = rel 4720 r.Off = int32(p.Pc + int64(ab.Len())) 4721 } 4722 4723 ab.Put1(byte(v)) 4724 if op > 1 { 4725 ab.Put1(byte(v >> 8)) 4726 if op > 2 { 4727 ab.PutInt16(int16(v >> 16)) 4728 if op > 4 { 4729 ab.PutInt32(int32(v >> 32)) 4730 } 4731 } 4732 } 4733 } 4734 4735 return 4736 } 4737 } 4738 f3t = Ynone * Ymax 4739 if p.GetFrom3() != nil { 4740 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax 4741 } 4742 for mo := ymovtab; mo[0].as != 0; mo = mo[1:] { 4743 var pp obj.Prog 4744 var t []byte 4745 if p.As == mo[0].as { 4746 if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 { 4747 t = mo[0].op[:] 4748 switch mo[0].code { 4749 default: 4750 ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p) 4751 4752 case movLit: 4753 for z = 0; t[z] != 0; z++ { 4754 
ab.Put1(t[z]) 4755 } 4756 4757 case movRegMem: 4758 ab.Put1(t[0]) 4759 ab.asmando(ctxt, cursym, p, &p.To, int(t[1])) 4760 4761 case movMemReg: 4762 ab.Put1(t[0]) 4763 ab.asmando(ctxt, cursym, p, &p.From, int(t[1])) 4764 4765 case movRegMem2op: // r,m - 2op 4766 ab.Put2(t[0], t[1]) 4767 ab.asmando(ctxt, cursym, p, &p.To, int(t[2])) 4768 ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40) 4769 4770 case movMemReg2op: 4771 ab.Put2(t[0], t[1]) 4772 ab.asmando(ctxt, cursym, p, &p.From, int(t[2])) 4773 ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40) 4774 4775 case movFullPtr: 4776 if t[0] != 0 { 4777 ab.Put1(t[0]) 4778 } 4779 switch p.To.Index { 4780 default: 4781 goto bad 4782 4783 case REG_DS: 4784 ab.Put1(0xc5) 4785 4786 case REG_SS: 4787 ab.Put2(0x0f, 0xb2) 4788 4789 case REG_ES: 4790 ab.Put1(0xc4) 4791 4792 case REG_FS: 4793 ab.Put2(0x0f, 0xb4) 4794 4795 case REG_GS: 4796 ab.Put2(0x0f, 0xb5) 4797 } 4798 4799 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4800 4801 case movDoubleShift: 4802 if t[0] == Pw { 4803 if ctxt.Arch.Family != sys.AMD64 { 4804 ctxt.Diag("asmins: illegal 64: %v", p) 4805 } 4806 ab.rexflag |= Pw 4807 t = t[1:] 4808 } else if t[0] == Pe { 4809 ab.Put1(Pe) 4810 t = t[1:] 4811 } 4812 4813 switch p.From.Type { 4814 default: 4815 goto bad 4816 4817 case obj.TYPE_CONST: 4818 ab.Put2(0x0f, t[0]) 4819 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 4820 ab.Put1(byte(p.From.Offset)) 4821 4822 case obj.TYPE_REG: 4823 switch p.From.Reg { 4824 default: 4825 goto bad 4826 4827 case REG_CL, REG_CX: 4828 ab.Put2(0x0f, t[1]) 4829 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 4830 } 4831 } 4832 4833 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 4834 // where you load the TLS base register into a register and then index off that 4835 // register to access the actual TLS variables. 
Systems that allow direct TLS access 4836 // are handled in prefixof above and should not be listed here. 4837 case movTLSReg: 4838 if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL { 4839 ctxt.Diag("invalid load of TLS: %v", p) 4840 } 4841 4842 if ctxt.Arch.Family == sys.I386 { 4843 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 4844 // where you load the TLS base register into a register and then index off that 4845 // register to access the actual TLS variables. Systems that allow direct TLS access 4846 // are handled in prefixof above and should not be listed here. 4847 switch ctxt.Headtype { 4848 default: 4849 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 4850 4851 case objabi.Hlinux, objabi.Hfreebsd: 4852 if ctxt.Flag_shared { 4853 // Note that this is not generating the same insns as the other cases. 4854 // MOV TLS, dst 4855 // becomes 4856 // call __x86.get_pc_thunk.dst 4857 // movl (gotpc + g@gotntpoff)(dst), dst 4858 // which is encoded as 4859 // call __x86.get_pc_thunk.dst 4860 // movq 0(dst), dst 4861 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access 4862 // is g, which we can't check here, but will when we assemble the second 4863 // instruction. 4864 dst := p.To.Reg 4865 ab.Put1(0xe8) 4866 r = obj.Addrel(cursym) 4867 r.Off = int32(p.Pc + int64(ab.Len())) 4868 r.Type = objabi.R_CALL 4869 r.Siz = 4 4870 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))) 4871 ab.PutInt32(0) 4872 4873 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3))) 4874 r = obj.Addrel(cursym) 4875 r.Off = int32(p.Pc + int64(ab.Len())) 4876 r.Type = objabi.R_TLS_IE 4877 r.Siz = 4 4878 r.Add = 2 4879 ab.PutInt32(0) 4880 } else { 4881 // ELF TLS base is 0(GS). 
4882 pp.From = p.From 4883 4884 pp.From.Type = obj.TYPE_MEM 4885 pp.From.Reg = REG_GS 4886 pp.From.Offset = 0 4887 pp.From.Index = REG_NONE 4888 pp.From.Scale = 0 4889 ab.Put2(0x65, // GS 4890 0x8B) 4891 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4892 } 4893 case objabi.Hplan9: 4894 pp.From = obj.Addr{} 4895 pp.From.Type = obj.TYPE_MEM 4896 pp.From.Name = obj.NAME_EXTERN 4897 pp.From.Sym = plan9privates 4898 pp.From.Offset = 0 4899 pp.From.Index = REG_NONE 4900 ab.Put1(0x8B) 4901 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4902 4903 case objabi.Hwindows: 4904 // Windows TLS base is always 0x14(FS). 4905 pp.From = p.From 4906 4907 pp.From.Type = obj.TYPE_MEM 4908 pp.From.Reg = REG_FS 4909 pp.From.Offset = 0x14 4910 pp.From.Index = REG_NONE 4911 pp.From.Scale = 0 4912 ab.Put2(0x64, // FS 4913 0x8B) 4914 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4915 } 4916 break 4917 } 4918 4919 switch ctxt.Headtype { 4920 default: 4921 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 4922 4923 case objabi.Hlinux, objabi.Hfreebsd: 4924 if !ctxt.Flag_shared { 4925 log.Fatalf("unknown TLS base location for linux/freebsd without -shared") 4926 } 4927 // Note that this is not generating the same insn as the other cases. 4928 // MOV TLS, R_to 4929 // becomes 4930 // movq g@gottpoff(%rip), R_to 4931 // which is encoded as 4932 // movq 0(%rip), R_to 4933 // and a R_TLS_IE reloc. This all assumes the only tls variable we access 4934 // is g, which we can't check here, but will when we assemble the second 4935 // instruction. 
4936 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr) 4937 4938 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3))) 4939 r = obj.Addrel(cursym) 4940 r.Off = int32(p.Pc + int64(ab.Len())) 4941 r.Type = objabi.R_TLS_IE 4942 r.Siz = 4 4943 r.Add = -4 4944 ab.PutInt32(0) 4945 4946 case objabi.Hplan9: 4947 pp.From = obj.Addr{} 4948 pp.From.Type = obj.TYPE_MEM 4949 pp.From.Name = obj.NAME_EXTERN 4950 pp.From.Sym = plan9privates 4951 pp.From.Offset = 0 4952 pp.From.Index = REG_NONE 4953 ab.rexflag |= Pw 4954 ab.Put1(0x8B) 4955 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4956 4957 case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. 4958 // TLS base is 0(FS). 4959 pp.From = p.From 4960 4961 pp.From.Type = obj.TYPE_MEM 4962 pp.From.Name = obj.NAME_NONE 4963 pp.From.Reg = REG_NONE 4964 pp.From.Offset = 0 4965 pp.From.Index = REG_NONE 4966 pp.From.Scale = 0 4967 ab.rexflag |= Pw 4968 ab.Put2(0x64, // FS 4969 0x8B) 4970 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4971 4972 case objabi.Hwindows: 4973 // Windows TLS base is always 0x28(GS). 4974 pp.From = p.From 4975 4976 pp.From.Type = obj.TYPE_MEM 4977 pp.From.Name = obj.NAME_NONE 4978 pp.From.Reg = REG_GS 4979 pp.From.Offset = 0x28 4980 pp.From.Index = REG_NONE 4981 pp.From.Scale = 0 4982 ab.rexflag |= Pw 4983 ab.Put2(0x65, // GS 4984 0x8B) 4985 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4986 } 4987 } 4988 return 4989 } 4990 } 4991 } 4992 goto bad 4993 4994 bad: 4995 if ctxt.Arch.Family != sys.AMD64 { 4996 // here, the assembly has failed. 4997 // if it's a byte instruction that has 4998 // unaddressable registers, try to 4999 // exchange registers and reissue the 5000 // instruction with the operands renamed. 5001 pp := *p 5002 5003 unbytereg(&pp.From, &pp.Ft) 5004 unbytereg(&pp.To, &pp.Tt) 5005 5006 z := int(p.From.Reg) 5007 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5008 // TODO(rsc): Use this code for x86-64 too. 
It has bug fixes not present in the amd64 code base. 5009 // For now, different to keep bit-for-bit compatibility. 5010 if ctxt.Arch.Family == sys.I386 { 5011 breg := byteswapreg(ctxt, &p.To) 5012 if breg != REG_AX { 5013 ab.Put1(0x87) // xchg lhs,bx 5014 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5015 subreg(&pp, z, breg) 5016 ab.doasm(ctxt, cursym, &pp) 5017 ab.Put1(0x87) // xchg lhs,bx 5018 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5019 } else { 5020 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5021 subreg(&pp, z, REG_AX) 5022 ab.doasm(ctxt, cursym, &pp) 5023 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5024 } 5025 return 5026 } 5027 5028 if isax(&p.To) || p.To.Type == obj.TYPE_NONE { 5029 // We certainly don't want to exchange 5030 // with AX if the op is MUL or DIV. 5031 ab.Put1(0x87) // xchg lhs,bx 5032 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5033 subreg(&pp, z, REG_BX) 5034 ab.doasm(ctxt, cursym, &pp) 5035 ab.Put1(0x87) // xchg lhs,bx 5036 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5037 } else { 5038 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5039 subreg(&pp, z, REG_AX) 5040 ab.doasm(ctxt, cursym, &pp) 5041 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5042 } 5043 return 5044 } 5045 5046 z = int(p.To.Reg) 5047 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5048 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. 5049 // For now, different to keep bit-for-bit compatibility. 
5050 if ctxt.Arch.Family == sys.I386 { 5051 breg := byteswapreg(ctxt, &p.From) 5052 if breg != REG_AX { 5053 ab.Put1(0x87) //xchg rhs,bx 5054 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5055 subreg(&pp, z, breg) 5056 ab.doasm(ctxt, cursym, &pp) 5057 ab.Put1(0x87) // xchg rhs,bx 5058 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5059 } else { 5060 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5061 subreg(&pp, z, REG_AX) 5062 ab.doasm(ctxt, cursym, &pp) 5063 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5064 } 5065 return 5066 } 5067 5068 if isax(&p.From) { 5069 ab.Put1(0x87) // xchg rhs,bx 5070 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5071 subreg(&pp, z, REG_BX) 5072 ab.doasm(ctxt, cursym, &pp) 5073 ab.Put1(0x87) // xchg rhs,bx 5074 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5075 } else { 5076 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5077 subreg(&pp, z, REG_AX) 5078 ab.doasm(ctxt, cursym, &pp) 5079 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5080 } 5081 return 5082 } 5083 } 5084 5085 ctxt.Diag("invalid instruction: %v", p) 5086 } 5087 5088 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX) 5089 // which is not referenced in a. 5090 // If a is empty, it returns BX to account for MULB-like instructions 5091 // that might use DX and AX. 
5092 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5093 cana, canb, canc, cand := true, true, true, true 5094 if a.Type == obj.TYPE_NONE { 5095 cana, cand = false, false 5096 } 5097 5098 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5099 switch a.Reg { 5100 case REG_NONE: 5101 cana, cand = false, false 5102 case REG_AX, REG_AL, REG_AH: 5103 cana = false 5104 case REG_BX, REG_BL, REG_BH: 5105 canb = false 5106 case REG_CX, REG_CL, REG_CH: 5107 canc = false 5108 case REG_DX, REG_DL, REG_DH: 5109 cand = false 5110 } 5111 } 5112 5113 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5114 switch a.Index { 5115 case REG_AX: 5116 cana = false 5117 case REG_BX: 5118 canb = false 5119 case REG_CX: 5120 canc = false 5121 case REG_DX: 5122 cand = false 5123 } 5124 } 5125 5126 switch { 5127 case cana: 5128 return REG_AX 5129 case canb: 5130 return REG_BX 5131 case canc: 5132 return REG_CX 5133 case cand: 5134 return REG_DX 5135 default: 5136 ctxt.Diag("impossible byte register") 5137 ctxt.DiagFlush() 5138 log.Fatalf("bad code") 5139 return 0 5140 } 5141 } 5142 5143 func isbadbyte(a *obj.Addr) bool { 5144 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5145 } 5146 5147 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5148 ab.Reset() 5149 5150 ab.rexflag = 0 5151 ab.vexflag = false 5152 ab.evexflag = false 5153 mark := ab.Len() 5154 ab.doasm(ctxt, cursym, p) 5155 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5156 // as befits the whole approach of the architecture, 5157 // the rex prefix must appear before the first opcode byte 5158 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5159 // before the 0f opcode escape!), or it might be ignored. 5160 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5161 if ctxt.Arch.Family != sys.AMD64 { 5162 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5163 } 5164 n := ab.Len() 5165 var np int 5166 for np = mark; np < n; np++ { 5167 c := ab.At(np) 5168 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5169 break 5170 } 5171 } 5172 ab.Insert(np, byte(0x40|ab.rexflag)) 5173 } 5174 5175 n := ab.Len() 5176 for i := len(cursym.R) - 1; i >= 0; i-- { 5177 r := &cursym.R[i] 5178 if int64(r.Off) < p.Pc { 5179 break 5180 } 5181 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5182 r.Off++ 5183 } 5184 if r.Type == objabi.R_PCREL { 5185 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5186 // PC-relative addressing is relative to the end of the instruction, 5187 // but the relocations applied by the linker are relative to the end 5188 // of the relocation. Because immediate instruction 5189 // arguments can follow the PC-relative memory reference in the 5190 // instruction encoding, the two may not coincide. In this case, 5191 // adjust addend so that linker can keep relocating relative to the 5192 // end of the relocation. 5193 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5194 } else if ctxt.Arch.Family == sys.I386 { 5195 // On 386 PC-relative addressing (for non-call/jmp instructions) 5196 // assumes that the previous instruction loaded the PC of the end 5197 // of that instruction into CX, so the adjustment is relative to 5198 // that. 5199 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5200 } 5201 } 5202 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5203 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5204 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5205 } 5206 5207 } 5208 } 5209 5210 // unpackOps4 extracts 4 operands from p. 
5211 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5212 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To 5213 } 5214 5215 // unpackOps5 extracts 5 operands from p. 5216 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5217 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To 5218 }