github.com/zxy12/go_duplicate_1_12@v0.0.0-20200217043740-b1636fc0368b/src/cmd/internal/obj/x86/asm6.go (about) 1 // Inferno utils/6l/span.c 2 // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 30 31 package x86 32 33 import ( 34 "cmd/internal/obj" 35 "cmd/internal/objabi" 36 "cmd/internal/sys" 37 "encoding/binary" 38 "fmt" 39 "log" 40 "strings" 41 ) 42 43 var ( 44 plan9privates *obj.LSym 45 deferreturn *obj.LSym 46 ) 47 48 // Instruction layout. 49 50 // Loop alignment constants: 51 // want to align loop entry to loopAlign-byte boundary, 52 // and willing to insert at most maxLoopPad bytes of NOP to do so. 53 // We define a loop entry as the target of a backward jump. 54 // 55 // gcc uses maxLoopPad = 10 for its 'generic x86-64' config, 56 // and it aligns all jump targets, not just backward jump targets. 57 // 58 // As of 6/1/2012, the effect of setting maxLoopPad = 10 here 59 // is very slight but negative, so the alignment is disabled by 60 // setting MaxLoopPad = 0. The code is here for reference and 61 // for future experiments. 62 // 63 const ( 64 loopAlign = 16 65 maxLoopPad = 0 66 ) 67 68 // Bit flags that are used to express jump target properties. 69 const ( 70 // branchBackwards marks targets that are located behind. 71 // Used to express jumps to loop headers. 72 branchBackwards = (1 << iota) 73 // branchShort marks branches those target is close, 74 // with offset is in -128..127 range. 75 branchShort 76 // branchLoopHead marks loop entry. 77 // Used to insert padding for misaligned loops. 78 branchLoopHead 79 ) 80 81 // opBytes holds optab encoding bytes. 82 // Each ytab reserves fixed amount of bytes in this array. 83 // 84 // The size should be the minimal number of bytes that 85 // are enough to hold biggest optab op lines. 86 type opBytes [31]uint8 87 88 type Optab struct { 89 as obj.As 90 ytab []ytab 91 prefix uint8 92 op opBytes 93 } 94 95 type Movtab struct { 96 as obj.As 97 ft uint8 98 f3t uint8 99 tt uint8 100 code uint8 101 op [4]uint8 102 } 103 104 const ( 105 Yxxx = iota 106 Ynone 107 Yi0 // $0 108 Yi1 // $1 109 Yu2 // $x, x fits in uint2 110 Yi8 // $x, x fits in int8 111 Yu8 // $x, x fits in uint8 112 Yu7 // $x, x in 0..127 (fits in both int8 and uint8) 113 Ys32 114 Yi32 115 Yi64 116 Yiauto 117 Yal 118 Ycl 119 Yax 120 Ycx 121 Yrb 122 Yrl 123 Yrl32 // Yrl on 32-bit system 124 Yrf 125 Yf0 126 Yrx 127 Ymb 128 Yml 129 Ym 130 Ybr 131 Ycs 132 Yss 133 Yds 134 Yes 135 Yfs 136 Ygs 137 Ygdtr 138 Yidtr 139 Yldtr 140 Ymsw 141 Ytask 142 Ycr0 143 Ycr1 144 Ycr2 145 Ycr3 146 Ycr4 147 Ycr5 148 Ycr6 149 Ycr7 150 Ycr8 151 Ydr0 152 Ydr1 153 Ydr2 154 Ydr3 155 Ydr4 156 Ydr5 157 Ydr6 158 Ydr7 159 Ytr0 160 Ytr1 161 Ytr2 162 Ytr3 163 Ytr4 164 Ytr5 165 Ytr6 166 Ytr7 167 Ymr 168 Ymm 169 Yxr0 // X0 only. "<XMM0>" notation in Intel manual. 170 YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex 171 Yxr // X0..X15 172 YxrEvex // X0..X31 173 Yxm 174 YxmEvex // YxrEvex+Ym 175 Yxvm // VSIB vector array; vm32x/vm64x 176 YxvmEvex // Yxvm which permits High-16 X register as index. 177 YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex 178 Yyr // Y0..Y15 179 YyrEvex // Y0..Y31 180 Yym 181 YymEvex // YyrEvex+Ym 182 Yyvm // VSIB vector array; vm32y/vm64y 183 YyvmEvex // Yyvm which permits High-16 Y register as index. 184 YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex 185 Yzr // Z0..Z31 186 Yzm // Yzr+Ym 187 Yzvm // VSIB vector array; vm32z/vm64z 188 Yk0 // K0 189 Yknot0 // K1..K7; write mask 190 Yk // K0..K7; used for KOP 191 Ykm // Yk+Ym; used for KOP 192 Ytls 193 Ytextsize 194 Yindir 195 Ymax 196 ) 197 198 const ( 199 Zxxx = iota 200 Zlit 201 Zlitm_r 202 Zlitr_m 203 Zlit_m_r 204 Z_rp 205 Zbr 206 Zcall 207 Zcallcon 208 Zcallduff 209 Zcallind 210 Zcallindreg 211 Zib_ 212 Zib_rp 213 Zibo_m 214 Zibo_m_xm 215 Zil_ 216 Zil_rp 217 Ziq_rp 218 Zilo_m 219 Zjmp 220 Zjmpcon 221 Zloop 222 Zo_iw 223 Zm_o 224 Zm_r 225 Z_m_r 226 Zm2_r 227 Zm_r_xm 228 Zm_r_i_xm 229 Zm_r_xm_nr 230 Zr_m_xm_nr 231 Zibm_r // mmx1,mmx2/mem64,imm8 232 Zibr_m 233 Zmb_r 234 Zaut_r 235 Zo_m 236 Zo_m64 237 Zpseudo 238 Zr_m 239 Zr_m_xm 240 Zrp_ 241 Z_ib 242 Z_il 243 Zm_ibo 244 Zm_ilo 245 Zib_rr 246 Zil_rr 247 Zbyte 248 249 Zvex_rm_v_r 250 Zvex_rm_v_ro 251 Zvex_r_v_rm 252 Zvex_i_rm_vo 253 Zvex_v_rm_r 254 Zvex_i_rm_r 255 Zvex_i_r_v 256 Zvex_i_rm_v_r 257 Zvex 258 Zvex_rm_r_vo 259 Zvex_i_r_rm 260 Zvex_hr_rm_v_r 261 262 Zevex_first 263 Zevex_i_r_k_rm 264 Zevex_i_r_rm 265 Zevex_i_rm_k_r 266 Zevex_i_rm_k_vo 267 Zevex_i_rm_r 268 Zevex_i_rm_v_k_r 269 Zevex_i_rm_v_r 270 Zevex_i_rm_vo 271 Zevex_k_rmo 272 Zevex_r_k_rm 273 Zevex_r_v_k_rm 274 Zevex_r_v_rm 275 Zevex_rm_k_r 276 Zevex_rm_v_k_r 277 Zevex_rm_v_r 278 Zevex_last 279 280 Zmax 281 ) 282 283 const ( 284 Px = 0 285 Px1 = 1 // symbolic; exact value doesn't matter 286 P32 = 0x32 // 32-bit only 287 Pe = 0x66 // operand escape 288 Pm = 0x0f // 2byte opcode escape 289 Pq = 0xff // both escapes: 66 0f 290 Pb = 0xfe // byte operands 291 Pf2 = 0xf2 // xmm escape 1: f2 0f 292 Pf3 = 0xf3 // xmm escape 2: f3 0f 293 Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f 294 Pq3 = 0x67 // xmm escape 3: 66 48 0f 295 Pq4 = 0x68 // xmm escape 4: 66 0F 38 296 Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38 297 Pq5 = 0x6a // xmm escape 5: F3 0F 38 298 Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38 299 Pfw = 0xf4 // Pf3 with Rex.w: f3 48 0f 300 Pw = 0x48 // Rex.w 301 Pw8 = 0x90 // symbolic; exact value doesn't matter 302 Py = 0x80 // defaults to 64-bit mode 303 Py1 = 0x81 // symbolic; exact value doesn't matter 304 Py3 = 0x83 // symbolic; exact value doesn't matter 305 Pavx = 0x84 // symbolic: exact value doesn't matter 306 307 RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R 308 Rxw = 1 << 3 // =1, 64-bit operand size 309 Rxr = 1 << 2 // extend modrm reg 310 Rxx = 1 << 1 // extend sib index 311 Rxb = 1 << 0 // extend modrm r/m, sib base, or opcode reg 312 ) 313 314 const ( 315 // Encoding for VEX prefix in tables. 316 // The P, L, and W fields are chosen to match 317 // their eventual locations in the VEX prefix bytes. 318 319 // Encoding for VEX prefix in tables. 320 // The P, L, and W fields are chosen to match 321 // their eventual locations in the VEX prefix bytes. 322 323 // Using spare bit to make leading [E]VEX encoding byte different from 324 // 0x0f even if all other VEX fields are 0. 325 avxEscape = 1 << 6 326 327 // P field - 2 bits 328 vex66 = 1 << 0 329 vexF3 = 2 << 0 330 vexF2 = 3 << 0 331 // L field - 1 bit 332 vexLZ = 0 << 2 333 vexLIG = 0 << 2 334 vex128 = 0 << 2 335 vex256 = 1 << 2 336 // W field - 1 bit 337 vexWIG = 0 << 7 338 vexW0 = 0 << 7 339 vexW1 = 1 << 7 340 // M field - 5 bits, but mostly reserved; we can store up to 3 341 vex0F = 1 << 3 342 vex0F38 = 2 << 3 343 vex0F3A = 3 << 3 344 ) 345 346 var ycover [Ymax * Ymax]uint8 347 348 var reg [MAXREG]int 349 350 var regrex [MAXREG + 1]int 351 352 var ynone = []ytab{ 353 {Zlit, 1, argList{}}, 354 } 355 356 var ytext = []ytab{ 357 {Zpseudo, 0, argList{Ymb, Ytextsize}}, 358 {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}}, 359 } 360 361 var ynop = []ytab{ 362 {Zpseudo, 0, argList{}}, 363 {Zpseudo, 0, argList{Yiauto}}, 364 {Zpseudo, 0, argList{Yml}}, 365 {Zpseudo, 0, argList{Yrf}}, 366 {Zpseudo, 0, argList{Yxr}}, 367 {Zpseudo, 0, argList{Yiauto}}, 368 {Zpseudo, 0, argList{Yml}}, 369 {Zpseudo, 0, argList{Yrf}}, 370 {Zpseudo, 1, argList{Yxr}}, 371 } 372 373 var yfuncdata = []ytab{ 374 {Zpseudo, 0, argList{Yi32, Ym}}, 375 } 376 377 var ypcdata = []ytab{ 378 {Zpseudo, 0, argList{Yi32, Yi32}}, 379 } 380 381 var yxorb = []ytab{ 382 {Zib_, 1, argList{Yi32, Yal}}, 383 {Zibo_m, 2, argList{Yi32, Ymb}}, 384 {Zr_m, 1, argList{Yrb, Ymb}}, 385 {Zm_r, 1, argList{Ymb, Yrb}}, 386 } 387 388 var yaddl = []ytab{ 389 {Zibo_m, 2, argList{Yi8, Yml}}, 390 {Zil_, 1, argList{Yi32, Yax}}, 391 {Zilo_m, 2, argList{Yi32, Yml}}, 392 {Zr_m, 1, argList{Yrl, Yml}}, 393 {Zm_r, 1, argList{Yml, Yrl}}, 394 } 395 396 var yincl = []ytab{ 397 {Z_rp, 1, argList{Yrl}}, 398 {Zo_m, 2, argList{Yml}}, 399 } 400 401 var yincq = []ytab{ 402 {Zo_m, 2, argList{Yml}}, 403 } 404 405 var ycmpb = []ytab{ 406 {Z_ib, 1, argList{Yal, Yi32}}, 407 {Zm_ibo, 2, argList{Ymb, Yi32}}, 408 {Zm_r, 1, argList{Ymb, Yrb}}, 409 {Zr_m, 1, argList{Yrb, Ymb}}, 410 } 411 412 var ycmpl = []ytab{ 413 {Zm_ibo, 2, argList{Yml, Yi8}}, 414 {Z_il, 1, argList{Yax, Yi32}}, 415 {Zm_ilo, 2, argList{Yml, Yi32}}, 416 {Zm_r, 1, argList{Yml, Yrl}}, 417 {Zr_m, 1, argList{Yrl, Yml}}, 418 } 419 420 var yshb = []ytab{ 421 {Zo_m, 2, argList{Yi1, Ymb}}, 422 {Zibo_m, 2, argList{Yu8, Ymb}}, 423 {Zo_m, 2, argList{Ycx, Ymb}}, 424 } 425 426 var yshl = []ytab{ 427 {Zo_m, 2, argList{Yi1, Yml}}, 428 {Zibo_m, 2, argList{Yu8, Yml}}, 429 {Zo_m, 2, argList{Ycl, Yml}}, 430 {Zo_m, 2, argList{Ycx, Yml}}, 431 } 432 433 var ytestl = []ytab{ 434 {Zil_, 1, argList{Yi32, Yax}}, 435 {Zilo_m, 2, argList{Yi32, Yml}}, 436 {Zr_m, 1, argList{Yrl, Yml}}, 437 {Zm_r, 1, argList{Yml, Yrl}}, 438 } 439 440 var ymovb = []ytab{ 441 {Zr_m, 1, argList{Yrb, Ymb}}, 442 {Zm_r, 1, argList{Ymb, Yrb}}, 443 {Zib_rp, 1, argList{Yi32, Yrb}}, 444 {Zibo_m, 2, argList{Yi32, Ymb}}, 445 } 446 447 var ybtl = []ytab{ 448 {Zibo_m, 2, argList{Yi8, Yml}}, 449 {Zr_m, 1, argList{Yrl, Yml}}, 450 } 451 452 var ymovw = []ytab{ 453 {Zr_m, 1, argList{Yrl, Yml}}, 454 {Zm_r, 1, argList{Yml, Yrl}}, 455 {Zil_rp, 1, argList{Yi32, Yrl}}, 456 {Zilo_m, 2, argList{Yi32, Yml}}, 457 {Zaut_r, 2, argList{Yiauto, Yrl}}, 458 } 459 460 var ymovl = []ytab{ 461 {Zr_m, 1, argList{Yrl, Yml}}, 462 {Zm_r, 1, argList{Yml, Yrl}}, 463 {Zil_rp, 1, argList{Yi32, Yrl}}, 464 {Zilo_m, 2, argList{Yi32, Yml}}, 465 {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD 466 {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD 467 {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit) 468 {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit) 469 {Zaut_r, 2, argList{Yiauto, Yrl}}, 470 } 471 472 var yret = []ytab{ 473 {Zo_iw, 1, argList{}}, 474 {Zo_iw, 1, argList{Yi32}}, 475 } 476 477 var ymovq = []ytab{ 478 // valid in 32-bit mode 479 {Zm_r_xm_nr, 1, argList{Ym, Ymr}}, // 0x6f MMX MOVQ (shorter encoding) 480 {Zr_m_xm_nr, 1, argList{Ymr, Ym}}, // 0x7f MMX MOVQ 481 {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q 482 {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2 483 {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64 484 485 // valid only in 64-bit mode, usually with 64-bit prefix 486 {Zr_m, 1, argList{Yrl, Yml}}, // 0x89 487 {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b 488 {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0) 489 {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate 490 {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0) 491 {Zm_r_xm, 1, argList{Ymm, Ymr}}, // 0x6e MMX MOVD 492 {Zr_m_xm, 1, argList{Ymr, Ymm}}, // 0x7e MMX MOVD 493 {Zm_r_xm, 2, argList{Yml, Yxr}}, // Pe, 0x6e MOVD xmm load 494 {Zr_m_xm, 2, argList{Yxr, Yml}}, // Pe, 0x7e MOVD xmm store 495 {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ 496 } 497 498 var ymovbe = []ytab{ 499 {Zlitm_r, 3, argList{Ym, Yrl}}, 500 {Zlitr_m, 3, argList{Yrl, Ym}}, 501 } 502 503 var ym_rl = []ytab{ 504 {Zm_r, 1, argList{Ym, Yrl}}, 505 } 506 507 var yrl_m = []ytab{ 508 {Zr_m, 1, argList{Yrl, Ym}}, 509 } 510 511 var ymb_rl = []ytab{ 512 {Zmb_r, 1, argList{Ymb, Yrl}}, 513 } 514 515 var yml_rl = []ytab{ 516 {Zm_r, 1, argList{Yml, Yrl}}, 517 } 518 519 var yrl_ml = []ytab{ 520 {Zr_m, 1, argList{Yrl, Yml}}, 521 } 522 523 var yml_mb = []ytab{ 524 {Zr_m, 1, argList{Yrb, Ymb}}, 525 {Zm_r, 1, argList{Ymb, Yrb}}, 526 } 527 528 var yrb_mb = []ytab{ 529 {Zr_m, 1, argList{Yrb, Ymb}}, 530 } 531 532 var yxchg = []ytab{ 533 {Z_rp, 1, argList{Yax, Yrl}}, 534 {Zrp_, 1, argList{Yrl, Yax}}, 535 {Zr_m, 1, argList{Yrl, Yml}}, 536 {Zm_r, 1, argList{Yml, Yrl}}, 537 } 538 539 var ydivl = []ytab{ 540 {Zm_o, 2, argList{Yml}}, 541 } 542 543 var ydivb = []ytab{ 544 {Zm_o, 2, argList{Ymb}}, 545 } 546 547 var yimul = []ytab{ 548 {Zm_o, 2, argList{Yml}}, 549 {Zib_rr, 1, argList{Yi8, Yrl}}, 550 {Zil_rr, 1, argList{Yi32, Yrl}}, 551 {Zm_r, 2, argList{Yml, Yrl}}, 552 } 553 554 var yimul3 = []ytab{ 555 {Zibm_r, 2, argList{Yi8, Yml, Yrl}}, 556 {Zibm_r, 2, argList{Yi32, Yml, Yrl}}, 557 } 558 559 var ybyte = []ytab{ 560 {Zbyte, 1, argList{Yi64}}, 561 } 562 563 var yin = []ytab{ 564 {Zib_, 1, argList{Yi32}}, 565 {Zlit, 1, argList{}}, 566 } 567 568 var yint = []ytab{ 569 {Zib_, 1, argList{Yi32}}, 570 } 571 572 var ypushl = []ytab{ 573 {Zrp_, 1, argList{Yrl}}, 574 {Zm_o, 2, argList{Ym}}, 575 {Zib_, 1, argList{Yi8}}, 576 {Zil_, 1, argList{Yi32}}, 577 } 578 579 var ypopl = []ytab{ 580 {Z_rp, 1, argList{Yrl}}, 581 {Zo_m, 2, argList{Ym}}, 582 } 583 584 var ywrfsbase = []ytab{ 585 {Zm_o, 2, argList{Yrl}}, 586 } 587 588 var yrdrand = []ytab{ 589 {Zo_m, 2, argList{Yrl}}, 590 } 591 592 var yclflush = []ytab{ 593 {Zo_m, 2, argList{Ym}}, 594 } 595 596 var ybswap = []ytab{ 597 {Z_rp, 2, argList{Yrl}}, 598 } 599 600 var yscond = []ytab{ 601 {Zo_m, 2, argList{Ymb}}, 602 } 603 604 var yjcond = []ytab{ 605 {Zbr, 0, argList{Ybr}}, 606 {Zbr, 0, argList{Yi0, Ybr}}, 607 {Zbr, 1, argList{Yi1, Ybr}}, 608 } 609 610 var yloop = []ytab{ 611 {Zloop, 1, argList{Ybr}}, 612 } 613 614 var ycall = []ytab{ 615 {Zcallindreg, 0, argList{Yml}}, 616 {Zcallindreg, 2, argList{Yrx, Yrx}}, 617 {Zcallind, 2, argList{Yindir}}, 618 {Zcall, 0, argList{Ybr}}, 619 {Zcallcon, 1, argList{Yi32}}, 620 } 621 622 var yduff = []ytab{ 623 {Zcallduff, 1, argList{Yi32}}, 624 } 625 626 var yjmp = []ytab{ 627 {Zo_m64, 2, argList{Yml}}, 628 {Zjmp, 0, argList{Ybr}}, 629 {Zjmpcon, 1, argList{Yi32}}, 630 } 631 632 var yfmvd = []ytab{ 633 {Zm_o, 2, argList{Ym, Yf0}}, 634 {Zo_m, 2, argList{Yf0, Ym}}, 635 {Zm_o, 2, argList{Yrf, Yf0}}, 636 {Zo_m, 2, argList{Yf0, Yrf}}, 637 } 638 639 var yfmvdp = []ytab{ 640 {Zo_m, 2, argList{Yf0, Ym}}, 641 {Zo_m, 2, argList{Yf0, Yrf}}, 642 } 643 644 var yfmvf = []ytab{ 645 {Zm_o, 2, argList{Ym, Yf0}}, 646 {Zo_m, 2, argList{Yf0, Ym}}, 647 } 648 649 var yfmvx = []ytab{ 650 {Zm_o, 2, argList{Ym, Yf0}}, 651 } 652 653 var yfmvp = []ytab{ 654 {Zo_m, 2, argList{Yf0, Ym}}, 655 } 656 657 var yfcmv = []ytab{ 658 {Zm_o, 2, argList{Yrf, Yf0}}, 659 } 660 661 var yfadd = []ytab{ 662 {Zm_o, 2, argList{Ym, Yf0}}, 663 {Zm_o, 2, argList{Yrf, Yf0}}, 664 {Zo_m, 2, argList{Yf0, Yrf}}, 665 } 666 667 var yfxch = []ytab{ 668 {Zo_m, 2, argList{Yf0, Yrf}}, 669 {Zm_o, 2, argList{Yrf, Yf0}}, 670 } 671 672 var ycompp = []ytab{ 673 {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1 674 } 675 676 var ystsw = []ytab{ 677 {Zo_m, 2, argList{Ym}}, 678 {Zlit, 1, argList{Yax}}, 679 } 680 681 var ysvrs_mo = []ytab{ 682 {Zm_o, 2, argList{Ym}}, 683 } 684 685 // unaryDst version of "ysvrs_mo". 686 var ysvrs_om = []ytab{ 687 {Zo_m, 2, argList{Ym}}, 688 } 689 690 var ymm = []ytab{ 691 {Zm_r_xm, 1, argList{Ymm, Ymr}}, 692 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 693 } 694 695 var yxm = []ytab{ 696 {Zm_r_xm, 1, argList{Yxm, Yxr}}, 697 } 698 699 var yxm_q4 = []ytab{ 700 {Zm_r, 1, argList{Yxm, Yxr}}, 701 } 702 703 var yxcvm1 = []ytab{ 704 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 705 {Zm_r_xm, 2, argList{Yxm, Ymr}}, 706 } 707 708 var yxcvm2 = []ytab{ 709 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 710 {Zm_r_xm, 2, argList{Ymm, Yxr}}, 711 } 712 713 var yxr = []ytab{ 714 {Zm_r_xm, 1, argList{Yxr, Yxr}}, 715 } 716 717 var yxr_ml = []ytab{ 718 {Zr_m_xm, 1, argList{Yxr, Yml}}, 719 } 720 721 var ymr = []ytab{ 722 {Zm_r, 1, argList{Ymr, Ymr}}, 723 } 724 725 var ymr_ml = []ytab{ 726 {Zr_m_xm, 1, argList{Ymr, Yml}}, 727 } 728 729 var yxcmpi = []ytab{ 730 {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}}, 731 } 732 733 var yxmov = []ytab{ 734 {Zm_r_xm, 1, argList{Yxm, Yxr}}, 735 {Zr_m_xm, 1, argList{Yxr, Yxm}}, 736 } 737 738 var yxcvfl = []ytab{ 739 {Zm_r_xm, 1, argList{Yxm, Yrl}}, 740 } 741 742 var yxcvlf = []ytab{ 743 {Zm_r_xm, 1, argList{Yml, Yxr}}, 744 } 745 746 var yxcvfq = []ytab{ 747 {Zm_r_xm, 2, argList{Yxm, Yrl}}, 748 } 749 750 var yxcvqf = []ytab{ 751 {Zm_r_xm, 2, argList{Yml, Yxr}}, 752 } 753 754 var yps = []ytab{ 755 {Zm_r_xm, 1, argList{Ymm, Ymr}}, 756 {Zibo_m_xm, 2, argList{Yi8, Ymr}}, 757 {Zm_r_xm, 2, argList{Yxm, Yxr}}, 758 {Zibo_m_xm, 3, argList{Yi8, Yxr}}, 759 } 760 761 var yxrrl = []ytab{ 762 {Zm_r, 1, argList{Yxr, Yrl}}, 763 } 764 765 var ymrxr = []ytab{ 766 {Zm_r, 1, argList{Ymr, Yxr}}, 767 {Zm_r_xm, 1, argList{Yxm, Yxr}}, 768 } 769 770 var ymshuf = []ytab{ 771 {Zibm_r, 2, argList{Yi8, Ymm, Ymr}}, 772 } 773 774 var ymshufb = []ytab{ 775 {Zm2_r, 2, argList{Yxm, Yxr}}, 776 } 777 778 // It should never have more than 1 entry, 779 // because some optab entries you opcode secuences that 780 // are longer than 2 bytes (zoffset=2 here), 781 // ROUNDPD and ROUNDPS and recently added BLENDPD, 782 // to name a few. 783 var yxshuf = []ytab{ 784 {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, 785 } 786 787 var yextrw = []ytab{ 788 {Zibm_r, 2, argList{Yu8, Yxr, Yrl}}, 789 {Zibr_m, 2, argList{Yu8, Yxr, Yml}}, 790 } 791 792 var yextr = []ytab{ 793 {Zibr_m, 3, argList{Yu8, Yxr, Ymm}}, 794 } 795 796 var yinsrw = []ytab{ 797 {Zibm_r, 2, argList{Yu8, Yml, Yxr}}, 798 } 799 800 var yinsr = []ytab{ 801 {Zibm_r, 3, argList{Yu8, Ymm, Yxr}}, 802 } 803 804 var ypsdq = []ytab{ 805 {Zibo_m, 2, argList{Yi8, Yxr}}, 806 } 807 808 var ymskb = []ytab{ 809 {Zm_r_xm, 2, argList{Yxr, Yrl}}, 810 {Zm_r_xm, 1, argList{Ymr, Yrl}}, 811 } 812 813 var ycrc32l = []ytab{ 814 {Zlitm_r, 0, argList{Yml, Yrl}}, 815 } 816 817 var ycrc32b = []ytab{ 818 {Zlitm_r, 0, argList{Ymb, Yrl}}, 819 } 820 821 var yprefetch = []ytab{ 822 {Zm_o, 2, argList{Ym}}, 823 } 824 825 var yaes = []ytab{ 826 {Zlitm_r, 2, argList{Yxm, Yxr}}, 827 } 828 829 var yxbegin = []ytab{ 830 {Zjmp, 1, argList{Ybr}}, 831 } 832 833 var yxabort = []ytab{ 834 {Zib_, 1, argList{Yu8}}, 835 } 836 837 var ylddqu = []ytab{ 838 {Zm_r, 1, argList{Ym, Yxr}}, 839 } 840 841 var ypalignr = []ytab{ 842 {Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, 843 } 844 845 var ysha256rnds2 = []ytab{ 846 {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}}, 847 } 848 849 var yblendvpd = []ytab{ 850 {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}}, 851 } 852 853 var ymmxmm0f38 = []ytab{ 854 {Zlitm_r, 3, argList{Ymm, Ymr}}, 855 {Zlitm_r, 5, argList{Yxm, Yxr}}, 856 } 857 858 var yextractps = []ytab{ 859 {Zibr_m, 2, argList{Yu2, Yxr, Yml}}, 860 } 861 862 var ysha1rnds4 = []ytab{ 863 {Zibm_r, 2, argList{Yu2, Yxm, Yxr}}, 864 } 865 866 // You are doasm, holding in your hand a *obj.Prog with p.As set to, say, 867 // ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab 868 // to find the entry with the given p.As and then looks through the ytable for 869 // that instruction (the second field in the optab struct) for a line whose 870 // first two values match the Ytypes of the p.From and p.To operands. The 871 // function oclass computes the specific Ytype of an operand and then the set 872 // of more general Ytypes that it satisfies is implied by the ycover table, set 873 // up in instinit. For example, oclass distinguishes the constants 0 and 1 874 // from the more general 8-bit constants, but instinit says 875 // 876 // ycover[Yi0*Ymax+Ys32] = 1 877 // ycover[Yi1*Ymax+Ys32] = 1 878 // ycover[Yi8*Ymax+Ys32] = 1 879 // 880 // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32) 881 // if that's what an instruction can handle. 882 // 883 // In parallel with the scan through the ytable for the appropriate line, there 884 // is a z pointer that starts out pointing at the strange magic byte list in 885 // the Optab struct. With each step past a non-matching ytable line, z 886 // advances by the 4th entry in the line. When a matching line is found, that 887 // z pointer has the extra data to use in laying down the instruction bytes. 888 // The actual bytes laid down are a function of the 3rd entry in the line (that 889 // is, the Ztype) and the z bytes. 890 // 891 // For example, let's look at AADDL. The optab line says: 892 // {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 893 // 894 // and yaddl says 895 // var yaddl = []ytab{ 896 // {Yi8, Ynone, Yml, Zibo_m, 2}, 897 // {Yi32, Ynone, Yax, Zil_, 1}, 898 // {Yi32, Ynone, Yml, Zilo_m, 2}, 899 // {Yrl, Ynone, Yml, Zr_m, 1}, 900 // {Yml, Ynone, Yrl, Zm_r, 1}, 901 // } 902 // 903 // so there are 5 possible types of ADDL instruction that can be laid down, and 904 // possible states used to lay them down (Ztype and z pointer, assuming z 905 // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are: 906 // 907 // Yi8, Yml -> Zibo_m, z (0x83, 00) 908 // Yi32, Yax -> Zil_, z+2 (0x05) 909 // Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00) 910 // Yrl, Yml -> Zr_m, z+2+1+2 (0x01) 911 // Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03) 912 // 913 // The Pconstant in the optab line controls the prefix bytes to emit. That's 914 // relatively straightforward as this program goes. 915 // 916 // The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for 917 // example, is an opcode byte (z[0]) then an asmando (which is some kind of 918 // encoded addressing mode for the Yml arg), and then a single immediate byte. 919 // Zilo_m is the same but a long (32-bit) immediate. 920 var optab = 921 // as, ytab, andproto, opcode 922 [...]Optab{ 923 {obj.AXXX, nil, 0, opBytes{}}, 924 {AAAA, ynone, P32, opBytes{0x37}}, 925 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 926 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 927 {AAAS, ynone, P32, opBytes{0x3f}}, 928 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 929 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 930 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 933 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 934 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 935 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 936 {AADDPD, yxm, Pq, opBytes{0x58}}, 937 {AADDPS, yxm, Pm, opBytes{0x58}}, 938 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 939 {AADDSD, yxm, Pf2, opBytes{0x58}}, 940 {AADDSS, yxm, Pf3, opBytes{0x58}}, 941 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 942 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 943 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 944 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 945 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 946 {AADJSP, nil, 0, opBytes{}}, 947 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 948 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 949 {AANDNPD, yxm, Pq, opBytes{0x55}}, 950 {AANDNPS, yxm, Pm, opBytes{0x55}}, 951 {AANDPD, yxm, Pq, opBytes{0x54}}, 952 {AANDPS, yxm, Pm, opBytes{0x54}}, 953 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 954 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AARPL, yrl_ml, P32, opBytes{0x63}}, 956 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 957 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 958 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 959 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 960 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 961 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 962 {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}}, 963 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 964 {ABSWAPW, ybswap, Pe, opBytes{0x0f, 0xc8}}, 965 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 966 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 967 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 968 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 969 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 970 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 971 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 972 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 973 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 974 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 975 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 976 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 977 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 978 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 979 {ABYTE, ybyte, Px, opBytes{1}}, 980 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 981 {ACBW, ynone, Pe, opBytes{0x98}}, 982 {ACDQ, ynone, Px, opBytes{0x99}}, 983 {ACDQE, ynone, Pw, opBytes{0x98}}, 984 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 985 {ACLC, ynone, Px, opBytes{0xf8}}, 986 {ACLD, ynone, Px, opBytes{0xfc}}, 987 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 988 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 989 {ACLI, ynone, Px, opBytes{0xfa}}, 990 {ACLTS, ynone, Pm, opBytes{0x06}}, 991 {ACMC, ynone, Px, opBytes{0xf5}}, 992 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 993 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 994 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 995 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 996 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 997 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 998 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 999 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1000 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1001 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1002 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1003 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1004 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1005 {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1006 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1007 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1008 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1009 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1010 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1011 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1012 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1013 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1014 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1015 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1016 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1017 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1018 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1019 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1020 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1021 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1022 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1023 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1024 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1025 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1026 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1027 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1028 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1029 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1030 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1031 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1032 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1033 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1034 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1035 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1036 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1037 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1038 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1039 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1040 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1041 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1042 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1043 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1044 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1045 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1046 {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}}, 1047 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1048 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1049 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1050 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1051 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1052 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1053 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1054 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1055 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1056 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1057 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1058 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1059 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1060 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1061 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1062 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1063 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1064 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1065 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1066 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1067 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1068 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1069 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1070 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1071 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1072 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1073 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1074 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1075 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1076 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1077 {ACWD, ynone, Pe, opBytes{0x99}}, 1078 {ACWDE, ynone, Px, opBytes{0x98}}, 1079 {ACQO, ynone, Pw, opBytes{0x99}}, 1080 {ADAA, ynone, P32, opBytes{0x27}}, 1081 {ADAS, ynone, P32, opBytes{0x2f}}, 1082 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1083 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1084 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1085 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1086 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1087 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1088 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 1089 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1090 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1091 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1092 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1093 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1094 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1095 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1096 {AEMMS, ynone, Pm, opBytes{0x77}}, 1097 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1098 {AENTER, nil, 0, opBytes{}}, // botch 1099 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1100 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1101 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1102 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1103 {AHLT, ynone, Px, opBytes{0xf4}}, 1104 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1105 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1106 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1107 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1108 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1109 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1110 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1111 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1113 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1114 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1116 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1117 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1118 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1119 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1120 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1121 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1122 {AINSB, ynone, Pb, opBytes{0x6c}}, 1123 {AINSL, ynone, Px, opBytes{0x6d}}, 1124 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1125 {AINSW, ynone, Pe, opBytes{0x6d}}, 1126 {AICEBP, ynone, Px, opBytes{0xf1}}, 1127 {AINT, yint, Px, opBytes{0xcd}}, 1128 {AINTO, ynone, P32, opBytes{0xce}}, 1129 {AIRETL, ynone, Px, opBytes{0xcf}}, 1130 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1131 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1132 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1133 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1134 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1135 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1136 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1137 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1138 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1139 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1140 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1141 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1142 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1143 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1144 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1145 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1146 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1147 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1148 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1149 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1150 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1151 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1152 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1153 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1154 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1155 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1156 {ALAHF, ynone, Px, opBytes{0x9f}}, 1157 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1158 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1159 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1160 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1161 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1162 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1163 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1164 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1165 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1166 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1167 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1168 {ALOCK, ynone, Px, opBytes{0xf0}}, 1169 {ALODSB, ynone, Pb, opBytes{0xac}}, 1170 {ALODSL, ynone, Px, opBytes{0xad}}, 1171 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1172 {ALODSW, ynone, Pe, opBytes{0xad}}, 1173 {ALONG, ybyte, Px, opBytes{4}}, 1174 {ALOOP, yloop, Px, opBytes{0xe2}}, 1175 {ALOOPEQ, yloop, Px, opBytes{0xe1}}, 1176 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1177 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1178 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1179 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1180 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1181 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1182 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1183 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1184 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1185 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1186 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1187 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1188 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1189 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1190 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1191 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1192 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1193 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1194 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1195 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1196 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1197 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1198 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1199 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1200 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1201 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1202 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1203 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1204 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1205 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1206 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1207 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1208 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1209 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1210 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1211 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1212 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1213 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1214 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1215 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1216 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1217 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1218 {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}}, 1219 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1220 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1221 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1222 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1223 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1224 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1225 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1226 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1227 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1228 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1229 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1230 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1231 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1232 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1233 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1234 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1235 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1236 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1237 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1238 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1239 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1240 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1241 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1242 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1243 {AMULPD, yxm, Pe, opBytes{0x59}}, 1244 {AMULPS, yxm, Ym, opBytes{0x59}}, 1245 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1246 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1247 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1248 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1249 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1250 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1251 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1252 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1253 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1254 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1255 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 1256 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1257 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1258 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1259 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1260 {AORPD, yxm, Pq, opBytes{0x56}}, 1261 {AORPS, yxm, Pm, opBytes{0x56}}, 1262 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1263 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1264 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1265 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1266 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1267 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1268 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1269 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1270 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1271 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1272 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1273 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1274 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1275 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1276 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1277 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1278 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1279 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1280 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1281 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1282 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1283 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1284 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1285 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1286 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1287 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1288 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1289 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1290 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1291 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1292 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1293 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1294 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1295 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1296 {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}}, 1297 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1298 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1299 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1300 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1301 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1302 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1303 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1304 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1305 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1306 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1307 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1308 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1309 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1310 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1311 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1312 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1313 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1314 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1315 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1316 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1317 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1318 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1319 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1320 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1321 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1322 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1323 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1324 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1325 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1326 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1327 {APMINSW, yxm, Pe, opBytes{0xea}}, 1328 {APMINUB, yxm, Pe, opBytes{0xda}}, 1329 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1330 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1331 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1332 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1333 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1334 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1335 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1336 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1337 {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}}, 1338 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1339 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1340 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1341 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1342 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1343 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1344 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1345 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1346 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1347 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1348 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1349 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1350 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1351 {APOPAL, ynone, P32, opBytes{0x61}}, 1352 {APOPAW, ynone, Pe, opBytes{0x61}}, 1353 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1354 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1355 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1356 {APOPFL, ynone, P32, opBytes{0x9d}}, 1357 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1358 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1359 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1360 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1361 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1362 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1363 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1364 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1365 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1366 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1367 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1368 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1369 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1370 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1371 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1372 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1373 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1374 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1375 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1376 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1377 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}}, 1378 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1379 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1380 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1381 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1382 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1383 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1384 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1385 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1386 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1387 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1388 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1389 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1390 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1391 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1392 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1393 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1394 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1395 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1396 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1397 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1398 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1399 {APUSHAL, ynone, P32, opBytes{0x60}}, 1400 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1401 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1402 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1403 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1404 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1405 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1406 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1408 {AQUAD, ybyte, Px, opBytes{8}}, 1409 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1410 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1411 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1412 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1414 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1415 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1416 {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1417 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1418 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {AREP, ynone, Px, opBytes{0xf3}}, 1420 {AREPN, ynone, Px, opBytes{0xf2}}, 1421 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1422 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1423 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1424 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1425 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1426 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1427 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1428 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1430 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1431 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1432 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1434 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1435 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1436 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1437 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1438 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1439 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1441 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1442 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1443 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1445 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1446 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1447 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASCASB, ynone, Pb, opBytes{0xae}}, 1449 {ASCASL, ynone, Px, opBytes{0xaf}}, 1450 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1451 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1452 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1453 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1454 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1455 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1456 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1457 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1458 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1459 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1460 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1461 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1462 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1463 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1464 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1465 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1466 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1467 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1468 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1469 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1470 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1471 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1473 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1474 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1475 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1477 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1478 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1479 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1480 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1481 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1482 {ASTC, ynone, Px, opBytes{0xf9}}, 1483 {ASTD, ynone, Px, opBytes{0xfd}}, 1484 {ASTI, ynone, Px, opBytes{0xfb}}, 1485 {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}}, 1486 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1487 {ASTOSL, ynone, Px, opBytes{0xab}}, 1488 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1489 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1490 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1491 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1492 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1493 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1494 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1495 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1496 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1497 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1498 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1499 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1500 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1501 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1502 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1503 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {obj.ATEXT, ytext, Px, opBytes{}}, 1505 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1506 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1507 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1508 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1509 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1510 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1511 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1512 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1513 {AWAIT, ynone, Px, opBytes{0x9b}}, 1514 {AWORD, ybyte, Px, opBytes{2}}, 1515 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1516 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1517 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1518 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1519 {AXLAT, ynone, Px, opBytes{0xd7}}, 1520 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1521 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1522 {AXORPD, yxm, Pe, opBytes{0x57}}, 1523 {AXORPS, yxm, Pm, opBytes{0x57}}, 1524 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1525 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1526 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1527 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1528 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1529 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1530 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1531 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1532 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1533 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1534 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1535 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1536 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1537 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1538 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1539 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1540 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1541 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1542 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1543 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1544 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1545 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1546 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1547 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1548 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1549 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1550 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1551 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1552 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1553 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1554 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1555 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1556 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1557 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1558 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1559 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1560 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1561 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1562 {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}}, 1563 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1564 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1565 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1566 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1567 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1568 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1569 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1570 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1571 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1572 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1573 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1574 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1575 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1576 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1577 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1578 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1579 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1580 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1581 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1582 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1583 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1584 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1585 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1586 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1587 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1588 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1589 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1590 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1591 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1592 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1593 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1594 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1595 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1596 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1597 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1598 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1599 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1600 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1601 {AFFREE, nil, 0, opBytes{}}, 1602 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1603 {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1604 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1605 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1606 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1607 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1608 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1609 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1610 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1611 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1612 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1613 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1614 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1615 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1616 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1617 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1618 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1619 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1620 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1621 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1622 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1623 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1624 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1625 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1626 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1627 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1628 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1629 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1630 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1631 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1632 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1633 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1634 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1635 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1636 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1637 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1638 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1639 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1640 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1641 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1642 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1643 {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}}, 1644 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1645 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1646 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1647 {AINVD, ynone, Pm, opBytes{0x08}}, 1648 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1649 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1650 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1651 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1652 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1653 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1654 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1655 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1656 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1657 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1658 {ARSM, ynone, Pm, opBytes{0xaa}}, 1659 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1660 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1661 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1662 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1663 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1664 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1665 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1666 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1667 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1668 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1669 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1670 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1671 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1672 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1673 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1674 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1675 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1676 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1677 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1678 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1679 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1680 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1681 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1682 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1683 {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1684 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1685 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1686 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1687 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1688 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1689 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1690 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1691 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1692 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1693 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1694 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1695 1696 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1697 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1698 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1699 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1700 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1701 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1702 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1703 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1704 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1705 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1706 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1707 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1708 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1709 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1710 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1711 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1712 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1713 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1714 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1715 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1716 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1717 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1718 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1719 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1720 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1721 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}}, 1722 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1723 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1724 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1725 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1726 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1727 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1728 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1729 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1730 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1731 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1732 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1733 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1734 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1735 {AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1736 {AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1737 {AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1738 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1739 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1740 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1741 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1742 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1743 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1744 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1745 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1746 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1747 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1748 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1749 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1750 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1751 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1752 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1753 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1754 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1755 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1756 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1757 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1758 {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}}, 1759 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1760 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1761 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1762 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1763 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1764 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1765 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1766 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1767 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1768 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1769 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1770 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1771 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1772 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1773 1774 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1775 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1776 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1777 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1778 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1779 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1780 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1781 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1782 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1783 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1784 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1785 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1786 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1787 1788 {obj.AEND, nil, 0, opBytes{}}, 1789 {0, nil, 0, opBytes{}}, 1790 } 1791 1792 var opindex [(ALAST + 1) & obj.AMask]*Optab 1793 1794 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1795 // This happens on systems like Solaris that call .so functions instead of system calls. 1796 // It does not seem to be necessary for any other systems. This is probably working 1797 // around a Solaris-specific bug that should be fixed differently, but we don't know 1798 // what that bug is. And this does fix it. 1799 func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1800 if ctxt.Headtype == objabi.Hsolaris { 1801 // All the Solaris dynamic imports from libc.so begin with "libc_". 1802 return strings.HasPrefix(s.Name, "libc_") 1803 } 1804 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1805 } 1806 1807 // single-instruction no-ops of various lengths. 1808 // constructed by hand and disassembled with gdb to verify. 1809 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1810 var nop = [][16]uint8{ 1811 {0x90}, 1812 {0x66, 0x90}, 1813 {0x0F, 0x1F, 0x00}, 1814 {0x0F, 0x1F, 0x40, 0x00}, 1815 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1816 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1817 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1818 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1819 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1820 } 1821 1822 // Native Client rejects the repeated 0x66 prefix. 1823 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1824 func fillnop(p []byte, n int) { 1825 var m int 1826 1827 for n > 0 { 1828 m = n 1829 if m > len(nop) { 1830 m = len(nop) 1831 } 1832 copy(p[:m], nop[m-1][:m]) 1833 p = p[m:] 1834 n -= m 1835 } 1836 } 1837 1838 func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1839 s.Grow(int64(c) + int64(pad)) 1840 fillnop(s.P[c:], int(pad)) 1841 return c + pad 1842 } 1843 1844 func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1845 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1846 return l 1847 } 1848 return q 1849 } 1850 1851 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { 1852 if s.P != nil { 1853 return 1854 } 1855 1856 if ycover[0] == 0 { 1857 ctxt.Diag("x86 tables not initialized, call x86.instinit first") 1858 } 1859 1860 var ab AsmBuf 1861 1862 for p := s.Func.Text; p != nil; p = p.Link { 1863 if p.To.Type == obj.TYPE_BRANCH { 1864 if p.Pcond == nil { 1865 p.Pcond = p 1866 } 1867 } 1868 if p.As == AADJSP { 1869 p.To.Type = obj.TYPE_REG 1870 p.To.Reg = REG_SP 1871 v := int32(-p.From.Offset) 1872 p.From.Offset = int64(v) 1873 p.As = spadjop(ctxt, AADDL, AADDQ) 1874 if v < 0 { 1875 p.As = spadjop(ctxt, ASUBL, ASUBQ) 1876 v = -v 1877 p.From.Offset = int64(v) 1878 } 1879 1880 if v == 0 { 1881 p.As = obj.ANOP 1882 } 1883 } 1884 } 1885 1886 var q *obj.Prog 1887 var count int64 // rough count of number of instructions 1888 for p := s.Func.Text; p != nil; p = p.Link { 1889 count++ 1890 p.Back = branchShort // use short branches first time through 1891 q = p.Pcond 1892 if q != nil && (q.Back&branchShort != 0) { 1893 p.Back |= branchBackwards 1894 q.Back |= branchLoopHead 1895 } 1896 1897 if p.As == AADJSP { 1898 p.To.Type = obj.TYPE_REG 1899 p.To.Reg = REG_SP 1900 v := int32(-p.From.Offset) 1901 p.From.Offset = int64(v) 1902 p.As = spadjop(ctxt, AADDL, AADDQ) 1903 if v < 0 { 1904 p.As = spadjop(ctxt, ASUBL, ASUBQ) 1905 v = -v 1906 p.From.Offset = int64(v) 1907 } 1908 1909 if v == 0 { 1910 p.As = obj.ANOP 1911 } 1912 } 1913 } 1914 s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction 1915 1916 n := 0 1917 var c int32 1918 errors := ctxt.Errors 1919 for { 1920 // This loop continues while there are reasons to re-assemble 1921 // whole block, like the presence of long forward jumps. 1922 reAssemble := false 1923 for i := range s.R { 1924 s.R[i] = obj.Reloc{} 1925 } 1926 s.R = s.R[:0] 1927 s.P = s.P[:0] 1928 c = 0 1929 for p := s.Func.Text; p != nil; p = p.Link { 1930 if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 { 1931 // pad everything to avoid crossing 32-byte boundary 1932 if c>>5 != (c+int32(p.Isize)-1)>>5 { 1933 c = naclpad(ctxt, s, c, -c&31) 1934 } 1935 1936 // pad call deferreturn to start at 32-byte boundary 1937 // so that subtracting 5 in jmpdefer will jump back 1938 // to that boundary and rerun the call. 1939 if p.As == obj.ACALL && p.To.Sym == deferreturn { 1940 c = naclpad(ctxt, s, c, -c&31) 1941 } 1942 1943 // pad call to end at 32-byte boundary 1944 if p.As == obj.ACALL { 1945 c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31) 1946 } 1947 1948 // the linker treats REP and STOSQ as different instructions 1949 // but in fact the REP is a prefix on the STOSQ. 1950 // make sure REP has room for 2 more bytes, so that 1951 // padding will not be inserted before the next instruction. 1952 if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 { 1953 c = naclpad(ctxt, s, c, -c&31) 1954 } 1955 1956 // same for LOCK. 1957 // various instructions follow; the longest is 4 bytes. 1958 // give ourselves 8 bytes so as to avoid surprises. 1959 if p.As == ALOCK && c>>5 != (c+8-1)>>5 { 1960 c = naclpad(ctxt, s, c, -c&31) 1961 } 1962 } 1963 1964 if (p.Back&branchLoopHead != 0) && c&(loopAlign-1) != 0 { 1965 // pad with NOPs 1966 v := -c & (loopAlign - 1) 1967 1968 if v <= maxLoopPad { 1969 s.Grow(int64(c) + int64(v)) 1970 fillnop(s.P[c:], int(v)) 1971 c += v 1972 } 1973 } 1974 1975 p.Pc = int64(c) 1976 1977 // process forward jumps to p 1978 for q = p.Rel; q != nil; q = q.Forwd { 1979 v := int32(p.Pc - (q.Pc + int64(q.Isize))) 1980 if q.Back&branchShort != 0 { 1981 if v > 127 { 1982 reAssemble = true 1983 q.Back ^= branchShort 1984 } 1985 1986 if q.As == AJCXZL || q.As == AXBEGIN { 1987 s.P[q.Pc+2] = byte(v) 1988 } else { 1989 s.P[q.Pc+1] = byte(v) 1990 } 1991 } else { 1992 binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v)) 1993 } 1994 } 1995 1996 p.Rel = nil 1997 1998 p.Pc = int64(c) 1999 ab.asmins(ctxt, s, p) 2000 m := ab.Len() 2001 if int(p.Isize) != m { 2002 p.Isize = uint8(m) 2003 // When building for NaCl, we currently need 2004 // at least 2 rounds to ensure proper 32-byte alignment. 2005 if ctxt.Headtype == objabi.Hnacl { 2006 reAssemble = true 2007 } 2008 } 2009 2010 s.Grow(p.Pc + int64(m)) 2011 copy(s.P[p.Pc:], ab.Bytes()) 2012 c += int32(m) 2013 } 2014 2015 n++ 2016 if n > 20 { 2017 ctxt.Diag("span must be looping") 2018 log.Fatalf("loop") 2019 } 2020 if !reAssemble { 2021 break 2022 } 2023 if ctxt.Errors > errors { 2024 return 2025 } 2026 } 2027 2028 if ctxt.Headtype == objabi.Hnacl { 2029 c = naclpad(ctxt, s, c, -c&31) 2030 } 2031 2032 s.Size = int64(c) 2033 2034 if false { /* debug['a'] > 1 */ 2035 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0) 2036 var i int 2037 for i = 0; i < len(s.P); i++ { 2038 fmt.Printf(" %.2x", s.P[i]) 2039 if i%16 == 15 { 2040 fmt.Printf("\n %.6x", uint(i+1)) 2041 } 2042 } 2043 2044 if i%16 != 0 { 2045 fmt.Printf("\n") 2046 } 2047 2048 for i := 0; i < len(s.R); i++ { 2049 r := &s.R[i] 2050 fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add) 2051 } 2052 } 2053 } 2054 2055 func instinit(ctxt *obj.Link) { 2056 if ycover[0] != 0 { 2057 // Already initialized; stop now. 2058 // This happens in the cmd/asm tests, 2059 // each of which re-initializes the arch. 2060 return 2061 } 2062 2063 switch ctxt.Headtype { 2064 case objabi.Hplan9: 2065 plan9privates = ctxt.Lookup("_privates") 2066 case objabi.Hnacl: 2067 deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal) 2068 } 2069 2070 for i := range avxOptab { 2071 c := avxOptab[i].as 2072 if opindex[c&obj.AMask] != nil { 2073 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) 2074 } 2075 opindex[c&obj.AMask] = &avxOptab[i] 2076 } 2077 for i := 1; optab[i].as != 0; i++ { 2078 c := optab[i].as 2079 if opindex[c&obj.AMask] != nil { 2080 ctxt.Diag("phase error in optab: %d (%v)", i, c) 2081 } 2082 opindex[c&obj.AMask] = &optab[i] 2083 } 2084 2085 for i := 0; i < Ymax; i++ { 2086 ycover[i*Ymax+i] = 1 2087 } 2088 2089 ycover[Yi0*Ymax+Yu2] = 1 2090 ycover[Yi1*Ymax+Yu2] = 1 2091 2092 ycover[Yi0*Ymax+Yi8] = 1 2093 ycover[Yi1*Ymax+Yi8] = 1 2094 ycover[Yu2*Ymax+Yi8] = 1 2095 ycover[Yu7*Ymax+Yi8] = 1 2096 2097 ycover[Yi0*Ymax+Yu7] = 1 2098 ycover[Yi1*Ymax+Yu7] = 1 2099 ycover[Yu2*Ymax+Yu7] = 1 2100 2101 ycover[Yi0*Ymax+Yu8] = 1 2102 ycover[Yi1*Ymax+Yu8] = 1 2103 ycover[Yu2*Ymax+Yu8] = 1 2104 ycover[Yu7*Ymax+Yu8] = 1 2105 2106 ycover[Yi0*Ymax+Ys32] = 1 2107 ycover[Yi1*Ymax+Ys32] = 1 2108 ycover[Yu2*Ymax+Ys32] = 1 2109 ycover[Yu7*Ymax+Ys32] = 1 2110 ycover[Yu8*Ymax+Ys32] = 1 2111 ycover[Yi8*Ymax+Ys32] = 1 2112 2113 ycover[Yi0*Ymax+Yi32] = 1 2114 ycover[Yi1*Ymax+Yi32] = 1 2115 ycover[Yu2*Ymax+Yi32] = 1 2116 ycover[Yu7*Ymax+Yi32] = 1 2117 ycover[Yu8*Ymax+Yi32] = 1 2118 ycover[Yi8*Ymax+Yi32] = 1 2119 ycover[Ys32*Ymax+Yi32] = 1 2120 2121 ycover[Yi0*Ymax+Yi64] = 1 2122 ycover[Yi1*Ymax+Yi64] = 1 2123 ycover[Yu7*Ymax+Yi64] = 1 2124 ycover[Yu2*Ymax+Yi64] = 1 2125 ycover[Yu8*Ymax+Yi64] = 1 2126 ycover[Yi8*Ymax+Yi64] = 1 2127 ycover[Ys32*Ymax+Yi64] = 1 2128 ycover[Yi32*Ymax+Yi64] = 1 2129 2130 ycover[Yal*Ymax+Yrb] = 1 2131 ycover[Ycl*Ymax+Yrb] = 1 2132 ycover[Yax*Ymax+Yrb] = 1 2133 ycover[Ycx*Ymax+Yrb] = 1 2134 ycover[Yrx*Ymax+Yrb] = 1 2135 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 2136 2137 ycover[Ycl*Ymax+Ycx] = 1 2138 2139 ycover[Yax*Ymax+Yrx] = 1 2140 ycover[Ycx*Ymax+Yrx] = 1 2141 2142 ycover[Yax*Ymax+Yrl] = 1 2143 ycover[Ycx*Ymax+Yrl] = 1 2144 ycover[Yrx*Ymax+Yrl] = 1 2145 ycover[Yrl32*Ymax+Yrl] = 1 2146 2147 ycover[Yf0*Ymax+Yrf] = 1 2148 2149 ycover[Yal*Ymax+Ymb] = 1 2150 ycover[Ycl*Ymax+Ymb] = 1 2151 ycover[Yax*Ymax+Ymb] = 1 2152 ycover[Ycx*Ymax+Ymb] = 1 2153 ycover[Yrx*Ymax+Ymb] = 1 2154 ycover[Yrb*Ymax+Ymb] = 1 2155 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 2156 ycover[Ym*Ymax+Ymb] = 1 2157 2158 ycover[Yax*Ymax+Yml] = 1 2159 ycover[Ycx*Ymax+Yml] = 1 2160 ycover[Yrx*Ymax+Yml] = 1 2161 ycover[Yrl*Ymax+Yml] = 1 2162 ycover[Yrl32*Ymax+Yml] = 1 2163 ycover[Ym*Ymax+Yml] = 1 2164 2165 ycover[Yax*Ymax+Ymm] = 1 2166 ycover[Ycx*Ymax+Ymm] = 1 2167 ycover[Yrx*Ymax+Ymm] = 1 2168 ycover[Yrl*Ymax+Ymm] = 1 2169 ycover[Yrl32*Ymax+Ymm] = 1 2170 ycover[Ym*Ymax+Ymm] = 1 2171 ycover[Ymr*Ymax+Ymm] = 1 2172 2173 ycover[Yxr0*Ymax+Yxr] = 1 2174 2175 ycover[Ym*Ymax+Yxm] = 1 2176 ycover[Yxr0*Ymax+Yxm] = 1 2177 ycover[Yxr*Ymax+Yxm] = 1 2178 2179 ycover[Ym*Ymax+Yym] = 1 2180 ycover[Yyr*Ymax+Yym] = 1 2181 2182 ycover[Yxr0*Ymax+YxrEvex] = 1 2183 ycover[Yxr*Ymax+YxrEvex] = 1 2184 2185 ycover[Ym*Ymax+YxmEvex] = 1 2186 ycover[Yxr0*Ymax+YxmEvex] = 1 2187 ycover[Yxr*Ymax+YxmEvex] = 1 2188 ycover[YxrEvex*Ymax+YxmEvex] = 1 2189 2190 ycover[Yyr*Ymax+YyrEvex] = 1 2191 2192 ycover[Ym*Ymax+YymEvex] = 1 2193 ycover[Yyr*Ymax+YymEvex] = 1 2194 ycover[YyrEvex*Ymax+YymEvex] = 1 2195 2196 ycover[Ym*Ymax+Yzm] = 1 2197 ycover[Yzr*Ymax+Yzm] = 1 2198 2199 ycover[Yk0*Ymax+Yk] = 1 2200 ycover[Yknot0*Ymax+Yk] = 1 2201 2202 ycover[Yk0*Ymax+Ykm] = 1 2203 ycover[Yknot0*Ymax+Ykm] = 1 2204 ycover[Yk*Ymax+Ykm] = 1 2205 ycover[Ym*Ymax+Ykm] = 1 2206 2207 ycover[Yxvm*Ymax+YxvmEvex] = 1 2208 2209 ycover[Yyvm*Ymax+YyvmEvex] = 1 2210 2211 for i := 0; i < MAXREG; i++ { 2212 reg[i] = -1 2213 if i >= REG_AL && i <= REG_R15B { 2214 reg[i] = (i - REG_AL) & 7 2215 if i >= REG_SPB && i <= REG_DIB { 2216 regrex[i] = 0x40 2217 } 2218 if i >= REG_R8B && i <= REG_R15B { 2219 regrex[i] = Rxr | Rxx | Rxb 2220 } 2221 } 2222 2223 if i >= REG_AH && i <= REG_BH { 2224 reg[i] = 4 + ((i - REG_AH) & 7) 2225 } 2226 if i >= REG_AX && i <= REG_R15 { 2227 reg[i] = (i - REG_AX) & 7 2228 if i >= REG_R8 { 2229 regrex[i] = Rxr | Rxx | Rxb 2230 } 2231 } 2232 2233 if i >= REG_F0 && i <= REG_F0+7 { 2234 reg[i] = (i - REG_F0) & 7 2235 } 2236 if i >= REG_M0 && i <= REG_M0+7 { 2237 reg[i] = (i - REG_M0) & 7 2238 } 2239 if i >= REG_K0 && i <= REG_K0+7 { 2240 reg[i] = (i - REG_K0) & 7 2241 } 2242 if i >= REG_X0 && i <= REG_X0+15 { 2243 reg[i] = (i - REG_X0) & 7 2244 if i >= REG_X0+8 { 2245 regrex[i] = Rxr | Rxx | Rxb 2246 } 2247 } 2248 if i >= REG_X16 && i <= REG_X16+15 { 2249 reg[i] = (i - REG_X16) & 7 2250 if i >= REG_X16+8 { 2251 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2252 } else { 2253 regrex[i] = RxrEvex 2254 } 2255 } 2256 if i >= REG_Y0 && i <= REG_Y0+15 { 2257 reg[i] = (i - REG_Y0) & 7 2258 if i >= REG_Y0+8 { 2259 regrex[i] = Rxr | Rxx | Rxb 2260 } 2261 } 2262 if i >= REG_Y16 && i <= REG_Y16+15 { 2263 reg[i] = (i - REG_Y16) & 7 2264 if i >= REG_Y16+8 { 2265 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2266 } else { 2267 regrex[i] = RxrEvex 2268 } 2269 } 2270 if i >= REG_Z0 && i <= REG_Z0+15 { 2271 reg[i] = (i - REG_Z0) & 7 2272 if i > REG_Z0+7 { 2273 regrex[i] = Rxr | Rxx | Rxb 2274 } 2275 } 2276 if i >= REG_Z16 && i <= REG_Z16+15 { 2277 reg[i] = (i - REG_Z16) & 7 2278 if i >= REG_Z16+8 { 2279 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2280 } else { 2281 regrex[i] = RxrEvex 2282 } 2283 } 2284 2285 if i >= REG_CR+8 && i <= REG_CR+15 { 2286 regrex[i] = Rxr 2287 } 2288 } 2289 } 2290 2291 var isAndroid = objabi.GOOS == "android" 2292 2293 func prefixof(ctxt *obj.Link, a *obj.Addr) int { 2294 if a.Reg < REG_CS && a.Index < REG_CS { // fast path 2295 return 0 2296 } 2297 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 2298 switch a.Reg { 2299 case REG_CS: 2300 return 0x2e 2301 2302 case REG_DS: 2303 return 0x3e 2304 2305 case REG_ES: 2306 return 0x26 2307 2308 case REG_FS: 2309 return 0x64 2310 2311 case REG_GS: 2312 return 0x65 2313 2314 case REG_TLS: 2315 // NOTE: Systems listed here should be only systems that 2316 // support direct TLS references like 8(TLS) implemented as 2317 // direct references from FS or GS. Systems that require 2318 // the initial-exec model, where you load the TLS base into 2319 // a register and then index from that register, do not reach 2320 // this code and should not be listed. 2321 if ctxt.Arch.Family == sys.I386 { 2322 switch ctxt.Headtype { 2323 default: 2324 if isAndroid { 2325 return 0x65 // GS 2326 } 2327 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2328 2329 case objabi.Hdarwin, 2330 objabi.Hdragonfly, 2331 objabi.Hfreebsd, 2332 objabi.Hnetbsd, 2333 objabi.Hopenbsd: 2334 return 0x65 // GS 2335 } 2336 } 2337 2338 switch ctxt.Headtype { 2339 default: 2340 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2341 2342 case objabi.Hlinux: 2343 if isAndroid { 2344 return 0x64 // FS 2345 } 2346 2347 if ctxt.Flag_shared { 2348 log.Fatalf("unknown TLS base register for linux with -shared") 2349 } else { 2350 return 0x64 // FS 2351 } 2352 2353 case objabi.Hdragonfly, 2354 objabi.Hfreebsd, 2355 objabi.Hnetbsd, 2356 objabi.Hopenbsd, 2357 objabi.Hsolaris: 2358 return 0x64 // FS 2359 2360 case objabi.Hdarwin: 2361 return 0x65 // GS 2362 } 2363 } 2364 } 2365 2366 if ctxt.Arch.Family == sys.I386 { 2367 if a.Index == REG_TLS && ctxt.Flag_shared { 2368 // When building for inclusion into a shared library, an instruction of the form 2369 // MOVL 0(CX)(TLS*1), AX 2370 // becomes 2371 // mov %gs:(%ecx), %eax 2372 // which assumes that the correct TLS offset has been loaded into %ecx (today 2373 // there is only one TLS variable -- g -- so this is OK). When not building for 2374 // a shared library the instruction it becomes 2375 // mov 0x0(%ecx), $eax 2376 // and a R_TLS_LE relocation, and so does not require a prefix. 2377 if a.Offset != 0 { 2378 ctxt.Diag("cannot handle non-0 offsets to TLS") 2379 } 2380 return 0x65 // GS 2381 } 2382 return 0 2383 } 2384 2385 switch a.Index { 2386 case REG_CS: 2387 return 0x2e 2388 2389 case REG_DS: 2390 return 0x3e 2391 2392 case REG_ES: 2393 return 0x26 2394 2395 case REG_TLS: 2396 if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows { 2397 // When building for inclusion into a shared library, an instruction of the form 2398 // MOV 0(CX)(TLS*1), AX 2399 // becomes 2400 // mov %fs:(%rcx), %rax 2401 // which assumes that the correct TLS offset has been loaded into %rcx (today 2402 // there is only one TLS variable -- g -- so this is OK). When not building for 2403 // a shared library the instruction does not require a prefix. 2404 if a.Offset != 0 { 2405 log.Fatalf("cannot handle non-0 offsets to TLS") 2406 } 2407 return 0x64 2408 } 2409 2410 case REG_FS: 2411 return 0x64 2412 2413 case REG_GS: 2414 return 0x65 2415 } 2416 2417 return 0 2418 } 2419 2420 // oclassRegList returns multisource operand class for addr. 2421 func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { 2422 // TODO(quasilyte): when oclass register case is refactored into 2423 // lookup table, use it here to get register kind more easily. 2424 // Helper functions like regIsXmm should go away too (they will become redundant). 2425 2426 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } 2427 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } 2428 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } 2429 2430 reg0, reg1 := decodeRegisterRange(addr.Offset) 2431 low := regIndex(int16(reg0)) 2432 high := regIndex(int16(reg1)) 2433 2434 if ctxt.Arch.Family == sys.I386 { 2435 if low >= 8 || high >= 8 { 2436 return Yxxx 2437 } 2438 } 2439 2440 switch high - low { 2441 case 3: 2442 switch { 2443 case regIsXmm(reg0) && regIsXmm(reg1): 2444 return YxrEvexMulti4 2445 case regIsYmm(reg0) && regIsYmm(reg1): 2446 return YyrEvexMulti4 2447 case regIsZmm(reg0) && regIsZmm(reg1): 2448 return YzrMulti4 2449 default: 2450 return Yxxx 2451 } 2452 default: 2453 return Yxxx 2454 } 2455 } 2456 2457 // oclassVMem returns V-mem (vector memory with VSIB) operand class. 2458 // For addr that is not V-mem returns (Yxxx, false). 2459 func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { 2460 switch addr.Index { 2461 case REG_X0 + 0, 2462 REG_X0 + 1, 2463 REG_X0 + 2, 2464 REG_X0 + 3, 2465 REG_X0 + 4, 2466 REG_X0 + 5, 2467 REG_X0 + 6, 2468 REG_X0 + 7: 2469 return Yxvm, true 2470 case REG_X8 + 0, 2471 REG_X8 + 1, 2472 REG_X8 + 2, 2473 REG_X8 + 3, 2474 REG_X8 + 4, 2475 REG_X8 + 5, 2476 REG_X8 + 6, 2477 REG_X8 + 7: 2478 if ctxt.Arch.Family == sys.I386 { 2479 return Yxxx, true 2480 } 2481 return Yxvm, true 2482 case REG_X16 + 0, 2483 REG_X16 + 1, 2484 REG_X16 + 2, 2485 REG_X16 + 3, 2486 REG_X16 + 4, 2487 REG_X16 + 5, 2488 REG_X16 + 6, 2489 REG_X16 + 7, 2490 REG_X16 + 8, 2491 REG_X16 + 9, 2492 REG_X16 + 10, 2493 REG_X16 + 11, 2494 REG_X16 + 12, 2495 REG_X16 + 13, 2496 REG_X16 + 14, 2497 REG_X16 + 15: 2498 if ctxt.Arch.Family == sys.I386 { 2499 return Yxxx, true 2500 } 2501 return YxvmEvex, true 2502 2503 case REG_Y0 + 0, 2504 REG_Y0 + 1, 2505 REG_Y0 + 2, 2506 REG_Y0 + 3, 2507 REG_Y0 + 4, 2508 REG_Y0 + 5, 2509 REG_Y0 + 6, 2510 REG_Y0 + 7: 2511 return Yyvm, true 2512 case REG_Y8 + 0, 2513 REG_Y8 + 1, 2514 REG_Y8 + 2, 2515 REG_Y8 + 3, 2516 REG_Y8 + 4, 2517 REG_Y8 + 5, 2518 REG_Y8 + 6, 2519 REG_Y8 + 7: 2520 if ctxt.Arch.Family == sys.I386 { 2521 return Yxxx, true 2522 } 2523 return Yyvm, true 2524 case REG_Y16 + 0, 2525 REG_Y16 + 1, 2526 REG_Y16 + 2, 2527 REG_Y16 + 3, 2528 REG_Y16 + 4, 2529 REG_Y16 + 5, 2530 REG_Y16 + 6, 2531 REG_Y16 + 7, 2532 REG_Y16 + 8, 2533 REG_Y16 + 9, 2534 REG_Y16 + 10, 2535 REG_Y16 + 11, 2536 REG_Y16 + 12, 2537 REG_Y16 + 13, 2538 REG_Y16 + 14, 2539 REG_Y16 + 15: 2540 if ctxt.Arch.Family == sys.I386 { 2541 return Yxxx, true 2542 } 2543 return YyvmEvex, true 2544 2545 case REG_Z0 + 0, 2546 REG_Z0 + 1, 2547 REG_Z0 + 2, 2548 REG_Z0 + 3, 2549 REG_Z0 + 4, 2550 REG_Z0 + 5, 2551 REG_Z0 + 6, 2552 REG_Z0 + 7: 2553 return Yzvm, true 2554 case REG_Z8 + 0, 2555 REG_Z8 + 1, 2556 REG_Z8 + 2, 2557 REG_Z8 + 3, 2558 REG_Z8 + 4, 2559 REG_Z8 + 5, 2560 REG_Z8 + 6, 2561 REG_Z8 + 7, 2562 REG_Z8 + 8, 2563 REG_Z8 + 9, 2564 REG_Z8 + 10, 2565 REG_Z8 + 11, 2566 REG_Z8 + 12, 2567 REG_Z8 + 13, 2568 REG_Z8 + 14, 2569 REG_Z8 + 15, 2570 REG_Z8 + 16, 2571 REG_Z8 + 17, 2572 REG_Z8 + 18, 2573 REG_Z8 + 19, 2574 REG_Z8 + 20, 2575 REG_Z8 + 21, 2576 REG_Z8 + 22, 2577 REG_Z8 + 23: 2578 if ctxt.Arch.Family == sys.I386 { 2579 return Yxxx, true 2580 } 2581 return Yzvm, true 2582 } 2583 2584 return Yxxx, false 2585 } 2586 2587 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { 2588 switch a.Type { 2589 case obj.TYPE_REGLIST: 2590 return oclassRegList(ctxt, a) 2591 2592 case obj.TYPE_NONE: 2593 return Ynone 2594 2595 case obj.TYPE_BRANCH: 2596 return Ybr 2597 2598 case obj.TYPE_INDIR: 2599 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { 2600 return Yindir 2601 } 2602 return Yxxx 2603 2604 case obj.TYPE_MEM: 2605 // Pseudo registers have negative index, but SP is 2606 // not pseudo on x86, hence REG_SP check is not redundant. 2607 if a.Index == REG_SP || a.Index < 0 { 2608 // Can't use FP/SB/PC/SP as the index register. 2609 return Yxxx 2610 } 2611 2612 if vmem, ok := oclassVMem(ctxt, a); ok { 2613 return vmem 2614 } 2615 2616 if ctxt.Arch.Family == sys.AMD64 { 2617 switch a.Name { 2618 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: 2619 // Global variables can't use index registers and their 2620 // base register is %rip (%rip is encoded as REG_NONE). 2621 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { 2622 return Yxxx 2623 } 2624 case obj.NAME_AUTO, obj.NAME_PARAM: 2625 // These names must have a base of SP. The old compiler 2626 // uses 0 for the base register. SSA uses REG_SP. 2627 if a.Reg != REG_SP && a.Reg != 0 { 2628 return Yxxx 2629 } 2630 case obj.NAME_NONE: 2631 // everything is ok 2632 default: 2633 // unknown name 2634 return Yxxx 2635 } 2636 } 2637 return Ym 2638 2639 case obj.TYPE_ADDR: 2640 switch a.Name { 2641 case obj.NAME_GOTREF: 2642 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") 2643 return Yxxx 2644 2645 case obj.NAME_EXTERN, 2646 obj.NAME_STATIC: 2647 if a.Sym != nil && useAbs(ctxt, a.Sym) { 2648 return Yi32 2649 } 2650 return Yiauto // use pc-relative addressing 2651 2652 case obj.NAME_AUTO, 2653 obj.NAME_PARAM: 2654 return Yiauto 2655 } 2656 2657 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index 2658 // and got Yi32 in an earlier version of this code. 2659 // Keep doing that until we fix yduff etc. 2660 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { 2661 return Yi32 2662 } 2663 2664 if a.Sym != nil || a.Name != obj.NAME_NONE { 2665 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) 2666 } 2667 fallthrough 2668 2669 case obj.TYPE_CONST: 2670 if a.Sym != nil { 2671 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) 2672 } 2673 2674 v := a.Offset 2675 if ctxt.Arch.Family == sys.I386 { 2676 v = int64(int32(v)) 2677 } 2678 switch { 2679 case v == 0: 2680 return Yi0 2681 case v == 1: 2682 return Yi1 2683 case v >= 0 && v <= 3: 2684 return Yu2 2685 case v >= 0 && v <= 127: 2686 return Yu7 2687 case v >= 0 && v <= 255: 2688 return Yu8 2689 case v >= -128 && v <= 127: 2690 return Yi8 2691 } 2692 if ctxt.Arch.Family == sys.I386 { 2693 return Yi32 2694 } 2695 l := int32(v) 2696 if int64(l) == v { 2697 return Ys32 // can sign extend 2698 } 2699 if v>>32 == 0 { 2700 return Yi32 // unsigned 2701 } 2702 return Yi64 2703 2704 case obj.TYPE_TEXTSIZE: 2705 return Ytextsize 2706 } 2707 2708 if a.Type != obj.TYPE_REG { 2709 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) 2710 return Yxxx 2711 } 2712 2713 switch a.Reg { 2714 case REG_AL: 2715 return Yal 2716 2717 case REG_AX: 2718 return Yax 2719 2720 /* 2721 case REG_SPB: 2722 */ 2723 case REG_BPB, 2724 REG_SIB, 2725 REG_DIB, 2726 REG_R8B, 2727 REG_R9B, 2728 REG_R10B, 2729 REG_R11B, 2730 REG_R12B, 2731 REG_R13B, 2732 REG_R14B, 2733 REG_R15B: 2734 if ctxt.Arch.Family == sys.I386 { 2735 return Yxxx 2736 } 2737 fallthrough 2738 2739 case REG_DL, 2740 REG_BL, 2741 REG_AH, 2742 REG_CH, 2743 REG_DH, 2744 REG_BH: 2745 return Yrb 2746 2747 case REG_CL: 2748 return Ycl 2749 2750 case REG_CX: 2751 return Ycx 2752 2753 case REG_DX, REG_BX: 2754 return Yrx 2755 2756 case REG_R8, // not really Yrl 2757 REG_R9, 2758 REG_R10, 2759 REG_R11, 2760 REG_R12, 2761 REG_R13, 2762 REG_R14, 2763 REG_R15: 2764 if ctxt.Arch.Family == sys.I386 { 2765 return Yxxx 2766 } 2767 fallthrough 2768 2769 case REG_SP, REG_BP, REG_SI, REG_DI: 2770 if ctxt.Arch.Family == sys.I386 { 2771 return Yrl32 2772 } 2773 return Yrl 2774 2775 case REG_F0 + 0: 2776 return Yf0 2777 2778 case REG_F0 + 1, 2779 REG_F0 + 2, 2780 REG_F0 + 3, 2781 REG_F0 + 4, 2782 REG_F0 + 5, 2783 REG_F0 + 6, 2784 REG_F0 + 7: 2785 return Yrf 2786 2787 case REG_M0 + 0, 2788 REG_M0 + 1, 2789 REG_M0 + 2, 2790 REG_M0 + 3, 2791 REG_M0 + 4, 2792 REG_M0 + 5, 2793 REG_M0 + 6, 2794 REG_M0 + 7: 2795 return Ymr 2796 2797 case REG_X0: 2798 return Yxr0 2799 2800 case REG_X0 + 1, 2801 REG_X0 + 2, 2802 REG_X0 + 3, 2803 REG_X0 + 4, 2804 REG_X0 + 5, 2805 REG_X0 + 6, 2806 REG_X0 + 7, 2807 REG_X0 + 8, 2808 REG_X0 + 9, 2809 REG_X0 + 10, 2810 REG_X0 + 11, 2811 REG_X0 + 12, 2812 REG_X0 + 13, 2813 REG_X0 + 14, 2814 REG_X0 + 15: 2815 return Yxr 2816 2817 case REG_X0 + 16, 2818 REG_X0 + 17, 2819 REG_X0 + 18, 2820 REG_X0 + 19, 2821 REG_X0 + 20, 2822 REG_X0 + 21, 2823 REG_X0 + 22, 2824 REG_X0 + 23, 2825 REG_X0 + 24, 2826 REG_X0 + 25, 2827 REG_X0 + 26, 2828 REG_X0 + 27, 2829 REG_X0 + 28, 2830 REG_X0 + 29, 2831 REG_X0 + 30, 2832 REG_X0 + 31: 2833 return YxrEvex 2834 2835 case REG_Y0 + 0, 2836 REG_Y0 + 1, 2837 REG_Y0 + 2, 2838 REG_Y0 + 3, 2839 REG_Y0 + 4, 2840 REG_Y0 + 5, 2841 REG_Y0 + 6, 2842 REG_Y0 + 7, 2843 REG_Y0 + 8, 2844 REG_Y0 + 9, 2845 REG_Y0 + 10, 2846 REG_Y0 + 11, 2847 REG_Y0 + 12, 2848 REG_Y0 + 13, 2849 REG_Y0 + 14, 2850 REG_Y0 + 15: 2851 return Yyr 2852 2853 case REG_Y0 + 16, 2854 REG_Y0 + 17, 2855 REG_Y0 + 18, 2856 REG_Y0 + 19, 2857 REG_Y0 + 20, 2858 REG_Y0 + 21, 2859 REG_Y0 + 22, 2860 REG_Y0 + 23, 2861 REG_Y0 + 24, 2862 REG_Y0 + 25, 2863 REG_Y0 + 26, 2864 REG_Y0 + 27, 2865 REG_Y0 + 28, 2866 REG_Y0 + 29, 2867 REG_Y0 + 30, 2868 REG_Y0 + 31: 2869 return YyrEvex 2870 2871 case REG_Z0 + 0, 2872 REG_Z0 + 1, 2873 REG_Z0 + 2, 2874 REG_Z0 + 3, 2875 REG_Z0 + 4, 2876 REG_Z0 + 5, 2877 REG_Z0 + 6, 2878 REG_Z0 + 7: 2879 return Yzr 2880 2881 case REG_Z0 + 8, 2882 REG_Z0 + 9, 2883 REG_Z0 + 10, 2884 REG_Z0 + 11, 2885 REG_Z0 + 12, 2886 REG_Z0 + 13, 2887 REG_Z0 + 14, 2888 REG_Z0 + 15, 2889 REG_Z0 + 16, 2890 REG_Z0 + 17, 2891 REG_Z0 + 18, 2892 REG_Z0 + 19, 2893 REG_Z0 + 20, 2894 REG_Z0 + 21, 2895 REG_Z0 + 22, 2896 REG_Z0 + 23, 2897 REG_Z0 + 24, 2898 REG_Z0 + 25, 2899 REG_Z0 + 26, 2900 REG_Z0 + 27, 2901 REG_Z0 + 28, 2902 REG_Z0 + 29, 2903 REG_Z0 + 30, 2904 REG_Z0 + 31: 2905 if ctxt.Arch.Family == sys.I386 { 2906 return Yxxx 2907 } 2908 return Yzr 2909 2910 case REG_K0: 2911 return Yk0 2912 2913 case REG_K0 + 1, 2914 REG_K0 + 2, 2915 REG_K0 + 3, 2916 REG_K0 + 4, 2917 REG_K0 + 5, 2918 REG_K0 + 6, 2919 REG_K0 + 7: 2920 return Yknot0 2921 2922 case REG_CS: 2923 return Ycs 2924 case REG_SS: 2925 return Yss 2926 case REG_DS: 2927 return Yds 2928 case REG_ES: 2929 return Yes 2930 case REG_FS: 2931 return Yfs 2932 case REG_GS: 2933 return Ygs 2934 case REG_TLS: 2935 return Ytls 2936 2937 case REG_GDTR: 2938 return Ygdtr 2939 case REG_IDTR: 2940 return Yidtr 2941 case REG_LDTR: 2942 return Yldtr 2943 case REG_MSW: 2944 return Ymsw 2945 case REG_TASK: 2946 return Ytask 2947 2948 case REG_CR + 0: 2949 return Ycr0 2950 case REG_CR + 1: 2951 return Ycr1 2952 case REG_CR + 2: 2953 return Ycr2 2954 case REG_CR + 3: 2955 return Ycr3 2956 case REG_CR + 4: 2957 return Ycr4 2958 case REG_CR + 5: 2959 return Ycr5 2960 case REG_CR + 6: 2961 return Ycr6 2962 case REG_CR + 7: 2963 return Ycr7 2964 case REG_CR + 8: 2965 return Ycr8 2966 2967 case REG_DR + 0: 2968 return Ydr0 2969 case REG_DR + 1: 2970 return Ydr1 2971 case REG_DR + 2: 2972 return Ydr2 2973 case REG_DR + 3: 2974 return Ydr3 2975 case REG_DR + 4: 2976 return Ydr4 2977 case REG_DR + 5: 2978 return Ydr5 2979 case REG_DR + 6: 2980 return Ydr6 2981 case REG_DR + 7: 2982 return Ydr7 2983 2984 case REG_TR + 0: 2985 return Ytr0 2986 case REG_TR + 1: 2987 return Ytr1 2988 case REG_TR + 2: 2989 return Ytr2 2990 case REG_TR + 3: 2991 return Ytr3 2992 case REG_TR + 4: 2993 return Ytr4 2994 case REG_TR + 5: 2995 return Ytr5 2996 case REG_TR + 6: 2997 return Ytr6 2998 case REG_TR + 7: 2999 return Ytr7 3000 } 3001 3002 return Yxxx 3003 } 3004 3005 // AsmBuf is a simple buffer to assemble variable-length x86 instructions into 3006 // and hold assembly state. 3007 type AsmBuf struct { 3008 buf [100]byte 3009 off int 3010 rexflag int 3011 vexflag bool // Per inst: true for VEX-encoded 3012 evexflag bool // Per inst: true for EVEX-encoded 3013 rep bool 3014 repn bool 3015 lock bool 3016 3017 evex evexBits // Initialized when evexflag is true 3018 } 3019 3020 // Put1 appends one byte to the end of the buffer. 3021 func (ab *AsmBuf) Put1(x byte) { 3022 ab.buf[ab.off] = x 3023 ab.off++ 3024 } 3025 3026 // Put2 appends two bytes to the end of the buffer. 3027 func (ab *AsmBuf) Put2(x, y byte) { 3028 ab.buf[ab.off+0] = x 3029 ab.buf[ab.off+1] = y 3030 ab.off += 2 3031 } 3032 3033 // Put3 appends three bytes to the end of the buffer. 3034 func (ab *AsmBuf) Put3(x, y, z byte) { 3035 ab.buf[ab.off+0] = x 3036 ab.buf[ab.off+1] = y 3037 ab.buf[ab.off+2] = z 3038 ab.off += 3 3039 } 3040 3041 // Put4 appends four bytes to the end of the buffer. 3042 func (ab *AsmBuf) Put4(x, y, z, w byte) { 3043 ab.buf[ab.off+0] = x 3044 ab.buf[ab.off+1] = y 3045 ab.buf[ab.off+2] = z 3046 ab.buf[ab.off+3] = w 3047 ab.off += 4 3048 } 3049 3050 // PutInt16 writes v into the buffer using little-endian encoding. 3051 func (ab *AsmBuf) PutInt16(v int16) { 3052 ab.buf[ab.off+0] = byte(v) 3053 ab.buf[ab.off+1] = byte(v >> 8) 3054 ab.off += 2 3055 } 3056 3057 // PutInt32 writes v into the buffer using little-endian encoding. 3058 func (ab *AsmBuf) PutInt32(v int32) { 3059 ab.buf[ab.off+0] = byte(v) 3060 ab.buf[ab.off+1] = byte(v >> 8) 3061 ab.buf[ab.off+2] = byte(v >> 16) 3062 ab.buf[ab.off+3] = byte(v >> 24) 3063 ab.off += 4 3064 } 3065 3066 // PutInt64 writes v into the buffer using little-endian encoding. 3067 func (ab *AsmBuf) PutInt64(v int64) { 3068 ab.buf[ab.off+0] = byte(v) 3069 ab.buf[ab.off+1] = byte(v >> 8) 3070 ab.buf[ab.off+2] = byte(v >> 16) 3071 ab.buf[ab.off+3] = byte(v >> 24) 3072 ab.buf[ab.off+4] = byte(v >> 32) 3073 ab.buf[ab.off+5] = byte(v >> 40) 3074 ab.buf[ab.off+6] = byte(v >> 48) 3075 ab.buf[ab.off+7] = byte(v >> 56) 3076 ab.off += 8 3077 } 3078 3079 // Put copies b into the buffer. 3080 func (ab *AsmBuf) Put(b []byte) { 3081 copy(ab.buf[ab.off:], b) 3082 ab.off += len(b) 3083 } 3084 3085 // PutOpBytesLit writes zero terminated sequence of bytes from op, 3086 // starting at specified offsed (e.g. z counter value). 3087 // Trailing 0 is not written. 3088 // 3089 // Intended to be used for literal Z cases. 3090 // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r). 3091 func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) { 3092 for int(op[offset]) != 0 { 3093 ab.Put1(byte(op[offset])) 3094 offset++ 3095 } 3096 } 3097 3098 // Insert inserts b at offset i. 3099 func (ab *AsmBuf) Insert(i int, b byte) { 3100 ab.off++ 3101 copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1]) 3102 ab.buf[i] = b 3103 } 3104 3105 // Last returns the byte at the end of the buffer. 3106 func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] } 3107 3108 // Len returns the length of the buffer. 3109 func (ab *AsmBuf) Len() int { return ab.off } 3110 3111 // Bytes returns the contents of the buffer. 3112 func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] } 3113 3114 // Reset empties the buffer. 3115 func (ab *AsmBuf) Reset() { ab.off = 0 } 3116 3117 // At returns the byte at offset i. 3118 func (ab *AsmBuf) At(i int) byte { return ab.buf[i] } 3119 3120 // asmidx emits SIB byte. 3121 func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) { 3122 var i int 3123 3124 // X/Y index register is used in VSIB. 3125 switch index { 3126 default: 3127 goto bad 3128 3129 case REG_NONE: 3130 i = 4 << 3 3131 goto bas 3132 3133 case REG_R8, 3134 REG_R9, 3135 REG_R10, 3136 REG_R11, 3137 REG_R12, 3138 REG_R13, 3139 REG_R14, 3140 REG_R15, 3141 REG_X8, 3142 REG_X9, 3143 REG_X10, 3144 REG_X11, 3145 REG_X12, 3146 REG_X13, 3147 REG_X14, 3148 REG_X15, 3149 REG_X16, 3150 REG_X17, 3151 REG_X18, 3152 REG_X19, 3153 REG_X20, 3154 REG_X21, 3155 REG_X22, 3156 REG_X23, 3157 REG_X24, 3158 REG_X25, 3159 REG_X26, 3160 REG_X27, 3161 REG_X28, 3162 REG_X29, 3163 REG_X30, 3164 REG_X31, 3165 REG_Y8, 3166 REG_Y9, 3167 REG_Y10, 3168 REG_Y11, 3169 REG_Y12, 3170 REG_Y13, 3171 REG_Y14, 3172 REG_Y15, 3173 REG_Y16, 3174 REG_Y17, 3175 REG_Y18, 3176 REG_Y19, 3177 REG_Y20, 3178 REG_Y21, 3179 REG_Y22, 3180 REG_Y23, 3181 REG_Y24, 3182 REG_Y25, 3183 REG_Y26, 3184 REG_Y27, 3185 REG_Y28, 3186 REG_Y29, 3187 REG_Y30, 3188 REG_Y31, 3189 REG_Z8, 3190 REG_Z9, 3191 REG_Z10, 3192 REG_Z11, 3193 REG_Z12, 3194 REG_Z13, 3195 REG_Z14, 3196 REG_Z15, 3197 REG_Z16, 3198 REG_Z17, 3199 REG_Z18, 3200 REG_Z19, 3201 REG_Z20, 3202 REG_Z21, 3203 REG_Z22, 3204 REG_Z23, 3205 REG_Z24, 3206 REG_Z25, 3207 REG_Z26, 3208 REG_Z27, 3209 REG_Z28, 3210 REG_Z29, 3211 REG_Z30, 3212 REG_Z31: 3213 if ctxt.Arch.Family == sys.I386 { 3214 goto bad 3215 } 3216 fallthrough 3217 3218 case REG_AX, 3219 REG_CX, 3220 REG_DX, 3221 REG_BX, 3222 REG_BP, 3223 REG_SI, 3224 REG_DI, 3225 REG_X0, 3226 REG_X1, 3227 REG_X2, 3228 REG_X3, 3229 REG_X4, 3230 REG_X5, 3231 REG_X6, 3232 REG_X7, 3233 REG_Y0, 3234 REG_Y1, 3235 REG_Y2, 3236 REG_Y3, 3237 REG_Y4, 3238 REG_Y5, 3239 REG_Y6, 3240 REG_Y7, 3241 REG_Z0, 3242 REG_Z1, 3243 REG_Z2, 3244 REG_Z3, 3245 REG_Z4, 3246 REG_Z5, 3247 REG_Z6, 3248 REG_Z7: 3249 i = reg[index] << 3 3250 } 3251 3252 switch scale { 3253 default: 3254 goto bad 3255 3256 case 1: 3257 break 3258 3259 case 2: 3260 i |= 1 << 6 3261 3262 case 4: 3263 i |= 2 << 6 3264 3265 case 8: 3266 i |= 3 << 6 3267 } 3268 3269 bas: 3270 switch base { 3271 default: 3272 goto bad 3273 3274 case REG_NONE: // must be mod=00 3275 i |= 5 3276 3277 case REG_R8, 3278 REG_R9, 3279 REG_R10, 3280 REG_R11, 3281 REG_R12, 3282 REG_R13, 3283 REG_R14, 3284 REG_R15: 3285 if ctxt.Arch.Family == sys.I386 { 3286 goto bad 3287 } 3288 fallthrough 3289 3290 case REG_AX, 3291 REG_CX, 3292 REG_DX, 3293 REG_BX, 3294 REG_SP, 3295 REG_BP, 3296 REG_SI, 3297 REG_DI: 3298 i |= reg[base] 3299 } 3300 3301 ab.Put1(byte(i)) 3302 return 3303 3304 bad: 3305 ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base) 3306 ab.Put1(0) 3307 } 3308 3309 func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) { 3310 var rel obj.Reloc 3311 3312 v := vaddr(ctxt, p, a, &rel) 3313 if rel.Siz != 0 { 3314 if rel.Siz != 4 { 3315 ctxt.Diag("bad reloc") 3316 } 3317 r := obj.Addrel(cursym) 3318 *r = rel 3319 r.Off = int32(p.Pc + int64(ab.Len())) 3320 } 3321 3322 ab.PutInt32(int32(v)) 3323 } 3324 3325 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 { 3326 if r != nil { 3327 *r = obj.Reloc{} 3328 } 3329 3330 switch a.Name { 3331 case obj.NAME_STATIC, 3332 obj.NAME_GOTREF, 3333 obj.NAME_EXTERN: 3334 s := a.Sym 3335 if r == nil { 3336 ctxt.Diag("need reloc for %v", obj.Dconv(p, a)) 3337 log.Fatalf("reloc") 3338 } 3339 3340 if a.Name == obj.NAME_GOTREF { 3341 r.Siz = 4 3342 r.Type = objabi.R_GOTPCREL 3343 } else if useAbs(ctxt, s) { 3344 r.Siz = 4 3345 r.Type = objabi.R_ADDR 3346 } else { 3347 r.Siz = 4 3348 r.Type = objabi.R_PCREL 3349 } 3350 3351 r.Off = -1 // caller must fill in 3352 r.Sym = s 3353 r.Add = a.Offset 3354 3355 return 0 3356 } 3357 3358 if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS { 3359 if r == nil { 3360 ctxt.Diag("need reloc for %v", obj.Dconv(p, a)) 3361 log.Fatalf("reloc") 3362 } 3363 3364 if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin { 3365 r.Type = objabi.R_TLS_LE 3366 r.Siz = 4 3367 r.Off = -1 // caller must fill in 3368 r.Add = a.Offset 3369 } 3370 return 0 3371 } 3372 3373 return a.Offset 3374 } 3375 3376 func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) { 3377 var base int 3378 var rel obj.Reloc 3379 3380 rex &= 0x40 | Rxr 3381 if a.Offset != int64(int32(a.Offset)) { 3382 // The rules are slightly different for 386 and AMD64, 3383 // mostly for historical reasons. We may unify them later, 3384 // but it must be discussed beforehand. 3385 // 3386 // For 64bit mode only LEAL is allowed to overflow. 3387 // It's how https://golang.org/cl/59630 made it. 3388 // crypto/sha1/sha1block_amd64.s depends on this feature. 3389 // 3390 // For 32bit mode rules are more permissive. 3391 // If offset fits uint32, it's permitted. 3392 // This is allowed for assembly that wants to use 32-bit hex 3393 // constants, e.g. LEAL 0x99999999(AX), AX. 3394 overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) || 3395 (ctxt.Arch.Family != sys.AMD64 && 3396 int64(uint32(a.Offset)) == a.Offset && 3397 ab.rexflag&Rxw == 0) 3398 if !overflowOK { 3399 ctxt.Diag("offset too large in %s", p) 3400 } 3401 } 3402 v := int32(a.Offset) 3403 rel.Siz = 0 3404 3405 switch a.Type { 3406 case obj.TYPE_ADDR: 3407 if a.Name == obj.NAME_NONE { 3408 ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE") 3409 } 3410 if a.Index == REG_TLS { 3411 ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS") 3412 } 3413 goto bad 3414 3415 case obj.TYPE_REG: 3416 const regFirst = REG_AL 3417 const regLast = REG_Z31 3418 if a.Reg < regFirst || regLast < a.Reg { 3419 goto bad 3420 } 3421 if v != 0 { 3422 goto bad 3423 } 3424 ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3)) 3425 ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex 3426 return 3427 } 3428 3429 if a.Type != obj.TYPE_MEM { 3430 goto bad 3431 } 3432 3433 if a.Index != REG_NONE && a.Index != REG_TLS { 3434 base := int(a.Reg) 3435 switch a.Name { 3436 case obj.NAME_EXTERN, 3437 obj.NAME_GOTREF, 3438 obj.NAME_STATIC: 3439 if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 { 3440 goto bad 3441 } 3442 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared { 3443 // The base register has already been set. It holds the PC 3444 // of this instruction returned by a PC-reading thunk. 3445 // See obj6.go:rewriteToPcrel. 3446 } else { 3447 base = REG_NONE 3448 } 3449 v = int32(vaddr(ctxt, p, a, &rel)) 3450 3451 case obj.NAME_AUTO, 3452 obj.NAME_PARAM: 3453 base = REG_SP 3454 } 3455 3456 ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex 3457 if base == REG_NONE { 3458 ab.Put1(byte(0<<6 | 4<<0 | r<<3)) 3459 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3460 goto putrelv 3461 } 3462 3463 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 { 3464 ab.Put1(byte(0<<6 | 4<<0 | r<<3)) 3465 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3466 return 3467 } 3468 3469 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 { 3470 ab.Put1(byte(1<<6 | 4<<0 | r<<3)) 3471 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3472 ab.Put1(disp8) 3473 return 3474 } 3475 3476 ab.Put1(byte(2<<6 | 4<<0 | r<<3)) 3477 ab.asmidx(ctxt, int(a.Scale), int(a.Index), base) 3478 goto putrelv 3479 } 3480 3481 base = int(a.Reg) 3482 switch a.Name { 3483 case obj.NAME_STATIC, 3484 obj.NAME_GOTREF, 3485 obj.NAME_EXTERN: 3486 if a.Sym == nil { 3487 ctxt.Diag("bad addr: %v", p) 3488 } 3489 if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared { 3490 // The base register has already been set. It holds the PC 3491 // of this instruction returned by a PC-reading thunk. 3492 // See obj6.go:rewriteToPcrel. 3493 } else { 3494 base = REG_NONE 3495 } 3496 v = int32(vaddr(ctxt, p, a, &rel)) 3497 3498 case obj.NAME_AUTO, 3499 obj.NAME_PARAM: 3500 base = REG_SP 3501 } 3502 3503 if base == REG_TLS { 3504 v = int32(vaddr(ctxt, p, a, &rel)) 3505 } 3506 3507 ab.rexflag |= regrex[base]&Rxb | rex 3508 if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS { 3509 if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 { 3510 if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) { 3511 ctxt.Diag("%v has offset against gotref", p) 3512 } 3513 ab.Put1(byte(0<<6 | 5<<0 | r<<3)) 3514 goto putrelv 3515 } 3516 3517 // temporary 3518 ab.Put2( 3519 byte(0<<6|4<<0|r<<3), // sib present 3520 0<<6|4<<3|5<<0, // DS:d32 3521 ) 3522 goto putrelv 3523 } 3524 3525 if base == REG_SP || base == REG_R12 { 3526 if v == 0 { 3527 ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3)) 3528 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) 3529 return 3530 } 3531 3532 if disp8, ok := toDisp8(v, p, ab); ok { 3533 ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3)) 3534 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) 3535 ab.Put1(disp8) 3536 return 3537 } 3538 3539 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3)) 3540 ab.asmidx(ctxt, int(a.Scale), REG_NONE, base) 3541 goto putrelv 3542 } 3543 3544 if REG_AX <= base && base <= REG_R15 { 3545 if a.Index == REG_TLS && !ctxt.Flag_shared { 3546 rel = obj.Reloc{} 3547 rel.Type = objabi.R_TLS_LE 3548 rel.Siz = 4 3549 rel.Sym = nil 3550 rel.Add = int64(v) 3551 v = 0 3552 } 3553 3554 if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 { 3555 ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3)) 3556 return 3557 } 3558 3559 if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 { 3560 ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8) 3561 return 3562 } 3563 3564 ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3)) 3565 goto putrelv 3566 } 3567 3568 goto bad 3569 3570 putrelv: 3571 if rel.Siz != 0 { 3572 if rel.Siz != 4 { 3573 ctxt.Diag("bad rel") 3574 goto bad 3575 } 3576 3577 r := obj.Addrel(cursym) 3578 *r = rel 3579 r.Off = int32(p.Pc + int64(ab.Len())) 3580 } 3581 3582 ab.PutInt32(v) 3583 return 3584 3585 bad: 3586 ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a)) 3587 } 3588 3589 func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) { 3590 ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0) 3591 } 3592 3593 func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) { 3594 ab.asmandsz(ctxt, cursym, p, a, o, 0, 0) 3595 } 3596 3597 func bytereg(a *obj.Addr, t *uint8) { 3598 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) { 3599 a.Reg += REG_AL - REG_AX 3600 *t = 0 3601 } 3602 } 3603 3604 func unbytereg(a *obj.Addr, t *uint8) { 3605 if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) { 3606 a.Reg += REG_AX - REG_AL 3607 *t = 0 3608 } 3609 } 3610 3611 const ( 3612 movLit uint8 = iota // Like Zlit 3613 movRegMem 3614 movMemReg 3615 movRegMem2op 3616 movMemReg2op 3617 movFullPtr // Load full pointer, trash heap (unsupported) 3618 movDoubleShift 3619 movTLSReg 3620 ) 3621 3622 var ymovtab = []Movtab{ 3623 // push 3624 {APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}}, 3625 {APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}}, 3626 {APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}}, 3627 {APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}}, 3628 {APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, 3629 {APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, 3630 {APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}}, 3631 {APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}}, 3632 {APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}}, 3633 {APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}}, 3634 {APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}}, 3635 {APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}}, 3636 {APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}}, 3637 {APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}}, 3638 3639 // pop 3640 {APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}}, 3641 {APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}}, 3642 {APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}}, 3643 {APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}}, 3644 {APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}}, 3645 {APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}}, 3646 {APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}}, 3647 {APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}}, 3648 {APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}}, 3649 {APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}}, 3650 {APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}}, 3651 {APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}}, 3652 3653 // mov seg 3654 {AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}}, 3655 {AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}}, 3656 {AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}}, 3657 {AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}}, 3658 {AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}}, 3659 {AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}}, 3660 {AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}}, 3661 {AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}}, 3662 {AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}}, 3663 {AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}}, 3664 {AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}}, 3665 {AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}}, 3666 3667 // mov cr 3668 {AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}}, 3669 {AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}}, 3670 {AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}}, 3671 {AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}}, 3672 {AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}}, 3673 {AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}}, 3674 {AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}}, 3675 {AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}}, 3676 {AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}}, 3677 {AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}}, 3678 {AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}}, 3679 {AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}}, 3680 {AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}}, 3681 {AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}}, 3682 {AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}}, 3683 {AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}}, 3684 {AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}}, 3685 {AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}}, 3686 {AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}}, 3687 {AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}}, 3688 3689 // mov dr 3690 {AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}}, 3691 {AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}}, 3692 {AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}}, 3693 {AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}}, 3694 {AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}}, 3695 {AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}}, 3696 {AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}}, 3697 {AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}}, 3698 {AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}}, 3699 {AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}}, 3700 {AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}}, 3701 {AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}}, 3702 {AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}}, 3703 {AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}}, 3704 {AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}}, 3705 {AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}}, 3706 3707 // mov tr 3708 {AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}}, 3709 {AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}}, 3710 {AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}}, 3711 {AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}}, 3712 3713 // lgdt, sgdt, lidt, sidt 3714 {AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}}, 3715 {AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}}, 3716 {AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}}, 3717 {AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}}, 3718 {AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}}, 3719 {AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}}, 3720 {AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}}, 3721 {AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}}, 3722 3723 // lldt, sldt 3724 {AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}}, 3725 {AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}}, 3726 3727 // lmsw, smsw 3728 {AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}}, 3729 {AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}}, 3730 3731 // ltr, str 3732 {AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}}, 3733 {AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}}, 3734 3735 /* load full pointer - unsupported 3736 Movtab{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}}, 3737 Movtab{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}}, 3738 */ 3739 3740 // double shift 3741 {ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3742 {ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3743 {ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}}, 3744 {ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3745 {ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3746 {ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}}, 3747 {ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3748 {ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3749 {ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}}, 3750 {ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3751 {ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3752 {ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}}, 3753 {ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3754 {ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3755 {ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}}, 3756 {ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3757 {ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3758 {ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}}, 3759 3760 // load TLS base 3761 {AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, 3762 {AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}}, 3763 {0, 0, 0, 0, 0, [4]uint8{}}, 3764 } 3765 3766 func isax(a *obj.Addr) bool { 3767 switch a.Reg { 3768 case REG_AX, REG_AL, REG_AH: 3769 return true 3770 } 3771 3772 if a.Index == REG_AX { 3773 return true 3774 } 3775 return false 3776 } 3777 3778 func subreg(p *obj.Prog, from int, to int) { 3779 if false { /* debug['Q'] */ 3780 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to)) 3781 } 3782 3783 if int(p.From.Reg) == from { 3784 p.From.Reg = int16(to) 3785 p.Ft = 0 3786 } 3787 3788 if int(p.To.Reg) == from { 3789 p.To.Reg = int16(to) 3790 p.Tt = 0 3791 } 3792 3793 if int(p.From.Index) == from { 3794 p.From.Index = int16(to) 3795 p.Ft = 0 3796 } 3797 3798 if int(p.To.Index) == from { 3799 p.To.Index = int16(to) 3800 p.Tt = 0 3801 } 3802 3803 if false { /* debug['Q'] */ 3804 fmt.Printf("%v\n", p) 3805 } 3806 } 3807 3808 func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int { 3809 switch op { 3810 case Pm, Pe, Pf2, Pf3: 3811 if osize != 1 { 3812 if op != Pm { 3813 ab.Put1(byte(op)) 3814 } 3815 ab.Put1(Pm) 3816 z++ 3817 op = int(o.op[z]) 3818 break 3819 } 3820 fallthrough 3821 3822 default: 3823 if ab.Len() == 0 || ab.Last() != Pm { 3824 ab.Put1(Pm) 3825 } 3826 } 3827 3828 ab.Put1(byte(op)) 3829 return z 3830 } 3831 3832 var bpduff1 = []byte{ 3833 0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP) 3834 0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP 3835 } 3836 3837 var bpduff2 = []byte{ 3838 0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP 3839 } 3840 3841 // asmevex emits EVEX pregis and opcode byte. 3842 // In addition to asmvex r/m, vvvv and reg fields also requires optional 3843 // K-masking register. 3844 // 3845 // Expects asmbuf.evex to be properly initialized. 3846 func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) { 3847 ab.evexflag = true 3848 evex := ab.evex 3849 3850 rexR := byte(1) 3851 evexR := byte(1) 3852 rexX := byte(1) 3853 rexB := byte(1) 3854 if r != nil { 3855 if regrex[r.Reg]&Rxr != 0 { 3856 rexR = 0 // "ModR/M.reg" selector 4th bit. 3857 } 3858 if regrex[r.Reg]&RxrEvex != 0 { 3859 evexR = 0 // "ModR/M.reg" selector 5th bit. 3860 } 3861 } 3862 if rm != nil { 3863 if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 { 3864 rexX = 0 3865 } else if regrex[rm.Index]&Rxx != 0 { 3866 rexX = 0 3867 } 3868 if regrex[rm.Reg]&Rxb != 0 { 3869 rexB = 0 3870 } 3871 } 3872 // P0 = [R][X][B][R'][00][mm] 3873 p0 := (rexR << 7) | 3874 (rexX << 6) | 3875 (rexB << 5) | 3876 (evexR << 4) | 3877 (0 << 2) | 3878 (evex.M() << 0) 3879 3880 vexV := byte(0) 3881 if v != nil { 3882 // 4bit-wide reg index. 3883 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF 3884 } 3885 vexV ^= 0x0F 3886 // P1 = [W][vvvv][1][pp] 3887 p1 := (evex.W() << 7) | 3888 (vexV << 3) | 3889 (1 << 2) | 3890 (evex.P() << 0) 3891 3892 suffix := evexSuffixMap[p.Scond] 3893 evexZ := byte(0) 3894 evexLL := evex.L() 3895 evexB := byte(0) 3896 evexV := byte(1) 3897 evexA := byte(0) 3898 if suffix.zeroing { 3899 if !evex.ZeroingEnabled() { 3900 ctxt.Diag("unsupported zeroing: %v", p) 3901 } 3902 evexZ = 1 3903 } 3904 switch { 3905 case suffix.rounding != rcUnset: 3906 if rm != nil && rm.Type == obj.TYPE_MEM { 3907 ctxt.Diag("illegal rounding with memory argument: %v", p) 3908 } else if !evex.RoundingEnabled() { 3909 ctxt.Diag("unsupported rounding: %v", p) 3910 } 3911 evexB = 1 3912 evexLL = suffix.rounding 3913 case suffix.broadcast: 3914 if rm == nil || rm.Type != obj.TYPE_MEM { 3915 ctxt.Diag("illegal broadcast without memory argument: %v", p) 3916 } else if !evex.BroadcastEnabled() { 3917 ctxt.Diag("unsupported broadcast: %v", p) 3918 } 3919 evexB = 1 3920 case suffix.sae: 3921 if rm != nil && rm.Type == obj.TYPE_MEM { 3922 ctxt.Diag("illegal SAE with memory argument: %v", p) 3923 } else if !evex.SaeEnabled() { 3924 ctxt.Diag("unsupported SAE: %v", p) 3925 } 3926 evexB = 1 3927 } 3928 if rm != nil && regrex[rm.Index]&RxrEvex != 0 { 3929 evexV = 0 3930 } else if v != nil && regrex[v.Reg]&RxrEvex != 0 { 3931 evexV = 0 // VSR selector 5th bit. 3932 } 3933 if k != nil { 3934 evexA = byte(reg[k.Reg]) 3935 } 3936 // P2 = [z][L'L][b][V'][aaa] 3937 p2 := (evexZ << 7) | 3938 (evexLL << 5) | 3939 (evexB << 4) | 3940 (evexV << 3) | 3941 (evexA << 0) 3942 3943 const evexEscapeByte = 0x62 3944 ab.Put4(evexEscapeByte, p0, p1, p2) 3945 ab.Put1(evex.opcode) 3946 } 3947 3948 // Emit VEX prefix and opcode byte. 3949 // The three addresses are the r/m, vvvv, and reg fields. 3950 // The reg and rm arguments appear in the same order as the 3951 // arguments to asmand, which typically follows the call to asmvex. 3952 // The final two arguments are the VEX prefix (see encoding above) 3953 // and the opcode byte. 3954 // For details about vex prefix see: 3955 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description 3956 func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) { 3957 ab.vexflag = true 3958 rexR := 0 3959 if r != nil { 3960 rexR = regrex[r.Reg] & Rxr 3961 } 3962 rexB := 0 3963 rexX := 0 3964 if rm != nil { 3965 rexB = regrex[rm.Reg] & Rxb 3966 rexX = regrex[rm.Index] & Rxx 3967 } 3968 vexM := (vex >> 3) & 0x7 3969 vexWLP := vex & 0x87 3970 vexV := byte(0) 3971 if v != nil { 3972 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF 3973 } 3974 vexV ^= 0xF 3975 if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 { 3976 // Can use 2-byte encoding. 3977 ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP) 3978 } else { 3979 // Must use 3-byte encoding. 3980 ab.Put3(0xc4, 3981 (byte(rexR|rexX|rexB)<<5)^0xE0|vexM, 3982 vexV<<3|vexWLP, 3983 ) 3984 } 3985 ab.Put1(opcode) 3986 } 3987 3988 // regIndex returns register index that fits in 5 bits. 3989 // 3990 // R : 3 bit | legacy instructions | N/A 3991 // [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr 3992 // EVEX.R : 1 bit | EVEX extension bit | RxrEvex 3993 // 3994 // Examples: 3995 // REG_Z30 => 30 3996 // REG_X15 => 15 3997 // REG_R9 => 9 3998 // REG_AX => 0 3999 // 4000 func regIndex(r int16) int { 4001 lower3bits := reg[r] 4002 high4bit := regrex[r] & Rxr << 1 4003 high5bit := regrex[r] & RxrEvex << 0 4004 return lower3bits | high4bit | high5bit 4005 } 4006 4007 // avx2gatherValid reports whether p satisfies AVX2 gather constraints. 4008 // Reports errors via ctxt. 4009 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool { 4010 // If any pair of the index, mask, or destination registers 4011 // are the same, illegal instruction trap (#UD) is triggered. 4012 index := regIndex(p.GetFrom3().Index) 4013 mask := regIndex(p.From.Reg) 4014 dest := regIndex(p.To.Reg) 4015 if dest == mask || dest == index || mask == index { 4016 ctxt.Diag("mask, index, and destination registers should be distinct: %v", p) 4017 return false 4018 } 4019 4020 return true 4021 } 4022 4023 // avx512gatherValid reports whether p satisfies AVX512 gather constraints. 4024 // Reports errors via ctxt. 4025 func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool { 4026 // Illegal instruction trap (#UD) is triggered if the destination vector 4027 // register is the same as index vector in VSIB. 4028 index := regIndex(p.From.Index) 4029 dest := regIndex(p.To.Reg) 4030 if dest == index { 4031 ctxt.Diag("index and destination registers should be distinct: %v", p) 4032 return false 4033 } 4034 4035 return true 4036 } 4037 4038 func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 4039 o := opindex[p.As&obj.AMask] 4040 4041 if o == nil { 4042 ctxt.Diag("asmins: missing op %v", p) 4043 return 4044 } 4045 4046 if pre := prefixof(ctxt, &p.From); pre != 0 { 4047 ab.Put1(byte(pre)) 4048 } 4049 if pre := prefixof(ctxt, &p.To); pre != 0 { 4050 ab.Put1(byte(pre)) 4051 } 4052 4053 // Checks to warn about instruction/arguments combinations that 4054 // will unconditionally trigger illegal instruction trap (#UD). 4055 switch p.As { 4056 case AVGATHERDPD, 4057 AVGATHERQPD, 4058 AVGATHERDPS, 4059 AVGATHERQPS, 4060 AVPGATHERDD, 4061 AVPGATHERQD, 4062 AVPGATHERDQ, 4063 AVPGATHERQQ: 4064 // AVX512 gather requires explicit K mask. 4065 if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 { 4066 if !avx512gatherValid(ctxt, p) { 4067 return 4068 } 4069 } else { 4070 if !avx2gatherValid(ctxt, p) { 4071 return 4072 } 4073 } 4074 } 4075 4076 if p.Ft == 0 { 4077 p.Ft = uint8(oclass(ctxt, p, &p.From)) 4078 } 4079 if p.Tt == 0 { 4080 p.Tt = uint8(oclass(ctxt, p, &p.To)) 4081 } 4082 4083 ft := int(p.Ft) * Ymax 4084 var f3t int 4085 tt := int(p.Tt) * Ymax 4086 4087 xo := obj.Bool2int(o.op[0] == 0x0f) 4088 z := 0 4089 var a *obj.Addr 4090 var l int 4091 var op int 4092 var q *obj.Prog 4093 var r *obj.Reloc 4094 var rel obj.Reloc 4095 var v int64 4096 4097 args := make([]int, 0, argListMax) 4098 if ft != Ynone*Ymax { 4099 args = append(args, ft) 4100 } 4101 for i := range p.RestArgs { 4102 args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax) 4103 } 4104 if tt != Ynone*Ymax { 4105 args = append(args, tt) 4106 } 4107 4108 for _, yt := range o.ytab { 4109 // ytab matching is purely args-based, 4110 // but AVX512 suffixes like "Z" or "RU_SAE" will 4111 // add EVEX-only filter that will reject non-EVEX matches. 4112 // 4113 // Consider "VADDPD.BCST 2032(DX), X0, X0". 4114 // Without this rule, operands will lead to VEX-encoded form 4115 // and produce "c5b15813" encoding. 4116 if !yt.match(args) { 4117 // "xo" is always zero for VEX/EVEX encoded insts. 4118 z += int(yt.zoffset) + xo 4119 } else { 4120 if p.Scond != 0 && !evexZcase(yt.zcase) { 4121 // Do not signal error and continue to search 4122 // for matching EVEX-encoded form. 4123 z += int(yt.zoffset) 4124 continue 4125 } 4126 4127 switch o.prefix { 4128 case Px1: // first option valid only in 32-bit mode 4129 if ctxt.Arch.Family == sys.AMD64 && z == 0 { 4130 z += int(yt.zoffset) + xo 4131 continue 4132 } 4133 case Pq: // 16 bit escape and opcode escape 4134 ab.Put2(Pe, Pm) 4135 4136 case Pq3: // 16 bit escape and opcode escape + REX.W 4137 ab.rexflag |= Pw 4138 ab.Put2(Pe, Pm) 4139 4140 case Pq4: // 66 0F 38 4141 ab.Put3(0x66, 0x0F, 0x38) 4142 4143 case Pq4w: // 66 0F 38 + REX.W 4144 ab.rexflag |= Pw 4145 ab.Put3(0x66, 0x0F, 0x38) 4146 4147 case Pq5: // F3 0F 38 4148 ab.Put3(0xF3, 0x0F, 0x38) 4149 4150 case Pq5w: // F3 0F 38 + REX.W 4151 ab.rexflag |= Pw 4152 ab.Put3(0xF3, 0x0F, 0x38) 4153 4154 case Pf2, // xmm opcode escape 4155 Pf3: 4156 ab.Put2(o.prefix, Pm) 4157 4158 case Pef3: 4159 ab.Put3(Pe, Pf3, Pm) 4160 4161 case Pfw: // xmm opcode escape + REX.W 4162 ab.rexflag |= Pw 4163 ab.Put2(Pf3, Pm) 4164 4165 case Pm: // opcode escape 4166 ab.Put1(Pm) 4167 4168 case Pe: // 16 bit escape 4169 ab.Put1(Pe) 4170 4171 case Pw: // 64-bit escape 4172 if ctxt.Arch.Family != sys.AMD64 { 4173 ctxt.Diag("asmins: illegal 64: %v", p) 4174 } 4175 ab.rexflag |= Pw 4176 4177 case Pw8: // 64-bit escape if z >= 8 4178 if z >= 8 { 4179 if ctxt.Arch.Family != sys.AMD64 { 4180 ctxt.Diag("asmins: illegal 64: %v", p) 4181 } 4182 ab.rexflag |= Pw 4183 } 4184 4185 case Pb: // botch 4186 if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) { 4187 goto bad 4188 } 4189 // NOTE(rsc): This is probably safe to do always, 4190 // but when enabled it chooses different encodings 4191 // than the old cmd/internal/obj/i386 code did, 4192 // which breaks our "same bits out" checks. 4193 // In particular, CMPB AX, $0 encodes as 80 f8 00 4194 // in the original obj/i386, and it would encode 4195 // (using a valid, shorter form) as 3c 00 if we enabled 4196 // the call to bytereg here. 4197 if ctxt.Arch.Family == sys.AMD64 { 4198 bytereg(&p.From, &p.Ft) 4199 bytereg(&p.To, &p.Tt) 4200 } 4201 4202 case P32: // 32 bit but illegal if 64-bit mode 4203 if ctxt.Arch.Family == sys.AMD64 { 4204 ctxt.Diag("asmins: illegal in 64-bit mode: %v", p) 4205 } 4206 4207 case Py: // 64-bit only, no prefix 4208 if ctxt.Arch.Family != sys.AMD64 { 4209 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4210 } 4211 4212 case Py1: // 64-bit only if z < 1, no prefix 4213 if z < 1 && ctxt.Arch.Family != sys.AMD64 { 4214 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4215 } 4216 4217 case Py3: // 64-bit only if z < 3, no prefix 4218 if z < 3 && ctxt.Arch.Family != sys.AMD64 { 4219 ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p) 4220 } 4221 } 4222 4223 if z >= len(o.op) { 4224 log.Fatalf("asmins bad table %v", p) 4225 } 4226 op = int(o.op[z]) 4227 if op == 0x0f { 4228 ab.Put1(byte(op)) 4229 z++ 4230 op = int(o.op[z]) 4231 } 4232 4233 switch yt.zcase { 4234 default: 4235 ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p) 4236 return 4237 4238 case Zpseudo: 4239 break 4240 4241 case Zlit: 4242 ab.PutOpBytesLit(z, &o.op) 4243 4244 case Zlitr_m: 4245 ab.PutOpBytesLit(z, &o.op) 4246 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4247 4248 case Zlitm_r: 4249 ab.PutOpBytesLit(z, &o.op) 4250 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4251 4252 case Zlit_m_r: 4253 ab.PutOpBytesLit(z, &o.op) 4254 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4255 4256 case Zmb_r: 4257 bytereg(&p.From, &p.Ft) 4258 fallthrough 4259 4260 case Zm_r: 4261 ab.Put1(byte(op)) 4262 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4263 4264 case Z_m_r: 4265 ab.Put1(byte(op)) 4266 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4267 4268 case Zm2_r: 4269 ab.Put2(byte(op), o.op[z+1]) 4270 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4271 4272 case Zm_r_xm: 4273 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4274 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4275 4276 case Zm_r_xm_nr: 4277 ab.rexflag = 0 4278 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4279 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4280 4281 case Zm_r_i_xm: 4282 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4283 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3()) 4284 ab.Put1(byte(p.To.Offset)) 4285 4286 case Zibm_r, Zibr_m: 4287 ab.PutOpBytesLit(z, &o.op) 4288 if yt.zcase == Zibr_m { 4289 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4290 } else { 4291 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4292 } 4293 switch { 4294 default: 4295 ab.Put1(byte(p.From.Offset)) 4296 case yt.args[0] == Yi32 && o.prefix == Pe: 4297 ab.PutInt16(int16(p.From.Offset)) 4298 case yt.args[0] == Yi32: 4299 ab.PutInt32(int32(p.From.Offset)) 4300 } 4301 4302 case Zaut_r: 4303 ab.Put1(0x8d) // leal 4304 if p.From.Type != obj.TYPE_ADDR { 4305 ctxt.Diag("asmins: Zaut sb type ADDR") 4306 } 4307 p.From.Type = obj.TYPE_MEM 4308 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4309 p.From.Type = obj.TYPE_ADDR 4310 4311 case Zm_o: 4312 ab.Put1(byte(op)) 4313 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4314 4315 case Zr_m: 4316 ab.Put1(byte(op)) 4317 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4318 4319 case Zvex: 4320 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4321 4322 case Zvex_rm_v_r: 4323 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4324 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4325 4326 case Zvex_rm_v_ro: 4327 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4328 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) 4329 4330 case Zvex_i_rm_vo: 4331 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4332 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2])) 4333 ab.Put1(byte(p.From.Offset)) 4334 4335 case Zvex_i_r_v: 4336 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4337 regnum := byte(0x7) 4338 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 { 4339 regnum &= byte(p.GetFrom3().Reg - REG_X0) 4340 } else { 4341 regnum &= byte(p.GetFrom3().Reg - REG_Y0) 4342 } 4343 ab.Put1(o.op[z+2] | regnum) 4344 ab.Put1(byte(p.From.Offset)) 4345 4346 case Zvex_i_rm_v_r: 4347 imm, from, from3, to := unpackOps4(p) 4348 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4349 ab.asmand(ctxt, cursym, p, from, to) 4350 ab.Put1(byte(imm.Offset)) 4351 4352 case Zvex_i_rm_r: 4353 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1]) 4354 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4355 ab.Put1(byte(p.From.Offset)) 4356 4357 case Zvex_v_rm_r: 4358 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1]) 4359 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4360 4361 case Zvex_r_v_rm: 4362 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) 4363 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4364 4365 case Zvex_rm_r_vo: 4366 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) 4367 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) 4368 4369 case Zvex_i_r_rm: 4370 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) 4371 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4372 ab.Put1(byte(p.From.Offset)) 4373 4374 case Zvex_hr_rm_v_r: 4375 hr, from, from3, to := unpackOps4(p) 4376 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4377 ab.asmand(ctxt, cursym, p, from, to) 4378 ab.Put1(byte(regIndex(hr.Reg) << 4)) 4379 4380 case Zevex_k_rmo: 4381 ab.evex = newEVEXBits(z, &o.op) 4382 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From) 4383 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3])) 4384 4385 case Zevex_i_rm_vo: 4386 ab.evex = newEVEXBits(z, &o.op) 4387 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil) 4388 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3])) 4389 ab.Put1(byte(p.From.Offset)) 4390 4391 case Zevex_i_rm_k_vo: 4392 imm, from, kmask, to := unpackOps4(p) 4393 ab.evex = newEVEXBits(z, &o.op) 4394 ab.asmevex(ctxt, p, from, to, nil, kmask) 4395 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3])) 4396 ab.Put1(byte(imm.Offset)) 4397 4398 case Zevex_i_r_rm: 4399 ab.evex = newEVEXBits(z, &o.op) 4400 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil) 4401 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4402 ab.Put1(byte(p.From.Offset)) 4403 4404 case Zevex_i_r_k_rm: 4405 imm, from, kmask, to := unpackOps4(p) 4406 ab.evex = newEVEXBits(z, &o.op) 4407 ab.asmevex(ctxt, p, to, nil, from, kmask) 4408 ab.asmand(ctxt, cursym, p, to, from) 4409 ab.Put1(byte(imm.Offset)) 4410 4411 case Zevex_i_rm_r: 4412 ab.evex = newEVEXBits(z, &o.op) 4413 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil) 4414 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4415 ab.Put1(byte(p.From.Offset)) 4416 4417 case Zevex_i_rm_k_r: 4418 imm, from, kmask, to := unpackOps4(p) 4419 ab.evex = newEVEXBits(z, &o.op) 4420 ab.asmevex(ctxt, p, from, nil, to, kmask) 4421 ab.asmand(ctxt, cursym, p, from, to) 4422 ab.Put1(byte(imm.Offset)) 4423 4424 case Zevex_i_rm_v_r: 4425 imm, from, from3, to := unpackOps4(p) 4426 ab.evex = newEVEXBits(z, &o.op) 4427 ab.asmevex(ctxt, p, from, from3, to, nil) 4428 ab.asmand(ctxt, cursym, p, from, to) 4429 ab.Put1(byte(imm.Offset)) 4430 4431 case Zevex_i_rm_v_k_r: 4432 imm, from, from3, kmask, to := unpackOps5(p) 4433 ab.evex = newEVEXBits(z, &o.op) 4434 ab.asmevex(ctxt, p, from, from3, to, kmask) 4435 ab.asmand(ctxt, cursym, p, from, to) 4436 ab.Put1(byte(imm.Offset)) 4437 4438 case Zevex_r_v_rm: 4439 ab.evex = newEVEXBits(z, &o.op) 4440 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil) 4441 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4442 4443 case Zevex_rm_v_r: 4444 ab.evex = newEVEXBits(z, &o.op) 4445 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil) 4446 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4447 4448 case Zevex_rm_k_r: 4449 ab.evex = newEVEXBits(z, &o.op) 4450 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3()) 4451 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4452 4453 case Zevex_r_k_rm: 4454 ab.evex = newEVEXBits(z, &o.op) 4455 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3()) 4456 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4457 4458 case Zevex_rm_v_k_r: 4459 from, from3, kmask, to := unpackOps4(p) 4460 ab.evex = newEVEXBits(z, &o.op) 4461 ab.asmevex(ctxt, p, from, from3, to, kmask) 4462 ab.asmand(ctxt, cursym, p, from, to) 4463 4464 case Zevex_r_v_k_rm: 4465 from, from3, kmask, to := unpackOps4(p) 4466 ab.evex = newEVEXBits(z, &o.op) 4467 ab.asmevex(ctxt, p, to, from3, from, kmask) 4468 ab.asmand(ctxt, cursym, p, to, from) 4469 4470 case Zr_m_xm: 4471 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4472 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4473 4474 case Zr_m_xm_nr: 4475 ab.rexflag = 0 4476 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4477 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4478 4479 case Zo_m: 4480 ab.Put1(byte(op)) 4481 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4482 4483 case Zcallindreg: 4484 r = obj.Addrel(cursym) 4485 r.Off = int32(p.Pc) 4486 r.Type = objabi.R_CALLIND 4487 r.Siz = 0 4488 fallthrough 4489 4490 case Zo_m64: 4491 ab.Put1(byte(op)) 4492 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1) 4493 4494 case Zm_ibo: 4495 ab.Put1(byte(op)) 4496 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4497 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil))) 4498 4499 case Zibo_m: 4500 ab.Put1(byte(op)) 4501 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4502 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4503 4504 case Zibo_m_xm: 4505 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4506 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4507 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4508 4509 case Z_ib, Zib_: 4510 if yt.zcase == Zib_ { 4511 a = &p.From 4512 } else { 4513 a = &p.To 4514 } 4515 ab.Put1(byte(op)) 4516 if p.As == AXABORT { 4517 ab.Put1(o.op[z+1]) 4518 } 4519 ab.Put1(byte(vaddr(ctxt, p, a, nil))) 4520 4521 case Zib_rp: 4522 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4523 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil))) 4524 4525 case Zil_rp: 4526 ab.rexflag |= regrex[p.To.Reg] & Rxb 4527 ab.Put1(byte(op + reg[p.To.Reg])) 4528 if o.prefix == Pe { 4529 v = vaddr(ctxt, p, &p.From, nil) 4530 ab.PutInt16(int16(v)) 4531 } else { 4532 ab.relput4(ctxt, cursym, p, &p.From) 4533 } 4534 4535 case Zo_iw: 4536 ab.Put1(byte(op)) 4537 if p.From.Type != obj.TYPE_NONE { 4538 v = vaddr(ctxt, p, &p.From, nil) 4539 ab.PutInt16(int16(v)) 4540 } 4541 4542 case Ziq_rp: 4543 v = vaddr(ctxt, p, &p.From, &rel) 4544 l = int(v >> 32) 4545 if l == 0 && rel.Siz != 8 { 4546 ab.rexflag &^= (0x40 | Rxw) 4547 4548 ab.rexflag |= regrex[p.To.Reg] & Rxb 4549 ab.Put1(byte(0xb8 + reg[p.To.Reg])) 4550 if rel.Type != 0 { 4551 r = obj.Addrel(cursym) 4552 *r = rel 4553 r.Off = int32(p.Pc + int64(ab.Len())) 4554 } 4555 4556 ab.PutInt32(int32(v)) 4557 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend 4558 ab.Put1(0xc7) 4559 ab.asmando(ctxt, cursym, p, &p.To, 0) 4560 4561 ab.PutInt32(int32(v)) // need all 8 4562 } else { 4563 ab.rexflag |= regrex[p.To.Reg] & Rxb 4564 ab.Put1(byte(op + reg[p.To.Reg])) 4565 if rel.Type != 0 { 4566 r = obj.Addrel(cursym) 4567 *r = rel 4568 r.Off = int32(p.Pc + int64(ab.Len())) 4569 } 4570 4571 ab.PutInt64(v) 4572 } 4573 4574 case Zib_rr: 4575 ab.Put1(byte(op)) 4576 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4577 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4578 4579 case Z_il, Zil_: 4580 if yt.zcase == Zil_ { 4581 a = &p.From 4582 } else { 4583 a = &p.To 4584 } 4585 ab.Put1(byte(op)) 4586 if o.prefix == Pe { 4587 v = vaddr(ctxt, p, a, nil) 4588 ab.PutInt16(int16(v)) 4589 } else { 4590 ab.relput4(ctxt, cursym, p, a) 4591 } 4592 4593 case Zm_ilo, Zilo_m: 4594 ab.Put1(byte(op)) 4595 if yt.zcase == Zilo_m { 4596 a = &p.From 4597 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4598 } else { 4599 a = &p.To 4600 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4601 } 4602 4603 if o.prefix == Pe { 4604 v = vaddr(ctxt, p, a, nil) 4605 ab.PutInt16(int16(v)) 4606 } else { 4607 ab.relput4(ctxt, cursym, p, a) 4608 } 4609 4610 case Zil_rr: 4611 ab.Put1(byte(op)) 4612 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4613 if o.prefix == Pe { 4614 v = vaddr(ctxt, p, &p.From, nil) 4615 ab.PutInt16(int16(v)) 4616 } else { 4617 ab.relput4(ctxt, cursym, p, &p.From) 4618 } 4619 4620 case Z_rp: 4621 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4622 ab.Put1(byte(op + reg[p.To.Reg])) 4623 4624 case Zrp_: 4625 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) 4626 ab.Put1(byte(op + reg[p.From.Reg])) 4627 4628 case Zcallcon, Zjmpcon: 4629 if yt.zcase == Zcallcon { 4630 ab.Put1(byte(op)) 4631 } else { 4632 ab.Put1(o.op[z+1]) 4633 } 4634 r = obj.Addrel(cursym) 4635 r.Off = int32(p.Pc + int64(ab.Len())) 4636 r.Type = objabi.R_PCREL 4637 r.Siz = 4 4638 r.Add = p.To.Offset 4639 ab.PutInt32(0) 4640 4641 case Zcallind: 4642 ab.Put2(byte(op), o.op[z+1]) 4643 r = obj.Addrel(cursym) 4644 r.Off = int32(p.Pc + int64(ab.Len())) 4645 if ctxt.Arch.Family == sys.AMD64 { 4646 r.Type = objabi.R_PCREL 4647 } else { 4648 r.Type = objabi.R_ADDR 4649 } 4650 r.Siz = 4 4651 r.Add = p.To.Offset 4652 r.Sym = p.To.Sym 4653 ab.PutInt32(0) 4654 4655 case Zcall, Zcallduff: 4656 if p.To.Sym == nil { 4657 ctxt.Diag("call without target") 4658 ctxt.DiagFlush() 4659 log.Fatalf("bad code") 4660 } 4661 4662 if yt.zcase == Zcallduff && ctxt.Flag_dynlink { 4663 ctxt.Diag("directly calling duff when dynamically linking Go") 4664 } 4665 4666 if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4667 // Maintain BP around call, since duffcopy/duffzero can't do it 4668 // (the call jumps into the middle of the function). 4669 // This makes it possible to see call sites for duffcopy/duffzero in 4670 // BP-based profiling tools like Linux perf (which is the 4671 // whole point of obj.Framepointer_enabled). 4672 // MOVQ BP, -16(SP) 4673 // LEAQ -16(SP), BP 4674 ab.Put(bpduff1) 4675 } 4676 ab.Put1(byte(op)) 4677 r = obj.Addrel(cursym) 4678 r.Off = int32(p.Pc + int64(ab.Len())) 4679 r.Sym = p.To.Sym 4680 r.Add = p.To.Offset 4681 r.Type = objabi.R_CALL 4682 r.Siz = 4 4683 ab.PutInt32(0) 4684 4685 if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4686 // Pop BP pushed above. 4687 // MOVQ 0(BP), BP 4688 ab.Put(bpduff2) 4689 } 4690 4691 // TODO: jump across functions needs reloc 4692 case Zbr, Zjmp, Zloop: 4693 if p.As == AXBEGIN { 4694 ab.Put1(byte(op)) 4695 } 4696 if p.To.Sym != nil { 4697 if yt.zcase != Zjmp { 4698 ctxt.Diag("branch to ATEXT") 4699 ctxt.DiagFlush() 4700 log.Fatalf("bad code") 4701 } 4702 4703 ab.Put1(o.op[z+1]) 4704 r = obj.Addrel(cursym) 4705 r.Off = int32(p.Pc + int64(ab.Len())) 4706 r.Sym = p.To.Sym 4707 // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that 4708 // it can point to a trampoline instead of the destination itself. 4709 r.Type = objabi.R_CALL 4710 r.Siz = 4 4711 ab.PutInt32(0) 4712 break 4713 } 4714 4715 // Assumes q is in this function. 4716 // TODO: Check in input, preserve in brchain. 4717 4718 // Fill in backward jump now. 4719 q = p.Pcond 4720 4721 if q == nil { 4722 ctxt.Diag("jmp/branch/loop without target") 4723 ctxt.DiagFlush() 4724 log.Fatalf("bad code") 4725 } 4726 4727 if p.Back&branchBackwards != 0 { 4728 v = q.Pc - (p.Pc + 2) 4729 if v >= -128 && p.As != AXBEGIN { 4730 if p.As == AJCXZL { 4731 ab.Put1(0x67) 4732 } 4733 ab.Put2(byte(op), byte(v)) 4734 } else if yt.zcase == Zloop { 4735 ctxt.Diag("loop too far: %v", p) 4736 } else { 4737 v -= 5 - 2 4738 if p.As == AXBEGIN { 4739 v-- 4740 } 4741 if yt.zcase == Zbr { 4742 ab.Put1(0x0f) 4743 v-- 4744 } 4745 4746 ab.Put1(o.op[z+1]) 4747 ab.PutInt32(int32(v)) 4748 } 4749 4750 break 4751 } 4752 4753 // Annotate target; will fill in later. 4754 p.Forwd = q.Rel 4755 4756 q.Rel = p 4757 if p.Back&branchShort != 0 && p.As != AXBEGIN { 4758 if p.As == AJCXZL { 4759 ab.Put1(0x67) 4760 } 4761 ab.Put2(byte(op), 0) 4762 } else if yt.zcase == Zloop { 4763 ctxt.Diag("loop too far: %v", p) 4764 } else { 4765 if yt.zcase == Zbr { 4766 ab.Put1(0x0f) 4767 } 4768 ab.Put1(o.op[z+1]) 4769 ab.PutInt32(0) 4770 } 4771 4772 case Zbyte: 4773 v = vaddr(ctxt, p, &p.From, &rel) 4774 if rel.Siz != 0 { 4775 rel.Siz = uint8(op) 4776 r = obj.Addrel(cursym) 4777 *r = rel 4778 r.Off = int32(p.Pc + int64(ab.Len())) 4779 } 4780 4781 ab.Put1(byte(v)) 4782 if op > 1 { 4783 ab.Put1(byte(v >> 8)) 4784 if op > 2 { 4785 ab.PutInt16(int16(v >> 16)) 4786 if op > 4 { 4787 ab.PutInt32(int32(v >> 32)) 4788 } 4789 } 4790 } 4791 } 4792 4793 return 4794 } 4795 } 4796 f3t = Ynone * Ymax 4797 if p.GetFrom3() != nil { 4798 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax 4799 } 4800 for mo := ymovtab; mo[0].as != 0; mo = mo[1:] { 4801 var pp obj.Prog 4802 var t []byte 4803 if p.As == mo[0].as { 4804 if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 { 4805 t = mo[0].op[:] 4806 switch mo[0].code { 4807 default: 4808 ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p) 4809 4810 case movLit: 4811 for z = 0; t[z] != 0; z++ { 4812 ab.Put1(t[z]) 4813 } 4814 4815 case movRegMem: 4816 ab.Put1(t[0]) 4817 ab.asmando(ctxt, cursym, p, &p.To, int(t[1])) 4818 4819 case movMemReg: 4820 ab.Put1(t[0]) 4821 ab.asmando(ctxt, cursym, p, &p.From, int(t[1])) 4822 4823 case movRegMem2op: // r,m - 2op 4824 ab.Put2(t[0], t[1]) 4825 ab.asmando(ctxt, cursym, p, &p.To, int(t[2])) 4826 ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40) 4827 4828 case movMemReg2op: 4829 ab.Put2(t[0], t[1]) 4830 ab.asmando(ctxt, cursym, p, &p.From, int(t[2])) 4831 ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40) 4832 4833 case movFullPtr: 4834 if t[0] != 0 { 4835 ab.Put1(t[0]) 4836 } 4837 switch p.To.Index { 4838 default: 4839 goto bad 4840 4841 case REG_DS: 4842 ab.Put1(0xc5) 4843 4844 case REG_SS: 4845 ab.Put2(0x0f, 0xb2) 4846 4847 case REG_ES: 4848 ab.Put1(0xc4) 4849 4850 case REG_FS: 4851 ab.Put2(0x0f, 0xb4) 4852 4853 case REG_GS: 4854 ab.Put2(0x0f, 0xb5) 4855 } 4856 4857 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4858 4859 case movDoubleShift: 4860 if t[0] == Pw { 4861 if ctxt.Arch.Family != sys.AMD64 { 4862 ctxt.Diag("asmins: illegal 64: %v", p) 4863 } 4864 ab.rexflag |= Pw 4865 t = t[1:] 4866 } else if t[0] == Pe { 4867 ab.Put1(Pe) 4868 t = t[1:] 4869 } 4870 4871 switch p.From.Type { 4872 default: 4873 goto bad 4874 4875 case obj.TYPE_CONST: 4876 ab.Put2(0x0f, t[0]) 4877 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 4878 ab.Put1(byte(p.From.Offset)) 4879 4880 case obj.TYPE_REG: 4881 switch p.From.Reg { 4882 default: 4883 goto bad 4884 4885 case REG_CL, REG_CX: 4886 ab.Put2(0x0f, t[1]) 4887 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 4888 } 4889 } 4890 4891 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 4892 // where you load the TLS base register into a register and then index off that 4893 // register to access the actual TLS variables. Systems that allow direct TLS access 4894 // are handled in prefixof above and should not be listed here. 4895 case movTLSReg: 4896 if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL { 4897 ctxt.Diag("invalid load of TLS: %v", p) 4898 } 4899 4900 if ctxt.Arch.Family == sys.I386 { 4901 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 4902 // where you load the TLS base register into a register and then index off that 4903 // register to access the actual TLS variables. Systems that allow direct TLS access 4904 // are handled in prefixof above and should not be listed here. 4905 switch ctxt.Headtype { 4906 default: 4907 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 4908 4909 case objabi.Hlinux, 4910 objabi.Hnacl, objabi.Hfreebsd: 4911 if ctxt.Flag_shared { 4912 // Note that this is not generating the same insns as the other cases. 4913 // MOV TLS, dst 4914 // becomes 4915 // call __x86.get_pc_thunk.dst 4916 // movl (gotpc + g@gotntpoff)(dst), dst 4917 // which is encoded as 4918 // call __x86.get_pc_thunk.dst 4919 // movq 0(dst), dst 4920 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access 4921 // is g, which we can't check here, but will when we assemble the second 4922 // instruction. 4923 dst := p.To.Reg 4924 ab.Put1(0xe8) 4925 r = obj.Addrel(cursym) 4926 r.Off = int32(p.Pc + int64(ab.Len())) 4927 r.Type = objabi.R_CALL 4928 r.Siz = 4 4929 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))) 4930 ab.PutInt32(0) 4931 4932 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3))) 4933 r = obj.Addrel(cursym) 4934 r.Off = int32(p.Pc + int64(ab.Len())) 4935 r.Type = objabi.R_TLS_IE 4936 r.Siz = 4 4937 r.Add = 2 4938 ab.PutInt32(0) 4939 } else { 4940 // ELF TLS base is 0(GS). 4941 pp.From = p.From 4942 4943 pp.From.Type = obj.TYPE_MEM 4944 pp.From.Reg = REG_GS 4945 pp.From.Offset = 0 4946 pp.From.Index = REG_NONE 4947 pp.From.Scale = 0 4948 ab.Put2(0x65, // GS 4949 0x8B) 4950 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4951 } 4952 case objabi.Hplan9: 4953 pp.From = obj.Addr{} 4954 pp.From.Type = obj.TYPE_MEM 4955 pp.From.Name = obj.NAME_EXTERN 4956 pp.From.Sym = plan9privates 4957 pp.From.Offset = 0 4958 pp.From.Index = REG_NONE 4959 ab.Put1(0x8B) 4960 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4961 4962 case objabi.Hwindows: 4963 // Windows TLS base is always 0x14(FS). 4964 pp.From = p.From 4965 4966 pp.From.Type = obj.TYPE_MEM 4967 pp.From.Reg = REG_FS 4968 pp.From.Offset = 0x14 4969 pp.From.Index = REG_NONE 4970 pp.From.Scale = 0 4971 ab.Put2(0x64, // FS 4972 0x8B) 4973 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 4974 } 4975 break 4976 } 4977 4978 switch ctxt.Headtype { 4979 default: 4980 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 4981 4982 case objabi.Hlinux, objabi.Hfreebsd: 4983 if !ctxt.Flag_shared { 4984 log.Fatalf("unknown TLS base location for linux/freebsd without -shared") 4985 } 4986 // Note that this is not generating the same insn as the other cases. 4987 // MOV TLS, R_to 4988 // becomes 4989 // movq g@gottpoff(%rip), R_to 4990 // which is encoded as 4991 // movq 0(%rip), R_to 4992 // and a R_TLS_IE reloc. This all assumes the only tls variable we access 4993 // is g, which we can't check here, but will when we assemble the second 4994 // instruction. 4995 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr) 4996 4997 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3))) 4998 r = obj.Addrel(cursym) 4999 r.Off = int32(p.Pc + int64(ab.Len())) 5000 r.Type = objabi.R_TLS_IE 5001 r.Siz = 4 5002 r.Add = -4 5003 ab.PutInt32(0) 5004 5005 case objabi.Hplan9: 5006 pp.From = obj.Addr{} 5007 pp.From.Type = obj.TYPE_MEM 5008 pp.From.Name = obj.NAME_EXTERN 5009 pp.From.Sym = plan9privates 5010 pp.From.Offset = 0 5011 pp.From.Index = REG_NONE 5012 ab.rexflag |= Pw 5013 ab.Put1(0x8B) 5014 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5015 5016 case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. 5017 // TLS base is 0(FS). 5018 pp.From = p.From 5019 5020 pp.From.Type = obj.TYPE_MEM 5021 pp.From.Name = obj.NAME_NONE 5022 pp.From.Reg = REG_NONE 5023 pp.From.Offset = 0 5024 pp.From.Index = REG_NONE 5025 pp.From.Scale = 0 5026 ab.rexflag |= Pw 5027 ab.Put2(0x64, // FS 5028 0x8B) 5029 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5030 5031 case objabi.Hwindows: 5032 // Windows TLS base is always 0x28(GS). 5033 pp.From = p.From 5034 5035 pp.From.Type = obj.TYPE_MEM 5036 pp.From.Name = obj.NAME_NONE 5037 pp.From.Reg = REG_GS 5038 pp.From.Offset = 0x28 5039 pp.From.Index = REG_NONE 5040 pp.From.Scale = 0 5041 ab.rexflag |= Pw 5042 ab.Put2(0x65, // GS 5043 0x8B) 5044 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5045 } 5046 } 5047 return 5048 } 5049 } 5050 } 5051 goto bad 5052 5053 bad: 5054 if ctxt.Arch.Family != sys.AMD64 { 5055 // here, the assembly has failed. 5056 // if it's a byte instruction that has 5057 // unaddressable registers, try to 5058 // exchange registers and reissue the 5059 // instruction with the operands renamed. 5060 pp := *p 5061 5062 unbytereg(&pp.From, &pp.Ft) 5063 unbytereg(&pp.To, &pp.Tt) 5064 5065 z := int(p.From.Reg) 5066 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5067 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. 5068 // For now, different to keep bit-for-bit compatibility. 5069 if ctxt.Arch.Family == sys.I386 { 5070 breg := byteswapreg(ctxt, &p.To) 5071 if breg != REG_AX { 5072 ab.Put1(0x87) // xchg lhs,bx 5073 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5074 subreg(&pp, z, breg) 5075 ab.doasm(ctxt, cursym, &pp) 5076 ab.Put1(0x87) // xchg lhs,bx 5077 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5078 } else { 5079 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5080 subreg(&pp, z, REG_AX) 5081 ab.doasm(ctxt, cursym, &pp) 5082 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5083 } 5084 return 5085 } 5086 5087 if isax(&p.To) || p.To.Type == obj.TYPE_NONE { 5088 // We certainly don't want to exchange 5089 // with AX if the op is MUL or DIV. 5090 ab.Put1(0x87) // xchg lhs,bx 5091 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5092 subreg(&pp, z, REG_BX) 5093 ab.doasm(ctxt, cursym, &pp) 5094 ab.Put1(0x87) // xchg lhs,bx 5095 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5096 } else { 5097 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5098 subreg(&pp, z, REG_AX) 5099 ab.doasm(ctxt, cursym, &pp) 5100 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5101 } 5102 return 5103 } 5104 5105 z = int(p.To.Reg) 5106 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5107 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. 5108 // For now, different to keep bit-for-bit compatibility. 5109 if ctxt.Arch.Family == sys.I386 { 5110 breg := byteswapreg(ctxt, &p.From) 5111 if breg != REG_AX { 5112 ab.Put1(0x87) //xchg rhs,bx 5113 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5114 subreg(&pp, z, breg) 5115 ab.doasm(ctxt, cursym, &pp) 5116 ab.Put1(0x87) // xchg rhs,bx 5117 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5118 } else { 5119 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5120 subreg(&pp, z, REG_AX) 5121 ab.doasm(ctxt, cursym, &pp) 5122 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5123 } 5124 return 5125 } 5126 5127 if isax(&p.From) { 5128 ab.Put1(0x87) // xchg rhs,bx 5129 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5130 subreg(&pp, z, REG_BX) 5131 ab.doasm(ctxt, cursym, &pp) 5132 ab.Put1(0x87) // xchg rhs,bx 5133 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5134 } else { 5135 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5136 subreg(&pp, z, REG_AX) 5137 ab.doasm(ctxt, cursym, &pp) 5138 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5139 } 5140 return 5141 } 5142 } 5143 5144 ctxt.Diag("invalid instruction: %v", p) 5145 // ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To)) 5146 } 5147 5148 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX) 5149 // which is not referenced in a. 5150 // If a is empty, it returns BX to account for MULB-like instructions 5151 // that might use DX and AX. 5152 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5153 cana, canb, canc, cand := true, true, true, true 5154 if a.Type == obj.TYPE_NONE { 5155 cana, cand = false, false 5156 } 5157 5158 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5159 switch a.Reg { 5160 case REG_NONE: 5161 cana, cand = false, false 5162 case REG_AX, REG_AL, REG_AH: 5163 cana = false 5164 case REG_BX, REG_BL, REG_BH: 5165 canb = false 5166 case REG_CX, REG_CL, REG_CH: 5167 canc = false 5168 case REG_DX, REG_DL, REG_DH: 5169 cand = false 5170 } 5171 } 5172 5173 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5174 switch a.Index { 5175 case REG_AX: 5176 cana = false 5177 case REG_BX: 5178 canb = false 5179 case REG_CX: 5180 canc = false 5181 case REG_DX: 5182 cand = false 5183 } 5184 } 5185 5186 switch { 5187 case cana: 5188 return REG_AX 5189 case canb: 5190 return REG_BX 5191 case canc: 5192 return REG_CX 5193 case cand: 5194 return REG_DX 5195 default: 5196 ctxt.Diag("impossible byte register") 5197 ctxt.DiagFlush() 5198 log.Fatalf("bad code") 5199 return 0 5200 } 5201 } 5202 5203 func isbadbyte(a *obj.Addr) bool { 5204 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5205 } 5206 5207 var naclret = []uint8{ 5208 0x5e, // POPL SI 5209 // 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging 5210 0x83, 5211 0xe6, 5212 0xe0, // ANDL $~31, SI 5213 0x4c, 5214 0x01, 5215 0xfe, // ADDQ R15, SI 5216 0xff, 5217 0xe6, // JMP SI 5218 } 5219 5220 var naclret8 = []uint8{ 5221 0x5d, // POPL BP 5222 // 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging 5223 0x83, 5224 0xe5, 5225 0xe0, // ANDL $~31, BP 5226 0xff, 5227 0xe5, // JMP BP 5228 } 5229 5230 var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP 5231 5232 var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP 5233 5234 var naclmovs = []uint8{ 5235 0x89, 5236 0xf6, // MOVL SI, SI 5237 0x49, 5238 0x8d, 5239 0x34, 5240 0x37, // LEAQ (R15)(SI*1), SI 5241 0x89, 5242 0xff, // MOVL DI, DI 5243 0x49, 5244 0x8d, 5245 0x3c, 5246 0x3f, // LEAQ (R15)(DI*1), DI 5247 } 5248 5249 var naclstos = []uint8{ 5250 0x89, 5251 0xff, // MOVL DI, DI 5252 0x49, 5253 0x8d, 5254 0x3c, 5255 0x3f, // LEAQ (R15)(DI*1), DI 5256 } 5257 5258 func (ab *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) { 5259 if reg >= REG_R8 { 5260 ab.Put1(0x45) 5261 } 5262 reg = (reg - REG_AX) & 7 5263 ab.Put2(0x89, byte(3<<6|reg<<3|reg)) 5264 } 5265 5266 func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5267 ab.Reset() 5268 5269 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 { 5270 switch p.As { 5271 case obj.ARET: 5272 ab.Put(naclret8) 5273 return 5274 5275 case obj.ACALL, 5276 obj.AJMP: 5277 if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI { 5278 ab.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0) 5279 } 5280 5281 case AINT: 5282 ab.Put1(0xf4) 5283 return 5284 } 5285 } 5286 5287 if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 { 5288 if p.As == AREP { 5289 ab.rep = true 5290 return 5291 } 5292 5293 if p.As == AREPN { 5294 ab.repn = true 5295 return 5296 } 5297 5298 if p.As == ALOCK { 5299 ab.lock = true 5300 return 5301 } 5302 5303 if p.As != ALEAQ && p.As != ALEAL { 5304 if p.From.Index != REG_NONE && p.From.Scale > 0 { 5305 ab.nacltrunc(ctxt, int(p.From.Index)) 5306 } 5307 if p.To.Index != REG_NONE && p.To.Scale > 0 { 5308 ab.nacltrunc(ctxt, int(p.To.Index)) 5309 } 5310 } 5311 5312 switch p.As { 5313 case obj.ARET: 5314 ab.Put(naclret) 5315 return 5316 5317 case obj.ACALL, 5318 obj.AJMP: 5319 if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI { 5320 // ANDL $~31, reg 5321 ab.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0) 5322 // ADDQ R15, reg 5323 ab.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX))) 5324 } 5325 5326 if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 { 5327 // ANDL $~31, reg 5328 ab.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0) 5329 // ADDQ R15, reg 5330 ab.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8))) 5331 } 5332 5333 case AINT: 5334 ab.Put1(0xf4) 5335 return 5336 5337 case ASCASB, 5338 ASCASW, 5339 ASCASL, 5340 ASCASQ, 5341 ASTOSB, 5342 ASTOSW, 5343 ASTOSL, 5344 ASTOSQ: 5345 ab.Put(naclstos) 5346 5347 case AMOVSB, AMOVSW, AMOVSL, AMOVSQ: 5348 ab.Put(naclmovs) 5349 } 5350 5351 if ab.rep { 5352 ab.Put1(0xf3) 5353 ab.rep = false 5354 } 5355 5356 if ab.repn { 5357 ab.Put1(0xf2) 5358 ab.repn = false 5359 } 5360 5361 if ab.lock { 5362 ab.Put1(0xf0) 5363 ab.lock = false 5364 } 5365 } 5366 5367 ab.rexflag = 0 5368 ab.vexflag = false 5369 ab.evexflag = false 5370 mark := ab.Len() 5371 ab.doasm(ctxt, cursym, p) 5372 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5373 // as befits the whole approach of the architecture, 5374 // the rex prefix must appear before the first opcode byte 5375 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5376 // before the 0f opcode escape!), or it might be ignored. 5377 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 5378 if ctxt.Arch.Family != sys.AMD64 { 5379 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5380 } 5381 n := ab.Len() 5382 var np int 5383 for np = mark; np < n; np++ { 5384 c := ab.At(np) 5385 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5386 break 5387 } 5388 } 5389 ab.Insert(np, byte(0x40|ab.rexflag)) 5390 } 5391 5392 n := ab.Len() 5393 for i := len(cursym.R) - 1; i >= 0; i-- { 5394 r := &cursym.R[i] 5395 if int64(r.Off) < p.Pc { 5396 break 5397 } 5398 if ab.rexflag != 0 && !ab.vexflag { 5399 r.Off++ 5400 } 5401 if r.Type == objabi.R_PCREL { 5402 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5403 // PC-relative addressing is relative to the end of the instruction, 5404 // but the relocations applied by the linker are relative to the end 5405 // of the relocation. Because immediate instruction 5406 // arguments can follow the PC-relative memory reference in the 5407 // instruction encoding, the two may not coincide. In this case, 5408 // adjust addend so that linker can keep relocating relative to the 5409 // end of the relocation. 5410 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5411 } else if ctxt.Arch.Family == sys.I386 { 5412 // On 386 PC-relative addressing (for non-call/jmp instructions) 5413 // assumes that the previous instruction loaded the PC of the end 5414 // of that instruction into CX, so the adjustment is relative to 5415 // that. 5416 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5417 } 5418 } 5419 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5420 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5421 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5422 } 5423 5424 } 5425 5426 if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG { 5427 switch p.To.Reg { 5428 case REG_SP: 5429 ab.Put(naclspfix) 5430 case REG_BP: 5431 ab.Put(naclbpfix) 5432 } 5433 } 5434 } 5435 5436 // unpackOps4 extracts 4 operands from p. 5437 func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5438 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To 5439 } 5440 5441 // unpackOps5 extracts 5 operands from p. 5442 func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5443 return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To 5444 }