github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/cmd/compile/internal/x86/gsubr.go

// Derived from Inferno utils/8c/txt.c
// http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package x86

import (
	"cmd/compile/internal/big"
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"fmt"
)

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../runtime/stack.h.
var unmappedzero uint32 = 4096

// foptoas flags
const (
	Frev  = 1 << 0
	Fpop  = 1 << 1
	Fpop2 = 1 << 2
)
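// optoas and foptoas pick an assembler opcode by packing the Go
// operator and the operand's simplified machine type into a single
// uint32 switch key: the operator in the high 16 bits, the type in
// the low bits. A minimal sketch of the scheme (etype stands in for
// gc.Simtype[t.Etype]):
//
//	switch uint32(op)<<16 | uint32(etype) {
//	case uint32(gc.OLT)<<16 | uint32(gc.TUINT32):
//		// unsigned < compiles to JCS (jump if carry set)
//	}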
/*
 * return Axxx for Oxxx on type t.
 */
func optoas(op gc.Op, t *gc.Type) int {
	if t == nil {
		gc.Fatalf("optoas: t is nil")
	}

	// avoid constant conversions in switches below
	const (
		OMINUS_  = uint32(gc.OMINUS) << 16
		OLSH_    = uint32(gc.OLSH) << 16
		ORSH_    = uint32(gc.ORSH) << 16
		OADD_    = uint32(gc.OADD) << 16
		OSUB_    = uint32(gc.OSUB) << 16
		OMUL_    = uint32(gc.OMUL) << 16
		ODIV_    = uint32(gc.ODIV) << 16
		OMOD_    = uint32(gc.OMOD) << 16
		OOR_     = uint32(gc.OOR) << 16
		OAND_    = uint32(gc.OAND) << 16
		OXOR_    = uint32(gc.OXOR) << 16
		OEQ_     = uint32(gc.OEQ) << 16
		ONE_     = uint32(gc.ONE) << 16
		OLT_     = uint32(gc.OLT) << 16
		OLE_     = uint32(gc.OLE) << 16
		OGE_     = uint32(gc.OGE) << 16
		OGT_     = uint32(gc.OGT) << 16
		OCMP_    = uint32(gc.OCMP) << 16
		OAS_     = uint32(gc.OAS) << 16
		OHMUL_   = uint32(gc.OHMUL) << 16
		OADDR_   = uint32(gc.OADDR) << 16
		OINC_    = uint32(gc.OINC) << 16
		ODEC_    = uint32(gc.ODEC) << 16
		OLROT_   = uint32(gc.OLROT) << 16
		OEXTEND_ = uint32(gc.OEXTEND) << 16
		OCOM_    = uint32(gc.OCOM) << 16
	)

	a := obj.AXXX
	switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
	default:
		gc.Fatalf("optoas: no entry %v-%v", gc.Oconv(int(op), 0), t)

	case OADDR_ | gc.TPTR32:
		a = x86.ALEAL

	case OEQ_ | gc.TBOOL,
		OEQ_ | gc.TINT8,
		OEQ_ | gc.TUINT8,
		OEQ_ | gc.TINT16,
		OEQ_ | gc.TUINT16,
		OEQ_ | gc.TINT32,
		OEQ_ | gc.TUINT32,
		OEQ_ | gc.TINT64,
		OEQ_ | gc.TUINT64,
		OEQ_ | gc.TPTR32,
		OEQ_ | gc.TPTR64,
		OEQ_ | gc.TFLOAT32,
		OEQ_ | gc.TFLOAT64:
		a = x86.AJEQ

	case ONE_ | gc.TBOOL,
		ONE_ | gc.TINT8,
		ONE_ | gc.TUINT8,
		ONE_ | gc.TINT16,
		ONE_ | gc.TUINT16,
		ONE_ | gc.TINT32,
		ONE_ | gc.TUINT32,
		ONE_ | gc.TINT64,
		ONE_ | gc.TUINT64,
		ONE_ | gc.TPTR32,
		ONE_ | gc.TPTR64,
		ONE_ | gc.TFLOAT32,
		ONE_ | gc.TFLOAT64:
		a = x86.AJNE

	case OLT_ | gc.TINT8,
		OLT_ | gc.TINT16,
		OLT_ | gc.TINT32,
		OLT_ | gc.TINT64:
		a = x86.AJLT

	case OLT_ | gc.TUINT8,
		OLT_ | gc.TUINT16,
		OLT_ | gc.TUINT32,
		OLT_ | gc.TUINT64:
		a = x86.AJCS

	case OLE_ | gc.TINT8,
		OLE_ | gc.TINT16,
		OLE_ | gc.TINT32,
		OLE_ | gc.TINT64:
		a = x86.AJLE

	case OLE_ | gc.TUINT8,
		OLE_ | gc.TUINT16,
		OLE_ | gc.TUINT32,
		OLE_ | gc.TUINT64:
		a = x86.AJLS

	case OGT_ | gc.TINT8,
		OGT_ | gc.TINT16,
		OGT_ | gc.TINT32,
		OGT_ | gc.TINT64:
		a = x86.AJGT

	case OGT_ | gc.TUINT8,
		OGT_ | gc.TUINT16,
		OGT_ | gc.TUINT32,
		OGT_ | gc.TUINT64,
		OLT_ | gc.TFLOAT32,
		OLT_ | gc.TFLOAT64:
		a = x86.AJHI

	case OGE_ | gc.TINT8,
		OGE_ | gc.TINT16,
		OGE_ | gc.TINT32,
		OGE_ | gc.TINT64:
		a = x86.AJGE

	case OGE_ | gc.TUINT8,
		OGE_ | gc.TUINT16,
		OGE_ | gc.TUINT32,
		OGE_ | gc.TUINT64,
		OLE_ | gc.TFLOAT32,
		OLE_ | gc.TFLOAT64:
		a = x86.AJCC

	case OCMP_ | gc.TBOOL,
		OCMP_ | gc.TINT8,
		OCMP_ | gc.TUINT8:
		a = x86.ACMPB

	case OCMP_ | gc.TINT16,
		OCMP_ | gc.TUINT16:
		a = x86.ACMPW

	case OCMP_ | gc.TINT32,
		OCMP_ | gc.TUINT32,
		OCMP_ | gc.TPTR32:
		a = x86.ACMPL

	case OAS_ | gc.TBOOL,
		OAS_ | gc.TINT8,
		OAS_ | gc.TUINT8:
		a = x86.AMOVB

	case OAS_ | gc.TINT16,
		OAS_ | gc.TUINT16:
		a = x86.AMOVW

	case OAS_ | gc.TINT32,
		OAS_ | gc.TUINT32,
		OAS_ | gc.TPTR32:
		a = x86.AMOVL

	case OAS_ | gc.TFLOAT32:
		a = x86.AMOVSS

	case OAS_ | gc.TFLOAT64:
		a = x86.AMOVSD
	case OADD_ | gc.TINT8,
		OADD_ | gc.TUINT8:
		a = x86.AADDB

	case OADD_ | gc.TINT16,
		OADD_ | gc.TUINT16:
		a = x86.AADDW

	case OADD_ | gc.TINT32,
		OADD_ | gc.TUINT32,
		OADD_ | gc.TPTR32:
		a = x86.AADDL

	case OSUB_ | gc.TINT8,
		OSUB_ | gc.TUINT8:
		a = x86.ASUBB

	case OSUB_ | gc.TINT16,
		OSUB_ | gc.TUINT16:
		a = x86.ASUBW

	case OSUB_ | gc.TINT32,
		OSUB_ | gc.TUINT32,
		OSUB_ | gc.TPTR32:
		a = x86.ASUBL

	case OINC_ | gc.TINT8,
		OINC_ | gc.TUINT8:
		a = x86.AINCB

	case OINC_ | gc.TINT16,
		OINC_ | gc.TUINT16:
		a = x86.AINCW

	case OINC_ | gc.TINT32,
		OINC_ | gc.TUINT32,
		OINC_ | gc.TPTR32:
		a = x86.AINCL

	case ODEC_ | gc.TINT8,
		ODEC_ | gc.TUINT8:
		a = x86.ADECB

	case ODEC_ | gc.TINT16,
		ODEC_ | gc.TUINT16:
		a = x86.ADECW

	case ODEC_ | gc.TINT32,
		ODEC_ | gc.TUINT32,
		ODEC_ | gc.TPTR32:
		a = x86.ADECL

	case OCOM_ | gc.TINT8,
		OCOM_ | gc.TUINT8:
		a = x86.ANOTB

	case OCOM_ | gc.TINT16,
		OCOM_ | gc.TUINT16:
		a = x86.ANOTW

	case OCOM_ | gc.TINT32,
		OCOM_ | gc.TUINT32,
		OCOM_ | gc.TPTR32:
		a = x86.ANOTL

	case OMINUS_ | gc.TINT8,
		OMINUS_ | gc.TUINT8:
		a = x86.ANEGB

	case OMINUS_ | gc.TINT16,
		OMINUS_ | gc.TUINT16:
		a = x86.ANEGW

	case OMINUS_ | gc.TINT32,
		OMINUS_ | gc.TUINT32,
		OMINUS_ | gc.TPTR32:
		a = x86.ANEGL

	case OAND_ | gc.TINT8,
		OAND_ | gc.TUINT8:
		a = x86.AANDB

	case OAND_ | gc.TINT16,
		OAND_ | gc.TUINT16:
		a = x86.AANDW

	case OAND_ | gc.TINT32,
		OAND_ | gc.TUINT32,
		OAND_ | gc.TPTR32:
		a = x86.AANDL

	case OOR_ | gc.TINT8,
		OOR_ | gc.TUINT8:
		a = x86.AORB

	case OOR_ | gc.TINT16,
		OOR_ | gc.TUINT16:
		a = x86.AORW

	case OOR_ | gc.TINT32,
		OOR_ | gc.TUINT32,
		OOR_ | gc.TPTR32:
		a = x86.AORL

	case OXOR_ | gc.TINT8,
		OXOR_ | gc.TUINT8:
		a = x86.AXORB

	case OXOR_ | gc.TINT16,
		OXOR_ | gc.TUINT16:
		a = x86.AXORW

	case OXOR_ | gc.TINT32,
		OXOR_ | gc.TUINT32,
		OXOR_ | gc.TPTR32:
		a = x86.AXORL

	case OLROT_ | gc.TINT8,
		OLROT_ | gc.TUINT8:
		a = x86.AROLB

	case OLROT_ | gc.TINT16,
		OLROT_ | gc.TUINT16:
		a = x86.AROLW

	case OLROT_ | gc.TINT32,
		OLROT_ | gc.TUINT32,
		OLROT_ | gc.TPTR32:
		a = x86.AROLL

	case OLSH_ | gc.TINT8,
		OLSH_ | gc.TUINT8:
		a = x86.ASHLB

	case OLSH_ | gc.TINT16,
		OLSH_ | gc.TUINT16:
		a = x86.ASHLW

	case OLSH_ | gc.TINT32,
		OLSH_ | gc.TUINT32,
		OLSH_ | gc.TPTR32:
		a = x86.ASHLL

	case ORSH_ | gc.TUINT8:
		a = x86.ASHRB

	case ORSH_ | gc.TUINT16:
		a = x86.ASHRW

	case ORSH_ | gc.TUINT32,
		ORSH_ | gc.TPTR32:
		a = x86.ASHRL

	case ORSH_ | gc.TINT8:
		a = x86.ASARB

	case ORSH_ | gc.TINT16:
		a = x86.ASARW

	case ORSH_ | gc.TINT32:
		a = x86.ASARL

	case OHMUL_ | gc.TINT8,
		OMUL_ | gc.TINT8,
		OMUL_ | gc.TUINT8:
		a = x86.AIMULB

	case OHMUL_ | gc.TINT16,
		OMUL_ | gc.TINT16,
		OMUL_ | gc.TUINT16:
		a = x86.AIMULW

	case OHMUL_ | gc.TINT32,
		OMUL_ | gc.TINT32,
		OMUL_ | gc.TUINT32,
		OMUL_ | gc.TPTR32:
		a = x86.AIMULL

	case OHMUL_ | gc.TUINT8:
		a = x86.AMULB

	case OHMUL_ | gc.TUINT16:
		a = x86.AMULW

	case OHMUL_ | gc.TUINT32,
		OHMUL_ | gc.TPTR32:
		a = x86.AMULL

	case ODIV_ | gc.TINT8,
		OMOD_ | gc.TINT8:
		a = x86.AIDIVB
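	// x86 division computes the quotient and remainder together (in
	// AX/DX, or AL/AH for the byte forms), so ODIV and OMOD map to
	// the same instruction; the caller simply reads a different
	// result register afterwards.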
	case ODIV_ | gc.TUINT8,
		OMOD_ | gc.TUINT8:
		a = x86.ADIVB

	case ODIV_ | gc.TINT16,
		OMOD_ | gc.TINT16:
		a = x86.AIDIVW

	case ODIV_ | gc.TUINT16,
		OMOD_ | gc.TUINT16:
		a = x86.ADIVW

	case ODIV_ | gc.TINT32,
		OMOD_ | gc.TINT32:
		a = x86.AIDIVL

	case ODIV_ | gc.TUINT32,
		ODIV_ | gc.TPTR32,
		OMOD_ | gc.TUINT32,
		OMOD_ | gc.TPTR32:
		a = x86.ADIVL

	case OEXTEND_ | gc.TINT16:
		a = x86.ACWD

	case OEXTEND_ | gc.TINT32:
		a = x86.ACDQ
	}

	return a
}

func foptoas(op gc.Op, t *gc.Type, flg int) int {
	a := obj.AXXX
	et := gc.Simtype[t.Etype]

	// avoid constant conversions in switches below
	const (
		OCMP_   = uint32(gc.OCMP) << 16
		OAS_    = uint32(gc.OAS) << 16
		OADD_   = uint32(gc.OADD) << 16
		OSUB_   = uint32(gc.OSUB) << 16
		OMUL_   = uint32(gc.OMUL) << 16
		ODIV_   = uint32(gc.ODIV) << 16
		OMINUS_ = uint32(gc.OMINUS) << 16
	)

	if !gc.Thearch.Use387 {
		switch uint32(op)<<16 | uint32(et) {
		default:
			gc.Fatalf("foptoas-sse: no entry %v-%v", gc.Oconv(int(op), 0), t)

		case OCMP_ | gc.TFLOAT32:
			a = x86.AUCOMISS

		case OCMP_ | gc.TFLOAT64:
			a = x86.AUCOMISD

		case OAS_ | gc.TFLOAT32:
			a = x86.AMOVSS

		case OAS_ | gc.TFLOAT64:
			a = x86.AMOVSD

		case OADD_ | gc.TFLOAT32:
			a = x86.AADDSS

		case OADD_ | gc.TFLOAT64:
			a = x86.AADDSD

		case OSUB_ | gc.TFLOAT32:
			a = x86.ASUBSS

		case OSUB_ | gc.TFLOAT64:
			a = x86.ASUBSD

		case OMUL_ | gc.TFLOAT32:
			a = x86.AMULSS

		case OMUL_ | gc.TFLOAT64:
			a = x86.AMULSD

		case ODIV_ | gc.TFLOAT32:
			a = x86.ADIVSS

		case ODIV_ | gc.TFLOAT64:
			a = x86.ADIVSD
		}

		return a
	}
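	// The 387 switch below keys on uint32(op)<<16 | uint32(et)<<8 | uint32(flg),
	// so one lookup distinguishes plain, reversed (Frev), and popping
	// (Fpop) forms of the same operation. For example, a hypothetical
	// call foptoas(gc.OSUB, f64, Fpop|Frev), with f64 standing in for
	// a float64 type, returns x86.AFSUBRDP: subtract with operands
	// reversed, then pop the 387 register stack.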
	// If we need Fpop, it means we're working on
	// two different floating-point registers, not memory.
	// There the instruction only has a float64 form.
	if flg&Fpop != 0 {
		et = gc.TFLOAT64
	}

	// clear Frev if unneeded
	switch op {
	case gc.OADD,
		gc.OMUL:
		flg &^= Frev
	}

	switch uint32(op)<<16 | (uint32(et)<<8 | uint32(flg)) {
	case OADD_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFADDF

	case OADD_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFADDD

	case OADD_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFADDDP

	case OSUB_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFSUBF

	case OSUB_ | (gc.TFLOAT32<<8 | Frev):
		return x86.AFSUBRF

	case OSUB_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFSUBD

	case OSUB_ | (gc.TFLOAT64<<8 | Frev):
		return x86.AFSUBRD

	case OSUB_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFSUBDP

	case OSUB_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFSUBRDP

	case OMUL_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFMULF

	case OMUL_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFMULD

	case OMUL_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFMULDP

	case ODIV_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFDIVF

	case ODIV_ | (gc.TFLOAT32<<8 | Frev):
		return x86.AFDIVRF

	case ODIV_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFDIVD

	case ODIV_ | (gc.TFLOAT64<<8 | Frev):
		return x86.AFDIVRD

	case ODIV_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFDIVDP

	case ODIV_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFDIVRDP

	case OCMP_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFCOMF

	case OCMP_ | (gc.TFLOAT32<<8 | Fpop):
		return x86.AFCOMFP

	case OCMP_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFCOMD

	case OCMP_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFCOMDP

	case OCMP_ | (gc.TFLOAT64<<8 | Fpop2):
		return x86.AFCOMDPP

	case OMINUS_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFCHS

	case OMINUS_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFCHS
	}

	gc.Fatalf("foptoas %v %v %#x", gc.Oconv(int(op), 0), t, flg)
	return 0
}

var resvd = []int{
	// REG_DI, // for movstring
	// REG_SI, // for movstring

	x86.REG_AX, // for divide
	x86.REG_CX, // for shift
	x86.REG_DX, // for divide, context
	x86.REG_SP, // for stack
}

/*
 * generate
 *	as $c, reg
 */
func gconreg(as int, c int64, reg int) {
	var n1 gc.Node
	var n2 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
	gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
	gins(as, &n1, &n2)
}

/*
 * generate
 *	as $c, n
 */
func ginscon(as int, c int64, n2 *gc.Node) {
	var n1 gc.Node
	gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
	gins(as, &n1, n2)
}

func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if gc.Isint[t.Etype] || t.Etype == gc.Tptr {
		if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL {
			// Reverse comparison to place constant (including address constant) last.
			op = gc.Brrev(op)
			n1, n2 = n2, n1
		}
	}

	// General case.
	var r1, r2, g1, g2 gc.Node
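	// Each operand below is either usable in place (a non-heap name,
	// an integer literal, the address of a global) or is generated
	// into its g register and then moved into r, the register the
	// comparison actually uses.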
	// A special case to make write barriers more efficient.
	// Comparing the first field of a named struct can be done directly.
	base := n1

	if n1.Op == gc.ODOT && n1.Left.Type.Etype == gc.TSTRUCT && n1.Left.Type.Type.Sym == n1.Right.Sym {
		base = n1.Left
	}

	if base.Op == gc.ONAME && base.Class&gc.PHEAP == 0 || n1.Op == gc.OINDREG {
		r1 = *n1
	} else {
		gc.Regalloc(&r1, t, n1)
		gc.Regalloc(&g1, n1.Type, &r1)
		gc.Cgen(n1, &g1)
		gmove(&g1, &r1)
	}
	if n2.Op == gc.OLITERAL && gc.Isint[t.Etype] || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN {
		r2 = *n2
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
	}
	gins(optoas(gc.OCMP, t), &r1, &r2)
	if r1.Op == gc.OREGISTER {
		gc.Regfree(&g1)
		gc.Regfree(&r1)
	}
	if r2.Op == gc.OREGISTER {
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	return gc.Gbranch(optoas(op, t), nil, likely)
}

/*
 * swap node contents
 */
func nswap(a *gc.Node, b *gc.Node) {
	t := *a
	*a = *b
	*b = t
}

/*
 * return constant i node.
 * overwritten by next call, but useful in calls to gins.
 */

var ncon_n gc.Node

func ncon(i uint32) *gc.Node {
	if ncon_n.Type == nil {
		gc.Nodconst(&ncon_n, gc.Types[gc.TUINT32], 0)
	}
	ncon_n.SetInt(int64(i))
	return &ncon_n
}

var sclean [10]gc.Node

var nsclean int

/*
 * n is a 64-bit value. fill in lo and hi to refer to its 32-bit halves.
 */
func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
	if !gc.Is64(n.Type) {
		gc.Fatalf("split64 %v", n.Type)
	}

	if nsclean >= len(sclean) {
		gc.Fatalf("split64 clean")
	}
	sclean[nsclean].Op = gc.OEMPTY
	nsclean++
	switch n.Op {
	default:
		switch n.Op {
		default:
			var n1 gc.Node
			if !dotaddable(n, &n1) {
				gc.Igen(n, &n1, nil)
				sclean[nsclean-1] = n1
			}

			n = &n1

		case gc.ONAME:
			if n.Class == gc.PPARAMREF {
				var n1 gc.Node
				gc.Cgen(n.Name.Heapaddr, &n1)
				sclean[nsclean-1] = n1
				n = &n1
			}

		// nothing
		case gc.OINDREG:
			break
		}

		*lo = *n
		*hi = *n
		lo.Type = gc.Types[gc.TUINT32]
		if n.Type.Etype == gc.TINT64 {
			hi.Type = gc.Types[gc.TINT32]
		} else {
			hi.Type = gc.Types[gc.TUINT32]
		}
		hi.Xoffset += 4

	case gc.OLITERAL:
		var n1 gc.Node
		n.Convconst(&n1, n.Type)
		i := n1.Int()
		gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
		i >>= 32
		if n.Type.Etype == gc.TINT64 {
			gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
		} else {
			gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
		}
	}
}

func splitclean() {
	if nsclean <= 0 {
		gc.Fatalf("splitclean")
	}
	nsclean--
	if sclean[nsclean].Op != gc.OEMPTY {
		gc.Regfree(&sclean[nsclean])
	}
}

// set up nodes representing fp constants
var (
	zerof        gc.Node
	two63f       gc.Node
	two64f       gc.Node
	bignodes_did bool
)

func bignodes() {
	if bignodes_did {
		return
	}
	bignodes_did = true

	gc.Nodconst(&zerof, gc.Types[gc.TINT64], 0)
	zerof.Convconst(&zerof, gc.Types[gc.TFLOAT64])

	var i big.Int
	i.SetInt64(1)
	i.Lsh(&i, 63)
	var bigi gc.Node

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two63f, gc.Types[gc.TFLOAT64])

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	i.Lsh(&i, 1)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two64f, gc.Types[gc.TFLOAT64])
}
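// two63f and two64f hold 2^63 and 2^64 as float64 constants. They are
// used by the float -> uint64 conversion in floatmove below: inputs
// outside [0, 2^64) become 0, and inputs in [2^63, 2^64) are converted
// by subtracting 2^63 first and then setting the top bit of the
// 64-bit result.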
func memname(n *gc.Node, t *gc.Type) {
	gc.Tempname(n, t)
	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
	n.Orig.Sym = n.Sym
}

func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	var r1 gc.Node
	var a int
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatalf("gmove %v -> %v", f, t)
		return

	/*
	 * integer copy and truncate
	 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return
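	// The 64-bit cases on either side rely on split64/splitclean:
	// split64 points lo and hi at the 32-bit halves of a 64-bit
	// operand (hi is lo with Xoffset moved up 4 bytes), and every
	// split64 must be paired with a splitclean to release whatever
	// register or temporary it allocated.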
	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			// Implementation of conversion-free x = y for int64 or uint64 x.
			// This is generated by the code that copies small values out of closures,
			// and that code has DX live, so avoid DX and just use AX twice.
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &fhi, &r1)
			gins(x86.AMOVL, &r1, &thi)
		}

		splitclean()
		splitclean()
		return

	/*
	 * integer up-conversions
	 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
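// gmove and the float movers share a small protocol for their hard
// cases: "rsrc" forces the source into a register, "rdst" forces the
// destination, and "hard"/"hardmem" route the move through an
// intermediate of type cvt, as in this sketch of the hard path:
//
//	var r gc.Node
//	gc.Regalloc(&r, cvt, t) // intermediate of the conversion type
//	gmove(f, &r)            // f -> r converts
//	gmove(&r, t)            // r -> t finishes the move
//	gc.Regfree(&r)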
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

	// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.
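		// In ordinary Go the in-range path amounts to:
		//	if v < 1<<63 {
		//		u = uint64(int64(v))
		//	} else {
		//		u = uint64(int64(v-(1<<63))) + 1<<63
		//	}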
		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

	/*
	 * integer to float
	 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

	// algorithm is:
	//	if small enough, use native int64 -> float64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}
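// The FSTCW/FLDCW pairs that bracket the FMOV*P stores above and in
// floatmove_387 below implement Go's truncating float-to-int
// semantics: loading 0xf7f sets the 387 control word to round toward
// zero with all exceptions masked, and the saved word is reloaded
// once the store is done.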
func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a int

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		goto fatal

	/*
	 * float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if f.Op != gc.OREGISTER {
			if ft == gc.TFLOAT32 {
				gins(x86.AFMOVF, f, &r1)
			} else {
				gins(x86.AFMOVD, f, &r1)
			}
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TINT32])

		gmove(f, &t1)
		switch tt {
		default:
			gc.Fatalf("gmove %v", t)

		case gc.TINT8:
			gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
			p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
			gins(x86.ACMPL, &t1, ncon(0x7f))
			p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
			p3 := gc.Gbranch(obj.AJMP, nil, 0)
			gc.Patch(p1, gc.Pc)
			gc.Patch(p2, gc.Pc)
			gmove(ncon(-0x80&(1<<32-1)), &t1)
			gc.Patch(p3, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT8:
			gins(x86.ATESTL, ncon(0xffffff00), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT16:
			gins(x86.ATESTL, ncon(0xffff0000), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)
		}

		return

	// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	/*
	 * integer to float
	 */
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op != gc.OREGISTER {
			goto hard
		}
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		switch ft {
		case gc.TINT16:
			a = x86.AFMOVW

		case gc.TINT32:
			a = x86.AFMOVL

		default:
			a = x86.AFMOVV
		}

	// convert via int32 memory
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hardmem

	// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem
	// The way the code generator uses floating-point
	// registers, a move from F0 to F0 is intended as a no-op.
	// On the x86, it's not: it pushes a second copy of F0
	// on the floating point stack. So toss it away here.
	// Also, F0 is the *only* register we ever evaluate
	// into, so we should only see register/register as F0/F0.
	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32,
		gc.TFLOAT64<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		a = x86.AFMOVF
		if ft == gc.TFLOAT64 {
			a = x86.AFMOVD
		}
		if gc.Ismem(t) {
			if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
				gc.Fatalf("gmove %v", f)
			}
			a = x86.AFMOVFP
			if ft == gc.TFLOAT64 {
				a = x86.AFMOVDP
			}
		}

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVDP, f, t)
		} else {
			gins(x86.AFMOVF, f, t)
		}
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			var r1 gc.Node
			gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
			gins(x86.AFMOVFP, f, &r1)
			gins(x86.AFMOVF, &r1, t)
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVFP, f, t)
		} else {
			gins(x86.AFMOVD, f, t)
		}
		return
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// should not happen
fatal:
	gc.Fatalf("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))

	return
}

func floatmove_sse(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var cvt *gc.Type
	var a int

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)

	switch uint32(ft)<<16 | uint32(tt) {
	// should not happen
	default:
		gc.Fatalf("gmove %v -> %v", f, t)

		return

	// convert via int32.
	/*
	 * float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard
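	// uint32 does not fit in the signed 32-bit result of
	// CVTTSS2SL/CVTTSD2SL, so float -> uint32 goes through a signed
	// 64-bit memory temporary and truncates, rather than converting
	// directly.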
	// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TFLOAT32<<16 | gc.TINT32:
		a = x86.ACVTTSS2SL
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL
		goto rdst

	// convert via int32 memory
	/*
	 * integer to float
	 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS
		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// requires register destination
rdst:
	gc.Regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}

func samaddr(f *gc.Node, t *gc.Node) bool {
	if f.Op != t.Op {
		return false
	}

	switch f.Op {
	case gc.OREGISTER:
		if f.Reg != t.Reg {
			break
		}
		return true
	}

	return false
}

/*
 * generate one instruction:
 *	as f, t
 */
func gins(as int, f *gc.Node, t *gc.Node) *obj.Prog {
	if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER {
		gc.Fatalf("gins MOVF reg, reg")
	}
	if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL {
		gc.Fatalf("gins CVTSD2SS const")
	}
	if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 {
		gc.Fatalf("gins MOVSD into F0")
	}
	if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC {
		// Turn MOVL $xxx(FP/SP) into LEAL xxx.
		// These should be equivalent but most of the backend
		// only expects to see LEAL, because that's what we had
		// historically generated. Various hidden assumptions are baked in by now.
		as = x86.ALEAL
		f = f.Left
	}

	switch as {
	case x86.AMOVB,
		x86.AMOVW,
		x86.AMOVL:
		if f != nil && t != nil && samaddr(f, t) {
			return nil
		}

	case x86.ALEAL:
		if f != nil && gc.Isconst(f, gc.CTNIL) {
			gc.Fatalf("gins LEAL nil %v", f.Type)
		}
	}

	p := gc.Prog(as)
	gc.Naddr(&p.From, f)
	gc.Naddr(&p.To, t)

	if gc.Debug['g'] != 0 {
		fmt.Printf("%v\n", p)
	}

	w := 0
	switch as {
	case x86.AMOVB:
		w = 1

	case x86.AMOVW:
		w = 2

	case x86.AMOVL:
		w = 4
	}

	if w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) {
		gc.Dump("bad width from:", f)
		gc.Dump("bad width to:", t)
		gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width)
	}

	if p.To.Type == obj.TYPE_ADDR && w > 0 {
		gc.Fatalf("bad use of addr: %v", p)
	}

	return p
}

func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
	gins(x86.AXCHGL, &reg, &reg)
}

func dotaddable(n *gc.Node, n1 *gc.Node) bool {
	if n.Op != gc.ODOT {
		return false
	}

	var oary [10]int64
	var nn *gc.Node
	o := gc.Dotoffset(n, oary[:], &nn)
	if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 {
		*n1 = *nn
		n1.Type = n.Type
		n1.Xoffset += oary[0]
		return true
	}

	return false
}

func sudoclean() {
}

func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool {
	*a = obj.Addr{}
	return false
}