github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/x86/gsubr.go (about) 1 // Derived from Inferno utils/8c/txt.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c 3 // 4 // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6 // Portions Copyright © 1997-1999 Vita Nuova Limited 7 // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright © 2004,2006 Bruce Ellis 9 // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10 // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright © 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"cmd/compile/internal/big"
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"fmt"
)

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../runtime/stack.h.
var unmappedzero uint32 = 4096

// foptoas flags
const (
	Frev  = 1 << 0 // operand order is reversed
	Fpop  = 1 << 1 // pop the x87 register stack once
	Fpop2 = 1 << 2 // pop the x87 register stack twice
)

/*
 * return Axxx for Oxxx on type t.
 */
func optoas(op gc.Op, t *gc.Type) int {
	if t == nil {
		gc.Fatalf("optoas: t is nil")
	}

	// avoid constant conversions in switches below
	const (
		OMINUS_  = uint32(gc.OMINUS) << 16
		OLSH_    = uint32(gc.OLSH) << 16
		ORSH_    = uint32(gc.ORSH) << 16
		OADD_    = uint32(gc.OADD) << 16
		OSUB_    = uint32(gc.OSUB) << 16
		OMUL_    = uint32(gc.OMUL) << 16
		ODIV_    = uint32(gc.ODIV) << 16
		OMOD_    = uint32(gc.OMOD) << 16
		OOR_     = uint32(gc.OOR) << 16
		OAND_    = uint32(gc.OAND) << 16
		OXOR_    = uint32(gc.OXOR) << 16
		OEQ_     = uint32(gc.OEQ) << 16
		ONE_     = uint32(gc.ONE) << 16
		OLT_     = uint32(gc.OLT) << 16
		OLE_     = uint32(gc.OLE) << 16
		OGE_     = uint32(gc.OGE) << 16
		OGT_     = uint32(gc.OGT) << 16
		OCMP_    = uint32(gc.OCMP) << 16
		OAS_     = uint32(gc.OAS) << 16
		OHMUL_   = uint32(gc.OHMUL) << 16
		OADDR_   = uint32(gc.OADDR) << 16
		OINC_    = uint32(gc.OINC) << 16
		ODEC_    = uint32(gc.ODEC) << 16
		OLROT_   = uint32(gc.OLROT) << 16
		OEXTEND_ = uint32(gc.OEXTEND) << 16
		OCOM_    = uint32(gc.OCOM) << 16
	)

	a := obj.AXXX
	// Dispatch on (op, simple type) packed into one uint32.
	switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
	default:
		gc.Fatalf("optoas: no entry %v-%v", gc.Oconv(int(op), 0), t)

	case OADDR_ | gc.TPTR32:
		a = x86.ALEAL

	case OEQ_ | gc.TBOOL,
		OEQ_ | gc.TINT8,
		OEQ_ | gc.TUINT8,
		OEQ_ | gc.TINT16,
		OEQ_ | gc.TUINT16,
		OEQ_ | gc.TINT32,
		OEQ_ | gc.TUINT32,
		OEQ_ | gc.TINT64,
		OEQ_ | gc.TUINT64,
		OEQ_ | gc.TPTR32,
		OEQ_ | gc.TPTR64,
		OEQ_ | gc.TFLOAT32,
		OEQ_ | gc.TFLOAT64:
		a = x86.AJEQ

	case ONE_ | gc.TBOOL,
		ONE_ | gc.TINT8,
		ONE_ | gc.TUINT8,
		ONE_ | gc.TINT16,
		ONE_ | gc.TUINT16,
		ONE_ | gc.TINT32,
		ONE_ | gc.TUINT32,
		ONE_ | gc.TINT64,
		ONE_ | gc.TUINT64,
		ONE_ | gc.TPTR32,
		ONE_ | gc.TPTR64,
		ONE_ | gc.TFLOAT32,
		ONE_ | gc.TFLOAT64:
		a = x86.AJNE

	case OLT_ | gc.TINT8,
		OLT_ | gc.TINT16,
		OLT_ | gc.TINT32,
		OLT_ | gc.TINT64:
		a = x86.AJLT

	case OLT_ | gc.TUINT8,
		OLT_ | gc.TUINT16,
		OLT_ | gc.TUINT32,
		OLT_ | gc.TUINT64:
		a = x86.AJCS

	case OLE_ | gc.TINT8,
		OLE_ | gc.TINT16,
		OLE_ | gc.TINT32,
		OLE_ | gc.TINT64:
		a = x86.AJLE

	case OLE_ | gc.TUINT8,
		OLE_ | gc.TUINT16,
		OLE_ | gc.TUINT32,
		OLE_ | gc.TUINT64:
		a = x86.AJLS

	case OGT_ | gc.TINT8,
		OGT_ | gc.TINT16,
		OGT_ | gc.TINT32,
		OGT_ | gc.TINT64:
		a = x86.AJGT

	// NOTE(review): float < is mapped to the unsigned-above branch;
	// presumably the comparison operands are arranged by the caller so
	// that the x87/SSE flag results line up — confirm against cgen.
	case OGT_ | gc.TUINT8,
		OGT_ | gc.TUINT16,
		OGT_ | gc.TUINT32,
		OGT_ | gc.TUINT64,
		OLT_ | gc.TFLOAT32,
		OLT_ | gc.TFLOAT64:
		a = x86.AJHI

	case OGE_ | gc.TINT8,
		OGE_ | gc.TINT16,
		OGE_ | gc.TINT32,
		OGE_ | gc.TINT64:
		a = x86.AJGE

	case OGE_ | gc.TUINT8,
		OGE_ | gc.TUINT16,
		OGE_ | gc.TUINT32,
		OGE_ | gc.TUINT64,
		OLE_ | gc.TFLOAT32,
		OLE_ | gc.TFLOAT64:
		a = x86.AJCC

	case OCMP_ | gc.TBOOL,
		OCMP_ | gc.TINT8,
		OCMP_ | gc.TUINT8:
		a = x86.ACMPB

	case OCMP_ | gc.TINT16,
		OCMP_ | gc.TUINT16:
		a = x86.ACMPW

	case OCMP_ | gc.TINT32,
		OCMP_ | gc.TUINT32,
		OCMP_ | gc.TPTR32:
		a = x86.ACMPL

	case OAS_ | gc.TBOOL,
		OAS_ | gc.TINT8,
		OAS_ | gc.TUINT8:
		a = x86.AMOVB

	case OAS_ | gc.TINT16,
		OAS_ | gc.TUINT16:
		a = x86.AMOVW

	case OAS_ | gc.TINT32,
		OAS_ | gc.TUINT32,
		OAS_ | gc.TPTR32:
		a = x86.AMOVL

	case OAS_ | gc.TFLOAT32:
		a = x86.AMOVSS

	case OAS_ | gc.TFLOAT64:
		a = x86.AMOVSD

	case OADD_ | gc.TINT8,
		OADD_ | gc.TUINT8:
		a = x86.AADDB

	case OADD_ | gc.TINT16,
		OADD_ | gc.TUINT16:
		a = x86.AADDW

	case OADD_ | gc.TINT32,
		OADD_ | gc.TUINT32,
		OADD_ | gc.TPTR32:
		a = x86.AADDL

	case OSUB_ | gc.TINT8,
		OSUB_ | gc.TUINT8:
		a = x86.ASUBB

	case OSUB_ | gc.TINT16,
		OSUB_ | gc.TUINT16:
		a = x86.ASUBW

	case OSUB_ | gc.TINT32,
		OSUB_ | gc.TUINT32,
		OSUB_ | gc.TPTR32:
		a = x86.ASUBL

	case OINC_ | gc.TINT8,
		OINC_ | gc.TUINT8:
		a = x86.AINCB

	case OINC_ | gc.TINT16,
		OINC_ | gc.TUINT16:
		a = x86.AINCW

	case OINC_ | gc.TINT32,
		OINC_ | gc.TUINT32,
		OINC_ | gc.TPTR32:
		a = x86.AINCL

	case ODEC_ | gc.TINT8,
		ODEC_ | gc.TUINT8:
		a = x86.ADECB

	case ODEC_ | gc.TINT16,
		ODEC_ | gc.TUINT16:
		a = x86.ADECW

	case ODEC_ | gc.TINT32,
		ODEC_ | gc.TUINT32,
		ODEC_ | gc.TPTR32:
		a = x86.ADECL

	case OCOM_ | gc.TINT8,
		OCOM_ | gc.TUINT8:
		a = x86.ANOTB

	case OCOM_ | gc.TINT16,
		OCOM_ | gc.TUINT16:
		a = x86.ANOTW

	case OCOM_ | gc.TINT32,
		OCOM_ | gc.TUINT32,
		OCOM_ | gc.TPTR32:
		a = x86.ANOTL

	case OMINUS_ | gc.TINT8,
		OMINUS_ | gc.TUINT8:
		a = x86.ANEGB

	case OMINUS_ | gc.TINT16,
		OMINUS_ | gc.TUINT16:
		a = x86.ANEGW

	case OMINUS_ | gc.TINT32,
		OMINUS_ | gc.TUINT32,
		OMINUS_ | gc.TPTR32:
		a = x86.ANEGL

	case OAND_ | gc.TINT8,
		OAND_ | gc.TUINT8:
		a = x86.AANDB

	case OAND_ | gc.TINT16,
		OAND_ | gc.TUINT16:
		a = x86.AANDW

	case OAND_ | gc.TINT32,
		OAND_ | gc.TUINT32,
		OAND_ | gc.TPTR32:
		a = x86.AANDL

	case OOR_ | gc.TINT8,
		OOR_ | gc.TUINT8:
		a = x86.AORB

	case OOR_ | gc.TINT16,
		OOR_ | gc.TUINT16:
		a = x86.AORW

	case OOR_ | gc.TINT32,
		OOR_ | gc.TUINT32,
		OOR_ | gc.TPTR32:
		a = x86.AORL

	case OXOR_ | gc.TINT8,
		OXOR_ | gc.TUINT8:
		a = x86.AXORB

	case OXOR_ | gc.TINT16,
		OXOR_ | gc.TUINT16:
		a = x86.AXORW

	case OXOR_ | gc.TINT32,
		OXOR_ | gc.TUINT32,
		OXOR_ | gc.TPTR32:
		a = x86.AXORL

	case OLROT_ | gc.TINT8,
		OLROT_ | gc.TUINT8:
		a = x86.AROLB

	case OLROT_ | gc.TINT16,
		OLROT_ | gc.TUINT16:
		a = x86.AROLW

	case OLROT_ | gc.TINT32,
		OLROT_ | gc.TUINT32,
		OLROT_ | gc.TPTR32:
		a = x86.AROLL

	case OLSH_ | gc.TINT8,
		OLSH_ | gc.TUINT8:
		a = x86.ASHLB

	case OLSH_ | gc.TINT16,
		OLSH_ | gc.TUINT16:
		a = x86.ASHLW

	case OLSH_ | gc.TINT32,
		OLSH_ | gc.TUINT32,
		OLSH_ | gc.TPTR32:
		a = x86.ASHLL

	// Right shift: logical (SHR) for unsigned, arithmetic (SAR) for signed.
	case ORSH_ | gc.TUINT8:
		a = x86.ASHRB

	case ORSH_ | gc.TUINT16:
		a = x86.ASHRW

	case ORSH_ | gc.TUINT32,
		ORSH_ | gc.TPTR32:
		a = x86.ASHRL

	case ORSH_ | gc.TINT8:
		a = x86.ASARB

	case ORSH_ | gc.TINT16:
		a = x86.ASARW

	case ORSH_ | gc.TINT32:
		a = x86.ASARL

	case OHMUL_ | gc.TINT8,
		OMUL_ | gc.TINT8,
		OMUL_ | gc.TUINT8:
		a = x86.AIMULB

	case OHMUL_ | gc.TINT16,
		OMUL_ | gc.TINT16,
		OMUL_ | gc.TUINT16:
		a = x86.AIMULW

	case OHMUL_ | gc.TINT32,
		OMUL_ | gc.TINT32,
		OMUL_ | gc.TUINT32,
		OMUL_ | gc.TPTR32:
		a = x86.AIMULL

	// High multiply of unsigned operands uses the unsigned MUL forms.
	case OHMUL_ | gc.TUINT8:
		a = x86.AMULB

	case OHMUL_ | gc.TUINT16:
		a = x86.AMULW

	case OHMUL_ | gc.TUINT32,
		OHMUL_ | gc.TPTR32:
		a = x86.AMULL

	case ODIV_ | gc.TINT8,
		OMOD_ | gc.TINT8:
		a = x86.AIDIVB

	case ODIV_ | gc.TUINT8,
		OMOD_ | gc.TUINT8:
		a = x86.ADIVB

	case ODIV_ | gc.TINT16,
		OMOD_ | gc.TINT16:
		a = x86.AIDIVW

	case ODIV_ | gc.TUINT16,
		OMOD_ | gc.TUINT16:
		a = x86.ADIVW

	case ODIV_ | gc.TINT32,
		OMOD_ | gc.TINT32:
		a = x86.AIDIVL

	case ODIV_ | gc.TUINT32,
		ODIV_ | gc.TPTR32,
		OMOD_ | gc.TUINT32,
		OMOD_ | gc.TPTR32:
		a = x86.ADIVL

	// Sign extension of AX into DX ahead of a signed divide.
	case OEXTEND_ | gc.TINT16:
		a = x86.ACWD

	case OEXTEND_ | gc.TINT32:
		a = x86.ACDQ
	}

	return a
}

// foptoas returns the floating-point instruction for op on type t.
// With SSE (the non-387 path), flg is ignored. On the 387, flg is a
// bitmask of Frev (reversed operands), Fpop, and Fpop2 (pop the
// register stack once or twice after the operation).
func foptoas(op gc.Op, t *gc.Type, flg int) int {
	a := obj.AXXX
	et := gc.Simtype[t.Etype]

	// avoid constant conversions in switches below
	const (
		OCMP_   = uint32(gc.OCMP) << 16
		OAS_    = uint32(gc.OAS) << 16
		OADD_   = uint32(gc.OADD) << 16
		OSUB_   = uint32(gc.OSUB) << 16
		OMUL_   = uint32(gc.OMUL) << 16
		ODIV_   = uint32(gc.ODIV) << 16
		OMINUS_ = uint32(gc.OMINUS) << 16
	)

	if !gc.Thearch.Use387 {
		// SSE instruction selection: flg plays no role here.
		switch uint32(op)<<16 | uint32(et) {
		default:
			gc.Fatalf("foptoas-sse: no entry %v-%v", gc.Oconv(int(op), 0), t)

		case OCMP_ | gc.TFLOAT32:
			a = x86.AUCOMISS

		case OCMP_ | gc.TFLOAT64:
			a = x86.AUCOMISD

		case OAS_ | gc.TFLOAT32:
			a = x86.AMOVSS

		case OAS_ | gc.TFLOAT64:
			a = x86.AMOVSD

		case OADD_ | gc.TFLOAT32:
			a = x86.AADDSS

		case OADD_ | gc.TFLOAT64:
			a = x86.AADDSD

		case OSUB_ | gc.TFLOAT32:
			a = x86.ASUBSS

		case OSUB_ | gc.TFLOAT64:
			a = x86.ASUBSD

		case OMUL_ | gc.TFLOAT32:
			a = x86.AMULSS

		case OMUL_ | gc.TFLOAT64:
			a = x86.AMULSD

		case ODIV_ | gc.TFLOAT32:
			a = x86.ADIVSS

		case ODIV_ | gc.TFLOAT64:
			a = x86.ADIVSD
		}

		return a
	}

	// If we need Fpop, it means we're working on
	// two different floating-point registers, not memory.
	// There the instruction only has a float64 form.
	if flg&Fpop != 0 {
		et = gc.TFLOAT64
	}

	// clear Frev if unneeded (commutative operations)
	switch op {
	case gc.OADD,
		gc.OMUL:
		flg &^= Frev
	}

	// Pack op, type, and flags into one key for the 387 table.
	switch uint32(op)<<16 | (uint32(et)<<8 | uint32(flg)) {
	case OADD_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFADDF

	case OADD_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFADDD

	case OADD_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFADDDP

	case OSUB_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFSUBF

	case OSUB_ | (gc.TFLOAT32<<8 | Frev):
		return x86.AFSUBRF

	case OSUB_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFSUBD

	case OSUB_ | (gc.TFLOAT64<<8 | Frev):
		return x86.AFSUBRD

	case OSUB_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFSUBDP

	case OSUB_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFSUBRDP

	case OMUL_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFMULF

	case OMUL_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFMULD

	case OMUL_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFMULDP

	case ODIV_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFDIVF

	case ODIV_ | (gc.TFLOAT32<<8 | Frev):
		return x86.AFDIVRF

	case ODIV_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFDIVD

	case ODIV_ | (gc.TFLOAT64<<8 | Frev):
		return x86.AFDIVRD

	case ODIV_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFDIVDP

	case ODIV_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFDIVRDP

	case OCMP_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFCOMF

	case OCMP_ | (gc.TFLOAT32<<8 | Fpop):
		return x86.AFCOMFP

	case OCMP_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFCOMD

	case OCMP_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFCOMDP

	case OCMP_ | (gc.TFLOAT64<<8 | Fpop2):
		return x86.AFCOMDPP

	case OMINUS_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFCHS

	case OMINUS_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFCHS
	}

	gc.Fatalf("foptoas %v %v %#x", gc.Oconv(int(op), 0), t, flg)
	return 0
}

// Registers the code generator must not allocate freely.
var resvd = []int{
	//	REG_DI,	// for movstring
	//	REG_SI,	// for movstring

	x86.REG_AX, // for divide
	x86.REG_CX, // for shift
	x86.REG_DX, // for divide, context
	x86.REG_SP, // for stack
}

/*
 * generate
 *	as $c, reg
 */
func gconreg(as int, c int64, reg int) {
	var n1 gc.Node
	var n2 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
	gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
	gins(as, &n1, &n2)
}

/*
 * generate
 *	as $c, n
 */
func ginscon(as int, c int64, n2 *gc.Node) {
	var n1 gc.Node
	gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
	gins(as, &n1, n2)
}

// ginscmp generates a comparison of n1 against n2 for operator op and
// returns the conditional-branch instruction (via Gbranch) that the
// caller patches. likely is the branch-prediction hint passed through
// to Gbranch.
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if gc.Isint[t.Etype] || t.Etype == gc.Tptr {
		if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL {
			// Reverse comparison to place constant (including address constant) last.
			op = gc.Brrev(op)

			n1, n2 = n2, n1
		}
	}

	// General case.
	var r1, r2, g1, g2 gc.Node

	// A simple variable or indirect-register operand can be compared
	// in place; anything else is evaluated into a register first.
	if n1.Op == gc.ONAME && n1.Class&gc.PHEAP == 0 || n1.Op == gc.OINDREG {
		r1 = *n1
	} else {
		gc.Regalloc(&r1, t, n1)
		gc.Regalloc(&g1, n1.Type, &r1)
		gc.Cgen(n1, &g1)
		gmove(&g1, &r1)
	}
	if n2.Op == gc.OLITERAL && gc.Isint[t.Etype] || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN {
		r2 = *n2
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
	}
	gins(optoas(gc.OCMP, t), &r1, &r2)
	// Only operands we register-allocated above need freeing.
	if r1.Op == gc.OREGISTER {
		gc.Regfree(&g1)
		gc.Regfree(&r1)
	}
	if r2.Op == gc.OREGISTER {
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	return gc.Gbranch(optoas(op, t), nil, likely)
}

/*
 * swap node contents
 */
func nswap(a *gc.Node, b *gc.Node) {
	t := *a
	*a = *b
	*b = t
}

/*
 * return constant i node.
 * overwritten by next call, but useful in calls to gins.
 */

var ncon_n gc.Node

func ncon(i uint32) *gc.Node {
	if ncon_n.Type == nil {
		gc.Nodconst(&ncon_n, gc.Types[gc.TUINT32], 0)
	}
	ncon_n.SetInt(int64(i))
	return &ncon_n
}

// Cleanup stack for split64: nodes that must be freed by splitclean.
var sclean [10]gc.Node

var nsclean int

/*
 * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
 */
func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
	if !gc.Is64(n.Type) {
		gc.Fatalf("split64 %v", n.Type)
	}

	if nsclean >= len(sclean) {
		gc.Fatalf("split64 clean")
	}
	// Reserve a cleanup slot; OEMPTY means "nothing to free" unless a
	// register/temporary is recorded below.
	sclean[nsclean].Op = gc.OEMPTY
	nsclean++
	switch n.Op {
	default:
		switch n.Op {
		default:
			var n1 gc.Node
			if !dotaddable(n, &n1) {
				gc.Igen(n, &n1, nil)
				sclean[nsclean-1] = n1
			}

			n = &n1

		case gc.ONAME:
			if n.Class == gc.PPARAMREF {
				var n1 gc.Node
				gc.Cgen(n.Name.Heapaddr, &n1)
				sclean[nsclean-1] = n1
				n = &n1
			}

			// nothing
		case gc.OINDREG:
			break
		}

		*lo = *n
		*hi = *n
		lo.Type = gc.Types[gc.TUINT32]
		if n.Type.Etype == gc.TINT64 {
			hi.Type = gc.Types[gc.TINT32]
		} else {
			hi.Type = gc.Types[gc.TUINT32]
		}
		// Little-endian: the high 32 bits live 4 bytes past the low 32.
		hi.Xoffset += 4

	case gc.OLITERAL:
		var n1 gc.Node
		n.Convconst(&n1, n.Type)
		i := n1.Int()
		gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
		i >>= 32
		if n.Type.Etype == gc.TINT64 {
			gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
		} else {
			gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
		}
	}
}

// splitclean undoes the most recent split64, freeing any register or
// temporary it recorded. Calls must pair with split64 in LIFO order.
func splitclean() {
	if nsclean <= 0 {
		gc.Fatalf("splitclean")
	}
	nsclean--
	if sclean[nsclean].Op != gc.OEMPTY {
		gc.Regfree(&sclean[nsclean])
	}
}

// set up nodes representing fp constants
var (
	zerof        gc.Node
	two63f       gc.Node
	two64f       gc.Node
	bignodes_did bool
)

// bignodes initializes zerof, two63f (2^63), and two64f (2^64) as
// float64 constant nodes. Idempotent; guarded by bignodes_did.
func bignodes() {
	if bignodes_did {
		return
	}
	bignodes_did = true

	gc.Nodconst(&zerof, gc.Types[gc.TINT64], 0)
	zerof.Convconst(&zerof, gc.Types[gc.TFLOAT64])

	var i big.Int
	i.SetInt64(1)
	i.Lsh(&i, 63)
	var bigi gc.Node

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two63f, gc.Types[gc.TFLOAT64])

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	i.Lsh(&i, 1)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two64f, gc.Types[gc.TFLOAT64])
}

// memname allocates a stack temporary of type t into n, renamed so the
// optimizer will not promote it to a register.
func memname(n *gc.Node, t *gc.Type) {
	gc.Tempname(n, t)
	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
	n.Orig.Sym = n.Sym
}

// gmove generates code to move the value of node f into node t,
// performing any needed conversion between their (integer) types.
// Complex and floating-point moves are delegated to Complexmove and
// floatmove respectively.
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	var r1 gc.Node
	var a int
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatalf("gmove %v -> %v", f, t)
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			// Implementation of conversion-free x = y for int64 or uint64 x.
			// This is generated by the code that copies small values out of closures,
			// and that code has DX live, so avoid DX and use CX instead.
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			var r2 gc.Node
			gc.Nodreg(&r2, gc.Types[gc.TUINT32], x86.REG_CX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &fhi, &r2)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &r2, &thi)
		}

		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		// CDQ sign-extends AX into DX, producing the high word.
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}

// floatmove generates code to move f to t when at least one of them is
// floating point, handling the float<->64-bit-integer conversions that
// need explicit instruction sequences on 386; other combinations are
// delegated to floatmove_387 or floatmove_sse.
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

		// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		// restore the saved FPU control word
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

		/*
		 * integer to float
		 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

		// algorithm is:
		//	if small enough, use native int64 -> float64 conversion.
		//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}

// floatmove_387 generates code to move f to t on the x87 (387)
// floating-point unit.
func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a int

	ft :=
gc.Simsimtype(f.Type) 1355 tt := gc.Simsimtype(t.Type) 1356 cvt := t.Type 1357 1358 switch uint32(ft)<<16 | uint32(tt) { 1359 default: 1360 goto fatal 1361 1362 /* 1363 * float to integer 1364 */ 1365 case gc.TFLOAT32<<16 | gc.TINT16, 1366 gc.TFLOAT32<<16 | gc.TINT32, 1367 gc.TFLOAT32<<16 | gc.TINT64, 1368 gc.TFLOAT64<<16 | gc.TINT16, 1369 gc.TFLOAT64<<16 | gc.TINT32, 1370 gc.TFLOAT64<<16 | gc.TINT64: 1371 if t.Op == gc.OREGISTER { 1372 goto hardmem 1373 } 1374 var r1 gc.Node 1375 gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0) 1376 if f.Op != gc.OREGISTER { 1377 if ft == gc.TFLOAT32 { 1378 gins(x86.AFMOVF, f, &r1) 1379 } else { 1380 gins(x86.AFMOVD, f, &r1) 1381 } 1382 } 1383 1384 // set round to zero mode during conversion 1385 var t1 gc.Node 1386 memname(&t1, gc.Types[gc.TUINT16]) 1387 1388 var t2 gc.Node 1389 memname(&t2, gc.Types[gc.TUINT16]) 1390 gins(x86.AFSTCW, nil, &t1) 1391 gins(x86.AMOVW, ncon(0xf7f), &t2) 1392 gins(x86.AFLDCW, &t2, nil) 1393 if tt == gc.TINT16 { 1394 gins(x86.AFMOVWP, &r1, t) 1395 } else if tt == gc.TINT32 { 1396 gins(x86.AFMOVLP, &r1, t) 1397 } else { 1398 gins(x86.AFMOVVP, &r1, t) 1399 } 1400 gins(x86.AFLDCW, &t1, nil) 1401 return 1402 1403 // convert via int32. 
	// Float to small integer: convert through an int32 temporary, then
	// clamp (signed) or zero (unsigned) results that do not fit the
	// destination width.
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TINT32])

		gmove(f, &t1)
		switch tt {
		default:
			gc.Fatalf("gmove %v", t)

		case gc.TINT8:
			// Values outside [-0x80, 0x7f] become -0x80.
			// (-0x80 is masked to its 32-bit two's-complement
			// bit pattern for ncon.)
			gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
			p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
			gins(x86.ACMPL, &t1, ncon(0x7f))
			p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
			p3 := gc.Gbranch(obj.AJMP, nil, 0)
			gc.Patch(p1, gc.Pc)
			gc.Patch(p2, gc.Pc)
			gmove(ncon(-0x80&(1<<32-1)), &t1)
			gc.Patch(p3, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT8:
			// Any bit set above the low byte zeroes the result.
			gins(x86.ATESTL, ncon(0xffffff00), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT16:
			// Any bit set above the low 16 bits zeroes the result.
			gins(x86.ATESTL, ncon(0xffff0000), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)
		}

		return

	// convert via int64.
	// Float to uint32: no direct form; go through a signed int64 in
	// memory.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		/*
		 * integer to float
		 */
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op != gc.OREGISTER {
			goto hard
		}
		if f.Op == gc.OREGISTER {
			// Source is in a CPU register; the x87 integer load
			// takes a memory operand, so spill through memory.
			cvt = f.Type
			goto hardmem
		}

		// Pick the x87 integer-load width for the source type; the
		// shared gins(a, f, t) after the switch emits the instruction.
		switch ft {
		case gc.TINT16:
			a = x86.AFMOVW

		case gc.TINT32:
			a = x86.AFMOVL

		default:
			a = x86.AFMOVV
		}

	// convert via int32 memory
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hardmem

	// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	// The way the code generator uses floating-point
	// registers, a move from F0 to F0 is intended as a no-op.
	// On the x86, it's not: it pushes a second copy of F0
	// on the floating point stack. So toss it away here.
	// Also, F0 is the *only* register we ever evaluate
	// into, so we should only see register/register as F0/F0.
	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32,
		gc.TFLOAT64<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			// F0 -> F0 is deliberately a no-op (see the comment
			// above this case); anything else is a bug.
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		// One side is memory: plain load (AFMOVF/AFMOVD) into F0, or
		// store-and-pop (AFMOVFP/AFMOVDP) from F0 to memory.
		a = x86.AFMOVF
		if ft == gc.TFLOAT64 {
			a = x86.AFMOVD
		}
		if gc.Ismem(t) {
			if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
				gc.Fatalf("gmove %v", f)
			}
			a = x86.AFMOVFP
			if ft == gc.TFLOAT64 {
				a = x86.AFMOVDP
			}
		}

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVDP, f, t)
		} else {
			gins(x86.AFMOVF, f, t)
		}
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			// Narrow to float32 by storing through a float32
			// temporary and reloading.
			var r1 gc.Node
			gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
			gins(x86.AFMOVFP, f, &r1)
			gins(x86.AFMOVF, &r1, t)
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVFP, f, t)
		} else {
			gins(x86.AFMOVD, f, t)
		}
		return
	}

	// Single-instruction case: opcode chosen in the switch above.
	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// should not happen
fatal:
	gc.Fatalf("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))

	return
}

// floatmove_sse generates code to move f to t using SSE2 instructions.
// Pairs with no direct SSE2 form are routed through a register
// intermediate (hard), a memory intermediate (hardmem), or forced into a
// register destination (rdst).
func floatmove_sse(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var cvt *gc.Type
	var a int

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)

	// Dispatch on the (source type, destination type) pair.
	switch uint32(ft)<<16 | uint32(tt) {
	// should not happen
	default:
		gc.Fatalf("gmove %v -> %v", f, t)

		return

	// convert via int32.
	/*
	 * float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

	// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TFLOAT32<<16 | gc.TINT32:
		a = x86.ACVTTSS2SL // truncating conversion
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL // truncating conversion
		goto rdst

	// convert via int32 memory
	/*
	 * integer to float
	 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS
		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	// Single-instruction case: opcode chosen in the switch above.
	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// requires register destination
rdst:
	gc.Regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}

// samaddr reports whether f and t denote the same storage location.
// Only register operands are recognized; everything else conservatively
// reports false.
func samaddr(f *gc.Node, t *gc.Node) bool {
	if f.Op != t.Op {
		return false
	}

	switch f.Op {
	case gc.OREGISTER:
		if f.Reg != t.Reg {
			break
		}
		return true
	}

	return false
}

/*
 * generate one instruction:
 *	as f, t
 * Returns the emitted Prog, or nil when the move is elided as a no-op.
 */
func gins(as int, f *gc.Node, t *gc.Node) *obj.Prog {
	// Sanity checks: instruction forms the backend must never produce.
	if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER {
		gc.Fatalf("gins MOVF reg, reg")
	}
	if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL {
		gc.Fatalf("gins CVTSD2SS const")
	}
	if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 {
		gc.Fatalf("gins MOVSD into F0")
	}

	if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC {
		// Turn MOVL $xxx(FP/SP) into LEAL xxx.
		// These should be equivalent but most of the backend
		// only expects to see LEAL, because that's what we had
		// historically generated. Various hidden assumptions are baked in by now.
		as = x86.ALEAL
		f = f.Left
	}

	switch as {
	case x86.AMOVB,
		x86.AMOVW,
		x86.AMOVL:
		// A register-to-itself move is a no-op; emit nothing.
		if f != nil && t != nil && samaddr(f, t) {
			return nil
		}

	case x86.ALEAL:
		if f != nil && gc.Isconst(f, gc.CTNIL) {
			gc.Fatalf("gins LEAL nil %v", f.Type)
		}
	}

	p := gc.Prog(as)
	gc.Naddr(&p.From, f)
	gc.Naddr(&p.To, t)

	// With the -g debug flag, print each instruction as it is generated.
	if gc.Debug['g'] != 0 {
		fmt.Printf("%v\n", p)
	}

	// Consistency check: neither operand may be wider than the number of
	// bytes the move instruction actually transfers.
	// (The "true &&" below is vestigial.)
	w := 0
	switch as {
	case x86.AMOVB:
		w = 1

	case x86.AMOVW:
		w = 2

	case x86.AMOVL:
		w = 4
	}

	if true && w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) {
		gc.Dump("bad width from:", f)
		gc.Dump("bad width to:", t)
		gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width)
	}

	if p.To.Type == obj.TYPE_ADDR && w > 0 {
		gc.Fatalf("bad use of addr: %v", p)
	}

	return p
}

// ginsnop generates a no-op instruction: XCHGL AX, AX.
func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
	gins(x86.AXCHGL, &reg, &reg)
}

// dotaddable reports whether n is a field selection (ODOT) reachable as
// a simple constant offset from an addressable base. On success it fills
// in *n1 with a copy of the base node adjusted to n's type and offset.
func dotaddable(n *gc.Node, n1 *gc.Node) bool {
	if n.Op != gc.ODOT {
		return false
	}

	var oary [10]int64
	var nn *gc.Node
	// Accept only a single, non-negative offset step.
	// NOTE(review): presumably negative oary entries encode pointer
	// indirections — confirm against gc.Dotoffset.
	o := gc.Dotoffset(n, oary[:], &nn)
	if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 {
		*n1 = *nn
		n1.Type = n.Type
		n1.Xoffset += oary[0]
		return true
	}

	return false
}

// sudoclean releases any state set up by sudoaddable. The x86 backend
// never allocates any, so this is a no-op.
func sudoclean() {
}

// sudoaddable is the hook for synthesizing complex addressing modes.
// The x86 backend does not implement it: it clears *a and reports
// failure, so callers fall back to the generic code path.
func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool {
	*a = obj.Addr{}
	return false
}