github.com/gocuntian/go@v0.0.0-20160610041250-fee02d270bf8/src/cmd/compile/internal/x86/gsubr.go

// Derived from Inferno utils/8c/txt.c
// http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c
//
//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
//	Portions Copyright © 1997-1999 Vita Nuova Limited
//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
//	Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package x86

import (
	"cmd/compile/internal/big"
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"fmt"
)

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../runtime/stack.h.
var unmappedzero uint32 = 4096

// foptoas flags
const (
	Frev  = 1 << 0
	Fpop  = 1 << 1
	Fpop2 = 1 << 2
)

/*
 * return Axxx for Oxxx on type t.
 */
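// For example, optoas(gc.OADD, gc.Types[gc.TINT32]) should select x86.AADDL,
// and optoas(gc.OLT, gc.Types[gc.TUINT32]) the carry-flag branch x86.AJCS
// (illustrative pairings; the authoritative mapping is the switch below).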
func optoas(op gc.Op, t *gc.Type) obj.As {
	if t == nil {
		gc.Fatalf("optoas: t is nil")
	}

	// avoid constant conversions in switches below
	const (
		OMINUS_  = uint32(gc.OMINUS) << 16
		OLSH_    = uint32(gc.OLSH) << 16
		ORSH_    = uint32(gc.ORSH) << 16
		OADD_    = uint32(gc.OADD) << 16
		OSUB_    = uint32(gc.OSUB) << 16
		OMUL_    = uint32(gc.OMUL) << 16
		ODIV_    = uint32(gc.ODIV) << 16
		OMOD_    = uint32(gc.OMOD) << 16
		OOR_     = uint32(gc.OOR) << 16
		OAND_    = uint32(gc.OAND) << 16
		OXOR_    = uint32(gc.OXOR) << 16
		OEQ_     = uint32(gc.OEQ) << 16
		ONE_     = uint32(gc.ONE) << 16
		OLT_     = uint32(gc.OLT) << 16
		OLE_     = uint32(gc.OLE) << 16
		OGE_     = uint32(gc.OGE) << 16
		OGT_     = uint32(gc.OGT) << 16
		OCMP_    = uint32(gc.OCMP) << 16
		OAS_     = uint32(gc.OAS) << 16
		OHMUL_   = uint32(gc.OHMUL) << 16
		OADDR_   = uint32(gc.OADDR) << 16
		OINC_    = uint32(gc.OINC) << 16
		ODEC_    = uint32(gc.ODEC) << 16
		OLROT_   = uint32(gc.OLROT) << 16
		OEXTEND_ = uint32(gc.OEXTEND) << 16
		OCOM_    = uint32(gc.OCOM) << 16
	)

	a := obj.AXXX
	switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
	default:
		gc.Fatalf("optoas: no entry %v-%v", op, t)

	case OADDR_ | gc.TPTR32:
		a = x86.ALEAL

	case OEQ_ | gc.TBOOL,
		OEQ_ | gc.TINT8,
		OEQ_ | gc.TUINT8,
		OEQ_ | gc.TINT16,
		OEQ_ | gc.TUINT16,
		OEQ_ | gc.TINT32,
		OEQ_ | gc.TUINT32,
		OEQ_ | gc.TINT64,
		OEQ_ | gc.TUINT64,
		OEQ_ | gc.TPTR32,
		OEQ_ | gc.TPTR64,
		OEQ_ | gc.TFLOAT32,
		OEQ_ | gc.TFLOAT64:
		a = x86.AJEQ

	case ONE_ | gc.TBOOL,
		ONE_ | gc.TINT8,
		ONE_ | gc.TUINT8,
		ONE_ | gc.TINT16,
		ONE_ | gc.TUINT16,
		ONE_ | gc.TINT32,
		ONE_ | gc.TUINT32,
		ONE_ | gc.TINT64,
		ONE_ | gc.TUINT64,
		ONE_ | gc.TPTR32,
		ONE_ | gc.TPTR64,
		ONE_ | gc.TFLOAT32,
		ONE_ | gc.TFLOAT64:
		a = x86.AJNE

	case OLT_ | gc.TINT8,
		OLT_ | gc.TINT16,
		OLT_ | gc.TINT32,
		OLT_ | gc.TINT64:
		a = x86.AJLT
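	// Unsigned comparisons use the carry-flag branches (AJCS = JB,
	// AJLS = JBE, AJHI = JA, AJCC = JAE). OLT/OLE on floats are grouped
	// with the unsigned "greater" conditions below, which presumes the
	// float comparison is emitted with its operands reversed so that
	// unordered (NaN) results fall out of the carry-based tests.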
	case OLT_ | gc.TUINT8,
		OLT_ | gc.TUINT16,
		OLT_ | gc.TUINT32,
		OLT_ | gc.TUINT64:
		a = x86.AJCS

	case OLE_ | gc.TINT8,
		OLE_ | gc.TINT16,
		OLE_ | gc.TINT32,
		OLE_ | gc.TINT64:
		a = x86.AJLE

	case OLE_ | gc.TUINT8,
		OLE_ | gc.TUINT16,
		OLE_ | gc.TUINT32,
		OLE_ | gc.TUINT64:
		a = x86.AJLS

	case OGT_ | gc.TINT8,
		OGT_ | gc.TINT16,
		OGT_ | gc.TINT32,
		OGT_ | gc.TINT64:
		a = x86.AJGT

	case OGT_ | gc.TUINT8,
		OGT_ | gc.TUINT16,
		OGT_ | gc.TUINT32,
		OGT_ | gc.TUINT64,
		OLT_ | gc.TFLOAT32,
		OLT_ | gc.TFLOAT64:
		a = x86.AJHI

	case OGE_ | gc.TINT8,
		OGE_ | gc.TINT16,
		OGE_ | gc.TINT32,
		OGE_ | gc.TINT64:
		a = x86.AJGE

	case OGE_ | gc.TUINT8,
		OGE_ | gc.TUINT16,
		OGE_ | gc.TUINT32,
		OGE_ | gc.TUINT64,
		OLE_ | gc.TFLOAT32,
		OLE_ | gc.TFLOAT64:
		a = x86.AJCC

	case OCMP_ | gc.TBOOL,
		OCMP_ | gc.TINT8,
		OCMP_ | gc.TUINT8:
		a = x86.ACMPB

	case OCMP_ | gc.TINT16,
		OCMP_ | gc.TUINT16:
		a = x86.ACMPW

	case OCMP_ | gc.TINT32,
		OCMP_ | gc.TUINT32,
		OCMP_ | gc.TPTR32:
		a = x86.ACMPL

	case OAS_ | gc.TBOOL,
		OAS_ | gc.TINT8,
		OAS_ | gc.TUINT8:
		a = x86.AMOVB

	case OAS_ | gc.TINT16,
		OAS_ | gc.TUINT16:
		a = x86.AMOVW

	case OAS_ | gc.TINT32,
		OAS_ | gc.TUINT32,
		OAS_ | gc.TPTR32:
		a = x86.AMOVL

	case OAS_ | gc.TFLOAT32:
		a = x86.AMOVSS

	case OAS_ | gc.TFLOAT64:
		a = x86.AMOVSD

	case OADD_ | gc.TINT8,
		OADD_ | gc.TUINT8:
		a = x86.AADDB

	case OADD_ | gc.TINT16,
		OADD_ | gc.TUINT16:
		a = x86.AADDW

	case OADD_ | gc.TINT32,
		OADD_ | gc.TUINT32,
		OADD_ | gc.TPTR32:
		a = x86.AADDL

	case OSUB_ | gc.TINT8,
		OSUB_ | gc.TUINT8:
		a = x86.ASUBB

	case OSUB_ | gc.TINT16,
		OSUB_ | gc.TUINT16:
		a = x86.ASUBW

	case OSUB_ | gc.TINT32,
		OSUB_ | gc.TUINT32,
		OSUB_ | gc.TPTR32:
		a = x86.ASUBL

	case OINC_ | gc.TINT8,
		OINC_ | gc.TUINT8:
		a = x86.AINCB

	case OINC_ | gc.TINT16,
		OINC_ | gc.TUINT16:
		a = x86.AINCW

	case OINC_ | gc.TINT32,
		OINC_ | gc.TUINT32,
		OINC_ | gc.TPTR32:
		a = x86.AINCL

	case ODEC_ | gc.TINT8,
		ODEC_ | gc.TUINT8:
		a = x86.ADECB

	case ODEC_ | gc.TINT16,
		ODEC_ | gc.TUINT16:
		a = x86.ADECW

	case ODEC_ | gc.TINT32,
		ODEC_ | gc.TUINT32,
		ODEC_ | gc.TPTR32:
		a = x86.ADECL

	case OCOM_ | gc.TINT8,
		OCOM_ | gc.TUINT8:
		a = x86.ANOTB

	case OCOM_ | gc.TINT16,
		OCOM_ | gc.TUINT16:
		a = x86.ANOTW

	case OCOM_ | gc.TINT32,
		OCOM_ | gc.TUINT32,
		OCOM_ | gc.TPTR32:
		a = x86.ANOTL

	case OMINUS_ | gc.TINT8,
		OMINUS_ | gc.TUINT8:
		a = x86.ANEGB

	case OMINUS_ | gc.TINT16,
		OMINUS_ | gc.TUINT16:
		a = x86.ANEGW

	case OMINUS_ | gc.TINT32,
		OMINUS_ | gc.TUINT32,
		OMINUS_ | gc.TPTR32:
		a = x86.ANEGL

	case OAND_ | gc.TINT8,
		OAND_ | gc.TUINT8:
		a = x86.AANDB

	case OAND_ | gc.TINT16,
		OAND_ | gc.TUINT16:
		a = x86.AANDW

	case OAND_ | gc.TINT32,
		OAND_ | gc.TUINT32,
		OAND_ | gc.TPTR32:
		a = x86.AANDL

	case OOR_ | gc.TINT8,
		OOR_ | gc.TUINT8:
		a = x86.AORB

	case OOR_ | gc.TINT16,
		OOR_ | gc.TUINT16:
		a = x86.AORW

	case OOR_ | gc.TINT32,
		OOR_ | gc.TUINT32,
		OOR_ | gc.TPTR32:
		a = x86.AORL

	case OXOR_ | gc.TINT8,
		OXOR_ | gc.TUINT8:
		a = x86.AXORB

	case OXOR_ | gc.TINT16,
		OXOR_ | gc.TUINT16:
		a = x86.AXORW

	case OXOR_ | gc.TINT32,
		OXOR_ | gc.TUINT32,
		OXOR_ | gc.TPTR32:
		a = x86.AXORL

	case OLROT_ | gc.TINT8,
		OLROT_ | gc.TUINT8:
		a = x86.AROLB

	case OLROT_ | gc.TINT16,
		OLROT_ | gc.TUINT16:
		a = x86.AROLW

	case OLROT_ | gc.TINT32,
		OLROT_ | gc.TUINT32,
		OLROT_ | gc.TPTR32:
		a = x86.AROLL

	case OLSH_ | gc.TINT8,
		OLSH_ | gc.TUINT8:
		a = x86.ASHLB

	case OLSH_ | gc.TINT16,
		OLSH_ | gc.TUINT16:
		a = x86.ASHLW

	case OLSH_ | gc.TINT32,
		OLSH_ | gc.TUINT32,
		OLSH_ | gc.TPTR32:
		a = x86.ASHLL

	case ORSH_ | gc.TUINT8:
		a = x86.ASHRB

	case ORSH_ | gc.TUINT16:
		a = x86.ASHRW

	case ORSH_ | gc.TUINT32,
		ORSH_ | gc.TPTR32:
		a = x86.ASHRL

	case ORSH_ | gc.TINT8:
		a = x86.ASARB

	case ORSH_ | gc.TINT16:
		a = x86.ASARW

	case ORSH_ | gc.TINT32:
		a = x86.ASARL

	case OHMUL_ | gc.TINT8,
		OMUL_ | gc.TINT8,
		OMUL_ | gc.TUINT8:
		a = x86.AIMULB

	case OHMUL_ | gc.TINT16,
		OMUL_ | gc.TINT16,
		OMUL_ | gc.TUINT16:
		a = x86.AIMULW

	case OHMUL_ | gc.TINT32,
		OMUL_ | gc.TINT32,
		OMUL_ | gc.TUINT32,
		OMUL_ | gc.TPTR32:
		a = x86.AIMULL

	case OHMUL_ | gc.TUINT8:
		a = x86.AMULB

	case OHMUL_ | gc.TUINT16:
		a = x86.AMULW

	case OHMUL_ | gc.TUINT32,
		OHMUL_ | gc.TPTR32:
		a = x86.AMULL

	case ODIV_ | gc.TINT8,
		OMOD_ | gc.TINT8:
		a = x86.AIDIVB

	case ODIV_ | gc.TUINT8,
		OMOD_ | gc.TUINT8:
		a = x86.ADIVB

	case ODIV_ | gc.TINT16,
		OMOD_ | gc.TINT16:
		a = x86.AIDIVW

	case ODIV_ | gc.TUINT16,
		OMOD_ | gc.TUINT16:
		a = x86.ADIVW

	case ODIV_ | gc.TINT32,
		OMOD_ | gc.TINT32:
		a = x86.AIDIVL

	case ODIV_ | gc.TUINT32,
		ODIV_ | gc.TPTR32,
		OMOD_ | gc.TUINT32,
		OMOD_ | gc.TPTR32:
		a = x86.ADIVL

	case OEXTEND_ | gc.TINT16:
		a = x86.ACWD

	case OEXTEND_ | gc.TINT32:
		a = x86.ACDQ
	}

	return a
}
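// In foptoas below, the Frev/Fpop/Fpop2 flags select among the x87 encodings
// of one operation: Frev picks the reversed-operand form (FSUBR vs. FSUB) and
// Fpop/Fpop2 the popping forms (FADDP, FCOMDPP). The flags share the low byte
// of the switch key with the operand type, as in
// uint32(op)<<16 | uint32(et)<<8 | uint32(flg).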
func foptoas(op gc.Op, t *gc.Type, flg int) obj.As {
	a := obj.AXXX
	et := gc.Simtype[t.Etype]

	// avoid constant conversions in switches below
	const (
		OCMP_   = uint32(gc.OCMP) << 16
		OAS_    = uint32(gc.OAS) << 16
		OADD_   = uint32(gc.OADD) << 16
		OSUB_   = uint32(gc.OSUB) << 16
		OMUL_   = uint32(gc.OMUL) << 16
		ODIV_   = uint32(gc.ODIV) << 16
		OMINUS_ = uint32(gc.OMINUS) << 16
	)

	if !gc.Thearch.Use387 {
		switch uint32(op)<<16 | uint32(et) {
		default:
			gc.Fatalf("foptoas-sse: no entry %v-%v", op, t)

		case OCMP_ | gc.TFLOAT32:
			a = x86.AUCOMISS

		case OCMP_ | gc.TFLOAT64:
			a = x86.AUCOMISD

		case OAS_ | gc.TFLOAT32:
			a = x86.AMOVSS

		case OAS_ | gc.TFLOAT64:
			a = x86.AMOVSD

		case OADD_ | gc.TFLOAT32:
			a = x86.AADDSS

		case OADD_ | gc.TFLOAT64:
			a = x86.AADDSD

		case OSUB_ | gc.TFLOAT32:
			a = x86.ASUBSS

		case OSUB_ | gc.TFLOAT64:
			a = x86.ASUBSD

		case OMUL_ | gc.TFLOAT32:
			a = x86.AMULSS

		case OMUL_ | gc.TFLOAT64:
			a = x86.AMULSD

		case ODIV_ | gc.TFLOAT32:
			a = x86.ADIVSS

		case ODIV_ | gc.TFLOAT64:
			a = x86.ADIVSD
		}

		return a
	}

	// If we need Fpop, it means we're working on
	// two different floating-point registers, not memory.
	// There the instruction only has a float64 form.
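	// (The x87 register-register forms operate on the full-width stack
	// registers, so only memory operands distinguish float32 from float64;
	// widening et to TFLOAT64 folds the register cases into the float64
	// table entries below.)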
	if flg&Fpop != 0 {
		et = gc.TFLOAT64
	}

	// clear Frev if unneeded
	switch op {
	case gc.OADD,
		gc.OMUL:
		flg &^= Frev
	}

	switch uint32(op)<<16 | (uint32(et)<<8 | uint32(flg)) {
	case OADD_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFADDF

	case OADD_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFADDD

	case OADD_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFADDDP

	case OSUB_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFSUBF

	case OSUB_ | (gc.TFLOAT32<<8 | Frev):
		return x86.AFSUBRF

	case OSUB_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFSUBD

	case OSUB_ | (gc.TFLOAT64<<8 | Frev):
		return x86.AFSUBRD

	case OSUB_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFSUBDP

	case OSUB_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFSUBRDP

	case OMUL_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFMULF

	case OMUL_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFMULD

	case OMUL_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFMULDP

	case ODIV_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFDIVF

	case ODIV_ | (gc.TFLOAT32<<8 | Frev):
		return x86.AFDIVRF

	case ODIV_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFDIVD

	case ODIV_ | (gc.TFLOAT64<<8 | Frev):
		return x86.AFDIVRD

	case ODIV_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFDIVDP

	case ODIV_ | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFDIVRDP

	case OCMP_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFCOMF

	case OCMP_ | (gc.TFLOAT32<<8 | Fpop):
		return x86.AFCOMFP

	case OCMP_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFCOMD

	case OCMP_ | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFCOMDP

	case OCMP_ | (gc.TFLOAT64<<8 | Fpop2):
		return x86.AFCOMDPP

	case OMINUS_ | (gc.TFLOAT32<<8 | 0):
		return x86.AFCHS

	case OMINUS_ | (gc.TFLOAT64<<8 | 0):
		return x86.AFCHS
	}

	gc.Fatalf("foptoas %v %v %#x", op, t, flg)
	return 0
}

var resvd = []int{
	// REG_DI, // for movstring
	// REG_SI, // for movstring

	x86.REG_AX, // for divide
	x86.REG_CX, // for shift
	x86.REG_DX, // for divide, context
	x86.REG_SP, // for stack
}

/*
 * generate
 *	as $c, reg
 */
func gconreg(as obj.As, c int64, reg int) {
	var n1 gc.Node
	var n2 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
	gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
	gins(as, &n1, &n2)
}

/*
 * generate
 *	as $c, n
 */
func ginscon(as obj.As, c int64, n2 *gc.Node) {
	var n1 gc.Node
	gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
	gins(as, &n1, n2)
}

func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if t.IsInteger() || t.Etype == gc.Tptr {
		if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL {
			// Reverse comparison to place constant (including address constant) last.
			op = gc.Brrev(op)
			n1, n2 = n2, n1
		}
	}

	// General case.
	var r1, r2, g1, g2 gc.Node

	// A special case to make write barriers more efficient.
	// Comparing the first field of a named struct can be done directly.
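	// (For example, if p names a struct whose first field is f, then p.f
	// lives at offset 0 and shares p's address, so the comparison can use
	// p's node directly instead of materializing the ODOT expression.)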
	base := n1
	if n1.Op == gc.ODOT && n1.Left.Type.IsStruct() && n1.Left.Type.Field(0).Sym == n1.Sym {
		base = n1.Left
	}

	if base.Op == gc.ONAME && base.Class != gc.PAUTOHEAP || n1.Op == gc.OINDREG {
		r1 = *n1
	} else {
		gc.Regalloc(&r1, t, n1)
		gc.Regalloc(&g1, n1.Type, &r1)
		gc.Cgen(n1, &g1)
		gmove(&g1, &r1)
	}
	if n2.Op == gc.OLITERAL && t.IsInteger() || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN {
		r2 = *n2
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
	}
	gins(optoas(gc.OCMP, t), &r1, &r2)
	if r1.Op == gc.OREGISTER {
		gc.Regfree(&g1)
		gc.Regfree(&r1)
	}
	if r2.Op == gc.OREGISTER {
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	return gc.Gbranch(optoas(op, t), nil, likely)
}

/*
 * swap node contents
 */
func nswap(a *gc.Node, b *gc.Node) {
	t := *a
	*a = *b
	*b = t
}

/*
 * return constant i node.
 * overwritten by next call, but useful in calls to gins.
 */

var ncon_n gc.Node

func ncon(i uint32) *gc.Node {
	if ncon_n.Type == nil {
		gc.Nodconst(&ncon_n, gc.Types[gc.TUINT32], 0)
	}
	ncon_n.SetInt(int64(i))
	return &ncon_n
}

var sclean [10]gc.Node

var nsclean int

/*
 * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
 */
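// A typical pairing, as used throughout gmove below:
//
//	var lo, hi gc.Node
//	split64(f, &lo, &hi)
//	// ... operate on the halves ...
//	splitclean()
//
// Every split64 must be matched by a splitclean, which frees any register
// the split may have allocated for the address.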
func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
	if !gc.Is64(n.Type) {
		gc.Fatalf("split64 %v", n.Type)
	}

	if nsclean >= len(sclean) {
		gc.Fatalf("split64 clean")
	}
	sclean[nsclean].Op = gc.OEMPTY
	nsclean++
	switch n.Op {
	default:
		switch n.Op {
		default:
			var n1 gc.Node
			if !dotaddable(n, &n1) {
				gc.Igen(n, &n1, nil)
				sclean[nsclean-1] = n1
			}

			n = &n1

		case gc.ONAME, gc.OINDREG:
			// nothing
		}

		*lo = *n
		*hi = *n
		lo.Type = gc.Types[gc.TUINT32]
		if n.Type.Etype == gc.TINT64 {
			hi.Type = gc.Types[gc.TINT32]
		} else {
			hi.Type = gc.Types[gc.TUINT32]
		}
		hi.Xoffset += 4

	case gc.OLITERAL:
		var n1 gc.Node
		n.Convconst(&n1, n.Type)
		i := n1.Int64()
		gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
		i >>= 32
		if n.Type.Etype == gc.TINT64 {
			gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
		} else {
			gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
		}
	}
}

func splitclean() {
	if nsclean <= 0 {
		gc.Fatalf("splitclean")
	}
	nsclean--
	if sclean[nsclean].Op != gc.OEMPTY {
		gc.Regfree(&sclean[nsclean])
	}
}

// set up nodes representing fp constants
var (
	zerof        gc.Node
	two63f       gc.Node
	two64f       gc.Node
	bignodes_did bool
)

func bignodes() {
	if bignodes_did {
		return
	}
	bignodes_did = true

	gc.Nodconst(&zerof, gc.Types[gc.TINT64], 0)
	zerof.Convconst(&zerof, gc.Types[gc.TFLOAT64])

	var i big.Int
	i.SetInt64(1)
	i.Lsh(&i, 63)
	var bigi gc.Node

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two63f, gc.Types[gc.TFLOAT64])

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	i.Lsh(&i, 1)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two64f, gc.Types[gc.TFLOAT64])
}

func memname(n *gc.Node, t *gc.Type) {
	gc.Tempname(n, t)
	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
	n.Orig.Sym = n.Sym
}

func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	var r1 gc.Node
	var a obj.As
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.
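	// (The switch key packs the source and destination simple types into
	// one word; e.g. gc.TINT32<<16 | gc.TINT8 selects the int32 -> int8
	// truncation case.)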
	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatalf("gmove %v -> %v", f, t)
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			// Implementation of conversion-free x = y for int64 or uint64 x.
			// This is generated by the code that copies small values out of closures,
			// and that code has DX live, so avoid DX and just use AX twice.
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &fhi, &r1)
			gins(x86.AMOVL, &r1, &thi)
		}

		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
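// floatmove handles any move whose source or destination is floating point.
// It implements the cases shared by both floating-point backends here and
// defers the remainder to floatmove_387 or floatmove_sse according to
// gc.Thearch.Use387.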
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

		// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)
		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.
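		// (If 2^63 <= v < 2^64, then v-2^63 fits in a signed 64-bit
		// integer, so the x87 signed store handles it; "adding 2^63 back"
		// is just setting the top bit of the result, done below with an
		// XORL on the high word.)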
		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

		/*
		 * integer to float
		 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

		// algorithm is:
		//	if small enough, use native int64 -> float64 conversion.
		//	otherwise, halve (rounding to odd?), convert, and double.
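		// (Concretely: shift the 64-bit value right by one with a
		// double-width shift, fold a correction bit derived from the carry
		// back into the low word — the SETCC/ORL pair below — convert the
		// now-63-bit value, and double it with an FADDDP of F0 to itself.)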
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}

func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a obj.As

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		goto fatal

		/*
		 * float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if f.Op != gc.OREGISTER {
			if ft == gc.TFLOAT32 {
				gins(x86.AFMOVF, f, &r1)
			} else {
				gins(x86.AFMOVD, f, &r1)
			}
		}

		// set round to zero mode during conversion
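		// (0xf7f is the default x87 control word 0x37f with the
		// rounding-control field forced to round-toward-zero, which is
		// what Go's float-to-integer conversion semantics require.)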
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

		// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TINT32])

		gmove(f, &t1)
		switch tt {
		default:
			gc.Fatalf("gmove %v", t)

		case gc.TINT8:
			gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
			p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
			gins(x86.ACMPL, &t1, ncon(0x7f))
			p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
			p3 := gc.Gbranch(obj.AJMP, nil, 0)
			gc.Patch(p1, gc.Pc)
			gc.Patch(p2, gc.Pc)
			gmove(ncon(-0x80&(1<<32-1)), &t1)
			gc.Patch(p3, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT8:
			gins(x86.ATESTL, ncon(0xffffff00), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT16:
			gins(x86.ATESTL, ncon(0xffff0000), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)
		}

		return

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		/*
		 * integer to float
		 */
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op != gc.OREGISTER {
			goto hard
		}
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		switch ft {
		case gc.TINT16:
			a = x86.AFMOVW

		case gc.TINT32:
			a = x86.AFMOVL

		default:
			a = x86.AFMOVV
		}

		// convert via int32 memory
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hardmem

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		// The way the code generator uses floating-point
		// registers, a move from F0 to F0 is intended as a no-op.
		// On the x86, it's not: it pushes a second copy of F0
		// on the floating point stack. So toss it away here.
		// Also, F0 is the *only* register we ever evaluate
		// into, so we should only see register/register as F0/F0.
		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32,
		gc.TFLOAT64<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		a = x86.AFMOVF
		if ft == gc.TFLOAT64 {
			a = x86.AFMOVD
		}
		if gc.Ismem(t) {
			if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
				gc.Fatalf("gmove %v", f)
			}
			a = x86.AFMOVFP
			if ft == gc.TFLOAT64 {
				a = x86.AFMOVDP
			}
		}

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVDP, f, t)
		} else {
			gins(x86.AFMOVF, f, t)
		}
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			var r1 gc.Node
			gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
			gins(x86.AFMOVFP, f, &r1)
			gins(x86.AFMOVF, &r1, t)
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVFP, f, t)
		} else {
			gins(x86.AFMOVD, f, t)
		}
		return
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// should not happen
fatal:
	gc.Fatalf("gmove %v -> %v", gc.Nconv(f, gc.FmtLong), gc.Nconv(t, gc.FmtLong))

	return
}
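// floatmove_sse is the SSE2 counterpart of floatmove_387. SSE2 on 386 has no
// unsigned and no 64-bit integer conversions, so those cases bounce through a
// signed int32 or int64 memory temporary (the hard/hardmem paths) instead of
// converting directly.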
func floatmove_sse(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var cvt *gc.Type
	var a obj.As

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)

	switch uint32(ft)<<16 | uint32(tt) {
	// should not happen
	default:
		gc.Fatalf("gmove %v -> %v", f, t)

		return

		// convert via int32.
		/*
		 * float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TFLOAT32<<16 | gc.TINT32:
		a = x86.ACVTTSS2SL
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL
		goto rdst

		// convert via int32 memory
		/*
		 * integer to float
		 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS
		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// requires register destination
rdst:
	gc.Regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}

func samaddr(f *gc.Node, t *gc.Node) bool {
	if f.Op != t.Op {
		return false
	}

	switch f.Op {
	case gc.OREGISTER:
		if f.Reg != t.Reg {
			break
		}
		return true
	}

	return false
}

/*
 * generate one instruction:
 *	as f, t
 */
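// For example, gins(x86.AMOVL, &n1, &n2) emits MOVL n1, n2 and returns the
// new Prog, so callers can patch addressing details afterwards (as floatmove
// does for its double-width shift).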
func gins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog {
	if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER {
		gc.Fatalf("gins MOVF reg, reg")
	}
	if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL {
		gc.Fatalf("gins CVTSD2SS const")
	}
	if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 {
		gc.Fatalf("gins MOVSD into F0")
	}

	if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC {
		// Turn MOVL $xxx(FP/SP) into LEAL xxx.
		// These should be equivalent but most of the backend
		// only expects to see LEAL, because that's what we had
		// historically generated. Various hidden assumptions are baked in by now.
		as = x86.ALEAL
		f = f.Left
	}

	switch as {
	case x86.AMOVB,
		x86.AMOVW,
		x86.AMOVL:
		if f != nil && t != nil && samaddr(f, t) {
			return nil
		}

	case x86.ALEAL:
		if f != nil && gc.Isconst(f, gc.CTNIL) {
			gc.Fatalf("gins LEAL nil %v", f.Type)
		}
	}

	p := gc.Prog(as)
	gc.Naddr(&p.From, f)
	gc.Naddr(&p.To, t)

	if gc.Debug['g'] != 0 {
		fmt.Printf("%v\n", p)
	}

	w := 0
	switch as {
	case x86.AMOVB:
		w = 1

	case x86.AMOVW:
		w = 2

	case x86.AMOVL:
		w = 4
	}

	if w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) {
		gc.Dump("bad width from:", f)
		gc.Dump("bad width to:", t)
		gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width)
	}

	if p.To.Type == obj.TYPE_ADDR && w > 0 {
		gc.Fatalf("bad use of addr: %v", p)
	}

	return p
}

func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
	gins(x86.AXCHGL, &reg, &reg)
}

func dotaddable(n *gc.Node, n1 *gc.Node) bool {
	if n.Op != gc.ODOT {
		return false
	}

	var oary [10]int64
	var nn *gc.Node
	o := gc.Dotoffset(n, oary[:], &nn)
	if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 {
		*n1 = *nn
		n1.Type = n.Type
		n1.Xoffset += oary[0]
		return true
	}

	return false
}

func sudoclean() {
}

func sudoaddable(as obj.As, n *gc.Node, a *obj.Addr) bool {
	*a = obj.Addr{}
	return false
}