rsc.io/go@v0.0.0-20150416155037-e040fd465409/src/cmd/8g/gsubr.go (about)

     1  // Derived from Inferno utils/8c/txt.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package main
    32  
    33  import (
    34  	"cmd/internal/gc"
    35  	"cmd/internal/obj"
    36  	"cmd/internal/obj/x86"
    37  	"fmt"
    38  )
    39  
// unmappedzero is the size (in bytes) of the low-memory region assumed to be
// unmapped; presumably loads at small offsets from a nil pointer can then rely
// on a hardware fault instead of an explicit nil check — confirm at use sites.
//
// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../runtime/stack.h.
var unmappedzero uint32 = 4096
    44  
/*
 * return Axxx for Oxxx on type t.
 */
// optoas returns the x86 instruction opcode (x86.Axxx) implementing the
// generic operation op (gc.Oxxx) on operands of type t. For comparison
// operators it returns the conditional-jump instruction taken when the
// comparison holds. It fatals on (op, type) pairs with no entry.
func optoas(op int, t *gc.Type) int {
	if t == nil {
		gc.Fatal("optoas: t is nil")
	}

	a := obj.AXXX
	// Pack op into the high 16 bits and the simplified type into the low
	// bits so every (op, type) pair is a distinct constant case value.
	switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
	default:
		gc.Fatal("optoas: no entry %v-%v", gc.Oconv(int(op), 0), gc.Tconv(t, 0))

	case gc.OADDR<<16 | gc.TPTR32:
		a = x86.ALEAL

	case gc.OEQ<<16 | gc.TBOOL,
		gc.OEQ<<16 | gc.TINT8,
		gc.OEQ<<16 | gc.TUINT8,
		gc.OEQ<<16 | gc.TINT16,
		gc.OEQ<<16 | gc.TUINT16,
		gc.OEQ<<16 | gc.TINT32,
		gc.OEQ<<16 | gc.TUINT32,
		gc.OEQ<<16 | gc.TINT64,
		gc.OEQ<<16 | gc.TUINT64,
		gc.OEQ<<16 | gc.TPTR32,
		gc.OEQ<<16 | gc.TPTR64,
		gc.OEQ<<16 | gc.TFLOAT32,
		gc.OEQ<<16 | gc.TFLOAT64:
		a = x86.AJEQ

	case gc.ONE<<16 | gc.TBOOL,
		gc.ONE<<16 | gc.TINT8,
		gc.ONE<<16 | gc.TUINT8,
		gc.ONE<<16 | gc.TINT16,
		gc.ONE<<16 | gc.TUINT16,
		gc.ONE<<16 | gc.TINT32,
		gc.ONE<<16 | gc.TUINT32,
		gc.ONE<<16 | gc.TINT64,
		gc.ONE<<16 | gc.TUINT64,
		gc.ONE<<16 | gc.TPTR32,
		gc.ONE<<16 | gc.TPTR64,
		gc.ONE<<16 | gc.TFLOAT32,
		gc.ONE<<16 | gc.TFLOAT64:
		a = x86.AJNE

	// Signed < uses JLT.
	case gc.OLT<<16 | gc.TINT8,
		gc.OLT<<16 | gc.TINT16,
		gc.OLT<<16 | gc.TINT32,
		gc.OLT<<16 | gc.TINT64:
		a = x86.AJLT

	// Unsigned < tests the carry flag (JCS).
	case gc.OLT<<16 | gc.TUINT8,
		gc.OLT<<16 | gc.TUINT16,
		gc.OLT<<16 | gc.TUINT32,
		gc.OLT<<16 | gc.TUINT64:
		a = x86.AJCS

	case gc.OLE<<16 | gc.TINT8,
		gc.OLE<<16 | gc.TINT16,
		gc.OLE<<16 | gc.TINT32,
		gc.OLE<<16 | gc.TINT64:
		a = x86.AJLE

	// Unsigned <= is JLS ("lower or same").
	case gc.OLE<<16 | gc.TUINT8,
		gc.OLE<<16 | gc.TUINT16,
		gc.OLE<<16 | gc.TUINT32,
		gc.OLE<<16 | gc.TUINT64:
		a = x86.AJLS

	case gc.OGT<<16 | gc.TINT8,
		gc.OGT<<16 | gc.TINT16,
		gc.OGT<<16 | gc.TINT32,
		gc.OGT<<16 | gc.TINT64:
		a = x86.AJGT

	// Unsigned > is JHI. Float < also maps here: presumably the float
	// compare is emitted so that its result lands in the unsigned flags
	// (UCOMIS*-style) with operands arranged for JHI — confirm at the
	// float-comparison call sites.
	case gc.OGT<<16 | gc.TUINT8,
		gc.OGT<<16 | gc.TUINT16,
		gc.OGT<<16 | gc.TUINT32,
		gc.OGT<<16 | gc.TUINT64,
		gc.OLT<<16 | gc.TFLOAT32,
		gc.OLT<<16 | gc.TFLOAT64:
		a = x86.AJHI

	case gc.OGE<<16 | gc.TINT8,
		gc.OGE<<16 | gc.TINT16,
		gc.OGE<<16 | gc.TINT32,
		gc.OGE<<16 | gc.TINT64:
		a = x86.AJGE

	// Unsigned >= is JCC (carry clear); float <= maps here analogously
	// to the float < case.
	case gc.OGE<<16 | gc.TUINT8,
		gc.OGE<<16 | gc.TUINT16,
		gc.OGE<<16 | gc.TUINT32,
		gc.OGE<<16 | gc.TUINT64,
		gc.OLE<<16 | gc.TFLOAT32,
		gc.OLE<<16 | gc.TFLOAT64:
		a = x86.AJCC

	case gc.OCMP<<16 | gc.TBOOL,
		gc.OCMP<<16 | gc.TINT8,
		gc.OCMP<<16 | gc.TUINT8:
		a = x86.ACMPB

	case gc.OCMP<<16 | gc.TINT16,
		gc.OCMP<<16 | gc.TUINT16:
		a = x86.ACMPW

	case gc.OCMP<<16 | gc.TINT32,
		gc.OCMP<<16 | gc.TUINT32,
		gc.OCMP<<16 | gc.TPTR32:
		a = x86.ACMPL

	case gc.OAS<<16 | gc.TBOOL,
		gc.OAS<<16 | gc.TINT8,
		gc.OAS<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.OAS<<16 | gc.TINT16,
		gc.OAS<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.OAS<<16 | gc.TINT32,
		gc.OAS<<16 | gc.TUINT32,
		gc.OAS<<16 | gc.TPTR32:
		a = x86.AMOVL

	case gc.OAS<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.OAS<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.OADD<<16 | gc.TINT8,
		gc.OADD<<16 | gc.TUINT8:
		a = x86.AADDB

	case gc.OADD<<16 | gc.TINT16,
		gc.OADD<<16 | gc.TUINT16:
		a = x86.AADDW

	case gc.OADD<<16 | gc.TINT32,
		gc.OADD<<16 | gc.TUINT32,
		gc.OADD<<16 | gc.TPTR32:
		a = x86.AADDL

	case gc.OSUB<<16 | gc.TINT8,
		gc.OSUB<<16 | gc.TUINT8:
		a = x86.ASUBB

	case gc.OSUB<<16 | gc.TINT16,
		gc.OSUB<<16 | gc.TUINT16:
		a = x86.ASUBW

	case gc.OSUB<<16 | gc.TINT32,
		gc.OSUB<<16 | gc.TUINT32,
		gc.OSUB<<16 | gc.TPTR32:
		a = x86.ASUBL

	case gc.OINC<<16 | gc.TINT8,
		gc.OINC<<16 | gc.TUINT8:
		a = x86.AINCB

	case gc.OINC<<16 | gc.TINT16,
		gc.OINC<<16 | gc.TUINT16:
		a = x86.AINCW

	case gc.OINC<<16 | gc.TINT32,
		gc.OINC<<16 | gc.TUINT32,
		gc.OINC<<16 | gc.TPTR32:
		a = x86.AINCL

	case gc.ODEC<<16 | gc.TINT8,
		gc.ODEC<<16 | gc.TUINT8:
		a = x86.ADECB

	case gc.ODEC<<16 | gc.TINT16,
		gc.ODEC<<16 | gc.TUINT16:
		a = x86.ADECW

	case gc.ODEC<<16 | gc.TINT32,
		gc.ODEC<<16 | gc.TUINT32,
		gc.ODEC<<16 | gc.TPTR32:
		a = x86.ADECL

	// OCOM is bitwise complement (NOT).
	case gc.OCOM<<16 | gc.TINT8,
		gc.OCOM<<16 | gc.TUINT8:
		a = x86.ANOTB

	case gc.OCOM<<16 | gc.TINT16,
		gc.OCOM<<16 | gc.TUINT16:
		a = x86.ANOTW

	case gc.OCOM<<16 | gc.TINT32,
		gc.OCOM<<16 | gc.TUINT32,
		gc.OCOM<<16 | gc.TPTR32:
		a = x86.ANOTL

	// OMINUS is arithmetic negation (NEG).
	case gc.OMINUS<<16 | gc.TINT8,
		gc.OMINUS<<16 | gc.TUINT8:
		a = x86.ANEGB

	case gc.OMINUS<<16 | gc.TINT16,
		gc.OMINUS<<16 | gc.TUINT16:
		a = x86.ANEGW

	case gc.OMINUS<<16 | gc.TINT32,
		gc.OMINUS<<16 | gc.TUINT32,
		gc.OMINUS<<16 | gc.TPTR32:
		a = x86.ANEGL

	case gc.OAND<<16 | gc.TINT8,
		gc.OAND<<16 | gc.TUINT8:
		a = x86.AANDB

	case gc.OAND<<16 | gc.TINT16,
		gc.OAND<<16 | gc.TUINT16:
		a = x86.AANDW

	case gc.OAND<<16 | gc.TINT32,
		gc.OAND<<16 | gc.TUINT32,
		gc.OAND<<16 | gc.TPTR32:
		a = x86.AANDL

	case gc.OOR<<16 | gc.TINT8,
		gc.OOR<<16 | gc.TUINT8:
		a = x86.AORB

	case gc.OOR<<16 | gc.TINT16,
		gc.OOR<<16 | gc.TUINT16:
		a = x86.AORW

	case gc.OOR<<16 | gc.TINT32,
		gc.OOR<<16 | gc.TUINT32,
		gc.OOR<<16 | gc.TPTR32:
		a = x86.AORL

	case gc.OXOR<<16 | gc.TINT8,
		gc.OXOR<<16 | gc.TUINT8:
		a = x86.AXORB

	case gc.OXOR<<16 | gc.TINT16,
		gc.OXOR<<16 | gc.TUINT16:
		a = x86.AXORW

	case gc.OXOR<<16 | gc.TINT32,
		gc.OXOR<<16 | gc.TUINT32,
		gc.OXOR<<16 | gc.TPTR32:
		a = x86.AXORL

	// OLROT is rotate left.
	case gc.OLROT<<16 | gc.TINT8,
		gc.OLROT<<16 | gc.TUINT8:
		a = x86.AROLB

	case gc.OLROT<<16 | gc.TINT16,
		gc.OLROT<<16 | gc.TUINT16:
		a = x86.AROLW

	case gc.OLROT<<16 | gc.TINT32,
		gc.OLROT<<16 | gc.TUINT32,
		gc.OLROT<<16 | gc.TPTR32:
		a = x86.AROLL

	case gc.OLSH<<16 | gc.TINT8,
		gc.OLSH<<16 | gc.TUINT8:
		a = x86.ASHLB

	case gc.OLSH<<16 | gc.TINT16,
		gc.OLSH<<16 | gc.TUINT16:
		a = x86.ASHLW

	case gc.OLSH<<16 | gc.TINT32,
		gc.OLSH<<16 | gc.TUINT32,
		gc.OLSH<<16 | gc.TPTR32:
		a = x86.ASHLL

	// Right shift: logical (SHR) for unsigned, arithmetic (SAR) for signed.
	case gc.ORSH<<16 | gc.TUINT8:
		a = x86.ASHRB

	case gc.ORSH<<16 | gc.TUINT16:
		a = x86.ASHRW

	case gc.ORSH<<16 | gc.TUINT32,
		gc.ORSH<<16 | gc.TPTR32:
		a = x86.ASHRL

	case gc.ORSH<<16 | gc.TINT8:
		a = x86.ASARB

	case gc.ORSH<<16 | gc.TINT16:
		a = x86.ASARW

	case gc.ORSH<<16 | gc.TINT32:
		a = x86.ASARL

	// Multiply: IMUL for signed high-multiply and all low multiplies
	// (the low bits are the same signed or unsigned), MUL for unsigned
	// high-multiply.
	case gc.OHMUL<<16 | gc.TINT8,
		gc.OMUL<<16 | gc.TINT8,
		gc.OMUL<<16 | gc.TUINT8:
		a = x86.AIMULB

	case gc.OHMUL<<16 | gc.TINT16,
		gc.OMUL<<16 | gc.TINT16,
		gc.OMUL<<16 | gc.TUINT16:
		a = x86.AIMULW

	case gc.OHMUL<<16 | gc.TINT32,
		gc.OMUL<<16 | gc.TINT32,
		gc.OMUL<<16 | gc.TUINT32,
		gc.OMUL<<16 | gc.TPTR32:
		a = x86.AIMULL

	case gc.OHMUL<<16 | gc.TUINT8:
		a = x86.AMULB

	case gc.OHMUL<<16 | gc.TUINT16:
		a = x86.AMULW

	case gc.OHMUL<<16 | gc.TUINT32,
		gc.OHMUL<<16 | gc.TPTR32:
		a = x86.AMULL

	// Divide and modulus share one instruction; the caller picks
	// quotient or remainder from the result registers.
	case gc.ODIV<<16 | gc.TINT8,
		gc.OMOD<<16 | gc.TINT8:
		a = x86.AIDIVB

	case gc.ODIV<<16 | gc.TUINT8,
		gc.OMOD<<16 | gc.TUINT8:
		a = x86.ADIVB

	case gc.ODIV<<16 | gc.TINT16,
		gc.OMOD<<16 | gc.TINT16:
		a = x86.AIDIVW

	case gc.ODIV<<16 | gc.TUINT16,
		gc.OMOD<<16 | gc.TUINT16:
		a = x86.ADIVW

	case gc.ODIV<<16 | gc.TINT32,
		gc.OMOD<<16 | gc.TINT32:
		a = x86.AIDIVL

	case gc.ODIV<<16 | gc.TUINT32,
		gc.ODIV<<16 | gc.TPTR32,
		gc.OMOD<<16 | gc.TUINT32,
		gc.OMOD<<16 | gc.TPTR32:
		a = x86.ADIVL

	// OEXTEND sign-extends AX into DX:AX (CWD/CDQ), used before divide.
	case gc.OEXTEND<<16 | gc.TINT16:
		a = x86.ACWD

	case gc.OEXTEND<<16 | gc.TINT32:
		a = x86.ACDQ
	}

	return a
}
   400  
// foptoas returns the floating-point instruction for operation op on type t.
// When not targeting the 387 (SSE mode), flg is ignored and the SSE scalar
// instruction is returned. In 387 mode, flg carries x87 modifier bits
// (declared elsewhere in this package): Frev requests the reversed-operand
// form (e.g. FSUBR), Fpop the register-popping form (e.g. FADDP), and Fpop2
// presumably the pop-twice compare form (FCOMDPP) — confirm at call sites.
func foptoas(op int, t *gc.Type, flg int) int {
	a := obj.AXXX
	et := int(gc.Simtype[t.Etype])

	if !gc.Thearch.Use387 {
		// SSE mode: flg is not consulted.
		switch uint32(op)<<16 | uint32(et) {
		default:
			gc.Fatal("foptoas-sse: no entry %v-%v", gc.Oconv(int(op), 0), gc.Tconv(t, 0))

		case gc.OCMP<<16 | gc.TFLOAT32:
			a = x86.AUCOMISS

		case gc.OCMP<<16 | gc.TFLOAT64:
			a = x86.AUCOMISD

		case gc.OAS<<16 | gc.TFLOAT32:
			a = x86.AMOVSS

		case gc.OAS<<16 | gc.TFLOAT64:
			a = x86.AMOVSD

		case gc.OADD<<16 | gc.TFLOAT32:
			a = x86.AADDSS

		case gc.OADD<<16 | gc.TFLOAT64:
			a = x86.AADDSD

		case gc.OSUB<<16 | gc.TFLOAT32:
			a = x86.ASUBSS

		case gc.OSUB<<16 | gc.TFLOAT64:
			a = x86.ASUBSD

		case gc.OMUL<<16 | gc.TFLOAT32:
			a = x86.AMULSS

		case gc.OMUL<<16 | gc.TFLOAT64:
			a = x86.AMULSD

		case gc.ODIV<<16 | gc.TFLOAT32:
			a = x86.ADIVSS

		case gc.ODIV<<16 | gc.TFLOAT64:
			a = x86.ADIVSD
		}

		return a
	}

	// If we need Fpop, it means we're working on
	// two different floating-point registers, not memory.
	// There the instruction only has a float64 form.
	if flg&Fpop != 0 {
		et = gc.TFLOAT64
	}

	// clear Frev if unneeded
	// (addition and multiplication are commutative, so the reversed form
	// is the same instruction).
	switch op {
	case gc.OADD,
		gc.OMUL:
		flg &^= Frev
	}

	// Key layout: op in bits 16+, type in bits 8-15, flag bits in 0-7.
	switch uint32(op)<<16 | (uint32(et)<<8 | uint32(flg)) {
	case gc.OADD<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFADDF

	case gc.OADD<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFADDD

	case gc.OADD<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFADDDP

	case gc.OSUB<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFSUBF

	case gc.OSUB<<16 | (gc.TFLOAT32<<8 | Frev):
		return x86.AFSUBRF

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFSUBD

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | Frev):
		return x86.AFSUBRD

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFSUBDP

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFSUBRDP

	case gc.OMUL<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFMULF

	case gc.OMUL<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFMULD

	case gc.OMUL<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFMULDP

	case gc.ODIV<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFDIVF

	case gc.ODIV<<16 | (gc.TFLOAT32<<8 | Frev):
		return x86.AFDIVRF

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFDIVD

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | Frev):
		return x86.AFDIVRD

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFDIVDP

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFDIVRDP

	case gc.OCMP<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFCOMF

	case gc.OCMP<<16 | (gc.TFLOAT32<<8 | Fpop):
		return x86.AFCOMFP

	case gc.OCMP<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFCOMD

	case gc.OCMP<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFCOMDP

	case gc.OCMP<<16 | (gc.TFLOAT64<<8 | Fpop2):
		return x86.AFCOMDPP

	// Negation is FCHS regardless of width: the x87 stack holds
	// extended precision internally.
	case gc.OMINUS<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFCHS

	case gc.OMINUS<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFCHS
	}

	gc.Fatal("foptoas %v %v %#x", gc.Oconv(int(op), 0), gc.Tconv(t, 0), flg)
	return 0
}
   544  
// resvd lists registers that must be treated as reserved and kept out of
// general register allocation, each for the reason noted.
var resvd = []int{
	//	REG_DI,	// for movstring
	//	REG_SI,	// for movstring

	x86.REG_AX, // for divide
	x86.REG_CX, // for shift
	x86.REG_DX, // for divide
	x86.REG_SP, // for stack
}
   554  
   555  /*
   556   * generate
   557   *	as $c, reg
   558   */
   559  func gconreg(as int, c int64, reg int) {
   560  	var n1 gc.Node
   561  	var n2 gc.Node
   562  
   563  	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
   564  	gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
   565  	gins(as, &n1, &n2)
   566  }
   567  
   568  /*
   569   * generate
   570   *	as $c, n
   571   */
   572  func ginscon(as int, c int64, n2 *gc.Node) {
   573  	var n1 gc.Node
   574  	gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
   575  	gins(as, &n1, n2)
   576  }
   577  
   578  /*
   579   * swap node contents
   580   */
   581  func nswap(a *gc.Node, b *gc.Node) {
   582  	t := *a
   583  	*a = *b
   584  	*b = t
   585  }
   586  
   587  /*
   588   * return constant i node.
   589   * overwritten by next call, but useful in calls to gins.
   590   */
   591  
   592  var ncon_n gc.Node
   593  
   594  func ncon(i uint32) *gc.Node {
   595  	if ncon_n.Type == nil {
   596  		gc.Nodconst(&ncon_n, gc.Types[gc.TUINT32], 0)
   597  	}
   598  	gc.Mpmovecfix(ncon_n.Val.U.Xval, int64(i))
   599  	return &ncon_n
   600  }
   601  
// sclean is a stack of nodes, one per in-flight split64 call, recording any
// register that the matching splitclean must release (Op == OEMPTY means
// there is nothing to free for that level).
var sclean [10]gc.Node

// nsclean is the current depth of the sclean stack.
var nsclean int
   605  
/*
 * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
 *
 * The returned lo/hi views remain valid only until the matching splitclean
 * call, which releases any register this function allocated (recorded in
 * sclean). Calls nest up to len(sclean) deep.
 */
func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
	if !gc.Is64(n.Type) {
		gc.Fatal("split64 %v", gc.Tconv(n.Type, 0))
	}

	if nsclean >= len(sclean) {
		gc.Fatal("split64 clean")
	}
	// Push an empty slot; the cases below overwrite it if they allocate.
	sclean[nsclean].Op = gc.OEMPTY
	nsclean++
	switch n.Op {
	// Outer default: everything except literals. The inner switch decides
	// whether n must first be materialized at an addressable location.
	default:
		switch n.Op {
		default:
			var n1 gc.Node
			if !dotaddable(n, &n1) {
				// Not directly addressable: generate the address
				// into a register and record it for splitclean.
				gc.Igen(n, &n1, nil)
				sclean[nsclean-1] = n1
			}

			// n1 outlives this case via the pointer; Go keeps it alive.
			n = &n1

		case gc.ONAME:
			if n.Class == gc.PPARAMREF {
				// Heap-escaped parameter: load its heap address first.
				var n1 gc.Node
				gc.Cgen(n.Heapaddr, &n1)
				sclean[nsclean-1] = n1
				n = &n1
			}

			// nothing
		case gc.OINDREG:
			break
		}

		// Both halves alias n's memory; the high word sits 4 bytes up
		// (little-endian 386).
		*lo = *n
		*hi = *n
		lo.Type = gc.Types[gc.TUINT32]
		if n.Type.Etype == gc.TINT64 {
			hi.Type = gc.Types[gc.TINT32]
		} else {
			hi.Type = gc.Types[gc.TUINT32]
		}
		hi.Xoffset += 4

	case gc.OLITERAL:
		// Constants split arithmetically into two 32-bit constants.
		var n1 gc.Node
		gc.Convconst(&n1, n.Type, &n.Val)
		i := gc.Mpgetfix(n1.Val.U.Xval)
		gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
		i >>= 32
		if n.Type.Etype == gc.TINT64 {
			gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
		} else {
			gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
		}
	}
}
   667  
   668  func splitclean() {
   669  	if nsclean <= 0 {
   670  		gc.Fatal("splitclean")
   671  	}
   672  	nsclean--
   673  	if sclean[nsclean].Op != gc.OEMPTY {
   674  		gc.Regfree(&sclean[nsclean])
   675  	}
   676  }
   677  
/*
 * set up nodes representing fp constants
 * (lazily initialized by bignodes)
 */
var zerof gc.Node // 0.0

var two64f gc.Node // 2^64 as a float64

var two63f gc.Node // 2^63 as a float64

var bignodes_did int // non-zero once bignodes has run
   688  
// bignodes initializes zerof, two64f and two63f on first use; later calls
// are no-ops.
func bignodes() {
	if bignodes_did != 0 {
		return
	}
	bignodes_did = 1

	// Build two64f from a fresh integer constant node, then rewrite it
	// into a TFLOAT64 constant holding 2^64.
	two64f = *ncon(0)
	two64f.Type = gc.Types[gc.TFLOAT64]
	two64f.Val.Ctype = gc.CTFLT
	two64f.Val.U.Fval = new(gc.Mpflt)
	gc.Mpmovecflt(two64f.Val.U.Fval, 18446744073709551616.)

	// two63f and zerof copy two64f's node shape (Type/Ctype) but each
	// gets its own Mpflt so the three constants do not alias.
	two63f = two64f
	two63f.Val.U.Fval = new(gc.Mpflt)
	gc.Mpmovecflt(two63f.Val.U.Fval, 9223372036854775808.)

	zerof = two64f
	zerof.Val.U.Fval = new(gc.Mpflt)
	gc.Mpmovecflt(zerof.Val.U.Fval, 0)
}
   709  
   710  func memname(n *gc.Node, t *gc.Type) {
   711  	gc.Tempname(n, t)
   712  	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
   713  	n.Orig.Sym = n.Sym
   714  }
   715  
// gmove generates code to move (and convert) the value of node f into node t.
// Complex values are delegated to gc.Complexmove, floating point to
// floatmove; 64-bit integers are handled here by splitting into 32-bit
// halves (split64) and copying through registers.
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, 0), gc.Nconv(t, 0))
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	// (r1 and a are declared up front because the gotos below may not
	// jump over variable declarations.)
	var r1 gc.Node
	var a int
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		gc.Convconst(&con, t.Type, &f.Val)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatal("gmove %v -> %v", gc.Nconv(f, 0), gc.Nconv(t, 0))
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		// This r1 intentionally shadows the outer r1: it is a fixed
		// AX register node, not an allocated register.
		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			// Constant halves can be stored directly.
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			// Memory-to-memory: bounce both halves through AX/DX.
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			var r2 gc.Node
			gc.Nodreg(&r2, gc.Types[gc.TUINT32], x86.REG_DX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &fhi, &r2)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &r2, &thi)
		}

		// One splitclean per split64 above.
		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		// CDQ sign-extends AX into DX:AX, so stage the value in AX
		// and take the high word from DX.
		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		// High word of a zero-extension is simply zero.
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
  1001  
  1002  func floatmove(f *gc.Node, t *gc.Node) {
  1003  	var r1 gc.Node
  1004  
  1005  	ft := gc.Simsimtype(f.Type)
  1006  	tt := gc.Simsimtype(t.Type)
  1007  	cvt := t.Type
  1008  
  1009  	// cannot have two floating point memory operands.
  1010  	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
  1011  		goto hard
  1012  	}
  1013  
  1014  	// convert constant to desired type
  1015  	if f.Op == gc.OLITERAL {
  1016  		var con gc.Node
  1017  		gc.Convconst(&con, t.Type, &f.Val)
  1018  		f = &con
  1019  		ft = gc.Simsimtype(con.Type)
  1020  
  1021  		// some constants can't move directly to memory.
  1022  		if gc.Ismem(t) {
  1023  			// float constants come from memory.
  1024  			if gc.Isfloat[tt] {
  1025  				goto hard
  1026  			}
  1027  		}
  1028  	}
  1029  
  1030  	// value -> value copy, only one memory operand.
  1031  	// figure out the instruction to use.
  1032  	// break out of switch for one-instruction gins.
  1033  	// goto rdst for "destination must be register".
  1034  	// goto hard for "convert to cvt type first".
  1035  	// otherwise handle and return.
  1036  
  1037  	switch uint32(ft)<<16 | uint32(tt) {
  1038  	default:
  1039  		if gc.Thearch.Use387 {
  1040  			floatmove_387(f, t)
  1041  		} else {
  1042  			floatmove_sse(f, t)
  1043  		}
  1044  		return
  1045  
  1046  		// float to very long integer.
  1047  	case gc.TFLOAT32<<16 | gc.TINT64,
  1048  		gc.TFLOAT64<<16 | gc.TINT64:
  1049  		if f.Op == gc.OREGISTER {
  1050  			cvt = f.Type
  1051  			goto hardmem
  1052  		}
  1053  
  1054  		var r1 gc.Node
  1055  		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
  1056  		if ft == gc.TFLOAT32 {
  1057  			gins(x86.AFMOVF, f, &r1)
  1058  		} else {
  1059  			gins(x86.AFMOVD, f, &r1)
  1060  		}
  1061  
  1062  		// set round to zero mode during conversion
  1063  		var t1 gc.Node
  1064  		memname(&t1, gc.Types[gc.TUINT16])
  1065  
  1066  		var t2 gc.Node
  1067  		memname(&t2, gc.Types[gc.TUINT16])
  1068  		gins(x86.AFSTCW, nil, &t1)
  1069  		gins(x86.AMOVW, ncon(0xf7f), &t2)
  1070  		gins(x86.AFLDCW, &t2, nil)
  1071  		if tt == gc.TINT16 {
  1072  			gins(x86.AFMOVWP, &r1, t)
  1073  		} else if tt == gc.TINT32 {
  1074  			gins(x86.AFMOVLP, &r1, t)
  1075  		} else {
  1076  			gins(x86.AFMOVVP, &r1, t)
  1077  		}
  1078  		gins(x86.AFLDCW, &t1, nil)
  1079  		return
  1080  
  1081  	case gc.TFLOAT32<<16 | gc.TUINT64,
  1082  		gc.TFLOAT64<<16 | gc.TUINT64:
  1083  		if !gc.Ismem(f) {
  1084  			cvt = f.Type
  1085  			goto hardmem
  1086  		}
  1087  
  1088  		bignodes()
  1089  		var f0 gc.Node
  1090  		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
  1091  		var f1 gc.Node
  1092  		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
  1093  		var ax gc.Node
  1094  		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
  1095  
  1096  		if ft == gc.TFLOAT32 {
  1097  			gins(x86.AFMOVF, f, &f0)
  1098  		} else {
  1099  			gins(x86.AFMOVD, f, &f0)
  1100  		}
  1101  
  1102  		// if 0 > v { answer = 0 }
  1103  		gins(x86.AFMOVD, &zerof, &f0)
  1104  
  1105  		gins(x86.AFUCOMIP, &f0, &f1)
  1106  		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
  1107  
  1108  		// if 1<<64 <= v { answer = 0 too }
  1109  		gins(x86.AFMOVD, &two64f, &f0)
  1110  
  1111  		gins(x86.AFUCOMIP, &f0, &f1)
  1112  		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
  1113  		gc.Patch(p1, gc.Pc)
  1114  		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
  1115  		var thi gc.Node
  1116  		var tlo gc.Node
  1117  		split64(t, &tlo, &thi)
  1118  		gins(x86.AMOVL, ncon(0), &tlo)
  1119  		gins(x86.AMOVL, ncon(0), &thi)
  1120  		splitclean()
  1121  		p1 = gc.Gbranch(obj.AJMP, nil, 0)
  1122  		gc.Patch(p2, gc.Pc)
  1123  
  1124  		// in range; algorithm is:
  1125  		//	if small enough, use native float64 -> int64 conversion.
  1126  		//	otherwise, subtract 2^63, convert, and add it back.
  1127  
  1128  		// set round to zero mode during conversion
  1129  		var t1 gc.Node
  1130  		memname(&t1, gc.Types[gc.TUINT16])
  1131  
  1132  		var t2 gc.Node
  1133  		memname(&t2, gc.Types[gc.TUINT16])
  1134  		gins(x86.AFSTCW, nil, &t1)
  1135  		gins(x86.AMOVW, ncon(0xf7f), &t2)
  1136  		gins(x86.AFLDCW, &t2, nil)
  1137  
  1138  		// actual work
  1139  		gins(x86.AFMOVD, &two63f, &f0)
  1140  
  1141  		gins(x86.AFUCOMIP, &f0, &f1)
  1142  		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
  1143  		gins(x86.AFMOVVP, &f0, t)
  1144  		p3 := gc.Gbranch(obj.AJMP, nil, 0)
  1145  		gc.Patch(p2, gc.Pc)
  1146  		gins(x86.AFMOVD, &two63f, &f0)
  1147  		gins(x86.AFSUBDP, &f0, &f1)
  1148  		gins(x86.AFMOVVP, &f0, t)
  1149  		split64(t, &tlo, &thi)
  1150  		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
  1151  		gc.Patch(p3, gc.Pc)
  1152  		splitclean()
  1153  
  1154  		// restore rounding mode
  1155  		gins(x86.AFLDCW, &t1, nil)
  1156  
  1157  		gc.Patch(p1, gc.Pc)
  1158  		return
  1159  
  1160  		/*
  1161  		 * integer to float
  1162  		 */
  1163  	case gc.TINT64<<16 | gc.TFLOAT32,
  1164  		gc.TINT64<<16 | gc.TFLOAT64:
  1165  		if t.Op == gc.OREGISTER {
  1166  			goto hardmem
  1167  		}
  1168  		var f0 gc.Node
  1169  		gc.Nodreg(&f0, t.Type, x86.REG_F0)
  1170  		gins(x86.AFMOVV, f, &f0)
  1171  		if tt == gc.TFLOAT32 {
  1172  			gins(x86.AFMOVFP, &f0, t)
  1173  		} else {
  1174  			gins(x86.AFMOVDP, &f0, t)
  1175  		}
  1176  		return
  1177  
  1178  		// algorithm is:
  1179  	//	if small enough, use native int64 -> float64 conversion.
  1180  	//	otherwise, halve (rounding to odd?), convert, and double.
  1181  	case gc.TUINT64<<16 | gc.TFLOAT32,
  1182  		gc.TUINT64<<16 | gc.TFLOAT64:
  1183  		var ax gc.Node
  1184  		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)
  1185  
  1186  		var dx gc.Node
  1187  		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
  1188  		var cx gc.Node
  1189  		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
  1190  		var t1 gc.Node
  1191  		gc.Tempname(&t1, f.Type)
  1192  		var tlo gc.Node
  1193  		var thi gc.Node
  1194  		split64(&t1, &tlo, &thi)
  1195  		gmove(f, &t1)
  1196  		gins(x86.ACMPL, &thi, ncon(0))
  1197  		p1 := gc.Gbranch(x86.AJLT, nil, 0)
  1198  
  1199  		// native
  1200  		var r1 gc.Node
  1201  		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
  1202  
  1203  		gins(x86.AFMOVV, &t1, &r1)
  1204  		if tt == gc.TFLOAT32 {
  1205  			gins(x86.AFMOVFP, &r1, t)
  1206  		} else {
  1207  			gins(x86.AFMOVDP, &r1, t)
  1208  		}
  1209  		p2 := gc.Gbranch(obj.AJMP, nil, 0)
  1210  
  1211  		// simulated
  1212  		gc.Patch(p1, gc.Pc)
  1213  
  1214  		gmove(&tlo, &ax)
  1215  		gmove(&thi, &dx)
  1216  		p1 = gins(x86.ASHRL, ncon(1), &ax)
  1217  		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
  1218  		p1.From.Scale = 0
  1219  		gins(x86.AMOVL, ncon(0), &cx)
  1220  		gins(x86.ASETCC, nil, &cx)
  1221  		gins(x86.AORL, &cx, &ax)
  1222  		gins(x86.ASHRL, ncon(1), &dx)
  1223  		gmove(&dx, &thi)
  1224  		gmove(&ax, &tlo)
  1225  		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
  1226  		var r2 gc.Node
  1227  		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
  1228  		gins(x86.AFMOVV, &t1, &r1)
  1229  		gins(x86.AFMOVD, &r1, &r1)
  1230  		gins(x86.AFADDDP, &r1, &r2)
  1231  		if tt == gc.TFLOAT32 {
  1232  			gins(x86.AFMOVFP, &r1, t)
  1233  		} else {
  1234  			gins(x86.AFMOVDP, &r1, t)
  1235  		}
  1236  		gc.Patch(p2, gc.Pc)
  1237  		splitclean()
  1238  		return
  1239  	}
  1240  
  1241  	// requires register intermediate
  1242  hard:
  1243  	gc.Regalloc(&r1, cvt, t)
  1244  
  1245  	gmove(f, &r1)
  1246  	gmove(&r1, t)
  1247  	gc.Regfree(&r1)
  1248  	return
  1249  
  1250  	// requires memory intermediate
  1251  hardmem:
  1252  	gc.Tempname(&r1, cvt)
  1253  
  1254  	gmove(f, &r1)
  1255  	gmove(&r1, t)
  1256  	return
  1257  }
  1258  
// floatmove_387 implements gmove (move f to t) for the x87 (387)
// floating-point unit, covering float<->integer conversions and
// float<->float moves.  Any type pairing it does not recognize is a
// fatal internal error.
//
// The x87 keeps values on a register stack whose top is F0, so the
// code below funnels operands through F0 and uses the store-and-pop
// instruction forms (AFMOV*P) to write results back out.
func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a int

	// Collapse the types to their machine representations and
	// dispatch on the (from, to) pair packed into one uint32.
	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		goto fatal

		/*
		* float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT64:
		// The x87 store instructions write to memory, so a register
		// destination must go through a memory intermediate.
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		// Load the source onto the FPU stack unless it is already
		// there (register operands are assumed to be F0).
		if f.Op != gc.OREGISTER {
			if ft == gc.TFLOAT32 {
				gins(x86.AFMOVF, f, &r1)
			} else {
				gins(x86.AFMOVD, f, &r1)
			}
		}

		// set round to zero mode during conversion
		// (save the current control word in t1, load 0xf7f from t2,
		// convert, then restore t1 below).
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		// Store-and-pop at the destination's width.
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		// Restore the saved rounding mode.
		gins(x86.AFLDCW, &t1, nil)
		return

		// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TINT32])

		// First narrow float -> int32, then range-adjust int32 -> tt.
		gmove(f, &t1)
		switch tt {
		default:
			gc.Fatal("gmove %v", gc.Nconv(t, 0))

		case gc.TINT8:
			// Clamp: any value outside [-0x80, 0x7f] is forced
			// to -0x80 before the final narrowing move.
			gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
			p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
			gins(x86.ACMPL, &t1, ncon(0x7f))
			p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
			p3 := gc.Gbranch(obj.AJMP, nil, 0)
			gc.Patch(p1, gc.Pc)
			gc.Patch(p2, gc.Pc)
			gmove(ncon(-0x80&(1<<32-1)), &t1)
			gc.Patch(p3, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT8:
			// If any bit above the low byte is set, the value is
			// out of range: replace it with 0.
			gins(x86.ATESTL, ncon(0xffffff00), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT16:
			// Same as TUINT8 but testing the high 16 bits.
			gins(x86.ATESTL, ncon(0xffff0000), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)
		}

		return

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		/*
		 * integer to float
		 */
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		// x87 integer loads read from memory only.
		if t.Op != gc.OREGISTER {
			goto hard
		}
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		// Select the integer-load width; the gins(a, f, t) at the
		// bottom of the switch emits the instruction.
		switch ft {
		case gc.TINT16:
			a = x86.AFMOVW

		case gc.TINT32:
			a = x86.AFMOVL

		default:
			a = x86.AFMOVV
		}

		// convert via int32 memory
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hardmem

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		// The way the code generator uses floating-point
	// registers, a move from F0 to F0 is intended as a no-op.
	// On the x86, it's not: it pushes a second copy of F0
	// on the floating point stack.  So toss it away here.
	// Also, F0 is the *only* register we ever evaluate
	// into, so we should only see register/register as F0/F0.
	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32,
		gc.TFLOAT64<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		a = x86.AFMOVF
		if ft == gc.TFLOAT64 {
			a = x86.AFMOVD
		}
		if gc.Ismem(t) {
			// Storing from a register: must be F0, and use the
			// popping form to keep the FPU stack balanced.
			if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
				gc.Fatal("gmove %v", gc.Nconv(f, 0))
			}
			a = x86.AFMOVFP
			if ft == gc.TFLOAT64 {
				a = x86.AFMOVDP
			}
		}

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		// Widening happens implicitly: the x87 holds values in
		// extended precision, so a load or store-and-pop suffices.
		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVDP, f, t)
		} else {
			gins(x86.AFMOVF, f, t)
		}
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			// Narrow by bouncing through a float32 temporary in
			// memory (register-to-register cannot round on x87).
			var r1 gc.Node
			gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
			gins(x86.AFMOVFP, f, &r1)
			gins(x86.AFMOVF, &r1, t)
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVFP, f, t)
		} else {
			gins(x86.AFMOVD, f, t)
		}
		return
	}

	// Single-instruction cases fall through to here.
	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// should not happen
fatal:
	gc.Fatal("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))

	return
}
  1508  
// floatmove_sse implements gmove (move f to t) for machines with SSE2:
// float<->integer conversions and float<->float moves using the
// scalar SSE instructions.  Each case either selects a single
// instruction (emitted by the gins at the bottom of the switch) or
// jumps to one of three fallback strategies:
//
//	hard    - convert through a register intermediate of type cvt
//	hardmem - convert through a memory temporary of type cvt
//	rdst    - the chosen instruction requires a register destination
func floatmove_sse(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var cvt *gc.Type
	var a int

	// Dispatch on the (from, to) machine-type pair packed into a uint32.
	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)

	switch uint32(ft)<<16 | uint32(tt) {
	// should not happen
	default:
		gc.Fatal("gmove %v -> %v", gc.Nconv(f, 0), gc.Nconv(t, 0))

		return

		// convert via int32.
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TFLOAT32<<16 | gc.TINT32:
		// truncating (round-toward-zero) conversion
		a = x86.ACVTTSS2SL
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL
		goto rdst

		// convert via int32 memory
	/*
	 * integer to float
	 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS
		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	// Single-instruction cases (plain MOVSS/MOVSD) land here.
	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// requires register destination
rdst:
	gc.Regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
  1633  
  1634  func samaddr(f *gc.Node, t *gc.Node) bool {
  1635  	if f.Op != t.Op {
  1636  		return false
  1637  	}
  1638  
  1639  	switch f.Op {
  1640  	case gc.OREGISTER:
  1641  		if f.Reg != t.Reg {
  1642  			break
  1643  		}
  1644  		return true
  1645  	}
  1646  
  1647  	return false
  1648  }
  1649  
  1650  /*
  1651   * generate one instruction:
  1652   *	as f, t
  1653   */
  1654  func gins(as int, f *gc.Node, t *gc.Node) *obj.Prog {
  1655  	if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER {
  1656  		gc.Fatal("gins MOVF reg, reg")
  1657  	}
  1658  	if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL {
  1659  		gc.Fatal("gins CVTSD2SS const")
  1660  	}
  1661  	if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 {
  1662  		gc.Fatal("gins MOVSD into F0")
  1663  	}
  1664  
  1665  	if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC {
  1666  		// Turn MOVL $xxx(FP/SP) into LEAL xxx.
  1667  		// These should be equivalent but most of the backend
  1668  		// only expects to see LEAL, because that's what we had
  1669  		// historically generated. Various hidden assumptions are baked in by now.
  1670  		as = x86.ALEAL
  1671  		f = f.Left
  1672  	}
  1673  
  1674  	switch as {
  1675  	case x86.AMOVB,
  1676  		x86.AMOVW,
  1677  		x86.AMOVL:
  1678  		if f != nil && t != nil && samaddr(f, t) {
  1679  			return nil
  1680  		}
  1681  
  1682  	case x86.ALEAL:
  1683  		if f != nil && gc.Isconst(f, gc.CTNIL) {
  1684  			gc.Fatal("gins LEAL nil %v", gc.Tconv(f.Type, 0))
  1685  		}
  1686  	}
  1687  
  1688  	p := gc.Prog(as)
  1689  	gc.Naddr(&p.From, f)
  1690  	gc.Naddr(&p.To, t)
  1691  
  1692  	if gc.Debug['g'] != 0 {
  1693  		fmt.Printf("%v\n", p)
  1694  	}
  1695  
  1696  	w := 0
  1697  	switch as {
  1698  	case x86.AMOVB:
  1699  		w = 1
  1700  
  1701  	case x86.AMOVW:
  1702  		w = 2
  1703  
  1704  	case x86.AMOVL:
  1705  		w = 4
  1706  	}
  1707  
  1708  	if true && w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) {
  1709  		gc.Dump("bad width from:", f)
  1710  		gc.Dump("bad width to:", t)
  1711  		gc.Fatal("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width)
  1712  	}
  1713  
  1714  	if p.To.Type == obj.TYPE_ADDR && w > 0 {
  1715  		gc.Fatal("bad use of addr: %v", p)
  1716  	}
  1717  
  1718  	return p
  1719  }
  1720  
  1721  func ginsnop() {
  1722  	var reg gc.Node
  1723  	gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
  1724  	gins(x86.AXCHGL, &reg, &reg)
  1725  }
  1726  
  1727  func dotaddable(n *gc.Node, n1 *gc.Node) bool {
  1728  	if n.Op != gc.ODOT {
  1729  		return false
  1730  	}
  1731  
  1732  	var oary [10]int64
  1733  	var nn *gc.Node
  1734  	o := gc.Dotoffset(n, oary[:], &nn)
  1735  	if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 {
  1736  		*n1 = *nn
  1737  		n1.Type = n.Type
  1738  		n1.Xoffset += oary[0]
  1739  		return true
  1740  	}
  1741  
  1742  	return false
  1743  }
  1744  
// sudoclean undoes any state reserved by sudoaddable.  On this
// architecture sudoaddable always fails without reserving anything,
// so there is nothing to clean up.
func sudoclean() {
}
  1747  
// sudoaddable reports whether n can be expressed as a single complex
// addressing mode for instruction as.  It is not implemented here:
// it zeroes *a and always returns false, so callers fall back to the
// generic addressing path.
func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool {
	*a = obj.Addr{}
	return false
}