github.com/slayercat/go@v0.0.0-20170428012452-c51559813f61/src/cmd/compile/internal/x86/387.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package x86
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/compile/internal/ssa"
    10  	"cmd/internal/obj"
    11  	"cmd/internal/obj/x86"
    12  	"math"
    13  )
    14  
    15  // Generates code for v using 387 instructions.
    16  func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) {
    17  	// The SSA compiler pretends that it has an SSE backend.
    18  	// If we don't have one of those, we need to translate
    19  	// all the SSE ops to equivalent 387 ops. That's what this
    20  	// function does.
    21  
    22  	switch v.Op {
    23  	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
    24  		p := s.Prog(loadPush(v.Type))
    25  		p.From.Type = obj.TYPE_FCONST
    26  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
    27  		p.To.Type = obj.TYPE_REG
    28  		p.To.Reg = x86.REG_F0
    29  		popAndSave(s, v)
    30  
    31  	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
    32  		p := s.Prog(loadPush(v.Type))
    33  		p.From.Type = obj.TYPE_MEM
    34  		p.From.Reg = v.Args[0].Reg()
    35  		p.To.Type = obj.TYPE_REG
    36  		p.To.Reg = x86.REG_F0
    37  		popAndSave(s, v)
    38  
    39  	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8:
    40  		p := s.Prog(loadPush(v.Type))
    41  		p.From.Type = obj.TYPE_MEM
    42  		p.From.Reg = v.Args[0].Reg()
    43  		gc.AddAux(&p.From, v)
    44  		switch v.Op {
    45  		case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
    46  			p.From.Scale = 1
    47  			p.From.Index = v.Args[1].Reg()
    48  		case ssa.Op386MOVSSloadidx4:
    49  			p.From.Scale = 4
    50  			p.From.Index = v.Args[1].Reg()
    51  		case ssa.Op386MOVSDloadidx8:
    52  			p.From.Scale = 8
    53  			p.From.Index = v.Args[1].Reg()
    54  		}
    55  		p.To.Type = obj.TYPE_REG
    56  		p.To.Reg = x86.REG_F0
    57  		popAndSave(s, v)
    58  
    59  	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore:
    60  		// Push to-be-stored value on top of stack.
    61  		push(s, v.Args[1])
    62  
    63  		// Pop and store value.
    64  		var op obj.As
    65  		switch v.Op {
    66  		case ssa.Op386MOVSSstore:
    67  			op = x86.AFMOVFP
    68  		case ssa.Op386MOVSDstore:
    69  			op = x86.AFMOVDP
    70  		}
    71  		p := s.Prog(op)
    72  		p.From.Type = obj.TYPE_REG
    73  		p.From.Reg = x86.REG_F0
    74  		p.To.Type = obj.TYPE_MEM
    75  		p.To.Reg = v.Args[0].Reg()
    76  		gc.AddAux(&p.To, v)
    77  
    78  	case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8:
    79  		push(s, v.Args[2])
    80  		var op obj.As
    81  		switch v.Op {
    82  		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4:
    83  			op = x86.AFMOVFP
    84  		case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8:
    85  			op = x86.AFMOVDP
    86  		}
    87  		p := s.Prog(op)
    88  		p.From.Type = obj.TYPE_REG
    89  		p.From.Reg = x86.REG_F0
    90  		p.To.Type = obj.TYPE_MEM
    91  		p.To.Reg = v.Args[0].Reg()
    92  		gc.AddAux(&p.To, v)
    93  		switch v.Op {
    94  		case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
    95  			p.To.Scale = 1
    96  			p.To.Index = v.Args[1].Reg()
    97  		case ssa.Op386MOVSSstoreidx4:
    98  			p.To.Scale = 4
    99  			p.To.Index = v.Args[1].Reg()
   100  		case ssa.Op386MOVSDstoreidx8:
   101  			p.To.Scale = 8
   102  			p.To.Index = v.Args[1].Reg()
   103  		}
   104  
   105  	case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
   106  		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD:
   107  		if v.Reg() != v.Args[0].Reg() {
   108  			v.Fatalf("input[0] and output not in same register %s", v.LongString())
   109  		}
   110  
   111  		// Push arg1 on top of stack
   112  		push(s, v.Args[1])
   113  
   114  		// Set precision if needed.  64 bits is the default.
   115  		switch v.Op {
   116  		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
   117  			p := s.Prog(x86.AFSTCW)
   118  			s.AddrScratch(&p.To)
   119  			p = s.Prog(x86.AFLDCW)
   120  			p.From.Type = obj.TYPE_MEM
   121  			p.From.Name = obj.NAME_EXTERN
   122  			p.From.Sym = gc.Sysfunc("controlWord32")
   123  		}
   124  
   125  		var op obj.As
   126  		switch v.Op {
   127  		case ssa.Op386ADDSS, ssa.Op386ADDSD:
   128  			op = x86.AFADDDP
   129  		case ssa.Op386SUBSS, ssa.Op386SUBSD:
   130  			op = x86.AFSUBDP
   131  		case ssa.Op386MULSS, ssa.Op386MULSD:
   132  			op = x86.AFMULDP
   133  		case ssa.Op386DIVSS, ssa.Op386DIVSD:
   134  			op = x86.AFDIVDP
   135  		}
   136  		p := s.Prog(op)
   137  		p.From.Type = obj.TYPE_REG
   138  		p.From.Reg = x86.REG_F0
   139  		p.To.Type = obj.TYPE_REG
   140  		p.To.Reg = s.SSEto387[v.Reg()] + 1
   141  
   142  		// Restore precision if needed.
   143  		switch v.Op {
   144  		case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS:
   145  			p := s.Prog(x86.AFLDCW)
   146  			s.AddrScratch(&p.From)
   147  		}
   148  
   149  	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
   150  		push(s, v.Args[0])
   151  
   152  		// Compare.
   153  		p := s.Prog(x86.AFUCOMP)
   154  		p.From.Type = obj.TYPE_REG
   155  		p.From.Reg = x86.REG_F0
   156  		p.To.Type = obj.TYPE_REG
   157  		p.To.Reg = s.SSEto387[v.Args[1].Reg()] + 1
   158  
   159  		// Save AX.
   160  		p = s.Prog(x86.AMOVL)
   161  		p.From.Type = obj.TYPE_REG
   162  		p.From.Reg = x86.REG_AX
   163  		s.AddrScratch(&p.To)
   164  
   165  		// Move status word into AX.
   166  		p = s.Prog(x86.AFSTSW)
   167  		p.To.Type = obj.TYPE_REG
   168  		p.To.Reg = x86.REG_AX
   169  
   170  		// Then move the flags we need to the integer flags.
   171  		s.Prog(x86.ASAHF)
   172  
   173  		// Restore AX.
   174  		p = s.Prog(x86.AMOVL)
   175  		s.AddrScratch(&p.From)
   176  		p.To.Type = obj.TYPE_REG
   177  		p.To.Reg = x86.REG_AX
   178  
   179  	case ssa.Op386SQRTSD:
   180  		push(s, v.Args[0])
   181  		s.Prog(x86.AFSQRT)
   182  		popAndSave(s, v)
   183  
   184  	case ssa.Op386FCHS:
   185  		push(s, v.Args[0])
   186  		s.Prog(x86.AFCHS)
   187  		popAndSave(s, v)
   188  
   189  	case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD:
   190  		p := s.Prog(x86.AMOVL)
   191  		p.From.Type = obj.TYPE_REG
   192  		p.From.Reg = v.Args[0].Reg()
   193  		s.AddrScratch(&p.To)
   194  		p = s.Prog(x86.AFMOVL)
   195  		s.AddrScratch(&p.From)
   196  		p.To.Type = obj.TYPE_REG
   197  		p.To.Reg = x86.REG_F0
   198  		popAndSave(s, v)
   199  
   200  	case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL:
   201  		push(s, v.Args[0])
   202  
   203  		// Save control word.
   204  		p := s.Prog(x86.AFSTCW)
   205  		s.AddrScratch(&p.To)
   206  		p.To.Offset += 4
   207  
   208  		// Load control word which truncates (rounds towards zero).
   209  		p = s.Prog(x86.AFLDCW)
   210  		p.From.Type = obj.TYPE_MEM
   211  		p.From.Name = obj.NAME_EXTERN
   212  		p.From.Sym = gc.Sysfunc("controlWord64trunc")
   213  
   214  		// Now do the conversion.
   215  		p = s.Prog(x86.AFMOVLP)
   216  		p.From.Type = obj.TYPE_REG
   217  		p.From.Reg = x86.REG_F0
   218  		s.AddrScratch(&p.To)
   219  		p = s.Prog(x86.AMOVL)
   220  		s.AddrScratch(&p.From)
   221  		p.To.Type = obj.TYPE_REG
   222  		p.To.Reg = v.Reg()
   223  
   224  		// Restore control word.
   225  		p = s.Prog(x86.AFLDCW)
   226  		s.AddrScratch(&p.From)
   227  		p.From.Offset += 4
   228  
   229  	case ssa.Op386CVTSS2SD:
   230  		// float32 -> float64 is a nop
   231  		push(s, v.Args[0])
   232  		popAndSave(s, v)
   233  
   234  	case ssa.Op386CVTSD2SS:
   235  		// Round to nearest float32.
   236  		push(s, v.Args[0])
   237  		p := s.Prog(x86.AFMOVFP)
   238  		p.From.Type = obj.TYPE_REG
   239  		p.From.Reg = x86.REG_F0
   240  		s.AddrScratch(&p.To)
   241  		p = s.Prog(x86.AFMOVF)
   242  		s.AddrScratch(&p.From)
   243  		p.To.Type = obj.TYPE_REG
   244  		p.To.Reg = x86.REG_F0
   245  		popAndSave(s, v)
   246  
   247  	case ssa.OpLoadReg:
   248  		if !v.Type.IsFloat() {
   249  			ssaGenValue(s, v)
   250  			return
   251  		}
   252  		// Load+push the value we need.
   253  		p := s.Prog(loadPush(v.Type))
   254  		gc.AddrAuto(&p.From, v.Args[0])
   255  		p.To.Type = obj.TYPE_REG
   256  		p.To.Reg = x86.REG_F0
   257  		// Move the value to its assigned register.
   258  		popAndSave(s, v)
   259  
   260  	case ssa.OpStoreReg:
   261  		if !v.Type.IsFloat() {
   262  			ssaGenValue(s, v)
   263  			return
   264  		}
   265  		push(s, v.Args[0])
   266  		var op obj.As
   267  		switch v.Type.Size() {
   268  		case 4:
   269  			op = x86.AFMOVFP
   270  		case 8:
   271  			op = x86.AFMOVDP
   272  		}
   273  		p := s.Prog(op)
   274  		p.From.Type = obj.TYPE_REG
   275  		p.From.Reg = x86.REG_F0
   276  		gc.AddrAuto(&p.To, v)
   277  
   278  	case ssa.OpCopy:
   279  		if !v.Type.IsFloat() {
   280  			ssaGenValue(s, v)
   281  			return
   282  		}
   283  		push(s, v.Args[0])
   284  		popAndSave(s, v)
   285  
   286  	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
   287  		flush387(s) // Calls must empty the FP stack.
   288  		fallthrough // then issue the call as normal
   289  	default:
   290  		ssaGenValue(s, v)
   291  	}
   292  }
   293  
   294  // push pushes v onto the floating-point stack.  v must be in a register.
   295  func push(s *gc.SSAGenState, v *ssa.Value) {
   296  	p := s.Prog(x86.AFMOVD)
   297  	p.From.Type = obj.TYPE_REG
   298  	p.From.Reg = s.SSEto387[v.Reg()]
   299  	p.To.Type = obj.TYPE_REG
   300  	p.To.Reg = x86.REG_F0
   301  }
   302  
   303  // popAndSave pops a value off of the floating-point stack and stores
   304  // it in the reigster assigned to v.
   305  func popAndSave(s *gc.SSAGenState, v *ssa.Value) {
   306  	r := v.Reg()
   307  	if _, ok := s.SSEto387[r]; ok {
   308  		// Pop value, write to correct register.
   309  		p := s.Prog(x86.AFMOVDP)
   310  		p.From.Type = obj.TYPE_REG
   311  		p.From.Reg = x86.REG_F0
   312  		p.To.Type = obj.TYPE_REG
   313  		p.To.Reg = s.SSEto387[v.Reg()] + 1
   314  	} else {
   315  		// Don't actually pop value. This 387 register is now the
   316  		// new home for the not-yet-assigned-a-home SSE register.
   317  		// Increase the register mapping of all other registers by one.
   318  		for rSSE, r387 := range s.SSEto387 {
   319  			s.SSEto387[rSSE] = r387 + 1
   320  		}
   321  		s.SSEto387[r] = x86.REG_F0
   322  	}
   323  }
   324  
   325  // loadPush returns the opcode for load+push of the given type.
   326  func loadPush(t ssa.Type) obj.As {
   327  	if t.Size() == 4 {
   328  		return x86.AFMOVF
   329  	}
   330  	return x86.AFMOVD
   331  }
   332  
   333  // flush387 removes all entries from the 387 floating-point stack.
   334  func flush387(s *gc.SSAGenState) {
   335  	for k := range s.SSEto387 {
   336  		p := s.Prog(x86.AFMOVDP)
   337  		p.From.Type = obj.TYPE_REG
   338  		p.From.Reg = x86.REG_F0
   339  		p.To.Type = obj.TYPE_REG
   340  		p.To.Reg = x86.REG_F0
   341  		delete(s.SSEto387, k)
   342  	}
   343  }
   344  
   345  func ssaGenBlock387(s *gc.SSAGenState, b, next *ssa.Block) {
   346  	// Empty the 387's FP stack before the block ends.
   347  	flush387(s)
   348  
   349  	ssaGenBlock(s, b, next)
   350  }