github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
    11  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ir"
    12  	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
    13  	"github.com/bir3/gocompiler/src/cmd/compile/internal/objw"
    14  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssa"
    15  	"github.com/bir3/gocompiler/src/cmd/compile/internal/ssagen"
    16  	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
    17  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    18  	"github.com/bir3/gocompiler/src/cmd/internal/obj/arm64"
    19  )
    20  
    21  // loadByType returns the load instruction of the given type.
    22  func loadByType(t *types.Type) obj.As {
    23  	if t.IsFloat() {
    24  		switch t.Size() {
    25  		case 4:
    26  			return arm64.AFMOVS
    27  		case 8:
    28  			return arm64.AFMOVD
    29  		}
    30  	} else {
    31  		switch t.Size() {
    32  		case 1:
    33  			if t.IsSigned() {
    34  				return arm64.AMOVB
    35  			} else {
    36  				return arm64.AMOVBU
    37  			}
    38  		case 2:
    39  			if t.IsSigned() {
    40  				return arm64.AMOVH
    41  			} else {
    42  				return arm64.AMOVHU
    43  			}
    44  		case 4:
    45  			if t.IsSigned() {
    46  				return arm64.AMOVW
    47  			} else {
    48  				return arm64.AMOVWU
    49  			}
    50  		case 8:
    51  			return arm64.AMOVD
    52  		}
    53  	}
    54  	panic("bad load type")
    55  }
    56  
    57  // storeByType returns the store instruction of the given type.
    58  func storeByType(t *types.Type) obj.As {
    59  	if t.IsFloat() {
    60  		switch t.Size() {
    61  		case 4:
    62  			return arm64.AFMOVS
    63  		case 8:
    64  			return arm64.AFMOVD
    65  		}
    66  	} else {
    67  		switch t.Size() {
    68  		case 1:
    69  			return arm64.AMOVB
    70  		case 2:
    71  			return arm64.AMOVH
    72  		case 4:
    73  			return arm64.AMOVW
    74  		case 8:
    75  			return arm64.AMOVD
    76  		}
    77  	}
    78  	panic("bad store type")
    79  }
    80  
    81  // makeshift encodes a register shifted by a constant, used as an Offset in Prog.
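         // The result ORs together the register number in bits 16-20, the shift kind
         // (typ, one of arm64.SHIFT_LL/LR/AR/ROR, already positioned in its own bit field)
         // and the shift amount in bits 10-15.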
    82  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
    83  	if s < 0 || s >= 64 {
    84  		v.Fatalf("shift out of range: %d", s)
    85  	}
    86  	return int64(reg&31)<<16 | typ | (s&63)<<10
    87  }
    88  
    89  // genshift generates a Prog for r = r0 op (r1 shifted by n).
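         // Passing r == 0 omits the destination register; this is used for the flag-setting
         // shifted-operand forms such as CMPshiftLL/CMNshiftLL/TSTshiftLL below.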
    90  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
    91  	p := s.Prog(as)
    92  	p.From.Type = obj.TYPE_SHIFT
    93  	p.From.Offset = makeshift(v, r1, typ, n)
    94  	p.Reg = r0
    95  	if r != 0 {
    96  		p.To.Type = obj.TYPE_REG
    97  		p.To.Reg = r
    98  	}
    99  	return p
   100  }
   101  
    102  // genIndexedOperand generates the memory operand for the indexed load/store instructions.
   103  // base and idx are registers.
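         // For the *idx2/*idx4/*idx8 ops the index register is encoded with an LSL by
         // log2(element size), producing the scaled register-indexed addressing mode;
         // the other indexed ops use the index register unscaled.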
   104  func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
   105  	// Reg: base register, Index: (shifted) index register
   106  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
   107  	switch op {
   108  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
   109  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   110  		mop.Index = arm64.REG_LSL | 3<<5 | idx&31
   111  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
   112  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   113  		mop.Index = arm64.REG_LSL | 2<<5 | idx&31
   114  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
   115  		mop.Index = arm64.REG_LSL | 1<<5 | idx&31
   116  	default: // not shifted
   117  		mop.Index = idx
   118  	}
   119  	return mop
   120  }
   121  
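         // ssaGenValue emits the machine instructions for a single SSA value v.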
   122  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   123  	switch v.Op {
   124  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   125  		if v.Type.IsMemory() {
   126  			return
   127  		}
   128  		x := v.Args[0].Reg()
   129  		y := v.Reg()
   130  		if x == y {
   131  			return
   132  		}
   133  		as := arm64.AMOVD
   134  		if v.Type.IsFloat() {
   135  			switch v.Type.Size() {
   136  			case 4:
   137  				as = arm64.AFMOVS
   138  			case 8:
   139  				as = arm64.AFMOVD
   140  			default:
   141  				panic("bad float size")
   142  			}
   143  		}
   144  		p := s.Prog(as)
   145  		p.From.Type = obj.TYPE_REG
   146  		p.From.Reg = x
   147  		p.To.Type = obj.TYPE_REG
   148  		p.To.Reg = y
   149  	case ssa.OpARM64MOVDnop:
   150  		// nothing to do
   151  	case ssa.OpLoadReg:
   152  		if v.Type.IsFlags() {
   153  			v.Fatalf("load flags not implemented: %v", v.LongString())
   154  			return
   155  		}
   156  		p := s.Prog(loadByType(v.Type))
   157  		ssagen.AddrAuto(&p.From, v.Args[0])
   158  		p.To.Type = obj.TYPE_REG
   159  		p.To.Reg = v.Reg()
   160  	case ssa.OpStoreReg:
   161  		if v.Type.IsFlags() {
   162  			v.Fatalf("store flags not implemented: %v", v.LongString())
   163  			return
   164  		}
   165  		p := s.Prog(storeByType(v.Type))
   166  		p.From.Type = obj.TYPE_REG
   167  		p.From.Reg = v.Args[0].Reg()
   168  		ssagen.AddrAuto(&p.To, v)
   169  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
    170  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill of the register arguments.
    171  		// The loop only runs once.
   172  		for _, a := range v.Block.Func.RegArgs {
    173  			// Pass the spill/unspill information along to the assembler, offset by the size of
    174  			// the saved LR slot.
   175  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   176  			s.FuncInfo().AddSpill(
   177  				obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   178  		}
   179  		v.Block.Func.RegArgs = nil
   180  		ssagen.CheckArgReg(v)
   181  	case ssa.OpARM64ADD,
   182  		ssa.OpARM64SUB,
   183  		ssa.OpARM64AND,
   184  		ssa.OpARM64OR,
   185  		ssa.OpARM64XOR,
   186  		ssa.OpARM64BIC,
   187  		ssa.OpARM64EON,
   188  		ssa.OpARM64ORN,
   189  		ssa.OpARM64MUL,
   190  		ssa.OpARM64MULW,
   191  		ssa.OpARM64MNEG,
   192  		ssa.OpARM64MNEGW,
   193  		ssa.OpARM64MULH,
   194  		ssa.OpARM64UMULH,
   195  		ssa.OpARM64MULL,
   196  		ssa.OpARM64UMULL,
   197  		ssa.OpARM64DIV,
   198  		ssa.OpARM64UDIV,
   199  		ssa.OpARM64DIVW,
   200  		ssa.OpARM64UDIVW,
   201  		ssa.OpARM64MOD,
   202  		ssa.OpARM64UMOD,
   203  		ssa.OpARM64MODW,
   204  		ssa.OpARM64UMODW,
   205  		ssa.OpARM64SLL,
   206  		ssa.OpARM64SRL,
   207  		ssa.OpARM64SRA,
   208  		ssa.OpARM64FADDS,
   209  		ssa.OpARM64FADDD,
   210  		ssa.OpARM64FSUBS,
   211  		ssa.OpARM64FSUBD,
   212  		ssa.OpARM64FMULS,
   213  		ssa.OpARM64FMULD,
   214  		ssa.OpARM64FNMULS,
   215  		ssa.OpARM64FNMULD,
   216  		ssa.OpARM64FDIVS,
   217  		ssa.OpARM64FDIVD,
   218  		ssa.OpARM64ROR,
   219  		ssa.OpARM64RORW:
   220  		r := v.Reg()
   221  		r1 := v.Args[0].Reg()
   222  		r2 := v.Args[1].Reg()
   223  		p := s.Prog(v.Op.Asm())
   224  		p.From.Type = obj.TYPE_REG
   225  		p.From.Reg = r2
   226  		p.Reg = r1
   227  		p.To.Type = obj.TYPE_REG
   228  		p.To.Reg = r
   229  	case ssa.OpARM64FMADDS,
   230  		ssa.OpARM64FMADDD,
   231  		ssa.OpARM64FNMADDS,
   232  		ssa.OpARM64FNMADDD,
   233  		ssa.OpARM64FMSUBS,
   234  		ssa.OpARM64FMSUBD,
   235  		ssa.OpARM64FNMSUBS,
   236  		ssa.OpARM64FNMSUBD,
   237  		ssa.OpARM64MADD,
   238  		ssa.OpARM64MADDW,
   239  		ssa.OpARM64MSUB,
   240  		ssa.OpARM64MSUBW:
   241  		rt := v.Reg()
   242  		ra := v.Args[0].Reg()
   243  		rm := v.Args[1].Reg()
   244  		rn := v.Args[2].Reg()
   245  		p := s.Prog(v.Op.Asm())
   246  		p.Reg = ra
   247  		p.From.Type = obj.TYPE_REG
   248  		p.From.Reg = rm
   249  		p.SetFrom3Reg(rn)
   250  		p.To.Type = obj.TYPE_REG
   251  		p.To.Reg = rt
   252  	case ssa.OpARM64ADDconst,
   253  		ssa.OpARM64SUBconst,
   254  		ssa.OpARM64ANDconst,
   255  		ssa.OpARM64ORconst,
   256  		ssa.OpARM64XORconst,
   257  		ssa.OpARM64SLLconst,
   258  		ssa.OpARM64SRLconst,
   259  		ssa.OpARM64SRAconst,
   260  		ssa.OpARM64RORconst,
   261  		ssa.OpARM64RORWconst:
   262  		p := s.Prog(v.Op.Asm())
   263  		p.From.Type = obj.TYPE_CONST
   264  		p.From.Offset = v.AuxInt
   265  		p.Reg = v.Args[0].Reg()
   266  		p.To.Type = obj.TYPE_REG
   267  		p.To.Reg = v.Reg()
   268  	case ssa.OpARM64ADDSconstflags:
   269  		p := s.Prog(v.Op.Asm())
   270  		p.From.Type = obj.TYPE_CONST
   271  		p.From.Offset = v.AuxInt
   272  		p.Reg = v.Args[0].Reg()
   273  		p.To.Type = obj.TYPE_REG
   274  		p.To.Reg = v.Reg0()
   275  	case ssa.OpARM64ADCzerocarry:
   276  		p := s.Prog(v.Op.Asm())
   277  		p.From.Type = obj.TYPE_REG
   278  		p.From.Reg = arm64.REGZERO
   279  		p.Reg = arm64.REGZERO
   280  		p.To.Type = obj.TYPE_REG
   281  		p.To.Reg = v.Reg()
   282  	case ssa.OpARM64ADCSflags,
   283  		ssa.OpARM64ADDSflags,
   284  		ssa.OpARM64SBCSflags,
   285  		ssa.OpARM64SUBSflags:
   286  		r := v.Reg0()
   287  		r1 := v.Args[0].Reg()
   288  		r2 := v.Args[1].Reg()
   289  		p := s.Prog(v.Op.Asm())
   290  		p.From.Type = obj.TYPE_REG
   291  		p.From.Reg = r2
   292  		p.Reg = r1
   293  		p.To.Type = obj.TYPE_REG
   294  		p.To.Reg = r
   295  	case ssa.OpARM64NEGSflags:
   296  		p := s.Prog(v.Op.Asm())
   297  		p.From.Type = obj.TYPE_REG
   298  		p.From.Reg = v.Args[0].Reg()
   299  		p.To.Type = obj.TYPE_REG
   300  		p.To.Reg = v.Reg0()
   301  	case ssa.OpARM64NGCzerocarry:
   302  		p := s.Prog(v.Op.Asm())
   303  		p.From.Type = obj.TYPE_REG
   304  		p.From.Reg = arm64.REGZERO
   305  		p.To.Type = obj.TYPE_REG
   306  		p.To.Reg = v.Reg()
   307  	case ssa.OpARM64EXTRconst,
   308  		ssa.OpARM64EXTRWconst:
   309  		p := s.Prog(v.Op.Asm())
   310  		p.From.Type = obj.TYPE_CONST
   311  		p.From.Offset = v.AuxInt
   312  		p.SetFrom3Reg(v.Args[0].Reg())
   313  		p.Reg = v.Args[1].Reg()
   314  		p.To.Type = obj.TYPE_REG
   315  		p.To.Reg = v.Reg()
   316  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   317  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   318  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   319  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   320  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   321  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   322  	case ssa.OpARM64MVNshiftRO:
   323  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   324  	case ssa.OpARM64ADDshiftLL,
   325  		ssa.OpARM64SUBshiftLL,
   326  		ssa.OpARM64ANDshiftLL,
   327  		ssa.OpARM64ORshiftLL,
   328  		ssa.OpARM64XORshiftLL,
   329  		ssa.OpARM64EONshiftLL,
   330  		ssa.OpARM64ORNshiftLL,
   331  		ssa.OpARM64BICshiftLL:
   332  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   333  	case ssa.OpARM64ADDshiftRL,
   334  		ssa.OpARM64SUBshiftRL,
   335  		ssa.OpARM64ANDshiftRL,
   336  		ssa.OpARM64ORshiftRL,
   337  		ssa.OpARM64XORshiftRL,
   338  		ssa.OpARM64EONshiftRL,
   339  		ssa.OpARM64ORNshiftRL,
   340  		ssa.OpARM64BICshiftRL:
   341  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   342  	case ssa.OpARM64ADDshiftRA,
   343  		ssa.OpARM64SUBshiftRA,
   344  		ssa.OpARM64ANDshiftRA,
   345  		ssa.OpARM64ORshiftRA,
   346  		ssa.OpARM64XORshiftRA,
   347  		ssa.OpARM64EONshiftRA,
   348  		ssa.OpARM64ORNshiftRA,
   349  		ssa.OpARM64BICshiftRA:
   350  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   351  	case ssa.OpARM64ANDshiftRO,
   352  		ssa.OpARM64ORshiftRO,
   353  		ssa.OpARM64XORshiftRO,
   354  		ssa.OpARM64EONshiftRO,
   355  		ssa.OpARM64ORNshiftRO,
   356  		ssa.OpARM64BICshiftRO:
   357  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   358  	case ssa.OpARM64MOVDconst:
   359  		p := s.Prog(v.Op.Asm())
   360  		p.From.Type = obj.TYPE_CONST
   361  		p.From.Offset = v.AuxInt
   362  		p.To.Type = obj.TYPE_REG
   363  		p.To.Reg = v.Reg()
   364  	case ssa.OpARM64FMOVSconst,
   365  		ssa.OpARM64FMOVDconst:
   366  		p := s.Prog(v.Op.Asm())
   367  		p.From.Type = obj.TYPE_FCONST
   368  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   369  		p.To.Type = obj.TYPE_REG
   370  		p.To.Reg = v.Reg()
   371  	case ssa.OpARM64FCMPS0,
   372  		ssa.OpARM64FCMPD0:
   373  		p := s.Prog(v.Op.Asm())
   374  		p.From.Type = obj.TYPE_FCONST
   375  		p.From.Val = math.Float64frombits(0)
   376  		p.Reg = v.Args[0].Reg()
   377  	case ssa.OpARM64CMP,
   378  		ssa.OpARM64CMPW,
   379  		ssa.OpARM64CMN,
   380  		ssa.OpARM64CMNW,
   381  		ssa.OpARM64TST,
   382  		ssa.OpARM64TSTW,
   383  		ssa.OpARM64FCMPS,
   384  		ssa.OpARM64FCMPD:
   385  		p := s.Prog(v.Op.Asm())
   386  		p.From.Type = obj.TYPE_REG
   387  		p.From.Reg = v.Args[1].Reg()
   388  		p.Reg = v.Args[0].Reg()
   389  	case ssa.OpARM64CMPconst,
   390  		ssa.OpARM64CMPWconst,
   391  		ssa.OpARM64CMNconst,
   392  		ssa.OpARM64CMNWconst,
   393  		ssa.OpARM64TSTconst,
   394  		ssa.OpARM64TSTWconst:
   395  		p := s.Prog(v.Op.Asm())
   396  		p.From.Type = obj.TYPE_CONST
   397  		p.From.Offset = v.AuxInt
   398  		p.Reg = v.Args[0].Reg()
   399  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   400  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   401  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   402  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   403  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   404  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   405  	case ssa.OpARM64TSTshiftRO:
   406  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   407  	case ssa.OpARM64MOVDaddr:
   408  		p := s.Prog(arm64.AMOVD)
   409  		p.From.Type = obj.TYPE_ADDR
   410  		p.From.Reg = v.Args[0].Reg()
   411  		p.To.Type = obj.TYPE_REG
   412  		p.To.Reg = v.Reg()
   413  
   414  		var wantreg string
   415  		// MOVD $sym+off(base), R
   416  		// the assembler expands it as the following:
    417  	// - base is SP: add constant offset to SP
    418  	//               when constant is large, the tmp register (REGTMP) may be used
   419  		// - base is SB: load external address from constant pool (use relocation)
   420  		switch v.Aux.(type) {
   421  		default:
   422  			v.Fatalf("aux is of unknown type %T", v.Aux)
   423  		case *obj.LSym:
   424  			wantreg = "SB"
   425  			ssagen.AddAux(&p.From, v)
   426  		case *ir.Name:
   427  			wantreg = "SP"
   428  			ssagen.AddAux(&p.From, v)
   429  		case nil:
   430  			// No sym, just MOVD $off(SP), R
   431  			wantreg = "SP"
   432  			p.From.Offset = v.AuxInt
   433  		}
   434  		if reg := v.Args[0].RegName(); reg != wantreg {
   435  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   436  		}
   437  	case ssa.OpARM64MOVBload,
   438  		ssa.OpARM64MOVBUload,
   439  		ssa.OpARM64MOVHload,
   440  		ssa.OpARM64MOVHUload,
   441  		ssa.OpARM64MOVWload,
   442  		ssa.OpARM64MOVWUload,
   443  		ssa.OpARM64MOVDload,
   444  		ssa.OpARM64FMOVSload,
   445  		ssa.OpARM64FMOVDload:
   446  		p := s.Prog(v.Op.Asm())
   447  		p.From.Type = obj.TYPE_MEM
   448  		p.From.Reg = v.Args[0].Reg()
   449  		ssagen.AddAux(&p.From, v)
   450  		p.To.Type = obj.TYPE_REG
   451  		p.To.Reg = v.Reg()
   452  	case ssa.OpARM64LDP:
   453  		p := s.Prog(v.Op.Asm())
   454  		p.From.Type = obj.TYPE_MEM
   455  		p.From.Reg = v.Args[0].Reg()
   456  		ssagen.AddAux(&p.From, v)
   457  		p.To.Type = obj.TYPE_REGREG
   458  		p.To.Reg = v.Reg0()
   459  		p.To.Offset = int64(v.Reg1())
   460  	case ssa.OpARM64MOVBloadidx,
   461  		ssa.OpARM64MOVBUloadidx,
   462  		ssa.OpARM64MOVHloadidx,
   463  		ssa.OpARM64MOVHUloadidx,
   464  		ssa.OpARM64MOVWloadidx,
   465  		ssa.OpARM64MOVWUloadidx,
   466  		ssa.OpARM64MOVDloadidx,
   467  		ssa.OpARM64FMOVSloadidx,
   468  		ssa.OpARM64FMOVDloadidx,
   469  		ssa.OpARM64MOVHloadidx2,
   470  		ssa.OpARM64MOVHUloadidx2,
   471  		ssa.OpARM64MOVWloadidx4,
   472  		ssa.OpARM64MOVWUloadidx4,
   473  		ssa.OpARM64MOVDloadidx8,
   474  		ssa.OpARM64FMOVDloadidx8,
   475  		ssa.OpARM64FMOVSloadidx4:
   476  		p := s.Prog(v.Op.Asm())
   477  		p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   478  		p.To.Type = obj.TYPE_REG
   479  		p.To.Reg = v.Reg()
   480  	case ssa.OpARM64LDAR,
   481  		ssa.OpARM64LDARB,
   482  		ssa.OpARM64LDARW:
   483  		p := s.Prog(v.Op.Asm())
   484  		p.From.Type = obj.TYPE_MEM
   485  		p.From.Reg = v.Args[0].Reg()
   486  		ssagen.AddAux(&p.From, v)
   487  		p.To.Type = obj.TYPE_REG
   488  		p.To.Reg = v.Reg0()
   489  	case ssa.OpARM64MOVBstore,
   490  		ssa.OpARM64MOVHstore,
   491  		ssa.OpARM64MOVWstore,
   492  		ssa.OpARM64MOVDstore,
   493  		ssa.OpARM64FMOVSstore,
   494  		ssa.OpARM64FMOVDstore,
   495  		ssa.OpARM64STLRB,
   496  		ssa.OpARM64STLR,
   497  		ssa.OpARM64STLRW:
   498  		p := s.Prog(v.Op.Asm())
   499  		p.From.Type = obj.TYPE_REG
   500  		p.From.Reg = v.Args[1].Reg()
   501  		p.To.Type = obj.TYPE_MEM
   502  		p.To.Reg = v.Args[0].Reg()
   503  		ssagen.AddAux(&p.To, v)
   504  	case ssa.OpARM64MOVBstoreidx,
   505  		ssa.OpARM64MOVHstoreidx,
   506  		ssa.OpARM64MOVWstoreidx,
   507  		ssa.OpARM64MOVDstoreidx,
   508  		ssa.OpARM64FMOVSstoreidx,
   509  		ssa.OpARM64FMOVDstoreidx,
   510  		ssa.OpARM64MOVHstoreidx2,
   511  		ssa.OpARM64MOVWstoreidx4,
   512  		ssa.OpARM64FMOVSstoreidx4,
   513  		ssa.OpARM64MOVDstoreidx8,
   514  		ssa.OpARM64FMOVDstoreidx8:
   515  		p := s.Prog(v.Op.Asm())
   516  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   517  		p.From.Type = obj.TYPE_REG
   518  		p.From.Reg = v.Args[2].Reg()
   519  	case ssa.OpARM64STP:
   520  		p := s.Prog(v.Op.Asm())
   521  		p.From.Type = obj.TYPE_REGREG
   522  		p.From.Reg = v.Args[1].Reg()
   523  		p.From.Offset = int64(v.Args[2].Reg())
   524  		p.To.Type = obj.TYPE_MEM
   525  		p.To.Reg = v.Args[0].Reg()
   526  		ssagen.AddAux(&p.To, v)
   527  	case ssa.OpARM64MOVBstorezero,
   528  		ssa.OpARM64MOVHstorezero,
   529  		ssa.OpARM64MOVWstorezero,
   530  		ssa.OpARM64MOVDstorezero:
   531  		p := s.Prog(v.Op.Asm())
   532  		p.From.Type = obj.TYPE_REG
   533  		p.From.Reg = arm64.REGZERO
   534  		p.To.Type = obj.TYPE_MEM
   535  		p.To.Reg = v.Args[0].Reg()
   536  		ssagen.AddAux(&p.To, v)
   537  	case ssa.OpARM64MOVBstorezeroidx,
   538  		ssa.OpARM64MOVHstorezeroidx,
   539  		ssa.OpARM64MOVWstorezeroidx,
   540  		ssa.OpARM64MOVDstorezeroidx,
   541  		ssa.OpARM64MOVHstorezeroidx2,
   542  		ssa.OpARM64MOVWstorezeroidx4,
   543  		ssa.OpARM64MOVDstorezeroidx8:
   544  		p := s.Prog(v.Op.Asm())
   545  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   546  		p.From.Type = obj.TYPE_REG
   547  		p.From.Reg = arm64.REGZERO
   548  	case ssa.OpARM64MOVQstorezero:
   549  		p := s.Prog(v.Op.Asm())
   550  		p.From.Type = obj.TYPE_REGREG
   551  		p.From.Reg = arm64.REGZERO
   552  		p.From.Offset = int64(arm64.REGZERO)
   553  		p.To.Type = obj.TYPE_MEM
   554  		p.To.Reg = v.Args[0].Reg()
   555  		ssagen.AddAux(&p.To, v)
   556  	case ssa.OpARM64BFI,
   557  		ssa.OpARM64BFXIL:
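         		// AuxInt packs the bitfield parameters: lsb in the high bits (AuxInt>>8)
         		// and width in the low 8 bits.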
   558  		p := s.Prog(v.Op.Asm())
   559  		p.From.Type = obj.TYPE_CONST
   560  		p.From.Offset = v.AuxInt >> 8
   561  		p.SetFrom3Const(v.AuxInt & 0xff)
   562  		p.Reg = v.Args[1].Reg()
   563  		p.To.Type = obj.TYPE_REG
   564  		p.To.Reg = v.Reg()
   565  	case ssa.OpARM64SBFIZ,
   566  		ssa.OpARM64SBFX,
   567  		ssa.OpARM64UBFIZ,
   568  		ssa.OpARM64UBFX:
   569  		p := s.Prog(v.Op.Asm())
   570  		p.From.Type = obj.TYPE_CONST
   571  		p.From.Offset = v.AuxInt >> 8
   572  		p.SetFrom3Const(v.AuxInt & 0xff)
   573  		p.Reg = v.Args[0].Reg()
   574  		p.To.Type = obj.TYPE_REG
   575  		p.To.Reg = v.Reg()
   576  	case ssa.OpARM64LoweredAtomicExchange64,
   577  		ssa.OpARM64LoweredAtomicExchange32:
   578  		// LDAXR	(Rarg0), Rout
   579  		// STLXR	Rarg1, (Rarg0), Rtmp
   580  		// CBNZ		Rtmp, -2(PC)
   581  		ld := arm64.ALDAXR
   582  		st := arm64.ASTLXR
   583  		if v.Op == ssa.OpARM64LoweredAtomicExchange32 {
   584  			ld = arm64.ALDAXRW
   585  			st = arm64.ASTLXRW
   586  		}
   587  		r0 := v.Args[0].Reg()
   588  		r1 := v.Args[1].Reg()
   589  		out := v.Reg0()
   590  		p := s.Prog(ld)
   591  		p.From.Type = obj.TYPE_MEM
   592  		p.From.Reg = r0
   593  		p.To.Type = obj.TYPE_REG
   594  		p.To.Reg = out
   595  		p1 := s.Prog(st)
   596  		p1.From.Type = obj.TYPE_REG
   597  		p1.From.Reg = r1
   598  		p1.To.Type = obj.TYPE_MEM
   599  		p1.To.Reg = r0
   600  		p1.RegTo2 = arm64.REGTMP
   601  		p2 := s.Prog(arm64.ACBNZ)
   602  		p2.From.Type = obj.TYPE_REG
   603  		p2.From.Reg = arm64.REGTMP
   604  		p2.To.Type = obj.TYPE_BRANCH
   605  		p2.To.SetTarget(p)
   606  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   607  		ssa.OpARM64LoweredAtomicExchange32Variant:
   608  		swap := arm64.ASWPALD
   609  		if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
   610  			swap = arm64.ASWPALW
   611  		}
   612  		r0 := v.Args[0].Reg()
   613  		r1 := v.Args[1].Reg()
   614  		out := v.Reg0()
   615  
   616  		// SWPALD	Rarg1, (Rarg0), Rout
   617  		p := s.Prog(swap)
   618  		p.From.Type = obj.TYPE_REG
   619  		p.From.Reg = r1
   620  		p.To.Type = obj.TYPE_MEM
   621  		p.To.Reg = r0
   622  		p.RegTo2 = out
   623  
   624  	case ssa.OpARM64LoweredAtomicAdd64,
   625  		ssa.OpARM64LoweredAtomicAdd32:
   626  		// LDAXR	(Rarg0), Rout
   627  		// ADD		Rarg1, Rout
   628  		// STLXR	Rout, (Rarg0), Rtmp
   629  		// CBNZ		Rtmp, -3(PC)
   630  		ld := arm64.ALDAXR
   631  		st := arm64.ASTLXR
   632  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   633  			ld = arm64.ALDAXRW
   634  			st = arm64.ASTLXRW
   635  		}
   636  		r0 := v.Args[0].Reg()
   637  		r1 := v.Args[1].Reg()
   638  		out := v.Reg0()
   639  		p := s.Prog(ld)
   640  		p.From.Type = obj.TYPE_MEM
   641  		p.From.Reg = r0
   642  		p.To.Type = obj.TYPE_REG
   643  		p.To.Reg = out
   644  		p1 := s.Prog(arm64.AADD)
   645  		p1.From.Type = obj.TYPE_REG
   646  		p1.From.Reg = r1
   647  		p1.To.Type = obj.TYPE_REG
   648  		p1.To.Reg = out
   649  		p2 := s.Prog(st)
   650  		p2.From.Type = obj.TYPE_REG
   651  		p2.From.Reg = out
   652  		p2.To.Type = obj.TYPE_MEM
   653  		p2.To.Reg = r0
   654  		p2.RegTo2 = arm64.REGTMP
   655  		p3 := s.Prog(arm64.ACBNZ)
   656  		p3.From.Type = obj.TYPE_REG
   657  		p3.From.Reg = arm64.REGTMP
   658  		p3.To.Type = obj.TYPE_BRANCH
   659  		p3.To.SetTarget(p)
   660  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   661  		ssa.OpARM64LoweredAtomicAdd32Variant:
   662  		// LDADDAL	Rarg1, (Rarg0), Rout
   663  		// ADD		Rarg1, Rout
   664  		op := arm64.ALDADDALD
   665  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   666  			op = arm64.ALDADDALW
   667  		}
   668  		r0 := v.Args[0].Reg()
   669  		r1 := v.Args[1].Reg()
   670  		out := v.Reg0()
   671  		p := s.Prog(op)
   672  		p.From.Type = obj.TYPE_REG
   673  		p.From.Reg = r1
   674  		p.To.Type = obj.TYPE_MEM
   675  		p.To.Reg = r0
   676  		p.RegTo2 = out
   677  		p1 := s.Prog(arm64.AADD)
   678  		p1.From.Type = obj.TYPE_REG
   679  		p1.From.Reg = r1
   680  		p1.To.Type = obj.TYPE_REG
   681  		p1.To.Reg = out
   682  	case ssa.OpARM64LoweredAtomicCas64,
   683  		ssa.OpARM64LoweredAtomicCas32:
   684  		// LDAXR	(Rarg0), Rtmp
   685  		// CMP		Rarg1, Rtmp
   686  		// BNE		3(PC)
   687  		// STLXR	Rarg2, (Rarg0), Rtmp
   688  		// CBNZ		Rtmp, -4(PC)
   689  		// CSET		EQ, Rout
   690  		ld := arm64.ALDAXR
   691  		st := arm64.ASTLXR
   692  		cmp := arm64.ACMP
   693  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   694  			ld = arm64.ALDAXRW
   695  			st = arm64.ASTLXRW
   696  			cmp = arm64.ACMPW
   697  		}
   698  		r0 := v.Args[0].Reg()
   699  		r1 := v.Args[1].Reg()
   700  		r2 := v.Args[2].Reg()
   701  		out := v.Reg0()
   702  		p := s.Prog(ld)
   703  		p.From.Type = obj.TYPE_MEM
   704  		p.From.Reg = r0
   705  		p.To.Type = obj.TYPE_REG
   706  		p.To.Reg = arm64.REGTMP
   707  		p1 := s.Prog(cmp)
   708  		p1.From.Type = obj.TYPE_REG
   709  		p1.From.Reg = r1
   710  		p1.Reg = arm64.REGTMP
   711  		p2 := s.Prog(arm64.ABNE)
   712  		p2.To.Type = obj.TYPE_BRANCH
   713  		p3 := s.Prog(st)
   714  		p3.From.Type = obj.TYPE_REG
   715  		p3.From.Reg = r2
   716  		p3.To.Type = obj.TYPE_MEM
   717  		p3.To.Reg = r0
   718  		p3.RegTo2 = arm64.REGTMP
   719  		p4 := s.Prog(arm64.ACBNZ)
   720  		p4.From.Type = obj.TYPE_REG
   721  		p4.From.Reg = arm64.REGTMP
   722  		p4.To.Type = obj.TYPE_BRANCH
   723  		p4.To.SetTarget(p)
   724  		p5 := s.Prog(arm64.ACSET)
   725  		p5.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   726  		p5.From.Offset = int64(arm64.SPOP_EQ)
   727  		p5.To.Type = obj.TYPE_REG
   728  		p5.To.Reg = out
   729  		p2.To.SetTarget(p5)
   730  	case ssa.OpARM64LoweredAtomicCas64Variant,
   731  		ssa.OpARM64LoweredAtomicCas32Variant:
   732  		// Rarg0: ptr
   733  		// Rarg1: old
   734  		// Rarg2: new
   735  		// MOV  	Rarg1, Rtmp
   736  		// CASAL	Rtmp, (Rarg0), Rarg2
   737  		// CMP  	Rarg1, Rtmp
   738  		// CSET 	EQ, Rout
   739  		cas := arm64.ACASALD
   740  		cmp := arm64.ACMP
   741  		mov := arm64.AMOVD
   742  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   743  			cas = arm64.ACASALW
   744  			cmp = arm64.ACMPW
   745  			mov = arm64.AMOVW
   746  		}
   747  		r0 := v.Args[0].Reg()
   748  		r1 := v.Args[1].Reg()
   749  		r2 := v.Args[2].Reg()
   750  		out := v.Reg0()
   751  
   752  		// MOV  	Rarg1, Rtmp
   753  		p := s.Prog(mov)
   754  		p.From.Type = obj.TYPE_REG
   755  		p.From.Reg = r1
   756  		p.To.Type = obj.TYPE_REG
   757  		p.To.Reg = arm64.REGTMP
   758  
   759  		// CASAL	Rtmp, (Rarg0), Rarg2
   760  		p1 := s.Prog(cas)
   761  		p1.From.Type = obj.TYPE_REG
   762  		p1.From.Reg = arm64.REGTMP
   763  		p1.To.Type = obj.TYPE_MEM
   764  		p1.To.Reg = r0
   765  		p1.RegTo2 = r2
   766  
   767  		// CMP  	Rarg1, Rtmp
   768  		p2 := s.Prog(cmp)
   769  		p2.From.Type = obj.TYPE_REG
   770  		p2.From.Reg = r1
   771  		p2.Reg = arm64.REGTMP
   772  
   773  		// CSET 	EQ, Rout
   774  		p3 := s.Prog(arm64.ACSET)
   775  		p3.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   776  		p3.From.Offset = int64(arm64.SPOP_EQ)
   777  		p3.To.Type = obj.TYPE_REG
   778  		p3.To.Reg = out
   779  
   780  	case ssa.OpARM64LoweredAtomicAnd8,
   781  		ssa.OpARM64LoweredAtomicAnd32,
   782  		ssa.OpARM64LoweredAtomicOr8,
   783  		ssa.OpARM64LoweredAtomicOr32:
   784  		// LDAXRB/LDAXRW (Rarg0), Rout
   785  		// AND/OR	Rarg1, Rout
    786  		// STLXRB/STLXRW Rout, (Rarg0), Rtmp
   787  		// CBNZ		Rtmp, -3(PC)
   788  		ld := arm64.ALDAXRB
   789  		st := arm64.ASTLXRB
   790  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   791  			ld = arm64.ALDAXRW
   792  			st = arm64.ASTLXRW
   793  		}
   794  		r0 := v.Args[0].Reg()
   795  		r1 := v.Args[1].Reg()
   796  		out := v.Reg0()
   797  		p := s.Prog(ld)
   798  		p.From.Type = obj.TYPE_MEM
   799  		p.From.Reg = r0
   800  		p.To.Type = obj.TYPE_REG
   801  		p.To.Reg = out
   802  		p1 := s.Prog(v.Op.Asm())
   803  		p1.From.Type = obj.TYPE_REG
   804  		p1.From.Reg = r1
   805  		p1.To.Type = obj.TYPE_REG
   806  		p1.To.Reg = out
   807  		p2 := s.Prog(st)
   808  		p2.From.Type = obj.TYPE_REG
   809  		p2.From.Reg = out
   810  		p2.To.Type = obj.TYPE_MEM
   811  		p2.To.Reg = r0
   812  		p2.RegTo2 = arm64.REGTMP
   813  		p3 := s.Prog(arm64.ACBNZ)
   814  		p3.From.Type = obj.TYPE_REG
   815  		p3.From.Reg = arm64.REGTMP
   816  		p3.To.Type = obj.TYPE_BRANCH
   817  		p3.To.SetTarget(p)
   818  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   819  		ssa.OpARM64LoweredAtomicAnd32Variant:
   820  		atomic_clear := arm64.ALDCLRALW
   821  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   822  			atomic_clear = arm64.ALDCLRALB
   823  		}
   824  		r0 := v.Args[0].Reg()
   825  		r1 := v.Args[1].Reg()
   826  		out := v.Reg0()
   827  
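         		// Atomic AND is implemented as an atomic bit-clear (LDCLRAL*) of the
         		// complemented mask: clearing ^mask is the same as ANDing with mask.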
    828  		// MVN       Rarg1, Rtmp
   829  		p := s.Prog(arm64.AMVN)
   830  		p.From.Type = obj.TYPE_REG
   831  		p.From.Reg = r1
   832  		p.To.Type = obj.TYPE_REG
   833  		p.To.Reg = arm64.REGTMP
   834  
    835  		// LDCLRALW/LDCLRALB  Rtmp, (Rarg0), Rout
   836  		p1 := s.Prog(atomic_clear)
   837  		p1.From.Type = obj.TYPE_REG
   838  		p1.From.Reg = arm64.REGTMP
   839  		p1.To.Type = obj.TYPE_MEM
   840  		p1.To.Reg = r0
   841  		p1.RegTo2 = out
   842  
   843  		// AND       Rarg1, Rout
   844  		p2 := s.Prog(arm64.AAND)
   845  		p2.From.Type = obj.TYPE_REG
   846  		p2.From.Reg = r1
   847  		p2.To.Type = obj.TYPE_REG
   848  		p2.To.Reg = out
   849  
   850  	case ssa.OpARM64LoweredAtomicOr8Variant,
   851  		ssa.OpARM64LoweredAtomicOr32Variant:
   852  		atomic_or := arm64.ALDORALW
   853  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   854  			atomic_or = arm64.ALDORALB
   855  		}
   856  		r0 := v.Args[0].Reg()
   857  		r1 := v.Args[1].Reg()
   858  		out := v.Reg0()
   859  
    860  		// LDORALW/LDORALB  Rarg1, (Rarg0), Rout
   861  		p := s.Prog(atomic_or)
   862  		p.From.Type = obj.TYPE_REG
   863  		p.From.Reg = r1
   864  		p.To.Type = obj.TYPE_MEM
   865  		p.To.Reg = r0
   866  		p.RegTo2 = out
   867  
   868  		// ORR       Rarg1, Rout
   869  		p2 := s.Prog(arm64.AORR)
   870  		p2.From.Type = obj.TYPE_REG
   871  		p2.From.Reg = r1
   872  		p2.To.Type = obj.TYPE_REG
   873  		p2.To.Reg = out
   874  
   875  	case ssa.OpARM64MOVBreg,
   876  		ssa.OpARM64MOVBUreg,
   877  		ssa.OpARM64MOVHreg,
   878  		ssa.OpARM64MOVHUreg,
   879  		ssa.OpARM64MOVWreg,
   880  		ssa.OpARM64MOVWUreg:
   881  		a := v.Args[0]
   882  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   883  			a = a.Args[0]
   884  		}
   885  		if a.Op == ssa.OpLoadReg {
   886  			t := a.Type
   887  			switch {
   888  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   889  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   890  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   891  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   892  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   893  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
    894  				// arg is a properly typed load, already zero/sign-extended; don't extend again
   895  				if v.Reg() == v.Args[0].Reg() {
   896  					return
   897  				}
   898  				p := s.Prog(arm64.AMOVD)
   899  				p.From.Type = obj.TYPE_REG
   900  				p.From.Reg = v.Args[0].Reg()
   901  				p.To.Type = obj.TYPE_REG
   902  				p.To.Reg = v.Reg()
   903  				return
   904  			default:
   905  			}
   906  		}
   907  		fallthrough
   908  	case ssa.OpARM64MVN,
   909  		ssa.OpARM64NEG,
   910  		ssa.OpARM64FABSD,
   911  		ssa.OpARM64FMOVDfpgp,
   912  		ssa.OpARM64FMOVDgpfp,
   913  		ssa.OpARM64FMOVSfpgp,
   914  		ssa.OpARM64FMOVSgpfp,
   915  		ssa.OpARM64FNEGS,
   916  		ssa.OpARM64FNEGD,
   917  		ssa.OpARM64FSQRTS,
   918  		ssa.OpARM64FSQRTD,
   919  		ssa.OpARM64FCVTZSSW,
   920  		ssa.OpARM64FCVTZSDW,
   921  		ssa.OpARM64FCVTZUSW,
   922  		ssa.OpARM64FCVTZUDW,
   923  		ssa.OpARM64FCVTZSS,
   924  		ssa.OpARM64FCVTZSD,
   925  		ssa.OpARM64FCVTZUS,
   926  		ssa.OpARM64FCVTZUD,
   927  		ssa.OpARM64SCVTFWS,
   928  		ssa.OpARM64SCVTFWD,
   929  		ssa.OpARM64SCVTFS,
   930  		ssa.OpARM64SCVTFD,
   931  		ssa.OpARM64UCVTFWS,
   932  		ssa.OpARM64UCVTFWD,
   933  		ssa.OpARM64UCVTFS,
   934  		ssa.OpARM64UCVTFD,
   935  		ssa.OpARM64FCVTSD,
   936  		ssa.OpARM64FCVTDS,
   937  		ssa.OpARM64REV,
   938  		ssa.OpARM64REVW,
   939  		ssa.OpARM64REV16,
   940  		ssa.OpARM64REV16W,
   941  		ssa.OpARM64RBIT,
   942  		ssa.OpARM64RBITW,
   943  		ssa.OpARM64CLZ,
   944  		ssa.OpARM64CLZW,
   945  		ssa.OpARM64FRINTAD,
   946  		ssa.OpARM64FRINTMD,
   947  		ssa.OpARM64FRINTND,
   948  		ssa.OpARM64FRINTPD,
   949  		ssa.OpARM64FRINTZD:
   950  		p := s.Prog(v.Op.Asm())
   951  		p.From.Type = obj.TYPE_REG
   952  		p.From.Reg = v.Args[0].Reg()
   953  		p.To.Type = obj.TYPE_REG
   954  		p.To.Reg = v.Reg()
   955  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
   956  		// input is already rounded
   957  	case ssa.OpARM64VCNT:
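         		// Rewrite the float register operands into the corresponding vector registers
         		// with an 8B arrangement (Vn.B8), which is what VCNT operates on.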
   958  		p := s.Prog(v.Op.Asm())
   959  		p.From.Type = obj.TYPE_REG
   960  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   961  		p.To.Type = obj.TYPE_REG
   962  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   963  	case ssa.OpARM64VUADDLV:
   964  		p := s.Prog(v.Op.Asm())
   965  		p.From.Type = obj.TYPE_REG
   966  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
   967  		p.To.Type = obj.TYPE_REG
   968  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
   969  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
   970  		r1 := int16(arm64.REGZERO)
   971  		if v.Op != ssa.OpARM64CSEL0 {
   972  			r1 = v.Args[1].Reg()
   973  		}
   974  		p := s.Prog(v.Op.Asm())
   975  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   976  		condCode := condBits[ssa.Op(v.AuxInt)]
   977  		p.From.Offset = int64(condCode)
   978  		p.Reg = v.Args[0].Reg()
   979  		p.SetFrom3Reg(r1)
   980  		p.To.Type = obj.TYPE_REG
   981  		p.To.Reg = v.Reg()
   982  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
   983  		p := s.Prog(v.Op.Asm())
   984  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   985  		condCode := condBits[ssa.Op(v.AuxInt)]
   986  		p.From.Offset = int64(condCode)
   987  		p.Reg = v.Args[0].Reg()
   988  		p.SetFrom3Reg(v.Args[1].Reg())
   989  		p.To.Type = obj.TYPE_REG
   990  		p.To.Reg = v.Reg()
   991  	case ssa.OpARM64CSETM:
   992  		p := s.Prog(arm64.ACSETM)
   993  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   994  		condCode := condBits[ssa.Op(v.AuxInt)]
   995  		p.From.Offset = int64(condCode)
   996  		p.To.Type = obj.TYPE_REG
   997  		p.To.Reg = v.Reg()
   998  	case ssa.OpARM64DUFFZERO:
   999  		// runtime.duffzero expects start address in R20
  1000  		p := s.Prog(obj.ADUFFZERO)
  1001  		p.To.Type = obj.TYPE_MEM
  1002  		p.To.Name = obj.NAME_EXTERN
  1003  		p.To.Sym = ir.Syms.Duffzero
  1004  		p.To.Offset = v.AuxInt
  1005  	case ssa.OpARM64LoweredZero:
  1006  		// STP.P	(ZR,ZR), 16(R16)
  1007  		// CMP	Rarg1, R16
  1008  		// BLE	-2(PC)
  1009  		// arg1 is the address of the last 16-byte unit to zero
  1010  		p := s.Prog(arm64.ASTP)
  1011  		p.Scond = arm64.C_XPOST
  1012  		p.From.Type = obj.TYPE_REGREG
  1013  		p.From.Reg = arm64.REGZERO
  1014  		p.From.Offset = int64(arm64.REGZERO)
  1015  		p.To.Type = obj.TYPE_MEM
  1016  		p.To.Reg = arm64.REG_R16
  1017  		p.To.Offset = 16
  1018  		p2 := s.Prog(arm64.ACMP)
  1019  		p2.From.Type = obj.TYPE_REG
  1020  		p2.From.Reg = v.Args[1].Reg()
  1021  		p2.Reg = arm64.REG_R16
  1022  		p3 := s.Prog(arm64.ABLE)
  1023  		p3.To.Type = obj.TYPE_BRANCH
  1024  		p3.To.SetTarget(p)
  1025  	case ssa.OpARM64DUFFCOPY:
  1026  		p := s.Prog(obj.ADUFFCOPY)
  1027  		p.To.Type = obj.TYPE_MEM
  1028  		p.To.Name = obj.NAME_EXTERN
  1029  		p.To.Sym = ir.Syms.Duffcopy
  1030  		p.To.Offset = v.AuxInt
  1031  	case ssa.OpARM64LoweredMove:
  1032  		// LDP.P	16(R16), (R25, Rtmp)
  1033  		// STP.P	(R25, Rtmp), 16(R17)
  1034  		// CMP	Rarg2, R16
  1035  		// BLE	-3(PC)
  1036  		// arg2 is the address of the last element of src
  1037  		p := s.Prog(arm64.ALDP)
  1038  		p.Scond = arm64.C_XPOST
  1039  		p.From.Type = obj.TYPE_MEM
  1040  		p.From.Reg = arm64.REG_R16
  1041  		p.From.Offset = 16
  1042  		p.To.Type = obj.TYPE_REGREG
  1043  		p.To.Reg = arm64.REG_R25
  1044  		p.To.Offset = int64(arm64.REGTMP)
  1045  		p2 := s.Prog(arm64.ASTP)
  1046  		p2.Scond = arm64.C_XPOST
  1047  		p2.From.Type = obj.TYPE_REGREG
  1048  		p2.From.Reg = arm64.REG_R25
  1049  		p2.From.Offset = int64(arm64.REGTMP)
  1050  		p2.To.Type = obj.TYPE_MEM
  1051  		p2.To.Reg = arm64.REG_R17
  1052  		p2.To.Offset = 16
  1053  		p3 := s.Prog(arm64.ACMP)
  1054  		p3.From.Type = obj.TYPE_REG
  1055  		p3.From.Reg = v.Args[2].Reg()
  1056  		p3.Reg = arm64.REG_R16
  1057  		p4 := s.Prog(arm64.ABLE)
  1058  		p4.To.Type = obj.TYPE_BRANCH
  1059  		p4.To.SetTarget(p)
  1060  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1061  		s.Call(v)
  1062  	case ssa.OpARM64CALLtail:
  1063  		s.TailCall(v)
  1064  	case ssa.OpARM64LoweredWB:
  1065  		p := s.Prog(obj.ACALL)
  1066  		p.To.Type = obj.TYPE_MEM
  1067  		p.To.Name = obj.NAME_EXTERN
  1068  		p.To.Sym = v.Aux.(*obj.LSym)
  1069  	case ssa.OpARM64LoweredPanicBoundsA, ssa.OpARM64LoweredPanicBoundsB, ssa.OpARM64LoweredPanicBoundsC:
  1070  		p := s.Prog(obj.ACALL)
  1071  		p.To.Type = obj.TYPE_MEM
  1072  		p.To.Name = obj.NAME_EXTERN
  1073  		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
  1074  		s.UseArgs(16) // space used in callee args area by assembly stubs
  1075  	case ssa.OpARM64LoweredNilCheck:
  1076  		// Issue a load which will fault if arg is nil.
  1077  		p := s.Prog(arm64.AMOVB)
  1078  		p.From.Type = obj.TYPE_MEM
  1079  		p.From.Reg = v.Args[0].Reg()
  1080  		ssagen.AddAux(&p.From, v)
  1081  		p.To.Type = obj.TYPE_REG
  1082  		p.To.Reg = arm64.REGTMP
  1083  		if logopt.Enabled() {
  1084  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1085  		}
  1086  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1087  			base.WarnfAt(v.Pos, "generated nil check")
  1088  		}
  1089  	case ssa.OpARM64Equal,
  1090  		ssa.OpARM64NotEqual,
  1091  		ssa.OpARM64LessThan,
  1092  		ssa.OpARM64LessEqual,
  1093  		ssa.OpARM64GreaterThan,
  1094  		ssa.OpARM64GreaterEqual,
  1095  		ssa.OpARM64LessThanU,
  1096  		ssa.OpARM64LessEqualU,
  1097  		ssa.OpARM64GreaterThanU,
  1098  		ssa.OpARM64GreaterEqualU,
  1099  		ssa.OpARM64LessThanF,
  1100  		ssa.OpARM64LessEqualF,
  1101  		ssa.OpARM64GreaterThanF,
  1102  		ssa.OpARM64GreaterEqualF,
  1103  		ssa.OpARM64NotLessThanF,
  1104  		ssa.OpARM64NotLessEqualF,
  1105  		ssa.OpARM64NotGreaterThanF,
  1106  		ssa.OpARM64NotGreaterEqualF:
  1107  		// generate boolean values using CSET
  1108  		p := s.Prog(arm64.ACSET)
  1109  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1110  		condCode := condBits[v.Op]
  1111  		p.From.Offset = int64(condCode)
  1112  		p.To.Type = obj.TYPE_REG
  1113  		p.To.Reg = v.Reg()
  1114  	case ssa.OpARM64PRFM:
  1115  		p := s.Prog(v.Op.Asm())
  1116  		p.From.Type = obj.TYPE_MEM
  1117  		p.From.Reg = v.Args[0].Reg()
  1118  		p.To.Type = obj.TYPE_CONST
  1119  		p.To.Offset = v.AuxInt
  1120  	case ssa.OpARM64LoweredGetClosurePtr:
  1121  		// Closure pointer is R26 (arm64.REGCTXT).
  1122  		ssagen.CheckLoweredGetClosurePtr(v)
  1123  	case ssa.OpARM64LoweredGetCallerSP:
  1124  		// caller's SP is FixedFrameSize below the address of the first arg
  1125  		p := s.Prog(arm64.AMOVD)
  1126  		p.From.Type = obj.TYPE_ADDR
  1127  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1128  		p.From.Name = obj.NAME_PARAM
  1129  		p.To.Type = obj.TYPE_REG
  1130  		p.To.Reg = v.Reg()
  1131  	case ssa.OpARM64LoweredGetCallerPC:
  1132  		p := s.Prog(obj.AGETCALLERPC)
  1133  		p.To.Type = obj.TYPE_REG
  1134  		p.To.Reg = v.Reg()
  1135  	case ssa.OpARM64DMB:
  1136  		p := s.Prog(v.Op.Asm())
  1137  		p.From.Type = obj.TYPE_CONST
  1138  		p.From.Offset = v.AuxInt
  1139  	case ssa.OpARM64FlagConstant:
  1140  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1141  	case ssa.OpARM64InvertFlags:
  1142  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1143  	case ssa.OpClobber:
  1144  		// MOVW	$0xdeaddead, REGTMP
  1145  		// MOVW	REGTMP, (slot)
  1146  		// MOVW	REGTMP, 4(slot)
  1147  		p := s.Prog(arm64.AMOVW)
  1148  		p.From.Type = obj.TYPE_CONST
  1149  		p.From.Offset = 0xdeaddead
  1150  		p.To.Type = obj.TYPE_REG
  1151  		p.To.Reg = arm64.REGTMP
  1152  		p = s.Prog(arm64.AMOVW)
  1153  		p.From.Type = obj.TYPE_REG
  1154  		p.From.Reg = arm64.REGTMP
  1155  		p.To.Type = obj.TYPE_MEM
  1156  		p.To.Reg = arm64.REGSP
  1157  		ssagen.AddAux(&p.To, v)
  1158  		p = s.Prog(arm64.AMOVW)
  1159  		p.From.Type = obj.TYPE_REG
  1160  		p.From.Reg = arm64.REGTMP
  1161  		p.To.Type = obj.TYPE_MEM
  1162  		p.To.Reg = arm64.REGSP
  1163  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1164  	case ssa.OpClobberReg:
  1165  		x := uint64(0xdeaddeaddeaddead)
  1166  		p := s.Prog(arm64.AMOVD)
  1167  		p.From.Type = obj.TYPE_CONST
  1168  		p.From.Offset = int64(x)
  1169  		p.To.Type = obj.TYPE_REG
  1170  		p.To.Reg = v.Reg()
  1171  	default:
  1172  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1173  	}
  1174  }
  1175  
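         // condBits maps an SSA comparison op to the ARM64 condition code used when
         // materializing it with CSET/CSEL-style instructions.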
  1176  var condBits = map[ssa.Op]arm64.SpecialOperand{
  1177  	ssa.OpARM64Equal:         arm64.SPOP_EQ,
  1178  	ssa.OpARM64NotEqual:      arm64.SPOP_NE,
  1179  	ssa.OpARM64LessThan:      arm64.SPOP_LT,
  1180  	ssa.OpARM64LessThanU:     arm64.SPOP_LO,
  1181  	ssa.OpARM64LessEqual:     arm64.SPOP_LE,
  1182  	ssa.OpARM64LessEqualU:    arm64.SPOP_LS,
  1183  	ssa.OpARM64GreaterThan:   arm64.SPOP_GT,
  1184  	ssa.OpARM64GreaterThanU:  arm64.SPOP_HI,
  1185  	ssa.OpARM64GreaterEqual:  arm64.SPOP_GE,
  1186  	ssa.OpARM64GreaterEqualU: arm64.SPOP_HS,
  1187  	ssa.OpARM64LessThanF:     arm64.SPOP_MI, // Less than
  1188  	ssa.OpARM64LessEqualF:    arm64.SPOP_LS, // Less than or equal to
  1189  	ssa.OpARM64GreaterThanF:  arm64.SPOP_GT, // Greater than
  1190  	ssa.OpARM64GreaterEqualF: arm64.SPOP_GE, // Greater than or equal to
  1191  
   1192  	// The following condition codes include the unordered case, to correctly handle comparisons involving NaN.
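         	// (FCMP sets NZCV to 0011 for an unordered comparison, so these conditions evaluate to true when either operand is NaN.)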
  1193  	ssa.OpARM64NotLessThanF:     arm64.SPOP_PL, // Greater than, equal to, or unordered
  1194  	ssa.OpARM64NotLessEqualF:    arm64.SPOP_HI, // Greater than or unordered
  1195  	ssa.OpARM64NotGreaterThanF:  arm64.SPOP_LE, // Less than, equal to or unordered
  1196  	ssa.OpARM64NotGreaterEqualF: arm64.SPOP_LT, // Less than or unordered
  1197  }
  1198  
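         // blockJump gives, for each conditional block kind, the branch instruction that
         // jumps to Succs[0] when the condition holds (asm) and the inverted branch to
         // Succs[1] used when Succs[0] is the fallthrough block (invasm).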
  1199  var blockJump = map[ssa.BlockKind]struct {
  1200  	asm, invasm obj.As
  1201  }{
  1202  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1203  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1204  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1205  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1206  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1207  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1208  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1209  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1210  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1211  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1212  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1213  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1214  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1215  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1216  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1217  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1218  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1219  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1220  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1221  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1222  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1223  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1224  }
  1225  
   1226  // To model a 'LEnoov' ('<=' without overflow checking) branch.
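         // There is no single ARM64 condition for "MI or EQ", so the branch is emitted
         // as a pair of conditional jumps via s.CombJump.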
  1227  var leJumps = [2][2]ssagen.IndexJump{
  1228  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1229  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1230  }
  1231  
   1232  // To model a 'GTnoov' ('>' without overflow checking) branch.
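         // GTnoov is the complement ("PL and NE"), likewise emitted as a pair of conditional jumps.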
  1233  var gtJumps = [2][2]ssagen.IndexJump{
  1234  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1235  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1236  }
  1237  
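         // ssaGenBlock emits the control-flow instructions that end block b; next is the
         // block laid out immediately after b, so a branch to next can be omitted.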
  1238  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1239  	switch b.Kind {
  1240  	case ssa.BlockPlain:
  1241  		if b.Succs[0].Block() != next {
  1242  			p := s.Prog(obj.AJMP)
  1243  			p.To.Type = obj.TYPE_BRANCH
  1244  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1245  		}
  1246  
  1247  	case ssa.BlockDefer:
  1248  		// defer returns in R0:
  1249  		// 0 if we should continue executing
  1250  		// 1 if we should jump to deferreturn call
  1251  		p := s.Prog(arm64.ACMP)
  1252  		p.From.Type = obj.TYPE_CONST
  1253  		p.From.Offset = 0
  1254  		p.Reg = arm64.REG_R0
  1255  		p = s.Prog(arm64.ABNE)
  1256  		p.To.Type = obj.TYPE_BRANCH
  1257  		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
  1258  		if b.Succs[0].Block() != next {
  1259  			p := s.Prog(obj.AJMP)
  1260  			p.To.Type = obj.TYPE_BRANCH
  1261  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1262  		}
  1263  
  1264  	case ssa.BlockExit, ssa.BlockRetJmp:
  1265  
  1266  	case ssa.BlockRet:
  1267  		s.Prog(obj.ARET)
  1268  
  1269  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1270  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1271  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1272  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1273  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1274  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1275  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1276  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1277  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1278  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
  1279  		jmp := blockJump[b.Kind]
  1280  		var p *obj.Prog
  1281  		switch next {
  1282  		case b.Succs[0].Block():
  1283  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1284  		case b.Succs[1].Block():
  1285  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1286  		default:
  1287  			if b.Likely != ssa.BranchUnlikely {
  1288  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1289  				s.Br(obj.AJMP, b.Succs[1].Block())
  1290  			} else {
  1291  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1292  				s.Br(obj.AJMP, b.Succs[0].Block())
  1293  			}
  1294  		}
  1295  		if !b.Controls[0].Type.IsFlags() {
  1296  			p.From.Type = obj.TYPE_REG
  1297  			p.From.Reg = b.Controls[0].Reg()
  1298  		}
  1299  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1300  		jmp := blockJump[b.Kind]
  1301  		var p *obj.Prog
  1302  		switch next {
  1303  		case b.Succs[0].Block():
  1304  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1305  		case b.Succs[1].Block():
  1306  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1307  		default:
  1308  			if b.Likely != ssa.BranchUnlikely {
  1309  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1310  				s.Br(obj.AJMP, b.Succs[1].Block())
  1311  			} else {
  1312  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1313  				s.Br(obj.AJMP, b.Succs[0].Block())
  1314  			}
  1315  		}
  1316  		p.From.Offset = b.AuxInt
  1317  		p.From.Type = obj.TYPE_CONST
  1318  		p.Reg = b.Controls[0].Reg()
  1319  
  1320  	case ssa.BlockARM64LEnoov:
  1321  		s.CombJump(b, next, &leJumps)
  1322  	case ssa.BlockARM64GTnoov:
  1323  		s.CombJump(b, next, &gtJumps)
  1324  
  1325  	case ssa.BlockARM64JUMPTABLE:
  1326  		// MOVD	(TABLE)(IDX<<3), Rtmp
  1327  		// JMP	(Rtmp)
  1328  		p := s.Prog(arm64.AMOVD)
  1329  		p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
  1330  		p.To.Type = obj.TYPE_REG
  1331  		p.To.Reg = arm64.REGTMP
  1332  		p = s.Prog(obj.AJMP)
  1333  		p.To.Type = obj.TYPE_MEM
  1334  		p.To.Reg = arm64.REGTMP
  1335  		// Save jump tables for later resolution of the target blocks.
  1336  		s.JumpTables = append(s.JumpTables, b)
  1337  
  1338  	default:
  1339  		b.Fatalf("branch not implemented: %s", b.LongString())
  1340  	}
  1341  }
  1342  
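         // loadRegResult generates a load of n's stack slot (at offset off) into register reg,
         // using the load instruction appropriate for type t.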
  1343  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1344  	p := s.Prog(loadByType(t))
  1345  	p.From.Type = obj.TYPE_MEM
  1346  	p.From.Name = obj.NAME_AUTO
  1347  	p.From.Sym = n.Linksym()
  1348  	p.From.Offset = n.FrameOffset() + off
  1349  	p.To.Type = obj.TYPE_REG
  1350  	p.To.Reg = reg
  1351  	return p
  1352  }
  1353  
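         // spillArgReg appends an instruction that stores argument register reg into the
         // parameter stack slot for n at offset off, marked as not a statement.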
  1354  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1355  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1356  	p.To.Name = obj.NAME_PARAM
  1357  	p.To.Sym = n.Linksym()
  1358  	p.Pos = p.Pos.WithNotStmt()
  1359  	return p
  1360  }