github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/obj/riscv/obj.go (about)

     1  // Copyright © 2015 The Go Authors.  All rights reserved.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package riscv
    22  
    23  import (
    24  	"fmt"
    25  	"log"
    26  	"math/bits"
    27  
    28  	"github.com/go-asm/go/abi"
    29  	"github.com/go-asm/go/cmd/obj"
    30  	"github.com/go-asm/go/cmd/objabi"
    31  	"github.com/go-asm/go/cmd/sys"
    32  )
    33  
// buildop has an empty body and does nothing with ctxt.
// NOTE(review): presumably registered as the architecture's init hook
// elsewhere in the package — confirm against the LinkArch definition.
func buildop(ctxt *obj.Link) {}
    35  
    36  func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) {
    37  	switch p.As {
    38  	case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY:
    39  	default:
    40  		ctxt.Diag("unexpected Prog in jalToSym: %v", p)
    41  		return
    42  	}
    43  
    44  	p.As = AJAL
    45  	p.Mark |= NEED_JAL_RELOC
    46  	p.From.Type = obj.TYPE_REG
    47  	p.From.Reg = lr
    48  	p.Reg = obj.REG_NONE
    49  }
    50  
    51  // progedit is called individually for each *obj.Prog. It normalizes instruction
    52  // formats and eliminates as many pseudo-instructions as possible.
    53  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
    54  
    55  	// Expand binary instructions to ternary ones.
    56  	if p.Reg == obj.REG_NONE {
    57  		switch p.As {
    58  		case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI,
    59  			AADDIW, ASLLIW, ASRLIW, ASRAIW, AADDW, ASUBW, ASLLW, ASRLW, ASRAW,
    60  			AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA,
    61  			AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW,
    62  			AREM, AREMU, AREMW, AREMUW:
    63  			p.Reg = p.To.Reg
    64  		}
    65  	}
    66  
    67  	// Rewrite instructions with constant operands to refer to the immediate
    68  	// form of the instruction.
    69  	if p.From.Type == obj.TYPE_CONST {
    70  		switch p.As {
    71  		case AADD:
    72  			p.As = AADDI
    73  		case ASUB:
    74  			p.As, p.From.Offset = AADDI, -p.From.Offset
    75  		case ASLT:
    76  			p.As = ASLTI
    77  		case ASLTU:
    78  			p.As = ASLTIU
    79  		case AAND:
    80  			p.As = AANDI
    81  		case AOR:
    82  			p.As = AORI
    83  		case AXOR:
    84  			p.As = AXORI
    85  		case ASLL:
    86  			p.As = ASLLI
    87  		case ASRL:
    88  			p.As = ASRLI
    89  		case ASRA:
    90  			p.As = ASRAI
    91  		case AADDW:
    92  			p.As = AADDIW
    93  		case ASUBW:
    94  			p.As, p.From.Offset = AADDIW, -p.From.Offset
    95  		case ASLLW:
    96  			p.As = ASLLIW
    97  		case ASRLW:
    98  			p.As = ASRLIW
    99  		case ASRAW:
   100  			p.As = ASRAIW
   101  		}
   102  	}
   103  
   104  	switch p.As {
   105  	case obj.AJMP:
   106  		// Turn JMP into JAL ZERO or JALR ZERO.
   107  		p.From.Type = obj.TYPE_REG
   108  		p.From.Reg = REG_ZERO
   109  
   110  		switch p.To.Type {
   111  		case obj.TYPE_BRANCH:
   112  			p.As = AJAL
   113  		case obj.TYPE_MEM:
   114  			switch p.To.Name {
   115  			case obj.NAME_NONE:
   116  				p.As = AJALR
   117  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   118  				// Handled in preprocess.
   119  			default:
   120  				ctxt.Diag("unsupported name %d for %v", p.To.Name, p)
   121  			}
   122  		default:
   123  			panic(fmt.Sprintf("unhandled type %+v", p.To.Type))
   124  		}
   125  
   126  	case obj.ACALL:
   127  		switch p.To.Type {
   128  		case obj.TYPE_MEM:
   129  			// Handled in preprocess.
   130  		case obj.TYPE_REG:
   131  			p.As = AJALR
   132  			p.From.Type = obj.TYPE_REG
   133  			p.From.Reg = REG_LR
   134  		default:
   135  			ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p)
   136  		}
   137  
   138  	case obj.AUNDEF:
   139  		p.As = AEBREAK
   140  
   141  	case ASCALL:
   142  		// SCALL is the old name for ECALL.
   143  		p.As = AECALL
   144  
   145  	case ASBREAK:
   146  		// SBREAK is the old name for EBREAK.
   147  		p.As = AEBREAK
   148  
   149  	case AMOV:
   150  		if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE && int64(int32(p.From.Offset)) != p.From.Offset {
   151  			ctz := bits.TrailingZeros64(uint64(p.From.Offset))
   152  			val := p.From.Offset >> ctz
   153  			if int64(int32(val)) == val {
   154  				// It's ok. We can handle constants with many trailing zeros.
   155  				break
   156  			}
   157  			// Put >32-bit constants in memory and load them.
   158  			p.From.Type = obj.TYPE_MEM
   159  			p.From.Sym = ctxt.Int64Sym(p.From.Offset)
   160  			p.From.Name = obj.NAME_EXTERN
   161  			p.From.Offset = 0
   162  		}
   163  	}
   164  }
   165  
   166  // addrToReg extracts the register from an Addr, handling special Addr.Names.
   167  func addrToReg(a obj.Addr) int16 {
   168  	switch a.Name {
   169  	case obj.NAME_PARAM, obj.NAME_AUTO:
   170  		return REG_SP
   171  	}
   172  	return a.Reg
   173  }
   174  
   175  // movToLoad converts a MOV mnemonic into the corresponding load instruction.
   176  func movToLoad(mnemonic obj.As) obj.As {
   177  	switch mnemonic {
   178  	case AMOV:
   179  		return ALD
   180  	case AMOVB:
   181  		return ALB
   182  	case AMOVH:
   183  		return ALH
   184  	case AMOVW:
   185  		return ALW
   186  	case AMOVBU:
   187  		return ALBU
   188  	case AMOVHU:
   189  		return ALHU
   190  	case AMOVWU:
   191  		return ALWU
   192  	case AMOVF:
   193  		return AFLW
   194  	case AMOVD:
   195  		return AFLD
   196  	default:
   197  		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
   198  	}
   199  }
   200  
   201  // movToStore converts a MOV mnemonic into the corresponding store instruction.
   202  func movToStore(mnemonic obj.As) obj.As {
   203  	switch mnemonic {
   204  	case AMOV:
   205  		return ASD
   206  	case AMOVB:
   207  		return ASB
   208  	case AMOVH:
   209  		return ASH
   210  	case AMOVW:
   211  		return ASW
   212  	case AMOVF:
   213  		return AFSW
   214  	case AMOVD:
   215  		return AFSD
   216  	default:
   217  		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
   218  	}
   219  }
   220  
   221  // markRelocs marks an obj.Prog that specifies a MOV pseudo-instruction and
   222  // requires relocation.
   223  func markRelocs(p *obj.Prog) {
   224  	switch p.As {
   225  	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
   226  		switch {
   227  		case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
   228  			switch p.From.Name {
   229  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   230  				p.Mark |= NEED_PCREL_ITYPE_RELOC
   231  			}
   232  		case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
   233  			switch p.From.Name {
   234  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   235  				p.Mark |= NEED_PCREL_ITYPE_RELOC
   236  			}
   237  		case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
   238  			switch p.To.Name {
   239  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   240  				p.Mark |= NEED_PCREL_STYPE_RELOC
   241  			}
   242  		}
   243  	}
   244  }
   245  
   246  // InvertBranch inverts the condition of a conditional branch.
   247  func InvertBranch(as obj.As) obj.As {
   248  	switch as {
   249  	case ABEQ:
   250  		return ABNE
   251  	case ABEQZ:
   252  		return ABNEZ
   253  	case ABGE:
   254  		return ABLT
   255  	case ABGEU:
   256  		return ABLTU
   257  	case ABGEZ:
   258  		return ABLTZ
   259  	case ABGT:
   260  		return ABLE
   261  	case ABGTU:
   262  		return ABLEU
   263  	case ABGTZ:
   264  		return ABLEZ
   265  	case ABLE:
   266  		return ABGT
   267  	case ABLEU:
   268  		return ABGTU
   269  	case ABLEZ:
   270  		return ABGTZ
   271  	case ABLT:
   272  		return ABGE
   273  	case ABLTU:
   274  		return ABGEU
   275  	case ABLTZ:
   276  		return ABGEZ
   277  	case ABNE:
   278  		return ABEQ
   279  	case ABNEZ:
   280  		return ABEQZ
   281  	default:
   282  		panic("InvertBranch: not a branch")
   283  	}
   284  }
   285  
   286  // containsCall reports whether the symbol contains a CALL (or equivalent)
   287  // instruction. Must be called after progedit.
   288  func containsCall(sym *obj.LSym) bool {
   289  	// CALLs are CALL or JAL(R) with link register LR.
   290  	for p := sym.Func().Text; p != nil; p = p.Link {
   291  		switch p.As {
   292  		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
   293  			return true
   294  		case AJAL, AJALR:
   295  			if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
   296  				return true
   297  			}
   298  		}
   299  	}
   300  
   301  	return false
   302  }
   303  
   304  // setPCs sets the Pc field in all instructions reachable from p.
   305  // It uses pc as the initial value and returns the next available pc.
   306  func setPCs(p *obj.Prog, pc int64) int64 {
   307  	for ; p != nil; p = p.Link {
   308  		p.Pc = pc
   309  		for _, ins := range instructionsForProg(p) {
   310  			pc += int64(ins.length())
   311  		}
   312  
   313  		if p.As == obj.APCALIGN {
   314  			alignedValue := p.From.Offset
   315  			v := pcAlignPadLength(pc, alignedValue)
   316  			pc += int64(v)
   317  		}
   318  	}
   319  	return pc
   320  }
   321  
   322  // stackOffset updates Addr offsets based on the current stack size.
   323  //
   324  // The stack looks like:
   325  // -------------------
   326  // |                 |
   327  // |      PARAMs     |
   328  // |                 |
   329  // |                 |
   330  // -------------------
   331  // |    Parent RA    |   SP on function entry
   332  // -------------------
   333  // |                 |
   334  // |                 |
   335  // |       AUTOs     |
   336  // |                 |
   337  // |                 |
   338  // -------------------
   339  // |        RA       |   SP during function execution
   340  // -------------------
   341  //
   342  // FixedFrameSize makes other packages aware of the space allocated for RA.
   343  //
   344  // A nicer version of this diagram can be found on slide 21 of the presentation
   345  // attached to https://golang.org/issue/16922#issuecomment-243748180.
   346  func stackOffset(a *obj.Addr, stacksize int64) {
   347  	switch a.Name {
   348  	case obj.NAME_AUTO:
   349  		// Adjust to the top of AUTOs.
   350  		a.Offset += stacksize
   351  	case obj.NAME_PARAM:
   352  		// Adjust to the bottom of PARAMs.
   353  		a.Offset += stacksize + 8
   354  	}
   355  }
   356  
   357  // preprocess generates prologue and epilogue code, computes PC-relative branch
   358  // and jump offsets, and resolves pseudo-registers.
   359  //
   360  // preprocess is called once per linker symbol.
   361  //
   362  // When preprocess finishes, all instructions in the symbol are either
   363  // concrete, real RISC-V instructions or directive pseudo-ops like TEXT,
   364  // PCDATA, and FUNCDATA.
   365  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   366  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   367  		return
   368  	}
   369  
   370  	// Generate the prologue.
   371  	text := cursym.Func().Text
   372  	if text.As != obj.ATEXT {
   373  		ctxt.Diag("preprocess: found symbol that does not start with TEXT directive")
   374  		return
   375  	}
   376  
   377  	stacksize := text.To.Offset
   378  	if stacksize == -8 {
   379  		// Historical way to mark NOFRAME.
   380  		text.From.Sym.Set(obj.AttrNoFrame, true)
   381  		stacksize = 0
   382  	}
   383  	if stacksize < 0 {
   384  		ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize)
   385  	}
   386  	if text.From.Sym.NoFrame() {
   387  		if stacksize != 0 {
   388  			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize)
   389  		}
   390  	}
   391  
   392  	if !containsCall(cursym) {
   393  		text.From.Sym.Set(obj.AttrLeaf, true)
   394  		if stacksize == 0 {
   395  			// A leaf function with no locals has no frame.
   396  			text.From.Sym.Set(obj.AttrNoFrame, true)
   397  		}
   398  	}
   399  
   400  	// Save LR unless there is no frame.
   401  	if !text.From.Sym.NoFrame() {
   402  		stacksize += ctxt.Arch.FixedFrameSize
   403  	}
   404  
   405  	cursym.Func().Args = text.To.Val.(int32)
   406  	cursym.Func().Locals = int32(stacksize)
   407  
   408  	prologue := text
   409  
   410  	if !cursym.Func().Text.From.Sym.NoSplit() {
   411  		prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check
   412  	}
   413  
   414  	if stacksize != 0 {
   415  		prologue = ctxt.StartUnsafePoint(prologue, newprog)
   416  
   417  		// Actually save LR.
   418  		prologue = obj.Appendp(prologue, newprog)
   419  		prologue.As = AMOV
   420  		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   421  		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize}
   422  
   423  		// Insert stack adjustment.
   424  		prologue = obj.Appendp(prologue, newprog)
   425  		prologue.As = AADDI
   426  		prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize}
   427  		prologue.Reg = REG_SP
   428  		prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   429  		prologue.Spadj = int32(stacksize)
   430  
   431  		prologue = ctxt.EndUnsafePoint(prologue, newprog, -1)
   432  
   433  		// On Linux, in a cgo binary we may get a SIGSETXID signal early on
   434  		// before the signal stack is set, as glibc doesn't allow us to block
   435  		// SIGSETXID. So a signal may land on the current stack and clobber
   436  		// the content below the SP. We store the LR again after the SP is
   437  		// decremented.
   438  		prologue = obj.Appendp(prologue, newprog)
   439  		prologue.As = AMOV
   440  		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   441  		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
   442  	}
   443  
   444  	if cursym.Func().Text.From.Sym.Wrapper() {
   445  		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
   446  		//
   447  		//   MOV g_panic(g), X5
   448  		//   BNE X5, ZERO, adjust
   449  		// end:
   450  		//   NOP
   451  		// ...rest of function..
   452  		// adjust:
   453  		//   MOV panic_argp(X5), X6
   454  		//   ADD $(autosize+FIXED_FRAME), SP, X7
   455  		//   BNE X6, X7, end
   456  		//   ADD $FIXED_FRAME, SP, X6
   457  		//   MOV X6, panic_argp(X5)
   458  		//   JMP end
   459  		//
   460  		// The NOP is needed to give the jumps somewhere to land.
   461  
   462  		ldpanic := obj.Appendp(prologue, newprog)
   463  
   464  		ldpanic.As = AMOV
   465  		ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic
   466  		ldpanic.Reg = obj.REG_NONE
   467  		ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
   468  
   469  		bneadj := obj.Appendp(ldpanic, newprog)
   470  		bneadj.As = ABNE
   471  		bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
   472  		bneadj.Reg = REG_ZERO
   473  		bneadj.To.Type = obj.TYPE_BRANCH
   474  
   475  		endadj := obj.Appendp(bneadj, newprog)
   476  		endadj.As = obj.ANOP
   477  
   478  		last := endadj
   479  		for last.Link != nil {
   480  			last = last.Link
   481  		}
   482  
   483  		getargp := obj.Appendp(last, newprog)
   484  		getargp.As = AMOV
   485  		getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
   486  		getargp.Reg = obj.REG_NONE
   487  		getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   488  
   489  		bneadj.To.SetTarget(getargp)
   490  
   491  		calcargp := obj.Appendp(getargp, newprog)
   492  		calcargp.As = AADDI
   493  		calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.Arch.FixedFrameSize}
   494  		calcargp.Reg = REG_SP
   495  		calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X7}
   496  
   497  		testargp := obj.Appendp(calcargp, newprog)
   498  		testargp.As = ABNE
   499  		testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   500  		testargp.Reg = REG_X7
   501  		testargp.To.Type = obj.TYPE_BRANCH
   502  		testargp.To.SetTarget(endadj)
   503  
   504  		adjargp := obj.Appendp(testargp, newprog)
   505  		adjargp.As = AADDI
   506  		adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)}
   507  		adjargp.Reg = REG_SP
   508  		adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   509  
   510  		setargp := obj.Appendp(adjargp, newprog)
   511  		setargp.As = AMOV
   512  		setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   513  		setargp.Reg = obj.REG_NONE
   514  		setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
   515  
   516  		godone := obj.Appendp(setargp, newprog)
   517  		godone.As = AJAL
   518  		godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   519  		godone.To.Type = obj.TYPE_BRANCH
   520  		godone.To.SetTarget(endadj)
   521  	}
   522  
   523  	// Update stack-based offsets.
   524  	for p := cursym.Func().Text; p != nil; p = p.Link {
   525  		stackOffset(&p.From, stacksize)
   526  		stackOffset(&p.To, stacksize)
   527  	}
   528  
   529  	// Additional instruction rewriting.
   530  	for p := cursym.Func().Text; p != nil; p = p.Link {
   531  		switch p.As {
   532  		case obj.AGETCALLERPC:
   533  			if cursym.Leaf() {
   534  				// MOV LR, Rd
   535  				p.As = AMOV
   536  				p.From.Type = obj.TYPE_REG
   537  				p.From.Reg = REG_LR
   538  			} else {
   539  				// MOV (RSP), Rd
   540  				p.As = AMOV
   541  				p.From.Type = obj.TYPE_MEM
   542  				p.From.Reg = REG_SP
   543  			}
   544  
   545  		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
   546  			switch p.To.Type {
   547  			case obj.TYPE_MEM:
   548  				jalToSym(ctxt, p, REG_LR)
   549  			}
   550  
   551  		case obj.AJMP:
   552  			switch p.To.Type {
   553  			case obj.TYPE_MEM:
   554  				switch p.To.Name {
   555  				case obj.NAME_EXTERN, obj.NAME_STATIC:
   556  					jalToSym(ctxt, p, REG_ZERO)
   557  				}
   558  			}
   559  
   560  		case obj.ARET:
   561  			// Replace RET with epilogue.
   562  			retJMP := p.To.Sym
   563  
   564  			if stacksize != 0 {
   565  				// Restore LR.
   566  				p.As = AMOV
   567  				p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
   568  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   569  				p = obj.Appendp(p, newprog)
   570  
   571  				p.As = AADDI
   572  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize}
   573  				p.Reg = REG_SP
   574  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   575  				p.Spadj = int32(-stacksize)
   576  				p = obj.Appendp(p, newprog)
   577  			}
   578  
   579  			if retJMP != nil {
   580  				p.As = obj.ARET
   581  				p.To.Sym = retJMP
   582  				jalToSym(ctxt, p, REG_ZERO)
   583  			} else {
   584  				p.As = AJALR
   585  				p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   586  				p.Reg = obj.REG_NONE
   587  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   588  			}
   589  
   590  			// "Add back" the stack removed in the previous instruction.
   591  			//
   592  			// This is to avoid confusing pctospadj, which sums
   593  			// Spadj from function entry to each PC, and shouldn't
   594  			// count adjustments from earlier epilogues, since they
   595  			// won't affect later PCs.
   596  			p.Spadj = int32(stacksize)
   597  
   598  		case AADDI:
   599  			// Refine Spadjs account for adjustment via ADDI instruction.
   600  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST {
   601  				p.Spadj = int32(-p.From.Offset)
   602  			}
   603  		}
   604  
   605  		if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
   606  			f := cursym.Func()
   607  			if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   608  				f.FuncFlag |= abi.FuncFlagSPWrite
   609  				if ctxt.Debugvlog || !ctxt.IsAsm {
   610  					ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   611  					if !ctxt.IsAsm {
   612  						ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   613  						ctxt.DiagFlush()
   614  						log.Fatalf("bad SPWRITE")
   615  					}
   616  				}
   617  			}
   618  		}
   619  	}
   620  
   621  	var callCount int
   622  	for p := cursym.Func().Text; p != nil; p = p.Link {
   623  		markRelocs(p)
   624  		if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
   625  			callCount++
   626  		}
   627  	}
   628  	const callTrampSize = 8 // 2 machine instructions.
   629  	maxTrampSize := int64(callCount * callTrampSize)
   630  
   631  	// Compute instruction addresses.  Once we do that, we need to check for
   632  	// overextended jumps and branches.  Within each iteration, Pc differences
   633  	// are always lower bounds (since the program gets monotonically longer,
   634  	// a fixed point will be reached).  No attempt to handle functions > 2GiB.
   635  	for {
   636  		big, rescan := false, false
   637  		maxPC := setPCs(cursym.Func().Text, 0)
   638  		if maxPC+maxTrampSize > (1 << 20) {
   639  			big = true
   640  		}
   641  
   642  		for p := cursym.Func().Text; p != nil; p = p.Link {
   643  			switch p.As {
   644  			case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
   645  				if p.To.Type != obj.TYPE_BRANCH {
   646  					panic("assemble: instruction with branch-like opcode lacks destination")
   647  				}
   648  				offset := p.To.Target().Pc - p.Pc
   649  				if offset < -4096 || 4096 <= offset {
   650  					// Branch is long.  Replace it with a jump.
   651  					jmp := obj.Appendp(p, newprog)
   652  					jmp.As = AJAL
   653  					jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   654  					jmp.To = obj.Addr{Type: obj.TYPE_BRANCH}
   655  					jmp.To.SetTarget(p.To.Target())
   656  
   657  					p.As = InvertBranch(p.As)
   658  					p.To.SetTarget(jmp.Link)
   659  
   660  					// We may have made previous branches too long,
   661  					// so recheck them.
   662  					rescan = true
   663  				}
   664  			case AJAL:
   665  				// Linker will handle the intersymbol case and trampolines.
   666  				if p.To.Target() == nil {
   667  					if !big {
   668  						break
   669  					}
   670  					// This function is going to be too large for JALs
   671  					// to reach trampolines. Replace with AUIPC+JALR.
   672  					jmp := obj.Appendp(p, newprog)
   673  					jmp.As = AJALR
   674  					jmp.From = p.From
   675  					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   676  
   677  					p.As = AAUIPC
   678  					p.Mark = (p.Mark &^ NEED_JAL_RELOC) | NEED_CALL_RELOC
   679  					p.AddRestSource(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym})
   680  					p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
   681  					p.Reg = obj.REG_NONE
   682  					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   683  
   684  					rescan = true
   685  					break
   686  				}
   687  				offset := p.To.Target().Pc - p.Pc
   688  				if offset < -(1<<20) || (1<<20) <= offset {
   689  					// Replace with 2-instruction sequence. This assumes
   690  					// that TMP is not live across J instructions, since
   691  					// it is reserved by SSA.
   692  					jmp := obj.Appendp(p, newprog)
   693  					jmp.As = AJALR
   694  					jmp.From = p.From
   695  					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   696  
   697  					// p.From is not generally valid, however will be
   698  					// fixed up in the next loop.
   699  					p.As = AAUIPC
   700  					p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym}
   701  					p.From.SetTarget(p.To.Target())
   702  					p.Reg = obj.REG_NONE
   703  					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   704  
   705  					rescan = true
   706  				}
   707  			}
   708  		}
   709  
   710  		if !rescan {
   711  			break
   712  		}
   713  	}
   714  
   715  	// Now that there are no long branches, resolve branch and jump targets.
   716  	// At this point, instruction rewriting which changes the number of
   717  	// instructions will break everything--don't do it!
   718  	for p := cursym.Func().Text; p != nil; p = p.Link {
   719  		switch p.As {
   720  		case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
   721  			switch p.To.Type {
   722  			case obj.TYPE_BRANCH:
   723  				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
   724  			case obj.TYPE_MEM:
   725  				panic("unhandled type")
   726  			}
   727  
   728  		case AJAL:
   729  			// Linker will handle the intersymbol case and trampolines.
   730  			if p.To.Target() != nil {
   731  				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
   732  			}
   733  
   734  		case AAUIPC:
   735  			if p.From.Type == obj.TYPE_BRANCH {
   736  				low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc)
   737  				if err != nil {
   738  					ctxt.Diag("%v: jump displacement %d too large", p, p.To.Target().Pc-p.Pc)
   739  				}
   740  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym}
   741  				p.Link.To.Offset = low
   742  			}
   743  
   744  		case obj.APCALIGN:
   745  			alignedValue := p.From.Offset
   746  			if (alignedValue&(alignedValue-1) != 0) || 4 > alignedValue || alignedValue > 2048 {
   747  				ctxt.Diag("alignment value of an instruction must be a power of two and in the range [4, 2048], got %d\n", alignedValue)
   748  			}
   749  			// Update the current text symbol alignment value.
   750  			if int32(alignedValue) > cursym.Func().Align {
   751  				cursym.Func().Align = int32(alignedValue)
   752  			}
   753  		}
   754  	}
   755  
   756  	// Validate all instructions - this provides nice error messages.
   757  	for p := cursym.Func().Text; p != nil; p = p.Link {
   758  		for _, ins := range instructionsForProg(p) {
   759  			ins.validate(ctxt)
   760  		}
   761  	}
   762  }
   763  
   764  func pcAlignPadLength(pc int64, alignedValue int64) int {
   765  	return int(-pc & (alignedValue - 1))
   766  }
   767  
   768  func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog {
   769  	// Leaf function with no frame is effectively NOSPLIT.
   770  	if framesize == 0 {
   771  		return p
   772  	}
   773  
   774  	if ctxt.Flag_maymorestack != "" {
   775  		// Save LR and REGCTXT
   776  		const frameSize = 16
   777  		p = ctxt.StartUnsafePoint(p, newprog)
   778  
   779  		// Spill Arguments. This has to happen before we open
   780  		// any more frame space.
   781  		p = cursym.Func().SpillRegisterArgs(p, newprog)
   782  
   783  		// MOV LR, -16(SP)
   784  		p = obj.Appendp(p, newprog)
   785  		p.As = AMOV
   786  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   787  		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -frameSize}
   788  		// ADDI $-16, SP
   789  		p = obj.Appendp(p, newprog)
   790  		p.As = AADDI
   791  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -frameSize}
   792  		p.Reg = REG_SP
   793  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   794  		p.Spadj = frameSize
   795  		// MOV REGCTXT, 8(SP)
   796  		p = obj.Appendp(p, newprog)
   797  		p.As = AMOV
   798  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
   799  		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
   800  
   801  		// CALL maymorestack
   802  		p = obj.Appendp(p, newprog)
   803  		p.As = obj.ACALL
   804  		p.To.Type = obj.TYPE_BRANCH
   805  		// See ../x86/obj6.go
   806  		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())
   807  		jalToSym(ctxt, p, REG_X5)
   808  
   809  		// Restore LR and REGCTXT
   810  
   811  		// MOV 8(SP), REGCTXT
   812  		p = obj.Appendp(p, newprog)
   813  		p.As = AMOV
   814  		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
   815  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
   816  		// MOV (SP), LR
   817  		p = obj.Appendp(p, newprog)
   818  		p.As = AMOV
   819  		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
   820  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   821  		// ADDI $16, SP
   822  		p = obj.Appendp(p, newprog)
   823  		p.As = AADDI
   824  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: frameSize}
   825  		p.Reg = REG_SP
   826  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   827  		p.Spadj = -frameSize
   828  
   829  		// Unspill arguments
   830  		p = cursym.Func().UnspillRegisterArgs(p, newprog)
   831  		p = ctxt.EndUnsafePoint(p, newprog, -1)
   832  	}
   833  
   834  	// Jump back to here after morestack returns.
   835  	startPred := p
   836  
   837  	// MOV	g_stackguard(g), X6
   838  	p = obj.Appendp(p, newprog)
   839  	p.As = AMOV
   840  	p.From.Type = obj.TYPE_MEM
   841  	p.From.Reg = REGG
   842  	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
   843  	if cursym.CFunc() {
   844  		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
   845  	}
   846  	p.To.Type = obj.TYPE_REG
   847  	p.To.Reg = REG_X6
   848  
   849  	// Mark the stack bound check and morestack call async nonpreemptible.
   850  	// If we get preempted here, when resumed the preemption request is
   851  	// cleared, but we'll still call morestack, which will double the stack
   852  	// unnecessarily. See issue #35470.
   853  	p = ctxt.StartUnsafePoint(p, newprog)
   854  
   855  	var to_done, to_more *obj.Prog
   856  
   857  	if framesize <= abi.StackSmall {
   858  		// small stack
   859  		//	// if SP > stackguard { goto done }
   860  		//	BLTU	stackguard, SP, done
   861  		p = obj.Appendp(p, newprog)
   862  		p.As = ABLTU
   863  		p.From.Type = obj.TYPE_REG
   864  		p.From.Reg = REG_X6
   865  		p.Reg = REG_SP
   866  		p.To.Type = obj.TYPE_BRANCH
   867  		to_done = p
   868  	} else {
   869  		// large stack: SP-framesize < stackguard-StackSmall
   870  		offset := int64(framesize) - abi.StackSmall
   871  		if framesize > abi.StackBig {
   872  			// Such a large stack we need to protect against underflow.
   873  			// The runtime guarantees SP > objabi.StackBig, but
   874  			// framesize is large enough that SP-framesize may
   875  			// underflow, causing a direct comparison with the
   876  			// stack guard to incorrectly succeed. We explicitly
   877  			// guard against underflow.
   878  			//
   879  			//	MOV	$(framesize-StackSmall), X7
   880  			//	BLTU	SP, X7, label-of-call-to-morestack
   881  
   882  			p = obj.Appendp(p, newprog)
   883  			p.As = AMOV
   884  			p.From.Type = obj.TYPE_CONST
   885  			p.From.Offset = offset
   886  			p.To.Type = obj.TYPE_REG
   887  			p.To.Reg = REG_X7
   888  
   889  			p = obj.Appendp(p, newprog)
   890  			p.As = ABLTU
   891  			p.From.Type = obj.TYPE_REG
   892  			p.From.Reg = REG_SP
   893  			p.Reg = REG_X7
   894  			p.To.Type = obj.TYPE_BRANCH
   895  			to_more = p
   896  		}
   897  
   898  		// Check against the stack guard. We've ensured this won't underflow.
   899  		//	ADD	$-(framesize-StackSmall), SP, X7
   900  		//	// if X7 > stackguard { goto done }
   901  		//	BLTU	stackguard, X7, done
   902  		p = obj.Appendp(p, newprog)
   903  		p.As = AADDI
   904  		p.From.Type = obj.TYPE_CONST
   905  		p.From.Offset = -offset
   906  		p.Reg = REG_SP
   907  		p.To.Type = obj.TYPE_REG
   908  		p.To.Reg = REG_X7
   909  
   910  		p = obj.Appendp(p, newprog)
   911  		p.As = ABLTU
   912  		p.From.Type = obj.TYPE_REG
   913  		p.From.Reg = REG_X6
   914  		p.Reg = REG_X7
   915  		p.To.Type = obj.TYPE_BRANCH
   916  		to_done = p
   917  	}
   918  
   919  	// Spill the register args that could be clobbered by the
   920  	// morestack code
   921  	p = ctxt.EmitEntryStackMap(cursym, p, newprog)
   922  	p = cursym.Func().SpillRegisterArgs(p, newprog)
   923  
   924  	// CALL runtime.morestack(SB)
   925  	p = obj.Appendp(p, newprog)
   926  	p.As = obj.ACALL
   927  	p.To.Type = obj.TYPE_BRANCH
   928  
   929  	if cursym.CFunc() {
   930  		p.To.Sym = ctxt.Lookup("runtime.morestackc")
   931  	} else if !cursym.Func().Text.From.Sym.NeedCtxt() {
   932  		p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt")
   933  	} else {
   934  		p.To.Sym = ctxt.Lookup("runtime.morestack")
   935  	}
   936  	if to_more != nil {
   937  		to_more.To.SetTarget(p)
   938  	}
   939  	jalToSym(ctxt, p, REG_X5)
   940  
   941  	// The instructions which unspill regs should be preemptible.
   942  	p = ctxt.EndUnsafePoint(p, newprog, -1)
   943  	p = cursym.Func().UnspillRegisterArgs(p, newprog)
   944  
   945  	// JMP start
   946  	p = obj.Appendp(p, newprog)
   947  	p.As = AJAL
   948  	p.To = obj.Addr{Type: obj.TYPE_BRANCH}
   949  	p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   950  	p.To.SetTarget(startPred.Link)
   951  
   952  	// placeholder for to_done's jump target
   953  	p = obj.Appendp(p, newprog)
   954  	p.As = obj.ANOP // zero-width place holder
   955  	to_done.To.SetTarget(p)
   956  
   957  	return p
   958  }
   959  
   960  // signExtend sign extends val starting at bit bit.
   961  func signExtend(val int64, bit uint) int64 {
   962  	return val << (64 - bit) >> (64 - bit)
   963  }
   964  
   965  // Split32BitImmediate splits a signed 32-bit immediate into a signed 20-bit
   966  // upper immediate and a signed 12-bit lower immediate to be added to the upper
   967  // result. For example, high may be used in LUI and low in a following ADDI to
   968  // generate a full 32-bit constant.
   969  func Split32BitImmediate(imm int64) (low, high int64, err error) {
   970  	if err := immIFits(imm, 32); err != nil {
   971  		return 0, 0, err
   972  	}
   973  
   974  	// Nothing special needs to be done if the immediate fits in 12 bits.
   975  	if err := immIFits(imm, 12); err == nil {
   976  		return imm, 0, nil
   977  	}
   978  
   979  	high = imm >> 12
   980  
   981  	// The bottom 12 bits will be treated as signed.
   982  	//
   983  	// If that will result in a negative 12 bit number, add 1 to
   984  	// our upper bits to adjust for the borrow.
   985  	//
   986  	// It is not possible for this increment to overflow. To
   987  	// overflow, the 20 top bits would be 1, and the sign bit for
   988  	// the low 12 bits would be set, in which case the entire 32
   989  	// bit pattern fits in a 12 bit signed value.
   990  	if imm&(1<<11) != 0 {
   991  		high++
   992  	}
   993  
   994  	low = signExtend(imm, 12)
   995  	high = signExtend(high, 20)
   996  
   997  	return low, high, nil
   998  }
   999  
  1000  func regVal(r, min, max uint32) uint32 {
  1001  	if r < min || r > max {
  1002  		panic(fmt.Sprintf("register out of range, want %d <= %d <= %d", min, r, max))
  1003  	}
  1004  	return r - min
  1005  }
  1006  
  1007  // regI returns an integer register.
  1008  func regI(r uint32) uint32 {
  1009  	return regVal(r, REG_X0, REG_X31)
  1010  }
  1011  
  1012  // regF returns a float register.
  1013  func regF(r uint32) uint32 {
  1014  	return regVal(r, REG_F0, REG_F31)
  1015  }
  1016  
  1017  // regAddr extracts a register from an Addr.
  1018  func regAddr(a obj.Addr, min, max uint32) uint32 {
  1019  	if a.Type != obj.TYPE_REG {
  1020  		panic(fmt.Sprintf("ill typed: %+v", a))
  1021  	}
  1022  	return regVal(uint32(a.Reg), min, max)
  1023  }
  1024  
  1025  // regIAddr extracts the integer register from an Addr.
  1026  func regIAddr(a obj.Addr) uint32 {
  1027  	return regAddr(a, REG_X0, REG_X31)
  1028  }
  1029  
  1030  // regFAddr extracts the float register from an Addr.
  1031  func regFAddr(a obj.Addr) uint32 {
  1032  	return regAddr(a, REG_F0, REG_F31)
  1033  }
  1034  
  1035  // immEven checks that the immediate is a multiple of two. If it
  1036  // is not, an error is returned.
  1037  func immEven(x int64) error {
  1038  	if x&1 != 0 {
  1039  		return fmt.Errorf("immediate %#x is not a multiple of two", x)
  1040  	}
  1041  	return nil
  1042  }
  1043  
  1044  // immIFits checks whether the immediate value x fits in nbits bits
  1045  // as a signed integer. If it does not, an error is returned.
  1046  func immIFits(x int64, nbits uint) error {
  1047  	nbits--
  1048  	min := int64(-1) << nbits
  1049  	max := int64(1)<<nbits - 1
  1050  	if x < min || x > max {
  1051  		if nbits <= 16 {
  1052  			return fmt.Errorf("signed immediate %d must be in range [%d, %d] (%d bits)", x, min, max, nbits)
  1053  		}
  1054  		return fmt.Errorf("signed immediate %#x must be in range [%#x, %#x] (%d bits)", x, min, max, nbits)
  1055  	}
  1056  	return nil
  1057  }
  1058  
  1059  // immI extracts the signed integer of the specified size from an immediate.
  1060  func immI(as obj.As, imm int64, nbits uint) uint32 {
  1061  	if err := immIFits(imm, nbits); err != nil {
  1062  		panic(fmt.Sprintf("%v: %v", as, err))
  1063  	}
  1064  	return uint32(imm)
  1065  }
  1066  
  1067  func wantImmI(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
  1068  	if err := immIFits(imm, nbits); err != nil {
  1069  		ctxt.Diag("%v: %v", ins, err)
  1070  	}
  1071  }
  1072  
  1073  func wantReg(ctxt *obj.Link, ins *instruction, pos string, descr string, r, min, max uint32) {
  1074  	if r < min || r > max {
  1075  		var suffix string
  1076  		if r != obj.REG_NONE {
  1077  			suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r)))
  1078  		}
  1079  		ctxt.Diag("%v: expected %s register in %s position%s", ins, descr, pos, suffix)
  1080  	}
  1081  }
  1082  
  1083  func wantNoneReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
  1084  	if r != obj.REG_NONE {
  1085  		ctxt.Diag("%v: expected no register in %s but got register %s", ins, pos, RegName(int(r)))
  1086  	}
  1087  }
  1088  
  1089  // wantIntReg checks that r is an integer register.
  1090  func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
  1091  	wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
  1092  }
  1093  
  1094  // wantFloatReg checks that r is a floating-point register.
  1095  func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
  1096  	wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
  1097  }
  1098  
  1099  // wantEvenOffset checks that the offset is a multiple of two.
  1100  func wantEvenOffset(ctxt *obj.Link, ins *instruction, offset int64) {
  1101  	if err := immEven(offset); err != nil {
  1102  		ctxt.Diag("%v: %v", ins, err)
  1103  	}
  1104  }
  1105  
// validateRIII reports a diagnostic for each operand of ins that does not
// match the form checked here: rd, rs1 and rs2 must be integer registers
// and rs3 must be unset.
func validateRIII(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1112  
// validateRFFF reports a diagnostic for each operand of ins that does not
// match the form checked here: rd, rs1 and rs2 must be float registers
// and rs3 must be unset.
func validateRFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1119  
// validateRFFFF reports a diagnostic for each of rd, rs1, rs2 and rs3 of
// ins that is not a float register.
func validateRFFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantFloatReg(ctxt, ins, "rs3", ins.rs3)
}
  1126  
// validateRFFI reports a diagnostic for each operand of ins that does not
// match the form checked here: rd must be an integer register, rs1 and
// rs2 must be float registers, and rs3 must be unset.
func validateRFFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1133  
// validateRFI reports a diagnostic for each operand of ins that does not
// match the form checked here: rd must be an integer register, rs2 must
// be a float register, and rs1 and rs3 must be unset.
func validateRFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1140  
// validateRIF reports a diagnostic for each operand of ins that does not
// match the form checked here: rd must be a float register, rs2 must be
// an integer register, and rs1 and rs3 must be unset.
func validateRIF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1147  
// validateRFF reports a diagnostic for each operand of ins that does not
// match the form checked here: rd and rs2 must be float registers, and
// rs1 and rs3 must be unset.
func validateRFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1154  
// validateII reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must fit in 12 signed bits,
// rd and rs1 must be integer registers, and rs2 and rs3 must be unset.
func validateII(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1162  
// validateIF reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must fit in 12 signed bits,
// rd must be a float register, rs1 must be an integer register, and rs2
// and rs3 must be unset.
func validateIF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1170  
// validateSI reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must fit in 12 signed bits,
// rd and rs1 must be integer registers, and rs2 and rs3 must be unset.
func validateSI(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1178  
// validateSF reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must fit in 12 signed bits,
// rd must be an integer register, rs1 must be a float register, and rs2
// and rs3 must be unset.
func validateSF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1186  
// validateB reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must be even and fit in 13
// signed bits, rs1 and rs2 must be integer registers, and rd and rs3 must
// be unset.
func validateB(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 13 bit immediates for the
	// 12 bit slot. We implicitly drop the least significant bit in encodeB.
	wantEvenOffset(ctxt, ins, ins.imm)
	wantImmI(ctxt, ins, ins.imm, 13)
	wantNoneReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1197  
// validateU reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must fit in 20 signed bits,
// rd must be an integer register, and rs1, rs2 and rs3 must be unset.
func validateU(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 20)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1205  
// validateJ reports a diagnostic for each operand of ins that does not
// match the form checked here: the immediate must be even and fit in 21
// signed bits, rd must be an integer register, and rs1, rs2 and rs3 must
// be unset.
func validateJ(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 21 bit immediates for the
	// 20 bit slot. We implicitly drop the least significant bit in encodeJ.
	wantEvenOffset(ctxt, ins, ins.imm)
	wantImmI(ctxt, ins, ins.imm, 21)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1216  
  1217  func validateRaw(ctxt *obj.Link, ins *instruction) {
  1218  	// Treat the raw value specially as a 32-bit unsigned integer.
  1219  	// Nobody wants to enter negative machine code.
  1220  	if ins.imm < 0 || 1<<32 <= ins.imm {
  1221  		ctxt.Diag("%v: immediate %d in raw position cannot be larger than 32 bits", ins.as, ins.imm)
  1222  	}
  1223  }
  1224  
  1225  // extractBitAndShift extracts the specified bit from the given immediate,
  1226  // before shifting it to the requested position and returning it.
  1227  func extractBitAndShift(imm uint32, bit, pos int) uint32 {
  1228  	return ((imm >> bit) & 1) << pos
  1229  }
  1230  
  1231  // encodeR encodes an R-type RISC-V instruction.
  1232  func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 {
  1233  	enc := encode(as)
  1234  	if enc == nil {
  1235  		panic("encodeR: could not encode instruction")
  1236  	}
  1237  	if enc.rs2 != 0 && rs2 != 0 {
  1238  		panic("encodeR: instruction uses rs2, but rs2 was nonzero")
  1239  	}
  1240  	return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
  1241  }
  1242  
  1243  // encodeR4 encodes an R4-type RISC-V instruction.
  1244  func encodeR4(as obj.As, rs1, rs2, rs3, rd, funct3, funct2 uint32) uint32 {
  1245  	enc := encode(as)
  1246  	if enc == nil {
  1247  		panic("encodeR4: could not encode instruction")
  1248  	}
  1249  	if enc.rs2 != 0 {
  1250  		panic("encodeR4: instruction uses rs2")
  1251  	}
  1252  	funct2 |= enc.funct7
  1253  	if funct2&^3 != 0 {
  1254  		panic("encodeR4: funct2 requires more than 2 bits")
  1255  	}
  1256  	return rs3<<27 | funct2<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
  1257  }
  1258  
  1259  func encodeRIII(ins *instruction) uint32 {
  1260  	return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
  1261  }
  1262  
  1263  func encodeRFFF(ins *instruction) uint32 {
  1264  	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7)
  1265  }
  1266  
  1267  func encodeRFFFF(ins *instruction) uint32 {
  1268  	return encodeR4(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rs3), regF(ins.rd), ins.funct3, ins.funct7)
  1269  }
  1270  
  1271  func encodeRFFI(ins *instruction) uint32 {
  1272  	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
  1273  }
  1274  
  1275  func encodeRFI(ins *instruction) uint32 {
  1276  	return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7)
  1277  }
  1278  
  1279  func encodeRIF(ins *instruction) uint32 {
  1280  	return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
  1281  }
  1282  
  1283  func encodeRFF(ins *instruction) uint32 {
  1284  	return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
  1285  }
  1286  
  1287  // encodeI encodes an I-type RISC-V instruction.
  1288  func encodeI(as obj.As, rs1, rd, imm uint32) uint32 {
  1289  	enc := encode(as)
  1290  	if enc == nil {
  1291  		panic("encodeI: could not encode instruction")
  1292  	}
  1293  	imm |= uint32(enc.csr)
  1294  	return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode
  1295  }
  1296  
  1297  func encodeII(ins *instruction) uint32 {
  1298  	return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm))
  1299  }
  1300  
  1301  func encodeIF(ins *instruction) uint32 {
  1302  	return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm))
  1303  }
  1304  
  1305  // encodeS encodes an S-type RISC-V instruction.
  1306  func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 {
  1307  	enc := encode(as)
  1308  	if enc == nil {
  1309  		panic("encodeS: could not encode instruction")
  1310  	}
  1311  	return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode
  1312  }
  1313  
  1314  func encodeSI(ins *instruction) uint32 {
  1315  	return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm))
  1316  }
  1317  
  1318  func encodeSF(ins *instruction) uint32 {
  1319  	return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm))
  1320  }
  1321  
  1322  // encodeBImmediate encodes an immediate for a B-type RISC-V instruction.
  1323  func encodeBImmediate(imm uint32) uint32 {
  1324  	return (imm>>12)<<31 | ((imm>>5)&0x3f)<<25 | ((imm>>1)&0xf)<<8 | ((imm>>11)&0x1)<<7
  1325  }
  1326  
  1327  // encodeB encodes a B-type RISC-V instruction.
  1328  func encodeB(ins *instruction) uint32 {
  1329  	imm := immI(ins.as, ins.imm, 13)
  1330  	rs2 := regI(ins.rs1)
  1331  	rs1 := regI(ins.rs2)
  1332  	enc := encode(ins.as)
  1333  	if enc == nil {
  1334  		panic("encodeB: could not encode instruction")
  1335  	}
  1336  	return encodeBImmediate(imm) | rs2<<20 | rs1<<15 | enc.funct3<<12 | enc.opcode
  1337  }
  1338  
  1339  // encodeU encodes a U-type RISC-V instruction.
  1340  func encodeU(ins *instruction) uint32 {
  1341  	// The immediates for encodeU are the upper 20 bits of a 32 bit value.
  1342  	// Rather than have the user/compiler generate a 32 bit constant, the
  1343  	// bottommost bits of which must all be zero, instead accept just the
  1344  	// top bits.
  1345  	imm := immI(ins.as, ins.imm, 20)
  1346  	rd := regI(ins.rd)
  1347  	enc := encode(ins.as)
  1348  	if enc == nil {
  1349  		panic("encodeU: could not encode instruction")
  1350  	}
  1351  	return imm<<12 | rd<<7 | enc.opcode
  1352  }
  1353  
  1354  // encodeJImmediate encodes an immediate for a J-type RISC-V instruction.
  1355  func encodeJImmediate(imm uint32) uint32 {
  1356  	return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12
  1357  }
  1358  
  1359  // encodeJ encodes a J-type RISC-V instruction.
  1360  func encodeJ(ins *instruction) uint32 {
  1361  	imm := immI(ins.as, ins.imm, 21)
  1362  	rd := regI(ins.rd)
  1363  	enc := encode(ins.as)
  1364  	if enc == nil {
  1365  		panic("encodeJ: could not encode instruction")
  1366  	}
  1367  	return encodeJImmediate(imm) | rd<<7 | enc.opcode
  1368  }
  1369  
  1370  // encodeCBImmediate encodes an immediate for a CB-type RISC-V instruction.
  1371  func encodeCBImmediate(imm uint32) uint32 {
  1372  	// Bit order - [8|4:3|7:6|2:1|5]
  1373  	bits := extractBitAndShift(imm, 8, 7)
  1374  	bits |= extractBitAndShift(imm, 4, 6)
  1375  	bits |= extractBitAndShift(imm, 3, 5)
  1376  	bits |= extractBitAndShift(imm, 7, 4)
  1377  	bits |= extractBitAndShift(imm, 6, 3)
  1378  	bits |= extractBitAndShift(imm, 2, 2)
  1379  	bits |= extractBitAndShift(imm, 1, 1)
  1380  	bits |= extractBitAndShift(imm, 5, 0)
  1381  	return (bits>>5)<<10 | (bits&0x1f)<<2
  1382  }
  1383  
  1384  // encodeCJImmediate encodes an immediate for a CJ-type RISC-V instruction.
  1385  func encodeCJImmediate(imm uint32) uint32 {
  1386  	// Bit order - [11|4|9:8|10|6|7|3:1|5]
  1387  	bits := extractBitAndShift(imm, 11, 10)
  1388  	bits |= extractBitAndShift(imm, 4, 9)
  1389  	bits |= extractBitAndShift(imm, 9, 8)
  1390  	bits |= extractBitAndShift(imm, 8, 7)
  1391  	bits |= extractBitAndShift(imm, 10, 6)
  1392  	bits |= extractBitAndShift(imm, 6, 5)
  1393  	bits |= extractBitAndShift(imm, 7, 4)
  1394  	bits |= extractBitAndShift(imm, 3, 3)
  1395  	bits |= extractBitAndShift(imm, 2, 2)
  1396  	bits |= extractBitAndShift(imm, 1, 1)
  1397  	bits |= extractBitAndShift(imm, 5, 0)
  1398  	return bits << 2
  1399  }
  1400  
  1401  func encodeRawIns(ins *instruction) uint32 {
  1402  	// Treat the raw value specially as a 32-bit unsigned integer.
  1403  	// Nobody wants to enter negative machine code.
  1404  	if ins.imm < 0 || 1<<32 <= ins.imm {
  1405  		panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm))
  1406  	}
  1407  	return uint32(ins.imm)
  1408  }
  1409  
  1410  func EncodeBImmediate(imm int64) (int64, error) {
  1411  	if err := immIFits(imm, 13); err != nil {
  1412  		return 0, err
  1413  	}
  1414  	if err := immEven(imm); err != nil {
  1415  		return 0, err
  1416  	}
  1417  	return int64(encodeBImmediate(uint32(imm))), nil
  1418  }
  1419  
  1420  func EncodeCBImmediate(imm int64) (int64, error) {
  1421  	if err := immIFits(imm, 9); err != nil {
  1422  		return 0, err
  1423  	}
  1424  	if err := immEven(imm); err != nil {
  1425  		return 0, err
  1426  	}
  1427  	return int64(encodeCBImmediate(uint32(imm))), nil
  1428  }
  1429  
  1430  func EncodeCJImmediate(imm int64) (int64, error) {
  1431  	if err := immIFits(imm, 12); err != nil {
  1432  		return 0, err
  1433  	}
  1434  	if err := immEven(imm); err != nil {
  1435  		return 0, err
  1436  	}
  1437  	return int64(encodeCJImmediate(uint32(imm))), nil
  1438  }
  1439  
  1440  func EncodeIImmediate(imm int64) (int64, error) {
  1441  	if err := immIFits(imm, 12); err != nil {
  1442  		return 0, err
  1443  	}
  1444  	return imm << 20, nil
  1445  }
  1446  
  1447  func EncodeJImmediate(imm int64) (int64, error) {
  1448  	if err := immIFits(imm, 21); err != nil {
  1449  		return 0, err
  1450  	}
  1451  	if err := immEven(imm); err != nil {
  1452  		return 0, err
  1453  	}
  1454  	return int64(encodeJImmediate(uint32(imm))), nil
  1455  }
  1456  
  1457  func EncodeSImmediate(imm int64) (int64, error) {
  1458  	if err := immIFits(imm, 12); err != nil {
  1459  		return 0, err
  1460  	}
  1461  	return ((imm >> 5) << 25) | ((imm & 0x1f) << 7), nil
  1462  }
  1463  
  1464  func EncodeUImmediate(imm int64) (int64, error) {
  1465  	if err := immIFits(imm, 20); err != nil {
  1466  		return 0, err
  1467  	}
  1468  	return imm << 12, nil
  1469  }
  1470  
// encoding bundles the functions used to check and emit one instruction
// form, together with the size of the instruction it produces.
type encoding struct {
	encode   func(*instruction) uint32     // encode returns the machine code for an instruction
	validate func(*obj.Link, *instruction) // validate validates an instruction
	length   int                           // length of encoded instruction; 0 for pseudo-ops, 4 otherwise
}
  1476  
var (
	// Encodings have the following naming convention:
	//
	//  1. the instruction encoding (R/I/S/B/U/J), in lowercase
	//  2. zero or more register operand identifiers (I = integer
	//     register, F = float register), in uppercase
	//  3. the word "Encoding"
	//
	// For example, rIIIEncoding indicates an R-type instruction with two
	// integer register inputs and an integer register output; sFEncoding
	// indicates an S-type instruction with rs2 being a float register.
	//
	// Each value pairs an encode function with the validate function of
	// the same suffix.

	rIIIEncoding  = encoding{encode: encodeRIII, validate: validateRIII, length: 4}
	rFFFEncoding  = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4}
	rFFFFEncoding = encoding{encode: encodeRFFFF, validate: validateRFFFF, length: 4}
	rFFIEncoding  = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4}
	rFIEncoding   = encoding{encode: encodeRFI, validate: validateRFI, length: 4}
	rIFEncoding   = encoding{encode: encodeRIF, validate: validateRIF, length: 4}
	rFFEncoding   = encoding{encode: encodeRFF, validate: validateRFF, length: 4}

	iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4}
	iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4}

	sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4}
	sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4}

	bEncoding = encoding{encode: encodeB, validate: validateB, length: 4}
	uEncoding = encoding{encode: encodeU, validate: validateU, length: 4}
	jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4}

	// rawEncoding encodes a raw instruction byte sequence: the immediate
	// of the instruction is emitted as the 32-bit word itself.
	rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4}

	// pseudoOpEncoding panics if encoding is attempted (its encode
	// function is nil), but does no validation.
	pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0}

	// badEncoding is used when an invalid op is encountered.
	// An error has already been generated, so let anything else through:
	// encode returns 0 and validate accepts everything.
	badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0}
)
  1517  
// encodings contains the encodings for RISC-V instructions.
// Instructions are masked with obj.AMask to keep indices small.
//
// The array is indexed by the masked instruction value. Slots that are
// not listed here hold the zero encoding, whose encode and validate
// functions are both nil. The section numbers in the comments below
// presumably refer to chapters of the RISC-V ISA manuals.
var encodings = [ALAST & obj.AMask]encoding{

	// Unprivileged ISA

	// 2.4: Integer Computational Instructions
	AADDI & obj.AMask:  iIEncoding,
	ASLTI & obj.AMask:  iIEncoding,
	ASLTIU & obj.AMask: iIEncoding,
	AANDI & obj.AMask:  iIEncoding,
	AORI & obj.AMask:   iIEncoding,
	AXORI & obj.AMask:  iIEncoding,
	ASLLI & obj.AMask:  iIEncoding,
	ASRLI & obj.AMask:  iIEncoding,
	ASRAI & obj.AMask:  iIEncoding,
	ALUI & obj.AMask:   uEncoding,
	AAUIPC & obj.AMask: uEncoding,
	AADD & obj.AMask:   rIIIEncoding,
	ASLT & obj.AMask:   rIIIEncoding,
	ASLTU & obj.AMask:  rIIIEncoding,
	AAND & obj.AMask:   rIIIEncoding,
	AOR & obj.AMask:    rIIIEncoding,
	AXOR & obj.AMask:   rIIIEncoding,
	ASLL & obj.AMask:   rIIIEncoding,
	ASRL & obj.AMask:   rIIIEncoding,
	ASUB & obj.AMask:   rIIIEncoding,
	ASRA & obj.AMask:   rIIIEncoding,

	// 2.5: Control Transfer Instructions
	AJAL & obj.AMask:  jEncoding,
	AJALR & obj.AMask: iIEncoding,
	ABEQ & obj.AMask:  bEncoding,
	ABNE & obj.AMask:  bEncoding,
	ABLT & obj.AMask:  bEncoding,
	ABLTU & obj.AMask: bEncoding,
	ABGE & obj.AMask:  bEncoding,
	ABGEU & obj.AMask: bEncoding,

	// 2.6: Load and Store Instructions
	ALW & obj.AMask:  iIEncoding,
	ALWU & obj.AMask: iIEncoding,
	ALH & obj.AMask:  iIEncoding,
	ALHU & obj.AMask: iIEncoding,
	ALB & obj.AMask:  iIEncoding,
	ALBU & obj.AMask: iIEncoding,
	ASW & obj.AMask:  sIEncoding,
	ASH & obj.AMask:  sIEncoding,
	ASB & obj.AMask:  sIEncoding,

	// 2.7: Memory Ordering
	AFENCE & obj.AMask: iIEncoding,

	// 5.2: Integer Computational Instructions (RV64I)
	AADDIW & obj.AMask: iIEncoding,
	ASLLIW & obj.AMask: iIEncoding,
	ASRLIW & obj.AMask: iIEncoding,
	ASRAIW & obj.AMask: iIEncoding,
	AADDW & obj.AMask:  rIIIEncoding,
	ASLLW & obj.AMask:  rIIIEncoding,
	ASRLW & obj.AMask:  rIIIEncoding,
	ASUBW & obj.AMask:  rIIIEncoding,
	ASRAW & obj.AMask:  rIIIEncoding,

	// 5.3: Load and Store Instructions (RV64I)
	ALD & obj.AMask: iIEncoding,
	ASD & obj.AMask: sIEncoding,

	// 7.1: Multiplication Operations
	AMUL & obj.AMask:    rIIIEncoding,
	AMULH & obj.AMask:   rIIIEncoding,
	AMULHU & obj.AMask:  rIIIEncoding,
	AMULHSU & obj.AMask: rIIIEncoding,
	AMULW & obj.AMask:   rIIIEncoding,
	ADIV & obj.AMask:    rIIIEncoding,
	ADIVU & obj.AMask:   rIIIEncoding,
	AREM & obj.AMask:    rIIIEncoding,
	AREMU & obj.AMask:   rIIIEncoding,
	ADIVW & obj.AMask:   rIIIEncoding,
	ADIVUW & obj.AMask:  rIIIEncoding,
	AREMW & obj.AMask:   rIIIEncoding,
	AREMUW & obj.AMask:  rIIIEncoding,

	// 8.2: Load-Reserved/Store-Conditional
	ALRW & obj.AMask: rIIIEncoding,
	ALRD & obj.AMask: rIIIEncoding,
	ASCW & obj.AMask: rIIIEncoding,
	ASCD & obj.AMask: rIIIEncoding,

	// 8.3: Atomic Memory Operations
	AAMOSWAPW & obj.AMask: rIIIEncoding,
	AAMOSWAPD & obj.AMask: rIIIEncoding,
	AAMOADDW & obj.AMask:  rIIIEncoding,
	AAMOADDD & obj.AMask:  rIIIEncoding,
	AAMOANDW & obj.AMask:  rIIIEncoding,
	AAMOANDD & obj.AMask:  rIIIEncoding,
	AAMOORW & obj.AMask:   rIIIEncoding,
	AAMOORD & obj.AMask:   rIIIEncoding,
	AAMOXORW & obj.AMask:  rIIIEncoding,
	AAMOXORD & obj.AMask:  rIIIEncoding,
	AAMOMAXW & obj.AMask:  rIIIEncoding,
	AAMOMAXD & obj.AMask:  rIIIEncoding,
	AAMOMAXUW & obj.AMask: rIIIEncoding,
	AAMOMAXUD & obj.AMask: rIIIEncoding,
	AAMOMINW & obj.AMask:  rIIIEncoding,
	AAMOMIND & obj.AMask:  rIIIEncoding,
	AAMOMINUW & obj.AMask: rIIIEncoding,
	AAMOMINUD & obj.AMask: rIIIEncoding,

	// 10.1: Base Counters and Timers
	ARDCYCLE & obj.AMask:   iIEncoding,
	ARDTIME & obj.AMask:    iIEncoding,
	ARDINSTRET & obj.AMask: iIEncoding,

	// 11.5: Single-Precision Load and Store Instructions
	AFLW & obj.AMask: iFEncoding,
	AFSW & obj.AMask: sFEncoding,

	// 11.6: Single-Precision Floating-Point Computational Instructions
	AFADDS & obj.AMask:   rFFFEncoding,
	AFSUBS & obj.AMask:   rFFFEncoding,
	AFMULS & obj.AMask:   rFFFEncoding,
	AFDIVS & obj.AMask:   rFFFEncoding,
	AFMINS & obj.AMask:   rFFFEncoding,
	AFMAXS & obj.AMask:   rFFFEncoding,
	AFSQRTS & obj.AMask:  rFFFEncoding,
	AFMADDS & obj.AMask:  rFFFFEncoding,
	AFMSUBS & obj.AMask:  rFFFFEncoding,
	AFNMSUBS & obj.AMask: rFFFFEncoding,
	AFNMADDS & obj.AMask: rFFFFEncoding,

	// 11.7: Single-Precision Floating-Point Conversion and Move Instructions
	AFCVTWS & obj.AMask:  rFIEncoding,
	AFCVTLS & obj.AMask:  rFIEncoding,
	AFCVTSW & obj.AMask:  rIFEncoding,
	AFCVTSL & obj.AMask:  rIFEncoding,
	AFCVTWUS & obj.AMask: rFIEncoding,
	AFCVTLUS & obj.AMask: rFIEncoding,
	AFCVTSWU & obj.AMask: rIFEncoding,
	AFCVTSLU & obj.AMask: rIFEncoding,
	AFSGNJS & obj.AMask:  rFFFEncoding,
	AFSGNJNS & obj.AMask: rFFFEncoding,
	AFSGNJXS & obj.AMask: rFFFEncoding,
	AFMVXS & obj.AMask:   rFIEncoding,
	AFMVSX & obj.AMask:   rIFEncoding,
	AFMVXW & obj.AMask:   rFIEncoding,
	AFMVWX & obj.AMask:   rIFEncoding,

	// 11.8: Single-Precision Floating-Point Compare Instructions
	AFEQS & obj.AMask: rFFIEncoding,
	AFLTS & obj.AMask: rFFIEncoding,
	AFLES & obj.AMask: rFFIEncoding,

	// 11.9: Single-Precision Floating-Point Classify Instruction
	AFCLASSS & obj.AMask: rFIEncoding,

	// 12.3: Double-Precision Load and Store Instructions
	AFLD & obj.AMask: iFEncoding,
	AFSD & obj.AMask: sFEncoding,

	// 12.4: Double-Precision Floating-Point Computational Instructions
	AFADDD & obj.AMask:   rFFFEncoding,
	AFSUBD & obj.AMask:   rFFFEncoding,
	AFMULD & obj.AMask:   rFFFEncoding,
	AFDIVD & obj.AMask:   rFFFEncoding,
	AFMIND & obj.AMask:   rFFFEncoding,
	AFMAXD & obj.AMask:   rFFFEncoding,
	AFSQRTD & obj.AMask:  rFFFEncoding,
	AFMADDD & obj.AMask:  rFFFFEncoding,
	AFMSUBD & obj.AMask:  rFFFFEncoding,
	AFNMSUBD & obj.AMask: rFFFFEncoding,
	AFNMADDD & obj.AMask: rFFFFEncoding,

	// 12.5: Double-Precision Floating-Point Conversion and Move Instructions
	AFCVTWD & obj.AMask:  rFIEncoding,
	AFCVTLD & obj.AMask:  rFIEncoding,
	AFCVTDW & obj.AMask:  rIFEncoding,
	AFCVTDL & obj.AMask:  rIFEncoding,
	AFCVTWUD & obj.AMask: rFIEncoding,
	AFCVTLUD & obj.AMask: rFIEncoding,
	AFCVTDWU & obj.AMask: rIFEncoding,
	AFCVTDLU & obj.AMask: rIFEncoding,
	AFCVTSD & obj.AMask:  rFFEncoding,
	AFCVTDS & obj.AMask:  rFFEncoding,
	AFSGNJD & obj.AMask:  rFFFEncoding,
	AFSGNJND & obj.AMask: rFFFEncoding,
	AFSGNJXD & obj.AMask: rFFFEncoding,
	AFMVXD & obj.AMask:   rFIEncoding,
	AFMVDX & obj.AMask:   rIFEncoding,

	// 12.6: Double-Precision Floating-Point Compare Instructions
	AFEQD & obj.AMask: rFFIEncoding,
	AFLTD & obj.AMask: rFFIEncoding,
	AFLED & obj.AMask: rFFIEncoding,

	// 12.7: Double-Precision Floating-Point Classify Instruction
	AFCLASSD & obj.AMask: rFIEncoding,

	// Privileged ISA

	// 3.2.1: Environment Call and Breakpoint
	AECALL & obj.AMask:  iIEncoding,
	AEBREAK & obj.AMask: iIEncoding,

	// Escape hatch
	AWORD & obj.AMask: rawEncoding,

	// Pseudo-operations
	obj.AFUNCDATA: pseudoOpEncoding,
	obj.APCDATA:   pseudoOpEncoding,
	obj.ATEXT:     pseudoOpEncoding,
	obj.ANOP:      pseudoOpEncoding,
	obj.ADUFFZERO: pseudoOpEncoding,
	obj.ADUFFCOPY: pseudoOpEncoding,
	obj.APCALIGN:  pseudoOpEncoding,
}
  1734  
  1735  // encodingForAs returns the encoding for an obj.As.
  1736  func encodingForAs(as obj.As) (encoding, error) {
  1737  	if base := as &^ obj.AMask; base != obj.ABaseRISCV && base != 0 {
  1738  		return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as)
  1739  	}
  1740  	asi := as & obj.AMask
  1741  	if int(asi) >= len(encodings) {
  1742  		return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as)
  1743  	}
  1744  	enc := encodings[asi]
  1745  	if enc.validate == nil {
  1746  		return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as)
  1747  	}
  1748  	return enc, nil
  1749  }
  1750  
// instruction is the assembler's intermediate representation of a single
// instruction to be encoded: an opcode together with the register numbers,
// immediate and function fields that the encoding for that opcode consumes.
// One obj.Prog may expand into several instruction values.
type instruction struct {
	p      *obj.Prog // Prog that instruction is for
	as     obj.As    // Assembler opcode
	rd     uint32    // Destination register
	rs1    uint32    // Source register 1
	rs2    uint32    // Source register 2
	rs3    uint32    // Source register 3
	imm    int64     // Immediate
	funct3 uint32    // Function 3
	funct7 uint32    // Function 7 (or Function 2)
}
  1762  
  1763  func (ins *instruction) String() string {
  1764  	if ins.p == nil {
  1765  		return ins.as.String()
  1766  	}
  1767  	var suffix string
  1768  	if ins.p.As != ins.as {
  1769  		suffix = fmt.Sprintf(" (%v)", ins.as)
  1770  	}
  1771  	return fmt.Sprintf("%v%v", ins.p, suffix)
  1772  }
  1773  
  1774  func (ins *instruction) encode() (uint32, error) {
  1775  	enc, err := encodingForAs(ins.as)
  1776  	if err != nil {
  1777  		return 0, err
  1778  	}
  1779  	if enc.length <= 0 {
  1780  		return 0, fmt.Errorf("%v: encoding called for a pseudo instruction", ins.as)
  1781  	}
  1782  	return enc.encode(ins), nil
  1783  }
  1784  
  1785  func (ins *instruction) length() int {
  1786  	enc, err := encodingForAs(ins.as)
  1787  	if err != nil {
  1788  		return 0
  1789  	}
  1790  	return enc.length
  1791  }
  1792  
  1793  func (ins *instruction) validate(ctxt *obj.Link) {
  1794  	enc, err := encodingForAs(ins.as)
  1795  	if err != nil {
  1796  		ctxt.Diag(err.Error())
  1797  		return
  1798  	}
  1799  	enc.validate(ctxt, ins)
  1800  }
  1801  
  1802  func (ins *instruction) usesRegTmp() bool {
  1803  	return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
  1804  }
  1805  
  1806  // instructionForProg returns the default *obj.Prog to instruction mapping.
  1807  func instructionForProg(p *obj.Prog) *instruction {
  1808  	ins := &instruction{
  1809  		as:  p.As,
  1810  		rd:  uint32(p.To.Reg),
  1811  		rs1: uint32(p.Reg),
  1812  		rs2: uint32(p.From.Reg),
  1813  		imm: p.From.Offset,
  1814  	}
  1815  	if len(p.RestArgs) == 1 {
  1816  		ins.rs3 = uint32(p.RestArgs[0].Reg)
  1817  	}
  1818  	return ins
  1819  }
  1820  
  1821  // instructionsForOpImmediate returns the machine instructions for an immediate
  1822  // operand. The instruction is specified by as and the source register is
  1823  // specified by rs, instead of the obj.Prog.
  1824  func instructionsForOpImmediate(p *obj.Prog, as obj.As, rs int16) []*instruction {
  1825  	// <opi> $imm, REG, TO
  1826  	ins := instructionForProg(p)
  1827  	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
  1828  
  1829  	low, high, err := Split32BitImmediate(ins.imm)
  1830  	if err != nil {
  1831  		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm, err)
  1832  		return nil
  1833  	}
  1834  	if high == 0 {
  1835  		return []*instruction{ins}
  1836  	}
  1837  
  1838  	// Split into two additions, if possible.
  1839  	// Do not split SP-writing instructions, as otherwise the recorded SP delta may be wrong.
  1840  	if p.Spadj == 0 && ins.as == AADDI && ins.imm >= -(1<<12) && ins.imm < 1<<12-1 {
  1841  		imm0 := ins.imm / 2
  1842  		imm1 := ins.imm - imm0
  1843  
  1844  		// ADDI $(imm/2), REG, TO
  1845  		// ADDI $(imm-imm/2), TO, TO
  1846  		ins.imm = imm0
  1847  		insADDI := &instruction{as: AADDI, rd: ins.rd, rs1: ins.rd, imm: imm1}
  1848  		return []*instruction{ins, insADDI}
  1849  	}
  1850  
  1851  	// LUI $high, TMP
  1852  	// ADDIW $low, TMP, TMP
  1853  	// <op> TMP, REG, TO
  1854  	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
  1855  	insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP, imm: low}
  1856  	switch ins.as {
  1857  	case AADDI:
  1858  		ins.as = AADD
  1859  	case AANDI:
  1860  		ins.as = AAND
  1861  	case AORI:
  1862  		ins.as = AOR
  1863  	case AXORI:
  1864  		ins.as = AXOR
  1865  	default:
  1866  		p.Ctxt.Diag("unsupported immediate instruction %v for splitting", p)
  1867  		return nil
  1868  	}
  1869  	ins.rs2 = REG_TMP
  1870  	if low == 0 {
  1871  		return []*instruction{insLUI, ins}
  1872  	}
  1873  	return []*instruction{insLUI, insADDIW, ins}
  1874  }
  1875  
  1876  // instructionsForLoad returns the machine instructions for a load. The load
  1877  // instruction is specified by as and the base/source register is specified
  1878  // by rs, instead of the obj.Prog.
  1879  func instructionsForLoad(p *obj.Prog, as obj.As, rs int16) []*instruction {
  1880  	if p.From.Type != obj.TYPE_MEM {
  1881  		p.Ctxt.Diag("%v requires memory for source", p)
  1882  		return nil
  1883  	}
  1884  
  1885  	switch as {
  1886  	case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD:
  1887  	default:
  1888  		p.Ctxt.Diag("%v: unknown load instruction %v", p, as)
  1889  		return nil
  1890  	}
  1891  
  1892  	// <load> $imm, REG, TO (load $imm+(REG), TO)
  1893  	ins := instructionForProg(p)
  1894  	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
  1895  	ins.imm = p.From.Offset
  1896  
  1897  	low, high, err := Split32BitImmediate(ins.imm)
  1898  	if err != nil {
  1899  		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm)
  1900  		return nil
  1901  	}
  1902  	if high == 0 {
  1903  		return []*instruction{ins}
  1904  	}
  1905  
  1906  	// LUI $high, TMP
  1907  	// ADD TMP, REG, TMP
  1908  	// <load> $low, TMP, TO
  1909  	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
  1910  	insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rs1}
  1911  	ins.rs1, ins.imm = REG_TMP, low
  1912  
  1913  	return []*instruction{insLUI, insADD, ins}
  1914  }
  1915  
  1916  // instructionsForStore returns the machine instructions for a store. The store
  1917  // instruction is specified by as and the target/source register is specified
  1918  // by rd, instead of the obj.Prog.
  1919  func instructionsForStore(p *obj.Prog, as obj.As, rd int16) []*instruction {
  1920  	if p.To.Type != obj.TYPE_MEM {
  1921  		p.Ctxt.Diag("%v requires memory for destination", p)
  1922  		return nil
  1923  	}
  1924  
  1925  	switch as {
  1926  	case ASW, ASH, ASB, ASD, AFSW, AFSD:
  1927  	default:
  1928  		p.Ctxt.Diag("%v: unknown store instruction %v", p, as)
  1929  		return nil
  1930  	}
  1931  
  1932  	// <store> $imm, REG, TO (store $imm+(TO), REG)
  1933  	ins := instructionForProg(p)
  1934  	ins.as, ins.rd, ins.rs1, ins.rs2 = as, uint32(rd), uint32(p.From.Reg), obj.REG_NONE
  1935  	ins.imm = p.To.Offset
  1936  
  1937  	low, high, err := Split32BitImmediate(ins.imm)
  1938  	if err != nil {
  1939  		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm)
  1940  		return nil
  1941  	}
  1942  	if high == 0 {
  1943  		return []*instruction{ins}
  1944  	}
  1945  
  1946  	// LUI $high, TMP
  1947  	// ADD TMP, TO, TMP
  1948  	// <store> $low, REG, TMP
  1949  	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
  1950  	insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rd}
  1951  	ins.rd, ins.imm = REG_TMP, low
  1952  
  1953  	return []*instruction{insLUI, insADD, ins}
  1954  }
  1955  
  1956  func instructionsForTLS(p *obj.Prog, ins *instruction) []*instruction {
  1957  	insAddTP := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: REG_TP}
  1958  
  1959  	var inss []*instruction
  1960  	if p.Ctxt.Flag_shared {
  1961  		// TLS initial-exec mode - load TLS offset from GOT, add the thread pointer
  1962  		// register, then load from or store to the resulting memory location.
  1963  		insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
  1964  		insLoadTLSOffset := &instruction{as: ALD, rd: REG_TMP, rs1: REG_TMP}
  1965  		inss = []*instruction{insAUIPC, insLoadTLSOffset, insAddTP, ins}
  1966  	} else {
  1967  		// TLS local-exec mode - load upper TLS offset, add the lower TLS offset,
  1968  		// add the thread pointer register, then load from or store to the resulting
  1969  		// memory location. Note that this differs from the suggested three
  1970  		// instruction sequence, as the Go linker does not currently have an
  1971  		// easy way to handle relocation across 12 bytes of machine code.
  1972  		insLUI := &instruction{as: ALUI, rd: REG_TMP}
  1973  		insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP}
  1974  		inss = []*instruction{insLUI, insADDIW, insAddTP, ins}
  1975  	}
  1976  	return inss
  1977  }
  1978  
  1979  func instructionsForTLSLoad(p *obj.Prog) []*instruction {
  1980  	if p.From.Sym.Type != objabi.STLSBSS {
  1981  		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.From.Sym)
  1982  		return nil
  1983  	}
  1984  
  1985  	ins := instructionForProg(p)
  1986  	ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), REG_TMP, obj.REG_NONE, 0
  1987  
  1988  	return instructionsForTLS(p, ins)
  1989  }
  1990  
  1991  func instructionsForTLSStore(p *obj.Prog) []*instruction {
  1992  	if p.To.Sym.Type != objabi.STLSBSS {
  1993  		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.To.Sym)
  1994  		return nil
  1995  	}
  1996  
  1997  	ins := instructionForProg(p)
  1998  	ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
  1999  
  2000  	return instructionsForTLS(p, ins)
  2001  }
  2002  
  2003  // instructionsForMOV returns the machine instructions for an *obj.Prog that
  2004  // uses a MOV pseudo-instruction.
  2005  func instructionsForMOV(p *obj.Prog) []*instruction {
  2006  	ins := instructionForProg(p)
  2007  	inss := []*instruction{ins}
  2008  
  2009  	if p.Reg != 0 {
  2010  		p.Ctxt.Diag("%v: illegal MOV instruction", p)
  2011  		return nil
  2012  	}
  2013  
  2014  	switch {
  2015  	case p.From.Type == obj.TYPE_CONST && p.To.Type == obj.TYPE_REG:
  2016  		// Handle constant to register moves.
  2017  		if p.As != AMOV {
  2018  			p.Ctxt.Diag("%v: unsupported constant load", p)
  2019  			return nil
  2020  		}
  2021  
  2022  		// For constants larger than 32 bits in size that have trailing zeros,
  2023  		// use the value with the trailing zeros removed and then use a SLLI
  2024  		// instruction to restore the original constant.
  2025  		// For example:
  2026  		// 	MOV $0x8000000000000000, X10
  2027  		// becomes
  2028  		// 	MOV $1, X10
  2029  		// 	SLLI $63, X10, X10
  2030  		var insSLLI *instruction
  2031  		if err := immIFits(ins.imm, 32); err != nil {
  2032  			ctz := bits.TrailingZeros64(uint64(ins.imm))
  2033  			if err := immIFits(ins.imm>>ctz, 32); err == nil {
  2034  				ins.imm = ins.imm >> ctz
  2035  				insSLLI = &instruction{as: ASLLI, rd: ins.rd, rs1: ins.rd, imm: int64(ctz)}
  2036  			}
  2037  		}
  2038  
  2039  		low, high, err := Split32BitImmediate(ins.imm)
  2040  		if err != nil {
  2041  			p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err)
  2042  			return nil
  2043  		}
  2044  
  2045  		// MOV $c, R -> ADD $c, ZERO, R
  2046  		ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, REG_ZERO, obj.REG_NONE, low
  2047  
  2048  		// LUI is only necessary if the constant does not fit in 12 bits.
  2049  		if high != 0 {
  2050  			// LUI top20bits(c), R
  2051  			// ADD bottom12bits(c), R, R
  2052  			insLUI := &instruction{as: ALUI, rd: ins.rd, imm: high}
  2053  			inss = []*instruction{insLUI}
  2054  			if low != 0 {
  2055  				ins.as, ins.rs1 = AADDIW, ins.rd
  2056  				inss = append(inss, ins)
  2057  			}
  2058  		}
  2059  		if insSLLI != nil {
  2060  			inss = append(inss, insSLLI)
  2061  		}
  2062  
  2063  	case p.From.Type == obj.TYPE_CONST && p.To.Type != obj.TYPE_REG:
  2064  		p.Ctxt.Diag("%v: constant load must target register", p)
  2065  		return nil
  2066  
  2067  	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG:
  2068  		// Handle register to register moves.
  2069  		switch p.As {
  2070  		case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb
  2071  			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0
  2072  		case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb
  2073  			ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0
  2074  		case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb
  2075  			ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255
  2076  		case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb
  2077  			ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg)
  2078  		case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb
  2079  			ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg)
  2080  		case AMOVB, AMOVH:
  2081  			// Use SLLI/SRAI to extend.
  2082  			ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
  2083  			if p.As == AMOVB {
  2084  				ins.imm = 56
  2085  			} else if p.As == AMOVH {
  2086  				ins.imm = 48
  2087  			}
  2088  			ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
  2089  			inss = append(inss, ins2)
  2090  		case AMOVHU, AMOVWU:
  2091  			// Use SLLI/SRLI to extend.
  2092  			ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
  2093  			if p.As == AMOVHU {
  2094  				ins.imm = 48
  2095  			} else if p.As == AMOVWU {
  2096  				ins.imm = 32
  2097  			}
  2098  			ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
  2099  			inss = append(inss, ins2)
  2100  		}
  2101  
  2102  	case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
  2103  		// Memory to register loads.
  2104  		switch p.From.Name {
  2105  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
  2106  			// MOV c(Rs), Rd -> L $c, Rs, Rd
  2107  			inss = instructionsForLoad(p, movToLoad(p.As), addrToReg(p.From))
  2108  
  2109  		case obj.NAME_EXTERN, obj.NAME_STATIC:
  2110  			if p.From.Sym.Type == objabi.STLSBSS {
  2111  				return instructionsForTLSLoad(p)
  2112  			}
  2113  
  2114  			// Note that the values for $off_hi and $off_lo are currently
  2115  			// zero and will be assigned during relocation.
  2116  			//
  2117  			// AUIPC $off_hi, Rd
  2118  			// L $off_lo, Rd, Rd
  2119  			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
  2120  			ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), ins.rd, obj.REG_NONE, 0
  2121  			inss = []*instruction{insAUIPC, ins}
  2122  
  2123  		default:
  2124  			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
  2125  			return nil
  2126  		}
  2127  
  2128  	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
  2129  		// Register to memory stores.
  2130  		switch p.As {
  2131  		case AMOVBU, AMOVHU, AMOVWU:
  2132  			p.Ctxt.Diag("%v: unsupported unsigned store", p)
  2133  			return nil
  2134  		}
  2135  		switch p.To.Name {
  2136  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
  2137  			// MOV Rs, c(Rd) -> S $c, Rs, Rd
  2138  			inss = instructionsForStore(p, movToStore(p.As), addrToReg(p.To))
  2139  
  2140  		case obj.NAME_EXTERN, obj.NAME_STATIC:
  2141  			if p.To.Sym.Type == objabi.STLSBSS {
  2142  				return instructionsForTLSStore(p)
  2143  			}
  2144  
  2145  			// Note that the values for $off_hi and $off_lo are currently
  2146  			// zero and will be assigned during relocation.
  2147  			//
  2148  			// AUIPC $off_hi, Rtmp
  2149  			// S $off_lo, Rtmp, Rd
  2150  			insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
  2151  			ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
  2152  			inss = []*instruction{insAUIPC, ins}
  2153  
  2154  		default:
  2155  			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
  2156  			return nil
  2157  		}
  2158  
  2159  	case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
  2160  		// MOV $sym+off(SP/SB), R
  2161  		if p.As != AMOV {
  2162  			p.Ctxt.Diag("%v: unsupported address load", p)
  2163  			return nil
  2164  		}
  2165  		switch p.From.Name {
  2166  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
  2167  			inss = instructionsForOpImmediate(p, AADDI, addrToReg(p.From))
  2168  
  2169  		case obj.NAME_EXTERN, obj.NAME_STATIC:
  2170  			// Note that the values for $off_hi and $off_lo are currently
  2171  			// zero and will be assigned during relocation.
  2172  			//
  2173  			// AUIPC $off_hi, R
  2174  			// ADDI $off_lo, R
  2175  			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
  2176  			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, ins.rd, obj.REG_NONE, 0
  2177  			inss = []*instruction{insAUIPC, ins}
  2178  
  2179  		default:
  2180  			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
  2181  			return nil
  2182  		}
  2183  
  2184  	case p.From.Type == obj.TYPE_ADDR && p.To.Type != obj.TYPE_REG:
  2185  		p.Ctxt.Diag("%v: address load must target register", p)
  2186  		return nil
  2187  
  2188  	default:
  2189  		p.Ctxt.Diag("%v: unsupported MOV", p)
  2190  		return nil
  2191  	}
  2192  
  2193  	return inss
  2194  }
  2195  
// instructionsForProg returns the machine instructions for an *obj.Prog.
//
// It starts from the default mapping produced by instructionForProg and then,
// depending on the opcode, either adjusts the operand fields in place,
// rewrites a pseudo-instruction into a real one (possibly appending a second
// instruction), or delegates to the MOV/load/store/immediate helpers. Opcodes
// not listed in the switch use the default mapping unchanged.
//
// Every returned instruction has its p field set to p. nil is returned after
// a diagnostic when p has more than one RestArgs entry or when FNES/FNED does
// not target a register; the helpers may also return nil.
func instructionsForProg(p *obj.Prog) []*instruction {
	ins := instructionForProg(p)
	inss := []*instruction{ins}

	if len(p.RestArgs) > 1 {
		p.Ctxt.Diag("too many source registers")
		return nil
	}

	switch ins.as {
	case AJAL, AJALR:
		// The link register is the From operand and the target is the To
		// operand, so rd and rs1 are swapped relative to the default mapping.
		ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE
		ins.imm = p.To.Offset

	case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
		// Rewrite the branch pseudo-instructions in terms of BEQ, BNE, BLT,
		// BLTU, BGE and BGEU, swapping operands or substituting REG_ZERO as
		// needed. The real branch opcodes keep the default operand mapping.
		switch ins.as {
		case ABEQZ:
			ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg)
		case ABGEZ:
			ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg)
		case ABGT:
			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg)
		case ABGTU:
			ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg)
		case ABGTZ:
			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO
		case ABLE:
			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg)
		case ABLEU:
			ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg)
		case ABLEZ:
			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO
		case ABLTZ:
			ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg)
		case ABNEZ:
			ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg)
		}
		// The branch target offset is carried by the To operand.
		ins.imm = p.To.Offset

	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
		inss = instructionsForMOV(p)

	case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD:
		inss = instructionsForLoad(p, ins.as, p.From.Reg)

	case ASW, ASH, ASB, ASD, AFSW, AFSD:
		inss = instructionsForStore(p, ins.as, p.To.Reg)

	case ALRW, ALRD:
		// Set aq to use acquire access ordering
		ins.funct7 = 2
		ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO

	case AADDI, AANDI, AORI, AXORI:
		inss = instructionsForOpImmediate(p, ins.as, p.Reg)

	case ASCW, ASCD:
		// Set release access ordering
		ins.funct7 = 1
		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)

	case AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
		AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD:
		// Set aqrl to use acquire & release access ordering
		ins.funct7 = 3
		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)

	case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET:
		// The immediate is the csr value recorded for the opcode; rs1 is
		// always zero and rd defaults to zero when no destination is given.
		insEnc := encode(p.As)
		if p.To.Type == obj.TYPE_NONE {
			ins.rd = REG_ZERO
		}
		ins.rs1 = REG_ZERO
		ins.imm = insEnc.csr

	case AFENCE:
		ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE
		ins.imm = 0x0ff

	case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
		// Set the rounding mode in funct3 to round to zero.
		ins.funct3 = 1

	case AFNES, AFNED:
		// Replace FNE[SD] with FEQ[SD] and NOT.
		if p.To.Type != obj.TYPE_REG {
			p.Ctxt.Diag("%v needs an integer register output", p)
			return nil
		}
		if ins.as == AFNES {
			ins.as = AFEQS
		} else {
			ins.as = AFEQD
		}
		ins2 := &instruction{
			as:  AXORI, // [bit] xor 1 = not [bit]
			rd:  ins.rd,
			rs1: ins.rd,
			imm: 1,
		}
		inss = append(inss, ins2)

	case AFSQRTS, AFSQRTD:
		// These instructions expect a zero (i.e. float register 0)
		// to be the second input operand.
		ins.rs1 = uint32(p.From.Reg)
		ins.rs2 = REG_F0

	case AFMADDS, AFMSUBS, AFNMADDS, AFNMSUBS,
		AFMADDD, AFMSUBD, AFNMADDD, AFNMSUBD:
		// Swap the first two operands so that the operands are in the same
		// order as they are in the specification: RS1, RS2, RS3, RD.
		ins.rs1, ins.rs2 = ins.rs2, ins.rs1

	case ANEG, ANEGW:
		// NEG rs, rd -> SUB rs, X0, rd
		ins.as = ASUB
		if p.As == ANEGW {
			ins.as = ASUBW
		}
		ins.rs1 = REG_ZERO
		// With no explicit destination the source register is overwritten.
		if ins.rd == obj.REG_NONE {
			ins.rd = ins.rs2
		}

	case ANOT:
		// NOT rs, rd -> XORI $-1, rs, rd
		ins.as = AXORI
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
		// With no explicit destination the source register is overwritten.
		if ins.rd == obj.REG_NONE {
			ins.rd = ins.rs1
		}
		ins.imm = -1

	case ASEQZ:
		// SEQZ rs, rd -> SLTIU $1, rs, rd
		ins.as = ASLTIU
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
		ins.imm = 1

	case ASNEZ:
		// SNEZ rs, rd -> SLTU rs, x0, rd
		ins.as = ASLTU
		ins.rs1 = REG_ZERO

	case AFABSS:
		// FABSS rs, rd -> FSGNJXS rs, rs, rd
		ins.as = AFSGNJXS
		ins.rs1 = uint32(p.From.Reg)

	case AFABSD:
		// FABSD rs, rd -> FSGNJXD rs, rs, rd
		ins.as = AFSGNJXD
		ins.rs1 = uint32(p.From.Reg)

	case AFNEGS:
		// FNEGS rs, rd -> FSGNJNS rs, rs, rd
		ins.as = AFSGNJNS
		ins.rs1 = uint32(p.From.Reg)

	case AFNEGD:
		// FNEGD rs, rd -> FSGNJND rs, rs, rd
		ins.as = AFSGNJND
		ins.rs1 = uint32(p.From.Reg)

	case ASLLI, ASRLI, ASRAI:
		// An out-of-range shift is reported but the instruction is still
		// returned.
		if ins.imm < 0 || ins.imm > 63 {
			p.Ctxt.Diag("%v: shift amount out of range 0 to 63", p)
		}

	case ASLLIW, ASRLIW, ASRAIW:
		// An out-of-range shift is reported but the instruction is still
		// returned.
		if ins.imm < 0 || ins.imm > 31 {
			p.Ctxt.Diag("%v: shift amount out of range 0 to 31", p)
		}
	}

	// Record the originating Prog on every instruction, including those
	// produced by the helpers above.
	for _, ins := range inss {
		ins.p = p
	}

	return inss
}
  2379  
  2380  // assemble emits machine code.
  2381  // It is called at the very end of the assembly process.
  2382  func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
  2383  	if ctxt.Retpoline {
  2384  		ctxt.Diag("-spectre=ret not supported on riscv")
  2385  		ctxt.Retpoline = false // don't keep printing
  2386  	}
  2387  
  2388  	// If errors were encountered during preprocess/validation, proceeding
  2389  	// and attempting to encode said instructions will only lead to panics.
  2390  	if ctxt.Errors > 0 {
  2391  		return
  2392  	}
  2393  
  2394  	for p := cursym.Func().Text; p != nil; p = p.Link {
  2395  		switch p.As {
  2396  		case AJAL:
  2397  			if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
  2398  				rel := obj.Addrel(cursym)
  2399  				rel.Off = int32(p.Pc)
  2400  				rel.Siz = 4
  2401  				rel.Sym = p.To.Sym
  2402  				rel.Add = p.To.Offset
  2403  				rel.Type = objabi.R_RISCV_JAL
  2404  			}
  2405  		case AJALR:
  2406  			if p.To.Sym != nil {
  2407  				ctxt.Diag("%v: unexpected AJALR with to symbol", p)
  2408  			}
  2409  
  2410  		case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
  2411  			var addr *obj.Addr
  2412  			var rt objabi.RelocType
  2413  			if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC {
  2414  				rt = objabi.R_RISCV_CALL
  2415  				addr = &p.From
  2416  			} else if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC {
  2417  				rt = objabi.R_RISCV_PCREL_ITYPE
  2418  				addr = &p.From
  2419  			} else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC {
  2420  				rt = objabi.R_RISCV_PCREL_STYPE
  2421  				addr = &p.To
  2422  			} else {
  2423  				break
  2424  			}
  2425  			if p.As == AAUIPC {
  2426  				if p.Link == nil {
  2427  					ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction")
  2428  					break
  2429  				}
  2430  				addr = &p.RestArgs[0].Addr
  2431  			}
  2432  			if addr.Sym == nil {
  2433  				ctxt.Diag("PC-relative relocation missing symbol")
  2434  				break
  2435  			}
  2436  			if addr.Sym.Type == objabi.STLSBSS {
  2437  				if ctxt.Flag_shared {
  2438  					rt = objabi.R_RISCV_TLS_IE
  2439  				} else {
  2440  					rt = objabi.R_RISCV_TLS_LE
  2441  				}
  2442  			}
  2443  
  2444  			rel := obj.Addrel(cursym)
  2445  			rel.Off = int32(p.Pc)
  2446  			rel.Siz = 8
  2447  			rel.Sym = addr.Sym
  2448  			rel.Add = addr.Offset
  2449  			rel.Type = rt
  2450  
  2451  		case obj.APCALIGN:
  2452  			alignedValue := p.From.Offset
  2453  			v := pcAlignPadLength(p.Pc, alignedValue)
  2454  			offset := p.Pc
  2455  			for ; v >= 4; v -= 4 {
  2456  				// NOP
  2457  				cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0})
  2458  				offset += 4
  2459  			}
  2460  			continue
  2461  		}
  2462  
  2463  		offset := p.Pc
  2464  		for _, ins := range instructionsForProg(p) {
  2465  			if ic, err := ins.encode(); err == nil {
  2466  				cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
  2467  				offset += int64(ins.length())
  2468  			}
  2469  			if ins.usesRegTmp() {
  2470  				p.Mark |= USES_REG_TMP
  2471  			}
  2472  		}
  2473  	}
  2474  
  2475  	obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil)
  2476  }
  2477  
  2478  func isUnsafePoint(p *obj.Prog) bool {
  2479  	return p.Mark&USES_REG_TMP == USES_REG_TMP || p.From.Reg == REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP
  2480  }
  2481  
// LinkRISCV64 is the obj.LinkArch for sys.ArchRISCV64. It wires this
// package's buildop, preprocess, assemble, progedit and unaryDst hooks, and
// its DWARF register table, into the generic obj machinery.
var LinkRISCV64 = obj.LinkArch{
	Arch:           sys.ArchRISCV64,
	Init:           buildop,
	Preprocess:     preprocess,
	Assemble:       assemble,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: RISCV64DWARFRegisters,
}