github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/runtime/asm_arm64.s (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "tls_arm64.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  
    11  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    12  	// SP = stack; R0 = argc; R1 = argv
    13  	// Sets up g0 and m0, optionally calls _cgo_init, then runs args/osinit/schedinit, newproc and mstart.
    14  	SUB	$32, RSP		// reserve 32 bytes; argc/argv are kept at 8(RSP)/16(RSP)
    15  	MOVW	R0, 8(RSP) // argc
    16  	MOVD	R1, 16(RSP) // argv
    17  
    18  	// create istack out of the given (operating system) stack.
    19  	// _cgo_init may update stackguard.
    20  	MOVD	$runtime·g0(SB), g	// g = &runtime·g0
    21  	MOVD	RSP, R7
    22  	MOVD	$(-64*1024)(R7), R0	// R0 = RSP - 64KB, used as the provisional low bound
    23  	MOVD	R0, g_stackguard0(g)
    24  	MOVD	R0, g_stackguard1(g)
    25  	MOVD	R0, (g_stack+stack_lo)(g)
    26  	MOVD	R7, (g_stack+stack_hi)(g)
    27  
    28  	// if there is a _cgo_init, call it using the gcc ABI.
    29  	MOVD	_cgo_init(SB), R12
    30  	CMP	$0, R12
    31  	BEQ	nocgo			// _cgo_init is zero: skip the call
    32  
    33  	MRS_TPIDR_R0			// load TLS base pointer
    34  	MOVD	R0, R3			// arg 3: TLS base pointer
    35  #ifdef TLSG_IS_VARIABLE
    36  	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
    37  #else
    38  	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
    39  #endif
    40  	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
    41  	MOVD	g, R0			// arg 0: G
    42  	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
    43  	BL	(R12)
    44  	ADD	$16, RSP		// release the 16 bytes reserved above
    45  
    46  nocgo:
    47  	BL	runtime·save_g(SB)
    48  	// update stackguard after _cgo_init
    49  	MOVD	(g_stack+stack_lo)(g), R0
    50  	ADD	$const__StackGuard, R0	// R0 = stack.lo + _StackGuard
    51  	MOVD	R0, g_stackguard0(g)
    52  	MOVD	R0, g_stackguard1(g)
    53  
    54  	// set the per-goroutine and per-mach "registers"
    55  	MOVD	$runtime·m0(SB), R0
    56  
    57  	// save m->g0 = g0
    58  	MOVD	g, m_g0(R0)
    59  	// save m0 to g0->m
    60  	MOVD	R0, g_m(g)
    61  
    62  	BL	runtime·check(SB)
    63  
    64  	MOVW	8(RSP), R0	// copy argc
    65  	MOVW	R0, -8(RSP)
    66  	MOVD	16(RSP), R0		// copy argv
    67  	MOVD	R0, 0(RSP)
    68  	BL	runtime·args(SB)
    69  	BL	runtime·osinit(SB)
    70  	BL	runtime·schedinit(SB)
    71  
    72  	// create a new goroutine to start program
    73  	MOVD	$runtime·mainPC(SB), R0		// entry
    74  	MOVD	RSP, R7
    75  	MOVD.W	$0, -8(R7)		// pre-indexed store: R7 -= 8, then write; four of these build a 32-byte frame
    76  	MOVD.W	R0, -8(R7)		// &runtime·mainPC
    77  	MOVD.W	$0, -8(R7)
    78  	MOVD.W	$0, -8(R7)
    79  	MOVD	R7, RSP			// RSP is now 32 bytes lower
    80  	BL	runtime·newproc(SB)
    81  	ADD	$32, RSP		// drop the 32 bytes pushed for newproc
    82  
    83  	// start this M
    84  	BL	runtime·mstart(SB)
    85  
    86  	MOVD	$0, R0			// reached only if mstart returns
    87  	MOVD	R0, (R0)	// boom
    88  	UNDEF
    89  
    90  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte word holding the address of runtime·main
    91  GLOBL	runtime·mainPC(SB),RODATA,$8			// read-only, 8 bytes
    92  
    93  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0	// executes a BRK instruction, then returns
    94  	BRK
    95  	RET
    96  
    97  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0	// no work to do here; returns immediately
    98  	RET
    99  
   100  /*
   101   *  go-routine
   102   */
   103  
   104  // void gosave(Gobuf*)
   105  // Save the caller's SP, frame pointer (R29), LR and g in the Gobuf (setjmp-like); lr and ret are zeroed.
   106  TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   107  	MOVD	buf+0(FP), R3		// R3 = buf
   108  	MOVD	RSP, R0
   109  	MOVD	R0, gobuf_sp(R3)	// buf.sp = RSP
   110  	MOVD	R29, gobuf_bp(R3)	// buf.bp = frame pointer
   111  	MOVD	LR, gobuf_pc(R3)	// buf.pc = our return address
   112  	MOVD	g, gobuf_g(R3)
   113  	MOVD	ZR, gobuf_lr(R3)
   114  	MOVD	ZR, gobuf_ret(R3)
   115  	// Assert ctxt is zero. See func save.
   116  	MOVD	gobuf_ctxt(R3), R0
   117  	CMP	$0, R0
   118  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   119  	CALL	runtime·badctxt(SB)
   120  	RET
   121  
   122  // void gogo(Gobuf*)
   123  // Restore g, SP, R29, LR, R0 (ret) and R26 (ctxt) from the Gobuf, clear those fields, and jump to buf.pc (longjmp-like).
   124  TEXT runtime·gogo(SB), NOSPLIT, $24-8
   125  	MOVD	buf+0(FP), R5		// R5 = buf
   126  	MOVD	gobuf_g(R5), g
   127  	BL	runtime·save_g(SB)
   128  
   129  	MOVD	0(g), R4	// make sure g is not nil
   130  	MOVD	gobuf_sp(R5), R0
   131  	MOVD	R0, RSP			// switch to the saved stack pointer
   132  	MOVD	gobuf_bp(R5), R29
   133  	MOVD	gobuf_lr(R5), LR
   134  	MOVD	gobuf_ret(R5), R0
   135  	MOVD	gobuf_ctxt(R5), R26
   136  	MOVD	$0, gobuf_sp(R5)	// clear the fields that were just consumed
   137  	MOVD	$0, gobuf_bp(R5)
   138  	MOVD	$0, gobuf_ret(R5)
   139  	MOVD	$0, gobuf_lr(R5)
   140  	MOVD	$0, gobuf_ctxt(R5)
   141  	CMP	ZR, ZR // set condition codes for == test, needed by stack split
   142  	MOVD	gobuf_pc(R5), R6
   143  	B	(R6)			// jump (not call) to the saved PC
   144  
   145  // void mcall(fn func(*g))
   146  // Switch to m->g0's stack, call fn(g).
   147  // Fn must never return. It should gogo(&g->sched)
   148  // to keep running g.
   149  TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   150  	// Save caller state in g->sched
   151  	MOVD	RSP, R0
   152  	MOVD	R0, (g_sched+gobuf_sp)(g)
   153  	MOVD	R29, (g_sched+gobuf_bp)(g)
   154  	MOVD	LR, (g_sched+gobuf_pc)(g)
   155  	MOVD	$0, (g_sched+gobuf_lr)(g)
   156  	MOVD	g, (g_sched+gobuf_g)(g)
   157  
   158  	// Switch to m->g0 & its stack, call fn.
   159  	MOVD	g, R3			// R3 = the calling g
   160  	MOVD	g_m(g), R8
   161  	MOVD	m_g0(R8), g		// g = m->g0
   162  	BL	runtime·save_g(SB)
   163  	CMP	g, R3
   164  	BNE	2(PC)			// caller was not g0: skip the badmcall jump
   165  	B	runtime·badmcall(SB)
   166  	MOVD	fn+0(FP), R26			// context
   167  	MOVD	0(R26), R4			// code pointer
   168  	MOVD	(g_sched+gobuf_sp)(g), R0
   169  	MOVD	R0, RSP	// sp = m->g0->sched.sp
   170  	MOVD	(g_sched+gobuf_bp)(g), R29
   171  	MOVD	R3, -8(RSP)		// the calling g, passed to fn on the stack
   172  	MOVD	$0, -16(RSP)
   173  	SUB	$16, RSP		// make the two words just written part of the stack
   174  	BL	(R4)
   175  	B	runtime·badmcall2(SB)	// reached only if fn returned
   176  
   177  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   178  // of the G stack. We need to distinguish the routine that
   179  // lives at the bottom of the G stack from the one that lives
   180  // at the top of the system stack because the one at the top of
   181  // the system stack terminates the stack walk (see topofstack()).
   182  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   183  	UNDEF			// first instruction of the body is UNDEF
   184  	BL	(LR)	// make sure this function is not leaf
   185  	RET
   186  
   187  // func systemstack(fn func())
   188  TEXT runtime·systemstack(SB), NOSPLIT, $0-8	// calls fn directly when g is gsignal or g0, otherwise switches to g0's stack first
   189  	MOVD	fn+0(FP), R3	// R3 = fn
   190  	MOVD	R3, R26		// context
   191  	MOVD	g_m(g), R4	// R4 = m
   192  
   193  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   194  	CMP	g, R5
   195  	BEQ	noswitch		// g == gsignal
   196  
   197  	MOVD	m_g0(R4), R5	// R5 = g0
   198  	CMP	g, R5
   199  	BEQ	noswitch		// g == g0
   200  
   201  	MOVD	m_curg(R4), R6
   202  	CMP	g, R6
   203  	BEQ	switch			// g == m->curg
   204  
   205  	// Bad: g is not gsignal, not g0, not curg. What is it?
   206  	// Hide call from linker nosplit analysis.
   207  	MOVD	$runtime·badsystemstack(SB), R3
   208  	BL	(R3)
   209  	B	runtime·abort(SB)
   210  
   211  switch:
   212  	// save our state in g->sched. Pretend to
   213  	// be systemstack_switch if the G stack is scanned.
   214  	MOVD	$runtime·systemstack_switch(SB), R6
   215  	ADD	$8, R6	// get past prologue
   216  	MOVD	R6, (g_sched+gobuf_pc)(g)
   217  	MOVD	RSP, R0
   218  	MOVD	R0, (g_sched+gobuf_sp)(g)
   219  	MOVD	R29, (g_sched+gobuf_bp)(g)
   220  	MOVD	$0, (g_sched+gobuf_lr)(g)
   221  	MOVD	g, (g_sched+gobuf_g)(g)
   222  
   223  	// switch to g0
   224  	MOVD	R5, g
   225  	BL	runtime·save_g(SB)
   226  	MOVD	(g_sched+gobuf_sp)(g), R3
   227  	// make it look like mstart called systemstack on g0, to stop traceback
   228  	SUB	$16, R3			// reserve 16 bytes below g0's saved SP
   229  	AND	$~15, R3		// round down to a 16-byte boundary
   230  	MOVD	$runtime·mstart(SB), R4
   231  	MOVD	R4, 0(R3)		// store mstart's address at the new top of stack
   232  	MOVD	R3, RSP
   233  	MOVD	(g_sched+gobuf_bp)(g), R29
   234  
   235  	// call target function
   236  	MOVD	0(R26), R3	// code pointer
   237  	BL	(R3)
   238  
   239  	// switch back to g
   240  	MOVD	g_m(g), R3
   241  	MOVD	m_curg(R3), g
   242  	BL	runtime·save_g(SB)
   243  	MOVD	(g_sched+gobuf_sp)(g), R0
   244  	MOVD	R0, RSP
   245  	MOVD	(g_sched+gobuf_bp)(g), R29
   246  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved sp and bp now that they are restored
   247  	MOVD	$0, (g_sched+gobuf_bp)(g)
   248  	RET
   249  
   250  noswitch:
   251  	// already on m stack, just call directly
   252  	// Using a tail call here cleans up tracebacks since we won't stop
   253  	// at an intermediate systemstack.
   254  	MOVD	0(R26), R3	// code pointer
   255  	MOVD.P	16(RSP), R30	// restore LR
   256  	SUB	$8, RSP, R29	// restore FP
   257  	B	(R3)
   258  
   259  /*
   260   * support for morestack
   261   */
   262  
   263  // Called during function prolog when more stack is needed.
   264  // Caller has already loaded:
   265  // R3 prolog's LR (R30)
   266  //
   267  // The traceback routines see morestack on a g0 as being
   268  // the top of a stack (for example, morestack calling newstack
   269  // calling the scheduler calling newm calling gc), so we must
   270  // record an argument size. For that purpose, it has no arguments.
   271  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   272  	// Cannot grow scheduler stack (m->g0).
   273  	MOVD	g_m(g), R8
   274  	MOVD	m_g0(R8), R4
   275  	CMP	g, R4
   276  	BNE	3(PC)			// g != g0: skip the two error instructions
   277  	BL	runtime·badmorestackg0(SB)
   278  	B	runtime·abort(SB)
   279  
   280  	// Cannot grow signal stack (m->gsignal).
   281  	MOVD	m_gsignal(R8), R4
   282  	CMP	g, R4
   283  	BNE	3(PC)			// g != gsignal: skip the two error instructions
   284  	BL	runtime·badmorestackgsignal(SB)
   285  	B	runtime·abort(SB)
   286  
   287  	// Called from f.
   288  	// Set g->sched to context in f
   289  	MOVD	RSP, R0
   290  	MOVD	R0, (g_sched+gobuf_sp)(g)
   291  	MOVD	R29, (g_sched+gobuf_bp)(g)
   292  	MOVD	LR, (g_sched+gobuf_pc)(g)
   293  	MOVD	R3, (g_sched+gobuf_lr)(g)
   294  	MOVD	R26, (g_sched+gobuf_ctxt)(g)
   295  
   296  	// Called from f.
   297  	// Set m->morebuf to f's callers.
   298  	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   299  	MOVD	RSP, R0
   300  	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   301  	MOVD	g, (m_morebuf+gobuf_g)(R8)
   302  
   303  	// Call newstack on m->g0's stack.
   304  	MOVD	m_g0(R8), g
   305  	BL	runtime·save_g(SB)
   306  	MOVD	(g_sched+gobuf_sp)(g), R0
   307  	MOVD	R0, RSP
   308  	MOVD	(g_sched+gobuf_bp)(g), R29
   309  	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
   310  	BL	runtime·newstack(SB)
   311  
   312  	// Not reached, but make sure the return PC from the call to newstack
   313  	// is still in this function, and not the beginning of the next.
   314  	UNDEF
   315  
   316  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0	// morestack entry that first zeroes R26
   317  	MOVW	$0, R26		// R26 = 0; morestack saves R26 as g->sched.ctxt
   318  	B runtime·morestack(SB)	// tail-jump, so LR and R3 are left untouched
   319  
   320  // reflectcall: call a function with the given argument list
   321  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   322  // We don't have variable-sized frames, so we use a small number
   323  // of constant-sized-frame functions to encode a few bits of size in the pc.
   324  // Caution: ugly multiline assembly macros in your future!
   325  // DISPATCH(NAME, MAXSIZE): jump to NAME if R16 <= MAXSIZE, otherwise fall through. Clobbers R27.
   326  #define DISPATCH(NAME,MAXSIZE)		\
   327  	MOVD	$MAXSIZE, R27;		\
   328  	CMP	R27, R16;		\
   329  	BGT	3(PC);			\
   330  	MOVD	$NAME(SB), R27;	\
   331  	B	(R27)
   332  // Note: can't just "B NAME(SB)" - bad inlining results.
   333  
   334  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
   335  	MOVWU argsize+24(FP), R16	// R16 = argsize, zero-extended from 32 bits
   336  	DISPATCH(runtime·call32, 32)
   337  	DISPATCH(runtime·call64, 64)
   338  	DISPATCH(runtime·call128, 128)
   339  	DISPATCH(runtime·call256, 256)
   340  	DISPATCH(runtime·call512, 512)
   341  	DISPATCH(runtime·call1024, 1024)
   342  	DISPATCH(runtime·call2048, 2048)
   343  	DISPATCH(runtime·call4096, 4096)
   344  	DISPATCH(runtime·call8192, 8192)
   345  	DISPATCH(runtime·call16384, 16384)
   346  	DISPATCH(runtime·call32768, 32768)
   347  	DISPATCH(runtime·call65536, 65536)
   348  	DISPATCH(runtime·call131072, 131072)
   349  	DISPATCH(runtime·call262144, 262144)
   350  	DISPATCH(runtime·call524288, 524288)
   351  	DISPATCH(runtime·call1048576, 1048576)
   352  	DISPATCH(runtime·call2097152, 2097152)
   353  	DISPATCH(runtime·call4194304, 4194304)
   354  	DISPATCH(runtime·call8388608, 8388608)
   355  	DISPATCH(runtime·call16777216, 16777216)
   356  	DISPATCH(runtime·call33554432, 33554432)
   357  	DISPATCH(runtime·call67108864, 67108864)
   358  	DISPATCH(runtime·call134217728, 134217728)
   359  	DISPATCH(runtime·call268435456, 268435456)
   360  	DISPATCH(runtime·call536870912, 536870912)
   361  	DISPATCH(runtime·call1073741824, 1073741824)
   362  	MOVD	$runtime·badreflectcall(SB), R0	// argsize exceeds the largest size class (1073741824)
   363  	B	(R0)
   364  
   365  #define CALLFN(NAME,MAXSIZE)			\
   366  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   367  	NO_LOCAL_POINTERS;			\
   368  	/* copy argsize bytes from arg to the frame starting at 8(RSP) */		\
   369  	MOVD	arg+16(FP), R3;			\
   370  	MOVWU	argsize+24(FP), R4;		\
   371  	ADD	$8, RSP, R5;			\
   372  	BIC	$0xf, R4, R6;			\
   373  	CBZ	R6, 6(PC);			\
   374  	/* R6=(argsize&~15) != 0: R6 becomes the end pointer of the 16-byte loop */		\
   375  	ADD	R6, R5, R6;			\
   376  	/* copy 16 bytes at a time */		\
   377  	LDP.P	16(R3), (R7, R8);		\
   378  	STP.P	(R7, R8), 16(R5);		\
   379  	CMP	R5, R6;				\
   380  	BNE	-3(PC);				\
   381  	AND	$0xf, R4, R6;			\
   382  	CBZ	R6, 6(PC);			\
   383  	/* R6=(argsize&15) != 0: R6 becomes the end pointer of the byte loop */		\
   384  	ADD	R6, R5, R6;			\
   385  	/* copy the remaining bytes 1 at a time */	\
   386  	MOVBU.P	1(R3), R7;			\
   387  	MOVBU.P	R7, 1(R5);			\
   388  	CMP	R5, R6;				\
   389  	BNE	-3(PC);				\
   390  	/* call f with its closure pointer in R26 */			\
   391  	MOVD	f+8(FP), R26;			\
   392  	MOVD	(R26), R0;			\
   393  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   394  	BL	(R0);				\
   395  	/* copy return values back; callRet takes R7, R3, R5, R4 as register arguments */		\
   396  	MOVD	argtype+0(FP), R7;		\
   397  	MOVD	arg+16(FP), R3;			\
   398  	MOVWU	n+24(FP), R4;			\
   399  	MOVWU	retoffset+28(FP), R6;		\
   400  	ADD	$8, RSP, R5;			\
   401  	ADD	R6, R5; 			\
   402  	ADD	R6, R3;				\
   403  	SUB	R6, R4;				\
   404  	BL	callRet<>(SB);			\
   405  	RET
   406  
   407  // callRet copies return values back at the end of call*. This is a
   408  // separate function so it can allocate stack space for the arguments
   409  // to reflectcallmove. It does not follow the Go ABI; it expects its
   410  // arguments in registers (R7, R3, R5, R4, as set up by CALLFN).
   411  TEXT callRet<>(SB), NOSPLIT, $40-0
   412  	MOVD	R7, 8(RSP)		// argtype
   413  	MOVD	R3, 16(RSP)		// arg + retoffset
   414  	MOVD	R5, 24(RSP)		// stack copy of the arguments + retoffset
   415  	MOVD	R4, 32(RSP)		// n - retoffset
   416  	BL	runtime·reflectcallmove(SB)
   417  	RET
   418  
   419  // Each frame size below is the size class plus 8, so that the overall frame size
   420  // is a multiple of 16, as required by the ABI. (There is another +8 for the saved LR.)
   421  CALLFN(·call32, 40 )
   422  CALLFN(·call64, 72 )
   423  CALLFN(·call128, 136 )
   424  CALLFN(·call256, 264 )
   425  CALLFN(·call512, 520 )
   426  CALLFN(·call1024, 1032 )
   427  CALLFN(·call2048, 2056 )
   428  CALLFN(·call4096, 4104 )
   429  CALLFN(·call8192, 8200 )
   430  CALLFN(·call16384, 16392 )
   431  CALLFN(·call32768, 32776 )
   432  CALLFN(·call65536, 65544 )
   433  CALLFN(·call131072, 131080 )
   434  CALLFN(·call262144, 262152 )
   435  CALLFN(·call524288, 524296 )
   436  CALLFN(·call1048576, 1048584 )
   437  CALLFN(·call2097152, 2097160 )
   438  CALLFN(·call4194304, 4194312 )
   439  CALLFN(·call8388608, 8388616 )
   440  CALLFN(·call16777216, 16777224 )
   441  CALLFN(·call33554432, 33554440 )
   442  CALLFN(·call67108864, 67108872 )
   443  CALLFN(·call134217728, 134217736 )
   444  CALLFN(·call268435456, 268435464 )
   445  CALLFN(·call536870912, 536870920 )
   446  CALLFN(·call1073741824, 1073741832 )
   447  
   448  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
   449  TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24	// AES-based hash of the 4 bytes at p, or the fallback when useAeshash is 0
   450  	MOVB	runtime·useAeshash(SB), R0
   451  	CMP	$0, R0
   452  	BEQ	noaes			// useAeshash == 0: use the fallback
   453  	MOVD	p+0(FP), R0
   454  	MOVD	h+8(FP), R1
   455  	MOVD	$ret+16(FP), R2		// R2 = address of the result slot
   456  	MOVD	$runtime·aeskeysched+0(SB), R3
   457  
   458  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   459  	VLD1	(R3), [V2.B16]		// V2 = first 16 bytes of aeskeysched
   460  	VLD1	(R0), V0.S[1]		// 32-bit lane 1 = the 4 data bytes
   461  	VMOV	R1, V0.S[0]		// 32-bit lane 0 = low 32 bits of the seed h
   462  
   463  	AESE	V2.B16, V0.B16		// three AESE rounds keyed by V2, AESMC between them
   464  	AESMC	V0.B16, V0.B16
   465  	AESE	V2.B16, V0.B16
   466  	AESMC	V0.B16, V0.B16
   467  	AESE	V2.B16, V0.B16
   468  
   469  	VST1	[V0.D1], (R2)		// store the low 64 bits of V0 as the result
   470  	RET
   471  noaes:
   472  	B	runtime·memhash32Fallback(SB)
   473  
   474  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
   475  TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24	// AES-based hash of the 8 bytes at p, or the fallback when useAeshash is 0
   476  	MOVB	runtime·useAeshash(SB), R0
   477  	CMP	$0, R0
   478  	BEQ	noaes			// useAeshash == 0: use the fallback
   479  	MOVD	p+0(FP), R0
   480  	MOVD	h+8(FP), R1
   481  	MOVD	$ret+16(FP), R2		// R2 = address of the result slot
   482  	MOVD	$runtime·aeskeysched+0(SB), R3
   483  
   484  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   485  	VLD1	(R3), [V2.B16]		// V2 = first 16 bytes of aeskeysched
   486  	VLD1	(R0), V0.D[1]		// 64-bit lane 1 = the 8 data bytes
   487  	VMOV	R1, V0.D[0]		// 64-bit lane 0 = the seed h
   488  
   489  	AESE	V2.B16, V0.B16		// three AESE rounds keyed by V2, AESMC between them
   490  	AESMC	V0.B16, V0.B16
   491  	AESE	V2.B16, V0.B16
   492  	AESMC	V0.B16, V0.B16
   493  	AESE	V2.B16, V0.B16
   494  
   495  	VST1	[V0.D1], (R2)		// store the low 64 bits of V0 as the result
   496  	RET
   497  noaes:
   498  	B	runtime·memhash64Fallback(SB)
   499  
   500  // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
   501  TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32	// loads aeshashbody's register arguments and tail-jumps to it
   502  	MOVB	runtime·useAeshash(SB), R0
   503  	CMP	$0, R0
   504  	BEQ	noaes			// useAeshash == 0: use the fallback
   505  	MOVD	p+0(FP), R0		// R0: data
   506  	MOVD	s+16(FP), R1		// R1: length
   507  	MOVD	h+8(FP), R3		// R3: seed
   508  	MOVD	$ret+24(FP), R2		// R2: address of the result slot
   509  	B	aeshashbody<>(SB)
   510  noaes:
   511  	B	runtime·memhashFallback(SB)
   512  
   513  // func strhash(p unsafe.Pointer, h uintptr) uintptr
   514  TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24	// hashes the string header at p via aeshashbody, or the fallback
   515  	MOVB	runtime·useAeshash(SB), R0
   516  	CMP	$0, R0
   517  	BEQ	noaes			// useAeshash == 0: use the fallback
   518  	MOVD	p+0(FP), R10 // string pointer
   519  	LDP	(R10), (R0, R1) // R0 = string data pointer, R1 = string length
   520  	MOVD	h+8(FP), R3		// R3: seed
   521  	MOVD	$ret+16(FP), R2 // return address
   522  	B	aeshashbody<>(SB)
   523  noaes:
   524  	B	runtime·strhashFallback(SB)
   525  
   526  // R0: data
   527  // R1: length
   528  // R2: address to put return value
   529  // R3: seed data
   530  TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0	// writes an 8-byte hash to (R2); code path is chosen by length
   531  	VEOR	V30.B16, V30.B16, V30.B16
   532  	VMOV	R3, V30.D[0]	// low half of V30 = seed
   533  	VMOV	R1, V30.D[1] // load length into seed
   534  
   535  	MOVD	$runtime·aeskeysched+0(SB), R4
   536  	VLD1.P	16(R4), [V0.B16]	// V0 = first 16 bytes of aeskeysched; R4 advances by 16
   537  	AESE	V30.B16, V0.B16		// mix the seed/length vector into V0
   538  	AESMC	V0.B16, V0.B16
   539  	CMP	$16, R1
   540  	BLO	aes0to15		// length < 16
   541  	BEQ	aes16			// length == 16
   542  	CMP	$32, R1
   543  	BLS	aes17to32		// length <= 32
   544  	CMP	$64, R1
   545  	BLS	aes33to64		// length <= 64
   546  	CMP	$128, R1
   547  	BLS	aes65to128		// length <= 128
   548  	B	aes129plus
   549  
   550  aes0to15:
   551  	CMP	$0, R1
   552  	BEQ	aes0
   553  	VEOR	V2.B16, V2.B16, V2.B16	// V2 = 0; the data is assembled into it piece by piece
   554  	TBZ	$3, R1, less_than_8	// bit 3 of length clear: no 8-byte piece
   555  	VLD1.P	8(R0), V2.D[0]
   556  
   557  less_than_8:
   558  	TBZ	$2, R1, less_than_4	// bit 2 of length clear: no 4-byte piece
   559  	VLD1.P	4(R0), V2.S[2]
   560  
   561  less_than_4:
   562  	TBZ	$1, R1, less_than_2	// bit 1 of length clear: no 2-byte piece
   563  	VLD1.P	2(R0), V2.H[6]
   564  
   565  less_than_2:
   566  	TBZ	$0, R1, done		// bit 0 of length clear: no final byte
   567  	VLD1	(R0), V2.B[14]
   568  done:
   569  	AESE	V0.B16, V2.B16		// three AESE rounds on the data, keyed by V0
   570  	AESMC	V2.B16, V2.B16
   571  	AESE	V0.B16, V2.B16
   572  	AESMC	V2.B16, V2.B16
   573  	AESE	V0.B16, V2.B16
   574  
   575  	VST1	[V2.D1], (R2)		// result = low 64 bits of V2
   576  	RET
   577  aes0:
   578  	VST1	[V0.D1], (R2)		// empty input: result = low 64 bits of the scrambled seed
   579  	RET
   580  aes16:
   581  	VLD1	(R0), [V2.B16]		// exactly 16 bytes: load them all and share the rounds at done
   582  	B	done
   583  
   584  aes17to32:
   585  	// make second seed
   586  	VLD1	(R4), [V1.B16]
   587  	AESE	V30.B16, V1.B16
   588  	AESMC	V1.B16, V1.B16
   589  	SUB	$16, R1, R10		// R10 = length - 16
   590  	VLD1.P	(R0)(R10), [V2.B16]	// first 16 bytes; R0 then advances by R10
   591  	VLD1	(R0), [V3.B16]		// last 16 bytes (may overlap the first load)
   592  
   593  	AESE	V0.B16, V2.B16
   594  	AESMC	V2.B16, V2.B16
   595  	AESE	V1.B16, V3.B16
   596  	AESMC	V3.B16, V3.B16
   597  
   598  	AESE	V0.B16, V2.B16
   599  	AESMC	V2.B16, V2.B16
   600  	AESE	V1.B16, V3.B16
   601  	AESMC	V3.B16, V3.B16
   602  
   603  	AESE	V0.B16, V2.B16
   604  	AESE	V1.B16, V3.B16
   605  
   606  	VEOR	V3.B16, V2.B16, V2.B16	// combine the two lanes
   607  	VST1	[V2.D1], (R2)
   608  	RET
   609  
   610  aes33to64:
   611  	VLD1	(R4), [V1.B16, V2.B16, V3.B16]	// three more key blocks, each mixed with V30 below
   612  	AESE	V30.B16, V1.B16
   613  	AESMC	V1.B16, V1.B16
   614  	AESE	V30.B16, V2.B16
   615  	AESMC	V2.B16, V2.B16
   616  	AESE	V30.B16, V3.B16
   617  	AESMC	V3.B16, V3.B16
   618  	SUB	$32, R1, R10		// R10 = length - 32
   619  
   620  	VLD1.P	(R0)(R10), [V4.B16, V5.B16]	// first 32 bytes; R0 then advances by R10
   621  	VLD1	(R0), [V6.B16, V7.B16]		// last 32 bytes (may overlap)
   622  
   623  	AESE	V0.B16, V4.B16
   624  	AESMC	V4.B16, V4.B16
   625  	AESE	V1.B16, V5.B16
   626  	AESMC	V5.B16, V5.B16
   627  	AESE	V2.B16, V6.B16
   628  	AESMC	V6.B16, V6.B16
   629  	AESE	V3.B16, V7.B16
   630  	AESMC	V7.B16, V7.B16
   631  
   632  	AESE	V0.B16, V4.B16
   633  	AESMC	V4.B16, V4.B16
   634  	AESE	V1.B16, V5.B16
   635  	AESMC	V5.B16, V5.B16
   636  	AESE	V2.B16, V6.B16
   637  	AESMC	V6.B16, V6.B16
   638  	AESE	V3.B16, V7.B16
   639  	AESMC	V7.B16, V7.B16
   640  
   641  	AESE	V0.B16, V4.B16
   642  	AESE	V1.B16, V5.B16
   643  	AESE	V2.B16, V6.B16
   644  	AESE	V3.B16, V7.B16
   645  
   646  	VEOR	V6.B16, V4.B16, V4.B16	// fold the four lanes into V4
   647  	VEOR	V7.B16, V5.B16, V5.B16
   648  	VEOR	V5.B16, V4.B16, V4.B16
   649  
   650  	VST1	[V4.D1], (R2)
   651  	RET
   652  
   653  aes65to128:
   654  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]	// seven more key blocks in V1-V7
   655  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   656  	AESE	V30.B16, V1.B16
   657  	AESMC	V1.B16, V1.B16
   658  	AESE	V30.B16, V2.B16
   659  	AESMC	V2.B16, V2.B16
   660  	AESE	V30.B16, V3.B16
   661  	AESMC	V3.B16, V3.B16
   662  	AESE	V30.B16, V4.B16
   663  	AESMC	V4.B16, V4.B16
   664  	AESE	V30.B16, V5.B16
   665  	AESMC	V5.B16, V5.B16
   666  	AESE	V30.B16, V6.B16
   667  	AESMC	V6.B16, V6.B16
   668  	AESE	V30.B16, V7.B16
   669  	AESMC	V7.B16, V7.B16
   670  
   671  	SUB	$64, R1, R10		// R10 = length - 64
   672  	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]	// first 64 bytes; R0 then advances by R10
   673  	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]	// last 64 bytes (may overlap)
   674  	AESE	V0.B16,	 V8.B16
   675  	AESMC	V8.B16,  V8.B16
   676  	AESE	V1.B16,	 V9.B16
   677  	AESMC	V9.B16,  V9.B16
   678  	AESE	V2.B16, V10.B16
   679  	AESMC	V10.B16,  V10.B16
   680  	AESE	V3.B16, V11.B16
   681  	AESMC	V11.B16,  V11.B16
   682  	AESE	V4.B16, V12.B16
   683  	AESMC	V12.B16,  V12.B16
   684  	AESE	V5.B16, V13.B16
   685  	AESMC	V13.B16,  V13.B16
   686  	AESE	V6.B16, V14.B16
   687  	AESMC	V14.B16,  V14.B16
   688  	AESE	V7.B16, V15.B16
   689  	AESMC	V15.B16,  V15.B16
   690  
   691  	AESE	V0.B16,	 V8.B16
   692  	AESMC	V8.B16,  V8.B16
   693  	AESE	V1.B16,	 V9.B16
   694  	AESMC	V9.B16,  V9.B16
   695  	AESE	V2.B16, V10.B16
   696  	AESMC	V10.B16,  V10.B16
   697  	AESE	V3.B16, V11.B16
   698  	AESMC	V11.B16,  V11.B16
   699  	AESE	V4.B16, V12.B16
   700  	AESMC	V12.B16,  V12.B16
   701  	AESE	V5.B16, V13.B16
   702  	AESMC	V13.B16,  V13.B16
   703  	AESE	V6.B16, V14.B16
   704  	AESMC	V14.B16,  V14.B16
   705  	AESE	V7.B16, V15.B16
   706  	AESMC	V15.B16,  V15.B16
   707  
   708  	AESE	V0.B16,	 V8.B16
   709  	AESE	V1.B16,	 V9.B16
   710  	AESE	V2.B16, V10.B16
   711  	AESE	V3.B16, V11.B16
   712  	AESE	V4.B16, V12.B16
   713  	AESE	V5.B16, V13.B16
   714  	AESE	V6.B16, V14.B16
   715  	AESE	V7.B16, V15.B16
   716  
   717  	VEOR	V12.B16, V8.B16, V8.B16	// fold the eight lanes into V8
   718  	VEOR	V13.B16, V9.B16, V9.B16
   719  	VEOR	V14.B16, V10.B16, V10.B16
   720  	VEOR	V15.B16, V11.B16, V11.B16
   721  	VEOR	V10.B16, V8.B16, V8.B16
   722  	VEOR	V11.B16, V9.B16, V9.B16
   723  	VEOR	V9.B16, V8.B16, V8.B16
   724  
   725  	VST1	[V8.D1], (R2)
   726  	RET
   727  
   728  aes129plus:
   729  	PRFM (R0), PLDL1KEEP
   730  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]	// seven more key blocks in V1-V7
   731  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   732  	AESE	V30.B16, V1.B16
   733  	AESMC	V1.B16, V1.B16
   734  	AESE	V30.B16, V2.B16
   735  	AESMC	V2.B16, V2.B16
   736  	AESE	V30.B16, V3.B16
   737  	AESMC	V3.B16, V3.B16
   738  	AESE	V30.B16, V4.B16
   739  	AESMC	V4.B16, V4.B16
   740  	AESE	V30.B16, V5.B16
   741  	AESMC	V5.B16, V5.B16
   742  	AESE	V30.B16, V6.B16
   743  	AESMC	V6.B16, V6.B16
   744  	AESE	V30.B16, V7.B16
   745  	AESMC	V7.B16, V7.B16
   746  	ADD	R0, R1, R10		// R10 = data + length
   747  	SUB	$128, R10, R10		// R10 = address of the last 128 bytes
   748  	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]	// V8-V15 = last 128 bytes
   749  	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
   750  	SUB	$1, R1, R1
   751  	LSR	$7, R1, R1		// R1 = (length-1)/128, the loop trip count
   752  
   753  aesloop:
   754  	AESE	V8.B16,	 V0.B16		// mix the current data blocks (V8-V15) into the state (V0-V7)
   755  	AESMC	V0.B16,  V0.B16
   756  	AESE	V9.B16,	 V1.B16
   757  	AESMC	V1.B16,  V1.B16
   758  	AESE	V10.B16, V2.B16
   759  	AESMC	V2.B16,  V2.B16
   760  	AESE	V11.B16, V3.B16
   761  	AESMC	V3.B16,  V3.B16
   762  	AESE	V12.B16, V4.B16
   763  	AESMC	V4.B16,  V4.B16
   764  	AESE	V13.B16, V5.B16
   765  	AESMC	V5.B16,  V5.B16
   766  	AESE	V14.B16, V6.B16
   767  	AESMC	V6.B16,  V6.B16
   768  	AESE	V15.B16, V7.B16
   769  	AESMC	V7.B16,  V7.B16
   770  
   771  	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]	// next 64 bytes from the front; R0 += 64
   772  	AESE	V8.B16,	 V0.B16
   773  	AESMC	V0.B16,  V0.B16
   774  	AESE	V9.B16,	 V1.B16
   775  	AESMC	V1.B16,  V1.B16
   776  	AESE	V10.B16, V2.B16
   777  	AESMC	V2.B16,  V2.B16
   778  	AESE	V11.B16, V3.B16
   779  	AESMC	V3.B16,  V3.B16
   780  
   781  	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]	// following 64 bytes; R0 += 64
   782  	AESE	V12.B16, V4.B16
   783  	AESMC	V4.B16,  V4.B16
   784  	AESE	V13.B16, V5.B16
   785  	AESMC	V5.B16,  V5.B16
   786  	AESE	V14.B16, V6.B16
   787  	AESMC	V6.B16,  V6.B16
   788  	AESE	V15.B16, V7.B16
   789  	AESMC	V7.B16,  V7.B16
   790  	SUB	$1, R1, R1
   791  	CBNZ	R1, aesloop		// repeat until the trip count reaches 0
   792  
   793  	AESE	V8.B16,	 V0.B16		// final rounds: three more AESE passes with the last loaded blocks
   794  	AESMC	V0.B16,  V0.B16
   795  	AESE	V9.B16,	 V1.B16
   796  	AESMC	V1.B16,  V1.B16
   797  	AESE	V10.B16, V2.B16
   798  	AESMC	V2.B16,  V2.B16
   799  	AESE	V11.B16, V3.B16
   800  	AESMC	V3.B16,  V3.B16
   801  	AESE	V12.B16, V4.B16
   802  	AESMC	V4.B16,  V4.B16
   803  	AESE	V13.B16, V5.B16
   804  	AESMC	V5.B16,  V5.B16
   805  	AESE	V14.B16, V6.B16
   806  	AESMC	V6.B16,  V6.B16
   807  	AESE	V15.B16, V7.B16
   808  	AESMC	V7.B16,  V7.B16
   809  
   810  	AESE	V8.B16,	 V0.B16
   811  	AESMC	V0.B16,  V0.B16
   812  	AESE	V9.B16,	 V1.B16
   813  	AESMC	V1.B16,  V1.B16
   814  	AESE	V10.B16, V2.B16
   815  	AESMC	V2.B16,  V2.B16
   816  	AESE	V11.B16, V3.B16
   817  	AESMC	V3.B16,  V3.B16
   818  	AESE	V12.B16, V4.B16
   819  	AESMC	V4.B16,  V4.B16
   820  	AESE	V13.B16, V5.B16
   821  	AESMC	V5.B16,  V5.B16
   822  	AESE	V14.B16, V6.B16
   823  	AESMC	V6.B16,  V6.B16
   824  	AESE	V15.B16, V7.B16
   825  	AESMC	V7.B16,  V7.B16
   826  
   827  	AESE	V8.B16,	 V0.B16
   828  	AESE	V9.B16,	 V1.B16
   829  	AESE	V10.B16, V2.B16
   830  	AESE	V11.B16, V3.B16
   831  	AESE	V12.B16, V4.B16
   832  	AESE	V13.B16, V5.B16
   833  	AESE	V14.B16, V6.B16
   834  	AESE	V15.B16, V7.B16
   835  
   836  	VEOR	V0.B16, V1.B16, V0.B16	// fold the eight state lanes into V0
   837  	VEOR	V2.B16, V3.B16, V2.B16
   838  	VEOR	V4.B16, V5.B16, V4.B16
   839  	VEOR	V6.B16, V7.B16, V6.B16
   840  	VEOR	V0.B16, V2.B16, V0.B16
   841  	VEOR	V4.B16, V6.B16, V4.B16
   842  	VEOR	V4.B16, V0.B16, V0.B16
   843  
   844  	VST1	[V0.D1], (R2)
   845  	RET
   846  
   847  TEXT runtime·procyield(SB),NOSPLIT,$0-0	// executes YIELD once per loop iteration, counting down from cycles
   848  	MOVWU	cycles+0(FP), R0	// R0 = cycles, zero-extended from 32 bits
   849  again:
   850  	YIELD
   851  	SUBW	$1, R0		// NOTE(review): the counter is decremented before it is tested, so cycles == 0 is not special-cased
   852  	CBNZ	R0, again
   853  	RET
   854  
   855  // void jmpdefer(fv, sp);
   856  // called from deferreturn.
   857  // 1. grab stored LR for caller
   858  // 2. sub 4 bytes to get back to BL deferreturn
   859  // 3. BR to fn
   860  TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   861  	MOVD	0(RSP), R0	// step 1: the word stored at 0(RSP)
   862  	SUB	$4, R0		// step 2: back up one 4-byte instruction
   863  	MOVD	R0, LR
   864  
   865  	MOVD	fv+0(FP), R26	// R26 = fv, the closure context
   866  	MOVD	argp+8(FP), R0
   867  	MOVD	R0, RSP
   868  	SUB	$8, RSP		// RSP = argp - 8
   869  	MOVD	0(R26), R3	// code pointer
   870  	B	(R3)		// step 3: jump, not call, so LR stays as set above
   871  
   872  // Save state of caller (LR, RSP, R29) into g->sched and zero sched.lr and sched.ret. Smashes R0.
   873  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   874  	MOVD	LR, (g_sched+gobuf_pc)(g)	// sched.pc = our return address
   875  	MOVD	RSP, R0
   876  	MOVD	R0, (g_sched+gobuf_sp)(g)
   877  	MOVD	R29, (g_sched+gobuf_bp)(g)
   878  	MOVD	$0, (g_sched+gobuf_lr)(g)
   879  	MOVD	$0, (g_sched+gobuf_ret)(g)
   880  	// Assert ctxt is zero. See func save.
   881  	MOVD	(g_sched+gobuf_ctxt)(g), R0
   882  	CMP	$0, R0
   883  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   884  	CALL	runtime·badctxt(SB)
   885  	RET
   886  
   887  // func asmcgocall(fn, arg unsafe.Pointer) int32
   888  // Call fn(arg) on the scheduler stack,
   889  // aligned appropriately for the gcc ABI.
   890  // See cgocall.go for more details.
   891  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   892  	MOVD	fn+0(FP), R1		// R1 = fn
   893  	MOVD	arg+8(FP), R0		// R0 = arg, the single argument passed to fn
   894  
   895  	MOVD	RSP, R2		// save original stack pointer
   896  	CMP	$0, g
   897  	BEQ	nosave			// no g at all: call on the current stack
   898  	MOVD	g, R4
   899  
   900  	// Figure out if we need to switch to m->g0 stack.
   901  	// We get called to create new OS threads too, and those
   902  	// come in on the m->g0 stack already.
   903  	MOVD	g_m(g), R8
   904  	MOVD	m_gsignal(R8), R3
   905  	CMP	R3, g
   906  	BEQ	nosave			// already on gsignal
   907  	MOVD	m_g0(R8), R3
   908  	CMP	R3, g
   909  	BEQ	nosave			// already on g0
   910  
   911  	// Switch to system stack.
   912  	MOVD	R0, R9	// gosave<> and save_g might clobber R0
   913  	BL	gosave<>(SB)
   914  	MOVD	R3, g			// g = m->g0
   915  	BL	runtime·save_g(SB)
   916  	MOVD	(g_sched+gobuf_sp)(g), R0
   917  	MOVD	R0, RSP
   918  	MOVD	(g_sched+gobuf_bp)(g), R29
   919  	MOVD	R9, R0			// restore arg
   920  
   921  	// Now on a scheduling stack (a pthread-created stack).
   922  	// Save room for two of our pointers /*, plus 32 bytes of callee
   923  	// save area that lives on the caller stack. */
   924  	MOVD	RSP, R13
   925  	SUB	$16, R13
   926  	MOVD	R13, RSP
   927  	MOVD	R4, 0(RSP)	// save old g on stack
   928  	MOVD	(g_stack+stack_hi)(R4), R4
   929  	SUB	R2, R4
   930  	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   931  	BL	(R1)
   932  	MOVD	R0, R9			// keep fn's result across save_g
   933  
   934  	// Restore g, stack pointer. R0 is errno, so don't touch it
   935  	MOVD	0(RSP), g
   936  	BL	runtime·save_g(SB)
   937  	MOVD	(g_stack+stack_hi)(g), R5
   938  	MOVD	8(RSP), R6
   939  	SUB	R6, R5			// RSP = stack.hi - saved depth
   940  	MOVD	R9, R0
   941  	MOVD	R5, RSP
   942  
   943  	MOVW	R0, ret+16(FP)
   944  	RET
   945  
   946  nosave:
   947  	// Running on a system stack, perhaps even without a g.
   948  	// Having no g can happen during thread creation or thread teardown
   949  	// (see needm/dropm on Solaris, for example).
   950  	// This code is like the above sequence but without saving/restoring g
   951  	// and without worrying about the stack moving out from under us
   952  	// (because we're on a system stack, not a goroutine stack).
   953  	// The above code could be used directly if already on a system stack,
   954  	// but then the only path through this code would be a rare case on Solaris.
   955  	// Using this code for all "already on system stack" calls exercises it more,
   956  	// which should help keep it correct.
   957  	MOVD	RSP, R13
   958  	SUB	$16, R13
   959  	MOVD	R13, RSP
   960  	MOVD	$0, R4
   961  	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
   962  	MOVD	R2, 8(RSP)	// Save original stack pointer.
   963  	BL	(R1)
   964  	// Restore stack pointer.
   965  	MOVD	8(RSP), R2
   966  	MOVD	R2, RSP
   967  	MOVW	R0, ret+16(FP)	// int32 result: 4-byte store so nothing past the 20-byte argument frame is written
   968  	RET
   969  
   970  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   971  // Turn the fn into a Go func (by taking its address) and call
   972  // cgocallback_gofunc.
   973  TEXT runtime·cgocallback(SB),NOSPLIT,$40-32
   974  	MOVD	$fn+0(FP), R0		// R0 = address of the fn argument slot
   975  	MOVD	R0, 8(RSP)
   976  	MOVD	frame+8(FP), R0
   977  	MOVD	R0, 16(RSP)
   978  	MOVD	framesize+16(FP), R0
   979  	MOVD	R0, 24(RSP)
   980  	MOVD	ctxt+24(FP), R0
   981  	MOVD	R0, 32(RSP)
   982  	MOVD	$runtime·cgocallback_gofunc(SB), R0
   983  	BL	(R0)			// indirect call through R0
   984  	RET
   985  
   986  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   987  // Runs runtime.cgocallbackg on m->curg's stack, borrowing an m via needm when the thread has no g. See cgocall.go for more details.
   988  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32
   989  	NO_LOCAL_POINTERS
   990  
   991  	// Load g from thread-local storage (only when iscgo is set).
   992  	MOVB	runtime·iscgo(SB), R3
   993  	CMP	$0, R3
   994  	BEQ	nocgo	// iscgo == 0: skip load_g
   995  	BL	runtime·load_g(SB)
   996  nocgo:
   997  
   998  	// If g is nil, Go did not create the current thread.
   999  	// Call needm to obtain one for temporary use.
  1000  	// In this case, we're running on the thread stack, so there's
  1001  	// lots of space, but the linker doesn't know. Hide the call from
  1002  	// the linker analysis by using an indirect call.
  1003  	CMP	$0, g
  1004  	BEQ	needm
  1005  
  1006  	MOVD	g_m(g), R8
  1007  	MOVD	R8, savedm-8(SP)	// nonzero savedm: the exit path will skip dropm
  1008  	B	havem
  1009  
  1010  needm:
  1011  	MOVD	g, savedm-8(SP) // g is zero, so is m.
  1012  	MOVD	$runtime·needm(SB), R0
  1013  	BL	(R0)
  1014  
  1015  	// Set m->sched.sp = SP, so that if a panic happens
  1016  	// during the function we are about to execute, it will
  1017  	// have a valid SP to run on the g0 stack.
  1018  	// The next few lines (after the havem label)
  1019  	// will save this SP onto the stack and then write
  1020  	// the same SP back to m->sched.sp. That seems redundant,
  1021  	// but if an unrecovered panic happens, unwindm will
  1022  	// restore the g->sched.sp from the stack location
  1023  	// and then systemstack will try to use it. If we don't set it here,
  1024  	// that restored SP will be uninitialized (typically 0) and
  1025  	// will not be usable.
  1026  	MOVD	g_m(g), R8
  1027  	MOVD	m_g0(R8), R3
  1028  	MOVD	RSP, R0
  1029  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1030  	MOVD	R29, (g_sched+gobuf_bp)(R3)	// also record the frame pointer (R29) in g0's sched.bp
  1031  
  1032  havem:
  1033  	// Now there's a valid m, and we're running on its m->g0.
  1034  	// Save current m->g0->sched.sp on stack and then set it to SP.
  1035  	// Save current sp in m->g0->sched.sp in preparation for
  1036  	// switch back to m->curg stack.
  1037  	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
  1038  	// Beware that the frame size is actually 32+16.
  1039  	MOVD	m_g0(R8), R3
  1040  	MOVD	(g_sched+gobuf_sp)(R3), R4
  1041  	MOVD	R4, savedsp-16(SP)	// old g0 sched.sp, reloaded after the callback returns
  1042  	MOVD	RSP, R0
  1043  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1044  
  1045  	// Switch to m->curg stack and call runtime.cgocallbackg.
  1046  	// Because we are taking over the execution of m->curg
  1047  	// but *not* resuming what had been running, we need to
  1048  	// save that information (m->curg->sched) so we can restore it.
  1049  	// We can restore m->curg->sched.sp easily, because calling
  1050  	// runtime.cgocallbackg leaves SP unchanged upon return.
  1051  	// To save m->curg->sched.pc, we push it onto the stack.
  1052  	// This has the added benefit that it looks to the traceback
  1053  	// routine like cgocallbackg is going to return to that
  1054  	// PC (because the frame we allocate below has the same
  1055  	// size as cgocallback_gofunc's frame declared above)
  1056  	// so that the traceback will seamlessly trace back into
  1057  	// the earlier calls.
  1058  	//
  1059  	// In the new goroutine, -8(SP) is unused (where SP refers to
  1060  	// m->curg's SP while we're setting it up, before we've adjusted it).
  1061  	MOVD	m_curg(R8), g
  1062  	BL	runtime·save_g(SB)
  1063  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
  1064  	MOVD	(g_sched+gobuf_pc)(g), R5
  1065  	MOVD	R5, -48(R4)	// saved PC; this slot becomes 0(RSP) after the switch below
  1066  	MOVD	(g_sched+gobuf_bp)(g), R5
  1067  	MOVD	R5, -56(R4)	// saved sched.bp, 8 bytes below the new RSP
  1068  	MOVD	ctxt+24(FP), R0
  1069  	MOVD	R0, -40(R4)	// ctxt; this slot becomes 8(RSP) after the switch below
  1070  	MOVD	$-48(R4), R0 // maintain 16-byte SP alignment
  1071  	MOVD	R0, RSP
  1072  	BL	runtime·cgocallbackg(SB)
  1073  
  1074  	// Restore g->sched (== m->curg->sched) from saved values.
  1075  	MOVD	0(RSP), R5	// the PC stored at -48(R4) before the call
  1076  	MOVD	R5, (g_sched+gobuf_pc)(g)
  1077  	MOVD	RSP, R4
  1078  	ADD	$48, R4, R4	// undo the 48-byte adjustment made before the call
  1079  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1080  
  1081  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
  1082  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
  1083  	// so we do not have to restore it.)
  1084  	MOVD	g_m(g), R8
  1085  	MOVD	m_g0(R8), g
  1086  	BL	runtime·save_g(SB)
  1087  	MOVD	(g_sched+gobuf_sp)(g), R0
  1088  	MOVD	R0, RSP
  1089  	MOVD	savedsp-16(SP), R4
  1090  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1091  
  1092  	// If the m on entry was nil, we called needm above to borrow an m
  1093  	// for the duration of the call. Since the call is over, return it with dropm.
  1094  	MOVD	savedm-8(SP), R6
  1095  	CMP	$0, R6
  1096  	BNE	droppedm	// savedm != 0: the m was not borrowed, nothing to drop
  1097  	MOVD	$runtime·dropm(SB), R0
  1098  	BL	(R0)	// indirect call, like the needm call above
  1099  droppedm:
  1100  
  1101  	// Done!
  1102  	RET
  1103  
  1104  // Called from cgo wrappers, this function returns g->m->curg.stack.hi in R0.
  1105  // Must obey the gcc calling convention, so g (R28) and R27 are preserved.
  1106  TEXT _cgo_topofstack(SB),NOSPLIT,$24
  1107  	// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
  1108  	// are callee-save in the gcc calling convention, so save them.
  1109  	MOVD	R27, savedR27-8(SP)
  1110  	MOVD	g, saveG-16(SP)
  1111  
  1112  	BL	runtime·load_g(SB)
  1113  	MOVD	g_m(g), R0	// R0 = g->m
  1114  	MOVD	m_curg(R0), R0	// R0 = g->m->curg
  1115  	MOVD	(g_stack+stack_hi)(R0), R0	// R0 = g->m->curg.stack.hi (the result)
  1116  
  1117  	MOVD	saveG-16(SP), g
  1118  	MOVD	savedR27-8(SP), R27	// same slot and name as the spill above
  1119  	RET
  1120  
  1121  // void setg(G*); installs its argument as the current g. For use by needm.
  1122  TEXT runtime·setg(SB), NOSPLIT, $0-8
  1123  	MOVD	gg+0(FP), g	// g = gg
  1124  	// Reached only when iscgo is set, so save_g is called unconditionally.
  1125  	CALL	runtime·save_g(SB)
  1126  	RET
  1127  
  1128  // void setg_gcc(G*); gcc-callable entry point that makes the G passed in R0 the current g.
  1129  TEXT setg_gcc<>(SB),NOSPLIT,$8
  1130  	MOVD	R27, savedR27-8(SP)	// spill R27; it is reloaded once save_g has returned
  1131  	MOVD	R0, g
  1132  	CALL	runtime·save_g(SB)
  1133  	MOVD	savedR27-8(SP), R27
  1134  	RET
  1135  
  1136  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
  1137  	MOVD	$0, R0	// R0 = 0
  1138  	MOVD	(R0), R0	// load through address 0, intended to fault
  1139  	UNDEF	// reached only if the load above did not stop execution
  1140  
  1141  TEXT runtime·return0(SB), NOSPLIT, $0
  1142  	MOVW	ZR, R0	// leave 0 in R0 for the caller
  1143  	RET
  1144  
  1145  // The top-most function running on a goroutine
  1146  // returns to goexit+PCQuantum (presumably the BL just past the leading NOP; confirm PCQuantum for arm64).
  1147  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
  1148  	MOVD	R0, R0	// NOP; keep it first so that goexit+PCQuantum still lies inside this function
  1149  	BL	runtime·goexit1(SB)	// does not return, hence no RET after it
  1150  
  1151  // Invoked from .init_array under the platform ABI (not the Go ABI); links the moduledata passed in R0 after lastmoduledatap.
  1152  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1153  	SUB	$16, RSP
  1154  	MOVD	R27, 8(RSP)	// R27 is callee-save, yet the global accesses below use it implicitly
  1155  	MOVD	runtime·lastmoduledatap(SB), R1
  1156  	MOVD	R0, moduledata_next(R1)	// lastmoduledatap.next = R0
  1157  	MOVD	R0, runtime·lastmoduledatap(SB)	// lastmoduledatap = R0
  1158  	MOVD	8(RSP), R27
  1159  	ADD	$16, RSP
  1160  	RET
  1161  
  1162  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1163  	MOVD	$1, R0
  1164  	MOVB	R0, ret+0(FP)	// the one-byte result is always 1
  1165  	RET
  1166  
  1167  // gcWriteBarrier performs a heap pointer write and informs the GC.
  1168  // It appends a (new value, old *slot) record to the current P's wbBuf and calls wbBufFlush when that record fills the buffer.
  1169  // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1170  // - R2 is the destination of the write
  1171  // - R3 is the value being written at R2
  1172  // It clobbers condition codes.
  1173  // It does not clobber any general-purpose registers,
  1174  // but may clobber others (e.g., floating point registers)
  1175  // The act of CALLing gcWriteBarrier will clobber R30 (LR).
  1176  TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$216
  1177  	// Save the registers clobbered by the fast path (R0 and R1) in the top two frame slots.
  1178  	MOVD	R0, 200(RSP)
  1179  	MOVD	R1, 208(RSP)
  1180  	MOVD	g_m(g), R0
  1181  	MOVD	m_p(R0), R0	// R0 = g.m.p
  1182  	MOVD	(p_wbBuf+wbBuf_next)(R0), R1	// R1 = p.wbBuf.next
  1183  	// Increment wbBuf.next position by one 16-byte record (two 8-byte words).
  1184  	ADD	$16, R1
  1185  	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
  1186  	MOVD	(p_wbBuf+wbBuf_end)(R0), R0
  1187  	CMP	R1, R0	// next == end? The flags are consumed by the BEQ below.
  1188  	// Record the write in the slot just claimed, i.e. the 16 bytes below the new next.
  1189  	MOVD	R3, -16(R1)	// Record value
  1190  	MOVD	(R2), R0	// TODO: This turns bad writes into bad reads.
  1191  	MOVD	R0, -8(R1)	// Record *slot
  1192  	// Is the buffer full? (flags set in CMP above)
  1193  	BEQ	flush
  1194  ret:
  1195  	MOVD	200(RSP), R0
  1196  	MOVD	208(RSP), R1
  1197  	// Do the write.
  1198  	MOVD	R3, (R2)
  1199  	RET
  1200  
  1201  flush:
  1202  	// Save all general purpose registers since these could be
  1203  	// clobbered by wbBufFlush and were not saved by the caller.
  1204  	MOVD	R2, 8(RSP)	// Also first argument to wbBufFlush
  1205  	MOVD	R3, 16(RSP)	// Also second argument to wbBufFlush
  1206  	// R0 already saved
  1207  	// R1 already saved
  1208  	MOVD	R4, 24(RSP)
  1209  	MOVD	R5, 32(RSP)
  1210  	MOVD	R6, 40(RSP)
  1211  	MOVD	R7, 48(RSP)
  1212  	MOVD	R8, 56(RSP)
  1213  	MOVD	R9, 64(RSP)
  1214  	MOVD	R10, 72(RSP)
  1215  	MOVD	R11, 80(RSP)
  1216  	MOVD	R12, 88(RSP)
  1217  	MOVD	R13, 96(RSP)
  1218  	MOVD	R14, 104(RSP)
  1219  	MOVD	R15, 112(RSP)
  1220  	MOVD	R16, 120(RSP)
  1221  	MOVD	R17, 128(RSP)
  1222  	// R18 is unused.
  1223  	MOVD	R19, 136(RSP)
  1224  	MOVD	R20, 144(RSP)
  1225  	MOVD	R21, 152(RSP)
  1226  	MOVD	R22, 160(RSP)
  1227  	MOVD	R23, 168(RSP)
  1228  	MOVD	R24, 176(RSP)
  1229  	MOVD	R25, 184(RSP)
  1230  	MOVD	R26, 192(RSP)
  1231  	// R27 is temp register.
  1232  	// R28 is g.
  1233  	// R29 is frame pointer (unused).
  1234  	// R30 is LR, which was saved by the prologue.
  1235  	// R31 is SP.
  1236  
  1237  	// This takes arguments R2 and R3, stored at 8(RSP) and 16(RSP) above.
  1238  	CALL	runtime·wbBufFlush(SB)
  1239  
  1240  	MOVD	8(RSP), R2	// reload every register spilled above, from the same slots
  1241  	MOVD	16(RSP), R3
  1242  	MOVD	24(RSP), R4
  1243  	MOVD	32(RSP), R5
  1244  	MOVD	40(RSP), R6
  1245  	MOVD	48(RSP), R7
  1246  	MOVD	56(RSP), R8
  1247  	MOVD	64(RSP), R9
  1248  	MOVD	72(RSP), R10
  1249  	MOVD	80(RSP), R11
  1250  	MOVD	88(RSP), R12
  1251  	MOVD	96(RSP), R13
  1252  	MOVD	104(RSP), R14
  1253  	MOVD	112(RSP), R15
  1254  	MOVD	120(RSP), R16
  1255  	MOVD	128(RSP), R17
  1256  	MOVD	136(RSP), R19
  1257  	MOVD	144(RSP), R20
  1258  	MOVD	152(RSP), R21
  1259  	MOVD	160(RSP), R22
  1260  	MOVD	168(RSP), R23
  1261  	MOVD	176(RSP), R24
  1262  	MOVD	184(RSP), R25
  1263  	MOVD	192(RSP), R26
  1264  	JMP	ret	// rejoin the fast path: restore R0/R1, then perform the write
  1265  
  1266  // Note: these functions use a special calling convention to save generated code space.
  1267  // Arguments are passed in registers, but the space for those arguments is allocated
  1268  // in the caller's stack frame. These stubs write the args into that stack space and
  1269  // then tail call to the corresponding runtime handler.
  1270  // The tail call makes these stubs disappear in backtraces.
  1271  TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
  1272  	MOVD	R1, y+8(FP)	// y arrives in R1
  1273  	MOVD	R0, x+0(FP)	// x arrives in R0
  1274  	B	runtime·goPanicIndex(SB)	// tail call
  1275  TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
  1276  	MOVD	R1, y+8(FP)	// y arrives in R1
  1277  	MOVD	R0, x+0(FP)	// x arrives in R0
  1278  	B	runtime·goPanicIndexU(SB)	// tail call
  1279  TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
  1280  	MOVD	R2, y+8(FP)	// y arrives in R2
  1281  	MOVD	R1, x+0(FP)	// x arrives in R1
  1282  	B	runtime·goPanicSliceAlen(SB)	// tail call
  1283  TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
  1284  	MOVD	R2, y+8(FP)	// y arrives in R2
  1285  	MOVD	R1, x+0(FP)	// x arrives in R1
  1286  	B	runtime·goPanicSliceAlenU(SB)	// tail call
  1287  TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
  1288  	MOVD	R2, y+8(FP)	// y arrives in R2
  1289  	MOVD	R1, x+0(FP)	// x arrives in R1
  1290  	B	runtime·goPanicSliceAcap(SB)	// tail call
  1291  TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
  1292  	MOVD	R2, y+8(FP)	// y arrives in R2
  1293  	MOVD	R1, x+0(FP)	// x arrives in R1
  1294  	B	runtime·goPanicSliceAcapU(SB)	// tail call
  1295  TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
  1296  	MOVD	R1, y+8(FP)	// y arrives in R1
  1297  	MOVD	R0, x+0(FP)	// x arrives in R0
  1298  	B	runtime·goPanicSliceB(SB)	// tail call
  1299  TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
  1300  	MOVD	R1, y+8(FP)	// y arrives in R1
  1301  	MOVD	R0, x+0(FP)	// x arrives in R0
  1302  	B	runtime·goPanicSliceBU(SB)	// tail call
  1303  TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
  1304  	MOVD	R3, y+8(FP)	// y arrives in R3
  1305  	MOVD	R2, x+0(FP)	// x arrives in R2
  1306  	B	runtime·goPanicSlice3Alen(SB)	// tail call
  1307  TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
  1308  	MOVD	R3, y+8(FP)	// y arrives in R3
  1309  	MOVD	R2, x+0(FP)	// x arrives in R2
  1310  	B	runtime·goPanicSlice3AlenU(SB)	// tail call
  1311  TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
  1312  	MOVD	R3, y+8(FP)	// y arrives in R3
  1313  	MOVD	R2, x+0(FP)	// x arrives in R2
  1314  	B	runtime·goPanicSlice3Acap(SB)	// tail call
  1315  TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
  1316  	MOVD	R3, y+8(FP)	// y arrives in R3
  1317  	MOVD	R2, x+0(FP)	// x arrives in R2
  1318  	B	runtime·goPanicSlice3AcapU(SB)	// tail call
  1319  TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
  1320  	MOVD	R2, y+8(FP)	// y arrives in R2
  1321  	MOVD	R1, x+0(FP)	// x arrives in R1
  1322  	B	runtime·goPanicSlice3B(SB)	// tail call
  1323  TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
  1324  	MOVD	R2, y+8(FP)	// y arrives in R2
  1325  	MOVD	R1, x+0(FP)	// x arrives in R1
  1326  	B	runtime·goPanicSlice3BU(SB)	// tail call
  1327  TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
  1328  	MOVD	R1, y+8(FP)	// y arrives in R1
  1329  	MOVD	R0, x+0(FP)	// x arrives in R0
  1330  	B	runtime·goPanicSlice3C(SB)	// tail call
  1331  TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
  1332  	MOVD	R1, y+8(FP)	// y arrives in R1
  1333  	MOVD	R0, x+0(FP)	// x arrives in R0
  1334  	B	runtime·goPanicSlice3CU(SB)	// tail call