github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/runtime/asm_arm64.s (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "tls_arm64.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  
    11  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime entry point. Saves argc/argv, builds g0 on the current
        	// stack, calls _cgo_init when it is present, links g0 and m0,
        	// runs check/args/osinit/schedinit, queues runtime·main with
        	// newproc and finally calls mstart.
    12  	// SP = stack; R0 = argc; R1 = argv
    13  
    14  	// initialize essential registers
    15  	BL	runtime·reginit(SB)
    16  
    17  	SUB	$32, RSP
    18  	MOVW	R0, 8(RSP) // argc
    19  	MOVD	R1, 16(RSP) // argv
    20  
    21  	// create istack out of the given (operating system) stack.
    22  	// _cgo_init may update stackguard.
        	// Provisional bounds: stack.hi = RSP, stack.lo = both guards = RSP - 64KB.
    23  	MOVD	$runtime·g0(SB), g
    24  	MOVD RSP, R7
    25  	MOVD	$(-64*1024)(R7), R0
    26  	MOVD	R0, g_stackguard0(g)
    27  	MOVD	R0, g_stackguard1(g)
    28  	MOVD	R0, (g_stack+stack_lo)(g)
    29  	MOVD	R7, (g_stack+stack_hi)(g)
    30  
    31  	// if there is a _cgo_init, call it using the gcc ABI.
    32  	MOVD	_cgo_init(SB), R12
    33  	CMP	$0, R12
    34  	BEQ	nocgo
    35  
    36  	MRS_TPIDR_R0			// load TLS base pointer
    37  	MOVD	R0, R3			// arg 3: TLS base pointer
    38  #ifdef TLSG_IS_VARIABLE
    39  	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
    40  #else
    41  	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
    42  #endif
    43  	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
    44  	MOVD	g, R0			// arg 0: G
    45  	BL	(R12)
        	// NOTE(review): nocgo is the very next instruction, so the branch
        	// below reaches the same place whether or not it is taken.
    46  	MOVD	_cgo_init(SB), R12
    47  	CMP	$0, R12
    48  	BEQ	nocgo
    49  
    50  nocgo:
    51  	// update stackguard after _cgo_init
        	// Both guards become stack.lo + _StackGuard.
    52  	MOVD	(g_stack+stack_lo)(g), R0
    53  	ADD	$const__StackGuard, R0
    54  	MOVD	R0, g_stackguard0(g)
    55  	MOVD	R0, g_stackguard1(g)
    56  
    57  	// set the per-goroutine and per-mach "registers"
    58  	MOVD	$runtime·m0(SB), R0
    59  
    60  	// save m->g0 = g0
    61  	MOVD	g, m_g0(R0)
    62  	// save m0 to g0->m
    63  	MOVD	R0, g_m(g)
    64  
    65  	BL	runtime·check(SB)
    66  
    67  	MOVW	8(RSP), R0	// copy argc
    68  	MOVW	R0, -8(RSP)
    69  	MOVD	16(RSP), R0		// copy argv
    70  	MOVD	R0, 0(RSP)
    71  	BL	runtime·args(SB)
    72  	BL	runtime·osinit(SB)
    73  	BL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
        	// Four pre-decrement stores push 0, &mainPC, 0, 0 below RSP
        	// (32 bytes in total); RSP is then moved down to the last store.
    76  	MOVD	$runtime·mainPC(SB), R0		// entry
    77  	MOVD	RSP, R7
    78  	MOVD.W	$0, -8(R7)
    79  	MOVD.W	R0, -8(R7)
    80  	MOVD.W	$0, -8(R7)
    81  	MOVD.W	$0, -8(R7)
    82  	MOVD	R7, RSP
    83  	BL	runtime·newproc(SB)
    84  	ADD	$32, RSP			// drop the four words pushed above
    85  
    86  	// start this M
    87  	BL	runtime·mstart(SB)
    88  
        	// Reached only if mstart returns: store through a nil pointer.
    89  	MOVD	$0, R0
    90  	MOVD	R0, (R0)	// boom
    91  	UNDEF
    92  
    93  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte word holding the address of runtime·main
    94  GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only; rt0_go passes its address to newproc
    95  
    96  TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
        	// Executes a BRK instruction, then returns.
    97  	BRK
    98  	RET
    99  
   100  TEXT runtime·asminit(SB),NOSPLIT,$-8-0
        	// Does nothing in this file; returns immediately.
   101  	RET
   102  
   103  TEXT runtime·reginit(SB),NOSPLIT,$-8-0
        	// Loads fixed floating-point constants into F27-F31.
        	// Touches no integer registers, so rt0_go's R0/R1 survive the call.
   104  	// initialize essential FP registers
   105  	FMOVD	$4503601774854144.0, F27	// 2^52 + 2^31
   106  	FMOVD	$0.5, F29
   107  	FSUBD	F29, F29, F28			// F28 = 0.5 - 0.5 = 0.0
   108  	FADDD	F29, F29, F30			// F30 = 0.5 + 0.5 = 1.0
   109  	FADDD	F30, F30, F31			// F31 = 1.0 + 1.0 = 2.0
   110  	RET
   111  
   112  /*
   113   *  go-routine
   114   */
   115  
   116  // void gosave(Gobuf*)
   117  // save state in Gobuf; setjmp
        // Records the caller's RSP, LR and g in the Gobuf passed at buf+0(FP)
        // and zeroes the buffer's lr, ret and ctxt fields.
   118  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   119  	MOVD	buf+0(FP), R3
   120  	MOVD	RSP, R0			// RSP is copied through R0 before being stored
   121  	MOVD	R0, gobuf_sp(R3)
   122  	MOVD	LR, gobuf_pc(R3)	// resume point = our return address
   123  	MOVD	g, gobuf_g(R3)
   124  	MOVD	ZR, gobuf_lr(R3)
   125  	MOVD	ZR, gobuf_ret(R3)
   126  	MOVD	ZR, gobuf_ctxt(R3)
   127  	RET
   128  
   129  // void gogo(Gobuf*)
   130  // restore state from Gobuf; longjmp
        // Loads g, RSP, LR, R0 (ret) and R26 (ctxt) from the Gobuf, zeroes the
        // sp/ret/lr/ctxt fields it consumed, and branches to gobuf_pc.
        // Does not return to its caller.
   131  TEXT runtime·gogo(SB), NOSPLIT, $-8-8
   132  	MOVD	buf+0(FP), R5
   133  	MOVD	gobuf_g(R5), g
   134  	BL	runtime·save_g(SB)
   135  
   136  	MOVD	0(g), R4	// make sure g is not nil
   137  	MOVD	gobuf_sp(R5), R0
   138  	MOVD	R0, RSP
   139  	MOVD	gobuf_lr(R5), LR
   140  	MOVD	gobuf_ret(R5), R0
   141  	MOVD	gobuf_ctxt(R5), R26
   142  	MOVD	$0, gobuf_sp(R5)
   143  	MOVD	$0, gobuf_ret(R5)
   144  	MOVD	$0, gobuf_lr(R5)
   145  	MOVD	$0, gobuf_ctxt(R5)
   146  	CMP	ZR, ZR // set condition codes for == test, needed by stack split
   147  	MOVD	gobuf_pc(R5), R6
   148  	B	(R6)
   149  
   150  // void mcall(fn func(*g))
   151  // Switch to m->g0's stack, call fn(g).
   152  // Fn must never return.  It should gogo(&g->sched)
   153  // to keep running g.
        // Calls badmcall if the caller is already running on m->g0, and
        // badmcall2 if fn does return.
   154  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   155  	// Save caller state in g->sched
   156  	MOVD	RSP, R0
   157  	MOVD	R0, (g_sched+gobuf_sp)(g)
   158  	MOVD	LR, (g_sched+gobuf_pc)(g)
   159  	MOVD	$0, (g_sched+gobuf_lr)(g)
   160  	MOVD	g, (g_sched+gobuf_g)(g)
   161  
   162  	// Switch to m->g0 & its stack, call fn.
   163  	MOVD	g, R3				// R3 = calling g, later passed to fn
   164  	MOVD	g_m(g), R8
   165  	MOVD	m_g0(R8), g
   166  	BL	runtime·save_g(SB)
   167  	CMP	g, R3
   168  	BNE	2(PC)				// skip the badmcall jump unless caller was g0
   169  	B	runtime·badmcall(SB)
   170  	MOVD	fn+0(FP), R26			// context
   171  	MOVD	0(R26), R4			// code pointer
   172  	MOVD	(g_sched+gobuf_sp)(g), R0
   173  	MOVD	R0, RSP	// sp = m->g0->sched.sp
   174  	MOVD	R3, -8(RSP)			// argument slot: the old g
   175  	MOVD	$0, -16(RSP)			// zero word below it, at the new RSP
   176  	SUB	$16, RSP
   177  	BL	(R4)
   178  	B	runtime·badmcall2(SB)
   179  
   180  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   181  // of the G stack.  We need to distinguish the routine that
   182  // lives at the bottom of the G stack from the one that lives
   183  // at the top of the system stack because the one at the top of
   184  // the system stack terminates the stack walk (see topofstack()).
        // systemstack records this function's address plus 8 as the saved PC;
        // the body starts with UNDEF and is not meant to be executed.
   185  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   186  	UNDEF
   187  	BL	(LR)	// make sure this function is not leaf
   188  	RET
   189  
   190  // func systemstack(fn func())
        // Runs fn on the m->g0 stack. When the caller is already on g0 or
        // gsignal, fn is called directly. When the caller is m->curg, state
        // is saved in g->sched, execution switches to g0 for the call, and
        // then switches back. Any other g is reported via badsystemstack.
   191  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   192  	MOVD	fn+0(FP), R3	// R3 = fn
   193  	MOVD	R3, R26		// context
   194  	MOVD	g_m(g), R4	// R4 = m
   195  
   196  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   197  	CMP	g, R5
   198  	BEQ	noswitch
   199  
   200  	MOVD	m_g0(R4), R5	// R5 = g0
   201  	CMP	g, R5
   202  	BEQ	noswitch
   203  
   204  	MOVD	m_curg(R4), R6
   205  	CMP	g, R6
   206  	BEQ	switch
   207  
   208  	// Bad: g is not gsignal, not g0, not curg. What is it?
   209  	// Hide call from linker nosplit analysis.
        	// If badsystemstack returns, execution falls into switch below.
   210  	MOVD	$runtime·badsystemstack(SB), R3
   211  	BL	(R3)
   212  
   213  switch:
   214  	// save our state in g->sched.  Pretend to
   215  	// be systemstack_switch if the G stack is scanned.
   216  	MOVD	$runtime·systemstack_switch(SB), R6
   217  	ADD	$8, R6	// get past prologue
   218  	MOVD	R6, (g_sched+gobuf_pc)(g)
   219  	MOVD	RSP, R0
   220  	MOVD	R0, (g_sched+gobuf_sp)(g)
   221  	MOVD	$0, (g_sched+gobuf_lr)(g)
   222  	MOVD	g, (g_sched+gobuf_g)(g)
   223  
   224  	// switch to g0
   225  	MOVD	R5, g
   226  	BL	runtime·save_g(SB)
   227  	MOVD	(g_sched+gobuf_sp)(g), R3
   228  	// make it look like mstart called systemstack on g0, to stop traceback
        	// R3 = (g0.sched.sp - 16) rounded down to a multiple of 16;
        	// the address of mstart is stored in the word at that address.
   229  	SUB	$16, R3
   230  	AND	$~15, R3
   231  	MOVD	$runtime·mstart(SB), R4
   232  	MOVD	R4, 0(R3)
   233  	MOVD	R3, RSP
   234  
   235  	// call target function
   236  	MOVD	0(R26), R3	// code pointer
   237  	BL	(R3)
   238  
   239  	// switch back to g
        	// g = m->curg; RSP is reloaded from g->sched.sp, which is then zeroed.
   240  	MOVD	g_m(g), R3
   241  	MOVD	m_curg(R3), g
   242  	BL	runtime·save_g(SB)
   243  	MOVD	(g_sched+gobuf_sp)(g), R0
   244  	MOVD	R0, RSP
   245  	MOVD	$0, (g_sched+gobuf_sp)(g)
   246  	RET
   247  
   248  noswitch:
   249  	// already on m stack, just call directly
   250  	MOVD	0(R26), R3	// code pointer
   251  	BL	(R3)
   252  	RET
   253  
   254  /*
   255   * support for morestack
   256   */
   257  
   258  // Called during function prolog when more stack is needed.
   259  // Caller has already loaded:
   260  // R3 prolog's LR (R30)
   261  //
   262  // The traceback routines see morestack on a g0 as being
   263  // the top of a stack (for example, morestack calling newstack
   264  // calling the scheduler calling newm calling gc), so we must
   265  // record an argument size. For that purpose, it has no arguments.
        //
        // Saves the interrupted function's context in g->sched and
        // m->morebuf, switches to m->g0 and calls newstack. Aborts when the
        // current g is m->g0 or m->gsignal.
   266  TEXT runtime·morestack(SB),NOSPLIT,$-8-0
   267  	// Cannot grow scheduler stack (m->g0).
   268  	MOVD	g_m(g), R8
   269  	MOVD	m_g0(R8), R4
   270  	CMP	g, R4
   271  	BNE	2(PC)			// skip the abort unless g == m->g0
   272  	B	runtime·abort(SB)
   273  
   274  	// Cannot grow signal stack (m->gsignal).
   275  	MOVD	m_gsignal(R8), R4
   276  	CMP	g, R4
   277  	BNE	2(PC)			// skip the abort unless g == m->gsignal
   278  	B	runtime·abort(SB)
   279  
   280  	// Called from f.
   281  	// Set g->sched to context in f
   282  	MOVD	R26, (g_sched+gobuf_ctxt)(g)
   283  	MOVD	RSP, R0
   284  	MOVD	R0, (g_sched+gobuf_sp)(g)
   285  	MOVD	LR, (g_sched+gobuf_pc)(g)
   286  	MOVD	R3, (g_sched+gobuf_lr)(g)
   287  
   288  	// Called from f.
   289  	// Set m->morebuf to f's callers.
   290  	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   291  	MOVD	RSP, R0
   292  	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   293  	MOVD	g, (m_morebuf+gobuf_g)(R8)
   294  
   295  	// Call newstack on m->g0's stack.
   296  	MOVD	m_g0(R8), g
   297  	BL	runtime·save_g(SB)
   298  	MOVD	(g_sched+gobuf_sp)(g), R0
   299  	MOVD	R0, RSP
   300  	BL	runtime·newstack(SB)
   301  
   302  	// Not reached, but make sure the return PC from the call to newstack
   303  	// is still in this function, and not the beginning of the next.
   304  	UNDEF
   305  
   306  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-4-0
        	// Variant of morestack for callers without a closure context:
        	// zeroes R26 (the register morestack saves as gobuf_ctxt) and
        	// tail-jumps to morestack.
   307  	MOVW	$0, R26
   308  	B runtime·morestack(SB)
   309  
   310  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   311  	// We came here via a RET to an overwritten LR.
   312  	// R0 may be live (see return0). Other registers are available.
   313  
   314  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   315  	MOVD	(g_stkbar+slice_array)(g), R4	// R4 = base of the stkbar array
   316  	MOVD	g_stkbarPos(g), R5		// R5 = current index
   317  	MOVD	$stkbar__size, R6
   318  	MUL	R5, R6				// R6 = index * element size
   319  	ADD	R4, R6				// R6 = &stkbar[index]
   320  	MOVD	stkbar_savedLRVal(R6), R6
   321  	// Record that this stack barrier was hit.
   322  	ADD	$1, R5
   323  	MOVD	R5, g_stkbarPos(g)
   324  	// Jump to the original return PC.
   325  	B	(R6)
   326  
   327  // reflectcall: call a function with the given argument list
   328  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   329  // we don't have variable-sized frames, so we use a small number
   330  // of constant-sized-frame functions to encode a few bits of size in the pc.
   331  // Caution: ugly multiline assembly macros in your future!
   332  
   333  #define DISPATCH(NAME,MAXSIZE)		\
   334  	MOVD	$MAXSIZE, R27;		\
   335  	CMP	R27, R16;		\
   336  	BGT	3(PC);			\
   337  	MOVD	$NAME(SB), R27;	\
   338  	B	(R27)
   339  // Note: can't just "B NAME(SB)" - bad inlining results.
        // DISPATCH compares R16 (the argument size loaded by reflectcall) with
        // MAXSIZE. If R16 is greater, BGT 3(PC) skips the two instructions
        // that follow, so control reaches whatever comes after the macro;
        // otherwise it jumps to NAME through R27.
   340  
   341  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: jumps straight to ·reflectcall.
   342  	B	·reflectcall(SB)
   343  
   344  TEXT ·reflectcall(SB), NOSPLIT, $-8-32
        	// Loads the 32-bit argsize into R16 and walks the DISPATCH chain,
        	// jumping to the first runtime·callN whose N is >= argsize.
        	// Sizes above 1073741824 end up in badreflectcall.
   345  	MOVWU argsize+24(FP), R16
   346  	// NOTE(rsc): No call16, because CALLFN needs four words
   347  	// of argument space to invoke callwritebarrier.
   348  	DISPATCH(runtime·call32, 32)
   349  	DISPATCH(runtime·call64, 64)
   350  	DISPATCH(runtime·call128, 128)
   351  	DISPATCH(runtime·call256, 256)
   352  	DISPATCH(runtime·call512, 512)
   353  	DISPATCH(runtime·call1024, 1024)
   354  	DISPATCH(runtime·call2048, 2048)
   355  	DISPATCH(runtime·call4096, 4096)
   356  	DISPATCH(runtime·call8192, 8192)
   357  	DISPATCH(runtime·call16384, 16384)
   358  	DISPATCH(runtime·call32768, 32768)
   359  	DISPATCH(runtime·call65536, 65536)
   360  	DISPATCH(runtime·call131072, 131072)
   361  	DISPATCH(runtime·call262144, 262144)
   362  	DISPATCH(runtime·call524288, 524288)
   363  	DISPATCH(runtime·call1048576, 1048576)
   364  	DISPATCH(runtime·call2097152, 2097152)
   365  	DISPATCH(runtime·call4194304, 4194304)
   366  	DISPATCH(runtime·call8388608, 8388608)
   367  	DISPATCH(runtime·call16777216, 16777216)
   368  	DISPATCH(runtime·call33554432, 33554432)
   369  	DISPATCH(runtime·call67108864, 67108864)
   370  	DISPATCH(runtime·call134217728, 134217728)
   371  	DISPATCH(runtime·call268435456, 268435456)
   372  	DISPATCH(runtime·call536870912, 536870912)
   373  	DISPATCH(runtime·call1073741824, 1073741824)
        	// No size class was large enough.
   374  	MOVD	$runtime·badreflectcall(SB), R0
   375  	B	(R0)
   376  
   377  #define CALLFN(NAME,MAXSIZE)			\
   378  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   379  	NO_LOCAL_POINTERS;			\
   380  	/* copy argsize bytes from arg to 8(RSP), one byte per iteration; R4 is the end value of the destination cursor R5 */	\
   381  	MOVD	arg+16(FP), R3;			\
   382  	MOVWU	argsize+24(FP), R4;			\
   383  	MOVD	RSP, R5;				\
   384  	ADD	$(8-1), R5;			\
   385  	SUB	$1, R3;				\
   386  	ADD	R5, R4;				\
   387  	CMP	R5, R4;				\
   388  	BEQ	4(PC);				\
   389  	MOVBU.W	1(R3), R6;			\
   390  	MOVBU.W	R6, 1(R5);			\
   391  	B	-4(PC);				\
   392  	/* call function: R26 = f (closure context), R0 = code pointer loaded from it */	\
   393  	MOVD	f+8(FP), R26;			\
   394  	MOVD	(R26), R0;			\
   395  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   396  	BL	(R0);				\
   397  	/* copy return values back: the n-retoffset bytes starting at 8+retoffset(RSP) go to arg+retoffset */	\
   398  	MOVD	arg+16(FP), R3;			\
   399  	MOVWU	n+24(FP), R4;			\
   400  	MOVWU	retoffset+28(FP), R6;		\
   401  	MOVD	RSP, R5;				\
   402  	ADD	R6, R5; 			\
   403  	ADD	R6, R3;				\
   404  	SUB	R6, R4;				\
   405  	ADD	$(8-1), R5;			\
   406  	SUB	$1, R3;				\
   407  	ADD	R5, R4;				\
   408  loop:						\
   409  	CMP	R5, R4;				\
   410  	BEQ	end;				\
   411  	MOVBU.W	1(R5), R6;			\
   412  	MOVBU.W	R6, 1(R3);			\
   413  	B	loop;				\
   414  end:						\
   415  	/* execute write barrier updates: callwritebarrier receives argtype, arg, n, retoffset at 8..32(RSP) */	\
   416  	MOVD	argtype+0(FP), R7;		\
   417  	MOVD	arg+16(FP), R3;			\
   418  	MOVWU	n+24(FP), R4;			\
   419  	MOVWU	retoffset+28(FP), R6;		\
   420  	MOVD	R7, 8(RSP);			\
   421  	MOVD	R3, 16(RSP);			\
   422  	MOVD	R4, 24(RSP);			\
   423  	MOVD	R6, 32(RSP);			\
   424  	BL	runtime·callwritebarrier(SB);	\
   425  	RET
   426  
   427  // These have 8 added to make the overall frame size a multiple of 16,
   428  // as required by the ABI. (There is another +8 for the saved LR.)
        // Each line defines ·callN with a frame of N+8 bytes; the set of N
        // values matches the DISPATCH chain in ·reflectcall.
   429  CALLFN(·call32, 40 )
   430  CALLFN(·call64, 72 )
   431  CALLFN(·call128, 136 )
   432  CALLFN(·call256, 264 )
   433  CALLFN(·call512, 520 )
   434  CALLFN(·call1024, 1032 )
   435  CALLFN(·call2048, 2056 )
   436  CALLFN(·call4096, 4104 )
   437  CALLFN(·call8192, 8200 )
   438  CALLFN(·call16384, 16392 )
   439  CALLFN(·call32768, 32776 )
   440  CALLFN(·call65536, 65544 )
   441  CALLFN(·call131072, 131080 )
   442  CALLFN(·call262144, 262152 )
   443  CALLFN(·call524288, 524296 )
   444  CALLFN(·call1048576, 1048584 )
   445  CALLFN(·call2097152, 2097160 )
   446  CALLFN(·call4194304, 4194312 )
   447  CALLFN(·call8388608, 8388616 )
   448  CALLFN(·call16777216, 16777224 )
   449  CALLFN(·call33554432, 33554440 )
   450  CALLFN(·call67108864, 67108872 )
   451  CALLFN(·call134217728, 134217736 )
   452  CALLFN(·call268435456, 268435464 )
   453  CALLFN(·call536870912, 536870920 )
   454  CALLFN(·call1073741824, 1073741832 )
   455  
   456  // bool cas(uint32 *ptr, uint32 old, uint32 new)
   457  // Atomically:
   458  //	if(*ptr == old){
   459  //		*ptr = new;
   460  //		return 1;
   461  //	} else
   462  //		return 0;
   463  TEXT runtime·cas(SB), NOSPLIT, $0-17
   464  	MOVD	ptr+0(FP), R0
   465  	MOVW	old+8(FP), R1
   466  	MOVW	new+12(FP), R2
   467  again:
   468  	LDAXRW	(R0), R3		// exclusive load of the current value
   469  	CMPW	R1, R3
   470  	BNE	ok			// mismatch: leave with the NE flags from CMPW
   471  	STLXRW	R2, (R0), R3		// exclusive store; R3 receives the status
   472  	CBNZ	R3, again		// non-zero status: store did not happen, retry
   473  ok:
   474  	CSET	EQ, R0			// 1 if the CMPW above matched, else 0
   475  	MOVB	R0, ret+16(FP)
   476  	RET
   477  
   478  TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
        	// Tail-jumps to cas64.
   479  	B	runtime·cas64(SB)
   480  
   481  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $-8-16
        	// Tail-jumps to atomicload64.
   482  	B	runtime·atomicload64(SB)
   483  
   484  TEXT runtime·atomicloaduint(SB), NOSPLIT, $-8-16
        	// Tail-jumps to atomicload64.
   485  	B	runtime·atomicload64(SB)
   486  
   487  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
        	// Tail-jumps to atomicstore64.
   488  	B	runtime·atomicstore64(SB)
   489  
   490  // AES hashing not implemented for ARM64, issue #10109.
        // Each stub below loads a word from address 0. None of them contains
        // a RET.
        // NOTE(review): presumably these are never meant to be called and the
        // nil load is a deliberate crash - confirm against the hash setup code.
   491  TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
   492  	MOVW	$0, R0
   493  	MOVW	(R0), R1
   494  TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0
   495  	MOVW	$0, R0
   496  	MOVW	(R0), R1
   497  TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0
   498  	MOVW	$0, R0
   499  	MOVW	(R0), R1
   500  TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
   501  	MOVW	$0, R0
   502  	MOVW	(R0), R1
   503  
   504  // bool casp(void **val, void *old, void *new)
   505  // Atomically:
   506  //	if(*val == old){
   507  //		*val = new;
   508  //		return 1;
   509  //	} else
   510  //		return 0;
        // Implemented as a tail-jump to cas64.
   511  TEXT runtime·casp1(SB), NOSPLIT, $0-25
   512  	B runtime·cas64(SB)
   513  
   514  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Executes YIELD once per iteration, counting the 32-bit cycles
        	// argument down to zero.
        	// NOTE(review): the TEXT line declares 0 bytes of arguments although
        	// cycles+0(FP) is read - confirm against the Go declaration.
        	// NOTE(review): the counter is decremented before it is tested, so
        	// cycles == 0 wraps around instead of returning at once.
   515  	MOVWU	cycles+0(FP), R0
   516  again:
   517  	YIELD
   518  	SUBW	$1, R0
   519  	CBNZ	R0, again
   520  	RET
   521  
   522  // void jmpdefer(fv, sp);
   523  // called from deferreturn.
   524  // 1. grab stored LR for caller
   525  // 2. sub 4 bytes to get back to BL deferreturn
   526  // 3. BR to fn
        // The deferred function therefore returns to the BL deferreturn
        // instruction itself, which runs deferreturn again.
   527  TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16
   528  	MOVD	0(RSP), R0
   529  	SUB	$4, R0
   530  	MOVD	R0, LR
   531  
   532  	MOVD	fv+0(FP), R26		// closure context for the deferred call
   533  	MOVD	argp+8(FP), R0
   534  	MOVD	R0, RSP
   535  	SUB	$8, RSP			// RSP = argp - 8
   536  	MOVD	0(R26), R3		// code pointer
   537  	B	(R3)
   538  
   539  // Save state of caller into g->sched. Smashes R0.
        // Stores LR as sched.pc and RSP as sched.sp, and zeroes sched.lr,
        // sched.ret and sched.ctxt.
   540  TEXT gosave<>(SB),NOSPLIT,$-8
   541  	MOVD	LR, (g_sched+gobuf_pc)(g)
   542  	MOVD RSP, R0
   543  	MOVD	R0, (g_sched+gobuf_sp)(g)
   544  	MOVD	$0, (g_sched+gobuf_lr)(g)
   545  	MOVD	$0, (g_sched+gobuf_ret)(g)
   546  	MOVD	$0, (g_sched+gobuf_ctxt)(g)
   547  	RET
   548  
   549  // func asmcgocall(fn, arg unsafe.Pointer) int32
   550  // Call fn(arg) on the scheduler stack,
   551  // aligned appropriately for the gcc ABI.
   552  // See cgocall.go for more details.
        // Register roles: R1 = fn, R0 = arg (parked in R9 around helper
        // calls), R2 = RSP on entry, R4 = g on entry, R3 = m->g0.
   553  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   554  	MOVD	fn+0(FP), R1
   555  	MOVD	arg+8(FP), R0
   556  
   557  	MOVD	RSP, R2		// save original stack pointer
   558  	MOVD	g, R4
   559  
   560  	// Figure out if we need to switch to m->g0 stack.
   561  	// We get called to create new OS threads too, and those
   562  	// come in on the m->g0 stack already.
   563  	MOVD	g_m(g), R8
   564  	MOVD	m_g0(R8), R3
   565  	CMP	R3, g
   566  	BEQ	g0
   567  	MOVD	R0, R9	// gosave<> and save_g might clobber R0
   568  	BL	gosave<>(SB)
   569  	MOVD	R3, g
   570  	BL	runtime·save_g(SB)
   571  	MOVD	(g_sched+gobuf_sp)(g), R0
   572  	MOVD	R0, RSP
   573  	MOVD	R9, R0
   574  
   575  	// Now on a scheduling stack (a pthread-created stack).
   576  g0:
   577  	// Save room for two of our pointers /*, plus 32 bytes of callee
   578  	// save area that lives on the caller stack. */
   579  	MOVD	RSP, R13
   580  	SUB	$16, R13
   581  	MOVD	R13, RSP
   582  	MOVD	R4, 0(RSP)	// save old g on stack
   583  	MOVD	(g_stack+stack_hi)(R4), R4
   584  	SUB	R2, R4
   585  	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   586  	BL	(R1)
   587  	MOVD	R0, R9		// keep fn's result while g is restored
   588  
   589  	// Restore g, stack pointer.  R0 is errno, so don't touch it
        	// New RSP = old g's stack.hi minus the depth saved at 8(RSP).
   590  	MOVD	0(RSP), g
   591  	BL	runtime·save_g(SB)
   592  	MOVD	(g_stack+stack_hi)(g), R5
   593  	MOVD	8(RSP), R6
   594  	SUB	R6, R5
   595  	MOVD	R9, R0
   596  	MOVD	R5, RSP
   597  
   598  	MOVW	R0, ret+16(FP)
   599  	RET
   600  
   601  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   602  // Turn the fn into a Go func (by taking its address) and call
   603  // cgocallback_gofunc.
        // The three outgoing arguments are written to 8, 16 and 24(RSP).
   604  TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
   605  	MOVD	$fn+0(FP), R0		// address of the fn argument slot, not its value
   606  	MOVD	R0, 8(RSP)
   607  	MOVD	frame+8(FP), R0
   608  	MOVD	R0, 16(RSP)
   609  	MOVD	framesize+16(FP), R0
   610  	MOVD	R0, 24(RSP)
   611  	MOVD	$runtime·cgocallback_gofunc(SB), R0
   612  	BL	(R0)			// indirect call through R0
   613  	RET
   614  
   615  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   616  // See cgocall.go for more details.
        //
        // Runs cgocallbackg on m->curg's stack on behalf of a C caller. If the
        // thread has no g (Go did not create it), an m is borrowed with needm
        // before the callback and handed back with dropm afterwards.
        // savedm-8(SP) records the m found on entry (0 when one had to be
        // borrowed); the epilogue tests it to decide whether dropm is due,
        // so it must be written exactly once, on whichever entry path is taken.
   617  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-24
   618  	NO_LOCAL_POINTERS
   619  
   620  	// Load g from thread-local storage.
   621  	MOVB	runtime·iscgo(SB), R3
   622  	CMP	$0, R3
   623  	BEQ	nocgo
   624  	BL	runtime·load_g(SB)
   625  nocgo:
   626  
   627  	// If g is nil, Go did not create the current thread.
   628  	// Call needm to obtain one for temporary use.
   629  	// In this case, we're running on the thread stack, so there's
   630  	// lots of space, but the linker doesn't know. Hide the call from
   631  	// the linker analysis by using an indirect call.
   632  	CMP	$0, g
   633  	BEQ	needm
        
        	// g is set: the thread already has an m. Record it (non-zero) so
        	// that the epilogue does not call dropm.
        	MOVD	g_m(g), R8
        	MOVD	R8, savedm-8(SP)
        	B	havem
        
        needm:
   634  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   635  	MOVD	$runtime·needm(SB), R0
   636  	BL	(R0)
   637  
   638  	// Set m->sched.sp = SP, so that if a panic happens
   639  	// during the function we are about to execute, it will
   640  	// have a valid SP to run on the g0 stack.
   641  	// The next few lines (after the havem label)
   642  	// will save this SP onto the stack and then write
   643  	// the same SP back to m->sched.sp. That seems redundant,
   644  	// but if an unrecovered panic happens, unwindm will
   645  	// restore the g->sched.sp from the stack location
   646  	// and then systemstack will try to use it. If we don't set it here,
   647  	// that restored SP will be uninitialized (typically 0) and
   648  	// will not be usable.
   649  	MOVD	g_m(g), R8
   650  	MOVD	m_g0(R8), R3
   651  	MOVD	RSP, R0
   652  	MOVD	R0, (g_sched+gobuf_sp)(R3)
   653  
   654  havem:
        	// R8 = m on both paths. savedm is deliberately NOT rewritten here:
        	// it must stay 0 when the m was borrowed via needm.
   657  	// Now there's a valid m, and we're running on its m->g0.
   658  	// Save current m->g0->sched.sp on stack and then set it to SP.
   659  	// Save current sp in m->g0->sched.sp in preparation for
   660  	// switch back to m->curg stack.
   661  	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
   662  	// Beware that the frame size is actually 32.
   663  	MOVD	m_g0(R8), R3
   664  	MOVD	(g_sched+gobuf_sp)(R3), R4
   665  	MOVD	R4, savedsp-16(SP)
   666  	MOVD	RSP, R0
   667  	MOVD	R0, (g_sched+gobuf_sp)(R3)
   668  
   669  	// Switch to m->curg stack and call runtime.cgocallbackg.
   670  	// Because we are taking over the execution of m->curg
   671  	// but *not* resuming what had been running, we need to
   672  	// save that information (m->curg->sched) so we can restore it.
   673  	// We can restore m->curg->sched.sp easily, because calling
   674  	// runtime.cgocallbackg leaves SP unchanged upon return.
   675  	// To save m->curg->sched.pc, we push it onto the stack.
   676  	// This has the added benefit that it looks to the traceback
   677  	// routine like cgocallbackg is going to return to that
   678  	// PC (because the frame we allocate below has the same
   679  	// size as cgocallback_gofunc's frame declared above)
   680  	// so that the traceback will seamlessly trace back into
   681  	// the earlier calls.
   682  	//
   683  	// In the new goroutine, -16(SP) and -8(SP) are unused.
   684  	MOVD	m_curg(R8), g
   685  	BL	runtime·save_g(SB)
   686  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   687  	MOVD	(g_sched+gobuf_pc)(g), R5
   688  	MOVD	R5, -(24+8)(R4)	// maintain 16-byte SP alignment
   689  	MOVD	$-(24+8)(R4), R0
   690  	MOVD	R0, RSP
   691  	BL	runtime·cgocallbackg(SB)
   692  
   693  	// Restore g->sched (== m->curg->sched) from saved values.
   694  	MOVD	0(RSP), R5
   695  	MOVD	R5, (g_sched+gobuf_pc)(g)
   696  	MOVD	RSP, R4
   697  	ADD	$(24+8), R4, R4
   698  	MOVD	R4, (g_sched+gobuf_sp)(g)
   699  
   700  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   701  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   702  	// so we do not have to restore it.)
   703  	MOVD	g_m(g), R8
   704  	MOVD	m_g0(R8), g
   705  	BL	runtime·save_g(SB)
   706  	MOVD	(g_sched+gobuf_sp)(g), R0
   707  	MOVD	R0, RSP
   708  	MOVD	savedsp-16(SP), R4
   709  	MOVD	R4, (g_sched+gobuf_sp)(g)
   710  
   711  	// If the m on entry was nil, we called needm above to borrow an m
   712  	// for the duration of the call. Since the call is over, return it with dropm.
   713  	MOVD	savedm-8(SP), R6
   714  	CMP	$0, R6
   715  	BNE	droppedm
   716  	MOVD	$runtime·dropm(SB), R0
   717  	BL	(R0)
   718  droppedm:
   719  
   720  	// Done!
   721  	RET
   722  
   723  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   724  // Must obey the gcc calling convention.
        // The result is returned in R0. R27 and g are spilled to the frame
        // before load_g and reloaded afterwards.
   725  TEXT _cgo_topofstack(SB),NOSPLIT,$24
   726  	// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
   727  	// are callee-save in the gcc calling convention, so save them.
   728  	MOVD	R27, savedR27-8(SP)
   729  	MOVD	g, saveG-16(SP)
   730  
   731  	BL	runtime·load_g(SB)
   732  	MOVD	g_m(g), R0
   733  	MOVD	m_curg(R0), R0
   734  	MOVD	(g_stack+stack_hi)(R0), R0
   735  
   736  	MOVD	saveG-16(SP), g
   737  	MOVD	savedR27-8(SP), R27	// same slot and name as the save above
   738  	RET
   739  
   740  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and calls save_g.
   741  TEXT runtime·setg(SB), NOSPLIT, $0-8
   742  	MOVD	gg+0(FP), g
   743  	// This only happens if iscgo, so jump straight to save_g
   744  	BL	runtime·save_g(SB)
   745  	RET
   746  
   747  // void setg_gcc(G*); set g called from gcc
        // Takes the new g in R0 and preserves R27 across the save_g call.
   748  TEXT setg_gcc<>(SB),NOSPLIT,$8
   749  	MOVD	R0, g
   750  	MOVD	R27, savedR27-8(SP)
   751  	BL	runtime·save_g(SB)
   752  	MOVD	savedR27-8(SP), R27
   753  	RET
   754  
   755  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
        	// Returns the word at 16(RSP). If that word equals stackBarrierPC,
        	// nextBarrierPC is called and its result, read back from 8(RSP),
        	// is returned instead.
   756  	MOVD	16(RSP), R0		// LR saved by caller
   757  	MOVD	runtime·stackBarrierPC(SB), R1
   758  	CMP	R0, R1
   759  	BNE	nobar
   760  	// Get original return PC.
   761  	BL	runtime·nextBarrierPC(SB)
   762  	MOVD	8(RSP), R0
   763  nobar:
   764  	MOVD	R0, ret+8(FP)
   765  	RET
   766  
   767  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16
        	// Overwrites the word at 16(RSP) with pc. If that word currently
        	// equals stackBarrierPC, pc is handed to setNextBarrierPC instead
        	// and the word is left unchanged.
   768  	MOVD	pc+8(FP), R0
   769  	MOVD	16(RSP), R1
   770  	MOVD	runtime·stackBarrierPC(SB), R2
   771  	CMP	R1, R2
   772  	BEQ	setbar
   773  	MOVD	R0, 16(RSP)		// set LR in caller
   774  	RET
   775  setbar:
   776  	// Set the stack barrier return PC.
   777  	MOVD	R0, 8(RSP)		// argument for setNextBarrierPC
   778  	BL	runtime·setNextBarrierPC(SB)
   779  	RET
   780  
   781  TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
        	// Returns the value of the argp argument minus 8.
   782  	MOVD	argp+0(FP), R0
   783  	SUB	$8, R0
   784  	MOVD	R0, ret+8(FP)
   785  	RET
   786  
   787  TEXT runtime·abort(SB),NOSPLIT,$-8-0
        	// Branches to the address held in ZR (zero); UNDEF follows as a backstop.
   788  	B	(ZR)
   789  	UNDEF
   790  
   791  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   792  // redirects to memhash(p, h, size) using the size
   793  // stored in the closure.
        // The size is read from offset 8 of the closure addressed by R26.
   794  TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
   795  	GO_ARGS
   796  	NO_LOCAL_POINTERS
   797  	MOVD	p+0(FP), R3
   798  	MOVD	h+8(FP), R4
   799  	MOVD	8(R26), R5
   800  	MOVD	R3, 8(RSP)		// memhash arg 1: p
   801  	MOVD	R4, 16(RSP)		// memhash arg 2: h
   802  	MOVD	R5, 24(RSP)		// memhash arg 3: size
   803  	BL	runtime·memhash(SB)
   804  	MOVD	32(RSP), R3		// memhash result
   805  	MOVD	R3, ret+16(FP)
   806  	RET
   807  
   808  TEXT runtime·memeq(SB),NOSPLIT,$-8-25
   809  	MOVD	$1, R0
   810  	MOVB	R0, ret+24(FP)		// assume equal until a mismatch is seen
   811  	MOVD	a+0(FP), R1		// R1 = cursor in a
   812  	MOVD	size+16(FP), R3
   813  	ADD	R1, R3, R6		// R6 = a + size, the end of a
   814  	MOVD	b+8(FP), R2		// R2 = cursor in b
   815  next:
   816  	CMP	R1, R6
   817  	BEQ	out			// every byte matched
   818  	MOVBU.P	1(R1), R7
   819  	MOVBU.P	1(R2), R8
   820  	CMP	R7, R8
   821  	BEQ	next
   822  	// First differing byte: overwrite the result with 0.
   823  	MOVB	$0, ret+24(FP)
   824  out:
   825  	RET
   826  
   827  // memequal_varlen(a, b unsafe.Pointer) bool
        // Returns 1 at once when a and b are the same pointer; otherwise calls
        // memeq(a, b, size) with the size taken from the closure in R26.
   828  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   829  	MOVD	a+0(FP), R3
   830  	MOVD	b+8(FP), R4
   831  	CMP	R3, R4
   832  	BEQ	eq
   833  	MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
   834  	MOVD	R3, 8(RSP)
   835  	MOVD	R4, 16(RSP)
   836  	MOVD	R5, 24(RSP)
   837  	BL	runtime·memeq(SB)
   838  	MOVBU	32(RSP), R3		// memeq's boolean result
   839  	MOVB	R3, ret+16(FP)
   840  	RET
   841  eq:
   842  	MOVD	$1, R3
   843  	MOVB	R3, ret+16(FP)
   844  	RET
   845  
   846  TEXT runtime·cmpstring(SB),NOSPLIT,$-4-40
        	// Loads both strings into the registers cmpbody expects, sets
        	// R7 = RSP+40 (where cmpbody stores the result), and tail-jumps.
   847  	MOVD	s1_base+0(FP), R2
   848  	MOVD	s1_len+8(FP), R0
   849  	MOVD	s2_base+16(FP), R3
   850  	MOVD	s2_len+24(FP), R1
   851  	ADD	$40, RSP, R7
   852  	B	runtime·cmpbody<>(SB)
   853  
   854  TEXT bytes·Compare(SB),NOSPLIT,$-4-56
        	// Loads pointer and length of both slices into the registers
        	// cmpbody expects, sets R7 = RSP+56 (where cmpbody stores the
        	// result), and tail-jumps.
   855  	MOVD	s1+0(FP), R2
   856  	MOVD	s1+8(FP), R0
   857  	MOVD	s2+24(FP), R3
   858  	MOVD	s2+32(FP), R1
   859  	ADD	$56, RSP, R7
   860  	B	runtime·cmpbody<>(SB)
   861  
   862  // On entry:
   863  // R0 is the length of s1
   864  // R1 is the length of s2
   865  // R2 points to the start of s1
   866  // R3 points to the start of s2
   867  // R7 points to return value (-1/0/1 will be written here)
   868  //
   869  // On exit:
   870  // R4, R5, and R6 are clobbered
        // (R2 and R3 are also advanced by the post-increment loads.)
   871  TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0
   872  	CMP	R2, R3
   873  	BEQ	samebytes // same starting pointers; compare lengths
   874  	CMP	R0, R1
   875  	CSEL    LT, R1, R0, R6 // R6 is min(R0, R1)
   876  
   877  	ADD	R2, R6	// R2 is current byte in s1, R6 is last byte in s1 to compare
   878  loop:
   879  	CMP	R2, R6
   880  	BEQ	samebytes // all compared bytes were the same; compare lengths
   881  	MOVBU.P	1(R2), R4
   882  	MOVBU.P	1(R3), R5
   883  	CMP	R4, R5
   884  	BEQ	loop
   885  	// bytes differed
        	// Flags still come from CMP R4, R5: result is 1 if the s2 byte
        	// is smaller than the s1 byte, otherwise -1.
   886  	MOVD	$1, R4
   887  	CSNEG	LT, R4, R4, R4
   888  	MOVD	R4, (R7)
   889  	RET
   890  samebytes:
        	// Common prefix is identical: 1 if s2 is shorter, -1 if s1 is
        	// shorter, 0 if the lengths are equal.
   891  	MOVD	$1, R4
   892  	CMP	R0, R1
   893  	CSNEG	LT, R4, R4, R4
   894  	CSEL	EQ, ZR, R4, R4
   895  	MOVD	R4, (R7)
   896  	RET
   897  
   898  // eqstring reports whether two strings hold the same bytes.
   899  // Only the first string's length is read: the compiler guarantees
   900  // that strings passed to eqstring have equal length.
   901  // See runtime_test.go:eqstring_generic for
   902  // equivalent Go code.
   903  TEXT runtime·eqstring(SB),NOSPLIT,$0-33
   904  	MOVD	s2str+16(FP), R2	// R2 = cursor in s2
   905  	MOVD	s1str+0(FP), R0		// R0 = cursor in s1
   906  	MOVD	s1len+8(FP), R1
   907  	ADD	R0, R1			// R1 = end of s1
   908  next:
   909  	CMP	R0, R1
   910  	BEQ	same			// ran off the end without a mismatch
   911  	MOVBU.P	1(R0), R6
   912  	MOVBU.P	1(R2), R7
   913  	CMP	R6, R7
   914  	BEQ	next
   915  differ:
   916  	MOVB	ZR, ret+32(FP)
   917  	RET
   918  same:
   919  	MOVD	$1, R0
   920  	MOVB	R0, ret+32(FP)
   921  	RET
   922  
   923  //
   924  // functions for other packages
   925  //
   926  TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
   927  	MOVBU	c+24(FP), R2	// R2 = byte being searched for
   928  	MOVD	b+0(FP), R0	// R0 = cursor, starts at the slice base
   929  	MOVD	b_len+8(FP), R1
   930  	ADD	R0, R1		// R1 = one past the final byte
   931  	MOVD	R0, R7		// R7 = base, kept for the index computation
   932  scan:
   933  	CMP	R0, R1
   934  	BEQ	miss		// cursor reached the end without a match
   935  	MOVBU.P	1(R0), R6	// R6 = *cursor, then cursor++
   936  	CMP	R2, R6
   937  	BNE	scan
   938  	// Match: the post-increment left R0 one byte past the hit.
   939  	SUB	$1, R0
   940  	SUB	R7, R0		// index = address - base
   941  	MOVD	R0, ret+32(FP)
   942  	RET
   943  
   944  miss:
   945  	MOVD	$-1, R0
   946  	MOVD	R0, ret+32(FP)
   947  	RET
   948  
   949  TEXT strings·IndexByte(SB),NOSPLIT,$0-32
   950  	MOVBU	c+16(FP), R2	// R2 = wanted byte
   951  	MOVD	s+0(FP), R0	// R0 = read position
   952  	MOVD	s_len+8(FP), R1
   953  	ADD	R0, R1		// R1 = s + len, the stop address
   954  	MOVD	R0, R7		// R7 = start of s
   955  search:
   956  	CMP	R0, R1
   957  	BEQ	absent		// whole string examined
   958  	MOVBU.P	1(R0), R6	// fetch a byte and step forward
   959  	CMP	R2, R6
   960  	BNE	search
   961  	// Found: R0 already points one past the matching byte.
   962  	SUB	$1, R0
   963  	SUB	R7, R0		// convert address to offset
   964  	MOVD	R0, ret+24(FP)
   965  	RET
   966  
   967  absent:
   968  	MOVD	$-1, R0
   969  	MOVD	R0, ret+24(FP)
   970  	RET
   971  
   972  // TODO: share code with memeq?
   973  TEXT bytes·Equal(SB),NOSPLIT,$0-49
   974  	MOVD	b_len+32(FP), R3
   975  	MOVD	a_len+8(FP), R1
   976  	CMP	R1, R3		// differing lengths can never be equal
   977  	BNE	differ
   978  	MOVD	b+24(FP), R2	// R2 = cursor in b
   979  	MOVD	a+0(FP), R0	// R0 = cursor in a
   980  	ADD	R0, R1		// R1 = end of a
   981  next:
   982  	CMP	R0, R1
   983  	BEQ	same		// all bytes compared
   984  	MOVBU.P	1(R0), R6
   985  	MOVBU.P	1(R2), R7
   986  	CMP	R6, R7
   987  	BEQ	next
   988  differ:
   989  	MOVB	ZR, ret+48(FP)
   990  	RET
   991  same:
   992  	MOVD	$1, R0
   993  	MOVB	R0, ret+48(FP)
   994  	RET
   995  
   996  TEXT runtime·fastrand1(SB),NOSPLIT,$-8-4
        	// Advances the per-m generator state: x = m.fastrand; x += x;
        	// if x is negative as a 32-bit value, x ^= 0x88888eef.
        	// The new x is stored back to m.fastrand and returned.
   997  	MOVD	g_m(g), R1
   998  	MOVWU	m_fastrand(R1), R0
   999  	ADD	R0, R0
  1000  	CMPW	$0, R0
  1001  	BGE	notneg
  1002  	EOR	$0x88888eef, R0
  1003  notneg:
  1004  	MOVW	R0, m_fastrand(R1)
  1005  	MOVW	R0, ret+0(FP)
  1006  	RET
  1007  
  1008  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets R0 to zero and returns.
  1009  	MOVW	$0, R0
  1010  	RET
  1011  
  1012  // The top-most function running on a goroutine
  1013  // returns to goexit+PCQuantum.
        // The leading NOP means that return address lands on the BL below.
  1014  TEXT runtime·goexit(SB),NOSPLIT,$-8-0
  1015  	MOVD	R0, R0	// NOP
  1016  	BL	runtime·goexit1(SB)	// does not return
  1017  
  1018  // TODO(aram): use PRFM here.
        // Until then all four prefetch entry points are no-ops: each one
        // ignores its argument and returns immediately.
  1019  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
  1020  	RET
  1021  
  1022  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
  1023  	RET
  1024  
  1025  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
  1026  	RET
  1027  
  1028  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
  1029  	RET
  1030  
  1031  TEXT runtime·sigreturn(SB),NOSPLIT,$0-8
        	// Empty body in this file: returns immediately.
  1032          RET