github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/runtime/asm_arm64.s (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "tls_arm64.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  
    11  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    12  	// SP = stack; R0 = argc; R1 = argv
        	//
        	// Bootstrap sequence: derive g0's stack bounds from the incoming OS
        	// stack, call _cgo_init if it is set, link m0 and g0 together, run
        	// check/args/osinit/schedinit, queue runtime·main through newproc and
        	// enter mstart. If mstart ever returns, the code at the end faults
        	// deliberately.
    13  
    14  	SUB	$32, RSP
    15  	MOVW	R0, 8(RSP) // argc
    16  	MOVD	R1, 16(RSP) // argv
    17  
    18  	// create istack out of the given (operating system) stack.
    19  	// _cgo_init may update stackguard.
    20  	MOVD	$runtime·g0(SB), g
    21  	MOVD RSP, R7
    22  	MOVD	$(-64*1024)(R7), R0	// provisional stack_lo: 64KB below the current SP
    23  	MOVD	R0, g_stackguard0(g)
    24  	MOVD	R0, g_stackguard1(g)
    25  	MOVD	R0, (g_stack+stack_lo)(g)
    26  	MOVD	R7, (g_stack+stack_hi)(g)
    27  
    28  	// if there is a _cgo_init, call it using the gcc ABI.
    29  	MOVD	_cgo_init(SB), R12
    30  	CMP	$0, R12
    31  	BEQ	nocgo
    32  
    33  	MRS_TPIDR_R0			// load TLS base pointer
    34  	MOVD	R0, R3			// arg 3: TLS base pointer
    35  #ifdef TLSG_IS_VARIABLE
    36  	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
    37  #else
    38  	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
    39  #endif
    40  	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
    41  	MOVD	g, R0			// arg 0: G
    42  	BL	(R12)
        	// No second test of _cgo_init is needed here: execution continues
        	// at nocgo whether or not it is set.
    46  
    47  nocgo:
    48  	// update stackguard after _cgo_init
    49  	MOVD	(g_stack+stack_lo)(g), R0
    50  	ADD	$const__StackGuard, R0
    51  	MOVD	R0, g_stackguard0(g)
    52  	MOVD	R0, g_stackguard1(g)
    53  
    54  	// set the per-goroutine and per-mach "registers"
    55  	MOVD	$runtime·m0(SB), R0
    56  
    57  	// save m->g0 = g0
    58  	MOVD	g, m_g0(R0)
    59  	// save m0 to g0->m
    60  	MOVD	R0, g_m(g)
    61  
    62  	BL	runtime·check(SB)
    63  
    64  	MOVW	8(RSP), R0	// copy argc
    65  	MOVW	R0, -8(RSP)
    66  	MOVD	16(RSP), R0		// copy argv
    67  	MOVD	R0, 0(RSP)
    68  	BL	runtime·args(SB)
    69  	BL	runtime·osinit(SB)
    70  	BL	runtime·schedinit(SB)
    71  
    72  	// create a new goroutine to start program
    73  	MOVD	$runtime·mainPC(SB), R0		// entry
        	// Push four 8-byte words with pre-decrement stores; once RSP is
        	// updated, &mainPC sits at 16(RSP) and the other three words are 0.
    74  	MOVD	RSP, R7
    75  	MOVD.W	$0, -8(R7)
    76  	MOVD.W	R0, -8(R7)
    77  	MOVD.W	$0, -8(R7)
    78  	MOVD.W	$0, -8(R7)
    79  	MOVD	R7, RSP
    80  	BL	runtime·newproc(SB)
    81  	ADD	$32, RSP	// pop the four words pushed above
    82  
    83  	// start this M
    84  	BL	runtime·mstart(SB)
    85  
        	// Only reached if mstart returns: store through a nil pointer to crash.
    86  	MOVD	$0, R0
    87  	MOVD	R0, (R0)	// boom
    88  	UNDEF
    89  
    90  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte cell initialised with the address of runtime·main
    91  GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only, 8 bytes; rt0_go hands &mainPC to newproc as the entry
    92  
    93  TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
        	// Executes a BRK instruction and then returns to the caller.
    94  	BRK
    95  	RET
    96  
    97  TEXT runtime·asminit(SB),NOSPLIT,$-8-0
        	// Nothing to initialise here: returns immediately.
    98  	RET
    99  
   100  /*
   101   *  go-routine
   102   */
   103  
   104  // void gosave(Gobuf*)
   105  // save state in Gobuf; setjmp
   106  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
        	// buf+0(FP) is the Gobuf to fill. Records the caller's SP, the
        	// return address (LR) as pc and the current g, and zeroes lr and ret.
        	// ctxt is not written; it must already be zero, otherwise badctxt
        	// is called.
   107  	MOVD	buf+0(FP), R3
   108  	MOVD	RSP, R0	// RSP is copied through R0 before being stored
   109  	MOVD	R0, gobuf_sp(R3)
   110  	MOVD	LR, gobuf_pc(R3)
   111  	MOVD	g, gobuf_g(R3)
   112  	MOVD	ZR, gobuf_lr(R3)
   113  	MOVD	ZR, gobuf_ret(R3)
   114  	// Assert ctxt is zero. See func save.
   115  	MOVD	gobuf_ctxt(R3), R0
   116  	CMP	$0, R0
   117  	BEQ	2(PC)	// ctxt == 0: skip the badctxt call
   118  	CALL	runtime·badctxt(SB)
   119  	RET
   120  
   121  // void gogo(Gobuf*)
   122  // restore state from Gobuf; longjmp
   123  TEXT runtime·gogo(SB), NOSPLIT, $24-8
        	// Makes buf.g the current g, loads SP, LR, R0 (ret) and R26 (ctxt)
        	// from buf, zeroes buf's sp/ret/lr/ctxt fields, and branches to
        	// buf.pc. Control leaves through that branch rather than a RET.
   124  	MOVD	buf+0(FP), R5
   125  	MOVD	gobuf_g(R5), g
   126  	BL	runtime·save_g(SB)
   127  
   128  	MOVD	0(g), R4	// make sure g is not nil
   129  	MOVD	gobuf_sp(R5), R0
   130  	MOVD	R0, RSP
   131  	MOVD	gobuf_lr(R5), LR
   132  	MOVD	gobuf_ret(R5), R0
   133  	MOVD	gobuf_ctxt(R5), R26
        	// Clear the fields just consumed; pc and g are left in place.
   134  	MOVD	$0, gobuf_sp(R5)
   135  	MOVD	$0, gobuf_ret(R5)
   136  	MOVD	$0, gobuf_lr(R5)
   137  	MOVD	$0, gobuf_ctxt(R5)
   138  	CMP	ZR, ZR // set condition codes for == test, needed by stack split
   139  	MOVD	gobuf_pc(R5), R6
   140  	B	(R6)
   141  
   142  // void mcall(fn func(*g))
   143  // Switch to m->g0's stack, call fn(g).
   144  // Fn must never return. It should gogo(&g->sched)
   145  // to keep running g.
   146  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
        	// Register roles: R3 = calling g, R8 = m, R26 = fn (closure context),
        	// R4 = fn's code pointer.
   147  	// Save caller state in g->sched
   148  	MOVD	RSP, R0
   149  	MOVD	R0, (g_sched+gobuf_sp)(g)
   150  	MOVD	LR, (g_sched+gobuf_pc)(g)
   151  	MOVD	$0, (g_sched+gobuf_lr)(g)
   152  	MOVD	g, (g_sched+gobuf_g)(g)
   153  
   154  	// Switch to m->g0 & its stack, call fn.
   155  	MOVD	g, R3
   156  	MOVD	g_m(g), R8
   157  	MOVD	m_g0(R8), g
   158  	BL	runtime·save_g(SB)
   159  	CMP	g, R3
   160  	BNE	2(PC)	// caller was not g0: skip the badmcall jump
   161  	B	runtime·badmcall(SB)
   162  	MOVD	fn+0(FP), R26			// context
   163  	MOVD	0(R26), R4			// code pointer
   164  	MOVD	(g_sched+gobuf_sp)(g), R0
   165  	MOVD	R0, RSP	// sp = m->g0->sched.sp
        	// Build a 16-byte frame on g0: the old g (fn's argument) in the
        	// upper word and 0 in the lower word.
   166  	MOVD	R3, -8(RSP)
   167  	MOVD	$0, -16(RSP)
   168  	SUB	$16, RSP
   169  	BL	(R4)
   170  	B	runtime·badmcall2(SB)	// reached only if fn returned
   171  
   172  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   173  // of the G stack. We need to distinguish the routine that
   174  // lives at the bottom of the G stack from the one that lives
   175  // at the top of the system stack because the one at the top of
   176  // the system stack terminates the stack walk (see topofstack()).
   177  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
        	// Never meant to run: systemstack stores this function's address + 8
        	// as a saved pc. UNDEF comes first in case it is ever entered.
   178  	UNDEF
   179  	BL	(LR)	// make sure this function is not leaf
   180  	RET
   181  
   182  // func systemstack(fn func())
   183  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
        	// Runs fn on the g0 stack. If the current g is already gsignal or
        	// g0, fn is tail-called in place (noswitch). If it is m->curg, the
        	// state is saved in g->sched, execution moves to g0's stack, fn is
        	// called, and then control switches back (switch). Any other g
        	// leads to badsystemstack.
   184  	MOVD	fn+0(FP), R3	// R3 = fn
   185  	MOVD	R3, R26		// context
   186  	MOVD	g_m(g), R4	// R4 = m
   187  
   188  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   189  	CMP	g, R5
   190  	BEQ	noswitch
   191  
   192  	MOVD	m_g0(R4), R5	// R5 = g0
   193  	CMP	g, R5
   194  	BEQ	noswitch
   195  
   196  	MOVD	m_curg(R4), R6
   197  	CMP	g, R6
   198  	BEQ	switch
   199  
   200  	// Bad: g is not gsignal, not g0, not curg. What is it?
   201  	// Hide call from linker nosplit analysis.
        	// NOTE(review): if badsystemstack returned, execution would fall
        	// through into switch; presumably it never returns.
   202  	MOVD	$runtime·badsystemstack(SB), R3
   203  	BL	(R3)
   204  
   205  switch:
   206  	// save our state in g->sched. Pretend to
   207  	// be systemstack_switch if the G stack is scanned.
   208  	MOVD	$runtime·systemstack_switch(SB), R6
   209  	ADD	$8, R6	// get past prologue
   210  	MOVD	R6, (g_sched+gobuf_pc)(g)
   211  	MOVD	RSP, R0
   212  	MOVD	R0, (g_sched+gobuf_sp)(g)
   213  	MOVD	$0, (g_sched+gobuf_lr)(g)
   214  	MOVD	g, (g_sched+gobuf_g)(g)
   215  
   216  	// switch to g0
   217  	MOVD	R5, g	// R5 still holds g0 from the check above
   218  	BL	runtime·save_g(SB)
   219  	MOVD	(g_sched+gobuf_sp)(g), R3
   220  	// make it look like mstart called systemstack on g0, to stop traceback
   221  	SUB	$16, R3
   222  	AND	$~15, R3	// round down to a 16-byte boundary
   223  	MOVD	$runtime·mstart(SB), R4
   224  	MOVD	R4, 0(R3)
   225  	MOVD	R3, RSP
   226  
   227  	// call target function
   228  	MOVD	0(R26), R3	// code pointer
   229  	BL	(R3)
   230  
   231  	// switch back to g
   232  	MOVD	g_m(g), R3
   233  	MOVD	m_curg(R3), g
   234  	BL	runtime·save_g(SB)
   235  	MOVD	(g_sched+gobuf_sp)(g), R0
   236  	MOVD	R0, RSP
   237  	MOVD	$0, (g_sched+gobuf_sp)(g)
   238  	RET
   239  
   240  noswitch:
   241  	// already on m stack, just call directly
   242  	// Using a tail call here cleans up tracebacks since we won't stop
   243  	// at an intermediate systemstack.
   244  	MOVD	0(R26), R3	// code pointer
   245  	MOVD.P	16(RSP), R30	// restore LR
   246  	B	(R3)
   247  
   248  /*
   249   * support for morestack
   250   */
   251  
   252  // Called during function prolog when more stack is needed.
   253  // Caller has already loaded:
   254  // R3 prolog's LR (R30)
   255  //
   256  // The traceback routines see morestack on a g0 as being
   257  // the top of a stack (for example, morestack calling newstack
   258  // calling the scheduler calling newm calling gc), so we must
   259  // record an argument size. For that purpose, it has no arguments.
   260  TEXT runtime·morestack(SB),NOSPLIT,$-8-0
        	// On entry: R3 = the prologue's LR (f's caller's PC), LR = return
        	// address inside f, R26 = f's closure context. R8 is loaded with m.
   261  	// Cannot grow scheduler stack (m->g0).
   262  	MOVD	g_m(g), R8
   263  	MOVD	m_g0(R8), R4
   264  	CMP	g, R4
   265  	BNE	3(PC)	// not g0: skip the two error instructions
   266  	BL	runtime·badmorestackg0(SB)
   267  	B	runtime·abort(SB)
   268  
   269  	// Cannot grow signal stack (m->gsignal).
   270  	MOVD	m_gsignal(R8), R4
   271  	CMP	g, R4
   272  	BNE	3(PC)	// not gsignal: skip the two error instructions
   273  	BL	runtime·badmorestackgsignal(SB)
   274  	B	runtime·abort(SB)
   275  
   276  	// Called from f.
   277  	// Set g->sched to context in f
   278  	MOVD	RSP, R0
   279  	MOVD	R0, (g_sched+gobuf_sp)(g)
   280  	MOVD	LR, (g_sched+gobuf_pc)(g)
   281  	MOVD	R3, (g_sched+gobuf_lr)(g)
   282  	MOVD	R26, (g_sched+gobuf_ctxt)(g)
   283  
   284  	// Called from f.
   285  	// Set m->morebuf to f's callers.
   286  	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   287  	MOVD	RSP, R0
   288  	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   289  	MOVD	g, (m_morebuf+gobuf_g)(R8)
   290  
   291  	// Call newstack on m->g0's stack.
   292  	MOVD	m_g0(R8), g
   293  	BL	runtime·save_g(SB)
   294  	MOVD	(g_sched+gobuf_sp)(g), R0
   295  	MOVD	R0, RSP
   296  	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
   297  	BL	runtime·newstack(SB)
   298  
   299  	// Not reached, but make sure the return PC from the call to newstack
   300  	// is still in this function, and not the beginning of the next.
   301  	UNDEF
   302  
   303  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-4-0
        	// Variant of morestack for callers with no closure context: zeroes
        	// R26 and jumps to runtime·morestack.
        	// NOTE(review): the frame size is written $-4 here while the other
        	// frameless functions in this file use $-8; confirm this is intended.
   304  	MOVW	$0, R26
   305  	B runtime·morestack(SB)
   306  
   307  // reflectcall: call a function with the given argument list
   308  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   309  // we don't have variable-sized frames, so we use a small number
   310  // of constant-sized-frame functions to encode a few bits of size in the pc.
   311  // Caution: ugly multiline assembly macros in your future!
   312  
   313  #define DISPATCH(NAME,MAXSIZE)		\
        	/* If R16 (the argument size) is at most MAXSIZE, jump to NAME. */	\
        	/* Otherwise BGT skips the jump and falls out of the macro. */	\
        	/* R27 is used as scratch. */	\
   314  	MOVD	$MAXSIZE, R27;		\
   315  	CMP	R27, R16;		\
   316  	BGT	3(PC);			\
   317  	MOVD	$NAME(SB), R27;	\
   318  	B	(R27)
   319  // Note: can't just "B NAME(SB)" - bad inlining results.
   320  
   321  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Entry point under the reflect package's name: jumps straight to
        	// ·reflectcall.
   322  	B	·reflectcall(SB)
   323  
   324  TEXT ·reflectcall(SB), NOSPLIT, $-8-32
        	// Loads the 32-bit argsize into R16 and walks the DISPATCH ladder:
        	// the first callN whose N is >= argsize is jumped to. If argsize
        	// exceeds the largest entry, control reaches badreflectcall.
   325  	MOVWU argsize+24(FP), R16
   326  	DISPATCH(runtime·call32, 32)
   327  	DISPATCH(runtime·call64, 64)
   328  	DISPATCH(runtime·call128, 128)
   329  	DISPATCH(runtime·call256, 256)
   330  	DISPATCH(runtime·call512, 512)
   331  	DISPATCH(runtime·call1024, 1024)
   332  	DISPATCH(runtime·call2048, 2048)
   333  	DISPATCH(runtime·call4096, 4096)
   334  	DISPATCH(runtime·call8192, 8192)
   335  	DISPATCH(runtime·call16384, 16384)
   336  	DISPATCH(runtime·call32768, 32768)
   337  	DISPATCH(runtime·call65536, 65536)
   338  	DISPATCH(runtime·call131072, 131072)
   339  	DISPATCH(runtime·call262144, 262144)
   340  	DISPATCH(runtime·call524288, 524288)
   341  	DISPATCH(runtime·call1048576, 1048576)
   342  	DISPATCH(runtime·call2097152, 2097152)
   343  	DISPATCH(runtime·call4194304, 4194304)
   344  	DISPATCH(runtime·call8388608, 8388608)
   345  	DISPATCH(runtime·call16777216, 16777216)
   346  	DISPATCH(runtime·call33554432, 33554432)
   347  	DISPATCH(runtime·call67108864, 67108864)
   348  	DISPATCH(runtime·call134217728, 134217728)
   349  	DISPATCH(runtime·call268435456, 268435456)
   350  	DISPATCH(runtime·call536870912, 536870912)
   351  	DISPATCH(runtime·call1073741824, 1073741824)
        	// No size class was large enough.
   352  	MOVD	$runtime·badreflectcall(SB), R0
   353  	B	(R0)
   354  
   355  #define CALLFN(NAME,MAXSIZE)			\
   356  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   357  	NO_LOCAL_POINTERS;			\
        	/* Defines NAME with a MAXSIZE-byte frame. It copies argsize bytes */	\
        	/* from arg to 8(RSP), calls the function in f, then passes the */	\
        	/* bytes from retoffset onward to callRet to be copied back. */	\
   358  	/* copy arguments to stack */		\
   359  	MOVD	arg+16(FP), R3;			\
   360  	MOVWU	argsize+24(FP), R4;		\
   361  	ADD	$8, RSP, R5;			\
   362  	BIC	$0xf, R4, R6;			\
   363  	CBZ	R6, 6(PC);			\
   364  	/* if R6=(argsize&~15) != 0 */		\
   365  	ADD	R6, R5, R6;			\
   366  	/* copy 16 bytes a time */		\
   367  	LDP.P	16(R3), (R7, R8);		\
   368  	STP.P	(R7, R8), 16(R5);		\
   369  	CMP	R5, R6;				\
   370  	BNE	-3(PC);				\
   371  	AND	$0xf, R4, R6;			\
   372  	CBZ	R6, 6(PC);			\
   373  	/* if R6=(argsize&15) != 0 */		\
   374  	ADD	R6, R5, R6;			\
   375  	/* copy 1 byte a time for the rest */	\
   376  	MOVBU.P	1(R3), R7;			\
   377  	MOVBU.P	R7, 1(R5);			\
   378  	CMP	R5, R6;				\
   379  	BNE	-3(PC);				\
   380  	/* call function */			\
   381  	MOVD	f+8(FP), R26;			\
   382  	MOVD	(R26), R0;			\
   383  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   384  	BL	(R0);				\
   385  	/* copy return values back */		\
        	/* callRet takes R7=argtype, R3=arg+retoffset, */	\
        	/* R5=stack copy+retoffset, R4=n-retoffset */	\
   386  	MOVD	argtype+0(FP), R7;		\
   387  	MOVD	arg+16(FP), R3;			\
   388  	MOVWU	n+24(FP), R4;			\
   389  	MOVWU	retoffset+28(FP), R6;		\
   390  	ADD	$8, RSP, R5;			\
   391  	ADD	R6, R5; 			\
   392  	ADD	R6, R3;				\
   393  	SUB	R6, R4;				\
   394  	BL	callRet<>(SB);			\
   395  	RET
   396  
   397  // callRet copies return values back at the end of call*. This is a
   398  // separate function so it can allocate stack space for the arguments
   399  // to reflectcallmove. It does not follow the Go ABI; it expects its
   400  // arguments in registers.
   401  TEXT callRet<>(SB), NOSPLIT, $40-0
        	// Spills the four register arguments into consecutive outgoing
        	// argument slots and calls reflectcallmove. As set up by CALLFN:
        	// R7 = argtype, R3 = arg+retoffset, R5 = stack copy+retoffset,
        	// R4 = n-retoffset.
   402  	MOVD	R7, 8(RSP)
   403  	MOVD	R3, 16(RSP)
   404  	MOVD	R5, 24(RSP)
   405  	MOVD	R4, 32(RSP)
   406  	BL	runtime·reflectcallmove(SB)
   407  	RET
   408  
   409  // These have 8 added to make the overall frame size a multiple of 16,
   410  // as required by the ABI. (There is another +8 for the saved LR.)
        // Each line instantiates CALLFN to define ·callN with a frame of N+8
        // bytes, one per size class tested by the DISPATCH ladder in
        // ·reflectcall.
   411  CALLFN(·call32, 40 )
   412  CALLFN(·call64, 72 )
   413  CALLFN(·call128, 136 )
   414  CALLFN(·call256, 264 )
   415  CALLFN(·call512, 520 )
   416  CALLFN(·call1024, 1032 )
   417  CALLFN(·call2048, 2056 )
   418  CALLFN(·call4096, 4104 )
   419  CALLFN(·call8192, 8200 )
   420  CALLFN(·call16384, 16392 )
   421  CALLFN(·call32768, 32776 )
   422  CALLFN(·call65536, 65544 )
   423  CALLFN(·call131072, 131080 )
   424  CALLFN(·call262144, 262152 )
   425  CALLFN(·call524288, 524296 )
   426  CALLFN(·call1048576, 1048584 )
   427  CALLFN(·call2097152, 2097160 )
   428  CALLFN(·call4194304, 4194312 )
   429  CALLFN(·call8388608, 8388616 )
   430  CALLFN(·call16777216, 16777224 )
   431  CALLFN(·call33554432, 33554440 )
   432  CALLFN(·call67108864, 67108872 )
   433  CALLFN(·call134217728, 134217736 )
   434  CALLFN(·call268435456, 268435464 )
   435  CALLFN(·call536870912, 536870920 )
   436  CALLFN(·call1073741824, 1073741832 )
   437  
   438  // AES hashing not implemented for ARM64, issue #10109.
   439  TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
        	// Unimplemented stub: loads from address 0. There is no RET, so
        	// the load is presumably expected to fault.
   440  	MOVW	$0, R0
   441  	MOVW	(R0), R1
   442  TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0
        	// Unimplemented stub: loads from address 0. There is no RET, so
        	// the load is presumably expected to fault.
   443  	MOVW	$0, R0
   444  	MOVW	(R0), R1
   445  TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0
        	// Unimplemented stub: loads from address 0. There is no RET, so
        	// the load is presumably expected to fault.
   446  	MOVW	$0, R0
   447  	MOVW	(R0), R1
   448  TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
        	// Unimplemented stub: loads from address 0. There is no RET, so
        	// the load is presumably expected to fault.
   449  	MOVW	$0, R0
   450  	MOVW	(R0), R1
   451  	
   452  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Executes YIELD once per iteration, counting the 32-bit value
        	// cycles down to zero.
        	// NOTE(review): the counter is decremented before it is tested, so
        	// cycles == 0 wraps to 0xffffffff instead of returning at once;
        	// confirm callers never pass 0.
        	// NOTE(review): the TEXT line declares 0 argument bytes although
        	// cycles+0(FP) is read; confirm.
   453  	MOVWU	cycles+0(FP), R0
   454  again:
   455  	YIELD
   456  	SUBW	$1, R0
   457  	CBNZ	R0, again
   458  	RET
   459  
   460  // void jmpdefer(fv, argp);
   461  // called from deferreturn.
   462  // 1. grab stored LR for caller
   463  // 2. sub 4 bytes to get back to BL deferreturn
   464  // 3. BR to fn
   465  TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16
        	// Sets LR to 4 bytes before the return address stored at 0(RSP),
        	// so that the deferred function returns to the BL instruction
        	// itself. Then sets RSP to argp-8, loads fv into R26 and jumps to
        	// fv's code pointer.
   466  	MOVD	0(RSP), R0
   467  	SUB	$4, R0
   468  	MOVD	R0, LR
   469  
   470  	MOVD	fv+0(FP), R26
   471  	MOVD	argp+8(FP), R0
   472  	MOVD	R0, RSP
   473  	SUB	$8, RSP
   474  	MOVD	0(R26), R3	// code pointer is the first word of fv
   475  	B	(R3)
   476  
   477  // Save state of caller into g->sched. Smashes R0.
   478  TEXT gosave<>(SB),NOSPLIT,$-8
        	// File-local helper: stores LR as pc and RSP as sp in the current
        	// g's sched, and zeroes sched.lr and sched.ret. sched.ctxt is only
        	// checked: badctxt is called if it is non-zero.
   479  	MOVD	LR, (g_sched+gobuf_pc)(g)
   480  	MOVD RSP, R0
   481  	MOVD	R0, (g_sched+gobuf_sp)(g)
   482  	MOVD	$0, (g_sched+gobuf_lr)(g)
   483  	MOVD	$0, (g_sched+gobuf_ret)(g)
   484  	// Assert ctxt is zero. See func save.
   485  	MOVD	(g_sched+gobuf_ctxt)(g), R0
   486  	CMP	$0, R0
   487  	BEQ	2(PC)	// ctxt == 0: skip the badctxt call
   488  	CALL	runtime·badctxt(SB)
   489  	RET
   490  
   491  // func asmcgocall(fn, arg unsafe.Pointer) int32
   492  // Call fn(arg) on the scheduler stack,
   493  // aligned appropriately for the gcc ABI.
   494  // See cgocall.go for more details.
   495  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
        	// Register roles: R1 = fn, R0 = arg (and fn's result afterwards),
        	// R2 = caller's SP, R4 = caller's g, R3 = m->g0, R9 = temporary
        	// home for R0 across helper calls.
   496  	MOVD	fn+0(FP), R1
   497  	MOVD	arg+8(FP), R0
   498  
   499  	MOVD	RSP, R2		// save original stack pointer
   500  	MOVD	g, R4
   501  
   502  	// Figure out if we need to switch to m->g0 stack.
   503  	// We get called to create new OS threads too, and those
   504  	// come in on the m->g0 stack already.
   505  	MOVD	g_m(g), R8
   506  	MOVD	m_g0(R8), R3
   507  	CMP	R3, g
   508  	BEQ	g0
   509  	MOVD	R0, R9	// gosave<> and save_g might clobber R0
   510  	BL	gosave<>(SB)
   511  	MOVD	R3, g
   512  	BL	runtime·save_g(SB)
   513  	MOVD	(g_sched+gobuf_sp)(g), R0
   514  	MOVD	R0, RSP
   515  	MOVD	R9, R0
   516  
   517  	// Now on a scheduling stack (a pthread-created stack).
   518  g0:
   519  	// Save room for two of our pointers /*, plus 32 bytes of callee
   520  	// save area that lives on the caller stack. */
   521  	MOVD	RSP, R13
   522  	SUB	$16, R13
   523  	MOVD	R13, RSP
   524  	MOVD	R4, 0(RSP)	// save old g on stack
   525  	MOVD	(g_stack+stack_hi)(R4), R4
   526  	SUB	R2, R4
   527  	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   528  	BL	(R1)
   529  	MOVD	R0, R9	// keep fn's result safe across save_g
   530  
   531  	// Restore g, stack pointer. R0 is errno, so don't touch it
   532  	MOVD	0(RSP), g
   533  	BL	runtime·save_g(SB)
        	// New SP = old g's stack.hi minus the depth saved before the call.
   534  	MOVD	(g_stack+stack_hi)(g), R5
   535  	MOVD	8(RSP), R6
   536  	SUB	R6, R5
   537  	MOVD	R9, R0
   538  	MOVD	R5, RSP
   539  
   540  	MOVW	R0, ret+16(FP)	// only the low 32 bits are returned
   541  	RET
   542  
   543  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   544  // Turn the fn into a Go func (by taking its address) and call
   545  // cgocallback_gofunc.
   546  TEXT runtime·cgocallback(SB),NOSPLIT,$40-32
        	// Forwards its four arguments to cgocallback_gofunc, replacing fn by
        	// the address of the fn argument slot. The callee is reached through
        	// a register-indirect call.
   547  	MOVD	$fn+0(FP), R0	// address of the slot, not its contents
   548  	MOVD	R0, 8(RSP)
   549  	MOVD	frame+8(FP), R0
   550  	MOVD	R0, 16(RSP)
   551  	MOVD	framesize+16(FP), R0
   552  	MOVD	R0, 24(RSP)
   553  	MOVD	ctxt+24(FP), R0
   554  	MOVD	R0, 32(RSP)
   555  	MOVD	$runtime·cgocallback_gofunc(SB), R0
   556  	BL	(R0)
   557  	RET
   558  
   559  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   560  // See cgocall.go for more details.
   561  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32
   562  	NO_LOCAL_POINTERS
        	// Frame slots: savedm-8(SP) holds the m found on entry (0 if needm
        	// had to be called); savedsp-16(SP) holds the previous value of
        	// m->g0->sched.sp. R8 holds m from havem onwards.
   563  
   564  	// Load g from thread-local storage.
   565  	MOVB	runtime·iscgo(SB), R3
   566  	CMP	$0, R3
   567  	BEQ	nocgo
   568  	BL	runtime·load_g(SB)
   569  nocgo:
   570  
   571  	// If g is nil, Go did not create the current thread.
   572  	// Call needm to obtain one for temporary use.
   573  	// In this case, we're running on the thread stack, so there's
   574  	// lots of space, but the linker doesn't know. Hide the call from
   575  	// the linker analysis by using an indirect call.
   576  	CMP	$0, g
   577  	BEQ	needm
   578  
   579  	MOVD	g_m(g), R8
   580  	MOVD	R8, savedm-8(SP)
   581  	B	havem
   582  
   583  needm:
   584  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   585  	MOVD	$runtime·needm(SB), R0
   586  	BL	(R0)
   587  
   588  	// Set m->sched.sp = SP, so that if a panic happens
   589  	// during the function we are about to execute, it will
   590  	// have a valid SP to run on the g0 stack.
   591  	// The next few lines (after the havem label)
   592  	// will save this SP onto the stack and then write
   593  	// the same SP back to m->sched.sp. That seems redundant,
   594  	// but if an unrecovered panic happens, unwindm will
   595  	// restore the g->sched.sp from the stack location
   596  	// and then systemstack will try to use it. If we don't set it here,
   597  	// that restored SP will be uninitialized (typically 0) and
   598  	// will not be usable.
   599  	MOVD	g_m(g), R8
   600  	MOVD	m_g0(R8), R3
   601  	MOVD	RSP, R0
   602  	MOVD	R0, (g_sched+gobuf_sp)(R3)
   603  
   604  havem:
   605  	// Now there's a valid m, and we're running on its m->g0.
   606  	// Save current m->g0->sched.sp on stack and then set it to SP.
   607  	// Save current sp in m->g0->sched.sp in preparation for
   608  	// switch back to m->curg stack.
   609  	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
   610  	// Beware that the frame size is actually 32.
   611  	MOVD	m_g0(R8), R3
   612  	MOVD	(g_sched+gobuf_sp)(R3), R4
   613  	MOVD	R4, savedsp-16(SP)
   614  	MOVD	RSP, R0
   615  	MOVD	R0, (g_sched+gobuf_sp)(R3)
   616  
   617  	// Switch to m->curg stack and call runtime.cgocallbackg.
   618  	// Because we are taking over the execution of m->curg
   619  	// but *not* resuming what had been running, we need to
   620  	// save that information (m->curg->sched) so we can restore it.
   621  	// We can restore m->curg->sched.sp easily, because calling
   622  	// runtime.cgocallbackg leaves SP unchanged upon return.
   623  	// To save m->curg->sched.pc, we push it onto the stack.
   624  	// This has the added benefit that it looks to the traceback
   625  	// routine like cgocallbackg is going to return to that
   626  	// PC (because the frame we allocate below has the same
   627  	// size as cgocallback_gofunc's frame declared above)
   628  	// so that the traceback will seamlessly trace back into
   629  	// the earlier calls.
   630  	//
   631  	// In the new goroutine, -8(SP) is unused (where SP refers to
   632  	// m->curg's SP while we're setting it up, before we've adjusted it).
   633  	MOVD	m_curg(R8), g
   634  	BL	runtime·save_g(SB)
   635  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   636  	MOVD	(g_sched+gobuf_pc)(g), R5
   637  	MOVD	R5, -(24+8)(R4)	// saved pc lands at 0(RSP) of the new frame
   638  	MOVD	ctxt+24(FP), R0
   639  	MOVD	R0, -(16+8)(R4)	// ctxt lands at 8(RSP) of the new frame
   640  	MOVD	$-(24+8)(R4), R0 // maintain 16-byte SP alignment
   641  	MOVD	R0, RSP
   642  	BL	runtime·cgocallbackg(SB)
   643  
   644  	// Restore g->sched (== m->curg->sched) from saved values.
   645  	MOVD	0(RSP), R5
   646  	MOVD	R5, (g_sched+gobuf_pc)(g)
   647  	MOVD	RSP, R4
   648  	ADD	$(24+8), R4, R4
   649  	MOVD	R4, (g_sched+gobuf_sp)(g)
   650  
   651  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   652  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   653  	// so we do not have to restore it.)
   654  	MOVD	g_m(g), R8
   655  	MOVD	m_g0(R8), g
   656  	BL	runtime·save_g(SB)
   657  	MOVD	(g_sched+gobuf_sp)(g), R0
   658  	MOVD	R0, RSP
   659  	MOVD	savedsp-16(SP), R4
   660  	MOVD	R4, (g_sched+gobuf_sp)(g)
   661  
   662  	// If the m on entry was nil, we called needm above to borrow an m
   663  	// for the duration of the call. Since the call is over, return it with dropm.
   664  	MOVD	savedm-8(SP), R6
   665  	CMP	$0, R6
   666  	BNE	droppedm	// an m already existed on entry: nothing to give back
   667  	MOVD	$runtime·dropm(SB), R0
   668  	BL	(R0)
   669  droppedm:
   670  
   671  	// Done!
   672  	RET
   673  
   674  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   675  // Must obey the gcc calling convention.
   676  TEXT _cgo_topofstack(SB),NOSPLIT,$24
        	// Returns g->m->curg.stack.hi in R0. g and R27 are spilled to the
        	// frame around the load_g call and restored before returning.
   677  	// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
   678  	// are callee-save in the gcc calling convention, so save them.
   679  	MOVD	R27, savedR27-8(SP)
   680  	MOVD	g, saveG-16(SP)
   681  
   682  	BL	runtime·load_g(SB)
   683  	MOVD	g_m(g), R0
   684  	MOVD	m_curg(R0), R0
   685  	MOVD	(g_stack+stack_hi)(R0), R0
   686  
   687  	MOVD	saveG-16(SP), g
   688  	MOVD	savedR27-8(SP), R27	// same slot and name as the spill above
   689  	RET
   690  
   691  // void setg(G*); set g. for use by needm.
   692  TEXT runtime·setg(SB), NOSPLIT, $0-8
        	// Loads the argument gg into the g register, then calls save_g.
   693  	MOVD	gg+0(FP), g
   694  	// This only happens if iscgo, so jump straight to save_g
   695  	BL	runtime·save_g(SB)
   696  	RET
   697  
   698  // void setg_gcc(G*); set g called from gcc
   699  TEXT setg_gcc<>(SB),NOSPLIT,$8
        	// Takes the new g in R0 (register argument, not the Go stack ABI).
        	// R27 is preserved across the save_g call via the frame slot.
   700  	MOVD	R0, g
   701  	MOVD	R27, savedR27-8(SP)
   702  	BL	runtime·save_g(SB)
   703  	MOVD	savedR27-8(SP), R27
   704  	RET
   705  
   706  TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8
        	// Returns the word at 0(RSP). This function has no frame of its
        	// own, so that word belongs to the caller's frame.
   707  	MOVD	0(RSP), R0		// LR saved by caller
   708  	MOVD	R0, ret+0(FP)
   709  	RET
   710  
   711  TEXT runtime·abort(SB),NOSPLIT,$-8-0
        	// Branches to the address held in ZR (zero), presumably to force a
        	// fault. UNDEF follows in case execution continues.
   712  	B	(ZR)
   713  	UNDEF
   714  
   715  // memequal(a, b unsafe.Pointer, size uintptr) bool
   716  TEXT runtime·memequal(SB),NOSPLIT,$-8-25
        	// A size of 0 yields true immediately. Otherwise sets up
        	// memeqbody's register arguments (R0 = a, R1 = size, R2 = b,
        	// R8 = &ret) and tail-jumps to it; memeqbody writes the result.
   717  	MOVD	size+16(FP), R1
   718  	// short path to handle 0-byte case
   719  	CBZ	R1, equal
   720  	MOVD	a+0(FP), R0
   721  	MOVD	b+8(FP), R2
   722  	MOVD	$ret+24(FP), R8
   723  	B	runtime·memeqbody<>(SB)
   724  equal:
   725  	MOVD	$1, R0
   726  	MOVB	R0, ret+24(FP)
   727  	RET
   728  
   729  // memequal_varlen(a, b unsafe.Pointer) bool
   730  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
        	// Identical pointers compare equal without touching memory.
        	// Otherwise the size is read from the closure in R26 and
        	// runtime·memequal(a, b, size) is called through the stack.
   731  	MOVD	a+0(FP), R3
   732  	MOVD	b+8(FP), R4
   733  	CMP	R3, R4
   734  	BEQ	eq
   735  	MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
   736  	MOVD	R3, 8(RSP)
   737  	MOVD	R4, 16(RSP)
   738  	MOVD	R5, 24(RSP)
   739  	BL	runtime·memequal(SB)
   740  	MOVBU	32(RSP), R3	// memequal's bool result slot
   741  	MOVB	R3, ret+16(FP)
   742  	RET
   743  eq:
   744  	MOVD	$1, R3
   745  	MOVB	R3, ret+16(FP)
   746  	RET
   747  
   748  TEXT runtime·cmpstring(SB),NOSPLIT,$-4-40
        	// Loads both strings' base pointers and lengths into cmpbody's
        	// argument registers, points R7 at RSP+40 (where cmpbody stores
        	// the result) and tail-jumps to cmpbody.
   749  	MOVD	s1_base+0(FP), R2
   750  	MOVD	s1_len+8(FP), R0
   751  	MOVD	s2_base+16(FP), R3
   752  	MOVD	s2_len+24(FP), R1
   753  	ADD	$40, RSP, R7
   754  	B	runtime·cmpbody<>(SB)
   755  
   756  TEXT bytes·Compare(SB),NOSPLIT,$-4-56
        	// Loads both slices' data pointers (offsets 0 and 24) and lengths
        	// (offsets 8 and 32) into cmpbody's argument registers, points R7
        	// at RSP+56 (where cmpbody stores the result) and tail-jumps to
        	// cmpbody.
   757  	MOVD	s1+0(FP), R2
   758  	MOVD	s1+8(FP), R0
   759  	MOVD	s2+24(FP), R3
   760  	MOVD	s2+32(FP), R1
   761  	ADD	$56, RSP, R7
   762  	B	runtime·cmpbody<>(SB)
   763  
   764  // On entry:
   765  // R0 is the length of s1
   766  // R1 is the length of s2
   767  // R2 points to the start of s1
   768  // R3 points to the start of s2
   769  // R7 points to return value (-1/0/1 will be written here)
   770  //
   771  // On exit:
   772  // R4, R5, and R6 are clobbered
   773  TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0
        	// Compares byte by byte over the shorter length. The first
        	// differing byte decides the result; if none differs, the lengths
        	// decide. R2 and R3 are advanced as a side effect.
   774  	CMP	R2, R3
   775  	BEQ	samebytes // same starting pointers; compare lengths
   776  	CMP	R0, R1
   777  	CSEL    LT, R1, R0, R6 // R6 is min(R0, R1)
   778  
   779  	ADD	R2, R6	// R2 is current byte in s1, R6 is last byte in s1 to compare
   780  loop:
   781  	CMP	R2, R6
   782  	BEQ	samebytes // all compared bytes were the same; compare lengths
   783  	MOVBU.P	1(R2), R4
   784  	MOVBU.P	1(R3), R5
   785  	CMP	R4, R5
   786  	BEQ	loop
   787  	// bytes differed
   788  	MOVD	$1, R4
   789  	CSNEG	LT, R4, R4, R4	// +1 if the s2 byte is smaller than the s1 byte, else -1
   790  	MOVD	R4, (R7)
   791  	RET
   792  samebytes:
   793  	MOVD	$1, R4
   794  	CMP	R0, R1
   795  	CSNEG	LT, R4, R4, R4	// +1 if s2 is shorter than s1, else -1
   796  	CSEL	EQ, ZR, R4, R4	// equal lengths override that with 0
   797  	MOVD	R4, (R7)
   798  	RET
   799  
   800  //
   801  // functions for other packages
   802  //
   803  TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
        	// Sets up indexbytebody's register arguments (R0 = data, R2 = len,
        	// R1 = byte, R8 = &ret) and tail-jumps to it; indexbytebody
        	// writes the result.
   804  	MOVD	b+0(FP), R0
   805  	MOVD	b_len+8(FP), R2
   806  	MOVBU	c+24(FP), R1
   807  	MOVD	$ret+32(FP), R8
   808  	B	runtime·indexbytebody<>(SB)
   809  
   810  TEXT strings·IndexByte(SB),NOSPLIT,$0-32
        	// Sets up indexbytebody's register arguments (R0 = data, R2 = len,
        	// R1 = byte, R8 = &ret) and tail-jumps to it; indexbytebody
        	// writes the result.
   811  	MOVD	s+0(FP), R0
   812  	MOVD	s_len+8(FP), R2
   813  	MOVBU	c+16(FP), R1
   814  	MOVD	$ret+24(FP), R8
   815  	B	runtime·indexbytebody<>(SB)
   816  
   817  // input:
   818  //   R0: data
   819  //   R1: byte to search
   820  //   R2: data len
   821  //   R8: address to put result
   822  TEXT runtime·indexbytebody<>(SB),NOSPLIT,$0
        	// Stores to (R8) the index of the first byte equal to R1 within
        	// the R2 bytes at R0, or -1 if there is none (including R2 == 0).
        	// Working registers: R3 = cursor over 32-byte-aligned chunks,
        	// R9 = R0 & 31, R10 = R2 & 31, R11 = original R0, R4-R6 scratch,
        	// V0 = search byte replicated, V5 = lane mask, V1-V4 and V6 scratch.
        	// Loads always cover whole aligned 32-byte chunks; bytes outside
        	// the requested range are masked out of the syndrome.
   823  	// Core algorithm:
   824  	// For each 32-byte chunk we calculate a 64-bit syndrome value,
   825  	// with two bits per byte. For each tuple, bit 0 is set if the
   826  	// relevant byte matched the requested character and bit 1 is
   827  	// not used (faster than using a 32bit syndrome). Since the bits
   828  	// in the syndrome reflect exactly the order in which things occur
   829  	// in the original string, counting trailing zeros allows to
   830  	// identify exactly which byte has matched.
   831  
   832  	CBZ	R2, fail
   833  	MOVD	R0, R11
   834  	// Magic constant 0x40100401 allows us to identify
   835  	// which lane matches the requested byte.
   836  	// 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
   837  	// Different bytes have different bit masks (i.e: 1, 4, 16, 64)
   838  	MOVD	$0x40100401, R5
   839  	VMOV	R1, V0.B16
   840  	// Work with aligned 32-byte chunks
   841  	BIC	$0x1f, R0, R3
   842  	VMOV	R5, V5.S4
   843  	ANDS	$0x1f, R0, R9
   844  	AND	$0x1f, R2, R10
   845  	BEQ	loop	// flags from ANDS: start is already 32-byte aligned
   846  
   847  	// Input string is not 32-byte aligned. We calculate the
   848  	// syndrome value for the aligned 32 bytes block containing
   849  	// the first bytes and mask off the irrelevant part.
   850  	VLD1.P	(R3), [V1.B16, V2.B16]
   851  	SUB	$0x20, R9, R4
   852  	ADDS	R4, R2, R2	// R2 = bytes left after this chunk; flags feed BLS below
   853  	VCMEQ	V0.B16, V1.B16, V3.B16
   854  	VCMEQ	V0.B16, V2.B16, V4.B16
   855  	VAND	V5.B16, V3.B16, V3.B16
   856  	VAND	V5.B16, V4.B16, V4.B16
   857  	VADDP	V4.B16, V3.B16, V6.B16 // 256->128
   858  	VADDP	V6.B16, V6.B16, V6.B16 // 128->64
   859  	VMOV	V6.D[0], R6
   860  	// Clear the irrelevant lower bits
   861  	LSL	$1, R9, R4
   862  	LSR	R4, R6, R6
   863  	LSL	R4, R6, R6
   864  	// The first block can also be the last
   865  	BLS	masklast
   866  	// Have we found something already?
   867  	CBNZ	R6, tail
   868  
   869  loop:
   870  	VLD1.P	(R3), [V1.B16, V2.B16]
   871  	SUBS	$0x20, R2, R2
   872  	VCMEQ	V0.B16, V1.B16, V3.B16
   873  	VCMEQ	V0.B16, V2.B16, V4.B16
   874  	// If we're out of data we finish regardless of the result
   875  	BLS	end
   876  	// Use a fast check for the termination condition
   877  	VORR	V4.B16, V3.B16, V6.B16
   878  	VADDP	V6.D2, V6.D2, V6.D2
   879  	VMOV	V6.D[0], R6
   880  	// We're not out of data, loop if we haven't found the character
   881  	CBZ	R6, loop
   882  
   883  end:
   884  	// Termination condition found, let's calculate the syndrome value
   885  	VAND	V5.B16, V3.B16, V3.B16
   886  	VAND	V5.B16, V4.B16, V4.B16
   887  	VADDP	V4.B16, V3.B16, V6.B16
   888  	VADDP	V6.B16, V6.B16, V6.B16
   889  	VMOV	V6.D[0], R6
   890  	// Only do the clear for the last possible block with less than 32 bytes
   891  	// Condition flags come from SUBS in the loop
   892  	BHS	tail
   893  
   894  masklast:
   895  	// Clear the irrelevant upper bits
   896  	ADD	R9, R10, R4
   897  	AND	$0x1f, R4, R4
   898  	SUB	$0x20, R4, R4
   899  	NEG	R4<<1, R4
   900  	LSL	R4, R6, R6
   901  	LSR	R4, R6, R6
   902  
   903  tail:
   904  	// Check that we have found a character
   905  	CBZ	R6, fail
   906  	// Count the trailing zeros using bit reversing
   907  	RBIT	R6, R6
   908  	// Compensate the last post-increment
   909  	SUB	$0x20, R3, R3
   910  	// And count the leading zeros
   911  	CLZ	R6, R6
   912  	// R6 is twice the offset into the fragment
   913  	ADD	R6>>1, R3, R0
   914  	// Compute the offset result
   915  	SUB	R11, R0, R0
   916  	MOVD	R0, (R8)
   917  	RET
   918  
   919  fail:
   920  	MOVD	$-1, R0
   921  	MOVD	R0, (R8)
   922  	RET
   923  
   924  // Equal(a, b []byte) bool
   925  TEXT bytes·Equal(SB),NOSPLIT,$0-49
        	// Different lengths give false and two empty slices give true,
        	// both without reading data. Otherwise sets up memeqbody's
        	// register arguments (R0 = a, R1 = len, R2 = b, R8 = &ret) and
        	// tail-jumps to it; memeqbody writes the result.
   926  	MOVD	a_len+8(FP), R1
   927  	MOVD	b_len+32(FP), R3
   928  	CMP	R1, R3
   929  	// unequal lengths are not equal
   930  	BNE	not_equal
   931  	// short path to handle 0-byte case
   932  	CBZ	R1, equal
   933  	MOVD	a+0(FP), R0
   934  	MOVD	b+24(FP), R2
   935  	MOVD	$ret+48(FP), R8
   936  	B	runtime·memeqbody<>(SB)
   937  equal:
   938  	MOVD	$1, R0
   939  	MOVB	R0, ret+48(FP)
   940  	RET
   941  not_equal:
   942  	MOVB	ZR, ret+48(FP)
   943  	RET
   944  
   945  // input:
   946  // R0: pointer a
   947  // R1: data len
   948  // R2: pointer b
   949  // R8: address to put result
   950  TEXT runtime·memeqbody<>(SB),NOSPLIT,$0
        	// Stores the byte 1 to (R8) if the R1 bytes at R0 and R2 are all
        	// equal, otherwise 0. Works through 64-byte chunks, then 16-byte
        	// chunks, then an 8/4/2/1-byte tail chosen by bits 3..0 of R1.
        	// Modifies R0-R6 and V0-V11.
   951  	CMP	$1, R1
   952  	// handle 1-byte special case for better performance
   953  	BEQ	one
   954  	CMP	$16, R1
   955  	// handle specially if length < 16
   956  	BLO	tail
   957  	BIC	$0x3f, R1, R3
   958  	CBZ	R3, chunk16
   959  	// work with 64-byte chunks
   960  	ADD	R3, R0, R6	// end of chunks
   961  chunk64_loop:
   962  	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
   963  	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
   964  	VCMEQ	V0.D2, V4.D2, V8.D2
   965  	VCMEQ	V1.D2, V5.D2, V9.D2
   966  	VCMEQ	V2.D2, V6.D2, V10.D2
   967  	VCMEQ	V3.D2, V7.D2, V11.D2
   968  	VAND	V8.B16, V9.B16, V8.B16
   969  	VAND	V8.B16, V10.B16, V8.B16
   970  	VAND	V8.B16, V11.B16, V8.B16
   971  	CMP	R0, R6	// flags are consumed by the BNE below
   972  	VMOV	V8.D[0], R4
   973  	VMOV	V8.D[1], R5
   974  	CBZ	R4, not_equal	// a zero lane means some 8-byte group differed
   975  	CBZ	R5, not_equal
   976  	BNE	chunk64_loop
   977  	AND	$0x3f, R1, R1
   978  	CBZ	R1, equal
   979  chunk16:
   980  	// work with 16-byte chunks
   981  	BIC	$0xf, R1, R3
   982  	CBZ	R3, tail
   983  	ADD	R3, R0, R6	// end of chunks
   984  chunk16_loop:
   985  	VLD1.P	(R0), [V0.D2]
   986  	VLD1.P	(R2), [V1.D2]
   987  	VCMEQ	V0.D2, V1.D2, V2.D2
   988  	CMP	R0, R6	// flags are consumed by the BNE below
   989  	VMOV	V2.D[0], R4
   990  	VMOV	V2.D[1], R5
   991  	CBZ	R4, not_equal
   992  	CBZ	R5, not_equal
   993  	BNE	chunk16_loop
   994  	AND	$0xf, R1, R1
   995  	CBZ	R1, equal
   996  tail:
   997  	// special compare of tail with length < 16
   998  	TBZ	$3, R1, lt_8	// bit 3 clear: no 8-byte piece
   999  	MOVD.P	8(R0), R4
  1000  	MOVD.P	8(R2), R5
  1001  	CMP	R4, R5
  1002  	BNE	not_equal
  1003  lt_8:
  1004  	TBZ	$2, R1, lt_4	// bit 2 clear: no 4-byte piece
  1005  	MOVWU.P	4(R0), R4
  1006  	MOVWU.P	4(R2), R5
  1007  	CMP	R4, R5
  1008  	BNE	not_equal
  1009  lt_4:
  1010  	TBZ	$1, R1, lt_2	// bit 1 clear: no 2-byte piece
  1011  	MOVHU.P	2(R0), R4
  1012  	MOVHU.P	2(R2), R5
  1013  	CMP	R4, R5
  1014  	BNE	not_equal
  1015  lt_2:
  1016  	TBZ     $0, R1, equal	// bit 0 clear: nothing left to compare
  1017  one:
  1018  	MOVBU	(R0), R4
  1019  	MOVBU	(R2), R5
  1020  	CMP	R4, R5
  1021  	BNE	not_equal
  1022  equal:
  1023  	MOVD	$1, R0
  1024  	MOVB	R0, (R8)
  1025  	RET
  1026  not_equal:
  1027  	MOVB	ZR, (R8)
  1028  	RET
  1029  
  1030  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets R0 to 0 and returns.
  1031  	MOVW	$0, R0
  1032  	RET
  1033  
  1034  // The top-most function running on a goroutine
  1035  // returns to goexit+PCQuantum.
  1036  TEXT runtime·goexit(SB),NOSPLIT,$-8-0
        	// The leading NOP keeps the return target (goexit+PCQuantum, per
        	// the comment above this function) inside the function body.
  1037  	MOVD	R0, R0	// NOP
  1038  	BL	runtime·goexit1(SB)	// does not return
  1039  
  1040  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
        	// Empty body: returns immediately.
  1041  	RET
  1042  
  1043  // This is called from .init_array and follows the platform, not Go, ABI.
  1044  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
        	// Takes the new moduledata pointer in R0 (platform ABI). Appends it
        	// to the list: the current last entry's next field is set to R0,
        	// then lastmoduledatap is updated to R0. Uses R1 as scratch.
  1045  	SUB	$0x10, RSP
  1046  	MOVD	R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
  1047  	MOVD	runtime·lastmoduledatap(SB), R1
  1048  	MOVD	R0, moduledata_next(R1)
  1049  	MOVD	R0, runtime·lastmoduledatap(SB)
  1050  	MOVD	8(RSP), R27
  1051  	ADD	$0x10, RSP
  1052  	RET
  1053  
  1054  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Always reports true: writes the byte 1 into the bool result.
  1055  	MOVD	$1, R0
  1056  	MOVB	R0, ret+0(FP)
  1057  	RET