github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime entry: stashes argc/argv on the stack, gives g0 a provisional
        	// 64KB stack range below the incoming SP, calls _cgo_init when it is
        	// set, links m0 and g0, runs check/args/osinit/schedinit, queues
        	// runtime·mainPC through newproc and finally calls mstart.
    11  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    12  	// C TLS base pointer in AR0:AR1
    13  
    14  	// initialize essential registers
    15  	XOR	R0, R0
    16  
    17  	SUB	$24, R15
    18  	MOVW	R2, 8(R15) // argc
    19  	MOVD	R3, 16(R15) // argv
    20  
    21  	// create istack out of the given (operating system) stack.
    22  	// _cgo_init may update stackguard.
    23  	MOVD	$runtime·g0(SB), g
    24  	MOVD	R15, R11
    25  	SUB	$(64*1024), R11
        	// R11 = SP - 64KB: used as both stack guards and as stack.lo;
        	// stack.hi is the current SP.
    26  	MOVD	R11, g_stackguard0(g)
    27  	MOVD	R11, g_stackguard1(g)
    28  	MOVD	R11, (g_stack+stack_lo)(g)
    29  	MOVD	R15, (g_stack+stack_hi)(g)
    30  
    31  	// if there is a _cgo_init, call it using the gcc ABI.
    32  	MOVD	_cgo_init(SB), R11
    33  	CMPBEQ	R11, $0, nocgo
    34  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
    35  	SLD	$32, R4, R4
    36  	MOVW	AR1, R4			// arg 2: TLS base pointer
    37  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
    38  	MOVD	g, R2			// arg 0: G
    39  	// C functions expect 160 bytes of space on caller stack frame
    40  	// and an 8-byte aligned stack pointer
    41  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
    42  	SUB	$160, R15		// reserve 160 bytes
    43  	MOVD    $~7, R6
    44  	AND 	R6, R15			// 8-byte align
    45  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
    46  	MOVD	R9, R15			// restore stack
    47  	XOR	R0, R0			// zero R0
    48  
    49  nocgo:
    50  	// update stackguard after _cgo_init
        	// Both guards become stack.lo + _StackGuard, re-reading stack.lo
        	// because _cgo_init may have changed it (see comment above).
    51  	MOVD	(g_stack+stack_lo)(g), R2
    52  	ADD	$const__StackGuard, R2
    53  	MOVD	R2, g_stackguard0(g)
    54  	MOVD	R2, g_stackguard1(g)
    55  
    56  	// set the per-goroutine and per-mach "registers"
    57  	MOVD	$runtime·m0(SB), R2
    58  
    59  	// save m->g0 = g0
    60  	MOVD	g, m_g0(R2)
    61  	// save m0 to g0->m
    62  	MOVD	R2, g_m(g)
    63  
    64  	BL	runtime·check(SB)
    65  
    66  	// argc/argv are already prepared on stack
    67  	BL	runtime·args(SB)
    68  	BL	runtime·osinit(SB)
    69  	BL	runtime·schedinit(SB)
    70  
    71  	// create a new goroutine to start program
        	// newproc frame: 0(R15) = 0, 8(R15) = 0, 16(R15) = &runtime·mainPC.
    72  	MOVD	$runtime·mainPC(SB), R2		// entry
    73  	SUB     $24, R15
    74  	MOVD 	R2, 16(R15)
    75  	MOVD 	$0, 8(R15)
    76  	MOVD 	$0, 0(R15)
    77  	BL	runtime·newproc(SB)
    78  	ADD	$24, R15
    79  
    80  	// start this M
    81  	BL	runtime·mstart(SB)
    82  
        	// Reached only if mstart returns. NOTE(review): the store to address 1
        	// (R0 is zero) is presumably a deliberate fault - confirm.
    83  	MOVD	$0, 1(R0)
    84  	RET
    85  
        // runtime·mainPC is an 8-byte read-only word holding the address of
        // runtime·main; rt0_go passes the address of this word to newproc.
    86  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
    87  GLOBL	runtime·mainPC(SB),RODATA,$8
    88  
    89  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// Stores zero to address 2 (R0 is kept at zero in this file).
        	// NOTE(review): presumably this is meant to fault so a debugger or
        	// signal handler gains control - confirm.
    90  	MOVD	$0, 2(R0)
    91  	RET
    92  
    93  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        	// Nothing to initialise here: returns immediately.
    94  	RET
    95  
    96  /*
    97   *  go-routine
    98   */
    99  
   100  // void gosave(Gobuf*)
   101  // save state in Gobuf; setjmp
        // Stores SP, LR (as the resume PC) and g into the Gobuf, zeroes its lr
        // and ret fields, and calls badctxt if the ctxt field is non-zero.
   102  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   103  	MOVD	buf+0(FP), R3
   104  	MOVD	R15, gobuf_sp(R3)
   105  	MOVD	LR, gobuf_pc(R3)
   106  	MOVD	g, gobuf_g(R3)
   107  	MOVD	$0, gobuf_lr(R3)
   108  	MOVD	$0, gobuf_ret(R3)
   109  	// Assert ctxt is zero. See func save.
   110  	MOVD	gobuf_ctxt(R3), R3
   111  	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   112  	BL	runtime·badctxt(SB)
   113  	RET
   114  
   115  // void gogo(Gobuf*)
   116  // restore state from Gobuf; longjmp
        // Loads g, SP, LR, ret (into R3) and ctxt (into R12) from the Gobuf,
        // clears sp/ret/lr/ctxt in the Gobuf, and branches to the saved pc.
   117  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   118  	MOVD	buf+0(FP), R5
   119  
   120  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   121  	MOVD	gobuf_ctxt(R5), R1
   122  	CMPBEQ	R1, $0, nilctxt
   123  	MOVD	$gobuf_ctxt(R5), R1
   124  	MOVD	R1, 8(R15)	// first argument: address of buf.ctxt
   125  	MOVD	R0, 16(R15)	// second argument: zero (R0)
   126  	BL	runtime·writebarrierptr_prewrite(SB)
   127  	MOVD	buf+0(FP), R5	// reload buf after the call
   128  
   129  nilctxt:
   130  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   131  	BL	runtime·save_g(SB)
   132  
        	// R4 is not used afterwards. NOTE(review): this load through g is
        	// presumably the actual nil check (it faults if g is nil) - confirm.
   133  	MOVD	0(g), R4
   134  	MOVD	gobuf_sp(R5), R15
   135  	MOVD	gobuf_lr(R5), LR
   136  	MOVD	gobuf_ret(R5), R3
   137  	MOVD	gobuf_ctxt(R5), R12
   138  	MOVD	$0, gobuf_sp(R5)
   139  	MOVD	$0, gobuf_ret(R5)
   140  	MOVD	$0, gobuf_lr(R5)
   141  	MOVD	$0, gobuf_ctxt(R5)
   142  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   143  	MOVD	gobuf_pc(R5), R6
   144  	BR	(R6)
   145  
   146  // void mcall(fn func(*g))
   147  // Switch to m->g0's stack, call fn(g).
   148  // Fn must never return.  It should gogo(&g->sched)
   149  // to keep running g.
        // Branches to badmcall if the caller is already running on m->g0, and
        // to badmcall2 if fn returns.
   150  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   151  	// Save caller state in g->sched
   152  	MOVD	R15, (g_sched+gobuf_sp)(g)
   153  	MOVD	LR, (g_sched+gobuf_pc)(g)
   154  	MOVD	$0, (g_sched+gobuf_lr)(g)
   155  	MOVD	g, (g_sched+gobuf_g)(g)
   156  
   157  	// Switch to m->g0 & its stack, call fn.
   158  	MOVD	g, R3		// R3 = calling g, passed to fn below
   159  	MOVD	g_m(g), R8
   160  	MOVD	m_g0(R8), g
   161  	BL	runtime·save_g(SB)
   162  	CMP	g, R3
   163  	BNE	2(PC)		// g0 != caller: skip the badmcall branch
   164  	BR	runtime·badmcall(SB)
   165  	MOVD	fn+0(FP), R12			// context
   166  	MOVD	0(R12), R4			// code pointer
   167  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
        	// Build fn's frame on g0: 0(R15) = 0, 8(R15) = the calling g.
   168  	SUB	$16, R15
   169  	MOVD	R3, 8(R15)
   170  	MOVD	$0, 0(R15)
   171  	BL	(R4)
   172  	BR	runtime·badmcall2(SB)
   173  
   174  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   175  // of the G stack.  We need to distinguish the routine that
   176  // lives at the bottom of the G stack from the one that lives
   177  // at the top of the system stack because the one at the top of
   178  // the system stack terminates the stack walk (see topofstack()).
        // systemstack records this function's address plus 16 as the saved PC
        // ("get past prologue"), so the instruction layout here must not change
        // without updating that offset.
   179  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   180  	UNDEF
   181  	BL	(LR)	// make sure this function is not leaf
   182  	RET
   183  
   184  // func systemstack(fn func())
        // Calls fn on the m->g0 stack. If the caller is already on g0 or gsignal,
        // fn is called directly on the current stack. If the caller is m->curg,
        // its state is saved in g->sched, execution switches to g0, fn is called,
        // and execution switches back. Any other g is reported through
        // badsystemstack and then aborted.
   185  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   186  	MOVD	fn+0(FP), R3	// R3 = fn
   187  	MOVD	R3, R12		// context
   188  	MOVD	g_m(g), R4	// R4 = m
   189  
   190  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   191  	CMPBEQ	g, R5, noswitch
   192  
   193  	MOVD	m_g0(R4), R5	// R5 = g0
   194  	CMPBEQ	g, R5, noswitch
   195  
   196  	MOVD	m_curg(R4), R6
   197  	CMPBEQ	g, R6, switch
   198  
   199  	// Bad: g is not gsignal, not g0, not curg. What is it?
   200  	// Hide call from linker nosplit analysis.
   201  	MOVD	$runtime·badsystemstack(SB), R3
   202  	BL	(R3)
        	// badsystemstack must not return. If it ever does, stop here instead
        	// of falling through into the switch path with an unidentified g
        	// (same guard morestack uses after its bad* calls).
        	BL	runtime·abort(SB)
   203  
   204  switch:
   205  	// save our state in g->sched.  Pretend to
   206  	// be systemstack_switch if the G stack is scanned.
   207  	MOVD	$runtime·systemstack_switch(SB), R6
   208  	ADD	$16, R6	// get past prologue
   209  	MOVD	R6, (g_sched+gobuf_pc)(g)
   210  	MOVD	R15, (g_sched+gobuf_sp)(g)
   211  	MOVD	$0, (g_sched+gobuf_lr)(g)
   212  	MOVD	g, (g_sched+gobuf_g)(g)
   213  
   214  	// switch to g0
   215  	MOVD	R5, g
   216  	BL	runtime·save_g(SB)
   217  	MOVD	(g_sched+gobuf_sp)(g), R3
   218  	// make it look like mstart called systemstack on g0, to stop traceback
   219  	SUB	$8, R3
   220  	MOVD	$runtime·mstart(SB), R4
   221  	MOVD	R4, 0(R3)
   222  	MOVD	R3, R15
   223  
   224  	// call target function
   225  	MOVD	0(R12), R3	// code pointer
   226  	BL	(R3)
   227  
   228  	// switch back to g
   229  	MOVD	g_m(g), R3
   230  	MOVD	m_curg(R3), g
   231  	BL	runtime·save_g(SB)
   232  	MOVD	(g_sched+gobuf_sp)(g), R15
   233  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved SP once it has been restored
   234  	RET
   235  
   236  noswitch:
   237  	// already on m stack, just call directly
   238  	MOVD	0(R12), R3	// code pointer
   239  	BL	(R3)
   240  	RET
   241  
   242  /*
   243   * support for morestack
   244   */
   245  
   246  // Called during function prolog when more stack is needed.
   247  // Caller has already loaded:
   248  // R3: framesize, R4: argsize, R5: LR
   249  //
   250  // The traceback routines see morestack on a g0 as being
   251  // the top of a stack (for example, morestack calling newstack
   252  // calling the scheduler calling newm calling gc), so we must
   253  // record an argument size. For that purpose, it has no arguments.
        //
        // R12 is passed through to newstack as the ctxt argument;
        // morestack_noctxt zeroes it before branching here.
   254  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   255  	// Cannot grow scheduler stack (m->g0).
   256  	MOVD	g_m(g), R7
   257  	MOVD	m_g0(R7), R8
   258  	CMPBNE	g, R8, 3(PC)	// not g0: skip the two calls below
   259  	BL	runtime·badmorestackg0(SB)
   260  	BL	runtime·abort(SB)
   261  
   262  	// Cannot grow signal stack (m->gsignal).
   263  	MOVD	m_gsignal(R7), R8
   264  	CMP	g, R8
   265  	BNE	3(PC)		// not gsignal: skip the two calls below
   266  	BL	runtime·badmorestackgsignal(SB)
   267  	BL	runtime·abort(SB)
   268  
   269  	// Called from f.
   270  	// Set g->sched to context in f.
   271  	MOVD	R15, (g_sched+gobuf_sp)(g)
   272  	MOVD	LR, R8
   273  	MOVD	R8, (g_sched+gobuf_pc)(g)
   274  	MOVD	R5, (g_sched+gobuf_lr)(g)
   275  	// newstack will fill gobuf.ctxt.
   276  
   277  	// Called from f.
   278  	// Set m->morebuf to f's caller.
   279  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   280  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   281  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   282  
   283  	// Call newstack on m->g0's stack.
   284  	MOVD	m_g0(R7), g
   285  	BL	runtime·save_g(SB)
   286  	MOVD	(g_sched+gobuf_sp)(g), R15
   287  	// Create a stack frame on g0 to call newstack.
   288  	MOVD	$0, -16(R15)	// Zero saved LR in frame
   289  	SUB	$16, R15
   290  	MOVD	R12, 8(R15)	// ctxt argument
   291  	BL	runtime·newstack(SB)
   292  
   293  	// Not reached, but make sure the return PC from the call to newstack
   294  	// is still in this function, and not the beginning of the next.
   295  	UNDEF
   296  
   297  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Variant of morestack for callers without a context: zeroes R12 (the
        	// ctxt register morestack hands to newstack) and branches to morestack.
   298  	MOVD	$0, R12
   299  	BR	runtime·morestack(SB)
   300  
   301  // reflectcall: call a function with the given argument list
   302  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   303  // we don't have variable-sized frames, so we use a small number
   304  // of constant-sized-frame functions to encode a few bits of size in the pc.
   305  // Caution: ugly multiline assembly macros in your future!
   306  
        // DISPATCH(NAME,MAXSIZE): with the argument size in R3, branch to NAME
        // when R3 <= MAXSIZE; otherwise fall through to whatever follows the
        // macro. Clobbers R4 (MAXSIZE) and R5 (target address).
   307  #define DISPATCH(NAME,MAXSIZE)		\
   308  	MOVD	$MAXSIZE, R4;		\
   309  	CMP	R3, R4;		\
   310  	BGT	3(PC);			\
   311  	MOVD	$NAME(SB), R5;	\
   312  	BR	(R5)
   313  // Note: can't just "BR NAME(SB)" - bad inlining results.
   314  
   315  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Entry point under the reflect package name; branches straight to
        	// ·reflectcall without touching the arguments.
   316  	BR	·reflectcall(SB)
   317  
   318  TEXT ·reflectcall(SB), NOSPLIT, $-8-32
        	// Loads argsize (zero-extended 32-bit) into R3 and branches to the
        	// first runtime·callN whose N is >= argsize. Sizes above 1073741824
        	// branch to runtime·badreflectcall.
   319  	MOVWZ argsize+24(FP), R3
   320  	DISPATCH(runtime·call32, 32)
   321  	DISPATCH(runtime·call64, 64)
   322  	DISPATCH(runtime·call128, 128)
   323  	DISPATCH(runtime·call256, 256)
   324  	DISPATCH(runtime·call512, 512)
   325  	DISPATCH(runtime·call1024, 1024)
   326  	DISPATCH(runtime·call2048, 2048)
   327  	DISPATCH(runtime·call4096, 4096)
   328  	DISPATCH(runtime·call8192, 8192)
   329  	DISPATCH(runtime·call16384, 16384)
   330  	DISPATCH(runtime·call32768, 32768)
   331  	DISPATCH(runtime·call65536, 65536)
   332  	DISPATCH(runtime·call131072, 131072)
   333  	DISPATCH(runtime·call262144, 262144)
   334  	DISPATCH(runtime·call524288, 524288)
   335  	DISPATCH(runtime·call1048576, 1048576)
   336  	DISPATCH(runtime·call2097152, 2097152)
   337  	DISPATCH(runtime·call4194304, 4194304)
   338  	DISPATCH(runtime·call8388608, 8388608)
   339  	DISPATCH(runtime·call16777216, 16777216)
   340  	DISPATCH(runtime·call33554432, 33554432)
   341  	DISPATCH(runtime·call67108864, 67108864)
   342  	DISPATCH(runtime·call134217728, 134217728)
   343  	DISPATCH(runtime·call268435456, 268435456)
   344  	DISPATCH(runtime·call536870912, 536870912)
   345  	DISPATCH(runtime·call1073741824, 1073741824)
   346  	MOVD	$runtime·badreflectcall(SB), R5
   347  	BR	(R5)
   348  
   349  #define CALLFN(NAME,MAXSIZE)			\
   350  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   351  	NO_LOCAL_POINTERS;			\
        	/* Defines one fixed-frame call trampoline: copies argsize bytes */	\
        	/* from arg into the local frame, calls f, then hands the result */	\
        	/* region to callRet. R4 = source, R5 = bytes left, R6 = dest. */	\
   352  	/* copy arguments to stack */		\
   353  	MOVD	arg+16(FP), R4;			\
   354  	MOVWZ	argsize+24(FP), R5;		\
   355  	MOVD	$stack-MAXSIZE(SP), R6;		\
   356  loopArgs: /* copy 256 bytes at a time */	\
   357  	CMP	R5, $256;			\
   358  	BLT	tailArgs;			\
   359  	SUB	$256, R5;			\
   360  	MVC	$256, 0(R4), 0(R6);		\
   361  	MOVD	$256(R4), R4;			\
   362  	MOVD	$256(R6), R6;			\
   363  	BR	loopArgs;			\
   364  tailArgs: /* copy remaining bytes */		\
   365  	CMP	R5, $0;				\
   366  	BEQ	callFunction;			\
        	/* NOTE(review): EXRL presumably runs the MVC at callfnMVC with */	\
        	/* its length taken from R5 (remaining-1) - confirm against ISA. */	\
   367  	SUB	$1, R5;				\
   368  	EXRL	$callfnMVC<>(SB), R5;		\
   369  callFunction:					\
   370  	MOVD	f+8(FP), R12;			\
   371  	MOVD	(R12), R8;			\
   372  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   373  	BL	(R8);				\
   374  	/* copy return values back */		\
        	/* callRet takes its arguments in registers: R7 = argtype, */	\
        	/* R6 = arg+retoffset, R4 = frame+retoffset, R5 = n-retoffset. */	\
   375  	MOVD	argtype+0(FP), R7;		\
   376  	MOVD	arg+16(FP), R6;			\
   377  	MOVWZ	n+24(FP), R5;			\
   378  	MOVD	$stack-MAXSIZE(SP), R4;		\
   379  	MOVWZ	retoffset+28(FP), R1;		\
   380  	ADD	R1, R4;				\
   381  	ADD	R1, R6;				\
   382  	SUB	R1, R5;				\
   383  	BL	callRet<>(SB);			\
   384  	RET
   385  
   386  // callRet copies return values back at the end of call*. This is a
   387  // separate function so it can allocate stack space for the arguments
   388  // to reflectcallmove. It does not follow the Go ABI; it expects its
   389  // arguments in registers.
        // Inputs: R7, R6, R4, R5. They are stored, in that order, as the four
        // stack arguments of reflectcallmove (CALLFN loads them as argtype,
        // destination, source and byte count).
   390  TEXT callRet<>(SB), NOSPLIT, $32-0
   391  	MOVD	R7, 8(R15)
   392  	MOVD	R6, 16(R15)
   393  	MOVD	R4, 24(R15)
   394  	MOVD	R5, 32(R15)
   395  	BL	runtime·reflectcallmove(SB)
   396  	RET
   397  
   398  CALLFN(·call32, 32)
        // One CALLFN trampoline per power-of-two frame size from 32 bytes to
        // 1GB; these are the call* targets that ·reflectcall dispatches to.
   399  CALLFN(·call64, 64)
   400  CALLFN(·call128, 128)
   401  CALLFN(·call256, 256)
   402  CALLFN(·call512, 512)
   403  CALLFN(·call1024, 1024)
   404  CALLFN(·call2048, 2048)
   405  CALLFN(·call4096, 4096)
   406  CALLFN(·call8192, 8192)
   407  CALLFN(·call16384, 16384)
   408  CALLFN(·call32768, 32768)
   409  CALLFN(·call65536, 65536)
   410  CALLFN(·call131072, 131072)
   411  CALLFN(·call262144, 262144)
   412  CALLFN(·call524288, 524288)
   413  CALLFN(·call1048576, 1048576)
   414  CALLFN(·call2097152, 2097152)
   415  CALLFN(·call4194304, 4194304)
   416  CALLFN(·call8388608, 8388608)
   417  CALLFN(·call16777216, 16777216)
   418  CALLFN(·call33554432, 33554432)
   419  CALLFN(·call67108864, 67108864)
   420  CALLFN(·call134217728, 134217728)
   421  CALLFN(·call268435456, 268435456)
   422  CALLFN(·call536870912, 536870912)
   423  CALLFN(·call1073741824, 1073741824)
   424  
   425  // Not a function: target for EXRL (execute relative long) instruction.
        // Holds the single MVC (R4 = source, R6 = destination) that the CALLFN
        // tail copy executes through EXRL; there is deliberately no RET.
   426  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   427  	MVC	$1, 0(R4), 0(R6)
   428  
   429  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// No-op in this file: returns immediately.
   430  	RET
   431  
   432  // void jmpdefer(fv, sp);
   433  // called from deferreturn.
   434  // 1. grab stored LR for caller
   435  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   436  // 3. BR to fn
   437  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   438  	MOVD	0(R15), R1	// R1 = stored LR (step 1)
   439  	SUB	$6, R1, LR	// LR = R1 - 6 (step 2)
   440  
   441  	MOVD	fv+0(FP), R12	// R12 = fv, the closure context for the target
   442  	MOVD	argp+8(FP), R15	// SP = argp - 8 after the SUB below
   443  	SUB	$8, R15
   444  	MOVD	0(R12), R3	// code pointer
   445  	BR	(R3)
   446  
   447  // Save state of caller into g->sched. Smashes R1.
        // Stores LR as sched.pc and R15 as sched.sp, zeroes sched.lr and
        // sched.ret, and calls badctxt if sched.ctxt is non-zero.
   448  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   449  	MOVD	LR, (g_sched+gobuf_pc)(g)
   450  	MOVD	R15, (g_sched+gobuf_sp)(g)
   451  	MOVD	$0, (g_sched+gobuf_lr)(g)
   452  	MOVD	$0, (g_sched+gobuf_ret)(g)
   453  	// Assert ctxt is zero. See func save.
   454  	MOVD	(g_sched+gobuf_ctxt)(g), R1
   455  	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   456  	BL	runtime·badctxt(SB)
   457  	RET
   458  
   459  // func asmcgocall(fn, arg unsafe.Pointer) int32
   460  // Call fn(arg) on the scheduler stack,
   461  // aligned appropriately for the gcc ABI.
   462  // See cgocall.go for more details.
   463  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   464  	// R2 = original SP, then arg/result of fn; R3 = fn; R4 = arg; R5 = old g; R6 = temp; R15 = stack pointer
   465  	// C TLS base pointer in AR0:AR1
   466  	MOVD	fn+0(FP), R3
   467  	MOVD	arg+8(FP), R4
   468  
   469  	MOVD	R15, R2		// save original stack pointer
   470  	MOVD	g, R5
   471  
   472  	// Figure out if we need to switch to m->g0 stack.
   473  	// We get called to create new OS threads too, and those
   474  	// come in on the m->g0 stack already.
   475  	MOVD	g_m(g), R6
   476  	MOVD	m_g0(R6), R6
   477  	CMPBEQ	R6, g, g0
   478  	BL	gosave<>(SB)
   479  	MOVD	R6, g
   480  	BL	runtime·save_g(SB)
   481  	MOVD	(g_sched+gobuf_sp)(g), R15
   482  
   483  	// Now on a scheduling stack (a pthread-created stack).
   484  g0:
   485  	// Save room for two of our pointers, plus 160 bytes of callee
   486  	// save area that lives on the caller stack.
        	// Layout after the SUB: 0..159 = callee save area, 160 = depth of the
        	// original SP below the old g's stack.hi, 168 = old g.
   487  	SUB	$176, R15
   488  	MOVD	$~7, R6
   489  	AND	R6, R15                 // 8-byte alignment for gcc ABI
   490  	MOVD	R5, 168(R15)             // save old g on stack
   491  	MOVD	(g_stack+stack_hi)(R5), R5
   492  	SUB	R2, R5
   493  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   494  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   495  	MOVD	R4, R2                  // arg in R2
   496  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   497  
   498  	XOR	R0, R0                  // set R0 back to 0.
   499  	// Restore g, stack pointer.
        	// SP is recomputed as stack.hi - saved depth, using the stack.hi
        	// value read after the call.
   500  	MOVD	168(R15), g
   501  	BL	runtime·save_g(SB)
   502  	MOVD	(g_stack+stack_hi)(g), R5
   503  	MOVD	160(R15), R6
   504  	SUB	R6, R5
   505  	MOVD	R5, R15
   506  
   507  	MOVW	R2, ret+16(FP)	// fn's 32-bit result, left in R2
   508  	RET
   509  
   510  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   511  // Turn the fn into a Go func (by taking its address) and call
   512  // cgocallback_gofunc.
        // The four outgoing arguments go to 8(R15)..32(R15); the first is the
        // address of the fn argument slot, the rest are copied unchanged.
   513  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   514  	MOVD	$fn+0(FP), R3
   515  	MOVD	R3, 8(R15)
   516  	MOVD	frame+8(FP), R3
   517  	MOVD	R3, 16(R15)
   518  	MOVD	framesize+16(FP), R3
   519  	MOVD	R3, 24(R15)
   520  	MOVD	ctxt+24(FP), R3
   521  	MOVD	R3, 32(R15)
   522  	MOVD	$runtime·cgocallback_gofunc(SB), R3
   523  	BL	(R3)	// indirect call through R3
   524  	RET
   525  
   526  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   527  // See cgocall.go for more details.
        // Locals: savedm-8(SP) holds the m found on entry (zero if needm was
        // used); savedsp-16(SP) holds the previous m->g0->sched.sp.
   528  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   529  	NO_LOCAL_POINTERS
   530  
   531  	// Load m and g from thread-local storage.
   532  	MOVB	runtime·iscgo(SB), R3
   533  	CMPBEQ	R3, $0, nocgo
   534  	BL	runtime·load_g(SB)
   535  
   536  nocgo:
   537  	// If g is nil, Go did not create the current thread.
   538  	// Call needm to obtain one for temporary use.
   539  	// In this case, we're running on the thread stack, so there's
   540  	// lots of space, but the linker doesn't know. Hide the call from
   541  	// the linker analysis by using an indirect call.
   542  	CMPBEQ	g, $0, needm
   543  
   544  	MOVD	g_m(g), R8
   545  	MOVD	R8, savedm-8(SP)
   546  	BR	havem
   547  
   548  needm:
   549  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   550  	MOVD	$runtime·needm(SB), R3
   551  	BL	(R3)
   552  
   553  	// Set m->sched.sp = SP, so that if a panic happens
   554  	// during the function we are about to execute, it will
   555  	// have a valid SP to run on the g0 stack.
   556  	// The next few lines (after the havem label)
   557  	// will save this SP onto the stack and then write
   558  	// the same SP back to m->sched.sp. That seems redundant,
   559  	// but if an unrecovered panic happens, unwindm will
   560  	// restore the g->sched.sp from the stack location
   561  	// and then systemstack will try to use it. If we don't set it here,
   562  	// that restored SP will be uninitialized (typically 0) and
   563  	// will not be usable.
   564  	MOVD	g_m(g), R8
   565  	MOVD	m_g0(R8), R3
   566  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   567  
   568  havem:
   569  	// Now there's a valid m, and we're running on its m->g0.
   570  	// Save current m->g0->sched.sp on stack and then set it to SP.
   571  	// Save current sp in m->g0->sched.sp in preparation for
   572  	// switch back to m->curg stack.
   573  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R15) aka savedsp-16(SP).
   574  	MOVD	m_g0(R8), R3
   575  	MOVD	(g_sched+gobuf_sp)(R3), R4
   576  	MOVD	R4, savedsp-16(SP)
   577  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   578  
   579  	// Switch to m->curg stack and call runtime.cgocallbackg.
   580  	// Because we are taking over the execution of m->curg
   581  	// but *not* resuming what had been running, we need to
   582  	// save that information (m->curg->sched) so we can restore it.
   583  	// We can restore m->curg->sched.sp easily, because calling
   584  	// runtime.cgocallbackg leaves SP unchanged upon return.
   585  	// To save m->curg->sched.pc, we push it onto the stack.
   586  	// This has the added benefit that it looks to the traceback
   587  	// routine like cgocallbackg is going to return to that
   588  	// PC (because the frame we allocate below has the same
   589  	// size as cgocallback_gofunc's frame declared above)
   590  	// so that the traceback will seamlessly trace back into
   591  	// the earlier calls.
   592  	//
   593  	// In the new goroutine, -8(SP) is unused (where SP refers to
   594  	// m->curg's SP while we're setting it up, before we've adjusted it).
   595  	MOVD	m_curg(R8), g
   596  	BL	runtime·save_g(SB)
   597  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   598  	MOVD	(g_sched+gobuf_pc)(g), R5
   599  	MOVD	R5, -24(R4)	// saved curg sched.pc at the new 0(R15)
   600  	MOVD	ctxt+24(FP), R5
   601  	MOVD	R5, -16(R4)	// ctxt at the new 8(R15)
   602  	MOVD	$-24(R4), R15
   603  	BL	runtime·cgocallbackg(SB)
   604  
   605  	// Restore g->sched (== m->curg->sched) from saved values.
   606  	MOVD	0(R15), R5
   607  	MOVD	R5, (g_sched+gobuf_pc)(g)
   608  	MOVD	$24(R15), R4
   609  	MOVD	R4, (g_sched+gobuf_sp)(g)
   610  
   611  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   612  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   613  	// so we do not have to restore it.)
   614  	MOVD	g_m(g), R8
   615  	MOVD	m_g0(R8), g
   616  	BL	runtime·save_g(SB)
   617  	MOVD	(g_sched+gobuf_sp)(g), R15
   618  	MOVD	savedsp-16(SP), R4
   619  	MOVD	R4, (g_sched+gobuf_sp)(g)
   620  
   621  	// If the m on entry was nil, we called needm above to borrow an m
   622  	// for the duration of the call. Since the call is over, return it with dropm.
   623  	MOVD	savedm-8(SP), R6
   624  	CMPBNE	R6, $0, droppedm
   625  	MOVD	$runtime·dropm(SB), R3
   626  	BL	(R3)
   627  droppedm:
   628  
   629  	// Done!
   630  	RET
   631  
   632  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and calls save_g.
   633  TEXT runtime·setg(SB), NOSPLIT, $0-8
   634  	MOVD	gg+0(FP), g
   635  	// This only happens if iscgo, so jump straight to save_g
   636  	BL	runtime·save_g(SB)
   637  	RET
   638  
   639  // void setg_gcc(G*); set g in C TLS.
   640  // Must obey the gcc calling convention.
        // Input: R2 = G*. LR, R10, g and R11 are parked in R1, R3, R4 and R5
        // around the save_g call and restored before returning.
   641  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   642  	// The standard prologue clobbers LR (R14), which is callee-save in
   643  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   644  	MOVD	LR, R1
   645  	// Also save g, R10, and R11 since they're callee-save in C ABI
   646  	MOVD	R10, R3
   647  	MOVD	g, R4
   648  	MOVD	R11, R5
   649  
   650  	MOVD	R2, g
   651  	BL	runtime·save_g(SB)
   652  
   653  	MOVD	R5, R11
   654  	MOVD	R4, g
   655  	MOVD	R3, R10
   656  	MOVD	R1, LR
   657  	RET
   658  
   659  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
        	// Returns the word at 16(R15) in ret. The argument at 0(FP) is not
        	// read; the result depends only on the stack layout.
   660  	MOVD	16(R15), R3		// LR saved by caller
   661  	MOVD	R3, ret+8(FP)
   662  	RET
   663  
   664  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16
        	// Overwrites the word at 16(R15) - the same slot getcallerpc reads -
        	// with pc. The argument at 0(FP) is not read.
   665  	MOVD	pc+8(FP), R3
   666  	MOVD	R3, 16(R15)		// set LR in caller
   667  	RET
   668  
   669  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Loads from the address held in R0 (kept at zero in this file), then
        	// executes UNDEF. NOTE(review): presumably the load is meant to fault
        	// and UNDEF is the backstop - confirm.
   670  	MOVW	(R0), R0
   671  	UNDEF
   672  
   673  // int64 runtime·cputicks(void)
        // Returns the STORE CLOCK value with its most significant bit cleared.
   674  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   675  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   676  	// This means that since about 1972 the msb has been set, making the
   677  	// result of a call to STORE CLOCK (stck) a negative number.
   678  	// We clear the msb to make it positive.
   679  	STCK	ret+0(FP)      // serialises before and after call
   680  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   681  	SLD	$1, R3         // shift left then right by one: drops the msb
   682  	SRD	$1, R3
   683  	MOVD	R3, ret+0(FP)
   684  	RET
   685  
   686  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   687  // redirects to memhash(p, h, size) using the size
   688  // stored in the closure.
        // The size is read from offset 8 of the closure in R12; memhash's
        // result is read back from 32(R15).
   689  TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
   690  	GO_ARGS
   691  	NO_LOCAL_POINTERS
   692  	MOVD	p+0(FP), R3
   693  	MOVD	h+8(FP), R4
   694  	MOVD	8(R12), R5	// size from the closure
   695  	MOVD	R3, 8(R15)
   696  	MOVD	R4, 16(R15)
   697  	MOVD	R5, 24(R15)
   698  	BL	runtime·memhash(SB)
   699  	MOVD	32(R15), R3	// memhash result
   700  	MOVD	R3, ret+16(FP)
   701  	RET
   702  
   703  // AES hashing not implemented for s390x
        // Each stub is a single load from the address in R0 (kept at zero in
        // this file) into R15, with no RET. NOTE(review): presumably the load
        // faults so an accidental call crashes at once - confirm.
   704  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   705  	MOVW	(R0), R15
   706  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   707  	MOVW	(R0), R15
   708  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   709  	MOVW	(R0), R15
   710  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   711  	MOVW	(R0), R15
   712  
   713  // memequal(a, b unsafe.Pointer, size uintptr) bool
        // Loads memeqbody's register arguments (R3 = a, R5 = b, R6 = size,
        // R7 = &ret) and tail-branches to it; memeqbody stores the result.
   714  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   715  	MOVD	a+0(FP), R3
   716  	MOVD	b+8(FP), R5
   717  	MOVD	size+16(FP), R6
   718  	LA	ret+24(FP), R7
   719  	BR	runtime·memeqbody(SB)
   720  
   721  // memequal_varlen(a, b unsafe.Pointer) bool
        // Same as memequal, except the length comes from the closure in R12
        // rather than from an argument.
   722  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   723  	MOVD	a+0(FP), R3
   724  	MOVD	b+8(FP), R5
   725  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   726  	LA	ret+16(FP), R7
   727  	BR	runtime·memeqbody(SB)
   728  
   729  // eqstring tests whether two strings are equal.
   730  // The compiler guarantees that strings passed
   731  // to eqstring have equal length.
   732  // See runtime_test.go:eqstring_generic for
   733  // equivalent Go code.
        // Only s1's length is loaded (s2_len is never read); the comparison
        // itself is done by memeqbody, which stores the result through R7.
   734  TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33
   735  	MOVD	s1_base+0(FP), R3
   736  	MOVD	s1_len+8(FP), R6
   737  	MOVD	s2_base+16(FP), R5
   738  	LA	ret+32(FP), R7
   739  	BR	runtime·memeqbody(SB)
   740  
   741  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
        	// Stores 0 (false) in ret when the two lengths differ; otherwise
        	// tail-branches to memeqbody with R3 = a, R5 = b, R6 = length,
        	// R7 = &ret.
   742  	MOVD	a_len+8(FP), R2
   743  	MOVD	b_len+32(FP), R6
   744  	MOVD	a+0(FP), R3
   745  	MOVD	b+24(FP), R5
   746  	LA	ret+48(FP), R7
   747  	CMPBNE	R2, R6, notequal
   748  	BR	runtime·memeqbody(SB)
   749  notequal:
   750  	MOVB	$0, ret+48(FP)
   751  	RET
   752  
   753  // input:
   754  //   R3 = a
   755  //   R5 = b
   756  //   R6 = len
   757  //   R7 = address of output byte (stores 0 or 1 here)
   758  //   a and b have the same length
        // Strategy: identical pointers or zero length are equal at once; 256 or
        // more bytes are compared in 256-byte CLC chunks; 32..255 bytes use one
        // CLC executed through EXRL; under 32 bytes use 8/4/1-byte loads.
        // Scratch registers: R2, R8, R9.
   759  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   760  	CMPBEQ	R3, R5, equal
   761  loop:
   762  	CMPBEQ	R6, $0, equal
   763  	CMPBLT	R6, $32, tiny
   764  	CMP	R6, $256
   765  	BLT	tail
   766  	CLC	$256, 0(R3), 0(R5)
   767  	BNE	notequal
   768  	SUB	$256, R6
   769  	LA	256(R3), R3
   770  	LA	256(R5), R5
   771  	BR	loop
   772  tail:
        	// R8 = len-1 is handed to EXRL. NOTE(review): presumably EXRL runs
        	// the CLC at memeqbodyclc with that length - confirm against the ISA.
   773  	SUB	$1, R6, R8
   774  	EXRL	$runtime·memeqbodyclc(SB), R8
   775  	BEQ	equal
   776  notequal:
   777  	MOVB	$0, 0(R7)
   778  	RET
   779  equal:
   780  	MOVB	$1, 0(R7)
   781  	RET
   782  tiny:
        	// R2 = running byte offset into both buffers; R6 = bytes left.
   783  	MOVD	$0, R2
   784  	CMPBLT	R6, $16, lt16
   785  	MOVD	0(R3), R8
   786  	MOVD	0(R5), R9
   787  	CMPBNE	R8, R9, notequal
   788  	MOVD	8(R3), R8
   789  	MOVD	8(R5), R9
   790  	CMPBNE	R8, R9, notequal
   791  	LA	16(R2), R2
   792  	SUB	$16, R6
   793  lt16:
   794  	CMPBLT	R6, $8, lt8
   795  	MOVD	0(R3)(R2*1), R8
   796  	MOVD	0(R5)(R2*1), R9
   797  	CMPBNE	R8, R9, notequal
   798  	LA	8(R2), R2
   799  	SUB	$8, R6
   800  lt8:
   801  	CMPBLT	R6, $4, lt4
   802  	MOVWZ	0(R3)(R2*1), R8
   803  	MOVWZ	0(R5)(R2*1), R9
   804  	CMPBNE	R8, R9, notequal
   805  	LA	4(R2), R2
   806  	SUB	$4, R6
   807  lt4:
        // CHECK(n): equal if exactly n bytes were left; otherwise compare the
        // bytes at offset n and branch to notequal on a mismatch.
   808  #define CHECK(n) \
   809  	CMPBEQ	R6, $n, equal \
   810  	MOVB	n(R3)(R2*1), R8 \
   811  	MOVB	n(R5)(R2*1), R9 \
   812  	CMPBNE	R8, R9, notequal
   813  	CHECK(0)
   814  	CHECK(1)
   815  	CHECK(2)
   816  	CHECK(3)
   817  	BR	equal
   818  
   819  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// EXRL target used by memeqbody's tail path: compares storage at
        	// R3 against storage at R5.
   820  	CLC	$1, 0(R3), 0(R5)
   821  	RET
   822  
   823  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads indexbytebody's register arguments and tail-branches to it;
        	// the index (or -1) is stored through R2.
   824  	MOVD	s+0(FP), R3     // s => R3
   825  	MOVD	s_len+8(FP), R4 // s_len => R4
   826  	MOVBZ	c+24(FP), R5    // c => R5
   827  	MOVD	$ret+32(FP), R2 // &ret => R2
   828  	BR	runtime·indexbytebody(SB)
   829  
   830  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// String variant of bytes·IndexByte: same register setup, different
        	// argument offsets.
   831  	MOVD	s+0(FP), R3     // s => R3
   832  	MOVD	s_len+8(FP), R4 // s_len => R4
   833  	MOVBZ	c+16(FP), R5    // c => R5
   834  	MOVD	$ret+24(FP), R2 // &ret => R2
   835  	BR	runtime·indexbytebody(SB)
   836  
   837  // input:
   838  // R3: s
   839  // R4: s_len
   840  // R5: c -- byte sought
   841  // R2: &ret -- address to put index into
        // Stores the index of the first byte equal to c, or -1, at 0(R2).
        // Under 16 bytes: byte loop. Otherwise: vector search when
        // facilities_hasVX is set, else the SRST instruction.
        // Internal registers: R6 = base of s, R8 = end address (base + len),
        // R7 and R1 = scratch.
   842  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   843  	CMPBEQ	R4, $0, notfound
   844  	MOVD	R3, R6          // store base for later
   845  	ADD	R3, R4, R8      // the address after the end of the string
   846  	//if the length is small, use loop; otherwise, use vector or srst search
   847  	CMPBGE	R4, $16, large
   848  
   849  residual:
        	// Byte loop over [R3, R8). R3 is advanced past each byte before the
        	// comparison, so on a match it points one past the matching byte.
   850  	CMPBEQ	R3, R8, notfound
   851  	MOVBZ	0(R3), R7
   852  	LA	1(R3), R3
   853  	CMPBNE	R7, R5, residual
   854  
   855  found:
        	// index = R3 - base - 1 (R3 is one past the match, see above).
   856  	SUB	R6, R3
   857  	SUB	$1, R3
   858  	MOVD	R3, 0(R2)
   859  	RET
   860  
   861  notfound:
   862  	MOVD	$-1, 0(R2)
   863  	RET
   864  
   865  large:
   866  	MOVBZ	·cpu+facilities_hasVX(SB), R1
   867  	CMPBNE	R1, $0, vectorimpl
   868  
   869  srstimpl:                       // no vector facility
   870  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   871  srstloop:
   872  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   873  	BVS	srstloop        // interrupted - continue
   874  	BGT	notfoundr0
   875  foundr0:
        	// NOTE(review): R8 is presumably left by SRST at the address of the
        	// match; the index is R8 - base. Confirm against the ISA.
   876  	XOR	R0, R0          // reset R0
   877  	SUB	R6, R8          // remove base
   878  	MOVD	R8, 0(R2)
   879  	RET
   880  notfoundr0:
   881  	XOR	R0, R0          // reset R0
   882  	MOVD	$-1, 0(R2)
   883  	RET
   884  
   885  vectorimpl:
   886  	//if the address is not 16byte aligned, use loop for the header
   887  	MOVD	R3, R8
   888  	AND	$15, R8
   889  	CMPBGT	R8, $0, notaligned
   890  
   891  aligned:
        	// R8 = end address again (base + len); R7 = R8 rounded down to a
        	// multiple of 16, the limit for whole-vector loads.
   892  	ADD	R6, R4, R8
   893  	MOVD	R8, R7
   894  	AND	$-16, R7
   895  	// replicate c across V17
   896  	VLVGB	$0, R5, V19
   897  	VREPB	$0, V19, V17
   898  
   899  vectorloop:
        	// Past the last whole 16-byte block: finish in the byte loop.
   900  	CMPBGE	R3, R7, residual
   901  	VL	0(R3), V16    // load string to be searched into V16
   902  	ADD	$16, R3
   903  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   904  	BVS	vectorloop
   905  
   906  	// when vector search found c in the string
        	// index = (block start - base) + position within the block (R7).
   907  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   908  	SUB	$16, R3
   909  	SUB	R6, R3
   910  	ADD	R3, R7
   911  	MOVD	R7, 0(R2)
   912  	RET
   913  
   914  notaligned:
        	// R8 = next 16-byte boundary above R3; scan bytes up to it, then
        	// continue at aligned.
   915  	MOVD	R3, R8
   916  	AND	$-16, R8
   917  	ADD     $16, R8
   918  notalignedloop:
   919  	CMPBEQ	R3, R8, aligned
   920  	MOVBZ	0(R3), R7
   921  	LA	1(R3), R3
   922  	CMPBNE	R7, R5, notalignedloop
   923  	BR	found
   924  
   925  TEXT runtime·return0(SB), NOSPLIT, $0	// return0: stub with no frame that zeroes R3 and returns
   926  	MOVW	$0, R3	// the only state this stub changes
   927  	RET
   928  
   929  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   930  // Must obey the gcc calling convention. The result is left in R2.
   931  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   932  	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   933  	MOVD	g, R1	// R1 = caller's g (R13)
   934  	MOVD	R10, R3	// R3 = caller's R10
   935  	MOVD	LR, R4	// R4 = return address; the BL below overwrites LR
   936  	MOVD	R11, R5	// R5 = caller's R11
   937    
   938  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   939  	MOVD	g_m(g), R2	// R2 = g.m
   940  	MOVD	m_curg(R2), R2	// R2 = g.m.curg
   941  	MOVD	(g_stack+stack_hi)(R2), R2	// R2 = g.m.curg.stack.hi (the return value)
   942    
   943  	MOVD	R1, g	// put back everything saved above
   944  	MOVD	R3, R10
   945  	MOVD	R4, LR
   946  	MOVD	R5, R11
   947  	RET
   948  
   949  // The top-most function running on a goroutine
   950  // returns to goexit+PCQuantum.
   951  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   952  	BYTE $0x07; BYTE $0x00; // 2-byte nop; occupies the bytes before the BL (presumably PCQuantum bytes, so goexit+PCQuantum is the BL - confirm)
   953  	BL	runtime·goexit1(SB)	// does not return
   954  	// traceback from goexit1 must hit code range of goexit
   955  	BYTE $0x07; BYTE $0x00; // 2-byte nop; keeps the BL's return address inside this function's code range
   956  
   957  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8	// no-op in this file: the 8 bytes of arguments are never read
   958  	RET
   959  
   960  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8	// no-op in this file: the 8 bytes of arguments are never read
   961  	RET
   962  
   963  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8	// no-op in this file: the 8 bytes of arguments are never read
   964  	RET
   965  
   966  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8	// no-op in this file: the 8 bytes of arguments are never read
   967  	RET
   968  
   969  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0	// takes no arguments and returns immediately; no work is done here
   970  	RET
   971  
   972  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0	// publicationBarrier: issues one SYNC and returns; no arguments, no results
   973  	SYNC	// barrier instruction; NOTE(review): presumably orders earlier stores before later ones - confirm the exact guarantee
   974  	RET
   975  
   976  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
   977  	// Marshal both string headers into the registers cmpbody expects,
   978  	// then tail-branch to it; cmpbody writes -1/0/1 through R7.
   979  	MOVD	$ret+32(FP), R7         // R7 = &ret
   980  	LMG	s2_base+16(FP), R5, R6  // R5 = s2 base, R6 = len(s2)
   981  	LMG	s1_base+0(FP), R3, R4   // R3 = s1 base, R4 = len(s1)
   982  	BR	runtime·cmpbody(SB)
   983  
   984  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
   985  	// Only the base pointer and length of each slice are loaded; the
   986  	// comparison is done by cmpbody, which writes -1/0/1 through R7.
   987  	MOVD	$res+48(FP), R7     // R7 = &res
   988  	LMG	s2+24(FP), R5, R6   // R5 = s2 base, R6 = len(s2)
   989  	LMG	s1+0(FP), R3, R4    // R3 = s1 base, R4 = len(s1)
   990  	BR	runtime·cmpbody(SB)
   991  
   992  // cmpbody lexicographically compares a[0:alen] with b[0:blen].
   993  // input:  R3 = a, R4 = alen, R5 = b, R6 = blen,
   994  //         R7 = address of output word (stores -1/0/1 here)
   995  // clobbers R3, R5 and R8 (R8 = bytes of the common prefix still to compare).
   996  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
   997  	CMPBEQ	R3, R5, cmplengths	// same base pointer: only the lengths can differ
   998  	MOVD	R4, R8
   999  	CMPBLE	R4, R6, amin
  1000  	MOVD	R6, R8			// R8 = min(alen, blen)
  1001  amin:
  1002  	CMPBEQ	R8, $0, cmplengths	// no common prefix to compare
  1003  	CMP	R8, $256
  1004  	BLE	tail
  1005  loop:
  1006  	CLC	$256, 0(R3), 0(R5)	// compare one 256-byte chunk
  1007  	BGT	gt
  1008  	BLT	lt
  1009  	LA	256(R3), R3		// chunk was equal: step both pointers past it so the
  1010  	LA	256(R5), R5		// next CLC (and the tail) examine bytes not yet compared
  1011  	SUB	$256, R8
  1012  	CMP	R8, $256
  1013  	BGT	loop
  1014  tail:
  1015  	SUB	$1, R8			// 1 <= R8 <= 256 here; the executed CLC takes length-1
  1016  	EXRL	$runtime·cmpbodyclc(SB), R8	// run cmpbodyclc's CLC with its length taken from R8
  1017  	BGT	gt
  1018  	BLT	lt
  1019  cmplengths:
  1020  	CMP	R4, R6			// common prefix equal: the shorter input sorts first
  1021  	BEQ	eq
  1022  	BLT	lt
  1023  gt:
  1024  	MOVD	$1, 0(R7)
  1025  	RET
  1026  lt:
  1027  	MOVD	$-1, 0(R7)
  1028  	RET
  1029  eq:
  1030  	MOVD	$0, 0(R7)
  1031  	RET
  1032  
  1033  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0	// holds the CLC that cmpbody runs through EXRL
  1034  	CLC	$1, 0(R3), 0(R5)	// written as a 1-byte compare of (R3) with (R5); cmpbody's EXRL supplies the real length from R8
  1035  	RET
  1036  
  1037  // func supportsVX() bool - returns the hasVX byte recorded in runtime·cpu's facilities.
  1038  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1039  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// R0 = hasVX flag, zero-extended
  1040  	MOVB	R0, ret+0(FP)	// store it as the 1-byte result
  1041  	RET
  1042  
  1043  // func supportsVX() bool - returns the hasVX byte recorded in runtime·cpu's facilities.
  1044  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1045  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// R0 = hasVX flag, zero-extended
  1046  	MOVB	R0, ret+0(FP)	// store it as the 1-byte result
  1047  	RET
  1048  
  1049  // func indexShortStr(s, sep string) int
  1050  // The vx facility must be available; checking is the caller's job.
  1051  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1052  	// One LMG covers both headers: R1=&s[0], R2=len(s), R3=&sep[0], R4=len(sep).
  1053  	LMG	s+0(FP), R1, R4
  1054  	LA	ret+32(FP), R5     // R5=&ret
  1055  	BR	runtime·indexShortStr(SB)
  1056  
  1057  // func indexShortStr(s, sep []byte) int
  1058  // The vx facility must be available; checking is the caller's job.
  1059  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1060  	LA	ret+48(FP), R5        // R5=&ret
  1061  	LMG	sep+24(FP), R3, R4    // R3=&sep[0], R4=len(sep)
  1062  	LMG	s+0(FP), R1, R2       // R1=&s[0],   R2=len(s)
  1063  	BR	runtime·indexShortStr(SB)
  1064  
  1065  // s: string we are searching
  1066  // sep: string to search for
  1067  // R1=&s[0], R2=len(s)
  1068  // R3=&sep[0], R4=len(sep)
  1069  // R5=&ret (int); receives the index of the first match of sep in s, or -1
  1070  // Caller must confirm availability of vx facility before calling. len(sep) > 64 is not implemented.
  1071  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1072  	CMPBGT	R4, R2, notfound // sep is longer than s
  1073  	ADD	R1, R2
  1074  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1075  	CMPBEQ	R4, $0, notfound // an empty sep is reported as not found (-1)
  1076  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1077  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1078  	MOVD	R1, R7 // R7=current candidate position in s
  1079  index2plus: // len(sep) == 2: test 16 candidate positions per iteration
  1080  	CMPBNE	R4, $1, index3plus
  1081  	MOVD	$15(R7), R9
  1082  	CMPBGE	R9, R2, index2to16 // R7+15 >= last index: too short for the 16-wide loop
  1083  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1084  	VONE	V16 // all ones: the byte value VFEEBS looks for in the merged mask
  1085  	VREPH	$0, V0, V1 // replicate the 2-byte sep across V1
  1086  	CMPBGE	R9, R2, index2to16 // NOTE(review): repeats the check four lines up; R9 and R2 are unchanged in between
  1087  index2loop:
  1088  	VL	0(R7), V2          // 16 bytes, even indices
  1089  	VL	1(R7), V4          // 16 bytes, odd indices
  1090  	VCEQH	V1, V2, V5         // compare even indices
  1091  	VCEQH	V1, V4, V6         // compare odd indices
  1092  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1093  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1094  	BLT	foundV17
  1095  	MOVD	$16(R7), R7        // R7+=16
  1096  	ADD	$15, R7, R9
  1097  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1098  	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish one position at a time
  1099  	BR	notfound
  1100  
  1101  index3plus: // len(sep) == 3: test 16 candidate positions per iteration
  1102  	CMPBNE	R4, $2, index4plus
  1103  	ADD	$15, R7, R9
  1104  	CMPBGE	R9, R2, index2to16 // R7+15 >= last index: too short for the 16-wide loop
  1105  	MOVD	$1, R0 // VLL index 1 = load 2 bytes
  1106  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1107  	VONE	V16 // all ones: the byte value VFEEBS looks for in the merged mask
  1108  	VREPH	$0, V0, V1 // replicate the first 2 bytes of sep across V1
  1109  	VREPB	$2, V0, V8 // replicate the third byte of sep across V8
  1110  index3loop:
  1111  	VL	(R7), V2           // load 16-bytes into V2
  1112  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1113  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1114  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1115  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1116  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1117  	VCEQB	V8, V9, V10        // compare last bytes
  1118  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1119  	VN	V7, V10, V7        // AND indices with last byte
  1120  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1121  	BLT	foundV17
  1122  	MOVD	$16(R7), R7        // R7+=16
  1123  	ADD	$15, R7, R9
  1124  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1125  	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish one position at a time
  1126  	BR	notfound
  1127  
  1128  index4plus: // len(sep) == 4: test 16 candidate positions per iteration
  1129  	CMPBNE	R4, $3, index5plus
  1130  	ADD	$15, R7, R9
  1131  	CMPBGE	R9, R2, index2to16 // R7+15 >= last index: too short for the 16-wide loop
  1132  	MOVD	$2, R0 // VLL index 2 = load 3 bytes
  1133  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1134  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1135  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1136  	VONE	V16 // all ones: the byte value VFEEBS looks for in the merged mask
  1137  	VREPF	$0, V0, V1 // replicate the 4-byte sep across V1
  1138  index4loop:
  1139  	VL	(R7), V2           // load 16-bytes into V2
  1140  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1141  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1142  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1143  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1144  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1145  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1146  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1147  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1148  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1149  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1150  	VSEL	V13, V14, V31, V7  // final merge
  1151  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1152  	BLT	foundV17
  1153  	MOVD	$16(R7), R7        // R7+=16
  1154  	ADD	$15, R7, R9
  1155  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1156  	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish one position at a time
  1157  	BR	notfound
  1158  
  1159  index5plus:
  1160  	CMPBGT	R4, $15, index17plus // len(sep) > 16
  1161  index2to16: // generic path for len(sep) <= 16; also finishes the 2/3/4-byte loops above
  1162  	CMPBGT	R7, R2, notfound
  1163  	MOVD	$1(R7), R8
  1164  	CMPBGT	R8, R2, index2to16tail // only one candidate position left
  1165  index2to16loop:
  1166  	// unrolled 2x
  1167  	VLL	R4, (R7), V1 // len(sep) bytes of s at R7
  1168  	VLL	R4, 1(R7), V2 // len(sep) bytes of s at R7+1
  1169  	VCEQGS	V0, V1, V3
  1170  	BEQ	found // match at R7
  1171  	MOVD	$1(R7), R7
  1172  	VCEQGS	V0, V2, V4
  1173  	BEQ	found // match at the second position (R7 already advanced)
  1174  	MOVD	$1(R7), R7
  1175  	CMPBLT	R7, R2, index2to16loop // at least two positions remain
  1176  	CMPBGT	R7, R2, notfound
  1177  index2to16tail: // exactly one position left (R7 == R2)
  1178  	VLL	R4, (R7), V1
  1179  	VCEQGS	V0, V1, V2
  1180  	BEQ	found
  1181  	BR	notfound
  1182  
  1183  index17plus: // 17 <= len(sep) <= 32: one candidate position per iteration
  1184  	CMPBGT	R4, $31, index33plus
  1185  	SUB	$16, R4, R0 // R0=VLL index for the bytes of sep after the first 16
  1186  	VLL	R0, 16(R3), V1 // V1=sep[16:]
  1187  	VONE	V7 // all ones, compared against the combined equality mask
  1188  index17to32loop:
  1189  	VL	(R7), V2
  1190  	VLL	R0, 16(R7), V3
  1191  	VCEQG	V0, V2, V4
  1192  	VCEQG	V1, V3, V5
  1193  	VN	V4, V5, V6 // AND of the per-chunk equality masks
  1194  	VCEQGS	V6, V7, V8
  1195  	BEQ	found // every chunk matched
  1196  	MOVD	$1(R7), R7
  1197  	CMPBLE  R7, R2, index17to32loop
  1198  	BR	notfound
  1199  
  1200  index33plus: // 33 <= len(sep) <= 48: one candidate position per iteration
  1201  	CMPBGT	R4, $47, index49plus
  1202  	SUB	$32, R4, R0 // R0=VLL index for the bytes of sep after the first 32
  1203  	VL	16(R3), V1 // V1=sep[16:32]
  1204  	VLL	R0, 32(R3), V2 // V2=sep[32:]
  1205  	VONE	V11 // all ones, compared against the combined equality mask
  1206  index33to48loop:
  1207  	VL	(R7), V3
  1208  	VL	16(R7), V4
  1209  	VLL	R0, 32(R7), V5
  1210  	VCEQG	V0, V3, V6
  1211  	VCEQG	V1, V4, V7
  1212  	VCEQG	V2, V5, V8
  1213  	VN	V6, V7, V9
  1214  	VN	V8, V9, V10 // AND of the per-chunk equality masks
  1215  	VCEQGS	V10, V11, V12
  1216  	BEQ	found // every chunk matched
  1217  	MOVD	$1(R7), R7
  1218  	CMPBLE  R7, R2, index33to48loop
  1219  	BR	notfound
  1220  
  1221  index49plus: // 49 <= len(sep) <= 64: one candidate position per iteration
  1222  	CMPBGT	R4, $63, index65plus
  1223  	SUB	$48, R4, R0 // R0=VLL index for the bytes of sep after the first 48
  1224  	VL	16(R3), V1 // V1=sep[16:32]
  1225  	VL	32(R3), V2 // V2=sep[32:48]
  1226  	VLL	R0, 48(R3), V3 // V3=sep[48:]
  1227  	VONE	V15 // all ones, compared against the combined equality mask
  1228  index49to64loop:
  1229  	VL	(R7), V4
  1230  	VL	16(R7), V5
  1231  	VL	32(R7), V6
  1232  	VLL	R0, 48(R7), V7
  1233  	VCEQG	V0, V4, V8
  1234  	VCEQG	V1, V5, V9
  1235  	VCEQG	V2, V6, V10
  1236  	VCEQG	V3, V7, V11
  1237  	VN	V8, V9, V12
  1238  	VN	V10, V11, V13
  1239  	VN	V12, V13, V14 // AND of the per-chunk equality masks
  1240  	VCEQGS	V14, V15, V16
  1241  	BEQ	found // every chunk matched
  1242  	MOVD	$1(R7), R7
  1243  	CMPBLE  R7, R2, index49to64loop // falls through to notfound once R7 passes the last index
  1244  notfound:
  1245  	MOVD	$-1, (R5) // *ret = -1
  1246  	RET
  1247  
  1248  index65plus:
  1249  	// not implemented
  1250  	MOVD	$0, (R0) // len(sep) > 64: stores through (R0) - presumably a deliberate fault; confirm
  1251  	RET
  1252  
  1253  foundV17: // index is in doubleword V17[0]
  1254  	VLGVG	$0, V17, R8
  1255  	ADD	R8, R7 // R7=address of the match within the current 16-byte window
  1256  found:
  1257  	SUB	R1, R7 // convert the match address into an index into s
  1258  	MOVD	R7, (R5) // *ret = index
  1259  	RET
  1260  
  1261  // This is called from .init_array and follows the platform, not Go, ABI.
  1262  // We are overly conservative. We could only save the registers we use.
  1263  // However, since this function is only called once per loaded module
  1264  // performance is unimportant.
  1265  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1266  	// Save R6-R15 in the register save area of the calling function.
  1267  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1268  	STMG	R6, R15, 48(R15)
  1269  
  1270  	// append the argument (passed in R2, as per the ELF ABI) to the
  1271  	// moduledata linked list.
  1272  	MOVD	runtime·lastmoduledatap(SB), R1	// R1 = current last entry of the list
  1273  	MOVD	R2, moduledata_next(R1)	// old last entry's next = new module
  1274  	MOVD	R2, runtime·lastmoduledatap(SB)	// the new module becomes the last entry
  1275  
  1276  	// Restore R6-R15.
  1277  	LMG	48(R15), R6, R15
  1278  	RET
  1279  
  1280  TEXT ·checkASM(SB),NOSPLIT,$0-1	// checkASM: no arguments, 1-byte result; always stores 1
  1281  	MOVB	$1, ret+0(FP)	// ret = 1 (true)
  1282  	RET