github.com/FenixAra/go@v0.0.0-20170127160404-96ea0918e670/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
     10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
     11  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
     12  	// C TLS base pointer in AR0:AR1
     13  	// Entry sequence: set up g0's stack bounds, optionally call _cgo_init, link m0 and g0, run check/args/osinit/schedinit, queue runtime·mainPC with newproc, then call mstart.
     14  	// initialize essential registers
     15  	XOR	R0, R0	// R0 = 0; later code in this file uses R0 as a zero register
     16  
     17  	SUB	$24, R15	// reserve 24 bytes; argc and argv are stored at offsets 8 and 16
     18  	MOVW	R2, 8(R15) // argc
     19  	MOVD	R3, 16(R15) // argv
     20  
     21  	// create istack out of the given (operating system) stack.
     22  	// _cgo_init may update stackguard.
     23  	MOVD	$runtime·g0(SB), g
     24  	MOVD	R15, R11
     25  	SUB	$(64*1024), R11	// provisional lower bound: 64 KiB below the current SP
     26  	MOVD	R11, g_stackguard0(g)
     27  	MOVD	R11, g_stackguard1(g)
     28  	MOVD	R11, (g_stack+stack_lo)(g)
     29  	MOVD	R15, (g_stack+stack_hi)(g)
     30  
     31  	// if there is a _cgo_init, call it using the gcc ABI.
     32  	MOVD	_cgo_init(SB), R11
     33  	CMPBEQ	R11, $0, nocgo
     34  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR (NOTE(review): the instruction here is MOVW, not MOVD - confirm)
     35  	SLD	$32, R4, R4
     36  	MOVW	AR1, R4			// arg 2: TLS base pointer
     37  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
     38  	MOVD	g, R2			// arg 0: G
     39  	// C functions expect 160 bytes of space on caller stack frame
     40  	// and an 8-byte aligned stack pointer
     41  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
     42  	SUB	$160, R15		// reserve 160 bytes
     43  	MOVD    $~7, R6
     44  	AND 	R6, R15			// 8-byte align
     45  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
     46  	MOVD	R9, R15			// restore stack
     47  	XOR	R0, R0			// zero R0
     48  
     49  nocgo:
     50  	// update stackguard after _cgo_init
     51  	MOVD	(g_stack+stack_lo)(g), R2
     52  	ADD	$const__StackGuard, R2	// guard = stack.lo + _StackGuard
     53  	MOVD	R2, g_stackguard0(g)
     54  	MOVD	R2, g_stackguard1(g)
     55  
     56  	// set the per-goroutine and per-mach "registers"
     57  	MOVD	$runtime·m0(SB), R2
     58  
     59  	// save m->g0 = g0
     60  	MOVD	g, m_g0(R2)
     61  	// save m0 to g0->m
     62  	MOVD	R2, g_m(g)
     63  
     64  	BL	runtime·check(SB)
     65  
     66  	// argc/argv are already prepared on stack
     67  	BL	runtime·args(SB)
     68  	BL	runtime·osinit(SB)
     69  	BL	runtime·schedinit(SB)
     70  
     71  	// create a new goroutine to start program
     72  	MOVD	$runtime·mainPC(SB), R2		// entry
     73  	SUB     $24, R15	// temporary 24-byte call frame for newproc
     74  	MOVD 	R2, 16(R15)	// 16(R15) = &runtime·mainPC
     75  	MOVD 	$0, 8(R15)	// 8(R15) = 0 (presumably newproc's size argument - confirm against its Go signature)
     76  	MOVD 	$0, 0(R15)	// 0(R15) = 0
     77  	BL	runtime·newproc(SB)
     78  	ADD	$24, R15	// drop the temporary frame
     79  
     80  	// start this M
     81  	BL	runtime·mstart(SB)
     82  
     83  	MOVD	$0, 1(R0)	// only reached if mstart returns; stores to address 1 (R0 is zero), presumably to crash deliberately
     84  	RET
    85  
     86  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte word holding the address of runtime·main; rt0_go passes its address to newproc
     87  GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only, 8 bytes
    88  
     89  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
     90  	MOVD	$0, 2(R0)	// store to address 2 (R0 is kept at zero in this file); presumably meant to fault so a debugger can take over - confirm
     91  	RET
    92  
     93  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0	// no work is done here on s390x: returns immediately
     94  	RET
    95  
    96  /*
    97   *  go-routine
    98   */
    99  
    100  // void gosave(Gobuf*)
    101  // save state in Gobuf; setjmp
    102  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
    103  	MOVD	buf+0(FP), R3	// R3 = buf
    104  	MOVD	R15, gobuf_sp(R3)	// buf.sp = current stack pointer
    105  	MOVD	LR, gobuf_pc(R3)	// buf.pc = return address of this call
    106  	MOVD	g, gobuf_g(R3)	// buf.g = current g
    107  	MOVD	$0, gobuf_lr(R3)	// buf.lr = 0
    108  	MOVD	$0, gobuf_ret(R3)	// buf.ret = 0
    109  	// Assert ctxt is zero. See func save.
    110  	MOVD	gobuf_ctxt(R3), R3	// R3 is reused: now holds buf.ctxt
    111  	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
    112  	BL	runtime·badctxt(SB)
    113  	RET
   114  
    115  // void gogo(Gobuf*)
    116  // restore state from Gobuf; longjmp
    117  TEXT runtime·gogo(SB), NOSPLIT, $16-8
    118  	MOVD	buf+0(FP), R5	// R5 = buf
    119  
    120  	// If ctxt is not nil, invoke deletion barrier before overwriting.
    121  	MOVD	gobuf_ctxt(R5), R1
    122  	CMPBEQ	R1, $0, nilctxt
    123  	MOVD	$gobuf_ctxt(R5), R1	// R1 = &buf.ctxt
    124  	MOVD	R1, 8(R15)	// first argument slot: &buf.ctxt
    125  	MOVD	R0, 16(R15)	// second argument slot: R0, which this file keeps at zero
    126  	BL	runtime·writebarrierptr_prewrite(SB)
    127  	MOVD	buf+0(FP), R5	// reload buf after the call
    128  
    129  nilctxt:
    130  	MOVD	gobuf_g(R5), g	// g = buf.g
    131  	BL	runtime·save_g(SB)
    132  
    133  	MOVD	0(g), R4	// make sure g is not nil (R4 is not used afterwards; presumably the load exists only to fault on a nil g)
    134  	MOVD	gobuf_sp(R5), R15	// restore SP
    135  	MOVD	gobuf_lr(R5), LR	// restore LR
    136  	MOVD	gobuf_ret(R5), R3	// R3 = buf.ret
    137  	MOVD	gobuf_ctxt(R5), R12	// R12 = buf.ctxt (closure context register)
    138  	MOVD	$0, gobuf_sp(R5)	// clear the consumed fields of buf
    139  	MOVD	$0, gobuf_ret(R5)
    140  	MOVD	$0, gobuf_lr(R5)
    141  	MOVD	$0, gobuf_ctxt(R5)
    142  	CMP	R0, R0 // set condition codes for == test, needed by stack split
    143  	MOVD	gobuf_pc(R5), R6
    144  	BR	(R6)	// jump (not call) to buf.pc
   145  
    146  // void mcall(fn func(*g))
    147  // Switch to m->g0's stack, call fn(g).
    148  // Fn must never return.  It should gogo(&g->sched)
    149  // to keep running g.
    150  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
    151  	// Save caller state in g->sched
    152  	MOVD	R15, (g_sched+gobuf_sp)(g)
    153  	MOVD	LR, (g_sched+gobuf_pc)(g)
    154  	MOVD	$0, (g_sched+gobuf_lr)(g)
    155  	MOVD	g, (g_sched+gobuf_g)(g)
    156  
    157  	// Switch to m->g0 & its stack, call fn.
    158  	MOVD	g, R3	// R3 = calling g, passed to fn below
    159  	MOVD	g_m(g), R8
    160  	MOVD	m_g0(R8), g	// g = m->g0
    161  	BL	runtime·save_g(SB)
    162  	CMP	g, R3
    163  	BNE	2(PC)	// caller was not g0: skip the badmcall jump
    164  	BR	runtime·badmcall(SB)	// mcall was invoked while already on g0
    165  	MOVD	fn+0(FP), R12			// context
    166  	MOVD	0(R12), R4			// code pointer
    167  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
    168  	SUB	$16, R15	// 16-byte frame on g0's stack for the call to fn
    169  	MOVD	R3, 8(R15)	// argument: the g that called mcall
    170  	MOVD	$0, 0(R15)	// zero the slot at 0(R15)
    171  	BL	(R4)
    172  	BR	runtime·badmcall2(SB)	// fn returned, which it must never do
   173  
    174  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    175  // of the G stack.  We need to distinguish the routine that
    176  // lives at the bottom of the G stack from the one that lives
    177  // at the top of the system stack because the one at the top of
    178  // the system stack terminates the stack walk (see topofstack()).
    179  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
    180  	UNDEF	// systemstack only records an address inside this function (entry + 16) as a saved PC
    181  	BL	(LR)	// make sure this function is not leaf
    182  	RET
   183  
    184  // func systemstack(fn func())
    185  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
    186  	MOVD	fn+0(FP), R3	// R3 = fn
    187  	MOVD	R3, R12		// context
    188  	MOVD	g_m(g), R4	// R4 = m
    189  
    190  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
    191  	CMPBEQ	g, R5, noswitch	// already on the signal stack: call fn directly
    192  
    193  	MOVD	m_g0(R4), R5	// R5 = g0
    194  	CMPBEQ	g, R5, noswitch	// already on g0: call fn directly
    195  
    196  	MOVD	m_curg(R4), R6
    197  	CMPBEQ	g, R6, switch	// running on the user goroutine: switch stacks
    198  
    199  	// Bad: g is not gsignal, not g0, not curg. What is it?
    200  	// Hide call from linker nosplit analysis.
    201  	MOVD	$runtime·badsystemstack(SB), R3
    202  	BL	(R3)	// if this call returns, execution falls through into switch below
    203  
    204  switch:
    205  	// save our state in g->sched.  Pretend to
    206  	// be systemstack_switch if the G stack is scanned.
    207  	MOVD	$runtime·systemstack_switch(SB), R6
    208  	ADD	$16, R6	// get past prologue
    209  	MOVD	R6, (g_sched+gobuf_pc)(g)
    210  	MOVD	R15, (g_sched+gobuf_sp)(g)
    211  	MOVD	$0, (g_sched+gobuf_lr)(g)
    212  	MOVD	g, (g_sched+gobuf_g)(g)
    213  
    214  	// switch to g0
    215  	MOVD	R5, g	// R5 still holds g0 from the check above
    216  	BL	runtime·save_g(SB)
    217  	MOVD	(g_sched+gobuf_sp)(g), R3
    218  	// make it look like mstart called systemstack on g0, to stop traceback
    219  	SUB	$8, R3
    220  	MOVD	$runtime·mstart(SB), R4
    221  	MOVD	R4, 0(R3)	// fake return address: runtime·mstart
    222  	MOVD	R3, R15	// now running on g0's stack
    223  
    224  	// call target function
    225  	MOVD	0(R12), R3	// code pointer
    226  	BL	(R3)
    227  
    228  	// switch back to g
    229  	MOVD	g_m(g), R3
    230  	MOVD	m_curg(R3), g
    231  	BL	runtime·save_g(SB)
    232  	MOVD	(g_sched+gobuf_sp)(g), R15	// restore the goroutine's SP saved above
    233  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved SP now that it has been consumed
    234  	RET
    235  
    236  noswitch:
    237  	// already on m stack, just call directly
    238  	MOVD	0(R12), R3	// code pointer
    239  	BL	(R3)
    240  	RET
   241  
   242  /*
   243   * support for morestack
   244   */
   245  
    246  // Called during function prolog when more stack is needed.
    247  // Caller has already loaded:
    248  // R3: framesize, R4: argsize, R5: LR
    249  //
    250  // The traceback routines see morestack on a g0 as being
    251  // the top of a stack (for example, morestack calling newstack
    252  // calling the scheduler calling newm calling gc), so we must
    253  // record an argument size. For that purpose, it has no arguments.
    254  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
    255  	// Cannot grow scheduler stack (m->g0).
    256  	MOVD	g_m(g), R7	// R7 = m (used throughout this function)
    257  	MOVD	m_g0(R7), R8
    258  	CMPBNE	g, R8, 3(PC)	// g != g0: skip the two calls below
    259  	BL	runtime·badmorestackg0(SB)
    260  	BL	runtime·abort(SB)
    261  
    262  	// Cannot grow signal stack (m->gsignal).
    263  	MOVD	m_gsignal(R7), R8
    264  	CMP	g, R8
    265  	BNE	3(PC)	// g != gsignal: skip the two calls below
    266  	BL	runtime·badmorestackgsignal(SB)
    267  	BL	runtime·abort(SB)
    268  
    269  	// Called from f.
    270  	// Set g->sched to context in f.
    271  	MOVD	R15, (g_sched+gobuf_sp)(g)
    272  	MOVD	LR, R8	// LR here is the return address into f
    273  	MOVD	R8, (g_sched+gobuf_pc)(g)
    274  	MOVD	R5, (g_sched+gobuf_lr)(g)	// R5 = f's own LR, loaded by the caller (see header)
    275  	// newstack will fill gobuf.ctxt.
    276  
    277  	// Called from f.
    278  	// Set m->morebuf to f's caller.
    279  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
    280  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
    281  	MOVD	g, (m_morebuf+gobuf_g)(R7)
    282  
    283  	// Call newstack on m->g0's stack.
    284  	MOVD	m_g0(R7), g
    285  	BL	runtime·save_g(SB)
    286  	MOVD	(g_sched+gobuf_sp)(g), R15
    287  	// Create a stack frame on g0 to call newstack.
    288  	MOVD	$0, -16(R15)	// Zero saved LR in frame
    289  	SUB	$16, R15
    290  	MOVD	R12, 8(R15)	// ctxt argument
    291  	BL	runtime·newstack(SB)
    292  
    293  	// Not reached, but make sure the return PC from the call to newstack
    294  	// is still in this function, and not the beginning of the next.
    295  	UNDEF
   296  
    297  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
    298  	MOVD	$0, R12	// R12 = 0; morestack passes R12 to newstack as the ctxt argument
    299  	BR	runtime·morestack(SB)	// tail-jump, so LR and R3-R5 reach morestack unchanged
   300  
    301  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
    302  	// We came here via a RET to an overwritten LR.
    303  	// R3 may be live. Other registers are available.
    304  
    305  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
    306  	MOVD	(g_stkbar+slice_array)(g), R4	// R4 = base of the g.stkbar array
    307  	MOVD	g_stkbarPos(g), R5	// R5 = g.stkbarPos
    308  	MOVD	$stkbar__size, R6
    309  	MULLD	R5, R6	// R6 = stkbarPos * sizeof(stkbar)
    310  	ADD	R4, R6	// R6 = &g.stkbar[stkbarPos]
    311  	MOVD	stkbar_savedLRVal(R6), R6	// R6 = original return PC
    312  	// Record that this stack barrier was hit.
    313  	ADD	$1, R5
    314  	MOVD	R5, g_stkbarPos(g)
    315  	// Jump to the original return PC.
    316  	BR	(R6)
   317  
    318  // reflectcall: call a function with the given argument list
    319  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    320  // we don't have variable-sized frames, so we use a small number
    321  // of constant-sized-frame functions to encode a few bits of size in the pc.
    322  // Caution: ugly multiline assembly macros in your future!
    323  
    324  #define DISPATCH(NAME,MAXSIZE)		\
    325  	MOVD	$MAXSIZE, R4;		\
    326  	CMP	R3, R4;		/* R3 = argsize, loaded by reflectcall */	\
    327  	BGT	3(PC);			/* argsize > MAXSIZE: skip the jump, fall through to the next DISPATCH */	\
    328  	MOVD	$NAME(SB), R5;	\
    329  	BR	(R5)
    330  // Note: can't just "BR NAME(SB)" - bad inlining results.
   331  
    332  TEXT reflect·call(SB), NOSPLIT, $0-0	// alias in package reflect: jumps straight to ·reflectcall
    333  	BR	·reflectcall(SB)
   334  
    335  TEXT ·reflectcall(SB), NOSPLIT, $-8-32	// picks the smallest callN stub whose frame holds argsize bytes and jumps to it
    336  	MOVWZ argsize+24(FP), R3	// R3 = argsize, zero-extended from 32 bits
    337  	DISPATCH(runtime·call32, 32)
    338  	DISPATCH(runtime·call64, 64)
    339  	DISPATCH(runtime·call128, 128)
    340  	DISPATCH(runtime·call256, 256)
    341  	DISPATCH(runtime·call512, 512)
    342  	DISPATCH(runtime·call1024, 1024)
    343  	DISPATCH(runtime·call2048, 2048)
    344  	DISPATCH(runtime·call4096, 4096)
    345  	DISPATCH(runtime·call8192, 8192)
    346  	DISPATCH(runtime·call16384, 16384)
    347  	DISPATCH(runtime·call32768, 32768)
    348  	DISPATCH(runtime·call65536, 65536)
    349  	DISPATCH(runtime·call131072, 131072)
    350  	DISPATCH(runtime·call262144, 262144)
    351  	DISPATCH(runtime·call524288, 524288)
    352  	DISPATCH(runtime·call1048576, 1048576)
    353  	DISPATCH(runtime·call2097152, 2097152)
    354  	DISPATCH(runtime·call4194304, 4194304)
    355  	DISPATCH(runtime·call8388608, 8388608)
    356  	DISPATCH(runtime·call16777216, 16777216)
    357  	DISPATCH(runtime·call33554432, 33554432)
    358  	DISPATCH(runtime·call67108864, 67108864)
    359  	DISPATCH(runtime·call134217728, 134217728)
    360  	DISPATCH(runtime·call268435456, 268435456)
    361  	DISPATCH(runtime·call536870912, 536870912)
    362  	DISPATCH(runtime·call1073741824, 1073741824)
    363  	MOVD	$runtime·badreflectcall(SB), R5	// argsize exceeds the largest stub (1073741824 bytes)
    364  	BR	(R5)
   365  
    366  #define CALLFN(NAME,MAXSIZE)			\
    367  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
    368  	NO_LOCAL_POINTERS;			\
    369  	/* copy arguments to stack: R4 = source, R5 = bytes left, R6 = destination */		\
    370  	MOVD	arg+16(FP), R4;			\
    371  	MOVWZ	argsize+24(FP), R5;		\
    372  	MOVD	$stack-MAXSIZE(SP), R6;		\
    373  loopArgs: /* copy 256 bytes at a time */	\
    374  	CMP	R5, $256;			\
    375  	BLT	tailArgs;			\
    376  	SUB	$256, R5;			\
    377  	MVC	$256, 0(R4), 0(R6);		\
    378  	MOVD	$256(R4), R4;			\
    379  	MOVD	$256(R6), R6;			\
    380  	BR	loopArgs;			\
    381  tailArgs: /* copy remaining bytes (fewer than 256) */		\
    382  	CMP	R5, $0;				\
    383  	BEQ	callFunction;			\
    384  	SUB	$1, R5;	/* R5 = remaining - 1, handed to the EXRL-executed MVC below */			\
    385  	EXRL	$callfnMVC<>(SB), R5;		\
    386  callFunction:					\
    387  	MOVD	f+8(FP), R12;	/* R12 = closure context */		\
    388  	MOVD	(R12), R8;	/* R8 = code pointer */		\
    389  	PCDATA  $PCDATA_StackMapIndex, $0;	\
    390  	BL	(R8);				\
    391  	/* copy return values back: registers are the arguments of callRet<> */		\
    392  	MOVD	argtype+0(FP), R7;		\
    393  	MOVD	arg+16(FP), R6;			\
    394  	MOVWZ	n+24(FP), R5;	/* same frame slot that is read as argsize+24(FP) above */		\
    395  	MOVD	$stack-MAXSIZE(SP), R4;		\
    396  	MOVWZ	retoffset+28(FP), R1;		\
    397  	ADD	R1, R4;	/* R4 = stack copy + retoffset */			\
    398  	ADD	R1, R6;	/* R6 = arg + retoffset */			\
    399  	SUB	R1, R5;	/* R5 = n - retoffset */			\
    400  	BL	callRet<>(SB);			\
    401  	RET
   402  
    403  // callRet copies return values back at the end of call*. This is a
    404  // separate function so it can allocate stack space for the arguments
    405  // to reflectcallmove. It does not follow the Go ABI; it expects its
    406  // arguments in registers.
    407  TEXT callRet<>(SB), NOSPLIT, $32-0
    408  	MOVD	R7, 8(R15)	// R7 = argtype (set up by CALLFN)
    409  	MOVD	R6, 16(R15)	// R6 = arg + retoffset
    410  	MOVD	R4, 24(R15)	// R4 = stack copy + retoffset
    411  	MOVD	R5, 32(R15)	// R5 = n - retoffset
    412  	BL	runtime·reflectcallmove(SB)
    413  	RET
   414  
    415  CALLFN(·call32, 32)	// each line instantiates one stub; the second argument is its frame size in bytes (MAXSIZE)
    416  CALLFN(·call64, 64)
    417  CALLFN(·call128, 128)
    418  CALLFN(·call256, 256)
    419  CALLFN(·call512, 512)
    420  CALLFN(·call1024, 1024)
    421  CALLFN(·call2048, 2048)
    422  CALLFN(·call4096, 4096)
    423  CALLFN(·call8192, 8192)
    424  CALLFN(·call16384, 16384)
    425  CALLFN(·call32768, 32768)
    426  CALLFN(·call65536, 65536)
    427  CALLFN(·call131072, 131072)
    428  CALLFN(·call262144, 262144)
    429  CALLFN(·call524288, 524288)
    430  CALLFN(·call1048576, 1048576)
    431  CALLFN(·call2097152, 2097152)
    432  CALLFN(·call4194304, 4194304)
    433  CALLFN(·call8388608, 8388608)
    434  CALLFN(·call16777216, 16777216)
    435  CALLFN(·call33554432, 33554432)
    436  CALLFN(·call67108864, 67108864)
    437  CALLFN(·call134217728, 134217728)
    438  CALLFN(·call268435456, 268435456)
    439  CALLFN(·call536870912, 536870912)
    440  CALLFN(·call1073741824, 1073741824)
   441  
   442  // Not a function: target for EXRL (execute relative long) instruction.
    443  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
    444  	MVC	$1, 0(R4), 0(R6)	// template only: CALLFN runs this via EXRL with R5; it is never called, so there is no RET
   445  
    446  TEXT runtime·procyield(SB),NOSPLIT,$0-0	// no work is done here on s390x: returns immediately
    447  	RET
   448  
    449  // void jmpdefer(fv, sp);
    450  // called from deferreturn.
    451  // 1. grab stored LR for caller
    452  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
    453  // 3. BR to fn
    454  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
    455  	MOVD	0(R15), R1	// R1 = LR stored at the top of the caller's frame
    456  	SUB	$6, R1, LR	// LR = R1 - 6, so fn returns to the BL deferreturn instruction
    457  
    458  	MOVD	fv+0(FP), R12	// R12 = closure context for the deferred function
    459  	MOVD	argp+8(FP), R15	// SP = second argument
    460  	SUB	$8, R15	// then lowered by 8 bytes
    461  	MOVD	0(R12), R3	// R3 = code pointer
    462  	BR	(R3)
   463  
    464  // Save state of caller into g->sched. Smashes R1.
    465  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
    466  	MOVD	LR, (g_sched+gobuf_pc)(g)	// sched.pc = return address into the caller
    467  	MOVD	R15, (g_sched+gobuf_sp)(g)	// sched.sp = caller's SP (NOFRAME: SP is unchanged on entry)
    468  	MOVD	$0, (g_sched+gobuf_lr)(g)
    469  	MOVD	$0, (g_sched+gobuf_ret)(g)
    470  	// Assert ctxt is zero. See func save.
    471  	MOVD	(g_sched+gobuf_ctxt)(g), R1
    472  	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
    473  	BL	runtime·badctxt(SB)
    474  	RET
   475  
    476  // func asmcgocall(fn, arg unsafe.Pointer) int32
    477  // Call fn(arg) on the scheduler stack,
    478  // aligned appropriately for the gcc ABI.
    479  // See cgocall.go for more details.
    480  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
    481  	// R3 = fn; R4 = arg; R2 = saved stack pointer; R5 = caller's g; R6 = temp; R13 = g; R15 = stack pointer
    482  	// C TLS base pointer in AR0:AR1
    483  	MOVD	fn+0(FP), R3
    484  	MOVD	arg+8(FP), R4
    485  
    486  	MOVD	R15, R2		// save original stack pointer
    487  	MOVD	g, R5
    488  
    489  	// Figure out if we need to switch to m->g0 stack.
    490  	// We get called to create new OS threads too, and those
    491  	// come in on the m->g0 stack already.
    492  	MOVD	g_m(g), R6
    493  	MOVD	m_g0(R6), R6
    494  	CMPBEQ	R6, g, g0	// already on g0: no stack switch needed
    495  	BL	gosave<>(SB)	// record the caller's PC/SP in g->sched
    496  	MOVD	R6, g
    497  	BL	runtime·save_g(SB)
    498  	MOVD	(g_sched+gobuf_sp)(g), R15
    499  
    500  	// Now on a scheduling stack (a pthread-created stack).
    501  g0:
    502  	// Save room for two of our pointers, plus 160 bytes of callee
    503  	// save area that lives on the caller stack.
    504  	SUB	$176, R15
    505  	MOVD	$~7, R6
    506  	AND	R6, R15                 // 8-byte alignment for gcc ABI
    507  	MOVD	R5, 168(R15)             // save old g on stack
    508  	MOVD	(g_stack+stack_hi)(R5), R5
    509  	SUB	R2, R5
    510  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
    511  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
    512  	MOVD	R4, R2                  // arg in R2
    513  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
    514  
    515  	XOR	R0, R0                  // set R0 back to 0.
    516  	// Restore g, stack pointer.
    517  	MOVD	168(R15), g
    518  	BL	runtime·save_g(SB)
    519  	MOVD	(g_stack+stack_hi)(g), R5
    520  	MOVD	160(R15), R6
    521  	SUB	R6, R5	// SP = stack.hi - saved depth
    522  	MOVD	R5, R15
    523  
    524  	MOVW	R2, ret+16(FP)	// low 32 bits of R2 after the call become the int32 result
    525  	RET
   526  
    527  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
    528  // Turn the fn into a Go func (by taking its address) and call
    529  // cgocallback_gofunc.
    530  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
    531  	MOVD	$fn+0(FP), R3	// address of the fn argument slot, not its value
    532  	MOVD	R3, 8(R15)
    533  	MOVD	frame+8(FP), R3
    534  	MOVD	R3, 16(R15)
    535  	MOVD	framesize+16(FP), R3
    536  	MOVD	R3, 24(R15)
    537  	MOVD	ctxt+24(FP), R3
    538  	MOVD	R3, 32(R15)
    539  	MOVD	$runtime·cgocallback_gofunc(SB), R3
    540  	BL	(R3)	// indirect call through R3
    541  	RET
   542  
    543  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
    544  // See cgocall.go for more details.
    545  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
    546  	NO_LOCAL_POINTERS
    547  
    548  	// Load m and g from thread-local storage.
    549  	MOVB	runtime·iscgo(SB), R3
    550  	CMPBEQ	R3, $0, nocgo
    551  	BL	runtime·load_g(SB)
    552  
    553  nocgo:
    554  	// If g is nil, Go did not create the current thread.
    555  	// Call needm to obtain one for temporary use.
    556  	// In this case, we're running on the thread stack, so there's
    557  	// lots of space, but the linker doesn't know. Hide the call from
    558  	// the linker analysis by using an indirect call.
    559  	CMPBEQ	g, $0, needm
    560  
    561  	MOVD	g_m(g), R8
    562  	MOVD	R8, savedm-8(SP)	// remember the m we entered with (non-zero on this path)
    563  	BR	havem
    564  
    565  needm:
    566  	MOVD	g, savedm-8(SP) // g is zero, so is m.
    567  	MOVD	$runtime·needm(SB), R3
    568  	BL	(R3)
    569  
    570  	// Set m->g0->sched.sp = SP, so that if a panic happens
    571  	// during the function we are about to execute, it will
    572  	// have a valid SP to run on the g0 stack.
    573  	// The next few lines (after the havem label)
    574  	// will save this SP onto the stack and then write
    575  	// the same SP back to m->g0->sched.sp. That seems redundant,
    576  	// but if an unrecovered panic happens, unwindm will
    577  	// restore the g->sched.sp from the stack location
    578  	// and then systemstack will try to use it. If we don't set it here,
    579  	// that restored SP will be uninitialized (typically 0) and
    580  	// will not be usable.
    581  	MOVD	g_m(g), R8
    582  	MOVD	m_g0(R8), R3
    583  	MOVD	R15, (g_sched+gobuf_sp)(R3)
    584  
    585  havem:
    586  	// Now there's a valid m, and we're running on its m->g0.
    587  	// Save current m->g0->sched.sp on stack and then set it to SP.
    588  	// Save current sp in m->g0->sched.sp in preparation for
    589  	// switch back to m->curg stack.
    590  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
    591  	// NOTE(review): the stack pointer in this file is R15, so "8(R1)" above presumably means 8(R15) - confirm.
    592  	MOVD	m_g0(R8), R3
    593  	MOVD	(g_sched+gobuf_sp)(R3), R4
    594  	MOVD	R4, savedsp-16(SP)
    595  	MOVD	R15, (g_sched+gobuf_sp)(R3)
    596  	// Switch to m->curg stack and call runtime.cgocallbackg.
    597  	// Because we are taking over the execution of m->curg
    598  	// but *not* resuming what had been running, we need to
    599  	// save that information (m->curg->sched) so we can restore it.
    600  	// We can restore m->curg->sched.sp easily, because calling
    601  	// runtime.cgocallbackg leaves SP unchanged upon return.
    602  	// To save m->curg->sched.pc, we push it onto the stack.
    603  	// This has the added benefit that it looks to the traceback
    604  	// routine like cgocallbackg is going to return to that
    605  	// PC (because the frame we allocate below has the same
    606  	// size as cgocallback_gofunc's frame declared above)
    607  	// so that the traceback will seamlessly trace back into
    608  	// the earlier calls.
    609  	//
    610  	// In the new goroutine, -8(SP) is unused (where SP refers to
    611  	// m->curg's SP while we're setting it up, before we've adjusted it).
    612  	MOVD	m_curg(R8), g
    613  	BL	runtime·save_g(SB)
    614  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
    615  	MOVD	(g_sched+gobuf_pc)(g), R5
    616  	MOVD	R5, -24(R4)	// saved curg->sched.pc sits at the new SP
    617  	MOVD	ctxt+24(FP), R5
    618  	MOVD	R5, -16(R4)	// ctxt argument for cgocallbackg
    619  	MOVD	$-24(R4), R15	// SP = curg->sched.sp - 24
    620  	BL	runtime·cgocallbackg(SB)
    621  
    622  	// Restore g->sched (== m->curg->sched) from saved values.
    623  	MOVD	0(R15), R5
    624  	MOVD	R5, (g_sched+gobuf_pc)(g)
    625  	MOVD	$24(R15), R4
    626  	MOVD	R4, (g_sched+gobuf_sp)(g)
    627  
    628  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    629  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    630  	// so we do not have to restore it.)
    631  	MOVD	g_m(g), R8
    632  	MOVD	m_g0(R8), g
    633  	BL	runtime·save_g(SB)
    634  	MOVD	(g_sched+gobuf_sp)(g), R15
    635  	MOVD	savedsp-16(SP), R4
    636  	MOVD	R4, (g_sched+gobuf_sp)(g)
    637  
    638  	// If the m on entry was nil, we called needm above to borrow an m
    639  	// for the duration of the call. Since the call is over, return it with dropm.
    640  	MOVD	savedm-8(SP), R6
    641  	CMPBNE	R6, $0, droppedm	// entered with a real m: nothing to drop
    642  	MOVD	$runtime·dropm(SB), R3
    643  	BL	(R3)
    644  droppedm:
    645  
    646  	// Done!
    647  	RET
   648  
    649  // void setg(G*); set g. for use by needm.
    650  TEXT runtime·setg(SB), NOSPLIT, $0-8
    651  	MOVD	gg+0(FP), g	// g register = argument
    652  	// This only happens if iscgo, so jump straight to save_g
    653  	BL	runtime·save_g(SB)
    654  	RET
   655  
    656  // void setg_gcc(G*); set g in C TLS.
    657  // Must obey the gcc calling convention.
    658  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
    659  	// The standard prologue clobbers LR (R14), which is callee-save in
    660  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
    661  	MOVD	LR, R1
    662  	// Also save g, R10, and R11 since they're callee-save in C ABI
    663  	MOVD	R10, R3
    664  	MOVD	g, R4
    665  	MOVD	R11, R5
    666  
    667  	MOVD	R2, g	// R2 holds the G* argument on entry
    668  	BL	runtime·save_g(SB)
    669  
    670  	MOVD	R5, R11	// restore the saved registers in reverse order
    671  	MOVD	R4, g
    672  	MOVD	R3, R10
    673  	MOVD	R1, LR
    674  	RET
   675  
    676  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16	// returns the caller's saved LR, looking through an installed stack barrier
    677  	MOVD	16(R15), R3		// LR saved by caller
    678  	MOVD	runtime·stackBarrierPC(SB), R4
    679  	CMPBNE	R3, R4, nobar	// saved LR is not the stack-barrier PC: return it unchanged
    680  	// Get original return PC.
    681  	BL	runtime·nextBarrierPC(SB)
    682  	MOVD	8(R15), R3	// R3 = value nextBarrierPC left at 8(R15)
    683  nobar:
    684  	MOVD	R3, ret+8(FP)
    685  	RET
   686  
    687  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16	// overwrites the caller's saved LR with pc, or updates the barrier record if a stack barrier is installed there
    688  	MOVD	pc+8(FP), R3	// R3 = new pc
    689  	MOVD	16(R15), R4	// R4 = LR currently saved by the caller
    690  	MOVD	runtime·stackBarrierPC(SB), R5
    691  	CMPBEQ	R4, R5, setbar
    692  	MOVD	R3, 16(R15)		// set LR in caller
    693  	RET
    694  setbar:
    695  	// Set the stack barrier return PC.
    696  	MOVD	R3, 8(R15)	// argument slot for setNextBarrierPC
    697  	BL	runtime·setNextBarrierPC(SB)
    698  	RET
   699  
    700  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
    701  	MOVW	(R0), R0	// load through R0 (kept at zero in this file); presumably faults to stop the program - confirm
    702  	UNDEF
   703  
    704  // int64 runtime·cputicks(void)
    705  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
    706  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
    707  	// This means that since about 1972 the msb has been set, making the
    708  	// result of a call to STORE CLOCK (stck) a negative number.
    709  	// We clear the msb to make it positive.
    710  	STCK	ret+0(FP)      // serialises before and after call
    711  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
    712  	SLD	$1, R3	// shift the msb out...
    713  	SRD	$1, R3	// ...and shift back, leaving the msb clear
    714  	MOVD	R3, ret+0(FP)
    715  	RET
   716  
    717  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    718  // redirects to memhash(p, h, size) using the size
    719  // stored in the closure.
    720  TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
    721  	GO_ARGS
    722  	NO_LOCAL_POINTERS
    723  	MOVD	p+0(FP), R3
    724  	MOVD	h+8(FP), R4
    725  	MOVD	8(R12), R5	// size, read from offset 8 of the closure (R12 = closure context)
    726  	MOVD	R3, 8(R15)	// memhash argument 1: p
    727  	MOVD	R4, 16(R15)	// memhash argument 2: h
    728  	MOVD	R5, 24(R15)	// memhash argument 3: size
    729  	BL	runtime·memhash(SB)
    730  	MOVD	32(R15), R3	// memhash result slot
    731  	MOVD	R3, ret+16(FP)
    732  	RET
   733  
    734  // AES hashing not implemented for s390x
    735  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
    736  	MOVW	(R0), R15	// load through R0 (kept at zero in this file); presumably a deliberate fault if ever called - confirm
    737  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
    738  	MOVW	(R0), R15	// same stub body as aeshash
    739  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
    740  	MOVW	(R0), R15	// same stub body as aeshash
    741  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
    742  	MOVW	(R0), R15	// same stub body as aeshash
    743  
   743  
    744  // memequal(a, b unsafe.Pointer, size uintptr) bool
    745  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
    746  	MOVD	a+0(FP), R3	// register set-up follows the memeqbody contract: R3 = a
    747  	MOVD	b+8(FP), R5	// R5 = b
    748  	MOVD	size+16(FP), R6	// R6 = len
    749  	LA	ret+24(FP), R7	// R7 = address of the result byte
    750  	BR	runtime·memeqbody(SB)	// tail-jump; memeqbody writes the result and returns to our caller
   751  
    752  // memequal_varlen(a, b unsafe.Pointer) bool
    753  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
    754  	MOVD	a+0(FP), R3	// R3 = a
    755  	MOVD	b+8(FP), R5	// R5 = b
    756  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
    757  	LA	ret+16(FP), R7	// R7 = address of the result byte
    758  	BR	runtime·memeqbody(SB)	// tail-jump into the shared comparison body
   759  
    760  // eqstring tests whether two strings are equal.
    761  // The compiler guarantees that strings passed
    762  // to eqstring have equal length.
    763  // See runtime_test.go:eqstring_generic for
    764  // equivalent Go code.
    765  TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33
    766  	MOVD	s1_base+0(FP), R3	// R3 = first string's data pointer
    767  	MOVD	s1_len+8(FP), R6	// R6 = length (only s1's length is read; see the guarantee above)
    768  	MOVD	s2_base+16(FP), R5	// R5 = second string's data pointer
    769  	LA	ret+32(FP), R7	// R7 = address of the result byte
    770  	BR	runtime·memeqbody(SB)	// tail-jump into the shared comparison body
   771  
    772  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49	// reports whether slices a and b have the same length and contents
    773  	MOVD	a_len+8(FP), R2
    774  	MOVD	b_len+32(FP), R6
    775  	MOVD	a+0(FP), R3
    776  	MOVD	b+24(FP), R5
    777  	LA	ret+48(FP), R7	// R7 = address of the result byte
    778  	CMPBNE	R2, R6, notequal	// different lengths: not equal, no byte comparison needed
    779  	BR	runtime·memeqbody(SB)	// same length (in R6): compare contents
    780  notequal:
    781  	MOVB	$0, ret+48(FP)
    782  	RET
   783  
    784  // input:
    785  //   R3 = a
    786  //   R5 = b
    787  //   R6 = len
    788  //   R7 = address of output byte (stores 0 or 1 here)
    789  //   a and b have the same length
    790  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
    791  	CMPBEQ	R3, R5, equal	// same address: equal without reading memory
    792  loop:
    793  	CMPBEQ	R6, $0, equal	// nothing left to compare
    794  	CMPBLT	R6, $32, tiny	// fewer than 32 bytes: register-based compares
    795  	CMP	R6, $256
    796  	BLT	tail	// 32..255 bytes: one variable-length CLC
    797  	CLC	$256, 0(R3), 0(R5)	// compare a full 256-byte chunk
    798  	BNE	notequal
    799  	SUB	$256, R6
    800  	LA	256(R3), R3
    801  	LA	256(R5), R5
    802  	BR	loop
    803  tail:
    804  	SUB	$1, R6, R8	// R8 = len - 1, handed to the EXRL-executed CLC
    805  	EXRL	$runtime·memeqbodyclc(SB), R8
    806  	BEQ	equal
    807  notequal:
    808  	MOVB	$0, 0(R7)
    809  	RET
    810  equal:
    811  	MOVB	$1, 0(R7)
    812  	RET
    813  tiny:
    814  	MOVD	$0, R2	// R2 = running offset into both buffers
    815  	CMPBLT	R6, $16, lt16
    816  	MOVD	0(R3), R8	// compare bytes 0..7
    817  	MOVD	0(R5), R9
    818  	CMPBNE	R8, R9, notequal
    819  	MOVD	8(R3), R8	// compare bytes 8..15
    820  	MOVD	8(R5), R9
    821  	CMPBNE	R8, R9, notequal
    822  	LA	16(R2), R2	// offset += 16
    823  	SUB	$16, R6
    824  lt16:
    825  	CMPBLT	R6, $8, lt8
    826  	MOVD	0(R3)(R2*1), R8	// compare 8 bytes at the current offset
    827  	MOVD	0(R5)(R2*1), R9
    828  	CMPBNE	R8, R9, notequal
    829  	LA	8(R2), R2	// offset += 8
    830  	SUB	$8, R6
    831  lt8:
    832  	CMPBLT	R6, $4, lt4
    833  	MOVWZ	0(R3)(R2*1), R8	// compare 4 bytes at the current offset
    834  	MOVWZ	0(R5)(R2*1), R9
    835  	CMPBNE	R8, R9, notequal
    836  	LA	4(R2), R2	// offset += 4
    837  	SUB	$4, R6
    838  lt4:	// fewer than 4 bytes left; CHECK(n) stops with "equal" when R6 == n, otherwise compares byte n
    839  #define CHECK(n) \
    840  	CMPBEQ	R6, $n, equal \
    841  	MOVB	n(R3)(R2*1), R8 \
    842  	MOVB	n(R5)(R2*1), R9 \
    843  	CMPBNE	R8, R9, notequal
    844  	CHECK(0)
    845  	CHECK(1)
    846  	CHECK(2)
    847  	CHECK(3)
    848  	BR	equal
   849  
    850  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
    851  	CLC	$1, 0(R3), 0(R5)	// template instruction: memeqbody runs it through EXRL with R8 = len - 1
    852  	RET
   853  
    854  TEXT runtime·fastrand(SB), NOSPLIT, $0-4	// advances m.fastrand by one step and returns the new 32-bit value
    855  	MOVD	g_m(g), R4
    856  	MOVWZ	m_fastrand(R4), R3	// R3 = m.fastrand, zero-extended
    857  	ADD	R3, R3	// R3 = 2 * x
    858  	CMPW	R3, $0	// 32-bit signed compare of the doubled value
    859  	BGE	2(PC)	// non-negative as a 32-bit value: skip the XOR
    860  	XOR	$0x88888eef, R3
    861  	MOVW	R3, m_fastrand(R4)	// store the new state (low 32 bits)
    862  	MOVW	R3, ret+0(FP)
    863  	RET
   864  
    865  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
    866  	MOVD	s+0(FP), R3     // s => R3
    867  	MOVD	s_len+8(FP), R4 // s_len => R4
    868  	MOVBZ	c+24(FP), R5    // c => R5
    869  	MOVD	$ret+32(FP), R2 // &ret => R2
    870  	BR	runtime·indexbytebody(SB)
   871  
    872  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
    873  	MOVD	s+0(FP), R3     // s => R3
    874  	MOVD	s_len+8(FP), R4 // s_len => R4
    875  	MOVBZ	c+16(FP), R5    // c => R5
    876  	MOVD	$ret+24(FP), R2 // &ret => R2
    877  	BR	runtime·indexbytebody(SB)
   878  
    879  // input:
    880  // R3: s
    881  // R4: s_len
    882  // R5: c -- byte sought
    883  // R2: &ret -- address to put index into
    884  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
    885  	CMPBEQ	R4, $0, notfound
    886  	MOVD	R3, R6          // store base for later
    887  	ADD	R3, R4, R8      // the address after the end of the string
    888  	//if the length is small, use loop; otherwise, use vector or srst search
    889  	CMPBGE	R4, $16, large
    890  
    891  residual:	// byte-at-a-time scan of [R3, R8)
    892  	CMPBEQ	R3, R8, notfound
    893  	MOVBZ	0(R3), R7
    894  	LA	1(R3), R3	// advance before the compare, so R3 is one past the byte just read
    895  	CMPBNE	R7, R5, residual
    896  
    897  found:	// R3 points one past the matching byte
    898  	SUB	R6, R3
    899  	SUB	$1, R3	// index = (R3 - base) - 1
    900  	MOVD	R3, 0(R2)
    901  	RET
    902  
    903  notfound:
    904  	MOVD	$-1, 0(R2)
    905  	RET
    906  
    907  large:
    908  	MOVBZ	·cpu+facilities_hasVX(SB), R1
    909  	CMPBNE	R1, $0, vectorimpl
    910  
    911  srstimpl:                       // no vector facility
    912  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
    913  srstloop:
    914  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
    915  	BVS	srstloop        // interrupted - continue
    916  	BGT	notfoundr0
    917  foundr0:
    918  	XOR	R0, R0          // reset R0
    919  	SUB	R6, R8          // remove base
    920  	MOVD	R8, 0(R2)
    921  	RET
    922  notfoundr0:
    923  	XOR	R0, R0          // reset R0
    924  	MOVD	$-1, 0(R2)
    925  	RET
    926  
    927  vectorimpl:
    928  	//if the address is not 16byte aligned, use loop for the header
    929  	MOVD	R3, R8
    930  	AND	$15, R8
    931  	CMPBGT	R8, $0, notaligned
    932  
    933  aligned:
    934  	ADD	R6, R4, R8	// R8 = base + s_len = end address
    935  	MOVD	R8, R7
    936  	AND	$-16, R7	// R7 = end address rounded down to a multiple of 16
    937  	// replicate c across V17
    938  	VLVGB	$0, R5, V19
    939  	VREPB	$0, V19, V17
    940  
    941  vectorloop:
    942  	CMPBGE	R3, R7, residual	// no full 16-byte block left: finish with the byte loop
    943  	VL	0(R3), V16    // load string to be searched into V16
    944  	ADD	$16, R3
    945  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
    946  	BVS	vectorloop
    947  
    948  	// when vector search found c in the string
    949  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
    950  	SUB	$16, R3	// undo the advance: R3 = start of the block that matched
    951  	SUB	R6, R3	// R3 = block offset from base
    952  	ADD	R3, R7	// R7 = block offset + index within the block
    953  	MOVD	R7, 0(R2)
    954  	RET
    955  
    956  notaligned:
    957  	MOVD	R3, R8
    958  	AND	$-16, R8
    959  	ADD     $16, R8	// R8 = next 16-byte boundary above R3
    960  notalignedloop:
    961  	CMPBEQ	R3, R8, aligned	// reached the boundary: continue with the vector loop
    962  	MOVBZ	0(R3), R7
    963  	LA	1(R3), R3
    964  	CMPBNE	R7, R5, notalignedloop
    965  	BR	found
   966  
   967  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets R3 to 0 and returns.
        	// NOTE(review): presumably R3 is where the caller looks for a
        	// result - confirm against the call sites.
   968  	MOVW	$0, R3
   969  	RET
   970  
   971  // _cgo_topofstack is called from cgo wrappers and returns
   972  // g->m->curg.stack.hi in R2. It must obey the gcc calling convention.
   973  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   974  	// The C ABI treats g (R13), R10, R11 and LR (R14) as callee-save, so park them in R1, R3, R4, R5
   975  	MOVD	LR, R4
   976  	MOVD	R11, R5
   977  	MOVD	R10, R3
   978  	MOVD	g, R1
   979  
   980  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   981  	MOVD	g_m(g), R2			// R2 = g.m
   982  	MOVD	m_curg(R2), R2			// R2 = m.curg
   983  	MOVD	(g_stack+stack_hi)(R2), R2	// R2 = curg.stack.hi (the result)
   984  
   985  	MOVD	R5, R11
   986  	MOVD	R3, R10
   987  	MOVD	R1, g
   988  	MOVD	R4, LR
   989  	RET
   990  
   991  // The top-most function running on a goroutine
   992  // returns to goexit+PCQuantum.
        // NOTE(review): the leading 2-byte nop presumably makes goexit+PCQuantum
        // the address of the BL below - confirm PCQuantum is 2 on this port.
   993  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   994  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   995  	BL	runtime·goexit1(SB)	// does not return
   996  	// traceback from goexit1 must hit code range of goexit
   997  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   998  
   999  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
        	// No-op here: returns at once without reading its 8-byte argument.
  1000  	RET
  1001  
  1002  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
        	// No-op here: returns at once without reading its 8-byte argument.
  1003  	RET
  1004  
  1005  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
        	// No-op here: returns at once without reading its 8-byte argument.
  1006  	RET
  1007  
  1008  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
        	// No-op here: returns at once without reading its 8-byte argument.
  1009  	RET
  1010  
  1011  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
        	// Empty stub: takes no arguments and returns immediately.
  1012  	RET
  1013  
  1014  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
        	// The barrier is a single SYNC instruction; no arguments or results.
  1015  	SYNC
  1016  	RET
  1017  
  1018  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Register setup for runtime·cmpbody: (R3, R4) = s1 base/len,
        	// (R5, R6) = s2 base/len, R7 = address of the result slot.
  1019  	LA	ret+32(FP), R7
  1020  	MOVD	s2_len+24(FP), R6
  1021  	MOVD	s2_base+16(FP), R5
  1022  	MOVD	s1_len+8(FP), R4
  1023  	MOVD	s1_base+0(FP), R3
  1024  	BR	runtime·cmpbody(SB)
  1025  
  1026  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Register setup for runtime·cmpbody: (R3, R4) = s1 base/len,
        	// (R5, R6) = s2 base/len, R7 = address of the result slot.
        	// The words at offsets 16 and 40 of the argument frame are not read.
  1027  	LA	res+48(FP), R7
  1028  	MOVD	s2+32(FP), R6
  1029  	MOVD	s2+24(FP), R5
  1030  	MOVD	s1+8(FP), R4
  1031  	MOVD	s1+0(FP), R3
  1032  	BR	runtime·cmpbody(SB)
  1033  
  1034  // input:
  1035  //   R3 = a
  1036  //   R4 = alen
  1037  //   R5 = b
  1038  //   R6 = blen
  1039  //   R7 = address of output word (stores -1/0/1 here)
        //
        // Orders a and b lexicographically. The first min(alen, blen) bytes are
        // compared with CLC in chunks of at most 256 bytes; at the first chunk
        // that differs -1 or 1 is stored. If every compared byte is equal the
        // lengths decide: the shorter operand orders first, equal lengths give 0.
        // R3, R5 and R8 are modified.
  1040  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
  1041  	CMPBEQ	R3, R5, cmplengths	// same start address: only the lengths can differ
  1042  	MOVD	R4, R8
  1043  	CMPBLE	R4, R6, amin
  1044  	MOVD	R6, R8
  1045  amin:
        	// R8 = min(alen, blen) = number of bytes still to compare
  1046  	CMPBEQ	R8, $0, cmplengths
  1047  	CMP	R8, $256
  1048  	BLE	tail
  1049  loop:
        	// More than 256 bytes remain: compare one full 256-byte chunk.
  1050  	CLC	$256, 0(R3), 0(R5)
  1051  	BGT	gt
  1052  	BLT	lt
  1053  	SUB	$256, R8
        	MOVD	$256(R3), R3		// step both operands past the chunk just
        	MOVD	$256(R5), R5		// compared so the next CLC sees new bytes
  1054  	CMP	R8, $256
  1055  	BGT	loop
  1056  tail:
        	// 1..256 bytes remain. R8-1 is the CLC length code; EXRL runs the
        	// CLC at runtime·cmpbodyclc with that code merged into it.
  1057  	SUB	$1, R8
  1058  	EXRL	$runtime·cmpbodyclc(SB), R8
  1059  	BGT	gt
  1060  	BLT	lt
  1061  cmplengths:
        	// all compared bytes were equal: order by length
  1062  	CMP	R4, R6
  1063  	BEQ	eq
  1064  	BLT	lt
  1065  gt:
  1066  	MOVD	$1, 0(R7)
  1067  	RET
  1068  lt:
  1069  	MOVD	$-1, 0(R7)
  1070  	RET
  1071  eq:
  1072  	MOVD	$0, 0(R7)
  1073  	RET
  1074  
  1075  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// CLC template that runtime·cmpbody runs through EXRL, comparing the
        	// bytes at 0(R3) with those at 0(R5). The length written here is 1;
        	// the EXRL site supplies the real length in a register.
  1076  	CLC	$1, 0(R3), 0(R5)
  1077  	RET
  1078  
  1079  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX.
        // NOTE(review): R0 is used as the temporary and is not cleared before
        // RET, while runtime·indexbytebody in this file treats R0 as expected
        // to be 0 - confirm that is acceptable here.
  1080  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1081  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0
  1082  	MOVB	R0, ret+0(FP)
  1083  	RET
  1084  
  1085  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX.
        // NOTE(review): R0 is used as the temporary and is not cleared before
        // RET, while runtime·indexbytebody in this file treats R0 as expected
        // to be 0 - confirm that is acceptable here.
  1086  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1087  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0
  1088  	MOVB	R0, ret+0(FP)
  1089  	RET
  1090  
  1091  // func indexShortStr(s, sep string) int
  1092  // The vx facility must be confirmed available by the caller first.
  1093  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
        	// Register setup for runtime·indexShortStr, then branch to it.
  1094  	MOVD	$ret+32(FP), R5    // R5=&ret
  1095  	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1096  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1097  	BR	runtime·indexShortStr(SB)
  1098  
  1099  // func indexShortStr(s, sep []byte) int
  1100  // The vx facility must be confirmed available by the caller first.
  1101  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
        	// Register setup for runtime·indexShortStr, then branch to it.
  1102  	MOVD	$ret+48(FP), R5    // R5=&ret
  1103  	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1104  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1105  	BR	runtime·indexShortStr(SB)
  1106  
  1107  // s: string we are searching
  1108  // sep: string to search for
  1109  // R1=&s[0], R2=len(s)
  1110  // R3=&sep[0], R4=len(sep)
  1111  // R5=&ret (int)
  1112  // Caller must confirm availability of vx facility before calling.
        //
        // Stores through R5 the index of the first occurrence of sep in s, or -1
        // when there is none (also when sep is empty or longer than s).
        // Separators of 2, 3 and 4 bytes have loops that test 16 start positions
        // per iteration; other lengths up to 64 are compared one start position
        // at a time. Lengths above 64 are not implemented (see index65plus).
        // NOTE(review): several paths load a count into R0 and none re-zeroes it
        // before RET, while runtime·indexbytebody in this file treats R0 as
        // expected to be 0 - confirm callers do not depend on R0 == 0.
  1113  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1114  	CMPBGT	R4, R2, notfound // sep longer than s
  1115  	ADD	R1, R2
  1116  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1117  	CMPBEQ	R4, $0, notfound // empty sep
  1118  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1119  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1120  	MOVD	R1, R7 // R7=current candidate start position in s
  1121  index2plus:
  1122  	CMPBNE	R4, $1, index3plus
  1123  	MOVD	$15(R7), R9
  1124  	CMPBGE	R9, R2, index2to16 // too few positions left for a 16-wide step
  1125  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1126  	VONE	V16
  1127  	VREPH	$0, V0, V1
  1128  	CMPBGE	R9, R2, index2to16 // repeats the test above; R9 and R2 are unchanged since then
  1129  index2loop:
  1130  	VL	0(R7), V2          // 16 bytes, even indices
  1131  	VL	1(R7), V4          // 16 bytes, odd indices
  1132  	VCEQH	V1, V2, V5         // compare even indices
  1133  	VCEQH	V1, V4, V6         // compare odd indices
  1134  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1135  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1136  	BLT	foundV17
  1137  	MOVD	$16(R7), R7        // R7+=16
  1138  	ADD	$15, R7, R9
  1139  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1140  	CMPBLE	R7, R2, index2to16
  1141  	BR	notfound
  1142  
  1143  index3plus:
  1144  	CMPBNE	R4, $2, index4plus
  1145  	ADD	$15, R7, R9
  1146  	CMPBGE	R9, R2, index2to16
  1147  	MOVD	$1, R0             // VLL index: loads R0+1 = 2 bytes
  1148  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1149  	VONE	V16
  1150  	VREPH	$0, V0, V1
  1151  	VREPB	$2, V0, V8
  1152  index3loop:
  1153  	VL	(R7), V2           // load 16-bytes into V2
  1154  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1155  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1156  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1157  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1158  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1159  	VCEQB	V8, V9, V10        // compare last bytes
  1160  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1161  	VN	V7, V10, V7        // AND indices with last byte
  1162  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1163  	BLT	foundV17
  1164  	MOVD	$16(R7), R7        // R7+=16
  1165  	ADD	$15, R7, R9
  1166  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1167  	CMPBLE	R7, R2, index2to16
  1168  	BR	notfound
  1169  
  1170  index4plus:
  1171  	CMPBNE	R4, $3, index5plus
  1172  	ADD	$15, R7, R9
  1173  	CMPBGE	R9, R2, index2to16
  1174  	MOVD	$2, R0             // VLL index: loads R0+1 = 3 bytes
  1175  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1176  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1177  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1178  	VONE	V16
  1179  	VREPF	$0, V0, V1
  1180  index4loop:
  1181  	VL	(R7), V2           // load 16-bytes into V2
  1182  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1183  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1184  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1185  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1186  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1187  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1188  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1189  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1190  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1191  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1192  	VSEL	V13, V14, V31, V7  // final merge
  1193  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1194  	BLT	foundV17
  1195  	MOVD	$16(R7), R7        // R7+=16
  1196  	ADD	$15, R7, R9
  1197  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1198  	CMPBLE	R7, R2, index2to16
  1199  	BR	notfound
  1200  
  1201  index5plus:
  1202  	CMPBGT	R4, $15, index17plus
  1203  index2to16:
        	// One candidate position at a time: load len(sep) bytes of s with
        	// VLL and compare them against V0. The 2/3/4-byte loops above also
        	// branch here when fewer than 16 positions remain.
  1204  	CMPBGT	R7, R2, notfound
  1205  	MOVD	$1(R7), R8
  1206  	CMPBGT	R8, R2, index2to16tail // only one position left
  1207  index2to16loop:
  1208  	// unrolled 2x
  1209  	VLL	R4, (R7), V1
  1210  	VLL	R4, 1(R7), V2
  1211  	VCEQGS	V0, V1, V3
  1212  	BEQ	found
  1213  	MOVD	$1(R7), R7
  1214  	VCEQGS	V0, V2, V4
  1215  	BEQ	found
  1216  	MOVD	$1(R7), R7
  1217  	CMPBLT	R7, R2, index2to16loop
  1218  	CMPBGT	R7, R2, notfound
  1219  index2to16tail:
        	// R7 == R2 here: test the last valid position
  1220  	VLL	R4, (R7), V1
  1221  	VCEQGS	V0, V1, V2
  1222  	BEQ	found
  1223  	BR	notfound
  1224  
  1225  index17plus:
  1226  	CMPBGT	R4, $31, index33plus
  1227  	SUB	$16, R4, R0        // VLL index for the bytes of sep after the first 16
  1228  	VLL	R0, 16(R3), V1     // V1=sep[16:]
  1229  	VONE	V7
  1230  index17to32loop:
  1231  	VL	(R7), V2
  1232  	VLL	R0, 16(R7), V3
  1233  	VCEQG	V0, V2, V4
  1234  	VCEQG	V1, V3, V5
  1235  	VN	V4, V5, V6         // AND the per-block comparison results
  1236  	VCEQGS	V6, V7, V8         // all ones means every block matched
  1237  	BEQ	found
  1238  	MOVD	$1(R7), R7
  1239  	CMPBLE  R7, R2, index17to32loop
  1240  	BR	notfound
  1241  
  1242  index33plus:
  1243  	CMPBGT	R4, $47, index49plus
  1244  	SUB	$32, R4, R0        // VLL index for the bytes of sep after the first 32
  1245  	VL	16(R3), V1
  1246  	VLL	R0, 32(R3), V2
  1247  	VONE	V11
  1248  index33to48loop:
  1249  	VL	(R7), V3
  1250  	VL	16(R7), V4
  1251  	VLL	R0, 32(R7), V5
  1252  	VCEQG	V0, V3, V6
  1253  	VCEQG	V1, V4, V7
  1254  	VCEQG	V2, V5, V8
  1255  	VN	V6, V7, V9
  1256  	VN	V8, V9, V10
  1257  	VCEQGS	V10, V11, V12
  1258  	BEQ	found
  1259  	MOVD	$1(R7), R7
  1260  	CMPBLE  R7, R2, index33to48loop
  1261  	BR	notfound
  1262  
  1263  index49plus:
  1264  	CMPBGT	R4, $63, index65plus
  1265  	SUB	$48, R4, R0        // VLL index for the bytes of sep after the first 48
  1266  	VL	16(R3), V1
  1267  	VL	32(R3), V2
  1268  	VLL	R0, 48(R3), V3
  1269  	VONE	V15
  1270  index49to64loop:
  1271  	VL	(R7), V4
  1272  	VL	16(R7), V5
  1273  	VL	32(R7), V6
  1274  	VLL	R0, 48(R7), V7
  1275  	VCEQG	V0, V4, V8
  1276  	VCEQG	V1, V5, V9
  1277  	VCEQG	V2, V6, V10
  1278  	VCEQG	V3, V7, V11
  1279  	VN	V8, V9, V12
  1280  	VN	V10, V11, V13
  1281  	VN	V12, V13, V14
  1282  	VCEQGS	V14, V15, V16
  1283  	BEQ	found
  1284  	MOVD	$1(R7), R7
  1285  	CMPBLE  R7, R2, index49to64loop
        	// loop exhausted: fall through to notfound
  1286  notfound:
  1287  	MOVD	$-1, (R5)
  1288  	RET
  1289  
  1290  index65plus:
  1291  	// not implemented
        	// NOTE(review): stores through R0, which no path leading here has
        	// written; presumably R0 is 0 so this store faults - confirm.
  1292  	MOVD	$0, (R0)
  1293  	RET
  1294  
  1295  foundV17: // index is in doubleword V17[0]
  1296  	VLGVG	$0, V17, R8
  1297  	ADD	R8, R7             // R7=address of the match
  1298  found:
  1299  	SUB	R1, R7             // address -> index into s
  1300  	MOVD	R7, (R5)
  1301  	RET
  1302  
  1303  // This is called from .init_array and follows the platform, not Go, ABI.
  1304  // We are overly conservative. We could only save the registers we use.
  1305  // However, since this function is only called once per loaded module
  1306  // performance is unimportant.
  1307  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1308  	// Save R6-R15 in the register save area of the calling function.
  1309  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1310  	STMG	R6, R15, 48(R15)
  1311  
  1312  	// append the argument (passed in R2, as per the ELF ABI) to the
  1313  	// moduledata linked list.
  1314  	MOVD	runtime·lastmoduledatap(SB), R1	// R1 = current last element
  1315  	MOVD	R2, moduledata_next(R1)		// last.next = new element
  1316  	MOVD	R2, runtime·lastmoduledatap(SB)	// new element becomes the last
  1317  
  1318  	// Restore R6-R15.
  1319  	LMG	48(R15), R6, R15
  1320  	RET
  1321  
  1322  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Stores 1 into the single result byte.
  1323  	MOVB	$1, ret+0(FP)
  1324  	RET