github.com/eun/go@v0.0.0-20170811110501-92cfd07a6cfd/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0 // program entry: sets up g0/m0 on the OS stack, calls _cgo_init if present, then check/args/osinit/schedinit, newproc(&mainPC) and mstart
    11  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    12  	// C TLS base pointer in AR0:AR1
    13  
    14  	// initialize essential registers
    15  	XOR	R0, R0 // R0 = 0; code in this file uses R0 as a zero register
    16  
    17  	SUB	$24, R15 // reserve 24 bytes on the OS-provided stack for argc/argv
    18  	MOVW	R2, 8(R15) // argc
    19  	MOVD	R3, 16(R15) // argv
    20  
    21  	// create istack out of the given (operating system) stack.
    22  	// _cgo_init may update stackguard.
    23  	MOVD	$runtime·g0(SB), g // g = &runtime.g0
    24  	MOVD	R15, R11
    25  	SUB	$(64*1024), R11 // provisional lower bound: 64KB below the current SP
    26  	MOVD	R11, g_stackguard0(g)
    27  	MOVD	R11, g_stackguard1(g)
    28  	MOVD	R11, (g_stack+stack_lo)(g)
    29  	MOVD	R15, (g_stack+stack_hi)(g)
    30  
    31  	// if there is a _cgo_init, call it using the gcc ABI.
    32  	MOVD	_cgo_init(SB), R11 // R11 = value of _cgo_init; zero means skip the cgo setup
    33  	CMPBEQ	R11, $0, nocgo
    34  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; NOTE(review): original comment said "MOVD is translated to EAR" but the instruction is MOVW — presumably MOVW from an access register assembles to EAR
    35  	SLD	$32, R4, R4
    36  	MOVW	AR1, R4			// arg 2: TLS base pointer
    37  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
    38  	MOVD	g, R2			// arg 0: G
    39  	// C functions expect 160 bytes of space on caller stack frame
    40  	// and an 8-byte aligned stack pointer
    41  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
    42  	SUB	$160, R15		// reserve 160 bytes
    43  	MOVD    $~7, R6
    44  	AND 	R6, R15			// 8-byte align
    45  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
    46  	MOVD	R9, R15			// restore stack
    47  	XOR	R0, R0			// zero R0
    48  
    49  nocgo:
    50  	// update stackguard after _cgo_init
    51  	MOVD	(g_stack+stack_lo)(g), R2
    52  	ADD	$const__StackGuard, R2 // guard = stack.lo + _StackGuard
    53  	MOVD	R2, g_stackguard0(g)
    54  	MOVD	R2, g_stackguard1(g)
    55  
    56  	// set the per-goroutine and per-mach "registers"
    57  	MOVD	$runtime·m0(SB), R2 // R2 = &runtime.m0
    58  
    59  	// save m->g0 = g0
    60  	MOVD	g, m_g0(R2)
    61  	// save m0 to g0->m
    62  	MOVD	R2, g_m(g)
    63  
    64  	BL	runtime·check(SB)
    65  
    66  	// argc/argv are already prepared on stack
    67  	BL	runtime·args(SB)
    68  	BL	runtime·osinit(SB)
    69  	BL	runtime·schedinit(SB)
    70  
    71  	// create a new goroutine to start program
    72  	MOVD	$runtime·mainPC(SB), R2		// entry
    73  	SUB     $24, R15 // temporary 24-byte frame for the newproc call
    74  	MOVD 	R2, 16(R15) // &mainPC
    75  	MOVD 	$0, 8(R15) // presumably newproc's size argument (0) — confirm against newproc's signature
    76  	MOVD 	$0, 0(R15)
    77  	BL	runtime·newproc(SB)
    78  	ADD	$24, R15 // pop the temporary frame
    79  
    80  	// start this M
    81  	BL	runtime·mstart(SB)
    82  
    83  	MOVD	$0, 1(R0) // only reached if mstart returns: store to address 1 (R0 is 0), presumably to crash
    84  	RET
    85  
    86  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB) // 8-byte cell holding the address of runtime·main; rt0_go passes &mainPC to newproc
    87  GLOBL	runtime·mainPC(SB),RODATA,$8 // read-only, 8 bytes
    88  
    89  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0 // breakpoint: performs a store to low memory, then returns
    90  	MOVD	$0, 2(R0) // store to address 2 (R0 is expected to be 0) — presumably faults so a debugger/signal handler gets control
    91  	RET
    92  
    93  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0 // asminit: nothing to do on s390x
    94  	RET // returns immediately
    95  
    96  /*
    97   *  go-routine
    98   */
    99  
   100  // void gosave(Gobuf*)
   101  // save state in Gobuf; setjmp
   102  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   103  	MOVD	buf+0(FP), R3 // R3 = buf
   104  	MOVD	R15, gobuf_sp(R3) // buf.sp = current stack pointer
   105  	MOVD	LR, gobuf_pc(R3) // buf.pc = return address of this call
   106  	MOVD	g, gobuf_g(R3) // buf.g = current g
   107  	MOVD	$0, gobuf_lr(R3) // buf.lr = 0
   108  	MOVD	$0, gobuf_ret(R3) // buf.ret = 0
   109  	// Assert ctxt is zero. See func save.
   110  	MOVD	gobuf_ctxt(R3), R3 // R3 (buf pointer) is no longer needed; reuse it for ctxt
   111  	CMPBEQ	R3, $0, 2(PC) // ctxt == 0: skip the badctxt call
   112  	BL	runtime·badctxt(SB)
   113  	RET
   114  
   115  // void gogo(Gobuf*)
   116  // restore state from Gobuf; longjmp
   117  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   118  	MOVD	buf+0(FP), R5 // R5 = buf
   119  
   120  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   121  	MOVD	gobuf_ctxt(R5), R1
   122  	CMPBEQ	R1, $0, nilctxt
   123  	MOVD	$gobuf_ctxt(R5), R1 // R1 = &buf.ctxt
   124  	MOVD	R1, 8(R15) // first argument: address of the slot about to be cleared
   125  	MOVD	R0, 16(R15) // second argument: R0, i.e. zero
   126  	BL	runtime·writebarrierptr_prewrite(SB)
   127  	MOVD	buf+0(FP), R5 // reload buf: R5 is not assumed to survive the call
   128  
   129  nilctxt:
   130  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   131  	BL	runtime·save_g(SB)
   132  
   133  	MOVD	0(g), R4 // load through g; this is the nil check referred to above (R4 is otherwise unused)
   134  	MOVD	gobuf_sp(R5), R15 // switch to the saved stack pointer
   135  	MOVD	gobuf_lr(R5), LR
   136  	MOVD	gobuf_ret(R5), R3
   137  	MOVD	gobuf_ctxt(R5), R12 // R12 = closure context
   138  	MOVD	$0, gobuf_sp(R5) // clear the consumed fields of buf
   139  	MOVD	$0, gobuf_ret(R5)
   140  	MOVD	$0, gobuf_lr(R5)
   141  	MOVD	$0, gobuf_ctxt(R5)
   142  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   143  	MOVD	gobuf_pc(R5), R6
   144  	BR	(R6) // jump to the saved pc; does not return here
   145  
   146  // void mcall(fn func(*g))
   147  // Switch to m->g0's stack, call fn(g).
   148  // Fn must never return.  It should gogo(&g->sched)
   149  // to keep running g.
   150  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   151  	// Save caller state in g->sched
   152  	MOVD	R15, (g_sched+gobuf_sp)(g)
   153  	MOVD	LR, (g_sched+gobuf_pc)(g)
   154  	MOVD	$0, (g_sched+gobuf_lr)(g)
   155  	MOVD	g, (g_sched+gobuf_g)(g)
   156  
   157  	// Switch to m->g0 & its stack, call fn.
   158  	MOVD	g, R3 // R3 = the calling g (passed to fn below)
   159  	MOVD	g_m(g), R8
   160  	MOVD	m_g0(R8), g // g = m->g0
   161  	BL	runtime·save_g(SB)
   162  	CMP	g, R3
   163  	BNE	2(PC) // caller was not g0: skip the badmcall jump
   164  	BR	runtime·badmcall(SB) // mcall invoked while already on g0
   165  	MOVD	fn+0(FP), R12			// context
   166  	MOVD	0(R12), R4			// code pointer
   167  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
   168  	SUB	$16, R15 // 16-byte frame on g0's stack for the call to fn
   169  	MOVD	R3, 8(R15) // argument: the g that called mcall
   170  	MOVD	$0, 0(R15) // zero the slot at 0(R15)
   171  	BL	(R4)
   172  	BR	runtime·badmcall2(SB) // fn returned, which it must never do
   173  
   174  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   175  // of the G stack.  We need to distinguish the routine that
   176  // lives at the bottom of the G stack from the one that lives
   177  // at the top of the system stack because the one at the top of
   178  // the system stack terminates the stack walk (see topofstack()).
   179  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   180  	UNDEF // not meant to be executed; systemstack only records an address inside this function as a fake PC
   181  	BL	(LR)	// make sure this function is not leaf
   182  	RET
   183  
   184  // func systemstack(fn func())
   185  TEXT runtime·systemstack(SB), NOSPLIT, $0-8 // runs fn on the g0 stack; if already on g0 or gsignal, calls fn directly
   186  	MOVD	fn+0(FP), R3	// R3 = fn
   187  	MOVD	R3, R12		// context
   188  	MOVD	g_m(g), R4	// R4 = m
   189  
   190  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   191  	CMPBEQ	g, R5, noswitch
   192  
   193  	MOVD	m_g0(R4), R5	// R5 = g0
   194  	CMPBEQ	g, R5, noswitch
   195  
   196  	MOVD	m_curg(R4), R6
   197  	CMPBEQ	g, R6, switch
   198  
   199  	// Bad: g is not gsignal, not g0, not curg. What is it?
   200  	// Hide call from linker nosplit analysis.
   201  	MOVD	$runtime·badsystemstack(SB), R3
   202  	BL	(R3) // indirect call; if it returns, execution falls through into switch
   203  
   204  switch:
   205  	// save our state in g->sched.  Pretend to
   206  	// be systemstack_switch if the G stack is scanned.
   207  	MOVD	$runtime·systemstack_switch(SB), R6
   208  	ADD	$16, R6	// get past prologue
   209  	MOVD	R6, (g_sched+gobuf_pc)(g)
   210  	MOVD	R15, (g_sched+gobuf_sp)(g)
   211  	MOVD	$0, (g_sched+gobuf_lr)(g)
   212  	MOVD	g, (g_sched+gobuf_g)(g)
   213  
   214  	// switch to g0
   215  	MOVD	R5, g // R5 still holds m->g0 from the check above
   216  	BL	runtime·save_g(SB)
   217  	MOVD	(g_sched+gobuf_sp)(g), R3
   218  	// make it look like mstart called systemstack on g0, to stop traceback
   219  	SUB	$8, R3
   220  	MOVD	$runtime·mstart(SB), R4
   221  	MOVD	R4, 0(R3) // fake return address: mstart
   222  	MOVD	R3, R15
   223  
   224  	// call target function
   225  	MOVD	0(R12), R3	// code pointer
   226  	BL	(R3)
   227  
   228  	// switch back to g
   229  	MOVD	g_m(g), R3
   230  	MOVD	m_curg(R3), g
   231  	BL	runtime·save_g(SB)
   232  	MOVD	(g_sched+gobuf_sp)(g), R15 // restore the SP saved in g->sched above
   233  	MOVD	$0, (g_sched+gobuf_sp)(g) // clear the saved SP now that it has been consumed
   234  	RET
   235  
   236  noswitch:
   237  	// already on m stack, just call directly
   238  	MOVD	0(R12), R3	// code pointer
   239  	BL	(R3)
   240  	RET
   241  
   242  /*
   243   * support for morestack
   244   */
   245  
   246  // Called during function prolog when more stack is needed.
   247  // Caller has already loaded:
   248  // R3: framesize, R4: argsize, R5: LR
   249  //
   250  // The traceback routines see morestack on a g0 as being
   251  // the top of a stack (for example, morestack calling newstack
   252  // calling the scheduler calling newm calling gc), so we must
   253  // record an argument size. For that purpose, it has no arguments.
   254  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   255  	// Cannot grow scheduler stack (m->g0).
   256  	MOVD	g_m(g), R7 // R7 = m (used throughout the rest of this function)
   257  	MOVD	m_g0(R7), R8
   258  	CMPBNE	g, R8, 3(PC) // not on g0: skip the two calls below
   259  	BL	runtime·badmorestackg0(SB)
   260  	BL	runtime·abort(SB)
   261  
   262  	// Cannot grow signal stack (m->gsignal).
   263  	MOVD	m_gsignal(R7), R8
   264  	CMP	g, R8
   265  	BNE	3(PC) // not on gsignal: skip the two calls below
   266  	BL	runtime·badmorestackgsignal(SB)
   267  	BL	runtime·abort(SB)
   268  
   269  	// Called from f.
   270  	// Set g->sched to context in f.
   271  	MOVD	R15, (g_sched+gobuf_sp)(g)
   272  	MOVD	LR, R8 // LR here is the return address into f
   273  	MOVD	R8, (g_sched+gobuf_pc)(g)
   274  	MOVD	R5, (g_sched+gobuf_lr)(g) // R5 = f's LR, loaded by the caller (see header)
   275  	// newstack will fill gobuf.ctxt.
   276  
   277  	// Called from f.
   278  	// Set m->morebuf to f's caller.
   279  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   280  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   281  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   282  
   283  	// Call newstack on m->g0's stack.
   284  	MOVD	m_g0(R7), g
   285  	BL	runtime·save_g(SB)
   286  	MOVD	(g_sched+gobuf_sp)(g), R15
   287  	// Create a stack frame on g0 to call newstack.
   288  	MOVD	$0, -16(R15)	// Zero saved LR in frame
   289  	SUB	$16, R15
   290  	MOVD	R12, 8(R15)	// ctxt argument
   291  	BL	runtime·newstack(SB)
   292  
   293  	// Not reached, but make sure the return PC from the call to newstack
   294  	// is still in this function, and not the beginning of the next.
   295  	UNDEF
   296  
   297  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 // morestack variant for callers without a closure context
   298  	MOVD	$0, R12 // clear R12, which morestack passes to newstack as the ctxt argument
   299  	BR	runtime·morestack(SB) // tail-jump; LR is left untouched
   300  
   301  // reflectcall: call a function with the given argument list
   302  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   303  // we don't have variable-sized frames, so we use a small number
   304  // of constant-sized-frame functions to encode a few bits of size in the pc.
   305  // Caution: ugly multiline assembly macros in your future!
   306  
   307  #define DISPATCH(NAME,MAXSIZE)		\
   308  	MOVD	$MAXSIZE, R4;		\
   309  	CMP	R3, R4;		/* R3 = argsize, loaded by reflectcall */ \
   310  	BGT	3(PC);			/* argsize > MAXSIZE: skip the jump, fall through to the next size class */ \
   311  	MOVD	$NAME(SB), R5;	\
   312  	BR	(R5)
   313  // Note: can't just "BR NAME(SB)" - bad inlining results.
   314  
   315  TEXT reflect·call(SB), NOSPLIT, $0-0 // reflect.call is an alias that forwards to reflectcall in this package
   316  	BR	·reflectcall(SB) // tail-jump; arguments and LR are left as the caller set them
   317  
   318  TEXT ·reflectcall(SB), NOSPLIT, $-8-32 // jumps to the smallest callNNN whose frame size is >= argsize
   319  	MOVWZ argsize+24(FP), R3 // R3 = argsize (32-bit, zero-extended); compared by each DISPATCH below
   320  	DISPATCH(runtime·call32, 32)
   321  	DISPATCH(runtime·call64, 64)
   322  	DISPATCH(runtime·call128, 128)
   323  	DISPATCH(runtime·call256, 256)
   324  	DISPATCH(runtime·call512, 512)
   325  	DISPATCH(runtime·call1024, 1024)
   326  	DISPATCH(runtime·call2048, 2048)
   327  	DISPATCH(runtime·call4096, 4096)
   328  	DISPATCH(runtime·call8192, 8192)
   329  	DISPATCH(runtime·call16384, 16384)
   330  	DISPATCH(runtime·call32768, 32768)
   331  	DISPATCH(runtime·call65536, 65536)
   332  	DISPATCH(runtime·call131072, 131072)
   333  	DISPATCH(runtime·call262144, 262144)
   334  	DISPATCH(runtime·call524288, 524288)
   335  	DISPATCH(runtime·call1048576, 1048576)
   336  	DISPATCH(runtime·call2097152, 2097152)
   337  	DISPATCH(runtime·call4194304, 4194304)
   338  	DISPATCH(runtime·call8388608, 8388608)
   339  	DISPATCH(runtime·call16777216, 16777216)
   340  	DISPATCH(runtime·call33554432, 33554432)
   341  	DISPATCH(runtime·call67108864, 67108864)
   342  	DISPATCH(runtime·call134217728, 134217728)
   343  	DISPATCH(runtime·call268435456, 268435456)
   344  	DISPATCH(runtime·call536870912, 536870912)
   345  	DISPATCH(runtime·call1073741824, 1073741824)
   346  	MOVD	$runtime·badreflectcall(SB), R5 // argsize exceeds the largest size class
   347  	BR	(R5)
   348  
   349  #define CALLFN(NAME,MAXSIZE)			\
   350  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   351  	NO_LOCAL_POINTERS;			\
   352  	/* copy arguments to stack */		\
   353  	MOVD	arg+16(FP), R4;			/* R4 = source pointer */ \
   354  	MOVWZ	argsize+24(FP), R5;		/* R5 = bytes left to copy */ \
   355  	MOVD	$stack-MAXSIZE(SP), R6;		/* R6 = destination: base of this frame's locals */ \
   356  loopArgs: /* copy 256 bytes at a time */	\
   357  	CMP	R5, $256;			\
   358  	BLT	tailArgs;			\
   359  	SUB	$256, R5;			\
   360  	MVC	$256, 0(R4), 0(R6);		\
   361  	MOVD	$256(R4), R4;			\
   362  	MOVD	$256(R6), R6;			\
   363  	BR	loopArgs;			\
   364  tailArgs: /* copy remaining bytes */		\
   365  	CMP	R5, $0;				\
   366  	BEQ	callFunction;			\
   367  	SUB	$1, R5;				/* R5 = remaining - 1 for the EXRL below */ \
   368  	EXRL	$callfnMVC<>(SB), R5;		/* execute the MVC at callfnMVC with R5 supplying its length */ \
   369  callFunction:					\
   370  	MOVD	f+8(FP), R12;			/* R12 = f (closure context) */ \
   371  	MOVD	(R12), R8;			/* R8 = code pointer */ \
   372  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   373  	BL	(R8);				\
   374  	/* copy return values back */		\
   375  	MOVD	argtype+0(FP), R7;		\
   376  	MOVD	arg+16(FP), R6;			\
   377  	MOVWZ	n+24(FP), R5;			/* same FP offset as argsize above */ \
   378  	MOVD	$stack-MAXSIZE(SP), R4;		\
   379  	MOVWZ	retoffset+28(FP), R1;		\
   380  	ADD	R1, R4;				/* R4 = stack copy + retoffset */ \
   381  	ADD	R1, R6;				/* R6 = arg + retoffset */ \
   382  	SUB	R1, R5;				/* R5 = argsize - retoffset */ \
   383  	BL	callRet<>(SB);			/* register arguments: R7, R6, R4, R5 */ \
   384  	RET
   385  
   386  // callRet copies return values back at the end of call*. This is a
   387  // separate function so it can allocate stack space for the arguments
   388  // to reflectcallmove. It does not follow the Go ABI; it expects its
   389  // arguments in registers.
   390  TEXT callRet<>(SB), NOSPLIT, $32-0
   391  	MOVD	R7, 8(R15) // R7 = argtype (set by CALLFN)
   392  	MOVD	R6, 16(R15) // R6 = arg + retoffset
   393  	MOVD	R4, 24(R15) // R4 = stack copy + retoffset
   394  	MOVD	R5, 32(R15) // R5 = argsize - retoffset
   395  	BL	runtime·reflectcallmove(SB)
   396  	RET
   397  
   398  CALLFN(·call32, 32) // one callNNN function per power-of-two frame size, 32 bytes through 1GB; these are the targets reflectcall dispatches to
   399  CALLFN(·call64, 64)
   400  CALLFN(·call128, 128)
   401  CALLFN(·call256, 256)
   402  CALLFN(·call512, 512)
   403  CALLFN(·call1024, 1024)
   404  CALLFN(·call2048, 2048)
   405  CALLFN(·call4096, 4096)
   406  CALLFN(·call8192, 8192)
   407  CALLFN(·call16384, 16384)
   408  CALLFN(·call32768, 32768)
   409  CALLFN(·call65536, 65536)
   410  CALLFN(·call131072, 131072)
   411  CALLFN(·call262144, 262144)
   412  CALLFN(·call524288, 524288)
   413  CALLFN(·call1048576, 1048576)
   414  CALLFN(·call2097152, 2097152)
   415  CALLFN(·call4194304, 4194304)
   416  CALLFN(·call8388608, 8388608)
   417  CALLFN(·call16777216, 16777216)
   418  CALLFN(·call33554432, 33554432)
   419  CALLFN(·call67108864, 67108864)
   420  CALLFN(·call134217728, 134217728)
   421  CALLFN(·call268435456, 268435456)
   422  CALLFN(·call536870912, 536870912)
   423  CALLFN(·call1073741824, 1073741824)
   424  
   425  // Not a function: target for EXRL (execute relative long) instruction.
   426  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   427  	MVC	$1, 0(R4), 0(R6) // template copy from 0(R4) to 0(R6); CALLFN's tail copy executes it via EXRL with R5 as the length operand
   428  
   429  TEXT runtime·procyield(SB),NOSPLIT,$0-0 // procyield: implemented as a no-op here
   430  	RET // returns immediately without spinning
   431  
   432  // void jmpdefer(fv, sp);
   433  // called from deferreturn.
   434  // 1. grab stored LR for caller
   435  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   436  // 3. BR to fn
   437  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   438  	MOVD	0(R15), R1 // step 1: R1 = LR stored at 0(R15)
   439  	SUB	$6, R1, LR // step 2: LR = R1 - 6, so fn returns to the BL deferreturn instruction
   440  
   441  	MOVD	fv+0(FP), R12 // R12 = fv (closure context)
   442  	MOVD	argp+8(FP), R15 // SP = the sp argument
   443  	SUB	$8, R15 // then 8 bytes below it
   444  	MOVD	0(R12), R3 // R3 = code pointer of fv
   445  	BR	(R3) // step 3: jump (not call) to fn
   446  
   447  // Save state of caller into g->sched. Smashes R1.
   448  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   449  	MOVD	LR, (g_sched+gobuf_pc)(g) // sched.pc = return address into the caller
   450  	MOVD	R15, (g_sched+gobuf_sp)(g) // sched.sp = caller's SP (NOFRAME: this function does not move R15)
   451  	MOVD	$0, (g_sched+gobuf_lr)(g)
   452  	MOVD	$0, (g_sched+gobuf_ret)(g)
   453  	// Assert ctxt is zero. See func save.
   454  	MOVD	(g_sched+gobuf_ctxt)(g), R1
   455  	CMPBEQ	R1, $0, 2(PC) // ctxt == 0: skip the badctxt call
   456  	BL	runtime·badctxt(SB)
   457  	RET
   458  
   459  // func asmcgocall(fn, arg unsafe.Pointer) int32
   460  // Call fn(arg) on the scheduler stack,
   461  // aligned appropriately for the gcc ABI.
   462  // See cgocall.go for more details.
   463  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   464  	// R2 = original SP; R3 = fn; R4 = arg; R5 = old g; R6 = temp; R13 = g; R15 = stack pointer
   465  	// C TLS base pointer in AR0:AR1
   466  	MOVD	fn+0(FP), R3
   467  	MOVD	arg+8(FP), R4
   468  
   469  	MOVD	R15, R2		// save original stack pointer
   470  	MOVD	g, R5 // R5 = g on entry
   471  
   472  	// Figure out if we need to switch to m->g0 stack.
   473  	// We get called to create new OS threads too, and those
   474  	// come in on the m->g0 stack already.
   475  	MOVD	g_m(g), R6
   476  	MOVD	m_g0(R6), R6 // R6 = m->g0
   477  	CMPBEQ	R6, g, g0 // already on g0: no switch needed
   478  	BL	gosave<>(SB) // record caller state in g->sched (smashes R1)
   479  	MOVD	R6, g
   480  	BL	runtime·save_g(SB)
   481  	MOVD	(g_sched+gobuf_sp)(g), R15 // switch to g0's stack
   482  
   483  	// Now on a scheduling stack (a pthread-created stack).
   484  g0:
   485  	// Save room for two of our pointers, plus 160 bytes of callee
   486  	// save area that lives on the caller stack.
   487  	SUB	$176, R15
   488  	MOVD	$~7, R6
   489  	AND	R6, R15                 // 8-byte alignment for gcc ABI
   490  	MOVD	R5, 168(R15)             // save old g on stack
   491  	MOVD	(g_stack+stack_hi)(R5), R5
   492  	SUB	R2, R5 // R5 = old g's stack.hi - original SP
   493  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   494  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   495  	MOVD	R4, R2                  // arg in R2
   496  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   497  
   498  	XOR	R0, R0                  // set R0 back to 0.
   499  	// Restore g, stack pointer.
   500  	MOVD	168(R15), g
   501  	BL	runtime·save_g(SB)
   502  	MOVD	(g_stack+stack_hi)(g), R5
   503  	MOVD	160(R15), R6
   504  	SUB	R6, R5 // SP = current stack.hi - saved depth
   505  	MOVD	R5, R15
   506  
   507  	MOVW	R2, ret+16(FP) // store the low 32 bits of R2 as the int32 result (presumably fn's C return value — confirm against the s390x C ABI)
   508  	RET
   509  
   510  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   511  // Turn the fn into a Go func (by taking its address) and call
   512  // cgocallback_gofunc.
   513  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   514  	MOVD	$fn+0(FP), R3 // R3 = address of the fn argument slot (note the $)
   515  	MOVD	R3, 8(R15) // argument 0: &fn
   516  	MOVD	frame+8(FP), R3
   517  	MOVD	R3, 16(R15) // argument 1: frame
   518  	MOVD	framesize+16(FP), R3
   519  	MOVD	R3, 24(R15) // argument 2: framesize
   520  	MOVD	ctxt+24(FP), R3
   521  	MOVD	R3, 32(R15) // argument 3: ctxt
   522  	MOVD	$runtime·cgocallback_gofunc(SB), R3
   523  	BL	(R3) // indirect call to cgocallback_gofunc
   524  	RET
   525  
   526  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   527  // See cgocall.go for more details.
   528  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   529  	NO_LOCAL_POINTERS
   530  
   531  	// Load m and g from thread-local storage.
   532  	MOVB	runtime·iscgo(SB), R3
   533  	CMPBEQ	R3, $0, nocgo
   534  	BL	runtime·load_g(SB)
   535  
   536  nocgo:
   537  	// If g is nil, Go did not create the current thread.
   538  	// Call needm to obtain one for temporary use.
   539  	// In this case, we're running on the thread stack, so there's
   540  	// lots of space, but the linker doesn't know. Hide the call from
   541  	// the linker analysis by using an indirect call.
   542  	CMPBEQ	g, $0, needm
   543  
   544  	MOVD	g_m(g), R8 // R8 = m (used below after havem)
   545  	MOVD	R8, savedm-8(SP) // remember the m we had on entry
   546  	BR	havem
   547  
   548  needm:
   549  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   550  	MOVD	$runtime·needm(SB), R3
   551  	BL	(R3)
   552  
   553  	// Set m->sched.sp = SP, so that if a panic happens
   554  	// during the function we are about to execute, it will
   555  	// have a valid SP to run on the g0 stack.
   556  	// The next few lines (after the havem label)
   557  	// will save this SP onto the stack and then write
   558  	// the same SP back to m->sched.sp. That seems redundant,
   559  	// but if an unrecovered panic happens, unwindm will
   560  	// restore the g->sched.sp from the stack location
   561  	// and then systemstack will try to use it. If we don't set it here,
   562  	// that restored SP will be uninitialized (typically 0) and
   563  	// will not be usable.
   564  	MOVD	g_m(g), R8
   565  	MOVD	m_g0(R8), R3
   566  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   567  
   568  havem:
   569  	// Now there's a valid m, and we're running on its m->g0.
   570  	// Save current m->g0->sched.sp on stack and then set it to SP.
   571  	// Save current sp in m->g0->sched.sp in preparation for
   572  	// switch back to m->curg stack.
   573  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R15) aka savedsp-16(SP).
   574  	MOVD	m_g0(R8), R3
   575  	MOVD	(g_sched+gobuf_sp)(R3), R4
   576  	MOVD	R4, savedsp-16(SP)
   577  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   578  
   579  	// Switch to m->curg stack and call runtime.cgocallbackg.
   580  	// Because we are taking over the execution of m->curg
   581  	// but *not* resuming what had been running, we need to
   582  	// save that information (m->curg->sched) so we can restore it.
   583  	// We can restore m->curg->sched.sp easily, because calling
   584  	// runtime.cgocallbackg leaves SP unchanged upon return.
   585  	// To save m->curg->sched.pc, we push it onto the stack.
   586  	// This has the added benefit that it looks to the traceback
   587  	// routine like cgocallbackg is going to return to that
   588  	// PC (because the frame we allocate below has the same
   589  	// size as cgocallback_gofunc's frame declared above)
   590  	// so that the traceback will seamlessly trace back into
   591  	// the earlier calls.
   592  	//
   593  	// In the new goroutine, -8(SP) is unused (where SP refers to
   594  	// m->curg's SP while we're setting it up, before we've adjusted it).
   595  	MOVD	m_curg(R8), g
   596  	BL	runtime·save_g(SB)
   597  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   598  	MOVD	(g_sched+gobuf_pc)(g), R5
   599  	MOVD	R5, -24(R4) // push curg's saved pc at the base of the new 24-byte frame
   600  	MOVD	ctxt+24(FP), R5
   601  	MOVD	R5, -16(R4) // ctxt argument for cgocallbackg
   602  	MOVD	$-24(R4), R15 // SP = curg's saved sp - 24
   603  	BL	runtime·cgocallbackg(SB)
   604  
   605  	// Restore g->sched (== m->curg->sched) from saved values.
   606  	MOVD	0(R15), R5 // the pc pushed above
   607  	MOVD	R5, (g_sched+gobuf_pc)(g)
   608  	MOVD	$24(R15), R4 // undo the 24-byte adjustment
   609  	MOVD	R4, (g_sched+gobuf_sp)(g)
   610  
   611  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   612  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   613  	// so we do not have to restore it.)
   614  	MOVD	g_m(g), R8
   615  	MOVD	m_g0(R8), g
   616  	BL	runtime·save_g(SB)
   617  	MOVD	(g_sched+gobuf_sp)(g), R15
   618  	MOVD	savedsp-16(SP), R4
   619  	MOVD	R4, (g_sched+gobuf_sp)(g)
   620  
   621  	// If the m on entry was nil, we called needm above to borrow an m
   622  	// for the duration of the call. Since the call is over, return it with dropm.
   623  	MOVD	savedm-8(SP), R6
   624  	CMPBNE	R6, $0, droppedm // had an m on entry: nothing to drop
   625  	MOVD	$runtime·dropm(SB), R3
   626  	BL	(R3)
   627  droppedm:
   628  
   629  	// Done!
   630  	RET
   631  
   632  // void setg(G*); set g. for use by needm.
   633  TEXT runtime·setg(SB), NOSPLIT, $0-8
   634  	MOVD	gg+0(FP), g // g register = the argument
   635  	// This only happens if iscgo, so jump straight to save_g
   636  	BL	runtime·save_g(SB)
   637  	RET
   638  
   639  // void setg_gcc(G*); set g in C TLS.
   640  // Must obey the gcc calling convention.
   641  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   642  	// The standard prologue clobbers LR (R14), which is callee-save in
   643  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   644  	MOVD	LR, R1 // R1 = saved LR
   645  	// Also save g, R10, and R11 since they're callee-save in C ABI
   646  	MOVD	R10, R3 // R3 = saved R10
   647  	MOVD	g, R4 // R4 = saved g
   648  	MOVD	R11, R5 // R5 = saved R11
   649  
   650  	MOVD	R2, g // R2 carries the G* argument
   651  	BL	runtime·save_g(SB)
   652  
   653  	MOVD	R5, R11 // restore the saved registers
   654  	MOVD	R4, g
   655  	MOVD	R3, R10
   656  	MOVD	R1, LR
   657  	RET
   658  
   659  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 // returns the value found at 16(R15) of this function's frame as the caller's pc
   660  	MOVD	16(R15), R3		// LR saved by caller
   661  	MOVD	R3, ret+8(FP) // result is at offset 8; the first 8 bytes of the argument area are not read here
   662  	RET
   663  
   664  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0 // abort: crash the program; never returns normally
   665  	MOVW	(R0), R0 // load through R0 (expected to be 0), i.e. from address 0 — presumably faults
   666  	UNDEF // undefined instruction in case the load did not fault
   667  
   668  // int64 runtime·cputicks(void)
   669  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   670  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   671  	// This means that since about 1972 the msb has been set, making the
   672  	// result of a call to STORE CLOCK (stck) a negative number.
   673  	// We clear the msb to make it positive.
   674  	STCK	ret+0(FP)      // serialises before and after call
   675  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   676  	SLD	$1, R3 // shift the msb out...
   677  	SRD	$1, R3 // ...and shift back, leaving the msb clear
   678  	MOVD	R3, ret+0(FP)
   679  	RET
   680  
   681  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   682  // redirects to memhash(p, h, size) using the size
   683  // stored in the closure.
   684  TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
   685  	GO_ARGS
   686  	NO_LOCAL_POINTERS
   687  	MOVD	p+0(FP), R3
   688  	MOVD	h+8(FP), R4
   689  	MOVD	8(R12), R5 // size, read from offset 8 of the closure in R12
   690  	MOVD	R3, 8(R15) // memhash argument 0: p
   691  	MOVD	R4, 16(R15) // memhash argument 1: h
   692  	MOVD	R5, 24(R15) // memhash argument 2: size
   693  	BL	runtime·memhash(SB)
   694  	MOVD	32(R15), R3 // memhash's result slot
   695  	MOVD	R3, ret+16(FP)
   696  	RET
   697  
   698  // AES hashing not implemented for s390x
   699  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   700  	MOVW	(R0), R15 // load from address 0 (R0 is expected to be 0) — presumably faults if this stub is ever called
   701  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   702  	MOVW	(R0), R15 // same stub body as aeshash
   703  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   704  	MOVW	(R0), R15 // same stub body as aeshash
   705  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   706  	MOVW	(R0), R15 // same stub body as aeshash
   707  
   708  // memequal(a, b unsafe.Pointer, size uintptr) bool
   709  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   710  	MOVD	a+0(FP), R3 // R3 = a
   711  	MOVD	b+8(FP), R5 // R5 = b
   712  	MOVD	size+16(FP), R6 // R6 = len
   713  	LA	ret+24(FP), R7 // R7 = address of the bool result
   714  	BR	runtime·memeqbody(SB) // tail-jump; memeqbody stores the result through R7 and returns to our caller
   715  
   716  // memequal_varlen(a, b unsafe.Pointer) bool
   717  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   718  	MOVD	a+0(FP), R3 // R3 = a
   719  	MOVD	b+8(FP), R5 // R5 = b
   720  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   721  	LA	ret+16(FP), R7 // R7 = address of the bool result
   722  	BR	runtime·memeqbody(SB) // tail-jump; memeqbody stores the result through R7
   723  
   724  // eqstring tests whether two strings are equal.
   725  // The compiler guarantees that strings passed
   726  // to eqstring have equal length.
   727  // See runtime_test.go:eqstring_generic for
   728  // equivalent Go code.
   729  TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33
   730  	MOVD	s1_base+0(FP), R3 // R3 = s1 data
   731  	MOVD	s1_len+8(FP), R6 // R6 = len(s1); s2's length is not read (equal by contract)
   732  	MOVD	s2_base+16(FP), R5 // R5 = s2 data
   733  	LA	ret+32(FP), R7 // R7 = address of the bool result
   734  	BR	runtime·memeqbody(SB) // tail-jump; memeqbody stores the result through R7
   735  
   736  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49 // bytes.Equal(a, b []byte) bool: false if lengths differ, otherwise memeqbody decides
   737  	MOVD	a_len+8(FP), R2 // R2 = len(a)
   738  	MOVD	b_len+32(FP), R6 // R6 = len(b); also the length handed to memeqbody
   739  	MOVD	a+0(FP), R3 // R3 = a data
   740  	MOVD	b+24(FP), R5 // R5 = b data
   741  	LA	ret+48(FP), R7 // R7 = address of the bool result
   742  	CMPBNE	R2, R6, notequal // different lengths can never be equal
   743  	BR	runtime·memeqbody(SB)
   744  notequal:
   745  	MOVB	$0, ret+48(FP)
   746  	RET
   747  
   748  // input:
   749  //   R3 = a
   750  //   R5 = b
   751  //   R6 = len
   752  //   R7 = address of output byte (stores 0 or 1 here)
   753  //   a and b have the same length
   754  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   755  	CMPBEQ	R3, R5, equal // same pointer: equal without reading memory
   756  loop:
   757  	CMPBEQ	R6, $0, equal // nothing left to compare
   758  	CMPBLT	R6, $32, tiny // fewer than 32 bytes: use register compares
   759  	CMP	R6, $256
   760  	BLT	tail // 32..255 bytes: one CLC of variable length
   761  	CLC	$256, 0(R3), 0(R5) // compare a full 256-byte chunk
   762  	BNE	notequal
   763  	SUB	$256, R6
   764  	LA	256(R3), R3
   765  	LA	256(R5), R5
   766  	BR	loop
   767  tail:
   768  	SUB	$1, R6, R8 // R8 = len - 1 for the EXRL below
   769  	EXRL	$runtime·memeqbodyclc(SB), R8 // execute the CLC at memeqbodyclc with R8 supplying its length
   770  	BEQ	equal
   771  notequal:
   772  	MOVB	$0, 0(R7)
   773  	RET
   774  equal:
   775  	MOVB	$1, 0(R7)
   776  	RET
   777  tiny:
   778  	MOVD	$0, R2 // R2 = byte offset already compared
   779  	CMPBLT	R6, $16, lt16
   780  	MOVD	0(R3), R8 // compare bytes 0..7
   781  	MOVD	0(R5), R9
   782  	CMPBNE	R8, R9, notequal
   783  	MOVD	8(R3), R8 // compare bytes 8..15
   784  	MOVD	8(R5), R9
   785  	CMPBNE	R8, R9, notequal
   786  	LA	16(R2), R2 // offset += 16
   787  	SUB	$16, R6
   788  lt16:
   789  	CMPBLT	R6, $8, lt8
   790  	MOVD	0(R3)(R2*1), R8 // compare the next 8 bytes at the current offset
   791  	MOVD	0(R5)(R2*1), R9
   792  	CMPBNE	R8, R9, notequal
   793  	LA	8(R2), R2
   794  	SUB	$8, R6
   795  lt8:
   796  	CMPBLT	R6, $4, lt4
   797  	MOVWZ	0(R3)(R2*1), R8 // compare the next 4 bytes at the current offset
   798  	MOVWZ	0(R5)(R2*1), R9
   799  	CMPBNE	R8, R9, notequal
   800  	LA	4(R2), R2
   801  	SUB	$4, R6
   802  lt4: // at most 3 bytes remain; CHECK(n) reports equal if exactly n remain, otherwise compares the byte at offset n
   803  #define CHECK(n) \
   804  	CMPBEQ	R6, $n, equal \
   805  	MOVB	n(R3)(R2*1), R8 \
   806  	MOVB	n(R5)(R2*1), R9 \
   807  	CMPBNE	R8, R9, notequal
   808  	CHECK(0)
   809  	CHECK(1)
   810  	CHECK(2)
   811  	CHECK(3)
   812  	BR	equal
   813  
   814  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0 // holds the CLC that memeqbody's tail path executes via EXRL
   815  	CLC	$1, 0(R3), 0(R5) // template compare of 0(R3) with 0(R5); memeqbody supplies the length through EXRL
   816  	RET
   817  
   818  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 // loads the slice arguments into registers and tail-jumps to indexbytebody
   819  	MOVD	s+0(FP), R3     // s => R3
   820  	MOVD	s_len+8(FP), R4 // s_len => R4
   821  	MOVBZ	c+24(FP), R5    // c => R5
   822  	MOVD	$ret+32(FP), R2 // &ret => R2
   823  	BR	runtime·indexbytebody(SB)
   824  
   825  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32 // loads the string arguments into registers and tail-jumps to indexbytebody
   826  	MOVD	s+0(FP), R3     // s => R3
   827  	MOVD	s_len+8(FP), R4 // s_len => R4
   828  	MOVBZ	c+16(FP), R5    // c => R5
   829  	MOVD	$ret+24(FP), R2 // &ret => R2
   830  	BR	runtime·indexbytebody(SB)
   831  
   832  // input:
   833  // R3: s
   834  // R4: s_len
   835  // R5: c -- byte sought
   836  // R2: &ret -- address to put index into
   837  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   838  	CMPBEQ	R4, $0, notfound // empty input: -1
   839  	MOVD	R3, R6          // store base for later
   840  	ADD	R3, R4, R8      // the address after the end of the string
   841  	//if the length is small, use loop; otherwise, use vector or srst search
   842  	CMPBGE	R4, $16, large
   843  
   844  residual:
   845  	CMPBEQ	R3, R8, notfound // reached the end address in R8
   846  	MOVBZ	0(R3), R7
   847  	LA	1(R3), R3 // advance first, so R3 is one past the byte just loaded
   848  	CMPBNE	R7, R5, residual
   849  
   850  found:
   851  	SUB	R6, R3 // R3 = (address after the match) - base
   852  	SUB	$1, R3 // minus one: index of the matching byte
   853  	MOVD	R3, 0(R2)
   854  	RET
   855  
   856  notfound:
   857  	MOVD	$-1, 0(R2)
   858  	RET
   859  
   860  large:
   861  	MOVBZ	·cpu+facilities_hasVX(SB), R1 // R1 = hasVX flag
   862  	CMPBNE	R1, $0, vectorimpl
   863  
   864  srstimpl:                       // no vector facility
   865  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   866  srstloop:
   867  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   868  	BVS	srstloop        // interrupted - continue
   869  	BGT	notfoundr0
   870  foundr0:
   871  	XOR	R0, R0          // reset R0
   872  	SUB	R6, R8          // remove base
   873  	MOVD	R8, 0(R2) // result index = R8 - base
   874  	RET
   875  notfoundr0:
   876  	XOR	R0, R0          // reset R0
   877  	MOVD	$-1, 0(R2)
   878  	RET
   879  
   880  vectorimpl:
   881  	//if the address is not 16byte aligned, use loop for the header
   882  	MOVD	R3, R8 // R8 is reused as scratch here; aligned recomputes the end address
   883  	AND	$15, R8
   884  	CMPBGT	R8, $0, notaligned
   885  
   886  aligned:
   887  	ADD	R6, R4, R8 // R8 = base + len = end address (for the residual loop)
   888  	MOVD	R8, R7
   889  	AND	$-16, R7 // R7 = end address rounded down to a multiple of 16
   890  	// replicate c across V17
   891  	VLVGB	$0, R5, V19
   892  	VREPB	$0, V19, V17
   893  
   894  vectorloop:
   895  	CMPBGE	R3, R7, residual // fewer than 16 bytes before the rounded end: finish byte by byte
   896  	VL	0(R3), V16    // load string to be searched into V16
   897  	ADD	$16, R3
   898  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   899  	BVS	vectorloop // branch taken when no match was found in this chunk
   900  
   901  	// when vector search found c in the string
   902  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   903  	SUB	$16, R3 // back to the start of the chunk that matched
   904  	SUB	R6, R3 // chunk offset from base
   905  	ADD	R3, R7 // plus the index within the chunk
   906  	MOVD	R7, 0(R2)
   907  	RET
   908  
   909  notaligned:
   910  	MOVD	R3, R8
   911  	AND	$-16, R8
   912  	ADD     $16, R8 // R8 = next 16-byte boundary above R3
   913  notalignedloop:
   914  	CMPBEQ	R3, R8, aligned // reached the boundary: switch to the vector loop
   915  	MOVBZ	0(R3), R7
   916  	LA	1(R3), R3
   917  	CMPBNE	R7, R5, notalignedloop
   918  	BR	found
   919  
   920  TEXT runtime·return0(SB), NOSPLIT, $0 // return0: sets R3 to 0 and returns
   921  	MOVW	$0, R3 // R3 = 0
   922  	RET
   923  
   924  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   925  // Must obey the gcc calling convention.
        // The result is left in R2. R1, R3, R4 and R5 are used as scratch to hold
        // the registers that are restored before returning.
   926  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   927  	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   928  	MOVD	g, R1
   929  	MOVD	R10, R3
   930  	MOVD	LR, R4	// the BL below overwrites LR, so keep our return address
   931  	MOVD	R11, R5
   932  
   933  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   934  	MOVD	g_m(g), R2	// R2 = g->m
   935  	MOVD	m_curg(R2), R2	// R2 = g->m->curg
   936  	MOVD	(g_stack+stack_hi)(R2), R2	// R2 = g->m->curg.stack.hi (the result)
   937  
        	// restore the registers saved on entry
   938  	MOVD	R1, g
   939  	MOVD	R3, R10
   940  	MOVD	R4, LR
   941  	MOVD	R5, R11
   942  	RET
   943  
   944  // The top-most function running on a goroutine
   945  // returns to goexit+PCQuantum.
        // NOTE(review): the leading 2-byte nop presumably occupies exactly one
        // PCQuantum so that goexit+PCQuantum is the BL below; confirm PCQuantum for s390x.
   946  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   947  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   948  	BL	runtime·goexit1(SB)	// does not return
   949  	// traceback from goexit1 must hit code range of goexit
        	// (the return address of the BL above must still lie inside this function, hence the padding)
   950  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   951  
   952  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
        	// Takes no arguments and produces no result; the body does nothing but return.
   953  	RET
   954  
   955  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
        	// Issues a single SYNC and returns; no arguments, no result.
        	// NOTE(review): SYNC is presumably the assembler's memory-barrier mnemonic; confirm which machine instruction it emits.
   956  	SYNC
   957  	RET
   958  
   959  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads the two string arguments into the registers expected by
        	// runtime·cmpbody and jumps there; cmpbody stores -1/0/1 into the
        	// result slot and returns directly to our caller.
   960  	MOVD	s1_base+0(FP), R3	// R3 = a
   961  	MOVD	s1_len+8(FP), R4	// R4 = alen
   962  	MOVD	s2_base+16(FP), R5	// R5 = b
   963  	MOVD	s2_len+24(FP), R6	// R6 = blen
   964  	LA	ret+32(FP), R7	// R7 = address of the result word
   965  	BR	runtime·cmpbody(SB)	// tail jump (BR, not BL): no return to this function
   966  
   967  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Loads pointer and length of both slice arguments into the registers
        	// expected by runtime·cmpbody and jumps there; cmpbody stores -1/0/1
        	// into the result slot and returns directly to our caller.
        	// The words at offsets 16 and 40 of the frame are not read here
        	// (presumably the slice capacities).
   968  	MOVD	s1+0(FP), R3	// R3 = a
   969  	MOVD	s1+8(FP), R4	// R4 = alen
   970  	MOVD	s2+24(FP), R5	// R5 = b
   971  	MOVD	s2+32(FP), R6	// R6 = blen
   972  	LA	res+48(FP), R7	// R7 = address of the result word
   973  	BR	runtime·cmpbody(SB)	// tail jump (BR, not BL): no return to this function
   974  
   975  // input:
   976  //   R3 = a
   977  //   R4 = alen
   978  //   R5 = b
   979  //   R6 = blen
   980  //   R7 = address of output word (stores -1/0/1 here)
        //
        // Compares the first min(alen, blen) bytes of a and b in chunks of at most
        // 256 bytes. The first differing chunk decides the result; if all compared
        // bytes are equal the result is decided by comparing alen with blen.
        // Clobbers R3, R5 and R8.
   981  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
   982  	CMPBEQ	R3, R5, cmplengths	// same base pointer: only the lengths can differ
   983  	MOVD	R4, R8
   984  	CMPBLE	R4, R6, amin
   985  	MOVD	R6, R8
   986  amin:
        	// R8 = min(alen, blen) = number of bytes still to compare
   987  	CMPBEQ	R8, $0, cmplengths
   988  	CMP	R8, $256
   989  	BLE	tail
   990  loop:
        	// more than 256 bytes remain: compare one full 256-byte chunk
   991  	CLC	$256, 0(R3), 0(R5)
   992  	BGT	gt
   993  	BLT	lt
   994  	SUB	$256, R8
        	LA	256(R3), R3	// advance both operands past the chunk just compared;
        	LA	256(R5), R5	// otherwise every pass would re-read the first 256 bytes
   995  	CMP	R8, $256
   996  	BGT	loop
   997  tail:
        	// 1 <= R8 <= 256 bytes remain. EXRL executes the CLC at
        	// runtime·cmpbodyclc with its length field taken from R8; the field
        	// encodes length-1, hence the decrement.
   998  	SUB	$1, R8
   999  	EXRL	$runtime·cmpbodyclc(SB), R8
  1000  	BGT	gt
  1001  	BLT	lt
  1002  cmplengths:
        	// all compared bytes are equal: the shorter operand sorts first
  1003  	CMP	R4, R6
  1004  	BEQ	eq
  1005  	BLT	lt
  1006  gt:
  1007  	MOVD	$1, 0(R7)
  1008  	RET
  1009  lt:
  1010  	MOVD	$-1, 0(R7)
  1011  	RET
  1012  eq:
  1013  	MOVD	$0, 0(R7)
  1014  	RET
  1015  
  1016  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// Instruction template used as the target of the EXRL in runtime·cmpbody:
        	// a CLC of 0(R3) against 0(R5). The $1 length written here is only a
        	// placeholder; EXRL supplies the effective length from its register operand.
  1017  	CLC	$1, 0(R3), 0(R5)
  1018  	RET
  1019  
  1020  // func supportsVX() bool
        // Returns the byte stored in the hasVX field of runtime·cpu's facilities.
  1021  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1022  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// zero-extending byte load of the flag
  1023  	MOVB	R0, ret+0(FP)	// store it as the bool result
  1024  	RET
  1025  
  1026  // func supportsVX() bool
        // Returns the byte stored in the hasVX field of runtime·cpu's facilities.
  1027  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1028  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// zero-extending byte load of the flag
  1029  	MOVB	R0, ret+0(FP)	// store it as the bool result
  1030  	RET
  1031  
  1032  // func indexShortStr(s, sep string) int
  1033  // Caller must confirm availability of vx facility before calling.
        // Thin wrapper: loads the string headers into the registers expected by
        // runtime·indexShortStr and jumps there; that routine stores the result
        // through R5 and returns directly to our caller.
  1034  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1035  	LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
  1036  	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1037  	MOVD	$ret+32(FP), R5	// R5=&ret
  1038  	BR	runtime·indexShortStr(SB)	// tail jump (BR, not BL)
  1039  
  1040  // func indexShortStr(s, sep []byte) int
  1041  // Caller must confirm availability of vx facility before calling.
        // Thin wrapper: loads pointer and length of both slices into the registers
        // expected by runtime·indexShortStr and jumps there; that routine stores
        // the result through R5 and returns directly to our caller.
  1042  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1043  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1044  	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1045  	MOVD	$ret+48(FP), R5	// R5=&ret
  1046  	BR	runtime·indexShortStr(SB)	// tail jump (BR, not BL)
  1047  
  1048  // s: string we are searching
  1049  // sep: string to search for
  1050  // R1=&s[0], R2=len(s)
  1051  // R3=&sep[0], R4=len(sep)
  1052  // R5=&ret (int)
  1053  // Caller must confirm availability of vx facility before calling.
        //
        // Stores through R5 the byte offset in s of the match found, or -1 when
        // there is none (an empty sep is also reported as -1).
        // The code dispatches on len(sep): dedicated vector loops for lengths
        // 2, 3 and 4, a generic loop for lengths up to 16, and one loop each for
        // 17-32, 33-48 and 49-64 bytes. Lengths above 64 are not implemented.
        // Register use inside the routine:
        //   R2 = address of the last position in s where a match can start
        //   R4 = len(sep)-1
        //   R7 = current search position in s
        //   V0 = first (up to) 16 bytes of sep
  1054  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1055  	CMPBGT	R4, R2, notfound	// sep longer than s: no match possible
  1056  	ADD	R1, R2
  1057  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1058  	CMPBEQ	R4, $0, notfound	// empty sep is reported as -1
  1059  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1060  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1061  	MOVD	R1, R7	// R7 = current search position, starts at &s[0]
  1062  index2plus:
        	// len(sep) == 2: test 16 start positions per iteration
  1063  	CMPBNE	R4, $1, index3plus
  1064  	MOVD	$15(R7), R9
  1065  	CMPBGE	R9, R2, index2to16	// fewer than 16 start positions left: use the generic loop
  1066  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1067  	VONE	V16
  1068  	VREPH	$0, V0, V1
        	// NOTE(review): same comparison as two instructions above and neither
        	// R9 nor R2 has changed since, so this branch looks redundant.
  1069  	CMPBGE	R9, R2, index2to16
  1070  index2loop:
  1071  	VL	0(R7), V2          // 16 bytes, even indices
  1072  	VL	1(R7), V4          // 16 bytes, odd indices
  1073  	VCEQH	V1, V2, V5         // compare even indices
  1074  	VCEQH	V1, V4, V6         // compare odd indices
  1075  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1076  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1077  	BLT	foundV17
  1078  	MOVD	$16(R7), R7        // R7+=16
  1079  	ADD	$15, R7, R9
  1080  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1081  	CMPBLE	R7, R2, index2to16 // remaining start positions are handled by the generic loop
  1082  	BR	notfound
  1083  
  1084  index3plus:
        	// len(sep) == 3: like the 2-byte case, plus a byte compare of the third byte
  1085  	CMPBNE	R4, $2, index4plus
  1086  	ADD	$15, R7, R9
  1087  	CMPBGE	R9, R2, index2to16	// fewer than 16 start positions left: use the generic loop
  1088  	MOVD	$1, R0	// VLL index for the 2 bytes loaded past the 16-byte window
  1089  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1090  	VONE	V16
  1091  	VREPH	$0, V0, V1
  1092  	VREPB	$2, V0, V8
  1093  index3loop:
  1094  	VL	(R7), V2           // load 16-bytes into V2
  1095  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1096  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1097  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1098  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1099  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1100  	VCEQB	V8, V9, V10        // compare last bytes
  1101  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1102  	VN	V7, V10, V7        // AND indices with last byte
  1103  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1104  	BLT	foundV17
  1105  	MOVD	$16(R7), R7        // R7+=16
  1106  	ADD	$15, R7, R9
  1107  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1108  	CMPBLE	R7, R2, index2to16 // remaining start positions are handled by the generic loop
  1109  	BR	notfound
  1110  
  1111  index4plus:
        	// len(sep) == 4: compare 4-byte words at byte offsets 0, 1, 2 and 3
  1112  	CMPBNE	R4, $3, index5plus
  1113  	ADD	$15, R7, R9
  1114  	CMPBGE	R9, R2, index2to16	// fewer than 16 start positions left: use the generic loop
  1115  	MOVD	$2, R0	// VLL index for the 3 bytes loaded past the 16-byte window
  1116  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1117  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1118  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1119  	VONE	V16
  1120  	VREPF	$0, V0, V1
  1121  index4loop:
  1122  	VL	(R7), V2           // load 16-bytes into V2
  1123  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1124  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1125  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1126  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1127  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1128  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1129  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1130  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1131  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1132  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1133  	VSEL	V13, V14, V31, V7  // final merge
  1134  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1135  	BLT	foundV17
  1136  	MOVD	$16(R7), R7        // R7+=16
  1137  	ADD	$15, R7, R9
  1138  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1139  	CMPBLE	R7, R2, index2to16 // remaining start positions are handled by the generic loop
  1140  	BR	notfound
  1141  
  1142  index5plus:
  1143  	CMPBGT	R4, $15, index17plus	// len(sep) > 16: needs more than one vector
  1144  index2to16:
        	// Generic loop for len(sep) <= 16 (also the tail of the loops above):
        	// load len(sep) bytes at the current position and compare them with V0.
  1145  	CMPBGT	R7, R2, notfound
  1146  	MOVD	$1(R7), R8
  1147  	CMPBGT	R8, R2, index2to16tail	// only one start position left
  1148  index2to16loop:
  1149  	// unrolled 2x
  1150  	VLL	R4, (R7), V1
  1151  	VLL	R4, 1(R7), V2
  1152  	VCEQGS	V0, V1, V3
  1153  	BEQ	found	// match at R7
  1154  	MOVD	$1(R7), R7
  1155  	VCEQGS	V0, V2, V4
  1156  	BEQ	found	// match at the following position (R7 already advanced)
  1157  	MOVD	$1(R7), R7
  1158  	CMPBLT	R7, R2, index2to16loop	// at least two start positions left
  1159  	CMPBGT	R7, R2, notfound
  1160  index2to16tail:
        	// exactly one start position left (R7 == R2)
  1161  	VLL	R4, (R7), V1
  1162  	VCEQGS	V0, V1, V2
  1163  	BEQ	found
  1164  	BR	notfound
  1165  
  1166  index17plus:
        	// 17 <= len(sep) <= 32: V0 holds the first 16 bytes of sep, V1 the rest
  1167  	CMPBGT	R4, $31, index33plus
  1168  	SUB	$16, R4, R0	// R0 = VLL index for the bytes beyond the first 16
  1169  	VLL	R0, 16(R3), V1
  1170  	VONE	V7
  1171  index17to32loop:
  1172  	VL	(R7), V2
  1173  	VLL	R0, 16(R7), V3
  1174  	VCEQG	V0, V2, V4
  1175  	VCEQG	V1, V3, V5
  1176  	VN	V4, V5, V6	// AND the per-vector compare results
  1177  	VCEQGS	V6, V7, V8	// compare the combined result against all ones (V7)
  1178  	BEQ	found
  1179  	MOVD	$1(R7), R7	// advance one byte
  1180  	CMPBLE  R7, R2, index17to32loop
  1181  	BR	notfound
  1182  
  1183  index33plus:
        	// 33 <= len(sep) <= 48: sep is held in V0, V1 and V2
  1184  	CMPBGT	R4, $47, index49plus
  1185  	SUB	$32, R4, R0	// R0 = VLL index for the bytes beyond the first 32
  1186  	VL	16(R3), V1
  1187  	VLL	R0, 32(R3), V2
  1188  	VONE	V11
  1189  index33to48loop:
  1190  	VL	(R7), V3
  1191  	VL	16(R7), V4
  1192  	VLL	R0, 32(R7), V5
  1193  	VCEQG	V0, V3, V6
  1194  	VCEQG	V1, V4, V7
  1195  	VCEQG	V2, V5, V8
  1196  	VN	V6, V7, V9
  1197  	VN	V8, V9, V10	// AND of all three compare results
  1198  	VCEQGS	V10, V11, V12	// compare the combined result against all ones (V11)
  1199  	BEQ	found
  1200  	MOVD	$1(R7), R7	// advance one byte
  1201  	CMPBLE  R7, R2, index33to48loop
  1202  	BR	notfound
  1203  
  1204  index49plus:
        	// 49 <= len(sep) <= 64: sep is held in V0, V1, V2 and V3
  1205  	CMPBGT	R4, $63, index65plus
  1206  	SUB	$48, R4, R0	// R0 = VLL index for the bytes beyond the first 48
  1207  	VL	16(R3), V1
  1208  	VL	32(R3), V2
  1209  	VLL	R0, 48(R3), V3
  1210  	VONE	V15
  1211  index49to64loop:
  1212  	VL	(R7), V4
  1213  	VL	16(R7), V5
  1214  	VL	32(R7), V6
  1215  	VLL	R0, 48(R7), V7
  1216  	VCEQG	V0, V4, V8
  1217  	VCEQG	V1, V5, V9
  1218  	VCEQG	V2, V6, V10
  1219  	VCEQG	V3, V7, V11
  1220  	VN	V8, V9, V12
  1221  	VN	V10, V11, V13
  1222  	VN	V12, V13, V14	// AND of all four compare results
  1223  	VCEQGS	V14, V15, V16	// compare the combined result against all ones (V15)
  1224  	BEQ	found
  1225  	MOVD	$1(R7), R7	// advance one byte
  1226  	CMPBLE  R7, R2, index49to64loop
        	// falls through to notfound once every start position has been tried
  1227  notfound:
  1228  	MOVD	$-1, (R5)
  1229  	RET
  1230  
  1231  index65plus:
  1232  	// not implemented
        	// NOTE(review): no result is written through R5 on this path. The store
        	// below goes through R0, which has not been set up here; presumably a
        	// deliberate fault for unsupported lengths (len(sep) > 64). Confirm.
  1233  	MOVD	$0, (R0)
  1234  	RET
  1235  
  1236  foundV17: // index is in doubleword V17[0]
  1237  	VLGVG	$0, V17, R8
  1238  	ADD	R8, R7	// R7 = address of the match
  1239  found:
  1240  	SUB	R1, R7	// convert the address into an offset from &s[0]
  1241  	MOVD	R7, (R5)
  1242  	RET
  1243  
  1244  // This is called from .init_array and follows the platform, not Go, ABI.
  1245  // We are overly conservative. We could only save the registers we use.
  1246  // However, since this function is only called once per loaded module
  1247  // performance is unimportant.
        // R1 is the only register written by the body itself.
  1248  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1249  	// Save R6-R15 in the register save area of the calling function.
  1250  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1251  	STMG	R6, R15, 48(R15)
  1252  
  1253  	// append the argument (passed in R2, as per the ELF ABI) to the
  1254  	// moduledata linked list.
  1255  	MOVD	runtime·lastmoduledatap(SB), R1	// R1 = current tail of the list
  1256  	MOVD	R2, moduledata_next(R1)	// tail.next = new module
  1257  	MOVD	R2, runtime·lastmoduledatap(SB)	// the new module becomes the tail
  1258  
  1259  	// Restore R6-R15.
  1260  	LMG	48(R15), R6, R15
  1261  	RET
  1262  
  1263  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 into the one-byte result slot and returns.
  1264  	MOVB	$1, ret+0(FP)
  1265  	RET