github.com/karrick/go@v0.0.0-20170817181416-d5b0ec858b37/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0	// runtime entry: set up g0/m0 on the OS stack, run runtime init, queue runtime·main, start this M
    11  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    12  	// C TLS base pointer in AR0:AR1
    13  
    14  	// initialize essential registers
    15  	XOR	R0, R0	// R0 = 0 (it is zeroed again after the C call below)
    16  
    17  	SUB	$24, R15	// reserve 24 bytes; argc/argv are stored at 8 and 16
    18  	MOVW	R2, 8(R15) // argc
    19  	MOVD	R3, 16(R15) // argv
    20  
    21  	// create istack out of the given (operating system) stack.
    22  	// _cgo_init may update stackguard.
    23  	MOVD	$runtime·g0(SB), g
    24  	MOVD	R15, R11
    25  	SUB	$(64*1024), R11	// provisional lower bound: 64KB below the current SP
    26  	MOVD	R11, g_stackguard0(g)
    27  	MOVD	R11, g_stackguard1(g)
    28  	MOVD	R11, (g_stack+stack_lo)(g)
    29  	MOVD	R15, (g_stack+stack_hi)(g)
    30  
    31  	// if there is a _cgo_init, call it using the gcc ABI.
    32  	MOVD	_cgo_init(SB), R11
    33  	CMPBEQ	R11, $0, nocgo	// no _cgo_init pointer: skip the C call
    34  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
    35  	SLD	$32, R4, R4
    36  	MOVW	AR1, R4			// arg 2: TLS base pointer
    37  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
    38  	MOVD	g, R2			// arg 0: G
    39  	// C functions expect 160 bytes of space on caller stack frame
    40  	// and an 8-byte aligned stack pointer
    41  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
    42  	SUB	$160, R15		// reserve 160 bytes
    43  	MOVD    $~7, R6
    44  	AND 	R6, R15			// 8-byte align
    45  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
    46  	MOVD	R9, R15			// restore stack
    47  	XOR	R0, R0			// zero R0
    48  
    49  nocgo:
    50  	// update stackguard after _cgo_init
    51  	MOVD	(g_stack+stack_lo)(g), R2	// re-read stack.lo (the comment above notes _cgo_init may have changed it)
    52  	ADD	$const__StackGuard, R2
    53  	MOVD	R2, g_stackguard0(g)
    54  	MOVD	R2, g_stackguard1(g)
    55  
    56  	// set the per-goroutine and per-mach "registers"
    57  	MOVD	$runtime·m0(SB), R2
    58  
    59  	// save m->g0 = g0
    60  	MOVD	g, m_g0(R2)
    61  	// save m0 to g0->m
    62  	MOVD	R2, g_m(g)
    63  
    64  	BL	runtime·check(SB)
    65  
    66  	// argc/argv are already prepared on stack
    67  	BL	runtime·args(SB)
    68  	BL	runtime·osinit(SB)
    69  	BL	runtime·schedinit(SB)
    70  
    71  	// create a new goroutine to start program
    72  	MOVD	$runtime·mainPC(SB), R2		// entry
    73  	SUB     $24, R15	// temporary 24-byte outgoing frame for the newproc call
    74  	MOVD 	R2, 16(R15)	// address of mainPC
    75  	MOVD 	$0, 8(R15)	// zero; presumably newproc's argument-size parameter — confirm against newproc's signature
    76  	MOVD 	$0, 0(R15)	// zero the slot at the bottom of the frame
    77  	BL	runtime·newproc(SB)
    78  	ADD	$24, R15	// drop the temporary frame
    79  
    80  	// start this M
    81  	BL	runtime·mstart(SB)
    82  
    83  	MOVD	$0, 1(R0)	// only reached if mstart returns; store to address 1 (R0 is zero), presumably a deliberate fault
    84  	RET
    85  
    86  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte word holding the address of runtime·main
    87  GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only, 8 bytes; rt0_go passes its address to newproc
    88  
    89  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
    90  	MOVD	$0, 2(R0)	// store to address 2 off R0; assumes R0 is zero so this faults — NOTE(review): confirm this is the intended trap mechanism
    91  	RET
    92  
    93  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0	// no-op on s390x: returns immediately
    94  	RET
    95  
    96  /*
    97   *  go-routine
    98   */
    99  
   100  // void gosave(Gobuf*)
   101  // save state (sp, pc, g) in Gobuf and clear its lr and ret fields; setjmp
   102  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   103  	MOVD	buf+0(FP), R3	// R3 = buf
   104  	MOVD	R15, gobuf_sp(R3)	// buf.sp = current stack pointer
   105  	MOVD	LR, gobuf_pc(R3)	// buf.pc = link register, i.e. where this call returns to
   106  	MOVD	g, gobuf_g(R3)
   107  	MOVD	$0, gobuf_lr(R3)
   108  	MOVD	$0, gobuf_ret(R3)
   109  	// Assert ctxt is zero. See func save.
   110  	MOVD	gobuf_ctxt(R3), R3	// R3 is reused: buf pointer is no longer needed
   111  	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call and return
   112  	BL	runtime·badctxt(SB)
   113  	RET
   114  
   115  // void gogo(Gobuf*)
   116  // restore state from Gobuf and jump to its saved pc; longjmp
   117  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   118  	MOVD	buf+0(FP), R5	// R5 = buf
   119  
   120  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   121  	MOVD	gobuf_ctxt(R5), R1
   122  	CMPBEQ	R1, $0, nilctxt
   123  	MOVD	$gobuf_ctxt(R5), R1	// R1 = &buf.ctxt
   124  	MOVD	R1, 8(R15)	// first argument: address of the ctxt slot
   125  	MOVD	R0, 16(R15)	// second argument: contents of R0 (kept at zero elsewhere in this file)
   126  	BL	runtime·writebarrierptr_prewrite(SB)
   127  	MOVD	buf+0(FP), R5	// reload buf after the call
   128  
   129  nilctxt:
   130  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   131  	BL	runtime·save_g(SB)
   132  
   133  	MOVD	0(g), R4	// load through g (faults if g is nil); R4 is not used afterwards in this function
   134  	MOVD	gobuf_sp(R5), R15	// switch to the saved stack pointer
   135  	MOVD	gobuf_lr(R5), LR
   136  	MOVD	gobuf_ret(R5), R3
   137  	MOVD	gobuf_ctxt(R5), R12	// R12 carries the closure context
   138  	MOVD	$0, gobuf_sp(R5)	// clear the fields that were just consumed
   139  	MOVD	$0, gobuf_ret(R5)
   140  	MOVD	$0, gobuf_lr(R5)
   141  	MOVD	$0, gobuf_ctxt(R5)
   142  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   143  	MOVD	gobuf_pc(R5), R6
   144  	BR	(R6)	// resume at the saved pc; does not return here
   145  
   146  // void mcall(fn func(*g))
   147  // Switch to m->g0's stack, call fn(g).
   148  // Fn must never return.  It should gogo(&g->sched)
   149  // to keep running g.
   150  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   151  	// Save caller state in g->sched
   152  	MOVD	R15, (g_sched+gobuf_sp)(g)
   153  	MOVD	LR, (g_sched+gobuf_pc)(g)
   154  	MOVD	$0, (g_sched+gobuf_lr)(g)
   155  	MOVD	g, (g_sched+gobuf_g)(g)
   156  
   157  	// Switch to m->g0 & its stack, call fn.
   158  	MOVD	g, R3	// R3 = the calling g, later passed to fn
   159  	MOVD	g_m(g), R8
   160  	MOVD	m_g0(R8), g
   161  	BL	runtime·save_g(SB)
   162  	CMP	g, R3
   163  	BNE	2(PC)	// caller was not g0: skip the badmcall jump
   164  	BR	runtime·badmcall(SB)	// mcall invoked while already on g0
   165  	MOVD	fn+0(FP), R12			// context
   166  	MOVD	0(R12), R4			// code pointer
   167  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
   168  	SUB	$16, R15	// frame for the call: slot at 0 plus one argument at 8
   169  	MOVD	R3, 8(R15)	// argument: the g that called mcall
   170  	MOVD	$0, 0(R15)	// zero the slot at the bottom of the frame
   171  	BL	(R4)
   172  	BR	runtime·badmcall2(SB)	// fn returned, which it must never do
   173  
   174  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   175  // of the G stack.  We need to distinguish the routine that
   176  // lives at the bottom of the G stack from the one that lives
   177  // at the top of the system stack because the one at the top of
   178  // the system stack terminates the stack walk (see topofstack()).
   179  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   180  	UNDEF	// systemstack only records an address inside this function (entry+16) as a saved pc
   181  	BL	(LR)	// make sure this function is not leaf
   182  	RET
   183  
   184  // func systemstack(fn func())
   185  TEXT runtime·systemstack(SB), NOSPLIT, $0-8	// runs fn on the g0 stack; calls it directly when already on g0 or gsignal
   186  	MOVD	fn+0(FP), R3	// R3 = fn
   187  	MOVD	R3, R12		// context
   188  	MOVD	g_m(g), R4	// R4 = m
   189  
   190  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   191  	CMPBEQ	g, R5, noswitch
   192  
   193  	MOVD	m_g0(R4), R5	// R5 = g0
   194  	CMPBEQ	g, R5, noswitch
   195  
   196  	MOVD	m_curg(R4), R6
   197  	CMPBEQ	g, R6, switch
   198  
   199  	// Bad: g is not gsignal, not g0, not curg. What is it?
   200  	// Hide call from linker nosplit analysis.
   201  	MOVD	$runtime·badsystemstack(SB), R3
   202  	BL	(R3)
   203  
   204  switch:
   205  	// save our state in g->sched.  Pretend to
   206  	// be systemstack_switch if the G stack is scanned.
   207  	MOVD	$runtime·systemstack_switch(SB), R6
   208  	ADD	$16, R6	// get past prologue
   209  	MOVD	R6, (g_sched+gobuf_pc)(g)
   210  	MOVD	R15, (g_sched+gobuf_sp)(g)
   211  	MOVD	$0, (g_sched+gobuf_lr)(g)
   212  	MOVD	g, (g_sched+gobuf_g)(g)
   213  
   214  	// switch to g0
   215  	MOVD	R5, g	// R5 still holds m->g0 from the check above
   216  	BL	runtime·save_g(SB)
   217  	MOVD	(g_sched+gobuf_sp)(g), R3
   218  	// make it look like mstart called systemstack on g0, to stop traceback
   219  	SUB	$8, R3
   220  	MOVD	$runtime·mstart(SB), R4
   221  	MOVD	R4, 0(R3)	// fake return address: mstart
   222  	MOVD	R3, R15
   223  
   224  	// call target function
   225  	MOVD	0(R12), R3	// code pointer
   226  	BL	(R3)
   227  
   228  	// switch back to g
   229  	MOVD	g_m(g), R3
   230  	MOVD	m_curg(R3), g
   231  	BL	runtime·save_g(SB)
   232  	MOVD	(g_sched+gobuf_sp)(g), R15	// restore the goroutine's stack pointer saved at switch
   233  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved sp now that it has been consumed
   234  	RET
   235  
   236  noswitch:
   237  	// already on m stack, just call directly
   238  	MOVD	0(R12), R3	// code pointer
   239  	BL	(R3)
   240  	RET
   241  
   242  /*
   243   * support for morestack
   244   */
   245  
   246  // Called during function prolog when more stack is needed.
   247  // Caller has already loaded:
   248  // R3: framesize, R4: argsize, R5: LR
   249  //
   250  // The traceback routines see morestack on a g0 as being
   251  // the top of a stack (for example, morestack calling newstack
   252  // calling the scheduler calling newm calling gc), so we must
   253  // record an argument size. For that purpose, it has no arguments.
   254  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   255  	// Cannot grow scheduler stack (m->g0).
   256  	MOVD	g_m(g), R7	// R7 = m, used throughout this function
   257  	MOVD	m_g0(R7), R8
   258  	CMPBNE	g, R8, 3(PC)	// not on g0: skip the two error calls
   259  	BL	runtime·badmorestackg0(SB)
   260  	BL	runtime·abort(SB)
   261  
   262  	// Cannot grow signal stack (m->gsignal).
   263  	MOVD	m_gsignal(R7), R8
   264  	CMP	g, R8
   265  	BNE	3(PC)	// not on gsignal: skip the two error calls
   266  	BL	runtime·badmorestackgsignal(SB)
   267  	BL	runtime·abort(SB)
   268  
   269  	// Called from f.
   270  	// Set g->sched to context in f.
   271  	MOVD	R15, (g_sched+gobuf_sp)(g)
   272  	MOVD	LR, R8
   273  	MOVD	R8, (g_sched+gobuf_pc)(g)	// pc = return address into f
   274  	MOVD	R5, (g_sched+gobuf_lr)(g)	// lr = f's own saved LR, passed in R5
   275  	// newstack will fill gobuf.ctxt.
   276  
   277  	// Called from f.
   278  	// Set m->morebuf to f's caller.
   279  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   280  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   281  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   282  
   283  	// Call newstack on m->g0's stack.
   284  	MOVD	m_g0(R7), g
   285  	BL	runtime·save_g(SB)
   286  	MOVD	(g_sched+gobuf_sp)(g), R15
   287  	// Create a stack frame on g0 to call newstack.
   288  	MOVD	$0, -16(R15)	// Zero saved LR in frame
   289  	SUB	$16, R15
   290  	MOVD	R12, 8(R15)	// ctxt argument
   291  	BL	runtime·newstack(SB)
   292  
   293  	// Not reached, but make sure the return PC from the call to newstack
   294  	// is still in this function, and not the beginning of the next.
   295  	UNDEF
   296  
   297  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0	// morestack entry for callers without a closure context
   298  	MOVD	$0, R12	// R12 = 0: morestack passes R12 to newstack as the ctxt argument
   299  	BR	runtime·morestack(SB)	// tail jump; LR is left untouched
   300  
   301  // reflectcall: call a function with the given argument list
   302  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   303  // we don't have variable-sized frames, so we use a small number
   304  // of constant-sized-frame functions to encode a few bits of size in the pc.
   305  // Caution: ugly multiline assembly macros in your future!
   306  
   307  #define DISPATCH(NAME,MAXSIZE)		\
   308  	MOVD	$MAXSIZE, R4;		\
   309  	CMP	R3, R4;		/* R3 = argsize, loaded by reflectcall */ \
   310  	BGT	3(PC);			/* argsize > MAXSIZE: skip the two-instruction jump and try the next size */ \
   311  	MOVD	$NAME(SB), R5;	\
   312  	BR	(R5)
   313  // Note: can't just "BR NAME(SB)" - bad inlining results.
   314  
   315  TEXT reflect·call(SB), NOSPLIT, $0-0	// entry point under the reflect name; forwards to ·reflectcall
   316  	BR	·reflectcall(SB)	// tail jump: arguments and LR are left untouched
   317  
   318  TEXT ·reflectcall(SB), NOSPLIT, $-8-32	// jumps to the smallest callNNN wrapper whose frame size is >= argsize
   319  	MOVWZ argsize+24(FP), R3	// R3 = argsize (32-bit, zero-extended); compared by each DISPATCH
   320  	DISPATCH(runtime·call32, 32)
   321  	DISPATCH(runtime·call64, 64)
   322  	DISPATCH(runtime·call128, 128)
   323  	DISPATCH(runtime·call256, 256)
   324  	DISPATCH(runtime·call512, 512)
   325  	DISPATCH(runtime·call1024, 1024)
   326  	DISPATCH(runtime·call2048, 2048)
   327  	DISPATCH(runtime·call4096, 4096)
   328  	DISPATCH(runtime·call8192, 8192)
   329  	DISPATCH(runtime·call16384, 16384)
   330  	DISPATCH(runtime·call32768, 32768)
   331  	DISPATCH(runtime·call65536, 65536)
   332  	DISPATCH(runtime·call131072, 131072)
   333  	DISPATCH(runtime·call262144, 262144)
   334  	DISPATCH(runtime·call524288, 524288)
   335  	DISPATCH(runtime·call1048576, 1048576)
   336  	DISPATCH(runtime·call2097152, 2097152)
   337  	DISPATCH(runtime·call4194304, 4194304)
   338  	DISPATCH(runtime·call8388608, 8388608)
   339  	DISPATCH(runtime·call16777216, 16777216)
   340  	DISPATCH(runtime·call33554432, 33554432)
   341  	DISPATCH(runtime·call67108864, 67108864)
   342  	DISPATCH(runtime·call134217728, 134217728)
   343  	DISPATCH(runtime·call268435456, 268435456)
   344  	DISPATCH(runtime·call536870912, 536870912)
   345  	DISPATCH(runtime·call1073741824, 1073741824)
   346  	MOVD	$runtime·badreflectcall(SB), R5	// argsize exceeds the largest wrapper
   347  	BR	(R5)
   348  
   349  #define CALLFN(NAME,MAXSIZE)			\
   350  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   351  	NO_LOCAL_POINTERS;			\
   352  	/* copy arguments to stack */		\
   353  	MOVD	arg+16(FP), R4;			/* R4 = source: caller's argument buffer */ \
   354  	MOVWZ	argsize+24(FP), R5;		/* R5 = bytes left to copy */ \
   355  	MOVD	$stack-MAXSIZE(SP), R6;		/* R6 = destination: base of this frame's MAXSIZE area */ \
   356  loopArgs: /* copy 256 bytes at a time */	\
   357  	CMP	R5, $256;			\
   358  	BLT	tailArgs;			\
   359  	SUB	$256, R5;			\
   360  	MVC	$256, 0(R4), 0(R6);		\
   361  	MOVD	$256(R4), R4;			\
   362  	MOVD	$256(R6), R6;			\
   363  	BR	loopArgs;			\
   364  tailArgs: /* copy remaining bytes */		\
   365  	CMP	R5, $0;				\
   366  	BEQ	callFunction;			\
   367  	SUB	$1, R5;				/* R5 = remaining - 1, used by EXRL to supply the length of the MVC at callfnMVC */ \
   368  	EXRL	$callfnMVC<>(SB), R5;		\
   369  callFunction:					\
   370  	MOVD	f+8(FP), R12;			/* R12 = closure context */ \
   371  	MOVD	(R12), R8;			/* R8 = code pointer */ \
   372  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   373  	BL	(R8);				\
   374  	/* copy return values back */		\
   375  	MOVD	argtype+0(FP), R7;		\
   376  	MOVD	arg+16(FP), R6;			\
   377  	MOVWZ	n+24(FP), R5;			/* same slot as argsize+24(FP) above */ \
   378  	MOVD	$stack-MAXSIZE(SP), R4;		\
   379  	MOVWZ	retoffset+28(FP), R1;		\
   380  	ADD	R1, R4;				/* R4 = frame copy + retoffset (source) */ \
   381  	ADD	R1, R6;				/* R6 = arg + retoffset (destination) */ \
   382  	SUB	R1, R5;				/* R5 = size - retoffset (bytes of results) */ \
   383  	BL	callRet<>(SB);			\
   384  	RET
   385  
   386  // callRet copies return values back at the end of call*. This is a
   387  // separate function so it can allocate stack space for the arguments
   388  // to reflectcallmove. It does not follow the Go ABI; it expects its
   389  // arguments in registers (R7, R6, R4, R5 as set up by CALLFN).
   390  TEXT callRet<>(SB), NOSPLIT, $32-0
   391  	MOVD	R7, 8(R15)	// argtype
   392  	MOVD	R6, 16(R15)	// arg + retoffset
   393  	MOVD	R4, 24(R15)	// frame copy + retoffset
   394  	MOVD	R5, 32(R15)	// size - retoffset
   395  	BL	runtime·reflectcallmove(SB)
   396  	RET
   397  
   398  CALLFN(·call32, 32)	// one fixed-frame wrapper per power of two, 32 bytes up to 1073741824; reflectcall picks among them via DISPATCH
   399  CALLFN(·call64, 64)
   400  CALLFN(·call128, 128)
   401  CALLFN(·call256, 256)
   402  CALLFN(·call512, 512)
   403  CALLFN(·call1024, 1024)
   404  CALLFN(·call2048, 2048)
   405  CALLFN(·call4096, 4096)
   406  CALLFN(·call8192, 8192)
   407  CALLFN(·call16384, 16384)
   408  CALLFN(·call32768, 32768)
   409  CALLFN(·call65536, 65536)
   410  CALLFN(·call131072, 131072)
   411  CALLFN(·call262144, 262144)
   412  CALLFN(·call524288, 524288)
   413  CALLFN(·call1048576, 1048576)
   414  CALLFN(·call2097152, 2097152)
   415  CALLFN(·call4194304, 4194304)
   416  CALLFN(·call8388608, 8388608)
   417  CALLFN(·call16777216, 16777216)
   418  CALLFN(·call33554432, 33554432)
   419  CALLFN(·call67108864, 67108864)
   420  CALLFN(·call134217728, 134217728)
   421  CALLFN(·call268435456, 268435456)
   422  CALLFN(·call536870912, 536870912)
   423  CALLFN(·call1073741824, 1073741824)
   424  
   425  // Not a function: target for EXRL (execute relative long) instruction.
   426  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   427  	MVC	$1, 0(R4), 0(R6)	// template copy from (R4) to (R6); CALLFN's tailArgs executes it via EXRL with R5 = remaining bytes - 1
   428  
   429  TEXT runtime·procyield(SB),NOSPLIT,$0-0	// no-op in this implementation: returns immediately
   430  	RET
   431  
   432  // void jmpdefer(fv, sp);
   433  // called from deferreturn.
   434  // 1. grab stored LR for caller
   435  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   436  // 3. BR to fn
   437  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   438  	MOVD	0(R15), R1	// step 1: stored LR at the bottom of the current frame
   439  	SUB	$6, R1, LR	// step 2: LR = stored LR - 6
   440  
   441  	MOVD	fv+0(FP), R12	// R12 = closure context for fn
   442  	MOVD	argp+8(FP), R15	// switch SP to the supplied argument pointer...
   443  	SUB	$8, R15	// ...minus 8 bytes
   444  	MOVD	0(R12), R3	// R3 = code pointer
   445  	BR	(R3)	// step 3: jump, not call — fn returns to the adjusted LR
   446  
   447  // Save state of caller into g->sched (pc, sp; lr and ret are zeroed). Smashes R1.
   448  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   449  	MOVD	LR, (g_sched+gobuf_pc)(g)	// pc = return address of this call
   450  	MOVD	R15, (g_sched+gobuf_sp)(g)
   451  	MOVD	$0, (g_sched+gobuf_lr)(g)
   452  	MOVD	$0, (g_sched+gobuf_ret)(g)
   453  	// Assert ctxt is zero. See func save.
   454  	MOVD	(g_sched+gobuf_ctxt)(g), R1
   455  	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   456  	BL	runtime·badctxt(SB)
   457  	RET
   458  
   459  // func asmcgocall(fn, arg unsafe.Pointer) int32
   460  // Call fn(arg) on the scheduler stack,
   461  // aligned appropriately for the gcc ABI.
   462  // See cgocall.go for more details.
   463  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   464  	// R3 = fn; R4 = arg; R2 = saved stack pointer; R5 = calling g; R6 = temp
   465  	// R13 = g; R15 = stack pointer
   466  	MOVD	fn+0(FP), R3
   467  	MOVD	arg+8(FP), R4
   468  
   469  	MOVD	R15, R2		// save original stack pointer
   470  	MOVD	g, R5
   471  
   472  	// Figure out if we need to switch to m->g0 stack.
   473  	// We get called to create new OS threads too, and those
   474  	// come in on the m->g0 stack already.
   475  	MOVD	g_m(g), R6
   476  	MOVD	m_g0(R6), R6
   477  	CMPBEQ	R6, g, g0	// already on g0: no switch needed
   478  	BL	gosave<>(SB)
   479  	MOVD	R6, g
   480  	BL	runtime·save_g(SB)
   481  	MOVD	(g_sched+gobuf_sp)(g), R15
   482  
   483  	// Now on a scheduling stack (a pthread-created stack).
   484  g0:
   485  	// Save room for two of our pointers, plus 160 bytes of callee
   486  	// save area that lives on the caller stack.
   487  	SUB	$176, R15
   488  	MOVD	$~7, R6
   489  	AND	R6, R15                 // 8-byte alignment for gcc ABI
   490  	MOVD	R5, 168(R15)             // save old g on stack
   491  	MOVD	(g_stack+stack_hi)(R5), R5
   492  	SUB	R2, R5
   493  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   494  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   495  	MOVD	R4, R2                  // arg in R2
   496  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   497  
   498  	XOR	R0, R0                  // set R0 back to 0.
   499  	// Restore g, stack pointer.
   500  	MOVD	168(R15), g
   501  	BL	runtime·save_g(SB)
   502  	MOVD	(g_stack+stack_hi)(g), R5
   503  	MOVD	160(R15), R6
   504  	SUB	R6, R5	// SP = stack.hi - saved depth
   505  	MOVD	R5, R15
   506  
   507  	MOVW	R2, ret+16(FP)	// 32-bit result taken from R2 as left by fn
   508  	RET
   509  
   510  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   511  // Turn the fn into a Go func (by taking its address) and call
   512  // cgocallback_gofunc.
   513  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   514  	MOVD	$fn+0(FP), R3	// address of the fn argument slot, not its value
   515  	MOVD	R3, 8(R15)
   516  	MOVD	frame+8(FP), R3
   517  	MOVD	R3, 16(R15)
   518  	MOVD	framesize+16(FP), R3
   519  	MOVD	R3, 24(R15)
   520  	MOVD	ctxt+24(FP), R3
   521  	MOVD	R3, 32(R15)
   522  	MOVD	$runtime·cgocallback_gofunc(SB), R3
   523  	BL	(R3)	// indirect call through R3
   524  	RET
   525  
   526  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   527  // See cgocall.go for more details.
   528  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   529  	NO_LOCAL_POINTERS
   530  
   531  	// Load m and g from thread-local storage.
   532  	MOVB	runtime·iscgo(SB), R3
   533  	CMPBEQ	R3, $0, nocgo
   534  	BL	runtime·load_g(SB)
   535  
   536  nocgo:
   537  	// If g is nil, Go did not create the current thread.
   538  	// Call needm to obtain one for temporary use.
   539  	// In this case, we're running on the thread stack, so there's
   540  	// lots of space, but the linker doesn't know. Hide the call from
   541  	// the linker analysis by using an indirect call.
   542  	CMPBEQ	g, $0, needm
   543  
   544  	MOVD	g_m(g), R8
   545  	MOVD	R8, savedm-8(SP)	// remember the m we entered with (non-zero here)
   546  	BR	havem
   547  
   548  needm:
   549  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   550  	MOVD	$runtime·needm(SB), R3
   551  	BL	(R3)
   552  
   553  	// Set m->sched.sp = SP, so that if a panic happens
   554  	// during the function we are about to execute, it will
   555  	// have a valid SP to run on the g0 stack.
   556  	// The next few lines (after the havem label)
   557  	// will save this SP onto the stack and then write
   558  	// the same SP back to m->sched.sp. That seems redundant,
   559  	// but if an unrecovered panic happens, unwindm will
   560  	// restore the g->sched.sp from the stack location
   561  	// and then systemstack will try to use it. If we don't set it here,
   562  	// that restored SP will be uninitialized (typically 0) and
   563  	// will not be usable.
   564  	MOVD	g_m(g), R8
   565  	MOVD	m_g0(R8), R3
   566  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   567  
   568  havem:
   569  	// Now there's a valid m, and we're running on its m->g0.
   570  	// Save current m->g0->sched.sp on stack and then set it to SP.
   571  	// Save current sp in m->g0->sched.sp in preparation for
   572  	// switch back to m->curg stack.
   573  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R15) aka savedsp-16(SP).
   574  	MOVD	m_g0(R8), R3
   575  	MOVD	(g_sched+gobuf_sp)(R3), R4
   576  	MOVD	R4, savedsp-16(SP)
   577  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   578  
   579  	// Switch to m->curg stack and call runtime.cgocallbackg.
   580  	// Because we are taking over the execution of m->curg
   581  	// but *not* resuming what had been running, we need to
   582  	// save that information (m->curg->sched) so we can restore it.
   583  	// We can restore m->curg->sched.sp easily, because calling
   584  	// runtime.cgocallbackg leaves SP unchanged upon return.
   585  	// To save m->curg->sched.pc, we push it onto the stack.
   586  	// This has the added benefit that it looks to the traceback
   587  	// routine like cgocallbackg is going to return to that
   588  	// PC (because the frame we allocate below has the same
   589  	// size as cgocallback_gofunc's frame declared above)
   590  	// so that the traceback will seamlessly trace back into
   591  	// the earlier calls.
   592  	//
   593  	// In the new goroutine, -8(SP) is unused (where SP refers to
   594  	// m->curg's SP while we're setting it up, before we've adjusted it).
   595  	MOVD	m_curg(R8), g
   596  	BL	runtime·save_g(SB)
   597  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   598  	MOVD	(g_sched+gobuf_pc)(g), R5
   599  	MOVD	R5, -24(R4)	// saved curg pc at the bottom of the new 24-byte frame
   600  	MOVD	ctxt+24(FP), R5
   601  	MOVD	R5, -16(R4)	// ctxt argument for cgocallbackg
   602  	MOVD	$-24(R4), R15	// SP = curg's saved sp - 24
   603  	BL	runtime·cgocallbackg(SB)
   604  
   605  	// Restore g->sched (== m->curg->sched) from saved values.
   606  	MOVD	0(R15), R5	// the pc stored at -24(R4) above
   607  	MOVD	R5, (g_sched+gobuf_pc)(g)
   608  	MOVD	$24(R15), R4	// undo the 24-byte adjustment
   609  	MOVD	R4, (g_sched+gobuf_sp)(g)
   610  
   611  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   612  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   613  	// so we do not have to restore it.)
   614  	MOVD	g_m(g), R8
   615  	MOVD	m_g0(R8), g
   616  	BL	runtime·save_g(SB)
   617  	MOVD	(g_sched+gobuf_sp)(g), R15
   618  	MOVD	savedsp-16(SP), R4
   619  	MOVD	R4, (g_sched+gobuf_sp)(g)
   620  
   621  	// If the m on entry was nil, we called needm above to borrow an m
   622  	// for the duration of the call. Since the call is over, return it with dropm.
   623  	MOVD	savedm-8(SP), R6
   624  	CMPBNE	R6, $0, droppedm	// entered with a real m: nothing to drop
   625  	MOVD	$runtime·dropm(SB), R3
   626  	BL	(R3)
   627  droppedm:
   628  
   629  	// Done!
   630  	RET
   631  
   632  // void setg(G*); set g. for use by needm.
   633  TEXT runtime·setg(SB), NOSPLIT, $0-8
   634  	MOVD	gg+0(FP), g	// g register = argument
   635  	// This only happens if iscgo, so jump straight to save_g
   636  	BL	runtime·save_g(SB)
   637  	RET
   638  
   639  // void setg_gcc(G*); set g in C TLS.
   640  // Must obey the gcc calling convention. The new G arrives in R2.
   641  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   642  	// The standard prologue clobbers LR (R14), which is callee-save in
   643  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   644  	MOVD	LR, R1
   645  	// Also save g, R10, and R11 since they're callee-save in C ABI
   646  	MOVD	R10, R3
   647  	MOVD	g, R4
   648  	MOVD	R11, R5
   649  
   650  	MOVD	R2, g	// install the requested G so save_g stores it
   651  	BL	runtime·save_g(SB)
   652  
   653  	MOVD	R5, R11	// restore the saved registers in reverse order
   654  	MOVD	R4, g
   655  	MOVD	R3, R10
   656  	MOVD	R1, LR
   657  	RET
   658  
   659  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16	// returns the word found at 16(R15) as ret
   660  	MOVD	16(R15), R3		// LR saved by caller
   661  	MOVD	R3, ret+8(FP)
   662  	RET
   663  
   664  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   665  	MOVW	(R0), R0	// load through R0; assumes R0 is zero so this faults — NOTE(review): confirm
   666  	UNDEF	// backstop in case the load above did not fault
   667  
   668  // int64 runtime·cputicks(void)
   669  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   670  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   671  	// This means that since about 1972 the msb has been set, making the
   672  	// result of a call to STORE CLOCK (stck) a negative number.
   673  	// We clear the msb to make it positive.
   674  	STCK	ret+0(FP)      // serialises before and after call
   675  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   676  	SLD	$1, R3	// shift the msb out...
   677  	SRD	$1, R3	// ...and shift back, leaving bit 63 clear
   678  	MOVD	R3, ret+0(FP)
   679  	RET
   680  
   681  // AES hashing not implemented for s390x; each stub below is a single load through R0.
   682  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   683  	MOVW	(R0), R15	// assumes R0 is zero, so this faults if the stub is ever called — NOTE(review): confirm
   684  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   685  	MOVW	(R0), R15	// same as aeshash
   686  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   687  	MOVW	(R0), R15	// same as aeshash
   688  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   689  	MOVW	(R0), R15	// same as aeshash
   690  
   691  // memequal(a, b unsafe.Pointer, size uintptr) bool
   692  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   693  	MOVD	a+0(FP), R3	// R3 = a
   694  	MOVD	b+8(FP), R5	// R5 = b
   695  	MOVD	size+16(FP), R6	// R6 = len
   696  	LA	ret+24(FP), R7	// R7 = address of the result byte
   697  	BR	runtime·memeqbody(SB)	// tail jump: memeqbody writes the result through R7 and returns to our caller
   698  
   699  // memequal_varlen(a, b unsafe.Pointer) bool
   700  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   701  	MOVD	a+0(FP), R3	// R3 = a
   702  	MOVD	b+8(FP), R5	// R5 = b
   703  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   704  	LA	ret+16(FP), R7	// R7 = address of the result byte
   705  	BR	runtime·memeqbody(SB)	// tail jump: memeqbody writes the result through R7
   706  
   707  // eqstring tests whether two strings are equal.
   708  // The compiler guarantees that strings passed
   709  // to eqstring have equal length.
   710  // See runtime_test.go:eqstring_generic for
   711  // equivalent Go code.
   712  TEXT runtime·eqstring(SB),NOSPLIT|NOFRAME,$0-33
   713  	MOVD	s1_base+0(FP), R3	// R3 = s1 data pointer
   714  	MOVD	s1_len+8(FP), R6	// R6 = length; s2's length is not loaded
   715  	MOVD	s2_base+16(FP), R5	// R5 = s2 data pointer
   716  	LA	ret+32(FP), R7	// R7 = address of the result byte
   717  	BR	runtime·memeqbody(SB)	// tail jump: memeqbody writes the result through R7
   718  
   719  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49	// compares two byte slices; unequal lengths give false without comparing data
   720  	MOVD	a_len+8(FP), R2	// R2 = len(a)
   721  	MOVD	b_len+32(FP), R6	// R6 = len(b); also the length handed to memeqbody
   722  	MOVD	a+0(FP), R3	// R3 = a's data pointer
   723  	MOVD	b+24(FP), R5	// R5 = b's data pointer
   724  	LA	ret+48(FP), R7	// R7 = address of the result byte
   725  	CMPBNE	R2, R6, notequal
   726  	BR	runtime·memeqbody(SB)	// equal lengths: tail jump to the shared comparison body
   727  notequal:
   728  	MOVB	$0, ret+48(FP)
   729  	RET
   730  
   731  // input:
   732  //   R3 = a
   733  //   R5 = b
   734  //   R6 = len
   735  //   R7 = address of output byte (stores 0 or 1 here)
   736  //   a and b have the same length
   737  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   738  	CMPBEQ	R3, R5, equal	// same pointer: equal without reading memory
   739  loop:
   740  	CMPBEQ	R6, $0, equal	// nothing left to compare
   741  	CMPBLT	R6, $32, tiny	// fewer than 32 bytes: register compares
   742  	CMP	R6, $256
   743  	BLT	tail	// 32..255 bytes: one variable-length CLC
   744  	CLC	$256, 0(R3), 0(R5)	// 256 bytes or more: compare a full 256-byte chunk
   745  	BNE	notequal
   746  	SUB	$256, R6
   747  	LA	256(R3), R3	// advance both pointers past the chunk
   748  	LA	256(R5), R5
   749  	BR	loop
   750  tail:
   751  	SUB	$1, R6, R8	// R8 = len - 1, supplied to EXRL for the CLC at memeqbodyclc
   752  	EXRL	$runtime·memeqbodyclc(SB), R8
   753  	BEQ	equal
   754  notequal:
   755  	MOVB	$0, 0(R7)
   756  	RET
   757  equal:
   758  	MOVB	$1, 0(R7)
   759  	RET
   760  tiny:
   761  	MOVD	$0, R2	// R2 = running byte offset into both buffers
   762  	CMPBLT	R6, $16, lt16
   763  	MOVD	0(R3), R8	// bytes 0..7
   764  	MOVD	0(R5), R9
   765  	CMPBNE	R8, R9, notequal
   766  	MOVD	8(R3), R8	// bytes 8..15
   767  	MOVD	8(R5), R9
   768  	CMPBNE	R8, R9, notequal
   769  	LA	16(R2), R2	// offset += 16
   770  	SUB	$16, R6
   771  lt16:
   772  	CMPBLT	R6, $8, lt8
   773  	MOVD	0(R3)(R2*1), R8	// next 8 bytes at the current offset
   774  	MOVD	0(R5)(R2*1), R9
   775  	CMPBNE	R8, R9, notequal
   776  	LA	8(R2), R2
   777  	SUB	$8, R6
   778  lt8:
   779  	CMPBLT	R6, $4, lt4
   780  	MOVWZ	0(R3)(R2*1), R8	// next 4 bytes at the current offset
   781  	MOVWZ	0(R5)(R2*1), R9
   782  	CMPBNE	R8, R9, notequal
   783  	LA	4(R2), R2
   784  	SUB	$4, R6
   785  lt4:	// fewer than 4 bytes remain; CHECK(n) stops with equal when exactly n remain, otherwise compares byte n
   786  #define CHECK(n) \
   787  	CMPBEQ	R6, $n, equal \
   788  	MOVB	n(R3)(R2*1), R8 \
   789  	MOVB	n(R5)(R2*1), R9 \
   790  	CMPBNE	R8, R9, notequal
   791  	CHECK(0)
   792  	CHECK(1)
   793  	CHECK(2)
   794  	CHECK(3)
   795  	BR	equal
   796  
   797  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0	// EXRL target used by memeqbody's tail path
   798  	CLC	$1, 0(R3), 0(R5)	// template compare of (R3) against (R5); the EXRL in memeqbody passes R8 = len - 1
   799  	RET
   800  
   801  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40	// loads the slice arguments into the registers indexbytebody expects
   802  	MOVD	s+0(FP), R3     // s => R3
   803  	MOVD	s_len+8(FP), R4 // s_len => R4
   804  	MOVBZ	c+24(FP), R5    // c => R5
   805  	MOVD	$ret+32(FP), R2 // &ret => R2
   806  	BR	runtime·indexbytebody(SB)	// tail jump: indexbytebody stores the index through R2
   807  
   808  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32	// loads the string arguments into the registers indexbytebody expects
   809  	MOVD	s+0(FP), R3     // s => R3
   810  	MOVD	s_len+8(FP), R4 // s_len => R4
   811  	MOVBZ	c+16(FP), R5    // c => R5
   812  	MOVD	$ret+24(FP), R2 // &ret => R2
   813  	BR	runtime·indexbytebody(SB)	// tail jump: indexbytebody stores the index through R2
   814  
   815  // input:
   816  // R3: s
   817  // R4: s_len
   818  // R5: c -- byte sought
   819  // R2: &ret -- address to put index into (-1 is stored when c is not found)
   820  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   821  	CMPBEQ	R4, $0, notfound
   822  	MOVD	R3, R6          // store base for later
   823  	ADD	R3, R4, R8      // the address after the end of the string
   824  	//if the length is small, use loop; otherwise, use vector or srst search
   825  	CMPBGE	R4, $16, large
   826  
   827  residual:	// byte-at-a-time scan of [R3, R8)
   828  	CMPBEQ	R3, R8, notfound
   829  	MOVBZ	0(R3), R7
   830  	LA	1(R3), R3	// advance before testing, so R3 is one past the byte just read
   831  	CMPBNE	R7, R5, residual
   832  
   833  found:
   834  	SUB	R6, R3	// convert address to offset from base...
   835  	SUB	$1, R3	// ...and undo the post-increment
   836  	MOVD	R3, 0(R2)
   837  	RET
   838  
   839  notfound:
   840  	MOVD	$-1, 0(R2)
   841  	RET
   842  
   843  large:
   844  	MOVBZ	·cpu+facilities_hasVX(SB), R1	// non-zero when the vector facility is available
   845  	CMPBNE	R1, $0, vectorimpl
   846  
   847  srstimpl:                       // no vector facility
   848  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   849  srstloop:
   850  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   851  	BVS	srstloop        // interrupted - continue
   852  	BGT	notfoundr0
   853  foundr0:
   854  	XOR	R0, R0          // reset R0
   855  	SUB	R6, R8          // remove base
   856  	MOVD	R8, 0(R2)
   857  	RET
   858  notfoundr0:
   859  	XOR	R0, R0          // reset R0
   860  	MOVD	$-1, 0(R2)
   861  	RET
   862  
   863  vectorimpl:
   864  	//if the address is not 16byte aligned, use loop for the header
   865  	MOVD	R3, R8	// R8 is reused as scratch here; the end address is recomputed at aligned
   866  	AND	$15, R8
   867  	CMPBGT	R8, $0, notaligned
   868  
   869  aligned:
   870  	ADD	R6, R4, R8	// R8 = base + len: the end address again
   871  	MOVD	R8, R7
   872  	AND	$-16, R7	// R7 = end address rounded down to a multiple of 16
   873  	// replicate c across V17
   874  	VLVGB	$0, R5, V19
   875  	VREPB	$0, V19, V17
   876  
   877  vectorloop:
   878  	CMPBGE	R3, R7, residual	// no full 16-byte block left: finish with the byte loop
   879  	VL	0(R3), V16    // load string to be searched into V16
   880  	ADD	$16, R3
   881  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   882  	BVS	vectorloop
   883  
   884  	// when vector search found c in the string
   885  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   886  	SUB	$16, R3	// back to the start of the block just searched
   887  	SUB	R6, R3	// block offset from base
   888  	ADD	R3, R7	// result = block offset + index within block
   889  	MOVD	R7, 0(R2)
   890  	RET
   891  
   892  notaligned:
   893  	MOVD	R3, R8
   894  	AND	$-16, R8
   895  	ADD     $16, R8	// R8 = next 16-byte boundary above R3
   896  notalignedloop:
   897  	CMPBEQ	R3, R8, aligned	// reached the boundary: switch to the vector loop
   898  	MOVBZ	0(R3), R7
   899  	LA	1(R3), R3
   900  	CMPBNE	R7, R5, notalignedloop
   901  	BR	found
   902  
   903  TEXT runtime·return0(SB), NOSPLIT, $0	// sets R3 to zero and returns
   904  	MOVW	$0, R3
   905  	RET
   906  
   907  // Called from cgo wrappers, this function returns g->m->curg.stack.hi (left in R2).
   908  // Must obey the gcc calling convention.
   909  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   910  	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   911  	MOVD	g, R1
   912  	MOVD	R10, R3
   913  	MOVD	LR, R4
   914  	MOVD	R11, R5
   915  
   916  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   917  	MOVD	g_m(g), R2
   918  	MOVD	m_curg(R2), R2
   919  	MOVD	(g_stack+stack_hi)(R2), R2	// R2 = curg.stack.hi, the value handed back
   920  
   921  	MOVD	R1, g	// restore the registers saved on entry
   922  	MOVD	R3, R10
   923  	MOVD	R4, LR
   924  	MOVD	R5, R11
   925  	RET
   926  
   927  // The top-most function running on a goroutine
   928  // returns to goexit+PCQuantum.
        // The body is a 2-byte nop, a call to runtime·goexit1, and a trailing 2-byte nop.
   929  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
        	// The leading nop occupies the first 2 bytes, so the BL below starts at goexit+2.
   930  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   931  	BL	runtime·goexit1(SB)	// does not return
   932  	// traceback from goexit1 must hit code range of goexit
   933  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   934  
   935  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
        	// Empty body: takes no arguments, has no results and returns immediately.
   936  	RET
   937  
   938  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
        	// Executes a single SYNC instruction and returns; no arguments, no results.
   939  	SYNC
   940  	RET
   941  
   942  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads both string headers and the address of the result slot into the
        	// registers runtime·cmpbody expects, then branches to it; cmpbody stores
        	// the result and returns to our caller.
   943  	MOVD	s1_base+0(FP), R3 // R3 = s1 data pointer
   944  	MOVD	s1_len+8(FP), R4 // R4 = len(s1)
   945  	MOVD	s2_base+16(FP), R5 // R5 = s2 data pointer
   946  	MOVD	s2_len+24(FP), R6 // R6 = len(s2)
   947  	LA	ret+32(FP), R7 // R7 = address of the result word
   948  	BR	runtime·cmpbody(SB)
   949  
   950  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Loads the pointer and length words of both slice headers (the word at
        	// offset 16 of each header is skipped) and the address of the result slot
        	// into the registers runtime·cmpbody expects, then branches to it.
   951  	MOVD	s1+0(FP), R3 // R3 = first slice data pointer
   952  	MOVD	s1+8(FP), R4 // R4 = first slice length
   953  	MOVD	s2+24(FP), R5 // R5 = second slice data pointer
   954  	MOVD	s2+32(FP), R6 // R6 = second slice length
   955  	LA	res+48(FP), R7 // R7 = address of the result word
   956  	BR	runtime·cmpbody(SB)
   957  
   958  // input:
   959  //   R3 = a
   960  //   R4 = alen
   961  //   R5 = b
   962  //   R6 = blen
   963  //   R7 = address of output word (stores -1/0/1 here)
        // Compares the first min(alen, blen) bytes of a and b; if they are all
        // equal the result is decided by comparing alen with blen.
        // Clobbers R8 (bytes of the common prefix still to compare) and advances
        // R3 and R5 past every 256-byte chunk that compared equal.
   964  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
   965  	CMPBEQ	R3, R5, cmplengths // same base address: only the lengths can differ
   966  	MOVD	R4, R8
   967  	CMPBLE	R4, R6, amin
   968  	MOVD	R6, R8
   969  amin:
        	// R8 = min(alen, blen)
   970  	CMPBEQ	R8, $0, cmplengths
   971  	CMP	R8, $256
   972  	BLE	tail
   973  loop:
        	// Compare one full 256-byte chunk; stop at the first chunk that differs.
   974  	CLC	$256, 0(R3), 0(R5)
   975  	BGT	gt
   976  	BLT	lt
   977  	SUB	$256, R8
        	// Step both pointers to the next chunk. Without this every iteration
        	// (and the tail) would re-compare the first bytes of the inputs.
        	MOVD	$256(R3), R3
        	MOVD	$256(R5), R5
   978  	CMP	R8, $256
   979  	BGT	loop
   980  tail:
        	// 1 <= R8 <= 256 bytes remain. EXRL runs the CLC template in cmpbodyclc
        	// with R8-1 supplying its length field, which encodes length minus one.
   981  	SUB	$1, R8
   982  	EXRL	$runtime·cmpbodyclc(SB), R8
   983  	BGT	gt
   984  	BLT	lt
   985  cmplengths:
        	// Common prefix is identical: the shorter input sorts first.
   986  	CMP	R4, R6
   987  	BEQ	eq
   988  	BLT	lt
   989  gt:
   990  	MOVD	$1, 0(R7)
   991  	RET
   992  lt:
   993  	MOVD	$-1, 0(R7)
   994  	RET
   995  eq:
   996  	MOVD	$0, 0(R7)
   997  	RET
   998  
   999  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// Instruction template: runtime·cmpbody references this symbol as the
        	// target of its EXRL, with R8 as the modifying register.
        	// It compares memory at 0(R3) with memory at 0(R5).
  1000  	CLC	$1, 0(R3), 0(R5)
  1001  	RET
  1002  
  1003  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX as the result.
  1004  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1005  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0 // zero-extending byte load; R0 is used as scratch
  1006  	MOVB	R0, ret+0(FP)
  1007  	RET
  1008  
  1008  
  1009  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX as the result.
  1010  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1011  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0 // zero-extending byte load; R0 is used as scratch
  1012  	MOVB	R0, ret+0(FP)
  1013  	RET
  1014  
  1015  // func indexShortStr(s, sep string) int
  1016  // Caller must confirm availability of vx facility before calling.
        // Loads both string headers and the address of the result slot into the
        // registers runtime·indexShortStr expects, then branches to it.
  1017  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1018  	LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
  1019  	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1020  	MOVD	$ret+32(FP), R5    // R5=&ret
  1021  	BR	runtime·indexShortStr(SB)
  1022  
  1023  // func indexShortStr(s, sep []byte) int
  1024  // Caller must confirm availability of vx facility before calling.
        // Loads the pointer and length words of both slice headers (the word at
        // offset 16 of each header is skipped) and the address of the result slot
        // into the registers runtime·indexShortStr expects, then branches to it.
  1025  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1026  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1027  	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1028  	MOVD	$ret+48(FP), R5    // R5=&ret
  1029  	BR	runtime·indexShortStr(SB)
  1030  
  1031  // s: string we are searching
  1032  // sep: string to search for
  1033  // R1=&s[0], R2=len(s)
  1034  // R3=&sep[0], R4=len(sep)
  1035  // R5=&ret (int)
  1036  // Caller must confirm availability of vx facility before calling.
        // Writes the byte offset of the first match to (R5), or -1 when there is none.
        // Separators of 1 to 64 bytes are searched for; a zero-length separator
        // yields -1 and a longer one reaches the unimplemented index65plus path.
        // Internally R7 is the current candidate address in s, R2 is repurposed as
        // the last address at which a match can start and R4 becomes len(sep)-1.
  1037  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1038  	CMPBGT	R4, R2, notfound
  1039  	ADD	R1, R2
  1040  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1041  	CMPBEQ	R4, $0, notfound
  1042  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1043  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1044  	MOVD	R1, R7
  1045  index2plus:
        	// len(sep)==2: test 16 candidate positions per iteration.
  1046  	CMPBNE	R4, $1, index3plus
  1047  	MOVD	$15(R7), R9
  1048  	CMPBGE	R9, R2, index2to16
  1049  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1050  	VONE	V16
  1051  	VREPH	$0, V0, V1
  1052  	CMPBGE	R9, R2, index2to16 // NOTE(review): repeats the check made at the top of index2plus; R9 and R2 are unchanged in between
  1053  index2loop:
  1054  	VL	0(R7), V2          // 16 bytes, even indices
  1055  	VL	1(R7), V4          // 16 bytes, odd indices
  1056  	VCEQH	V1, V2, V5         // compare even indices
  1057  	VCEQH	V1, V4, V6         // compare odd indices
  1058  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1059  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1060  	BLT	foundV17
  1061  	MOVD	$16(R7), R7        // R7+=16
  1062  	ADD	$15, R7, R9
  1063  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1064  	CMPBLE	R7, R2, index2to16
  1065  	BR	notfound
  1066  
  1067  index3plus:
        	// len(sep)==3: test 16 candidate positions per iteration.
  1068  	CMPBNE	R4, $2, index4plus
  1069  	ADD	$15, R7, R9
  1070  	CMPBGE	R9, R2, index2to16
  1071  	MOVD	$1, R0
  1072  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1073  	VONE	V16
  1074  	VREPH	$0, V0, V1
  1075  	VREPB	$2, V0, V8
  1076  index3loop:
  1077  	VL	(R7), V2           // load 16-bytes into V2
  1078  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1079  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1080  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1081  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1082  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1083  	VCEQB	V8, V9, V10        // compare last bytes
  1084  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1085  	VN	V7, V10, V7        // AND indices with last byte
  1086  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1087  	BLT	foundV17
  1088  	MOVD	$16(R7), R7        // R7+=16
  1089  	ADD	$15, R7, R9
  1090  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1091  	CMPBLE	R7, R2, index2to16
  1092  	BR	notfound
  1093  
  1094  index4plus:
        	// len(sep)==4: test 16 candidate positions per iteration.
  1095  	CMPBNE	R4, $3, index5plus
  1096  	ADD	$15, R7, R9
  1097  	CMPBGE	R9, R2, index2to16
  1098  	MOVD	$2, R0
  1099  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1100  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1101  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1102  	VONE	V16
  1103  	VREPF	$0, V0, V1
  1104  index4loop:
  1105  	VL	(R7), V2           // load 16-bytes into V2
  1106  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1107  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1108  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1109  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1110  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1111  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1112  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1113  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1114  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1115  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1116  	VSEL	V13, V14, V31, V7  // final merge
  1117  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1118  	BLT	foundV17
  1119  	MOVD	$16(R7), R7        // R7+=16
  1120  	ADD	$15, R7, R9
  1121  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1122  	CMPBLE	R7, R2, index2to16
  1123  	BR	notfound
  1124  
  1125  index5plus:
  1126  	CMPBGT	R4, $15, index17plus
  1127  index2to16:
        	// One candidate position per comparison. Reached directly for the
        	// remaining lengths up to 16, and from the 2/3/4-byte paths above when
        	// fewer than 16 candidate positions remain.
  1128  	CMPBGT	R7, R2, notfound
  1129  	MOVD	$1(R7), R8
  1130  	CMPBGT	R8, R2, index2to16tail
  1131  index2to16loop:
  1132  	// unrolled 2x
  1133  	VLL	R4, (R7), V1
  1134  	VLL	R4, 1(R7), V2
  1135  	VCEQGS	V0, V1, V3
  1136  	BEQ	found
  1137  	MOVD	$1(R7), R7
  1138  	VCEQGS	V0, V2, V4
  1139  	BEQ	found
  1140  	MOVD	$1(R7), R7
  1141  	CMPBLT	R7, R2, index2to16loop
  1142  	CMPBGT	R7, R2, notfound
  1143  index2to16tail:
        	// Exactly one candidate position (R7 == R2) is left.
  1144  	VLL	R4, (R7), V1
  1145  	VCEQGS	V0, V1, V2
  1146  	BEQ	found
  1147  	BR	notfound
  1148  
  1149  index17plus:
        	// 17 <= len(sep) <= 32: sep is held in V0:V1; R0 is the VLL index for the second vector.
  1150  	CMPBGT	R4, $31, index33plus
  1151  	SUB	$16, R4, R0
  1152  	VLL	R0, 16(R3), V1
  1153  	VONE	V7
  1154  index17to32loop:
  1155  	VL	(R7), V2
  1156  	VLL	R0, 16(R7), V3
  1157  	VCEQG	V0, V2, V4
  1158  	VCEQG	V1, V3, V5
  1159  	VN	V4, V5, V6
  1160  	VCEQGS	V6, V7, V8
  1161  	BEQ	found
  1162  	MOVD	$1(R7), R7
  1163  	CMPBLE  R7, R2, index17to32loop
  1164  	BR	notfound
  1165  
  1166  index33plus:
        	// 33 <= len(sep) <= 48: sep is held in V0:V1:V2; R0 is the VLL index for the third vector.
  1167  	CMPBGT	R4, $47, index49plus
  1168  	SUB	$32, R4, R0
  1169  	VL	16(R3), V1
  1170  	VLL	R0, 32(R3), V2
  1171  	VONE	V11
  1172  index33to48loop:
  1173  	VL	(R7), V3
  1174  	VL	16(R7), V4
  1175  	VLL	R0, 32(R7), V5
  1176  	VCEQG	V0, V3, V6
  1177  	VCEQG	V1, V4, V7
  1178  	VCEQG	V2, V5, V8
  1179  	VN	V6, V7, V9
  1180  	VN	V8, V9, V10
  1181  	VCEQGS	V10, V11, V12
  1182  	BEQ	found
  1183  	MOVD	$1(R7), R7
  1184  	CMPBLE  R7, R2, index33to48loop
  1185  	BR	notfound
  1186  
  1187  index49plus:
        	// 49 <= len(sep) <= 64: sep is held in V0:V1:V2:V3; R0 is the VLL index for the fourth vector.
  1188  	CMPBGT	R4, $63, index65plus
  1189  	SUB	$48, R4, R0
  1190  	VL	16(R3), V1
  1191  	VL	32(R3), V2
  1192  	VLL	R0, 48(R3), V3
  1193  	VONE	V15
  1194  index49to64loop:
  1195  	VL	(R7), V4
  1196  	VL	16(R7), V5
  1197  	VL	32(R7), V6
  1198  	VLL	R0, 48(R7), V7
  1199  	VCEQG	V0, V4, V8
  1200  	VCEQG	V1, V5, V9
  1201  	VCEQG	V2, V6, V10
  1202  	VCEQG	V3, V7, V11
  1203  	VN	V8, V9, V12
  1204  	VN	V10, V11, V13
  1205  	VN	V12, V13, V14
  1206  	VCEQGS	V14, V15, V16
  1207  	BEQ	found
  1208  	MOVD	$1(R7), R7
  1209  	CMPBLE  R7, R2, index49to64loop
        	// falls through to notfound once R7 has passed R2
  1210  notfound:
  1211  	MOVD	$-1, (R5)
  1212  	RET
  1213  
  1214  index65plus:
  1215  	// not implemented
        	// NOTE(review): presumably a deliberate fault via a store to address 0 — confirm.
  1216  	MOVD	$0, (R0)
  1217  	RET
  1218  
  1219  foundV17: // index is in doubleword V17[0]
  1220  	VLGVG	$0, V17, R8
  1221  	ADD	R8, R7
  1222  found:
        	// R7 is the address of the match; convert it to an offset from &s[0].
  1223  	SUB	R1, R7
  1224  	MOVD	R7, (R5)
  1225  	RET
  1226  
  1227  // This is called from .init_array and follows the platform, not Go, ABI.
  1228  // We are overly conservative. We could only save the registers we use.
  1229  // However, since this function is only called once per loaded module
  1230  // performance is unimportant.
        // Input: R2 = address of the moduledata to append. Uses R1 as scratch.
  1231  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1232  	// Save R6-R15 in the register save area of the calling function.
  1233  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1234  	STMG	R6, R15, 48(R15)
  1235  
  1236  	// append the argument (passed in R2, as per the ELF ABI) to the
  1237  	// moduledata linked list.
  1238  	MOVD	runtime·lastmoduledatap(SB), R1 // R1 = current tail of the list
  1239  	MOVD	R2, moduledata_next(R1) // tail.next = new module
  1240  	MOVD	R2, runtime·lastmoduledatap(SB) // the new module becomes the tail
  1241  
  1242  	// Restore R6-R15.
  1243  	LMG	48(R15), R6, R15
  1244  	RET
  1245  
  1246  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 (true) into the one-byte result slot and returns.
  1247  	MOVB	$1, ret+0(FP)
  1248  	RET