github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Bootstrap: derive g0's stack bounds from the incoming stack, call
        	// _cgo_init if present, link m0 and g0, run check/args/osinit/schedinit,
        	// queue runtime·main via newproc and finally call mstart.
    11  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    12  	// C TLS base pointer in AR0:AR1
    13  
    14  	// initialize essential registers
    15  	XOR	R0, R0
    16  
    17  	SUB	$24, R15
    18  	MOVW	R2, 8(R15) // argc
    19  	MOVD	R3, 16(R15) // argv
    20  
    21  	// create istack out of the given (operating system) stack.
    22  	// _cgo_init may update stackguard.
    23  	MOVD	$runtime·g0(SB), g
    24  	MOVD	R15, R11
    25  	SUB	$(64*1024), R11
        	// provisional bounds [SP-64KiB, SP); the guards are recomputed at nocgo
    26  	MOVD	R11, g_stackguard0(g)
    27  	MOVD	R11, g_stackguard1(g)
    28  	MOVD	R11, (g_stack+stack_lo)(g)
    29  	MOVD	R15, (g_stack+stack_hi)(g)
    30  
    31  	// if there is a _cgo_init, call it using the gcc ABI.
    32  	MOVD	_cgo_init(SB), R11
    33  	CMPBEQ	R11, $0, nocgo
    34  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
    35  	SLD	$32, R4, R4
    36  	MOVW	AR1, R4			// arg 2: TLS base pointer
    37  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
    38  	MOVD	g, R2			// arg 0: G
    39  	// C functions expect 160 bytes of space on caller stack frame
    40  	// and an 8-byte aligned stack pointer
    41  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
    42  	SUB	$160, R15		// reserve 160 bytes
    43  	MOVD    $~7, R6
    44  	AND 	R6, R15			// 8-byte align
    45  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
    46  	MOVD	R9, R15			// restore stack
    47  	XOR	R0, R0			// zero R0
    48  
    49  nocgo:
    50  	// update stackguard after _cgo_init
    51  	MOVD	(g_stack+stack_lo)(g), R2
    52  	ADD	$const__StackGuard, R2
    53  	MOVD	R2, g_stackguard0(g)
    54  	MOVD	R2, g_stackguard1(g)
    55  
    56  	// set the per-goroutine and per-mach "registers"
    57  	MOVD	$runtime·m0(SB), R2
    58  
    59  	// save m->g0 = g0
    60  	MOVD	g, m_g0(R2)
    61  	// save m0 to g0->m
    62  	MOVD	R2, g_m(g)
    63  
    64  	BL	runtime·check(SB)
    65  
    66  	// argc/argv are already prepared on stack
    67  	BL	runtime·args(SB)
    68  	BL	runtime·osinit(SB)
    69  	BL	runtime·schedinit(SB)
    70  
    71  	// create a new goroutine to start program
    72  	MOVD	$runtime·mainPC(SB), R2		// entry
        	// temporary 24-byte frame for newproc: 16(R15) = &mainPC, 8(R15) = 0,
        	// 0(R15) = 0 (presumably fn, siz and the return-address slot — confirm
        	// against newproc's Go signature)
    73  	SUB     $24, R15
    74  	MOVD 	R2, 16(R15)
    75  	MOVD 	$0, 8(R15)
    76  	MOVD 	$0, 0(R15)
    77  	BL	runtime·newproc(SB)
    78  	ADD	$24, R15
    79  
    80  	// start this M
    81  	BL	runtime·mstart(SB)
    82  
        	// only reached if mstart returns: store to address 1, presumably to fault
    83  	MOVD	$0, 1(R0)
    84  	RET
    85  
    86  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
        // mainPC is an 8-byte read-only word holding the address of runtime·main;
        // rt0_go hands the address of this word to newproc as the entry.
    87  GLOBL	runtime·mainPC(SB),RODATA,$8
    88  
    89  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// Stores to address 2 (R0 is expected to be zero) — presumably a
        	// deliberate fault that acts as the breakpoint trap; confirm with the
        	// signal handler.
    90  	MOVD	$0, 2(R0)
    91  	RET
    92  
    93  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        	// No-op: returns immediately.
    94  	RET
    95  
    96  /*
    97   *  go-routine
    98   */
    99  
   100  // void gosave(Gobuf*)
   101  // save state in Gobuf; setjmp
        // Records SP, LR (as pc) and g in buf, zeroes buf.lr and buf.ret, and
        // calls badctxt if buf.ctxt is non-zero. Clobbers R3.
   102  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   103  	MOVD	buf+0(FP), R3
   104  	MOVD	R15, gobuf_sp(R3)
   105  	MOVD	LR, gobuf_pc(R3)
   106  	MOVD	g, gobuf_g(R3)
   107  	MOVD	$0, gobuf_lr(R3)
   108  	MOVD	$0, gobuf_ret(R3)
   109  	// Assert ctxt is zero. See func save.
   110  	MOVD	gobuf_ctxt(R3), R3
   111  	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   112  	BL	runtime·badctxt(SB)
   113  	RET
   114  
   115  // void gogo(Gobuf*)
   116  // restore state from Gobuf; longjmp
        // Loads g, SP, LR, the return value (R3) and the closure context (R12)
        // from buf, clears the sp/ret/lr/ctxt fields in buf, and branches to
        // buf.pc. Does not return to its caller.
   117  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   118  	MOVD	buf+0(FP), R5
   119  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   120  	BL	runtime·save_g(SB)
   121  
   122  	MOVD	0(g), R4	// load through g: faults if g is nil (R4 is not used afterwards)
   123  	MOVD	gobuf_sp(R5), R15
   124  	MOVD	gobuf_lr(R5), LR
   125  	MOVD	gobuf_ret(R5), R3
   126  	MOVD	gobuf_ctxt(R5), R12
   127  	MOVD	$0, gobuf_sp(R5)
   128  	MOVD	$0, gobuf_ret(R5)
   129  	MOVD	$0, gobuf_lr(R5)
   130  	MOVD	$0, gobuf_ctxt(R5)
   131  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   132  	MOVD	gobuf_pc(R5), R6
   133  	BR	(R6)
   134  
   135  // void mcall(fn func(*g))
   136  // Switch to m->g0's stack, call fn(g).
   137  // Fn must never return.  It should gogo(&g->sched)
   138  // to keep running g.
   139  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   140  	// Save caller state in g->sched
   141  	MOVD	R15, (g_sched+gobuf_sp)(g)
   142  	MOVD	LR, (g_sched+gobuf_pc)(g)
   143  	MOVD	$0, (g_sched+gobuf_lr)(g)
   144  	MOVD	g, (g_sched+gobuf_g)(g)
   145  
   146  	// Switch to m->g0 & its stack, call fn.
   147  	MOVD	g, R3				// R3 = calling g, passed to fn below
   148  	MOVD	g_m(g), R8
   149  	MOVD	m_g0(R8), g
   150  	BL	runtime·save_g(SB)
   151  	CMP	g, R3
   152  	BNE	2(PC)				// caller was not g0: continue
   153  	BR	runtime·badmcall(SB)		// caller was already g0
   154  	MOVD	fn+0(FP), R12			// context
   155  	MOVD	0(R12), R4			// code pointer
   156  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
        	// 16-byte frame on g0: 8(R15) = old g (fn's argument), 0(R15) = 0
   157  	SUB	$16, R15
   158  	MOVD	R3, 8(R15)
   159  	MOVD	$0, 0(R15)
   160  	BL	(R4)
   161  	BR	runtime·badmcall2(SB)		// fn returned, which it must not
   162  
   163  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   164  // of the G stack.  We need to distinguish the routine that
   165  // lives at the bottom of the G stack from the one that lives
   166  // at the top of the system stack because the one at the top of
   167  // the system stack terminates the stack walk (see topofstack()).
        // It is never meant to execute: systemstack only stores an address inside
        // it (entry + 16) as the saved pc of the goroutine being switched away from.
   168  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   169  	UNDEF
   170  	BL	(LR)	// make sure this function is not leaf
   171  	RET
   172  
   173  // func systemstack(fn func())
        // Runs fn on the g0 stack. If the caller is already on g0 or gsignal, fn is
        // tail-called directly; if the caller is m->curg, the goroutine state is
        // saved in g->sched, fn runs on g0 and execution switches back afterwards.
        // Any other g is reported through badsystemstack.
   174  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   175  	MOVD	fn+0(FP), R3	// R3 = fn
   176  	MOVD	R3, R12		// context
   177  	MOVD	g_m(g), R4	// R4 = m
   178  
   179  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   180  	CMPBEQ	g, R5, noswitch
   181  
   182  	MOVD	m_g0(R4), R5	// R5 = g0
   183  	CMPBEQ	g, R5, noswitch
   184  
   185  	MOVD	m_curg(R4), R6
   186  	CMPBEQ	g, R6, switch
   187  
   188  	// Bad: g is not gsignal, not g0, not curg. What is it?
   189  	// Hide call from linker nosplit analysis.
   190  	MOVD	$runtime·badsystemstack(SB), R3
   191  	BL	(R3)
   192  
   193  switch:
   194  	// save our state in g->sched.  Pretend to
   195  	// be systemstack_switch if the G stack is scanned.
   196  	MOVD	$runtime·systemstack_switch(SB), R6
   197  	ADD	$16, R6	// get past prologue
   198  	MOVD	R6, (g_sched+gobuf_pc)(g)
   199  	MOVD	R15, (g_sched+gobuf_sp)(g)
   200  	MOVD	$0, (g_sched+gobuf_lr)(g)
   201  	MOVD	g, (g_sched+gobuf_g)(g)
   202  
   203  	// switch to g0
   204  	MOVD	R5, g
   205  	BL	runtime·save_g(SB)
   206  	MOVD	(g_sched+gobuf_sp)(g), R3
   207  	// make it look like mstart called systemstack on g0, to stop traceback
   208  	SUB	$8, R3
   209  	MOVD	$runtime·mstart(SB), R4
   210  	MOVD	R4, 0(R3)
   211  	MOVD	R3, R15
   212  
   213  	// call target function
   214  	MOVD	0(R12), R3	// code pointer
   215  	BL	(R3)
   216  
   217  	// switch back to g
   218  	MOVD	g_m(g), R3
   219  	MOVD	m_curg(R3), g
   220  	BL	runtime·save_g(SB)
   221  	MOVD	(g_sched+gobuf_sp)(g), R15
   222  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved sp now that it has been restored
   223  	RET
   224  
   225  noswitch:
   226  	// already on m stack, just call directly
   227  	// Using a tail call here cleans up tracebacks since we won't stop
   228  	// at an intermediate systemstack.
   229  	MOVD	0(R12), R3	// code pointer
   230  	MOVD	0(R15), LR	// restore LR
   231  	ADD	$8, R15		// pop the 8-byte slot that held LR before branching to fn
   232  	BR	(R3)
   233  
   234  /*
   235   * support for morestack
   236   */
   237  
   238  // Called during function prolog when more stack is needed.
   239  // Caller has already loaded:
   240  // R3: framesize, R4: argsize, R5: LR
   241  //
   242  // The traceback routines see morestack on a g0 as being
   243  // the top of a stack (for example, morestack calling newstack
   244  // calling the scheduler calling newm calling gc), so we must
   245  // record an argument size. For that purpose, it has no arguments.
        //
        // R12 is saved as g->sched.ctxt. Control is handed to newstack on the g0
        // stack and does not come back here. Clobbers R7 and R8.
   246  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   247  	// Cannot grow scheduler stack (m->g0).
   248  	MOVD	g_m(g), R7
   249  	MOVD	m_g0(R7), R8
   250  	CMPBNE	g, R8, 3(PC)	// not g0: skip the two calls below
   251  	BL	runtime·badmorestackg0(SB)
   252  	BL	runtime·abort(SB)
   253  
   254  	// Cannot grow signal stack (m->gsignal).
   255  	MOVD	m_gsignal(R7), R8
   256  	CMP	g, R8
   257  	BNE	3(PC)		// not gsignal: skip the two calls below
   258  	BL	runtime·badmorestackgsignal(SB)
   259  	BL	runtime·abort(SB)
   260  
   261  	// Called from f.
   262  	// Set g->sched to context in f.
   263  	MOVD	R15, (g_sched+gobuf_sp)(g)
   264  	MOVD	LR, R8
   265  	MOVD	R8, (g_sched+gobuf_pc)(g)
   266  	MOVD	R5, (g_sched+gobuf_lr)(g)
   267  	MOVD	R12, (g_sched+gobuf_ctxt)(g)
   268  
   269  	// Called from f.
   270  	// Set m->morebuf to f's caller.
   271  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   272  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   273  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   274  
   275  	// Call newstack on m->g0's stack.
   276  	MOVD	m_g0(R7), g
   277  	BL	runtime·save_g(SB)
   278  	MOVD	(g_sched+gobuf_sp)(g), R15
   279  	// Create a stack frame on g0 to call newstack.
   280  	MOVD	$0, -8(R15)	// Zero saved LR in frame
   281  	SUB	$8, R15
   282  	BL	runtime·newstack(SB)
   283  
   284  	// Not reached, but make sure the return PC from the call to newstack
   285  	// is still in this function, and not the beginning of the next.
   286  	UNDEF
   287  
   288  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Same as morestack, but with the context register (R12) cleared first,
        	// so morestack records a zero ctxt in g->sched.
   289  	MOVD	$0, R12
   290  	BR	runtime·morestack(SB)
   291  
   292  // reflectcall: call a function with the given argument list
   293  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   294  // we don't have variable-sized frames, so we use a small number
   295  // of constant-sized-frame functions to encode a few bits of size in the pc.
   296  // Caution: ugly multiline assembly macros in your future!
   297  
   298  #define DISPATCH(NAME,MAXSIZE)		\
   299  	MOVD	$MAXSIZE, R4;		\
   300  	CMP	R3, R4;		\
   301  	BGT	3(PC);			\
   302  	MOVD	$NAME(SB), R5;	\
   303  	BR	(R5)
   304  // Note: can't just "BR NAME(SB)" - bad inlining results.
        // DISPATCH expects the argument size in R3: when R3 <= MAXSIZE it branches
        // to NAME through R5; otherwise BGT 3(PC) skips the load and branch and
        // falls through to whatever follows the macro. Clobbers R4 and R5.
   305  
   306  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias: tail-branches to reflectcall in this package.
   307  	BR	·reflectcall(SB)
   308  
   309  TEXT ·reflectcall(SB), NOSPLIT, $-8-32
        	// Loads the 32-bit argsize into R3 and tries each DISPATCH entry in
        	// ascending order, branching to the first callN whose frame size is
        	// >= argsize. Sizes above 1073741824 go to badreflectcall.
   310  	MOVWZ argsize+24(FP), R3
   311  	DISPATCH(runtime·call32, 32)
   312  	DISPATCH(runtime·call64, 64)
   313  	DISPATCH(runtime·call128, 128)
   314  	DISPATCH(runtime·call256, 256)
   315  	DISPATCH(runtime·call512, 512)
   316  	DISPATCH(runtime·call1024, 1024)
   317  	DISPATCH(runtime·call2048, 2048)
   318  	DISPATCH(runtime·call4096, 4096)
   319  	DISPATCH(runtime·call8192, 8192)
   320  	DISPATCH(runtime·call16384, 16384)
   321  	DISPATCH(runtime·call32768, 32768)
   322  	DISPATCH(runtime·call65536, 65536)
   323  	DISPATCH(runtime·call131072, 131072)
   324  	DISPATCH(runtime·call262144, 262144)
   325  	DISPATCH(runtime·call524288, 524288)
   326  	DISPATCH(runtime·call1048576, 1048576)
   327  	DISPATCH(runtime·call2097152, 2097152)
   328  	DISPATCH(runtime·call4194304, 4194304)
   329  	DISPATCH(runtime·call8388608, 8388608)
   330  	DISPATCH(runtime·call16777216, 16777216)
   331  	DISPATCH(runtime·call33554432, 33554432)
   332  	DISPATCH(runtime·call67108864, 67108864)
   333  	DISPATCH(runtime·call134217728, 134217728)
   334  	DISPATCH(runtime·call268435456, 268435456)
   335  	DISPATCH(runtime·call536870912, 536870912)
   336  	DISPATCH(runtime·call1073741824, 1073741824)
   337  	MOVD	$runtime·badreflectcall(SB), R5
   338  	BR	(R5)
   339  
   340  #define CALLFN(NAME,MAXSIZE)			\
   341  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   342  	NO_LOCAL_POINTERS;			\
   343  	/* copy arguments to stack */		\
   344  	MOVD	arg+16(FP), R4;			\
   345  	MOVWZ	argsize+24(FP), R5;		\
   346  	MOVD	$stack-MAXSIZE(SP), R6;		\
   347  loopArgs: /* copy 256 bytes at a time */	\
   348  	CMP	R5, $256;			\
   349  	BLT	tailArgs;			\
   350  	SUB	$256, R5;			\
   351  	MVC	$256, 0(R4), 0(R6);		\
   352  	MOVD	$256(R4), R4;			\
   353  	MOVD	$256(R6), R6;			\
   354  	BR	loopArgs;			\
   355  tailArgs: /* copy remaining bytes */		\
   356  	CMP	R5, $0;				\
   357  	BEQ	callFunction;			\
   358  	SUB	$1, R5;				\
   359  	EXRL	$callfnMVC<>(SB), R5;		\
   360  callFunction:					\
   361  	MOVD	f+8(FP), R12;			\
   362  	MOVD	(R12), R8;			\
   363  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   364  	BL	(R8);				\
   365  	/* copy return values back */		\
   366  	MOVD	argtype+0(FP), R7;		\
   367  	MOVD	arg+16(FP), R6;			\
   368  	MOVWZ	n+24(FP), R5;			\
   369  	MOVD	$stack-MAXSIZE(SP), R4;		\
   370  	MOVWZ	retoffset+28(FP), R1;		\
   371  	ADD	R1, R4;				\
   372  	ADD	R1, R6;				\
   373  	SUB	R1, R5;				\
   374  	BL	callRet<>(SB);			\
   375  	RET
        // CALLFN defines one fixed-frame call function NAME with a MAXSIZE-byte
        // frame. It copies argsize bytes from arg into the frame (256 bytes per MVC,
        // then the remainder through EXRL on callfnMVC with R5 = remaining-1),
        // calls f with R12 as the closure context, and then hands callRet these
        // registers: R7 = argtype, R6 = arg+retoffset, R4 = frame+retoffset,
        // R5 = n-retoffset.
   376  
   377  // callRet copies return values back at the end of call*. This is a
   378  // separate function so it can allocate stack space for the arguments
   379  // to reflectcallmove. It does not follow the Go ABI; it expects its
   380  // arguments in registers.
        // Inputs as set up by CALLFN: R7 = argtype, R6 = arg+retoffset,
        // R4 = frame+retoffset, R5 = n-retoffset. They are stored at 8(R15),
        // 16(R15), 24(R15) and 32(R15) as the stack arguments of reflectcallmove.
   381  TEXT callRet<>(SB), NOSPLIT, $32-0
   382  	MOVD	R7, 8(R15)
   383  	MOVD	R6, 16(R15)
   384  	MOVD	R4, 24(R15)
   385  	MOVD	R5, 32(R15)
   386  	BL	runtime·reflectcallmove(SB)
   387  	RET
   388  
   389  CALLFN(·call32, 32)
   390  CALLFN(·call64, 64)
   391  CALLFN(·call128, 128)
   392  CALLFN(·call256, 256)
   393  CALLFN(·call512, 512)
   394  CALLFN(·call1024, 1024)
   395  CALLFN(·call2048, 2048)
   396  CALLFN(·call4096, 4096)
   397  CALLFN(·call8192, 8192)
   398  CALLFN(·call16384, 16384)
   399  CALLFN(·call32768, 32768)
   400  CALLFN(·call65536, 65536)
   401  CALLFN(·call131072, 131072)
   402  CALLFN(·call262144, 262144)
   403  CALLFN(·call524288, 524288)
   404  CALLFN(·call1048576, 1048576)
   405  CALLFN(·call2097152, 2097152)
   406  CALLFN(·call4194304, 4194304)
   407  CALLFN(·call8388608, 8388608)
   408  CALLFN(·call16777216, 16777216)
   409  CALLFN(·call33554432, 33554432)
   410  CALLFN(·call67108864, 67108864)
   411  CALLFN(·call134217728, 134217728)
   412  CALLFN(·call268435456, 268435456)
   413  CALLFN(·call536870912, 536870912)
   414  CALLFN(·call1073741824, 1073741824)
        // One callN per power of two from 32 to 1073741824 bytes; the set must
        // stay in step with the DISPATCH table in reflectcall.
   415  
   416  // Not a function: target for EXRL (execute relative long) instruction.
        // CALLFN executes this MVC via EXRL with R5 = (bytes to copy)-1, copying
        // from 0(R4) to 0(R6). There is deliberately no RET.
   417  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   418  	MVC	$1, 0(R4), 0(R6)
   419  
   420  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// No-op here: returns immediately without spinning or yielding.
   421  	RET
   422  
   423  // void jmpdefer(fv, sp);
   424  // called from deferreturn.
   425  // 1. grab stored LR for caller
   426  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   427  // 3. BR to fn
   428  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   429  	MOVD	0(R15), R1		// R1 = LR saved at the top of the caller's frame
   430  	SUB	$6, R1, LR		// LR = R1 - 6: fn will return to the BL deferreturn
   431  
   432  	MOVD	fv+0(FP), R12		// closure context for fn
   433  	MOVD	argp+8(FP), R15
   434  	SUB	$8, R15			// SP = argp - 8
   435  	MOVD	0(R12), R3		// code pointer
   436  	BR	(R3)
   437  
   438  // Save state of caller into g->sched. Smashes R1.
        // Stores LR as pc and R15 as sp, zeroes lr and ret, and calls badctxt if
        // g->sched.ctxt is non-zero.
   439  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   440  	MOVD	LR, (g_sched+gobuf_pc)(g)
   441  	MOVD	R15, (g_sched+gobuf_sp)(g)
   442  	MOVD	$0, (g_sched+gobuf_lr)(g)
   443  	MOVD	$0, (g_sched+gobuf_ret)(g)
   444  	// Assert ctxt is zero. See func save.
   445  	MOVD	(g_sched+gobuf_ctxt)(g), R1
   446  	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   447  	BL	runtime·badctxt(SB)
   448  	RET
   449  
   450  // func asmcgocall(fn, arg unsafe.Pointer) int32
   451  // Call fn(arg) on the scheduler stack,
   452  // aligned appropriately for the gcc ABI.
   453  // See cgocall.go for more details.
   454  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   455  	// R3 = fn; R4 = arg; R2 = original SP (later arg/result of fn);
   456  	// R5 = old g; R6 = temp; R13 = g; R15 = stack pointer
   457  	MOVD	fn+0(FP), R3
   458  	MOVD	arg+8(FP), R4
   459  
   460  	MOVD	R15, R2		// save original stack pointer
   461  	MOVD	g, R5
   462  
   463  	// Figure out if we need to switch to m->g0 stack.
   464  	// We get called to create new OS threads too, and those
   465  	// come in on the m->g0 stack already.
   466  	MOVD	g_m(g), R6
   467  	MOVD	m_g0(R6), R6
   468  	CMPBEQ	R6, g, g0
   469  	BL	gosave<>(SB)
   470  	MOVD	R6, g
   471  	BL	runtime·save_g(SB)
   472  	MOVD	(g_sched+gobuf_sp)(g), R15
   473  
   474  	// Now on a scheduling stack (a pthread-created stack).
   475  g0:
   476  	// Save room for two of our pointers, plus 160 bytes of callee
   477  	// save area that lives on the caller stack.
   478  	SUB	$176, R15
   479  	MOVD	$~7, R6
   480  	AND	R6, R15                 // 8-byte alignment for gcc ABI
   481  	MOVD	R5, 168(R15)             // save old g on stack
   482  	MOVD	(g_stack+stack_hi)(R5), R5
   483  	SUB	R2, R5
   484  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   485  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   486  	MOVD	R4, R2                  // arg in R2
   487  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   488  
   489  	XOR	R0, R0                  // set R0 back to 0.
   490  	// Restore g, stack pointer.
   491  	MOVD	168(R15), g
   492  	BL	runtime·save_g(SB)
   493  	MOVD	(g_stack+stack_hi)(g), R5
   494  	MOVD	160(R15), R6
   495  	SUB	R6, R5			// SP = stack.hi - saved depth
   496  	MOVD	R5, R15
   497  
   498  	MOVW	R2, ret+16(FP)		// low 32 bits of R2 as left by fn are the result
   499  	RET
   500  
   501  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   502  // Turn the fn into a Go func (by taking its address) and call
   503  // cgocallback_gofunc.
   504  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   505  	MOVD	$fn+0(FP), R3		// address of the fn argument slot, not its value
   506  	MOVD	R3, 8(R15)
   507  	MOVD	frame+8(FP), R3
   508  	MOVD	R3, 16(R15)
   509  	MOVD	framesize+16(FP), R3
   510  	MOVD	R3, 24(R15)
   511  	MOVD	ctxt+24(FP), R3
   512  	MOVD	R3, 32(R15)
   513  	MOVD	$runtime·cgocallback_gofunc(SB), R3
   514  	BL	(R3)			// indirect call through R3
   515  	RET
   516  
   517  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   518  // See cgocall.go for more details.
        // Locals: savedm-8(SP) holds the m found on entry (zero if needm was
        // called); savedsp-16(SP) holds the previous m->g0->sched.sp.
   519  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   520  	NO_LOCAL_POINTERS
   521  
   522  	// Load m and g from thread-local storage.
   523  	MOVB	runtime·iscgo(SB), R3
   524  	CMPBEQ	R3, $0, nocgo
   525  	BL	runtime·load_g(SB)
   526  
   527  nocgo:
   528  	// If g is nil, Go did not create the current thread.
   529  	// Call needm to obtain one for temporary use.
   530  	// In this case, we're running on the thread stack, so there's
   531  	// lots of space, but the linker doesn't know. Hide the call from
   532  	// the linker analysis by using an indirect call.
   533  	CMPBEQ	g, $0, needm
   534  
   535  	MOVD	g_m(g), R8
   536  	MOVD	R8, savedm-8(SP)
   537  	BR	havem
   538  
   539  needm:
   540  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   541  	MOVD	$runtime·needm(SB), R3
   542  	BL	(R3)
   543  
   544  	// Set m->sched.sp = SP, so that if a panic happens
   545  	// during the function we are about to execute, it will
   546  	// have a valid SP to run on the g0 stack.
   547  	// The next few lines (after the havem label)
   548  	// will save this SP onto the stack and then write
   549  	// the same SP back to m->sched.sp. That seems redundant,
   550  	// but if an unrecovered panic happens, unwindm will
   551  	// restore the g->sched.sp from the stack location
   552  	// and then systemstack will try to use it. If we don't set it here,
   553  	// that restored SP will be uninitialized (typically 0) and
   554  	// will not be usable.
   555  	MOVD	g_m(g), R8
   556  	MOVD	m_g0(R8), R3
   557  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   558  
   559  havem:
   560  	// Now there's a valid m, and we're running on its m->g0.
   561  	// Save current m->g0->sched.sp on stack and then set it to SP.
   562  	// Save current sp in m->g0->sched.sp in preparation for
   563  	// switch back to m->curg stack.
   564  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R15) aka savedsp-16(SP).
   565  	MOVD	m_g0(R8), R3
   566  	MOVD	(g_sched+gobuf_sp)(R3), R4
   567  	MOVD	R4, savedsp-16(SP)
   568  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   569  
   570  	// Switch to m->curg stack and call runtime.cgocallbackg.
   571  	// Because we are taking over the execution of m->curg
   572  	// but *not* resuming what had been running, we need to
   573  	// save that information (m->curg->sched) so we can restore it.
   574  	// We can restore m->curg->sched.sp easily, because calling
   575  	// runtime.cgocallbackg leaves SP unchanged upon return.
   576  	// To save m->curg->sched.pc, we push it onto the stack.
   577  	// This has the added benefit that it looks to the traceback
   578  	// routine like cgocallbackg is going to return to that
   579  	// PC (because the frame we allocate below has the same
   580  	// size as cgocallback_gofunc's frame declared above)
   581  	// so that the traceback will seamlessly trace back into
   582  	// the earlier calls.
   583  	//
   584  	// In the new goroutine, -8(SP) is unused (where SP refers to
   585  	// m->curg's SP while we're setting it up, before we've adjusted it).
   586  	MOVD	m_curg(R8), g
   587  	BL	runtime·save_g(SB)
   588  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   589  	MOVD	(g_sched+gobuf_pc)(g), R5
   590  	MOVD	R5, -24(R4)		// saved pc at the bottom of the new 24-byte frame
   591  	MOVD	ctxt+24(FP), R5
   592  	MOVD	R5, -16(R4)		// ctxt in the next slot of that frame
   593  	MOVD	$-24(R4), R15
   594  	BL	runtime·cgocallbackg(SB)
   595  
   596  	// Restore g->sched (== m->curg->sched) from saved values.
   597  	MOVD	0(R15), R5
   598  	MOVD	R5, (g_sched+gobuf_pc)(g)
   599  	MOVD	$24(R15), R4
   600  	MOVD	R4, (g_sched+gobuf_sp)(g)
   601  
   602  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   603  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   604  	// so we do not have to restore it.)
   605  	MOVD	g_m(g), R8
   606  	MOVD	m_g0(R8), g
   607  	BL	runtime·save_g(SB)
   608  	MOVD	(g_sched+gobuf_sp)(g), R15
   609  	MOVD	savedsp-16(SP), R4
   610  	MOVD	R4, (g_sched+gobuf_sp)(g)
   611  
   612  	// If the m on entry was nil, we called needm above to borrow an m
   613  	// for the duration of the call. Since the call is over, return it with dropm.
   614  	MOVD	savedm-8(SP), R6
   615  	CMPBNE	R6, $0, droppedm
   616  	MOVD	$runtime·dropm(SB), R3
   617  	BL	(R3)
   618  droppedm:
   619  
   620  	// Done!
   621  	RET
   622  
   623  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and calls save_g.
   624  TEXT runtime·setg(SB), NOSPLIT, $0-8
   625  	MOVD	gg+0(FP), g
   626  	// This only happens if iscgo, so jump straight to save_g
   627  	BL	runtime·save_g(SB)
   628  	RET
   629  
   630  // void setg_gcc(G*); set g in C TLS.
   631  // Must obey the gcc calling convention.
        // Input: R2 = new g. LR, R10, g and R11 are parked in R1, R3, R4 and R5
        // around the save_g call and restored before returning, so the caller's g
        // register is unchanged on return.
   632  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   633  	// The standard prologue clobbers LR (R14), which is callee-save in
   634  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   635  	MOVD	LR, R1
   636  	// Also save g, R10, and R11 since they're callee-save in C ABI
   637  	MOVD	R10, R3
   638  	MOVD	g, R4
   639  	MOVD	R11, R5
   640  
   641  	MOVD	R2, g
   642  	BL	runtime·save_g(SB)
   643  
   644  	MOVD	R5, R11
   645  	MOVD	R4, g
   646  	MOVD	R3, R10
   647  	MOVD	R1, LR
   648  	RET
   649  
   650  TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
        	// Returns the word at 0(R15), i.e. the LR the calling function saved at
        	// the top of its own frame. NOFRAME keeps R15 equal to the caller's SP.
   651  	MOVD	0(R15), R3		// LR saved by caller
   652  	MOVD	R3, ret+0(FP)
   653  	RET
   654  
   655  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Loads from the address in R0 (expected to be zero), presumably to
        	// fault; UNDEF follows in case the load does not trap.
   656  	MOVW	(R0), R0
   657  	UNDEF
   658  
   659  // int64 runtime·cputicks(void)
        // Stores the TOD clock into the result slot, then clears bit 63 with a
        // shift left/right pair so the returned value is non-negative.
   660  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   661  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   662  	// This means that since about 1972 the msb has been set, making the
   663  	// result of a call to STORE CLOCK (stck) a negative number.
   664  	// We clear the msb to make it positive.
   665  	STCK	ret+0(FP)      // serialises before and after call
   666  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   667  	SLD	$1, R3
   668  	SRD	$1, R3
   669  	MOVD	R3, ret+0(FP)
   670  	RET
   671  
   672  // AES hashing not implemented for s390x
        // Each stub is a single load from the address in R0 (expected to be zero),
        // presumably so that any call faults immediately. None of them has a RET.
   673  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   674  	MOVW	(R0), R15
   675  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   676  	MOVW	(R0), R15
   677  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   678  	MOVW	(R0), R15
   679  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   680  	MOVW	(R0), R15
   681  
   682  // memequal(a, b unsafe.Pointer, size uintptr) bool
        // Loads the memeqbody inputs (R3 = a, R5 = b, R6 = size, R7 = &ret) and
        // tail-branches to it; memeqbody writes the result byte and returns.
   683  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   684  	MOVD	a+0(FP), R3
   685  	MOVD	b+8(FP), R5
   686  	MOVD	size+16(FP), R6
   687  	LA	ret+24(FP), R7
   688  	BR	runtime·memeqbody(SB)
   689  
   690  // memequal_varlen(a, b unsafe.Pointer) bool
        // Like memequal, but the size is read from the closure in R12 instead of
        // being passed as an argument.
   691  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   692  	MOVD	a+0(FP), R3
   693  	MOVD	b+8(FP), R5
   694  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   695  	LA	ret+16(FP), R7
   696  	BR	runtime·memeqbody(SB)
   697  
   698  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
        	// Slices of different length are unequal (ret = 0) without looking at
        	// the data; otherwise memeqbody compares b_len (== a_len) bytes.
   699  	MOVD	a_len+8(FP), R2
   700  	MOVD	b_len+32(FP), R6
   701  	MOVD	a+0(FP), R3
   702  	MOVD	b+24(FP), R5
   703  	LA	ret+48(FP), R7
   704  	CMPBNE	R2, R6, notequal
   705  	BR	runtime·memeqbody(SB)
   706  notequal:
   707  	MOVB	$0, ret+48(FP)
   708  	RET
   709  
   710  // input:
   711  //   R3 = a
   712  //   R5 = b
   713  //   R6 = len
   714  //   R7 = address of output byte (stores 0 or 1 here)
   715  //   a and b have the same length
        // Strategy: 256-byte CLC chunks while len >= 256, a single executed CLC for
        // 32..255 bytes, and 16/8/4/1-byte register compares for fewer than 32.
        // Clobbers R2, R3, R5, R6, R8 and R9.
   716  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   717  	CMPBEQ	R3, R5, equal	// same address: equal without reading memory
   718  loop:
   719  	CMPBEQ	R6, $0, equal
   720  	CMPBLT	R6, $32, tiny
   721  	CMP	R6, $256
   722  	BLT	tail
   723  	CLC	$256, 0(R3), 0(R5)
   724  	BNE	notequal
   725  	SUB	$256, R6
   726  	LA	256(R3), R3
   727  	LA	256(R5), R5
   728  	BR	loop
   729  tail:
        	// 32 <= R6 < 256: run the CLC in memeqbodyclc via EXRL with R8 = R6-1
   730  	SUB	$1, R6, R8
   731  	EXRL	$runtime·memeqbodyclc(SB), R8
   732  	BEQ	equal
   733  notequal:
   734  	MOVB	$0, 0(R7)
   735  	RET
   736  equal:
   737  	MOVB	$1, 0(R7)
   738  	RET
   739  tiny:
        	// R6 < 32: R2 is the running byte offset into both operands
   740  	MOVD	$0, R2
   741  	CMPBLT	R6, $16, lt16
   742  	MOVD	0(R3), R8
   743  	MOVD	0(R5), R9
   744  	CMPBNE	R8, R9, notequal
   745  	MOVD	8(R3), R8
   746  	MOVD	8(R5), R9
   747  	CMPBNE	R8, R9, notequal
   748  	LA	16(R2), R2
   749  	SUB	$16, R6
   750  lt16:
   751  	CMPBLT	R6, $8, lt8
   752  	MOVD	0(R3)(R2*1), R8
   753  	MOVD	0(R5)(R2*1), R9
   754  	CMPBNE	R8, R9, notequal
   755  	LA	8(R2), R2
   756  	SUB	$8, R6
   757  lt8:
   758  	CMPBLT	R6, $4, lt4
   759  	MOVWZ	0(R3)(R2*1), R8
   760  	MOVWZ	0(R5)(R2*1), R9
   761  	CMPBNE	R8, R9, notequal
   762  	LA	4(R2), R2
   763  	SUB	$4, R6
   764  lt4:
        	// 0..3 bytes left: CHECK(n) finishes with "equal" once n bytes have been
        	// compared, otherwise compares byte n at offset R2+n
   765  #define CHECK(n) \
   766  	CMPBEQ	R6, $n, equal \
   767  	MOVB	n(R3)(R2*1), R8 \
   768  	MOVB	n(R5)(R2*1), R9 \
   769  	CMPBNE	R8, R9, notequal
   770  	CHECK(0)
   771  	CHECK(1)
   772  	CHECK(2)
   773  	CHECK(3)
   774  	BR	equal
   775  
   776  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// CLC template that memeqbody runs through EXRL with R8 = length-1.
   777  	CLC	$1, 0(R3), 0(R5)
   778  	RET
   779  
   780  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads the indexbytebody inputs for a []byte argument and tail-branches.
   781  	MOVD	s+0(FP), R3     // s => R3
   782  	MOVD	s_len+8(FP), R4 // s_len => R4
   783  	MOVBZ	c+24(FP), R5    // c => R5
   784  	MOVD	$ret+32(FP), R2 // &ret => R2
   785  	BR	runtime·indexbytebody(SB)
   786  
   787  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// Loads the indexbytebody inputs for a string argument and tail-branches.
   788  	MOVD	s+0(FP), R3     // s => R3
   789  	MOVD	s_len+8(FP), R4 // s_len => R4
   790  	MOVBZ	c+16(FP), R5    // c => R5
   791  	MOVD	$ret+24(FP), R2 // &ret => R2
   792  	BR	runtime·indexbytebody(SB)
   793  
   794  // input:
   795  // R3: s
   796  // R4: s_len
   797  // R5: c -- byte sought
   798  // R2: &ret -- address to put index into
        // Stores the index of the first byte equal to c, or -1 if there is none.
        // Strings shorter than 16 bytes use a byte loop; longer ones use vector
        // instructions when facilities_hasVX is set and SRST otherwise.
        // Clobbers R1, R3, R6, R7, R8, V16-V19; R0 is used by the SRST path and
        // reset to zero before returning.
   799  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   800  	CMPBEQ	R4, $0, notfound
   801  	MOVD	R3, R6          // store base for later
   802  	ADD	R3, R4, R8      // the address after the end of the string
   803  	//if the length is small, use loop; otherwise, use vector or srst search
   804  	CMPBGE	R4, $16, large
   805  
   806  residual:
        	// byte-at-a-time scan of [R3, R8)
   807  	CMPBEQ	R3, R8, notfound
   808  	MOVBZ	0(R3), R7
   809  	LA	1(R3), R3
   810  	CMPBNE	R7, R5, residual
   811  
   812  found:
        	// R3 is one past the matching byte: index = R3 - base - 1
   813  	SUB	R6, R3
   814  	SUB	$1, R3
   815  	MOVD	R3, 0(R2)
   816  	RET
   817  
   818  notfound:
   819  	MOVD	$-1, 0(R2)
   820  	RET
   821  
   822  large:
   823  	MOVBZ	·cpu+facilities_hasVX(SB), R1
   824  	CMPBNE	R1, $0, vectorimpl
   825  
   826  srstimpl:                       // no vector facility
   827  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   828  srstloop:
   829  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   830  	BVS	srstloop        // interrupted - continue
   831  	BGT	notfoundr0
   832  foundr0:
   833  	XOR	R0, R0          // reset R0
   834  	SUB	R6, R8          // remove base
   835  	MOVD	R8, 0(R2)
   836  	RET
   837  notfoundr0:
   838  	XOR	R0, R0          // reset R0
   839  	MOVD	$-1, 0(R2)
   840  	RET
   841  
   842  vectorimpl:
   843  	//if the address is not 16byte aligned, use loop for the header
   844  	MOVD	R3, R8
   845  	AND	$15, R8
   846  	CMPBGT	R8, $0, notaligned
   847  
   848  aligned:
        	// R8 = end of string again; R7 = end rounded down to a 16-byte boundary
   849  	ADD	R6, R4, R8
   850  	MOVD	R8, R7
   851  	AND	$-16, R7
   852  	// replicate c across V17
   853  	VLVGB	$0, R5, V19
   854  	VREPB	$0, V19, V17
   855  
   856  vectorloop:
   857  	CMPBGE	R3, R7, residual	// fewer than 16 bytes left: finish with the byte loop
   858  	VL	0(R3), V16    // load string to be searched into V16
   859  	ADD	$16, R3
   860  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   861  	BVS	vectorloop
   862  
   863  	// when vector search found c in the string
   864  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   865  	SUB	$16, R3
   866  	SUB	R6, R3
   867  	ADD	R3, R7
   868  	MOVD	R7, 0(R2)
   869  	RET
   870  
   871  notaligned:
        	// R8 = next 16-byte boundary above R3; scan bytes until it is reached
   872  	MOVD	R3, R8
   873  	AND	$-16, R8
   874  	ADD     $16, R8
   875  notalignedloop:
   876  	CMPBEQ	R3, R8, aligned
   877  	MOVBZ	0(R3), R7
   878  	LA	1(R3), R3
   879  	CMPBNE	R7, R5, notalignedloop
   880  	BR	found
   881  
   882  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets R3 to zero and returns.
   883  	MOVW	$0, R3
   884  	RET
   885  
   886  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   887  // Must obey the gcc calling convention.
        // The result is left in R2. g, R10, LR and R11 are parked in R1, R3, R4
        // and R5 around the load_g call and restored before returning.
   888  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   889  	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   890  	MOVD	g, R1
   891  	MOVD	R10, R3
   892  	MOVD	LR, R4
   893  	MOVD	R11, R5
   894  
   895  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   896  	MOVD	g_m(g), R2
   897  	MOVD	m_curg(R2), R2
   898  	MOVD	(g_stack+stack_hi)(R2), R2
   899  
   900  	MOVD	R1, g
   901  	MOVD	R3, R10
   902  	MOVD	R4, LR
   903  	MOVD	R5, R11
   904  	RET
   905  
   906  // The top-most function running on a goroutine
   907  // returns to goexit+PCQuantum.
        // The leading 2-byte nop is what sits between goexit and the return target;
        // the trailing one keeps the return address of the BL inside goexit.
   908  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   909  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   910  	BL	runtime·goexit1(SB)	// does not return
   911  	// traceback from goexit1 must hit code range of goexit
   912  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   913  
   914  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
        	// Empty function: returns immediately.
   915  	RET
   916  
   917  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
   918          // Stores are already ordered on s390x, so this is just a
   919          // compile barrier.
        	// No instruction is emitted other than the return.
   920  	RET
   921  
   922  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
   923  	MOVD	$ret+32(FP), R7		// R7 = &ret; cmpbody stores -1/0/1 through it
   924  	MOVD	s2_len+24(FP), R6	// R6 = len(s2)
   925  	MOVD	s2_base+16(FP), R5	// R5 = &s2[0]
   926  	MOVD	s1_len+8(FP), R4	// R4 = len(s1)
   927  	MOVD	s1_base+0(FP), R3	// R3 = &s1[0]
   928  	BR	runtime·cmpbody(SB)	// tail branch: cmpbody returns straight to our caller
   929  
   930  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
   931  	MOVD	$res+48(FP), R7		// R7 = &res; cmpbody stores -1/0/1 through it
   932  	MOVD	s2+32(FP), R6		// R6 = len(s2)
   933  	MOVD	s2+24(FP), R5		// R5 = &s2[0]
   934  	MOVD	s1+8(FP), R4		// R4 = len(s1)
   935  	MOVD	s1+0(FP), R3		// R3 = &s1[0]
   936  	BR	runtime·cmpbody(SB)	// tail branch: cmpbody returns straight to our caller
   937  
   938  // input:
   939  //   R3 = a
   940  //   R4 = alen
   941  //   R5 = b
   942  //   R6 = blen
   943  //   R7 = address of output word (stores -1/0/1 here)
        //
        // Three-way comparison of a[:alen] against b[:blen]. The first
        // min(alen, blen) bytes are compared as unsigned bytes, 256 at a time
        // (the longest span one CLC can take); if they are all equal the
        // shorter operand sorts first. Clobbers R3, R5, R8 and the condition code.
   944  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
   945  	CMPBEQ	R3, R5, cmplengths	// same start address: only the lengths can differ
   946  	MOVD	R4, R8
   947  	CMPBLE	R4, R6, amin
   948  	MOVD	R6, R8
   949  amin:
        	// R8 = min(alen, blen) = number of bytes still to be compared
   950  	CMPBEQ	R8, $0, cmplengths
   951  	CMP	R8, $256
   952  	BLE	tail
   953  loop:
        	// invariant: R8 > 256, R3/R5 point at the first uncompared byte
   954  	CLC	$256, 0(R3), 0(R5)
   955  	BGT	gt
   956  	BLT	lt
   957  	SUB	$256, R8
        	// Step both operands past the chunk just compared. Without this the
        	// loop (and the tail below) would keep re-reading the first 256 bytes
        	// and miss any difference located further in. LA is an address
        	// computation and leaves the condition code untouched.
        	LA	256(R3), R3
        	LA	256(R5), R5
   958  	CMP	R8, $256
   959  	BGT	loop
   960  tail:
        	// 1 <= R8 <= 256 bytes remain. EXRL executes the CLC at cmpbodyclc with
        	// the low byte of R8 (length-1) merged into its length field.
   961  	SUB	$1, R8
   962  	EXRL	$runtime·cmpbodyclc(SB), R8
   963  	BGT	gt
   964  	BLT	lt
   965  cmplengths:
        	// common prefix is identical: order by length
   966  	CMP	R4, R6
   967  	BEQ	eq
   968  	BLT	lt
   969  gt:
   970  	MOVD	$1, 0(R7)
   971  	RET
   972  lt:
   973  	MOVD	$-1, 0(R7)
   974  	RET
   975  eq:
   976  	MOVD	$0, 0(R7)
   977  	RET
   978  
   979  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// A lone CLC of 0(R3) against 0(R5). runtime·cmpbody does not call
        	// this symbol; it names it as the target of an EXRL, with R8 as the
        	// register operand.
   980  	CLC	$1, 0(R3), 0(R5)
   981  	RET
   982  
   983  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX.
   984  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
   985  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// R0 = flag byte, zero-extended
   986  	MOVB	R0, ret+0(FP)				// copy it into the one-byte result
   987  	RET
   988  
   989  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX.
   990  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
   991  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// R0 = flag byte, zero-extended
   992  	MOVB	R0, ret+0(FP)				// copy it into the one-byte result
   993  	RET
   994  
   995  // func indexShortStr(s, sep string) int
   996  // Caller must confirm availability of vx facility before calling.
        // Thin wrapper: loads the two string headers and the address of the
        // result slot into the registers runtime·indexShortStr expects, then
        // branches (not calls) to it, so that routine returns to our caller.
   997  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
   998  	LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
   999  	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1000  	MOVD	$ret+32(FP), R5    // R5=&ret
  1001  	BR	runtime·indexShortStr(SB)
  1002  
  1003  // func indexShortStr(s, sep []byte) int
  1004  // Caller must confirm availability of vx facility before calling.
        // Thin wrapper: loads pointer and length of both slices (the word at
        // offset 16 of each slice header is not read) and the address of the
        // result slot into the registers runtime·indexShortStr expects, then
        // branches (not calls) to it, so that routine returns to our caller.
  1005  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1006  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1007  	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1008  	MOVD	$ret+48(FP), R5    // R5=&ret
  1009  	BR	runtime·indexShortStr(SB)
  1010  
  1011  // s: string we are searching
  1012  // sep: string to search for
  1013  // R1=&s[0], R2=len(s)
  1014  // R3=&sep[0], R4=len(sep)
  1015  // R5=&ret (int)
  1016  // Caller must confirm availability of vx facility before calling.
        //
        // Stores through R5 the byte offset of the first occurrence of sep in s,
        // or -1 if there is none (also -1 when len(sep) is 0 or exceeds len(s)).
        // The search loop is picked by len(sep): 16-positions-per-iteration vector
        // loops for exactly 2, 3 and 4 bytes, a one-position-per-step loop for up
        // to 16 bytes (also used to finish the 2/3/4 byte cases), and
        // one-position-per-step loops using 2, 3 or 4 vector registers for
        // 17-32, 33-48 and 49-64 bytes. len(sep) > 64 is not implemented.
        // Inside the function R7 is the current candidate position in s, R2 is
        // the address of the last candidate position and R4 is len(sep)-1.
  1017  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1018  	CMPBGT	R4, R2, notfound	// sep is longer than s
  1019  	ADD	R1, R2
  1020  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1021  	CMPBEQ	R4, $0, notfound
  1022  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1023  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1024  	MOVD	R1, R7			// R7 = search position, starts at &s[0]
  1025  index2plus:
  1026  	CMPBNE	R4, $1, index3plus	// R4 == 1 means len(sep) == 2
  1027  	MOVD	$15(R7), R9
  1028  	CMPBGE	R9, R2, index2to16	// R7+15 reaches the last position: use the stepping loop
  1029  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1030  	VONE	V16
  1031  	VREPH	$0, V0, V1
  1032  	CMPBGE	R9, R2, index2to16	// same test as above; R9 and R2 are unchanged since
  1033  index2loop:
  1034  	VL	0(R7), V2          // 16 bytes, even indices
  1035  	VL	1(R7), V4          // 16 bytes, odd indices
  1036  	VCEQH	V1, V2, V5         // compare even indices
  1037  	VCEQH	V1, V4, V6         // compare odd indices
  1038  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1039  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1040  	BLT	foundV17
  1041  	MOVD	$16(R7), R7        // R7+=16
  1042  	ADD	$15, R7, R9
  1043  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1044  	CMPBLE	R7, R2, index2to16 // positions remain: finish them in the stepping loop
  1045  	BR	notfound
  1046  
  1047  index3plus:
  1048  	CMPBNE	R4, $2, index4plus	// R4 == 2 means len(sep) == 3
  1049  	ADD	$15, R7, R9
  1050  	CMPBGE	R9, R2, index2to16	// R7+15 reaches the last position: use the stepping loop
  1051  	MOVD	$1, R0			// VLL index 1, used for the load at 16(R7) below
  1052  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1053  	VONE	V16
  1054  	VREPH	$0, V0, V1
  1055  	VREPB	$2, V0, V8
  1056  index3loop:
  1057  	VL	(R7), V2           // load 16-bytes into V2
  1058  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1059  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1060  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1061  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1062  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1063  	VCEQB	V8, V9, V10        // compare last bytes
  1064  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1065  	VN	V7, V10, V7        // AND indices with last byte
  1066  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1067  	BLT	foundV17
  1068  	MOVD	$16(R7), R7        // R7+=16
  1069  	ADD	$15, R7, R9
  1070  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1071  	CMPBLE	R7, R2, index2to16 // positions remain: finish them in the stepping loop
  1072  	BR	notfound
  1073  
  1074  index4plus:
  1075  	CMPBNE	R4, $3, index5plus	// R4 == 3 means len(sep) == 4
  1076  	ADD	$15, R7, R9
  1077  	CMPBGE	R9, R2, index2to16	// R7+15 reaches the last position: use the stepping loop
  1078  	MOVD	$2, R0			// VLL index 2, used for the load at 16(R7) below
  1079  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1080  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1081  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1082  	VONE	V16
  1083  	VREPF	$0, V0, V1
  1084  index4loop:
  1085  	VL	(R7), V2           // load 16-bytes into V2
  1086  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1087  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1088  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1089  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1090  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1091  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1092  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1093  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1094  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1095  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1096  	VSEL	V13, V14, V31, V7  // final merge
  1097  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1098  	BLT	foundV17
  1099  	MOVD	$16(R7), R7        // R7+=16
  1100  	ADD	$15, R7, R9
  1101  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1102  	CMPBLE	R7, R2, index2to16 // positions remain: finish them in the stepping loop
  1103  	BR	notfound
  1104  
  1105  index5plus:
  1106  	CMPBGT	R4, $15, index17plus	// R4 > 15 means len(sep) > 16
  1107  index2to16:
        	// One candidate position per comparison. Reached directly when
        	// len(sep) <= 16 did not match a dedicated loop above, and from the
        	// 2/3/4-byte code when the 16-wide loops cannot be used or have
        	// positions left over.
  1108  	CMPBGT	R7, R2, notfound
  1109  	MOVD	$1(R7), R8
  1110  	CMPBGT	R8, R2, index2to16tail	// only one position left
  1111  index2to16loop:
  1112  	// unrolled 2x
  1113  	VLL	R4, (R7), V1		// candidate at R7
  1114  	VLL	R4, 1(R7), V2		// candidate at R7+1
  1115  	VCEQGS	V0, V1, V3
  1116  	BEQ	found
  1117  	MOVD	$1(R7), R7
  1118  	VCEQGS	V0, V2, V4		// V2 was loaded from the position R7 now holds
  1119  	BEQ	found
  1120  	MOVD	$1(R7), R7
  1121  	CMPBLT	R7, R2, index2to16loop
  1122  	CMPBGT	R7, R2, notfound
  1123  index2to16tail:
        	// exactly one candidate position left (R7 == R2 when falling through)
  1124  	VLL	R4, (R7), V1
  1125  	VCEQGS	V0, V1, V2
  1126  	BEQ	found
  1127  	BR	notfound
  1128  
  1129  index17plus:
  1130  	CMPBGT	R4, $31, index33plus	// R4 > 31 means len(sep) > 32
  1131  	SUB	$16, R4, R0		// R0 = VLL index for the part of sep past byte 16
  1132  	VLL	R0, 16(R3), V1		// V0:V1 = sep
  1133  	VONE	V7
  1134  index17to32loop:
  1135  	VL	(R7), V2
  1136  	VLL	R0, 16(R7), V3
  1137  	VCEQG	V0, V2, V4
  1138  	VCEQG	V1, V3, V5
  1139  	VN	V4, V5, V6		// AND of the two compare results
  1140  	VCEQGS	V6, V7, V8		// is the combined result all ones?
  1141  	BEQ	found
  1142  	MOVD	$1(R7), R7		// advance one position
  1143  	CMPBLE  R7, R2, index17to32loop
  1144  	BR	notfound
  1145  
  1146  index33plus:
  1147  	CMPBGT	R4, $47, index49plus	// R4 > 47 means len(sep) > 48
  1148  	SUB	$32, R4, R0		// R0 = VLL index for the part of sep past byte 32
  1149  	VL	16(R3), V1
  1150  	VLL	R0, 32(R3), V2		// V0:V1:V2 = sep
  1151  	VONE	V11
  1152  index33to48loop:
  1153  	VL	(R7), V3
  1154  	VL	16(R7), V4
  1155  	VLL	R0, 32(R7), V5
  1156  	VCEQG	V0, V3, V6
  1157  	VCEQG	V1, V4, V7
  1158  	VCEQG	V2, V5, V8
  1159  	VN	V6, V7, V9
  1160  	VN	V8, V9, V10		// AND of the three compare results
  1161  	VCEQGS	V10, V11, V12		// is the combined result all ones?
  1162  	BEQ	found
  1163  	MOVD	$1(R7), R7		// advance one position
  1164  	CMPBLE  R7, R2, index33to48loop
  1165  	BR	notfound
  1166  
  1167  index49plus:
  1168  	CMPBGT	R4, $63, index65plus	// R4 > 63 means len(sep) > 64
  1169  	SUB	$48, R4, R0		// R0 = VLL index for the part of sep past byte 48
  1170  	VL	16(R3), V1
  1171  	VL	32(R3), V2
  1172  	VLL	R0, 48(R3), V3		// V0:V1:V2:V3 = sep
  1173  	VONE	V15
  1174  index49to64loop:
  1175  	VL	(R7), V4
  1176  	VL	16(R7), V5
  1177  	VL	32(R7), V6
  1178  	VLL	R0, 48(R7), V7
  1179  	VCEQG	V0, V4, V8
  1180  	VCEQG	V1, V5, V9
  1181  	VCEQG	V2, V6, V10
  1182  	VCEQG	V3, V7, V11
  1183  	VN	V8, V9, V12
  1184  	VN	V10, V11, V13
  1185  	VN	V12, V13, V14		// AND of the four compare results
  1186  	VCEQGS	V14, V15, V16		// is the combined result all ones?
  1187  	BEQ	found
  1188  	MOVD	$1(R7), R7		// advance one position
  1189  	CMPBLE  R7, R2, index49to64loop
        	// falls through to notfound once R7 has passed the last position
  1190  notfound:
  1191  	MOVD	$-1, (R5)
  1192  	RET
  1193  
  1194  index65plus:
  1195  	// not implemented
        	// NOTE(review): the store below looks like a deliberate fault rather
        	// than a result write - it does not go through R5 and R0 is not set
        	// up on this path. Confirm that callers never pass len(sep) > 64.
  1196  	MOVD	$0, (R0)
  1197  	RET
  1198  
  1199  foundV17: // index is in doubleword V17[0]
  1200  	VLGVG	$0, V17, R8
  1201  	ADD	R8, R7			// R7 = address of the match inside the 16-position window
  1202  found:
  1203  	SUB	R1, R7			// convert the address into an offset from &s[0]
  1204  	MOVD	R7, (R5)
  1205  	RET
  1206  
  1207  // This is called from .init_array and follows the platform, not Go, ABI.
  1208  // We are overly conservative. We could only save the registers we use.
  1209  // However, since this function is only called once per loaded module
  1210  // performance is unimportant.
        // The argument in R2 is linked in after the current last element of the
        // moduledata list and then recorded as the new last element.
  1211  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1212  	// Save R6-R15 in the register save area of the calling function.
  1213  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1214  	STMG	R6, R15, 48(R15)
  1215  
  1216  	// append the argument (passed in R2, as per the ELF ABI) to the
  1217  	// moduledata linked list.
  1218  	MOVD	runtime·lastmoduledatap(SB), R1	// R1 = current last element
  1219  	MOVD	R2, moduledata_next(R1)		// last.next = argument
  1220  	MOVD	R2, runtime·lastmoduledatap(SB)	// argument becomes the last element
  1221  
  1222  	// Restore R6-R15.
  1223  	LMG	48(R15), R6, R15
  1224  	RET
  1225  
  1226  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 into the one-byte result.
  1227  	MOVB	$1, ret+0(FP)
  1228  	RET