github.com/filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Bootstrap entry: builds g0 on the incoming stack, optionally calls
        	// _cgo_init, links m0 and g0 together, runs check/args/osinit/schedinit,
        	// queues the function stored at runtime·mainPC via newproc and then
        	// enters mstart.
    11  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    12  	// C TLS base pointer in AR0:AR1
    13  
    14  	// initialize essential registers
    15  	XOR	R0, R0	// R0 = 0
    16  
    17  	SUB	$24, R15	// make room for argc at 8(R15) and argv at 16(R15)
    18  	MOVW	R2, 8(R15) // argc
    19  	MOVD	R3, 16(R15) // argv
    20  
    21  	// create istack out of the given (operating system) stack.
    22  	// _cgo_init may update stackguard.
    23  	MOVD	$runtime·g0(SB), g
    24  	MOVD	R15, R11
    25  	SUB	$(64*1024), R11	// provisional lower bound: 64KB below the current SP
    26  	MOVD	R11, g_stackguard0(g)
    27  	MOVD	R11, g_stackguard1(g)
    28  	MOVD	R11, (g_stack+stack_lo)(g)
    29  	MOVD	R15, (g_stack+stack_hi)(g)
    30  
    31  	// if there is a _cgo_init, call it using the gcc ABI.
    32  	MOVD	_cgo_init(SB), R11
    33  	CMPBEQ	R11, $0, nocgo	// no _cgo_init installed: skip the C call
    34  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
    35  	SLD	$32, R4, R4
    36  	MOVW	AR1, R4			// arg 2: TLS base pointer
    37  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
    38  	MOVD	g, R2			// arg 0: G
    39  	// C functions expect 160 bytes of space on caller stack frame
    40  	// and an 8-byte aligned stack pointer
    41  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
    42  	SUB	$160, R15		// reserve 160 bytes
    43  	MOVD    $~7, R6
    44  	AND 	R6, R15			// 8-byte align
    45  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
    46  	MOVD	R9, R15			// restore stack
    47  	XOR	R0, R0			// zero R0
    48  
    49  nocgo:
    50  	// update stackguard after _cgo_init
    51  	MOVD	(g_stack+stack_lo)(g), R2
    52  	ADD	$const__StackGuard, R2	// guard = stack.lo + _StackGuard
    53  	MOVD	R2, g_stackguard0(g)
    54  	MOVD	R2, g_stackguard1(g)
    55  
    56  	// set the per-goroutine and per-mach "registers"
    57  	MOVD	$runtime·m0(SB), R2
    58  
    59  	// save m->g0 = g0
    60  	MOVD	g, m_g0(R2)
    61  	// save m0 to g0->m
    62  	MOVD	R2, g_m(g)
    63  
    64  	BL	runtime·check(SB)
    65  
    66  	// argc/argv are already prepared on stack
    67  	BL	runtime·args(SB)
    68  	BL	runtime·osinit(SB)
    69  	BL	runtime·schedinit(SB)
    70  
    71  	// create a new goroutine to start program
    72  	MOVD	$runtime·mainPC(SB), R2		// entry
    73  	SUB     $24, R15	// temporary frame for the newproc call
    74  	MOVD 	R2, 16(R15)	// 16(R15) = address of mainPC
    75  	MOVD 	$0, 8(R15)	// 8(R15) = 0
    76  	MOVD 	$0, 0(R15)	// 0(R15) = 0
    77  	BL	runtime·newproc(SB)
    78  	ADD	$24, R15	// drop the temporary frame
    79  
    80  	// start this M
    81  	BL	runtime·mstart(SB)
    82  
    83  	MOVD	$0, 1(R0)	// only reached if mstart returns; stores to address R0+1 (presumably a deliberate fault — TODO confirm)
    84  	RET
    85  
    86  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// mainPC holds the address of runtime·main
    87  GLOBL	runtime·mainPC(SB),RODATA,$8	// 8-byte read-only symbol
    88  
    89  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
    90  	MOVD	$0, 2(R0)	// store to address R0+2; presumably intended to trap — TODO confirm
    91  	RET
    92  
    93  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
    94  	RET	// nothing to initialise in this file: returns immediately
    95  
    96  /*
    97   *  go-routine
    98   */
    99  
   100  // void gosave(Gobuf*)
   101  // save state in Gobuf; setjmp
        // Records SP, LR (as the resume pc) and g in the Gobuf, clears its lr and
        // ret fields, and calls badctxt if the ctxt field is not already zero.
   102  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   103  	MOVD	buf+0(FP), R3	// R3 = Gobuf pointer
   104  	MOVD	R15, gobuf_sp(R3)
   105  	MOVD	LR, gobuf_pc(R3)
   106  	MOVD	g, gobuf_g(R3)
   107  	MOVD	$0, gobuf_lr(R3)
   108  	MOVD	$0, gobuf_ret(R3)
   109  	// Assert ctxt is zero. See func save.
   110  	MOVD	gobuf_ctxt(R3), R3
   111  	CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   112  	BL	runtime·badctxt(SB)
   113  	RET
   114  
   115  // void gogo(Gobuf*)
   116  // restore state from Gobuf; longjmp
        // Loads g, SP, LR, the return value (R3) and the closure context (R12)
        // from the Gobuf, clears those Gobuf fields, and branches to the saved pc.
   117  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   118  	MOVD	buf+0(FP), R5	// R5 = Gobuf pointer
   119  
   120  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   121  	MOVD	gobuf_ctxt(R5), R1
   122  	CMPBEQ	R1, $0, nilctxt
   123  	MOVD	$gobuf_ctxt(R5), R1	// R1 = &buf.ctxt
   124  	MOVD	R1, 8(R15)	// first argument: address of the slot
   125  	MOVD	R0, 16(R15)	// second argument: R0 (kept at zero in this file)
   126  	BL	runtime·writebarrierptr_prewrite(SB)
   127  	MOVD	buf+0(FP), R5	// reload R5 after the call
   128  
   129  nilctxt:
   130  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   131  	BL	runtime·save_g(SB)
   132  
   133  	MOVD	0(g), R4	// load through g; presumably this is the nil check mentioned above
   134  	MOVD	gobuf_sp(R5), R15
   135  	MOVD	gobuf_lr(R5), LR
   136  	MOVD	gobuf_ret(R5), R3
   137  	MOVD	gobuf_ctxt(R5), R12
   138  	MOVD	$0, gobuf_sp(R5)	// clear the fields that were just consumed
   139  	MOVD	$0, gobuf_ret(R5)
   140  	MOVD	$0, gobuf_lr(R5)
   141  	MOVD	$0, gobuf_ctxt(R5)
   142  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   143  	MOVD	gobuf_pc(R5), R6
   144  	BR	(R6)	// resume at the saved pc
   145  
   146  // void mcall(fn func(*g))
   147  // Switch to m->g0's stack, call fn(g).
   148  // Fn must never return.  It should gogo(&g->sched)
   149  // to keep running g.
   150  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   151  	// Save caller state in g->sched
   152  	MOVD	R15, (g_sched+gobuf_sp)(g)
   153  	MOVD	LR, (g_sched+gobuf_pc)(g)
   154  	MOVD	$0, (g_sched+gobuf_lr)(g)
   155  	MOVD	g, (g_sched+gobuf_g)(g)
   156  
   157  	// Switch to m->g0 & its stack, call fn.
   158  	MOVD	g, R3	// R3 = calling g, later passed to fn
   159  	MOVD	g_m(g), R8
   160  	MOVD	m_g0(R8), g
   161  	BL	runtime·save_g(SB)
   162  	CMP	g, R3
   163  	BNE	2(PC)	// caller was not g0: skip the badmcall branch
   164  	BR	runtime·badmcall(SB)
   165  	MOVD	fn+0(FP), R12			// context
   166  	MOVD	0(R12), R4			// code pointer
   167  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
   168  	SUB	$16, R15	// frame for the call: 0(R15) zeroed, 8(R15) = argument
   169  	MOVD	R3, 8(R15)
   170  	MOVD	$0, 0(R15)
   171  	BL	(R4)
   172  	BR	runtime·badmcall2(SB)	// fn returned, which it must never do
   173  
   174  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   175  // of the G stack.  We need to distinguish the routine that
   176  // lives at the bottom of the G stack from the one that lives
   177  // at the top of the system stack because the one at the top of
   178  // the system stack terminates the stack walk (see topofstack()).
        // systemstack stores this symbol's address plus 16 as the saved pc.
   179  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   180  	UNDEF
   181  	BL	(LR)	// make sure this function is not leaf
   182  	RET
   183  
   184  // func systemstack(fn func())
        // Runs fn on the g0 stack. If the caller is already on g0 or gsignal, fn is
        // called directly; if the caller is m->curg, the stack is switched to g0
        // for the call and switched back afterwards. Any other g is reported via
        // badsystemstack.
   185  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   186  	MOVD	fn+0(FP), R3	// R3 = fn
   187  	MOVD	R3, R12		// context
   188  	MOVD	g_m(g), R4	// R4 = m
   189  
   190  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   191  	CMPBEQ	g, R5, noswitch
   192  
   193  	MOVD	m_g0(R4), R5	// R5 = g0
   194  	CMPBEQ	g, R5, noswitch
   195  
   196  	MOVD	m_curg(R4), R6
   197  	CMPBEQ	g, R6, switch
   198  
   199  	// Bad: g is not gsignal, not g0, not curg. What is it?
   200  	// Hide call from linker nosplit analysis.
   201  	MOVD	$runtime·badsystemstack(SB), R3
   202  	BL	(R3)
   203  
   204  switch:
   205  	// save our state in g->sched.  Pretend to
   206  	// be systemstack_switch if the G stack is scanned.
   207  	MOVD	$runtime·systemstack_switch(SB), R6
   208  	ADD	$16, R6	// get past prologue
   209  	MOVD	R6, (g_sched+gobuf_pc)(g)
   210  	MOVD	R15, (g_sched+gobuf_sp)(g)
   211  	MOVD	$0, (g_sched+gobuf_lr)(g)
   212  	MOVD	g, (g_sched+gobuf_g)(g)
   213  
   214  	// switch to g0
   215  	MOVD	R5, g	// R5 still holds m->g0 from the check above
   216  	BL	runtime·save_g(SB)
   217  	MOVD	(g_sched+gobuf_sp)(g), R3
   218  	// make it look like mstart called systemstack on g0, to stop traceback
   219  	SUB	$8, R3
   220  	MOVD	$runtime·mstart(SB), R4
   221  	MOVD	R4, 0(R3)
   222  	MOVD	R3, R15
   223  
   224  	// call target function
   225  	MOVD	0(R12), R3	// code pointer
   226  	BL	(R3)
   227  
   228  	// switch back to g
   229  	MOVD	g_m(g), R3
   230  	MOVD	m_curg(R3), g
   231  	BL	runtime·save_g(SB)
   232  	MOVD	(g_sched+gobuf_sp)(g), R15	// restore the SP saved at the switch label
   233  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved SP once it has been consumed
   234  	RET
   235  
   236  noswitch:
   237  	// already on m stack, just call directly
   238  	MOVD	0(R12), R3	// code pointer
   239  	BL	(R3)
   240  	RET
   241  
   242  /*
   243   * support for morestack
   244   */
   245  
   246  // Called during function prolog when more stack is needed.
   247  // Caller has already loaded:
   248  // R3: framesize, R4: argsize, R5: LR
   249  //
   250  // The traceback routines see morestack on a g0 as being
   251  // the top of a stack (for example, morestack calling newstack
   252  // calling the scheduler calling newm calling gc), so we must
   253  // record an argument size. For that purpose, it has no arguments.
   254  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   255  	// Cannot grow scheduler stack (m->g0).
   256  	MOVD	g_m(g), R7	// R7 = m, used throughout this function
   257  	MOVD	m_g0(R7), R8
   258  	CMPBNE	g, R8, 3(PC)	// not on g0: skip the two error calls
   259  	BL	runtime·badmorestackg0(SB)
   260  	BL	runtime·abort(SB)
   261  
   262  	// Cannot grow signal stack (m->gsignal).
   263  	MOVD	m_gsignal(R7), R8
   264  	CMP	g, R8
   265  	BNE	3(PC)	// not on gsignal: skip the two error calls
   266  	BL	runtime·badmorestackgsignal(SB)
   267  	BL	runtime·abort(SB)
   268  
   269  	// Called from f.
   270  	// Set g->sched to context in f.
   271  	MOVD	R15, (g_sched+gobuf_sp)(g)
   272  	MOVD	LR, R8
   273  	MOVD	R8, (g_sched+gobuf_pc)(g)	// resume pc = return address of the morestack call
   274  	MOVD	R5, (g_sched+gobuf_lr)(g)	// R5 = f's LR, loaded by the caller (see header)
   275  	// newstack will fill gobuf.ctxt.
   276  
   277  	// Called from f.
   278  	// Set m->morebuf to f's caller.
   279  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   280  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   281  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   282  
   283  	// Call newstack on m->g0's stack.
   284  	MOVD	m_g0(R7), g
   285  	BL	runtime·save_g(SB)
   286  	MOVD	(g_sched+gobuf_sp)(g), R15
   287  	// Create a stack frame on g0 to call newstack.
   288  	MOVD	$0, -16(R15)	// Zero saved LR in frame
   289  	SUB	$16, R15
   290  	MOVD	R12, 8(R15)	// ctxt argument
   291  	BL	runtime·newstack(SB)
   292  
   293  	// Not reached, but make sure the return PC from the call to newstack
   294  	// is still in this function, and not the beginning of the next.
   295  	UNDEF
   296  
   297  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   298  	MOVD	$0, R12	// no closure context: morestack passes R12 to newstack as ctxt
   299  	BR	runtime·morestack(SB)	// tail-branch, LR is left untouched
   300  
   301  // reflectcall: call a function with the given argument list
   302  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   303  // we don't have variable-sized frames, so we use a small number
   304  // of constant-sized-frame functions to encode a few bits of size in the pc.
   305  // Caution: ugly multiline assembly macros in your future!
   306  
   307  #define DISPATCH(NAME,MAXSIZE)		\
   308  	MOVD	$MAXSIZE, R4;		\
   309  	CMP	R3, R4;		\
   310  	BGT	3(PC);			\
   311  	MOVD	$NAME(SB), R5;	\
   312  	BR	(R5)
   313  // Note: can't just "BR NAME(SB)" - bad inlining results.
        // DISPATCH compares R3 (the argument size loaded by the user of the
        // macro) with MAXSIZE: when R3 <= MAXSIZE it branches to NAME through
        // R5, otherwise it falls through to whatever follows. Clobbers R4 and R5.
   314  
   315  TEXT reflect·call(SB), NOSPLIT, $0-0
   316  	BR	·reflectcall(SB)	// reflect.call is an alias: tail-branch to reflectcall in this package
   317  
   318  TEXT ·reflectcall(SB), NOSPLIT, $-8-32
        	// Loads argsize into R3 and branches to the first call<N> routine whose
        	// N is at least argsize; sizes above 1073741824 go to badreflectcall.
   319  	MOVWZ argsize+24(FP), R3
   320  	DISPATCH(runtime·call32, 32)
   321  	DISPATCH(runtime·call64, 64)
   322  	DISPATCH(runtime·call128, 128)
   323  	DISPATCH(runtime·call256, 256)
   324  	DISPATCH(runtime·call512, 512)
   325  	DISPATCH(runtime·call1024, 1024)
   326  	DISPATCH(runtime·call2048, 2048)
   327  	DISPATCH(runtime·call4096, 4096)
   328  	DISPATCH(runtime·call8192, 8192)
   329  	DISPATCH(runtime·call16384, 16384)
   330  	DISPATCH(runtime·call32768, 32768)
   331  	DISPATCH(runtime·call65536, 65536)
   332  	DISPATCH(runtime·call131072, 131072)
   333  	DISPATCH(runtime·call262144, 262144)
   334  	DISPATCH(runtime·call524288, 524288)
   335  	DISPATCH(runtime·call1048576, 1048576)
   336  	DISPATCH(runtime·call2097152, 2097152)
   337  	DISPATCH(runtime·call4194304, 4194304)
   338  	DISPATCH(runtime·call8388608, 8388608)
   339  	DISPATCH(runtime·call16777216, 16777216)
   340  	DISPATCH(runtime·call33554432, 33554432)
   341  	DISPATCH(runtime·call67108864, 67108864)
   342  	DISPATCH(runtime·call134217728, 134217728)
   343  	DISPATCH(runtime·call268435456, 268435456)
   344  	DISPATCH(runtime·call536870912, 536870912)
   345  	DISPATCH(runtime·call1073741824, 1073741824)
   346  	MOVD	$runtime·badreflectcall(SB), R5	// no size class matched
   347  	BR	(R5)
   348  
   349  #define CALLFN(NAME,MAXSIZE)			\
   350  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
        	/* Defines NAME with a MAXSIZE-byte frame: copies argsize bytes */	\
        	/* from arg into the frame, calls the function value f, then   */	\
        	/* hands the bytes from retoffset onwards to callRet.          */	\
   351  	NO_LOCAL_POINTERS;			\
   352  	/* copy arguments to stack */		\
   353  	MOVD	arg+16(FP), R4;			\
   354  	MOVWZ	argsize+24(FP), R5;		\
   355  	MOVD	$stack-MAXSIZE(SP), R6;		\
   356  loopArgs: /* copy 256 bytes at a time */	\
   357  	CMP	R5, $256;			\
   358  	BLT	tailArgs;			\
   359  	SUB	$256, R5;			\
   360  	MVC	$256, 0(R4), 0(R6);		\
   361  	MOVD	$256(R4), R4;			\
   362  	MOVD	$256(R6), R6;			\
   363  	BR	loopArgs;			\
   364  tailArgs: /* copy remaining bytes */		\
   365  	CMP	R5, $0;				\
   366  	BEQ	callFunction;			\
   367  	SUB	$1, R5;				\
   368  	EXRL	$callfnMVC<>(SB), R5;		\
   369  callFunction:					\
   370  	MOVD	f+8(FP), R12;			\
   371  	MOVD	(R12), R8;			\
   372  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   373  	BL	(R8);				\
   374  	/* copy return values back */		\
   375  	/* R7 = argtype, R6 = arg+retoffset, R4 = frame+retoffset, R5 = n-retoffset */	\
   376  	MOVD	argtype+0(FP), R7;		\
   376  	MOVD	arg+16(FP), R6;			\
   377  	MOVWZ	n+24(FP), R5;			\
   378  	MOVD	$stack-MAXSIZE(SP), R4;		\
   379  	MOVWZ	retoffset+28(FP), R1;		\
   380  	ADD	R1, R4;				\
   381  	ADD	R1, R6;				\
   382  	SUB	R1, R5;				\
   383  	BL	callRet<>(SB);			\
   384  	RET
   385  
   386  // callRet copies return values back at the end of call*. This is a
   387  // separate function so it can allocate stack space for the arguments
   388  // to reflectcallmove. It does not follow the Go ABI; it expects its
   389  // arguments in registers.
        // Register inputs, as set up by CALLFN: R7, R6, R4, R5. They are stored in
        // that order as the four stack arguments of reflectcallmove.
   390  TEXT callRet<>(SB), NOSPLIT, $32-0
   391  	MOVD	R7, 8(R15)
   392  	MOVD	R6, 16(R15)
   393  	MOVD	R4, 24(R15)
   394  	MOVD	R5, 32(R15)
   395  	BL	runtime·reflectcallmove(SB)
   396  	RET
   397  
   398  CALLFN(·call32, 32)
        // One fixed-frame call<N> routine per size class used by reflectcall:
        // N doubles from 32 bytes up to 1073741824 bytes.
   399  CALLFN(·call64, 64)
   400  CALLFN(·call128, 128)
   401  CALLFN(·call256, 256)
   402  CALLFN(·call512, 512)
   403  CALLFN(·call1024, 1024)
   404  CALLFN(·call2048, 2048)
   405  CALLFN(·call4096, 4096)
   406  CALLFN(·call8192, 8192)
   407  CALLFN(·call16384, 16384)
   408  CALLFN(·call32768, 32768)
   409  CALLFN(·call65536, 65536)
   410  CALLFN(·call131072, 131072)
   411  CALLFN(·call262144, 262144)
   412  CALLFN(·call524288, 524288)
   413  CALLFN(·call1048576, 1048576)
   414  CALLFN(·call2097152, 2097152)
   415  CALLFN(·call4194304, 4194304)
   416  CALLFN(·call8388608, 8388608)
   417  CALLFN(·call16777216, 16777216)
   418  CALLFN(·call33554432, 33554432)
   419  CALLFN(·call67108864, 67108864)
   420  CALLFN(·call134217728, 134217728)
   421  CALLFN(·call268435456, 268435456)
   422  CALLFN(·call536870912, 536870912)
   423  CALLFN(·call1073741824, 1073741824)
   424  
   425  // Not a function: target for EXRL (execute relative long) instruction.
        // CALLFN executes this MVC with R5 = remaining bytes - 1 to copy the
        // final partial chunk from 0(R4) to 0(R6).
   426  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   427  	MVC	$1, 0(R4), 0(R6)
   428  
   429  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   430  	RET	// no-op on this port: returns immediately
   431  
   432  // void jmpdefer(fv, sp);
   433  // called from deferreturn.
   434  // 1. grab stored LR for caller
   435  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   436  // 3. BR to fn
   437  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   438  	MOVD	0(R15), R1	// R1 = return address stored at the top of the caller's frame
   439  	SUB	$6, R1, LR	// LR = R1 - 6, so fn returns to the BL deferreturn
   440  
   441  	MOVD	fv+0(FP), R12	// R12 = closure context
   442  	MOVD	argp+8(FP), R15
   443  	SUB	$8, R15	// SP = argp - 8
   444  	MOVD	0(R12), R3	// R3 = code pointer
   445  	BR	(R3)
   446  
   447  // Save state of caller into g->sched. Smashes R1.
        // Stores LR as the pc and R15 as the sp, clears lr and ret, and calls
        // badctxt if g->sched.ctxt is not zero.
   448  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   449  	MOVD	LR, (g_sched+gobuf_pc)(g)
   450  	MOVD	R15, (g_sched+gobuf_sp)(g)
   451  	MOVD	$0, (g_sched+gobuf_lr)(g)
   452  	MOVD	$0, (g_sched+gobuf_ret)(g)
   453  	// Assert ctxt is zero. See func save.
   454  	MOVD	(g_sched+gobuf_ctxt)(g), R1
   455  	CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   456  	BL	runtime·badctxt(SB)
   457  	RET
   458  
   459  // func asmcgocall(fn, arg unsafe.Pointer) int32
   460  // Call fn(arg) on the scheduler stack,
   461  // aligned appropriately for the gcc ABI.
   462  // See cgocall.go for more details.
   463  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   464  	// R3 = fn; R4 = arg; R2 = original SP; R5 = original g; R13 = g; R15 = stack pointer
   465  	// R6 = m->g0, later reused as the alignment mask
   466  	MOVD	fn+0(FP), R3
   467  	MOVD	arg+8(FP), R4
   468  
   469  	MOVD	R15, R2		// save original stack pointer
   470  	MOVD	g, R5
   471  
   472  	// Figure out if we need to switch to m->g0 stack.
   473  	// We get called to create new OS threads too, and those
   474  	// come in on the m->g0 stack already.
   475  	MOVD	g_m(g), R6
   476  	MOVD	m_g0(R6), R6
   477  	CMPBEQ	R6, g, g0
   478  	BL	gosave<>(SB)
   479  	MOVD	R6, g
   480  	BL	runtime·save_g(SB)
   481  	MOVD	(g_sched+gobuf_sp)(g), R15
   482  
   483  	// Now on a scheduling stack (a pthread-created stack).
   484  g0:
   485  	// Save room for two of our pointers, plus 160 bytes of callee
   486  	// save area that lives on the caller stack.
   487  	SUB	$176, R15
   488  	MOVD	$~7, R6
   489  	AND	R6, R15                 // 8-byte alignment for gcc ABI
   490  	MOVD	R5, 168(R15)             // save old g on stack
   491  	MOVD	(g_stack+stack_hi)(R5), R5
   492  	SUB	R2, R5
   493  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   494  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   495  	MOVD	R4, R2                  // arg in R2
   496  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   497  
   498  	XOR	R0, R0                  // set R0 back to 0.
   499  	// Restore g, stack pointer.
   500  	MOVD	168(R15), g
   501  	BL	runtime·save_g(SB)
   502  	MOVD	(g_stack+stack_hi)(g), R5
   503  	MOVD	160(R15), R6
   504  	SUB	R6, R5	// SP = stack.hi - saved depth
   505  	MOVD	R5, R15
   506  
   507  	MOVW	R2, ret+16(FP)	// R2 holds the C function's result
   508  	RET
   509  
   510  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   511  // Turn the fn into a Go func (by taking its address) and call
   512  // cgocallback_gofunc.
   513  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   514  	MOVD	$fn+0(FP), R3	// address of the fn argument slot, not its value
   515  	MOVD	R3, 8(R15)
   516  	MOVD	frame+8(FP), R3
   517  	MOVD	R3, 16(R15)
   518  	MOVD	framesize+16(FP), R3
   519  	MOVD	R3, 24(R15)
   520  	MOVD	ctxt+24(FP), R3
   521  	MOVD	R3, 32(R15)
   522  	MOVD	$runtime·cgocallback_gofunc(SB), R3
   523  	BL	(R3)	// indirect call through R3
   524  	RET
   525  
   526  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   527  // See cgocall.go for more details.
   528  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   529  	NO_LOCAL_POINTERS
   530  
   531  	// Load m and g from thread-local storage.
   532  	MOVB	runtime·iscgo(SB), R3
   533  	CMPBEQ	R3, $0, nocgo
   534  	BL	runtime·load_g(SB)
   535  
   536  nocgo:
   537  	// If g is nil, Go did not create the current thread.
   538  	// Call needm to obtain one for temporary use.
   539  	// In this case, we're running on the thread stack, so there's
   540  	// lots of space, but the linker doesn't know. Hide the call from
   541  	// the linker analysis by using an indirect call.
   542  	CMPBEQ	g, $0, needm
   543  
   544  	MOVD	g_m(g), R8
   545  	MOVD	R8, savedm-8(SP)	// remember the m we entered with
   546  	BR	havem
   547  
   548  needm:
   549  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   550  	MOVD	$runtime·needm(SB), R3
   551  	BL	(R3)
   552  
   553  	// Set m->sched.sp = SP, so that if a panic happens
   554  	// during the function we are about to execute, it will
   555  	// have a valid SP to run on the g0 stack.
   556  	// The next few lines (after the havem label)
   557  	// will save this SP onto the stack and then write
   558  	// the same SP back to m->sched.sp. That seems redundant,
   559  	// but if an unrecovered panic happens, unwindm will
   560  	// restore the g->sched.sp from the stack location
   561  	// and then systemstack will try to use it. If we don't set it here,
   562  	// that restored SP will be uninitialized (typically 0) and
   563  	// will not be usable.
   564  	MOVD	g_m(g), R8
   565  	MOVD	m_g0(R8), R3
   566  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   567  
   568  havem:
   569  	// Now there's a valid m, and we're running on its m->g0.
   570  	// Save current m->g0->sched.sp on stack and then set it to SP.
   571  	// Save current sp in m->g0->sched.sp in preparation for
   572  	// switch back to m->curg stack.
   573  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
        	// NOTE(review): this file uses R15 as the stack pointer, so "8(R1)" above
        	// looks like a leftover from another port — confirm.
   574  	MOVD	m_g0(R8), R3
   575  	MOVD	(g_sched+gobuf_sp)(R3), R4
   576  	MOVD	R4, savedsp-16(SP)
   577  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   578  
   579  	// Switch to m->curg stack and call runtime.cgocallbackg.
   580  	// Because we are taking over the execution of m->curg
   581  	// but *not* resuming what had been running, we need to
   582  	// save that information (m->curg->sched) so we can restore it.
   583  	// We can restore m->curg->sched.sp easily, because calling
   584  	// runtime.cgocallbackg leaves SP unchanged upon return.
   585  	// To save m->curg->sched.pc, we push it onto the stack.
   586  	// This has the added benefit that it looks to the traceback
   587  	// routine like cgocallbackg is going to return to that
   588  	// PC (because the frame we allocate below has the same
   589  	// size as cgocallback_gofunc's frame declared above)
   590  	// so that the traceback will seamlessly trace back into
   591  	// the earlier calls.
   592  	//
   593  	// In the new goroutine, -8(SP) is unused (where SP refers to
   594  	// m->curg's SP while we're setting it up, before we've adjusted it).
   595  	MOVD	m_curg(R8), g
   596  	BL	runtime·save_g(SB)
   597  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   598  	MOVD	(g_sched+gobuf_pc)(g), R5
   599  	MOVD	R5, -24(R4)	// saved sched.pc at the base of the new 24-byte frame
   600  	MOVD	ctxt+24(FP), R5
   601  	MOVD	R5, -16(R4)	// ctxt passed in the first argument slot
   602  	MOVD	$-24(R4), R15
   603  	BL	runtime·cgocallbackg(SB)
   604  
   605  	// Restore g->sched (== m->curg->sched) from saved values.
   606  	MOVD	0(R15), R5
   607  	MOVD	R5, (g_sched+gobuf_pc)(g)
   608  	MOVD	$24(R15), R4
   609  	MOVD	R4, (g_sched+gobuf_sp)(g)
   610  
   611  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   612  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   613  	// so we do not have to restore it.)
   614  	MOVD	g_m(g), R8
   615  	MOVD	m_g0(R8), g
   616  	BL	runtime·save_g(SB)
   617  	MOVD	(g_sched+gobuf_sp)(g), R15
   618  	MOVD	savedsp-16(SP), R4
   619  	MOVD	R4, (g_sched+gobuf_sp)(g)
   620  
   621  	// If the m on entry was nil, we called needm above to borrow an m
   622  	// for the duration of the call. Since the call is over, return it with dropm.
   623  	MOVD	savedm-8(SP), R6
   624  	CMPBNE	R6, $0, droppedm	// entered with an m: nothing to drop
   625  	MOVD	$runtime·dropm(SB), R3
   626  	BL	(R3)
   627  droppedm:
   628  
   629  	// Done!
   630  	RET
   631  
   632  // void setg(G*); set g. for use by needm.
   633  TEXT runtime·setg(SB), NOSPLIT, $0-8
   634  	MOVD	gg+0(FP), g	// install the argument as the current g
   635  	// This only happens if iscgo, so jump straight to save_g
   636  	BL	runtime·save_g(SB)
   637  	RET
   638  
   639  // void setg_gcc(G*); set g in C TLS.
   640  // Must obey the gcc calling convention.
        // Input: R2 = G pointer. LR, R10, g and R11 are parked in R1, R3, R4 and R5
        // across the save_g call and restored before returning.
   641  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   642  	// The standard prologue clobbers LR (R14), which is callee-save in
   643  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   644  	MOVD	LR, R1
   645  	// Also save g, R10, and R11 since they're callee-save in C ABI
   646  	MOVD	R10, R3
   647  	MOVD	g, R4
   648  	MOVD	R11, R5
   649  
   650  	MOVD	R2, g
   651  	BL	runtime·save_g(SB)
   652  
   653  	MOVD	R5, R11
   654  	MOVD	R4, g
   655  	MOVD	R3, R10
   656  	MOVD	R1, LR
   657  	RET
   658  
   659  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
        	// Returns the doubleword found at 16(R15) in this function's frame.
   660  	MOVD	16(R15), R3		// LR saved by caller
   661  	MOVD	R3, ret+8(FP)
   662  	RET
   663  
   664  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   665  	MOVW	(R0), R0	// load from the address in R0 (kept at zero in this file); presumably faults — TODO confirm
   666  	UNDEF
   667  
   668  // int64 runtime·cputicks(void)
   669  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   670  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   671  	// This means that since about 1972 the msb has been set, making the
   672  	// result of a call to STORE CLOCK (stck) a negative number.
   673  	// We clear the msb to make it positive.
   674  	STCK	ret+0(FP)      // serialises before and after call
   675  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   676  	SLD	$1, R3	// shift left then right by one: clears the most significant bit
   677  	SRD	$1, R3
   678  	MOVD	R3, ret+0(FP)
   679  	RET
   680  
   681  // AES hashing not implemented for s390x
        // Each stub is a single load from the address in R0 into R15 with no RET,
        // so control would fall through to the next symbol. Presumably the load
        // is meant to fault if a stub is ever called — TODO confirm.
   682  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   683  	MOVW	(R0), R15
   684  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   685  	MOVW	(R0), R15
   686  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   687  	MOVW	(R0), R15
   688  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   689  	MOVW	(R0), R15
   690  
   691  // memequal(a, b unsafe.Pointer, size uintptr) bool
        // Loads the memeqbody register arguments and tail-branches to it;
        // memeqbody writes the result byte through R7.
   692  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   693  	MOVD	a+0(FP), R3
   694  	MOVD	b+8(FP), R5
   695  	MOVD	size+16(FP), R6
   696  	LA	ret+24(FP), R7	// R7 = address of the result slot
   697  	BR	runtime·memeqbody(SB)
   698  
   699  // memequal_varlen(a, b unsafe.Pointer) bool
        // Same as memequal, except the length comes from the closure in R12.
   700  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   701  	MOVD	a+0(FP), R3
   702  	MOVD	b+8(FP), R5
   703  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   704  	LA	ret+16(FP), R7	// R7 = address of the result slot
   705  	BR	runtime·memeqbody(SB)
   706  
   707  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
        	// Slices of different length are unequal; otherwise memeqbody compares
        	// the contents and stores the result through R7.
   708  	MOVD	a_len+8(FP), R2
   709  	MOVD	b_len+32(FP), R6
   710  	MOVD	a+0(FP), R3
   711  	MOVD	b+24(FP), R5
   712  	LA	ret+48(FP), R7	// R7 = address of the result slot
   713  	CMPBNE	R2, R6, notequal
   714  	BR	runtime·memeqbody(SB)
   715  notequal:
   716  	MOVB	$0, ret+48(FP)
   717  	RET
   718  
   719  // input:
   720  //   R3 = a
   721  //   R5 = b
   722  //   R6 = len
   723  //   R7 = address of output byte (stores 0 or 1 here)
   724  //   a and b have the same length
        // Strategy: identical pointers or zero length are equal; 256 bytes or more
        // are compared in 256-byte CLC chunks; 32..255 bytes use one EXRL-executed
        // CLC; fewer than 32 bytes use 8/4/1-byte loads. Clobbers R2, R8, R9.
   725  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   726  	CMPBEQ	R3, R5, equal	// same address: trivially equal
   727  loop:
   728  	CMPBEQ	R6, $0, equal
   729  	CMPBLT	R6, $32, tiny
   730  	CMP	R6, $256
   731  	BLT	tail
   732  	CLC	$256, 0(R3), 0(R5)
   733  	BNE	notequal
   734  	SUB	$256, R6
   735  	LA	256(R3), R3	// advance both pointers past the chunk just compared
   736  	LA	256(R5), R5
   737  	BR	loop
   738  tail:
   739  	SUB	$1, R6, R8	// R8 = len - 1 for the executed CLC
   740  	EXRL	$runtime·memeqbodyclc(SB), R8
   741  	BEQ	equal
   742  notequal:
   743  	MOVB	$0, 0(R7)
   744  	RET
   745  equal:
   746  	MOVB	$1, 0(R7)
   747  	RET
   748  tiny:
   749  	MOVD	$0, R2	// R2 = byte offset already compared
   750  	CMPBLT	R6, $16, lt16
   751  	MOVD	0(R3), R8
   752  	MOVD	0(R5), R9
   753  	CMPBNE	R8, R9, notequal
   754  	MOVD	8(R3), R8
   755  	MOVD	8(R5), R9
   756  	CMPBNE	R8, R9, notequal
   757  	LA	16(R2), R2
   758  	SUB	$16, R6
   759  lt16:
   760  	CMPBLT	R6, $8, lt8
   761  	MOVD	0(R3)(R2*1), R8
   762  	MOVD	0(R5)(R2*1), R9
   763  	CMPBNE	R8, R9, notequal
   764  	LA	8(R2), R2
   765  	SUB	$8, R6
   766  lt8:
   767  	CMPBLT	R6, $4, lt4
   768  	MOVWZ	0(R3)(R2*1), R8
   769  	MOVWZ	0(R5)(R2*1), R9
   770  	CMPBNE	R8, R9, notequal
   771  	LA	4(R2), R2
   772  	SUB	$4, R6
   773  lt4:
        	// CHECK(n): finish as equal when exactly n bytes remain, otherwise
        	// compare the byte at offset n past R2.
   774  #define CHECK(n) \
   775  	CMPBEQ	R6, $n, equal \
   776  	MOVB	n(R3)(R2*1), R8 \
   777  	MOVB	n(R5)(R2*1), R9 \
   778  	CMPBNE	R8, R9, notequal
   779  	CHECK(0)
   780  	CHECK(1)
   781  	CHECK(2)
   782  	CHECK(3)
   783  	BR	equal
   784  
   785  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// Target of the EXRL in memeqbody, which supplies length-1 in R8.
   786  	CLC	$1, 0(R3), 0(R5)
   787  	RET
   788  
   789  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
   790  	MOVD	s+0(FP), R3     // s => R3
   791  	MOVD	s_len+8(FP), R4 // s_len => R4
   792  	MOVBZ	c+24(FP), R5    // c => R5
   793  	MOVD	$ret+32(FP), R2 // &ret => R2
   794  	BR	runtime·indexbytebody(SB)	// tail-branch; the result is written through R2
   795  
   796  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
   797  	MOVD	s+0(FP), R3     // s => R3
   798  	MOVD	s_len+8(FP), R4 // s_len => R4
   799  	MOVBZ	c+16(FP), R5    // c => R5
   800  	MOVD	$ret+24(FP), R2 // &ret => R2
   801  	BR	runtime·indexbytebody(SB)	// tail-branch; the result is written through R2
   802  
   803  // input:
   804  // R3: s
   805  // R4: s_len
   806  // R5: c -- byte sought
   807  // R2: &ret -- address to put index into
        // Stores the index of the first occurrence of c, or -1, at 0(R2).
        // Inputs shorter than 16 bytes use a byte loop; longer ones use the vector
        // path when facilities_hasVX is set and SRST otherwise.
        // Also uses R1, R6 (base of s), R7, R8 and V16-V19.
   808  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   809  	CMPBEQ	R4, $0, notfound
   810  	MOVD	R3, R6          // store base for later
   811  	ADD	R3, R4, R8      // the address after the end of the string
   812  	//if the length is small, use loop; otherwise, use vector or srst search
   813  	CMPBGE	R4, $16, large
   814  
   815  residual:
   816  	CMPBEQ	R3, R8, notfound
   817  	MOVBZ	0(R3), R7
   818  	LA	1(R3), R3
   819  	CMPBNE	R7, R5, residual
   820  
   821  found:
   822  	SUB	R6, R3
   823  	SUB	$1, R3	// R3 was advanced past the match: step back one
   824  	MOVD	R3, 0(R2)
   825  	RET
   826  
   827  notfound:
   828  	MOVD	$-1, 0(R2)
   829  	RET
   830  
   831  large:
   832  	MOVBZ	·cpu+facilities_hasVX(SB), R1
   833  	CMPBNE	R1, $0, vectorimpl
   834  
   835  srstimpl:                       // no vector facility
   836  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   837  srstloop:
   838  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   839  	BVS	srstloop        // interrupted - continue
   840  	BGT	notfoundr0
   841  foundr0:
   842  	XOR	R0, R0          // reset R0
   843  	SUB	R6, R8          // remove base
   844  	MOVD	R8, 0(R2)
   845  	RET
   846  notfoundr0:
   847  	XOR	R0, R0          // reset R0
   848  	MOVD	$-1, 0(R2)
   849  	RET
   850  
   851  vectorimpl:
   852  	//if the address is not 16byte aligned, use loop for the header
   853  	MOVD	R3, R8
   854  	AND	$15, R8
   855  	CMPBGT	R8, $0, notaligned
   856  
   857  aligned:
   858  	ADD	R6, R4, R8	// R8 = end address (base + length)
   859  	MOVD	R8, R7
   860  	AND	$-16, R7	// R7 = end address rounded down to 16 bytes
   861  	// replicate c across V17
   862  	VLVGB	$0, R5, V19
   863  	VREPB	$0, V19, V17
   864  
   865  vectorloop:
   866  	CMPBGE	R3, R7, residual	// fewer than 16 bytes left: finish in the byte loop
   867  	VL	0(R3), V16    // load string to be searched into V16
   868  	ADD	$16, R3
   869  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   870  	BVS	vectorloop
   871  
   872  	// when vector search found c in the string
   873  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   874  	SUB	$16, R3
   875  	SUB	R6, R3
   876  	ADD	R3, R7	// result = offset of this 16-byte block + index within it
   877  	MOVD	R7, 0(R2)
   878  	RET
   879  
   880  notaligned:
   881  	MOVD	R3, R8
   882  	AND	$-16, R8
   883  	ADD     $16, R8	// R8 = next 16-byte boundary above R3
   884  notalignedloop:
   885  	CMPBEQ	R3, R8, aligned
   886  	MOVBZ	0(R3), R7
   887  	LA	1(R3), R3
   888  	CMPBNE	R7, R5, notalignedloop
   889  	BR	found
   890  
   891  TEXT runtime·return0(SB), NOSPLIT, $0
   892  	MOVW	$0, R3	// sets R3 to zero and returns
   893  	RET
   894  
   895  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   896  // Must obey the gcc calling convention.
        // The result is left in R2. g, R10, LR and R11 are parked in R1, R3, R4
        // and R5 while load_g runs and restored before returning.
   897  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   898  	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   899  	MOVD	g, R1
   900  	MOVD	R10, R3
   901  	MOVD	LR, R4
   902  	MOVD	R11, R5
   903  
   904  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   905  	MOVD	g_m(g), R2
   906  	MOVD	m_curg(R2), R2
   907  	MOVD	(g_stack+stack_hi)(R2), R2
   908  
   909  	MOVD	R1, g
   910  	MOVD	R3, R10
   911  	MOVD	R4, LR
   912  	MOVD	R5, R11
   913  	RET
   914  
   915  // The top-most function running on a goroutine
   916  // returns to goexit+PCQuantum.
        // The leading 2-byte nop means goexit+2 is the BL goexit1 instruction.
   917  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   918  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   919  	BL	runtime·goexit1(SB)	// does not return
   920  	// traceback from goexit1 must hit code range of goexit
   921  	BYTE $0x07; BYTE $0x00; // 2-byte nop
   922  
   923  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
   924  	RET	// no work is done here: return straight to the caller
   925  
   926  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
        	// Takes no arguments and returns nothing; the only effect is the SYNC.
   927  	SYNC	// NOTE(review): presumably a full memory barrier, so earlier stores are visible before later ones — confirm whether s390x needs it
   928  	RET
   929  
   930  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads the two (base, len) pairs from the argument frame into the
        	// registers cmpbody expects and tail-branches to it; cmpbody stores
        	// -1/0/1 through R7 and returns directly to our caller.
   931  	MOVD	s1_base+0(FP), R3	// R3 = a
   932  	MOVD	s1_len+8(FP), R4	// R4 = alen
   933  	MOVD	s2_base+16(FP), R5	// R5 = b
   934  	MOVD	s2_len+24(FP), R6	// R6 = blen
   935  	LA	ret+32(FP), R7		// R7 = address of the result slot
   936  	BR	runtime·cmpbody(SB)
   937  
   938  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Same as runtime·cmpstring but with a different argument layout:
        	// only the words at offsets 0, 8, 24 and 32 are read (the words at 16
        	// and 40 are skipped — presumably the slice capacities; confirm).
        	// Tail-branches to cmpbody, which stores -1/0/1 through R7.
   939  	MOVD	s1+0(FP), R3	// R3 = a
   940  	MOVD	s1+8(FP), R4	// R4 = alen
   941  	MOVD	s2+24(FP), R5	// R5 = b
   942  	MOVD	s2+32(FP), R6	// R6 = blen
   943  	LA	res+48(FP), R7	// R7 = address of the result slot
   944  	BR	runtime·cmpbody(SB)
   945  
   946  // input:
   947  //   R3 = a
   948  //   R4 = alen
   949  //   R5 = b
   950  //   R6 = blen
   951  //   R7 = address of output word (stores -1/0/1 here)
        //
        // Compares the first min(alen, blen) bytes of a and b. If they differ,
        // the first differing byte decides the result; otherwise the result is
        // decided by comparing alen with blen.
        // Clobbers R3, R5, R8 and the condition code.
   952  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
   953  	CMPBEQ	R3, R5, cmplengths	// same base pointer: only the lengths can differ
   954  	MOVD	R4, R8
   955  	CMPBLE	R4, R6, amin
   956  	MOVD	R6, R8
   957  amin:
        	// R8 = min(alen, blen) = number of bytes still to be compared
   958  	CMPBEQ	R8, $0, cmplengths
   959  	CMP	R8, $256
   960  	BLE	tail
   961  loop:
        	// invariant: R8 > 256, R3/R5 point at the next uncompared bytes
   962  	CLC	$256, 0(R3), 0(R5)
   963  	BGT	gt
   964  	BLT	lt
   965  	SUB	$256, R8
        	// Step past the 256 bytes just compared. Without this every
        	// iteration (and the tail) would re-compare the bytes at offset 0
        	// and miss any difference beyond the first 256 bytes.
        	LA	256(R3), R3
        	LA	256(R5), R5
   966  	CMP	R8, $256
   967  	BGT	loop
   968  tail:
        	// 1 <= R8 <= 256 bytes remain. EXRL runs the CLC at cmpbodyclc with
        	// R8 supplying its length field, which is encoded as length-1.
   969  	SUB	$1, R8
   970  	EXRL	$runtime·cmpbodyclc(SB), R8
   971  	BGT	gt
   972  	BLT	lt
   973  cmplengths:
        	// common prefix is equal: the shorter operand sorts first
   974  	CMP	R4, R6
   975  	BEQ	eq
   976  	BLT	lt
   977  gt:
   978  	MOVD	$1, 0(R7)
   979  	RET
   980  lt:
   981  	MOVD	$-1, 0(R7)
   982  	RET
   983  eq:
   984  	MOVD	$0, 0(R7)
   985  	RET
   986  
   987  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// This CLC is the target of the EXRL in cmpbody (which passes R8
        	// along with it); it compares bytes at 0(R3) with bytes at 0(R5).
        	// NOTE(review): the $1 length is presumably just a template that
        	// EXRL overrides from R8 — confirm against the ISA description.
   988  	CLC	$1, 0(R3), 0(R5)
   989  	RET
   990  
   991  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX as the result.
   992  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
   993  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// R0 = flag byte, zero-extended
   994  	MOVB	R0, ret+0(FP)				// store it in the 1-byte result slot
   995  	RET
   996  
   997  // func supportsVX() bool
        // Returns the byte stored at runtime·cpu+facilities_hasVX as the result
        // (same body as strings·supportsVX).
   998  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
   999  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0	// R0 = flag byte, zero-extended
  1000  	MOVB	R0, ret+0(FP)				// store it in the 1-byte result slot
  1001  	RET
  1002  
  1003  // func indexShortStr(s, sep string) int
  1004  // Caller must confirm availability of vx facility before calling.
        // Loads the arguments into the registers runtime·indexShortStr expects
        // and tail-branches to it; the result is written through R5.
  1005  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1006  	LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
  1007  	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1008  	MOVD	$ret+32(FP), R5   // R5=&ret
  1009  	BR	runtime·indexShortStr(SB)
  1010  
  1011  // func indexShortStr(s, sep []byte) int
  1012  // Caller must confirm availability of vx facility before calling.
        // Loads the arguments into the registers runtime·indexShortStr expects
        // and tail-branches to it; the result is written through R5. Only the
        // first two words of each slice argument are read.
  1013  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1014  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1015  	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1016  	MOVD	$ret+48(FP), R5    // R5=&ret
  1017  	BR	runtime·indexShortStr(SB)
  1018  
  1019  // s: string we are searching
  1020  // sep: string to search for
  1021  // R1=&s[0], R2=len(s)
  1022  // R3=&sep[0], R4=len(sep)
  1023  // R5=&ret (int)
  1024  // Caller must confirm availability of vx facility before calling.
        //
        // Stores the byte offset of the match relative to &s[0] at (R5), or -1
        // when there is no match. len(sep)==0 and len(sep)>len(s) both yield -1.
        // len(sep) > 64 is not implemented (see index65plus).
        // Register use after the prologue:
        //   R2 = address of the last position at which a match may start
        //   R4 = len(sep)-1
        //   R7 = address of the position currently being examined
        //   V0 = first (up to) 16 bytes of sep
  1025  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1026  	CMPBGT	R4, R2, notfound // sep longer than s
  1027  	ADD	R1, R2
  1028  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1029  	CMPBEQ	R4, $0, notfound // empty sep is reported as -1
  1030  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1031  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1032  	MOVD	R1, R7 // start searching at &s[0]
  1033  index2plus:
        	// len(sep)==2: test 16 candidate positions per iteration
  1034  	CMPBNE	R4, $1, index3plus
  1035  	MOVD	$15(R7), R9
  1036  	CMPBGE	R9, R2, index2to16 // fewer than 16 candidates left before R2: use the scalar-stepping loop
  1037  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1038  	VONE	V16
  1039  	VREPH	$0, V0, V1         // V1 = the 2 bytes of sep in every halfword
  1040  	CMPBGE	R9, R2, index2to16 // same test as above; R9 and R2 are unchanged since then
  1041  index2loop:
  1042  	VL	0(R7), V2          // 16 bytes, even indices
  1043  	VL	1(R7), V4          // 16 bytes, odd indices
  1044  	VCEQH	V1, V2, V5         // compare even indices
  1045  	VCEQH	V1, V4, V6         // compare odd indices
  1046  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1047  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1048  	BLT	foundV17
  1049  	MOVD	$16(R7), R7        // R7+=16
  1050  	ADD	$15, R7, R9
  1051  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1052  	CMPBLE	R7, R2, index2to16 // some candidates remain, but fewer than 16
  1053  	BR	notfound
  1054  
  1055  index3plus:
        	// len(sep)==3: test 16 candidate positions per iteration
  1056  	CMPBNE	R4, $2, index4plus
  1057  	ADD	$15, R7, R9
  1058  	CMPBGE	R9, R2, index2to16 // fewer than 16 candidates left before R2: use the scalar-stepping loop
  1059  	MOVD	$1, R0             // VLL index: load 2 bytes past the first 16
  1060  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1061  	VONE	V16
  1062  	VREPH	$0, V0, V1         // V1 = first 2 bytes of sep in every halfword
  1063  	VREPB	$2, V0, V8         // V8 = third byte of sep in every byte
  1064  index3loop:
  1065  	VL	(R7), V2           // load 16-bytes into V2
  1066  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1067  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1068  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1069  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1070  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1071  	VCEQB	V8, V9, V10        // compare last bytes
  1072  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1073  	VN	V7, V10, V7        // AND indices with last byte
  1074  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1075  	BLT	foundV17
  1076  	MOVD	$16(R7), R7        // R7+=16
  1077  	ADD	$15, R7, R9
  1078  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1079  	CMPBLE	R7, R2, index2to16 // some candidates remain, but fewer than 16
  1080  	BR	notfound
  1081  
  1082  index4plus:
        	// len(sep)==4: test 16 candidate positions per iteration
  1083  	CMPBNE	R4, $3, index5plus
  1084  	ADD	$15, R7, R9
  1085  	CMPBGE	R9, R2, index2to16 // fewer than 16 candidates left before R2: use the scalar-stepping loop
  1086  	MOVD	$2, R0             // VLL index: load 3 bytes past the first 16
  1087  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1088  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1089  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1090  	VONE	V16
  1091  	VREPF	$0, V0, V1         // V1 = the 4 bytes of sep in every word
  1092  index4loop:
  1093  	VL	(R7), V2           // load 16-bytes into V2
  1094  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1095  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1096  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1097  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1098  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1099  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1100  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1101  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1102  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1103  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1104  	VSEL	V13, V14, V31, V7  // final merge
  1105  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1106  	BLT	foundV17
  1107  	MOVD	$16(R7), R7        // R7+=16
  1108  	ADD	$15, R7, R9
  1109  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1110  	CMPBLE	R7, R2, index2to16 // some candidates remain, but fewer than 16
  1111  	BR	notfound
  1112  
  1113  index5plus:
  1114  	CMPBGT	R4, $15, index17plus // len(sep) > 16
  1115  index2to16:
        	// Reached directly for len(sep) in 5..16 (and for len(sep)==1), and
        	// from the 2/3/4-byte paths when fewer than 16 candidates remain.
        	// Compares one candidate position at a time against V0.
  1116  	CMPBGT	R7, R2, notfound
  1117  	MOVD	$1(R7), R8
  1118  	CMPBGT	R8, R2, index2to16tail // only one candidate position left
  1119  index2to16loop:
  1120  	// unrolled 2x
  1121  	VLL	R4, (R7), V1       // len(sep) bytes at candidate R7
  1122  	VLL	R4, 1(R7), V2      // len(sep) bytes at candidate R7+1
  1123  	VCEQGS	V0, V1, V3
  1124  	BEQ	found              // every element equal: match at R7
  1125  	MOVD	$1(R7), R7
  1126  	VCEQGS	V0, V2, V4
  1127  	BEQ	found              // match at the second candidate
  1128  	MOVD	$1(R7), R7
  1129  	CMPBLT	R7, R2, index2to16loop // at least two candidates remain
  1130  	CMPBGT	R7, R2, notfound
  1131  index2to16tail:
        	// exactly one candidate position left (R7 == R2)
  1132  	VLL	R4, (R7), V1
  1133  	VCEQGS	V0, V1, V2
  1134  	BEQ	found
  1135  	BR	notfound
  1136  
  1137  index17plus:
        	// len(sep) in 17..32: V0 holds sep[0:16], V1 the remaining bytes
  1138  	CMPBGT	R4, $31, index33plus // len(sep) > 32
  1139  	SUB	$16, R4, R0        // R0=len(sep)-17, VLL index for the bytes past the first 16
  1140  	VLL	R0, 16(R3), V1
  1141  	VONE	V7
  1142  index17to32loop:
  1143  	VL	(R7), V2
  1144  	VLL	R0, 16(R7), V3
  1145  	VCEQG	V0, V2, V4
  1146  	VCEQG	V1, V3, V5
  1147  	VN	V4, V5, V6         // AND of both compare results
  1148  	VCEQGS	V6, V7, V8         // all ones only if every compare matched
  1149  	BEQ	found
  1150  	MOVD	$1(R7), R7         // advance one candidate position
  1151  	CMPBLE  R7, R2, index17to32loop
  1152  	BR	notfound
  1153  
  1154  index33plus:
        	// len(sep) in 33..48: V0, V1 hold sep[0:32], V2 the remaining bytes
  1155  	CMPBGT	R4, $47, index49plus // len(sep) > 48
  1156  	SUB	$32, R4, R0        // R0=len(sep)-33, VLL index for the bytes past the first 32
  1157  	VL	16(R3), V1
  1158  	VLL	R0, 32(R3), V2
  1159  	VONE	V11
  1160  index33to48loop:
  1161  	VL	(R7), V3
  1162  	VL	16(R7), V4
  1163  	VLL	R0, 32(R7), V5
  1164  	VCEQG	V0, V3, V6
  1165  	VCEQG	V1, V4, V7
  1166  	VCEQG	V2, V5, V8
  1167  	VN	V6, V7, V9
  1168  	VN	V8, V9, V10        // AND of all three compare results
  1169  	VCEQGS	V10, V11, V12      // all ones only if every compare matched
  1170  	BEQ	found
  1171  	MOVD	$1(R7), R7         // advance one candidate position
  1172  	CMPBLE  R7, R2, index33to48loop
  1173  	BR	notfound
  1174  
  1175  index49plus:
        	// len(sep) in 49..64: V0..V2 hold sep[0:48], V3 the remaining bytes
  1176  	CMPBGT	R4, $63, index65plus // len(sep) > 64
  1177  	SUB	$48, R4, R0        // R0=len(sep)-49, VLL index for the bytes past the first 48
  1178  	VL	16(R3), V1
  1179  	VL	32(R3), V2
  1180  	VLL	R0, 48(R3), V3
  1181  	VONE	V15
  1182  index49to64loop:
  1183  	VL	(R7), V4
  1184  	VL	16(R7), V5
  1185  	VL	32(R7), V6
  1186  	VLL	R0, 48(R7), V7
  1187  	VCEQG	V0, V4, V8
  1188  	VCEQG	V1, V5, V9
  1189  	VCEQG	V2, V6, V10
  1190  	VCEQG	V3, V7, V11
  1191  	VN	V8, V9, V12
  1192  	VN	V10, V11, V13
  1193  	VN	V12, V13, V14      // AND of all four compare results
  1194  	VCEQGS	V14, V15, V16      // all ones only if every compare matched
  1195  	BEQ	found
  1196  	MOVD	$1(R7), R7         // advance one candidate position
  1197  	CMPBLE  R7, R2, index49to64loop
        	// candidates exhausted: fall through to notfound
  1198  notfound:
  1199  	MOVD	$-1, (R5)
  1200  	RET
  1201  
  1202  index65plus:
  1203  	// not implemented
        	// NOTE(review): this stores through R0, presumably to fault on
        	// purpose with R0 expected to be 0 (rt0_go zeroes it) — confirm.
  1204  	MOVD	$0, (R0)
  1205  	RET
  1206  
  1207  foundV17: // index is in doubleword V17[0]
  1208  	VLGVG	$0, V17, R8
  1209  	ADD	R8, R7             // R7 = address of the match
  1210  found:
        	// R7 = address of the match; convert it to an offset from &s[0]
  1211  	SUB	R1, R7
  1212  	MOVD	R7, (R5)
  1213  	RET
  1214  
  1215  // This is called from .init_array and follows the platform, not Go, ABI.
  1216  // We are overly conservative. We could only save the registers we use.
  1217  // However, since this function is only called once per loaded module
  1218  // performance is unimportant.
        // The only registers written between the save and the restore are R1
        // and the argument register R2 is only read.
  1219  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1220  	// Save R6-R15 in the register save area of the calling function.
  1221  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1222  	STMG	R6, R15, 48(R15)
  1223  
  1224  	// append the argument (passed in R2, as per the ELF ABI) to the
  1225  	// moduledata linked list.
  1226  	MOVD	runtime·lastmoduledatap(SB), R1	// R1 = current tail of the list
  1227  	MOVD	R2, moduledata_next(R1)		// tail.next = new module
  1228  	MOVD	R2, runtime·lastmoduledatap(SB)	// the new module becomes the tail
  1229  
  1230  	// Restore R6-R15.
  1231  	LMG	48(R15), R6, R15
  1232  	RET
  1233  
  1234  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 in the single-byte result slot.
        	// NOTE(review): presumably declared as func checkASM() bool and
        	// therefore always reports true — confirm against the Go declaration.
  1235  	MOVB	$1, ret+0(FP)
  1236  	RET