github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/runtime/asm_s390x.s (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_s390x_lib is common startup code for s390x systems when
    11  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    12  // arrange to invoke this function as a global constructor (for
    13  // c-archive) or when the shared library is loaded (for c-shared).
    14  // We expect argc and argv to be passed in the usual C ABI registers
    15  // R2 and R3.
    16  TEXT _rt0_s390x_lib(SB), NOSPLIT|NOFRAME, $0
        	// Entered from C with argc in R2 and argv in R3. Stashes both in
        	// file-local globals, calls runtime·libpreinit synchronously, then
        	// starts _rt0_s390x_lib_go on a new thread (via _cgo_sys_thread_create
        	// when that pointer is non-zero, otherwise via runtime·newosproc) and
        	// returns to the caller with R6-R15 and F8-F15 restored.
    17  	STMG	R6, R15, 48(R15)	// save R6-R15 in the register save area of the calling function
    18  	MOVD	R2, _rt0_s390x_lib_argc<>(SB)
    19  	MOVD	R3, _rt0_s390x_lib_argv<>(SB)
    20  
    21  	// R6-R15 were already stored by the STMG on entry, before any other
    22  	// instruction ran, so the store is not repeated here.
    23  
    24  	// Allocate 80 bytes on the stack.
        	// 0(R15) and 8(R15) are used below for the newosproc arguments;
        	// 16(R15) through 72(R15) hold F8-F15.
    25  	MOVD	$-80(R15), R15
    26  
    27  	// Save F8-F15 in our stack frame.
    28  	FMOVD	F8, 16(R15)
    29  	FMOVD	F9, 24(R15)
    30  	FMOVD	F10, 32(R15)
    31  	FMOVD	F11, 40(R15)
    32  	FMOVD	F12, 48(R15)
    33  	FMOVD	F13, 56(R15)
    34  	FMOVD	F14, 64(R15)
    35  	FMOVD	F15, 72(R15)
    36  
    37  	// Synchronous initialization.
    38  	MOVD	$runtime·libpreinit(SB), R1
    39  	BL	R1
    40  
    41  	// Create a new thread to finish Go runtime initialization.
    42  	MOVD	_cgo_sys_thread_create(SB), R1
    43  	CMP	R1, $0
    44  	BEQ	nocgo
    45  	MOVD	$_rt0_s390x_lib_go(SB), R2	// first argument: thread entry point
    46  	MOVD	$0, R3				// second argument: zero
    47  	BL	R1
    48  	BR	restore
    49  
    50  nocgo:
        	// No cgo thread-creation hook: call runtime·newosproc with the
        	// stack size and entry point stored in our frame.
    51  	MOVD	$0x800000, R1              // stacksize
    52  	MOVD	R1, 0(R15)
    53  	MOVD	$_rt0_s390x_lib_go(SB), R1
    54  	MOVD	R1, 8(R15)                 // fn
    55  	MOVD	$runtime·newosproc(SB), R1
    56  	BL	R1
    57  
    58  restore:
    59  	// Restore F8-F15 from our stack frame.
    60  	FMOVD	16(R15), F8
    61  	FMOVD	24(R15), F9
    62  	FMOVD	32(R15), F10
    63  	FMOVD	40(R15), F11
    64  	FMOVD	48(R15), F12
    65  	FMOVD	56(R15), F13
    66  	FMOVD	64(R15), F14
    67  	FMOVD	72(R15), F15
    68  	MOVD	$80(R15), R15		// release the 80-byte frame
    69  
    70  	// Restore R6-R15.
    71  	LMG	48(R15), R6, R15
    72  	RET
    73  
    74  // _rt0_s390x_lib_go initializes the Go runtime.
    75  // This is started in a separate thread by _rt0_s390x_lib.
        // It reloads the argc/argv values that _rt0_s390x_lib stored in the
        // file-local globals into R2/R3 and jumps (does not call) to
        // runtime·rt0_go, which expects them in those registers.
    76  TEXT _rt0_s390x_lib_go(SB), NOSPLIT|NOFRAME, $0
    77  	MOVD	_rt0_s390x_lib_argc<>(SB), R2
    78  	MOVD	_rt0_s390x_lib_argv<>(SB), R3
    79  	MOVD	$runtime·rt0_go(SB), R1
    80  	BR	R1
    81  
    82  DATA _rt0_s390x_lib_argc<>(SB)/8, $0	// argc saved by _rt0_s390x_lib, read by _rt0_s390x_lib_go
    83  GLOBL _rt0_s390x_lib_argc<>(SB), NOPTR, $8
    84  DATA _rt0_s390x_lib_argv<>(SB)/8, $0	// argv saved by _rt0_s390x_lib; name must match the GLOBL below
    85  GLOBL _rt0_s390x_lib_argv<>(SB), NOPTR, $8
    86  
    87  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    88  	// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    89  	// C TLS base pointer in AR0:AR1
        	//
        	// Sequence: store argc/argv on the stack, set up g0's stack bounds
        	// from the current stack pointer, call _cgo_init if present, link
        	// m0 and g0, run check/args/osinit/schedinit, queue a goroutine
        	// whose entry is runtime·mainPC via newproc, and call mstart.
    90  
    91  	// initialize essential registers
    92  	XOR	R0, R0
    93  
    94  	SUB	$24, R15
    95  	MOVW	R2, 8(R15) // argc
    96  	MOVD	R3, 16(R15) // argv
    97  
    98  	// create istack out of the given (operating system) stack.
    99  	// _cgo_init may update stackguard.
        	// stack.lo and both stack guards are provisionally set 64KB below
        	// the current stack pointer; stack.hi is the current stack pointer.
   100  	MOVD	$runtime·g0(SB), g
   101  	MOVD	R15, R11
   102  	SUB	$(64*1024), R11
   103  	MOVD	R11, g_stackguard0(g)
   104  	MOVD	R11, g_stackguard1(g)
   105  	MOVD	R11, (g_stack+stack_lo)(g)
   106  	MOVD	R15, (g_stack+stack_hi)(g)
   107  
   108  	// if there is a _cgo_init, call it using the gcc ABI.
   109  	MOVD	_cgo_init(SB), R11
   110  	CMPBEQ	R11, $0, nocgo
   111  	MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
   112  	SLD	$32, R4, R4
   113  	MOVW	AR1, R4			// arg 2: TLS base pointer
   114  	MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
   115  	MOVD	g, R2			// arg 0: G
   116  	// C functions expect 160 bytes of space on caller stack frame
   117  	// and an 8-byte aligned stack pointer
   118  	MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
   119  	SUB	$160, R15		// reserve 160 bytes
   120  	MOVD    $~7, R6
   121  	AND 	R6, R15			// 8-byte align
   122  	BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
   123  	MOVD	R9, R15			// restore stack
   124  	XOR	R0, R0			// zero R0
   125  
   126  nocgo:
   127  	// update stackguard after _cgo_init
        	// Both guards become stack.lo + _StackGuard, using whatever stack.lo
        	// holds now (the comment above notes _cgo_init may have changed it).
   128  	MOVD	(g_stack+stack_lo)(g), R2
   129  	ADD	$const__StackGuard, R2
   130  	MOVD	R2, g_stackguard0(g)
   131  	MOVD	R2, g_stackguard1(g)
   132  
   133  	// set the per-goroutine and per-mach "registers"
   134  	MOVD	$runtime·m0(SB), R2
   135  
   136  	// save m->g0 = g0
   137  	MOVD	g, m_g0(R2)
   138  	// save m0 to g0->m
   139  	MOVD	R2, g_m(g)
   140  
   141  	BL	runtime·check(SB)
   142  
   143  	// argc/argv are already prepared on stack
   144  	BL	runtime·args(SB)
   145  	BL	runtime·osinit(SB)
   146  	BL	runtime·schedinit(SB)
   147  
   148  	// create a new goroutine to start program
        	// A temporary 24-byte frame holds zero at 0(R15) and 8(R15) and the
        	// address of runtime·mainPC at 16(R15) for the newproc call.
   149  	MOVD	$runtime·mainPC(SB), R2		// entry
   150  	SUB     $24, R15
   151  	MOVD 	R2, 16(R15)
   152  	MOVD 	$0, 8(R15)
   153  	MOVD 	$0, 0(R15)
   154  	BL	runtime·newproc(SB)
   155  	ADD	$24, R15
   156  
   157  	// start this M
   158  	BL	runtime·mstart(SB)
   159  
        	// Only reached if mstart returns: store to address 1(R0), with R0
        	// zeroed above — presumably a deliberate fault; confirm.
   160  	MOVD	$0, 1(R0)
   161  	RET
   162  
   163  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte read-only word holding the address of runtime·main; rt0_go passes its address to newproc
   164  GLOBL	runtime·mainPC(SB),RODATA,$8
   165  
   166  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// Stores zero to address 2(R0). Other code in this file treats R0
        	// as always zero, so this presumably faults at a low address to
        	// stop the program — confirm.
   167  	MOVD	$0, 2(R0)
   168  	RET
   169  
   170  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        	// Nothing to do in this file's implementation: returns immediately.
   171  	RET
   172  
   173  /*
   174   *  go-routine
   175   */
   176  
   177  // void gosave(Gobuf*)
   178  // save state in Gobuf; setjmp
        // Records the caller's stack pointer (R15), return address (LR) and g
        // in *buf, clears buf.lr and buf.ret, and calls runtime·badctxt if
        // buf.ctxt is not already zero.
   179  TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   180  	MOVD	buf+0(FP), R3
   181  	MOVD	R15, gobuf_sp(R3)
   182  	MOVD	LR, gobuf_pc(R3)
   183  	MOVD	g, gobuf_g(R3)
   184  	MOVD	$0, gobuf_lr(R3)
   185  	MOVD	$0, gobuf_ret(R3)
   186  	// Assert ctxt is zero. See func save.
   187  	MOVD	gobuf_ctxt(R3), R3
   188  	CMPBEQ	R3, $0, 2(PC)		// ctxt == 0: skip the badctxt call
   189  	BL	runtime·badctxt(SB)
   190  	RET
   191  
   192  // void gogo(Gobuf*)
   193  // restore state from Gobuf; longjmp
        // Switches g to buf.g, loads SP, LR, the return value (into R3) and the
        // closure context (into R12) from *buf, zeroes those four Gobuf fields,
        // and branches to buf.pc. Does not return to its caller.
   194  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   195  	MOVD	buf+0(FP), R5
   196  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   197  	BL	runtime·save_g(SB)
   198  
   199  	MOVD	0(g), R4		// load through g; R4 is not used afterwards (presumably the nil check mentioned above)
   200  	MOVD	gobuf_sp(R5), R15
   201  	MOVD	gobuf_lr(R5), LR
   202  	MOVD	gobuf_ret(R5), R3
   203  	MOVD	gobuf_ctxt(R5), R12
   204  	MOVD	$0, gobuf_sp(R5)
   205  	MOVD	$0, gobuf_ret(R5)
   206  	MOVD	$0, gobuf_lr(R5)
   207  	MOVD	$0, gobuf_ctxt(R5)
   208  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   209  	MOVD	gobuf_pc(R5), R6
   210  	BR	(R6)
   211  
   212  // void mcall(fn func(*g))
   213  // Switch to m->g0's stack, call fn(g).
   214  // Fn must never return.  It should gogo(&g->sched)
   215  // to keep running g.
   216  TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   217  	// Save caller state in g->sched
   218  	MOVD	R15, (g_sched+gobuf_sp)(g)
   219  	MOVD	LR, (g_sched+gobuf_pc)(g)
   220  	MOVD	$0, (g_sched+gobuf_lr)(g)
   221  	MOVD	g, (g_sched+gobuf_g)(g)
   222  
   223  	// Switch to m->g0 & its stack, call fn.
   224  	MOVD	g, R3				// R3 = the calling g, passed to fn below
   225  	MOVD	g_m(g), R8
   226  	MOVD	m_g0(R8), g
   227  	BL	runtime·save_g(SB)
   228  	CMP	g, R3
   229  	BNE	2(PC)				// caller was not g0: skip the badmcall branch
   230  	BR	runtime·badmcall(SB)
   231  	MOVD	fn+0(FP), R12			// context
   232  	MOVD	0(R12), R4			// code pointer
   233  	MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
   234  	SUB	$16, R15			// frame for the call: 8(R15) = argument, 0(R15) = 0
   235  	MOVD	R3, 8(R15)
   236  	MOVD	$0, 0(R15)
   237  	BL	(R4)
   238  	BR	runtime·badmcall2(SB)		// only reached if fn returned
   239  
   240  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   241  // of the G stack.  We need to distinguish the routine that
   242  // lives at the bottom of the G stack from the one that lives
   243  // at the top of the system stack because the one at the top of
   244  // the system stack terminates the stack walk (see topofstack()).
        // systemstack stores this function's address plus 16 as the saved PC,
        // so the layout of the instructions below must not change casually.
   245  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   246  	UNDEF
   247  	BL	(LR)	// make sure this function is not leaf
   248  	RET
   249  
   250  // func systemstack(fn func())
        // Runs fn on the m's g0 stack. If the current g is already gsignal or
        // g0, fn is tail-called directly. If it is m->curg, the current state
        // is saved in g->sched, execution switches to g0's stack for the call,
        // and then switches back. Any other g calls runtime·badsystemstack.
   251  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   252  	MOVD	fn+0(FP), R3	// R3 = fn
   253  	MOVD	R3, R12		// context
   254  	MOVD	g_m(g), R4	// R4 = m
   255  
   256  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   257  	CMPBEQ	g, R5, noswitch
   258  
   259  	MOVD	m_g0(R4), R5	// R5 = g0
   260  	CMPBEQ	g, R5, noswitch
   261  
   262  	MOVD	m_curg(R4), R6
   263  	CMPBEQ	g, R6, switch
   264  
   265  	// Bad: g is not gsignal, not g0, not curg. What is it?
   266  	// Hide call from linker nosplit analysis.
   267  	MOVD	$runtime·badsystemstack(SB), R3
   268  	BL	(R3)
   269  
   270  switch:
   271  	// save our state in g->sched.  Pretend to
   272  	// be systemstack_switch if the G stack is scanned.
   273  	MOVD	$runtime·systemstack_switch(SB), R6
   274  	ADD	$16, R6	// get past prologue
   275  	MOVD	R6, (g_sched+gobuf_pc)(g)
   276  	MOVD	R15, (g_sched+gobuf_sp)(g)
   277  	MOVD	$0, (g_sched+gobuf_lr)(g)
   278  	MOVD	g, (g_sched+gobuf_g)(g)
   279  
   280  	// switch to g0
        	// R5 still holds m->g0 from the comparison above.
   281  	MOVD	R5, g
   282  	BL	runtime·save_g(SB)
   283  	MOVD	(g_sched+gobuf_sp)(g), R3
   284  	// make it look like mstart called systemstack on g0, to stop traceback
   285  	SUB	$8, R3
   286  	MOVD	$runtime·mstart(SB), R4
   287  	MOVD	R4, 0(R3)
   288  	MOVD	R3, R15
   289  
   290  	// call target function
   291  	MOVD	0(R12), R3	// code pointer
   292  	BL	(R3)
   293  
   294  	// switch back to g
        	// The saved sched.sp is cleared once it has been reloaded into R15.
   295  	MOVD	g_m(g), R3
   296  	MOVD	m_curg(R3), g
   297  	BL	runtime·save_g(SB)
   298  	MOVD	(g_sched+gobuf_sp)(g), R15
   299  	MOVD	$0, (g_sched+gobuf_sp)(g)
   300  	RET
   301  
   302  noswitch:
   303  	// already on m stack, just call directly
   304  	// Using a tail call here cleans up tracebacks since we won't stop
   305  	// at an intermediate systemstack.
   306  	MOVD	0(R12), R3	// code pointer
   307  	MOVD	0(R15), LR	// restore LR
   308  	ADD	$8, R15		// pop the 8-byte slot LR was reloaded from
   309  	BR	(R3)
   310  
   311  /*
   312   * support for morestack
   313   */
   314  
   315  // Called during function prolog when more stack is needed.
   316  // Caller has already loaded:
   317  // R3: framesize, R4: argsize, R5: LR
   318  //
   319  // The traceback routines see morestack on a g0 as being
   320  // the top of a stack (for example, morestack calling newstack
   321  // calling the scheduler calling newm calling gc), so we must
   322  // record an argument size. For that purpose, it has no arguments.
        //
        // In the body, "f" is the function whose prologue called morestack:
        // LR is the resume PC inside f, R5 is f's own return address, and R12
        // is stored as f's closure context.
   323  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   324  	// Cannot grow scheduler stack (m->g0).
   325  	MOVD	g_m(g), R7
   326  	MOVD	m_g0(R7), R8
   327  	CMPBNE	g, R8, 3(PC)		// not g0: skip the two calls below
   328  	BL	runtime·badmorestackg0(SB)
   329  	BL	runtime·abort(SB)
   330  
   331  	// Cannot grow signal stack (m->gsignal).
   332  	MOVD	m_gsignal(R7), R8
   333  	CMP	g, R8
   334  	BNE	3(PC)			// not gsignal: skip the two calls below
   335  	BL	runtime·badmorestackgsignal(SB)
   336  	BL	runtime·abort(SB)
   337  
   338  	// Called from f.
   339  	// Set g->sched to context in f.
   340  	MOVD	R15, (g_sched+gobuf_sp)(g)
   341  	MOVD	LR, R8
   342  	MOVD	R8, (g_sched+gobuf_pc)(g)
   343  	MOVD	R5, (g_sched+gobuf_lr)(g)
   344  	MOVD	R12, (g_sched+gobuf_ctxt)(g)
   345  
   346  	// Called from f.
   347  	// Set m->morebuf to f's caller.
   348  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   349  	MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   350  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   351  
   352  	// Call newstack on m->g0's stack.
   353  	MOVD	m_g0(R7), g
   354  	BL	runtime·save_g(SB)
   355  	MOVD	(g_sched+gobuf_sp)(g), R15
   356  	// Create a stack frame on g0 to call newstack.
   357  	MOVD	$0, -8(R15)	// Zero saved LR in frame
   358  	SUB	$8, R15
   359  	BL	runtime·newstack(SB)
   360  
   361  	// Not reached, but make sure the return PC from the call to newstack
   362  	// is still in this function, and not the beginning of the next.
   363  	UNDEF
   364  
   365  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Variant of morestack for callers with no closure context: zeroes
        	// R12 (which morestack saves as gobuf.ctxt) and jumps to morestack.
   366  	MOVD	$0, R12
   367  	BR	runtime·morestack(SB)
   368  
   369  // reflectcall: call a function with the given argument list
   370  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   371  // we don't have variable-sized frames, so we use a small number
   372  // of constant-sized-frame functions to encode a few bits of size in the pc.
   373  // Caution: ugly multiline assembly macros in your future!
   374  
   375  #define DISPATCH(NAME,MAXSIZE)		\
   376  	MOVD	$MAXSIZE, R4;		\
   377  	CMP	R3, R4;		\
   378  	BGT	3(PC);			\
   379  	MOVD	$NAME(SB), R5;	\
   380  	BR	(R5)
   381  // Note: can't just "BR NAME(SB)" - bad inlining results.
        // DISPATCH(NAME, MAXSIZE) expects the argument size in R3. If R3 is
        // greater than MAXSIZE it skips its last two instructions and falls
        // through to whatever follows; otherwise it jumps to NAME through R5.
        // R4 and R5 are overwritten.
   382  
   383  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: jumps straight to ·reflectcall, leaving
        	// the caller's arguments and return address untouched.
   384  	BR	·reflectcall(SB)
   385  
   386  TEXT ·reflectcall(SB), NOSPLIT, $-8-32
        	// Loads the 32-bit argsize (zero-extended) into R3 and jumps to the
        	// first callN stub whose N is >= argsize, trying N = 32, 64, ...,
        	// 1073741824 in order. If argsize exceeds the largest N, jumps to
        	// runtime·badreflectcall.
   387  	MOVWZ argsize+24(FP), R3
   388  	DISPATCH(runtime·call32, 32)
   389  	DISPATCH(runtime·call64, 64)
   390  	DISPATCH(runtime·call128, 128)
   391  	DISPATCH(runtime·call256, 256)
   392  	DISPATCH(runtime·call512, 512)
   393  	DISPATCH(runtime·call1024, 1024)
   394  	DISPATCH(runtime·call2048, 2048)
   395  	DISPATCH(runtime·call4096, 4096)
   396  	DISPATCH(runtime·call8192, 8192)
   397  	DISPATCH(runtime·call16384, 16384)
   398  	DISPATCH(runtime·call32768, 32768)
   399  	DISPATCH(runtime·call65536, 65536)
   400  	DISPATCH(runtime·call131072, 131072)
   401  	DISPATCH(runtime·call262144, 262144)
   402  	DISPATCH(runtime·call524288, 524288)
   403  	DISPATCH(runtime·call1048576, 1048576)
   404  	DISPATCH(runtime·call2097152, 2097152)
   405  	DISPATCH(runtime·call4194304, 4194304)
   406  	DISPATCH(runtime·call8388608, 8388608)
   407  	DISPATCH(runtime·call16777216, 16777216)
   408  	DISPATCH(runtime·call33554432, 33554432)
   409  	DISPATCH(runtime·call67108864, 67108864)
   410  	DISPATCH(runtime·call134217728, 134217728)
   411  	DISPATCH(runtime·call268435456, 268435456)
   412  	DISPATCH(runtime·call536870912, 536870912)
   413  	DISPATCH(runtime·call1073741824, 1073741824)
   414  	MOVD	$runtime·badreflectcall(SB), R5
   415  	BR	(R5)
   416  
   417  #define CALLFN(NAME,MAXSIZE)			\
   418  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   419  	NO_LOCAL_POINTERS;			\
   420  	/* copy arguments to stack: R4 = source, R5 = bytes left, R6 = destination */	\
   421  	MOVD	arg+16(FP), R4;			\
   422  	MOVWZ	argsize+24(FP), R5;		\
   423  	MOVD	$stack-MAXSIZE(SP), R6;		\
   424  loopArgs: /* copy 256 bytes at a time */	\
   425  	CMP	R5, $256;			\
   426  	BLT	tailArgs;			\
   427  	SUB	$256, R5;			\
   428  	MVC	$256, 0(R4), 0(R6);		\
   429  	MOVD	$256(R4), R4;			\
   430  	MOVD	$256(R6), R6;			\
   431  	BR	loopArgs;			\
   432  tailArgs: /* copy remaining bytes (fewer than 256) with one executed MVC; R5 is decremented first */	\
   433  	CMP	R5, $0;				\
   434  	BEQ	callFunction;			\
   435  	SUB	$1, R5;				\
   436  	EXRL	$callfnMVC<>(SB), R5;		\
   437  callFunction:					\
   438  	MOVD	f+8(FP), R12;			\
   439  	MOVD	(R12), R8;			\
   440  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   441  	BL	(R8);				\
   442  	/* copy return values back: R4/R6 advanced by retoffset, R5 = n - retoffset */	\
   443  	MOVD	argtype+0(FP), R7;		\
   444  	MOVD	arg+16(FP), R6;			\
   445  	MOVWZ	n+24(FP), R5;			\
   446  	MOVD	$stack-MAXSIZE(SP), R4;		\
   447  	MOVWZ	retoffset+28(FP), R1;		\
   448  	ADD	R1, R4;				\
   449  	ADD	R1, R6;				\
   450  	SUB	R1, R5;				\
   451  	BL	callRet<>(SB);			\
   452  	RET
        // CALLFN(NAME, MAXSIZE) defines a function NAME with a MAXSIZE-byte
        // frame. It copies argsize bytes from arg into that frame, calls the
        // code pointer found at *f with f in R12, and then hands the region
        // starting at retoffset to callRet<> to be copied back to arg.
        // NOTE(review): the TEXT line declares 24 bytes of arguments, but the
        // body reads argsize+24(FP) and retoffset+28(FP), and ·reflectcall
        // declares 32 bytes — confirm the declared size is intended.
   453  
   454  // callRet copies return values back at the end of call*. This is a
   455  // separate function so it can allocate stack space for the arguments
   456  // to reflectcallmove. It does not follow the Go ABI; it expects its
   457  // arguments in registers.
   458  TEXT callRet<>(SB), NOSPLIT, $32-0
   459  	MOVD	R7, 8(R15)
   460  	MOVD	R6, 16(R15)
   461  	MOVD	R4, 24(R15)
   462  	MOVD	R5, 32(R15)
   463  	BL	runtime·reflectcallmove(SB)
   464  	RET
   465  
   466  CALLFN(·call32, 32)	// one stub per frame size, doubling from 32 up to 1073741824 bytes; ·reflectcall dispatches to these by name
   467  CALLFN(·call64, 64)
   468  CALLFN(·call128, 128)
   469  CALLFN(·call256, 256)
   470  CALLFN(·call512, 512)
   471  CALLFN(·call1024, 1024)
   472  CALLFN(·call2048, 2048)
   473  CALLFN(·call4096, 4096)
   474  CALLFN(·call8192, 8192)
   475  CALLFN(·call16384, 16384)
   476  CALLFN(·call32768, 32768)
   477  CALLFN(·call65536, 65536)
   478  CALLFN(·call131072, 131072)
   479  CALLFN(·call262144, 262144)
   480  CALLFN(·call524288, 524288)
   481  CALLFN(·call1048576, 1048576)
   482  CALLFN(·call2097152, 2097152)
   483  CALLFN(·call4194304, 4194304)
   484  CALLFN(·call8388608, 8388608)
   485  CALLFN(·call16777216, 16777216)
   486  CALLFN(·call33554432, 33554432)
   487  CALLFN(·call67108864, 67108864)
   488  CALLFN(·call134217728, 134217728)
   489  CALLFN(·call268435456, 268435456)
   490  CALLFN(·call536870912, 536870912)
   491  CALLFN(·call1073741824, 1073741824)
   492  
   493  // Not a function: target for EXRL (execute relative long) instruction.
        // CALLFN's tailArgs step executes this MVC template with the length
        // supplied from R5, copying from 0(R4) to 0(R6). There is deliberately
        // no RET: control never enters this symbol by a call or branch.
   494  TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   495  	MVC	$1, 0(R4), 0(R6)
   496  
   497  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// No-op in this implementation: returns immediately.
   498  	RET
   499  
   500  // void jmpdefer(fv, sp);
   501  // called from deferreturn.
   502  // 1. grab stored LR for caller
   503  // 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   504  // 3. BR to fn
        // The net effect is that fn runs as if called from just before the
        // caller's "BL deferreturn", so that call is executed again when fn
        // returns.
   505  TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   506  	MOVD	0(R15), R1		// stored LR of the caller
   507  	SUB	$6, R1, LR		// LR = stored LR - 6
   508  
   509  	MOVD	fv+0(FP), R12		// closure context for fn
   510  	MOVD	argp+8(FP), R15		// switch SP to the supplied argument pointer...
   511  	SUB	$8, R15			// ...minus 8
   512  	MOVD	0(R12), R3		// code pointer
   513  	BR	(R3)
   514  
   515  // Save state of caller into g->sched. Smashes R1.
        // File-local counterpart of runtime·gosave that writes to the current
        // g's sched buffer directly: pc = LR, sp = R15, lr = 0, ret = 0.
        // Calls runtime·badctxt if sched.ctxt is not zero.
   516  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   517  	MOVD	LR, (g_sched+gobuf_pc)(g)
   518  	MOVD	R15, (g_sched+gobuf_sp)(g)
   519  	MOVD	$0, (g_sched+gobuf_lr)(g)
   520  	MOVD	$0, (g_sched+gobuf_ret)(g)
   521  	// Assert ctxt is zero. See func save.
   522  	MOVD	(g_sched+gobuf_ctxt)(g), R1
   523  	CMPBEQ	R1, $0, 2(PC)		// ctxt == 0: skip the badctxt call
   524  	BL	runtime·badctxt(SB)
   525  	RET
   526  
   527  // func asmcgocall(fn, arg unsafe.Pointer) int32
   528  // Call fn(arg) on the scheduler stack,
   529  // aligned appropriately for the gcc ABI.
   530  // See cgocall.go for more details.
   531  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   532  	// R3 = fn; R4 = arg; R2 = caller's stack pointer; R5 = caller's g; R6 = temp
   533  	// (arg is moved into R2 immediately before the call to fn)
   534  	MOVD	fn+0(FP), R3
   535  	MOVD	arg+8(FP), R4
   536  
   537  	MOVD	R15, R2		// save original stack pointer
   538  	MOVD	g, R5
   539  
   540  	// Figure out if we need to switch to m->g0 stack.
   541  	// We get called to create new OS threads too, and those
   542  	// come in on the m->g0 stack already.
   543  	MOVD	g_m(g), R6
   544  	MOVD	m_g0(R6), R6
   545  	CMPBEQ	R6, g, g0
   546  	BL	gosave<>(SB)
   547  	MOVD	R6, g
   548  	BL	runtime·save_g(SB)
   549  	MOVD	(g_sched+gobuf_sp)(g), R15
   550  
   551  	// Now on a scheduling stack (a pthread-created stack).
   552  g0:
   553  	// Save room for two of our pointers, plus 160 bytes of callee
   554  	// save area that lives on the caller stack.
   555  	SUB	$176, R15
   556  	MOVD	$~7, R6
   557  	AND	R6, R15                 // 8-byte alignment for gcc ABI
   558  	MOVD	R5, 168(R15)             // save old g on stack
   559  	MOVD	(g_stack+stack_hi)(R5), R5
   560  	SUB	R2, R5
   561  	MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   562  	MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   563  	MOVD	R4, R2                  // arg in R2
   564  	BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   565  
   566  	XOR	R0, R0                  // set R0 back to 0.
   567  	// Restore g, stack pointer.
        	// The original SP is recomputed as old g's stack.hi minus the depth
        	// saved at 160(R15), using the stack bounds as they are now.
   568  	MOVD	168(R15), g
   569  	BL	runtime·save_g(SB)
   570  	MOVD	(g_stack+stack_hi)(g), R5
   571  	MOVD	160(R15), R6
   572  	SUB	R6, R5
   573  	MOVD	R5, R15
   574  
   575  	MOVW	R2, ret+16(FP)		// R2 as left by fn becomes the int32 result
   576  	RET
   577  
   578  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   579  // Turn the fn into a Go func (by taking its address) and call
   580  // cgocallback_gofunc.
   581  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
        	// Copies the four arguments into this function's outgoing argument
        	// area and calls cgocallback_gofunc. The first outgoing argument is
        	// the address of the fn argument slot, not its value.
   582  	MOVD	$fn+0(FP), R3		// &fn
   583  	MOVD	R3, 8(R15)
   584  	MOVD	frame+8(FP), R3
   585  	MOVD	R3, 16(R15)
   586  	MOVD	framesize+16(FP), R3
   587  	MOVD	R3, 24(R15)
   588  	MOVD	ctxt+24(FP), R3
   589  	MOVD	R3, 32(R15)
   590  	MOVD	$runtime·cgocallback_gofunc(SB), R3
   591  	BL	(R3)
   592  	RET
   593  
   594  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   595  // See cgocall.go for more details.
   596  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
        	// Two local slots are used: savedm-8(SP) holds the m found on entry
        	// (zero if needm had to be called) and savedsp-16(SP) holds the
        	// previous m->g0->sched.sp. The function switches to m->curg's
        	// stack, calls runtime·cgocallbackg, switches back to g0, and calls
        	// dropm if the m was borrowed.
   597  	NO_LOCAL_POINTERS
   598  
   599  	// Load m and g from thread-local storage.
   600  	MOVB	runtime·iscgo(SB), R3
   601  	CMPBEQ	R3, $0, nocgo
   602  	BL	runtime·load_g(SB)
   603  
   604  nocgo:
   605  	// If g is nil, Go did not create the current thread.
   606  	// Call needm to obtain one for temporary use.
   607  	// In this case, we're running on the thread stack, so there's
   608  	// lots of space, but the linker doesn't know. Hide the call from
   609  	// the linker analysis by using an indirect call.
   610  	CMPBEQ	g, $0, needm
   611  
   612  	MOVD	g_m(g), R8
   613  	MOVD	R8, savedm-8(SP)
   614  	BR	havem
   615  
   616  needm:
   617  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   618  	MOVD	$runtime·needm(SB), R3
   619  	BL	(R3)
   620  
   621  	// Set m->sched.sp = SP, so that if a panic happens
   622  	// during the function we are about to execute, it will
   623  	// have a valid SP to run on the g0 stack.
   624  	// The next few lines (after the havem label)
   625  	// will save this SP onto the stack and then write
   626  	// the same SP back to m->sched.sp. That seems redundant,
   627  	// but if an unrecovered panic happens, unwindm will
   628  	// restore the g->sched.sp from the stack location
   629  	// and then systemstack will try to use it. If we don't set it here,
   630  	// that restored SP will be uninitialized (typically 0) and
   631  	// will not be usable.
   632  	MOVD	g_m(g), R8
   633  	MOVD	m_g0(R8), R3
   634  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   635  
   636  havem:
   637  	// Now there's a valid m, and we're running on its m->g0.
   638  	// Save current m->g0->sched.sp on stack and then set it to SP.
   639  	// Save current sp in m->g0->sched.sp in preparation for
   640  	// switch back to m->curg stack.
   641  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
        	// NOTE(review): "R1" above looks carried over from another port; this
        	// file uses R15 as the stack pointer — confirm the intended register.
   642  	MOVD	m_g0(R8), R3
   643  	MOVD	(g_sched+gobuf_sp)(R3), R4
   644  	MOVD	R4, savedsp-16(SP)
   645  	MOVD	R15, (g_sched+gobuf_sp)(R3)
   646  
   647  	// Switch to m->curg stack and call runtime.cgocallbackg.
   648  	// Because we are taking over the execution of m->curg
   649  	// but *not* resuming what had been running, we need to
   650  	// save that information (m->curg->sched) so we can restore it.
   651  	// We can restore m->curg->sched.sp easily, because calling
   652  	// runtime.cgocallbackg leaves SP unchanged upon return.
   653  	// To save m->curg->sched.pc, we push it onto the stack.
   654  	// This has the added benefit that it looks to the traceback
   655  	// routine like cgocallbackg is going to return to that
   656  	// PC (because the frame we allocate below has the same
   657  	// size as cgocallback_gofunc's frame declared above)
   658  	// so that the traceback will seamlessly trace back into
   659  	// the earlier calls.
   660  	//
   661  	// In the new goroutine, -8(SP) is unused (where SP refers to
   662  	// m->curg's SP while we're setting it up, before we've adjusted it).
   663  	MOVD	m_curg(R8), g
   664  	BL	runtime·save_g(SB)
   665  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   666  	MOVD	(g_sched+gobuf_pc)(g), R5
   667  	MOVD	R5, -24(R4)		// saved curg pc, at 0(R15) after the switch below
   668  	MOVD	ctxt+24(FP), R5
   669  	MOVD	R5, -16(R4)		// ctxt, at 8(R15) after the switch below
   670  	MOVD	$-24(R4), R15
   671  	BL	runtime·cgocallbackg(SB)
   672  
   673  	// Restore g->sched (== m->curg->sched) from saved values.
   674  	MOVD	0(R15), R5
   675  	MOVD	R5, (g_sched+gobuf_pc)(g)
   676  	MOVD	$24(R15), R4
   677  	MOVD	R4, (g_sched+gobuf_sp)(g)
   678  
   679  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   680  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   681  	// so we do not have to restore it.)
   682  	MOVD	g_m(g), R8
   683  	MOVD	m_g0(R8), g
   684  	BL	runtime·save_g(SB)
   685  	MOVD	(g_sched+gobuf_sp)(g), R15
   686  	MOVD	savedsp-16(SP), R4
   687  	MOVD	R4, (g_sched+gobuf_sp)(g)
   688  
   689  	// If the m on entry was nil, we called needm above to borrow an m
   690  	// for the duration of the call. Since the call is over, return it with dropm.
   691  	MOVD	savedm-8(SP), R6
   692  	CMPBNE	R6, $0, droppedm
   693  	MOVD	$runtime·dropm(SB), R3
   694  	BL	(R3)
   695  droppedm:
   696  
   697  	// Done!
   698  	RET
   699  
   700  // void setg(G*); set g. for use by needm.
   701  TEXT runtime·setg(SB), NOSPLIT, $0-8
        	// Loads the argument into the g register and calls save_g.
   702  	MOVD	gg+0(FP), g
   703  	// This only happens if iscgo, so jump straight to save_g
   704  	BL	runtime·save_g(SB)
   705  	RET
   706  
   707  // void setg_gcc(G*); set g in C TLS.
   708  // Must obey the gcc calling convention.
   709  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
        	// Takes the new g in R2 (C calling convention). LR, R10, g and R11
        	// are parked in R1, R3, R4 and R5 across the save_g call and put back
        	// before returning, so the caller sees all four unchanged.
   710  	// The standard prologue clobbers LR (R14), which is callee-save in
   711  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   712  	MOVD	LR, R1
   713  	// Also save g, R10, and R11 since they're callee-save in C ABI
   714  	MOVD	R10, R3
   715  	MOVD	g, R4
   716  	MOVD	R11, R5
   717  
   718  	MOVD	R2, g
   719  	BL	runtime·save_g(SB)
   720  
   721  	MOVD	R5, R11
   722  	MOVD	R4, g
   723  	MOVD	R3, R10
   724  	MOVD	R1, LR
   725  	RET
   726  
   727  TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
        	// Returns the word at 0(R15). This function has no frame of its own,
        	// so that slot belongs to the function that called getcallerpc.
   728  	MOVD	0(R15), R3		// LR saved by caller
   729  	MOVD	R3, ret+0(FP)
   730  	RET
   731  
   732  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Loads from the address held in R0 (kept at zero elsewhere in this
        	// file), presumably to fault; UNDEF follows in case the load does
        	// not stop execution.
   733  	MOVW	(R0), R0
   734  	UNDEF
   735  
   736  // int64 runtime·cputicks(void)
        // Returns the STORE CLOCK value with its most significant bit cleared.
   737  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   738  	// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   739  	// This means that since about 1972 the msb has been set, making the
   740  	// result of a call to STORE CLOCK (stck) a negative number.
   741  	// We clear the msb to make it positive.
   742  	STCK	ret+0(FP)      // serialises before and after call
   743  	MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   744  	SLD	$1, R3         // shift the msb out...
   745  	SRD	$1, R3         // ...and shift a zero back in
   746  	MOVD	R3, ret+0(FP)
   747  	RET
   748  
   749  // AES hashing not implemented for s390x
        // Each stub consists of a single load from the address held in R0 (kept
        // at zero elsewhere in this file), presumably so that any call faults
        // immediately. None of the stubs has a RET.
   750  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   751  	MOVW	(R0), R15
   752  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   753  	MOVW	(R0), R15
   754  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   755  	MOVW	(R0), R15
   756  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   757  	MOVW	(R0), R15
   758  
   759  // memequal(a, b unsafe.Pointer, size uintptr) bool
        // Loads the registers memeqbody expects (R3 = a, R5 = b, R6 = size,
        // R7 = address of the result byte) and tail-jumps to it; memeqbody
        // writes the result and returns to memequal's caller.
   760  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   761  	MOVD	a+0(FP), R3
   762  	MOVD	b+8(FP), R5
   763  	MOVD	size+16(FP), R6
   764  	LA	ret+24(FP), R7
   765  	BR	runtime·memeqbody(SB)
   766  
   767  // memequal_varlen(a, b unsafe.Pointer) bool
        // Same as memequal, except the length is read from offset 8 of the
        // closure whose address is in R12 rather than from an argument.
   768  TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   769  	MOVD	a+0(FP), R3
   770  	MOVD	b+8(FP), R5
   771  	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   772  	LA	ret+16(FP), R7
   773  	BR	runtime·memeqbody(SB)
   774  
   775  TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
        	// Compares two byte slices. Different lengths give false straight
        	// away; equal lengths tail-jump to memeqbody with R3 = a, R5 = b,
        	// R6 = length and R7 = address of the result byte.
   776  	MOVD	a_len+8(FP), R2
   777  	MOVD	b_len+32(FP), R6
   778  	MOVD	a+0(FP), R3
   779  	MOVD	b+24(FP), R5
   780  	LA	ret+48(FP), R7
   781  	CMPBNE	R2, R6, notequal
   782  	BR	runtime·memeqbody(SB)
   783  notequal:
   784  	MOVB	$0, ret+48(FP)
   785  	RET
   786  
   787  // input:
   788  //   R3 = a
   789  //   R5 = b
   790  //   R6 = len
   791  //   R7 = address of output byte (stores 0 or 1 here)
   792  //   a and b have the same length
   793  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
        	// Register inputs: R3 = a, R5 = b, R6 = length, R7 = address of the
        	// result byte. Writes 1 to 0(R7) if the ranges match, 0 otherwise.
        	// Uses R2, R8 and R9 as scratch and advances R3, R5 and R6.
   794  	CMPBEQ	R3, R5, equal		// same address: equal without reading memory
   795  loop:
        	// R6 = bytes still to compare. 256 or more: compare one 256-byte
        	// chunk and loop. 32..255: one executed CLC (tail). Under 32: tiny.
   796  	CMPBEQ	R6, $0, equal
   797  	CMPBLT	R6, $32, tiny
   798  	CMP	R6, $256
   799  	BLT	tail
   800  	CLC	$256, 0(R3), 0(R5)
   801  	BNE	notequal
   802  	SUB	$256, R6
   803  	LA	256(R3), R3
   804  	LA	256(R5), R5
   805  	BR	loop
   806  tail:
        	// Execute the CLC template at memeqbodyclc with R8 = R6 - 1
        	// supplying the length.
   807  	SUB	$1, R6, R8
   808  	EXRL	$runtime·memeqbodyclc(SB), R8
   809  	BEQ	equal
   810  notequal:
   811  	MOVB	$0, 0(R7)
   812  	RET
   813  equal:
   814  	MOVB	$1, 0(R7)
   815  	RET
   816  tiny:
        	// Fewer than 32 bytes. R2 is the running offset into both buffers;
        	// compare 16, then 8, then 4 bytes as the remaining length allows.
   817  	MOVD	$0, R2
   818  	CMPBLT	R6, $16, lt16
   819  	MOVD	0(R3), R8
   820  	MOVD	0(R5), R9
   821  	CMPBNE	R8, R9, notequal
   822  	MOVD	8(R3), R8
   823  	MOVD	8(R5), R9
   824  	CMPBNE	R8, R9, notequal
   825  	LA	16(R2), R2
   826  	SUB	$16, R6
   827  lt16:
   828  	CMPBLT	R6, $8, lt8
   829  	MOVD	0(R3)(R2*1), R8
   830  	MOVD	0(R5)(R2*1), R9
   831  	CMPBNE	R8, R9, notequal
   832  	LA	8(R2), R2
   833  	SUB	$8, R6
   834  lt8:
   835  	CMPBLT	R6, $4, lt4
   836  	MOVWZ	0(R3)(R2*1), R8
   837  	MOVWZ	0(R5)(R2*1), R9
   838  	CMPBNE	R8, R9, notequal
   839  	LA	4(R2), R2
   840  	SUB	$4, R6
   841  lt4:
        	// At most 3 bytes remain. CHECK(n) finishes with "equal" when
        	// exactly n bytes were left; otherwise it compares the byte at
        	// offset R2+n and bails out on a mismatch.
   842  #define CHECK(n) \
   843  	CMPBEQ	R6, $n, equal \
   844  	MOVB	n(R3)(R2*1), R8 \
   845  	MOVB	n(R5)(R2*1), R9 \
   846  	CMPBNE	R8, R9, notequal
   847  	CHECK(0)
   848  	CHECK(1)
   849  	CHECK(2)
   850  	CHECK(3)
   851  	BR	equal
   852  
   853  TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
        	// CLC template that memeqbody's tail step runs through EXRL, with
        	// the length supplied from R8; it compares 0(R3) against 0(R5).
   854  	CLC	$1, 0(R3), 0(R5)
   855  	RET
   856  
   857  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads the registers indexbytebody expects and tail-jumps to it;
        	// the result is written through R2 into the ret slot.
   858  	MOVD	s+0(FP), R3     // s => R3
   859  	MOVD	s_len+8(FP), R4 // s_len => R4
   860  	MOVBZ	c+24(FP), R5    // c => R5
   861  	MOVD	$ret+32(FP), R2 // &ret => R2
   862  	BR	runtime·indexbytebody(SB)
   863  
   864  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// String counterpart of bytes·IndexByte: same register setup, with
        	// c and ret at the offsets of this signature.
   865  	MOVD	s+0(FP), R3     // s => R3
   866  	MOVD	s_len+8(FP), R4 // s_len => R4
   867  	MOVBZ	c+16(FP), R5    // c => R5
   868  	MOVD	$ret+24(FP), R2 // &ret => R2
   869  	BR	runtime·indexbytebody(SB)
   870  
   871  // input:
   872  // R3: s
   873  // R4: s_len
   874  // R5: c -- byte sought
   875  // R2: &ret -- address to put index into
   876  TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
        	// Writes to 0(R2) the index of the first byte equal to R5 in the
        	// R4 bytes starting at R3, or -1 if there is none. Inputs shorter
        	// than 16 bytes use a byte loop. Longer inputs use the vector
        	// search when the hasVX facility flag is set, otherwise SRST.
   877  	CMPBEQ	R4, $0, notfound
   878  	MOVD	R3, R6          // store base for later
   879  	ADD	R3, R4, R8      // the address after the end of the string
   880  	//if the length is small, use loop; otherwise, use vector or srst search
   881  	CMPBGE	R4, $16, large
   882  
   883  residual:
        	// Byte-at-a-time scan of [R3, R8). Also entered from vectorloop to
        	// finish the bytes after the last full 16-byte block.
   884  	CMPBEQ	R3, R8, notfound
   885  	MOVBZ	0(R3), R7
   886  	LA	1(R3), R3
   887  	CMPBNE	R7, R5, residual
   888  
   889  found:
        	// R3 is one past the matching byte, so index = R3 - base - 1.
   890  	SUB	R6, R3
   891  	SUB	$1, R3
   892  	MOVD	R3, 0(R2)
   893  	RET
   894  
   895  notfound:
   896  	MOVD	$-1, 0(R2)
   897  	RET
   898  
   899  large:
   900  	MOVBZ	·cpu+facilities_hasVX(SB), R1
   901  	CMPBNE	R1, $0, vectorimpl
   902  
   903  srstimpl:                       // no vector facility
   904  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   905  srstloop:
   906  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   907  	BVS	srstloop        // interrupted - continue
   908  	BGT	notfoundr0
   909  foundr0:
        	// R8 is used here as the address of the match.
   910  	XOR	R0, R0          // reset R0
   911  	SUB	R6, R8          // remove base
   912  	MOVD	R8, 0(R2)
   913  	RET
   914  notfoundr0:
   915  	XOR	R0, R0          // reset R0
   916  	MOVD	$-1, 0(R2)
   917  	RET
   918  
   919  vectorimpl:
   920  	//if the address is not 16byte aligned, use loop for the header
   921  	MOVD	R3, R8
   922  	AND	$15, R8
   923  	CMPBGT	R8, $0, notaligned
   924  
   925  aligned:
        	// R8 = base + length (end address); R7 = end rounded down to a
        	// multiple of 16, the limit for whole-block vector loads.
   926  	ADD	R6, R4, R8
   927  	MOVD	R8, R7
   928  	AND	$-16, R7
   929  	// replicate c across V17
   930  	VLVGB	$0, R5, V19
   931  	VREPB	$0, V19, V17
   932  
   933  vectorloop:
   934  	CMPBGE	R3, R7, residual
   935  	VL	0(R3), V16    // load string to be searched into V16
   936  	ADD	$16, R3
   937  	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   938  	BVS	vectorloop
   939  
   940  	// when vector search found c in the string
        	// index = (block start - base) + position within the block
   941  	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   942  	SUB	$16, R3
   943  	SUB	R6, R3
   944  	ADD	R3, R7
   945  	MOVD	R7, 0(R2)
   946  	RET
   947  
   948  notaligned:
        	// R8 = next 16-byte boundary above R3; scan byte by byte up to it,
        	// then continue at aligned.
   949  	MOVD	R3, R8
   950  	AND	$-16, R8
   951  	ADD     $16, R8
   952  notalignedloop:
   953  	CMPBEQ	R3, R8, aligned
   954  	MOVBZ	0(R3), R7
   955  	LA	1(R3), R3
   956  	CMPBNE	R7, R5, notalignedloop
   957  	BR	found
   958  
   959  TEXT runtime·return0(SB), NOSPLIT, $0
   960  	MOVW	$0, R3 // R3 = 0; NOTE(review): presumably R3 is the register the caller reads the result from -- confirm
   961  	RET
   962  
   963  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   964  // Must obey the gcc calling convention. The result is left in R2.
   965  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   966  	// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   967  	MOVD	g, R1
   968  	MOVD	R10, R3
   969  	MOVD	LR, R4
   970  	MOVD	R11, R5		// NOTE(review): saving in R1, R3-R5 assumes load_g leaves those registers alone -- confirm
   971  
   972  	BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   973  	MOVD	g_m(g), R2		// R2 = g->m
   974  	MOVD	m_curg(R2), R2		// R2 = g->m->curg
   975  	MOVD	(g_stack+stack_hi)(R2), R2	// R2 = g->m->curg.stack.hi
   976  
   977  	MOVD	R1, g			// restore the caller's values, including the LR overwritten by BL
   978  	MOVD	R3, R10
   979  	MOVD	R4, LR
   980  	MOVD	R5, R11
   981  	RET
   982  
   983  // The top-most function running on a goroutine
   984  // returns to goexit+PCQuantum.
   985  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   986  	BYTE $0x07; BYTE $0x00; // 2-byte nop; NOTE(review): presumably this padding is what makes goexit+PCQuantum land on the BL below -- confirm PCQuantum on s390x
   987  	BL	runtime·goexit1(SB)	// does not return
   988  	// traceback from goexit1 must hit code range of goexit
   989  	BYTE $0x07; BYTE $0x00; // 2-byte nop, so the return address of the BL above is still inside goexit
   990  
   991  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 // empty function: takes no arguments and returns immediately
   992  	RET
   993  
   994  TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
   995          // Stores are already ordered on s390x, so this is just a
   996          // compile barrier: no barrier instruction is emitted.
   997  	RET
   998  
   999  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 // loads both strings into registers and tail-jumps to cmpbody
  1000  	MOVD	s1_base+0(FP), R3  // R3 = a
  1001  	MOVD	s1_len+8(FP), R4   // R4 = alen
  1002  	MOVD	s2_base+16(FP), R5 // R5 = b
  1003  	MOVD	s2_len+24(FP), R6  // R6 = blen
  1004  	LA	ret+32(FP), R7     // R7 = &ret, where cmpbody stores -1/0/1
  1005  	BR	runtime·cmpbody(SB) // branch, not call: cmpbody's RET returns to our caller
  1006  
  1007  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 // loads both slices into registers and tail-jumps to cmpbody
  1008  	MOVD	s1+0(FP), R3   // R3 = a (data pointer of s1)
  1009  	MOVD	s1+8(FP), R4   // R4 = alen (length of s1)
  1010  	MOVD	s2+24(FP), R5  // R5 = b (data pointer of s2)
  1011  	MOVD	s2+32(FP), R6  // R6 = blen (length of s2)
  1012  	LA	res+48(FP), R7 // R7 = &res, where cmpbody stores -1/0/1
  1013  	BR	runtime·cmpbody(SB) // branch, not call: cmpbody's RET returns to our caller
  1014  
  1015  // cmpbody compares a[:alen] with b[:blen] lexicographically. Input:
  1016  //   R3 = a
  1017  //   R4 = alen
  1018  //   R5 = b
  1019  //   R6 = blen
  1020  //   R7 = address of output word (stores -1/0/1 here)
        // R3, R5 and R8 are clobbered.
  1021  TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
  1022  	CMPBEQ	R3, R5, cmplengths // same base address: the common prefix is equal, only the lengths decide
  1023  	MOVD	R4, R8
  1024  	CMPBLE	R4, R6, amin
  1025  	MOVD	R6, R8             // R8 = min(alen, blen) = number of bytes still to compare
  1026  amin:
  1027  	CMPBEQ	R8, $0, cmplengths
  1028  	CMP	R8, $256
  1029  	BLE	tail
  1030  loop:                              // more than 256 bytes left: compare one 256-byte chunk
  1031  	CLC	$256, 0(R3), 0(R5)
  1032  	BGT	gt
  1033  	BLT	lt
  1034  	SUB	$256, R8
        	MOVD	$256(R3), R3       // advance both pointers past the chunk just compared; without
        	MOVD	$256(R5), R5       // this the loop and the tail would re-compare the first bytes
  1035  	CMP	R8, $256
  1036  	BGT	loop
  1037  tail:                              // 1 <= R8 <= 256 bytes left
  1038  	SUB	$1, R8             // R8 = length-1 for the executed CLC below
  1039  	EXRL	$runtime·cmpbodyclc(SB), R8 // execute cmpbodyclc's CLC with its length taken from R8
  1040  	BGT	gt
  1041  	BLT	lt
  1042  cmplengths:                        // common prefix is equal: the shorter input sorts first
  1043  	CMP	R4, R6
  1044  	BEQ	eq
  1045  	BLT	lt
  1046  gt:
  1047  	MOVD	$1, 0(R7)
  1048  	RET
  1049  lt:
  1050  	MOVD	$-1, 0(R7)
  1051  	RET
  1052  eq:
  1053  	MOVD	$0, 0(R7)
  1054  	RET
  1055  
  1056  TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0 // CLC template executed by cmpbody via EXRL
  1057  	CLC	$1, 0(R3), 0(R5) // compare 0(R3) with 0(R5); cmpbody supplies length-1 in R8 when executing this
  1058  	RET
  1059  
  1060  // func supportsVX() bool
  1061  TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1062  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0 // load the runtime's hasVX flag; NOTE(review): R0 is left non-zero when the flag is set, while indexbytebody's srst path notes R0 is expected to be 0 -- confirm this is acceptable
  1063  	MOVB	R0, ret+0(FP)                        // return the flag byte as the bool result
  1064  	RET
  1065  
  1066  // func supportsVX() bool
  1067  TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1068  	MOVBZ	runtime·cpu+facilities_hasVX(SB), R0 // load the runtime's hasVX flag; NOTE(review): R0 is left non-zero when the flag is set, while indexbytebody's srst path notes R0 is expected to be 0 -- confirm this is acceptable
  1069  	MOVB	R0, ret+0(FP)                        // return the flag byte as the bool result
  1070  	RET
  1071  
  1072  // func indexShortStr(s, sep string) int
  1073  // Caller must confirm availability of vx facility before calling.
  1074  TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1075  	LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
  1076  	LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1077  	MOVD	$ret+32(FP), R5    // R5=&ret
  1078  	BR	runtime·indexShortStr(SB) // branch, not call: the shared body stores through R5 and returns to our caller
  1079  
  1080  // func indexShortStr(s, sep []byte) int
  1081  // Caller must confirm availability of vx facility before calling.
  1082  TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1083  	LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1084  	LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1085  	MOVD	$ret+48(FP), R5    // R5=&ret
  1086  	BR	runtime·indexShortStr(SB) // branch, not call: the shared body stores through R5 and returns to our caller
  1087  
  1088  // s: string we are searching
  1089  // sep: string to search for (an empty sep yields -1; more than 64 bytes is not implemented)
  1090  // R1=&s[0], R2=len(s)
  1091  // R3=&sep[0], R4=len(sep)
  1092  // R5=&ret (int); receives the offset of the first match, or -1
  1093  // Caller must confirm availability of vx facility before calling.
  1094  TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1095  	CMPBGT	R4, R2, notfound // sep is longer than s
  1096  	ADD	R1, R2
  1097  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1098  	CMPBEQ	R4, $0, notfound
  1099  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1100  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1101  	MOVD	R1, R7 // R7=address of the current candidate position in s
  1102  index2plus:
  1103  	CMPBNE	R4, $1, index3plus // this path handles len(sep)==2 only
  1104  	MOVD	$15(R7), R9
  1105  	CMPBGE	R9, R2, index2to16 // too close to the end for the 16-wide loop
  1106  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1107  	VONE	V16
  1108  	VREPH	$0, V0, V1
  1109  	CMPBGE	R9, R2, index2to16 // redundant: R9 and R2 are unchanged since the identical check above
  1110  index2loop:
  1111  	VL	0(R7), V2          // 16 bytes, even indices
  1112  	VL	1(R7), V4          // 16 bytes, odd indices
  1113  	VCEQH	V1, V2, V5         // compare even indices
  1114  	VCEQH	V1, V4, V6         // compare odd indices
  1115  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1116  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1117  	BLT	foundV17
  1118  	MOVD	$16(R7), R7        // R7+=16
  1119  	ADD	$15, R7, R9
  1120  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1121  	CMPBLE	R7, R2, index2to16 // remaining candidate positions are handled one at a time
  1122  	BR	notfound
  1123  
  1124  index3plus:
  1125  	CMPBNE	R4, $2, index4plus // this path handles len(sep)==3 only
  1126  	ADD	$15, R7, R9
  1127  	CMPBGE	R9, R2, index2to16 // too close to the end for the 16-wide loop
  1128  	MOVD	$1, R0             // VLL index 1: load 2 bytes
  1129  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1130  	VONE	V16
  1131  	VREPH	$0, V0, V1
  1132  	VREPB	$2, V0, V8
  1133  index3loop:
  1134  	VL	(R7), V2           // load 16-bytes into V2
  1135  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1136  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1137  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1138  	VCEQH	V1, V2, V5         // compare 2-byte even indices
  1139  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1140  	VCEQB	V8, V9, V10        // compare last bytes
  1141  	VSEL	V5, V6, V31, V7    // merge even and odd indices
  1142  	VN	V7, V10, V7        // AND indices with last byte
  1143  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1144  	BLT	foundV17
  1145  	MOVD	$16(R7), R7        // R7+=16
  1146  	ADD	$15, R7, R9
  1147  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1148  	CMPBLE	R7, R2, index2to16 // remaining candidate positions are handled one at a time
  1149  	BR	notfound
  1150  
  1151  index4plus:
  1152  	CMPBNE	R4, $3, index5plus // this path handles len(sep)==4 only
  1153  	ADD	$15, R7, R9
  1154  	CMPBGE	R9, R2, index2to16 // too close to the end for the 16-wide loop
  1155  	MOVD	$2, R0             // VLL index 2: load 3 bytes
  1156  	VGBM	$0x8888, V29       // 0xff000000ff000000...
  1157  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1158  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1159  	VONE	V16
  1160  	VREPF	$0, V0, V1
  1161  index4loop:
  1162  	VL	(R7), V2           // load 16-bytes into V2
  1163  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1164  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1165  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1166  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1167  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1168  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1169  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1170  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1171  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1172  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1173  	VSEL	V13, V14, V31, V7  // final merge
  1174  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1175  	BLT	foundV17
  1176  	MOVD	$16(R7), R7        // R7+=16
  1177  	ADD	$15, R7, R9
  1178  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1179  	CMPBLE	R7, R2, index2to16 // remaining candidate positions are handled one at a time
  1180  	BR	notfound
  1181  
  1182  index5plus:
  1183  	CMPBGT	R4, $15, index17plus // len(sep) > 16
  1184  index2to16:                    // one candidate position per compare; sep fits in V0
  1185  	CMPBGT	R7, R2, notfound
  1186  	MOVD	$1(R7), R8
  1187  	CMPBGT	R8, R2, index2to16tail // only one candidate position left
  1188  index2to16loop:
  1189  	// unrolled 2x
  1190  	VLL	R4, (R7), V1       // len(sep) bytes at candidate R7
  1191  	VLL	R4, 1(R7), V2      // len(sep) bytes at candidate R7+1
  1192  	VCEQGS	V0, V1, V3
  1193  	BEQ	found
  1194  	MOVD	$1(R7), R7
  1195  	VCEQGS	V0, V2, V4
  1196  	BEQ	found
  1197  	MOVD	$1(R7), R7
  1198  	CMPBLT	R7, R2, index2to16loop // at least two candidates left
  1199  	CMPBGT	R7, R2, notfound       // none left; otherwise R7==R2 and exactly one is left
  1200  index2to16tail:
  1201  	VLL	R4, (R7), V1
  1202  	VCEQGS	V0, V1, V2
  1203  	BEQ	found
  1204  	BR	notfound
  1205  
  1206  index17plus:
  1207  	CMPBGT	R4, $31, index33plus // len(sep) > 32
  1208  	SUB	$16, R4, R0        // R0=VLL index for the bytes of sep after the first 16
  1209  	VLL	R0, 16(R3), V1     // V1=sep[16:]
  1210  	VONE	V7
  1211  index17to32loop:               // V0:V1 hold sep; compare against s at R7, advancing one byte at a time
  1212  	VL	(R7), V2
  1213  	VLL	R0, 16(R7), V3
  1214  	VCEQG	V0, V2, V4
  1215  	VCEQG	V1, V3, V5
  1216  	VN	V4, V5, V6         // AND the per-vector comparison results
  1217  	VCEQGS	V6, V7, V8         // compare the combined result with all ones (V7)
  1218  	BEQ	found
  1219  	MOVD	$1(R7), R7
  1220  	CMPBLE  R7, R2, index17to32loop
  1221  	BR	notfound
  1222  
  1223  index33plus:
  1224  	CMPBGT	R4, $47, index49plus // len(sep) > 48
  1225  	SUB	$32, R4, R0        // R0=VLL index for the bytes of sep after the first 32
  1226  	VL	16(R3), V1         // V1=sep[16:32]
  1227  	VLL	R0, 32(R3), V2     // V2=sep[32:]
  1228  	VONE	V11
  1229  index33to48loop:               // V0:V1:V2 hold sep; compare against s at R7, advancing one byte at a time
  1230  	VL	(R7), V3
  1231  	VL	16(R7), V4
  1232  	VLL	R0, 32(R7), V5
  1233  	VCEQG	V0, V3, V6
  1234  	VCEQG	V1, V4, V7
  1235  	VCEQG	V2, V5, V8
  1236  	VN	V6, V7, V9
  1237  	VN	V8, V9, V10        // AND of all three comparison results
  1238  	VCEQGS	V10, V11, V12      // compare the combined result with all ones (V11)
  1239  	BEQ	found
  1240  	MOVD	$1(R7), R7
  1241  	CMPBLE  R7, R2, index33to48loop
  1242  	BR	notfound
  1243  
  1244  index49plus:
  1245  	CMPBGT	R4, $63, index65plus // len(sep) > 64
  1246  	SUB	$48, R4, R0        // R0=VLL index for the bytes of sep after the first 48
  1247  	VL	16(R3), V1         // V1=sep[16:32]
  1248  	VL	32(R3), V2         // V2=sep[32:48]
  1249  	VLL	R0, 48(R3), V3     // V3=sep[48:]
  1250  	VONE	V15
  1251  index49to64loop:               // V0:V1:V2:V3 hold sep; compare against s at R7, advancing one byte at a time
  1252  	VL	(R7), V4
  1253  	VL	16(R7), V5
  1254  	VL	32(R7), V6
  1255  	VLL	R0, 48(R7), V7
  1256  	VCEQG	V0, V4, V8
  1257  	VCEQG	V1, V5, V9
  1258  	VCEQG	V2, V6, V10
  1259  	VCEQG	V3, V7, V11
  1260  	VN	V8, V9, V12
  1261  	VN	V10, V11, V13
  1262  	VN	V12, V13, V14      // AND of all four comparison results
  1263  	VCEQGS	V14, V15, V16      // compare the combined result with all ones (V15)
  1264  	BEQ	found
  1265  	MOVD	$1(R7), R7
  1266  	CMPBLE  R7, R2, index49to64loop // falls through to notfound when the candidates are exhausted
  1267  notfound:
  1268  	MOVD	$-1, (R5)
  1269  	RET
  1270  
  1271  index65plus:
  1272  	// not implemented; NOTE(review): the store through (R0) below is presumably meant to fault -- confirm
  1273  	MOVD	$0, (R0)
  1274  	RET
  1275  
  1276  foundV17: // index is in doubleword V17[0]
  1277  	VLGVG	$0, V17, R8
  1278  	ADD	R8, R7             // R7=address of the match within the current 16-byte window
  1279  found:                         // R7=address of the match
  1280  	SUB	R1, R7             // convert the address to an offset from &s[0]
  1281  	MOVD	R7, (R5)
  1282  	RET
  1283  
  1284  // This is called from .init_array and follows the platform, not Go, ABI.
  1285  // We are overly conservative. We could only save the registers we use.
  1286  // However, since this function is only called once per loaded module
  1287  // performance is unimportant.
  1288  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1289  	// Save R6-R15 in the register save area of the calling function.
  1290  	// Don't bother saving F8-F15 as we aren't doing any calls.
  1291  	STMG	R6, R15, 48(R15)
  1292  
  1293  	// append the argument (passed in R2, as per the ELF ABI) to the
  1294  	// moduledata linked list.
  1295  	MOVD	runtime·lastmoduledatap(SB), R1 // R1 = current tail of the list
  1296  	MOVD	R2, moduledata_next(R1)         // tail.next = new module
  1297  	MOVD	R2, runtime·lastmoduledatap(SB) // the new module becomes the tail
  1298  
  1299  	// Restore R6-R15.
  1300  	LMG	48(R15), R6, R15
  1301  	RET
  1302  
  1303  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1304  	MOVB	$1, ret+0(FP) // unconditionally store 1 (true) as the result
  1305  	RET