github.com/filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/runtime/asm_ppc64x.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "go_tls.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  #include "asm_ppc64x.h"
    12  
    13  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    14  	// R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
        	//
        	// Bootstrap sequence visible below: zero R0 via reginit, carve g0's
        	// stack bounds out of the incoming stack, call _cgo_init if it is
        	// non-nil, link g0 and m0 together, run check/args/osinit/schedinit,
        	// queue runtime·mainPC with newproc and finally call mstart.
    15  
    16  	// initialize essential registers
    17  	BL	runtime·reginit(SB)
    18  
    19  	SUB	$(FIXED_FRAME+16), R1
    20  	MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
    21  	MOVW	R3, FIXED_FRAME+0(R1)	// argc
    22  	MOVD	R4, FIXED_FRAME+8(R1)	// argv
    23  
    24  	// create istack out of the given (operating system) stack.
    25  	// _cgo_init may update stackguard.
    26  	MOVD	$runtime·g0(SB), g
    27  	MOVD	$(-64*1024), R31
    28  	ADD	R31, R1, R3		// R3 = R1 - 64KB: provisional stack_lo and stack guards for g0
    29  	MOVD	R3, g_stackguard0(g)
    30  	MOVD	R3, g_stackguard1(g)
    31  	MOVD	R3, (g_stack+stack_lo)(g)
    32  	MOVD	R1, (g_stack+stack_hi)(g)
    33  
    34  	// if there is a _cgo_init, call it using the gcc ABI.
        	// R0 was zeroed by reginit above, so CMP R0, R12 is a nil test.
    35  	MOVD	_cgo_init(SB), R12
    36  	CMP	R0, R12
    37  	BEQ	nocgo
    38  	MOVD	R12, CTR		// r12 = "global function entry point"
    39  	MOVD	R13, R5			// arg 2: TLS base pointer
    40  	MOVD	$setg_gcc<>(SB), R4 	// arg 1: setg
    41  	MOVD	g, R3			// arg 0: G
    42  	// C functions expect 32 bytes of space on caller stack frame
    43  	// and a 16-byte aligned R1
    44  	MOVD	R1, R14			// save current stack
    45  	SUB	$32, R1			// reserve 32 bytes
    46  	RLDCR	$0, R1, $~15, R1	// 16-byte align
    47  	BL	(CTR)			// may clobber R0, R3-R12
    48  	MOVD	R14, R1			// restore stack
    49  	MOVD	24(R1), R2		// reload the TOC pointer stashed at 24(R1) above
    50  	XOR	R0, R0			// fix R0
    51  
    52  nocgo:
    53  	// update stackguard after _cgo_init
        	// Both guards become stack_lo + _StackGuard, using whatever stack_lo
        	// holds now (the value stored above unless _cgo_init changed it).
    54  	MOVD	(g_stack+stack_lo)(g), R3
    55  	ADD	$const__StackGuard, R3
    56  	MOVD	R3, g_stackguard0(g)
    57  	MOVD	R3, g_stackguard1(g)
    58  
    59  	// set the per-goroutine and per-mach "registers"
    60  	MOVD	$runtime·m0(SB), R3
    61  
    62  	// save m->g0 = g0
    63  	MOVD	g, m_g0(R3)
    64  	// save m0 to g0->m
    65  	MOVD	R3, g_m(g)
    66  
    67  	BL	runtime·check(SB)
    68  
    69  	// args are already prepared
        	// (argc/argv were stored at FIXED_FRAME+0/+8(R1) at the top of this function)
    70  	BL	runtime·args(SB)
    71  	BL	runtime·osinit(SB)
    72  	BL	runtime·schedinit(SB)
    73  
    74  	// create a new goroutine to start program
        	// Six doublewords are pushed with pre-decrementing stores: the
        	// address of mainPC first (highest address), then five zeros.
    75  	MOVD	$runtime·mainPC(SB), R3		// entry
    76  	MOVDU	R3, -8(R1)
    77  	MOVDU	R0, -8(R1)
    78  	MOVDU	R0, -8(R1)
    79  	MOVDU	R0, -8(R1)
    80  	MOVDU	R0, -8(R1)
    81  	MOVDU	R0, -8(R1)
    82  	BL	runtime·newproc(SB)
    83  	ADD	$(16+FIXED_FRAME), R1	// pop the words pushed above (48 bytes; presumably FIXED_FRAME is 32 - confirm in asm_ppc64x.h)
    84  
    85  	// start this M
    86  	BL	runtime·mstart(SB)
    87  
        	// Only reached if mstart returns: store through address 0 (R0 is zero).
    88  	MOVD	R0, 0(R0)
    89  	RET
    90  
    91  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte cell holding the address of runtime·main
    92  GLOBL	runtime·mainPC(SB),RODATA,$8			// read-only; rt0_go passes its address to newproc
    93  
        // breakpoint currently stores R0 to the address held in R0 instead of
        // executing a dedicated trap instruction (see the TODO below).
    94  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
    95  	MOVD	R0, 0(R0) // TODO: TD (presumably: replace this store with a TD trap instruction - confirm)
    96  	RET
    97  
        // asminit does nothing on this architecture; it returns immediately.
    98  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
    99  	RET
   100  
   101  TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
   102  	// crosscall_ppc64 and crosscall2 need to reginit, but can't
   103  	// get at the 'runtime.reginit' symbol.
        	// This is a plain branch (BR, not BL): LR is left untouched, so
        	// reginit's RET returns straight to whoever called _cgo_reginit.
   104  	BR	runtime·reginit(SB)
   105  
   106  TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
   107  	// set R0 to zero, it's expected by the toolchain
        	// (code throughout this file uses R0 as a constant-zero source, e.g. "CMP R0, R12")
   108  	XOR R0, R0
   109  	RET
   110  
   111  /*
   112   *  go-routine
   113   */
   114  
   115  // void gosave(Gobuf*)
   116  // save state in Gobuf; setjmp
        // Stores the current SP (R1), the return address (LR) as pc, and g into
        // the Gobuf passed as buf, zeroes its lr and ret fields, and calls
        // badctxt if the Gobuf's ctxt field is not already zero.
        // Uses R3 and R31 as scratch.
   117  TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   118  	MOVD	buf+0(FP), R3
   119  	MOVD	R1, gobuf_sp(R3)
   120  	MOVD	LR, R31			// LR cannot be stored directly; go through R31
   121  	MOVD	R31, gobuf_pc(R3)
   122  	MOVD	g, gobuf_g(R3)
   123  	MOVD	R0, gobuf_lr(R3)
   124  	MOVD	R0, gobuf_ret(R3)
   125  	// Assert ctxt is zero. See func save.
   126  	MOVD	gobuf_ctxt(R3), R3
   127  	CMP	R0, R3
   128  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   129  	BL	runtime·badctxt(SB)
   130  	RET
   131  
   132  // void gogo(Gobuf*)
   133  // restore state from Gobuf; longjmp
        // Loads g, SP, LR, the return value (R3) and the closure context (R11)
        // from the Gobuf, clears the Gobuf's sp/ret/lr/ctxt fields, and branches
        // to the saved pc through CTR. Control does not come back here.
   134  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   135  	MOVD	buf+0(FP), R5
   136  
   137  	// If ctxt is not nil, invoke deletion barrier before overwriting.
        	// The barrier is called with two stack arguments: the address of
        	// buf.ctxt and zero (from R0).
   138  	MOVD	gobuf_ctxt(R5), R3
   139  	CMP	R0, R3
   140  	BEQ	nilctxt
   141  	MOVD	$gobuf_ctxt(R5), R3
   142  	MOVD	R3, FIXED_FRAME+0(R1)
   143  	MOVD	R0, FIXED_FRAME+8(R1)
   144  	BL	runtime·writebarrierptr_prewrite(SB)
   145  	MOVD	buf+0(FP), R5		// reload buf: R5 is not assumed to survive the call
   146  
   147  nilctxt:
   148  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   149  	BL	runtime·save_g(SB)
   150  
   151  	MOVD	0(g), R4		// load through g; R4 is not used afterwards, so this is the nil check
   152  	MOVD	gobuf_sp(R5), R1
   153  	MOVD	gobuf_lr(R5), R31
   154  	MOVD	R31, LR
   155  	MOVD	gobuf_ret(R5), R3
   156  	MOVD	gobuf_ctxt(R5), R11
   157  	MOVD	R0, gobuf_sp(R5)
   158  	MOVD	R0, gobuf_ret(R5)
   159  	MOVD	R0, gobuf_lr(R5)
   160  	MOVD	R0, gobuf_ctxt(R5)
   161  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   162  	MOVD	gobuf_pc(R5), R12	// R12 also carries the target address into the callee
   163  	MOVD	R12, CTR
   164  	BR	(CTR)
   165  
   166  // void mcall(fn func(*g))
   167  // Switch to m->g0's stack, call fn(g).
   168  // Fn must never return. It should gogo(&g->sched)
   169  // to keep running g.
        // Register use below: R3 = the calling g (passed to fn), R8 = m,
        // R11 = fn closure (context), R12/CTR = fn's code pointer.
   170  TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   171  	// Save caller state in g->sched
   172  	MOVD	R1, (g_sched+gobuf_sp)(g)
   173  	MOVD	LR, R31
   174  	MOVD	R31, (g_sched+gobuf_pc)(g)
   175  	MOVD	R0, (g_sched+gobuf_lr)(g)
   176  	MOVD	g, (g_sched+gobuf_g)(g)
   177  
   178  	// Switch to m->g0 & its stack, call fn.
   179  	MOVD	g, R3
   180  	MOVD	g_m(g), R8
   181  	MOVD	m_g0(R8), g
   182  	BL	runtime·save_g(SB)
   183  	CMP	g, R3
   184  	BNE	2(PC)				// caller was not g0: continue
   185  	BR	runtime·badmcall(SB)		// caller was already g0
   186  	MOVD	fn+0(FP), R11			// context
   187  	MOVD	0(R11), R12			// code pointer
   188  	MOVD	R12, CTR
   189  	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
        	// Push the old g (fn's argument) followed by four zero doublewords
        	// using pre-decrementing stores; 40 bytes in total.
        	// NOTE(review): presumably the four zeros form the FIXED_FRAME
        	// header below the argument - confirm against asm_ppc64x.h.
   190  	MOVDU	R3, -8(R1)
   191  	MOVDU	R0, -8(R1)
   192  	MOVDU	R0, -8(R1)
   193  	MOVDU	R0, -8(R1)
   194  	MOVDU	R0, -8(R1)
   195  	BL	(CTR)
        	// fn returned, which the contract above forbids.
   196  	MOVD	24(R1), R2
   197  	BR	runtime·badmcall2(SB)
   198  
   199  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   200  // of the G stack. We need to distinguish the routine that
   201  // lives at the bottom of the G stack from the one that lives
   202  // at the top of the system stack because the one at the top of
   203  // the system stack terminates the stack walk (see topofstack()).
        // systemstack records the address of this symbol plus 16 as a saved PC,
        // so the instruction layout here must keep that address inside the body.
   204  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   205  	// We have several undefs here so that 16 bytes past
   206  	// $runtime·systemstack_switch lies within them whether or not the
   207          // instructions that derive r2 from r12 are there.
   208  	UNDEF
   209  	UNDEF
   210  	UNDEF
   211  	BL	(LR)	// make sure this function is not leaf
   212  	RET
   213  
   214  // func systemstack(fn func())
        // Calls fn on the m->g0 stack. If the caller is already running on
        // gsignal or g0, fn is called directly on the current stack (noswitch).
        // If the caller is m->curg, its state is saved in g->sched, execution
        // moves to g0's saved SP, fn is called, and then g and SP are switched
        // back. Any other g is reported through badsystemstack.
        // Register use: R11 = fn closure, R4 = m, R5 = gsignal then g0, R6 = curg.
   215  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   216  	MOVD	fn+0(FP), R3	// R3 = fn
   217  	MOVD	R3, R11		// context
   218  	MOVD	g_m(g), R4	// R4 = m
   219  
   220  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   221  	CMP	g, R5
   222  	BEQ	noswitch
   223  
   224  	MOVD	m_g0(R4), R5	// R5 = g0
   225  	CMP	g, R5
   226  	BEQ	noswitch
   227  
   228  	MOVD	m_curg(R4), R6
   229  	CMP	g, R6
   230  	BEQ	switch
   231  
   232  	// Bad: g is not gsignal, not g0, not curg. What is it?
   233  	// Hide call from linker nosplit analysis.
        	// (If this call were to return, execution would fall through into switch.)
   234  	MOVD	$runtime·badsystemstack(SB), R12
   235  	MOVD	R12, CTR
   236  	BL	(CTR)
   237  
   238  switch:
   239  	// save our state in g->sched. Pretend to
   240  	// be systemstack_switch if the G stack is scanned.
   241  	MOVD	$runtime·systemstack_switch(SB), R6
   242  	ADD     $16, R6 // get past prologue (including r2-setting instructions when they're there)
   243  	MOVD	R6, (g_sched+gobuf_pc)(g)
   244  	MOVD	R1, (g_sched+gobuf_sp)(g)
   245  	MOVD	R0, (g_sched+gobuf_lr)(g)
   246  	MOVD	g, (g_sched+gobuf_g)(g)
   247  
   248  	// switch to g0
        	// R5 still holds m->g0 from the check above.
   249  	MOVD	R5, g
   250  	BL	runtime·save_g(SB)
   251  	MOVD	(g_sched+gobuf_sp)(g), R3
   252  	// make it look like mstart called systemstack on g0, to stop traceback
        	// The address of mstart is written at 0(new SP), i.e. the saved-LR slot.
   253  	SUB	$FIXED_FRAME, R3
   254  	MOVD	$runtime·mstart(SB), R4
   255  	MOVD	R4, 0(R3)
   256  	MOVD	R3, R1
   257  
   258  	// call target function
   259  	MOVD	0(R11), R12	// code pointer
   260  	MOVD	R12, CTR
   261  	BL	(CTR)
   262  
   263  	// restore TOC pointer. It seems unlikely that we will use systemstack
   264  	// to call a function defined in another module, but the results of
   265  	// doing so would be so confusing that it's worth doing this.
        	// The TOC value is read from 24(saved SP of curg), i.e. from the
        	// frame on the goroutine stack we left in the switch above.
   266  	MOVD	g_m(g), R3
   267  	MOVD	m_curg(R3), g
   268  	MOVD	(g_sched+gobuf_sp)(g), R3
   269  	MOVD	24(R3), R2
   270  	// switch back to g
        	// NOTE(review): g was already set to m->curg three instructions
        	// earlier, so the next two loads recompute the same value; they
        	// look redundant but harmless.
   271  	MOVD	g_m(g), R3
   272  	MOVD	m_curg(R3), g
   273  	BL	runtime·save_g(SB)
   274  	MOVD	(g_sched+gobuf_sp)(g), R1
   275  	MOVD	R0, (g_sched+gobuf_sp)(g)	// clear the saved SP now that it has been consumed
   276  	RET
   277  
   278  noswitch:
   279  	// already on m stack, just call directly
   280  	MOVD	0(R11), R12	// code pointer
   281  	MOVD	R12, CTR
   282  	BL	(CTR)
   283  	MOVD	24(R1), R2	// reload the TOC pointer from this frame after the indirect call
   284  	RET
   285  
   286  /*
   287   * support for morestack
   288   */
   289  
   290  // Called during function prolog when more stack is needed.
   291  // Caller has already loaded:
   292  // R3: framesize, R4: argsize, R5: LR
        // In addition, R11 is stored below as the ctxt argument to newstack
        // (morestack_noctxt zeroes it before branching here).
        // Within this routine R7 = m and R8 is scratch.
   293  //
   294  // The traceback routines see morestack on a g0 as being
   295  // the top of a stack (for example, morestack calling newstack
   296  // calling the scheduler calling newm calling gc), so we must
   297  // record an argument size. For that purpose, it has no arguments.
   298  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   299  	// Cannot grow scheduler stack (m->g0).
   300  	MOVD	g_m(g), R7
   301  	MOVD	m_g0(R7), R8
   302  	CMP	g, R8
   303  	BNE	3(PC)			// not g0: skip the two calls below
   304  	BL	runtime·badmorestackg0(SB)
   305  	BL	runtime·abort(SB)
   306  
   307  	// Cannot grow signal stack (m->gsignal).
   308  	MOVD	m_gsignal(R7), R8
   309  	CMP	g, R8
   310  	BNE	3(PC)			// not gsignal: skip the two calls below
   311  	BL	runtime·badmorestackgsignal(SB)
   312  	BL	runtime·abort(SB)
   313  
   314  	// Called from f.
   315  	// Set g->sched to context in f.
   316  	MOVD	R1, (g_sched+gobuf_sp)(g)
   317  	MOVD	LR, R8
   318  	MOVD	R8, (g_sched+gobuf_pc)(g)
   319  	MOVD	R5, (g_sched+gobuf_lr)(g)
   320  	// newstack will fill gobuf.ctxt.
   321  
   322  	// Called from f.
   323  	// Set m->morebuf to f's caller.
   324  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   325  	MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   326  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   327  
   328  	// Call newstack on m->g0's stack.
   329  	MOVD	m_g0(R7), g
   330  	BL	runtime·save_g(SB)
   331  	MOVD	(g_sched+gobuf_sp)(g), R1
   332  	MOVDU   R0, -(FIXED_FRAME+8)(R1)	// create a call frame on g0
   333  	MOVD	R11, FIXED_FRAME+0(R1)	// ctxt argument
   334  	BL	runtime·newstack(SB)
   335  
   336  	// Not reached, but make sure the return PC from the call to newstack
   337  	// is still in this function, and not the beginning of the next.
   338  	UNDEF
   339  
        // morestack_noctxt zeroes R11 (the register morestack passes to newstack
        // as ctxt) and then branches to morestack without touching LR.
   340  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   341  	MOVD	R0, R11
   342  	BR	runtime·morestack(SB)
   343  
   344  // reflectcall: call a function with the given argument list
   345  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   346  // we don't have variable-sized frames, so we use a small number
   347  // of constant-sized-frame functions to encode a few bits of size in the pc.
   348  // Caution: ugly multiline assembly macros in your future!
   349  
        // DISPATCH(NAME, MAXSIZE) expects the argument size in R3. If R3 is
        // greater than MAXSIZE it skips the three instructions that follow the
        // BGT (i.e. falls through to whatever comes after the macro); otherwise
        // it jumps to NAME through CTR without setting LR. Clobbers R31, R12, CTR.
   350  #define DISPATCH(NAME,MAXSIZE)		\
   351  	MOVD	$MAXSIZE, R31;		\
   352  	CMP	R3, R31;		\
   353  	BGT	4(PC);			\
   354  	MOVD	$NAME(SB), R12;		\
   355  	MOVD	R12, CTR;		\
   356  	BR	(CTR)
   357  // Note: can't just "BR NAME(SB)" - bad inlining results.
   358  
        // reflect·call is an alias: it branches straight to ·reflectcall,
        // leaving LR and the caller's argument frame untouched.
   359  TEXT reflect·call(SB), NOSPLIT, $0-0
   360  	BR	·reflectcall(SB)
   361  
        // reflectcall loads the 32-bit argsize argument into R3 and runs the
        // DISPATCH ladder: the first callN whose N is >= argsize is jumped to.
        // If argsize exceeds the largest size (1073741824) control reaches
        // badreflectcall.
   362  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
   363  	MOVWZ argsize+24(FP), R3
   364  	DISPATCH(runtime·call32, 32)
   365  	DISPATCH(runtime·call64, 64)
   366  	DISPATCH(runtime·call128, 128)
   367  	DISPATCH(runtime·call256, 256)
   368  	DISPATCH(runtime·call512, 512)
   369  	DISPATCH(runtime·call1024, 1024)
   370  	DISPATCH(runtime·call2048, 2048)
   371  	DISPATCH(runtime·call4096, 4096)
   372  	DISPATCH(runtime·call8192, 8192)
   373  	DISPATCH(runtime·call16384, 16384)
   374  	DISPATCH(runtime·call32768, 32768)
   375  	DISPATCH(runtime·call65536, 65536)
   376  	DISPATCH(runtime·call131072, 131072)
   377  	DISPATCH(runtime·call262144, 262144)
   378  	DISPATCH(runtime·call524288, 524288)
   379  	DISPATCH(runtime·call1048576, 1048576)
   380  	DISPATCH(runtime·call2097152, 2097152)
   381  	DISPATCH(runtime·call4194304, 4194304)
   382  	DISPATCH(runtime·call8388608, 8388608)
   383  	DISPATCH(runtime·call16777216, 16777216)
   384  	DISPATCH(runtime·call33554432, 33554432)
   385  	DISPATCH(runtime·call67108864, 67108864)
   386  	DISPATCH(runtime·call134217728, 134217728)
   387  	DISPATCH(runtime·call268435456, 268435456)
   388  	DISPATCH(runtime·call536870912, 536870912)
   389  	DISPATCH(runtime·call1073741824, 1073741824)
        	// No size class matched: argsize is larger than every callN frame.
   390  	MOVD	$runtime·badreflectcall(SB), R12
   391  	MOVD	R12, CTR
   392  	BR	(CTR)
   393  
   394  #define CALLFN(NAME,MAXSIZE)			\
   395  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   396  	NO_LOCAL_POINTERS;			\
   397  	/* copy arguments to stack */		\
        	/* Byte-at-a-time copy: R3 = arg-1 (source), R5 = R1+FIXED_FRAME-1 */	\
        	/* (destination), R4 = R5+argsize (end marker). Both MOVBZU forms */	\
        	/* pre-increment their pointer; the loop ends when R5 reaches R4. */	\
   398  	MOVD	arg+16(FP), R3;			\
   399  	MOVWZ	argsize+24(FP), R4;			\
   400  	MOVD	R1, R5;				\
   401  	ADD	$(FIXED_FRAME-1), R5;			\
   402  	SUB	$1, R3;				\
   403  	ADD	R5, R4;				\
   404  	CMP	R5, R4;				\
   405  	BEQ	4(PC);				\
   406  	MOVBZU	1(R3), R6;			\
   407  	MOVBZU	R6, 1(R5);			\
   408  	BR	-4(PC);				\
   409  	/* call function */			\
        	/* R11 = closure (f), R12/CTR = its code pointer. */	\
   410  	MOVD	f+8(FP), R11;			\
   411  	MOVD	(R11), R12;			\
   412  	MOVD	R12, CTR;			\
   413  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   414  	BL	(CTR);				\
   415  	MOVD	24(R1), R2;			\
   416  	/* copy return values back */		\
        	/* Set up callRet's register arguments: R7 = argtype, */	\
        	/* R3 = arg+retoffset, R5 = R1+FIXED_FRAME+retoffset, */	\
        	/* R4 = size-retoffset. n+24(FP) is the same slot as argsize+24(FP). */	\
   417  	MOVD	argtype+0(FP), R7;		\
   418  	MOVD	arg+16(FP), R3;			\
   419  	MOVWZ	n+24(FP), R4;			\
   420  	MOVWZ	retoffset+28(FP), R6;		\
   421  	ADD	$FIXED_FRAME, R1, R5;		\
   422  	ADD	R6, R5; 			\
   423  	ADD	R6, R3;				\
   424  	SUB	R6, R4;				\
   425  	BL	callRet<>(SB);			\
   426  	RET
   427  
   428  // callRet copies return values back at the end of call*. This is a
   429  // separate function so it can allocate stack space for the arguments
   430  // to reflectcallmove. It does not follow the Go ABI; it expects its
   431  // arguments in registers.
        // Inputs, as prepared by CALLFN: R7 = argtype, R3 = arg+retoffset,
        // R5 = address of the results in the call* frame, R4 = byte count.
        // They are stored, in that order, as the four stack arguments of
        // reflectcallmove.
   432  TEXT callRet<>(SB), NOSPLIT, $32-0
   433  	MOVD	R7, FIXED_FRAME+0(R1)
   434  	MOVD	R3, FIXED_FRAME+8(R1)
   435  	MOVD	R5, FIXED_FRAME+16(R1)
   436  	MOVD	R4, FIXED_FRAME+24(R1)
   437  	BL	runtime·reflectcallmove(SB)
   438  	RET
   439  
        // Instantiate one fixed-frame call* function per power-of-two size from
        // 32 bytes to 1073741824 bytes; these are the targets named by the
        // DISPATCH ladder in reflectcall.
   440  CALLFN(·call32, 32)
   441  CALLFN(·call64, 64)
   442  CALLFN(·call128, 128)
   443  CALLFN(·call256, 256)
   444  CALLFN(·call512, 512)
   445  CALLFN(·call1024, 1024)
   446  CALLFN(·call2048, 2048)
   447  CALLFN(·call4096, 4096)
   448  CALLFN(·call8192, 8192)
   449  CALLFN(·call16384, 16384)
   450  CALLFN(·call32768, 32768)
   451  CALLFN(·call65536, 65536)
   452  CALLFN(·call131072, 131072)
   453  CALLFN(·call262144, 262144)
   454  CALLFN(·call524288, 524288)
   455  CALLFN(·call1048576, 1048576)
   456  CALLFN(·call2097152, 2097152)
   457  CALLFN(·call4194304, 4194304)
   458  CALLFN(·call8388608, 8388608)
   459  CALLFN(·call16777216, 16777216)
   460  CALLFN(·call33554432, 33554432)
   461  CALLFN(·call67108864, 67108864)
   462  CALLFN(·call134217728, 134217728)
   463  CALLFN(·call268435456, 268435456)
   464  CALLFN(·call536870912, 536870912)
   465  CALLFN(·call1073741824, 1073741824)
   466  
        // procyield is a no-op in this implementation: it returns immediately
        // without executing any spin or yield instruction.
   467  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   468  	RET
   469  
   470  // void jmpdefer(fv, sp);
   471  // called from deferreturn.
   472  // 1. grab stored LR for caller
   473  // 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
   474  // 3. BR to fn
   475  // When dynamically linking Go, it is not sufficient to rewind to the BL
   476  // deferreturn -- we might be jumping between modules and so we need to reset
   477  // the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
   478  // the BL deferreturn and jmpdefer rewinds to that.
        // Register use: R31 = rewound return address, R11 = fv (closure),
        // R12/CTR = fv's code pointer. The second argument is named argp in the
        // code below.
   479  TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   480  	MOVD	0(R1), R31		// step 1: stored LR at 0(R1)
   481  	SUB     $8, R31			// step 2: back up 8 bytes
   482  	MOVD	R31, LR
   483  
   484  	MOVD	fv+0(FP), R11
   485  	MOVD	argp+8(FP), R1		// SP = argp - FIXED_FRAME
   486  	SUB	$FIXED_FRAME, R1
   487  	MOVD	0(R11), R12
   488  	MOVD	R12, CTR
   489  	BR	(CTR)			// step 3: jump (not call) so fn returns to the rewound LR
   490  
   491  // Save state of caller into g->sched. Smashes R31.
        // Stores LR as sched.pc and R1 as sched.sp, zeroes sched.lr and
        // sched.ret, and calls badctxt if sched.ctxt is not zero.
   492  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   493  	MOVD	LR, R31
   494  	MOVD	R31, (g_sched+gobuf_pc)(g)
   495  	MOVD	R1, (g_sched+gobuf_sp)(g)
   496  	MOVD	R0, (g_sched+gobuf_lr)(g)
   497  	MOVD	R0, (g_sched+gobuf_ret)(g)
   498  	// Assert ctxt is zero. See func save.
   499  	MOVD	(g_sched+gobuf_ctxt)(g), R31
   500  	CMP	R0, R31
   501  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   502  	BL	runtime·badctxt(SB)
   503  	RET
   504  
   505  // func asmcgocall(fn, arg unsafe.Pointer) int32
   506  // Call fn(arg) on the scheduler stack,
   507  // aligned appropriately for the gcc ABI.
   508  // See cgocall.go for more details.
        // Register use: R3 = fn then arg, R4 = arg, R5 = calling g then stack
        // depth, R6 = m then g0, R7 = caller's SP. The 32-bit value left in R3
        // by fn is stored as the result.
   509  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   510  	MOVD	fn+0(FP), R3
   511  	MOVD	arg+8(FP), R4
   512  
   513  	MOVD	R1, R7		// save original stack pointer
   514  	MOVD	g, R5
   515  
   516  	// Figure out if we need to switch to m->g0 stack.
   517  	// We get called to create new OS threads too, and those
   518  	// come in on the m->g0 stack already.
   519  	MOVD	g_m(g), R6
   520  	MOVD	m_g0(R6), R6
   521  	CMP	R6, g
   522  	BEQ	g0
   523  	BL	gosave<>(SB)
   524  	MOVD	R6, g
   525  	BL	runtime·save_g(SB)
   526  	MOVD	(g_sched+gobuf_sp)(g), R1
   527  
   528  	// Now on a scheduling stack (a pthread-created stack).
   529  g0:
   530  	// Save room for two of our pointers, plus 32 bytes of callee
   531  	// save area that lives on the caller stack.
        	// Layout after alignment: 0(R1) back chain, 32(R1) stack depth, 40(R1) old g.
   532  	SUB	$48, R1
   533  	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
   534  	MOVD	R5, 40(R1)	// save old g on stack
   535  	MOVD	(g_stack+stack_hi)(R5), R5
   536  	SUB	R7, R5		// R5 = old g's stack.hi - original SP
   537  	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   538  	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
   539  	// This is a "global call", so put the global entry point in r12
   540  	MOVD	R3, R12
   541  	MOVD	R12, CTR
   542  	MOVD	R4, R3		// arg in r3
   543  	BL	(CTR)
   544  
   545  	// C code can clobber R0, so set it back to 0.  F27-F31 are
   546  	// callee save, so we don't need to recover those.
   547  	XOR	R0, R0
   548  	// Restore g, stack pointer, toc pointer.
   549  	// R3 is errno, so don't touch it
        	// The original SP is recomputed as stack.hi - depth; it is computed
        	// once to reach the TOC slot at 24(SP) and again after save_g.
   550  	MOVD	40(R1), g
   551  	MOVD    (g_stack+stack_hi)(g), R5
   552  	MOVD    32(R1), R6
   553  	SUB     R6, R5
   554  	MOVD    24(R5), R2
   555  	BL	runtime·save_g(SB)
   556  	MOVD	(g_stack+stack_hi)(g), R5
   557  	MOVD	32(R1), R6
   558  	SUB	R6, R5
   559  	MOVD	R5, R1
   560  
   561  	MOVW	R3, ret+16(FP)
   562  	RET
   563  
   564  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   565  // Turn the fn into a Go func (by taking its address) and call
   566  // cgocallback_gofunc.
        // The first outgoing argument is the address of this function's own fn
        // argument slot; frame, framesize and ctxt are copied through unchanged.
        // The call goes through CTR rather than a direct BL.
   567  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   568  	MOVD	$fn+0(FP), R3		// address of the fn slot, not its value
   569  	MOVD	R3, FIXED_FRAME+0(R1)
   570  	MOVD	frame+8(FP), R3
   571  	MOVD	R3, FIXED_FRAME+8(R1)
   572  	MOVD	framesize+16(FP), R3
   573  	MOVD	R3, FIXED_FRAME+16(R1)
   574  	MOVD	ctxt+24(FP), R3
   575  	MOVD	R3, FIXED_FRAME+24(R1)
   576  	MOVD	$runtime·cgocallback_gofunc(SB), R12
   577  	MOVD	R12, CTR
   578  	BL	(CTR)
   579  	RET
   580  
   581  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   582  // See cgocall.go for more details.
        // Outline of the code below: obtain g (via load_g when iscgo is set),
        // borrow an m with needm if g is nil, save and replace m->g0->sched.sp,
        // switch to m->curg and call cgocallbackg there, then undo each step
        // and call dropm if the m was borrowed.
        // Locals: savedm-8(SP) = m on entry (0 if borrowed),
        // savedsp-16(SP) = previous m->g0->sched.sp.
   583  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   584  	NO_LOCAL_POINTERS
   585  
   586  	// Load m and g from thread-local storage.
   587  	MOVB	runtime·iscgo(SB), R3
   588  	CMP	R3, $0
   589  	BEQ	nocgo
   590  	BL	runtime·load_g(SB)
   591  nocgo:
   592  
   593  	// If g is nil, Go did not create the current thread.
   594  	// Call needm to obtain one for temporary use.
   595  	// In this case, we're running on the thread stack, so there's
   596  	// lots of space, but the linker doesn't know. Hide the call from
   597  	// the linker analysis by using an indirect call.
   598  	CMP	g, $0
   599  	BEQ	needm
   600  
   601  	MOVD	g_m(g), R8
   602  	MOVD	R8, savedm-8(SP)
   603  	BR	havem
   604  
   605  needm:
   606  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   607  	MOVD	$runtime·needm(SB), R12
   608  	MOVD	R12, CTR
   609  	BL	(CTR)
   610  
   611  	// Set m->sched.sp = SP, so that if a panic happens
   612  	// during the function we are about to execute, it will
   613  	// have a valid SP to run on the g0 stack.
   614  	// The next few lines (after the havem label)
   615  	// will save this SP onto the stack and then write
   616  	// the same SP back to m->sched.sp. That seems redundant,
   617  	// but if an unrecovered panic happens, unwindm will
   618  	// restore the g->sched.sp from the stack location
   619  	// and then systemstack will try to use it. If we don't set it here,
   620  	// that restored SP will be uninitialized (typically 0) and
   621  	// will not be usable.
   622  	MOVD	g_m(g), R8
   623  	MOVD	m_g0(R8), R3
   624  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   625  
   626  havem:
   627  	// Now there's a valid m, and we're running on its m->g0.
   628  	// Save current m->g0->sched.sp on stack and then set it to SP.
   629  	// Save current sp in m->g0->sched.sp in preparation for
   630  	// switch back to m->curg stack.
   631  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
        	// R8 = m on both paths into this label.
   632  	MOVD	m_g0(R8), R3
   633  	MOVD	(g_sched+gobuf_sp)(R3), R4
   634  	MOVD	R4, savedsp-16(SP)
   635  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   636  
   637  	// Switch to m->curg stack and call runtime.cgocallbackg.
   638  	// Because we are taking over the execution of m->curg
   639  	// but *not* resuming what had been running, we need to
   640  	// save that information (m->curg->sched) so we can restore it.
   641  	// We can restore m->curg->sched.sp easily, because calling
   642  	// runtime.cgocallbackg leaves SP unchanged upon return.
   643  	// To save m->curg->sched.pc, we push it onto the stack.
   644  	// This has the added benefit that it looks to the traceback
   645  	// routine like cgocallbackg is going to return to that
   646  	// PC (because the frame we allocate below has the same
   647  	// size as cgocallback_gofunc's frame declared above)
   648  	// so that the traceback will seamlessly trace back into
   649  	// the earlier calls.
   650  	//
   651  	// In the new goroutine, -8(SP) is unused (where SP refers to
   652  	// m->curg's SP while we're setting it up, before we've adjusted it).
        	// The new frame is FIXED_FRAME+16 bytes below curg's saved SP: the
        	// saved pc goes at its base (0 of the new R1) and ctxt at -16(R4).
   653  	MOVD	m_curg(R8), g
   654  	BL	runtime·save_g(SB)
   655  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   656  	MOVD	(g_sched+gobuf_pc)(g), R5
   657  	MOVD	R5, -(FIXED_FRAME+16)(R4)
   658  	MOVD	ctxt+24(FP), R3
   659  	MOVD	R3, -16(R4)
   660  	MOVD	$-(FIXED_FRAME+16)(R4), R1
   661  	BL	runtime·cgocallbackg(SB)
   662  
   663  	// Restore g->sched (== m->curg->sched) from saved values.
   664  	MOVD	0(R1), R5
   665  	MOVD	R5, (g_sched+gobuf_pc)(g)
   666  	MOVD	$(FIXED_FRAME+16)(R1), R4
   667  	MOVD	R4, (g_sched+gobuf_sp)(g)
   668  
   669  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   670  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   671  	// so we do not have to restore it.)
   672  	MOVD	g_m(g), R8
   673  	MOVD	m_g0(R8), g
   674  	BL	runtime·save_g(SB)
   675  	MOVD	(g_sched+gobuf_sp)(g), R1
   676  	MOVD	savedsp-16(SP), R4
   677  	MOVD	R4, (g_sched+gobuf_sp)(g)
   678  
   679  	// If the m on entry was nil, we called needm above to borrow an m
   680  	// for the duration of the call. Since the call is over, return it with dropm.
   681  	MOVD	savedm-8(SP), R6
   682  	CMP	R6, $0
   683  	BNE	droppedm	// m was present on entry: nothing to give back
   684  	MOVD	$runtime·dropm(SB), R12
   685  	MOVD	R12, CTR
   686  	BL	(CTR)
   687  droppedm:
   688  
   689  	// Done!
   690  	RET
   691  
   692  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and calls save_g.
   693  TEXT runtime·setg(SB), NOSPLIT, $0-8
   694  	MOVD	gg+0(FP), g
   695  	// This only happens if iscgo, so jump straight to save_g
   696  	BL	runtime·save_g(SB)
   697  	RET
   698  
   699  // void setg_gcc(G*); set g in C TLS.
   700  // Must obey the gcc calling convention.
        // The new g arrives in R3. LR, R31 and the g register are parked in
        // R4, R5 and R6 around the save_g call and restored before returning,
        // so the caller sees them unchanged.
   701  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   702  	// The standard prologue clobbers R31, which is callee-save in
   703  	// the C ABI, so we have to use $-8-0 and save LR ourselves.
        	// NOTE(review): the TEXT line above uses NOFRAME with $0-0 rather
        	// than $-8-0, so the frame-size wording here looks stale - confirm.
   704  	MOVD	LR, R4
   705  	// Also save g and R31, since they're callee-save in C ABI
   706  	MOVD	R31, R5
   707  	MOVD	g, R6
   708  
   709  	MOVD	R3, g
   710  	BL	runtime·save_g(SB)
   711  
   712  	MOVD	R6, g
   713  	MOVD	R5, R31
   714  	MOVD	R4, LR
   715  	RET
   716  
        // getcallerpc returns, in ret+8(FP), the doubleword found at
        // FIXED_FRAME+8(R1) of its own 8-byte frame, which the code identifies
        // as the LR saved by the caller.
   717  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
   718  	MOVD	FIXED_FRAME+8(R1), R3		// LR saved by caller
   719  	MOVD	R3, ret+8(FP)
   720  	RET
   721  
        // abort loads a word from the address in R0 (kept at zero by reginit),
        // i.e. from address 0, and is followed by UNDEF in case that load does
        // not stop execution.
   722  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   723  	MOVW	(R0), R0
   724  	UNDEF
   725  
   726  #define	TBRL	268
   727  #define	TBRU	269		/* Time base Upper/Lower */
   728  
   729  // int64 runtime·cputicks(void)
        // Reads the upper half, the lower half, then the upper half again. If
        // the two upper reads differ, the whole sequence is retried. The result
        // is (upper << 32) | lower.
   730  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   731  	MOVW	SPR(TBRU), R4
   732  	MOVW	SPR(TBRL), R3
   733  	MOVW	SPR(TBRU), R5
   734  	CMPW	R4, R5
   735  	BNE	-4(PC)			// upper half changed between reads: start over
   736  	SLD	$32, R5
   737  	OR	R5, R3
   738  	MOVD	R3, ret+0(FP)
   739  	RET
   740  
   741  // AES hashing not implemented for ppc64
        // Each stub below consists of a single load from the address in R0
        // (address 0 while R0 is zero) and has no RET, so the stubs are laid
        // out to fall through into one another.
   742  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   743  	MOVW	(R0), R1
   744  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   745  	MOVW	(R0), R1
   746  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   747  	MOVW	(R0), R1
   748  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   749  	MOVW	(R0), R1
   750  
        // memequal passes a, b and size to memeqbody in R3, R4 and R5 and
        // stores the low byte of R9, as left by memeqbody, into the one-byte
        // result at ret+24(FP).
   751  TEXT runtime·memequal(SB),NOSPLIT,$0-25
   752  	MOVD    a+0(FP), R3
   753  	MOVD    b+8(FP), R4
   754  	MOVD    size+16(FP), R5
   755  
   756  	BL	runtime·memeqbody(SB)
   757  	MOVB    R9, ret+24(FP)
   758  	RET
   759  
   760  // memequal_varlen(a, b unsafe.Pointer) bool
        // Returns 1 immediately when the two pointers are identical; otherwise
        // takes the size from the closure in R11 and defers to memeqbody,
        // returning the byte it leaves in R9.
   761  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   762  	MOVD	a+0(FP), R3
   763  	MOVD	b+8(FP), R4
   764  	CMP	R3, R4
   765  	BEQ	eq
   766  	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
   767  	BL	runtime·memeqbody(SB)
   768  	MOVB	R9, ret+16(FP)
   769  	RET
   770  eq:
   771  	MOVD	$1, R3
   772  	MOVB	R3, ret+16(FP)
   773  	RET
   774  
   775  // Do an efficient memcmp for ppc64le
   776  // R3 = s1 len
   777  // R4 = s2 len
   778  // R5 = s1 addr
   779  // R6 = s2 addr
   780  // R7 = addr of return value
        //
        // Stores -1, 0 or +1 as a doubleword at (R7).
        // R8 = min(len1, len2) is the number of bytes compared; CR2 keeps the
        // result of comparing the two lengths and is only consulted once all
        // R8 bytes have compared equal.
        // Branch encodings used below: "BC 12,n" branches if CR bit n is set
        // (8/9/10 = CR2 LT/GT/EQ); "BC 8,2" decrements CTR and branches while
        // CTR != 0 and CR0 EQ is set.
        // Also uses R9, R10, R14, R15, R16 and CTR, and overwrites R3, R5, R6.
   781  TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
   782  	MOVD	R3,R8		// set up length
   783  	CMP	R3,R4,CR2	// unequal?
   784  	BC	12,8,setuplen	// BLT CR2
   785  	MOVD	R4,R8		// use R4 for comparison len
   786  setuplen:
   787  	MOVD	R8,CTR		// set up loop counter
   788  	CMP	R8,$8		// only optimize >=8
   789  	BLT	simplecheck
   790  	DCBT	(R5)		// cache hint
   791  	DCBT	(R6)
   792  	CMP	R8,$32		// optimize >= 32
   793  	MOVD	R8,R9
   794  	BLT	setup8a		// 8 byte moves only
   795  setup32a:
   796  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   797  	MOVD	R9,CTR
   798  
   799          // Special processing for 32 bytes or longer.
   800          // Loading this way is faster and correct as long as the
   801  	// doublewords being compared are equal. Once they
   802  	// are found unequal, reload them in proper byte order
   803  	// to determine greater or less than.
        	// R16 is kept equal to the offset (relative to the current R5/R6) of
        	// the doubleword pair most recently compared, for use at cmpne.
   804  loop32a:
   805  	MOVD	0(R5),R9	// doublewords to compare
   806  	MOVD	0(R6),R10	// get 4 doublewords
   807  	MOVD	8(R5),R14
   808  	MOVD	8(R6),R15
   809  	CMPU	R9,R10		// bytes equal?
   810  	MOVD	$0,R16		// set up for cmpne
   811  	BNE	cmpne		// further compare for LT or GT
   812  	MOVD	16(R5),R9	// get next pair of doublewords
   813  	MOVD	16(R6),R10
   814  	CMPU	R14,R15		// bytes match?
   815  	MOVD	$8,R16		// set up for cmpne
   816  	BNE	cmpne		// further compare for LT or GT
   817  	MOVD	24(R5),R14	// get next pair of doublewords
   818  	MOVD    24(R6),R15
   819  	CMPU	R9,R10		// bytes match?
   820  	MOVD	$16,R16		// set up for cmpne
   821  	BNE	cmpne		// further compare for LT or GT
   822  	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   823  	ADD	$32,R5		// bump up to next 32
   824  	ADD	$32,R6
   825  	CMPU    R14,R15		// bytes match?
   826  	BC	8,2,loop32a	// br ctr and cr
   827  	BNE	cmpne
   828  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   829  	BEQ	leftover	// and result is 0
   830  setup8a:
   831  	SRADCC	$3,R9,R9	// get the 8 byte count
   832  	BEQ	leftover	// shifted value is 0
   833  	MOVD	R9,CTR		// loop count for doublewords
   834  loop8:
   835  	MOVDBR	(R5+R0),R9	// doublewords to compare
   836  	MOVDBR	(R6+R0),R10	// LE compare order
   837  	ADD	$8,R5
   838  	ADD	$8,R6
   839  	CMPU	R9,R10		// match?
   840  	BC	8,2,loop8	// bt ctr <> 0 && cr
   841  	BGT	greater
   842  	BLT	less
   843  leftover:
   844  	ANDCC	$7,R8,R9	// check for leftover bytes
   845  	MOVD	R9,CTR		// save the ctr
   846  	BNE	simple		// leftover bytes
   847  	BC	12,10,equal	// test CR2 for length comparison
   848  	BC	12,8,less
   849  	BR	greater
   850  simplecheck:
   851  	CMP	R8,$0		// remaining compare length 0
   852  	BNE	simple		// do simple compare
   853  	BC	12,10,equal	// test CR2 for length comparison
   854  	BC	12,8,less	// 1st len < 2nd len, result less
   855  	BR	greater		// 1st len > 2nd len must be greater
   856  simple:
   857  	MOVBZ	0(R5), R9	// get byte from 1st operand
   858  	ADD	$1,R5
   859  	MOVBZ	0(R6), R10	// get byte from 2nd operand
   860  	ADD	$1,R6
   861  	CMPU	R9, R10
   862  	BC	8,2,simple	// bc ctr <> 0 && cr
   863  	BGT	greater		// 1st > 2nd
   864  	BLT	less		// 1st < 2nd
   865  	BC	12,10,equal	// test CR2 for length comparison
   866  	BC	12,9,greater	// CR2 GT: 1st len > 2nd len, result greater
   867  	BR	less		// otherwise 1st len < 2nd len, result less
   868  cmpne:				// only here is not equal
   869  	MOVDBR	(R5+R16),R8	// reload in reverse order
   870  	MOVDBR	(R6+R16),R9
   871  	CMPU	R8,R9		// compare correct endianness
   872  	BGT	greater		// here only if NE
        	// not greater and not equal: fall through into less
   873  less:
   874  	MOVD	$-1,R3
   875  	MOVD	R3,(R7)		// return value if A < B
   876  	RET
   877  equal:
   878  	MOVD	$0,(R7)		// return value if A == B
   879  	RET
   880  greater:
   881  	MOVD	$1,R3
   882  	MOVD	R3,(R7)		// return value if A > B
   883  	RET
   884  
   885  // Do an efficient memcmp for ppc64 (BE).
   886  // R3 = s1 len
   887  // R4 = s2 len
   888  // R5 = s1 addr
   889  // R6 = s2 addr
   890  // R7 = addr of return value
        //
        // Tail-called (BR) from runtime·cmpstring and bytes·Compare when
        // GOARCH_ppc64le is not defined. Stores -1, 0 or +1 through R7.
        // The first min(R3, R4) bytes are compared as unsigned values; if they
        // are all equal, the length comparison held in CR2 decides the result.
        // Registers written here: R3, R5, R6, R8, R9, R10, R14, R15, CTR, CR0, CR2.
   891  TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   892  	MOVD	R3,R8		// R8 = compare length, provisionally s1 len
   893  	CMP	R3,R4,CR2	// CR2 = s1 len vs s2 len, kept for the final tie-break
   894  	BC	12,8,setuplen	// BLT CR2
   895  	MOVD	R4,R8		// use R4 for comparison len
   896  setuplen:
   897  	MOVD	R8,CTR		// set up loop counter
   898  	CMP	R8,$8		// only optimize >=8
   899  	BLT	simplecheck
   900  	DCBT	(R5)		// cache hint
   901  	DCBT	(R6)
   902  	CMP	R8,$32		// optimize >= 32
   903  	MOVD	R8,R9		// R9 = length, consumed by setup8a if branch taken
   904  	BLT	setup8a		// 8 byte moves only
   905  
   906  setup32a:
   907  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   908  	MOVD	R9,CTR
   909  loop32a:
   910  	MOVD	0(R5),R9	// doublewords to compare
   911  	MOVD	0(R6),R10	// get 4 doublewords
   912  	MOVD	8(R5),R14
   913  	MOVD	8(R6),R15
   914  	CMPU	R9,R10		// bytes equal?
   915  	BLT	less		// found to be less
   916  	BGT	greater		// found to be greater
   917  	MOVD	16(R5),R9	// get next pair of doublewords
   918  	MOVD	16(R6),R10
   919  	CMPU	R14,R15		// bytes match?
   920  	BLT	less		// found less
   921  	BGT	greater		// found greater
   922  	MOVD	24(R5),R14	// get next pair of doublewords
   923  	MOVD	24(R6),R15
   924  	CMPU	R9,R10		// bytes match?
   925  	BLT	less		// found to be less
   926  	BGT	greater		// found to be greater
   927  	ADD	$32,R5		// bump up to next 32
   928  	ADD	$32,R6
   929  	CMPU	R14,R15		// bytes match?
   930  	BC	8,2,loop32a	// decrement ctr; loop while ctr <> 0 && CR0 EQ
   931  	BLT	less		// with BE, byte ordering is
   932  	BGT	greater		// good for compare
   933  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   934  	BEQ	leftover	// and result is 0
   935  setup8a:
   936  	SRADCC	$3,R9,R9	// get the 8 byte count
   937  	BEQ	leftover	// shifted value is 0
   938  	MOVD	R9,CTR		// loop count for doublewords
   939  loop8:
   940  	MOVD	(R5),R9
   941  	MOVD	(R6),R10
   942  	ADD	$8,R5
   943  	ADD	$8,R6
   944  	CMPU	R9,R10		// match?
   945  	BC	8,2,loop8	// bt ctr <> 0 && cr
   946  	BGT	greater
   947  	BLT	less
   948  leftover:
   949  	ANDCC	$7,R8,R9	// check for leftover bytes
   950  	MOVD	R9,CTR		// save the ctr
   951  	BNE	simple		// leftover bytes
   952  	BC	12,10,equal	// test CR2 for length comparison
   953  	BC	12,8,less	// 1st len < 2nd len, result less
   954  	BR	greater		// 1st len > 2nd len must be greater
   955  simplecheck:
   956  	CMP	R8,$0		// remaining compare length 0
   957  	BNE	simple		// do simple compare
   958  	BC	12,10,equal	// test CR2 for length comparison
   959  	BC 	12,8,less	// 1st len < 2nd len, result less
   960  	BR	greater		// 1st len > 2nd len must be greater
   961  simple:
   962  	MOVBZ	0(R5),R9	// get byte from 1st operand
   963  	ADD	$1,R5
   964  	MOVBZ	0(R6),R10	// get byte from 2nd operand
   965  	ADD	$1,R6
   966  	CMPU	R9,R10
   967  	BC	8,2,simple	// bc ctr <> 0 && cr
   968  	BGT	greater		// 1st > 2nd
   969  	BLT	less		// 1st < 2nd
   970  	BC	12,10,equal	// test CR2 for length comparison
   971  	BC	12,9,greater	// 1st len > 2nd len; otherwise fall through to less
   972  less:
   973  	MOVD	$-1,R3
   974  	MOVD    R3,(R7)		// return value if A < B
   975  	RET
   976  equal:
   977  	MOVD    $0,(R7)		// return value if A == B
   978  	RET
   979  greater:
   980  	MOVD	$1,R3
   981  	MOVD	R3,(R7)		// return value if A > B
   982  	RET
   983  
   984  // Do an efficient memequal for ppc64
   985  // R3 = s1
   986  // R4 = s2
   987  // R5 = len
   988  // R9 = return value
        //
        // On return R9 is 1 if the R5 bytes at R3 and R4 are identical and 0
        // otherwise; a length of 0 yields 1. The data is compared in 32-byte
        // chunks, then 8-byte doublewords, then single bytes.
        // Registers written here: R3, R4, R6, R7, R8, R9, CTR, CR0.
   989  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   990  	MOVD    R5,CTR		// byte count for the simple loop when len < 8
   991  	CMP     R5,$8		// only optimize >=8
   992  	BLT     simplecheck
   993  	DCBT	(R3)		// cache hint
   994  	DCBT	(R4)
   995  	CMP	R5,$32		// optimize >= 32
   996  	MOVD	R5,R6		// needed if setup8a branch
   997  	BLT	setup8a		// 8 byte moves only
   998  setup32a:                       // >= 32 bytes
   999  	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
  1000  	MOVD	R6,CTR
  1001  loop32a:
  1002  	MOVD    0(R3),R6        // doublewords to compare
  1003  	MOVD    0(R4),R7
  1004  	MOVD	8(R3),R8	// second pair, loaded ahead of the first compare
  1005  	MOVD	8(R4),R9
  1006  	CMP     R6,R7           // bytes match?
  1007  	BNE     noteq
  1008  	MOVD	16(R3),R6
  1009  	MOVD	16(R4),R7
  1010  	CMP     R8,R9		// bytes match?
  1011  	MOVD	24(R3),R8	// loads do not change CR0, so the BNE below
  1012  	MOVD	24(R4),R9	// still tests the compare above
  1013  	BNE     noteq
  1014  	CMP     R6,R7           // bytes match?
  1015  	BNE	noteq
  1016  	ADD     $32,R3		// bump up to next 32
  1017  	ADD     $32,R4
  1018  	CMP     R8,R9           // bytes match?
  1019  	BC      8,2,loop32a	// br ctr and cr
  1020  	BNE	noteq
  1021  	ANDCC	$24,R5,R6       // Any 8 byte chunks?
  1022  	BEQ	leftover	// and result is 0
  1023  setup8a:
  1024  	SRADCC  $3,R6,R6        // get the 8 byte count
  1025  	BEQ	leftover	// shifted value is 0
  1026  	MOVD    R6,CTR
  1027  loop8:
  1028  	MOVD    0(R3),R6        // doublewords to compare
  1029  	ADD	$8,R3
  1030  	MOVD    0(R4),R7
  1031  	ADD     $8,R4
  1032  	CMP     R6,R7           // match?
  1033  	BC	8,2,loop8	// bt ctr <> 0 && cr
  1034  	BNE     noteq
  1035  leftover:
  1036  	ANDCC   $7,R5,R6        // check for leftover bytes
  1037  	BEQ     equal
  1038  	MOVD    R6,CTR
  1039  	BR	simple
  1040  simplecheck:
  1041  	CMP	R5,$0		// nothing to compare: equal
  1042  	BEQ	equal
  1043  simple:
  1044  	MOVBZ   0(R3), R6
  1045  	ADD	$1,R3
  1046  	MOVBZ   0(R4), R7
  1047  	ADD     $1,R4
  1048  	CMP     R6, R7
        	// Same shape as loop8: the BC leaves the loop on a mismatch or when
        	// CTR reaches 0, and the BNE after it separates the two cases. A
        	// second BNE inside the loop would make the one below unreachable.
  1050  	BC      8,2,simple	// bt ctr <> 0 && cr
  1051  	BNE	noteq
  1052  	BR	equal
  1053  noteq:
  1054  	MOVD    $0, R9
  1055  	RET
  1056  equal:
  1057  	MOVD    $1, R9
  1058  	RET
  1059  
  1060  TEXT bytes·Equal(SB),NOSPLIT,$0-49
        	// Stores a one-byte result at ret+48(FP): 0 when the two lengths
        	// differ, otherwise whatever runtime·memeqbody leaves in R9
        	// (1 for identical contents, 0 for a mismatch).
  1061  	MOVD	a_len+8(FP), R4
  1062  	MOVD	b_len+32(FP), R5
  1063  	CMP	R5, R4		// unequal lengths are not equal
  1064  	BNE	noteq
  1065  	MOVD	a+0(FP), R3	// memeqbody arguments: R3 = a, R4 = b, R5 = len
  1066  	MOVD	b+24(FP), R4
  1067  	BL	runtime·memeqbody(SB)
  1068  
  1069  	MOVBZ	R9,ret+48(FP)
  1070  	RET
  1071  
  1072  noteq:
  1073  	MOVBZ	$0,ret+48(FP)
  1074  	RET
  1080  
  1081  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads the arguments into the registers indexbytebody expects and
        	// tail-jumps to it; the result is written through R14.
  1082  	MOVD	s+0(FP), R3		// R3 = byte array pointer
  1083  	MOVD	s_len+8(FP), R4		// R4 = length
  1084  	MOVBZ	c+24(FP), R5		// R5 = byte
  1085  	MOVD	$ret+32(FP), R14	// R14 = &ret
  1086  	BR	runtime·indexbytebody<>(SB)
  1087  
  1088  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// Same as bytes·IndexByte except for the frame layout: the byte is
        	// at offset 16 and the result slot at offset 24.
  1089  	MOVD	s+0(FP), R3	  // R3 = string
  1090  	MOVD	s_len+8(FP), R4	  // R4 = length
  1091  	MOVBZ	c+16(FP), R5	  // R5 = byte
  1092  	MOVD	$ret+24(FP), R14  // R14 = &ret
  1093  	BR	runtime·indexbytebody<>(SB)
  1094  
  1095  TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
        	// Shared body of bytes·IndexByte and strings·IndexByte.
        	// In:  R3 = data pointer, R4 = length, R5 = byte to find, R14 = &ret.
        	// Out: the index of the first matching byte, or -1, stored at (R14).
        	// The data is scanned a doubleword at a time from the aligned address
        	// at or below R3; matches outside [R3, R3+R4) are masked or rejected.
  1096  	DCBT	(R3)		// Prepare cache line.
  1097  	MOVD	R3,R10		// Save base address for calculating the index later.
  1098  	RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
  1099  	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
  1100  
  1101  	// Calculate last acceptable address and check for possible overflow
  1102  	// using a saturated add.
  1103  	// Overflows set last acceptable address to 0xffffffffffffffff.
  1104  	ADD	R4,R3,R7
  1105  	SUBC	R3,R7,R6
  1106  	SUBE	R0,R0,R9
  1107  	MOVW	R9,R6
  1108  	OR	R6,R7,R7
  1109  
  1110  	RLDIMI	$16,R5,$32,R5
  1111  	CMPU	R4,$32		// Check if it's a small string (<=32 bytes). Those will be processed differently.
  1112  	MOVD	$-1,R9
  1113  	WORD $0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
  1114  	RLDIMI	$32,R5,$0,R5
  1115  	ADD	$-1,R7,R7	// R7 = address of the last byte that may match.
  1116  #ifdef GOARCH_ppc64le
  1117  	SLD	R6,R9,R9	// Prepare mask for Little Endian
  1118  #else
  1119  	SRD	R6,R9,R9	// Same for Big Endian
  1120  #endif
  1121  	BLE	small_string	// Jump to the small string case if it's <=32 bytes (CR0 from the CMPU above).
  1122  
  1123  	// Case for length >32 bytes
  1124  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1125  	CMPB	R12,R5,R3	// Check for a match.
  1126  	AND	R9,R3,R3	// Mask bytes below s_base
  1127  	RLDICL	$0,R7,$61,R4	// R4 = low 3 bits of R7: offset of the last valid byte in its doubleword
  1128  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7
  1129  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
  1130  	BNE	CR7,done
  1131  
  1132  	// Check for doubleword alignment and jump to the loop setup if aligned.
  1133  	MOVFL	R8,CR7
  1134  	BC	12,28,loop_setup
  1135  
  1136  	// Not aligned, so handle the second doubleword
  1137  	MOVDU	8(R8),R12
  1138  	CMPB	R12,R5,R3
  1139  	CMPU	R3,$0,CR7
  1140  	BNE	CR7,done
  1141  
  1142  loop_setup:
  1143  	// We are now aligned to a 16-byte boundary. We will load two doublewords
  1144  	// per loop iteration. The last doubleword is in R7, so our loop counter
  1145  	// starts at (R7-R8)/16.
  1146  	SUB	R8,R7,R6
  1147  	SRD	$4,R6,R6
  1148  	MOVD	R6,CTR
  1149  
  1150  	// Note: when we have an align directive, align this loop to 32 bytes so
  1151  	// it fits in a single icache sector.
  1152  loop:
  1153  	// Load two doublewords, then compare and merge in a single register. We
  1154  	// will check two doublewords per iteration, then find out which of them
  1155  	// contains the byte later. This speeds up the search.
  1156  	MOVD	8(R8),R12
  1157  	MOVDU	16(R8),R11	// Load with update: R8 advances by 16.
  1158  	CMPB	R12,R5,R3
  1159  	CMPB	R11,R5,R9
  1160  	OR	R3,R9,R6
  1161  	CMPU	R6,$0,CR7
  1162  	BNE	CR7,found
  1163  	BC	16,0,loop	// Decrement CTR and loop while it is non-zero.
  1164  
  1165  	// Counter zeroed, but we may have another doubleword to read
  1166  	CMPU	R8,R7
  1167  	BEQ	notfound
  1168  
  1169  	MOVDU	8(R8),R12
  1170  	CMPB	R12,R5,R3
  1171  	CMPU	R3,$0,CR6
  1172  	BNE	CR6,done
  1173  
  1174  notfound:
  1175  	MOVD	$-1,R3
  1176  	MOVD	R3,(R14)
  1177  	RET
  1178  
  1179  found:
  1180  	// One of the doublewords from the loop contains the byte we are looking
  1181  	// for. Check the first doubleword and adjust the address if found.
  1182  	CMPU	R3,$0,CR6
  1183  	ADD	$-8,R8,R8
  1184  	BNE	CR6,done
  1185  
  1186  	// Not found, so it must be in the second doubleword of the merged pair.
  1187  	MOVD	R9,R3
  1188  	ADD	$8,R8,R8
  1189  
  1190  done:
  1191  	// At this point, R3 has 0xFF in the same position as the byte we are
  1192  	// looking for in the doubleword. Use that to calculate the exact index
  1193  	// of the byte.
  1194  #ifdef GOARCH_ppc64le
  1195  	ADD	$-1,R3,R11
  1196  	ANDN	R3,R11,R11
  1197  	POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
  1198  #else
  1199  	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1200  #endif
  1201  	CMPU	R8,R7		// Check if we are at the last doubleword.
  1202  	SRD	$3,R11		// Convert the zero-bit count to a byte offset.
  1203  	ADD	R11,R8,R3	// R3 = address of the matching byte.
  1204  	CMPU	R11,R4,CR7	// If at the last doubleword, check the byte offset.
  1205  	BNE	return		// Not the last doubleword: the match is in range.
  1206  	BLE	CR7,return	// Last doubleword and offset <= R4: in range.
  1207  	MOVD	$-1,R3		// Match lies past the end of the data.
  1208  	MOVD	R3,(R14)
  1209  	RET
  1210  
  1211  return:
  1212  	SUB	R10,R3		// Calculate index.
  1213  	MOVD	R3,(R14)
  1214  	RET
  1215  
  1216  small_string:
  1217  	// We unroll this loop for better performance.
        	// At most five doublewords are examined; after each one, R8 == R7
        	// means the last doubleword has been checked.
  1218  	CMPU	R4,$0		// Check for length=0
  1219  	BEQ	notfound
  1220  
  1221  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1222  	CMPB	R12,R5,R3	// Check for a match.
  1223  	AND	R9,R3,R3	// Mask bytes below s_base.
  1224  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
  1225  	RLDICL	$0,R7,$61,R4	// R4 = low 3 bits of R7: offset of the last valid byte in its doubleword
  1226  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
  1227          CMPU	R8,R7
  1228  	BNE	CR7,done
  1229  	BEQ	notfound	// Hit length.
  1230  
  1231  	MOVDU	8(R8),R12
  1232  	CMPB	R12,R5,R3
  1233  	CMPU	R3,$0,CR6
  1234  	CMPU	R8,R7
  1235  	BNE	CR6,done
  1236  	BEQ	notfound
  1237  
  1238  	MOVDU	8(R8),R12
  1239  	CMPB	R12,R5,R3
  1240  	CMPU	R3,$0,CR6
  1241  	CMPU	R8,R7
  1242  	BNE	CR6,done
  1243  	BEQ	notfound
  1244  
  1245  	MOVDU	8(R8),R12
  1246  	CMPB	R12,R5,R3
  1247  	CMPU	R3,$0,CR6
  1248  	CMPU	R8,R7
  1249  	BNE	CR6,done
  1250  	BEQ	notfound
  1251  
  1252  	MOVDU	8(R8),R12
  1253  	CMPB	R12,R5,R3
  1254  	CMPU	R3,$0,CR6
  1255  	CMPU	R8,R7
  1256  	BNE	CR6,done
  1257  	BR	notfound
  1258  
  1259  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Stores -1, 0 or +1 at ret+32(FP). When both base pointers are the
        	// same address only the lengths are compared; otherwise the work is
        	// handed to cmpbodyLE<> or cmpbodyBE<> with
        	// R3 = s1 len, R4 = s2 len, R5 = s1 addr, R6 = s2 addr, R7 = &ret.
  1260  	MOVD	s1_base+0(FP), R5
  1261  	MOVD	s2_base+16(FP), R6
  1262  	MOVD	s1_len+8(FP), R3
  1263  	CMP	R5,R6,CR7	// CR7 = base pointer comparison
  1264  	MOVD	s2_len+24(FP), R4
  1265  	MOVD	$ret+32(FP), R7
  1266  	CMP	R3,R4,CR6	// CR6 = length comparison
  1267  	BEQ	CR7,equal	// same address: decide on lengths alone
  1268  
  1269  notequal:			// reached by fall-through only
  1270  #ifdef	GOARCH_ppc64le
  1271  	BR	cmpbodyLE<>(SB)
  1272  #else
  1273  	BR      cmpbodyBE<>(SB)
  1274  #endif
  1275  
  1276  equal:
  1277  	BEQ	CR6,done	// same address and same length: result 0
  1278  	MOVD	$1, R8
  1279  	BGT	CR6,greater	// s1 longer: result +1
  1280  	NEG	R8		// s1 shorter: result -1
  1281  
  1282  greater:
  1283  	MOVD	R8, (R7)
  1284  	RET
  1285  
  1286  done:
  1287  	MOVD	$0, (R7)
  1288  	RET
  1289  
  1290  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Stores -1, 0 or +1 at ret+48(FP). Same structure as
        	// runtime·cmpstring, with the operands at offsets 0/8 and 24/32.
        	// NOTE(review): the two length loads are spelled s1+8(FP) and
        	// s2+32(FP), whereas runtime·cmpstring uses the s1_len/s2_len form
        	// for its length slots. Presumably only the offset matters to the
        	// assembler; confirm the names against the Go declaration.
  1291  	MOVD	s1+0(FP), R5	// R5 = s1 addr
  1292  	MOVD	s2+24(FP), R6	// R6 = s2 addr
  1293  	MOVD	s1+8(FP), R3	// R3 = s1 len
  1294  	CMP	R5,R6,CR7	// CR7 = base pointer comparison
  1295  	MOVD	s2+32(FP), R4	// R4 = s2 len
  1296  	MOVD	$ret+48(FP), R7	// R7 = &ret
  1297  	CMP	R3,R4,CR6	// CR6 = length comparison
  1298  	BEQ	CR7,equal	// same address: decide on lengths alone
  1299  
  1300  #ifdef	GOARCH_ppc64le
  1301  	BR	cmpbodyLE<>(SB)
  1302  #else
  1303  	BR      cmpbodyBE<>(SB)
  1304  #endif
  1305  
  1306  equal:
  1307  	BEQ	CR6,done	// same address and same length: result 0
  1308  	MOVD	$1, R8
  1309  	BGT	CR6,greater	// s1 longer: result +1
  1310  	NEG	R8		// s1 shorter: result -1
  1311  
  1312  greater:
  1313  	MOVD	R8, (R7)
  1314  	RET
  1315  
  1316  done:
  1317  	MOVD	$0, (R7)
  1318  	RET
  1319  
  1320  TEXT runtime·return0(SB), NOSPLIT, $0
  1321  	MOVW	$0, R3		// leave 0 in R3 and return
  1322  	RET
  1323  
  1324  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1325  // Must obey the gcc calling convention.
        // The result is left in R3. g, R31 and LR are parked in R4, R5 and R6
        // around the call to load_g and restored before returning.
  1326  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
  1327  	// g (R30) and R31 are callee-save in the C ABI, so save them
  1328  	MOVD	g, R4
  1329  	MOVD	R31, R5
  1330  	MOVD	LR, R6		// the BL below overwrites LR
  1331  
  1332  	BL	runtime·load_g(SB)	// clobbers g (R30), R31
        	// NOTE(review): this relies on load_g leaving R4-R6 intact; confirm.
  1333  	MOVD	g_m(g), R3		// R3 = g->m
  1334  	MOVD	m_curg(R3), R3		// R3 = g->m->curg
  1335  	MOVD	(g_stack+stack_hi)(R3), R3	// R3 = g->m->curg.stack.hi
  1336  
  1337  	MOVD	R4, g
  1338  	MOVD	R5, R31
  1339  	MOVD	R6, LR
  1340  	RET
  1341  
  1342  // The top-most function running on a goroutine
  1343  // returns to goexit+PCQuantum.
  1344  //
  1345  // When dynamically linking Go, it can be returned to from a function
  1346  // implemented in a different module and so needs to reload the TOC pointer
  1347  // from the stack (although this function declares that it does not set up a
  1348  // frame, newproc1 does in fact allocate one for goexit and saves the TOC
  1349  // pointer in the correct place).
  1350  // goexit+_PCQuantum is halfway through the usual global entry point prologue
  1351  // that derives r2 from r12 which is a bit silly, but not harmful.
  1352  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
  1353  	MOVD	24(R1), R2		// reload the TOC pointer from the frame
  1354  	BL	runtime·goexit1(SB)	// does not return
  1355  	// traceback from goexit1 must hit code range of goexit
  1356  	MOVD	R0, R0	// NOP
  1357  
  1358  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
  1359  	RET			// no work is done here: the body is a bare return
  1360  
  1361  // prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
  1362  // module containing runtime) to the frame that goexit will execute in when
  1363  // the goroutine exits. It's implemented in assembly mainly because that's the
  1364  // easiest way to get access to R2.
        // The TOC pointer is stored at offset 24 from the sp argument, the same
        // offset from which runtime·goexit reloads R2 relative to R1.
  1365  TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
  1366        MOVD    sp+0(FP), R3
  1367        MOVD    R2, 24(R3)
  1368        RET
  1369  
  1370  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
        	// Appends the value in R3 to the list ending at
        	// runtime·lastmoduledatap: it is stored in the current tail's next
        	// field and then becomes the new tail. R31 is preserved across the
        	// body in an 8-byte slot below R1.
        	// NOTE(review): presumably R31 is saved because the global accesses
        	// below may use it as a scratch register; confirm.
  1371  	ADD	$-8, R1
  1372  	MOVD	R31, 0(R1)
  1373  	MOVD	runtime·lastmoduledatap(SB), R4	// R4 = current tail
  1374  	MOVD	R3, moduledata_next(R4)		// tail->next = R3
  1375  	MOVD	R3, runtime·lastmoduledatap(SB)	// R3 becomes the tail
  1376  	MOVD	0(R1), R31
  1377  	ADD	$8, R1
  1378  	RET
  1379  
  1380  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1381  	MOVW	$1, R3
  1382  	MOVB	R3, ret+0(FP)	// unconditionally store 1 in the one-byte result
  1383  	RET