github.com/eun/go@v0.0.0-20170811110501-92cfd07a6cfd/src/runtime/asm_ppc64x.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "go_tls.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  #include "asm_ppc64x.h"
    12  
    13  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime bootstrap. Builds g0 on the incoming OS stack, optionally
        	// calls _cgo_init, links g0 and m0, runs check/args/osinit/schedinit,
        	// queues the function stored in runtime·mainPC with newproc and then
        	// calls mstart.
    14  	// R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
    15  
    16  	// initialize essential registers
    17  	BL	runtime·reginit(SB)
    18  
        	// Open a frame and park argc/argv in its argument slots; nothing
        	// below overwrites them before runtime·args is called.
    19  	SUB	$(FIXED_FRAME+16), R1
    20  	MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
    21  	MOVW	R3, FIXED_FRAME+0(R1)	// argc
    22  	MOVD	R4, FIXED_FRAME+8(R1)	// argv
    23  
    24  	// create istack out of the given (operating system) stack.
    25  	// _cgo_init may update stackguard.
        	// Provisional bounds: stack.hi = current R1, stack.lo = R1 - 64KB,
        	// and both stack guards start at stack.lo.
    26  	MOVD	$runtime·g0(SB), g
    27  	MOVD	$(-64*1024), R31
    28  	ADD	R31, R1, R3
    29  	MOVD	R3, g_stackguard0(g)
    30  	MOVD	R3, g_stackguard1(g)
    31  	MOVD	R3, (g_stack+stack_lo)(g)
    32  	MOVD	R1, (g_stack+stack_hi)(g)
    33  
    34  	// if there is a _cgo_init, call it using the gcc ABI.
    35  	MOVD	_cgo_init(SB), R12
    36  	CMP	R0, R12
    37  	BEQ	nocgo
    38  	MOVD	R12, CTR		// r12 = "global function entry point"
    39  	MOVD	R13, R5			// arg 2: TLS base pointer
    40  	MOVD	$setg_gcc<>(SB), R4 	// arg 1: setg
    41  	MOVD	g, R3			// arg 0: G
    42  	// C functions expect 32 bytes of space on caller stack frame
    43  	// and a 16-byte aligned R1
    44  	MOVD	R1, R14			// save current stack
    45  	SUB	$32, R1			// reserve 32 bytes
    46  	RLDCR	$0, R1, $~15, R1	// 16-byte align
    47  	BL	(CTR)			// may clobber R0, R3-R12
    48  	MOVD	R14, R1			// restore stack
    49  	MOVD	24(R1), R2		// reload the TOC pointer stashed at 24(R1) above
    50  	XOR	R0, R0			// fix R0
    51  
    52  nocgo:
    53  	// update stackguard after _cgo_init
        	// Both guards become stack.lo + _StackGuard, using whatever stack.lo
        	// holds now (it is re-read from g rather than reused from R3).
    54  	MOVD	(g_stack+stack_lo)(g), R3
    55  	ADD	$const__StackGuard, R3
    56  	MOVD	R3, g_stackguard0(g)
    57  	MOVD	R3, g_stackguard1(g)
    58  
    59  	// set the per-goroutine and per-mach "registers"
    60  	MOVD	$runtime·m0(SB), R3
    61  
    62  	// save m->g0 = g0
    63  	MOVD	g, m_g0(R3)
    64  	// save m0 to g0->m
    65  	MOVD	R3, g_m(g)
    66  
    67  	BL	runtime·check(SB)
    68  
    69  	// args are already prepared
        	// (argc/argv were stored at FIXED_FRAME+0/+8 right after the frame was opened)
    70  	BL	runtime·args(SB)
    71  	BL	runtime·osinit(SB)
    72  	BL	runtime·schedinit(SB)
    73  
    74  	// create a new goroutine to start program
        	// The call frame for newproc is pushed by hand: the entry pointer,
        	// then five zero doublewords (6*8 = 48 bytes in total). The ADD after
        	// the call releases 16+FIXED_FRAME bytes.
        	// NOTE(review): push and pop balance only if FIXED_FRAME is 32 —
        	// confirm against asm_ppc64x.h.
    75  	MOVD	$runtime·mainPC(SB), R3		// entry
    76  	MOVDU	R3, -8(R1)
    77  	MOVDU	R0, -8(R1)
    78  	MOVDU	R0, -8(R1)
    79  	MOVDU	R0, -8(R1)
    80  	MOVDU	R0, -8(R1)
    81  	MOVDU	R0, -8(R1)
    82  	BL	runtime·newproc(SB)
    83  	ADD	$(16+FIXED_FRAME), R1
    84  
    85  	// start this M
    86  	BL	runtime·mstart(SB)
    87  
        	// Presumably unreachable (mstart is not expected to return): a store
        	// to address 0, given that R0 is kept at zero.
    88  	MOVD	R0, 0(R0)
    89  	RET
    90  
    91  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte read-only cell holding the address of runtime·main; rt0_go passes its address to newproc
    92  GLOBL	runtime·mainPC(SB),RODATA,$8
    93  
    94  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// No trap instruction is used yet (see the TODO). Instead this stores
        	// R0 to the address held in R0; with R0 kept at zero (see
        	// runtime·reginit) that is a write to address 0.
    95  	MOVD	R0, 0(R0) // TODO: TD
    96  	RET
    97  
    98  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        	// Nothing to initialise here on ppc64x: returns immediately.
    99  	RET
   100  
   101  TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
   102  	// crosscall_ppc64 and crosscall2 need to reginit, but can't
   103  	// get at the 'runtime.reginit' symbol.
        	// Tail branch (BR, no link): reginit's RET returns directly to the
        	// caller of _cgo_reginit.
   104  	BR	runtime·reginit(SB)
   105  
   106  TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
        	// Re-establishes the register state Go code relies on. At present
        	// that is only R0; no other register is touched.
   107  	// set R0 to zero, it's expected by the toolchain
   108  	XOR R0, R0
   109  	RET
   110  
   111  /*
   112   *  go-routine
   113   */
   114  
   115  // void gosave(Gobuf*)
   116  // save state in Gobuf; setjmp
        // Records the caller's SP (R1), return address (LR) and g in *buf, and
        // zeroes buf.lr and buf.ret. buf.ctxt must already be zero; otherwise
        // runtime·badctxt is called. Uses R3 and R31 as scratch.
   117  TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   118  	MOVD	buf+0(FP), R3
   119  	MOVD	R1, gobuf_sp(R3)
   120  	MOVD	LR, R31			// LR cannot be stored directly; go through R31
   121  	MOVD	R31, gobuf_pc(R3)
   122  	MOVD	g, gobuf_g(R3)
   123  	MOVD	R0, gobuf_lr(R3)
   124  	MOVD	R0, gobuf_ret(R3)
   125  	// Assert ctxt is zero. See func save.
   126  	MOVD	gobuf_ctxt(R3), R3	// R3 is reused: buf is no longer needed
   127  	CMP	R0, R3
   128  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   129  	BL	runtime·badctxt(SB)
   130  	RET
   131  
   132  // void gogo(Gobuf*)
   133  // restore state from Gobuf; longjmp
        // Switches g, SP and LR to the values saved in *buf, loads buf.ret into
        // R3 and buf.ctxt into R11, zeroes the sp/ret/lr/ctxt fields of *buf and
        // jumps to buf.pc. Control does not come back to the caller of gogo.
   134  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   135  	MOVD	buf+0(FP), R5
   136  
   137  	// If ctxt is not nil, invoke deletion barrier before overwriting.
        	// Arguments passed: the address of buf.ctxt and a zero value.
   138  	MOVD	gobuf_ctxt(R5), R3
   139  	CMP	R0, R3
   140  	BEQ	nilctxt
   141  	MOVD	$gobuf_ctxt(R5), R3
   142  	MOVD	R3, FIXED_FRAME+0(R1)
   143  	MOVD	R0, FIXED_FRAME+8(R1)
   144  	BL	runtime·writebarrierptr_prewrite(SB)
   145  	MOVD	buf+0(FP), R5		// reload buf: R5 is not assumed to survive the call
   146  
   147  nilctxt:
   148  	MOVD	gobuf_g(R5), g	// g = buf.g
   149  	BL	runtime·save_g(SB)
   150  
        	// make sure g is not nil: this load through g faults if it is
        	// (R4 is not read afterwards in this function).
   151  	MOVD	0(g), R4
   152  	MOVD	gobuf_sp(R5), R1
   153  	MOVD	gobuf_lr(R5), R31
   154  	MOVD	R31, LR
   155  	MOVD	gobuf_ret(R5), R3
   156  	MOVD	gobuf_ctxt(R5), R11
        	// Clear the fields just consumed. gobuf_pc and gobuf_g are left as is.
   157  	MOVD	R0, gobuf_sp(R5)
   158  	MOVD	R0, gobuf_ret(R5)
   159  	MOVD	R0, gobuf_lr(R5)
   160  	MOVD	R0, gobuf_ctxt(R5)
   161  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   162  	MOVD	gobuf_pc(R5), R12	// target address is also left in R12
   163  	MOVD	R12, CTR
   164  	BR	(CTR)
   165  
   166  // void mcall(fn func(*g))
   167  // Switch to m->g0's stack, call fn(g).
   168  // Fn must never return. It should gogo(&g->sched)
   169  // to keep running g.
        // Calling mcall while already on g0 branches to runtime·badmcall; if fn
        // does return, control goes to runtime·badmcall2.
   170  TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   171  	// Save caller state in g->sched
   172  	MOVD	R1, (g_sched+gobuf_sp)(g)
   173  	MOVD	LR, R31
   174  	MOVD	R31, (g_sched+gobuf_pc)(g)
   175  	MOVD	R0, (g_sched+gobuf_lr)(g)
   176  	MOVD	g, (g_sched+gobuf_g)(g)
   177  
   178  	// Switch to m->g0 & its stack, call fn.
   179  	MOVD	g, R3				// R3 = calling g, later pushed as fn's argument
   180  	MOVD	g_m(g), R8
   181  	MOVD	m_g0(R8), g
   182  	BL	runtime·save_g(SB)
   183  	CMP	g, R3
   184  	BNE	2(PC)				// g0 != caller: skip the badmcall branch
   185  	BR	runtime·badmcall(SB)
   186  	MOVD	fn+0(FP), R11			// context
   187  	MOVD	0(R11), R12			// code pointer
   188  	MOVD	R12, CTR
   189  	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
        	// Push the old g, then four zero doublewords below it.
        	// NOTE(review): the four zero words presumably form the fixed frame
        	// area for the callee — confirm against FIXED_FRAME.
   190  	MOVDU	R3, -8(R1)
   191  	MOVDU	R0, -8(R1)
   192  	MOVDU	R0, -8(R1)
   193  	MOVDU	R0, -8(R1)
   194  	MOVDU	R0, -8(R1)
   195  	BL	(CTR)
        	// Only reached if fn returned, which the contract above forbids.
   196  	MOVD	24(R1), R2			// reload R2 from the 24(R1) slot (the TOC slot in rt0_go)
   197  	BR	runtime·badmcall2(SB)
   198  
   199  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   200  // of the G stack. We need to distinguish the routine that
   201  // lives at the bottom of the G stack from the one that lives
   202  // at the top of the system stack because the one at the top of
   203  // the system stack terminates the stack walk (see topofstack()).
        // systemstack stores $runtime·systemstack_switch+16 as the saved PC in
        // g->sched, so that address has to fall inside this function's body.
   204  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   205  	// We have several undefs here so that 16 bytes past
   206  	// $runtime·systemstack_switch lies within them whether or not the
   207          // instructions that derive r2 from r12 are there.
   208  	UNDEF
   209  	UNDEF
   210  	UNDEF
   211  	BL	(LR)	// make sure this function is not leaf
   212  	RET
   213  
   214  // func systemstack(fn func())
        // Runs fn on the system (g0) stack.
        //   - If the current g is m->gsignal or m->g0, fn is called directly on
        //     the current stack (noswitch).
        //   - If the current g is m->curg, the caller's state is saved in
        //     g->sched (with systemstack_switch+16 as the PC), execution moves
        //     to g0's stack, fn is called, and then g and SP are switched back.
        //   - Any other g is a fatal error: badsystemstack is called and, should
        //     it ever return, abort stops execution instead of falling through
        //     into the switch path with an unknown g.
        // R11 carries the closure (context) and R12 the code pointer for fn.
   215  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   216  	MOVD	fn+0(FP), R3	// R3 = fn
   217  	MOVD	R3, R11		// context
   218  	MOVD	g_m(g), R4	// R4 = m
   219  
   220  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   221  	CMP	g, R5
   222  	BEQ	noswitch
   223  
   224  	MOVD	m_g0(R4), R5	// R5 = g0
   225  	CMP	g, R5
   226  	BEQ	noswitch
   227  
   228  	MOVD	m_curg(R4), R6
   229  	CMP	g, R6
   230  	BEQ	switch
   231  
   232  	// Bad: g is not gsignal, not g0, not curg. What is it?
   233  	// Hide call from linker nosplit analysis.
   234  	MOVD	$runtime·badsystemstack(SB), R12
   235  	MOVD	R12, CTR
   236  	BL	(CTR)
        	// Do not fall through into "switch" if badsystemstack returns;
        	// same bad-call-then-abort pairing as in runtime·morestack.
        	BL	runtime·abort(SB)
   237  
   238  switch:
   239  	// save our state in g->sched. Pretend to
   240  	// be systemstack_switch if the G stack is scanned.
   241  	MOVD	$runtime·systemstack_switch(SB), R6
   242  	ADD     $16, R6 // get past prologue (including r2-setting instructions when they're there)
   243  	MOVD	R6, (g_sched+gobuf_pc)(g)
   244  	MOVD	R1, (g_sched+gobuf_sp)(g)
   245  	MOVD	R0, (g_sched+gobuf_lr)(g)
   246  	MOVD	g, (g_sched+gobuf_g)(g)
   247  
   248  	// switch to g0
        	// R5 still holds m->g0 from the comparison above.
   249  	MOVD	R5, g
   250  	BL	runtime·save_g(SB)
   251  	MOVD	(g_sched+gobuf_sp)(g), R3
   252  	// make it look like mstart called systemstack on g0, to stop traceback
   253  	SUB	$FIXED_FRAME, R3
   254  	MOVD	$runtime·mstart(SB), R4
   255  	MOVD	R4, 0(R3)
   256  	MOVD	R3, R1
   257  
   258  	// call target function
   259  	MOVD	0(R11), R12	// code pointer
   260  	MOVD	R12, CTR
   261  	BL	(CTR)
   262  
   263  	// restore TOC pointer. It seems unlikely that we will use systemstack
   264  	// to call a function defined in another module, but the results of
   265  	// doing so would be so confusing that it's worth doing this.
        	// R2 is reloaded from 24(sp) of the saved user-goroutine SP.
   266  	MOVD	g_m(g), R3
   267  	MOVD	m_curg(R3), g
   268  	MOVD	(g_sched+gobuf_sp)(g), R3
   269  	MOVD	24(R3), R2
   270  	// switch back to g
        	// (g was already set to m->curg just above; these two loads repeat
        	// that and are kept as they were.)
   271  	MOVD	g_m(g), R3
   272  	MOVD	m_curg(R3), g
   273  	BL	runtime·save_g(SB)
   274  	MOVD	(g_sched+gobuf_sp)(g), R1
   275  	MOVD	R0, (g_sched+gobuf_sp)(g)	// clear the saved SP now that it has been consumed
   276  	RET
   277  
   278  noswitch:
   279  	// already on m stack, just call directly
   280  	MOVD	0(R11), R12	// code pointer
   281  	MOVD	R12, CTR
   282  	BL	(CTR)
   283  	MOVD	24(R1), R2	// reload R2 from the 24(R1) slot after the indirect call
   284  	RET
   285  
   286  /*
   287   * support for morestack
   288   */
   289  
   290  // Called during function prolog when more stack is needed.
   291  // Caller has already loaded:
   292  // R3: framesize, R4: argsize, R5: LR
   293  //
   294  // The traceback routines see morestack on a g0 as being
   295  // the top of a stack (for example, morestack calling newstack
   296  // calling the scheduler calling newm calling gc), so we must
   297  // record an argument size. For that purpose, it has no arguments.
        //
        // R11 is passed through to newstack as its ctxt argument;
        // morestack_noctxt zeroes it before branching here.
   298  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   299  	// Cannot grow scheduler stack (m->g0).
   300  	MOVD	g_m(g), R7
   301  	MOVD	m_g0(R7), R8
   302  	CMP	g, R8
   303  	BNE	3(PC)			// not g0: skip the two calls below
   304  	BL	runtime·badmorestackg0(SB)
   305  	BL	runtime·abort(SB)
   306  
   307  	// Cannot grow signal stack (m->gsignal).
   308  	MOVD	m_gsignal(R7), R8
   309  	CMP	g, R8
   310  	BNE	3(PC)			// not gsignal: skip the two calls below
   311  	BL	runtime·badmorestackgsignal(SB)
   312  	BL	runtime·abort(SB)
   313  
   314  	// Called from f.
   315  	// Set g->sched to context in f.
        	// sched.pc is morestack's own return address (LR, i.e. back in f);
        	// sched.lr is the R5 value that f loaded before calling.
   316  	MOVD	R1, (g_sched+gobuf_sp)(g)
   317  	MOVD	LR, R8
   318  	MOVD	R8, (g_sched+gobuf_pc)(g)
   319  	MOVD	R5, (g_sched+gobuf_lr)(g)
   320  	// newstack will fill gobuf.ctxt.
   321  
   322  	// Called from f.
   323  	// Set m->morebuf to f's caller.
   324  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   325  	MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   326  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   327  
   328  	// Call newstack on m->g0's stack.
   329  	MOVD	m_g0(R7), g
   330  	BL	runtime·save_g(SB)
   331  	MOVD	(g_sched+gobuf_sp)(g), R1
   332  	MOVDU   R0, -(FIXED_FRAME+8)(R1)	// create a call frame on g0
   333  	MOVD	R11, FIXED_FRAME+0(R1)	// ctxt argument
   334  	BL	runtime·newstack(SB)
   335  
   336  	// Not reached, but make sure the return PC from the call to newstack
   337  	// is still in this function, and not the beginning of the next.
   338  	UNDEF
   339  
   340  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Variant of morestack with no closure context: zero R11 (which
        	// morestack hands to newstack as ctxt) and tail-branch to morestack.
   341  	MOVD	R0, R11
   342  	BR	runtime·morestack(SB)
   343  
   344  // reflectcall: call a function with the given argument list
   345  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   346  // we don't have variable-sized frames, so we use a small number
   347  // of constant-sized-frame functions to encode a few bits of size in the pc.
   348  // Caution: ugly multiline assembly macros in your future!
   349  
        // DISPATCH(NAME, MAXSIZE): expects the argument size in R3. If R3 is
        // greater than MAXSIZE, the BGT skips the remaining three instructions
        // of this expansion so the next DISPATCH is tried; otherwise control
        // jumps to NAME through CTR, with NAME's address also in R12.
        // Clobbers R31.
   350  #define DISPATCH(NAME,MAXSIZE)		\
   351  	MOVD	$MAXSIZE, R31;		\
   352  	CMP	R3, R31;		\
   353  	BGT	4(PC);			\
   354  	MOVD	$NAME(SB), R12;		\
   355  	MOVD	R12, CTR;		\
   356  	BR	(CTR)
   357  // Note: can't just "BR NAME(SB)" - bad inlining results.
   358  
   359  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Entry point under the reflect package's name: tail-branches to
        	// this package's reflectcall without touching any argument.
   360  	BR	·reflectcall(SB)
   361  
   362  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
        	// Loads argsize (zero-extended 32-bit value at 24(FP)) into R3 and
        	// jumps to the first runtime·callN whose N is >= argsize. Sizes run in
        	// powers of two from 32 up to 1073741824; anything larger falls
        	// through every DISPATCH and jumps to runtime·badreflectcall.
   363  	MOVWZ argsize+24(FP), R3
   364  	DISPATCH(runtime·call32, 32)
   365  	DISPATCH(runtime·call64, 64)
   366  	DISPATCH(runtime·call128, 128)
   367  	DISPATCH(runtime·call256, 256)
   368  	DISPATCH(runtime·call512, 512)
   369  	DISPATCH(runtime·call1024, 1024)
   370  	DISPATCH(runtime·call2048, 2048)
   371  	DISPATCH(runtime·call4096, 4096)
   372  	DISPATCH(runtime·call8192, 8192)
   373  	DISPATCH(runtime·call16384, 16384)
   374  	DISPATCH(runtime·call32768, 32768)
   375  	DISPATCH(runtime·call65536, 65536)
   376  	DISPATCH(runtime·call131072, 131072)
   377  	DISPATCH(runtime·call262144, 262144)
   378  	DISPATCH(runtime·call524288, 524288)
   379  	DISPATCH(runtime·call1048576, 1048576)
   380  	DISPATCH(runtime·call2097152, 2097152)
   381  	DISPATCH(runtime·call4194304, 4194304)
   382  	DISPATCH(runtime·call8388608, 8388608)
   383  	DISPATCH(runtime·call16777216, 16777216)
   384  	DISPATCH(runtime·call33554432, 33554432)
   385  	DISPATCH(runtime·call67108864, 67108864)
   386  	DISPATCH(runtime·call134217728, 134217728)
   387  	DISPATCH(runtime·call268435456, 268435456)
   388  	DISPATCH(runtime·call536870912, 536870912)
   389  	DISPATCH(runtime·call1073741824, 1073741824)
        	// argsize exceeds the largest frame size available.
   390  	MOVD	$runtime·badreflectcall(SB), R12
   391  	MOVD	R12, CTR
   392  	BR	(CTR)
   393  
   394  #define CALLFN(NAME,MAXSIZE)			\
        	/* Defines NAME with a MAXSIZE-byte frame: copies argsize bytes */	\
        	/* from arg into the frame, calls f, then has callRet copy the */	\
        	/* bytes from retoffset onwards back into arg. */			\
   395  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   396  	NO_LOCAL_POINTERS;			\
   397  	/* copy arguments to stack */		\
        	/* byte loop: R3 = src-1, R5 = dst-1, R4 = dst-1+argsize (end); */	\
        	/* MOVBZU pre-increments its address register on each access */	\
   398  	MOVD	arg+16(FP), R3;			\
   399  	MOVWZ	argsize+24(FP), R4;			\
   400  	MOVD	R1, R5;				\
   401  	ADD	$(FIXED_FRAME-1), R5;			\
   402  	SUB	$1, R3;				\
   403  	ADD	R5, R4;				\
   404  	CMP	R5, R4;				\
   405  	BEQ	4(PC);				\
   406  	MOVBZU	1(R3), R6;			\
   407  	MOVBZU	R6, 1(R5);			\
   408  	BR	-4(PC);				\
   409  	/* call function */			\
        	/* R11 = closure (context), R12 = its first word, the code pointer */	\
   410  	MOVD	f+8(FP), R11;			\
   411  	MOVD	(R11), R12;			\
   412  	MOVD	R12, CTR;			\
   413  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   414  	BL	(CTR);				\
   415  	MOVD	24(R1), R2;			\
   416  	/* copy return values back */		\
        	/* callRet takes its inputs in registers: R7 = argtype, */		\
        	/* R3 = arg+retoffset, R5 = frame copy+retoffset, */			\
        	/* R4 = size-retoffset */						\
   417  	MOVD	argtype+0(FP), R7;		\
   418  	MOVD	arg+16(FP), R3;			\
   419  	MOVWZ	n+24(FP), R4;			\
   420  	MOVWZ	retoffset+28(FP), R6;		\
   421  	ADD	$FIXED_FRAME, R1, R5;		\
   422  	ADD	R6, R5; 			\
   423  	ADD	R6, R3;				\
   424  	SUB	R6, R4;				\
   425  	BL	callRet<>(SB);			\
   426  	RET
   427  
   428  // callRet copies return values back at the end of call*. This is a
   429  // separate function so it can allocate stack space for the arguments
   430  // to reflectcallmove. It does not follow the Go ABI; it expects its
   431  // arguments in registers.
        // Register inputs as set up by CALLFN: R7 = argtype, R3 = arg+retoffset,
        // R5 = frame copy+retoffset, R4 = size-retoffset. They are stored, in
        // that order, into the four outgoing argument slots for reflectcallmove.
   432  TEXT callRet<>(SB), NOSPLIT, $32-0
   433  	MOVD	R7, FIXED_FRAME+0(R1)
   434  	MOVD	R3, FIXED_FRAME+8(R1)
   435  	MOVD	R5, FIXED_FRAME+16(R1)
   436  	MOVD	R4, FIXED_FRAME+24(R1)
   437  	BL	runtime·reflectcallmove(SB)
   438  	RET
   439  
        // One fixed-frame call function per size class. The sizes match, one
        // for one, the DISPATCH table in reflectcall (32 bytes up to 1 GiB in
        // powers of two).
   440  CALLFN(·call32, 32)
   441  CALLFN(·call64, 64)
   442  CALLFN(·call128, 128)
   443  CALLFN(·call256, 256)
   444  CALLFN(·call512, 512)
   445  CALLFN(·call1024, 1024)
   446  CALLFN(·call2048, 2048)
   447  CALLFN(·call4096, 4096)
   448  CALLFN(·call8192, 8192)
   449  CALLFN(·call16384, 16384)
   450  CALLFN(·call32768, 32768)
   451  CALLFN(·call65536, 65536)
   452  CALLFN(·call131072, 131072)
   453  CALLFN(·call262144, 262144)
   454  CALLFN(·call524288, 524288)
   455  CALLFN(·call1048576, 1048576)
   456  CALLFN(·call2097152, 2097152)
   457  CALLFN(·call4194304, 4194304)
   458  CALLFN(·call8388608, 8388608)
   459  CALLFN(·call16777216, 16777216)
   460  CALLFN(·call33554432, 33554432)
   461  CALLFN(·call67108864, 67108864)
   462  CALLFN(·call134217728, 134217728)
   463  CALLFN(·call268435456, 268435456)
   464  CALLFN(·call536870912, 536870912)
   465  CALLFN(·call1073741824, 1073741824)
   466  
   467  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// No spin or yield hint is issued here: returns immediately.
   468  	RET
   469  
   470  // void jmpdefer(fv, sp);
   471  // called from deferreturn.
   472  // 1. grab stored LR for caller
   473  // 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
   474  // 3. BR to fn
   475  // When dynamically linking Go, it is not sufficient to rewind to the BL
   476  // deferreturn -- we might be jumping between modules and so we need to reset
   477  // the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
   478  // the BL deferreturn and jmpdefer rewinds to that.
   479  TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   480  	MOVD	0(R1), R31		// step 1: stored LR
   481  	SUB     $8, R31			// step 2: rewind 8 bytes
   482  	MOVD	R31, LR			// fn will return to the rewound address
   483  
   484  	MOVD	fv+0(FP), R11		// R11 = closure (context)
   485  	MOVD	argp+8(FP), R1		// SP = argp - FIXED_FRAME
   486  	SUB	$FIXED_FRAME, R1
   487  	MOVD	0(R11), R12		// R12 = code pointer (first word of the closure)
   488  	MOVD	R12, CTR
   489  	BR	(CTR)			// step 3
   490  
   491  // Save state of caller into g->sched. Smashes R31.
        // Stores the return address (LR) as sched.pc and R1 as sched.sp, and
        // zeroes sched.lr and sched.ret. sched.ctxt must already be zero;
        // otherwise runtime·badctxt is called.
   492  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   493  	MOVD	LR, R31
   494  	MOVD	R31, (g_sched+gobuf_pc)(g)
   495  	MOVD	R1, (g_sched+gobuf_sp)(g)
   496  	MOVD	R0, (g_sched+gobuf_lr)(g)
   497  	MOVD	R0, (g_sched+gobuf_ret)(g)
   498  	// Assert ctxt is zero. See func save.
   499  	MOVD	(g_sched+gobuf_ctxt)(g), R31
   500  	CMP	R0, R31
   501  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   502  	BL	runtime·badctxt(SB)
   503  	RET
   504  
   505  // func asmcgocall(fn, arg unsafe.Pointer) int32
   506  // Call fn(arg) on the scheduler stack,
   507  // aligned appropriately for the gcc ABI.
   508  // See cgocall.go for more details.
        // Register roles up to the call: R3 = fn, R4 = arg, R5 = calling g,
        // R6 = m->g0, R7 = original SP. The 32-bit value fn leaves in R3 is
        // returned in ret.
   509  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   510  	MOVD	fn+0(FP), R3
   511  	MOVD	arg+8(FP), R4
   512  
   513  	MOVD	R1, R7		// save original stack pointer
   514  	MOVD	g, R5
   515  
   516  	// Figure out if we need to switch to m->g0 stack.
   517  	// We get called to create new OS threads too, and those
   518  	// come in on the m->g0 stack already.
   519  	MOVD	g_m(g), R6
   520  	MOVD	m_g0(R6), R6
   521  	CMP	R6, g
   522  	BEQ	g0
   523  	BL	gosave<>(SB)
   524  	MOVD	R6, g
   525  	BL	runtime·save_g(SB)
   526  	MOVD	(g_sched+gobuf_sp)(g), R1
   527  
   528  	// Now on a scheduling stack (a pthread-created stack).
   529  g0:
   530  	// Save room for two of our pointers, plus 32 bytes of callee
   531  	// save area that lives on the caller stack.
        	// Layout after alignment: 0(R1) back chain (zeroed), 32(R1) depth in
        	// the old g's stack, 40(R1) old g.
   532  	SUB	$48, R1
   533  	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
   534  	MOVD	R5, 40(R1)	// save old g on stack
   535  	MOVD	(g_stack+stack_hi)(R5), R5
   536  	SUB	R7, R5
   537  	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   538  	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
   539  	// This is a "global call", so put the global entry point in r12
   540  	MOVD	R3, R12
   541  	MOVD	R12, CTR
   542  	MOVD	R4, R3		// arg in r3
   543  	BL	(CTR)
   544  
   545  	// C code can clobber R0, so set it back to 0.  F27-F31 are
   546  	// callee save, so we don't need to recover those.
   547  	XOR	R0, R0
   548  	// Restore g, stack pointer, toc pointer.
   549  	// R3 is errno, so don't touch it
        	// The original SP is recomputed as stack.hi - saved depth. This is
        	// done once to reach the 24(SP) slot for R2, and again after save_g
        	// to set R1.
   550  	MOVD	40(R1), g
   551  	MOVD    (g_stack+stack_hi)(g), R5
   552  	MOVD    32(R1), R6
   553  	SUB     R6, R5
   554  	MOVD    24(R5), R2
   555  	BL	runtime·save_g(SB)
   556  	MOVD	(g_stack+stack_hi)(g), R5
   557  	MOVD	32(R1), R6
   558  	SUB	R6, R5
   559  	MOVD	R5, R1
   560  
   561  	MOVW	R3, ret+16(FP)
   562  	RET
   563  
   564  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   565  // Turn the fn into a Go func (by taking its address) and call
   566  // cgocallback_gofunc.
        // The four outgoing argument slots receive: the address of the fn
        // argument slot, frame, framesize and ctxt. The call goes through CTR.
   567  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   568  	MOVD	$fn+0(FP), R3		// address of the argument slot, not its value
   569  	MOVD	R3, FIXED_FRAME+0(R1)
   570  	MOVD	frame+8(FP), R3
   571  	MOVD	R3, FIXED_FRAME+8(R1)
   572  	MOVD	framesize+16(FP), R3
   573  	MOVD	R3, FIXED_FRAME+16(R1)
   574  	MOVD	ctxt+24(FP), R3
   575  	MOVD	R3, FIXED_FRAME+24(R1)
   576  	MOVD	$runtime·cgocallback_gofunc(SB), R12
   577  	MOVD	R12, CTR
   578  	BL	(CTR)
   579  	RET
   580  
   581  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   582  // See cgocall.go for more details.
        // Two local slots are used: savedm-8(SP) holds the m found on entry
        // (zero if needm had to supply one) and savedsp-16(SP) holds the
        // previous m->g0->sched.sp.
   583  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   584  	NO_LOCAL_POINTERS
   585  
   586  	// Load m and g from thread-local storage.
   587  	MOVB	runtime·iscgo(SB), R3
   588  	CMP	R3, $0
   589  	BEQ	nocgo
   590  	BL	runtime·load_g(SB)
   591  nocgo:
   592  
   593  	// If g is nil, Go did not create the current thread.
   594  	// Call needm to obtain one for temporary use.
   595  	// In this case, we're running on the thread stack, so there's
   596  	// lots of space, but the linker doesn't know. Hide the call from
   597  	// the linker analysis by using an indirect call.
   598  	CMP	g, $0
   599  	BEQ	needm
   600  
   601  	MOVD	g_m(g), R8
   602  	MOVD	R8, savedm-8(SP)
   603  	BR	havem
   604  
   605  needm:
   606  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   607  	MOVD	$runtime·needm(SB), R12
   608  	MOVD	R12, CTR
   609  	BL	(CTR)
   610  
   611  	// Set m->sched.sp = SP, so that if a panic happens
   612  	// during the function we are about to execute, it will
   613  	// have a valid SP to run on the g0 stack.
   614  	// The next few lines (after the havem label)
   615  	// will save this SP onto the stack and then write
   616  	// the same SP back to m->sched.sp. That seems redundant,
   617  	// but if an unrecovered panic happens, unwindm will
   618  	// restore the g->sched.sp from the stack location
   619  	// and then systemstack will try to use it. If we don't set it here,
   620  	// that restored SP will be uninitialized (typically 0) and
   621  	// will not be usable.
   622  	MOVD	g_m(g), R8
   623  	MOVD	m_g0(R8), R3
   624  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   625  
   626  havem:
        	// R8 = m on both paths into this label.
   627  	// Now there's a valid m, and we're running on its m->g0.
   628  	// Save current m->g0->sched.sp on stack and then set it to SP.
   629  	// Save current sp in m->g0->sched.sp in preparation for
   630  	// switch back to m->curg stack.
   631  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
   632  	MOVD	m_g0(R8), R3
   633  	MOVD	(g_sched+gobuf_sp)(R3), R4
   634  	MOVD	R4, savedsp-16(SP)
   635  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   636  
   637  	// Switch to m->curg stack and call runtime.cgocallbackg.
   638  	// Because we are taking over the execution of m->curg
   639  	// but *not* resuming what had been running, we need to
   640  	// save that information (m->curg->sched) so we can restore it.
   641  	// We can restore m->curg->sched.sp easily, because calling
   642  	// runtime.cgocallbackg leaves SP unchanged upon return.
   643  	// To save m->curg->sched.pc, we push it onto the stack.
   644  	// This has the added benefit that it looks to the traceback
   645  	// routine like cgocallbackg is going to return to that
   646  	// PC (because the frame we allocate below has the same
   647  	// size as cgocallback_gofunc's frame declared above)
   648  	// so that the traceback will seamlessly trace back into
   649  	// the earlier calls.
   650  	//
   651  	// In the new goroutine, -8(SP) is unused (where SP refers to
   652  	// m->curg's SP while we're setting it up, before we've adjusted it).
        	// New frame: R1 = R4 - (FIXED_FRAME+16). The saved pc goes to 0(R1),
        	// and ctxt goes to -16(R4), which is FIXED_FRAME+0 of the new R1.
   653  	MOVD	m_curg(R8), g
   654  	BL	runtime·save_g(SB)
   655  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   656  	MOVD	(g_sched+gobuf_pc)(g), R5
   657  	MOVD	R5, -(FIXED_FRAME+16)(R4)
   658  	MOVD	ctxt+24(FP), R3
   659  	MOVD	R3, -16(R4)
   660  	MOVD	$-(FIXED_FRAME+16)(R4), R1
   661  	BL	runtime·cgocallbackg(SB)
   662  
   663  	// Restore g->sched (== m->curg->sched) from saved values.
        	// pc comes back from 0(R1); sp is R1 plus the frame size used above.
   664  	MOVD	0(R1), R5
   665  	MOVD	R5, (g_sched+gobuf_pc)(g)
   666  	MOVD	$(FIXED_FRAME+16)(R1), R4
   667  	MOVD	R4, (g_sched+gobuf_sp)(g)
   668  
   669  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   670  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   671  	// so we do not have to restore it.)
   672  	MOVD	g_m(g), R8
   673  	MOVD	m_g0(R8), g
   674  	BL	runtime·save_g(SB)
   675  	MOVD	(g_sched+gobuf_sp)(g), R1
   676  	MOVD	savedsp-16(SP), R4
   677  	MOVD	R4, (g_sched+gobuf_sp)(g)
   678  
   679  	// If the m on entry was nil, we called needm above to borrow an m
   680  	// for the duration of the call. Since the call is over, return it with dropm.
   681  	MOVD	savedm-8(SP), R6
   682  	CMP	R6, $0
   683  	BNE	droppedm		// an m existed on entry: nothing to give back
   684  	MOVD	$runtime·dropm(SB), R12
   685  	MOVD	R12, CTR
   686  	BL	(CTR)
   687  droppedm:
   688  
   689  	// Done!
   690  	RET
   691  
   692  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and then calls save_g.
   693  TEXT runtime·setg(SB), NOSPLIT, $0-8
   694  	MOVD	gg+0(FP), g
   695  	// This only happens if iscgo, so jump straight to save_g
   696  	BL	runtime·save_g(SB)
   697  	RET
   698  
   699  // void setg_gcc(G*); set g in C TLS.
   700  // Must obey the gcc calling convention.
        // The new g arrives in R3. g, R31 and LR are parked in R6, R5 and R4
        // around the save_g call and restored before returning, so the caller
        // sees them unchanged.
   701  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   702  	// The standard prologue clobbers R31, which is callee-save in
   703  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   704  	MOVD	LR, R4
   705  	// Also save g and R31, since they're callee-save in C ABI
   706  	MOVD	R31, R5
   707  	MOVD	g, R6
   708  
   709  	MOVD	R3, g
   710  	BL	runtime·save_g(SB)
   711  
   712  	MOVD	R6, g
   713  	MOVD	R5, R31
   714  	MOVD	R4, LR
   715  	RET
   716  
   717  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
        	// Returns, in ret+8(FP), the doubleword at FIXED_FRAME+8(R1).
        	// NOTE(review): that slot is presumably the caller's saved-LR word
        	// once this function's own 8-byte frame is accounted for — confirm
        	// against FIXED_FRAME in asm_ppc64x.h.
   718  	MOVD	FIXED_FRAME+8(R1), R3		// LR saved by caller
   719  	MOVD	R3, ret+8(FP)
   720  	RET
   721  
   722  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Stops execution: load through R0 (kept at zero, so address 0),
        	// followed by UNDEF in case the load does not fault. No RET.
   723  	MOVW	(R0), R0
   724  	UNDEF
   725  
   726  #define	TBRL	268
   727  #define	TBRU	269		/* Time base Upper/Lower */
   728  
   729  // int64 runtime·cputicks(void)
        // Returns (upper << 32) | lower from the two time-base SPRs. The upper
        // half is read before and after the lower half; if the two reads
        // differ, the whole sequence is retried.
   730  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   731  	MOVW	SPR(TBRU), R4
   732  	MOVW	SPR(TBRL), R3
   733  	MOVW	SPR(TBRU), R5
   734  	CMPW	R4, R5
   735  	BNE	-4(PC)			// upper half changed: back to the first read
   736  	SLD	$32, R5
   737  	OR	R5, R3
   738  	MOVD	R3, ret+0(FP)
   739  	RET
   740  
   741  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   742  // redirects to memhash(p, h, size) using the size
   743  // stored in the closure.
        // The closure pointer is taken from R11 and the size is read at offset
        // 8 within it. memhash's result is picked up from the fourth outgoing
        // slot (FIXED_FRAME+24) and returned in ret.
   744  TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
   745  	GO_ARGS
   746  	NO_LOCAL_POINTERS
   747  	MOVD	p+0(FP), R3
   748  	MOVD	h+8(FP), R4
   749  	MOVD	8(R11), R5		// size from the closure
   750  	MOVD	R3, FIXED_FRAME+0(R1)
   751  	MOVD	R4, FIXED_FRAME+8(R1)
   752  	MOVD	R5, FIXED_FRAME+16(R1)
   753  	BL	runtime·memhash(SB)
   754  	MOVD	FIXED_FRAME+24(R1), R3
   755  	MOVD	R3, ret+16(FP)
   756  	RET
   757  
   758  // AES hashing not implemented for ppc64
        // Each stub is a single load through R0 (kept at zero, so address 0).
        // None of them has a RET: presumably they are never meant to be called
        // and the load is expected to fault. If it did not, execution would run
        // on into the next stub.
   759  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   760  	MOVW	(R0), R1
   761  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   762  	MOVW	(R0), R1
   763  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   764  	MOVW	(R0), R1
   765  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   766  	MOVW	(R0), R1
   767  
   768  TEXT runtime·memequal(SB),NOSPLIT,$0-25
        	// Loads a, b and size into R3, R4 and R5, calls memeqbody, and
        	// returns the byte memeqbody leaves in R9 as the result.
   769  	MOVD    a+0(FP), R3
   770  	MOVD    b+8(FP), R4
   771  	MOVD    size+16(FP), R5
   772  
   773  	BL	runtime·memeqbody(SB)
   774  	MOVB    R9, ret+24(FP)
   775  	RET
   776  
   777  // memequal_varlen(a, b unsafe.Pointer) bool
        // Equal pointers return true at once without reading memory. Otherwise
        // the size is read from the closure in R11 and memeqbody does the
        // comparison, with its result taken from R9.
   778  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   779  	MOVD	a+0(FP), R3
   780  	MOVD	b+8(FP), R4
   781  	CMP	R3, R4
   782  	BEQ	eq
   783  	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
   784  	BL	runtime·memeqbody(SB)
   785  	MOVB	R9, ret+16(FP)
   786  	RET
   787  eq:
   788  	MOVD	$1, R3
   789  	MOVB	R3, ret+16(FP)
   790  	RET
   791  
   792  // Do an efficient memcmp for ppc64le
   793  // R3 = s1 len
   794  // R4 = s2 len
   795  // R5 = s1 addr
   796  // R6 = s2 addr
   797  // R7 = addr of return value
        //
        // Stores -1, 0 or +1 as a doubleword at (R7). The first min(len1, len2)
        // bytes are compared; if they all match, the length comparison kept in
        // CR2 decides the result.
        // Registers written: R3, R5, R6, R8, R9, R10, R14, R15, R16, CTR,
        // CR0 and CR2.
   798  TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
   799  	MOVD	R3,R8		// set up length
   800  	CMP	R3,R4,CR2	// unequal?
   801  	BC	12,8,setuplen	// BLT CR2
   802  	MOVD	R4,R8		// use R4 for comparison len
   803  setuplen:
        	// R8 = min(len1, len2); CR2 keeps the len1-vs-len2 result until the end.
   804  	MOVD	R8,CTR		// set up loop counter
   805  	CMP	R8,$8		// only optimize >=8
   806  	BLT	simplecheck
   807  	DCBT	(R5)		// cache hint
   808  	DCBT	(R6)
   809  	CMP	R8,$32		// optimize >= 32
   810  	MOVD	R8,R9
   811  	BLT	setup8a		// 8 byte moves only
   812  setup32a:
   813  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   814  	MOVD	R9,CTR
   815  
   816          // Special processing for 32 bytes or longer.
   817          // Loading this way is faster and correct as long as the
   818  	// doublewords being compared are equal. Once they
   819  	// are found unequal, reload them in proper byte order
   820  	// to determine greater or less than.
        	// R16 is the offset from R5/R6 of the doubleword pair being compared;
        	// cmpne uses it to reload that pair byte-reversed.
   821  loop32a:
   822  	MOVD	0(R5),R9	// doublewords to compare
   823  	MOVD	0(R6),R10	// get 4 doublewords
   824  	MOVD	8(R5),R14
   825  	MOVD	8(R6),R15
   826  	CMPU	R9,R10		// bytes equal?
   827  	MOVD	$0,R16		// set up for cmpne
   828  	BNE	cmpne		// further compare for LT or GT
   829  	MOVD	16(R5),R9	// get next pair of doublewords
   830  	MOVD	16(R6),R10
   831  	CMPU	R14,R15		// bytes match?
   832  	MOVD	$8,R16		// set up for cmpne
   833  	BNE	cmpne		// further compare for LT or GT
   834  	MOVD	24(R5),R14	// get next pair of doublewords
   835  	MOVD    24(R6),R15
   836  	CMPU	R9,R10		// bytes match?
   837  	MOVD	$16,R16		// set up for cmpne
   838  	BNE	cmpne		// further compare for LT or GT
   839  	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   840  	ADD	$32,R5		// bump up to next 32
   841  	ADD	$32,R6
   842  	CMPU    R14,R15		// bytes match?
   843  	BC	8,2,loop32a	// br ctr and cr
   844  	BNE	cmpne
   845  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   846  	BEQ	leftover	// and result is 0
   847  setup8a:
        	// R9 holds the bytes still to be compared a doubleword at a time:
        	// either R8 (< 32) from above or R8 & 24 after the 32-byte loop.
   848  	SRADCC	$3,R9,R9	// get the 8 byte count
   849  	BEQ	leftover	// shifted value is 0
   850  	MOVD	R9,CTR		// loop count for doublewords
   851  loop8:
   852  	MOVDBR	(R5+R0),R9	// doublewords to compare
   853  	MOVDBR	(R6+R0),R10	// LE compare order
   854  	ADD	$8,R5
   855  	ADD	$8,R6
   856  	CMPU	R9,R10		// match?
   857  	BC	8,2,loop8	// bt ctr <> 0 && cr
   858  	BGT	greater
   859  	BLT	less
   860  leftover:
   861  	ANDCC	$7,R8,R9	// check for leftover bytes
   862  	MOVD	R9,CTR		// save the ctr
   863  	BNE	simple		// leftover bytes
   864  	BC	12,10,equal	// test CR2 for length comparison
   865  	BC	12,8,less
   866  	BR	greater
   867  simplecheck:
   868  	CMP	R8,$0		// remaining compare length 0
   869  	BNE	simple		// do simple compare
   870  	BC	12,10,equal	// test CR2 for length comparison
   871  	BC	12,8,less	// 1st len < 2nd len, result less
   872  	BR	greater		// 1st len > 2nd len must be greater
   873  simple:
        	// Byte-at-a-time loop; CTR holds the number of bytes left to compare.
   874  	MOVBZ	0(R5), R9	// get byte from 1st operand
   875  	ADD	$1,R5
   876  	MOVBZ	0(R6), R10	// get byte from 2nd operand
   877  	ADD	$1,R6
   878  	CMPU	R9, R10
   879  	BC	8,2,simple	// bc ctr <> 0 && cr
   880  	BGT	greater		// 1st > 2nd
   881  	BLT	less		// 1st < 2nd
   882  	BC	12,10,equal	// test CR2 for length comparison
   883  	BC	12,9,greater	// 1st len > 2nd len
   884  	BR	less		// must be less
   885  cmpne:				// only here is not equal
   886  	MOVDBR	(R5+R16),R8	// reload in reverse order
   887  	MOVDBR	(R6+R16),R9
   888  	CMPU	R8,R9		// compare correct endianness
   889  	BGT	greater		// here only if NE
        	// not greater and known unequal: fall through to less
   890  less:
   891  	MOVD	$-1,R3
   892  	MOVD	R3,(R7)		// return value if A < B
   893  	RET
   894  equal:
   895  	MOVD	$0,(R7)		// return value if A == B
   896  	RET
   897  greater:
   898  	MOVD	$1,R3
   899  	MOVD	R3,(R7)		// return value if A > B
   900  	RET
   901  
   902  // Do an efficient memcmp for ppc64 (BE)
   903  // R3 = s1 len
   904  // R4 = s2 len
   905  // R5 = s1 addr
   906  // R6 = s2 addr
   907  // R7 = addr of return value (receives -1, 0 or 1)
        // The first min(s1 len, s2 len) bytes are compared; if they all match,
        // the length comparison held in CR2 decides the result.
   908  TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   909  	MOVD	R3,R8		// start with s1 len as the compare length
   910  	CMP	R3,R4,CR2	// compare the lengths; CR2 is kept until the end
   911  	BC	12,8,setuplen	// BLT CR2: s1 len is the shorter one
   912  	MOVD	R4,R8		// use R4 for comparison len
   913  setuplen:
   914  	MOVD	R8,CTR		// set up loop counter
   915  	CMP	R8,$8		// only optimize >=8
   916  	BLT	simplecheck
   917  	DCBT	(R5)		// cache hint
   918  	DCBT	(R6)
   919  	CMP	R8,$32		// optimize >= 32
   920  	MOVD	R8,R9		// setup8a derives the doubleword count from R9
   921  	BLT	setup8a		// 8 byte moves only
   922  
   923  setup32a:
   924  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   925  	MOVD	R9,CTR
   926  loop32a:
   927  	MOVD	0(R5),R9	// doublewords to compare
   928  	MOVD	0(R6),R10	// get 4 doublewords
   929  	MOVD	8(R5),R14
   930  	MOVD	8(R6),R15
   931  	CMPU	R9,R10		// bytes equal?
   932  	BLT	less		// found to be less
   933  	BGT	greater		// found to be greater
   934  	MOVD	16(R5),R9	// get next pair of doublewords
   935  	MOVD	16(R6),R10
   936  	CMPU	R14,R15		// bytes match?
   937  	BLT	less		// found less
   938  	BGT	greater		// found greater
   939  	MOVD	24(R5),R14	// get next pair of doublewords
   940  	MOVD	24(R6),R15
   941  	CMPU	R9,R10		// bytes match?
   942  	BLT	less		// found to be less
   943  	BGT	greater		// found to be greater
   944  	ADD	$32,R5		// bump up to next 32
   945  	ADD	$32,R6
   946  	CMPU	R14,R15		// bytes match?
   947  	BC	8,2,loop32a	// decrement ctr; loop while ctr != 0 and CR0 EQ
   948  	BLT	less		// with BE, byte ordering is
   949  	BGT	greater		// good for compare
   950  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   951  	BEQ	leftover	// and result is 0
   952  setup8a:
   953  	SRADCC	$3,R9,R9	// get the 8 byte count
   954  	BEQ	leftover	// shifted value is 0
   955  	MOVD	R9,CTR		// loop count for doublewords
   956  loop8:
   957  	MOVD	(R5),R9
   958  	MOVD	(R6),R10
   959  	ADD	$8,R5
   960  	ADD	$8,R6
   961  	CMPU	R9,R10		// match?
   962  	BC	8,2,loop8	// decrement ctr; loop while ctr != 0 and CR0 EQ
   963  	BGT	greater
   964  	BLT	less
   965  leftover:
   966  	ANDCC	$7,R8,R9	// check for leftover bytes
   967  	MOVD	R9,CTR		// save the ctr
   968  	BNE	simple		// leftover bytes
   969  	BC	12,10,equal	// test CR2 for length comparison
   970  	BC	12,8,less	// CR2 LT: 1st len < 2nd len, result less
   971  	BR	greater		// 1st len > 2nd len, result greater
   972  simplecheck:
   973  	CMP	R8,$0		// remaining compare length 0
   974  	BNE	simple		// do simple compare
   975  	BC	12,10,equal	// test CR2 for length comparison
   976  	BC 	12,8,less	// 1st len < 2nd len, result less
   977  	BR	greater		// 1st len > 2nd len, result greater
   978  simple:
   979  	MOVBZ	0(R5),R9	// get byte from 1st operand
   980  	ADD	$1,R5
   981  	MOVBZ	0(R6),R10	// get byte from 2nd operand
   982  	ADD	$1,R6
   983  	CMPU	R9,R10
   984  	BC	8,2,simple	// decrement ctr; loop while ctr != 0 and CR0 EQ
   985  	BGT	greater		// 1st > 2nd
   986  	BLT	less		// 1st < 2nd
   987  	BC	12,10,equal	// test CR2 for length comparison
   988  	BC	12,9,greater	// CR2 GT: 1st len > 2nd len
        	// otherwise 1st len < 2nd len: fall through to less
   989  less:
   990  	MOVD	$-1,R3
   991  	MOVD    R3,(R7)		// return value if A < B
   992  	RET
   993  equal:
   994  	MOVD    $0,(R7)		// return value if A == B
   995  	RET
   996  greater:
   997  	MOVD	$1,R3
   998  	MOVD	R3,(R7)		// return value if A > B
   999  	RET
  1000  
  1001  // Do an efficient memequal for ppc64
  1002  // R3 = s1
  1003  // R4 = s2
  1004  // R5 = len
  1005  // R9 = return value (1 if all len bytes match, 0 otherwise)
  1006  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
  1007  	MOVD    R5,CTR		// byte count, used if we go straight to simple
  1008  	CMP     R5,$8		// only optimize >=8
  1009  	BLT     simplecheck
  1010  	DCBT	(R3)		// cache hint
  1011  	DCBT	(R4)
  1012  	CMP	R5,$32		// optimize >= 32
  1013  	MOVD	R5,R6		// needed if setup8a branch
  1014  	BLT	setup8a		// 8 byte moves only
  1015  setup32a:                       // >= 32 bytes
  1016  	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
  1017  	MOVD	R6,CTR
  1018  loop32a:
  1019  	MOVD    0(R3),R6        // doublewords to compare
  1020  	MOVD    0(R4),R7
  1021  	MOVD	8(R3),R8	// second pair, loaded ahead of its compare
  1022  	MOVD	8(R4),R9
  1023  	CMP     R6,R7           // bytes match?
  1024  	BNE     noteq
  1025  	MOVD	16(R3),R6
  1026  	MOVD	16(R4),R7
  1027  	CMP     R8,R9		// bytes match?
  1028  	MOVD	24(R3),R8
  1029  	MOVD	24(R4),R9
  1030  	BNE     noteq
  1031  	CMP     R6,R7           // bytes match?
  1032  	BNE	noteq
  1033  	ADD     $32,R3		// bump up to next 32
  1034  	ADD     $32,R4
  1035  	CMP     R8,R9           // bytes match?
  1036  	BC      8,2,loop32a	// decrement ctr; loop while ctr != 0 and CR0 EQ
  1037  	BNE	noteq
  1038  	ANDCC	$24,R5,R6       // Any 8 byte chunks?
  1039  	BEQ	leftover	// and result is 0
  1040  setup8a:
  1041  	SRADCC  $3,R6,R6        // get the 8 byte count
  1042  	BEQ	leftover	// shifted value is 0
  1043  	MOVD    R6,CTR
  1044  loop8:
  1045  	MOVD    0(R3),R6        // doublewords to compare
  1046  	ADD	$8,R3
  1047  	MOVD    0(R4),R7
  1048  	ADD     $8,R4
  1049  	CMP     R6,R7           // match?
  1050  	BC	8,2,loop8	// decrement ctr; loop while ctr != 0 and CR0 EQ
  1051  	BNE     noteq
  1052  leftover:
  1053  	ANDCC   $7,R5,R6        // check for leftover bytes
  1054  	BEQ     equal
  1055  	MOVD    R6,CTR
  1056  	BR	simple
  1057  simplecheck:
  1058  	CMP	R5,$0		// zero length compares equal
  1059  	BEQ	equal
  1060  simple:
  1061  	MOVBZ   0(R3), R6
  1062  	ADD	$1,R3
  1063  	MOVBZ   0(R4), R7
  1064  	ADD     $1,R4
  1065  	CMP     R6, R7
  1066  	BNE     noteq
  1067  	BC      8,2,simple	// EQ is known here, so this loops until ctr reaches 0
  1069  	BR	equal		// every byte matched
  1070  noteq:
  1071  	MOVD    $0, R9
  1072  	RET
  1073  equal:
  1074  	MOVD    $1, R9
  1075  	RET
  1076  
  1077  // eqstring tests whether two strings are equal.
  1078  // The compiler guarantees that strings passed
  1079  // to eqstring have equal length.
  1080  // See runtime_test.go:eqstring_generic for
  1081  // equivalent Go code.
  1082  TEXT runtime·eqstring(SB),NOSPLIT,$0-33
  1083  	MOVD    s1_base+0(FP), R3
  1084  	MOVD    s2_base+16(FP), R4
  1085  	MOVD    $1, R5
  1086  	MOVB    R5, ret+32(FP)		// preset the result to true
  1087  	CMP     R3, R4			// same base pointer?
  1088  	BNE     2(PC)			// different pointers: skip the early return
  1089  	RET				// same pointer: result stays true
  1090  	MOVD    s1_len+8(FP), R5	// R5 = length for memeqbody
  1091  	BL      runtime·memeqbody(SB)
  1092  	MOVB    R9, ret+32(FP)		// memeqbody leaves its result in R9
  1093  	RET
  1094  
  1095  TEXT bytes·Equal(SB),NOSPLIT,$0-49
        	// Compares slices a and b: unequal lengths give 0 immediately,
        	// otherwise the contents are compared by runtime·memeqbody.
  1096  	MOVD	a_len+8(FP), R4
  1097  	MOVD	b_len+32(FP), R5	// R5 doubles as the length passed to memeqbody
  1098  	CMP	R5, R4		// unequal lengths are not equal
  1099  	BNE	noteq
  1100  	MOVD	a+0(FP), R3
  1101  	MOVD	b+24(FP), R4
  1102  	BL	runtime·memeqbody(SB)
  1103  
  1104  	MOVBZ	R9,ret+48(FP)	// memeqbody leaves its result in R9
  1105  	RET
  1106  
  1107  noteq:
  1108  	MOVBZ	$0,ret+48(FP)
  1109  	RET
  1115  
  1116  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads the arguments into the registers used by indexbytebody and
        	// branches to it; the result is stored through R14.
  1117  	MOVD	s+0(FP), R3		// R3 = byte array pointer
  1118  	MOVD	s_len+8(FP), R4		// R4 = length
  1119  	MOVBZ	c+24(FP), R5		// R5 = byte
  1120  	MOVD	$ret+32(FP), R14	// R14 = &ret
  1121  	BR	runtime·indexbytebody<>(SB)
  1122  
  1123  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// Loads the arguments into the registers used by indexbytebody and
        	// branches to it; the result is stored through R14.
  1124  	MOVD	s+0(FP), R3	  // R3 = string
  1125  	MOVD	s_len+8(FP), R4	  // R4 = length
  1126  	MOVBZ	c+16(FP), R5	  // R5 = byte
  1127  	MOVD	$ret+24(FP), R14  // R14 = &ret
  1128  	BR	runtime·indexbytebody<>(SB)
  1129  
  1130  TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
        	// Inputs: R3 = base address, R4 = length, R5 = byte to find,
        	// R14 = address where the result is stored.
        	// Stores the index of the first match at (R14), or -1 if none.
  1131  	DCBT	(R3)		// Prepare cache line.
  1132  	MOVD	R3,R10		// Save base address for calculating the index later.
  1133  	RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
  1134  	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
  1135  
  1136  	// Calculate last acceptable address and check for possible overflow
  1137  	// using a saturated add.
  1138  	// Overflows set last acceptable address to 0xffffffffffffffff.
  1139  	ADD	R4,R3,R7
  1140  	SUBC	R3,R7,R6
  1141  	SUBE	R0,R0,R9
  1142  	MOVW	R9,R6
  1143  	OR	R6,R7,R7
  1144  
  1145  	RLDIMI	$16,R5,$32,R5
  1146  	CMPU	R4,$32		// Check if it's a small string (<=32 bytes). Those will be processed differently.
  1147  	MOVD	$-1,R9
  1148  	WORD $0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
  1149  	RLDIMI	$32,R5,$0,R5
  1150  	ADD	$-1,R7,R7	// R7 = address of the last byte to examine.
  1151  #ifdef GOARCH_ppc64le
  1152  	SLD	R6,R9,R9	// Prepare mask for Little Endian
  1153  #else
  1154  	SRD	R6,R9,R9	// Same for Big Endian
  1155  #endif
  1156  	BLE	small_string	// Jump to the small string case if it's <=32 bytes.
  1157  
  1158  	// Case for length >32 bytes
  1159  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1160  	CMPB	R12,R5,R3	// Check for a match.
  1161  	AND	R9,R3,R3	// Mask bytes below s_base
  1162  	RLDICL	$0,R7,$61,R4	// Offset of the last byte within its doubleword (low 3 bits of R7).
  1163  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7
  1164  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
  1165  	BNE	CR7,done
  1166  
  1167  	// Copy the low bits of R8 into CR7 and test the bit with value 8.
  1168  	// If it is set, the next doubleword at 8(R8) is 16-byte aligned, so
        	// go straight to the loop setup.
  1168  	MOVFL	R8,CR7
  1169  	BC	12,28,loop_setup
  1170  
  1171  	// R8 is 16-byte aligned, so handle the second doubleword separately.
  1172  	MOVDU	8(R8),R12
  1173  	CMPB	R12,R5,R3
  1174  	CMPU	R3,$0,CR7
  1175  	BNE	CR7,done
  1176  
  1177  loop_setup:
  1178  	// The next doubleword, 8(R8), is now on a 16-byte boundary. We will load
  1179  	// two doublewords per loop iteration. The last doubleword is in R7, so our
  1180  	// loop counter starts at (R7-R8)/16.
  1181  	SUB	R8,R7,R6
  1182  	SRD	$4,R6,R6
  1183  	MOVD	R6,CTR
  1184  
  1185  	// Note: when we have an align directive, align this loop to 32 bytes so
  1186  	// it fits in a single icache sector.
  1187  loop:
  1188  	// Load two doublewords, then compare and merge in a single register. We
  1189  	// will check two doublewords per iteration, then find out which of them
  1190  	// contains the byte later. This speeds up the search.
  1191  	MOVD	8(R8),R12
  1192  	MOVDU	16(R8),R11
  1193  	CMPB	R12,R5,R3
  1194  	CMPB	R11,R5,R9
  1195  	OR	R3,R9,R6
  1196  	CMPU	R6,$0,CR7
  1197  	BNE	CR7,found
  1198  	BC	16,0,loop	// Decrement CTR and loop while it is non-zero.
  1199  
  1200  	// Counter zeroed, but we may have another doubleword to read
  1201  	CMPU	R8,R7
  1202  	BEQ	notfound
  1203  
  1204  	MOVDU	8(R8),R12
  1205  	CMPB	R12,R5,R3
  1206  	CMPU	R3,$0,CR6
  1207  	BNE	CR6,done
  1208  
  1209  notfound:
  1210  	MOVD	$-1,R3
  1211  	MOVD	R3,(R14)
  1212  	RET
  1213  
  1214  found:
  1215  	// One of the doublewords from the loop contains the byte we are looking
  1216  	// for. Check the first doubleword and adjust the address if found.
  1217  	CMPU	R3,$0,CR6
  1218  	ADD	$-8,R8,R8
  1219  	BNE	CR6,done
  1220  
  1221  	// Not found, so it must be in the second doubleword of the merged pair.
  1222  	MOVD	R9,R3
  1223  	ADD	$8,R8,R8
  1224  
  1225  done:
  1226  	// At this point, R3 has 0xFF in the same position as the byte we are
  1227  	// looking for in the doubleword. Use that to calculate the exact index
  1228  	// of the byte.
  1229  #ifdef GOARCH_ppc64le
  1230  	ADD	$-1,R3,R11
  1231  	ANDN	R3,R11,R11
  1232  	POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
  1233  #else
  1234  	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1235  #endif
  1236  	CMPU	R8,R7		// Check if we are at the last doubleword.
  1237  	SRD	$3,R11		// Convert the zero-bit count to a byte offset.
  1238  	ADD	R11,R8,R3
  1239  	CMPU	R11,R4,CR7	// If at the last doubleword, check the byte offset.
  1240  	BNE	return		// Not the last doubleword: the match is in range.
  1241  	BLE	CR7,return	// Last doubleword: in range if offset <= last byte's offset.
  1242  	MOVD	$-1,R3		// Match lies past the end of the data: not found.
  1243  	MOVD	R3,(R14)
  1244  	RET
  1245  
  1246  return:
  1247  	SUB	R10,R3		// Calculate index.
  1248  	MOVD	R3,(R14)
  1249  	RET
  1250  
  1251  small_string:
  1252  	// We unroll this loop for better performance.
  1253  	CMPU	R4,$0		// Check for length=0
  1254  	BEQ	notfound
  1255  
  1256  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1257  	CMPB	R12,R5,R3	// Check for a match.
  1258  	AND	R9,R3,R3	// Mask bytes below s_base.
  1259  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
  1260  	RLDICL	$0,R7,$61,R4	// Offset of the last byte within its doubleword (low 3 bits of R7).
  1261  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
  1262          CMPU	R8,R7
  1263  	BNE	CR7,done
  1264  	BEQ	notfound	// Hit length.
  1265  
  1266  	MOVDU	8(R8),R12	// Second doubleword.
  1267  	CMPB	R12,R5,R3
  1268  	CMPU	R3,$0,CR6
  1269  	CMPU	R8,R7
  1270  	BNE	CR6,done
  1271  	BEQ	notfound
  1272  
  1273  	MOVDU	8(R8),R12	// Third doubleword.
  1274  	CMPB	R12,R5,R3
  1275  	CMPU	R3,$0,CR6
  1276  	CMPU	R8,R7
  1277  	BNE	CR6,done
  1278  	BEQ	notfound
  1279  
  1280  	MOVDU	8(R8),R12	// Fourth doubleword.
  1281  	CMPB	R12,R5,R3
  1282  	CMPU	R3,$0,CR6
  1283  	CMPU	R8,R7
  1284  	BNE	CR6,done
  1285  	BEQ	notfound
  1286  
  1287  	MOVDU	8(R8),R12	// Fifth and final doubleword of the unrolled sequence.
  1288  	CMPB	R12,R5,R3
  1289  	CMPU	R3,$0,CR6
  1290  	CMPU	R8,R7
  1291  	BNE	CR6,done
  1292  	BR	notfound
  1293  
  1294  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Loads both strings into the registers expected by cmpbody
        	// (R3/R4 = lengths, R5/R6 = addresses, R7 = &ret) and branches to
        	// the variant selected by GOARCH_ppc64le.
  1295  	MOVD	s1_base+0(FP), R5
  1296  	MOVD	s1_len+8(FP), R3
  1297  	MOVD	s2_base+16(FP), R6
  1298  	MOVD	s2_len+24(FP), R4
  1299  	MOVD	$ret+32(FP), R7
  1300  #ifdef	GOARCH_ppc64le
  1301  	BR	cmpbodyLE<>(SB)
  1302  #else
  1303  	BR      cmpbodyBE<>(SB)
  1304  #endif
  1305  
  1306  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Loads both slices into the registers expected by cmpbody
        	// (R3/R4 = lengths, R5/R6 = addresses, R7 = &ret) and branches to
        	// the variant selected by GOARCH_ppc64le.
  1307  	MOVD	s1+0(FP), R5	// first slice: data pointer
  1308  	MOVD	s1+8(FP), R3	// first slice: length
  1309  	MOVD	s2+24(FP), R6	// second slice: data pointer
  1310  	MOVD	s2+32(FP), R4	// second slice: length
  1311  	MOVD	$ret+48(FP), R7
  1312  #ifdef	GOARCH_ppc64le
  1313  	BR	cmpbodyLE<>(SB)
  1314  #else
  1315  	BR      cmpbodyBE<>(SB)
  1316  #endif
  1317  
  1318  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets R3 to zero and returns.
  1319  	MOVW	$0, R3
  1320  	RET
  1321  
  1322  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1323  // Must obey the gcc calling convention.
        // The result is left in R3.
  1324  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
  1325  	// g (R30) and R31 are callee-save in the C ABI, so save them
  1326  	MOVD	g, R4
  1327  	MOVD	R31, R5
  1328  	MOVD	LR, R6			// the BL below overwrites LR
  1329  
  1330  	BL	runtime·load_g(SB)	// clobbers g (R30), R31
  1331  	MOVD	g_m(g), R3		// R3 = g->m
  1332  	MOVD	m_curg(R3), R3		// R3 = g->m->curg
  1333  	MOVD	(g_stack+stack_hi)(R3), R3	// R3 = curg.stack.hi
  1334  
  1335  	MOVD	R4, g			// restore the saved registers
  1336  	MOVD	R5, R31
  1337  	MOVD	R6, LR
  1338  	RET
  1339  
  1340  // The top-most function running on a goroutine
  1341  // returns to goexit+PCQuantum.
  1342  //
  1343  // When dynamically linking Go, it can be returned to from a function
  1344  // implemented in a different module and so needs to reload the TOC pointer
  1345  // from the stack (although this function declares that it does not set up a
  1346  // frame, newproc1 does in fact allocate one for goexit and saves the TOC
  1347  // pointer in the correct place).
  1348  // goexit+_PCQuantum is halfway through the usual global entry point prologue
  1349  // that derives r2 from r12 which is a bit silly, but not harmful.
  1350  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
  1351  	MOVD	24(R1), R2		// reload the TOC pointer saved in the frame
  1352  	BL	runtime·goexit1(SB)	// does not return
  1353  	// traceback from goexit1 must hit code range of goexit
  1354  	MOVD	R0, R0	// NOP
  1355  
  1356  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
        	// Empty body: returns immediately.
  1357  	RET
  1358  
  1359  // prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
  1360  // module containing runtime) to the frame that goexit will execute in when
  1361  // the goroutine exits. It's implemented in assembly mainly because that's the
  1362  // easiest way to get access to R2.
  1363  TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
  1364        MOVD    sp+0(FP), R3	// R3 = the sp argument (base of the goexit frame)
  1365        MOVD    R2, 24(R3)	// store the TOC pointer at offset 24, where goexit reloads it
  1366        RET
  1367  
  1368  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
        	// Appends the moduledata pointed to by R3 to the module list: it is
        	// stored as the next link of the current last module and then becomes
        	// runtime·lastmoduledatap.
        	// NOTE(review): R31 is saved and restored around the global accesses,
        	// presumably because they may use it as a scratch register — confirm.
  1369  	ADD	$-8, R1			// make room on the stack for R31
  1370  	MOVD	R31, 0(R1)
  1371  	MOVD	runtime·lastmoduledatap(SB), R4
  1372  	MOVD	R3, moduledata_next(R4)	// lastmoduledatap.next = R3
  1373  	MOVD	R3, runtime·lastmoduledatap(SB)	// lastmoduledatap = R3
  1374  	MOVD	0(R1), R31
  1375  	ADD	$8, R1
  1376  	RET
  1377  
  1378  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 in the single-byte result.
  1379  	MOVW	$1, R3
  1380  	MOVB	R3, ret+0(FP)
  1381  	RET