github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/runtime/asm_ppc64x.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "go_tls.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  #include "asm_ppc64x.h"
    12  
    13  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Process bootstrap. Sets up g0 on the stack handed over by the OS,
        	// calls _cgo_init when it is present, runs check/args/osinit/schedinit,
        	// queues runtime·main via newproc and finally enters mstart.
    14  	// On entry: R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
    15  
    16  	// initialize essential registers
    17  	BL	runtime·reginit(SB)
    18  
    19  	SUB	$(FIXED_FRAME+16), R1	// open a frame with 16 bytes of argument space
    20  	MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
    21  	MOVW	R3, FIXED_FRAME+0(R1)	// argc
    22  	MOVD	R4, FIXED_FRAME+8(R1)	// argv
    23  
    24  	// create istack out of the given (operating system) stack.
    25  	// _cgo_init may update stackguard.
    26  	MOVD	$runtime·g0(SB), g
    27  	MOVD	$(-64*1024), R31
    28  	ADD	R31, R1, R3		// R3 = R1 - 64KB: provisional lower bound of g0's stack
    29  	MOVD	R3, g_stackguard0(g)
    30  	MOVD	R3, g_stackguard1(g)
    31  	MOVD	R3, (g_stack+stack_lo)(g)
    32  	MOVD	R1, (g_stack+stack_hi)(g)
    33  
    34  	// if there is a _cgo_init, call it using the gcc ABI.
    35  	MOVD	_cgo_init(SB), R12
    36  	CMP	R0, R12
    37  	BEQ	nocgo			// _cgo_init is zero: not a cgo program
    38  	MOVD	R12, CTR		// r12 = "global function entry point"
    39  	MOVD	R13, R5			// arg 2: TLS base pointer
    40  	MOVD	$setg_gcc<>(SB), R4 	// arg 1: setg
    41  	MOVD	g, R3			// arg 0: G
    42  	// C functions expect 32 bytes of space on caller stack frame
    43  	// and a 16-byte aligned R1
    44  	MOVD	R1, R14			// save current stack
    45  	SUB	$32, R1			// reserve 32 bytes
    46  	RLDCR	$0, R1, $~15, R1	// 16-byte align
    47  	BL	(CTR)			// may clobber R0, R3-R12
    48  	MOVD	R14, R1			// restore stack
    49  	MOVD	24(R1), R2		// reload the TOC pointer stashed above
    50  	XOR	R0, R0			// fix R0
    51  
    52  nocgo:
    53  	// update stackguard after _cgo_init
    54  	MOVD	(g_stack+stack_lo)(g), R3
    55  	ADD	$const__StackGuard, R3
    56  	MOVD	R3, g_stackguard0(g)
    57  	MOVD	R3, g_stackguard1(g)
    58  
    59  	// set the per-goroutine and per-mach "registers"
    60  	MOVD	$runtime·m0(SB), R3
    61  
    62  	// save m->g0 = g0
    63  	MOVD	g, m_g0(R3)
    64  	// save m0 to g0->m
    65  	MOVD	R3, g_m(g)
    66  
    67  	BL	runtime·check(SB)
    68  
    69  	// args are already prepared
    70  	BL	runtime·args(SB)
    71  	BL	runtime·osinit(SB)
    72  	BL	runtime·schedinit(SB)
    73  
    74  	// create a new goroutine to start program
        	// Six doublewords are pushed: the entry word, then five zeros
        	// (presumably newproc's zero argument size plus the fixed frame
        	// area - TODO confirm against newproc and FIXED_FRAME).
    75  	MOVD	$runtime·mainPC(SB), R3		// entry
    76  	MOVDU	R3, -8(R1)
    77  	MOVDU	R0, -8(R1)
    78  	MOVDU	R0, -8(R1)
    79  	MOVDU	R0, -8(R1)
    80  	MOVDU	R0, -8(R1)
    81  	MOVDU	R0, -8(R1)
    82  	BL	runtime·newproc(SB)
    83  	ADD	$(16+FIXED_FRAME), R1	// drop the words pushed above
    84  
    85  	// start this M
    86  	BL	runtime·mstart(SB)
    87  
        	// Only reached if mstart returns: store through R0 (kept at zero,
        	// see reginit), i.e. to address 0.
    88  	MOVD	R0, 0(R0)
    89  	RET
    90  
    91  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// one 8-byte word holding the address of runtime·main
    92  GLOBL	runtime·mainPC(SB),RODATA,$8			// read-only, 8 bytes; rt0_go passes its address to newproc
    93  
    94  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// Stores through R0, which the runtime keeps at zero (see reginit),
        	// i.e. a store to address 0.
    95  	MOVD	R0, 0(R0) // TODO: TD (presumably: use the trap-doubleword instruction instead)
    96  	RET
    97  
    98  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        	// No work is done here on ppc64x; returns immediately.
    99  	RET
   100  
   101  TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
   102  	// crosscall_ppc64 and crosscall2 need to reginit, but can't
   103  	// get at the 'runtime.reginit' symbol.
        	// Tail-branch: reginit's RET returns straight to our caller.
   104  	BR	runtime·reginit(SB)
   105  
   106  TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
   107  	// set R0 to zero, it's expected by the toolchain
        	// (code in this file uses R0 as a constant-zero source operand).
   108  	XOR R0, R0
   109  	RET
   110  
   111  /*
   112   *  go-routine
   113   */
   114  
   115  // void gosave(Gobuf*)
   116  // Save the caller's state in Gobuf (setjmp-like): sp, pc (from LR) and g are recorded, lr and ret are zeroed.
   117  TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   118  	MOVD	buf+0(FP), R3
   119  	MOVD	R1, gobuf_sp(R3)
   120  	MOVD	LR, R31			// copy LR into a general register so it can be stored
   121  	MOVD	R31, gobuf_pc(R3)
   122  	MOVD	g, gobuf_g(R3)
   123  	MOVD	R0, gobuf_lr(R3)
   124  	MOVD	R0, gobuf_ret(R3)
   125  	// Assert ctxt is zero. See func save.
   126  	MOVD	gobuf_ctxt(R3), R3
   127  	CMP	R0, R3
   128  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   129  	BL	runtime·badctxt(SB)
   130  	RET
   131  
   132  // void gogo(Gobuf*)
   133  // Restore state from Gobuf (longjmp-like): load g, sp, lr, ret and ctxt from buf, clear sp/ret/lr/ctxt in buf, then branch to buf's pc.
   134  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   135  	MOVD	buf+0(FP), R5
   136  	MOVD	gobuf_g(R5), g	// g = buf.g
   137  	BL	runtime·save_g(SB)
   138  
   139  	MOVD	0(g), R4	// make sure g is not nil: R4 is not used again, the load itself is the check
   140  	MOVD	gobuf_sp(R5), R1
   141  	MOVD	gobuf_lr(R5), R31
   142  	MOVD	R31, LR
   143  	MOVD	gobuf_ret(R5), R3
   144  	MOVD	gobuf_ctxt(R5), R11
   145  	MOVD	R0, gobuf_sp(R5)
   146  	MOVD	R0, gobuf_ret(R5)
   147  	MOVD	R0, gobuf_lr(R5)
   148  	MOVD	R0, gobuf_ctxt(R5)
   149  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   150  	MOVD	gobuf_pc(R5), R12	// target PC also goes in R12
   151  	MOVD	R12, CTR
   152  	BR	(CTR)
   153  
   154  // void mcall(fn func(*g))
   155  // Switch to m->g0's stack, call fn(g).
   156  // Fn must never return. It should gogo(&g->sched)
   157  // to keep running g.
        // If fn does return, or if mcall is entered while already on g0,
        // control goes to badmcall2 / badmcall respectively.
   158  TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   159  	// Save caller state in g->sched
   160  	MOVD	R1, (g_sched+gobuf_sp)(g)
   161  	MOVD	LR, R31
   162  	MOVD	R31, (g_sched+gobuf_pc)(g)
   163  	MOVD	R0, (g_sched+gobuf_lr)(g)
   164  	MOVD	g, (g_sched+gobuf_g)(g)
   165  
   166  	// Switch to m->g0 & its stack, call fn.
   167  	MOVD	g, R3				// R3 = the calling g, later passed to fn
   168  	MOVD	g_m(g), R8
   169  	MOVD	m_g0(R8), g
   170  	BL	runtime·save_g(SB)
   171  	CMP	g, R3
   172  	BNE	2(PC)				// g0 differs from the caller: proceed
   173  	BR	runtime·badmcall(SB)		// the caller was already g0
   174  	MOVD	fn+0(FP), R11			// context
   175  	MOVD	0(R11), R12			// code pointer
   176  	MOVD	R12, CTR
   177  	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
   178  	MOVDU	R3, -8(R1)			// push the calling g as fn's argument
   179  	MOVDU	R0, -8(R1)			// followed by four zero doublewords
   180  	MOVDU	R0, -8(R1)
   181  	MOVDU	R0, -8(R1)
   182  	MOVDU	R0, -8(R1)
   183  	BL	(CTR)
   184  	MOVD	24(R1), R2			// reload R2 from the 24(R1) TOC slot (same slot rt0_go uses)
   185  	BR	runtime·badmcall2(SB)		// fn returned, which it must not
   186  
   187  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   188  // of the G stack. We need to distinguish the routine that
   189  // lives at the bottom of the G stack from the one that lives
   190  // at the top of the system stack because the one at the top of
   191  // the system stack terminates the stack walk (see topofstack()).
   192  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   193  	// We have several undefs here so that 16 bytes past
   194  	// $runtime·systemstack_switch lies within them whether or not the
   195  	// instructions that derive r2 from r12 are there.
        	// systemstack records $runtime·systemstack_switch+16 as the saved PC,
        	// so do not add or remove instructions here without updating it.
   196  	UNDEF
   197  	UNDEF
   198  	UNDEF
   199  	BL	(LR)	// make sure this function is not leaf
   200  	RET
   201  
   202  // func systemstack(fn func())
        // Runs fn on the m's g0 stack. When the caller is already g0 or gsignal,
        // fn is simply called on the current stack. When the caller is m->curg,
        // its state is saved in g->sched, the stack is switched to g0, fn is
        // called, and the original g and stack are restored afterwards. Any
        // other g is reported through badsystemstack.
   203  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   204  	MOVD	fn+0(FP), R3	// R3 = fn
   205  	MOVD	R3, R11		// context
   206  	MOVD	g_m(g), R4	// R4 = m
   207  
   208  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   209  	CMP	g, R5
   210  	BEQ	noswitch
   211  
   212  	MOVD	m_g0(R4), R5	// R5 = g0
   213  	CMP	g, R5
   214  	BEQ	noswitch
   215  
   216  	MOVD	m_curg(R4), R6
   217  	CMP	g, R6
   218  	BEQ	switch
   219  
   220  	// Bad: g is not gsignal, not g0, not curg. What is it?
   221  	// Hide call from linker nosplit analysis.
        	// NOTE(review): if badsystemstack ever returned, execution would fall
        	// through into switch below; morestack follows its bad* calls with
        	// runtime·abort - confirm badsystemstack cannot return.
   222  	MOVD	$runtime·badsystemstack(SB), R12
   223  	MOVD	R12, CTR
   224  	BL	(CTR)
   225  
   226  switch:
   227  	// save our state in g->sched. Pretend to
   228  	// be systemstack_switch if the G stack is scanned.
   229  	MOVD	$runtime·systemstack_switch(SB), R6
   230  	ADD     $16, R6 // get past prologue (including r2-setting instructions when they're there)
   231  	MOVD	R6, (g_sched+gobuf_pc)(g)
   232  	MOVD	R1, (g_sched+gobuf_sp)(g)
   233  	MOVD	R0, (g_sched+gobuf_lr)(g)
   234  	MOVD	g, (g_sched+gobuf_g)(g)
   235  
   236  	// switch to g0
   237  	MOVD	R5, g		// R5 still holds m->g0 from the check above
   238  	BL	runtime·save_g(SB)
   239  	MOVD	(g_sched+gobuf_sp)(g), R3
   240  	// make it look like mstart called systemstack on g0, to stop traceback
   241  	SUB	$FIXED_FRAME, R3
   242  	MOVD	$runtime·mstart(SB), R4
   243  	MOVD	R4, 0(R3)
   244  	MOVD	R3, R1
   245  
   246  	// call target function
   247  	MOVD	0(R11), R12	// code pointer
   248  	MOVD	R12, CTR
   249  	BL	(CTR)
   250  
   251  	// restore TOC pointer. It seems unlikely that we will use systemstack
   252  	// to call a function defined in another module, but the results of
   253  	// doing so would be so confusing that it's worth doing this.
   254  	MOVD	g_m(g), R3
   255  	MOVD	m_curg(R3), g
   256  	MOVD	(g_sched+gobuf_sp)(g), R3	// R3 = the SP saved at switch
   257  	MOVD	24(R3), R2			// TOC slot of the original frame
   258  	// switch back to g
        	// NOTE(review): g was already set to m->curg just above, so these two
        	// loads appear to repeat that work; only save_g is new here.
   259  	MOVD	g_m(g), R3
   260  	MOVD	m_curg(R3), g
   261  	BL	runtime·save_g(SB)
   262  	MOVD	(g_sched+gobuf_sp)(g), R1
   263  	MOVD	R0, (g_sched+gobuf_sp)(g)	// clear the saved SP now that it has been consumed
   264  	RET
   265  
   266  noswitch:
   267  	// already on m stack, just call directly
   268  	// On other arches we do a tail call here, but it appears to be
   269  	// impossible to tail call a function pointer in shared mode on
   270  	// ppc64 because the caller is responsible for restoring the TOC.
   271  	MOVD	0(R11), R12	// code pointer
   272  	MOVD	R12, CTR
   273  	BL	(CTR)
   274  	MOVD	24(R1), R2	// restore the TOC pointer after the indirect call
   275  	RET
   276  
   277  /*
   278   * support for morestack
   279   */
   280  
   281  // Called during function prolog when more stack is needed.
   282  // Caller has already loaded:
   283  // R3: framesize, R4: argsize, R5: LR
   284  //
   285  // The traceback routines see morestack on a g0 as being
   286  // the top of a stack (for example, morestack calling newstack
   287  // calling the scheduler calling newm calling gc), so we must
   288  // record an argument size. For that purpose, it has no arguments.
        //
        // R11 (the closure context) is saved into g->sched.ctxt; callers without
        // a context enter through morestack_noctxt, which zeroes R11 first.
   289  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   290  	// Cannot grow scheduler stack (m->g0).
   291  	MOVD	g_m(g), R7
   292  	MOVD	m_g0(R7), R8
   293  	CMP	g, R8
   294  	BNE	3(PC)			// not g0: skip the two calls below
   295  	BL	runtime·badmorestackg0(SB)
   296  	BL	runtime·abort(SB)
   297  
   298  	// Cannot grow signal stack (m->gsignal).
   299  	MOVD	m_gsignal(R7), R8
   300  	CMP	g, R8
   301  	BNE	3(PC)			// not gsignal: skip the two calls below
   302  	BL	runtime·badmorestackgsignal(SB)
   303  	BL	runtime·abort(SB)
   304  
   305  	// Called from f.
   306  	// Set g->sched to context in f.
   307  	MOVD	R1, (g_sched+gobuf_sp)(g)
   308  	MOVD	LR, R8			// LR = return address inside f
   309  	MOVD	R8, (g_sched+gobuf_pc)(g)
   310  	MOVD	R5, (g_sched+gobuf_lr)(g)
   311  	MOVD	R11, (g_sched+gobuf_ctxt)(g)
   312  
   313  	// Called from f.
   314  	// Set m->morebuf to f's caller.
   315  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   316  	MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   317  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   318  
   319  	// Call newstack on m->g0's stack.
   320  	MOVD	m_g0(R7), g
   321  	BL	runtime·save_g(SB)
   322  	MOVD	(g_sched+gobuf_sp)(g), R1
   323  	MOVDU   R0, -(FIXED_FRAME+0)(R1)	// create a call frame on g0
   324  	BL	runtime·newstack(SB)
   325  
   326  	// Not reached, but make sure the return PC from the call to newstack
   327  	// is still in this function, and not the beginning of the next.
   328  	UNDEF
   329  
   330  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Entry point for callers with no closure context: zero R11 (which
        	// morestack stores as g->sched.ctxt) and continue in morestack.
   331  	MOVD	R0, R11
   332  	BR	runtime·morestack(SB)
   333  
   334  // reflectcall: call a function with the given argument list
   335  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   336  // we don't have variable-sized frames, so we use a small number
   337  // of constant-sized-frame functions to encode a few bits of size in the pc.
   338  // Caution: ugly multiline assembly macros in your future!
   339  
        // DISPATCH(NAME, MAXSIZE): expects the argument size in R3. If R3 is
        // greater than MAXSIZE, the BGT skips the three-instruction jump and
        // execution continues after the macro; otherwise control branches to
        // NAME through CTR (the address is also left in R12). Clobbers R31.
   340  #define DISPATCH(NAME,MAXSIZE)		\
   341  	MOVD	$MAXSIZE, R31;		\
   342  	CMP	R3, R31;		\
   343  	BGT	4(PC);			\
   344  	MOVD	$NAME(SB), R12;		\
   345  	MOVD	R12, CTR;		\
   346  	BR	(CTR)
   347  // Note: can't just "BR NAME(SB)" - bad inlining results.
   348  
   349  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: branches straight to ·reflectcall.
   350  	BR	·reflectcall(SB)
   351  
   352  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
        	// Loads argsize into R3 and walks the DISPATCH ladder: control is
        	// transferred to the first runtime·callN whose N is >= argsize.
        	// If argsize exceeds the largest N, badreflectcall is entered.
   353  	MOVWZ argsize+24(FP), R3
   354  	DISPATCH(runtime·call32, 32)
   355  	DISPATCH(runtime·call64, 64)
   356  	DISPATCH(runtime·call128, 128)
   357  	DISPATCH(runtime·call256, 256)
   358  	DISPATCH(runtime·call512, 512)
   359  	DISPATCH(runtime·call1024, 1024)
   360  	DISPATCH(runtime·call2048, 2048)
   361  	DISPATCH(runtime·call4096, 4096)
   362  	DISPATCH(runtime·call8192, 8192)
   363  	DISPATCH(runtime·call16384, 16384)
   364  	DISPATCH(runtime·call32768, 32768)
   365  	DISPATCH(runtime·call65536, 65536)
   366  	DISPATCH(runtime·call131072, 131072)
   367  	DISPATCH(runtime·call262144, 262144)
   368  	DISPATCH(runtime·call524288, 524288)
   369  	DISPATCH(runtime·call1048576, 1048576)
   370  	DISPATCH(runtime·call2097152, 2097152)
   371  	DISPATCH(runtime·call4194304, 4194304)
   372  	DISPATCH(runtime·call8388608, 8388608)
   373  	DISPATCH(runtime·call16777216, 16777216)
   374  	DISPATCH(runtime·call33554432, 33554432)
   375  	DISPATCH(runtime·call67108864, 67108864)
   376  	DISPATCH(runtime·call134217728, 134217728)
   377  	DISPATCH(runtime·call268435456, 268435456)
   378  	DISPATCH(runtime·call536870912, 536870912)
   379  	DISPATCH(runtime·call1073741824, 1073741824)
        	// argsize is larger than every frame size above.
   380  	MOVD	$runtime·badreflectcall(SB), R12
   381  	MOVD	R12, CTR
   382  	BR	(CTR)
   383  
        // CALLFN(NAME, MAXSIZE) defines the function NAME with a MAXSIZE-byte
        // frame. It copies argsize bytes, one at a time, from arg to
        // FIXED_FRAME(R1), calls the code pointer found in f (f itself is passed
        // as the context in R11), reloads R2 from 24(R1), and then hands
        // argtype, arg+retoffset, the frame copy+retoffset and n-retoffset to
        // callRet in R7, R3, R5 and R4 to copy the results back.
        // The copy loop keeps R3 and R5 one byte before the next source and
        // destination byte because MOVBZU pre-increments its address register.
   384  #define CALLFN(NAME,MAXSIZE)			\
   385  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   386  	NO_LOCAL_POINTERS;			\
   387  	/* copy arguments to stack */		\
   388  	MOVD	arg+16(FP), R3;			\
   389  	MOVWZ	argsize+24(FP), R4;			\
   390  	MOVD	R1, R5;				\
   391  	ADD	$(FIXED_FRAME-1), R5;			\
   392  	SUB	$1, R3;				\
   393  	ADD	R5, R4;				\
   394  	CMP	R5, R4;				\
   395  	BEQ	4(PC);				\
   396  	MOVBZU	1(R3), R6;			\
   397  	MOVBZU	R6, 1(R5);			\
   398  	BR	-4(PC);				\
   399  	/* call function */			\
   400  	MOVD	f+8(FP), R11;			\
   401  	MOVD	(R11), R12;			\
   402  	MOVD	R12, CTR;			\
   403  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   404  	BL	(CTR);				\
   405  	MOVD	24(R1), R2;			\
   406  	/* copy return values back */		\
   407  	MOVD	argtype+0(FP), R7;		\
   408  	MOVD	arg+16(FP), R3;			\
   409  	MOVWZ	n+24(FP), R4;			\
   410  	MOVWZ	retoffset+28(FP), R6;		\
   411  	ADD	$FIXED_FRAME, R1, R5;		\
   412  	ADD	R6, R5; 			\
   413  	ADD	R6, R3;				\
   414  	SUB	R6, R4;				\
   415  	BL	callRet<>(SB);			\
   416  	RET
   417  
   418  // callRet copies return values back at the end of call*. This is a
   419  // separate function so it can allocate stack space for the arguments
   420  // to reflectcallmove. It does not follow the Go ABI; it expects its
   421  // arguments in registers.
        // As set up by CALLFN: R7 = argtype, R3 = arg + retoffset,
        // R5 = frame copy + retoffset, R4 = n - retoffset. They are stored, in
        // that order, as the four stack arguments of reflectcallmove.
   422  TEXT callRet<>(SB), NOSPLIT, $32-0
   423  	MOVD	R7, FIXED_FRAME+0(R1)
   424  	MOVD	R3, FIXED_FRAME+8(R1)
   425  	MOVD	R5, FIXED_FRAME+16(R1)
   426  	MOVD	R4, FIXED_FRAME+24(R1)
   427  	BL	runtime·reflectcallmove(SB)
   428  	RET
   429  
        // One fixed-frame call function per power of two from 32 bytes to 1 GiB.
        // The list must stay in step with the DISPATCH ladder in ·reflectcall.
   430  CALLFN(·call32, 32)
   431  CALLFN(·call64, 64)
   432  CALLFN(·call128, 128)
   433  CALLFN(·call256, 256)
   434  CALLFN(·call512, 512)
   435  CALLFN(·call1024, 1024)
   436  CALLFN(·call2048, 2048)
   437  CALLFN(·call4096, 4096)
   438  CALLFN(·call8192, 8192)
   439  CALLFN(·call16384, 16384)
   440  CALLFN(·call32768, 32768)
   441  CALLFN(·call65536, 65536)
   442  CALLFN(·call131072, 131072)
   443  CALLFN(·call262144, 262144)
   444  CALLFN(·call524288, 524288)
   445  CALLFN(·call1048576, 1048576)
   446  CALLFN(·call2097152, 2097152)
   447  CALLFN(·call4194304, 4194304)
   448  CALLFN(·call8388608, 8388608)
   449  CALLFN(·call16777216, 16777216)
   450  CALLFN(·call33554432, 33554432)
   451  CALLFN(·call67108864, 67108864)
   452  CALLFN(·call134217728, 134217728)
   453  CALLFN(·call268435456, 268435456)
   454  CALLFN(·call536870912, 536870912)
   455  CALLFN(·call1073741824, 1073741824)
   456  
   457  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Implemented as an immediate return here; no yield or pause
        	// instruction is issued.
   458  	RET
   459  
   460  // void jmpdefer(fv, sp);
   461  // called from deferreturn.
   462  // 1. grab stored LR for caller
   463  // 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
   464  // 3. BR to fn
   465  // When dynamically linking Go, it is not sufficient to rewind to the BL
   466  // deferreturn -- we might be jumping between modules and so we need to reset
   467  // the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
   468  // the BL deferreturn and jmpdefer rewinds to that.
   469  TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   470  	MOVD	0(R1), R31		// step 1: saved LR of the caller
   471  	SUB     $8, R31			// step 2: back up two 4-byte instructions
   472  	MOVD	R31, LR			// fn will "return" to the rewound address
   473  
   474  	MOVD	fv+0(FP), R11		// context
   475  	MOVD	argp+8(FP), R1
   476  	SUB	$FIXED_FRAME, R1	// SP = argp - FIXED_FRAME
   477  	MOVD	0(R11), R12		// code pointer
   478  	MOVD	R12, CTR
   479  	BR	(CTR)			// step 3
   480  
   481  // Save state of caller into g->sched. Smashes R31.
        // Records pc (from LR) and sp, zeroes lr and ret, and calls badctxt if
        // g->sched.ctxt is not already zero.
   482  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   483  	MOVD	LR, R31
   484  	MOVD	R31, (g_sched+gobuf_pc)(g)
   485  	MOVD	R1, (g_sched+gobuf_sp)(g)
   486  	MOVD	R0, (g_sched+gobuf_lr)(g)
   487  	MOVD	R0, (g_sched+gobuf_ret)(g)
   488  	// Assert ctxt is zero. See func save.
   489  	MOVD	(g_sched+gobuf_ctxt)(g), R31
   490  	CMP	R0, R31
   491  	BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   492  	BL	runtime·badctxt(SB)
   493  	RET
   494  
   495  // func asmcgocall(fn, arg unsafe.Pointer) int32
   496  // Call fn(arg) on the scheduler stack,
   497  // aligned appropriately for the gcc ABI.
   498  // See cgocall.go for more details.
        // The low 32 bits of whatever fn leaves in R3 are returned.
   499  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   500  	MOVD	fn+0(FP), R3
   501  	MOVD	arg+8(FP), R4
   502  
   503  	MOVD	R1, R7		// save original stack pointer
   504  	MOVD	g, R5		// R5 = g on entry
   505  
   506  	// Figure out if we need to switch to m->g0 stack.
   507  	// We get called to create new OS threads too, and those
   508  	// come in on the m->g0 stack already.
   509  	MOVD	g_m(g), R6
   510  	MOVD	m_g0(R6), R6
   511  	CMP	R6, g
   512  	BEQ	g0
   513  	BL	gosave<>(SB)
   514  	MOVD	R6, g
   515  	BL	runtime·save_g(SB)
   516  	MOVD	(g_sched+gobuf_sp)(g), R1
   517  
   518  	// Now on a scheduling stack (a pthread-created stack).
   519  g0:
   520  	// Save room for two of our pointers, plus 32 bytes of callee
   521  	// save area that lives on the caller stack.
   522  	SUB	$48, R1
   523  	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
   524  	MOVD	R5, 40(R1)	// save old g on stack
   525  	MOVD	(g_stack+stack_hi)(R5), R5
   526  	SUB	R7, R5		// R5 = old g's stack.hi - original SP
   527  	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   528  	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
   529  	// This is a "global call", so put the global entry point in r12
   530  	MOVD	R3, R12
   531  	MOVD	R12, CTR
   532  	MOVD	R4, R3		// arg in r3
   533  	BL	(CTR)
   534  
   535  	// C code can clobber R0, so set it back to 0.  F27-F31 are
   536  	// callee save, so we don't need to recover those.
   537  	XOR	R0, R0
   538  	// Restore g, stack pointer, toc pointer.
   539  	// R3 is errno, so don't touch it
   540  	MOVD	40(R1), g
   541  	MOVD    (g_stack+stack_hi)(g), R5
   542  	MOVD    32(R1), R6
   543  	SUB     R6, R5		// R5 = stack.hi - saved depth = original SP
   544  	MOVD    24(R5), R2	// TOC slot of the original frame
   545  	BL	runtime·save_g(SB)
        	// The original SP is recomputed after the save_g call and then
        	// installed in R1.
   546  	MOVD	(g_stack+stack_hi)(g), R5
   547  	MOVD	32(R1), R6
   548  	SUB	R6, R5
   549  	MOVD	R5, R1
   550  
   551  	MOVW	R3, ret+16(FP)
   552  	RET
   553  
   554  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   555  // Turn the fn into a Go func (by taking its address) and call
   556  // cgocallback_gofunc.
   557  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   558  	MOVD	$fn+0(FP), R3		// address of the fn argument slot, not its value
   559  	MOVD	R3, FIXED_FRAME+0(R1)
   560  	MOVD	frame+8(FP), R3
   561  	MOVD	R3, FIXED_FRAME+8(R1)
   562  	MOVD	framesize+16(FP), R3
   563  	MOVD	R3, FIXED_FRAME+16(R1)
   564  	MOVD	ctxt+24(FP), R3
   565  	MOVD	R3, FIXED_FRAME+24(R1)
        	// Indirect call through CTR, with the entry address in R12.
   566  	MOVD	$runtime·cgocallback_gofunc(SB), R12
   567  	MOVD	R12, CTR
   568  	BL	(CTR)
   569  	RET
   570  
   571  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   572  // See cgocall.go for more details.
        // Outline: obtain an m (borrowing one through needm when g is nil),
        // switch from m->g0 to m->curg, call cgocallbackg there, switch back to
        // m->g0, and release a borrowed m with dropm.
   573  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   574  	NO_LOCAL_POINTERS
   575  
   576  	// Load m and g from thread-local storage.
   577  	MOVB	runtime·iscgo(SB), R3
   578  	CMP	R3, $0
   579  	BEQ	nocgo
   580  	BL	runtime·load_g(SB)
   581  nocgo:
   582  
   583  	// If g is nil, Go did not create the current thread.
   584  	// Call needm to obtain one for temporary use.
   585  	// In this case, we're running on the thread stack, so there's
   586  	// lots of space, but the linker doesn't know. Hide the call from
   587  	// the linker analysis by using an indirect call.
   588  	CMP	g, $0
   589  	BEQ	needm
   590  
   591  	MOVD	g_m(g), R8
   592  	MOVD	R8, savedm-8(SP)	// non-zero savedm: the m was not borrowed
   593  	BR	havem
   594  
   595  needm:
   596  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   597  	MOVD	$runtime·needm(SB), R12
   598  	MOVD	R12, CTR
   599  	BL	(CTR)
   600  
   601  	// Set m->sched.sp = SP, so that if a panic happens
   602  	// during the function we are about to execute, it will
   603  	// have a valid SP to run on the g0 stack.
   604  	// The next few lines (after the havem label)
   605  	// will save this SP onto the stack and then write
   606  	// the same SP back to m->sched.sp. That seems redundant,
   607  	// but if an unrecovered panic happens, unwindm will
   608  	// restore the g->sched.sp from the stack location
   609  	// and then systemstack will try to use it. If we don't set it here,
   610  	// that restored SP will be uninitialized (typically 0) and
   611  	// will not be usable.
   612  	MOVD	g_m(g), R8
   613  	MOVD	m_g0(R8), R3
   614  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   615  
   616  havem:
   617  	// Now there's a valid m, and we're running on its m->g0.
   618  	// Save current m->g0->sched.sp on stack and then set it to SP.
   619  	// Save current sp in m->g0->sched.sp in preparation for
   620  	// switch back to m->curg stack.
   621  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
   622  	MOVD	m_g0(R8), R3
   623  	MOVD	(g_sched+gobuf_sp)(R3), R4
   624  	MOVD	R4, savedsp-16(SP)
   625  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   626  
   627  	// Switch to m->curg stack and call runtime.cgocallbackg.
   628  	// Because we are taking over the execution of m->curg
   629  	// but *not* resuming what had been running, we need to
   630  	// save that information (m->curg->sched) so we can restore it.
   631  	// We can restore m->curg->sched.sp easily, because calling
   632  	// runtime.cgocallbackg leaves SP unchanged upon return.
   633  	// To save m->curg->sched.pc, we push it onto the stack.
   634  	// This has the added benefit that it looks to the traceback
   635  	// routine like cgocallbackg is going to return to that
   636  	// PC (because the frame we allocate below has the same
   637  	// size as cgocallback_gofunc's frame declared above)
   638  	// so that the traceback will seamlessly trace back into
   639  	// the earlier calls.
   640  	//
   641  	// In the new goroutine, -8(SP) is unused (where SP refers to
   642  	// m->curg's SP while we're setting it up, before we've adjusted it).
   643  	MOVD	m_curg(R8), g
   644  	BL	runtime·save_g(SB)
   645  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   646  	MOVD	(g_sched+gobuf_pc)(g), R5
   647  	MOVD	R5, -(FIXED_FRAME+16)(R4)	// saved curg pc at the base of the new frame
   648  	MOVD	ctxt+24(FP), R3
   649  	MOVD	R3, -16(R4)			// ctxt stored at -16 from curg's old SP
   650  	MOVD	$-(FIXED_FRAME+16)(R4), R1	// R1 = curg's old SP - (FIXED_FRAME+16)
   651  	BL	runtime·cgocallbackg(SB)
   652  
   653  	// Restore g->sched (== m->curg->sched) from saved values.
   654  	MOVD	0(R1), R5			// the pc stored at the frame base above
   655  	MOVD	R5, (g_sched+gobuf_pc)(g)
   656  	MOVD	$(FIXED_FRAME+16)(R1), R4	// undo the frame allocation to recover the old SP
   657  	MOVD	R4, (g_sched+gobuf_sp)(g)
   658  
   659  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   660  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   661  	// so we do not have to restore it.)
   662  	MOVD	g_m(g), R8
   663  	MOVD	m_g0(R8), g
   664  	BL	runtime·save_g(SB)
   665  	MOVD	(g_sched+gobuf_sp)(g), R1
   666  	MOVD	savedsp-16(SP), R4
   667  	MOVD	R4, (g_sched+gobuf_sp)(g)
   668  
   669  	// If the m on entry was nil, we called needm above to borrow an m
   670  	// for the duration of the call. Since the call is over, return it with dropm.
   671  	MOVD	savedm-8(SP), R6
   672  	CMP	R6, $0
   673  	BNE	droppedm		// savedm != 0: the m was ours, nothing to drop
   674  	MOVD	$runtime·dropm(SB), R12
   675  	MOVD	R12, CTR
   676  	BL	(CTR)
   677  droppedm:
   678  
   679  	// Done!
   680  	RET
   681  
   682  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and then calls save_g.
   683  TEXT runtime·setg(SB), NOSPLIT, $0-8
   684  	MOVD	gg+0(FP), g
   685  	// This only happens if iscgo, so jump straight to save_g
   686  	BL	runtime·save_g(SB)
   687  	RET
   688  
   689  // void setg_gcc(G*); set g in C TLS.
   690  // Must obey the gcc calling convention.
        // The new g arrives in R3. The caller's g, R31 and LR are restored
        // before returning; R4, R5 and R6 are used as scratch to hold them.
   691  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   692  	// The standard prologue clobbers R31, which is callee-save in
   693  	// the C ABI, so we have to use NOFRAME and save LR ourselves.
   694  	MOVD	LR, R4
   695  	// Also save g and R31, since they're callee-save in C ABI
   696  	MOVD	R31, R5
   697  	MOVD	g, R6
   698  
   699  	MOVD	R3, g
   700  	BL	runtime·save_g(SB)
   701  
   702  	MOVD	R6, g
   703  	MOVD	R5, R31
   704  	MOVD	R4, LR
   705  	RET
   706  
   707  TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
        	// Returns the doubleword at 0(R1). This function is NOFRAME, so R1 is
        	// still the caller's SP and that slot holds the LR the caller saved.
   708  	MOVD	0(R1), R3		// LR saved by caller
   709  	MOVD	R3, ret+0(FP)
   710  	RET
   711  
   712  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Crashes the program: loads from the address in R0 (kept at zero,
        	// see reginit), with UNDEF as a backstop should the load not fault.
   713  	MOVW	(R0), R0
   714  	UNDEF
   715  
   716  #define	TBRL	268
   717  #define	TBRU	269		/* Time base Upper/Lower */
   718  
   719  // int64 runtime·cputicks(void)
        // Returns the 64-bit time base, assembled from two 32-bit SPR reads.
        // The upper half is read before and after the lower half; if the two
        // upper reads differ, the whole sequence is retried.
   720  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   721  	MOVW	SPR(TBRU), R4		// upper half, first read
   722  	MOVW	SPR(TBRL), R3		// lower half
   723  	MOVW	SPR(TBRU), R5		// upper half, second read
   724  	CMPW	R4, R5
   725  	BNE	-4(PC)			// upper half changed: start over
   726  	SLD	$32, R5
   727  	OR	R5, R3			// R3 = upper<<32 | lower
   728  	MOVD	R3, ret+0(FP)
   729  	RET
   730  
   731  // AES hashing not implemented for ppc64
        // Each stub is a single load from the address in R0 (kept at zero, see
        // reginit). There is no RET: a stub that did not fault would run on into
        // the next one.
   732  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   733  	MOVW	(R0), R1
   734  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   735  	MOVW	(R0), R1
   736  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   737  	MOVW	(R0), R1
   738  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   739  	MOVW	(R0), R1
   740  
   741  TEXT runtime·memequal(SB),NOSPLIT,$0-25
        	// memequal(a, b, size) -> one result byte at ret+24(FP).
        	// Loads a, b and size into R3, R4 and R5 and calls memeqbody, then
        	// stores R9 as the result. memeqbody is not visible in this part of
        	// the file; presumably it takes R3/R4/R5 and leaves the answer in R9.
   742  	MOVD    a+0(FP), R3
   743  	MOVD    b+8(FP), R4
   744  	MOVD    size+16(FP), R5
   745  
   746  	BL	runtime·memeqbody(SB)
   747  	MOVB    R9, ret+24(FP)
   748  	RET
   749  
   750  // memequal_varlen(a, b unsafe.Pointer) bool
        // Like memequal, but the size comes from the closure context in R11.
        // Identical pointers return true without calling memeqbody.
   751  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   752  	MOVD	a+0(FP), R3
   753  	MOVD	b+8(FP), R4
   754  	CMP	R3, R4
   755  	BEQ	eq		// same address: equal by definition
   756  	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
   757  	BL	runtime·memeqbody(SB)
   758  	MOVB	R9, ret+16(FP)	// result byte as left in R9 after memeqbody
   759  	RET
   760  eq:
   761  	MOVD	$1, R3
   762  	MOVB	R3, ret+16(FP)
   763  	RET
   764  
   765  // Do an efficient memcmp for ppc64le
   766  // R3 = s1 len
   767  // R4 = s2 len
   768  // R5 = s1 addr
   769  // R6 = s2 addr
   770  // R7 = addr of return value
        //
        // Stores a 64-bit -1, 0 or 1 at (R7). The first min(R3, R4) bytes are
        // compared; if they are all equal, the length comparison held in CR2
        // decides the result. CR0 carries the per-chunk comparisons and CTR the
        // loop counts. Overwrites R3, R5, R6, R8, R9, R10, R14, R15 and R16.
   771  TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
   772  	MOVD	R3,R8		// set up length
   773  	CMP	R3,R4,CR2	// unequal?
   774  	BC	12,8,setuplen	// BLT CR2
   775  	MOVD	R4,R8		// use R4 for comparison len
   776  setuplen:
        	// R8 = min(len1, len2); CR2 keeps the len1-vs-len2 result to the end.
   777  	MOVD	R8,CTR		// set up loop counter
   778  	CMP	R8,$8		// only optimize >=8
   779  	BLT	simplecheck
   780  	DCBT	(R5)		// cache hint
   781  	DCBT	(R6)
   782  	CMP	R8,$32		// optimize >= 32
   783  	MOVD	R8,R9
   784  	BLT	setup8a		// 8 byte moves only
   785  setup32a:
   786  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   787  	MOVD	R9,CTR
   788  
   789  	// Special processing for 32 bytes or longer.
   790  	// Loading this way is faster and correct as long as the
   791  	// doublewords being compared are equal. Once they
   792  	// are found unequal, reload them in proper byte order
   793  	// to determine greater or less than.
        	// R16 is set before each branch to cmpne to the offset from R5/R6 of
        	// the doubleword pair that differed.
   794  loop32a:
   795  	MOVD	0(R5),R9	// doublewords to compare
   796  	MOVD	0(R6),R10	// get 4 doublewords
   797  	MOVD	8(R5),R14
   798  	MOVD	8(R6),R15
   799  	CMPU	R9,R10		// bytes equal?
   800  	MOVD	$0,R16		// set up for cmpne
   801  	BNE	cmpne		// further compare for LT or GT
   802  	MOVD	16(R5),R9	// get next pair of doublewords
   803  	MOVD	16(R6),R10
   804  	CMPU	R14,R15		// bytes match?
   805  	MOVD	$8,R16		// set up for cmpne
   806  	BNE	cmpne		// further compare for LT or GT
   807  	MOVD	24(R5),R14	// get next pair of doublewords
   808  	MOVD    24(R6),R15
   809  	CMPU	R9,R10		// bytes match?
   810  	MOVD	$16,R16		// set up for cmpne
   811  	BNE	cmpne		// further compare for LT or GT
   812  	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   813  	ADD	$32,R5		// bump up to next 32
   814  	ADD	$32,R6
   815  	CMPU    R14,R15		// bytes match?
   816  	BC	8,2,loop32a	// br ctr and cr
   817  	BNE	cmpne
   818  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   819  	BEQ	leftover	// and result is 0
   820  setup8a:
   821  	SRADCC	$3,R9,R9	// get the 8 byte count
   822  	BEQ	leftover	// shifted value is 0
   823  	MOVD	R9,CTR		// loop count for doublewords
   824  loop8:
   825  	MOVDBR	(R5+R0),R9	// doublewords to compare
   826  	MOVDBR	(R6+R0),R10	// LE compare order
   827  	ADD	$8,R5
   828  	ADD	$8,R6
   829  	CMPU	R9,R10		// match?
   830  	BC	8,2,loop8	// bt ctr <> 0 && cr
   831  	BGT	greater
   832  	BLT	less
   833  leftover:
   834  	ANDCC	$7,R8,R9	// check for leftover bytes
   835  	MOVD	R9,CTR		// save the ctr
   836  	BNE	simple		// leftover bytes
   837  	BC	12,10,equal	// test CR2 for length comparison
   838  	BC	12,8,less
   839  	BR	greater
   840  simplecheck:
   841  	CMP	R8,$0		// remaining compare length 0
   842  	BNE	simple		// do simple compare
   843  	BC	12,10,equal	// test CR2 for length comparison
   844  	BC	12,8,less	// 1st len < 2nd len, result less
   845  	BR	greater		// 1st len > 2nd len must be greater
   846  simple:
        	// Byte-at-a-time loop; CTR holds the number of bytes still to compare.
   847  	MOVBZ	0(R5), R9	// get byte from 1st operand
   848  	ADD	$1,R5
   849  	MOVBZ	0(R6), R10	// get byte from 2nd operand
   850  	ADD	$1,R6
   851  	CMPU	R9, R10
   852  	BC	8,2,simple	// bc ctr <> 0 && cr
   853  	BGT	greater		// 1st > 2nd
   854  	BLT	less		// 1st < 2nd
   855  	BC	12,10,equal	// test CR2 for length comparison
   856  	BC	12,9,greater	// 1st len > 2nd len (CR2 GT, from CMP R3,R4 above)
   857  	BR	less		// must be less
   858  cmpne:				// only here is not equal
   859  	MOVDBR	(R5+R16),R8	// reload in reverse order
   860  	MOVDBR	(R6+R16),R9
   861  	CMPU	R8,R9		// compare correct endianness
   862  	BGT	greater		// here only if NE
        	// not greater and known unequal: fall through into less
   863  less:
   864  	MOVD	$-1,R3
   865  	MOVD	R3,(R7)		// return value if A < B
   866  	RET
   867  equal:
   868  	MOVD	$0,(R7)		// return value if A == B
   869  	RET
   870  greater:
   871  	MOVD	$1,R3
   872  	MOVD	R3,(R7)		// return value if A > B
   873  	RET
   874  
   875  // Do an efficient memcmp for ppc64 (BE)
   876  // R3 = s1 len
   877  // R4 = s2 len
   878  // R5 = s1 addr
   879  // R6 = s2 addr
   880  // R7 = addr of return value (receives -1, 0 or 1)
   881  TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   882  	MOVD	R3,R8		// R8 = s1 len (provisional compare length)
   883  	CMP	R3,R4,CR2	// CR2 = s1 len vs s2 len; used for the final tie-break
   884  	BC	12,8,setuplen	// BLT CR2: s1 is shorter, keep R8 = s1 len
   885  	MOVD	R4,R8		// otherwise use R4, so R8 = min(s1 len, s2 len)
   886  setuplen:
   887  	MOVD	R8,CTR		// set up loop counter (byte count for the simple loop)
   888  	CMP	R8,$8		// only optimize >=8
   889  	BLT	simplecheck
   890  	DCBT	(R5)		// cache hint
   891  	DCBT	(R6)
   892  	CMP	R8,$32		// optimize >= 32
   893  	MOVD	R8,R9		// R9 = length, consumed by setup8a if we branch there
   894  	BLT	setup8a		// 8 byte moves only
   895  
   896  setup32a:
   897  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   898  	MOVD	R9,CTR		// CTR = 32 byte chunk count
   899  loop32a:
   900  	MOVD	0(R5),R9	// doublewords to compare
   901  	MOVD	0(R6),R10	// get 4 doublewords
   902  	MOVD	8(R5),R14
   903  	MOVD	8(R6),R15
   904  	CMPU	R9,R10		// bytes equal?
   905  	BLT	less		// found to be less
   906  	BGT	greater		// found to be greater
   907  	MOVD	16(R5),R9	// get next pair of doublewords
   908  	MOVD	16(R6),R10
   909  	CMPU	R14,R15		// bytes match?
   910  	BLT	less		// found less
   911  	BGT	greater		// found greater
   912  	MOVD	24(R5),R14	// get next pair of doublewords
   913  	MOVD	24(R6),R15
   914  	CMPU	R9,R10		// bytes match?
   915  	BLT	less		// found to be less
   916  	BGT	greater		// found to be greater
   917  	ADD	$32,R5		// bump up to next 32
   918  	ADD	$32,R6
   919  	CMPU	R14,R15		// bytes match?
   920  	BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 is EQ
   921  	BLT	less		// with BE, byte ordering is
   922  	BGT	greater		// good for compare
   923  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   924  	BEQ	leftover	// and result is 0
   925  setup8a:
   926  	SRADCC	$3,R9,R9	// get the 8 byte count
   927  	BEQ	leftover	// shifted value is 0
   928  	MOVD	R9,CTR		// loop count for doublewords
   929  loop8:
   930  	MOVD	(R5),R9
   931  	MOVD	(R6),R10
   932  	ADD	$8,R5
   933  	ADD	$8,R6
   934  	CMPU	R9,R10		// match?
   935  	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 is EQ
   936  	BGT	greater
   937  	BLT	less
   938  leftover:
   939  	ANDCC	$7,R8,R9	// check for leftover bytes
   940  	MOVD	R9,CTR		// save the ctr
   941  	BNE	simple		// leftover bytes
   942  	BC	12,10,equal	// test CR2 for length comparison
   943  	BC	12,8,less	// 1st len < 2nd len, result less
   944  	BR	greater		// 1st len > 2nd len must be greater
   945  simplecheck:
   946  	CMP	R8,$0		// remaining compare length 0
   947  	BNE	simple		// do simple compare
   948  	BC	12,10,equal	// test CR2 for length comparison
   949  	BC 	12,8,less	// 1st len < 2nd len, result less
   950  	BR	greater		// 1st len > 2nd len must be greater
   951  simple:
   952  	MOVBZ	0(R5),R9	// get byte from 1st operand
   953  	ADD	$1,R5
   954  	MOVBZ	0(R6),R10	// get byte from 2nd operand
   955  	ADD	$1,R6
   956  	CMPU	R9,R10
   957  	BC	8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 is EQ
   958  	BGT	greater		// 1st > 2nd
   959  	BLT	less		// 1st < 2nd
   960  	BC	12,10,equal	// test CR2 for length comparison
   961  	BC	12,9,greater	// CR2 GT: 1st len > 2nd len
   962  less:				// also reached by fallthrough when 1st len < 2nd len
   963  	MOVD	$-1,R3
   964  	MOVD    R3,(R7)		// return value if A < B
   965  	RET
   966  equal:
   967  	MOVD    $0,(R7)		// return value if A == B
   968  	RET
   969  greater:
   970  	MOVD	$1,R3
   971  	MOVD	R3,(R7)		// return value if A > B
   972  	RET
   973  
   974  // Do an efficient memequal for ppc64
   975  // R3 = s1
   976  // R4 = s2
   977  // R5 = len
   978  // R9 = return value (1 if all len bytes match, 0 otherwise)
   979  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   980  	MOVD    R5,CTR		// byte count for the simple loop
   981  	CMP     R5,$8		// only optimize >=8
   982  	BLT     simplecheck
   983  	DCBT	(R3)		// cache hint
   984  	DCBT	(R4)
   985  	CMP	R5,$32		// optimize >= 32
   986  	MOVD	R5,R6		// needed if setup8a branch
   987  	BLT	setup8a		// 8 byte moves only
   988  setup32a:                       // >= 32 bytes: compare 32 bytes per iteration
   989  	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
   990  	MOVD	R6,CTR
   991  loop32a:
   992  	MOVD    0(R3),R6        // doublewords to compare
   993  	MOVD    0(R4),R7
   994  	MOVD	8(R3),R8	// second pair of doublewords
   995  	MOVD	8(R4),R9
   996  	CMP     R6,R7           // bytes match?
   997  	BNE     noteq
   998  	MOVD	16(R3),R6
   999  	MOVD	16(R4),R7
  1000  	CMP     R8,R9		// bytes match?
  1001  	MOVD	24(R3),R8	// loads do not disturb CR0, so the
  1002  	MOVD	24(R4),R9	// branch below still tests the CMP above
  1003  	BNE     noteq
  1004  	CMP     R6,R7           // bytes match?
  1005  	BNE	noteq
  1006  	ADD     $32,R3		// bump up to next 32
  1007  	ADD     $32,R4
  1008  	CMP     R8,R9           // bytes match?
  1009  	BC      8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 is EQ
  1010  	BNE	noteq
  1011  	ANDCC	$24,R5,R6       // Any 8 byte chunks?
  1012  	BEQ	leftover	// and result is 0
  1013  setup8a:
  1014  	SRADCC  $3,R6,R6        // get the 8 byte count
  1015  	BEQ	leftover	// shifted value is 0
  1016  	MOVD    R6,CTR
  1017  loop8:
  1018  	MOVD    0(R3),R6        // doublewords to compare
  1019  	ADD	$8,R3
  1020  	MOVD    0(R4),R7
  1021  	ADD     $8,R4
  1022  	CMP     R6,R7           // match?
  1023  	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 is EQ
  1024  	BNE     noteq
  1025  leftover:
  1026  	ANDCC   $7,R5,R6        // check for leftover bytes
  1027  	BEQ     equal
  1028  	MOVD    R6,CTR		// CTR = number of leftover bytes
  1029  	BR	simple
  1030  simplecheck:
  1031  	CMP	R5,$0		// zero length compares equal
  1032  	BEQ	equal
  1033  simple:
  1034  	MOVBZ   0(R3), R6	// byte from s1
  1035  	ADD	$1,R3
  1036  	MOVBZ   0(R4), R7	// byte from s2
  1037  	ADD     $1,R4
  1038  	CMP     R6, R7
  1039  	BNE     noteq
  1040  	BC      8,2,simple	// CR0 is EQ here, so this loops until CTR reaches 0
  1041  	BNE	noteq		// never taken: CR0 is still EQ from the CMP above
  1042  	BR	equal
  1043  noteq:
  1044  	MOVD    $0, R9		// result: not equal
  1045  	RET
  1046  equal:
  1047  	MOVD    $1, R9		// result: equal
  1048  	RET
  1049  
  1050  TEXT bytes·Equal(SB),NOSPLIT,$0-49
        	// Reports whether the two byte slices a and b hold the same bytes.
        	// The boolean result is stored as one byte at ret+48(FP).
  1051  	MOVD	a_len+8(FP), R4
  1052  	MOVD	b_len+32(FP), R5
  1053  	CMP	R5, R4		// unequal lengths are not equal
  1054  	BNE	noteq
  1055  	MOVD	a+0(FP), R3	// R3 = a base, R4 = b base, R5 = common length
  1056  	MOVD	b+24(FP), R4
        	CMP	R3, R4		// same base address and same length:
        	BEQ	equal		// equal without reading any data
  1057  	BL	runtime·memeqbody(SB)	// result comes back in R9 (1 or 0)
  1058  
  1059  	MOVBZ	R9,ret+48(FP)
  1060  	RET
  1061  
  1062  noteq:
  1063  	MOVBZ	$0,ret+48(FP)
  1064  	RET
  1065  
  1066  equal:
  1067  	MOVD	$1,R3
  1068  	MOVBZ	R3,ret+48(FP)
  1069  	RET
  1070  
  1071  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
        	// Load the arguments into the registers the shared body expects, then tail-branch to it.
  1072  	MOVD	$ret+32(FP), R14	// R14 = address of the result slot
  1073  	MOVBZ	c+24(FP), R5		// R5 = byte to search for
  1074  	MOVD	s_len+8(FP), R4		// R4 = number of bytes in s
  1075  	MOVD	s+0(FP), R3		// R3 = address of the first byte of s
  1076  	BR	runtime·indexbytebody<>(SB)
  1077  
  1078  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
        	// Load the arguments into the registers the shared body expects, then tail-branch to it.
  1079  	MOVD	$ret+24(FP), R14  // R14 = address of the result slot
  1080  	MOVBZ	c+16(FP), R5	  // R5 = byte to search for
  1081  	MOVD	s_len+8(FP), R4	  // R4 = string length in bytes
  1082  	MOVD	s+0(FP), R3	  // R3 = address of the string data
  1083  	BR	runtime·indexbytebody<>(SB)
  1084  
  1085  TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
        	// Shared body of bytes·IndexByte and strings·IndexByte.
        	// In:  R3 = base address, R4 = length, R5 = byte to find,
        	//      R14 = address of the result slot.
        	// Out: stores the index of the first matching byte to (R14),
        	//      or -1 if the byte does not occur in the first R4 bytes.
  1086  	DCBT	(R3)		// Prepare cache line.
  1087  	MOVD	R3,R17		// Save base address for calculating the index later.
  1088  	RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
  1089  	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
  1090  	ADD	R4,R3,R7	// R7 = base + length, one past the last byte.
  1091  
  1092  	RLDIMI	$16,R5,$32,R5	// Byte now replicated across the low 4 bytes.
  1093  	CMPU	R4,$32		// Check if it's a small string (<=32 bytes). Those will be processed differently.
  1094  	MOVD	$-1,R9		// All ones; shifted below into the mask for the first doubleword.
  1095  	WORD	$0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
  1096  	RLDIMI	$32,R5,$0,R5	// Byte now replicated across all 8 bytes.
  1097  	MOVD	R7,R10		// Save end address (base + length) in R10 for later.
  1098  	ADD	$-1,R7,R7	// R7 = address of the last byte.
  1099  #ifdef GOARCH_ppc64le
  1100  	SLD	R6,R9,R9	// Prepare mask for Little Endian
  1101  #else
  1102  	SRD	R6,R9,R9	// Same for Big Endian
  1103  #endif
  1104  	BLE	small_string	// Jump to the small string case if it's <=32 bytes (CR0 from the CMPU above).
  1105  
  1106  	// If we are 64-byte aligned, branch to qw_align just to get the auxiliary values
  1107  	// in V0, V1 and V10, then branch to the preloop.
  1108  	ANDCC	$63,R3,R11
  1109  	BEQ	CR0,qw_align
  1110  	RLDICL	$0,R3,$61,R11	// R11 = low 3 bits of the base address.
  1111  
  1112  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1113  	CMPB	R12,R5,R3	// Check for a match.
  1114  	AND	R9,R3,R3	// Mask bytes below s_base
  1115  	RLDICL	$0,R7,$61,R6	// R6 = offset of the last byte within its doubleword.
  1116  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7
  1117  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
  1118  	BNE	CR7,done
  1119  	ADD	$8,R8,R8
  1120  	ADD	$-8,R4,R4
  1121  	ADD	R4,R11,R4	// R4 = bytes remaining starting at R8.
  1122  
  1123  	// Check for quadword alignment
  1124  	ANDCC	$15,R8,R11
  1125  	BEQ	CR0,qw_align
  1126  
  1127  	// Not aligned, so handle the next doubleword
  1128  	MOVD	0(R8),R12
  1129  	CMPB	R12,R5,R3
  1130  	CMPU	R3,$0,CR7
  1131  	BNE	CR7,done
  1132  	ADD	$8,R8,R8
  1133  	ADD	$-8,R4,R4
  1134  
  1135  	// Either quadword aligned or 64-byte at this point. We can use LVX.
  1136  qw_align:
  1137  
  1138  	// Set up auxiliary data for the vectorized algorithm.
  1139  	VSPLTISB  $0,V0		// Replicate 0 across V0
  1140  	VSPLTISB  $3,V10	// Use V10 as control for VBPERMQ
  1141  	MTVRD	  R5,V1
  1142  	LVSL	  (R0+R0),V11
  1143  	VSLB	  V11,V10,V10
  1144  	VSPLTB	  $7,V1,V1	// Replicate byte across V1
  1145  	CMPU	  R4, $64	// If len <= 64, don't use the vectorized loop
  1146  	BLE	  tail
  1147  
  1148  	// We will load 4 quadwords per iteration in the loop, so check for
  1149  	// 64-byte alignment. If 64-byte aligned, then branch to the preloop.
  1150  	ANDCC	  $63,R8,R11
  1151  	BEQ	  CR0,preloop
  1152  
  1153  	// Not 64-byte aligned. Load one quadword at a time until aligned.
  1154  	LVX	    (R8+R0),V4
  1155  	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1156  	BNE	    CR6,found_qw_align
  1157  	ADD	    $16,R8,R8
  1158  	ADD	    $-16,R4,R4
  1159  
  1160  	ANDCC	    $63,R8,R11
  1161  	BEQ	    CR0,preloop
  1162  	LVX	    (R8+R0),V4
  1163  	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1164  	BNE	    CR6,found_qw_align
  1165  	ADD	    $16,R8,R8
  1166  	ADD	    $-16,R4,R4
  1167  
  1168  	ANDCC	    $63,R8,R11
  1169  	BEQ	    CR0,preloop
  1170  	LVX	    (R8+R0),V4
  1171  	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1172  	BNE	    CR6,found_qw_align
  1173  	ADD	    $-16,R4,R4
  1174  	ADD	    $16,R8,R8
  1175  
  1176  	// 64-byte aligned. Prepare for the main loop.
  1177  preloop:
  1178  	CMPU	R4,$64
  1179  	BLE	tail	      // If len <= 64, don't use the vectorized loop
  1180  
  1181  	// We are now aligned to a 64-byte boundary. We will load 4 quadwords
  1182  	// per loop iteration. The end address (base + length) is in R10, so our
  1183  	// loop counter starts at (R10-R8)/64.
  1184  	SUB	R8,R10,R6     // R6 = bytes remaining from R8 to the end address
  1185  	SRD	$6,R6,R9      // Loop counter in R9
  1186  	MOVD	R9,CTR
  1187  
  1188  	MOVD	$16,R11      // Load offsets for the vector loads
  1189  	MOVD	$32,R9
  1190  	MOVD	$48,R7
  1191  
  1192  	// Main loop we will load 64 bytes per iteration
  1193  loop:
  1194  	LVX	    (R8+R0),V2	      // Load 4 16-byte vectors
  1195  	LVX	    (R11+R8),V3
  1196  	LVX	    (R9+R8),V4
  1197  	LVX	    (R7+R8),V5
  1198  	VCMPEQUB    V1,V2,V6	      // Look for byte in each vector
  1199  	VCMPEQUB    V1,V3,V7
  1200  	VCMPEQUB    V1,V4,V8
  1201  	VCMPEQUB    V1,V5,V9
  1202  	VOR	    V6,V7,V11	      // Compress the result in a single vector
  1203  	VOR	    V8,V9,V12
  1204  	VOR	    V11,V12,V11
  1205  	VCMPEQUBCC  V0,V11,V11	      // Compare the merged result with zero
  1206  	BGE	    CR6,found	      // Not all zero: one of the four vectors matched
  1207  	ADD	    $64,R8,R8
  1208  	BC	    16,0,loop	      // bdnz loop
  1209  
  1210  	// Handle the trailing bytes or R4 <= 64
  1211  	RLDICL	$0,R6,$58,R4	      // R4 = R6 mod 64, the bytes left after the loop
  1212  tail:
  1213  	CMPU	    R4,$0
  1214  	BEQ	    notfound
  1215  	LVX	    (R8+R0),V4
  1216  	VCMPEQUBCC  V1,V4,V6
  1217  	BNE	    CR6,found_qw_align
  1218  	ADD	    $16,R8,R8
  1219  	CMPU	    R4,$16,CR6
  1220  	BLE	    CR6,notfound
  1221  	ADD	    $-16,R4,R4
  1222  
  1223  	LVX	    (R8+R0),V4
  1224  	VCMPEQUBCC  V1,V4,V6
  1225  	BNE	    CR6,found_qw_align
  1226  	ADD	    $16,R8,R8
  1227  	CMPU	    R4,$16,CR6
  1228  	BLE	    CR6,notfound
  1229  	ADD	    $-16,R4,R4
  1230  
  1231  	LVX	    (R8+R0),V4
  1232  	VCMPEQUBCC  V1,V4,V6
  1233  	BNE	    CR6,found_qw_align
  1234  	ADD	    $16,R8,R8
  1235  	CMPU	    R4,$16,CR6
  1236  	BLE	    CR6,notfound
  1237  	ADD	    $-16,R4,R4
  1238  
  1239  	LVX	    (R8+R0),V4
  1240  	VCMPEQUBCC  V1,V4,V6
  1241  	BNE	    CR6,found_qw_align
  1242  
  1243  notfound:
  1244  	MOVD	$-1,R3
  1245  	MOVD	R3,(R14)	// Store -1 as the result.
  1246  	RET
  1247  
  1248  found:
  1249  	// We will now compress the results into a single doubleword,
  1250  	// so it can be moved to a GPR for the final index calculation.
  1251  
  1252  	// The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the
  1253  	// first bit of each byte into bits 48-63.
  1254  	VBPERMQ	  V6,V10,V6
  1255  	VBPERMQ	  V7,V10,V7
  1256  	VBPERMQ	  V8,V10,V8
  1257  	VBPERMQ	  V9,V10,V9
  1258  
  1259  	// Shift each 16-bit component into its correct position for
  1260  	// merging into a single doubleword.
  1261  #ifdef GOARCH_ppc64le
  1262  	VSLDOI	  $2,V7,V7,V7
  1263  	VSLDOI	  $4,V8,V8,V8
  1264  	VSLDOI	  $6,V9,V9,V9
  1265  #else
  1266  	VSLDOI	  $6,V6,V6,V6
  1267  	VSLDOI	  $4,V7,V7,V7
  1268  	VSLDOI	  $2,V8,V8,V8
  1269  #endif
  1270  
  1271  	// Merge V6-V9 into a single doubleword and move to a GPR.
  1272  	VOR	V6,V7,V11
  1273  	VOR	V8,V9,V4
  1274  	VOR	V4,V11,V4
  1275  	MFVRD	V4,R3
  1276  
  1277  #ifdef GOARCH_ppc64le
  1278  	ADD	  $-1,R3,R11
  1279  	ANDN	  R3,R11,R11
  1280  	POPCNTD	  R11,R11	// Count trailing zeros (Little Endian).
  1281  #else
  1282  	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1283  #endif
  1284  	ADD	R8,R11,R3	// Calculate byte address
  1285  
  1286  return:
  1287  	SUB	R17,R3		// Index = match address - saved base address.
  1288  	MOVD	R3,(R14)
  1289  	RET
  1290  
  1291  found_qw_align:
  1292  	// Use the same algorithm as above. Compress the result into
  1293  	// a single doubleword and move it to a GPR for the final
  1294  	// calculation.
  1295  	VBPERMQ	  V6,V10,V6
  1296  
  1297  #ifdef GOARCH_ppc64le
  1298  	MFVRD	  V6,R3
  1299  	ADD	  $-1,R3,R11
  1300  	ANDN	  R3,R11,R11
  1301  	POPCNTD	  R11,R11
  1302  #else
  1303  	VSLDOI	  $6,V6,V6,V6
  1304  	MFVRD	  V6,R3
  1305  	CNTLZD	  R3,R11
  1306  #endif
  1307  	ADD	  R8,R11,R3
  1308  	CMPU	  R11,R4	// The match only counts if its offset is below the bytes remaining.
  1309  	BLT	  return
  1310  	BR	  notfound
  1311  
  1312  done:
  1313  	// At this point, R3 has 0xFF in the same position as the byte we are
  1314  	// looking for in the doubleword. Use that to calculate the exact index
  1315  	// of the byte.
  1316  #ifdef GOARCH_ppc64le
  1317  	ADD	$-1,R3,R11
  1318  	ANDN	R3,R11,R11
  1319  	POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
  1320  #else
  1321  	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1322  #endif
  1323  	CMPU	R8,R7		// Check if we are at the last doubleword.
  1324  	SRD	$3,R11		// Convert the zero-bit count to a byte offset.
  1325  	ADD	R11,R8,R3
  1326  	CMPU	R11,R6,CR7	// If at the last doubleword, check the byte offset.
  1327  	BNE	return		// Not the last doubleword: the match is in range.
  1328  	BLE	CR7,return	// Last doubleword: in range only if not past the last byte.
  1329  	BR	notfound
  1330  
  1331  small_string:
  1332  	// We unroll this loop for better performance.
  1333  	CMPU	R4,$0		// Check for length=0
  1334  	BEQ	notfound
  1335  
  1336  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1337  	CMPB	R12,R5,R3	// Check for a match.
  1338  	AND	R9,R3,R3	// Mask bytes below s_base.
  1339  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
  1340  	RLDICL	$0,R7,$61,R6	// R6 = offset of the last byte within its doubleword.
  1341  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
  1342  	CMPU	R8,R7
  1343  	BNE	CR7,done
  1344  	BEQ	notfound	// Hit length.
  1345  
  1346  	MOVDU	8(R8),R12
  1347  	CMPB	R12,R5,R3
  1348  	CMPU	R3,$0,CR6
  1349  	CMPU	R8,R7
  1350  	BNE	CR6,done
  1351  	BEQ	notfound
  1352  
  1353  	MOVDU	8(R8),R12
  1354  	CMPB	R12,R5,R3
  1355  	CMPU	R3,$0,CR6
  1356  	CMPU	R8,R7
  1357  	BNE	CR6,done
  1358  	BEQ	notfound
  1359  
  1360  	MOVDU	8(R8),R12
  1361  	CMPB	R12,R5,R3
  1362  	CMPU	R3,$0,CR6
  1363  	CMPU	R8,R7
  1364  	BNE	CR6,done
  1365  	BEQ	notfound
  1366  
  1367  	MOVDU	8(R8),R12
  1368  	CMPB	R12,R5,R3
  1369  	CMPU	R3,$0,CR6
  1370  	BNE	CR6,done
  1371  	BR	notfound
  1372  
  1373  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
        	// Three-way compare of two strings; stores -1, 0 or 1 at ret+32(FP).
  1374  	MOVD	s1_base+0(FP), R5
  1375  	MOVD	s2_base+16(FP), R6
  1376  	MOVD	s1_len+8(FP), R3
  1377  	CMP	R5,R6,CR7	// CR7: do both strings start at the same address?
  1378  	MOVD	s2_len+24(FP), R4
  1379  	MOVD	$ret+32(FP), R7	// R7 = address of the result slot
  1380  	CMP	R3,R4,CR6	// CR6: s1_len compared with s2_len
  1381  	BEQ	CR7,equal	// same base address: only the lengths decide
  1382  
  1383  notequal:
  1384  #ifdef	GOARCH_ppc64le
  1385  	BR	cmpbodyLE<>(SB)
  1386  #else
  1387  	BR      cmpbodyBE<>(SB)
  1388  #endif
  1389  
  1390  equal:
  1391  	BEQ	CR6,done	// same address and same length: result 0
  1392  	MOVD	$1, R8
  1393  	BGT	CR6,greater	// s1 is longer: result 1
  1394  	NEG	R8		// s1 is shorter: result -1
  1395  
  1396  greater:
  1397  	MOVD	R8, (R7)
  1398  	RET
  1399  
  1400  done:
  1401  	MOVD	$0, (R7)
  1402  	RET
  1403  
  1404  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Three-way compare of two byte slices; stores -1, 0 or 1 at ret+48(FP).
  1405  	MOVD	s1+0(FP), R5
  1406  	MOVD	s2+24(FP), R6
  1407  	MOVD	s1_len+8(FP), R3
  1408  	CMP	R5,R6,CR7	// CR7: do both slices start at the same address?
  1409  	MOVD	s2_len+32(FP), R4
  1410  	MOVD	$ret+48(FP), R7	// R7 = address of the result slot
  1411  	CMP	R3,R4,CR6	// CR6: s1 length compared with s2 length
  1412  	BEQ	CR7,equal	// same base address: only the lengths decide
  1413  
  1414  #ifdef	GOARCH_ppc64le
  1415  	BR	cmpbodyLE<>(SB)
  1416  #else
  1417  	BR      cmpbodyBE<>(SB)
  1418  #endif
  1419  
  1420  equal:
  1421  	BEQ	CR6,done	// same address and same length: result 0
  1422  	MOVD	$1, R8
  1423  	BGT	CR6,greater	// s1 is longer: result 1
  1424  	NEG	R8		// s1 is shorter: result -1
  1425  
  1426  greater:
  1427  	MOVD	R8, (R7)
  1428  	RET
  1429  
  1430  done:
  1431  	MOVD	$0, (R7)
  1432  	RET
  1433  
  1434  TEXT runtime·return0(SB), NOSPLIT, $0
  1435  	MOVW	$0, R3		// leave 0 in R3 for the caller
  1436  	RET
  1437  
  1438  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1439  // Must obey the gcc calling convention.
  1440  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
  1441  	// g (R30) and R31 are callee-save in the C ABI, so save them
  1442  	MOVD	g, R4
  1443  	MOVD	R31, R5
  1444  	MOVD	LR, R6			// the BL below overwrites LR
  1445  
  1446  	BL	runtime·load_g(SB)	// clobbers g (R30), R31
  1447  	MOVD	g_m(g), R3		// R3 = g->m
  1448  	MOVD	m_curg(R3), R3		// R3 = g->m->curg
  1449  	MOVD	(g_stack+stack_hi)(R3), R3	// R3 = curg.stack.hi, the return value
  1450  
  1451  	MOVD	R4, g			// restore the saved registers
  1452  	MOVD	R5, R31
  1453  	MOVD	R6, LR
  1454  	RET
  1455  
  1456  // The top-most function running on a goroutine
  1457  // returns to goexit+PCQuantum.
  1458  //
  1459  // When dynamically linking Go, it can be returned to from a function
  1460  // implemented in a different module and so needs to reload the TOC pointer
  1461  // from the stack (although this function declares that it does not set up a
  1462  // frame, newproc1 does in fact allocate one for goexit and saves the TOC
  1463  // pointer in the correct place).
  1464  // goexit+_PCQuantum is halfway through the usual global entry point prologue
  1465  // that derives r2 from r12 which is a bit silly, but not harmful.
  1466  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
  1467  	MOVD	24(R1), R2		// reload the TOC pointer saved in the frame
  1468  	BL	runtime·goexit1(SB)	// does not return
  1469  	// traceback from goexit1 must hit code range of goexit
  1470  	MOVD	R0, R0	// NOP
  1471  
  1472  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
  1473  	RET	// no work here: the body only returns
  1474  
  1475  // prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
  1476  // module containing runtime) to the frame that goexit will execute in when
  1477  // the goroutine exits. It's implemented in assembly mainly because that's the
  1478  // easiest way to get access to R2.
  1479  TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
  1480        MOVD    sp+0(FP), R3	// R3 = the sp argument (stack pointer of goexit's frame)
  1481        MOVD    R2, 24(R3)	// store the TOC pointer at offset 24, where goexit reloads it
  1482        RET
  1483  
  1484  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
        	// Appends the moduledata pointed to by R3 to the list ending at
        	// runtime·lastmoduledatap, then makes it the new tail.
        	// NOTE(review): R3 is presumably supplied by a non-Go caller — confirm.
  1485  	ADD	$-8, R1			// make room on the stack for R31
  1486  	MOVD	R31, 0(R1)		// save R31; presumably the global accesses below may use it as a temporary — confirm
  1487  	MOVD	runtime·lastmoduledatap(SB), R4	// R4 = current tail of the list
  1488  	MOVD	R3, moduledata_next(R4)	// tail.next = new moduledata
  1489  	MOVD	R3, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
  1490  	MOVD	0(R1), R31		// restore R31
  1491  	ADD	$8, R1			// release the stack slot
  1492  	RET
  1493  
  1494  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1495  	MOVW	$1, R3		// this implementation unconditionally reports 1
  1496  	MOVB	R3, ret+0(FP)	// store the one-byte result
  1497  	RET