github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/runtime/asm_ppc64x.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ppc64 ppc64le
     6  
     7  #include "go_asm.h"
     8  #include "go_tls.h"
     9  #include "funcdata.h"
    10  #include "textflag.h"
    11  #include "asm_ppc64x.h"
    12  
    13  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    14  	// R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
        	//
        	// Runtime bootstrap. Sets up g0 and m0 on the incoming stack, calls
        	// _cgo_init when one is linked in, runs check/args/osinit/schedinit,
        	// queues a new goroutine via newproc (entry taken from
        	// runtime·mainPC) and finally calls mstart.
    15  
    16  	// initialize essential registers
    17  	BL	runtime·reginit(SB)
    18  
    19  	SUB	$(FIXED_FRAME+16), R1	// new frame: fixed area plus 16 bytes for argc/argv
    20  	MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
    21  	MOVW	R3, FIXED_FRAME+0(R1)	// argc
    22  	MOVD	R4, FIXED_FRAME+8(R1)	// argv
    23  
    24  	// create istack out of the given (operating system) stack.
    25  	// _cgo_init may update stackguard.
    26  	MOVD	$runtime·g0(SB), g
    27  	MOVD	$(-64*1024), R31
    28  	ADD	R31, R1, R3		// R3 = R1 - 64 KiB: provisional stack_lo and stack guards
    29  	MOVD	R3, g_stackguard0(g)
    30  	MOVD	R3, g_stackguard1(g)
    31  	MOVD	R3, (g_stack+stack_lo)(g)
    32  	MOVD	R1, (g_stack+stack_hi)(g)
    33  
    34  	// if there is a _cgo_init, call it using the gcc ABI.
    35  	MOVD	_cgo_init(SB), R12
    36  	CMP	R0, R12			// R0 is zero (see reginit), so this tests for a nil _cgo_init
    37  	BEQ	nocgo
    38  	MOVD	R12, CTR		// r12 = "global function entry point"
    39  	MOVD	R13, R5			// arg 2: TLS base pointer
    40  	MOVD	$setg_gcc<>(SB), R4 	// arg 1: setg
    41  	MOVD	g, R3			// arg 0: G
    42  	// C functions expect 32 bytes of space on caller stack frame
    43  	// and a 16-byte aligned R1
    44  	MOVD	R1, R14			// save current stack
    45  	SUB	$32, R1			// reserve 32 bytes
    46  	RLDCR	$0, R1, $~15, R1	// 16-byte align
    47  	BL	(CTR)			// may clobber R0, R3-R12
    48  	MOVD	R14, R1			// restore stack
    49  	MOVD	24(R1), R2		// reload the TOC pointer stashed at frame setup
    50  	XOR	R0, R0			// fix R0
    51  
    52  nocgo:
    53  	// update stackguard after _cgo_init
    54  	MOVD	(g_stack+stack_lo)(g), R3
    55  	ADD	$const__StackGuard, R3
    56  	MOVD	R3, g_stackguard0(g)
    57  	MOVD	R3, g_stackguard1(g)
    58  
    59  	// set the per-goroutine and per-mach "registers"
    60  	MOVD	$runtime·m0(SB), R3
    61  
    62  	// save m->g0 = g0
    63  	MOVD	g, m_g0(R3)
    64  	// save m0 to g0->m
    65  	MOVD	R3, g_m(g)
    66  
    67  	BL	runtime·check(SB)
    68  
    69  	// args are already prepared
    70  	BL	runtime·args(SB)
    71  	BL	runtime·osinit(SB)
    72  	BL	runtime·schedinit(SB)
    73  
    74  	// create a new goroutine to start program
        	// Six doubleword pushes (48 bytes): the address of mainPC followed by
        	// five zero words. NOTE(review): this equals the 16+FIXED_FRAME popped
        	// after newproc only if FIXED_FRAME is 32 (defined in asm_ppc64x.h,
        	// not visible here) - confirm.
    75  	MOVD	$runtime·mainPC(SB), R3		// entry
    76  	MOVDU	R3, -8(R1)
    77  	MOVDU	R0, -8(R1)
    78  	MOVDU	R0, -8(R1)
    79  	MOVDU	R0, -8(R1)
    80  	MOVDU	R0, -8(R1)
    81  	MOVDU	R0, -8(R1)
    82  	BL	runtime·newproc(SB)
    83  	ADD	$(16+FIXED_FRAME), R1
    84  
    85  	// start this M
    86  	BL	runtime·mstart(SB)
    87  
    88  	MOVD	R0, 0(R0)	// reached only if mstart returns: store through R0 (zero), i.e. to address 0
    89  	RET
    90  
    91  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte cell holding the address of runtime·main
    92  GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only; rt0_go passes this cell's address to newproc
    93  
    94  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
        	// No trap instruction is used yet (see TODO): the store below goes
        	// through R0, which the runtime keeps at zero, i.e. to address 0.
    95  	MOVD	R0, 0(R0) // TODO: TD
    96  	RET
    97  
    98  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
        	// Nothing to do on ppc64: returns immediately.
    99  	RET
   100  
   101  TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
   102  	// crosscall_ppc64 and crosscall2 need to reginit, but can't
   103  	// get at the 'runtime.reginit' symbol.
        	// Tail-jump (BR, not BL): reginit's RET returns straight to our caller.
   104  	BR	runtime·reginit(SB)
   105  
   106  TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
   107  	// set R0 to zero, it's expected by the toolchain
   108  	XOR R0, R0	// R0 ^ R0 = 0, regardless of the previous contents
   109  	RET
   110  
   111  /*
   112   *  go-routine
   113   */
   114  
   115  // void gosave(Gobuf*)
   116  // save state in Gobuf; setjmp
        // Records the caller's SP (R1), return PC (LR) and g in *buf, zeroes
        // buf.lr and buf.ret, and calls badctxt if buf.ctxt is non-zero.
   117  TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   118  	MOVD	buf+0(FP), R3
   119  	MOVD	R1, gobuf_sp(R3)
   120  	MOVD	LR, R31		// LR cannot be stored directly; go through R31
   121  	MOVD	R31, gobuf_pc(R3)
   122  	MOVD	g, gobuf_g(R3)
   123  	MOVD	R0, gobuf_lr(R3)
   124  	MOVD	R0, gobuf_ret(R3)
   125  	// Assert ctxt is zero. See func save.
   126  	MOVD	gobuf_ctxt(R3), R3
   127  	CMP	R0, R3
   128  	BEQ	2(PC)		// ctxt == 0: skip the badctxt call
   129  	BL	runtime·badctxt(SB)
   130  	RET
   131  
   132  // void gogo(Gobuf*)
   133  // restore state from Gobuf; longjmp
        // Loads g, SP, LR, the return value (R3) and the closure context (R11)
        // from *buf, clears the sp/ret/lr/ctxt fields, then jumps to buf.pc.
   134  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   135  	MOVD	buf+0(FP), R5
   136  	MOVD	gobuf_g(R5), g	// make sure g is not nil
   137  	BL	runtime·save_g(SB)
   138  
   139  	MOVD	0(g), R4	// dereference g so a nil g faults here; R4 is not used afterwards
   140  	MOVD	gobuf_sp(R5), R1
   141  	MOVD	gobuf_lr(R5), R31
   142  	MOVD	24(R1), R2	// restore R2
   143  	MOVD	R31, LR
   144  	MOVD	gobuf_ret(R5), R3
   145  	MOVD	gobuf_ctxt(R5), R11
   146  	MOVD	R0, gobuf_sp(R5)	// clear the consumed fields
   147  	MOVD	R0, gobuf_ret(R5)
   148  	MOVD	R0, gobuf_lr(R5)
   149  	MOVD	R0, gobuf_ctxt(R5)
   150  	CMP	R0, R0 // set condition codes for == test, needed by stack split
   151  	MOVD	gobuf_pc(R5), R12	// R12 carries the entry address, as for other indirect jumps in this file
   152  	MOVD	R12, CTR
   153  	BR	(CTR)
   154  
   155  // void mcall(fn func(*g))
   156  // Switch to m->g0's stack, call fn(g).
   157  // Fn must never return. It should gogo(&g->sched)
   158  // to keep running g.
        // Calling mcall while already on g0 jumps to badmcall; a return from
        // fn jumps to badmcall2.
   159  TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   160  	// Save caller state in g->sched
   161  	MOVD	R1, (g_sched+gobuf_sp)(g)
   162  	MOVD	LR, R31
   163  	MOVD	R31, (g_sched+gobuf_pc)(g)
   164  	MOVD	R0, (g_sched+gobuf_lr)(g)
   165  	MOVD	g, (g_sched+gobuf_g)(g)
   166  
   167  	// Switch to m->g0 & its stack, call fn.
   168  	MOVD	g, R3				// R3 = calling g, later passed to fn
   169  	MOVD	g_m(g), R8
   170  	MOVD	m_g0(R8), g
   171  	BL	runtime·save_g(SB)
   172  	CMP	g, R3
   173  	BNE	2(PC)				// g0 != caller: skip the badmcall jump
   174  	BR	runtime·badmcall(SB)
   175  	MOVD	fn+0(FP), R11			// context
   176  	MOVD	0(R11), R12			// code pointer
   177  	MOVD	R12, CTR
   178  	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
   179  	MOVDU	R3, -8(R1)			// push the old g as fn's argument ...
   180  	MOVDU	R0, -8(R1)			// ... followed by four zero doublewords
   181  	MOVDU	R0, -8(R1)
   182  	MOVDU	R0, -8(R1)
   183  	MOVDU	R0, -8(R1)
   184  	BL	(CTR)
   185  	MOVD	24(R1), R2			// reload the TOC pointer after the indirect call
   186  	BR	runtime·badmcall2(SB)
   187  
   188  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   189  // of the G stack. We need to distinguish the routine that
   190  // lives at the bottom of the G stack from the one that lives
   191  // at the top of the system stack because the one at the top of
   192  // the system stack terminates the stack walk (see topofstack()).
        // systemstack records this symbol's address plus 16 as the saved PC, so
        // the instruction layout below must not change.
   193  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   194  	// We have several undefs here so that 16 bytes past
   195  	// $runtime·systemstack_switch lies within them whether or not the
   196  	// instructions that derive r2 from r12 are there.
   197  	UNDEF
   198  	UNDEF
   199  	UNDEF
   200  	BL	(LR)	// make sure this function is not leaf
   201  	RET
   202  
   203  // func systemstack(fn func())
        // Runs fn on the m's g0 stack. If already on g0 or gsignal, fn is called
        // directly; if on m->curg, the state is saved in g->sched, fn is called
        // on g0's stack, and execution then switches back to curg.
   204  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   205  	MOVD	fn+0(FP), R3	// R3 = fn
   206  	MOVD	R3, R11		// context
   207  	MOVD	g_m(g), R4	// R4 = m
   208  
   209  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   210  	CMP	g, R5
   211  	BEQ	noswitch
   212  
   213  	MOVD	m_g0(R4), R5	// R5 = g0
   214  	CMP	g, R5
   215  	BEQ	noswitch
   216  
   217  	MOVD	m_curg(R4), R6
   218  	CMP	g, R6
   219  	BEQ	switch
   220  
   221  	// Bad: g is not gsignal, not g0, not curg. What is it?
   222  	// Hide call from linker nosplit analysis.
   223  	MOVD	$runtime·badsystemstack(SB), R12
   224  	MOVD	R12, CTR
   225  	BL	(CTR)
        	// If badsystemstack returns, execution falls through into switch.
   226  
   227  switch:
   228  	// save our state in g->sched. Pretend to
   229  	// be systemstack_switch if the G stack is scanned.
   230  	MOVD	$runtime·systemstack_switch(SB), R6
   231  	ADD     $16, R6 // get past prologue (including r2-setting instructions when they're there)
   232  	MOVD	R6, (g_sched+gobuf_pc)(g)
   233  	MOVD	R1, (g_sched+gobuf_sp)(g)
   234  	MOVD	R0, (g_sched+gobuf_lr)(g)
   235  	MOVD	g, (g_sched+gobuf_g)(g)
   236  
   237  	// switch to g0
   238  	MOVD	R5, g		// R5 still holds m->g0 from the check above
   239  	BL	runtime·save_g(SB)
   240  	MOVD	(g_sched+gobuf_sp)(g), R3
   241  	// make it look like mstart called systemstack on g0, to stop traceback
   242  	SUB	$FIXED_FRAME, R3
   243  	MOVD	$runtime·mstart(SB), R4
   244  	MOVD	R4, 0(R3)	// fake saved-LR slot of the new frame
   245  	MOVD	R3, R1
   246  
   247  	// call target function
   248  	MOVD	0(R11), R12	// code pointer
   249  	MOVD	R12, CTR
   250  	BL	(CTR)
   251  
   252  	// restore TOC pointer. It seems unlikely that we will use systemstack
   253  	// to call a function defined in another module, but the results of
   254  	// doing so would be so confusing that it's worth doing this.
   255  	MOVD	g_m(g), R3
   256  	MOVD	m_curg(R3), g
   257  	MOVD	(g_sched+gobuf_sp)(g), R3
   258  	MOVD	24(R3), R2	// TOC slot of the frame saved in curg's sched.sp
   259  	// switch back to g
        	// NOTE(review): g was already reloaded from m->curg four instructions
        	// above, so the next two loads recompute the same value.
   260  	MOVD	g_m(g), R3
   261  	MOVD	m_curg(R3), g
   262  	BL	runtime·save_g(SB)
   263  	MOVD	(g_sched+gobuf_sp)(g), R1
   264  	MOVD	R0, (g_sched+gobuf_sp)(g)	// clear the saved SP now that it has been consumed
   265  	RET
   266  
   267  noswitch:
   268  	// already on m stack, just call directly
   269  	// On other arches we do a tail call here, but it appears to be
   270  	// impossible to tail call a function pointer in shared mode on
   271  	// ppc64 because the caller is responsible for restoring the TOC.
   272  	MOVD	0(R11), R12	// code pointer
   273  	MOVD	R12, CTR
   274  	BL	(CTR)
   275  	MOVD	24(R1), R2	// restore the TOC pointer after the indirect call
   276  	RET
   277  
   278  /*
   279   * support for morestack
   280   */
   281  
   282  // Called during function prolog when more stack is needed.
   283  // Caller has already loaded:
   284  // R3: framesize, R4: argsize, R5: LR
   285  //
   286  // The traceback routines see morestack on a g0 as being
   287  // the top of a stack (for example, morestack calling newstack
   288  // calling the scheduler calling newm calling gc), so we must
   289  // record an argument size. For that purpose, it has no arguments.
        //
        // R11 is saved as the closure context of the interrupted function.
        // Control does not come back here: newstack is called on g0's stack
        // and an UNDEF follows the call.
   290  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   291  	// Cannot grow scheduler stack (m->g0).
   292  	MOVD	g_m(g), R7
   293  	MOVD	m_g0(R7), R8
   294  	CMP	g, R8
   295  	BNE	3(PC)		// not on g0: skip the two calls below
   296  	BL	runtime·badmorestackg0(SB)
   297  	BL	runtime·abort(SB)
   298  
   299  	// Cannot grow signal stack (m->gsignal).
   300  	MOVD	m_gsignal(R7), R8
   301  	CMP	g, R8
   302  	BNE	3(PC)		// not on gsignal: skip the two calls below
   303  	BL	runtime·badmorestackgsignal(SB)
   304  	BL	runtime·abort(SB)
   305  
   306  	// Called from f.
   307  	// Set g->sched to context in f.
   308  	MOVD	R1, (g_sched+gobuf_sp)(g)
   309  	MOVD	LR, R8
   310  	MOVD	R8, (g_sched+gobuf_pc)(g)	// our LR points back into f's prologue
   311  	MOVD	R5, (g_sched+gobuf_lr)(g)	// R5 = f's own LR, loaded by the caller
   312  	MOVD	R11, (g_sched+gobuf_ctxt)(g)
   313  
   314  	// Called from f.
   315  	// Set m->morebuf to f's caller.
   316  	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   317  	MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   318  	MOVD	g, (m_morebuf+gobuf_g)(R7)
   319  
   320  	// Call newstack on m->g0's stack.
   321  	MOVD	m_g0(R7), g
   322  	BL	runtime·save_g(SB)
   323  	MOVD	(g_sched+gobuf_sp)(g), R1
   324  	MOVDU   R0, -(FIXED_FRAME+0)(R1)	// create a call frame on g0
   325  	BL	runtime·newstack(SB)
   326  
   327  	// Not reached, but make sure the return PC from the call to newstack
   328  	// is still in this function, and not the beginning of the next.
   329  	UNDEF
   330  
   331  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
        	// Variant of morestack with no closure context: zero R11 (which
        	// morestack saves as g->sched.ctxt) and tail-jump to morestack.
   332  	MOVD	R0, R11
   333  	BR	runtime·morestack(SB)
   334  
   335  // reflectcall: call a function with the given argument list
   336  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   337  // we don't have variable-sized frames, so we use a small number
   338  // of constant-sized-frame functions to encode a few bits of size in the pc.
   339  // Caution: ugly multiline assembly macros in your future!
        //
        // DISPATCH(NAME, MAXSIZE) expects the argument size in R3. If R3 is
        // greater than MAXSIZE, BGT 4(PC) skips the next three instructions and
        // falls through to whatever follows the macro; otherwise it jumps to
        // NAME via CTR. R31 and R12 are overwritten.
   340  
   341  #define DISPATCH(NAME,MAXSIZE)		\
   342  	MOVD	$MAXSIZE, R31;		\
   343  	CMP	R3, R31;		\
   344  	BGT	4(PC);			\
   345  	MOVD	$NAME(SB), R12;		\
   346  	MOVD	R12, CTR;		\
   347  	BR	(CTR)
   348  // Note: can't just "BR NAME(SB)" - bad inlining results.
   349  
   350  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias in package reflect: jumps straight to ·reflectcall.
   351  	BR	·reflectcall(SB)
   352  
   353  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
        	// Loads the 32-bit argsize into R3 and jumps to the first call<N>
        	// whose N is at least argsize (sizes tried in ascending order,
        	// 32 bytes up to 1 GiB). If none fits, jumps to badreflectcall.
   354  	MOVWZ argsize+24(FP), R3
   355  	DISPATCH(runtime·call32, 32)
   356  	DISPATCH(runtime·call64, 64)
   357  	DISPATCH(runtime·call128, 128)
   358  	DISPATCH(runtime·call256, 256)
   359  	DISPATCH(runtime·call512, 512)
   360  	DISPATCH(runtime·call1024, 1024)
   361  	DISPATCH(runtime·call2048, 2048)
   362  	DISPATCH(runtime·call4096, 4096)
   363  	DISPATCH(runtime·call8192, 8192)
   364  	DISPATCH(runtime·call16384, 16384)
   365  	DISPATCH(runtime·call32768, 32768)
   366  	DISPATCH(runtime·call65536, 65536)
   367  	DISPATCH(runtime·call131072, 131072)
   368  	DISPATCH(runtime·call262144, 262144)
   369  	DISPATCH(runtime·call524288, 524288)
   370  	DISPATCH(runtime·call1048576, 1048576)
   371  	DISPATCH(runtime·call2097152, 2097152)
   372  	DISPATCH(runtime·call4194304, 4194304)
   373  	DISPATCH(runtime·call8388608, 8388608)
   374  	DISPATCH(runtime·call16777216, 16777216)
   375  	DISPATCH(runtime·call33554432, 33554432)
   376  	DISPATCH(runtime·call67108864, 67108864)
   377  	DISPATCH(runtime·call134217728, 134217728)
   378  	DISPATCH(runtime·call268435456, 268435456)
   379  	DISPATCH(runtime·call536870912, 536870912)
   380  	DISPATCH(runtime·call1073741824, 1073741824)
        	// argsize exceeds every frame size above
   381  	MOVD	$runtime·badreflectcall(SB), R12
   382  	MOVD	R12, CTR
   383  	BR	(CTR)
   384  
   385  #define CALLFN(NAME,MAXSIZE)			\
   386  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   387  	NO_LOCAL_POINTERS;			\
        	/* Defines one fixed-frame trampoline: copies argsize bytes from arg into this frame, calls f, then hands the results back through callRet. */ \
   388  	/* copy arguments to stack */		\
        	/* Byte loop: R3 = source - 1, R5 = destination - 1, R4 = last destination byte; the update forms of MOVBZU advance both pointers. */ \
   389  	MOVD	arg+16(FP), R3;			\
   390  	MOVWZ	argsize+24(FP), R4;			\
   391  	MOVD	R1, R5;				\
   392  	ADD	$(FIXED_FRAME-1), R5;			\
   393  	SUB	$1, R3;				\
   394  	ADD	R5, R4;				\
   395  	CMP	R5, R4;				\
   396  	BEQ	4(PC);				\
   397  	MOVBZU	1(R3), R6;			\
   398  	MOVBZU	R6, 1(R5);			\
   399  	BR	-4(PC);				\
   400  	/* call function */			\
   401  	MOVD	f+8(FP), R11;			\
   402  	MOVD	(R11), R12;			\
   403  	MOVD	R12, CTR;			\
   404  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   405  	BL	(CTR);				\
   406  	MOVD	24(R1), R2;			\
   407  	/* copy return values back */		\
        	/* Registers for callRet: R7 = argtype, R3 = arg + retoffset, R5 = frame copy + retoffset, R4 = n - retoffset. */ \
   408  	MOVD	argtype+0(FP), R7;		\
   409  	MOVD	arg+16(FP), R3;			\
   410  	MOVWZ	n+24(FP), R4;			\
   411  	MOVWZ	retoffset+28(FP), R6;		\
   412  	ADD	$FIXED_FRAME, R1, R5;		\
   413  	ADD	R6, R5; 			\
   414  	ADD	R6, R3;				\
   415  	SUB	R6, R4;				\
   416  	BL	callRet<>(SB);			\
   417  	RET
   418  
   419  // callRet is the tail of every call* trampoline: it hands the return
   420  // values back via runtime·reflectcallmove. It is a function of its own so
   421  // that it has a frame for reflectcallmove's arguments. It does not follow
   422  // the Go ABI; its inputs arrive in R7, R3, R5 and R4.
   423  TEXT callRet<>(SB), NOSPLIT, $32-0
   424  	MOVD	R4, FIXED_FRAME+24(R1)	// 4th argument slot
   425  	MOVD	R5, FIXED_FRAME+16(R1)	// 3rd argument slot
   426  	MOVD	R3, FIXED_FRAME+8(R1)	// 2nd argument slot
   427  	MOVD	R7, FIXED_FRAME+0(R1)	// 1st argument slot
   428  	BL	runtime·reflectcallmove(SB)
   429  	RET
   430  
   431  CALLFN(·call32, 32)	// one trampoline per power-of-two frame size, 32 bytes up to 1 GiB; these are the targets DISPATCH jumps to in ·reflectcall
   432  CALLFN(·call64, 64)
   433  CALLFN(·call128, 128)
   434  CALLFN(·call256, 256)
   435  CALLFN(·call512, 512)
   436  CALLFN(·call1024, 1024)
   437  CALLFN(·call2048, 2048)
   438  CALLFN(·call4096, 4096)
   439  CALLFN(·call8192, 8192)
   440  CALLFN(·call16384, 16384)
   441  CALLFN(·call32768, 32768)
   442  CALLFN(·call65536, 65536)
   443  CALLFN(·call131072, 131072)
   444  CALLFN(·call262144, 262144)
   445  CALLFN(·call524288, 524288)
   446  CALLFN(·call1048576, 1048576)
   447  CALLFN(·call2097152, 2097152)
   448  CALLFN(·call4194304, 4194304)
   449  CALLFN(·call8388608, 8388608)
   450  CALLFN(·call16777216, 16777216)
   451  CALLFN(·call33554432, 33554432)
   452  CALLFN(·call67108864, 67108864)
   453  CALLFN(·call134217728, 134217728)
   454  CALLFN(·call268435456, 268435456)
   455  CALLFN(·call536870912, 536870912)
   456  CALLFN(·call1073741824, 1073741824)
   457  
   458  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// No-op in this implementation: returns immediately.
   459  	RET
   460  
   461  // void jmpdefer(fv, sp);
   462  // called from deferreturn.
   463  // 1. grab stored LR for caller
   464  // 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
   465  // 3. BR to fn
   466  // When dynamically linking Go, it is not sufficient to rewind to the BL
   467  // deferreturn -- we might be jumping between modules and so we need to reset
   468  // the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
   469  // the BL deferreturn and jmpdefer rewinds to that.
   470  TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   471  	MOVD	0(R1), R31	// step 1: stored LR at 0(R1)
   472  	SUB     $8, R31		// step 2: back up two 4-byte instructions
   473  	MOVD	R31, LR		// fn will return to the rewound address
   474  
   475  	MOVD	fv+0(FP), R11	// R11 = closure context for fn
   476  	MOVD	argp+8(FP), R1
   477  	SUB	$FIXED_FRAME, R1	// new SP = argp - FIXED_FRAME
   478  	MOVD	0(R11), R12	// code pointer
   479  	MOVD	R12, CTR
   480  	BR	(CTR)		// step 3: jump (not call), so LR stays as set above
   481  
   482  // Save state of caller into g->sched. Smashes R31.
        // Stores the caller's return PC (LR) and SP into g->sched, zeroes
        // sched.lr and sched.ret, and calls badctxt if sched.ctxt is non-zero.
   483  TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   484  	MOVD	LR, R31
   485  	MOVD	R31, (g_sched+gobuf_pc)(g)
   486  	MOVD	R1, (g_sched+gobuf_sp)(g)
   487  	MOVD	R0, (g_sched+gobuf_lr)(g)
   488  	MOVD	R0, (g_sched+gobuf_ret)(g)
   489  	// Assert ctxt is zero. See func save.
   490  	MOVD	(g_sched+gobuf_ctxt)(g), R31
   491  	CMP	R0, R31
   492  	BEQ	2(PC)		// ctxt == 0: skip the badctxt call
   493  	BL	runtime·badctxt(SB)
   494  	RET
   495  
   496  // func asmcgocall(fn, arg unsafe.Pointer) int32
   497  // Call fn(arg) on the scheduler stack,
   498  // aligned appropriately for the gcc ABI.
   499  // See cgocall.go for more details.
        // The low 32 bits of fn's R3 result are stored as the return value.
   500  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   501  	MOVD	fn+0(FP), R3
   502  	MOVD	arg+8(FP), R4
   503  
   504  	MOVD	R1, R7		// save original stack pointer
   505  	MOVD	g, R5		// R5 = g on entry, saved on the g0 stack below
   506  
   507  	// Figure out if we need to switch to m->g0 stack.
   508  	// We get called to create new OS threads too, and those
   509  	// come in on the m->g0 stack already.
   510  	MOVD	g_m(g), R6
   511  	MOVD	m_g0(R6), R6
   512  	CMP	R6, g
   513  	BEQ	g0
   514  	BL	gosave<>(SB)
   515  	MOVD	R6, g
   516  	BL	runtime·save_g(SB)
   517  	MOVD	(g_sched+gobuf_sp)(g), R1
   518  
   519  	// Now on a scheduling stack (a pthread-created stack).
   520  g0:
   521  	// Save room for two of our pointers, plus 32 bytes of callee
   522  	// save area that lives on the caller stack.
   523  	SUB	$48, R1
   524  	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
   525  	MOVD	R5, 40(R1)	// save old g on stack
   526  	MOVD	(g_stack+stack_hi)(R5), R5
   527  	SUB	R7, R5		// R5 = old g's stack.hi - original SP
   528  	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   529  	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
   530  	// This is a "global call", so put the global entry point in r12
   531  	MOVD	R3, R12
   532  	MOVD	R12, CTR
   533  	MOVD	R4, R3		// arg in r3
   534  	BL	(CTR)
   535  
   536  	// C code can clobber R0, so set it back to 0.  F27-F31 are
   537  	// callee save, so we don't need to recover those.
   538  	XOR	R0, R0
   539  	// Restore g, stack pointer, toc pointer.
   540  	// R3 is errno, so don't touch it
   541  	MOVD	40(R1), g
   542  	MOVD    (g_stack+stack_hi)(g), R5
   543  	MOVD    32(R1), R6
   544  	SUB     R6, R5		// R5 = stack.hi - saved depth = original SP, recomputed
   545  	MOVD    24(R5), R2	// TOC pointer from the original frame
   546  	BL	runtime·save_g(SB)
        	// The original SP is recomputed after save_g before being installed in R1.
   547  	MOVD	(g_stack+stack_hi)(g), R5
   548  	MOVD	32(R1), R6
   549  	SUB	R6, R5
   550  	MOVD	R5, R1
   551  
   552  	MOVW	R3, ret+16(FP)
   553  	RET
   554  
   555  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   556  // Forwards to cgocallback_gofunc, passing the address of the fn argument
   557  // slot as the Go func value together with frame, framesize and ctxt.
   558  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   559  	MOVD	framesize+16(FP), R3
   560  	MOVD	R3, FIXED_FRAME+16(R1)	// outgoing arg 2: framesize
   561  	MOVD	frame+8(FP), R3
   562  	MOVD	R3, FIXED_FRAME+8(R1)	// outgoing arg 1: frame
   563  	MOVD	$fn+0(FP), R3		// address of the fn slot, not its contents
   564  	MOVD	R3, FIXED_FRAME+0(R1)	// outgoing arg 0: func value
   565  	MOVD	ctxt+24(FP), R3
   566  	MOVD	R3, FIXED_FRAME+24(R1)	// outgoing arg 3: ctxt
   567  	MOVD	$runtime·cgocallback_gofunc(SB), R12	// indirect call through CTR
   568  	MOVD	R12, CTR
   569  	BL	(CTR)
   570  	RET
   571  
   572  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   573  // See cgocall.go for more details.
        // Locals in the 16-byte frame: savedm-8(SP) holds the m found on entry
        // (zero if needm had to be called), savedsp-16(SP) holds the previous
        // m->g0->sched.sp.
   574  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   575  	NO_LOCAL_POINTERS
   576  
   577  	// Load m and g from thread-local storage.
   578  	MOVB	runtime·iscgo(SB), R3
   579  	CMP	R3, $0
   580  	BEQ	nocgo
   581  	BL	runtime·load_g(SB)
   582  nocgo:
   583  
   584  	// If g is nil, Go did not create the current thread.
   585  	// Call needm to obtain one for temporary use.
   586  	// In this case, we're running on the thread stack, so there's
   587  	// lots of space, but the linker doesn't know. Hide the call from
   588  	// the linker analysis by using an indirect call.
   589  	CMP	g, $0
   590  	BEQ	needm
   591  
   592  	MOVD	g_m(g), R8
   593  	MOVD	R8, savedm-8(SP)
   594  	BR	havem
   595  
   596  needm:
   597  	MOVD	g, savedm-8(SP) // g is zero, so is m.
   598  	MOVD	$runtime·needm(SB), R12
   599  	MOVD	R12, CTR
   600  	BL	(CTR)
   601  
   602  	// Set m->sched.sp = SP, so that if a panic happens
   603  	// during the function we are about to execute, it will
   604  	// have a valid SP to run on the g0 stack.
   605  	// The next few lines (after the havem label)
   606  	// will save this SP onto the stack and then write
   607  	// the same SP back to m->sched.sp. That seems redundant,
   608  	// but if an unrecovered panic happens, unwindm will
   609  	// restore the g->sched.sp from the stack location
   610  	// and then systemstack will try to use it. If we don't set it here,
   611  	// that restored SP will be uninitialized (typically 0) and
   612  	// will not be usable.
   613  	MOVD	g_m(g), R8
   614  	MOVD	m_g0(R8), R3
   615  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   616  
   617  havem:
   618  	// Now there's a valid m, and we're running on its m->g0.
   619  	// Save current m->g0->sched.sp on stack and then set it to SP.
   620  	// Save current sp in m->g0->sched.sp in preparation for
   621  	// switch back to m->curg stack.
   622  	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
   623  	MOVD	m_g0(R8), R3
   624  	MOVD	(g_sched+gobuf_sp)(R3), R4
   625  	MOVD	R4, savedsp-16(SP)
   626  	MOVD	R1, (g_sched+gobuf_sp)(R3)
   627  
   628  	// Switch to m->curg stack and call runtime.cgocallbackg.
   629  	// Because we are taking over the execution of m->curg
   630  	// but *not* resuming what had been running, we need to
   631  	// save that information (m->curg->sched) so we can restore it.
   632  	// We can restore m->curg->sched.sp easily, because calling
   633  	// runtime.cgocallbackg leaves SP unchanged upon return.
   634  	// To save m->curg->sched.pc, we push it onto the stack.
   635  	// This has the added benefit that it looks to the traceback
   636  	// routine like cgocallbackg is going to return to that
   637  	// PC (because the frame we allocate below has the same
   638  	// size as cgocallback_gofunc's frame declared above)
   639  	// so that the traceback will seamlessly trace back into
   640  	// the earlier calls.
   641  	//
   642  	// In the new goroutine, -8(SP) is unused (where SP refers to
   643  	// m->curg's SP while we're setting it up, before we've adjusted it).
   644  	MOVD	m_curg(R8), g
   645  	BL	runtime·save_g(SB)
   646  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   647  	MOVD	(g_sched+gobuf_pc)(g), R5
   648  	MOVD	R5, -(FIXED_FRAME+16)(R4)	// saved curg PC goes in slot 0 of the new frame
   649  	MOVD	ctxt+24(FP), R3
   650  	MOVD	R3, -16(R4)			// ctxt is placed at offset FIXED_FRAME of the new frame
   651  	MOVD	$-(FIXED_FRAME+16)(R4), R1	// R1 = R4 - (FIXED_FRAME+16): switch to curg's stack
   652  	BL	runtime·cgocallbackg(SB)
   653  
   654  	// Restore g->sched (== m->curg->sched) from saved values.
   655  	MOVD	0(R1), R5
   656  	MOVD	R5, (g_sched+gobuf_pc)(g)
   657  	MOVD	$(FIXED_FRAME+16)(R1), R4
   658  	MOVD	R4, (g_sched+gobuf_sp)(g)
   659  
   660  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   661  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   662  	// so we do not have to restore it.)
   663  	MOVD	g_m(g), R8
   664  	MOVD	m_g0(R8), g
   665  	BL	runtime·save_g(SB)
   666  	MOVD	(g_sched+gobuf_sp)(g), R1
   667  	MOVD	savedsp-16(SP), R4
   668  	MOVD	R4, (g_sched+gobuf_sp)(g)
   669  
   670  	// If the m on entry was nil, we called needm above to borrow an m
   671  	// for the duration of the call. Since the call is over, return it with dropm.
   672  	MOVD	savedm-8(SP), R6
   673  	CMP	R6, $0
   674  	BNE	droppedm	// an m existed on entry: nothing to give back
   675  	MOVD	$runtime·dropm(SB), R12
   676  	MOVD	R12, CTR
   677  	BL	(CTR)
   678  droppedm:
   679  
   680  	// Done!
   681  	RET
   682  
   683  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register, then calls save_g.
   684  TEXT runtime·setg(SB), NOSPLIT, $0-8
   685  	MOVD	gg+0(FP), g
   686  	// This only happens if iscgo, so jump straight to save_g
   687  	BL	runtime·save_g(SB)
   688  	RET
   689  
   690  // void setg_gcc(G*); set g in C TLS.
   691  // Must obey the gcc calling convention.
        // The new G arrives in R3. The g register is restored before returning,
        // so only the effect of save_g persists for the caller.
   692  TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   693  	// The standard prologue clobbers R31, which is callee-save in
   694  	// the C ABI, so this function is declared NOFRAME and saves LR itself.
   695  	MOVD	LR, R4
   696  	// Also save g and R31, since they're callee-save in C ABI
   697  	MOVD	R31, R5
   698  	MOVD	g, R6
   699  
   700  	MOVD	R3, g
   701  	BL	runtime·save_g(SB)
   702  
   703  	MOVD	R6, g		// put back the caller's g, R31 and LR
   704  	MOVD	R5, R31
   705  	MOVD	R4, LR
   706  	RET
   707  
   708  TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
        	// Returns the doubleword at 0(R1); NOFRAME means R1 is still the
        	// caller's stack pointer here.
   709  	MOVD	0(R1), R3		// LR saved by caller
   710  	MOVD	R3, ret+0(FP)
   711  	RET
   712  
   713  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Deliberate crash: load through R0 (kept at zero by the runtime,
        	// so address 0), followed by UNDEF in case the load does not fault.
   714  	MOVW	(R0), R0
   715  	UNDEF
   716  
   717  #define	TBRL	268
   718  #define	TBRU	269		/* Time base Upper/Lower */
   719  
   720  // int64 runtime·cputicks(void)
        // Returns the time base as (upper << 32) | lower. The upper half is
        // read before and after the lower half, and the sequence is retried
        // if the two reads differ.
   721  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   722  	MOVW	SPR(TBRU), R4
   723  	MOVW	SPR(TBRL), R3
   724  	MOVW	SPR(TBRU), R5
   725  	CMPW	R4, R5
   726  	BNE	-4(PC)		// upper half changed: restart from the first read
   727  	SLD	$32, R5
   728  	OR	R5, R3
   729  	MOVD	R3, ret+0(FP)
   730  	RET
   731  
   732  // AES hashing not implemented for ppc64
        // Each stub only loads through R0 (address 0 while R0 is zero) and has
        // no RET, so a stub whose load does not fault falls through to the next.
   733  TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   734  	MOVW	(R0), R1
   735  TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   736  	MOVW	(R0), R1
   737  TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   738  	MOVW	(R0), R1
   739  TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   740  	MOVW	(R0), R1
   741  
   741  
   742  TEXT runtime·memequal(SB),NOSPLIT,$0-25
        	// Wrapper around runtime·memeqbody: a, b and size are passed to it
        	// in R3, R4 and R5, and the byte it leaves in R9 is returned.
   743  	MOVD    size+16(FP), R5
   744  	MOVD    b+8(FP), R4
   745  	MOVD    a+0(FP), R3
   746  
   747  	BL	runtime·memeqbody(SB)
   748  	MOVB    R9, ret+24(FP)
   749  	RET
   750  
   751  // memequal_varlen(a, b unsafe.Pointer) bool
        // Like memequal, but the length is read from the closure in R11.
        // Identical pointers return true without calling memeqbody.
   752  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   753  	MOVD	a+0(FP), R3
   754  	MOVD	b+8(FP), R4
   755  	CMP	R3, R4
   756  	BEQ	eq
   757  	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
   758  	BL	runtime·memeqbody(SB)
   759  	MOVB	R9, ret+16(FP)	// R9 is read as memeqbody's result
   760  	RET
   761  eq:
   762  	MOVD	$1, R3
   763  	MOVB	R3, ret+16(FP)	// same address: equal
   764  	RET
   765  
   766  // Do an efficient memcmp for ppc64le
   767  // R3 = s1 len
   768  // R4 = s2 len
   769  // R5 = s1 addr
   770  // R6 = s2 addr
   771  // R7 = addr of return value
        //
        // Stores -1, 0 or +1 as a doubleword at (R7). The lengths are compared
        // once into CR2; min(len1, len2) bytes are then compared, and CR2
        // decides the result when that common prefix is equal.
        // Uses R8-R10, R14-R16 and CTR as scratch.
   772  TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
   773  	MOVD	R3,R8		// set up length
   774  	CMP	R3,R4,CR2	// unequal?
   775  	BC	12,8,setuplen	// BLT CR2
   776  	MOVD	R4,R8		// use R4 for comparison len
   777  setuplen:
   778  	MOVD	R8,CTR		// set up loop counter
   779  	CMP	R8,$8		// only optimize >=8
   780  	BLT	simplecheck
   781  	DCBT	(R5)		// cache hint
   782  	DCBT	(R6)
   783  	CMP	R8,$32		// optimize >= 32
   784  	MOVD	R8,R9
   785  	BLT	setup8a		// 8 byte moves only
   786  setup32a:
   787  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   788  	MOVD	R9,CTR
   789  
   790          // Special processing for 32 bytes or longer.
   791          // Loading this way is faster and correct as long as the
   792  	// doublewords being compared are equal. Once they
   793  	// are found unequal, reload them in proper byte order
   794  	// to determine greater or less than.
   795  loop32a:
   796  	MOVD	0(R5),R9	// doublewords to compare
   797  	MOVD	0(R6),R10	// get 4 doublewords
   798  	MOVD	8(R5),R14
   799  	MOVD	8(R6),R15
   800  	CMPU	R9,R10		// bytes equal?
   801  	MOVD	$0,R16		// set up for cmpne
   802  	BNE	cmpne		// further compare for LT or GT
   803  	MOVD	16(R5),R9	// get next pair of doublewords
   804  	MOVD	16(R6),R10
   805  	CMPU	R14,R15		// bytes match?
   806  	MOVD	$8,R16		// set up for cmpne
   807  	BNE	cmpne		// further compare for LT or GT
   808  	MOVD	24(R5),R14	// get next pair of doublewords
   809  	MOVD    24(R6),R15
   810  	CMPU	R9,R10		// bytes match?
   811  	MOVD	$16,R16		// set up for cmpne
   812  	BNE	cmpne		// further compare for LT or GT
   813  	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   814  	ADD	$32,R5		// bump up to next 32
   815  	ADD	$32,R6
   816  	CMPU    R14,R15		// bytes match?
   817  	BC	8,2,loop32a	// br ctr and cr
   818  	BNE	cmpne
   819  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   820  	BEQ	leftover	// and result is 0
   821  setup8a:
   822  	SRADCC	$3,R9,R9	// get the 8 byte count
   823  	BEQ	leftover	// shifted value is 0
   824  	MOVD	R9,CTR		// loop count for doublewords
   825  loop8:
   826  	MOVDBR	(R5+R0),R9	// doublewords to compare
   827  	MOVDBR	(R6+R0),R10	// LE compare order
   828  	ADD	$8,R5
   829  	ADD	$8,R6
   830  	CMPU	R9,R10		// match?
   831  	BC	8,2,loop8	// bt ctr <> 0 && cr
   832  	BGT	greater
   833  	BLT	less
   834  leftover:
   835  	ANDCC	$7,R8,R9	// check for leftover bytes
   836  	MOVD	R9,CTR		// save the ctr
   837  	BNE	simple		// leftover bytes
   838  	BC	12,10,equal	// test CR2 for length comparison
   839  	BC	12,8,less
   840  	BR	greater
   841  simplecheck:
   842  	CMP	R8,$0		// remaining compare length 0
   843  	BNE	simple		// do simple compare
   844  	BC	12,10,equal	// test CR2 for length comparison
   845  	BC	12,8,less	// 1st len < 2nd len, result less
   846  	BR	greater		// 1st len > 2nd len must be greater
   847  simple:
   848  	MOVBZ	0(R5), R9	// get byte from 1st operand
   849  	ADD	$1,R5
   850  	MOVBZ	0(R6), R10	// get byte from 2nd operand
   851  	ADD	$1,R6
   852  	CMPU	R9, R10
   853  	BC	8,2,simple	// bc ctr <> 0 && cr
   854  	BGT	greater		// 1st > 2nd
   855  	BLT	less		// 1st < 2nd
   856  	BC	12,10,equal	// test CR2 for length comparison
   857  	BC	12,9,greater	// CR2 GT: 1st len > 2nd len
   858  	BR	less		// must be less
   859  cmpne:				// only here is not equal
   860  	MOVDBR	(R5+R16),R8	// reload in reverse order
   861  	MOVDBR	(R6+R16),R9
   862  	CMPU	R8,R9		// compare correct endianness
   863  	BGT	greater		// here only if NE
   864  less:
   865  	MOVD	$-1,R3
   866  	MOVD	R3,(R7)		// return value if A < B
   867  	RET
   868  equal:
   869  	MOVD	$0,(R7)		// return value if A == B
   870  	RET
   871  greater:
   872  	MOVD	$1,R3
   873  	MOVD	R3,(R7)		// return value if A > B
   874  	RET
   875  
   876  // Do an efficient memcmp for ppc64 (BE)
        // Compares the first min(s1 len, s2 len) bytes of the two operands. If they
        // are all equal, the lengths decide the result. Stores -1 (s1 < s2),
        // 0 (equal) or 1 (s1 > s2) through R7 and returns.
        // The length comparison is done once into CR2 and consulted at the end;
        // every other compare in this routine targets CR0.
        // Clobbers R3, R5, R6, R8, R9, R10, R14, R15 and CTR.
   877  // R3 = s1 len
   878  // R4 = s2 len
   879  // R5 = s1 addr
   880  // R6 = s2 addr
   881  // R7 = addr of return value
   882  TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   883  	MOVD	R3,R8		// R8 = s1 len (tentative compare length)
   884  	CMP	R3,R4,CR2	// CR2 = s1 len vs s2 len, kept until the end
   885  	BC	12,8,setuplen	// BLT CR2: s1 is shorter, keep R8 = s1 len
   886  	MOVD	R4,R8		// otherwise compare only s2 len bytes
   887  setuplen:
   888  	MOVD	R8,CTR		// loop counter for the byte-wise path
   889  	CMP	R8,$8		// only optimize >=8
   890  	BLT	simplecheck
   891  	DCBT	(R5)		// cache hint
   892  	DCBT	(R6)
   893  	CMP	R8,$32		// optimize >= 32
   894  	MOVD	R8,R9		// setup8a expects the byte count in R9
   895  	BLT	setup8a		// 8 byte moves only
   896  
   897  setup32a:
   898  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   899  	MOVD	R9,CTR
   900  loop32a:
   901  	MOVD	0(R5),R9	// first doubleword of each operand
   902  	MOVD	0(R6),R10
   903  	MOVD	8(R5),R14	// second doubleword of each operand
   904  	MOVD	8(R6),R15
   905  	CMPU	R9,R10		// bytes equal?
   906  	BLT	less		// found to be less
   907  	BGT	greater		// found to be greater
   908  	MOVD	16(R5),R9	// get next pair of doublewords
   909  	MOVD	16(R6),R10
   910  	CMPU	R14,R15		// bytes match?
   911  	BLT	less		// found less
   912  	BGT	greater		// found greater
   913  	MOVD	24(R5),R14	// get next pair of doublewords
   914  	MOVD	24(R6),R15
   915  	CMPU	R9,R10		// bytes match?
   916  	BLT	less		// found to be less
   917  	BGT	greater		// found to be greater
   918  	ADD	$32,R5		// bump up to next 32
   919  	ADD	$32,R6
   920  	CMPU	R14,R15		// bytes match?
   921  	BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 EQ
   922  	BLT	less		// with BE, byte ordering is
   923  	BGT	greater		// good for compare
   924  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   925  	BEQ	leftover	// and result is 0
   926  setup8a:
   927  	SRADCC	$3,R9,R9	// get the 8 byte count
   928  	BEQ	leftover	// shifted value is 0
   929  	MOVD	R9,CTR		// loop count for doublewords
   930  loop8:
   931  	MOVD	(R5),R9
   932  	MOVD	(R6),R10
   933  	ADD	$8,R5
   934  	ADD	$8,R6
   935  	CMPU	R9,R10		// match?
   936  	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ
   937  	BGT	greater
   938  	BLT	less
   939  leftover:
   940  	ANDCC	$7,R8,R9	// check for leftover bytes
   941  	MOVD	R9,CTR		// save the ctr
   942  	BNE	simple		// leftover bytes
   943  	BC	12,10,equal	// CR2 EQ: all bytes and both lengths match
   944  	BC	12,8,less	// CR2 LT: 1st len < 2nd len, result less
   945  	BR	greater		// 1st len > 2nd len, result greater
   946  simplecheck:
   947  	CMP	R8,$0		// remaining compare length 0
   948  	BNE	simple		// do simple compare
   949  	BC	12,10,equal	// test CR2 for length comparison
   950  	BC 	12,8,less	// 1st len < 2nd len, result less
   951  	BR	greater		// 1st len > 2nd len, result greater
   952  simple:
   953  	MOVBZ	0(R5),R9	// get byte from 1st operand
   954  	ADD	$1,R5
   955  	MOVBZ	0(R6),R10	// get byte from 2nd operand
   956  	ADD	$1,R6
   957  	CMPU	R9,R10
   958  	BC	8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 EQ
   959  	BGT	greater		// 1st > 2nd
   960  	BLT	less		// 1st < 2nd
   961  	BC	12,10,equal	// test CR2 for length comparison
   962  	BC	12,9,greater	// CR2 GT: 1st len > 2nd len, result greater
   963  less:				// also reached by fall-through when 1st len < 2nd len
   964  	MOVD	$-1,R3
   965  	MOVD    R3,(R7)		// return value if A < B
   966  	RET
   967  equal:
   968  	MOVD    $0,(R7)		// return value if A == B
   969  	RET
   970  greater:
   971  	MOVD	$1,R3
   972  	MOVD	R3,(R7)		// return value if A > B
   973  	RET
   974  
   975  // Do an efficient memequal for ppc64
        // Compares R5 bytes starting at R3 and R4 and leaves 1 (all bytes equal,
        // including the len == 0 case) or 0 (some byte differs) in R9.
        // Lengths >= 32 are compared 32 bytes per iteration, the remainder 8 bytes
        // at a time, and any final 1-7 bytes one byte at a time.
        // Clobbers R3, R4, R6, R7, R8, R9 and CTR; R5 is left unchanged.
   976  // R3 = s1
   977  // R4 = s2
   978  // R5 = len
   979  // R9 = return value
   980  TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   981  	MOVD	R5,CTR		// byte count, used if we go straight to the byte loop
   982  	CMP	R5,$8		// only optimize >=8
   983  	BLT	simplecheck
   984  	DCBT	(R3)		// cache hint
   985  	DCBT	(R4)
   986  	CMP	R5,$32		// optimize >= 32
   987  	MOVD	R5,R6		// needed if setup8a branch
   988  	BLT	setup8a		// 8 byte moves only
   989  setup32a:			// >= 32 bytes
   990  	SRADCC	$5,R5,R6	// number of 32 byte chunks to compare
   991  	MOVD	R6,CTR
   992  loop32a:
   993  	MOVD	0(R3),R6	// doublewords to compare
   994  	MOVD	0(R4),R7
   995  	MOVD	8(R3),R8	// second pair, loaded ahead of the first compare
   996  	MOVD	8(R4),R9
   997  	CMP	R6,R7		// bytes match?
   998  	BNE	noteq
   999  	MOVD	16(R3),R6
  1000  	MOVD	16(R4),R7
  1001  	CMP	R8,R9		// bytes match?
  1002  	MOVD	24(R3),R8	// loads do not alter CR0, so the BNE below
  1003  	MOVD	24(R4),R9	// still tests the compare above
  1004  	BNE	noteq
  1005  	CMP	R6,R7		// bytes match?
  1006  	BNE	noteq
  1007  	ADD	$32,R3		// bump up to next 32
  1008  	ADD	$32,R4
  1009  	CMP	R8,R9		// bytes match?
  1010  	BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 EQ
  1011  	BNE	noteq
  1012  	ANDCC	$24,R5,R6	// Any 8 byte chunks?
  1013  	BEQ	leftover	// and result is 0
  1014  setup8a:
  1015  	SRADCC	$3,R6,R6	// get the 8 byte count
  1016  	BEQ	leftover	// shifted value is 0
  1017  	MOVD	R6,CTR
  1018  loop8:
  1019  	MOVD	0(R3),R6	// doublewords to compare
  1020  	ADD	$8,R3
  1021  	MOVD	0(R4),R7
  1022  	ADD	$8,R4
  1023  	CMP	R6,R7		// match?
  1024  	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ
  1025  	BNE	noteq
  1026  leftover:
  1027  	ANDCC	$7,R5,R6	// check for leftover bytes
  1028  	BEQ	equal
  1029  	MOVD	R6,CTR
  1030  	BR	simple
  1031  simplecheck:
  1032  	CMP	R5,$0		// nothing to compare: equal by definition
  1033  	BEQ	equal
  1034  simple:
  1035  	MOVBZ	0(R3), R6
  1036  	ADD	$1,R3
  1037  	MOVBZ	0(R4), R7
  1038  	ADD	$1,R4
  1039  	CMP	R6, R7
  1040  	// A mismatch also falls out of the loop branch below, so no separate exit is needed here.
  1041  	BC	8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 EQ
  1042  	BNE	noteq		// left the loop on a differing byte
  1043  	BR	equal		// counter exhausted with every byte equal
  1044  noteq:
  1045  	MOVD	$0, R9
  1046  	RET
  1047  equal:
  1048  	MOVD	$1, R9
  1049  	RET
  1050  
  1051  TEXT bytes·Equal(SB),NOSPLIT,$0-49
        	// Stores 1 in the byte result at ret+48(FP) when slices a and b have the
        	// same length and the same contents, 0 otherwise.
        	// Arguments: a at 0(FP) with its length at 8(FP), b at 24(FP) with its
        	// length at 32(FP).
  1052  	MOVD	a_len+8(FP), R4
  1053  	MOVD	b_len+32(FP), R5
  1054  	CMP	R5, R4		// unequal lengths are not equal
  1055  	BNE	noteq
  1056  	MOVD	a+0(FP), R3	// memeqbody takes s1 in R3, s2 in R4, len in R5
  1057  	MOVD	b+24(FP), R4
  1058  	BL	runtime·memeqbody(SB)
  1059  
  1060  	MOVBZ	R9,ret+48(FP)	// R9 = 1 if equal, 0 if not (set by memeqbody)
  1061  	RET
  1062  
  1063  noteq:
  1064  	MOVBZ	$0,ret+48(FP)
  1065  	RET
  1071  
  1072  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
  1073  	MOVD	$ret+32(FP), R14	// R14 = address of the result slot
  1074  	MOVBZ	c+24(FP), R5		// R5 = byte to look for, zero-extended
  1075  	MOVD	s_len+8(FP), R4		// R4 = number of bytes in the slice
  1076  	MOVD	s+0(FP), R3		// R3 = start of the slice data
  1077  	BR	runtime·indexbytebody<>(SB)	// plain branch: the shared body stores the result and returns for us
  1078  
  1079  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
  1080  	MOVD	$ret+24(FP), R14  // R14 = address of the result slot
  1081  	MOVBZ	c+16(FP), R5	  // R5 = byte to look for, zero-extended
  1082  	MOVD	s_len+8(FP), R4	  // R4 = number of bytes in the string
  1083  	MOVD	s+0(FP), R3	  // R3 = start of the string data
  1084  	BR	runtime·indexbytebody<>(SB)	// plain branch: the shared body stores the result and returns for us
  1085  
  1086  TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
        	// Shared body of bytes·IndexByte and strings·IndexByte.
        	// In:  R3 = start address, R4 = length in bytes, R5 = byte to find
        	//      (zero-extended), R14 = address where the result is stored.
        	// Out: *R14 = index of the first occurrence of the byte, or -1 if absent.
        	// Lengths <= 32 are scanned one aligned doubleword at a time (small_string).
        	// Longer inputs use doublewords until 16-byte aligned, then 16-byte vector
        	// loads, with a 64-bytes-per-iteration loop once 64-byte aligned.
        	// Loads are aligned and may cover bytes outside [R3, R3+R4); matches in
        	// those bytes are masked off or rejected before a result is stored.
  1087  	DCBT	(R3)		// Prepare cache line.
  1088  	MOVD	R3,R17		// Save base address for calculating the index later.
  1089  	RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
  1090  	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
  1091  	ADD	R4,R3,R7	// R7 = base+len (end address); becomes the last valid address below.
  1092  
  1093  	RLDIMI	$16,R5,$32,R5	// Byte now fills the low 4 bytes of R5.
  1094  	CMPU	R4,$32		// Check if it's a small string (<=32 bytes). Those will be processed differently.
  1095  	MOVD	$-1,R9		// All ones; shifted below into the mask for the first doubleword.
  1096  	WORD	$0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28), i.e. R6 = 8*(R3&7) bits.
  1097  	RLDIMI	$32,R5,$0,R5	// Byte now fills all 8 bytes of R5.
  1098  	MOVD	R7,R10		// Save the end address (base+len) in R10 for later.
  1099  	ADD	$-1,R7,R7	// R7 = address of the last valid byte.
  1100  #ifdef GOARCH_ppc64le
  1101  	SLD	R6,R9,R9	// Prepare mask for Little Endian
  1102  #else
  1103  	SRD	R6,R9,R9	// Same for Big Endian
  1104  #endif
  1105  	BLE	small_string	// Uses CR0 from the CMPU above: take the small string case if len <= 32.
  1106  
  1107  	// If we are 64-byte aligned, branch to qw_align just to get the auxiliary values
  1108  	// in V0, V1 and V10, then branch to the preloop.
  1109  	ANDCC	$63,R3,R11
  1110  	BEQ	CR0,qw_align
  1111  	RLDICL	$0,R3,$61,R11	// R11 = R3 & 7, offset of the base within its doubleword.
  1112  
  1113  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1114  	CMPB	R12,R5,R3	// Check for a match.
  1115  	AND	R9,R3,R3	// Mask bytes below s_base
  1116  	RLDICL	$0,R7,$61,R6	// R6 = offset (0-7) of the last valid byte within its doubleword.
  1117  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7
  1118  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
  1119  	BNE	CR7,done
  1120  	ADD	$8,R8,R8
  1121  	ADD	$-8,R4,R4
  1122  	ADD	R4,R11,R4	// R4 = bytes remaining from the new R8 (len - 8 + leading padding).
  1123  
  1124  	// Check for quadword alignment
  1125  	ANDCC	$15,R8,R11
  1126  	BEQ	CR0,qw_align
  1127  
  1128  	// Not aligned, so handle the next doubleword
  1129  	MOVD	0(R8),R12
  1130  	CMPB	R12,R5,R3
  1131  	CMPU	R3,$0,CR7
  1132  	BNE	CR7,done
  1133  	ADD	$8,R8,R8
  1134  	ADD	$-8,R4,R4
  1135  
  1136  	// Either quadword aligned or 64-byte at this point. We can use LVX.
  1137  qw_align:
  1138  
  1139  	// Set up auxiliary data for the vectorized algorithm.
  1140  	VSPLTISB  $0,V0		// Replicate 0 across V0
  1141  	VSPLTISB  $3,V10	// Use V10 as control for VBPERMQ
  1142  	MTVRD	  R5,V1
  1143  	LVSL	  (R0+R0),V11	// With a zero address this yields the byte values 0,1,...,15.
  1144  	VSLB	  V11,V10,V10	// V10 = those values << 3: bit indexes 0,8,...,120 (first bit of each byte).
  1145  	VSPLTB	  $7,V1,V1	// Replicate byte across V1
  1146  	CMPU	  R4, $64	// If len <= 64, don't use the vectorized loop
  1147  	BLE	  tail
  1148  
  1149  	// We will load 4 quadwords per iteration in the loop, so check for
  1150  	// 64-byte alignment. If 64-byte aligned, then branch to the preloop.
  1151  	ANDCC	  $63,R8,R11
  1152  	BEQ	  CR0,preloop
  1153  
  1154  	// Not 64-byte aligned. Load one quadword at a time until aligned.
  1155  	LVX	    (R8+R0),V4
  1156  	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1157  	BNE	    CR6,found_qw_align
  1158  	ADD	    $16,R8,R8
  1159  	ADD	    $-16,R4,R4
  1160  
  1161  	ANDCC	    $63,R8,R11
  1162  	BEQ	    CR0,preloop
  1163  	LVX	    (R8+R0),V4
  1164  	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1165  	BNE	    CR6,found_qw_align
  1166  	ADD	    $16,R8,R8
  1167  	ADD	    $-16,R4,R4
  1168  
  1169  	ANDCC	    $63,R8,R11
  1170  	BEQ	    CR0,preloop
  1171  	LVX	    (R8+R0),V4
  1172  	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1173  	BNE	    CR6,found_qw_align
  1174  	ADD	    $-16,R4,R4
  1175  	ADD	    $16,R8,R8		// Third step from a 16-byte boundary: R8 is now 64-byte aligned.
  1176  
  1177  	// 64-byte aligned. Prepare for the main loop.
  1178  preloop:
  1179  	CMPU	R4,$64
  1180  	BLE	tail	      // If len <= 64, don't use the vectorized loop
  1181  
  1182  	// We are now aligned to a 64-byte boundary. We will load 4 quadwords
  1183  	// per loop iteration. R10 holds the end address (base+len), so our loop
  1184  	// counter starts at (R10-R8)/64.
  1185  	SUB	R8,R10,R6     // R6 = bytes remaining from R8
  1186  	SRD	$6,R6,R9      // Loop counter in R9
  1187  	MOVD	R9,CTR
  1188  
  1189  	MOVD	$16,R11      // Load offsets for the vector loads
  1190  	MOVD	$32,R9
  1191  	MOVD	$48,R7
  1192  
  1193  	// Main loop we will load 64 bytes per iteration
  1194  loop:
  1195  	LVX	    (R8+R0),V2	      // Load 4 16-byte vectors
  1196  	LVX	    (R11+R8),V3
  1197  	LVX	    (R9+R8),V4
  1198  	LVX	    (R7+R8),V5
  1199  	VCMPEQUB    V1,V2,V6	      // Look for byte in each vector
  1200  	VCMPEQUB    V1,V3,V7
  1201  	VCMPEQUB    V1,V4,V8
  1202  	VCMPEQUB    V1,V5,V9
  1203  	VOR	    V6,V7,V11	      // Compress the result in a single vector
  1204  	VOR	    V8,V9,V12
  1205  	VOR	    V11,V12,V11
  1206  	VCMPEQUBCC  V0,V11,V11	      // Compare the merged result with zero; all-equal means no match
  1207  	BGE	    CR6,found	      // Not all zero: the byte is somewhere in these 64 bytes
  1208  	ADD	    $64,R8,R8
  1209  	BC	    16,0,loop	      // bdnz loop
  1210  
  1211  	// Handle the trailing bytes or R4 <= 64
  1212  	RLDICL	$0,R6,$58,R4	      // R4 = R6 & 63, bytes left after the 64-byte chunks
  1213  tail:
  1214  	CMPU	    R4,$0
  1215  	BEQ	    notfound
  1216  	LVX	    (R8+R0),V4
  1217  	VCMPEQUBCC  V1,V4,V6
  1218  	BNE	    CR6,found_qw_align	// found_qw_align rejects a hit at or beyond R4
  1219  	ADD	    $16,R8,R8
  1220  	CMPU	    R4,$16,CR6
  1221  	BLE	    CR6,notfound	// That quadword covered everything that was left
  1222  	ADD	    $-16,R4,R4
  1223  
  1224  	LVX	    (R8+R0),V4
  1225  	VCMPEQUBCC  V1,V4,V6
  1226  	BNE	    CR6,found_qw_align
  1227  	ADD	    $16,R8,R8
  1228  	CMPU	    R4,$16,CR6
  1229  	BLE	    CR6,notfound
  1230  	ADD	    $-16,R4,R4
  1231  
  1232  	LVX	    (R8+R0),V4
  1233  	VCMPEQUBCC  V1,V4,V6
  1234  	BNE	    CR6,found_qw_align
  1235  	ADD	    $16,R8,R8
  1236  	CMPU	    R4,$16,CR6
  1237  	BLE	    CR6,notfound
  1238  	ADD	    $-16,R4,R4
  1239  
  1240  	LVX	    (R8+R0),V4		// Fourth and last quadword: the tail is at most 64 bytes
  1241  	VCMPEQUBCC  V1,V4,V6
  1242  	BNE	    CR6,found_qw_align
  1243  
  1244  notfound:
  1245  	MOVD	$-1,R3
  1246  	MOVD	R3,(R14)
  1247  	RET
  1248  
  1249  found:
  1250  	// We will now compress the results into a single doubleword,
  1251  	// so it can be moved to a GPR for the final index calculation.
  1252  
  1253  	// The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the
  1254  	// first bit of each byte into bits 48-63.
  1255  	VBPERMQ	  V6,V10,V6
  1256  	VBPERMQ	  V7,V10,V7
  1257  	VBPERMQ	  V8,V10,V8
  1258  	VBPERMQ	  V9,V10,V9
  1259  
  1260  	// Shift each 16-bit component into its correct position for
  1261  	// merging into a single doubleword.
  1262  #ifdef GOARCH_ppc64le
  1263  	VSLDOI	  $2,V7,V7,V7
  1264  	VSLDOI	  $4,V8,V8,V8
  1265  	VSLDOI	  $6,V9,V9,V9
  1266  #else
  1267  	VSLDOI	  $6,V6,V6,V6
  1268  	VSLDOI	  $4,V7,V7,V7
  1269  	VSLDOI	  $2,V8,V8,V8
  1270  #endif
  1271  
  1272  	// Merge V6-V9 into a single doubleword and move to a GPR.
  1273  	VOR	V6,V7,V11
  1274  	VOR	V8,V9,V4
  1275  	VOR	V4,V11,V4
  1276  	MFVRD	V4,R3
  1277  
  1278  #ifdef GOARCH_ppc64le
  1279  	ADD	  $-1,R3,R11
  1280  	ANDN	  R3,R11,R11	// (R3-1) &^ R3: ones exactly in the positions below the lowest set bit
  1281  	POPCNTD	  R11,R11	// Count trailing zeros (Little Endian).
  1282  #else
  1283  	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1284  #endif
  1285  	ADD	R8,R11,R3	// Calculate byte address
  1286  
  1287  return:
  1288  	SUB	R17,R3		// Address of the match minus the saved base = index
  1289  	MOVD	R3,(R14)
  1290  	RET
  1291  
  1292  found_qw_align:
  1293  	// Use the same algorithm as above. Compress the result into
  1294  	// a single doubleword and move it to a GPR for the final
  1295  	// calculation.
  1296  	VBPERMQ	  V6,V10,V6
  1297  
  1298  #ifdef GOARCH_ppc64le
  1299  	MFVRD	  V6,R3
  1300  	ADD	  $-1,R3,R11
  1301  	ANDN	  R3,R11,R11
  1302  	POPCNTD	  R11,R11
  1303  #else
  1304  	VSLDOI	  $6,V6,V6,V6
  1305  	MFVRD	  V6,R3
  1306  	CNTLZD	  R3,R11
  1307  #endif
  1308  	ADD	  R8,R11,R3
  1309  	CMPU	  R11,R4	// R4 = valid bytes remaining at R8
  1310  	BLT	  return	// Match lies inside the input
  1311  	BR	  notfound	// Match lies at or past the end of the input
  1312  
  1313  done:
  1314  	// At this point, R3 has 0xFF in the same position as the byte we are
  1315  	// looking for in the doubleword. Use that to calculate the exact index
  1316  	// of the byte.
  1317  #ifdef GOARCH_ppc64le
  1318  	ADD	$-1,R3,R11
  1319  	ANDN	R3,R11,R11
  1320  	POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
  1321  #else
  1322  	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1323  #endif
  1324  	CMPU	R8,R7		// Check if we are at the last doubleword.
  1325  	SRD	$3,R11		// Convert the bit count to a byte offset.
  1326  	ADD	R11,R8,R3
  1327  	CMPU	R11,R6,CR7	// If at the last doubleword, check the byte offset.
  1328  	BNE	return		// Not the last doubleword: every byte of it is valid.
  1329  	BLE	CR7,return	// Last doubleword, match at or before the last valid byte.
  1330  	BR	notfound	// Match is past the end of the input.
  1331  
  1332  small_string:
  1333  	// We unroll this loop for better performance.
  1334  	CMPU	R4,$0		// Check for length=0
  1335  	BEQ	notfound
  1336  
  1337  	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1338  	CMPB	R12,R5,R3	// Check for a match.
  1339  	AND	R9,R3,R3	// Mask bytes below s_base.
  1340  	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
  1341  	RLDICL	$0,R7,$61,R6	// R6 = offset (0-7) of the last valid byte within its doubleword.
  1342  	RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
  1343  	CMPU	R8,R7
  1344  	BNE	CR7,done
  1345  	BEQ	notfound	// Hit length.
  1346  
  1347  	MOVDU	8(R8),R12	// Load the next doubleword and advance R8 by 8.
  1348  	CMPB	R12,R5,R3
  1349  	CMPU	R3,$0,CR6
  1350  	CMPU	R8,R7
  1351  	BNE	CR6,done
  1352  	BEQ	notfound
  1353  
  1354  	MOVDU	8(R8),R12
  1355  	CMPB	R12,R5,R3
  1356  	CMPU	R3,$0,CR6
  1357  	CMPU	R8,R7
  1358  	BNE	CR6,done
  1359  	BEQ	notfound
  1360  
  1361  	MOVDU	8(R8),R12
  1362  	CMPB	R12,R5,R3
  1363  	CMPU	R3,$0,CR6
  1364  	CMPU	R8,R7
  1365  	BNE	CR6,done
  1366  	BEQ	notfound
  1367  
  1368  	MOVDU	8(R8),R12	// Fifth doubleword: the most that <= 32 unaligned bytes can span.
  1369  	CMPB	R12,R5,R3
  1370  	CMPU	R3,$0,CR6
  1371  	BNE	CR6,done
  1372  	BR	notfound
  1373  
  1374  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
  1375  	// Gather the operands in the registers the shared compare body expects.
  1376  	MOVD	s1_len+8(FP), R3	// R3 = length of s1
  1377  	MOVD	s2_len+24(FP), R4	// R4 = length of s2
  1378  	MOVD	s1_base+0(FP), R5	// R5 = address of s1's bytes
  1379  	MOVD	s2_base+16(FP), R6	// R6 = address of s2's bytes
  1380  	MOVD	$ret+32(FP), R7		// R7 = address of the result slot
  1381  	CMP	R3, R4, CR6		// CR6 = s1 length vs s2 length
  1382  	CMP	R5, R6, CR7		// CR7 = do both strings start at the same address?
  1383  	BEQ	CR7, samedata
  1384  	// Different addresses: let the endian-specific body compare the bytes and store the result.
  1385  #ifdef	GOARCH_ppc64le
  1386  	BR	cmpbodyLE<>(SB)
  1387  #else
  1388  	BR	cmpbodyBE<>(SB)
  1389  #endif
  1390  
  1391  samedata:
  1392  	// Same starting address, so the result depends on the lengths alone.
  1393  	BEQ	CR6, samelen
  1394  	MOVD	$-1, R8			// result when s1 is the shorter string
  1395  	BLT	CR6, store
  1396  	MOVD	$1, R8			// result when s1 is the longer string
  1397  store:
  1398  	MOVD	R8, (R7)
  1399  	RET
  1400  
  1401  samelen:
  1402  	MOVD	$0, (R7)		// same address and same length: equal
  1403  	RET
  1404  
  1405  TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
        	// Three-way comparison of two byte slices. Stores -1, 0 or 1 at ret+48(FP).
        	// Arguments: s1 data pointer at 0(FP) and length at 8(FP); s2 data pointer
        	// at 24(FP) and length at 32(FP).
        	// When both slices start at the same address only the lengths are compared;
        	// otherwise the endian-specific cmpbody routine compares the bytes and
        	// stores the result through R7.
  1406  	MOVD	s1_base+0(FP), R5	// R5 = s1 data pointer
  1407  	MOVD	s2_base+24(FP), R6	// R6 = s2 data pointer
  1408  	MOVD	s1_len+8(FP), R3	// R3 = s1 length
  1409  	CMP	R5,R6,CR7		// CR7 = same starting address?
  1410  	MOVD	s2_len+32(FP), R4	// R4 = s2 length
  1411  	MOVD	$ret+48(FP), R7		// R7 = address of the result slot
  1412  	CMP	R3,R4,CR6		// CR6 = s1 length vs s2 length
  1413  	BEQ	CR7,equal
  1414  
  1415  #ifdef	GOARCH_ppc64le
  1416  	BR	cmpbodyLE<>(SB)
  1417  #else
  1418  	BR      cmpbodyBE<>(SB)
  1419  #endif
  1420  
  1421  equal:
  1422  	BEQ	CR6,done		// same address, same length: equal
  1423  	MOVD	$1, R8
  1424  	BGT	CR6,greater		// s1 is longer: result 1
  1425  	NEG	R8			// s1 is shorter: result -1
  1426  
  1427  greater:
  1428  	MOVD	R8, (R7)
  1429  	RET
  1430  
  1431  done:
  1432  	MOVD	$0, (R7)
  1433  	RET
  1434  
  1435  TEXT runtime·return0(SB), NOSPLIT, $0	// Sets R3 to zero and returns.
  1436  	MOVW	$0, R3	// NOTE(review): presumably R3 is read as a return-value register by whoever resumes here — confirm against callers.
  1437  	RET
  1438  
  1439  // _cgo_topofstack is called from cgo wrappers and hands back g->m->curg.stack.hi in R3.
  1440  // Its callers use the gcc calling convention, which this routine must respect.
  1441  TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
  1442  	// Park LR plus g (R30) and R31, which are callee-save in the C ABI, in scratch registers.
  1443  	MOVD	LR, R6
  1444  	MOVD	R31, R5
  1445  	MOVD	g, R4
  1446  
  1447  	BL	runtime·load_g(SB)	// overwrites g (R30) and R31; the BL itself overwrites LR
  1448  	MOVD	g_m(g), R3			// R3 = g.m
  1449  	MOVD	m_curg(R3), R3			// R3 = g.m.curg
  1450  	MOVD	(g_stack+stack_hi)(R3), R3	// R3 = g.m.curg.stack.hi, the value handed back
  1451  
  1452  	MOVD	R6, LR
  1453  	MOVD	R5, R31
  1454  	MOVD	R4, g
  1455  	RET
  1456  
  1457  // The top-most function running on a goroutine
  1458  // returns to goexit+PCQuantum.
  1459  //
  1460  // When dynamically linking Go, it can be returned to from a function
  1461  // implemented in a different module and so needs to reload the TOC pointer
  1462  // from the stack (although this function declares that it does not set up a
  1463  // frame, newproc1 does in fact allocate one for goexit and saves the TOC
  1464  // pointer in the correct place).
  1465  // goexit+_PCQuantum is halfway through the usual global entry point prologue
  1466  // that derives r2 from r12 which is a bit silly, but not harmful.
  1467  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
  1468  	MOVD	24(R1), R2	// reload the TOC pointer from its save slot in the frame
  1469  	BL	runtime·goexit1(SB)	// does not return
  1470  	// traceback from goexit1 must hit code range of goexit
  1471  	MOVD	R0, R0	// NOP
  1472  
  1473  TEXT runtime·sigreturn(SB),NOSPLIT,$0-0	// Empty routine: returns immediately.
  1474  	RET	// NOTE(review): presumably only its address matters to the signal code — confirm against its users.
  1475  
  1476  // prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
  1477  // module containing runtime) to the frame that goexit will execute in when
  1478  // the goroutine exits. It's implemented in assembly mainly because that's the
  1479  // easiest way to get access to R2.
  1480  TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
  1481        MOVD    sp+0(FP), R3	// R3 = the sp argument, the base of the frame to prepare
  1482        MOVD    R2, 24(R3)	// store the TOC pointer at offset 24, the slot goexit reloads R2 from
  1483        RET
  1484  
  1485  TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
        	// Appends the record whose address is in R3 to the module data list: it is
        	// stored in the next field of *runtime·lastmoduledatap, which is then
        	// updated to point at it.
        	// NOTE(review): R3 is presumably a moduledata pointer passed by a C-ABI
        	// caller such as a shared-library initializer — confirm against the linker.
  1486  	ADD	$-8, R1			// make room for one doubleword below the incoming R1
  1487  	MOVD	R31, 0(R1)		// preserve R31; NOTE(review): presumably the SB-relative accesses below may use it as a temporary — confirm
  1488  	MOVD	runtime·lastmoduledatap(SB), R4	// R4 = current tail of the list
  1489  	MOVD	R3, moduledata_next(R4)		// tail.next = new record
  1490  	MOVD	R3, runtime·lastmoduledatap(SB)	// the new record becomes the tail
  1491  	MOVD	0(R1), R31		// restore R31
  1492  	ADD	$8, R1			// release the temporary doubleword
  1493  	RET
  1494  
  1495  TEXT ·checkASM(SB),NOSPLIT,$0-1	// Unconditionally stores 1 in the one-byte result.
  1496  	MOVW	$1, R3
  1497  	MOVB	R3, ret+0(FP)	// write the low byte of R3 (1) to the result slot
  1498  	RET