// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "asm_ppc64x.h"

// rt0_go is the runtime bootstrap. As written below it: zeroes R0 via
// reginit, derives g0's stack bounds from the incoming OS stack (64 KiB
// below R1), calls _cgo_init when that symbol is non-zero, links m0 and g0,
// runs check/args/osinit/schedinit, queues runtime·main through newproc and
// finally enters mstart. The store to address 0 after mstart is only reached
// if mstart returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer

	// initialize essential registers
	BL	runtime·reginit(SB)

	SUB	$(FIXED_FRAME+16), R1
	MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
	MOVW	R3, FIXED_FRAME+0(R1)	// argc
	MOVD	R4, FIXED_FRAME+8(R1)	// argv

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	$(-64*1024), R31
	ADD	R31, R1, R3
	MOVD	R3, g_stackguard0(g)
	MOVD	R3, g_stackguard1(g)
	MOVD	R3, (g_stack+stack_lo)(g)
	MOVD	R1, (g_stack+stack_hi)(g)

	// if there is a _cgo_init, call it using the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CMP	R0, R12
	BEQ	nocgo
	MOVD	R12, CTR		// r12 = "global function entry point"
	MOVD	R13, R5			// arg 2: TLS base pointer
	MOVD	$setg_gcc<>(SB), R4	// arg 1: setg
	MOVD	g, R3			// arg 0: G
	// C functions expect 32 bytes of space on caller stack frame
	// and a 16-byte aligned R1
	MOVD	R1, R14			// save current stack
	SUB	$32, R1			// reserve 32 bytes
	RLDCR	$0, R1, $~15, R1	// 16-byte align
	BL	(CTR)			// may clobber R0, R3-R12
	MOVD	R14, R1			// restore stack
	MOVD	24(R1), R2
	XOR	R0, R0			// fix R0

nocgo:
	// update stackguard after _cgo_init
	MOVD	(g_stack+stack_lo)(g), R3
	ADD	$const__StackGuard, R3
	MOVD	R3, g_stackguard0(g)
	MOVD	R3, g_stackguard1(g)

	// set the per-goroutine and per-mach "registers"
	MOVD	$runtime·m0(SB), R3

	// save m->g0 = g0
	MOVD	g, m_g0(R3)
	// save m0 to g0->m
	MOVD	R3, g_m(g)

	BL	runtime·check(SB)

	// args are already prepared
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVD	$runtime·mainPC(SB), R3		// entry
	MOVDU	R3, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	BL	runtime·newproc(SB)
	ADD	$(16+FIXED_FRAME), R1

	// start this M
	BL	runtime·mstart(SB)

	MOVD	R0, 0(R0)
	RET

DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint stores to address 0 (R0 is kept at zero, see reginit).
TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R0, 0(R0) // TODO: TD
	RET

// asminit performs no work on this architecture.
TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
	RET

TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
	// crosscall_ppc64 and crosscall2 need to reginit, but can't
	// get at the 'runtime.reginit' symbol.
	BR	runtime·reginit(SB)

TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
	// set R0 to zero, it's expected by the toolchain
	XOR	R0, R0
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	buf+0(FP), R3
	MOVD	R1, gobuf_sp(R3)
	MOVD	LR, R31
	MOVD	R31, gobuf_pc(R3)
	MOVD	g, gobuf_g(R3)
	MOVD	R0, gobuf_lr(R3)
	MOVD	R0, gobuf_ret(R3)
	// Assert ctxt is zero. See func save.
	MOVD	gobuf_ctxt(R3), R3
	CMP	R0, R3
	BEQ	2(PC)
	BL	runtime·badctxt(SB)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
	MOVD	buf+0(FP), R5

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVD	gobuf_ctxt(R5), R3
	CMP	R0, R3
	BEQ	nilctxt
	MOVD	$gobuf_ctxt(R5), R3
	MOVD	R3, FIXED_FRAME+0(R1)
	MOVD	R0, FIXED_FRAME+8(R1)
	BL	runtime·writebarrierptr_prewrite(SB)
	MOVD	buf+0(FP), R5		// reload buf after the call

nilctxt:
	MOVD	gobuf_g(R5), g	// make sure g is not nil
	BL	runtime·save_g(SB)

	MOVD	0(g), R4		// load through g; R4 is not used afterwards
	MOVD	gobuf_sp(R5), R1
	MOVD	gobuf_lr(R5), R31
	MOVD	R31, LR
	MOVD	gobuf_ret(R5), R3
	MOVD	gobuf_ctxt(R5), R11
	MOVD	R0, gobuf_sp(R5)
	MOVD	R0, gobuf_ret(R5)
	MOVD	R0, gobuf_lr(R5)
	MOVD	R0, gobuf_ctxt(R5)
	CMP	R0, R0 // set condition codes for == test, needed by stack split
	MOVD	gobuf_pc(R5), R12
	MOVD	R12, CTR
	BR	(CTR)

// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
	// Save caller state in g->sched
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	LR, R31
	MOVD	R31, (g_sched+gobuf_pc)(g)
	MOVD	R0, (g_sched+gobuf_lr)(g)
	MOVD	g, (g_sched+gobuf_g)(g)

	// Switch to m->g0 & its stack, call fn.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	CMP	g, R3
	BNE	2(PC)
	BR	runtime·badmcall(SB)
	MOVD	fn+0(FP), R11			// context
	MOVD	0(R11), R12			// code pointer
	MOVD	R12, CTR
	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
	MOVDU	R3, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	BL	(CTR)
	MOVD	24(R1), R2
	BR	runtime·badmcall2(SB)

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	// We have several undefs here so that 16 bytes past
	// $runtime·systemstack_switch lies within them whether or not the
	// instructions that derive r2 from r12 are there.
	UNDEF
	UNDEF
	UNDEF
	BL	(LR)	// make sure this function is not leaf
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R11		// context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_curg(R4), R6
	CMP	g, R6
	BEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R12
	MOVD	R12, CTR
	BL	(CTR)

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVD	$runtime·systemstack_switch(SB), R6
	ADD	$16, R6	// get past prologue (including r2-setting instructions when they're there)
	MOVD	R6, (g_sched+gobuf_pc)(g)
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	R0, (g_sched+gobuf_lr)(g)
	MOVD	g, (g_sched+gobuf_g)(g)

	// switch to g0
	MOVD	R5, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	// make it look like mstart called systemstack on g0, to stop traceback
	SUB	$FIXED_FRAME, R3
	MOVD	$runtime·mstart(SB), R4
	MOVD	R4, 0(R3)
	MOVD	R3, R1

	// call target function
	MOVD	0(R11), R12	// code pointer
	MOVD	R12, CTR
	BL	(CTR)

	// restore TOC pointer. It seems unlikely that we will use systemstack
	// to call a function defined in another module, but the results of
	// doing so would be so confusing that it's worth doing this.
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	MOVD	(g_sched+gobuf_sp)(g), R3
	MOVD	24(R3), R2
	// switch back to g
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1
	MOVD	R0, (g_sched+gobuf_sp)(g)
	RET

noswitch:
	// already on m stack, just call directly
	MOVD	0(R11), R12	// code pointer
	MOVD	R12, CTR
	BL	(CTR)
	MOVD	24(R1), R2
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3: framesize, R4: argsize, R5: LR
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	MOVD	g_m(g), R7
	MOVD	m_g0(R7), R8
	CMP	g, R8
	BNE	3(PC)
	BL	runtime·badmorestackg0(SB)
	BL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVD	m_gsignal(R7), R8
	CMP	g, R8
	BNE	3(PC)
	BL	runtime·badmorestackgsignal(SB)
	BL	runtime·abort(SB)

	// Called from f.
	// Set g->sched to context in f.
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	LR, R8
	MOVD	R8, (g_sched+gobuf_pc)(g)
	MOVD	R5, (g_sched+gobuf_lr)(g)
	// newstack will fill gobuf.ctxt.

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
	MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
	MOVD	g, (m_morebuf+gobuf_g)(R7)

	// Call newstack on m->g0's stack.
	MOVD	m_g0(R7), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1
	MOVDU	R0, -(FIXED_FRAME+8)(R1)	// create a call frame on g0
	MOVD	R11, FIXED_FRAME+0(R1)	// ctxt argument
	BL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	UNDEF

// morestack_noctxt clears the context register (R11) and continues in morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R0, R11
	BR	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH expects the argument size in R3. If R3 <= MAXSIZE it branches to
// NAME through CTR; otherwise BGT 4(PC) skips to whatever follows the macro.
#define DISPATCH(NAME,MAXSIZE)		\
	MOVD	$MAXSIZE, R31;		\
	CMP	R3, R31;		\
	BGT	4(PC);			\
	MOVD	$NAME(SB), R12;		\
	MOVD	R12, CTR;		\
	BR	(CTR)
// Note: can't just "BR NAME(SB)" - bad inlining results.

// reflect·call simply branches to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	BR	·reflectcall(SB)

// reflectcall loads the 32-bit argument size into R3 and walks the DISPATCH
// ladder: the first call* routine whose frame size is >= R3 is entered via
// CTR. If none fits (size > 1<<30), control reaches badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
	MOVWZ	argsize+24(FP), R3
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVD	$runtime·badreflectcall(SB), R12
	MOVD	R12, CTR
	BR	(CTR)

// CALLFN defines one fixed-frame call* routine. Its three phases:
//   1. Copy argsize bytes from arg to FIXED_FRAME(R1), one byte per
//      iteration. R3 and R5 start one byte *before* source and destination
//      because MOVBZU pre-increments; R4 = R5 + argsize is the end marker.
//   2. Call the function whose code pointer is the first word of f (f itself
//      is passed as context in R11), then reload R2 from 24(R1).
//   3. Hand callRet<> the type (R7), the destination arg+retoffset (R3), the
//      source frame+retoffset (R5) and the byte count n-retoffset (R4).
// Do not put // comments inside the macro body: they would swallow the
// line-continuation backslash.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVD	arg+16(FP), R3;			\
	MOVWZ	argsize+24(FP), R4;		\
	MOVD	R1, R5;				\
	ADD	$(FIXED_FRAME-1), R5;		\
	SUB	$1, R3;				\
	ADD	R5, R4;				\
	CMP	R5, R4;				\
	BEQ	4(PC);				\
	MOVBZU	1(R3), R6;			\
	MOVBZU	R6, 1(R5);			\
	BR	-4(PC);				\
	/* call function */			\
	MOVD	f+8(FP), R11;			\
	MOVD	(R11), R12;			\
	MOVD	R12, CTR;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	BL	(CTR);				\
	MOVD	24(R1), R2;			\
	/* copy return values back */		\
	MOVD	argtype+0(FP), R7;		\
	MOVD	arg+16(FP), R3;			\
	MOVWZ	n+24(FP), R4;			\
	MOVWZ	retoffset+28(FP), R6;		\
	ADD	$FIXED_FRAME, R1, R5;		\
	ADD	R6, R5;				\
	ADD	R6, R3;				\
	SUB	R6, R4;				\
	BL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
	MOVD	R7, FIXED_FRAME+0(R1)
	MOVD	R3, FIXED_FRAME+8(R1)
	MOVD	R5, FIXED_FRAME+16(R1)
	MOVD	R4, FIXED_FRAME+24(R1)
	BL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield returns immediately on this architecture.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	RET

// void jmpdefer(fv, sp);
// called from deferreturn.
// 1. grab stored LR for caller
// 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
// 3. BR to fn
// When dynamically linking Go, it is not sufficient to rewind to the BL
// deferreturn -- we might be jumping between modules and so we need to reset
// the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
// the BL deferreturn and jmpdefer rewinds to that.
TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
	MOVD	0(R1), R31
	SUB	$8, R31
	MOVD	R31, LR

	MOVD	fv+0(FP), R11
	MOVD	argp+8(FP), R1
	SUB	$FIXED_FRAME, R1
	MOVD	0(R11), R12
	MOVD	R12, CTR
	BR	(CTR)

// Save state of caller into g->sched. Smashes R31.
TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
	MOVD	LR, R31
	MOVD	R31, (g_sched+gobuf_pc)(g)
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	R0, (g_sched+gobuf_lr)(g)
	MOVD	R0, (g_sched+gobuf_ret)(g)
	// Assert ctxt is zero. See func save.
	MOVD	(g_sched+gobuf_ctxt)(g), R31
	CMP	R0, R31
	BEQ	2(PC)
	BL	runtime·badctxt(SB)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R3
	MOVD	arg+8(FP), R4

	MOVD	R1, R7		// save original stack pointer
	MOVD	g, R5

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	MOVD	g_m(g), R6
	MOVD	m_g0(R6), R6
	CMP	R6, g
	BEQ	g0
	BL	gosave<>(SB)
	MOVD	R6, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1

	// Now on a scheduling stack (a pthread-created stack).
g0:
	// Save room for two of our pointers, plus 32 bytes of callee
	// save area that lives on the caller stack.
	SUB	$48, R1
	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
	MOVD	R5, 40(R1)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R5), R5
	SUB	R7, R5
	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
	// This is a "global call", so put the global entry point in r12
	MOVD	R3, R12
	MOVD	R12, CTR
	MOVD	R4, R3		// arg in r3
	BL	(CTR)

	// C code can clobber R0, so set it back to 0. F27-F31 are
	// callee save, so we don't need to recover those.
	XOR	R0, R0
	// Restore g, stack pointer, toc pointer.
	// R3 is errno, so don't touch it
	MOVD	40(R1), g
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	32(R1), R6
	SUB	R6, R5
	MOVD	24(R5), R2
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	32(R1), R6
	SUB	R6, R5
	MOVD	R5, R1

	MOVW	R3, ret+16(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	MOVD	$fn+0(FP), R3
	MOVD	R3, FIXED_FRAME+0(R1)
	MOVD	frame+8(FP), R3
	MOVD	R3, FIXED_FRAME+8(R1)
	MOVD	framesize+16(FP), R3
	MOVD	R3, FIXED_FRAME+16(R1)
	MOVD	ctxt+24(FP), R3
	MOVD	R3, FIXED_FRAME+24(R1)
	MOVD	$runtime·cgocallback_gofunc(SB), R12
	MOVD	R12, CTR
	BL	(CTR)
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
	NO_LOCAL_POINTERS

	// Load m and g from thread-local storage.
	MOVB	runtime·iscgo(SB), R3
	CMP	R3, $0
	BEQ	nocgo
	BL	runtime·load_g(SB)
nocgo:

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CMP	g, $0
	BEQ	needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)
	BR	havem

needm:
	MOVD	g, savedm-8(SP) // g is zero, so is m.
	MOVD	$runtime·needm(SB), R12
	MOVD	R12, CTR
	BL	(CTR)

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	R1, (g_sched+gobuf_sp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	R1, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, -8(SP) is unused (where SP refers to
	// m->curg's SP while we're setting it up, before we've adjusted it).
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -(FIXED_FRAME+16)(R4)
	MOVD	ctxt+24(FP), R3
	MOVD	R3, -16(R4)
	MOVD	$-(FIXED_FRAME+16)(R4), R1
	BL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values.
	MOVD	0(R1), R5
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	$(FIXED_FRAME+16)(R1), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVD	savedm-8(SP), R6
	CMP	R6, $0
	BNE	droppedm
	MOVD	$runtime·dropm(SB), R12
	MOVD	R12, CTR
	BL	(CTR)
droppedm:

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g
	// This only happens if iscgo, so jump straight to save_g
	BL	runtime·save_g(SB)
	RET

// void setg_gcc(G*); set g in C TLS.
// Must obey the gcc calling convention.
TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
	// The standard prologue clobbers R31, which is callee-save in
	// the C ABI, so this function is NOFRAME and saves LR itself.
	MOVD	LR, R4
	// Also save g and R31, since they're callee-save in C ABI
	MOVD	R31, R5
	MOVD	g, R6

	MOVD	R3, g
	BL	runtime·save_g(SB)

	MOVD	R6, g
	MOVD	R5, R31
	MOVD	R4, LR
	RET

TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
	MOVD	FIXED_FRAME+8(R1), R3		// LR saved by caller
	MOVD	R3, ret+8(FP)
	RET

// abort loads from address 0 (R0 is kept at zero); UNDEF follows.
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R0
	UNDEF

#define	TBRL	268
#define	TBRU	269		/* Time base Upper/Lower */

// int64 runtime·cputicks(void)
// Reads TBRU, TBRL, TBRU and retries (BNE -4(PC)) until both upper reads
// agree, then returns (upper << 32) | lower.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	MOVW	SPR(TBRU), R4
	MOVW	SPR(TBRL), R3
	MOVW	SPR(TBRU), R5
	CMPW	R4, R5
	BNE	-4(PC)
	SLD	$32, R5
	OR	R5, R3
	MOVD	R3, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVD	p+0(FP), R3
	MOVD	h+8(FP), R4
	MOVD	8(R11), R5		// size, read from offset 8 of the closure in R11
	MOVD	R3, FIXED_FRAME+0(R1)
	MOVD	R4, FIXED_FRAME+8(R1)
	MOVD	R5, FIXED_FRAME+16(R1)
	BL	runtime·memhash(SB)
	MOVD	FIXED_FRAME+24(R1), R3
	MOVD	R3, ret+16(FP)
	RET

// AES hashing not implemented for ppc64
// Each stub is a single load from address 0 with no RET.
TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1
TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1
TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1
TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1

// memequal loads a, b and size into R3, R4, R5, calls memeqbody and
// stores its result (R9) as the boolean return value.
TEXT runtime·memequal(SB),NOSPLIT,$0-25
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	MOVD	size+16(FP), R5

	BL	runtime·memeqbody(SB)
	MOVB	R9, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4
	BEQ	eq
	MOVD	8(R11), R5	// compiler stores size at offset 8 in the closure
	BL	runtime·memeqbody(SB)
	MOVB	R9, ret+16(FP)
	RET
eq:
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET

// Do an efficient memcmp for ppc64le
// R3 = s1 len
// R4 = s2 len
// R5 = s1 addr
// R6 = s2 addr
// R7 = addr of return value
//
// CR2 holds the signed comparison of the two lengths for the whole routine
// and breaks ties when the common prefix is equal. The result written to
// (R7) is -1, 0 or +1.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// unequal?
	BC	12,8,setuplen	// BLT CR2
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a		// 8 byte moves only
setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR

	// Special processing for 32 bytes or longer.
	// Loading this way is faster and correct as long as the
	// doublewords being compared are equal. Once they
	// are found unequal, reload them in proper byte order
	// to determine greater or less than.
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	MOVD	$0,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	MOVD	$8,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10		// bytes match?
	MOVD	$16,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BNE	cmpne
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVDBR	(R5+R0),R9	// doublewords to compare
	MOVDBR	(R6+R0),R10	// LE compare order
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less
	BR	greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5), R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6), R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9, R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,9,greater	// 1st len > 2nd len
	BR	less		// must be less
cmpne:				// only here if not equal
	MOVDBR	(R5+R16),R8	// reload in reverse order
	MOVDBR	(R6+R16),R9
	CMPU	R8,R9		// compare correct endianness
	BGT	greater		// here only if NE
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)		// return value if A < B
	RET
equal:
	MOVD	$0,(R7)		// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)		// return value if A > B
	RET

// Do an efficient memcmp for ppc64 (BE)
// R3 = s1 len
// R4 = s2 len
// R5 = s1 addr
// R6 = s2 addr
// R7 = addr of return value
//
// CR2 holds the signed comparison of the two lengths for the whole routine
// and breaks ties when the common prefix is equal. The result written to
// (R7) is -1, 0 or +1.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// unequal?
	BC	12,8,setuplen	// BLT CR2
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a		// 8 byte moves only

setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	BLT	less		// found less
	BGT	greater		// found greater
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10		// bytes match?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BLT	less		// with BE, byte ordering is
	BGT	greater		// good for compare
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVD	(R5),R9
	MOVD	(R6),R10
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less
	BR	greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5),R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6),R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9,R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,9,greater	// 1st len > 2nd len
	// otherwise 1st len < 2nd len: fall through to less
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)		// return value if A < B
	RET
equal:
	MOVD	$0,(R7)		// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)		// return value if A > B
	RET

// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// R9 = return value
//
// R9 is set to 1 when all len bytes match and to 0 otherwise.
// R6, R7, R8 and CTR are used as scratch.
TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R5,CTR
	CMP	R5,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R3)		// cache hint
	DCBT	(R4)
	CMP	R5,$32		// optimize >= 32
	MOVD	R5,R6		// needed if setup8a branch
	BLT	setup8a		// 8 byte moves only
setup32a:			// 8 byte aligned, >= 32 bytes
	SRADCC	$5,R5,R6	// number of 32 byte chunks to compare
	MOVD	R6,CTR
loop32a:
	MOVD	0(R3),R6	// doublewords to compare
	MOVD	0(R4),R7
	MOVD	8(R3),R8	//
	MOVD	8(R4),R9
	CMP	R6,R7		// bytes match?
	BNE	noteq
	MOVD	16(R3),R6
	MOVD	16(R4),R7
	CMP	R8,R9		// bytes match?
	MOVD	24(R3),R8
	MOVD	24(R4),R9
	BNE	noteq
	CMP	R6,R7		// bytes match?
	BNE	noteq
	ADD	$32,R3		// bump up to next 32
	ADD	$32,R4
	CMP	R8,R9		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BNE	noteq
	ANDCC	$24,R5,R6	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R6,R6	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R6,CTR
loop8:
	MOVD	0(R3),R6	// doublewords to compare
	ADD	$8,R3
	MOVD	0(R4),R7
	ADD	$8,R4
	CMP	R6,R7		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BNE	noteq
leftover:
	ANDCC	$7,R5,R6	// check for leftover bytes
	BEQ	equal
	MOVD	R6,CTR
	BR	simple
simplecheck:
	CMP	R5,$0
	BEQ	equal
simple:
	MOVBZ	0(R3), R6
	ADD	$1,R3
	MOVBZ	0(R4), R7
	ADD	$1,R4
	CMP	R6, R7
	BNE	noteq
	BC	8,2,simple
	BNE	noteq
	BR	equal
noteq:
	MOVD	$0, R9
	RET
equal:
	MOVD	$1, R9
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-33
	MOVD	s1_base+0(FP), R3
	MOVD	s2_base+16(FP), R4
	MOVD	$1, R5
	MOVB	R5, ret+32(FP)		// preset the result to true
	CMP	R3, R4
	BNE	2(PC)			// different base pointers: skip the early return
	RET				// same base pointer: result stays true
	MOVD	s1_len+8(FP), R5
	BL	runtime·memeqbody(SB)	// result comes back in R9
	MOVB	R9, ret+32(FP)
	RET

// Equal reports whether the byte slices a and b have the same length and
// the same contents. Slices of different length are reported unequal
// without their data being read.
TEXT bytes·Equal(SB),NOSPLIT,$0-49
	MOVD	a_len+8(FP), R4
	MOVD	b_len+32(FP), R5
	CMP	R5, R4			// unequal lengths are not equal
	BNE	noteq
	MOVD	a+0(FP), R3		// R3 = a base, R4 = b base, R5 = common length
	MOVD	b+24(FP), R4
	BL	runtime·memeqbody(SB)	// result comes back in R9

	MOVBZ	R9,ret+48(FP)
	RET

noteq:
	MOVBZ	$0,ret+48(FP)
	RET

// bytes·IndexByte loads its arguments into the registers indexbytebody
// expects and branches to it; the result is written through R14.
TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	s+0(FP), R3		// R3 = byte array pointer
	MOVD	s_len+8(FP), R4		// R4 = length
	MOVBZ	c+24(FP), R5		// R5 = byte
	MOVD	$ret+32(FP), R14	// R14 = &ret
	BR	runtime·indexbytebody<>(SB)

// strings·IndexByte is the string counterpart of bytes·IndexByte; only the
// argument offsets differ.
TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
	MOVD	s+0(FP), R3		// R3 = string
	MOVD	s_len+8(FP), R4		// R4 = length
	MOVBZ	c+16(FP), R5		// R5 = byte
	MOVD	$ret+24(FP), R14	// R14 = &ret
	BR	runtime·indexbytebody<>(SB)

// indexbytebody stores, through R14, the index of the first occurrence of
// the byte in R5 within the R4 bytes starting at R3, or -1 if there is none.
//
// In:
//	R3  = base address
//	R4  = length in bytes
//	R5  = byte to look for (zero-extended)
//	R14 = address of the result slot
//
// R3-R12, CTR, CR0, CR6 and CR7 are used as scratch.
//
// The scan works on whole aligned doublewords. Bytes that share a doubleword
// with the buffer but lie before its start are masked out of the first
// compare; a hit in the last doubleword is rejected in "done" if it lies
// past the last valid byte.
TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
	DCBT	(R3)		// Prepare cache line.
	MOVD	R3,R10		// Save base address for calculating the index later.
	RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.

	// Calculate last acceptable address and check for possible overflow
	// using a saturated add.
	// Overflows set last acceptable address to 0xffffffffffffffff.
	ADD	R4,R3,R7
	SUBC	R3,R7,R6
	SUBE	R0,R0,R9
	MOVW	R9,R6
	OR	R6,R7,R7

	RLDIMI	$16,R5,$32,R5
	CMPU	R4,$32		// Lengths <= 32 take the unrolled small-string path (BLE below).
	MOVD	$-1,R9
	WORD $0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
	RLDIMI	$32,R5,$0,R5
	ADD	$-1,R7,R7	// R7 = address of the last byte to examine.
#ifdef GOARCH_ppc64le
	SLD	R6,R9,R9	// Prepare mask for Little Endian
#else
	SRD	R6,R9,R9	// Same for Big Endian
#endif
	BLE	small_string	// Uses the CMPU R4,$32 result above: length <= 32.

	// Case for length >32 bytes
	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
	CMPB	R12,R5,R3	// Check for a match.
	AND	R9,R3,R3	// Mask bytes below s_base
	RLDICL	$0,R7,$61,R4	// R4 = offset of the last valid byte within its doubleword
	RLDICR	$0,R7,$60,R7	// Last doubleword in R7
	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
	BNE	CR7,done

	// Check for doubleword alignment and jump to the loop setup if aligned.
	// CR7 receives the low four bits of R8; CR bit 28 is the 8s bit, so the
	// branch is taken when R8+8 is already a multiple of 16.
	MOVFL	R8,CR7
	BC	12,28,loop_setup

	// Not aligned, so handle the second doubleword
	MOVDU	8(R8),R12
	CMPB	R12,R5,R3
	CMPU	R3,$0,CR7
	BNE	CR7,done

loop_setup:
	// We are now aligned to a 16-byte boundary. We will load two doublewords
	// per loop iteration. The last doubleword is in R7, so our loop counter
	// starts at (R7-R8)/16.
	SUB	R8,R7,R6
	SRD	$4,R6,R6
	MOVD	R6,CTR

	// Note: when we have an align directive, align this loop to 32 bytes so
	// it fits in a single icache sector.
loop:
	// Load two doublewords, then compare and merge in a single register. We
	// will check two doublewords per iteration, then find out which of them
	// contains the byte later. This speeds up the search.
	MOVD	8(R8),R12
	MOVDU	16(R8),R11
	CMPB	R12,R5,R3
	CMPB	R11,R5,R9
	OR	R3,R9,R6
	CMPU	R6,$0,CR7
	BNE	CR7,found
	BC	16,0,loop

	// Counter zeroed, but we may have another doubleword to read
	CMPU	R8,R7
	BEQ	notfound

	MOVDU	8(R8),R12
	CMPB	R12,R5,R3
	CMPU	R3,$0,CR6
	BNE	CR6,done

notfound:
	MOVD	$-1,R3
	MOVD	R3,(R14)
	RET

found:
	// One of the doublewords from the loop contains the byte we are looking
	// for. Check the first doubleword and adjust the address if found.
	CMPU	R3,$0,CR6
	ADD	$-8,R8,R8
	BNE	CR6,done

	// Not found, so it must be in the second doubleword of the merged pair.
	MOVD	R9,R3
	ADD	$8,R8,R8

done:
	// At this point, R3 has 0xFF in the same position as the byte we are
	// looking for in the doubleword. Use that to calculate the exact index
	// of the byte.
#ifdef GOARCH_ppc64le
	ADD	$-1,R3,R11
	ANDN	R3,R11,R11
	POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
#else
	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
#endif
	CMPU	R8,R7		// Check if we are at the last doubleword.
	SRD	$3,R11		// Convert the bit count to a byte offset.
	ADD	R11,R8,R3
	CMPU	R11,R4,CR7	// If at the last doubleword, check the byte offset.
	BNE	return		// Not the last doubleword: the hit is in range.
	BLE	CR7,return	// Last doubleword: in range only if offset <= R4.
	MOVD	$-1,R3
	MOVD	R3,(R14)
	RET

return:
	SUB	R10,R3		// Calculate index.
	MOVD	R3,(R14)
	RET

small_string:
	// Lengths 0..32. We unroll this loop for better performance: the first
	// doubleword plus up to four more.
	CMPU	R4,$0		// Check for length=0
	BEQ	notfound

	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
	CMPB	R12,R5,R3	// Check for a match.
	AND	R9,R3,R3	// Mask bytes below s_base.
	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
	RLDICL	$0,R7,$61,R4	// R4 = offset of the last valid byte within its doubleword
	RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
	CMPU	R8,R7
	BNE	CR7,done
	BEQ	notfound	// Hit length.

	MOVDU	8(R8),R12
	CMPB	R12,R5,R3
	CMPU	R3,$0,CR6
	CMPU	R8,R7
	BNE	CR6,done
	BEQ	notfound

	MOVDU	8(R8),R12
	CMPB	R12,R5,R3
	CMPU	R3,$0,CR6
	CMPU	R8,R7
	BNE	CR6,done
	BEQ	notfound

	MOVDU	8(R8),R12
	CMPB	R12,R5,R3
	CMPU	R3,$0,CR6
	CMPU	R8,R7
	BNE	CR6,done
	BEQ	notfound

	MOVDU	8(R8),R12
	CMPB	R12,R5,R3
	CMPU	R3,$0,CR6
	CMPU	R8,R7
	BNE	CR6,done
	BR	notfound

// cmpstring loads both strings' base and length plus the address of the
// result slot, then branches to the compare body for the target byte order.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	s1_base+0(FP), R5
	MOVD	s1_len+8(FP), R3
	MOVD	s2_base+16(FP), R6
	MOVD	s2_len+24(FP), R4
	MOVD	$ret+32(FP), R7		// R7 = &ret
#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

// bytes·Compare fills the same registers as cmpstring (R5/R3 for the first
// operand, R6/R4 for the second, R7 = &ret) from its own argument offsets
// and branches to the same compare bodies.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	s1+0(FP), R5
	MOVD	s1+8(FP), R3
	MOVD	s2+24(FP), R6
	MOVD	s2+32(FP), R4
	MOVD	$ret+48(FP), R7		// R7 = &ret
#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif

// return0 sets R3 to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVW	$0, R3
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
	// g (R30) and R31 are callee-save in the C ABI, so save them
	MOVD	g, R4
	MOVD	R31, R5
	MOVD	LR, R6			// the BL below overwrites LR

	BL	runtime·load_g(SB)	// clobbers g (R30), R31
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), R3
	MOVD	(g_stack+stack_hi)(R3), R3

	MOVD	R4, g
	MOVD	R5, R31
	MOVD	R6, LR
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
//
// When Go is dynamically linked, control can arrive here from a function
// that lives in a different module, so the TOC pointer has to be reloaded
// from the stack. (This function declares that it does not set up a frame,
// but newproc1 does in fact allocate one for goexit and saves the TOC
// pointer in the correct place.)
// goexit+_PCQuantum is halfway through the usual global entry point prologue
// that derives r2 from r12, which is a bit silly, but not harmful.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	24(R1), R2		// reload the TOC pointer from its stack slot
	BL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	MOVD	R0, R0			// NOP

// The four prefetch entry points below each take one 8-byte argument and
// return immediately without using it.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
	RET

// sigreturn takes no arguments and returns immediately.
TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
	RET

// prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
// module containing runtime) to the frame that goexit will execute in when
// the goroutine exits. It's implemented in assembly mainly because that's the
// easiest way to get access to R2.
TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
	MOVD	sp+0(FP), R3		// R3 = the frame address passed by the caller
	MOVD	R2, 24(R3)		// same slot goexit reloads R2 from
	RET

// addmoduledata links the moduledata whose address is in R3 onto the end of
// the module list: it becomes lastmoduledatap.next and then the new
// lastmoduledatap. R3 is taken as-is on entry; R4 is used as scratch.
// R31 is saved and restored around the body.
// NOTE(review): presumably R31 is preserved because the caller follows the
// C ABI and the global accesses may use R31 as a temporary; confirm.
TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
	SUB	$8, R1			// make room for one doubleword
	MOVD	R31, 0(R1)
	MOVD	runtime·lastmoduledatap(SB), R4
	MOVD	R3, moduledata_next(R4)
	MOVD	R3, runtime·lastmoduledatap(SB)
	MOVD	0(R1), R31
	ADD	$8, R1			// release it again
	RET

// checkASM stores 1 (true) in its one-byte result.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET