// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "asm_ppc64x.h"

TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer

	// initialize essential registers
	BL	runtime·reginit(SB)

	SUB	$(FIXED_FRAME+16), R1
	MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
	MOVW	R3, FIXED_FRAME+0(R1)	// argc
	MOVD	R4, FIXED_FRAME+8(R1)	// argv

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	$(-64*1024), R31
	ADD	R31, R1, R3
	MOVD	R3, g_stackguard0(g)
	MOVD	R3, g_stackguard1(g)
	MOVD	R3, (g_stack+stack_lo)(g)
	MOVD	R1, (g_stack+stack_hi)(g)

	// if there is a _cgo_init, call it using the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CMP	R0, R12
	BEQ	nocgo
	MOVD	R12, CTR		// r12 = "global function entry point"
	MOVD	R13, R5			// arg 2: TLS base pointer
	MOVD	$setg_gcc<>(SB), R4	// arg 1: setg
	MOVD	g, R3			// arg 0: G
	// C functions expect 32 bytes of space on caller stack frame
	// and a 16-byte aligned R1
	MOVD	R1, R14			// save current stack
	SUB	$32, R1			// reserve 32 bytes
	RLDCR	$0, R1, $~15, R1	// 16-byte align
	BL	(CTR)			// may clobber R0, R3-R12
	MOVD	R14, R1			// restore stack
	MOVD	24(R1), R2
	XOR	R0, R0			// fix R0

nocgo:
	// update stackguard after _cgo_init
	MOVD	(g_stack+stack_lo)(g), R3
	ADD	$const__StackGuard, R3
	MOVD	R3, g_stackguard0(g)
	MOVD	R3, g_stackguard1(g)

	// set the per-goroutine and per-mach "registers"
	MOVD	$runtime·m0(SB), R3

	// save m->g0 = g0
	MOVD	g, m_g0(R3)
	// save m0 to g0->m
	MOVD	R3, g_m(g)

	BL	runtime·check(SB)

	// args are already prepared
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVD	$runtime·mainPC(SB), R3		// entry
	MOVDU	R3, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	BL	runtime·newproc(SB)
	ADD	$(16+FIXED_FRAME), R1

	// start this M
	BL	runtime·mstart(SB)

	MOVD	R0, 0(R0)
	RET

DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R0, 0(R0) // TODO: TD
	RET

TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
	RET

TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
	// crosscall_ppc64 and crosscall2 need to reginit, but can't
	// get at the 'runtime.reginit' symbol.
	BR	runtime·reginit(SB)

TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
	// set R0 to zero, it's expected by the toolchain
	XOR	R0, R0
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	buf+0(FP), R3
	MOVD	R1, gobuf_sp(R3)
	MOVD	LR, R31
	MOVD	R31, gobuf_pc(R3)
	MOVD	g, gobuf_g(R3)
	MOVD	R0, gobuf_lr(R3)
	MOVD	R0, gobuf_ret(R3)
	// Assert ctxt is zero. See func save.
	MOVD	gobuf_ctxt(R3), R3
	CMP	R0, R3
	BEQ	2(PC)
	BL	runtime·badctxt(SB)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
	MOVD	buf+0(FP), R5
	MOVD	gobuf_g(R5), g	// make sure g is not nil
	BL	runtime·save_g(SB)

	MOVD	0(g), R4
	MOVD	gobuf_sp(R5), R1
	MOVD	gobuf_lr(R5), R31
	MOVD	R31, LR
	MOVD	gobuf_ret(R5), R3
	MOVD	gobuf_ctxt(R5), R11
	MOVD	R0, gobuf_sp(R5)	// clear to help garbage collector
	MOVD	R0, gobuf_ret(R5)
	MOVD	R0, gobuf_lr(R5)
	MOVD	R0, gobuf_ctxt(R5)
	CMP	R0, R0 // set condition codes for == test, needed by stack split
	MOVD	gobuf_pc(R5), R12
	MOVD	R12, CTR
	BR	(CTR)

// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
	// Save caller state in g->sched
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	LR, R31
	MOVD	R31, (g_sched+gobuf_pc)(g)
	MOVD	R0, (g_sched+gobuf_lr)(g)
	MOVD	g, (g_sched+gobuf_g)(g)

	// Switch to m->g0 & its stack, call fn.
	MOVD	g, R3
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	CMP	g, R3
	BNE	2(PC)
	BR	runtime·badmcall(SB)
	MOVD	fn+0(FP), R11			// context
	MOVD	0(R11), R12			// code pointer
	MOVD	R12, CTR
	MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
	MOVDU	R3, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	MOVDU	R0, -8(R1)
	BL	(CTR)
	MOVD	24(R1), R2
	BR	runtime·badmcall2(SB)

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	// We have several undefs here so that 16 bytes past
	// $runtime·systemstack_switch lies within them whether or not the
	// instructions that derive r2 from r12 are there.
	UNDEF
	UNDEF
	UNDEF
	BL	(LR)	// make sure this function is not leaf
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R11		// context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_curg(R4), R6
	CMP	g, R6
	BEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R12
	MOVD	R12, CTR
	BL	(CTR)

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVD	$runtime·systemstack_switch(SB), R6
	ADD	$16, R6	// get past prologue (including r2-setting instructions when they're there)
	MOVD	R6, (g_sched+gobuf_pc)(g)
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	R0, (g_sched+gobuf_lr)(g)
	MOVD	g, (g_sched+gobuf_g)(g)

	// switch to g0
	MOVD	R5, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	// make it look like mstart called systemstack on g0, to stop traceback
	SUB	$FIXED_FRAME, R3
	MOVD	$runtime·mstart(SB), R4
	MOVD	R4, 0(R3)
	MOVD	R3, R1

	// call target function
	MOVD	0(R11), R12	// code pointer
	MOVD	R12, CTR
	BL	(CTR)

	// restore TOC pointer. It seems unlikely that we will use systemstack
	// to call a function defined in another module, but the results of
	// doing so would be so confusing that it's worth doing this.
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	MOVD	(g_sched+gobuf_sp)(g), R3
	MOVD	24(R3), R2
	// switch back to g
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1
	MOVD	R0, (g_sched+gobuf_sp)(g)
	RET

noswitch:
	// already on m stack, just call directly
	// On other arches we do a tail call here, but it appears to be
	// impossible to tail call a function pointer in shared mode on
	// ppc64 because the caller is responsible for restoring the TOC.
	MOVD	0(R11), R12	// code pointer
	MOVD	R12, CTR
	BL	(CTR)
	MOVD	24(R1), R2
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3: framesize, R4: argsize, R5: LR
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	MOVD	g_m(g), R7
	MOVD	m_g0(R7), R8
	CMP	g, R8
	BNE	3(PC)
	BL	runtime·badmorestackg0(SB)
	BL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVD	m_gsignal(R7), R8
	CMP	g, R8
	BNE	3(PC)
	BL	runtime·badmorestackgsignal(SB)
	BL	runtime·abort(SB)

	// Called from f.
	// Set g->sched to context in f.
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	LR, R8
	MOVD	R8, (g_sched+gobuf_pc)(g)
	MOVD	R5, (g_sched+gobuf_lr)(g)
	MOVD	R11, (g_sched+gobuf_ctxt)(g)

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
	MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
	MOVD	g, (m_morebuf+gobuf_g)(R7)

	// Call newstack on m->g0's stack.
	MOVD	m_g0(R7), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1
	MOVDU	R0, -(FIXED_FRAME+0)(R1)	// create a call frame on g0
	BL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	UNDEF

TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R0, R11
	BR	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	MOVD	$MAXSIZE, R31;		\
	CMP	R3, R31;		\
	BGT	4(PC);			\
	MOVD	$NAME(SB), R12;		\
	MOVD	R12, CTR;		\
	BR	(CTR)
// Note: can't just "BR NAME(SB)" - bad inlining results.
TEXT reflect·call(SB), NOSPLIT, $0-0
	BR	·reflectcall(SB)

TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
	MOVWZ	argsize+24(FP), R3
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVD	$runtime·badreflectcall(SB), R12
	MOVD	R12, CTR
	BR	(CTR)

#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVD	arg+16(FP), R3;			\
	MOVWZ	argsize+24(FP), R4;		\
	MOVD	R1, R5;				\
	ADD	$(FIXED_FRAME-1), R5;		\
	SUB	$1, R3;				\
	ADD	R5, R4;				\
	CMP	R5, R4;				\
	BEQ	4(PC);				\
	MOVBZU	1(R3), R6;			\
	MOVBZU	R6, 1(R5);			\
	BR	-4(PC);				\
	/* call function */			\
	MOVD	f+8(FP), R11;			\
	MOVD	(R11), R12;			\
	MOVD	R12, CTR;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	BL	(CTR);				\
	MOVD	24(R1), R2;			\
	/* copy return values back */		\
	MOVD	argtype+0(FP), R7;		\
	MOVD	arg+16(FP), R3;			\
	MOVWZ	n+24(FP), R4;			\
	MOVWZ	retoffset+28(FP), R6;		\
	ADD	$FIXED_FRAME, R1, R5;		\
	ADD	R6, R5;				\
	ADD	R6, R3;				\
	SUB	R6, R4;				\
	BL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
	MOVD	R7, FIXED_FRAME+0(R1)
	MOVD	R3, FIXED_FRAME+8(R1)
	MOVD	R5, FIXED_FRAME+16(R1)
	MOVD	R4, FIXED_FRAME+24(R1)
	BL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

TEXT runtime·procyield(SB),NOSPLIT,$0-0
	RET

// void jmpdefer(fv, sp);
// called from deferreturn.
// 1. grab stored LR for caller
// 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
// 3. BR to fn
// When dynamically linking Go, it is not sufficient to rewind to the BL
// deferreturn -- we might be jumping between modules and so we need to reset
// the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
// the BL deferreturn and jmpdefer rewinds to that.
TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
	MOVD	0(R1), R31
	SUB	$8, R31
	MOVD	R31, LR

	MOVD	fv+0(FP), R11
	MOVD	argp+8(FP), R1
	SUB	$FIXED_FRAME, R1
	MOVD	0(R11), R12
	MOVD	R12, CTR
	BR	(CTR)

// Save state of caller into g->sched. Smashes R31.
TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
	MOVD	LR, R31
	MOVD	R31, (g_sched+gobuf_pc)(g)
	MOVD	R1, (g_sched+gobuf_sp)(g)
	MOVD	R0, (g_sched+gobuf_lr)(g)
	MOVD	R0, (g_sched+gobuf_ret)(g)
	// Assert ctxt is zero. See func save.
	MOVD	(g_sched+gobuf_ctxt)(g), R31
	CMP	R0, R31
	BEQ	2(PC)
	BL	runtime·badctxt(SB)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R3
	MOVD	arg+8(FP), R4

	MOVD	R1, R7		// save original stack pointer
	MOVD	g, R5

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	MOVD	g_m(g), R6
	MOVD	m_g0(R6), R6
	CMP	R6, g
	BEQ	g0
	BL	gosave<>(SB)
	MOVD	R6, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1

	// Now on a scheduling stack (a pthread-created stack).
g0:
	// Save room for two of our pointers, plus 32 bytes of callee
	// save area that lives on the caller stack.
	SUB	$48, R1
	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
	MOVD	R5, 40(R1)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R5), R5
	SUB	R7, R5
	MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
	// This is a "global call", so put the global entry point in r12
	MOVD	R3, R12
	MOVD	R12, CTR
	MOVD	R4, R3		// arg in r3
	BL	(CTR)

	// C code can clobber R0, so set it back to 0. F27-F31 are
	// callee save, so we don't need to recover those.
	XOR	R0, R0
	// Restore g, stack pointer, toc pointer.
	// R3 is errno, so don't touch it
	MOVD	40(R1), g
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	32(R1), R6
	SUB	R6, R5
	MOVD	24(R5), R2
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	32(R1), R6
	SUB	R6, R5
	MOVD	R5, R1

	MOVW	R3, ret+16(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	MOVD	$fn+0(FP), R3
	MOVD	R3, FIXED_FRAME+0(R1)
	MOVD	frame+8(FP), R3
	MOVD	R3, FIXED_FRAME+8(R1)
	MOVD	framesize+16(FP), R3
	MOVD	R3, FIXED_FRAME+16(R1)
	MOVD	ctxt+24(FP), R3
	MOVD	R3, FIXED_FRAME+24(R1)
	MOVD	$runtime·cgocallback_gofunc(SB), R12
	MOVD	R12, CTR
	BL	(CTR)
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
	NO_LOCAL_POINTERS

	// Load m and g from thread-local storage.
	MOVB	runtime·iscgo(SB), R3
	CMP	R3, $0
	BEQ	nocgo
	BL	runtime·load_g(SB)
nocgo:

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CMP	g, $0
	BEQ	needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)
	BR	havem

needm:
	MOVD	g, savedm-8(SP) // g is zero, so is m.
	MOVD	$runtime·needm(SB), R12
	MOVD	R12, CTR
	BL	(CTR)

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	R1, (g_sched+gobuf_sp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	R1, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, -8(SP) is unused (where SP refers to
	// m->curg's SP while we're setting it up, before we've adjusted it).
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -(FIXED_FRAME+16)(R4)
	MOVD	ctxt+24(FP), R3
	MOVD	R3, -16(R4)
	MOVD	$-(FIXED_FRAME+16)(R4), R1
	BL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values.
	MOVD	0(R1), R5
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	$(FIXED_FRAME+16)(R1), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R1
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	MOVD	savedm-8(SP), R6
	CMP	R6, $0
	BNE	droppedm
	MOVD	$runtime·dropm(SB), R12
	MOVD	R12, CTR
	BL	(CTR)
droppedm:

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g
	// This only happens if iscgo, so jump straight to save_g
	BL	runtime·save_g(SB)
	RET

// void setg_gcc(G*); set g in C TLS.
// Must obey the gcc calling convention.
TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
	// The standard prologue clobbers R31, which is callee-save in
	// the C ABI, so we have to use $-8-0 and save LR ourselves.
	MOVD	LR, R4
	// Also save g and R31, since they're callee-save in C ABI
	MOVD	R31, R5
	MOVD	g, R6

	MOVD	R3, g
	BL	runtime·save_g(SB)

	MOVD	R6, g
	MOVD	R5, R31
	MOVD	R4, LR
	RET

TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
	MOVD	0(R1), R3		// LR saved by caller
	MOVD	R3, ret+0(FP)
	RET

TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R0
	UNDEF

#define	TBRL	268
#define	TBRU	269		/* Time base Upper/Lower */

// int64 runtime·cputicks(void)
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	MOVW	SPR(TBRU), R4
	MOVW	SPR(TBRL), R3
	MOVW	SPR(TBRU), R5
	CMPW	R4, R5
	BNE	-4(PC)
	SLD	$32, R5
	OR	R5, R3
	MOVD	R3, ret+0(FP)
	RET

// AES hashing not implemented for ppc64
TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1
TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1
TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1
TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
	MOVW	(R0), R1

TEXT runtime·memequal(SB),NOSPLIT,$0-25
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	MOVD	size+16(FP), R5

	BL	runtime·memeqbody(SB)
	MOVB	R9, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4
	BEQ	eq
	MOVD	8(R11), R5	// compiler stores size at offset 8 in the closure
	BL	runtime·memeqbody(SB)
	MOVB	R9, ret+16(FP)
	RET
eq:
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET

// Do an efficient memcmp for ppc64le
// R3 = s1 len
// R4 = s2 len
// R5 = s1 addr
// R6 = s2 addr
// R7 = addr of return value
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// unequal?
	BC	12,8,setuplen	// BLT CR2
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a		// 8 byte moves only
setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR

        // Special processing for 32 bytes or longer.
        // Loading this way is faster and correct as long as the
	// doublewords being compared are equal. Once they
	// are found unequal, reload them in proper byte order
	// to determine greater or less than.
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	MOVD	$0,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	MOVD	$8,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD    24(R6),R15
	CMPU	R9,R10		// bytes match?
	MOVD	$16,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU    R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BNE	cmpne
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVDBR	(R5+R0),R9	// doublewords to compare
	MOVDBR	(R6+R0),R10	// LE compare order
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less
	BR	greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BC	12,10,equal	// test CR2 for length comparison
	BC 	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	MOVBZ	0(R5), R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6), R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9, R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,9,greater	// 2nd len > 1st len
	BR	less		// must be less
cmpne:				// only here is not equal
	MOVDBR	(R5+R16),R8	// reload in reverse order
	MOVDBR	(R6+R16),R9
	CMPU	R8,R9		// compare correct endianness
	BGT	greater		// here only if NE
less:
	MOVD	$-1,R3
	MOVD	R3,(R7)		// return value if A < B
	RET
equal:
	MOVD	$0,(R7)		// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)		// return value if A > B
	RET

// Do an efficient memcmp for ppc64 (BE)
// R3 = s1 len
// R4 = s2 len
// R5 = s1 addr
// R6 = s2 addr
// R7 = addr of return value
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// unequal?
	BC	12,8,setuplen	// BLT CR2
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9
	BLT	setup8a		// 8 byte moves only

setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	BLT	less		// found less
	BGT	greater		// found greater
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10		// bytes match?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BLT	less		// with BE, byte ordering is
	BGT	greater		// good for compare
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVD	(R5),R9
	MOVD	(R6),R10
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less
	BR	greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// same len, must be equal
simple:
	MOVBZ	0(R5),R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6),R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9,R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,9,greater	// 2nd len > 1st len
less:
	MOVD	$-1,R3
	MOVD    R3,(R7)		// return value if A < B
	RET
equal:
	MOVD    $0,(R7)		// return value if A == B
	RET
greater:
	MOVD	$1,R3
	MOVD	R3,(R7)		// return value if A > B
	RET

// Do an efficient memequal for ppc64
// R3 = s1
// R4 = s2
// R5 = len
// R9 = return value
TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R5,CTR
	CMP	R5,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R3)		// cache hint
	DCBT	(R4)
	CMP	R5,$32		// optimize >= 32
	MOVD	R5,R6		// needed if setup8a branch
	BLT	setup8a		// 8 byte moves only
setup32a:			// 8 byte aligned, >= 32 bytes
	SRADCC	$5,R5,R6	// number of 32 byte chunks to compare
	MOVD	R6,CTR
loop32a:
	MOVD	0(R3),R6	// doublewords to compare
	MOVD	0(R4),R7
	MOVD	8(R3),R8	//
	MOVD	8(R4),R9
	CMP	R6,R7		// bytes match?
	BNE	noteq
	MOVD	16(R3),R6
	MOVD	16(R4),R7
	CMP	R8,R9		// bytes match?
	MOVD	24(R3),R8
	MOVD	24(R4),R9
	BNE	noteq
	CMP	R6,R7		// bytes match?
	BNE	noteq
	ADD	$32,R3		// bump up to next 32
	ADD	$32,R4
	CMP	R8,R9		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BNE	noteq
	ANDCC	$24,R5,R6	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R6,R6	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R6,CTR
loop8:
	MOVD	0(R3),R6	// doublewords to compare
	ADD	$8,R3
	MOVD	0(R4),R7
	ADD	$8,R4
	CMP	R6,R7		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BNE	noteq
leftover:
	ANDCC	$7,R5,R6	// check for leftover bytes
	BEQ	equal
	MOVD	R6,CTR
	BR	simple
simplecheck:
	CMP	R5,$0
	BEQ	equal
simple:
	MOVBZ	0(R3), R6
	ADD	$1,R3
	MOVBZ	0(R4), R7
	ADD	$1,R4
	CMP	R6, R7
	BNE	noteq
	BC	8,2,simple
	BNE	noteq
	BR	equal
noteq:
	MOVD	$0, R9
	RET
equal:
	MOVD	$1, R9
	RET

TEXT bytes·Equal(SB),NOSPLIT,$0-49
	MOVD	a_len+8(FP), R4
	MOVD	b_len+32(FP), R5
	CMP	R5, R4		// unequal lengths are not equal
	BNE	noteq
	MOVD	a+0(FP), R3
	MOVD	b+24(FP), R4
	BL	runtime·memeqbody(SB)

	MOVBZ	R9,ret+48(FP)
	RET

noteq:
	MOVBZ	$0,ret+48(FP)
	RET

equal:
	MOVD	$1,R3
	MOVBZ	R3,ret+48(FP)
	RET

TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	s+0(FP), R3		// R3 = byte array pointer
	MOVD	s_len+8(FP), R4		// R4 = length
	MOVBZ	c+24(FP), R5		// R5 = byte
	MOVD	$ret+32(FP), R14	// R14 = &ret
	BR	runtime·indexbytebody<>(SB)

TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
	MOVD	s+0(FP), R3		// R3 = string
	MOVD	s_len+8(FP), R4		// R4 = length
	MOVBZ	c+16(FP), R5		// R5 = byte
	MOVD	$ret+24(FP), R14	// R14 = &ret
	BR	runtime·indexbytebody<>(SB)

TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
	DCBT	(R3)		// Prepare cache line.
1087 MOVD R3,R17 // Save base address for calculating the index later. 1088 RLDICR $0,R3,$60,R8 // Align address to doubleword boundary in R8. 1089 RLDIMI $8,R5,$48,R5 // Replicating the byte across the register. 1090 ADD R4,R3,R7 // Last acceptable address in R7. 1091 1092 RLDIMI $16,R5,$32,R5 1093 CMPU R4,$32 // Check if it's a small string (<32 bytes). Those will be processed differently. 1094 MOVD $-1,R9 1095 WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28). 1096 RLDIMI $32,R5,$0,R5 1097 MOVD R7,R10 // Save last acceptable address in R10 for later. 1098 ADD $-1,R7,R7 1099 #ifdef GOARCH_ppc64le 1100 SLD R6,R9,R9 // Prepare mask for Little Endian 1101 #else 1102 SRD R6,R9,R9 // Same for Big Endian 1103 #endif 1104 BLE small_string // Jump to the small string case if it's <32 bytes. 1105 1106 // If we are 64-byte aligned, branch to qw_align just to get the auxiliary values 1107 // in V0, V1 and V10, then branch to the preloop. 1108 ANDCC $63,R3,R11 1109 BEQ CR0,qw_align 1110 RLDICL $0,R3,$61,R11 1111 1112 MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8. 1113 CMPB R12,R5,R3 // Check for a match. 1114 AND R9,R3,R3 // Mask bytes below s_base 1115 RLDICL $0,R7,$61,R6 // length-1 1116 RLDICR $0,R7,$60,R7 // Last doubleword in R7 1117 CMPU R3,$0,CR7 // If we have a match, jump to the final computation 1118 BNE CR7,done 1119 ADD $8,R8,R8 1120 ADD $-8,R4,R4 1121 ADD R4,R11,R4 1122 1123 // Check for quadword alignment 1124 ANDCC $15,R8,R11 1125 BEQ CR0,qw_align 1126 1127 // Not aligned, so handle the next doubleword 1128 MOVD 0(R8),R12 1129 CMPB R12,R5,R3 1130 CMPU R3,$0,CR7 1131 BNE CR7,done 1132 ADD $8,R8,R8 1133 ADD $-8,R4,R4 1134 1135 // Either quadword aligned or 64-byte at this point. We can use LVX. 1136 qw_align: 1137 1138 // Set up auxiliary data for the vectorized algorithm. 
1139 VSPLTISB $0,V0 // Replicate 0 across V0 1140 VSPLTISB $3,V10 // Use V10 as control for VBPERMQ 1141 MTVRD R5,V1 1142 LVSL (R0+R0),V11 1143 VSLB V11,V10,V10 1144 VSPLTB $7,V1,V1 // Replicate byte across V1 1145 CMPU R4, $64 // If len <= 64, don't use the vectorized loop 1146 BLE tail 1147 1148 // We will load 4 quardwords per iteration in the loop, so check for 1149 // 64-byte alignment. If 64-byte aligned, then branch to the preloop. 1150 ANDCC $63,R8,R11 1151 BEQ CR0,preloop 1152 1153 // Not 64-byte aligned. Load one quadword at a time until aligned. 1154 LVX (R8+R0),V4 1155 VCMPEQUBCC V1,V4,V6 // Check for byte in V4 1156 BNE CR6,found_qw_align 1157 ADD $16,R8,R8 1158 ADD $-16,R4,R4 1159 1160 ANDCC $63,R8,R11 1161 BEQ CR0,preloop 1162 LVX (R8+R0),V4 1163 VCMPEQUBCC V1,V4,V6 // Check for byte in V4 1164 BNE CR6,found_qw_align 1165 ADD $16,R8,R8 1166 ADD $-16,R4,R4 1167 1168 ANDCC $63,R8,R11 1169 BEQ CR0,preloop 1170 LVX (R8+R0),V4 1171 VCMPEQUBCC V1,V4,V6 // Check for byte in V4 1172 BNE CR6,found_qw_align 1173 ADD $-16,R4,R4 1174 ADD $16,R8,R8 1175 1176 // 64-byte aligned. Prepare for the main loop. 1177 preloop: 1178 CMPU R4,$64 1179 BLE tail // If len <= 64, don't use the vectorized loop 1180 1181 // We are now aligned to a 64-byte boundary. We will load 4 quadwords 1182 // per loop iteration. The last doubleword is in R10, so our loop counter 1183 // starts at (R10-R8)/64. 
1184 SUB R8,R10,R6 1185 SRD $6,R6,R9 // Loop counter in R9 1186 MOVD R9,CTR 1187 1188 MOVD $16,R11 // Load offsets for the vector loads 1189 MOVD $32,R9 1190 MOVD $48,R7 1191 1192 // Main loop we will load 64 bytes per iteration 1193 loop: 1194 LVX (R8+R0),V2 // Load 4 16-byte vectors 1195 LVX (R11+R8),V3 1196 LVX (R9+R8),V4 1197 LVX (R7+R8),V5 1198 VCMPEQUB V1,V2,V6 // Look for byte in each vector 1199 VCMPEQUB V1,V3,V7 1200 VCMPEQUB V1,V4,V8 1201 VCMPEQUB V1,V5,V9 1202 VOR V6,V7,V11 // Compress the result in a single vector 1203 VOR V8,V9,V12 1204 VOR V11,V12,V11 1205 VCMPEQUBCC V0,V11,V11 // Check for byte 1206 BGE CR6,found 1207 ADD $64,R8,R8 1208 BC 16,0,loop // bdnz loop 1209 1210 // Handle the tailing bytes or R4 <= 64 1211 RLDICL $0,R6,$58,R4 1212 tail: 1213 CMPU R4,$0 1214 BEQ notfound 1215 LVX (R8+R0),V4 1216 VCMPEQUBCC V1,V4,V6 1217 BNE CR6,found_qw_align 1218 ADD $16,R8,R8 1219 CMPU R4,$16,CR6 1220 BLE CR6,notfound 1221 ADD $-16,R4,R4 1222 1223 LVX (R8+R0),V4 1224 VCMPEQUBCC V1,V4,V6 1225 BNE CR6,found_qw_align 1226 ADD $16,R8,R8 1227 CMPU R4,$16,CR6 1228 BLE CR6,notfound 1229 ADD $-16,R4,R4 1230 1231 LVX (R8+R0),V4 1232 VCMPEQUBCC V1,V4,V6 1233 BNE CR6,found_qw_align 1234 ADD $16,R8,R8 1235 CMPU R4,$16,CR6 1236 BLE CR6,notfound 1237 ADD $-16,R4,R4 1238 1239 LVX (R8+R0),V4 1240 VCMPEQUBCC V1,V4,V6 1241 BNE CR6,found_qw_align 1242 1243 notfound: 1244 MOVD $-1,R3 1245 MOVD R3,(R14) 1246 RET 1247 1248 found: 1249 // We will now compress the results into a single doubleword, 1250 // so it can be moved to a GPR for the final index calculation. 1251 1252 // The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the 1253 // first bit of each byte into bits 48-63. 1254 VBPERMQ V6,V10,V6 1255 VBPERMQ V7,V10,V7 1256 VBPERMQ V8,V10,V8 1257 VBPERMQ V9,V10,V9 1258 1259 // Shift each 16-bit component into its correct position for 1260 // merging into a single doubleword. 
1261 #ifdef GOARCH_ppc64le 1262 VSLDOI $2,V7,V7,V7 1263 VSLDOI $4,V8,V8,V8 1264 VSLDOI $6,V9,V9,V9 1265 #else 1266 VSLDOI $6,V6,V6,V6 1267 VSLDOI $4,V7,V7,V7 1268 VSLDOI $2,V8,V8,V8 1269 #endif 1270 1271 // Merge V6-V9 into a single doubleword and move to a GPR. 1272 VOR V6,V7,V11 1273 VOR V8,V9,V4 1274 VOR V4,V11,V4 1275 MFVRD V4,R3 1276 1277 #ifdef GOARCH_ppc64le 1278 ADD $-1,R3,R11 1279 ANDN R3,R11,R11 1280 POPCNTD R11,R11 // Count trailing zeros (Little Endian). 1281 #else 1282 CNTLZD R3,R11 // Count leading zeros (Big Endian). 1283 #endif 1284 ADD R8,R11,R3 // Calculate byte address 1285 1286 return: 1287 SUB R17,R3 1288 MOVD R3,(R14) 1289 RET 1290 1291 found_qw_align: 1292 // Use the same algorithm as above. Compress the result into 1293 // a single doubleword and move it to a GPR for the final 1294 // calculation. 1295 VBPERMQ V6,V10,V6 1296 1297 #ifdef GOARCH_ppc64le 1298 MFVRD V6,R3 1299 ADD $-1,R3,R11 1300 ANDN R3,R11,R11 1301 POPCNTD R11,R11 1302 #else 1303 VSLDOI $6,V6,V6,V6 1304 MFVRD V6,R3 1305 CNTLZD R3,R11 1306 #endif 1307 ADD R8,R11,R3 1308 CMPU R11,R4 1309 BLT return 1310 BR notfound 1311 1312 done: 1313 // At this point, R3 has 0xFF in the same position as the byte we are 1314 // looking for in the doubleword. Use that to calculate the exact index 1315 // of the byte. 1316 #ifdef GOARCH_ppc64le 1317 ADD $-1,R3,R11 1318 ANDN R3,R11,R11 1319 POPCNTD R11,R11 // Count trailing zeros (Little Endian). 1320 #else 1321 CNTLZD R3,R11 // Count leading zeros (Big Endian). 1322 #endif 1323 CMPU R8,R7 // Check if we are at the last doubleword. 1324 SRD $3,R11 // Convert trailing zeros to bytes. 1325 ADD R11,R8,R3 1326 CMPU R11,R6,CR7 // If at the last doubleword, check the byte offset. 1327 BNE return 1328 BLE CR7,return 1329 BR notfound 1330 1331 small_string: 1332 // We unroll this loop for better performance. 1333 CMPU R4,$0 // Check for length=0 1334 BEQ notfound 1335 1336 MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8. 
1337 CMPB R12,R5,R3 // Check for a match. 1338 AND R9,R3,R3 // Mask bytes below s_base. 1339 CMPU R3,$0,CR7 // If we have a match, jump to the final computation. 1340 RLDICL $0,R7,$61,R6 // length-1 1341 RLDICR $0,R7,$60,R7 // Last doubleword in R7. 1342 CMPU R8,R7 1343 BNE CR7,done 1344 BEQ notfound // Hit length. 1345 1346 MOVDU 8(R8),R12 1347 CMPB R12,R5,R3 1348 CMPU R3,$0,CR6 1349 CMPU R8,R7 1350 BNE CR6,done 1351 BEQ notfound 1352 1353 MOVDU 8(R8),R12 1354 CMPB R12,R5,R3 1355 CMPU R3,$0,CR6 1356 CMPU R8,R7 1357 BNE CR6,done 1358 BEQ notfound 1359 1360 MOVDU 8(R8),R12 1361 CMPB R12,R5,R3 1362 CMPU R3,$0,CR6 1363 CMPU R8,R7 1364 BNE CR6,done 1365 BEQ notfound 1366 1367 MOVDU 8(R8),R12 1368 CMPB R12,R5,R3 1369 CMPU R3,$0,CR6 1370 BNE CR6,done 1371 BR notfound 1372 1373 TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 1374 MOVD s1_base+0(FP), R5 1375 MOVD s2_base+16(FP), R6 1376 MOVD s1_len+8(FP), R3 1377 CMP R5,R6,CR7 1378 MOVD s2_len+24(FP), R4 1379 MOVD $ret+32(FP), R7 1380 CMP R3,R4,CR6 1381 BEQ CR7,equal 1382 1383 notequal: 1384 #ifdef GOARCH_ppc64le 1385 BR cmpbodyLE<>(SB) 1386 #else 1387 BR cmpbodyBE<>(SB) 1388 #endif 1389 1390 equal: 1391 BEQ CR6,done 1392 MOVD $1, R8 1393 BGT CR6,greater 1394 NEG R8 1395 1396 greater: 1397 MOVD R8, (R7) 1398 RET 1399 1400 done: 1401 MOVD $0, (R7) 1402 RET 1403 1404 TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 1405 MOVD s1+0(FP), R5 1406 MOVD s2+24(FP), R6 1407 MOVD s1+8(FP), R3 1408 CMP R5,R6,CR7 1409 MOVD s2+32(FP), R4 1410 MOVD $ret+48(FP), R7 1411 CMP R3,R4,CR6 1412 BEQ CR7,equal 1413 1414 #ifdef GOARCH_ppc64le 1415 BR cmpbodyLE<>(SB) 1416 #else 1417 BR cmpbodyBE<>(SB) 1418 #endif 1419 1420 equal: 1421 BEQ CR6,done 1422 MOVD $1, R8 1423 BGT CR6,greater 1424 NEG R8 1425 1426 greater: 1427 MOVD R8, (R7) 1428 RET 1429 1430 done: 1431 MOVD $0, (R7) 1432 RET 1433 1434 TEXT runtime·return0(SB), NOSPLIT, $0 1435 MOVW $0, R3 1436 RET 1437 1438 // Called from cgo wrappers, this function returns g->m->curg.stack.hi. 
1439 // Must obey the gcc calling convention. 1440 TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0 1441 // g (R30) and R31 are callee-save in the C ABI, so save them 1442 MOVD g, R4 1443 MOVD R31, R5 1444 MOVD LR, R6 1445 1446 BL runtime·load_g(SB) // clobbers g (R30), R31 1447 MOVD g_m(g), R3 1448 MOVD m_curg(R3), R3 1449 MOVD (g_stack+stack_hi)(R3), R3 1450 1451 MOVD R4, g 1452 MOVD R5, R31 1453 MOVD R6, LR 1454 RET 1455 1456 // The top-most function running on a goroutine 1457 // returns to goexit+PCQuantum. 1458 // 1459 // When dynamically linking Go, it can be returned to from a function 1460 // implemented in a different module and so needs to reload the TOC pointer 1461 // from the stack (although this function declares that it does not set up x-a 1462 // frame, newproc1 does in fact allocate one for goexit and saves the TOC 1463 // pointer in the correct place). 1464 // goexit+_PCQuantum is halfway through the usual global entry point prologue 1465 // that derives r2 from r12 which is a bit silly, but not harmful. 1466 TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0 1467 MOVD 24(R1), R2 1468 BL runtime·goexit1(SB) // does not return 1469 // traceback from goexit1 must hit code range of goexit 1470 MOVD R0, R0 // NOP 1471 1472 TEXT runtime·sigreturn(SB),NOSPLIT,$0-0 1473 RET 1474 1475 // prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the 1476 // module containing runtime) to the frame that goexit will execute in when 1477 // the goroutine exits. It's implemented in assembly mainly because that's the 1478 // easiest way to get access to R2. 
1479 TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8 1480 MOVD sp+0(FP), R3 1481 MOVD R2, 24(R3) 1482 RET 1483 1484 TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0 1485 ADD $-8, R1 1486 MOVD R31, 0(R1) 1487 MOVD runtime·lastmoduledatap(SB), R4 1488 MOVD R3, moduledata_next(R4) 1489 MOVD R3, runtime·lastmoduledatap(SB) 1490 MOVD 0(R1), R31 1491 ADD $8, R1 1492 RET 1493 1494 TEXT ·checkASM(SB),NOSPLIT,$0-1 1495 MOVW $1, R3 1496 MOVB R3, ret+0(FP) 1497 RET