// NOTE(review): this chunk is a web-scraped listing of Go runtime Plan 9
// assembly (asm_ppc64x.s, build tags ppc64/ppc64le); the original file's
// line numbers are fused into the text by the extraction, so it is not
// assemblable in this form. Code is left byte-identical below.
// Region: runtime·rt0_go — process entry. Calls reginit, carves g0's
// stack (64KB guard window) out of the OS stack, optionally calls
// _cgo_init via the gcc ABI (16-byte-aligned R1, 32-byte call area),
// refreshes stackguard, links m0<->g0, then check/args/osinit/schedinit,
// pushes runtime·mainPC and calls newproc, and finally mstart (which
// does not return; the MOVD R0, 0(R0) afterwards is a deliberate fault).
// Also: mainPC DATA/GLOBL, breakpoint (faults; TODO notes TD), asminit
// (no-op on ppc64), and the _cgo_reginit trampoline header.
github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/runtime/asm_ppc64x.s (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ppc64 ppc64le 6 7 #include "go_asm.h" 8 #include "go_tls.h" 9 #include "funcdata.h" 10 #include "textflag.h" 11 #include "asm_ppc64x.h" 12 13 TEXT runtime·rt0_go(SB),NOSPLIT,$0 14 // R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer 15 16 // initialize essential registers 17 BL runtime·reginit(SB) 18 19 SUB $(FIXED_FRAME+16), R1 20 MOVD R2, 24(R1) // stash the TOC pointer away again now we've created a new frame 21 MOVW R3, FIXED_FRAME+0(R1) // argc 22 MOVD R4, FIXED_FRAME+8(R1) // argv 23 24 // create istack out of the given (operating system) stack. 25 // _cgo_init may update stackguard. 26 MOVD $runtime·g0(SB), g 27 MOVD $(-64*1024), R31 28 ADD R31, R1, R3 29 MOVD R3, g_stackguard0(g) 30 MOVD R3, g_stackguard1(g) 31 MOVD R3, (g_stack+stack_lo)(g) 32 MOVD R1, (g_stack+stack_hi)(g) 33 34 // if there is a _cgo_init, call it using the gcc ABI. 
35 MOVD _cgo_init(SB), R12 36 CMP R0, R12 37 BEQ nocgo 38 MOVD R12, CTR // r12 = "global function entry point" 39 MOVD R13, R5 // arg 2: TLS base pointer 40 MOVD $setg_gcc<>(SB), R4 // arg 1: setg 41 MOVD g, R3 // arg 0: G 42 // C functions expect 32 bytes of space on caller stack frame 43 // and a 16-byte aligned R1 44 MOVD R1, R14 // save current stack 45 SUB $32, R1 // reserve 32 bytes 46 RLDCR $0, R1, $~15, R1 // 16-byte align 47 BL (CTR) // may clobber R0, R3-R12 48 MOVD R14, R1 // restore stack 49 MOVD 24(R1), R2 50 XOR R0, R0 // fix R0 51 52 nocgo: 53 // update stackguard after _cgo_init 54 MOVD (g_stack+stack_lo)(g), R3 55 ADD $const__StackGuard, R3 56 MOVD R3, g_stackguard0(g) 57 MOVD R3, g_stackguard1(g) 58 59 // set the per-goroutine and per-mach "registers" 60 MOVD $runtime·m0(SB), R3 61 62 // save m->g0 = g0 63 MOVD g, m_g0(R3) 64 // save m0 to g0->m 65 MOVD R3, g_m(g) 66 67 BL runtime·check(SB) 68 69 // args are already prepared 70 BL runtime·args(SB) 71 BL runtime·osinit(SB) 72 BL runtime·schedinit(SB) 73 74 // create a new goroutine to start program 75 MOVD $runtime·mainPC(SB), R3 // entry 76 MOVDU R3, -8(R1) 77 MOVDU R0, -8(R1) 78 MOVDU R0, -8(R1) 79 MOVDU R0, -8(R1) 80 MOVDU R0, -8(R1) 81 MOVDU R0, -8(R1) 82 BL runtime·newproc(SB) 83 ADD $(16+FIXED_FRAME), R1 84 85 // start this M 86 BL runtime·mstart(SB) 87 88 MOVD R0, 0(R0) 89 RET 90 91 DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) 92 GLOBL runtime·mainPC(SB),RODATA,$8 93 94 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0 95 MOVD R0, 0(R0) // TODO: TD 96 RET 97 98 TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0 99 RET 100 101 TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0 102 // crosscall_ppc64 and crosscall2 need to reginit, but can't 103 // get at the 'runtime.reginit' symbol. 
// NOTE(review): garbled web-scrape of Go runtime Plan 9 asm (original line
// numbers embedded in the text); code kept byte-identical, comments only.
// Region: goroutine context switching.
//  - reginit: zeroes R0, which the toolchain assumes is always zero.
//  - gosave (setjmp): records SP/PC/g into the Gobuf, zeroes lr/ret,
//    and asserts gobuf_ctxt is zero (badctxt otherwise).
//  - gogo (longjmp): restores g via save_g, reloads SP/LR/ret/ctxt from
//    the Gobuf, clears the Gobuf fields for GC, reloads R2 (TOC) from
//    24(R1), and sets CC with CMP R0,R0 for the stack-split check before
//    jumping to gobuf_pc through CTR.
//  - mcall(fn): saves caller state into g->sched, switches to m->g0's
//    stack and g, pushes the old g as fn's argument, and calls fn through
//    the FuncVal in R11 (context reg); fn must not return (badmcall2).
//  - systemstack_switch: dummy frame marker; the UNDEFs guarantee that
//    $sym+16 still lands inside the function whether or not the r2-from-r12
//    prologue instructions are emitted.
//  - systemstack(fn): if already on g0/gsignal, calls fn directly
//    (no tail call — the caller must restore TOC, per the comment at
//    "noswitch"); otherwise saves curg state disguised as
//    systemstack_switch, switches to g0, fakes an mstart frame to stop
//    traceback, runs fn, then restores TOC and switches back to curg.
104 BR runtime·reginit(SB) 105 106 TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0 107 // set R0 to zero, it's expected by the toolchain 108 XOR R0, R0 109 RET 110 111 /* 112 * go-routine 113 */ 114 115 // void gosave(Gobuf*) 116 // save state in Gobuf; setjmp 117 TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8 118 MOVD buf+0(FP), R3 119 MOVD R1, gobuf_sp(R3) 120 MOVD LR, R31 121 MOVD R31, gobuf_pc(R3) 122 MOVD g, gobuf_g(R3) 123 MOVD R0, gobuf_lr(R3) 124 MOVD R0, gobuf_ret(R3) 125 // Assert ctxt is zero. See func save. 126 MOVD gobuf_ctxt(R3), R3 127 CMP R0, R3 128 BEQ 2(PC) 129 BL runtime·badctxt(SB) 130 RET 131 132 // void gogo(Gobuf*) 133 // restore state from Gobuf; longjmp 134 TEXT runtime·gogo(SB), NOSPLIT, $16-8 135 MOVD buf+0(FP), R5 136 MOVD gobuf_g(R5), g // make sure g is not nil 137 BL runtime·save_g(SB) 138 139 MOVD 0(g), R4 140 MOVD gobuf_sp(R5), R1 141 MOVD gobuf_lr(R5), R31 142 MOVD 24(R1), R2 // restore R2 143 MOVD R31, LR 144 MOVD gobuf_ret(R5), R3 145 MOVD gobuf_ctxt(R5), R11 146 MOVD R0, gobuf_sp(R5) 147 MOVD R0, gobuf_ret(R5) 148 MOVD R0, gobuf_lr(R5) 149 MOVD R0, gobuf_ctxt(R5) 150 CMP R0, R0 // set condition codes for == test, needed by stack split 151 MOVD gobuf_pc(R5), R12 152 MOVD R12, CTR 153 BR (CTR) 154 155 // void mcall(fn func(*g)) 156 // Switch to m->g0's stack, call fn(g). 157 // Fn must never return. It should gogo(&g->sched) 158 // to keep running g. 159 TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 160 // Save caller state in g->sched 161 MOVD R1, (g_sched+gobuf_sp)(g) 162 MOVD LR, R31 163 MOVD R31, (g_sched+gobuf_pc)(g) 164 MOVD R0, (g_sched+gobuf_lr)(g) 165 MOVD g, (g_sched+gobuf_g)(g) 166 167 // Switch to m->g0 & its stack, call fn. 
168 MOVD g, R3 169 MOVD g_m(g), R8 170 MOVD m_g0(R8), g 171 BL runtime·save_g(SB) 172 CMP g, R3 173 BNE 2(PC) 174 BR runtime·badmcall(SB) 175 MOVD fn+0(FP), R11 // context 176 MOVD 0(R11), R12 // code pointer 177 MOVD R12, CTR 178 MOVD (g_sched+gobuf_sp)(g), R1 // sp = m->g0->sched.sp 179 MOVDU R3, -8(R1) 180 MOVDU R0, -8(R1) 181 MOVDU R0, -8(R1) 182 MOVDU R0, -8(R1) 183 MOVDU R0, -8(R1) 184 BL (CTR) 185 MOVD 24(R1), R2 186 BR runtime·badmcall2(SB) 187 188 // systemstack_switch is a dummy routine that systemstack leaves at the bottom 189 // of the G stack. We need to distinguish the routine that 190 // lives at the bottom of the G stack from the one that lives 191 // at the top of the system stack because the one at the top of 192 // the system stack terminates the stack walk (see topofstack()). 193 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0 194 // We have several undefs here so that 16 bytes past 195 // $runtime·systemstack_switch lies within them whether or not the 196 // instructions that derive r2 from r12 are there. 197 UNDEF 198 UNDEF 199 UNDEF 200 BL (LR) // make sure this function is not leaf 201 RET 202 203 // func systemstack(fn func()) 204 TEXT runtime·systemstack(SB), NOSPLIT, $0-8 205 MOVD fn+0(FP), R3 // R3 = fn 206 MOVD R3, R11 // context 207 MOVD g_m(g), R4 // R4 = m 208 209 MOVD m_gsignal(R4), R5 // R5 = gsignal 210 CMP g, R5 211 BEQ noswitch 212 213 MOVD m_g0(R4), R5 // R5 = g0 214 CMP g, R5 215 BEQ noswitch 216 217 MOVD m_curg(R4), R6 218 CMP g, R6 219 BEQ switch 220 221 // Bad: g is not gsignal, not g0, not curg. What is it? 222 // Hide call from linker nosplit analysis. 223 MOVD $runtime·badsystemstack(SB), R12 224 MOVD R12, CTR 225 BL (CTR) 226 227 switch: 228 // save our state in g->sched. Pretend to 229 // be systemstack_switch if the G stack is scanned. 
230 MOVD $runtime·systemstack_switch(SB), R6 231 ADD $16, R6 // get past prologue (including r2-setting instructions when they're there) 232 MOVD R6, (g_sched+gobuf_pc)(g) 233 MOVD R1, (g_sched+gobuf_sp)(g) 234 MOVD R0, (g_sched+gobuf_lr)(g) 235 MOVD g, (g_sched+gobuf_g)(g) 236 237 // switch to g0 238 MOVD R5, g 239 BL runtime·save_g(SB) 240 MOVD (g_sched+gobuf_sp)(g), R3 241 // make it look like mstart called systemstack on g0, to stop traceback 242 SUB $FIXED_FRAME, R3 243 MOVD $runtime·mstart(SB), R4 244 MOVD R4, 0(R3) 245 MOVD R3, R1 246 247 // call target function 248 MOVD 0(R11), R12 // code pointer 249 MOVD R12, CTR 250 BL (CTR) 251 252 // restore TOC pointer. It seems unlikely that we will use systemstack 253 // to call a function defined in another module, but the results of 254 // doing so would be so confusing that it's worth doing this. 255 MOVD g_m(g), R3 256 MOVD m_curg(R3), g 257 MOVD (g_sched+gobuf_sp)(g), R3 258 MOVD 24(R3), R2 259 // switch back to g 260 MOVD g_m(g), R3 261 MOVD m_curg(R3), g 262 BL runtime·save_g(SB) 263 MOVD (g_sched+gobuf_sp)(g), R1 264 MOVD R0, (g_sched+gobuf_sp)(g) 265 RET 266 267 noswitch: 268 // already on m stack, just call directly 269 // On other arches we do a tail call here, but it appears to be 270 // impossible to tail call a function pointer in shared mode on 271 // ppc64 because the caller is responsible for restoring the TOC. 272 MOVD 0(R11), R12 // code pointer 273 MOVD R12, CTR 274 BL (CTR) 275 MOVD 24(R1), R2 276 RET 277 278 /* 279 * support for morestack 280 */ 281 282 // Called during function prolog when more stack is needed. 283 // Caller has already loaded: 284 // R3: framesize, R4: argsize, R5: LR 285 // 286 // The traceback routines see morestack on a g0 as being 287 // the top of a stack (for example, morestack calling newstack 288 // calling the scheduler calling newm calling gc), so we must 289 // record an argument size. For that purpose, it has no arguments. 
// NOTE(review): garbled web-scrape of Go runtime Plan 9 asm (original line
// numbers embedded in the text); code kept byte-identical, comments only.
// Region: stack growth and reflectcall machinery.
//  - morestack: aborts if running on g0 or gsignal (their stacks cannot
//    grow), records the caller's context in g->sched and m->morebuf,
//    switches to g0, and calls newstack, which does not return (UNDEF
//    keeps the return PC inside this function for traceback).
//  - morestack_noctxt: clears the context register R11 then tail-branches
//    to morestack.
//  - reflectcall: selects one of the fixed-frame call* functions by
//    argsize via the DISPATCH macro (compare, then indirect BR through
//    CTR — the comment notes a plain BR NAME(SB) inlines badly);
//    falls through to badreflectcall if the size exceeds 1<<30.
//  - CALLFN: macro body for each call*: byte-copies the arguments onto
//    the local frame, calls the FuncVal in R11, restores R2 (TOC) from
//    24(R1), then hands argtype/arg/retbuf/size to callRet, which calls
//    reflectcallmove so write barriers see the copied results.
//    callRet takes its arguments in registers, not the Go ABI.
290 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 291 // Cannot grow scheduler stack (m->g0). 292 MOVD g_m(g), R7 293 MOVD m_g0(R7), R8 294 CMP g, R8 295 BNE 3(PC) 296 BL runtime·badmorestackg0(SB) 297 BL runtime·abort(SB) 298 299 // Cannot grow signal stack (m->gsignal). 300 MOVD m_gsignal(R7), R8 301 CMP g, R8 302 BNE 3(PC) 303 BL runtime·badmorestackgsignal(SB) 304 BL runtime·abort(SB) 305 306 // Called from f. 307 // Set g->sched to context in f. 308 MOVD R1, (g_sched+gobuf_sp)(g) 309 MOVD LR, R8 310 MOVD R8, (g_sched+gobuf_pc)(g) 311 MOVD R5, (g_sched+gobuf_lr)(g) 312 MOVD R11, (g_sched+gobuf_ctxt)(g) 313 314 // Called from f. 315 // Set m->morebuf to f's caller. 316 MOVD R5, (m_morebuf+gobuf_pc)(R7) // f's caller's PC 317 MOVD R1, (m_morebuf+gobuf_sp)(R7) // f's caller's SP 318 MOVD g, (m_morebuf+gobuf_g)(R7) 319 320 // Call newstack on m->g0's stack. 321 MOVD m_g0(R7), g 322 BL runtime·save_g(SB) 323 MOVD (g_sched+gobuf_sp)(g), R1 324 MOVDU R0, -(FIXED_FRAME+0)(R1) // create a call frame on g0 325 BL runtime·newstack(SB) 326 327 // Not reached, but make sure the return PC from the call to newstack 328 // is still in this function, and not the beginning of the next. 329 UNDEF 330 331 TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 332 MOVD R0, R11 333 BR runtime·morestack(SB) 334 335 // reflectcall: call a function with the given argument list 336 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32). 337 // we don't have variable-sized frames, so we use a small number 338 // of constant-sized-frame functions to encode a few bits of size in the pc. 339 // Caution: ugly multiline assembly macros in your future! 340 341 #define DISPATCH(NAME,MAXSIZE) \ 342 MOVD $MAXSIZE, R31; \ 343 CMP R3, R31; \ 344 BGT 4(PC); \ 345 MOVD $NAME(SB), R12; \ 346 MOVD R12, CTR; \ 347 BR (CTR) 348 // Note: can't just "BR NAME(SB)" - bad inlining results. 
349 350 TEXT reflect·call(SB), NOSPLIT, $0-0 351 BR ·reflectcall(SB) 352 353 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32 354 MOVWZ argsize+24(FP), R3 355 DISPATCH(runtime·call32, 32) 356 DISPATCH(runtime·call64, 64) 357 DISPATCH(runtime·call128, 128) 358 DISPATCH(runtime·call256, 256) 359 DISPATCH(runtime·call512, 512) 360 DISPATCH(runtime·call1024, 1024) 361 DISPATCH(runtime·call2048, 2048) 362 DISPATCH(runtime·call4096, 4096) 363 DISPATCH(runtime·call8192, 8192) 364 DISPATCH(runtime·call16384, 16384) 365 DISPATCH(runtime·call32768, 32768) 366 DISPATCH(runtime·call65536, 65536) 367 DISPATCH(runtime·call131072, 131072) 368 DISPATCH(runtime·call262144, 262144) 369 DISPATCH(runtime·call524288, 524288) 370 DISPATCH(runtime·call1048576, 1048576) 371 DISPATCH(runtime·call2097152, 2097152) 372 DISPATCH(runtime·call4194304, 4194304) 373 DISPATCH(runtime·call8388608, 8388608) 374 DISPATCH(runtime·call16777216, 16777216) 375 DISPATCH(runtime·call33554432, 33554432) 376 DISPATCH(runtime·call67108864, 67108864) 377 DISPATCH(runtime·call134217728, 134217728) 378 DISPATCH(runtime·call268435456, 268435456) 379 DISPATCH(runtime·call536870912, 536870912) 380 DISPATCH(runtime·call1073741824, 1073741824) 381 MOVD $runtime·badreflectcall(SB), R12 382 MOVD R12, CTR 383 BR (CTR) 384 385 #define CALLFN(NAME,MAXSIZE) \ 386 TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ 387 NO_LOCAL_POINTERS; \ 388 /* copy arguments to stack */ \ 389 MOVD arg+16(FP), R3; \ 390 MOVWZ argsize+24(FP), R4; \ 391 MOVD R1, R5; \ 392 ADD $(FIXED_FRAME-1), R5; \ 393 SUB $1, R3; \ 394 ADD R5, R4; \ 395 CMP R5, R4; \ 396 BEQ 4(PC); \ 397 MOVBZU 1(R3), R6; \ 398 MOVBZU R6, 1(R5); \ 399 BR -4(PC); \ 400 /* call function */ \ 401 MOVD f+8(FP), R11; \ 402 MOVD (R11), R12; \ 403 MOVD R12, CTR; \ 404 PCDATA $PCDATA_StackMapIndex, $0; \ 405 BL (CTR); \ 406 MOVD 24(R1), R2; \ 407 /* copy return values back */ \ 408 MOVD argtype+0(FP), R7; \ 409 MOVD arg+16(FP), R3; \ 410 MOVWZ n+24(FP), R4; \ 411 MOVWZ retoffset+28(FP), R6; \ 
412 ADD $FIXED_FRAME, R1, R5; \ 413 ADD R6, R5; \ 414 ADD R6, R3; \ 415 SUB R6, R4; \ 416 BL callRet<>(SB); \ 417 RET 418 419 // callRet copies return values back at the end of call*. This is a 420 // separate function so it can allocate stack space for the arguments 421 // to reflectcallmove. It does not follow the Go ABI; it expects its 422 // arguments in registers. 423 TEXT callRet<>(SB), NOSPLIT, $32-0 424 MOVD R7, FIXED_FRAME+0(R1) 425 MOVD R3, FIXED_FRAME+8(R1) 426 MOVD R5, FIXED_FRAME+16(R1) 427 MOVD R4, FIXED_FRAME+24(R1) 428 BL runtime·reflectcallmove(SB) 429 RET 430 431 CALLFN(·call32, 32) 432 CALLFN(·call64, 64) 433 CALLFN(·call128, 128) 434 CALLFN(·call256, 256) 435 CALLFN(·call512, 512) 436 CALLFN(·call1024, 1024) 437 CALLFN(·call2048, 2048) 438 CALLFN(·call4096, 4096) 439 CALLFN(·call8192, 8192) 440 CALLFN(·call16384, 16384) 441 CALLFN(·call32768, 32768) 442 CALLFN(·call65536, 65536) 443 CALLFN(·call131072, 131072) 444 CALLFN(·call262144, 262144) 445 CALLFN(·call524288, 524288) 446 CALLFN(·call1048576, 1048576) 447 CALLFN(·call2097152, 2097152) 448 CALLFN(·call4194304, 4194304) 449 CALLFN(·call8388608, 8388608) 450 CALLFN(·call16777216, 16777216) 451 CALLFN(·call33554432, 33554432) 452 CALLFN(·call67108864, 67108864) 453 CALLFN(·call134217728, 134217728) 454 CALLFN(·call268435456, 268435456) 455 CALLFN(·call536870912, 536870912) 456 CALLFN(·call1073741824, 1073741824) 457 458 TEXT runtime·procyield(SB),NOSPLIT,$0-0 459 RET 460 461 // void jmpdefer(fv, sp); 462 // called from deferreturn. 463 // 1. grab stored LR for caller 464 // 2. sub 8 bytes to get back to either nop or toc reload before deferreturn 465 // 3. BR to fn 466 // When dynamically linking Go, it is not sufficient to rewind to the BL 467 // deferreturn -- we might be jumping between modules and so we need to reset 468 // the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before* 469 // the BL deferreturn and jmpdefer rewinds to that. 
// NOTE(review): garbled web-scrape of Go runtime Plan 9 asm (original line
// numbers embedded in the text); code kept byte-identical, comments only.
// Region: defer and cgo glue.
//  - jmpdefer: rewinds LR by 8 so the eventual RET re-enters deferreturn
//    (landing on the TOC-reload before the BL, per the header comment),
//    resets SP from argp, and branches to the deferred FuncVal.
//  - gosave<>: saves caller PC/SP into g->sched and asserts ctxt==0;
//    smashes R31 (per its comment).
//  - asmcgocall(fn, arg): switches to m->g0's stack unless already there,
//    aligns R1 to 16 for the gcc ABI with 32 bytes of callee save area,
//    and saves the old g plus the stack DEPTH (hi - sp) rather than the
//    raw SP — the code's own comment notes the stack may be copied during
//    a callback, which would invalidate a saved pointer. After the C call
//    it rezeroes R0, recomputes the old SP from the depth, restores R2
//    from 24(SP), and returns R3 (errno) as the int32 result.
//  - cgocallback: marshals fn/frame/framesize/ctxt and calls
//    cgocallback_gofunc through CTR.
//  - cgocallback_gofunc: runs a Go callback on m->curg. If there is no
//    m (thread not created by Go), borrows one via needm (indirect call
//    to hide it from nosplit analysis) and seeds g0->sched.sp so a panic
//    during the callback has a usable stack. It saves g0->sched.sp at
//    savedsp-16(SP) (unwindm depends on that exact slot, per the comment),
//    switches to curg with a frame shaped to make traceback flow into the
//    earlier calls, and calls cgocallbackg.
470 TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16 471 MOVD 0(R1), R31 472 SUB $8, R31 473 MOVD R31, LR 474 475 MOVD fv+0(FP), R11 476 MOVD argp+8(FP), R1 477 SUB $FIXED_FRAME, R1 478 MOVD 0(R11), R12 479 MOVD R12, CTR 480 BR (CTR) 481 482 // Save state of caller into g->sched. Smashes R31. 483 TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0 484 MOVD LR, R31 485 MOVD R31, (g_sched+gobuf_pc)(g) 486 MOVD R1, (g_sched+gobuf_sp)(g) 487 MOVD R0, (g_sched+gobuf_lr)(g) 488 MOVD R0, (g_sched+gobuf_ret)(g) 489 // Assert ctxt is zero. See func save. 490 MOVD (g_sched+gobuf_ctxt)(g), R31 491 CMP R0, R31 492 BEQ 2(PC) 493 BL runtime·badctxt(SB) 494 RET 495 496 // func asmcgocall(fn, arg unsafe.Pointer) int32 497 // Call fn(arg) on the scheduler stack, 498 // aligned appropriately for the gcc ABI. 499 // See cgocall.go for more details. 500 TEXT ·asmcgocall(SB),NOSPLIT,$0-20 501 MOVD fn+0(FP), R3 502 MOVD arg+8(FP), R4 503 504 MOVD R1, R7 // save original stack pointer 505 MOVD g, R5 506 507 // Figure out if we need to switch to m->g0 stack. 508 // We get called to create new OS threads too, and those 509 // come in on the m->g0 stack already. 510 MOVD g_m(g), R6 511 MOVD m_g0(R6), R6 512 CMP R6, g 513 BEQ g0 514 BL gosave<>(SB) 515 MOVD R6, g 516 BL runtime·save_g(SB) 517 MOVD (g_sched+gobuf_sp)(g), R1 518 519 // Now on a scheduling stack (a pthread-created stack). 520 g0: 521 // Save room for two of our pointers, plus 32 bytes of callee 522 // save area that lives on the caller stack. 523 SUB $48, R1 524 RLDCR $0, R1, $~15, R1 // 16-byte alignment for gcc ABI 525 MOVD R5, 40(R1) // save old g on stack 526 MOVD (g_stack+stack_hi)(R5), R5 527 SUB R7, R5 528 MOVD R5, 32(R1) // save depth in old g stack (can't just save SP, as stack might be copied during a callback) 529 MOVD R0, 0(R1) // clear back chain pointer (TODO can we give it real back trace information?) 
530 // This is a "global call", so put the global entry point in r12 531 MOVD R3, R12 532 MOVD R12, CTR 533 MOVD R4, R3 // arg in r3 534 BL (CTR) 535 536 // C code can clobber R0, so set it back to 0. F27-F31 are 537 // callee save, so we don't need to recover those. 538 XOR R0, R0 539 // Restore g, stack pointer, toc pointer. 540 // R3 is errno, so don't touch it 541 MOVD 40(R1), g 542 MOVD (g_stack+stack_hi)(g), R5 543 MOVD 32(R1), R6 544 SUB R6, R5 545 MOVD 24(R5), R2 546 BL runtime·save_g(SB) 547 MOVD (g_stack+stack_hi)(g), R5 548 MOVD 32(R1), R6 549 SUB R6, R5 550 MOVD R5, R1 551 552 MOVW R3, ret+16(FP) 553 RET 554 555 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) 556 // Turn the fn into a Go func (by taking its address) and call 557 // cgocallback_gofunc. 558 TEXT runtime·cgocallback(SB),NOSPLIT,$32-32 559 MOVD $fn+0(FP), R3 560 MOVD R3, FIXED_FRAME+0(R1) 561 MOVD frame+8(FP), R3 562 MOVD R3, FIXED_FRAME+8(R1) 563 MOVD framesize+16(FP), R3 564 MOVD R3, FIXED_FRAME+16(R1) 565 MOVD ctxt+24(FP), R3 566 MOVD R3, FIXED_FRAME+24(R1) 567 MOVD $runtime·cgocallback_gofunc(SB), R12 568 MOVD R12, CTR 569 BL (CTR) 570 RET 571 572 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) 573 // See cgocall.go for more details. 574 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32 575 NO_LOCAL_POINTERS 576 577 // Load m and g from thread-local storage. 578 MOVB runtime·iscgo(SB), R3 579 CMP R3, $0 580 BEQ nocgo 581 BL runtime·load_g(SB) 582 nocgo: 583 584 // If g is nil, Go did not create the current thread. 585 // Call needm to obtain one for temporary use. 586 // In this case, we're running on the thread stack, so there's 587 // lots of space, but the linker doesn't know. Hide the call from 588 // the linker analysis by using an indirect call. 589 CMP g, $0 590 BEQ needm 591 592 MOVD g_m(g), R8 593 MOVD R8, savedm-8(SP) 594 BR havem 595 596 needm: 597 MOVD g, savedm-8(SP) // g is zero, so is m. 
598 MOVD $runtime·needm(SB), R12 599 MOVD R12, CTR 600 BL (CTR) 601 602 // Set m->sched.sp = SP, so that if a panic happens 603 // during the function we are about to execute, it will 604 // have a valid SP to run on the g0 stack. 605 // The next few lines (after the havem label) 606 // will save this SP onto the stack and then write 607 // the same SP back to m->sched.sp. That seems redundant, 608 // but if an unrecovered panic happens, unwindm will 609 // restore the g->sched.sp from the stack location 610 // and then systemstack will try to use it. If we don't set it here, 611 // that restored SP will be uninitialized (typically 0) and 612 // will not be usable. 613 MOVD g_m(g), R8 614 MOVD m_g0(R8), R3 615 MOVD R1, (g_sched+gobuf_sp)(R3) 616 617 havem: 618 // Now there's a valid m, and we're running on its m->g0. 619 // Save current m->g0->sched.sp on stack and then set it to SP. 620 // Save current sp in m->g0->sched.sp in preparation for 621 // switch back to m->curg stack. 622 // NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP). 623 MOVD m_g0(R8), R3 624 MOVD (g_sched+gobuf_sp)(R3), R4 625 MOVD R4, savedsp-16(SP) 626 MOVD R1, (g_sched+gobuf_sp)(R3) 627 628 // Switch to m->curg stack and call runtime.cgocallbackg. 629 // Because we are taking over the execution of m->curg 630 // but *not* resuming what had been running, we need to 631 // save that information (m->curg->sched) so we can restore it. 632 // We can restore m->curg->sched.sp easily, because calling 633 // runtime.cgocallbackg leaves SP unchanged upon return. 634 // To save m->curg->sched.pc, we push it onto the stack. 635 // This has the added benefit that it looks to the traceback 636 // routine like cgocallbackg is going to return to that 637 // PC (because the frame we allocate below has the same 638 // size as cgocallback_gofunc's frame declared above) 639 // so that the traceback will seamlessly trace back into 640 // the earlier calls. 
641 // 642 // In the new goroutine, -8(SP) is unused (where SP refers to 643 // m->curg's SP while we're setting it up, before we've adjusted it). 644 MOVD m_curg(R8), g 645 BL runtime·save_g(SB) 646 MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 647 MOVD (g_sched+gobuf_pc)(g), R5 648 MOVD R5, -(FIXED_FRAME+16)(R4) 649 MOVD ctxt+24(FP), R3 650 MOVD R3, -16(R4) 651 MOVD $-(FIXED_FRAME+16)(R4), R1 652 BL runtime·cgocallbackg(SB) 653 654 // Restore g->sched (== m->curg->sched) from saved values. 655 MOVD 0(R1), R5 656 MOVD R5, (g_sched+gobuf_pc)(g) 657 MOVD $(FIXED_FRAME+16)(R1), R4 658 MOVD R4, (g_sched+gobuf_sp)(g) 659 660 // Switch back to m->g0's stack and restore m->g0->sched.sp. 661 // (Unlike m->curg, the g0 goroutine never uses sched.pc, 662 // so we do not have to restore it.) 663 MOVD g_m(g), R8 664 MOVD m_g0(R8), g 665 BL runtime·save_g(SB) 666 MOVD (g_sched+gobuf_sp)(g), R1 667 MOVD savedsp-16(SP), R4 668 MOVD R4, (g_sched+gobuf_sp)(g) 669 670 // If the m on entry was nil, we called needm above to borrow an m 671 // for the duration of the call. Since the call is over, return it with dropm. 672 MOVD savedm-8(SP), R6 673 CMP R6, $0 674 BNE droppedm 675 MOVD $runtime·dropm(SB), R12 676 MOVD R12, CTR 677 BL (CTR) 678 droppedm: 679 680 // Done! 681 RET 682 683 // void setg(G*); set g. for use by needm. 684 TEXT runtime·setg(SB), NOSPLIT, $0-8 685 MOVD gg+0(FP), g 686 // This only happens if iscgo, so jump straight to save_g 687 BL runtime·save_g(SB) 688 RET 689 690 // void setg_gcc(G*); set g in C TLS. 691 // Must obey the gcc calling convention. 692 TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0 693 // The standard prologue clobbers R31, which is callee-save in 694 // the C ABI, so we have to use $-8-0 and save LR ourselves. 
// NOTE(review): garbled web-scrape of Go runtime Plan 9 asm (original line
// numbers embedded in the text); code kept byte-identical, comments only.
// Region: misc stubs and memory compare bodies.
//  - setg_gcc<> tail: preserves LR/R31/g (callee-save in the C ABI, per
//    the header comment), calls save_g with the new g, then restores.
//  - getcallerpc: returns the LR slot at 0(R1).
//  - abort: faults via a load through R0, then UNDEF.
//  - cputicks: classic TBRU/TBRL/TBRU read with retry (BNE -4(PC)) so the
//    upper half is consistent across a lower-half carry; combines into a
//    64-bit tick count.
//  - aeshash/aeshash32/aeshash64/aeshashstr: deliberate faults (load via
//    R0) — AES hashing is unimplemented on ppc64, per the comment.
//  - memequal / memequal_varlen: front ends for memeqbody; the varlen
//    form short-circuits on identical pointers and reads the size from
//    offset 8 of the closure in R11.
//  - cmpbodyLE / cmpbodyBE: register-convention memcmp bodies
//    (R3/R4 = lengths, R5/R6 = addrs, R7 = &result, result in {-1,0,1}).
//    Length comparison is parked in CR2 up front and consulted at the
//    end via BC 12,8/9/10 (CR2 LT/GT/EQ). The 32-byte main loops differ:
//    LE loads natively and only reloads the mismatching pair
//    byte-reversed (MOVDBR at offset R16) to decide order; BE's native
//    load order already matches byte order so it branches directly.
//    NOTE(review): inherited upstream oddity at original line 951 — the
//    comment says "same len, must be equal" on a BR greater; the code
//    path is the len1>len2 case, comment text is stale. Left as-is.
//  - memeqbody (starts at original line 980, continues in next region):
//    equality-only variant, result in R9.
695 MOVD LR, R4 696 // Also save g and R31, since they're callee-save in C ABI 697 MOVD R31, R5 698 MOVD g, R6 699 700 MOVD R3, g 701 BL runtime·save_g(SB) 702 703 MOVD R6, g 704 MOVD R5, R31 705 MOVD R4, LR 706 RET 707 708 TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8 709 MOVD 0(R1), R3 // LR saved by caller 710 MOVD R3, ret+0(FP) 711 RET 712 713 TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0 714 MOVW (R0), R0 715 UNDEF 716 717 #define TBRL 268 718 #define TBRU 269 /* Time base Upper/Lower */ 719 720 // int64 runtime·cputicks(void) 721 TEXT runtime·cputicks(SB),NOSPLIT,$0-8 722 MOVW SPR(TBRU), R4 723 MOVW SPR(TBRL), R3 724 MOVW SPR(TBRU), R5 725 CMPW R4, R5 726 BNE -4(PC) 727 SLD $32, R5 728 OR R5, R3 729 MOVD R3, ret+0(FP) 730 RET 731 732 // AES hashing not implemented for ppc64 733 TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0 734 MOVW (R0), R1 735 TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0 736 MOVW (R0), R1 737 TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0 738 MOVW (R0), R1 739 TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0 740 MOVW (R0), R1 741 742 TEXT runtime·memequal(SB),NOSPLIT,$0-25 743 MOVD a+0(FP), R3 744 MOVD b+8(FP), R4 745 MOVD size+16(FP), R5 746 747 BL runtime·memeqbody(SB) 748 MOVB R9, ret+24(FP) 749 RET 750 751 // memequal_varlen(a, b unsafe.Pointer) bool 752 TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 753 MOVD a+0(FP), R3 754 MOVD b+8(FP), R4 755 CMP R3, R4 756 BEQ eq 757 MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure 758 BL runtime·memeqbody(SB) 759 MOVB R9, ret+16(FP) 760 RET 761 eq: 762 MOVD $1, R3 763 MOVB R3, ret+16(FP) 764 RET 765 766 // Do an efficient memcmp for ppc64le 767 // R3 = s1 len 768 // R4 = s2 len 769 // R5 = s1 addr 770 // R6 = s2 addr 771 // R7 = addr of return value 772 TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0 773 MOVD R3,R8 // set up length 774 CMP R3,R4,CR2 // unequal? 
775 BC 12,8,setuplen // BLT CR2 776 MOVD R4,R8 // use R4 for comparison len 777 setuplen: 778 MOVD R8,CTR // set up loop counter 779 CMP R8,$8 // only optimize >=8 780 BLT simplecheck 781 DCBT (R5) // cache hint 782 DCBT (R6) 783 CMP R8,$32 // optimize >= 32 784 MOVD R8,R9 785 BLT setup8a // 8 byte moves only 786 setup32a: 787 SRADCC $5,R8,R9 // number of 32 byte chunks 788 MOVD R9,CTR 789 790 // Special processing for 32 bytes or longer. 791 // Loading this way is faster and correct as long as the 792 // doublewords being compared are equal. Once they 793 // are found unequal, reload them in proper byte order 794 // to determine greater or less than. 795 loop32a: 796 MOVD 0(R5),R9 // doublewords to compare 797 MOVD 0(R6),R10 // get 4 doublewords 798 MOVD 8(R5),R14 799 MOVD 8(R6),R15 800 CMPU R9,R10 // bytes equal? 801 MOVD $0,R16 // set up for cmpne 802 BNE cmpne // further compare for LT or GT 803 MOVD 16(R5),R9 // get next pair of doublewords 804 MOVD 16(R6),R10 805 CMPU R14,R15 // bytes match? 806 MOVD $8,R16 // set up for cmpne 807 BNE cmpne // further compare for LT or GT 808 MOVD 24(R5),R14 // get next pair of doublewords 809 MOVD 24(R6),R15 810 CMPU R9,R10 // bytes match? 811 MOVD $16,R16 // set up for cmpne 812 BNE cmpne // further compare for LT or GT 813 MOVD $-8,R16 // for cmpne, R5,R6 already inc by 32 814 ADD $32,R5 // bump up to next 32 815 ADD $32,R6 816 CMPU R14,R15 // bytes match? 817 BC 8,2,loop32a // br ctr and cr 818 BNE cmpne 819 ANDCC $24,R8,R9 // Any 8 byte chunks? 820 BEQ leftover // and result is 0 821 setup8a: 822 SRADCC $3,R9,R9 // get the 8 byte count 823 BEQ leftover // shifted value is 0 824 MOVD R9,CTR // loop count for doublewords 825 loop8: 826 MOVDBR (R5+R0),R9 // doublewords to compare 827 MOVDBR (R6+R0),R10 // LE compare order 828 ADD $8,R5 829 ADD $8,R6 830 CMPU R9,R10 // match? 
831 BC 8,2,loop8 // bt ctr <> 0 && cr 832 BGT greater 833 BLT less 834 leftover: 835 ANDCC $7,R8,R9 // check for leftover bytes 836 MOVD R9,CTR // save the ctr 837 BNE simple // leftover bytes 838 BC 12,10,equal // test CR2 for length comparison 839 BC 12,8,less 840 BR greater 841 simplecheck: 842 CMP R8,$0 // remaining compare length 0 843 BNE simple // do simple compare 844 BC 12,10,equal // test CR2 for length comparison 845 BC 12,8,less // 1st len < 2nd len, result less 846 BR greater // 1st len > 2nd len must be greater 847 simple: 848 MOVBZ 0(R5), R9 // get byte from 1st operand 849 ADD $1,R5 850 MOVBZ 0(R6), R10 // get byte from 2nd operand 851 ADD $1,R6 852 CMPU R9, R10 853 BC 8,2,simple // bc ctr <> 0 && cr 854 BGT greater // 1st > 2nd 855 BLT less // 1st < 2nd 856 BC 12,10,equal // test CR2 for length comparison 857 BC 12,9,greater // 2nd len > 1st len 858 BR less // must be less 859 cmpne: // only here is not equal 860 MOVDBR (R5+R16),R8 // reload in reverse order 861 MOVDBR (R6+R16),R9 862 CMPU R8,R9 // compare correct endianness 863 BGT greater // here only if NE 864 less: 865 MOVD $-1,R3 866 MOVD R3,(R7) // return value if A < B 867 RET 868 equal: 869 MOVD $0,(R7) // return value if A == B 870 RET 871 greater: 872 MOVD $1,R3 873 MOVD R3,(R7) // return value if A > B 874 RET 875 876 // Do an efficient memcmp for ppc64 (BE) 877 // R3 = s1 len 878 // R4 = s2 len 879 // R5 = s1 addr 880 // R6 = s2 addr 881 // R7 = addr of return value 882 TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0 883 MOVD R3,R8 // set up length 884 CMP R3,R4,CR2 // unequal? 
885 BC 12,8,setuplen // BLT CR2 886 MOVD R4,R8 // use R4 for comparison len 887 setuplen: 888 MOVD R8,CTR // set up loop counter 889 CMP R8,$8 // only optimize >=8 890 BLT simplecheck 891 DCBT (R5) // cache hint 892 DCBT (R6) 893 CMP R8,$32 // optimize >= 32 894 MOVD R8,R9 895 BLT setup8a // 8 byte moves only 896 897 setup32a: 898 SRADCC $5,R8,R9 // number of 32 byte chunks 899 MOVD R9,CTR 900 loop32a: 901 MOVD 0(R5),R9 // doublewords to compare 902 MOVD 0(R6),R10 // get 4 doublewords 903 MOVD 8(R5),R14 904 MOVD 8(R6),R15 905 CMPU R9,R10 // bytes equal? 906 BLT less // found to be less 907 BGT greater // found to be greater 908 MOVD 16(R5),R9 // get next pair of doublewords 909 MOVD 16(R6),R10 910 CMPU R14,R15 // bytes match? 911 BLT less // found less 912 BGT greater // found greater 913 MOVD 24(R5),R14 // get next pair of doublewords 914 MOVD 24(R6),R15 915 CMPU R9,R10 // bytes match? 916 BLT less // found to be less 917 BGT greater // found to be greater 918 ADD $32,R5 // bump up to next 32 919 ADD $32,R6 920 CMPU R14,R15 // bytes match? 921 BC 8,2,loop32a // br ctr and cr 922 BLT less // with BE, byte ordering is 923 BGT greater // good for compare 924 ANDCC $24,R8,R9 // Any 8 byte chunks? 925 BEQ leftover // and result is 0 926 setup8a: 927 SRADCC $3,R9,R9 // get the 8 byte count 928 BEQ leftover // shifted value is 0 929 MOVD R9,CTR // loop count for doublewords 930 loop8: 931 MOVD (R5),R9 932 MOVD (R6),R10 933 ADD $8,R5 934 ADD $8,R6 935 CMPU R9,R10 // match? 
936 BC 8,2,loop8 // bt ctr <> 0 && cr 937 BGT greater 938 BLT less 939 leftover: 940 ANDCC $7,R8,R9 // check for leftover bytes 941 MOVD R9,CTR // save the ctr 942 BNE simple // leftover bytes 943 BC 12,10,equal // test CR2 for length comparison 944 BC 12,8,less 945 BR greater 946 simplecheck: 947 CMP R8,$0 // remaining compare length 0 948 BNE simple // do simple compare 949 BC 12,10,equal // test CR2 for length comparison 950 BC 12,8,less // 1st len < 2nd len, result less 951 BR greater // same len, must be equal 952 simple: 953 MOVBZ 0(R5),R9 // get byte from 1st operand 954 ADD $1,R5 955 MOVBZ 0(R6),R10 // get byte from 2nd operand 956 ADD $1,R6 957 CMPU R9,R10 958 BC 8,2,simple // bc ctr <> 0 && cr 959 BGT greater // 1st > 2nd 960 BLT less // 1st < 2nd 961 BC 12,10,equal // test CR2 for length comparison 962 BC 12,9,greater // 2nd len > 1st len 963 less: 964 MOVD $-1,R3 965 MOVD R3,(R7) // return value if A < B 966 RET 967 equal: 968 MOVD $0,(R7) // return value if A == B 969 RET 970 greater: 971 MOVD $1,R3 972 MOVD R3,(R7) // return value if A > B 973 RET 974 975 // Do an efficient memequal for ppc64 976 // R3 = s1 977 // R4 = s2 978 // R5 = len 979 // R9 = return value 980 TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0 981 MOVD R5,CTR 982 CMP R5,$8 // only optimize >=8 983 BLT simplecheck 984 DCBT (R3) // cache hint 985 DCBT (R4) 986 CMP R5,$32 // optimize >= 32 987 MOVD R5,R6 // needed if setup8a branch 988 BLT setup8a // 8 byte moves only 989 setup32a: // 8 byte aligned, >= 32 bytes 990 SRADCC $5,R5,R6 // number of 32 byte chunks to compare 991 MOVD R6,CTR 992 loop32a: 993 MOVD 0(R3),R6 // doublewords to compare 994 MOVD 0(R4),R7 995 MOVD 8(R3),R8 // 996 MOVD 8(R4),R9 997 CMP R6,R7 // bytes batch? 998 BNE noteq 999 MOVD 16(R3),R6 1000 MOVD 16(R4),R7 1001 CMP R8,R9 // bytes match? 1002 MOVD 24(R3),R8 1003 MOVD 24(R4),R9 1004 BNE noteq 1005 CMP R6,R7 // bytes match? 
// NOTE(review): garbled web-scrape of Go runtime Plan 9 asm (original line
// numbers embedded in the text); code kept byte-identical, comments only.
// Region: tail of runtime·memeqbody (32-byte/8-byte/byte-at-a-time
// equality loops; R9 = 1 on equal, 0 on mismatch), bytes·Equal, the
// bytes/strings IndexByte front ends, and the shared indexbytebody.
//  - bytes·Equal: unequal lengths -> false, else memeqbody.
//    NOTE(review): the `equal:` tail (original lines 1067-1070) is
//    unreachable — nothing in this TEXT branches to it (the only branch
//    is BNE noteq). Dead code inherited from upstream; left untouched.
//  - bytes·IndexByte / strings·IndexByte: load ptr/len/byte and &ret
//    into R3/R4/R5/R14 and tail-branch to indexbytebody.
//  - indexbytebody: replicates the target byte across R5 (RLDIMI),
//    aligns to a doubleword, masks bytes below the base, then for
//    strings >= 32 bytes moves to a VMX path (CMPB scalar probes, then
//    LVX/VCMPEQUBCC quadword probes working toward 64-byte alignment for
//    the main loop). The raw WORD $0x54661EB8 encodes the rlwinm given
//    in its comment. This definition CONTINUES PAST THE END of this
//    chunk (the main vector loop and tail handling are not visible
//    here), so no claims are made about the remainder.
1006 BNE noteq 1007 ADD $32,R3 // bump up to next 32 1008 ADD $32,R4 1009 CMP R8,R9 // bytes match? 1010 BC 8,2,loop32a // br ctr and cr 1011 BNE noteq 1012 ANDCC $24,R5,R6 // Any 8 byte chunks? 1013 BEQ leftover // and result is 0 1014 setup8a: 1015 SRADCC $3,R6,R6 // get the 8 byte count 1016 BEQ leftover // shifted value is 0 1017 MOVD R6,CTR 1018 loop8: 1019 MOVD 0(R3),R6 // doublewords to compare 1020 ADD $8,R3 1021 MOVD 0(R4),R7 1022 ADD $8,R4 1023 CMP R6,R7 // match? 1024 BC 8,2,loop8 // bt ctr <> 0 && cr 1025 BNE noteq 1026 leftover: 1027 ANDCC $7,R5,R6 // check for leftover bytes 1028 BEQ equal 1029 MOVD R6,CTR 1030 BR simple 1031 simplecheck: 1032 CMP R5,$0 1033 BEQ equal 1034 simple: 1035 MOVBZ 0(R3), R6 1036 ADD $1,R3 1037 MOVBZ 0(R4), R7 1038 ADD $1,R4 1039 CMP R6, R7 1040 BNE noteq 1041 BC 8,2,simple 1042 BNE noteq 1043 BR equal 1044 noteq: 1045 MOVD $0, R9 1046 RET 1047 equal: 1048 MOVD $1, R9 1049 RET 1050 1051 TEXT bytes·Equal(SB),NOSPLIT,$0-49 1052 MOVD a_len+8(FP), R4 1053 MOVD b_len+32(FP), R5 1054 CMP R5, R4 // unequal lengths are not equal 1055 BNE noteq 1056 MOVD a+0(FP), R3 1057 MOVD b+24(FP), R4 1058 BL runtime·memeqbody(SB) 1059 1060 MOVBZ R9,ret+48(FP) 1061 RET 1062 1063 noteq: 1064 MOVBZ $0,ret+48(FP) 1065 RET 1066 1067 equal: 1068 MOVD $1,R3 1069 MOVBZ R3,ret+48(FP) 1070 RET 1071 1072 TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 1073 MOVD s+0(FP), R3 // R3 = byte array pointer 1074 MOVD s_len+8(FP), R4 // R4 = length 1075 MOVBZ c+24(FP), R5 // R5 = byte 1076 MOVD $ret+32(FP), R14 // R14 = &ret 1077 BR runtime·indexbytebody<>(SB) 1078 1079 TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32 1080 MOVD s+0(FP), R3 // R3 = string 1081 MOVD s_len+8(FP), R4 // R4 = length 1082 MOVBZ c+16(FP), R5 // R5 = byte 1083 MOVD $ret+24(FP), R14 // R14 = &ret 1084 BR runtime·indexbytebody<>(SB) 1085 1086 TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0 1087 DCBT (R3) // Prepare cache line. 
1088 MOVD R3,R17 // Save base address for calculating the index later. 1089 RLDICR $0,R3,$60,R8 // Align address to doubleword boundary in R8. 1090 RLDIMI $8,R5,$48,R5 // Replicating the byte across the register. 1091 ADD R4,R3,R7 // Last acceptable address in R7. 1092 1093 RLDIMI $16,R5,$32,R5 1094 CMPU R4,$32 // Check if it's a small string (<32 bytes). Those will be processed differently. 1095 MOVD $-1,R9 1096 WORD $0x54661EB8 // Calculate padding in R6 (rlwinm r6,r3,3,26,28). 1097 RLDIMI $32,R5,$0,R5 1098 MOVD R7,R10 // Save last acceptable address in R10 for later. 1099 ADD $-1,R7,R7 1100 #ifdef GOARCH_ppc64le 1101 SLD R6,R9,R9 // Prepare mask for Little Endian 1102 #else 1103 SRD R6,R9,R9 // Same for Big Endian 1104 #endif 1105 BLE small_string // Jump to the small string case if it's <32 bytes. 1106 1107 // If we are 64-byte aligned, branch to qw_align just to get the auxiliary values 1108 // in V0, V1 and V10, then branch to the preloop. 1109 ANDCC $63,R3,R11 1110 BEQ CR0,qw_align 1111 RLDICL $0,R3,$61,R11 1112 1113 MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8. 1114 CMPB R12,R5,R3 // Check for a match. 1115 AND R9,R3,R3 // Mask bytes below s_base 1116 RLDICL $0,R7,$61,R6 // length-1 1117 RLDICR $0,R7,$60,R7 // Last doubleword in R7 1118 CMPU R3,$0,CR7 // If we have a match, jump to the final computation 1119 BNE CR7,done 1120 ADD $8,R8,R8 1121 ADD $-8,R4,R4 1122 ADD R4,R11,R4 1123 1124 // Check for quadword alignment 1125 ANDCC $15,R8,R11 1126 BEQ CR0,qw_align 1127 1128 // Not aligned, so handle the next doubleword 1129 MOVD 0(R8),R12 1130 CMPB R12,R5,R3 1131 CMPU R3,$0,CR7 1132 BNE CR7,done 1133 ADD $8,R8,R8 1134 ADD $-8,R4,R4 1135 1136 // Either quadword aligned or 64-byte at this point. We can use LVX. 1137 qw_align: 1138 1139 // Set up auxiliary data for the vectorized algorithm. 
1140 VSPLTISB $0,V0 // Replicate 0 across V0 1141 VSPLTISB $3,V10 // Use V10 as control for VBPERMQ 1142 MTVRD R5,V1 1143 LVSL (R0+R0),V11 1144 VSLB V11,V10,V10 1145 VSPLTB $7,V1,V1 // Replicate byte across V1 1146 CMPU R4, $64 // If len <= 64, don't use the vectorized loop 1147 BLE tail 1148 1149 // We will load 4 quardwords per iteration in the loop, so check for 1150 // 64-byte alignment. If 64-byte aligned, then branch to the preloop. 1151 ANDCC $63,R8,R11 1152 BEQ CR0,preloop 1153 1154 // Not 64-byte aligned. Load one quadword at a time until aligned. 1155 LVX (R8+R0),V4 1156 VCMPEQUBCC V1,V4,V6 // Check for byte in V4 1157 BNE CR6,found_qw_align 1158 ADD $16,R8,R8 1159 ADD $-16,R4,R4 1160 1161 ANDCC $63,R8,R11 1162 BEQ CR0,preloop 1163 LVX (R8+R0),V4 1164 VCMPEQUBCC V1,V4,V6 // Check for byte in V4 1165 BNE CR6,found_qw_align 1166 ADD $16,R8,R8 1167 ADD $-16,R4,R4 1168 1169 ANDCC $63,R8,R11 1170 BEQ CR0,preloop 1171 LVX (R8+R0),V4 1172 VCMPEQUBCC V1,V4,V6 // Check for byte in V4 1173 BNE CR6,found_qw_align 1174 ADD $-16,R4,R4 1175 ADD $16,R8,R8 1176 1177 // 64-byte aligned. Prepare for the main loop. 1178 preloop: 1179 CMPU R4,$64 1180 BLE tail // If len <= 64, don't use the vectorized loop 1181 1182 // We are now aligned to a 64-byte boundary. We will load 4 quadwords 1183 // per loop iteration. The last doubleword is in R10, so our loop counter 1184 // starts at (R10-R8)/64. 
1185 SUB R8,R10,R6 1186 SRD $6,R6,R9 // Loop counter in R9 1187 MOVD R9,CTR 1188 1189 MOVD $16,R11 // Load offsets for the vector loads 1190 MOVD $32,R9 1191 MOVD $48,R7 1192 1193 // Main loop we will load 64 bytes per iteration 1194 loop: 1195 LVX (R8+R0),V2 // Load 4 16-byte vectors 1196 LVX (R11+R8),V3 1197 LVX (R9+R8),V4 1198 LVX (R7+R8),V5 1199 VCMPEQUB V1,V2,V6 // Look for byte in each vector 1200 VCMPEQUB V1,V3,V7 1201 VCMPEQUB V1,V4,V8 1202 VCMPEQUB V1,V5,V9 1203 VOR V6,V7,V11 // Compress the result in a single vector 1204 VOR V8,V9,V12 1205 VOR V11,V12,V11 1206 VCMPEQUBCC V0,V11,V11 // Check for byte 1207 BGE CR6,found 1208 ADD $64,R8,R8 1209 BC 16,0,loop // bdnz loop 1210 1211 // Handle the tailing bytes or R4 <= 64 1212 RLDICL $0,R6,$58,R4 1213 tail: 1214 CMPU R4,$0 1215 BEQ notfound 1216 LVX (R8+R0),V4 1217 VCMPEQUBCC V1,V4,V6 1218 BNE CR6,found_qw_align 1219 ADD $16,R8,R8 1220 CMPU R4,$16,CR6 1221 BLE CR6,notfound 1222 ADD $-16,R4,R4 1223 1224 LVX (R8+R0),V4 1225 VCMPEQUBCC V1,V4,V6 1226 BNE CR6,found_qw_align 1227 ADD $16,R8,R8 1228 CMPU R4,$16,CR6 1229 BLE CR6,notfound 1230 ADD $-16,R4,R4 1231 1232 LVX (R8+R0),V4 1233 VCMPEQUBCC V1,V4,V6 1234 BNE CR6,found_qw_align 1235 ADD $16,R8,R8 1236 CMPU R4,$16,CR6 1237 BLE CR6,notfound 1238 ADD $-16,R4,R4 1239 1240 LVX (R8+R0),V4 1241 VCMPEQUBCC V1,V4,V6 1242 BNE CR6,found_qw_align 1243 1244 notfound: 1245 MOVD $-1,R3 1246 MOVD R3,(R14) 1247 RET 1248 1249 found: 1250 // We will now compress the results into a single doubleword, 1251 // so it can be moved to a GPR for the final index calculation. 1252 1253 // The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the 1254 // first bit of each byte into bits 48-63. 1255 VBPERMQ V6,V10,V6 1256 VBPERMQ V7,V10,V7 1257 VBPERMQ V8,V10,V8 1258 VBPERMQ V9,V10,V9 1259 1260 // Shift each 16-bit component into its correct position for 1261 // merging into a single doubleword. 
1262 #ifdef GOARCH_ppc64le 1263 VSLDOI $2,V7,V7,V7 1264 VSLDOI $4,V8,V8,V8 1265 VSLDOI $6,V9,V9,V9 1266 #else 1267 VSLDOI $6,V6,V6,V6 1268 VSLDOI $4,V7,V7,V7 1269 VSLDOI $2,V8,V8,V8 1270 #endif 1271 1272 // Merge V6-V9 into a single doubleword and move to a GPR. 1273 VOR V6,V7,V11 1274 VOR V8,V9,V4 1275 VOR V4,V11,V4 1276 MFVRD V4,R3 1277 1278 #ifdef GOARCH_ppc64le 1279 ADD $-1,R3,R11 1280 ANDN R3,R11,R11 1281 POPCNTD R11,R11 // Count trailing zeros (Little Endian). 1282 #else 1283 CNTLZD R3,R11 // Count leading zeros (Big Endian). 1284 #endif 1285 ADD R8,R11,R3 // Calculate byte address 1286 1287 return: 1288 SUB R17,R3 1289 MOVD R3,(R14) 1290 RET 1291 1292 found_qw_align: 1293 // Use the same algorithm as above. Compress the result into 1294 // a single doubleword and move it to a GPR for the final 1295 // calculation. 1296 VBPERMQ V6,V10,V6 1297 1298 #ifdef GOARCH_ppc64le 1299 MFVRD V6,R3 1300 ADD $-1,R3,R11 1301 ANDN R3,R11,R11 1302 POPCNTD R11,R11 1303 #else 1304 VSLDOI $6,V6,V6,V6 1305 MFVRD V6,R3 1306 CNTLZD R3,R11 1307 #endif 1308 ADD R8,R11,R3 1309 CMPU R11,R4 1310 BLT return 1311 BR notfound 1312 1313 done: 1314 // At this point, R3 has 0xFF in the same position as the byte we are 1315 // looking for in the doubleword. Use that to calculate the exact index 1316 // of the byte. 1317 #ifdef GOARCH_ppc64le 1318 ADD $-1,R3,R11 1319 ANDN R3,R11,R11 1320 POPCNTD R11,R11 // Count trailing zeros (Little Endian). 1321 #else 1322 CNTLZD R3,R11 // Count leading zeros (Big Endian). 1323 #endif 1324 CMPU R8,R7 // Check if we are at the last doubleword. 1325 SRD $3,R11 // Convert trailing zeros to bytes. 1326 ADD R11,R8,R3 1327 CMPU R11,R6,CR7 // If at the last doubleword, check the byte offset. 1328 BNE return 1329 BLE CR7,return 1330 BR notfound 1331 1332 small_string: 1333 // We unroll this loop for better performance. 1334 CMPU R4,$0 // Check for length=0 1335 BEQ notfound 1336 1337 MOVD 0(R8),R12 // Load one doubleword from the aligned address in R8. 
1338 CMPB R12,R5,R3 // Check for a match. 1339 AND R9,R3,R3 // Mask bytes below s_base. 1340 CMPU R3,$0,CR7 // If we have a match, jump to the final computation. 1341 RLDICL $0,R7,$61,R6 // length-1 1342 RLDICR $0,R7,$60,R7 // Last doubleword in R7. 1343 CMPU R8,R7 1344 BNE CR7,done 1345 BEQ notfound // Hit length. 1346 1347 MOVDU 8(R8),R12 1348 CMPB R12,R5,R3 1349 CMPU R3,$0,CR6 1350 CMPU R8,R7 1351 BNE CR6,done 1352 BEQ notfound 1353 1354 MOVDU 8(R8),R12 1355 CMPB R12,R5,R3 1356 CMPU R3,$0,CR6 1357 CMPU R8,R7 1358 BNE CR6,done 1359 BEQ notfound 1360 1361 MOVDU 8(R8),R12 1362 CMPB R12,R5,R3 1363 CMPU R3,$0,CR6 1364 CMPU R8,R7 1365 BNE CR6,done 1366 BEQ notfound 1367 1368 MOVDU 8(R8),R12 1369 CMPB R12,R5,R3 1370 CMPU R3,$0,CR6 1371 BNE CR6,done 1372 BR notfound 1373 1374 TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 1375 MOVD s1_base+0(FP), R5 1376 MOVD s2_base+16(FP), R6 1377 MOVD s1_len+8(FP), R3 1378 CMP R5,R6,CR7 1379 MOVD s2_len+24(FP), R4 1380 MOVD $ret+32(FP), R7 1381 CMP R3,R4,CR6 1382 BEQ CR7,equal 1383 1384 notequal: 1385 #ifdef GOARCH_ppc64le 1386 BR cmpbodyLE<>(SB) 1387 #else 1388 BR cmpbodyBE<>(SB) 1389 #endif 1390 1391 equal: 1392 BEQ CR6,done 1393 MOVD $1, R8 1394 BGT CR6,greater 1395 NEG R8 1396 1397 greater: 1398 MOVD R8, (R7) 1399 RET 1400 1401 done: 1402 MOVD $0, (R7) 1403 RET 1404 1405 TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56 1406 MOVD s1+0(FP), R5 1407 MOVD s2+24(FP), R6 1408 MOVD s1+8(FP), R3 1409 CMP R5,R6,CR7 1410 MOVD s2+32(FP), R4 1411 MOVD $ret+48(FP), R7 1412 CMP R3,R4,CR6 1413 BEQ CR7,equal 1414 1415 #ifdef GOARCH_ppc64le 1416 BR cmpbodyLE<>(SB) 1417 #else 1418 BR cmpbodyBE<>(SB) 1419 #endif 1420 1421 equal: 1422 BEQ CR6,done 1423 MOVD $1, R8 1424 BGT CR6,greater 1425 NEG R8 1426 1427 greater: 1428 MOVD R8, (R7) 1429 RET 1430 1431 done: 1432 MOVD $0, (R7) 1433 RET 1434 1435 TEXT runtime·return0(SB), NOSPLIT, $0 1436 MOVW $0, R3 1437 RET 1438 1439 // Called from cgo wrappers, this function returns g->m->curg.stack.hi. 
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
	// g (R30) and R31 are callee-save in the C ABI, so save them,
	// along with LR, before calling back into Go code.
	MOVD	g, R4
	MOVD	R31, R5
	MOVD	LR, R6

	BL	runtime·load_g(SB)	// clobbers g (R30), R31
	MOVD	g_m(g), R3	// R3 = g->m
	MOVD	m_curg(R3), R3	// R3 = m->curg
	MOVD	(g_stack+stack_hi)(R3), R3	// result: curg.stack.hi

	// Restore the saved registers and return via the original LR.
	MOVD	R4, g
	MOVD	R5, R31
	MOVD	R6, LR
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
//
// When dynamically linking Go, it can be returned to from a function
// implemented in a different module and so needs to reload the TOC pointer
// from the stack (although this function declares that it does not set up a
// frame, newproc1 does in fact allocate one for goexit and saves the TOC
// pointer in the correct place).
// goexit+_PCQuantum is halfway through the usual global entry point prologue
// that derives r2 from r12 which is a bit silly, but not harmful.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	24(R1), R2	// reload TOC pointer saved by newproc1
	BL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	MOVD	R0, R0	// NOP

// sigreturn is a no-op on ppc64; signal return is handled elsewhere.
TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
	RET

// prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
// module containing runtime) to the frame that goexit will execute in when
// the goroutine exits. It's implemented in assembly mainly because that's the
// easiest way to get access to R2.
// func prepGoExitFrame(sp uintptr)
// Stores R2 (the runtime module's TOC pointer) at sp+24, where
// runtime·goexit expects to reload it.
TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
	MOVD	sp+0(FP), R3
	MOVD	R2, 24(R3)	// must match the 24(R1) load in goexit
	RET

// addmoduledata appends the moduledata in R3 to the runtime's module list.
// Called from dynamically-loaded module init code; follows the C calling
// convention for its argument (R3), so R31 is preserved across the body.
TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
	ADD	$-8, R1
	MOVD	R31, 0(R1)	// save R31 (callee-save in the C ABI)
	MOVD	runtime·lastmoduledatap(SB), R4
	MOVD	R3, moduledata_next(R4)	// lastmoduledatap->next = new module
	MOVD	R3, runtime·lastmoduledatap(SB)	// new module becomes the tail
	MOVD	0(R1), R31
	ADD	$8, R1
	RET

// func checkASM() bool
// Reports whether assembly-level invariants hold; always true on ppc64x.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET