github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/runtime/asm_amd64.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "zasm_GOOS_GOARCH.h"
#include "funcdata.h"
#include "../../cmd/ld/textflag.h"

TEXT _rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ DI, AX		// argc
	MOVQ SI, BX		// argv
	SUBQ $(4*8+7), SP	// 2args 2auto
	ANDQ $~15, SP
	MOVQ AX, 16(SP)
	MOVQ BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ $runtime·g0(SB), DI
	LEAQ (-64*1024+104)(SP), BX
	MOVQ BX, g_stackguard(DI)
	MOVQ BX, g_stackguard0(DI)
	MOVQ SP, g_stackbase(DI)

	// find out information about the processor we're on
	MOVQ $0, AX
	CPUID
	CMPQ AX, $0
	JE nocpuinfo
	MOVQ $1, AX
	CPUID
	MOVL CX, runtime·cpuid_ecx(SB)
	MOVL DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it.
	MOVQ _cgo_init(SB), AX
	TESTQ AX, AX
	JZ needtls
	// g0 already in DI
	MOVQ DI, CX	// Win64 uses CX for first parameter
	MOVQ $setmg_gcc<>(SB), SI
	CALL AX
	// update stackguard after _cgo_init
	MOVQ $runtime·g0(SB), CX
	MOVQ g_stackguard0(CX), AX
	MOVQ AX, g_stackguard(CX)
	CMPL runtime·iswindows(SB), $0
	JEQ ok

needtls:
	// skip TLS setup on Plan 9
	CMPL runtime·isplan9(SB), $1
	JEQ ok

	LEAQ runtime·tls0(SB), DI
	CALL runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ $0x123, g(BX)
	MOVQ runtime·tls0(SB), AX
	CMPQ AX, $0x123
	JEQ 2(PC)
	MOVL AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ runtime·g0(SB), CX
	MOVQ CX, g(BX)
	LEAQ runtime·m0(SB), AX
	MOVQ AX, m(BX)

	// save m->g0 = g0
	MOVQ CX, m_g0(AX)

	CLD	// convention is D is always left cleared
	CALL runtime·check(SB)

	MOVL 16(SP), AX		// copy argc
	MOVL AX, 0(SP)
	MOVQ 24(SP), AX		// copy argv
	MOVQ AX, 8(SP)
	CALL runtime·args(SB)
	CALL runtime·osinit(SB)
	CALL runtime·hashinit(SB)
	CALL runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHQ $runtime·main·f(SB)	// entry
	PUSHQ $0			// arg size
	ARGSIZE(16)
	CALL runtime·newproc(SB)
	ARGSIZE(-1)
	POPQ AX
	POPQ AX

	// start this M
	CALL runtime·mstart(SB)

	MOVL $0xf1, 0xf1	// crash
	RET

DATA runtime·main·f+0(SB)/8,$runtime·main(SB)
GLOBL runtime·main·f(SB),RODATA,$8

TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE $0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 * go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-8
	MOVQ 8(SP), AX		// gobuf
	LEAQ 8(SP), BX		// caller's SP
	MOVQ BX, gobuf_sp(AX)
	MOVQ 0(SP), BX		// caller's PC
	MOVQ BX, gobuf_pc(AX)
	MOVQ $0, gobuf_ret(AX)
	MOVQ $0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVQ g(CX), BX
	MOVQ BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ 8(SP), BX		// gobuf
	MOVQ gobuf_g(BX), DX
	MOVQ 0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVQ DX, g(CX)
	MOVQ gobuf_sp(BX), SP	// restore SP
	MOVQ gobuf_ret(BX), AX
	MOVQ gobuf_ctxt(BX), DX
	MOVQ $0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ $0, gobuf_ret(BX)
	MOVQ $0, gobuf_ctxt(BX)
	MOVQ gobuf_pc(BX), BX
	JMP BX

// void mcall(void (*fn)(G*))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ fn+0(FP), DI

	get_tls(CX)
	MOVQ g(CX), AX	// save state in g->sched
	MOVQ 0(SP), BX	// caller's PC
	MOVQ BX, (g_sched+gobuf_pc)(AX)
	LEAQ 8(SP), BX	// caller's SP
	MOVQ BX, (g_sched+gobuf_sp)(AX)
	MOVQ AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ m(CX), BX
	MOVQ m_g0(BX), SI
	CMPQ SI, AX	// if g == m->g0 call badmcall
	JNE 3(PC)
	MOVQ $runtime·badmcall(SB), AX
	JMP AX
	MOVQ SI, g(CX)	// g = m->g0
	MOVQ (g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ AX
	ARGSIZE(8)
	CALL DI
	POPQ AX
	MOVQ $runtime·badmcall2(SB), AX
	JMP AX
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// Caller has already done get_tls(CX); MOVQ m(CX), BX.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	MOVQ m_g0(BX), SI
	CMPQ g(CX), SI
	JNE 2(PC)
	INT $3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ 8(SP), AX	// f's caller's PC
	MOVQ AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ 16(SP), AX	// f's caller's SP
	MOVQ AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ AX, m_moreargp(BX)
	get_tls(CX)
	MOVQ g(CX), SI
	MOVQ SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ 0(SP), AX	// f's PC
	MOVQ AX, (g_sched+gobuf_pc)(SI)
	MOVQ SI, (g_sched+gobuf_g)(SI)
	LEAQ 8(SP), AX	// f's SP
	MOVQ AX, (g_sched+gobuf_sp)(SI)
	MOVQ DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVQ m_g0(BX), BP
	MOVQ BP, g(CX)
	MOVQ (g_sched+gobuf_sp)(BP), SP
	CALL runtime·newstack(SB)
	MOVQ $0, 0x1003	// crash if newstack returns
	RET

// Called from panic. Mimics morestack,
// reuses stack growth code to create a frame
// with the desired args running the desired function.
//
// func call(fn *byte, arg *byte, argsize uint32).
TEXT runtime·newstackcall(SB), NOSPLIT, $0-20
	get_tls(CX)
	MOVQ m(CX), BX

	// Save our caller's state as the PC and SP to
	// restore when returning from f.
	MOVQ 0(SP), AX	// our caller's PC
	MOVQ AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ 8(SP), AX	// our caller's SP
	MOVQ AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ g(CX), AX
	MOVQ AX, (m_morebuf+gobuf_g)(BX)

	// Save our own state as the PC and SP to restore
	// if this goroutine needs to be restarted.
	MOVQ $runtime·newstackcall(SB), (g_sched+gobuf_pc)(AX)
	MOVQ SP, (g_sched+gobuf_sp)(AX)

	// Set up morestack arguments to call f on a new stack.
	// We set f's frame size to 1, as a hint to newstack
	// that this is a call from runtime·newstackcall.
	// If it turns out that f needs a larger frame than
	// the default stack, f's usual stack growth prolog will
	// allocate a new segment (and recopy the arguments).
	MOVQ 8(SP), AX	// fn
	MOVQ 16(SP), DX	// arg frame
	MOVL 24(SP), CX	// arg size

	MOVQ AX, m_cret(BX)		// f's PC
	MOVQ DX, m_moreargp(BX)		// argument frame pointer
	MOVL CX, m_moreargsize(BX)	// f's argument size
	MOVL $1, m_moreframesize(BX)	// f's frame size

	// Call newstack on m->g0's stack.
	MOVQ m_g0(BX), BP
	get_tls(CX)
	MOVQ BP, g(CX)
	MOVQ (g_sched+gobuf_sp)(BP), SP
	CALL runtime·newstack(SB)
	MOVQ $0, 0x1103	// crash if newstack returns
	RET

// reflect·call: call a function with the given argument list
// func call(f *FuncVal, arg *byte, argsize uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE) \
	CMPQ CX, $MAXSIZE; \
	JA 3(PC); \
	MOVQ $runtime·NAME(SB), AX; \
	JMP AX
// Note: can't just "JMP runtime·NAME(SB)" - bad inlining results.
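
// As a point of reference, the DISPATCH chain used by reflect·call below is
// roughly equivalent to the following Go-style sketch (illustrative only, not
// the actual implementation; call16, call32, ... are the fixed-frame routines
// generated by CALLFN further down, and badreflectcall is the fallback for
// frames larger than 1<<30 bytes):
//
//	func call(f *FuncVal, arg *byte, argsize uint32) {
//		switch {
//		case argsize <= 16:
//			call16(f, arg, argsize)
//		case argsize <= 32:
//			call32(f, arg, argsize)
//		// ...one case per power of two, up to 1<<30...
//		default:
//			badreflectcall()
//		}
//	}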

TEXT reflect·call(SB), NOSPLIT, $0-20
	MOVLQZX argsize+16(FP), CX
	DISPATCH(call16, 16)
	DISPATCH(call32, 32)
	DISPATCH(call64, 64)
	DISPATCH(call128, 128)
	DISPATCH(call256, 256)
	DISPATCH(call512, 512)
	DISPATCH(call1024, 1024)
	DISPATCH(call2048, 2048)
	DISPATCH(call4096, 4096)
	DISPATCH(call8192, 8192)
	DISPATCH(call16384, 16384)
	DISPATCH(call32768, 32768)
	DISPATCH(call65536, 65536)
	DISPATCH(call131072, 131072)
	DISPATCH(call262144, 262144)
	DISPATCH(call524288, 524288)
	DISPATCH(call1048576, 1048576)
	DISPATCH(call2097152, 2097152)
	DISPATCH(call4194304, 4194304)
	DISPATCH(call8388608, 8388608)
	DISPATCH(call16777216, 16777216)
	DISPATCH(call33554432, 33554432)
	DISPATCH(call67108864, 67108864)
	DISPATCH(call134217728, 134217728)
	DISPATCH(call268435456, 268435456)
	DISPATCH(call536870912, 536870912)
	DISPATCH(call1073741824, 1073741824)
	MOVQ $runtime·badreflectcall(SB), AX
	JMP AX

#define CALLFN(NAME,MAXSIZE) \
TEXT runtime·NAME(SB), WRAPPER, $MAXSIZE-20; \
	/* copy arguments to stack */ \
	MOVQ argptr+8(FP), SI; \
	MOVLQZX argsize+16(FP), CX; \
	MOVQ SP, DI; \
	REP;MOVSB; \
	/* call function */ \
	MOVQ f+0(FP), DX; \
	CALL (DX); \
	/* copy return values back */ \
	MOVQ argptr+8(FP), DI; \
	MOVLQZX argsize+16(FP), CX; \
	MOVQ SP, SI; \
	REP;MOVSB; \
	RET

CALLFN(call16, 16)
CALLFN(call32, 32)
CALLFN(call64, 64)
CALLFN(call128, 128)
CALLFN(call256, 256)
CALLFN(call512, 512)
CALLFN(call1024, 1024)
CALLFN(call2048, 2048)
CALLFN(call4096, 4096)
CALLFN(call8192, 8192)
CALLFN(call16384, 16384)
CALLFN(call32768, 32768)
CALLFN(call65536, 65536)
CALLFN(call131072, 131072)
CALLFN(call262144, 262144)
CALLFN(call524288, 524288)
CALLFN(call1048576, 1048576)
CALLFN(call2097152, 2097152)
CALLFN(call4194304, 4194304)
CALLFN(call8388608, 8388608)
CALLFN(call16777216, 16777216)
CALLFN(call33554432, 33554432)
CALLFN(call67108864, 67108864)
CALLFN(call134217728, 134217728)
CALLFN(call268435456, 268435456)
CALLFN(call536870912, 536870912)
CALLFN(call1073741824, 1073741824)

// Return point when leaving stack.
//
// Lessstack can appear in stack traces for the same reason
// as morestack; in that context, it has 0 arguments.
TEXT runtime·lessstack(SB), NOSPLIT, $0-0
	// Save return value in m->cret
	get_tls(CX)
	MOVQ m(CX), BX
	MOVQ AX, m_cret(BX)

	// Call oldstack on m->g0's stack.
	MOVQ m_g0(BX), BP
	MOVQ BP, g(CX)
	MOVQ (g_sched+gobuf_sp)(BP), SP
	CALL runtime·oldstack(SB)
	MOVQ $0, 0x1004	// crash if oldstack returns
	RET

// morestack trampolines
TEXT runtime·morestack00(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ m(CX), BX
	MOVQ $0, AX
	MOVQ AX, m_moreframesize(BX)
	MOVQ $runtime·morestack(SB), AX
	JMP AX

TEXT runtime·morestack01(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ m(CX), BX
	SHLQ $32, AX
	MOVQ AX, m_moreframesize(BX)
	MOVQ $runtime·morestack(SB), AX
	JMP AX

TEXT runtime·morestack10(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ m(CX), BX
	MOVLQZX AX, AX
	MOVQ AX, m_moreframesize(BX)
	MOVQ $runtime·morestack(SB), AX
	JMP AX

TEXT runtime·morestack11(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ m(CX), BX
	MOVQ AX, m_moreframesize(BX)
	MOVQ $runtime·morestack(SB), AX
	JMP AX

// subcases of morestack01
// with const of 8,16,...48
TEXT runtime·morestack8(SB),NOSPLIT,$0
	MOVQ $1, R8
	MOVQ $morestack<>(SB), AX
	JMP AX

TEXT runtime·morestack16(SB),NOSPLIT,$0
	MOVQ $2, R8
	MOVQ $morestack<>(SB), AX
	JMP AX

TEXT runtime·morestack24(SB),NOSPLIT,$0
	MOVQ $3, R8
	MOVQ $morestack<>(SB), AX
	JMP AX

TEXT runtime·morestack32(SB),NOSPLIT,$0
	MOVQ $4, R8
	MOVQ $morestack<>(SB), AX
	JMP AX

TEXT runtime·morestack40(SB),NOSPLIT,$0
	MOVQ $5, R8
	MOVQ $morestack<>(SB), AX
	JMP AX

TEXT runtime·morestack48(SB),NOSPLIT,$0
	MOVQ $6, R8
	MOVQ $morestack<>(SB), AX
	JMP AX

TEXT morestack<>(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ m(CX), BX
	SHLQ $35, R8
	MOVQ R8, m_moreframesize(BX)
	MOVQ $runtime·morestack(SB), AX
	JMP AX

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-16
	MOVQ 8(SP), BX
	MOVL 16(SP), AX
	MOVL 20(SP), CX
	LOCK
	CMPXCHGL CX, 0(BX)
	JZ 3(PC)
	MOVL $0, AX
	RET
	MOVL $1, AX
	RET

// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-24
	MOVQ 8(SP), BX
	MOVQ 16(SP), AX
	MOVQ 24(SP), CX
	LOCK
	CMPXCHGQ CX, 0(BX)
	JNZ cas64_fail
	MOVL $1, AX
	RET
cas64_fail:
	MOVL $0, AX
	RET

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp(SB), NOSPLIT, $0-24
	MOVQ 8(SP), BX
	MOVQ 16(SP), AX
	MOVQ 24(SP), CX
	LOCK
	CMPXCHGQ CX, 0(BX)
	JZ 3(PC)
	MOVL $0, AX
	RET
	MOVL $1, AX
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVQ 8(SP), BX
	MOVL 16(SP), AX
	MOVL AX, CX
	LOCK
	XADDL AX, 0(BX)
	ADDL CX, AX
	RET

TEXT runtime·xadd64(SB), NOSPLIT, $0-16
	MOVQ 8(SP), BX
	MOVQ 16(SP), AX
	MOVQ AX, CX
	LOCK
	XADDQ AX, 0(BX)
	ADDQ CX, AX
	RET

TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVQ 8(SP), BX
	MOVL 16(SP), AX
	XCHGL AX, 0(BX)
	RET

TEXT runtime·xchg64(SB), NOSPLIT, $0-16
	MOVQ 8(SP), BX
	MOVQ 16(SP), AX
	XCHGQ AX, 0(BX)
	RET

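// The cas, cas64, casp, xadd, xadd64, xchg, and xchg64 routines above are the
// runtime's atomic primitives. As a rough Go-level analogy (illustrative only;
// the runtime does not route through sync/atomic, which merely exposes the
// same semantics to Go code):
//
//	ok := atomic.CompareAndSwapUint32(val, old, new)	// cas
//	sum := atomic.AddUint32(val, delta)			// xadd
//	old := atomic.SwapUint32(val, new)			// xchg
//	old64 := atomic.SwapUint64(val64, new64)		// xchg64
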
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL 8(SP), AX
again:
	PAUSE
	SUBL $1, AX
	JNZ again
	RET

TEXT runtime·atomicstorep(SB), NOSPLIT, $0-16
	MOVQ 8(SP), BX
	MOVQ 16(SP), AX
	XCHGQ AX, 0(BX)
	RET

TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
	MOVQ 8(SP), BX
	MOVL 16(SP), AX
	XCHGL AX, 0(BX)
	RET

TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVQ 8(SP), BX
	MOVQ 16(SP), AX
	XCHGQ AX, 0(BX)
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the caller's return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ 8(SP), DX	// fn
	MOVQ 16(SP), BX	// caller sp
	LEAQ -8(BX), SP	// caller sp after CALL
	SUBQ $5, (SP)	// return to CALL again
	MOVQ 0(DX), BX
	JMP BX	// but first run the deferred function

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ g(R8), R8
	MOVQ 0(SP), R9
	MOVQ R9, (g_sched+gobuf_pc)(R8)
	LEAQ 8(SP), R9
	MOVQ R9, (g_sched+gobuf_sp)(R8)
	MOVQ $0, (g_sched+gobuf_ret)(R8)
	MOVQ $0, (g_sched+gobuf_ctxt)(R8)
	RET

// asmcgocall(void(*fn)(void*), void *arg)
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.c for more details.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-16
	MOVQ fn+0(FP), AX
	MOVQ arg+8(FP), BX
	MOVQ SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ m(CX), BP
	MOVQ m_g0(BP), SI
	MOVQ g(CX), DI
	CMPQ SI, DI
	JEQ 4(PC)
	CALL gosave<>(SB)
	MOVQ SI, g(CX)
	MOVQ (g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ $64, SP
	ANDQ $~15, SP	// alignment for gcc ABI
	MOVQ DI, 48(SP)	// save g
	MOVQ DX, 40(SP)	// save SP
	MOVQ BX, DI	// DI = first argument in AMD64 ABI
	MOVQ BX, CX	// CX = first argument in Win64
	CALL AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ 48(SP), DI
	MOVQ DI, g(CX)
	MOVQ 40(SP), SP
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
	LEAQ fn+0(FP), AX
	MOVQ AX, 0(SP)
	MOVQ frame+8(FP), AX
	MOVQ AX, 8(SP)
	MOVQ framesize+16(FP), AX
	MOVQ AX, 16(SP)
	MOVQ $runtime·cgocallback_gofunc(SB), AX
	CALL AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$8-24
	// If m is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
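	//
	// Rough Go-level shape of the code that follows (a sketch only; needm
	// and dropm are the runtime routines invoked below through AX):
	//
	//	if m == nil {
	//		needm()	// borrow an m (and its g0) for this foreign thread
	//	}
	//	// ...switch to m->curg, call cgocallbackg, switch back to g0...
	//	if the m was borrowed {
	//		dropm()	// give it back
	//	}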
	get_tls(CX)
#ifdef GOOS_windows
	MOVL $0, BP
	CMPQ CX, $0
	JEQ 2(PC)
#endif
	MOVQ m(CX), BP
	MOVQ BP, R8	// holds oldm until end of function
	CMPQ BP, $0
	JNE havem
needm:
	MOVQ R8, 0(SP)
	MOVQ $runtime·needm(SB), AX
	CALL AX
	MOVQ 0(SP), R8
	get_tls(CX)
	MOVQ m(CX), BP

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ m_g0(BP), SI
	MOVQ (g_sched+gobuf_sp)(SI), AX
	MOVQ AX, 0(SP)
	MOVQ SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved R8.
	MOVQ m_curg(BP), SI
	MOVQ SI, g(CX)
	MOVQ (g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVQ (g_sched+gobuf_pc)(SI), BP
	MOVQ BP, -8(DI)
	LEAQ -(8+8)(DI), SP
	MOVQ R8, 0(SP)
	CALL runtime·cgocallbackg(SB)
	MOVQ 0(SP), R8

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ g(CX), SI
	MOVQ 8(SP), BP
	MOVQ BP, (g_sched+gobuf_pc)(SI)
	LEAQ (8+8)(SP), DI
	MOVQ DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ m(CX), BP
	MOVQ m_g0(BP), SI
	MOVQ SI, g(CX)
	MOVQ (g_sched+gobuf_sp)(SI), SP
	MOVQ 0(SP), AX
	MOVQ AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ R8, $0
	JNE 3(PC)
	MOVQ $runtime·dropm(SB), AX
	CALL AX

	// Done!
	RET

// void setmg(M*, G*); set m and g. for use by needm.
TEXT runtime·setmg(SB), NOSPLIT, $0-16
	MOVQ mm+0(FP), AX
#ifdef GOOS_windows
	CMPQ AX, $0
	JNE settls
	MOVQ $0, 0x28(GS)
	RET
settls:
	LEAQ m_tls(AX), AX
	MOVQ AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ mm+0(FP), AX
	MOVQ AX, m(CX)
	MOVQ gg+8(FP), BX
	MOVQ BX, g(CX)
	RET

// void setmg_gcc(M*, G*); set m and g called from gcc.
TEXT setmg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ DI, m(AX)
	MOVQ SI, g(AX)
	RET

// check that SP is in range [g->stackbase, g->stackguard)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ g(CX), AX
	CMPQ g_stackbase(AX), SP
	JHI 2(PC)
	INT $3
	CMPQ SP, g_stackguard(AX)
	JHI 2(PC)
	INT $3
	RET

TEXT runtime·memclr(SB),NOSPLIT,$0-16
	MOVQ 8(SP), DI	// arg 1 addr
	MOVQ 16(SP), CX	// arg 2 count
	MOVQ CX, BX
	ANDQ $7, BX
	SHRQ $3, CX
	MOVQ $0, AX
	CLD
	REP
	STOSQ
	MOVQ BX, CX
	REP
	STOSB
	RET

TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
	MOVQ x+0(FP),AX	// addr of first arg
	MOVQ -8(AX),AX	// get calling pc
	RET

TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
	MOVQ x+0(FP),AX	// addr of first arg
	MOVQ x+8(FP), BX
	MOVQ BX, -8(AX)	// set calling pc
	RET

TEXT runtime·getcallersp(SB),NOSPLIT,$0-8
	MOVQ sp+0(FP), AX
	RET

// int64 runtime·cputicks(void)
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ $32, DX
	ADDQ DX, AX
	RET

TEXT runtime·stackguard(SB),NOSPLIT,$0-16
	MOVQ SP, DX
	MOVQ DX, sp+0(FP)
	get_tls(CX)
	MOVQ g(CX), BX
	MOVQ g_stackguard(BX), DX
	MOVQ DX, limit+8(FP)
	RET

GLOBL runtime·tls0(SB), $64

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-24
	MOVQ 8(SP), DX	// ptr to hash value
	MOVQ 16(SP), CX	// size
	MOVQ 24(SP), AX	// ptr to data
	JMP runtime·aeshashbody(SB)

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
	MOVQ 8(SP), DX	// ptr to hash value
	MOVQ 24(SP), AX	// ptr to string struct
	MOVQ 8(AX), CX	// length of string
	MOVQ (AX), AX	// string data
	JMP runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: ptr to seed input / hash output
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-24
	MOVQ (DX), X0	// seed to low 64 bits of xmm0
	PINSRQ $1, CX, X0	// size to high 64 bits of xmm0
	MOVO runtime·aeskeysched+0(SB), X2
	MOVO runtime·aeskeysched+16(SB), X3
	CMPQ CX, $16
	JB aessmall
aesloop:
	CMPQ CX, $16
	JBE aesloopend
	MOVOU (AX), X1
	AESENC X2, X0
	AESENC X1, X0
	SUBQ $16, CX
	ADDQ $16, AX
	JMP aesloop
// 1-16 bytes remaining
aesloopend:
	// This load may overlap with the previous load above.
	// We'll hash some bytes twice, but that's ok.
	MOVOU -16(AX)(CX*1), X1
	JMP partial
// 0-15 bytes
aessmall:
	TESTQ CX, CX
	JE finalize	// 0 bytes

	CMPB AX, $0xf0
	JA highpartial

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU (AX), X1
	ADDQ CX, CX
	PAND masks<>(SB)(CX*8), X1
	JMP partial
highpartial:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU -16(AX)(CX*1), X1
	ADDQ CX, CX
	PSHUFB shifts<>(SB)(CX*8), X1
partial:
	// incorporate partial block into hash
	AESENC X3, X0
	AESENC X1, X0
finalize:
	// finalize hash
	AESENC X2, X0
	AESENC X3, X0
	AESENC X2, X0
	MOVQ X0, (DX)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ 8(SP), DX	// ptr to hash value
	MOVQ 24(SP), AX	// ptr to data
	MOVQ (DX), X0	// seed
	PINSRD $2, (AX), X0	// data
	AESENC runtime·aeskeysched+0(SB), X0
	AESENC runtime·aeskeysched+16(SB), X0
	AESENC runtime·aeskeysched+0(SB), X0
	MOVQ X0, (DX)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ 8(SP), DX	// ptr to hash value
	MOVQ 24(SP), AX	// ptr to data
	MOVQ (DX), X0	// seed
	PINSRQ $1, (AX), X0	// data
	AESENC runtime·aeskeysched+0(SB), X0
	AESENC runtime·aeskeysched+16(SB), X0
	AESENC runtime·aeskeysched+0(SB), X0
	MOVQ X0, (DX)
	RET

// simple mask to get rid of data in the high part of the register.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
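// For example, the entry for moving 3 bytes (offset 0x30 below) is
// 0d 0e 0f ff ff ... ff: pshufb copies bytes 13-15 of the register down to
// bytes 0-2 and zeroes the rest, since a mask byte with its high bit set
// clears the corresponding destination byte.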
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

TEXT runtime·memeq(SB),NOSPLIT,$0-24
	MOVQ a+0(FP), SI
	MOVQ b+8(FP), DI
	MOVQ count+16(FP), BX
	JMP runtime·memeqbody(SB)

// a in SI
// b in DI
// count in BX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ AX, AX

	CMPQ BX, $8
	JB small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ BX, $64
	JB bigloop
	MOVOU (SI), X0
	MOVOU (DI), X1
	MOVOU 16(SI), X2
	MOVOU 16(DI), X3
	MOVOU 32(SI), X4
	MOVOU 32(DI), X5
	MOVOU 48(SI), X6
	MOVOU 48(DI), X7
	PCMPEQB X1, X0
	PCMPEQB X3, X2
	PCMPEQB X5, X4
	PCMPEQB X7, X6
	PAND X2, X0
	PAND X6, X4
	PAND X4, X0
	PMOVMSKB X0, DX
	ADDQ $64, SI
	ADDQ $64, DI
	SUBQ $64, BX
	CMPL DX, $0xffff
	JEQ hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ BX, $8
	JBE leftover
	MOVQ (SI), CX
	MOVQ (DI), DX
	ADDQ $8, SI
	ADDQ $8, DI
	SUBQ $8, BX
	CMPQ CX, DX
	JEQ bigloop
	RET

	// remaining 0-8 bytes
leftover:
	MOVQ -8(SI)(BX*1), CX
	MOVQ -8(DI)(BX*1), DX
	CMPQ CX, DX
	SETEQ AX
	RET

small:
	CMPQ BX, $0
	JEQ equal

	LEAQ 0(BX*8), CX
	NEGQ CX

	CMPB SI, $0xf8
	JA si_high

	// load at SI won't cross a page boundary.
	MOVQ (SI), SI
	JMP si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ -8(SI)(BX*1), SI
	SHRQ CX, SI
si_finish:

	// same for DI.
	CMPB DI, $0xf8
	JA di_high
	MOVQ (DI), DI
	JMP di_finish
di_high:
	MOVQ -8(DI)(BX*1), DI
	SHRQ CX, DI
di_finish:

	SUBQ SI, DI
	SHLQ CX, DI
equal:
	SETEQ AX
	RET

TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
	MOVQ s1+0(FP), SI
	MOVQ s1+8(FP), BX
	MOVQ s2+16(FP), DI
	MOVQ s2+24(FP), DX
	CALL runtime·cmpbody(SB)
	MOVQ AX, res+32(FP)
	RET

TEXT bytes·Compare(SB),NOSPLIT,$0-56
	MOVQ s1+0(FP), SI
	MOVQ s1+8(FP), BX
	MOVQ s2+24(FP), DI
	MOVQ s2+32(FP), DX
	CALL runtime·cmpbody(SB)
	MOVQ AX, res+48(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ SI, DI
	JEQ cmp_allsame
	CMPQ BX, DX
	MOVQ DX, BP
	CMOVQLT BX, BP	// BP = min(alen, blen) = # of bytes to compare
	CMPQ BP, $8
	JB cmp_small

cmp_loop:
	CMPQ BP, $16
	JBE cmp_0through16
	MOVOU (SI), X0
	MOVOU (DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ $0xffff, AX	// convert EQ to NE
	JNE cmp_diff16	// branch if at least one byte is not equal
	ADDQ $16, SI
	ADDQ $16, DI
	SUBQ $16, BP
	JMP cmp_loop

	// AX = bit mask of differences
cmp_diff16:
	BSFQ AX, BX	// index of first byte that differs
	XORQ AX, AX
	MOVB (SI)(BX*1), CX
	CMPB CX, (DI)(BX*1)
	SETHI AX
	LEAQ -1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
cmp_0through16:
	CMPQ BP, $8
	JBE cmp_0through8
	MOVQ (SI), AX
	MOVQ (DI), CX
	CMPQ AX, CX
	JNE cmp_diff8
cmp_0through8:
	MOVQ -8(SI)(BP*1), AX
	MOVQ -8(DI)(BP*1), CX
	CMPQ AX, CX
	JEQ cmp_allsame

	// AX and CX contain parts of a and b that differ.
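	// The BSWAPs below put those bytes into big-endian order, so the most
	// significant differing bit belongs to the first byte (in memory
	// order) at which a and b differ; a's value of that bit then decides
	// whether the result is +1 or -1.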
cmp_diff8:
	BSWAPQ AX	// reverse order of bytes
	BSWAPQ CX
	XORQ AX, CX
	BSRQ CX, CX	// index of highest bit difference
	SHRQ CX, AX	// move a's bit to bottom
	ANDQ $1, AX	// mask bit
	LEAQ -1(AX*2), AX	// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
cmp_small:
	LEAQ (BP*8), CX	// bytes left -> bits left
	NEGQ CX		// - bits left (== 64 - bits left mod 64)
	JEQ cmp_allsame

	// load bytes of a into high bytes of AX
	CMPB SI, $0xf8
	JA cmp_si_high
	MOVQ (SI), SI
	JMP cmp_si_finish
cmp_si_high:
	MOVQ -8(SI)(BP*1), SI
	SHRQ CX, SI
cmp_si_finish:
	SHLQ CX, SI

	// load bytes of b into high bytes of DI
	CMPB DI, $0xf8
	JA cmp_di_high
	MOVQ (DI), DI
	JMP cmp_di_finish
cmp_di_high:
	MOVQ -8(DI)(BP*1), DI
	SHRQ CX, DI
cmp_di_finish:
	SHLQ CX, DI

	BSWAPQ SI	// reverse order of bytes
	BSWAPQ DI
	XORQ SI, DI	// find bit differences
	JEQ cmp_allsame
	BSRQ DI, CX	// index of highest bit difference
	SHRQ CX, SI	// move a's bit to bottom
	ANDQ $1, SI	// mask bit
	LEAQ -1(SI*2), AX	// 1/0 => +1/-1
	RET

cmp_allsame:
	XORQ AX, AX
	XORQ CX, CX
	CMPQ BX, DX
	SETGT AX	// 1 if alen > blen
	SETEQ CX	// 1 if alen == blen
	LEAQ -1(CX)(AX*2), AX	// 1,0,-1 result
	RET

TEXT bytes·IndexByte(SB),NOSPLIT,$0
	MOVQ s+0(FP), SI
	MOVQ s_len+8(FP), BX
	MOVB c+24(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVQ AX, ret+32(FP)
	RET

TEXT strings·IndexByte(SB),NOSPLIT,$0
	MOVQ s+0(FP), SI
	MOVQ s_len+8(FP), BX
	MOVB c+16(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVQ AX, ret+24(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVQ SI, DI

	CMPQ BX, $16
	JLT indexbyte_small

	// round up to first 16-byte boundary
	TESTQ $15, SI
	JZ aligned
	MOVQ SI, CX
	ANDQ $~15, CX
	ADDQ $16, CX

	// search the beginning
	SUBQ SI, CX
	REPN; SCASB
	JZ success

	// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVQ BX, R11
	ADDQ SI, R11
	ANDQ $~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0
	JMP condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO (DI), X1
	// compare bytes in X0 to X1
	PCMPEQB X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL DX, DX
	JNZ ssesuccess
	ADDQ $16, DI

condition:
	CMPQ DI, R11
	JLT sse

	// search the end
	MOVQ SI, CX
	ADDQ BX, CX
	SUBQ R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ failure
	REPN; SCASB
	JZ success

failure:
	MOVQ $-1, AX
	RET

	// handle for lengths < 16
indexbyte_small:
	MOVQ BX, CX
	REPN; SCASB
	JZ success
	MOVQ $-1, AX
	RET

	// we've found the chunk containing the byte
	// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW DX, DX
	SUBQ SI, DI
	ADDQ DI, DX
	MOVQ DX, AX
	RET

success:
	SUBQ SI, DI
	SUBL $1, DI
	MOVQ DI, AX
	RET

TEXT bytes·Equal(SB),NOSPLIT,$0-49
	MOVQ a_len+8(FP), BX
	MOVQ b_len+32(FP), CX
	XORQ AX, AX
	CMPQ BX, CX
	JNE eqret
	MOVQ a+0(FP), SI
	MOVQ b+24(FP), DI
	CALL runtime·memeqbody(SB)
eqret:
	MOVB AX, ret+48(FP)
	RET
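
// A rough Go-level sketch of what bytes·Equal above computes (illustrative
// only; the exported bytes.Equal is backed by this assembly rather than by a
// Go loop like the one below):
//
//	func equal(a, b []byte) bool {
//		if len(a) != len(b) {
//			return false
//		}
//		for i := range a {
//			if a[i] != b[i] {
//				return false
//			}
//		}
//		return true
//	}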