// Source listing: rsc.io/go@v0.0.0-20150416155037-e040fd465409/src/runtime/asm_amd64.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap routine.
// On entry DI holds argc and SI holds argv. In order, it:
//   1. copies argc/argv onto a 16-byte aligned stack,
//   2. initializes g0's stack bounds and stack guards from the current SP,
//   3. probes CPUID (vendor string and leaf 1 feature bits),
//   4. calls _cgo_init if it is non-nil,
//   5. sets up and sanity-checks TLS (skipped on some systems),
//   6. links g0 and m0 to each other and installs g0 as the current g,
//   7. runs check, args, osinit and schedinit,
//   8. queues runtime·main via newproc and starts this M with mstart.
// mstart is not expected to return; if it does, the code crashes on purpose.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto
	ANDQ	$~15, SP		// round SP down to a 16-byte boundary
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX	// BX = provisional low bound, just under 64 KiB below SP
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0		// AX = highest supported basic CPUID leaf
	JE	nocpuinfo	// leaf 1 not available: skip feature detection

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	// The vendor string from leaf 0 is returned in BX, DX, CX (in that order).
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// CPUID leaf 1: record the feature flag words for later use.
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// g0 already in DI
	MOVQ	DI, CX	// Win64 uses CX for first parameter
	MOVQ	$setg_gcc<>(SB), SI	// second argument: the setg callback for C code
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

	// After _cgo_init, only Windows falls through to the TLS setup below.
	// NOTE(review): presumably _cgo_init has already set up TLS elsewhere - confirm.
	CMPL	runtime·iswindows(SB), $0
	JEQ ok
needtls:
	// skip TLS setup on Plan 9
	CMPL	runtime·isplan9(SB), $1
	JEQ ok
	// skip TLS setup on Solaris
	CMPL	runtime·issolaris(SB), $1
	JEQ ok

	LEAQ	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)		// value round-tripped through TLS: skip the abort
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX			// discard the two pushed arguments
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash
	RET

// mainPC is an 8-byte read-only word holding the address of runtime·main.
// rt0_go passes its address to newproc as the entry for the first goroutine.
DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint executes a single 0xcc byte (the INT3 breakpoint opcode).
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

// asminit is a no-op on this architecture.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP, PC and BP plus the current g into buf,
// and zeroes buf's ret and ctxt fields.
TEXT runtime·gosave(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), AX		// gobuf
	LEAQ	buf+0(FP), BX		// caller's SP
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC
	MOVQ	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	MOVQ	$0, gobuf_ctxt(AX)
	MOVQ	BP, gobuf_bp(AX)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Installs buf's g as the current g, loads SP, AX (ret), DX (ctxt) and BP
// from buf, clears those fields in buf, and jumps to buf's saved PC.
// Does not return to its caller.
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
// mcall saves the caller's PC, SP, BP and g in g->sched, makes m->g0 the
// current g, switches SP to g0's saved stack pointer and calls fn with the
// old g as its argument. Calling mcall while already on g0 jumps to badmcall;
// fn returning jumps to badmcall2.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// argument for fn: the g we just switched away from
	MOVQ	DI, DX		// DX = func value (closure context)
	MOVQ	0(DI), DI	// DI = code pointer stored in the func value's first word
	CALL	DI
	POPQ	AX
	MOVQ	$runtime·badmcall2(SB), AX	// fn returned, which it must never do
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Runs fn on the g0 stack. If the current g is already gsignal or g0, fn is
// called directly on the current stack. If it is m->curg, state is saved in
// g->sched, the stack is switched to g0, fn is called, and the stack is
// switched back to m->curg. Any other g calls badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	MOVQ	m_gsignal(BX), DX	// DX = gsignal
	CMPQ	AX, DX
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	MOVQ	m_curg(BX), R8
	CMPQ	AX, R8
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)	// fake return address: mstart
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX		// DX = func value (closure context)
	MOVQ	0(DI), DI	// DI = code pointer
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
	RET

noswitch:
	// already on m stack, just call directly
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size.  For that purpose, it has no arguments.
//
// In the comments below, "f" is the function whose prolog called morestack.
// The closure context register DX is preserved into g->sched.ctxt.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	MOVQ	SI, (g_sched+gobuf_g)(SI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1003	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
// Zeroes DX (the context register) and tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the argument size) is <= MAXSIZE
// (unsigned compare); otherwise it skips its own MOVQ/JMP pair and falls
// through to the next DISPATCH. It clobbers AX.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias that tail-jumps to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and jumps to the smallest callN whose
// fixed frame size N can hold the arguments; sizes above 1073741824 jump
// to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-32
	MOVLQZX argsize+24(FP), CX
	// NOTE(rsc): No call16, because CALLFN needs four words
	// of argument space to invoke callwritebarrier.
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper named NAME with a MAXSIZE-byte
// frame. The helper copies argsize bytes from argptr onto its own stack,
// calls the function value f (with DX = f), copies the bytes from retoffset
// up to argsize back to argptr, and then calls callwritebarrier with
// (argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	argptr+16(FP), SI;		\
	MOVLQZX argsize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	(DX);				\
	/* copy return values back */		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX retoffset+28(FP), BX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVQ	argtype+0(FP), DX;		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX retoffset+28(FP), BX;		\
	MOVQ	DX, 0(SP);			\
	MOVQ	DI, 8(SP);			\
	MOVQ	CX, 16(SP);			\
	MOVQ	BX, 24(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

// Instantiate one call helper per supported frame size (32 bytes .. 1 GiB).
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
	MOVQ	ptr+0(FP), BX
	MOVL	old+8(FP), AX	// CMPXCHG compares the memory operand against AX
	MOVL	new+12(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)	// ZF is set when the exchange happened
	RET

// bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-25
	MOVQ	ptr+0(FP), BX
	MOVQ	old+8(FP), AX	// old is passed by value
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// The uintptr/uint variants below tail-jump to the 64-bit implementations.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
	JMP	runtime·cas64(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
	JMP	runtime·atomicload64(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
	JMP	runtime·atomicload64(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
	JMP	runtime·atomicstore64(SB)

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-25
	MOVQ	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-20
	MOVQ	ptr+0(FP), BX
	MOVL	delta+8(FP), AX
	MOVL	AX, CX		// keep a copy of delta; XADD overwrites AX
	LOCK
	XADDL	AX, 0(BX)	// AX = old *val; *val += delta
	ADDL	CX, AX		// AX = old value + delta = new value
	MOVL	AX, ret+16(FP)
	RET

// 64-bit version of xadd: atomically adds delta to *ptr and returns the new value.
TEXT runtime·xadd64(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	MOVQ	AX, ret+16(FP)
	RET

// xchg stores new into *ptr and returns the previous value.
// No LOCK prefix: XCHG with a memory operand is locked implicitly.
TEXT runtime·xchg(SB), NOSPLIT, $0-20
	MOVQ	ptr+0(FP), BX
	MOVL	new+8(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+16(FP)
	RET

// 64-bit version of xchg.
TEXT runtime·xchg64(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

// Pointer-sized version of xchg; same instruction sequence as xchg64.
TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
	JMP	runtime·xchg64(SB)

// procyield executes PAUSE `cycles` times.
// The count is decremented after each PAUSE, so the loop body runs at least once.
// NOTE(review): declared with argument size $0-0 although it reads
// cycles+0(FP) - confirm against the Go declaration.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep1 stores val to *ptr using XCHG; the old value is discarded.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

// atomicstore stores a 32-bit val to *ptr using XCHG.
TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
	MOVQ	ptr+0(FP), BX
	MOVL	val+8(FP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore64 stores a 64-bit val to *ptr using XCHG.
TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
// Atomically: *ptr |= val.
TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
	MOVQ	ptr+0(FP), AX
	MOVB	val+8(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
// Atomically: *ptr &= val.
TEXT runtime·atomicand8(SB), NOSPLIT, $0-9
	MOVQ	ptr+0(FP), AX
	MOVB	val+8(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	SUBQ	$5, (SP)	// return to CALL again
	MOVQ	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ	g(R8), R8
	MOVQ	0(SP), R9	// caller's PC (our return address)
	MOVQ	R9, (g_sched+gobuf_pc)(R8)
	LEAQ	8(SP), R9	// caller's SP (just above the return address)
	MOVQ	R9, (g_sched+gobuf_sp)(R8)
	MOVQ	$0, (g_sched+gobuf_ret)(R8)
	MOVQ	$0, (g_sched+gobuf_ctxt)(R8)
	MOVQ	BP, (g_sched+gobuf_bp)(R8)
	RET

// asmcgocall(void(*fn)(void*), void *arg)
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.c for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-16
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	CALL	asmcgocall<>(SB)
	RET

// asmcgocall_errno is asmcgocall that also returns the 32-bit value fn left in AX.
TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	CALL	asmcgocall<>(SB)
	MOVL	AX, ret+16(FP)
	RET

// asmcgocall common code. fn in AX, arg in BX. returns errno in AX.
TEXT asmcgocall<>(SB),NOSPLIT,$0-0
	MOVQ	SP, DX		// DX = SP on entry, used below to compute stack depth

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave

	MOVQ	m_g0(R8), SI
	CALL	gosave<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
nosave:

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI	// SI = stack.hi - saved depth = original SP
	MOVQ	DI, g(CX)
	MOVQ	SI, SP
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
	LEAQ	fn+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)		// no TLS block: leave BX = 0 and skip the load of g
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, R8 // holds oldm until end of function
	JMP	havem
needm:
	MOVQ	$0, 0(SP)
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	MOVQ	0(SP), R8
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved R8.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX
	SUBQ	AX, DI
	MOVQ	DI, SP

	MOVQ	R8, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVQ	0(SP), R8

	// Compute the size of the frame again.  FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ	R8, $0
	JNE 3(PC)
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// setg installs gg as the current g in thread-local storage.
// On Windows it first updates the TLS base slot at 0x28(GS): a nil gg clears
// the slot and returns early; otherwise the slot is pointed at gg->m->tls.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
#ifdef GOOS_windows
	CMPQ	BX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)
	RET
settls:
	MOVQ	g_m(BX), AX
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
// The G* arrives in DI rather than on the Go stack.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Traps with INT $3 when the check fails.
// NOTE(review): both comparisons are strict (JHI), so SP == stack.lo also
// traps, which is tighter than the half-open range stated above.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)		// ok when stack.hi > SP (unsigned)
	INT	$3
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)		// ok when SP > stack.lo (unsigned)
	INT	$3
	RET

// getcallerpc takes the address of its caller's first argument and returns
// the word stored just below it, i.e. the caller's return PC.
TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16
	MOVQ	argp+0(FP),AX		// addr of first arg
	MOVQ	-8(AX),AX		// get calling pc
	MOVQ	AX, ret+8(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp with pc.
TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
	MOVQ	argp+0(FP),AX		// addr of first arg
	MOVQ	pc+8(FP), BX
	MOVQ	BX, -8(AX)		// set calling pc
	RET

// getcallersp returns argp unchanged.
TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
	MOVQ	argp+0(FP), AX
	MOVQ	AX, ret+8(FP)
	RET

// func cputicks() int64
// Returns the 64-bit time-stamp counter. RDTSC is preceded by LFENCE when
// lfenceBeforeRdtsc is 1 (set for Intel CPUs in rt0_go) and by MFENCE otherwise.
// NOTE(review): declared with argument size $0-0 although it writes
// ret+0(FP) - confirm against the Go declaration.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC			// result: high 32 bits in DX, low 32 bits in AX
	SHLQ	$32, DX
	ADDQ	DX, AX		// AX = DX<<32 + AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
895 TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24 896 GO_ARGS 897 NO_LOCAL_POINTERS 898 MOVQ p+0(FP), AX 899 MOVQ h+8(FP), BX 900 MOVQ 8(DX), CX 901 MOVQ AX, 0(SP) 902 MOVQ BX, 8(SP) 903 MOVQ CX, 16(SP) 904 CALL runtime·memhash(SB) 905 MOVQ 24(SP), AX 906 MOVQ AX, ret+16(FP) 907 RET 908 909 // hash function using AES hardware instructions 910 TEXT runtime·aeshash(SB),NOSPLIT,$0-32 911 MOVQ p+0(FP), AX // ptr to data 912 MOVQ s+16(FP), CX // size 913 LEAQ ret+24(FP), DX 914 JMP runtime·aeshashbody(SB) 915 916 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24 917 MOVQ p+0(FP), AX // ptr to string struct 918 MOVQ 8(AX), CX // length of string 919 MOVQ (AX), AX // string data 920 LEAQ ret+16(FP), DX 921 JMP runtime·aeshashbody(SB) 922 923 // AX: data 924 // CX: length 925 // DX: address to put return value 926 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0 927 MOVQ h+8(FP), X6 // seed to low 64 bits of xmm6 928 PINSRQ $1, CX, X6 // size to high 64 bits of xmm6 929 PSHUFHW $0, X6, X6 // replace size with its low 2 bytes repeated 4 times 930 MOVO runtime·aeskeysched(SB), X7 931 CMPQ CX, $16 932 JB aes0to15 933 JE aes16 934 CMPQ CX, $32 935 JBE aes17to32 936 CMPQ CX, $64 937 JBE aes33to64 938 CMPQ CX, $128 939 JBE aes65to128 940 JMP aes129plus 941 942 aes0to15: 943 TESTQ CX, CX 944 JE aes0 945 946 ADDQ $16, AX 947 TESTW $0xff0, AX 948 JE endofpage 949 950 // 16 bytes loaded at this address won't cross 951 // a page boundary, so we can load it directly. 952 MOVOU -16(AX), X0 953 ADDQ CX, CX 954 MOVQ $masks<>(SB), AX 955 PAND (AX)(CX*8), X0 956 957 // scramble 3 times 958 AESENC X6, X0 959 AESENC X7, X0 960 AESENC X7, X0 961 MOVQ X0, (DX) 962 RET 963 964 endofpage: 965 // address ends in 1111xxxx. Might be up against 966 // a page boundary, so load ending at last byte. 967 // Then shift bytes down using pshufb. 
968 MOVOU -32(AX)(CX*1), X0 969 ADDQ CX, CX 970 MOVQ $shifts<>(SB), AX 971 PSHUFB (AX)(CX*8), X0 972 AESENC X6, X0 973 AESENC X7, X0 974 AESENC X7, X0 975 MOVQ X0, (DX) 976 RET 977 978 aes0: 979 // return input seed 980 MOVQ h+8(FP), AX 981 MOVQ AX, (DX) 982 RET 983 984 aes16: 985 MOVOU (AX), X0 986 AESENC X6, X0 987 AESENC X7, X0 988 AESENC X7, X0 989 MOVQ X0, (DX) 990 RET 991 992 aes17to32: 993 // load data to be hashed 994 MOVOU (AX), X0 995 MOVOU -16(AX)(CX*1), X1 996 997 // scramble 3 times 998 AESENC X6, X0 999 AESENC runtime·aeskeysched+16(SB), X1 1000 AESENC X7, X0 1001 AESENC X7, X1 1002 AESENC X7, X0 1003 AESENC X7, X1 1004 1005 // combine results 1006 PXOR X1, X0 1007 MOVQ X0, (DX) 1008 RET 1009 1010 aes33to64: 1011 MOVOU (AX), X0 1012 MOVOU 16(AX), X1 1013 MOVOU -32(AX)(CX*1), X2 1014 MOVOU -16(AX)(CX*1), X3 1015 1016 AESENC X6, X0 1017 AESENC runtime·aeskeysched+16(SB), X1 1018 AESENC runtime·aeskeysched+32(SB), X2 1019 AESENC runtime·aeskeysched+48(SB), X3 1020 AESENC X7, X0 1021 AESENC X7, X1 1022 AESENC X7, X2 1023 AESENC X7, X3 1024 AESENC X7, X0 1025 AESENC X7, X1 1026 AESENC X7, X2 1027 AESENC X7, X3 1028 1029 PXOR X2, X0 1030 PXOR X3, X1 1031 PXOR X1, X0 1032 MOVQ X0, (DX) 1033 RET 1034 1035 aes65to128: 1036 MOVOU (AX), X0 1037 MOVOU 16(AX), X1 1038 MOVOU 32(AX), X2 1039 MOVOU 48(AX), X3 1040 MOVOU -64(AX)(CX*1), X4 1041 MOVOU -48(AX)(CX*1), X5 1042 MOVOU -32(AX)(CX*1), X8 1043 MOVOU -16(AX)(CX*1), X9 1044 1045 AESENC X6, X0 1046 AESENC runtime·aeskeysched+16(SB), X1 1047 AESENC runtime·aeskeysched+32(SB), X2 1048 AESENC runtime·aeskeysched+48(SB), X3 1049 AESENC runtime·aeskeysched+64(SB), X4 1050 AESENC runtime·aeskeysched+80(SB), X5 1051 AESENC runtime·aeskeysched+96(SB), X8 1052 AESENC runtime·aeskeysched+112(SB), X9 1053 AESENC X7, X0 1054 AESENC X7, X1 1055 AESENC X7, X2 1056 AESENC X7, X3 1057 AESENC X7, X4 1058 AESENC X7, X5 1059 AESENC X7, X8 1060 AESENC X7, X9 1061 AESENC X7, X0 1062 AESENC X7, X1 1063 AESENC X7, X2 1064 AESENC X7, X3 
1065 AESENC X7, X4 1066 AESENC X7, X5 1067 AESENC X7, X8 1068 AESENC X7, X9 1069 1070 PXOR X4, X0 1071 PXOR X5, X1 1072 PXOR X8, X2 1073 PXOR X9, X3 1074 PXOR X2, X0 1075 PXOR X3, X1 1076 PXOR X1, X0 1077 MOVQ X0, (DX) 1078 RET 1079 1080 aes129plus: 1081 // start with last (possibly overlapping) block 1082 MOVOU -128(AX)(CX*1), X0 1083 MOVOU -112(AX)(CX*1), X1 1084 MOVOU -96(AX)(CX*1), X2 1085 MOVOU -80(AX)(CX*1), X3 1086 MOVOU -64(AX)(CX*1), X4 1087 MOVOU -48(AX)(CX*1), X5 1088 MOVOU -32(AX)(CX*1), X8 1089 MOVOU -16(AX)(CX*1), X9 1090 1091 // scramble state once 1092 AESENC X6, X0 1093 AESENC runtime·aeskeysched+16(SB), X1 1094 AESENC runtime·aeskeysched+32(SB), X2 1095 AESENC runtime·aeskeysched+48(SB), X3 1096 AESENC runtime·aeskeysched+64(SB), X4 1097 AESENC runtime·aeskeysched+80(SB), X5 1098 AESENC runtime·aeskeysched+96(SB), X8 1099 AESENC runtime·aeskeysched+112(SB), X9 1100 1101 // compute number of remaining 128-byte blocks 1102 DECQ CX 1103 SHRQ $7, CX 1104 1105 aesloop: 1106 // scramble state, xor in a block 1107 MOVOU (AX), X10 1108 MOVOU 16(AX), X11 1109 MOVOU 32(AX), X12 1110 MOVOU 48(AX), X13 1111 AESENC X10, X0 1112 AESENC X11, X1 1113 AESENC X12, X2 1114 AESENC X13, X3 1115 MOVOU 64(AX), X10 1116 MOVOU 80(AX), X11 1117 MOVOU 96(AX), X12 1118 MOVOU 112(AX), X13 1119 AESENC X10, X4 1120 AESENC X11, X5 1121 AESENC X12, X8 1122 AESENC X13, X9 1123 1124 // scramble state 1125 AESENC X7, X0 1126 AESENC X7, X1 1127 AESENC X7, X2 1128 AESENC X7, X3 1129 AESENC X7, X4 1130 AESENC X7, X5 1131 AESENC X7, X8 1132 AESENC X7, X9 1133 1134 ADDQ $128, AX 1135 DECQ CX 1136 JNE aesloop 1137 1138 // 2 more scrambles to finish 1139 AESENC X7, X0 1140 AESENC X7, X1 1141 AESENC X7, X2 1142 AESENC X7, X3 1143 AESENC X7, X4 1144 AESENC X7, X5 1145 AESENC X7, X8 1146 AESENC X7, X9 1147 AESENC X7, X0 1148 AESENC X7, X1 1149 AESENC X7, X2 1150 AESENC X7, X3 1151 AESENC X7, X4 1152 AESENC X7, X5 1153 AESENC X7, X8 1154 AESENC X7, X9 1155 1156 PXOR X4, X0 1157 PXOR X5, X1 
// Combine the remaining lanes into X0, then store its low 64 bits through DX.
	PXOR	X8, X2
	PXOR	X9, X3
	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVQ	X0, (DX)
	RET

// aeshash32: arguments p+0(FP) (pointer to 4 bytes of data) and h+8(FP) (seed),
// result at ret+16(FP). The seed and the data are placed in one XMM register,
// which is run through three AESENC rounds keyed from runtime·aeskeysched.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	h+8(FP), X0	// seed -> low quadword of X0
	MOVQ	p+0(FP), AX	// AX = address of the data
	PINSRD	$2, (AX), X0	// 4 data bytes -> doubleword 2 of X0
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// result = low 64 bits of the state
	RET

// aeshash64: same scheme as aeshash32, but 8 bytes of data are read from p
// and inserted as the high quadword of X0.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	h+8(FP), X0	// seed -> low quadword of X0
	MOVQ	p+0(FP), AX	// AX = address of the data
	PINSRQ	$1, (AX), X0	// 8 data bytes -> quadword 1 of X0
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// result = low 64 bits of the state
	RET

// simple mask to get rid of data in the high part of the register.
// Sixteen 16-byte entries; entry k (at offset k*16) has its k lowest
// bytes set to 0xff and every other byte zero.
DATA masks<>+0x00(SB)/8, $0x0000000000000000	// k = 0
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff	// k = 1
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff	// k = 2
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff	// k = 3
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff	// k = 4
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff	// k = 5
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff	// k = 6
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff	// k = 7
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff	// k = 8
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff	// k = 9
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff	// k = 10
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff	// k = 11
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff	// k = 12
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff	// k = 13
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff	// k = 14
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff	// k = 15
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Sixteen 16-byte entries; in entry k the k lowest selector bytes name
// source bytes 16-k..15 and the remaining selector bytes are 0xff.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000	// k = 0
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f	// k = 1
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e	// k = 2
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d	// k = 3
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c	// k = 4
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b	// k = 5
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a	// k = 6
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09	// k = 7
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908	// k = 8
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807	// k = 9
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706	// k = 10
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605	// k = 11
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504	// k = 12
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403	// k = 13
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302	// k = 14
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201	// k = 15
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// memeq hands a, b and size to memeqbody in SI, DI and BX and returns
// the low byte of AX at ret+24(FP).
TEXT runtime·memeq(SB),NOSPLIT,$0-25
	MOVQ	size+16(FP), BX
	MOVQ	b+8(FP), DI
	MOVQ	a+0(FP), SI
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Identical pointers yield 1 without calling memeqbody; otherwise the byte
// count is read from the closure that DX points at.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
	MOVQ	b+8(FP), DI
	MOVQ	a+0(FP), SI
	CMPQ	SI, DI
	JEQ	same
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
same:
	MOVB	$1, ret+16(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-33
	MOVQ	s1str+0(FP), SI
	MOVQ	s2str+16(FP), DI
	CMPQ	SI, DI
	JEQ	eq			// same data pointer: equal without looking at the bytes
	MOVQ	s1len+8(FP), BX		// only s1's length is read
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+32(FP)
	RET
eq:
	MOVB	$1, v+32(FP)
	RET

// memeqbody compares two byte ranges of the same length.
// a in SI
// b in DI
// count in BX
// Result: AL is 1 if all count bytes match, 0 otherwise; AX is cleared
// on entry, so the early RETs on a mismatch return 0.
// Overwrites SI, DI, BX, CX, DX and X0-X7.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// one bit per byte position; set = equal in all four chunks
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff		// all 16 bits set <=> all 64 bytes matched
	JEQ	hugeloop
	RET				// mismatch, AX is still 0

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET				// mismatch, AX is still 0

	// remaining 0-8 bytes
	// The load covers the 8 bytes ending at SI+BX, so it may re-read
	// bytes that were already compared; the total count was >= 8 here.
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// count < 8
small:
	CMPQ	BX, $0
	JEQ	equal			// ZF is set by the compare, so SETEQ below yields 1

	LEAQ	0(BX*8), CX
	NEGQ	CX			// shift count: low 6 bits = 64 - 8*count

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI			// discard the bytes beyond count; ZF set iff the rest matched
equal:
	SETEQ	AX
	RET

// cmpstring loads both strings' base pointers and lengths into the
// registers cmpbody expects and stores its AX result at ret+32(FP).
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
	MOVQ	s1_base+0(FP), SI
	MOVQ	s1_len+8(FP), BX
	MOVQ	s2_base+16(FP), DI
	MOVQ	s2_len+24(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVQ	AX, ret+32(FP)
	RET

// bytes·Compare reads the words at offsets 0 and 8 of s1 and of s2 (at
// 24(FP)) as pointer and length, calls cmpbody, and stores AX at res+48(FP).
TEXT bytes·Compare(SB),NOSPLIT,$0-56
	MOVQ	s1+0(FP), SI
	MOVQ	s1+8(FP), BX
	MOVQ	s2+24(FP), DI
	MOVQ	s2+32(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVQ	AX, res+48(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
// The first min(alen, blen) bytes are compared as unsigned bytes; if they
// all match (or a and b are the same pointer) the result comes from
// comparing alen with blen.
// Overwrites SI, DI, BX, CX, R8, X0 and X1.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8			// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX		// convert EQ to NE
	JNE	diff16			// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX			// index of first byte that differs
	XORQ	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX
	LEAQ	-1(AX*2), AX		// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// compare the 8 bytes that end at the R8-byte mark
	MOVQ	-8(SI)(R8*1), AX
	MOVQ	-8(DI)(R8*1), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX			// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX			// index of highest bit difference
	SHRQ	CX, AX			// move a's bit to bottom
	ANDQ	$1, AX			// mask bit
	LEAQ	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX		// bytes left -> bits left
	NEGQ	CX			// - bits left (== 64 - bits left mod 64)
	JEQ	allsame			// nothing to compare

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	MOVQ	-8(SI)(R8*1), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(R8*1), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI			// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI			// find bit differences
	JEQ	allsame
	BSRQ	DI, CX			// index of highest bit difference
	SHRQ	CX, SI			// move a's bit to bottom
	ANDQ	$1, SI			// mask bit
	LEAQ	-1(SI*2), AX		// 1/0 => +1/-1
	RET

	// compared bytes are identical: order by length
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte loads the slice pointer, its length and the byte c into
// SI, BX and AL, calls indexbytebody, and stores AX at ret+32(FP).
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), BX
	MOVB	c+24(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVQ	AX, ret+32(FP)
	RET

// strings·IndexByte is the same for a string: c is at 16(FP) and the
// result at ret+24(FP).
TEXT strings·IndexByte(SB),NOSPLIT,$0-32
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), BX
	MOVB	c+16(FP), AL
	CALL	runtime·indexbytebody(SB)
	MOVQ	AX, ret+24(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of the first occurrence of the byte, or -1 if there is none
// Overwrites CX, DX, DI, R11, X0 and X1.
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVQ	SI, DI

	CMPQ	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTQ	$15, SI
	JZ	aligned
	MOVQ	SI, CX
	ANDQ	$~15, CX
	ADDQ	$16, CX

	// search the beginning
	SUBQ	SI, CX			// CX = number of bytes before the boundary
	REPN; SCASB
	JZ	success

	// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVQ	BX, R11
	ADDQ	SI, R11
	ANDQ	$~15, R11

	// shuffle X0 around so that each byte contains c
	// (only the low byte of AX survives the two unpacks and the shuffle)
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess
	ADDQ	$16, DI

condition:
	CMPQ	DI, R11
	JLT	sse

	// search the end
	MOVQ	SI, CX
	ADDQ	BX, CX
	SUBQ	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVQ	$-1, AX
	RET

	// handle for lengths < 16
	// With BX == 0 SCASB does not execute; ZF is then still clear from the
	// CMPQ BX, $16 above, so the JZ falls through to the -1 return.
small:
	MOVQ	BX, CX
	REPN; SCASB
	JZ	success
	MOVQ	$-1, AX
	RET

	// we've found the chunk containing the byte
	// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBQ	SI, DI			// offset of the chunk from the start of the data
	ADDQ	DI, DX
	MOVQ	DX, AX
	RET

	// SCASB leaves DI one past the matching byte.
success:
	SUBQ	SI, DI
	// 64-bit subtract: a 32-bit SUBL would clear the upper half of DI
	// and truncate any index of 4GiB or more.
	SUBQ	$1, DI
	MOVQ	DI, AX
	RET

// bytes·Equal returns 0 without touching the data when the two lengths
// differ; otherwise it returns memeqbody's result for a_len bytes.
TEXT bytes·Equal(SB),NOSPLIT,$0-49
	MOVQ	a_len+8(FP), BX
	MOVQ	b_len+32(FP), CX
	XORQ	AX, AX			// result if the lengths differ
	CMPQ	BX, CX
	JNE	eqret
	MOVQ	a+0(FP), SI
	MOVQ	b+24(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+48(FP)
	RET

// fastrand1 steps the 32-bit m.fastrand value of the current g's m and
// returns the new value: the old value is doubled, and the result is
// XORed with 0x88888eef only when the doubled value has bit 31 set.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX			// BX = doubled value without the XOR
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX			// XOR result negative <=> bit 31 was clear: undo the XOR
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Only AX and CX are written.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0 issues PREFETCHT0 for the address in addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues PREFETCHT1 for the address in addr.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

// prefetcht2 issues PREFETCHT2 for the address in addr.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues PREFETCHNTA for the address in addr.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// This is called from .init_array and follows the platform, not Go, ABI.
// The pointer in DI is stored into the next field of the moduledata that
// runtime·lastmoduledatap points at, and then becomes the new
// lastmoduledatap.
// NOTE(review): DI is presumably the first integer argument of the
// platform calling convention - confirm for each supported OS.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-8
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	RET