// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"


// runtime·ksyscall (unik/rump patch): run a rump-kernel syscall on the
// g0 (system) stack, bracketed by entersyscall/exitsyscall so the Go
// scheduler treats it like a blocking OS syscall.
// Frame is $0-56: 56 bytes of arguments+results on the caller's frame.
// NOTE(review): the exact Go-side signature and the rump_syscall argument
// convention (DI = fn/number, SI = &args, DX = dlen, CX = &results?) are
// not visible in this file — confirm against the Go declaration and the
// rump_syscall C prototype.
TEXT runtime·ksyscall(SB),NOSPLIT,$0-56
	CALL	runtime·entersyscall(SB)

	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m
	MOVQ	m_gsignal(BX), DX	// DX = gsignal
	MOVQ	m_g0(BX), DX	// DX = g0 (overwrites gsignal load above)
	MOVQ	m_curg(BX), R8

	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)

	// call target function
	MOVQ	8(SP), DI	// same slot as fn+0(FP) with a $0 frame
	MOVQ	SP, SI
	ADDQ	$16, SI		// SI = address of the argument block
	MOVQ	$0, DX		// dlen is ignored for local calls
	MOVQ	SP, CX
	ADDQ	$40, CX		// CX = address past the argument block

	MOVQ	BX, SP		// <- change stack just before call.
	LEAQ	rump_syscall(SB), AX
	CALL	AX
	MOVQ	AX, DX		// errno

	// SWITCH BACK
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)

	MOVQ	DX, 56(SP)	// errno


	CALL	runtime·exitsyscall(SB)
	RET



// rt0_go: real Go entry point. Sets up g0's stack bounds, probes CPUID,
// optionally calls _cgo_init, sets up TLS, wires m0<->g0, runs runtime
// init (args/osinit/schedinit), queues runtime.main as the first
// goroutine, and starts this M. Never returns normally.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// g0 already in DI
	MOVQ	DI, CX	// Win64 uses CX for first parameter
	MOVQ	$setg_gcc<>(SB), SI
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

	CMPL	runtime·iswindows(SB), $0
	JEQ ok
needtls:
	// skip TLS setup on Plan 9
	CMPL	runtime·isplan9(SB), $1
	JEQ ok
	// skip TLS setup on Solaris
	CMPL	runtime·issolaris(SB), $1
	JEQ ok

	LEAQ	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash; mstart should never return
	RET

// mainPC is the entry PC handed to newproc above: runtime.main.
DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint: trap into the debugger (INT3).
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), AX		// gobuf
	LEAQ	buf+0(FP), BX		// caller's SP
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC
	MOVQ	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	MOVQ	$0, gobuf_ctxt(AX)
	MOVQ	BP, gobuf_bp(AX)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX	// fn's argument: the old g
	MOVQ	DI, DX
	MOVQ	0(DI), DI	// code pointer out of the funcval
	CALL	DI
	POPQ	AX
	MOVQ	$runtime·badmcall2(SB), AX	// fn must not return
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	MOVQ	m_gsignal(BX), DX	// DX = gsignal
	CMPQ	AX, DX
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	MOVQ	m_curg(BX), R8
	CMPQ	AX, R8
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack, just call directly
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX	// f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	MOVQ	SI, (g_sched+gobuf_g)(SI)
	LEAQ	8(SP), AX	// f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1003	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVQ	g(CX), CX
	MOVQ	(g_stkbar+slice_array)(CX), DX
	MOVQ	g_stkbarPos(CX), BX
	IMULQ	$stkbar__size, BX	// Too big for SIB.
	MOVQ	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDQ	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

TEXT ·reflectcall(SB), NOSPLIT, $0-32
	MOVLQZX argsize+24(FP), CX
	// NOTE(rsc): No call16, because CALLFN needs four words
	// of argument space to invoke callwritebarrier.
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	argptr+16(FP), SI;		\
	MOVLQZX argsize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	(DX);				\
	/* copy return values back */		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX retoffset+28(FP), BX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVQ	argtype+0(FP), DX;		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX retoffset+28(FP), BX;		\
	MOVQ	DX, 0(SP);			\
	MOVQ	DI, 8(SP);			\
	MOVQ	CX, 16(SP);			\
	MOVQ	BX, 24(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
	MOVQ	ptr+0(FP), BX
	MOVL	old+8(FP), AX
	MOVL	new+12(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)
	RET

// bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == *old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-25
	MOVQ	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// uintptr is 64-bit on amd64, so these are just the 64-bit variants.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
	JMP	runtime·cas64(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
	JMP	runtime·atomicload64(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
	JMP	runtime·atomicload64(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
	JMP	runtime·atomicstore64(SB)

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-25
	MOVQ	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-20
	MOVQ	ptr+0(FP), BX
	MOVL	delta+8(FP), AX
	MOVL	AX, CX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX		// XADD returns the old value; add delta for the new one
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·xadd64(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xadduintptr(SB), NOSPLIT, $0-24
	JMP	runtime·xadd64(SB)

TEXT runtime·xchg(SB), NOSPLIT, $0-20
	MOVQ	ptr+0(FP), BX
	MOVL	new+8(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·xchg64(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
	JMP	runtime·xchg64(SB)

// procyield(cycles uint32): spin-wait hint loop (PAUSE per iteration).
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// XCHG has an implicit LOCK prefix, so these stores are sequentially
// consistent, not plain stores.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
	MOVQ	ptr+0(FP), BX
	MOVL	val+8(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
	MOVQ	ptr+0(FP), AX
	MOVB	val+8(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-9
	MOVQ	ptr+0(FP), AX
	MOVB	val+8(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	SUBQ	$5, (SP)	// return to CALL again (5 = length of the CALL instruction)
	MOVQ	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ	g(R8), R8
	MOVQ	0(SP), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R8)
	LEAQ	8(SP), R9
	MOVQ	R9, (g_sched+gobuf_sp)(R8)
	MOVQ	$0, (g_sched+gobuf_ret)(R8)
	MOVQ	$0, (g_sched+gobuf_ctxt)(R8)
	MOVQ	BP, (g_sched+gobuf_bp)(R8)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave

	MOVQ	m_g0(R8), SI
	CALL	gosave<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
nosave:

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
	LEAQ	fn+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, R8	// holds oldm until end of function
	JMP	havem
needm:
	MOVQ	$0, 0(SP)
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	MOVQ	0(SP), R8
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved R8.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX
	SUBQ	AX, DI
	MOVQ	DI, SP

	MOVQ	R8, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVQ	0(SP), R8

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ	R8, $0
	JNE 3(PC)
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
899 TEXT runtime·setg(SB), NOSPLIT, $0-8 900 MOVQ gg+0(FP), BX 901 #ifdef GOOS_windows 902 CMPQ BX, $0 903 JNE settls 904 MOVQ $0, 0x28(GS) 905 RET 906 settls: 907 MOVQ g_m(BX), AX 908 LEAQ m_tls(AX), AX 909 MOVQ AX, 0x28(GS) 910 #endif 911 get_tls(CX) 912 MOVQ BX, g(CX) 913 RET 914 915 // void setg_gcc(G*); set g called from gcc. 916 TEXT setg_gcc<>(SB),NOSPLIT,$0 917 get_tls(AX) 918 MOVQ DI, g(AX) 919 RET 920 921 // check that SP is in range [g->stack.lo, g->stack.hi) 922 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 923 get_tls(CX) 924 MOVQ g(CX), AX 925 CMPQ (g_stack+stack_hi)(AX), SP 926 JHI 2(PC) 927 INT $3 928 CMPQ SP, (g_stack+stack_lo)(AX) 929 JHI 2(PC) 930 INT $3 931 RET 932 933 TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16 934 MOVQ argp+0(FP),AX // addr of first arg 935 MOVQ -8(AX),AX // get calling pc 936 CMPQ AX, runtime·stackBarrierPC(SB) 937 JNE nobar 938 // Get original return PC. 939 CALL runtime·nextBarrierPC(SB) 940 MOVQ 0(SP), AX 941 nobar: 942 MOVQ AX, ret+8(FP) 943 RET 944 945 TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16 946 MOVQ argp+0(FP),AX // addr of first arg 947 MOVQ pc+8(FP), BX 948 MOVQ -8(AX), CX 949 CMPQ CX, runtime·stackBarrierPC(SB) 950 JEQ setbar 951 MOVQ BX, -8(AX) // set calling pc 952 RET 953 setbar: 954 // Set the stack barrier return PC. 955 MOVQ BX, 0(SP) 956 CALL runtime·setNextBarrierPC(SB) 957 RET 958 959 TEXT runtime·getcallersp(SB),NOSPLIT,$0-16 960 MOVQ argp+0(FP), AX 961 MOVQ AX, ret+8(FP) 962 RET 963 964 // func cputicks() int64 965 TEXT runtime·cputicks(SB),NOSPLIT,$0-0 966 CMPB runtime·lfenceBeforeRdtsc(SB), $1 967 JNE mfence 968 BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE 969 JMP done 970 mfence: 971 BYTE $0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE 972 done: 973 RDTSC 974 SHLQ $32, DX 975 ADDQ DX, AX 976 MOVQ AX, ret+0(FP) 977 RET 978 979 // memhash_varlen(p unsafe.Pointer, h seed) uintptr 980 // redirects to memhash(p, h, size) using the size 981 // stored in the closure. 
982 TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24 983 GO_ARGS 984 NO_LOCAL_POINTERS 985 MOVQ p+0(FP), AX 986 MOVQ h+8(FP), BX 987 MOVQ 8(DX), CX 988 MOVQ AX, 0(SP) 989 MOVQ BX, 8(SP) 990 MOVQ CX, 16(SP) 991 CALL runtime·memhash(SB) 992 MOVQ 24(SP), AX 993 MOVQ AX, ret+16(FP) 994 RET 995 996 // hash function using AES hardware instructions 997 TEXT runtime·aeshash(SB),NOSPLIT,$0-32 998 MOVQ p+0(FP), AX // ptr to data 999 MOVQ s+16(FP), CX // size 1000 LEAQ ret+24(FP), DX 1001 JMP runtime·aeshashbody(SB) 1002 1003 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24 1004 MOVQ p+0(FP), AX // ptr to string struct 1005 MOVQ 8(AX), CX // length of string 1006 MOVQ (AX), AX // string data 1007 LEAQ ret+16(FP), DX 1008 JMP runtime·aeshashbody(SB) 1009 1010 // AX: data 1011 // CX: length 1012 // DX: address to put return value 1013 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0 1014 MOVQ h+8(FP), X6 // seed to low 64 bits of xmm6 1015 PINSRQ $1, CX, X6 // size to high 64 bits of xmm6 1016 PSHUFHW $0, X6, X6 // replace size with its low 2 bytes repeated 4 times 1017 MOVO runtime·aeskeysched(SB), X7 1018 CMPQ CX, $16 1019 JB aes0to15 1020 JE aes16 1021 CMPQ CX, $32 1022 JBE aes17to32 1023 CMPQ CX, $64 1024 JBE aes33to64 1025 CMPQ CX, $128 1026 JBE aes65to128 1027 JMP aes129plus 1028 1029 aes0to15: 1030 TESTQ CX, CX 1031 JE aes0 1032 1033 ADDQ $16, AX 1034 TESTW $0xff0, AX 1035 JE endofpage 1036 1037 // 16 bytes loaded at this address won't cross 1038 // a page boundary, so we can load it directly. 1039 MOVOU -16(AX), X0 1040 ADDQ CX, CX 1041 MOVQ $masks<>(SB), AX 1042 PAND (AX)(CX*8), X0 1043 1044 // scramble 3 times 1045 AESENC X6, X0 1046 AESENC X7, X0 1047 AESENC X7, X0 1048 MOVQ X0, (DX) 1049 RET 1050 1051 endofpage: 1052 // address ends in 1111xxxx. Might be up against 1053 // a page boundary, so load ending at last byte. 1054 // Then shift bytes down using pshufb. 
1055 MOVOU -32(AX)(CX*1), X0 1056 ADDQ CX, CX 1057 MOVQ $shifts<>(SB), AX 1058 PSHUFB (AX)(CX*8), X0 1059 AESENC X6, X0 1060 AESENC X7, X0 1061 AESENC X7, X0 1062 MOVQ X0, (DX) 1063 RET 1064 1065 aes0: 1066 // return input seed 1067 MOVQ h+8(FP), AX 1068 MOVQ AX, (DX) 1069 RET 1070 1071 aes16: 1072 MOVOU (AX), X0 1073 AESENC X6, X0 1074 AESENC X7, X0 1075 AESENC X7, X0 1076 MOVQ X0, (DX) 1077 RET 1078 1079 aes17to32: 1080 // load data to be hashed 1081 MOVOU (AX), X0 1082 MOVOU -16(AX)(CX*1), X1 1083 1084 // scramble 3 times 1085 AESENC X6, X0 1086 AESENC runtime·aeskeysched+16(SB), X1 1087 AESENC X7, X0 1088 AESENC X7, X1 1089 AESENC X7, X0 1090 AESENC X7, X1 1091 1092 // combine results 1093 PXOR X1, X0 1094 MOVQ X0, (DX) 1095 RET 1096 1097 aes33to64: 1098 MOVOU (AX), X0 1099 MOVOU 16(AX), X1 1100 MOVOU -32(AX)(CX*1), X2 1101 MOVOU -16(AX)(CX*1), X3 1102 1103 AESENC X6, X0 1104 AESENC runtime·aeskeysched+16(SB), X1 1105 AESENC runtime·aeskeysched+32(SB), X2 1106 AESENC runtime·aeskeysched+48(SB), X3 1107 AESENC X7, X0 1108 AESENC X7, X1 1109 AESENC X7, X2 1110 AESENC X7, X3 1111 AESENC X7, X0 1112 AESENC X7, X1 1113 AESENC X7, X2 1114 AESENC X7, X3 1115 1116 PXOR X2, X0 1117 PXOR X3, X1 1118 PXOR X1, X0 1119 MOVQ X0, (DX) 1120 RET 1121 1122 aes65to128: 1123 MOVOU (AX), X0 1124 MOVOU 16(AX), X1 1125 MOVOU 32(AX), X2 1126 MOVOU 48(AX), X3 1127 MOVOU -64(AX)(CX*1), X4 1128 MOVOU -48(AX)(CX*1), X5 1129 MOVOU -32(AX)(CX*1), X8 1130 MOVOU -16(AX)(CX*1), X9 1131 1132 AESENC X6, X0 1133 AESENC runtime·aeskeysched+16(SB), X1 1134 AESENC runtime·aeskeysched+32(SB), X2 1135 AESENC runtime·aeskeysched+48(SB), X3 1136 AESENC runtime·aeskeysched+64(SB), X4 1137 AESENC runtime·aeskeysched+80(SB), X5 1138 AESENC runtime·aeskeysched+96(SB), X8 1139 AESENC runtime·aeskeysched+112(SB), X9 1140 AESENC X7, X0 1141 AESENC X7, X1 1142 AESENC X7, X2 1143 AESENC X7, X3 1144 AESENC X7, X4 1145 AESENC X7, X5 1146 AESENC X7, X8 1147 AESENC X7, X9 1148 AESENC X7, X0 1149 AESENC X7, X1 1150 
	// NOTE(review): this region begins mid-function. The TEXT directive for
	// this AES-NI hash body is above the visible chunk; what follows is the
	// tail of its 65..128-byte case and the whole 129+-byte case.
	// Register protocol (inferred from the addressing below — confirm against
	// the entry point): AX = data pointer, CX = length in bytes,
	// X6 = seeded state, X7 = scramble key, DX = presumably the address of
	// the 64-bit result slot.

	// finish the second scramble pass over the remaining six lanes
	// (X0 and X1 received theirs before this chunk begins)
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X4
	AESENC	X7, X5
	AESENC	X7, X8
	AESENC	X7, X9

	// fold the eight 128-bit lanes down to a single register:
	// pairwise XOR tree X4..X9 -> X0..X3 -> X0
	PXOR	X4, X0
	PXOR	X5, X1
	PXOR	X8, X2
	PXOR	X9, X3
	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVQ	X0, (DX)	// store low 64 bits of the folded state as the hash
	RET

aes129plus:
	// Inputs of 129+ bytes: hash in 128-byte blocks across eight XMM lanes.
	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X0
	MOVOU	-112(AX)(CX*1), X1
	MOVOU	-96(AX)(CX*1), X2
	MOVOU	-80(AX)(CX*1), X3
	MOVOU	-64(AX)(CX*1), X4
	MOVOU	-48(AX)(CX*1), X5
	MOVOU	-32(AX)(CX*1), X8
	MOVOU	-16(AX)(CX*1), X9

	// scramble state once: lane 0 mixes in the seeded state (X6), the other
	// seven lanes each mix in a distinct 16-byte slice of the key schedule
	// so that the lanes diverge immediately
	AESENC	X6, X0
	AESENC	runtime·aeskeysched+16(SB), X1
	AESENC	runtime·aeskeysched+32(SB), X2
	AESENC	runtime·aeskeysched+48(SB), X3
	AESENC	runtime·aeskeysched+64(SB), X4
	AESENC	runtime·aeskeysched+80(SB), X5
	AESENC	runtime·aeskeysched+96(SB), X8
	AESENC	runtime·aeskeysched+112(SB), X9

	// compute number of remaining 128-byte blocks
	// (length-1)/128: the final partial block was already absorbed above
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state, xor in a block: each AESENC both absorbs 16 input
	// bytes (as the round key operand) and performs one AES round of mixing
	MOVOU	(AX), X10
	MOVOU	16(AX), X11
	MOVOU	32(AX), X12
	MOVOU	48(AX), X13
	AESENC	X10, X0
	AESENC	X11, X1
	AESENC	X12, X2
	AESENC	X13, X3
	MOVOU	64(AX), X10
	MOVOU	80(AX), X11
	MOVOU	96(AX), X12
	MOVOU	112(AX), X13
	AESENC	X10, X4
	AESENC	X11, X5
	AESENC	X12, X8
	AESENC	X13, X9

	// scramble state (fixed key X7, no input absorbed this step)
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X4
	AESENC	X7, X5
	AESENC	X7, X8
	AESENC	X7, X9

	ADDQ	$128, AX	// advance to next 128-byte block
	DECQ	CX
	JNE	aesloop

	// 2 more scrambles to finish diffusing the last absorbed block
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X4
	AESENC	X7, X5
	AESENC	X7, X8
	AESENC	X7, X9
	AESENC	X7, X0
	AESENC	X7, X1
	AESENC	X7, X2
	AESENC	X7, X3
	AESENC	X7, X4
	AESENC	X7, X5
	AESENC	X7, X8
	AESENC	X7, X9

	// fold the eight lanes to one and emit the hash (same tree as above)
	PXOR	X4, X0
	PXOR	X5, X1
	PXOR	X8, X2
	PXOR	X9, X3
	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVQ	X0, (DX)
	RET

// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
// AES-NI hash of exactly 4 bytes: plant the data into dword 2 of the
// seed register, then run three AES rounds with the key schedule.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET

// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
// AES-NI hash of exactly 8 bytes: plant the data into qword 1 of the
// seed register, then run three AES rounds with the key schedule.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<>[16*k] keeps the low k bytes of a 16-byte register
// (entry k is a 128-bit value with the low k bytes all-ones).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
// masks<> continued: entries for keeping the low 11..15 bytes.
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// (A 0xff selector byte zeroes the destination byte; entry k shifts the
// top k bytes of the register down to positions 0..k-1.)
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// memeq(a, b unsafe.Pointer, size uintptr) bool
// Trampoline: load memeqbody's register protocol (SI=a, DI=b, BX=count,
// AX=&result byte) and tail-jump; memeqbody writes the bool through AX.
TEXT runtime·memeq(SB),NOSPLIT,$0-25
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	MOVQ	size+16(FP), BX
	LEAQ	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// Called through a closure whose context register (DX) carries the size;
// short-circuits when both pointers are identical.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	eq		// same pointer => trivially equal
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	LEAQ	ret+16(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+16(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
// eqstring(s1, s2 string) bool — lengths already known equal (see the
// contract comment above); compares only the data pointers and bytes.
TEXT runtime·eqstring(SB),NOSPLIT,$0-33
	MOVQ	s1str+0(FP), SI
	MOVQ	s2str+16(FP), DI
	CMPQ	SI, DI
	JEQ	eq		// identical backing pointers => equal
	MOVQ	s1len+8(FP), BX
	LEAQ	v+32(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, v+32(FP)
	RET

// a in SI
// b in DI
// count in BX
// address of result byte in AX
// Clobbers: CX, DX, X0-X7, flags. Compares in 64-, then 8-byte strides,
// with a branch-free path for counts < 8.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0		// AND the per-byte equality masks together
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX		// DX = one bit per byte; 0xffff iff all 64 equal
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	MOVB	$0, (AX)	// some byte differed
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-8 bytes: re-read the final (possibly overlapping)
	// 8-byte window of each buffer
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	(AX)
	RET

small:
	CMPQ	BX, $0
	JEQ	equal		// zero-length buffers are equal (flags: ZF set)

	LEAQ	0(BX*8), CX	// CX = bits in the tail = 8*count
	NEGQ	CX		// CX = 64 - bits (mod 64), shift amount below

	// Decide whether an 8-byte load at SI could cross a page boundary:
	// if the address ends in 11111xxx the load might touch the next page.
	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI	// over-read is safe within the page
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI	// read the 8 bytes ending at SI+BX
	SHRQ	CX, SI			// shift wanted bytes down to the bottom
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// XOR-style compare: subtract, then shift out the garbage high bytes;
	// SHLQ sets ZF iff the wanted low bytes were identical
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	(AX)
	RET

// cmpstring(s1, s2 string) int — trampoline into cmpbody's register
// protocol (SI/BX = a/alen, DI/DX = b/blen, R9 = &result).
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
	MOVQ	s1_base+0(FP), SI
	MOVQ	s1_len+8(FP), BX
	MOVQ	s2_base+16(FP), DI
	MOVQ	s2_len+24(FP), DX
	LEAQ	ret+32(FP), R9
	JMP	runtime·cmpbody(SB)

// bytes.Compare(a, b []byte) int — slice headers are (ptr, len, cap),
// hence the +24/+32 offsets for the second slice.
TEXT bytes·Compare(SB),NOSPLIT,$0-56
	MOVQ	s1+0(FP), SI
	MOVQ	s1+8(FP), BX
	MOVQ	s2+24(FP), DI
	MOVQ	s2+32(FP), DX
	LEAQ	res+48(FP), R9
	JMP	runtime·cmpbody(SB)

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   R9 = address of output word (stores -1/0/1 here)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame		// same pointer: result depends only on lengths
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8		// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16		// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX		// index of first byte that differs
	XORQ	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX		// AX = 1 if a's byte is (unsigned) higher
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	MOVQ	AX, (R9)
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// compare the final (possibly overlapping) 8-byte window
	MOVQ	-8(SI)(R8*1), AX
	MOVQ	-8(DI)(R8*1), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
	// (continuation of runtime·cmpbody: AX/CX hold differing 8-byte words)
diff8:
	BSWAPQ	AX		// reverse order of bytes (memory order -> big-endian compare)
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX		// index of highest bit difference
	SHRQ	CX, AX		// move a's bit to bottom
	ANDQ	$1, AX		// mask bit
	LEAQ	-1(AX*2), AX	// 1/0 => +1/-1
	MOVQ	AX, (R9)
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		// - bits left (== 64 - bits left mod 64)
	JEQ	allsame		// ZF from NEGQ: zero bytes to compare

	// load bytes of a into high bytes of SI (page-safe, as in memeqbody)
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	MOVQ	-8(SI)(R8*1), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI		// discard garbage; wanted bytes now at the top

	// load bytes of b in to high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(R8*1), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI		// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI		// find bit differences
	JEQ	allsame
	BSRQ	DI, CX		// index of highest bit difference
	SHRQ	CX, SI		// move a's bit to bottom
	ANDQ	$1, SI		// mask bit
	LEAQ	-1(SI*2), AX	// 1/0 => +1/-1
	MOVQ	AX, (R9)
	RET

	// all compared bytes equal: order by length
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX		// 1 if alen > blen
	SETEQ	CX		// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX // 1,0,-1 result
	MOVQ	AX, (R9)
	RET

// bytes.IndexByte(s []byte, c byte) int — trampoline into
// indexbytebody (SI=data, BX=len, AL=byte, R8=&result).
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), BX
	MOVB	c+24(FP), AL
	LEAQ	ret+32(FP), R8
	JMP	runtime·indexbytebody(SB)

// strings.IndexByte(s string, c byte) int — same body, string layout.
TEXT strings·IndexByte(SB),NOSPLIT,$0-32
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), BX
	MOVB	c+16(FP), AL
	LEAQ	ret+24(FP), R8
	JMP	runtime·indexbytebody(SB)

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
//   R8: address to put result
// Strategy: SCASB up to the first 16-byte boundary, PCMPEQB over aligned
// 16-byte chunks, SCASB again for the unaligned tail. Stores the index of
// the first match, or -1, through R8. Clobbers CX, DX, DI, R11, X0, X1.
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVQ	SI, DI		// DI = scan cursor (SCASB uses DI)

	CMPQ	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTQ	$15, SI
	JZ	aligned
	MOVQ	SI, CX
	ANDQ	$~15, CX
	ADDQ	$16, CX

	// search the beginning (CX = bytes before the boundary)
	SUBQ	SI, CX
	REPN;	SCASB		// scan while not equal to AL
	JZ	success

	// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVQ	BX, R11
	ADDQ	SI, R11
	ANDQ	$~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess	// nonzero mask => some byte matched
	ADDQ	$16, DI

condition:
	CMPQ	DI, R11
	JLT	sse

	// search the end (the 0-15 bytes after the last aligned chunk)
	MOVQ	SI, CX
	ADDQ	BX, CX
	SUBQ	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN;	SCASB
	JZ	success

failure:
	MOVQ	$-1, (R8)
	RET

	// handle for lengths < 16
small:
	MOVQ	BX, CX
	REPN;	SCASB
	JZ	success
	MOVQ	$-1, (R8)
	RET

	// we've found the chunk containing the byte
	// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBQ	SI, DI		// DI = offset of the chunk within the buffer
	ADDQ	DI, DX
	MOVQ	DX, (R8)
	RET

success:
	// SCASB leaves DI one past the matching byte, hence the -1
	SUBQ	SI, DI
	SUBL	$1, DI
	MOVQ	DI, (R8)
	RET

// bytes.Equal(a, b []byte) bool — unequal lengths short-circuit to false;
// otherwise falls into memeqbody's register protocol.
TEXT bytes·Equal(SB),NOSPLIT,$0-49
	MOVQ	a_len+8(FP), BX
	MOVQ	b_len+32(FP), CX
	CMPQ	BX, CX
	JNE	eqret
	MOVQ	a+0(FP), SI
	MOVQ	b+24(FP), DI
	LEAQ	ret+48(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+48(FP)
	RET

// fastrand1() uint32 — per-M xorshift-style PRNG: the state lives in
// g->m->fastrand, so no synchronization is needed.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX		// state <<= 1
	MOVL	DX, BX
	XORL	$0x88888eef, DX	// mix in the feedback constant
	CMOVLMI	BX, DX		// keep the unmixed value if the shift went negative
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 returns 0 in AX (used where a zero return is needed without Go
// frame machinery).
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP exists so that goexit+PCQuantum is a valid PC inside
// this function for traceback purposes.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0(addr uintptr) — hint: fetch line into all cache levels.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1(addr uintptr) — hint: fetch line into L2 and up.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

// prefetcht2(addr uintptr) — hint: fetch line into L3 and up.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta(addr uintptr) — non-temporal prefetch hint.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// This is called from .init_array and follows the platform, not Go, ABI.
// Appends the module's moduledata (passed in DI per the SysV ABI) to the
// runtime's linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET