// github.com/c0deoo1/golang1.5@v0.0.0-20220525150107-c87c805d4593/src/runtime/asm_amd64.s (about)

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go bootstraps the runtime. On entry DI holds argc and SI holds argv.
// In order, it: records g0's stack bounds, probes CPUID, calls _cgo_init if
// present, sets up and verifies TLS, links m0 and g0, runs check/args/osinit/
// schedinit, queues runtime·main via newproc, and finally calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto; the stack pointer is aligned here
	ANDQ	$~15, SP
	MOVQ	AX, 16(SP)	// argc
	MOVQ	BX, 24(SP)	// argv

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX	// g0's stack size is roughly 64k-104 bytes
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)	// set g0's stack bounds
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// CPUID leaf 1: remember the ECX/EDX feature words for later use.
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it.
	// cgo-related initialization. TODO
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// g0 already in DI
	MOVQ	DI, CX	// Win64 uses CX for first parameter
	MOVQ	$setg_gcc<>(SB), SI
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

	// After _cgo_init, only Windows (iswindows != 0) still falls through
	// to the TLS setup below; everything else jumps straight to ok.
	CMPL	runtime·iswindows(SB), $0
	JEQ ok
needtls:
	// skip TLS setup on Plan 9
	CMPL	runtime·isplan9(SB), $1
	JEQ ok
	// skip TLS setup on Solaris
	CMPL	runtime·issolaris(SB), $1
	JEQ ok

	LEAQ	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	// TLS always points at the current g; link m0 and g0 to each other.
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)	// argument init: save the arguments and look up the fast time functions in the vDSO
	CALL	runtime·osinit(SB)	// OS init: get the number of CPUs
	CALL	runtime·schedinit(SB)	// scheduler init

	// create a new goroutine to start program
	// (the initial G, which runs runtime·main)
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	// (enters the G/M/P scheduling loop)
	CALL	runtime·mstart(SB)

	// Reached only if mstart returns: store to a bogus address to crash.
	MOVL	$0xf1, 0xf1  // crash
	RET

// mainPC is an 8-byte read-only word holding the address of runtime·main;
// rt0_go passes its address to newproc as the entry of the first goroutine.
DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

// breakpoint emits the single byte 0xcc (the x86 INT3 opcode) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP, PC and BP plus the current g; ret and ctxt are zeroed.
TEXT runtime·gosave(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), AX		// gobuf
	LEAQ	buf+0(FP), BX		// caller's SP
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC
	MOVQ	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	MOVQ	$0, gobuf_ctxt(AX)
	MOVQ	BP, gobuf_bp(AX)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Installs gobuf.g as the current g, reloads SP/AX/DX/BP from the Gobuf,
// clears the saved fields, and jumps to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	// Saves the caller's PC/SP/BP in g->sched, switches to m->g0's stack,
	// and calls fn with the old g pushed as its argument. Jumps to badmcall
	// if already on g0, and to badmcall2 if fn returns.
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// argument to fn: the g we switched away from
	MOVQ	DI, DX		// DX = closure context (the func value)
	MOVQ	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI
	POPQ	AX
	MOVQ	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when the current g is gsignal or g0. When the current g
// is m->curg, it saves state in g->sched, switches to g0's stack, calls fn,
// and then switches back to m->curg. Any other g is reported through
// badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	MOVQ	m_gsignal(BX), DX	// DX = gsignal
	CMPQ	AX, DX
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch

	MOVQ	m_curg(BX), R8
	CMPQ	AX, R8
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on m stack, just call directly
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	MOVQ	SI, (g_sched+gobuf_g)(SI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1003	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
// Zeroes DX (the value morestack stores as gobuf_ctxt) before jumping there.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVQ	g(CX), CX
	MOVQ	(g_stkbar+slice_array)(CX), DX
	MOVQ	g_stkbarPos(CX), BX
	IMULQ	$stkbar__size, BX	// Too big for SIB.
	MOVQ	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDQ	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is at most MAXSIZE
// (unsigned compare); otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias that tail-jumps to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// ·reflectcall loads argsize into CX and dispatches to the smallest callN
// whose frame is large enough; sizes above the largest go to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-32
	MOVLQZX argsize+24(FP), CX
	// NOTE(rsc): No call16, because CALLFN needs four words
	// of argument space to invoke callwritebarrier.
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper NAME with a MAXSIZE-byte frame.
// The helper copies argsize bytes from argptr to its own stack, calls f,
// copies the bytes from retoffset onward back to argptr, and then calls
// callwritebarrier(argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	argptr+16(FP), SI;		\
	MOVLQZX argsize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	(DX);				\
	/* copy return values back */		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX retoffset+28(FP), BX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVQ	argtype+0(FP), DX;		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX retoffset+28(FP), BX;		\
	MOVQ	DX, 0(SP);			\
	MOVQ	DI, 8(SP);			\
	MOVQ	CX, 16(SP);			\
	MOVQ	BX, 24(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

// Instantiate one call helper per power-of-two frame size from 32 bytes to 1 GiB.
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
	MOVQ	ptr+0(FP), BX
	MOVL	old+8(FP), AX	// CMPXCHG compares memory against AX
	MOVL	new+12(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)	// ZF is set when the exchange happened
	RET

// bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-25
	MOVQ	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// The uintptr/uint variants below tail-jump to the 64-bit implementations.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
	JMP	runtime·cas64(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
	JMP	runtime·atomicload64(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
	JMP	runtime·atomicload64(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
	JMP	runtime·atomicstore64(SB)

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-25
	MOVQ	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-20
	MOVQ	ptr+0(FP), BX
	MOVL	delta+8(FP), AX
	MOVL	AX, CX		// keep delta; XADD overwrites AX with the old value
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX		// old value + delta = new value
	MOVL	AX, ret+16(FP)
	RET

// xadd64 is the 64-bit form of xadd: it atomically adds delta to *ptr and
// returns the new value.
TEXT runtime·xadd64(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xadduintptr(SB), NOSPLIT, $0-24
	JMP	runtime·xadd64(SB)

// xchg stores new into *ptr and returns the previous 32-bit value.
// No LOCK prefix is written: XCHG with a memory operand is implicitly locked on x86.
TEXT runtime·xchg(SB), NOSPLIT, $0-20
	MOVQ	ptr+0(FP), BX
	MOVL	new+8(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+16(FP)
	RET

// xchg64 is the 64-bit form of xchg.
TEXT runtime·xchg64(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

// xchgp1 has the same instruction sequence as xchg64.
TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
	MOVQ	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
	JMP	runtime·xchg64(SB)

// procyield executes PAUSE repeatedly, decrementing the 32-bit cycles
// argument after each one and returning once it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// The atomicstore routines below store val to *ptr with XCHG and discard
// the old value.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
	MOVQ	ptr+0(FP), BX
	MOVL	val+8(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
	MOVQ	ptr+0(FP), AX
	MOVB	val+8(FP), BX
	LOCK
	ORB	BX, (AX)
	RET

// void	runtime·atomicand8(byte volatile*, byte);
TEXT runtime·atomicand8(SB), NOSPLIT, $0-9
	MOVQ	ptr+0(FP), AX
	MOVB	val+8(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	SUBQ	$5, (SP)	// return to CALL again
	MOVQ	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ	g(R8), R8
	MOVQ	0(SP), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R8)
	LEAQ	8(SP), R9
	MOVQ	R9, (g_sched+gobuf_sp)(R8)
	MOVQ	$0, (g_sched+gobuf_ret)(R8)
	MOVQ	$0, (g_sched+gobuf_ctxt)(R8)
	MOVQ	BP, (g_sched+gobuf_bp)(R8)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX	// remember the entry SP; used below to compute stack depth

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave

	MOVQ	m_g0(R8), SI
	CALL	gosave<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
nosave:

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
	LEAQ	fn+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows, a zero TLS base in CX is treated as g == nil:
	// the load of g below is skipped and BX stays 0.
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, R8 // holds oldm until end of function
	JMP	havem
needm:
	MOVQ	$0, 0(SP)
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	MOVQ	0(SP), R8
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved R8.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX
	SUBQ	AX, DI
	MOVQ	DI, SP

	MOVQ	R8, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVQ	0(SP), R8

	// Compute the size of the frame again.  FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ	R8, $0
	JNE 3(PC)
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
#ifdef GOOS_windows
	// On Windows the TLS base at 0x28(GS) is maintained here as well:
	// cleared when gg is nil, otherwise pointed at gg->m->tls.
	CMPQ	BX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)
	RET
settls:
	MOVQ	g_m(BX), AX
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
// The new g arrives in DI (first argument in the AMD64 C ABI).
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Executes INT $3 when either bound check fails.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the return PC stored just below argp. If that slot
// holds the stack barrier PC, the value returned by nextBarrierPC is used
// instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
	MOVQ	argp+0(FP),AX		// addr of first arg
	MOVQ	-8(AX),AX		// get calling pc
	CMPQ	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVQ	0(SP), AX
nobar:
	MOVQ	AX, ret+8(FP)
	RET

// setcallerpc overwrites the return PC stored just below argp with pc. If
// that slot holds the stack barrier PC, pc is handed to setNextBarrierPC
// instead and the slot is left alone.
TEXT runtime·setcallerpc(SB),NOSPLIT,$8-16
	MOVQ	argp+0(FP),AX		// addr of first arg
	MOVQ	pc+8(FP), BX
	MOVQ	-8(AX), CX
	CMPQ	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVQ	BX, -8(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVQ	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns its argp argument unchanged.
TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
	MOVQ	argp+0(FP), AX
	MOVQ	AX, ret+8(FP)
	RET

// func cputicks() int64
// Issues LFENCE when lfenceBeforeRdtsc is 1 and MFENCE otherwise, then reads
// the time-stamp counter and returns DX<<32 + AX.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
928 TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24 929 GO_ARGS 930 NO_LOCAL_POINTERS 931 MOVQ p+0(FP), AX 932 MOVQ h+8(FP), BX 933 MOVQ 8(DX), CX 934 MOVQ AX, 0(SP) 935 MOVQ BX, 8(SP) 936 MOVQ CX, 16(SP) 937 CALL runtime·memhash(SB) 938 MOVQ 24(SP), AX 939 MOVQ AX, ret+16(FP) 940 RET 941 942 // hash function using AES hardware instructions 943 TEXT runtime·aeshash(SB),NOSPLIT,$0-32 944 MOVQ p+0(FP), AX // ptr to data 945 MOVQ s+16(FP), CX // size 946 LEAQ ret+24(FP), DX 947 JMP runtime·aeshashbody(SB) 948 949 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24 950 MOVQ p+0(FP), AX // ptr to string struct 951 MOVQ 8(AX), CX // length of string 952 MOVQ (AX), AX // string data 953 LEAQ ret+16(FP), DX 954 JMP runtime·aeshashbody(SB) 955 956 // AX: data 957 // CX: length 958 // DX: address to put return value 959 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0 960 MOVQ h+8(FP), X6 // seed to low 64 bits of xmm6 961 PINSRQ $1, CX, X6 // size to high 64 bits of xmm6 962 PSHUFHW $0, X6, X6 // replace size with its low 2 bytes repeated 4 times 963 MOVO runtime·aeskeysched(SB), X7 964 CMPQ CX, $16 965 JB aes0to15 966 JE aes16 967 CMPQ CX, $32 968 JBE aes17to32 969 CMPQ CX, $64 970 JBE aes33to64 971 CMPQ CX, $128 972 JBE aes65to128 973 JMP aes129plus 974 975 aes0to15: 976 TESTQ CX, CX 977 JE aes0 978 979 ADDQ $16, AX 980 TESTW $0xff0, AX 981 JE endofpage 982 983 // 16 bytes loaded at this address won't cross 984 // a page boundary, so we can load it directly. 985 MOVOU -16(AX), X0 986 ADDQ CX, CX 987 MOVQ $masks<>(SB), AX 988 PAND (AX)(CX*8), X0 989 990 // scramble 3 times 991 AESENC X6, X0 992 AESENC X7, X0 993 AESENC X7, X0 994 MOVQ X0, (DX) 995 RET 996 997 endofpage: 998 // address ends in 1111xxxx. Might be up against 999 // a page boundary, so load ending at last byte. 1000 // Then shift bytes down using pshufb. 
1001 MOVOU -32(AX)(CX*1), X0 1002 ADDQ CX, CX 1003 MOVQ $shifts<>(SB), AX 1004 PSHUFB (AX)(CX*8), X0 1005 AESENC X6, X0 1006 AESENC X7, X0 1007 AESENC X7, X0 1008 MOVQ X0, (DX) 1009 RET 1010 1011 aes0: 1012 // return input seed 1013 MOVQ h+8(FP), AX 1014 MOVQ AX, (DX) 1015 RET 1016 1017 aes16: 1018 MOVOU (AX), X0 1019 AESENC X6, X0 1020 AESENC X7, X0 1021 AESENC X7, X0 1022 MOVQ X0, (DX) 1023 RET 1024 1025 aes17to32: 1026 // load data to be hashed 1027 MOVOU (AX), X0 1028 MOVOU -16(AX)(CX*1), X1 1029 1030 // scramble 3 times 1031 AESENC X6, X0 1032 AESENC runtime·aeskeysched+16(SB), X1 1033 AESENC X7, X0 1034 AESENC X7, X1 1035 AESENC X7, X0 1036 AESENC X7, X1 1037 1038 // combine results 1039 PXOR X1, X0 1040 MOVQ X0, (DX) 1041 RET 1042 1043 aes33to64: 1044 MOVOU (AX), X0 1045 MOVOU 16(AX), X1 1046 MOVOU -32(AX)(CX*1), X2 1047 MOVOU -16(AX)(CX*1), X3 1048 1049 AESENC X6, X0 1050 AESENC runtime·aeskeysched+16(SB), X1 1051 AESENC runtime·aeskeysched+32(SB), X2 1052 AESENC runtime·aeskeysched+48(SB), X3 1053 AESENC X7, X0 1054 AESENC X7, X1 1055 AESENC X7, X2 1056 AESENC X7, X3 1057 AESENC X7, X0 1058 AESENC X7, X1 1059 AESENC X7, X2 1060 AESENC X7, X3 1061 1062 PXOR X2, X0 1063 PXOR X3, X1 1064 PXOR X1, X0 1065 MOVQ X0, (DX) 1066 RET 1067 1068 aes65to128: 1069 MOVOU (AX), X0 1070 MOVOU 16(AX), X1 1071 MOVOU 32(AX), X2 1072 MOVOU 48(AX), X3 1073 MOVOU -64(AX)(CX*1), X4 1074 MOVOU -48(AX)(CX*1), X5 1075 MOVOU -32(AX)(CX*1), X8 1076 MOVOU -16(AX)(CX*1), X9 1077 1078 AESENC X6, X0 1079 AESENC runtime·aeskeysched+16(SB), X1 1080 AESENC runtime·aeskeysched+32(SB), X2 1081 AESENC runtime·aeskeysched+48(SB), X3 1082 AESENC runtime·aeskeysched+64(SB), X4 1083 AESENC runtime·aeskeysched+80(SB), X5 1084 AESENC runtime·aeskeysched+96(SB), X8 1085 AESENC runtime·aeskeysched+112(SB), X9 1086 AESENC X7, X0 1087 AESENC X7, X1 1088 AESENC X7, X2 1089 AESENC X7, X3 1090 AESENC X7, X4 1091 AESENC X7, X5 1092 AESENC X7, X8 1093 AESENC X7, X9 1094 AESENC X7, X0 1095 AESENC X7, X1 1096 
AESENC X7, X2 1097 AESENC X7, X3 1098 AESENC X7, X4 1099 AESENC X7, X5 1100 AESENC X7, X8 1101 AESENC X7, X9 1102 1103 PXOR X4, X0 1104 PXOR X5, X1 1105 PXOR X8, X2 1106 PXOR X9, X3 1107 PXOR X2, X0 1108 PXOR X3, X1 1109 PXOR X1, X0 1110 MOVQ X0, (DX) 1111 RET 1112 1113 aes129plus: 1114 // start with last (possibly overlapping) block 1115 MOVOU -128(AX)(CX*1), X0 1116 MOVOU -112(AX)(CX*1), X1 1117 MOVOU -96(AX)(CX*1), X2 1118 MOVOU -80(AX)(CX*1), X3 1119 MOVOU -64(AX)(CX*1), X4 1120 MOVOU -48(AX)(CX*1), X5 1121 MOVOU -32(AX)(CX*1), X8 1122 MOVOU -16(AX)(CX*1), X9 1123 1124 // scramble state once 1125 AESENC X6, X0 1126 AESENC runtime·aeskeysched+16(SB), X1 1127 AESENC runtime·aeskeysched+32(SB), X2 1128 AESENC runtime·aeskeysched+48(SB), X3 1129 AESENC runtime·aeskeysched+64(SB), X4 1130 AESENC runtime·aeskeysched+80(SB), X5 1131 AESENC runtime·aeskeysched+96(SB), X8 1132 AESENC runtime·aeskeysched+112(SB), X9 1133 1134 // compute number of remaining 128-byte blocks 1135 DECQ CX 1136 SHRQ $7, CX 1137 1138 aesloop: 1139 // scramble state, xor in a block 1140 MOVOU (AX), X10 1141 MOVOU 16(AX), X11 1142 MOVOU 32(AX), X12 1143 MOVOU 48(AX), X13 1144 AESENC X10, X0 1145 AESENC X11, X1 1146 AESENC X12, X2 1147 AESENC X13, X3 1148 MOVOU 64(AX), X10 1149 MOVOU 80(AX), X11 1150 MOVOU 96(AX), X12 1151 MOVOU 112(AX), X13 1152 AESENC X10, X4 1153 AESENC X11, X5 1154 AESENC X12, X8 1155 AESENC X13, X9 1156 1157 // scramble state 1158 AESENC X7, X0 1159 AESENC X7, X1 1160 AESENC X7, X2 1161 AESENC X7, X3 1162 AESENC X7, X4 1163 AESENC X7, X5 1164 AESENC X7, X8 1165 AESENC X7, X9 1166 1167 ADDQ $128, AX 1168 DECQ CX 1169 JNE aesloop 1170 1171 // 2 more scrambles to finish 1172 AESENC X7, X0 1173 AESENC X7, X1 1174 AESENC X7, X2 1175 AESENC X7, X3 1176 AESENC X7, X4 1177 AESENC X7, X5 1178 AESENC X7, X8 1179 AESENC X7, X9 1180 AESENC X7, X0 1181 AESENC X7, X1 1182 AESENC X7, X2 1183 AESENC X7, X3 1184 AESENC X7, X4 1185 AESENC X7, X5 1186 AESENC X7, X8 1187 AESENC X7, X9 1188 1189 
// Closing sequence of the hashing routine that begins earlier in the file:
// XOR-fold the eight state registers (X0-X5, X8, X9) into X0, store the
// low 64 bits of X0 through the pointer in DX, and return.
	PXOR	X4, X0
	PXOR	X5, X1
	PXOR	X8, X2
	PXOR	X9, X3
	PXOR	X2, X0
	PXOR	X3, X1
	PXOR	X1, X0
	MOVQ	X0, (DX)
	RET

// aeshash32 hashes the 4 bytes at p together with the seed h.
// Frame layout: p+0(FP) data pointer, h+8(FP) seed, ret+16(FP) result.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	h+8(FP), X0	// seed goes into the low 64 bits of X0
	MOVQ	p+0(FP), AX	// address of the data
	PINSRD	$2, (AX), X0	// the 32-bit datum becomes dword 2 of X0
	// three AES rounds keyed from runtime·aeskeysched
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// result is the low 64 bits of the state
	RET

// aeshash64 hashes the 8 bytes at p together with the seed h.
// Frame layout: p+0(FP) data pointer, h+8(FP) seed, ret+16(FP) result.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	h+8(FP), X0	// seed goes into the low 64 bits of X0
	MOVQ	p+0(FP), AX	// address of the data
	PINSRQ	$1, (AX), X0	// the 64-bit datum becomes the high qword of X0
	// three AES rounds keyed from runtime·aeskeysched
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// result is the low 64 bits of the state
	RET

// masks<> holds sixteen 16-byte entries used to clear the high part of
// a register. The entry at offset 16*i has its low i bytes set to 0xff
// and every remaining byte zero (entry 0 is all zero).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// shifts<> holds sixteen 16-byte PSHUFB control words that move data
// from the high bytes of a register down to its low bytes. The table is
// indexed by the number of bytes to move: for i >= 1, the entry at
// offset 16*i selects source bytes 16-i..15 into destination bytes
// 0..i-1, and its remaining control bytes are 0xff. Entry 0 is all zero.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// memeq compares size bytes at a and b and writes the boolean result
// to ret. It only marshals its arguments into the registers that
// memeqbody expects and tail-jumps there.
TEXT runtime·memeq(SB),NOSPLIT,$0-25
	LEAQ	ret+24(FP), AX	// AX = address of the result byte
	MOVQ	size+16(FP), BX	// BX = byte count
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// The byte count is not an argument; it is read from offset 8 of the
// closure addressed by DX. Identical pointers compare equal without
// touching memory.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	sameptr
	LEAQ	ret+16(FP), AX	// AX = address of the result byte
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	JMP	runtime·memeqbody(SB)
sameptr:
	MOVB	$1, ret+16(FP)
	RET

// eqstring reports whether two strings are equal.
// The compiler guarantees that the strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-33
	// Frame layout: s1str+0, s1len+8, s2str+16, result byte v+32.
	MOVQ	s1str+0(FP), SI
	MOVQ	s2str+16(FP), DI
	CMPQ	SI, DI
	JEQ	eq			// same data pointer: equal without reading memory
	MOVQ	s1len+8(FP), BX		// only s1's length is read
	LEAQ	v+32(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, v+32(FP)
	RET

// memeqbody is the shared byte-equality kernel.
// input:
//   SI = a
//   DI = b
//   BX = count
//   AX = address of result byte (1 if equal, 0 if not)
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four per-byte equality masks together and collapse
	// them into a 16-bit mask in DX.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff		// all 16 bits set <=> all 64 bytes matched
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-8 bytes
	// Compares the 8 bytes that end at the current end of each buffer.
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	(AX)
	RET

	// Fewer than 8 bytes in total.
small:
	CMPQ	BX, $0
	JEQ	equal			// ZF is set by the CMPQ, so SETEQ stores 1

	LEAQ	0(BX*8), CX
	NEGQ	CX			// CX = -(bits to compare); shifts use it mod 64

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI			// discard the bytes beyond count; ZF set iff the rest match
equal:
	SETEQ	(AX)
	RET

// cmpstring three-way compares two strings; it loads the registers
// cmpbody expects and tail-jumps there.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
	MOVQ	s1_base+0(FP), SI
	MOVQ	s1_len+8(FP), BX
	MOVQ	s2_base+16(FP), DI
	MOVQ	s2_len+24(FP), DX
	LEAQ	ret+32(FP), R9
	JMP	runtime·cmpbody(SB)

// bytes·Compare three-way compares two byte slices; it loads each
// slice's pointer and length and tail-jumps to cmpbody.
TEXT bytes·Compare(SB),NOSPLIT,$0-56
	MOVQ	s1+0(FP), SI
	MOVQ	s1+8(FP), BX
	MOVQ	s2+24(FP), DI
	MOVQ	s2+32(FP), DX
	LEAQ	res+48(FP), R9
	JMP	runtime·cmpbody(SB)

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   R9 = address of output word (stores -1/0/1 here)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame			// same pointer: only the lengths decide
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8			// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX		// convert EQ to NE
	JNE	diff16			// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX			// index of first byte that differs
	XORQ	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX
	LEAQ	-1(AX*2), AX		// convert 1/0 to +1/-1
	MOVQ	AX, (R9)
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// Compare the 8 bytes that end at the current end of the common prefix.
	MOVQ	-8(SI)(R8*1), AX
	MOVQ	-8(DI)(R8*1), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX			// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX			// index of highest bit difference
	SHRQ	CX, AX			// move a's bit to bottom
	ANDQ	$1, AX			// mask bit
	LEAQ	-1(AX*2), AX		// 1/0 => +1/-1
	MOVQ	AX, (R9)
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX		// bytes left -> bits left
	NEGQ	CX			// - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	MOVQ	-8(SI)(R8*1), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(R8*1), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI			// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI			// find bit differences
	JEQ	allsame
	BSRQ	DI, CX			// index of highest bit difference
	SHRQ	CX, SI			// move a's bit to bottom
	ANDQ	$1, SI			// mask bit
	LEAQ	-1(SI*2), AX		// 1/0 => +1/-1
	MOVQ	AX, (R9)
	RET

	// Every compared byte matched: the result depends on the lengths only.
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	MOVQ	AX, (R9)
	RET

// bytes·IndexByte loads the slice pointer, its length and the byte c,
// then tail-jumps to indexbytebody, which stores the result in ret.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), BX
	MOVB	c+24(FP), AL
	LEAQ	ret+32(FP), R8
	JMP	runtime·indexbytebody(SB)

// strings·IndexByte is the string counterpart of bytes·IndexByte; only
// the frame offsets of c and ret differ.
TEXT strings·IndexByte(SB),NOSPLIT,$0-32
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), BX
	MOVB	c+16(FP), AL
	LEAQ	ret+24(FP), R8
	JMP	runtime·indexbytebody(SB)

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
//   R8: address to put result
// Stores the index of the first occurrence of AL at (R8), or -1 if the
// byte is not found.
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVQ	SI, DI			// DI is the running scan pointer

	CMPQ	BX, $16
	JLT	small

	// round up to first 16-byte boundary
	TESTQ	$15, SI
	JZ	aligned
	MOVQ	SI, CX
	ANDQ	$~15, CX
	ADDQ	$16, CX

	// search the beginning
	SUBQ	SI, CX
	REPN; SCASB
	JZ	success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVQ	BX, R11
	ADDQ	SI, R11
	ANDQ	$~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD	AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL	$0, X0, X0
	JMP	condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO	(DI), X1
	// compare bytes in X0 to X1
	PCMPEQB	X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL	DX, DX
	JNZ	ssesuccess
	ADDQ	$16, DI

condition:
	CMPQ	DI, R11
	JLT	sse

	// search the end
	MOVQ	SI, CX
	ADDQ	BX, CX
	SUBQ	R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ	failure
	REPN; SCASB
	JZ	success

failure:
	MOVQ	$-1, (R8)
	RET

// handle for lengths < 16
small:
	MOVQ	BX, CX
	REPN; SCASB
	JZ	success
	MOVQ	$-1, (R8)
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW	DX, DX
	SUBQ	SI, DI
	ADDQ	DI, DX
	MOVQ	DX, (R8)
	RET

success:
	// SCASB leaves DI one past the matching byte, so the index is
	// DI - SI - 1. The subtract must be 64-bit: a 32-bit SUBL would
	// zero the upper half of DI and truncate indexes >= 1<<32.
	SUBQ	SI, DI
	SUBQ	$1, DI
	MOVQ	DI, (R8)
	RET

// bytes·Equal reports false when the two lengths differ; otherwise it
// tail-jumps to memeqbody to compare the contents.
TEXT bytes·Equal(SB),NOSPLIT,$0-49
	MOVQ	a_len+8(FP), BX
	MOVQ	b_len+32(FP), CX
	CMPQ	BX, CX
	JNE	eqret
	MOVQ	a+0(FP), SI
	MOVQ	b+24(FP), DI
	LEAQ	ret+48(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+48(FP)
	RET

// fastrand1 advances the current m's fastrand state and returns the new
// value: the state is doubled and, when the doubled value has its top
// bit set, XORed with 0x88888eef.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX			// keep the doubled value without the XOR
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX			// XOR result negative => top bit was clear: drop the XOR
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0 issues PREFETCHT0 for the address addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1 issues PREFETCHT1 for the address addr.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET

// prefetcht2 issues PREFETCHT2 for the address addr.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta issues PREFETCHNTA for the address addr.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// This is called from .init_array and follows the platform, not Go, ABI.
// The new module data pointer arrives in DI; it is linked after the
// current last module and then recorded as the new last module.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET