github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/runtime/asm_amd64.s (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "go_asm.h" 6 #include "go_tls.h" 7 #include "funcdata.h" 8 #include "textflag.h" 9 10 TEXT runtime·rt0_go(SB),NOSPLIT,$0 11 // copy arguments forward on an even stack 12 MOVQ DI, AX // argc 13 MOVQ SI, BX // argv 14 SUBQ $(4*8+7), SP // 2args 2auto 15 ANDQ $~15, SP 16 MOVQ AX, 16(SP) 17 MOVQ BX, 24(SP) 18 19 // create istack out of the given (operating system) stack. 20 // _cgo_init may update stackguard. 21 MOVQ $runtime·g0(SB), DI 22 LEAQ (-64*1024+104)(SP), BX 23 MOVQ BX, g_stackguard0(DI) 24 MOVQ BX, g_stackguard1(DI) 25 MOVQ BX, (g_stack+stack_lo)(DI) 26 MOVQ SP, (g_stack+stack_hi)(DI) 27 28 // find out information about the processor we're on 29 MOVQ $0, AX 30 CPUID 31 CMPQ AX, $0 32 JE nocpuinfo 33 MOVQ $1, AX 34 CPUID 35 MOVL CX, runtime·cpuid_ecx(SB) 36 MOVL DX, runtime·cpuid_edx(SB) 37 nocpuinfo: 38 39 // if there is an _cgo_init, call it. 40 MOVQ _cgo_init(SB), AX 41 TESTQ AX, AX 42 JZ needtls 43 // g0 already in DI 44 MOVQ DI, CX // Win64 uses CX for first parameter 45 MOVQ $setg_gcc<>(SB), SI 46 CALL AX 47 48 // update stackguard after _cgo_init 49 MOVQ $runtime·g0(SB), CX 50 MOVQ (g_stack+stack_lo)(CX), AX 51 ADDQ $const__StackGuard, AX 52 MOVQ AX, g_stackguard0(CX) 53 MOVQ AX, g_stackguard1(CX) 54 55 CMPL runtime·iswindows(SB), $0 56 JEQ ok 57 needtls: 58 // skip TLS setup on Plan 9 59 CMPL runtime·isplan9(SB), $1 60 JEQ ok 61 // skip TLS setup on Solaris 62 CMPL runtime·issolaris(SB), $1 63 JEQ ok 64 65 LEAQ runtime·tls0(SB), DI 66 CALL runtime·settls(SB) 67 68 // store through it, to make sure it works 69 get_tls(BX) 70 MOVQ $0x123, g(BX) 71 MOVQ runtime·tls0(SB), AX 72 CMPQ AX, $0x123 73 JEQ 2(PC) 74 MOVL AX, 0 // abort 75 ok: 76 // set the per-goroutine and per-mach "registers" 77 get_tls(BX) 78 LEAQ runtime·g0(SB), CX 79 MOVQ CX, g(BX) 80 LEAQ runtime·m0(SB), AX 81 82 // save m->g0 = g0 83 MOVQ CX, m_g0(AX) 84 // save m0 to g0->m 85 MOVQ AX, g_m(CX) 86 87 CLD // convention is D is always left cleared 88 CALL runtime·check(SB) 89 90 MOVL 16(SP), AX // copy argc 91 MOVL AX, 0(SP) 92 MOVQ 24(SP), AX // copy argv 93 MOVQ AX, 8(SP) 94 CALL runtime·args(SB) 95 CALL runtime·osinit(SB) 96 CALL runtime·schedinit(SB) 97 98 // create a new goroutine to start program 99 MOVQ $runtime·main·f(SB), BP // entry 100 PUSHQ BP 101 PUSHQ $0 // arg size 102 CALL runtime·newproc(SB) 103 POPQ AX 104 POPQ AX 105 106 // start this M 107 CALL runtime·mstart(SB) 108 109 MOVL $0xf1, 0xf1 // crash 110 RET 111 112 DATA runtime·main·f+0(SB)/8,$runtime·main(SB) 113 GLOBL runtime·main·f(SB),RODATA,$8 114 115 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 116 BYTE $0xcc 117 RET 118 119 TEXT runtime·asminit(SB),NOSPLIT,$0-0 120 // No per-thread init. 121 RET 122 123 /* 124 * go-routine 125 */ 126 127 // void gosave(Gobuf*) 128 // save state in Gobuf; setjmp 129 TEXT runtime·gosave(SB), NOSPLIT, $0-8 130 MOVQ buf+0(FP), AX // gobuf 131 LEAQ buf+0(FP), BX // caller's SP 132 MOVQ BX, gobuf_sp(AX) 133 MOVQ 0(SP), BX // caller's PC 134 MOVQ BX, gobuf_pc(AX) 135 MOVQ $0, gobuf_ret(AX) 136 MOVQ $0, gobuf_ctxt(AX) 137 get_tls(CX) 138 MOVQ g(CX), BX 139 MOVQ BX, gobuf_g(AX) 140 RET 141 142 // void gogo(Gobuf*) 143 // restore state from Gobuf; longjmp 144 TEXT runtime·gogo(SB), NOSPLIT, $0-8 145 MOVQ buf+0(FP), BX // gobuf 146 MOVQ gobuf_g(BX), DX 147 MOVQ 0(DX), CX // make sure g != nil 148 get_tls(CX) 149 MOVQ DX, g(CX) 150 MOVQ gobuf_sp(BX), SP // restore SP 151 MOVQ gobuf_ret(BX), AX 152 MOVQ gobuf_ctxt(BX), DX 153 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector 154 MOVQ $0, gobuf_ret(BX) 155 MOVQ $0, gobuf_ctxt(BX) 156 MOVQ gobuf_pc(BX), BX 157 JMP BX 158 159 // func mcall(fn func(*g)) 160 // Switch to m->g0's stack, call fn(g). 161 // Fn must never return. It should gogo(&g->sched) 162 // to keep running g. 163 TEXT runtime·mcall(SB), NOSPLIT, $0-8 164 MOVQ fn+0(FP), DI 165 166 get_tls(CX) 167 MOVQ g(CX), AX // save state in g->sched 168 MOVQ 0(SP), BX // caller's PC 169 MOVQ BX, (g_sched+gobuf_pc)(AX) 170 LEAQ fn+0(FP), BX // caller's SP 171 MOVQ BX, (g_sched+gobuf_sp)(AX) 172 MOVQ AX, (g_sched+gobuf_g)(AX) 173 174 // switch to m->g0 & its stack, call fn 175 MOVQ g(CX), BX 176 MOVQ g_m(BX), BX 177 MOVQ m_g0(BX), SI 178 CMPQ SI, AX // if g == m->g0 call badmcall 179 JNE 3(PC) 180 MOVQ $runtime·badmcall(SB), AX 181 JMP AX 182 MOVQ SI, g(CX) // g = m->g0 183 MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp 184 PUSHQ AX 185 MOVQ DI, DX 186 MOVQ 0(DI), DI 187 CALL DI 188 POPQ AX 189 MOVQ $runtime·badmcall2(SB), AX 190 JMP AX 191 RET 192 193 // systemstack_switch is a dummy routine that systemstack leaves at the bottom 194 // of the G stack. We need to distinguish the routine that 195 // lives at the bottom of the G stack from the one that lives 196 // at the top of the system stack because the one at the top of 197 // the system stack terminates the stack walk (see topofstack()). 198 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0 199 RET 200 201 // func systemstack(fn func()) 202 TEXT runtime·systemstack(SB), NOSPLIT, $0-8 203 MOVQ fn+0(FP), DI // DI = fn 204 get_tls(CX) 205 MOVQ g(CX), AX // AX = g 206 MOVQ g_m(AX), BX // BX = m 207 208 MOVQ m_gsignal(BX), DX // DX = gsignal 209 CMPQ AX, DX 210 JEQ noswitch 211 212 MOVQ m_g0(BX), DX // DX = g0 213 CMPQ AX, DX 214 JEQ noswitch 215 216 MOVQ m_curg(BX), BP 217 CMPQ AX, BP 218 JEQ switch 219 220 // Bad: g is not gsignal, not g0, not curg. What is it? 221 MOVQ $runtime·badsystemstack(SB), AX 222 CALL AX 223 224 switch: 225 // save our state in g->sched. Pretend to 226 // be systemstack_switch if the G stack is scanned. 227 MOVQ $runtime·systemstack_switch(SB), BP 228 MOVQ BP, (g_sched+gobuf_pc)(AX) 229 MOVQ SP, (g_sched+gobuf_sp)(AX) 230 MOVQ AX, (g_sched+gobuf_g)(AX) 231 232 // switch to g0 233 MOVQ DX, g(CX) 234 MOVQ (g_sched+gobuf_sp)(DX), BX 235 // make it look like mstart called systemstack on g0, to stop traceback 236 SUBQ $8, BX 237 MOVQ $runtime·mstart(SB), DX 238 MOVQ DX, 0(BX) 239 MOVQ BX, SP 240 241 // call target function 242 MOVQ DI, DX 243 MOVQ 0(DI), DI 244 CALL DI 245 246 // switch back to g 247 get_tls(CX) 248 MOVQ g(CX), AX 249 MOVQ g_m(AX), BX 250 MOVQ m_curg(BX), AX 251 MOVQ AX, g(CX) 252 MOVQ (g_sched+gobuf_sp)(AX), SP 253 MOVQ $0, (g_sched+gobuf_sp)(AX) 254 RET 255 256 noswitch: 257 // already on m stack, just call directly 258 MOVQ DI, DX 259 MOVQ 0(DI), DI 260 CALL DI 261 RET 262 263 /* 264 * support for morestack 265 */ 266 267 // Called during function prolog when more stack is needed. 268 // 269 // The traceback routines see morestack on a g0 as being 270 // the top of a stack (for example, morestack calling newstack 271 // calling the scheduler calling newm calling gc), so we must 272 // record an argument size. For that purpose, it has no arguments. 273 TEXT runtime·morestack(SB),NOSPLIT,$0-0 274 // Cannot grow scheduler stack (m->g0). 275 get_tls(CX) 276 MOVQ g(CX), BX 277 MOVQ g_m(BX), BX 278 MOVQ m_g0(BX), SI 279 CMPQ g(CX), SI 280 JNE 2(PC) 281 INT $3 282 283 // Cannot grow signal stack (m->gsignal). 284 MOVQ m_gsignal(BX), SI 285 CMPQ g(CX), SI 286 JNE 2(PC) 287 INT $3 288 289 // Called from f. 290 // Set m->morebuf to f's caller. 291 MOVQ 8(SP), AX // f's caller's PC 292 MOVQ AX, (m_morebuf+gobuf_pc)(BX) 293 LEAQ 16(SP), AX // f's caller's SP 294 MOVQ AX, (m_morebuf+gobuf_sp)(BX) 295 get_tls(CX) 296 MOVQ g(CX), SI 297 MOVQ SI, (m_morebuf+gobuf_g)(BX) 298 299 // Set g->sched to context in f. 300 MOVQ 0(SP), AX // f's PC 301 MOVQ AX, (g_sched+gobuf_pc)(SI) 302 MOVQ SI, (g_sched+gobuf_g)(SI) 303 LEAQ 8(SP), AX // f's SP 304 MOVQ AX, (g_sched+gobuf_sp)(SI) 305 MOVQ DX, (g_sched+gobuf_ctxt)(SI) 306 307 // Call newstack on m->g0's stack. 308 MOVQ m_g0(BX), BP 309 MOVQ BP, g(CX) 310 MOVQ (g_sched+gobuf_sp)(BP), SP 311 CALL runtime·newstack(SB) 312 MOVQ $0, 0x1003 // crash if newstack returns 313 RET 314 315 // morestack but not preserving ctxt. 316 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0 317 MOVL $0, DX 318 JMP runtime·morestack(SB) 319 320 // reflectcall: call a function with the given argument list 321 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32). 322 // we don't have variable-sized frames, so we use a small number 323 // of constant-sized-frame functions to encode a few bits of size in the pc. 324 // Caution: ugly multiline assembly macros in your future! 325 326 #define DISPATCH(NAME,MAXSIZE) \ 327 CMPQ CX, $MAXSIZE; \ 328 JA 3(PC); \ 329 MOVQ $NAME(SB), AX; \ 330 JMP AX 331 // Note: can't just "JMP NAME(SB)" - bad inlining results. 332 333 TEXT reflect·call(SB), NOSPLIT, $0-0 334 JMP ·reflectcall(SB) 335 336 TEXT ·reflectcall(SB), NOSPLIT, $0-32 337 MOVLQZX argsize+24(FP), CX 338 // NOTE(rsc): No call16, because CALLFN needs four words 339 // of argument space to invoke callwritebarrier. 340 DISPATCH(runtime·call32, 32) 341 DISPATCH(runtime·call64, 64) 342 DISPATCH(runtime·call128, 128) 343 DISPATCH(runtime·call256, 256) 344 DISPATCH(runtime·call512, 512) 345 DISPATCH(runtime·call1024, 1024) 346 DISPATCH(runtime·call2048, 2048) 347 DISPATCH(runtime·call4096, 4096) 348 DISPATCH(runtime·call8192, 8192) 349 DISPATCH(runtime·call16384, 16384) 350 DISPATCH(runtime·call32768, 32768) 351 DISPATCH(runtime·call65536, 65536) 352 DISPATCH(runtime·call131072, 131072) 353 DISPATCH(runtime·call262144, 262144) 354 DISPATCH(runtime·call524288, 524288) 355 DISPATCH(runtime·call1048576, 1048576) 356 DISPATCH(runtime·call2097152, 2097152) 357 DISPATCH(runtime·call4194304, 4194304) 358 DISPATCH(runtime·call8388608, 8388608) 359 DISPATCH(runtime·call16777216, 16777216) 360 DISPATCH(runtime·call33554432, 33554432) 361 DISPATCH(runtime·call67108864, 67108864) 362 DISPATCH(runtime·call134217728, 134217728) 363 DISPATCH(runtime·call268435456, 268435456) 364 DISPATCH(runtime·call536870912, 536870912) 365 DISPATCH(runtime·call1073741824, 1073741824) 366 MOVQ $runtime·badreflectcall(SB), AX 367 JMP AX 368 369 #define CALLFN(NAME,MAXSIZE) \ 370 TEXT NAME(SB), WRAPPER, $MAXSIZE-32; \ 371 NO_LOCAL_POINTERS; \ 372 /* copy arguments to stack */ \ 373 MOVQ argptr+16(FP), SI; \ 374 MOVLQZX argsize+24(FP), CX; \ 375 MOVQ SP, DI; \ 376 REP;MOVSB; \ 377 /* call function */ \ 378 MOVQ f+8(FP), DX; \ 379 PCDATA $PCDATA_StackMapIndex, $0; \ 380 CALL (DX); \ 381 /* copy return values back */ \ 382 MOVQ argptr+16(FP), DI; \ 383 MOVLQZX argsize+24(FP), CX; \ 384 MOVLQZX retoffset+28(FP), BX; \ 385 MOVQ SP, SI; \ 386 ADDQ BX, DI; \ 387 ADDQ BX, SI; \ 388 SUBQ BX, CX; \ 389 REP;MOVSB; \ 390 /* execute write barrier updates */ \ 391 MOVQ argtype+0(FP), DX; \ 392 MOVQ argptr+16(FP), DI; \ 393 MOVLQZX argsize+24(FP), CX; \ 394 MOVLQZX retoffset+28(FP), BX; \ 395 MOVQ DX, 0(SP); \ 396 MOVQ DI, 8(SP); \ 397 MOVQ CX, 16(SP); \ 398 MOVQ BX, 24(SP); \ 399 CALL runtime·callwritebarrier(SB); \ 400 RET 401 402 CALLFN(·call32, 32) 403 CALLFN(·call64, 64) 404 CALLFN(·call128, 128) 405 CALLFN(·call256, 256) 406 CALLFN(·call512, 512) 407 CALLFN(·call1024, 1024) 408 CALLFN(·call2048, 2048) 409 CALLFN(·call4096, 4096) 410 CALLFN(·call8192, 8192) 411 CALLFN(·call16384, 16384) 412 CALLFN(·call32768, 32768) 413 CALLFN(·call65536, 65536) 414 CALLFN(·call131072, 131072) 415 CALLFN(·call262144, 262144) 416 CALLFN(·call524288, 524288) 417 CALLFN(·call1048576, 1048576) 418 CALLFN(·call2097152, 2097152) 419 CALLFN(·call4194304, 4194304) 420 CALLFN(·call8388608, 8388608) 421 CALLFN(·call16777216, 16777216) 422 CALLFN(·call33554432, 33554432) 423 CALLFN(·call67108864, 67108864) 424 CALLFN(·call134217728, 134217728) 425 CALLFN(·call268435456, 268435456) 426 CALLFN(·call536870912, 536870912) 427 CALLFN(·call1073741824, 1073741824) 428 429 // bool cas(int32 *val, int32 old, int32 new) 430 // Atomically: 431 // if(*val == old){ 432 // *val = new; 433 // return 1; 434 // } else 435 // return 0; 436 TEXT runtime·cas(SB), NOSPLIT, $0-17 437 MOVQ ptr+0(FP), BX 438 MOVL old+8(FP), AX 439 MOVL new+12(FP), CX 440 LOCK 441 CMPXCHGL CX, 0(BX) 442 SETEQ ret+16(FP) 443 RET 444 445 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new) 446 // Atomically: 447 // if(*val == *old){ 448 // *val = new; 449 // return 1; 450 // } else { 451 // return 0; 452 // } 453 TEXT runtime·cas64(SB), NOSPLIT, $0-25 454 MOVQ ptr+0(FP), BX 455 MOVQ old+8(FP), AX 456 MOVQ new+16(FP), CX 457 LOCK 458 CMPXCHGQ CX, 0(BX) 459 SETEQ ret+24(FP) 460 RET 461 462 TEXT runtime·casuintptr(SB), NOSPLIT, $0-25 463 JMP runtime·cas64(SB) 464 465 TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16 466 JMP runtime·atomicload64(SB) 467 468 TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16 469 JMP runtime·atomicload64(SB) 470 471 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16 472 JMP runtime·atomicstore64(SB) 473 474 // bool casp(void **val, void *old, void *new) 475 // Atomically: 476 // if(*val == old){ 477 // *val = new; 478 // return 1; 479 // } else 480 // return 0; 481 TEXT runtime·casp1(SB), NOSPLIT, $0-25 482 MOVQ ptr+0(FP), BX 483 MOVQ old+8(FP), AX 484 MOVQ new+16(FP), CX 485 LOCK 486 CMPXCHGQ CX, 0(BX) 487 SETEQ ret+24(FP) 488 RET 489 490 // uint32 xadd(uint32 volatile *val, int32 delta) 491 // Atomically: 492 // *val += delta; 493 // return *val; 494 TEXT runtime·xadd(SB), NOSPLIT, $0-20 495 MOVQ ptr+0(FP), BX 496 MOVL delta+8(FP), AX 497 MOVL AX, CX 498 LOCK 499 XADDL AX, 0(BX) 500 ADDL CX, AX 501 MOVL AX, ret+16(FP) 502 RET 503 504 TEXT runtime·xadd64(SB), NOSPLIT, $0-24 505 MOVQ ptr+0(FP), BX 506 MOVQ delta+8(FP), AX 507 MOVQ AX, CX 508 LOCK 509 XADDQ AX, 0(BX) 510 ADDQ CX, AX 511 MOVQ AX, ret+16(FP) 512 RET 513 514 TEXT runtime·xchg(SB), NOSPLIT, $0-20 515 MOVQ ptr+0(FP), BX 516 MOVL new+8(FP), AX 517 XCHGL AX, 0(BX) 518 MOVL AX, ret+16(FP) 519 RET 520 521 TEXT runtime·xchg64(SB), NOSPLIT, $0-24 522 MOVQ ptr+0(FP), BX 523 MOVQ new+8(FP), AX 524 XCHGQ AX, 0(BX) 525 MOVQ AX, ret+16(FP) 526 RET 527 528 TEXT runtime·xchgp1(SB), NOSPLIT, $0-24 529 MOVQ ptr+0(FP), BX 530 MOVQ new+8(FP), AX 531 XCHGQ AX, 0(BX) 532 MOVQ AX, ret+16(FP) 533 RET 534 535 TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24 536 JMP runtime·xchg64(SB) 537 538 TEXT runtime·procyield(SB),NOSPLIT,$0-0 539 MOVL cycles+0(FP), AX 540 again: 541 PAUSE 542 SUBL $1, AX 543 JNZ again 544 RET 545 546 TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16 547 MOVQ ptr+0(FP), BX 548 MOVQ val+8(FP), AX 549 XCHGQ AX, 0(BX) 550 RET 551 552 TEXT runtime·atomicstore(SB), NOSPLIT, $0-12 553 MOVQ ptr+0(FP), BX 554 MOVL val+8(FP), AX 555 XCHGL AX, 0(BX) 556 RET 557 558 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16 559 MOVQ ptr+0(FP), BX 560 MOVQ val+8(FP), AX 561 XCHGQ AX, 0(BX) 562 RET 563 564 // void runtime·atomicor8(byte volatile*, byte); 565 TEXT runtime·atomicor8(SB), NOSPLIT, $0-9 566 MOVQ ptr+0(FP), AX 567 MOVB val+8(FP), BX 568 LOCK 569 ORB BX, (AX) 570 RET 571 572 // void jmpdefer(fn, sp); 573 // called from deferreturn. 574 // 1. pop the caller 575 // 2. sub 5 bytes from the callers return 576 // 3. jmp to the argument 577 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16 578 MOVQ fv+0(FP), DX // fn 579 MOVQ argp+8(FP), BX // caller sp 580 LEAQ -8(BX), SP // caller sp after CALL 581 SUBQ $5, (SP) // return to CALL again 582 MOVQ 0(DX), BX 583 JMP BX // but first run the deferred function 584 585 // Save state of caller into g->sched. Smashes R8, R9. 586 TEXT gosave<>(SB),NOSPLIT,$0 587 get_tls(R8) 588 MOVQ g(R8), R8 589 MOVQ 0(SP), R9 590 MOVQ R9, (g_sched+gobuf_pc)(R8) 591 LEAQ 8(SP), R9 592 MOVQ R9, (g_sched+gobuf_sp)(R8) 593 MOVQ $0, (g_sched+gobuf_ret)(R8) 594 MOVQ $0, (g_sched+gobuf_ctxt)(R8) 595 RET 596 597 // asmcgocall(void(*fn)(void*), void *arg) 598 // Call fn(arg) on the scheduler stack, 599 // aligned appropriately for the gcc ABI. 600 // See cgocall.c for more details. 601 TEXT ·asmcgocall(SB),NOSPLIT,$0-16 602 MOVQ fn+0(FP), AX 603 MOVQ arg+8(FP), BX 604 CALL asmcgocall<>(SB) 605 RET 606 607 TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-20 608 MOVQ fn+0(FP), AX 609 MOVQ arg+8(FP), BX 610 CALL asmcgocall<>(SB) 611 MOVL AX, ret+16(FP) 612 RET 613 614 // asmcgocall common code. fn in AX, arg in BX. returns errno in AX. 615 TEXT asmcgocall<>(SB),NOSPLIT,$0-0 616 MOVQ SP, DX 617 618 // Figure out if we need to switch to m->g0 stack. 619 // We get called to create new OS threads too, and those 620 // come in on the m->g0 stack already. 621 get_tls(CX) 622 MOVQ g(CX), BP 623 MOVQ g_m(BP), BP 624 MOVQ m_g0(BP), SI 625 MOVQ g(CX), DI 626 CMPQ SI, DI 627 JEQ nosave 628 MOVQ m_gsignal(BP), SI 629 CMPQ SI, DI 630 JEQ nosave 631 632 MOVQ m_g0(BP), SI 633 CALL gosave<>(SB) 634 MOVQ SI, g(CX) 635 MOVQ (g_sched+gobuf_sp)(SI), SP 636 nosave: 637 638 // Now on a scheduling stack (a pthread-created stack). 639 // Make sure we have enough room for 4 stack-backed fast-call 640 // registers as per windows amd64 calling convention. 641 SUBQ $64, SP 642 ANDQ $~15, SP // alignment for gcc ABI 643 MOVQ DI, 48(SP) // save g 644 MOVQ (g_stack+stack_hi)(DI), DI 645 SUBQ DX, DI 646 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback) 647 MOVQ BX, DI // DI = first argument in AMD64 ABI 648 MOVQ BX, CX // CX = first argument in Win64 649 CALL AX 650 651 // Restore registers, g, stack pointer. 652 get_tls(CX) 653 MOVQ 48(SP), DI 654 MOVQ (g_stack+stack_hi)(DI), SI 655 SUBQ 40(SP), SI 656 MOVQ DI, g(CX) 657 MOVQ SI, SP 658 RET 659 660 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize) 661 // Turn the fn into a Go func (by taking its address) and call 662 // cgocallback_gofunc. 663 TEXT runtime·cgocallback(SB),NOSPLIT,$24-24 664 LEAQ fn+0(FP), AX 665 MOVQ AX, 0(SP) 666 MOVQ frame+8(FP), AX 667 MOVQ AX, 8(SP) 668 MOVQ framesize+16(FP), AX 669 MOVQ AX, 16(SP) 670 MOVQ $runtime·cgocallback_gofunc(SB), AX 671 CALL AX 672 RET 673 674 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize) 675 // See cgocall.c for more details. 676 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24 677 NO_LOCAL_POINTERS 678 679 // If g is nil, Go did not create the current thread. 680 // Call needm to obtain one m for temporary use. 681 // In this case, we're running on the thread stack, so there's 682 // lots of space, but the linker doesn't know. Hide the call from 683 // the linker analysis by using an indirect call through AX. 684 get_tls(CX) 685 #ifdef GOOS_windows 686 MOVL $0, BP 687 CMPQ CX, $0 688 JEQ 2(PC) 689 #endif 690 MOVQ g(CX), BP 691 CMPQ BP, $0 692 JEQ needm 693 MOVQ g_m(BP), BP 694 MOVQ BP, R8 // holds oldm until end of function 695 JMP havem 696 needm: 697 MOVQ $0, 0(SP) 698 MOVQ $runtime·needm(SB), AX 699 CALL AX 700 MOVQ 0(SP), R8 701 get_tls(CX) 702 MOVQ g(CX), BP 703 MOVQ g_m(BP), BP 704 705 // Set m->sched.sp = SP, so that if a panic happens 706 // during the function we are about to execute, it will 707 // have a valid SP to run on the g0 stack. 708 // The next few lines (after the havem label) 709 // will save this SP onto the stack and then write 710 // the same SP back to m->sched.sp. That seems redundant, 711 // but if an unrecovered panic happens, unwindm will 712 // restore the g->sched.sp from the stack location 713 // and then systemstack will try to use it. If we don't set it here, 714 // that restored SP will be uninitialized (typically 0) and 715 // will not be usable. 716 MOVQ m_g0(BP), SI 717 MOVQ SP, (g_sched+gobuf_sp)(SI) 718 719 havem: 720 // Now there's a valid m, and we're running on its m->g0. 721 // Save current m->g0->sched.sp on stack and then set it to SP. 722 // Save current sp in m->g0->sched.sp in preparation for 723 // switch back to m->curg stack. 724 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP). 725 MOVQ m_g0(BP), SI 726 MOVQ (g_sched+gobuf_sp)(SI), AX 727 MOVQ AX, 0(SP) 728 MOVQ SP, (g_sched+gobuf_sp)(SI) 729 730 // Switch to m->curg stack and call runtime.cgocallbackg. 731 // Because we are taking over the execution of m->curg 732 // but *not* resuming what had been running, we need to 733 // save that information (m->curg->sched) so we can restore it. 734 // We can restore m->curg->sched.sp easily, because calling 735 // runtime.cgocallbackg leaves SP unchanged upon return. 736 // To save m->curg->sched.pc, we push it onto the stack. 737 // This has the added benefit that it looks to the traceback 738 // routine like cgocallbackg is going to return to that 739 // PC (because the frame we allocate below has the same 740 // size as cgocallback_gofunc's frame declared above) 741 // so that the traceback will seamlessly trace back into 742 // the earlier calls. 743 // 744 // In the new goroutine, 0(SP) holds the saved R8. 745 MOVQ m_curg(BP), SI 746 MOVQ SI, g(CX) 747 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI 748 MOVQ (g_sched+gobuf_pc)(SI), BP 749 MOVQ BP, -8(DI) 750 LEAQ -(8+8)(DI), SP 751 MOVQ R8, 0(SP) 752 CALL runtime·cgocallbackg(SB) 753 MOVQ 0(SP), R8 754 755 // Restore g->sched (== m->curg->sched) from saved values. 756 get_tls(CX) 757 MOVQ g(CX), SI 758 MOVQ 8(SP), BP 759 MOVQ BP, (g_sched+gobuf_pc)(SI) 760 LEAQ (8+8)(SP), DI 761 MOVQ DI, (g_sched+gobuf_sp)(SI) 762 763 // Switch back to m->g0's stack and restore m->g0->sched.sp. 764 // (Unlike m->curg, the g0 goroutine never uses sched.pc, 765 // so we do not have to restore it.) 766 MOVQ g(CX), BP 767 MOVQ g_m(BP), BP 768 MOVQ m_g0(BP), SI 769 MOVQ SI, g(CX) 770 MOVQ (g_sched+gobuf_sp)(SI), SP 771 MOVQ 0(SP), AX 772 MOVQ AX, (g_sched+gobuf_sp)(SI) 773 774 // If the m on entry was nil, we called needm above to borrow an m 775 // for the duration of the call. Since the call is over, return it with dropm. 776 CMPQ R8, $0 777 JNE 3(PC) 778 MOVQ $runtime·dropm(SB), AX 779 CALL AX 780 781 // Done! 782 RET 783 784 // void setg(G*); set g. for use by needm. 785 TEXT runtime·setg(SB), NOSPLIT, $0-8 786 MOVQ gg+0(FP), BX 787 #ifdef GOOS_windows 788 CMPQ BX, $0 789 JNE settls 790 MOVQ $0, 0x28(GS) 791 RET 792 settls: 793 MOVQ g_m(BX), AX 794 LEAQ m_tls(AX), AX 795 MOVQ AX, 0x28(GS) 796 #endif 797 get_tls(CX) 798 MOVQ BX, g(CX) 799 RET 800 801 // void setg_gcc(G*); set g called from gcc. 802 TEXT setg_gcc<>(SB),NOSPLIT,$0 803 get_tls(AX) 804 MOVQ DI, g(AX) 805 RET 806 807 // check that SP is in range [g->stack.lo, g->stack.hi) 808 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 809 get_tls(CX) 810 MOVQ g(CX), AX 811 CMPQ (g_stack+stack_hi)(AX), SP 812 JHI 2(PC) 813 INT $3 814 CMPQ SP, (g_stack+stack_lo)(AX) 815 JHI 2(PC) 816 INT $3 817 RET 818 819 TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16 820 MOVQ argp+0(FP),AX // addr of first arg 821 MOVQ -8(AX),AX // get calling pc 822 MOVQ AX, ret+8(FP) 823 RET 824 825 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16 826 MOVQ p+0(FP),AX // addr of first arg 827 MOVQ -8(AX),AX // get calling pc 828 MOVQ AX,ret+8(FP) 829 RET 830 831 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16 832 MOVQ argp+0(FP),AX // addr of first arg 833 MOVQ pc+8(FP), BX 834 MOVQ BX, -8(AX) // set calling pc 835 RET 836 837 TEXT runtime·getcallersp(SB),NOSPLIT,$0-16 838 MOVQ argp+0(FP), AX 839 MOVQ AX, ret+8(FP) 840 RET 841 842 // func gogetcallersp(p unsafe.Pointer) uintptr 843 TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-16 844 MOVQ p+0(FP),AX // addr of first arg 845 MOVQ AX, ret+8(FP) 846 RET 847 848 // int64 runtime·cputicks(void) 849 TEXT runtime·cputicks(SB),NOSPLIT,$0-0 850 RDTSC 851 SHLQ $32, DX 852 ADDQ DX, AX 853 MOVQ AX, ret+0(FP) 854 RET 855 856 // memhash_varlen(p unsafe.Pointer, h seed) uintptr 857 // redirects to memhash(p, h, size) using the size 858 // stored in the closure. 859 TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24 860 GO_ARGS 861 NO_LOCAL_POINTERS 862 MOVQ p+0(FP), AX 863 MOVQ h+8(FP), BX 864 MOVQ 8(DX), CX 865 MOVQ AX, 0(SP) 866 MOVQ BX, 8(SP) 867 MOVQ CX, 16(SP) 868 CALL runtime·memhash(SB) 869 MOVQ 24(SP), AX 870 MOVQ AX, ret+16(FP) 871 RET 872 873 // hash function using AES hardware instructions 874 TEXT runtime·aeshash(SB),NOSPLIT,$0-32 875 MOVQ p+0(FP), AX // ptr to data 876 MOVQ s+16(FP), CX // size 877 LEAQ ret+24(FP), DX 878 JMP runtime·aeshashbody(SB) 879 880 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24 881 MOVQ p+0(FP), AX // ptr to string struct 882 MOVQ 8(AX), CX // length of string 883 MOVQ (AX), AX // string data 884 LEAQ ret+16(FP), DX 885 JMP runtime·aeshashbody(SB) 886 887 // AX: data 888 // CX: length 889 // DX: address to put return value 890 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0 891 MOVQ h+8(FP), X6 // seed to low 64 bits of xmm6 892 PINSRQ $1, CX, X6 // size to high 64 bits of xmm6 893 PSHUFHW $0, X6, X6 // replace size with its low 2 bytes repeated 4 times 894 MOVO runtime·aeskeysched(SB), X7 895 CMPQ CX, $16 896 JB aes0to15 897 JE aes16 898 CMPQ CX, $32 899 JBE aes17to32 900 CMPQ CX, $64 901 JBE aes33to64 902 CMPQ CX, $128 903 JBE aes65to128 904 JMP aes129plus 905 906 aes0to15: 907 TESTQ CX, CX 908 JE aes0 909 910 ADDQ $16, AX 911 TESTW $0xff0, AX 912 JE endofpage 913 914 // 16 bytes loaded at this address won't cross 915 // a page boundary, so we can load it directly. 916 MOVOU -16(AX), X0 917 ADDQ CX, CX 918 MOVQ $masks<>(SB), BP 919 PAND (BP)(CX*8), X0 920 921 // scramble 3 times 922 AESENC X6, X0 923 AESENC X7, X0 924 AESENC X7, X0 925 MOVQ X0, (DX) 926 RET 927 928 endofpage: 929 // address ends in 1111xxxx. Might be up against 930 // a page boundary, so load ending at last byte. 931 // Then shift bytes down using pshufb. 932 MOVOU -32(AX)(CX*1), X0 933 ADDQ CX, CX 934 MOVQ $shifts<>(SB), BP 935 PSHUFB (BP)(CX*8), X0 936 AESENC X6, X0 937 AESENC X7, X0 938 AESENC X7, X0 939 MOVQ X0, (DX) 940 RET 941 942 aes0: 943 // return input seed 944 MOVQ h+8(FP), AX 945 MOVQ AX, (DX) 946 RET 947 948 aes16: 949 MOVOU (AX), X0 950 AESENC X6, X0 951 AESENC X7, X0 952 AESENC X7, X0 953 MOVQ X0, (DX) 954 RET 955 956 aes17to32: 957 // load data to be hashed 958 MOVOU (AX), X0 959 MOVOU -16(AX)(CX*1), X1 960 961 // scramble 3 times 962 AESENC X6, X0 963 AESENC runtime·aeskeysched+16(SB), X1 964 AESENC X7, X0 965 AESENC X7, X1 966 AESENC X7, X0 967 AESENC X7, X1 968 969 // combine results 970 PXOR X1, X0 971 MOVQ X0, (DX) 972 RET 973 974 aes33to64: 975 MOVOU (AX), X0 976 MOVOU 16(AX), X1 977 MOVOU -32(AX)(CX*1), X2 978 MOVOU -16(AX)(CX*1), X3 979 980 AESENC X6, X0 981 AESENC runtime·aeskeysched+16(SB), X1 982 AESENC runtime·aeskeysched+32(SB), X2 983 AESENC runtime·aeskeysched+48(SB), X3 984 AESENC X7, X0 985 AESENC X7, X1 986 AESENC X7, X2 987 AESENC X7, X3 988 AESENC X7, X0 989 AESENC X7, X1 990 AESENC X7, X2 991 AESENC X7, X3 992 993 PXOR X2, X0 994 PXOR X3, X1 995 PXOR X1, X0 996 MOVQ X0, (DX) 997 RET 998 999 aes65to128: 1000 MOVOU (AX), X0 1001 MOVOU 16(AX), X1 1002 MOVOU 32(AX), X2 1003 MOVOU 48(AX), X3 1004 MOVOU -64(AX)(CX*1), X4 1005 MOVOU -48(AX)(CX*1), X5 1006 MOVOU -32(AX)(CX*1), X8 1007 MOVOU -16(AX)(CX*1), X9 1008 1009 AESENC X6, X0 1010 AESENC runtime·aeskeysched+16(SB), X1 1011 AESENC runtime·aeskeysched+32(SB), X2 1012 AESENC runtime·aeskeysched+48(SB), X3 1013 AESENC runtime·aeskeysched+64(SB), X4 1014 AESENC runtime·aeskeysched+80(SB), X5 1015 AESENC runtime·aeskeysched+96(SB), X8 1016 AESENC runtime·aeskeysched+112(SB), X9 1017 AESENC X7, X0 1018 AESENC X7, X1 1019 AESENC X7, X2 1020 AESENC X7, X3 1021 AESENC X7, X4 1022 AESENC X7, X5 1023 AESENC X7, X8 1024 AESENC X7, X9 1025 AESENC X7, X0 1026 AESENC X7, X1 1027 AESENC X7, X2 1028 AESENC X7, X3 1029 AESENC X7, X4 1030 AESENC X7, X5 1031 AESENC X7, X8 1032 AESENC X7, X9 1033 1034 PXOR X4, X0 1035 PXOR X5, X1 1036 PXOR X8, X2 1037 PXOR X9, X3 1038 PXOR X2, X0 1039 PXOR X3, X1 1040 PXOR X1, X0 1041 MOVQ X0, (DX) 1042 RET 1043 1044 aes129plus: 1045 // start with last (possibly overlapping) block 1046 MOVOU -128(AX)(CX*1), X0 1047 MOVOU -112(AX)(CX*1), X1 1048 MOVOU -96(AX)(CX*1), X2 1049 MOVOU -80(AX)(CX*1), X3 1050 MOVOU -64(AX)(CX*1), X4 1051 MOVOU -48(AX)(CX*1), X5 1052 MOVOU -32(AX)(CX*1), X8 1053 MOVOU -16(AX)(CX*1), X9 1054 1055 // scramble state once 1056 AESENC X6, X0 1057 AESENC runtime·aeskeysched+16(SB), X1 1058 AESENC runtime·aeskeysched+32(SB), X2 1059 AESENC runtime·aeskeysched+48(SB), X3 1060 AESENC runtime·aeskeysched+64(SB), X4 1061 AESENC runtime·aeskeysched+80(SB), X5 1062 AESENC runtime·aeskeysched+96(SB), X8 1063 AESENC runtime·aeskeysched+112(SB), X9 1064 1065 // compute number of remaining 128-byte blocks 1066 DECQ CX 1067 SHRQ $7, CX 1068 1069 aesloop: 1070 // scramble state, xor in a block 1071 MOVOU (AX), X10 1072 MOVOU 16(AX), X11 1073 MOVOU 32(AX), X12 1074 MOVOU 48(AX), X13 1075 AESENC X10, X0 1076 AESENC X11, X1 1077 AESENC X12, X2 1078 AESENC X13, X3 1079 MOVOU 64(AX), X10 1080 MOVOU 80(AX), X11 1081 MOVOU 96(AX), X12 1082 MOVOU 112(AX), X13 1083 AESENC X10, X4 1084 AESENC X11, X5 1085 AESENC X12, X8 1086 AESENC X13, X9 1087 1088 // scramble state 1089 AESENC X7, X0 1090 AESENC X7, X1 1091 AESENC X7, X2 1092 AESENC X7, X3 1093 AESENC X7, X4 1094 AESENC X7, X5 1095 AESENC X7, X8 1096 AESENC X7, X9 1097 1098 ADDQ $128, AX 1099 DECQ CX 1100 JNE aesloop 1101 1102 // 2 more scrambles to finish 1103 AESENC X7, X0 1104 AESENC X7, X1 1105 AESENC X7, X2 1106 AESENC X7, X3 1107 AESENC X7, X4 1108 AESENC X7, X5 1109 AESENC X7, X8 1110 AESENC X7, X9 1111 AESENC X7, X0 1112 AESENC X7, X1 1113 AESENC X7, X2 1114 AESENC X7, X3 1115 AESENC X7, X4 1116 AESENC X7, X5 1117 AESENC X7, X8 1118 AESENC X7, X9 1119 1120 PXOR X4, X0 1121 PXOR X5, X1 1122 PXOR X8, X2 1123 PXOR X9, X3 1124 PXOR X2, X0 1125 PXOR X3, X1 1126 PXOR X1, X0 1127 MOVQ X0, (DX) 1128 RET 1129 1130 TEXT runtime·aeshash32(SB),NOSPLIT,$0-24 1131 MOVQ p+0(FP), AX // ptr to data 1132 MOVQ h+8(FP), X0 // seed 1133 PINSRD $2, (AX), X0 // data 1134 AESENC runtime·aeskeysched+0(SB), X0 1135 AESENC runtime·aeskeysched+16(SB), X0 1136 AESENC runtime·aeskeysched+32(SB), X0 1137 MOVQ X0, ret+16(FP) 1138 RET 1139 1140 TEXT runtime·aeshash64(SB),NOSPLIT,$0-24 1141 MOVQ p+0(FP), AX // ptr to data 1142 MOVQ h+8(FP), X0 // seed 1143 PINSRQ $1, (AX), X0 // data 1144 AESENC runtime·aeskeysched+0(SB), X0 1145 AESENC runtime·aeskeysched+16(SB), X0 1146 AESENC runtime·aeskeysched+32(SB), X0 1147 MOVQ X0, ret+16(FP) 1148 RET 1149 1150 // simple mask to get rid of data in the high part of the register. 1151 DATA masks<>+0x00(SB)/8, $0x0000000000000000 1152 DATA masks<>+0x08(SB)/8, $0x0000000000000000 1153 DATA masks<>+0x10(SB)/8, $0x00000000000000ff 1154 DATA masks<>+0x18(SB)/8, $0x0000000000000000 1155 DATA masks<>+0x20(SB)/8, $0x000000000000ffff 1156 DATA masks<>+0x28(SB)/8, $0x0000000000000000 1157 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff 1158 DATA masks<>+0x38(SB)/8, $0x0000000000000000 1159 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff 1160 DATA masks<>+0x48(SB)/8, $0x0000000000000000 1161 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff 1162 DATA masks<>+0x58(SB)/8, $0x0000000000000000 1163 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff 1164 DATA masks<>+0x68(SB)/8, $0x0000000000000000 1165 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff 1166 DATA masks<>+0x78(SB)/8, $0x0000000000000000 1167 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff 1168 DATA masks<>+0x88(SB)/8, $0x0000000000000000 1169 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff 1170 DATA masks<>+0x98(SB)/8, $0x00000000000000ff 1171 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff 1172 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff 1173 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff 1174 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff 1175 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff 1176 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff 1177 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff 1178 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff 1179 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff 1180 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff 1181 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff 1182 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff 1183 GLOBL masks<>(SB),RODATA,$256 1184 1185 // these are arguments to pshufb. They move data down from 1186 // the high bytes of the register to the low bytes of the register. 1187 // index is how many bytes to move. 1188 DATA shifts<>+0x00(SB)/8, $0x0000000000000000 1189 DATA shifts<>+0x08(SB)/8, $0x0000000000000000 1190 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f 1191 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff 1192 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e 1193 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff 1194 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d 1195 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff 1196 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c 1197 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff 1198 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b 1199 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff 1200 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a 1201 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff 1202 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09 1203 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff 1204 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908 1205 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff 1206 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807 1207 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f 1208 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706 1209 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e 1210 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605 1211 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d 1212 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504 1213 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c 1214 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403 1215 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b 1216 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302 1217 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a 1218 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201 1219 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09 1220 GLOBL shifts<>(SB),RODATA,$256 1221 1222 TEXT runtime·memeq(SB),NOSPLIT,$0-25 1223 MOVQ a+0(FP), SI 1224 MOVQ b+8(FP), DI 1225 MOVQ size+16(FP), BX 1226 CALL runtime·memeqbody(SB) 1227 MOVB AX, ret+24(FP) 1228 RET 1229 1230 // memequal_varlen(a, b unsafe.Pointer) bool 1231 TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17 1232 MOVQ a+0(FP), SI 1233 MOVQ b+8(FP), DI 1234 CMPQ SI, DI 1235 JEQ eq 1236 MOVQ 8(DX), BX // compiler stores size at offset 8 in the closure 1237 CALL runtime·memeqbody(SB) 1238 MOVB AX, ret+16(FP) 1239 RET 1240 eq: 1241 MOVB $1, ret+16(FP) 1242 RET 1243 1244 // eqstring tests whether two strings are equal. 1245 // See runtime_test.go:eqstring_generic for 1246 // equivalent Go code. 1247 TEXT runtime·eqstring(SB),NOSPLIT,$0-33 1248 MOVQ s1len+8(FP), AX 1249 MOVQ s2len+24(FP), BX 1250 CMPQ AX, BX 1251 JNE noteq 1252 MOVQ s1str+0(FP), SI 1253 MOVQ s2str+16(FP), DI 1254 CMPQ SI, DI 1255 JEQ eq 1256 CALL runtime·memeqbody(SB) 1257 MOVB AX, v+32(FP) 1258 RET 1259 eq: 1260 MOVB $1, v+32(FP) 1261 RET 1262 noteq: 1263 MOVB $0, v+32(FP) 1264 RET 1265 1266 // a in SI 1267 // b in DI 1268 // count in BX 1269 TEXT runtime·memeqbody(SB),NOSPLIT,$0-0 1270 XORQ AX, AX 1271 1272 CMPQ BX, $8 1273 JB small 1274 1275 // 64 bytes at a time using xmm registers 1276 hugeloop: 1277 CMPQ BX, $64 1278 JB bigloop 1279 MOVOU (SI), X0 1280 MOVOU (DI), X1 1281 MOVOU 16(SI), X2 1282 MOVOU 16(DI), X3 1283 MOVOU 32(SI), X4 1284 MOVOU 32(DI), X5 1285 MOVOU 48(SI), X6 1286 MOVOU 48(DI), X7 1287 PCMPEQB X1, X0 1288 PCMPEQB X3, X2 1289 PCMPEQB X5, X4 1290 PCMPEQB X7, X6 1291 PAND X2, X0 1292 PAND X6, X4 1293 PAND X4, X0 1294 PMOVMSKB X0, DX 1295 ADDQ $64, SI 1296 ADDQ $64, DI 1297 SUBQ $64, BX 1298 CMPL DX, $0xffff 1299 JEQ hugeloop 1300 RET 1301 1302 // 8 bytes at a time using 64-bit register 1303 bigloop: 1304 CMPQ BX, $8 1305 JBE leftover 1306 MOVQ (SI), CX 1307 MOVQ (DI), DX 1308 ADDQ $8, SI 1309 ADDQ $8, DI 1310 SUBQ $8, BX 1311 CMPQ CX, DX 1312 JEQ bigloop 1313 RET 1314 1315 // remaining 0-8 bytes 1316 leftover: 1317 MOVQ -8(SI)(BX*1), CX 1318 MOVQ -8(DI)(BX*1), DX 1319 CMPQ CX, DX 1320 SETEQ AX 1321 RET 1322 1323 small: 1324 CMPQ BX, $0 1325 JEQ equal 1326 1327 LEAQ 0(BX*8), CX 1328 NEGQ CX 1329 1330 CMPB SI, $0xf8 1331 JA si_high 1332 1333 // load at SI won't cross a page boundary. 1334 MOVQ (SI), SI 1335 JMP si_finish 1336 si_high: 1337 // address ends in 11111xxx. Load up to bytes we want, move to correct position. 1338 MOVQ -8(SI)(BX*1), SI 1339 SHRQ CX, SI 1340 si_finish: 1341 1342 // same for DI. 1343 CMPB DI, $0xf8 1344 JA di_high 1345 MOVQ (DI), DI 1346 JMP di_finish 1347 di_high: 1348 MOVQ -8(DI)(BX*1), DI 1349 SHRQ CX, DI 1350 di_finish: 1351 1352 SUBQ SI, DI 1353 SHLQ CX, DI 1354 equal: 1355 SETEQ AX 1356 RET 1357 1358 TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 1359 MOVQ s1_base+0(FP), SI 1360 MOVQ s1_len+8(FP), BX 1361 MOVQ s2_base+16(FP), DI 1362 MOVQ s2_len+24(FP), DX 1363 CALL runtime·cmpbody(SB) 1364 MOVQ AX, ret+32(FP) 1365 RET 1366 1367 TEXT bytes·Compare(SB),NOSPLIT,$0-56 1368 MOVQ s1+0(FP), SI 1369 MOVQ s1+8(FP), BX 1370 MOVQ s2+24(FP), DI 1371 MOVQ s2+32(FP), DX 1372 CALL runtime·cmpbody(SB) 1373 MOVQ AX, res+48(FP) 1374 RET 1375 1376 // input: 1377 // SI = a 1378 // DI = b 1379 // BX = alen 1380 // DX = blen 1381 // output: 1382 // AX = 1/0/-1 1383 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 1384 CMPQ SI, DI 1385 JEQ allsame 1386 CMPQ BX, DX 1387 MOVQ DX, BP 1388 CMOVQLT BX, BP // BP = min(alen, blen) = # of bytes to compare 1389 CMPQ BP, $8 1390 JB small 1391 1392 loop: 1393 CMPQ BP, $16 1394 JBE _0through16 1395 MOVOU (SI), X0 1396 MOVOU (DI), X1 1397 PCMPEQB X0, X1 1398 PMOVMSKB X1, AX 1399 XORQ $0xffff, AX // convert EQ to NE 1400 JNE diff16 // branch if at least one byte is not equal 1401 ADDQ $16, SI 1402 ADDQ $16, DI 1403 SUBQ $16, BP 1404 JMP loop 1405 1406 // AX = bit mask of differences 1407 diff16: 1408 BSFQ AX, BX // index of first byte that differs 1409 XORQ AX, AX 1410 MOVB (SI)(BX*1), CX 1411 CMPB CX, (DI)(BX*1) 1412 SETHI AX 1413 LEAQ -1(AX*2), AX // convert 1/0 to +1/-1 1414 RET 1415 1416 // 0 through 16 bytes left, alen>=8, blen>=8 1417 _0through16: 1418 CMPQ BP, $8 1419 JBE _0through8 1420 MOVQ (SI), AX 1421 MOVQ (DI), CX 1422 CMPQ AX, CX 1423 JNE diff8 1424 _0through8: 1425 MOVQ -8(SI)(BP*1), AX 1426 MOVQ -8(DI)(BP*1), CX 1427 CMPQ AX, CX 1428 JEQ allsame 1429 1430 // AX and CX contain parts of a and b that differ. 1431 diff8: 1432 BSWAPQ AX // reverse order of bytes 1433 BSWAPQ CX 1434 XORQ AX, CX 1435 BSRQ CX, CX // index of highest bit difference 1436 SHRQ CX, AX // move a's bit to bottom 1437 ANDQ $1, AX // mask bit 1438 LEAQ -1(AX*2), AX // 1/0 => +1/-1 1439 RET 1440 1441 // 0-7 bytes in common 1442 small: 1443 LEAQ (BP*8), CX // bytes left -> bits left 1444 NEGQ CX // - bits lift (== 64 - bits left mod 64) 1445 JEQ allsame 1446 1447 // load bytes of a into high bytes of AX 1448 CMPB SI, $0xf8 1449 JA si_high 1450 MOVQ (SI), SI 1451 JMP si_finish 1452 si_high: 1453 MOVQ -8(SI)(BP*1), SI 1454 SHRQ CX, SI 1455 si_finish: 1456 SHLQ CX, SI 1457 1458 // load bytes of b in to high bytes of BX 1459 CMPB DI, $0xf8 1460 JA di_high 1461 MOVQ (DI), DI 1462 JMP di_finish 1463 di_high: 1464 MOVQ -8(DI)(BP*1), DI 1465 SHRQ CX, DI 1466 di_finish: 1467 SHLQ CX, DI 1468 1469 BSWAPQ SI // reverse order of bytes 1470 BSWAPQ DI 1471 XORQ SI, DI // find bit differences 1472 JEQ allsame 1473 BSRQ DI, CX // index of highest bit difference 1474 SHRQ CX, SI // move a's bit to bottom 1475 ANDQ $1, SI // mask bit 1476 LEAQ -1(SI*2), AX // 1/0 => +1/-1 1477 RET 1478 1479 allsame: 1480 XORQ AX, AX 1481 XORQ CX, CX 1482 CMPQ BX, DX 1483 SETGT AX // 1 if alen > blen 1484 SETEQ CX // 1 if alen == blen 1485 LEAQ -1(CX)(AX*2), AX // 1,0,-1 result 1486 RET 1487 1488 TEXT bytes·IndexByte(SB),NOSPLIT,$0 1489 MOVQ s+0(FP), SI 1490 MOVQ s_len+8(FP), BX 1491 MOVB c+24(FP), AL 1492 CALL runtime·indexbytebody(SB) 1493 MOVQ AX, ret+32(FP) 1494 RET 1495 1496 TEXT strings·IndexByte(SB),NOSPLIT,$0 1497 MOVQ s+0(FP), SI 1498 MOVQ s_len+8(FP), BX 1499 MOVB c+16(FP), AL 1500 CALL runtime·indexbytebody(SB) 1501 MOVQ AX, ret+24(FP) 1502 RET 1503 1504 // input: 1505 // SI: data 1506 // BX: data len 1507 // AL: byte sought 1508 // output: 1509 // AX 1510 TEXT runtime·indexbytebody(SB),NOSPLIT,$0 1511 MOVQ SI, DI 1512 1513 CMPQ BX, $16 1514 JLT small 1515 1516 // round up to first 16-byte boundary 1517 TESTQ $15, SI 1518 JZ aligned 1519 MOVQ SI, CX 1520 ANDQ $~15, CX 1521 ADDQ $16, CX 1522 1523 // search the beginning 1524 SUBQ SI, CX 1525 REPN; SCASB 1526 JZ success 1527 1528 // DI is 16-byte aligned; get ready to search using SSE instructions 1529 aligned: 1530 // round down to last 16-byte boundary 1531 MOVQ BX, R11 1532 ADDQ SI, R11 1533 ANDQ $~15, R11 1534 1535 // shuffle X0 around so that each byte contains c 1536 MOVD AX, X0 1537 PUNPCKLBW X0, X0 1538 PUNPCKLBW X0, X0 1539 PSHUFL $0, X0, X0 1540 JMP condition 1541 1542 sse: 1543 // move the next 16-byte chunk of the buffer into X1 1544 MOVO (DI), X1 1545 // compare bytes in X0 to X1 1546 PCMPEQB X0, X1 1547 // take the top bit of each byte in X1 and put the result in DX 1548 PMOVMSKB X1, DX 1549 TESTL DX, DX 1550 JNZ ssesuccess 1551 ADDQ $16, DI 1552 1553 condition: 1554 CMPQ DI, R11 1555 JLT sse 1556 1557 // search the end 1558 MOVQ SI, CX 1559 ADDQ BX, CX 1560 SUBQ R11, CX 1561 // if CX == 0, the zero flag will be set and we'll end up 1562 // returning a false success 1563 JZ failure 1564 REPN; SCASB 1565 JZ success 1566 1567 failure: 1568 MOVQ $-1, AX 1569 RET 1570 1571 // handle for lengths < 16 1572 small: 1573 MOVQ BX, CX 1574 REPN; SCASB 1575 JZ success 1576 MOVQ $-1, AX 1577 RET 1578 1579 // we've found the chunk containing the byte 1580 // now just figure out which specific byte it is 1581 ssesuccess: 1582 // get the index of the least significant set bit 1583 BSFW DX, DX 1584 SUBQ SI, DI 1585 ADDQ DI, DX 1586 MOVQ DX, AX 1587 RET 1588 1589 success: 1590 SUBQ SI, DI 1591 SUBL $1, DI 1592 MOVQ DI, AX 1593 RET 1594 1595 TEXT bytes·Equal(SB),NOSPLIT,$0-49 1596 MOVQ a_len+8(FP), BX 1597 MOVQ b_len+32(FP), CX 1598 XORQ AX, AX 1599 CMPQ BX, CX 1600 JNE eqret 1601 MOVQ a+0(FP), SI 1602 MOVQ b+24(FP), DI 1603 CALL runtime·memeqbody(SB) 1604 eqret: 1605 MOVB AX, ret+48(FP) 1606 RET 1607 1608 // A Duff's device for zeroing memory. 1609 // The compiler jumps to computed addresses within 1610 // this routine to zero chunks of memory. Do not 1611 // change this code without also changing the code 1612 // in ../../cmd/6g/ggen.c:clearfat. 1613 // AX: zero 1614 // DI: ptr to memory to be zeroed 1615 // DI is updated as a side effect. 1616 TEXT runtime·duffzero(SB), NOSPLIT, $0-0 1617 STOSQ 1618 STOSQ 1619 STOSQ 1620 STOSQ 1621 STOSQ 1622 STOSQ 1623 STOSQ 1624 STOSQ 1625 STOSQ 1626 STOSQ 1627 STOSQ 1628 STOSQ 1629 STOSQ 1630 STOSQ 1631 STOSQ 1632 STOSQ 1633 STOSQ 1634 STOSQ 1635 STOSQ 1636 STOSQ 1637 STOSQ 1638 STOSQ 1639 STOSQ 1640 STOSQ 1641 STOSQ 1642 STOSQ 1643 STOSQ 1644 STOSQ 1645 STOSQ 1646 STOSQ 1647 STOSQ 1648 STOSQ 1649 STOSQ 1650 STOSQ 1651 STOSQ 1652 STOSQ 1653 STOSQ 1654 STOSQ 1655 STOSQ 1656 STOSQ 1657 STOSQ 1658 STOSQ 1659 STOSQ 1660 STOSQ 1661 STOSQ 1662 STOSQ 1663 STOSQ 1664 STOSQ 1665 STOSQ 1666 STOSQ 1667 STOSQ 1668 STOSQ 1669 STOSQ 1670 STOSQ 1671 STOSQ 1672 STOSQ 1673 STOSQ 1674 STOSQ 1675 STOSQ 1676 STOSQ 1677 STOSQ 1678 STOSQ 1679 STOSQ 1680 STOSQ 1681 STOSQ 1682 STOSQ 1683 STOSQ 1684 STOSQ 1685 STOSQ 1686 STOSQ 1687 STOSQ 1688 STOSQ 1689 STOSQ 1690 STOSQ 1691 STOSQ 1692 STOSQ 1693 STOSQ 1694 STOSQ 1695 STOSQ 1696 STOSQ 1697 STOSQ 1698 STOSQ 1699 STOSQ 1700 STOSQ 1701 STOSQ 1702 STOSQ 1703 STOSQ 1704 STOSQ 1705 STOSQ 1706 STOSQ 1707 STOSQ 1708 STOSQ 1709 STOSQ 1710 STOSQ 1711 STOSQ 1712 STOSQ 1713 STOSQ 1714 STOSQ 1715 STOSQ 1716 STOSQ 1717 STOSQ 1718 STOSQ 1719 STOSQ 1720 STOSQ 1721 STOSQ 1722 STOSQ 1723 STOSQ 1724 STOSQ 1725 STOSQ 1726 STOSQ 1727 STOSQ 1728 STOSQ 1729 STOSQ 1730 STOSQ 1731 STOSQ 1732 STOSQ 1733 STOSQ 1734 STOSQ 1735 STOSQ 1736 STOSQ 1737 STOSQ 1738 STOSQ 1739 STOSQ 1740 STOSQ 1741 STOSQ 1742 STOSQ 1743 STOSQ 1744 STOSQ 1745 RET 1746 1747 // A Duff's device for copying memory. 1748 // The compiler jumps to computed addresses within 1749 // this routine to copy chunks of memory. Source 1750 // and destination must not overlap. Do not 1751 // change this code without also changing the code 1752 // in ../../cmd/6g/cgen.c:sgen. 1753 // SI: ptr to source memory 1754 // DI: ptr to destination memory 1755 // SI and DI are updated as a side effect. 1756 1757 // NOTE: this is equivalent to a sequence of MOVSQ but 1758 // for some reason that is 3.5x slower than this code. 1759 // The STOSQ above seem fine, though. 1760 TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 1761 MOVQ (SI),CX 1762 ADDQ $8,SI 1763 MOVQ CX,(DI) 1764 ADDQ $8,DI 1765 1766 MOVQ (SI),CX 1767 ADDQ $8,SI 1768 MOVQ CX,(DI) 1769 ADDQ $8,DI 1770 1771 MOVQ (SI),CX 1772 ADDQ $8,SI 1773 MOVQ CX,(DI) 1774 ADDQ $8,DI 1775 1776 MOVQ (SI),CX 1777 ADDQ $8,SI 1778 MOVQ CX,(DI) 1779 ADDQ $8,DI 1780 1781 MOVQ (SI),CX 1782 ADDQ $8,SI 1783 MOVQ CX,(DI) 1784 ADDQ $8,DI 1785 1786 MOVQ (SI),CX 1787 ADDQ $8,SI 1788 MOVQ CX,(DI) 1789 ADDQ $8,DI 1790 1791 MOVQ (SI),CX 1792 ADDQ $8,SI 1793 MOVQ CX,(DI) 1794 ADDQ $8,DI 1795 1796 MOVQ (SI),CX 1797 ADDQ $8,SI 1798 MOVQ CX,(DI) 1799 ADDQ $8,DI 1800 1801 MOVQ (SI),CX 1802 ADDQ $8,SI 1803 MOVQ CX,(DI) 1804 ADDQ $8,DI 1805 1806 MOVQ (SI),CX 1807 ADDQ $8,SI 1808 MOVQ CX,(DI) 1809 ADDQ $8,DI 1810 1811 MOVQ (SI),CX 1812 ADDQ $8,SI 1813 MOVQ CX,(DI) 1814 ADDQ $8,DI 1815 1816 MOVQ (SI),CX 1817 ADDQ $8,SI 1818 MOVQ CX,(DI) 1819 ADDQ $8,DI 1820 1821 MOVQ (SI),CX 1822 ADDQ $8,SI 1823 MOVQ CX,(DI) 1824 ADDQ $8,DI 1825 1826 MOVQ (SI),CX 1827 ADDQ $8,SI 1828 MOVQ CX,(DI) 1829 ADDQ $8,DI 1830 1831 MOVQ (SI),CX 1832 ADDQ $8,SI 1833 MOVQ CX,(DI) 1834 ADDQ $8,DI 1835 1836 MOVQ (SI),CX 1837 ADDQ $8,SI 1838 MOVQ CX,(DI) 1839 ADDQ $8,DI 1840 1841 MOVQ (SI),CX 1842 ADDQ $8,SI 1843 MOVQ CX,(DI) 1844 ADDQ $8,DI 1845 1846 MOVQ (SI),CX 1847 ADDQ $8,SI 1848 MOVQ CX,(DI) 1849 ADDQ $8,DI 1850 1851 MOVQ (SI),CX 1852 ADDQ $8,SI 1853 MOVQ CX,(DI) 1854 ADDQ $8,DI 1855 1856 MOVQ (SI),CX 1857 ADDQ $8,SI 1858 MOVQ CX,(DI) 1859 ADDQ $8,DI 1860 1861 MOVQ (SI),CX 1862 ADDQ $8,SI 1863 MOVQ CX,(DI) 1864 ADDQ $8,DI 1865 1866 MOVQ (SI),CX 1867 ADDQ $8,SI 1868 MOVQ CX,(DI) 1869 ADDQ $8,DI 1870 1871 MOVQ (SI),CX 1872 ADDQ $8,SI 1873 MOVQ CX,(DI) 1874 ADDQ $8,DI 1875 1876 MOVQ (SI),CX 1877 ADDQ $8,SI 1878 MOVQ CX,(DI) 1879 ADDQ $8,DI 1880 1881 MOVQ (SI),CX 1882 ADDQ $8,SI 1883 MOVQ CX,(DI) 1884 ADDQ $8,DI 1885 1886 MOVQ (SI),CX 1887 ADDQ $8,SI 1888 MOVQ CX,(DI) 1889 ADDQ $8,DI 1890 1891 MOVQ (SI),CX 1892 ADDQ $8,SI 1893 MOVQ CX,(DI) 1894 ADDQ $8,DI 1895 1896 MOVQ (SI),CX 1897 ADDQ $8,SI 1898 MOVQ CX,(DI) 1899 ADDQ $8,DI 1900 1901 MOVQ (SI),CX 1902 ADDQ $8,SI 1903 MOVQ CX,(DI) 1904 ADDQ $8,DI 1905 1906 MOVQ (SI),CX 1907 ADDQ $8,SI 1908 MOVQ CX,(DI) 1909 ADDQ $8,DI 1910 1911 MOVQ (SI),CX 1912 ADDQ $8,SI 1913 MOVQ CX,(DI) 1914 ADDQ $8,DI 1915 1916 MOVQ (SI),CX 1917 ADDQ $8,SI 1918 MOVQ CX,(DI) 1919 ADDQ $8,DI 1920 1921 MOVQ (SI),CX 1922 ADDQ $8,SI 1923 MOVQ CX,(DI) 1924 ADDQ $8,DI 1925 1926 MOVQ (SI),CX 1927 ADDQ $8,SI 1928 MOVQ CX,(DI) 1929 ADDQ $8,DI 1930 1931 MOVQ (SI),CX 1932 ADDQ $8,SI 1933 MOVQ CX,(DI) 1934 ADDQ $8,DI 1935 1936 MOVQ (SI),CX 1937 ADDQ $8,SI 1938 MOVQ CX,(DI) 1939 ADDQ $8,DI 1940 1941 MOVQ (SI),CX 1942 ADDQ $8,SI 1943 MOVQ CX,(DI) 1944 ADDQ $8,DI 1945 1946 MOVQ (SI),CX 1947 ADDQ $8,SI 1948 MOVQ CX,(DI) 1949 ADDQ $8,DI 1950 1951 MOVQ (SI),CX 1952 ADDQ $8,SI 1953 MOVQ CX,(DI) 1954 ADDQ $8,DI 1955 1956 MOVQ (SI),CX 1957 ADDQ $8,SI 1958 MOVQ CX,(DI) 1959 ADDQ $8,DI 1960 1961 MOVQ (SI),CX 1962 ADDQ $8,SI 1963 MOVQ CX,(DI) 1964 ADDQ $8,DI 1965 1966 MOVQ (SI),CX 1967 ADDQ $8,SI 1968 MOVQ CX,(DI) 1969 ADDQ $8,DI 1970 1971 MOVQ (SI),CX 1972 ADDQ $8,SI 1973 MOVQ CX,(DI) 1974 ADDQ $8,DI 1975 1976 MOVQ (SI),CX 1977 ADDQ $8,SI 1978 MOVQ CX,(DI) 1979 ADDQ $8,DI 1980 1981 MOVQ (SI),CX 1982 ADDQ $8,SI 1983 MOVQ CX,(DI) 1984 ADDQ $8,DI 1985 1986 MOVQ (SI),CX 1987 ADDQ $8,SI 1988 MOVQ CX,(DI) 1989 ADDQ $8,DI 1990 1991 MOVQ (SI),CX 1992 ADDQ $8,SI 1993 MOVQ CX,(DI) 1994 ADDQ $8,DI 1995 1996 MOVQ (SI),CX 1997 ADDQ $8,SI 1998 MOVQ CX,(DI) 1999 ADDQ $8,DI 2000 2001 MOVQ (SI),CX 2002 ADDQ $8,SI 2003 MOVQ CX,(DI) 2004 ADDQ $8,DI 2005 2006 MOVQ (SI),CX 2007 ADDQ $8,SI 2008 MOVQ CX,(DI) 2009 ADDQ $8,DI 2010 2011 MOVQ (SI),CX 2012 ADDQ $8,SI 2013 MOVQ CX,(DI) 2014 ADDQ $8,DI 2015 2016 MOVQ (SI),CX 2017 ADDQ $8,SI 2018 MOVQ CX,(DI) 2019 ADDQ $8,DI 2020 2021 MOVQ (SI),CX 2022 ADDQ $8,SI 2023 MOVQ CX,(DI) 2024 ADDQ $8,DI 2025 2026 MOVQ (SI),CX 2027 ADDQ $8,SI 2028 MOVQ CX,(DI) 2029 ADDQ $8,DI 2030 2031 MOVQ (SI),CX 2032 ADDQ $8,SI 2033 MOVQ CX,(DI) 2034 ADDQ $8,DI 2035 2036 MOVQ (SI),CX 2037 ADDQ $8,SI 2038 MOVQ CX,(DI) 2039 ADDQ $8,DI 2040 2041 MOVQ (SI),CX 2042 ADDQ $8,SI 2043 MOVQ CX,(DI) 2044 ADDQ $8,DI 2045 2046 MOVQ (SI),CX 2047 ADDQ $8,SI 2048 MOVQ CX,(DI) 2049 ADDQ $8,DI 2050 2051 MOVQ (SI),CX 2052 ADDQ $8,SI 2053 MOVQ CX,(DI) 2054 ADDQ $8,DI 2055 2056 MOVQ (SI),CX 2057 ADDQ $8,SI 2058 MOVQ CX,(DI) 2059 ADDQ $8,DI 2060 2061 MOVQ (SI),CX 2062 ADDQ $8,SI 2063 MOVQ CX,(DI) 2064 ADDQ $8,DI 2065 2066 MOVQ (SI),CX 2067 ADDQ $8,SI 2068 MOVQ CX,(DI) 2069 ADDQ $8,DI 2070 2071 MOVQ (SI),CX 2072 ADDQ $8,SI 2073 MOVQ CX,(DI) 2074 ADDQ $8,DI 2075 2076 MOVQ (SI),CX 2077 ADDQ $8,SI 2078 MOVQ CX,(DI) 2079 ADDQ $8,DI 2080 2081 MOVQ (SI),CX 2082 ADDQ $8,SI 2083 MOVQ CX,(DI) 2084 ADDQ $8,DI 2085 2086 MOVQ (SI),CX 2087 ADDQ $8,SI 2088 MOVQ CX,(DI) 2089 ADDQ $8,DI 2090 2091 MOVQ (SI),CX 2092 ADDQ $8,SI 2093 MOVQ CX,(DI) 2094 ADDQ $8,DI 2095 2096 MOVQ (SI),CX 2097 ADDQ $8,SI 2098 MOVQ CX,(DI) 2099 ADDQ $8,DI 2100 2101 MOVQ (SI),CX 2102 ADDQ $8,SI 2103 MOVQ CX,(DI) 2104 ADDQ $8,DI 2105 2106 MOVQ (SI),CX 2107 ADDQ $8,SI 2108 MOVQ CX,(DI) 2109 ADDQ $8,DI 2110 2111 MOVQ (SI),CX 2112 ADDQ $8,SI 2113 MOVQ CX,(DI) 2114 ADDQ $8,DI 2115 2116 MOVQ (SI),CX 2117 ADDQ $8,SI 2118 MOVQ CX,(DI) 2119 ADDQ $8,DI 2120 2121 MOVQ (SI),CX 2122 ADDQ $8,SI 2123 MOVQ CX,(DI) 2124 ADDQ $8,DI 2125 2126 MOVQ (SI),CX 2127 ADDQ $8,SI 2128 MOVQ CX,(DI) 2129 ADDQ $8,DI 2130 2131 MOVQ (SI),CX 2132 ADDQ $8,SI 2133 MOVQ CX,(DI) 2134 ADDQ $8,DI 2135 2136 MOVQ (SI),CX 2137 ADDQ $8,SI 2138 MOVQ CX,(DI) 2139 ADDQ $8,DI 2140 2141 MOVQ (SI),CX 2142 ADDQ $8,SI 2143 MOVQ CX,(DI) 2144 ADDQ $8,DI 2145 2146 MOVQ (SI),CX 2147 ADDQ $8,SI 2148 MOVQ CX,(DI) 2149 ADDQ $8,DI 2150 2151 MOVQ (SI),CX 2152 ADDQ $8,SI 2153 MOVQ CX,(DI) 2154 ADDQ $8,DI 2155 2156 MOVQ (SI),CX 2157 ADDQ $8,SI 2158 MOVQ CX,(DI) 2159 ADDQ $8,DI 2160 2161 MOVQ (SI),CX 2162 ADDQ $8,SI 2163 MOVQ CX,(DI) 2164 ADDQ $8,DI 2165 2166 MOVQ (SI),CX 2167 ADDQ $8,SI 2168 MOVQ CX,(DI) 2169 ADDQ $8,DI 2170 2171 MOVQ (SI),CX 2172 ADDQ $8,SI 2173 MOVQ CX,(DI) 2174 ADDQ $8,DI 2175 2176 MOVQ (SI),CX 2177 ADDQ $8,SI 2178 MOVQ CX,(DI) 2179 ADDQ $8,DI 2180 2181 MOVQ (SI),CX 2182 ADDQ $8,SI 2183 MOVQ CX,(DI) 2184 ADDQ $8,DI 2185 2186 MOVQ (SI),CX 2187 ADDQ $8,SI 2188 MOVQ CX,(DI) 2189 ADDQ $8,DI 2190 2191 MOVQ (SI),CX 2192 ADDQ $8,SI 2193 MOVQ CX,(DI) 2194 ADDQ $8,DI 2195 2196 MOVQ (SI),CX 2197 ADDQ $8,SI 2198 MOVQ CX,(DI) 2199 ADDQ $8,DI 2200 2201 MOVQ (SI),CX 2202 ADDQ $8,SI 2203 MOVQ CX,(DI) 2204 ADDQ $8,DI 2205 2206 MOVQ (SI),CX 2207 ADDQ $8,SI 2208 MOVQ CX,(DI) 2209 ADDQ $8,DI 2210 2211 MOVQ (SI),CX 2212 ADDQ $8,SI 2213 MOVQ CX,(DI) 2214 ADDQ $8,DI 2215 2216 MOVQ (SI),CX 2217 ADDQ $8,SI 2218 MOVQ CX,(DI) 2219 ADDQ $8,DI 2220 2221 MOVQ (SI),CX 2222 ADDQ $8,SI 2223 MOVQ CX,(DI) 2224 ADDQ $8,DI 2225 2226 MOVQ (SI),CX 2227 ADDQ $8,SI 2228 MOVQ CX,(DI) 2229 ADDQ $8,DI 2230 2231 MOVQ (SI),CX 2232 ADDQ $8,SI 2233 MOVQ CX,(DI) 2234 ADDQ $8,DI 2235 2236 MOVQ (SI),CX 2237 ADDQ $8,SI 2238 MOVQ CX,(DI) 2239 ADDQ $8,DI 2240 2241 MOVQ (SI),CX 2242 ADDQ $8,SI 2243 MOVQ CX,(DI) 2244 ADDQ $8,DI 2245 2246 MOVQ (SI),CX 2247 ADDQ $8,SI 2248 MOVQ CX,(DI) 2249 ADDQ $8,DI 2250 2251 MOVQ (SI),CX 2252 ADDQ $8,SI 2253 MOVQ CX,(DI) 2254 ADDQ $8,DI 2255 2256 MOVQ (SI),CX 2257 ADDQ $8,SI 2258 MOVQ CX,(DI) 2259 ADDQ $8,DI 2260 2261 MOVQ (SI),CX 2262 ADDQ $8,SI 2263 MOVQ CX,(DI) 2264 ADDQ $8,DI 2265 2266 MOVQ (SI),CX 2267 ADDQ $8,SI 2268 MOVQ CX,(DI) 2269 ADDQ $8,DI 2270 2271 MOVQ (SI),CX 2272 ADDQ $8,SI 2273 MOVQ CX,(DI) 2274 ADDQ $8,DI 2275 2276 MOVQ (SI),CX 2277 ADDQ $8,SI 2278 MOVQ CX,(DI) 2279 ADDQ $8,DI 2280 2281 MOVQ (SI),CX 2282 ADDQ $8,SI 2283 MOVQ CX,(DI) 2284 ADDQ $8,DI 2285 2286 MOVQ (SI),CX 2287 ADDQ $8,SI 2288 MOVQ CX,(DI) 2289 ADDQ $8,DI 2290 2291 MOVQ (SI),CX 2292 ADDQ $8,SI 2293 MOVQ CX,(DI) 2294 ADDQ $8,DI 2295 2296 MOVQ (SI),CX 2297 ADDQ $8,SI 2298 MOVQ CX,(DI) 2299 ADDQ $8,DI 2300 2301 MOVQ (SI),CX 2302 ADDQ $8,SI 2303 MOVQ CX,(DI) 2304 ADDQ $8,DI 2305 2306 MOVQ (SI),CX 2307 ADDQ $8,SI 2308 MOVQ CX,(DI) 2309 ADDQ $8,DI 2310 2311 MOVQ (SI),CX 2312 ADDQ $8,SI 2313 MOVQ CX,(DI) 2314 ADDQ $8,DI 2315 2316 MOVQ (SI),CX 2317 ADDQ $8,SI 2318 MOVQ CX,(DI) 2319 ADDQ $8,DI 2320 2321 MOVQ (SI),CX 2322 ADDQ $8,SI 2323 MOVQ CX,(DI) 2324 ADDQ $8,DI 2325 2326 MOVQ (SI),CX 2327 ADDQ $8,SI 2328 MOVQ CX,(DI) 2329 ADDQ $8,DI 2330 2331 MOVQ (SI),CX 2332 ADDQ $8,SI 2333 MOVQ CX,(DI) 2334 ADDQ $8,DI 2335 2336 MOVQ (SI),CX 2337 ADDQ $8,SI 2338 MOVQ CX,(DI) 2339 ADDQ $8,DI 2340 2341 MOVQ (SI),CX 2342 ADDQ $8,SI 2343 MOVQ CX,(DI) 2344 ADDQ $8,DI 2345 2346 MOVQ (SI),CX 2347 ADDQ $8,SI 2348 MOVQ CX,(DI) 2349 ADDQ $8,DI 2350 2351 MOVQ (SI),CX 2352 ADDQ $8,SI 2353 MOVQ CX,(DI) 2354 ADDQ $8,DI 2355 2356 MOVQ (SI),CX 2357 ADDQ $8,SI 2358 MOVQ CX,(DI) 2359 ADDQ $8,DI 2360 2361 MOVQ (SI),CX 2362 ADDQ $8,SI 2363 MOVQ CX,(DI) 2364 ADDQ $8,DI 2365 2366 MOVQ (SI),CX 2367 ADDQ $8,SI 2368 MOVQ CX,(DI) 2369 ADDQ $8,DI 2370 2371 MOVQ (SI),CX 2372 ADDQ $8,SI 2373 MOVQ CX,(DI) 2374 ADDQ $8,DI 2375 2376 MOVQ (SI),CX 2377 ADDQ $8,SI 2378 MOVQ CX,(DI) 2379 ADDQ $8,DI 2380 2381 MOVQ (SI),CX 2382 ADDQ $8,SI 2383 MOVQ CX,(DI) 2384 ADDQ $8,DI 2385 2386 MOVQ (SI),CX 2387 ADDQ $8,SI 2388 MOVQ CX,(DI) 2389 ADDQ $8,DI 2390 2391 MOVQ (SI),CX 2392 ADDQ $8,SI 2393 MOVQ CX,(DI) 2394 ADDQ $8,DI 2395 2396 MOVQ (SI),CX 2397 ADDQ $8,SI 2398 MOVQ CX,(DI) 2399 ADDQ $8,DI 2400 2401 RET 2402 2403 TEXT runtime·fastrand1(SB), NOSPLIT, $0-4 2404 get_tls(CX) 2405 MOVQ g(CX), AX 2406 MOVQ g_m(AX), AX 2407 MOVL m_fastrand(AX), DX 2408 ADDL DX, DX 2409 MOVL DX, BX 2410 XORL $0x88888eef, DX 2411 CMOVLMI BX, DX 2412 MOVL DX, m_fastrand(AX) 2413 MOVL DX, ret+0(FP) 2414 RET 2415 2416 TEXT runtime·return0(SB), NOSPLIT, $0 2417 MOVL $0, AX 2418 RET 2419 2420 2421 // Called from cgo wrappers, this function returns g->m->curg.stack.hi. 2422 // Must obey the gcc calling convention. 2423 TEXT _cgo_topofstack(SB),NOSPLIT,$0 2424 get_tls(CX) 2425 MOVQ g(CX), AX 2426 MOVQ g_m(AX), AX 2427 MOVQ m_curg(AX), AX 2428 MOVQ (g_stack+stack_hi)(AX), AX 2429 RET 2430 2431 // The top-most function running on a goroutine 2432 // returns to goexit+PCQuantum. 2433 TEXT runtime·goexit(SB),NOSPLIT,$0-0 2434 BYTE $0x90 // NOP 2435 CALL runtime·goexit1(SB) // does not return 2436 2437 TEXT runtime·getg(SB),NOSPLIT,$0-8 2438 get_tls(CX) 2439 MOVQ g(CX), AX 2440 MOVQ AX, ret+0(FP) 2441 RET 2442 2443 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 2444 MOVQ addr+0(FP), AX 2445 PREFETCHT0 (AX) 2446 RET 2447 2448 TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8 2449 MOVQ addr+0(FP), AX 2450 PREFETCHT1 (AX) 2451 RET 2452 2453 TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8 2454 MOVQ addr+0(FP), AX 2455 PREFETCHT2 (AX) 2456 RET 2457 2458 TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8 2459 MOVQ addr+0(FP), AX 2460 PREFETCHNTA (AX) 2461 RET