// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "zasm_GOOS_GOARCH.h"

// _rt0_amd64 is the startup routine defined in this file.
// On entry DI holds argc and SI holds argv. It carves g0's stack bounds
// out of the current stack, records CPUID feature bits, optionally calls
// _cgo_init, sets up TLS, installs g0/m0, runs the runtime init functions,
// queues runtime·main as a new goroutine and calls mstart.
TEXT _rt0_amd64(SB),7,$-8
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto
	ANDQ	$~15, SP		// round SP down to a 16-byte boundary
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX
	MOVQ	BX, g_stackguard(DI)
	MOVQ	SP, g_stackbase(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0		// CPUID leaf 0 returned 0 in AX: leaf 1 is not queried
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// g0 already in DI
	MOVQ	DI, CX	// Win64 uses CX for first parameter
	MOVQ	$setmg_gcc<>(SB), SI
	CALL	AX
	// When iswindows == 0 the TLS setup below is skipped;
	// on Windows execution falls through into needtls.
	CMPL	runtime·iswindows(SB), $0
	JEQ ok

needtls:
	// skip TLS setup on Plan 9
	CMPL	runtime·isplan9(SB), $1
	JEQ ok

	LEAQ	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)		// value read back correctly: skip the abort store
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX
	MOVQ	AX, m(BX)

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	// argc/argv were parked at 16(SP)/24(SP) above; move them into the
	// outgoing argument slots for runtime·args.
	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·hashinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHQ	$runtime·main·f(SB)		// entry
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	// Reached only if mstart returns: store to address 0xf1 to fault.
	MOVL	$0xf1, 0xf1  // crash
	RET

// runtime·main·f is an 8-byte data word holding the address of runtime·main;
// its address is what _rt0_amd64 passes to newproc as the entry.
DATA	runtime·main·f+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·main·f(SB),8,$8

// runtime·breakpoint emits the single byte 0xcc
// (the x86 INT3 breakpoint opcode) and returns.
TEXT runtime·breakpoint(SB),7,$0
	BYTE	$0xcc
	RET

TEXT runtime·asminit(SB),7,$0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP and PC and the current g in the Gobuf.
TEXT runtime·gosave(SB), 7, $0
	MOVQ	8(SP), AX		// gobuf
	LEAQ	8(SP), BX		// caller's SP
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC
	MOVQ	BX, gobuf_pc(AX)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*, uintptr)
// restore state from Gobuf; longjmp
// The second argument is left in AX, so it appears as the return
// value at the resumed PC.
TEXT runtime·gogo(SB), 7, $0
	MOVQ	16(SP), AX		// return 2nd arg
	MOVQ	8(SP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil (faults on a nil g)
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// void gogocall(Gobuf*, void (*fn)(void), uintptr r0)
// restore state from Gobuf but then call fn.
// (call fn, returning to state in Gobuf)
// The saved PC is pushed as a return address before jumping to fn,
// so fn's RET resumes at gobuf.pc. The third argument is left in DX.
TEXT runtime·gogocall(SB), 7, $0
	MOVQ	24(SP), DX	// context
	MOVQ	16(SP), AX		// fn
	MOVQ	8(SP), BX		// gobuf
	MOVQ	gobuf_g(BX), DI
	get_tls(CX)
	MOVQ	DI, g(CX)
	MOVQ	0(DI), CX	// make sure g != nil
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_pc(BX), BX
	PUSHQ	BX
	JMP	AX
	POPQ	BX	// not reached

// void gogocallfn(Gobuf*, FuncVal*)
// restore state from Gobuf but then call fn.
// (call fn, returning to state in Gobuf)
TEXT runtime·gogocallfn(SB), 7, $0
	MOVQ	16(SP), DX		// fn (FuncVal*); stays in DX across the jump
	MOVQ	8(SP), BX		// gobuf
	MOVQ	gobuf_g(BX), AX
	get_tls(CX)
	MOVQ	AX, g(CX)
	MOVQ	0(AX), CX	// make sure g != nil
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_pc(BX), BX
	PUSHQ	BX		// saved PC becomes the return address of the target
	MOVQ	0(DX), BX	// code pointer is the first word of the FuncVal
	JMP	BX
	POPQ	BX	// not reached

// void mcall(void (*fn)(G*))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), 7, $0
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->gobuf
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	8(SP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	m(CX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	2(PC)
	CALL	runtime·badmcall(SB)
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->gobuf.sp
	PUSHQ	AX		// argument to fn: the g we switched away from
	CALL	DI
	POPQ	AX
	CALL	runtime·badmcall2(SB)	// reached only if fn returned
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
// Caller has already done get_tls(CX); MOVQ m(CX), BX.
TEXT runtime·morestack(SB),7,$0
	// Cannot grow scheduler stack (m->g0).
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	2(PC)
	INT	$3

	MOVQ	DX, m_cret(BX)

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	AX, m_moreargp(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set m->morepc to f's PC.
	MOVQ	0(SP), AX
	MOVQ	AX, m_morepc(BX)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BP
	MOVQ	BP, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1003	// crash if newstack returns
	RET

// Called from reflection library.  Mimics morestack,
// reuses stack growth code to create a frame
// with the desired args running the desired function.
//
// func call(fn *byte, arg *byte, argsize uint32).
TEXT reflect·call(SB), 7, $0
	get_tls(CX)
	MOVQ	m(CX), BX

	// Save our caller's state as the PC and SP to
	// restore when returning from f.
	MOVQ	0(SP), AX	// our caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	8(SP), AX	// our caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	g(CX), AX
	MOVQ	AX, (m_morebuf+gobuf_g)(BX)

	// Set up morestack arguments to call f on a new stack.
	// We set f's frame size to 1, as a hint to newstack
	// that this is a call from reflect·call.
	// If it turns out that f needs a larger frame than
	// the default stack, f's usual stack growth prolog will
	// allocate a new segment (and recopy the arguments).
	MOVQ	8(SP), AX	// fn
	MOVQ	16(SP), DX	// arg frame
	MOVL	24(SP), CX	// arg size

	MOVQ	AX, m_morepc(BX)	// f's PC
	MOVQ	DX, m_moreargp(BX)	// argument frame pointer
	MOVL	CX, m_moreargsize(BX)	// f's argument size
	MOVL	$1, m_moreframesize(BX)	// f's frame size

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BP
	get_tls(CX)		// CX was reused for the arg size above; reload TLS base
	MOVQ	BP, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1103	// crash if newstack returns
	RET

// Return point when leaving stack.
TEXT runtime·lessstack(SB), 7, $0
	// Save return value in m->cret
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVQ	AX, m_cret(BX)

	// Call oldstack on m->g0's stack.
	MOVQ	m_g0(BX), BP
	MOVQ	BP, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·oldstack(SB)
	MOVQ	$0, 0x1004	// crash if oldstack returns
	RET

// morestack trampolines
// Each variant stores a 64-bit value at m_moreframesize, derived from AX
// (or zero), and then jumps to runtime·morestack with m in BX.
// NOTE(review): the 64-bit store presumably also covers the 32-bit
// m_moreargsize field directly after m_moreframesize — confirm against
// the M struct layout.
TEXT runtime·morestack00(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVQ	$0, AX			// both halves zero
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX

TEXT runtime·morestack01(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	SHLQ	$32, AX			// low 32 bits of AX move to the high half; low half becomes 0
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX

TEXT runtime·morestack10(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVLQZX	AX, AX			// keep low 32 bits of AX, zero the high half
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX

TEXT runtime·morestack11(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVQ	AX, m_moreframesize(BX)	// all 64 bits of AX stored as-is
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX

// subcases of morestack01
// with const of 8,16,...48
// Each pushes n = const/8; morestack<> pops it and shifts left by 35,
// which places n*8 in the high 32 bits of the stored value.
TEXT runtime·morestack8(SB),7,$0
	PUSHQ	$1
	MOVQ	$morestack<>(SB), AX
	JMP	AX

TEXT runtime·morestack16(SB),7,$0
	PUSHQ	$2
	MOVQ	$morestack<>(SB), AX
	JMP	AX

TEXT runtime·morestack24(SB),7,$0
	PUSHQ	$3
	MOVQ	$morestack<>(SB), AX
	JMP	AX

TEXT runtime·morestack32(SB),7,$0
	PUSHQ	$4
	MOVQ	$morestack<>(SB), AX
	JMP	AX

TEXT runtime·morestack40(SB),7,$0
	PUSHQ	$5
	MOVQ	$morestack<>(SB), AX
	JMP	AX

TEXT runtime·morestack48(SB),7,$0
	PUSHQ	$6
	MOVQ	$morestack<>(SB), AX
	JMP	AX

// Shared tail of the morestackN trampolines above.
TEXT morestack<>(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	POPQ	AX			// n pushed by the trampoline
	SHLQ	$35, AX			// (n*8) << 32
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), 7, $0
	MOVQ	8(SP), BX		// val
	MOVL	16(SP), AX		// old (CMPXCHG compares against AX)
	MOVL	20(SP), CX		// new
	LOCK
	CMPXCHGL	CX, 0(BX)
	JZ 3(PC)			// swapped: skip the "return 0" pair
	MOVL	$0, AX
	RET
	MOVL	$1, AX
	RET

// bool	runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
//	if(*val == *old){
//		*val = new;
//		return 1;
//	} else {
//		*old = *val
//		return 0;
//	}
TEXT runtime·cas64(SB), 7, $0
	MOVQ	8(SP), BX		// val
	MOVQ	16(SP), BP		// old (pointer)
	MOVQ	0(BP), AX		// *old
	MOVQ	24(SP), CX		// new
	LOCK
	CMPXCHGQ	CX, 0(BX)
	JNZ	cas64_fail
	MOVL	$1, AX
	RET
cas64_fail:
	MOVQ	AX, 0(BP)		// on failure CMPXCHG left the observed *val in AX
	MOVL	$0, AX
	RET

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp(SB), 7, $0
	MOVQ	8(SP), BX		// val
	MOVQ	16(SP), AX		// old
	MOVQ	24(SP), CX		// new
	LOCK
	CMPXCHGQ	CX, 0(BX)
	JZ 3(PC)			// swapped: skip the "return 0" pair
	MOVL	$0, AX
	RET
	MOVL	$1, AX
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), 7, $0
	MOVQ	8(SP), BX		// val
	MOVL	16(SP), AX		// delta
	MOVL	AX, CX			// keep a copy: XADD overwrites AX with the old *val
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX			// old value + delta = new value
	RET

// 64-bit counterpart of xadd: atomically adds the second argument to
// the 64-bit word at the first argument and returns the new value in AX.
TEXT runtime·xadd64(SB), 7, $0
	MOVQ	8(SP), BX		// val
	MOVQ	16(SP), AX		// delta
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	RET

// xchg stores the 32-bit second argument at the address in the first
// argument and returns the previous contents in AX.
// No LOCK prefix is written: XCHG with a memory operand is implicitly
// locked on x86.
TEXT runtime·xchg(SB), 7, $0
	MOVQ	8(SP), BX
	MOVL	16(SP), AX
	XCHGL	AX, 0(BX)
	RET

// 64-bit counterpart of xchg.
TEXT runtime·xchg64(SB), 7, $0
	MOVQ	8(SP), BX
	MOVQ	16(SP), AX
	XCHGQ	AX, 0(BX)
	RET

// procyield executes PAUSE once per count in its 32-bit argument.
// The counter is decremented before it is tested, so an argument of 0
// wraps around rather than returning immediately.
TEXT runtime·procyield(SB),7,$0
	MOVL	8(SP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// atomicstorep stores the pointer-sized second argument at the address
// in the first argument using XCHG; the old value is discarded.
TEXT runtime·atomicstorep(SB), 7, $0
	MOVQ	8(SP), BX
	MOVQ	16(SP), AX
	XCHGQ	AX, 0(BX)
	RET

// atomicstore is the 32-bit form of atomicstorep.
TEXT runtime·atomicstore(SB), 7, $0
	MOVQ	8(SP), BX
	MOVL	16(SP), AX
	XCHGL	AX, 0(BX)
	RET

// atomicstore64 is the 64-bit integer form; same instruction sequence
// as atomicstorep.
TEXT runtime·atomicstore64(SB), 7, $0
	MOVQ	8(SP), BX
	MOVQ	16(SP), AX
	XCHGQ	AX, 0(BX)
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), 7, $0
	MOVQ	8(SP), DX	// fn
	MOVQ	16(SP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	SUBQ	$5, (SP)	// return to CALL again
	MOVQ	0(DX), BX	// code pointer is the first word at fn
	JMP	BX	// but first run the deferred function

// Dummy function to use in saved gobuf.PC,
// to match SP pointing at a return address.
// The gobuf.PC is unused by the contortions here
// but setting it to return will make the traceback code work.
TEXT return<>(SB),7,$0
	RET

// asmcgocall(void(*fn)(void*), void *arg)
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.c for more details.
TEXT runtime·asmcgocall(SB),7,$0
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	m(CX), BP
	MOVQ	m_g0(BP), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	// Already on g0: skip exactly the five instructions below.
	// The 6(PC) offset must be updated if that sequence changes length.
	JEQ	6(PC)
	MOVQ	SP, (g_sched+gobuf_sp)(DI)
	MOVQ	$return<>(SB), (g_sched+gobuf_pc)(DI)
	MOVQ	DI, (g_sched+gobuf_g)(DI)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	DX, 40(SP)	// save SP
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	DI, g(CX)
	MOVQ	40(SP), SP
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),7,$24
	LEAQ	fn+0(FP), AX		// address of the fn slot, used as the FuncVal*
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$24
	// If m is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// A zero TLS base is treated like a nil m: push 0 as the saved
	// "m on entry" and go straight to needm.
	CMPQ	CX, $0
	JNE	3(PC)
	PUSHQ	$0
	JMP	needm
#endif
	MOVQ	m(CX), BP
	PUSHQ	BP		// remember m on entry; popped before the final RET
	CMPQ	BP, $0
	JNE	havem
needm:
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	get_tls(CX)
	MOVQ	m(CX), BP

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	MOVQ	m_g0(BP), SI
	PUSHQ	(g_sched+gobuf_sp)(SI)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg
	// with the three arguments.  Because we are taking over
	// the execution of m->curg but *not* resuming what had
	// been running, we need to save that information (m->curg->gobuf)
	// so that we can restore it when we're done.
	// We can restore m->curg->gobuf.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->gobuf.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because we defined cgocallbackg to have
	// a frame size of 24, the same amount that we use below),
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	MOVQ	fn+0(FP), AX
	MOVQ	frame+8(FP), BX
	MOVQ	framesize+16(FP), DX

	MOVQ	m_curg(BP), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI

	// Push gobuf.pc
	MOVQ	(g_sched+gobuf_pc)(SI), BP
	SUBQ	$8, DI
	MOVQ	BP, 0(DI)

	// Push arguments to cgocallbackg.
	// Frame size here must match the frame size above
	// to trick traceback routines into doing the right thing.
	SUBQ	$24, DI
	MOVQ	AX, 0(DI)
	MOVQ	BX, 8(DI)
	MOVQ	DX, 16(DI)

	// Switch stack and make the call.
	MOVQ	DI, SP
	CALL	runtime·cgocallbackg(SB)

	// Restore g->gobuf (== m->curg->gobuf) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	24(SP), BP		// the gobuf.pc pushed above the 24-byte arg area
	MOVQ	BP, (g_sched+gobuf_pc)(SI)
	LEAQ	(24+8)(SP), DI		// SP as it was before the pc and args were pushed
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	m(CX), BP
	MOVQ	m_g0(BP), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	POPQ	(g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	POPQ	BP
	CMPQ	BP, $0
	JNE 3(PC)		// had an m on entry: skip the dropm call
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setmg(M*, G*); set m and g. for use by needm.
TEXT runtime·setmg(SB), 7, $0
	MOVQ	mm+0(FP), AX
#ifdef GOOS_windows
	// On Windows the slot at 0x28(GS) is pointed at m->tls,
	// or cleared (followed by an early return) when m is nil.
	CMPQ	AX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)
	RET
settls:
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	mm+0(FP), AX		// reload m: AX may have been overwritten above
	MOVQ	AX, m(CX)
	MOVQ	gg+8(FP), BX
	MOVQ	BX, g(CX)
	RET

// void setmg_gcc(M*, G*); set m and g called from gcc.
// Takes m in DI and g in SI rather than on the stack.
TEXT setmg_gcc<>(SB),7,$0
	get_tls(AX)
	MOVQ	DI, m(AX)
	MOVQ	SI, g(AX)
	RET

// check that SP lies strictly between g->stackguard and g->stackbase,
// i.e. g->stackguard < SP < g->stackbase; otherwise trap with INT $3.
TEXT runtime·stackcheck(SB), 7, $0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	g_stackbase(AX), SP
	JHI	2(PC)			// stackbase > SP: ok
	INT	$3
	CMPQ	SP, g_stackguard(AX)
	JHI	2(PC)			// SP > stackguard: ok
	INT	$3
	RET

// memclr zeroes count bytes starting at addr:
// count/8 quadwords with REP STOSQ, then the remaining count%8 bytes
// with REP STOSB.
TEXT runtime·memclr(SB),7,$0
	MOVQ	8(SP), DI		// arg 1 addr
	MOVQ	16(SP), CX		// arg 2 count
	MOVQ	CX, BX
	ANDQ	$7, BX			// BX = count % 8 (tail bytes)
	SHRQ	$3, CX			// CX = count / 8 (quadwords)
	MOVQ	$0, AX
	CLD
	REP
	STOSQ
	MOVQ	BX, CX
	REP
	STOSB
	RET

// getcallerpc takes the address of its caller's first argument and
// returns the word stored 8 bytes below it (the caller's return PC).
TEXT runtime·getcallerpc(SB),7,$0
	MOVQ	x+0(FP),AX		// addr of first arg
	MOVQ	-8(AX),AX		// get calling pc
	RET

// setcallerpc overwrites the word 8 bytes below the address given as
// the first argument with the second argument.
TEXT runtime·setcallerpc(SB),7,$0
	MOVQ	x+0(FP),AX		// addr of first arg
	MOVQ	x+8(FP), BX
	MOVQ	BX, -8(AX)		// set calling pc
	RET

// getcallersp returns the value passed as its first argument, unchanged.
TEXT runtime·getcallersp(SB),7,$0
	MOVQ	sp+0(FP), AX
	RET

// int64 runtime·cputicks(void)
// Combines the two 32-bit halves RDTSC leaves in DX:AX into one
// 64-bit value in AX.
TEXT runtime·cputicks(SB),7,$0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	RET

// stackguard writes the current SP into the sp slot and
// g->stackguard into the limit slot of its argument frame.
TEXT runtime·stackguard(SB),7,$0
	MOVQ	SP, DX
	MOVQ	DX, sp+0(FP)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_stackguard(BX), DX
	MOVQ	DX, limit+8(FP)
	RET

// 64 bytes of storage whose address _rt0_amd64 passes to settls.
GLOBL runtime·tls0(SB), $64

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),7,$0
	MOVQ	8(SP), DX	// ptr to hash value
	MOVQ	16(SP), CX	// size
	MOVQ	24(SP), AX	// ptr to data
	JMP	runtime·aeshashbody(SB)

// String variant: the third argument points at a string header
// (data pointer at offset 0, length at offset 8).
TEXT runtime·aeshashstr(SB),7,$0
	MOVQ	8(SP), DX	// ptr to hash value
	MOVQ	24(SP), AX	// ptr to string struct
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	runtime·aeshashbody(SB)

// AX: data
// CX: length
// DX: ptr to seed input / hash output
TEXT runtime·aeshashbody(SB),7,$0
	MOVQ	(DX), X0	// seed to low 64 bits of xmm0
	PINSRQ	$1, CX, X0	// size to high 64 bits of xmm0
	MOVO	runtime·aeskeysched+0(SB), X2
	MOVO	runtime·aeskeysched+16(SB), X3
aesloop:
	// consume full 16-byte blocks
	CMPQ	CX, $16
	JB	aesloopend
	MOVOU	(AX), X1
	AESENC	X2, X0
	AESENC	X1, X0
	SUBQ	$16, CX
	ADDQ	$16, AX
	JMP	aesloop
aesloopend:
	TESTQ	CX, CX
	JE	finalize	// no partial block

	TESTQ	$16, AX		// examine bit 4 of the data address
	JNE	highpartial

	// address ends in 0xxxx.  16 bytes loaded
	// at this address won't cross a page boundary, so
	// we can load it directly.
	MOVOU	(AX), X1
	ADDQ	CX, CX		// CX = 2*len, so CX*8 selects a 16-byte table entry
	PAND	masks(SB)(CX*8), X1
	JMP	partial
highpartial:
	// address ends in 1xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-16(AX)(CX*1), X1
	ADDQ	CX, CX		// CX = 2*len, so CX*8 selects a 16-byte table entry
	PSHUFB	shifts(SB)(CX*8), X1
partial:
	// incorporate partial block into hash
	AESENC	X3, X0
	AESENC	X1, X0
finalize:
	// finalize hash
	AESENC	X2, X0
	AESENC	X3, X0
	AESENC	X2, X0
	MOVQ	X0, (DX)	// low 64 bits of the state become the hash value
	RET

// Fixed-size variant for 4-byte data: the seed goes in the low 64 bits
// of X0 and the 32-bit datum in dword lane 2.
TEXT runtime·aeshash32(SB),7,$0
	MOVQ	8(SP), DX	// ptr to hash value
	MOVQ	24(SP), AX	// ptr to data
	MOVQ	(DX), X0	// seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVQ	X0, (DX)
	RET

// Fixed-size variant for 8-byte data: the seed goes in the low 64 bits
// of X0 and the 64-bit datum in the high 64 bits.
TEXT runtime·aeshash64(SB),7,$0
	MOVQ	8(SP), DX	// ptr to hash value
	MOVQ	24(SP), AX	// ptr to data
	MOVQ	(DX), X0	// seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVQ	X0, (DX)
	RET

// simple mask to get rid of data in the high part of the register.
// Sixteen 16-byte entries (two QUADs each, low quad first); entry n
// keeps the low n bytes and clears the rest.
TEXT masks(SB),7,$0
	QUAD $0x0000000000000000
	QUAD $0x0000000000000000
	QUAD $0x00000000000000ff
	QUAD $0x0000000000000000
	QUAD $0x000000000000ffff
	QUAD $0x0000000000000000
	QUAD $0x0000000000ffffff
	QUAD $0x0000000000000000
	QUAD $0x00000000ffffffff
	QUAD $0x0000000000000000
	QUAD $0x000000ffffffffff
	QUAD $0x0000000000000000
	QUAD $0x0000ffffffffffff
	QUAD $0x0000000000000000
	QUAD $0x00ffffffffffffff
	QUAD $0x0000000000000000
	QUAD $0xffffffffffffffff
	QUAD $0x0000000000000000
	QUAD $0xffffffffffffffff
	QUAD $0x00000000000000ff
	QUAD $0xffffffffffffffff
	QUAD $0x000000000000ffff
	QUAD $0xffffffffffffffff
	QUAD $0x0000000000ffffff
	QUAD $0xffffffffffffffff
	QUAD $0x00000000ffffffff
	QUAD $0xffffffffffffffff
	QUAD $0x000000ffffffffff
	QUAD $0xffffffffffffffff
	QUAD $0x0000ffffffffffff
	QUAD $0xffffffffffffffff
	QUAD $0x00ffffffffffffff

// these are arguments to pshufb.
// They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Sixteen 16-byte entries (two QUADs each, low quad first). A control
// byte of 0xff has its top bit set, which makes PSHUFB write zero to
// that destination byte.
TEXT shifts(SB),7,$0
	QUAD $0x0000000000000000
	QUAD $0x0000000000000000
	QUAD $0xffffffffffffff0f
	QUAD $0xffffffffffffffff
	QUAD $0xffffffffffff0f0e
	QUAD $0xffffffffffffffff
	QUAD $0xffffffffff0f0e0d
	QUAD $0xffffffffffffffff
	QUAD $0xffffffff0f0e0d0c
	QUAD $0xffffffffffffffff
	QUAD $0xffffff0f0e0d0c0b
	QUAD $0xffffffffffffffff
	QUAD $0xffff0f0e0d0c0b0a
	QUAD $0xffffffffffffffff
	QUAD $0xff0f0e0d0c0b0a09
	QUAD $0xffffffffffffffff
	QUAD $0x0f0e0d0c0b0a0908
	QUAD $0xffffffffffffffff
	QUAD $0x0e0d0c0b0a090807
	QUAD $0xffffffffffffff0f
	QUAD $0x0d0c0b0a09080706
	QUAD $0xffffffffffff0f0e
	QUAD $0x0c0b0a0908070605
	QUAD $0xffffffffff0f0e0d
	QUAD $0x0b0a090807060504
	QUAD $0xffffffff0f0e0d0c
	QUAD $0x0a09080706050403
	QUAD $0xffffff0f0e0d0c0b
	QUAD $0x0908070605040302
	QUAD $0xffff0f0e0d0c0b0a
	QUAD $0x0807060504030201
	QUAD $0xff0f0e0d0c0b0a09

// memeq(a, b, count): loads the three arguments into the registers
// memeqbody expects and tail-jumps to it, so memeqbody's AX result
// (1 if equal, 0 otherwise) is returned directly to the caller.
TEXT runtime·memeq(SB),7,$0
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	MOVQ	count+16(FP), BX
	JMP	runtime·memeqbody(SB)


// bytes·Equal compares two byte slices. Different lengths give false
// without looking at the data; otherwise memeqbody decides.
// The one-byte result is stored at ret+48(FP).
TEXT bytes·Equal(SB),7,$0
	MOVQ	a_len+8(FP), BX
	MOVQ	b_len+32(FP), CX
	XORQ	AX, AX			// result defaults to false
	CMPQ	BX, CX
	JNE	eqret
	MOVQ	a+0(FP), SI
	MOVQ	b+24(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+48(FP)
	RET

// a in SI
// b in DI
// count in BX
// Returns 1 in AX if the two count-byte regions are equal, 0 otherwise.
TEXT runtime·memeqbody(SB),7,$0
	XORQ	AX, AX			// AX = 0, so the bare RETs below report "not equal"

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0			// X0 byte is 0xff only where all four compares matched
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff		// all 16 mask bits set: the 64 bytes were equal
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
	// Compares the 8 bytes that end at the end of each region; this
	// re-reads bytes already compared rather than reading past the end.
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	// fewer than 8 bytes in total
	CMPQ	BX, $0
	JEQ	equal			// ZF is set by the compare, so SETEQ yields 1

	// CX = -8*count. Shift instructions use the count modulo 64,
	// so this acts as a shift by 64-8*count bits.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// The low count bytes of SI and DI hold the data; bytes above them
	// may be garbage. Subtract, then shift the garbage out the top:
	// the result is zero (ZF set) exactly when the wanted bytes match.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET