// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go continues process startup.
// It copies argc/argv (read from its caller's frame) onto an aligned
// scratch stack, sets g0's default stack bounds, requires CPUID+MMX,
// sets up TLS (through _cgo_init if present, otherwise ldt0setup),
// links m0 and g0, and then calls check, args, osinit, schedinit,
// queues runtime·main through newproc and enters mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL			// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)	// length of bad_proc_msg, see GLOBL below
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	MOVL	$1, AX
	CPUID
	MOVL	CX, AX	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)	// BP still holds &runtime·g0
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// Message written to fd 2 by rt0_go's bad_proc path.
// Total length is 0x3d bytes, matching the count passed to runtime·write.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// mainPC holds the address of runtime·main; rt0_go pushes its address
// as the entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3 and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	PUSHL	$0x27F		// control word is loaded from the stack slot
	FLDCW	0(SP)
	POPL	AX		// discard the pushed word
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g we switched away from
	MOVL	DI, DX		// DX = fn (closure context)
	MOVL	0(DI), DI	// DI = fn's code pointer
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.
// We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when the current g is gsignal or g0; when it is
// m->curg, saves state in g->sched, switches to g0's stack, calls fn
// and switches back. Any other g is reported through badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// Trap instead of falling through into the switch path below
	// should the call above ever return.
	INT	$3

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = fn (closure context)
	MOVL	0(DI), DI	// DI = fn's code pointer
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack_noctxt clears DX (the value morestack stores as
// g->sched.ctxt) and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	MOVL	stkbar_savedLRVal(DX)(BX*1), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is <= MAXSIZE
// (unsigned compare); otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call function: it copies argsize bytes
// of arguments into its frame, calls f, copies the bytes from retoffset
// onward back to argptr, and then calls callwritebarrier.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE `cycles` times.
// The counter is decremented before it is tested, so cycles == 0 wraps
// around and spins for 2^32 iterations.
// The frame declares the 4-byte argument that the body reads.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved across the call.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->g0->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g. for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the word stored just below argp.
// If that word equals stackBarrierPC, it calls nextBarrierPC and
// returns that function's result instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+4(FP)
	RET

// setcallerpc stores pc into the word just below argp, unless that word
// equals stackBarrierPC, in which case pc is passed to setNextBarrierPC.
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX
	MOVL	-4(AX), DX
	CMPL	DX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVL	BX, -4(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns argp unchanged.
TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Returns the RDTSC counter. When the SSE2 bit is set in cpuid_edx, a
// fence is issued first: LFENCE if lfenceBeforeRdtsc is 1, else MFENCE.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared without NOSPLIT; rt0_go calls
// it to "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// abort traps with INT $3. If execution is resumed past the trap it
// spins here instead of running into whatever code follows.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$0x3
loop:
	JMP	loop

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX	// memhash result
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
// Entered by JMP from aeshash/aeshashstr, so h+4(FP) below reads the
// seed from that function's argument frame.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX		// BX*2, scaled by 8 below: 16-byte table entries
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx.  Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 packs the seed and the 4 data bytes at p into X0 and runs
// three AESENC rounds keyed from aeskeysched.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 packs the 8 data bytes at p and the seed into X0 and runs
// three AESENC rounds keyed from aeskeysched.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// masks<> is a read-only table of sixteen 16-byte entries.
// Entry n (at byte offset 16*n) has its n lowest-addressed bytes set
// to 0xff and the remaining 16-n bytes set to zero.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Entry n (at byte offset 16*n) selects source bytes 16-n..15 into
// destination bytes 0..n-1; every other selector byte is 0xff.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM stores true in its 1-byte result when the addresses of
// both masks<> and shifts<> have their low four bits clear.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX			// a low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// memeq loads a, b and size from its arguments and tail-jumps to
// memeqbody, which writes the boolean result at ret+12(FP).
TEXT runtime·memeq(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
// Identical pointers compare equal without reading memory; otherwise
// the byte count is read from 4(DX) and memeqbody does the comparison.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same			// same data pointer: equal without comparing bytes
	MOVL	s1len+4(FP), BX		// only s1's length is read
	LEAL	v+16(FP), AX
	JMP	runtime·memeqbody(SB)
same:
	MOVB	$1, v+16(FP)
	RET

// bytes·Equal stores false when the two lengths differ; otherwise it
// tail-jumps to memeqbody to compare a_len bytes.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody compares count bytes at a and b and stores 1 (equal) or
// 0 (different) in the result byte. It is reached by JMP from the
// wrappers above and takes its inputs in registers:
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0			// fold the four per-byte equality results into X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// one bit per byte lane
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff		// all 16 bits set: all 64 bytes matched
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end at the end of each buffer. This may
	// re-read bytes already compared; this path is only reached when
	// the original count was at least 4.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// count is 0-3 here.
small:
	CMPL	BX, $0
	JEQ	equal			// ZF is set, so the SETEQ at equal stores 1

	LEAL	0(BX*8), CX
	NEGL	CX			// CX = -8*count; as a shift count this is 32-8*count

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	SUBL	SI, DI
	SHLL	CX, DI			// discard bits above the low count bytes; ZF set iff those bytes match
equal:
	SETEQ	(AX)
	RET

// cmpstring loads both strings' base pointers and lengths and
// tail-jumps to cmpbody, which writes the result word at ret+16(FP).
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare loads pointer and length of each slice (offsets 0/4
// and 12/16) and tail-jumps to cmpbody, which writes the result word
// at ret+24(FP).
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte scans s_len bytes starting at s for the byte c and
// stores the index of the first match, or -1 if there is none.
// If s_len is 0, SCASB does not execute and ZF is left as it was;
// both exits then store -1 (the match path computes SI-SI-1).
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB
	JZ	3(PC)			// match: skip the two not-found instructions
	MOVL	$-1, ret+16(FP)
	RET
	SUBL	SI, DI			// DI is one past the matching byte
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte is the same scan as bytes·IndexByte with the
// argument layout of a string: c is at offset 8 and ret at offset 12.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB
	JZ	3(PC)			// match: skip the two not-found instructions
	MOVL	$-1, ret+12(FP)
	RET
	SUBL	SI, DI			// DI is one past the matching byte
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// cmpbody orders two byte sequences. It is reached by JMP from the
// wrappers above and takes its inputs in registers.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX			// DX = blen-alen
	CMOVLGT	BX, BP			// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame			// same pointer: only the lengths can differ
	CMPL	BP, $4
	JB	small
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	mediumloop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX		// convert EQ to NE
	JNE	diff16			// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX			// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX			// 1 if a's byte is above b's (unsigned)
	LEAL	-1(DX*2), DX		// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// Compare the 4 bytes that end at the end of the common prefix.
	// This may re-read bytes already found equal.
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX			// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX			// find bit differences
	BSRL	CX, CX			// index of highest bit difference
	SHRL	CX, BX			// move a's bit to bottom
	ANDL	$1, BX			// mask bit
	LEAL	-1(BX*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	LEAL	(BP*8), CX
	NEGL	CX			// CX = -8*BP; as a shift count this is 32-8*BP
	JEQ	allsame			// NEGL set ZF: no bytes in common

	// load si
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI			// wanted bytes now in the high end, zeros below

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI			// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI			// find bit differences
	JEQ	allsame
	BSRL	DI, CX			// index of highest bit difference
	SHRL	CX, SI			// move a's bit to bottom
	ANDL	$1, SI			// mask bit
	LEAL	-1(SI*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX			// DX still holds blen-alen
	SETLT	BX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// fastrand1 advances the current m's fastrand word and returns the
// new value: the old value is doubled, and the doubled value is XORed
// with 0x88888eef unless that XOR result is negative.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX			// DX = 2*x
	MOVL	DX, BX			// keep the un-XORed value as the alternative
	XORL	$0x88888eef, DX		// sets the sign flag from the XOR result
	CMOVLMI	BX, DX			// XOR result negative: use plain 2*x instead
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_curg(AX), AX
	MOVL	(g_stack+stack_hi)(AX), AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Prefetching doesn't seem to help.
// prefetcht0 returns immediately without reading its 4 bytes of
// arguments (frame $0-4).
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

// prefetcht1 returns immediately without reading its 4 bytes of
// arguments (frame $0-4).
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

// prefetcht2 returns immediately without reading its 4 bytes of
// arguments (frame $0-4).
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

// prefetchnta returns immediately without reading its 4 bytes of
// arguments (frame $0-4).
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI with respect to register preservation -- it only
// touches AX, CX (implicitly) and DX, but it does not follow the ABI with
// respect to arguments: instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current last element
	MOVL	AX, moduledata_next(DX)		// link the new element after it
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new element is now the last
	RET