// Source: golang.org/toolchain@v0.0.1-go1.9rc2.windows-amd64/src/runtime/asm_386.s

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the bootstrap routine. In order, it:
//   - saves argc/argv in a 16-byte-aligned scratch area,
//   - gives runtime·g0 provisional stack bounds,
//   - probes CPUID and records feature bits in runtime·support_* bytes
//     (writing the "requires MMX" message and exiting with status 1 when
//     CPUID or MMX is missing),
//   - sets up TLS, through _cgo_init when it is non-nil, otherwise
//     through runtime·ldt0setup,
//   - links g0 and m0 to each other,
//   - calls check, args, osinit, schedinit, queues runtime·main with
//     newproc, and calls mstart.
// SI holds the maximum basic CPUID leaf and DI the leaf-1 ECX flags while
// the feature tests run; BP holds &runtime·g0 until _cgo_init is called.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL			// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI			// SI = highest basic CPUID leaf
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

	TESTL	$(1<<26), DX	// SSE2
	SETNE	runtime·support_sse2(SB)

	TESTL	$(1<<9), DI	// SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), DI	// SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), DI	// SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), DI	// POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), DI	// AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), DI	// OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// If OS support for XMM and YMM is not present
	// support_avx will be set back to false later.
	TESTL	$(1<<28), DI	// AVX
	SETNE	runtime·support_avx(SB)

eax7:
	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	osavx
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID

	TESTL	$(1<<3), BX	// BMI1
	SETNE	runtime·support_bmi1(SB)

	// If OS support for XMM and YMM is not present
	// support_avx2 will be set back to false later.
	TESTL	$(1<<5), BX
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX	// BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX	// ERMS
	SETNE	runtime·support_erms(SB)

osavx:
	// nacl does not support XGETBV to test
	// for XMM and YMM OS support.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	noavx
	MOVL	$0, CX
	// For XGETBV, OSXSAVE bit is required and sufficient
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6	// Check for OS support of XMM and YMM registers.
	JE	nocpuinfo
#endif
noavx:
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS. if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// bad_proc_msg is the 0x3d-byte message (including the trailing newline)
// that rt0_go passes to runtime·write on the bad_proc path.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// mainPC is a read-only word holding the address of runtime·main;
// rt0_go pushes its address as the entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3 and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
//
// Stores the caller's SP and PC and the current g into the Gobuf and
// zeroes its ret field. Calls runtime·badctxt if the ctxt field is
// already non-zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
//
// Installs gobuf_g as the current g, loads SP, AX (ret) and DX (ctxt)
// from the Gobuf, clears the sp/ret/ctxt fields, and jumps to gobuf_pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX		// reload gobuf after the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// Jumps to runtime·badmcall when already on m->g0, and to
// runtime·badmcall2 if fn returns.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the old g
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
//
// Calls fn directly when the current g is m->gsignal or m->g0.
// When the current g is m->curg it saves state in g->sched (with
// systemstack_switch as the recorded PC), switches to g0's stack,
// calls fn, then switches back to m->curg and clears g->sched.sp.
// Any other g results in a call to runtime·badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the func value
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
// morestack records f's caller in m->morebuf and f's own context in
// g->sched, switches to m->g0's stack and calls runtime·newstack with
// DX (the closure context) pushed as its argument.
// It calls badmorestackg0 / badmorestackgsignal when the current g is
// m->g0 / m->gsignal.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	PUSHL	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPL	DX	// keep balance check happy
	RET

// morestack_noctxt zeroes DX (the context register) and tail-jumps
// to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the argument size) is at most MAXSIZE,
// using an unsigned comparison; otherwise it falls through to the next
// statement.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias that tail-jumps to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize into CX and dispatches to the smallest
// runtime·callN routine whose frame (16 bytes up to 1 GiB, in powers of
// two) can hold it. Sizes above 1073741824 jump to
// runtime·badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine NAME with a MAXSIZE-byte
// frame. The routine copies argsize bytes from argptr to the bottom of
// its frame, calls the code pointer stored in f (with DX = f), then hands
// argtype (DX), destination (DI), source (SI) and byte count (CX), all
// adjusted by retoffset, to callRet<> to copy the results back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*.
// This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// In: DX, DI, SI, CX, stored in that order as the four stack arguments
// of runtime·reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

// Instantiate one call routine per power-of-two frame size from 16 bytes
// to 1 GiB.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE once per unit of the cycles argument.
// NOTE(review): the count is decremented before it is tested, so
// cycles == 0 wraps around and spins 2^32 times - confirm that no caller
// passes 0.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// publicationBarrier emits no instructions beyond RET.
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3.
//    jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
//
// Writes sched.sp and sched.pc, zeroes sched.ret, and calls
// runtime·badctxt if sched.ctxt is non-zero. AX and BX are saved on
// entry and restored before returning.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Registers: AX = fn, BX = arg, DX = SP on entry, DI = g on entry,
// SI = m->g0. fn's AX on return is stored as the int32 result.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
//
// DX carries the "old m" across the call: it is the current m when one
// already existed, or whatever runtime·needm leaves in 0(SP) (which is
// zeroed before the call) otherwise. A zero DX at the end triggers
// runtime·dropm.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC) // TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX // saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
//
// On Windows it first stores the address of gg->m->tls (or 0 when gg is
// nil, returning immediately) at 0x14(FS); it then stores gg in the TLS
// g slot.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
//
// Stores its single stack argument into the TLS g slot.
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Executes INT $3 when either bound check fails.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the word stored 4 bytes below the address passed
// in argp.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
//
// Returns the RDTSC counter (EDX:EAX). When support_sse2 is set, a fence
// is issued first: LFENCE if lfenceBeforeRdtsc is set, MFENCE otherwise.
// The fences are emitted as raw BYTE sequences.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	CMPB	runtime·support_sse2(SB), $1
	JNE	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

// ldt0setup calls runtime·setldt(7, &m0.tls, 32).
TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc does nothing. It is declared with flags 0 rather than
// NOSPLIT; rt0_go calls it to "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
// The size is read from offset 4 of the closure pointed to by DX.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size stored in the closure
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX	// memhash result
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
//
// aeshash loads the data pointer, the size and the address of the result
// slot into AX, BX and DX and tail-jumps to aeshashbody.
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

// aeshashstr unpacks the string header (data pointer and length) that p
// points to and tail-jumps to aeshashbody.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
//
// The seed is read from h+4(FP), i.e. from the argument frame of the
// routine that jumped here. Inputs are handled by length class:
// 0, 1-15, 16, 17-32, 33-64 and 65+ bytes. The low 32 bits of the final
// XMM state are stored at (DX).
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX		// BX*2, scaled by 8 below: 16-byte table entries
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX		// BX*2, scaled by 8 below: 16-byte table entries
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p: seed in dword 0, data in dword 1,
// then three AESENC rounds keyed from runtime·aeskeysched.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p: data in the low quadword, seed in
// dword 2, then three AESENC rounds keyed from runtime·aeskeysched.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
DATA masks<>+0x00(SB)/4, $0x00000000	// entry 0: keep no bytes
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff	// entry 1: keep the low 1 byte
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff	// entry 2: keep the low 2 bytes
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff	// entry 3: keep the low 3 bytes
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff	// entry 4: keep the low 4 bytes
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff	// entry 5: keep the low 5 bytes
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff	// entry 6: keep the low 6 bytes
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff	// entry 7: keep the low 7 bytes
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff	// entry 8: keep the low 8 bytes
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff	// entry 9: keep the low 9 bytes
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff	// entry 10: keep the low 10 bytes
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff	// entry 11: keep the low 11 bytes
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff	// entry 12: keep the low 12 bytes
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff	// entry 13: keep the low 13 bytes
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff	// entry 14: keep the low 14 bytes
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff	// entry 15: keep the low 15 bytes
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// PSHUFB control vectors. Entry i (the 16 bytes at offset 16*i) moves the
// top i bytes of the register down to the bottom of the register; a 0xff
// control byte zeroes the corresponding result byte.
DATA shifts<>+0x00(SB)/4, $0x00000000	// entry 0
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f	// entry 1: byte 15 -> byte 0
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// entry 2: bytes 14..15 -> 0..1
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// entry 3: bytes 13..15 -> 0..2
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// entry 4: bytes 12..15 -> 0..3
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// entry 5: bytes 11..15 -> 0..4
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// entry 6: bytes 10..15 -> 0..5
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// entry 7: bytes 9..15 -> 0..6
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// entry 8: bytes 8..15 -> 0..7
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807	// entry 9: bytes 7..15 -> 0..8
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706	// entry 10: bytes 6..15 -> 0..9
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605	// entry 11: bytes 5..15 -> 0..10
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504	// entry 12: bytes 4..15 -> 0..11
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403	// entry 13: bytes 3..15 -> 0..12
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302	// entry 14: bytes 2..15 -> 0..13
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201	// entry 15: bytes 1..15 -> 0..14
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports whether both masks<>(SB) and shifts<>(SB) start on a
// 16-byte boundary.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), BX
	MOVL	$masks<>(SB), AX
	ORL	BX, AX			// any low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	same_ptr		// identical pointers: equal without reading memory
	LEAL	ret+12(FP), AX		// memeqbody writes the result byte through AX
	MOVL	size+8(FP), BX
	JMP	runtime·memeqbody(SB)
same_ptr:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	same_ptr		// identical pointers: equal without reading memory
	LEAL	ret+8(FP), AX		// memeqbody writes the result byte through AX
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	JMP	runtime·memeqbody(SB)
same_ptr:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length, so only s1_len is read.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	CMPL	SI, DI
	JEQ	same_base		// same backing pointer: equal
	LEAL	ret+16(FP), AX		// memeqbody writes the result byte through AX
	MOVL	s1_len+4(FP), BX
	JMP	runtime·memeqbody(SB)
same_base:
	MOVB	$1, ret+16(FP)
	RET

// bytes·Equal reports whether slices a and b have the same length and
// the same contents.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	len_differs
	LEAL	ret+24(FP), AX		// memeqbody writes the result byte through AX
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	JMP	runtime·memeqbody(SB)
len_differs:
	MOVB	$0, ret+24(FP)
	RET

// memeqbody compares BX bytes at SI and DI and stores 1 (equal) or
// 0 (different) into the byte addressed by AX.
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	under4

	// 64 bytes per iteration in XMM registers; needs SSE2 and
	// at least 64 bytes remaining.
loop64:
	CMPL	BX, $64
	JB	loop4
	CMPB	runtime·support_sse2(SB), $1
	JNE	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0			// combine the four per-byte equality masks
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// one bit per byte lane
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff		// all 16 lanes equal in all four pairs?
	JEQ	loop64
	MOVB	$0, (AX)
	RET

	// 4 bytes per iteration in 32-bit registers, while more than
	// 4 bytes remain.
loop4:
	CMPL	BX, $4
	JBE	tail4
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	loop4
	MOVB	$0, (AX)
	RET

	// 0-4 bytes remain. Compare the 4 bytes that end at the end of
	// each buffer; this may overlap bytes that were already compared.
tail4:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// Fewer than 4 bytes in total.
under4:
	CMPL	BX, $0
	JEQ	store_result		// ZF is set here, so the SETEQ below stores 1

	LEAL	0(BX*8), CX
	NEGL	CX			// CX = -8*count; the shifts below use it as a count

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	a_page_end

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	a_ready
a_page_end:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
a_ready:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	b_page_end
	MOVL	(DI), DI
	JMP	b_ready
b_page_end:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
b_ready:

	SUBL	SI, DI
	SHLL	CX, DI			// drop the bytes beyond count; ZF set iff the rest match
store_result:
	SETEQ	(AX)
	RET

// cmpstring loads both strings into the register convention of cmpbody
// and tail-jumps to it; cmpbody stores the result through AX.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·Compare loads both slices into the register convention of cmpbody
// and tail-jumps to it; cmpbody stores the result through AX.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s2+12(FP), DI
	MOVL	s1+4(FP), BX
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)

// bytes·IndexByte stores in ret the index of the first byte equal to c
// in s, or -1 if there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB		// scan forward from DI for AL, at most CX bytes
	// With s_len == 0 SCASB never executes and ZF is stale, but both
	// paths below then store -1 (DI still equals SI on the found path).
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	SUBL	SI, DI			// DI stopped one past the match
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET

// strings·IndexByte stores in ret the index of the first byte equal to c
// in s, or -1 if there is none.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB		// scan forward from DI for AL, at most CX bytes
	// With s_len == 0 SCASB never executes and ZF is stale, but both
	// paths below then store -1 (DI still equals SI on the found path).
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	SUBL	SI, DI			// DI stopped one past the match
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET

// cmpbody compares a and b and stores the result through AX.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX			// DX = blen-alen
	JLE	min_done
	MOVL	BX, BP
min_done:				// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	lengths_decide		// same pointer: only the lengths can differ
	CMPL	BP, $4
	JB	under4
	CMPB	runtime·support_sse2(SB), $1
	JNE	loop4

	// 16 bytes per iteration in XMM registers.
loop16:
	CMPL	BP, $16
	JB	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX		// convert EQ to NE
	JNE	mismatch16		// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	loop16

mismatch16:
	BSFL	BX, BX			// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX
	LEAL	-1(DX*2), DX		// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

	// 4 bytes per iteration while more than 4 bytes remain.
loop4:
	CMPL	BP, $4
	JBE	tail4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	mismatch4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	loop4

	// 0-4 bytes remain: compare the 4 bytes ending at the end of the
	// common prefix.
tail4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	lengths_decide

mismatch4:
	BSWAPL	BX			// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX			// find bit differences
	BSRL	CX, CX			// index of highest bit difference
	SHRL	CX, BX			// move a's bit to bottom
	ANDL	$1, BX			// mask bit
	LEAL	-1(BX*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
under4:
	LEAL	(BP*8), CX
	NEGL	CX			// CX = -8*BP; ZF set when BP == 0
	JEQ	lengths_decide

	// load a; when the address ends in 0xfd-0xff, load the 4 bytes
	// that end at a+BP instead and shift them down.
	CMPB	SI, $0xfc
	JA	a_page_end
	MOVL	(SI), SI
	JMP	a_ready
a_page_end:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
a_ready:
	SHLL	CX, SI

	// same for b
	CMPB	DI, $0xfc
	JA	b_page_end
	MOVL	(DI), DI
	JMP	b_ready
b_page_end:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
b_ready:
	SHLL	CX, DI

	BSWAPL	SI			// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI			// find bit differences
	JEQ	lengths_decide
	BSRL	DI, CX			// index of highest bit difference
	SHRL	CX, SI			// move a's bit to bottom
	ANDL	$1, SI			// mask bit
	LEAL	-1(SI*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths. DX still holds blen-alen.
lengths_decide:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX
	SETLT	BX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET

// return0 returns with AX set to 0.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = g
	MOVL	g_m(AX), AX			// AX = g->m
	MOVL	m_curg(AX), AX			// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// Prefetching doesn't seem to help, so the four prefetch entry points
// below are empty and return immediately.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET

// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)		// tail.next = new moduledata (passed in AX)
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
	RET

// uint32tofloat64 converts the uint32 argument a to a float64 stored in ret.
// The value is widened on the stack to a 64-bit integer with a zero high
// word and converted through the x87 unit.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	$0, 4(SP)		// high 32 bits = 0
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)		// low 32 bits = a
	FMOVV	0(SP), F0		// load the 8 bytes at 0(SP) as a 64-bit integer
	FMOVDP	F0, ret+4(FP)		// store as float64 and pop the x87 stack
	RET

// float64touint32 converts the float64 argument a to a uint32 stored in ret.
// Frame layout: 0(SP) holds the saved x87 control word, 4(SP)..11(SP) the
// 64-bit integer produced by the conversion.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)			// save the caller's x87 control word
	// NOTE(review): controlWord64trunc presumably selects truncating
	// rounding, judging by its name; confirm at its definition.
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)		// store as a 64-bit integer and pop the x87 stack
	FLDCW	0(SP)			// restore the saved control word
	MOVL	4(SP), AX		// result = low 32 bits of the 64-bit integer
	MOVL	AX, ret+8(FP)
	RET