// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap routine. In order, it:
//   - saves argc/argv in scratch space on a 16-byte aligned stack,
//   - gives g0 default stack bounds,
//   - probes CPUID, printing bad_proc_msg and exiting with status 1 when
//     CPUID cannot be toggled in EFLAGS or MMX (EDX bit 23) is missing,
//   - records the CPUID leaf 1 (ECX, EDX) and leaf 7 (EBX) feature words,
//   - sets up TLS through _cgo_init when present, otherwise ldt0setup,
//   - links m0 and g0 together,
//   - calls check, args, osinit and schedinit,
//   - queues runtime·main via newproc and calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX
	POPFL			// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI			// SI = highest supported CPUID leaf
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, AX	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	nocpuinfo
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	MOVL	BX, runtime·cpuid_ebx7(SB)

nocpuinfo:

	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS.  if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)
	MOVL	BP, 0(SP)
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0			// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET

// bad_proc_msg is the 0x3d-byte message rt0_go writes to fd 2 before
// exiting when the processor check fails.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET

// asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
//
// Records the caller's SP and PC and the current g in the Gobuf and
// zeroes gobuf.ret. Calls badctxt if gobuf.ctxt is non-zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
//
// Installs gobuf.g as the current g, loads SP, AX (ret) and DX (ctxt)
// from the Gobuf, clears those three fields, and jumps to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX		// reload gobuf after the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument to fn: the g we switched away from
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX	// fn returned
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.
// We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
//
// Calls fn directly when the current g is m->gsignal or m->g0.
// When the current g is m->curg, saves its state in g->sched, switches
// to g0's stack, calls fn, and switches back to m->curg.
// Any other g is reported through badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer stored in the closure
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// On entry DX holds the ctxt value that is pushed as newstack's argument.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	PUSHL	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPL	DX	// keep balance check happy
	RET

// morestack_noctxt zeroes DX (the ctxt argument) and jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the argument size) is at most MAXSIZE,
// compared as unsigned; otherwise it falls through to the next DISPATCH.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call jumps to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall picks the first callN whose frame size N is >= argsize and
// jumps to it; sizes above 1073741824 go to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper named NAME with a MAXSIZE-byte
// frame. It copies argsize bytes from argptr to the frame, calls the code
// pointer stored in f, then hands the region starting at retoffset to
// callRet<> so the results are copied back to argptr.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// In: DX = argtype, DI = destination, SI = source, CX = byte count.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE in a loop, decrementing the cycles argument
// after each PAUSE until it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2.
//    sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched.
// AX and BX are preserved. Calls badctxt if g->sched.ctxt is non-zero.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)	// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	needm
	MOVL	g_m(BP), BP
	MOVL	BP, DX	// saved copy of oldm
	JMP	havem
needm:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->g0->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPL	DX, $0
	JNE	3(PC)
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// void setg(G*); set g. for use by needm.
// On Windows it also stores the address of m->tls (or 0 when g is nil)
// at 0x14(FS).
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET

// void setg_gcc(G*); set g.
// for use by gcc
TEXT setg_gcc<>(SB), NOSPLIT, $0
	get_tls(AX)
	MOVL	gg+0(FP), DX
	MOVL	DX, g(AX)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Executes INT $3 when either bound is violated.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	INT	$3
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	INT	$3
	RET

// getcallerpc returns the word stored 4 bytes below argp.
TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-4(AX),AX		// get calling pc
	MOVL	AX, ret+4(FP)
	RET

// func cputicks() int64
// Returns the RDTSC counter value. When cpuid_edx has bit 26 (SSE2) set,
// a fence is issued first: LFENCE if lfenceBeforeRdtsc is 1, else MFENCE.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// no sse2, no mfence
	JEQ	done
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	done
mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
done:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET

// ldt0setup calls setldt with entry 7, the address of m0.tls and size 32.
TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// set up ldt 7 to point at m0.tls
	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
	// the entry number is just a hint.  setldt will set up GS with what it used.
	MOVL	$7, 0(SP)
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET

// emptyfunc only returns. Its flags are 0 (not NOSPLIT); rt0_go calls it
// to "fault if stack check is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
// The size is read from offset 4 of the closure pointed to by DX.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	12(SP), AX
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	s+8(FP), BX	// size
	LEAL	ret+12(FP), DX
	JMP	runtime·aeshashbody(SB)

// aeshashstr hashes the string whose header p points to.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data
	LEAL	ret+8(FP), DX
	JMP	runtime·aeshashbody(SB)

// AX: data
// BX: length
// DX: address to put return value
//
// The seed is read from h+4(FP) of the frame of the function that jumped here.
// The length selects one of five paths: 0-15, 16, 17-32, 33-64, 65+ bytes.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	            // 16 bits of length
	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1                      // save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
	AESENC	X0, X0                      // scramble seed

	CMPL	BX, $16
	JB	aes0to15
	JE	aes16
	CMPL	BX, $32
	JBE	aes17to32
	CMPL	BX, $64
	JBE	aes33to64
	JMP	aes65plus

aes0to15:
	TESTL	BX, BX
	JE	aes0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDL	BX, BX		// BX*2, scaled by 8 below: 16-byte table stride
	PAND	masks<>(SB)(BX*8), X1

final1:
	AESENC	X0, X1  // scramble input, xor in seed
	AESENC	X1, X1  // scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

aes65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

aesloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	aesloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

// aeshash32 hashes the 4 bytes at p with seed h using three AESENC rounds
// keyed by aeskeysched, and returns the low 32 bits of the result.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVL	h+4(FP), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// aeshash64 hashes the 8 bytes at p with seed h using three AESENC rounds
// keyed by aeskeysched, and returns the low 32 bits of the result.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// Entry n (16 bytes at offset n*16) has its low n bytes set to 0xff.
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000

DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000

DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000

DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000

DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000

DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000

DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000

DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000

DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000

DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000

DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000

DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000

DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000

DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff

DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff

DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000

DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff

DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff

DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff

DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff

DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff

DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff

DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff

DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff

DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff

DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff

DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff

DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff

DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f

DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e

DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256

// checkASM reports whether both table addresses have their low 4 bits clear.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVL	$masks<>(SB), AX
	MOVL	$shifts<>(SB), BX
	ORL	BX, AX
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
// Returns true immediately when a == b; otherwise defers to memeqbody.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX    // compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1_base+0(FP), SI
	MOVL	s2_base+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1_len+4(FP), BX
	LEAL	ret+16(FP), AX
	JMP	runtime·memeqbody(SB)
same:
	MOVB	$1, ret+16(FP)
	RET

// bytes·Equal stores false when the two lengths differ; otherwise it
// defers to memeqbody.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
eqret:
	MOVB	$0, ret+24(FP)
	RET

// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI),
X5 1320 MOVOU 48(SI), X6 1321 MOVOU 48(DI), X7 1322 PCMPEQB X1, X0 1323 PCMPEQB X3, X2 1324 PCMPEQB X5, X4 1325 PCMPEQB X7, X6 1326 PAND X2, X0 1327 PAND X6, X4 1328 PAND X4, X0 1329 PMOVMSKB X0, DX 1330 ADDL $64, SI 1331 ADDL $64, DI 1332 SUBL $64, BX 1333 CMPL DX, $0xffff 1334 JEQ hugeloop 1335 MOVB $0, (AX) 1336 RET 1337 1338 // 4 bytes at a time using 32-bit register 1339 bigloop: 1340 CMPL BX, $4 1341 JBE leftover 1342 MOVL (SI), CX 1343 MOVL (DI), DX 1344 ADDL $4, SI 1345 ADDL $4, DI 1346 SUBL $4, BX 1347 CMPL CX, DX 1348 JEQ bigloop 1349 MOVB $0, (AX) 1350 RET 1351 1352 // remaining 0-4 bytes 1353 leftover: 1354 MOVL -4(SI)(BX*1), CX 1355 MOVL -4(DI)(BX*1), DX 1356 CMPL CX, DX 1357 SETEQ (AX) 1358 RET 1359 1360 small: 1361 CMPL BX, $0 1362 JEQ equal 1363 1364 LEAL 0(BX*8), CX 1365 NEGL CX 1366 1367 MOVL SI, DX 1368 CMPB DX, $0xfc 1369 JA si_high 1370 1371 // load at SI won't cross a page boundary. 1372 MOVL (SI), SI 1373 JMP si_finish 1374 si_high: 1375 // address ends in 111111xx. Load up to bytes we want, move to correct position. 1376 MOVL -4(SI)(BX*1), SI 1377 SHRL CX, SI 1378 si_finish: 1379 1380 // same for DI. 
1381 MOVL DI, DX 1382 CMPB DX, $0xfc 1383 JA di_high 1384 MOVL (DI), DI 1385 JMP di_finish 1386 di_high: 1387 MOVL -4(DI)(BX*1), DI 1388 SHRL CX, DI 1389 di_finish: 1390 1391 SUBL SI, DI 1392 SHLL CX, DI 1393 equal: 1394 SETEQ (AX) 1395 RET 1396 1397 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20 1398 MOVL s1_base+0(FP), SI 1399 MOVL s1_len+4(FP), BX 1400 MOVL s2_base+8(FP), DI 1401 MOVL s2_len+12(FP), DX 1402 LEAL ret+16(FP), AX 1403 JMP runtime·cmpbody(SB) 1404 1405 TEXT bytes·Compare(SB),NOSPLIT,$0-28 1406 MOVL s1+0(FP), SI 1407 MOVL s1+4(FP), BX 1408 MOVL s2+12(FP), DI 1409 MOVL s2+16(FP), DX 1410 LEAL ret+24(FP), AX 1411 JMP runtime·cmpbody(SB) 1412 1413 TEXT bytes·IndexByte(SB),NOSPLIT,$0-20 1414 MOVL s+0(FP), SI 1415 MOVL s_len+4(FP), CX 1416 MOVB c+12(FP), AL 1417 MOVL SI, DI 1418 CLD; REPN; SCASB 1419 JZ 3(PC) 1420 MOVL $-1, ret+16(FP) 1421 RET 1422 SUBL SI, DI 1423 SUBL $1, DI 1424 MOVL DI, ret+16(FP) 1425 RET 1426 1427 TEXT strings·IndexByte(SB),NOSPLIT,$0-16 1428 MOVL s+0(FP), SI 1429 MOVL s_len+4(FP), CX 1430 MOVB c+8(FP), AL 1431 MOVL SI, DI 1432 CLD; REPN; SCASB 1433 JZ 3(PC) 1434 MOVL $-1, ret+12(FP) 1435 RET 1436 SUBL SI, DI 1437 SUBL $1, DI 1438 MOVL DI, ret+12(FP) 1439 RET 1440 1441 // input: 1442 // SI = a 1443 // DI = b 1444 // BX = alen 1445 // DX = blen 1446 // AX = address of return word (set to 1/0/-1) 1447 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 1448 MOVL DX, BP 1449 SUBL BX, DX // DX = blen-alen 1450 JLE 2(PC) 1451 MOVL BX, BP // BP = min(alen, blen) 1452 CMPL SI, DI 1453 JEQ allsame 1454 CMPL BP, $4 1455 JB small 1456 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 1457 JE mediumloop 1458 largeloop: 1459 CMPL BP, $16 1460 JB mediumloop 1461 MOVOU (SI), X0 1462 MOVOU (DI), X1 1463 PCMPEQB X0, X1 1464 PMOVMSKB X1, BX 1465 XORL $0xffff, BX // convert EQ to NE 1466 JNE diff16 // branch if at least one byte is not equal 1467 ADDL $16, SI 1468 ADDL $16, DI 1469 SUBL $16, BP 1470 JMP largeloop 1471 1472 diff16: 1473 BSFL BX, BX // index of 
first byte that differs 1474 XORL DX, DX 1475 MOVB (SI)(BX*1), CX 1476 CMPB CX, (DI)(BX*1) 1477 SETHI DX 1478 LEAL -1(DX*2), DX // convert 1/0 to +1/-1 1479 MOVL DX, (AX) 1480 RET 1481 1482 mediumloop: 1483 CMPL BP, $4 1484 JBE _0through4 1485 MOVL (SI), BX 1486 MOVL (DI), CX 1487 CMPL BX, CX 1488 JNE diff4 1489 ADDL $4, SI 1490 ADDL $4, DI 1491 SUBL $4, BP 1492 JMP mediumloop 1493 1494 _0through4: 1495 MOVL -4(SI)(BP*1), BX 1496 MOVL -4(DI)(BP*1), CX 1497 CMPL BX, CX 1498 JEQ allsame 1499 1500 diff4: 1501 BSWAPL BX // reverse order of bytes 1502 BSWAPL CX 1503 XORL BX, CX // find bit differences 1504 BSRL CX, CX // index of highest bit difference 1505 SHRL CX, BX // move a's bit to bottom 1506 ANDL $1, BX // mask bit 1507 LEAL -1(BX*2), BX // 1/0 => +1/-1 1508 MOVL BX, (AX) 1509 RET 1510 1511 // 0-3 bytes in common 1512 small: 1513 LEAL (BP*8), CX 1514 NEGL CX 1515 JEQ allsame 1516 1517 // load si 1518 CMPB SI, $0xfc 1519 JA si_high 1520 MOVL (SI), SI 1521 JMP si_finish 1522 si_high: 1523 MOVL -4(SI)(BP*1), SI 1524 SHRL CX, SI 1525 si_finish: 1526 SHLL CX, SI 1527 1528 // same for di 1529 CMPB DI, $0xfc 1530 JA di_high 1531 MOVL (DI), DI 1532 JMP di_finish 1533 di_high: 1534 MOVL -4(DI)(BP*1), DI 1535 SHRL CX, DI 1536 di_finish: 1537 SHLL CX, DI 1538 1539 BSWAPL SI // reverse order of bytes 1540 BSWAPL DI 1541 XORL SI, DI // find bit differences 1542 JEQ allsame 1543 BSRL DI, CX // index of highest bit difference 1544 SHRL CX, SI // move a's bit to bottom 1545 ANDL $1, SI // mask bit 1546 LEAL -1(SI*2), BX // 1/0 => +1/-1 1547 MOVL BX, (AX) 1548 RET 1549 1550 // all the bytes in common are the same, so we just need 1551 // to compare the lengths. 
1552 allsame: 1553 XORL BX, BX 1554 XORL CX, CX 1555 TESTL DX, DX 1556 SETLT BX // 1 if alen > blen 1557 SETEQ CX // 1 if alen == blen 1558 LEAL -1(CX)(BX*2), BX // 1,0,-1 result 1559 MOVL BX, (AX) 1560 RET 1561 1562 TEXT runtime·return0(SB), NOSPLIT, $0 1563 MOVL $0, AX 1564 RET 1565 1566 // Called from cgo wrappers, this function returns g->m->curg.stack.hi. 1567 // Must obey the gcc calling convention. 1568 TEXT _cgo_topofstack(SB),NOSPLIT,$0 1569 get_tls(CX) 1570 MOVL g(CX), AX 1571 MOVL g_m(AX), AX 1572 MOVL m_curg(AX), AX 1573 MOVL (g_stack+stack_hi)(AX), AX 1574 RET 1575 1576 // The top-most function running on a goroutine 1577 // returns to goexit+PCQuantum. 1578 TEXT runtime·goexit(SB),NOSPLIT,$0-0 1579 BYTE $0x90 // NOP 1580 CALL runtime·goexit1(SB) // does not return 1581 // traceback from goexit1 must hit code range of goexit 1582 BYTE $0x90 // NOP 1583 1584 // Prefetching doesn't seem to help. 1585 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 1586 RET 1587 1588 TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4 1589 RET 1590 1591 TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4 1592 RET 1593 1594 TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4 1595 RET 1596 1597 // Add a module's moduledata to the linked list of moduledata objects. This 1598 // is called from .init_array by a function generated in the linker and so 1599 // follows the platform ABI wrt register preservation -- it only touches AX, 1600 // CX (implicitly) and DX, but it does not follow the ABI wrt arguments: 1601 // instead the pointer to the moduledata is passed in AX. 
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	// AX holds the new moduledata pointer on entry.
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, runtime·lastmoduledatap(SB)	// the new entry becomes the tail
	MOVL	AX, moduledata_next(DX)		// old tail's next field = new entry
	RET

// uint32tofloat64 widens its 32-bit argument to a 64-bit integer in the
// 8-byte frame (high word zero), loads that through the x87 stack and
// stores it to ret as a float64.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	$0, 4(SP)		// high 32 bits of the 64-bit temporary
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)		// low 32 bits = a
	FMOVV	0(SP), F0		// load the temporary as a 64-bit integer
	FMOVDP	F0, ret+4(FP)		// store as float64 and pop
	RET

// float64touint32 converts its float64 argument to a 64-bit integer
// under the control word runtime·controlWord64trunc and returns the low
// 32 bits. Frame layout: 0(SP) saved control word, 4(SP) 64-bit result.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)				// remember the caller's x87 control word
	FLDCW	runtime·controlWord64trunc(SB)	// presumably selects truncation; defined elsewhere
	FMOVVP	F0, 4(SP)			// store as 64-bit integer and pop
	FLDCW	0(SP)				// put the caller's control word back
	MOVL	4(SP), AX			// low half of the 64-bit integer
	MOVL	AX, ret+8(FP)
	RET