// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime entry point. It builds an aligned stack, initialises
// g0 and m0, installs TLS, runs check/args/osinit/schedinit, queues the
// function stored in runtime·mainPC via newproc and finally calls mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	// Reserve the scratch area in CX, the value that becomes SP below.
	// Subtracting from SP itself would be thrown away by the MOVL CX, SP
	// two instructions later, and the 16(SP)/24(SP) stores that follow
	// would then land above the incoming stack pointer.
	SUBL	$128, CX		// plenty of scratch
	ANDL	$~15, CX		// round down to a 16-byte boundary
	MOVL	CX, SP

	MOVL	AX, 16(SP)		// stash argc
	MOVL	BX, 24(SP)		// stash argv

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

needtls:
	LEAL	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ	2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash
	RET

// mainPC holds the address of runtime·main; rt0_go passes it to newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes an INT $3 instruction and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT $3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ctxt(AX)
	MOVQ	$0, gobuf_ret(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)	// record the current g
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// AX still holds the g we switched away from
	MOVL	DI, DX		// DX = fn
	MOVL	0(DI), DI	// DI = word stored at offset 0 of fn, the call target
	CALL	DI
	POPQ	AX
	// Reached only if fn returned; jump to badmcall2.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Calls fn directly when the current g is m->gsignal or m->g0; when it is
// m->curg, switches to g0's saved stack pointer for the call and switches
// back afterwards. Any other g is routed to badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX		// DX = fn
	MOVL	0(DI), DI	// DI = word stored at offset 0 of fn, the call target
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP again
	RET

noswitch:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX	// g == m->g0: read from address 0

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX	// g == m->gsignal: read from address 0

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX // f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX // f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as left by the caller (morestack_noctxt zeroes it)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack trampolines
// morestack_noctxt clears DX and then tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	ADDL	DX, BX
	MOVL	stkbar_savedLRVal(BX), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (the argument size) is at most MAXSIZE;
// otherwise the JA skips the three remaining instructions and falls through.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL CX, $MAXSIZE;		\
	JA 3(PC);			\
	MOVL $NAME(SB), AX;		\
	JMP AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflect·call forwards to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// ·reflectcall loads argsize into CX and dispatches to the first callNN
// routine whose frame size is at least argsize; if none of them is large
// enough it jumps to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine NAME with a MAXSIZE-byte frame.
// The routine copies argsize bytes from argptr onto its own stack, calls the
// function found through f, copies the bytes from retoffset up to argsize
// back to argptr, and then calls callwritebarrier with
// (argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE, decrementing the cycles argument each time,
// until the counter reaches zero.
// NOTE(review): the counter is decremented before it is tested, so a cycles
// value of 0 would wrap around rather than return at once — confirm callers
// never pass 0.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX	// BX = word stored at offset 0 of fv, the jump target
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX	// stub: only reads address 0
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
	MOVL	0, AX	// stub: only reads address 0
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// Not implemented.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-12
	MOVL	0, AX	// stub: only reads address 0
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX	// stub: only reads address 0
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Each failed comparison falls into a read from address 0.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	MOVL	0, AX
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	MOVL	0, AX
	RET

// memclr zeroes n bytes starting at ptr: n/4 four-byte stores followed by
// n%4 single-byte stores.
TEXT runtime·memclr(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$3, BX		// BX = n % 4, the byte tail
	SHRQ	$2, CX		// CX = n / 4, the number of 4-byte stores
	MOVQ	$0, AX
	CLD
	REP
	STOSL
	MOVQ	BX, CX
	REP
	STOSB
	// Note: we zero only 4 bytes at a time so that the tail is at most
	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
	// See issue 13160.
	RET

// getcallerpc returns the word stored 8 bytes below argp. If that word equals
// runtime·stackBarrierPC, the value left at 0(SP) by nextBarrierPC is
// returned instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+8(FP)
	RET

// setcallerpc stores pc into the word 8 bytes below argp, unless that word
// currently equals runtime·stackBarrierPC, in which case pc is handed to
// setNextBarrierPC instead.
TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX		// pc to set
	MOVL	-8(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVQ	BX, -8(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp returns its argp argument unchanged.
TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+8(FP)
	RET

// int64 runtime·cputicks(void)
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX		// combine the DX:AX pair into one 64-bit value in AX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX	// size, read from offset 4 of the closure in DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX	// memhash's result
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.

// The four aeshash entry points below are placeholders: each one only stores
// whatever is currently in AX into its result slot.
TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

// memequal(p, q unsafe.Pointer, size uintptr) bool
// Reports whether the size bytes at a and b are equal. Identical pointers
// are answered without touching memory.
// The one-byte result is stored at ret+16(FP), so the argument area is
// 17 bytes, the same size eqstring declares for its result at v+16(FP).
TEXT runtime·memequal(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET
eq:
	MOVB	$1, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX    // compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same
	MOVL	s1len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+16(FP)
	RET
same:
	// identical data pointers: equal without comparing bytes
	MOVB	$1, v+16(FP)
	RET

// a in SI
// b in DI
// count in BX
// Result in AX: 1 if the two ranges are equal, 0 otherwise.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX		// default result: not equal

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set: the chunk matched
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
leftover:
	// compare the 8 bytes that end at the end of each range
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX	// CX = count in bits
	NEGQ	CX		// negated; used as the shift count below

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI		// shift out the bytes beyond count; sets ZF for SETEQ
equal:
	SETEQ	AX
	RET

// cmpstring loads both strings' base and length and returns cmpbody's result.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare loads the first two words of each argument (pointer and
// length) and returns cmpbody's result.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// compare the last 8 of the bytes still to be compared
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

allsame:
	// the compared bytes match; the result depends only on the lengths
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte loads the data pointer, the length and the byte c, and
// returns indexbytebody's result.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL s+0(FP), SI
	MOVL s_len+4(FP), BX
	MOVB c+12(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVL AX, ret+16(FP)
	RET

// strings·IndexByte is the same as bytes·IndexByte except that c is read
// from offset 8.
TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	MOVL s+0(FP), SI
	MOVL s_len+4(FP), BX
	MOVB c+8(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVL AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of the first matching byte, or -1 if there is none
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL SI, DI

	CMPL BX, $16
	JLT small

	// round up to first 16-byte boundary
	TESTL $15, SI
	JZ aligned
	MOVL SI, CX
	ANDL $~15, CX
	ADDL $16, CX

	// search the beginning
	SUBL SI, CX
	REPN; SCASB
	JZ success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL BX, R11
	ADDL SI, R11
	ANDL $~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0
	JMP condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO (DI), X1
	// compare bytes in X0 to X1
	PCMPEQB X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL DX, DX
	JNZ ssesuccess
	ADDL $16, DI

condition:
	CMPL DI, R11
	JLT sse

	// search the end
	MOVL SI, CX
	ADDL BX, CX
	SUBL R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ failure
	REPN; SCASB
	JZ success

failure:
	MOVL $-1, AX
	RET

// handle for lengths < 16
small:
	MOVL BX, CX
	REPN; SCASB
	JZ success
	MOVL $-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW DX, DX
	SUBL SI, DI
	ADDL DI, DX
	MOVL DX, AX
	RET

success:
	// SCASB leaves DI one past the matching byte
	SUBL SI, DI
	SUBL $1, DI
	MOVL DI, AX
	RET

// bytes·Equal stores 0 when the two lengths differ; otherwise it stores
// memeqbody's result for the two data pointers.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// fastrand1 doubles m->fastrand, stores either that doubled value or the
// doubled value XORed with 0x88888eef (chosen by the sign flag after the
// XOR) back into m->fastrand, and returns it.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// The four prefetch routines below each load addr and issue the matching
// PREFETCH instruction on it.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// checkASM stores 1 (true) as its result.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVB	$1, ret+0(FP)
	RET