// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the runtime bootstrap routine. In order, it:
//   - moves argc/argv onto a 16-byte aligned stack with scratch space,
//   - records stack bounds and stack guards in runtime·g0,
//   - stores CPUID leaf 1 ECX/EDX in runtime·cpuid_ecx/cpuid_edx,
//   - installs TLS via settls and verifies a store through it,
//   - links g0 and m0 to each other,
//   - calls check, args, osinit and schedinit,
//   - queues runtime·main through newproc and calls mstart.
// mstart is not expected to return; if it does, the store to 0xf1 follows.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	MOVL	SP, CX
	// The scratch area is carved out of CX, not SP: SP is reloaded from
	// CX below, so an adjustment made directly to SP would be discarded
	// and the argc/argv spills at 16(SP)/24(SP) would land at or above
	// the incoming stack pointer.
	SUBL	$128, CX		// plenty of scratch
	ANDL	$~15, CX
	MOVL	CX, SP

	MOVL	AX, 16(SP)		// spill argc
	MOVL	BX, 24(SP)		// spill argv

	// create istack out of the given (operating system) stack.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

needtls:
	LEAL	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	MOVL	AX, 0	// abort
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·mainPC(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash
	RET

// mainPC is a 4-byte read-only word holding the address of runtime·main;
// rt0_go passes its address to newproc as the entry argument.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4

// breakpoint executes INT $3.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT $3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP and PC and the current g; ctxt and ret are zeroed.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ctxt(AX)
	MOVQ	$0, gobuf_ret(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Installs gobuf.g as the current g, loads SP, DX (ctxt) and AX (ret) from
// the buffer, clears the sp/ret/ctxt fields, then jumps to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn, read before the stack is switched

	get_tls(CX)
	MOVL	g(CX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(CX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// the g we switched away from, pushed for fn
	MOVL	DI, DX		// DX = fn value
	MOVL	0(DI), DI	// DI = code address stored in the first word of fn
	CALL	DI
	// Reached only if fn returned: jump to badmcall2.
	POPQ	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// If the current g is m->gsignal or m->g0, fn is called directly on the
// current stack. If it is m->curg, the state is saved in g->sched, the
// stack is switched to g0's, fn is called, and the stack is switched back
// to m->curg. Any other g leads to a call of badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	switch

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched.  Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function
	MOVL	DI, DX		// DX = fn value
	MOVL	0(DI), DI	// DI = code address stored in the first word of fn
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP once it has been restored
	RET

noswitch:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0).
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX		// load from address 0 (presumably a deliberate fault)

	// Cannot grow signal stack (m->gsignal).
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	2(PC)
	MOVL	0, AX		// load from address 0 (presumably a deliberate fault)

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX // f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX // f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX is saved as the context word

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack trampolines
// morestack_noctxt zeroes DX (the value morestack saves as gobuf_ctxt)
// and then jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

TEXT runtime·stackBarrier(SB),NOSPLIT,$0
	// We came here via a RET to an overwritten return PC.
	// AX may be live. Other registers are available.

	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
	get_tls(CX)
	MOVL	g(CX), CX
	MOVL	(g_stkbar+slice_array)(CX), DX
	MOVL	g_stkbarPos(CX), BX
	IMULL	$stkbar__size, BX	// Too big for SIB.
	ADDL	DX, BX
	MOVL	stkbar_savedLRVal(BX), BX
	// Record that this stack barrier was hit.
	ADDL	$1, g_stkbarPos(CX)
	// Jump to the original return PC.
	JMP	BX

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (unsigned) is at most MAXSIZE; otherwise
// the JA skips the MOVL/JMP pair and execution continues after the macro.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

// reflect·call is an alias that jumps straight to ·reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

// reflectcall loads argsize (zero-extended) into CX and jumps to the first
// callNNN routine whose frame size is at least argsize. If argsize exceeds
// the largest size listed, it jumps to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call routine NAME with a MAXSIZE-byte frame.
// The routine copies argsize bytes from argptr into its frame, calls the
// code address stored in the first word of f (with DX = f), copies the bytes
// from retoffset up to argsize back to argptr, and finally calls
// callwritebarrier(argtype, argptr, argsize, retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* execute write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

// One routine per frame size targeted by the DISPATCH list in reflectcall.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// procyield executes PAUSE in a loop, decrementing the cycles argument
// after each PAUSE until it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP	// caller sp after CALL
	SUBL	$5, (SP)	// return to CALL again
	MOVL	0(DX), BX	// BX = code address stored in the first word of fv
	JMP	BX	// but first run the deferred function

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Not implemented.
// The body only loads from address 0 (presumably a deliberate fault).
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	0, AX
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
	MOVL	0, AX		// load from address 0 (presumably a deliberate fault)
	RET

// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// Not implemented.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-12
	MOVL	0, AX		// load from address 0 (presumably a deliberate fault)
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX		// load from address 0 (presumably a deliberate fault)
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// When either comparison fails, the following load from address 0 runs
// (presumably a deliberate fault).
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	MOVL	0, AX
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	MOVL	0, AX
	RET

// memclr(ptr, n) zeroes n bytes starting at ptr: n/4 four-byte stores with
// REP STOSL followed by n%4 single-byte stores with REP STOSB.
TEXT runtime·memclr(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$3, BX		// BX = n % 4, the byte tail
	SHRQ	$2, CX		// CX = n / 4, the dword count
	MOVQ	$0, AX
	CLD
	REP
	STOSL
	MOVQ	BX, CX
	REP
	STOSB
	// Note: we zero only 4 bytes at a time so that the tail is at most
	// 3 bytes.  That guarantees that we aren't zeroing pointers with STOSB.
	// See issue 13160.
	RET

// getcallerpc(argp) returns the PC stored 8 bytes below argp. If that PC
// equals stackBarrierPC, nextBarrierPC is called and its result, read from
// 0(SP), is returned instead.
TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	CMPL	AX, runtime·stackBarrierPC(SB)
	JNE	nobar
	// Get original return PC.
	CALL	runtime·nextBarrierPC(SB)
	MOVL	0(SP), AX
nobar:
	MOVL	AX, ret+8(FP)
	RET

// setcallerpc(argp, pc) overwrites the PC stored 8 bytes below argp with pc.
// If the stored PC equals stackBarrierPC, pc is passed to setNextBarrierPC
// instead and the stack slot is left untouched.
TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX		// pc to set
	MOVL	-8(AX), CX
	CMPL	CX, runtime·stackBarrierPC(SB)
	JEQ	setbar
	MOVQ	BX, -8(AX)		// set calling pc
	RET
setbar:
	// Set the stack barrier return PC.
	MOVL	BX, 0(SP)
	CALL	runtime·setNextBarrierPC(SB)
	RET

// getcallersp(argp) returns argp unchanged.
TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+8(FP)
	RET

// int64 runtime·cputicks(void)
// Returns the RDTSC counter, combining DX (high half) and AX (low half)
// into one 64-bit value.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
	GO_ARGS
	NO_LOCAL_POINTERS
	MOVL	p+0(FP), AX
	MOVL	h+4(FP), BX
	MOVL	4(DX), CX		// size, read at offset 4 from DX
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX		// memhash's result, read from 16(SP)
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.
// The four stubs below only store the incoming AX into the result slot.

TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

// memeq(a, b, size) stores the low byte of memeqbody's AX result for the
// two size-byte regions at a and b.
TEXT runtime·memeq(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Returns 1 immediately when a == b; otherwise compares via memeqbody.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL    a+0(FP), SI
	MOVL    b+4(FP), DI
	CMPL    SI, DI
	JEQ     eq
	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
	CALL    runtime·memeqbody(SB)
	MOVB    AX, ret+8(FP)
	RET
eq:
	MOVB    $1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	same			// identical data pointers: equal without comparing bytes
	MOVL	s1len+4(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+16(FP)
	RET
same:
	MOVB	$1, v+16(FP)
	RET

// a in SI
// b in DI
// count in BX
// Result is left in AX: 1 when the regions are equal, 0 otherwise.
// SI, DI, BX, CX, DX and X0-X7 are overwritten.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX			// AX = 0, the result on every early RET below

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff		// all 16 mask bits set means all 64 bytes matched
	JEQ	hugeloop
	RET

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET

	// remaining 0-8 bytes
	// Compares the last 8 bytes of each region, ending at SI+BX and DI+BX.
leftover:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

small:
	CMPQ	BX, $0
	JEQ	equal			// ZF is still set at "equal", so SETEQ yields 1

	LEAQ	0(BX*8), CX
	NEGQ	CX			// CX = -(8*count), used as the shift count below

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI			// shift out the bytes beyond count; ZF set iff the rest is zero
equal:
	SETEQ	AX
	RET

// cmpstring(s1, s2) returns the cmpbody result for the two strings.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare(s1, s2) returns the cmpbody result for the two byte slices.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

	// All compared bytes matched (or a == b): the result depends only on
	// the lengths still held in BX and DX.
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte(s []byte, c byte) returns the indexbytebody result.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL s+0(FP), SI
	MOVL s_len+4(FP), BX
	MOVB c+12(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVL AX, ret+16(FP)
	RET

// strings·IndexByte(s string, c byte) returns the indexbytebody result.
TEXT strings·IndexByte(SB),NOSPLIT,$0-20
	MOVL s+0(FP), SI
	MOVL s_len+4(FP), BX
	MOVB c+8(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVL AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX
//   (offset of the first matching byte from SI, or -1 when none is found)
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL SI, DI

	CMPL BX, $16
	JLT small

	// round up to first 16-byte boundary
	TESTL $15, SI
	JZ aligned
	MOVL SI, CX
	ANDL $~15, CX
	ADDL $16, CX

	// search the beginning
	SUBL SI, CX
	REPN; SCASB
	JZ success

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL BX, R11
	ADDL SI, R11
	ANDL $~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0
	JMP condition

sse:
	// move the next 16-byte chunk of the buffer into X1
	MOVO (DI), X1
	// compare bytes in X0 to X1
	PCMPEQB X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL DX, DX
	JNZ ssesuccess
	ADDL $16, DI

condition:
	CMPL DI, R11
	JLT sse

	// search the end
	MOVL SI, CX
	ADDL BX, CX
	SUBL R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ failure
	REPN; SCASB
	JZ success

failure:
	MOVL $-1, AX
	RET

// handle for lengths < 16
small:
	MOVL BX, CX
	REPN; SCASB
	JZ success
	MOVL $-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
ssesuccess:
	// get the index of the least significant set bit
	BSFW DX, DX
	SUBL SI, DI
	ADDL DI, DX
	MOVL DX, AX
	RET

	// SCASB leaves DI one past the matching byte, hence the extra -1.
success:
	SUBL SI, DI
	SUBL $1, DI
	MOVL DI, AX
	RET

// bytes·Equal(a, b []byte) returns 0 when the lengths differ; otherwise it
// returns the low byte of memeqbody's result.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// fastrand1 updates m.fastrand and returns the new value. The state is
// doubled, then XORed with 0x88888eef; CMOVLMI puts the plain doubled value
// back whenever the XOR result has its sign bit set.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// prefetcht0(addr) issues PREFETCHT0 on addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

// prefetcht1(addr) issues PREFETCHT1 on addr.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


// prefetcht2(addr) issues PREFETCHT2 on addr.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

// prefetchnta(addr) issues PREFETCHNTA on addr.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET

// checkASM returns 1 unconditionally.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVB	$1, ret+0(FP)
	RET