// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// rt0_go is the bootstrap sequence. In order, it:
//   - copies argc/argv onto a freshly aligned stack with scratch space,
//   - records stack bounds and guards in runtime·g0,
//   - stores the CPUID leaf-1 ECX/EDX feature words,
//   - installs TLS via settls and checks that a store through it lands in tls0,
//   - links g0 and m0 to each other and makes g0 the current g,
//   - runs check, args, osinit and schedinit,
//   - queues runtime·main through newproc and enters mstart.
// The code after the mstart call is a deliberate crash.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	// Build the new SP in CX: reserve scratch below the incoming SP, then
	// align down to 16 bytes. The subtraction must be applied to CX;
	// applied to SP it would be thrown away by the MOVL CX, SP below and
	// the stores to 16(SP)/24(SP) would land in the caller's frame.
	MOVL	SP, CX
	SUBL	$128, CX		// plenty of scratch
	ANDL	$~15, CX
	MOVL	CX, SP

	MOVL	AX, 16(SP)		// stash argc
	MOVL	BX, 24(SP)		// stash argv

	// create istack out of the given (operating system) stack.
	// The guards and stack.lo are all set to SP-64*1024+104; stack.hi is SP.
	MOVL	$runtime·g0(SB), DI
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(DI)
	MOVL	BX, g_stackguard1(DI)
	MOVL	BX, (g_stack+stack_lo)(DI)
	MOVL	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0			// leaf 0 returned AX == 0: skip leaf 1
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

needtls:
	LEAL	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort: store to address 0
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	CX, m_g0(AX)
	// save m0 to g0->m
	MOVL	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVL	24(SP), AX		// copy argv
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVL	$runtime·main·f(SB), AX	// entry
	MOVL	$0, 0(SP)
	MOVL	AX, 4(SP)
	CALL	runtime·newproc(SB)

	// start this M
	CALL	runtime·mstart(SB)

	MOVL	$0xf1, 0xf1  // crash
	RET

// runtime·main·f is a 4-byte read-only word holding the address of
// runtime·main; rt0_go passes its address to newproc as the entry.
DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·main·f(SB),RODATA,$4

// breakpoint executes INT $3.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT $3
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// void gosave(Gobuf*)
// save state in Gobuf; setjmp
// Records the caller's SP and PC and the current g; ctxt and ret are zeroed.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ctxt(AX)
	MOVQ	$0, gobuf_ret(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET

// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
// Installs gobuf.g as the current g, loads SP, DX (ctxt) and AX (ret) from
// the buffer, clears sp/ret/ctxt in the buffer, and jumps to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ctxt(BX), DX
	MOVQ	gobuf_ret(BX), AX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	// Record the caller's PC, SP and g in g->sched.
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	0(SP), BX			// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX			// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Move over to m->g0 and its stack, then call fn.
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX				// already on g0? then badmcall
	JNE	notg0
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
notg0:
	MOVL	SI, g(CX)			// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX				// the old g, as fn's argument
	MOVL	DI, DX				// closure context in DX
	MOVL	0(DI), DI			// code pointer from the closure
	CALL	DI
	POPQ	AX
	// fn returned, which it must not do.
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack.  We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// If the current g is gsignal or g0, fn is called directly on the current
// stack. If it is m->curg, the g's state is saved, fn runs on g0's stack,
// and execution then switches back to m->curg. Any other g goes to
// badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	direct

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	direct

	MOVL	m_curg(BX), R8
	CMPL	AX, R8
	JEQ	tosys

	// Not g0, not curg. Must be gsignal, but that's not allowed.
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

tosys:
	// Save our state in g->sched, recording systemstack_switch as the PC
	// so that is what is seen if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), SI
	MOVL	SI, (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0 (DX still holds m->g0 from the comparison above)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), SP

	// call target function: DX = closure, DI = its code pointer
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI

	// switch back to g: reload m->curg, restore its saved SP and clear it
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

direct:
	// already on m stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX

	// Cannot grow scheduler stack (m->g0): fault on a load from address 0.
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	notg0
	MOVL	0, AX
notg0:

	// Cannot grow signal stack (m->gsignal): same deliberate fault.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	notgsignal
	MOVL	0, AX
notgsignal:

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	8(SP), AX	// f's caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	16(SP), AX	// f's caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	8(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BX
	MOVL	BX, g(CX)
	MOVL	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET

// morestack trampolines
// morestack_noctxt zeroes DX (stored by morestack as gobuf_ctxt) and
// tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when CX (argsize) is <= MAXSIZE, otherwise falls
// through to the next entry. The skip stays PC-relative because the macro
// is expanded many times inside one function.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.

TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)

TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVLQZX argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// argsize is larger than every frame size above
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame call helper: it copies argsize bytes from
// argptr to the frame, calls f, copies the bytes from retoffset onward back
// to argptr, and then calls callwritebarrier(argtype, argptr, argsize,
// retoffset).
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* stage the arguments in our frame */	\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* invoke f through its closure */	\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	CALL	AX;				\
	/* copy results back to the caller */	\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	REP;MOVSB;				\
	/* run the write barrier updates */	\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	DX, 0(SP);			\
	MOVL	DI, 4(SP);			\
	MOVL	CX, 8(SP);			\
	MOVL	BX, 12(SP);			\
	CALL	runtime·callwritebarrier(SB);	\
	RET

CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), NOSPLIT, $0-17
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX		// CMPXCHG compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)		// 1 if the exchange happened
	RET

// The uintptr/uint variants below tail-jump to the 32-bit implementations.
TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
	JMP	runtime·cas(SB)

TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
	JMP	runtime·atomicload(SB)

TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
	JMP	runtime·atomicstore(SB)

// bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), NOSPLIT, $0-25
	MOVL	ptr+0(FP), BX
	MOVQ	old+8(FP), AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	SETEQ	ret+24(FP)
	RET

// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp1(SB), NOSPLIT, $0-17
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	SETEQ	ret+16(FP)
	RET

// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX			// keep delta: XADD leaves the old value in AX
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX			// old + delta = new value
	MOVL	AX, ret+8(FP)
	RET

// 64-bit form of xadd: returns the value after the addition.
TEXT runtime·xadd64(SB), NOSPLIT, $0-24
	MOVL	ptr+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX
	MOVQ	AX, ret+16(FP)
	RET

// xchg stores new into *ptr and returns the previous contents.
TEXT runtime·xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

// 64-bit form of xchg.
TEXT runtime·xchg64(SB), NOSPLIT, $0-24
	MOVL	ptr+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	MOVQ	AX, ret+16(FP)
	RET

// Same instruction sequence as xchg, under the pointer-typed name.
TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)
	MOVL	AX, ret+8(FP)
	RET

TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
	JMP	runtime·xchg(SB)

// procyield executes PAUSE, decrementing the cycles argument each time,
// until the counter reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET

// The atomic stores are done with XCHG; the value swapped out is discarded.
TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)
	RET

TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
	MOVL	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET

// void	runtime·atomicor8(byte volatile*, byte);
// Locked byte OR of val into *ptr.
TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), BX
	MOVB	val+4(FP), AX
	LOCK
	ORB	AX, 0(BX)
	RET

// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX		// closure; stays in DX for the callee
	MOVL	argp+4(FP), BX
	LEAL	-8(BX), SP		// caller sp after CALL
	SUBL	$5, (SP)		// return to CALL again
	MOVL	0(DX), BX		// code pointer of the deferred function
	JMP	BX			// but first run the deferred function

// asmcgocall(void(*fn)(void*), void *arg)
// Not implemented: faults on a load from address 0.
TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
	MOVL	0, AX
	RET

// asmcgocall_errno(void(*fn)(void*), void *arg)
// Not implemented.
TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
	MOVL	0, AX
	RET

// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Not implemented.
TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
	MOVL	0, AX
	RET

// void setg(G*); set g. for use by needm.
// Not implemented.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	0, AX			// deliberate fault: load from address 0
	RET

// check that SP is in range [g->stack.lo, g->stack.hi)
// Each failed comparison falls into a load from address 0.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	belowhi
	MOVL	0, AX
belowhi:
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	abovelo
	MOVL	0, AX
abovelo:
	RET

// memclr zeroes n bytes at ptr: n>>3 quadwords with REP STOSQ, then the
// remaining n&7 bytes with REP STOSB.
TEXT runtime·memclr(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$7, BX			// BX = trailing byte count
	SHRQ	$3, CX			// CX = quadword count
	MOVQ	$0, AX
	CLD
	REP
	STOSQ
	MOVQ	BX, CX
	REP
	STOSB
	RET

// getcallerpc returns the word stored 8 bytes below the address passed
// in argp.
TEXT runtime·getcallerpc(SB),NOSPLIT,$0-12
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	MOVL	AX, ret+8(FP)
	RET

// Same body as getcallerpc, with the argument named p.
TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-12
	MOVL	p+0(FP),AX		// addr of first arg
	MOVL	-8(AX),AX		// get calling pc
	MOVL	AX, ret+8(FP)
	RET

// setcallerpc overwrites the slot 8 bytes below argp with pc; the store
// is a full 8 bytes wide.
TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
	MOVL	argp+0(FP),AX		// addr of first arg
	MOVL	pc+4(FP), BX		// pc to set
	MOVQ	BX, -8(AX)		// set calling pc
	RET

// getcallersp returns argp unchanged.
TEXT runtime·getcallersp(SB),NOSPLIT,$0-12
	MOVL	argp+0(FP), AX
	MOVL	AX, ret+8(FP)
	RET

// func gogetcallersp(p unsafe.Pointer) uintptr
TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-12
	MOVL	p+0(FP),AX		// addr of first arg
	MOVL	AX, ret+8(FP)
	RET

// int64 runtime·cputicks(void)
// Combines the DX:AX halves produced by RDTSC into one 64-bit result.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// memhash_varlen(p unsafe.Pointer, h seed) uintptr
// redirects to memhash(p, h, size) using the size
// stored in the closure.
TEXT runtime·memhash_varlen(SB),NOSPLIT,$20-12
	GO_ARGS
	NO_LOCAL_POINTERS
	// Lay out memhash's three arguments in our frame.
	MOVL	p+0(FP), AX
	MOVL	AX, 0(SP)
	MOVL	h+4(FP), BX
	MOVL	BX, 4(SP)
	MOVL	4(DX), CX		// size, read from offset 4 of the closure in DX
	MOVL	CX, 8(SP)
	CALL	runtime·memhash(SB)
	MOVL	16(SP), AX		// memhash's result slot
	MOVL	AX, ret+8(FP)
	RET

// hash function using AES hardware instructions
// For now, our one amd64p32 system (NaCl) does not
// support using AES instructions, so have not bothered to
// write the implementations. Can copy and adjust the ones
// in asm_amd64.s when the time comes.
// The four stubs below simply store whatever is in AX as the result.

TEXT runtime·aeshash(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash32(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

TEXT runtime·aeshash64(SB),NOSPLIT,$0-20
	MOVL	AX, ret+16(FP)
	RET

// memeq loads a, b and size into SI, DI and BX, then stores the low byte
// of memeqbody's AX as the result.
TEXT runtime·memeq(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	MOVL	size+8(FP), BX
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
// Identical pointers return true without comparing memory.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	sameptr
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	CALL	runtime·memeqbody(SB)
	MOVB	AX, ret+8(FP)
	RET
sameptr:
	MOVB	$1, ret+8(FP)
	RET

// eqstring tests whether two strings are equal.
// See runtime_test.go:eqstring_generic for
// equivalent Go code.
TEXT runtime·eqstring(SB),NOSPLIT,$0-17
	MOVL	s1len+4(FP), AX
	MOVL	s2len+12(FP), BX	// BX also serves as memeqbody's count
	CMPL	AX, BX
	JNE	lendiffer
	MOVL	s1str+0(FP), SI
	MOVL	s2str+8(FP), DI
	CMPL	SI, DI
	JEQ	sameptr
	CALL	runtime·memeqbody(SB)
	MOVB	AX, v+16(FP)
	RET
sameptr:
	MOVB	$1, v+16(FP)
	RET
lendiffer:
	MOVB	$0, v+16(FP)
	RET

// a in SI
// b in DI
// count in BX
// Result in AX: 1 if the two ranges are equal, 0 otherwise.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX

	CMPQ	BX, $8
	JB	short

	// 64 bytes at a time using xmm registers
loop64:
	CMPQ	BX, $64
	JB	loop8
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff		// all 16 mask bits set: chunk matched
	JEQ	loop64
	RET				// mismatch: AX is still 0

	// 8 bytes at a time using 64-bit register
loop8:
	CMPQ	BX, $8
	JBE	tail
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	loop8
	RET				// mismatch: AX is still 0

	// remaining 0-8 bytes: compare the last 8 bytes of each range
tail:
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

short:
	CMPQ	BX, $0
	JEQ	result			// zero length: ZF is set, so SETEQ yields 1

	LEAQ	0(BX*8), CX
	NEGQ	CX			// CX = -(8*count), used as a shift count

	CMPB	SI, $0xf8
	JA	a_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	a_done
a_high:
	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
a_done:

	// same for DI.
	CMPB	DI, $0xf8
	JA	b_high
	MOVQ	(DI), DI
	JMP	b_done
b_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
b_done:

	SUBQ	SI, DI
	SHLQ	CX, DI			// shift out the bytes beyond count; ZF set iff equal
result:
	SETEQ	AX
	RET

// cmpstring loads both strings' base/len pairs for cmpbody and stores its
// 32-bit result.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	CALL	runtime·cmpbody(SB)
	MOVL	AX, ret+16(FP)
	RET

// bytes·Compare is the slice form of cmpstring: base and len of each slice
// go to cmpbody and the result is stored at res.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	CALL	runtime·cmpbody(SB)
	// The result slot is 4 bytes (frame is $0-28, res at offset 24), so
	// store 32 bits, as cmpstring does; a 64-bit store would write past
	// the declared argument frame.
	MOVL	AX, res+24(FP)
	RET

// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame			// same base pointer: only the lengths decide
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	short

loop16:
	CMPQ	R8, $16
	JBE	tail16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop16

	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
tail16:
	CMPQ	R8, $8
	JBE	tail8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
tail8:
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
short:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	a_high
	MOVQ	(SI), SI
	JMP	a_done
a_high:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
a_done:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	b_high
	MOVQ	(DI), DI
	JMP	b_done
b_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
b_done:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

	// compared bytes are identical: order by length
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET

// bytes·IndexByte and strings·IndexByte load the data pointer, length and
// byte sought, call indexbytebody, and store its AX as the result.
TEXT bytes·IndexByte(SB),NOSPLIT,$0
	MOVL s+0(FP), SI
	MOVL s_len+4(FP), BX
	MOVB c+12(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVL AX, ret+16(FP)
	RET

TEXT strings·IndexByte(SB),NOSPLIT,$0
	MOVL s+0(FP), SI
	MOVL s_len+4(FP), BX
	MOVB c+8(FP), AL
	CALL runtime·indexbytebody(SB)
	MOVL AX, ret+16(FP)
	RET

// input:
//   SI: data
//   BX: data len
//   AL: byte sought
// output:
//   AX: index of the first match, or -1 if there is none
TEXT runtime·indexbytebody(SB),NOSPLIT,$0
	MOVL SI, DI

	CMPL BX, $16
	JLT short

	// round up to first 16-byte boundary
	TESTL $15, SI
	JZ aligned
	MOVL SI, CX
	ANDL $~15, CX
	ADDL $16, CX

	// search the beginning
	SUBL SI, CX
	REPN; SCASB
	JZ found

// DI is 16-byte aligned; get ready to search using SSE instructions
aligned:
	// round down to last 16-byte boundary
	MOVL BX, R11
	ADDL SI, R11
	ANDL $~15, R11

	// shuffle X0 around so that each byte contains c
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0
	JMP chunkcheck

chunk:
	// move the next 16-byte chunk of the buffer into X1
	MOVO (DI), X1
	// compare bytes in X0 to X1
	PCMPEQB X0, X1
	// take the top bit of each byte in X1 and put the result in DX
	PMOVMSKB X1, DX
	TESTL DX, DX
	JNZ foundinchunk
	ADDL $16, DI

chunkcheck:
	CMPL DI, R11
	JLT chunk

	// search the end
	MOVL SI, CX
	ADDL BX, CX
	SUBL R11, CX
	// if CX == 0, the zero flag will be set and we'll end up
	// returning a false success
	JZ notfound
	REPN; SCASB
	JZ found

notfound:
	MOVL $-1, AX
	RET

// handle for lengths < 16
short:
	MOVL BX, CX
	REPN; SCASB
	JZ found
	MOVL $-1, AX
	RET

// we've found the chunk containing the byte
// now just figure out which specific byte it is
foundinchunk:
	// get the index of the least significant set bit
	BSFW DX, DX
	SUBL SI, DI
	ADDL DI, DX
	MOVL DX, AX
	RET

found:
	// SCASB left DI one past the matching byte
	SUBL SI, DI
	SUBL $1, DI
	MOVL DI, AX
	RET

// bytes·Equal stores 0 when the lengths differ; otherwise it stores the
// low byte of memeqbody's AX.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	XORL	AX, AX
	CMPL	BX, CX
	JNE	eqret
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)
eqret:
	MOVB	AX, ret+24(FP)
	RET

// fastrand1 doubles m->fastrand and XORs in 0x88888eef; CMOVLMI then
// replaces that with the plain doubled value when the sign flag is set.
// The result is written back to m->fastrand and returned.
TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), AX
	MOVL	m_fastrand(AX), DX
	ADDL	DX, DX
	MOVL	DX, BX
	XORL	$0x88888eef, DX
	CMOVLMI	BX, DX
	MOVL	DX, m_fastrand(AX)
	MOVL	DX, ret+0(FP)
	RET

// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return

// getg returns the current g read from TLS.
TEXT runtime·getg(SB),NOSPLIT,$0-4
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	AX, ret+0(FP)
	RET

// The prefetch helpers issue the named PREFETCH instruction on addr.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET


TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	MOVL	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET