golang.org/toolchain@v0.0.1-go1.9rc2.windows-amd64/src/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Runtime entry point. Builds g0 on the incoming OS stack, probes CPU
        	// features with CPUID, installs and verifies TLS, links g0 and m0,
        	// runs check/args/osinit/schedinit, queues runtime·main through
        	// newproc and enters mstart. If mstart returns, the store to address
        	// 0xf1 near the end crashes the process.
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
    15  	SUBL	$128, CX		// plenty of scratch
    16  	ANDL	$~15, CX
    17  	MOVL	CX, SP
    18  
    19  	MOVL	AX, 16(SP)		// stash argc; reloaded at "copy argc" below
    20  	MOVL	BX, 24(SP)		// stash argv; reloaded at "copy argv" below
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVL	$0, AX
    32  	CPUID
        	// Remember the highest supported basic CPUID leaf. The eax7 block
        	// below compares SI against 7; without this SI is uninitialized.
        	// MOVL does not modify flags and CPUID does not write SI.
        	MOVL	AX, SI
    33  	CMPL	AX, $0
    34  	JE	nocpuinfo
    35  
    36  	CMPL	BX, $0x756E6547  // "Genu"
    37  	JNE	notintel
    38  	CMPL	DX, $0x49656E69  // "ineI"
    39  	JNE	notintel
    40  	CMPL	CX, $0x6C65746E  // "ntel"
    41  	JNE	notintel
    42  	MOVB	$1, runtime·isIntel(SB)
    43  notintel:
    44  
    45  	// Load EAX=1 cpuid flags
    46  	MOVL	$1, AX
    47  	CPUID
    48  	MOVL	AX, runtime·processorVersionInfo(SB)
    49  
    50  	TESTL	$(1<<26), DX // SSE2
    51  	SETNE	runtime·support_sse2(SB)
    52  
    53  	TESTL	$(1<<9), CX // SSSE3
    54  	SETNE	runtime·support_ssse3(SB)
    55  
    56  	TESTL	$(1<<19), CX // SSE4.1
    57  	SETNE	runtime·support_sse41(SB)
    58  
    59  	TESTL	$(1<<20), CX // SSE4.2
    60  	SETNE	runtime·support_sse42(SB)
    61  
    62  	TESTL	$(1<<23), CX // POPCNT
    63  	SETNE	runtime·support_popcnt(SB)
    64  
    65  	TESTL	$(1<<25), CX // AES
    66  	SETNE	runtime·support_aes(SB)
    67  
    68  	TESTL	$(1<<27), CX // OSXSAVE
    69  	SETNE	runtime·support_osxsave(SB)
    70  
    71  	// If OS support for XMM and YMM is not present
    72  	// support_avx will be set back to false later.
    73  	TESTL	$(1<<28), CX // AVX
    74  	SETNE	runtime·support_avx(SB)
    75  
    76  eax7:
    77  	// Load EAX=7/ECX=0 cpuid flags
    78  	CMPL	SI, $7			// SI = max basic leaf saved after the first CPUID
    79  	JLT	osavx
    80  	MOVL	$7, AX
    81  	MOVL	$0, CX
    82  	CPUID
    83  
    84  	TESTL	$(1<<3), BX // BMI1
    85  	SETNE	runtime·support_bmi1(SB)
    86  
    87  	// If OS support for XMM and YMM is not present
    88  	// support_avx2 will be set back to false later.
    89  	TESTL	$(1<<5), BX // AVX2
    90  	SETNE	runtime·support_avx2(SB)
    91  
    92  	TESTL	$(1<<8), BX // BMI2
    93  	SETNE	runtime·support_bmi2(SB)
    94  
    95  	TESTL	$(1<<9), BX // ERMS
    96  	SETNE	runtime·support_erms(SB)
    97  
    98  osavx:
    99  	// nacl does not support XGETBV to test
   100  	// for XMM and YMM OS support.
        	// With GOOS_nacl defined the block below is compiled out, so control
        	// always falls into noavx and both AVX flags are cleared.
   101  #ifndef GOOS_nacl
   102  	CMPB	runtime·support_osxsave(SB), $1
   103  	JNE	noavx
   104  	MOVL	$0, CX
   105  	// For XGETBV, OSXSAVE bit is required and sufficient
   106  	XGETBV
   107  	ANDL	$6, AX
   108  	CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
   109  	JE nocpuinfo
   110  #endif
   111  noavx:
   112  	MOVB $0, runtime·support_avx(SB)
   113  	MOVB $0, runtime·support_avx2(SB)
   114  
   115  nocpuinfo:
   116  
   117  needtls:
   118  	LEAL	runtime·m0+m_tls(SB), DI
   119  	CALL	runtime·settls(SB)
   120  
   121  	// store through it, to make sure it works
   122  	get_tls(BX)
   123  	MOVQ	$0x123, g(BX)
   124  	MOVQ	runtime·m0+m_tls(SB), AX
   125  	CMPQ	AX, $0x123
   126  	JEQ 2(PC)
   127  	MOVL	AX, 0	// abort
   128  ok:
   129  	// set the per-goroutine and per-mach "registers"
   130  	get_tls(BX)
   131  	LEAL	runtime·g0(SB), CX
   132  	MOVL	CX, g(BX)
   133  	LEAL	runtime·m0(SB), AX
   134  
   135  	// save m->g0 = g0
   136  	MOVL	CX, m_g0(AX)
   137  	// save m0 to g0->m
   138  	MOVL	AX, g_m(CX)
   139  
   140  	CLD				// convention is D is always left cleared
   141  	CALL	runtime·check(SB)
   142  
   143  	MOVL	16(SP), AX		// copy argc
   144  	MOVL	AX, 0(SP)
   145  	MOVL	24(SP), AX		// copy argv
   146  	MOVL	AX, 4(SP)
   147  	CALL	runtime·args(SB)
   148  	CALL	runtime·osinit(SB)
   149  	CALL	runtime·schedinit(SB)
   150  
   151  	// create a new goroutine to start program
   152  	MOVL	$runtime·mainPC(SB), AX	// entry
   153  	MOVL	$0, 0(SP)
   154  	MOVL	AX, 4(SP)
   155  	CALL	runtime·newproc(SB)
   156  
   157  	// start this M
   158  	CALL	runtime·mstart(SB)
   159  
   160  	MOVL	$0xf1, 0xf1  // crash
   161  	RET
   162  
   163  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
   164  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only, 4 bytes; rt0_go passes its address to newproc
   165  
   166  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Executes INT $3 (the x86 breakpoint trap) and then returns.
   167  	INT $3
   168  	RET
   169  
   170  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   171  	// No per-thread init is needed here, so this returns immediately.
   172  	RET
   173  
   174  /*
   175   *  go-routine
   176   */
   177  
   178  // void gosave(Gobuf*)
   179  // save state in Gobuf; setjmp
        // Stores the caller's SP and PC and the current g into the Gobuf and
        // zeroes gobuf_ret. Calls badctxt if gobuf_ctxt is not already zero.
   180  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   181  	MOVL	buf+0(FP), AX	// gobuf
   182  	LEAL	buf+0(FP), BX	// caller's SP
   183  	MOVL	BX, gobuf_sp(AX)
   184  	MOVL	0(SP), BX		// caller's PC
   185  	MOVL	BX, gobuf_pc(AX)
   186  	MOVQ	$0, gobuf_ret(AX)	// 8-byte store clears the saved return value
   187  	// Assert ctxt is zero. See func save.
   188  	MOVL	gobuf_ctxt(AX), BX
   189  	TESTL	BX, BX
   190  	JZ	2(PC)			// ctxt == 0: skip the badctxt call
   191  	CALL	runtime·badctxt(SB)
   192  	get_tls(CX)
   193  	MOVL	g(CX), BX
   194  	MOVL	BX, gobuf_g(AX)	// record the current g
   195  	RET
   196  
   197  // void gogo(Gobuf*)
   198  // restore state from Gobuf; longjmp
        // Makes gobuf_g the current g, loads SP, the context register (DX) and the
        // return value (AX) from the Gobuf, clears those Gobuf fields, and jumps
        // to gobuf_pc. Control does not come back to the caller of gogo.
   199  TEXT runtime·gogo(SB), NOSPLIT, $8-4
   200  	MOVL	buf+0(FP), BX		// gobuf
   201  
   202  	// If ctxt is not nil, invoke deletion barrier before overwriting.
   203  	MOVL	gobuf_ctxt(BX), DX
   204  	TESTL	DX, DX
   205  	JZ	nilctxt
   206  	LEAL	gobuf_ctxt(BX), AX
   207  	MOVL	AX, 0(SP)		// arg 0: address of the ctxt slot
   208  	MOVL	$0, 4(SP)		// arg 1: the value about to be stored (0)
   209  	CALL	runtime·writebarrierptr_prewrite(SB)
   210  	MOVL	buf+0(FP), BX		// reload gobuf pointer after the call
   211  
   212  nilctxt:
   213  	MOVL	gobuf_g(BX), DX
   214  	MOVL	0(DX), CX		// make sure g != nil
   215  	get_tls(CX)
   216  	MOVL	DX, g(CX)
   217  	MOVL	gobuf_sp(BX), SP	// restore SP
   218  	MOVL	gobuf_ctxt(BX), DX
   219  	MOVQ	gobuf_ret(BX), AX
   220  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   221  	MOVQ	$0, gobuf_ret(BX)
   222  	MOVL	$0, gobuf_ctxt(BX)
   223  	MOVL	gobuf_pc(BX), BX
   224  	JMP	BX
   225  
   226  // func mcall(fn func(*g))
   227  // Switch to m->g0's stack, call fn(g).
   228  // Fn must never return. It should gogo(&g->sched)
   229  // to keep running g.
        // If the current g already is m->g0, jumps to badmcall instead.
        // If fn does return, jumps to badmcall2.
   230  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   231  	MOVL	fn+0(FP), DI
   232  	
   233  	get_tls(CX)
   234  	MOVL	g(CX), AX	// save state in g->sched
   235  	MOVL	0(SP), BX	// caller's PC
   236  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   237  	LEAL	fn+0(FP), BX	// caller's SP
   238  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   239  	MOVL	AX, (g_sched+gobuf_g)(AX)
   240  
   241  	// switch to m->g0 & its stack, call fn
   242  	MOVL	g(CX), BX
   243  	MOVL	g_m(BX), BX
   244  	MOVL	m_g0(BX), SI
   245  	CMPL	SI, AX	// if g == m->g0 call badmcall
   246  	JNE	3(PC)
   247  	MOVL	$runtime·badmcall(SB), AX
   248  	JMP	AX
   249  	MOVL	SI, g(CX)	// g = m->g0
   250  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   251  	PUSHQ	AX		// argument to fn: the g we switched away from
   252  	MOVL	DI, DX		// DX = fn, the closure context register
   253  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   254  	CALL	DI
   255  	POPQ	AX
   256  	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   257  	JMP	AX
   258  	RET
   259  
   260  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   261  // of the G stack. We need to distinguish the routine that
   262  // lives at the bottom of the G stack from the one that lives
   263  // at the top of the system stack because the one at the top of
   264  // the system stack terminates the stack walk (see topofstack()).
        // Its body is a single RET; systemstack only stores its address as the
        // saved PC in g->sched.
   265  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   266  	RET
   267  
   268  // func systemstack(fn func())
        // Runs fn on the m's g0 stack. If the current g is already g0 or gsignal,
        // fn is called directly on the current stack. If the current g is m->curg,
        // its state is saved in g->sched, the stack is switched to g0, fn is
        // called, and the stack is switched back. Any other g calls badsystemstack.
   269  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   270  	MOVL	fn+0(FP), DI	// DI = fn
   271  	get_tls(CX)
   272  	MOVL	g(CX), AX	// AX = g
   273  	MOVL	g_m(AX), BX	// BX = m
   274  
   275  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   276  	CMPL	AX, DX
   277  	JEQ	noswitch
   278  
   279  	MOVL	m_g0(BX), DX	// DX = g0
   280  	CMPL	AX, DX
   281  	JEQ	noswitch
   282  
   283  	MOVL	m_curg(BX), R8
   284  	CMPL	AX, R8
   285  	JEQ	switch
   286  	
   287  	// Not g0, not curg. Must be gsignal, but that's not allowed.
   288  	// Hide call from linker nosplit analysis.
   289  	MOVL	$runtime·badsystemstack(SB), AX
   290  	CALL	AX
   291  
   292  switch:
   293  	// save our state in g->sched. Pretend to
   294  	// be systemstack_switch if the G stack is scanned.
   295  	MOVL	$runtime·systemstack_switch(SB), SI
   296  	MOVL	SI, (g_sched+gobuf_pc)(AX)
   297  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   298  	MOVL	AX, (g_sched+gobuf_g)(AX)
   299  
   300  	// switch to g0
   301  	MOVL	DX, g(CX)
   302  	MOVL	(g_sched+gobuf_sp)(DX), SP
   303  
   304  	// call target function
   305  	MOVL	DI, DX		// DX = fn, the closure context register
   306  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   307  	CALL	DI
   308  
   309  	// switch back to g
   310  	get_tls(CX)
   311  	MOVL	g(CX), AX
   312  	MOVL	g_m(AX), BX
   313  	MOVL	m_curg(BX), AX
   314  	MOVL	AX, g(CX)
   315  	MOVL	(g_sched+gobuf_sp)(AX), SP
   316  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been restored
   317  	RET
   318  
   319  noswitch:
   320  	// already on m stack, just call directly
   321  	MOVL	DI, DX
   322  	MOVL	0(DI), DI
   323  	CALL	DI
   324  	RET
   325  
   326  /*
   327   * support for morestack
   328   */
   329  
   330  // Called during function prolog when more stack is needed.
   331  //
   332  // The traceback routines see morestack on a g0 as being
   333  // the top of a stack (for example, morestack calling newstack
   334  // calling the scheduler calling newm calling gc), so we must
   335  // record an argument size. For that purpose, it has no arguments.
        //
        // Records f's caller in m->morebuf and f's own context in g->sched,
        // switches to m->g0's stack, pushes DX as the ctxt argument and calls
        // newstack. Refuses (bad* call followed by a load from address 0) when
        // the current g is g0 or gsignal.
   336  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   337  	get_tls(CX)
   338  	MOVL	g(CX), BX
   339  	MOVL	g_m(BX), BX
   340  
   341  	// Cannot grow scheduler stack (m->g0).
   342  	MOVL	m_g0(BX), SI
   343  	CMPL	g(CX), SI
   344  	JNE	3(PC)
   345  	CALL	runtime·badmorestackg0(SB)
   346  	MOVL	0, AX		// load from address 0 if the call above returns
   347  
   348  	// Cannot grow signal stack (m->gsignal).
   349  	MOVL	m_gsignal(BX), SI
   350  	CMPL	g(CX), SI
   351  	JNE	3(PC)
   352  	CALL	runtime·badmorestackgsignal(SB)
   353  	MOVL	0, AX		// load from address 0 if the call above returns
   354  
   355  	// Called from f.
   356  	// Set m->morebuf to f's caller.
   357  	MOVL	8(SP), AX	// f's caller's PC
   358  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   359  	LEAL	16(SP), AX	// f's caller's SP
   360  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   361  	get_tls(CX)
   362  	MOVL	g(CX), SI
   363  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   364  
   365  	// Set g->sched to context in f.
   366  	MOVL	0(SP), AX // f's PC
   367  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   368  	MOVL	SI, (g_sched+gobuf_g)(SI)
   369  	LEAL	8(SP), AX // f's SP
   370  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   371  	// newstack will fill gobuf.ctxt.
   372  
   373  	// Call newstack on m->g0's stack.
   374  	MOVL	m_g0(BX), BX
   375  	MOVL	BX, g(CX)
   376  	MOVL	(g_sched+gobuf_sp)(BX), SP
   377  	PUSHQ	DX	// ctxt argument
   378  	CALL	runtime·newstack(SB)
   379  	MOVL	$0, 0x1003	// crash if newstack returns
   380  	POPQ	DX	// keep balance check happy
   381  	RET
   382  
   383  // morestack trampolines
        // morestack_noctxt zeroes DX (which morestack pushes as the ctxt
        // argument) and then tail-jumps to morestack.
   384  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   385  	MOVL	$0, DX
   386  	JMP	runtime·morestack(SB)
   387  
   388  // reflectcall: call a function with the given argument list
   389  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   390  // we don't have variable-sized frames, so we use a small number
   391  // of constant-sized-frame functions to encode a few bits of size in the pc.
   392  // Caution: ugly multiline assembly macros in your future!
   393  
        // DISPATCH(NAME, MAXSIZE): if CX (unsigned) is at most MAXSIZE, jump to
        // NAME; otherwise JA 3(PC) skips the MOVL/JMP pair and falls through to
        // whatever follows the macro. Clobbers AX when the jump is taken.
   394  #define DISPATCH(NAME,MAXSIZE)		\
   395  	CMPL	CX, $MAXSIZE;		\
   396  	JA	3(PC);			\
   397  	MOVL	$NAME(SB), AX;		\
   398  	JMP	AX
   399  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   400  
   401  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Tail-jumps to ·reflectcall, which therefore sees the same arguments.
   402  	JMP	·reflectcall(SB)
   403  
   404  TEXT ·reflectcall(SB), NOSPLIT, $0-20
        	// Loads argsize (zero-extended) into CX and jumps to the first
        	// runtime·callN whose N is >= argsize. If argsize exceeds the largest
        	// size listed (1073741824), jumps to badreflectcall.
   405  	MOVLQZX argsize+12(FP), CX
   406  	DISPATCH(runtime·call16, 16)
   407  	DISPATCH(runtime·call32, 32)
   408  	DISPATCH(runtime·call64, 64)
   409  	DISPATCH(runtime·call128, 128)
   410  	DISPATCH(runtime·call256, 256)
   411  	DISPATCH(runtime·call512, 512)
   412  	DISPATCH(runtime·call1024, 1024)
   413  	DISPATCH(runtime·call2048, 2048)
   414  	DISPATCH(runtime·call4096, 4096)
   415  	DISPATCH(runtime·call8192, 8192)
   416  	DISPATCH(runtime·call16384, 16384)
   417  	DISPATCH(runtime·call32768, 32768)
   418  	DISPATCH(runtime·call65536, 65536)
   419  	DISPATCH(runtime·call131072, 131072)
   420  	DISPATCH(runtime·call262144, 262144)
   421  	DISPATCH(runtime·call524288, 524288)
   422  	DISPATCH(runtime·call1048576, 1048576)
   423  	DISPATCH(runtime·call2097152, 2097152)
   424  	DISPATCH(runtime·call4194304, 4194304)
   425  	DISPATCH(runtime·call8388608, 8388608)
   426  	DISPATCH(runtime·call16777216, 16777216)
   427  	DISPATCH(runtime·call33554432, 33554432)
   428  	DISPATCH(runtime·call67108864, 67108864)
   429  	DISPATCH(runtime·call134217728, 134217728)
   430  	DISPATCH(runtime·call268435456, 268435456)
   431  	DISPATCH(runtime·call536870912, 536870912)
   432  	DISPATCH(runtime·call1073741824, 1073741824)
   433  	MOVL	$runtime·badreflectcall(SB), AX
   434  	JMP	AX
   435  
   436  #define CALLFN(NAME,MAXSIZE)			\
   437  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   438  	NO_LOCAL_POINTERS;			\
   439  	/* copy argsize bytes from argptr to the bottom of this frame */		\
   440  	MOVL	argptr+8(FP), SI;		\
   441  	MOVL	argsize+12(FP), CX;		\
   442  	MOVL	SP, DI;				\
   443  	REP;MOVSB;				\
   444  	/* call function: DX = f (closure context), AX = code pointer at 0(f) */			\
   445  	MOVL	f+4(FP), DX;			\
   446  	MOVL	(DX), AX;			\
   447  	CALL	AX;				\
   448  	/* copy return values back: hand callRet argtype in DX, dst = argptr+retoffset in DI, src = SP+retoffset in SI, length = argsize-retoffset in CX */		\
   449  	MOVL	argtype+0(FP), DX;		\
   450  	MOVL	argptr+8(FP), DI;		\
   451  	MOVL	argsize+12(FP), CX;		\
   452  	MOVL	retoffset+16(FP), BX;		\
   453  	MOVL	SP, SI;				\
   454  	ADDL	BX, DI;				\
   455  	ADDL	BX, SI;				\
   456  	SUBL	BX, CX;				\
   457  	CALL	callRet<>(SB);			\
   458  	RET
   459  
   460  // callRet copies return values back at the end of call*. This is a
   461  // separate function so it can allocate stack space for the arguments
   462  // to reflectcallmove. It does not follow the Go ABI; it expects its
   463  // arguments in registers.
        // Inputs: DX, DI, SI, CX. They are stored, in that order, as the four
        // 4-byte stack arguments of reflectcallmove.
   464  TEXT callRet<>(SB), NOSPLIT, $16-0
   465  	MOVL	DX, 0(SP)
   466  	MOVL	DI, 4(SP)
   467  	MOVL	SI, 8(SP)
   468  	MOVL	CX, 12(SP)
   469  	CALL	runtime·reflectcallmove(SB)
   470  	RET
   471  
   472  CALLFN(·call16, 16)	// one fixed-frame call function per power-of-two size from 16 to 1073741824
   473  CALLFN(·call32, 32)
   474  CALLFN(·call64, 64)
   475  CALLFN(·call128, 128)
   476  CALLFN(·call256, 256)
   477  CALLFN(·call512, 512)
   478  CALLFN(·call1024, 1024)
   479  CALLFN(·call2048, 2048)
   480  CALLFN(·call4096, 4096)
   481  CALLFN(·call8192, 8192)
   482  CALLFN(·call16384, 16384)
   483  CALLFN(·call32768, 32768)
   484  CALLFN(·call65536, 65536)
   485  CALLFN(·call131072, 131072)
   486  CALLFN(·call262144, 262144)
   487  CALLFN(·call524288, 524288)
   488  CALLFN(·call1048576, 1048576)
   489  CALLFN(·call2097152, 2097152)
   490  CALLFN(·call4194304, 4194304)
   491  CALLFN(·call8388608, 8388608)
   492  CALLFN(·call16777216, 16777216)
   493  CALLFN(·call33554432, 33554432)
   494  CALLFN(·call67108864, 67108864)
   495  CALLFN(·call134217728, 134217728)
   496  CALLFN(·call268435456, 268435456)
   497  CALLFN(·call536870912, 536870912)
   498  CALLFN(·call1073741824, 1073741824)
   499  
   500  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Spins for `cycles` iterations, executing PAUSE once per iteration.
        	// NOTE(review): the count is decremented before it is tested, so a
        	// cycles value of 0 wraps around and spins about 2^32 times.
        	// NOTE(review): the frame is declared $0-0 although a 4-byte argument
        	// is read from cycles+0(FP) — confirm against the Go declaration.
   501  	MOVL	cycles+0(FP), AX
   502  again:
   503  	PAUSE
   504  	SUBL	$1, AX
   505  	JNZ	again
   506  	RET
   507  
   508  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   509  	// Stores are already ordered on x86, so this is just a
   510  	// compile barrier.
        	// No instructions are needed beyond the return.
   511  	RET
   512  
   513  // void jmpdefer(fn, sp);
   514  // called from deferreturn.
   515  // 1. pop the caller
   516  // 2. sub 5 bytes from the callers return
   517  // 3. jmp to the argument
        // On the jump DX holds fv (the closure context) and SP points at the
        // adjusted return address, so when the deferred function returns it
        // lands 5 bytes before the original return address.
   518  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   519  	MOVL	fv+0(FP), DX
   520  	MOVL	argp+4(FP), BX
   521  	LEAL	-8(BX), SP	// caller sp after CALL
   522  	SUBL	$5, (SP)	// return to CALL again
   523  	MOVL	0(DX), BX	// BX = code pointer stored in the first word of fv
   524  	JMP	BX	// but first run the deferred function
   525  
   526  // func asmcgocall(fn, arg unsafe.Pointer) int32
   527  // Not implemented.
   528  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
   529  	MOVL	0, AX	// load from absolute address 0 (not the immediate $0); presumably a deliberate fault
   530  	RET
   531  
   532  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   533  // Not implemented.
   534  TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
   535  	MOVL	0, AX	// load from absolute address 0 (not the immediate $0); presumably a deliberate fault
   536  	RET
   537  
   538  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   539  // Not implemented.
   540  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
   541  	MOVL	0, AX	// load from absolute address 0 (not the immediate $0); presumably a deliberate fault
   542  	RET
   543  
   544  // void setg(G*); set g. for use by needm.
   545  // Not implemented.
   546  TEXT runtime·setg(SB), NOSPLIT, $0-4
   547  	MOVL	0, AX	// load from absolute address 0 (not the immediate $0); presumably a deliberate fault
   548  	RET
   549  
   550  // check that SP is in range [g->stack.lo, g->stack.hi)
        // A failed check executes a load from address 0 instead of returning an
        // error. Both comparisons are unsigned.
   551  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   552  	get_tls(CX)
   553  	MOVL	g(CX), AX
   554  	CMPL	(g_stack+stack_hi)(AX), SP
   555  	JHI	2(PC)		// stack.hi > SP: upper bound holds, skip the fault
   556  	MOVL	0, AX
   557  	CMPL	SP, (g_stack+stack_lo)(AX)
   558  	JHI	2(PC)		// SP > stack.lo: lower bound holds, skip the fault
   559  	MOVL	0, AX
   560  	RET
   561  
   562  TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
        	// Zeroes n bytes starting at ptr: first n/4 four-byte stores, then
        	// the remaining n%4 bytes one at a time.
   563  	MOVL	ptr+0(FP), DI
   564  	MOVL	n+4(FP), CX
   565  	MOVQ	CX, BX
   566  	ANDQ	$3, BX		// BX = n % 4, the byte tail
   567  	SHRQ	$2, CX		// CX = n / 4, the number of 4-byte stores
   568  	MOVQ	$0, AX		// value to store
   569  	CLD			// make the string stores move forward
   570  	REP
   571  	STOSL
   572  	MOVQ	BX, CX
   573  	REP
   574  	STOSB
   575  	// Note: we zero only 4 bytes at a time so that the tail is at most
   576  	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
   577  	// See issue 13160.
   578  	RET
   579  
   580  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
        	// Returns the 4-byte word stored 8 bytes below the address passed in
        	// argp, which the code treats as the saved return PC.
   581  	MOVL	argp+0(FP),AX		// addr of first arg
   582  	MOVL	-8(AX),AX		// get calling pc
   583  	MOVL	AX, ret+8(FP)
   584  	RET
   585  
   586  // int64 runtime·cputicks(void)
        // Returns the 64-bit time-stamp counter read with RDTSC.
   587  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   588  	RDTSC			// result arrives split across DX (high half) and AX (low half)
   589  	SHLQ	$32, DX		// move the high half into bits 32-63
   590  	ADDQ	DX, AX		// AX = (high << 32) + low
   591  	MOVQ	AX, ret+0(FP)
   592  	RET
   593  
   594  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   595  // redirects to memhash(p, h, size) using the size
   596  // stored in the closure.
        // The closure pointer is expected in DX; the size is read from offset 4.
   597  TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
   598  	GO_ARGS
   599  	NO_LOCAL_POINTERS
   600  	MOVL	p+0(FP), AX
   601  	MOVL	h+4(FP), BX
   602  	MOVL	4(DX), CX	// size from the closure
   603  	MOVL	AX, 0(SP)	// memhash arg 0: p
   604  	MOVL	BX, 4(SP)	// memhash arg 1: h
   605  	MOVL	CX, 8(SP)	// memhash arg 2: size
   606  	CALL	runtime·memhash(SB)
   607  	MOVL	16(SP), AX	// memhash result slot
   608  	MOVL	AX, ret+8(FP)
   609  	RET
   610  
   611  // hash function using AES hardware instructions
   612  // For now, our one amd64p32 system (NaCl) does not
   613  // support using AES instructions, so have not bothered to
   614  // write the implementations. Can copy and adjust the ones
   615  // in asm_amd64.s when the time comes.
   616  
   617  TEXT runtime·aeshash(SB),NOSPLIT,$0-20
        	// Placeholder: stores whatever AX holds on entry as the result.
   618  	MOVL	AX, ret+16(FP)
   619  	RET
   620  
   621  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
        	// Placeholder: stores whatever AX holds on entry as the result.
   622  	MOVL	AX, ret+8(FP)
   623  	RET
   624  
   625  TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
        	// Placeholder: stores whatever AX holds on entry as the result.
   626  	MOVL	AX, ret+8(FP)
   627  	RET
   628  
   629  TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
        	// Placeholder: stores whatever AX holds on entry as the result.
   630  	MOVL	AX, ret+8(FP)
   631  	RET
   632  
   633  // memequal(p, q unsafe.Pointer, size uintptr) bool
        // Returns true immediately when both pointers are identical; otherwise
        // returns the result of memeqbody over size bytes.
   634  TEXT runtime·memequal(SB),NOSPLIT,$0-17
   635  	MOVL	a+0(FP), SI
   636  	MOVL	b+4(FP), DI
   637  	CMPL	SI, DI
   638  	JEQ	eq
   639  	MOVL	size+8(FP), BX
   640  	CALL	runtime·memeqbody(SB)
   641  	MOVB	AX, ret+16(FP)
   642  	RET
   643  eq:
   644  	MOVB    $1, ret+16(FP)
   645  	RET
   646  
   647  // memequal_varlen(a, b unsafe.Pointer) bool
        // Like memequal, but the byte count is read from the closure in DX.
   648  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   649  	MOVL    a+0(FP), SI
   650  	MOVL    b+4(FP), DI
   651  	CMPL    SI, DI
   652  	JEQ     eq
   653  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   654  	CALL    runtime·memeqbody(SB)
   655  	MOVB    AX, ret+8(FP)
   656  	RET
   657  eq:
   658  	MOVB    $1, ret+8(FP)
   659  	RET
   660  
   661  // eqstring tests whether two strings are equal.
   662  // The compiler guarantees that strings passed
   663  // to eqstring have equal length.
   664  // See runtime_test.go:eqstring_generic for
   665  // equivalent Go code.
        // Only s1's length is read; s2's length is never loaded.
   666  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   667  	MOVL	s1_base+0(FP), SI
   668  	MOVL	s2_base+8(FP), DI
   669  	CMPL	SI, DI
   670  	JEQ	same		// identical base pointers: equal without comparing bytes
   671  	MOVL	s1_len+4(FP), BX
   672  	CALL	runtime·memeqbody(SB)
   673  	MOVB	AX, ret+16(FP)
   674  	RET
   675  same:
   676  	MOVB	$1, ret+16(FP)
   677  	RET
   678  
   679  // a in SI
   680  // b in DI
   681  // count in BX
        // Result in AX: 1 if the two ranges are equal, 0 otherwise.
        // Modifies SI, DI, BX, CX, DX and X0-X7.
   682  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   683  	XORQ	AX, AX		// AX = 0, so every early RET below reports "not equal"
   684  
   685  	CMPQ	BX, $8
   686  	JB	small
   687  	
   688  	// 64 bytes at a time using xmm registers
   689  hugeloop:
   690  	CMPQ	BX, $64
   691  	JB	bigloop
   692  	MOVOU	(SI), X0
   693  	MOVOU	(DI), X1
   694  	MOVOU	16(SI), X2
   695  	MOVOU	16(DI), X3
   696  	MOVOU	32(SI), X4
   697  	MOVOU	32(DI), X5
   698  	MOVOU	48(SI), X6
   699  	MOVOU	48(DI), X7
   700  	PCMPEQB	X1, X0
   701  	PCMPEQB	X3, X2
   702  	PCMPEQB	X5, X4
   703  	PCMPEQB	X7, X6
   704  	PAND	X2, X0
   705  	PAND	X6, X4
   706  	PAND	X4, X0		// X0 = AND of all four per-byte equality masks
   707  	PMOVMSKB X0, DX
   708  	ADDQ	$64, SI
   709  	ADDQ	$64, DI
   710  	SUBQ	$64, BX
   711  	CMPL	DX, $0xffff	// all 16 mask bits set means all 64 bytes matched
   712  	JEQ	hugeloop
   713  	RET
   714  
   715  	// 8 bytes at a time using 64-bit register
   716  bigloop:
   717  	CMPQ	BX, $8
   718  	JBE	leftover
   719  	MOVQ	(SI), CX
   720  	MOVQ	(DI), DX
   721  	ADDQ	$8, SI
   722  	ADDQ	$8, DI
   723  	SUBQ	$8, BX
   724  	CMPQ	CX, DX
   725  	JEQ	bigloop
   726  	RET
   727  
   728  	// remaining 0-8 bytes
        	// Compares the final 8 bytes of each range; this load may overlap
        	// bytes that were already compared.
   729  leftover:
   730  	ADDQ	BX, SI
   731  	ADDQ	BX, DI
   732  	MOVQ	-8(SI), CX
   733  	MOVQ	-8(DI), DX
   734  	CMPQ	CX, DX
   735  	SETEQ	AX
   736  	RET
   737  
   738  small:
   739  	CMPQ	BX, $0
   740  	JEQ	equal		// zero length: ZF is set, so SETEQ at equal yields 1
   741  
   742  	LEAQ	0(BX*8), CX
   743  	NEGQ	CX		// CX = -(8*count), used below as a shift count
   744  
   745  	CMPB	SI, $0xf8
   746  	JA	si_high
   747  
   748  	// load at SI won't cross a page boundary.
   749  	MOVQ	(SI), SI
   750  	JMP	si_finish
   751  si_high:
   752  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   753  	MOVQ	BX, DX
   754  	ADDQ	SI, DX
   755  	MOVQ	-8(DX), SI
   756  	SHRQ	CX, SI
   757  si_finish:
   758  
   759  	// same for DI.
   760  	CMPB	DI, $0xf8
   761  	JA	di_high
   762  	MOVQ	(DI), DI
   763  	JMP	di_finish
   764  di_high:
   765  	MOVQ	BX, DX
   766  	ADDQ	DI, DX
   767  	MOVQ	-8(DX), DI
   768  	SHRQ	CX, DI
   769  di_finish:
   770  
   771  	SUBQ	SI, DI
   772  	SHLQ	CX, DI		// shift out the bytes beyond count; ZF set iff the rest is zero
   773  equal:
   774  	SETEQ	AX
   775  	RET
   776  
   777  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
        	// Loads both strings' base pointers and lengths into the registers
        	// cmpbody expects (SI/BX for s1, DI/DX for s2) and returns its AX.
   778  	MOVL	s1_base+0(FP), SI
   779  	MOVL	s1_len+4(FP), BX
   780  	MOVL	s2_base+8(FP), DI
   781  	MOVL	s2_len+12(FP), DX
   782  	CALL	runtime·cmpbody(SB)
   783  	MOVL	AX, ret+16(FP)
   784  	RET
   785  
   786  TEXT bytes·Compare(SB),NOSPLIT,$0-28
        	// Loads the two 4-byte words at offsets 0/4 (s1) and 12/16 (s2) into
        	// the registers cmpbody expects (SI/BX and DI/DX) and returns its AX.
   787  	MOVL	s1+0(FP), SI
   788  	MOVL	s1+4(FP), BX
   789  	MOVL	s2+12(FP), DI
   790  	MOVL	s2+16(FP), DX
   791  	CALL	runtime·cmpbody(SB)
   792  	MOVL	AX, res+24(FP)
   793  	RET
   794  
   795  // input:
   796  //   SI = a
   797  //   DI = b
   798  //   BX = alen
   799  //   DX = blen
   800  // output:
   801  //   AX = 1/0/-1
        // The first min(alen, blen) bytes are compared; if they are all equal
        // (or a and b are the same pointer) the result is decided by the
        // lengths alone at allsame.
   802  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
   803  	CMPQ	SI, DI
   804  	JEQ	allsame
   805  	CMPQ	BX, DX
   806  	MOVQ	DX, R8
   807  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   808  	CMPQ	R8, $8
   809  	JB	small
   810  
   811  loop:
   812  	CMPQ	R8, $16
   813  	JBE	_0through16
   814  	MOVOU	(SI), X0
   815  	MOVOU	(DI), X1
   816  	PCMPEQB X0, X1
   817  	PMOVMSKB X1, AX
   818  	XORQ	$0xffff, AX	// convert EQ to NE
   819  	JNE	diff16	// branch if at least one byte is not equal
   820  	ADDQ	$16, SI
   821  	ADDQ	$16, DI
   822  	SUBQ	$16, R8
   823  	JMP	loop
   824  	
   825  	// AX = bit mask of differences
   826  diff16:
   827  	BSFQ	AX, BX	// index of first byte that differs
   828  	XORQ	AX, AX
   829  	ADDQ	BX, SI
   830  	MOVB	(SI), CX
   831  	ADDQ	BX, DI
   832  	CMPB	CX, (DI)
   833  	SETHI	AX	// AX = 1 if a's byte is above b's byte (unsigned), else 0
   834  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   835  	RET
   836  
   837  	// 0 through 16 bytes left, alen>=8, blen>=8
   838  _0through16:
   839  	CMPQ	R8, $8
   840  	JBE	_0through8
   841  	MOVQ	(SI), AX
   842  	MOVQ	(DI), CX
   843  	CMPQ	AX, CX
   844  	JNE	diff8
   845  _0through8:
        	// Compare the last 8 bytes of the common prefix.
   846  	ADDQ	R8, SI
   847  	ADDQ	R8, DI
   848  	MOVQ	-8(SI), AX
   849  	MOVQ	-8(DI), CX
   850  	CMPQ	AX, CX
   851  	JEQ	allsame
   852  
   853  	// AX and CX contain parts of a and b that differ.
   854  diff8:
   855  	BSWAPQ	AX	// reverse order of bytes
   856  	BSWAPQ	CX
   857  	XORQ	AX, CX
   858  	BSRQ	CX, CX	// index of highest bit difference
   859  	SHRQ	CX, AX	// move a's bit to bottom
   860  	ANDQ	$1, AX	// mask bit
   861  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   862  	RET
   863  
   864  	// 0-7 bytes in common
   865  small:
   866  	LEAQ	(R8*8), CX	// bytes left -> bits left
   867  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   868  	JEQ	allsame
   869  
   870  	// load bytes of a into high bytes of SI
   871  	CMPB	SI, $0xf8
   872  	JA	si_high
   873  	MOVQ	(SI), SI
   874  	JMP	si_finish
   875  si_high:
   876  	ADDQ	R8, SI
   877  	MOVQ	-8(SI), SI
   878  	SHRQ	CX, SI
   879  si_finish:
   880  	SHLQ	CX, SI
   881  
   882  	// load bytes of b into high bytes of DI
   883  	CMPB	DI, $0xf8
   884  	JA	di_high
   885  	MOVQ	(DI), DI
   886  	JMP	di_finish
   887  di_high:
   888  	ADDQ	R8, DI
   889  	MOVQ	-8(DI), DI
   890  	SHRQ	CX, DI
   891  di_finish:
   892  	SHLQ	CX, DI
   893  
   894  	BSWAPQ	SI	// reverse order of bytes
   895  	BSWAPQ	DI
   896  	XORQ	SI, DI	// find bit differences
   897  	JEQ	allsame
   898  	BSRQ	DI, CX	// index of highest bit difference
   899  	SHRQ	CX, SI	// move a's bit to bottom
   900  	ANDQ	$1, SI	// mask bit
   901  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   902  	RET
   903  
   904  allsame:
   905  	XORQ	AX, AX
   906  	XORQ	CX, CX
   907  	CMPQ	BX, DX
   908  	SETGT	AX	// 1 if alen > blen
   909  	SETEQ	CX	// 1 if alen == blen
   910  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   911  	RET
   912  
   913  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
        	// Loads the data pointer, length and sought byte into SI, BX and AL,
        	// then returns the AX produced by indexbytebody.
   914  	MOVL s+0(FP), SI
   915  	MOVL s_len+4(FP), BX
   916  	MOVB c+12(FP), AL
   917  	CALL runtime·indexbytebody(SB)
   918  	MOVL AX, ret+16(FP)
   919  	RET
   920  
   921  TEXT strings·IndexByte(SB),NOSPLIT,$0-20
        	// Loads the data pointer, length and sought byte into SI, BX and AL,
        	// then returns the AX produced by indexbytebody.
   922  	MOVL s+0(FP), SI
   923  	MOVL s_len+4(FP), BX
   924  	MOVB c+8(FP), AL
   925  	CALL runtime·indexbytebody(SB)
   926  	MOVL AX, ret+16(FP)
   927  	RET
   928  
   929  // input:
   930  //   SI: data
   931  //   BX: data len
   932  //   AL: byte sought
   933  // output:
   934  //   AX
        // AX is the offset from SI of the first matching byte, or -1 if none.
        // Lengths below 16 use REPN SCASB only. Longer inputs scan the
        // unaligned head with SCASB, the aligned middle 16 bytes at a time
        // with SSE, and the tail with SCASB again.
   935  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
   936  	MOVL SI, DI
   937  
   938  	CMPL BX, $16
   939  	JLT small
   940  
   941  	// round up to first 16-byte boundary
   942  	TESTL $15, SI
   943  	JZ aligned
   944  	MOVL SI, CX
   945  	ANDL $~15, CX
   946  	ADDL $16, CX
   947  
   948  	// search the beginning
   949  	SUBL SI, CX
   950  	REPN; SCASB
   951  	JZ success
   952  
   953  // DI is 16-byte aligned; get ready to search using SSE instructions
   954  aligned:
   955  	// round down to last 16-byte boundary
   956  	MOVL BX, R11
   957  	ADDL SI, R11
   958  	ANDL $~15, R11
   959  
   960  	// shuffle X0 around so that each byte contains c
   961  	MOVD AX, X0
   962  	PUNPCKLBW X0, X0
   963  	PUNPCKLBW X0, X0
   964  	PSHUFL $0, X0, X0
   965  	JMP condition
   966  
   967  sse:
   968  	// move the next 16-byte chunk of the buffer into X1
   969  	MOVO (DI), X1
   970  	// compare bytes in X0 to X1
   971  	PCMPEQB X0, X1
   972  	// take the top bit of each byte in X1 and put the result in DX
   973  	PMOVMSKB X1, DX
   974  	TESTL DX, DX
   975  	JNZ ssesuccess
   976  	ADDL $16, DI
   977  
   978  condition:
   979  	CMPL DI, R11
   980  	JLT sse
   981  
   982  	// search the end
   983  	MOVL SI, CX
   984  	ADDL BX, CX
   985  	SUBL R11, CX
   986  	// if CX == 0, the zero flag will be set and we'll end up
   987  	// returning a false success
   988  	JZ failure
   989  	REPN; SCASB
   990  	JZ success
   991  
   992  failure:
   993  	MOVL $-1, AX
   994  	RET
   995  
   996  // handle for lengths < 16
   997  small:
   998  	MOVL BX, CX
   999  	REPN; SCASB
  1000  	JZ success
  1001  	MOVL $-1, AX
  1002  	RET
  1003  
  1004  // we've found the chunk containing the byte
  1005  // now just figure out which specific byte it is
  1006  ssesuccess:
  1007  	// get the index of the least significant set bit
  1008  	BSFW DX, DX
  1009  	SUBL SI, DI	// DI = offset of this 16-byte chunk from the start
  1010  	ADDL DI, DX	// DX = chunk offset + index within the chunk
  1011  	MOVL DX, AX
  1012  	RET
  1013  
  1014  success:
        	// SCASB leaves DI one past the matching byte, hence the extra -1.
  1015  	SUBL SI, DI
  1016  	SUBL $1, DI
  1017  	MOVL DI, AX
  1018  	RET
  1019  
  1020  TEXT bytes·Equal(SB),NOSPLIT,$0-25
        	// Returns false when the two lengths differ; otherwise returns the
        	// result of memeqbody over a_len bytes.
  1021  	MOVL	a_len+4(FP), BX
  1022  	MOVL	b_len+16(FP), CX
  1023  	XORL	AX, AX		// default result: not equal
  1024  	CMPL	BX, CX
  1025  	JNE	eqret
  1026  	MOVL	a+0(FP), SI
  1027  	MOVL	b+12(FP), DI
  1028  	CALL	runtime·memeqbody(SB)
  1029  eqret:
  1030  	MOVB	AX, ret+24(FP)
  1031  	RET
  1032  
  1033  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns.
  1034  	MOVL	$0, AX
  1035  	RET
  1036  
  1037  // The top-most function running on a goroutine
  1038  // returns to goexit+PCQuantum.
        // The leading NOP byte means that return address falls just after it,
        // on the CALL to goexit1.
  1039  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1040  	BYTE	$0x90	// NOP
  1041  	CALL	runtime·goexit1(SB)	// does not return
  1042  	// traceback from goexit1 must hit code range of goexit
  1043  	BYTE	$0x90	// NOP
  1044  
  1045  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT0 hint for the memory at addr.
  1046  	MOVL	addr+0(FP), AX
  1047  	PREFETCHT0	(AX)
  1048  	RET
  1049  
  1050  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT1 hint for the memory at addr.
  1051  	MOVL	addr+0(FP), AX
  1052  	PREFETCHT1	(AX)
  1053  	RET
  1054  
  1055  
  1056  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT2 hint for the memory at addr.
  1057  	MOVL	addr+0(FP), AX
  1058  	PREFETCHT2	(AX)
  1059  	RET
  1060  
  1061  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHNTA hint for the memory at addr.
  1062  	MOVL	addr+0(FP), AX
  1063  	PREFETCHNTA	(AX)
  1064  	RET
  1065  
  1066  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Always returns true; no checks are performed in this file.
  1067  	MOVB	$1, ret+0(FP)
  1068  	RET