github.com/zxy12/golang151_with_comment@v0.0.0-20190507085033-721809559d3c/runtime/asm_amd64p32.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0	// program entry: builds g0/m0 and TLS, runs check/args/osinit/schedinit, queues mainPC via newproc, then calls mstart
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	MOVL	SP, CX
    15  	SUBL	$128, CX		// plenty of scratch; subtract from CX (not SP) so the reservation survives the MOVL CX, SP below
    16  	ANDL	$~15, CX		// round down to a 16-byte boundary
    17  	MOVL	CX, SP
    18  
    19  	MOVL	AX, 16(SP)		// stash argc inside the scratch area
    20  	MOVL	BX, 24(SP)		// stash argv inside the scratch area
    21  	
    22  	// create istack out of the given (operating system) stack.
    23  	MOVL	$runtime·g0(SB), DI
    24  	LEAL	(-64*1024+104)(SP), BX	// lower bound used for g0: SP - 64KB + 104
    25  	MOVL	BX, g_stackguard0(DI)
    26  	MOVL	BX, g_stackguard1(DI)
    27  	MOVL	BX, (g_stack+stack_lo)(DI)
    28  	MOVL	SP, (g_stack+stack_hi)(DI)
    29  
    30  	// find out information about the processor we're on
    31  	MOVQ	$0, AX
    32  	CPUID
    33  	CMPQ	AX, $0			// CPUID leaf 0 returned 0 in AX: skip leaf 1
    34  	JE	nocpuinfo
    35  	MOVQ	$1, AX
    36  	CPUID
    37  	MOVL	CX, runtime·cpuid_ecx(SB)
    38  	MOVL	DX, runtime·cpuid_edx(SB)
    39  nocpuinfo:	
    40  	
    41  needtls:
    42  	LEAL	runtime·tls0(SB), DI
    43  	CALL	runtime·settls(SB)
    44  
    45  	// store through it, to make sure it works
    46  	get_tls(BX)
    47  	MOVQ	$0x123, g(BX)
    48  	MOVQ	runtime·tls0(SB), AX
    49  	CMPQ	AX, $0x123
    50  	JEQ 2(PC)			// value read back through tls0: skip the abort
    51  	MOVL	AX, 0	// abort
    52  ok:
    53  	// set the per-goroutine and per-mach "registers"
    54  	get_tls(BX)
    55  	LEAL	runtime·g0(SB), CX
    56  	MOVL	CX, g(BX)
    57  	LEAL	runtime·m0(SB), AX
    58  
    59  	// save m->g0 = g0
    60  	MOVL	CX, m_g0(AX)
    61  	// save m0 to g0->m
    62  	MOVL	AX, g_m(CX)
    63  
    64  	CLD				// convention is D is always left cleared
    65  	CALL	runtime·check(SB)
    66  
    67  	MOVL	16(SP), AX		// copy argc
    68  	MOVL	AX, 0(SP)
    69  	MOVL	24(SP), AX		// copy argv
    70  	MOVL	AX, 4(SP)
    71  	CALL	runtime·args(SB)
    72  	CALL	runtime·osinit(SB)
    73  	CALL	runtime·schedinit(SB)
    74  
    75  	// create a new goroutine to start program
    76  	MOVL	$runtime·mainPC(SB), AX	// entry
    77  	MOVL	$0, 0(SP)		// first newproc argument is 0
    78  	MOVL	AX, 4(SP)		// second newproc argument is the address of mainPC
    79  	CALL	runtime·newproc(SB)
    80  
    81  	// start this M
    82  	CALL	runtime·mstart(SB)
    83  
    84  	MOVL	$0xf1, 0xf1  // crash
    85  	RET
    86  
    87  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte cell initialised with the address of runtime·main
    88  GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only, 4 bytes; rt0_go passes its address to newproc
    89  
    90  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// no arguments, no results
    91  	INT $3	// x86 breakpoint interrupt
    92  	RET
    93  
    94  TEXT runtime·asminit(SB),NOSPLIT,$0-0	// empty body: returns immediately
    95  	// No per-thread init.
    96  	RET
    97  
    98  /*
    99   *  go-routine
   100   */
   101  
   102  // void gosave(Gobuf*)
   103  // save state in Gobuf; setjmp. Records the caller's SP, PC and the current g; zeroes ctxt and ret.
   104  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   105  	MOVL	buf+0(FP), AX	// gobuf
   106  	LEAL	buf+0(FP), BX	// caller's SP
   107  	MOVL	BX, gobuf_sp(AX)
   108  	MOVL	0(SP), BX		// caller's PC
   109  	MOVL	BX, gobuf_pc(AX)
   110  	MOVL	$0, gobuf_ctxt(AX)	// no context saved
   111  	MOVQ	$0, gobuf_ret(AX)	// ret is cleared with an 8-byte store
   112  	get_tls(CX)
   113  	MOVL	g(CX), BX		// BX = current g
   114  	MOVL	BX, gobuf_g(AX)
   115  	RET
   116  
   117  // void gogo(Gobuf*)
   118  // restore state from Gobuf; longjmp. Makes gobuf.g current, reloads SP, ctxt (DX) and ret (AX), then jumps to gobuf.pc.
   119  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   120  	MOVL	buf+0(FP), BX		// gobuf
   121  	MOVL	gobuf_g(BX), DX		// DX = gobuf.g
   122  	MOVL	0(DX), CX		// make sure g != nil
   123  	get_tls(CX)
   124  	MOVL	DX, g(CX)		// current g = gobuf.g
   125  	MOVL	gobuf_sp(BX), SP	// restore SP
   126  	MOVL	gobuf_ctxt(BX), DX	// DX = saved ctxt
   127  	MOVQ	gobuf_ret(BX), AX	// AX = saved ret
   128  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   129  	MOVQ	$0, gobuf_ret(BX)
   130  	MOVL	$0, gobuf_ctxt(BX)
   131  	MOVL	gobuf_pc(BX), BX
   132  	JMP	BX			// continue at the saved PC; never returns here
   133  
   134  // func mcall(fn func(*g))
   135  // Switch to m->g0's stack, call fn(g).
   136  // Fn must never return.  It should gogo(&g->sched)
   137  // to keep running g. If fn does return, control goes to badmcall2.
   138  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   139  	MOVL	fn+0(FP), DI
   140  	
   141  	get_tls(CX)
   142  	MOVL	g(CX), AX	// save state in g->sched
   143  	MOVL	0(SP), BX	// caller's PC
   144  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   145  	LEAL	fn+0(FP), BX	// caller's SP
   146  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   147  	MOVL	AX, (g_sched+gobuf_g)(AX)
   148  
   149  	// switch to m->g0 & its stack, call fn
   150  	MOVL	g(CX), BX
   151  	MOVL	g_m(BX), BX
   152  	MOVL	m_g0(BX), SI
   153  	CMPL	SI, AX	// if g == m->g0 call badmcall
   154  	JNE	3(PC)	// g != m->g0: skip the two-instruction badmcall jump
   155  	MOVL	$runtime·badmcall(SB), AX
   156  	JMP	AX
   157  	MOVL	SI, g(CX)	// g = m->g0
   158  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   159  	PUSHQ	AX	// argument for fn: the g that called mcall
   160  	MOVL	DI, DX	// DX = fn
   161  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   162  	CALL	DI
   163  	POPQ	AX	// only reached if fn returned
   164  	MOVL	$runtime·badmcall2(SB), AX
   165  	JMP	AX
   166  	RET
   167  
   168  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   169  // of the G stack.  We need to distinguish the routine that
   170  // lives at the bottom of the G stack from the one that lives
   171  // at the top of the system stack because the one at the top of
   172  // the system stack terminates the stack walk (see topofstack()).
   173  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// systemstack stores this address as g->sched.pc before switching
   174  	RET
   175  
   176  // func systemstack(fn func()): call fn directly when already on gsignal or g0; otherwise switch from curg to g0, call fn, and switch back.
   177  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   178  	MOVL	fn+0(FP), DI	// DI = fn
   179  	get_tls(CX)
   180  	MOVL	g(CX), AX	// AX = g
   181  	MOVL	g_m(AX), BX	// BX = m
   182  
   183  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   184  	CMPL	AX, DX
   185  	JEQ	noswitch
   186  
   187  	MOVL	m_g0(BX), DX	// DX = g0
   188  	CMPL	AX, DX
   189  	JEQ	noswitch
   190  
   191  	MOVL	m_curg(BX), R8	// R8 = m->curg
   192  	CMPL	AX, R8
   193  	JEQ	switch
   194  	
   195  	// Not g0, not curg. Must be gsignal, but that's not allowed.
   196  	// Hide call from linker nosplit analysis.
   197  	MOVL	$runtime·badsystemstack(SB), AX
   198  	CALL	AX
   199  
   200  switch:
   201  	// save our state in g->sched.  Pretend to
   202  	// be systemstack_switch if the G stack is scanned.
   203  	MOVL	$runtime·systemstack_switch(SB), SI
   204  	MOVL	SI, (g_sched+gobuf_pc)(AX)
   205  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   206  	MOVL	AX, (g_sched+gobuf_g)(AX)
   207  
   208  	// switch to g0
   209  	MOVL	DX, g(CX)
   210  	MOVL	(g_sched+gobuf_sp)(DX), SP
   211  
   212  	// call target function
   213  	MOVL	DI, DX		// DX = fn
   214  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   215  	CALL	DI
   216  
   217  	// switch back to g
   218  	get_tls(CX)
   219  	MOVL	g(CX), AX
   220  	MOVL	g_m(AX), BX
   221  	MOVL	m_curg(BX), AX	// AX = m->curg, the g to resume
   222  	MOVL	AX, g(CX)
   223  	MOVL	(g_sched+gobuf_sp)(AX), SP
   224  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP once restored
   225  	RET
   226  
   227  noswitch:
   228  	// already on m stack, just call directly
   229  	MOVL	DI, DX		// DX = fn
   230  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   231  	CALL	DI
   232  	RET
   233  
   234  /*
   235   * support for morestack
   236   */
   237  
   238  // Called during function prolog when more stack is needed.
   239  // Saves f's caller in m->morebuf and f's own context in g->sched, then calls newstack on m->g0's stack.
   240  // The traceback routines see morestack on a g0 as being
   241  // the top of a stack (for example, morestack calling newstack
   242  // calling the scheduler calling newm calling gc), so we must
   243  // record an argument size. For that purpose, it has no arguments.
   244  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   245  	get_tls(CX)
   246  	MOVL	g(CX), BX
   247  	MOVL	g_m(BX), BX	// BX = m
   248  
   249  	// Cannot grow scheduler stack (m->g0).
   250  	MOVL	m_g0(BX), SI
   251  	CMPL	g(CX), SI
   252  	JNE	2(PC)
   253  	MOVL	0, AX		// load from address 0 to crash
   254  
   255  	// Cannot grow signal stack (m->gsignal).
   256  	MOVL	m_gsignal(BX), SI
   257  	CMPL	g(CX), SI
   258  	JNE	2(PC)
   259  	MOVL	0, AX		// load from address 0 to crash
   260  
   261  	// Called from f.
   262  	// Set m->morebuf to f's caller.
   263  	MOVL	8(SP), AX	// f's caller's PC
   264  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   265  	LEAL	16(SP), AX	// f's caller's SP
   266  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   267  	get_tls(CX)
   268  	MOVL	g(CX), SI
   269  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   270  
   271  	// Set g->sched to context in f.
   272  	MOVL	0(SP), AX // f's PC
   273  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   274  	MOVL	SI, (g_sched+gobuf_g)(SI)
   275  	LEAL	8(SP), AX // f's SP
   276  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   277  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as it was on entry is recorded as ctxt
   278  
   279  	// Call newstack on m->g0's stack.
   280  	MOVL	m_g0(BX), BX
   281  	MOVL	BX, g(CX)
   282  	MOVL	(g_sched+gobuf_sp)(BX), SP
   283  	CALL	runtime·newstack(SB)
   284  	MOVL	$0, 0x1003	// crash if newstack returns
   285  	RET
   286  
   287  // morestack trampolines: enter morestack with DX cleared
   288  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   289  	MOVL	$0, DX	// morestack stores DX into g->sched.ctxt, so this records a zero ctxt
   290  	JMP	runtime·morestack(SB)
   291  
   292  TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   293  	// We came here via a RET to an overwritten return PC.
   294  	// AX may be live. Other registers are available.
   295  
   296  	// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   297  	get_tls(CX)
   298  	MOVL	g(CX), CX	// CX = g
   299  	MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of the g.stkbar array
   300  	MOVL	g_stkbarPos(CX), BX	// BX = g.stkbarPos
   301  	IMULL	$stkbar__size, BX	// Too big for SIB.
   302  	ADDL	DX, BX	// BX = &g.stkbar[g.stkbarPos]
   303  	MOVL	stkbar_savedLRVal(BX), BX
   304  	// Record that this stack barrier was hit.
   305  	ADDL	$1, g_stkbarPos(CX)
   306  	// Jump to the original return PC.
   307  	JMP	BX
   308  
   309  // reflectcall: call a function with the given argument list
   310  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   311  // we don't have variable-sized frames, so we use a small number
   312  // of constant-sized-frame functions to encode a few bits of size in the pc.
   313  // DISPATCH(NAME,MAXSIZE) jumps to NAME when CX <= MAXSIZE (unsigned); otherwise it falls through to whatever follows.
   314  
   315  #define DISPATCH(NAME,MAXSIZE)		\
   316  	CMPL	CX, $MAXSIZE;		\
   317  	JA	3(PC);			\
   318  	MOVL	$NAME(SB), AX;		\
   319  	JMP	AX
   320  // Note: can't just "JMP NAME(SB)" - bad inlining results. The target address is loaded into AX first.
   321  
   322  TEXT reflect·call(SB), NOSPLIT, $0-0
   323  	JMP	·reflectcall(SB)	// tail-jump: reflectcall reads the arguments from the same frame
   324  
   325  TEXT ·reflectcall(SB), NOSPLIT, $0-20	// jumps to the first callN whose N >= argsize
   326  	MOVLQZX argsize+12(FP), CX	// CX = argsize, zero-extended; tested by each DISPATCH
   327  	DISPATCH(runtime·call16, 16)
   328  	DISPATCH(runtime·call32, 32)
   329  	DISPATCH(runtime·call64, 64)
   330  	DISPATCH(runtime·call128, 128)
   331  	DISPATCH(runtime·call256, 256)
   332  	DISPATCH(runtime·call512, 512)
   333  	DISPATCH(runtime·call1024, 1024)
   334  	DISPATCH(runtime·call2048, 2048)
   335  	DISPATCH(runtime·call4096, 4096)
   336  	DISPATCH(runtime·call8192, 8192)
   337  	DISPATCH(runtime·call16384, 16384)
   338  	DISPATCH(runtime·call32768, 32768)
   339  	DISPATCH(runtime·call65536, 65536)
   340  	DISPATCH(runtime·call131072, 131072)
   341  	DISPATCH(runtime·call262144, 262144)
   342  	DISPATCH(runtime·call524288, 524288)
   343  	DISPATCH(runtime·call1048576, 1048576)
   344  	DISPATCH(runtime·call2097152, 2097152)
   345  	DISPATCH(runtime·call4194304, 4194304)
   346  	DISPATCH(runtime·call8388608, 8388608)
   347  	DISPATCH(runtime·call16777216, 16777216)
   348  	DISPATCH(runtime·call33554432, 33554432)
   349  	DISPATCH(runtime·call67108864, 67108864)
   350  	DISPATCH(runtime·call134217728, 134217728)
   351  	DISPATCH(runtime·call268435456, 268435456)
   352  	DISPATCH(runtime·call536870912, 536870912)
   353  	DISPATCH(runtime·call1073741824, 1073741824)
   354  	MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds every bucket
   355  	JMP	AX
   356  
   357  #define CALLFN(NAME,MAXSIZE)			\
   358  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   359  	NO_LOCAL_POINTERS;			\
   360  	/* copy argsize bytes from argptr to the frame at SP */	\
   361  	MOVL	argptr+8(FP), SI;		\
   362  	MOVL	argsize+12(FP), CX;		\
   363  	MOVL	SP, DI;				\
   364  	REP;MOVSB;				\
   365  	/* call the code pointer stored in the first word of f; DX = f */	\
   366  	MOVL	f+4(FP), DX;			\
   367  	MOVL	(DX), AX;			\
   368  	CALL	AX;				\
   369  	/* copy bytes [retoffset, argsize) from the frame back to argptr */	\
   370  	MOVL	argptr+8(FP), DI;		\
   371  	MOVL	argsize+12(FP), CX;		\
   372  	MOVL	retoffset+16(FP), BX;		\
   373  	MOVL	SP, SI;				\
   374  	ADDL	BX, DI;				\
   375  	ADDL	BX, SI;				\
   376  	SUBL	BX, CX;				\
   377  	REP;MOVSB;				\
   378  	/* execute write barrier updates: callwritebarrier(argtype, argptr, argsize, retoffset) */	\
   379  	MOVL	argtype+0(FP), DX;		\
   380  	MOVL	argptr+8(FP), DI;		\
   381  	MOVL	argsize+12(FP), CX;		\
   382  	MOVL	retoffset+16(FP), BX;		\
   383  	MOVL	DX, 0(SP);			\
   384  	MOVL	DI, 4(SP);			\
   385  	MOVL	CX, 8(SP);			\
   386  	MOVL	BX, 12(SP);			\
   387  	CALL	runtime·callwritebarrier(SB);	\
   388  	RET
   389  
   390  CALLFN(·call16, 16)
   391  CALLFN(·call32, 32)
   392  CALLFN(·call64, 64)
   393  CALLFN(·call128, 128)
   394  CALLFN(·call256, 256)
   395  CALLFN(·call512, 512)
   396  CALLFN(·call1024, 1024)
   397  CALLFN(·call2048, 2048)
   398  CALLFN(·call4096, 4096)
   399  CALLFN(·call8192, 8192)
   400  CALLFN(·call16384, 16384)
   401  CALLFN(·call32768, 32768)
   402  CALLFN(·call65536, 65536)
   403  CALLFN(·call131072, 131072)
   404  CALLFN(·call262144, 262144)
   405  CALLFN(·call524288, 524288)
   406  CALLFN(·call1048576, 1048576)
   407  CALLFN(·call2097152, 2097152)
   408  CALLFN(·call4194304, 4194304)
   409  CALLFN(·call8388608, 8388608)
   410  CALLFN(·call16777216, 16777216)
   411  CALLFN(·call33554432, 33554432)
   412  CALLFN(·call67108864, 67108864)
   413  CALLFN(·call134217728, 134217728)
   414  CALLFN(·call268435456, 268435456)
   415  CALLFN(·call536870912, 536870912)
   416  CALLFN(·call1073741824, 1073741824)
   417  
   418  // bool cas(int32 *val, int32 old, int32 new)
   419  // Atomically:
   420  //	if(*val == old){
   421  //		*val = new;
   422  //		return 1;
   423  //	} else
   424  //		return 0;
   425  TEXT runtime·cas(SB), NOSPLIT, $0-17
   426  	MOVL	ptr+0(FP), BX
   427  	MOVL	old+4(FP), AX	// CMPXCHGL compares *ptr against AX
   428  	MOVL	new+8(FP), CX
   429  	LOCK
   430  	CMPXCHGL	CX, 0(BX)
   431  	SETEQ	ret+16(FP)	// ZF is set when the exchange happened
   432  	RET
   433  
   434  TEXT runtime·casuintptr(SB), NOSPLIT, $0-17
   435  	JMP	runtime·cas(SB)	// same frame layout as cas, so tail-jump to it
   436  
   437  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-12
   438  	JMP	runtime·atomicload(SB)	// tail-jump; atomicload is defined outside this file
   439  
   440  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-12
   441  	JMP	runtime·atomicload(SB)	// tail-jump; atomicload is defined outside this file
   442  
   443  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-12
   444  	JMP	runtime·atomicstore(SB)	// tail-jump to the 32-bit atomicstore
   445  
   446  // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
   447  // Atomically:
   448  //	if(*val == old){
   449  //		*val = new;
   450  //		return 1;
   451  //	} else {
   452  //		return 0;
   453  //	}
   454  TEXT runtime·cas64(SB), NOSPLIT, $0-25
   455  	MOVL	ptr+0(FP), BX
   456  	MOVQ	old+8(FP), AX	// old is passed by value; CMPXCHGQ compares *ptr against AX
   457  	MOVQ	new+16(FP), CX
   458  	LOCK
   459  	CMPXCHGQ	CX, 0(BX)
   460  	SETEQ	ret+24(FP)	// ZF is set when the exchange happened
   461  	RET
   462  
   463  // bool casp1(void **val, void *old, void *new)
   464  // Atomically (32-bit compare-and-swap, same instruction sequence as cas):
   465  //	if(*val == old){
   466  //		*val = new;
   467  //		return 1;
   468  //	} else
   469  //		return 0;
   470  TEXT runtime·casp1(SB), NOSPLIT, $0-17
   471  	MOVL	ptr+0(FP), BX
   472  	MOVL	old+4(FP), AX	// CMPXCHGL compares *ptr against AX
   473  	MOVL	new+8(FP), CX
   474  	LOCK
   475  	CMPXCHGL	CX, 0(BX)
   476  	SETEQ	ret+16(FP)	// ZF is set when the exchange happened
   477  	RET
   478  
   479  // uint32 xadd(uint32 volatile *val, int32 delta)
   480  // Atomically adds delta to *val and returns the sum:
   481  //	*val += delta;
   482  //	return *val;
   483  TEXT runtime·xadd(SB), NOSPLIT, $0-12
   484  	MOVL	delta+4(FP), DX		// DX = delta; XADDL leaves the old *val here
   485  	MOVL	ptr+0(FP), SI
   486  	MOVL	DX, DI			// keep a copy of delta
   487  	LOCK
   488  	XADDL	DX, 0(SI)
   489  	ADDL	DI, DX			// old value + delta = new value
   490  	MOVL	DX, ret+8(FP)
   491  	RET
   492  
   493  TEXT runtime·xadd64(SB), NOSPLIT, $0-24	// 64-bit xadd: *ptr += delta under LOCK; returns the new value
   494  	MOVQ	delta+8(FP), DX		// DX = delta; XADDQ leaves the old *ptr here
   495  	MOVL	ptr+0(FP), SI
   496  	MOVQ	DX, DI			// keep a copy of delta
   497  	LOCK
   498  	XADDQ	DX, 0(SI)
   499  	ADDQ	DI, DX			// old value + delta = new value
   500  	MOVQ	DX, ret+16(FP)
   501  	RET
   502  
   503  TEXT runtime·xadduintptr(SB), NOSPLIT, $0-12
   504  	JMP	runtime·xadd(SB)	// same frame layout as xadd, so tail-jump to it
   505  
   506  TEXT runtime·xchg(SB), NOSPLIT, $0-12	// store new into *ptr and return the value it replaced
   507  	MOVL	new+4(FP), DX
   508  	MOVL	ptr+0(FP), CX
   509  	XCHGL	DX, 0(CX)		// DX <-> *ptr
   510  	MOVL	DX, ret+8(FP)
   511  	RET
   512  
   513  TEXT runtime·xchg64(SB), NOSPLIT, $0-24	// 64-bit xchg: store new into *ptr and return the value it replaced
   514  	MOVQ	new+8(FP), DX
   515  	MOVL	ptr+0(FP), CX
   516  	XCHGQ	DX, 0(CX)		// DX <-> *ptr
   517  	MOVQ	DX, ret+16(FP)
   518  	RET
   519  
   520  TEXT runtime·xchgp1(SB), NOSPLIT, $0-12	// 32-bit exchange with the same sequence as xchg
   521  	MOVL	new+4(FP), DX
   522  	MOVL	ptr+0(FP), CX
   523  	XCHGL	DX, 0(CX)		// DX <-> *ptr
   524  	MOVL	DX, ret+8(FP)
   525  	RET
   526  
   527  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
   528  	JMP	runtime·xchg(SB)	// same frame layout as xchg, so tail-jump to it
   529  
   530  TEXT runtime·procyield(SB),NOSPLIT,$0-4	// executes PAUSE `cycles` times; frame declares the 4-byte cycles argument read below
   531  	MOVL	cycles+0(FP), AX
   532  again:
   533  	PAUSE
   534  	SUBL	$1, AX		// NOTE(review): cycles == 0 wraps and spins 2^32 times; callers presumably pass > 0
   535  	JNZ	again
   536  	RET
   537  
   538  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8	// *ptr = val via XCHGL; the old value is discarded
   539  	MOVL	val+4(FP), DX
   540  	MOVL	ptr+0(FP), CX
   541  	XCHGL	DX, 0(CX)
   542  	RET
   543  
   544  TEXT runtime·atomicstore(SB), NOSPLIT, $0-8	// *ptr = val via XCHGL; the old value is discarded
   545  	MOVL	val+4(FP), DX
   546  	MOVL	ptr+0(FP), CX
   547  	XCHGL	DX, 0(CX)
   548  	RET
   549  
   550  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16	// 64-bit *ptr = val via XCHGQ; the old value is discarded
   551  	MOVQ	val+8(FP), DX
   552  	MOVL	ptr+0(FP), CX
   553  	XCHGQ	DX, 0(CX)
   554  	RET
   555  
   556  // void	runtime·atomicor8(byte volatile*, byte); ORs the byte into *ptr under LOCK.
   557  TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
   558  	MOVB	val+4(FP), CX
   559  	MOVL	ptr+0(FP), DX
   560  	LOCK
   561  	ORB	CX, 0(DX)
   562  	RET
   563  
   564  // void	runtime·atomicand8(byte volatile*, byte); ANDs the byte into *ptr under LOCK.
   565  TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
   566  	MOVB	val+4(FP), CX
   567  	MOVL	ptr+0(FP), DX
   568  	LOCK
   569  	ANDB	CX, 0(DX)
   570  	RET
   571  
   572  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// emits no instructions besides RET
   573  	// Stores are already ordered on x86, so this is just a
   574  	// compile barrier.
   575  	RET
   576  
   577  // void jmpdefer(fn, sp);
   578  // called from deferreturn.
   579  // 1. pop the caller
   580  // 2. sub 5 bytes from the callers return
   581  // 3. jmp to the argument
   582  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   583  	MOVL	fv+0(FP), DX	// DX = fn
   584  	MOVL	argp+4(FP), BX
   585  	LEAL	-8(BX), SP	// caller sp after CALL
   586  	SUBL	$5, (SP)	// return to CALL again
   587  	MOVL	0(DX), BX	// BX = code pointer stored in the first word of fn
   588  	JMP	BX	// but first run the deferred function
   589  
   590  // func asmcgocall(fn, arg unsafe.Pointer) int32
   591  // Not implemented.
   592  TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
   593  	MOVL	0, AX	// load from address 0 to crash
   594  	RET
   595  
   596  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   597  // Not implemented.
   598  TEXT runtime·cgocallback(SB),NOSPLIT,$0-12
   599  	MOVL	0, AX	// load from address 0 to crash
   600  	RET
   601  
   602  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   603  // Not implemented.
   604  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-12
   605  	MOVL	0, AX	// load from address 0 to crash
   606  	RET
   607  
   608  // void setg(G*); set g. for use by needm.
   609  // Not implemented.
   610  TEXT runtime·setg(SB), NOSPLIT, $0-4
   611  	MOVL	0, AX	// load from address 0 to crash
   612  	RET
   613  
   614  // check that SP is in range [g->stack.lo, g->stack.hi); crashes via a load from address 0 otherwise
   615  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   616  	get_tls(CX)
   617  	MOVL	g(CX), AX
   618  	CMPL	(g_stack+stack_hi)(AX), SP
   619  	JHI	2(PC)		// stack.hi > SP (unsigned): skip the crash
   620  	MOVL	0, AX
   621  	CMPL	SP, (g_stack+stack_lo)(AX)
   622  	JHI	2(PC)		// SP > stack.lo (unsigned): skip the crash
   623  	MOVL	0, AX
   624  	RET
   625  
   626  TEXT runtime·memclr(SB),NOSPLIT,$0-8	// zeroes n bytes at ptr: n/8 quadword stores, then n%8 byte stores
   627  	MOVL	ptr+0(FP), DI
   628  	MOVL	n+4(FP), CX
   629  	MOVQ	CX, BX
   630  	ANDQ	$7, BX		// BX = n % 8, bytes left after the quadword pass
   631  	SHRQ	$3, CX		// CX = n / 8, quadword count
   632  	MOVQ	$0, AX		// value to store
   633  	CLD			// make the string stores advance DI upward
   634  	REP
   635  	STOSQ
   636  	MOVQ	BX, CX
   637  	REP
   638  	STOSB
   639  	RET
   640  
   641  TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12	// returns the PC stored 8 bytes below argp, or nextBarrierPC's result if that PC is stackBarrierPC
   642  	MOVL	argp+0(FP),AX		// addr of first arg
   643  	MOVL	-8(AX),AX		// get calling pc
   644  	CMPL	AX, runtime·stackBarrierPC(SB)
   645  	JNE	nobar
   646  	// Get original return PC.
   647  	CALL	runtime·nextBarrierPC(SB)
   648  	MOVL	0(SP), AX		// result of nextBarrierPC
   649  nobar:
   650  	MOVL	AX, ret+8(FP)
   651  	RET
   652  
   653  TEXT runtime·setcallerpc(SB),NOSPLIT,$8-8	// overwrites the PC stored 8 bytes below argp, or calls setNextBarrierPC if that PC is stackBarrierPC
   654  	MOVL	argp+0(FP),AX		// addr of first arg
   655  	MOVL	pc+4(FP), BX		// pc to set
   656  	MOVL	-8(AX), CX		// CX = current calling pc
   657  	CMPL	CX, runtime·stackBarrierPC(SB)
   658  	JEQ	setbar
   659  	MOVQ	BX, -8(AX)		// set calling pc
   660  	RET
   661  setbar:
   662  	// Set the stack barrier return PC.
   663  	MOVL	BX, 0(SP)		// argument for setNextBarrierPC
   664  	CALL	runtime·setNextBarrierPC(SB)
   665  	RET
   666  
   667  TEXT runtime·getcallersp(SB),NOSPLIT,$0-12	// returns its argp argument unchanged
   668  	MOVL	argp+0(FP), CX
   669  	MOVL	CX, ret+8(FP)
   670  	RET
   671  
   672  // int64 runtime·cputicks(void): returns the RDTSC counter as DX:AX combined into one 64-bit value
   673  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   674  	RDTSC
   675  	SHLQ	$32, DX		// high half into bits 32..63
   676  	ADDQ	DX, AX		// AX = (DX << 32) + AX
   677  	MOVQ	AX, ret+0(FP)	// 8-byte result, hence the $0-8 frame
   678  	RET
   679  
   680  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   681  // forwards to memhash(p, h, size), taking size from
   682  // offset 4 of the closure that DX points at.
   683  TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
   684  	GO_ARGS
   685  	NO_LOCAL_POINTERS
   686  	MOVL	4(DX), CX		// size
   687  	MOVL	CX, 8(SP)
   688  	MOVL	h+4(FP), BX
   689  	MOVL	BX, 4(SP)
   690  	MOVL	p+0(FP), AX
   691  	MOVL	AX, 0(SP)
   692  	CALL	runtime·memhash(SB)
   693  	MOVL	16(SP), AX		// memhash result
   694  	MOVL	AX, ret+8(FP)
   695  	RET
   696  
   697  // hash function using AES hardware instructions
   698  // For now, our one amd64p32 system (NaCl) does not
   699  // support using AES instructions, so have not bothered to
   700  // write the implementations. Can copy and adjust the ones
   701  // in asm_amd64.s when the time comes.
   702  
   703  TEXT runtime·aeshash(SB),NOSPLIT,$0-20	// stub: no hash is computed
   704  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
   705  	RET
   706  
   707  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20	// stub: no hash is computed
   708  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
   709  	RET
   710  
   711  TEXT runtime·aeshash32(SB),NOSPLIT,$0-20	// stub: no hash is computed
   712  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
   713  	RET
   714  
   715  TEXT runtime·aeshash64(SB),NOSPLIT,$0-20	// stub: no hash is computed
   716  	MOVL	AX, ret+16(FP)	// stores whatever AX held on entry
   717  	RET
   718  
   719  TEXT runtime·memeq(SB),NOSPLIT,$0-17	// loads (a, b, size) into SI, DI, BX and returns memeqbody's AL
   720  	MOVL	size+8(FP), BX		// count
   721  	MOVL	b+4(FP), DI
   722  	MOVL	a+0(FP), SI
   723  	CALL	runtime·memeqbody(SB)
   724  	MOVB	AX, ret+16(FP)
   725  	RET
   726  
   727  // memequal_varlen(a, b unsafe.Pointer) bool: true when a == b, else memeqbody over the size read from the closure in DX
   728  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   729  	MOVL    a+0(FP), SI
   730  	MOVL    b+4(FP), DI
   731  	CMPL    SI, DI
   732  	JEQ     eq
   733  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   734  	CALL    runtime·memeqbody(SB)
   735  	MOVB    AX, ret+8(FP)
   736  	RET
   737  eq:
   738  	MOVB    $1, ret+8(FP)	// identical pointers: equal without comparing
   739  	RET
   740  
   741  // eqstring tests whether two strings are equal.
   742  // The compiler guarantees that strings passed
   743  // to eqstring have equal length, so only s1len is read.
   744  // See runtime_test.go:eqstring_generic for
   745  // equivalent Go code.
   746  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   747  	MOVL	s1str+0(FP), SI
   748  	MOVL	s2str+8(FP), DI
   749  	CMPL	SI, DI
   750  	JEQ	same
   751  	MOVL	s1len+4(FP), BX	// byte count for memeqbody
   752  	CALL	runtime·memeqbody(SB)
   753  	MOVB	AX, v+16(FP)
   754  	RET
   755  same:
   756  	MOVB	$1, v+16(FP)	// identical data pointers: equal without comparing
   757  	RET
   758  
   759  // a in SI
   760  // b in DI
   761  // count in BX; result in AX (1 if the count bytes match, 0 otherwise)
   762  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   763  	XORQ	AX, AX	// AX = 0: the value returned by the early-exit RETs below
   764  
   765  	CMPQ	BX, $8
   766  	JB	small
   767  	
   768  	// 64 bytes at a time using xmm registers
   769  hugeloop:
   770  	CMPQ	BX, $64
   771  	JB	bigloop
   772  	MOVOU	(SI), X0
   773  	MOVOU	(DI), X1
   774  	MOVOU	16(SI), X2
   775  	MOVOU	16(DI), X3
   776  	MOVOU	32(SI), X4
   777  	MOVOU	32(DI), X5
   778  	MOVOU	48(SI), X6
   779  	MOVOU	48(DI), X7
   780  	PCMPEQB	X1, X0
   781  	PCMPEQB	X3, X2
   782  	PCMPEQB	X5, X4
   783  	PCMPEQB	X7, X6
   784  	PAND	X2, X0
   785  	PAND	X6, X4
   786  	PAND	X4, X0	// X0 = AND of the four per-byte equality masks
   787  	PMOVMSKB X0, DX	// DX = 0xffff iff all 64 bytes matched
   788  	ADDQ	$64, SI
   789  	ADDQ	$64, DI
   790  	SUBQ	$64, BX
   791  	CMPL	DX, $0xffff
   792  	JEQ	hugeloop
   793  	RET	// mismatch: AX is still 0
   794  
   795  	// 8 bytes at a time using 64-bit register
   796  bigloop:
   797  	CMPQ	BX, $8
   798  	JBE	leftover
   799  	MOVQ	(SI), CX
   800  	MOVQ	(DI), DX
   801  	ADDQ	$8, SI
   802  	ADDQ	$8, DI
   803  	SUBQ	$8, BX
   804  	CMPQ	CX, DX
   805  	JEQ	bigloop
   806  	RET	// mismatch: AX is still 0
   807  
   808  	// remaining 0-8 bytes
   809  leftover:
   810  	ADDQ	BX, SI	// SI/DI now point just past the end of the data
   811  	ADDQ	BX, DI
   812  	MOVQ	-8(SI), CX	// compare the final 8 bytes; may overlap bytes already checked
   813  	MOVQ	-8(DI), DX
   814  	CMPQ	CX, DX
   815  	SETEQ	AX
   816  	RET
   817  
   818  small:
   819  	CMPQ	BX, $0
   820  	JEQ	equal	// count == 0: ZF is set, so SETEQ below yields 1
   821  
   822  	LEAQ	0(BX*8), CX
   823  	NEGQ	CX	// CX = -8*count; as a shift count (mod 64) this is 64 - 8*count
   824  
   825  	CMPB	SI, $0xf8	// test the low byte of the address
   826  	JA	si_high
   827  
   828  	// load at SI won't cross a page boundary.
   829  	MOVQ	(SI), SI
   830  	JMP	si_finish
   831  si_high:
   832  	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
   833  	MOVQ	BX, DX
   834  	ADDQ	SI, DX
   835  	MOVQ	-8(DX), SI
   836  	SHRQ	CX, SI
   837  si_finish:
   838  
   839  	// same for DI.
   840  	CMPB	DI, $0xf8
   841  	JA	di_high
   842  	MOVQ	(DI), DI
   843  	JMP	di_finish
   844  di_high:
   845  	MOVQ	BX, DX
   846  	ADDQ	DI, DX
   847  	MOVQ	-8(DX), DI
   848  	SHRQ	CX, DI
   849  di_finish:
   850  
   851  	SUBQ	SI, DI
   852  	SHLQ	CX, DI	// shift out the bytes beyond count; ZF set iff the first count bytes are equal
   853  equal:
   854  	SETEQ	AX
   855  	RET
   856  
   857  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// loads both strings into cmpbody's registers and returns its AX
   858  	MOVL	s2_len+12(FP), DX	// blen
   859  	MOVL	s2_base+8(FP), DI	// b
   860  	MOVL	s1_len+4(FP), BX	// alen
   861  	MOVL	s1_base+0(FP), SI	// a
   862  	CALL	runtime·cmpbody(SB)
   863  	MOVL	AX, ret+16(FP)
   864  	RET
   865  
   866  TEXT bytes·Compare(SB),NOSPLIT,$0-28	// loads both slices into cmpbody's registers and returns its AX
   867  	MOVL	s2+16(FP), DX		// blen
   868  	MOVL	s2+12(FP), DI		// b
   869  	MOVL	s1+4(FP), BX		// alen
   870  	MOVL	s1+0(FP), SI		// a
   871  	CALL	runtime·cmpbody(SB)
   872  	MOVL	AX, res+24(FP)
   873  	RET
   874  
   875  // input:
   876  //   SI = a
   877  //   DI = b
   878  //   BX = alen
   879  //   DX = blen
   880  // output:
   881  //   AX = 1/0/-1
   882  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
   883  	CMPQ	SI, DI
   884  	JEQ	allsame	// same pointer: the result depends only on the lengths
   885  	CMPQ	BX, DX
   886  	MOVQ	DX, R8
   887  	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   888  	CMPQ	R8, $8
   889  	JB	small
   890  
   891  loop:
   892  	CMPQ	R8, $16
   893  	JBE	_0through16
   894  	MOVOU	(SI), X0
   895  	MOVOU	(DI), X1
   896  	PCMPEQB X0, X1
   897  	PMOVMSKB X1, AX
   898  	XORQ	$0xffff, AX	// convert EQ to NE
   899  	JNE	diff16	// branch if at least one byte is not equal
   900  	ADDQ	$16, SI
   901  	ADDQ	$16, DI
   902  	SUBQ	$16, R8
   903  	JMP	loop
   904  	
   905  	// AX = bit mask of differences
   906  diff16:
   907  	BSFQ	AX, BX	// index of first byte that differs
   908  	XORQ	AX, AX
   909  	ADDQ	BX, SI
   910  	MOVB	(SI), CX
   911  	ADDQ	BX, DI
   912  	CMPB	CX, (DI)
   913  	SETHI	AX	// AX = 1 if a's byte is larger (unsigned), else 0
   914  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   915  	RET
   916  
   917  	// 0 through 16 bytes left, alen>=8, blen>=8
   918  _0through16:
   919  	CMPQ	R8, $8
   920  	JBE	_0through8
   921  	MOVQ	(SI), AX
   922  	MOVQ	(DI), CX
   923  	CMPQ	AX, CX
   924  	JNE	diff8
   925  _0through8:
   926  	ADDQ	R8, SI
   927  	ADDQ	R8, DI
   928  	MOVQ	-8(SI), AX	// final 8 bytes of the common prefix; may overlap bytes already compared
   929  	MOVQ	-8(DI), CX
   930  	CMPQ	AX, CX
   931  	JEQ	allsame
   932  
   933  	// AX and CX contain parts of a and b that differ.
   934  diff8:
   935  	BSWAPQ	AX	// reverse order of bytes
   936  	BSWAPQ	CX
   937  	XORQ	AX, CX
   938  	BSRQ	CX, CX	// index of highest bit difference
   939  	SHRQ	CX, AX	// move a's bit to bottom
   940  	ANDQ	$1, AX	// mask bit
   941  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   942  	RET
   943  
   944  	// 0-7 bytes in common
   945  small:
   946  	LEAQ	(R8*8), CX	// bytes left -> bits left
   947  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   948  	JEQ	allsame
   949  
   950  	// load bytes of a into high bytes of SI
   951  	CMPB	SI, $0xf8
   952  	JA	si_high
   953  	MOVQ	(SI), SI
   954  	JMP	si_finish
   955  si_high:
   956  	ADDQ	R8, SI
   957  	MOVQ	-8(SI), SI
   958  	SHRQ	CX, SI
   959  si_finish:
   960  	SHLQ	CX, SI
   961  
   962  	// load bytes of b into high bytes of DI
   963  	CMPB	DI, $0xf8
   964  	JA	di_high
   965  	MOVQ	(DI), DI
   966  	JMP	di_finish
   967  di_high:
   968  	ADDQ	R8, DI
   969  	MOVQ	-8(DI), DI
   970  	SHRQ	CX, DI
   971  di_finish:
   972  	SHLQ	CX, DI
   973  
   974  	BSWAPQ	SI	// reverse order of bytes
   975  	BSWAPQ	DI
   976  	XORQ	SI, DI	// find bit differences
   977  	JEQ	allsame
   978  	BSRQ	DI, CX	// index of highest bit difference
   979  	SHRQ	CX, SI	// move a's bit to bottom
   980  	ANDQ	$1, SI	// mask bit
   981  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   982  	RET
   983  
   984  allsame:
   985  	XORQ	AX, AX
   986  	XORQ	CX, CX
   987  	CMPQ	BX, DX
   988  	SETGT	AX	// 1 if alen > blen
   989  	SETEQ	CX	// 1 if alen == blen
   990  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   991  	RET
   992  
   993  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20	// loads the slice and byte into indexbytebody's registers and returns its AX
   994  	MOVB c+12(FP), AL
   995  	MOVL s_len+4(FP), BX
   996  	MOVL s+0(FP), SI
   997  	CALL runtime·indexbytebody(SB)
   998  	MOVL AX, ret+16(FP)
   999  	RET
  1000  
  1001  TEXT strings·IndexByte(SB),NOSPLIT,$0-20	// loads the string and byte into indexbytebody's registers and returns its AX
  1002  	MOVB c+8(FP), AL
  1003  	MOVL s_len+4(FP), BX
  1004  	MOVL s+0(FP), SI
  1005  	CALL runtime·indexbytebody(SB)
  1006  	MOVL AX, ret+16(FP)
  1007  	RET
  1008  
  1009  // input:
  1010  //   SI: data
  1011  //   BX: data len
  1012  //   AL: byte sought
  1013  // output:
  1014  //   AX: index of the first matching byte, or -1 if there is none
  1015  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
  1016  	MOVL SI, DI
  1017  
  1018  	CMPL BX, $16
  1019  	JLT small
  1020  
  1021  	// round up to first 16-byte boundary
  1022  	TESTL $15, SI
  1023  	JZ aligned
  1024  	MOVL SI, CX
  1025  	ANDL $~15, CX
  1026  	ADDL $16, CX
  1027  
  1028  	// search the beginning
  1029  	SUBL SI, CX
  1030  	REPN; SCASB
  1031  	JZ success
  1032  
  1033  // DI is 16-byte aligned; get ready to search using SSE instructions
  1034  aligned:
  1035  	// round down to last 16-byte boundary
  1036  	MOVL BX, R11
  1037  	ADDL SI, R11
  1038  	ANDL $~15, R11
  1039  
  1040  	// shuffle X0 around so that each byte contains c
  1041  	MOVD AX, X0
  1042  	PUNPCKLBW X0, X0
  1043  	PUNPCKLBW X0, X0
  1044  	PSHUFL $0, X0, X0
  1045  	JMP condition
  1046  
  1047  sse:
  1048  	// move the next 16-byte chunk of the buffer into X1
  1049  	MOVO (DI), X1
  1050  	// compare bytes in X0 to X1
  1051  	PCMPEQB X0, X1
  1052  	// take the top bit of each byte in X1 and put the result in DX
  1053  	PMOVMSKB X1, DX
  1054  	TESTL DX, DX
  1055  	JNZ ssesuccess
  1056  	ADDL $16, DI
  1057  
  1058  condition:
  1059  	CMPL DI, R11
  1060  	JB sse	// DI and R11 are addresses: compare unsigned so a buffer crossing 0x80000000 is still scanned
  1061  
  1062  	// search the end
  1063  	MOVL SI, CX
  1064  	ADDL BX, CX
  1065  	SUBL R11, CX
  1066  	// if CX == 0, the zero flag will be set and we'll end up
  1067  	// returning a false success
  1068  	JZ failure
  1069  	REPN; SCASB
  1070  	JZ success
  1071  
  1072  failure:
  1073  	MOVL $-1, AX
  1074  	RET
  1075  
  1076  // handle for lengths < 16
  1077  small:
  1078  	MOVL BX, CX
  1079  	REPN; SCASB
  1080  	JZ success
  1081  	MOVL $-1, AX
  1082  	RET
  1083  
  1084  // we've found the chunk containing the byte
  1085  // now just figure out which specific byte it is
  1086  ssesuccess:
  1087  	// get the index of the least significant set bit
  1088  	BSFW DX, DX
  1089  	SUBL SI, DI
  1090  	ADDL DI, DX
  1091  	MOVL DX, AX
  1092  	RET
  1093  
  1094  success:
  1095  	SUBL SI, DI
  1096  	SUBL $1, DI	// SCASB leaves DI one past the matching byte
  1097  	MOVL DI, AX
  1098  	RET
  1099  
  1100  TEXT bytes·Equal(SB),NOSPLIT,$0-25	// false when the lengths differ, otherwise memeqbody over a_len bytes
  1101  	MOVL	a_len+4(FP), BX
  1102  	MOVL	b_len+16(FP), CX
  1103  	XORL	AX, AX		// AX = 0: the result if the lengths differ
  1104  	CMPL	BX, CX
  1105  	JNE	eqret
  1106  	MOVL	a+0(FP), SI
  1107  	MOVL	b+12(FP), DI
  1108  	CALL	runtime·memeqbody(SB)
  1109  eqret:
  1110  	MOVB	AX, ret+24(FP)
  1111  	RET
  1112  
  1113  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4	// advances m.fastrand by one step and returns the new value
  1114  	get_tls(CX)
  1115  	MOVL	g(CX), AX
  1116  	MOVL	g_m(AX), AX	// AX = m
  1117  	MOVL	m_fastrand(AX), DX
  1118  	ADDL	DX, DX		// DX = 2 * state
  1119  	MOVL	DX, BX		// BX = doubled value, before the XOR
  1120  	XORL	$0x88888eef, DX
  1121  	CMOVLMI	BX, DX		// XOR result negative: keep the plain doubled value instead
  1122  	MOVL	DX, m_fastrand(AX)
  1123  	MOVL	DX, ret+0(FP)
  1124  	RET
  1125  
  1126  TEXT runtime·return0(SB), NOSPLIT, $0	// returns with AX = 0
  1127  	MOVL	$0, AX
  1128  	RET
  1129  
  1130  // The top-most function running on a goroutine
  1131  // returns to goexit+PCQuantum, i.e. just past the leading NOP byte.
  1132  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1133  	BYTE	$0x90	// NOP
  1134  	CALL	runtime·goexit1(SB)	// does not return
  1135  	// traceback from goexit1 must hit code range of goexit
  1136  	BYTE	$0x90	// NOP
  1137  
  1138  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4	// issues PREFETCHT0 for the byte at addr
  1139  	MOVL	addr+0(FP), CX
  1140  	PREFETCHT0	(CX)
  1141  	RET
  1142  
  1143  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4	// issues PREFETCHT1 for the byte at addr
  1144  	MOVL	addr+0(FP), CX
  1145  	PREFETCHT1	(CX)
  1146  	RET
  1147  
  1148  
  1149  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4	// issues PREFETCHT2 for the byte at addr
  1150  	MOVL	addr+0(FP), CX
  1151  	PREFETCHT2	(CX)
  1152  	RET
  1153  
  1154  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4	// issues PREFETCHNTA for the byte at addr
  1155  	MOVL	addr+0(FP), CX
  1156  	PREFETCHNTA	(CX)
  1157  	RET