rsc.io/go@v0.0.0-20150416155037-e040fd465409/src/runtime/asm_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
     10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
         	// Process bootstrap: save argc/argv, give g0 provisional stack
         	// bounds, probe CPUID, set up TLS (via _cgo_init or ldt0setup),
         	// link g0 and m0, run check/args/osinit/schedinit, queue
         	// runtime·main with newproc, then enter mstart.
     11  	// copy arguments forward on an even stack
     12  	MOVL	argc+0(FP), AX
     13  	MOVL	argv+4(FP), BX
     14  	SUBL	$128, SP		// plenty of scratch
     15  	ANDL	$~15, SP		// round SP down to a 16-byte boundary
     16  	MOVL	AX, 120(SP)		// save argc, argv away
     17  	MOVL	BX, 124(SP)
     18  
     19  	// set default stack bounds.
     20  	// _cgo_init may update stackguard.
     21  	MOVL	$runtime·g0(SB), BP
     22  	LEAL	(-64*1024+104)(SP), BX	// provisional low bound: 64 KB below SP, plus 104 bytes
     23  	MOVL	BX, g_stackguard0(BP)
     24  	MOVL	BX, g_stackguard1(BP)
     25  	MOVL	BX, (g_stack+stack_lo)(BP)
     26  	MOVL	SP, (g_stack+stack_hi)(BP)
     27  	
     28  	// find out information about the processor we're on
     29  	MOVL	$0, AX
     30  	CPUID
     31  	CMPL	AX, $0			// AX = highest supported CPUID leaf; 0 means leaf 1 is unavailable
     32  	JE	nocpuinfo
     33  
     34  	// Figure out how to serialize RDTSC.
     35  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
     36  	// Don't know about the rest, so let's do MFENCE.
     37  	CMPL	BX, $0x756E6547  // "Genu"
     38  	JNE	notintel
     39  	CMPL	DX, $0x49656E69  // "ineI"
     40  	JNE	notintel
     41  	CMPL	CX, $0x6C65746E  // "ntel"
     42  	JNE	notintel
     43  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
     44  notintel:
     45  
         	// CPUID leaf 1: keep the ECX/EDX feature words for later checks
         	// (cputicks tests cpuid_edx before using a fence).
     46  	MOVL	$1, AX
     47  	CPUID
     48  	MOVL	CX, runtime·cpuid_ecx(SB)
     49  	MOVL	DX, runtime·cpuid_edx(SB)
     50  nocpuinfo:	
     51  
     52  	// if there is an _cgo_init, call it to let it
     53  	// initialize and to set up GS.  if not,
     54  	// we set up GS ourselves.
     55  	MOVL	_cgo_init(SB), AX
     56  	TESTL	AX, AX
     57  	JZ	needtls
     58  	MOVL	$setg_gcc<>(SB), BX
     59  	MOVL	BX, 4(SP)		// second argument: address of setg_gcc
     60  	MOVL	BP, 0(SP)		// first argument: g0 (BP still holds $runtime·g0)
     61  	CALL	AX
     62  
     63  	// update stackguard after _cgo_init
     64  	MOVL	$runtime·g0(SB), CX
     65  	MOVL	(g_stack+stack_lo)(CX), AX
     66  	ADDL	$const__StackGuard, AX
     67  	MOVL	AX, g_stackguard0(CX)
     68  	MOVL	AX, g_stackguard1(CX)
     69  
     70  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
     71  	CMPL runtime·iswindows(SB), $0
     72  	JEQ ok
     73  needtls:
     74  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
     75  	CMPL	runtime·isplan9(SB), $1
     76  	JEQ	ok
     77  
     78  	// set up %gs
     79  	CALL	runtime·ldt0setup(SB)
     80  
     81  	// store through it, to make sure it works
     82  	get_tls(BX)
     83  	MOVL	$0x123, g(BX)
     84  	MOVL	runtime·tls0(SB), AX	// the marker must be visible at tls0
     85  	CMPL	AX, $0x123
     86  	JEQ	ok
     87  	MOVL	AX, 0	// abort
     88  ok:
     89  	// set up m and g "registers"
     90  	get_tls(BX)
     91  	LEAL	runtime·g0(SB), CX
     92  	MOVL	CX, g(BX)
     93  	LEAL	runtime·m0(SB), AX
     94  
     95  	// save m->g0 = g0
     96  	MOVL	CX, m_g0(AX)
     97  	// save g0->m = m0
     98  	MOVL	AX, g_m(CX)
     99  
    100  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
    101  
    102  	// convention is D is always cleared
    103  	CLD
    104  
    105  	CALL	runtime·check(SB)
    106  
    107  	// saved argc, argv
    108  	MOVL	120(SP), AX
    109  	MOVL	AX, 0(SP)
    110  	MOVL	124(SP), AX
    111  	MOVL	AX, 4(SP)
    112  	CALL	runtime·args(SB)
    113  	CALL	runtime·osinit(SB)
    114  	CALL	runtime·schedinit(SB)
    115  
    116  	// create a new goroutine to start program
    117  	PUSHL	$runtime·mainPC(SB)	// entry
    118  	PUSHL	$0	// arg size
    119  	CALL	runtime·newproc(SB)
    120  	POPL	AX			// discard the two pushed arguments
    121  	POPL	AX
    122  
    123  	// start this M
    124  	CALL	runtime·mstart(SB)
    125  
         	// Reached only if mstart returns: trap.
    126  	INT $3
    127  	RET
   128  
    129  DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// word 0 = address of runtime·main
    130  GLOBL	runtime·mainPC(SB),RODATA,$4	// 4-byte read-only symbol; rt0_go pushes its address as newproc's entry
   131  
    132  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
         	// Raise a breakpoint trap (INT 3), then return to the caller.
    133  	INT $3
    134  	RET
   135  
    136  TEXT runtime·asminit(SB),NOSPLIT,$0-0
    137  	// Linux and MinGW start the FPU in extended double precision.
    138  	// Other operating systems use double precision.
    139  	// Change to double precision to match them,
    140  	// and to match other hardware that only has double.
         	// The control word is pushed so FLDCW can load it from memory,
         	// then popped to restore SP.
    141  	PUSHL $0x27F
    142  	FLDCW	0(SP)
    143  	POPL AX
    144  	RET
   145  
   146  /*
   147   *  go-routine
   148   */
   149  
    150  // void gosave(Gobuf*)
    151  // save state in Gobuf; setjmp
         // Records the caller's SP and PC and the current g in *buf, and
         // zeroes the buf's ret and ctxt fields.
    152  TEXT runtime·gosave(SB), NOSPLIT, $0-4
    153  	MOVL	buf+0(FP), AX		// gobuf
    154  	LEAL	buf+0(FP), BX		// caller's SP
    155  	MOVL	BX, gobuf_sp(AX)
    156  	MOVL	0(SP), BX		// caller's PC
    157  	MOVL	BX, gobuf_pc(AX)
    158  	MOVL	$0, gobuf_ret(AX)
    159  	MOVL	$0, gobuf_ctxt(AX)
    160  	get_tls(CX)
    161  	MOVL	g(CX), BX		// current g
    162  	MOVL	BX, gobuf_g(AX)
    163  	RET
   164  
    165  // void gogo(Gobuf*)
    166  // restore state from Gobuf; longjmp
         // Makes buf->g the current g, switches SP to buf->sp, loads AX from
         // buf->ret and DX from buf->ctxt, clears those three fields, and
         // jumps to buf->pc. Does not return to its caller.
    167  TEXT runtime·gogo(SB), NOSPLIT, $0-4
    168  	MOVL	buf+0(FP), BX		// gobuf
    169  	MOVL	gobuf_g(BX), DX
    170  	MOVL	0(DX), CX		// make sure g != nil
    171  	get_tls(CX)			// CX is reused; the load above is only a nil probe
    172  	MOVL	DX, g(CX)
    173  	MOVL	gobuf_sp(BX), SP	// restore SP
    174  	MOVL	gobuf_ret(BX), AX
    175  	MOVL	gobuf_ctxt(BX), DX
    176  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
    177  	MOVL	$0, gobuf_ret(BX)
    178  	MOVL	$0, gobuf_ctxt(BX)
    179  	MOVL	gobuf_pc(BX), BX	// BX = resume PC (the gobuf pointer is no longer needed)
    180  	JMP	BX
   181  
    182  // func mcall(fn func(*g))
    183  // Switch to m->g0's stack, call fn(g).
    184  // Fn must never return.  It should gogo(&g->sched)
    185  // to keep running g.
         // If the current g is already m->g0, jumps to badmcall instead.
         // If fn does return, jumps to badmcall2.
    186  TEXT runtime·mcall(SB), NOSPLIT, $0-4
    187  	MOVL	fn+0(FP), DI		// DI = fn (closure pointer)
    188  	
    189  	get_tls(CX)
    190  	MOVL	g(CX), AX	// save state in g->sched
    191  	MOVL	0(SP), BX	// caller's PC
    192  	MOVL	BX, (g_sched+gobuf_pc)(AX)
    193  	LEAL	fn+0(FP), BX	// caller's SP
    194  	MOVL	BX, (g_sched+gobuf_sp)(AX)
    195  	MOVL	AX, (g_sched+gobuf_g)(AX)
    196  
    197  	// switch to m->g0 & its stack, call fn
    198  	MOVL	g(CX), BX
    199  	MOVL	g_m(BX), BX
    200  	MOVL	m_g0(BX), SI
    201  	CMPL	SI, AX	// if g == m->g0 call badmcall
    202  	JNE	3(PC)		// normal case: skip the two-instruction badmcall jump
    203  	MOVL	$runtime·badmcall(SB), AX
    204  	JMP	AX
    205  	MOVL	SI, g(CX)	// g = m->g0
    206  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
    207  	PUSHL	AX		// argument to fn: the g we switched away from
    208  	MOVL	DI, DX		// DX = closure context
    209  	MOVL	0(DI), DI	// DI = code pointer stored in the closure's first word
    210  	CALL	DI
    211  	POPL	AX
    212  	MOVL	$runtime·badmcall2(SB), AX	// fn returned, which it must not do
    213  	JMP	AX
    214  	RET
   215  
    216  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
    217  // of the G stack.  We need to distinguish the routine that
    218  // lives at the bottom of the G stack from the one that lives
    219  // at the top of the system stack because the one at the top of
    220  // the system stack terminates the stack walk (see topofstack()).
    221  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
         	// Body is just RET; systemstack stores this function's address
         	// into g->sched.pc before switching stacks.
    222  	RET
   223  
    224  // func systemstack(fn func())
         // Runs fn on the m->g0 stack.
         //  - current g is gsignal or g0: call fn in place (noswitch).
         //  - current g is m->curg: save state in g->sched, switch to g0,
         //    call fn, then switch back to m->curg.
         //  - anything else: call badsystemstack.
    225  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
    226  	MOVL	fn+0(FP), DI	// DI = fn
    227  	get_tls(CX)
    228  	MOVL	g(CX), AX	// AX = g
    229  	MOVL	g_m(AX), BX	// BX = m
    230  
    231  	MOVL	m_gsignal(BX), DX	// DX = gsignal
    232  	CMPL	AX, DX
    233  	JEQ	noswitch
    234  
    235  	MOVL	m_g0(BX), DX	// DX = g0
    236  	CMPL	AX, DX
    237  	JEQ	noswitch
    238  
    239  	MOVL	m_curg(BX), BP
    240  	CMPL	AX, BP
    241  	JEQ	switch
    242  	
    243  	// Bad: g is not gsignal, not g0, not curg. What is it?
    244  	// Hide call from linker nosplit analysis.
    245  	MOVL	$runtime·badsystemstack(SB), AX
    246  	CALL	AX
         	// NOTE: if badsystemstack returns, execution falls through into switch.
    247  
    248  switch:
    249  	// save our state in g->sched.  Pretend to
    250  	// be systemstack_switch if the G stack is scanned.
    251  	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
    252  	MOVL	SP, (g_sched+gobuf_sp)(AX)
    253  	MOVL	AX, (g_sched+gobuf_g)(AX)
    254  
    255  	// switch to g0
    256  	MOVL	DX, g(CX)
    257  	MOVL	(g_sched+gobuf_sp)(DX), BX
    258  	// make it look like mstart called systemstack on g0, to stop traceback
    259  	SUBL	$4, BX
    260  	MOVL	$runtime·mstart(SB), DX
    261  	MOVL	DX, 0(BX)
    262  	MOVL	BX, SP
    263  
    264  	// call target function
    265  	MOVL	DI, DX		// DX = closure context
    266  	MOVL	0(DI), DI	// DI = code pointer from the closure's first word
    267  	CALL	DI
    268  
    269  	// switch back to g
    270  	get_tls(CX)
    271  	MOVL	g(CX), AX
    272  	MOVL	g_m(AX), BX
    273  	MOVL	m_curg(BX), AX
    274  	MOVL	AX, g(CX)
    275  	MOVL	(g_sched+gobuf_sp)(AX), SP
    276  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been restored
    277  	RET
    278  
    279  noswitch:
    280  	// already on system stack, just call directly
    281  	MOVL	DI, DX
    282  	MOVL	0(DI), DI
    283  	CALL	DI
    284  	RET
   285  
   286  /*
   287   * support for morestack
   288   */
   289  
    290  // Called during function prolog when more stack is needed.
    291  //
    292  // The traceback routines see morestack on a g0 as being
    293  // the top of a stack (for example, morestack calling newstack
    294  // calling the scheduler calling newm calling gc), so we must
    295  // record an argument size. For that purpose, it has no arguments.
         //
         // On entry DX holds the closure context that is saved into
         // g->sched.ctxt (morestack_noctxt zeroes DX before jumping here).
    296  TEXT runtime·morestack(SB),NOSPLIT,$0-0
    297  	// Cannot grow scheduler stack (m->g0).
    298  	get_tls(CX)
    299  	MOVL	g(CX), BX
    300  	MOVL	g_m(BX), BX
    301  	MOVL	m_g0(BX), SI
    302  	CMPL	g(CX), SI
    303  	JNE	2(PC)		// not g0: skip the trap
    304  	INT	$3
    305  
    306  	// Cannot grow signal stack.
    307  	MOVL	m_gsignal(BX), SI
    308  	CMPL	g(CX), SI
    309  	JNE	2(PC)		// not gsignal: skip the trap
    310  	INT	$3
    311  
    312  	// Called from f.
    313  	// Set m->morebuf to f's caller.
    314  	MOVL	4(SP), DI	// f's caller's PC
    315  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
    316  	LEAL	8(SP), CX	// f's caller's SP
    317  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
    318  	get_tls(CX)		// CX was used as scratch above; reload the TLS base
    319  	MOVL	g(CX), SI
    320  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
    321  
    322  	// Set g->sched to context in f.
    323  	MOVL	0(SP), AX	// f's PC
    324  	MOVL	AX, (g_sched+gobuf_pc)(SI)
    325  	MOVL	SI, (g_sched+gobuf_g)(SI)
    326  	LEAL	4(SP), AX	// f's SP
    327  	MOVL	AX, (g_sched+gobuf_sp)(SI)
    328  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)
    329  
    330  	// Call newstack on m->g0's stack.
    331  	MOVL	m_g0(BX), BP
    332  	MOVL	BP, g(CX)
    333  	MOVL	(g_sched+gobuf_sp)(BP), AX
    334  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
    335  	MOVL	AX, SP
    336  	CALL	runtime·newstack(SB)
    337  	MOVL	$0, 0x1003	// crash if newstack returns
    338  	RET
   339  
    340  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
         	// Variant of morestack with no closure context: zero DX (which
         	// morestack saves as g->sched.ctxt), then tail-jump to morestack.
    341  	MOVL	$0, DX
    342  	JMP runtime·morestack(SB)
   343  
   344  // reflectcall: call a function with the given argument list
   345  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   346  // we don't have variable-sized frames, so we use a small number
   347  // of constant-sized-frame functions to encode a few bits of size in the pc.
   348  // Caution: ugly multiline assembly macros in your future!
   349  
    350  #define DISPATCH(NAME,MAXSIZE)		\
    351  	CMPL	CX, $MAXSIZE;		\
    352  	JA	3(PC);			\
    353  	MOVL	$NAME(SB), AX;		\
    354  	JMP	AX
    355  // Note: can't just "JMP NAME(SB)" - bad inlining results.
         // DISPATCH expects the argument size in CX. If CX <= MAXSIZE
         // (unsigned) it jumps to NAME through AX; otherwise JA 3(PC) skips
         // the MOVL/JMP pair and falls through to whatever follows the macro.
   356  
    357  TEXT reflect·call(SB), NOSPLIT, $0-0
         	// Entry point in package reflect: tail-jumps to ·reflectcall,
         	// which reads the arguments from the same frame.
    358  	JMP	·reflectcall(SB)
   359  
    360  TEXT ·reflectcall(SB), NOSPLIT, $0-20
         	// CX = argsize. Each DISPATCH jumps to its callNNN routine when
         	// argsize <= NNN, so the first (smallest) sufficient frame wins.
         	// Sizes above 1073741824 reach badreflectcall.
    361  	MOVL	argsize+12(FP), CX
    362  	DISPATCH(runtime·call16, 16)
    363  	DISPATCH(runtime·call32, 32)
    364  	DISPATCH(runtime·call64, 64)
    365  	DISPATCH(runtime·call128, 128)
    366  	DISPATCH(runtime·call256, 256)
    367  	DISPATCH(runtime·call512, 512)
    368  	DISPATCH(runtime·call1024, 1024)
    369  	DISPATCH(runtime·call2048, 2048)
    370  	DISPATCH(runtime·call4096, 4096)
    371  	DISPATCH(runtime·call8192, 8192)
    372  	DISPATCH(runtime·call16384, 16384)
    373  	DISPATCH(runtime·call32768, 32768)
    374  	DISPATCH(runtime·call65536, 65536)
    375  	DISPATCH(runtime·call131072, 131072)
    376  	DISPATCH(runtime·call262144, 262144)
    377  	DISPATCH(runtime·call524288, 524288)
    378  	DISPATCH(runtime·call1048576, 1048576)
    379  	DISPATCH(runtime·call2097152, 2097152)
    380  	DISPATCH(runtime·call4194304, 4194304)
    381  	DISPATCH(runtime·call8388608, 8388608)
    382  	DISPATCH(runtime·call16777216, 16777216)
    383  	DISPATCH(runtime·call33554432, 33554432)
    384  	DISPATCH(runtime·call67108864, 67108864)
    385  	DISPATCH(runtime·call134217728, 134217728)
    386  	DISPATCH(runtime·call268435456, 268435456)
    387  	DISPATCH(runtime·call536870912, 536870912)
    388  	DISPATCH(runtime·call1073741824, 1073741824)
    389  	MOVL	$runtime·badreflectcall(SB), AX
    390  	JMP	AX
   391  
    392  #define CALLFN(NAME,MAXSIZE)			\
    393  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
    394  	NO_LOCAL_POINTERS;			\
    395  	/* copy arguments to stack */		\
    396  	MOVL	argptr+8(FP), SI;		\
    397  	MOVL	argsize+12(FP), CX;		\
    398  	MOVL	SP, DI;				\
    399  	REP;MOVSB;				\
    400  	/* call function */			\
    401  	MOVL	f+4(FP), DX;			\
    402  	MOVL	(DX), AX; 			\
    403  	PCDATA  $PCDATA_StackMapIndex, $0;	\
    404  	CALL	AX;				\
    405  	/* copy return values back */		\
    406  	MOVL	argptr+8(FP), DI;		\
    407  	MOVL	argsize+12(FP), CX;		\
    408  	MOVL	retoffset+16(FP), BX;		\
    409  	MOVL	SP, SI;				\
    410  	ADDL	BX, DI;				\
    411  	ADDL	BX, SI;				\
    412  	SUBL	BX, CX;				\
    413  	REP;MOVSB;				\
    414  	/* execute write barrier updates */	\
    415  	MOVL	argtype+0(FP), DX;		\
    416  	MOVL	argptr+8(FP), DI;		\
    417  	MOVL	argsize+12(FP), CX;		\
    418  	MOVL	retoffset+16(FP), BX;		\
    419  	MOVL	DX, 0(SP);			\
    420  	MOVL	DI, 4(SP);			\
    421  	MOVL	CX, 8(SP);			\
    422  	MOVL	BX, 12(SP);			\
    423  	CALL	runtime·callwritebarrier(SB);	\
    424  	RET
         // CALLFN defines a WRAPPER function NAME with a MAXSIZE-byte frame and
         // the 20-byte (argtype, f, argptr, argsize, retoffset) argument list.
         // It copies argsize bytes from argptr to the bottom of its frame,
         // calls the code pointer in f's first word with DX = f, copies bytes
         // [retoffset, argsize) of the frame back to argptr, and then calls
         // callwritebarrier(argtype, argptr, argsize, retoffset).
   425  
    426  CALLFN(·call16, 16)	// one fixed-frame routine per power of two, 16 bytes through 1 GB; same sizes as the DISPATCH list in ·reflectcall
    427  CALLFN(·call32, 32)
    428  CALLFN(·call64, 64)
    429  CALLFN(·call128, 128)
    430  CALLFN(·call256, 256)
    431  CALLFN(·call512, 512)
    432  CALLFN(·call1024, 1024)
    433  CALLFN(·call2048, 2048)
    434  CALLFN(·call4096, 4096)
    435  CALLFN(·call8192, 8192)
    436  CALLFN(·call16384, 16384)
    437  CALLFN(·call32768, 32768)
    438  CALLFN(·call65536, 65536)
    439  CALLFN(·call131072, 131072)
    440  CALLFN(·call262144, 262144)
    441  CALLFN(·call524288, 524288)
    442  CALLFN(·call1048576, 1048576)
    443  CALLFN(·call2097152, 2097152)
    444  CALLFN(·call4194304, 4194304)
    445  CALLFN(·call8388608, 8388608)
    446  CALLFN(·call16777216, 16777216)
    447  CALLFN(·call33554432, 33554432)
    448  CALLFN(·call67108864, 67108864)
    449  CALLFN(·call134217728, 134217728)
    450  CALLFN(·call268435456, 268435456)
    451  CALLFN(·call536870912, 536870912)
    452  CALLFN(·call1073741824, 1073741824)
   453  
   454  // bool cas(int32 *val, int32 old, int32 new)
   455  // Atomically:
   456  //	if(*val == old){
   457  //		*val = new;
   458  //		return 1;
   459  //	}else
   460  //		return 0;
   461  TEXT runtime·cas(SB), NOSPLIT, $0-13
   462  	MOVL	ptr+0(FP), BX
   463  	MOVL	old+4(FP), AX
   464  	MOVL	new+8(FP), CX
   465  	LOCK
   466  	CMPXCHGL	CX, 0(BX)
   467  	SETEQ	ret+12(FP)
   468  	RET
   469  
    470  TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
         	// Same 13-byte frame as cas; tail-jump to it.
    471  	JMP	runtime·cas(SB)
   472  
    473  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
         	// Tail-jump to atomicload, which is defined outside this part of the file.
    474  	JMP	runtime·atomicload(SB)
   475  
    476  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
         	// Tail-jump to atomicload, which is defined outside this part of the file.
    477  	JMP	runtime·atomicload(SB)
   478  
    479  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8
         	// Same 8-byte (ptr, val) frame as atomicstore; tail-jump to it.
    480  	JMP	runtime·atomicstore(SB)
   481  
    482  // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
    483  // Atomically:
    484  //	if(*val == old){
    485  //		*val = new;
    486  //		return 1;
    487  //	} else {
    488  //		return 0;
    489  //	}
         // CMPXCHG8B compares DX:AX with the 8 bytes at 0(BP) and, when they
         // match, stores CX:BX there.
    490  TEXT runtime·cas64(SB), NOSPLIT, $0-21
    491  	MOVL	ptr+0(FP), BP
    492  	MOVL	old_lo+4(FP), AX	// DX:AX = old
    493  	MOVL	old_hi+8(FP), DX
    494  	MOVL	new_lo+12(FP), BX	// CX:BX = new
    495  	MOVL	new_hi+16(FP), CX
    496  	LOCK
    497  	CMPXCHG8B	0(BP)
    498  	SETEQ	ret+20(FP)		// 1 if the exchange happened
    499  	RET
   500  
   501  // bool casp(void **p, void *old, void *new)
   502  // Atomically:
   503  //	if(*p == old){
   504  //		*p = new;
   505  //		return 1;
   506  //	}else
   507  //		return 0;
   508  TEXT runtime·casp1(SB), NOSPLIT, $0-13
   509  	MOVL	ptr+0(FP), BX
   510  	MOVL	old+4(FP), AX
   511  	MOVL	new+8(FP), CX
   512  	LOCK
   513  	CMPXCHGL	CX, 0(BX)
   514  	SETEQ	ret+12(FP)
   515  	RET
   516  
   517  // uint32 xadd(uint32 volatile *val, int32 delta)
   518  // Atomically:
   519  //	*val += delta;
   520  //	return *val;
   521  TEXT runtime·xadd(SB), NOSPLIT, $0-12
   522  	MOVL	ptr+0(FP), BX
   523  	MOVL	delta+4(FP), AX
   524  	MOVL	AX, CX
   525  	LOCK
   526  	XADDL	AX, 0(BX)
   527  	ADDL	CX, AX
   528  	MOVL	AX, ret+8(FP)
   529  	RET
   530  
   531  TEXT runtime·xchg(SB), NOSPLIT, $0-12
   532  	MOVL	ptr+0(FP), BX
   533  	MOVL	new+4(FP), AX
   534  	XCHGL	AX, 0(BX)
   535  	MOVL	AX, ret+8(FP)
   536  	RET
   537  
   538  TEXT runtime·xchgp1(SB), NOSPLIT, $0-12
   539  	MOVL	ptr+0(FP), BX
   540  	MOVL	new+4(FP), AX
   541  	XCHGL	AX, 0(BX)
   542  	MOVL	AX, ret+8(FP)
   543  	RET
   544  
    545  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
         	// Same 12-byte frame as xchg; tail-jump to it.
    546  	JMP	runtime·xchg(SB)
   547  
   548  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   549  	MOVL	cycles+0(FP), AX
   550  again:
   551  	PAUSE
   552  	SUBL	$1, AX
   553  	JNZ	again
   554  	RET
   555  
   556  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8
   557  	MOVL	ptr+0(FP), BX
   558  	MOVL	val+4(FP), AX
   559  	XCHGL	AX, 0(BX)
   560  	RET
   561  
   562  TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
   563  	MOVL	ptr+0(FP), BX
   564  	MOVL	val+4(FP), AX
   565  	XCHGL	AX, 0(BX)
   566  	RET
   567  
    568  // uint64 atomicload64(uint64 volatile* addr);
         // Loads the 8 bytes at addr through MMX register MM0 and stores them
         // into the return slot. Crashes deliberately if addr is not 8-byte
         // aligned.
    569  TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
    570  	MOVL	ptr+0(FP), AX
    571  	TESTL	$7, AX
    572  	JZ	2(PC)			// aligned: skip the deliberate crash
    573  	MOVL	0, AX // crash with nil ptr deref
    574  	LEAL	ret_lo+4(FP), BX	// BX = address of the 64-bit result slot
    575  	// MOVQ (%EAX), %MM0
    576  	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
    577  	// MOVQ %MM0, 0(%EBX)
    578  	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
    579  	// EMMS
    580  	BYTE $0x0F; BYTE $0x77
    581  	RET
   582  
    583  // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
         // Stores v to addr with one 8-byte MMX move, then issues a locked
         // no-op add as a memory fence. Crashes deliberately if addr is not
         // 8-byte aligned.
    584  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
    585  	MOVL	ptr+0(FP), AX
    586  	TESTL	$7, AX
    587  	JZ	2(PC)			// aligned: skip the deliberate crash
    588  	MOVL	0, AX // crash with nil ptr deref
    589  	// MOVQ and EMMS were introduced on the Pentium MMX.
    590  	// MOVQ 0x8(%ESP), %MM0
         	// (with a zero-size frame, 0(SP) is the return PC, 4(SP) is addr
         	// and 8(SP) is v)
    591  	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
    592  	// MOVQ %MM0, (%EAX)
    593  	BYTE $0x0f; BYTE $0x7f; BYTE $0x00 
    594  	// EMMS
    595  	BYTE $0x0F; BYTE $0x77
    596  	// This is essentially a no-op, but it provides required memory fencing.
    597  	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
    598  	MOVL	$0, AX
    599  	LOCK
    600  	XADDL	AX, (SP)		// adds 0 to the word at the top of the stack
    601  	RET
   602  
   603  // void	runtime·atomicor8(byte volatile*, byte);
   604  TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
   605  	MOVL	ptr+0(FP), AX
   606  	MOVB	val+4(FP), BX
   607  	LOCK
   608  	ORB	BX, (AX)
   609  	RET
   610  
   611  // void	runtime·atomicand8(byte volatile*, byte);
   612  TEXT runtime·atomicand8(SB), NOSPLIT, $0-5
   613  	MOVL	ptr+0(FP), AX
   614  	MOVB	val+4(FP), BX
   615  	LOCK
   616  	ANDB	BX, (AX)
   617  	RET
   618  
    619  // void jmpdefer(fn, sp);
    620  // called from deferreturn.
    621  // 1. pop the caller
    622  // 2. sub 5 bytes from the callers return
    623  // 3. jmp to the argument
         // The effect is that when the deferred function returns, it returns
         // to the CALL instruction that invoked deferreturn, which runs again.
         // NOTE(review): the 5 is presumably the byte length of that CALL; confirm.
    624  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
    625  	MOVL	fv+0(FP), DX	// fn
    626  	MOVL	argp+4(FP), BX	// caller sp
    627  	LEAL	-4(BX), SP	// caller sp after CALL
    628  	SUBL	$5, (SP)	// return to CALL again
    629  	MOVL	0(DX), BX	// BX = code pointer from fn's first word; DX stays as context
    630  	JMP	BX	// but first run the deferred function
   631  
    632  // Save state of caller into g->sched.
         // Stores the caller's SP and return PC into the current g's sched
         // buffer and zeroes its ret and ctxt fields. AX and BX are preserved
         // with PUSHL/POPL.
    633  TEXT gosave<>(SB),NOSPLIT,$0
    634  	PUSHL	AX
    635  	PUSHL	BX
    636  	get_tls(BX)
    637  	MOVL	g(BX), BX		// BX = current g
    638  	LEAL	arg+0(FP), AX		// AX = caller's SP (address just above our return PC)
    639  	MOVL	AX, (g_sched+gobuf_sp)(BX)
    640  	MOVL	-4(AX), AX		// AX = our return PC, i.e. the caller's resume point
    641  	MOVL	AX, (g_sched+gobuf_pc)(BX)
    642  	MOVL	$0, (g_sched+gobuf_ret)(BX)
    643  	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
    644  	POPL	BX
    645  	POPL	AX
    646  	RET
   647  
    648  // asmcgocall(void(*fn)(void*), void *arg)
    649  // Call fn(arg) on the scheduler stack,
    650  // aligned appropriately for the gcc ABI.
    651  // See cgocall.c for more details.
         // Thin wrapper: loads fn into AX and arg into BX, the register
         // convention of asmcgocall<>, and discards fn's result.
    652  TEXT ·asmcgocall(SB),NOSPLIT,$0-8
    653  	MOVL	fn+0(FP), AX
    654  	MOVL	arg+4(FP), BX
    655  	CALL	asmcgocall<>(SB)
    656  	RET
   657  
    658  TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-12
         	// Like ·asmcgocall, but also stores the value left in AX after
         	// the call into the 4-byte result slot.
    659  	MOVL	fn+0(FP), AX
    660  	MOVL	arg+4(FP), BX
    661  	CALL	asmcgocall<>(SB)
    662  	MOVL	AX, ret+8(FP)
    663  	RET
   664  
    665  TEXT asmcgocall<>(SB),NOSPLIT,$0-0
    666  	// fn in AX, arg in BX
         	// Switches to m->g0's stack unless already on it, calls fn(arg)
         	// with a 16-byte-aligned SP, then restores g and SP. AX is not
         	// touched after the call, so fn's AX is visible to our caller.
    667  	MOVL	SP, DX			// DX = SP on entry, used below to compute stack depth
    668  
    669  	// Figure out if we need to switch to m->g0 stack.
    670  	// We get called to create new OS threads too, and those
    671  	// come in on the m->g0 stack already.
    672  	get_tls(CX)
    673  	MOVL	g(CX), BP
    674  	MOVL	g_m(BP), BP
    675  	MOVL	m_g0(BP), SI
    676  	MOVL	g(CX), DI
    677  	CMPL	SI, DI
    678  	JEQ	4(PC)			// already on g0: skip gosave and the stack switch
    679  	CALL	gosave<>(SB)
    680  	MOVL	SI, g(CX)
    681  	MOVL	(g_sched+gobuf_sp)(SI), SP
    682  
    683  	// Now on a scheduling stack (a pthread-created stack).
    684  	SUBL	$32, SP
    685  	ANDL	$~15, SP	// alignment, perhaps unnecessary
    686  	MOVL	DI, 8(SP)	// save g
    687  	MOVL	(g_stack+stack_hi)(DI), DI
    688  	SUBL	DX, DI
    689  	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
    690  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
    691  	CALL	AX
    692  
    693  	// Restore registers, g, stack pointer.
    694  	get_tls(CX)
    695  	MOVL	8(SP), DI		// DI = saved g
    696  	MOVL	(g_stack+stack_hi)(DI), SI
    697  	SUBL	4(SP), SI		// SI = stack.hi - saved depth = original SP
    698  	MOVL	DI, g(CX)
    699  	MOVL	SI, SP
    700  	RET
   701  
    702  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
    703  // Turn the fn into a Go func (by taking its address) and call
    704  // cgocallback_gofunc.
    705  TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
    706  	LEAL	fn+0(FP), AX		// address of the fn argument slot, passed as the FuncVal*
    707  	MOVL	AX, 0(SP)
    708  	MOVL	frame+4(FP), AX
    709  	MOVL	AX, 4(SP)
    710  	MOVL	framesize+8(FP), AX
    711  	MOVL	AX, 8(SP)
    712  	MOVL	$runtime·cgocallback_gofunc(SB), AX	// indirect call through AX
    713  	CALL	AX
    714  	RET
   715  
    716  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
    717  // See cgocall.c for more details.
         // Outline: obtain an m (borrowing one with needm when g is nil),
         // save m->g0->sched.sp, switch to m->curg's stack, call cgocallbackg,
         // restore curg's sched state, switch back to g0, and return a
         // borrowed m with dropm.
    718  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
    719  	NO_LOCAL_POINTERS
    720  
    721  	// If g is nil, Go did not create the current thread.
    722  	// Call needm to obtain one for temporary use.
    723  	// In this case, we're running on the thread stack, so there's
    724  	// lots of space, but the linker doesn't know. Hide the call from
    725  	// the linker analysis by using an indirect call through AX.
    726  	get_tls(CX)
    727  #ifdef GOOS_windows
         	// If the TLS base itself is 0, skip the g load below so BP stays 0.
    728  	MOVL	$0, BP
    729  	CMPL	CX, $0
    730  	JEQ	2(PC) // TODO
    731  #endif
    732  	MOVL	g(CX), BP
    733  	CMPL	BP, $0
    734  	JEQ	needm
    735  	MOVL	g_m(BP), BP
    736  	MOVL	BP, DX // saved copy of oldm
    737  	JMP	havem
    738  needm:
    739  	MOVL	$0, 0(SP)
    740  	MOVL	$runtime·needm(SB), AX
    741  	CALL	AX
    742  	MOVL	0(SP), DX		// DX = 0(SP) after needm; tested against 0 before dropm below
    743  	get_tls(CX)
    744  	MOVL	g(CX), BP
    745  	MOVL	g_m(BP), BP
    746  
    747  	// Set m->sched.sp = SP, so that if a panic happens
    748  	// during the function we are about to execute, it will
    749  	// have a valid SP to run on the g0 stack.
    750  	// The next few lines (after the havem label)
    751  	// will save this SP onto the stack and then write
    752  	// the same SP back to m->sched.sp. That seems redundant,
    753  	// but if an unrecovered panic happens, unwindm will
    754  	// restore the g->sched.sp from the stack location
    755  	// and then systemstack will try to use it. If we don't set it here,
    756  	// that restored SP will be uninitialized (typically 0) and
    757  	// will not be usable.
    758  	MOVL	m_g0(BP), SI
    759  	MOVL	SP, (g_sched+gobuf_sp)(SI)
    760  
    761  havem:
    762  	// Now there's a valid m, and we're running on its m->g0.
    763  	// Save current m->g0->sched.sp on stack and then set it to SP.
    764  	// Save current sp in m->g0->sched.sp in preparation for
    765  	// switch back to m->curg stack.
    766  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
    767  	MOVL	m_g0(BP), SI
    768  	MOVL	(g_sched+gobuf_sp)(SI), AX
    769  	MOVL	AX, 0(SP)
    770  	MOVL	SP, (g_sched+gobuf_sp)(SI)
    771  
    772  	// Switch to m->curg stack and call runtime.cgocallbackg.
    773  	// Because we are taking over the execution of m->curg
    774  	// but *not* resuming what had been running, we need to
    775  	// save that information (m->curg->sched) so we can restore it.
    776  	// We can restore m->curg->sched.sp easily, because calling
    777  	// runtime.cgocallbackg leaves SP unchanged upon return.
    778  	// To save m->curg->sched.pc, we push it onto the stack.
    779  	// This has the added benefit that it looks to the traceback
    780  	// routine like cgocallbackg is going to return to that
    781  	// PC (because the frame we allocate below has the same
    782  	// size as cgocallback_gofunc's frame declared above)
    783  	// so that the traceback will seamlessly trace back into
    784  	// the earlier calls.
    785  	//
    786  	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
    787  	// 4(SP) and 8(SP) are unused.
    788  	MOVL	m_curg(BP), SI
    789  	MOVL	SI, g(CX)
    790  	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
    791  	MOVL	(g_sched+gobuf_pc)(SI), BP
    792  	MOVL	BP, -4(DI)		// saved sched.pc sits where a return address would
    793  	LEAL	-(4+12)(DI), SP		// 4 bytes for the saved PC plus a 12-byte frame
    794  	MOVL	DX, 0(SP)
    795  	CALL	runtime·cgocallbackg(SB)
    796  	MOVL	0(SP), DX
    797  
    798  	// Restore g->sched (== m->curg->sched) from saved values.
    799  	get_tls(CX)
    800  	MOVL	g(CX), SI
    801  	MOVL	12(SP), BP		// the sched.pc stored at -4(DI) above
    802  	MOVL	BP, (g_sched+gobuf_pc)(SI)
    803  	LEAL	(12+4)(SP), DI		// the original sched.sp
    804  	MOVL	DI, (g_sched+gobuf_sp)(SI)
    805  
    806  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    807  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    808  	// so we do not have to restore it.)
    809  	MOVL	g(CX), BP
    810  	MOVL	g_m(BP), BP
    811  	MOVL	m_g0(BP), SI
    812  	MOVL	SI, g(CX)
    813  	MOVL	(g_sched+gobuf_sp)(SI), SP
    814  	MOVL	0(SP), AX
    815  	MOVL	AX, (g_sched+gobuf_sp)(SI)
    816  	
    817  	// If the m on entry was nil, we called needm above to borrow an m
    818  	// for the duration of the call. Since the call is over, return it with dropm.
    819  	CMPL	DX, $0
    820  	JNE 3(PC)			// had our own m: skip the dropm call
    821  	MOVL	$runtime·dropm(SB), AX
    822  	CALL	AX
    823  
    824  	// Done!
    825  	RET
   826  
    827  // void setg(G*); set g. for use by needm.
         // On Windows, 0x14(FS) is first set to 0 when gg is nil (and the
         // function returns), or to the address of gg->m->tls otherwise.
         // After that, gg is stored in the TLS g slot.
    828  TEXT runtime·setg(SB), NOSPLIT, $0-4
    829  	MOVL	gg+0(FP), BX
    830  #ifdef GOOS_windows
    831  	CMPL	BX, $0
    832  	JNE	settls
    833  	MOVL	$0, 0x14(FS)
    834  	RET
    835  settls:
    836  	MOVL	g_m(BX), AX
    837  	LEAL	m_tls(AX), AX
    838  	MOVL	AX, 0x14(FS)
    839  #endif
    840  	get_tls(CX)
    841  	MOVL	BX, g(CX)
    842  	RET
   843  
    844  // void setg_gcc(G*); set g. for use by gcc
         // rt0_go passes this function's address to _cgo_init.
    845  TEXT setg_gcc<>(SB), NOSPLIT, $0
    846  	get_tls(AX)
    847  	MOVL	gg+0(FP), DX
    848  	MOVL	DX, g(AX)		// store gg into the TLS g slot
    849  	RET
   850  
    851  // check that SP is in range [g->stack.lo, g->stack.hi)
         // Traps with INT 3 when the check fails.
         // NOTE(review): both tests use JHI (strictly above), so as written
         // SP == g->stack.lo also traps; the accepted range is lo < SP < hi.
    852  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
    853  	get_tls(CX)
    854  	MOVL	g(CX), AX
    855  	CMPL	(g_stack+stack_hi)(AX), SP
    856  	JHI	2(PC)			// stack.hi > SP (unsigned): skip the trap
    857  	INT	$3
    858  	CMPL	SP, (g_stack+stack_lo)(AX)
    859  	JHI	2(PC)			// SP > stack.lo (unsigned): skip the trap
    860  	INT	$3
    861  	RET
   862  
    863  TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
         	// Given the address of a function's first argument, return the
         	// word stored 4 bytes below it (that function's return PC).
    864  	MOVL	argp+0(FP),AX		// addr of first arg
    865  	MOVL	-4(AX),AX		// get calling pc
    866  	MOVL	AX, ret+4(FP)
    867  	RET
   868  
    869  TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
         	// Counterpart of getcallerpc: overwrite the word 4 bytes below
         	// argp with pc.
    870  	MOVL	argp+0(FP),AX		// addr of first arg
    871  	MOVL	pc+4(FP), BX
    872  	MOVL	BX, -4(AX)		// set calling pc
    873  	RET
   874  
    875  TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
         	// Returns argp unchanged: the address of the first argument is
         	// used as the caller's SP value.
    876  	MOVL	argp+0(FP), AX
    877  	MOVL	AX, ret+4(FP)
    878  	RET
   879  
    880  // func cputicks() int64
         // Returns the RDTSC counter (DX:AX). When cpuid_edx bit 0x4000000 is
         // set, a fence is issued first: LFENCE if lfenceBeforeRdtsc is 1
         // (set in rt0_go for "GenuineIntel"), MFENCE otherwise.
    881  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
    882  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
    883  	JEQ	done
    884  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
    885  	JNE	mfence
    886  	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
    887  	JMP	done
    888  mfence:
    889  	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
    890  done:
    891  	RDTSC
    892  	MOVL	AX, ret_lo+0(FP)
    893  	MOVL	DX, ret_hi+4(FP)
    894  	RET
   895  
    896  TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
    897  	// set up ldt 7 to point at tls0
    898  	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
    899  	// the entry number is just a hint.  setldt will set up GS with what it used.
         	// Calls setldt(7, &tls0, 32) with the arguments placed in this frame.
    900  	MOVL	$7, 0(SP)
    901  	LEAL	runtime·tls0(SB), AX
    902  	MOVL	AX, 4(SP)
    903  	MOVL	$32, 8(SP)	// sizeof(tls array)
    904  	CALL	runtime·setldt(SB)
    905  	RET
   906  
    907  TEXT runtime·emptyfunc(SB),0,$0-0
         	// Does nothing. Declared with flags 0 rather than NOSPLIT; rt0_go
         	// calls it with the note "fault if stack check is wrong".
    908  	RET
   909  
   910  TEXT runtime·abort(SB),NOSPLIT,$0-0
   911  	INT $0x3
   912  
    913  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
    914  // redirects to memhash(p, h, size) using the size
    915  // stored in the closure.
         // The closure pointer arrives in DX; the size is read from 4(DX).
    916  TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
    917  	GO_ARGS
    918  	NO_LOCAL_POINTERS
    919  	MOVL	p+0(FP), AX
    920  	MOVL	h+4(FP), BX
    921  	MOVL	4(DX), CX		// size from the closure
    922  	MOVL	AX, 0(SP)
    923  	MOVL	BX, 4(SP)
    924  	MOVL	CX, 8(SP)
    925  	CALL	runtime·memhash(SB)
    926  	MOVL	12(SP), AX		// memhash's result slot follows its three arguments
    927  	MOVL	AX, ret+8(FP)
    928  	RET
   929  
    930  // hash function using AES hardware instructions
         // Loads the aeshashbody register arguments (AX = data, CX = length,
         // DX = address of the result slot) and tail-jumps to it. The seed is
         // not loaded here; aeshashbody reads it from h+4(FP) in this frame.
    931  TEXT runtime·aeshash(SB),NOSPLIT,$0-16
    932  	MOVL	p+0(FP), AX	// ptr to data
    933  	MOVL	s+8(FP), CX	// size
    934  	LEAL	ret+12(FP), DX
    935  	JMP	runtime·aeshashbody(SB)
   936  
    937  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
         	// String variant of aeshash: p points at a string header, from
         	// which the data pointer and length are read before tail-jumping
         	// to aeshashbody (AX = data, CX = length, DX = result address).
    938  	MOVL	p+0(FP), AX	// ptr to string object
    939  	MOVL	4(AX), CX	// length of string
    940  	MOVL	(AX), AX	// string data
    941  	LEAL	ret+8(FP), DX
    942  	JMP	runtime·aeshashbody(SB)
   943  
   944  // AX: data
   945  // CX: length
   946  // DX: address to put return value
        // Shared body of aeshash and aeshashstr.  It is entered with JMP, not
        // CALL, so FP still addresses the entry point's arguments (the seed is
        // read from h+4(FP)) and each RET below returns to the entry point's
        // caller.  Every path finishes by storing the low 32 bits of X0
        // through DX.  X6 carries the seed/length block and X7 the first
        // 16 bytes of runtime·aeskeysched for the whole routine.
   947  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   948  	MOVL	h+4(FP), X6	// seed to low 64 bits of xmm6
   949  	PINSRD	$2, CX, X6	// size to high 64 bits of xmm6
   950  	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
   951  	MOVO	runtime·aeskeysched(SB), X7
        	// dispatch on length: 0-15, exactly 16, 17-32, 33-64, 65 and up
   952  	CMPL	CX, $16
   953  	JB	aes0to15
   954  	JE	aes16
   955  	CMPL	CX, $32
   956  	JBE	aes17to32
   957  	CMPL	CX, $64
   958  	JBE	aes33to64
   959  	JMP	aes65plus
   960  	
   961  aes0to15:
   962  	TESTL	CX, CX
   963  	JE	aes0
   964  
        	// After adding 16, bits 4-11 of AX are all zero exactly when the
        	// low 12 bits of the original address were >= 0xff0, i.e. a
        	// 16-byte load starting at the data could run into the next
        	// 4096-byte-aligned region.
   965  	ADDL	$16, AX
   966  	TESTW	$0xff0, AX
   967  	JE	endofpage
   968  
   969  	// 16 bytes loaded at this address won't cross
   970  	// a page boundary, so we can load it directly.
   971  	MOVOU	-16(AX), X0
   972  	ADDL	CX, CX	// double the length so that CX*8 indexes 16-byte table entries
   973  	PAND	masks<>(SB)(CX*8), X0	// zero the bytes past the end of the data
   974  
   975  	// scramble 3 times
   976  	AESENC	X6, X0
   977  	AESENC	X7, X0
   978  	AESENC	X7, X0
   979  	MOVL	X0, (DX)
   980  	RET
   981  
   982  endofpage:
   983  	// address ends in 11111111xxxx (low 12 bits >= 0xff0).  Might be up
   984  	// against a page boundary, so load ending at last byte.
   985  	// Then shift bytes down using pshufb.
   986  	MOVOU	-32(AX)(CX*1), X0	// AX was advanced by 16 above, so this is data+length-16
   987  	ADDL	CX, CX	// double the length so that CX*8 indexes 16-byte table entries
   988  	PSHUFB	shifts<>(SB)(CX*8), X0
   989  	AESENC	X6, X0
   990  	AESENC	X7, X0
   991  	AESENC	X7, X0
   992  	MOVL	X0, (DX)
   993  	RET
   994  
   995  aes0:
   996  	// return input seed
   997  	MOVL	h+4(FP), AX
   998  	MOVL	AX, (DX)
   999  	RET
  1000  
  1001  aes16:
  1002  	MOVOU	(AX), X0
  1003  	AESENC	X6, X0
  1004  	AESENC	X7, X0
  1005  	AESENC	X7, X0
  1006  	MOVL	X0, (DX)
  1007  	RET
  1008  
  1009  
  1010  aes17to32:
  1011  	// load data to be hashed
        	// (first 16 bytes and last 16 bytes; the two loads overlap
        	// when the length is below 32)
  1012  	MOVOU	(AX), X0
  1013  	MOVOU	-16(AX)(CX*1), X1
  1014  
  1015  	// scramble 3 times
  1016  	AESENC	X6, X0
  1017  	AESENC	runtime·aeskeysched+16(SB), X1
  1018  	AESENC	X7, X0
  1019  	AESENC	X7, X1
  1020  	AESENC	X7, X0
  1021  	AESENC	X7, X1
  1022  
  1023  	// combine results
  1024  	PXOR	X1, X0
  1025  	MOVL	X0, (DX)
  1026  	RET
  1027  
  1028  aes33to64:
        	// load the first 32 and the last 32 bytes (overlapping when the
        	// length is below 64) into four lanes
  1029  	MOVOU	(AX), X0
  1030  	MOVOU	16(AX), X1
  1031  	MOVOU	-32(AX)(CX*1), X2
  1032  	MOVOU	-16(AX)(CX*1), X3
  1033  	
        	// three scrambles per lane; the first uses a different key per lane
  1034  	AESENC	X6, X0
  1035  	AESENC	runtime·aeskeysched+16(SB), X1
  1036  	AESENC	runtime·aeskeysched+32(SB), X2
  1037  	AESENC	runtime·aeskeysched+48(SB), X3
  1038  	AESENC	X7, X0
  1039  	AESENC	X7, X1
  1040  	AESENC	X7, X2
  1041  	AESENC	X7, X3
  1042  	AESENC	X7, X0
  1043  	AESENC	X7, X1
  1044  	AESENC	X7, X2
  1045  	AESENC	X7, X3
  1046  
        	// fold the four lanes into X0
  1047  	PXOR	X2, X0
  1048  	PXOR	X3, X1
  1049  	PXOR	X1, X0
  1050  	MOVL	X0, (DX)
  1051  	RET
  1052  
  1053  aes65plus:
  1054  	// start with last (possibly overlapping) block
  1055  	MOVOU	-64(AX)(CX*1), X0
  1056  	MOVOU	-48(AX)(CX*1), X1
  1057  	MOVOU	-32(AX)(CX*1), X2
  1058  	MOVOU	-16(AX)(CX*1), X3
  1059  
  1060  	// scramble state once
  1061  	AESENC	X6, X0
  1062  	AESENC	runtime·aeskeysched+16(SB), X1
  1063  	AESENC	runtime·aeskeysched+32(SB), X2
  1064  	AESENC	runtime·aeskeysched+48(SB), X3
  1065  
  1066  	// compute number of remaining 64-byte blocks
        	// CX = (length-1)/64, which is at least 1 here because length > 64;
        	// the tail not covered by these blocks was loaded above.
  1067  	DECL	CX
  1068  	SHRL	$6, CX
  1069  	
  1070  aesloop:
  1071  	// scramble state, xor in a block
  1072  	MOVOU	(AX), X4
  1073  	MOVOU	16(AX), X5
  1074  	AESENC	X4, X0
  1075  	AESENC	X5, X1
  1076  	MOVOU	32(AX), X4
  1077  	MOVOU	48(AX), X5
  1078  	AESENC	X4, X2
  1079  	AESENC	X5, X3
  1080  
  1081  	// scramble state
  1082  	AESENC	X7, X0
  1083  	AESENC	X7, X1
  1084  	AESENC	X7, X2
  1085  	AESENC	X7, X3
  1086  
  1087  	ADDL	$64, AX	// advance to the next 64-byte block
  1088  	DECL	CX
  1089  	JNE	aesloop	// loop while blocks remain
  1090  
  1091  	// 2 more scrambles to finish
  1092  	AESENC	X7, X0
  1093  	AESENC	X7, X1
  1094  	AESENC	X7, X2
  1095  	AESENC	X7, X3
  1096  	AESENC	X7, X0
  1097  	AESENC	X7, X1
  1098  	AESENC	X7, X2
  1099  	AESENC	X7, X3
  1100  
        	// fold the four lanes into X0
  1101  	PXOR	X2, X0
  1102  	PXOR	X3, X1
  1103  	PXOR	X1, X0
  1104  	MOVL	X0, (DX)
  1105  	RET
  1106  
  1107  TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
        	// Hashes the 4 bytes at p with seed h: the seed goes in dword 0
        	// of X0, the data in dword 1, followed by three AESENC rounds
        	// keyed from runtime·aeskeysched.  The low 32 bits of X0 are
        	// the result.
  1108  	MOVL	p+0(FP), AX	// ptr to data
  1109  	MOVL	h+4(FP), X0	// seed
  1110  	PINSRD	$1, (AX), X0	// data
  1111  	AESENC	runtime·aeskeysched+0(SB), X0
  1112  	AESENC	runtime·aeskeysched+16(SB), X0
  1113  	AESENC	runtime·aeskeysched+32(SB), X0
  1114  	MOVL	X0, ret+8(FP)
  1115  	RET
  1116  
  1117  TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
        	// Hashes the 8 bytes at p with seed h: the data fills the low
        	// 64 bits of X0, the seed goes in dword 2, followed by three
        	// AESENC rounds keyed from runtime·aeskeysched.  The low 32 bits
        	// of X0 are the result.
  1118  	MOVL	p+0(FP), AX	// ptr to data
  1119  	MOVQ	(AX), X0	// data
  1120  	PINSRD	$2, h+4(FP), X0	// seed
  1121  	AESENC	runtime·aeskeysched+0(SB), X0
  1122  	AESENC	runtime·aeskeysched+16(SB), X0
  1123  	AESENC	runtime·aeskeysched+32(SB), X0
  1124  	MOVL	X0, ret+8(FP)
  1125  	RET
  1126  
  1127  // simple mask to get rid of data in the high part of the register.
        // The table holds 16 entries of 16 bytes each.  Entry n (at byte
        // offset n*16) has its low n bytes set to 0xff and the remaining
        // bytes zero.  aeshashbody ANDs the entry selected by the input
        // length into a 16-byte load so that only the first "length" bytes
        // take part in the hash.
  1128  DATA masks<>+0x00(SB)/4, $0x00000000
  1129  DATA masks<>+0x04(SB)/4, $0x00000000
  1130  DATA masks<>+0x08(SB)/4, $0x00000000
  1131  DATA masks<>+0x0c(SB)/4, $0x00000000
  1132  	
  1133  DATA masks<>+0x10(SB)/4, $0x000000ff
  1134  DATA masks<>+0x14(SB)/4, $0x00000000
  1135  DATA masks<>+0x18(SB)/4, $0x00000000
  1136  DATA masks<>+0x1c(SB)/4, $0x00000000
  1137  	
  1138  DATA masks<>+0x20(SB)/4, $0x0000ffff
  1139  DATA masks<>+0x24(SB)/4, $0x00000000
  1140  DATA masks<>+0x28(SB)/4, $0x00000000
  1141  DATA masks<>+0x2c(SB)/4, $0x00000000
  1142  	
  1143  DATA masks<>+0x30(SB)/4, $0x00ffffff
  1144  DATA masks<>+0x34(SB)/4, $0x00000000
  1145  DATA masks<>+0x38(SB)/4, $0x00000000
  1146  DATA masks<>+0x3c(SB)/4, $0x00000000
  1147  	
  1148  DATA masks<>+0x40(SB)/4, $0xffffffff
  1149  DATA masks<>+0x44(SB)/4, $0x00000000
  1150  DATA masks<>+0x48(SB)/4, $0x00000000
  1151  DATA masks<>+0x4c(SB)/4, $0x00000000
  1152  	
  1153  DATA masks<>+0x50(SB)/4, $0xffffffff
  1154  DATA masks<>+0x54(SB)/4, $0x000000ff
  1155  DATA masks<>+0x58(SB)/4, $0x00000000
  1156  DATA masks<>+0x5c(SB)/4, $0x00000000
  1157  	
  1158  DATA masks<>+0x60(SB)/4, $0xffffffff
  1159  DATA masks<>+0x64(SB)/4, $0x0000ffff
  1160  DATA masks<>+0x68(SB)/4, $0x00000000
  1161  DATA masks<>+0x6c(SB)/4, $0x00000000
  1162  	
  1163  DATA masks<>+0x70(SB)/4, $0xffffffff
  1164  DATA masks<>+0x74(SB)/4, $0x00ffffff
  1165  DATA masks<>+0x78(SB)/4, $0x00000000
  1166  DATA masks<>+0x7c(SB)/4, $0x00000000
  1167  	
  1168  DATA masks<>+0x80(SB)/4, $0xffffffff
  1169  DATA masks<>+0x84(SB)/4, $0xffffffff
  1170  DATA masks<>+0x88(SB)/4, $0x00000000
  1171  DATA masks<>+0x8c(SB)/4, $0x00000000
  1172  	
  1173  DATA masks<>+0x90(SB)/4, $0xffffffff
  1174  DATA masks<>+0x94(SB)/4, $0xffffffff
  1175  DATA masks<>+0x98(SB)/4, $0x000000ff
  1176  DATA masks<>+0x9c(SB)/4, $0x00000000
  1177  	
  1178  DATA masks<>+0xa0(SB)/4, $0xffffffff
  1179  DATA masks<>+0xa4(SB)/4, $0xffffffff
  1180  DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1181  DATA masks<>+0xac(SB)/4, $0x00000000
  1182  	
  1183  DATA masks<>+0xb0(SB)/4, $0xffffffff
  1184  DATA masks<>+0xb4(SB)/4, $0xffffffff
  1185  DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1186  DATA masks<>+0xbc(SB)/4, $0x00000000
  1187  	
  1188  DATA masks<>+0xc0(SB)/4, $0xffffffff
  1189  DATA masks<>+0xc4(SB)/4, $0xffffffff
  1190  DATA masks<>+0xc8(SB)/4, $0xffffffff
  1191  DATA masks<>+0xcc(SB)/4, $0x00000000
  1192  	
  1193  DATA masks<>+0xd0(SB)/4, $0xffffffff
  1194  DATA masks<>+0xd4(SB)/4, $0xffffffff
  1195  DATA masks<>+0xd8(SB)/4, $0xffffffff
  1196  DATA masks<>+0xdc(SB)/4, $0x000000ff
  1197  	
  1198  DATA masks<>+0xe0(SB)/4, $0xffffffff
  1199  DATA masks<>+0xe4(SB)/4, $0xffffffff
  1200  DATA masks<>+0xe8(SB)/4, $0xffffffff
  1201  DATA masks<>+0xec(SB)/4, $0x0000ffff
  1202  	
  1203  DATA masks<>+0xf0(SB)/4, $0xffffffff
  1204  DATA masks<>+0xf4(SB)/4, $0xffffffff
  1205  DATA masks<>+0xf8(SB)/4, $0xffffffff
  1206  DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1207  
  1208  GLOBL masks<>(SB),RODATA,$256
  1209  
  1210  // these are arguments to pshufb.  They move data down from
  1211  // the high bytes of the register to the low bytes of the register.
  1212  // index is how many bytes to move.
        // The table holds 16 entries of 16 bytes each; entry n is at byte
        // offset n*16.  In entry n the low n control bytes select source
        // bytes 16-n through 15, and the remaining control bytes are 0xff,
        // for which PSHUFB writes zero.  aeshashbody uses this after loading
        // the 16 bytes that end at the last byte of a short input.
  1213  DATA shifts<>+0x00(SB)/4, $0x00000000
  1214  DATA shifts<>+0x04(SB)/4, $0x00000000
  1215  DATA shifts<>+0x08(SB)/4, $0x00000000
  1216  DATA shifts<>+0x0c(SB)/4, $0x00000000
  1217  	
  1218  DATA shifts<>+0x10(SB)/4, $0xffffff0f
  1219  DATA shifts<>+0x14(SB)/4, $0xffffffff
  1220  DATA shifts<>+0x18(SB)/4, $0xffffffff
  1221  DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1222  	
  1223  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
  1224  DATA shifts<>+0x24(SB)/4, $0xffffffff
  1225  DATA shifts<>+0x28(SB)/4, $0xffffffff
  1226  DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1227  	
  1228  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
  1229  DATA shifts<>+0x34(SB)/4, $0xffffffff
  1230  DATA shifts<>+0x38(SB)/4, $0xffffffff
  1231  DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1232  	
  1233  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
  1234  DATA shifts<>+0x44(SB)/4, $0xffffffff
  1235  DATA shifts<>+0x48(SB)/4, $0xffffffff
  1236  DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1237  	
  1238  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
  1239  DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1240  DATA shifts<>+0x58(SB)/4, $0xffffffff
  1241  DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1242  	
  1243  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
  1244  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1245  DATA shifts<>+0x68(SB)/4, $0xffffffff
  1246  DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1247  	
  1248  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
  1249  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1250  DATA shifts<>+0x78(SB)/4, $0xffffffff
  1251  DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1252  	
  1253  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
  1254  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1255  DATA shifts<>+0x88(SB)/4, $0xffffffff
  1256  DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1257  	
  1258  DATA shifts<>+0x90(SB)/4, $0x0a090807
  1259  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1260  DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1261  DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1262  	
  1263  DATA shifts<>+0xa0(SB)/4, $0x09080706
  1264  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1265  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1266  DATA shifts<>+0xac(SB)/4, $0xffffffff
  1267  	
  1268  DATA shifts<>+0xb0(SB)/4, $0x08070605
  1269  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1270  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1271  DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1272  	
  1273  DATA shifts<>+0xc0(SB)/4, $0x07060504
  1274  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1275  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1276  DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1277  	
  1278  DATA shifts<>+0xd0(SB)/4, $0x06050403
  1279  DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1280  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1281  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1282  	
  1283  DATA shifts<>+0xe0(SB)/4, $0x05040302
  1284  DATA shifts<>+0xe4(SB)/4, $0x09080706
  1285  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1286  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1287  	
  1288  DATA shifts<>+0xf0(SB)/4, $0x04030201
  1289  DATA shifts<>+0xf4(SB)/4, $0x08070605
  1290  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1291  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1292  
  1293  GLOBL shifts<>(SB),RODATA,$256
  1294  
  1295  TEXT runtime·memeq(SB),NOSPLIT,$0-13
        	// Reports whether the size bytes at a and at b are identical.
        	// All of the work is done by memeqbody, which takes the two
        	// pointers in SI/DI and the count in BX and leaves 1 (equal)
        	// or 0 (different) in AX.
  1296  	MOVL	size+8(FP), BX	// BX = byte count
  1297  	MOVL	b+4(FP), DI	// DI = second region
  1298  	MOVL	a+0(FP), SI	// SI = first region
  1299  	CALL	runtime·memeqbody(SB)
  1300  	MOVB	AX, ret+12(FP)	// only the low byte is the boolean result
  1301  	RET
  1302  
  1303  // memequal_varlen(a, b unsafe.Pointer) bool
        // The byte count is not an argument: it is read from offset 4 of the
        // closure that DX points at on entry.
  1304  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1305  	MOVL	b+4(FP), DI
  1306  	MOVL	a+0(FP), SI
  1307  	CMPL	SI, DI
  1308  	JNE	compare
  1309  	MOVB	$1, ret+8(FP)	// same address: equal without touching memory
  1310  	RET
  1311  compare:
  1312  	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
  1313  	CALL	runtime·memeqbody(SB)	// AX = 1 if equal, 0 otherwise
  1314  	MOVB	AX, ret+8(FP)
  1315  	RET
  1316  
  1317  // eqstring tests whether two strings are equal.
  1318  // The compiler guarantees that strings passed
  1319  // to eqstring have equal length.
  1320  // See runtime_test.go:eqstring_generic for
  1321  // equivalent Go code.
  1322  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1323  	MOVL	s2str+8(FP), DI
  1324  	MOVL	s1str+0(FP), SI
  1325  	CMPL	SI, DI
  1326  	JNE	compare
  1327  	MOVB	$1, v+16(FP)	// both strings share one data pointer: equal
  1328  	RET
  1329  compare:
  1330  	MOVL	s1len+4(FP), BX	// only s1's length is read; see the guarantee above
  1331  	CALL	runtime·memeqbody(SB)	// AX = 1 if equal, 0 otherwise
  1332  	MOVB	AX, v+16(FP)
  1333  	RET
  1334  
  1335  TEXT bytes·Equal(SB),NOSPLIT,$0-25
        	// Reports whether slices a and b have the same length and the
        	// same bytes.  Slices of different length are unequal without
        	// any byte comparison; otherwise memeqbody decides.
  1336  	XORL	AX, AX	// result defaults to false
  1337  	MOVL	b_len+16(FP), CX
  1338  	MOVL	a_len+4(FP), BX	// BX doubles as the count passed to memeqbody
  1339  	CMPL	BX, CX
  1340  	JNE	done
  1341  	MOVL	b+12(FP), DI
  1342  	MOVL	a+0(FP), SI
  1343  	CALL	runtime·memeqbody(SB)	// AX = 1 if equal, 0 otherwise
  1344  done:
  1345  	MOVB	AX, ret+24(FP)
  1346  	RET
  1347  
  1348  // a in SI
  1349  // b in DI
  1350  // count in BX
        // Result in AX: 1 if the two regions hold the same bytes, 0 otherwise
        // (the callers in this file store only the low byte).  SI, DI, BX, CX
        // and DX are used as scratch, as are X0-X7 on the SSE2 path.
  1351  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1352  	XORL	AX, AX	// default answer is "not equal"; the bare RETs below rely on it
  1353  
  1354  	CMPL	BX, $4
  1355  	JB	small
  1356  
  1357  	// 64 bytes at a time using xmm registers
  1358  hugeloop:
  1359  	CMPL	BX, $64
  1360  	JB	bigloop
  1361  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1362  	JE	bigloop
  1363  	MOVOU	(SI), X0
  1364  	MOVOU	(DI), X1
  1365  	MOVOU	16(SI), X2
  1366  	MOVOU	16(DI), X3
  1367  	MOVOU	32(SI), X4
  1368  	MOVOU	32(DI), X5
  1369  	MOVOU	48(SI), X6
  1370  	MOVOU	48(DI), X7
        	// byte-wise compare of each 16-byte pair, then AND the four
        	// results together so a single mask covers all 64 bytes
  1371  	PCMPEQB	X1, X0
  1372  	PCMPEQB	X3, X2
  1373  	PCMPEQB	X5, X4
  1374  	PCMPEQB	X7, X6
  1375  	PAND	X2, X0
  1376  	PAND	X6, X4
  1377  	PAND	X4, X0
  1378  	PMOVMSKB X0, DX
  1379  	ADDL	$64, SI
  1380  	ADDL	$64, DI
  1381  	SUBL	$64, BX
  1382  	CMPL	DX, $0xffff	// all 16 mask bits set means all 64 bytes matched
  1383  	JEQ	hugeloop
  1384  	RET	// mismatch; AX is still 0
  1385  
  1386  	// 4 bytes at a time using 32-bit register
  1387  bigloop:
  1388  	CMPL	BX, $4
  1389  	JBE	leftover
  1390  	MOVL	(SI), CX
  1391  	MOVL	(DI), DX
  1392  	ADDL	$4, SI
  1393  	ADDL	$4, DI
  1394  	SUBL	$4, BX
  1395  	CMPL	CX, DX
  1396  	JEQ	bigloop
  1397  	RET	// mismatch; AX is still 0
  1398  
  1399  	// remaining 0-4 bytes
        	// Compare the 4 bytes that end at the last byte of each region.
        	// They may overlap bytes already compared; the loads stay in
        	// bounds because this path is only reached when the original
        	// count was at least 4.
  1400  leftover:
  1401  	MOVL	-4(SI)(BX*1), CX
  1402  	MOVL	-4(DI)(BX*1), DX
  1403  	CMPL	CX, DX
  1404  	SETEQ	AX
  1405  	RET
  1406  
        	// count is 0-3 here
  1407  small:
  1408  	CMPL	BX, $0
  1409  	JEQ	equal	// ZF is set, so the SETEQ at equal yields 1
  1410  
        	// CX = -8*count.  The shifts below use only the low 5 bits of
        	// CX, i.e. 32-8*count, the number of unwanted bits in a
        	// 4-byte load.
  1411  	LEAL	0(BX*8), CX
  1412  	NEGL	CX
  1413  
  1414  	MOVL	SI, DX
  1415  	CMPB	DX, $0xfc	// looks only at the low byte of the address
  1416  	JA	si_high
  1417  
  1418  	// load at SI won't cross a page boundary.
  1419  	MOVL	(SI), SI
  1420  	JMP	si_finish
  1421  si_high:
  1422  	// address ends in 111111xx.  Load up to bytes we want, move to correct position.
  1423  	MOVL	-4(SI)(BX*1), SI
  1424  	SHRL	CX, SI
  1425  si_finish:
  1426  
  1427  	// same for DI.
  1428  	MOVL	DI, DX
  1429  	CMPB	DX, $0xfc
  1430  	JA	di_high
  1431  	MOVL	(DI), DI
  1432  	JMP	di_finish
  1433  di_high:
  1434  	MOVL	-4(DI)(BX*1), DI
  1435  	SHRL	CX, DI
  1436  di_finish:
  1437  
        	// The wanted bytes are now in the low count bytes of SI and DI.
        	// Subtract, then shift the unwanted high bytes out; the result
        	// is zero (ZF set) exactly when the wanted bytes are equal.
  1438  	SUBL	SI, DI
  1439  	SHLL	CX, DI
  1440  equal:
  1441  	SETEQ	AX
  1442  	RET
  1443  
  1444  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
        	// Three-way comparison of two strings.  Unpacks both string
        	// headers into the registers cmpbody expects and stores the
        	// 1/0/-1 it leaves in AX.
  1445  	MOVL	s2_len+12(FP), DX	// DX = length of second string
  1446  	MOVL	s2_base+8(FP), DI	// DI = bytes of second string
  1447  	MOVL	s1_len+4(FP), BX	// BX = length of first string
  1448  	MOVL	s1_base+0(FP), SI	// SI = bytes of first string
  1449  	CALL	runtime·cmpbody(SB)
  1450  	MOVL	AX, ret+16(FP)
  1451  	RET
  1452  
  1453  TEXT bytes·Compare(SB),NOSPLIT,$0-28
        	// Three-way comparison of two byte slices.  Each slice header
        	// occupies 12 bytes of the argument frame; the pointer is read
        	// from offset 0 and the length from offset 4 of each header.
        	// cmpbody leaves 1/0/-1 in AX.
  1454  	MOVL	s2+16(FP), DX	// DX = length of second slice
  1455  	MOVL	s2+12(FP), DI	// DI = data of second slice
  1456  	MOVL	s1+4(FP), BX	// BX = length of first slice
  1457  	MOVL	s1+0(FP), SI	// SI = data of first slice
  1458  	CALL	runtime·cmpbody(SB)
  1459  	MOVL	AX, ret+24(FP)
  1460  	RET
  1461  
  1462  TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
        	// Returns the index of the first byte equal to c in the s_len
        	// bytes at s, or -1 if there is none.
  1463  	MOVL	s+0(FP), SI
  1464  	MOVL	s_len+4(FP), CX
  1465  	MOVB	c+12(FP), AL
  1466  	MOVL	SI, DI	// SCASB advances DI; SI keeps the start for the index math
  1467  	CLD; REPN; SCASB	// scan forward, at most CX bytes, until a byte equals AL
        	// NOTE(review): with CX == 0 the scan should not execute and the
        	// flags would be whatever the caller left.  Both outcomes of the
        	// JZ still produce -1, because DI == SI on the "found" path then.
        	// Confirm against the REPNE SCAS description.
  1468  	JZ 3(PC)	// match: skip the next two instructions
  1469  	MOVL	$-1, ret+16(FP)
  1470  	RET
  1471  	SUBL	SI, DI	// DI points one past the matching byte
  1472  	SUBL	$1, DI
  1473  	MOVL	DI, ret+16(FP)
  1474  	RET
  1475  
  1476  TEXT strings·IndexByte(SB),NOSPLIT,$0-16
        	// Returns the index of the first byte equal to c in the s_len
        	// bytes at s, or -1 if there is none.
  1477  	MOVL	s+0(FP), SI
  1478  	MOVL	s_len+4(FP), CX
  1479  	MOVB	c+8(FP), AL
  1480  	MOVL	SI, DI	// SCASB advances DI; SI keeps the start for the index math
  1481  	CLD; REPN; SCASB	// scan forward, at most CX bytes, until a byte equals AL
        	// NOTE(review): with CX == 0 the scan should not execute and the
        	// flags would be whatever the caller left.  Both outcomes of the
        	// JZ still produce -1, because DI == SI on the "found" path then.
        	// Confirm against the REPNE SCAS description.
  1482  	JZ 3(PC)	// match: skip the next two instructions
  1483  	MOVL	$-1, ret+12(FP)
  1484  	RET
  1485  	SUBL	SI, DI	// DI points one past the matching byte
  1486  	SUBL	$1, DI
  1487  	MOVL	DI, ret+12(FP)
  1488  	RET
  1489  
  1490  // input:
  1491  //   SI = a
  1492  //   DI = b
  1493  //   BX = alen
  1494  //   DX = blen
  1495  // output:
  1496  //   AX = 1/0/-1
        // AX is +1 when a orders after b, -1 when it orders before, and 0 when
        // the two are equal.  Bytes are compared as unsigned values over the
        // common prefix; if that is identical the lengths decide.  BP, CX, SI,
        // DI (and BX, X0, X1 on the SSE2 path) are used as scratch.
  1497  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1498  	CMPL	SI, DI
  1499  	JEQ	allsame	// same start address: only the lengths can differ
  1500  	CMPL	BX, DX
  1501  	MOVL	DX, BP
  1502  	CMOVLLT	BX, BP // BP = min(alen, blen)
  1503  	CMPL	BP, $4
  1504  	JB	small
  1505  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1506  	JE	mediumloop
        	// 16 bytes at a time using xmm registers
  1507  largeloop:
  1508  	CMPL	BP, $16
  1509  	JB	mediumloop
  1510  	MOVOU	(SI), X0
  1511  	MOVOU	(DI), X1
  1512  	PCMPEQB X0, X1
  1513  	PMOVMSKB X1, AX
  1514  	XORL	$0xffff, AX	// convert EQ to NE
  1515  	JNE	diff16	// branch if at least one byte is not equal
  1516  	ADDL	$16, SI
  1517  	ADDL	$16, DI
  1518  	SUBL	$16, BP
  1519  	JMP	largeloop
  1520  
  1521  diff16:
  1522  	BSFL	AX, BX	// index of first byte that differs
  1523  	XORL	AX, AX
  1524  	MOVB	(SI)(BX*1), CX
  1525  	CMPB	CX, (DI)(BX*1)
  1526  	SETHI	AX	// 1 if a's byte is the larger one (unsigned)
  1527  	LEAL	-1(AX*2), AX	// convert 1/0 to +1/-1
  1528  	RET
  1529  
        	// 4 bytes at a time using 32-bit registers
  1530  mediumloop:
  1531  	CMPL	BP, $4
  1532  	JBE	_0through4
  1533  	MOVL	(SI), AX
  1534  	MOVL	(DI), CX
  1535  	CMPL	AX, CX
  1536  	JNE	diff4
  1537  	ADDL	$4, SI
  1538  	ADDL	$4, DI
  1539  	SUBL	$4, BP
  1540  	JMP	mediumloop
  1541  
        	// At most 4 bytes of the common prefix are left.  Compare the 4
        	// bytes that end at its last byte; they may overlap bytes
        	// already found equal, and the loads stay in bounds because the
        	// common prefix was at least 4 bytes long on this path.
  1542  _0through4:
  1543  	MOVL	-4(SI)(BP*1), AX
  1544  	MOVL	-4(DI)(BP*1), CX
  1545  	CMPL	AX, CX
  1546  	JEQ	allsame
  1547  
        	// AX and CX hold differing 4-byte groups from a and b.  After the
        	// byte swap the earliest byte in memory is the most significant,
        	// so the highest differing bit belongs to the first differing
        	// byte, and a's value of that bit tells which side is larger.
  1548  diff4:
  1549  	BSWAPL	AX	// reverse order of bytes
  1550  	BSWAPL	CX
  1551  	XORL	AX, CX	// find bit differences
  1552  	BSRL	CX, CX	// index of highest bit difference
  1553  	SHRL	CX, AX	// move a's bit to bottom
  1554  	ANDL	$1, AX	// mask bit
  1555  	LEAL	-1(AX*2), AX // 1/0 => +1/-1
  1556  	RET
  1557  
  1558  	// 0-3 bytes in common
        	// CX = -8*BP.  NEGL sets ZF when BP is 0 (nothing in common).
        	// The shifts below use only the low 5 bits of CX, i.e. 32-8*BP.
  1559  small:
  1560  	LEAL	(BP*8), CX
  1561  	NEGL	CX
  1562  	JEQ	allsame
  1563  
  1564  	// load si
        	// If the low byte of the address is above 0xfc, load the 4 bytes
        	// ending at the last wanted byte and shift them down instead of
        	// loading forward from the address.
  1565  	CMPB	SI, $0xfc
  1566  	JA	si_high
  1567  	MOVL	(SI), SI
  1568  	JMP	si_finish
  1569  si_high:
  1570  	MOVL	-4(SI)(BP*1), SI
  1571  	SHRL	CX, SI
  1572  si_finish:
  1573  	SHLL	CX, SI	// push the wanted bytes to the top, dropping the rest
  1574  
  1575  	// same for di
  1576  	CMPB	DI, $0xfc
  1577  	JA	di_high
  1578  	MOVL	(DI), DI
  1579  	JMP	di_finish
  1580  di_high:
  1581  	MOVL	-4(DI)(BP*1), DI
  1582  	SHRL	CX, DI
  1583  di_finish:
  1584  	SHLL	CX, DI
  1585  
  1586  	BSWAPL	SI	// reverse order of bytes
  1587  	BSWAPL	DI
  1588  	XORL	SI, DI	// find bit differences
  1589  	JEQ	allsame
  1590  	BSRL	DI, CX	// index of highest bit difference
  1591  	SHRL	CX, SI	// move a's bit to bottom
  1592  	ANDL	$1, SI	// mask bit
  1593  	LEAL	-1(SI*2), AX // 1/0 => +1/-1
  1594  	RET
  1595  
  1596  	// all the bytes in common are the same, so we just need
  1597  	// to compare the lengths.
  1598  allsame:
  1599  	XORL	AX, AX
  1600  	XORL	CX, CX
  1601  	CMPL	BX, DX
  1602  	SETGT	AX	// 1 if alen > blen
  1603  	SETEQ	CX	// 1 if alen == blen
  1604  	LEAL	-1(CX)(AX*2), AX	// 1,0,-1 result
  1605  	RET
  1606  
  1607  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
        	// Advances the fastrand state stored in the current g's m and
        	// returns the new value.  The step doubles the state and, when
        	// the doubled value has its top bit set, xors in 0x88888eef.
  1608  	get_tls(CX)
  1609  	MOVL	g(CX), AX
  1610  	MOVL	g_m(AX), AX
  1611  	MOVL	m_fastrand(AX), DX
  1612  	ADDL	DX, DX
  1613  	MOVL	DX, BX	// BX = doubled value, kept in case the xor is not wanted
  1614  	XORL	$0x88888eef, DX
        	// The sign flag tested here comes from the XORL, not the ADDL.
        	// The constant has its top bit set, so the xored value is
        	// negative exactly when the doubled value's top bit was clear;
        	// in that case the un-xored copy in BX is restored.
  1615  	CMOVLMI	BX, DX
  1616  	MOVL	DX, m_fastrand(AX)
  1617  	MOVL	DX, ret+0(FP)
  1618  	RET
  1619  
  1620  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Returns with AX set to 0; it takes no arguments from the frame.
  1621  	MOVL	$0, AX
  1622  	RET
  1623  
  1624  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1625  // Must obey the gcc calling convention.
        // The result is returned in AX; CX is the only other register written.
  1626  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1627  	get_tls(CX)
  1628  	MOVL	g(CX), AX	// AX = g
  1629  	MOVL	g_m(AX), AX	// AX = g.m
  1630  	MOVL	m_curg(AX), AX	// AX = g.m.curg
  1631  	MOVL	(g_stack+stack_hi)(AX), AX	// AX = g.m.curg.stack.hi
  1632  	RET
  1633  
  1634  // The top-most function running on a goroutine
  1635  // returns to goexit+PCQuantum.
        // The body is laid out byte by byte: one NOP, the CALL, one NOP.
        // There is no RET because goexit1 does not return.
  1636  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1637  	BYTE	$0x90	// NOP
  1638  	CALL	runtime·goexit1(SB)	// does not return
  1639  	// traceback from goexit1 must hit code range of goexit
        	// (the return address of the CALL is the NOP below, which is
        	// still part of goexit)
  1640  	BYTE	$0x90	// NOP
  1641  
  1642  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT0 hint for the memory at addr; returns nothing.
  1643  	MOVL	addr+0(FP), AX
  1644  	PREFETCHT0	(AX)
  1645  	RET
  1646  
  1647  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT1 hint for the memory at addr; returns nothing.
  1648  	MOVL	addr+0(FP), AX
  1649  	PREFETCHT1	(AX)
  1650  	RET
  1651  
  1652  
  1653  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHT2 hint for the memory at addr; returns nothing.
  1654  	MOVL	addr+0(FP), AX
  1655  	PREFETCHT2	(AX)
  1656  	RET
  1657  
  1658  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
        	// Issues a PREFETCHNTA hint for the memory at addr; returns nothing.
  1659  	MOVL	addr+0(FP), AX
  1660  	PREFETCHNTA	(AX)
  1661  	RET