github.com/reiver/go@v0.0.0-20150109200633-1d0c7792f172/src/runtime/asm_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0	// bootstrap: set up g0, TLS and m0, run runtime init, queue runtime·main, start this M
    11  	// copy arguments forward on an even stack
    12  	MOVL	argc+0(FP), AX
    13  	MOVL	argv+4(FP), BX
    14  	SUBL	$128, SP		// plenty of scratch
    15  	ANDL	$~15, SP		// clear the low 4 bits: SP is now 16-byte aligned
    16  	MOVL	AX, 120(SP)		// save argc, argv away
    17  	MOVL	BX, 124(SP)
    18  
    19  	// set default stack bounds.
    20  	// _cgo_init may update stackguard.
    21  	MOVL	$runtime·g0(SB), BP
    22  	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104, used as the default low bound
    23  	MOVL	BX, g_stackguard0(BP)
    24  	MOVL	BX, g_stackguard1(BP)
    25  	MOVL	BX, (g_stack+stack_lo)(BP)
    26  	MOVL	SP, (g_stack+stack_hi)(BP)
    27  	
    28  	// find out information about the processor we're on
    29  	MOVL	$0, AX
    30  	CPUID
    31  	CMPL	AX, $0			// AX == 0 after leaf 0: skip the leaf 1 query
    32  	JE	nocpuinfo
    33  	MOVL	$1, AX
    34  	CPUID
    35  	MOVL	CX, runtime·cpuid_ecx(SB)	// keep the leaf 1 ECX/EDX words for later feature tests
    36  	MOVL	DX, runtime·cpuid_edx(SB)
    37  nocpuinfo:	
    38  
    39  	// if there is an _cgo_init, call it to let it
    40  	// initialize and to set up GS.  if not,
    41  	// we set up GS ourselves.
    42  	MOVL	_cgo_init(SB), AX
    43  	TESTL	AX, AX
    44  	JZ	needtls
    45  	MOVL	$setg_gcc<>(SB), BX
    46  	MOVL	BX, 4(SP)		// second argument: address of setg_gcc
    47  	MOVL	BP, 0(SP)		// first argument: BP still holds &runtime·g0
    48  	CALL	AX
    49  
    50  	// update stackguard after _cgo_init
    51  	MOVL	$runtime·g0(SB), CX
    52  	MOVL	(g_stack+stack_lo)(CX), AX
    53  	ADDL	$const__StackGuard, AX
    54  	MOVL	AX, g_stackguard0(CX)
    55  	MOVL	AX, g_stackguard1(CX)
    56  
    57  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
    58  	CMPL runtime·iswindows(SB), $0
    59  	JEQ ok
    60  needtls:
    61  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
    62  	CMPL	runtime·isplan9(SB), $1
    63  	JEQ	ok
    64  
    65  	// set up %gs
    66  	CALL	runtime·ldt0setup(SB)
    67  
    68  	// store through it, to make sure it works
    69  	get_tls(BX)
    70  	MOVL	$0x123, g(BX)		// write a marker through the TLS slot...
    71  	MOVL	runtime·tls0(SB), AX	// ...and read it back directly from tls0
    72  	CMPL	AX, $0x123
    73  	JEQ	ok
    74  	MOVL	AX, 0	// abort
    75  ok:
    76  	// set up m and g "registers"
    77  	get_tls(BX)
    78  	LEAL	runtime·g0(SB), CX
    79  	MOVL	CX, g(BX)
    80  	LEAL	runtime·m0(SB), AX
    81  
    82  	// save m->g0 = g0
    83  	MOVL	CX, m_g0(AX)
    84  	// save g0->m = m0
    85  	MOVL	AX, g_m(CX)
    86  
    87  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
    88  
    89  	// convention is D is always cleared
    90  	CLD
    91  
    92  	CALL	runtime·check(SB)
    93  
    94  	// saved argc, argv
    95  	MOVL	120(SP), AX
    96  	MOVL	AX, 0(SP)
    97  	MOVL	124(SP), AX
    98  	MOVL	AX, 4(SP)
    99  	CALL	runtime·args(SB)
   100  	CALL	runtime·osinit(SB)
   101  	CALL	runtime·schedinit(SB)
   102  
   103  	// create a new goroutine to start program
   104  	PUSHL	$runtime·main·f(SB)	// entry
   105  	PUSHL	$0	// arg size
   106  	CALL	runtime·newproc(SB)
   107  	POPL	AX			// discard the two words pushed for newproc
   108  	POPL	AX
   109  
   110  	// start this M
   111  	CALL	runtime·mstart(SB)
   112  
   113  	INT $3				// reached only if mstart returns
   114  	RET
   115  
   116  DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)	// one 4-byte word holding the address of runtime·main
   117  GLOBL	runtime·main·f(SB),RODATA,$4	// read-only, 4 bytes; rt0_go pushes its address as the newproc entry
   118  
   119  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// raise a breakpoint trap, then return to the caller
   120  	INT $3
   121  	RET
   122  
   123  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   124  	// Linux and MinGW start the FPU in extended double precision.
   125  	// Other operating systems use double precision.
   126  	// Change to double precision to match them,
   127  	// and to match other hardware that only has double.
   128  	PUSHL $0x27F		// place the new x87 control word on the stack
   129  	FLDCW	0(SP)		// load it into the FPU
   130  	POPL AX			// drop the temporary; the value in AX is not used
   131  	RET
   132  
   133  /*
   134   *  go-routine
   135   */
   136  
   137  // void gosave(Gobuf*)
   138  // save state in Gobuf; setjmp
        // Records the caller's SP and PC and the current g in *buf,
        // and zeroes buf.ret and buf.ctxt.
   139  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   140  	MOVL	buf+0(FP), AX		// gobuf
   141  	LEAL	buf+0(FP), BX		// caller's SP
   142  	MOVL	BX, gobuf_sp(AX)
   143  	MOVL	0(SP), BX		// caller's PC
   144  	MOVL	BX, gobuf_pc(AX)
   145  	MOVL	$0, gobuf_ret(AX)
   146  	MOVL	$0, gobuf_ctxt(AX)
   147  	get_tls(CX)
   148  	MOVL	g(CX), BX		// current g from TLS
   149  	MOVL	BX, gobuf_g(AX)
   150  	RET
   151  
   152  // void gogo(Gobuf*)
   153  // restore state from Gobuf; longjmp
        // Installs buf.g as the current g, switches SP to buf.sp, loads
        // buf.ret into AX and buf.ctxt into DX, and jumps to buf.pc.
        // Does not return to its caller.
   154  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   155  	MOVL	buf+0(FP), BX		// gobuf
   156  	MOVL	gobuf_g(BX), DX
   157  	MOVL	0(DX), CX		// make sure g != nil
   158  	get_tls(CX)			// CX is reused; the load above was only for its fault
   159  	MOVL	DX, g(CX)
   160  	MOVL	gobuf_sp(BX), SP	// restore SP
   161  	MOVL	gobuf_ret(BX), AX
   162  	MOVL	gobuf_ctxt(BX), DX
   163  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   164  	MOVL	$0, gobuf_ret(BX)
   165  	MOVL	$0, gobuf_ctxt(BX)
   166  	MOVL	gobuf_pc(BX), BX	// BX = saved PC (the Gobuf pointer is no longer needed)
   167  	JMP	BX
   168  
   169  // func mcall(fn func(*g))
   170  // Switch to m->g0's stack, call fn(g).
   171  // Fn must never return.  It should gogo(&g->sched)
   172  // to keep running g.
   173  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   174  	MOVL	fn+0(FP), DI
   175  	
   176  	get_tls(CX)
   177  	MOVL	g(CX), AX	// save state in g->sched
   178  	MOVL	0(SP), BX	// caller's PC
   179  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   180  	LEAL	fn+0(FP), BX	// caller's SP
   181  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   182  	MOVL	AX, (g_sched+gobuf_g)(AX)
   183  
   184  	// switch to m->g0 & its stack, call fn
   185  	MOVL	g(CX), BX
   186  	MOVL	g_m(BX), BX
   187  	MOVL	m_g0(BX), SI
   188  	CMPL	SI, AX	// if g == m->g0 call badmcall
   189  	JNE	3(PC)	// g != m->g0: skip the two badmcall instructions
   190  	MOVL	$runtime·badmcall(SB), AX
   191  	JMP	AX
   192  	MOVL	SI, g(CX)	// g = m->g0
   193  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   194  	PUSHL	AX	// argument for fn: the g we just switched away from
   195  	MOVL	DI, DX	// DX = the func value, passed as fn's context
   196  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of the func value
   197  	CALL	DI
   198  	POPL	AX	// only reached if fn returned, which it must not
   199  	MOVL	$runtime·badmcall2(SB), AX
   200  	JMP	AX
   201  	RET
   202  
   203  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   204  // of the G stack.  We need to distinguish the routine that
   205  // lives at the bottom of the G stack from the one that lives
   206  // at the top of the system stack because the one at the top of
   207  // the system stack terminates the stack walk (see topofstack()).
   208  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   209  	RET	// body is a bare return; systemstack stores this function's address as g->sched.pc
   210  
   211  // func systemstack(fn func())
        // Calls fn directly when the current g is gsignal or g0. When it is
        // m->curg, saves state in g->sched, runs fn on g0's stack, then
        // switches back to m->curg. Any other g is reported via badsystemstack.
   212  TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   213  	MOVL	fn+0(FP), DI	// DI = fn
   214  	get_tls(CX)
   215  	MOVL	g(CX), AX	// AX = g
   216  	MOVL	g_m(AX), BX	// BX = m
   217  
   218  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   219  	CMPL	AX, DX
   220  	JEQ	noswitch
   221  
   222  	MOVL	m_g0(BX), DX	// DX = g0
   223  	CMPL	AX, DX
   224  	JEQ	noswitch
   225  
   226  	MOVL	m_curg(BX), BP
   227  	CMPL	AX, BP
   228  	JEQ	switch
   229  	
   230  	// Bad: g is not gsignal, not g0, not curg. What is it?
   231  	// Hide call from linker nosplit analysis.
   232  	MOVL	$runtime·badsystemstack(SB), AX
   233  	CALL	AX
        	// NOTE(review): nothing stops execution here; if badsystemstack
        	// ever returned, control would continue into switch below.
   234  
   235  switch:
   236  	// save our state in g->sched.  Pretend to
   237  	// be systemstack_switch if the G stack is scanned.
   238  	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   239  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   240  	MOVL	AX, (g_sched+gobuf_g)(AX)
   241  
   242  	// switch to g0
   243  	MOVL	DX, g(CX)	// DX still holds g0 from the check above
   244  	MOVL	(g_sched+gobuf_sp)(DX), BX
   245  	// make it look like mstart called systemstack on g0, to stop traceback
   246  	SUBL	$4, BX
   247  	MOVL	$runtime·mstart(SB), DX
   248  	MOVL	DX, 0(BX)
   249  	MOVL	BX, SP
   250  
   251  	// call target function
   252  	MOVL	DI, DX	// DX = the func value, passed as fn's context
   253  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of the func value
   254  	CALL	DI
   255  
   256  	// switch back to g
   257  	get_tls(CX)
   258  	MOVL	g(CX), AX
   259  	MOVL	g_m(AX), BX
   260  	MOVL	m_curg(BX), AX
   261  	MOVL	AX, g(CX)
   262  	MOVL	(g_sched+gobuf_sp)(AX), SP
   263  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been restored
   264  	RET
   265  
   266  noswitch:
   267  	// already on system stack, just call directly
   268  	MOVL	DI, DX
   269  	MOVL	0(DI), DI
   270  	CALL	DI
   271  	RET
   272  
   273  /*
   274   * support for morestack
   275   */
   276  
   277  // Called during function prolog when more stack is needed.
   278  //
   279  // The traceback routines see morestack on a g0 as being
   280  // the top of a stack (for example, morestack calling newstack
   281  // calling the scheduler calling newm calling gc), so we must
   282  // record an argument size. For that purpose, it has no arguments.
        //
        // On entry DX is saved as f's context (g->sched.ctxt);
        // morestack_noctxt zeroes DX before jumping here.
   283  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   284  	// Cannot grow scheduler stack (m->g0).
   285  	get_tls(CX)
   286  	MOVL	g(CX), BX
   287  	MOVL	g_m(BX), BX
   288  	MOVL	m_g0(BX), SI
   289  	CMPL	g(CX), SI
   290  	JNE	2(PC)	// not on g0: skip the trap
   291  	INT	$3
   292  
   293  	// Cannot grow signal stack.
   294  	MOVL	m_gsignal(BX), SI
   295  	CMPL	g(CX), SI
   296  	JNE	2(PC)	// not on gsignal: skip the trap
   297  	INT	$3
   298  
   299  	// Called from f.
   300  	// Set m->morebuf to f's caller.
   301  	MOVL	4(SP), DI	// f's caller's PC
   302  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   303  	LEAL	8(SP), CX	// f's caller's SP
   304  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   305  	get_tls(CX)	// CX was used as scratch above; reload the TLS base
   306  	MOVL	g(CX), SI
   307  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   308  
   309  	// Set g->sched to context in f.
   310  	MOVL	0(SP), AX	// f's PC
   311  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   312  	MOVL	SI, (g_sched+gobuf_g)(SI)
   313  	LEAL	4(SP), AX	// f's SP
   314  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   315  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)
   316  
   317  	// Call newstack on m->g0's stack.
   318  	MOVL	m_g0(BX), BP
   319  	MOVL	BP, g(CX)
   320  	MOVL	(g_sched+gobuf_sp)(BP), AX
   321  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   322  	MOVL	AX, SP
   323  	CALL	runtime·newstack(SB)
   324  	MOVL	$0, 0x1003	// crash if newstack returns
   325  	RET
   326  
   327  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0	// morestack entry that records a zero context
   328  	MOVL	$0, DX	// morestack stores DX into g->sched.ctxt
   329  	JMP runtime·morestack(SB)
   330  
   331  // reflectcall: call a function with the given argument list
   332  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   333  // We don't have variable-sized frames, so we use a small number
   334  // of constant-sized-frame functions to encode a few bits of size in the pc.
   335  // Caution: ugly multiline assembly macros in your future!
   336  
   337  #define DISPATCH(NAME,MAXSIZE)		\
   338  	CMPL	CX, $MAXSIZE;		\
   339  	JA	3(PC);			\
   340  	MOVL	$NAME(SB), AX;		\
   341  	JMP	AX
   342  // DISPATCH jumps to NAME when CX <= MAXSIZE (unsigned compare) and
        // otherwise falls through to the next statement.
        // Note: can't just "JMP NAME(SB)" - bad inlining results.
   343  
   344  TEXT reflect·call(SB), NOSPLIT, $0-0	// reflect-package alias for reflectcall
   345  	JMP	·reflectcall(SB)	// jump (not call), so reflectcall sees the caller's arguments unchanged
   346  
   347  TEXT ·reflectcall(SB), NOSPLIT, $0-20	// pick the smallest callN whose frame holds argsize bytes and jump to it
   348  	MOVL	argsize+12(FP), CX	// CX is what each DISPATCH compares against its MAXSIZE
   349  	DISPATCH(runtime·call16, 16)
   350  	DISPATCH(runtime·call32, 32)
   351  	DISPATCH(runtime·call64, 64)
   352  	DISPATCH(runtime·call128, 128)
   353  	DISPATCH(runtime·call256, 256)
   354  	DISPATCH(runtime·call512, 512)
   355  	DISPATCH(runtime·call1024, 1024)
   356  	DISPATCH(runtime·call2048, 2048)
   357  	DISPATCH(runtime·call4096, 4096)
   358  	DISPATCH(runtime·call8192, 8192)
   359  	DISPATCH(runtime·call16384, 16384)
   360  	DISPATCH(runtime·call32768, 32768)
   361  	DISPATCH(runtime·call65536, 65536)
   362  	DISPATCH(runtime·call131072, 131072)
   363  	DISPATCH(runtime·call262144, 262144)
   364  	DISPATCH(runtime·call524288, 524288)
   365  	DISPATCH(runtime·call1048576, 1048576)
   366  	DISPATCH(runtime·call2097152, 2097152)
   367  	DISPATCH(runtime·call4194304, 4194304)
   368  	DISPATCH(runtime·call8388608, 8388608)
   369  	DISPATCH(runtime·call16777216, 16777216)
   370  	DISPATCH(runtime·call33554432, 33554432)
   371  	DISPATCH(runtime·call67108864, 67108864)
   372  	DISPATCH(runtime·call134217728, 134217728)
   373  	DISPATCH(runtime·call268435456, 268435456)
   374  	DISPATCH(runtime·call536870912, 536870912)
   375  	DISPATCH(runtime·call1073741824, 1073741824)
   376  	MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds the largest frame (1073741824 bytes)
   377  	JMP	AX
   378  
   379  #define CALLFN(NAME,MAXSIZE)			\
   380  TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   381  	NO_LOCAL_POINTERS;			\
   382  	/* copy argsize bytes from argptr into this frame, starting at SP */	\
   383  	MOVL	argptr+8(FP), SI;		\
   384  	MOVL	argsize+12(FP), CX;		\
   385  	MOVL	SP, DI;				\
   386  	REP;MOVSB;				\
   387  	/* call function: DX = f (context), AX = code pointer loaded from f's first word */	\
   388  	MOVL	f+4(FP), DX;			\
   389  	MOVL	(DX), AX; 			\
   390  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   391  	CALL	AX;				\
   392  	/* copy return values back: bytes [retoffset, argsize) from SP to argptr */	\
   393  	MOVL	argptr+8(FP), DI;		\
   394  	MOVL	argsize+12(FP), CX;		\
   395  	MOVL	retoffset+16(FP), BX;		\
   396  	MOVL	SP, SI;				\
   397  	ADDL	BX, DI;				\
   398  	ADDL	BX, SI;				\
   399  	SUBL	BX, CX;				\
   400  	REP;MOVSB;				\
   401  	/* execute write barrier updates: callwritebarrier(argtype, argptr, argsize, retoffset) */	\
   402  	MOVL	argtype+0(FP), DX;		\
   403  	MOVL	argptr+8(FP), DI;		\
   404  	MOVL	argsize+12(FP), CX;		\
   405  	MOVL	retoffset+16(FP), BX;		\
   406  	MOVL	DX, 0(SP);			\
   407  	MOVL	DI, 4(SP);			\
   408  	MOVL	CX, 8(SP);			\
   409  	MOVL	BX, 12(SP);			\
   410  	CALL	runtime·callwritebarrier(SB);	\
   411  	RET
   412  
   413  CALLFN(·call16, 16)	// one fixed-frame callN per DISPATCH line in reflectcall; frame size doubles each step
   414  CALLFN(·call32, 32)
   415  CALLFN(·call64, 64)
   416  CALLFN(·call128, 128)
   417  CALLFN(·call256, 256)
   418  CALLFN(·call512, 512)
   419  CALLFN(·call1024, 1024)
   420  CALLFN(·call2048, 2048)
   421  CALLFN(·call4096, 4096)
   422  CALLFN(·call8192, 8192)
   423  CALLFN(·call16384, 16384)
   424  CALLFN(·call32768, 32768)
   425  CALLFN(·call65536, 65536)
   426  CALLFN(·call131072, 131072)
   427  CALLFN(·call262144, 262144)
   428  CALLFN(·call524288, 524288)
   429  CALLFN(·call1048576, 1048576)
   430  CALLFN(·call2097152, 2097152)
   431  CALLFN(·call4194304, 4194304)
   432  CALLFN(·call8388608, 8388608)
   433  CALLFN(·call16777216, 16777216)
   434  CALLFN(·call33554432, 33554432)
   435  CALLFN(·call67108864, 67108864)
   436  CALLFN(·call134217728, 134217728)
   437  CALLFN(·call268435456, 268435456)
   438  CALLFN(·call536870912, 536870912)
   439  CALLFN(·call1073741824, 1073741824)
   440  
   441  // bool cas(int32 *val, int32 old, int32 new)
   442  // Atomic 32-bit compare-and-swap:
   443  //	if(*val != old)
   444  //		return 0;
   445  //	*val = new;
   446  //	return 1;
   447  //
   448  TEXT runtime·cas(SB), NOSPLIT, $0-13
   449  	MOVL	old+4(FP), AX	// CMPXCHGL compares memory against AX
   450  	MOVL	new+8(FP), DX
   451  	MOVL	ptr+0(FP), SI
   452  	LOCK
   453  	CMPXCHGL	DX, 0(SI)
   454  	SETEQ	ret+12(FP)	// result byte is 1 exactly when the exchange happened
   455  	RET
   456  
   457  TEXT runtime·casuintptr(SB), NOSPLIT, $0-13	// same frame layout as cas ($0-13)
   458  	JMP	runtime·cas(SB)	// jump (not call): cas reads and writes this caller's argument frame
   459  
   460  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8	// forwards to atomicload, which is defined outside this view
   461  	JMP	runtime·atomicload(SB)	// jump (not call): the target uses this caller's argument frame
   462  
   463  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8	// forwards to atomicload, which is defined outside this view
   464  	JMP	runtime·atomicload(SB)	// jump (not call): the target uses this caller's argument frame
   465  
   466  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8	// same frame layout as atomicstore ($0-8)
   467  	JMP	runtime·atomicstore(SB)	// jump (not call): atomicstore reads this caller's argument frame
   468  
   469  // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
   470  // Atomically:
   471  //	if(*val == old){
   472  //		*val = new;
   473  //		return 1;
   474  //	} else {
   475  //		return 0;
   476  //	}
   477  TEXT runtime·cas64(SB), NOSPLIT, $0-21
   478  	MOVL	ptr+0(FP), BP
   479  	MOVL	old_lo+4(FP), AX	// CMPXCHG8B compares memory against DX:AX
   480  	MOVL	old_hi+8(FP), DX
   481  	MOVL	new_lo+12(FP), BX	// and stores CX:BX on a match
   482  	MOVL	new_hi+16(FP), CX
   483  	LOCK
   484  	CMPXCHG8B	0(BP)
   485  	SETEQ	ret+20(FP)	// result byte is 1 exactly when the exchange happened
   486  	RET
   487  
   488  // bool casp(void **p, void *old, void *new)
   489  // Atomic pointer-sized compare-and-swap:
   490  //	if(*p != old)
   491  //		return 0;
   492  //	*p = new;
   493  //	return 1;
   494  //
   495  TEXT runtime·casp1(SB), NOSPLIT, $0-13
   496  	MOVL	old+4(FP), AX	// CMPXCHGL compares memory against AX
   497  	MOVL	new+8(FP), DX
   498  	MOVL	ptr+0(FP), DI
   499  	LOCK
   500  	CMPXCHGL	DX, 0(DI)
   501  	SETEQ	ret+12(FP)	// result byte is 1 exactly when the exchange happened
   502  	RET
   503  
   504  // uint32 xadd(uint32 volatile *val, int32 delta)
   505  // Atomically adds delta to *val and returns the updated value:
   506  //	*val += delta;
   507  //	return *val;
   508  TEXT runtime·xadd(SB), NOSPLIT, $0-12
   509  	MOVL	delta+4(FP), CX
   510  	MOVL	ptr+0(FP), DX
   511  	MOVL	CX, AX		// keep a copy of delta; XADDL overwrites CX
   512  	LOCK
   513  	XADDL	CX, 0(DX)	// CX = previous *val, *val += delta
   514  	ADDL	AX, CX		// previous value + delta = value now stored
   515  	MOVL	CX, ret+8(FP)
   516  	RET
   517  
   518  TEXT runtime·xchg(SB), NOSPLIT, $0-12	// swap new into *ptr and return the previous contents
   519  	MOVL	new+4(FP), CX
   520  	MOVL	ptr+0(FP), DX
   521  	XCHGL	CX, 0(DX)	// CX now holds what *ptr contained
   522  	MOVL	CX, ret+8(FP)
   523  	RET
   524  
   525  TEXT runtime·xchgp1(SB), NOSPLIT, $0-12	// pointer-sized swap: store new in *ptr, return the previous contents
   526  	MOVL	new+4(FP), DX
   527  	MOVL	ptr+0(FP), SI
   528  	XCHGL	DX, 0(SI)	// DX now holds what *ptr contained
   529  	MOVL	DX, ret+8(FP)
   530  	RET
   531  
   532  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12	// same frame layout as xchg ($0-12)
   533  	JMP	runtime·xchg(SB)	// jump (not call): xchg reads and writes this caller's argument frame
   534  
   535  TEXT runtime·procyield(SB),NOSPLIT,$0-4	// execute PAUSE cycles times; argument size is 4 because cycles+0(FP) is read below
   536  	MOVL	cycles+0(FP), AX
   537  again:
   538  	PAUSE
   539  	SUBL	$1, AX		// cycles == 0 would wrap here and spin 2^32 times
   540  	JNZ	again
   541  	RET
   542  
   543  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-8	// store val into *ptr with an exchange instruction
   544  	MOVL	val+4(FP), CX
   545  	MOVL	ptr+0(FP), DX
   546  	XCHGL	CX, 0(DX)	// previous contents land in CX and are discarded
   547  	RET
   548  
   549  TEXT runtime·atomicstore(SB), NOSPLIT, $0-8	// store the 32-bit val into *ptr with an exchange instruction
   550  	MOVL	val+4(FP), DX
   551  	MOVL	ptr+0(FP), SI
   552  	XCHGL	DX, 0(SI)	// previous contents land in DX and are discarded
   553  	RET
   554  
   555  // uint64 atomicload64(uint64 volatile* addr);
        // Copies the 8 bytes at addr into the result slot through MM0.
   556  TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
   557  	MOVL	ptr+0(FP), AX
   558  	LEAL	ret_lo+4(FP), BX	// BX = address of the 64-bit result slot
   559  	// MOVQ (%EAX), %MM0
   560  	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
   561  	// MOVQ %MM0, 0(%EBX)
   562  	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
   563  	// EMMS
   564  	BYTE $0x0F; BYTE $0x77
   565  	RET
   566  
   567  // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
        // Copies v to *addr through MM0, then executes a locked instruction as a fence.
   568  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
   569  	MOVL	ptr+0(FP), AX
   570  	// MOVQ and EMMS were introduced on the Pentium MMX.
   571  	// MOVQ 0x8(%ESP), %MM0
   572  	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
   573  	// MOVQ %MM0, (%EAX)
   574  	BYTE $0x0f; BYTE $0x7f; BYTE $0x00 
   575  	// EMMS
   576  	BYTE $0x0F; BYTE $0x77
   577  	// This is essentially a no-op, but it provides required memory fencing.
   578  	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
   579  	MOVL	$0, AX
   580  	LOCK
   581  	XADDL	AX, (SP)	// adds 0 to the word at the top of the stack, leaving it unchanged
   582  	RET
   583  
   584  // void	runtime·atomicor8(byte volatile*, byte);
   585  TEXT runtime·atomicor8(SB), NOSPLIT, $0-5	// *ptr |= val as a single locked byte operation
   586  	MOVB	val+4(FP), CX
   587  	MOVL	ptr+0(FP), DX
   588  	LOCK
   589  	ORB	CX, (DX)
   590  	RET
   591  
   592  // void jmpdefer(fn, sp);
   593  // called from deferreturn.
   594  // 1. pop the caller
   595  // 2. sub 5 bytes from the callers return
   596  // 3. jmp to the argument
        // NOTE(review): 5 is presumably the length of the CALL instruction
        // that invoked deferreturn; confirm against the compiler's output.
   597  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   598  	MOVL	fv+0(FP), DX	// fn
   599  	MOVL	argp+4(FP), BX	// caller sp
   600  	LEAL	-4(BX), SP	// caller sp after CALL
   601  	SUBL	$5, (SP)	// return to CALL again
   602  	MOVL	0(DX), BX	// BX = code pointer from the first word of fn; DX stays as its context
   603  	JMP	BX	// but first run the deferred function
   604  
   605  // Save state of caller into g->sched.
        // Writes sched.sp and sched.pc and zeroes sched.ret and sched.ctxt.
        // AX and BX are pushed on entry and popped before returning.
   606  TEXT gosave<>(SB),NOSPLIT,$0
   607  	PUSHL	AX
   608  	PUSHL	BX
   609  	get_tls(BX)
   610  	MOVL	g(BX), BX	// BX = current g
   611  	LEAL	arg+0(FP), AX
   612  	MOVL	AX, (g_sched+gobuf_sp)(BX)
   613  	MOVL	-4(AX), AX	// the word just below that address is stored as the saved PC
   614  	MOVL	AX, (g_sched+gobuf_pc)(BX)
   615  	MOVL	$0, (g_sched+gobuf_ret)(BX)
   616  	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
   617  	POPL	BX
   618  	POPL	AX
   619  	RET
   620  
   621  // asmcgocall(void(*fn)(void*), void *arg)
   622  // Call fn(arg) on the scheduler stack,
   623  // aligned appropriately for the gcc ABI.
   624  // See cgocall.c for more details.
   625  TEXT ·asmcgocall(SB),NOSPLIT,$0-8
   626  	MOVL	fn+0(FP), AX	// asmcgocall<> expects fn in AX
   627  	MOVL	arg+4(FP), BX	// and arg in BX
   628  	CALL	asmcgocall<>(SB)
   629  	RET
   630  
   631  TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-12	// like asmcgocall, but also returns the value left in AX
   632  	MOVL	fn+0(FP), AX	// asmcgocall<> expects fn in AX
   633  	MOVL	arg+4(FP), BX	// and arg in BX
   634  	CALL	asmcgocall<>(SB)
   635  	MOVL	AX, ret+8(FP)	// asmcgocall<> does not write AX after calling fn
   636  	RET
   637  
   638  TEXT asmcgocall<>(SB),NOSPLIT,$0-0
   639  	// fn in AX, arg in BX
   640  	MOVL	SP, DX		// DX = SP on entry, used below to compute the stack depth
   641  
   642  	// Figure out if we need to switch to m->g0 stack.
   643  	// We get called to create new OS threads too, and those
   644  	// come in on the m->g0 stack already.
   645  	get_tls(CX)
   646  	MOVL	g(CX), BP
   647  	MOVL	g_m(BP), BP
   648  	MOVL	m_g0(BP), SI
   649  	MOVL	g(CX), DI
   650  	CMPL	SI, DI
   651  	JEQ	4(PC)		// already on g0: skip the save and the two switch instructions
   652  	CALL	gosave<>(SB)
   653  	MOVL	SI, g(CX)
   654  	MOVL	(g_sched+gobuf_sp)(SI), SP
   655  
   656  	// Now on a scheduling stack (a pthread-created stack).
   657  	SUBL	$32, SP
   658  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   659  	MOVL	DI, 8(SP)	// save g
   660  	MOVL	(g_stack+stack_hi)(DI), DI
   661  	SUBL	DX, DI		// DI = stack.hi - entry SP
   662  	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   663  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   664  	CALL	AX
   665  
   666  	// Restore registers, g, stack pointer.
   667  	get_tls(CX)
   668  	MOVL	8(SP), DI	// DI = saved g
   669  	MOVL	(g_stack+stack_hi)(DI), SI
   670  	SUBL	4(SP), SI	// SI = stack.hi - saved depth = SP to return on
   671  	MOVL	DI, g(CX)
   672  	MOVL	SI, SP
   673  	RET
   674  
   675  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   676  // Turn the fn into a Go func (by taking its address) and call
   677  // cgocallback_gofunc.
   678  TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
   679  	LEAL	fn+0(FP), AX	// address of the fn argument slot, passed as the func value
   680  	MOVL	AX, 0(SP)
   681  	MOVL	frame+4(FP), AX
   682  	MOVL	AX, 4(SP)
   683  	MOVL	framesize+8(FP), AX
   684  	MOVL	AX, 8(SP)
   685  	MOVL	$runtime·cgocallback_gofunc(SB), AX	// indirect call through AX
   686  	CALL	AX
   687  	RET
   688  
   689  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   690  // See cgocall.c for more details.
   691  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
   692  	NO_LOCAL_POINTERS
   693  
   694  	// If g is nil, Go did not create the current thread.
   695  	// Call needm to obtain one for temporary use.
   696  	// In this case, we're running on the thread stack, so there's
   697  	// lots of space, but the linker doesn't know. Hide the call from
   698  	// the linker analysis by using an indirect call through AX.
   699  	get_tls(CX)
   700  #ifdef GOOS_windows
   701  	MOVL	$0, BP
   702  	CMPL	CX, $0
   703  	JEQ	2(PC) // TODO
   704  #endif
   705  	MOVL	g(CX), BP
   706  	CMPL	BP, $0
   707  	JEQ	needm
   708  	MOVL	g_m(BP), BP
   709  	MOVL	BP, DX // saved copy of oldm
   710  	JMP	havem
   711  needm:
   712  	MOVL	$0, 0(SP)
   713  	MOVL	$runtime·needm(SB), AX
   714  	CALL	AX
   715  	MOVL	0(SP), DX	// DX (oldm) = whatever is in 0(SP) after needm returns
   716  	get_tls(CX)
   717  	MOVL	g(CX), BP
   718  	MOVL	g_m(BP), BP
   719  
   720  	// Set m->sched.sp = SP, so that if a panic happens
   721  	// during the function we are about to execute, it will
   722  	// have a valid SP to run on the g0 stack.
   723  	// The next few lines (after the havem label)
   724  	// will save this SP onto the stack and then write
   725  	// the same SP back to m->sched.sp. That seems redundant,
   726  	// but if an unrecovered panic happens, unwindm will
   727  	// restore the g->sched.sp from the stack location
   728  	// and then systemstack will try to use it. If we don't set it here,
   729  	// that restored SP will be uninitialized (typically 0) and
   730  	// will not be usable.
   731  	MOVL	m_g0(BP), SI
   732  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   733  
   734  havem:
   735  	// Now there's a valid m, and we're running on its m->g0.
   736  	// Save current m->g0->sched.sp on stack and then set it to SP.
   737  	// Save current sp in m->g0->sched.sp in preparation for
   738  	// switch back to m->curg stack.
   739  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   740  	MOVL	m_g0(BP), SI
   741  	MOVL	(g_sched+gobuf_sp)(SI), AX
   742  	MOVL	AX, 0(SP)
   743  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   744  
   745  	// Switch to m->curg stack and call runtime.cgocallbackg.
   746  	// Because we are taking over the execution of m->curg
   747  	// but *not* resuming what had been running, we need to
   748  	// save that information (m->curg->sched) so we can restore it.
   749  	// We can restore m->curg->sched.sp easily, because calling
   750  	// runtime.cgocallbackg leaves SP unchanged upon return.
   751  	// To save m->curg->sched.pc, we push it onto the stack.
   752  	// This has the added benefit that it looks to the traceback
   753  	// routine like cgocallbackg is going to return to that
   754  	// PC (because the frame we allocate below has the same
   755  	// size as cgocallback_gofunc's frame declared above)
   756  	// so that the traceback will seamlessly trace back into
   757  	// the earlier calls.
   758  	//
   759  	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
   760  	// 4(SP) and 8(SP) are unused.
   761  	MOVL	m_curg(BP), SI
   762  	MOVL	SI, g(CX)
   763  	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   764  	MOVL	(g_sched+gobuf_pc)(SI), BP
   765  	MOVL	BP, -4(DI)
   766  	LEAL	-(4+12)(DI), SP	// 4 bytes for the saved PC plus a 12-byte frame
   767  	MOVL	DX, 0(SP)
   768  	CALL	runtime·cgocallbackg(SB)
   769  	MOVL	0(SP), DX
   770  
   771  	// Restore g->sched (== m->curg->sched) from saved values.
   772  	get_tls(CX)
   773  	MOVL	g(CX), SI
   774  	MOVL	12(SP), BP	// the PC stored at -4(DI) above
   775  	MOVL	BP, (g_sched+gobuf_pc)(SI)
   776  	LEAL	(12+4)(SP), DI	// the sched.sp value that was loaded into DI above
   777  	MOVL	DI, (g_sched+gobuf_sp)(SI)
   778  
   779  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   780  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   781  	// so we do not have to restore it.)
   782  	MOVL	g(CX), BP
   783  	MOVL	g_m(BP), BP
   784  	MOVL	m_g0(BP), SI
   785  	MOVL	SI, g(CX)
   786  	MOVL	(g_sched+gobuf_sp)(SI), SP
   787  	MOVL	0(SP), AX
   788  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   789  	
   790  	// If the m on entry was nil, we called needm above to borrow an m
   791  	// for the duration of the call. Since the call is over, return it with dropm.
   792  	CMPL	DX, $0
   793  	JNE 3(PC)	// oldm != nil: skip the dropm call
   794  	MOVL	$runtime·dropm(SB), AX
   795  	CALL	AX
   796  
   797  	// Done!
   798  	RET
   799  
   800  // void setg(G*); set g. for use by needm.
   801  TEXT runtime·setg(SB), NOSPLIT, $0-4
   802  	MOVL	gg+0(FP), BX
   803  #ifdef GOOS_windows
   804  	CMPL	BX, $0
   805  	JNE	settls
   806  	MOVL	$0, 0x14(FS)	// gg == nil: clear the word at 0x14(FS) and return without storing g
   807  	RET
   808  settls:
   809  	MOVL	g_m(BX), AX
   810  	LEAL	m_tls(AX), AX
   811  	MOVL	AX, 0x14(FS)	// point 0x14(FS) at gg->m->tls
   812  #endif
   813  	get_tls(CX)
   814  	MOVL	BX, g(CX)	// store gg in the TLS g slot
   815  	RET
   816  
   817  // void setg_gcc(G*); set g. for use by gcc
        // rt0_go passes this function's address to _cgo_init.
   818  TEXT setg_gcc<>(SB), NOSPLIT, $0
   819  	get_tls(AX)
   820  	MOVL	gg+0(FP), DX
   821  	MOVL	DX, g(AX)	// store gg in the TLS g slot; only AX and DX are written
   822  	RET
   823  
   824  // check that SP is in range (g->stack.lo, g->stack.hi); both tests below are strict
   825  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   826  	get_tls(CX)
   827  	MOVL	g(CX), AX
   828  	CMPL	(g_stack+stack_hi)(AX), SP
   829  	JHI	2(PC)	// stack.hi > SP (unsigned): skip the trap
   830  	INT	$3
   831  	CMPL	SP, (g_stack+stack_lo)(AX)
   832  	JHI	2(PC)	// SP > stack.lo (unsigned): skip the trap
   833  	INT	$3
   834  	RET
   835  
   836  TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8	// return the word stored just below the address in argp
   837  	MOVL	argp+0(FP),CX		// address of the caller's first argument
   838  	MOVL	-4(CX),DX		// return PC sits one word below it
   839  	MOVL	DX, ret+4(FP)
   840  	RET
   841  
   842  TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8	// return the word stored just below the address in p
   843  	MOVL	p+0(FP),DX		// address of the caller's first argument
   844  	MOVL	-4(DX),CX		// return PC sits one word below it
   845  	MOVL	CX, ret+4(FP)
   846  	RET
   847  
   848  TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8	// overwrite the word stored just below the address in argp
   849  	MOVL	pc+4(FP), DX		// replacement return PC
   850  	MOVL	argp+0(FP),CX		// address of the caller's first argument
   851  	MOVL	DX, -4(CX)		// return PC slot is one word below it
   852  	RET
   853  
   854  TEXT runtime·getcallersp(SB), NOSPLIT, $0-8	// return argp unchanged as the result
   855  	MOVL	argp+0(FP), CX
   856  	MOVL	CX, ret+4(FP)
   857  	RET
   858  
   859  // func gogetcallersp(p unsafe.Pointer) uintptr
   860  TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-8	// return p unchanged as the result
   861  	MOVL	p+0(FP),DX		// address of the caller's first argument
   862  	MOVL	DX, ret+4(FP)
   863  	RET
   864  
   865  // int64 runtime·cputicks(void)
   866  // Returns the 64-bit counter read by RDTSC; the result occupies the frame's 8 bytes.
   867  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   868  	RDTSC
   869  	MOVL	AX, ret_lo+0(FP)	// low 32 bits
   870  	MOVL	DX, ret_hi+4(FP)	// high 32 bits
   871  	RET
   872  
   873  TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
   874  	// Ask setldt to install an LDT entry that covers tls0.
   875  	// Entry 7 is requested: 1 would do on Linux, but 7 is the lowest OS X accepts.
   876  	// The number is only a hint; setldt sets up GS with the entry it really used.
   877  	LEAL	runtime·tls0(SB), CX
   878  	MOVL	$32, 8(SP)	// sizeof(tls array)
   879  	MOVL	CX, 4(SP)	// base address: tls0
   880  	MOVL	$7, 0(SP)	// requested entry number
   881  	CALL	runtime·setldt(SB)
   882  	RET
   883  
   884  TEXT runtime·emptyfunc(SB),0,$0-0	// flags are 0 (no NOSPLIT); rt0_go calls this to "fault if stack check is wrong"
   885  	RET
   886  
   887  TEXT runtime·abort(SB),NOSPLIT,$0-0	// stop the program with a breakpoint trap; never returns
   888  	INT $0x3
        	// The trap can return (for example when a debugger resumes the
        	// process). Spin here so control can never fall through into
        	// whatever function is laid out after this one.
        abortloop:
        	JMP	abortloop
   889  
   890  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   891  // Forwards to memhash(p, h, size), taking size from the
   892  // closure that DX points at on entry.
   893  TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
   894  	GO_ARGS
   895  	NO_LOCAL_POINTERS
   896  	MOVL	4(DX), CX		// size: second word of the closure
   897  	MOVL	CX, 8(SP)
   898  	MOVL	p+0(FP), AX
   899  	MOVL	AX, 0(SP)
   900  	MOVL	h+4(FP), AX
   901  	MOVL	AX, 4(SP)
   902  	CALL	runtime·memhash(SB)
   903  	MOVL	12(SP), CX		// memhash's result slot follows its three arguments
   904  	MOVL	CX, ret+8(FP)
   905  	RET
   906  
   907  // hash function using AES hardware instructions
        // Loads the registers aeshashbody expects (AX data, CX length,
        // DX result address) and jumps to it.
   908  TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   909  	MOVL	p+0(FP), AX	// ptr to data
   910  	MOVL	s+8(FP), CX	// size
   911  	LEAL	ret+12(FP), DX	// address of the result slot
   912  	JMP	runtime·aeshashbody(SB)
   913  
   914  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12	// string variant: unpack data pointer and length, then jump to aeshashbody
   915  	MOVL	p+0(FP), AX	// ptr to string object
   916  	MOVL	4(AX), CX	// length of string
   917  	MOVL	(AX), AX	// string data
   918  	LEAL	ret+8(FP), DX	// address of the result slot
   919  	JMP	runtime·aeshashbody(SB)
   920  
        // aeshashbody hashes CX bytes starting at AX with AES rounds and stores
        // the low 32 bits of the result at (DX). It declares no frame or
        // arguments of its own ($0-0); the seed is read through h+4(FP).
        // X6 holds the seed/length block, X7 the first 16 bytes of
        // runtime·aeskeysched, and X0-X5 the data and hash state.
        // Register inputs:
   921  // AX: data
   922  // CX: length
   923  // DX: address to put return value
   924  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   925  	MOVL	h+4(FP), X6	// seed into the low 32 bits of X6
   926  	PINSRD	$2, CX, X6	// length into dword 2 of X6 (start of its high 64 bits)
   927  	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
   928  	MOVO	runtime·aeskeysched(SB), X7
        	// dispatch on length: 0-15, exactly 16, 17-32, 33-64, 65 and up
   929  	CMPL	CX, $16
   930  	JB	aes0to15
   931  	JE	aes16
   932  	CMPL	CX, $32
   933  	JBE	aes17to32
   934  	CMPL	CX, $64
   935  	JBE	aes33to64
   936  	JMP	aes65plus
   937  	
   938  aes0to15:
   939  	TESTL	CX, CX
   940  	JE	aes0
   941  
        	// AX is advanced by 16 first; if bits 4-11 of the result are all
        	// zero, the original address had its low 12 bits in 0xff0-0xfff.
   942  	ADDL	$16, AX
   943  	TESTW	$0xff0, AX
   944  	JE	endofpage
   945  
   946  	// 16 bytes loaded at this address won't cross
   947  	// a page boundary, so we can load it directly.
   948  	MOVOU	-16(AX), X0	// -16 undoes the ADDL above: load from the data start
   949  	ADDL	CX, CX		// 2*CX, scaled by 8 below: 16-byte table entry CX
   950  	PAND	masks<>(SB)(CX*8), X0	// keep only the low CX (original length) bytes
   951  
   952  	// scramble 3 times
   953  	AESENC	X6, X0
   954  	AESENC	X7, X0
   955  	AESENC	X7, X0
   956  	MOVL	X0, (DX)
   957  	RET
   958  
   959  endofpage:
   960  	// low 12 address bits are 0xff0-0xfff.  Might be up against
   961  	// a page boundary, so load the 16 bytes ending at the last data byte
   962  	// (AX is already +16, hence -32).  Then shift bytes down using pshufb.
   963  	MOVOU	-32(AX)(CX*1), X0
   964  	ADDL	CX, CX		// 2*CX, scaled by 8 below: 16-byte table entry CX
   965  	PSHUFB	shifts<>(SB)(CX*8), X0
   966  	AESENC	X6, X0
   967  	AESENC	X7, X0
   968  	AESENC	X7, X0
   969  	MOVL	X0, (DX)
   970  	RET
   971  
   972  aes0:
   973  	// return input seed
   974  	MOVL	h+4(FP), AX
   975  	MOVL	AX, (DX)
   976  	RET
   977  
   978  aes16:
        	// exactly one 16-byte block: load it whole and scramble 3 times
   979  	MOVOU	(AX), X0
   980  	AESENC	X6, X0
   981  	AESENC	X7, X0
   982  	AESENC	X7, X0
   983  	MOVL	X0, (DX)
   984  	RET
   985  
   986  
   987  aes17to32:
   988  	// load data to be hashed
        	// (first 16 bytes and last 16 bytes; the two loads may overlap)
   989  	MOVOU	(AX), X0
   990  	MOVOU	-16(AX)(CX*1), X1
   991  
   992  	// scramble 3 times
   993  	AESENC	X6, X0
   994  	AESENC	runtime·aeskeysched+16(SB), X1
   995  	AESENC	X7, X0
   996  	AESENC	X7, X1
   997  	AESENC	X7, X0
   998  	AESENC	X7, X1
   999  
  1000  	// combine results
  1001  	PXOR	X1, X0
  1002  	MOVL	X0, (DX)
  1003  	RET
  1004  
  1005  aes33to64:
        	// first 32 bytes and last 32 bytes (the two halves may overlap)
  1006  	MOVOU	(AX), X0
  1007  	MOVOU	16(AX), X1
  1008  	MOVOU	-32(AX)(CX*1), X2
  1009  	MOVOU	-16(AX)(CX*1), X3
  1010  	
        	// first round uses a different key block per lane, then 2 rounds with X7
  1011  	AESENC	X6, X0
  1012  	AESENC	runtime·aeskeysched+16(SB), X1
  1013  	AESENC	runtime·aeskeysched+32(SB), X2
  1014  	AESENC	runtime·aeskeysched+48(SB), X3
  1015  	AESENC	X7, X0
  1016  	AESENC	X7, X1
  1017  	AESENC	X7, X2
  1018  	AESENC	X7, X3
  1019  	AESENC	X7, X0
  1020  	AESENC	X7, X1
  1021  	AESENC	X7, X2
  1022  	AESENC	X7, X3
  1023  
        	// fold the four lanes into X0
  1024  	PXOR	X2, X0
  1025  	PXOR	X3, X1
  1026  	PXOR	X1, X0
  1027  	MOVL	X0, (DX)
  1028  	RET
  1029  
  1030  aes65plus:
  1031  	// start with last (possibly overlapping) block
  1032  	MOVOU	-64(AX)(CX*1), X0
  1033  	MOVOU	-48(AX)(CX*1), X1
  1034  	MOVOU	-32(AX)(CX*1), X2
  1035  	MOVOU	-16(AX)(CX*1), X3
  1036  
  1037  	// scramble state once
  1038  	AESENC	X6, X0
  1039  	AESENC	runtime·aeskeysched+16(SB), X1
  1040  	AESENC	runtime·aeskeysched+32(SB), X2
  1041  	AESENC	runtime·aeskeysched+48(SB), X3
  1042  
  1043  	// compute number of remaining 64-byte blocks
        	// CX = (length-1)/64, which is at least 1 here because length >= 65
  1044  	DECL	CX
  1045  	SHRL	$6, CX
  1046  	
  1047  aesloop:
  1048  	// scramble state, xor in a block
  1049  	MOVOU	(AX), X4
  1050  	MOVOU	16(AX), X5
  1051  	AESENC	X4, X0
  1052  	AESENC	X5, X1
  1053  	MOVOU	32(AX), X4
  1054  	MOVOU	48(AX), X5
  1055  	AESENC	X4, X2
  1056  	AESENC	X5, X3
  1057  
  1058  	// scramble state
  1059  	AESENC	X7, X0
  1060  	AESENC	X7, X1
  1061  	AESENC	X7, X2
  1062  	AESENC	X7, X3
  1063  
  1064  	ADDL	$64, AX
  1065  	DECL	CX
  1066  	JNE	aesloop
  1067  
  1068  	// 2 more scrambles to finish
  1069  	AESENC	X7, X0
  1070  	AESENC	X7, X1
  1071  	AESENC	X7, X2
  1072  	AESENC	X7, X3
  1073  	AESENC	X7, X0
  1074  	AESENC	X7, X1
  1075  	AESENC	X7, X2
  1076  	AESENC	X7, X3
  1077  
        	// fold the four lanes into X0 and store its low 32 bits
  1078  	PXOR	X2, X0
  1079  	PXOR	X3, X1
  1080  	PXOR	X1, X0
  1081  	MOVL	X0, (DX)
  1082  	RET
  1083  
        // aeshash32 hashes the 4 bytes at p together with the seed h and
        // stores the low 32 bits of the result in ret+8(FP).
  1084  TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
  1085  	MOVL	p+0(FP), AX	// ptr to data
  1086  	MOVL	h+4(FP), X0	// seed
  1087  	PINSRD	$1, (AX), X0	// data
        	// three AES rounds, each keyed with a different 16-byte slice of aeskeysched
  1088  	AESENC	runtime·aeskeysched+0(SB), X0
  1089  	AESENC	runtime·aeskeysched+16(SB), X0
  1090  	AESENC	runtime·aeskeysched+32(SB), X0
  1091  	MOVL	X0, ret+8(FP)
  1092  	RET
  1093  
        // aeshash64 hashes the 8 bytes at p together with the seed h and
        // stores the low 32 bits of the result in ret+8(FP).
  1094  TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
  1095  	MOVL	p+0(FP), AX	// ptr to data
  1096  	MOVQ	(AX), X0	// data
  1097  	PINSRD	$2, h+4(FP), X0	// seed
        	// three AES rounds, each keyed with a different 16-byte slice of aeskeysched
  1098  	AESENC	runtime·aeskeysched+0(SB), X0
  1099  	AESENC	runtime·aeskeysched+16(SB), X0
  1100  	AESENC	runtime·aeskeysched+32(SB), X0
  1101  	MOVL	X0, ret+8(FP)
  1102  	RET
  1103  
  1104  // simple mask to get rid of data in the high part of the register.
        // The table has 16 entries of 16 bytes each; entry n (at offset n*16)
        // has its low n bytes set to 0xff and the remaining bytes zero, so a
        // PAND with entry n keeps only the low n bytes of a 16-byte value.
  1105  DATA masks<>+0x00(SB)/4, $0x00000000
  1106  DATA masks<>+0x04(SB)/4, $0x00000000
  1107  DATA masks<>+0x08(SB)/4, $0x00000000
  1108  DATA masks<>+0x0c(SB)/4, $0x00000000
  1109  	
  1110  DATA masks<>+0x10(SB)/4, $0x000000ff
  1111  DATA masks<>+0x14(SB)/4, $0x00000000
  1112  DATA masks<>+0x18(SB)/4, $0x00000000
  1113  DATA masks<>+0x1c(SB)/4, $0x00000000
  1114  	
  1115  DATA masks<>+0x20(SB)/4, $0x0000ffff
  1116  DATA masks<>+0x24(SB)/4, $0x00000000
  1117  DATA masks<>+0x28(SB)/4, $0x00000000
  1118  DATA masks<>+0x2c(SB)/4, $0x00000000
  1119  	
  1120  DATA masks<>+0x30(SB)/4, $0x00ffffff
  1121  DATA masks<>+0x34(SB)/4, $0x00000000
  1122  DATA masks<>+0x38(SB)/4, $0x00000000
  1123  DATA masks<>+0x3c(SB)/4, $0x00000000
  1124  	
  1125  DATA masks<>+0x40(SB)/4, $0xffffffff
  1126  DATA masks<>+0x44(SB)/4, $0x00000000
  1127  DATA masks<>+0x48(SB)/4, $0x00000000
  1128  DATA masks<>+0x4c(SB)/4, $0x00000000
  1129  	
  1130  DATA masks<>+0x50(SB)/4, $0xffffffff
  1131  DATA masks<>+0x54(SB)/4, $0x000000ff
  1132  DATA masks<>+0x58(SB)/4, $0x00000000
  1133  DATA masks<>+0x5c(SB)/4, $0x00000000
  1134  	
  1135  DATA masks<>+0x60(SB)/4, $0xffffffff
  1136  DATA masks<>+0x64(SB)/4, $0x0000ffff
  1137  DATA masks<>+0x68(SB)/4, $0x00000000
  1138  DATA masks<>+0x6c(SB)/4, $0x00000000
  1139  	
  1140  DATA masks<>+0x70(SB)/4, $0xffffffff
  1141  DATA masks<>+0x74(SB)/4, $0x00ffffff
  1142  DATA masks<>+0x78(SB)/4, $0x00000000
  1143  DATA masks<>+0x7c(SB)/4, $0x00000000
  1144  	
  1145  DATA masks<>+0x80(SB)/4, $0xffffffff
  1146  DATA masks<>+0x84(SB)/4, $0xffffffff
  1147  DATA masks<>+0x88(SB)/4, $0x00000000
  1148  DATA masks<>+0x8c(SB)/4, $0x00000000
  1149  	
  1150  DATA masks<>+0x90(SB)/4, $0xffffffff
  1151  DATA masks<>+0x94(SB)/4, $0xffffffff
  1152  DATA masks<>+0x98(SB)/4, $0x000000ff
  1153  DATA masks<>+0x9c(SB)/4, $0x00000000
  1154  	
  1155  DATA masks<>+0xa0(SB)/4, $0xffffffff
  1156  DATA masks<>+0xa4(SB)/4, $0xffffffff
  1157  DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1158  DATA masks<>+0xac(SB)/4, $0x00000000
  1159  	
  1160  DATA masks<>+0xb0(SB)/4, $0xffffffff
  1161  DATA masks<>+0xb4(SB)/4, $0xffffffff
  1162  DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1163  DATA masks<>+0xbc(SB)/4, $0x00000000
  1164  	
  1165  DATA masks<>+0xc0(SB)/4, $0xffffffff
  1166  DATA masks<>+0xc4(SB)/4, $0xffffffff
  1167  DATA masks<>+0xc8(SB)/4, $0xffffffff
  1168  DATA masks<>+0xcc(SB)/4, $0x00000000
  1169  	
  1170  DATA masks<>+0xd0(SB)/4, $0xffffffff
  1171  DATA masks<>+0xd4(SB)/4, $0xffffffff
  1172  DATA masks<>+0xd8(SB)/4, $0xffffffff
  1173  DATA masks<>+0xdc(SB)/4, $0x000000ff
  1174  	
  1175  DATA masks<>+0xe0(SB)/4, $0xffffffff
  1176  DATA masks<>+0xe4(SB)/4, $0xffffffff
  1177  DATA masks<>+0xe8(SB)/4, $0xffffffff
  1178  DATA masks<>+0xec(SB)/4, $0x0000ffff
  1179  	
  1180  DATA masks<>+0xf0(SB)/4, $0xffffffff
  1181  DATA masks<>+0xf4(SB)/4, $0xffffffff
  1182  DATA masks<>+0xf8(SB)/4, $0xffffffff
  1183  DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1184  
        // 16 entries * 16 bytes = 256 bytes, read-only.
  1185  GLOBL masks<>(SB),RODATA,$256
  1186  
  1187  // these are arguments to pshufb.  They move data down from
  1188  // the high bytes of the register to the low bytes of the register.
  1189  // index is how many bytes to move.
        // The table has 16 entries of 16 bytes each. Entry n (at offset n*16,
        // n >= 1) selects source bytes 16-n .. 15 into destination bytes
        // 0 .. n-1; the remaining selector bytes are 0xff, for which pshufb
        // writes zero. Entry 0 is all zero bytes.
  1190  DATA shifts<>+0x00(SB)/4, $0x00000000
  1191  DATA shifts<>+0x04(SB)/4, $0x00000000
  1192  DATA shifts<>+0x08(SB)/4, $0x00000000
  1193  DATA shifts<>+0x0c(SB)/4, $0x00000000
  1194  	
  1195  DATA shifts<>+0x10(SB)/4, $0xffffff0f
  1196  DATA shifts<>+0x14(SB)/4, $0xffffffff
  1197  DATA shifts<>+0x18(SB)/4, $0xffffffff
  1198  DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1199  	
  1200  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
  1201  DATA shifts<>+0x24(SB)/4, $0xffffffff
  1202  DATA shifts<>+0x28(SB)/4, $0xffffffff
  1203  DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1204  	
  1205  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
  1206  DATA shifts<>+0x34(SB)/4, $0xffffffff
  1207  DATA shifts<>+0x38(SB)/4, $0xffffffff
  1208  DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1209  	
  1210  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
  1211  DATA shifts<>+0x44(SB)/4, $0xffffffff
  1212  DATA shifts<>+0x48(SB)/4, $0xffffffff
  1213  DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1214  	
  1215  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
  1216  DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1217  DATA shifts<>+0x58(SB)/4, $0xffffffff
  1218  DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1219  	
  1220  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
  1221  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1222  DATA shifts<>+0x68(SB)/4, $0xffffffff
  1223  DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1224  	
  1225  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
  1226  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1227  DATA shifts<>+0x78(SB)/4, $0xffffffff
  1228  DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1229  	
  1230  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
  1231  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1232  DATA shifts<>+0x88(SB)/4, $0xffffffff
  1233  DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1234  	
  1235  DATA shifts<>+0x90(SB)/4, $0x0a090807
  1236  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1237  DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1238  DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1239  	
  1240  DATA shifts<>+0xa0(SB)/4, $0x09080706
  1241  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1242  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1243  DATA shifts<>+0xac(SB)/4, $0xffffffff
  1244  	
  1245  DATA shifts<>+0xb0(SB)/4, $0x08070605
  1246  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1247  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1248  DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1249  	
  1250  DATA shifts<>+0xc0(SB)/4, $0x07060504
  1251  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1252  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1253  DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1254  	
  1255  DATA shifts<>+0xd0(SB)/4, $0x06050403
  1256  DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1257  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1258  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1259  	
  1260  DATA shifts<>+0xe0(SB)/4, $0x05040302
  1261  DATA shifts<>+0xe4(SB)/4, $0x09080706
  1262  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1263  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1264  	
  1265  DATA shifts<>+0xf0(SB)/4, $0x04030201
  1266  DATA shifts<>+0xf4(SB)/4, $0x08070605
  1267  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1268  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1269  
        // 16 entries * 16 bytes = 256 bytes, read-only.
  1270  GLOBL shifts<>(SB),RODATA,$256
  1271  
  1272  TEXT runtime·memeq(SB),NOSPLIT,$0-13
  1273  	MOVL	a+0(FP), SI
  1274  	MOVL	b+4(FP), DI
  1275  	MOVL	size+8(FP), BX
  1276  	CALL	runtime·memeqbody(SB)
  1277  	MOVB	AX, ret+12(FP)
  1278  	RET
  1279  
  1280  // memequal_varlen(a, b unsafe.Pointer) bool
        // Compares the memory at a and b; the byte count is not an argument
        // but is read from offset 4 of the block DX points at. The one-byte
        // result is stored in ret+8(FP).
  1281  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1282  	MOVL    a+0(FP), SI
  1283  	MOVL    b+4(FP), DI
  1284  	CMPL    SI, DI
  1285  	JEQ     eq           // same address: equal without looking at the bytes
  1286  	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
  1287  	CALL    runtime·memeqbody(SB)
  1288  	MOVB    AX, ret+8(FP)
  1289  	RET
  1290  eq:
  1291  	MOVB    $1, ret+8(FP)
  1292  	RET
  1293  
  1294  // eqstring reports whether two strings hold the same bytes.
  1295  // The equivalent Go code is runtime_test.go:eqstring_generic.
  1296  // The one-byte result is stored in v+16(FP).
  1297  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1298  	MOVL	s2len+12(FP), BX	// BX is also the byte count handed to memeqbody
  1299  	MOVL	s1len+4(FP), AX
  1300  	CMPL	AX, BX
  1301  	JNE	different		// lengths differ: cannot be equal
  1302  	MOVL	s2str+8(FP), DI
  1303  	MOVL	s1str+0(FP), SI
  1304  	CMPL	SI, DI
  1305  	JEQ	same			// same data pointer and same length
  1306  	CALL	runtime·memeqbody(SB)
  1307  	MOVB	AX, v+16(FP)
  1308  	RET
  1309  different:
  1310  	MOVB	$0, v+16(FP)
  1311  	RET
  1312  same:
  1313  	MOVB	$1, v+16(FP)
  1314  	RET
  1315  
        // bytes·Equal compares two byte slices and stores a one-byte result
        // in ret+24(FP): 0 when the lengths differ, otherwise whatever
        // memeqbody leaves in AX for the a_len bytes at a and b.
  1316  TEXT bytes·Equal(SB),NOSPLIT,$0-25
  1317  	MOVL	a_len+4(FP), BX
  1318  	MOVL	b_len+16(FP), CX
  1319  	XORL	AX, AX		// preset result to 0 for the length-mismatch exit
  1320  	CMPL	BX, CX
  1321  	JNE	eqret
  1322  	MOVL	a+0(FP), SI
  1323  	MOVL	b+12(FP), DI
  1324  	CALL	runtime·memeqbody(SB)	// SI = a, DI = b, BX = count; result in AX
  1325  eqret:
  1326  	MOVB	AX, ret+24(FP)
  1327  	RET
  1328  
        // memeqbody compares BX bytes at SI and DI and returns AX = 1 when
        // they are all equal, AX = 0 otherwise. It takes its inputs in
        // registers and overwrites BX, CX, DX, SI, DI and, on the SSE2 path,
        // X0-X7.
  1329  // a in SI
  1330  // b in DI
  1331  // count in BX
  1332  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1333  	XORL	AX, AX		// result defaults to 0 (not equal)
  1334  
  1335  	CMPL	BX, $4
  1336  	JB	small
  1337  
  1338  	// 64 bytes at a time using xmm registers
  1339  hugeloop:
  1340  	CMPL	BX, $64
  1341  	JB	bigloop
  1342  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1343  	JE	bigloop
  1344  	MOVOU	(SI), X0
  1345  	MOVOU	(DI), X1
  1346  	MOVOU	16(SI), X2
  1347  	MOVOU	16(DI), X3
  1348  	MOVOU	32(SI), X4
  1349  	MOVOU	32(DI), X5
  1350  	MOVOU	48(SI), X6
  1351  	MOVOU	48(DI), X7
        	// per-byte equality masks for the four 16-byte pairs, ANDed
        	// together so that one differing byte clears a bit in the final mask
  1352  	PCMPEQB	X1, X0
  1353  	PCMPEQB	X3, X2
  1354  	PCMPEQB	X5, X4
  1355  	PCMPEQB	X7, X6
  1356  	PAND	X2, X0
  1357  	PAND	X6, X4
  1358  	PAND	X4, X0
  1359  	PMOVMSKB X0, DX
  1360  	ADDL	$64, SI
  1361  	ADDL	$64, DI
  1362  	SUBL	$64, BX
  1363  	CMPL	DX, $0xffff	// all 16 mask bits set means all 64 bytes matched
  1364  	JEQ	hugeloop
  1365  	RET			// mismatch: AX is still 0
  1366  
  1367  	// 4 bytes at a time using 32-bit register
  1368  bigloop:
  1369  	CMPL	BX, $4
  1370  	JBE	leftover
  1371  	MOVL	(SI), CX
  1372  	MOVL	(DI), DX
  1373  	ADDL	$4, SI
  1374  	ADDL	$4, DI
  1375  	SUBL	$4, BX
  1376  	CMPL	CX, DX
  1377  	JEQ	bigloop
  1378  	RET			// mismatch: AX is still 0
  1379  
  1380  	// remaining 0-4 bytes
        	// compare the 4 bytes that end at the end of the data; this may
        	// re-read bytes that were already compared above.
  1381  leftover:
  1382  	MOVL	-4(SI)(BX*1), CX
  1383  	MOVL	-4(DI)(BX*1), DX
  1384  	CMPL	CX, DX
  1385  	SETEQ	AX
  1386  	RET
  1387  
        	// fewer than 4 bytes in total
  1388  small:
  1389  	CMPL	BX, $0
  1390  	JEQ	equal		// zero bytes: ZF is set, so SETEQ below yields 1
  1391  
        	// CX = -(8*BX); shifts use its low bits, i.e. 32 - 8*BX
  1392  	LEAL	0(BX*8), CX
  1393  	NEGL	CX
  1394  
  1395  	MOVL	SI, DX
  1396  	CMPB	DX, $0xfc	// low address byte above 0xfc: a 4-byte load could cross
  1397  	JA	si_high
  1398  
  1399  	// load at SI won't cross a page boundary.
  1400  	MOVL	(SI), SI
  1401  	JMP	si_finish
  1402  si_high:
  1403  	// low address byte is 0xfd-0xff.  Load up to bytes we want, move to correct position.
  1404  	MOVL	-4(SI)(BX*1), SI
  1405  	SHRL	CX, SI
  1406  si_finish:
  1407  
  1408  	// same for DI.
  1409  	MOVL	DI, DX
  1410  	CMPB	DX, $0xfc
  1411  	JA	di_high
  1412  	MOVL	(DI), DI
  1413  	JMP	di_finish
  1414  di_high:
  1415  	MOVL	-4(DI)(BX*1), DI
  1416  	SHRL	CX, DI
  1417  di_finish:
  1418  
        	// the difference, shifted left so only the low BX bytes remain,
        	// is zero exactly when the wanted bytes match
  1419  	SUBL	SI, DI
  1420  	SHLL	CX, DI
  1421  equal:
  1422  	SETEQ	AX
  1423  	RET
  1424  
  1425  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
  1426  	MOVL	s1_base+0(FP), SI
  1427  	MOVL	s1_len+4(FP), BX
  1428  	MOVL	s2_base+8(FP), DI
  1429  	MOVL	s2_len+12(FP), DX
  1430  	CALL	runtime·cmpbody(SB)
  1431  	MOVL	AX, ret+16(FP)
  1432  	RET
  1433  
  1434  TEXT bytes·Compare(SB),NOSPLIT,$0-28
  1435  	MOVL	s1+0(FP), SI
  1436  	MOVL	s1+4(FP), BX
  1437  	MOVL	s2+12(FP), DI
  1438  	MOVL	s2+16(FP), DX
  1439  	CALL	runtime·cmpbody(SB)
  1440  	MOVL	AX, ret+24(FP)
  1441  	RET
  1442  
  1443  TEXT bytes·IndexByte(SB),NOSPLIT,$0
  1444  	MOVL	s+0(FP), SI
  1445  	MOVL	s_len+4(FP), CX
  1446  	MOVB	c+12(FP), AL
  1447  	MOVL	SI, DI
  1448  	CLD; REPN; SCASB
  1449  	JZ 3(PC)
  1450  	MOVL	$-1, ret+16(FP)
  1451  	RET
  1452  	SUBL	SI, DI
  1453  	SUBL	$1, DI
  1454  	MOVL	DI, ret+16(FP)
  1455  	RET
  1456  
  1457  TEXT strings·IndexByte(SB),NOSPLIT,$0
  1458  	MOVL	s+0(FP), SI
  1459  	MOVL	s_len+4(FP), CX
  1460  	MOVB	c+8(FP), AL
  1461  	MOVL	SI, DI
  1462  	CLD; REPN; SCASB
  1463  	JZ 3(PC)
  1464  	MOVL	$-1, ret+12(FP)
  1465  	RET
  1466  	SUBL	SI, DI
  1467  	SUBL	$1, DI
  1468  	MOVL	DI, ret+12(FP)
  1469  	RET
  1470  
        // cmpbody is the shared three-way byte comparison. It compares the
        // first min(alen, blen) bytes of a and b; if those are all equal the
        // result is decided by comparing the lengths. It overwrites BP, CX,
        // SI, DI, on some paths BX, and X0/X1 on the SSE2 path.
  1471  // input:
  1472  //   SI = a
  1473  //   DI = b
  1474  //   BX = alen
  1475  //   DX = blen
  1476  // output:
  1477  //   AX = 1/0/-1
  1478  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1479  	CMPL	SI, DI
  1480  	JEQ	allsame		// same start address: only the lengths can differ
  1481  	CMPL	BX, DX
  1482  	MOVL	DX, BP
  1483  	CMOVLLT	BX, BP // BP = min(alen, blen)
  1484  	CMPL	BP, $4
  1485  	JB	small
  1486  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1487  	JE	mediumloop
        	// 16 bytes at a time using xmm registers
  1488  largeloop:
  1489  	CMPL	BP, $16
  1490  	JB	mediumloop
  1491  	MOVOU	(SI), X0
  1492  	MOVOU	(DI), X1
  1493  	PCMPEQB X0, X1
  1494  	PMOVMSKB X1, AX
  1495  	XORL	$0xffff, AX	// convert EQ to NE
  1496  	JNE	diff16	// branch if at least one byte is not equal
  1497  	ADDL	$16, SI
  1498  	ADDL	$16, DI
  1499  	SUBL	$16, BP
  1500  	JMP	largeloop
  1501  
  1502  diff16:
  1503  	BSFL	AX, BX	// index of first byte that differs
  1504  	XORL	AX, AX
  1505  	MOVB	(SI)(BX*1), CX
  1506  	CMPB	CX, (DI)(BX*1)
  1507  	SETHI	AX	// 1 if a's byte is above b's (unsigned), else 0
  1508  	LEAL	-1(AX*2), AX	// convert 1/0 to +1/-1
  1509  	RET
  1510  
        	// 4 bytes at a time using 32-bit registers
  1511  mediumloop:
  1512  	CMPL	BP, $4
  1513  	JBE	_0through4
  1514  	MOVL	(SI), AX
  1515  	MOVL	(DI), CX
  1516  	CMPL	AX, CX
  1517  	JNE	diff4
  1518  	ADDL	$4, SI
  1519  	ADDL	$4, DI
  1520  	SUBL	$4, BP
  1521  	JMP	mediumloop
  1522  
        	// compare the 4 bytes that end at the end of the common prefix;
        	// this may re-read bytes that were already compared above.
  1523  _0through4:
  1524  	MOVL	-4(SI)(BP*1), AX
  1525  	MOVL	-4(DI)(BP*1), CX
  1526  	CMPL	AX, CX
  1527  	JEQ	allsame
  1528  
        	// AX and CX hold 4 bytes of a and b that differ somewhere
  1529  diff4:
  1530  	BSWAPL	AX	// reverse order of bytes
  1531  	BSWAPL	CX
  1532  	XORL	AX, CX	// find bit differences
  1533  	BSRL	CX, CX	// index of highest bit difference
  1534  	SHRL	CX, AX	// move a's bit to bottom
  1535  	ANDL	$1, AX	// mask bit
  1536  	LEAL	-1(AX*2), AX // 1/0 => +1/-1
  1537  	RET
  1538  
  1539  	// 0-3 bytes in common
  1540  small:
        	// CX = -(8*BP); shifts use its low bits, i.e. 32 - 8*BP.
        	// NEGL sets ZF when BP is 0, in which case there is nothing to compare.
  1541  	LEAL	(BP*8), CX
  1542  	NEGL	CX
  1543  	JEQ	allsame
  1544  
  1545  	// load si
  1546  	CMPB	SI, $0xfc	// low address byte above 0xfc: a 4-byte load could cross
  1547  	JA	si_high
  1548  	MOVL	(SI), SI
  1549  	JMP	si_finish
  1550  si_high:
        	// load the 4 bytes ending at the end of the data, then shift the
        	// wanted bytes down to the bottom
  1551  	MOVL	-4(SI)(BP*1), SI
  1552  	SHRL	CX, SI
  1553  si_finish:
  1554  	SHLL	CX, SI		// move the wanted bytes to the top, dropping the rest
  1555  
  1556  	// same for di
  1557  	CMPB	DI, $0xfc
  1558  	JA	di_high
  1559  	MOVL	(DI), DI
  1560  	JMP	di_finish
  1561  di_high:
  1562  	MOVL	-4(DI)(BP*1), DI
  1563  	SHRL	CX, DI
  1564  di_finish:
  1565  	SHLL	CX, DI
  1566  
  1567  	BSWAPL	SI	// reverse order of bytes
  1568  	BSWAPL	DI
  1569  	XORL	SI, DI	// find bit differences
  1570  	JEQ	allsame
  1571  	BSRL	DI, CX	// index of highest bit difference
  1572  	SHRL	CX, SI	// move a's bit to bottom
  1573  	ANDL	$1, SI	// mask bit
  1574  	LEAL	-1(SI*2), AX // 1/0 => +1/-1
  1575  	RET
  1576  
  1577  	// all the bytes in common are the same, so we just need
  1578  	// to compare the lengths.
  1579  allsame:
  1580  	XORL	AX, AX
  1581  	XORL	CX, CX
  1582  	CMPL	BX, DX
  1583  	SETGT	AX	// 1 if alen > blen
  1584  	SETEQ	CX	// 1 if alen == blen
  1585  	LEAL	-1(CX)(AX*2), AX	// 1,0,-1 result
  1586  	RET
  1587  
  1588  // A Duff's device for zeroing memory.
  1589  // The compiler jumps to computed addresses within
  1590  // this routine to zero chunks of memory.  Do not
  1591  // change this code without also changing the code
  1592  // in ../../cmd/8g/ggen.c:clearfat.
  1593  // AX: zero
  1594  // DI: ptr to memory to be zeroed
  1595  // DI is updated as a side effect.
        // The body is 128 STOSL instructions followed by RET; each STOSL
        // stores the 4 bytes of AX at (DI) and steps DI (presumably forward,
        // i.e. the direction flag is assumed clear - confirm with the caller).
        // Entering at the top therefore writes 512 bytes; entering later
        // writes proportionally fewer.
  1596  TEXT runtime·duffzero(SB), NOSPLIT, $0-0
  1597  	STOSL
  1598  	STOSL
  1599  	STOSL
  1600  	STOSL
  1601  	STOSL
  1602  	STOSL
  1603  	STOSL
  1604  	STOSL
  1605  	STOSL
  1606  	STOSL
  1607  	STOSL
  1608  	STOSL
  1609  	STOSL
  1610  	STOSL
  1611  	STOSL
  1612  	STOSL
  1613  	STOSL
  1614  	STOSL
  1615  	STOSL
  1616  	STOSL
  1617  	STOSL
  1618  	STOSL
  1619  	STOSL
  1620  	STOSL
  1621  	STOSL
  1622  	STOSL
  1623  	STOSL
  1624  	STOSL
  1625  	STOSL
  1626  	STOSL
  1627  	STOSL
  1628  	STOSL
  1629  	STOSL
  1630  	STOSL
  1631  	STOSL
  1632  	STOSL
  1633  	STOSL
  1634  	STOSL
  1635  	STOSL
  1636  	STOSL
  1637  	STOSL
  1638  	STOSL
  1639  	STOSL
  1640  	STOSL
  1641  	STOSL
  1642  	STOSL
  1643  	STOSL
  1644  	STOSL
  1645  	STOSL
  1646  	STOSL
  1647  	STOSL
  1648  	STOSL
  1649  	STOSL
  1650  	STOSL
  1651  	STOSL
  1652  	STOSL
  1653  	STOSL
  1654  	STOSL
  1655  	STOSL
  1656  	STOSL
  1657  	STOSL
  1658  	STOSL
  1659  	STOSL
  1660  	STOSL
  1661  	STOSL
  1662  	STOSL
  1663  	STOSL
  1664  	STOSL
  1665  	STOSL
  1666  	STOSL
  1667  	STOSL
  1668  	STOSL
  1669  	STOSL
  1670  	STOSL
  1671  	STOSL
  1672  	STOSL
  1673  	STOSL
  1674  	STOSL
  1675  	STOSL
  1676  	STOSL
  1677  	STOSL
  1678  	STOSL
  1679  	STOSL
  1680  	STOSL
  1681  	STOSL
  1682  	STOSL
  1683  	STOSL
  1684  	STOSL
  1685  	STOSL
  1686  	STOSL
  1687  	STOSL
  1688  	STOSL
  1689  	STOSL
  1690  	STOSL
  1691  	STOSL
  1692  	STOSL
  1693  	STOSL
  1694  	STOSL
  1695  	STOSL
  1696  	STOSL
  1697  	STOSL
  1698  	STOSL
  1699  	STOSL
  1700  	STOSL
  1701  	STOSL
  1702  	STOSL
  1703  	STOSL
  1704  	STOSL
  1705  	STOSL
  1706  	STOSL
  1707  	STOSL
  1708  	STOSL
  1709  	STOSL
  1710  	STOSL
  1711  	STOSL
  1712  	STOSL
  1713  	STOSL
  1714  	STOSL
  1715  	STOSL
  1716  	STOSL
  1717  	STOSL
  1718  	STOSL
  1719  	STOSL
  1720  	STOSL
  1721  	STOSL
  1722  	STOSL
  1723  	STOSL
  1724  	STOSL
  1725  	RET
  1726  
  1727  // A Duff's device for copying memory.
  1728  // The compiler jumps to computed addresses within
  1729  // this routine to copy chunks of memory.  Source
  1730  // and destination must not overlap.  Do not
  1731  // change this code without also changing the code
  1732  // in ../../cmd/8g/cgen.c:sgen.
        // NOTE(review): this reference previously named cmd/6g; this is the
        // 386 file and the duffzero comment above names cmd/8g, so 8g is
        // presumably the intended compiler - confirm.
  1733  // SI: ptr to source memory
  1734  // DI: ptr to destination memory
  1735  // SI and DI are updated as a side effect.
        // CX is used as scratch for each word and is overwritten.
        // The body is 128 identical 4-instruction groups followed by RET;
        // each group copies 4 bytes from (SI) to (DI) and advances both by 4,
        // so entering at the top copies 512 bytes.
  1736  
  1737  // NOTE: this is equivalent to a sequence of MOVSL but
  1738  // for some reason MOVSL is really slow.
  1739  TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
  1740  	MOVL	(SI),CX
  1741  	ADDL	$4,SI
  1742  	MOVL	CX,(DI)
  1743  	ADDL	$4,DI
  1744  	
  1745  	MOVL	(SI),CX
  1746  	ADDL	$4,SI
  1747  	MOVL	CX,(DI)
  1748  	ADDL	$4,DI
  1749  	
  1750  	MOVL	(SI),CX
  1751  	ADDL	$4,SI
  1752  	MOVL	CX,(DI)
  1753  	ADDL	$4,DI
  1754  	
  1755  	MOVL	(SI),CX
  1756  	ADDL	$4,SI
  1757  	MOVL	CX,(DI)
  1758  	ADDL	$4,DI
  1759  	
  1760  	MOVL	(SI),CX
  1761  	ADDL	$4,SI
  1762  	MOVL	CX,(DI)
  1763  	ADDL	$4,DI
  1764  	
  1765  	MOVL	(SI),CX
  1766  	ADDL	$4,SI
  1767  	MOVL	CX,(DI)
  1768  	ADDL	$4,DI
  1769  	
  1770  	MOVL	(SI),CX
  1771  	ADDL	$4,SI
  1772  	MOVL	CX,(DI)
  1773  	ADDL	$4,DI
  1774  	
  1775  	MOVL	(SI),CX
  1776  	ADDL	$4,SI
  1777  	MOVL	CX,(DI)
  1778  	ADDL	$4,DI
  1779  	
  1780  	MOVL	(SI),CX
  1781  	ADDL	$4,SI
  1782  	MOVL	CX,(DI)
  1783  	ADDL	$4,DI
  1784  	
  1785  	MOVL	(SI),CX
  1786  	ADDL	$4,SI
  1787  	MOVL	CX,(DI)
  1788  	ADDL	$4,DI
  1789  	
  1790  	MOVL	(SI),CX
  1791  	ADDL	$4,SI
  1792  	MOVL	CX,(DI)
  1793  	ADDL	$4,DI
  1794  	
  1795  	MOVL	(SI),CX
  1796  	ADDL	$4,SI
  1797  	MOVL	CX,(DI)
  1798  	ADDL	$4,DI
  1799  	
  1800  	MOVL	(SI),CX
  1801  	ADDL	$4,SI
  1802  	MOVL	CX,(DI)
  1803  	ADDL	$4,DI
  1804  	
  1805  	MOVL	(SI),CX
  1806  	ADDL	$4,SI
  1807  	MOVL	CX,(DI)
  1808  	ADDL	$4,DI
  1809  	
  1810  	MOVL	(SI),CX
  1811  	ADDL	$4,SI
  1812  	MOVL	CX,(DI)
  1813  	ADDL	$4,DI
  1814  	
  1815  	MOVL	(SI),CX
  1816  	ADDL	$4,SI
  1817  	MOVL	CX,(DI)
  1818  	ADDL	$4,DI
  1819  	
  1820  	MOVL	(SI),CX
  1821  	ADDL	$4,SI
  1822  	MOVL	CX,(DI)
  1823  	ADDL	$4,DI
  1824  	
  1825  	MOVL	(SI),CX
  1826  	ADDL	$4,SI
  1827  	MOVL	CX,(DI)
  1828  	ADDL	$4,DI
  1829  	
  1830  	MOVL	(SI),CX
  1831  	ADDL	$4,SI
  1832  	MOVL	CX,(DI)
  1833  	ADDL	$4,DI
  1834  	
  1835  	MOVL	(SI),CX
  1836  	ADDL	$4,SI
  1837  	MOVL	CX,(DI)
  1838  	ADDL	$4,DI
  1839  	
  1840  	MOVL	(SI),CX
  1841  	ADDL	$4,SI
  1842  	MOVL	CX,(DI)
  1843  	ADDL	$4,DI
  1844  	
  1845  	MOVL	(SI),CX
  1846  	ADDL	$4,SI
  1847  	MOVL	CX,(DI)
  1848  	ADDL	$4,DI
  1849  	
  1850  	MOVL	(SI),CX
  1851  	ADDL	$4,SI
  1852  	MOVL	CX,(DI)
  1853  	ADDL	$4,DI
  1854  	
  1855  	MOVL	(SI),CX
  1856  	ADDL	$4,SI
  1857  	MOVL	CX,(DI)
  1858  	ADDL	$4,DI
  1859  	
  1860  	MOVL	(SI),CX
  1861  	ADDL	$4,SI
  1862  	MOVL	CX,(DI)
  1863  	ADDL	$4,DI
  1864  	
  1865  	MOVL	(SI),CX
  1866  	ADDL	$4,SI
  1867  	MOVL	CX,(DI)
  1868  	ADDL	$4,DI
  1869  	
  1870  	MOVL	(SI),CX
  1871  	ADDL	$4,SI
  1872  	MOVL	CX,(DI)
  1873  	ADDL	$4,DI
  1874  	
  1875  	MOVL	(SI),CX
  1876  	ADDL	$4,SI
  1877  	MOVL	CX,(DI)
  1878  	ADDL	$4,DI
  1879  	
  1880  	MOVL	(SI),CX
  1881  	ADDL	$4,SI
  1882  	MOVL	CX,(DI)
  1883  	ADDL	$4,DI
  1884  	
  1885  	MOVL	(SI),CX
  1886  	ADDL	$4,SI
  1887  	MOVL	CX,(DI)
  1888  	ADDL	$4,DI
  1889  	
  1890  	MOVL	(SI),CX
  1891  	ADDL	$4,SI
  1892  	MOVL	CX,(DI)
  1893  	ADDL	$4,DI
  1894  	
  1895  	MOVL	(SI),CX
  1896  	ADDL	$4,SI
  1897  	MOVL	CX,(DI)
  1898  	ADDL	$4,DI
  1899  	
  1900  	MOVL	(SI),CX
  1901  	ADDL	$4,SI
  1902  	MOVL	CX,(DI)
  1903  	ADDL	$4,DI
  1904  	
  1905  	MOVL	(SI),CX
  1906  	ADDL	$4,SI
  1907  	MOVL	CX,(DI)
  1908  	ADDL	$4,DI
  1909  	
  1910  	MOVL	(SI),CX
  1911  	ADDL	$4,SI
  1912  	MOVL	CX,(DI)
  1913  	ADDL	$4,DI
  1914  	
  1915  	MOVL	(SI),CX
  1916  	ADDL	$4,SI
  1917  	MOVL	CX,(DI)
  1918  	ADDL	$4,DI
  1919  	
  1920  	MOVL	(SI),CX
  1921  	ADDL	$4,SI
  1922  	MOVL	CX,(DI)
  1923  	ADDL	$4,DI
  1924  	
  1925  	MOVL	(SI),CX
  1926  	ADDL	$4,SI
  1927  	MOVL	CX,(DI)
  1928  	ADDL	$4,DI
  1929  	
  1930  	MOVL	(SI),CX
  1931  	ADDL	$4,SI
  1932  	MOVL	CX,(DI)
  1933  	ADDL	$4,DI
  1934  	
  1935  	MOVL	(SI),CX
  1936  	ADDL	$4,SI
  1937  	MOVL	CX,(DI)
  1938  	ADDL	$4,DI
  1939  	
  1940  	MOVL	(SI),CX
  1941  	ADDL	$4,SI
  1942  	MOVL	CX,(DI)
  1943  	ADDL	$4,DI
  1944  	
  1945  	MOVL	(SI),CX
  1946  	ADDL	$4,SI
  1947  	MOVL	CX,(DI)
  1948  	ADDL	$4,DI
  1949  	
  1950  	MOVL	(SI),CX
  1951  	ADDL	$4,SI
  1952  	MOVL	CX,(DI)
  1953  	ADDL	$4,DI
  1954  	
  1955  	MOVL	(SI),CX
  1956  	ADDL	$4,SI
  1957  	MOVL	CX,(DI)
  1958  	ADDL	$4,DI
  1959  	
  1960  	MOVL	(SI),CX
  1961  	ADDL	$4,SI
  1962  	MOVL	CX,(DI)
  1963  	ADDL	$4,DI
  1964  	
  1965  	MOVL	(SI),CX
  1966  	ADDL	$4,SI
  1967  	MOVL	CX,(DI)
  1968  	ADDL	$4,DI
  1969  	
  1970  	MOVL	(SI),CX
  1971  	ADDL	$4,SI
  1972  	MOVL	CX,(DI)
  1973  	ADDL	$4,DI
  1974  	
  1975  	MOVL	(SI),CX
  1976  	ADDL	$4,SI
  1977  	MOVL	CX,(DI)
  1978  	ADDL	$4,DI
  1979  	
  1980  	MOVL	(SI),CX
  1981  	ADDL	$4,SI
  1982  	MOVL	CX,(DI)
  1983  	ADDL	$4,DI
  1984  	
  1985  	MOVL	(SI),CX
  1986  	ADDL	$4,SI
  1987  	MOVL	CX,(DI)
  1988  	ADDL	$4,DI
  1989  	
  1990  	MOVL	(SI),CX
  1991  	ADDL	$4,SI
  1992  	MOVL	CX,(DI)
  1993  	ADDL	$4,DI
  1994  	
  1995  	MOVL	(SI),CX
  1996  	ADDL	$4,SI
  1997  	MOVL	CX,(DI)
  1998  	ADDL	$4,DI
  1999  	
  2000  	MOVL	(SI),CX
  2001  	ADDL	$4,SI
  2002  	MOVL	CX,(DI)
  2003  	ADDL	$4,DI
  2004  	
  2005  	MOVL	(SI),CX
  2006  	ADDL	$4,SI
  2007  	MOVL	CX,(DI)
  2008  	ADDL	$4,DI
  2009  	
  2010  	MOVL	(SI),CX
  2011  	ADDL	$4,SI
  2012  	MOVL	CX,(DI)
  2013  	ADDL	$4,DI
  2014  	
  2015  	MOVL	(SI),CX
  2016  	ADDL	$4,SI
  2017  	MOVL	CX,(DI)
  2018  	ADDL	$4,DI
  2019  	
  2020  	MOVL	(SI),CX
  2021  	ADDL	$4,SI
  2022  	MOVL	CX,(DI)
  2023  	ADDL	$4,DI
  2024  	
  2025  	MOVL	(SI),CX
  2026  	ADDL	$4,SI
  2027  	MOVL	CX,(DI)
  2028  	ADDL	$4,DI
  2029  	
  2030  	MOVL	(SI),CX
  2031  	ADDL	$4,SI
  2032  	MOVL	CX,(DI)
  2033  	ADDL	$4,DI
  2034  	
  2035  	MOVL	(SI),CX
  2036  	ADDL	$4,SI
  2037  	MOVL	CX,(DI)
  2038  	ADDL	$4,DI
  2039  	
  2040  	MOVL	(SI),CX
  2041  	ADDL	$4,SI
  2042  	MOVL	CX,(DI)
  2043  	ADDL	$4,DI
  2044  	
  2045  	MOVL	(SI),CX
  2046  	ADDL	$4,SI
  2047  	MOVL	CX,(DI)
  2048  	ADDL	$4,DI
  2049  	
  2050  	MOVL	(SI),CX
  2051  	ADDL	$4,SI
  2052  	MOVL	CX,(DI)
  2053  	ADDL	$4,DI
  2054  	
  2055  	MOVL	(SI),CX
  2056  	ADDL	$4,SI
  2057  	MOVL	CX,(DI)
  2058  	ADDL	$4,DI
  2059  	
  2060  	MOVL	(SI),CX
  2061  	ADDL	$4,SI
  2062  	MOVL	CX,(DI)
  2063  	ADDL	$4,DI
  2064  	
  2065  	MOVL	(SI),CX
  2066  	ADDL	$4,SI
  2067  	MOVL	CX,(DI)
  2068  	ADDL	$4,DI
  2069  	
  2070  	MOVL	(SI),CX
  2071  	ADDL	$4,SI
  2072  	MOVL	CX,(DI)
  2073  	ADDL	$4,DI
  2074  	
  2075  	MOVL	(SI),CX
  2076  	ADDL	$4,SI
  2077  	MOVL	CX,(DI)
  2078  	ADDL	$4,DI
  2079  	
  2080  	MOVL	(SI),CX
  2081  	ADDL	$4,SI
  2082  	MOVL	CX,(DI)
  2083  	ADDL	$4,DI
  2084  	
  2085  	MOVL	(SI),CX
  2086  	ADDL	$4,SI
  2087  	MOVL	CX,(DI)
  2088  	ADDL	$4,DI
  2089  	
  2090  	MOVL	(SI),CX
  2091  	ADDL	$4,SI
  2092  	MOVL	CX,(DI)
  2093  	ADDL	$4,DI
  2094  	
  2095  	MOVL	(SI),CX
  2096  	ADDL	$4,SI
  2097  	MOVL	CX,(DI)
  2098  	ADDL	$4,DI
  2099  	
  2100  	MOVL	(SI),CX
  2101  	ADDL	$4,SI
  2102  	MOVL	CX,(DI)
  2103  	ADDL	$4,DI
  2104  	
  2105  	MOVL	(SI),CX
  2106  	ADDL	$4,SI
  2107  	MOVL	CX,(DI)
  2108  	ADDL	$4,DI
  2109  	
  2110  	MOVL	(SI),CX
  2111  	ADDL	$4,SI
  2112  	MOVL	CX,(DI)
  2113  	ADDL	$4,DI
  2114  	
  2115  	MOVL	(SI),CX
  2116  	ADDL	$4,SI
  2117  	MOVL	CX,(DI)
  2118  	ADDL	$4,DI
  2119  	
  2120  	MOVL	(SI),CX
  2121  	ADDL	$4,SI
  2122  	MOVL	CX,(DI)
  2123  	ADDL	$4,DI
  2124  	
  2125  	MOVL	(SI),CX
  2126  	ADDL	$4,SI
  2127  	MOVL	CX,(DI)
  2128  	ADDL	$4,DI
  2129  	
  2130  	MOVL	(SI),CX
  2131  	ADDL	$4,SI
  2132  	MOVL	CX,(DI)
  2133  	ADDL	$4,DI
  2134  	
  2135  	MOVL	(SI),CX
  2136  	ADDL	$4,SI
  2137  	MOVL	CX,(DI)
  2138  	ADDL	$4,DI
  2139  	
  2140  	MOVL	(SI),CX
  2141  	ADDL	$4,SI
  2142  	MOVL	CX,(DI)
  2143  	ADDL	$4,DI
  2144  	
  2145  	MOVL	(SI),CX
  2146  	ADDL	$4,SI
  2147  	MOVL	CX,(DI)
  2148  	ADDL	$4,DI
  2149  	
  2150  	MOVL	(SI),CX
  2151  	ADDL	$4,SI
  2152  	MOVL	CX,(DI)
  2153  	ADDL	$4,DI
  2154  	
  2155  	MOVL	(SI),CX
  2156  	ADDL	$4,SI
  2157  	MOVL	CX,(DI)
  2158  	ADDL	$4,DI
  2159  	
  2160  	MOVL	(SI),CX
  2161  	ADDL	$4,SI
  2162  	MOVL	CX,(DI)
  2163  	ADDL	$4,DI
  2164  	
  2165  	MOVL	(SI),CX
  2166  	ADDL	$4,SI
  2167  	MOVL	CX,(DI)
  2168  	ADDL	$4,DI
  2169  	
  2170  	MOVL	(SI),CX
  2171  	ADDL	$4,SI
  2172  	MOVL	CX,(DI)
  2173  	ADDL	$4,DI
  2174  	
  2175  	MOVL	(SI),CX
  2176  	ADDL	$4,SI
  2177  	MOVL	CX,(DI)
  2178  	ADDL	$4,DI
  2179  	
  2180  	MOVL	(SI),CX
  2181  	ADDL	$4,SI
  2182  	MOVL	CX,(DI)
  2183  	ADDL	$4,DI
  2184  	
  2185  	MOVL	(SI),CX
  2186  	ADDL	$4,SI
  2187  	MOVL	CX,(DI)
  2188  	ADDL	$4,DI
  2189  	
  2190  	MOVL	(SI),CX
  2191  	ADDL	$4,SI
  2192  	MOVL	CX,(DI)
  2193  	ADDL	$4,DI
  2194  	
  2195  	MOVL	(SI),CX
  2196  	ADDL	$4,SI
  2197  	MOVL	CX,(DI)
  2198  	ADDL	$4,DI
  2199  	
  2200  	MOVL	(SI),CX
  2201  	ADDL	$4,SI
  2202  	MOVL	CX,(DI)
  2203  	ADDL	$4,DI
  2204  	
  2205  	MOVL	(SI),CX
  2206  	ADDL	$4,SI
  2207  	MOVL	CX,(DI)
  2208  	ADDL	$4,DI
  2209  	
  2210  	MOVL	(SI),CX
  2211  	ADDL	$4,SI
  2212  	MOVL	CX,(DI)
  2213  	ADDL	$4,DI
  2214  	
  2215  	MOVL	(SI),CX
  2216  	ADDL	$4,SI
  2217  	MOVL	CX,(DI)
  2218  	ADDL	$4,DI
  2219  	
  2220  	MOVL	(SI),CX
  2221  	ADDL	$4,SI
  2222  	MOVL	CX,(DI)
  2223  	ADDL	$4,DI
  2224  	
  2225  	MOVL	(SI),CX
  2226  	ADDL	$4,SI
  2227  	MOVL	CX,(DI)
  2228  	ADDL	$4,DI
  2229  	
  2230  	MOVL	(SI),CX
  2231  	ADDL	$4,SI
  2232  	MOVL	CX,(DI)
  2233  	ADDL	$4,DI
  2234  	
  2235  	MOVL	(SI),CX
  2236  	ADDL	$4,SI
  2237  	MOVL	CX,(DI)
  2238  	ADDL	$4,DI
  2239  	
  2240  	MOVL	(SI),CX
  2241  	ADDL	$4,SI
  2242  	MOVL	CX,(DI)
  2243  	ADDL	$4,DI
  2244  	
  2245  	MOVL	(SI),CX
  2246  	ADDL	$4,SI
  2247  	MOVL	CX,(DI)
  2248  	ADDL	$4,DI
  2249  	
  2250  	MOVL	(SI),CX
  2251  	ADDL	$4,SI
  2252  	MOVL	CX,(DI)
  2253  	ADDL	$4,DI
  2254  	
  2255  	MOVL	(SI),CX
  2256  	ADDL	$4,SI
  2257  	MOVL	CX,(DI)
  2258  	ADDL	$4,DI
  2259  	
  2260  	MOVL	(SI),CX
  2261  	ADDL	$4,SI
  2262  	MOVL	CX,(DI)
  2263  	ADDL	$4,DI
  2264  	
  2265  	MOVL	(SI),CX
  2266  	ADDL	$4,SI
  2267  	MOVL	CX,(DI)
  2268  	ADDL	$4,DI
  2269  	
  2270  	MOVL	(SI),CX
  2271  	ADDL	$4,SI
  2272  	MOVL	CX,(DI)
  2273  	ADDL	$4,DI
  2274  	
  2275  	MOVL	(SI),CX
  2276  	ADDL	$4,SI
  2277  	MOVL	CX,(DI)
  2278  	ADDL	$4,DI
  2279  	
  2280  	MOVL	(SI),CX
  2281  	ADDL	$4,SI
  2282  	MOVL	CX,(DI)
  2283  	ADDL	$4,DI
  2284  	
  2285  	MOVL	(SI),CX
  2286  	ADDL	$4,SI
  2287  	MOVL	CX,(DI)
  2288  	ADDL	$4,DI
  2289  	
  2290  	MOVL	(SI),CX
  2291  	ADDL	$4,SI
  2292  	MOVL	CX,(DI)
  2293  	ADDL	$4,DI
  2294  	
  2295  	MOVL	(SI),CX
  2296  	ADDL	$4,SI
  2297  	MOVL	CX,(DI)
  2298  	ADDL	$4,DI
  2299  	
  2300  	MOVL	(SI),CX
  2301  	ADDL	$4,SI
  2302  	MOVL	CX,(DI)
  2303  	ADDL	$4,DI
  2304  	
  2305  	MOVL	(SI),CX
  2306  	ADDL	$4,SI
  2307  	MOVL	CX,(DI)
  2308  	ADDL	$4,DI
  2309  	
  2310  	MOVL	(SI),CX
  2311  	ADDL	$4,SI
  2312  	MOVL	CX,(DI)
  2313  	ADDL	$4,DI
  2314  	
  2315  	MOVL	(SI),CX
  2316  	ADDL	$4,SI
  2317  	MOVL	CX,(DI)
  2318  	ADDL	$4,DI
  2319  	
  2320  	MOVL	(SI),CX
  2321  	ADDL	$4,SI
  2322  	MOVL	CX,(DI)
  2323  	ADDL	$4,DI
  2324  	
  2325  	MOVL	(SI),CX
  2326  	ADDL	$4,SI
  2327  	MOVL	CX,(DI)
  2328  	ADDL	$4,DI
  2329  	
  2330  	MOVL	(SI),CX
  2331  	ADDL	$4,SI
  2332  	MOVL	CX,(DI)
  2333  	ADDL	$4,DI
  2334  	
  2335  	MOVL	(SI),CX
  2336  	ADDL	$4,SI
  2337  	MOVL	CX,(DI)
  2338  	ADDL	$4,DI
  2339  	
  2340  	MOVL	(SI),CX
  2341  	ADDL	$4,SI
  2342  	MOVL	CX,(DI)
  2343  	ADDL	$4,DI
  2344  	
  2345  	MOVL	(SI),CX
  2346  	ADDL	$4,SI
  2347  	MOVL	CX,(DI)
  2348  	ADDL	$4,DI
  2349  	
  2350  	MOVL	(SI),CX
  2351  	ADDL	$4,SI
  2352  	MOVL	CX,(DI)
  2353  	ADDL	$4,DI
  2354  	
  2355  	MOVL	(SI),CX
  2356  	ADDL	$4,SI
  2357  	MOVL	CX,(DI)
  2358  	ADDL	$4,DI
  2359  	
  2360  	MOVL	(SI),CX
  2361  	ADDL	$4,SI
  2362  	MOVL	CX,(DI)
  2363  	ADDL	$4,DI
  2364  	
  2365  	MOVL	(SI),CX
  2366  	ADDL	$4,SI
  2367  	MOVL	CX,(DI)
  2368  	ADDL	$4,DI
  2369  	
  2370  	MOVL	(SI),CX
  2371  	ADDL	$4,SI
  2372  	MOVL	CX,(DI)
  2373  	ADDL	$4,DI
  2374  	
  2375  	MOVL	(SI),CX
  2376  	ADDL	$4,SI
  2377  	MOVL	CX,(DI)
  2378  	ADDL	$4,DI
  2379  	
  2380  	RET
  2381  
        // fastrand1 advances the per-m pseudo-random state m_fastrand and
        // returns the new value in ret+0(FP). One step is: x = 2*x; if the
        // doubled value has its top bit set, x ^= 0x88888eef.
  2382  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
  2383  	get_tls(CX)
  2384  	MOVL	g(CX), AX
  2385  	MOVL	g_m(AX), AX	// AX = current g's m
  2386  	MOVL	m_fastrand(AX), DX
  2387  	ADDL	DX, DX		// DX = 2*x
  2388  	MOVL	DX, BX		// BX keeps the un-xored 2*x
  2389  	XORL	$0x88888eef, DX	// flips the top bit, so the sign flag is set iff 2*x had it clear
  2390  	CMOVLMI	BX, DX		// top bit of 2*x was clear: discard the xor, keep 2*x
  2391  	MOVL	DX, m_fastrand(AX)
  2392  	MOVL	DX, ret+0(FP)
  2393  	RET
  2394  
        // return0 sets AX to 0 and returns. It takes no arguments from the
        // stack and writes no result slot.
  2395  TEXT runtime·return0(SB), NOSPLIT, $0
  2396  	MOVL	$0, AX
  2397  	RET
  2398  
  2399  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  2400  // Must obey the gcc calling convention.
        // The value is returned in AX; CX is used to reach thread-local storage.
  2401  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  2402  	get_tls(CX)
  2403  	MOVL	g(CX), AX			// AX = g
  2404  	MOVL	g_m(AX), AX			// AX = g->m
  2405  	MOVL	m_curg(AX), AX			// AX = g->m->curg
  2406  	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
  2407  	RET
  2408  
  2409  // The top-most function running on a goroutine
  2410  // returns to goexit+PCQuantum.
        // The leading one-byte NOP is what sits between goexit and that
        // return address (on the assumption that PCQuantum is 1 here -
        // confirm); execution then falls into the call to goexit1.
  2411  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  2412  	BYTE	$0x90	// NOP
  2413  	CALL	runtime·goexit1(SB)	// does not return
  2414  
        // getg stores the current g pointer, read from thread-local storage,
        // in ret+0(FP).
  2415  TEXT runtime·getg(SB),NOSPLIT,$0-4
  2416  	get_tls(CX)
  2417  	MOVL	g(CX), AX
  2418  	MOVL	AX, ret+0(FP)
  2419  	RET
  2420  
        // prefetcht0 issues a PREFETCHT0 hint for the address passed in
        // addr+0(FP). It returns nothing.
  2421  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
  2422  	MOVL	addr+0(FP), AX
  2423  	PREFETCHT0	(AX)
  2424  	RET
  2425  
        // prefetcht1 issues a PREFETCHT1 hint for the address passed in
        // addr+0(FP). It returns nothing.
  2426  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
  2427  	MOVL	addr+0(FP), AX
  2428  	PREFETCHT1	(AX)
  2429  	RET
  2430  
  2431  
        // prefetcht2 issues a PREFETCHT2 hint for the address passed in
        // addr+0(FP). It returns nothing.
  2432  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
  2433  	MOVL	addr+0(FP), AX
  2434  	PREFETCHT2	(AX)
  2435  	RET
  2436  
        // prefetchnta issues a PREFETCHNTA (non-temporal) hint for the address
        // passed in addr+0(FP). It returns nothing.
  2437  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
  2438  	MOVL	addr+0(FP), AX
  2439  	PREFETCHNTA	(AX)
  2440  	RET