github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/runtime/asm_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "zasm_GOOS_GOARCH.h"
     6  #include "funcdata.h"
     7  #include "textflag.h"
     8  
     9  TEXT runtime·rt0_go(SB),NOSPLIT,$0	// process bootstrap: set up g0/m0 and TLS, run runtime init, queue runtime·main via newproc, then mstart
    10  	// copy arguments forward on an even stack
    11  	MOVL	argc+0(FP), AX
    12  	MOVL	argv+4(FP), BX
    13  	SUBL	$128, SP		// plenty of scratch
    14  	ANDL	$~15, SP		// round SP down to a multiple of 16
    15  	MOVL	AX, 120(SP)		// save argc, argv away
    16  	MOVL	BX, 124(SP)
    17  
    18  	// set default stack bounds.
    19  	// _cgo_init may update stackguard.
    20  	MOVL	$runtime·g0(SB), BP
    21  	LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104: provisional guard and low bound for g0
    22  	MOVL	BX, g_stackguard0(BP)
    23  	MOVL	BX, g_stackguard1(BP)
    24  	MOVL	BX, (g_stack+stack_lo)(BP)
    25  	MOVL	SP, (g_stack+stack_hi)(BP)
    26  	
    27  	// find out information about the processor we're on
    28  	MOVL	$0, AX
    29  	CPUID
    30  	CMPL	AX, $0	// CPUID(0) left AX == 0: skip the leaf-1 query below
    31  	JE	nocpuinfo
    32  	MOVL	$1, AX
    33  	CPUID
    34  	MOVL	CX, runtime·cpuid_ecx(SB)
    35  	MOVL	DX, runtime·cpuid_edx(SB)
    36  nocpuinfo:	
    37  
    38  	// if there is an _cgo_init, call it to let it
    39  	// initialize and to set up GS.  if not,
    40  	// we set up GS ourselves.
    41  	MOVL	_cgo_init(SB), AX
    42  	TESTL	AX, AX
    43  	JZ	needtls
    44  	MOVL	$setg_gcc<>(SB), BX
    45  	MOVL	BX, 4(SP)	// second argument: address of setg_gcc<>
    46  	MOVL	BP, 0(SP)	// first argument: g0
    47  	CALL	AX
    48  
    49  	// update stackguard after _cgo_init
    50  	MOVL	$runtime·g0(SB), CX
    51  	MOVL	(g_stack+stack_lo)(CX), AX
    52  	ADDL	$const_StackGuard, AX	// guard = g0.stack.lo + StackGuard
    53  	MOVL	AX, g_stackguard0(CX)
    54  	MOVL	AX, g_stackguard1(CX)
    55  
    56  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
    57  	CMPL runtime·iswindows(SB), $0
    58  	JEQ ok
    59  needtls:
    60  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
    61  	CMPL	runtime·isplan9(SB), $1
    62  	JEQ	ok
    63  
    64  	// set up %gs
    65  	CALL	runtime·ldt0setup(SB)
    66  
    67  	// store through it, to make sure it works
    68  	get_tls(BX)
    69  	MOVL	$0x123, g(BX)
    70  	MOVL	runtime·tls0(SB), AX	// read the marker back directly from tls0
    71  	CMPL	AX, $0x123
    72  	JEQ	ok
    73  	MOVL	AX, 0	// abort
    74  ok:
    75  	// set up m and g "registers"
    76  	get_tls(BX)
    77  	LEAL	runtime·g0(SB), CX
    78  	MOVL	CX, g(BX)	// current g = g0
    79  	LEAL	runtime·m0(SB), AX
    80  
    81  	// save m->g0 = g0
    82  	MOVL	CX, m_g0(AX)
    83  	// save g0->m = m0
    84  	MOVL	AX, g_m(CX)
    85  
    86  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
    87  
    88  	// convention is D is always cleared
    89  	CLD
    90  
    91  	CALL	runtime·check(SB)
    92  
    93  	// saved argc, argv
    94  	MOVL	120(SP), AX
    95  	MOVL	AX, 0(SP)	// argc as first argument to runtime·args
    96  	MOVL	124(SP), AX
    97  	MOVL	AX, 4(SP)	// argv as second argument
    98  	CALL	runtime·args(SB)
    99  	CALL	runtime·osinit(SB)
   100  	CALL	runtime·schedinit(SB)
   101  
   102  	// create a new goroutine to start program
   103  	PUSHL	$runtime·main·f(SB)	// entry
   104  	PUSHL	$0	// arg size
   105  	CALL	runtime·newproc(SB)
   106  	POPL	AX	// discard the two pushed arguments
   107  	POPL	AX
   108  
   109  	// start this M
   110  	CALL	runtime·mstart(SB)
   111  
   112  	INT $3	// trap if mstart ever returns
   113  	RET
   114  
   115  DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main; rt0_go pushes the address of this word as newproc's entry
   116  GLOBL	runtime·main·f(SB),RODATA,$4	// read-only, 4 bytes
   117  
   118  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// raise a breakpoint trap, then return to the caller
   119  	INT $3
   120  	RET
   121  
   122  TEXT runtime·asminit(SB),NOSPLIT,$0-0	// load the x87 control word 0x27F
   123  	// Linux and MinGW start the FPU in extended double precision.
   124  	// Other operating systems use double precision.
   125  	// Change to double precision to match them,
   126  	// and to match other hardware that only has double.
   127  	PUSHL $0x27F	// FLDCW takes a memory operand, so stage the value on the stack
   128  	FLDCW	0(SP)
   129  	POPL AX	// discard the staged word; AX is scratch
   130  	RET
   131  
   132  /*
   133   *  go-routine
   134   */
   135  
   136  // void gosave(Gobuf*)
   137  // save the caller's SP, PC and current g in Gobuf, clearing ret and ctxt; setjmp
   138  TEXT runtime·gosave(SB), NOSPLIT, $0-4
   139  	MOVL	buf+0(FP), AX		// gobuf
   140  	LEAL	buf+0(FP), BX		// caller's SP
   141  	MOVL	BX, gobuf_sp(AX)
   142  	MOVL	0(SP), BX		// caller's PC
   143  	MOVL	BX, gobuf_pc(AX)
   144  	MOVL	$0, gobuf_ret(AX)
   145  	MOVL	$0, gobuf_ctxt(AX)
   146  	get_tls(CX)
   147  	MOVL	g(CX), BX		// current g
   148  	MOVL	BX, gobuf_g(AX)
   149  	RET
   150  
   151  // void gogo(Gobuf*)
   152  // restore g, SP, AX (ret) and DX (ctxt) from Gobuf and jump to its pc; longjmp
   153  TEXT runtime·gogo(SB), NOSPLIT, $0-4
   154  	MOVL	buf+0(FP), BX		// gobuf
   155  	MOVL	gobuf_g(BX), DX
   156  	MOVL	0(DX), CX		// make sure g != nil
   157  	get_tls(CX)
   158  	MOVL	DX, g(CX)		// make gobuf.g the current g
   159  	MOVL	gobuf_sp(BX), SP	// restore SP
   160  	MOVL	gobuf_ret(BX), AX
   161  	MOVL	gobuf_ctxt(BX), DX
   162  	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   163  	MOVL	$0, gobuf_ret(BX)
   164  	MOVL	$0, gobuf_ctxt(BX)
   165  	MOVL	gobuf_pc(BX), BX
   166  	JMP	BX			// resume at the saved pc; does not return here
   167  
   168  // func mcall(fn func(*g))
   169  // Switch to m->g0's stack, call fn(g).
   170  // Fn must never return.  It should gogo(&g->sched)
   171  // to keep running g.
   172  TEXT runtime·mcall(SB), NOSPLIT, $0-4
   173  	MOVL	fn+0(FP), DI
   174  	
   175  	get_tls(CX)
   176  	MOVL	g(CX), AX	// save state in g->sched
   177  	MOVL	0(SP), BX	// caller's PC
   178  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   179  	LEAL	fn+0(FP), BX	// caller's SP
   180  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   181  	MOVL	AX, (g_sched+gobuf_g)(AX)
   182  
   183  	// switch to m->g0 & its stack, call fn
   184  	MOVL	g(CX), BX
   185  	MOVL	g_m(BX), BX
   186  	MOVL	m_g0(BX), SI
   187  	CMPL	SI, AX	// if g == m->g0 call badmcall
   188  	JNE	3(PC)	// g != m->g0: skip the two-instruction badmcall jump
   189  	MOVL	$runtime·badmcall(SB), AX
   190  	JMP	AX
   191  	MOVL	SI, g(CX)	// g = m->g0
   192  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   193  	PUSHL	AX	// argument to fn: the g we just left
   194  	MOVL	DI, DX	// DX = fn value
   195  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   196  	CALL	DI
   197  	POPL	AX
   198  	MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   199  	JMP	AX
   200  	RET
   201  
   202  // switchtoM is a dummy routine that onM leaves at the bottom
   203  // of the G stack.  We need to distinguish the routine that
   204  // lives at the bottom of the G stack from the one that lives
   205  // at the top of the M stack because the one at the top of
   206  // the M stack terminates the stack walk (see topofstack()).
   207  TEXT runtime·switchtoM(SB), NOSPLIT, $0-0	// body is a bare RET; onM stores this symbol's address as g->sched.pc
   208  	RET
   209  
   210  // func onM_signalok(fn func()): call fn in place when running on m->gsignal, otherwise tail-jump to onM
   211  TEXT runtime·onM_signalok(SB), NOSPLIT, $0-4
   212  	get_tls(CX)
   213  	MOVL	g(CX), AX	// AX = g
   214  	MOVL	g_m(AX), BX	// BX = m
   215  	MOVL	m_gsignal(BX), DX	// DX = gsignal
   216  	CMPL	AX, DX
   217  	JEQ	ongsignal
   218  	JMP	runtime·onM(SB)	// not on gsignal: onM sees the same fn argument
   219  
   220  ongsignal:
   221  	MOVL	fn+0(FP), DI	// DI = fn
   222  	MOVL	DI, DX	// DX = fn value
   223  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   224  	CALL	DI
   225  	RET
   226  
   227  // func onM(fn func()): run fn on the m->g0 stack; call it directly if already on g0, otherwise switch from m->curg and back
   228  TEXT runtime·onM(SB), NOSPLIT, $0-4
   229  	MOVL	fn+0(FP), DI	// DI = fn
   230  	get_tls(CX)
   231  	MOVL	g(CX), AX	// AX = g
   232  	MOVL	g_m(AX), BX	// BX = m
   233  
   234  	MOVL	m_g0(BX), DX	// DX = g0
   235  	CMPL	AX, DX
   236  	JEQ	onm
   237  
   238  	MOVL	m_curg(BX), BP
   239  	CMPL	AX, BP
   240  	JEQ	oncurg
   241  	
   242  	// Not g0, not curg. Must be gsignal, but that's not allowed.
   243  	// Hide call from linker nosplit analysis.
   244  	MOVL	$runtime·badonm(SB), AX
   245  	CALL	AX	// NOTE(review): if badonm returned, execution would fall into oncurg; presumably it never returns
   246  
   247  oncurg:
   248  	// save our state in g->sched.  Pretend to
   249  	// be switchtoM if the G stack is scanned.
   250  	MOVL	$runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX)
   251  	MOVL	SP, (g_sched+gobuf_sp)(AX)
   252  	MOVL	AX, (g_sched+gobuf_g)(AX)
   253  
   254  	// switch to g0
   255  	MOVL	DX, g(CX)
   256  	MOVL	(g_sched+gobuf_sp)(DX), BX
   257  	// make it look like mstart called onM on g0, to stop traceback
   258  	SUBL	$4, BX
   259  	MOVL	$runtime·mstart(SB), DX
   260  	MOVL	DX, 0(BX)	// fake return address = mstart
   261  	MOVL	BX, SP
   262  
   263  	// call target function
   264  	MOVL	DI, DX	// DX = fn value
   265  	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   266  	CALL	DI
   267  
   268  	// switch back to g
   269  	get_tls(CX)
   270  	MOVL	g(CX), AX
   271  	MOVL	g_m(AX), BX
   272  	MOVL	m_curg(BX), AX
   273  	MOVL	AX, g(CX)	// current g = m->curg
   274  	MOVL	(g_sched+gobuf_sp)(AX), SP	// back to the SP saved at oncurg
   275  	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
   276  	RET
   277  
   278  onm:
   279  	// already on m stack, just call directly
   280  	MOVL	DI, DX
   281  	MOVL	0(DI), DI
   282  	CALL	DI
   283  	RET
   284  
   285  /*
   286   * support for morestack
   287   */
   288  
   289  // Called during function prolog when more stack is needed.
   290  // Records f's caller in m->morebuf and f's own context in g->sched,
   291  // then calls newstack on m->g0's stack.
   292  // The traceback routines see morestack on a g0 as being
   293  // the top of a stack (for example, morestack calling newstack
   294  // calling the scheduler calling newm calling gc), so we must
   295  // record an argument size. For that purpose, it has no arguments.
   296  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   297  	// Cannot grow scheduler stack (m->g0).
   298  	get_tls(CX)
   299  	MOVL	g(CX), BX
   300  	MOVL	g_m(BX), BX	// BX = m for the rest of the function
   301  	MOVL	m_g0(BX), SI
   302  	CMPL	g(CX), SI
   303  	JNE	2(PC)	// not on g0: skip the trap
   304  	INT	$3
   305  
   306  	// Cannot grow signal stack.
   307  	MOVL	m_gsignal(BX), SI
   308  	CMPL	g(CX), SI
   309  	JNE	2(PC)	// not on gsignal: skip the trap
   310  	INT	$3
   311  
   312  	// Called from f.
   313  	// Set m->morebuf to f's caller.
   314  	MOVL	4(SP), DI	// f's caller's PC
   315  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   316  	LEAL	8(SP), CX	// f's caller's SP
   317  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   318  	get_tls(CX)	// CX was reused as scratch above; reload the TLS base
   319  	MOVL	g(CX), SI
   320  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   321  
   322  	// Set g->sched to context in f.
   323  	MOVL	0(SP), AX	// f's PC
   324  	MOVL	AX, (g_sched+gobuf_pc)(SI)
   325  	MOVL	SI, (g_sched+gobuf_g)(SI)
   326  	LEAL	4(SP), AX	// f's SP
   327  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   328  	MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as passed in by f (morestack_noctxt zeroes it first)
   329  
   330  	// Call newstack on m->g0's stack.
   331  	MOVL	m_g0(BX), BP
   332  	MOVL	BP, g(CX)
   333  	MOVL	(g_sched+gobuf_sp)(BP), AX
   334  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   335  	MOVL	AX, SP
   336  	CALL	runtime·newstack(SB)
   337  	MOVL	$0, 0x1003	// crash if newstack returns
   338  	RET
   338  
   339  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0	// morestack entry that first zeroes DX
   340  	MOVL	$0, DX	// morestack stores DX into g->sched.ctxt
   341  	JMP runtime·morestack(SB)
   342  
   343  // reflectcall: call a function with the given argument list
   344  // func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
   345  // we don't have variable-sized frames, so we use a small number
   346  // of constant-sized-frame functions to encode a few bits of size in the pc.
   347  // DISPATCH jumps to NAME when CX (argsize) <= MAXSIZE, else falls through.
   348  
   349  #define DISPATCH(NAME,MAXSIZE)		\
   350  	CMPL	CX, $MAXSIZE;		\
   351  	JA	3(PC);			\
   352  	MOVL	$NAME(SB), AX;		\
   353  	JMP	AX
   354  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   355  
   356  TEXT ·reflectcall(SB), NOSPLIT, $0-16
   357  	MOVL	argsize+8(FP), CX	// CX = argsize, tested by every DISPATCH below
   358  	DISPATCH(runtime·call16, 16)
   359  	DISPATCH(runtime·call32, 32)
   360  	DISPATCH(runtime·call64, 64)
   361  	DISPATCH(runtime·call128, 128)
   362  	DISPATCH(runtime·call256, 256)
   363  	DISPATCH(runtime·call512, 512)
   364  	DISPATCH(runtime·call1024, 1024)
   365  	DISPATCH(runtime·call2048, 2048)
   366  	DISPATCH(runtime·call4096, 4096)
   367  	DISPATCH(runtime·call8192, 8192)
   368  	DISPATCH(runtime·call16384, 16384)
   369  	DISPATCH(runtime·call32768, 32768)
   370  	DISPATCH(runtime·call65536, 65536)
   371  	DISPATCH(runtime·call131072, 131072)
   372  	DISPATCH(runtime·call262144, 262144)
   373  	DISPATCH(runtime·call524288, 524288)
   374  	DISPATCH(runtime·call1048576, 1048576)
   375  	DISPATCH(runtime·call2097152, 2097152)
   376  	DISPATCH(runtime·call4194304, 4194304)
   377  	DISPATCH(runtime·call8388608, 8388608)
   378  	DISPATCH(runtime·call16777216, 16777216)
   379  	DISPATCH(runtime·call33554432, 33554432)
   380  	DISPATCH(runtime·call67108864, 67108864)
   381  	DISPATCH(runtime·call134217728, 134217728)
   382  	DISPATCH(runtime·call268435456, 268435456)
   383  	DISPATCH(runtime·call536870912, 536870912)
   384  	DISPATCH(runtime·call1073741824, 1073741824)
   385  	MOVL	$runtime·badreflectcall(SB), AX	// argsize larger than every bucket
   386  	JMP	AX
   387  
   388  #define CALLFN(NAME,MAXSIZE)			\
   389  TEXT NAME(SB), WRAPPER, $MAXSIZE-16;		\
   390  	NO_LOCAL_POINTERS;			\
   391  	/* copy argsize bytes from argptr to SP */	\
   392  	MOVL	argptr+4(FP), SI;		\
   393  	MOVL	argsize+8(FP), CX;		\
   394  	MOVL	SP, DI;				\
   395  	REP;MOVSB;				\
   396  	/* call through the code pointer in f's first word; DX = f */	\
   397  	MOVL	f+0(FP), DX;			\
   398  	MOVL	(DX), AX; 			\
   399  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   400  	CALL	AX;				\
   401  	/* copy bytes [retoffset, argsize) from SP back to argptr */	\
   402  	MOVL	argptr+4(FP), DI;		\
   403  	MOVL	argsize+8(FP), CX;		\
   404  	MOVL	retoffset+12(FP), BX;		\
   405  	MOVL	SP, SI;				\
   406  	ADDL	BX, DI;				\
   407  	ADDL	BX, SI;				\
   408  	SUBL	BX, CX;				\
   409  	REP;MOVSB;				\
   410  	RET
   411  
   412  CALLFN(·call16, 16)
   413  CALLFN(·call32, 32)
   414  CALLFN(·call64, 64)
   415  CALLFN(·call128, 128)
   416  CALLFN(·call256, 256)
   417  CALLFN(·call512, 512)
   418  CALLFN(·call1024, 1024)
   419  CALLFN(·call2048, 2048)
   420  CALLFN(·call4096, 4096)
   421  CALLFN(·call8192, 8192)
   422  CALLFN(·call16384, 16384)
   423  CALLFN(·call32768, 32768)
   424  CALLFN(·call65536, 65536)
   425  CALLFN(·call131072, 131072)
   426  CALLFN(·call262144, 262144)
   427  CALLFN(·call524288, 524288)
   428  CALLFN(·call1048576, 1048576)
   429  CALLFN(·call2097152, 2097152)
   430  CALLFN(·call4194304, 4194304)
   431  CALLFN(·call8388608, 8388608)
   432  CALLFN(·call16777216, 16777216)
   433  CALLFN(·call33554432, 33554432)
   434  CALLFN(·call67108864, 67108864)
   435  CALLFN(·call134217728, 134217728)
   436  CALLFN(·call268435456, 268435456)
   437  CALLFN(·call536870912, 536870912)
   438  CALLFN(·call1073741824, 1073741824)
   439  
   440  // bool cas(int32 *val, int32 old, int32 new)
   441  // Atomically:
   442  //	if(*val == old){
   443  //		*val = new;
   444  //		return 1;
   445  //	}else
   446  //		return 0;
   447  TEXT runtime·cas(SB), NOSPLIT, $0-13
   448  	MOVL	ptr+0(FP), BX
   449  	MOVL	old+4(FP), AX	// CMPXCHGL compares the memory operand against AX
   450  	MOVL	new+8(FP), CX
   451  	LOCK
   452  	CMPXCHGL	CX, 0(BX)
   453  	JZ 4(PC)	// exchanged: jump to the "return 1" path
   454  	MOVL	$0, AX
   455  	MOVB	AX, ret+12(FP)	// one-byte bool result
   456  	RET
   457  	MOVL	$1, AX
   458  	MOVB	AX, ret+12(FP)
   459  	RET
   460  
   461  TEXT runtime·casuintptr(SB), NOSPLIT, $0-13	// same frame layout as cas; tail-jumps to it
   462  	JMP	runtime·cas(SB)
   463  
   464  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8	// tail-jumps to runtime·atomicload (not defined in this part of the file)
   465  	JMP	runtime·atomicload(SB)
   466  
   467  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8	// tail-jumps to runtime·atomicload (not defined in this part of the file)
   468  	JMP	runtime·atomicload(SB)
   469  
   470  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-8	// same frame layout as atomicstore; tail-jumps to it
   471  	JMP	runtime·atomicstore(SB)
   472  
   473  // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
   474  // Atomically:
   475  //	if(*val == old){
   476  //		*val = new;
   477  //		return 1;
   478  //	} else {
   479  //		return 0;
   480  //	}
   481  TEXT runtime·cas64(SB), NOSPLIT, $0-21
   482  	MOVL	ptr+0(FP), BP
   483  	MOVL	old_lo+4(FP), AX	// DX:AX = old, passed by value
   484  	MOVL	old_hi+8(FP), DX
   485  	MOVL	new_lo+12(FP), BX	// CX:BX = new
   486  	MOVL	new_hi+16(FP), CX
   487  	LOCK
   488  	CMPXCHG8B	0(BP)
   489  	JNZ	cas64_fail
   490  	MOVL	$1, AX
   491  	MOVB	AX, ret+20(FP)	// one-byte bool result
   492  	RET
   493  cas64_fail:
   494  	MOVL	$0, AX
   495  	MOVB	AX, ret+20(FP)
   496  	RET
   497  
   498  // bool casp(void **p, void *old, void *new)
   499  // Atomically:
   500  //	if(*p == old){
   501  //		*p = new;
   502  //		return 1;
   503  //	}else
   504  //		return 0;
   505  TEXT runtime·casp(SB), NOSPLIT, $0-13
   506  	MOVL	ptr+0(FP), BX
   507  	MOVL	old+4(FP), AX	// CMPXCHGL compares the memory operand against AX
   508  	MOVL	new+8(FP), CX
   509  	LOCK
   510  	CMPXCHGL	CX, 0(BX)
   511  	JZ 4(PC)	// exchanged: jump to the "return 1" path
   512  	MOVL	$0, AX
   513  	MOVB	AX, ret+12(FP)	// one-byte bool result
   514  	RET
   515  	MOVL	$1, AX
   516  	MOVB	AX, ret+12(FP)
   517  	RET
   518  
   519  // uint32 xadd(uint32 volatile *val, int32 delta)
   520  // Atomically:
   521  //	*val += delta;
   522  //	return *val;
   523  TEXT runtime·xadd(SB), NOSPLIT, $0-12
   524  	MOVL	ptr+0(FP), BX
   525  	MOVL	delta+4(FP), AX
   526  	MOVL	AX, CX	// keep a copy of delta; XADDL overwrites AX
   527  	LOCK
   528  	XADDL	AX, 0(BX)	// memory += AX; AX = previous memory value
   529  	ADDL	CX, AX	// previous value + delta = new value
   530  	MOVL	AX, ret+8(FP)
   531  	RET
   532  
   533  TEXT runtime·xchg(SB), NOSPLIT, $0-12	// store new at *ptr and return the value that was there
   534  	MOVL	ptr+0(FP), BX
   535  	MOVL	new+4(FP), AX
   536  	XCHGL	AX, 0(BX)	// x86 XCHG with a memory operand is locked implicitly; AX = old value
   537  	MOVL	AX, ret+8(FP)
   538  	RET
   539  
   540  TEXT runtime·xchgp(SB), NOSPLIT, $0-12	// same instruction sequence as xchg: store new at *ptr, return the old value
   541  	MOVL	ptr+0(FP), BX
   542  	MOVL	new+4(FP), AX
   543  	XCHGL	AX, 0(BX)	// AX = old value
   544  	MOVL	AX, ret+8(FP)
   545  	RET
   546  
   547  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12	// same frame layout as xchg; tail-jumps to it
   548  	JMP	runtime·xchg(SB)
   549  
   550  TEXT runtime·procyield(SB),NOSPLIT,$0-0	// execute PAUSE `cycles` times. NOTE(review): frame declares 0 argument bytes but cycles+0(FP) is read
   551  	MOVL	cycles+0(FP), AX
   552  again:
   553  	PAUSE
   554  	SUBL	$1, AX
   555  	JNZ	again	// loop until the counter reaches zero; a count of 0 would wrap around first
   556  	RET
   557  
   558  TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8	// store val at *ptr using XCHGL; the old value is discarded
   559  	MOVL	ptr+0(FP), BX
   560  	MOVL	val+4(FP), AX
   561  	XCHGL	AX, 0(BX)	// x86 XCHG with a memory operand is locked implicitly
   562  	RET
   563  
   564  TEXT runtime·atomicstore(SB), NOSPLIT, $0-8	// store val at *ptr using XCHGL; the old value is discarded
   565  	MOVL	ptr+0(FP), BX
   566  	MOVL	val+4(FP), AX
   567  	XCHGL	AX, 0(BX)	// x86 XCHG with a memory operand is locked implicitly
   568  	RET
   569  
   570  // uint64 atomicload64(uint64 volatile* addr);
   571  TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
   572  	MOVL	ptr+0(FP), AX
   573  	LEAL	ret_lo+4(FP), BX	// BX = address of the 8-byte result slot
   574  	// MOVQ (%EAX), %MM0
   575  	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
   576  	// MOVQ %MM0, 0(%EBX)
   577  	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
   578  	// EMMS
   579  	BYTE $0x0F; BYTE $0x77
   580  	RET
   581  
   582  // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
   583  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
   584  	MOVL	ptr+0(FP), AX
   585  	// MOVQ and EMMS were introduced on the Pentium MMX.
   586  	// MOVQ 0x8(%ESP), %MM0  (with a 0-byte frame, 0(SP) is the return PC, 4(SP) is addr, 8(SP) is v)
   587  	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
   588  	// MOVQ %MM0, (%EAX)
   589  	BYTE $0x0f; BYTE $0x7f; BYTE $0x00 
   590  	// EMMS
   591  	BYTE $0x0F; BYTE $0x77
   592  	// This is essentially a no-op, but it provides required memory fencing.
   593  	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
   594  	MOVL	$0, AX
   595  	LOCK
   596  	XADDL	AX, (SP)	// locked add of 0 to the word at (SP): value unchanged
   597  	RET
   598  
   599  // void	runtime·atomicor8(byte volatile*, byte); atomically *ptr |= val
   600  TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
   601  	MOVL	ptr+0(FP), AX
   602  	MOVB	val+4(FP), BX
   603  	LOCK
   604  	ORB	BX, (AX)
   605  	RET
   606  
   607  // void jmpdefer(fn, sp);
   608  // called from deferreturn.
   609  // 1. pop the caller
   610  // 2. sub 5 bytes from the caller's return address
   611  // 3. jmp to the argument
   612  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   613  	MOVL	fv+0(FP), DX	// fn
   614  	MOVL	argp+4(FP), BX	// caller sp
   615  	LEAL	-4(BX), SP	// caller sp after CALL
   616  	SUBL	$5, (SP)	// return to CALL again
   617  	MOVL	0(DX), BX	// BX = code pointer stored in the first word of fn; DX stays = fn
   618  	JMP	BX	// but first run the deferred function
   619  
   620  // Save state of caller into g->sched (sp and pc; ret and ctxt are zeroed). AX and BX are preserved.
   621  TEXT gosave<>(SB),NOSPLIT,$0
   622  	PUSHL	AX
   623  	PUSHL	BX
   624  	get_tls(BX)
   625  	MOVL	g(BX), BX	// BX = current g
   626  	LEAL	arg+0(FP), AX	// AX = address just above our return PC, i.e. the caller's SP
   627  	MOVL	AX, (g_sched+gobuf_sp)(BX)
   628  	MOVL	-4(AX), AX	// AX = our return PC, i.e. the caller's resume address
   629  	MOVL	AX, (g_sched+gobuf_pc)(BX)
   630  	MOVL	$0, (g_sched+gobuf_ret)(BX)
   631  	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
   632  	POPL	BX
   633  	POPL	AX
   634  	RET
   635  
   636  // asmcgocall(void(*fn)(void*), void *arg)
   637  // Call fn(arg) on the scheduler stack,
   638  // aligned appropriately for the gcc ABI.
   639  // See cgocall.c for more details.
   640  TEXT ·asmcgocall(SB),NOSPLIT,$0-8
   641  	MOVL	fn+0(FP), AX	// asmcgocall<> takes fn in AX
   642  	MOVL	arg+4(FP), BX	// and arg in BX
   643  	CALL	asmcgocall<>(SB)
   644  	RET
   645  
   646  TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-12	// like asmcgocall, but also returns the AX left by asmcgocall<>
   647  	GO_ARGS
   648  	MOVL	fn+0(FP), AX	// asmcgocall<> takes fn in AX
   649  	MOVL	arg+4(FP), BX	// and arg in BX
   650  	CALL	asmcgocall<>(SB)
   651  	MOVL	AX, ret+8(FP)
   652  	RET
   653  
   654  // asmcgocall common code. fn in AX, arg in BX. returns errno in AX.
   655  //
   656  // We need to include dummy space for parameters.  If there is a callback,
   657  // ·cgocallback_gofunc will splice the callback onto the goroutine stack.
   658  // Since ·cgocallback_gofunc expects 12 bytes of parameters, we need to place
   659  // dummy parameters here (this frame reserves and zeroes 16 bytes).
   660  TEXT asmcgocall<>(SB),NOSPLIT,$16-0
   661  	NO_LOCAL_POINTERS
   662  	MOVL	$0, 0(SP)
   663  	MOVL	$0, 4(SP)
   664  	MOVL	$0, 8(SP)
   665  	MOVL	$0, 12(SP)
   666  	
   667  	// fn in AX, arg in BX
   668  	MOVL	SP, DX	// DX = SP on entry to the switch, used below to record stack depth
   669  
   670  	// Figure out if we need to switch to m->g0 stack.
   671  	// We get called to create new OS threads too, and those
   672  	// come in on the m->g0 stack already.
   673  	get_tls(CX)
   674  	MOVL	g(CX), BP
   675  	MOVL	g_m(BP), BP
   676  	MOVL	m_g0(BP), SI
   677  	MOVL	g(CX), DI
   678  	CMPL	SI, DI
   679  	JEQ	5(PC)	// already on g0: skip all four switch instructions, including the SP reload (was 4(PC), which landed on the SP reload)
   680  	CALL	gosave<>(SB)
   681  	get_tls(CX)
   682  	MOVL	SI, g(CX)
   683  	MOVL	(g_sched+gobuf_sp)(SI), SP
   684  
   685  	// Now on a scheduling stack (a pthread-created stack).
   686  	SUBL	$32, SP
   687  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   688  	MOVL	DI, 8(SP)	// save g
   689  	MOVL	(g_stack+stack_hi)(DI), DI
   690  	SUBL	DX, DI
   691  	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   692  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   693  	CALL	AX
   694  
   695  	// Restore registers, g, stack pointer.
   696  	get_tls(CX)
   697  	MOVL	8(SP), DI	// DI = g saved before the call
   698  	MOVL	(g_stack+stack_hi)(DI), SI
   699  	SUBL	4(SP), SI	// SI = g.stack.hi - saved depth = SP to return to
   700  	MOVL	DI, g(CX)
   701  	MOVL	SI, SP
   702  	RET
   703  
   704  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   705  // Turn the fn into a Go func (by taking its address) and call
   706  // cgocallback_gofunc.
   707  TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
   708  	LEAL	fn+0(FP), AX	// address of the fn argument slot, passed as the FuncVal*
   709  	MOVL	AX, 0(SP)
   710  	MOVL	frame+4(FP), AX
   711  	MOVL	AX, 4(SP)
   712  	MOVL	framesize+8(FP), AX
   713  	MOVL	AX, 8(SP)
   714  	MOVL	$runtime·cgocallback_gofunc(SB), AX	// indirect call through AX
   715  	CALL	AX
   716  	RET
   717  
   718  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   719  // See cgocall.c for more details.
   720  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-12
   721  	NO_LOCAL_POINTERS
   722  
   723  	// If g is nil, Go did not create the current thread.
   724  	// Call needm to obtain one for temporary use.
   725  	// In this case, we're running on the thread stack, so there's
   726  	// lots of space, but the linker doesn't know. Hide the call from
   727  	// the linker analysis by using an indirect call through AX.
   728  	get_tls(CX)
   729  #ifdef GOOS_windows
   730  	MOVL	$0, BP
   731  	CMPL	CX, $0
   732  	JEQ	2(PC) // TODO
   733  #endif
   734  	MOVL	g(CX), BP
   735  	CMPL	BP, $0
   736  	JEQ	needm
   737  	MOVL	g_m(BP), BP
   738  	MOVL	BP, DX // saved copy of oldm
   739  	JMP	havem
   740  needm:
   741  	MOVL	$0, 0(SP)
   742  	MOVL	$runtime·needm(SB), AX
   743  	CALL	AX
   744  	MOVL	0(SP), DX	// DX = word zeroed above (oldm == nil), assuming needm leaves it untouched
   745  	get_tls(CX)
   746  	MOVL	g(CX), BP
   747  	MOVL	g_m(BP), BP
   748  
   749  havem:
   750  	// Now there's a valid m, and we're running on its m->g0.
   751  	// Save current m->g0->sched.sp on stack and then set it to SP.
   752  	// Save current sp in m->g0->sched.sp in preparation for
   753  	// switch back to m->curg stack.
   754  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   755  	MOVL	m_g0(BP), SI
   756  	MOVL	(g_sched+gobuf_sp)(SI), AX
   757  	MOVL	AX, 0(SP)
   758  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   759  
   760  	// Switch to m->curg stack and call runtime.cgocallbackg.
   761  	// Because we are taking over the execution of m->curg
   762  	// but *not* resuming what had been running, we need to
   763  	// save that information (m->curg->sched) so we can restore it.
   764  	// We can restore m->curg->sched.sp easily, because calling
   765  	// runtime.cgocallbackg leaves SP unchanged upon return.
   766  	// To save m->curg->sched.pc, we push it onto the stack.
   767  	// This has the added benefit that it looks to the traceback
   768  	// routine like cgocallbackg is going to return to that
   769  	// PC (because the frame we allocate below has the same
   770  	// size as cgocallback_gofunc's frame declared above)
   771  	// so that the traceback will seamlessly trace back into
   772  	// the earlier calls.
   773  	//
   774  	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
   775  	// 4(SP) and 8(SP) are unused.
   776  	MOVL	m_curg(BP), SI
   777  	MOVL	SI, g(CX)
   778  	MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   779  	MOVL	(g_sched+gobuf_pc)(SI), BP
   780  	MOVL	BP, -4(DI)	// saved curg->sched.pc sits where a return address would
   781  	LEAL	-(4+12)(DI), SP	// 4 bytes for the saved pc + 12-byte frame
   782  	MOVL	DX, 0(SP)
   783  	CALL	runtime·cgocallbackg(SB)
   784  	MOVL	0(SP), DX	// reload oldm
   785  
   786  	// Restore g->sched (== m->curg->sched) from saved values.
   787  	get_tls(CX)
   788  	MOVL	g(CX), SI
   789  	MOVL	12(SP), BP	// the pc stored at -4(DI) above
   790  	MOVL	BP, (g_sched+gobuf_pc)(SI)
   791  	LEAL	(12+4)(SP), DI	// the original curg->sched.sp
   792  	MOVL	DI, (g_sched+gobuf_sp)(SI)
   793  
   794  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   795  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   796  	// so we do not have to restore it.)
   797  	MOVL	g(CX), BP
   798  	MOVL	g_m(BP), BP
   799  	MOVL	m_g0(BP), SI
   800  	MOVL	SI, g(CX)
   801  	MOVL	(g_sched+gobuf_sp)(SI), SP
   802  	MOVL	0(SP), AX	// the g0->sched.sp value saved at havem
   803  	MOVL	AX, (g_sched+gobuf_sp)(SI)
   804  	
   805  	// If the m on entry was nil, we called needm above to borrow an m
   806  	// for the duration of the call. Since the call is over, return it with dropm.
   807  	CMPL	DX, $0
   808  	JNE 3(PC)	// oldm != nil: skip the two-instruction dropm call
   809  	MOVL	$runtime·dropm(SB), AX
   810  	CALL	AX
   811  
   812  	// Done!
   813  	RET
   814  
   815  // void setg(G*); set g. for use by needm.
   816  TEXT runtime·setg(SB), NOSPLIT, $0-4
   817  	MOVL	gg+0(FP), BX
   818  #ifdef GOOS_windows
   819  	CMPL	BX, $0
   820  	JNE	settls
   821  	MOVL	$0, 0x14(FS)	// gg == nil: clear the slot at 0x14(FS) and return
   822  	RET
   823  settls:
   824  	MOVL	g_m(BX), AX
   825  	LEAL	m_tls(AX), AX
   826  	MOVL	AX, 0x14(FS)	// point 0x14(FS) at gg->m->tls
   827  #endif
   828  	get_tls(CX)
   829  	MOVL	BX, g(CX)
   830  	RET
   831  
   832  // void setg_gcc(G*); set g. for use by gcc (rt0_go passes this function's address to _cgo_init)
   833  TEXT setg_gcc<>(SB), NOSPLIT, $0
   834  	get_tls(AX)
   835  	MOVL	gg+0(FP), DX
   836  	MOVL	DX, g(AX)
   837  	RET
   838  
   839  // check that g->stack.lo < SP < g->stack.hi (both compares are strict); trap with INT $3 otherwise
   840  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   841  	get_tls(CX)
   842  	MOVL	g(CX), AX
   843  	CMPL	(g_stack+stack_hi)(AX), SP
   844  	JHI	2(PC)	// stack.hi > SP: skip the trap
   845  	INT	$3
   846  	CMPL	SP, (g_stack+stack_lo)(AX)
   847  	JHI	2(PC)	// SP > stack.lo: skip the trap
   848  	INT	$3
   849  	RET
   850  
   851  TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8	// return the word stored 4 bytes below the address passed in argp
   852  	MOVL	argp+0(FP),AX		// addr of first arg
   853  	MOVL	-4(AX),AX		// get calling pc
   854  	MOVL	AX, ret+4(FP)
   855  	RET
   856  
   857  TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8	// same instruction sequence as getcallerpc, with the argument named p
   858  	MOVL	p+0(FP),AX		// addr of first arg
   859  	MOVL	-4(AX),AX		// get calling pc
   860  	MOVL	AX, ret+4(FP)
   861  	RET
   862  
   863  TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8	// overwrite the word stored 4 bytes below the address passed in argp with pc
   864  	MOVL	argp+0(FP),AX		// addr of first arg
   865  	MOVL	pc+4(FP), BX
   866  	MOVL	BX, -4(AX)		// set calling pc
   867  	RET
   868  
   869  TEXT runtime·getcallersp(SB), NOSPLIT, $0-8	// return argp unchanged
   870  	MOVL	argp+0(FP), AX
   871  	MOVL	AX, ret+4(FP)
   872  	RET
   873  
   874  // func gogetcallersp(p unsafe.Pointer) uintptr; returns p unchanged
   875  TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-8
   876  	MOVL	p+0(FP),AX		// addr of first arg
   877  	MOVL	AX, ret+4(FP)
   878  	RET
   879  
   880  // int64 runtime·cputicks(void), so really
   881  // void runtime·cputicks(int64 *ticks)
   882  TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   883  	RDTSC	// timestamp counter: low half in AX, high half in DX
   884  	MOVL	AX, ret_lo+0(FP)
   885  	MOVL	DX, ret_hi+4(FP)
   886  	RET
   887  
   888  TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0	// call runtime·setldt(7, &tls0, 32)
   889  	// set up ldt 7 to point at tls0
   890  	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   891  	// the entry number is just a hint.  setldt will set up GS with what it used.
   892  	MOVL	$7, 0(SP)
   893  	LEAL	runtime·tls0(SB), AX
   894  	MOVL	AX, 4(SP)
   895  	MOVL	$32, 8(SP)	// sizeof(tls array)
   896  	CALL	runtime·setldt(SB)
   897  	RET
   898  
   899  TEXT runtime·emptyfunc(SB),0,$0-0	// flags are 0, not NOSPLIT; rt0_go calls this to "fault if stack check is wrong", presumably via the assembler-inserted stack check
   900  	RET
   901  
   902  TEXT runtime·abort(SB),NOSPLIT,$0-0	// raise a breakpoint trap. NOTE(review): no instruction follows the INT, so resuming after the trap would run into whatever code comes next
   903  	INT $0x3
   904  
   905  // hash function using AES hardware instructions; loads ptr/size and tail-jumps to aeshashbody
   906  TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   907  	MOVL	p+0(FP), AX	// ptr to data
   908  	MOVL	s+4(FP), CX	// size
   909  	JMP	runtime·aeshashbody(SB)
   910  
   911  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-16	// hash a string's bytes: unpack data pointer and length, then tail-jump to aeshashbody
   912  	MOVL	p+0(FP), AX	// ptr to string object
   913  	// s+4(FP) is ignored, it is always sizeof(String)
   914  	MOVL	4(AX), CX	// length of string
   915  	MOVL	(AX), AX	// string data
   916  	JMP	runtime·aeshashbody(SB)
   917  
   918  // AX: data
   919  // CX: length
   920  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-16
   921  	MOVL	h+8(FP), X0	// seed to low 32 bits of xmm0
   922  	PINSRD	$1, CX, X0	// size to next 32 bits of xmm0
   923  	MOVO	runtime·aeskeysched+0(SB), X2	// X2, X3 = first two 16-byte words of aeskeysched
   924  	MOVO	runtime·aeskeysched+16(SB), X3
   925  	CMPL	CX, $16
   926  	JB	aessmall
   927  aesloop:
   928  	CMPL	CX, $16
   929  	JBE	aesloopend
   930  	MOVOU	(AX), X1	// mix one full 16-byte block per iteration
   931  	AESENC	X2, X0
   932  	AESENC	X1, X0
   933  	SUBL	$16, CX
   934  	ADDL	$16, AX
   935  	JMP	aesloop
   936  // 1-16 bytes remaining
   937  aesloopend:
   938  	// This load may overlap with the previous load above.
   939  	// We'll hash some bytes twice, but that's ok.
   940  	MOVOU	-16(AX)(CX*1), X1	// the 16 bytes ending at the last input byte
   941  	JMP	partial
   942  // 0-15 bytes
   943  aessmall:
   944  	TESTL	CX, CX
   945  	JE	finalize	// 0 bytes
   946  
   947  	CMPB	AX, $0xf0	// test the low byte of the data address
   948  	JA	highpartial
   949  
   950  	// 16 bytes loaded at this address won't cross
   951  	// a page boundary, so we can load it directly.
   952  	MOVOU	(AX), X1
   953  	ADDL	CX, CX	// CX = 2*len, so (CX*8) indexes 16-byte table entry number len
   954  	PAND	masks<>(SB)(CX*8), X1
   955  	JMP	partial
   956  highpartial:
   957  	// address ends in 1111xxxx.  Might be up against
   958  	// a page boundary, so load ending at last byte.
   959  	// Then shift bytes down using pshufb.
   960  	MOVOU	-16(AX)(CX*1), X1
   961  	ADDL	CX, CX	// CX = 2*len, so (CX*8) indexes 16-byte table entry number len
   962  	PSHUFB	shifts<>(SB)(CX*8), X1
   963  partial:
   964  	// incorporate partial block into hash
   965  	AESENC	X3, X0
   966  	AESENC	X1, X0
   967  finalize:	
   968  	// finalize hash
   969  	AESENC	X2, X0
   970  	AESENC	X3, X0
   971  	AESENC	X2, X0
   972  	MOVL	X0, ret+12(FP)	// result = low 32 bits of X0
   973  	RET
   974  
   975  TEXT runtime·aeshash32(SB),NOSPLIT,$0-16	// hash the 4 bytes at p with seed h
   976  	MOVL	p+0(FP), AX	// ptr to data
   977  	// s+4(FP) is ignored, it is always sizeof(int32)
   978  	MOVL	h+8(FP), X0	// seed
   979  	PINSRD	$1, (AX), X0	// data
   980  	AESENC	runtime·aeskeysched+0(SB), X0
   981  	AESENC	runtime·aeskeysched+16(SB), X0
   982  	AESENC	runtime·aeskeysched+0(SB), X0
   983  	MOVL	X0, ret+12(FP)	// result = low 32 bits of X0
   984  	RET
   985  
   986  TEXT runtime·aeshash64(SB),NOSPLIT,$0-16	// hash the 8 bytes at p with seed h
   987  	MOVL	p+0(FP), AX	// ptr to data
   988  	// s+4(FP) is ignored, it is always sizeof(int64)
   989  	MOVQ	(AX), X0	// data
   990  	PINSRD	$2, h+8(FP), X0	// seed
   991  	AESENC	runtime·aeskeysched+0(SB), X0
   992  	AESENC	runtime·aeskeysched+16(SB), X0
   993  	AESENC	runtime·aeskeysched+0(SB), X0
   994  	MOVL	X0, ret+12(FP)	// result = low 32 bits of X0
   995  	RET
   996  
   997  // simple mask to get rid of data in the high part of the register.
        // The table holds 16 rows of 16 bytes.  Row n (at byte offset n*16) has
        // its low n bytes set to 0xff and the rest zero, so a PAND with row n
        // keeps only the first n bytes of a 16-byte load.  The user in this file
        // doubles the byte count and then scales it by 8 to reach offset n*16.
   998  DATA masks<>+0x00(SB)/4, $0x00000000
   999  DATA masks<>+0x04(SB)/4, $0x00000000
  1000  DATA masks<>+0x08(SB)/4, $0x00000000
  1001  DATA masks<>+0x0c(SB)/4, $0x00000000
  1002  	
  1003  DATA masks<>+0x10(SB)/4, $0x000000ff
  1004  DATA masks<>+0x14(SB)/4, $0x00000000
  1005  DATA masks<>+0x18(SB)/4, $0x00000000
  1006  DATA masks<>+0x1c(SB)/4, $0x00000000
  1007  	
  1008  DATA masks<>+0x20(SB)/4, $0x0000ffff
  1009  DATA masks<>+0x24(SB)/4, $0x00000000
  1010  DATA masks<>+0x28(SB)/4, $0x00000000
  1011  DATA masks<>+0x2c(SB)/4, $0x00000000
  1012  	
  1013  DATA masks<>+0x30(SB)/4, $0x00ffffff
  1014  DATA masks<>+0x34(SB)/4, $0x00000000
  1015  DATA masks<>+0x38(SB)/4, $0x00000000
  1016  DATA masks<>+0x3c(SB)/4, $0x00000000
  1017  	
  1018  DATA masks<>+0x40(SB)/4, $0xffffffff
  1019  DATA masks<>+0x44(SB)/4, $0x00000000
  1020  DATA masks<>+0x48(SB)/4, $0x00000000
  1021  DATA masks<>+0x4c(SB)/4, $0x00000000
  1022  	
  1023  DATA masks<>+0x50(SB)/4, $0xffffffff
  1024  DATA masks<>+0x54(SB)/4, $0x000000ff
  1025  DATA masks<>+0x58(SB)/4, $0x00000000
  1026  DATA masks<>+0x5c(SB)/4, $0x00000000
  1027  	
  1028  DATA masks<>+0x60(SB)/4, $0xffffffff
  1029  DATA masks<>+0x64(SB)/4, $0x0000ffff
  1030  DATA masks<>+0x68(SB)/4, $0x00000000
  1031  DATA masks<>+0x6c(SB)/4, $0x00000000
  1032  	
  1033  DATA masks<>+0x70(SB)/4, $0xffffffff
  1034  DATA masks<>+0x74(SB)/4, $0x00ffffff
  1035  DATA masks<>+0x78(SB)/4, $0x00000000
  1036  DATA masks<>+0x7c(SB)/4, $0x00000000
  1037  	
  1038  DATA masks<>+0x80(SB)/4, $0xffffffff
  1039  DATA masks<>+0x84(SB)/4, $0xffffffff
  1040  DATA masks<>+0x88(SB)/4, $0x00000000
  1041  DATA masks<>+0x8c(SB)/4, $0x00000000
  1042  	
  1043  DATA masks<>+0x90(SB)/4, $0xffffffff
  1044  DATA masks<>+0x94(SB)/4, $0xffffffff
  1045  DATA masks<>+0x98(SB)/4, $0x000000ff
  1046  DATA masks<>+0x9c(SB)/4, $0x00000000
  1047  	
  1048  DATA masks<>+0xa0(SB)/4, $0xffffffff
  1049  DATA masks<>+0xa4(SB)/4, $0xffffffff
  1050  DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1051  DATA masks<>+0xac(SB)/4, $0x00000000
  1052  	
  1053  DATA masks<>+0xb0(SB)/4, $0xffffffff
  1054  DATA masks<>+0xb4(SB)/4, $0xffffffff
  1055  DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1056  DATA masks<>+0xbc(SB)/4, $0x00000000
  1057  	
  1058  DATA masks<>+0xc0(SB)/4, $0xffffffff
  1059  DATA masks<>+0xc4(SB)/4, $0xffffffff
  1060  DATA masks<>+0xc8(SB)/4, $0xffffffff
  1061  DATA masks<>+0xcc(SB)/4, $0x00000000
  1062  	
  1063  DATA masks<>+0xd0(SB)/4, $0xffffffff
  1064  DATA masks<>+0xd4(SB)/4, $0xffffffff
  1065  DATA masks<>+0xd8(SB)/4, $0xffffffff
  1066  DATA masks<>+0xdc(SB)/4, $0x000000ff
  1067  	
  1068  DATA masks<>+0xe0(SB)/4, $0xffffffff
  1069  DATA masks<>+0xe4(SB)/4, $0xffffffff
  1070  DATA masks<>+0xe8(SB)/4, $0xffffffff
  1071  DATA masks<>+0xec(SB)/4, $0x0000ffff
  1072  	
  1073  DATA masks<>+0xf0(SB)/4, $0xffffffff
  1074  DATA masks<>+0xf4(SB)/4, $0xffffffff
  1075  DATA masks<>+0xf8(SB)/4, $0xffffffff
  1076  DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1077  
        // 16 rows * 16 bytes = 256 bytes, read-only.
  1078  GLOBL masks<>(SB),RODATA,$256
  1079  
  1080  // these are arguments to pshufb.  They move data down from
  1081  // the high bytes of the register to the low bytes of the register.
  1082  // index is how many bytes to move.
        // The table holds 16 rows of 16 bytes.  In row n (at byte offset n*16)
        // control byte i, for i < n, is 16-n+i, so PSHUFB copies source byte
        // 16-n+i into destination byte i; the remaining control bytes are 0xff,
        // which makes PSHUFB zero those destination bytes.
        // Row 0 is all zero rather than all 0xff; the user in this file branches
        // away on a zero byte count before it would index the table.
  1083  DATA shifts<>+0x00(SB)/4, $0x00000000
  1084  DATA shifts<>+0x04(SB)/4, $0x00000000
  1085  DATA shifts<>+0x08(SB)/4, $0x00000000
  1086  DATA shifts<>+0x0c(SB)/4, $0x00000000
  1087  	
  1088  DATA shifts<>+0x10(SB)/4, $0xffffff0f
  1089  DATA shifts<>+0x14(SB)/4, $0xffffffff
  1090  DATA shifts<>+0x18(SB)/4, $0xffffffff
  1091  DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1092  	
  1093  DATA shifts<>+0x20(SB)/4, $0xffff0f0e
  1094  DATA shifts<>+0x24(SB)/4, $0xffffffff
  1095  DATA shifts<>+0x28(SB)/4, $0xffffffff
  1096  DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1097  	
  1098  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
  1099  DATA shifts<>+0x34(SB)/4, $0xffffffff
  1100  DATA shifts<>+0x38(SB)/4, $0xffffffff
  1101  DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1102  	
  1103  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
  1104  DATA shifts<>+0x44(SB)/4, $0xffffffff
  1105  DATA shifts<>+0x48(SB)/4, $0xffffffff
  1106  DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1107  	
  1108  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
  1109  DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1110  DATA shifts<>+0x58(SB)/4, $0xffffffff
  1111  DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1112  	
  1113  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
  1114  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1115  DATA shifts<>+0x68(SB)/4, $0xffffffff
  1116  DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1117  	
  1118  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
  1119  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1120  DATA shifts<>+0x78(SB)/4, $0xffffffff
  1121  DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1122  	
  1123  DATA shifts<>+0x80(SB)/4, $0x0b0a0908
  1124  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1125  DATA shifts<>+0x88(SB)/4, $0xffffffff
  1126  DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1127  	
  1128  DATA shifts<>+0x90(SB)/4, $0x0a090807
  1129  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1130  DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1131  DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1132  	
  1133  DATA shifts<>+0xa0(SB)/4, $0x09080706
  1134  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1135  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1136  DATA shifts<>+0xac(SB)/4, $0xffffffff
  1137  	
  1138  DATA shifts<>+0xb0(SB)/4, $0x08070605
  1139  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1140  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1141  DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1142  	
  1143  DATA shifts<>+0xc0(SB)/4, $0x07060504
  1144  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1145  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1146  DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1147  	
  1148  DATA shifts<>+0xd0(SB)/4, $0x06050403
  1149  DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1150  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1151  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1152  	
  1153  DATA shifts<>+0xe0(SB)/4, $0x05040302
  1154  DATA shifts<>+0xe4(SB)/4, $0x09080706
  1155  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1156  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1157  	
  1158  DATA shifts<>+0xf0(SB)/4, $0x04030201
  1159  DATA shifts<>+0xf4(SB)/4, $0x08070605
  1160  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1161  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1162  
        // 16 rows * 16 bytes = 256 bytes, read-only.
  1163  GLOBL shifts<>(SB),RODATA,$256
  1164  
  1165  TEXT runtime·memeq(SB),NOSPLIT,$0-13
        	// Stack-argument wrapper around memeqbody: reports in the byte at
        	// ret+12(FP) whether the size bytes at a and b are equal.
        	// memeqbody takes a in SI, b in DI and the count in BX.
  1166  	MOVL	a+0(FP), SI
  1167  	MOVL	b+4(FP), DI
  1168  	MOVL	size+8(FP), BX
  1169  	CALL	runtime·memeqbody(SB)
  1170  	MOVB	AX, ret+12(FP)	// only the low byte of AX is the result
  1171  	RET
  1172  
  1173  // eqstring tests whether two strings are equal.
  1174  // See runtime_test.go:eqstring_generic for
  1175  // equivalent Go code.
        // Arguments: s1 (pointer at +0, length at +4), s2 (pointer at +8,
        // length at +12).  The boolean result is the byte at v+16(FP).
  1176  TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1177  	MOVL	s1len+4(FP), AX
  1178  	MOVL	s2len+12(FP), BX
  1179  	CMPL	AX, BX
  1180  	JNE	different	// different lengths can never be equal
  1181  	MOVL	s1str+0(FP), SI
  1182  	MOVL	s2str+8(FP), DI
  1183  	CMPL	SI, DI
  1184  	JEQ	same	// same pointer and same length: skip the byte compare
        	// BX still holds the common length, which is where memeqbody
        	// expects its count.
  1185  	CALL	runtime·memeqbody(SB)
  1186  	MOVB	AX, v+16(FP)
  1187  	RET
  1188  same:
  1189  	MOVB	$1, v+16(FP)
  1190  	RET
  1191  different:
  1192  	MOVB	$0, v+16(FP)
  1193  	RET
  1194  
  1195  TEXT bytes·Equal(SB),NOSPLIT,$0-25
        	// Reports in the byte at ret+24(FP) whether slices a and b have the
        	// same length and the same contents.  Each slice occupies 12 bytes of
        	// arguments; only the pointer (+0) and length (+4) words are read.
  1196  	MOVL	a_len+4(FP), BX
  1197  	MOVL	b_len+16(FP), CX
  1198  	XORL	AX, AX	// result defaults to 0 for the length-mismatch exit
  1199  	CMPL	BX, CX
  1200  	JNE	eqret
  1201  	MOVL	a+0(FP), SI
  1202  	MOVL	b+12(FP), DI
        	// BX already holds the common length, memeqbody's count register.
  1203  	CALL	runtime·memeqbody(SB)
  1204  eqret:
  1205  	MOVB	AX, ret+24(FP)
  1206  	RET
  1207  
  1208  // a in SI
  1209  // b in DI
  1210  // count in BX
        // Result: AX = 1 if the count bytes at a and b are equal, 0 otherwise.
        // SI, DI, BX, CX, DX and the flags are overwritten; the 64-byte path
        // also overwrites X0-X7.
  1211  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1212  	XORL	AX, AX	// default result 0; the bare RETs below rely on it
  1213  
  1214  	CMPL	BX, $4
  1215  	JB	small	// fewer than 4 bytes: handled with a single masked load
  1216  
  1217  	// 64 bytes at a time using xmm registers
  1218  hugeloop:
  1219  	CMPL	BX, $64
  1220  	JB	bigloop
  1221  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1222  	JE	bigloop
  1223  	MOVOU	(SI), X0
  1224  	MOVOU	(DI), X1
  1225  	MOVOU	16(SI), X2
  1226  	MOVOU	16(DI), X3
  1227  	MOVOU	32(SI), X4
  1228  	MOVOU	32(DI), X5
  1229  	MOVOU	48(SI), X6
  1230  	MOVOU	48(DI), X7
        	// Per-byte equality masks for the four 16-byte pairs, ANDed together
        	// so X0 is all ones only if all 64 bytes matched.
  1231  	PCMPEQB	X1, X0
  1232  	PCMPEQB	X3, X2
  1233  	PCMPEQB	X5, X4
  1234  	PCMPEQB	X7, X6
  1235  	PAND	X2, X0
  1236  	PAND	X6, X4
  1237  	PAND	X4, X0
  1238  	PMOVMSKB X0, DX	// one bit per byte lane; 0xffff means all equal
  1239  	ADDL	$64, SI
  1240  	ADDL	$64, DI
  1241  	SUBL	$64, BX
  1242  	CMPL	DX, $0xffff
  1243  	JEQ	hugeloop
  1244  	RET	// mismatch: AX is still 0
  1245  
  1246  	// 4 bytes at a time using 32-bit register
  1247  bigloop:
  1248  	CMPL	BX, $4
  1249  	JBE	leftover
  1250  	MOVL	(SI), CX
  1251  	MOVL	(DI), DX
  1252  	ADDL	$4, SI
  1253  	ADDL	$4, DI
  1254  	SUBL	$4, BX
  1255  	CMPL	CX, DX
  1256  	JEQ	bigloop
  1257  	RET	// mismatch: AX is still 0
  1258  
  1259  	// remaining 0-4 bytes
        	// Compares the 4 bytes that end at the end of each buffer.  This may
        	// re-read bytes that already compared equal; it stays inside the
        	// buffers because this path is only reached when the original count
        	// was at least 4.
  1260  leftover:
  1261  	MOVL	-4(SI)(BX*1), CX
  1262  	MOVL	-4(DI)(BX*1), DX
  1263  	CMPL	CX, DX
  1264  	SETEQ	AX
  1265  	RET
  1266  
        	// 0-3 bytes.
  1267  small:
  1268  	CMPL	BX, $0
  1269  	JEQ	equal	// ZF from this compare makes SETEQ at equal produce 1
  1270  
        	// CX = -8*count.  Shifts use only the low 5 bits of CX, so the
        	// effective shift amount below is 32 - 8*count bits.
  1271  	LEAL	0(BX*8), CX
  1272  	NEGL	CX
  1273  
  1274  	MOVL	SI, DX
  1275  	CMPB	DX, $0xfc	// look at the low byte of the address only
  1276  	JA	si_high
  1277  
  1278  	// load at SI won't cross a page boundary.
  1279  	MOVL	(SI), SI
  1280  	JMP	si_finish
  1281  si_high:
  1282  	// address ends in 111111xx.  Load up to bytes we want, move to correct position.
  1283  	MOVL	-4(SI)(BX*1), SI
  1284  	SHRL	CX, SI
  1285  si_finish:
  1286  
  1287  	// same for DI.
  1288  	MOVL	DI, DX
  1289  	CMPB	DX, $0xfc
  1290  	JA	di_high
  1291  	MOVL	(DI), DI
  1292  	JMP	di_finish
  1293  di_high:
  1294  	MOVL	-4(DI)(BX*1), DI
  1295  	SHRL	CX, DI
  1296  di_finish:
  1297  
        	// The wanted bytes are now in the low count bytes of SI and DI; the
        	// bytes above them may be garbage.  Shifting the difference left
        	// discards those bytes and leaves ZF set iff the wanted bytes match.
  1298  	SUBL	SI, DI
  1299  	SHLL	CX, DI
  1300  equal:
  1301  	SETEQ	AX
  1302  	RET
  1303  
  1304  TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
        	// Stack-argument wrapper around cmpbody for two strings: loads the
        	// operands into the registers cmpbody expects (SI = a, BX = alen,
        	// DI = b, DX = blen) and stores its AX result to ret+16(FP).
  1305  	MOVL	s1_base+0(FP), SI
  1306  	MOVL	s1_len+4(FP), BX
  1307  	MOVL	s2_base+8(FP), DI
  1308  	MOVL	s2_len+12(FP), DX
  1309  	CALL	runtime·cmpbody(SB)
  1310  	MOVL	AX, ret+16(FP)
  1311  	RET
  1312  
  1313  TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
        	// Stack-argument wrapper around cmpbody for two byte slices.  Each
        	// slice occupies 12 bytes of arguments; only the words at +0 and +4
        	// of each are read (loaded as cmpbody's pointer and length inputs).
        	// cmpbody's AX result is stored to ret+24(FP).
  1314  	MOVL	s1+0(FP), SI
  1315  	MOVL	s1+4(FP), BX
  1316  	MOVL	s2+12(FP), DI
  1317  	MOVL	s2+16(FP), DX
  1318  	CALL	runtime·cmpbody(SB)
  1319  	MOVL	AX, ret+24(FP)
  1320  	RET
  1321  
  1322  TEXT bytes·IndexByte(SB),NOSPLIT,$0
        	// Stores to ret+16(FP) the index of the first byte equal to c in the
        	// slice s, or -1 if there is none.
  1323  	MOVL	s+0(FP), SI
  1324  	MOVL	s_len+4(FP), CX
  1325  	MOVB	c+12(FP), AL
  1326  	MOVL	SI, DI	// SI keeps the start so the index can be derived from DI
        	// Scan forward from DI for AL, at most CX bytes; stops with ZF set
        	// on a match.
  1327  	CLD; REPN; SCASB
  1328  	JZ 3(PC)	// match: skip the two-instruction "not found" exit
  1329  	MOVL	$-1, ret+16(FP)
  1330  	RET
        	// DI points one past the matching byte, so index = DI - SI - 1.
        	// With a zero length the scan does not execute and ZF is whatever it
        	// was on entry; if this path is taken then, DI == SI and the result
        	// is still -1.
  1331  	SUBL	SI, DI
  1332  	SUBL	$1, DI
  1333  	MOVL	DI, ret+16(FP)
  1334  	RET
  1335  
  1336  TEXT strings·IndexByte(SB),NOSPLIT,$0
        	// Stores to ret+12(FP) the index of the first byte equal to c in the
        	// string s, or -1 if there is none.
  1337  	MOVL	s+0(FP), SI
  1338  	MOVL	s_len+4(FP), CX
  1339  	MOVB	c+8(FP), AL
  1340  	MOVL	SI, DI	// SI keeps the start so the index can be derived from DI
        	// Scan forward from DI for AL, at most CX bytes; stops with ZF set
        	// on a match.
  1341  	CLD; REPN; SCASB
  1342  	JZ 3(PC)	// match: skip the two-instruction "not found" exit
  1343  	MOVL	$-1, ret+12(FP)
  1344  	RET
        	// DI points one past the matching byte, so index = DI - SI - 1.
        	// With a zero length the scan does not execute and ZF is whatever it
        	// was on entry; if this path is taken then, DI == SI and the result
        	// is still -1.
  1345  	SUBL	SI, DI
  1346  	SUBL	$1, DI
  1347  	MOVL	DI, ret+12(FP)
  1348  	RET
  1349  
  1350  // input:
  1351  //   SI = a
  1352  //   DI = b
  1353  //   BX = alen
  1354  //   DX = blen
  1355  // output:
  1356  //   AX = 1/0/-1
        // Bytes are compared as unsigned values over the common prefix; if the
        // prefix is identical the lengths decide.  BP, CX, SI, DI and the flags
        // are overwritten, BX is overwritten on the cmp_diff16 exit, and the
        // 16-byte path overwrites X0 and X1.
  1357  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1358  	CMPL	SI, DI
  1359  	JEQ	cmp_allsame	// same pointer: only the lengths can differ
  1360  	CMPL	BX, DX
  1361  	MOVL	DX, BP
  1362  	CMOVLLT	BX, BP // BP = min(alen, blen)
  1363  	CMPL	BP, $4
  1364  	JB	cmp_small
  1365  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1366  	JE	cmp_mediumloop
        	// 16 bytes at a time while at least 16 remain.
  1367  cmp_largeloop:
  1368  	CMPL	BP, $16
  1369  	JB	cmp_mediumloop
  1370  	MOVOU	(SI), X0
  1371  	MOVOU	(DI), X1
  1372  	PCMPEQB X0, X1
  1373  	PMOVMSKB X1, AX
  1374  	XORL	$0xffff, AX	// convert EQ to NE
  1375  	JNE	cmp_diff16	// branch if at least one byte is not equal
  1376  	ADDL	$16, SI
  1377  	ADDL	$16, DI
  1378  	SUBL	$16, BP
  1379  	JMP	cmp_largeloop
  1380  
  1381  cmp_diff16:
  1382  	BSFL	AX, BX	// index of first byte that differs
  1383  	XORL	AX, AX
  1384  	MOVB	(SI)(BX*1), CX
  1385  	CMPB	CX, (DI)(BX*1)
  1386  	SETHI	AX	// unsigned: 1 if a's byte is the larger one
  1387  	LEAL	-1(AX*2), AX	// convert 1/0 to +1/-1
  1388  	RET
  1389  
        	// 4 bytes at a time while more than 4 remain.
  1390  cmp_mediumloop:
  1391  	CMPL	BP, $4
  1392  	JBE	cmp_0through4
  1393  	MOVL	(SI), AX
  1394  	MOVL	(DI), CX
  1395  	CMPL	AX, CX
  1396  	JNE	cmp_diff4
  1397  	ADDL	$4, SI
  1398  	ADDL	$4, DI
  1399  	SUBL	$4, BP
  1400  	JMP	cmp_mediumloop
  1401  
        	// Compares the 4 bytes that end at the end of the common prefix.  This
        	// may re-read bytes that already compared equal; it stays inside the
        	// buffers because this path is only reached when the common length
        	// was at least 4.
  1402  cmp_0through4:
  1403  	MOVL	-4(SI)(BP*1), AX
  1404  	MOVL	-4(DI)(BP*1), CX
  1405  	CMPL	AX, CX
  1406  	JEQ	cmp_allsame
  1407  
        	// AX and CX hold differing 4-byte words from a and b.  After the byte
        	// swap the first byte in memory is the most significant, so the
        	// highest differing bit belongs to the first differing byte.
  1408  cmp_diff4:
  1409  	BSWAPL	AX	// reverse order of bytes
  1410  	BSWAPL	CX
  1411  	XORL	AX, CX	// find bit differences
  1412  	BSRL	CX, CX	// index of highest bit difference
  1413  	SHRL	CX, AX	// move a's bit to bottom
  1414  	ANDL	$1, AX	// mask bit
  1415  	LEAL	-1(AX*2), AX // 1/0 => +1/-1
  1416  	RET
  1417  
  1418  	// 0-3 bytes in common
  1419  cmp_small:
        	// CX = -8*BP.  Shifts use only the low 5 bits of CX, so the effective
        	// shift amount below is 32 - 8*BP bits.  NEGL sets ZF when BP is 0.
  1420  	LEAL	(BP*8), CX
  1421  	NEGL	CX
  1422  	JEQ	cmp_allsame
  1423  
  1424  	// load si
  1425  	CMPB	SI, $0xfc	// low address byte above 0xfc: a 4-byte load could cross a page
  1426  	JA	cmp_si_high
  1427  	MOVL	(SI), SI
  1428  	JMP	cmp_si_finish
  1429  cmp_si_high:
        	// Load the 4 bytes ending at the last wanted byte, then move the
        	// wanted bytes down to the low end.
  1430  	MOVL	-4(SI)(BP*1), SI
  1431  	SHRL	CX, SI
  1432  cmp_si_finish:
        	// Shift the wanted bytes to the top of the register; the bytes that
        	// were above them are discarded and the low bytes become zero.
  1433  	SHLL	CX, SI
  1434  
  1435  	// same for di
  1436  	CMPB	DI, $0xfc
  1437  	JA	cmp_di_high
  1438  	MOVL	(DI), DI
  1439  	JMP	cmp_di_finish
  1440  cmp_di_high:
  1441  	MOVL	-4(DI)(BP*1), DI
  1442  	SHRL	CX, DI
  1443  cmp_di_finish:
  1444  	SHLL	CX, DI
  1445  
  1446  	BSWAPL	SI	// reverse order of bytes
  1447  	BSWAPL	DI
  1448  	XORL	SI, DI	// find bit differences
  1449  	JEQ	cmp_allsame
  1450  	BSRL	DI, CX	// index of highest bit difference
  1451  	SHRL	CX, SI	// move a's bit to bottom
  1452  	ANDL	$1, SI	// mask bit
  1453  	LEAL	-1(SI*2), AX // 1/0 => +1/-1
  1454  	RET
  1455  
  1456  	// all the bytes in common are the same, so we just need
  1457  	// to compare the lengths.
  1458  cmp_allsame:
  1459  	XORL	AX, AX
  1460  	XORL	CX, CX
  1461  	CMPL	BX, DX
  1462  	SETGT	AX	// 1 if alen > blen
  1463  	SETEQ	CX	// 1 if alen == blen
        	// AX*2 + CX - 1: greater gives 1, equal gives 0, less gives -1.
  1464  	LEAL	-1(CX)(AX*2), AX	// 1,0,-1 result
  1465  	RET
  1466  
  1467  // A Duff's device for zeroing memory.
  1468  // The compiler jumps to computed addresses within
  1469  // this routine to zero chunks of memory.  Do not
  1470  // change this code without also changing the code
  1471  // in ../../cmd/8g/ggen.c:clearfat.
  1472  // AX: zero
  1473  // DI: ptr to memory to be zeroed
  1474  // DI is updated as a side effect.
        // The body is 128 consecutive STOSL instructions followed by RET; each
        // STOSL stores AX at (DI) and steps DI by 4.  Because callers enter at a
        // computed offset, no instruction may be added, removed or re-encoded.
        // NOTE(review): STOSL moves DI upward only when the direction flag is
        // clear; no CLD is issued here, so that is presumably guaranteed by the
        // callers - confirm.
  1475  TEXT runtime·duffzero(SB), NOSPLIT, $0-0
  1476  	STOSL
  1477  	STOSL
  1478  	STOSL
  1479  	STOSL
  1480  	STOSL
  1481  	STOSL
  1482  	STOSL
  1483  	STOSL
  1484  	STOSL
  1485  	STOSL
  1486  	STOSL
  1487  	STOSL
  1488  	STOSL
  1489  	STOSL
  1490  	STOSL
  1491  	STOSL
  1492  	STOSL
  1493  	STOSL
  1494  	STOSL
  1495  	STOSL
  1496  	STOSL
  1497  	STOSL
  1498  	STOSL
  1499  	STOSL
  1500  	STOSL
  1501  	STOSL
  1502  	STOSL
  1503  	STOSL
  1504  	STOSL
  1505  	STOSL
  1506  	STOSL
  1507  	STOSL
  1508  	STOSL
  1509  	STOSL
  1510  	STOSL
  1511  	STOSL
  1512  	STOSL
  1513  	STOSL
  1514  	STOSL
  1515  	STOSL
  1516  	STOSL
  1517  	STOSL
  1518  	STOSL
  1519  	STOSL
  1520  	STOSL
  1521  	STOSL
  1522  	STOSL
  1523  	STOSL
  1524  	STOSL
  1525  	STOSL
  1526  	STOSL
  1527  	STOSL
  1528  	STOSL
  1529  	STOSL
  1530  	STOSL
  1531  	STOSL
  1532  	STOSL
  1533  	STOSL
  1534  	STOSL
  1535  	STOSL
  1536  	STOSL
  1537  	STOSL
  1538  	STOSL
  1539  	STOSL
  1540  	STOSL
  1541  	STOSL
  1542  	STOSL
  1543  	STOSL
  1544  	STOSL
  1545  	STOSL
  1546  	STOSL
  1547  	STOSL
  1548  	STOSL
  1549  	STOSL
  1550  	STOSL
  1551  	STOSL
  1552  	STOSL
  1553  	STOSL
  1554  	STOSL
  1555  	STOSL
  1556  	STOSL
  1557  	STOSL
  1558  	STOSL
  1559  	STOSL
  1560  	STOSL
  1561  	STOSL
  1562  	STOSL
  1563  	STOSL
  1564  	STOSL
  1565  	STOSL
  1566  	STOSL
  1567  	STOSL
  1568  	STOSL
  1569  	STOSL
  1570  	STOSL
  1571  	STOSL
  1572  	STOSL
  1573  	STOSL
  1574  	STOSL
  1575  	STOSL
  1576  	STOSL
  1577  	STOSL
  1578  	STOSL
  1579  	STOSL
  1580  	STOSL
  1581  	STOSL
  1582  	STOSL
  1583  	STOSL
  1584  	STOSL
  1585  	STOSL
  1586  	STOSL
  1587  	STOSL
  1588  	STOSL
  1589  	STOSL
  1590  	STOSL
  1591  	STOSL
  1592  	STOSL
  1593  	STOSL
  1594  	STOSL
  1595  	STOSL
  1596  	STOSL
  1597  	STOSL
  1598  	STOSL
  1599  	STOSL
  1600  	STOSL
  1601  	STOSL
  1602  	STOSL
  1603  	STOSL
  1604  	RET
  1605  
  1606  // A Duff's device for copying memory.
  1607  // The compiler jumps to computed addresses within
  1608  // this routine to copy chunks of memory.  Source
  1609  // and destination must not overlap.  Do not
  1610  // change this code without also changing the code
  1611  // in ../../cmd/8g/cgen.c:sgen.
  1612  // SI: ptr to source memory
  1613  // DI: ptr to destination memory
  1614  // SI and DI are updated as a side effect.
  1615  
  1616  // NOTE: this is equivalent to a sequence of MOVSL but
  1617  // for some reason MOVSL is really slow.
  1618  TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
        	// 128 identical four-instruction groups follow, each copying one
        	// 32-bit word from (SI) to (DI) through CX and then advancing both
        	// pointers by 4.  CX is overwritten.  Because callers enter at a
        	// computed offset, every group must keep exactly the same
        	// instructions and encoding.
  1619  	MOVL	(SI),CX
  1620  	ADDL	$4,SI
  1621  	MOVL	CX,(DI)
  1622  	ADDL	$4,DI
  1623  	
  1624  	MOVL	(SI),CX
  1625  	ADDL	$4,SI
  1626  	MOVL	CX,(DI)
  1627  	ADDL	$4,DI
  1628  	
  1629  	MOVL	(SI),CX
  1630  	ADDL	$4,SI
  1631  	MOVL	CX,(DI)
  1632  	ADDL	$4,DI
  1633  	
  1634  	MOVL	(SI),CX
  1635  	ADDL	$4,SI
  1636  	MOVL	CX,(DI)
  1637  	ADDL	$4,DI
  1638  	
  1639  	MOVL	(SI),CX
  1640  	ADDL	$4,SI
  1641  	MOVL	CX,(DI)
  1642  	ADDL	$4,DI
  1643  	
  1644  	MOVL	(SI),CX
  1645  	ADDL	$4,SI
  1646  	MOVL	CX,(DI)
  1647  	ADDL	$4,DI
  1648  	
  1649  	MOVL	(SI),CX
  1650  	ADDL	$4,SI
  1651  	MOVL	CX,(DI)
  1652  	ADDL	$4,DI
  1653  	
  1654  	MOVL	(SI),CX
  1655  	ADDL	$4,SI
  1656  	MOVL	CX,(DI)
  1657  	ADDL	$4,DI
  1658  	
  1659  	MOVL	(SI),CX
  1660  	ADDL	$4,SI
  1661  	MOVL	CX,(DI)
  1662  	ADDL	$4,DI
  1663  	
  1664  	MOVL	(SI),CX
  1665  	ADDL	$4,SI
  1666  	MOVL	CX,(DI)
  1667  	ADDL	$4,DI
  1668  	
  1669  	MOVL	(SI),CX
  1670  	ADDL	$4,SI
  1671  	MOVL	CX,(DI)
  1672  	ADDL	$4,DI
  1673  	
  1674  	MOVL	(SI),CX
  1675  	ADDL	$4,SI
  1676  	MOVL	CX,(DI)
  1677  	ADDL	$4,DI
  1678  	
  1679  	MOVL	(SI),CX
  1680  	ADDL	$4,SI
  1681  	MOVL	CX,(DI)
  1682  	ADDL	$4,DI
  1683  	
  1684  	MOVL	(SI),CX
  1685  	ADDL	$4,SI
  1686  	MOVL	CX,(DI)
  1687  	ADDL	$4,DI
  1688  	
  1689  	MOVL	(SI),CX
  1690  	ADDL	$4,SI
  1691  	MOVL	CX,(DI)
  1692  	ADDL	$4,DI
  1693  	
  1694  	MOVL	(SI),CX
  1695  	ADDL	$4,SI
  1696  	MOVL	CX,(DI)
  1697  	ADDL	$4,DI
  1698  	
  1699  	MOVL	(SI),CX
  1700  	ADDL	$4,SI
  1701  	MOVL	CX,(DI)
  1702  	ADDL	$4,DI
  1703  	
  1704  	MOVL	(SI),CX
  1705  	ADDL	$4,SI
  1706  	MOVL	CX,(DI)
  1707  	ADDL	$4,DI
  1708  	
  1709  	MOVL	(SI),CX
  1710  	ADDL	$4,SI
  1711  	MOVL	CX,(DI)
  1712  	ADDL	$4,DI
  1713  	
  1714  	MOVL	(SI),CX
  1715  	ADDL	$4,SI
  1716  	MOVL	CX,(DI)
  1717  	ADDL	$4,DI
  1718  	
  1719  	MOVL	(SI),CX
  1720  	ADDL	$4,SI
  1721  	MOVL	CX,(DI)
  1722  	ADDL	$4,DI
  1723  	
  1724  	MOVL	(SI),CX
  1725  	ADDL	$4,SI
  1726  	MOVL	CX,(DI)
  1727  	ADDL	$4,DI
  1728  	
  1729  	MOVL	(SI),CX
  1730  	ADDL	$4,SI
  1731  	MOVL	CX,(DI)
  1732  	ADDL	$4,DI
  1733  	
  1734  	MOVL	(SI),CX
  1735  	ADDL	$4,SI
  1736  	MOVL	CX,(DI)
  1737  	ADDL	$4,DI
  1738  	
  1739  	MOVL	(SI),CX
  1740  	ADDL	$4,SI
  1741  	MOVL	CX,(DI)
  1742  	ADDL	$4,DI
  1743  	
  1744  	MOVL	(SI),CX
  1745  	ADDL	$4,SI
  1746  	MOVL	CX,(DI)
  1747  	ADDL	$4,DI
  1748  	
  1749  	MOVL	(SI),CX
  1750  	ADDL	$4,SI
  1751  	MOVL	CX,(DI)
  1752  	ADDL	$4,DI
  1753  	
  1754  	MOVL	(SI),CX
  1755  	ADDL	$4,SI
  1756  	MOVL	CX,(DI)
  1757  	ADDL	$4,DI
  1758  	
  1759  	MOVL	(SI),CX
  1760  	ADDL	$4,SI
  1761  	MOVL	CX,(DI)
  1762  	ADDL	$4,DI
  1763  	
  1764  	MOVL	(SI),CX
  1765  	ADDL	$4,SI
  1766  	MOVL	CX,(DI)
  1767  	ADDL	$4,DI
  1768  	
  1769  	MOVL	(SI),CX
  1770  	ADDL	$4,SI
  1771  	MOVL	CX,(DI)
  1772  	ADDL	$4,DI
  1773  	
  1774  	MOVL	(SI),CX
  1775  	ADDL	$4,SI
  1776  	MOVL	CX,(DI)
  1777  	ADDL	$4,DI
  1778  	
  1779  	MOVL	(SI),CX
  1780  	ADDL	$4,SI
  1781  	MOVL	CX,(DI)
  1782  	ADDL	$4,DI
  1783  	
  1784  	MOVL	(SI),CX
  1785  	ADDL	$4,SI
  1786  	MOVL	CX,(DI)
  1787  	ADDL	$4,DI
  1788  	
  1789  	MOVL	(SI),CX
  1790  	ADDL	$4,SI
  1791  	MOVL	CX,(DI)
  1792  	ADDL	$4,DI
  1793  	
  1794  	MOVL	(SI),CX
  1795  	ADDL	$4,SI
  1796  	MOVL	CX,(DI)
  1797  	ADDL	$4,DI
  1798  	
  1799  	MOVL	(SI),CX
  1800  	ADDL	$4,SI
  1801  	MOVL	CX,(DI)
  1802  	ADDL	$4,DI
  1803  	
  1804  	MOVL	(SI),CX
  1805  	ADDL	$4,SI
  1806  	MOVL	CX,(DI)
  1807  	ADDL	$4,DI
  1808  	
  1809  	MOVL	(SI),CX
  1810  	ADDL	$4,SI
  1811  	MOVL	CX,(DI)
  1812  	ADDL	$4,DI
  1813  	
  1814  	MOVL	(SI),CX
  1815  	ADDL	$4,SI
  1816  	MOVL	CX,(DI)
  1817  	ADDL	$4,DI
  1818  	
  1819  	MOVL	(SI),CX
  1820  	ADDL	$4,SI
  1821  	MOVL	CX,(DI)
  1822  	ADDL	$4,DI
  1823  	
  1824  	MOVL	(SI),CX
  1825  	ADDL	$4,SI
  1826  	MOVL	CX,(DI)
  1827  	ADDL	$4,DI
  1828  	
  1829  	MOVL	(SI),CX
  1830  	ADDL	$4,SI
  1831  	MOVL	CX,(DI)
  1832  	ADDL	$4,DI
  1833  	
  1834  	MOVL	(SI),CX
  1835  	ADDL	$4,SI
  1836  	MOVL	CX,(DI)
  1837  	ADDL	$4,DI
  1838  	
  1839  	MOVL	(SI),CX
  1840  	ADDL	$4,SI
  1841  	MOVL	CX,(DI)
  1842  	ADDL	$4,DI
  1843  	
  1844  	MOVL	(SI),CX
  1845  	ADDL	$4,SI
  1846  	MOVL	CX,(DI)
  1847  	ADDL	$4,DI
  1848  	
  1849  	MOVL	(SI),CX
  1850  	ADDL	$4,SI
  1851  	MOVL	CX,(DI)
  1852  	ADDL	$4,DI
  1853  	
  1854  	MOVL	(SI),CX
  1855  	ADDL	$4,SI
  1856  	MOVL	CX,(DI)
  1857  	ADDL	$4,DI
  1858  	
  1859  	MOVL	(SI),CX
  1860  	ADDL	$4,SI
  1861  	MOVL	CX,(DI)
  1862  	ADDL	$4,DI
  1863  	
  1864  	MOVL	(SI),CX
  1865  	ADDL	$4,SI
  1866  	MOVL	CX,(DI)
  1867  	ADDL	$4,DI
  1868  	
  1869  	MOVL	(SI),CX
  1870  	ADDL	$4,SI
  1871  	MOVL	CX,(DI)
  1872  	ADDL	$4,DI
  1873  	
  1874  	MOVL	(SI),CX
  1875  	ADDL	$4,SI
  1876  	MOVL	CX,(DI)
  1877  	ADDL	$4,DI
  1878  	
  1879  	MOVL	(SI),CX
  1880  	ADDL	$4,SI
  1881  	MOVL	CX,(DI)
  1882  	ADDL	$4,DI
  1883  	
  1884  	MOVL	(SI),CX
  1885  	ADDL	$4,SI
  1886  	MOVL	CX,(DI)
  1887  	ADDL	$4,DI
  1888  	
  1889  	MOVL	(SI),CX
  1890  	ADDL	$4,SI
  1891  	MOVL	CX,(DI)
  1892  	ADDL	$4,DI
  1893  	
  1894  	MOVL	(SI),CX
  1895  	ADDL	$4,SI
  1896  	MOVL	CX,(DI)
  1897  	ADDL	$4,DI
  1898  	
  1899  	MOVL	(SI),CX
  1900  	ADDL	$4,SI
  1901  	MOVL	CX,(DI)
  1902  	ADDL	$4,DI
  1903  	
  1904  	MOVL	(SI),CX
  1905  	ADDL	$4,SI
  1906  	MOVL	CX,(DI)
  1907  	ADDL	$4,DI
  1908  	
  1909  	MOVL	(SI),CX
  1910  	ADDL	$4,SI
  1911  	MOVL	CX,(DI)
  1912  	ADDL	$4,DI
  1913  	
  1914  	MOVL	(SI),CX
  1915  	ADDL	$4,SI
  1916  	MOVL	CX,(DI)
  1917  	ADDL	$4,DI
  1918  	
  1919  	MOVL	(SI),CX
  1920  	ADDL	$4,SI
  1921  	MOVL	CX,(DI)
  1922  	ADDL	$4,DI
  1923  	
  1924  	MOVL	(SI),CX
  1925  	ADDL	$4,SI
  1926  	MOVL	CX,(DI)
  1927  	ADDL	$4,DI
  1928  	
  1929  	MOVL	(SI),CX
  1930  	ADDL	$4,SI
  1931  	MOVL	CX,(DI)
  1932  	ADDL	$4,DI
  1933  	
  1934  	MOVL	(SI),CX
  1935  	ADDL	$4,SI
  1936  	MOVL	CX,(DI)
  1937  	ADDL	$4,DI
  1938  	
  1939  	MOVL	(SI),CX
  1940  	ADDL	$4,SI
  1941  	MOVL	CX,(DI)
  1942  	ADDL	$4,DI
  1943  	
  1944  	MOVL	(SI),CX
  1945  	ADDL	$4,SI
  1946  	MOVL	CX,(DI)
  1947  	ADDL	$4,DI
  1948  	
  1949  	MOVL	(SI),CX
  1950  	ADDL	$4,SI
  1951  	MOVL	CX,(DI)
  1952  	ADDL	$4,DI
  1953  	
  1954  	MOVL	(SI),CX
  1955  	ADDL	$4,SI
  1956  	MOVL	CX,(DI)
  1957  	ADDL	$4,DI
  1958  	
  1959  	MOVL	(SI),CX
  1960  	ADDL	$4,SI
  1961  	MOVL	CX,(DI)
  1962  	ADDL	$4,DI
  1963  	
  1964  	MOVL	(SI),CX
  1965  	ADDL	$4,SI
  1966  	MOVL	CX,(DI)
  1967  	ADDL	$4,DI
  1968  	
  1969  	MOVL	(SI),CX
  1970  	ADDL	$4,SI
  1971  	MOVL	CX,(DI)
  1972  	ADDL	$4,DI
  1973  	
  1974  	MOVL	(SI),CX
  1975  	ADDL	$4,SI
  1976  	MOVL	CX,(DI)
  1977  	ADDL	$4,DI
  1978  	
  1979  	MOVL	(SI),CX
  1980  	ADDL	$4,SI
  1981  	MOVL	CX,(DI)
  1982  	ADDL	$4,DI
  1983  	
  1984  	MOVL	(SI),CX
  1985  	ADDL	$4,SI
  1986  	MOVL	CX,(DI)
  1987  	ADDL	$4,DI
  1988  	
  1989  	MOVL	(SI),CX
  1990  	ADDL	$4,SI
  1991  	MOVL	CX,(DI)
  1992  	ADDL	$4,DI
  1993  	
  1994  	MOVL	(SI),CX
  1995  	ADDL	$4,SI
  1996  	MOVL	CX,(DI)
  1997  	ADDL	$4,DI
  1998  	
  1999  	MOVL	(SI),CX
  2000  	ADDL	$4,SI
  2001  	MOVL	CX,(DI)
  2002  	ADDL	$4,DI
  2003  	
  2004  	MOVL	(SI),CX
  2005  	ADDL	$4,SI
  2006  	MOVL	CX,(DI)
  2007  	ADDL	$4,DI
  2008  	
  2009  	MOVL	(SI),CX
  2010  	ADDL	$4,SI
  2011  	MOVL	CX,(DI)
  2012  	ADDL	$4,DI
  2013  	
  2014  	MOVL	(SI),CX
  2015  	ADDL	$4,SI
  2016  	MOVL	CX,(DI)
  2017  	ADDL	$4,DI
  2018  	
  2019  	MOVL	(SI),CX
  2020  	ADDL	$4,SI
  2021  	MOVL	CX,(DI)
  2022  	ADDL	$4,DI
  2023  	
  2024  	MOVL	(SI),CX
  2025  	ADDL	$4,SI
  2026  	MOVL	CX,(DI)
  2027  	ADDL	$4,DI
  2028  	
  2029  	MOVL	(SI),CX
  2030  	ADDL	$4,SI
  2031  	MOVL	CX,(DI)
  2032  	ADDL	$4,DI
  2033  	
  2034  	MOVL	(SI),CX
  2035  	ADDL	$4,SI
  2036  	MOVL	CX,(DI)
  2037  	ADDL	$4,DI
  2038  	
  2039  	MOVL	(SI),CX
  2040  	ADDL	$4,SI
  2041  	MOVL	CX,(DI)
  2042  	ADDL	$4,DI
  2043  	
  2044  	MOVL	(SI),CX
  2045  	ADDL	$4,SI
  2046  	MOVL	CX,(DI)
  2047  	ADDL	$4,DI
  2048  	
  2049  	MOVL	(SI),CX
  2050  	ADDL	$4,SI
  2051  	MOVL	CX,(DI)
  2052  	ADDL	$4,DI
  2053  	
  2054  	MOVL	(SI),CX
  2055  	ADDL	$4,SI
  2056  	MOVL	CX,(DI)
  2057  	ADDL	$4,DI
  2058  	
  2059  	MOVL	(SI),CX
  2060  	ADDL	$4,SI
  2061  	MOVL	CX,(DI)
  2062  	ADDL	$4,DI
  2063  	
  2064  	MOVL	(SI),CX
  2065  	ADDL	$4,SI
  2066  	MOVL	CX,(DI)
  2067  	ADDL	$4,DI
  2068  	
  2069  	MOVL	(SI),CX
  2070  	ADDL	$4,SI
  2071  	MOVL	CX,(DI)
  2072  	ADDL	$4,DI
  2073  	
  2074  	MOVL	(SI),CX
  2075  	ADDL	$4,SI
  2076  	MOVL	CX,(DI)
  2077  	ADDL	$4,DI
  2078  	
  2079  	MOVL	(SI),CX
  2080  	ADDL	$4,SI
  2081  	MOVL	CX,(DI)
  2082  	ADDL	$4,DI
  2083  	
  2084  	MOVL	(SI),CX
  2085  	ADDL	$4,SI
  2086  	MOVL	CX,(DI)
  2087  	ADDL	$4,DI
  2088  	
  2089  	MOVL	(SI),CX
  2090  	ADDL	$4,SI
  2091  	MOVL	CX,(DI)
  2092  	ADDL	$4,DI
  2093  	
  2094  	MOVL	(SI),CX
  2095  	ADDL	$4,SI
  2096  	MOVL	CX,(DI)
  2097  	ADDL	$4,DI
  2098  	
  2099  	MOVL	(SI),CX
  2100  	ADDL	$4,SI
  2101  	MOVL	CX,(DI)
  2102  	ADDL	$4,DI
  2103  	
  2104  	MOVL	(SI),CX
  2105  	ADDL	$4,SI
  2106  	MOVL	CX,(DI)
  2107  	ADDL	$4,DI
  2108  	
  2109  	MOVL	(SI),CX
  2110  	ADDL	$4,SI
  2111  	MOVL	CX,(DI)
  2112  	ADDL	$4,DI
  2113  	
  2114  	MOVL	(SI),CX
  2115  	ADDL	$4,SI
  2116  	MOVL	CX,(DI)
  2117  	ADDL	$4,DI
  2118  	
  2119  	MOVL	(SI),CX
  2120  	ADDL	$4,SI
  2121  	MOVL	CX,(DI)
  2122  	ADDL	$4,DI
  2123  	
  2124  	MOVL	(SI),CX
  2125  	ADDL	$4,SI
  2126  	MOVL	CX,(DI)
  2127  	ADDL	$4,DI
  2128  	
  2129  	MOVL	(SI),CX
  2130  	ADDL	$4,SI
  2131  	MOVL	CX,(DI)
  2132  	ADDL	$4,DI
  2133  	
  2134  	MOVL	(SI),CX
  2135  	ADDL	$4,SI
  2136  	MOVL	CX,(DI)
  2137  	ADDL	$4,DI
  2138  	
  2139  	MOVL	(SI),CX
  2140  	ADDL	$4,SI
  2141  	MOVL	CX,(DI)
  2142  	ADDL	$4,DI
  2143  	
  2144  	MOVL	(SI),CX
  2145  	ADDL	$4,SI
  2146  	MOVL	CX,(DI)
  2147  	ADDL	$4,DI
  2148  	
  2149  	MOVL	(SI),CX
  2150  	ADDL	$4,SI
  2151  	MOVL	CX,(DI)
  2152  	ADDL	$4,DI
  2153  	
  2154  	MOVL	(SI),CX
  2155  	ADDL	$4,SI
  2156  	MOVL	CX,(DI)
  2157  	ADDL	$4,DI
  2158  	
  2159  	MOVL	(SI),CX
  2160  	ADDL	$4,SI
  2161  	MOVL	CX,(DI)
  2162  	ADDL	$4,DI
  2163  	
  2164  	MOVL	(SI),CX
  2165  	ADDL	$4,SI
  2166  	MOVL	CX,(DI)
  2167  	ADDL	$4,DI
  2168  	
  2169  	MOVL	(SI),CX
  2170  	ADDL	$4,SI
  2171  	MOVL	CX,(DI)
  2172  	ADDL	$4,DI
  2173  	
  2174  	MOVL	(SI),CX
  2175  	ADDL	$4,SI
  2176  	MOVL	CX,(DI)
  2177  	ADDL	$4,DI
  2178  	
  2179  	MOVL	(SI),CX
  2180  	ADDL	$4,SI
  2181  	MOVL	CX,(DI)
  2182  	ADDL	$4,DI
  2183  	
  2184  	MOVL	(SI),CX
  2185  	ADDL	$4,SI
  2186  	MOVL	CX,(DI)
  2187  	ADDL	$4,DI
  2188  	
  2189  	MOVL	(SI),CX
  2190  	ADDL	$4,SI
  2191  	MOVL	CX,(DI)
  2192  	ADDL	$4,DI
  2193  	
  2194  	MOVL	(SI),CX
  2195  	ADDL	$4,SI
  2196  	MOVL	CX,(DI)
  2197  	ADDL	$4,DI
  2198  	
  2199  	MOVL	(SI),CX
  2200  	ADDL	$4,SI
  2201  	MOVL	CX,(DI)
  2202  	ADDL	$4,DI
  2203  	
  2204  	MOVL	(SI),CX
  2205  	ADDL	$4,SI
  2206  	MOVL	CX,(DI)
  2207  	ADDL	$4,DI
  2208  	
  2209  	MOVL	(SI),CX
  2210  	ADDL	$4,SI
  2211  	MOVL	CX,(DI)
  2212  	ADDL	$4,DI
  2213  	
  2214  	MOVL	(SI),CX
  2215  	ADDL	$4,SI
  2216  	MOVL	CX,(DI)
  2217  	ADDL	$4,DI
  2218  	
  2219  	MOVL	(SI),CX
  2220  	ADDL	$4,SI
  2221  	MOVL	CX,(DI)
  2222  	ADDL	$4,DI
  2223  	
  2224  	MOVL	(SI),CX
  2225  	ADDL	$4,SI
  2226  	MOVL	CX,(DI)
  2227  	ADDL	$4,DI
  2228  	
  2229  	MOVL	(SI),CX
  2230  	ADDL	$4,SI
  2231  	MOVL	CX,(DI)
  2232  	ADDL	$4,DI
  2233  	
  2234  	MOVL	(SI),CX
  2235  	ADDL	$4,SI
  2236  	MOVL	CX,(DI)
  2237  	ADDL	$4,DI
  2238  	
  2239  	MOVL	(SI),CX
  2240  	ADDL	$4,SI
  2241  	MOVL	CX,(DI)
  2242  	ADDL	$4,DI
  2243  	
  2244  	MOVL	(SI),CX
  2245  	ADDL	$4,SI
  2246  	MOVL	CX,(DI)
  2247  	ADDL	$4,DI
  2248  	
  2249  	MOVL	(SI),CX
  2250  	ADDL	$4,SI
  2251  	MOVL	CX,(DI)
  2252  	ADDL	$4,DI
  2253  	
  2254  	MOVL	(SI),CX
  2255  	ADDL	$4,SI
  2256  	MOVL	CX,(DI)
  2257  	ADDL	$4,DI
  2258  	
  2259  	RET
  2260  
  2261  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
        	// Advances the generator state stored in the current m's fastrand
        	// field and returns the new state in ret+0(FP).
        	// Step: x += x; if the doubled value has its top bit set,
        	// x ^= 0x88888eef.  AX, BX, CX and DX are overwritten.
  2262  	get_tls(CX)
  2263  	MOVL	g(CX), AX
  2264  	MOVL	g_m(AX), AX
  2265  	MOVL	m_fastrand(AX), DX
  2266  	ADDL	DX, DX
  2267  	MOVL	DX, BX	// keep the doubled value in case the XOR must be undone
  2268  	XORL	$0x88888eef, DX
        	// The constant has bit 31 set, so the XOR flips the sign bit: a
        	// negative result here means the doubled value was non-negative,
        	// in which case the un-XORed copy in BX is used instead.
  2269  	CMOVLMI	BX, DX
  2270  	MOVL	DX, m_fastrand(AX)
  2271  	MOVL	DX, ret+0(FP)
  2272  	RET
  2273  
  2274  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns; reads no arguments and touches no memory.
  2275  	MOVL	$0, AX
  2276  	RET
  2277  
  2278  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  2279  // Must obey the gcc calling convention.
        // The value is left in AX; only AX and CX are modified and nothing is
        // read from or written to the caller's stack.
  2280  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  2281  	get_tls(CX)
  2282  	MOVL	g(CX), AX	// AX = g
  2283  	MOVL	g_m(AX), AX	// AX = g->m
  2284  	MOVL	m_curg(AX), AX	// AX = g->m->curg
  2285  	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
  2286  	RET