github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/runtime/asm_386.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "zasm_GOOS_GOARCH.h"
     6  
     7  TEXT _rt0_386(SB),7,$0
        // Process entry for 386.  In order: stash argc/argv on an aligned stack,
        // give g0 default stack bounds, record CPUID feature words, set up TLS
        // (through _cgo_init when present, otherwise ldt0setup), install m0/g0,
        // run the runtime init calls, queue runtime·main via newproc and
        // finally call mstart.
     8  	// copy arguments forward on an even stack
     9  	MOVL	argc+0(FP), AX
    10  	MOVL	argv+4(FP), BX
    11  	SUBL	$128, SP		// plenty of scratch
    12  	ANDL	$~15, SP		// round SP down to a multiple of 16
    13  	MOVL	AX, 120(SP)		// save argc, argv away
    14  	MOVL	BX, 124(SP)
    15  
    16  	// set default stack bounds.
    17  	// _cgo_init may update stackguard.
    18  	MOVL	$runtime·g0(SB), BP
    19  	LEAL	(-64*1024+104)(SP), BX	// guard = SP - 64KB + 104
    20  	MOVL	BX, g_stackguard(BP)
    21  	MOVL	SP, g_stackbase(BP)
    22  	
    23  	// find out information about the processor we're on
    24  	MOVL	$0, AX			// CPUID leaf 0
    25  	CPUID
    26  	CMPL	AX, $0
    27  	JE	nocpuinfo		// leaf 0 returned AX == 0: skip the leaf 1 query
    28  	MOVL	$1, AX			// CPUID leaf 1
    29  	CPUID
    30  	MOVL	CX, runtime·cpuid_ecx(SB)	// keep the ECX/EDX words for later feature tests
    31  	MOVL	DX, runtime·cpuid_edx(SB)
    32  nocpuinfo:	
    33  
    34  	// if there is an _cgo_init, call it to let it
    35  	// initialize and to set up GS.  if not,
    36  	// we set up GS ourselves.
    37  	MOVL	_cgo_init(SB), AX
    38  	TESTL	AX, AX
    39  	JZ	needtls
    40  	MOVL	$setmg_gcc<>(SB), BX
    41  	MOVL	BX, 4(SP)		// second argument: address of setmg_gcc
    42  	MOVL	BP, 0(SP)		// first argument: &g0 (BP was set above)
    43  	CALL	AX
    44  	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
    45  	CMPL runtime·iswindows(SB), $0
    46  	JEQ ok
    47  needtls:
    48  	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
    49  	CMPL	runtime·isplan9(SB), $1
    50  	JEQ	ok
    51  
    52  	// set up %gs
    53  	CALL	runtime·ldt0setup(SB)
    54  
    55  	// store through it, to make sure it works
    56  	get_tls(BX)
    57  	MOVL	$0x123, g(BX)		// write a marker through the TLS slot for g ...
    58  	MOVL	runtime·tls0(SB), AX	// ... and read it back directly from tls0
    59  	CMPL	AX, $0x123
    60  	JEQ	ok
    61  	MOVL	AX, 0	// abort
    62  ok:
    63  	// set up m and g "registers"
    64  	get_tls(BX)
    65  	LEAL	runtime·g0(SB), CX
    66  	MOVL	CX, g(BX)
    67  	LEAL	runtime·m0(SB), AX
    68  	MOVL	AX, m(BX)
    69  
    70  	// save m->g0 = g0
    71  	MOVL	CX, m_g0(AX)
    72  
    73  	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
    74  
    75  	// convention is D is always cleared
    76  	CLD
    77  
    78  	CALL	runtime·check(SB)
    79  
    80  	// saved argc, argv
    81  	MOVL	120(SP), AX
    82  	MOVL	AX, 0(SP)		// argc becomes the first argument of runtime·args
    83  	MOVL	124(SP), AX
    84  	MOVL	AX, 4(SP)		// argv becomes the second argument
    85  	CALL	runtime·args(SB)
    86  	CALL	runtime·osinit(SB)
    87  	CALL	runtime·hashinit(SB)
    88  	CALL	runtime·schedinit(SB)
    89  
    90  	// create a new goroutine to start program
    91  	PUSHL	$runtime·main·f(SB)	// entry
    92  	PUSHL	$0	// arg size
    93  	CALL	runtime·newproc(SB)
    94  	POPL	AX			// drop the two words pushed for newproc
    95  	POPL	AX
    96  
    97  	// start this M
    98  	CALL	runtime·mstart(SB)
    99  
   100  	INT $3				// presumably unreachable: traps if mstart ever returns
   101  	RET
   102  
   103  DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)	// 4-byte word initialised with the address of runtime·main
   104  GLOBL	runtime·main·f(SB),8,$4	// 4-byte symbol; _rt0_386 pushes its address as the newproc entry
   105  
   106  TEXT runtime·breakpoint(SB),7,$0
        // Raises a breakpoint trap with INT 3, then returns to the caller
        // if execution is resumed.
   107  	INT $3
   108  	RET
   109  
   110  TEXT runtime·asminit(SB),7,$0
   111  	// Linux and MinGW start the FPU in extended double precision.
   112  	// Other operating systems use double precision.
   113  	// Change to double precision to match them,
   114  	// and to match other hardware that only has double.
   115  	PUSHL $0x27F	// place the new x87 control word on the stack
   116  	FLDCW	0(SP)	// load the control word from that stack slot
   117  	POPL AX		// rebalance the stack
   118  	RET
   119  
   120  /*
   121   *  go-routine
   122   */
   123  
   124  // void gosave(Gobuf*)
   125  // save state in Gobuf; setjmp
        // Records the caller's SP, the return PC and the current g in the Gobuf
        // passed as the only argument.
   126  TEXT runtime·gosave(SB), 7, $0
   127  	MOVL	4(SP), AX		// gobuf
   128  	LEAL	4(SP), BX		// caller's SP
   129  	MOVL	BX, gobuf_sp(AX)
   130  	MOVL	0(SP), BX		// caller's PC
   131  	MOVL	BX, gobuf_pc(AX)
   132  	get_tls(CX)
   133  	MOVL	g(CX), BX		// current g
   134  	MOVL	BX, gobuf_g(AX)
   135  	RET
   136  
   137  // void gogo(Gobuf*, uintptr)
   138  // restore state from Gobuf; longjmp
        // Installs gobuf.g as the current g, switches SP to gobuf.sp and jumps to
        // gobuf.pc.  The second argument is left in AX for the resumed code.
   139  TEXT runtime·gogo(SB), 7, $0
   140  	MOVL	8(SP), AX		// return 2nd arg
   141  	MOVL	4(SP), BX		// gobuf
   142  	MOVL	gobuf_g(BX), DX
   143  	MOVL	0(DX), CX		// make sure g != nil
   144  	get_tls(CX)			// CX is reused; the value loaded above was only a probe
   145  	MOVL	DX, g(CX)		// g = gobuf.g
   146  	MOVL	gobuf_sp(BX), SP	// restore SP
   147  	MOVL	gobuf_pc(BX), BX
   148  	JMP	BX			// continue at the saved PC
   149  
   150  // void gogocall(Gobuf*, void (*fn)(void), uintptr r0)
   151  // restore state from Gobuf but then call fn.
   152  // (call fn, returning to state in Gobuf)
        // The saved gobuf.pc is pushed as the return address before jumping to fn,
        // so a RET in fn continues at the Gobuf's PC.  The third argument is
        // left in DX.
   153  TEXT runtime·gogocall(SB), 7, $0
   154  	MOVL	12(SP), DX	// context
   155  	MOVL	8(SP), AX		// fn
   156  	MOVL	4(SP), BX		// gobuf
   157  	MOVL	gobuf_g(BX), DI
   158  	get_tls(CX)
   159  	MOVL	DI, g(CX)		// g = gobuf.g
   160  	MOVL	0(DI), CX		// make sure g != nil
   161  	MOVL	gobuf_sp(BX), SP	// restore SP
   162  	MOVL	gobuf_pc(BX), BX
   163  	PUSHL	BX			// fake return address: fn returns to gobuf.pc
   164  	JMP	AX
   165  	POPL	BX	// not reached
   166  
   167  // void gogocallfn(Gobuf*, FuncVal*)
   168  // restore state from Gobuf but then call fn.
   169  // (call fn, returning to state in Gobuf)
        // Same shape as gogocall, but fn is a FuncVal*: the code address is read
        // from the first word of the FuncVal, and DX still holds the FuncVal
        // pointer at the jump.
   170  TEXT runtime·gogocallfn(SB), 7, $0
   171  	MOVL	8(SP), DX		// fn
   172  	MOVL	4(SP), BX		// gobuf
   173  	MOVL	gobuf_g(BX), DI
   174  	get_tls(CX)
   175  	MOVL	DI, g(CX)		// g = gobuf.g
   176  	MOVL	0(DI), CX		// make sure g != nil
   177  	MOVL	gobuf_sp(BX), SP	// restore SP
   178  	MOVL	gobuf_pc(BX), BX
   179  	PUSHL	BX			// fake return address: fn returns to gobuf.pc
   180  	MOVL	0(DX), BX		// code pointer = first word of the FuncVal
   181  	JMP	BX
   182  	POPL	BX	// not reached
   183  
   184  // void mcall(void (*fn)(G*))
   185  // Switch to m->g0's stack, call fn(g).
   186  // Fn must never return.  It should gogo(&g->sched)
   187  // to keep running g.
        // The caller's PC, SP and g are saved in g->sched before the switch.
        // Calling mcall while already on m->g0 is reported through badmcall;
        // a returning fn is reported through badmcall2.
   188  TEXT runtime·mcall(SB), 7, $0
   189  	MOVL	fn+0(FP), DI
   190  	
   191  	get_tls(CX)
   192  	MOVL	g(CX), AX	// save state in g->gobuf
   193  	MOVL	0(SP), BX	// caller's PC
   194  	MOVL	BX, (g_sched+gobuf_pc)(AX)
   195  	LEAL	4(SP), BX	// caller's SP
   196  	MOVL	BX, (g_sched+gobuf_sp)(AX)
   197  	MOVL	AX, (g_sched+gobuf_g)(AX)
   198  
   199  	// switch to m->g0 & its stack, call fn
   200  	MOVL	m(CX), BX
   201  	MOVL	m_g0(BX), SI
   202  	CMPL	SI, AX	// if g == m->g0 call badmcall
   203  	JNE	2(PC)	// different g: skip the badmcall call
   204  	CALL	runtime·badmcall(SB)
   205  	MOVL	SI, g(CX)	// g = m->g0
   206  	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->gobuf.sp
   207  	PUSHL	AX		// argument for fn: the g we switched away from
   208  	CALL	DI
   209  	POPL	AX		// only reached if fn returned
   210  	CALL	runtime·badmcall2(SB)
   211  	RET
   212  
   213  /*
   214   * support for morestack
   215   */
   216  
   217  // Called during function prolog when more stack is needed.
        // Records the request (frame size from DI, arg size from AX, plus the PC/SP
        // of f's caller and f's own PC) in m, then calls newstack on m->g0's stack.
        // Traps with INT 3 if the current g is m->g0.
   218  TEXT runtime·morestack(SB),7,$0
   219  	// Cannot grow scheduler stack (m->g0).
   220  	get_tls(CX)
   221  	MOVL	m(CX), BX
   222  	MOVL	m_g0(BX), SI
   223  	CMPL	g(CX), SI
   224  	JNE	2(PC)		// not on g0: skip the trap
   225  	INT	$3
   226  	
   227  	MOVL	DX, m_cret(BX)	// stash DX in m->cret; NOTE(review): presumably the closure context, confirm against newstack
   228  
   229  	// frame size in DI
   230  	// arg size in AX
   231  	// Save in m.
   232  	MOVL	DI, m_moreframesize(BX)
   233  	MOVL	AX, m_moreargsize(BX)
   234  
   235  	// Called from f.
   236  	// Set m->morebuf to f's caller.
   237  	MOVL	4(SP), DI	// f's caller's PC
   238  	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   239  	LEAL	8(SP), CX	// f's caller's SP
   240  	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   241  	MOVL	CX, m_moreargp(BX)	// the same address is recorded as the argument pointer
   242  	get_tls(CX)		// CX was used as scratch above; reload the TLS base
   243  	MOVL	g(CX), SI
   244  	MOVL	SI, (m_morebuf+gobuf_g)(BX)
   245  
   246  	// Set m->morepc to f's PC.
   247  	MOVL	0(SP), AX
   248  	MOVL	AX, m_morepc(BX)
   249  
   250  	// Call newstack on m->g0's stack.
   251  	MOVL	m_g0(BX), BP
   252  	MOVL	BP, g(CX)	// g = m->g0
   253  	MOVL	(g_sched+gobuf_sp)(BP), AX
   254  	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   255  	MOVL	AX, SP
   256  	CALL	runtime·newstack(SB)
   257  	MOVL	$0, 0x1003	// crash if newstack returns
   258  	RET
   259  
   260  // Called from reflection library.  Mimics morestack,
   261  // reuses stack growth code to create a frame
   262  // with the desired args running the desired function.
   263  //
   264  // func call(fn *byte, arg *byte, argsize uint32).
        // The caller's PC/SP/g go into m->morebuf and fn/arg/argsize into the
        // m->more* fields; newstack is then called on m->g0's stack.
   265  TEXT reflect·call(SB), 7, $0
   266  	get_tls(CX)
   267  	MOVL	m(CX), BX
   268  
   269  	// Save our caller's state as the PC and SP to
   270  	// restore when returning from f.
   271  	MOVL	0(SP), AX	// our caller's PC
   272  	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   273  	LEAL	4(SP), AX	// our caller's SP
   274  	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   275  	MOVL	g(CX), AX
   276  	MOVL	AX, (m_morebuf+gobuf_g)(BX)
   277  
   278  	// Set up morestack arguments to call f on a new stack.
   279  	// We set f's frame size to 1, as a hint to newstack
   280  	// that this is a call from reflect·call.
   281  	// If it turns out that f needs a larger frame than
   282  	// the default stack, f's usual stack growth prolog will
   283  	// allocate a new segment (and recopy the arguments).
   284  	MOVL	4(SP), AX	// fn
   285  	MOVL	8(SP), DX	// arg frame
   286  	MOVL	12(SP), CX	// arg size
   287  
   288  	MOVL	AX, m_morepc(BX)	// f's PC
   289  	MOVL	DX, m_moreargp(BX)	// f's argument pointer
   290  	MOVL	CX, m_moreargsize(BX)	// f's argument size
   291  	MOVL	$1, m_moreframesize(BX)	// f's frame size
   292  
   293  	// Call newstack on m->g0's stack.
   294  	MOVL	m_g0(BX), BP
   295  	get_tls(CX)		// CX held the arg size; reload the TLS base
   296  	MOVL	BP, g(CX)	// g = m->g0
   297  	MOVL	(g_sched+gobuf_sp)(BP), SP
   298  	CALL	runtime·newstack(SB)
   299  	MOVL	$0, 0x1103	// crash if newstack returns
   300  	RET
   301  
   302  
   303  // Return point when leaving stack.
        // Saves AX in m->cret, switches to m->g0 and its stack, and calls oldstack.
   304  TEXT runtime·lessstack(SB), 7, $0
   305  	// Save return value in m->cret
   306  	get_tls(CX)
   307  	MOVL	m(CX), BX
   308  	MOVL	AX, m_cret(BX)
   309  
   310  	// Call oldstack on m->g0's stack.
   311  	MOVL	m_g0(BX), BP
   312  	MOVL	BP, g(CX)	// g = m->g0
   313  	MOVL	(g_sched+gobuf_sp)(BP), SP
   314  	CALL	runtime·oldstack(SB)
   315  	MOVL	$0, 0x1004	// crash if oldstack returns
   316  	RET
   317  
   318  
   319  // bool cas(int32 *val, int32 old, int32 new)
   320  // Atomically:
   321  //	if(*val == old){
   322  //		*val = new;
   323  //		return 1;
   324  //	}else
   325  //		return 0;
        // The result is returned in AX as 0 or 1.
   326  TEXT runtime·cas(SB), 7, $0
   327  	MOVL	4(SP), BX	// val
   328  	MOVL	8(SP), AX	// old; CMPXCHGL compares against AX
   329  	MOVL	12(SP), CX	// new
   330  	LOCK
   331  	CMPXCHGL	CX, 0(BX)
   332  	JZ 3(PC)		// ZF set: swap happened, skip the two-instruction failure return
   333  	MOVL	$0, AX
   334  	RET
   335  	MOVL	$1, AX
   336  	RET
   337  
   338  // bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
   339  // Atomically:
   340  //	if(*val == *old){
   341  //		*val = new;
   342  //		return 1;
   343  //	} else {
   344  //		*old = *val
   345  //		return 0;
   346  //	}
        // Register use for CMPXCHG8B: DX:AX holds the expected value read from *old,
        // CX:BX holds new.  On failure the value left in DX:AX is stored back to *old.
   347  TEXT runtime·cas64(SB), 7, $0
   348  	MOVL	4(SP), BP	// val
   349  	MOVL	8(SP), SI	// old (pointer)
   350  	MOVL	0(SI), AX	// low word of *old
   351  	MOVL	4(SI), DX	// high word of *old
   352  	MOVL	12(SP), BX	// low word of new
   353  	MOVL	16(SP), CX	// high word of new
   354  	LOCK
   355  	CMPXCHG8B	0(BP)
   356  	JNZ	cas64_fail
   357  	MOVL	$1, AX
   358  	RET
   359  cas64_fail:
   360  	MOVL	AX, 0(SI)	// *old = value left in DX:AX by CMPXCHG8B
   361  	MOVL	DX, 4(SI)
   362  	MOVL	$0, AX
   363  	RET
   364  
   365  // bool casp(void **p, void *old, void *new)
   366  // Atomically:
   367  //	if(*p == old){
   368  //		*p = new;
   369  //		return 1;
   370  //	}else
   371  //		return 0;
        // Instruction-for-instruction the same as runtime·cas in this file.
   372  TEXT runtime·casp(SB), 7, $0
   373  	MOVL	4(SP), BX	// p
   374  	MOVL	8(SP), AX	// old; CMPXCHGL compares against AX
   375  	MOVL	12(SP), CX	// new
   376  	LOCK
   377  	CMPXCHGL	CX, 0(BX)
   378  	JZ 3(PC)		// ZF set: swap happened, skip the two-instruction failure return
   379  	MOVL	$0, AX
   380  	RET
   381  	MOVL	$1, AX
   382  	RET
   383  
   384  // uint32 xadd(uint32 volatile *val, int32 delta)
   385  // Atomically:
   386  //	*val += delta;
   387  //	return *val;
        // The new value is returned in AX.
   388  TEXT runtime·xadd(SB), 7, $0
   389  	MOVL	4(SP), BX	// val
   390  	MOVL	8(SP), AX	// delta
   391  	MOVL	AX, CX		// keep a copy of delta; XADDL overwrites AX
   392  	LOCK
   393  	XADDL	AX, 0(BX)	// *val += AX; AX = previous *val
   394  	ADDL	CX, AX		// previous value + delta = new value
   395  	RET
   396  
   397  TEXT runtime·xchg(SB), 7, $0
        // Exchanges the 32-bit word at the address in the first argument with the
        // second argument; the previous contents of that word are left in AX.
        // No LOCK prefix is written (x86 XCHG with a memory operand is locked
        // implicitly, per the Intel SDM).
   398  	MOVL	4(SP), BX	// address
   399  	MOVL	8(SP), AX	// new value
   400  	XCHGL	AX, 0(BX)
   401  	RET
   402  
   403  TEXT runtime·procyield(SB),7,$0
        // Executes PAUSE once per unit of the count given as the only argument.
        // The counter is decremented after the PAUSE, so a count of 0 wraps
        // around and spins 2^32 times.
   404  	MOVL	4(SP), AX	// iteration count
   405  again:
   406  	PAUSE
   407  	SUBL	$1, AX
   408  	JNZ	again
   409  	RET
   410  
   411  TEXT runtime·atomicstorep(SB), 7, $0
        // Stores the second argument at the address given by the first argument,
        // using XCHGL rather than a plain MOVL.  The old contents end up in AX.
   412  	MOVL	4(SP), BX	// destination address
   413  	MOVL	8(SP), AX	// value to store
   414  	XCHGL	AX, 0(BX)
   415  	RET
   416  
   417  TEXT runtime·atomicstore(SB), 7, $0
        // Stores the second argument at the address given by the first argument,
        // using XCHGL rather than a plain MOVL.  Same instruction sequence as
        // runtime·atomicstorep in this file.
   418  	MOVL	4(SP), BX	// destination address
   419  	MOVL	8(SP), AX	// value to store
   420  	XCHGL	AX, 0(BX)
   421  	RET
   422  
   423  // uint64 atomicload64(uint64 volatile* addr);
   424  // so actually
   425  // void atomicload64(uint64 *res, uint64 volatile *addr);
        // The 64-bit value travels through MMX register MM0 in a single MOVQ load
        // and a single MOVQ store; the instructions are hand-encoded with BYTE.
   426  TEXT runtime·atomicload64(SB), 7, $0
   427  	MOVL    4(SP), BX	// res
   428  	MOVL	8(SP), AX	// addr
   429  	// MOVQ (%EAX), %MM0
   430  	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
   431  	// MOVQ %MM0, 0(%EBX)
   432  	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
   433  	// EMMS
   434  	BYTE $0x0F; BYTE $0x77
   435  	RET
   436  
   437  // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
        // v is loaded from the argument area into MM0 and written to *addr with one
        // MOVQ; a locked XADDL of 0 on the stack follows as the memory fence.
   438  TEXT runtime·atomicstore64(SB), 7, $0
   439  	MOVL	4(SP), AX	// addr
   440  	// MOVQ and EMMS were introduced on the Pentium MMX.
   441  	// MOVQ 0x8(%ESP), %MM0
   442  	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
   443  	// MOVQ %MM0, (%EAX)
   444  	BYTE $0x0f; BYTE $0x7f; BYTE $0x00 
   445  	// EMMS
   446  	BYTE $0x0F; BYTE $0x77
   447  	// This is essentially a no-op, but it provides required memory fencing.
   448  	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
   449  	MOVL	$0, AX
   450  	LOCK
   451  	XADDL	AX, (SP)	// adds 0 to the word at the top of the stack
   452  	RET
   453  
   454  // void jmpdefer(fn, sp);
   455  // called from deferreturn.
   456  // 1. pop the caller
   457  // 2. sub 5 bytes from the callers return
   458  // 3. jmp to the argument
        // fn is dereferenced once before the jump: the code address is the first
        // word it points at, and DX still holds fn itself at the jump.
   459  TEXT runtime·jmpdefer(SB), 7, $0
   460  	MOVL	4(SP), DX	// fn
   461  	MOVL	8(SP), BX	// caller sp
   462  	LEAL	-4(BX), SP	// caller sp after CALL
   463  	SUBL	$5, (SP)	// return to CALL again
   464  	MOVL	0(DX), BX	// code address = first word at fn
   465  	JMP	BX	// but first run the deferred function
   466  
   467  // Dummy function to use in saved gobuf.PC,
   468  // to match SP pointing at a return address.
   469  // The gobuf.PC is unused by the contortions here
   470  // but setting it to return will make the traceback code work.
        // Its address is stored into g->sched.pc by runtime·asmcgocall.
   471  TEXT return<>(SB),7,$0
   472  	RET
   473  
   474  // asmcgocall(void(*fn)(void*), void *arg)
   475  // Call fn(arg) on the scheduler stack,
   476  // aligned appropriately for the gcc ABI.
   477  // See cgocall.c for more details.
        // The incoming g and SP are parked in the new frame (8(SP) and 4(SP)) and
        // restored from there after fn returns.
   478  TEXT runtime·asmcgocall(SB),7,$0
   479  	MOVL	fn+0(FP), AX
   480  	MOVL	arg+4(FP), BX
   481  	MOVL	SP, DX		// remember the incoming SP
   482  
   483  	// Figure out if we need to switch to m->g0 stack.
   484  	// We get called to create new OS threads too, and those
   485  	// come in on the m->g0 stack already.
   486  	get_tls(CX)
   487  	MOVL	m(CX), BP
   488  	MOVL	m_g0(BP), SI
   489  	MOVL	g(CX), DI
   490  	CMPL	SI, DI
   491  	JEQ	6(PC)		// already on g0: skip the five-instruction switch below
   492  	MOVL	SP, (g_sched+gobuf_sp)(DI)
   493  	MOVL	$return<>(SB), (g_sched+gobuf_pc)(DI)
   494  	MOVL	DI, (g_sched+gobuf_g)(DI)
   495  	MOVL	SI, g(CX)	// g = m->g0
   496  	MOVL	(g_sched+gobuf_sp)(SI), SP
   497  
   498  	// Now on a scheduling stack (a pthread-created stack).
   499  	SUBL	$32, SP
   500  	ANDL	$~15, SP	// alignment, perhaps unnecessary
   501  	MOVL	DI, 8(SP)	// save g
   502  	MOVL	DX, 4(SP)	// save SP
   503  	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   504  	CALL	AX
   505  
   506  	// Restore registers, g, stack pointer.
   507  	get_tls(CX)
   508  	MOVL	8(SP), DI
   509  	MOVL	DI, g(CX)	// g = the g saved before the call
   510  	MOVL	4(SP), SP	// back to the SP saved before the call
   511  	RET
   512  
   513  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   514  // Turn the fn into a Go func (by taking its address) and call
   515  // cgocallback_gofunc.
        // The 12-byte local frame holds the three outgoing arguments.
   516  TEXT runtime·cgocallback(SB),7,$12
   517  	LEAL	fn+0(FP), AX	// address of the fn argument slot, passed on as the FuncVal*
   518  	MOVL	AX, 0(SP)
   519  	MOVL	frame+4(FP), AX
   520  	MOVL	AX, 4(SP)
   521  	MOVL	framesize+8(FP), AX
   522  	MOVL	AX, 8(SP)
   523  	MOVL	$runtime·cgocallback_gofunc(SB), AX
   524  	CALL	AX		// indirect call through AX
   525  	RET
   526  
   527  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   528  // See cgocall.c for more details.
        // Outline: obtain an m if the thread has none (needm), switch from m->g0 to
        // m->curg, call cgocallbackg with the three arguments, switch back, and
        // release a borrowed m with dropm.
   529  TEXT runtime·cgocallback_gofunc(SB),7,$12
   530  	// If m is nil, Go did not create the current thread.
   531  	// Call needm to obtain one for temporary use.
   532  	// In this case, we're running on the thread stack, so there's
   533  	// lots of space, but the linker doesn't know. Hide the call from
   534  	// the linker analysis by using an indirect call through AX.
   535  	get_tls(CX)
   536  #ifdef GOOS_windows
   537  	CMPL	CX, $0
   538  	JNE	3(PC)		// TLS base is non-zero: continue with the normal m lookup
   539  	PUSHL	$0		// TLS base is zero: record "no m on entry" ...
   540  	JMP needm		// ... and go straight to needm
   541  #endif
   542  	MOVL	m(CX), BP
   543  	PUSHL	BP		// remember the m seen on entry (0 if none) for the dropm test at the end
   544  	CMPL	BP, $0
   545  	JNE	havem
   546  needm:
   547  	MOVL	$runtime·needm(SB), AX
   548  	CALL	AX
   549  	get_tls(CX)
   550  	MOVL	m(CX), BP	// reload m after the needm call
   551  
   552  havem:
   553  	// Now there's a valid m, and we're running on its m->g0.
   554  	// Save current m->g0->sched.sp on stack and then set it to SP.
   555  	// Save current sp in m->g0->sched.sp in preparation for
   556  	// switch back to m->curg stack.
   557  	MOVL	m_g0(BP), SI
   558  	PUSHL	(g_sched+gobuf_sp)(SI)
   559  	MOVL	SP, (g_sched+gobuf_sp)(SI)
   560  
   561  	// Switch to m->curg stack and call runtime.cgocallbackg
   562  	// with the three arguments.  Because we are taking over
   563  	// the execution of m->curg but *not* resuming what had
   564  	// been running, we need to save that information (m->curg->gobuf)
   565  	// so that we can restore it when we're done. 
   566  	// We can restore m->curg->gobuf.sp easily, because calling
   567  	// runtime.cgocallbackg leaves SP unchanged upon return.
   568  	// To save m->curg->gobuf.pc, we push it onto the stack.
   569  	// This has the added benefit that it looks to the traceback
   570  	// routine like cgocallbackg is going to return to that
   571  	// PC (because we defined cgocallbackg to have
   572  	// a frame size of 12, the same amount that we use below),
   573  	// so that the traceback will seamlessly trace back into
   574  	// the earlier calls.
   575  	MOVL	fn+0(FP), AX
   576  	MOVL	frame+4(FP), BX
   577  	MOVL	framesize+8(FP), DX
   578  
   579  	MOVL	m_curg(BP), SI
   580  	MOVL	SI, g(CX)	// g = m->curg
   581  	MOVL	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   582  
   583  	// Push gobuf.pc
   584  	MOVL	(g_sched+gobuf_pc)(SI), BP
   585  	SUBL	$4, DI
   586  	MOVL	BP, 0(DI)
   587  
   588  	// Push arguments to cgocallbackg.
   589  	// Frame size here must match the frame size above
   590  	// to trick traceback routines into doing the right thing.
   591  	SUBL	$12, DI
   592  	MOVL	AX, 0(DI)	// fn
   593  	MOVL	BX, 4(DI)	// frame
   594  	MOVL	DX, 8(DI)	// framesize
   595  	
   596  	// Switch stack and make the call.
   597  	MOVL	DI, SP
   598  	CALL	runtime·cgocallbackg(SB)
   599  
   600  	// Restore g->gobuf (== m->curg->gobuf) from saved values.
   601  	get_tls(CX)
   602  	MOVL	g(CX), SI
   603  	MOVL	12(SP), BP	// the gobuf.pc pushed just above the 12-byte argument area
   604  	MOVL	BP, (g_sched+gobuf_pc)(SI)
   605  	LEAL	(12+4)(SP), DI	// SP value before the 4+12 bytes were carved out
   606  	MOVL	DI, (g_sched+gobuf_sp)(SI)
   607  
   608  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   609  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   610  	// so we do not have to restore it.)
   611  	MOVL	m(CX), BP
   612  	MOVL	m_g0(BP), SI
   613  	MOVL	SI, g(CX)	// g = m->g0
   614  	MOVL	(g_sched+gobuf_sp)(SI), SP
   615  	POPL	(g_sched+gobuf_sp)(SI)
   616  	
   617  	// If the m on entry was nil, we called needm above to borrow an m
   618  	// for the duration of the call. Since the call is over, return it with dropm.
   619  	POPL	BP		// the m value pushed on entry
   620  	CMPL	BP, $0
   621  	JNE 3(PC)		// had an m on entry: skip the two-instruction dropm call
   622  	MOVL	$runtime·dropm(SB), AX
   623  	CALL	AX
   624  
   625  	// Done!
   626  	RET
   627  
   628  // void setmg(M*, G*); set m and g. for use by needm.
        // On Windows the TLS base at 0x14(FS) is first pointed at m->tls; a nil m
        // clears that slot and returns without touching m or g.  In every other
        // case m and g are then stored through the TLS base.
   629  TEXT runtime·setmg(SB), 7, $0
   630  #ifdef GOOS_windows
   631  	MOVL	mm+0(FP), AX
   632  	CMPL	AX, $0
   633  	JNE	settls
   634  	MOVL	$0, 0x14(FS)	// nil m: clear the TLS base and stop here
   635  	RET
   636  settls:
   637  	LEAL	m_tls(AX), AX
   638  	MOVL	AX, 0x14(FS)	// TLS base = &m->tls
   639  #endif
   641  	get_tls(CX)
   642  	MOVL	mm+0(FP), AX	// load m once, right before it is stored
   643  	MOVL	AX, m(CX)
   644  	MOVL	gg+4(FP), BX
   645  	MOVL	BX, g(CX)
   646  	RET
   647  
   648  // void setmg_gcc(M*, G*); set m and g. for use by gcc
        // _rt0_386 passes the address of this function to _cgo_init.
        // Only AX and DX are used as scratch.
   649  TEXT setmg_gcc<>(SB), 7, $0	
   650  	get_tls(AX)
   651  	MOVL	mm+0(FP), DX
   652  	MOVL	DX, m(AX)	// m = first argument
   653  	MOVL	gg+4(FP), DX
   654  	MOVL	DX,g (AX)	// g = second argument
   655  	RET
   656  
   657  // check that g->stackguard < SP < g->stackbase; trap with INT 3 otherwise
   658  TEXT runtime·stackcheck(SB), 7, $0
   659  	get_tls(CX)
   660  	MOVL	g(CX), AX
   661  	CMPL	g_stackbase(AX), SP
   662  	JHI	2(PC)		// stackbase > SP (unsigned): skip the trap
   663  	INT	$3
   664  	CMPL	SP, g_stackguard(AX)
   665  	JHI	2(PC)		// SP > stackguard (unsigned): skip the trap
   666  	INT	$3
   667  	RET
   668  
   669  TEXT runtime·memclr(SB),7,$0
        // Zeroes `count` bytes starting at `addr`: count/4 dwords with REP STOSL,
        // then the remaining count%4 bytes with REP STOSB.
   670  	MOVL	4(SP), DI		// arg 1 addr
   671  	MOVL	8(SP), CX		// arg 2 count
   672  	MOVL	CX, BX
   673  	ANDL	$3, BX			// BX = count % 4 (tail bytes)
   674  	SHRL	$2, CX			// CX = count / 4 (whole dwords)
   675  	MOVL	$0, AX			// fill value
   676  	CLD				// string stores move upward
   677  	REP
   678  	STOSL
   679  	MOVL	BX, CX
   680  	REP
   681  	STOSB
   682  	RET
   683  
   684  TEXT runtime·getcallerpc(SB),7,$0
        // Takes the address of the caller's first argument and returns, in AX,
        // the word stored 4 bytes below it (the return PC of the caller's frame).
   685  	MOVL	x+0(FP),AX		// addr of first arg
   686  	MOVL	-4(AX),AX		// get calling pc
   687  	RET
   688  
   689  TEXT runtime·setcallerpc(SB),7,$0
        // Counterpart of getcallerpc: overwrites the word 4 bytes below the address
        // given as first argument with the second argument.
   690  	MOVL	x+0(FP),AX		// addr of first arg
   691  	MOVL	x+4(FP), BX		// new pc value
   692  	MOVL	BX, -4(AX)		// set calling pc
   693  	RET
   694  
   695  TEXT runtime·getcallersp(SB), 7, $0
        // Returns its single argument unchanged in AX.
   696  	MOVL	sp+0(FP), AX
   697  	RET
   698  
   699  // int64 runtime·cputicks(void), so really
   700  // void runtime·cputicks(int64 *ticks)
        // Stores the RDTSC result (DX:AX) as a 64-bit value at *ticks.
   701  TEXT runtime·cputicks(SB),7,$0
   702  	RDTSC
   703  	MOVL	ret+0(FP), DI	// result pointer
   704  	MOVL	AX, 0(DI)	// low 32 bits
   705  	MOVL	DX, 4(DI)	// high 32 bits
   706  	RET
   707  
   708  TEXT runtime·ldt0setup(SB),7,$16
        // Calls runtime·setldt with three arguments: 7, &tls0 and 32.
   709  	// set up ldt 7 to point at tls0
   710  	// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   711  	// the entry number is just a hint.  setldt will set up GS with what it used.
   712  	MOVL	$7, 0(SP)	// requested entry number
   713  	LEAL	runtime·tls0(SB), AX
   714  	MOVL	AX, 4(SP)	// base address: &tls0
   715  	MOVL	$32, 8(SP)	// sizeof(tls array)
   716  	CALL	runtime·setldt(SB)
   717  	RET
   718  
   719  TEXT runtime·emptyfunc(SB),0,$0
        // Does nothing.  _rt0_386 calls it right after installing g0 ("fault if
        // stack check is wrong").  NOTE(review): the TEXT flag is 0 here, not the
        // 7 used by the other functions in this file - presumably so the
        // linker-generated stack check is present; confirm.
   720  	RET
   721  
   722  TEXT runtime·abort(SB),7,$0
        // Crashes the program with a breakpoint trap and never returns.
        // If the trap is resumed (for example by a debugger), spin in place
        // rather than falling through into whatever code is laid out after
        // this function.
   723  	INT $0x3
        abort_spin:
        	JMP	abort_spin
   724  
   725  TEXT runtime·stackguard(SB),7,$0
        // Writes two words into the caller-provided slots: the current SP into
        // sp+0(FP) and the current g's stackguard into limit+4(FP).
   726  	MOVL	SP, DX
   727  	MOVL	DX, sp+0(FP)
   728  	get_tls(CX)
   729  	MOVL	g(CX), BX
   730  	MOVL	g_stackguard(BX), DX
   731  	MOVL	DX, limit+4(FP)
   732  	RET
   733  
   734  GLOBL runtime·tls0(SB), $32	// 32-byte TLS area; ldt0setup hands its address and this size to setldt
   735  
   736  // hash function using AES hardware instructions
        // Loads the three arguments into the registers aeshashbody expects
        // (DX = hash pointer, CX = size, AX = data pointer) and tail-jumps to it.
   737  TEXT runtime·aeshash(SB),7,$0
   738  	MOVL	4(SP), DX	// ptr to hash value
   739  	MOVL	8(SP), CX	// size
   740  	MOVL	12(SP), AX	// ptr to data
   741  	JMP	runtime·aeshashbody(SB)
   742  
   743  TEXT runtime·aeshashstr(SB),7,$0
        // String variant of aeshash: the third argument points at a string header,
        // from which the data pointer (word 0) and length (word 1) are read before
        // the tail-jump to aeshashbody.  The size argument at 8(SP) is not read.
   744  	MOVL	4(SP), DX	// ptr to hash value
   745  	MOVL	12(SP), AX	// ptr to string struct
   746  	MOVL	4(AX), CX	// length of string
   747  	MOVL	(AX), AX	// string data
   748  	JMP	runtime·aeshashbody(SB)
   749  
   750  // AX: data
   751  // CX: length
   752  // DX: ptr to seed input / hash output
        // Mixes 16-byte blocks into X0 with AESENC, handles a trailing partial
        // block using the masks/shifts tables, runs three finalizing rounds and
        // writes the low 32 bits of X0 back through DX.
   753  TEXT runtime·aeshashbody(SB),7,$0
   754  	MOVL	(DX), X0	// seed to low 32 bits of xmm0
   755  	PINSRD	$1, CX, X0	// size to next 32 bits of xmm0
   756  	MOVO	runtime·aeskeysched+0(SB), X2
   757  	MOVO	runtime·aeskeysched+16(SB), X3
   758  aesloop:
   759  	CMPL	CX, $16
   760  	JB	aesloopend	// fewer than 16 bytes left
   761  	MOVOU	(AX), X1
   762  	AESENC	X2, X0
   763  	AESENC	X1, X0
   764  	SUBL	$16, CX
   765  	ADDL	$16, AX
   766  	JMP	aesloop
   767  aesloopend:
   768  	TESTL	CX, CX
   769  	JE	finalize	// no partial block
   770  
   771  	TESTL	$16, AX
   772  	JNE	highpartial
   773  
   774  	// address ends in 0xxxx.  16 bytes loaded
   775  	// at this address won't cross a page boundary, so
   776  	// we can load it directly.
   777  	MOVOU	(AX), X1
   778  	ADDL	CX, CX		// double CX so that CX*8 steps over 16-byte table entries
   779  	PAND	masks(SB)(CX*8), X1
   780  	JMP	partial
   781  highpartial:
   782  	// address ends in 1xxxx.  Might be up against
   783  	// a page boundary, so load ending at last byte.
   784  	// Then shift bytes down using pshufb.
   785  	MOVOU	-16(AX)(CX*1), X1
   786  	ADDL	CX, CX		// double CX so that CX*8 steps over 16-byte table entries
   787  	PSHUFB	shifts(SB)(CX*8), X1
   788  partial:
   789  	// incorporate partial block into hash
   790  	AESENC	X3, X0
   791  	AESENC	X1, X0
   792  finalize:	
   793  	// finalize hash
   794  	AESENC	X2, X0
   795  	AESENC	X3, X0
   796  	AESENC	X2, X0
   797  	MOVL	X0, (DX)	// store the low 32 bits of X0 as the hash
   798  	RET
   799  
   800  TEXT runtime·aeshash32(SB),7,$0
        // Fixed-size variant for 4 bytes of data: the seed and the data word are
        // placed in X0, three AESENC rounds are applied with the aeskeysched
        // words, and the low 32 bits are stored back through the hash pointer.
        // The size argument at 8(SP) is not read.
   801  	MOVL	4(SP), DX	// ptr to hash value
   802  	MOVL	12(SP), AX	// ptr to data
   803  	MOVL	(DX), X0	// seed
   804  	PINSRD	$1, (AX), X0	// data
   805  	AESENC	runtime·aeskeysched+0(SB), X0
   806  	AESENC	runtime·aeskeysched+16(SB), X0
   807  	AESENC	runtime·aeskeysched+0(SB), X0
   808  	MOVL	X0, (DX)	// store the low 32 bits of X0 as the hash
   809  	RET
   810  
   811  TEXT runtime·aeshash64(SB),7,$0
        // Fixed-size variant for 8 bytes of data: the data fills the low 64 bits
        // of X0 and the seed goes into dword lane 2, followed by the same three
        // AESENC rounds as aeshash32.  The size argument at 8(SP) is not read.
   812  	MOVL	4(SP), DX	// ptr to hash value
   813  	MOVL	12(SP), AX	// ptr to data
   814  	MOVQ	(AX), X0	// data
   815  	PINSRD	$2, (DX), X0	// seed
   816  	AESENC	runtime·aeskeysched+0(SB), X0
   817  	AESENC	runtime·aeskeysched+16(SB), X0
   818  	AESENC	runtime·aeskeysched+0(SB), X0
   819  	MOVL	X0, (DX)	// store the low 32 bits of X0 as the hash
   820  	RET
   820  	RET
   821  
   822  
   823  // simple mask to get rid of data in the high part of the register.
        // Sixteen 16-byte entries emitted with LONG inside a TEXT symbol.
        // Entry n (n = 0..15) has its low n bytes set to 0xff and the rest zero.
        // aeshashbody indexes it as masks + n*16 and applies it with PAND.
   824  TEXT masks(SB),7,$0
   825  	LONG $0x00000000	// entry 0: keep no bytes
   826  	LONG $0x00000000
   827  	LONG $0x00000000
   828  	LONG $0x00000000
   829  	
   830  	LONG $0x000000ff	// entry 1
   831  	LONG $0x00000000
   832  	LONG $0x00000000
   833  	LONG $0x00000000
   834  	
   835  	LONG $0x0000ffff	// entry 2
   836  	LONG $0x00000000
   837  	LONG $0x00000000
   838  	LONG $0x00000000
   839  	
   840  	LONG $0x00ffffff	// entry 3
   841  	LONG $0x00000000
   842  	LONG $0x00000000
   843  	LONG $0x00000000
   844  	
   845  	LONG $0xffffffff	// entry 4
   846  	LONG $0x00000000
   847  	LONG $0x00000000
   848  	LONG $0x00000000
   849  	
   850  	LONG $0xffffffff	// entry 5
   851  	LONG $0x000000ff
   852  	LONG $0x00000000
   853  	LONG $0x00000000
   854  	
   855  	LONG $0xffffffff	// entry 6
   856  	LONG $0x0000ffff
   857  	LONG $0x00000000
   858  	LONG $0x00000000
   859  	
   860  	LONG $0xffffffff	// entry 7
   861  	LONG $0x00ffffff
   862  	LONG $0x00000000
   863  	LONG $0x00000000
   864  	
   865  	LONG $0xffffffff	// entry 8
   866  	LONG $0xffffffff
   867  	LONG $0x00000000
   868  	LONG $0x00000000
   869  	
   870  	LONG $0xffffffff	// entry 9
   871  	LONG $0xffffffff
   872  	LONG $0x000000ff
   873  	LONG $0x00000000
   874  	
   875  	LONG $0xffffffff	// entry 10
   876  	LONG $0xffffffff
   877  	LONG $0x0000ffff
   878  	LONG $0x00000000
   879  	
   880  	LONG $0xffffffff	// entry 11
   881  	LONG $0xffffffff
   882  	LONG $0x00ffffff
   883  	LONG $0x00000000
   884  	
   885  	LONG $0xffffffff	// entry 12
   886  	LONG $0xffffffff
   887  	LONG $0xffffffff
   888  	LONG $0x00000000
   889  	
   890  	LONG $0xffffffff	// entry 13
   891  	LONG $0xffffffff
   892  	LONG $0xffffffff
   893  	LONG $0x000000ff
   894  	
   895  	LONG $0xffffffff	// entry 14
   896  	LONG $0xffffffff
   897  	LONG $0xffffffff
   898  	LONG $0x0000ffff
   899  	
   900  	LONG $0xffffffff	// entry 15
   901  	LONG $0xffffffff
   902  	LONG $0xffffffff
   903  	LONG $0x00ffffff
   904  
   905  	// these are arguments to pshufb.  They move data down from
   906  	// the high bytes of the register to the low bytes of the register.
   907  	// index is how many bytes to move.
        	// Sixteen 16-byte entries; aeshashbody indexes the table as shifts + n*16.
        	// In entry n the low n control bytes are 16-n .. 15 and the rest are 0xff
        	// (per PSHUFB semantics a control byte with its top bit set yields a
        	// zero destination byte).
   908  TEXT shifts(SB),7,$0
   909  	LONG $0x00000000	// entry 0
   910  	LONG $0x00000000
   911  	LONG $0x00000000
   912  	LONG $0x00000000
   913  	
   914  	LONG $0xffffff0f	// entry 1: source byte 15 moves to byte 0
   915  	LONG $0xffffffff
   916  	LONG $0xffffffff
   917  	LONG $0xffffffff
   918  	
   919  	LONG $0xffff0f0e	// entry 2
   920  	LONG $0xffffffff
   921  	LONG $0xffffffff
   922  	LONG $0xffffffff
   923  	
   924  	LONG $0xff0f0e0d	// entry 3
   925  	LONG $0xffffffff
   926  	LONG $0xffffffff
   927  	LONG $0xffffffff
   928  	
   929  	LONG $0x0f0e0d0c	// entry 4
   930  	LONG $0xffffffff
   931  	LONG $0xffffffff
   932  	LONG $0xffffffff
   933  	
   934  	LONG $0x0e0d0c0b	// entry 5
   935  	LONG $0xffffff0f
   936  	LONG $0xffffffff
   937  	LONG $0xffffffff
   938  	
   939  	LONG $0x0d0c0b0a	// entry 6
   940  	LONG $0xffff0f0e
   941  	LONG $0xffffffff
   942  	LONG $0xffffffff
   943  	
   944  	LONG $0x0c0b0a09	// entry 7
   945  	LONG $0xff0f0e0d
   946  	LONG $0xffffffff
   947  	LONG $0xffffffff
   948  	
   949  	LONG $0x0b0a0908	// entry 8
   950  	LONG $0x0f0e0d0c
   951  	LONG $0xffffffff
   952  	LONG $0xffffffff
   953  	
   954  	LONG $0x0a090807	// entry 9
   955  	LONG $0x0e0d0c0b
   956  	LONG $0xffffff0f
   957  	LONG $0xffffffff
   958  	
   959  	LONG $0x09080706	// entry 10
   960  	LONG $0x0d0c0b0a
   961  	LONG $0xffff0f0e
   962  	LONG $0xffffffff
   963  	
   964  	LONG $0x08070605	// entry 11
   965  	LONG $0x0c0b0a09
   966  	LONG $0xff0f0e0d
   967  	LONG $0xffffffff
   968  	
   969  	LONG $0x07060504	// entry 12
   970  	LONG $0x0b0a0908
   971  	LONG $0x0f0e0d0c
   972  	LONG $0xffffffff
   973  	
   974  	LONG $0x06050403	// entry 13
   975  	LONG $0x0a090807
   976  	LONG $0x0e0d0c0b
   977  	LONG $0xffffff0f
   978  	
   979  	LONG $0x05040302	// entry 14
   980  	LONG $0x09080706
   981  	LONG $0x0d0c0b0a
   982  	LONG $0xffff0f0e
   983  	
   984  	LONG $0x04030201	// entry 15
   985  	LONG $0x08070605
   986  	LONG $0x0c0b0a09
   987  	LONG $0xff0f0e0d
   988  
   989  TEXT runtime·memeq(SB),7,$0
        // Loads (a, b, count) into the registers memeqbody expects (SI, DI, BX) and
        // tail-jumps to it, so memeqbody's RET returns to this function's caller
        // with the result in AX.
   990  	MOVL	a+0(FP), SI
   991  	MOVL	b+4(FP), DI
   992  	MOVL	count+8(FP), BX
   993  	JMP	runtime·memeqbody(SB)
   994  
   995  
   996  TEXT bytes·Equal(SB),7,$0
        // Compares the two arguments a and b (pointer at +0/+12, length at +4/+16).
        // Stores 0 without comparing any bytes when the lengths differ; otherwise
        // stores memeqbody's result.  The result is one byte at ret+24(FP).
   997  	MOVL	a_len+4(FP), BX
   998  	MOVL	b_len+16(FP), CX
   999  	XORL	AX, AX		// default result: not equal
  1000  	CMPL	BX, CX
  1001  	JNE	eqret		// different lengths
  1002  	MOVL	a+0(FP), SI
  1003  	MOVL	b+12(FP), DI
  1004  	CALL	runtime·memeqbody(SB)	// inputs: SI = a, DI = b, BX = length
  1005  eqret:
  1006  	MOVB	AX, ret+24(FP)
  1007  	RET
  1008  
  1009  // a in SI
  1010  // b in DI
  1011  // count in BX
        // Result in AX: 1 if the two count-byte regions are equal, else 0.
        // Strategy: 64-byte SSE2 chunks while at least 64 bytes remain (and SSE2 is
        // reported in cpuid_edx), then 4-byte words, then one final 4-byte compare
        // that ends exactly at the end of the buffers.  Counts below 4 take the
        // separate `small` path.
  1012  TEXT runtime·memeqbody(SB),7,$0
  1013  	XORL	AX, AX		// result defaults to 0; the early RETs below rely on this
  1014  
  1015  	CMPL	BX, $4
  1016  	JB	small
  1017  
  1018  	// 64 bytes at a time using xmm registers
  1019  hugeloop:
  1020  	CMPL	BX, $64
  1021  	JB	bigloop
  1022  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1023  	JE	bigloop
  1024  	MOVOU	(SI), X0
  1025  	MOVOU	(DI), X1
  1026  	MOVOU	16(SI), X2
  1027  	MOVOU	16(DI), X3
  1028  	MOVOU	32(SI), X4
  1029  	MOVOU	32(DI), X5
  1030  	MOVOU	48(SI), X6
  1031  	MOVOU	48(DI), X7
  1032  	PCMPEQB	X1, X0		// per-byte equality for each 16-byte pair
  1033  	PCMPEQB	X3, X2
  1034  	PCMPEQB	X5, X4
  1035  	PCMPEQB	X7, X6
  1036  	PAND	X2, X0		// AND the four comparison results into X0
  1037  	PAND	X6, X4
  1038  	PAND	X4, X0
  1039  	PMOVMSKB X0, DX		// one bit per byte; 0xffff means all 64 bytes matched
  1040  	ADDL	$64, SI
  1041  	ADDL	$64, DI
  1042  	SUBL	$64, BX
  1043  	CMPL	DX, $0xffff
  1044  	JEQ	hugeloop
  1045  	RET			// mismatch: AX is still 0
  1046  
  1047  	// 4 bytes at a time using 32-bit register
  1048  bigloop:
  1049  	CMPL	BX, $4
  1050  	JBE	leftover
  1051  	MOVL	(SI), CX
  1052  	MOVL	(DI), DX
  1053  	ADDL	$4, SI
  1054  	ADDL	$4, DI
  1055  	SUBL	$4, BX
  1056  	CMPL	CX, DX
  1057  	JEQ	bigloop
  1058  	RET			// mismatch: AX is still 0
  1059  
  1060  	// remaining 0-4 bytes
        	// The load ends at the last byte of each buffer, so for fewer than 4
        	// remaining bytes it re-reads bytes that already compared equal.
  1061  leftover:
  1062  	MOVL	-4(SI)(BX*1), CX
  1063  	MOVL	-4(DI)(BX*1), DX
  1064  	CMPL	CX, DX
  1065  	SETEQ	AX
  1066  	RET
  1067  
  1068  small:
  1069  	CMPL	BX, $0
  1070  	JEQ	equal		// zero-length: ZF is set here, so SETEQ below yields 1
  1071  
  1072  	LEAL	0(BX*8), CX
  1073  	NEGL	CX		// CX = -8*count; as a shift count (low 5 bits) this is 32 - 8*count
  1074  
  1075  	MOVL	SI, DX
  1076  	CMPB	DX, $0xfc
  1077  	JA	si_high		// low address byte above 0xfc
  1078  
  1079  	// load at SI won't cross a page boundary.
  1080  	MOVL	(SI), SI
  1081  	JMP	si_finish
  1082  si_high:
  1083  	// address ends in 111111xx.  Load up to bytes we want, move to correct position.
  1084  	MOVL	-4(SI)(BX*1), SI
  1085  	SHRL	CX, SI
  1086  si_finish:
  1087  
  1088  	// same for DI.
  1089  	MOVL	DI, DX
  1090  	CMPB	DX, $0xfc
  1091  	JA	di_high
  1092  	MOVL	(DI), DI
  1093  	JMP	di_finish
  1094  di_high:
  1095  	MOVL	-4(DI)(BX*1), DI
  1096  	SHRL	CX, DI
  1097  di_finish:
  1098  
  1099  	SUBL	SI, DI		// difference of the two loaded words
  1100  	SHLL	CX, DI		// shift out the bytes beyond count; ZF set iff the low count bytes matched
  1101  equal:
  1102  	SETEQ	AX
  1103  	RET
  1102  	SETEQ	AX
  1103  	RET