github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/runtime/asm_amd64.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// rt0_go bootstraps the runtime. It expects argc in DI and argv in SI,
        	// lays out g0's stack bounds on the current stack, records CPUID bits,
        	// calls _cgo_init if one is set, sets up TLS where needed, links g0
        	// and m0, runs check/args/osinit/schedinit, creates the goroutine for
        	// runtime·main and calls mstart. Falling out of mstart is a crash.
    11  	// copy arguments forward on an even stack
    12  	MOVQ	DI, AX		// argc
    13  	MOVQ	SI, BX		// argv
    14  	SUBQ	$(4*8+7), SP		// 2args 2auto
    15  	ANDQ	$~15, SP
    16  	MOVQ	AX, 16(SP)
    17  	MOVQ	BX, 24(SP)
    18  	
    19  	// create istack out of the given (operating system) stack.
    20  	// _cgo_init may update stackguard.
        	// g0.stack = [SP-64*1024+104, SP); both stack guards start at stack.lo.
    21  	MOVQ	$runtime·g0(SB), DI
    22  	LEAQ	(-64*1024+104)(SP), BX
    23  	MOVQ	BX, g_stackguard0(DI)
    24  	MOVQ	BX, g_stackguard1(DI)
    25  	MOVQ	BX, (g_stack+stack_lo)(DI)
    26  	MOVQ	SP, (g_stack+stack_hi)(DI)
    27  
    28  	// find out information about the processor we're on
        	// CPUID leaf 0 is issued first; if it reports AX == 0, leaf 1 is skipped.
        	// Otherwise leaf 1's CX and DX are stored in cpuid_ecx and cpuid_edx.
    29  	MOVQ	$0, AX
    30  	CPUID
    31  	CMPQ	AX, $0
    32  	JE	nocpuinfo
    33  	MOVQ	$1, AX
    34  	CPUID
    35  	MOVL	CX, runtime·cpuid_ecx(SB)
    36  	MOVL	DX, runtime·cpuid_edx(SB)
    37  nocpuinfo:	
    38  	
    39  	// if there is an _cgo_init, call it.
    40  	MOVQ	_cgo_init(SB), AX
    41  	TESTQ	AX, AX
    42  	JZ	needtls
    43  	// g0 already in DI
    44  	MOVQ	DI, CX	// Win64 uses CX for first parameter
    45  	MOVQ	$setg_gcc<>(SB), SI
    46  	CALL	AX
    47  
    48  	// update stackguard after _cgo_init
        	// both guards become g0.stack.lo + _StackGuard
    49  	MOVQ	$runtime·g0(SB), CX
    50  	MOVQ	(g_stack+stack_lo)(CX), AX
    51  	ADDQ	$const__StackGuard, AX
    52  	MOVQ	AX, g_stackguard0(CX)
    53  	MOVQ	AX, g_stackguard1(CX)
    54  
        	// After _cgo_init, only Windows (iswindows != 0) still falls through
        	// to the TLS setup below; every other system jumps straight to ok.
    55  	CMPL	runtime·iswindows(SB), $0
    56  	JEQ ok
    57  needtls:
    58  	// skip TLS setup on Plan 9
    59  	CMPL	runtime·isplan9(SB), $1
    60  	JEQ ok
    61  	// skip TLS setup on Solaris
    62  	CMPL	runtime·issolaris(SB), $1
    63  	JEQ ok
    64  
    65  	LEAQ	runtime·tls0(SB), DI
    66  	CALL	runtime·settls(SB)
    67  
    68  	// store through it, to make sure it works
        	// 0x123 is written via the TLS g slot and read back from tls0 directly;
        	// a mismatch executes the store to address 0 below, which faults.
    69  	get_tls(BX)
    70  	MOVQ	$0x123, g(BX)
    71  	MOVQ	runtime·tls0(SB), AX
    72  	CMPQ	AX, $0x123
    73  	JEQ 2(PC)
    74  	MOVL	AX, 0	// abort
    75  ok:
    76  	// set the per-goroutine and per-mach "registers"
    77  	get_tls(BX)
    78  	LEAQ	runtime·g0(SB), CX
    79  	MOVQ	CX, g(BX)
    80  	LEAQ	runtime·m0(SB), AX
    81  
    82  	// save m->g0 = g0
    83  	MOVQ	CX, m_g0(AX)
    84  	// save m0 to g0->m
    85  	MOVQ	AX, g_m(CX)
    86  
    87  	CLD				// convention is D is always left cleared
    88  	CALL	runtime·check(SB)
    89  
        	// argc/argv were parked at 16(SP)/24(SP) above; move them into the
        	// outgoing argument slots 0(SP)/8(SP) for runtime·args.
    90  	MOVL	16(SP), AX		// copy argc
    91  	MOVL	AX, 0(SP)
    92  	MOVQ	24(SP), AX		// copy argv
    93  	MOVQ	AX, 8(SP)
    94  	CALL	runtime·args(SB)
    95  	CALL	runtime·osinit(SB)
    96  	CALL	runtime·schedinit(SB)
    97  
    98  	// create a new goroutine to start program
        	// newproc's two words are pushed so that 0(SP) is the argument size (0)
        	// and 8(SP) is the entry (&main·f); both are popped again after the call.
    99  	MOVQ	$runtime·main·f(SB), BP		// entry
   100  	PUSHQ	BP
   101  	PUSHQ	$0			// arg size
   102  	CALL	runtime·newproc(SB)
   103  	POPQ	AX
   104  	POPQ	AX
   105  
   106  	// start this M
   107  	CALL	runtime·mstart(SB)
   108  
   109  	MOVL	$0xf1, 0xf1  // crash
   110  	RET
   111  
   112  DATA	runtime·main·f+0(SB)/8,$runtime·main(SB)
        // runtime·main·f is an 8-byte read-only word holding the address of
        // runtime·main; rt0_go passes the address of this word to newproc.
   113  GLOBL	runtime·main·f(SB),RODATA,$8
   114  
   115  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Emits the raw byte 0xCC (the one-byte x86 INT3 breakpoint opcode)
        	// and returns once execution continues past it.
   116  	BYTE	$0xcc
   117  	RET
   118  
   119  TEXT runtime·asminit(SB),NOSPLIT,$0-0
        	// asminit takes no arguments and returns immediately on this platform.
   120  	// No per-thread init.
   121  	RET
   122  
   123  /*
   124   *  go-routine
   125   */
   126  
   127  // void gosave(Gobuf*)
   128  // save state in Gobuf; setjmp
        // Records the caller's SP and PC and the current g in *buf, and zeroes
        // buf's ret and ctxt fields. AX, BX and CX are used as scratch.
   129  TEXT runtime·gosave(SB), NOSPLIT, $0-8
   130  	MOVQ	buf+0(FP), AX		// gobuf
   131  	LEAQ	buf+0(FP), BX		// caller's SP
   132  	MOVQ	BX, gobuf_sp(AX)
   133  	MOVQ	0(SP), BX		// caller's PC
   134  	MOVQ	BX, gobuf_pc(AX)
   135  	MOVQ	$0, gobuf_ret(AX)
   136  	MOVQ	$0, gobuf_ctxt(AX)
   137  	get_tls(CX)
   138  	MOVQ	g(CX), BX		// BX = current g
   139  	MOVQ	BX, gobuf_g(AX)
   140  	RET
   141  
   142  // void gogo(Gobuf*)
   143  // restore state from Gobuf; longjmp
        // Installs buf.g as the current g, loads SP from buf.sp, AX from buf.ret
        // and DX from buf.ctxt, zeroes those three fields, and jumps to buf.pc.
        // Control does not come back to gogo's caller through this function.
   144  TEXT runtime·gogo(SB), NOSPLIT, $0-8
   145  	MOVQ	buf+0(FP), BX		// gobuf
   146  	MOVQ	gobuf_g(BX), DX
        	// The load below exists only to fault on a nil g; CX is overwritten
        	// by get_tls on the next line.
   147  	MOVQ	0(DX), CX		// make sure g != nil
   148  	get_tls(CX)
   149  	MOVQ	DX, g(CX)
   150  	MOVQ	gobuf_sp(BX), SP	// restore SP
   151  	MOVQ	gobuf_ret(BX), AX
   152  	MOVQ	gobuf_ctxt(BX), DX
   153  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   154  	MOVQ	$0, gobuf_ret(BX)
   155  	MOVQ	$0, gobuf_ctxt(BX)
   156  	MOVQ	gobuf_pc(BX), BX	// BX no longer points at the gobuf
   157  	JMP	BX
   158  
   159  // func mcall(fn func(*g))
   160  // Switch to m->g0's stack, call fn(g).
   161  // Fn must never return.  It should gogo(&g->sched)
   162  // to keep running g.
        // The caller's PC and SP and the g itself are saved in g->sched before
        // switching. If the current g already is m->g0, control goes to badmcall;
        // if fn does return, control goes to badmcall2.
   163  TEXT runtime·mcall(SB), NOSPLIT, $0-8
   164  	MOVQ	fn+0(FP), DI
   165  	
   166  	get_tls(CX)
   167  	MOVQ	g(CX), AX	// save state in g->sched
   168  	MOVQ	0(SP), BX	// caller's PC
   169  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   170  	LEAQ	fn+0(FP), BX	// caller's SP
   171  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   172  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   173  
   174  	// switch to m->g0 & its stack, call fn
   175  	MOVQ	g(CX), BX
   176  	MOVQ	g_m(BX), BX
   177  	MOVQ	m_g0(BX), SI
   178  	CMPQ	SI, AX	// if g == m->g0 call badmcall
   179  	JNE	3(PC)	// skip the two-instruction badmcall jump
   180  	MOVQ	$runtime·badmcall(SB), AX
   181  	JMP	AX
   182  	MOVQ	SI, g(CX)	// g = m->g0
   183  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   184  	PUSHQ	AX		// fn's argument: the g that called mcall
   185  	MOVQ	DI, DX		// DX = fn value
   186  	MOVQ	0(DI), DI	// DI = code pointer stored in the first word of fn
   187  	CALL	DI
        	// Only reached if fn returned, which it must not do.
   188  	POPQ	AX
   189  	MOVQ	$runtime·badmcall2(SB), AX
   190  	JMP	AX
   191  	RET
   192  
   193  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   194  // of the G stack.  We need to distinguish the routine that
   195  // lives at the bottom of the G stack from the one that lives
   196  // at the top of the system stack because the one at the top of
   197  // the system stack terminates the stack walk (see topofstack()).
        // Its body is a lone RET; systemstack only stores its address as the
        // saved sched.pc of the goroutine it switches away from.
   198  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   199  	RET
   200  
   201  // func systemstack(fn func())
        // Runs fn on the m's g0 stack. When the current g is already gsignal or
        // g0, fn is called directly on the current stack. When it is m->curg,
        // pc/sp/g are saved in g->sched, g and SP are switched to g0, fn is
        // called, and then g and SP are switched back to m->curg. Any other g is
        // reported through badsystemstack.
   202  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   203  	MOVQ	fn+0(FP), DI	// DI = fn
   204  	get_tls(CX)
   205  	MOVQ	g(CX), AX	// AX = g
   206  	MOVQ	g_m(AX), BX	// BX = m
   207  
   208  	MOVQ	m_gsignal(BX), DX	// DX = gsignal
   209  	CMPQ	AX, DX
   210  	JEQ	noswitch
   211  
   212  	MOVQ	m_g0(BX), DX	// DX = g0
   213  	CMPQ	AX, DX
   214  	JEQ	noswitch
   215  
   216  	MOVQ	m_curg(BX), BP
   217  	CMPQ	AX, BP
   218  	JEQ	switch
   219  	
   220  	// Bad: g is not gsignal, not g0, not curg. What is it?
        	// NOTE(review): if badsystemstack were to return, execution would fall
        	// through into switch below; presumably it never returns, confirm.
   221  	MOVQ	$runtime·badsystemstack(SB), AX
   222  	CALL	AX
   223  
   224  switch:
   225  	// save our state in g->sched.  Pretend to
   226  	// be systemstack_switch if the G stack is scanned.
   227  	MOVQ	$runtime·systemstack_switch(SB), BP
   228  	MOVQ	BP, (g_sched+gobuf_pc)(AX)
   229  	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   230  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   231  
   232  	// switch to g0
   233  	MOVQ	DX, g(CX)
   234  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   235  	// make it look like mstart called systemstack on g0, to stop traceback
        	// (mstart's address is stored in the word just below g0's saved SP,
        	// and SP is pointed at that word)
   236  	SUBQ	$8, BX
   237  	MOVQ	$runtime·mstart(SB), DX
   238  	MOVQ	DX, 0(BX)
   239  	MOVQ	BX, SP
   240  
   241  	// call target function
   242  	MOVQ	DI, DX		// DX = fn value
   243  	MOVQ	0(DI), DI	// DI = code pointer stored in the first word of fn
   244  	CALL	DI
   245  
   246  	// switch back to g
        	// g is re-derived from m->curg, SP is reloaded from its sched.sp, and
        	// that saved sp field is then cleared.
   247  	get_tls(CX)
   248  	MOVQ	g(CX), AX
   249  	MOVQ	g_m(AX), BX
   250  	MOVQ	m_curg(BX), AX
   251  	MOVQ	AX, g(CX)
   252  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   253  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   254  	RET
   255  
   256  noswitch:
   257  	// already on m stack, just call directly
   258  	MOVQ	DI, DX
   259  	MOVQ	0(DI), DI
   260  	CALL	DI
   261  	RET
   262  
   263  /*
   264   * support for morestack
   265   */
   266  
   267  // Called during function prolog when more stack is needed.
   268  //
   269  // The traceback routines see morestack on a g0 as being
   270  // the top of a stack (for example, morestack calling newstack
   271  // calling the scheduler calling newm calling gc), so we must
   272  // record an argument size. For that purpose, it has no arguments.
        //
        // On entry 0(SP) is the PC in f (the function that needs more stack),
        // 8(SP) is the PC of f's caller, and DX is stored as the saved ctxt.
        // The routine fills m->morebuf and g->sched, switches to m->g0's stack
        // and calls newstack, which is expected not to return.
   273  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   274  	// Cannot grow scheduler stack (m->g0).
   275  	get_tls(CX)
   276  	MOVQ	g(CX), BX
   277  	MOVQ	g_m(BX), BX
   278  	MOVQ	m_g0(BX), SI
   279  	CMPQ	g(CX), SI
   280  	JNE	2(PC)
   281  	INT	$3
   282  
   283  	// Cannot grow signal stack (m->gsignal).
   284  	MOVQ	m_gsignal(BX), SI
   285  	CMPQ	g(CX), SI
   286  	JNE	2(PC)
   287  	INT	$3
   288  
   289  	// Called from f.
   290  	// Set m->morebuf to f's caller.
   291  	MOVQ	8(SP), AX	// f's caller's PC
   292  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   293  	LEAQ	16(SP), AX	// f's caller's SP
   294  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   295  	get_tls(CX)
   296  	MOVQ	g(CX), SI
   297  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   298  
   299  	// Set g->sched to context in f.
   300  	MOVQ	0(SP), AX // f's PC
   301  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   302  	MOVQ	SI, (g_sched+gobuf_g)(SI)
   303  	LEAQ	8(SP), AX // f's SP
   304  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   305  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX as left by the caller
   306  
   307  	// Call newstack on m->g0's stack.
   308  	MOVQ	m_g0(BX), BP
   309  	MOVQ	BP, g(CX)
   310  	MOVQ	(g_sched+gobuf_sp)(BP), SP
   311  	CALL	runtime·newstack(SB)
   312  	MOVQ	$0, 0x1003	// crash if newstack returns
   313  	RET
   314  
   315  // morestack but not preserving ctxt.
        // DX is zeroed first, so morestack records a ctxt of 0 in g->sched.
   316  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   317  	MOVL	$0, DX
   318  	JMP	runtime·morestack(SB)
   319  
   320  // reflectcall: call a function with the given argument list
   321  // func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
   322  // we don't have variable-sized frames, so we use a small number
   323  // of constant-sized-frame functions to encode a few bits of size in the pc.
   324  // Caution: ugly multiline assembly macros in your future!
   325  
   326  #define DISPATCH(NAME,MAXSIZE)		\
   327  	CMPQ	CX, $MAXSIZE;		\
   328  	JA	3(PC);			\
   329  	MOVQ	$NAME(SB), AX;		\
   330  	JMP	AX
   331  // Note: can't just "JMP NAME(SB)" - bad inlining results.
        // DISPATCH(NAME, MAXSIZE) jumps to NAME when CX <= MAXSIZE (unsigned);
        // otherwise JA 3(PC) steps over the MOVQ/JMP pair and execution
        // continues with whatever follows the macro.
   332  
   333  TEXT ·reflectcall(SB), NOSPLIT, $0-24
        	// CX = argsize, zero-extended to 64 bits. Control is handed, without
        	// touching the arguments, to the first callN below whose N >= argsize;
        	// a size above 1073741824 ends up in badreflectcall.
   334  	MOVLQZX argsize+16(FP), CX
   335  	DISPATCH(runtime·call16, 16)
   336  	DISPATCH(runtime·call32, 32)
   337  	DISPATCH(runtime·call64, 64)
   338  	DISPATCH(runtime·call128, 128)
   339  	DISPATCH(runtime·call256, 256)
   340  	DISPATCH(runtime·call512, 512)
   341  	DISPATCH(runtime·call1024, 1024)
   342  	DISPATCH(runtime·call2048, 2048)
   343  	DISPATCH(runtime·call4096, 4096)
   344  	DISPATCH(runtime·call8192, 8192)
   345  	DISPATCH(runtime·call16384, 16384)
   346  	DISPATCH(runtime·call32768, 32768)
   347  	DISPATCH(runtime·call65536, 65536)
   348  	DISPATCH(runtime·call131072, 131072)
   349  	DISPATCH(runtime·call262144, 262144)
   350  	DISPATCH(runtime·call524288, 524288)
   351  	DISPATCH(runtime·call1048576, 1048576)
   352  	DISPATCH(runtime·call2097152, 2097152)
   353  	DISPATCH(runtime·call4194304, 4194304)
   354  	DISPATCH(runtime·call8388608, 8388608)
   355  	DISPATCH(runtime·call16777216, 16777216)
   356  	DISPATCH(runtime·call33554432, 33554432)
   357  	DISPATCH(runtime·call67108864, 67108864)
   358  	DISPATCH(runtime·call134217728, 134217728)
   359  	DISPATCH(runtime·call268435456, 268435456)
   360  	DISPATCH(runtime·call536870912, 536870912)
   361  	DISPATCH(runtime·call1073741824, 1073741824)
   362  	MOVQ	$runtime·badreflectcall(SB), AX
   363  	JMP	AX
   364  
   365  #define CALLFN(NAME,MAXSIZE)			\
   366  TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   367  	NO_LOCAL_POINTERS;			\
   368  	/* copy arguments to stack */		\
   369  	MOVQ	argptr+8(FP), SI;		\
   370  	MOVLQZX argsize+16(FP), CX;		\
   371  	MOVQ	SP, DI;				\
   372  	REP;MOVSB;				\
   373  	/* call function */			\
   374  	MOVQ	f+0(FP), DX;			\
   375  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   376  	CALL	(DX);				\
   377  	/* copy return values back */		\
   378  	MOVQ	argptr+8(FP), DI;		\
   379  	MOVLQZX	argsize+16(FP), CX;		\
   380  	MOVLQZX retoffset+20(FP), BX;		\
   381  	MOVQ	SP, SI;				\
   382  	ADDQ	BX, DI;				\
   383  	ADDQ	BX, SI;				\
   384  	SUBQ	BX, CX;				\
   385  	REP;MOVSB;				\
   386  	RET
   387  
        // CALLFN(NAME, MAXSIZE) defines a WRAPPER function with a MAXSIZE-byte
        // frame and the 24 bytes of arguments (f, argptr, argsize, retoffset)
        // read above. It copies argsize bytes from argptr to the bottom of its
        // own frame, calls the code pointer stored at 0(f) with DX = f, and then
        // copies bytes [retoffset, argsize) of the frame back to the same
        // offsets in argptr. One instance is generated per size that reflectcall
        // dispatches to.
   388  CALLFN(·call16, 16)
   389  CALLFN(·call32, 32)
   390  CALLFN(·call64, 64)
   391  CALLFN(·call128, 128)
   392  CALLFN(·call256, 256)
   393  CALLFN(·call512, 512)
   394  CALLFN(·call1024, 1024)
   395  CALLFN(·call2048, 2048)
   396  CALLFN(·call4096, 4096)
   397  CALLFN(·call8192, 8192)
   398  CALLFN(·call16384, 16384)
   399  CALLFN(·call32768, 32768)
   400  CALLFN(·call65536, 65536)
   401  CALLFN(·call131072, 131072)
   402  CALLFN(·call262144, 262144)
   403  CALLFN(·call524288, 524288)
   404  CALLFN(·call1048576, 1048576)
   405  CALLFN(·call2097152, 2097152)
   406  CALLFN(·call4194304, 4194304)
   407  CALLFN(·call8388608, 8388608)
   408  CALLFN(·call16777216, 16777216)
   409  CALLFN(·call33554432, 33554432)
   410  CALLFN(·call67108864, 67108864)
   411  CALLFN(·call134217728, 134217728)
   412  CALLFN(·call268435456, 268435456)
   413  CALLFN(·call536870912, 536870912)
   414  CALLFN(·call1073741824, 1073741824)
   415  
   416  // bool cas(int32 *val, int32 old, int32 new)
        // Atomic 32-bit compare-and-swap:
        //	if(*val == old){
        //		*val = new;
        //		return 1;
        //	}
        //	return 0;
        // The locked CMPXCHGL compares AX with *val and sets ZF on a match, so
        // the boolean result is ZF written out as a single byte.
   423  TEXT runtime·cas(SB), NOSPLIT, $0-17
        	MOVQ	ptr+0(FP), DI		// DI = val
   425  	MOVL	old+8(FP), AX		// AX = expected value (implicit CMPXCHG operand)
        	MOVL	new+12(FP), DX		// DX = replacement
   427  	LOCK
        	CMPXCHGL	DX, 0(DI)
        	SETEQ	ret+16(FP)		// 1 if the swap happened, else 0
   435  	RET
   436  
   437  // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
        // Atomic 64-bit compare-and-swap:
        //	if(*val == old){
        //		*val = new;
        //		return 1;
        //	}
        //	return 0;
        // The locked CMPXCHGQ compares AX with *val and sets ZF on a match, so
        // the boolean result is ZF written out as a single byte.
   445  TEXT runtime·cas64(SB), NOSPLIT, $0-25
        	MOVQ	ptr+0(FP), DI		// DI = val
   447  	MOVQ	old+8(FP), AX		// AX = expected value (implicit CMPXCHG operand)
        	MOVQ	new+16(FP), DX		// DX = replacement
   449  	LOCK
        	CMPXCHGQ	DX, 0(DI)
        	SETEQ	ret+24(FP)		// 1 if the swap happened, else 0
   458  	RET
   459  	
   460  TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
        	// Tail-jump to cas64; both declare the same 25-byte argument area, so
        	// the arguments and result slot are used in place.
   461  	JMP	runtime·cas64(SB)
   462  
   463  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
        	// Tail-jump to atomicload64 (not defined in this part of the file);
        	// the arguments are left in place for it.
   464  	JMP	runtime·atomicload64(SB)
   465  
   466  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
        	// Tail-jump to atomicload64 (not defined in this part of the file);
        	// the arguments are left in place for it.
   467  	JMP	runtime·atomicload64(SB)
   468  
   469  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
        	// Tail-jump to atomicstore64; both declare the same 16-byte argument
        	// area, so the arguments are used in place.
   470  	JMP	runtime·atomicstore64(SB)
   471  
   472  // bool casp(void **val, void *old, void *new)
        // Atomic pointer-sized compare-and-swap:
        //	if(*val == old){
        //		*val = new;
        //		return 1;
        //	}
        //	return 0;
        // The locked CMPXCHGQ compares AX with *val and sets ZF on a match, so
        // the boolean result is ZF written out as a single byte.
   479  TEXT runtime·casp1(SB), NOSPLIT, $0-25
        	MOVQ	ptr+0(FP), DI		// DI = val
   481  	MOVQ	old+8(FP), AX		// AX = expected pointer (implicit CMPXCHG operand)
        	MOVQ	new+16(FP), DX		// DX = replacement pointer
   483  	LOCK
        	CMPXCHGQ	DX, 0(DI)
        	SETEQ	ret+24(FP)		// 1 if the swap happened, else 0
   491  	RET
   492  
   493  // uint32 xadd(uint32 volatile *val, int32 delta)
        // Atomic 32-bit add that returns the updated value:
        //	*val += delta;
        //	return *val;
        // XADDL leaves the previous *val in its register operand, so delta is
        // added to it once more to obtain the value that was stored.
   497  TEXT runtime·xadd(SB), NOSPLIT, $0-20
        	MOVQ	ptr+0(FP), DI		// DI = val
        	MOVL	delta+8(FP), DX		// DX = delta (kept for the final add)
        	MOVL	DX, AX
        	LOCK
        	XADDL	AX, 0(DI)		// AX = old *val; *val = old + delta
        	ADDL	DX, AX			// AX = new value
   504  	MOVL	AX, ret+16(FP)
   505  	RET
   506  
   507  TEXT runtime·xadd64(SB), NOSPLIT, $0-24
        	// Atomic 64-bit add returning the updated value. XADDQ leaves the
        	// previous *ptr in its register operand, so delta is added once more.
        	MOVQ	ptr+0(FP), DI		// DI = ptr
        	MOVQ	delta+8(FP), DX		// DX = delta (kept for the final add)
        	MOVQ	DX, AX
        	LOCK
        	XADDQ	AX, 0(DI)		// AX = old *ptr; *ptr = old + delta
        	ADDQ	DX, AX			// AX = new value
   514  	MOVQ	AX, ret+16(FP)
   515  	RET
   516  
   517  TEXT runtime·xchg(SB), NOSPLIT, $0-20
        	// Atomically stores the 32-bit value new into *ptr and returns the
        	// previous contents. XCHG with a memory operand is locked by the
        	// processor, so no LOCK prefix is written.
        	MOVQ	ptr+0(FP), DI
        	MOVL	new+8(FP), CX
        	XCHGL	CX, 0(DI)		// CX = old *ptr
        	MOVL	CX, ret+16(FP)
   522  	RET
   523  
   524  TEXT runtime·xchg64(SB), NOSPLIT, $0-24
        	// Atomically stores the 64-bit value new into *ptr and returns the
        	// previous contents. XCHG with a memory operand is locked by the
        	// processor, so no LOCK prefix is written.
        	MOVQ	ptr+0(FP), DI
        	MOVQ	new+8(FP), CX
        	XCHGQ	CX, 0(DI)		// CX = old *ptr
        	MOVQ	CX, ret+16(FP)
   529  	RET
   530  
   531  TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
        	// Atomically stores the 8-byte value new into *ptr and returns the
        	// previous contents; the instruction sequence is the same as xchg64's.
        	MOVQ	ptr+0(FP), DI
        	MOVQ	new+8(FP), CX
        	XCHGQ	CX, 0(DI)		// CX = old *ptr
        	MOVQ	CX, ret+16(FP)
   536  	RET
   537  
   538  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
        	// Tail-jump to xchg64; both declare the same 24-byte argument area, so
        	// the arguments and result slot are used in place.
   539  	JMP	runtime·xchg64(SB)
   540  
   541  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Spin on PAUSE, decrementing the 32-bit count read from cycles until
        	// it reaches zero. The decrement precedes the test, so a count of 0
        	// wraps around instead of returning immediately.
        	MOVL	cycles+0(FP), CX
        spin:
   544  	PAUSE
        	DECL	CX
        	JNZ	spin
   547  	RET
   548  
   549  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
        	// Atomic 8-byte store of val to *ptr, done with XCHGQ (locked by the
        	// processor for memory operands); the previous value is discarded.
        	MOVQ	ptr+0(FP), DI
        	MOVQ	val+8(FP), CX
        	XCHGQ	CX, 0(DI)
   553  	RET
   554  
   555  TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
        	// Atomic 32-bit store of val to *ptr, done with XCHGL (locked by the
        	// processor for memory operands); the previous value is discarded.
        	MOVQ	ptr+0(FP), DI
        	MOVL	val+8(FP), CX
        	XCHGL	CX, 0(DI)
   559  	RET
   560  
   561  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
        	// Atomic 64-bit store of val to *ptr, done with XCHGQ (locked by the
        	// processor for memory operands); the previous value is discarded.
        	MOVQ	ptr+0(FP), DI
        	MOVQ	val+8(FP), CX
        	XCHGQ	CX, 0(DI)
   565  	RET
   566  
   567  // void	runtime·atomicor8(byte volatile*, byte);
        // Atomically ORs the byte val into *ptr using a LOCK-prefixed ORB.
   568  TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
        	MOVQ	ptr+0(FP), DI		// DI = target byte address
        	MOVB	val+8(FP), CX		// low byte of CX = bits to set
   571  	LOCK
        	ORB	CX, 0(DI)
   573  	RET
   574  
   575  // void jmpdefer(fn, sp);
   576  // called from deferreturn.
   577  // 1. pop the caller
   578  // 2. sub 5 bytes from the callers return
   579  // 3. jmp to the argument
        // SP is reset to the word just below argp, the return address stored
        // there is moved back by 5 bytes, and control jumps to the code pointer
        // held in the first word of fn with DX = fn.
        // NOTE(review): 5 is presumably the encoded length of the CALL being
        // re-executed; confirm against the assembler's CALL encoding.
   580  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   581  	MOVQ	fv+0(FP), DX	// fn
   582  	MOVQ	argp+8(FP), BX	// caller sp
   583  	LEAQ	-8(BX), SP	// caller sp after CALL
   584  	SUBQ	$5, (SP)	// return to CALL again
   585  	MOVQ	0(DX), BX
   586  	JMP	BX	// but first run the deferred function
   587  
   588  // Save state of caller into g->sched. Smashes R8, R9.
        // The saved pc is this routine's return address, the saved sp is the
        // caller's SP at the call site (8(SP) here); ret and ctxt are zeroed.
        // Unlike runtime·gosave, sched.g is not written.
   589  TEXT gosave<>(SB),NOSPLIT,$0
   590  	get_tls(R8)
   591  	MOVQ	g(R8), R8		// R8 = current g
   592  	MOVQ	0(SP), R9		// return address into the caller
   593  	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   594  	LEAQ	8(SP), R9		// caller's SP before the CALL
   595  	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   596  	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   597  	MOVQ	$0, (g_sched+gobuf_ctxt)(R8)
   598  	RET
   599  
   600  // asmcgocall(void(*fn)(void*), void *arg)
   601  // Call fn(arg) on the scheduler stack,
   602  // aligned appropriately for the gcc ABI.
   603  // See cgocall.c for more details.
        // This entry point loads fn into AX and arg into BX, as asmcgocall<>
        // expects, and discards whatever fn leaves in AX.
   604  TEXT ·asmcgocall(SB),NOSPLIT,$0-16
   605  	MOVQ	fn+0(FP), AX
   606  	MOVQ	arg+8(FP), BX
   607  	CALL	asmcgocall<>(SB)
   608  	RET
   609  
   610  TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-20
        	// Same as asmcgocall, but the low 32 bits of AX after the call are
        	// stored as the result (asmcgocall<> documents AX as the errno).
   611  	MOVQ	fn+0(FP), AX
   612  	MOVQ	arg+8(FP), BX
   613  	CALL	asmcgocall<>(SB)
   614  	MOVL	AX, ret+16(FP)
   615  	RET
   616  
   617  // asmcgocall common code. fn in AX, arg in BX. returns errno in AX.
        // Unless already on m->g0 or m->gsignal, the caller's state is saved in
        // g->sched and g/SP are switched to m->g0. fn is then called on a
        // 16-byte-aligned stack with arg in both DI and CX. Afterwards the
        // original g is reinstalled and SP is recomputed from the saved stack
        // depth. AX is not touched after fn returns.
   618  TEXT asmcgocall<>(SB),NOSPLIT,$0-0
   619  	MOVQ	SP, DX		// DX = SP on entry, used for the depth below
   620  
   621  	// Figure out if we need to switch to m->g0 stack.
   622  	// We get called to create new OS threads too, and those
   623  	// come in on the m->g0 stack already.
   624  	get_tls(CX)
   625  	MOVQ	g(CX), BP
   626  	MOVQ	g_m(BP), BP
   627  	MOVQ	m_g0(BP), SI
   628  	MOVQ	g(CX), DI	// DI = current g (saved at 48(SP) below)
   629  	CMPQ	SI, DI
   630  	JEQ	nosave
   631  	MOVQ	m_gsignal(BP), SI
   632  	CMPQ	SI, DI
   633  	JEQ	nosave
   634  	
   635  	MOVQ	m_g0(BP), SI
   636  	CALL	gosave<>(SB)
   637  	MOVQ	SI, g(CX)
   638  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   639  nosave:
   640  
   641  	// Now on a scheduling stack (a pthread-created stack).
   642  	// Make sure we have enough room for 4 stack-backed fast-call
   643  	// registers as per windows amd64 calling convention.
   644  	SUBQ	$64, SP
   645  	ANDQ	$~15, SP	// alignment for gcc ABI
   646  	MOVQ	DI, 48(SP)	// save g
   647  	MOVQ	(g_stack+stack_hi)(DI), DI
   648  	SUBQ	DX, DI		// depth = g->stack.hi - entry SP
   649  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   650  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   651  	MOVQ	BX, CX		// CX = first argument in Win64
   652  	CALL	AX
   653  
   654  	// Restore registers, g, stack pointer.
        	// SP = (current g->stack.hi) - saved depth, using the g saved at 48(SP).
   655  	get_tls(CX)
   656  	MOVQ	48(SP), DI
   657  	MOVQ	(g_stack+stack_hi)(DI), SI
   658  	SUBQ	40(SP), SI
   659  	MOVQ	DI, g(CX)
   660  	MOVQ	SI, SP
   661  	RET
   662  
   663  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   664  // Turn the fn into a Go func (by taking its address) and call
   665  // cgocallback_gofunc.
        // The three outgoing words are the address of the fn argument slot,
        // frame and framesize; the call is made indirectly through AX.
   666  TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
   667  	LEAQ	fn+0(FP), AX	// &fn: a word holding the code pointer
   668  	MOVQ	AX, 0(SP)
   669  	MOVQ	frame+8(FP), AX
   670  	MOVQ	AX, 8(SP)
   671  	MOVQ	framesize+16(FP), AX
   672  	MOVQ	AX, 16(SP)
   673  	MOVQ	$runtime·cgocallback_gofunc(SB), AX
   674  	CALL	AX
   675  	RET
   676  
   677  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   678  // See cgocall.c for more details.
        // Outline: obtain an m if the thread has no g (needm), remember the old
        // m (or 0) in R8, save g0's sched.sp on the stack, switch to m->curg's
        // stack, call cgocallbackg, restore curg's sched pc/sp, switch back to
        // g0, restore g0's sched.sp, and call dropm if an m was borrowed.
   679  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
   680  	NO_LOCAL_POINTERS
   681  
   682  	// If g is nil, Go did not create the current thread.
   683  	// Call needm to obtain one m for temporary use.
   684  	// In this case, we're running on the thread stack, so there's
   685  	// lots of space, but the linker doesn't know. Hide the call from
   686  	// the linker analysis by using an indirect call through AX.
   687  	get_tls(CX)
   688  #ifdef GOOS_windows
        	// On Windows a zero TLS base is treated as "no g": BP is preset to 0
        	// and the load of g(CX) is skipped when CX == 0.
   689  	MOVL	$0, BP
   690  	CMPQ	CX, $0
   691  	JEQ	2(PC)
   692  #endif
   693  	MOVQ	g(CX), BP
   694  	CMPQ	BP, $0
   695  	JEQ	needm
   696  	MOVQ	g_m(BP), BP
   697  	MOVQ	BP, R8 // holds oldm until end of function
   698  	JMP	havem
   699  needm:
        	// 0(SP) is zeroed, needm is called, and 0(SP) is then read back as
        	// the "old m" value kept in R8.
   700  	MOVQ	$0, 0(SP)
   701  	MOVQ	$runtime·needm(SB), AX
   702  	CALL	AX
   703  	MOVQ	0(SP), R8
   704  	get_tls(CX)
   705  	MOVQ	g(CX), BP
   706  	MOVQ	g_m(BP), BP
   707  	
   708  	// Set m->g0->sched.sp = SP, so that if a panic happens
   709  	// during the function we are about to execute, it will
   710  	// have a valid SP to run on the g0 stack.
   711  	// The next few lines (after the havem label)
   712  	// will save this SP onto the stack and then write
   713  	// the same SP back to m->g0->sched.sp. That seems redundant,
   714  	// but if an unrecovered panic happens, unwindm will
   715  	// restore the g->sched.sp from the stack location
   716  	// and then systemstack will try to use it. If we don't set it here,
   717  	// that restored SP will be uninitialized (typically 0) and
   718  	// will not be usable.
   719  	MOVQ	m_g0(BP), SI
   720  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   721  
   722  havem:
   723  	// Now there's a valid m, and we're running on its m->g0.
   724  	// Save current m->g0->sched.sp on stack and then set it to SP.
   725  	// Save current sp in m->g0->sched.sp in preparation for
   726  	// switch back to m->curg stack.
   727  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   728  	MOVQ	m_g0(BP), SI
   729  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   730  	MOVQ	AX, 0(SP)
   731  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   732  
   733  	// Switch to m->curg stack and call runtime.cgocallbackg.
   734  	// Because we are taking over the execution of m->curg
   735  	// but *not* resuming what had been running, we need to
   736  	// save that information (m->curg->sched) so we can restore it.
   737  	// We can restore m->curg->sched.sp easily, because calling
   738  	// runtime.cgocallbackg leaves SP unchanged upon return.
   739  	// To save m->curg->sched.pc, we push it onto the stack.
   740  	// This has the added benefit that it looks to the traceback
   741  	// routine like cgocallbackg is going to return to that
   742  	// PC (because the frame we allocate below has the same
   743  	// size as cgocallback_gofunc's frame declared above)
   744  	// so that the traceback will seamlessly trace back into
   745  	// the earlier calls.
   746  	//
   747  	// In the new goroutine, 0(SP) holds the saved R8.
   748  	MOVQ	m_curg(BP), SI
   749  	MOVQ	SI, g(CX)
   750  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   751  	MOVQ	(g_sched+gobuf_pc)(SI), BP
   752  	MOVQ	BP, -8(DI)		// saved sched.pc sits just below the old sp
   753  	LEAQ	-(8+8)(DI), SP		// 8 bytes of frame + 8 bytes for the saved pc
   754  	MOVQ	R8, 0(SP)
   755  	CALL	runtime·cgocallbackg(SB)
   756  	MOVQ	0(SP), R8
   757  
   758  	// Restore g->sched (== m->curg->sched) from saved values.
   759  	get_tls(CX)
   760  	MOVQ	g(CX), SI
   761  	MOVQ	8(SP), BP
   762  	MOVQ	BP, (g_sched+gobuf_pc)(SI)
   763  	LEAQ	(8+8)(SP), DI
   764  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   765  
   766  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   767  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   768  	// so we do not have to restore it.)
   769  	MOVQ	g(CX), BP
   770  	MOVQ	g_m(BP), BP
   771  	MOVQ	m_g0(BP), SI
   772  	MOVQ	SI, g(CX)
   773  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   774  	MOVQ	0(SP), AX
   775  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   776  	
   777  	// If the m on entry was nil, we called needm above to borrow an m
   778  	// for the duration of the call. Since the call is over, return it with dropm.
   779  	CMPQ	R8, $0
   780  	JNE 3(PC)
   781  	MOVQ	$runtime·dropm(SB), AX
   782  	CALL	AX
   783  
   784  	// Done!
   785  	RET
   786  
   787  // void setg(G*); set g. for use by needm.
        // Stores gg in the TLS g slot. On Windows the word at 0x28(GS) is
        // maintained as well: it is cleared when gg is nil (and the routine
        // returns without touching the g slot), otherwise it is pointed at
        // gg->m->tls before g is stored.
   788  TEXT runtime·setg(SB), NOSPLIT, $0-8
   789  	MOVQ	gg+0(FP), BX
   790  #ifdef GOOS_windows
   791  	CMPQ	BX, $0
   792  	JNE	settls
   793  	MOVQ	$0, 0x28(GS)
   794  	RET
   795  settls:
   796  	MOVQ	g_m(BX), AX
   797  	LEAQ	m_tls(AX), AX
   798  	MOVQ	AX, 0x28(GS)
   799  #endif
   800  	get_tls(CX)
   801  	MOVQ	BX, g(CX)
   802  	RET
   803  
   804  // void setg_gcc(G*); set g called from gcc.
        // The new g arrives in DI rather than on the stack; only AX is
        // clobbered. rt0_go hands this routine's address to _cgo_init in SI.
   805  TEXT setg_gcc<>(SB),NOSPLIT,$0
   806  	get_tls(AX)
   807  	MOVQ	DI, g(AX)
   808  	RET
   809  
   810  // check that SP is in range [g->stack.lo, g->stack.hi)
        // Each test executes INT $3 when it fails. As written, both comparisons
        // are strict and unsigned: the routine passes only when
        // g->stack.lo < SP < g->stack.hi.
   811  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   812  	get_tls(CX)
   813  	MOVQ	g(CX), AX
   814  	CMPQ	(g_stack+stack_hi)(AX), SP
   815  	JHI	2(PC)		// ok when stack.hi > SP
   816  	INT	$3
   817  	CMPQ	SP, (g_stack+stack_lo)(AX)
   818  	JHI	2(PC)		// ok when SP > stack.lo
   819  	INT	$3
   820  	RET
   821  
   822  TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16
        	// argp is the address of the caller's first argument; the word just
        	// below that address is returned as the calling PC.
        	MOVQ	argp+0(FP), CX
        	MOVQ	-8(CX), DX		// word below the first argument
        	MOVQ	DX, ret+8(FP)
   826  	RET
   827  
   828  TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16
        	// p is the address of the caller's first argument; the word just
        	// below that address is returned as the calling PC.
        	MOVQ	p+0(FP), CX
        	MOVQ	-8(CX), DX		// word below the first argument
        	MOVQ	DX, ret+8(FP)
   832  	RET
   833  
   834  TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
        	// Overwrites the word just below argp (the address of the caller's
        	// first argument), i.e. the calling PC slot, with pc.
        	MOVQ	argp+0(FP), CX
        	MOVQ	pc+8(FP), DX
        	MOVQ	DX, -8(CX)
   838  	RET
   839  
   840  TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
        	// Returns argp unchanged: the value passed in is what gets reported
        	// as the caller's SP.
        	MOVQ	argp+0(FP), CX
        	MOVQ	CX, ret+8(FP)
   843  	RET
   844  
   845  // func gogetcallersp(p unsafe.Pointer) uintptr
        // Returns p unchanged, converted to the uintptr result.
   846  TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-16
        	MOVQ	p+0(FP), CX
        	MOVQ	CX, ret+8(FP)
   849  	RET
   850  
   851  // int64 runtime·cputicks(void)
        // Returns the time-stamp counter. RDTSC delivers it split across DX
        // (high half) and AX (low half); the halves are recombined as
        // (DX<<32) + AX.
   852  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   853  	RDTSC
   854  	SHLQ	$32, DX
        	ADDQ	AX, DX			// DX = (high<<32) + low
        	MOVQ	DX, ret+0(FP)
   857  	RET
   858  
   859  // hash function using AES hardware instructions
        // Loads the data pointer into AX and the size into CX, then tail-jumps
        // to aeshashbody, which reads the seed and writes the result through
        // this function's own argument frame.
   860  TEXT runtime·aeshash(SB),NOSPLIT,$0-32
   861  	MOVQ	p+0(FP), AX	// ptr to data
   862  	MOVQ	s+8(FP), CX	// size
   863  	JMP	runtime·aeshashbody(SB)
   864  
   865  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-32
        	// p points at a string header: the data pointer is read from offset 0
        	// and the length from offset 8, then control tail-jumps to aeshashbody.
   866  	MOVQ	p+0(FP), AX	// ptr to string struct
   867  	// s+8(FP) is ignored, it is always sizeof(String)
   868  	MOVQ	8(AX), CX	// length of string
   869  	MOVQ	(AX), AX	// string data
   870  	JMP	runtime·aeshashbody(SB)
   871  
   872  // AX: data
   873  // CX: length
        // Entered by JMP from aeshash/aeshashstr, so h+16(FP) and res+24(FP)
        // refer to the original caller's frame. X0 starts as (seed, length),
        // X2 and X3 hold the two 16-byte halves of aeskeysched, and the low
        // 64 bits of X0 are stored as the result.
   874  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-32
   875  	MOVQ	h+16(FP), X0	// seed to low 64 bits of xmm0
   876  	PINSRQ	$1, CX, X0	// size to high 64 bits of xmm0
   877  	MOVO	runtime·aeskeysched+0(SB), X2
   878  	MOVO	runtime·aeskeysched+16(SB), X3
   879  	CMPQ	CX, $16
   880  	JB	small
   881  loop:
        	// consume 16 bytes per iteration while more than 16 remain
   882  	CMPQ	CX, $16
   883  	JBE	loopend
   884  	MOVOU	(AX), X1
   885  	AESENC	X2, X0
   886  	AESENC	X1, X0
   887  	SUBQ	$16, CX
   888  	ADDQ	$16, AX
   889  	JMP	loop
   890  // 1-16 bytes remaining
   891  loopend:
   892  	// This load may overlap with the previous load above.
   893  	// We'll hash some bytes twice, but that's ok.
   894  	MOVOU	-16(AX)(CX*1), X1
   895  	JMP	partial
   896  // 0-15 bytes
   897  small:
   898  	TESTQ	CX, CX
   899  	JE	finalize	// 0 bytes
   900  
        	// Only the low byte of the address is compared: above 0xf0 a 16-byte
        	// load starting at AX would run past the end of the 256-byte block.
   901  	CMPB	AX, $0xf0
   902  	JA	highpartial
   903  
   904  	// 16 bytes loaded at this address won't cross
   905  	// a page boundary, so we can load it directly.
   906  	MOVOU	(AX), X1
   907  	ADDQ	CX, CX		// with the *8 scale below: offset = 16*length
   908  	MOVQ	$masks<>(SB), BP
   909  	PAND	(BP)(CX*8), X1
   910  	JMP	partial
   911  highpartial:
   912  	// address ends in 1111xxxx.  Might be up against
   913  	// a page boundary, so load ending at last byte.
   914  	// Then shift bytes down using pshufb.
   915  	MOVOU	-16(AX)(CX*1), X1
   916  	ADDQ	CX, CX		// with the *8 scale below: offset = 16*length
   917  	MOVQ	$shifts<>(SB), BP
   918  	PSHUFB	(BP)(CX*8), X1
   919  partial:
   920  	// incorporate partial block into hash
   921  	AESENC	X3, X0
   922  	AESENC	X1, X0
   923  finalize:	
   924  	// finalize hash
   925  	AESENC	X2, X0
   926  	AESENC	X3, X0
   927  	AESENC	X2, X0
   928  	MOVQ	X0, res+24(FP)
   929  	RET
   930  
   931  TEXT runtime·aeshash32(SB),NOSPLIT,$0-32
        	// Hashes the 4 bytes at p: X0 is loaded with the seed in its low
        	// 64 bits and the data in dword lane 2, run through three AESENC
        	// rounds keyed from aeskeysched, and its low 64 bits are returned.
   932  	MOVQ	p+0(FP), AX	// ptr to data
   933  	// s+8(FP) is ignored, it is always sizeof(int32)
   934  	MOVQ	h+16(FP), X0	// seed
   935  	PINSRD	$2, (AX), X0	// data
   936  	AESENC	runtime·aeskeysched+0(SB), X0
   937  	AESENC	runtime·aeskeysched+16(SB), X0
   938  	AESENC	runtime·aeskeysched+0(SB), X0
   939  	MOVQ	X0, ret+24(FP)
   940  	RET
   941  
   942  TEXT runtime·aeshash64(SB),NOSPLIT,$0-32
        	// Hashes the 8 bytes at p: X0 is loaded with the seed in its low
        	// 64 bits and the data in its high 64 bits, run through three AESENC
        	// rounds keyed from aeskeysched, and its low 64 bits are returned.
   943  	MOVQ	p+0(FP), AX	// ptr to data
   944  	// s+8(FP) is ignored, it is always sizeof(int64)
   945  	MOVQ	h+16(FP), X0	// seed
   946  	PINSRQ	$1, (AX), X0	// data
   947  	AESENC	runtime·aeskeysched+0(SB), X0
   948  	AESENC	runtime·aeskeysched+16(SB), X0
   949  	AESENC	runtime·aeskeysched+0(SB), X0
   950  	MOVQ	X0, ret+24(FP)
   951  	RET
   952  
   953  // simple mask to get rid of data in the high part of the register.
        // The table holds sixteen 16-byte entries; entry i (at byte offset 16*i)
        // has its low i bytes set to 0xff and the rest zero. aeshashbody indexes
        // it by the number of bytes to keep and applies it with PAND.
   954  DATA masks<>+0x00(SB)/8, $0x0000000000000000
   955  DATA masks<>+0x08(SB)/8, $0x0000000000000000
   956  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
   957  DATA masks<>+0x18(SB)/8, $0x0000000000000000
   958  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
   959  DATA masks<>+0x28(SB)/8, $0x0000000000000000
   960  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
   961  DATA masks<>+0x38(SB)/8, $0x0000000000000000
   962  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
   963  DATA masks<>+0x48(SB)/8, $0x0000000000000000
   964  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
   965  DATA masks<>+0x58(SB)/8, $0x0000000000000000
   966  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
   967  DATA masks<>+0x68(SB)/8, $0x0000000000000000
   968  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
   969  DATA masks<>+0x78(SB)/8, $0x0000000000000000
   970  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
   971  DATA masks<>+0x88(SB)/8, $0x0000000000000000
   972  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
   973  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
   974  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
   975  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
   976  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
   977  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
   978  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
   979  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
   980  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
   981  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
   982  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
   983  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
   984  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
   985  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
   986  GLOBL masks<>(SB),RODATA,$256
   987  
// these are arguments to pshufb.  They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// shifts<> holds sixteen 16-byte entries (256 bytes, read-only); entry n
// starts at offset n*16 and is stored as two 8-byte words, low half first.
// In entries 1-15 the low n control bytes name source bytes 16-n .. 15,
// and the remaining control bytes are 0xff (PSHUFB writes zero for a
// control byte whose top bit is set).  Entry 0 is all zero bytes.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000	// n = 0
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f	// n = 1
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e	// n = 2
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d	// n = 3
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c	// n = 4
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b	// n = 5
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a	// n = 6
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09	// n = 7
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908	// n = 8
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807	// n = 9
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706	// n = 10
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605	// n = 11
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504	// n = 12
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403	// n = 13
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302	// n = 14
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201	// n = 15
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1024  
  1025  TEXT runtime·memeq(SB),NOSPLIT,$0-25
  1026  	MOVQ	a+0(FP), SI
  1027  	MOVQ	b+8(FP), DI
  1028  	MOVQ	size+16(FP), BX
  1029  	CALL	runtime·memeqbody(SB)
  1030  	MOVB	AX, ret+24(FP)
  1031  	RET
  1032  
  1033  // eqstring tests whether two strings are equal.
  1034  // See runtime_test.go:eqstring_generic for
  1035  // equivalent Go code.
  1036  TEXT runtime·eqstring(SB),NOSPLIT,$0-33
  1037  	MOVQ	s1len+8(FP), AX
  1038  	MOVQ	s2len+24(FP), BX
  1039  	CMPQ	AX, BX
  1040  	JNE	noteq
  1041  	MOVQ	s1str+0(FP), SI
  1042  	MOVQ	s2str+16(FP), DI
  1043  	CMPQ	SI, DI
  1044  	JEQ	eq
  1045  	CALL	runtime·memeqbody(SB)
  1046  	MOVB	AX, v+32(FP)
  1047  	RET
  1048  eq:
  1049  	MOVB	$1, v+32(FP)
  1050  	RET
  1051  noteq:
  1052  	MOVB	$0, v+32(FP)
  1053  	RET
  1054  
// memeqbody reports whether two memory regions hold the same bytes.
// input:
//   a in SI
//   b in DI
//   count in BX
// output:
//   AX = 1 if the regions are equal, 0 otherwise
// SI, DI, BX, CX, DX and X0-X7 are used as scratch.
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	XORQ	AX, AX	// result defaults to 0; SETEQ below only writes the low byte

	CMPQ	BX, $8
	JB	small
	
	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// per-byte equality of each 16-byte pair, then AND the four results
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX	// DX = 0xffff only if all 64 bytes matched
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	RET		// mismatch: AX is still 0

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET		// mismatch: AX is still 0

	// remaining 0-8 bytes
	// Compare the 8 bytes that end at the end of each region.  This may
	// re-read bytes already compared; the original count was >= 8 on
	// this path, so the load stays inside the regions.
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// count is 0-7 here
small:
	CMPQ	BX, $0
	JEQ	equal	// ZF from this compare makes SETEQ produce 1

	// CX = -(8*count); used as a shift count it acts as 64 - 8*count
	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// The wanted bytes are in the low 8*count bits of SI and DI.
	// Shifting the difference left drops the unwanted high bytes and
	// leaves ZF set exactly when the wanted bytes are equal.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	AX
	RET
  1146  
  1147  TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
  1148  	MOVQ	s1_base+0(FP), SI
  1149  	MOVQ	s1_len+8(FP), BX
  1150  	MOVQ	s2_base+16(FP), DI
  1151  	MOVQ	s2_len+24(FP), DX
  1152  	CALL	runtime·cmpbody(SB)
  1153  	MOVQ	AX, ret+32(FP)
  1154  	RET
  1155  
  1156  TEXT runtime·cmpbytes(SB),NOSPLIT,$0-56
  1157  	MOVQ	s1+0(FP), SI
  1158  	MOVQ	s1+8(FP), BX
  1159  	MOVQ	s2+24(FP), DI
  1160  	MOVQ	s2+32(FP), DX
  1161  	CALL	runtime·cmpbody(SB)
  1162  	MOVQ	AX, res+48(FP)
  1163  	RET
  1164  
// cmpbody is the shared three-way comparison used by cmpstring and cmpbytes.
// The first min(alen, blen) bytes are compared; if they are all equal
// the lengths decide the result.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
// SI, DI, BX, CX, BP, X0 and X1 are used as scratch.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame	// same start address: only the lengths can differ
	CMPQ	BX, DX
	MOVQ	DX, BP
	CMOVQLT	BX, BP // BP = min(alen, blen) = # of bytes to compare
	CMPQ	BP, $8
	JB	small

	// 16 bytes at a time while more than 16 remain
loop:
	CMPQ	BP, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, BP
	JMP	loop
	
	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX	// unsigned: 1 if a's byte is the larger one
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	BP, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
	// compare the 8 bytes that end at the end of the common prefix
_0through8:
	MOVQ	-8(SI)(BP*1), AX
	MOVQ	-8(DI)(BP*1), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(BP*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame	// nothing to compare

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// low address byte is above 0xf8: load the 8 bytes that end at
	// a+BP instead and shift the wanted bytes down to the bottom
	MOVQ	-8(SI)(BP*1), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BP*1), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

	// common prefix is identical: order by length
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET
  1276  
  1277  TEXT bytes·IndexByte(SB),NOSPLIT,$0
  1278  	MOVQ s+0(FP), SI
  1279  	MOVQ s_len+8(FP), BX
  1280  	MOVB c+24(FP), AL
  1281  	CALL runtime·indexbytebody(SB)
  1282  	MOVQ AX, ret+32(FP)
  1283  	RET
  1284  
  1285  TEXT strings·IndexByte(SB),NOSPLIT,$0
  1286  	MOVQ s+0(FP), SI
  1287  	MOVQ s_len+8(FP), BX
  1288  	MOVB c+16(FP), AL
  1289  	CALL runtime·indexbytebody(SB)
  1290  	MOVQ AX, ret+24(FP)
  1291  	RET
  1292  
  1293  // input:
  1294  //   SI: data
  1295  //   BX: data len
  1296  //   AL: byte sought
  1297  // output:
  1298  //   AX
  1299  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
  1300  	MOVQ SI, DI
  1301  
  1302  	CMPQ BX, $16
  1303  	JLT small
  1304  
  1305  	// round up to first 16-byte boundary
  1306  	TESTQ $15, SI
  1307  	JZ aligned
  1308  	MOVQ SI, CX
  1309  	ANDQ $~15, CX
  1310  	ADDQ $16, CX
  1311  
  1312  	// search the beginning
  1313  	SUBQ SI, CX
  1314  	REPN; SCASB
  1315  	JZ success
  1316  
  1317  // DI is 16-byte aligned; get ready to search using SSE instructions
  1318  aligned:
  1319  	// round down to last 16-byte boundary
  1320  	MOVQ BX, R11
  1321  	ADDQ SI, R11
  1322  	ANDQ $~15, R11
  1323  
  1324  	// shuffle X0 around so that each byte contains c
  1325  	MOVD AX, X0
  1326  	PUNPCKLBW X0, X0
  1327  	PUNPCKLBW X0, X0
  1328  	PSHUFL $0, X0, X0
  1329  	JMP condition
  1330  
  1331  sse:
  1332  	// move the next 16-byte chunk of the buffer into X1
  1333  	MOVO (DI), X1
  1334  	// compare bytes in X0 to X1
  1335  	PCMPEQB X0, X1
  1336  	// take the top bit of each byte in X1 and put the result in DX
  1337  	PMOVMSKB X1, DX
  1338  	TESTL DX, DX
  1339  	JNZ ssesuccess
  1340  	ADDQ $16, DI
  1341  
  1342  condition:
  1343  	CMPQ DI, R11
  1344  	JLT sse
  1345  
  1346  	// search the end
  1347  	MOVQ SI, CX
  1348  	ADDQ BX, CX
  1349  	SUBQ R11, CX
  1350  	// if CX == 0, the zero flag will be set and we'll end up
  1351  	// returning a false success
  1352  	JZ failure
  1353  	REPN; SCASB
  1354  	JZ success
  1355  
  1356  failure:
  1357  	MOVQ $-1, AX
  1358  	RET
  1359  
  1360  // handle for lengths < 16
  1361  small:
  1362  	MOVQ BX, CX
  1363  	REPN; SCASB
  1364  	JZ success
  1365  	MOVQ $-1, AX
  1366  	RET
  1367  
  1368  // we've found the chunk containing the byte
  1369  // now just figure out which specific byte it is
  1370  ssesuccess:
  1371  	// get the index of the least significant set bit
  1372  	BSFW DX, DX
  1373  	SUBQ SI, DI
  1374  	ADDQ DI, DX
  1375  	MOVQ DX, AX
  1376  	RET
  1377  
  1378  success:
  1379  	SUBQ SI, DI
  1380  	SUBL $1, DI
  1381  	MOVQ DI, AX
  1382  	RET
  1383  
  1384  TEXT bytes·Equal(SB),NOSPLIT,$0-49
  1385  	MOVQ	a_len+8(FP), BX
  1386  	MOVQ	b_len+32(FP), CX
  1387  	XORQ	AX, AX
  1388  	CMPQ	BX, CX
  1389  	JNE	eqret
  1390  	MOVQ	a+0(FP), SI
  1391  	MOVQ	b+24(FP), DI
  1392  	CALL	runtime·memeqbody(SB)
  1393  eqret:
  1394  	MOVB	AX, ret+48(FP)
  1395  	RET
  1396  
  1397  // A Duff's device for zeroing memory.
  1398  // The compiler jumps to computed addresses within
  1399  // this routine to zero chunks of memory.  Do not
  1400  // change this code without also changing the code
  1401  // in ../../cmd/6g/ggen.c:clearfat.
  1402  // AX: zero
  1403  // DI: ptr to memory to be zeroed
  1404  // DI is updated as a side effect.
  1405  TEXT runtime·duffzero(SB), NOSPLIT, $0-0
  1406  	STOSQ
  1407  	STOSQ
  1408  	STOSQ
  1409  	STOSQ
  1410  	STOSQ
  1411  	STOSQ
  1412  	STOSQ
  1413  	STOSQ
  1414  	STOSQ
  1415  	STOSQ
  1416  	STOSQ
  1417  	STOSQ
  1418  	STOSQ
  1419  	STOSQ
  1420  	STOSQ
  1421  	STOSQ
  1422  	STOSQ
  1423  	STOSQ
  1424  	STOSQ
  1425  	STOSQ
  1426  	STOSQ
  1427  	STOSQ
  1428  	STOSQ
  1429  	STOSQ
  1430  	STOSQ
  1431  	STOSQ
  1432  	STOSQ
  1433  	STOSQ
  1434  	STOSQ
  1435  	STOSQ
  1436  	STOSQ
  1437  	STOSQ
  1438  	STOSQ
  1439  	STOSQ
  1440  	STOSQ
  1441  	STOSQ
  1442  	STOSQ
  1443  	STOSQ
  1444  	STOSQ
  1445  	STOSQ
  1446  	STOSQ
  1447  	STOSQ
  1448  	STOSQ
  1449  	STOSQ
  1450  	STOSQ
  1451  	STOSQ
  1452  	STOSQ
  1453  	STOSQ
  1454  	STOSQ
  1455  	STOSQ
  1456  	STOSQ
  1457  	STOSQ
  1458  	STOSQ
  1459  	STOSQ
  1460  	STOSQ
  1461  	STOSQ
  1462  	STOSQ
  1463  	STOSQ
  1464  	STOSQ
  1465  	STOSQ
  1466  	STOSQ
  1467  	STOSQ
  1468  	STOSQ
  1469  	STOSQ
  1470  	STOSQ
  1471  	STOSQ
  1472  	STOSQ
  1473  	STOSQ
  1474  	STOSQ
  1475  	STOSQ
  1476  	STOSQ
  1477  	STOSQ
  1478  	STOSQ
  1479  	STOSQ
  1480  	STOSQ
  1481  	STOSQ
  1482  	STOSQ
  1483  	STOSQ
  1484  	STOSQ
  1485  	STOSQ
  1486  	STOSQ
  1487  	STOSQ
  1488  	STOSQ
  1489  	STOSQ
  1490  	STOSQ
  1491  	STOSQ
  1492  	STOSQ
  1493  	STOSQ
  1494  	STOSQ
  1495  	STOSQ
  1496  	STOSQ
  1497  	STOSQ
  1498  	STOSQ
  1499  	STOSQ
  1500  	STOSQ
  1501  	STOSQ
  1502  	STOSQ
  1503  	STOSQ
  1504  	STOSQ
  1505  	STOSQ
  1506  	STOSQ
  1507  	STOSQ
  1508  	STOSQ
  1509  	STOSQ
  1510  	STOSQ
  1511  	STOSQ
  1512  	STOSQ
  1513  	STOSQ
  1514  	STOSQ
  1515  	STOSQ
  1516  	STOSQ
  1517  	STOSQ
  1518  	STOSQ
  1519  	STOSQ
  1520  	STOSQ
  1521  	STOSQ
  1522  	STOSQ
  1523  	STOSQ
  1524  	STOSQ
  1525  	STOSQ
  1526  	STOSQ
  1527  	STOSQ
  1528  	STOSQ
  1529  	STOSQ
  1530  	STOSQ
  1531  	STOSQ
  1532  	STOSQ
  1533  	STOSQ
  1534  	RET
  1535  
  1536  // A Duff's device for copying memory.
  1537  // The compiler jumps to computed addresses within
  1538  // this routine to copy chunks of memory.  Source
  1539  // and destination must not overlap.  Do not
  1540  // change this code without also changing the code
  1541  // in ../../cmd/6g/cgen.c:sgen.
  1542  // SI: ptr to source memory
  1543  // DI: ptr to destination memory
  1544  // SI and DI are updated as a side effect.
  1545  
  1546  // NOTE: this is equivalent to a sequence of MOVSQ but
  1547  // for some reason that is 3.5x slower than this code.
  1548  // The STOSQ above seem fine, though.
  1549  TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
  1550  	MOVQ	(SI),CX
  1551  	ADDQ	$8,SI
  1552  	MOVQ	CX,(DI)
  1553  	ADDQ	$8,DI
  1554  
  1555  	MOVQ	(SI),CX
  1556  	ADDQ	$8,SI
  1557  	MOVQ	CX,(DI)
  1558  	ADDQ	$8,DI
  1559  
  1560  	MOVQ	(SI),CX
  1561  	ADDQ	$8,SI
  1562  	MOVQ	CX,(DI)
  1563  	ADDQ	$8,DI
  1564  
  1565  	MOVQ	(SI),CX
  1566  	ADDQ	$8,SI
  1567  	MOVQ	CX,(DI)
  1568  	ADDQ	$8,DI
  1569  
  1570  	MOVQ	(SI),CX
  1571  	ADDQ	$8,SI
  1572  	MOVQ	CX,(DI)
  1573  	ADDQ	$8,DI
  1574  
  1575  	MOVQ	(SI),CX
  1576  	ADDQ	$8,SI
  1577  	MOVQ	CX,(DI)
  1578  	ADDQ	$8,DI
  1579  
  1580  	MOVQ	(SI),CX
  1581  	ADDQ	$8,SI
  1582  	MOVQ	CX,(DI)
  1583  	ADDQ	$8,DI
  1584  
  1585  	MOVQ	(SI),CX
  1586  	ADDQ	$8,SI
  1587  	MOVQ	CX,(DI)
  1588  	ADDQ	$8,DI
  1589  
  1590  	MOVQ	(SI),CX
  1591  	ADDQ	$8,SI
  1592  	MOVQ	CX,(DI)
  1593  	ADDQ	$8,DI
  1594  
  1595  	MOVQ	(SI),CX
  1596  	ADDQ	$8,SI
  1597  	MOVQ	CX,(DI)
  1598  	ADDQ	$8,DI
  1599  
  1600  	MOVQ	(SI),CX
  1601  	ADDQ	$8,SI
  1602  	MOVQ	CX,(DI)
  1603  	ADDQ	$8,DI
  1604  
  1605  	MOVQ	(SI),CX
  1606  	ADDQ	$8,SI
  1607  	MOVQ	CX,(DI)
  1608  	ADDQ	$8,DI
  1609  
  1610  	MOVQ	(SI),CX
  1611  	ADDQ	$8,SI
  1612  	MOVQ	CX,(DI)
  1613  	ADDQ	$8,DI
  1614  
  1615  	MOVQ	(SI),CX
  1616  	ADDQ	$8,SI
  1617  	MOVQ	CX,(DI)
  1618  	ADDQ	$8,DI
  1619  
  1620  	MOVQ	(SI),CX
  1621  	ADDQ	$8,SI
  1622  	MOVQ	CX,(DI)
  1623  	ADDQ	$8,DI
  1624  
  1625  	MOVQ	(SI),CX
  1626  	ADDQ	$8,SI
  1627  	MOVQ	CX,(DI)
  1628  	ADDQ	$8,DI
  1629  
  1630  	MOVQ	(SI),CX
  1631  	ADDQ	$8,SI
  1632  	MOVQ	CX,(DI)
  1633  	ADDQ	$8,DI
  1634  
  1635  	MOVQ	(SI),CX
  1636  	ADDQ	$8,SI
  1637  	MOVQ	CX,(DI)
  1638  	ADDQ	$8,DI
  1639  
  1640  	MOVQ	(SI),CX
  1641  	ADDQ	$8,SI
  1642  	MOVQ	CX,(DI)
  1643  	ADDQ	$8,DI
  1644  
  1645  	MOVQ	(SI),CX
  1646  	ADDQ	$8,SI
  1647  	MOVQ	CX,(DI)
  1648  	ADDQ	$8,DI
  1649  
  1650  	MOVQ	(SI),CX
  1651  	ADDQ	$8,SI
  1652  	MOVQ	CX,(DI)
  1653  	ADDQ	$8,DI
  1654  
  1655  	MOVQ	(SI),CX
  1656  	ADDQ	$8,SI
  1657  	MOVQ	CX,(DI)
  1658  	ADDQ	$8,DI
  1659  
  1660  	MOVQ	(SI),CX
  1661  	ADDQ	$8,SI
  1662  	MOVQ	CX,(DI)
  1663  	ADDQ	$8,DI
  1664  
  1665  	MOVQ	(SI),CX
  1666  	ADDQ	$8,SI
  1667  	MOVQ	CX,(DI)
  1668  	ADDQ	$8,DI
  1669  
  1670  	MOVQ	(SI),CX
  1671  	ADDQ	$8,SI
  1672  	MOVQ	CX,(DI)
  1673  	ADDQ	$8,DI
  1674  
  1675  	MOVQ	(SI),CX
  1676  	ADDQ	$8,SI
  1677  	MOVQ	CX,(DI)
  1678  	ADDQ	$8,DI
  1679  
  1680  	MOVQ	(SI),CX
  1681  	ADDQ	$8,SI
  1682  	MOVQ	CX,(DI)
  1683  	ADDQ	$8,DI
  1684  
  1685  	MOVQ	(SI),CX
  1686  	ADDQ	$8,SI
  1687  	MOVQ	CX,(DI)
  1688  	ADDQ	$8,DI
  1689  
  1690  	MOVQ	(SI),CX
  1691  	ADDQ	$8,SI
  1692  	MOVQ	CX,(DI)
  1693  	ADDQ	$8,DI
  1694  
  1695  	MOVQ	(SI),CX
  1696  	ADDQ	$8,SI
  1697  	MOVQ	CX,(DI)
  1698  	ADDQ	$8,DI
  1699  
  1700  	MOVQ	(SI),CX
  1701  	ADDQ	$8,SI
  1702  	MOVQ	CX,(DI)
  1703  	ADDQ	$8,DI
  1704  
  1705  	MOVQ	(SI),CX
  1706  	ADDQ	$8,SI
  1707  	MOVQ	CX,(DI)
  1708  	ADDQ	$8,DI
  1709  
  1710  	MOVQ	(SI),CX
  1711  	ADDQ	$8,SI
  1712  	MOVQ	CX,(DI)
  1713  	ADDQ	$8,DI
  1714  
  1715  	MOVQ	(SI),CX
  1716  	ADDQ	$8,SI
  1717  	MOVQ	CX,(DI)
  1718  	ADDQ	$8,DI
  1719  
  1720  	MOVQ	(SI),CX
  1721  	ADDQ	$8,SI
  1722  	MOVQ	CX,(DI)
  1723  	ADDQ	$8,DI
  1724  
  1725  	MOVQ	(SI),CX
  1726  	ADDQ	$8,SI
  1727  	MOVQ	CX,(DI)
  1728  	ADDQ	$8,DI
  1729  
  1730  	MOVQ	(SI),CX
  1731  	ADDQ	$8,SI
  1732  	MOVQ	CX,(DI)
  1733  	ADDQ	$8,DI
  1734  
  1735  	MOVQ	(SI),CX
  1736  	ADDQ	$8,SI
  1737  	MOVQ	CX,(DI)
  1738  	ADDQ	$8,DI
  1739  
  1740  	MOVQ	(SI),CX
  1741  	ADDQ	$8,SI
  1742  	MOVQ	CX,(DI)
  1743  	ADDQ	$8,DI
  1744  
  1745  	MOVQ	(SI),CX
  1746  	ADDQ	$8,SI
  1747  	MOVQ	CX,(DI)
  1748  	ADDQ	$8,DI
  1749  
  1750  	MOVQ	(SI),CX
  1751  	ADDQ	$8,SI
  1752  	MOVQ	CX,(DI)
  1753  	ADDQ	$8,DI
  1754  
  1755  	MOVQ	(SI),CX
  1756  	ADDQ	$8,SI
  1757  	MOVQ	CX,(DI)
  1758  	ADDQ	$8,DI
  1759  
  1760  	MOVQ	(SI),CX
  1761  	ADDQ	$8,SI
  1762  	MOVQ	CX,(DI)
  1763  	ADDQ	$8,DI
  1764  
  1765  	MOVQ	(SI),CX
  1766  	ADDQ	$8,SI
  1767  	MOVQ	CX,(DI)
  1768  	ADDQ	$8,DI
  1769  
  1770  	MOVQ	(SI),CX
  1771  	ADDQ	$8,SI
  1772  	MOVQ	CX,(DI)
  1773  	ADDQ	$8,DI
  1774  
  1775  	MOVQ	(SI),CX
  1776  	ADDQ	$8,SI
  1777  	MOVQ	CX,(DI)
  1778  	ADDQ	$8,DI
  1779  
  1780  	MOVQ	(SI),CX
  1781  	ADDQ	$8,SI
  1782  	MOVQ	CX,(DI)
  1783  	ADDQ	$8,DI
  1784  
  1785  	MOVQ	(SI),CX
  1786  	ADDQ	$8,SI
  1787  	MOVQ	CX,(DI)
  1788  	ADDQ	$8,DI
  1789  
  1790  	MOVQ	(SI),CX
  1791  	ADDQ	$8,SI
  1792  	MOVQ	CX,(DI)
  1793  	ADDQ	$8,DI
  1794  
  1795  	MOVQ	(SI),CX
  1796  	ADDQ	$8,SI
  1797  	MOVQ	CX,(DI)
  1798  	ADDQ	$8,DI
  1799  
  1800  	MOVQ	(SI),CX
  1801  	ADDQ	$8,SI
  1802  	MOVQ	CX,(DI)
  1803  	ADDQ	$8,DI
  1804  
  1805  	MOVQ	(SI),CX
  1806  	ADDQ	$8,SI
  1807  	MOVQ	CX,(DI)
  1808  	ADDQ	$8,DI
  1809  
  1810  	MOVQ	(SI),CX
  1811  	ADDQ	$8,SI
  1812  	MOVQ	CX,(DI)
  1813  	ADDQ	$8,DI
  1814  
  1815  	MOVQ	(SI),CX
  1816  	ADDQ	$8,SI
  1817  	MOVQ	CX,(DI)
  1818  	ADDQ	$8,DI
  1819  
  1820  	MOVQ	(SI),CX
  1821  	ADDQ	$8,SI
  1822  	MOVQ	CX,(DI)
  1823  	ADDQ	$8,DI
  1824  
  1825  	MOVQ	(SI),CX
  1826  	ADDQ	$8,SI
  1827  	MOVQ	CX,(DI)
  1828  	ADDQ	$8,DI
  1829  
  1830  	MOVQ	(SI),CX
  1831  	ADDQ	$8,SI
  1832  	MOVQ	CX,(DI)
  1833  	ADDQ	$8,DI
  1834  
  1835  	MOVQ	(SI),CX
  1836  	ADDQ	$8,SI
  1837  	MOVQ	CX,(DI)
  1838  	ADDQ	$8,DI
  1839  
  1840  	MOVQ	(SI),CX
  1841  	ADDQ	$8,SI
  1842  	MOVQ	CX,(DI)
  1843  	ADDQ	$8,DI
  1844  
  1845  	MOVQ	(SI),CX
  1846  	ADDQ	$8,SI
  1847  	MOVQ	CX,(DI)
  1848  	ADDQ	$8,DI
  1849  
  1850  	MOVQ	(SI),CX
  1851  	ADDQ	$8,SI
  1852  	MOVQ	CX,(DI)
  1853  	ADDQ	$8,DI
  1854  
  1855  	MOVQ	(SI),CX
  1856  	ADDQ	$8,SI
  1857  	MOVQ	CX,(DI)
  1858  	ADDQ	$8,DI
  1859  
  1860  	MOVQ	(SI),CX
  1861  	ADDQ	$8,SI
  1862  	MOVQ	CX,(DI)
  1863  	ADDQ	$8,DI
  1864  
  1865  	MOVQ	(SI),CX
  1866  	ADDQ	$8,SI
  1867  	MOVQ	CX,(DI)
  1868  	ADDQ	$8,DI
  1869  
  1870  	MOVQ	(SI),CX
  1871  	ADDQ	$8,SI
  1872  	MOVQ	CX,(DI)
  1873  	ADDQ	$8,DI
  1874  
  1875  	MOVQ	(SI),CX
  1876  	ADDQ	$8,SI
  1877  	MOVQ	CX,(DI)
  1878  	ADDQ	$8,DI
  1879  
  1880  	MOVQ	(SI),CX
  1881  	ADDQ	$8,SI
  1882  	MOVQ	CX,(DI)
  1883  	ADDQ	$8,DI
  1884  
  1885  	MOVQ	(SI),CX
  1886  	ADDQ	$8,SI
  1887  	MOVQ	CX,(DI)
  1888  	ADDQ	$8,DI
  1889  
  1890  	MOVQ	(SI),CX
  1891  	ADDQ	$8,SI
  1892  	MOVQ	CX,(DI)
  1893  	ADDQ	$8,DI
  1894  
  1895  	MOVQ	(SI),CX
  1896  	ADDQ	$8,SI
  1897  	MOVQ	CX,(DI)
  1898  	ADDQ	$8,DI
  1899  
  1900  	MOVQ	(SI),CX
  1901  	ADDQ	$8,SI
  1902  	MOVQ	CX,(DI)
  1903  	ADDQ	$8,DI
  1904  
  1905  	MOVQ	(SI),CX
  1906  	ADDQ	$8,SI
  1907  	MOVQ	CX,(DI)
  1908  	ADDQ	$8,DI
  1909  
  1910  	MOVQ	(SI),CX
  1911  	ADDQ	$8,SI
  1912  	MOVQ	CX,(DI)
  1913  	ADDQ	$8,DI
  1914  
  1915  	MOVQ	(SI),CX
  1916  	ADDQ	$8,SI
  1917  	MOVQ	CX,(DI)
  1918  	ADDQ	$8,DI
  1919  
  1920  	MOVQ	(SI),CX
  1921  	ADDQ	$8,SI
  1922  	MOVQ	CX,(DI)
  1923  	ADDQ	$8,DI
  1924  
  1925  	MOVQ	(SI),CX
  1926  	ADDQ	$8,SI
  1927  	MOVQ	CX,(DI)
  1928  	ADDQ	$8,DI
  1929  
  1930  	MOVQ	(SI),CX
  1931  	ADDQ	$8,SI
  1932  	MOVQ	CX,(DI)
  1933  	ADDQ	$8,DI
  1934  
  1935  	MOVQ	(SI),CX
  1936  	ADDQ	$8,SI
  1937  	MOVQ	CX,(DI)
  1938  	ADDQ	$8,DI
  1939  
  1940  	MOVQ	(SI),CX
  1941  	ADDQ	$8,SI
  1942  	MOVQ	CX,(DI)
  1943  	ADDQ	$8,DI
  1944  
  1945  	MOVQ	(SI),CX
  1946  	ADDQ	$8,SI
  1947  	MOVQ	CX,(DI)
  1948  	ADDQ	$8,DI
  1949  
  1950  	MOVQ	(SI),CX
  1951  	ADDQ	$8,SI
  1952  	MOVQ	CX,(DI)
  1953  	ADDQ	$8,DI
  1954  
  1955  	MOVQ	(SI),CX
  1956  	ADDQ	$8,SI
  1957  	MOVQ	CX,(DI)
  1958  	ADDQ	$8,DI
  1959  
  1960  	MOVQ	(SI),CX
  1961  	ADDQ	$8,SI
  1962  	MOVQ	CX,(DI)
  1963  	ADDQ	$8,DI
  1964  
  1965  	MOVQ	(SI),CX
  1966  	ADDQ	$8,SI
  1967  	MOVQ	CX,(DI)
  1968  	ADDQ	$8,DI
  1969  
  1970  	MOVQ	(SI),CX
  1971  	ADDQ	$8,SI
  1972  	MOVQ	CX,(DI)
  1973  	ADDQ	$8,DI
  1974  
  1975  	MOVQ	(SI),CX
  1976  	ADDQ	$8,SI
  1977  	MOVQ	CX,(DI)
  1978  	ADDQ	$8,DI
  1979  
  1980  	MOVQ	(SI),CX
  1981  	ADDQ	$8,SI
  1982  	MOVQ	CX,(DI)
  1983  	ADDQ	$8,DI
  1984  
  1985  	MOVQ	(SI),CX
  1986  	ADDQ	$8,SI
  1987  	MOVQ	CX,(DI)
  1988  	ADDQ	$8,DI
  1989  
  1990  	MOVQ	(SI),CX
  1991  	ADDQ	$8,SI
  1992  	MOVQ	CX,(DI)
  1993  	ADDQ	$8,DI
  1994  
  1995  	MOVQ	(SI),CX
  1996  	ADDQ	$8,SI
  1997  	MOVQ	CX,(DI)
  1998  	ADDQ	$8,DI
  1999  
  2000  	MOVQ	(SI),CX
  2001  	ADDQ	$8,SI
  2002  	MOVQ	CX,(DI)
  2003  	ADDQ	$8,DI
  2004  
  2005  	MOVQ	(SI),CX
  2006  	ADDQ	$8,SI
  2007  	MOVQ	CX,(DI)
  2008  	ADDQ	$8,DI
  2009  
  2010  	MOVQ	(SI),CX
  2011  	ADDQ	$8,SI
  2012  	MOVQ	CX,(DI)
  2013  	ADDQ	$8,DI
  2014  
  2015  	MOVQ	(SI),CX
  2016  	ADDQ	$8,SI
  2017  	MOVQ	CX,(DI)
  2018  	ADDQ	$8,DI
  2019  
  2020  	MOVQ	(SI),CX
  2021  	ADDQ	$8,SI
  2022  	MOVQ	CX,(DI)
  2023  	ADDQ	$8,DI
  2024  
  2025  	MOVQ	(SI),CX
  2026  	ADDQ	$8,SI
  2027  	MOVQ	CX,(DI)
  2028  	ADDQ	$8,DI
  2029  
  2030  	MOVQ	(SI),CX
  2031  	ADDQ	$8,SI
  2032  	MOVQ	CX,(DI)
  2033  	ADDQ	$8,DI
  2034  
  2035  	MOVQ	(SI),CX
  2036  	ADDQ	$8,SI
  2037  	MOVQ	CX,(DI)
  2038  	ADDQ	$8,DI
  2039  
  2040  	MOVQ	(SI),CX
  2041  	ADDQ	$8,SI
  2042  	MOVQ	CX,(DI)
  2043  	ADDQ	$8,DI
  2044  
  2045  	MOVQ	(SI),CX
  2046  	ADDQ	$8,SI
  2047  	MOVQ	CX,(DI)
  2048  	ADDQ	$8,DI
  2049  
  2050  	MOVQ	(SI),CX
  2051  	ADDQ	$8,SI
  2052  	MOVQ	CX,(DI)
  2053  	ADDQ	$8,DI
  2054  
  2055  	MOVQ	(SI),CX
  2056  	ADDQ	$8,SI
  2057  	MOVQ	CX,(DI)
  2058  	ADDQ	$8,DI
  2059  
  2060  	MOVQ	(SI),CX
  2061  	ADDQ	$8,SI
  2062  	MOVQ	CX,(DI)
  2063  	ADDQ	$8,DI
  2064  
  2065  	MOVQ	(SI),CX
  2066  	ADDQ	$8,SI
  2067  	MOVQ	CX,(DI)
  2068  	ADDQ	$8,DI
  2069  
  2070  	MOVQ	(SI),CX
  2071  	ADDQ	$8,SI
  2072  	MOVQ	CX,(DI)
  2073  	ADDQ	$8,DI
  2074  
  2075  	MOVQ	(SI),CX
  2076  	ADDQ	$8,SI
  2077  	MOVQ	CX,(DI)
  2078  	ADDQ	$8,DI
  2079  
  2080  	MOVQ	(SI),CX
  2081  	ADDQ	$8,SI
  2082  	MOVQ	CX,(DI)
  2083  	ADDQ	$8,DI
  2084  
  2085  	MOVQ	(SI),CX
  2086  	ADDQ	$8,SI
  2087  	MOVQ	CX,(DI)
  2088  	ADDQ	$8,DI
  2089  
  2090  	MOVQ	(SI),CX
  2091  	ADDQ	$8,SI
  2092  	MOVQ	CX,(DI)
  2093  	ADDQ	$8,DI
  2094  
  2095  	MOVQ	(SI),CX
  2096  	ADDQ	$8,SI
  2097  	MOVQ	CX,(DI)
  2098  	ADDQ	$8,DI
  2099  
  2100  	MOVQ	(SI),CX
  2101  	ADDQ	$8,SI
  2102  	MOVQ	CX,(DI)
  2103  	ADDQ	$8,DI
  2104  
  2105  	MOVQ	(SI),CX
  2106  	ADDQ	$8,SI
  2107  	MOVQ	CX,(DI)
  2108  	ADDQ	$8,DI
  2109  
  2110  	MOVQ	(SI),CX
  2111  	ADDQ	$8,SI
  2112  	MOVQ	CX,(DI)
  2113  	ADDQ	$8,DI
  2114  
  2115  	MOVQ	(SI),CX
  2116  	ADDQ	$8,SI
  2117  	MOVQ	CX,(DI)
  2118  	ADDQ	$8,DI
  2119  
  2120  	MOVQ	(SI),CX
  2121  	ADDQ	$8,SI
  2122  	MOVQ	CX,(DI)
  2123  	ADDQ	$8,DI
  2124  
  2125  	MOVQ	(SI),CX
  2126  	ADDQ	$8,SI
  2127  	MOVQ	CX,(DI)
  2128  	ADDQ	$8,DI
  2129  
  2130  	MOVQ	(SI),CX
  2131  	ADDQ	$8,SI
  2132  	MOVQ	CX,(DI)
  2133  	ADDQ	$8,DI
  2134  
  2135  	MOVQ	(SI),CX
  2136  	ADDQ	$8,SI
  2137  	MOVQ	CX,(DI)
  2138  	ADDQ	$8,DI
  2139  
  2140  	MOVQ	(SI),CX
  2141  	ADDQ	$8,SI
  2142  	MOVQ	CX,(DI)
  2143  	ADDQ	$8,DI
  2144  
  2145  	MOVQ	(SI),CX
  2146  	ADDQ	$8,SI
  2147  	MOVQ	CX,(DI)
  2148  	ADDQ	$8,DI
  2149  
  2150  	MOVQ	(SI),CX
  2151  	ADDQ	$8,SI
  2152  	MOVQ	CX,(DI)
  2153  	ADDQ	$8,DI
  2154  
  2155  	MOVQ	(SI),CX
  2156  	ADDQ	$8,SI
  2157  	MOVQ	CX,(DI)
  2158  	ADDQ	$8,DI
  2159  
  2160  	MOVQ	(SI),CX
  2161  	ADDQ	$8,SI
  2162  	MOVQ	CX,(DI)
  2163  	ADDQ	$8,DI
  2164  
  2165  	MOVQ	(SI),CX
  2166  	ADDQ	$8,SI
  2167  	MOVQ	CX,(DI)
  2168  	ADDQ	$8,DI
  2169  
  2170  	MOVQ	(SI),CX
  2171  	ADDQ	$8,SI
  2172  	MOVQ	CX,(DI)
  2173  	ADDQ	$8,DI
  2174  
  2175  	MOVQ	(SI),CX
  2176  	ADDQ	$8,SI
  2177  	MOVQ	CX,(DI)
  2178  	ADDQ	$8,DI
  2179  
  2180  	MOVQ	(SI),CX
  2181  	ADDQ	$8,SI
  2182  	MOVQ	CX,(DI)
  2183  	ADDQ	$8,DI
  2184  
  2185  	MOVQ	(SI),CX
  2186  	ADDQ	$8,SI
  2187  	MOVQ	CX,(DI)
  2188  	ADDQ	$8,DI
  2189  
  2190  	RET
  2191  
  2192  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
  2193  	get_tls(CX)
  2194  	MOVQ	g(CX), AX
  2195  	MOVQ	g_m(AX), AX
  2196  	MOVL	m_fastrand(AX), DX
  2197  	ADDL	DX, DX
  2198  	MOVL	DX, BX
  2199  	XORL	$0x88888eef, DX
  2200  	CMOVLMI	BX, DX
  2201  	MOVL	DX, m_fastrand(AX)
  2202  	MOVL	DX, ret+0(FP)
  2203  	RET
  2204  
// return0 is a stub that sets AX to 0 and returns.
// MOVL (rather than an XOR) leaves the flags untouched.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
  2208  
  2209  
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Only AX (the result) and CX (TLS scratch) are written.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), AX	// AX = g->m
	MOVQ	m_curg(AX), AX	// AX = g->m->curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
  2219  
  2220  // The top-most function running on a goroutine
  2221  // returns to goexit+PCQuantum.
  2222  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  2223  	BYTE	$0x90	// NOP
  2224  	CALL	runtime·goexit1(SB)	// does not return
  2225  
// getg returns the current g pointer, read from thread-local storage.
TEXT runtime·getg(SB),NOSPLIT,$0-8
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	AX, ret+0(FP)
	RET
  2231  
// prefetcht0 issues a PREFETCHT0 hint for the memory at addr.
// It returns nothing.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT0	(AX)
	RET
  2236  
// prefetcht1 issues a PREFETCHT1 hint for the memory at addr.
// It returns nothing.
TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT1	(AX)
	RET
  2241  
// prefetcht2 issues a PREFETCHT2 hint for the memory at addr.
// It returns nothing.
TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHT2	(AX)
	RET
  2246  
// prefetchnta issues a PREFETCHNTA hint for the memory at addr.
// It returns nothing.
TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
	MOVQ	addr+0(FP), AX
	PREFETCHNTA	(AX)
	RET