github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/runtime/asm_amd64.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  TEXT runtime·rt0_go(SB),NOSPLIT,$0
        	// Bootstrap for the initial thread. On entry DI holds argc and SI holds argv.
        	// In order, this routine:
        	//   1. carves out a 16-byte-aligned scratch area and parks argc/argv in it,
        	//   2. gives g0 provisional stack bounds derived from the current SP,
        	//   3. records the CPUID leaf-1 feature words in cpuid_ecx/cpuid_edx,
        	//   4. calls _cgo_init when one is linked in; otherwise (and on Windows
        	//      even with cgo) installs TLS via settls, except on Plan 9/Solaris,
        	//   5. links g0 and m0 to each other and publishes g0 as the current g,
        	//   6. calls check, args, osinit, schedinit, queues runtime·main through
        	//      newproc, and finally calls mstart.
        	// If mstart ever returns, the store to address 0xf1 below faults on purpose.
    11  	// copy arguments forward on an even stack
    12  	MOVQ	DI, AX		// argc
    13  	MOVQ	SI, BX		// argv
    14  	SUBQ	$(4*8+7), SP		// 2args 2auto
    15  	ANDQ	$~15, SP	// round SP down to a 16-byte boundary
    16  	MOVQ	AX, 16(SP)	// park argc; 0(SP)/8(SP) are kept free for the runtime·args call
    17  	MOVQ	BX, 24(SP)	// park argv
    18  	
    19  	// create istack out of the given (operating system) stack.
    20  	// _cgo_init may update stackguard.
    21  	MOVQ	$runtime·g0(SB), DI
    22  	LEAQ	(-64*1024+104)(SP), BX	// provisional lower bound: SP - 64KiB + 104
    23  	MOVQ	BX, g_stackguard0(DI)
    24  	MOVQ	BX, g_stackguard1(DI)
    25  	MOVQ	BX, (g_stack+stack_lo)(DI)
    26  	MOVQ	SP, (g_stack+stack_hi)(DI)	// current SP becomes g0's stack.hi
    27  
    28  	// find out information about the processor we're on
    29  	MOVQ	$0, AX
    30  	CPUID
    31  	CMPQ	AX, $0	// CPUID leaf 0 leaves the highest supported leaf in AX
    32  	JE	nocpuinfo
    33  	MOVQ	$1, AX
    34  	CPUID
    35  	MOVL	CX, runtime·cpuid_ecx(SB)
    36  	MOVL	DX, runtime·cpuid_edx(SB)
    37  nocpuinfo:	
    38  	
    39  	// if there is an _cgo_init, call it.
    40  	MOVQ	_cgo_init(SB), AX
    41  	TESTQ	AX, AX
    42  	JZ	needtls
    43  	// g0 already in DI
    44  	MOVQ	DI, CX	// Win64 uses CX for first parameter
    45  	MOVQ	$setg_gcc<>(SB), SI	// second argument: address of setg_gcc
    46  	CALL	AX
    47  
    48  	// update stackguard after _cgo_init
    49  	MOVQ	$runtime·g0(SB), CX
    50  	MOVQ	(g_stack+stack_lo)(CX), AX
    51  	ADDQ	$const__StackGuard, AX
    52  	MOVQ	AX, g_stackguard0(CX)
    53  	MOVQ	AX, g_stackguard1(CX)
    54  
    55  	CMPL	runtime·iswindows(SB), $0	// after _cgo_init only Windows continues into needtls
    56  	JEQ ok
    57  needtls:
    58  	// skip TLS setup on Plan 9
    59  	CMPL	runtime·isplan9(SB), $1
    60  	JEQ ok
    61  	// skip TLS setup on Solaris
    62  	CMPL	runtime·issolaris(SB), $1
    63  	JEQ ok
    64  
    65  	LEAQ	runtime·tls0(SB), DI
    66  	CALL	runtime·settls(SB)
    67  
    68  	// store through it, to make sure it works
    69  	get_tls(BX)
    70  	MOVQ	$0x123, g(BX)
    71  	MOVQ	runtime·tls0(SB), AX	// the sentinel must be visible in tls0 itself
    72  	CMPQ	AX, $0x123
    73  	JEQ 2(PC)
    74  	MOVL	AX, 0	// abort
    75  ok:
    76  	// set the per-goroutine and per-mach "registers"
    77  	get_tls(BX)
    78  	LEAQ	runtime·g0(SB), CX
    79  	MOVQ	CX, g(BX)
    80  	LEAQ	runtime·m0(SB), AX
    81  
    82  	// save m->g0 = g0
    83  	MOVQ	CX, m_g0(AX)
    84  	// save m0 to g0->m
    85  	MOVQ	AX, g_m(CX)
    86  
    87  	CLD				// convention is D is always left cleared
    88  	CALL	runtime·check(SB)
    89  
    90  	MOVL	16(SP), AX		// copy argc
    91  	MOVL	AX, 0(SP)
    92  	MOVQ	24(SP), AX		// copy argv
    93  	MOVQ	AX, 8(SP)
    94  	CALL	runtime·args(SB)
    95  	CALL	runtime·osinit(SB)
    96  	CALL	runtime·schedinit(SB)
    97  
    98  	// create a new goroutine to start program
    99  	MOVQ	$runtime·main·f(SB), BP		// entry
   100  	PUSHQ	BP
   101  	PUSHQ	$0			// arg size
   102  	CALL	runtime·newproc(SB)
   103  	POPQ	AX	// discard the two words pushed for newproc
   104  	POPQ	AX
   105  
   106  	// start this M
   107  	CALL	runtime·mstart(SB)
   108  
   109  	MOVL	$0xf1, 0xf1  // crash
   110  	RET
   111  
   112  DATA	runtime·main·f+0(SB)/8,$runtime·main(SB)	// one 8-byte word holding the address of runtime·main
   113  GLOBL	runtime·main·f(SB),RODATA,$8	// read-only, 8 bytes; rt0_go pushes its address as the newproc entry
   114  
   115  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
        	// Executes a single 0xcc byte (the x86 INT3 breakpoint opcode) and
        	// returns to the caller if execution is resumed.
   116  	BYTE	$0xcc
   117  	RET
   118  
   119  TEXT runtime·asminit(SB),NOSPLIT,$0-0
        	// Takes no arguments and does nothing in this file: it returns immediately.
   120  	// No per-thread init.
   121  	RET
   122  
   123  /*
   124   *  go-routine
   125   */
   126  
   127  // void gosave(Gobuf*)
   128  // save state in Gobuf; setjmp
        // Records the caller's SP, the caller's return PC and the current g in
        // the Gobuf passed as the only argument, and zeroes its ret and ctxt
        // fields. Nothing is returned.
   129  TEXT runtime·gosave(SB), NOSPLIT, $0-8
   130  	MOVQ	buf+0(FP), AX		// gobuf
   131  	LEAQ	buf+0(FP), BX		// caller's SP
   132  	MOVQ	BX, gobuf_sp(AX)
   133  	MOVQ	0(SP), BX		// caller's PC
   134  	MOVQ	BX, gobuf_pc(AX)
   135  	MOVQ	$0, gobuf_ret(AX)
   136  	MOVQ	$0, gobuf_ctxt(AX)
   137  	get_tls(CX)
   138  	MOVQ	g(CX), BX	// BX = current g, read from TLS
   139  	MOVQ	BX, gobuf_g(AX)
   140  	RET
   141  
   142  // void gogo(Gobuf*)
   143  // restore state from Gobuf; longjmp
        // Makes gobuf.g the current g, loads SP from gobuf.sp, AX from gobuf.ret
        // and DX from gobuf.ctxt, clears those three Gobuf fields, and jumps to
        // gobuf.pc. Control does not come back to the caller of gogo.
   144  TEXT runtime·gogo(SB), NOSPLIT, $0-8
   145  	MOVQ	buf+0(FP), BX		// gobuf
   146  	MOVQ	gobuf_g(BX), DX
   147  	MOVQ	0(DX), CX		// make sure g != nil
   148  	get_tls(CX)	// CX is reloaded here; the load above is only a nil probe
   149  	MOVQ	DX, g(CX)
   150  	MOVQ	gobuf_sp(BX), SP	// restore SP
   151  	MOVQ	gobuf_ret(BX), AX
   152  	MOVQ	gobuf_ctxt(BX), DX
   153  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   154  	MOVQ	$0, gobuf_ret(BX)
   155  	MOVQ	$0, gobuf_ctxt(BX)
   156  	MOVQ	gobuf_pc(BX), BX	// BX no longer needed as the Gobuf pointer
   157  	JMP	BX
   158  
   159  // func mcall(fn func(*g))
   160  // Switch to m->g0's stack, call fn(g).
   161  // Fn must never return.  It should gogo(&g->sched)
   162  // to keep running g.
        // The caller's PC and SP are saved in g->sched before the switch. If the
        // current g already is m->g0, control jumps to badmcall; if fn does
        // return, control jumps to badmcall2.
   163  TEXT runtime·mcall(SB), NOSPLIT, $0-8
   164  	MOVQ	fn+0(FP), DI
   165  	
   166  	get_tls(CX)
   167  	MOVQ	g(CX), AX	// save state in g->sched
   168  	MOVQ	0(SP), BX	// caller's PC
   169  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   170  	LEAQ	fn+0(FP), BX	// caller's SP
   171  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   172  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   173  
   174  	// switch to m->g0 & its stack, call fn
   175  	MOVQ	g(CX), BX
   176  	MOVQ	g_m(BX), BX
   177  	MOVQ	m_g0(BX), SI
   178  	CMPQ	SI, AX	// if g == m->g0 call badmcall
   179  	JNE	3(PC)
   180  	MOVQ	$runtime·badmcall(SB), AX
   181  	JMP	AX
   182  	MOVQ	SI, g(CX)	// g = m->g0
   183  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   184  	PUSHQ	AX	// the old g is fn's single argument
   185  	MOVQ	DI, DX	// DX = fn (closure pointer) for the callee
   186  	MOVQ	0(DI), DI	// DI = code address stored in the first word of fn
   187  	CALL	DI
   188  	POPQ	AX
   189  	MOVQ	$runtime·badmcall2(SB), AX	// reached only if fn returned
   190  	JMP	AX
   191  	RET
   192  
   193  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   194  // of the G stack.  We need to distinguish the routine that
   195  // lives at the bottom of the G stack from the one that lives
   196  // at the top of the system stack because the one at the top of
   197  // the system stack terminates the stack walk (see topofstack()).
        // Its body is a bare RET; systemstack stores this routine's address as
        // the saved PC in g->sched before switching stacks.
   198  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   199  	RET
   200  
   201  // func systemstack(fn func())
        // Runs fn on the m->g0 stack.
        //   - If the current g is m->gsignal or m->g0, fn is called directly on
        //     the current stack (noswitch).
        //   - If the current g is m->curg, its state is saved in g->sched, the
        //     stack is switched to g0's, fn is called, and then execution
        //     switches back to m->curg and returns to the original caller.
        //   - Otherwise badsystemstack is called.
        // fn is invoked with DX holding the closure pointer.
   202  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   203  	MOVQ	fn+0(FP), DI	// DI = fn
   204  	get_tls(CX)
   205  	MOVQ	g(CX), AX	// AX = g
   206  	MOVQ	g_m(AX), BX	// BX = m
   207  
   208  	MOVQ	m_gsignal(BX), DX	// DX = gsignal
   209  	CMPQ	AX, DX
   210  	JEQ	noswitch
   211  
   212  	MOVQ	m_g0(BX), DX	// DX = g0
   213  	CMPQ	AX, DX
   214  	JEQ	noswitch
   215  
   216  	MOVQ	m_curg(BX), BP
   217  	CMPQ	AX, BP
   218  	JEQ	switch
   219  	
   220  	// Bad: g is not gsignal, not g0, not curg. What is it?
        	// NOTE(review): if badsystemstack returns, execution falls through
        	// into switch below — presumably it never returns; confirm.
   221  	MOVQ	$runtime·badsystemstack(SB), AX
   222  	CALL	AX
   223  
   224  switch:
   225  	// save our state in g->sched.  Pretend to
   226  	// be systemstack_switch if the G stack is scanned.
   227  	MOVQ	$runtime·systemstack_switch(SB), BP
   228  	MOVQ	BP, (g_sched+gobuf_pc)(AX)
   229  	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   230  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   231  
   232  	// switch to g0
   233  	MOVQ	DX, g(CX)
   234  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   235  	// make it look like mstart called systemstack on g0, to stop traceback
   236  	SUBQ	$8, BX
   237  	MOVQ	$runtime·mstart(SB), DX
   238  	MOVQ	DX, 0(BX)	// fake return address at the new top of stack
   239  	MOVQ	BX, SP
   240  
   241  	// call target function
   242  	MOVQ	DI, DX
   243  	MOVQ	0(DI), DI
   244  	CALL	DI
   245  
   246  	// switch back to g
   247  	get_tls(CX)
   248  	MOVQ	g(CX), AX
   249  	MOVQ	g_m(AX), BX
   250  	MOVQ	m_curg(BX), AX
   251  	MOVQ	AX, g(CX)
   252  	MOVQ	(g_sched+gobuf_sp)(AX), SP	// SP saved at switch time, so RET returns to our caller
   253  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   254  	RET
   255  
   256  noswitch:
   257  	// already on m stack, just call directly
   258  	MOVQ	DI, DX
   259  	MOVQ	0(DI), DI
   260  	CALL	DI
   261  	RET
   262  
   263  /*
   264   * support for morestack
   265   */
   266  
   267  // Called during function prolog when more stack is needed.
   268  //
   269  // The traceback routines see morestack on a g0 as being
   270  // the top of a stack (for example, morestack calling newstack
   271  // calling the scheduler calling newm calling gc), so we must
   272  // record an argument size. For that purpose, it has no arguments.
        //
        // Traps with INT $3 when the current g is m->g0 or m->gsignal. Otherwise
        // it records f's caller in m->morebuf and f's own context (including DX
        // as ctxt) in g->sched, then switches to m->g0's stack and calls
        // newstack. newstack is not expected to return.
   273  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   274  	// Cannot grow scheduler stack (m->g0).
   275  	get_tls(CX)
   276  	MOVQ	g(CX), BX
   277  	MOVQ	g_m(BX), BX
   278  	MOVQ	m_g0(BX), SI
   279  	CMPQ	g(CX), SI
   280  	JNE	2(PC)
   281  	INT	$3
   282  
   283  	// Cannot grow signal stack (m->gsignal).
   284  	MOVQ	m_gsignal(BX), SI
   285  	CMPQ	g(CX), SI
   286  	JNE	2(PC)
   287  	INT	$3
   288  
   289  	// Called from f.
   290  	// Set m->morebuf to f's caller.
   291  	MOVQ	8(SP), AX	// f's caller's PC
   292  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   293  	LEAQ	16(SP), AX	// f's caller's SP
   294  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   295  	get_tls(CX)
   296  	MOVQ	g(CX), SI
   297  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   298  
   299  	// Set g->sched to context in f.
   300  	MOVQ	0(SP), AX // f's PC
   301  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   302  	MOVQ	SI, (g_sched+gobuf_g)(SI)
   303  	LEAQ	8(SP), AX // f's SP
   304  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   305  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX is preserved as the saved ctxt
   306  
   307  	// Call newstack on m->g0's stack.
   308  	MOVQ	m_g0(BX), BP
   309  	MOVQ	BP, g(CX)
   310  	MOVQ	(g_sched+gobuf_sp)(BP), SP
   311  	CALL	runtime·newstack(SB)
   312  	MOVQ	$0, 0x1003	// crash if newstack returns
   313  	RET
   314  
   315  // morestack but not preserving ctxt.
        // Zeroes DX (the register morestack saves as g->sched.ctxt) and then
        // tail-jumps into morestack.
   316  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   317  	MOVL	$0, DX	// 32-bit move also clears the upper half of DX
   318  	JMP	runtime·morestack(SB)
   319  
   320  // reflectcall: call a function with the given argument list
   321  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   322  // we don't have variable-sized frames, so we use a small number
   323  // of constant-sized-frame functions to encode a few bits of size in the pc.
   324  // Caution: ugly multiline assembly macros in your future!
   325  
   326  #define DISPATCH(NAME,MAXSIZE)		\
   327  	CMPQ	CX, $MAXSIZE;		\
   328  	JA	3(PC);			\
   329  	MOVQ	$NAME(SB), AX;		\
   330  	JMP	AX
   331  // Note: can't just "JMP NAME(SB)" - bad inlining results.
        // DISPATCH(NAME, MAXSIZE) expects the argument size in CX. When CX is at
        // most MAXSIZE (unsigned compare) it jumps to NAME through AX; when CX is
        // larger, JA skips the two-instruction jump and execution falls through
        // to whatever follows the macro.
   332  
   333  TEXT reflect·call(SB), NOSPLIT, $0-0
        	// Alias: tail-jumps to ·reflectcall without touching the arguments.
   334  	JMP	·reflectcall(SB)
   335  
   336  TEXT ·reflectcall(SB), NOSPLIT, $0-32
        	// Loads the 32-bit argsize argument into CX and jumps to the first
        	// runtime·callN routine whose N is >= argsize. Sizes above 1073741824
        	// fall through every DISPATCH and jump to badreflectcall.
   337  	MOVLQZX argsize+24(FP), CX
   338  	// NOTE(rsc): No call16, because CALLFN needs four words
   339  	// of argument space to invoke callwritebarrier.
   340  	DISPATCH(runtime·call32, 32)
   341  	DISPATCH(runtime·call64, 64)
   342  	DISPATCH(runtime·call128, 128)
   343  	DISPATCH(runtime·call256, 256)
   344  	DISPATCH(runtime·call512, 512)
   345  	DISPATCH(runtime·call1024, 1024)
   346  	DISPATCH(runtime·call2048, 2048)
   347  	DISPATCH(runtime·call4096, 4096)
   348  	DISPATCH(runtime·call8192, 8192)
   349  	DISPATCH(runtime·call16384, 16384)
   350  	DISPATCH(runtime·call32768, 32768)
   351  	DISPATCH(runtime·call65536, 65536)
   352  	DISPATCH(runtime·call131072, 131072)
   353  	DISPATCH(runtime·call262144, 262144)
   354  	DISPATCH(runtime·call524288, 524288)
   355  	DISPATCH(runtime·call1048576, 1048576)
   356  	DISPATCH(runtime·call2097152, 2097152)
   357  	DISPATCH(runtime·call4194304, 4194304)
   358  	DISPATCH(runtime·call8388608, 8388608)
   359  	DISPATCH(runtime·call16777216, 16777216)
   360  	DISPATCH(runtime·call33554432, 33554432)
   361  	DISPATCH(runtime·call67108864, 67108864)
   362  	DISPATCH(runtime·call134217728, 134217728)
   363  	DISPATCH(runtime·call268435456, 268435456)
   364  	DISPATCH(runtime·call536870912, 536870912)
   365  	DISPATCH(runtime·call1073741824, 1073741824)
   366  	MOVQ	$runtime·badreflectcall(SB), AX
   367  	JMP	AX
   368  
   369  #define CALLFN(NAME,MAXSIZE)			\
   370  TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
   371  	NO_LOCAL_POINTERS;			\
   372  	/* copy arguments to stack */		\
   373  	MOVQ	argptr+16(FP), SI;		\
   374  	MOVLQZX argsize+24(FP), CX;		\
   375  	MOVQ	SP, DI;				\
   376  	REP;MOVSB;				\
   377  	/* call function */			\
   378  	MOVQ	f+8(FP), DX;			\
   379  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   380  	CALL	(DX);				\
   381  	/* copy return values back */		\
   382  	MOVQ	argptr+16(FP), DI;		\
   383  	MOVLQZX	argsize+24(FP), CX;		\
   384  	MOVLQZX retoffset+28(FP), BX;		\
   385  	MOVQ	SP, SI;				\
   386  	ADDQ	BX, DI;				\
   387  	ADDQ	BX, SI;				\
   388  	SUBQ	BX, CX;				\
   389  	REP;MOVSB;				\
   390  	/* execute write barrier updates */	\
   391  	MOVQ	argtype+0(FP), DX;		\
   392  	MOVQ	argptr+16(FP), DI;		\
   393  	MOVLQZX	argsize+24(FP), CX;		\
   394  	MOVLQZX retoffset+28(FP), BX;		\
   395  	MOVQ	DX, 0(SP);			\
   396  	MOVQ	DI, 8(SP);			\
   397  	MOVQ	CX, 16(SP);			\
   398  	MOVQ	BX, 24(SP);			\
   399  	CALL	runtime·callwritebarrier(SB);	\
   400  	RET
        // CALLFN(NAME, MAXSIZE) above defines one routine with a MAXSIZE-byte
        // frame and 32 bytes of arguments (argtype, f, argptr, argsize,
        // retoffset). The routine:
        //   1. copies argsize bytes from argptr to the bottom of its frame,
        //   2. calls the code address stored in the first word of f, with DX = f,
        //   3. copies bytes [retoffset, argsize) of the frame back to argptr,
        //   4. calls callwritebarrier(argtype, argptr, argsize, retoffset),
        //      passing the four values in the first four words of the frame.
        // Comments inside the macro must use /* */ because every line but the
        // last ends in a continuation backslash.
   401  
   402  CALLFN(·call32, 32)	// one instance per power-of-two frame size, 32 bytes up to 1073741824; names match the DISPATCH list in reflectcall
   403  CALLFN(·call64, 64)
   404  CALLFN(·call128, 128)
   405  CALLFN(·call256, 256)
   406  CALLFN(·call512, 512)
   407  CALLFN(·call1024, 1024)
   408  CALLFN(·call2048, 2048)
   409  CALLFN(·call4096, 4096)
   410  CALLFN(·call8192, 8192)
   411  CALLFN(·call16384, 16384)
   412  CALLFN(·call32768, 32768)
   413  CALLFN(·call65536, 65536)
   414  CALLFN(·call131072, 131072)
   415  CALLFN(·call262144, 262144)
   416  CALLFN(·call524288, 524288)
   417  CALLFN(·call1048576, 1048576)
   418  CALLFN(·call2097152, 2097152)
   419  CALLFN(·call4194304, 4194304)
   420  CALLFN(·call8388608, 8388608)
   421  CALLFN(·call16777216, 16777216)
   422  CALLFN(·call33554432, 33554432)
   423  CALLFN(·call67108864, 67108864)
   424  CALLFN(·call134217728, 134217728)
   425  CALLFN(·call268435456, 268435456)
   426  CALLFN(·call536870912, 536870912)
   427  CALLFN(·call1073741824, 1073741824)
   428  
   429  // bool cas(int32 *val, int32 old, int32 new)
   430  // Atomically:
   431  //	if(*val == old){
   432  //		*val = new;
   433  //		return 1;
   434  //	} else
   435  //		return 0;
        // Arguments: ptr at 0(FP), old at 8(FP), new at 12(FP); the one-byte
        // boolean result is written at 16(FP).
   436  TEXT runtime·cas(SB), NOSPLIT, $0-17
   437  	MOVQ	ptr+0(FP), BX
   438  	MOVL	old+8(FP), AX	// CMPXCHG compares against AX implicitly
   439  	MOVL	new+12(FP), CX
   440  	LOCK
   441  	CMPXCHGL	CX, 0(BX)
   442  	SETEQ	ret+16(FP)	// ZF is set only when the exchange happened
   443  	RET
   444  
   445  // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
   446  // Atomically:
   447  //	if(*val == old){
   448  //		*val = new;
   449  //		return 1;
   450  //	} else {
   451  //		return 0;
   452  //	}
        // old is passed by value (loaded from 8(FP) into AX); the one-byte
        // boolean result is written at 24(FP).
   453  TEXT runtime·cas64(SB), NOSPLIT, $0-25
   454  	MOVQ	ptr+0(FP), BX
   455  	MOVQ	old+8(FP), AX	// CMPXCHG compares against AX implicitly
   456  	MOVQ	new+16(FP), CX
   457  	LOCK
   458  	CMPXCHGQ	CX, 0(BX)
   459  	SETEQ	ret+24(FP)	// ZF is set only when the exchange happened
   460  	RET
   461  	
   462  TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
        	// Same argument layout as cas64, so it simply tail-jumps there.
   463  	JMP	runtime·cas64(SB)
   464  
   465  TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-16
        	// Tail-jumps to atomicload64, which is not defined in this part of the file.
   466  	JMP	runtime·atomicload64(SB)
   467  
   468  TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-16
        	// Tail-jumps to atomicload64, which is not defined in this part of the file.
   469  	JMP	runtime·atomicload64(SB)
   470  
   471  TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
        	// Same argument layout as atomicstore64, so it simply tail-jumps there.
   472  	JMP	runtime·atomicstore64(SB)
   473  
   474  // bool casp(void **val, void *old, void *new)
   475  // Atomically:
   476  //	if(*val == old){
   477  //		*val = new;
   478  //		return 1;
   479  //	} else
   480  //		return 0;
        // The instruction sequence is identical to cas64: a locked 64-bit
        // compare-and-exchange, with the one-byte result written at 24(FP).
   481  TEXT runtime·casp1(SB), NOSPLIT, $0-25
   482  	MOVQ	ptr+0(FP), BX
   483  	MOVQ	old+8(FP), AX	// CMPXCHG compares against AX implicitly
   484  	MOVQ	new+16(FP), CX
   485  	LOCK
   486  	CMPXCHGQ	CX, 0(BX)
   487  	SETEQ	ret+24(FP)
   488  	RET
   489  
   490  // uint32 xadd(uint32 volatile *val, int32 delta)
   491  // Atomically:
   492  //	*val += delta;
   493  //	return *val;
        // The returned value is the new value: the old value fetched by XADD
        // plus delta.
   494  TEXT runtime·xadd(SB), NOSPLIT, $0-20
   495  	MOVQ	ptr+0(FP), BX
   496  	MOVL	delta+8(FP), AX
   497  	MOVL	AX, CX	// keep a copy of delta; XADD overwrites AX
   498  	LOCK
   499  	XADDL	AX, 0(BX)	// memory += AX; AX = previous memory value
   500  	ADDL	CX, AX	// AX = previous value + delta
   501  	MOVL	AX, ret+16(FP)
   502  	RET
   503  
   504  TEXT runtime·xadd64(SB), NOSPLIT, $0-24
        	// 64-bit counterpart of xadd: atomically adds delta to *ptr and
        	// returns the new value.
   505  	MOVQ	ptr+0(FP), BX
   506  	MOVQ	delta+8(FP), AX
   507  	MOVQ	AX, CX	// keep a copy of delta; XADD overwrites AX
   508  	LOCK
   509  	XADDQ	AX, 0(BX)	// memory += AX; AX = previous memory value
   510  	ADDQ	CX, AX	// AX = previous value + delta
   511  	MOVQ	AX, ret+16(FP)
   512  	RET
   513  
   514  TEXT runtime·xchg(SB), NOSPLIT, $0-20
        	// Swaps the 32-bit value new into *ptr and returns the previous
        	// contents. No LOCK prefix is written: XCHG with a memory operand
        	// is locked implicitly on x86.
   515  	MOVQ	ptr+0(FP), BX
   516  	MOVL	new+8(FP), AX
   517  	XCHGL	AX, 0(BX)
   518  	MOVL	AX, ret+16(FP)
   519  	RET
   520  
   521  TEXT runtime·xchg64(SB), NOSPLIT, $0-24
        	// Swaps the 64-bit value new into *ptr and returns the previous
        	// contents (XCHG with a memory operand is locked implicitly on x86).
   522  	MOVQ	ptr+0(FP), BX
   523  	MOVQ	new+8(FP), AX
   524  	XCHGQ	AX, 0(BX)
   525  	MOVQ	AX, ret+16(FP)
   526  	RET
   527  
   528  TEXT runtime·xchgp1(SB), NOSPLIT, $0-24
        	// Same instruction sequence as xchg64: swaps new into *ptr and
        	// returns the previous 64-bit contents.
   529  	MOVQ	ptr+0(FP), BX
   530  	MOVQ	new+8(FP), AX
   531  	XCHGQ	AX, 0(BX)
   532  	MOVQ	AX, ret+16(FP)
   533  	RET
   534  
   535  TEXT runtime·xchguintptr(SB), NOSPLIT, $0-24
        	// Same argument layout as xchg64, so it simply tail-jumps there.
   536  	JMP	runtime·xchg64(SB)
   537  
   538  TEXT runtime·procyield(SB),NOSPLIT,$0-0
        	// Executes the PAUSE instruction `cycles` times, then returns.
        	// The count is decremented after each PAUSE, so cycles == 0 wraps
        	// to 0xffffffff and spins for roughly 2^32 iterations.
        	// NOTE(review): the TEXT line declares 0 bytes of arguments although a
        	// 32-bit argument is read from 0(FP) — confirm whether $0-4 is intended.
   539  	MOVL	cycles+0(FP), AX
   540  again:
   541  	PAUSE
   542  	SUBL	$1, AX
   543  	JNZ	again
   544  	RET
   545  
   546  TEXT runtime·atomicstorep1(SB), NOSPLIT, $0-16
        	// Stores the 64-bit val into *ptr using XCHG (implicitly locked with
        	// a memory operand); the previous contents end up in AX and are dropped.
   547  	MOVQ	ptr+0(FP), BX
   548  	MOVQ	val+8(FP), AX
   549  	XCHGQ	AX, 0(BX)
   550  	RET
   551  
   552  TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
        	// Stores the 32-bit val into *ptr using XCHG (implicitly locked with
        	// a memory operand); the previous contents end up in AX and are dropped.
   553  	MOVQ	ptr+0(FP), BX
   554  	MOVL	val+8(FP), AX
   555  	XCHGL	AX, 0(BX)
   556  	RET
   557  
   558  TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
        	// Stores the 64-bit val into *ptr using XCHG (implicitly locked with
        	// a memory operand); the previous contents end up in AX and are dropped.
   559  	MOVQ	ptr+0(FP), BX
   560  	MOVQ	val+8(FP), AX
   561  	XCHGQ	AX, 0(BX)
   562  	RET
   563  
   564  // void	runtime·atomicor8(byte volatile*, byte);
        // Atomically ORs the byte val into *ptr. Nothing is returned.
   565  TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
   566  	MOVQ	ptr+0(FP), AX
   567  	MOVB	val+8(FP), BX
   568  	LOCK
   569  	ORB	BX, (AX)
   570  	RET
   571  
   572  // void jmpdefer(fn, sp);
   573  // called from deferreturn.
   574  // 1. pop the caller
   575  // 2. sub 5 bytes from the caller's return address
   576  // 3. jmp to the argument
        // fv is the function value to run (left in DX for the callee) and argp is
        // the caller's SP. SP is reset to argp-8, where the caller's return
        // address lives; that address is backed up by 5 bytes so that the
        // deferred function, on returning, re-executes the CALL that preceded it.
   577  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   578  	MOVQ	fv+0(FP), DX	// fn
   579  	MOVQ	argp+8(FP), BX	// caller sp
   580  	LEAQ	-8(BX), SP	// caller sp after CALL
   581  	SUBQ	$5, (SP)	// return to CALL again
   582  	MOVQ	0(DX), BX
   583  	JMP	BX	// but first run the deferred function
   584  
   585  // Save state of caller into g->sched. Smashes R8, R9.
        // Writes the caller's return PC and SP into the current g's sched buffer
        // and zeroes its ret and ctxt fields. Unlike runtime·gosave above, it
        // takes no Gobuf argument and does not write sched.g.
   586  TEXT gosave<>(SB),NOSPLIT,$0
   587  	get_tls(R8)
   588  	MOVQ	g(R8), R8	// R8 = current g
   589  	MOVQ	0(SP), R9	// R9 = our return address, i.e. the caller's PC
   590  	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   591  	LEAQ	8(SP), R9	// R9 = caller's SP (just above the return address)
   592  	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   593  	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   594  	MOVQ	$0, (g_sched+gobuf_ctxt)(R8)
   595  	RET
   596  
   597  // asmcgocall(void(*fn)(void*), void *arg)
   598  // Call fn(arg) on the scheduler stack,
   599  // aligned appropriately for the gcc ABI.
   600  // See cgocall.c for more details.
        // Thin wrapper: loads fn into AX and arg into BX, as the shared
        // asmcgocall<> helper expects, and discards the helper's AX result.
   601  TEXT ·asmcgocall(SB),NOSPLIT,$0-16
   602  	MOVQ	fn+0(FP), AX
   603  	MOVQ	arg+8(FP), BX
   604  	CALL	asmcgocall<>(SB)
   605  	RET
   606  
   607  TEXT ·asmcgocall_errno(SB),NOSPLIT,$0-20
        	// Same as asmcgocall, but returns the low 32 bits that the called
        	// function left in AX as the result at 16(FP).
   608  	MOVQ	fn+0(FP), AX
   609  	MOVQ	arg+8(FP), BX
   610  	CALL	asmcgocall<>(SB)
   611  	MOVL	AX, ret+16(FP)
   612  	RET
   613  
   614  // asmcgocall common code. fn in AX, arg in BX. returns errno in AX.
        // When the current g is neither m->g0 nor m->gsignal, its state is saved
        // with gosave<> and execution moves to the m->g0 stack. The original g
        // and the depth of the original SP below g's stack.hi are kept in the new
        // frame, so both can be restored after fn returns. AX is not touched
        // after the call, so fn's AX value is what the caller sees.
   615  TEXT asmcgocall<>(SB),NOSPLIT,$0-0
   616  	MOVQ	SP, DX	// DX = SP on entry, used below to compute the stack depth
   617  
   618  	// Figure out if we need to switch to m->g0 stack.
   619  	// We get called to create new OS threads too, and those
   620  	// come in on the m->g0 stack already.
   621  	get_tls(CX)
   622  	MOVQ	g(CX), BP
   623  	MOVQ	g_m(BP), BP
   624  	MOVQ	m_g0(BP), SI
   625  	MOVQ	g(CX), DI	// DI = current g; stays live until it is saved at 48(SP)
   626  	CMPQ	SI, DI
   627  	JEQ	nosave
   628  	MOVQ	m_gsignal(BP), SI
   629  	CMPQ	SI, DI
   630  	JEQ	nosave
   631  	
   632  	MOVQ	m_g0(BP), SI
   633  	CALL	gosave<>(SB)
   634  	MOVQ	SI, g(CX)
   635  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   636  nosave:
   637  
   638  	// Now on a scheduling stack (a pthread-created stack).
   639  	// Make sure we have enough room for 4 stack-backed fast-call
   640  	// registers as per windows amd64 calling convention.
   641  	SUBQ	$64, SP
   642  	ANDQ	$~15, SP	// alignment for gcc ABI
   643  	MOVQ	DI, 48(SP)	// save g
   644  	MOVQ	(g_stack+stack_hi)(DI), DI
   645  	SUBQ	DX, DI
   646  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   647  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   648  	MOVQ	BX, CX		// CX = first argument in Win64
   649  	CALL	AX
   650  
   651  	// Restore registers, g, stack pointer.
   652  	get_tls(CX)
   653  	MOVQ	48(SP), DI
   654  	MOVQ	(g_stack+stack_hi)(DI), SI
   655  	SUBQ	40(SP), SI	// SI = stack.hi - saved depth = original SP
   656  	MOVQ	DI, g(CX)
   657  	MOVQ	SI, SP
   658  	RET
   659  
   660  // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   661  // Turn the fn into a Go func (by taking its address) and call
   662  // cgocallback_gofunc.
        // The three outgoing arguments are placed in this routine's 24-byte frame:
        // the address of the fn argument slot, then frame and framesize unchanged.
   663  TEXT runtime·cgocallback(SB),NOSPLIT,$24-24
   664  	LEAQ	fn+0(FP), AX	// address of the slot holding fn, not fn itself
   665  	MOVQ	AX, 0(SP)
   666  	MOVQ	frame+8(FP), AX
   667  	MOVQ	AX, 8(SP)
   668  	MOVQ	framesize+16(FP), AX
   669  	MOVQ	AX, 16(SP)
   670  	MOVQ	$runtime·cgocallback_gofunc(SB), AX
   671  	CALL	AX
   672  	RET
   673  
   674  // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   675  // See cgocall.c for more details.
        // Outline:
        //   1. If no g is set for this thread, call needm to obtain an m (R8 is
        //      then loaded from a zeroed stack slot and marks "borrowed m").
        //   2. Save m->g0->sched.sp in the local frame slot and point it at SP.
        //   3. Switch to m->curg's stack, push curg's saved PC, and call
        //      cgocallbackg.
        //   4. Restore curg's sched.pc/sp, switch back to m->g0's stack and
        //      restore g0's sched.sp.
        //   5. If R8 is zero (the m was borrowed), call dropm.
   676  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-24
   677  	NO_LOCAL_POINTERS
   678  
   679  	// If g is nil, Go did not create the current thread.
   680  	// Call needm to obtain one m for temporary use.
   681  	// In this case, we're running on the thread stack, so there's
   682  	// lots of space, but the linker doesn't know. Hide the call from
   683  	// the linker analysis by using an indirect call through AX.
   684  	get_tls(CX)
   685  #ifdef GOOS_windows
   686  	MOVL	$0, BP
   687  	CMPQ	CX, $0
   688  	JEQ	2(PC)
   689  #endif
   690  	MOVQ	g(CX), BP
   691  	CMPQ	BP, $0
   692  	JEQ	needm
   693  	MOVQ	g_m(BP), BP
   694  	MOVQ	BP, R8 // holds oldm until end of function
   695  	JMP	havem
   696  needm:
   697  	MOVQ	$0, 0(SP)
   698  	MOVQ	$runtime·needm(SB), AX
   699  	CALL	AX
   700  	MOVQ	0(SP), R8
   701  	get_tls(CX)
   702  	MOVQ	g(CX), BP
   703  	MOVQ	g_m(BP), BP
   704  	
   705  	// Set m->sched.sp = SP, so that if a panic happens
   706  	// during the function we are about to execute, it will
   707  	// have a valid SP to run on the g0 stack.
   708  	// The next few lines (after the havem label)
   709  	// will save this SP onto the stack and then write
   710  	// the same SP back to m->sched.sp. That seems redundant,
   711  	// but if an unrecovered panic happens, unwindm will
   712  	// restore the g->sched.sp from the stack location
   713  	// and then systemstack will try to use it. If we don't set it here,
   714  	// that restored SP will be uninitialized (typically 0) and
   715  	// will not be usable.
   716  	MOVQ	m_g0(BP), SI
   717  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   718  
   719  havem:
   720  	// Now there's a valid m, and we're running on its m->g0.
   721  	// Save current m->g0->sched.sp on stack and then set it to SP.
   722  	// Save current sp in m->g0->sched.sp in preparation for
   723  	// switch back to m->curg stack.
   724  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   725  	MOVQ	m_g0(BP), SI
   726  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   727  	MOVQ	AX, 0(SP)
   728  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   729  
   730  	// Switch to m->curg stack and call runtime.cgocallbackg.
   731  	// Because we are taking over the execution of m->curg
   732  	// but *not* resuming what had been running, we need to
   733  	// save that information (m->curg->sched) so we can restore it.
   734  	// We can restore m->curg->sched.sp easily, because calling
   735  	// runtime.cgocallbackg leaves SP unchanged upon return.
   736  	// To save m->curg->sched.pc, we push it onto the stack.
   737  	// This has the added benefit that it looks to the traceback
   738  	// routine like cgocallbackg is going to return to that
   739  	// PC (because the frame we allocate below has the same
   740  	// size as cgocallback_gofunc's frame declared above)
   741  	// so that the traceback will seamlessly trace back into
   742  	// the earlier calls.
   743  	//
   744  	// In the new goroutine, 0(SP) holds the saved R8.
   745  	MOVQ	m_curg(BP), SI
   746  	MOVQ	SI, g(CX)
   747  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   748  	MOVQ	(g_sched+gobuf_pc)(SI), BP
   749  	MOVQ	BP, -8(DI)	// saved curg PC sits where a return address would
   750  	LEAQ	-(8+8)(DI), SP	// 8 bytes for the saved PC + 8 bytes of frame
   751  	MOVQ	R8, 0(SP)
   752  	CALL	runtime·cgocallbackg(SB)
   753  	MOVQ	0(SP), R8
   754  
   755  	// Restore g->sched (== m->curg->sched) from saved values.
   756  	get_tls(CX)
   757  	MOVQ	g(CX), SI
   758  	MOVQ	8(SP), BP
   759  	MOVQ	BP, (g_sched+gobuf_pc)(SI)
   760  	LEAQ	(8+8)(SP), DI
   761  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   762  
   763  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   764  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   765  	// so we do not have to restore it.)
   766  	MOVQ	g(CX), BP
   767  	MOVQ	g_m(BP), BP
   768  	MOVQ	m_g0(BP), SI
   769  	MOVQ	SI, g(CX)
   770  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   771  	MOVQ	0(SP), AX
   772  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   773  	
   774  	// If the m on entry was nil, we called needm above to borrow an m
   775  	// for the duration of the call. Since the call is over, return it with dropm.
   776  	CMPQ	R8, $0
   777  	JNE 3(PC)
   778  	MOVQ	$runtime·dropm(SB), AX
   779  	CALL	AX
   780  
   781  	// Done!
   782  	RET
   783  
   784  // void setg(G*); set g. for use by needm.
        // Stores gg as the current g in thread-local storage. When built with
        // GOOS_windows defined, it first updates the word at 0x28(GS): zero (and
        // an early return) when gg is nil, otherwise the address of gg->m->tls.
   785  TEXT runtime·setg(SB), NOSPLIT, $0-8
   786  	MOVQ	gg+0(FP), BX
   787  #ifdef GOOS_windows
   788  	CMPQ	BX, $0
   789  	JNE	settls
   790  	MOVQ	$0, 0x28(GS)
   791  	RET
   792  settls:
   793  	MOVQ	g_m(BX), AX
   794  	LEAQ	m_tls(AX), AX
   795  	MOVQ	AX, 0x28(GS)
   796  #endif
   797  	get_tls(CX)
   798  	MOVQ	BX, g(CX)
   799  	RET
   800  
   801  // void setg_gcc(G*); set g called from gcc.
        // Takes the g pointer in DI rather than on the Go stack and stores it
        // as the current g in TLS. AX is used as scratch.
   802  TEXT setg_gcc<>(SB),NOSPLIT,$0
   803  	get_tls(AX)
   804  	MOVQ	DI, g(AX)
   805  	RET
   806  
   807  // check that SP is in range [g->stack.lo, g->stack.hi)
   808  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
        	// Traps with INT $3 unless g->stack.lo < SP < g->stack.hi for the
        	// current g. Both comparisons are unsigned and strict, so SP equal
        	// to stack.lo also traps.
   809  	get_tls(CX)
   810  	MOVQ	g(CX), AX
   811  	CMPQ	(g_stack+stack_hi)(AX), SP
   812  	JHI	2(PC)	// ok when stack.hi > SP
   813  	INT	$3
   814  	CMPQ	SP, (g_stack+stack_lo)(AX)
   815  	JHI	2(PC)	// ok when SP > stack.lo
   816  	INT	$3
   817  	RET
   818  
   819  TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16
        	// argp is the address of the caller's first argument; the word just
        	// below that address is read and returned as the calling PC.
   820  	MOVQ	argp+0(FP),AX		// addr of first arg
   821  	MOVQ	-8(AX),AX		// get calling pc
   822  	MOVQ	AX, ret+8(FP)
   823  	RET
   824  
   825  TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16
        	// Same instruction sequence as getcallerpc; only the argument name (p)
        	// differs. Returns the word stored 8 bytes below address p.
   826  	MOVQ	p+0(FP),AX		// addr of first arg
   827  	MOVQ	-8(AX),AX		// get calling pc
   828  	MOVQ	AX,ret+8(FP)
   829  	RET
   830  
   831  TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
        	// Overwrites the word 8 bytes below argp (the slot getcallerpc reads)
        	// with pc. Nothing is returned.
   832  	MOVQ	argp+0(FP),AX		// addr of first arg
   833  	MOVQ	pc+8(FP), BX
   834  	MOVQ	BX, -8(AX)		// set calling pc
   835  	RET
   836  
   837  TEXT runtime·getcallersp(SB),NOSPLIT,$0-16
        	// Returns argp unchanged: the address of the caller's first argument
        	// is reported as the caller's SP.
   838  	MOVQ	argp+0(FP), AX
   839  	MOVQ	AX, ret+8(FP)
   840  	RET
   841  
   842  // func gogetcallersp(p unsafe.Pointer) uintptr
        // Returns p unchanged as a uintptr; same instruction sequence as
        // getcallersp.
   843  TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-16
   844  	MOVQ	p+0(FP),AX		// addr of first arg
   845  	MOVQ	AX, ret+8(FP)
   846  	RET
   847  
   848  // int64 runtime·cputicks(void)
        // Returns the 64-bit time-stamp counter. RDTSC delivers it split across
        // DX (high 32 bits) and AX (low 32 bits); the halves are recombined here.
        // NOTE(review): the TEXT line declares 0 bytes of arguments/results although
        // an 8-byte result is written at 0(FP) — confirm whether $0-8 is intended.
   849  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   850  	RDTSC
   851  	SHLQ	$32, DX
   852  	ADDQ	DX, AX
   853  	MOVQ	AX, ret+0(FP)
   854  	RET
   855  
   856  // memhash_varlen(p unsafe.Pointer, h seed) uintptr
   857  // redirects to memhash(p, h, size) using the size
   858  // stored in the closure.
        // The closure pointer is taken from DX on entry; the size is its second
        // word, at 8(DX). memhash's result is read back from 24(SP).
   859  TEXT runtime·memhash_varlen(SB),NOSPLIT,$32-24
   860  	GO_ARGS
   861  	NO_LOCAL_POINTERS
   862  	MOVQ	p+0(FP), AX
   863  	MOVQ	h+8(FP), BX
   864  	MOVQ	8(DX), CX	// size stored in the closure
   865  	MOVQ	AX, 0(SP)
   866  	MOVQ	BX, 8(SP)
   867  	MOVQ	CX, 16(SP)
   868  	CALL	runtime·memhash(SB)
   869  	MOVQ	24(SP), AX	// memhash result slot, after its three argument words
   870  	MOVQ	AX, ret+16(FP)
   871  	RET
   872  
   873  // hash function using AES hardware instructions
        // Arguments: p at 0(FP), seed h at 8(FP), size s at 16(FP); result slot at
        // 24(FP). Sets up AX = data pointer, CX = size, DX = address of the
        // result slot, then tail-jumps to aeshashbody, which reads the seed from
        // 8(FP) itself.
   874  TEXT runtime·aeshash(SB),NOSPLIT,$0-32
   875  	MOVQ	p+0(FP), AX	// ptr to data
   876  	MOVQ	s+16(FP), CX	// size
   877  	LEAQ	ret+24(FP), DX
   878  	JMP	runtime·aeshashbody(SB)
   879  
   880  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
        	// String variant of aeshash: p points at a string header, from which
        	// the length (second word) and data pointer (first word) are loaded.
        	// DX receives the address of the result slot at 16(FP) before the
        	// tail-jump to aeshashbody.
   881  	MOVQ	p+0(FP), AX	// ptr to string struct
   882  	MOVQ	8(AX), CX	// length of string
   883  	MOVQ	(AX), AX	// string data
   884  	LEAQ	ret+16(FP), DX
   885  	JMP	runtime·aeshashbody(SB)
   886  
   887  // AX: data
   888  // CX: length
   889  // DX: address to put return value
   890  TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   891  	MOVQ	h+8(FP), X6	// seed to low 64 bits of xmm6
   892  	PINSRQ	$1, CX, X6	// size to high 64 bits of xmm6
   893  	PSHUFHW	$0, X6, X6	// replace size with its low 2 bytes repeated 4 times
   894  	MOVO	runtime·aeskeysched(SB), X7
   895  	CMPQ	CX, $16
   896  	JB	aes0to15
   897  	JE	aes16
   898  	CMPQ	CX, $32
   899  	JBE	aes17to32
   900  	CMPQ	CX, $64
   901  	JBE	aes33to64
   902  	CMPQ	CX, $128
   903  	JBE	aes65to128
   904  	JMP	aes129plus
   905  
   906  aes0to15:
   907  	TESTQ	CX, CX
   908  	JE	aes0
   909  
   910  	ADDQ	$16, AX
   911  	TESTW	$0xff0, AX
   912  	JE	endofpage
   913  
   914  	// 16 bytes loaded at this address won't cross
   915  	// a page boundary, so we can load it directly.
   916  	MOVOU	-16(AX), X0
   917  	ADDQ	CX, CX
   918  	MOVQ	$masks<>(SB), BP
   919  	PAND	(BP)(CX*8), X0
   920  
   921  	// scramble 3 times
   922  	AESENC	X6, X0
   923  	AESENC	X7, X0
   924  	AESENC	X7, X0
   925  	MOVQ	X0, (DX)
   926  	RET
   927  
   928  endofpage:
   929  	// address ends in 1111xxxx.  Might be up against
   930  	// a page boundary, so load ending at last byte.
   931  	// Then shift bytes down using pshufb.
   932  	MOVOU	-32(AX)(CX*1), X0
   933  	ADDQ	CX, CX
   934  	MOVQ	$shifts<>(SB), BP
   935  	PSHUFB	(BP)(CX*8), X0
   936  	AESENC	X6, X0
   937  	AESENC	X7, X0
   938  	AESENC	X7, X0
   939  	MOVQ	X0, (DX)
   940  	RET
   941  
   942  aes0:
   943  	// return input seed
   944  	MOVQ	h+8(FP), AX
   945  	MOVQ	AX, (DX)
   946  	RET
   947  
   948  aes16:
   949  	MOVOU	(AX), X0
   950  	AESENC	X6, X0
   951  	AESENC	X7, X0
   952  	AESENC	X7, X0
   953  	MOVQ	X0, (DX)
   954  	RET
   955  
   956  aes17to32:
   957  	// load data to be hashed
   958  	MOVOU	(AX), X0
   959  	MOVOU	-16(AX)(CX*1), X1
   960  
   961  	// scramble 3 times
   962  	AESENC	X6, X0
   963  	AESENC	runtime·aeskeysched+16(SB), X1
   964  	AESENC	X7, X0
   965  	AESENC	X7, X1
   966  	AESENC	X7, X0
   967  	AESENC	X7, X1
   968  
   969  	// combine results
   970  	PXOR	X1, X0
   971  	MOVQ	X0, (DX)
   972  	RET
   973  
   974  aes33to64:
   975  	MOVOU	(AX), X0
   976  	MOVOU	16(AX), X1
   977  	MOVOU	-32(AX)(CX*1), X2
   978  	MOVOU	-16(AX)(CX*1), X3
   979  	
   980  	AESENC	X6, X0
   981  	AESENC	runtime·aeskeysched+16(SB), X1
   982  	AESENC	runtime·aeskeysched+32(SB), X2
   983  	AESENC	runtime·aeskeysched+48(SB), X3
   984  	AESENC	X7, X0
   985  	AESENC	X7, X1
   986  	AESENC	X7, X2
   987  	AESENC	X7, X3
   988  	AESENC	X7, X0
   989  	AESENC	X7, X1
   990  	AESENC	X7, X2
   991  	AESENC	X7, X3
   992  
   993  	PXOR	X2, X0
   994  	PXOR	X3, X1
   995  	PXOR	X1, X0
   996  	MOVQ	X0, (DX)
   997  	RET
   998  
   999  aes65to128:
  1000  	MOVOU	(AX), X0
  1001  	MOVOU	16(AX), X1
  1002  	MOVOU	32(AX), X2
  1003  	MOVOU	48(AX), X3
  1004  	MOVOU	-64(AX)(CX*1), X4
  1005  	MOVOU	-48(AX)(CX*1), X5
  1006  	MOVOU	-32(AX)(CX*1), X8
  1007  	MOVOU	-16(AX)(CX*1), X9
  1008  	
  1009  	AESENC	X6, X0
  1010  	AESENC	runtime·aeskeysched+16(SB), X1
  1011  	AESENC	runtime·aeskeysched+32(SB), X2
  1012  	AESENC	runtime·aeskeysched+48(SB), X3
  1013  	AESENC	runtime·aeskeysched+64(SB), X4
  1014  	AESENC	runtime·aeskeysched+80(SB), X5
  1015  	AESENC	runtime·aeskeysched+96(SB), X8
  1016  	AESENC	runtime·aeskeysched+112(SB), X9
  1017  	AESENC	X7, X0
  1018  	AESENC	X7, X1
  1019  	AESENC	X7, X2
  1020  	AESENC	X7, X3
  1021  	AESENC	X7, X4
  1022  	AESENC	X7, X5
  1023  	AESENC	X7, X8
  1024  	AESENC	X7, X9
  1025  	AESENC	X7, X0
  1026  	AESENC	X7, X1
  1027  	AESENC	X7, X2
  1028  	AESENC	X7, X3
  1029  	AESENC	X7, X4
  1030  	AESENC	X7, X5
  1031  	AESENC	X7, X8
  1032  	AESENC	X7, X9
  1033  
  1034  	PXOR	X4, X0
  1035  	PXOR	X5, X1
  1036  	PXOR	X8, X2
  1037  	PXOR	X9, X3
  1038  	PXOR	X2, X0
  1039  	PXOR	X3, X1
  1040  	PXOR	X1, X0
  1041  	MOVQ	X0, (DX)
  1042  	RET
  1043  
  1044  aes129plus:
  1045  	// start with last (possibly overlapping) block
  1046  	MOVOU	-128(AX)(CX*1), X0
  1047  	MOVOU	-112(AX)(CX*1), X1
  1048  	MOVOU	-96(AX)(CX*1), X2
  1049  	MOVOU	-80(AX)(CX*1), X3
  1050  	MOVOU	-64(AX)(CX*1), X4
  1051  	MOVOU	-48(AX)(CX*1), X5
  1052  	MOVOU	-32(AX)(CX*1), X8
  1053  	MOVOU	-16(AX)(CX*1), X9
  1054  
  1055  	// scramble state once
  1056  	AESENC	X6, X0
  1057  	AESENC	runtime·aeskeysched+16(SB), X1
  1058  	AESENC	runtime·aeskeysched+32(SB), X2
  1059  	AESENC	runtime·aeskeysched+48(SB), X3
  1060  	AESENC	runtime·aeskeysched+64(SB), X4
  1061  	AESENC	runtime·aeskeysched+80(SB), X5
  1062  	AESENC	runtime·aeskeysched+96(SB), X8
  1063  	AESENC	runtime·aeskeysched+112(SB), X9
  1064  
  1065  	// compute number of remaining 128-byte blocks
  1066  	DECQ	CX
  1067  	SHRQ	$7, CX
  1068  	
  1069  aesloop:
  1070  	// scramble state, xor in a block
  1071  	MOVOU	(AX), X10
  1072  	MOVOU	16(AX), X11
  1073  	MOVOU	32(AX), X12
  1074  	MOVOU	48(AX), X13
  1075  	AESENC	X10, X0
  1076  	AESENC	X11, X1
  1077  	AESENC	X12, X2
  1078  	AESENC	X13, X3
  1079  	MOVOU	64(AX), X10
  1080  	MOVOU	80(AX), X11
  1081  	MOVOU	96(AX), X12
  1082  	MOVOU	112(AX), X13
  1083  	AESENC	X10, X4
  1084  	AESENC	X11, X5
  1085  	AESENC	X12, X8
  1086  	AESENC	X13, X9
  1087  
  1088  	// scramble state
  1089  	AESENC	X7, X0
  1090  	AESENC	X7, X1
  1091  	AESENC	X7, X2
  1092  	AESENC	X7, X3
  1093  	AESENC	X7, X4
  1094  	AESENC	X7, X5
  1095  	AESENC	X7, X8
  1096  	AESENC	X7, X9
  1097  
  1098  	ADDQ	$128, AX
  1099  	DECQ	CX
  1100  	JNE	aesloop
  1101  
  1102  	// 2 more scrambles to finish
  1103  	AESENC	X7, X0
  1104  	AESENC	X7, X1
  1105  	AESENC	X7, X2
  1106  	AESENC	X7, X3
  1107  	AESENC	X7, X4
  1108  	AESENC	X7, X5
  1109  	AESENC	X7, X8
  1110  	AESENC	X7, X9
  1111  	AESENC	X7, X0
  1112  	AESENC	X7, X1
  1113  	AESENC	X7, X2
  1114  	AESENC	X7, X3
  1115  	AESENC	X7, X4
  1116  	AESENC	X7, X5
  1117  	AESENC	X7, X8
  1118  	AESENC	X7, X9
  1119  
  1120  	PXOR	X4, X0
  1121  	PXOR	X5, X1
  1122  	PXOR	X8, X2
  1123  	PXOR	X9, X3
  1124  	PXOR	X2, X0
  1125  	PXOR	X3, X1
  1126  	PXOR	X1, X0
  1127  	MOVQ	X0, (DX)
  1128  	RET
  1129  	
  1130  TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
        	// Hashes the 4 bytes at p together with the seed h.
        	// Frame layout: p at 0(FP), h at 8(FP), 64-bit result at 16(FP).
  1131  	MOVQ	p+0(FP), AX	// ptr to data
  1132  	MOVQ	h+8(FP), X0	// seed, placed in the low 64 bits of X0
  1133  	PINSRD	$2, (AX), X0	// data: the 32-bit word at p becomes dword 2 of X0
        	// three AES rounds, keyed by the first three 16-byte entries of aeskeysched
  1134  	AESENC	runtime·aeskeysched+0(SB), X0
  1135  	AESENC	runtime·aeskeysched+16(SB), X0
  1136  	AESENC	runtime·aeskeysched+32(SB), X0
  1137  	MOVQ	X0, ret+16(FP)	// result is the low 64 bits of the scrambled state
  1138  	RET
  1139  
  1140  TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
        	// Hashes the 8 bytes at p together with the seed h.
        	// Frame layout: p at 0(FP), h at 8(FP), 64-bit result at 16(FP).
  1141  	MOVQ	p+0(FP), AX	// ptr to data
  1142  	MOVQ	h+8(FP), X0	// seed, placed in the low 64 bits of X0
  1143  	PINSRQ	$1, (AX), X0	// data: the 64-bit word at p becomes the high qword of X0
        	// three AES rounds, keyed by the first three 16-byte entries of aeskeysched
  1144  	AESENC	runtime·aeskeysched+0(SB), X0
  1145  	AESENC	runtime·aeskeysched+16(SB), X0
  1146  	AESENC	runtime·aeskeysched+32(SB), X0
  1147  	MOVQ	X0, ret+16(FP)	// result is the low 64 bits of the scrambled state
  1148  	RET
  1149  
  1150  // simple mask to get rid of data in the high part of the register.
        // The table holds 16 entries of 16 bytes each (two 8-byte DATA words per
        // entry, low word first).  Entry i, at byte offset 16*i, has its low i
        // bytes set to 0xff and the remaining bytes zero, so ANDing a 16-byte
        // value with entry i keeps only its low i bytes.
  1151  DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1152  DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1153  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1154  DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1155  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1156  DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1157  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1158  DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1159  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1160  DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1161  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1162  DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1163  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1164  DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1165  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1166  DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1167  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1168  DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1169  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1170  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1171  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1172  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1173  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1174  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1175  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1176  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1177  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1178  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1179  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1180  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1181  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1182  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
        // read-only, 256 bytes = 16 entries * 16 bytes
  1183  GLOBL masks<>(SB),RODATA,$256
  1184  
  1185  // these are arguments to pshufb.  They move data down from
  1186  // the high bytes of the register to the low bytes of the register.
  1187  // index is how many bytes to move.
        // The table holds 16 entries of 16 bytes each; entry i is at byte
        // offset 16*i.  In entry i the low i control bytes select source bytes
        // 16-i through 15, and every other control byte is 0xff, which makes
        // PSHUFB write zero to that destination byte.
  1188  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1189  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1190  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1191  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1192  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1193  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1194  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1195  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1196  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1197  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1198  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1199  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1200  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1201  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1202  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1203  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1204  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1205  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1206  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1207  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1208  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1209  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1210  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1211  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1212  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1213  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1214  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1215  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1216  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1217  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1218  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1219  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
        // read-only, 256 bytes = 16 entries * 16 bytes
  1220  GLOBL shifts<>(SB),RODATA,$256
  1221  
  1222  TEXT runtime·memeq(SB),NOSPLIT,$0-25
        	// Reports whether the size bytes at a and at b are equal.
        	// Frame layout: a at 0(FP), b at 8(FP), size at 16(FP), 1-byte result at 24(FP).
        	// The arguments are moved into the registers memeqbody expects.
  1223  	MOVQ	a+0(FP), SI
  1224  	MOVQ	b+8(FP), DI
  1225  	MOVQ	size+16(FP), BX
  1226  	CALL	runtime·memeqbody(SB)
  1227  	MOVB	AX, ret+24(FP)	// low byte of AX is memeqbody's 0/1 answer
  1228  	RET
  1229  
  1230  // memequal_varlen(a, b unsafe.Pointer) bool
        // The number of bytes to compare is not an argument: it is read from 8(DX).
  1231  TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
  1232  	MOVQ	a+0(FP), SI
  1233  	MOVQ	b+8(FP), DI
  1234  	CMPQ	SI, DI
  1235  	JEQ	eq	// same address: equal without reading memory
  1236  	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
  1237  	CALL	runtime·memeqbody(SB)
  1238  	MOVB	AX, ret+16(FP)	// low byte of AX is memeqbody's 0/1 answer
  1239  	RET
  1240  eq:
  1241  	MOVB	$1, ret+16(FP)
  1242  	RET
  1243  
  1244  // eqstring tests whether two strings are equal.
  1245  // See runtime_test.go:eqstring_generic for
  1246  // equivalent Go code.
        // Frame layout: s1 (ptr, len) at 0/8(FP), s2 (ptr, len) at 16/24(FP),
        // 1-byte result at 32(FP).
  1247  TEXT runtime·eqstring(SB),NOSPLIT,$0-33
  1248  	MOVQ	s1len+8(FP), AX
  1249  	MOVQ	s2len+24(FP), BX
  1250  	CMPQ	AX, BX
  1251  	JNE	noteq	// different lengths can never be equal
  1252  	MOVQ	s1str+0(FP), SI
  1253  	MOVQ	s2str+16(FP), DI
  1254  	CMPQ	SI, DI
  1255  	JEQ	eq	// same data pointer and same length: equal
        	// BX still holds the (common) length, which is the count memeqbody expects.
  1256  	CALL	runtime·memeqbody(SB)
  1257  	MOVB	AX, v+32(FP)
  1258  	RET
  1259  eq:
  1260  	MOVB	$1, v+32(FP)
  1261  	RET
  1262  noteq:
  1263  	MOVB	$0, v+32(FP)
  1264  	RET
  1265  
  1266  // a in SI
  1267  // b in DI
  1268  // count in BX
        // Result: AX = 1 if the count bytes at a and b are equal, 0 otherwise.
        // SI, DI, BX, CX, DX and X0-X7 are overwritten along the way.
  1269  TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1270  	XORQ	AX, AX	// AX = 0, so the bare RETs on a mismatch below return "not equal"
  1271  
  1272  	CMPQ	BX, $8
  1273  	JB	small
  1274  	
  1275  	// 64 bytes at a time using xmm registers
  1276  hugeloop:
  1277  	CMPQ	BX, $64
  1278  	JB	bigloop
  1279  	MOVOU	(SI), X0
  1280  	MOVOU	(DI), X1
  1281  	MOVOU	16(SI), X2
  1282  	MOVOU	16(DI), X3
  1283  	MOVOU	32(SI), X4
  1284  	MOVOU	32(DI), X5
  1285  	MOVOU	48(SI), X6
  1286  	MOVOU	48(DI), X7
        	// byte-wise equality of the four 16-byte pairs, ANDed into X0
  1287  	PCMPEQB	X1, X0
  1288  	PCMPEQB	X3, X2
  1289  	PCMPEQB	X5, X4
  1290  	PCMPEQB	X7, X6
  1291  	PAND	X2, X0
  1292  	PAND	X6, X4
  1293  	PAND	X4, X0
  1294  	PMOVMSKB X0, DX	// one bit per byte lane; 0xffff means all 64 bytes matched
  1295  	ADDQ	$64, SI
  1296  	ADDQ	$64, DI
  1297  	SUBQ	$64, BX
  1298  	CMPL	DX, $0xffff
  1299  	JEQ	hugeloop
  1300  	RET	// mismatch: AX is still 0
  1301  
  1302  	// 8 bytes at a time using 64-bit register
  1303  bigloop:
  1304  	CMPQ	BX, $8
  1305  	JBE	leftover
  1306  	MOVQ	(SI), CX
  1307  	MOVQ	(DI), DX
  1308  	ADDQ	$8, SI
  1309  	ADDQ	$8, DI
  1310  	SUBQ	$8, BX
  1311  	CMPQ	CX, DX
  1312  	JEQ	bigloop
  1313  	RET	// mismatch: AX is still 0
  1314  
  1315  	// remaining 0-8 bytes
        	// Compare the 8 bytes that end exactly at the end of each buffer.  This may
        	// re-check bytes already compared; it stays inside the buffers because this
        	// path is only reached when the original count was at least 8.
  1316  leftover:
  1317  	MOVQ	-8(SI)(BX*1), CX
  1318  	MOVQ	-8(DI)(BX*1), DX
  1319  	CMPQ	CX, DX
  1320  	SETEQ	AX
  1321  	RET
  1322  
        	// count is 0-7 here
  1323  small:
  1324  	CMPQ	BX, $0
  1325  	JEQ	equal	// count 0: ZF is set by the CMPQ, so SETEQ at equal yields 1
  1326  
        	// CX = -8*count; as a shift count (taken mod 64) this is 64 - 8*count
  1327  	LEAQ	0(BX*8), CX
  1328  	NEGQ	CX
  1329  
        	// Only the low byte of the address is tested: if it is at most 0xf8,
        	// an 8-byte load starting at SI ends within the same 256-byte-aligned block.
  1330  	CMPB	SI, $0xf8
  1331  	JA	si_high
  1332  
  1333  	// load at SI won't cross a page boundary.
  1334  	MOVQ	(SI), SI
  1335  	JMP	si_finish
  1336  si_high:
  1337  	// address ends in 11111xxx.  Load up to bytes we want, move to correct position.
  1338  	MOVQ	-8(SI)(BX*1), SI
  1339  	SHRQ	CX, SI
  1340  si_finish:
  1341  
  1342  	// same for DI.
  1343  	CMPB	DI, $0xf8
  1344  	JA	di_high
  1345  	MOVQ	(DI), DI
  1346  	JMP	di_finish
  1347  di_high:
  1348  	MOVQ	-8(DI)(BX*1), DI
  1349  	SHRQ	CX, DI
  1350  di_finish:
  1351  
        	// The low count bytes of SI and DI hold the data; anything above them may be
        	// garbage.  Shifting the difference left by 64 - 8*count discards those upper
        	// bytes, and the shift leaves ZF set exactly when the wanted bytes were equal.
  1352  	SUBQ	SI, DI
  1353  	SHLQ	CX, DI
  1354  equal:
  1355  	SETEQ	AX
  1356  	RET
  1357  
  1358  TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
        	// Three-way comparison of two strings via cmpbody.
        	// Frame layout: s1 (ptr, len) at 0/8(FP), s2 (ptr, len) at 16/24(FP),
        	// result at 32(FP).
  1359  	MOVQ	s1_base+0(FP), SI
  1360  	MOVQ	s1_len+8(FP), BX
  1361  	MOVQ	s2_base+16(FP), DI
  1362  	MOVQ	s2_len+24(FP), DX
  1363  	CALL	runtime·cmpbody(SB)
  1364  	MOVQ	AX, ret+32(FP)	// cmpbody's 1/0/-1 answer
  1365  	RET
  1366  
  1367  TEXT bytes·Compare(SB),NOSPLIT,$0-56
        	// Three-way comparison of two byte slices via cmpbody.
        	// Each slice occupies 24 bytes of arguments; only the data pointer
        	// (offsets 0 and 24) and the length (offsets 8 and 32) are read.
        	// The result is stored at 48(FP).
  1368  	MOVQ	s1+0(FP), SI
  1369  	MOVQ	s1+8(FP), BX
  1370  	MOVQ	s2+24(FP), DI
  1371  	MOVQ	s2+32(FP), DX
  1372  	CALL	runtime·cmpbody(SB)
  1373  	MOVQ	AX, res+48(FP)	// cmpbody's 1/0/-1 answer
  1374  	RET
  1375  
  1376  // input:
  1377  //   SI = a
  1378  //   DI = b
  1379  //   BX = alen
  1380  //   DX = blen
  1381  // output:
  1382  //   AX = 1/0/-1
        // The common prefix (min(alen, blen) bytes) is compared bytewise as
        // unsigned values; if it is identical the lengths decide the result.
        // BP, CX, X0 and X1 are used as scratch; SI, DI and BX may be overwritten.
  1383  TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1384  	CMPQ	SI, DI
  1385  	JEQ	allsame	// same start address: only the lengths can differ
  1386  	CMPQ	BX, DX
  1387  	MOVQ	DX, BP
  1388  	CMOVQLT	BX, BP // BP = min(alen, blen) = # of bytes to compare
  1389  	CMPQ	BP, $8
  1390  	JB	small
  1391  
        	// compare 16 bytes at a time while more than 16 remain
  1392  loop:
  1393  	CMPQ	BP, $16
  1394  	JBE	_0through16
  1395  	MOVOU	(SI), X0
  1396  	MOVOU	(DI), X1
  1397  	PCMPEQB X0, X1
  1398  	PMOVMSKB X1, AX
  1399  	XORQ	$0xffff, AX	// convert EQ to NE
  1400  	JNE	diff16	// branch if at least one byte is not equal
  1401  	ADDQ	$16, SI
  1402  	ADDQ	$16, DI
  1403  	SUBQ	$16, BP
  1404  	JMP	loop
  1405  	
  1406  	// AX = bit mask of differences
  1407  diff16:
  1408  	BSFQ	AX, BX	// index of first byte that differs
  1409  	XORQ	AX, AX
  1410  	MOVB	(SI)(BX*1), CX
  1411  	CMPB	CX, (DI)(BX*1)
  1412  	SETHI	AX	// unsigned compare: AX = 1 if a's byte is larger
  1413  	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
  1414  	RET
  1415  
  1416  	// 0 through 16 bytes left, alen>=8, blen>=8
  1417  _0through16:
  1418  	CMPQ	BP, $8
  1419  	JBE	_0through8
  1420  	MOVQ	(SI), AX
  1421  	MOVQ	(DI), CX
  1422  	CMPQ	AX, CX
  1423  	JNE	diff8
        	// compare the 8 bytes that end at the end of the common prefix
        	// (may overlap bytes already found equal)
  1424  _0through8:
  1425  	MOVQ	-8(SI)(BP*1), AX
  1426  	MOVQ	-8(DI)(BP*1), CX
  1427  	CMPQ	AX, CX
  1428  	JEQ	allsame
  1429  
  1430  	// AX and CX contain parts of a and b that differ.
        	// After the byte swap the first byte in memory is the most significant,
        	// so the highest differing bit belongs to the first differing byte.
  1431  diff8:
  1432  	BSWAPQ	AX	// reverse order of bytes
  1433  	BSWAPQ	CX
  1434  	XORQ	AX, CX
  1435  	BSRQ	CX, CX	// index of highest bit difference
  1436  	SHRQ	CX, AX	// move a's bit to bottom
  1437  	ANDQ	$1, AX	// mask bit
  1438  	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
  1439  	RET
  1440  
  1441  	// 0-7 bytes in common
  1442  small:
  1443  	LEAQ	(BP*8), CX	// bytes left -> bits left
  1444  	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
  1445  	JEQ	allsame	// NEGQ set ZF: nothing in common to compare
  1446  
  1447  	// load bytes of a into high bytes of SI
        	// (only the low address byte is tested: at most 0xf8 means an 8-byte load
        	// at SI ends within the same 256-byte-aligned block; otherwise load the
        	// 8 bytes ending at the end of the data and shift the wanted ones down)
  1448  	CMPB	SI, $0xf8
  1449  	JA	si_high
  1450  	MOVQ	(SI), SI
  1451  	JMP	si_finish
  1452  si_high:
  1453  	MOVQ	-8(SI)(BP*1), SI
  1454  	SHRQ	CX, SI
  1455  si_finish:
  1456  	SHLQ	CX, SI	// wanted bytes to the top, zeros below
  1457  
  1458  	// load bytes of b into high bytes of DI
  1459  	CMPB	DI, $0xf8
  1460  	JA	di_high
  1461  	MOVQ	(DI), DI
  1462  	JMP	di_finish
  1463  di_high:
  1464  	MOVQ	-8(DI)(BP*1), DI
  1465  	SHRQ	CX, DI
  1466  di_finish:
  1467  	SHLQ	CX, DI	// wanted bytes to the top, zeros below
  1468  
  1469  	BSWAPQ	SI	// reverse order of bytes
  1470  	BSWAPQ	DI
  1471  	XORQ	SI, DI	// find bit differences
  1472  	JEQ	allsame
  1473  	BSRQ	DI, CX	// index of highest bit difference
  1474  	SHRQ	CX, SI	// move a's bit to bottom
  1475  	ANDQ	$1, SI	// mask bit
  1476  	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
  1477  	RET
  1478  
        	// common prefix is identical: the longer operand compares greater
  1479  allsame:
  1480  	XORQ	AX, AX
  1481  	XORQ	CX, CX
  1482  	CMPQ	BX, DX
  1483  	SETGT	AX	// 1 if alen > blen
  1484  	SETEQ	CX	// 1 if alen == blen
  1485  	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
  1486  	RET
  1487  
  1488  TEXT bytes·IndexByte(SB),NOSPLIT,$0
        	// Slice variant: data pointer at 0(FP), length at 8(FP), the byte c at
        	// 24(FP) (after the 24-byte slice), result at 32(FP).
        	// The search itself is done by indexbytebody.
  1489  	MOVQ s+0(FP), SI
  1490  	MOVQ s_len+8(FP), BX
  1491  	MOVB c+24(FP), AL
  1492  	CALL runtime·indexbytebody(SB)
  1493  	MOVQ AX, ret+32(FP)	// index of c, or -1
  1494  	RET
  1495  
  1496  TEXT strings·IndexByte(SB),NOSPLIT,$0
        	// String variant: data pointer at 0(FP), length at 8(FP), the byte c at
        	// 16(FP) (after the 16-byte string), result at 24(FP).
        	// The search itself is done by indexbytebody.
  1497  	MOVQ s+0(FP), SI
  1498  	MOVQ s_len+8(FP), BX
  1499  	MOVB c+16(FP), AL
  1500  	CALL runtime·indexbytebody(SB)
  1501  	MOVQ AX, ret+24(FP)	// index of c, or -1
  1502  	RET
  1503  
  1504  // input:
  1505  //   SI: data
  1506  //   BX: data len
  1507  //   AL: byte sought
  1508  // output:
  1509  //   AX
        //   AX is the index of the first occurrence of the byte in the data,
        //   or -1 if it does not occur.
        // DI, CX, DX, R11, X0 and X1 are used as scratch.
        // Strategy: inputs shorter than 16 bytes are scanned with REPN SCASB.
        // Longer inputs are scanned bytewise up to the first 16-byte boundary,
        // then 16 bytes at a time with SSE up to the last 16-byte boundary,
        // then bytewise for the tail.
  1510  TEXT runtime·indexbytebody(SB),NOSPLIT,$0
  1511  	MOVQ SI, DI	// DI is the scan cursor; SI keeps the start for the index computation
  1512  
  1513  	CMPQ BX, $16
  1514  	JLT small
  1515  
  1516  	// round up to first 16-byte boundary
  1517  	TESTQ $15, SI
  1518  	JZ aligned
  1519  	MOVQ SI, CX
  1520  	ANDQ $~15, CX
  1521  	ADDQ $16, CX
  1522  
  1523  	// search the beginning
  1524  	SUBQ SI, CX	// CX = number of bytes before the boundary (1-15)
  1525  	REPN; SCASB
  1526  	JZ success
  1527  
  1528  // DI is 16-byte aligned; get ready to search using SSE instructions
  1529  aligned:
  1530  	// round down to last 16-byte boundary
  1531  	MOVQ BX, R11
  1532  	ADDQ SI, R11
  1533  	ANDQ $~15, R11	// R11 = end of the region scanned with aligned 16-byte loads
  1534  
  1535  	// shuffle X0 around so that each byte contains c
  1536  	MOVD AX, X0
  1537  	PUNPCKLBW X0, X0
  1538  	PUNPCKLBW X0, X0
  1539  	PSHUFL $0, X0, X0
  1540  	JMP condition
  1541  
  1542  sse:
  1543  	// move the next 16-byte chunk of the buffer into X1
  1544  	MOVO (DI), X1
  1545  	// compare bytes in X0 to X1
  1546  	PCMPEQB X0, X1
  1547  	// take the top bit of each byte in X1 and put the result in DX
  1548  	PMOVMSKB X1, DX
  1549  	TESTL DX, DX
  1550  	JNZ ssesuccess
  1551  	ADDQ $16, DI
  1552  
  1553  condition:
  1554  	CMPQ DI, R11
  1555  	JLT sse
  1556  
  1557  	// search the end
  1558  	MOVQ SI, CX
  1559  	ADDQ BX, CX
  1560  	SUBQ R11, CX	// CX = number of bytes after the last 16-byte boundary (0-15)
  1561  	// if CX == 0, the zero flag will be set and we'll end up
  1562  	// returning a false success
  1563  	JZ failure
  1564  	REPN; SCASB
  1565  	JZ success
  1566  
  1567  failure:
  1568  	MOVQ $-1, AX
  1569  	RET
  1570  
  1571  // handle for lengths < 16
  1572  small:
  1573  	MOVQ BX, CX
  1574  	REPN; SCASB
  1575  	JZ success
  1576  	MOVQ $-1, AX
  1577  	RET
  1578  
  1579  // we've found the chunk containing the byte
  1580  // now just figure out which specific byte it is
  1581  ssesuccess:
  1582  	// get the index of the least significant set bit
  1583  	BSFW DX, DX
  1584  	SUBQ SI, DI	// DI = offset of the matching chunk from the start
  1585  	ADDQ DI, DX
  1586  	MOVQ DX, AX
  1587  	RET
  1588  
        // SCASB leaves DI one past the matching byte, so index = DI - SI - 1.
        // The decrement must be a 64-bit operation: a 32-bit SUBL would clear the
        // upper half of DI and truncate any index of 4GiB or more.
  1589  success:
  1590  	SUBQ SI, DI
  1591  	SUBQ $1, DI
  1592  	MOVQ DI, AX
  1593  	RET
  1594  
  1595  TEXT bytes·Equal(SB),NOSPLIT,$0-49
        	// Reports whether two byte slices have the same length and contents.
        	// Slice a is at 0(FP) (length at 8), slice b at 24(FP) (length at 32),
        	// and the 1-byte result at 48(FP).
  1596  	MOVQ	a_len+8(FP), BX
  1597  	MOVQ	b_len+32(FP), CX
  1598  	XORQ	AX, AX	// AX = 0 is the answer if the lengths differ
  1599  	CMPQ	BX, CX
  1600  	JNE	eqret
  1601  	MOVQ	a+0(FP), SI
  1602  	MOVQ	b+24(FP), DI
        	// BX already holds the common length, as memeqbody expects
  1603  	CALL	runtime·memeqbody(SB)
  1604  eqret:
  1605  	MOVB	AX, ret+48(FP)
  1606  	RET
  1607  
  1608  // A Duff's device for zeroing memory.
  1609  // The compiler jumps to computed addresses within
  1610  // this routine to zero chunks of memory.  Do not
  1611  // change this code without also changing the code
  1612  // in ../../cmd/6g/ggen.c:clearfat.
  1613  // AX: zero
  1614  // DI: ptr to memory to be zeroed
  1615  // DI is updated as a side effect.
        // The body is 128 STOSQ instructions followed by RET.  Each STOSQ stores
        // the 8 bytes of AX at (DI) and advances DI, so entering k instructions
        // before the RET zeroes 8*k bytes (at most 1024 from the top).
  1616  TEXT runtime·duffzero(SB), NOSPLIT, $0-0
  1617  	STOSQ
  1618  	STOSQ
  1619  	STOSQ
  1620  	STOSQ
  1621  	STOSQ
  1622  	STOSQ
  1623  	STOSQ
  1624  	STOSQ
  1625  	STOSQ
  1626  	STOSQ
  1627  	STOSQ
  1628  	STOSQ
  1629  	STOSQ
  1630  	STOSQ
  1631  	STOSQ
  1632  	STOSQ
  1633  	STOSQ
  1634  	STOSQ
  1635  	STOSQ
  1636  	STOSQ
  1637  	STOSQ
  1638  	STOSQ
  1639  	STOSQ
  1640  	STOSQ
  1641  	STOSQ
  1642  	STOSQ
  1643  	STOSQ
  1644  	STOSQ
  1645  	STOSQ
  1646  	STOSQ
  1647  	STOSQ
  1648  	STOSQ
  1649  	STOSQ
  1650  	STOSQ
  1651  	STOSQ
  1652  	STOSQ
  1653  	STOSQ
  1654  	STOSQ
  1655  	STOSQ
  1656  	STOSQ
  1657  	STOSQ
  1658  	STOSQ
  1659  	STOSQ
  1660  	STOSQ
  1661  	STOSQ
  1662  	STOSQ
  1663  	STOSQ
  1664  	STOSQ
  1665  	STOSQ
  1666  	STOSQ
  1667  	STOSQ
  1668  	STOSQ
  1669  	STOSQ
  1670  	STOSQ
  1671  	STOSQ
  1672  	STOSQ
  1673  	STOSQ
  1674  	STOSQ
  1675  	STOSQ
  1676  	STOSQ
  1677  	STOSQ
  1678  	STOSQ
  1679  	STOSQ
  1680  	STOSQ
  1681  	STOSQ
  1682  	STOSQ
  1683  	STOSQ
  1684  	STOSQ
  1685  	STOSQ
  1686  	STOSQ
  1687  	STOSQ
  1688  	STOSQ
  1689  	STOSQ
  1690  	STOSQ
  1691  	STOSQ
  1692  	STOSQ
  1693  	STOSQ
  1694  	STOSQ
  1695  	STOSQ
  1696  	STOSQ
  1697  	STOSQ
  1698  	STOSQ
  1699  	STOSQ
  1700  	STOSQ
  1701  	STOSQ
  1702  	STOSQ
  1703  	STOSQ
  1704  	STOSQ
  1705  	STOSQ
  1706  	STOSQ
  1707  	STOSQ
  1708  	STOSQ
  1709  	STOSQ
  1710  	STOSQ
  1711  	STOSQ
  1712  	STOSQ
  1713  	STOSQ
  1714  	STOSQ
  1715  	STOSQ
  1716  	STOSQ
  1717  	STOSQ
  1718  	STOSQ
  1719  	STOSQ
  1720  	STOSQ
  1721  	STOSQ
  1722  	STOSQ
  1723  	STOSQ
  1724  	STOSQ
  1725  	STOSQ
  1726  	STOSQ
  1727  	STOSQ
  1728  	STOSQ
  1729  	STOSQ
  1730  	STOSQ
  1731  	STOSQ
  1732  	STOSQ
  1733  	STOSQ
  1734  	STOSQ
  1735  	STOSQ
  1736  	STOSQ
  1737  	STOSQ
  1738  	STOSQ
  1739  	STOSQ
  1740  	STOSQ
  1741  	STOSQ
  1742  	STOSQ
  1743  	STOSQ
  1744  	STOSQ
  1745  	RET
  1746  
  1747  // A Duff's device for copying memory.
  1748  // The compiler jumps to computed addresses within
  1749  // this routine to copy chunks of memory.  Source
  1750  // and destination must not overlap.  Do not
  1751  // change this code without also changing the code
  1752  // in ../../cmd/6g/cgen.c:sgen.
  1753  // SI: ptr to source memory
  1754  // DI: ptr to destination memory
  1755  // SI and DI are updated as a side effect.
  1756  
  1757  // NOTE: this is equivalent to a sequence of MOVSQ but
  1758  // for some reason that is 3.5x slower than this code.
  1759  // The STOSQ above seem fine, though.
        // The body is 128 identical 4-instruction units followed by RET.  Each
        // unit copies one 8-byte word from (SI) to (DI) through CX, which is
        // therefore clobbered, and advances both pointers by 8.
  1760  TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
  1761  	MOVQ	(SI),CX
  1762  	ADDQ	$8,SI
  1763  	MOVQ	CX,(DI)
  1764  	ADDQ	$8,DI
  1765  
  1766  	MOVQ	(SI),CX
  1767  	ADDQ	$8,SI
  1768  	MOVQ	CX,(DI)
  1769  	ADDQ	$8,DI
  1770  
  1771  	MOVQ	(SI),CX
  1772  	ADDQ	$8,SI
  1773  	MOVQ	CX,(DI)
  1774  	ADDQ	$8,DI
  1775  
  1776  	MOVQ	(SI),CX
  1777  	ADDQ	$8,SI
  1778  	MOVQ	CX,(DI)
  1779  	ADDQ	$8,DI
  1780  
  1781  	MOVQ	(SI),CX
  1782  	ADDQ	$8,SI
  1783  	MOVQ	CX,(DI)
  1784  	ADDQ	$8,DI
  1785  
  1786  	MOVQ	(SI),CX
  1787  	ADDQ	$8,SI
  1788  	MOVQ	CX,(DI)
  1789  	ADDQ	$8,DI
  1790  
  1791  	MOVQ	(SI),CX
  1792  	ADDQ	$8,SI
  1793  	MOVQ	CX,(DI)
  1794  	ADDQ	$8,DI
  1795  
  1796  	MOVQ	(SI),CX
  1797  	ADDQ	$8,SI
  1798  	MOVQ	CX,(DI)
  1799  	ADDQ	$8,DI
  1800  
  1801  	MOVQ	(SI),CX
  1802  	ADDQ	$8,SI
  1803  	MOVQ	CX,(DI)
  1804  	ADDQ	$8,DI
  1805  
  1806  	MOVQ	(SI),CX
  1807  	ADDQ	$8,SI
  1808  	MOVQ	CX,(DI)
  1809  	ADDQ	$8,DI
  1810  
  1811  	MOVQ	(SI),CX
  1812  	ADDQ	$8,SI
  1813  	MOVQ	CX,(DI)
  1814  	ADDQ	$8,DI
  1815  
  1816  	MOVQ	(SI),CX
  1817  	ADDQ	$8,SI
  1818  	MOVQ	CX,(DI)
  1819  	ADDQ	$8,DI
  1820  
  1821  	MOVQ	(SI),CX
  1822  	ADDQ	$8,SI
  1823  	MOVQ	CX,(DI)
  1824  	ADDQ	$8,DI
  1825  
  1826  	MOVQ	(SI),CX
  1827  	ADDQ	$8,SI
  1828  	MOVQ	CX,(DI)
  1829  	ADDQ	$8,DI
  1830  
  1831  	MOVQ	(SI),CX
  1832  	ADDQ	$8,SI
  1833  	MOVQ	CX,(DI)
  1834  	ADDQ	$8,DI
  1835  
  1836  	MOVQ	(SI),CX
  1837  	ADDQ	$8,SI
  1838  	MOVQ	CX,(DI)
  1839  	ADDQ	$8,DI
  1840  
  1841  	MOVQ	(SI),CX
  1842  	ADDQ	$8,SI
  1843  	MOVQ	CX,(DI)
  1844  	ADDQ	$8,DI
  1845  
  1846  	MOVQ	(SI),CX
  1847  	ADDQ	$8,SI
  1848  	MOVQ	CX,(DI)
  1849  	ADDQ	$8,DI
  1850  
  1851  	MOVQ	(SI),CX
  1852  	ADDQ	$8,SI
  1853  	MOVQ	CX,(DI)
  1854  	ADDQ	$8,DI
  1855  
  1856  	MOVQ	(SI),CX
  1857  	ADDQ	$8,SI
  1858  	MOVQ	CX,(DI)
  1859  	ADDQ	$8,DI
  1860  
  1861  	MOVQ	(SI),CX
  1862  	ADDQ	$8,SI
  1863  	MOVQ	CX,(DI)
  1864  	ADDQ	$8,DI
  1865  
  1866  	MOVQ	(SI),CX
  1867  	ADDQ	$8,SI
  1868  	MOVQ	CX,(DI)
  1869  	ADDQ	$8,DI
  1870  
  1871  	MOVQ	(SI),CX
  1872  	ADDQ	$8,SI
  1873  	MOVQ	CX,(DI)
  1874  	ADDQ	$8,DI
  1875  
  1876  	MOVQ	(SI),CX
  1877  	ADDQ	$8,SI
  1878  	MOVQ	CX,(DI)
  1879  	ADDQ	$8,DI
  1880  
  1881  	MOVQ	(SI),CX
  1882  	ADDQ	$8,SI
  1883  	MOVQ	CX,(DI)
  1884  	ADDQ	$8,DI
  1885  
  1886  	MOVQ	(SI),CX
  1887  	ADDQ	$8,SI
  1888  	MOVQ	CX,(DI)
  1889  	ADDQ	$8,DI
  1890  
  1891  	MOVQ	(SI),CX
  1892  	ADDQ	$8,SI
  1893  	MOVQ	CX,(DI)
  1894  	ADDQ	$8,DI
  1895  
  1896  	MOVQ	(SI),CX
  1897  	ADDQ	$8,SI
  1898  	MOVQ	CX,(DI)
  1899  	ADDQ	$8,DI
  1900  
  1901  	MOVQ	(SI),CX
  1902  	ADDQ	$8,SI
  1903  	MOVQ	CX,(DI)
  1904  	ADDQ	$8,DI
  1905  
  1906  	MOVQ	(SI),CX
  1907  	ADDQ	$8,SI
  1908  	MOVQ	CX,(DI)
  1909  	ADDQ	$8,DI
  1910  
  1911  	MOVQ	(SI),CX
  1912  	ADDQ	$8,SI
  1913  	MOVQ	CX,(DI)
  1914  	ADDQ	$8,DI
  1915  
  1916  	MOVQ	(SI),CX
  1917  	ADDQ	$8,SI
  1918  	MOVQ	CX,(DI)
  1919  	ADDQ	$8,DI
  1920  
  1921  	MOVQ	(SI),CX
  1922  	ADDQ	$8,SI
  1923  	MOVQ	CX,(DI)
  1924  	ADDQ	$8,DI
  1925  
  1926  	MOVQ	(SI),CX
  1927  	ADDQ	$8,SI
  1928  	MOVQ	CX,(DI)
  1929  	ADDQ	$8,DI
  1930  
  1931  	MOVQ	(SI),CX
  1932  	ADDQ	$8,SI
  1933  	MOVQ	CX,(DI)
  1934  	ADDQ	$8,DI
  1935  
  1936  	MOVQ	(SI),CX
  1937  	ADDQ	$8,SI
  1938  	MOVQ	CX,(DI)
  1939  	ADDQ	$8,DI
  1940  
  1941  	MOVQ	(SI),CX
  1942  	ADDQ	$8,SI
  1943  	MOVQ	CX,(DI)
  1944  	ADDQ	$8,DI
  1945  
  1946  	MOVQ	(SI),CX
  1947  	ADDQ	$8,SI
  1948  	MOVQ	CX,(DI)
  1949  	ADDQ	$8,DI
  1950  
  1951  	MOVQ	(SI),CX
  1952  	ADDQ	$8,SI
  1953  	MOVQ	CX,(DI)
  1954  	ADDQ	$8,DI
  1955  
  1956  	MOVQ	(SI),CX
  1957  	ADDQ	$8,SI
  1958  	MOVQ	CX,(DI)
  1959  	ADDQ	$8,DI
  1960  
  1961  	MOVQ	(SI),CX
  1962  	ADDQ	$8,SI
  1963  	MOVQ	CX,(DI)
  1964  	ADDQ	$8,DI
  1965  
  1966  	MOVQ	(SI),CX
  1967  	ADDQ	$8,SI
  1968  	MOVQ	CX,(DI)
  1969  	ADDQ	$8,DI
  1970  
  1971  	MOVQ	(SI),CX
  1972  	ADDQ	$8,SI
  1973  	MOVQ	CX,(DI)
  1974  	ADDQ	$8,DI
  1975  
  1976  	MOVQ	(SI),CX
  1977  	ADDQ	$8,SI
  1978  	MOVQ	CX,(DI)
  1979  	ADDQ	$8,DI
  1980  
  1981  	MOVQ	(SI),CX
  1982  	ADDQ	$8,SI
  1983  	MOVQ	CX,(DI)
  1984  	ADDQ	$8,DI
  1985  
  1986  	MOVQ	(SI),CX
  1987  	ADDQ	$8,SI
  1988  	MOVQ	CX,(DI)
  1989  	ADDQ	$8,DI
  1990  
  1991  	MOVQ	(SI),CX
  1992  	ADDQ	$8,SI
  1993  	MOVQ	CX,(DI)
  1994  	ADDQ	$8,DI
  1995  
  1996  	MOVQ	(SI),CX
  1997  	ADDQ	$8,SI
  1998  	MOVQ	CX,(DI)
  1999  	ADDQ	$8,DI
  2000  
  2001  	MOVQ	(SI),CX
  2002  	ADDQ	$8,SI
  2003  	MOVQ	CX,(DI)
  2004  	ADDQ	$8,DI
  2005  
  2006  	MOVQ	(SI),CX
  2007  	ADDQ	$8,SI
  2008  	MOVQ	CX,(DI)
  2009  	ADDQ	$8,DI
  2010  
  2011  	MOVQ	(SI),CX
  2012  	ADDQ	$8,SI
  2013  	MOVQ	CX,(DI)
  2014  	ADDQ	$8,DI
  2015  
  2016  	MOVQ	(SI),CX
  2017  	ADDQ	$8,SI
  2018  	MOVQ	CX,(DI)
  2019  	ADDQ	$8,DI
  2020  
  2021  	MOVQ	(SI),CX
  2022  	ADDQ	$8,SI
  2023  	MOVQ	CX,(DI)
  2024  	ADDQ	$8,DI
  2025  
  2026  	MOVQ	(SI),CX
  2027  	ADDQ	$8,SI
  2028  	MOVQ	CX,(DI)
  2029  	ADDQ	$8,DI
  2030  
  2031  	MOVQ	(SI),CX
  2032  	ADDQ	$8,SI
  2033  	MOVQ	CX,(DI)
  2034  	ADDQ	$8,DI
  2035  
  2036  	MOVQ	(SI),CX
  2037  	ADDQ	$8,SI
  2038  	MOVQ	CX,(DI)
  2039  	ADDQ	$8,DI
  2040  
  2041  	MOVQ	(SI),CX
  2042  	ADDQ	$8,SI
  2043  	MOVQ	CX,(DI)
  2044  	ADDQ	$8,DI
  2045  
  2046  	MOVQ	(SI),CX
  2047  	ADDQ	$8,SI
  2048  	MOVQ	CX,(DI)
  2049  	ADDQ	$8,DI
  2050  
  2051  	MOVQ	(SI),CX
  2052  	ADDQ	$8,SI
  2053  	MOVQ	CX,(DI)
  2054  	ADDQ	$8,DI
  2055  
  2056  	MOVQ	(SI),CX
  2057  	ADDQ	$8,SI
  2058  	MOVQ	CX,(DI)
  2059  	ADDQ	$8,DI
  2060  
  2061  	MOVQ	(SI),CX
  2062  	ADDQ	$8,SI
  2063  	MOVQ	CX,(DI)
  2064  	ADDQ	$8,DI
  2065  
  2066  	MOVQ	(SI),CX
  2067  	ADDQ	$8,SI
  2068  	MOVQ	CX,(DI)
  2069  	ADDQ	$8,DI
  2070  
  2071  	MOVQ	(SI),CX
  2072  	ADDQ	$8,SI
  2073  	MOVQ	CX,(DI)
  2074  	ADDQ	$8,DI
  2075  
  2076  	MOVQ	(SI),CX
  2077  	ADDQ	$8,SI
  2078  	MOVQ	CX,(DI)
  2079  	ADDQ	$8,DI
  2080  
  2081  	MOVQ	(SI),CX
  2082  	ADDQ	$8,SI
  2083  	MOVQ	CX,(DI)
  2084  	ADDQ	$8,DI
  2085  
  2086  	MOVQ	(SI),CX
  2087  	ADDQ	$8,SI
  2088  	MOVQ	CX,(DI)
  2089  	ADDQ	$8,DI
  2090  
  2091  	MOVQ	(SI),CX
  2092  	ADDQ	$8,SI
  2093  	MOVQ	CX,(DI)
  2094  	ADDQ	$8,DI
  2095  
  2096  	MOVQ	(SI),CX
  2097  	ADDQ	$8,SI
  2098  	MOVQ	CX,(DI)
  2099  	ADDQ	$8,DI
  2100  
  2101  	MOVQ	(SI),CX
  2102  	ADDQ	$8,SI
  2103  	MOVQ	CX,(DI)
  2104  	ADDQ	$8,DI
  2105  
  2106  	MOVQ	(SI),CX
  2107  	ADDQ	$8,SI
  2108  	MOVQ	CX,(DI)
  2109  	ADDQ	$8,DI
  2110  
  2111  	MOVQ	(SI),CX
  2112  	ADDQ	$8,SI
  2113  	MOVQ	CX,(DI)
  2114  	ADDQ	$8,DI
  2115  
  2116  	MOVQ	(SI),CX
  2117  	ADDQ	$8,SI
  2118  	MOVQ	CX,(DI)
  2119  	ADDQ	$8,DI
  2120  
  2121  	MOVQ	(SI),CX
  2122  	ADDQ	$8,SI
  2123  	MOVQ	CX,(DI)
  2124  	ADDQ	$8,DI
  2125  
  2126  	MOVQ	(SI),CX
  2127  	ADDQ	$8,SI
  2128  	MOVQ	CX,(DI)
  2129  	ADDQ	$8,DI
  2130  
  2131  	MOVQ	(SI),CX
  2132  	ADDQ	$8,SI
  2133  	MOVQ	CX,(DI)
  2134  	ADDQ	$8,DI
  2135  
  2136  	MOVQ	(SI),CX
  2137  	ADDQ	$8,SI
  2138  	MOVQ	CX,(DI)
  2139  	ADDQ	$8,DI
  2140  
  2141  	MOVQ	(SI),CX
  2142  	ADDQ	$8,SI
  2143  	MOVQ	CX,(DI)
  2144  	ADDQ	$8,DI
  2145  
  2146  	MOVQ	(SI),CX
  2147  	ADDQ	$8,SI
  2148  	MOVQ	CX,(DI)
  2149  	ADDQ	$8,DI
  2150  
  2151  	MOVQ	(SI),CX
  2152  	ADDQ	$8,SI
  2153  	MOVQ	CX,(DI)
  2154  	ADDQ	$8,DI
  2155  
  2156  	MOVQ	(SI),CX
  2157  	ADDQ	$8,SI
  2158  	MOVQ	CX,(DI)
  2159  	ADDQ	$8,DI
  2160  
  2161  	MOVQ	(SI),CX
  2162  	ADDQ	$8,SI
  2163  	MOVQ	CX,(DI)
  2164  	ADDQ	$8,DI
  2165  
  2166  	MOVQ	(SI),CX
  2167  	ADDQ	$8,SI
  2168  	MOVQ	CX,(DI)
  2169  	ADDQ	$8,DI
  2170  
  2171  	MOVQ	(SI),CX
  2172  	ADDQ	$8,SI
  2173  	MOVQ	CX,(DI)
  2174  	ADDQ	$8,DI
  2175  
  2176  	MOVQ	(SI),CX
  2177  	ADDQ	$8,SI
  2178  	MOVQ	CX,(DI)
  2179  	ADDQ	$8,DI
  2180  
  2181  	MOVQ	(SI),CX
  2182  	ADDQ	$8,SI
  2183  	MOVQ	CX,(DI)
  2184  	ADDQ	$8,DI
  2185  
  2186  	MOVQ	(SI),CX
  2187  	ADDQ	$8,SI
  2188  	MOVQ	CX,(DI)
  2189  	ADDQ	$8,DI
  2190  
  2191  	MOVQ	(SI),CX
  2192  	ADDQ	$8,SI
  2193  	MOVQ	CX,(DI)
  2194  	ADDQ	$8,DI
  2195  
  2196  	MOVQ	(SI),CX
  2197  	ADDQ	$8,SI
  2198  	MOVQ	CX,(DI)
  2199  	ADDQ	$8,DI
  2200  
  2201  	MOVQ	(SI),CX
  2202  	ADDQ	$8,SI
  2203  	MOVQ	CX,(DI)
  2204  	ADDQ	$8,DI
  2205  
  2206  	MOVQ	(SI),CX
  2207  	ADDQ	$8,SI
  2208  	MOVQ	CX,(DI)
  2209  	ADDQ	$8,DI
  2210  
  2211  	MOVQ	(SI),CX
  2212  	ADDQ	$8,SI
  2213  	MOVQ	CX,(DI)
  2214  	ADDQ	$8,DI
  2215  
  2216  	MOVQ	(SI),CX
  2217  	ADDQ	$8,SI
  2218  	MOVQ	CX,(DI)
  2219  	ADDQ	$8,DI
  2220  
  2221  	MOVQ	(SI),CX
  2222  	ADDQ	$8,SI
  2223  	MOVQ	CX,(DI)
  2224  	ADDQ	$8,DI
  2225  
  2226  	MOVQ	(SI),CX
  2227  	ADDQ	$8,SI
  2228  	MOVQ	CX,(DI)
  2229  	ADDQ	$8,DI
  2230  
  2231  	MOVQ	(SI),CX
  2232  	ADDQ	$8,SI
  2233  	MOVQ	CX,(DI)
  2234  	ADDQ	$8,DI
  2235  
  2236  	MOVQ	(SI),CX
  2237  	ADDQ	$8,SI
  2238  	MOVQ	CX,(DI)
  2239  	ADDQ	$8,DI
  2240  
  2241  	MOVQ	(SI),CX
  2242  	ADDQ	$8,SI
  2243  	MOVQ	CX,(DI)
  2244  	ADDQ	$8,DI
  2245  
  2246  	MOVQ	(SI),CX
  2247  	ADDQ	$8,SI
  2248  	MOVQ	CX,(DI)
  2249  	ADDQ	$8,DI
  2250  
  2251  	MOVQ	(SI),CX
  2252  	ADDQ	$8,SI
  2253  	MOVQ	CX,(DI)
  2254  	ADDQ	$8,DI
  2255  
  2256  	MOVQ	(SI),CX
  2257  	ADDQ	$8,SI
  2258  	MOVQ	CX,(DI)
  2259  	ADDQ	$8,DI
  2260  
  2261  	MOVQ	(SI),CX
  2262  	ADDQ	$8,SI
  2263  	MOVQ	CX,(DI)
  2264  	ADDQ	$8,DI
  2265  
  2266  	MOVQ	(SI),CX
  2267  	ADDQ	$8,SI
  2268  	MOVQ	CX,(DI)
  2269  	ADDQ	$8,DI
  2270  
  2271  	MOVQ	(SI),CX
  2272  	ADDQ	$8,SI
  2273  	MOVQ	CX,(DI)
  2274  	ADDQ	$8,DI
  2275  
  2276  	MOVQ	(SI),CX
  2277  	ADDQ	$8,SI
  2278  	MOVQ	CX,(DI)
  2279  	ADDQ	$8,DI
  2280  
  2281  	MOVQ	(SI),CX
  2282  	ADDQ	$8,SI
  2283  	MOVQ	CX,(DI)
  2284  	ADDQ	$8,DI
  2285  
  2286  	MOVQ	(SI),CX
  2287  	ADDQ	$8,SI
  2288  	MOVQ	CX,(DI)
  2289  	ADDQ	$8,DI
  2290  
  2291  	MOVQ	(SI),CX
  2292  	ADDQ	$8,SI
  2293  	MOVQ	CX,(DI)
  2294  	ADDQ	$8,DI
  2295  
  2296  	MOVQ	(SI),CX
  2297  	ADDQ	$8,SI
  2298  	MOVQ	CX,(DI)
  2299  	ADDQ	$8,DI
  2300  
  2301  	MOVQ	(SI),CX
  2302  	ADDQ	$8,SI
  2303  	MOVQ	CX,(DI)
  2304  	ADDQ	$8,DI
  2305  
  2306  	MOVQ	(SI),CX
  2307  	ADDQ	$8,SI
  2308  	MOVQ	CX,(DI)
  2309  	ADDQ	$8,DI
  2310  
  2311  	MOVQ	(SI),CX
  2312  	ADDQ	$8,SI
  2313  	MOVQ	CX,(DI)
  2314  	ADDQ	$8,DI
  2315  
  2316  	MOVQ	(SI),CX
  2317  	ADDQ	$8,SI
  2318  	MOVQ	CX,(DI)
  2319  	ADDQ	$8,DI
  2320  
  2321  	MOVQ	(SI),CX
  2322  	ADDQ	$8,SI
  2323  	MOVQ	CX,(DI)
  2324  	ADDQ	$8,DI
  2325  
  2326  	MOVQ	(SI),CX
  2327  	ADDQ	$8,SI
  2328  	MOVQ	CX,(DI)
  2329  	ADDQ	$8,DI
  2330  
  2331  	MOVQ	(SI),CX
  2332  	ADDQ	$8,SI
  2333  	MOVQ	CX,(DI)
  2334  	ADDQ	$8,DI
  2335  
  2336  	MOVQ	(SI),CX
  2337  	ADDQ	$8,SI
  2338  	MOVQ	CX,(DI)
  2339  	ADDQ	$8,DI
  2340  
  2341  	MOVQ	(SI),CX
  2342  	ADDQ	$8,SI
  2343  	MOVQ	CX,(DI)
  2344  	ADDQ	$8,DI
  2345  
  2346  	MOVQ	(SI),CX
  2347  	ADDQ	$8,SI
  2348  	MOVQ	CX,(DI)
  2349  	ADDQ	$8,DI
  2350  
  2351  	MOVQ	(SI),CX
  2352  	ADDQ	$8,SI
  2353  	MOVQ	CX,(DI)
  2354  	ADDQ	$8,DI
  2355  
  2356  	MOVQ	(SI),CX
  2357  	ADDQ	$8,SI
  2358  	MOVQ	CX,(DI)
  2359  	ADDQ	$8,DI
  2360  
  2361  	MOVQ	(SI),CX
  2362  	ADDQ	$8,SI
  2363  	MOVQ	CX,(DI)
  2364  	ADDQ	$8,DI
  2365  
  2366  	MOVQ	(SI),CX
  2367  	ADDQ	$8,SI
  2368  	MOVQ	CX,(DI)
  2369  	ADDQ	$8,DI
  2370  
  2371  	MOVQ	(SI),CX
  2372  	ADDQ	$8,SI
  2373  	MOVQ	CX,(DI)
  2374  	ADDQ	$8,DI
  2375  
  2376  	MOVQ	(SI),CX
  2377  	ADDQ	$8,SI
  2378  	MOVQ	CX,(DI)
  2379  	ADDQ	$8,DI
  2380  
  2381  	MOVQ	(SI),CX
  2382  	ADDQ	$8,SI
  2383  	MOVQ	CX,(DI)
  2384  	ADDQ	$8,DI
  2385  
  2386  	MOVQ	(SI),CX
  2387  	ADDQ	$8,SI
  2388  	MOVQ	CX,(DI)
  2389  	ADDQ	$8,DI
  2390  
  2391  	MOVQ	(SI),CX
  2392  	ADDQ	$8,SI
  2393  	MOVQ	CX,(DI)
  2394  	ADDQ	$8,DI
  2395  
  2396  	MOVQ	(SI),CX
  2397  	ADDQ	$8,SI
  2398  	MOVQ	CX,(DI)
  2399  	ADDQ	$8,DI
  2400  
  2401  	RET
  2402  
  2403  TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
        	// Advances the 32-bit generator state stored in the current m
        	// (m_fastrand) by one step and returns the new state.
  2404  	get_tls(CX)
  2405  	MOVQ	g(CX), AX
  2406  	MOVQ	g_m(AX), AX	// AX = current g's m
  2407  	MOVL	m_fastrand(AX), DX
  2408  	ADDL	DX, DX	// x += x
  2409  	MOVL	DX, BX	// keep the doubled value
  2410  	XORL	$0x88888eef, DX
        	// The constant has its top bit set, so the XOR flips the sign bit.  If the
        	// XORed value is negative the doubled value was non-negative, and the
        	// un-XORed copy is used: the XOR applies only when 2x had its top bit set.
  2411  	CMOVLMI	BX, DX
  2412  	MOVL	DX, m_fastrand(AX)
  2413  	MOVL	DX, ret+0(FP)
  2414  	RET
  2415  
  2416  TEXT runtime·return0(SB), NOSPLIT, $0
        	// Sets AX to 0 and returns; nothing is written to the frame.
        	// NOTE(review): the callers are not visible here; presumably AX is
        	// read directly as a return register. Confirm before changing.
  2417  	MOVL	$0, AX
  2418  	RET
  2419  
  2420  
  2421  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  2422  // Must obey the gcc calling convention.
        // The result is returned in AX; CX is the only other register written.
  2423  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  2424  	get_tls(CX)
  2425  	MOVQ	g(CX), AX	// AX = g
  2426  	MOVQ	g_m(AX), AX	// AX = g->m
  2427  	MOVQ	m_curg(AX), AX	// AX = g->m->curg
  2428  	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
  2429  	RET
  2430  
  2431  // The top-most function running on a goroutine
  2432  // returns to goexit+PCQuantum.
        // The leading one-byte NOP puts the CALL one byte past the function's entry
        // address.  Presumably that is the goexit+PCQuantum return target
        // (PCQuantum is not defined in this part of the file; verify).
        // There is no RET because goexit1 does not return.
  2433  TEXT runtime·goexit(SB),NOSPLIT,$0-0
  2434  	BYTE	$0x90	// NOP
  2435  	CALL	runtime·goexit1(SB)	// does not return
  2436  
  2437  TEXT runtime·getg(SB),NOSPLIT,$0-8
        	// Returns the current g pointer, read from thread-local storage.
  2438  	get_tls(CX)
  2439  	MOVQ	g(CX), AX
  2440  	MOVQ	AX, ret+0(FP)
  2441  	RET
  2442  
  2443  TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
        	// Issues a PREFETCHT0 hint for the address passed as the single argument.
  2444  	MOVQ	addr+0(FP), AX
  2445  	PREFETCHT0	(AX)
  2446  	RET
  2447  
  2448  TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
        	// Issues a PREFETCHT1 hint for the address passed as the single argument.
  2449  	MOVQ	addr+0(FP), AX
  2450  	PREFETCHT1	(AX)
  2451  	RET
  2452  
  2453  TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
        	// Issues a PREFETCHT2 hint for the address passed as the single argument.
  2454  	MOVQ	addr+0(FP), AX
  2455  	PREFETCHT2	(AX)
  2456  	RET
  2457  
  2458  TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
        	// Issues a PREFETCHNTA (non-temporal) hint for the address passed as the single argument.
  2459  	MOVQ	addr+0(FP), AX
  2460  	PREFETCHNTA	(AX)
  2461  	RET