github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/runtime/asm_amd64.s (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  
    10  // _rt0_amd64 is common startup code for most amd64 systems when using
    11  // internal linking. This is the entry point for the program from the
    12  // kernel for an ordinary -buildmode=exe program. The stack holds the
    13  // number of arguments and the C-style argv.
    14  TEXT _rt0_amd64(SB),NOSPLIT,$-8
	// DI/SI are the registers rt0_go reads argc/argv from; tail-jump there.
    15  	MOVQ	0(SP), DI	// argc
    16  	LEAQ	8(SP), SI	// argv
    17  	JMP	runtime·rt0_go(SB)
    18  
    19  // main is common startup code for most amd64 systems when using
    20  // external linking. The C startup code will call the symbol "main"
    21  // passing argc and argv in the usual C ABI registers DI and SI.
    22  TEXT main(SB),NOSPLIT,$-8
	// argc/argv are already in DI/SI (C ABI), which is exactly what
	// rt0_go expects, so no register shuffling is needed here.
    23  	JMP	runtime·rt0_go(SB)
    24  
    25  // _rt0_amd64_lib is common startup code for most amd64 systems when
    26  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27  // arrange to invoke this function as a global constructor (for
    28  // c-archive) or when the shared library is loaded (for c-shared).
    29  // We expect argc and argv to be passed in the usual C ABI registers
    30  // DI and SI.
    31  TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32  	// Align stack per ELF ABI requirements.
    33  	MOVQ	SP, AX
    34  	ANDQ	$~15, SP
    35  	// Save C ABI callee-saved registers, as caller may need them.
    36  	MOVQ	BX, 0x10(SP)
    37  	MOVQ	BP, 0x18(SP)
    38  	MOVQ	R12, 0x20(SP)
    39  	MOVQ	R13, 0x28(SP)
    40  	MOVQ	R14, 0x30(SP)
    41  	MOVQ	R15, 0x38(SP)
	// Stash the original (pre-alignment) SP so it can be restored on exit.
    42  	MOVQ	AX, 0x40(SP)
    43  
	// Record argc/argv in static storage; _rt0_amd64_lib_go reads them later
	// from the initialization thread.
    44  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46  
    47  	// Synchronous initialization.
    48  	CALL	runtime·libpreinit(SB)
    49  
    50  	// Create a new thread to finish Go runtime initialization.
	// If cgo supplied a thread-creation hook, use it; otherwise fall back
	// to the runtime's own newosproc0 below.
    51  	MOVQ	_cgo_sys_thread_create(SB), AX
    52  	TESTQ	AX, AX
    53  	JZ	nocgo
    54  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55  	MOVQ	$0, SI
    56  	CALL	AX
    57  	JMP	restore
    58  
    59  nocgo:
    60  	MOVQ	$0x800000, 0(SP)		// stacksize
    61  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62  	MOVQ	AX, 8(SP)			// fn
    63  	CALL	runtime·newosproc0(SB)
    64  
    65  restore:
	// Restore the callee-saved registers and the original SP saved above.
    66  	MOVQ	0x10(SP), BX
    67  	MOVQ	0x18(SP), BP
    68  	MOVQ	0x20(SP), R12
    69  	MOVQ	0x28(SP), R13
    70  	MOVQ	0x30(SP), R14
    71  	MOVQ	0x38(SP), R15
    72  	MOVQ	0x40(SP), SP
    73  	RET
    74  
    75  // _rt0_amd64_lib_go initializes the Go runtime.
    76  // This is started in a separate thread by _rt0_amd64_lib.
    77  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	// Reload the argc/argv that _rt0_amd64_lib stashed, then enter the
	// normal runtime startup path.
    78  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    79  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    80  	JMP	runtime·rt0_go(SB)
    81  
	// Static, zero-initialized 8-byte slots holding argc and argv across
	// the thread handoff above. NOPTR: they never hold Go heap pointers.
    82  DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    83  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84  DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    85  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86  
	// rt0_go sets up the initial g0/m0, probes the CPU, initializes TLS
	// and the scheduler, then starts runtime.main in a new goroutine.
	// In: DI = argc, SI = argv. Never returns.
    87  TEXT runtime·rt0_go(SB),NOSPLIT,$0
    88  	// copy arguments forward on an even stack
    89  	MOVQ	DI, AX		// argc
    90  	MOVQ	SI, BX		// argv
    91  	SUBQ	$(4*8+7), SP		// 2args 2auto
    92  	ANDQ	$~15, SP
    93  	MOVQ	AX, 16(SP)
    94  	MOVQ	BX, 24(SP)
    95  
    96  	// create istack out of the given (operating system) stack.
    97  	// _cgo_init may update stackguard.
    98  	MOVQ	$runtime·g0(SB), DI
	// g0's stack spans roughly 64KB below the current SP; BX becomes
	// both stack.lo and the initial stackguard values.
    99  	LEAQ	(-64*1024+104)(SP), BX
   100  	MOVQ	BX, g_stackguard0(DI)
   101  	MOVQ	BX, g_stackguard1(DI)
   102  	MOVQ	BX, (g_stack+stack_lo)(DI)
   103  	MOVQ	SP, (g_stack+stack_hi)(DI)
   104  
   105  	// find out information about the processor we're on
	// CPUID leaf 0: AX = max supported leaf, BX:DX:CX = vendor string.
   106  	MOVL	$0, AX
   107  	CPUID
   108  	MOVL	AX, SI
   109  	CMPL	AX, $0
   110  	JE	nocpuinfo
   111  
   112  	// Figure out how to serialize RDTSC.
   113  	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   114  	// Don't know about the rest, so let's do MFENCE.
   115  	CMPL	BX, $0x756E6547  // "Genu"
   116  	JNE	notintel
   117  	CMPL	DX, $0x49656E69  // "ineI"
   118  	JNE	notintel
   119  	CMPL	CX, $0x6C65746E  // "ntel"
   120  	JNE	notintel
   121  	MOVB	$1, runtime·isIntel(SB)
   122  	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   123  notintel:
   124  
   125  	// Load EAX=1 cpuid flags
   126  	MOVL	$1, AX
   127  	CPUID
   128  	MOVL	AX, runtime·processorVersionInfo(SB)
   129  
   130  nocpuinfo:
   131  	// if there is an _cgo_init, call it.
   132  	MOVQ	_cgo_init(SB), AX
   133  	TESTQ	AX, AX
   134  	JZ	needtls
   135  	// g0 already in DI
   136  	MOVQ	DI, CX	// Win64 uses CX for first parameter
   137  	MOVQ	$setg_gcc<>(SB), SI
   138  	CALL	AX
   139  
   140  	// update stackguard after _cgo_init
   141  	MOVQ	$runtime·g0(SB), CX
   142  	MOVQ	(g_stack+stack_lo)(CX), AX
   143  	ADDQ	$const__StackGuard, AX
   144  	MOVQ	AX, g_stackguard0(CX)
   145  	MOVQ	AX, g_stackguard1(CX)
   146  
   147  #ifndef GOOS_windows
   148  	JMP ok
   149  #endif
   150  needtls:
   151  #ifdef GOOS_plan9
   152  	// skip TLS setup on Plan 9
   153  	JMP ok
   154  #endif
   155  #ifdef GOOS_solaris
   156  	// skip TLS setup on Solaris
   157  	JMP ok
   158  #endif
   159  #ifdef GOOS_darwin
   160  	// skip TLS setup on Darwin
   161  	JMP ok
   162  #endif
   163  
   164  	LEAQ	runtime·m0+m_tls(SB), DI
   165  	CALL	runtime·settls(SB)
   166  
   167  	// store through it, to make sure it works
	// Sanity check: write a sentinel through TLS and read it back via
	// m0.tls directly; abort if TLS is not wired up correctly.
   168  	get_tls(BX)
   169  	MOVQ	$0x123, g(BX)
   170  	MOVQ	runtime·m0+m_tls(SB), AX
   171  	CMPQ	AX, $0x123
   172  	JEQ 2(PC)
   173  	CALL	runtime·abort(SB)
   174  ok:
   175  	// set the per-goroutine and per-mach "registers"
   176  	get_tls(BX)
   177  	LEAQ	runtime·g0(SB), CX
   178  	MOVQ	CX, g(BX)
   179  	LEAQ	runtime·m0(SB), AX
   180  
   181  	// save m->g0 = g0
   182  	MOVQ	CX, m_g0(AX)
   183  	// save m0 to g0->m
   184  	MOVQ	AX, g_m(CX)
   185  
   186  	CLD				// convention is D is always left cleared
   187  	CALL	runtime·check(SB)
   188  
   189  	MOVL	16(SP), AX		// copy argc
   190  	MOVL	AX, 0(SP)
   191  	MOVQ	24(SP), AX		// copy argv
   192  	MOVQ	AX, 8(SP)
   193  	CALL	runtime·args(SB)
   194  	CALL	runtime·osinit(SB)
   195  	CALL	runtime·schedinit(SB)
   196  
   197  	// create a new goroutine to start program
   198  	MOVQ	$runtime·mainPC(SB), AX		// entry
   199  	PUSHQ	AX
   200  	PUSHQ	$0			// arg size
   201  	CALL	runtime·newproc(SB)
   202  	POPQ	AX
   203  	POPQ	AX
   204  
   205  	// start this M
   206  	CALL	runtime·mstart(SB)
   207  
   208  	CALL	runtime·abort(SB)	// mstart should never return
   209  	RET
   210  
   211  	// Prevent dead-code elimination of debugCallV1, which is
   212  	// intended to be called by debuggers.
	// Unreachable by construction (mstart never returns); only here so
	// the linker keeps the debugCallV1 symbol.
   213  	MOVQ	$runtime·debugCallV1(SB), AX
   214  	RET
   215  
	// mainPC is a read-only funcval-like cell holding &runtime.main,
	// pushed as the entry point for newproc above.
   216  DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
   217  GLOBL	runtime·mainPC(SB),RODATA,$8
   218  
	// breakpoint traps into the debugger.
   219  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   220  	BYTE	$0xcc	// INT3 encoded as a raw byte
   221  	RET
   222  
	// asminit is a hook for per-thread assembly-level init; amd64 needs none.
   223  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   224  	// No per-thread init.
   225  	RET
   226  
   227  /*
   228   *  go-routine
   229   */
   230  
   231  // func gosave(buf *gobuf)
   232  // save state in Gobuf; setjmp
   233  TEXT runtime·gosave(SB), NOSPLIT, $0-8
   234  	MOVQ	buf+0(FP), AX		// gobuf
	// The address of our own argument slot is the caller's SP.
   235  	LEAQ	buf+0(FP), BX		// caller's SP
   236  	MOVQ	BX, gobuf_sp(AX)
   237  	MOVQ	0(SP), BX		// caller's PC
   238  	MOVQ	BX, gobuf_pc(AX)
   239  	MOVQ	$0, gobuf_ret(AX)
   240  	MOVQ	BP, gobuf_bp(AX)
   241  	// Assert ctxt is zero. See func save.
   242  	MOVQ	gobuf_ctxt(AX), BX
   243  	TESTQ	BX, BX
   244  	JZ	2(PC)
   245  	CALL	runtime·badctxt(SB)
	// Record the current g so gogo can restore it.
   246  	get_tls(CX)
   247  	MOVQ	g(CX), BX
   248  	MOVQ	BX, gobuf_g(AX)
   249  	RET
   250  
   251  // func gogo(buf *gobuf)
   252  // restore state from Gobuf; longjmp
   253  TEXT runtime·gogo(SB), NOSPLIT, $16-8
   254  	MOVQ	buf+0(FP), BX		// gobuf
   255  	MOVQ	gobuf_g(BX), DX
   256  	MOVQ	0(DX), CX		// make sure g != nil
   257  	get_tls(CX)
   258  	MOVQ	DX, g(CX)
   259  	MOVQ	gobuf_sp(BX), SP	// restore SP
   260  	MOVQ	gobuf_ret(BX), AX
   261  	MOVQ	gobuf_ctxt(BX), DX
   262  	MOVQ	gobuf_bp(BX), BP
	// Zero the consumed gobuf fields so stale pointers don't keep
	// objects alive for the garbage collector.
   263  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   264  	MOVQ	$0, gobuf_ret(BX)
   265  	MOVQ	$0, gobuf_ctxt(BX)
   266  	MOVQ	$0, gobuf_bp(BX)
   267  	MOVQ	gobuf_pc(BX), BX
   268  	JMP	BX
   269  
   270  // func mcall(fn func(*g))
   271  // Switch to m->g0's stack, call fn(g).
   272  // Fn must never return. It should gogo(&g->sched)
   273  // to keep running g.
   274  TEXT runtime·mcall(SB), NOSPLIT, $0-8
   275  	MOVQ	fn+0(FP), DI
   276  
   277  	get_tls(CX)
   278  	MOVQ	g(CX), AX	// save state in g->sched
   279  	MOVQ	0(SP), BX	// caller's PC
   280  	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   281  	LEAQ	fn+0(FP), BX	// caller's SP
   282  	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   283  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   284  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   285  
   286  	// switch to m->g0 & its stack, call fn
   287  	MOVQ	g(CX), BX
   288  	MOVQ	g_m(BX), BX
   289  	MOVQ	m_g0(BX), SI
	// mcall from g0 itself is a fatal misuse; route to badmcall.
   290  	CMPQ	SI, AX	// if g == m->g0 call badmcall
   291  	JNE	3(PC)
   292  	MOVQ	$runtime·badmcall(SB), AX
   293  	JMP	AX
   294  	MOVQ	SI, g(CX)	// g = m->g0
   295  	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	// Push the old g as fn's argument, then call fn through its funcval
	// (DX holds the closure context per the Go ABI).
   296  	PUSHQ	AX
   297  	MOVQ	DI, DX
   298  	MOVQ	0(DI), DI
   299  	CALL	DI
	// fn must not return; if it does, badmcall2 crashes loudly.
   300  	POPQ	AX
   301  	MOVQ	$runtime·badmcall2(SB), AX
   302  	JMP	AX
   303  	RET
   304  
   305  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   306  // of the G stack. We need to distinguish the routine that
   307  // lives at the bottom of the G stack from the one that lives
   308  // at the top of the system stack because the one at the top of
   309  // the system stack terminates the stack walk (see topofstack()).
   310  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   311  	RET
   312  
   313  // func systemstack(fn func())
	// Runs fn on the system (g0) stack, switching there and back if
	// currently on an ordinary goroutine stack.
   314  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   315  	MOVQ	fn+0(FP), DI	// DI = fn
   316  	get_tls(CX)
   317  	MOVQ	g(CX), AX	// AX = g
   318  	MOVQ	g_m(AX), BX	// BX = m
   319  
	// Already on the signal stack or g0? Then no switch is needed.
   320  	CMPQ	AX, m_gsignal(BX)
   321  	JEQ	noswitch
   322  
   323  	MOVQ	m_g0(BX), DX	// DX = g0
   324  	CMPQ	AX, DX
   325  	JEQ	noswitch
   326  
   327  	CMPQ	AX, m_curg(BX)
   328  	JNE	bad
   329  
   330  	// switch stacks
   331  	// save our state in g->sched. Pretend to
   332  	// be systemstack_switch if the G stack is scanned.
   333  	MOVQ	$runtime·systemstack_switch(SB), SI
   334  	MOVQ	SI, (g_sched+gobuf_pc)(AX)
   335  	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   336  	MOVQ	AX, (g_sched+gobuf_g)(AX)
   337  	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   338  
   339  	// switch to g0
   340  	MOVQ	DX, g(CX)
   341  	MOVQ	(g_sched+gobuf_sp)(DX), BX
   342  	// make it look like mstart called systemstack on g0, to stop traceback
   343  	SUBQ	$8, BX
   344  	MOVQ	$runtime·mstart(SB), DX
   345  	MOVQ	DX, 0(BX)
   346  	MOVQ	BX, SP
   347  
   348  	// call target function
   349  	MOVQ	DI, DX
   350  	MOVQ	0(DI), DI
   351  	CALL	DI
   352  
   353  	// switch back to g
	// Re-read g from TLS rather than trusting saved registers: fn may
	// have done anything, but m->curg identifies the goroutine to resume.
   354  	get_tls(CX)
   355  	MOVQ	g(CX), AX
   356  	MOVQ	g_m(AX), BX
   357  	MOVQ	m_curg(BX), AX
   358  	MOVQ	AX, g(CX)
   359  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   360  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   361  	RET
   362  
   363  noswitch:
   364  	// already on m stack; tail call the function
   365  	// Using a tail call here cleans up tracebacks since we won't stop
   366  	// at an intermediate systemstack.
   367  	MOVQ	DI, DX
   368  	MOVQ	0(DI), DI
   369  	JMP	DI
   370  
   371  bad:
   372  	// Bad: g is not gsignal, not g0, not curg. What is it?
   373  	MOVQ	$runtime·badsystemstack(SB), AX
   374  	CALL	AX
   375  	INT	$3
   376  
   377  
   378  /*
   379   * support for morestack
   380   */
   381  
   382  // Called during function prolog when more stack is needed.
   383  //
   384  // The traceback routines see morestack on a g0 as being
   385  // the top of a stack (for example, morestack calling newstack
   386  // calling the scheduler calling newm calling gc), so we must
   387  // record an argument size. For that purpose, it has no arguments.
   388  TEXT runtime·morestack(SB),NOSPLIT,$0-0
   389  	// Cannot grow scheduler stack (m->g0).
   390  	get_tls(CX)
   391  	MOVQ	g(CX), BX
   392  	MOVQ	g_m(BX), BX
   393  	MOVQ	m_g0(BX), SI
   394  	CMPQ	g(CX), SI
   395  	JNE	3(PC)
   396  	CALL	runtime·badmorestackg0(SB)
   397  	CALL	runtime·abort(SB)
   398  
   399  	// Cannot grow signal stack (m->gsignal).
   400  	MOVQ	m_gsignal(BX), SI
   401  	CMPQ	g(CX), SI
   402  	JNE	3(PC)
   403  	CALL	runtime·badmorestackgsignal(SB)
   404  	CALL	runtime·abort(SB)
   405  
   406  	// Called from f.
   407  	// Set m->morebuf to f's caller.
	// At this point 0(SP) is f's PC (morestack's return address) and
	// 8(SP) is the PC of f's caller, because f's prologue CALLed here.
   408  	MOVQ	8(SP), AX	// f's caller's PC
   409  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   410  	LEAQ	16(SP), AX	// f's caller's SP
   411  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   412  	get_tls(CX)
   413  	MOVQ	g(CX), SI
   414  	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   415  
   416  	// Set g->sched to context in f.
   417  	MOVQ	0(SP), AX // f's PC
   418  	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   419  	MOVQ	SI, (g_sched+gobuf_g)(SI)
   420  	LEAQ	8(SP), AX // f's SP
   421  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   422  	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	// DX carries f's closure context from the prologue; preserve it so
	// newstack can restart f with it intact.
   423  	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   424  
   425  	// Call newstack on m->g0's stack.
   426  	MOVQ	m_g0(BX), BX
   427  	MOVQ	BX, g(CX)
   428  	MOVQ	(g_sched+gobuf_sp)(BX), SP
   429  	CALL	runtime·newstack(SB)
   430  	CALL	runtime·abort(SB)	// crash if newstack returns
   431  	RET
   432  
   433  // morestack but not preserving ctxt.
   434  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	// Clear DX (the closure-context register) so morestack records a
	// zero ctxt, then fall into the common path.
   435  	MOVL	$0, DX
   436  	JMP	runtime·morestack(SB)
   437  
   438  // reflectcall: call a function with the given argument list
   439  // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   440  // we don't have variable-sized frames, so we use a small number
   441  // of constant-sized-frame functions to encode a few bits of size in the pc.
   442  // Caution: ugly multiline assembly macros in your future!
   443  
	// DISPATCH jumps to NAME if the arg size in CX is <= MAXSIZE;
	// otherwise execution falls through to the next DISPATCH line.
   444  #define DISPATCH(NAME,MAXSIZE)		\
   445  	CMPQ	CX, $MAXSIZE;		\
   446  	JA	3(PC);			\
   447  	MOVQ	$NAME(SB), AX;		\
   448  	JMP	AX
   449  // Note: can't just "JMP NAME(SB)" - bad inlining results.
   450  
	// reflect.call is an alias kept for the reflect package; it simply
	// forwards to runtime.reflectcall below.
   451  TEXT reflect·call(SB), NOSPLIT, $0-0
   452  	JMP	·reflectcall(SB)
   453  
	// reflectcall selects the fixed-frame call* routine whose frame is the
	// smallest power of two that fits the argument block (size in CX).
   454  TEXT ·reflectcall(SB), NOSPLIT, $0-32
   455  	MOVLQZX argsize+24(FP), CX
   456  	DISPATCH(runtime·call32, 32)
   457  	DISPATCH(runtime·call64, 64)
   458  	DISPATCH(runtime·call128, 128)
   459  	DISPATCH(runtime·call256, 256)
   460  	DISPATCH(runtime·call512, 512)
   461  	DISPATCH(runtime·call1024, 1024)
   462  	DISPATCH(runtime·call2048, 2048)
   463  	DISPATCH(runtime·call4096, 4096)
   464  	DISPATCH(runtime·call8192, 8192)
   465  	DISPATCH(runtime·call16384, 16384)
   466  	DISPATCH(runtime·call32768, 32768)
   467  	DISPATCH(runtime·call65536, 65536)
   468  	DISPATCH(runtime·call131072, 131072)
   469  	DISPATCH(runtime·call262144, 262144)
   470  	DISPATCH(runtime·call524288, 524288)
   471  	DISPATCH(runtime·call1048576, 1048576)
   472  	DISPATCH(runtime·call2097152, 2097152)
   473  	DISPATCH(runtime·call4194304, 4194304)
   474  	DISPATCH(runtime·call8388608, 8388608)
   475  	DISPATCH(runtime·call16777216, 16777216)
   476  	DISPATCH(runtime·call33554432, 33554432)
   477  	DISPATCH(runtime·call67108864, 67108864)
   478  	DISPATCH(runtime·call134217728, 134217728)
   479  	DISPATCH(runtime·call268435456, 268435456)
   480  	DISPATCH(runtime·call536870912, 536870912)
   481  	DISPATCH(runtime·call1073741824, 1073741824)
	// Argument block larger than 1GB: impossible by construction; crash.
   482  	MOVQ	$runtime·badreflectcall(SB), AX
   483  	JMP	AX
   484  
	// CALLFN expands to one fixed-frame call routine: copy the caller's
	// argument block onto our frame, call f, then copy results (from
	// retoffset onward) back through the write-barrier-aware callRet.
   485  #define CALLFN(NAME,MAXSIZE)			\
   486  TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
   487  	NO_LOCAL_POINTERS;			\
   488  	/* copy arguments to stack */		\
   489  	MOVQ	argptr+16(FP), SI;		\
   490  	MOVLQZX argsize+24(FP), CX;		\
   491  	MOVQ	SP, DI;				\
   492  	REP;MOVSB;				\
   493  	/* call function */			\
   494  	MOVQ	f+8(FP), DX;			\
   495  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   496  	CALL	(DX);				\
   497  	/* copy return values back */		\
   498  	MOVQ	argtype+0(FP), DX;		\
   499  	MOVQ	argptr+16(FP), DI;		\
   500  	MOVLQZX	argsize+24(FP), CX;		\
   501  	MOVLQZX	retoffset+28(FP), BX;		\
   502  	MOVQ	SP, SI;				\
   503  	ADDQ	BX, DI;				\
   504  	ADDQ	BX, SI;				\
   505  	SUBQ	BX, CX;				\
   506  	CALL	callRet<>(SB);			\
   507  	RET
   508  
   509  // callRet copies return values back at the end of call*. This is a
   510  // separate function so it can allocate stack space for the arguments
   511  // to reflectcallmove. It does not follow the Go ABI; it expects its
   512  // arguments in registers.
	// In: DX = argtype, DI = dst, SI = src, CX = size (register args,
	// not Go ABI). Spill them into our frame as reflectcallmove's args.
   513  TEXT callRet<>(SB), NOSPLIT, $32-0
   514  	NO_LOCAL_POINTERS
   515  	MOVQ	DX, 0(SP)
   516  	MOVQ	DI, 8(SP)
   517  	MOVQ	SI, 16(SP)
   518  	MOVQ	CX, 24(SP)
   519  	CALL	runtime·reflectcallmove(SB)
   520  	RET
   521  
	// Instantiate one call* routine per power-of-two frame size used by
	// the reflectcall dispatch table above.
   522  CALLFN(·call32, 32)
   523  CALLFN(·call64, 64)
   524  CALLFN(·call128, 128)
   525  CALLFN(·call256, 256)
   526  CALLFN(·call512, 512)
   527  CALLFN(·call1024, 1024)
   528  CALLFN(·call2048, 2048)
   529  CALLFN(·call4096, 4096)
   530  CALLFN(·call8192, 8192)
   531  CALLFN(·call16384, 16384)
   532  CALLFN(·call32768, 32768)
   533  CALLFN(·call65536, 65536)
   534  CALLFN(·call131072, 131072)
   535  CALLFN(·call262144, 262144)
   536  CALLFN(·call524288, 524288)
   537  CALLFN(·call1048576, 1048576)
   538  CALLFN(·call2097152, 2097152)
   539  CALLFN(·call4194304, 4194304)
   540  CALLFN(·call8388608, 8388608)
   541  CALLFN(·call16777216, 16777216)
   542  CALLFN(·call33554432, 33554432)
   543  CALLFN(·call67108864, 67108864)
   544  CALLFN(·call134217728, 134217728)
   545  CALLFN(·call268435456, 268435456)
   546  CALLFN(·call536870912, 536870912)
   547  CALLFN(·call1073741824, 1073741824)
   548  
	// procyield(cycles uint32) spins executing PAUSE `cycles` times,
	// a polite busy-wait hint for spin loops.
   549  TEXT runtime·procyield(SB),NOSPLIT,$0-0
   550  	MOVL	cycles+0(FP), AX
   551  again:
   552  	PAUSE
   553  	SUBL	$1, AX
   554  	JNZ	again
   555  	RET
   556  
   557  
   558  TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   559  	// Stores are already ordered on x86, so this is just a
   560  	// compile barrier.
   561  	RET
   562  
   563  // func jmpdefer(fv *funcval, argp uintptr)
   564  // argp is a caller SP.
   565  // called from deferreturn.
   566  // 1. pop the caller
   567  // 2. sub 5 bytes from the callers return
   568  // 3. jmp to the argument
	// The 5-byte rewind re-points the return address at the CALL
	// deferreturn instruction itself (a 5-byte CALL), so deferreturn
	// is re-entered after the deferred function runs.
   569  TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   570  	MOVQ	fv+0(FP), DX	// fn
   571  	MOVQ	argp+8(FP), BX	// caller sp
   572  	LEAQ	-8(BX), SP	// caller sp after CALL
   573  	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
   574  	SUBQ	$5, (SP)	// return to CALL again
   575  	MOVQ	0(DX), BX
   576  	JMP	BX	// but first run the deferred function
   577  
   578  // Save state of caller into g->sched. Smashes R8, R9.
	// Private helper for asmcgocall: like runtime·gosave but writes
	// directly into the current g's sched buffer and uses only R8/R9.
   579  TEXT gosave<>(SB),NOSPLIT,$0
   580  	get_tls(R8)
   581  	MOVQ	g(R8), R8
   582  	MOVQ	0(SP), R9
   583  	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   584  	LEAQ	8(SP), R9
   585  	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   586  	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   587  	MOVQ	BP, (g_sched+gobuf_bp)(R8)
   588  	// Assert ctxt is zero. See func save.
   589  	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
   590  	TESTQ	R9, R9
   591  	JZ	2(PC)
   592  	CALL	runtime·badctxt(SB)
   593  	RET
   594  
   595  // func asmcgocall(fn, arg unsafe.Pointer) int32
   596  // Call fn(arg) on the scheduler stack,
   597  // aligned appropriately for the gcc ABI.
   598  // See cgocall.go for more details.
   599  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   600  	MOVQ	fn+0(FP), AX
   601  	MOVQ	arg+8(FP), BX
   602  
	// DX remembers the entry SP so the stack depth can be recomputed
	// after the call even if the goroutine stack moved.
   603  	MOVQ	SP, DX
   604  
   605  	// Figure out if we need to switch to m->g0 stack.
   606  	// We get called to create new OS threads too, and those
   607  	// come in on the m->g0 stack already.
   608  	get_tls(CX)
   609  	MOVQ	g(CX), R8
   610  	CMPQ	R8, $0
   611  	JEQ	nosave
   612  	MOVQ	g_m(R8), R8
   613  	MOVQ	m_g0(R8), SI
   614  	MOVQ	g(CX), DI
   615  	CMPQ	SI, DI
   616  	JEQ	nosave
   617  	MOVQ	m_gsignal(R8), SI
   618  	CMPQ	SI, DI
   619  	JEQ	nosave
   620  
   621  	// Switch to system stack.
   622  	MOVQ	m_g0(R8), SI
   623  	CALL	gosave<>(SB)
   624  	MOVQ	SI, g(CX)
   625  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   626  
   627  	// Now on a scheduling stack (a pthread-created stack).
   628  	// Make sure we have enough room for 4 stack-backed fast-call
   629  	// registers as per windows amd64 calling convention.
   630  	SUBQ	$64, SP
   631  	ANDQ	$~15, SP	// alignment for gcc ABI
   632  	MOVQ	DI, 48(SP)	// save g
   633  	MOVQ	(g_stack+stack_hi)(DI), DI
   634  	SUBQ	DX, DI
   635  	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   636  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   637  	MOVQ	BX, CX		// CX = first argument in Win64
   638  	CALL	AX
   639  
   640  	// Restore registers, g, stack pointer.
	// Recompute the goroutine SP as stack_hi minus the saved depth,
	// which is correct even if the stack was copied during a callback.
   641  	get_tls(CX)
   642  	MOVQ	48(SP), DI
   643  	MOVQ	(g_stack+stack_hi)(DI), SI
   644  	SUBQ	40(SP), SI
   645  	MOVQ	DI, g(CX)
   646  	MOVQ	SI, SP
   647  
   648  	MOVL	AX, ret+16(FP)
   649  	RET
   650  
   651  nosave:
   652  	// Running on a system stack, perhaps even without a g.
   653  	// Having no g can happen during thread creation or thread teardown
   654  	// (see needm/dropm on Solaris, for example).
   655  	// This code is like the above sequence but without saving/restoring g
   656  	// and without worrying about the stack moving out from under us
   657  	// (because we're on a system stack, not a goroutine stack).
   658  	// The above code could be used directly if already on a system stack,
   659  	// but then the only path through this code would be a rare case on Solaris.
   660  	// Using this code for all "already on system stack" calls exercises it more,
   661  	// which should help keep it correct.
   662  	SUBQ	$64, SP
   663  	ANDQ	$~15, SP
   664  	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   665  	MOVQ	DX, 40(SP)	// save original stack pointer
   666  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   667  	MOVQ	BX, CX		// CX = first argument in Win64
   668  	CALL	AX
   669  	MOVQ	40(SP), SI	// restore original stack pointer
   670  	MOVQ	SI, SP
   671  	MOVL	AX, ret+16(FP)
   672  	RET
   673  
   674  // func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
   675  // Turn the fn into a Go func (by taking its address) and call
   676  // cgocallback_gofunc.
   677  TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	// LEAQ of the fn argument slot yields a *funcval-shaped pointer,
	// which cgocallback_gofunc treats as a Go func value.
   678  	LEAQ	fn+0(FP), AX
   679  	MOVQ	AX, 0(SP)
   680  	MOVQ	frame+8(FP), AX
   681  	MOVQ	AX, 8(SP)
   682  	MOVQ	framesize+16(FP), AX
   683  	MOVQ	AX, 16(SP)
   684  	MOVQ	ctxt+24(FP), AX
   685  	MOVQ	AX, 24(SP)
   686  	MOVQ	$runtime·cgocallback_gofunc(SB), AX
   687  	CALL	AX
   688  	RET
   689  
   690  // func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
   691  // See cgocall.go for more details.
   692  TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   693  	NO_LOCAL_POINTERS
   694  
   695  	// If g is nil, Go did not create the current thread.
   696  	// Call needm to obtain one m for temporary use.
   697  	// In this case, we're running on the thread stack, so there's
   698  	// lots of space, but the linker doesn't know. Hide the call from
   699  	// the linker analysis by using an indirect call through AX.
   700  	get_tls(CX)
   701  #ifdef GOOS_windows
   702  	MOVL	$0, BX
   703  	CMPQ	CX, $0
   704  	JEQ	2(PC)
   705  #endif
   706  	MOVQ	g(CX), BX
   707  	CMPQ	BX, $0
   708  	JEQ	needm
   709  	MOVQ	g_m(BX), BX
	// R8 != 0 signals at function end that the m was already ours and
	// must not be dropped.
   710  	MOVQ	BX, R8 // holds oldm until end of function
   711  	JMP	havem
   712  needm:
   713  	MOVQ	$0, 0(SP)
   714  	MOVQ	$runtime·needm(SB), AX
   715  	CALL	AX
   716  	MOVQ	0(SP), R8
   717  	get_tls(CX)
   718  	MOVQ	g(CX), BX
   719  	MOVQ	g_m(BX), BX
   720  
   721  	// Set m->sched.sp = SP, so that if a panic happens
   722  	// during the function we are about to execute, it will
   723  	// have a valid SP to run on the g0 stack.
   724  	// The next few lines (after the havem label)
   725  	// will save this SP onto the stack and then write
   726  	// the same SP back to m->sched.sp. That seems redundant,
   727  	// but if an unrecovered panic happens, unwindm will
   728  	// restore the g->sched.sp from the stack location
   729  	// and then systemstack will try to use it. If we don't set it here,
   730  	// that restored SP will be uninitialized (typically 0) and
   731  	// will not be usable.
   732  	MOVQ	m_g0(BX), SI
   733  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   734  
   735  havem:
   736  	// Now there's a valid m, and we're running on its m->g0.
   737  	// Save current m->g0->sched.sp on stack and then set it to SP.
   738  	// Save current sp in m->g0->sched.sp in preparation for
   739  	// switch back to m->curg stack.
   740  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   741  	MOVQ	m_g0(BX), SI
   742  	MOVQ	(g_sched+gobuf_sp)(SI), AX
   743  	MOVQ	AX, 0(SP)
   744  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   745  
   746  	// Switch to m->curg stack and call runtime.cgocallbackg.
   747  	// Because we are taking over the execution of m->curg
   748  	// but *not* resuming what had been running, we need to
   749  	// save that information (m->curg->sched) so we can restore it.
   750  	// We can restore m->curg->sched.sp easily, because calling
   751  	// runtime.cgocallbackg leaves SP unchanged upon return.
   752  	// To save m->curg->sched.pc, we push it onto the stack.
   753  	// This has the added benefit that it looks to the traceback
   754  	// routine like cgocallbackg is going to return to that
   755  	// PC (because the frame we allocate below has the same
   756  	// size as cgocallback_gofunc's frame declared above)
   757  	// so that the traceback will seamlessly trace back into
   758  	// the earlier calls.
   759  	//
   760  	// In the new goroutine, 8(SP) holds the saved R8.
   761  	MOVQ	m_curg(BX), SI
   762  	MOVQ	SI, g(CX)
   763  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   764  	MOVQ	(g_sched+gobuf_pc)(SI), BX
   765  	MOVQ	BX, -8(DI)
   766  	// Compute the size of the frame, including return PC and, if
   767  	// GOEXPERIMENT=framepointer, the saved base pointer
   768  	MOVQ	ctxt+24(FP), BX
   769  	LEAQ	fv+0(FP), AX
   770  	SUBQ	SP, AX
   771  	SUBQ	AX, DI
   772  	MOVQ	DI, SP
   773  
   774  	MOVQ	R8, 8(SP)
   775  	MOVQ	BX, 0(SP)
   776  	CALL	runtime·cgocallbackg(SB)
   777  	MOVQ	8(SP), R8
   778  
   779  	// Compute the size of the frame again. FP and SP have
   780  	// completely different values here than they did above,
   781  	// but only their difference matters.
   782  	LEAQ	fv+0(FP), AX
   783  	SUBQ	SP, AX
   784  
   785  	// Restore g->sched (== m->curg->sched) from saved values.
   786  	get_tls(CX)
   787  	MOVQ	g(CX), SI
   788  	MOVQ	SP, DI
   789  	ADDQ	AX, DI
   790  	MOVQ	-8(DI), BX
   791  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   792  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   793  
   794  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   795  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   796  	// so we do not have to restore it.)
   797  	MOVQ	g(CX), BX
   798  	MOVQ	g_m(BX), BX
   799  	MOVQ	m_g0(BX), SI
   800  	MOVQ	SI, g(CX)
   801  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   802  	MOVQ	0(SP), AX
   803  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   804  
   805  	// If the m on entry was nil, we called needm above to borrow an m
   806  	// for the duration of the call. Since the call is over, return it with dropm.
   807  	CMPQ	R8, $0
   808  	JNE 3(PC)
   809  	MOVQ	$runtime·dropm(SB), AX
   810  	CALL	AX
   811  
   812  	// Done!
   813  	RET
   814  
   815  // func setg(gg *g)
   816  // set g. for use by needm.
   817  TEXT runtime·setg(SB), NOSPLIT, $0-8
   818  	MOVQ	gg+0(FP), BX
   819  #ifdef GOOS_windows
	// On Windows the TLS slot lives at 0x28(GS); keep it in sync so
	// exception handlers can find g (or 0 when clearing).
   820  	CMPQ	BX, $0
   821  	JNE	settls
   822  	MOVQ	$0, 0x28(GS)
   823  	RET
   824  settls:
   825  	MOVQ	g_m(BX), AX
   826  	LEAQ	m_tls(AX), AX
   827  	MOVQ	AX, 0x28(GS)
   828  #endif
   829  	get_tls(CX)
   830  	MOVQ	BX, g(CX)
   831  	RET
   832  
   833  // void setg_gcc(G*); set g called from gcc.
	// C-ABI entry point: the new g arrives in DI (SysV first argument).
   834  TEXT setg_gcc<>(SB),NOSPLIT,$0
   835  	get_tls(AX)
   836  	MOVQ	DI, g(AX)
   837  	RET
   838  
	// abort crashes the process with a breakpoint trap; the loop makes
	// it fatal even if the trap is somehow resumed.
   839  TEXT runtime·abort(SB),NOSPLIT,$0-0
   840  	INT	$3
   841  loop:
   842  	JMP	loop
   843  
   844  // check that SP is in range [g->stack.lo, g->stack.hi)
   845  TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   846  	get_tls(CX)
   847  	MOVQ	g(CX), AX
	// Unsigned compares (JHI): abort unless stack.lo < SP <= stack.hi
	// fails to hold on either side.
   848  	CMPQ	(g_stack+stack_hi)(AX), SP
   849  	JHI	2(PC)
   850  	CALL	runtime·abort(SB)
   851  	CMPQ	SP, (g_stack+stack_lo)(AX)
   852  	JHI	2(PC)
   853  	CALL	runtime·abort(SB)
   854  	RET
   855  
   856  // func cputicks() int64
	// Reads the TSC, serialized with LFENCE on Intel (sufficient there)
	// or MFENCE elsewhere, per the flag set during rt0_go CPU probing.
   857  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   858  	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   859  	JNE	mfence
   860  	LFENCE
   861  	JMP	done
   862  mfence:
   863  	MFENCE
   864  done:
	// RDTSC returns the 64-bit counter split across EDX:EAX.
   865  	RDTSC
   866  	SHLQ	$32, DX
   867  	ADDQ	DX, AX
   868  	MOVQ	AX, ret+0(FP)
   869  	RET
   870  
   871  // func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
   872  // hash function using AES hardware instructions
	// Loads the common-body register contract (AX=data, CX=len, DX=&ret;
	// h stays at 8(FP) where aeshashbody reads it) and tail-jumps.
   873  TEXT runtime·aeshash(SB),NOSPLIT,$0-32
   874  	MOVQ	p+0(FP), AX	// ptr to data
   875  	MOVQ	s+16(FP), CX	// size
   876  	LEAQ	ret+24(FP), DX
   877  	JMP	runtime·aeshashbody(SB)
   878  
   879  // func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
	// Unpacks the string header (data pointer at 0, length at 8) into the
	// aeshashbody register contract, then tail-jumps to the common body.
   880  TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
   881  	MOVQ	p+0(FP), AX	// ptr to string struct
   882  	MOVQ	8(AX), CX	// length of string
   883  	MOVQ	(AX), AX	// string data
   884  	LEAQ	ret+16(FP), DX
   885  	JMP	runtime·aeshashbody(SB)
   886  
// AX: data
// CX: length
// DX: address to put return value
//
// Entered via JMP from aeshash/aeshashstr, so the frame (and the seed
// argument read below as h+8(FP)) belongs to the jumping function.
// Dispatches on length to size-specialized paths; each path xors data
// with per-size seeds, runs 3 rounds of AESENC, and stores 8 bytes of
// result through DX.
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length (unsigned compares).
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// After ADDQ, AX = data+16. If bits 4..11 of data+16 are all zero,
	// data is within 16 bytes of a page boundary, so a direct 16-byte
	// load at data could fault; take the endofpage path instead.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	// Double CX so (AX)(CX*8) below indexes 16-byte table entries:
	// masks<> entry i keeps only the low i bytes.
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	// (AX is data+16 here, so this load ends at the final data byte.)
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX	// index 16-byte shifts<> entries, as with masks<> above
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	// (the two loads overlap when CX < 32; that's fine, every byte is covered)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 bytes and last 32 bytes (may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine the 4 lanes into one result
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, (DX)
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 bytes and last 64 bytes (may overlap)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	// CX = (size-1)>>7, which is >= 1 since size >= 129
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	// (AESENC mem, reg folds the next 128 data bytes into the state)
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine the 8 lanes into one result
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET
  1217  
// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
// Specialized hash for 4-byte values: inserts the 32-bit datum into
// the seed register, then runs 3 AES rounds with the key schedule.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data (dword lane 2, above the 64-bit seed)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// low 64 bits of state are the hash
	RET
  1228  
// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
// Specialized hash for 8-byte values: inserts the 64-bit datum into
// the seed register, then runs 3 AES rounds with the key schedule.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data (qword lane 1, above the seed)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// low 64 bits of state are the hash
	RET
  1239  
// simple mask to get rid of data in the high part of the register.
// Table of 16 16-byte entries: entry i (at offset i*16) has its low
// i bytes set to 0xff and the rest zero, so PANDing with entry i
// keeps exactly the first i bytes of a 16-byte load.
// aeshashbody indexes this as masks + length*16.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
  1274  
// func checkASM() bool
// Reports whether assembly-level invariants hold; used at startup.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to a 16-byte boundary
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX		// any misaligned bit from either address survives
	TESTQ	$15, AX		// low 4 bits must be zero for 16-byte alignment
	SETEQ	ret+0(FP)
	RET
  1284  
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// Entry i (16 bytes at offset i*16) selects the top i bytes into the
// bottom of the register; the 0xff control bytes make PSHUFB write
// zeros into the remaining lanes. aeshashbody indexes this as
// shifts + length*16.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1321  
// return0 places 0 in AX and returns. Used where a zero return value
// is needed in register AX rather than on the Go stack.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
  1325  
  1326  
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Result is returned in AX; only AX and CX are written here.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX			// AX = g
	MOVQ	g_m(AX), AX			// AX = g.m
	MOVQ	m_curg(AX), AX			// AX = m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
  1336  
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP exists so that goexit+PCQuantum is still inside
// this function's code range; goexit1 never returns.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
  1344  
// This is called from .init_array and follows the platform, not Go, ABI.
// DI (the first C ABI argument register) holds the moduledata to append
// to the runtime's linked list of modules.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)		// lastmoduledatap.next = DI
	MOVQ	DI, runtime·lastmoduledatap(SB)	// lastmoduledatap = DI
	POPQ	R15
	RET
  1353  
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
//
// Frame layout ($120): bytes 0-96 spill the GP registers on the slow
// (flush) path; 104/112 hold the fast path's saves of R14/R13.
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
	// Record the write.
	// NOTE: the MOVQs between the CMPQ above and the JEQ below do not
	// modify flags, so the comparison result survives to the branch.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	// Restore everything and rejoin the fast path's exit.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret
  1443  
// "call frame too large" — error string handed to the debugger by
// debugCallV1 when the requested argument frame exceeds 64 KiB.
DATA	debugCallFrameTooLarge<>+0x00(SB)/8, $"call fra"
DATA	debugCallFrameTooLarge<>+0x08(SB)/8, $"me too l"
DATA	debugCallFrameTooLarge<>+0x10(SB)/4, $"arge"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $0x14	// Size duplicated below
  1448  
// debugCallV1 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV1 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting RAX and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers in GC register
	// map order (see ssa.registersAMD64). This makes it possible
	// to copy the stack while updating pointers currently held in
	// registers, and for the GC to find roots in registers.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX	// first result word: nil means the check passed
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch to the smallest debugCall* stub whose frame fits.
	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$0x14, 8(SP)	// length of the error string above
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	// (MOVQ does not modify flags.)
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
  1604  
// DEBUG_CALL_FN defines a debugCall* stub with a MAXSIZE-byte frame.
// Per the debugCallV1 protocol: it raises INT3 with AX=0 so the
// debugger can fill in the argument frame and redirect the PC to the
// target function, then raises INT3 with AX=1 after the call returns
// so the debugger can read results from the frame.
// (No comments may appear inside the macro body: each line ends in a
// backslash continuation.)
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, AX;				\
	BYTE	$0xcc;				\
	MOVQ	$1, AX;				\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
  1625  
// func debugCallPanicked(val interface{})
// Signals a panic in a debugger-injected call: copies the panic
// value's interface words to the top of the stack and raises INT3
// with AX=2 so the debugger can inspect it (see debugCallV1).
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX	// interface type word
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX	// interface data word
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc	// INT3
	RET