github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/runtime/memclr_amd64.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !plan9
     6  
     7  #include "textflag.h"
     8  
     9  // NOTE: Windows externalthreadhandler expects memclr to preserve DX.
    10  
    11  // void runtime·memclr(void*, uintptr): stores zero to the n bytes starting at ptr.
    12  TEXT runtime·memclr(SB), NOSPLIT, $0-16	// frame size 0, 16 bytes of arguments; the only registers written below are DI, BX, AX, X0 and the flags
    13  	MOVQ	ptr+0(FP), DI	// DI = ptr: address of the next byte to clear
    14  	MOVQ	n+8(FP), BX	// BX = n: number of bytes still to clear
    15  	XORQ	AX, AX	// AX = 0: source operand for the stores of 8 bytes or fewer
    16  
    17  	// MOVOU seems always faster than REP STOSQ.
    18  tail:	// dispatch on BX using unsigned compares; re-entered from loop with BX < 256
    19  	TESTQ	BX, BX
    20  	JEQ	_0	// nothing (left) to clear
    21  	CMPQ	BX, $2
    22  	JBE	_1or2
    23  	CMPQ	BX, $4
    24  	JBE	_3or4
    25  	CMPQ	BX, $8
    26  	JBE	_5through8
    27  	CMPQ	BX, $16
    28  	JBE	_9through16
    29  	PXOR	X0, X0	// X0 = 0: source operand for the 16-byte stores; reached only when BX > 16
    30  	CMPQ	BX, $32
    31  	JBE	_17through32
    32  	CMPQ	BX, $64
    33  	JBE	_33through64
    34  	CMPQ	BX, $128
    35  	JBE	_65through128
    36  	CMPQ	BX, $256
    37  	JBE	_129through256
    38  	// TODO: use branch table and BSR to make this just a single dispatch
    39  	// TODO: for really big clears, use MOVNTDQ.
    40  
    41  loop:	// first reached with BX > 256; each iteration clears 256 bytes with sixteen 16-byte stores
    42  	MOVOU	X0, 0(DI)
    43  	MOVOU	X0, 16(DI)
    44  	MOVOU	X0, 32(DI)
    45  	MOVOU	X0, 48(DI)
    46  	MOVOU	X0, 64(DI)
    47  	MOVOU	X0, 80(DI)
    48  	MOVOU	X0, 96(DI)
    49  	MOVOU	X0, 112(DI)
    50  	MOVOU	X0, 128(DI)
    51  	MOVOU	X0, 144(DI)
    52  	MOVOU	X0, 160(DI)
    53  	MOVOU	X0, 176(DI)
    54  	MOVOU	X0, 192(DI)
    55  	MOVOU	X0, 208(DI)
    56  	MOVOU	X0, 224(DI)
    57  	MOVOU	X0, 240(DI)
    58  	SUBQ	$256, BX	// 256 fewer bytes remain
    59  	ADDQ	$256, DI	// step past the bytes just cleared
    60  	CMPQ	BX, $256
    61  	JAE	loop	// at least 256 bytes remain (unsigned compare)
    62  	JMP	tail	// 0..255 bytes remain; finish them through the dispatch above
    63  
    64  _1or2:	// one byte at the start and one at the end; both hit the same byte when BX == 1
    65  	MOVB	AX, (DI)
    66  	MOVB	AX, -1(DI)(BX*1)	// last byte, at DI+BX-1
    67  	RET
    68  _0:
    69  	RET
    70  _3or4:	// 2-byte store at the start and 2-byte store ending at DI+BX; they overlap when BX == 3
    71  	MOVW	AX, (DI)
    72  	MOVW	AX, -2(DI)(BX*1)
    73  	RET
    74  _5through8:	// 4-byte store at the start and 4-byte store ending at DI+BX; they overlap when BX < 8
    75  	MOVL	AX, (DI)
    76  	MOVL	AX, -4(DI)(BX*1)
    77  	RET
    78  _9through16:	// 8-byte store at the start and 8-byte store ending at DI+BX; they overlap when BX < 16
    79  	MOVQ	AX, (DI)
    80  	MOVQ	AX, -8(DI)(BX*1)
    81  	RET
    82  _17through32:	// 16-byte store at the start and 16-byte store ending at DI+BX; they overlap when BX < 32
    83  	MOVOU	X0, (DI)
    84  	MOVOU	X0, -16(DI)(BX*1)
    85  	RET
    86  _33through64:	// first 32 bytes, then the 32 bytes ending at DI+BX; the halves overlap when BX < 64
    87  	MOVOU	X0, (DI)
    88  	MOVOU	X0, 16(DI)
    89  	MOVOU	X0, -32(DI)(BX*1)
    90  	MOVOU	X0, -16(DI)(BX*1)
    91  	RET
    92  _65through128:	// first 64 bytes, then the 64 bytes ending at DI+BX; the halves overlap when BX < 128
    93  	MOVOU	X0, (DI)
    94  	MOVOU	X0, 16(DI)
    95  	MOVOU	X0, 32(DI)
    96  	MOVOU	X0, 48(DI)
    97  	MOVOU	X0, -64(DI)(BX*1)
    98  	MOVOU	X0, -48(DI)(BX*1)
    99  	MOVOU	X0, -32(DI)(BX*1)
   100  	MOVOU	X0, -16(DI)(BX*1)
   101  	RET
   102  _129through256:	// first 128 bytes, then the 128 bytes ending at DI+BX; the halves overlap when BX < 256
   103  	MOVOU	X0, (DI)
   104  	MOVOU	X0, 16(DI)
   105  	MOVOU	X0, 32(DI)
   106  	MOVOU	X0, 48(DI)
   107  	MOVOU	X0, 64(DI)
   108  	MOVOU	X0, 80(DI)
   109  	MOVOU	X0, 96(DI)
   110  	MOVOU	X0, 112(DI)
   111  	MOVOU	X0, -128(DI)(BX*1)
   112  	MOVOU	X0, -112(DI)(BX*1)
   113  	MOVOU	X0, -96(DI)(BX*1)
   114  	MOVOU	X0, -80(DI)(BX*1)
   115  	MOVOU	X0, -64(DI)(BX*1)
   116  	MOVOU	X0, -48(DI)(BX*1)
   117  	MOVOU	X0, -32(DI)(BX*1)
   118  	MOVOU	X0, -16(DI)(BX*1)
   119  	RET