github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/runtime/memclr_386.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !plan9
     6  
     7  #include "textflag.h"
     8  
     9  // NOTE: Windows externalthreadhandler expects memclr to preserve DX.
    10  
    11  // void runtime·memclr(void*, uintptr): zeroes n bytes starting at ptr.
    12  TEXT runtime·memclr(SB), NOSPLIT, $0-8
    13  	MOVL	ptr+0(FP), DI	// DI = destination pointer
    14  	MOVL	n+4(FP), BX	// BX = number of bytes still to clear
    15  	XORL	AX, AX	// AX = 0, the value written by the non-SSE stores
    16  
    17  	// MOVOU seems always faster than REP STOSL, so REP STOSL is only used on the nosse2 path.
    18  clr_tail:	// size dispatch on BX; re-entered from clr_loop and nosse2 to finish leftovers
    19  	TESTL	BX, BX
    20  	JEQ	clr_0	// nothing (left) to clear
    21  	CMPL	BX, $2	// all size comparisons below are unsigned (JBE)
    22  	JBE	clr_1or2
    23  	CMPL	BX, $4
    24  	JBE	clr_3or4
    25  	CMPL	BX, $8
    26  	JBE	clr_5through8
    27  	CMPL	BX, $16
    28  	JBE	clr_9through16
    29  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2 (bit 26 of runtime·cpuid_edx)
    30  	JEQ	nosse2	// bit clear: use the REP STOSL fallback for BX > 16
    31  	PXOR	X0, X0	// X0 = 16 zero bytes, the source for every MOVOU below
    32  	CMPL	BX, $32
    33  	JBE	clr_17through32
    34  	CMPL	BX, $64
    35  	JBE	clr_33through64
    36  	CMPL	BX, $128
    37  	JBE	clr_65through128
    38  	CMPL	BX, $256
    39  	JBE	clr_129through256
    40  	// TODO: use branch table and BSR to make this just a single dispatch
    41  
    42  clr_loop:	// BX > 256 on first entry; each pass clears 256 bytes with sixteen 16-byte stores
    43  	MOVOU	X0, 0(DI)
    44  	MOVOU	X0, 16(DI)
    45  	MOVOU	X0, 32(DI)
    46  	MOVOU	X0, 48(DI)
    47  	MOVOU	X0, 64(DI)
    48  	MOVOU	X0, 80(DI)
    49  	MOVOU	X0, 96(DI)
    50  	MOVOU	X0, 112(DI)
    51  	MOVOU	X0, 128(DI)
    52  	MOVOU	X0, 144(DI)
    53  	MOVOU	X0, 160(DI)
    54  	MOVOU	X0, 176(DI)
    55  	MOVOU	X0, 192(DI)
    56  	MOVOU	X0, 208(DI)
    57  	MOVOU	X0, 224(DI)
    58  	MOVOU	X0, 240(DI)
    59  	SUBL	$256, BX	// 256 fewer bytes remaining
    60  	ADDL	$256, DI	// step the destination past the cleared chunk
    61  	CMPL	BX, $256
    62  	JAE	clr_loop	// another full 256-byte chunk remains
    63  	JMP	clr_tail	// 0..255 bytes left: let the dispatch above finish them
    64  
    65  clr_1or2:
    66  	MOVB	AX, (DI)	// first byte
    67  	MOVB	AX, -1(DI)(BX*1)	// last byte (same byte as above when BX == 1)
    68  	RET
    69  clr_0:
    70  	RET
    71  clr_3or4:
    72  	MOVW	AX, (DI)	// first 2 bytes
    73  	MOVW	AX, -2(DI)(BX*1)	// last 2 bytes; overlaps the first store by one byte when BX == 3
    74  	RET
    75  clr_5through8:
    76  	MOVL	AX, (DI)	// first 4 bytes
    77  	MOVL	AX, -4(DI)(BX*1)	// last 4 bytes; overlaps the first store when BX < 8
    78  	RET
    79  clr_9through16:
    80  	MOVL	AX, (DI)	// first 8 bytes, as two 4-byte stores
    81  	MOVL	AX, 4(DI)
    82  	MOVL	AX, -8(DI)(BX*1)	// last 8 bytes; overlaps the first 8 when BX < 16
    83  	MOVL	AX, -4(DI)(BX*1)
    84  	RET
    85  clr_17through32:
    86  	MOVOU	X0, (DI)	// first 16 bytes
    87  	MOVOU	X0, -16(DI)(BX*1)	// last 16 bytes; overlaps the first store when BX < 32
    88  	RET
    89  clr_33through64:
    90  	MOVOU	X0, (DI)	// first 32 bytes
    91  	MOVOU	X0, 16(DI)
    92  	MOVOU	X0, -32(DI)(BX*1)	// last 32 bytes; overlaps the first 32 when BX < 64
    93  	MOVOU	X0, -16(DI)(BX*1)
    94  	RET
    95  clr_65through128:
    96  	MOVOU	X0, (DI)	// first 64 bytes
    97  	MOVOU	X0, 16(DI)
    98  	MOVOU	X0, 32(DI)
    99  	MOVOU	X0, 48(DI)
   100  	MOVOU	X0, -64(DI)(BX*1)	// last 64 bytes; overlaps the first 64 when BX < 128
   101  	MOVOU	X0, -48(DI)(BX*1)
   102  	MOVOU	X0, -32(DI)(BX*1)
   103  	MOVOU	X0, -16(DI)(BX*1)
   104  	RET
   105  clr_129through256:
   106  	MOVOU	X0, (DI)	// first 128 bytes
   107  	MOVOU	X0, 16(DI)
   108  	MOVOU	X0, 32(DI)
   109  	MOVOU	X0, 48(DI)
   110  	MOVOU	X0, 64(DI)
   111  	MOVOU	X0, 80(DI)
   112  	MOVOU	X0, 96(DI)
   113  	MOVOU	X0, 112(DI)
   114  	MOVOU	X0, -128(DI)(BX*1)	// last 128 bytes; overlaps the first 128 when BX < 256
   115  	MOVOU	X0, -112(DI)(BX*1)
   116  	MOVOU	X0, -96(DI)(BX*1)
   117  	MOVOU	X0, -80(DI)(BX*1)
   118  	MOVOU	X0, -64(DI)(BX*1)
   119  	MOVOU	X0, -48(DI)(BX*1)
   120  	MOVOU	X0, -32(DI)(BX*1)
   121  	MOVOU	X0, -16(DI)(BX*1)
   122  	RET
   123  nosse2:	// reached from the dispatch with BX > 16 when the sse2 bit is clear
   124  	MOVL	BX, CX
   125  	SHRL	$2, CX	// CX = number of whole 4-byte words (BX / 4)
   126  	REP
   127  	STOSL	// store AX (zero) CX times, 4 bytes each, advancing DI; NOTE(review): assumes the direction flag is clear - confirm
   128  	ANDL	$3, BX	// BX = leftover bytes (0..3); ZF set when none remain
   129  	JNE	clr_tail	// finish the 1..3 leftover bytes at the advanced DI
   130  	RET