github.com/aloncn/graphics-go@v0.0.1/src/runtime/memclr_386.s (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build !plan9
     6  
     7  #include "textflag.h"
     8  
     9  // NOTE: Windows externalthreadhandler expects memclr to preserve DX.
    10  
    11  // void runtime·memclr(void*, uintptr): stores zero to the n bytes starting at ptr. Registers written: AX, BX, CX, DI, X0 (DX is never touched).
    12  TEXT runtime·memclr(SB), NOSPLIT, $0-8	// $0-8: no local frame, 8 bytes of arguments (ptr, n)
    13  	MOVL	ptr+0(FP), DI	// DI = ptr, address of the next byte to clear
    14  	MOVL	n+4(FP), BX	// BX = n, number of bytes still to clear
    15  	XORL	AX, AX	// AX = 0, the value written by every MOVB/MOVW/MOVL/STOSL below
    16  
    17  	// MOVOU seems always faster than REP STOSL.
    18  tail:	// dispatch on BX; also re-entered from loop and nosse2 to finish a remainder
    19  	TESTL	BX, BX
    20  	JEQ	_0	// nothing (left) to clear
    21  	CMPL	BX, $2
    22  	JBE	_1or2	// unsigned compare: BX is 1 or 2
    23  	CMPL	BX, $4	// this single compare feeds both jumps below
    24  	JB	_3	// BX > 2 here, so "below 4" means exactly 3
    25  	JE	_4
    26  	CMPL	BX, $8
    27  	JBE	_5through8
    28  	CMPL	BX, $16
    29  	JBE	_9through16
    30  	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2 (mask selects bit 26)
    31  	JEQ	nosse2	// bit clear: take the REP STOSL fallback
    32  	PXOR	X0, X0	// X0 = 16 zero bytes, the source of every MOVOU store below
    33  	CMPL	BX, $32
    34  	JBE	_17through32
    35  	CMPL	BX, $64
    36  	JBE	_33through64
    37  	CMPL	BX, $128
    38  	JBE	_65through128
    39  	CMPL	BX, $256
    40  	JBE	_129through256
    41  	// TODO: use branch table and BSR to make this just a single dispatch
    42  
    43  loop:	// first entered by fall-through with BX > 256; each pass clears 256 bytes (16 stores of 16 bytes)
    44  	MOVOU	X0, 0(DI)
    45  	MOVOU	X0, 16(DI)
    46  	MOVOU	X0, 32(DI)
    47  	MOVOU	X0, 48(DI)
    48  	MOVOU	X0, 64(DI)
    49  	MOVOU	X0, 80(DI)
    50  	MOVOU	X0, 96(DI)
    51  	MOVOU	X0, 112(DI)
    52  	MOVOU	X0, 128(DI)
    53  	MOVOU	X0, 144(DI)
    54  	MOVOU	X0, 160(DI)
    55  	MOVOU	X0, 176(DI)
    56  	MOVOU	X0, 192(DI)
    57  	MOVOU	X0, 208(DI)
    58  	MOVOU	X0, 224(DI)
    59  	MOVOU	X0, 240(DI)
    60  	SUBL	$256, BX	// 256 fewer bytes remain
    61  	ADDL	$256, DI	// advance past the bytes just cleared
    62  	CMPL	BX, $256
    63  	JAE	loop	// at least 256 bytes remain: run another full pass
    64  	JMP	tail	// fewer than 256 bytes remain (possibly 0): let the dispatcher finish
    65  
    66  _1or2:
    67  	MOVB	AX, (DI)	// first byte
    68  	MOVB	AX, -1(DI)(BX*1)	// last byte (the same byte again when BX == 1)
    69  	RET
    70  _0:
    71  	RET
    72  _3:
    73  	MOVW	AX, (DI)	// bytes 0-1
    74  	MOVB	AX, 2(DI)	// byte 2
    75  	RET
    76  _4:
    77  	// We need a separate case for 4 to make sure we clear pointers atomically.
    78  	MOVL	AX, (DI)
    79  	RET
    80  _5through8:
    81  	MOVL	AX, (DI)	// first 4 bytes
    82  	MOVL	AX, -4(DI)(BX*1)	// last 4 bytes; overlaps the first store when BX < 8
    83  	RET
    84  _9through16:
    85  	MOVL	AX, (DI)	// first 8 bytes (this store and the next)
    86  	MOVL	AX, 4(DI)
    87  	MOVL	AX, -8(DI)(BX*1)	// last 8 bytes (this store and the next); overlaps the head when BX < 16
    88  	MOVL	AX, -4(DI)(BX*1)
    89  	RET
    90  _17through32:
    91  	MOVOU	X0, (DI)	// first 16 bytes
    92  	MOVOU	X0, -16(DI)(BX*1)	// last 16 bytes; overlaps the first store when BX < 32
    93  	RET
    94  _33through64:
    95  	MOVOU	X0, (DI)	// first 32 bytes (this store and the next)
    96  	MOVOU	X0, 16(DI)
    97  	MOVOU	X0, -32(DI)(BX*1)	// last 32 bytes (this store and the next); overlaps the head when BX < 64
    98  	MOVOU	X0, -16(DI)(BX*1)
    99  	RET
   100  _65through128:
   101  	MOVOU	X0, (DI)	// first 64 bytes (four stores)
   102  	MOVOU	X0, 16(DI)
   103  	MOVOU	X0, 32(DI)
   104  	MOVOU	X0, 48(DI)
   105  	MOVOU	X0, -64(DI)(BX*1)	// last 64 bytes (four stores); overlaps the head when BX < 128
   106  	MOVOU	X0, -48(DI)(BX*1)
   107  	MOVOU	X0, -32(DI)(BX*1)
   108  	MOVOU	X0, -16(DI)(BX*1)
   109  	RET
   110  _129through256:
   111  	MOVOU	X0, (DI)	// first 128 bytes (eight stores)
   112  	MOVOU	X0, 16(DI)
   113  	MOVOU	X0, 32(DI)
   114  	MOVOU	X0, 48(DI)
   115  	MOVOU	X0, 64(DI)
   116  	MOVOU	X0, 80(DI)
   117  	MOVOU	X0, 96(DI)
   118  	MOVOU	X0, 112(DI)
   119  	MOVOU	X0, -128(DI)(BX*1)	// last 128 bytes (eight stores); overlaps the head when BX < 256
   120  	MOVOU	X0, -112(DI)(BX*1)
   121  	MOVOU	X0, -96(DI)(BX*1)
   122  	MOVOU	X0, -80(DI)(BX*1)
   123  	MOVOU	X0, -64(DI)(BX*1)
   124  	MOVOU	X0, -48(DI)(BX*1)
   125  	MOVOU	X0, -32(DI)(BX*1)
   126  	MOVOU	X0, -16(DI)(BX*1)
   127  	RET
   128  nosse2:	// fallback when the sse2 bit is clear; only reached with BX > 16
   129  	MOVL	BX, CX
   130  	SHRL	$2, CX	// CX = BX / 4, the number of 4-byte stores
   131  	REP
   132  	STOSL	// store AX at (DI) CX times, advancing DI; assumes the direction flag is clear - TODO confirm
   133  	ANDL	$3, BX	// BX = leftover 0..3 bytes; also sets the flags for the jump below
   134  	JNE	tail	// leftover bytes: tail clears them starting at the advanced DI
   135  	RET