// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !plan9

#include "textflag.h"

// NOTE: Windows externalthreadhandler expects memclr to preserve DX.
// No instruction in this routine writes DX; keep it that way.

// void runtime·memclr(void*, uintptr)
//
// Stores zero to the n bytes starting at ptr.
//
// Register roles:
//	DI = address of the next byte to clear
//	BX = number of bytes still to clear
//	AX = 0, the source of every integer store
//	X0 = 0, the source of every 16-byte store (SSE2 path only)
//	CX = dword count for REP STOSL (non-SSE2 path only)
//
// Sizes up to 256 bytes are handled by a fixed set of stores chosen by
// size class. Within a class the stores are issued from both ends of the
// buffer (offsets from DI and from DI+BX) and are allowed to overlap, so
// one straight-line sequence covers every length in the class.
// Larger sizes are cleared 256 bytes at a time and the remainder is then
// sent back through the same dispatch.
TEXT runtime·memclr(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), BX
	XORL	AX, AX

	// MOVOU seems always faster than REP STOSL.
dispatch:
	TESTL	BX, BX
	JEQ	done
	CMPL	BX, $2
	JBE	clr_1or2
	CMPL	BX, $4
	JB	clr_3
	JE	clr_4
	CMPL	BX, $8
	JBE	clr_5to8
	CMPL	BX, $16
	JBE	clr_9to16
	// Everything above 16 bytes wants 16-byte stores, which need SSE2.
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JEQ	no_sse2
	PXOR	X0, X0
	CMPL	BX, $32
	JBE	clr_17to32
	CMPL	BX, $64
	JBE	clr_33to64
	CMPL	BX, $128
	JBE	clr_65to128
	CMPL	BX, $256
	JBE	clr_129to256
	// TODO: use branch table and BSR to make this just a single dispatch

	// More than 256 bytes: fall through into the bulk loop.
bulk256:
	MOVOU	X0, 0(DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	SUBL	$256, BX
	ADDL	$256, DI
	CMPL	BX, $256
	JAE	bulk256
	// Fewer than 256 bytes remain (possibly none); let the dispatch finish.
	JMP	dispatch

done:
	RET

clr_1or2:
	// First byte and last byte; for n == 1 they are the same byte.
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET

clr_3:
	MOVW	AX, (DI)
	MOVB	AX, 2(DI)
	RET

clr_4:
	// A 4-byte clear gets its own single-store case so that
	// pointers are cleared atomically.
	MOVL	AX, (DI)
	RET

clr_5to8:
	// First dword and last dword; they overlap when n < 8.
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET

clr_9to16:
	// First 8 bytes and last 8 bytes; they overlap when n < 16.
	MOVL	AX, (DI)
	MOVL	AX, 4(DI)
	MOVL	AX, -8(DI)(BX*1)
	MOVL	AX, -4(DI)(BX*1)
	RET

clr_17to32:
	// First 16 bytes and last 16 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET

clr_33to64:
	// First 32 bytes and last 32 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET

clr_65to128:
	// First 64 bytes and last 64 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET

clr_129to256:
	// First 128 bytes and last 128 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET

no_sse2:
	// No SSE2: clear whole dwords with REP STOSL (AX is the zero source,
	// CX the dword count). NOTE(review): this relies on the direction flag
	// being clear so that DI ends up just past the cleared dwords.
	MOVL	BX, CX
	SHRL	$2, CX
	REP
	STOSL
	// 0..3 bytes may be left over; the small cases of the dispatch
	// finish them.
	ANDL	$3, BX
	JNE	dispatch
	RET