github.com/primecitizens/pcz/std@v0.2.1/core/mem/clear_386.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2014 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && 386 && !plan9
     9  
    10  #include "textflag.h"
    11  
    12  // See memclrNoHeapPointers Go doc for important implementation constraints.
    13  // Clear zeroes the n bytes of memory starting at ptr.
    14  // func Clear(ptr unsafe.Pointer, n uintptr)
    15  TEXT ·Clear(SB), NOSPLIT, $0-8	// $0-8: no local frame, 8 bytes of arguments
    16  	MOVL ptr+0(FP), DI	// DI = ptr: address of the next byte to clear
    17  	MOVL n+4(FP), BX	// BX = n: number of bytes still to clear
    18  	XORL AX, AX	// AX = 0: source operand of every scalar store below
    19  
    20  	// MOVOU seems always faster than REP STOSL.
    21  tail:	// size dispatch on BX; re-entered from loop and nosse2 with the leftover count
    22  	// BSR+branch table make almost all memmove/memclr benchmarks worse. Not worth doing.
    23  	TESTL BX, BX
    24  	JEQ _0	// BX == 0: nothing to clear
    25  	CMPL BX, $2
    26  	JBE _1or2	// BX is 1 or 2
    27  	CMPL BX, $4
    28  	JB _3	// BX == 3
    29  	JE _4	// BX == 4
    30  	CMPL BX, $8
    31  	JBE _5through8
    32  	CMPL BX, $16
    33  	JBE _9through16
    34  #ifdef GO386_softfloat
    35  	JMP nosse2	// with GO386_softfloat defined, sizes above 16 never reach the X0 stores
    36  #endif
    37  	PXOR X0, X0	// X0 = 16 zero bytes: source operand of every MOVOU below
    38  	CMPL BX, $32
    39  	JBE _17through32
    40  	CMPL BX, $64
    41  	JBE _33through64
    42  	CMPL BX, $128
    43  	JBE _65through128
    44  	CMPL BX, $256
    45  	JBE _129through256
    46  
    47  loop:	// BX >= 256 here: clear one 256-byte chunk with sixteen 16-byte stores
    48  	MOVOU X0, 0(DI)
    49  	MOVOU X0, 16(DI)
    50  	MOVOU X0, 32(DI)
    51  	MOVOU X0, 48(DI)
    52  	MOVOU X0, 64(DI)
    53  	MOVOU X0, 80(DI)
    54  	MOVOU X0, 96(DI)
    55  	MOVOU X0, 112(DI)
    56  	MOVOU X0, 128(DI)
    57  	MOVOU X0, 144(DI)
    58  	MOVOU X0, 160(DI)
    59  	MOVOU X0, 176(DI)
    60  	MOVOU X0, 192(DI)
    61  	MOVOU X0, 208(DI)
    62  	MOVOU X0, 224(DI)
    63  	MOVOU X0, 240(DI)
    64  	SUBL $256, BX	// 256 fewer bytes remain
    65  	ADDL $256, DI	// advance past the chunk just cleared
    66  	CMPL BX, $256
    67  	JAE loop	// unsigned compare: at least one more full chunk remains
    68  	JMP tail	// 0-255 bytes remain; let the size dispatch finish them
    69  
    70  _1or2:
    71  	MOVB AX, (DI)	// first byte
    72  	MOVB AX, -1(DI)(BX*1)	// last byte (the same byte again when BX == 1)
    73  	RET
    74  _0:
    75  	RET
    76  _3:
    77  	MOVW AX, (DI)	// bytes 0-1
    78  	MOVB AX, 2(DI)	// byte 2
    79  	RET
    80  _4:
    81  	// We need a separate case for 4 to make sure we clear pointers atomically.
    82  	MOVL AX, (DI)
    83  	RET
    84  _5through8:
    85  	MOVL AX, (DI)	// first 4 bytes
    86  	MOVL AX, -4(DI)(BX*1)	// last 4 bytes; overlaps the first store when BX < 8
    87  	RET
    88  _9through16:
    89  	MOVL AX, (DI)	// first 8 bytes (two 4-byte stores)
    90  	MOVL AX, 4(DI)
    91  	MOVL AX, -8(DI)(BX*1)	// last 8 bytes; overlaps the head when BX < 16
    92  	MOVL AX, -4(DI)(BX*1)
    93  	RET
    94  _17through32:
    95  	MOVOU X0, (DI)	// first 16 bytes
    96  	MOVOU X0, -16(DI)(BX*1)	// last 16 bytes; overlaps the first store when BX < 32
    97  	RET
    98  _33through64:
    99  	MOVOU X0, (DI)	// first 32 bytes
   100  	MOVOU X0, 16(DI)
   101  	MOVOU X0, -32(DI)(BX*1)	// last 32 bytes; overlaps the head when BX < 64
   102  	MOVOU X0, -16(DI)(BX*1)
   103  	RET
   104  _65through128:
   105  	MOVOU X0, (DI)	// first 64 bytes
   106  	MOVOU X0, 16(DI)
   107  	MOVOU X0, 32(DI)
   108  	MOVOU X0, 48(DI)
   109  	MOVOU X0, -64(DI)(BX*1)	// last 64 bytes; overlaps the head when BX < 128
   110  	MOVOU X0, -48(DI)(BX*1)
   111  	MOVOU X0, -32(DI)(BX*1)
   112  	MOVOU X0, -16(DI)(BX*1)
   113  	RET
   114  _129through256:
   115  	MOVOU X0, (DI)	// first 128 bytes
   116  	MOVOU X0, 16(DI)
   117  	MOVOU X0, 32(DI)
   118  	MOVOU X0, 48(DI)
   119  	MOVOU X0, 64(DI)
   120  	MOVOU X0, 80(DI)
   121  	MOVOU X0, 96(DI)
   122  	MOVOU X0, 112(DI)
   123  	MOVOU X0, -128(DI)(BX*1)	// last 128 bytes; overlaps the head when BX < 256
   124  	MOVOU X0, -112(DI)(BX*1)
   125  	MOVOU X0, -96(DI)(BX*1)
   126  	MOVOU X0, -80(DI)(BX*1)
   127  	MOVOU X0, -64(DI)(BX*1)
   128  	MOVOU X0, -48(DI)(BX*1)
   129  	MOVOU X0, -32(DI)(BX*1)
   130  	MOVOU X0, -16(DI)(BX*1)
   131  	RET
   132  nosse2:	// only reachable through the GO386_softfloat jump above, so BX > 16 here
   133  	MOVL BX, CX
   134  	SHRL $2, CX	// CX = BX / 4: number of whole 32-bit words to store
   135  	REP
   136  	STOSL	// store AX (zero) CX times at DI, moving DI forward 4 bytes per store (assumes the direction flag is clear)
   137  	ANDL $3, BX	// BX = leftover byte count, 0-3
   138  	JNE tail	// 1-3 bytes left: the small cases at tail finish them
   139  	RET