github.com/primecitizens/pcz/std@v0.2.1/core/mem/clear_mips64x.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2015 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && (mips64 || mips64le)
     9  
    10  #include "textflag.h"
    11  
    12  // See memclrNoHeapPointers Go doc for important implementation constraints.
    13  // Clear zeroes the n bytes starting at ptr. R1 = write cursor, R4 = ptr+n (end), R0 = MIPS zero register.
    14  // func Clear(ptr unsafe.Pointer, n uintptr)
    15  TEXT ·Clear(SB),NOSPLIT,$0-16
    16  	MOVV ptr+0(FP), R1	// R1 = ptr; advanced as bytes are cleared
    17  	MOVV n+8(FP), R2	// R2 = n
    18  	ADDV R1, R2, R4	// R4 = ptr + n, one past the last byte to clear
    19  
    20  	// if less than 16 bytes or no MSA, do words check
    21  	SGTU $16, R2, R3	// R3 = (16 > n), unsigned compare
    22  	BNE R3, no_msa	// n < 16: too small for a 16-byte vector store
    23  	MOVBU core∕cpu·MIPS64X+const_offsetMIPS64XHasMSA(SB), R3	// load the HasMSA feature byte
    24  	BEQ R3, R0, no_msa	// feature byte is zero: take the scalar path
    25  
    26  	VMOVB $0, W0	// W0 = zero vector stored by every MSA store below
    27  
    28  	SGTU $128, R2, R3	// R3 = (128 > n)
    29  	BEQ R3, msa_large	// n >= 128: use the 128-byte unrolled loop
    30  	// 16 <= n < 128: clear whole 16-byte chunks, then one overlapping store for the tail
    31  	AND $15, R2, R5	// R5 = n & 15: bytes left over after the whole chunks
    32  	XOR R2, R5, R6	// R6 = n with its low 4 bits cleared
    33  	ADDVU R1, R6	// R6 = address where the whole 16-byte chunks end
    34  
    35  msa_small:
    36  	VMOVB W0, (R1)	// clear 16 bytes at the cursor
    37  	ADDVU $16, R1
    38  	SGTU R6, R1, R3	// R3 = (R6 > R1): whole chunks remain
    39  	BNE R3, R0, msa_small
    40  	BEQ R5, R0, done	// n was a multiple of 16: nothing left over
    41  	VMOVB W0, -16(R4)	// clear the final 16 bytes; overlaps bytes already cleared
    42  	JMP done
    43  
    44  msa_large:
    45  	AND $127, R2, R5	// R5 = n & 127: bytes left over after the whole 128-byte chunks
    46  	XOR R2, R5, R6	// R6 = n with its low 7 bits cleared
    47  	ADDVU R1, R6	// R6 = address where the whole 128-byte chunks end
    48  
    49  msa_large_loop:
    50  	VMOVB W0, (R1)	// eight 16-byte stores clear 128 bytes per iteration
    51  	VMOVB W0, 16(R1)
    52  	VMOVB W0, 32(R1)
    53  	VMOVB W0, 48(R1)
    54  	VMOVB W0, 64(R1)
    55  	VMOVB W0, 80(R1)
    56  	VMOVB W0, 96(R1)
    57  	VMOVB W0, 112(R1)
    58  
    59  	ADDVU $128, R1
    60  	SGTU R6, R1, R3	// R3 = (R6 > R1): whole chunks remain
    61  	BNE R3, R0, msa_large_loop
    62  	BEQ R5, R0, done	// n was a multiple of 128: nothing left over
    63  	VMOVB W0, -128(R4)	// clear the final 128 bytes before the end; overlaps bytes already cleared
    64  	VMOVB W0, -112(R4)
    65  	VMOVB W0, -96(R4)
    66  	VMOVB W0, -80(R4)
    67  	VMOVB W0, -64(R4)
    68  	VMOVB W0, -48(R4)
    69  	VMOVB W0, -32(R4)
    70  	VMOVB W0, -16(R4)
    71  	JMP done
    72  
    73  no_msa:
    74  	// if less than 8 bytes, do one byte at a time
    75  	SGTU $8, R2, R3	// R3 = (8 > n)
    76  	BNE R3, out
    77  
    78  	// do one byte at a time until 8-aligned
    79  	AND $7, R1, R3	// R3 = cursor & 7; zero once the cursor is 8-byte aligned
    80  	BEQ R3, words
    81  	MOVB R0, (R1)	// store one zero byte
    82  	ADDV $1, R1
    83  	JMP -4(PC)	// back to the AND above to re-test alignment
    84  
    85  words:
    86  	// do 8 bytes at a time if there is room
    87  	ADDV $-7, R4, R2	// R2 (n no longer needed) = end - 7; cursor < R2 means at least 8 bytes remain
    88  
    89  	SGTU R2, R1, R3	// R3 = (R2 > R1)
    90  	BEQ R3, out	// fewer than 8 bytes left: finish one byte at a time
    91  	MOVV R0, (R1)	// store one zero 8-byte word
    92  	ADDV $8, R1
    93  	JMP -4(PC)	// back to the SGTU above
    94  
    95  out:
    96  	BEQ R1, R4, done	// cursor reached the end address
    97  	MOVB R0, (R1)	// store one zero byte
    98  	ADDV $1, R1
    99  	JMP -3(PC)	// back to the BEQ above
   100  done:
   101  	RET