github.com/primecitizens/pcz/std@v0.2.1/core/mem/equal_riscv64.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2019 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && riscv64
     9  
    10  #include "textflag.h"
    11  
    12  #define CTXT S10
    13  
    14  // func Equal(a, b unsafe.Pointer, size uintptr) bool
    15  TEXT ·Equal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    16  	// X10 = a_base
    17  	// X11 = b_base
    18  	// X12 = size
    19  	JMP memequal<>(SB) // tail call: memequal<> leaves the boolean result in X10
    20  
    21  // func memequal_varlen(a, b unsafe.Pointer) bool (the size is read from the closure in CTXT)
    22  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
    23  	MOV 8(CTXT), X12    // X12 = size; compiler stores size at offset 8 in the closure
    24  	// X10 = a_base
    25  	// X11 = b_base
    26  	JMP memequal<>(SB) // tail call: memequal<> leaves the boolean result in X10
    27  
    28  // memequal<> reports whether two X12-byte memory regions are identical.
    29  //
    30  // On entry X10 and X11 contain pointers, X12 contains length.
    31  // The result is returned in X10 (regabi): 1 if the regions match, 0 if not.
    32  // Modifies X9-X12, X14-X17 and X19-X23.
    33  TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
    34  	// Identical pointers always compare equal.
    35  	BEQ X10, X11, eq
    36  
    37  	// Fewer than 32 bytes: go straight to the byte-wise loops.
    38  	MOV $32, X23
    39  	BLT X12, X23, loop4_check
    40  
    41  	// Check alignment - if alignment differs we have to do one byte at a time.
    42  	AND $7, X10, X9
    43  	AND $7, X11, X19
    44  	BNE X9, X19, loop4_check
    45  	BEQZ X9, loop32_check
    46  
    47  	// Check one byte at a time until we reach 8 byte alignment.
    48  	// X9 is the offset past the previous 8 byte boundary (1..7), so the
    49  	// number of bytes up to the next boundary is 8 - X9.
    50  	SUB X9, X0, X9
    51  	ADD $8, X9, X9
    52  	SUB X9, X12, X12
    53  align:
    54  	ADD $-1, X9
    55  	MOVBU 0(X10), X19
    56  	MOVBU 0(X11), X20
    57  	BNE X19, X20, not_eq
    58  	ADD $1, X10
    59  	ADD $1, X11
    60  	BNEZ X9, align
    61  
    62  	// Compare 32 bytes per iteration using 8 byte loads.
    63  loop32_check:
    64  	MOV $32, X9
    65  	BLT X12, X9, loop16_check
    66  loop32:
    67  	MOV 0(X10), X19
    68  	MOV 0(X11), X20
    69  	MOV 8(X10), X21
    70  	MOV 8(X11), X22
    71  	BNE X19, X20, not_eq
    72  	BNE X21, X22, not_eq
    73  	MOV 16(X10), X14
    74  	MOV 16(X11), X15
    75  	MOV 24(X10), X16
    76  	MOV 24(X11), X17
    77  	BNE X14, X15, not_eq
    78  	BNE X16, X17, not_eq
    79  	ADD $32, X10
    80  	ADD $32, X11
    81  	ADD $-32, X12
    82  	BGE X12, X9, loop32
    83  	BEQZ X12, eq
    84  
    85  	// Compare 16 bytes per iteration using 8 byte loads.
    86  loop16_check:
    87  	MOV $16, X23
    88  	BLT X12, X23, loop4_check
    89  loop16:
    90  	MOV 0(X10), X19
    91  	MOV 0(X11), X20
    92  	MOV 8(X10), X21
    93  	MOV 8(X11), X22
    94  	BNE X19, X20, not_eq
    95  	BNE X21, X22, not_eq
    96  	ADD $16, X10
    97  	ADD $16, X11
    98  	ADD $-16, X12
    99  	BGE X12, X23, loop16
   100  	BEQZ X12, eq
   101  
   102  	// Compare 4 bytes per iteration using single byte loads.
   103  loop4_check:
   104  	MOV $4, X23
   105  	BLT X12, X23, loop1
   106  loop4:
   107  	MOVBU 0(X10), X19
   108  	MOVBU 0(X11), X20
   109  	MOVBU 1(X10), X21
   110  	MOVBU 1(X11), X22
   111  	BNE X19, X20, not_eq
   112  	BNE X21, X22, not_eq
   113  	MOVBU 2(X10), X14
   114  	MOVBU 2(X11), X15
   115  	MOVBU 3(X10), X16
   116  	MOVBU 3(X11), X17
   117  	BNE X14, X15, not_eq
   118  	BNE X16, X17, not_eq
   119  	ADD $4, X10
   120  	ADD $4, X11
   121  	ADD $-4, X12
   122  	BGE X12, X23, loop4
   123  
   124  	// Compare the remaining bytes one at a time.
   125  loop1:
   126  	BEQZ X12, eq
   127  	MOVBU 0(X10), X19
   128  	MOVBU 0(X11), X20
   129  	BNE X19, X20, not_eq
   130  	ADD $1, X10
   131  	ADD $1, X11
   132  	ADD $-1, X12
   133  	JMP loop1
   134  
   135  not_eq:
   136  	MOVB ZERO, X10
   137  	RET
   138  eq:
   139  	MOV $1, X10
   140  	RET