github.com/primecitizens/pcz/std@v0.2.1/core/cmp/bs_riscv64.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  // 
     4  // Copyright 2022 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && riscv64
     9  
    10  #include "textflag.h"
    11  
    12  TEXT ·Bytes<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
    13  	// Slice-argument entry point for compare<>. Incoming registers:
    14  	//   a: base=X10, len=X11, cap=X12 (cap is not read)
    15  	//   b: base=X13, len=X14, cap=X15 (cap is not read)
    16  	// compare<> takes b's base in X12 and b's length in X13, so shift
    17  	// both down one register; X13 is read before it is overwritten.
    18  	// The JMP is a tail call: compare<> returns to the caller of Bytes.
    19  	MOV	X13, X12	// X12 = b_base
    20  	MOV	X14, X13	// X13 = b_len
    21  	JMP	compare<>(SB)
    22  
    23  TEXT ·String<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
    24  	// String-argument entry point for compare<>. Incoming registers:
    25  	//   a: base=X10, len=X11
    26  	//   b: base=X12, len=X13
    27  	// That is already the layout compare<> takes, so tail-jump to it.
    28  	JMP	compare<>(SB)
    29  
    30  // compare<> performs a three-way lexicographic comparison of the byte
    31  // sequences a and b.
    32  //
    33  // On entry:
    34  //   X10 points to start of a
    35  //   X11 length of a
    36  //   X12 points to start of b
    37  //   X13 length of b
    38  // On return:
    39  //   X10 is -1 if a < b, 0 if a == b, +1 if a > b
    40  //
    41  // Bytes compare as unsigned values; when the first min(len(a), len(b))
    42  // bytes are all equal, the shorter input orders first.
    43  // Writes X5-X9, X15-X19 and X21-X24, and advances X10 and X12.
    44  TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
    45  	// Same base pointer: the shared prefix is equal, only lengths matter.
    46  	BEQ	X10, X12, cmp_len
    47  
    48  	MOV	X11, X5
    49  	BGE	X13, X5, use_a_len	// X5 = min(len(a), len(b))
    50  	MOV	X13, X5
    51  use_a_len:
    52  	BEQZ	X5, cmp_len
    53  
    54  	// Fewer than 32 bytes: skip the alignment work, use the byte loops.
    55  	MOV	$32, X6
    56  	BLT	X5, X6, loop4_check
    57  
    58  	// Check alignment - if alignment differs we have to do one byte at a time.
    59  	AND	$7, X10, X7
    60  	AND	$7, X12, X8
    61  	BNE	X7, X8, loop4_check
    62  	BEQZ	X7, loop32_check
    63  
    64  	// Check one byte at a time until we reach 8 byte alignment.
    65  	// X7 is the offset past the previous 8 byte boundary (1..7), so the
    66  	// next boundary is 8 - X7 bytes away. X5 >= 32 here, so it cannot
    67  	// go negative.
    68  	SUB	X7, X0, X7	// X7 = -X7
    69  	ADD	$8, X7, X7	// X7 = 8 - X7
    70  	SUB	X7, X5, X5	// account for the bytes the align loop consumes
    71  align:
    72  	ADD	$-1, X7
    73  	MOVBU	0(X10), X8
    74  	MOVBU	0(X12), X9
    75  	BNE	X8, X9, cmp
    76  	ADD	$1, X10
    77  	ADD	$1, X12
    78  	BNEZ	X7, align
    79  
    80  	// Both pointers are 8 byte aligned here. Compare 32 bytes at a time.
    81  loop32_check:
    82  	MOV	$32, X7
    83  	BLT	X5, X7, loop16_check
    84  loop32:
    85  	MOV	0(X10), X15
    86  	MOV	0(X12), X16
    87  	MOV	8(X10), X17
    88  	MOV	8(X12), X18
    89  	BEQ	X15, X16, loop32a
    90  	JMP	cmp8a
    91  loop32a:
    92  	BEQ	X17, X18, loop32b
    93  	JMP	cmp8b
    94  loop32b:
    95  	MOV	16(X10), X15
    96  	MOV	16(X12), X16
    97  	MOV	24(X10), X17
    98  	MOV	24(X12), X18
    99  	BEQ	X15, X16, loop32c
   100  	JMP	cmp8a
   101  loop32c:
   102  	BEQ	X17, X18, loop32d
   103  	JMP	cmp8b
   104  loop32d:
   105  	ADD	$32, X10
   106  	ADD	$32, X12
   107  	ADD	$-32, X5
   108  	BGE	X5, X7, loop32
   109  	BEQZ	X5, cmp_len
   110  
   111  	// Compare 16 bytes at a time.
   112  loop16_check:
   113  	MOV	$16, X6
   114  	BLT	X5, X6, loop4_check
   115  loop16:
   116  	MOV	0(X10), X15
   117  	MOV	0(X12), X16
   118  	MOV	8(X10), X17
   119  	MOV	8(X12), X18
   120  	BEQ	X15, X16, loop16a
   121  	JMP	cmp8a
   122  loop16a:
   123  	BEQ	X17, X18, loop16b
   124  	JMP	cmp8b
   125  loop16b:
   126  	ADD	$16, X10
   127  	ADD	$16, X12
   128  	ADD	$-16, X5
   129  	BGE	X5, X6, loop16
   130  	BEQZ	X5, cmp_len
   131  
   132  	// Compare 4 bytes at a time using byte loads, which have no
   133  	// alignment requirement. On a mismatch X5/X6 are set the same way
   134  	// as at cmp and control goes straight to cmp_ret.
   135  loop4_check:
   136  	MOV	$4, X6
   137  	BLT	X5, X6, loop1
   138  loop4:
   139  	MOVBU	0(X10), X8
   140  	MOVBU	0(X12), X9
   141  	MOVBU	1(X10), X15
   142  	MOVBU	1(X12), X16
   143  	BEQ	X8, X9, loop4a
   144  	SLTU	X9, X8, X5
   145  	SLTU	X8, X9, X6
   146  	JMP	cmp_ret
   147  loop4a:
   148  	BEQ	X15, X16, loop4b
   149  	SLTU	X16, X15, X5
   150  	SLTU	X15, X16, X6
   151  	JMP	cmp_ret
   152  loop4b:
   153  	MOVBU	2(X10), X21
   154  	MOVBU	2(X12), X22
   155  	MOVBU	3(X10), X23
   156  	MOVBU	3(X12), X24
   157  	BEQ	X21, X22, loop4c
   158  	SLTU	X22, X21, X5
   159  	SLTU	X21, X22, X6
   160  	JMP	cmp_ret
   161  loop4c:
   162  	BEQ	X23, X24, loop4d
   163  	SLTU	X24, X23, X5
   164  	SLTU	X23, X24, X6
   165  	JMP	cmp_ret
   166  loop4d:
   167  	ADD	$4, X10
   168  	ADD	$4, X12
   169  	ADD	$-4, X5
   170  	BGE	X5, X6, loop4
   171  
   172  	// Compare the remaining bytes one at a time.
   173  loop1:
   174  	BEQZ	X5, cmp_len
   175  	MOVBU	0(X10), X8
   176  	MOVBU	0(X12), X9
   177  	BNE	X8, X9, cmp
   178  	ADD	$1, X10
   179  	ADD	$1, X12
   180  	ADD	$-1, X5
   181  	JMP	loop1
   182  
   183  	// Compare 8 bytes of memory in X15/X16 that are known to differ.
   184  	// Move them into X17/X18 and fall through to cmp8b.
   185  cmp8a:
   186  	MOV	X15, X17
   187  	MOV	X16, X18
   188  
   189  	// Compare 8 bytes of memory in X17/X18 that are known to differ.
   190  	// Slide a one byte mask up from the least significant byte (the
   191  	// lowest address, given little-endian loads) to the first byte that
   192  	// differs; the loop ends because the words are known to differ.
   193  cmp8b:
   194  	MOV	$0xff, X19
   195  cmp8_loop:
   196  	AND	X17, X19, X8
   197  	AND	X18, X19, X9
   198  	BNE	X8, X9, cmp
   199  	SLLI	$8, X19
   200  	JMP	cmp8_loop
   201  
   202  	// All compared bytes were equal: order by length.
   203  cmp_len:
   204  	MOV	X11, X8
   205  	MOV	X13, X9
   206  	// X8/X9 hold the deciding unsigned values taken from a and b.
   207  cmp:
   208  	SLTU	X9, X8, X5	// X5 = 1 if X8 < X9, else 0
   209  	SLTU	X8, X9, X6	// X6 = 1 if X9 < X8, else 0
   210  cmp_ret:
   211  	SUB	X5, X6, X10	// X10 = X6 - X5
   212  	RET