// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && riscv64

#include "textflag.h"

// ·Bytes is the register-ABI entry point taking two slice headers.
// It drops the capacity words so that the arguments line up with the
// register layout compare<> expects, then tail-jumps to compare<>.
TEXT ·Bytes<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

// ·String is the register-ABI entry point taking two (base, len) pairs.
// The arguments already match compare<>'s layout, so it only tail-jumps.
TEXT ·String<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// compare<> orders two byte sequences bytewise (unsigned), falling back
// to a length comparison when one is a prefix of the other.
//
// On entry:
//   X10 points to start of a
//   X11 length of a
//   X12 points to start of b
//   X13 length of b
// On return:
//   X10 = -1 if a < b, 0 if a == b, +1 if a > b
//
// Register roles inside the routine:
//   X5        bytes still to compare (starts as min(len(a), len(b)))
//   X6, X7    block-size constants / alignment byte counter
//   X8, X9    the pair of values handed to the final unsigned compare
//   X15-X18   8-byte words loaded from a and b
//   X19       byte mask used to locate the first differing byte
//   X21-X24   extra byte pairs in the 4-byte loop
// X11 and X13 are never modified, so cmp_len can compare the original
// lengths at any point.
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same start address: the common prefix is identical by definition,
	// only the lengths can differ.
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len	// X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	// Short inputs are not worth aligning; go straight to the byte loops.
	MOV	$32, X6
	BLT	X5, X6, loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, loop4_check
	BEQZ	X7, loop32_check

	// Check one byte at a time until we reach 8 byte alignment.
	// X7 holds the misalignment (1..7); the distance to the next 8-byte
	// boundary is 8 - X7. Looping X7 times instead would leave both
	// pointers misaligned for the 8-byte loads below (except when X7 == 4).
	// X5 >= 32 here, so X5 stays positive after the subtraction.
	SUB	X7, X0, X7		// X7 = -misalignment
	ADD	$8, X7, X7		// X7 = 8 - misalignment
	SUB	X7, X5, X5		// account for the bytes the align loop consumes
align:
	ADD	$-1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

	// 32 bytes per iteration, as four 8-byte word pairs.
loop32_check:
	MOV	$32, X7
	BLT	X5, X7, loop16_check
loop32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BEQ	X15, X16, loop32a
	JMP	cmp8a
loop32a:
	BEQ	X17, X18, loop32b
	JMP	cmp8b
loop32b:
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BEQ	X15, X16, loop32c
	JMP	cmp8a
loop32c:
	BEQ	X17, X18, loop32d
	JMP	cmp8b
loop32d:
	ADD	$32, X10
	ADD	$32, X12
	ADD	$-32, X5
	BGE	X5, X7, loop32
	BEQZ	X5, cmp_len

	// 16 bytes per iteration, as two 8-byte word pairs.
loop16_check:
	MOV	$16, X6
	BLT	X5, X6, loop4_check
loop16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BEQ	X15, X16, loop16a
	JMP	cmp8a
loop16a:
	BEQ	X17, X18, loop16b
	JMP	cmp8b
loop16b:
	ADD	$16, X10
	ADD	$16, X12
	ADD	$-16, X5
	BGE	X5, X6, loop16
	BEQZ	X5, cmp_len

	// 4 bytes per iteration using byte loads only, so this loop is also
	// the landing point for inputs whose alignments differ.
	// On a mismatch X5/X6 are reused for the result flags:
	// X5 = (a_byte < b_byte), X6 = (b_byte < a_byte).
loop4_check:
	MOV	$4, X6
	BLT	X5, X6, loop1
loop4:
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	MOVBU	1(X10), X15
	MOVBU	1(X12), X16
	BEQ	X8, X9, loop4a
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
loop4a:
	BEQ	X15, X16, loop4b
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
loop4b:
	MOVBU	2(X10), X21
	MOVBU	2(X12), X22
	MOVBU	3(X10), X23
	MOVBU	3(X12), X24
	BEQ	X21, X22, loop4c
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
loop4c:
	BEQ	X23, X24, loop4d
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
loop4d:
	ADD	$4, X10
	ADD	$4, X12
	ADD	$-4, X5
	BGE	X5, X6, loop4

	// Remaining 0..3 bytes, one at a time.
loop1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	ADD	$-1, X5
	JMP	loop1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
	// The mask in X19 walks from the least significant byte upward until
	// the masked values differ; those masked values are then ordered by
	// cmp. The loop terminates because the two words are not equal.
cmp8a:
	MOV	$0xff, X19
cmp8a_loop:
	AND	X15, X19, X8
	AND	X16, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8a_loop

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
	// Same mask walk as cmp8a, applied to the second word pair.
cmp8b:
	MOV	$0xff, X19
cmp8b_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8b_loop

	// All compared bytes were equal: order by length instead.
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5		// X5 = 1 if a-side value < b-side value
	SLTU	X8, X9, X6		// X6 = 1 if b-side value < a-side value
cmp_ret:
	SUB	X5, X6, X10		// X10 = X6 - X5, i.e. -1, 0 or +1
	RET