// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Byte-slice / string comparison for riscv64 (Plan 9 assembly, Go internal
// register ABI). Both entry points funnel into compare<>, which returns
// -1/0/1 in X10 for a<b / a==b / a>b respectively.

#include "go_asm.h"
#include "textflag.h"

// func Compare(a, b []byte) int
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	// Slices carry a cap word, so b's base/len must be shifted down to
	// the registers compare<> expects before the tail-jump.
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

// func cmpstring(a, b string) int
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	// Strings have no cap word, so the arguments already sit in the
	// registers compare<> expects.
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi X14 points to the address to store the return value (-1/0/1)
// for regabi the return value in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer => contents are equal; compare by length only.
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	// Short inputs (< 32 bytes) skip the alignment dance entirely.
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	// X7 currently holds (a & 7); turn it into the byte count needed
	// to reach alignment: X7 = 8 - (a & 7). X0 is the zero register.
	SUB	X7, X0, X7
	ADD	$8, X7, X7
	SUB	X7, X5, X5	// deduct the alignment bytes from the remaining count
align:
	SUB	$1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X6 contains $32
	BLT	X5, X6, compare16
compare32:
	// Main loop: compare 32 bytes per iteration as four 8-byte words.
	// On a mismatch, cmp8a/cmp8b locate the first differing byte.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	SUB	$32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	// Aligned 16-byte tail: one more pair of 8-byte word compares.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	SUB	$16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	// Unaligned (or short) path: load 8 bytes individually from each
	// side, then branch on the first mismatching position.
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	SUB	$8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	// Same byte-wise scheme for a 4-byte tail.
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	SUB	$4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
	// Final 0-3 bytes, one at a time.
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	SUB	$1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	// Scan for the first differing byte starting at the low-order byte,
	// which on little-endian RV64 is the byte at the lowest address.
	// The words differ, so the loop is guaranteed to terminate.
	MOV	$0xff, X19
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

	// cmp1a-cmp1h: materialize the ordering of the differing byte pair
	// from the unaligned paths into X5/X6, then fall through to cmp_ret.
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

cmp_len:
	// Common prefix (of length min(len(a), len(b))) is equal:
	// the shorter operand sorts first, so compare the lengths.
	MOV	X11, X8
	MOV	X13, X9
cmp:
	// X5/X6 get the two unsigned "less-than" results for the pair
	// (X8, X9); exactly one is 1 when they differ, both 0 when equal.
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	// Combine the two flags into the -1/0/1 result in X10.
	SUB	X5, X6, X10
	RET