// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi X14 points to the address to store the return value (-1/0/1)
// for regabi the return value in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer: the result depends only on the lengths.
	BEQ	X10, X12, cmp_len

	// X5 = min(len(a), len(b)) = number of bytes to compare.
	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	// Short inputs go straight to the unaligned byte-wise paths.
	MOV	$32, X6
	BLT	X5, X6, check8_unaligned

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, check8_unaligned
	BEQZ	X7, compare32

	// Check one byte at a time until we reach 8 byte alignment.
	// We need 8 - (addr & 7) bytes to reach alignment, not (addr & 7):
	// compute X7 = 8 - (X10 & 7) before consuming it as the loop count,
	// otherwise the 8-byte loads below would all be misaligned.
	SUB	X7, X0, X7
	ADD	$8, X7, X7	// X7 = 8 - (X10 & 7)
	SUB	X7, X5, X5
align:
	ADD	$-1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

check32:
	// X5 >= 25 here (we started with >= 32 and peeled at most 7 bytes),
	// so falling to compare16 when X5 < 32 is always safe.
	MOV	$32, X6
	BLT	X5, X6, compare16
compare32:
	// Compare 32 bytes per iteration as four aligned 8-byte words.
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$32, X10
	ADD	$32, X12
	ADD	$-32, X5
	BGE	X5, X6, compare32
	BEQZ	X5, cmp_len

check16:
	MOV	$16, X6
	BLT	X5, X6, check8_unaligned
compare16:
	// Compare one aligned 16-byte chunk (two 8-byte words).
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BNE	X15, X16, cmp8a
	BNE	X17, X18, cmp8b
	ADD	$16, X10
	ADD	$16, X12
	ADD	$-16, X5
	BEQZ	X5, cmp_len

check8_unaligned:
	MOV	$8, X6
	BLT	X5, X6, check4_unaligned
compare8_unaligned:
	// 8 independent byte loads per side to hide load latency; no
	// alignment requirement.
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	4(X10), X21
	MOVBU	5(X10), X23
	MOVBU	6(X10), X25
	MOVBU	7(X10), X29
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	MOVBU	4(X12), X22
	MOVBU	5(X12), X24
	MOVBU	6(X12), X28
	MOVBU	7(X12), X30
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	BNE	X21, X22, cmp1e
	BNE	X23, X24, cmp1f
	BNE	X25, X28, cmp1g
	BNE	X29, X30, cmp1h
	ADD	$8, X10
	ADD	$8, X12
	ADD	$-8, X5
	BGE	X5, X6, compare8_unaligned
	BEQZ	X5, cmp_len

check4_unaligned:
	MOV	$4, X6
	BLT	X5, X6, compare1
compare4_unaligned:
	MOVBU	0(X10), X8
	MOVBU	1(X10), X15
	MOVBU	2(X10), X17
	MOVBU	3(X10), X19
	MOVBU	0(X12), X9
	MOVBU	1(X12), X16
	MOVBU	2(X12), X18
	MOVBU	3(X12), X20
	BNE	X8, X9, cmp1a
	BNE	X15, X16, cmp1b
	BNE	X17, X18, cmp1c
	BNE	X19, X20, cmp1d
	ADD	$4, X10
	ADD	$4, X12
	ADD	$-4, X5
	BGE	X5, X6, compare4_unaligned

compare1:
	// Tail: compare the remaining 0-3 bytes one at a time.
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	ADD	$-1, X5
	JMP	compare1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	X15, X17
	MOV	X16, X18

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
	// Scan byte masks from the low end: the first differing byte in
	// memory order (little-endian => lowest byte first) decides the result.
cmp8b:
	MOV	$0xff, X19
cmp8_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8_loop

	// Each cmp1x label compares the differing byte pair it was reached
	// from: X5 = (a_byte > b_byte), X6 = (a_byte < b_byte).
cmp1a:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
cmp1b:
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
cmp1c:
	SLTU	X18, X17, X5
	SLTU	X17, X18, X6
	JMP	cmp_ret
cmp1d:
	SLTU	X20, X19, X5
	SLTU	X19, X20, X6
	JMP	cmp_ret
cmp1e:
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
cmp1f:
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
cmp1g:
	SLTU	X28, X25, X5
	SLTU	X25, X28, X6
	JMP	cmp_ret
cmp1h:
	SLTU	X30, X29, X5
	SLTU	X29, X30, X6
	JMP	cmp_ret

cmp_len:
	// Common prefix exhausted (or identical pointers): compare lengths.
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	SUB	X5, X6, X10	// X10 = (a>b) - (a<b) = -1/0/1
	RET