// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/bytealg/compare_riscv64.s

#include "go_asm.h"
#include "textflag.h"

// ·Compare is the register-ABI entry point taking two slices.
// It shuffles the slice registers into the layout compare<> expects
// and tail-jumps to it.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
	JMP	compare<>(SB)

// runtime·cmpstring is the register-ABI entry point taking two strings.
// The incoming registers already match compare<>'s layout.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// compare<> lexicographically compares two byte sequences.
//
// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
//
// On exit:
// X10 = -1 if a < b, 0 if a == b, +1 if a > b
//
// Working registers:
// X5       bytes of the common prefix still to be compared
// X6, X7   loop thresholds / alignment count
// X8, X9   the pair of values fed to the final unsigned comparison
// X15-X18  doublewords loaded by the 32/16 byte loops
// X19      byte mask used to locate the first differing byte
// X21-X24  extra byte pairs loaded by the 4 byte loop
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	// Same base pointer: the common prefix is identical, only lengths matter.
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	// Short inputs skip the alignment work and the wide loops entirely.
	MOV	$32, X6
	BLT	X5, X6, loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$7, X10, X7
	AND	$7, X12, X8
	BNE	X7, X8, loop4_check
	BEQZ	X7, loop32_check

	// Check one byte at a time until we reach 8 byte alignment.
	// X7 holds (address & 7); the distance to the next 8 byte boundary
	// is 8 - X7, so convert it before using it as the byte count.
	// X5 >= 32 here, so X5 stays positive after the subtraction.
	SUB	X7, X0, X7	// X7 = -X7
	ADD	$8, X7, X7	// X7 = 8 - (address & 7), in 1..7
	SUB	X7, X5, X5	// account for the bytes consumed by the align loop
align:
	ADD	$-1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

	// Both pointers are 8 byte aligned from here until loop4_check.
loop32_check:
	MOV	$32, X7
	BLT	X5, X7, loop16_check
loop32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BEQ	X15, X16, loop32a
	JMP	cmp8a
loop32a:
	BEQ	X17, X18, loop32b
	JMP	cmp8b
loop32b:
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BEQ	X15, X16, loop32c
	JMP	cmp8a
loop32c:
	BEQ	X17, X18, loop32d
	JMP	cmp8b
loop32d:
	ADD	$32, X10
	ADD	$32, X12
	ADD	$-32, X5
	BGE	X5, X7, loop32
	BEQZ	X5, cmp_len

loop16_check:
	MOV	$16, X6
	BLT	X5, X6, loop4_check
loop16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BEQ	X15, X16, loop16a
	JMP	cmp8a
loop16a:
	BEQ	X17, X18, loop16b
	JMP	cmp8b
loop16b:
	ADD	$16, X10
	ADD	$16, X12
	ADD	$-16, X5
	BGE	X5, X6, loop16
	BEQZ	X5, cmp_len

	// Byte-wise path: used for short inputs, for inputs whose alignments
	// differ, and for the tail left over by the wide loops.
loop4_check:
	MOV	$4, X6
	BLT	X5, X6, loop1
loop4:
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	MOVBU	1(X10), X15
	MOVBU	1(X12), X16
	BEQ	X8, X9, loop4a
	// On a mismatch X5/X6 are reused as the two comparison flags
	// consumed at cmp_ret; the loop is not re-entered afterwards.
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
loop4a:
	BEQ	X15, X16, loop4b
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
loop4b:
	MOVBU	2(X10), X21
	MOVBU	2(X12), X22
	MOVBU	3(X10), X23
	MOVBU	3(X12), X24
	BEQ	X21, X22, loop4c
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
loop4c:
	BEQ	X23, X24, loop4d
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
loop4d:
	ADD	$4, X10
	ADD	$4, X12
	ADD	$-4, X5
	BGE	X5, X6, loop4

loop1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	ADD	$-1, X5
	JMP	loop1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
	// The mask walks upward from the least significant byte, which is the
	// byte at the lowest address, so the first mismatch found is the
	// earliest differing byte. The loop terminates because the
	// doublewords are known to differ.
cmp8a:
	MOV	$0xff, X19
cmp8a_loop:
	AND	X15, X19, X8
	AND	X16, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8a_loop

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
cmp8b_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8b_loop

	// Common prefix is equal: the result is decided by the lengths.
cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5	// X5 = 1 if X8 < X9 (unsigned)
	SLTU	X8, X9, X6	// X6 = 1 if X9 < X8 (unsigned)
cmp_ret:
	SUB	X5, X6, X10	// X10 = X6 - X5: -1, 0 or +1
	RET