// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// ·Index is the byte-slice entry point of the substring search.
// It reads the two (base, len) pairs from its 56-byte argument frame,
// points R9 at the result slot and tail-branches to indexbody<>.
// NOTE(review): the Go prototype lives outside this file; the frame
// layout suggests func Index(a, b []byte) int -- confirm against it.
TEXT ·Index(SB),NOSPLIT,$0-56
	MOVD	$ret+48(FP), R9
	MOVD	a_base+0(FP), R0
	MOVD	a_len+8(FP), R1
	MOVD	b_base+24(FP), R2
	MOVD	b_len+32(FP), R3
	B	indexbody<>(SB)

// ·IndexString is the string entry point of the substring search.
// Same register set-up as ·Index, but with the 40-byte frame layout
// used here (no capacity words between the operands).
// NOTE(review): presumably func IndexString(a, b string) int -- confirm.
TEXT ·IndexString(SB),NOSPLIT,$0-40
	MOVD	$ret+32(FP), R9
	MOVD	a_base+0(FP), R0
	MOVD	a_len+8(FP), R1
	MOVD	b_base+16(FP), R2
	MOVD	b_len+24(FP), R3
	B	indexbody<>(SB)

// indexbody<> is the shared search routine.
//
// On entry:
//   R0  haystack base address
//   R1  haystack length
//   R2  needle base address
//   R3  needle length (2 <= len <= 32)
//   R9  address where the result is stored
//
// On exit, the word at (R9) holds the offset of the first occurrence of
// the needle in the haystack, or -1 when there is none.
//
// Approach: the needle is loaded into registers once, up front, so the
// scan loop only has to load haystack bytes. Each scan loop advances R0
// one byte per iteration with a post-incrementing load, so inside a loop
// body R0 already points one byte past the window being tested.
//
// Registers written: R0, R2-R8, R10-R12 and the condition flags.
// R3 doubles as a scratch register once the length dispatch is done.
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// R8 = haystack base + 1. Subtracting it from the post-incremented
	// R0 at a match yields the match offset directly.
	ADD	$1, R0, R8
	// R4 = address of the last haystack position at which a window of
	// needle length still fits: base + (len(haystack) - len(needle)).
	SUB	R3, R1, R4
	ADD	R0, R4, R4

	// Dispatch on the needle length, testing its bits from high to low.
	CMP	$8, R3
	BHI	needle_9_32
	TBZ	$3, R3, needle_2_7
needle_8:
	// R5 = the whole 8-byte needle.
	MOVD	(R2), R5
scan_8:
	CMP	R4, R0
	BHI	no_match
	// R6 = the 8-byte window at the current position.
	MOVD.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_8
	B	match
needle_2_7:
	TBZ	$2, R3, needle_2_3
	TBZ	$1, R3, needle_4_5
	TBZ	$0, R3, needle_6
needle_7:
	// 7-byte needle as two 4-byte words: R5 = bytes 0..3 and
	// R6 = bytes 3..6 (byte 3 is covered by both).
	MOVWU	3(R2), R6
	MOVWU	(R2), R5
scan_7:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_7
	// R0 has moved on by one, so 2(R0) is byte 3 of the window.
	MOVWU	2(R0), R3
	CMP	R6, R3
	BNE	scan_7
	B	match
needle_6:
	// 6-byte needle: R5 = bytes 0..3, R6 = bytes 4..5.
	MOVWU	(R2), R5
	MOVHU	4(R2), R6
scan_6:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_6
	// 3(R0) is byte 4 of the window (R0 already advanced).
	MOVHU	3(R0), R3
	CMP	R6, R3
	BNE	scan_6
	B	match
needle_4_5:
	TBZ	$0, R3, needle_4
needle_5:
	// 5-byte needle: R5 = bytes 0..3, R7 = byte 4.
	MOVWU	(R2), R5
	MOVBU	4(R2), R7
scan_5:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_5
	// 3(R0) is byte 4 of the window (R0 already advanced).
	MOVBU	3(R0), R3
	CMP	R7, R3
	BNE	scan_5
	B	match
needle_4:
	// R5 = the whole 4-byte needle.
	MOVWU	(R2), R5
scan_4:
	CMP	R4, R0
	BHI	no_match
	MOVWU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_4
	B	match
needle_2_3:
	TBZ	$0, R3, needle_2
needle_3:
	// 3-byte needle: R6 = bytes 0..1, R7 = byte 2.
	MOVHU	(R2), R6
	MOVBU	2(R2), R7
scan_3:
	CMP	R4, R0
	BHI	no_match
	MOVHU.P	1(R0), R3
	CMP	R6, R3
	BNE	scan_3
	// 1(R0) is byte 2 of the window (R0 already advanced).
	MOVBU	1(R0), R3
	CMP	R7, R3
	BNE	scan_3
	B	match
needle_2:
	// R5 = the whole 2-byte needle.
	MOVHU	(R2), R5
scan_2:
	CMP	R4, R0
	BHI	no_match
	MOVHU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_2
	// A 2-byte hit falls straight through into match.
match:
	// R0 is one past the window start and R8 is base + 1, so their
	// difference is the offset of the match.
	SUB	R8, R0, R0
	MOVD	R0, (R9)
	RET
no_match:
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET
needle_9_32:
	// R11 = len(needle) - 9. Added to the post-incremented R0 it
	// addresses the final 8 bytes of the current window.
	SUB	$9, R3, R11
	CMP	$16, R3
	BHI	needle_17_32
needle_9_16:
	// R5 = first 8 needle bytes; the load also advances R2 by 8.
	MOVD.P	8(R2), R5
	// R7 = len(needle) - 16, the offset from the advanced R2 to the
	// final 8 needle bytes, which go into R6.
	SUB	$16, R3, R7
	MOVD	(R2)(R7), R6
scan_9_16:
	// Test the leading 8 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R7
	CMP	R5, R7
	BNE	scan_9_16
	MOVD	(R0)(R11), R7
	CMP	R6, R7
	BNE	scan_9_16
	B	match
needle_17_32:
	CMP	$24, R3
	BHI	needle_25_32
needle_17_24:
	// R5, R6 = first 16 needle bytes; the load advances R2 by 16.
	LDP.P	16(R2), (R5, R6)
	// R10 = len(needle) - 24, the offset from the advanced R2 to the
	// final 8 needle bytes, which go into R7.
	SUB	$24, R3, R10
	MOVD	(R2)(R10), R7
scan_17_24:
	// Test the leading 16 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R10
	CMP	R5, R10
	BNE	scan_17_24
	// 7(R0) is byte 8 of the window (R0 already advanced).
	MOVD	7(R0), R10
	CMP	R6, R10
	BNE	scan_17_24
	MOVD	(R0)(R11), R10
	CMP	R7, R10
	BNE	scan_17_24
	B	match
needle_25_32:
	// R5, R6, R7 = first 24 needle bytes; the two loads advance R2
	// by 24 in total.
	LDP.P	16(R2), (R5, R6)
	MOVD.P	8(R2), R7
	// R12 = len(needle) - 32, the offset from the advanced R2 to the
	// final 8 needle bytes, which go into R10.
	SUB	$32, R3, R12
	MOVD	(R2)(R12), R10
scan_25_32:
	// Test the leading 24 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	no_match
	MOVD.P	1(R0), R12
	CMP	R5, R12
	BNE	scan_25_32
	// 7(R0) and 15(R0) are bytes 8 and 16 of the window.
	MOVD	7(R0), R12
	CMP	R6, R12
	BNE	scan_25_32
	MOVD	15(R0), R12
	CMP	R7, R12
	BNE	scan_25_32
	MOVD	(R0)(R11), R12
	CMP	R10, R12
	BNE	scan_25_32
	B	match