// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && arm64

#include "textflag.h"

// indexSlice is the entry stub for a 56-byte argument frame.
// It reads the haystack pointer/length from a_base+0 / a_len+8 and the
// needle pointer/length from b_base+24 / b_len+32, hands the address of
// the result slot (ret+48) to the shared search body, and tail-branches
// into it.
// NOTE(review): the 24-byte spacing suggests a and b are slices; the Go
// declaration is not in this file, so confirm against it.
TEXT ·indexSlice(SB),NOSPLIT,$0-56
	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
	MOVD	a_len+8(FP), R1		// R1 = haystack length
	MOVD	b_base+24(FP), R2	// R2 = needle pointer
	MOVD	b_len+32(FP), R3	// R3 = needle length
	MOVD	$ret+48(FP), R9		// R9 = address of the result slot
	B	indexbody<>(SB)

// index is the entry stub for a 40-byte argument frame.
// Same register set-up as indexSlice, but the needle lives at
// b_base+16 / b_len+24 and the result slot at ret+32.
// NOTE(review): the 16-byte spacing suggests a and b are strings; the Go
// declaration is not in this file, so confirm against it.
TEXT ·index(SB),NOSPLIT,$0-40
	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
	MOVD	a_len+8(FP), R1		// R1 = haystack length
	MOVD	b_base+16(FP), R2	// R2 = needle pointer
	MOVD	b_len+24(FP), R3	// R3 = needle length
	MOVD	$ret+32(FP), R9		// R9 = address of the result slot
	B	indexbody<>(SB)

// indexbody<> searches the haystack for the first occurrence of the needle
// and stores the match offset, or -1 when there is none, through R9.
//
// input:
//   R0: haystack
//   R1: length of haystack
//   R2: needle
//   R3: length of needle (2 <= len <= 32)
//   R9: address to put result
//
// Strategy: the needle is loaded into registers once, up front, so that
// every candidate position only needs haystack loads and register
// compares. Needles longer than 8 bytes are covered by 8-byte windows from
// the front plus one 8-byte window ending at the last needle byte; that
// last window may overlap the previous one.
//
// Every scan loop reads the haystack with a post-indexed load that advances
// R0 by one, so after the first load R0 is "candidate start + 1". All
// follow-up offsets and the final index computation account for that.
//
// Registers written: R0, R2 (needles > 8 bytes), R3 (reused as scratch for
// needle lengths 3, 5, 6 and 7), R4-R8, R10-R12.
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// R8 = haystack base + 1, subtracted from the advanced cursor on a hit.
	ADD	$1, R0, R8
	// R4 = base + (len(haystack) - len(needle)): last valid candidate start.
	SUB	R3, R1, R4
	ADD	R0, R4, R4

	// Dispatch on the needle length: > 8 goes to the multi-window paths,
	// otherwise the bits of R3 are tested from bit 3 downwards.
	CMP	$8, R3
	BHI	sep_9_plus
	TBZ	$3, R3, sep_2_7		// bit 3 clear: length is below 8
sep_8:
	// Whole needle fits in R5.
	MOVD	(R2), R5
scan_8:
	CMP	R4, R0
	BHI	miss			// cursor moved past the last candidate
	MOVD.P	1(R0), R6		// R6 = 8 haystack bytes at cursor; cursor++
	CMP	R5, R6
	BNE	scan_8
	B	hit
sep_2_7:
	TBZ	$2, R3, sep_2_3		// bit 2 clear: length 2 or 3
	TBZ	$1, R3, sep_4_5		// bit 1 clear: length 4 or 5
	TBZ	$0, R3, sep_6		// bit 0 clear: length 6
sep_7:
	// Needle bytes 0..3 in R5 and bytes 3..6 in R6 (byte 3 is in both).
	MOVWU	(R2), R5
	MOVWU	3(R2), R6
scan_7:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R3		// R3 is free: the length is no longer needed
	CMP	R5, R3
	BNE	scan_7
	MOVWU	2(R0), R3		// candidate bytes 3..6 (cursor already +1)
	CMP	R6, R3
	BNE	scan_7
	B	hit
sep_6:
	// Needle bytes 0..3 in R5 and bytes 4..5 in R6.
	MOVWU	(R2), R5
	MOVHU	4(R2), R6
scan_6:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_6
	MOVHU	3(R0), R3		// candidate bytes 4..5 (cursor already +1)
	CMP	R6, R3
	BNE	scan_6
	B	hit
sep_4_5:
	TBZ	$0, R3, sep_4		// bit 0 clear: length 4
sep_5:
	// Needle bytes 0..3 in R5 and byte 4 in R7.
	MOVWU	(R2), R5
	MOVBU	4(R2), R7
scan_5:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_5
	MOVBU	3(R0), R3		// candidate byte 4 (cursor already +1)
	CMP	R7, R3
	BNE	scan_5
	B	hit
sep_4:
	// Whole needle fits in the low 32 bits of R5.
	MOVWU	(R2), R5
scan_4:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_4
	B	hit
sep_2_3:
	TBZ	$0, R3, sep_2		// bit 0 clear: length 2
sep_3:
	// Needle bytes 0..1 in R6 and byte 2 in R7.
	MOVHU	(R2), R6
	MOVBU	2(R2), R7
scan_3:
	CMP	R4, R0
	BHI	miss
	MOVHU.P	1(R0), R3
	CMP	R6, R3
	BNE	scan_3
	MOVBU	1(R0), R3		// candidate byte 2 (cursor already +1)
	CMP	R7, R3
	BNE	scan_3
	B	hit
sep_2:
	// Whole needle fits in the low 16 bits of R5.
	MOVHU	(R2), R5
scan_2:
	CMP	R4, R0
	BHI	miss
	MOVHU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_2
	// A match falls straight through into hit.
hit:
	// R0 is candidate start + 1 and R8 is base + 1, so R0 - R8 is the index.
	SUB	R8, R0, R0
	MOVD	R0, (R9)
	RET
miss:
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET
sep_9_plus:
	// R11 = len(needle) - 9: offset from the advanced cursor to the
	// candidate's last 8 bytes. Shared by all the paths below.
	SUB	$9, R3, R11
	CMP	$16, R3
	BHI	sep_17_plus
sep_9_16:
	MOVD.P	8(R2), R5		// R5 = needle bytes 0..7; R2 += 8
	SUB	$16, R3, R7		// offset from the advanced R2 to the tail window
	MOVD	(R2)(R7), R6		// R6 = last 8 needle bytes
scan_9_16:
	// Check the leading 8 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	miss
	MOVD.P	1(R0), R7
	CMP	R5, R7
	BNE	scan_9_16
	MOVD	(R0)(R11), R7
	CMP	R6, R7
	BNE	scan_9_16
	B	hit
sep_17_plus:
	CMP	$24, R3
	BHI	sep_25_32
sep_17_24:
	LDP.P	16(R2), (R5, R6)	// R5, R6 = needle bytes 0..15; R2 += 16
	SUB	$24, R3, R10		// offset from the advanced R2 to the tail window
	MOVD	(R2)(R10), R7		// R7 = last 8 needle bytes
scan_17_24:
	// Check the leading 16 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	miss
	MOVD.P	1(R0), R10
	CMP	R5, R10
	BNE	scan_17_24
	MOVD	7(R0), R10		// candidate bytes 8..15 (cursor already +1)
	CMP	R6, R10
	BNE	scan_17_24
	MOVD	(R0)(R11), R10
	CMP	R7, R10
	BNE	scan_17_24
	B	hit
sep_25_32:
	LDP.P	16(R2), (R5, R6)	// R5, R6 = needle bytes 0..15; R2 += 16
	MOVD.P	8(R2), R7		// R7 = needle bytes 16..23; R2 += 8
	SUB	$32, R3, R12		// offset from the advanced R2 to the tail window
	MOVD	(R2)(R12), R10		// R10 = last 8 needle bytes
scan_25_32:
	// Check the leading 24 bytes first, then the trailing 8.
	CMP	R4, R0
	BHI	miss
	MOVD.P	1(R0), R12
	CMP	R5, R12
	BNE	scan_25_32
	MOVD	7(R0), R12		// candidate bytes 8..15 (cursor already +1)
	CMP	R6, R12
	BNE	scan_25_32
	MOVD	15(R0), R12		// candidate bytes 16..23 (cursor already +1)
	CMP	R7, R12
	BNE	scan_25_32
	MOVD	(R0)(R11), R12
	CMP	R10, R12
	BNE	scan_25_32
	B	hit