// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && s390x

#include "textflag.h"

// Syntax: Go (Plan 9 style) assembler, operands are written source-first.
// Target: s390x with the vector (vx) facility.

// indexSlice: entry point taking two slices.
// Frame layout used below: a at 0(FP), b at 24(FP), int result at 48(FP).
// Loads the arguments into the registers expected by indexbody<> and
// tail-branches to it; indexbody<> writes the result and returns.
// Caller must confirm availability of vx facility before calling.
TEXT ·indexSlice(SB),NOSPLIT|NOFRAME,$0-56
	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
	LMG	b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
	MOVD	$ret+48(FP), R5       // R5=&ret
	BR	indexbody<>(SB)

// index: entry point taking two strings.
// Frame layout used below: a at 0(FP), b at 16(FP), int result at 32(FP).
// Loads the arguments into the registers expected by indexbody<> and
// tail-branches to it; indexbody<> writes the result and returns.
// Caller must confirm availability of vx facility before calling.
TEXT ·index(SB),NOSPLIT|NOFRAME,$0-40
	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
	LMG	b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
	MOVD	$ret+32(FP), R5       // R5=&ret
	BR	indexbody<>(SB)

// indexbody: shared search routine.
//
// s: string we are searching
// sep: string to search for
//
// In:
//   R1=&s[0], R2=len(s)
//   R3=&sep[0], R4=len(sep)
//   R5=&ret (int)
// Out:
//   *R5 = offset of the first match of sep in s (R7-R1 at the match),
//         or -1 when len(sep) > len(s), len(sep) == 0, or no match exists.
//
// Register roles after the prologue:
//   R2 = address of the last position at which sep can still start
//   R4 = len(sep)-1 (used as the VLL "highest byte index" operand)
//   R7 = address of the current candidate position
//   V0 = first (up to) 16 bytes of sep
//
// Dispatch is by len(sep): dedicated 16-positions-per-iteration loops for
// lengths 2, 3 and 4 (falling back to index2to16 for the remainder), a
// one-position-per-step loop for lengths up to 16 (index2to16), and
// multi-vector compares for 17-32, 33-48 and 49-64 bytes.
// Caller must confirm availability of vx facility before calling.
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBGT	R4, R2, notfound   // len(sep) > len(s): cannot match
	ADD	R1, R2
	SUB	R4, R2             // R2=&s[len(s)-len(sep)] (last valid index)
	CMPBEQ	R4, $0, notfound   // empty sep is reported as not found
	SUB	$1, R4             // R4=len(sep)-1 for use as VLL index
	VLL	R4, (R3), V0       // contains first 16 bytes of sep
	MOVD	R1, R7

index2plus:
	// len(sep) == 2
	CMPBNE	R4, $1, index3plus
	MOVD	$15(R7), R9
	CMPBGE	R9, R2, index2to16 // too short for a full 16-byte step
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // replicate the 2-byte sep across V1
index2loop:
	VL	0(R7), V2          // 16 bytes, even indices
	VL	1(R7), V4          // 16 bytes, odd indices
	VCEQH	V1, V2, V5         // compare even indices
	VCEQH	V1, V4, V6         // compare odd indices
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // finish the remaining positions one at a time
	BR	notfound

index3plus:
	// len(sep) == 3
	CMPBNE	R4, $2, index4plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // too short for a full 16-byte step
	MOVD	$1, R0             // VLL index: load 2 bytes past the first 16
	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1         // replicate first two bytes of sep
	VREPB	$2, V0, V8         // replicate last byte of sep
index3loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 2-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VCEQH	V1, V2, V5         // compare 2-byte even indices
	VCEQH	V1, V4, V6         // compare 2-byte odd indices
	VCEQB	V8, V9, V10        // compare last bytes
	VSEL	V5, V6, V31, V7    // merge even and odd indices
	VN	V7, V10, V7        // AND indices with last byte
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // finish the remaining positions one at a time
	BR	notfound

index4plus:
	// len(sep) == 4
	CMPBNE	R4, $3, index5plus
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16 // too short for a full 16-byte step
	MOVD	$2, R0             // VLL index: load 3 bytes past the first 16
	VGBM	$0x8888, V29       // 0xff000000ff000000...
	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
	VONE	V16
	VREPF	$0, V0, V1         // replicate the 4-byte sep across V1
index4loop:
	VL	(R7), V2           // load 16-bytes into V2
	VLL	R0, 16(R7), V3     // load 3-bytes into V3
	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
	VCEQF	V1, V2, V5         // compare index 0, 4, ...
	VCEQF	V1, V4, V6         // compare index 1, 5, ...
	VCEQF	V1, V9, V11        // compare index 2, 6, ...
	VCEQF	V1, V10, V12       // compare index 3, 7, ...
	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
	VSEL	V13, V14, V31, V7  // final merge
	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7        // R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16 // finish the remaining positions one at a time
	BR	notfound

index5plus:
	CMPBGT	R4, $15, index17plus
index2to16:
	// One candidate position per compare; also used as the tail of the
	// 2/3/4-byte loops above. VLL loads R4+1 bytes, matching how V0 was loaded.
	CMPBGT	R7, R2, notfound
	MOVD	$1(R7), R8
	CMPBGT	R8, R2, index2to16tail // only one position left
index2to16loop:
	// unrolled 2x
	VLL	R4, (R7), V1
	VLL	R4, 1(R7), V2
	VCEQGS	V0, V1, V3
	BEQ	found
	MOVD	$1(R7), R7
	VCEQGS	V0, V2, V4
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLT	R7, R2, index2to16loop
	CMPBGT	R7, R2, notfound
index2to16tail:
	// R7 == R2 here: test the final position.
	VLL	R4, (R7), V1
	VCEQGS	V0, V1, V2
	BEQ	found
	BR	notfound

index17plus:
	// len(sep) in 17..32: V0 holds bytes 0-15, V1 the remainder.
	CMPBGT	R4, $31, index33plus
	SUB	$16, R4, R0        // R0 = VLL index for the bytes past the first 16
	VLL	R0, 16(R3), V1
	VONE	V7
index17to32loop:
	VL	(R7), V2
	VLL	R0, 16(R7), V3
	VCEQG	V0, V2, V4
	VCEQG	V1, V3, V5
	VN	V4, V5, V6         // combine the per-vector compare results
	VCEQGS	V6, V7, V8         // all ones <=> every part matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index17to32loop
	BR	notfound

index33plus:
	// len(sep) in 33..48: V0, V1 hold bytes 0-31, V2 the remainder.
	CMPBGT	R4, $47, index49plus
	SUB	$32, R4, R0        // R0 = VLL index for the bytes past the first 32
	VL	16(R3), V1
	VLL	R0, 32(R3), V2
	VONE	V11
index33to48loop:
	VL	(R7), V3
	VL	16(R7), V4
	VLL	R0, 32(R7), V5
	VCEQG	V0, V3, V6
	VCEQG	V1, V4, V7
	VCEQG	V2, V5, V8
	VN	V6, V7, V9
	VN	V8, V9, V10        // combine the per-vector compare results
	VCEQGS	V10, V11, V12      // all ones <=> every part matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index33to48loop
	BR	notfound

index49plus:
	// len(sep) in 49..64: V0-V2 hold bytes 0-47, V3 the remainder.
	CMPBGT	R4, $63, index65plus
	SUB	$48, R4, R0        // R0 = VLL index for the bytes past the first 48
	VL	16(R3), V1
	VL	32(R3), V2
	VLL	R0, 48(R3), V3
	VONE	V15
index49to64loop:
	VL	(R7), V4
	VL	16(R7), V5
	VL	32(R7), V6
	VLL	R0, 48(R7), V7
	VCEQG	V0, V4, V8
	VCEQG	V1, V5, V9
	VCEQG	V2, V6, V10
	VCEQG	V3, V7, V11
	VN	V8, V9, V12
	VN	V10, V11, V13
	VN	V12, V13, V14      // combine the per-vector compare results
	VCEQGS	V14, V15, V16      // all ones <=> every part matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index49to64loop
	// fall through: every position tried without a match
notfound:
	MOVD	$-1, (R5)
	RET

index65plus:
	// not implemented
	// NOTE(review): reached only when len(sep) > 64. The store below looks
	// like a deliberate fault rather than a result write (nothing is stored
	// through R5 on this path) - confirm that callers never pass a longer sep.
	MOVD	$0, (R0)
	RET

foundV17: // index is in doubleword V17[0]
	VLGVG	$0, V17, R8
	ADD	R8, R7             // R7 = address of the match within this 16-byte step
found:
	SUB	R1, R7             // convert match address to an offset into s
	MOVD	R7, (R5)
	RET