// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/index_s390x.s

#include "go_asm.h"
#include "textflag.h"

// Index is the entry point whose second operand starts at frame offset 24.
// It loads base/length of both operands and the address of the result slot
// into the registers indexbody<> expects, then branches there.
// Caller must confirm availability of vx facility before calling.
TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
	LMG	a_base+0(FP), R1, R2	// R1=&s[0],   R2=len(s)
	LMG	b_base+24(FP), R3, R4	// R3=&sep[0], R4=len(sep)
	MOVD	$ret+48(FP), R5		// R5=&ret
	BR	indexbody<>(SB)

// IndexString is the entry point whose second operand starts at frame
// offset 16. It sets up the same registers as Index and branches to
// indexbody<>.
// Caller must confirm availability of vx facility before calling.
TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
	LMG	a_base+0(FP), R1, R2	// R1=&s[0],   R2=len(s)
	LMG	b_base+16(FP), R3, R4	// R3=&sep[0], R4=len(sep)
	MOVD	$ret+32(FP), R5		// R5=&ret
	BR	indexbody<>(SB)

// indexbody searches s for the first occurrence of sep and stores the
// byte offset of the match, or -1, through R5.
//
// s: string we are searching
// sep: string to search for
// R1=&s[0], R2=len(s)
// R3=&sep[0], R4=len(sep)
// R5=&ret (int)
//
// Behaviour visible in this routine:
//   - len(sep) > len(s) or len(sep) == 0 stores -1.
//   - len(sep) of 2, 3 or 4 uses a loop that tests 16 candidate positions
//     per iteration while at least 16 candidates remain; the remainder
//     (and every length up to 16) is handled one candidate at a time in
//     index2to16.
//   - len(sep) of 17..64 compares one candidate position per iteration
//     using 2, 3 or 4 vector registers.
//   - len(sep) > 64 is not implemented and reaches index65plus.
//     NOTE(review): callers are presumably expected to keep len(sep) <= 64
//     - confirm against the Go-side wrapper.
//
// Caller must confirm availability of vx facility before calling.
TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBGT	R4, R2, notfound	// sep longer than s
	ADD	R1, R2
	SUB	R4, R2			// R2=&s[len(s)-len(sep)] (last valid index)
	CMPBEQ	R4, $0, notfound
	SUB	$1, R4			// R4=len(sep)-1 for use as VLL index
	VLL	R4, (R3), V0		// contains first 16 bytes of sep
	MOVD	R1, R7			// R7=current candidate position

index2plus:
	CMPBNE	R4, $1, index3plus	// len(sep) != 2
	MOVD	$15(R7), R9
	CMPBGE	R9, R2, index2to16	// fewer than 17 candidates: use scalar-step loop
	VGBM	$0xaaaa, V31		// 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1		// replicate the 2-byte pattern across V1
	// R9 and R2 are unchanged since the bound check above, so no second
	// check is needed before entering the loop.
index2loop:
	VL	0(R7), V2		// 16 bytes, even indices
	VL	1(R7), V4		// 16 bytes, odd indices
	VCEQH	V1, V2, V5		// compare even indices
	VCEQH	V1, V4, V6		// compare odd indices
	VSEL	V5, V6, V31, V7		// merge even and odd indices
	VFEEBS	V16, V7, V17		// find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7		// R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index2loop	// continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16	// candidates remain: finish one at a time
	BR	notfound

index3plus:
	CMPBNE	R4, $2, index4plus	// len(sep) != 3
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16
	MOVD	$1, R0			// VLL index: load 2 trailing bytes
	VGBM	$0xaaaa, V31		// 0xff00ff00ff00ff00...
	VONE	V16
	VREPH	$0, V0, V1		// replicate first two bytes of sep
	VREPB	$2, V0, V8		// replicate third byte of sep
index3loop:
	VL	(R7), V2		// load 16-bytes into V2
	VLL	R0, 16(R7), V3		// load 2-bytes into V3
	VSLDB	$1, V2, V3, V4		// V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9		// V9=(V2:V3)<<2
	VCEQH	V1, V2, V5		// compare 2-byte even indices
	VCEQH	V1, V4, V6		// compare 2-byte odd indices
	VCEQB	V8, V9, V10		// compare last bytes
	VSEL	V5, V6, V31, V7		// merge even and odd indices
	VN	V7, V10, V7		// AND indices with last byte
	VFEEBS	V16, V7, V17		// find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7		// R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index3loop	// continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16
	BR	notfound

index4plus:
	CMPBNE	R4, $3, index5plus	// len(sep) != 4
	ADD	$15, R7, R9
	CMPBGE	R9, R2, index2to16
	MOVD	$2, R0			// VLL index: load 3 trailing bytes
	VGBM	$0x8888, V29		// 0xff000000ff000000...
	VGBM	$0x2222, V30		// 0x0000ff000000ff00...
	VGBM	$0xcccc, V31		// 0xffff0000ffff0000...
	VONE	V16
	VREPF	$0, V0, V1		// replicate the 4-byte pattern across V1
index4loop:
	VL	(R7), V2		// load 16-bytes into V2
	VLL	R0, 16(R7), V3		// load 3-bytes into V3
	VSLDB	$1, V2, V3, V4		// V4=(V2:V3)<<1
	VSLDB	$2, V2, V3, V9		// V9=(V2:V3)<<2
	VSLDB	$3, V2, V3, V10		// V10=(V2:V3)<<3
	VCEQF	V1, V2, V5		// compare index 0, 4, ...
	VCEQF	V1, V4, V6		// compare index 1, 5, ...
	VCEQF	V1, V9, V11		// compare index 2, 6, ...
	VCEQF	V1, V10, V12		// compare index 3, 7, ...
	VSEL	V5, V6, V29, V13	// merge index 0, 1, 4, 5, ...
	VSEL	V11, V12, V30, V14	// merge index 2, 3, 6, 7, ...
	VSEL	V13, V14, V31, V7	// final merge
	VFEEBS	V16, V7, V17		// find leftmost index, set condition to 1 if found
	BLT	foundV17
	MOVD	$16(R7), R7		// R7+=16
	ADD	$15, R7, R9
	CMPBLE	R9, R2, index4loop	// continue if (R7+15) <= R2 (last index to search)
	CMPBLE	R7, R2, index2to16
	BR	notfound

index5plus:
	CMPBGT	R4, $15, index17plus	// len(sep) > 16
index2to16:
	// One candidate per comparison; V0 holds all of sep (at most 16 bytes).
	CMPBGT	R7, R2, notfound
	MOVD	$1(R7), R8
	CMPBGT	R8, R2, index2to16tail	// exactly one candidate left
index2to16loop:
	// unrolled 2x
	VLL	R4, (R7), V1		// candidate at R7
	VLL	R4, 1(R7), V2		// candidate at R7+1
	VCEQGS	V0, V1, V3
	BEQ	found
	MOVD	$1(R7), R7
	VCEQGS	V0, V2, V4
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLT	R7, R2, index2to16loop	// at least two candidates remain
	CMPBGT	R7, R2, notfound	// none remain
index2to16tail:
	VLL	R4, (R7), V1		// final candidate (R7 == R2)
	VCEQGS	V0, V1, V2
	BEQ	found
	BR	notfound

index17plus:
	CMPBGT	R4, $31, index33plus	// len(sep) > 32
	SUB	$16, R4, R0		// R0=VLL index for the bytes past the first 16
	VLL	R0, 16(R3), V1		// V1=sep[16:]
	VONE	V7
index17to32loop:
	VL	(R7), V2
	VLL	R0, 16(R7), V3
	VCEQG	V0, V2, V4
	VCEQG	V1, V3, V5
	VN	V4, V5, V6		// combine both partial comparisons
	VCEQGS	V6, V7, V8		// all-ones means every byte matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index17to32loop
	BR	notfound

index33plus:
	CMPBGT	R4, $47, index49plus	// len(sep) > 48
	SUB	$32, R4, R0		// R0=VLL index for the bytes past the first 32
	VL	16(R3), V1		// V1=sep[16:32]
	VLL	R0, 32(R3), V2		// V2=sep[32:]
	VONE	V11
index33to48loop:
	VL	(R7), V3
	VL	16(R7), V4
	VLL	R0, 32(R7), V5
	VCEQG	V0, V3, V6
	VCEQG	V1, V4, V7
	VCEQG	V2, V5, V8
	VN	V6, V7, V9
	VN	V8, V9, V10		// combine all three partial comparisons
	VCEQGS	V10, V11, V12		// all-ones means every byte matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index33to48loop
	BR	notfound

index49plus:
	CMPBGT	R4, $63, index65plus	// len(sep) > 64
	SUB	$48, R4, R0		// R0=VLL index for the bytes past the first 48
	VL	16(R3), V1		// V1=sep[16:32]
	VL	32(R3), V2		// V2=sep[32:48]
	VLL	R0, 48(R3), V3		// V3=sep[48:]
	VONE	V15
index49to64loop:
	VL	(R7), V4
	VL	16(R7), V5
	VL	32(R7), V6
	VLL	R0, 48(R7), V7
	VCEQG	V0, V4, V8
	VCEQG	V1, V5, V9
	VCEQG	V2, V6, V10
	VCEQG	V3, V7, V11
	VN	V8, V9, V12
	VN	V10, V11, V13
	VN	V12, V13, V14		// combine all four partial comparisons
	VCEQGS	V14, V15, V16		// all-ones means every byte matched
	BEQ	found
	MOVD	$1(R7), R7
	CMPBLE	R7, R2, index49to64loop
	// loop exhausted: fall through to notfound
notfound:
	MOVD	$-1, (R5)
	RET

index65plus:
	// not implemented
	// NOTE(review): this store through R0 looks like a deliberate fault
	// rather than a result write; no value is stored through R5 on this
	// path - confirm that callers never pass len(sep) > 64.
	MOVD	$0, (R0)
	RET

foundV17: // index is in doubleword V17[0]
	VLGVG	$0, V17, R8
	ADD	R8, R7			// R7=address of the match
found:
	SUB	R1, R7			// convert address to offset within s
	MOVD	R7, (R5)
	RET