// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && s390x

#include "textflag.h"

// ·IndexSliceByte: entry point for a byte-slice haystack.
// Argument frame ($0-40): b_base at 0(FP), b_len at 8(FP), c at 24(FP),
// ret at 32(FP).
// Loads the arguments into the registers indexbytebody expects and branches
// (not calls) to it, so the RET in indexbytebody returns to our caller after
// the result has been stored through R2.
TEXT ·IndexSliceByte(SB),NOSPLIT|NOFRAME,$0-40
	MOVD	b_base+0(FP), R3	// b_base => R3
	MOVD	b_len+8(FP), R4	// b_len => R4
	MOVBZ	c+24(FP), R5	// c => R5 (zero-extended)
	MOVD	$ret+32(FP), R2	// &ret => R2
	BR	indexbytebody<>(SB)

// ·IndexByte: entry point for a string haystack.
// Argument frame ($0-32): s_base at 0(FP), s_len at 8(FP), c at 16(FP),
// ret at 24(FP).
// Same register hand-off as ·IndexSliceByte.
TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
	MOVD	s_base+0(FP), R3	// s_base => R3
	MOVD	s_len+8(FP), R4	// s_len => R4
	MOVBZ	c+16(FP), R5	// c => R5 (zero-extended)
	MOVD	$ret+24(FP), R2	// &ret => R2
	BR	indexbytebody<>(SB)

// indexbytebody stores to 0(R2) the index of the first byte equal to c in
// the range [s, s+s_len), or -1 if there is none.
//
// input:
// R3: s
// R4: s_len
// R5: c -- byte sought
// R2: &ret -- address to put index into
//
// strategy:
//   s_len == 0            -> -1
//   s_len < 16            -> byte-at-a-time loop (residual)
//   s_len >= 16, hasVX    -> 16-byte vector compare loop, with byte loops
//                            for the unaligned head and the tail
//   s_len >= 16, !hasVX   -> SRST (search string) instruction
//
// registers used internally:
//   R6: copy of the original base, used to turn an address into an index
//   R8: end address (s + s_len); temporarily reused for alignment math
//   R7: scratch (loaded byte, vector-loop limit, match offset)
//   R1: scratch (hasVX flag)
//   R0: holds c for SRST only; reset to 0 before returning
//   V16-V19: vector scratch
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBEQ	R4, $0, notfound
	MOVD	R3, R6	// store base for later
	ADD	R3, R4, R8	// the address after the end of the string
	// if the length is small, use loop; otherwise, use vector or srst search
	CMPBGE	R4, $16, large

	// Byte loop over [R3, R8). Also used as the tail of the vector path.
residual:
	CMPBEQ	R3, R8, notfound	// reached the end without a match
	MOVBZ	0(R3), R7
	LA	1(R3), R3	// advance; R3 is now one past the byte just loaded
	CMPBNE	R7, R5, residual

	// R3 points one past the matching byte: index = R3 - base - 1.
found:
	SUB	R6, R3
	SUB	$1, R3
	MOVD	R3, 0(R2)
	RET

notfound:
	MOVD	$-1, 0(R2)
	RET

large:
	MOVBZ	·hasVX(SB), R1
	CMPBNE	R1, $0, vectorimpl

srstimpl:	// no vector facility
	MOVBZ	R5, R0	// c needs to be in R0, leave until last minute as currently R0 is expected to be 0
srstloop:
	WORD	$0xB25E0083	// srst %r8, %r3 (search the range [R3, R8))
	BVS	srstloop	// interrupted - continue
	BGT	notfoundr0
foundr0:
	XOR	R0, R0	// reset R0
	SUB	R6, R8	// R8 holds the match address; remove base to get the index
	MOVD	R8, 0(R2)
	RET
notfoundr0:
	XOR	R0, R0	// reset R0
	MOVD	$-1, 0(R2)
	RET

vectorimpl:
	// if the address is not 16-byte aligned, use a byte loop for the header
	MOVD	R3, R8
	AND	$15, R8
	CMPBGT	R8, $0, notaligned

aligned:
	ADD	R6, R4, R8	// R8 = end address again (it was reused as scratch above)
	MOVD	R8, R7
	AND	$-16, R7	// R7 = end rounded down to 16: limit for whole-block loads
	// replicate c across V17
	VLVGB	$0, R5, V19
	VREPB	$0, V19, V17

vectorloop:
	CMPBGE	R3, R7, residual	// fewer than 16 bytes left: finish with the byte loop
	VL	0(R3), V16	// load string to be searched into V16
	ADD	$16, R3
	VFEEBS	V16, V17, V18	// search V17 in V16 and set condition code accordingly
	BVS	vectorloop	// no match in this block

	// when vector search found c in the string
	VLGVB	$7, V18, R7	// load 7th element of V18 containing index into R7
	SUB	$16, R3	// undo the advance: R3 = start of the matching block
	SUB	R6, R3	// R3 = block offset from base
	ADD	R3, R7	// R7 = block offset + offset within block
	MOVD	R7, 0(R2)
	RET

	// Byte loop from R3 up to the next 16-byte boundary. This path is only
	// reached with s_len >= 16 and consumes at most 15 bytes, so it cannot
	// run past the end of the input.
notaligned:
	MOVD	R3, R8
	AND	$-16, R8
	ADD	$16, R8	// R8 = next 16-byte boundary above R3
notalignedloop:
	CMPBEQ	R3, R8, aligned
	MOVBZ	0(R3), R7
	LA	1(R3), R3
	CMPBNE	R7, R5, notalignedloop
	BR	found