github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/indexbyte_s390x.s (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  // 
     4  // Copyright 2018 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && s390x
     9  
    10  #include "textflag.h"
    11  
    12  TEXT ·IndexSliceByte(SB),NOSPLIT|NOFRAME,$0-40
    13  	MOVD b_base+0(FP), R3// b_base => R3
    14  	MOVD b_len+8(FP), R4 // b_len => R4
    15  	MOVBZ c+24(FP), R5    // c => R5
    16  	MOVD $ret+32(FP), R2 // &ret => R9
    17  	BR indexbytebody<>(SB)
    18  
    19  TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
    20  	MOVD s_base+0(FP), R3// s_base => R3
    21  	MOVD s_len+8(FP), R4 // s_len => R4
    22  	MOVBZ c+16(FP), R5    // c => R5
    23  	MOVD $ret+24(FP), R2 // &ret => R9
    24  	BR indexbytebody<>(SB)
    25  
     26  // indexbytebody stores into *ret the index of the first byte equal to c among the s_len bytes at s, or -1 if there is none. Input:
     27  // R3: s -- address of the first byte to search
     28  // R4: s_len -- number of bytes to search
     29  // R5: c -- byte sought
     30  // R2: &ret -- address to put index into (-1 is stored when c is not found)
     31  TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0 // reached by BR from ·IndexSliceByte and ·IndexByte
     32  	CMPBEQ R4, $0, notfound // empty input: nothing can match
     33  	MOVD R3, R6          // store base for later (used to turn the final address back into an index)
     34  	ADD R3, R4, R8      // R8 = s + s_len, the address after the end of the string
     35  	// if the length is small (< 16), use the byte loop; otherwise, use vector or srst search
     36  	CMPBGE R4, $16, large
     37  
     38  residual: // byte-at-a-time scan of [R3, R8)
     39  	CMPBEQ R3, R8, notfound // reached the end without a match
     40  	MOVBZ 0(R3), R7 // R7 = current byte
     41  	LA 1(R3), R3 // advance; R3 now points one past the byte just loaded
     42  	CMPBNE R7, R5, residual // no match: try the next byte
     43  
     44  found: // expects R3 = address one past the matching byte
     45  	SUB R6, R3 // R3 -= base
     46  	SUB $1, R3 // undo the post-increment, giving the index of the match
     47  	MOVD R3, 0(R2) // *ret = index
     48  	RET
     49  
     50  notfound:
     51  	MOVD $-1, 0(R2) // *ret = -1
     52  	RET
     53  
     54  large: // s_len >= 16
     55  	MOVBZ ·hasVX(SB), R1 // package-level flag defined outside this file; zero selects the srst path below
     56  	CMPBNE R1, $0, vectorimpl
     57  
     58  srstimpl:                       // no vector facility
     59  	MOVBZ R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
     60  srstloop:
     61  	WORD $0xB25E0083     // srst %r8, %r3 (search the range [R3, R8)); hand-encoded instruction word
     62  	BVS srstloop        // interrupted - continue
     63  	BGT notfoundr0 // search ended without finding c
     64  foundr0: // match: R8 now holds the address of the matching byte
     65  	XOR R0, R0          // reset R0 to zero, undoing the MOVBZ at srstimpl
     66  	SUB R6, R8          // remove base, leaving the index
     67  	MOVD R8, 0(R2) // *ret = index
     68  	RET
     69  notfoundr0:
     70  	XOR R0, R0          // reset R0 to zero, undoing the MOVBZ at srstimpl
     71  	MOVD $-1, 0(R2) // *ret = -1
     72  	RET
     73  
     74  vectorimpl:
     75  	// if the address is not 16-byte aligned, use a byte loop for the header
     76  	MOVD R3, R8
     77  	AND $15, R8 // R8 = low four bits of the address (R8 is scratch here; it is recomputed at aligned)
     78  	CMPBGT R8, $0, notaligned
     79  
     80  aligned: // R3 is 16-byte aligned here
     81  	ADD R6, R4, R8 // R8 = base + s_len, the end address again
     82  	MOVD R8, R7
     83  	AND $-16, R7 // R7 = end address rounded down to a 16-byte boundary; the vector loop stops there
     84  	// replicate c across V17
     85  	VLVGB $0, R5, V19 // put c into byte element 0 of V19
     86  	VREPB $0, V19, V17 // copy that element into every byte of V17
     87  
     88  vectorloop:
     89  	CMPBGE R3, R7, residual // no full 16-byte block left: finish the tail with the byte loop
     90  	VL 0(R3), V16    // load string to be searched into V16
     91  	ADD $16, R3 // advance past this block before testing it
     92  	VFEEBS V16, V17, V18 // search V17 in V16 and set condition code accordingly
     93  	BVS vectorloop // no match in this block: continue with the next one
     94  
     95  	// when vector search found c in the string
     96  	VLGVB $7, V18, R7   // load 7th element of V18 containing index into R7
     97  	SUB $16, R3 // back to the start of the block just searched
     98  	SUB R6, R3 // R3 = offset of that block from base
     99  	ADD R3, R7 // R7 = block offset + index within the block
    100  	MOVD R7, 0(R2) // *ret = index
    101  	RET
    102  
    103  notaligned:
    104  	MOVD R3, R8
    105  	AND $-16, R8
    106  	ADD     $16, R8 // R8 = next 16-byte boundary above R3 (at most 15 bytes away; s_len >= 16 on this path)
    107  notalignedloop: // byte-at-a-time scan up to that boundary
    108  	CMPBEQ R3, R8, aligned // boundary reached: switch to the vector loop
    109  	MOVBZ 0(R3), R7 // R7 = current byte
    110  	LA 1(R3), R3 // advance; R3 now points one past the byte just loaded
    111  	CMPBNE R7, R5, notalignedloop // no match: try the next byte
    112  	BR found // match in the header; R3 is one past it, as found expects