github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/internal/bytealg/indexbyte_s390x.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
     9  	MOVD	$ret+32(FP), R2 // R2 = &ret, the slot the shared body stores the index into
    10  	MOVBZ	c+24(FP), R5    // R5 = c, the byte sought (zero-extended)
    11  	MOVD	b_len+8(FP), R4 // R4 = b_len
    12  	MOVD	b_base+0(FP), R3 // R3 = b_base
    13  	BR	indexbytebody<>(SB) // branch (not call): the body stores the result and executes RET itself
    14  
    15  TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
    16  	MOVD	$ret+24(FP), R2 // R2 = &ret, the slot the shared body stores the index into
    17  	MOVBZ	c+16(FP), R5    // R5 = c, the byte sought (zero-extended)
    18  	MOVD	s_len+8(FP), R4 // R4 = s_len
    19  	MOVD	s_base+0(FP), R3 // R3 = s_base
    20  	BR	indexbytebody<>(SB) // branch (not call): the body stores the result and executes RET itself
    21  
    22  TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
    23  	FUNCDATA $0, ·IndexByte·args_stackmap(SB) // use the argument stack map of ·IndexByte (same argument layout)
    24  	MOVD	$ret+32(FP), R2 // R2 = &ret, the slot the shared body stores the index into
    25  	MOVBZ	c+24(FP), R5    // R5 = c, the byte sought (zero-extended)
    26  	MOVD	b_len+8(FP), R4 // R4 = b_len
    27  	MOVD	b_base+0(FP), R3 // R3 = b_base
    28  	BR	indexbytebody<>(SB) // branch (not call): the body stores the result and executes RET itself
    29  
    30  TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
    31  	FUNCDATA $0, ·IndexByteString·args_stackmap(SB) // use the argument stack map of ·IndexByteString (same argument layout)
    32  	MOVD	$ret+24(FP), R2 // R2 = &ret, the slot the shared body stores the index into
    33  	MOVBZ	c+16(FP), R5    // R5 = c, the byte sought (zero-extended)
    34  	MOVD	s_len+8(FP), R4 // R4 = s_len
    35  	MOVD	s_base+0(FP), R3 // R3 = s_base
    36  	BR	indexbytebody<>(SB) // branch (not call): the body stores the result and executes RET itself
    37  
    38  // indexbytebody writes the index of the first c in the s_len bytes at s, or -1 if absent, to the address in R2. Inputs:
    39  // R3: s -- address of the first byte to search
    40  // R4: s_len -- number of bytes to search
    41  // R5: c -- byte sought
    42  // R2: &ret -- address to put index into
    43  TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
    44  	CMPBEQ	R4, $0, notfound // empty input: nothing to search
    45  	MOVD	R3, R6          // keep the base address; the result is an offset from it
    46  	ADD	R3, R4, R8      // R8 = the address after the end of the string
    47  	// if the length is small (< 16), use the byte loop; otherwise, use vector or srst search
    48  	CMPBGE	R4, $16, large
    49  
    50  residual:                       // byte-at-a-time search of [R3, R8)
    51  	CMPBEQ	R3, R8, notfound // reached the end without a match
    52  	MOVBZ	0(R3), R7       // R7 = current byte
    53  	LA	1(R3), R3       // advance; R3 now points one past the byte in R7
    54  	CMPBNE	R7, R5, residual // not c: keep going
    55  
    56  found:                          // here R3 is one past the matching byte
    57  	SUB	R6, R3          // R3 -= base
    58  	SUB	$1, R3          // undo the post-increment to get the index of the match
    59  	MOVD	R3, 0(R2)       // *ret = index
    60  	RET
    61  
    62  notfound:
    63  	MOVD	$-1, 0(R2)      // *ret = -1
    64  	RET
    65  
    66  large:                          // s_len >= 16
    67  	MOVBZ	internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 // R1 = HasVX byte of internal/cpu's S390X variable
    68  	CMPBNE	R1, $0, vectorimpl // nonzero: take the vector implementation
    69  
    70  srstimpl:                       // no vector facility
    71  	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
    72  srstloop:
    73  	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
    74  	BVS	srstloop        // interrupted - continue
    75  	BGT	notfoundr0      // c does not occur in the range
    76  foundr0:                        // R8 now holds the address of the match (it is stored below relative to the base)
    77  	XOR	R0, R0          // reset R0
    78  	SUB	R6, R8          // remove base
    79  	MOVD	R8, 0(R2)       // *ret = index
    80  	RET
    81  notfoundr0:
    82  	XOR	R0, R0          // reset R0
    83  	MOVD	$-1, 0(R2)      // *ret = -1
    84  	RET
    85  
    86  vectorimpl:
    87  	// if the address is not 16-byte aligned, use a byte loop for the header
    88  	MOVD	R3, R8
    89  	AND	$15, R8         // R8 = R3 mod 16
    90  	CMPBGT	R8, $0, notaligned
    91  
    92  aligned:                        // R3 is 16-byte aligned here
    93  	ADD	R6, R4, R8      // R8 was used as scratch: recompute the end address (base + s_len)
    94  	MOVD	R8, R7
    95  	AND	$-16, R7        // R7 = end address rounded down to a multiple of 16
    96  	// replicate c across V17
    97  	VLVGB	$0, R5, V19   // put c in byte element 0 of V19
    98  	VREPB	$0, V19, V17  // copy that element into every byte of V17
    99  
   100  vectorloop:
   101  	CMPBGE	R3, R7, residual // no full 16-byte chunk left: finish with the byte loop up to R8
   102  	VL	0(R3), V16    // load string to be searched into V16
   103  	ADD	$16, R3       // advance past the chunk just loaded
   104  	VFEEBS	V16, V17, V18 // search V17 in V16 and set condition code accordingly
   105  	BVS	vectorloop    // no match in this chunk: try the next one
   106  
   107  	// when vector search found c in the string
   108  	VLGVB	$7, V18, R7   // load byte element 7 of V18, which contains the index within the chunk, into R7
   109  	SUB	$16, R3       // back to the start of the chunk
   110  	SUB	R6, R3        // R3 = offset of the chunk from the base
   111  	ADD	R3, R7        // R7 = chunk offset + index within the chunk
   112  	MOVD	R7, 0(R2)     // *ret = index
   113  	RET
   114  
   115  notaligned:                     // s_len >= 16 here, so the at most 15 header bytes are all within the string
   116  	MOVD	R3, R8
   117  	AND	$-16, R8
   118  	ADD     $16, R8         // R8 = next 16-byte boundary above R3
   119  notalignedloop:
   120  	CMPBEQ	R3, R8, aligned // header done: continue with the vector loop
   121  	MOVBZ	0(R3), R7       // R7 = current byte
   122  	LA	1(R3), R3       // advance; R3 now points one past the byte in R7
   123  	CMPBNE	R7, R5, notalignedloop // not c: keep going
   124  	BR	found           // match: found expects R3 one past the matching byte