github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/index_s390x.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // Index loads the (base, len) pairs of its a and b arguments and branches to indexbody<>, which stores the result through &ret. Caller must confirm availability of vx facility before calling.
     9  TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
    10  	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
    11  	LMG	b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
    12  	MOVD	$ret+48(FP), R5       // R5=&ret; indexbody<> writes the result through this pointer
    13  	BR	indexbody<>(SB)       // plain branch (no link): the RET in indexbody<> ends this call
    14  
    15  // IndexString loads the (base, len) pairs of its a and b arguments and branches to indexbody<>, which stores the result through &ret. Caller must confirm availability of vx facility before calling.
    16  TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
    17  	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
    18  	LMG	b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
    19  	MOVD	$ret+32(FP), R5       // R5=&ret; indexbody<> writes the result through this pointer
    20  	BR	indexbody<>(SB)       // plain branch (no link): the RET in indexbody<> ends this call
    21  
    22  // indexbody<> searches s for the first occurrence of sep and stores its byte index, or -1 if absent, through R5.
    23  // s is the string being searched, sep the string to search for; entered by branch from Index and IndexString with:
    24  // R1=&s[0], R2=len(s)
    25  // R3=&sep[0], R4=len(sep)
    26  // R5=&ret (int)
    27  // Caller must confirm availability of vx facility before calling. An empty sep yields -1; len(sep) > 64 is not implemented.
    28  TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
    29  	CMPBGT	R4, R2, notfound // len(sep) > len(s): no match possible
    30  	ADD	R1, R2 // R2=&s[len(s)] (one past the end)
    31  	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
    32  	CMPBEQ	R4, $0, notfound // empty sep is reported as not found
    33  	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
    34  	VLL	R4, (R3), V0 // contains first 16 bytes of sep
    35  	MOVD	R1, R7 // R7=current candidate position in s, starting at &s[0]
    36  index2plus: // len(sep)==2: test 16 candidate positions per iteration
    37  	CMPBNE	R4, $1, index3plus // R4==1 means len(sep)==2
    38  	MOVD	$15(R7), R9 // R9=R7+15
    39  	CMPBGE	R9, R2, index2to16 // not enough room for the 16-wide loop
    40  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
    41  	VONE	V16                // all ones, the byte pattern VFEEBS searches for
    42  	VREPH	$0, V0, V1         // first halfword of sep replicated across V1
    43  	CMPBGE	R9, R2, index2to16 // repeats the check above (R9 and R2 are unchanged since then)
    44  index2loop:
    45  	VL	0(R7), V2          // 16 bytes, even indices
    46  	VL	1(R7), V4          // 16 bytes, odd indices
    47  	VCEQH	V1, V2, V5         // compare even indices
    48  	VCEQH	V1, V4, V6         // compare odd indices
    49  	VSEL	V5, V6, V31, V7    // merge even and odd indices
    50  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
    51  	BLT	foundV17           // match: its offset from R7 is in V17
    52  	MOVD	$16(R7), R7        // R7+=16
    53  	ADD	$15, R7, R9        // R9=R7+15
    54  	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
    55  	CMPBLE	R7, R2, index2to16 // finish the remaining positions one at a time
    56  	BR	notfound
    57  
    58  index3plus: // len(sep)==3: halfword compare plus a compare of the third byte
    59  	CMPBNE	R4, $2, index4plus // R4==2 means len(sep)==3
    60  	ADD	$15, R7, R9        // R9=R7+15
    61  	CMPBGE	R9, R2, index2to16 // not enough room for the 16-wide loop
    62  	MOVD	$1, R0             // VLL index 1, i.e. 2 bytes
    63  	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
    64  	VONE	V16                // all ones, the byte pattern VFEEBS searches for
    65  	VREPH	$0, V0, V1         // first halfword of sep replicated across V1
    66  	VREPB	$2, V0, V8         // third byte of sep replicated across V8
    67  index3loop:
    68  	VL	(R7), V2           // load 16-bytes into V2
    69  	VLL	R0, 16(R7), V3     // load 2-bytes into V3
    70  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
    71  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
    72  	VCEQH	V1, V2, V5         // compare 2-byte even indices
    73  	VCEQH	V1, V4, V6         // compare 2-byte odd indices
    74  	VCEQB	V8, V9, V10        // compare last bytes
    75  	VSEL	V5, V6, V31, V7    // merge even and odd indices
    76  	VN	V7, V10, V7        // AND indices with last byte
    77  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
    78  	BLT	foundV17           // match: its offset from R7 is in V17
    79  	MOVD	$16(R7), R7        // R7+=16
    80  	ADD	$15, R7, R9        // R9=R7+15
    81  	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
    82  	CMPBLE	R7, R2, index2to16 // finish the remaining positions one at a time
    83  	BR	notfound
    84  
    85  index4plus: // len(sep)==4: word compares at byte shifts 0..3
    86  	CMPBNE	R4, $3, index5plus // R4==3 means len(sep)==4
    87  	ADD	$15, R7, R9        // R9=R7+15
    88  	CMPBGE	R9, R2, index2to16 // not enough room for the 16-wide loop
    89  	MOVD	$2, R0             // VLL index 2, i.e. 3 bytes
    90  	VGBM	$0x8888, V29       // 0xff000000ff000000...
    91  	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
    92  	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
    93  	VONE	V16                // all ones, the byte pattern VFEEBS searches for
    94  	VREPF	$0, V0, V1         // the 4 bytes of sep replicated across V1
    95  index4loop:
    96  	VL	(R7), V2           // load 16-bytes into V2
    97  	VLL	R0, 16(R7), V3     // load 3-bytes into V3
    98  	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
    99  	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   100  	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
   101  	VCEQF	V1, V2, V5         // compare index 0, 4, ...
   102  	VCEQF	V1, V4, V6         // compare index 1, 5, ...
   103  	VCEQF	V1, V9, V11        // compare index 2, 6, ...
   104  	VCEQF	V1, V10, V12       // compare index 3, 7, ...
   105  	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
   106  	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
   107  	VSEL	V13, V14, V31, V7  // final merge
   108  	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   109  	BLT	foundV17           // match: its offset from R7 is in V17
   110  	MOVD	$16(R7), R7        // R7+=16
   111  	ADD	$15, R7, R9        // R9=R7+15
   112  	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
   113  	CMPBLE	R7, R2, index2to16 // finish the remaining positions one at a time
   114  	BR	notfound
   115  
   116  index5plus:
   117  	CMPBGT	R4, $15, index17plus // len(sep) > 16 needs more than one vector
   118  index2to16: // one candidate position per compare, for len(sep) <= 16; also finishes the 16-wide loops above
   119  	CMPBGT	R7, R2, notfound // already past the last valid index
   120  	MOVD	$1(R7), R8 // R8=R7+1
   121  	CMPBGT	R8, R2, index2to16tail // only one candidate position left
   122  index2to16loop:
   123  	// unrolled 2x
   124  	VLL	R4, (R7), V1 // candidate bytes at R7
   125  	VLL	R4, 1(R7), V2 // candidate bytes at R7+1
   126  	VCEQGS	V0, V1, V3 // compare the first candidate with sep, setting the condition code
   127  	BEQ	found // match at R7
   128  	MOVD	$1(R7), R7 // R7++
   129  	VCEQGS	V0, V2, V4 // compare the second candidate with sep
   130  	BEQ	found // match at R7
   131  	MOVD	$1(R7), R7 // R7++
   132  	CMPBLT	R7, R2, index2to16loop // at least two candidate positions remain
   133  	CMPBGT	R7, R2, notfound // no candidate positions remain
   134  index2to16tail: // exactly one candidate position (R7==R2) remains
   135  	VLL	R4, (R7), V1
   136  	VCEQGS	V0, V1, V2
   137  	BEQ	found
   138  	BR	notfound
   139  
   140  index17plus: // len(sep) in 17..32: compare two vectors per candidate position
   141  	CMPBGT	R4, $31, index33plus // len(sep) > 32
   142  	SUB	$16, R4, R0 // R0=VLL index for the bytes of sep beyond the first 16
   143  	VLL	R0, 16(R3), V1 // V1=remaining bytes of sep
   144  	VONE	V7 // all ones, the expected result of a full match
   145  index17to32loop:
   146  	VL	(R7), V2 // first 16 candidate bytes
   147  	VLL	R0, 16(R7), V3 // remaining candidate bytes
   148  	VCEQG	V0, V2, V4
   149  	VCEQG	V1, V3, V5
   150  	VN	V4, V5, V6 // AND of the per-vector compare results
   151  	VCEQGS	V6, V7, V8 // check that the combined result is all ones
   152  	BEQ	found
   153  	MOVD	$1(R7), R7 // R7++
   154  	CMPBLE  R7, R2, index17to32loop // continue while R7 <= last valid index
   155  	BR	notfound
   156  
   157  index33plus: // len(sep) in 33..48: compare three vectors per candidate position
   158  	CMPBGT	R4, $47, index49plus // len(sep) > 48
   159  	SUB	$32, R4, R0 // R0=VLL index for the bytes of sep beyond the first 32
   160  	VL	16(R3), V1 // V1=sep[16:32]
   161  	VLL	R0, 32(R3), V2 // V2=remaining bytes of sep
   162  	VONE	V11 // all ones, the expected result of a full match
   163  index33to48loop:
   164  	VL	(R7), V3
   165  	VL	16(R7), V4
   166  	VLL	R0, 32(R7), V5
   167  	VCEQG	V0, V3, V6
   168  	VCEQG	V1, V4, V7
   169  	VCEQG	V2, V5, V8
   170  	VN	V6, V7, V9
   171  	VN	V8, V9, V10 // AND of all three compare results
   172  	VCEQGS	V10, V11, V12 // check that the combined result is all ones
   173  	BEQ	found
   174  	MOVD	$1(R7), R7 // R7++
   175  	CMPBLE  R7, R2, index33to48loop // continue while R7 <= last valid index
   176  	BR	notfound
   177  
   178  index49plus: // len(sep) in 49..64: compare four vectors per candidate position
   179  	CMPBGT	R4, $63, index65plus // len(sep) > 64
   180  	SUB	$48, R4, R0 // R0=VLL index for the bytes of sep beyond the first 48
   181  	VL	16(R3), V1 // V1=sep[16:32]
   182  	VL	32(R3), V2 // V2=sep[32:48]
   183  	VLL	R0, 48(R3), V3 // V3=remaining bytes of sep
   184  	VONE	V15 // all ones, the expected result of a full match
   185  index49to64loop:
   186  	VL	(R7), V4
   187  	VL	16(R7), V5
   188  	VL	32(R7), V6
   189  	VLL	R0, 48(R7), V7
   190  	VCEQG	V0, V4, V8
   191  	VCEQG	V1, V5, V9
   192  	VCEQG	V2, V6, V10
   193  	VCEQG	V3, V7, V11
   194  	VN	V8, V9, V12
   195  	VN	V10, V11, V13
   196  	VN	V12, V13, V14 // AND of all four compare results
   197  	VCEQGS	V14, V15, V16 // check that the combined result is all ones
   198  	BEQ	found
   199  	MOVD	$1(R7), R7 // R7++
   200  	CMPBLE  R7, R2, index49to64loop // continue while R7 <= last valid index; otherwise fall through to notfound
   201  notfound:
   202  	MOVD	$-1, (R5) // *ret = -1
   203  	RET
   204  
   205  index65plus:
   206  	// not implemented
   207  	MOVD	$0, (R0) // NOTE(review): looks like a deliberate faulting store for len(sep) > 64 — confirm
   208  	RET
   209  
   210  foundV17: // index is in doubleword V17[0]
   211  	VLGVG	$0, V17, R8 // R8=offset of the match relative to R7
   212  	ADD	R8, R7 // R7=address of the match
   213  found:
   214  	SUB	R1, R7 // R7=match address - &s[0], i.e. the index
   215  	MOVD	R7, (R5) // *ret = index
   216  	RET