github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/index.native.go

github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/index.native.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2018 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && (amd64 || arm64 || s390x || ppc64le || ppc64)
     9  
    10  package bytealg
    11  
    12  import "github.com/primecitizens/pcz/std/core/cmp"
    13  
//go:noescape

// indexSlice returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= indexArgBMaxLen.
//
// The declaration has no Go body in this file; the implementation is supplied
// elsewhere (presumably per-architecture assembly — confirm against the
// package's other files). The go:noescape directive above tells the compiler
// that the slices passed in do not escape through this call.
func indexSlice(a, b []byte) int
    19  
//go:noescape

// index returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= indexArgBMaxLen.
//
// The declaration has no Go body in this file; the implementation is supplied
// elsewhere (presumably per-architecture assembly — confirm against the
// package's other files). The go:noescape directive above tells the compiler
// that the strings passed in do not escape through this call.
func index(a, b string) int
    25  
    26  // IndexSlice returns the index of the first instance of sep in s,
    27  // or -1 if sep is not present in s.
    28  func IndexSlice(s, sep []byte) int {
    29  	n := len(sep)
    30  	switch {
    31  	case n == 0:
    32  		return 0
    33  	case n == 1:
    34  		return IndexSliceByte(s, sep[0])
    35  	case n == len(s):
    36  		if cmp.BytesEqual(sep, s) {
    37  			return 0
    38  		}
    39  		return -1
    40  	case n > len(s):
    41  		return -1
    42  	case n <= indexArgBMaxLen:
    43  		// Use brute force when s and sep both are small
    44  		if len(s) <= MaxBruteForce {
    45  			return indexSlice(s, sep)
    46  		}
    47  		c0 := sep[0]
    48  		c1 := sep[1]
    49  		i := 0
    50  		t := len(s) - n + 1
    51  		fails := 0
    52  		for i < t {
    53  			if s[i] != c0 {
    54  				// IndexSliceByte is faster than indexSlice, so use it as long as
    55  				// we're not getting lots of false positives.
    56  				o := IndexSliceByte(s[i+1:t], c0)
    57  				if o < 0 {
    58  					return -1
    59  				}
    60  				i += o + 1
    61  			}
    62  			if s[i+1] == c1 && cmp.BytesEqual(s[i:i+n], sep) {
    63  				return i
    64  			}
    65  			fails++
    66  			i++
    67  			// Switch to indexSlice when IndexByte produces too many false positives.
    68  			if fails > cutover(i) {
    69  				r := indexSlice(s[i:], sep)
    70  				if r >= 0 {
    71  					return r + i
    72  				}
    73  				return -1
    74  			}
    75  		}
    76  		return -1
    77  	}
    78  	c0 := sep[0]
    79  	c1 := sep[1]
    80  	i := 0
    81  	fails := 0
    82  	t := len(s) - n + 1
    83  	for i < t {
    84  		if s[i] != c0 {
    85  			o := IndexSliceByte(s[i+1:t], c0)
    86  			if o < 0 {
    87  				break
    88  			}
    89  			i += o + 1
    90  		}
    91  		if s[i+1] == c1 && cmp.BytesEqual(s[i:i+n], sep) {
    92  			return i
    93  		}
    94  		i++
    95  		fails++
    96  		if fails >= 4+i>>4 && i < t {
    97  			// Give up on IndexByte, it isn't skipping ahead
    98  			// far enough to be better than Rabin-Karp.
    99  			// Experiments (using IndexPeriodic) suggest
   100  			// the cutover is about 16 byte skips.
   101  			// TODO: if large prefixes of sep are matching
   102  			// we should cutover at even larger average skips,
   103  			// because Equal becomes that much more expensive.
   104  			// This code does not take that effect into account.
   105  			j := IndexRabinKarpBytes(s[i:], sep)
   106  			if j < 0 {
   107  				return -1
   108  			}
   109  			return i + j
   110  		}
   111  	}
   112  	return -1
   113  }
   114  
   115  // Index returns the index of the first instance of substr in s,
   116  // or -1 if substr is not present in s.
   117  func Index(s, substr string) int {
   118  	n := len(substr)
   119  	switch {
   120  	case n == 0:
   121  		return 0
   122  	case n == 1:
   123  		return IndexByte(s, substr[0])
   124  	case n == len(s):
   125  		if substr == s {
   126  			return 0
   127  		}
   128  		return -1
   129  	case n > len(s):
   130  		return -1
   131  	case n <= indexArgBMaxLen:
   132  		// Use brute force when s and substr both are small
   133  		if len(s) <= MaxBruteForce {
   134  			return index(s, substr)
   135  		}
   136  		c0 := substr[0]
   137  		c1 := substr[1]
   138  		i := 0
   139  		t := len(s) - n + 1
   140  		fails := 0
   141  		for i < t {
   142  			if s[i] != c0 {
   143  				// IndexByte is faster than index, so use it as long as
   144  				// we're not getting lots of false positives.
   145  				o := IndexByte(s[i+1:t], c0)
   146  				if o < 0 {
   147  					return -1
   148  				}
   149  				i += o + 1
   150  			}
   151  			if s[i+1] == c1 && s[i:i+n] == substr {
   152  				return i
   153  			}
   154  			fails++
   155  			i++
   156  			// Switch to index when IndexByte produces too many false positives.
   157  			if fails > cutover(i) {
   158  				r := index(s[i:], substr)
   159  				if r >= 0 {
   160  					return r + i
   161  				}
   162  				return -1
   163  			}
   164  		}
   165  		return -1
   166  	}
   167  	c0 := substr[0]
   168  	c1 := substr[1]
   169  	i := 0
   170  	t := len(s) - n + 1
   171  	fails := 0
   172  	for i < t {
   173  		if s[i] != c0 {
   174  			o := IndexByte(s[i+1:t], c0)
   175  			if o < 0 {
   176  				return -1
   177  			}
   178  			i += o + 1
   179  		}
   180  		if s[i+1] == c1 && s[i:i+n] == substr {
   181  			return i
   182  		}
   183  		i++
   184  		fails++
   185  		if fails >= 4+i>>4 && i < t {
   186  			// See comment in ../bytes/bytes.go.
   187  			j := IndexRabinKarp(s[i:], substr)
   188  			if j < 0 {
   189  				return -1
   190  			}
   191  			return i + j
   192  		}
   193  	}
   194  	return -1
   195  }