github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/index.generic.go

github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/index.generic.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  //
     4  // Copyright 2018 The Go Authors. All rights reserved.
     5  // Use of this source code is governed by a BSD-style
     6  // license that can be found in the LICENSE file.
     7  
     8  //go:build pcz && !(amd64 || arm64 || s390x || ppc64le || ppc64)
     9  
    10  package bytealg
    11  
    12  import (
    13  	"github.com/primecitizens/pcz/std/core/cmp"
    14  )
    15  
    16  const (
    17  	// indexArgBMaxLen is the maximum length of the string to be searched for (argument b) in Index.
    18  	// If indexArgBMaxLen is not 0, make sure indexArgBMaxLen >= 4.
    19  	indexArgBMaxLen = 0
    20  	MaxBruteForce   = 0
    21  )
    22  
    23  // cutover reports the number of failures of IndexByte we should tolerate
    24  // before switching over to Index.
    25  // n is the number of bytes processed so far.
    26  // See the bytes.Index implementation for details.
    27  func cutover(n int) int {
    28  	return 0
    29  }
    30  
    31  // Index returns the index of the first instance of b in a,
    32  // or -1 if b is not present in a.
    33  func Index(s, sep []byte) int {
    34  	n := len(sep)
    35  	switch {
    36  	case n == 0:
    37  		return 0
    38  	case n == 1:
    39  		return IndexSliceByte(s, sep[0])
    40  	case n == len(s):
    41  		if cmp.BytesEqual(sep, s) {
    42  			return 0
    43  		}
    44  		return -1
    45  	case n > len(s):
    46  		return -1
    47  	case n <= indexArgBMaxLen:
    48  		c0 := sep[0]
    49  		c1 := sep[1]
    50  		i := 0
    51  		t := len(s) - n + 1
    52  		fails := 0
    53  		for i < t {
    54  			if s[i] != c0 {
    55  				// IndexByte is faster than Index, so use it as long as
    56  				// we're not getting lots of false positives.
    57  				o := IndexSliceByte(s[i+1:t], c0)
    58  				if o < 0 {
    59  					return -1
    60  				}
    61  				i += o + 1
    62  			}
    63  			if s[i+1] == c1 && cmp.BytesEqual(s[i:i+n], sep) {
    64  				return i
    65  			}
    66  			fails++
    67  			i++
    68  			// Switch to Index when IndexByte produces too many false positives.
    69  			if fails > cutover(i) {
    70  				r := Index(s[i:], sep)
    71  				if r >= 0 {
    72  					return r + i
    73  				}
    74  				return -1
    75  			}
    76  		}
    77  		return -1
    78  	}
    79  	c0 := sep[0]
    80  	c1 := sep[1]
    81  	i := 0
    82  	fails := 0
    83  	t := len(s) - n + 1
    84  	for i < t {
    85  		if s[i] != c0 {
    86  			o := IndexSliceByte(s[i+1:t], c0)
    87  			if o < 0 {
    88  				break
    89  			}
    90  			i += o + 1
    91  		}
    92  		if s[i+1] == c1 && cmp.BytesEqual(s[i:i+n], sep) {
    93  			return i
    94  		}
    95  		i++
    96  		fails++
    97  		if fails >= 4+i>>4 && i < t {
    98  			// Give up on IndexByte, it isn't skipping ahead
    99  			// far enough to be better than Rabin-Karp.
   100  			// Experiments (using IndexPeriodic) suggest
   101  			// the cutover is about 16 byte skips.
   102  			// TODO: if large prefixes of sep are matching
   103  			// we should cutover at even larger average skips,
   104  			// because Equal becomes that much more expensive.
   105  			// This code does not take that effect into account.
   106  			j := IndexRabinKarpBytes(s[i:], sep)
   107  			if j < 0 {
   108  				return -1
   109  			}
   110  			return i + j
   111  		}
   112  	}
   113  	return -1
   114  }
   115  
   116  // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
   117  func IndexString(s, substr string) int {
   118  	n := len(substr)
   119  	switch {
   120  	case n == 0:
   121  		return 0
   122  	case n == 1:
   123  		return IndexByte(s, substr[0])
   124  	case n == len(s):
   125  		if substr == s {
   126  			return 0
   127  		}
   128  		return -1
   129  	case n > len(s):
   130  		return -1
   131  	case n <= indexArgBMaxLen:
   132  		c0 := substr[0]
   133  		c1 := substr[1]
   134  		i := 0
   135  		t := len(s) - n + 1
   136  		fails := 0
   137  		for i < t {
   138  			if s[i] != c0 {
   139  				// IndexByteString is faster than IndexString, so use it as long as
   140  				// we're not getting lots of false positives.
   141  				o := IndexByte(s[i+1:t], c0)
   142  				if o < 0 {
   143  					return -1
   144  				}
   145  				i += o + 1
   146  			}
   147  			if s[i+1] == c1 && s[i:i+n] == substr {
   148  				return i
   149  			}
   150  			fails++
   151  			i++
   152  			// Switch to IndexString when IndexByte produces too many false positives.
   153  			if fails > cutover(i) {
   154  				r := IndexString(s[i:], substr)
   155  				if r >= 0 {
   156  					return r + i
   157  				}
   158  				return -1
   159  			}
   160  		}
   161  		return -1
   162  	}
   163  	c0 := substr[0]
   164  	c1 := substr[1]
   165  	i := 0
   166  	t := len(s) - n + 1
   167  	fails := 0
   168  	for i < t {
   169  		if s[i] != c0 {
   170  			o := IndexByte(s[i+1:t], c0)
   171  			if o < 0 {
   172  				return -1
   173  			}
   174  			i += o + 1
   175  		}
   176  		if s[i+1] == c1 && s[i:i+n] == substr {
   177  			return i
   178  		}
   179  		i++
   180  		fails++
   181  		if fails >= 4+i>>4 && i < t {
   182  			// See comment in ../bytes/bytes.go.
   183  			j := IndexRabinKarp(s[i:], substr)
   184  			if j < 0 {
   185  				return -1
   186  			}
   187  			return i + j
   188  		}
   189  	}
   190  	return -1
   191  }