github.com/pgavlin/text@v0.0.0-20240419000839-8438d0a47805/internal/bytealg/bytealg.go (about)

     1  package bytealg
     2  
     3  import (
     4  	"unsafe"
     5  )
     6  
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
// If MaxLen is not 0, make sure MaxLen >= 4.
//
// This declaration assigns no value itself: the go:linkname directive below
// binds the variable to the symbol internal/bytealg.MaxLen.
// NOTE(review): presumably the standard library sets the value during its own
// initialization — confirm for the Go toolchain in use.
//
//go:linkname MaxLen internal/bytealg.MaxLen
var MaxLen int
    12  
// PrimeRK is the prime base used in Rabin-Karp algorithm.
// HashStr, HashStrRev and IndexRabinKarp multiply their running uint32 hash
// by this value for every byte they consume. It is an untyped constant, so it
// takes the type of the expression it is used in.
const PrimeRK = 16777619
    15  
    16  // AsString returns its input as a string.
    17  func AsString[S ~string | ~[]byte](s S) string {
    18  	return *(*string)(unsafe.Pointer(&s))
    19  }
    20  
// cmpstring is declared without a body; the go:linkname directive below
// resolves it to runtime.cmpstring. Compare forwards to it.
// NOTE(review): presumably a three-way byte-wise comparison of a and b
// (negative, zero, positive) — confirm against the runtime.
//
//go:linkname cmpstring runtime.cmpstring
func cmpstring(a, b string) int
    23  
    24  func Compare[S1, S2 ~string | ~[]byte](a S1, b S2) int {
    25  	return cmpstring(AsString(a), AsString(b))
    26  }
    27  
// countString is declared without a body; the go:linkname directive below
// resolves it to internal/bytealg.CountString. CountString forwards to it.
// NOTE(review): presumably returns the number of bytes in s equal to c —
// confirm against the standard library.
//
//go:linkname countString internal/bytealg.CountString
func countString(s string, c byte) int
    30  
    31  func CountString[S ~string | ~[]byte](s S, c byte) int {
    32  	return countString(AsString(s), c)
    33  }
    34  
    35  // HashStr returns the hash and the appropriate multiplicative
    36  // factor for use in Rabin-Karp algorithm.
    37  func HashStr[S ~string | ~[]byte](sep S) (uint32, uint32) {
    38  	hash := uint32(0)
    39  	for i := 0; i < len(sep); i++ {
    40  		hash = hash*PrimeRK + uint32(sep[i])
    41  	}
    42  	var pow, sq uint32 = 1, PrimeRK
    43  	for i := len(sep); i > 0; i >>= 1 {
    44  		if i&1 != 0 {
    45  			pow *= sq
    46  		}
    47  		sq *= sq
    48  	}
    49  	return hash, pow
    50  }
    51  
    52  // HashStrRev returns the hash of the reverse of sep and the
    53  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    54  func HashStrRev[S ~string | ~[]byte](sep S) (uint32, uint32) {
    55  	hash := uint32(0)
    56  	for i := len(sep) - 1; i >= 0; i-- {
    57  		hash = hash*PrimeRK + uint32(sep[i])
    58  	}
    59  	var pow, sq uint32 = 1, PrimeRK
    60  	for i := len(sep); i > 0; i >>= 1 {
    61  		if i&1 != 0 {
    62  			pow *= sq
    63  		}
    64  		sq *= sq
    65  	}
    66  	return hash, pow
    67  }
    68  
// Cutover is declared without a body; the go:linkname directive below
// resolves it to internal/bytealg.Cutover.
// NOTE(review): in the standard library this appears to report how many
// failed IndexByte probes to tolerate before switching to Index, where n is
// the number of bytes processed so far — confirm for the Go toolchain in use.
//
//go:linkname Cutover internal/bytealg.Cutover
func Cutover(n int) int
    71  
// indexByteString is declared without a body; the go:linkname directive below
// resolves it to internal/bytealg.IndexByteString. IndexByteString forwards
// to it. The go:noescape directive tells the compiler that the pointers
// passed as arguments do not escape through this call.
// NOTE(review): presumably returns the index of the first byte in s equal to
// c, or -1 if there is none — confirm against the standard library.
//
//go:linkname indexByteString internal/bytealg.IndexByteString
//go:noescape
func indexByteString(s string, c byte) int
    75  
    76  func IndexByteString[S ~string | ~[]byte](s S, c byte) int {
    77  	return indexByteString(AsString(s), c)
    78  }
    79  
    80  // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
    81  // first occurrence of sep in s, or -1 if not present.
    82  func IndexRabinKarp[S1 ~string | ~[]byte, S2 ~string | ~[]byte](s S1, sep S2) int {
    83  	// Rabin-Karp search
    84  	hashss, pow := HashStr(sep)
    85  	n := len(sep)
    86  	var h uint32
    87  	for i := 0; i < n; i++ {
    88  		h = h*PrimeRK + uint32(s[i])
    89  	}
    90  	if h == hashss && string(s[:n]) == string(sep) {
    91  		return 0
    92  	}
    93  	for i := n; i < len(s); {
    94  		h *= PrimeRK
    95  		h += uint32(s[i])
    96  		h -= pow * uint32(s[i-n])
    97  		i++
    98  		if h == hashss && string(s[i-n:i]) == string(sep) {
    99  			return i - n
   100  		}
   101  	}
   102  	return -1
   103  }
   104  
// indexString is declared without a body; the go:linkname directive below
// resolves it to internal/bytealg.IndexString. IndexString forwards to it.
// The go:noescape directive tells the compiler that the pointers passed as
// arguments do not escape through this call.
//
//go:linkname indexString internal/bytealg.IndexString
//go:noescape
func indexString(a, b string) int
   108  
   109  // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
   110  // Requires 2 <= len(b) <= MaxLen.
   111  func IndexString[S1, S2 ~string | ~[]byte](a S1, b S2) int {
   112  	return indexString(AsString(a), AsString(b))
   113  }