github.com/pgavlin/text@v0.0.0-20240419000839-8438d0a47805/internal/bytealg/bytealg.go (about) 1 package bytealg 2 3 import ( 4 "unsafe" 5 ) 6 7 // MaxLen is the maximum length of the string to be searched for (argument b) in Index. 8 // If MaxLen is not 0, make sure MaxLen >= 4. 9 // 10 //go:linkname MaxLen internal/bytealg.MaxLen 11 var MaxLen int 12 13 // PrimeRK is the prime base used in Rabin-Karp algorithm. 14 const PrimeRK = 16777619 15 16 // AsString returns its input as a string. 17 func AsString[S ~string | ~[]byte](s S) string { 18 return *(*string)(unsafe.Pointer(&s)) 19 } 20 21 //go:linkname cmpstring runtime.cmpstring 22 func cmpstring(a, b string) int 23 24 func Compare[S1, S2 ~string | ~[]byte](a S1, b S2) int { 25 return cmpstring(AsString(a), AsString(b)) 26 } 27 28 //go:linkname countString internal/bytealg.CountString 29 func countString(s string, c byte) int 30 31 func CountString[S ~string | ~[]byte](s S, c byte) int { 32 return countString(AsString(s), c) 33 } 34 35 // HashStr returns the hash and the appropriate multiplicative 36 // factor for use in Rabin-Karp algorithm. 37 func HashStr[S ~string | ~[]byte](sep S) (uint32, uint32) { 38 hash := uint32(0) 39 for i := 0; i < len(sep); i++ { 40 hash = hash*PrimeRK + uint32(sep[i]) 41 } 42 var pow, sq uint32 = 1, PrimeRK 43 for i := len(sep); i > 0; i >>= 1 { 44 if i&1 != 0 { 45 pow *= sq 46 } 47 sq *= sq 48 } 49 return hash, pow 50 } 51 52 // HashStrRev returns the hash of the reverse of sep and the 53 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 54 func HashStrRev[S ~string | ~[]byte](sep S) (uint32, uint32) { 55 hash := uint32(0) 56 for i := len(sep) - 1; i >= 0; i-- { 57 hash = hash*PrimeRK + uint32(sep[i]) 58 } 59 var pow, sq uint32 = 1, PrimeRK 60 for i := len(sep); i > 0; i >>= 1 { 61 if i&1 != 0 { 62 pow *= sq 63 } 64 sq *= sq 65 } 66 return hash, pow 67 } 68 69 //go:linkname Cutover internal/bytealg.Cutover 70 func Cutover(n int) int 71 72 //go:linkname indexByteString internal/bytealg.IndexByteString 73 //go:noescape 74 func indexByteString(s string, c byte) int 75 76 func IndexByteString[S ~string | ~[]byte](s S, c byte) int { 77 return indexByteString(AsString(s), c) 78 } 79 80 // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the 81 // first occurrence of sep in s, or -1 if not present. 82 func IndexRabinKarp[S1 ~string | ~[]byte, S2 ~string | ~[]byte](s S1, sep S2) int { 83 // Rabin-Karp search 84 hashss, pow := HashStr(sep) 85 n := len(sep) 86 var h uint32 87 for i := 0; i < n; i++ { 88 h = h*PrimeRK + uint32(s[i]) 89 } 90 if h == hashss && string(s[:n]) == string(sep) { 91 return 0 92 } 93 for i := n; i < len(s); { 94 h *= PrimeRK 95 h += uint32(s[i]) 96 h -= pow * uint32(s[i-n]) 97 i++ 98 if h == hashss && string(s[i-n:i]) == string(sep) { 99 return i - n 100 } 101 } 102 return -1 103 } 104 105 //go:linkname indexString internal/bytealg.IndexString 106 //go:noescape 107 func indexString(a, b string) int 108 109 // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a. 110 // Requires 2 <= len(b) <= MaxLen. 111 func IndexString[S1, S2 ~string | ~[]byte](a S1, b S2) int { 112 return indexString(AsString(a), AsString(b)) 113 }