// This file belongs to package search and depends on the standard "bytes"
// package (used by indexRabinKarpBytes).

// RabinKarp implements substring search with the Rabin-Karp rolling-hash
// algorithm. For single-pattern searching it is inferior to the
// Knuth–Morris–Pratt and Boyer–Moore string search algorithms (and other
// faster single-pattern algorithms) because of its slow worst-case behavior.
// However, it is a useful algorithm for multiple-pattern searches.
type RabinKarp struct{}

// NewRabinKarp returns a new RabinKarp searcher.
func NewRabinKarp() *RabinKarp {
	return new(RabinKarp)
}

// String returns the name of the algorithm.
func (rk *RabinKarp) String() string {
	return "RABIN-KARP"
}

// FindIndex returns the index of the first occurrence of pattern in text, or
// -1 if pattern is not present (including when pattern is longer than text).
// It also returns -1 when either argument is nil; a non-nil empty pattern
// matches at index 0.
func (rk *RabinKarp) FindIndex(text, pattern []byte) int {
	if text == nil || pattern == nil {
		return -1
	}
	return rabinKarpFinder(pattern, text)
}

// FindIndexString returns the index of the first occurrence of pattern in
// text, or -1 if pattern is not present (including when pattern is longer
// than text). An empty pattern matches at index 0.
func (rk *RabinKarp) FindIndexString(text, pattern string) int {
	return rabinKarpFinderString(pattern, text)
}

// rabinKarpFinder adapts the (pattern, text) argument order to
// indexRabinKarpBytes, which takes (text, pattern).
func rabinKarpFinder(pattern, text []byte) int {
	return indexRabinKarpBytes(text, pattern)
}

// rabinKarpFinderString adapts the (pattern, text) argument order to
// indexRabinKarpString, which takes (text, pattern).
func rabinKarpFinderString(pattern, text string) int {
	return indexRabinKarpString(text, pattern)
}

// PrimeRK is the prime base used in Rabin-Karp algorithm.
const PrimeRK = 16777619

// powRK returns PrimeRK raised to the n-th power using uint32 wrap-around
// arithmetic, computed by binary (square-and-multiply) exponentiation. It is
// the factor used to remove the outgoing byte from a rolling hash over a
// window of n bytes.
func powRK(n int) uint32 {
	var pow, sq uint32 = 1, PrimeRK
	for i := n; i > 0; i >>= 1 {
		if i&1 != 0 {
			pow *= sq
		}
		sq *= sq
	}
	return pow
}

// indexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
// first occurrence of sep in s, or -1 if not present.
func indexRabinKarpBytes(s, sep []byte) int {
	n := len(sep)
	if n > len(s) {
		// A needle longer than the haystack can never match; without this
		// guard the priming loop below would index past the end of s.
		return -1
	}
	hashsep, pow := hashBytes(sep)

	// Hash the first window s[:n].
	var h uint32
	for i := 0; i < n; i++ {
		h = h*PrimeRK + uint32(s[i])
	}
	// A hash match is only a candidate; confirm with a real comparison.
	if h == hashsep && bytes.Equal(s[:n], sep) {
		return 0
	}
	// Slide the window one byte at a time: add s[i], drop s[i-n].
	for i := n; i < len(s); {
		h *= PrimeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i-n])
		i++
		if h == hashsep && bytes.Equal(s[i-n:i], sep) {
			return i - n
		}
	}
	return -1
}

// hashBytes returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
func hashBytes(sep []byte) (uint32, uint32) {
	hash := uint32(0)
	for i := 0; i < len(sep); i++ {
		hash = hash*PrimeRK + uint32(sep[i])
	}
	return hash, powRK(len(sep))
}

// hashBytesRev returns the hash of the reverse of sep and the
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
func hashBytesRev(sep []byte) (uint32, uint32) {
	hash := uint32(0)
	for i := len(sep) - 1; i >= 0; i-- {
		hash = hash*PrimeRK + uint32(sep[i])
	}
	return hash, powRK(len(sep))
}

// indexRabinKarpString uses the Rabin-Karp search algorithm to return the index of the
// first occurrence of substr in s, or -1 if not present.
func indexRabinKarpString(s, substr string) int {
	n := len(substr)
	if n > len(s) {
		// A needle longer than the haystack can never match; without this
		// guard the priming loop below would index past the end of s.
		return -1
	}
	hashss, pow := hashString(substr)

	// Hash the first window s[:n].
	var h uint32
	for i := 0; i < n; i++ {
		h = h*PrimeRK + uint32(s[i])
	}
	// A hash match is only a candidate; confirm with a real comparison.
	if h == hashss && s[:n] == substr {
		return 0
	}
	// Slide the window one byte at a time: add s[i], drop s[i-n].
	for i := n; i < len(s); {
		h *= PrimeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i-n])
		i++
		if h == hashss && s[i-n:i] == substr {
			return i - n
		}
	}
	return -1
}

// hashString returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
func hashString(sep string) (uint32, uint32) {
	hash := uint32(0)
	for i := 0; i < len(sep); i++ {
		hash = hash*PrimeRK + uint32(sep[i])
	}
	return hash, powRK(len(sep))
}

// hashStringRev returns the hash of the reverse of sep and the
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
func hashStringRev(sep string) (uint32, uint32) {
	hash := uint32(0)
	for i := len(sep) - 1; i >= 0; i-- {
		hash = hash*PrimeRK + uint32(sep[i])
	}
	return hash, powRK(len(sep))
}