github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/rabin_karp.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright 2023 The Prime Citizens 3 4 package bytealg 5 6 import "github.com/primecitizens/pcz/std/core/cmp" 7 8 // FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev, 9 // IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate 10 // three of them without causing allocation? 11 12 // PrimeRK is the prime base used in Rabin-Karp algorithm. 13 const PrimeRK = 16777619 14 15 // HashStrBytes returns the hash and the appropriate multiplicative 16 // factor for use in Rabin-Karp algorithm. 17 func HashStrBytes(sep []byte) (uint32, uint32) { 18 hash := uint32(0) 19 for i := 0; i < len(sep); i++ { 20 hash = hash*PrimeRK + uint32(sep[i]) 21 } 22 var pow, sq uint32 = 1, PrimeRK 23 for i := len(sep); i > 0; i >>= 1 { 24 if i&1 != 0 { 25 pow *= sq 26 } 27 sq *= sq 28 } 29 return hash, pow 30 } 31 32 // HashStr returns the hash and the appropriate multiplicative 33 // factor for use in Rabin-Karp algorithm. 34 func HashStr(sep string) (uint32, uint32) { 35 hash := uint32(0) 36 for i := 0; i < len(sep); i++ { 37 hash = hash*PrimeRK + uint32(sep[i]) 38 } 39 var pow, sq uint32 = 1, PrimeRK 40 for i := len(sep); i > 0; i >>= 1 { 41 if i&1 != 0 { 42 pow *= sq 43 } 44 sq *= sq 45 } 46 return hash, pow 47 } 48 49 // HashStrRevBytes returns the hash of the reverse of sep and the 50 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 51 func HashStrRevBytes(sep []byte) (uint32, uint32) { 52 hash := uint32(0) 53 for i := len(sep) - 1; i >= 0; i-- { 54 hash = hash*PrimeRK + uint32(sep[i]) 55 } 56 var pow, sq uint32 = 1, PrimeRK 57 for i := len(sep); i > 0; i >>= 1 { 58 if i&1 != 0 { 59 pow *= sq 60 } 61 sq *= sq 62 } 63 return hash, pow 64 } 65 66 // HashStrRev returns the hash of the reverse of sep and the 67 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 68 func HashStrRev(sep string) (uint32, uint32) { 69 hash := uint32(0) 70 for i := len(sep) - 1; i >= 0; i-- { 71 hash = hash*PrimeRK + uint32(sep[i]) 72 } 73 var pow, sq uint32 = 1, PrimeRK 74 for i := len(sep); i > 0; i >>= 1 { 75 if i&1 != 0 { 76 pow *= sq 77 } 78 sq *= sq 79 } 80 return hash, pow 81 } 82 83 // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the 84 // first occurrence of substr in s, or -1 if not present. 85 func IndexRabinKarpBytes(s, sep []byte) int { 86 // Rabin-Karp search 87 hashsep, pow := HashStrBytes(sep) 88 n := len(sep) 89 var h uint32 90 for i := 0; i < n; i++ { 91 h = h*PrimeRK + uint32(s[i]) 92 } 93 if h == hashsep && cmp.BytesEqual(s[:n], sep) { 94 return 0 95 } 96 for i := n; i < len(s); { 97 h *= PrimeRK 98 h += uint32(s[i]) 99 h -= pow * uint32(s[i-n]) 100 i++ 101 if h == hashsep && cmp.BytesEqual(s[i-n:i], sep) { 102 return i - n 103 } 104 } 105 return -1 106 } 107 108 // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the 109 // first occurrence of substr in s, or -1 if not present. 110 func IndexRabinKarp(s, substr string) int { 111 // Rabin-Karp search 112 hashss, pow := HashStr(substr) 113 n := len(substr) 114 var h uint32 115 for i := 0; i < n; i++ { 116 h = h*PrimeRK + uint32(s[i]) 117 } 118 if h == hashss && s[:n] == substr { 119 return 0 120 } 121 for i := n; i < len(s); { 122 h *= PrimeRK 123 h += uint32(s[i]) 124 h -= pow * uint32(s[i-n]) 125 i++ 126 if h == hashss && s[i-n:i] == substr { 127 return i - n 128 } 129 } 130 return -1 131 }