github.com/scottcagno/storage@v1.8.0/pkg/search/rabin-karp.go (about)

     1  package search
     2  
     3  import "bytes"
     4  
     5  // RabinKarp algorithm is inferior for single pattern searching to Knuth–Morris–Pratt algorithm or the
     6  // Boyer–Moore string search algorithm (and other faster single pattern string searching algorithms) because
     7  // of its slow worst case behavior. However, it is a useful algorithm for multiple pattern searches.
     8  type RabinKarp struct{}
     9  
    10  func NewRabinKarp() *RabinKarp {
    11  	return new(RabinKarp)
    12  }
    13  
    14  func (rk *RabinKarp) String() string {
    15  	return "RABIN-KARP"
    16  }
    17  
    18  func (rk *RabinKarp) FindIndex(text, pattern []byte) int {
    19  	if text == nil || pattern == nil {
    20  		return -1
    21  	}
    22  	return rabinKarpFinder(pattern, text)
    23  }
    24  
    25  func (rk *RabinKarp) FindIndexString(text, pattern string) int {
    26  	return rabinKarpFinderString(pattern, text)
    27  }
    28  
    29  func rabinKarpFinder(pattern, text []byte) int {
    30  	return indexRabinKarpBytes(text, pattern)
    31  }
    32  
    33  func rabinKarpFinderString(pattern, text string) int {
    34  	return indexRabinKarpString(text, pattern)
    35  }
    36  
    37  // PrimeRK is the prime base used in Rabin-Karp algorithm.
    38  const PrimeRK = 16777619
    39  
    40  // indexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
    41  // first occurrence of substr in s, or -1 if not present.
    42  func indexRabinKarpBytes(s, sep []byte) int {
    43  	// Rabin-Karp search
    44  	hashsep, pow := hashBytes(sep)
    45  	n := len(sep)
    46  	var h uint32
    47  	for i := 0; i < n; i++ {
    48  		h = h*PrimeRK + uint32(s[i])
    49  	}
    50  	if h == hashsep && bytes.Equal(s[:n], sep) {
    51  		return 0
    52  	}
    53  	for i := n; i < len(s); {
    54  		h *= PrimeRK
    55  		h += uint32(s[i])
    56  		h -= pow * uint32(s[i-n])
    57  		i++
    58  		if h == hashsep && bytes.Equal(s[i-n:i], sep) {
    59  			return i - n
    60  		}
    61  	}
    62  	return -1
    63  }
    64  
    65  // hashBytes returns the hash and the appropriate multiplicative
    66  // factor for use in Rabin-Karp algorithm.
    67  func hashBytes(sep []byte) (uint32, uint32) {
    68  	hash := uint32(0)
    69  	for i := 0; i < len(sep); i++ {
    70  		hash = hash*PrimeRK + uint32(sep[i])
    71  	}
    72  	var pow, sq uint32 = 1, PrimeRK
    73  	for i := len(sep); i > 0; i >>= 1 {
    74  		if i&1 != 0 {
    75  			pow *= sq
    76  		}
    77  		sq *= sq
    78  	}
    79  	return hash, pow
    80  }
    81  
    82  // hashBytesRev returns the hash of the reverse of sep and the
    83  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    84  func hashBytesRev(sep []byte) (uint32, uint32) {
    85  	hash := uint32(0)
    86  	for i := len(sep) - 1; i >= 0; i-- {
    87  		hash = hash*PrimeRK + uint32(sep[i])
    88  	}
    89  	var pow, sq uint32 = 1, PrimeRK
    90  	for i := len(sep); i > 0; i >>= 1 {
    91  		if i&1 != 0 {
    92  			pow *= sq
    93  		}
    94  		sq *= sq
    95  	}
    96  	return hash, pow
    97  }
    98  
    99  // indexRabinKarpString uses the Rabin-Karp search algorithm to return the index of the
   100  // first occurrence of substr in s, or -1 if not present.
   101  func indexRabinKarpString(s, substr string) int {
   102  	// Rabin-Karp search
   103  	hashss, pow := hashString(substr)
   104  	n := len(substr)
   105  	var h uint32
   106  	for i := 0; i < n; i++ {
   107  		h = h*PrimeRK + uint32(s[i])
   108  	}
   109  	if h == hashss && s[:n] == substr {
   110  		return 0
   111  	}
   112  	for i := n; i < len(s); {
   113  		h *= PrimeRK
   114  		h += uint32(s[i])
   115  		h -= pow * uint32(s[i-n])
   116  		i++
   117  		if h == hashss && s[i-n:i] == substr {
   118  			return i - n
   119  		}
   120  	}
   121  	return -1
   122  }
   123  
   124  // hashString returns the hash and the appropriate multiplicative
   125  // factor for use in Rabin-Karp algorithm.
   126  func hashString(sep string) (uint32, uint32) {
   127  	hash := uint32(0)
   128  	for i := 0; i < len(sep); i++ {
   129  		hash = hash*PrimeRK + uint32(sep[i])
   130  	}
   131  	var pow, sq uint32 = 1, PrimeRK
   132  	for i := len(sep); i > 0; i >>= 1 {
   133  		if i&1 != 0 {
   134  			pow *= sq
   135  		}
   136  		sq *= sq
   137  	}
   138  	return hash, pow
   139  }
   140  
   141  // hashStringRev returns the hash of the reverse of sep and the
   142  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
   143  func hashStringRev(sep string) (uint32, uint32) {
   144  	hash := uint32(0)
   145  	for i := len(sep) - 1; i >= 0; i-- {
   146  		hash = hash*PrimeRK + uint32(sep[i])
   147  	}
   148  	var pow, sq uint32 = 1, PrimeRK
   149  	for i := len(sep); i > 0; i >>= 1 {
   150  		if i&1 != 0 {
   151  			pow *= sq
   152  		}
   153  		sq *= sq
   154  	}
   155  	return hash, pow
   156  }