github.com/primecitizens/pcz/std@v0.2.1/core/bytealg/rabin_karp.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright 2023 The Prime Citizens
     3  
     4  package bytealg
     5  
     6  import "github.com/primecitizens/pcz/std/core/cmp"
     7  
// FIXME: the logic of the []byte variants (HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes)
// is exactly the same as that of the string variants (HashStr, HashStrRev, IndexRabinKarp);
// only the argument types differ. Can three of the six be eliminated without causing allocation?
    11  
// PrimeRK is the prime base used in Rabin-Karp algorithm.
//
// The hash helpers and searches in this file multiply the running uint32
// hash by PrimeRK before adding each byte, so all arithmetic involving it
// wraps modulo 2**32. The value equals the 32-bit FNV prime (0x01000193).
const PrimeRK = 16777619
    14  
    15  // HashStrBytes returns the hash and the appropriate multiplicative
    16  // factor for use in Rabin-Karp algorithm.
    17  func HashStrBytes(sep []byte) (uint32, uint32) {
    18  	hash := uint32(0)
    19  	for i := 0; i < len(sep); i++ {
    20  		hash = hash*PrimeRK + uint32(sep[i])
    21  	}
    22  	var pow, sq uint32 = 1, PrimeRK
    23  	for i := len(sep); i > 0; i >>= 1 {
    24  		if i&1 != 0 {
    25  			pow *= sq
    26  		}
    27  		sq *= sq
    28  	}
    29  	return hash, pow
    30  }
    31  
    32  // HashStr returns the hash and the appropriate multiplicative
    33  // factor for use in Rabin-Karp algorithm.
    34  func HashStr(sep string) (uint32, uint32) {
    35  	hash := uint32(0)
    36  	for i := 0; i < len(sep); i++ {
    37  		hash = hash*PrimeRK + uint32(sep[i])
    38  	}
    39  	var pow, sq uint32 = 1, PrimeRK
    40  	for i := len(sep); i > 0; i >>= 1 {
    41  		if i&1 != 0 {
    42  			pow *= sq
    43  		}
    44  		sq *= sq
    45  	}
    46  	return hash, pow
    47  }
    48  
    49  // HashStrRevBytes returns the hash of the reverse of sep and the
    50  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    51  func HashStrRevBytes(sep []byte) (uint32, uint32) {
    52  	hash := uint32(0)
    53  	for i := len(sep) - 1; i >= 0; i-- {
    54  		hash = hash*PrimeRK + uint32(sep[i])
    55  	}
    56  	var pow, sq uint32 = 1, PrimeRK
    57  	for i := len(sep); i > 0; i >>= 1 {
    58  		if i&1 != 0 {
    59  			pow *= sq
    60  		}
    61  		sq *= sq
    62  	}
    63  	return hash, pow
    64  }
    65  
    66  // HashStrRev returns the hash of the reverse of sep and the
    67  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    68  func HashStrRev(sep string) (uint32, uint32) {
    69  	hash := uint32(0)
    70  	for i := len(sep) - 1; i >= 0; i-- {
    71  		hash = hash*PrimeRK + uint32(sep[i])
    72  	}
    73  	var pow, sq uint32 = 1, PrimeRK
    74  	for i := len(sep); i > 0; i >>= 1 {
    75  		if i&1 != 0 {
    76  			pow *= sq
    77  		}
    78  		sq *= sq
    79  	}
    80  	return hash, pow
    81  }
    82  
    83  // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
    84  // first occurrence of substr in s, or -1 if not present.
    85  func IndexRabinKarpBytes(s, sep []byte) int {
    86  	// Rabin-Karp search
    87  	hashsep, pow := HashStrBytes(sep)
    88  	n := len(sep)
    89  	var h uint32
    90  	for i := 0; i < n; i++ {
    91  		h = h*PrimeRK + uint32(s[i])
    92  	}
    93  	if h == hashsep && cmp.BytesEqual(s[:n], sep) {
    94  		return 0
    95  	}
    96  	for i := n; i < len(s); {
    97  		h *= PrimeRK
    98  		h += uint32(s[i])
    99  		h -= pow * uint32(s[i-n])
   100  		i++
   101  		if h == hashsep && cmp.BytesEqual(s[i-n:i], sep) {
   102  			return i - n
   103  		}
   104  	}
   105  	return -1
   106  }
   107  
   108  // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
   109  // first occurrence of substr in s, or -1 if not present.
   110  func IndexRabinKarp(s, substr string) int {
   111  	// Rabin-Karp search
   112  	hashss, pow := HashStr(substr)
   113  	n := len(substr)
   114  	var h uint32
   115  	for i := 0; i < n; i++ {
   116  		h = h*PrimeRK + uint32(s[i])
   117  	}
   118  	if h == hashss && s[:n] == substr {
   119  		return 0
   120  	}
   121  	for i := n; i < len(s); {
   122  		h *= PrimeRK
   123  		h += uint32(s[i])
   124  		h -= pow * uint32(s[i-n])
   125  		i++
   126  		if h == hashss && s[i-n:i] == substr {
   127  			return i - n
   128  		}
   129  	}
   130  	return -1
   131  }