github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/bytealg.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bytealg
     6  
     7  import (
     8  	"unsafe"
     9  
    10  	"github.com/SandwichDev/go-internals/cpu"
    11  )
    12  
// Offsets into the cpu package's X86 and S390X records for use in assembly.
// Each constant is the byte offset, as reported by unsafe.Offsetof, of the
// named field inside the corresponding record.
const (
	// Offsets of fields of cpu.X86.
	offsetX86HasSSE2   = unsafe.Offsetof(cpu.X86.HasSSE2)
	offsetX86HasSSE42  = unsafe.Offsetof(cpu.X86.HasSSE42)
	offsetX86HasAVX2   = unsafe.Offsetof(cpu.X86.HasAVX2)
	offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)

	// Offset of a field of cpu.S390X.
	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
)
    22  
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
// If MaxLen is not 0, make sure MaxLen >= 4.
//
// This file only declares the variable, so it holds 0 until something else
// assigns it.
// NOTE(review): presumably set by per-architecture initialization code — confirm.
var MaxLen int
    26  
// FIXME: the logic of HashStrBytes, HashStrRevBytes and IndexRabinKarpBytes is exactly the
// same as that of HashStr, HashStrRev and IndexRabinKarp, except that the types are different.
// Can we eliminate three of them without causing allocation?
    30  
// PrimeRK is the prime base used in Rabin-Karp algorithm.
// The hash functions in this file multiply by it in uint32 arithmetic, so
// every product wraps around modulo 2**32.
const PrimeRK = 16777619
    33  
    34  // HashStrBytes returns the hash and the appropriate multiplicative
    35  // factor for use in Rabin-Karp algorithm.
    36  func HashStrBytes(sep []byte) (uint32, uint32) {
    37  	hash := uint32(0)
    38  	for i := 0; i < len(sep); i++ {
    39  		hash = hash*PrimeRK + uint32(sep[i])
    40  	}
    41  	var pow, sq uint32 = 1, PrimeRK
    42  	for i := len(sep); i > 0; i >>= 1 {
    43  		if i&1 != 0 {
    44  			pow *= sq
    45  		}
    46  		sq *= sq
    47  	}
    48  	return hash, pow
    49  }
    50  
    51  // HashStr returns the hash and the appropriate multiplicative
    52  // factor for use in Rabin-Karp algorithm.
    53  func HashStr(sep string) (uint32, uint32) {
    54  	hash := uint32(0)
    55  	for i := 0; i < len(sep); i++ {
    56  		hash = hash*PrimeRK + uint32(sep[i])
    57  	}
    58  	var pow, sq uint32 = 1, PrimeRK
    59  	for i := len(sep); i > 0; i >>= 1 {
    60  		if i&1 != 0 {
    61  			pow *= sq
    62  		}
    63  		sq *= sq
    64  	}
    65  	return hash, pow
    66  }
    67  
    68  // HashStrRevBytes returns the hash of the reverse of sep and the
    69  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    70  func HashStrRevBytes(sep []byte) (uint32, uint32) {
    71  	hash := uint32(0)
    72  	for i := len(sep) - 1; i >= 0; i-- {
    73  		hash = hash*PrimeRK + uint32(sep[i])
    74  	}
    75  	var pow, sq uint32 = 1, PrimeRK
    76  	for i := len(sep); i > 0; i >>= 1 {
    77  		if i&1 != 0 {
    78  			pow *= sq
    79  		}
    80  		sq *= sq
    81  	}
    82  	return hash, pow
    83  }
    84  
    85  // HashStrRev returns the hash of the reverse of sep and the
    86  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    87  func HashStrRev(sep string) (uint32, uint32) {
    88  	hash := uint32(0)
    89  	for i := len(sep) - 1; i >= 0; i-- {
    90  		hash = hash*PrimeRK + uint32(sep[i])
    91  	}
    92  	var pow, sq uint32 = 1, PrimeRK
    93  	for i := len(sep); i > 0; i >>= 1 {
    94  		if i&1 != 0 {
    95  			pow *= sq
    96  		}
    97  		sq *= sq
    98  	}
    99  	return hash, pow
   100  }
   101  
   102  // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
   103  // first occurrence of substr in s, or -1 if not present.
   104  func IndexRabinKarpBytes(s, sep []byte) int {
   105  	// Rabin-Karp search
   106  	hashsep, pow := HashStrBytes(sep)
   107  	n := len(sep)
   108  	var h uint32
   109  	for i := 0; i < n; i++ {
   110  		h = h*PrimeRK + uint32(s[i])
   111  	}
   112  	if h == hashsep && Equal(s[:n], sep) {
   113  		return 0
   114  	}
   115  	for i := n; i < len(s); {
   116  		h *= PrimeRK
   117  		h += uint32(s[i])
   118  		h -= pow * uint32(s[i-n])
   119  		i++
   120  		if h == hashsep && Equal(s[i-n:i], sep) {
   121  			return i - n
   122  		}
   123  	}
   124  	return -1
   125  }
   126  
   127  // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
   128  // first occurrence of substr in s, or -1 if not present.
   129  func IndexRabinKarp(s, substr string) int {
   130  	// Rabin-Karp search
   131  	hashss, pow := HashStr(substr)
   132  	n := len(substr)
   133  	var h uint32
   134  	for i := 0; i < n; i++ {
   135  		h = h*PrimeRK + uint32(s[i])
   136  	}
   137  	if h == hashss && s[:n] == substr {
   138  		return 0
   139  	}
   140  	for i := n; i < len(s); {
   141  		h *= PrimeRK
   142  		h += uint32(s[i])
   143  		h -= pow * uint32(s[i-n])
   144  		i++
   145  		if h == hashss && s[i-n:i] == substr {
   146  			return i - n
   147  		}
   148  	}
   149  	return -1
   150  }