github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/internal/bytealg/bytealg.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bytealg
     6  
     7  import (
     8  	"internal/cpu"
     9  	"unsafe"
    10  )
    11  
// Offsets into internal/cpu records for use in assembly.
// Each constant is the value unsafe.Offsetof reports for one feature field,
// i.e. the byte offset of that field within its internal/cpu record.
const (
	// Feature fields of cpu.X86.
	offsetX86HasSSE2   = unsafe.Offsetof(cpu.X86.HasSSE2)
	offsetX86HasSSE42  = unsafe.Offsetof(cpu.X86.HasSSE42)
	offsetX86HasAVX2   = unsafe.Offsetof(cpu.X86.HasAVX2)
	offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)

	// Feature field of cpu.S390X.
	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
)
    21  
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
// If MaxLen is not 0, make sure MaxLen >= 4.
// NOTE(review): nothing in this file assigns MaxLen; presumably it is set by
// architecture-specific code elsewhere in the package — confirm.
var MaxLen int
    25  
    26  // FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
    27  // IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
    28  // three of them without causing allocation?
    29  
// PrimeRK is the prime base used in the Rabin-Karp algorithm.
// The hash functions in this file multiply the running hash by PrimeRK
// before adding each byte.
const PrimeRK = 16777619
    32  
    33  // HashStrBytes returns the hash and the appropriate multiplicative
    34  // factor for use in Rabin-Karp algorithm.
    35  func HashStrBytes(sep []byte) (uint32, uint32) {
    36  	hash := uint32(0)
    37  	for i := 0; i < len(sep); i++ {
    38  		hash = hash*PrimeRK + uint32(sep[i])
    39  	}
    40  	var pow, sq uint32 = 1, PrimeRK
    41  	for i := len(sep); i > 0; i >>= 1 {
    42  		if i&1 != 0 {
    43  			pow *= sq
    44  		}
    45  		sq *= sq
    46  	}
    47  	return hash, pow
    48  }
    49  
    50  // HashStr returns the hash and the appropriate multiplicative
    51  // factor for use in Rabin-Karp algorithm.
    52  func HashStr(sep string) (uint32, uint32) {
    53  	hash := uint32(0)
    54  	for i := 0; i < len(sep); i++ {
    55  		hash = hash*PrimeRK + uint32(sep[i])
    56  	}
    57  	var pow, sq uint32 = 1, PrimeRK
    58  	for i := len(sep); i > 0; i >>= 1 {
    59  		if i&1 != 0 {
    60  			pow *= sq
    61  		}
    62  		sq *= sq
    63  	}
    64  	return hash, pow
    65  }
    66  
    67  // HashStrRevBytes returns the hash of the reverse of sep and the
    68  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    69  func HashStrRevBytes(sep []byte) (uint32, uint32) {
    70  	hash := uint32(0)
    71  	for i := len(sep) - 1; i >= 0; i-- {
    72  		hash = hash*PrimeRK + uint32(sep[i])
    73  	}
    74  	var pow, sq uint32 = 1, PrimeRK
    75  	for i := len(sep); i > 0; i >>= 1 {
    76  		if i&1 != 0 {
    77  			pow *= sq
    78  		}
    79  		sq *= sq
    80  	}
    81  	return hash, pow
    82  }
    83  
    84  // HashStrRev returns the hash of the reverse of sep and the
    85  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    86  func HashStrRev(sep string) (uint32, uint32) {
    87  	hash := uint32(0)
    88  	for i := len(sep) - 1; i >= 0; i-- {
    89  		hash = hash*PrimeRK + uint32(sep[i])
    90  	}
    91  	var pow, sq uint32 = 1, PrimeRK
    92  	for i := len(sep); i > 0; i >>= 1 {
    93  		if i&1 != 0 {
    94  			pow *= sq
    95  		}
    96  		sq *= sq
    97  	}
    98  	return hash, pow
    99  }
   100  
   101  // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
   102  // first occurrence of substr in s, or -1 if not present.
   103  func IndexRabinKarpBytes(s, sep []byte) int {
   104  	// Rabin-Karp search
   105  	hashsep, pow := HashStrBytes(sep)
   106  	n := len(sep)
   107  	var h uint32
   108  	for i := 0; i < n; i++ {
   109  		h = h*PrimeRK + uint32(s[i])
   110  	}
   111  	if h == hashsep && Equal(s[:n], sep) {
   112  		return 0
   113  	}
   114  	for i := n; i < len(s); {
   115  		h *= PrimeRK
   116  		h += uint32(s[i])
   117  		h -= pow * uint32(s[i-n])
   118  		i++
   119  		if h == hashsep && Equal(s[i-n:i], sep) {
   120  			return i - n
   121  		}
   122  	}
   123  	return -1
   124  }
   125  
   126  // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
   127  // first occurrence of substr in s, or -1 if not present.
   128  func IndexRabinKarp(s, substr string) int {
   129  	// Rabin-Karp search
   130  	hashss, pow := HashStr(substr)
   131  	n := len(substr)
   132  	var h uint32
   133  	for i := 0; i < n; i++ {
   134  		h = h*PrimeRK + uint32(s[i])
   135  	}
   136  	if h == hashss && s[:n] == substr {
   137  		return 0
   138  	}
   139  	for i := n; i < len(s); {
   140  		h *= PrimeRK
   141  		h += uint32(s[i])
   142  		h -= pow * uint32(s[i-n])
   143  		i++
   144  		if h == hashss && s[i-n:i] == substr {
   145  			return i - n
   146  		}
   147  	}
   148  	return -1
   149  }