github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/bytealg.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bytealg 6 7 import ( 8 "unsafe" 9 10 "github.com/SandwichDev/go-internals/cpu" 11 ) 12 13 // Offsets into internal/cpu records for use in assembly. 14 const ( 15 offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2) 16 offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42) 17 offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2) 18 offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT) 19 20 offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) 21 ) 22 23 // MaxLen is the maximum length of the string to be searched for (argument b) in Index. 24 // If MaxLen is not 0, make sure MaxLen >= 4. 25 var MaxLen int 26 27 // FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev, 28 // IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate 29 // three of them without causing allocation? 30 31 // PrimeRK is the prime base used in Rabin-Karp algorithm. 32 const PrimeRK = 16777619 33 34 // HashStrBytes returns the hash and the appropriate multiplicative 35 // factor for use in Rabin-Karp algorithm. 36 func HashStrBytes(sep []byte) (uint32, uint32) { 37 hash := uint32(0) 38 for i := 0; i < len(sep); i++ { 39 hash = hash*PrimeRK + uint32(sep[i]) 40 } 41 var pow, sq uint32 = 1, PrimeRK 42 for i := len(sep); i > 0; i >>= 1 { 43 if i&1 != 0 { 44 pow *= sq 45 } 46 sq *= sq 47 } 48 return hash, pow 49 } 50 51 // HashStr returns the hash and the appropriate multiplicative 52 // factor for use in Rabin-Karp algorithm. 53 func HashStr(sep string) (uint32, uint32) { 54 hash := uint32(0) 55 for i := 0; i < len(sep); i++ { 56 hash = hash*PrimeRK + uint32(sep[i]) 57 } 58 var pow, sq uint32 = 1, PrimeRK 59 for i := len(sep); i > 0; i >>= 1 { 60 if i&1 != 0 { 61 pow *= sq 62 } 63 sq *= sq 64 } 65 return hash, pow 66 } 67 68 // HashStrRevBytes returns the hash of the reverse of sep and the 69 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 70 func HashStrRevBytes(sep []byte) (uint32, uint32) { 71 hash := uint32(0) 72 for i := len(sep) - 1; i >= 0; i-- { 73 hash = hash*PrimeRK + uint32(sep[i]) 74 } 75 var pow, sq uint32 = 1, PrimeRK 76 for i := len(sep); i > 0; i >>= 1 { 77 if i&1 != 0 { 78 pow *= sq 79 } 80 sq *= sq 81 } 82 return hash, pow 83 } 84 85 // HashStrRev returns the hash of the reverse of sep and the 86 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 87 func HashStrRev(sep string) (uint32, uint32) { 88 hash := uint32(0) 89 for i := len(sep) - 1; i >= 0; i-- { 90 hash = hash*PrimeRK + uint32(sep[i]) 91 } 92 var pow, sq uint32 = 1, PrimeRK 93 for i := len(sep); i > 0; i >>= 1 { 94 if i&1 != 0 { 95 pow *= sq 96 } 97 sq *= sq 98 } 99 return hash, pow 100 } 101 102 // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the 103 // first occurrence of substr in s, or -1 if not present. 104 func IndexRabinKarpBytes(s, sep []byte) int { 105 // Rabin-Karp search 106 hashsep, pow := HashStrBytes(sep) 107 n := len(sep) 108 var h uint32 109 for i := 0; i < n; i++ { 110 h = h*PrimeRK + uint32(s[i]) 111 } 112 if h == hashsep && Equal(s[:n], sep) { 113 return 0 114 } 115 for i := n; i < len(s); { 116 h *= PrimeRK 117 h += uint32(s[i]) 118 h -= pow * uint32(s[i-n]) 119 i++ 120 if h == hashsep && Equal(s[i-n:i], sep) { 121 return i - n 122 } 123 } 124 return -1 125 } 126 127 // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the 128 // first occurrence of substr in s, or -1 if not present. 129 func IndexRabinKarp(s, substr string) int { 130 // Rabin-Karp search 131 hashss, pow := HashStr(substr) 132 n := len(substr) 133 var h uint32 134 for i := 0; i < n; i++ { 135 h = h*PrimeRK + uint32(s[i]) 136 } 137 if h == hashss && s[:n] == substr { 138 return 0 139 } 140 for i := n; i < len(s); { 141 h *= PrimeRK 142 h += uint32(s[i]) 143 h -= pow * uint32(s[i-n]) 144 i++ 145 if h == hashss && s[i-n:i] == substr { 146 return i - n 147 } 148 } 149 return -1 150 }