github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/internal/bytealg/bytealg.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bytealg 6 7 import ( 8 "internal/cpu" 9 "unsafe" 10 ) 11 12 // Offsets into internal/cpu records for use in assembly. 13 const ( 14 offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2) 15 offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42) 16 offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2) 17 offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT) 18 19 offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) 20 ) 21 22 // MaxLen is the maximum length of the string to be searched for (argument b) in Index. 23 // If MaxLen is not 0, make sure MaxLen >= 4. 24 var MaxLen int 25 26 // FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev, 27 // IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate 28 // three of them without causing allocation? 29 30 // PrimeRK is the prime base used in Rabin-Karp algorithm. 31 const PrimeRK = 16777619 32 33 // HashStrBytes returns the hash and the appropriate multiplicative 34 // factor for use in Rabin-Karp algorithm. 35 func HashStrBytes(sep []byte) (uint32, uint32) { 36 hash := uint32(0) 37 for i := 0; i < len(sep); i++ { 38 hash = hash*PrimeRK + uint32(sep[i]) 39 } 40 var pow, sq uint32 = 1, PrimeRK 41 for i := len(sep); i > 0; i >>= 1 { 42 if i&1 != 0 { 43 pow *= sq 44 } 45 sq *= sq 46 } 47 return hash, pow 48 } 49 50 // HashStr returns the hash and the appropriate multiplicative 51 // factor for use in Rabin-Karp algorithm. 52 func HashStr(sep string) (uint32, uint32) { 53 hash := uint32(0) 54 for i := 0; i < len(sep); i++ { 55 hash = hash*PrimeRK + uint32(sep[i]) 56 } 57 var pow, sq uint32 = 1, PrimeRK 58 for i := len(sep); i > 0; i >>= 1 { 59 if i&1 != 0 { 60 pow *= sq 61 } 62 sq *= sq 63 } 64 return hash, pow 65 } 66 67 // HashStrRevBytes returns the hash of the reverse of sep and the 68 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 69 func HashStrRevBytes(sep []byte) (uint32, uint32) { 70 hash := uint32(0) 71 for i := len(sep) - 1; i >= 0; i-- { 72 hash = hash*PrimeRK + uint32(sep[i]) 73 } 74 var pow, sq uint32 = 1, PrimeRK 75 for i := len(sep); i > 0; i >>= 1 { 76 if i&1 != 0 { 77 pow *= sq 78 } 79 sq *= sq 80 } 81 return hash, pow 82 } 83 84 // HashStrRev returns the hash of the reverse of sep and the 85 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 86 func HashStrRev(sep string) (uint32, uint32) { 87 hash := uint32(0) 88 for i := len(sep) - 1; i >= 0; i-- { 89 hash = hash*PrimeRK + uint32(sep[i]) 90 } 91 var pow, sq uint32 = 1, PrimeRK 92 for i := len(sep); i > 0; i >>= 1 { 93 if i&1 != 0 { 94 pow *= sq 95 } 96 sq *= sq 97 } 98 return hash, pow 99 } 100 101 // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the 102 // first occurrence of substr in s, or -1 if not present. 103 func IndexRabinKarpBytes(s, sep []byte) int { 104 // Rabin-Karp search 105 hashsep, pow := HashStrBytes(sep) 106 n := len(sep) 107 var h uint32 108 for i := 0; i < n; i++ { 109 h = h*PrimeRK + uint32(s[i]) 110 } 111 if h == hashsep && Equal(s[:n], sep) { 112 return 0 113 } 114 for i := n; i < len(s); { 115 h *= PrimeRK 116 h += uint32(s[i]) 117 h -= pow * uint32(s[i-n]) 118 i++ 119 if h == hashsep && Equal(s[i-n:i], sep) { 120 return i - n 121 } 122 } 123 return -1 124 } 125 126 // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the 127 // first occurrence of substr in s, or -1 if not present. 128 func IndexRabinKarp(s, substr string) int { 129 // Rabin-Karp search 130 hashss, pow := HashStr(substr) 131 n := len(substr) 132 var h uint32 133 for i := 0; i < n; i++ { 134 h = h*PrimeRK + uint32(s[i]) 135 } 136 if h == hashss && s[:n] == substr { 137 return 0 138 } 139 for i := n; i < len(s); { 140 h *= PrimeRK 141 h += uint32(s[i]) 142 h -= pow * uint32(s[i-n]) 143 i++ 144 if h == hashss && s[i-n:i] == substr { 145 return i - n 146 } 147 } 148 return -1 149 }