github.com/AlexanderZh/ahocorasick@v0.1.8/ahocorasick_test.go (about) 1 package ahocorasick 2 3 import ( 4 "bytes" 5 "math/rand" 6 "reflect" 7 "testing" 8 ) 9 10 func convert(got []*Match) []Match { 11 var converted []Match 12 for _, matchptr := range got { 13 converted = append(converted, *matchptr) 14 } 15 return converted 16 } 17 18 func TestFindAllByteSlice(t *testing.T) { 19 m := compile([][]byte{ 20 []byte("he"), 21 []byte("his"), 22 []byte("hers"), 23 []byte("she")}, 24 ) 25 m.findAll([]byte("ushers")) // => { "she" 1 }, { "he" 2}, { "hers" 2 } 26 tests := []struct { 27 patterns [][]byte 28 expected []Match 29 text []byte 30 }{ 31 { 32 [][]byte{[]byte("na"), []byte("ink"), []byte("ki")}, 33 []Match{{[]byte("ink"), 0}, {[]byte("ki"), 2}}, 34 []byte("inking"), 35 }, 36 { 37 [][]byte{[]byte("ca"), []byte("erica"), []byte("rice")}, 38 []Match{{[]byte("ca"), 3}, {[]byte("erica"), 0}}, 39 []byte("erican"), 40 }, 41 { 42 [][]byte{[]byte("he"), []byte("she"), []byte("his"), []byte("hers")}, 43 []Match{{[]byte("he"), 2}, {[]byte("she"), 1}, {[]byte("hers"), 2}}, 44 []byte("ushers"), 45 }, 46 { 47 [][]byte{[]byte("they"), []byte("their"), []byte("theyre"), []byte("the"), []byte("tea"), []byte("te"), []byte("team"), []byte("go"), []byte("goo"), []byte("good"), []byte("oode")}, 48 []Match{{[]byte("the"), 0}, {[]byte("they"), 0}, {[]byte("theyre"), 0}, {[]byte("go"), 13}, {[]byte("goo"), 13}, {[]byte("good"), 13}, {[]byte("oode"), 14}, {[]byte("te"), 19}, {[]byte("tea"), 19}, {[]byte("team"), 19}}, 49 []byte("theyre not a goode team"), 50 }, 51 { 52 [][]byte{[]byte("a")}, 53 []Match{{[]byte("a"), 0}, {[]byte("a"), 1}, {[]byte("a"), 2}, {[]byte("a"), 5}, {[]byte("a"), 7}, {[]byte("a"), 9}, {[]byte("a"), 11}}, 54 []byte("aaabbabababa"), 55 }, 56 { 57 [][]byte{}, 58 []Match{}, 59 []byte("there is no patterns"), 60 }, 61 { 62 [][]byte{[]byte("锅"), []byte("持有人"), []byte("potholderz"), []byte("MF DOOM")}, 63 []Match{{[]byte("potholderz"), 0}, {[]byte("MF DOOM"), 14}, {[]byte("锅"), 39}, {[]byte("持有人"), 43}}, 64 []byte("potholderz by MF DOOM hot shit aw shit 锅 持有人"), 65 }, 66 } 67 for _, test := range tests { 68 matcher := compile(test.patterns) 69 for i := 0; i < 1000; i++ { //check memory leak 70 b := matcher.Serialize() 71 _, e := Deserialize(b) 72 if e != nil { 73 t.Errorf("error serializer") 74 } 75 } 76 77 got := matcher.findAll(test.text) 78 gotConverted := convert(got) 79 if !(len(got) == 0 && len(test.expected) == 0) && 80 !reflect.DeepEqual(gotConverted, test.expected) { 81 t.Errorf(` 82 Text: %s 83 Expected: %v 84 Got: %v 85 `, test.text, test.expected, gotConverted) 86 } 87 } 88 } 89 90 func TestIncreaseSize(t *testing.T) { 91 m := &Matcher{ 92 []int{5, 0, 0}, 93 []int{0, 0, 0}, 94 []int{0, 0, 0}, 95 [][]SWord{}, 96 } 97 m.increaseSize(1) 98 if !reflect.DeepEqual(m.base, []int{5, 0, 0, -3}) { 99 t.Errorf("Got: %v\n", m.base) 100 } 101 if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -1}) { 102 t.Errorf("Got: %v\n", m.check) 103 } 104 105 m.increaseSize(1) 106 if !reflect.DeepEqual(m.base, []int{5, 0, 0, -4, -3}) { 107 t.Errorf("Got: %v\n", m.base) 108 } 109 if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -1}) { 110 t.Errorf("Got: %v\n", m.check) 111 } 112 113 m.increaseSize(1) 114 if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) { 115 t.Errorf("Got: %v\n", m.base) 116 } 117 if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) { 118 t.Errorf("Got: %v\n", m.check) 119 } 120 121 m = &Matcher{ 122 []int{5, 0, 0}, 123 []int{0, 0, 0}, 124 []int{0, 0, 0}, 125 [][]SWord{}, 126 } 127 m.increaseSize(3) 128 if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) { 129 t.Errorf("Got: %v\n", m.base) 130 } 131 if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) { 132 t.Errorf("Got: %v\n", m.check) 133 } 134 135 m.increaseSize(3) 136 if !reflect.DeepEqual(m.base, []int{5, 0, 0, -8, -3, -4, -5, -6, -7}) { 137 t.Errorf("Got: %v\n", m.base) 138 } 139 if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -6, -7, -8, -1}) { 140 t.Errorf("Got: %v\n", m.check) 141 } 142 143 m = &Matcher{ 144 []int{0}, 145 []int{0}, 146 []int{0}, 147 [][]SWord{}, 148 } 149 m.increaseSize(5) 150 if !reflect.DeepEqual(m.base, []int{0, -5, -1, -2, -3, -4}) { 151 t.Errorf("Got: %v\n", m.base) 152 } 153 if !reflect.DeepEqual(m.check, []int{-1, -2, -3, -4, -5, -1}) { 154 t.Errorf("Got: %v\n", m.check) 155 } 156 157 m = &Matcher{ 158 []int{-103, -1867}, 159 []int{0, 0}, 160 []int{}, 161 [][]SWord{}, 162 } 163 m.increaseSize(5) 164 if !reflect.DeepEqual(m.base, []int{-103, -1867, -6, -2, -3, -4, -5}) { 165 t.Errorf("Got: %v\n", m.base) 166 } 167 if !reflect.DeepEqual(m.check, []int{-2, 0, -3, -4, -5, -6, -1}) { 168 t.Errorf("Got: %v\n", m.check) 169 } 170 } 171 172 func TestNextFreeState(t *testing.T) { 173 m := &Matcher{ 174 []int{5, 0, 0, -3}, 175 []int{-3, 0, 0, -1}, 176 []int{}, 177 [][]SWord{}, 178 } 179 nextState := m.nextFreeState(3) 180 if nextState != -1 { 181 t.Errorf("Got: %d\n", nextState) 182 } 183 184 m.increaseSize(3) 185 nextState = m.nextFreeState(3) 186 if nextState != 4 { 187 t.Errorf("Got: %d\n", nextState) 188 } 189 } 190 191 func TestOccupyState(t *testing.T) { 192 m := &Matcher{ 193 []int{5, 0, 0, -3}, 194 []int{-3, 0, 0, -1}, 195 []int{}, 196 [][]SWord{}, 197 } 198 m.increaseSize(5) 199 m.occupyState(3, 1) 200 m.occupyState(4, 1) 201 m.occupyState(8, 1) 202 m.occupyState(6, 1) 203 m.occupyState(5, 1) 204 m.occupyState(7, 1) 205 if !reflect.DeepEqual(m.base, []int{5, 0, 0, -1867, -1867, -1867, -1867, -1867, -1867}) { 206 t.Errorf("Got: %v\n", m.base) 207 } 208 if !reflect.DeepEqual(m.check, []int{0, 0, 0, 1, 1, 1, 1, 1, 1}) { 209 t.Errorf("Got: %v\n", m.check) 210 } 211 } 212 213 func TestRandomGen100kNotFound(t *testing.T) { 214 N := 100000 215 L := 128 216 M := 1000000 217 218 words := make([][]byte, N) 219 buffer := make([]byte, M) 220 rand.Read(buffer) 221 222 for i := 0; i < N; i++ { 223 words[i] = make([]byte, L) 224 rand.Read(words[i]) 225 } 226 227 m := CompileByteSlices(words) 228 229 Ms := m.FindAllByteSlice(buffer) 230 if len(Ms) != 0 { 231 t.Errorf("Got %d matches", len(Ms)) 232 } 233 } 234 235 // example of match interface redefining 236 type MatchKey struct { 237 Index int // the start index of the match 238 Key int // key of pattern 239 } 240 241 type MatchesKeys struct { 242 matches []MatchKey 243 } 244 245 func (m *MatchesKeys) Append(pos int, key int) { 246 m.matches = append(m.matches, MatchKey{pos, key}) 247 } 248 249 func (m *MatchesKeys) Count() int { 250 return len(m.matches) 251 } 252 253 254 func initTestByteSlice(N int, L int) [][]byte{ 255 words := make([][]byte, N) 256 for i := 0; i < N; i++ { 257 words[i] = make([]byte, L) 258 rand.Read(words[i]) 259 } 260 return words 261 } 262 263 func TestRandomGen100kNotFoundReader(t *testing.T) { 264 N := 100000 265 L := 128 266 M := 1000000 267 words := initTestByteSlice(N,L) 268 269 buffer := make([]byte, M) 270 rand.Read(buffer) 271 272 273 274 m := CompileByteSlices(words) 275 data := bytes.NewReader(buffer) 276 var Ms Matches 277 Ms = &MatchesKeys{} 278 m.FindAllByteReader(data, Ms) 279 if Ms.Count()!= 0 { 280 t.Errorf("Got %d matches", Ms.Count()) 281 } 282 } 283 284 func TestRandomGen100k1Found(t *testing.T) { 285 N := 100000 286 L := 128 287 M := 1000000 288 289 words := make([][]byte, N) 290 buffer := make([]byte, M) 291 rand.Read(buffer) 292 293 for i := 0; i < N; i++ { 294 words[i] = make([]byte, L) 295 rand.Read(words[i]) 296 } 297 298 m := CompileByteSlices(words) 299 300 idx := rand.Intn(N - 1) 301 buffer2 := append(buffer, words[idx]...) 302 Ms := m.FindAllByteSlice(buffer2) 303 if len(Ms) != 1 { 304 t.Errorf("Got %d matches instead of 1", len(Ms)) 305 } 306 } 307 308 func TestRandomGen100k1FoundReader(t *testing.T) { 309 N := 100000 310 L := 128 311 M := 1000000 312 313 words := make([][]byte, N) 314 buffer := make([]byte, M) 315 rand.Read(buffer) 316 317 for i := 0; i < N; i++ { 318 words[i] = make([]byte, L) 319 rand.Read(words[i]) 320 } 321 322 m := CompileByteSlices(words) 323 324 idx := rand.Intn(N - 1) 325 buffer2 := append(buffer, words[idx]...) 326 var Ms Matches 327 Ms = &MatchesKeys{} 328 m.FindAllByteReader(bytes.NewReader(buffer2),Ms) 329 if Ms.Count() != 1 { 330 t.Errorf("Got %d matches instead of 1", Ms.Count()) 331 } 332 } 333 334 func TestRandomGen100kAllFound(t *testing.T) { 335 N := 100000 336 L := 128 337 338 words := make([][]byte, N) 339 340 for i := 0; i < N; i++ { 341 words[i] = make([]byte, L) 342 rand.Read(words[i]) 343 } 344 345 m := CompileByteSlices(words) 346 347 buffer2 := make([]byte, N*L) 348 for i, w := range words { 349 for j := 0; j < L; j++ { 350 buffer2[i*L+j] = w[j] 351 } 352 } 353 Ms := m.FindAllByteSlice(buffer2) 354 if len(Ms) != N { 355 t.Errorf("Got %d matches instead of %d", len(Ms), N) 356 } 357 } 358 359 func BenchmarkRandomGen100kAllFoundReader(b *testing.B) { 360 N := 100000 361 L := 128 362 363 words := make([][]byte, N) 364 365 for i := 0; i < N; i++ { 366 words[i] = make([]byte, L) 367 rand.Read(words[i]) 368 } 369 370 m := CompileByteSlices(words) 371 372 buffer2 := make([]byte, N*L) 373 for i, w := range words { 374 for j := 0; j < L; j++ { 375 buffer2[i*L+j] = w[j] 376 } 377 } 378 Ms := &MatchesKeys{} 379 m.FindAllByteReader(bytes.NewReader(buffer2),Ms) 380 if Ms.Count() != 1 { 381 b.Errorf("Got %d matches instead of 1", Ms.Count()) 382 } 383 }