github.com/AlexanderZh/ahocorasick@v0.1.8/ahocorasick_test.go (about)

     1  package ahocorasick
     2  
     3  import (
     4  	"bytes"
     5  	"math/rand"
     6  	"reflect"
     7  	"testing"
     8  )
     9  
    10  func convert(got []*Match) []Match {
    11  	var converted []Match
    12  	for _, matchptr := range got {
    13  		converted = append(converted, *matchptr)
    14  	}
    15  	return converted
    16  }
    17  
    18  func TestFindAllByteSlice(t *testing.T) {
    19  	m := compile([][]byte{
    20  		[]byte("he"),
    21  		[]byte("his"),
    22  		[]byte("hers"),
    23  		[]byte("she")},
    24  	)
    25  	m.findAll([]byte("ushers")) // => { "she" 1 }, { "he" 2}, { "hers" 2 }
    26  	tests := []struct {
    27  		patterns [][]byte
    28  		expected []Match
    29  		text     []byte
    30  	}{
    31  		{
    32  			[][]byte{[]byte("na"), []byte("ink"), []byte("ki")},
    33  			[]Match{{[]byte("ink"), 0}, {[]byte("ki"), 2}},
    34  			[]byte("inking"),
    35  		},
    36  		{
    37  			[][]byte{[]byte("ca"), []byte("erica"), []byte("rice")},
    38  			[]Match{{[]byte("ca"), 3}, {[]byte("erica"), 0}},
    39  			[]byte("erican"),
    40  		},
    41  		{
    42  			[][]byte{[]byte("he"), []byte("she"), []byte("his"), []byte("hers")},
    43  			[]Match{{[]byte("he"), 2}, {[]byte("she"), 1}, {[]byte("hers"), 2}},
    44  			[]byte("ushers"),
    45  		},
    46  		{
    47  			[][]byte{[]byte("they"), []byte("their"), []byte("theyre"), []byte("the"), []byte("tea"), []byte("te"), []byte("team"), []byte("go"), []byte("goo"), []byte("good"), []byte("oode")},
    48  			[]Match{{[]byte("the"), 0}, {[]byte("they"), 0}, {[]byte("theyre"), 0}, {[]byte("go"), 13}, {[]byte("goo"), 13}, {[]byte("good"), 13}, {[]byte("oode"), 14}, {[]byte("te"), 19}, {[]byte("tea"), 19}, {[]byte("team"), 19}},
    49  			[]byte("theyre not a goode team"),
    50  		},
    51  		{
    52  			[][]byte{[]byte("a")},
    53  			[]Match{{[]byte("a"), 0}, {[]byte("a"), 1}, {[]byte("a"), 2}, {[]byte("a"), 5}, {[]byte("a"), 7}, {[]byte("a"), 9}, {[]byte("a"), 11}},
    54  			[]byte("aaabbabababa"),
    55  		},
    56  		{
    57  			[][]byte{},
    58  			[]Match{},
    59  			[]byte("there is no patterns"),
    60  		},
    61  		{
    62  			[][]byte{[]byte("锅"), []byte("持有人"), []byte("potholderz"), []byte("MF DOOM")},
    63  			[]Match{{[]byte("potholderz"), 0}, {[]byte("MF DOOM"), 14}, {[]byte("锅"), 39}, {[]byte("持有人"), 43}},
    64  			[]byte("potholderz by MF DOOM hot shit aw shit 锅 持有人"),
    65  		},
    66  	}
    67  	for _, test := range tests {
    68  		matcher := compile(test.patterns)
    69  		for i := 0; i < 1000; i++ { //check memory leak
    70  			b := matcher.Serialize()
    71  			_, e := Deserialize(b)
    72  			if e != nil {
    73  				t.Errorf("error serializer")
    74  			}
    75  		}
    76  
    77  		got := matcher.findAll(test.text)
    78  		gotConverted := convert(got)
    79  		if !(len(got) == 0 && len(test.expected) == 0) &&
    80  			!reflect.DeepEqual(gotConverted, test.expected) {
    81  			t.Errorf(`
    82          Text:     %s
    83  		Expected: %v
    84  		Got:      %v
    85  		`, test.text, test.expected, gotConverted)
    86  		}
    87  	}
    88  }
    89  
    90  func TestIncreaseSize(t *testing.T) {
    91  	m := &Matcher{
    92  		[]int{5, 0, 0},
    93  		[]int{0, 0, 0},
    94  		[]int{0, 0, 0},
    95  		[][]SWord{},
    96  	}
    97  	m.increaseSize(1)
    98  	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -3}) {
    99  		t.Errorf("Got: %v\n", m.base)
   100  	}
   101  	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -1}) {
   102  		t.Errorf("Got: %v\n", m.check)
   103  	}
   104  
   105  	m.increaseSize(1)
   106  	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -4, -3}) {
   107  		t.Errorf("Got: %v\n", m.base)
   108  	}
   109  	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -1}) {
   110  		t.Errorf("Got: %v\n", m.check)
   111  	}
   112  
   113  	m.increaseSize(1)
   114  	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) {
   115  		t.Errorf("Got: %v\n", m.base)
   116  	}
   117  	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) {
   118  		t.Errorf("Got: %v\n", m.check)
   119  	}
   120  
   121  	m = &Matcher{
   122  		[]int{5, 0, 0},
   123  		[]int{0, 0, 0},
   124  		[]int{0, 0, 0},
   125  		[][]SWord{},
   126  	}
   127  	m.increaseSize(3)
   128  	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) {
   129  		t.Errorf("Got: %v\n", m.base)
   130  	}
   131  	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) {
   132  		t.Errorf("Got: %v\n", m.check)
   133  	}
   134  
   135  	m.increaseSize(3)
   136  	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -8, -3, -4, -5, -6, -7}) {
   137  		t.Errorf("Got: %v\n", m.base)
   138  	}
   139  	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -6, -7, -8, -1}) {
   140  		t.Errorf("Got: %v\n", m.check)
   141  	}
   142  
   143  	m = &Matcher{
   144  		[]int{0},
   145  		[]int{0},
   146  		[]int{0},
   147  		[][]SWord{},
   148  	}
   149  	m.increaseSize(5)
   150  	if !reflect.DeepEqual(m.base, []int{0, -5, -1, -2, -3, -4}) {
   151  		t.Errorf("Got: %v\n", m.base)
   152  	}
   153  	if !reflect.DeepEqual(m.check, []int{-1, -2, -3, -4, -5, -1}) {
   154  		t.Errorf("Got: %v\n", m.check)
   155  	}
   156  
   157  	m = &Matcher{
   158  		[]int{-103, -1867},
   159  		[]int{0, 0},
   160  		[]int{},
   161  		[][]SWord{},
   162  	}
   163  	m.increaseSize(5)
   164  	if !reflect.DeepEqual(m.base, []int{-103, -1867, -6, -2, -3, -4, -5}) {
   165  		t.Errorf("Got: %v\n", m.base)
   166  	}
   167  	if !reflect.DeepEqual(m.check, []int{-2, 0, -3, -4, -5, -6, -1}) {
   168  		t.Errorf("Got: %v\n", m.check)
   169  	}
   170  }
   171  
   172  func TestNextFreeState(t *testing.T) {
   173  	m := &Matcher{
   174  		[]int{5, 0, 0, -3},
   175  		[]int{-3, 0, 0, -1},
   176  		[]int{},
   177  		[][]SWord{},
   178  	}
   179  	nextState := m.nextFreeState(3)
   180  	if nextState != -1 {
   181  		t.Errorf("Got: %d\n", nextState)
   182  	}
   183  
   184  	m.increaseSize(3)
   185  	nextState = m.nextFreeState(3)
   186  	if nextState != 4 {
   187  		t.Errorf("Got: %d\n", nextState)
   188  	}
   189  }
   190  
   191  func TestOccupyState(t *testing.T) {
   192  	m := &Matcher{
   193  		[]int{5, 0, 0, -3},
   194  		[]int{-3, 0, 0, -1},
   195  		[]int{},
   196  		[][]SWord{},
   197  	}
   198  	m.increaseSize(5)
   199  	m.occupyState(3, 1)
   200  	m.occupyState(4, 1)
   201  	m.occupyState(8, 1)
   202  	m.occupyState(6, 1)
   203  	m.occupyState(5, 1)
   204  	m.occupyState(7, 1)
   205  	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -1867, -1867, -1867, -1867, -1867, -1867}) {
   206  		t.Errorf("Got: %v\n", m.base)
   207  	}
   208  	if !reflect.DeepEqual(m.check, []int{0, 0, 0, 1, 1, 1, 1, 1, 1}) {
   209  		t.Errorf("Got: %v\n", m.check)
   210  	}
   211  }
   212  
   213  func TestRandomGen100kNotFound(t *testing.T) {
   214  	N := 100000
   215  	L := 128
   216  	M := 1000000
   217  
   218  	words := make([][]byte, N)
   219  	buffer := make([]byte, M)
   220  	rand.Read(buffer)
   221  
   222  	for i := 0; i < N; i++ {
   223  		words[i] = make([]byte, L)
   224  		rand.Read(words[i])
   225  	}
   226  
   227  	m := CompileByteSlices(words)
   228  
   229  	Ms := m.FindAllByteSlice(buffer)
   230  	if len(Ms) != 0 {
   231  		t.Errorf("Got %d matches", len(Ms))
   232  	}
   233  }
   234  
   235  // example of match interface redefining
   236  type MatchKey struct {
   237  	Index int // the start index of the match
   238  	Key   int // key of pattern
   239  }
   240  
   241  type MatchesKeys struct {
   242  	matches []MatchKey
   243  }
   244  
   245  func (m *MatchesKeys) Append(pos int, key int) {
   246  	m.matches = append(m.matches, MatchKey{pos, key})
   247  }
   248  
   249  func (m *MatchesKeys) Count() int {
   250  	return len(m.matches)
   251  }
   252  
   253  
   254  func initTestByteSlice(N int, L int) [][]byte{
   255  	words := make([][]byte, N)
   256  	for i := 0; i < N; i++ {
   257  		words[i] = make([]byte, L)
   258  		rand.Read(words[i])
   259  	}
   260  	return words
   261  }
   262  
   263  func TestRandomGen100kNotFoundReader(t *testing.T) {
   264  	N := 100000
   265  	L := 128
   266  	M := 1000000
   267  	words := initTestByteSlice(N,L)
   268  	
   269  	buffer := make([]byte, M)
   270  	rand.Read(buffer)
   271  
   272  	
   273  
   274  	m := CompileByteSlices(words)
   275  	data := bytes.NewReader(buffer)
   276  	var Ms Matches
   277  	Ms = &MatchesKeys{}
   278  	m.FindAllByteReader(data, Ms)
   279  	if  Ms.Count()!= 0 {
   280  		t.Errorf("Got %d matches", Ms.Count())
   281  	}
   282  }
   283  
   284  func TestRandomGen100k1Found(t *testing.T) {
   285  	N := 100000
   286  	L := 128
   287  	M := 1000000
   288  
   289  	words := make([][]byte, N)
   290  	buffer := make([]byte, M)
   291  	rand.Read(buffer)
   292  
   293  	for i := 0; i < N; i++ {
   294  		words[i] = make([]byte, L)
   295  		rand.Read(words[i])
   296  	}
   297  
   298  	m := CompileByteSlices(words)
   299  
   300  	idx := rand.Intn(N - 1)
   301  	buffer2 := append(buffer, words[idx]...)
   302  	Ms := m.FindAllByteSlice(buffer2)
   303  	if len(Ms) != 1 {
   304  		t.Errorf("Got %d matches instead of 1", len(Ms))
   305  	}
   306  }
   307  
   308  func TestRandomGen100k1FoundReader(t *testing.T) {
   309  	N := 100000
   310  	L := 128
   311  	M := 1000000
   312  
   313  	words := make([][]byte, N)
   314  	buffer := make([]byte, M)
   315  	rand.Read(buffer)
   316  
   317  	for i := 0; i < N; i++ {
   318  		words[i] = make([]byte, L)
   319  		rand.Read(words[i])
   320  	}
   321  
   322  	m := CompileByteSlices(words)
   323  
   324  	idx := rand.Intn(N - 1)
   325  	buffer2 := append(buffer, words[idx]...)
   326  	var Ms Matches
   327  	Ms = &MatchesKeys{}
   328  	m.FindAllByteReader(bytes.NewReader(buffer2),Ms)
   329  	if Ms.Count() != 1 {
   330  		t.Errorf("Got %d matches instead of 1", Ms.Count())
   331  	}
   332  }
   333  
   334  func TestRandomGen100kAllFound(t *testing.T) {
   335  	N := 100000
   336  	L := 128
   337  
   338  	words := make([][]byte, N)
   339  
   340  	for i := 0; i < N; i++ {
   341  		words[i] = make([]byte, L)
   342  		rand.Read(words[i])
   343  	}
   344  
   345  	m := CompileByteSlices(words)
   346  
   347  	buffer2 := make([]byte, N*L)
   348  	for i, w := range words {
   349  		for j := 0; j < L; j++ {
   350  			buffer2[i*L+j] = w[j]
   351  		}
   352  	}
   353  	Ms := m.FindAllByteSlice(buffer2)
   354  	if len(Ms) != N {
   355  		t.Errorf("Got %d matches instead of %d", len(Ms), N)
   356  	}
   357  }
   358  
   359  func BenchmarkRandomGen100kAllFoundReader(b *testing.B) {
   360  	N := 100000
   361  	L := 128
   362  
   363  	words := make([][]byte, N)
   364  
   365  	for i := 0; i < N; i++ {
   366  		words[i] = make([]byte, L)
   367  		rand.Read(words[i])
   368  	}
   369  
   370  	m := CompileByteSlices(words)
   371  
   372  	buffer2 := make([]byte, N*L)
   373  	for i, w := range words {
   374  		for j := 0; j < L; j++ {
   375  			buffer2[i*L+j] = w[j]
   376  		}
   377  	}
   378  	Ms := &MatchesKeys{}
   379  	m.FindAllByteReader(bytes.NewReader(buffer2),Ms)
   380  	if Ms.Count() != 1 {
   381  		b.Errorf("Got %d matches instead of 1", Ms.Count())
   382  	}
   383  }