github.com/shohhei1126/hugo@v0.42.2-0.20180623210752-3d5928889ad7/related/inverted_index_test.go (about)

     1  // Copyright 2017-present The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package related
    15  
import (
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)
    24  
    25  type testDoc struct {
    26  	keywords map[string][]Keyword
    27  	date     time.Time
    28  }
    29  
    30  func (d *testDoc) String() string {
    31  	s := "\n"
    32  	for k, v := range d.keywords {
    33  		s += k + ":\t\t"
    34  		for _, vv := range v {
    35  			s += "  " + vv.String()
    36  		}
    37  		s += "\n"
    38  	}
    39  	return s
    40  }
    41  
    42  func newTestDoc(name string, keywords ...string) *testDoc {
    43  	km := make(map[string][]Keyword)
    44  
    45  	time.Sleep(1 * time.Millisecond)
    46  	kw := &testDoc{keywords: km, date: time.Now()}
    47  
    48  	kw.addKeywords(name, keywords...)
    49  	return kw
    50  }
    51  
    52  func (d *testDoc) addKeywords(name string, keywords ...string) *testDoc {
    53  	keywordm := createTestKeywords(name, keywords...)
    54  
    55  	for k, v := range keywordm {
    56  		keywords := make([]Keyword, len(v))
    57  		for i := 0; i < len(v); i++ {
    58  			keywords[i] = StringKeyword(v[i])
    59  		}
    60  		d.keywords[k] = keywords
    61  	}
    62  	return d
    63  }
    64  
    65  func createTestKeywords(name string, keywords ...string) map[string][]string {
    66  	return map[string][]string{
    67  		name: keywords,
    68  	}
    69  }
    70  
    71  func (d *testDoc) SearchKeywords(cfg IndexConfig) ([]Keyword, error) {
    72  	return d.keywords[cfg.Name], nil
    73  }
    74  
    75  func (d *testDoc) PubDate() time.Time {
    76  	return d.date
    77  }
    78  
    79  func TestSearch(t *testing.T) {
    80  
    81  	config := Config{
    82  		Threshold:    90,
    83  		IncludeNewer: false,
    84  		Indices: IndexConfigs{
    85  			IndexConfig{Name: "tags", Weight: 50},
    86  			IndexConfig{Name: "keywords", Weight: 65},
    87  		},
    88  	}
    89  
    90  	idx := NewInvertedIndex(config)
    91  	//idx.debug = true
    92  
    93  	docs := []Document{
    94  		newTestDoc("tags", "a", "b", "c", "d"),
    95  		newTestDoc("tags", "b", "d", "g"),
    96  		newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
    97  		newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
    98  	}
    99  
   100  	idx.Add(docs...)
   101  
   102  	t.Run("count", func(t *testing.T) {
   103  		assert := require.New(t)
   104  		assert.Len(idx.index, 2)
   105  		set1, found := idx.index["tags"]
   106  		assert.True(found)
   107  		// 6 tags
   108  		assert.Len(set1, 6)
   109  
   110  		set2, found := idx.index["keywords"]
   111  		assert.True(found)
   112  		assert.Len(set2, 2)
   113  
   114  	})
   115  
   116  	t.Run("search-tags", func(t *testing.T) {
   117  		assert := require.New(t)
   118  		m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
   119  		assert.NoError(err)
   120  		assert.Len(m, 2)
   121  		assert.Equal(docs[0], m[0])
   122  		assert.Equal(docs[1], m[1])
   123  	})
   124  
   125  	t.Run("search-tags-and-keywords", func(t *testing.T) {
   126  		assert := require.New(t)
   127  		m, err := idx.search(
   128  			newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
   129  			newQueryElement("keywords", StringsToKeywords("a", "b")...))
   130  		assert.NoError(err)
   131  		assert.Len(m, 3)
   132  		assert.Equal(docs[3], m[0])
   133  		assert.Equal(docs[2], m[1])
   134  		assert.Equal(docs[0], m[2])
   135  	})
   136  
   137  	t.Run("searchdoc-all", func(t *testing.T) {
   138  		assert := require.New(t)
   139  		doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
   140  		m, err := idx.SearchDoc(doc)
   141  		assert.NoError(err)
   142  		assert.Len(m, 2)
   143  		assert.Equal(docs[3], m[0])
   144  		assert.Equal(docs[2], m[1])
   145  	})
   146  
   147  	t.Run("searchdoc-tags", func(t *testing.T) {
   148  		assert := require.New(t)
   149  		doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
   150  		m, err := idx.SearchDoc(doc, "tags")
   151  		assert.NoError(err)
   152  		assert.Len(m, 2)
   153  		assert.Equal(docs[0], m[0])
   154  		assert.Equal(docs[1], m[1])
   155  	})
   156  
   157  	t.Run("searchdoc-keywords-date", func(t *testing.T) {
   158  		assert := require.New(t)
   159  		doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
   160  		// This will get a date newer than the others.
   161  		newDoc := newTestDoc("keywords", "a", "b")
   162  		idx.Add(newDoc)
   163  
   164  		m, err := idx.SearchDoc(doc, "keywords")
   165  		assert.NoError(err)
   166  		assert.Len(m, 2)
   167  		assert.Equal(docs[3], m[0])
   168  	})
   169  
   170  }
   171  
   172  func BenchmarkRelatedNewIndex(b *testing.B) {
   173  
   174  	pages := make([]*testDoc, 100)
   175  	numkeywords := 30
   176  	allKeywords := make([]string, numkeywords)
   177  	for i := 0; i < numkeywords; i++ {
   178  		allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
   179  	}
   180  
   181  	for i := 0; i < len(pages); i++ {
   182  		start := rand.Intn(len(allKeywords))
   183  		end := start + 3
   184  		if end >= len(allKeywords) {
   185  			end = start + 1
   186  		}
   187  
   188  		kw := newTestDoc("tags", allKeywords[start:end]...)
   189  		if i%5 == 0 {
   190  			start := rand.Intn(len(allKeywords))
   191  			end := start + 3
   192  			if end >= len(allKeywords) {
   193  				end = start + 1
   194  			}
   195  			kw.addKeywords("keywords", allKeywords[start:end]...)
   196  		}
   197  
   198  		pages[i] = kw
   199  	}
   200  
   201  	cfg := Config{
   202  		Threshold: 50,
   203  		Indices: IndexConfigs{
   204  			IndexConfig{Name: "tags", Weight: 100},
   205  			IndexConfig{Name: "keywords", Weight: 200},
   206  		},
   207  	}
   208  
   209  	b.Run("singles", func(b *testing.B) {
   210  		for i := 0; i < b.N; i++ {
   211  			idx := NewInvertedIndex(cfg)
   212  			for _, doc := range pages {
   213  				idx.Add(doc)
   214  			}
   215  		}
   216  	})
   217  
   218  	b.Run("all", func(b *testing.B) {
   219  		for i := 0; i < b.N; i++ {
   220  			idx := NewInvertedIndex(cfg)
   221  			docs := make([]Document, len(pages))
   222  			for i := 0; i < len(pages); i++ {
   223  				docs[i] = pages[i]
   224  			}
   225  			idx.Add(docs...)
   226  		}
   227  	})
   228  
   229  }
   230  
   231  func BenchmarkRelatedMatchesIn(b *testing.B) {
   232  
   233  	q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
   234  	q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)
   235  
   236  	docs := make([]*testDoc, 1000)
   237  	numkeywords := 20
   238  	allKeywords := make([]string, numkeywords)
   239  	for i := 0; i < numkeywords; i++ {
   240  		allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
   241  	}
   242  
   243  	cfg := Config{
   244  		Threshold: 20,
   245  		Indices: IndexConfigs{
   246  			IndexConfig{Name: "tags", Weight: 100},
   247  			IndexConfig{Name: "keywords", Weight: 200},
   248  		},
   249  	}
   250  
   251  	idx := NewInvertedIndex(cfg)
   252  
   253  	for i := 0; i < len(docs); i++ {
   254  		start := rand.Intn(len(allKeywords))
   255  		end := start + 3
   256  		if end >= len(allKeywords) {
   257  			end = start + 1
   258  		}
   259  
   260  		index := "tags"
   261  		if i%5 == 0 {
   262  			index = "keywords"
   263  		}
   264  
   265  		idx.Add(newTestDoc(index, allKeywords[start:end]...))
   266  	}
   267  
   268  	b.ResetTimer()
   269  	for i := 0; i < b.N; i++ {
   270  		if i%10 == 0 {
   271  			idx.search(q2)
   272  		} else {
   273  			idx.search(q1)
   274  		}
   275  	}
   276  }