github.com/gohugoio/hugo@v0.88.1/related/inverted_index_test.go (about)

     1  // Copyright 2019 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package related
    15  
    16  import (
    17  	"fmt"
    18  	"math/rand"
    19  	"testing"
    20  	"time"
    21  
    22  	qt "github.com/frankban/quicktest"
    23  )
    24  
// testDoc is a minimal in-memory document used by the tests and benchmarks in
// this file. Pointers to it are stored in []Document slices and handed to the
// inverted index.
type testDoc struct {
	// keywords holds the document's keywords, keyed by index name
	// (the value looked up via IndexConfig.Name in RelatedKeywords).
	keywords map[string][]Keyword
	// date is the value reported by PublishDate.
	date time.Time
	// name is the value reported by Name. The constructors in this file
	// leave it empty; tests assign it directly when they need it.
	name string
}
    30  
    31  func (d *testDoc) String() string {
    32  	s := "\n"
    33  	for k, v := range d.keywords {
    34  		s += k + ":\t\t"
    35  		for _, vv := range v {
    36  			s += "  " + vv.String()
    37  		}
    38  		s += "\n"
    39  	}
    40  	return s
    41  }
    42  
// Name returns the document's name field. It is empty unless a test has
// assigned it explicitly.
func (d *testDoc) Name() string {
	return d.name
}
    46  
    47  func newTestDoc(name string, keywords ...string) *testDoc {
    48  	time.Sleep(1 * time.Millisecond)
    49  	return newTestDocWithDate(name, time.Now(), keywords...)
    50  }
    51  
    52  func newTestDocWithDate(name string, date time.Time, keywords ...string) *testDoc {
    53  	km := make(map[string][]Keyword)
    54  
    55  	kw := &testDoc{keywords: km, date: date}
    56  
    57  	kw.addKeywords(name, keywords...)
    58  	return kw
    59  }
    60  
    61  func (d *testDoc) addKeywords(name string, keywords ...string) *testDoc {
    62  	keywordm := createTestKeywords(name, keywords...)
    63  
    64  	for k, v := range keywordm {
    65  		keywords := make([]Keyword, len(v))
    66  		for i := 0; i < len(v); i++ {
    67  			keywords[i] = StringKeyword(v[i])
    68  		}
    69  		d.keywords[k] = keywords
    70  	}
    71  	return d
    72  }
    73  
    74  func createTestKeywords(name string, keywords ...string) map[string][]string {
    75  	return map[string][]string{
    76  		name: keywords,
    77  	}
    78  }
    79  
    80  func (d *testDoc) RelatedKeywords(cfg IndexConfig) ([]Keyword, error) {
    81  	return d.keywords[cfg.Name], nil
    82  }
    83  
// PublishDate returns the date the document was created with.
func (d *testDoc) PublishDate() time.Time {
	return d.date
}
    87  
    88  func TestSearch(t *testing.T) {
    89  	config := Config{
    90  		Threshold:    90,
    91  		IncludeNewer: false,
    92  		Indices: IndexConfigs{
    93  			IndexConfig{Name: "tags", Weight: 50},
    94  			IndexConfig{Name: "keywords", Weight: 65},
    95  		},
    96  	}
    97  
    98  	idx := NewInvertedIndex(config)
    99  	// idx.debug = true
   100  
   101  	docs := []Document{
   102  		newTestDoc("tags", "a", "b", "c", "d"),
   103  		newTestDoc("tags", "b", "d", "g"),
   104  		newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
   105  		newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
   106  	}
   107  
   108  	idx.Add(docs...)
   109  
   110  	t.Run("count", func(t *testing.T) {
   111  		c := qt.New(t)
   112  		c.Assert(len(idx.index), qt.Equals, 2)
   113  		set1, found := idx.index["tags"]
   114  		c.Assert(found, qt.Equals, true)
   115  		// 6 tags
   116  		c.Assert(len(set1), qt.Equals, 6)
   117  
   118  		set2, found := idx.index["keywords"]
   119  		c.Assert(found, qt.Equals, true)
   120  		c.Assert(len(set2), qt.Equals, 2)
   121  	})
   122  
   123  	t.Run("search-tags", func(t *testing.T) {
   124  		c := qt.New(t)
   125  		m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
   126  		c.Assert(err, qt.IsNil)
   127  		c.Assert(len(m), qt.Equals, 2)
   128  		c.Assert(m[0], qt.Equals, docs[0])
   129  		c.Assert(m[1], qt.Equals, docs[1])
   130  	})
   131  
   132  	t.Run("search-tags-and-keywords", func(t *testing.T) {
   133  		c := qt.New(t)
   134  		m, err := idx.search(
   135  			newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
   136  			newQueryElement("keywords", StringsToKeywords("a", "b")...))
   137  		c.Assert(err, qt.IsNil)
   138  		c.Assert(len(m), qt.Equals, 3)
   139  		c.Assert(m[0], qt.Equals, docs[3])
   140  		c.Assert(m[1], qt.Equals, docs[2])
   141  		c.Assert(m[2], qt.Equals, docs[0])
   142  	})
   143  
   144  	t.Run("searchdoc-all", func(t *testing.T) {
   145  		c := qt.New(t)
   146  		doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
   147  		m, err := idx.SearchDoc(doc)
   148  		c.Assert(err, qt.IsNil)
   149  		c.Assert(len(m), qt.Equals, 2)
   150  		c.Assert(m[0], qt.Equals, docs[3])
   151  		c.Assert(m[1], qt.Equals, docs[2])
   152  	})
   153  
   154  	t.Run("searchdoc-tags", func(t *testing.T) {
   155  		c := qt.New(t)
   156  		doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
   157  		m, err := idx.SearchDoc(doc, "tags")
   158  		c.Assert(err, qt.IsNil)
   159  		c.Assert(len(m), qt.Equals, 2)
   160  		c.Assert(m[0], qt.Equals, docs[0])
   161  		c.Assert(m[1], qt.Equals, docs[1])
   162  	})
   163  
   164  	t.Run("searchdoc-keywords-date", func(t *testing.T) {
   165  		c := qt.New(t)
   166  		doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
   167  		// This will get a date newer than the others.
   168  		newDoc := newTestDoc("keywords", "a", "b")
   169  		idx.Add(newDoc)
   170  
   171  		m, err := idx.SearchDoc(doc, "keywords")
   172  		c.Assert(err, qt.IsNil)
   173  		c.Assert(len(m), qt.Equals, 2)
   174  		c.Assert(m[0], qt.Equals, docs[3])
   175  	})
   176  
   177  	t.Run("searchdoc-keywords-same-date", func(t *testing.T) {
   178  		c := qt.New(t)
   179  		idx := NewInvertedIndex(config)
   180  
   181  		date := time.Now()
   182  
   183  		doc := newTestDocWithDate("keywords", date, "a", "b")
   184  		doc.name = "thedoc"
   185  
   186  		for i := 0; i < 10; i++ {
   187  			docc := *doc
   188  			docc.name = fmt.Sprintf("doc%d", i)
   189  			idx.Add(&docc)
   190  		}
   191  
   192  		m, err := idx.SearchDoc(doc, "keywords")
   193  		c.Assert(err, qt.IsNil)
   194  		c.Assert(len(m), qt.Equals, 10)
   195  		for i := 0; i < 10; i++ {
   196  			c.Assert(m[i].Name(), qt.Equals, fmt.Sprintf("doc%d", i))
   197  		}
   198  	})
   199  }
   200  
   201  func TestToKeywordsToLower(t *testing.T) {
   202  	c := qt.New(t)
   203  	slice := []string{"A", "B", "C"}
   204  	config := IndexConfig{ToLower: true}
   205  	keywords, err := config.ToKeywords(slice)
   206  	c.Assert(err, qt.IsNil)
   207  	c.Assert(slice, qt.DeepEquals, []string{"A", "B", "C"})
   208  	c.Assert(keywords, qt.DeepEquals, []Keyword{
   209  		StringKeyword("a"),
   210  		StringKeyword("b"),
   211  		StringKeyword("c"),
   212  	})
   213  }
   214  
   215  func BenchmarkRelatedNewIndex(b *testing.B) {
   216  	pages := make([]*testDoc, 100)
   217  	numkeywords := 30
   218  	allKeywords := make([]string, numkeywords)
   219  	for i := 0; i < numkeywords; i++ {
   220  		allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
   221  	}
   222  
   223  	for i := 0; i < len(pages); i++ {
   224  		start := rand.Intn(len(allKeywords))
   225  		end := start + 3
   226  		if end >= len(allKeywords) {
   227  			end = start + 1
   228  		}
   229  
   230  		kw := newTestDoc("tags", allKeywords[start:end]...)
   231  		if i%5 == 0 {
   232  			start := rand.Intn(len(allKeywords))
   233  			end := start + 3
   234  			if end >= len(allKeywords) {
   235  				end = start + 1
   236  			}
   237  			kw.addKeywords("keywords", allKeywords[start:end]...)
   238  		}
   239  
   240  		pages[i] = kw
   241  	}
   242  
   243  	cfg := Config{
   244  		Threshold: 50,
   245  		Indices: IndexConfigs{
   246  			IndexConfig{Name: "tags", Weight: 100},
   247  			IndexConfig{Name: "keywords", Weight: 200},
   248  		},
   249  	}
   250  
   251  	b.Run("singles", func(b *testing.B) {
   252  		for i := 0; i < b.N; i++ {
   253  			idx := NewInvertedIndex(cfg)
   254  			for _, doc := range pages {
   255  				idx.Add(doc)
   256  			}
   257  		}
   258  	})
   259  
   260  	b.Run("all", func(b *testing.B) {
   261  		for i := 0; i < b.N; i++ {
   262  			idx := NewInvertedIndex(cfg)
   263  			docs := make([]Document, len(pages))
   264  			for i := 0; i < len(pages); i++ {
   265  				docs[i] = pages[i]
   266  			}
   267  			idx.Add(docs...)
   268  		}
   269  	})
   270  }
   271  
   272  func BenchmarkRelatedMatchesIn(b *testing.B) {
   273  	q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
   274  	q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)
   275  
   276  	docs := make([]*testDoc, 1000)
   277  	numkeywords := 20
   278  	allKeywords := make([]string, numkeywords)
   279  	for i := 0; i < numkeywords; i++ {
   280  		allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
   281  	}
   282  
   283  	cfg := Config{
   284  		Threshold: 20,
   285  		Indices: IndexConfigs{
   286  			IndexConfig{Name: "tags", Weight: 100},
   287  			IndexConfig{Name: "keywords", Weight: 200},
   288  		},
   289  	}
   290  
   291  	idx := NewInvertedIndex(cfg)
   292  
   293  	for i := 0; i < len(docs); i++ {
   294  		start := rand.Intn(len(allKeywords))
   295  		end := start + 3
   296  		if end >= len(allKeywords) {
   297  			end = start + 1
   298  		}
   299  
   300  		index := "tags"
   301  		if i%5 == 0 {
   302  			index = "keywords"
   303  		}
   304  
   305  		idx.Add(newTestDoc(index, allKeywords[start:end]...))
   306  	}
   307  
   308  	b.ResetTimer()
   309  	for i := 0; i < b.N; i++ {
   310  		if i%10 == 0 {
   311  			idx.search(q2)
   312  		} else {
   313  			idx.search(q1)
   314  		}
   315  	}
   316  }