github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/stopwords/detector_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package stopwords
    13  
    14  import (
    15  	"testing"
    16  
    17  	"github.com/stretchr/testify/require"
    18  	"github.com/weaviate/weaviate/entities/models"
    19  )
    20  
    21  func TestStopwordDetector(t *testing.T) {
    22  	type testcase struct {
    23  		cfg               models.StopwordConfig
    24  		input             []string
    25  		expectedCountable int
    26  	}
    27  
    28  	runTest := func(t *testing.T, tests []testcase) {
    29  		for _, test := range tests {
    30  			sd, err := NewDetectorFromConfig(test.cfg)
    31  			require.Nil(t, err)
    32  
    33  			var result []string
    34  			for _, word := range test.input {
    35  				if !sd.IsStopword(word) {
    36  					result = append(result, word)
    37  				}
    38  			}
    39  			require.Equal(t, test.expectedCountable, len(result))
    40  		}
    41  	}
    42  
    43  	t.Run("with en preset, additions", func(t *testing.T) {
    44  		tests := []testcase{
    45  			{
    46  				cfg: models.StopwordConfig{
    47  					Preset:    "en",
    48  					Additions: []string{"dog"},
    49  				},
    50  				input:             []string{"dog", "dog", "dog", "dog"},
    51  				expectedCountable: 0,
    52  			},
    53  			{
    54  				cfg: models.StopwordConfig{
    55  					Preset:    "en",
    56  					Additions: []string{"dog"},
    57  				},
    58  				input:             []string{"dog", "dog", "dog", "cat"},
    59  				expectedCountable: 1,
    60  			},
    61  			{
    62  				cfg: models.StopwordConfig{
    63  					Preset:    "en",
    64  					Additions: []string{"dog"},
    65  				},
    66  				input:             []string{"a", "dog", "is", "the", "best"},
    67  				expectedCountable: 1,
    68  			},
    69  		}
    70  
    71  		runTest(t, tests)
    72  	})
    73  
    74  	t.Run("with no preset, additions", func(t *testing.T) {
    75  		tests := []testcase{
    76  			{
    77  				cfg: models.StopwordConfig{
    78  					Preset:    "none",
    79  					Additions: []string{"dog"},
    80  				},
    81  				input:             []string{"a", "dog", "is", "the", "best"},
    82  				expectedCountable: 4,
    83  			},
    84  		}
    85  
    86  		runTest(t, tests)
    87  	})
    88  
    89  	t.Run("with en preset, removals", func(t *testing.T) {
    90  		tests := []testcase{
    91  			{
    92  				cfg: models.StopwordConfig{
    93  					Preset:   "en",
    94  					Removals: []string{"a"},
    95  				},
    96  				input:             []string{"a", "dog", "is", "the", "best"},
    97  				expectedCountable: 3,
    98  			},
    99  			{
   100  				cfg: models.StopwordConfig{
   101  					Preset:   "en",
   102  					Removals: []string{"a", "is", "the"},
   103  				},
   104  				input:             []string{"a", "dog", "is", "the", "best"},
   105  				expectedCountable: 5,
   106  			},
   107  		}
   108  
   109  		runTest(t, tests)
   110  	})
   111  
   112  	t.Run("with en preset, removals", func(t *testing.T) {
   113  		tests := []testcase{
   114  			{
   115  				cfg: models.StopwordConfig{
   116  					Preset:   "en",
   117  					Removals: []string{"a"},
   118  				},
   119  				input:             []string{"a", "dog", "is", "the", "best"},
   120  				expectedCountable: 3,
   121  			},
   122  			{
   123  				cfg: models.StopwordConfig{
   124  					Preset:   "en",
   125  					Removals: []string{"a", "is", "the"},
   126  				},
   127  				input:             []string{"a", "dog", "is", "the", "best"},
   128  				expectedCountable: 5,
   129  			},
   130  		}
   131  
   132  		runTest(t, tests)
   133  	})
   134  
   135  	t.Run("with en preset, additions, removals", func(t *testing.T) {
   136  		tests := []testcase{
   137  			{
   138  				cfg: models.StopwordConfig{
   139  					Preset:    "en",
   140  					Additions: []string{"dog"},
   141  					Removals:  []string{"a"},
   142  				},
   143  				input:             []string{"a", "dog", "is", "the", "best"},
   144  				expectedCountable: 2,
   145  			},
   146  			{
   147  				cfg: models.StopwordConfig{
   148  					Preset:    "en",
   149  					Additions: []string{"dog", "best"},
   150  					Removals:  []string{"a", "the", "is"},
   151  				},
   152  				input:             []string{"a", "dog", "is", "the", "best"},
   153  				expectedCountable: 3,
   154  			},
   155  		}
   156  
   157  		runTest(t, tests)
   158  	})
   159  }