github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregator/text_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package aggregator
    13  
    14  import (
    15  	"testing"
    16  
    17  	"github.com/stretchr/testify/assert"
    18  	"github.com/weaviate/weaviate/entities/aggregation"
    19  )
    20  
    21  func TestTextAggregator_TopOccurrencesCalculation(t *testing.T) {
    22  	testCases := []struct {
    23  		name                   string
    24  		texts                  []string
    25  		expectedCount          int
    26  		expectedTopOccurrences []aggregation.TextOccurrence
    27  	}{
    28  		{
    29  			name:          "All texts occurring once",
    30  			texts:         []string{"b_occurs1", "c_occurs1", "g_occurs1", "f_occurs1", "a_occurs1", "d_occurs1", "e_occurs1"},
    31  			expectedCount: 7,
    32  			expectedTopOccurrences: []aggregation.TextOccurrence{
    33  				{Value: "a_occurs1", Occurs: 1},
    34  				{Value: "b_occurs1", Occurs: 1},
    35  				{Value: "c_occurs1", Occurs: 1},
    36  				{Value: "d_occurs1", Occurs: 1},
    37  				{Value: "e_occurs1", Occurs: 1},
    38  			},
    39  		},
    40  		{
    41  			name: "All texts occurring different number of times",
    42  			texts: []string{
    43  				"b_occurs2", "e_occurs5", "d_occurs4", "c_occurs3", "g_occurs7", "e_occurs5", "d_occurs4",
    44  				"f_occurs6", "g_occurs7", "c_occurs3", "b_occurs2", "g_occurs7", "f_occurs6", "g_occurs7", "d_occurs4",
    45  				"a_occurs1", "f_occurs6", "g_occurs7", "g_occurs7", "f_occurs6", "d_occurs4", "e_occurs5", "g_occurs7",
    46  				"c_occurs3", "f_occurs6", "e_occurs5", "f_occurs6", "e_occurs5",
    47  			},
    48  			expectedCount: 28,
    49  			expectedTopOccurrences: []aggregation.TextOccurrence{
    50  				{Value: "g_occurs7", Occurs: 7},
    51  				{Value: "f_occurs6", Occurs: 6},
    52  				{Value: "e_occurs5", Occurs: 5},
    53  				{Value: "d_occurs4", Occurs: 4},
    54  				{Value: "c_occurs3", Occurs: 3},
    55  			},
    56  		},
    57  		{
    58  			name: "Some texts occurring same number of times",
    59  			texts: []string{
    60  				"a_occurs4", "b_occurs3", "g_occurs4", "f_occurs3", "a_occurs4", "e_occurs2", "a_occurs4",
    61  				"c_occurs2", "g_occurs4", "f_occurs3", "b_occurs3", "c_occurs2", "a_occurs4", "f_occurs3", "g_occurs4",
    62  				"b_occurs3", "d_occurs1", "e_occurs2", "g_occurs4",
    63  			},
    64  			expectedCount: 19,
    65  			expectedTopOccurrences: []aggregation.TextOccurrence{
    66  				{Value: "a_occurs4", Occurs: 4},
    67  				{Value: "g_occurs4", Occurs: 4},
    68  				{Value: "b_occurs3", Occurs: 3},
    69  				{Value: "f_occurs3", Occurs: 3},
    70  				{Value: "c_occurs2", Occurs: 2},
    71  			},
    72  		},
    73  		{
    74  			name:          "Fewer texts than limit",
    75  			texts:         []string{"b_occurs3", "d_occurs3", "c_occurs1", "d_occurs3", "b_occurs3", "b_occurs3", "a_occurs1", "d_occurs3"},
    76  			expectedCount: 8,
    77  			expectedTopOccurrences: []aggregation.TextOccurrence{
    78  				{Value: "b_occurs3", Occurs: 3},
    79  				{Value: "d_occurs3", Occurs: 3},
    80  				{Value: "a_occurs1", Occurs: 1},
    81  				{Value: "c_occurs1", Occurs: 1},
    82  			},
    83  		},
    84  	}
    85  
    86  	for _, tc := range testCases {
    87  		t.Run(tc.name, func(t *testing.T) {
    88  			agg := newTextAggregator(5)
    89  			for _, text := range tc.texts {
    90  				agg.AddText(text)
    91  			}
    92  
    93  			res := agg.Res()
    94  			assert.Equal(t, tc.expectedCount, res.Count)
    95  			assert.Equal(t, tc.expectedTopOccurrences, res.Items)
    96  		})
    97  	}
    98  }