github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregator/shard_combiner_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package aggregator
    13  
    14  import (
    15  	"math/rand"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/weaviate/weaviate/entities/aggregation"
    20  )
    21  
    22  const (
    23  	YearMonthDayHourMinute = "2022-06-16T18:30:"
    24  	NanoSecondsTimeZone    = ".451235Z"
    25  )
    26  
    27  type TestStructDates struct {
    28  	name            string
    29  	dates1          []string
    30  	dates2          []string
    31  	expectedMedian  string
    32  	expectedMaximum string
    33  	expectedMode    string
    34  	expectedMinimum string
    35  }
    36  
    37  func TestShardCombinerMergeDates(t *testing.T) {
    38  	tests := []TestStructDates{
    39  		{
    40  			name:            "Many values",
    41  			dates1:          []string{"55", "26", "10"},
    42  			dates2:          []string{"15", "26", "45", "26"},
    43  			expectedMaximum: "55",
    44  			expectedMinimum: "10",
    45  			expectedMedian:  "26",
    46  			expectedMode:    "26",
    47  		},
    48  		{
    49  			name:            "Struct with single element",
    50  			dates1:          []string{"45"},
    51  			dates2:          []string{"00", "26", "45", "27"},
    52  			expectedMaximum: "45",
    53  			expectedMinimum: "00",
    54  			expectedMedian:  "27",
    55  			expectedMode:    "45",
    56  		},
    57  	}
    58  	for _, tt := range tests {
    59  		t.Run(tt.name, func(t *testing.T) {
    60  			testDates(t, tt.dates1, tt.dates2, tt)
    61  			testDates(t, tt.dates2, tt.dates1, tt)
    62  		})
    63  	}
    64  }
    65  
    66  func testDates(t *testing.T, dates1, dates2 []string, tt TestStructDates) {
    67  	sc := NewShardCombiner()
    68  	dateMap1 := createDateAgg(dates1)
    69  	dateMap2 := createDateAgg(dates2)
    70  
    71  	sc.mergeDateProp(dateMap1, dateMap2)
    72  	sc.finalizeDateProp(dateMap1)
    73  	assert.Equal(t, YearMonthDayHourMinute+tt.expectedMinimum+NanoSecondsTimeZone, dateMap1["minimum"])
    74  	assert.Equal(t, YearMonthDayHourMinute+tt.expectedMaximum+NanoSecondsTimeZone, dateMap1["maximum"])
    75  	assert.Equal(t, YearMonthDayHourMinute+tt.expectedMedian+NanoSecondsTimeZone, dateMap1["median"])
    76  	assert.Equal(t, int64(len(tt.dates1)+len(tt.dates2)), dateMap1["count"])
    77  	assert.Equal(t, YearMonthDayHourMinute+tt.expectedMode+NanoSecondsTimeZone, dateMap1["mode"])
    78  }
    79  
    80  func createDateAgg(dates []string) map[string]interface{} {
    81  	agg := newDateAggregator()
    82  	for _, date := range dates {
    83  		agg.AddTimestamp(YearMonthDayHourMinute + date + NanoSecondsTimeZone)
    84  	}
    85  	agg.buildPairsFromCounts() // needed to populate all required info
    86  
    87  	prop := aggregation.Property{}
    88  	aggs := []aggregation.Aggregator{aggregation.MedianAggregator, aggregation.MinimumAggregator, aggregation.MaximumAggregator, aggregation.CountAggregator, aggregation.ModeAggregator}
    89  	addDateAggregations(&prop, aggs, agg)
    90  	return prop.DateAggregations
    91  }
    92  
    93  type TestStructNumbers struct {
    94  	name     string
    95  	numbers1 []float64
    96  	numbers2 []float64
    97  	testMode bool
    98  }
    99  
   100  func TestShardCombinerMergeNumerical(t *testing.T) {
   101  	tests := []TestStructNumbers{
   102  		{
   103  			name:     "Uneven number of elements for both",
   104  			numbers1: []float64{0, 9, 9},
   105  			numbers2: []float64{2},
   106  			testMode: true,
   107  		},
   108  		{
   109  			name:     "Even number of elements for both",
   110  			numbers1: []float64{0, 5, 10, 15},
   111  			numbers2: []float64{15, 15},
   112  			testMode: true,
   113  		},
   114  		{
   115  			name:     "Mode is affected by merge",
   116  			numbers1: []float64{2.5, 2.5, 10, 15},
   117  			numbers2: []float64{15, 15},
   118  			testMode: true,
   119  		},
   120  		{
   121  			name:     "random",
   122  			numbers1: createRandomSlice(),
   123  			numbers2: createRandomSlice(),
   124  			testMode: false,
   125  		},
   126  	}
   127  	for _, tt := range tests {
   128  		t.Run(tt.name, func(t *testing.T) {
   129  			testNumbers(t, tt.numbers1, tt.numbers2, tt.testMode)
   130  			testNumbers(t, tt.numbers2, tt.numbers1, tt.testMode)
   131  		})
   132  	}
   133  }
   134  
   135  func TestShardCombinerMergeNil(t *testing.T) {
   136  	tests := []struct {
   137  		name         string
   138  		results      []*aggregation.Result
   139  		totalResults int
   140  	}{
   141  		{
   142  			name: "First is nil",
   143  			results: []*aggregation.Result{
   144  				{
   145  					Groups: []aggregation.Group{},
   146  				},
   147  				{
   148  					Groups: []aggregation.Group{{GroupedBy: &aggregation.GroupedBy{Value: 10, Path: []string{"something"}}}},
   149  				},
   150  			},
   151  			totalResults: 1,
   152  		},
   153  		{
   154  			name: "Second is nil",
   155  			results: []*aggregation.Result{
   156  				{
   157  					Groups: []aggregation.Group{{GroupedBy: &aggregation.GroupedBy{Value: 10, Path: []string{"something"}}}},
   158  				},
   159  				{
   160  					Groups: []aggregation.Group{},
   161  				},
   162  			},
   163  			totalResults: 1,
   164  		},
   165  		{
   166  			name: "Both are nil",
   167  			results: []*aggregation.Result{
   168  				{
   169  					Groups: []aggregation.Group{},
   170  				},
   171  				{
   172  					Groups: []aggregation.Group{},
   173  				},
   174  			},
   175  			totalResults: 0,
   176  		},
   177  		{
   178  			name: "Non are nil",
   179  			results: []*aggregation.Result{
   180  				{
   181  					Groups: []aggregation.Group{{GroupedBy: &aggregation.GroupedBy{Value: 9, Path: []string{"other thing"}}}},
   182  				},
   183  				{
   184  					Groups: []aggregation.Group{{GroupedBy: &aggregation.GroupedBy{Value: 10, Path: []string{"something"}}}},
   185  				},
   186  			},
   187  			totalResults: 2,
   188  		},
   189  		{
   190  			name: "Ungrouped with nil",
   191  			results: []*aggregation.Result{
   192  				{
   193  					Groups: []aggregation.Group{{Count: 1}},
   194  				},
   195  				{
   196  					Groups: []aggregation.Group{},
   197  				},
   198  			},
   199  			totalResults: 1,
   200  		},
   201  	}
   202  
   203  	for _, tt := range tests {
   204  		t.Run(tt.name, func(t *testing.T) {
   205  			combinedResults := NewShardCombiner().Do(tt.results)
   206  			assert.Equal(t, len(combinedResults.Groups), tt.totalResults)
   207  		})
   208  	}
   209  }
   210  
   211  func testNumbers(t *testing.T, numbers1, numbers2 []float64, testMode bool) {
   212  	sc := NewShardCombiner()
   213  	numberMap1 := createNumericalAgg(numbers1)
   214  	numberMap2 := createNumericalAgg(numbers2)
   215  
   216  	combinedMap := createNumericalAgg(append(numbers1, numbers2...))
   217  
   218  	sc.mergeNumericalProp(numberMap1, numberMap2)
   219  	sc.finalizeNumerical(numberMap1)
   220  
   221  	assert.Equal(t, len(numbers1)+len(numbers2), int(numberMap1["count"].(float64)))
   222  	assert.InDelta(t, combinedMap["mean"], numberMap1["mean"], 0.0001)
   223  	assert.InDelta(t, combinedMap["median"], numberMap1["median"], 0.0001)
   224  	if testMode { // for random numbers the mode is flaky as there is no guaranteed order if several values have the same count
   225  		assert.Equal(t, combinedMap["mode"], numberMap1["mode"])
   226  	}
   227  }
   228  
   229  func createNumericalAgg(numbers []float64) map[string]interface{} {
   230  	agg := newNumericalAggregator()
   231  	for _, num := range numbers {
   232  		agg.AddFloat64(num)
   233  	}
   234  	agg.buildPairsFromCounts() // needed to populate all required info
   235  
   236  	prop := aggregation.Property{}
   237  	aggs := []aggregation.Aggregator{aggregation.MedianAggregator, aggregation.MeanAggregator, aggregation.ModeAggregator, aggregation.CountAggregator}
   238  	addNumericalAggregations(&prop, aggs, agg)
   239  	return prop.NumericalAggregations
   240  }
   241  
   242  func createRandomSlice() []float64 {
   243  	size := rand.Intn(100) + 1 // at least one entry
   244  	array := make([]float64, size)
   245  	for i := 0; i < size; i++ {
   246  		array[i] = rand.Float64() * 1000
   247  	}
   248  	return array
   249  }