github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregations_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "fmt" 20 "testing" 21 "time" 22 23 "github.com/go-openapi/strfmt" 24 "github.com/google/uuid" 25 "github.com/sirupsen/logrus" 26 "github.com/stretchr/testify/assert" 27 "github.com/stretchr/testify/require" 28 "github.com/weaviate/weaviate/entities/aggregation" 29 "github.com/weaviate/weaviate/entities/filters" 30 "github.com/weaviate/weaviate/entities/models" 31 "github.com/weaviate/weaviate/entities/schema" 32 ) 33 34 func Test_Aggregations(t *testing.T) { 35 dirName := t.TempDir() 36 37 shardState := singleShardState() 38 logger := logrus.New() 39 schemaGetter := &fakeSchemaGetter{ 40 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 41 shardState: shardState, 42 } 43 repo, err := New(logger, Config{ 44 MemtablesFlushDirtyAfter: 60, 45 RootPath: dirName, 46 QueryMaximumResults: 10000, 47 MaxImportGoroutinesFactor: 1, 48 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 49 require.Nil(t, err) 50 repo.SetSchemaGetter(schemaGetter) 51 require.Nil(t, repo.WaitForStartup(testCtx())) 52 migrator := NewMigrator(repo, logger) 53 54 t.Run("prepare test schema and data ", 55 prepareCompanyTestSchemaAndData(repo, migrator, schemaGetter)) 56 57 t.Run("numerical aggregations with grouping", 58 testNumericalAggregationsWithGrouping(repo, true)) 59 60 t.Run("numerical aggregations without grouping (formerly Meta)", 61 testNumericalAggregationsWithoutGrouping(repo, true)) 62 63 t.Run("numerical aggregations with filters", 64 testNumericalAggregationsWithFilters(repo)) 65 66 t.Run("date aggregations with grouping", 67 testDateAggregationsWithGrouping(repo, true)) 68 69 t.Run("date aggregations without grouping", 70 testDateAggregationsWithoutGrouping(repo, true)) 71 72 t.Run("date aggregations with filters", 73 testDateAggregationsWithFilters(repo)) 74 75 t.Run("clean up", 76 cleanupCompanyTestSchemaAndData(repo, migrator)) 77 } 78 79 func Test_Aggregations_MultiShard(t *testing.T) { 80 dirName := t.TempDir() 81 82 shardState := fixedMultiShardState() 83 logger := logrus.New() 84 schemaGetter := &fakeSchemaGetter{ 85 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 86 shardState: shardState, 87 } 88 repo, err := New(logger, Config{ 89 MemtablesFlushDirtyAfter: 60, 90 RootPath: dirName, 91 QueryMaximumResults: 10000, 92 MaxImportGoroutinesFactor: 1, 93 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 94 require.Nil(t, err) 95 repo.SetSchemaGetter(schemaGetter) 96 require.Nil(t, repo.WaitForStartup(testCtx())) 97 migrator := NewMigrator(repo, logger) 98 99 t.Run("prepare test schema and data ", 100 prepareCompanyTestSchemaAndData(repo, migrator, schemaGetter)) 101 102 t.Run("numerical aggregations with grouping", 103 testNumericalAggregationsWithGrouping(repo, false)) 104 105 t.Run("numerical aggregations without grouping (formerly Meta)", 106 testNumericalAggregationsWithoutGrouping(repo, false)) 107 108 t.Run("numerical aggregations with filters", 109 testNumericalAggregationsWithFilters(repo)) 110 111 t.Run("date aggregations with grouping", 112 testDateAggregationsWithGrouping(repo, true)) 113 114 t.Run("date aggregations without grouping", 115 testDateAggregationsWithoutGrouping(repo, true)) 116 117 t.Run("date aggregations with filters", 118 testDateAggregationsWithFilters(repo)) 119 120 t.Run("clean up", 121 cleanupCompanyTestSchemaAndData(repo, migrator)) 122 } 123 124 func prepareCompanyTestSchemaAndData(repo *DB, 125 migrator *Migrator, schemaGetter *fakeSchemaGetter, 126 ) func(t *testing.T) { 127 return func(t *testing.T) { 128 schema := schema.Schema{ 129 Objects: &models.Schema{ 130 Classes: []*models.Class{ 131 productClass, 132 companyClass, 133 arrayTypesClass, 134 customerClass, 135 }, 136 }, 137 } 138 139 schemaGetter.schema = schema 140 141 t.Run("creating the class", func(t *testing.T) { 142 require.Nil(t, 143 migrator.AddClass(context.Background(), productClass, schemaGetter.shardState)) 144 require.Nil(t, 145 migrator.AddClass(context.Background(), companyClass, schemaGetter.shardState)) 146 require.Nil(t, 147 migrator.AddClass(context.Background(), arrayTypesClass, schemaGetter.shardState)) 148 require.Nil(t, 149 migrator.AddClass(context.Background(), customerClass, schemaGetter.shardState)) 150 }) 151 152 schemaGetter.schema = schema 153 154 t.Run("import products", func(t *testing.T) { 155 for i, schema := range products { 156 t.Run(fmt.Sprintf("importing product %d", i), func(t *testing.T) { 157 fixture := models.Object{ 158 Class: productClass.Class, 159 ID: productsIds[i], 160 Properties: schema, 161 } 162 require.Nil(t, 163 repo.PutObject(context.Background(), &fixture, []float32{0.1, 0.2, 0.01, 0.2}, nil, nil)) 164 }) 165 } 166 }) 167 168 t.Run("import companies", func(t *testing.T) { 169 for j := 0; j < importFactor; j++ { 170 for i, schema := range companies { 171 t.Run(fmt.Sprintf("importing company %d", i), func(t *testing.T) { 172 fixture := models.Object{ 173 Class: companyClass.Class, 174 ID: companyIDs[j*(importFactor-1)+i], 175 Properties: schema, 176 } 177 178 require.Nil(t, 179 repo.PutObject(context.Background(), &fixture, []float32{0.1, 0.1, 0.1, 0.1}, nil, nil)) 180 }) 181 } 182 } 183 }) 184 185 t.Run("import array types", func(t *testing.T) { 186 for i, schema := range arrayTypes { 187 t.Run(fmt.Sprintf("importing array type %d", i), func(t *testing.T) { 188 fixture := models.Object{ 189 Class: arrayTypesClass.Class, 190 ID: strfmt.UUID(uuid.Must(uuid.NewRandom()).String()), 191 Properties: schema, 192 } 193 require.Nil(t, 194 repo.PutObject(context.Background(), &fixture, []float32{0.1, 0.1, 0.1, 0.1}, nil, nil)) 195 }) 196 } 197 }) 198 199 t.Run("import customers", func(t *testing.T) { 200 for i, schema := range customers { 201 t.Run(fmt.Sprintf("importing customer #%d", i), func(t *testing.T) { 202 fixture := models.Object{ 203 Class: customerClass.Class, 204 ID: strfmt.UUID(uuid.Must(uuid.NewRandom()).String()), 205 Properties: schema, 206 } 207 require.Nil(t, 208 repo.PutObject(context.Background(), &fixture, []float32{0.1, 0.1, 0.1, 0.1}, nil, nil)) 209 }) 210 } 211 }) 212 } 213 } 214 215 func cleanupCompanyTestSchemaAndData(repo *DB, 216 migrator *Migrator, 217 ) func(t *testing.T) { 218 return func(t *testing.T) { 219 assert.Nil(t, repo.Shutdown(context.Background())) 220 } 221 } 222 223 func testNumericalAggregationsWithGrouping(repo *DB, exact bool) func(t *testing.T) { 224 return func(t *testing.T) { 225 epsilon := 0.1 226 if !exact { 227 epsilon = 1.0 228 } 229 230 t.Run("single field, single aggregator", func(t *testing.T) { 231 params := aggregation.Params{ 232 ClassName: schema.ClassName(companyClass.Class), 233 GroupBy: &filters.Path{ 234 Class: schema.ClassName(companyClass.Class), 235 Property: schema.PropertyName("sector"), 236 }, 237 IncludeMetaCount: true, 238 Properties: []aggregation.ParamProperty{ 239 { 240 Name: schema.PropertyName("dividendYield"), 241 Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator}, 242 }, 243 }, 244 } 245 246 res, err := repo.Aggregate(context.Background(), params) 247 require.Nil(t, err) 248 249 expectedResult := &aggregation.Result{ 250 Groups: []aggregation.Group{ 251 { 252 Count: 60, 253 GroupedBy: &aggregation.GroupedBy{ 254 Path: []string{"sector"}, 255 Value: "Food", 256 }, 257 Properties: map[string]aggregation.Property{ 258 "dividendYield": { 259 Type: aggregation.PropertyTypeNumerical, 260 NumericalAggregations: map[string]interface{}{ 261 "mean": 2.066666666666666, 262 }, 263 }, 264 }, 265 }, 266 { 267 Count: 30, 268 GroupedBy: &aggregation.GroupedBy{ 269 Path: []string{"sector"}, 270 Value: "Financials", 271 }, 272 Properties: map[string]aggregation.Property{ 273 "dividendYield": { 274 Type: aggregation.PropertyTypeNumerical, 275 NumericalAggregations: map[string]interface{}{ 276 "mean": 2.1999999999999999, 277 }, 278 }, 279 }, 280 }, 281 }, 282 } 283 284 require.Equal(t, len(expectedResult.Groups), len(res.Groups)) 285 286 for i := 0; i <= 1; i++ { 287 assert.Equal(t, expectedResult.Groups[i].Count, 288 res.Groups[i].Count) 289 290 expectedDivYield := expectedResult.Groups[i].Properties["dividendYield"] 291 actualDivYield := res.Groups[i].Properties["dividendYield"] 292 293 assert.InEpsilon(t, expectedDivYield.NumericalAggregations["mean"], 294 actualDivYield.NumericalAggregations["mean"], epsilon) 295 } 296 }) 297 298 t.Run("grouping by a non-numerical, non-string prop", func(t *testing.T) { 299 params := aggregation.Params{ 300 ClassName: schema.ClassName(companyClass.Class), 301 GroupBy: &filters.Path{ 302 Class: schema.ClassName(companyClass.Class), 303 Property: schema.PropertyName("listedInIndex"), 304 }, 305 Properties: []aggregation.ParamProperty{ 306 { 307 Name: schema.PropertyName("dividendYield"), 308 Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator}, 309 }, 310 }, 311 } 312 313 res, err := repo.Aggregate(context.Background(), params) 314 require.Nil(t, err) 315 316 expectedResult := &aggregation.Result{ 317 Groups: []aggregation.Group{ 318 { 319 Count: 80, 320 GroupedBy: &aggregation.GroupedBy{ 321 Path: []string{"listedInIndex"}, 322 Value: true, 323 }, 324 Properties: map[string]aggregation.Property{ 325 "dividendYield": { 326 Type: aggregation.PropertyTypeNumerical, 327 NumericalAggregations: map[string]interface{}{ 328 "mean": 2.375, 329 }, 330 }, 331 }, 332 }, 333 { 334 Count: 10, 335 GroupedBy: &aggregation.GroupedBy{ 336 Path: []string{"listedInIndex"}, 337 Value: false, 338 }, 339 Properties: map[string]aggregation.Property{ 340 "dividendYield": { 341 Type: aggregation.PropertyTypeNumerical, 342 NumericalAggregations: map[string]interface{}{ 343 "mean": 0.0, 344 }, 345 }, 346 }, 347 }, 348 }, 349 } 350 351 // there is now way to use InEpsilon or InDelta on nested structs with 352 // testify, so unfortunately we have to do a manual deep equal: 353 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 354 assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count) 355 assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy) 356 assert.InEpsilon(t, expectedResult.Groups[0].Properties["dividendYield"]. 357 NumericalAggregations["mean"], 358 res.Groups[0].Properties["dividendYield"].NumericalAggregations["mean"], 359 epsilon) 360 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 361 assert.Equal(t, expectedResult.Groups[1].Count, res.Groups[1].Count) 362 assert.Equal(t, expectedResult.Groups[1].GroupedBy, res.Groups[1].GroupedBy) 363 assert.InDelta(t, expectedResult.Groups[1].Properties["dividendYield"]. 364 NumericalAggregations["mean"], 365 res.Groups[1].Properties["dividendYield"].NumericalAggregations["mean"], 366 epsilon) 367 }) 368 369 t.Run("multiple fields, multiple aggregators, grouped by string", func(t *testing.T) { 370 params := aggregation.Params{ 371 ClassName: schema.ClassName(companyClass.Class), 372 GroupBy: &filters.Path{ 373 Class: schema.ClassName(companyClass.Class), 374 Property: schema.PropertyName("sector"), 375 }, 376 Properties: []aggregation.ParamProperty{ 377 { 378 Name: schema.PropertyName("dividendYield"), 379 Aggregators: []aggregation.Aggregator{ 380 aggregation.MeanAggregator, 381 aggregation.MaximumAggregator, 382 aggregation.MinimumAggregator, 383 aggregation.SumAggregator, 384 aggregation.ModeAggregator, 385 aggregation.MedianAggregator, 386 aggregation.CountAggregator, 387 aggregation.TypeAggregator, 388 }, 389 }, 390 { 391 Name: schema.PropertyName("price"), 392 Aggregators: []aggregation.Aggregator{ 393 aggregation.TypeAggregator, 394 aggregation.MeanAggregator, 395 aggregation.MaximumAggregator, 396 aggregation.MinimumAggregator, 397 aggregation.SumAggregator, 398 // aggregation.ModeAggregator, // ignore as there is no most common value 399 aggregation.MedianAggregator, 400 aggregation.CountAggregator, 401 }, 402 }, 403 { 404 Name: schema.PropertyName("listedInIndex"), 405 Aggregators: []aggregation.Aggregator{ 406 aggregation.TypeAggregator, 407 aggregation.PercentageTrueAggregator, 408 aggregation.PercentageFalseAggregator, 409 aggregation.TotalTrueAggregator, 410 aggregation.TotalFalseAggregator, 411 }, 412 }, 413 { 414 Name: schema.PropertyName("location"), 415 Aggregators: []aggregation.Aggregator{ 416 aggregation.TypeAggregator, 417 aggregation.NewTopOccurrencesAggregator(ptInt(5)), 418 }, 419 }, 420 }, 421 } 422 423 res, err := repo.Aggregate(context.Background(), params) 424 require.Nil(t, err) 425 426 expectedResult := &aggregation.Result{ 427 Groups: []aggregation.Group{ 428 { 429 Count: 60, 430 GroupedBy: &aggregation.GroupedBy{ 431 Path: []string{"sector"}, 432 Value: "Food", 433 }, 434 Properties: map[string]aggregation.Property{ 435 "dividendYield": { 436 Type: aggregation.PropertyTypeNumerical, 437 NumericalAggregations: map[string]interface{}{ 438 "mean": 2.06667, 439 "maximum": 8.0, 440 "minimum": 0.0, 441 "sum": 124, 442 "mode": 0., 443 "median": 1.1, 444 "count": 60, 445 }, 446 }, 447 "price": { 448 Type: aggregation.PropertyTypeNumerical, 449 NumericalAggregations: map[string]interface{}{ 450 "mean": 218.33333, 451 "maximum": 800., 452 "minimum": 10., 453 "sum": 13100., 454 // "mode": 70, 455 "median": 115, 456 "count": 60, 457 }, 458 }, 459 "listedInIndex": { 460 Type: aggregation.PropertyTypeBoolean, 461 BooleanAggregation: aggregation.Boolean{ 462 TotalTrue: 50, 463 TotalFalse: 10, 464 PercentageTrue: 0.8333333333333334, 465 PercentageFalse: 0.16666666666666666, 466 Count: 60, 467 }, 468 }, 469 "location": { 470 Type: aggregation.PropertyTypeText, 471 TextAggregation: aggregation.Text{ 472 Count: 60, 473 Items: []aggregation.TextOccurrence{ 474 { 475 Value: "Atlanta", 476 Occurs: 20, 477 }, 478 { 479 Value: "Detroit", 480 Occurs: 10, 481 }, 482 { 483 Value: "Los Angeles", 484 Occurs: 10, 485 }, 486 { 487 Value: "New York", 488 Occurs: 10, 489 }, 490 { 491 Value: "San Francisco", 492 Occurs: 10, 493 }, 494 }, 495 }, 496 }, 497 }, 498 }, 499 { 500 Count: 30, 501 GroupedBy: &aggregation.GroupedBy{ 502 Path: []string{"sector"}, 503 Value: "Financials", 504 }, 505 Properties: map[string]aggregation.Property{ 506 "dividendYield": { 507 Type: aggregation.PropertyTypeNumerical, 508 NumericalAggregations: map[string]interface{}{ 509 "mean": 2.2, 510 "maximum": 4.0, 511 "minimum": 1.3, 512 "sum": 66., 513 "mode": 1.3, 514 "median": 1.3, 515 "count": 30, 516 }, 517 }, 518 "price": { 519 Type: aggregation.PropertyTypeNumerical, 520 NumericalAggregations: map[string]interface{}{ 521 "mean": 265.66667, 522 "maximum": 600., 523 "minimum": 47., 524 "sum": 7970., 525 // "mode": 47, 526 "median": 150., 527 "count": 30., 528 }, 529 }, 530 "listedInIndex": { 531 Type: aggregation.PropertyTypeBoolean, 532 BooleanAggregation: aggregation.Boolean{ 533 TotalTrue: 30, 534 TotalFalse: 0, 535 PercentageTrue: 1, 536 PercentageFalse: 0, 537 Count: 30, 538 }, 539 }, 540 "location": { 541 Type: aggregation.PropertyTypeText, 542 TextAggregation: aggregation.Text{ 543 Count: 30, 544 Items: []aggregation.TextOccurrence{ 545 { 546 Value: "New York", 547 Occurs: 20, 548 }, 549 { 550 Value: "San Francisco", 551 Occurs: 10, 552 }, 553 }, 554 }, 555 }, 556 }, 557 }, 558 }, 559 } 560 561 // there is now way to use InEpsilon or InDelta on nested structs with 562 // testify, so unfortunately we have to do a manual deep equal: 563 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 564 assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count) 565 assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy) 566 expectedProps := expectedResult.Groups[0].Properties 567 actualProps := res.Groups[0].Properties 568 assert.Equal(t, expectedProps["location"].TextAggregation.Count, 569 actualProps["location"].TextAggregation.Count) 570 assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items, 571 actualProps["location"].TextAggregation.Items) 572 assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"]) 573 assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations, 574 actualProps["dividendYield"].NumericalAggregations, epsilon*100) 575 assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations, 576 actualProps["price"].NumericalAggregations, epsilon*100) 577 578 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 579 assert.Equal(t, expectedResult.Groups[1].Count, res.Groups[1].Count) 580 assert.Equal(t, expectedResult.Groups[1].GroupedBy, res.Groups[1].GroupedBy) 581 expectedProps = expectedResult.Groups[1].Properties 582 actualProps = res.Groups[1].Properties 583 assert.Equal(t, expectedProps["location"], actualProps["location"]) 584 assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"]) 585 assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations, 586 actualProps["dividendYield"].NumericalAggregations, epsilon*100) 587 assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations, 588 actualProps["price"].NumericalAggregations, epsilon*500) 589 }) 590 591 t.Run("with filters, grouped by string", func(t *testing.T) { 592 params := aggregation.Params{ 593 ClassName: schema.ClassName(companyClass.Class), 594 GroupBy: &filters.Path{ 595 Class: schema.ClassName(companyClass.Class), 596 Property: schema.PropertyName("sector"), 597 }, 598 Filters: &filters.LocalFilter{ 599 Root: &filters.Clause{ 600 Operator: filters.OperatorLessThan, 601 Value: &filters.Value{ 602 Type: schema.DataTypeInt, 603 Value: 600, 604 }, 605 On: &filters.Path{ 606 Property: "price", 607 }, 608 }, 609 }, 610 Properties: []aggregation.ParamProperty{ 611 { 612 Name: schema.PropertyName("dividendYield"), 613 Aggregators: []aggregation.Aggregator{ 614 aggregation.MeanAggregator, 615 aggregation.MaximumAggregator, 616 aggregation.MinimumAggregator, 617 aggregation.SumAggregator, 618 // aggregation.ModeAggregator, 619 aggregation.MedianAggregator, 620 aggregation.CountAggregator, 621 aggregation.TypeAggregator, 622 }, 623 }, 624 { 625 Name: schema.PropertyName("price"), 626 Aggregators: []aggregation.Aggregator{ 627 aggregation.TypeAggregator, 628 aggregation.MeanAggregator, 629 aggregation.MaximumAggregator, 630 aggregation.MinimumAggregator, 631 aggregation.SumAggregator, 632 // aggregation.ModeAggregator, // ignore as there is no most common value 633 aggregation.MedianAggregator, 634 aggregation.CountAggregator, 635 }, 636 }, 637 { 638 Name: schema.PropertyName("listedInIndex"), 639 Aggregators: []aggregation.Aggregator{ 640 aggregation.TypeAggregator, 641 aggregation.PercentageTrueAggregator, 642 aggregation.PercentageFalseAggregator, 643 aggregation.TotalTrueAggregator, 644 aggregation.TotalFalseAggregator, 645 }, 646 }, 647 { 648 Name: schema.PropertyName("location"), 649 Aggregators: []aggregation.Aggregator{ 650 aggregation.TypeAggregator, 651 aggregation.NewTopOccurrencesAggregator(ptInt(5)), 652 }, 653 }, 654 }, 655 } 656 657 res, err := repo.Aggregate(context.Background(), params) 658 require.Nil(t, err) 659 660 expectedResult := &aggregation.Result{ 661 Groups: []aggregation.Group{ 662 { 663 Count: 50, 664 GroupedBy: &aggregation.GroupedBy{ 665 Path: []string{"sector"}, 666 Value: "Food", 667 }, 668 Properties: map[string]aggregation.Property{ 669 "dividendYield": { 670 Type: aggregation.PropertyTypeNumerical, 671 NumericalAggregations: map[string]interface{}{ 672 "mean": 2.48, 673 "maximum": 8.0, 674 "minimum": 0.0, 675 "sum": 124., 676 "median": 1.3, 677 "count": 50, 678 }, 679 }, 680 "price": { 681 Type: aggregation.PropertyTypeNumerical, 682 NumericalAggregations: map[string]interface{}{ 683 "mean": 102., 684 "maximum": 200., 685 "minimum": 10., 686 "sum": 5100., 687 "median": 70., 688 "count": 50., 689 }, 690 }, 691 "listedInIndex": { 692 Type: aggregation.PropertyTypeBoolean, 693 BooleanAggregation: aggregation.Boolean{ 694 TotalTrue: 50, 695 TotalFalse: 0, 696 PercentageTrue: 1, 697 PercentageFalse: 0, 698 Count: 50, 699 }, 700 }, 701 "location": { 702 Type: aggregation.PropertyTypeText, 703 TextAggregation: aggregation.Text{ 704 Count: 50, 705 Items: []aggregation.TextOccurrence{ 706 { 707 Value: "Atlanta", 708 Occurs: 20, 709 }, 710 { 711 Value: "Detroit", 712 Occurs: 10, 713 }, 714 { 715 Value: "New York", 716 Occurs: 10, 717 }, 718 { 719 Value: "San Francisco", 720 Occurs: 10, 721 }, 722 }, 723 }, 724 }, 725 }, 726 }, 727 { 728 Count: 20, 729 GroupedBy: &aggregation.GroupedBy{ 730 Path: []string{"sector"}, 731 Value: "Financials", 732 }, 733 Properties: map[string]aggregation.Property{ 734 "dividendYield": { 735 Type: aggregation.PropertyTypeNumerical, 736 NumericalAggregations: map[string]interface{}{ 737 "mean": 1.3, 738 "maximum": 1.3, 739 "minimum": 1.3, 740 "sum": 26., 741 "median": 1.3, 742 "count": 20., 743 }, 744 }, 745 "price": { 746 Type: aggregation.PropertyTypeNumerical, 747 NumericalAggregations: map[string]interface{}{ 748 "mean": 98.5, 749 "maximum": 150., 750 "minimum": 47., 751 "sum": 1970., 752 "median": 98.5, 753 "count": 20., 754 }, 755 }, 756 "listedInIndex": { 757 Type: aggregation.PropertyTypeBoolean, 758 BooleanAggregation: aggregation.Boolean{ 759 TotalTrue: 20, 760 TotalFalse: 0, 761 PercentageTrue: 1, 762 PercentageFalse: 0, 763 Count: 20, 764 }, 765 }, 766 "location": { 767 Type: aggregation.PropertyTypeText, 768 TextAggregation: aggregation.Text{ 769 Count: 20, 770 Items: []aggregation.TextOccurrence{ 771 { 772 Value: "New York", 773 Occurs: 10, 774 }, 775 { 776 Value: "San Francisco", 777 Occurs: 10, 778 }, 779 }, 780 }, 781 }, 782 }, 783 }, 784 }, 785 } 786 787 // there is now way to use InEpsilon or InDelta on nested structs with 788 // testify, so unfortunately we have to do a manual deep equal: 789 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 790 assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count) 791 assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy) 792 expectedProps := expectedResult.Groups[0].Properties 793 actualProps := res.Groups[0].Properties 794 assert.Equal(t, expectedProps["location"].TextAggregation.Count, 795 actualProps["location"].TextAggregation.Count) 796 assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items, 797 actualProps["location"].TextAggregation.Items) 798 assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"]) 799 assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations, 800 actualProps["dividendYield"].NumericalAggregations, epsilon*100) 801 assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations, 802 actualProps["price"].NumericalAggregations, epsilon*100) 803 804 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 805 assert.Equal(t, expectedResult.Groups[1].Count, res.Groups[1].Count) 806 assert.Equal(t, expectedResult.Groups[1].GroupedBy, res.Groups[1].GroupedBy) 807 expectedProps = expectedResult.Groups[1].Properties 808 actualProps = res.Groups[1].Properties 809 assert.Equal(t, expectedProps["location"].TextAggregation.Count, 810 actualProps["location"].TextAggregation.Count) 811 assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items, 812 actualProps["location"].TextAggregation.Items) 813 assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"]) 814 assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations, 815 actualProps["dividendYield"].NumericalAggregations, epsilon*100) 816 assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations, 817 actualProps["price"].NumericalAggregations, epsilon*100) 818 }) 819 820 t.Run("no filters, grouped by ref prop", func(t *testing.T) { 821 params := aggregation.Params{ 822 ClassName: schema.ClassName(companyClass.Class), 823 GroupBy: &filters.Path{ 824 Class: schema.ClassName(companyClass.Class), 825 Property: schema.PropertyName("makesProduct"), 826 }, 827 Properties: []aggregation.ParamProperty{ 828 { 829 Name: schema.PropertyName("dividendYield"), 830 Aggregators: []aggregation.Aggregator{ 831 aggregation.MeanAggregator, 832 aggregation.MaximumAggregator, 833 aggregation.MinimumAggregator, 834 aggregation.SumAggregator, 835 // aggregation.ModeAggregator, 836 aggregation.MedianAggregator, 837 aggregation.CountAggregator, 838 aggregation.TypeAggregator, 839 }, 840 }, 841 { 842 Name: schema.PropertyName("price"), 843 Aggregators: []aggregation.Aggregator{ 844 aggregation.TypeAggregator, 845 aggregation.MeanAggregator, 846 aggregation.MaximumAggregator, 847 aggregation.MinimumAggregator, 848 aggregation.SumAggregator, 849 // aggregation.ModeAggregator, // ignore as there is no most common value 850 aggregation.MedianAggregator, 851 aggregation.CountAggregator, 852 }, 853 }, 854 { 855 Name: schema.PropertyName("listedInIndex"), 856 Aggregators: []aggregation.Aggregator{ 857 aggregation.TypeAggregator, 858 aggregation.PercentageTrueAggregator, 859 aggregation.PercentageFalseAggregator, 860 aggregation.TotalTrueAggregator, 861 aggregation.TotalFalseAggregator, 862 }, 863 }, 864 { 865 Name: schema.PropertyName("location"), 866 Aggregators: []aggregation.Aggregator{ 867 aggregation.TypeAggregator, 868 aggregation.NewTopOccurrencesAggregator(ptInt(5)), 869 }, 870 }, 871 }, 872 } 873 874 res, err := repo.Aggregate(context.Background(), params) 875 require.Nil(t, err) 876 877 expectedResult := &aggregation.Result{ 878 Groups: []aggregation.Group{ 879 { 880 Count: 10, 881 GroupedBy: &aggregation.GroupedBy{ 882 Path: []string{"makesProduct"}, 883 Value: strfmt.URI("weaviate://localhost/1295c052-263d-4aae-99dd-920c5a370d06"), 884 }, 885 Properties: map[string]aggregation.Property{ 886 "dividendYield": { 887 Type: aggregation.PropertyTypeNumerical, 888 NumericalAggregations: map[string]interface{}{ 889 "mean": 8.0, 890 "maximum": 8.0, 891 "minimum": 8.0, 892 "sum": 80.0, 893 "median": 8.0, 894 "count": 10., 895 }, 896 }, 897 "price": { 898 Type: aggregation.PropertyTypeNumerical, 899 NumericalAggregations: map[string]interface{}{ 900 "mean": 10., 901 "maximum": 10., 902 "minimum": 10., 903 "sum": 100., 904 "median": 10., 905 "count": 10., 906 }, 907 }, 908 "listedInIndex": { 909 Type: aggregation.PropertyTypeBoolean, 910 BooleanAggregation: aggregation.Boolean{ 911 TotalTrue: 10, 912 TotalFalse: 0, 913 PercentageTrue: 1, 914 PercentageFalse: 0, 915 Count: 10, 916 }, 917 }, 918 "location": { 919 Type: aggregation.PropertyTypeText, 920 TextAggregation: aggregation.Text{ 921 Count: 10, 922 Items: []aggregation.TextOccurrence{ 923 { 924 Value: "Detroit", 925 Occurs: 10, 926 }, 927 }, 928 }, 929 }, 930 }, 931 }, 932 }, 933 } 934 935 // there is now way to use InEpsilon or InDelta on nested structs with 936 // testify, so unfortunately we have to do a manual deep equal: 937 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 938 assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count) 939 assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy) 940 expectedProps := expectedResult.Groups[0].Properties 941 actualProps := res.Groups[0].Properties 942 assert.Equal(t, expectedProps["location"].TextAggregation.Count, 943 actualProps["location"].TextAggregation.Count) 944 assert.ElementsMatch(t, expectedProps["location"].TextAggregation.Items, 945 actualProps["location"].TextAggregation.Items) 946 assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"]) 947 assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations, 948 actualProps["dividendYield"].NumericalAggregations, epsilon*100) 949 assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations, 950 actualProps["price"].NumericalAggregations, epsilon*100) 951 }) 952 953 t.Run("with ref filter, grouped by string", func(t *testing.T) { 954 params := aggregation.Params{ 955 ClassName: schema.ClassName(companyClass.Class), 956 GroupBy: &filters.Path{ 957 Class: schema.ClassName(companyClass.Class), 958 Property: schema.PropertyName("sector"), 959 }, 960 Filters: &filters.LocalFilter{ 961 Root: &filters.Clause{ 962 Operator: filters.OperatorEqual, 963 Value: &filters.Value{ 964 Type: schema.DataTypeText, 965 Value: "Superbread", 966 }, 967 On: &filters.Path{ 968 Property: "makesProduct", 969 Child: &filters.Path{ 970 Class: "AggregationsTestProduct", 971 Property: "name", 972 }, 973 }, 974 }, 975 }, 976 Properties: []aggregation.ParamProperty{ 977 { 978 Name: schema.PropertyName("dividendYield"), 979 Aggregators: []aggregation.Aggregator{ 980 aggregation.MeanAggregator, 981 aggregation.MaximumAggregator, 982 aggregation.MinimumAggregator, 983 aggregation.SumAggregator, 984 // aggregation.ModeAggregator, 985 aggregation.MedianAggregator, 986 aggregation.CountAggregator, 987 aggregation.TypeAggregator, 988 }, 989 }, 990 { 991 Name: schema.PropertyName("price"), 992 Aggregators: []aggregation.Aggregator{ 993 aggregation.TypeAggregator, 994 aggregation.MeanAggregator, 995 aggregation.MaximumAggregator, 996 aggregation.MinimumAggregator, 997 aggregation.SumAggregator, 998 // aggregation.ModeAggregator, // ignore as there is no most common value 999 aggregation.MedianAggregator, 1000 aggregation.CountAggregator, 1001 }, 1002 }, 1003 { 1004 Name: schema.PropertyName("listedInIndex"), 1005 Aggregators: []aggregation.Aggregator{ 1006 aggregation.TypeAggregator, 1007 aggregation.PercentageTrueAggregator, 1008 aggregation.PercentageFalseAggregator, 1009 aggregation.TotalTrueAggregator, 1010 aggregation.TotalFalseAggregator, 1011 }, 1012 }, 1013 { 1014 Name: schema.PropertyName("location"), 1015 Aggregators: []aggregation.Aggregator{ 1016 aggregation.TypeAggregator, 1017 aggregation.NewTopOccurrencesAggregator(ptInt(5)), 1018 }, 1019 }, 1020 }, 1021 } 1022 1023 res, err := repo.Aggregate(context.Background(), params) 1024 require.Nil(t, err) 1025 require.NotNil(t, res) 1026 1027 expectedResult := &aggregation.Result{ 1028 Groups: []aggregation.Group{ 1029 { 1030 Count: 10, 1031 GroupedBy: &aggregation.GroupedBy{ 1032 Path: []string{"sector"}, 1033 Value: "Food", 1034 }, 1035 Properties: map[string]aggregation.Property{ 1036 "dividendYield": { 1037 Type: aggregation.PropertyTypeNumerical, 1038 NumericalAggregations: map[string]interface{}{ 1039 "mean": 8.0, 1040 "maximum": 8.0, 1041 "minimum": 8.0, 1042 "sum": 80., 1043 "median": 8.0, 1044 "count": 10., 1045 }, 1046 }, 1047 "price": { 1048 Type: aggregation.PropertyTypeNumerical, 1049 NumericalAggregations: map[string]interface{}{ 1050 "mean": 10., 1051 "maximum": 10., 1052 "minimum": 10., 1053 "sum": 100., 1054 "median": 10., 1055 "count": 10., 1056 }, 1057 }, 1058 "listedInIndex": { 1059 Type: aggregation.PropertyTypeBoolean, 1060 BooleanAggregation: aggregation.Boolean{ 1061 TotalTrue: 10, 1062 TotalFalse: 0, 1063 PercentageTrue: 1, 1064 PercentageFalse: 0, 1065 Count: 10, 1066 }, 1067 }, 1068 "location": { 1069 Type: aggregation.PropertyTypeText, 1070 TextAggregation: aggregation.Text{ 1071 Count: 10, 1072 Items: []aggregation.TextOccurrence{ 1073 { 1074 Value: "Detroit", 1075 Occurs: 10, 1076 }, 1077 }, 1078 }, 1079 }, 1080 }, 1081 }, 1082 }, 1083 } 1084 1085 // there is now way to use InEpsilon or InDelta on nested structs with 1086 // testify, so unfortunately we have to do a manual deep equal: 1087 assert.Equal(t, len(res.Groups), len(expectedResult.Groups)) 1088 assert.Equal(t, expectedResult.Groups[0].Count, res.Groups[0].Count) 1089 assert.Equal(t, expectedResult.Groups[0].GroupedBy, res.Groups[0].GroupedBy) 1090 expectedProps := expectedResult.Groups[0].Properties 1091 actualProps := res.Groups[0].Properties 1092 assert.Equal(t, expectedProps["location"], actualProps["location"]) 1093 assert.Equal(t, expectedProps["listedInIndex"], actualProps["listedInIndex"]) 1094 assert.InDeltaMapValues(t, expectedProps["dividendYield"].NumericalAggregations, 1095 actualProps["dividendYield"].NumericalAggregations, 0.001) 1096 assert.InDeltaMapValues(t, expectedProps["price"].NumericalAggregations, 1097 actualProps["price"].NumericalAggregations, 0.001) 1098 }) 1099 1100 t.Run("array types, single aggregator strings", func(t *testing.T) { 1101 if !exact { 1102 t.Skip() 1103 } 1104 params := aggregation.Params{ 1105 ClassName: schema.ClassName(arrayTypesClass.Class), 1106 GroupBy: &filters.Path{ 1107 Class: schema.ClassName(arrayTypesClass.Class), 1108 Property: schema.PropertyName("strings"), 1109 }, 1110 IncludeMetaCount: true, 1111 } 1112 1113 res, err := repo.Aggregate(context.Background(), params) 1114 require.Nil(t, err) 1115 1116 expectedResult := &aggregation.Result{ 1117 Groups: []aggregation.Group{ 1118 { 1119 Count: 2, 1120 GroupedBy: &aggregation.GroupedBy{ 1121 Path: []string{"strings"}, 1122 Value: "a", 1123 }, 1124 Properties: map[string]aggregation.Property{}, 1125 }, 1126 { 1127 Count: 1, 1128 GroupedBy: &aggregation.GroupedBy{ 1129 Path: []string{"strings"}, 1130 Value: "b", 1131 }, 1132 Properties: map[string]aggregation.Property{}, 1133 }, 1134 { 1135 Count: 1, 1136 GroupedBy: &aggregation.GroupedBy{ 1137 Path: []string{"strings"}, 1138 Value: "c", 1139 }, 1140 Properties: map[string]aggregation.Property{}, 1141 }, 1142 }, 1143 } 1144 1145 assert.ElementsMatch(t, expectedResult.Groups, res.Groups) 1146 }) 1147 1148 t.Run("array types, single aggregator numbers", func(t *testing.T) { 1149 if !exact { 1150 t.Skip() 1151 } 1152 params := aggregation.Params{ 1153 ClassName: schema.ClassName(arrayTypesClass.Class), 1154 GroupBy: &filters.Path{ 1155 Class: schema.ClassName(arrayTypesClass.Class), 1156 Property: schema.PropertyName("numbers"), 1157 }, 1158 IncludeMetaCount: true, 1159 } 1160 1161 res, err := repo.Aggregate(context.Background(), params) 1162 require.Nil(t, err) 1163 1164 expectedResult := &aggregation.Result{ 1165 Groups: []aggregation.Group{ 1166 { 1167 Count: 2, 1168 GroupedBy: &aggregation.GroupedBy{ 1169 Path: []string{"numbers"}, 1170 Value: float64(1.0), 1171 }, 1172 Properties: map[string]aggregation.Property{}, 1173 }, 1174 { 1175 Count: 2, 1176 GroupedBy: &aggregation.GroupedBy{ 1177 Path: []string{"numbers"}, 1178 Value: float64(2.0), 1179 }, 1180 Properties: map[string]aggregation.Property{}, 1181 }, 1182 { 1183 Count: 1, 1184 GroupedBy: &aggregation.GroupedBy{ 1185 Path: []string{"numbers"}, 1186 Value: float64(3.0), 1187 }, 1188 Properties: map[string]aggregation.Property{}, 1189 }, 1190 }, 1191 } 1192 1193 assert.ElementsMatch(t, expectedResult.Groups, res.Groups) 1194 }) 1195 } 1196 } 1197 1198 func testDateAggregationsWithFilters(repo *DB) func(t *testing.T) { 1199 return func(t *testing.T) { 1200 t.Run("Aggregations with filter that matches nothing", func(t *testing.T) { 1201 params := aggregation.Params{ 1202 ClassName: schema.ClassName(customerClass.Class), 1203 Filters: &filters.LocalFilter{ 1204 Root: &filters.Clause{ 1205 Operator: filters.OperatorGreaterThan, 1206 Value: &filters.Value{ 1207 Type: schema.DataTypeDate, 1208 Value: "0312-06-16T17:30:17.231346Z", // hello roman empire! 1209 }, 1210 On: &filters.Path{ 1211 Property: "timeArrived", 1212 }, 1213 }, 1214 }, 1215 IncludeMetaCount: true, 1216 Properties: []aggregation.ParamProperty{ 1217 { 1218 Name: schema.PropertyName("timeArrived"), 1219 Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator, aggregation.CountAggregator, aggregation.MaximumAggregator, aggregation.MedianAggregator, aggregation.MinimumAggregator, aggregation.ModeAggregator, aggregation.TypeAggregator}, 1220 }, 1221 }, 1222 } 1223 res, err := repo.Aggregate(context.Background(), params) 1224 1225 // No results match the filter, so only a count of 0 is included 1226 require.Nil(t, err) 1227 require.Equal(t, 1, len(res.Groups)) 1228 require.Equal(t, 1, len(res.Groups[0].Properties)) 1229 require.Equal(t, 1, len(res.Groups[0].Properties["timeArrived"].DateAggregations)) 1230 require.Equal(t, int64(0), res.Groups[0].Properties["timeArrived"].DateAggregations["count"].(int64)) 1231 }) 1232 } 1233 } 1234 1235 func testNumericalAggregationsWithFilters(repo *DB) func(t *testing.T) { 1236 return func(t *testing.T) { 1237 t.Run("Aggregations with filter that matches nothing", func(t *testing.T) { 1238 params := aggregation.Params{ 1239 ClassName: schema.ClassName(companyClass.Class), 1240 Filters: &filters.LocalFilter{ 1241 Root: &filters.Clause{ 1242 Operator: filters.OperatorLessThan, 1243 Value: &filters.Value{ 1244 Type: schema.DataTypeInt, 1245 Value: -5, // price is positive everywhere 1246 }, 1247 On: &filters.Path{ 1248 Property: "price", 1249 }, 1250 }, 1251 }, 1252 IncludeMetaCount: true, 1253 Properties: []aggregation.ParamProperty{ 1254 { 1255 Name: schema.PropertyName("dividendYield"), 1256 Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator, aggregation.CountAggregator, aggregation.MaximumAggregator, aggregation.MedianAggregator, aggregation.MinimumAggregator, aggregation.ModeAggregator, aggregation.TypeAggregator}, 1257 }, 1258 }, 1259 } 1260 res, err := repo.Aggregate(context.Background(), params) 1261 1262 // No results match the filter, so only a count of 0 is included 1263 require.Nil(t, err) 1264 require.Equal(t, 1, len(res.Groups)) 1265 require.Equal(t, 1, len(res.Groups[0].Properties)) 1266 require.Equal(t, 1, len(res.Groups[0].Properties["dividendYield"].NumericalAggregations)) 1267 require.Equal(t, float64(0), res.Groups[0].Properties["dividendYield"].NumericalAggregations["count"].(float64)) 1268 }) 1269 } 1270 } 1271 1272 func testNumericalAggregationsWithoutGrouping(repo *DB, 1273 exact bool, 1274 ) func(t *testing.T) { 1275 return func(t *testing.T) { 1276 t.Run("only meta count, no other aggregations", func(t *testing.T) { 1277 params := aggregation.Params{ 1278 ClassName: schema.ClassName(companyClass.Class), 1279 IncludeMetaCount: true, 1280 GroupBy: nil, // explicitly set to nil 1281 } 1282 1283 res, err := repo.Aggregate(context.Background(), params) 1284 require.Nil(t, err) 1285 1286 expectedResult := &aggregation.Result{ 1287 Groups: []aggregation.Group{ 1288 { 1289 GroupedBy: nil, 1290 Count: 90, 1291 }, 1292 }, 1293 } 1294 1295 require.NotNil(t, res) 1296 assert.Equal(t, expectedResult.Groups, res.Groups) 1297 }) 1298 1299 t.Run("single field, single aggregator", func(t *testing.T) { 1300 params := aggregation.Params{ 1301 ClassName: schema.ClassName(companyClass.Class), 1302 GroupBy: nil, // explicitly set to nil 1303 Properties: []aggregation.ParamProperty{ 1304 { 1305 Name: schema.PropertyName("dividendYield"), 1306 Aggregators: []aggregation.Aggregator{aggregation.MeanAggregator}, 1307 }, 1308 }, 1309 } 1310 1311 res, err := repo.Aggregate(context.Background(), params) 1312 require.Nil(t, err) 1313 1314 if exact { 1315 expectedResult := &aggregation.Result{ 1316 Groups: []aggregation.Group{ 1317 { 1318 GroupedBy: nil, 1319 Properties: map[string]aggregation.Property{ 1320 "dividendYield": { 1321 Type: aggregation.PropertyTypeNumerical, 1322 NumericalAggregations: map[string]interface{}{ 1323 "mean": 2.111111111111111, 1324 }, 1325 }, 1326 }, 1327 }, 1328 }, 1329 } 1330 1331 assert.Equal(t, expectedResult.Groups, res.Groups) 1332 } else { 1333 require.Len(t, res.Groups, 1) 1334 divYield := res.Groups[0].Properties["dividendYield"] 1335 assert.Equal(t, aggregation.PropertyTypeNumerical, divYield.Type) 1336 assert.InDelta(t, 2.1111, divYield.NumericalAggregations["mean"], 2) 1337 } 1338 }) 1339 1340 t.Run("multiple fields, multiple aggregators", func(t *testing.T) { 1341 params := aggregation.Params{ 1342 ClassName: schema.ClassName(companyClass.Class), 1343 GroupBy: nil, // explicitly set to nil, 1344 IncludeMetaCount: true, 1345 Properties: []aggregation.ParamProperty{ 1346 { 1347 Name: schema.PropertyName("dividendYield"), 1348 Aggregators: []aggregation.Aggregator{ 1349 aggregation.MeanAggregator, 1350 aggregation.MaximumAggregator, 1351 aggregation.MinimumAggregator, 1352 aggregation.SumAggregator, 1353 aggregation.ModeAggregator, 1354 aggregation.MedianAggregator, 1355 aggregation.CountAggregator, 1356 aggregation.TypeAggregator, // ignored in the repo, but can't block 1357 }, 1358 }, 1359 { 1360 Name: schema.PropertyName("price"), 1361 Aggregators: []aggregation.Aggregator{ 1362 aggregation.MeanAggregator, 1363 aggregation.MaximumAggregator, 1364 aggregation.MinimumAggregator, 1365 aggregation.SumAggregator, 1366 aggregation.ModeAggregator, 1367 aggregation.MedianAggregator, 1368 aggregation.CountAggregator, 1369 aggregation.TypeAggregator, // ignored in the repo, but can't block 1370 }, 1371 }, 1372 { 1373 Name: schema.PropertyName("listedInIndex"), 1374 Aggregators: []aggregation.Aggregator{ 1375 aggregation.PercentageTrueAggregator, 1376 aggregation.PercentageFalseAggregator, 1377 aggregation.TotalTrueAggregator, 1378 aggregation.TotalFalseAggregator, 1379 aggregation.TypeAggregator, // ignored in the repo, but can't block 1380 }, 1381 }, 1382 { 1383 Name: schema.PropertyName("location"), 1384 Aggregators: []aggregation.Aggregator{ 1385 // limit is so high, it's not really restrictive 1386 aggregation.NewTopOccurrencesAggregator(ptInt(10)), 1387 aggregation.TypeAggregator, // ignored in the repo, but can't block 1388 }, 1389 }, 1390 { 1391 Name: schema.PropertyName("sector"), 1392 Aggregators: []aggregation.Aggregator{ 1393 // limit is very restrictive 1394 aggregation.NewTopOccurrencesAggregator(ptInt(1)), 1395 aggregation.TypeAggregator, // ignored in the repo, but can't block 1396 }, 1397 }, 1398 // we are not expecting any result from the following agg, as this is 1399 // handled in the usecase. However, we at least want to make sure it 1400 // doesn't block or lead to any errors 1401 { 1402 Name: schema.PropertyName("makesProduct"), 1403 Aggregators: []aggregation.Aggregator{ 1404 aggregation.PointingToAggregator, 1405 aggregation.TypeAggregator, 1406 }, 1407 }, 1408 }, 1409 } 1410 1411 res, err := repo.Aggregate(context.Background(), params) 1412 require.Nil(t, err) 1413 1414 expectedResult := &aggregation.Result{ 1415 Groups: []aggregation.Group{ 1416 { 1417 Count: 90, // because includeMetaCount was set 1418 Properties: map[string]aggregation.Property{ 1419 "dividendYield": { 1420 Type: aggregation.PropertyTypeNumerical, 1421 NumericalAggregations: map[string]interface{}{ 1422 "mean": 2.111111111111111, 1423 "maximum": 8.0, 1424 "minimum": 0.0, 1425 "sum": 190., 1426 "mode": 1.3, 1427 "median": 1.3, 1428 "count": 90., 1429 }, 1430 }, 1431 "price": { 1432 Type: aggregation.PropertyTypeNumerical, 1433 NumericalAggregations: map[string]interface{}{ 1434 "mean": 234.11111111111111, 1435 "maximum": 800., 1436 "minimum": 10., 1437 "sum": 21070., 1438 "mode": 70., 1439 "median": 150., 1440 "count": 90., 1441 }, 1442 }, 1443 "listedInIndex": { 1444 Type: aggregation.PropertyTypeBoolean, 1445 BooleanAggregation: aggregation.Boolean{ 1446 TotalTrue: 80, 1447 TotalFalse: 10, 1448 PercentageTrue: 0.8888888888888888, 1449 PercentageFalse: 0.1111111111111111, 1450 Count: 90, 1451 }, 1452 }, 1453 "location": { 1454 Type: aggregation.PropertyTypeText, 1455 TextAggregation: aggregation.Text{ 1456 Count: 90, 1457 Items: []aggregation.TextOccurrence{ 1458 { 1459 Value: "New York", 1460 Occurs: 30, 1461 }, 1462 { 1463 Value: "Atlanta", 1464 Occurs: 20, 1465 }, 1466 { 1467 Value: "San Francisco", 1468 Occurs: 20, 1469 }, 1470 { 1471 Value: "Detroit", 1472 Occurs: 10, 1473 }, 1474 { 1475 Value: "Los Angeles", 1476 Occurs: 10, 1477 }, 1478 }, 1479 }, 1480 }, 1481 "sector": { 1482 Type: aggregation.PropertyTypeText, 1483 TextAggregation: aggregation.Text{ 1484 Count: 90, 1485 Items: []aggregation.TextOccurrence{ 1486 { 1487 Value: "Food", 1488 Occurs: 60, 1489 }, 1490 }, 1491 }, 1492 }, 1493 }, 1494 }, 1495 }, 1496 } 1497 1498 if exact { 1499 assert.Equal(t, expectedResult.Groups, res.Groups) 1500 } else { 1501 t.Run("numerical fields", func(t *testing.T) { 1502 aggs := res.Groups[0].Properties["dividendYield"].NumericalAggregations 1503 expextedAggs := expectedResult.Groups[0].Properties["dividendYield"].NumericalAggregations 1504 1505 // max, min, count, sum are always exact matches, but we need an 1506 // epsilon check because of floating point arithmetics 1507 assert.InEpsilon(t, expextedAggs["maximum"], aggs["maximum"], 0.1) 1508 assert.Equal(t, expextedAggs["minimum"], aggs["minimum"]) // equal because the result == 0 1509 assert.InEpsilon(t, expextedAggs["count"], aggs["count"], 0.1) 1510 assert.InEpsilon(t, expextedAggs["sum"], aggs["sum"], 0.1) 1511 1512 // mean, mode, median are always fuzzy 1513 assert.InDelta(t, expextedAggs["mean"], aggs["mean"], 2) 1514 assert.InDelta(t, expextedAggs["mode"], aggs["mode"], 2) 1515 assert.InDelta(t, expextedAggs["median"], aggs["median"], 2) 1516 }) 1517 1518 t.Run("int fields", func(t *testing.T) { 1519 aggs := res.Groups[0].Properties["price"].NumericalAggregations 1520 expextedAggs := expectedResult.Groups[0].Properties["price"].NumericalAggregations 1521 1522 // max, min, count, sum are always exact matches, but we need an 1523 // epsilon check because of floating point arithmetics 1524 assert.InEpsilon(t, expextedAggs["maximum"], aggs["maximum"], 0.1) 1525 assert.InEpsilon(t, expextedAggs["minimum"], aggs["minimum"], 0.1) 1526 assert.InEpsilon(t, expextedAggs["count"], aggs["count"], 0.1) 1527 assert.InEpsilon(t, expextedAggs["sum"], aggs["sum"], 0.1) 1528 1529 // mean, mode, median are always fuzzy 1530 assert.InEpsilon(t, expextedAggs["mean"], aggs["mean"], 0.5, "mean") 1531 assert.InEpsilon(t, expextedAggs["mode"], aggs["mode"], 10, "mode") 1532 assert.InEpsilon(t, expextedAggs["median"], aggs["median"], 0.5, "median") 1533 }) 1534 1535 t.Run("boolean fields", func(t *testing.T) { 1536 aggs := res.Groups[0].Properties["listedInIndex"].BooleanAggregation 1537 expectedAggs := expectedResult.Groups[0].Properties["listedInIndex"].BooleanAggregation 1538 1539 assert.InEpsilon(t, expectedAggs.TotalTrue, aggs.TotalTrue, 0.1) 1540 assert.InEpsilon(t, expectedAggs.TotalFalse, aggs.TotalFalse, 0.1) 1541 assert.InEpsilon(t, expectedAggs.PercentageTrue, aggs.PercentageTrue, 0.1) 1542 assert.InEpsilon(t, expectedAggs.PercentageFalse, aggs.PercentageFalse, 0.1) 1543 assert.InEpsilon(t, expectedAggs.Count, aggs.Count, 0.1) 1544 }) 1545 1546 t.Run("text fields (location)", func(t *testing.T) { 1547 aggs := res.Groups[0].Properties["location"].TextAggregation 1548 expectedAggs := expectedResult.Groups[0].Properties["location"].TextAggregation 1549 1550 assert.Equal(t, expectedAggs.Count, aggs.Count) 1551 assert.ElementsMatch(t, expectedAggs.Items, aggs.Items) 1552 }) 1553 t.Run("text fields (sector)", func(t *testing.T) { 1554 aggs := res.Groups[0].Properties["sector"].TextAggregation 1555 expectedAggs := expectedResult.Groups[0].Properties["sector"].TextAggregation 1556 1557 assert.Equal(t, expectedAggs.Count, aggs.Count) 1558 assert.ElementsMatch(t, expectedAggs.Items, aggs.Items) 1559 }) 1560 } 1561 }) 1562 1563 t.Run("multiple fields, multiple aggregators, single-level filter", func(t *testing.T) { 1564 if !exact { 1565 // filtering is happening inside a shard, so there is no need to test 1566 // this again for multi-sharding. This saves us from adapting all the 1567 // assertions to work with fuzzy values 1568 t.Skip() 1569 } 1570 1571 params := aggregation.Params{ 1572 ClassName: schema.ClassName(companyClass.Class), 1573 GroupBy: nil, // explicitly set to nil, 1574 Filters: sectorEqualsFoodFilter(), 1575 IncludeMetaCount: true, 1576 Properties: []aggregation.ParamProperty{ 1577 { 1578 Name: schema.PropertyName("dividendYield"), 1579 Aggregators: []aggregation.Aggregator{ 1580 aggregation.MeanAggregator, 1581 aggregation.MaximumAggregator, 1582 aggregation.MinimumAggregator, 1583 aggregation.SumAggregator, 1584 aggregation.ModeAggregator, 1585 aggregation.MedianAggregator, 1586 aggregation.CountAggregator, 1587 aggregation.TypeAggregator, // ignored in the repo, but can't block 1588 }, 1589 }, 1590 { 1591 Name: schema.PropertyName("price"), 1592 Aggregators: []aggregation.Aggregator{ 1593 aggregation.MeanAggregator, 1594 aggregation.MaximumAggregator, 1595 aggregation.MinimumAggregator, 1596 aggregation.SumAggregator, 1597 aggregation.ModeAggregator, 1598 aggregation.MedianAggregator, 1599 aggregation.CountAggregator, 1600 aggregation.TypeAggregator, // ignored in the repo, but can't block 1601 }, 1602 }, 1603 { 1604 Name: schema.PropertyName("listedInIndex"), 1605 Aggregators: []aggregation.Aggregator{ 1606 aggregation.PercentageTrueAggregator, 1607 aggregation.PercentageFalseAggregator, 1608 aggregation.TotalTrueAggregator, 1609 aggregation.TotalFalseAggregator, 1610 aggregation.TypeAggregator, // ignored in the repo, but can't block 1611 }, 1612 }, 1613 { 1614 Name: schema.PropertyName("location"), 1615 Aggregators: []aggregation.Aggregator{ 1616 // limit is so high, it's not really restrictive 1617 aggregation.NewTopOccurrencesAggregator(ptInt(10)), 1618 aggregation.TypeAggregator, // ignored in the repo, but can't block 1619 }, 1620 }, 1621 { 1622 Name: schema.PropertyName("sector"), 1623 Aggregators: []aggregation.Aggregator{ 1624 // limit is very restrictive 1625 aggregation.NewTopOccurrencesAggregator(ptInt(1)), 1626 aggregation.TypeAggregator, // ignored in the repo, but can't block 1627 }, 1628 }, 1629 // we are not expecting any result from the following agg, as this is 1630 // handled in the usecase. However, we at least want to make sure it 1631 // doesn't block or lead to any errors 1632 { 1633 Name: schema.PropertyName("makesProduct"), 1634 Aggregators: []aggregation.Aggregator{ 1635 aggregation.PointingToAggregator, 1636 aggregation.TypeAggregator, 1637 }, 1638 }, 1639 }, 1640 } 1641 1642 res, err := repo.Aggregate(context.Background(), params) 1643 require.Nil(t, err) 1644 1645 actualDivYield := res.Groups[0].Properties["dividendYield"] 1646 delete(res.Groups[0].Properties, "dividendYield") 1647 actualPrice := res.Groups[0].Properties["price"] 1648 delete(res.Groups[0].Properties, "price") 1649 actualMakesProduct := res.Groups[0].Properties["makesProduct"] 1650 delete(res.Groups[0].Properties, "makesProduct") 1651 1652 expectedDivYield := aggregation.Property{ 1653 Type: aggregation.PropertyTypeNumerical, 1654 NumericalAggregations: map[string]interface{}{ 1655 "mean": 2.066666666666666, 1656 "maximum": 8.0, 1657 "minimum": 0.0, 1658 "sum": 124, 1659 "mode": 0.0, 1660 "median": 1.2, 1661 "count": 60., 1662 }, 1663 } 1664 1665 expectedPrice := aggregation.Property{ 1666 Type: aggregation.PropertyTypeNumerical, 1667 NumericalAggregations: map[string]interface{}{ 1668 "mean": 218.33333333333334, 1669 "maximum": 800., 1670 "minimum": 10., 1671 "sum": 13100., 1672 "mode": 70., 1673 "median": 115., 1674 "count": 60., 1675 }, 1676 } 1677 1678 expectedMakesProduct := aggregation.Property{ 1679 Type: aggregation.PropertyTypeReference, 1680 ReferenceAggregation: aggregation.Reference{ 1681 PointingTo: []string{"weaviate://localhost/1295c052-263d-4aae-99dd-920c5a370d06"}, 1682 }, 1683 } 1684 1685 expectedResult := &aggregation.Result{ 1686 Groups: []aggregation.Group{ 1687 { 1688 Count: 60, // because includeMetaCount was set 1689 Properties: map[string]aggregation.Property{ 1690 "listedInIndex": { 1691 Type: aggregation.PropertyTypeBoolean, 1692 BooleanAggregation: aggregation.Boolean{ 1693 TotalTrue: 50, 1694 TotalFalse: 10, 1695 PercentageTrue: 0.8333333333333334, 1696 PercentageFalse: 0.16666666666666666, 1697 Count: 60, 1698 }, 1699 }, 1700 "location": { 1701 Type: aggregation.PropertyTypeText, 1702 TextAggregation: aggregation.Text{ 1703 Count: 60, 1704 Items: []aggregation.TextOccurrence{ 1705 { 1706 Value: "Atlanta", 1707 Occurs: 20, 1708 }, 1709 { 1710 Value: "Detroit", 1711 Occurs: 10, 1712 }, 1713 { 1714 Value: "Los Angeles", 1715 Occurs: 10, 1716 }, 1717 { 1718 Value: "New York", 1719 Occurs: 10, 1720 }, 1721 { 1722 Value: "San Francisco", 1723 Occurs: 10, 1724 }, 1725 }, 1726 }, 1727 }, 1728 "sector": { 1729 Type: aggregation.PropertyTypeText, 1730 TextAggregation: aggregation.Text{ 1731 Count: 60, 1732 Items: []aggregation.TextOccurrence{ 1733 { 1734 Value: "Food", 1735 Occurs: 60, 1736 }, 1737 }, 1738 }, 1739 }, 1740 }, 1741 }, 1742 }, 1743 } 1744 1745 assert.Equal(t, expectedResult.Groups, res.Groups) 1746 1747 // floating point arithmetic for numerical fields 1748 1749 assert.InEpsilon(t, expectedDivYield.NumericalAggregations["mean"], 1750 actualDivYield.NumericalAggregations["mean"], 0.1) 1751 assert.InEpsilon(t, expectedPrice.NumericalAggregations["mean"], 1752 actualPrice.NumericalAggregations["mean"], 0.1) 1753 1754 assert.InEpsilon(t, expectedDivYield.NumericalAggregations["maximum"], 1755 actualDivYield.NumericalAggregations["maximum"], 0.1) 1756 assert.InEpsilon(t, expectedPrice.NumericalAggregations["maximum"], 1757 actualPrice.NumericalAggregations["maximum"], 0.1) 1758 1759 assert.Equal(t, expectedDivYield.NumericalAggregations["minimum"], 1760 actualDivYield.NumericalAggregations["minimum"]) 1761 assert.Equal(t, expectedPrice.NumericalAggregations["minimum"], 1762 actualPrice.NumericalAggregations["minimum"]) 1763 1764 assert.Equal(t, expectedDivYield.NumericalAggregations["mode"], 1765 actualDivYield.NumericalAggregations["mode"]) 1766 assert.Equal(t, expectedPrice.NumericalAggregations["mode"], 1767 actualPrice.NumericalAggregations["mode"]) 1768 1769 assert.InEpsilon(t, expectedDivYield.NumericalAggregations["median"], 1770 actualDivYield.NumericalAggregations["median"], 0.1) 1771 assert.InEpsilon(t, expectedPrice.NumericalAggregations["median"], 1772 actualPrice.NumericalAggregations["median"], 0.1) 1773 1774 assert.InEpsilon(t, expectedDivYield.NumericalAggregations["count"], 1775 actualDivYield.NumericalAggregations["count"], 0.1) 1776 assert.InEpsilon(t, expectedPrice.NumericalAggregations["count"], 1777 actualPrice.NumericalAggregations["count"], 0.1) 1778 1779 assert.Equal(t, expectedMakesProduct.ReferenceAggregation.PointingTo, 1780 actualMakesProduct.ReferenceAggregation.PointingTo) 1781 }) 1782 1783 t.Run("multiple fields, multiple aggregators, ref filter", func(t *testing.T) { 1784 if !exact { 1785 // filtering is happening inside a shard, so there is no need to test 1786 // this again for multi-sharding. This saves us from adapting all the 1787 // assertions to work with fuzzy values 1788 t.Skip() 1789 } 1790 1791 params := aggregation.Params{ 1792 ClassName: schema.ClassName(companyClass.Class), 1793 GroupBy: nil, // explicitly set to nil, 1794 Filters: &filters.LocalFilter{ 1795 Root: &filters.Clause{ 1796 Operator: filters.OperatorEqual, 1797 Value: &filters.Value{ 1798 Type: schema.DataTypeText, 1799 Value: "Superbread", 1800 }, 1801 On: &filters.Path{ 1802 Property: "makesProduct", 1803 Child: &filters.Path{ 1804 Class: "AggregationsTestProduct", 1805 Property: "name", 1806 }, 1807 }, 1808 }, 1809 }, 1810 IncludeMetaCount: true, 1811 Properties: []aggregation.ParamProperty{ 1812 { 1813 Name: schema.PropertyName("dividendYield"), 1814 Aggregators: []aggregation.Aggregator{ 1815 aggregation.MeanAggregator, 1816 aggregation.MaximumAggregator, 1817 aggregation.MinimumAggregator, 1818 aggregation.SumAggregator, 1819 aggregation.ModeAggregator, 1820 aggregation.MedianAggregator, 1821 aggregation.CountAggregator, 1822 aggregation.TypeAggregator, // ignored in the repo, but can't block 1823 }, 1824 }, 1825 { 1826 Name: schema.PropertyName("price"), 1827 Aggregators: []aggregation.Aggregator{ 1828 aggregation.MeanAggregator, 1829 aggregation.MaximumAggregator, 1830 aggregation.MinimumAggregator, 1831 aggregation.SumAggregator, 1832 aggregation.ModeAggregator, 1833 aggregation.MedianAggregator, 1834 aggregation.CountAggregator, 1835 aggregation.TypeAggregator, // ignored in the repo, but can't block 1836 }, 1837 }, 1838 { 1839 Name: schema.PropertyName("listedInIndex"), 1840 Aggregators: []aggregation.Aggregator{ 1841 aggregation.PercentageTrueAggregator, 1842 aggregation.PercentageFalseAggregator, 1843 aggregation.TotalTrueAggregator, 1844 aggregation.TotalFalseAggregator, 1845 aggregation.TypeAggregator, // ignored in the repo, but can't block 1846 }, 1847 }, 1848 { 1849 Name: schema.PropertyName("location"), 1850 Aggregators: []aggregation.Aggregator{ 1851 // limit is so high, it's not really restrictive 1852 aggregation.NewTopOccurrencesAggregator(ptInt(10)), 1853 aggregation.TypeAggregator, // ignored in the repo, but can't block 1854 }, 1855 }, 1856 { 1857 Name: schema.PropertyName("sector"), 1858 Aggregators: []aggregation.Aggregator{ 1859 // limit is very restrictive 1860 aggregation.NewTopOccurrencesAggregator(ptInt(1)), 1861 aggregation.TypeAggregator, // ignored in the repo, but can't block 1862 }, 1863 }, 1864 // we are not expecting any result from the following agg, as this is 1865 // handled in the usecase. However, we at least want to make sure it 1866 // doesn't block or lead to any errors 1867 { 1868 Name: schema.PropertyName("makesProduct"), 1869 Aggregators: []aggregation.Aggregator{ 1870 aggregation.PointingToAggregator, 1871 aggregation.TypeAggregator, 1872 }, 1873 }, 1874 }, 1875 } 1876 1877 res, err := repo.Aggregate(context.Background(), params) 1878 require.Nil(t, err) 1879 1880 expectedResult := &aggregation.Result{ 1881 Groups: []aggregation.Group{ 1882 { 1883 Count: 10, 1884 Properties: map[string]aggregation.Property{ 1885 "makesProduct": { 1886 Type: aggregation.PropertyTypeReference, 1887 ReferenceAggregation: aggregation.Reference{PointingTo: []string{"weaviate://localhost/1295c052-263d-4aae-99dd-920c5a370d06"}}, 1888 }, 1889 "dividendYield": { 1890 Type: aggregation.PropertyTypeNumerical, 1891 NumericalAggregations: map[string]interface{}{ 1892 "mean": 8.0, 1893 "maximum": 8.0, 1894 "minimum": 8.0, 1895 "sum": 80., 1896 "mode": 8.0, 1897 "median": 8.0, 1898 "count": 10., 1899 }, 1900 }, 1901 "price": { 1902 Type: aggregation.PropertyTypeNumerical, 1903 NumericalAggregations: map[string]interface{}{ 1904 "mean": 10., 1905 "maximum": 10., 1906 "minimum": 10., 1907 "sum": 100., 1908 "mode": 10., 1909 "median": 10., 1910 "count": 10., 1911 }, 1912 }, 1913 "listedInIndex": { 1914 Type: aggregation.PropertyTypeBoolean, 1915 BooleanAggregation: aggregation.Boolean{ 1916 TotalTrue: 10, 1917 TotalFalse: 0, 1918 PercentageTrue: 1, 1919 PercentageFalse: 0, 1920 Count: 10, 1921 }, 1922 }, 1923 "location": { 1924 Type: aggregation.PropertyTypeText, 1925 TextAggregation: aggregation.Text{ 1926 Count: 10, 1927 Items: []aggregation.TextOccurrence{ 1928 { 1929 Value: "Detroit", 1930 Occurs: 10, 1931 }, 1932 }, 1933 }, 1934 }, 1935 "sector": { 1936 Type: aggregation.PropertyTypeText, 1937 TextAggregation: aggregation.Text{ 1938 Count: 10, 1939 Items: []aggregation.TextOccurrence{ 1940 { 1941 Value: "Food", 1942 Occurs: 10, 1943 }, 1944 }, 1945 }, 1946 }, 1947 }, 1948 }, 1949 }, 1950 } 1951 1952 assert.Equal(t, expectedResult.Groups, res.Groups) 1953 }) 1954 1955 t.Run("array types, only meta count, no other aggregations", func(t *testing.T) { 1956 params := aggregation.Params{ 1957 ClassName: schema.ClassName(arrayTypesClass.Class), 1958 IncludeMetaCount: true, 1959 GroupBy: nil, // explicitly set to nil 1960 } 1961 1962 res, err := repo.Aggregate(context.Background(), params) 1963 require.Nil(t, err) 1964 1965 expectedResult := &aggregation.Result{ 1966 Groups: []aggregation.Group{ 1967 { 1968 GroupedBy: nil, 1969 Count: 2, 1970 }, 1971 }, 1972 } 1973 1974 require.NotNil(t, res) 1975 assert.Equal(t, expectedResult.Groups, res.Groups) 1976 }) 1977 1978 t.Run("array types, single aggregator numbers", func(t *testing.T) { 1979 params := aggregation.Params{ 1980 ClassName: schema.ClassName(arrayTypesClass.Class), 1981 GroupBy: nil, // explicitly set to nil 1982 Properties: []aggregation.ParamProperty{ 1983 { 1984 Name: schema.PropertyName("numbers"), 1985 Aggregators: []aggregation.Aggregator{ 1986 aggregation.MeanAggregator, 1987 aggregation.MaximumAggregator, 1988 aggregation.MinimumAggregator, 1989 aggregation.SumAggregator, 1990 aggregation.ModeAggregator, 1991 aggregation.MedianAggregator, 1992 aggregation.CountAggregator, 1993 aggregation.TypeAggregator, // ignored in the repo, but can't block 1994 }, 1995 }, 1996 }, 1997 } 1998 1999 res, err := repo.Aggregate(context.Background(), params) 2000 require.Nil(t, err) 2001 2002 expectedResult := &aggregation.Result{ 2003 Groups: []aggregation.Group{ 2004 { 2005 GroupedBy: nil, 2006 Properties: map[string]aggregation.Property{ 2007 "numbers": { 2008 Type: aggregation.PropertyTypeNumerical, 2009 NumericalAggregations: map[string]interface{}{ 2010 "mean": 2.0, 2011 "maximum": 3.0, 2012 "minimum": 1.0, 2013 "sum": 14.0, 2014 "mode": 2.0, 2015 "median": 2.0, 2016 "count": 7., 2017 }, 2018 }, 2019 }, 2020 }, 2021 }, 2022 } 2023 2024 assert.Equal(t, expectedResult.Groups, res.Groups) 2025 }) 2026 2027 t.Run("array types, single aggregator strings", func(t *testing.T) { 2028 if !exact { 2029 t.Skip() 2030 } 2031 params := aggregation.Params{ 2032 ClassName: schema.ClassName(arrayTypesClass.Class), 2033 GroupBy: nil, // explicitly set to nil 2034 Properties: []aggregation.ParamProperty{ 2035 { 2036 Name: schema.PropertyName("strings"), 2037 Aggregators: []aggregation.Aggregator{ 2038 // limit is very restrictive 2039 aggregation.NewTopOccurrencesAggregator(ptInt(1)), 2040 aggregation.TypeAggregator, // ignored in the repo, but can't block 2041 }, 2042 }, 2043 }, 2044 } 2045 2046 res, err := repo.Aggregate(context.Background(), params) 2047 require.Nil(t, err) 2048 2049 expectedResult := &aggregation.Result{ 2050 Groups: []aggregation.Group{ 2051 { 2052 GroupedBy: nil, 2053 Properties: map[string]aggregation.Property{ 2054 "strings": { 2055 Type: aggregation.PropertyTypeText, 2056 TextAggregation: aggregation.Text{ 2057 Count: 4, 2058 Items: []aggregation.TextOccurrence{ 2059 { 2060 Value: "a", 2061 Occurs: 2, 2062 }, 2063 }, 2064 }, 2065 }, 2066 }, 2067 }, 2068 }, 2069 } 2070 2071 assert.Equal(t, expectedResult.Groups, res.Groups) 2072 }) 2073 } 2074 } 2075 2076 func testDateAggregationsWithGrouping(repo *DB, exact bool) func(t *testing.T) { 2077 return func(t *testing.T) { 2078 t.Run("group on only unique values", func(t *testing.T) { 2079 params := aggregation.Params{ 2080 ClassName: schema.ClassName(customerClass.Class), 2081 IncludeMetaCount: true, 2082 GroupBy: &filters.Path{ 2083 Class: schema.ClassName(customerClass.Class), 2084 // Each customer obj has a unique value for the `internalId` field 2085 Property: schema.PropertyName("internalId"), 2086 }, 2087 } 2088 2089 res, err := repo.Aggregate(context.Background(), params) 2090 require.Nil(t, err) 2091 2092 require.NotNil(t, res) 2093 assert.Len(t, res.Groups, len(customers)) 2094 }) 2095 2096 t.Run("group on only identical values", func(t *testing.T) { 2097 params := aggregation.Params{ 2098 ClassName: schema.ClassName(customerClass.Class), 2099 IncludeMetaCount: true, 2100 GroupBy: &filters.Path{ 2101 Class: schema.ClassName(customerClass.Class), 2102 // Each customer obj has the same value for the `countryOfOrigin` field 2103 Property: schema.PropertyName("countryOfOrigin"), 2104 }, 2105 Properties: []aggregation.ParamProperty{ 2106 { 2107 Name: "timeArrived", 2108 Aggregators: []aggregation.Aggregator{ 2109 aggregation.CountAggregator, 2110 aggregation.MinimumAggregator, 2111 aggregation.MaximumAggregator, 2112 aggregation.MedianAggregator, 2113 aggregation.ModeAggregator, 2114 }, 2115 }, 2116 }, 2117 } 2118 2119 res, err := repo.Aggregate(context.Background(), params) 2120 require.Nil(t, err) 2121 2122 require.NotNil(t, res) 2123 assert.Len(t, res.Groups, 1) 2124 2125 expectedProperties := map[string]interface{}{ 2126 "count": int64(10), 2127 "minimum": "2022-06-16T17:30:17.231346Z", 2128 "maximum": "2022-06-16T17:30:26.451235Z", 2129 "median": "2022-06-16T17:30:21.1179905Z", 2130 "mode": "2022-06-16T17:30:17.231346Z", 2131 } 2132 receivedProperties := res.Groups[0].Properties["timeArrived"].DateAggregations 2133 assert.EqualValues(t, expectedProperties, receivedProperties) 2134 }) 2135 2136 t.Run("group on some unique values", func(t *testing.T) { 2137 params := aggregation.Params{ 2138 ClassName: schema.ClassName(customerClass.Class), 2139 IncludeMetaCount: true, 2140 GroupBy: &filters.Path{ 2141 Class: schema.ClassName(customerClass.Class), 2142 // should result in two groups due to bool value 2143 Property: schema.PropertyName("isNewCustomer"), 2144 }, 2145 Properties: []aggregation.ParamProperty{ 2146 { 2147 Name: "timeArrived", 2148 Aggregators: []aggregation.Aggregator{ 2149 aggregation.CountAggregator, 2150 aggregation.MinimumAggregator, 2151 aggregation.MaximumAggregator, 2152 aggregation.MedianAggregator, 2153 aggregation.ModeAggregator, 2154 }, 2155 }, 2156 }, 2157 } 2158 2159 res, err := repo.Aggregate(context.Background(), params) 2160 require.Nil(t, err) 2161 2162 require.NotNil(t, res) 2163 assert.Len(t, res.Groups, 2) 2164 2165 expectedResult := []aggregation.Group{ 2166 { 2167 Properties: map[string]aggregation.Property{ 2168 "timeArrived": { 2169 Type: "date", 2170 DateAggregations: map[string]interface{}{ 2171 "count": int64(6), 2172 "maximum": "2022-06-16T17:30:25.524536Z", 2173 "median": "2022-06-16T17:30:19.6718905Z", 2174 "minimum": "2022-06-16T17:30:17.231346Z", 2175 "mode": "2022-06-16T17:30:17.231346Z", 2176 }, 2177 }, 2178 }, 2179 GroupedBy: &aggregation.GroupedBy{ 2180 Value: false, 2181 Path: []string{"isNewCustomer"}, 2182 }, 2183 Count: 6, 2184 }, 2185 { 2186 Properties: map[string]aggregation.Property{ 2187 "timeArrived": { 2188 Type: "date", 2189 DateAggregations: map[string]interface{}{ 2190 "count": int64(4), 2191 "maximum": "2022-06-16T17:30:26.451235Z", 2192 "median": "2022-06-16T17:30:22.224622Z", 2193 "minimum": "2022-06-16T17:30:20.123546Z", 2194 "mode": "2022-06-16T17:30:20.123546Z", 2195 }, 2196 }, 2197 }, 2198 GroupedBy: &aggregation.GroupedBy{ 2199 Value: true, 2200 Path: []string{"isNewCustomer"}, 2201 }, 2202 Count: 4, 2203 }, 2204 } 2205 2206 assert.EqualValues(t, expectedResult, res.Groups) 2207 }) 2208 } 2209 } 2210 2211 func testDateAggregationsWithoutGrouping(repo *DB, exact bool) func(t *testing.T) { 2212 return func(t *testing.T) { 2213 t.Run("without grouping", func(t *testing.T) { 2214 params := aggregation.Params{ 2215 ClassName: schema.ClassName(customerClass.Class), 2216 GroupBy: nil, 2217 Properties: []aggregation.ParamProperty{ 2218 { 2219 Name: "timeArrived", 2220 Aggregators: []aggregation.Aggregator{ 2221 aggregation.CountAggregator, 2222 aggregation.MinimumAggregator, 2223 aggregation.MaximumAggregator, 2224 aggregation.MedianAggregator, 2225 aggregation.ModeAggregator, 2226 }, 2227 }, 2228 }, 2229 } 2230 2231 res, err := repo.Aggregate(context.Background(), params) 2232 require.Nil(t, err) 2233 2234 require.NotNil(t, res) 2235 require.Len(t, res.Groups, 1) 2236 }) 2237 } 2238 } 2239 2240 func ptInt(in int) *int { 2241 return &in 2242 } 2243 2244 func sectorEqualsFoodFilter() *filters.LocalFilter { 2245 return &filters.LocalFilter{ 2246 Root: &filters.Clause{ 2247 Operator: filters.OperatorEqual, 2248 On: &filters.Path{ 2249 Class: "Company", 2250 Property: "sector", 2251 }, 2252 Value: &filters.Value{ 2253 Value: "Food", 2254 Type: schema.DataTypeText, 2255 }, 2256 }, 2257 } 2258 } 2259 2260 func mustStringToTime(s string) time.Time { 2261 asTime, err := time.ParseInLocation(time.RFC3339Nano, s, time.UTC) 2262 if err != nil { 2263 panic(fmt.Sprintf("failed to parse time: %s, %s", s, err)) 2264 } 2265 return asTime 2266 }