github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregator/grouped.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package aggregator 13 14 import ( 15 "context" 16 17 "github.com/pkg/errors" 18 "github.com/weaviate/weaviate/entities/aggregation" 19 ) 20 21 // groupedAggregator performs aggregation in groups. This is a two-step 22 // process. First a whole-db scan is performed to identify the groups, then 23 // the top-n groups are selected (the rest is discarded). Only for those top 24 // groups an actual aggregation is performed 25 type groupedAggregator struct { 26 *Aggregator 27 } 28 29 func newGroupedAggregator(agg *Aggregator) *groupedAggregator { 30 return &groupedAggregator{Aggregator: agg} 31 } 32 33 func (ga *groupedAggregator) Do(ctx context.Context) (*aggregation.Result, error) { 34 out := aggregation.Result{} 35 36 groups, err := ga.identifyGroups(ctx) 37 if err != nil { 38 return nil, errors.Wrap(err, "identify groups") 39 } 40 41 out.Groups = make([]aggregation.Group, len(groups)) 42 for i, g := range groups { 43 res, err := ga.aggregateGroup(ctx, g.res, g.docIDs) 44 if err != nil { 45 return nil, errors.Wrapf(err, "aggregate group %d (%v)", i, 46 g.res.GroupedBy.Value) 47 } 48 out.Groups[i] = res 49 } 50 51 return &out, nil 52 } 53 54 // group is a helper construct that contains the final aggregation.Group which 55 // will eventually be served to the user. But it also contains the list of 56 // docIDs in that group, so we can use those to perform the actual aggregation 57 // (for each group) in a second step 58 type group struct { 59 res aggregation.Group 60 docIDs []uint64 61 } 62 63 func (ga *groupedAggregator) identifyGroups(ctx context.Context) ([]group, error) { 64 limit := 100 // reasonable default in case we get none 65 if ga.params.Limit != nil { 66 limit = *ga.params.Limit 67 } 68 return newGrouper(ga.Aggregator, limit).Do(ctx) 69 } 70 71 func (ga *groupedAggregator) aggregateGroup(ctx context.Context, 72 in aggregation.Group, ids []uint64, 73 ) (aggregation.Group, error) { 74 out := in 75 fa := newFilteredAggregator(ga.Aggregator) 76 props, err := fa.properties(ctx, ids) 77 if err != nil { 78 return out, errors.Wrap(err, "aggregate properties") 79 } 80 81 out.Properties = props 82 return out, nil 83 }