github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/aggregator/grouped.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package aggregator
    13  
    14  import (
    15  	"context"
    16  
    17  	"github.com/pkg/errors"
    18  	"github.com/weaviate/weaviate/entities/aggregation"
    19  )
    20  
    21  // groupedAggregator performs aggregation in groups. This is a two-step
    22  // process. First a whole-db scan is performed to identify the groups, then
    23  // the top-n groups are selected (the rest is discarded). Only for those top
    24  // groups an actual aggregation is performed
    25  type groupedAggregator struct {
    26  	*Aggregator
    27  }
    28  
    29  func newGroupedAggregator(agg *Aggregator) *groupedAggregator {
    30  	return &groupedAggregator{Aggregator: agg}
    31  }
    32  
    33  func (ga *groupedAggregator) Do(ctx context.Context) (*aggregation.Result, error) {
    34  	out := aggregation.Result{}
    35  
    36  	groups, err := ga.identifyGroups(ctx)
    37  	if err != nil {
    38  		return nil, errors.Wrap(err, "identify groups")
    39  	}
    40  
    41  	out.Groups = make([]aggregation.Group, len(groups))
    42  	for i, g := range groups {
    43  		res, err := ga.aggregateGroup(ctx, g.res, g.docIDs)
    44  		if err != nil {
    45  			return nil, errors.Wrapf(err, "aggregate group %d (%v)", i,
    46  				g.res.GroupedBy.Value)
    47  		}
    48  		out.Groups[i] = res
    49  	}
    50  
    51  	return &out, nil
    52  }
    53  
    54  // group is a helper construct that contains the final aggregation.Group which
    55  // will eventually be served to the user. But it also contains the list of
    56  // docIDs in that group, so we can use those to perform the actual aggregation
    57  // (for each group) in a second step
    58  type group struct {
    59  	res    aggregation.Group
    60  	docIDs []uint64
    61  }
    62  
    63  func (ga *groupedAggregator) identifyGroups(ctx context.Context) ([]group, error) {
    64  	limit := 100 // reasonable default in case we get none
    65  	if ga.params.Limit != nil {
    66  		limit = *ga.params.Limit
    67  	}
    68  	return newGrouper(ga.Aggregator, limit).Do(ctx)
    69  }
    70  
    71  func (ga *groupedAggregator) aggregateGroup(ctx context.Context,
    72  	in aggregation.Group, ids []uint64,
    73  ) (aggregation.Group, error) {
    74  	out := in
    75  	fa := newFilteredAggregator(ga.Aggregator)
    76  	props, err := fa.properties(ctx, ids)
    77  	if err != nil {
    78  		return out, errors.Wrap(err, "aggregate properties")
    79  	}
    80  
    81  	out.Properties = props
    82  	return out, nil
    83  }