github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/querier/astmapper/shard_summer.go (about)

     1  package astmapper
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strconv"
     7  	"strings"
     8  
     9  	"github.com/pkg/errors"
    10  	"github.com/prometheus/client_golang/prometheus"
    11  	"github.com/prometheus/common/model"
    12  	"github.com/prometheus/prometheus/model/labels"
    13  	"github.com/prometheus/prometheus/promql/parser"
    14  
    15  	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
    16  )
    17  
const (
	// ShardLabel is the reserved label name used to tag a selector with the
	// shard it should be evaluated against.
	ShardLabel = "__cortex_shard__"
	// ShardLabelFmt is the format of the value stored under ShardLabel:
	// "<shard index>_of_<total shards>", e.g. "0_of_2".
	ShardLabelFmt = "%d_of_%d"
)
    24  
    25  var (
    26  	// ShardLabelRE matches a value in ShardLabelFmt
    27  	ShardLabelRE = regexp.MustCompile("^[0-9]+_of_[0-9]+$")
    28  )
    29  
// squasher collapses any number of AST nodes into a single expression, or
// reports an error. shardSum uses it to merge the per-shard sub sums.
type squasher = func(...parser.Node) (parser.Expr, error)
    31  
// shardSummer is the node mapper that rewrites sum aggregations into one
// sub sum per shard plus a parent sum over the squashed sub sums.
type shardSummer struct {
	// shards is the number of legs each parallelizable sum is split into.
	shards int
	// currentShard is nil on the top-level summer. Copies made with
	// CopyWithCurShard carry the shard index that selectors are tagged with.
	currentShard *int
	// squash merges the per-shard legs into a single expression.
	squash squasher

	// Metrics.
	// shardedQueries may be nil, in which case nothing is recorded.
	shardedQueries prometheus.Counter
}
    40  
    41  // NewShardSummer instantiates an ASTMapper which will fan out sum queries by shard
    42  func NewShardSummer(shards int, squasher squasher, shardedQueries prometheus.Counter) (ASTMapper, error) {
    43  	if squasher == nil {
    44  		return nil, errors.Errorf("squasher required and not passed")
    45  	}
    46  
    47  	return NewASTNodeMapper(&shardSummer{
    48  		shards:         shards,
    49  		squash:         squasher,
    50  		currentShard:   nil,
    51  		shardedQueries: shardedQueries,
    52  	}), nil
    53  }
    54  
    55  // CopyWithCurShard clones a shardSummer with a new current shard.
    56  func (summer *shardSummer) CopyWithCurShard(curshard int) *shardSummer {
    57  	s := *summer
    58  	s.currentShard = &curshard
    59  	return &s
    60  }
    61  
    62  // shardSummer expands a query AST by sharding and re-summing when possible
    63  func (summer *shardSummer) MapNode(node parser.Node) (parser.Node, bool, error) {
    64  
    65  	switch n := node.(type) {
    66  	case *parser.AggregateExpr:
    67  		if CanParallelize(n) && n.Op == parser.SUM {
    68  			result, err := summer.shardSum(n)
    69  			return result, true, err
    70  		}
    71  
    72  		return n, false, nil
    73  
    74  	case *parser.VectorSelector:
    75  		if summer.currentShard != nil {
    76  			mapped, err := shardVectorSelector(*summer.currentShard, summer.shards, n)
    77  			return mapped, true, err
    78  		}
    79  		return n, true, nil
    80  
    81  	case *parser.MatrixSelector:
    82  		if summer.currentShard != nil {
    83  			mapped, err := shardMatrixSelector(*summer.currentShard, summer.shards, n)
    84  			return mapped, true, err
    85  		}
    86  		return n, true, nil
    87  
    88  	default:
    89  		return n, false, nil
    90  	}
    91  }
    92  
    93  // shardSum contains the logic for how we split/stitch legs of a parallelized sum query
    94  func (summer *shardSummer) shardSum(expr *parser.AggregateExpr) (parser.Node, error) {
    95  
    96  	parent, subSums, err := summer.splitSum(expr)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  
   101  	combinedSums, err := summer.squash(subSums...)
   102  
   103  	if err != nil {
   104  		return nil, err
   105  	}
   106  
   107  	parent.Expr = combinedSums
   108  	return parent, nil
   109  }
   110  
   111  // splitSum forms the parent and child legs of a parallel query
   112  func (summer *shardSummer) splitSum(
   113  	expr *parser.AggregateExpr,
   114  ) (
   115  	parent *parser.AggregateExpr,
   116  	children []parser.Node,
   117  	err error,
   118  ) {
   119  	parent = &parser.AggregateExpr{
   120  		Op:    expr.Op,
   121  		Param: expr.Param,
   122  	}
   123  	var mkChild func(sharded *parser.AggregateExpr) parser.Expr
   124  
   125  	if expr.Without {
   126  		/*
   127  			parallelizing a sum using without(foo) is representable naively as
   128  			sum without(foo) (
   129  			  sum without(__cortex_shard__) (rate(bar1{__cortex_shard__="0_of_2",baz="blip"}[1m])) or
   130  			  sum without(__cortex_shard__) (rate(bar1{__cortex_shard__="1_of_2",baz="blip"}[1m]))
   131  			)
   132  			or (more optimized):
   133  			sum without(__cortex_shard__) (
   134  			  sum without(foo) (rate(bar1{__cortex_shard__="0_of_2",baz="blip"}[1m])) or
   135  			  sum without(foo) (rate(bar1{__cortex_shard__="1_of_2",baz="blip"}[1m]))
   136  			)
   137  
   138  		*/
   139  		parent.Grouping = []string{ShardLabel}
   140  		parent.Without = true
   141  		mkChild = func(sharded *parser.AggregateExpr) parser.Expr {
   142  			sharded.Grouping = expr.Grouping
   143  			sharded.Without = true
   144  			return sharded
   145  		}
   146  	} else if len(expr.Grouping) > 0 {
   147  		/*
   148  			parallelizing a sum using by(foo) is representable as
   149  			sum by(foo) (
   150  			  sum by(foo, __cortex_shard__) (rate(bar1{__cortex_shard__="0_of_2",baz="blip"}[1m])) or
   151  			  sum by(foo, __cortex_shard__) (rate(bar1{__cortex_shard__="1_of_2",baz="blip"}[1m]))
   152  			)
   153  		*/
   154  		parent.Grouping = expr.Grouping
   155  		mkChild = func(sharded *parser.AggregateExpr) parser.Expr {
   156  			groups := make([]string, 0, len(expr.Grouping)+1)
   157  			groups = append(groups, expr.Grouping...)
   158  			groups = append(groups, ShardLabel)
   159  			sharded.Grouping = groups
   160  			return sharded
   161  		}
   162  	} else {
   163  		/*
   164  			parallelizing a non-parameterized sum is representable as
   165  			sum(
   166  			  sum without(__cortex_shard__) (rate(bar1{__cortex_shard__="0_of_2",baz="blip"}[1m])) or
   167  			  sum without(__cortex_shard__) (rate(bar1{__cortex_shard__="1_of_2",baz="blip"}[1m]))
   168  			)
   169  			or (more optimized):
   170  			sum without(__cortex_shard__) (
   171  			  sum by(__cortex_shard__) (rate(bar1{__cortex_shard__="0_of_2",baz="blip"}[1m])) or
   172  			  sum by(__cortex_shard__) (rate(bar1{__cortex_shard__="1_of_2",baz="blip"}[1m]))
   173  			)
   174  		*/
   175  		parent.Grouping = []string{ShardLabel}
   176  		parent.Without = true
   177  		mkChild = func(sharded *parser.AggregateExpr) parser.Expr {
   178  			sharded.Grouping = []string{ShardLabel}
   179  			return sharded
   180  		}
   181  	}
   182  
   183  	// iterate across shardFactor to create children
   184  	for i := 0; i < summer.shards; i++ {
   185  		cloned, err := CloneNode(expr.Expr)
   186  		if err != nil {
   187  			return parent, children, err
   188  		}
   189  
   190  		subSummer := NewASTNodeMapper(summer.CopyWithCurShard(i))
   191  		sharded, err := subSummer.Map(cloned)
   192  		if err != nil {
   193  			return parent, children, err
   194  		}
   195  
   196  		subSum := mkChild(&parser.AggregateExpr{
   197  			Op:   expr.Op,
   198  			Expr: sharded.(parser.Expr),
   199  		})
   200  
   201  		children = append(children,
   202  			subSum,
   203  		)
   204  	}
   205  
   206  	summer.recordShards(float64(summer.shards))
   207  
   208  	return parent, children, nil
   209  }
   210  
   211  // ShardSummer is explicitly passed a prometheus.Counter during construction
   212  // in order to prevent duplicate metric registerings (ShardSummers are created per request).
   213  //recordShards prevents calling nil interfaces (commonly used in tests).
   214  func (summer *shardSummer) recordShards(_ float64) {
   215  	if summer.shardedQueries != nil {
   216  		summer.shardedQueries.Add(float64(summer.shards))
   217  	}
   218  }
   219  
   220  func shardVectorSelector(curshard, shards int, selector *parser.VectorSelector) (parser.Node, error) {
   221  	shardMatcher, err := labels.NewMatcher(labels.MatchEqual, ShardLabel, fmt.Sprintf(ShardLabelFmt, curshard, shards))
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  
   226  	return &parser.VectorSelector{
   227  		Name:   selector.Name,
   228  		Offset: selector.Offset,
   229  		LabelMatchers: append(
   230  			[]*labels.Matcher{shardMatcher},
   231  			selector.LabelMatchers...,
   232  		),
   233  	}, nil
   234  }
   235  
   236  func shardMatrixSelector(curshard, shards int, selector *parser.MatrixSelector) (parser.Node, error) {
   237  	shardMatcher, err := labels.NewMatcher(labels.MatchEqual, ShardLabel, fmt.Sprintf(ShardLabelFmt, curshard, shards))
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  
   242  	if vs, ok := selector.VectorSelector.(*parser.VectorSelector); ok {
   243  		return &parser.MatrixSelector{
   244  			VectorSelector: &parser.VectorSelector{
   245  				Name:   vs.Name,
   246  				Offset: vs.Offset,
   247  				LabelMatchers: append(
   248  					[]*labels.Matcher{shardMatcher},
   249  					vs.LabelMatchers...,
   250  				),
   251  				PosRange: vs.PosRange,
   252  			},
   253  			Range:  selector.Range,
   254  			EndPos: selector.EndPos,
   255  		}, nil
   256  	}
   257  
   258  	return nil, fmt.Errorf("invalid selector type: %T", selector.VectorSelector)
   259  }
   260  
   261  // ParseShard will extract the shard information encoded in ShardLabelFmt
   262  func ParseShard(input string) (parsed ShardAnnotation, err error) {
   263  	if !ShardLabelRE.MatchString(input) {
   264  		return parsed, errors.Errorf("Invalid ShardLabel value: [%s]", input)
   265  	}
   266  
   267  	matches := strings.Split(input, "_")
   268  	x, err := strconv.Atoi(matches[0])
   269  	if err != nil {
   270  		return parsed, err
   271  	}
   272  	of, err := strconv.Atoi(matches[2])
   273  	if err != nil {
   274  		return parsed, err
   275  	}
   276  
   277  	if x >= of {
   278  		return parsed, errors.Errorf("Shards out of bounds: [%d] >= [%d]", x, of)
   279  	}
   280  	return ShardAnnotation{
   281  		Shard: x,
   282  		Of:    of,
   283  	}, err
   284  }
   285  
// ShardAnnotation is a convenience struct which holds data from a parsed shard label.
type ShardAnnotation struct {
	// Shard is the index of this shard; ParseShard only yields values below Of.
	Shard int
	// Of is the total number of shards.
	Of int
}
   291  
   292  func (shard ShardAnnotation) Match(fp model.Fingerprint) bool {
   293  	return uint64(fp)%uint64(shard.Of) == uint64(shard.Shard)
   294  }
   295  
   296  // String encodes a shardAnnotation into a label value
   297  func (shard ShardAnnotation) String() string {
   298  	return fmt.Sprintf(ShardLabelFmt, shard.Shard, shard.Of)
   299  }
   300  
   301  // Label generates the ShardAnnotation as a label
   302  func (shard ShardAnnotation) Label() labels.Label {
   303  	return labels.Label{
   304  		Name:  ShardLabel,
   305  		Value: shard.String(),
   306  	}
   307  }
   308  
   309  func (shard ShardAnnotation) TSDB() index.ShardAnnotation {
   310  	return index.NewShard(uint32(shard.Shard), uint32(shard.Of))
   311  }
   312  
   313  // ShardFromMatchers extracts a ShardAnnotation and the index it was pulled from in the matcher list
   314  func ShardFromMatchers(matchers []*labels.Matcher) (shard *ShardAnnotation, idx int, err error) {
   315  	for i, matcher := range matchers {
   316  		if matcher.Name == ShardLabel && matcher.Type == labels.MatchEqual {
   317  			shard, err := ParseShard(matcher.Value)
   318  			if err != nil {
   319  				return nil, i, err
   320  			}
   321  			return &shard, i, nil
   322  		}
   323  	}
   324  	return nil, 0, nil
   325  }