github.com/thanos-io/thanos@v0.32.5/test/e2e/rules_api_test.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package e2e_test
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/efficientgo/e2e"
    17  	e2emon "github.com/efficientgo/e2e/monitoring"
    18  	"github.com/go-kit/log"
    19  	"github.com/pkg/errors"
    20  	"github.com/prometheus/prometheus/rules"
    21  
    22  	"github.com/efficientgo/core/testutil"
    23  	"github.com/thanos-io/thanos/pkg/httpconfig"
    24  	"github.com/thanos-io/thanos/pkg/promclient"
    25  	"github.com/thanos-io/thanos/pkg/rules/rulespb"
    26  	"github.com/thanos-io/thanos/pkg/runutil"
    27  	"github.com/thanos-io/thanos/pkg/store/labelpb"
    28  	"github.com/thanos-io/thanos/test/e2e/e2ethanos"
    29  )
    30  
    31  func TestRulesAPI_Fanout(t *testing.T) {
    32  	t.Parallel()
    33  
    34  	e, err := e2e.NewDockerEnvironment("rules-fanout")
    35  	testutil.Ok(t, err)
    36  	t.Cleanup(e2ethanos.CleanScenario(t, e))
    37  
    38  	qBuilder := e2ethanos.NewQuerierBuilder(e, "query")
    39  
    40  	// Use querier work dir for shared resources (easiest to obtain).
    41  	promRulesSubDir := filepath.Join("rules")
    42  	testutil.Ok(t, os.MkdirAll(filepath.Join(qBuilder.Dir(), promRulesSubDir), os.ModePerm))
    43  	// Create the abort_on_partial_response alert for Prometheus.
    44  	// We don't create the warn_on_partial_response alert as Prometheus has strict yaml unmarshalling.
    45  	createRuleFile(t, filepath.Join(qBuilder.Dir(), promRulesSubDir, "rules.yaml"), testAlertRuleAbortOnPartialResponse)
    46  
    47  	thanosRulesSubDir := filepath.Join("thanos-rules")
    48  	testutil.Ok(t, os.MkdirAll(filepath.Join(qBuilder.Dir(), thanosRulesSubDir), os.ModePerm))
    49  	createRuleFiles(t, filepath.Join(qBuilder.Dir(), thanosRulesSubDir))
    50  	// We create a rule group with limit.
    51  	createRuleFile(t, filepath.Join(qBuilder.Dir(), thanosRulesSubDir, "rules-with-limit.yaml"), testAlertRuleWithLimit)
    52  
    53  	// 2x Prometheus.
    54  	prom1, sidecar1 := e2ethanos.NewPrometheusWithSidecar(
    55  		e,
    56  		"prom1",
    57  		e2ethanos.DefaultPromConfig("ha", 0, "", filepath.Join(qBuilder.InternalDir(), promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget),
    58  		"",
    59  		e2ethanos.DefaultPrometheusImage(), "",
    60  	)
    61  	prom2, sidecar2 := e2ethanos.NewPrometheusWithSidecar(
    62  		e,
    63  		"prom2",
    64  		e2ethanos.DefaultPromConfig("ha", 1, "", filepath.Join(qBuilder.InternalDir(), promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget),
    65  		"",
    66  		e2ethanos.DefaultPrometheusImage(), "",
    67  	)
    68  	testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2))
    69  
    70  	queryCfg := []httpconfig.Config{
    71  		{
    72  			EndpointsConfig: httpconfig.EndpointsConfig{
    73  				StaticAddresses: []string{qBuilder.InternalEndpoint("http")},
    74  				Scheme:          "http",
    75  			},
    76  		},
    77  	}
    78  
    79  	// Recreate rulers with the corresponding query config.
    80  	r1 := e2ethanos.NewRulerBuilder(e, "rule1").InitTSDB(filepath.Join(qBuilder.InternalDir(), thanosRulesSubDir), queryCfg)
    81  	r2 := e2ethanos.NewRulerBuilder(e, "rule2").InitTSDB(filepath.Join(qBuilder.InternalDir(), thanosRulesSubDir), queryCfg)
    82  	testutil.Ok(t, e2e.StartAndWaitReady(r1, r2))
    83  
    84  	stores := []string{sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), r1.InternalEndpoint("grpc"), r2.InternalEndpoint("grpc")}
    85  	q := qBuilder.
    86  		WithStoreAddresses(stores...).
    87  		WithRuleAddresses(stores...).
    88  		Init()
    89  	testutil.Ok(t, e2e.StartAndWaitReady(q))
    90  
    91  	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
    92  	t.Cleanup(cancel)
    93  
    94  	testutil.Ok(t, q.WaitSumMetricsWithOptions(e2emon.Equals(4), []string{"thanos_store_nodes_grpc_connections"}, e2emon.WaitMissingMetrics()))
    95  
    96  	ruleAndAssert(t, ctx, q.Endpoint("http"), "", []*rulespb.RuleGroup{
    97  		{
    98  			Name: "example_abort",
    99  			File: q.Dir() + "/rules/rules.yaml",
   100  			Rules: []*rulespb.Rule{
   101  				rulespb.NewAlertingRule(&rulespb.Alert{
   102  					Name:  "TestAlert_AbortOnPartialResponse",
   103  					State: rulespb.AlertState_FIRING,
   104  					Query: "absent(some_metric)",
   105  					Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{
   106  						{Name: "prometheus", Value: "ha"},
   107  						{Name: "severity", Value: "page"},
   108  					}},
   109  					Health: string(rules.HealthGood),
   110  				}),
   111  			},
   112  		},
   113  		{
   114  			Name: "example_abort",
   115  			File: q.Dir() + "/thanos-rules/rules-0.yaml",
   116  			Rules: []*rulespb.Rule{
   117  				rulespb.NewAlertingRule(&rulespb.Alert{
   118  					Name:  "TestAlert_AbortOnPartialResponse",
   119  					State: rulespb.AlertState_FIRING,
   120  					Query: "absent(some_metric)",
   121  					Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{
   122  						{Name: "severity", Value: "page"},
   123  					}},
   124  					Health: string(rules.HealthGood),
   125  				}),
   126  			},
   127  		},
   128  		{
   129  			Name: "example_warn",
   130  			File: q.Dir() + "/thanos-rules/rules-1.yaml",
   131  			Rules: []*rulespb.Rule{
   132  				rulespb.NewAlertingRule(&rulespb.Alert{
   133  					Name:  "TestAlert_WarnOnPartialResponse",
   134  					State: rulespb.AlertState_FIRING,
   135  					Query: "absent(some_metric)",
   136  					Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{
   137  						{Name: "severity", Value: "page"},
   138  					}},
   139  					Health: string(rules.HealthGood),
   140  				}),
   141  			},
   142  		},
   143  		{
   144  			Name:  "example_with_limit",
   145  			File:  q.Dir() + "/thanos-rules/rules-with-limit.yaml",
   146  			Limit: 1,
   147  			Rules: []*rulespb.Rule{
   148  				rulespb.NewAlertingRule(&rulespb.Alert{
   149  					Name:  "TestAlert_WithLimit",
   150  					State: rulespb.AlertState_INACTIVE,
   151  					Query: `promhttp_metric_handler_requests_total`,
   152  					Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{
   153  						{Name: "severity", Value: "page"},
   154  					}},
   155  					Health: string(rules.HealthBad),
   156  				}),
   157  			},
   158  		},
   159  	})
   160  }
   161  
   162  func ruleAndAssert(t *testing.T, ctx context.Context, addr, typ string, want []*rulespb.RuleGroup) {
   163  	t.Helper()
   164  
   165  	fmt.Println("ruleAndAssert: Waiting for results for rules type", typ)
   166  	var result []*rulespb.RuleGroup
   167  
   168  	logger := log.NewLogfmtLogger(os.Stdout)
   169  	testutil.Ok(t, runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error {
   170  		res, err := promclient.NewDefaultClient().RulesInGRPC(ctx, urlParse(t, "http://"+addr), typ)
   171  		if err != nil {
   172  			return err
   173  		}
   174  
   175  		if len(result) != len(res) {
   176  			fmt.Println("ruleAndAssert: new result:", res)
   177  			result = res
   178  		}
   179  
   180  		if len(res) != len(want) {
   181  			return errors.Errorf("unexpected result size, want %d; got: %d result: %v", len(want), len(res), res)
   182  		}
   183  
   184  		for ig, g := range res {
   185  			res[ig].LastEvaluation = time.Time{}
   186  			res[ig].EvaluationDurationSeconds = 0
   187  			res[ig].Interval = 0
   188  			res[ig].PartialResponseStrategy = 0
   189  
   190  			sort.Slice(g.Rules, func(i, j int) bool { return g.Rules[i].Compare(g.Rules[j]) < 0 })
   191  
   192  			for ir, r := range g.Rules {
   193  				if alert := r.GetAlert(); alert != nil {
   194  					res[ig].Rules[ir] = rulespb.NewAlertingRule(&rulespb.Alert{
   195  						Name:   alert.Name,
   196  						State:  alert.State,
   197  						Query:  alert.Query,
   198  						Labels: alert.Labels,
   199  						Health: alert.Health,
   200  					})
   201  				} else if rec := r.GetRecording(); rec != nil {
   202  					res[ig].Rules[ir] = rulespb.NewAlertingRule(&rulespb.Alert{
   203  						Name:   rec.Name,
   204  						Query:  rec.Query,
   205  						Labels: rec.Labels,
   206  						Health: rec.Health,
   207  					})
   208  				}
   209  			}
   210  		}
   211  
   212  		if !reflect.DeepEqual(want, res) {
   213  			return errors.Errorf("unexpected result\nwant %v\ngot: %v", want, res)
   214  		}
   215  
   216  		return nil
   217  	}))
   218  }