github.com/thanos-io/thanos@v0.32.5/test/e2e/rules_api_test.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package e2e_test 5 6 import ( 7 "context" 8 "fmt" 9 "os" 10 "path/filepath" 11 "reflect" 12 "sort" 13 "testing" 14 "time" 15 16 "github.com/efficientgo/e2e" 17 e2emon "github.com/efficientgo/e2e/monitoring" 18 "github.com/go-kit/log" 19 "github.com/pkg/errors" 20 "github.com/prometheus/prometheus/rules" 21 22 "github.com/efficientgo/core/testutil" 23 "github.com/thanos-io/thanos/pkg/httpconfig" 24 "github.com/thanos-io/thanos/pkg/promclient" 25 "github.com/thanos-io/thanos/pkg/rules/rulespb" 26 "github.com/thanos-io/thanos/pkg/runutil" 27 "github.com/thanos-io/thanos/pkg/store/labelpb" 28 "github.com/thanos-io/thanos/test/e2e/e2ethanos" 29 ) 30 31 func TestRulesAPI_Fanout(t *testing.T) { 32 t.Parallel() 33 34 e, err := e2e.NewDockerEnvironment("rules-fanout") 35 testutil.Ok(t, err) 36 t.Cleanup(e2ethanos.CleanScenario(t, e)) 37 38 qBuilder := e2ethanos.NewQuerierBuilder(e, "query") 39 40 // Use querier work dir for shared resources (easiest to obtain). 41 promRulesSubDir := filepath.Join("rules") 42 testutil.Ok(t, os.MkdirAll(filepath.Join(qBuilder.Dir(), promRulesSubDir), os.ModePerm)) 43 // Create the abort_on_partial_response alert for Prometheus. 44 // We don't create the warn_on_partial_response alert as Prometheus has strict yaml unmarshalling. 45 createRuleFile(t, filepath.Join(qBuilder.Dir(), promRulesSubDir, "rules.yaml"), testAlertRuleAbortOnPartialResponse) 46 47 thanosRulesSubDir := filepath.Join("thanos-rules") 48 testutil.Ok(t, os.MkdirAll(filepath.Join(qBuilder.Dir(), thanosRulesSubDir), os.ModePerm)) 49 createRuleFiles(t, filepath.Join(qBuilder.Dir(), thanosRulesSubDir)) 50 // We create a rule group with limit. 51 createRuleFile(t, filepath.Join(qBuilder.Dir(), thanosRulesSubDir, "rules-with-limit.yaml"), testAlertRuleWithLimit) 52 53 // 2x Prometheus. 54 prom1, sidecar1 := e2ethanos.NewPrometheusWithSidecar( 55 e, 56 "prom1", 57 e2ethanos.DefaultPromConfig("ha", 0, "", filepath.Join(qBuilder.InternalDir(), promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget), 58 "", 59 e2ethanos.DefaultPrometheusImage(), "", 60 ) 61 prom2, sidecar2 := e2ethanos.NewPrometheusWithSidecar( 62 e, 63 "prom2", 64 e2ethanos.DefaultPromConfig("ha", 1, "", filepath.Join(qBuilder.InternalDir(), promRulesSubDir, "*.yaml"), e2ethanos.LocalPrometheusTarget), 65 "", 66 e2ethanos.DefaultPrometheusImage(), "", 67 ) 68 testutil.Ok(t, e2e.StartAndWaitReady(prom1, sidecar1, prom2, sidecar2)) 69 70 queryCfg := []httpconfig.Config{ 71 { 72 EndpointsConfig: httpconfig.EndpointsConfig{ 73 StaticAddresses: []string{qBuilder.InternalEndpoint("http")}, 74 Scheme: "http", 75 }, 76 }, 77 } 78 79 // Recreate rulers with the corresponding query config. 80 r1 := e2ethanos.NewRulerBuilder(e, "rule1").InitTSDB(filepath.Join(qBuilder.InternalDir(), thanosRulesSubDir), queryCfg) 81 r2 := e2ethanos.NewRulerBuilder(e, "rule2").InitTSDB(filepath.Join(qBuilder.InternalDir(), thanosRulesSubDir), queryCfg) 82 testutil.Ok(t, e2e.StartAndWaitReady(r1, r2)) 83 84 stores := []string{sidecar1.InternalEndpoint("grpc"), sidecar2.InternalEndpoint("grpc"), r1.InternalEndpoint("grpc"), r2.InternalEndpoint("grpc")} 85 q := qBuilder. 86 WithStoreAddresses(stores...). 87 WithRuleAddresses(stores...). 88 Init() 89 testutil.Ok(t, e2e.StartAndWaitReady(q)) 90 91 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) 92 t.Cleanup(cancel) 93 94 testutil.Ok(t, q.WaitSumMetricsWithOptions(e2emon.Equals(4), []string{"thanos_store_nodes_grpc_connections"}, e2emon.WaitMissingMetrics())) 95 96 ruleAndAssert(t, ctx, q.Endpoint("http"), "", []*rulespb.RuleGroup{ 97 { 98 Name: "example_abort", 99 File: q.Dir() + "/rules/rules.yaml", 100 Rules: []*rulespb.Rule{ 101 rulespb.NewAlertingRule(&rulespb.Alert{ 102 Name: "TestAlert_AbortOnPartialResponse", 103 State: rulespb.AlertState_FIRING, 104 Query: "absent(some_metric)", 105 Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ 106 {Name: "prometheus", Value: "ha"}, 107 {Name: "severity", Value: "page"}, 108 }}, 109 Health: string(rules.HealthGood), 110 }), 111 }, 112 }, 113 { 114 Name: "example_abort", 115 File: q.Dir() + "/thanos-rules/rules-0.yaml", 116 Rules: []*rulespb.Rule{ 117 rulespb.NewAlertingRule(&rulespb.Alert{ 118 Name: "TestAlert_AbortOnPartialResponse", 119 State: rulespb.AlertState_FIRING, 120 Query: "absent(some_metric)", 121 Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ 122 {Name: "severity", Value: "page"}, 123 }}, 124 Health: string(rules.HealthGood), 125 }), 126 }, 127 }, 128 { 129 Name: "example_warn", 130 File: q.Dir() + "/thanos-rules/rules-1.yaml", 131 Rules: []*rulespb.Rule{ 132 rulespb.NewAlertingRule(&rulespb.Alert{ 133 Name: "TestAlert_WarnOnPartialResponse", 134 State: rulespb.AlertState_FIRING, 135 Query: "absent(some_metric)", 136 Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ 137 {Name: "severity", Value: "page"}, 138 }}, 139 Health: string(rules.HealthGood), 140 }), 141 }, 142 }, 143 { 144 Name: "example_with_limit", 145 File: q.Dir() + "/thanos-rules/rules-with-limit.yaml", 146 Limit: 1, 147 Rules: []*rulespb.Rule{ 148 rulespb.NewAlertingRule(&rulespb.Alert{ 149 Name: "TestAlert_WithLimit", 150 State: rulespb.AlertState_INACTIVE, 151 Query: `promhttp_metric_handler_requests_total`, 152 Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ 153 {Name: "severity", Value: "page"}, 154 }}, 155 Health: string(rules.HealthBad), 156 }), 157 }, 158 }, 159 }) 160 } 161 162 func ruleAndAssert(t *testing.T, ctx context.Context, addr, typ string, want []*rulespb.RuleGroup) { 163 t.Helper() 164 165 fmt.Println("ruleAndAssert: Waiting for results for rules type", typ) 166 var result []*rulespb.RuleGroup 167 168 logger := log.NewLogfmtLogger(os.Stdout) 169 testutil.Ok(t, runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error { 170 res, err := promclient.NewDefaultClient().RulesInGRPC(ctx, urlParse(t, "http://"+addr), typ) 171 if err != nil { 172 return err 173 } 174 175 if len(result) != len(res) { 176 fmt.Println("ruleAndAssert: new result:", res) 177 result = res 178 } 179 180 if len(res) != len(want) { 181 return errors.Errorf("unexpected result size, want %d; got: %d result: %v", len(want), len(res), res) 182 } 183 184 for ig, g := range res { 185 res[ig].LastEvaluation = time.Time{} 186 res[ig].EvaluationDurationSeconds = 0 187 res[ig].Interval = 0 188 res[ig].PartialResponseStrategy = 0 189 190 sort.Slice(g.Rules, func(i, j int) bool { return g.Rules[i].Compare(g.Rules[j]) < 0 }) 191 192 for ir, r := range g.Rules { 193 if alert := r.GetAlert(); alert != nil { 194 res[ig].Rules[ir] = rulespb.NewAlertingRule(&rulespb.Alert{ 195 Name: alert.Name, 196 State: alert.State, 197 Query: alert.Query, 198 Labels: alert.Labels, 199 Health: alert.Health, 200 }) 201 } else if rec := r.GetRecording(); rec != nil { 202 res[ig].Rules[ir] = rulespb.NewAlertingRule(&rulespb.Alert{ 203 Name: rec.Name, 204 Query: rec.Query, 205 Labels: rec.Labels, 206 Health: rec.Health, 207 }) 208 } 209 } 210 } 211 212 if !reflect.DeepEqual(want, res) { 213 return errors.Errorf("unexpected result\nwant %v\ngot: %v", want, res) 214 } 215 216 return nil 217 })) 218 }