github.com/thanos-io/thanos@v0.32.5/test/e2e/compatibility_test.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package e2e_test 5 6 import ( 7 "bytes" 8 "fmt" 9 "io" 10 "net/http" 11 "net/url" 12 "os" 13 "path/filepath" 14 "testing" 15 "time" 16 17 "github.com/efficientgo/e2e" 18 e2edb "github.com/efficientgo/e2e/db" 19 e2emon "github.com/efficientgo/e2e/monitoring" 20 "github.com/efficientgo/e2e/monitoring/promconfig" 21 sdconfig "github.com/efficientgo/e2e/monitoring/promconfig/discovery/config" 22 "github.com/efficientgo/e2e/monitoring/promconfig/discovery/targetgroup" 23 e2eobs "github.com/efficientgo/e2e/observable" 24 common_cfg "github.com/prometheus/common/config" 25 "github.com/prometheus/common/model" 26 "github.com/prometheus/prometheus/config" 27 28 "github.com/efficientgo/core/testutil" 29 "github.com/thanos-io/thanos/pkg/alert" 30 "github.com/thanos-io/thanos/pkg/httpconfig" 31 "github.com/thanos-io/thanos/pkg/queryfrontend" 32 "github.com/thanos-io/thanos/pkg/store" 33 "github.com/thanos-io/thanos/test/e2e/e2ethanos" 34 ) 35 36 // TestPromQLCompliance tests PromQL compatibility against https://github.com/prometheus/compliance/tree/main/promql. 37 // NOTE: This requires dockerization of compliance framework: https://github.com/prometheus/compliance/pull/46 38 // Test requires at least ~11m, so run this with `-test.timeout 9999m`. 39 func TestPromQLCompliance(t *testing.T) { 40 testPromQLCompliance(t, false, store.EagerRetrieval) 41 } 42 43 // TestPromQLComplianceWithLazy tests PromQL compatibility against https://github.com/prometheus/compliance/tree/main/promql. 44 // NOTE: This requires dockerization of compliance framework: https://github.com/prometheus/compliance/pull/46 45 // Test requires at least ~11m, so run this with `-test.timeout 9999m`. 46 // This uses lazy evaluation to test out how it works in comparison to eager. 47 func TestPromQLComplianceWithLazy(t *testing.T) { 48 testPromQLCompliance(t, false, store.LazyRetrieval) 49 } 50 51 // TestPromQLComplianceWithQueryFrontend tests PromQL compatibility with query frontend with sharding enabled. 52 func TestPromQLComplianceWithShardingQueryFrontend(t *testing.T) { 53 testPromQLCompliance(t, true, store.EagerRetrieval) 54 } 55 56 func testPromQLCompliance(t *testing.T, queryFrontend bool, retrievalStrategy store.RetrievalStrategy) { 57 t.Skip("This is interactive test, it requires time to build up (scrape) the data. The data is also obtain from remote promlab servers.") 58 59 e, err := e2e.NewDockerEnvironment("compatibility") 60 testutil.Ok(t, err) 61 t.Cleanup(e.Close) 62 63 // Start receive + Querier. 64 receiverRunnable := e2ethanos.NewReceiveBuilder(e, "receive").WithIngestionEnabled().Init() 65 queryReceive := e2edb.NewThanosQuerier(e, "query_receive", []string{receiverRunnable.InternalEndpoint("grpc")}) 66 testutil.Ok(t, e2e.StartAndWaitReady(receiverRunnable, queryReceive)) 67 68 rwURL, err := url.Parse(e2ethanos.RemoteWriteEndpoint(receiverRunnable.InternalEndpoint("remote-write"))) 69 testutil.Ok(t, err) 70 // Start reference Prometheus. 71 prom := e2edb.NewPrometheus(e, "prom") 72 testutil.Ok(t, prom.SetConfig(promconfig.Config{ 73 GlobalConfig: promconfig.GlobalConfig{ 74 EvaluationInterval: model.Duration(5 * time.Second), 75 ScrapeInterval: model.Duration(5 * time.Second), 76 ExternalLabels: map[model.LabelName]model.LabelValue{ 77 "prometheus": "1", 78 }, 79 }, 80 RemoteWriteConfigs: []*promconfig.RemoteWriteConfig{ 81 { 82 URL: &common_cfg.URL{URL: rwURL}, 83 }, 84 }, 85 ScrapeConfigs: []*promconfig.ScrapeConfig{ 86 { 87 JobName: "demo", 88 ServiceDiscoveryConfig: sdconfig.ServiceDiscoveryConfig{ 89 StaticConfigs: []*targetgroup.Group{ 90 { 91 Source: "demo.promlabs.com:10000", 92 }, 93 { 94 Source: "demo.promlabs.com:10001", 95 }, 96 { 97 Source: "demo.promlabs.com:10002", 98 }, 99 }, 100 }, 101 }, 102 }, 103 })) 104 testutil.Ok(t, e2e.StartAndWaitReady(prom)) 105 106 // Start sidecar + Querier 107 sidecar := e2edb.NewThanosSidecar(e, "sidecar", prom, e2edb.WithImage("thanos")) 108 extraOpts := []e2edb.Option{e2edb.WithImage("thanos"), e2edb.WithFlagOverride(map[string]string{"--grpc.proxy-strategy": string(retrievalStrategy)})} 109 querySidecar := e2edb.NewThanosQuerier(e, "query_sidecar", []string{sidecar.InternalEndpoint("grpc")}, extraOpts...) 110 testutil.Ok(t, e2e.StartAndWaitReady(sidecar, querySidecar)) 111 112 // Start noop promql-compliance-tester. See https://github.com/prometheus/compliance/tree/main/promql on how to build local docker image. 113 compliance := e.Runnable("promql-compliance-tester").Init(e2e.StartOptions{ 114 Image: "promql-compliance-tester:latest", 115 Command: e2e.NewCommandWithoutEntrypoint("tail", "-f", "/dev/null"), 116 }) 117 testutil.Ok(t, e2e.StartAndWaitReady(compliance)) 118 119 // Wait 10 minutes for Prometheus to scrape relevant data. 120 time.Sleep(10 * time.Minute) 121 122 t.Run("receive", func(t *testing.T) { 123 queryTargetRunnable := queryReceive 124 if queryFrontend { 125 qf := newQueryFrontendRunnable(e, "query_frontend_receive", queryReceive.InternalEndpoint("http")) 126 testutil.Ok(t, e2e.StartAndWaitReady(qf)) 127 queryTargetRunnable = qf 128 } 129 130 testutil.Ok(t, os.WriteFile(filepath.Join(compliance.Dir(), "receive.yaml"), 131 []byte(promQLCompatConfig(prom, queryTargetRunnable, []string{"prometheus", "receive", "tenant_id"})), os.ModePerm)) 132 133 testutil.Ok(t, compliance.Exec(e2e.NewCommand( 134 "/promql-compliance-tester", 135 "-config-file", filepath.Join(compliance.InternalDir(), "receive.yaml"), 136 "-config-file", "/promql-test-queries.yml", 137 ))) 138 }) 139 t.Run("sidecar", func(t *testing.T) { 140 queryTargetRunnable := querySidecar 141 if queryFrontend { 142 qf := newQueryFrontendRunnable(e, "query_frontend_sidecar", queryReceive.InternalEndpoint("http")) 143 testutil.Ok(t, e2e.StartAndWaitReady(qf)) 144 queryTargetRunnable = qf 145 } 146 147 testutil.Ok(t, os.WriteFile(filepath.Join(compliance.Dir(), "sidecar.yaml"), 148 []byte(promQLCompatConfig(prom, queryTargetRunnable, []string{"prometheus"})), os.ModePerm)) 149 150 testutil.Ok(t, compliance.Exec(e2e.NewCommand( 151 "/promql-compliance-tester", 152 "-config-file", filepath.Join(compliance.InternalDir(), "sidecar.yaml"), 153 "-config-file", "/promql-test-queries.yml", 154 ))) 155 }) 156 } 157 158 // nolint (it's still used in skipped test). 159 func promQLCompatConfig(reference *e2emon.Prometheus, target e2e.Runnable, dropLabels []string) string { 160 return `reference_target_config: 161 query_url: 'http://` + reference.InternalEndpoint("http") + `' 162 163 test_target_config: 164 query_url: 'http://` + target.InternalEndpoint("http") + `' 165 166 query_tweaks: 167 - note: 'Thanos requires adding "external_labels" to distinguish Prometheus servers, leading to extra labels in query results that need to be stripped before comparing results.' 168 no_bug: true 169 drop_result_labels: 170 ` + func() (ret string) { 171 for _, l := range dropLabels { 172 ret += ` - ` + l + "\n" 173 } 174 return ret 175 }() 176 } 177 178 // TestAlertCompliance tests Alert compatibility against https://github.com/prometheus/compliance/blob/main/alert_generator. 179 // NOTE: This requires a dockerization of compliance framework: https://github.com/prometheus/compliance/pull/46 180 func TestAlertCompliance(t *testing.T) { 181 t.Skip("This is an interactive test, using https://github.com/prometheus/compliance/tree/main/alert_generator. This tool is not optimized for CI runs (e.g. it infinitely retries, takes 38 minutes)") 182 183 t.Run("stateful ruler", func(t *testing.T) { 184 e, err := e2e.NewDockerEnvironment("alert-compat") 185 testutil.Ok(t, err) 186 t.Cleanup(e.Close) 187 188 // Start receive + Querier. 189 receive := e2ethanos.NewReceiveBuilder(e, "receive").WithIngestionEnabled().Init() 190 rwEndpoint := e2ethanos.RemoteWriteEndpoint(receive.InternalEndpoint("remote-write")) 191 querierBuilder := e2ethanos.NewQuerierBuilder(e, "query") 192 193 compliance := e.Runnable("alert_generator_compliance_tester").WithPorts(map[string]int{"http": 8080}).Init(e2e.StartOptions{ 194 Image: "alert_generator_compliance_tester:latest", 195 Command: e2e.NewCommandRunUntilStop(), 196 }) 197 198 rFuture := e2ethanos.NewRulerBuilder(e, "1") 199 ruler := rFuture.WithAlertManagerConfig([]alert.AlertmanagerConfig{ 200 { 201 EndpointsConfig: httpconfig.EndpointsConfig{ 202 StaticAddresses: []string{compliance.InternalEndpoint("http")}, 203 Scheme: "http", 204 }, 205 Timeout: amTimeout, 206 APIVersion: alert.APIv1, 207 }, 208 }). 209 // Use default resend delay and eval interval, as the compliance spec requires this. 210 WithResendDelay("1m"). 211 WithEvalInterval("1m"). 212 WithReplicaLabel(""). 213 InitTSDB(filepath.Join(rFuture.InternalDir(), "rules"), []httpconfig.Config{ 214 { 215 EndpointsConfig: httpconfig.EndpointsConfig{ 216 StaticAddresses: []string{ 217 querierBuilder.InternalEndpoint("http"), 218 }, 219 Scheme: "http", 220 }, 221 }, 222 }) 223 224 query := querierBuilder. 225 WithStoreAddresses(receive.InternalEndpoint("grpc"), ruler.InternalEndpoint("grpc")). 226 // We deduplicate by this, since alert compatibility tool requires clean metric without labels 227 // attached by receivers. 228 WithReplicaLabels("receive", "tenant_id"). 229 Init() 230 testutil.Ok(t, e2e.StartAndWaitReady(receive, query, ruler, compliance)) 231 232 // Pull rules.yaml: 233 { 234 var stdout bytes.Buffer 235 testutil.Ok(t, compliance.Exec(e2e.NewCommand("cat", "/rules.yaml"), e2e.WithExecOptionStdout(&stdout))) 236 testutil.Ok(t, os.MkdirAll(filepath.Join(ruler.Dir(), "rules"), os.ModePerm)) 237 testutil.Ok(t, os.WriteFile(filepath.Join(ruler.Dir(), "rules", "rules.yaml"), stdout.Bytes(), os.ModePerm)) 238 239 // Reload ruler. 240 resp, err := http.Post("http://"+ruler.Endpoint("http")+"/-/reload", "", nil) 241 testutil.Ok(t, err) 242 defer func() { 243 _, _ = io.Copy(io.Discard, resp.Body) 244 _ = resp.Body.Close() 245 }() 246 testutil.Equals(t, http.StatusOK, resp.StatusCode) 247 } 248 alertCompatCfg := alertCompatConfig(rwEndpoint, query.InternalEndpoint("http"), ruler.InternalEndpoint("http")) 249 testutil.Ok(t, os.WriteFile(filepath.Join(compliance.Dir(), "test-thanos.yaml"), []byte(alertCompatCfg), os.ModePerm)) 250 251 fmt.Println(alertCompatCfg) 252 253 testutil.Ok(t, compliance.Exec(e2e.NewCommand( 254 "/alert_generator_compliance_tester", "-config-file", filepath.Join(compliance.InternalDir(), "test-thanos.yaml")), 255 )) 256 }) 257 258 t.Run("stateless ruler", func(t *testing.T) { 259 e, err := e2e.NewDockerEnvironment("alert-compat") 260 testutil.Ok(t, err) 261 t.Cleanup(e.Close) 262 263 // Start receive + Querier. 264 receive := e2ethanos.NewReceiveBuilder(e, "receive").WithIngestionEnabled().Init() 265 rwEndpoint := e2ethanos.RemoteWriteEndpoint(receive.InternalEndpoint("remote-write")) 266 rwURL := urlParse(t, rwEndpoint) 267 rFuture := e2ethanos.NewRulerBuilder(e, "1") 268 query := e2ethanos.NewQuerierBuilder(e, "query"). 269 WithStoreAddresses(receive.InternalEndpoint("grpc")). 270 // We deduplicate by this, since alert compatibility tool requires clean metric without labels 271 // attached by receivers. 272 WithReplicaLabels("receive", "tenant_id"). 273 Init() 274 275 compliance := e.Runnable("alert_generator_compliance_tester").WithPorts(map[string]int{"http": 8080}).Init(e2e.StartOptions{ 276 Image: "alert_generator_compliance_tester:latest", 277 Command: e2e.NewCommandRunUntilStop(), 278 }) 279 280 ruler := rFuture.WithAlertManagerConfig([]alert.AlertmanagerConfig{ 281 { 282 EndpointsConfig: httpconfig.EndpointsConfig{ 283 StaticAddresses: []string{compliance.InternalEndpoint("http")}, 284 Scheme: "http", 285 }, 286 Timeout: amTimeout, 287 APIVersion: alert.APIv1, 288 }, 289 }). 290 // Use default resend delay and eval interval, as the compliance spec requires this. 291 WithResendDelay("1m"). 292 WithEvalInterval("1m"). 293 WithReplicaLabel(""). 294 WithRestoreIgnoredLabels("tenant_id"). 295 InitStateless(filepath.Join(rFuture.InternalDir(), "rules"), []httpconfig.Config{ 296 { 297 EndpointsConfig: httpconfig.EndpointsConfig{ 298 StaticAddresses: []string{ 299 query.InternalEndpoint("http"), 300 }, 301 Scheme: "http", 302 }, 303 }, 304 }, []*config.RemoteWriteConfig{ 305 {URL: &common_cfg.URL{URL: rwURL}, Name: "thanos-receiver"}, 306 }) 307 308 testutil.Ok(t, e2e.StartAndWaitReady(receive, query, ruler, compliance)) 309 310 // Pull rules.yaml: 311 { 312 var stdout bytes.Buffer 313 testutil.Ok(t, compliance.Exec(e2e.NewCommand("cat", "/rules.yaml"), e2e.WithExecOptionStdout(&stdout))) 314 testutil.Ok(t, os.MkdirAll(filepath.Join(ruler.Dir(), "rules"), os.ModePerm)) 315 testutil.Ok(t, os.WriteFile(filepath.Join(ruler.Dir(), "rules", "rules.yaml"), stdout.Bytes(), os.ModePerm)) 316 317 // Reload ruler. 318 resp, err := http.Post("http://"+ruler.Endpoint("http")+"/-/reload", "", nil) 319 testutil.Ok(t, err) 320 defer func() { 321 _, _ = io.Copy(io.Discard, resp.Body) 322 _ = resp.Body.Close() 323 }() 324 testutil.Equals(t, http.StatusOK, resp.StatusCode) 325 } 326 alertCompatCfg := alertCompatConfig(rwEndpoint, query.InternalEndpoint("http"), query.InternalEndpoint("http")) 327 testutil.Ok(t, os.WriteFile(filepath.Join(compliance.Dir(), "test-thanos.yaml"), []byte(alertCompatCfg), os.ModePerm)) 328 329 fmt.Println(alertCompatCfg) 330 331 testutil.Ok(t, compliance.Exec(e2e.NewCommand( 332 "/alert_generator_compliance_tester", "-config-file", filepath.Join(compliance.InternalDir(), "test-thanos.yaml")), 333 )) 334 }) 335 } 336 337 // nolint (it's still used in skipped test). 338 func alertCompatConfig(remoteWriteURL, queryURL, rulesURL string) string { 339 return fmt.Sprintf(`settings: 340 remote_write_url: '%s' 341 query_base_url: 'http://%s' 342 rules_and_alerts_api_base_url: 'http://%s' 343 alert_reception_server_port: 8080 344 alert_message_parser: default 345 `, remoteWriteURL, queryURL, rulesURL) 346 } 347 348 func newQueryFrontendRunnable(e e2e.Environment, name, downstreamURL string) *e2eobs.Observable { 349 inMemoryCacheConfig := queryfrontend.CacheProviderConfig{ 350 Type: queryfrontend.INMEMORY, 351 Config: queryfrontend.InMemoryResponseCacheConfig{ 352 MaxSizeItems: 1000, 353 Validity: time.Hour, 354 }, 355 } 356 config := queryfrontend.Config{ 357 QueryRangeConfig: queryfrontend.QueryRangeConfig{ 358 AlignRangeWithStep: false, 359 }, 360 NumShards: 3, 361 } 362 return e2ethanos.NewQueryFrontend(e, name, downstreamURL, config, inMemoryCacheConfig) 363 }