#!/usr/bin/env bash
# Source: github.com/m3db/m3@v1.5.0/scripts/docker-integration-tests/prometheus/test.sh
#
# Brings up dbnode01, coordinator01 and prometheus01 via docker-compose and
# defines the test functions that are run against them.
#
# Environment overrides read below:
#   M3_PATH                    - repo checkout (default: $GOPATH/src/github.com/m3db/m3)
#   QUERY_LIMIT_MESSAGE        - error text expected when a query limit trips
#   RUN_GLOBAL_LIMIT_TEST      - defaulted to "true" here
#   QUERY_TIMEOUT_STATUS_CODE  - HTTP status expected for timed-out queries

set -xe

M3_PATH=${M3_PATH:-$GOPATH/src/github.com/m3db/m3}
TESTDIR="$M3_PATH"/scripts/docker-integration-tests/
source "$TESTDIR"/common.sh
source "$TESTDIR"/prometheus/test-correctness.sh
source "$TESTDIR"/prometheus/metadata-limits.sh
REVISION=$(git rev-parse HEAD)
COMPOSE_FILE="$TESTDIR"/prometheus/docker-compose.yml
# quay.io/m3db/prometheus_remote_client_golang @ v0.4.3
PROMREMOTECLI_IMAGE=quay.io/m3db/prometheus_remote_client_golang:v0.4.3
JQ_IMAGE=realguess/jq:1.4@sha256:300c5d9fb1d74154248d155ce182e207cf6630acccbaadd0168e18b15bfaa786
METRIC_NAME_TEST_RESTRICT_WRITE=bar_metric
QUERY_LIMIT_MESSAGE="${QUERY_LIMIT_MESSAGE:-query exceeded limit}"
RUN_GLOBAL_LIMIT_TEST="${RUN_GLOBAL_LIMIT_TEST:-true}"
QUERY_TIMEOUT_STATUS_CODE="${QUERY_TIMEOUT_STATUS_CODE:-504}"
export REVISION

echo "Pull containers required for test"
docker pull "$PROMREMOTECLI_IMAGE"
docker pull "$JQ_IMAGE"

echo "Run m3dbnode and m3coordinator containers"
docker-compose -f "${COMPOSE_FILE}" up -d dbnode01
docker-compose -f "${COMPOSE_FILE}" up -d coordinator01

TEST_SUCCESS=false

# EXIT handler: dump the docker-compose logs unless TEST_SUCCESS was set to
# "true", then tear the containers down.
function defer {
  if [[ "$TEST_SUCCESS" != "true" ]]; then
    echo "Test failure, printing docker-compose logs"
    # Guarded so that a failure to print logs (errexit is on) cannot prevent
    # the teardown below from running.
    docker-compose -f "${COMPOSE_FILE}" logs || echo "unable to print docker-compose logs"
  fi

  docker-compose -f "${COMPOSE_FILE}" down || echo "unable to shutdown containers" # CI fails to stop all containers sometimes
}
trap defer EXIT

# Not defined in this file; presumably provided by one of the sourced scripts.
setup_single_m3db_node

echo "Start Prometheus containers"
docker-compose -f "${COMPOSE_FILE}" up -d prometheus01

# Polls 0.0.0.0:7201/ready until it answers with HTTP 200.
function test_readiness {
  # Check readiness probe eventually succeeds
  echo "Check readiness probe eventually succeeds"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl --write-out "%{http_code}" --silent --output /dev/null 0.0.0.0:7201/ready) -eq "200" ]]'
}

# NOTE: retry_with_backoff is not defined in this file (presumably it comes
# from the sourced common.sh). The single-quoted strings passed to it reference
# non-exported variables of this script, so they are evaluated in-process.

# Waits until Prometheus (port 9090) reports more than 100 samples for
# prometheus_remote_storage_samples_total.
function test_prometheus_remote_read {
  # Ensure Prometheus can proxy a Prometheus query
  echo "Wait until the remote write endpoint generates and allows for data to be queried"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -sSf 0.0.0.0:9090/api/v1/query?query=prometheus_remote_storage_samples_total | jq -r .data.result[0].value[1]) -gt 100 ]]'
}

# Waits until database_write_tagged_success is non-zero for both the "unagg"
# and "agg" namespaces.
function test_prometheus_remote_write_multi_namespaces {
  # Make sure we're proxying writes to the unaggregated namespace
  echo "Wait until data begins being written to remote storage for the unaggregated namespace"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -sSf 0.0.0.0:9090/api/v1/query?query=database_write_tagged_success\\{namespace=\"unagg\"\\} | jq -r .data.result[0].value[1]) -gt 0 ]]'

  # Make sure we're proxying writes to the aggregated namespace
  echo "Wait until data begins being written to remote storage for the aggregated namespace"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -sSf 0.0.0.0:9090/api/v1/query?query=database_write_tagged_success\\{namespace=\"agg\"\\} | jq -r .data.result[0].value[1]) -gt 0 ]]'
}

#######################################
# Writes one datapoint through the coordinator's Prometheus remote write
# endpoint using the PROMREMOTECLI_IMAGE container, then checks the reported
# success flag and status code.
# Globals (read):
#   PROMREMOTECLI_IMAGE, JQ_IMAGE
#   TAG_NAME_0..TAG_NAME_10 / TAG_VALUE_0..TAG_VALUE_10 - optional extra tags;
#     a pair is sent when either its name or its value is non-empty.
# Arguments:
#   $1  metric name              $2  datapoint timestamp
#   $3  datapoint value          $4  expected success ("true"/"false")
#   $5  message if $4 mismatches $6  expected status code
#   $7  message if $6 mismatches $8  M3-Metrics-Type header value (optional)
#   $9  M3-Storage-Policy header value (optional)
#   $10 M3-Map-Tags-JSON header value (optional)
# Returns:
#   0 when success and status match the expectations, 1 otherwise.
#######################################
function prometheus_remote_write {
  local metric_name=$1
  local datapoint_timestamp=$2
  local datapoint_value=$3
  local expect_success=$4
  local expect_success_err=$5
  local expect_status=$6
  local expect_status_err=$7
  local metrics_type=$8
  local metrics_storage_policy=$9
  local map_tags_header=${10}

  # Collect the optional tags as an array so that values containing
  # whitespace reach the client as a single argument.
  local -a optional_tags=()
  local i tag_name_var tag_value_var optional_tag_name optional_tag_value
  for (( i = 0; i <= 10; i++ )); do
    tag_name_var="TAG_NAME_${i}"
    tag_value_var="TAG_VALUE_${i}"
    # Indirect expansion instead of eval; unset variables expand to "".
    optional_tag_name=${!tag_name_var}
    optional_tag_value=${!tag_value_var}
    if [[ -n "$optional_tag_name" || -n "$optional_tag_value" ]]; then
      optional_tags+=(-t "${optional_tag_name}:${optional_tag_value}")
    fi
  done

  # Use the ID (first column) of the last listed docker network whose line
  # contains "prometheus".
  local network_name="prometheus"
  local network
  network=$(docker network ls | grep -F "$network_name" | tr -s ' ' | cut -f 1 -d ' ' | tail -n 1)

  # "|| true": a failing client run or an empty grep result must not abort the
  # script here; the outcome is judged from the parsed output below.
  local out
  out=$(
    docker run -it --rm --network "$network" \
      "$PROMREMOTECLI_IMAGE" \
      -u http://coordinator01:7201/api/v1/prom/remote/write \
      -t "__name__:${metric_name}" "${optional_tags[@]}" \
      -h "M3-Metrics-Type: ${metrics_type}" \
      -h "M3-Storage-Policy: ${metrics_storage_policy}" \
      -h "M3-Map-Tags-JSON: ${map_tags_header}" \
      -d "${datapoint_timestamp},${datapoint_value}" \
      | grep -v promremotecli_log || true
  )

  local success status
  success=$(printf '%s\n' "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .success)
  status=$(printf '%s\n' "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .statusCode)
  if [[ "$success" != "$expect_success" ]]; then
    echo "$expect_success_err"
    return 1
  fi
  if [[ "$status" != "$expect_status" ]]; then
    echo "${expect_status_err}: actual=${status}"
    return 1
  fi
  echo "Returned success=${success}, status=${status} as expected"
  return 0
}

# A tag with an empty name must be rejected with HTTP 400.
function test_prometheus_remote_write_empty_label_name_returns_400_status_code {
  echo "Test write empty name for a label returns HTTP 400"
  now=$(date +"%s")
  TAG_NAME_0="non_empty_name" TAG_VALUE_0="foo" \
    TAG_NAME_1="" TAG_VALUE_1="bar" \
    prometheus_remote_write \
    "foo_metric" "$now" 42 \
    false "Expected request to fail" \
    400 "Expected request to return status code 400"
}

# A tag with an empty value must be rejected with HTTP 400.
function test_prometheus_remote_write_empty_label_value_returns_400_status_code {
  echo "Test write empty value for a label returns HTTP 400"
  now=$(date +"%s")
  TAG_NAME_0="foo" TAG_VALUE_0="bar" \
    TAG_NAME_1="non_empty_name" TAG_VALUE_1="" \
    prometheus_remote_write \
    "foo_metric" "$now" 42 \
    false "Expected request to fail" \
    400 "Expected request to return status code 400"
}

# Two tags sharing the same name must be rejected with HTTP 400.
function test_prometheus_remote_write_duplicate_label_returns_400_status_code {
  echo "Test write with duplicate labels returns HTTP 400"
  now=$(date +"%s")
  TAG_NAME_0="dupe_name" TAG_VALUE_0="foo" \
    TAG_NAME_1="non_dupe_name" TAG_VALUE_1="bar" \
    TAG_NAME_2="dupe_name" TAG_VALUE_2="baz" \
    prometheus_remote_write \
    "foo_metric" "$now" 42 \
    false "Expected request to fail" \
    400 "Expected request to return status code 400"
}

# A datapoint timestamped one hour in the past must be rejected with HTTP 400.
function test_prometheus_remote_write_too_old_returns_400_status_code {
  echo "Test write into the past returns HTTP 400"
  now=$(date +"%s")
  hour_ago=$(( now - 3600 ))
  prometheus_remote_write \
    "foo_metric" "$hour_ago" 3.142 \
    false "Expected request to fail" \
    400 "Expected request to return status code 400"
}

# Writes METRIC_NAME_TEST_RESTRICT_WRITE once as unaggregated (42.42) and once
# as aggregated with storage policy 15s:10h (84.84). The literal timestamp
# "now" is passed through to the client unchanged.
function test_prometheus_remote_write_restrict_metrics_type {
  # Test we can specify metrics type
  echo "Test write with unaggregated metrics type works as expected"
  prometheus_remote_write \
    "$METRIC_NAME_TEST_RESTRICT_WRITE" now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200" \
    unaggregated

  echo "Test write with aggregated metrics type works as expected"
  prometheus_remote_write \
    "$METRIC_NAME_TEST_RESTRICT_WRITE" now 84.84 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200" \
    aggregated 15s:10h
}

# Writes with an M3-Map-Tags-JSON header that adds globaltag=somevalue and then
# queries until that tag is visible on the unaggregated series.
function test_prometheus_remote_write_map_tags {
  echo "Test map tags header works as expected"
  prometheus_remote_write \
    "$METRIC_NAME_TEST_RESTRICT_WRITE" now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200" \
    unaggregated "" '{"tagMappers":[{"write":{"tag":"globaltag","value":"somevalue"}}]}'

  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 \
    endpoint=query query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="" return_status_code="" \
    metrics_type="unaggregated" jq_path=".data.result[0].metric.globaltag" expected_value="somevalue" \
    retry_with_backoff prometheus_query_native
}

function test_query_lookback_applied {
  # Note: this test depends on the config in m3coordinator.yml for this test
  # and the following config value "lookbackDuration: 10m".
  echo "Test lookback config respected"
  now=$(date +"%s")
  # Write into past less than the lookback duration.
  eight_mins_ago=$(( now - 480 ))
  prometheus_remote_write \
    "lookback_test" "$eight_mins_ago" 42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200" \
    "unaggregated"

  # Now query and ensure that the latest timestamp is within the last two steps
  # from now.
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/query_range?query=lookback_test&step=15&start=$(expr $(date "+%s") - 600)&end=$(date "+%s")" | jq -r ".data.result[0].values[-1][0]") -gt $(expr $(date "+%s") - 30) ]]'
}

# Exercises the series, docs, returned-datapoints, returned-series,
# returned-series-metadata and time-range limits via M3-Limit-* headers against
# the coordinator on port 7201.
function test_query_limits_applied {
  # Test the default series limit applied when directly querying
  # coordinator (limit set to 100 in m3coordinator.yml)
  # NB: ensure that the limit is not exceeded (it may be below limit).
  echo "Test query limit with coordinator defaults"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s 0.0.0.0:7201/api/v1/query?query=\\{metrics_storage=\"m3db_remote\"\\} | jq -r ".data.result | length") -lt 101 ]]'

  # Test the series limit applied when directly querying
  # coordinator (series limit set by header)
  # NOTE(review): `grep ""` matches every line and `jq ."error"` prints `null`
  # when the field is absent, so this check passes for any JSON response rather
  # than only for an error. The sibling checks grep for "$QUERY_LIMIT_MESSAGE";
  # confirm whether that was intended here too before tightening it.
  echo "Test query series limit with coordinator limit header (default errors without RequireExhaustive disabled)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Series: 10" 0.0.0.0:7201/api/v1/query?query=\\{metrics_storage=\"m3db_remote\"\\} | jq ."error" | grep "") ]]'

  echo "Test query series limit with require-exhaustive headers false"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Series: 2" -H "M3-Limit-Require-Exhaustive: false" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success | jq -r ".data.result | length") -eq 2 ]]'

  echo "Test query series limit with require-exhaustive headers true (below limit therefore no error)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Series: 4" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query series limit with require-exhaustive headers true (above limit therefore error)"
  # Test that require exhaustive error is returned
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Series: 3" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'
  # Test that require exhaustive error is 4xx
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Series: 3" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success) = "400" ]]'

  # Test the docs limit applied when directly querying
  # coordinator (docs limit set by header)
  echo "Test query docs limit with coordinator limit header"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Docs: 1" 0.0.0.0:7201/api/v1/query?query=\\{metrics_storage=\"m3db_remote\"\\} | jq -r ".data.result | length") -lt 101 ]]'

  echo "Test query docs limit with require-exhaustive headers false"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Docs: 1" -H "M3-Limit-Require-Exhaustive: false" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query docs limit with require-exhaustive headers true (below limit therefore no error)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Docs: 4" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query docs limit with require-exhaustive headers true (above limit therefore error)"
  # Test that require exhaustive error is returned
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Docs: 1" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'
  # Test that require exhaustive error is 4xx
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Docs: 1" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success) = "400" ]]'

  echo "Test query returned-datapoints limit - zero limit disabled"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-Datapoints: 0" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=$(expr $(date "+%s") - 6000)&end=$(date "+%s")" | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query returned-series limit - zero limit disabled"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-Series: 0" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=$(expr $(date "+%s") - 6000)&end=$(date "+%s")" | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query returned-series limit - above limit"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-Series: 4" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=$(expr $(date "+%s") - 6000)&end=$(date "+%s")" | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query returned-series limit - at limit"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-Series: 3" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=$(expr $(date "+%s") - 6000)&end=$(date "+%s")" | jq -r ".data.result | length") -eq 3 ]]'

  echo "Test query returned-series limit - below limit"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-Series: 2" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=$(expr $(date "+%s") - 6000)&end=$(date "+%s")" | jq -r ".data.result | length") -eq 2 ]]'

  # Test returned series metadata limits
  TAG_NAME_0="metadata_test_label" TAG_VALUE_0="series_label_0" \
    prometheus_remote_write \
    metadata_test_series now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"
  TAG_NAME_0="metadata_test_label" TAG_VALUE_0="series_label_1" \
    prometheus_remote_write \
    metadata_test_series now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"
  TAG_NAME_0="metadata_test_label" TAG_VALUE_0="series_label_2" \
    prometheus_remote_write \
    metadata_test_series now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"

  echo "Test query returned-series limit - zero limit disabled"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-SeriesMetadata: 0" "0.0.0.0:7201/api/v1/label/metadata_test_label/values?match[]=metadata_test_series" | jq -r ".data | length") -eq 3 ]]'

  echo "Test query returned-series limit - above limit"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-SeriesMetadata: 4" "0.0.0.0:7201/api/v1/label/metadata_test_label/values?match[]=metadata_test_series" | jq -r ".data | length") -eq 3 ]]'

  echo "Test query returned-series limit - at limit"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-SeriesMetadata: 3" "0.0.0.0:7201/api/v1/label/metadata_test_label/values?match[]=metadata_test_series" | jq -r ".data | length") -eq 3 ]]'

  echo "Test query returned-series limit - below limit"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Returned-SeriesMetadata: 2" "0.0.0.0:7201/api/v1/label/metadata_test_label/values?match[]=metadata_test_series" | jq -r ".data | length") -eq 2 ]]'

  # Test time range limits with query APIs.
  # query_url is expanded when the quoted assertions below are evaluated.
  query_url="0.0.0.0:7201/api/v1/query_range"
  echo "Test query time range limit with coordinator defaults"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "${query_url}?query=database_write_tagged_success&step=15&start=0&end=$(date +%s)" | jq -r ".data.result | length") -gt 0 ]]'

  echo "Test query time range limit with require-exhaustive headers false"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Range: 4h" -H "M3-Limit-Require-Exhaustive: false" "${query_url}?query=database_write_tagged_success&step=15&start=0&end=$(date +%s)" | jq -r ".data.result | length") -gt 0 ]]'

  echo "Test query time range limit with require-exhaustive headers true (above limit therefore error)"
  # Test that require exhaustive error is returned
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Range: 4h" -H "M3-Limit-Require-Exhaustive: true" "${query_url}?query=database_write_tagged_success&step=15&start=0&end=$(date +%s)" | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'
  # Test that require exhaustive error is 4xx
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Range: 4h" -H "M3-Limit-Require-Exhaustive: true" "${query_url}?query=database_write_tagged_success&step=15&start=0&end=$(date +%s)") = "400" ]]'

  # Test time range limits with metadata APIs.
  meta_query_url="0.0.0.0:7201/api/v1/label/metadata_test_label/values"
  echo "Test query time range limit with coordinator defaults"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "${meta_query_url}?match[]=metadata_test_series&start=0&end=$(date +%s)" | jq -r ".data | length") -gt 0 ]]'

  echo "Test query time range limit with require-exhaustive headers false"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Range: 4h" -H "M3-Limit-Require-Exhaustive: false" "${meta_query_url}?match[]=metadata_test_series&start=0&end=$(date +%s)" | jq -r ".data | length") -gt 0 ]]'

  echo "Test query time range limit with require-exhaustive headers true (above limit therefore error)"
  # Test that require exhaustive error is returned
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Range: 4h" -H "M3-Limit-Require-Exhaustive: true" "${meta_query_url}?match[]=metadata_test_series&start=0&end=$(date +%s)" | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'
  # Test that require exhaustive error is 4xx
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Range: 4h" -H "M3-Limit-Require-Exhaustive: true" "${meta_query_url}?match[]=metadata_test_series&start=0&end=$(date +%s)") = "400" ]]'
}

# Sets the "m3db.query.limits" key through the coordinator kvstore API and
# checks, in turn, that a limit of 1 makes queries error, that forceWaited
# yields a success with an M3-Waited header, and that M3-Limit-Require-No-Wait
# turns that into a 400. Restores the limits at the end.
function test_query_limits_global_applied {
  TAG_NAME_0="query_global_limit_test" TAG_VALUE_0="series_label_0" \
    prometheus_remote_write \
    metadata_test_series now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"
  TAG_NAME_0="query_global_limit_test" TAG_VALUE_0="series_label_1" \
    prometheus_remote_write \
    metadata_test_series now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"
  TAG_NAME_0="query_global_limit_test" TAG_VALUE_0="series_label_2" \
    prometheus_remote_write \
    metadata_test_series now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"

  # Set global limits.
  curl -vvvsSf -X POST 0.0.0.0:7201/api/v1/kvstore -d '{
    "key": "m3db.query.limits",
    "value": {
      "maxRecentlyQueriedSeriesDiskRead": {
        "limit": 1,
        "lookbackSeconds": 5
      }
    },
    "commit": true
  }'

  # Test that global limits are tripped.
  ATTEMPTS=20 TIMEOUT=1 MAX_TIMEOUT=1 retry_with_backoff \
    '[[ $(curl -s 0.0.0.0:7201/api/v1/query?query=\\{query_global_limit_test!=\"\"\\} | jq -r ."status") = "error" ]]'

  # Force waited for permit.
  curl -vvvsSf -X POST 0.0.0.0:7201/api/v1/kvstore -d '{
    "key": "m3db.query.limits",
    "value": {
      "maxRecentlyQueriedSeriesDiskRead": {
        "limit": 10000,
        "lookbackSeconds": 5,
        "forceWaited": true
      }
    },
    "commit": true
  }'

  # Check that success and waited header is returned.
  # The response headers go to a temp file rather than into the working
  # directory; $headers_file is expanded when the assertion is evaluated.
  local headers_file
  headers_file=$(mktemp)
  ATTEMPTS=20 TIMEOUT=1 MAX_TIMEOUT=1 retry_with_backoff \
    '[[ $(curl -s -D "$headers_file" 0.0.0.0:7201/api/v1/query?query=\\{query_global_limit_test!=\"\"\\} | jq -r ."status") = "success" ]] && [[ $(grep -c M3-Waited "$headers_file") = "1" ]]'
  rm -f -- "$headers_file"

  # Check that error when require no wait header set and waited header is returned.
  STATUS=$(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Require-No-Wait: true" 0.0.0.0:7201/api/v1/query?query=\\{query_global_limit_test!=\"\"\\})
  test "$STATUS" = "400"

  # Restore global limits.
  curl -vvvsSf -X POST 0.0.0.0:7201/api/v1/kvstore -d '{
    "key": "m3db.query.limits",
    "value": {
      "maxRecentlyQueriedSeriesDiskRead": {
        "limit": 0,
        "lookbackSeconds": 15,
        "forceWaited": false
      }
    },
    "commit": true
  }'
}

# Sends queries with "timeout: 1ns" and "timeout: 1ms" headers and expects
# QUERY_TIMEOUT_STATUS_CODE from each endpoint.
function test_query_timeouts {
  echo "Test query timeouts"

  # Exercise APIs with different minimal timeouts to trigger timeouts in varying parts of the stack

  # Confirms that timeouts at the coordinator layer
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ns" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success) = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ns" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=0&end=100") = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ns" 0.0.0.0:7201/api/v1/labels) = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ns" 0.0.0.0:7201/api/v1/label/__name__/values) = "$QUERY_TIMEOUT_STATUS_CODE" ]]'

  # Confirms that timeouts from coordinator -> m3db
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ms" 0.0.0.0:7201/api/v1/query?query=database_write_tagged_success) = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ms" "0.0.0.0:7201/api/v1/query_range?query=database_write_tagged_success&step=15&start=0&end=100") = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ms" 0.0.0.0:7201/api/v1/labels) = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
  ATTEMPTS=10 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "timeout: 1ms" 0.0.0.0:7201/api/v1/label/__name__/values) = "$QUERY_TIMEOUT_STATUS_CODE" ]]'
}

#######################################
# Runs one query against 0.0.0.0:7201/m3query/api/v1/<endpoint> and compares a
# single value with the expectation.
# Inputs are taken from variables of the same name (typically set as
# per-command assignments by the caller):
#   endpoint, query, params   - build "<endpoint>?query=<query>[&<params>]"
#   metrics_type              - M3-Metrics-Type header value
#   metrics_storage_policy    - M3-Storage-Policy header value
#   return_status_code        - "true": compare the HTTP status code;
#                               otherwise compare the first line produced by
#                               `jq -r "$jq_path"` on the response body
#   jq_path, expected_value
# Returns:
#   0 when the value equals expected_value, non-zero otherwise.
#######################################
function prometheus_query_native {
  local endpoint=${endpoint:-}
  local query=${query:-}
  local params=${params:-}
  local metrics_type=${metrics_type:-}
  local metrics_storage_policy=${metrics_storage_policy:-}
  local jq_path=${jq_path:-}
  local expected_value=${expected_value:-}
  local return_status_code=${return_status_code:-}

  local params_prefixed=""
  if [[ -n "$params" ]]; then
    params_prefixed="&${params}"
  fi

  local url="0.0.0.0:7201/m3query/api/v1/${endpoint}?query=${query}${params_prefixed}"
  local result
  if [[ "$return_status_code" == "true" ]]; then
    result=$(curl --write-out '%{http_code}' --silent --output /dev/null \
      -H "M3-Metrics-Type: ${metrics_type}" \
      -H "M3-Storage-Policy: ${metrics_storage_policy}" \
      "$url")
  else
    result=$(curl -s \
      -H "M3-Metrics-Type: ${metrics_type}" \
      -H "M3-Storage-Policy: ${metrics_storage_policy}" \
      "$url" | jq -r "${jq_path}" | head -1)
  fi
  # The comparison's exit status is the function's return value.
  test "$result" = "$expected_value"
}

# Queries METRIC_NAME_TEST_RESTRICT_WRITE restricted to unaggregated (expects
# 42.42) and to aggregated 15s:10h (expects 84.84), as instant and range query.
function test_query_restrict_metrics_type {
  now=$(date +"%s")
  hour_ago=$(( now - 3600 ))
  step="30s"
  params_instant=""
  params_range="start=${hour_ago}&end=${now}&step=${step}"
  jq_path_instant=".data.result[0].value[1]"
  jq_path_range=".data.result[0].values[][1]"
  return_status_code=""

  # Test restricting to unaggregated metrics
  echo "Test query restrict to unaggregated metrics type (instant)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 \
    endpoint=query query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="$params_instant" return_status_code="$return_status_code" \
    metrics_type="unaggregated" jq_path="$jq_path_instant" expected_value="42.42" \
    retry_with_backoff prometheus_query_native
  echo "Test query restrict to unaggregated metrics type (range)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 \
    endpoint=query_range query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="$params_range" return_status_code="$return_status_code" \
    metrics_type="unaggregated" jq_path="$jq_path_range" expected_value="42.42" \
    retry_with_backoff prometheus_query_native

  # Test restricting to aggregated metrics
  echo "Test query restrict to aggregated metrics type (instant)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 \
    endpoint=query query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="$params_instant" return_status_code="$return_status_code" \
    metrics_type="aggregated" metrics_storage_policy="15s:10h" jq_path="$jq_path_instant" expected_value="84.84" \
    retry_with_backoff prometheus_query_native
  echo "Test query restrict to aggregated metrics type (range)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 \
    endpoint=query_range query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="$params_range" return_status_code="$return_status_code" \
    metrics_type="aggregated" metrics_storage_policy="15s:10h" jq_path="$jq_path_range" expected_value="84.84" \
    retry_with_backoff prometheus_query_native
}

# Passes timeout=.0001s as a query parameter and expects
# QUERY_TIMEOUT_STATUS_CODE for both the instant and the range query.
function test_prometheus_query_native_timeout {
  now=$(date +"%s")
  hour_ago=$(( now - 3600 ))
  step="30s"
  timeout=".0001s"
  params_instant="timeout=${timeout}"
  params_range="start=${hour_ago}&end=${now}&step=${step}&timeout=${timeout}"
  return_status_code="true"

  echo "Test query gateway timeout (instant)"
  endpoint=query query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="$params_instant" \
    metrics_type="unaggregated" return_status_code="$return_status_code" expected_value="$QUERY_TIMEOUT_STATUS_CODE" \
    prometheus_query_native
  echo "Test query gateway timeout (range)"
  endpoint=query_range query="$METRIC_NAME_TEST_RESTRICT_WRITE" params="$params_range" \
    metrics_type="unaggregated" return_status_code="$return_status_code" expected_value="$QUERY_TIMEOUT_STATUS_CODE" \
    prometheus_query_native
}

function test_query_restrict_tags {
  # Test the default restrict tags is applied when directly querying
  # coordinator (restrict tags set to hide any restricted_metrics_type="hidden"
  # in m3coordinator.yml)

  # First write some hidden metrics.
  echo "Test write with unaggregated metrics type works as expected"
  TAG_NAME_0="restricted_metrics_type" TAG_VALUE_0="hidden" \
    TAG_NAME_1="foo_tag" TAG_VALUE_1="foo_tag_value" \
    prometheus_remote_write \
    some_hidden_metric now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"

  # Check that we can see them with zero restrictions applied as an
  # override (we do this check first so that when we test that they
  # don't appear by default we know that the metrics are already visible).
  echo "Test restrict by tags with header override to remove restrict works"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Restrict-By-Tags-JSON: {}" 0.0.0.0:7201/api/v1/query?query=\\{restricted_metrics_type=\"hidden\"\\} | jq -r ".data.result | length") -eq 1 ]]'

  # Now test that the defaults will hide the metrics altogether.
  echo "Test restrict by tags with coordinator defaults"
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s 0.0.0.0:7201/api/v1/query?query=\\{restricted_metrics_type=\"hidden\"\\} | jq -r ".data.result | length") -eq 0 ]]'
}

# Checks that /api/v1/series returns exactly one series for
# prometheus_remote_storage_samples_total with various start/end forms.
function test_series {
  # Test series search with start/end specified
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/series?match[]=prometheus_remote_storage_samples_total&start=0&end=9999999999999.99999" | jq -r ".data | length") -eq 1 ]]'

  # Test series search with no start/end specified
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/series?match[]=prometheus_remote_storage_samples_total" | jq -r ".data | length") -eq 1 ]]'

  # Test series search with min/max start time using the Prometheus Go
  # min/max formatted timestamps, which is sent as part of a Prometheus
  # remote query.
  # minTime = time.Unix(math.MinInt64/1000+62135596801, 0).UTC()
  # maxTime = time.Unix(math.MaxInt64/1000-62135596801, 999999999).UTC()
  # minTimeFormatted = minTime.Format(time.RFC3339Nano)
  # maxTimeFormatted = maxTime.Format(time.RFC3339Nano)
  # Which:
  # minTimeFormatted="-292273086-05-16T16:47:06Z"
  # maxTimeFormatted="292277025-08-18T07:12:54.999999999Z"
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/series?match[]=prometheus_remote_storage_samples_total&start=-292273086-05-16T16:47:06Z&end=292277025-08-18T07:12:54.999999999Z" | jq -r ".data | length") -eq 1 ]]'
}

# Exercises series and docs limits on the label values endpoint.
function test_label_query_limits_applied {
  # Test that require exhaustive does nothing if limits are not hit
  echo "Test label limits with require-exhaustive headers true (below limit therefore no error)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Series: 10000" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/label/__name__/values) = "200" ]]'

  # the header takes precedence over the configured default series limit
  echo "Test label series limit with coordinator limit header (default requires exhaustive so error)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Series: 1" 0.0.0.0:7201/api/v1/label/__name__/values | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'

  echo "Test label series limit with require-exhaustive headers false"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Series: 2" -H "M3-Limit-Require-Exhaustive: false" 0.0.0.0:7201/api/v1/label/__name__/values | jq -r ".data | length") -eq 1 ]]'

  echo "Test label series limit with require-exhaustive headers true (above limit therefore error)"
  # Test that require exhaustive error is returned
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Series: 2" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/label/__name__/values | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'
  # Test that require exhaustive error is 4xx
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Series: 2" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/label/__name__/values) = "400" ]]'

  echo "Test label docs limit with coordinator limit header (default requires exhaustive so error)"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Docs: 1" 0.0.0.0:7201/api/v1/label/__name__/values | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'

  echo "Test label docs limit with require-exhaustive headers false"
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -H "M3-Limit-Max-Docs: 2" -H "M3-Limit-Require-Exhaustive: false" 0.0.0.0:7201/api/v1/label/__name__/values | jq -r ".data | length") -eq 1 ]]'

  echo "Test label docs limit with require-exhaustive headers true (above limit therefore error)"
  # Test that require exhaustive error is returned
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ -n $(curl -s -H "M3-Limit-Max-Docs: 1" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/label/__name__/values | jq ."error" | grep "$QUERY_LIMIT_MESSAGE") ]]'
  # Test that require exhaustive error is 4xx
  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s -o /dev/null -w "%{http_code}" -H "M3-Limit-Max-Docs: 1" -H "M3-Limit-Require-Exhaustive: true" 0.0.0.0:7201/api/v1/label/__name__/values) = "400" ]]'
}

# Writes label_metric (tags name_0..name_2) and label_metric_2 (tags name_0,
# name_1), then checks the labels and label values endpoints with and without
# match[] selectors.
function test_labels {
  TAG_NAME_0="name_0" TAG_VALUE_0="value_0_1" \
    TAG_NAME_1="name_1" TAG_VALUE_1="value_1_1" \
    TAG_NAME_2="name_2" TAG_VALUE_2="value_2_1" \
    prometheus_remote_write \
    label_metric now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"

  TAG_NAME_0="name_0" TAG_VALUE_0="value_0_2" \
    TAG_NAME_1="name_1" TAG_VALUE_1="value_1_2" \
    prometheus_remote_write \
    label_metric_2 now 42.42 \
    true "Expected request to succeed" \
    200 "Expected request to return status code 200"

  # Test label search with match
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/labels" | jq -r "[.data[] | select(index(\"name_0\", \"name_1\", \"name_2\"))] | length") -eq 3 ]]'

  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/labels?match[]=label_metric" | jq -r ".data | length") -eq 4 ]]'

  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/labels?match[]=label_metric_2" | jq -r ".data | length") -eq 3 ]]'

  # Test label values search with match
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/label/name_1/values" | jq -r ".data | length") -eq 2 ]]' # two values without a match

  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/label/name_1/values?match[]=label_metric" | jq -r ".data | length") -eq 1 ]]'
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/label/name_1/values?match[]=label_metric" | jq -r ".data[0]") = "value_1_1" ]]'

  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/label/name_1/values?match[]=label_metric_2" | jq -r ".data | length") -eq 1 ]]'
  ATTEMPTS=5 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
    '[[ $(curl -s "0.0.0.0:7201/api/v1/label/name_1/values?match[]=label_metric_2" | jq -r ".data[0]") = "value_1_2" ]]'
}

echo "Running readiness test"
# Run the readiness probe first, then each test group in its fixed order.
test_readiness

echo "Running prometheus tests"
# The order is significant: the functions are invoked strictly top to bottom.
prometheus_suite=(
  test_prometheus_remote_read
  test_prometheus_remote_write_multi_namespaces
  test_prometheus_remote_write_empty_label_name_returns_400_status_code
  test_prometheus_remote_write_empty_label_value_returns_400_status_code
  test_prometheus_remote_write_duplicate_label_returns_400_status_code
  test_prometheus_remote_write_too_old_returns_400_status_code
  test_prometheus_remote_write_restrict_metrics_type
  test_query_lookback_applied
  test_query_limits_applied
  test_query_restrict_metrics_type
  test_query_timeouts
  test_prometheus_query_native_timeout
  test_query_restrict_tags
  test_prometheus_remote_write_map_tags
  test_series
  test_label_query_limits_applied
  test_labels
)
for suite_fn in "${prometheus_suite[@]}"; do
  "$suite_fn"
done

# The global-limit test runs only when RUN_GLOBAL_LIMIT_TEST is exactly "true".
case "$RUN_GLOBAL_LIMIT_TEST" in
  true)
    test_query_limits_global_applied
    ;;
esac

echo "Running function correctness tests"
test_correctness

echo "Running aggregate limit tests"
test_global_aggregate_limits

# Reaching this line means nothing above aborted the script; the EXIT trap
# skips dumping docker-compose logs when this flag is "true".
TEST_SUCCESS=true