#!/bin/bash
# Copyright (c) 2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Tracing test: starts a Jaeger container, enables tracing via
# configure_tracing_for_kata.sh, runs a container with ctr to generate
# trace spans, then queries the Jaeger HTTP API and validates the spans
# that were recorded.
#
# Helper functions such as die(), info() and warn(), and the
# KATA_TESTS_LOGDIR variable, are expected to come from the sourced
# lib/common.bash (not visible here).

set -o errexit
set -o nounset
set -o pipefail
set -o errtrace

script_name=${0##*/}

# Set to true if all tests pass
success="false"

DEBUG=${DEBUG:-}

# If set to any value, do not shut down the Jaeger service.
DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-}

[ -n "$DEBUG" ] && set -o xtrace

SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

RUNTIME="io.containerd.kata.v2"
CONTAINER_IMAGE="quay.io/prometheus/busybox:latest"

TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces}

jaeger_server=${jaeger_server:-localhost}
jaeger_ui_port=${jaeger_ui_port:-16686}
jaeger_docker_container_name="jaeger"

# Cleanup will remove the Jaeger container and disable tracing.
#
# Globals read: success, DEBUG, DEBUG_KEEP_JAEGER, CI, TRACE_LOG_DIR,
#               SCRIPT_PATH, logdir (set by run_tests(); may be unset).
#
# On success: stops Jaeger (unless DEBUG_KEEP_JAEGER is set) and, unless
# DEBUG is set, disables tracing and removes the log directory.
# On failure: optionally copies the logs to TRACE_LOG_DIR (when CI is
# set) and reports the failure via die().
cleanup()
{
	local fp="die"
	local result="failed"

	# logdir only exists once run_tests() has created it. This function
	# is also reached via the "clean" command and via the EXIT trap
	# after an early failure, so it must tolerate logdir being unset
	# (nounset is enabled).
	local logs="${logdir:-}"
	local dest="$logs"

	if [ "$success" = "true" ]; then
		fp="info"
		result="passed"

		# Best effort: Jaeger may not be running.
		if [ -z "$DEBUG_KEEP_JAEGER" ]; then
			stop_jaeger 2>/dev/null || true
		fi

		if [ -n "$DEBUG" ]; then
			# Call the reporting function directly rather than via
			# eval so the path is never re-parsed by the shell.
			"$fp" "test $result - logs left in '$dest'"
		else
			"${SCRIPT_PATH}/../.ci/configure_tracing_for_kata.sh" disable

			# The tests worked so remove the logs
			if [ -n "$logs" ] && [ -d "$logs" ]; then
				rm -rf -- "$logs" || true
			fi
		fi

		return 0
	fi

	# Running under the CI, so copy the logs to allow them
	# to be added as test artifacts. Only attempted when a log
	# directory really exists: an empty path would otherwise turn the
	# copy source into the filesystem root.
	if [ -n "${CI:-}" ] && [ -n "$logs" ] && [ -d "$logs" ]; then
		sudo mkdir -p "$TRACE_LOG_DIR"

		# "dir/." copies the directory contents and also works when
		# the directory is empty (a "dir/*" glob would not match).
		sudo cp -a "$logs"/. "$TRACE_LOG_DIR"

		dest="$TRACE_LOG_DIR"
	fi

	"$fp" "test $result - logs left in '$dest'"
}

# Run an operation to generate Jaeger trace spans
create_traces()
{
	sudo ctr image pull "$CONTAINER_IMAGE"
	sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" tracing-test true
}

# Start the Jaeger all-in-one container in the background and make sure
# the trace log directory exists.
start_jaeger()
{
	local jaeger_docker_image="jaegertracing/all-in-one:latest"

	# Defaults - see https://www.jaegertracing.io/docs/getting-started/
	docker run -d --runtime runc --name "${jaeger_docker_container_name}" \
		-e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \
		-p 5775:5775/udp \
		-p 6831:6831/udp \
		-p 6832:6832/udp \
		-p 5778:5778 \
		-p "${jaeger_ui_port}:${jaeger_ui_port}" \
		-p 14268:14268 \
		-p 9411:9411 \
		"$jaeger_docker_image"

	sudo mkdir -m 0750 -p "$TRACE_LOG_DIR"
}

# Stop and remove the Jaeger container.
stop_jaeger()
{
	docker stop "${jaeger_docker_container_name}"
	docker rm -f "${jaeger_docker_container_name}"
}

# Query the Jaeger HTTP API for the traces of a service.
#
# Arguments: $1 - Jaeger service name.
# Outputs:   the raw HTTP response body on stdout.
get_jaeger_traces()
{
	# ${1:-} so that a missing argument reaches the die() below
	# instead of tripping nounset.
	local service="${1:-}"
	[ -z "$service" ] && die "need jaeger service name"

	local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}"
	curl -s "${traces_url}" 2>/dev/null
}

# Produce one line per span containing the span ID and operation name.
#
# Arguments: $1 - Jaeger status JSON.
# Outputs:   span summary lines on stdout.
get_trace_summary()
{
	local status="${1:-}"
	[ -z "$status" ] && die "need jaeger status JSON"

	printf '%s\n' "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh'
}

# Count the spans in the specified Jaeger status JSON.
#
# Arguments: $1 - Jaeger status JSON.
# Outputs:   number of span summary lines on stdout.
get_span_count()
{
	local status="${1:-}"
	[ -z "$status" ] && die "need jaeger status JSON"

	# This could be simplified but creating a variable holding the
	# summary is useful in debug mode as the summary is displayed.
	local trace_summary
	trace_summary=$(get_trace_summary "$status" || true)

	[ -z "$trace_summary" ] && die "failed to get trace summary"

	local count
	count=$(printf '%s\n' "${trace_summary}" | wc -l)

	[ -z "$count" ] && count=0

	echo "$count"
}

# Returns status from Jaeger web UI
#
# Arguments: $1 - Jaeger service name.
#            $2 - directory to write log files to.
# Outputs:   the status JSON on stdout (callers capture it); the same
#            JSON is saved to "<logdir>/<service>-status.json" and a
#            span summary to "<logdir>/span-summary.txt".
get_jaeger_status()
{
	local service="${1:-}"
	local logdir="${2:-}"

	[ -z "$service" ] && die "need jaeger service name"
	[ -z "$logdir" ] && die "need logdir"

	local status=""
	local span_count=0

	# Find spans
	status=$(get_jaeger_traces "$service" || true)
	[ -z "$status" ] && die "failed to query Jaeger for status"

	# tee both logs the JSON and emits it as this function's output.
	printf '%s\n' "$status" | tee "$logdir/${service}-status.json"
	span_count=$(get_span_count "$status")

	[ "$span_count" -eq 0 ] && die "failed to find any trace spans"
	[ "$span_count" -le 0 ] && die "invalid span count"

	get_trace_summary "$status" > "$logdir/span-summary.txt"
}

# Check Jaeger spans for the specified service.
#
# Arguments: $1 - Jaeger service name.
#            $2 - minimum number of spans expected.
#            $3 - directory to write log files to.
#
# Calls die() if the status cannot be obtained, if fewer than the
# minimum number of spans were found, or if the span data contains
# "invalid parent span" errors or warnings other than clock skew.
check_jaeger_status()
{
	local service="${1:-}"
	local min_spans="${2:-}"
	local logdir="${3:-}"

	[ -z "$service" ] && die "need jaeger service name"
	[ -z "$min_spans" ] && die "need minimum trace span count"
	[ -z "$logdir" ] && die "need logdir"

	local status
	local errors=0

	info "Checking Jaeger status"

	status=$(get_jaeger_status "$service" "$logdir")

	#------------------------------
	# Basic sanity checks
	[ -z "$status" ] && die "failed to query status via HTTP"

	# A jq failure leaves the variable empty, which is reported below.
	local span_lines
	span_lines=$(printf '%s\n' "$status" | jq -S '.data[].spans | length' || true)
	[ -z "$span_lines" ] && die "no span status"

	# Log the spans to allow for analysis in case the test fails
	printf '%s\n' "$status" | jq -S . > "$logdir/${service}-traces-formatted.json"

	# One line of span_lines per trace
	local span_lines_count
	span_lines_count=$(printf '%s\n' "$span_lines" | wc -l)

	# Total up all span counts
	local spans
	spans=$(printf '%s\n' "$span_lines" | paste -sd+ - | bc || true)
	[ -z "$spans" ] && die "no spans"

	# Ensure total span count is numeric
	[[ "$spans" =~ ^[0-9]+$ ]] || die "invalid span count: '$spans'"

	info "found $spans spans (across $span_lines_count traces)"

	# Validate
	[ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans"

	# Look for common errors in span data.
	#
	# Each check below may only ever increase the error count: a clean
	# result from one check must not cancel an error found by another.
	local error_msg
	error_msg=$(printf '%s\n' "$status" | jq -S . 2>/dev/null | grep "invalid parent span" || true)

	if [ -n "$error_msg" ]; then
		errors=$((errors+1))
		warn "Found invalid parent span errors: $error_msg"
	fi

	# Crude but it works
	local error_or_warning_msgs
	error_or_warning_msgs=$(printf '%s\n' "$status" |\
		jq -S . 2>/dev/null |\
		jq '.data[].spans[].warnings' |\
		grep -E -v "\<null\>" |\
		grep -E -v "\[" |\
		grep -E -v "\]" |\
		grep -E -v "clock skew" || true) # ignore clock skew error

	if [ -n "$error_or_warning_msgs" ]; then
		errors=$((errors+1))
		warn "Found errors/warnings: $error_or_warning_msgs"
	fi

	[ "$errors" -eq 0 ] || die "errors detected"
}

# Prepare the environment: restart containerd, start Jaeger and enable
# tracing.
setup()
{
	# containerd must be running in order to use ctr to generate traces
	sudo systemctl restart containerd

	start_jaeger

	"${SCRIPT_PATH}/../.ci/configure_tracing_for_kata.sh" enable
}

# Run the span checks for a single service.
#
# Arguments: $1 - Jaeger service name.
#            $2 - minimum number of spans expected.
#            $3 - parent log directory; a per-service subdirectory is
#                 created below it.
run_test()
{
	local service="${1:-}"
	local min_spans="${2:-}"
	local logdir="${3:-}"

	[ -z "$service" ] && die "need service name"
	[ -z "$min_spans" ] && die "need minimum span count"
	[ -z "$logdir" ] && die "need logdir"

	info "Running test for service '$service'"

	logdir="$logdir/$service"
	mkdir -p "$logdir"

	check_jaeger_status "$service" "$min_spans" "$logdir"

	info "test passed"
}

# Generate traces once, then check every listed service.
#
# Globals written: logdir (temporary log directory, consumed by
#                  cleanup()), success (set to "true" when all pass).
run_tests()
{
	# List of services to check
	#
	# Format: "name:min-spans"
	#
	# Where:
	#
	# - 'name' is the Jaeger service name.
	# - 'min-spans' is an integer representing the minimum number of
	#   trace spans this service should generate.
	#
	# Notes:
	#
	# - Uses an array to ensure predictable ordering.
	# - All services listed are expected to generate traces
	#   when create_traces() is called a single time.
	local -a services=()

	services+=("kata:50")

	create_traces

	# Deliberately global: cleanup() needs it.
	logdir=$(mktemp -d)

	local service
	local name
	local min_spans

	for service in "${services[@]}"
	do
		# Split "name:min-spans"; any further fields are ignored.
		IFS=: read -r name min_spans _ <<< "${service}"

		run_test "${name}" "${min_spans}" "${logdir}"
	done

	info "all tests passed"
	success="true"
}

# Display usage information on stdout.
usage()
{
	cat <<EOT

Usage: $script_name [<command>]

Commands:

  clean  - Perform cleanup phase only.
  help   - Show usage.
  run    - Only run tests (no setup or cleanup).
  setup  - Perform setup phase only.

Environment variables:

  CI    - if set, save logs of all tests to ${TRACE_LOG_DIR}.
  DEBUG - if set, enable tracing and do not cleanup after tests.
  DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service.

Notes:
  - Runs all test phases if no arguments are specified.

EOT
}

# Entry point.
#
# Arguments: $1 - optional command (see usage()). Any value that is not
#                 a recognised command falls through to the full
#                 setup / run / cleanup sequence.
main()
{
	local cmd="${1:-}"

	case "$cmd" in
		clean) success="true"; cleanup; exit 0;;
		help|-h|-help|--help) usage; exit 0;;
		run) run_tests; exit 0;;
		setup) setup; exit 0;;
	esac

	# From here on cleanup() runs on every exit path.
	trap cleanup EXIT

	setup

	run_tests
}

main "$@"