github.com/kata-containers/tests@v0.0.0-20240307153542-772105b56064/functional/tracing/tracing-test.sh (about) 1 #!/bin/bash 2 # Copyright (c) 2019-2022 Intel Corporation 3 # 4 # SPDX-License-Identifier: Apache-2.0 5 # 6 7 set -o errexit 8 set -o nounset 9 set -o pipefail 10 set -o errtrace 11 12 script_name=${0##*/} 13 source "/etc/os-release" || source "/usr/lib/os-release" 14 15 # Set to true if all tests pass 16 success="false" 17 18 DEBUG=${DEBUG:-} 19 20 # If set to any value, do not shut down the Jaeger service. 21 DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-} 22 # If set to any value, do not shut down the trace forwarder. 23 DEBUG_KEEP_FORWARDER=${DEBUG_KEEP_FORWARDER:-} 24 25 [ -n "$DEBUG" ] && set -o xtrace 26 27 SCRIPT_PATH=$(dirname "$(readlink -f "$0")") 28 source "${SCRIPT_PATH}/../../lib/common.bash" 29 30 RUNTIME="io.containerd.kata.v2" 31 CONTAINER_IMAGE="quay.io/prometheus/busybox:latest" 32 33 TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces} 34 35 KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" 36 37 # files for output 38 formatted_traces_file="kata-traces-formatted.json" 39 trace_summary_file="span-summary.txt" 40 41 # tmux(1) session to run the trace forwarder in 42 KATA_TMUX_FORWARDER_SESSION="kata-trace-forwarder-session" 43 44 katacontainers_repo_dir="$GOPATH/src/github.com/kata-containers/kata-containers" 45 46 forwarder_dir="${katacontainers_repo_dir}/src/tools/trace-forwarder" 47 forwarder_binary_name="kata-trace-forwarder" 48 49 # path prefix for CLH socket path 50 socket_path_prefix="/run/vc/vm/" 51 52 container_id="tracing-test" 53 54 jaeger_server=${jaeger_server:-localhost} 55 jaeger_ui_port=${jaeger_ui_port:-16686} 56 jaeger_docker_container_name="jaeger" 57 58 # Span data for testing: 59 # 1. Existence of spans in jaeger output 60 # 2. That the relative ordering in the data occurs 61 # in the jaeger output. 62 # This is tested to make sure specific spans exist in the output and 63 # that the order of spans is preserved. 64 # Ordered in latest in sequence to earliest. 65 # 66 # Fields are all span existing span names in relative order from latest 67 # to earliest call in a sequence of calls. Example (pseudocode): 68 # func1() { 69 # span = trace("func1") 70 # func2() 71 # end span 72 # } 73 # func2() { 74 # span = trace("func2") 75 # func3() 76 # end span 77 # } 78 # func3() { 79 # span = trace("func3") 80 # end span 81 # } 82 # The following data should result in a passing test: 83 # 'func3:func2:func1' 84 # 'func3:func2' 85 # 'func3:func1' 86 # 'func2:func1' 87 span_ordering_data=( 88 'StartVM:createSandboxFromConfig:create:rootSpan' 89 'setup_shared_namespaces:StartVM:createSandboxFromConfig:create:rootSpan' 90 'start_container:Start:rootSpan' 91 'stopSandbox:Stop:Start:rootSpan' 92 ) 93 94 # Cleanup will remove Jaeger container and 95 # disable tracing. 96 cleanup() 97 { 98 local fp="die" 99 local result="failed" 100 local dest="$logdir" 101 102 if [ "$success" = "true" ]; then 103 local fp="info" 104 result="passed" 105 106 [ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true 107 108 [ -z "$DEBUG_KEEP_FORWARDER" ] && kill_trace_forwarder 109 110 # The tests worked so remove the logs 111 if [ -n "$DEBUG" ]; then 112 eval "$fp" "test $result - logs left in '$dest'" 113 else 114 "${SCRIPT_PATH}/../../.ci/configure_tracing_for_kata.sh" disable 115 116 [ -d "$logdir" ] && rm -rf "$logdir" || true 117 fi 118 119 return 0 120 fi 121 122 if [ -n "${CI:-}" ]; then 123 # Running under the CI, so copy the logs to allow them 124 # to be added as test artifacts. 125 sudo mkdir -p "$TRACE_LOG_DIR" 126 sudo cp -a "$logdir"/* "$TRACE_LOG_DIR" 127 128 dest="$TRACE_LOG_DIR" 129 fi 130 131 eval "$fp" "test $result - logs left in '$dest'" 132 } 133 134 # Run an operation to generate Jaeger trace spans 135 create_traces() 136 { 137 sudo ctr image pull "$CONTAINER_IMAGE" 138 sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" "$container_id" true 139 } 140 141 start_jaeger() 142 { 143 local jaeger_docker_image="jaegertracing/all-in-one:latest" 144 145 docker rm -f "${jaeger_docker_container_name}" 146 147 # Defaults - see https://www.jaegertracing.io/docs/getting-started/ 148 docker run -d --runtime runc --name "${jaeger_docker_container_name}" \ 149 -e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \ 150 -p 5775:5775/udp \ 151 -p 6831:6831/udp \ 152 -p 6832:6832/udp \ 153 -p 5778:5778 \ 154 -p "${jaeger_ui_port}:${jaeger_ui_port}" \ 155 -p 14268:14268 \ 156 -p 9411:9411 \ 157 "$jaeger_docker_image" 158 159 sudo mkdir -m 0750 -p "$TRACE_LOG_DIR" 160 } 161 162 stop_jaeger() 163 { 164 docker stop "${jaeger_docker_container_name}" 165 docker rm -f "${jaeger_docker_container_name}" 166 } 167 168 get_jaeger_traces() 169 { 170 local service="$1" 171 [ -z "$service" ] && die "need jaeger service name" 172 173 local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}" 174 curl -s "${traces_url}" 2>/dev/null 175 } 176 177 get_trace_summary() 178 { 179 local status="$1" 180 [ -z "$status" ] && die "need jaeger status JSON" 181 182 echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh' 183 } 184 185 get_span_count() 186 { 187 local status="$1" 188 [ -z "$status" ] && die "need jaeger status JSON" 189 190 # This could be simplified but creating a variable holding the 191 # summary is useful in debug mode as the summary is displayed. 192 local trace_summary=$(get_trace_summary "$status" || true) 193 194 [ -z "$trace_summary" ] && die "failed to get trace summary" 195 196 local count=$(echo "${trace_summary}" | wc -l) 197 198 [ -z "$count" ] && count=0 199 200 echo "$count" 201 } 202 203 # Returns status from Jaeger web UI 204 get_jaeger_status() 205 { 206 local service="$1" 207 local logdir="$2" 208 209 [ -z "$service" ] && die "need jaeger service name" 210 [ -z "$logdir" ] && die "need logdir" 211 212 local status="" 213 local span_count=0 214 215 # Find spans 216 status=$(get_jaeger_traces "$service" || true) 217 if [ -n "$status" ]; then 218 echo "$status" | tee "$logdir/${service}-status.json" 219 span_count=$(get_span_count "$status") 220 fi 221 222 [ -z "$status" ] && die "failed to query Jaeger for status" 223 [ "$span_count" -eq 0 ] && die "failed to find any trace spans" 224 [ "$span_count" -le 0 ] && die "invalid span count" 225 226 get_trace_summary "$status" > "$logdir/$trace_summary_file" 227 } 228 229 # Check Jaeger spans for the specified service. 230 check_jaeger_output() 231 { 232 local service="$1" 233 local min_spans="$2" 234 local logdir="$3" 235 236 [ -z "$service" ] && die "need jaeger service name" 237 [ -z "$min_spans" ] && die "need minimum trace span count" 238 [ -z "$logdir" ] && die "need logdir" 239 240 local status 241 local errors=0 242 243 info "Checking Jaeger status" 244 245 status=$(get_jaeger_status "$service" "$logdir") 246 247 #------------------------------ 248 # Basic sanity checks 249 [ -z "$status" ] && die "failed to query status via HTTP" 250 251 local span_lines=$(echo "$status"|jq -S '.data[].spans | length') 252 [ -z "$span_lines" ] && die "no span status" 253 254 # Log the spans to allow for analysis in case the test fails 255 echo "$status"|jq -S . > "$logdir/${service}-traces-formatted.json" 256 257 local span_lines_count=$(echo "$span_lines"|wc -l) 258 259 # Total up all span counts 260 local spans=$(echo "$span_lines"|paste -sd+ -|bc) 261 [ -z "$spans" ] && die "no spans" 262 263 # Ensure total span count is numeric 264 echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'" 265 266 info "found $spans spans (across $span_lines_count traces)" 267 268 # Validate 269 [ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans" 270 271 # Look for common errors in span data 272 local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true) 273 274 if [ -n "$error_msg" ]; then 275 errors=$((errors+1)) 276 warn "Found invalid parent span errors: $error_msg" 277 else 278 errors=$((errors-1)) 279 [ "$errors" -lt 0 ] && errors=0 280 fi 281 282 # Crude but it works 283 error_or_warning_msgs=$(echo "$status" |\ 284 jq -S . 2>/dev/null |\ 285 jq '.data[].spans[].warnings' |\ 286 grep -E -v "\<null\>" |\ 287 grep -E -v "\[" |\ 288 grep -E -v "\]" |\ 289 grep -E -v "clock skew" || true) # ignore clock skew error 290 291 if [ -n "$error_or_warning_msgs" ]; then 292 errors=$((errors+1)) 293 warn "Found errors/warnings: $error_or_warning_msgs" 294 else 295 errors=$((errors-1)) 296 [ "$errors" -lt 0 ] && errors=0 297 fi 298 299 [ "$errors" -eq 0 ] || die "errors detected" 300 } 301 302 # Check output for spans in span_ordering_data 303 check_spans() 304 { 305 local logdir="$1" 306 [ -z "$logdir" ] && die "need logdir" 307 308 local errors=0 309 310 # Check for existence of spans in output so we do not do the more 311 # time consuming test of checking span ordering if it will fail 312 info "Checking spans: ${span_ordering_data[@]}" 313 local missing_spans=() 314 for span_ordering in "${span_ordering_data[@]}"; do 315 local test_spans=(`echo $span_ordering | tr ':' ' '`) 316 for s in "${test_spans[@]}"; do 317 grep -q \'$s\' "$logdir/$trace_summary_file" || missing_spans+=( "$s" ) 318 done 319 done 320 if [ "${#missing_spans[@]}" -gt 0 ]; then 321 die "Fail: Missing spans: ${missing_spans[@]}" 322 fi 323 324 # Check relative ordering of spans. We are not checking full trace, just 325 # that known calls are not out of order based on the test input. 326 for span_ordering in "${span_ordering_data[@]}"; do # runs maximum length of span_ordering_data 327 local test_spans=(`echo $span_ordering | tr ':' ' '`) 328 329 # create array for span IDs that match span string 330 local span_ids=() 331 for span in "${test_spans[@]}"; do 332 grep -q \'$span\' "$logdir/$trace_summary_file" || die "Fail: Missing span: $span" 333 id=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.operationName==\"$span\") | .spanID") || die "Fail: error with span $span retrieved from traces" 334 id_formatted=$(echo $id | tr -d '\"' | tr '\n' ':') # format to a string for parsing later, not an array 335 span_ids+=("$id_formatted") 336 done 337 338 # We now have 2 parallel arrays where test_spans[n] is the string name and 339 # span_ids[n] has all possible span IDs for that string separated by a colon 340 341 # Since functions can be called multiple times, we may have multiple results 342 # for span IDs. 343 initial_span_ids=(`echo ${span_ids[0]} | tr ':' ' '`) 344 for initial in "${initial_span_ids[@]}"; do # test parents for all initial spans 345 # construct array of all parents of first span 346 local retrieved_spans=() 347 local current_span="$initial" 348 [ "$current_span" != "" ] || break 349 350 MAX_DEPTH=20 # to prevent infinite loop due to unforeseen errors 351 for i in `seq 1 $MAX_DEPTH`; do 352 retrieved_spans+=("$current_span") 353 current_span=$(cat "$logdir/$formatted_traces_file" | jq ".data[].spans[] | select(.spanID==\"$current_span\") | .references[].spanID") || die "Fail: error with current_span $current_span retrieved from formatted traces" 354 [ "$current_span" != "" ] || break 355 current_span=$(echo $current_span | tr -d '"') 356 [ $i -lt $MAX_DEPTH ] || die "Fail: max depth reached, error in jq or adjust test depth" 357 done 358 359 # Keep track of this index so we can ensure we are testing the constructed array in order 360 # Increment when there is a match between test case and constructed path 361 local retrieved_ids_index=0 362 363 local matches=0 364 local index=0 365 366 # TODO: Optimize 367 for ((index=0; index<${#span_ids[@]}; index++)); do 368 for ((r_index=$retrieved_ids_index; r_index<${#retrieved_spans[@]}; r_index++)); do 369 grep -q "${retrieved_spans[$r_index]}" <<< ${span_ids[$index]} && (( retrieved_ids_index=$r_index+1 )) && (( matches+=1 )) && break 370 done 371 done 372 373 local last_initial_span_index=${#initial_span_ids[@]}-1 374 if [ $matches -eq ${#span_ids[@]} ]; then 375 info "Pass: spans \"${test_spans[@]}\" found in jaeger output" 376 break 377 elif [ $matches -lt ${#span_ids[@]} ] && [ "$initial" = "${initial_span_ids[$last_initial_span_index]}" ]; then 378 die "Fail: spans \"${test_spans[@]}\" NOT in jaeger output" 379 fi 380 # else repeat test for next initial span ID 381 done 382 done 383 384 385 } 386 387 run_trace_forwarder() 388 { 389 source "$HOME/.cargo/env" 390 command -v "$forwarder_binary_name" &>/dev/null || (cd "$forwarder_dir" && cargo install --locked --path .) 391 392 if [ $KATA_HYPERVISOR = "qemu" ]; then 393 tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "$forwarder_binary_name -l trace" 394 elif [ $KATA_HYPERVISOR = "cloud-hypervisor" ]; then 395 # CLH uses hybrid VSOCK which uses a local UNIX socket that we need to specify 396 socket_path_template=$socket_path_prefix$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') 397 socket_path=$(echo "$socket_path_template" | sed "s/{ID}/${container_id}/g" | tr -d '"') 398 sudo mkdir -p $(dirname "$socket_path") 399 400 tmux new-session -d -s "$KATA_TMUX_FORWARDER_SESSION" "$forwarder_binary_name -l trace --socket-path $socket_path" 401 else 402 die "Unsupported hypervisor $KATA_HYPERVISOR" 403 fi 404 405 info "Verifying trace forwarder in tmux session $KATA_TMUX_FORWARDER_SESSION" 406 407 local cmd="tmux capture-pane -pt $KATA_TMUX_FORWARDER_SESSION | tr -d '\n' | tr -d '\"' | grep -q \"source:kata-trace-forwarder\"" 408 waitForProcess 10 1 "$cmd" 409 } 410 411 kill_trace_forwarder() 412 { 413 tmux kill-session -t "$KATA_TMUX_FORWARDER_SESSION" 414 } 415 416 setup() 417 { 418 # containerd must be running in order to use ctr to generate traces 419 restart_containerd_service 420 421 local cmds=() 422 # For container manager (containerd) 423 cmds+=('ctr') 424 # For jaeger 425 cmds+=('docker') 426 # For launching processes 427 cmds+=('tmux') 428 429 local cmd 430 for cmd in "${cmds[@]}" 431 do 432 local result 433 result=$(command -v "$cmd" || true) 434 [ -n "$result" ] || die "need $cmd" 435 done 436 437 run_trace_forwarder 438 439 start_jaeger 440 441 "${SCRIPT_PATH}/../../.ci/configure_tracing_for_kata.sh" enable 442 } 443 444 run_test() 445 { 446 local service="$1" 447 local min_spans="$2" 448 local logdir="$3" 449 450 [ -z "$service" ] && die "need service name" 451 [ -z "$min_spans" ] && die "need minimum span count" 452 [ -z "$logdir" ] && die "need logdir" 453 454 info "Running test for service '$service'" 455 456 logdir="$logdir/$service" 457 mkdir -p "$logdir" 458 459 check_jaeger_output "$service" "$min_spans" "$logdir" 460 check_spans "$logdir" 461 462 info "test passed" 463 } 464 465 run_tests() 466 { 467 # List of services to check 468 # 469 # Format: "name:min-spans" 470 # 471 # Where: 472 # 473 # - 'name' is the Jaeger service name. 474 # - 'min-spans' is an integer representing the minimum number of 475 # trace spans this service should generate. 476 # 477 # Notes: 478 # 479 # - Uses an array to ensure predictable ordering. 480 # - All services listed are expected to generate traces 481 # when create_traces() is called a single time. 482 local -a services 483 484 services+=("kata:125") 485 486 create_traces 487 488 logdir=$(mktemp -d) 489 490 for service in "${services[@]}" 491 do 492 local name=$(echo "${service}"|cut -d: -f1) 493 local min_spans=$(echo "${service}"|cut -d: -f2) 494 495 run_test "${name}" "${min_spans}" "${logdir}" 496 done 497 498 info "all tests passed" 499 success="true" 500 } 501 502 usage() 503 { 504 cat <<EOF 505 506 Usage: $script_name [<command>] 507 508 Commands: 509 510 clean - Perform cleanup phase only. 511 help - Show usage. 512 run - Only run tests (no setup or cleanup). 513 setup - Perform setup phase only. 514 515 Environment variables: 516 517 CI - if set, save logs of all tests to ${TRACE_LOG_DIR}. 518 DEBUG - if set, enable tracing and do not cleanup after tests. 519 DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service. 520 DEBUG_KEEP_FORWARDER - if set, do not shut down the trace forwarder. 521 522 Notes: 523 - Runs all test phases if no arguments are specified. 524 525 EOF 526 } 527 528 main() 529 { 530 local cmd="${1:-}" 531 532 source /etc/os-release || source /usr/lib/os-release 533 534 # Only run on Ubuntu LTS for now 535 [ "${ID:-}" = ubuntu ] && grep -q LTS <<< "${VERSION:-}" || { 536 info "Exiting as not running on Ubuntu LTS" 537 exit 0 538 } 539 540 # Only run on x86_64 for now 541 [ "$(uname -m)" != "x86_64" ] && { 542 info "Exiting, only run on x86_64" 543 exit 0 544 } 545 546 [ "${CI_JOB:-}" = "METRICS" ] && { 547 info "Exiting as not running on metrics CI" 548 exit 0 549 } 550 551 [ "${CI_JOB:-}" = "CRI_CONTAINERD_K8S_DEVMAPPER" ] && { 552 info "Exiting to limit number of jobs that run tests related to tracing." 553 exit 0 554 } 555 556 case "$cmd" in 557 clean) success="true"; cleanup; exit 0;; 558 help|-h|-help|--help) usage; exit 0;; 559 run) run_tests; exit 0;; 560 setup) setup; exit 0;; 561 esac 562 563 trap cleanup EXIT 564 565 setup 566 567 run_tests 568 } 569 570 main "$@"