github.com/dmaizel/tests@v0.0.0-20210728163746-cae6a2d9cee8/tracing/tracing-test.sh (about)

     1  #!/bin/bash
     2  # Copyright (c) 2019 Intel Corporation
     3  #
     4  # SPDX-License-Identifier: Apache-2.0
     5  #
     6  
     7  set -o errexit
     8  set -o nounset
     9  set -o pipefail
    10  set -o errtrace
    11  
    12  script_name=${0##*/}
    13  
    14  # Set to true if all tests pass
    15  success="false"
    16  
    17  DEBUG=${DEBUG:-}
    18  
    19  # If set to any value, do not shut down the Jaeger service.
    20  DEBUG_KEEP_JAEGER=${DEBUG_KEEP_JAEGER:-}
    21  
    22  [ -n "$DEBUG" ] && set -o xtrace
    23  
    24  SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
    25  source "${SCRIPT_PATH}/../lib/common.bash"
    26  
    27  RUNTIME="io.containerd.kata.v2"
    28  CONTAINER_IMAGE="quay.io/prometheus/busybox:latest"
    29  
    30  TRACE_LOG_DIR=${TRACE_LOG_DIR:-${KATA_TESTS_LOGDIR}/traces}
    31  
    32  jaeger_server=${jaeger_server:-localhost}
    33  jaeger_ui_port=${jaeger_ui_port:-16686}
    34  jaeger_docker_container_name="jaeger"
    35  
    36  # Cleanup will remove Jaeger container and
    37  # disable tracing.
    38  cleanup()
    39  {
    40  	local fp="die"
    41  	local result="failed"
    42  	local dest="$logdir"
    43  
    44  	if [ "$success" = "true" ]; then
    45  		local fp="info"
    46  		result="passed"
    47  
    48  		[ -z "$DEBUG_KEEP_JAEGER" ] && stop_jaeger 2>/dev/null || true
    49  
    50  		# The tests worked so remove the logs
    51  		if [ -n "$DEBUG" ]; then
    52  			eval "$fp" "test $result - logs left in '$dest'"
    53  		else
    54  			"${SCRIPT_PATH}/../.ci/configure_tracing_for_kata.sh" disable
    55  
    56  			[ -d "$logdir" ] && rm -rf "$logdir" || true
    57  		fi
    58  
    59  		return 0
    60  	fi
    61  
    62  	if [ -n "${CI:-}" ]; then
    63  		# Running under the CI, so copy the logs to allow them
    64  		# to be added as test artifacts.
    65  		sudo mkdir -p "$TRACE_LOG_DIR"
    66  		sudo cp -a "$logdir"/* "$TRACE_LOG_DIR"
    67  
    68  		dest="$TRACE_LOG_DIR"
    69  	fi
    70  
    71  	eval "$fp" "test $result - logs left in '$dest'"
    72  }
    73  
    74  # Run an operation to generate Jaeger trace spans
    75  create_traces()
    76  {
    77  	sudo ctr image pull "$CONTAINER_IMAGE"
    78  	sudo ctr run --runtime "$RUNTIME" --rm "$CONTAINER_IMAGE" tracing-test true
    79  }
    80  
    81  start_jaeger()
    82  {
    83  	local jaeger_docker_image="jaegertracing/all-in-one:latest"
    84  
    85  	# Defaults - see https://www.jaegertracing.io/docs/getting-started/
    86  	docker run -d --runtime runc --name "${jaeger_docker_container_name}" \
    87  		-e COLLECTOR_ZIPKIN_HTTP_PORT=9411 \
    88  		-p 5775:5775/udp \
    89  		-p 6831:6831/udp \
    90  		-p 6832:6832/udp \
    91  		-p 5778:5778 \
    92  		-p "${jaeger_ui_port}:${jaeger_ui_port}" \
    93  		-p 14268:14268 \
    94  		-p 9411:9411 \
    95  		"$jaeger_docker_image"
    96  
    97  	sudo mkdir -m 0750 -p "$TRACE_LOG_DIR"
    98  }
    99  
   100  stop_jaeger()
   101  {
   102  	docker stop "${jaeger_docker_container_name}"
   103  	docker rm -f "${jaeger_docker_container_name}"
   104  }
   105  
   106  get_jaeger_traces()
   107  {
   108  	local service="$1"
   109  	[ -z "$service" ] && die "need jaeger service name"
   110  
   111  	local traces_url="http://${jaeger_server}:${jaeger_ui_port}/api/traces?service=${service}"
   112  	curl -s "${traces_url}" 2>/dev/null
   113  }
   114  
   115  get_trace_summary()
   116  {
   117  	local status="$1"
   118  	[ -z "$status" ] && die "need jaeger status JSON"
   119  
   120  	echo "${status}" | jq -S '.data[].spans[] | [.spanID, .operationName] | @sh'
   121  }
   122  
   123  get_span_count()
   124  {
   125  	local status="$1"
   126  	[ -z "$status" ] && die "need jaeger status JSON"
   127  
   128  	# This could be simplified but creating a variable holding the
   129  	# summary is useful in debug mode as the summary is displayed.
   130  	local trace_summary=$(get_trace_summary "$status" || true)
   131  
   132  	[ -z "$trace_summary" ] && die "failed to get trace summary"
   133  
   134  	local count=$(echo "${trace_summary}" | wc -l)
   135  
   136  	[ -z "$count" ] && count=0
   137  
   138  	echo "$count"
   139  }
   140  
   141  # Returns status from Jaeger web UI
   142  get_jaeger_status()
   143  {
   144  	local service="$1"
   145  	local logdir="$2"
   146  
   147  	[ -z "$service" ] && die "need jaeger service name"
   148  	[ -z "$logdir" ] && die "need logdir"
   149  
   150  	local status=""
   151  	local span_count=0
   152  
   153  	# Find spans
   154  	status=$(get_jaeger_traces "$service" || true)
   155  	if [ -n "$status" ]; then
   156  		echo "$status" | tee "$logdir/${service}-status.json"
   157  		span_count=$(get_span_count "$status")
   158  	fi
   159  
   160  	[ -z "$status" ] && die "failed to query Jaeger for status"
   161  	[ "$span_count" -eq 0 ] && die "failed to find any trace spans"
   162  	[ "$span_count" -le 0 ] && die "invalid span count"
   163  
   164  	get_trace_summary "$status" > "$logdir/span-summary.txt"
   165  }
   166  
   167  # Check Jaeger spans for the specified service.
   168  check_jaeger_status()
   169  {
   170  	local service="$1"
   171  	local min_spans="$2"
   172  	local logdir="$3"
   173  
   174  	[ -z "$service" ] && die "need jaeger service name"
   175  	[ -z "$min_spans" ] && die "need minimum trace span count"
   176  	[ -z "$logdir" ] && die "need logdir"
   177  
   178  	local status
   179  	local errors=0
   180  
   181  	info "Checking Jaeger status"
   182  
   183  	status=$(get_jaeger_status "$service" "$logdir")
   184  
   185  	#------------------------------
   186  	# Basic sanity checks
   187  	[ -z "$status" ] && die "failed to query status via HTTP"
   188  
   189  	local span_lines=$(echo "$status"|jq -S '.data[].spans | length')
   190  	[ -z "$span_lines" ] && die "no span status"
   191  
   192  	# Log the spans to allow for analysis in case the test fails
   193  	echo "$status"|jq -S . > "$logdir/${service}-traces-formatted.json"
   194  
   195  	local span_lines_count=$(echo "$span_lines"|wc -l)
   196  
   197  	# Total up all span counts
   198  	local spans=$(echo "$span_lines"|paste -sd+ -|bc)
   199  	[ -z "$spans" ] && die "no spans"
   200  
   201  	# Ensure total span count is numeric
   202  	echo "$spans"|grep -q "^[0-9][0-9]*$" || die "invalid span count: '$spans'"
   203  
   204  	info "found $spans spans (across $span_lines_count traces)"
   205  
   206  	# Validate
   207  	[ "$spans" -lt "$min_spans" ] && die "expected >= $min_spans spans, got $spans"
   208  
   209  	# Look for common errors in span data
   210  	local error_msg=$(echo "$status"|jq -S . 2>/dev/null|grep "invalid parent span" || true)
   211  
   212  	if [ -n "$error_msg" ]; then
   213  		errors=$((errors+1))
   214  		warn "Found invalid parent span errors: $error_msg"
   215  	else
   216  		errors=$((errors-1))
   217  		[ "$errors" -lt 0 ] && errors=0
   218  	fi
   219  
   220  	# Crude but it works
   221  	error_or_warning_msgs=$(echo "$status" |\
   222  		jq -S . 2>/dev/null |\
   223  		jq '.data[].spans[].warnings' |\
   224  		grep -E -v "\<null\>" |\
   225  		grep -E -v "\[" |\
   226  		grep -E -v "\]" |\
   227  		grep -E -v "clock skew" || true) # ignore clock skew error
   228  
   229  	if [ -n "$error_or_warning_msgs" ]; then
   230  		errors=$((errors+1))
   231  		warn "Found errors/warnings: $error_or_warning_msgs"
   232  	else
   233  		errors=$((errors-1))
   234  		[ "$errors" -lt 0 ] && errors=0
   235  	fi
   236  
   237  	[ "$errors" -eq 0 ] || die "errors detected"
   238  }
   239  
   240  setup()
   241  {
   242  	# containerd must be running in order to use ctr to generate traces
   243  	sudo systemctl restart containerd
   244  
   245  	start_jaeger
   246  
   247  	"${SCRIPT_PATH}/../.ci/configure_tracing_for_kata.sh" enable
   248  }
   249  
   250  run_test()
   251  {
   252  	local service="$1"
   253  	local min_spans="$2"
   254  	local logdir="$3"
   255  
   256  	[ -z "$service" ] && die "need service name"
   257  	[ -z "$min_spans" ] && die "need minimum span count"
   258  	[ -z "$logdir" ] && die "need logdir"
   259  
   260  	info "Running test for service '$service'"
   261  
   262  	logdir="$logdir/$service"
   263  	mkdir -p "$logdir"
   264  
   265  	check_jaeger_status "$service" "$min_spans" "$logdir"
   266  
   267  	info "test passed"
   268  }
   269  
   270  run_tests()
   271  {
   272  	# List of services to check
   273  	#
   274  	# Format: "name:min-spans"
   275  	#
   276  	# Where:
   277  	#
   278  	# - 'name' is the Jaeger service name.
   279  	# - 'min-spans' is an integer representing the minimum number of
   280  	#   trace spans this service should generate.
   281  	#
   282  	# Notes:
   283  	#
   284  	# - Uses an array to ensure predictable ordering.
   285  	# - All services listed are expected to generate traces
   286  	#   when create_traces() is called a single time.
   287  	local -a services
   288  
   289  	services+=("kata:50")
   290  
   291  	create_traces
   292  
   293  	logdir=$(mktemp -d)
   294  
   295  	for service in "${services[@]}"
   296  	do
   297  		local name=$(echo "${service}"|cut -d: -f1)
   298  		local min_spans=$(echo "${service}"|cut -d: -f2)
   299  
   300  		run_test "${name}" "${min_spans}" "${logdir}"
   301  	done
   302  
   303  	info "all tests passed"
   304  	success="true"
   305  }
   306  
   307  usage()
   308  {
   309  	cat <<EOT
   310  
   311  Usage: $script_name [<command>]
   312  
   313  Commands:
   314  
   315    clean  - Perform cleanup phase only.
   316    help   - Show usage.
   317    run    - Only run tests (no setup or cleanup).
   318    setup  - Perform setup phase only.
   319  
   320  Environment variables:
   321  
   322    CI    - if set, save logs of all tests to ${TRACE_LOG_DIR}.
   323    DEBUG - if set, enable tracing and do not cleanup after tests.
   324    DEBUG_KEEP_JAEGER - if set, do not shut down the Jaeger service.
   325  
   326  Notes:
   327    - Runs all test phases if no arguments are specified.
   328  
   329  EOT
   330  }
   331  
   332  main()
   333  {
   334  	local cmd="${1:-}"
   335  
   336  	case "$cmd" in
   337  		clean) success="true"; cleanup; exit 0;;
   338  		help|-h|-help|--help) usage; exit 0;;
   339  		run) run_tests; exit 0;;
   340  		setup) setup; exit 0;;
   341  	esac
   342  
   343  	trap cleanup EXIT
   344  
   345  	setup
   346  
   347  	run_tests
   348  }
   349  
   350  main "$@"