#!/bin/sh
# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# hack script for running a kind e2e
# must be run with a kubernetes checkout in $PWD (IE from the checkout)
# Usage: SKIP="ginkgo skip regex" FOCUS="ginkgo focus regex" kind-e2e.sh

set -o errexit -o nounset -o xtrace

# Settings:
# SKIP: ginkgo skip regex
# FOCUS: ginkgo focus regex
# GA_ONLY: true  - limit to GA APIs/features as much as possible
#          false - (default) APIs and features left at defaults
# FEATURE_GATES:
#          JSON or YAML encoding of a string/bool map: {"FeatureGateA": true, "FeatureGateB": false}
#          Enables or disables feature gates in the entire cluster.
#          Cannot be used when GA_ONLY=true.
# RUNTIME_CONFIG:
#          JSON or YAML encoding of a string/string (!) map: {"apia.example.com/v1alpha1": "true", "apib.example.com/v1beta1": "false"}
#          Enables API groups in the apiserver via --runtime-config.
#          Cannot be used when GA_ONLY=true.

# cleanup logic for cleanup on exit
CLEANED_UP=false

# Tears down everything this script created. Idempotent: guarded by
# CLEANED_UP so it is safe to call from main, the signal handler and the
# EXIT trap.
# Globals: CLEANED_UP (read/written), KIND_CREATE_ATTEMPTED, ARTIFACTS,
#          TMP_DIR (read)
cleanup() {
  if [ "$CLEANED_UP" = "true" ]; then
    return
  fi
  # KIND_CREATE_ATTEMPTED is true once we: kind create
  if [ "${KIND_CREATE_ATTEMPTED:-}" = true ]; then
    # best effort: a failed log export must not prevent deleting the cluster
    kind "export" logs "${ARTIFACTS}" || true
    kind delete cluster || true
  fi
  rm -f _output/bin/e2e.test || true
  # remove our tempdir, this needs to be last, or it will prevent kind delete
  if [ -n "${TMP_DIR:-}" ]; then
    rm -rf "${TMP_DIR:?}"
  fi
  CLEANED_UP=true
}

# setup signal handlers
# Forwards the interrupt to the backgrounded ginkgo run (if one was started)
# and then cleans up.
# shellcheck disable=SC2317 # this is not unreachable code
signal_handler() {
  if [ -n "${GINKGO_PID:-}" ]; then
    kill -TERM "$GINKGO_PID" || true
  fi
  cleanup
}
trap signal_handler INT TERM
# Also clean up on every other exit path (explicit `exit 1` from validation,
# errexit/nounset aborts). cleanup is a no-op when it has already run.
trap cleanup EXIT

# build kubernetes / node image, e2e binaries
# Globals: GINKGO_SRC_DIR (written)
build() {
  # build the node image w/ kubernetes
  kind build node-image -v 1
  # Ginkgo v1 is used by Kubernetes 1.24 and earlier, fallback if v2 is not available.
  GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/v2/ginkgo"
  if [ ! -d "$GINKGO_SRC_DIR" ]; then
    GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/ginkgo"
  fi
  # make sure we have e2e requirements
  make all WHAT="cmd/kubectl test/e2e/e2e.test ${GINKGO_SRC_DIR}"
}

# Aborts the script when KUBE_VERSION matches v1.10 through v1.18.
# Globals:   KUBE_VERSION (read)
# Arguments: $1 - name of the setting being validated; only used in the
#                 error message
check_structured_log_support() {
  case "${KUBE_VERSION}" in
    v1.1[0-8].*)
      echo "$1 is only supported on versions >= v1.19, got ${KUBE_VERSION}" >&2
      exit 1
      ;;
  esac
}

# up a cluster with kind
# Writes ${ARTIFACTS}/kind-config.yaml and creates the cluster from it.
# Globals: ARTIFACTS, KIND_CLUSTER_LOG_LEVEL, CLUSTER_LOG_FORMAT,
#          KUBELET_LOG_FORMAT, FEATURE_GATES, RUNTIME_CONFIG, GA_ONLY,
#          IP_FAMILY, KUBE_PROXY_MODE (read, all but ARTIFACTS optional);
#          KUBE_VERSION, NUM_NODES, KIND_CREATE_ATTEMPTED (written)
create_cluster() {
  # Grab the version of the cluster we're about to start
  KUBE_VERSION="$(docker run --rm --entrypoint=cat "kindest/node:latest" /kind/version)"

  # Default Log level for all components in test clusters
  KIND_CLUSTER_LOG_LEVEL=${KIND_CLUSTER_LOG_LEVEL:-4}

  # potentially enable --logging-format
  # NOTE: the *_extra_args values are pasted verbatim into the YAML heredoc
  # below, so their leading whitespace must line up under "extraArgs:".
  CLUSTER_LOG_FORMAT=${CLUSTER_LOG_FORMAT:-}
  scheduler_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
  controllerManager_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
  apiServer_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
  if [ -n "$CLUSTER_LOG_FORMAT" ]; then
    check_structured_log_support "CLUSTER_LOG_FORMAT"
    scheduler_extra_args="${scheduler_extra_args}
      \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
    controllerManager_extra_args="${controllerManager_extra_args}
      \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
    apiServer_extra_args="${apiServer_extra_args}
      \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
  fi
  kubelet_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\"
      \"container-log-max-size\": 100Mi"
  # the kubelet format defaults to the cluster-wide one
  KUBELET_LOG_FORMAT=${KUBELET_LOG_FORMAT:-$CLUSTER_LOG_FORMAT}
  if [ -n "$KUBELET_LOG_FORMAT" ]; then
    check_structured_log_support "KUBELET_LOG_FORMAT"
    kubelet_extra_args="${kubelet_extra_args}
      \"logging-format\": \"${KUBELET_LOG_FORMAT}\""
  fi

  # JSON or YAML map injected into featureGates config
  feature_gates="${FEATURE_GATES:-{\}}"
  # --runtime-config argument value passed to the API server, again as a map
  runtime_config="${RUNTIME_CONFIG:-{\}}"

  case "${GA_ONLY:-false}" in
    false)
      :
      ;;
    true)
      if [ "${feature_gates}" != "{}" ]; then
        echo "GA_ONLY=true and FEATURE_GATES=${feature_gates} are mutually exclusive." >&2
        exit 1
      fi
      if [ "${runtime_config}" != "{}" ]; then
        echo "GA_ONLY=true and RUNTIME_CONFIG=${runtime_config} are mutually exclusive." >&2
        exit 1
      fi

      echo "Limiting to GA APIs and features for ${KUBE_VERSION}"
      feature_gates='{"AllAlpha":false,"AllBeta":false}'
      runtime_config='{"api/alpha":"false", "api/beta":"false"}'
      ;;
    *)
      echo "\$GA_ONLY set to '${GA_ONLY}'; supported values are true and false (default)" >&2
      exit 1
      ;;
  esac

  # create the config file
  cat <<EOF > "${ARTIFACTS}/kind-config.yaml"
# config for 1 control plane node and 2 workers (necessary for conformance)
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  ipFamily: ${IP_FAMILY:-ipv4}
  kubeProxyMode: ${KUBE_PROXY_MODE:-iptables}
  # don't pass through host search paths
  # TODO: possibly a reasonable default in the future for kind ...
  dnsSearch: []
nodes:
- role: control-plane
- role: worker
- role: worker
featureGates: ${feature_gates}
runtimeConfig: ${runtime_config}
kubeadmConfigPatches:
- |
  kind: ClusterConfiguration
  metadata:
    name: config
  apiServer:
    extraArgs:
${apiServer_extra_args}
  controllerManager:
    extraArgs:
${controllerManager_extra_args}
  scheduler:
    extraArgs:
${scheduler_extra_args}
  ---
  kind: InitConfiguration
  nodeRegistration:
    kubeletExtraArgs:
${kubelet_extra_args}
  ---
  kind: JoinConfiguration
  nodeRegistration:
    kubeletExtraArgs:
${kubelet_extra_args}
EOF
  # NOTE: must match the number of workers above
  NUM_NODES=2
  # actually create the cluster
  # TODO(BenTheElder): settle on verbosity for this script
  # set before the attempt so cleanup also handles a half-created cluster
  KIND_CREATE_ATTEMPTED=true
  kind create cluster \
    --image=kindest/node:latest \
    --retain \
    --wait=1m \
    -v=3 \
    "--config=${ARTIFACTS}/kind-config.yaml"

  # debug cluster version
  kubectl version

  # Patch kube-proxy to set the verbosity level
  kubectl patch -n kube-system daemonset/kube-proxy \
    --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/command/-", "value": "--v='"${KIND_CLUSTER_LOG_LEVEL}"'" }]'
}

# run e2es with ginkgo-e2e.sh
# Globals: IP_FAMILY, PARALLEL, NUM_NODES, ARTIFACTS (read);
#          SKIP, FOCUS (read/written); GINKGO_PID (written)
# Returns: the exit status of the backgrounded ginkgo-e2e.sh run
run_tests() {
  # IPv6 clusters need some CoreDNS changes in order to work in k8s CI:
  # 1. k8s CI doesn't offer IPv6 connectivity, so CoreDNS should be configured
  # to work in an offline environment:
  # https://github.com/coredns/coredns/issues/2494#issuecomment-457215452
  # 2. k8s CI adds following domains to resolv.conf search field:
  # c.k8s-prow-builds.internal google.internal.
  # CoreDNS should handle those domains and answer with NXDOMAIN instead of SERVFAIL
  # otherwise pods stops trying to resolve the domain.
  if [ "${IP_FAMILY:-ipv4}" = "ipv6" ]; then
    # Get the current config
    original_coredns=$(kubectl get -oyaml -n=kube-system configmap/coredns)
    echo "Original CoreDNS config:"
    echo "${original_coredns}"
    # Patch it
    fixed_coredns=$(
      printf '%s' "${original_coredns}" | sed \
        -e 's/^.*kubernetes cluster\.local/& internal/' \
        -e '/^.*upstream$/d' \
        -e '/^.*fallthrough.*$/d' \
        -e '/^.*forward . \/etc\/resolv.conf$/d' \
        -e '/^.*loop$/d'
    )
    echo "Patched CoreDNS config:"
    echo "${fixed_coredns}"
    printf '%s' "${fixed_coredns}" | kubectl apply -f -
  fi

  # ginkgo regexes
  SKIP="${SKIP:-}"
  FOCUS="${FOCUS:-"\\[Conformance\\]"}"
  # if we set PARALLEL=true, skip serial tests set --ginkgo-parallel
  if [ "${PARALLEL:-false}" = "true" ]; then
    export GINKGO_PARALLEL=y
    if [ -z "${SKIP}" ]; then
      SKIP="\\[Serial\\]"
    else
      SKIP="\\[Serial\\]|${SKIP}"
    fi
  fi

  # setting this env prevents ginkgo e2e from trying to run provider setup
  export KUBERNETES_CONFORMANCE_TEST='y'
  # setting these is required to make RuntimeClass tests work ... :/
  export KUBE_CONTAINER_RUNTIME=remote
  export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock
  export KUBE_CONTAINER_RUNTIME_NAME=containerd
  # ginkgo can take forever to exit, so we run it in the background and save the
  # PID, bash will not run traps while waiting on a process, but it will while
  # running a builtin like `wait`, saving the PID also allows us to forward the
  # interrupt
  ./hack/ginkgo-e2e.sh \
    '--provider=skeleton' "--num-nodes=${NUM_NODES}" \
    "--ginkgo.focus=${FOCUS}" "--ginkgo.skip=${SKIP}" \
    "--report-dir=${ARTIFACTS}" '--disable-log-dump=true' &
  GINKGO_PID=$!
  wait "$GINKGO_PID"
}

# Trims the exported kind logs under ${ARTIFACTS} and enforces a size cap.
# Globals: ARTIFACTS (read)
# Returns: 1 when the remaining kind-* data exceeds 300Mi, 0 otherwise
prune_kind_logs() {
  # The logs for the pods are the same as for the containers. k/k/test/integration/logs/benchmark/get-logs.sh
  # only looks for the container logs.
  rm -rf "${ARTIFACTS}"/kind-*/pods
  # One kubelet log is enough. get-logs.sh uses kind-worker.
  rm -f "${ARTIFACTS}"/kind-control-plane/kubelet.log "${ARTIFACTS}"/kind-worker[0-9]*/kubelet.log
  # The journal is large and only useful for debugging cluster startup. Let's
  # assume that we don't need it.
  rm -f "${ARTIFACTS}"/kind-*/journal.log

  # Above we allow individual files to be as large as 100Mi before
  # kubelet rotates them. Because "kind export logs" only copies
  # the current log, each file is smaller than 100Mi. In practice,
  # files are smaller. As a safeguard we fail the job here if the
  # overall log data exceeds 300Mi.
  #
  # The last line of `du -c` is "<bytes><TAB>total"; keep only the byte count.
  total="$(du -b -s -c "${ARTIFACTS}"/kind-* | tail -n 1 | cut -f 1)"
  limit=$((300 * 1024 * 1024))
  if [ "$total" -gt "$limit" ]; then
    echo "ERROR: Total amount of data in <ARTIFACTS>/kind-* is $total bytes, which is more than the limit of $limit. Try reducing verbosity or number of tests." >&2
    return 1
  fi
}

# Entry point: build, create the cluster, run the tests, clean up, prune logs.
# Exits with the status of the last step that failed, or 0.
main() {
  # create temp dir and setup cleanup
  TMP_DIR=$(mktemp -d)

  # ensure artifacts (results) directory exists when not in CI
  export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
  mkdir -p "${ARTIFACTS}"

  # export the KUBECONFIG to a unique path for testing
  KUBECONFIG="${HOME}/.kube/kind-test-config"
  export KUBECONFIG
  echo "exported KUBECONFIG=${KUBECONFIG}"

  # debug kind version
  kind version

  # build kubernetes
  build
  # in CI attempt to release some memory after building
  if [ -n "${KUBETEST_IN_DOCKER:-}" ]; then
    sync || true
    echo 1 > /proc/sys/vm/drop_caches || true
  fi

  # create the cluster and run tests
  res=0
  create_cluster || res=$?
  # there is nothing to test against when the cluster did not come up
  if [ "$res" -eq 0 ]; then
    run_tests || res=$?
  fi
  # cleanup exports the kind logs that prune_kind_logs then trims
  cleanup || res=$?
  prune_kind_logs || res=$?
  exit $res
}

main