k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/experiment/kind-logs-e2e-k8s.sh

k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/experiment/kind-logs-e2e-k8s.sh (about)

     1  #!/bin/sh
     2  # Copyright 2018 The Kubernetes Authors.
     3  #
     4  # Licensed under the Apache License, Version 2.0 (the "License");
     5  # you may not use this file except in compliance with the License.
     6  # You may obtain a copy of the License at
     7  #
     8  #     http://www.apache.org/licenses/LICENSE-2.0
     9  #
    10  # Unless required by applicable law or agreed to in writing, software
    11  # distributed under the License is distributed on an "AS IS" BASIS,
    12  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  # See the License for the specific language governing permissions and
    14  # limitations under the License.
    15  
    16  # hack script for running a kind e2e
    17  # must be run with a kubernetes checkout in $PWD (IE from the checkout)
# Usage: SKIP="ginkgo skip regex" FOCUS="ginkgo focus regex" kind-logs-e2e-k8s.sh
    19  
    20  set -o errexit -o nounset -o xtrace
    21  
    22  # Settings:
    23  # SKIP: ginkgo skip regex
    24  # FOCUS: ginkgo focus regex
    25  # GA_ONLY: true  - limit to GA APIs/features as much as possible
    26  #          false - (default) APIs and features left at defaults
    27  # FEATURE_GATES:
    28  #          JSON or YAML encoding of a string/bool map: {"FeatureGateA": true, "FeatureGateB": false}
    29  #          Enables or disables feature gates in the entire cluster.
    30  #          Cannot be used when GA_ONLY=true.
    31  # RUNTIME_CONFIG:
    32  #          JSON or YAML encoding of a string/string (!) map: {"apia.example.com/v1alpha1": "true", "apib.example.com/v1beta1": "false"}
    33  #          Enables API groups in the apiserver via --runtime-config.
    34  #          Cannot be used when GA_ONLY=true.
    35  
    36  # cleanup logic for cleanup on exit
    37  CLEANED_UP=false
    38  cleanup() {
    39    if [ "$CLEANED_UP" = "true" ]; then
    40      return
    41    fi
    42    # KIND_CREATE_ATTEMPTED is true once we: kind create
    43    if [ "${KIND_CREATE_ATTEMPTED:-}" = true ]; then
    44      kind "export" logs "${ARTIFACTS}" || true
    45      kind delete cluster || true
    46    fi
    47    rm -f _output/bin/e2e.test || true
    48    # remove our tempdir, this needs to be last, or it will prevent kind delete
    49    if [ -n "${TMP_DIR:-}" ]; then
    50      rm -rf "${TMP_DIR:?}"
    51    fi
    52    CLEANED_UP=true
    53  }
    54  
    55  # setup signal handlers
    56  # shellcheck disable=SC2317 # this is not unreachable code
    57  signal_handler() {
    58    if [ -n "${GINKGO_PID:-}" ]; then
    59      kill -TERM "$GINKGO_PID" || true
    60    fi
    61    cleanup
    62  }
    63  trap signal_handler INT TERM
    64  
    65  # build kubernetes / node image, e2e binaries
    66  build() {
    67    # build the node image w/ kubernetes
    68    kind build node-image -v 1
    69    # Ginkgo v1 is used by Kubernetes 1.24 and earlier, fallback if v2 is not available.
    70    GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/v2/ginkgo"
    71    if [ ! -d "$GINKGO_SRC_DIR" ]; then
    72        GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/ginkgo"
    73    fi
    74    # make sure we have e2e requirements
    75    make all WHAT="cmd/kubectl test/e2e/e2e.test ${GINKGO_SRC_DIR}"
    76  }
    77  
    78  check_structured_log_support() {
    79  	case "${KUBE_VERSION}" in
    80  		v1.1[0-8].*)
    81  			echo "$1 is only supported on versions >= v1.19, got ${KUBE_VERSION}"
    82  			exit 1
    83  			;;
    84  	esac
    85  }
    86  
    87  # up a cluster with kind
    88  create_cluster() {
    89    # Grab the version of the cluster we're about to start
    90    KUBE_VERSION="$(docker run --rm --entrypoint=cat "kindest/node:latest" /kind/version)"
    91  
    92    # Default Log level for all components in test clusters
    93    KIND_CLUSTER_LOG_LEVEL=${KIND_CLUSTER_LOG_LEVEL:-4}
    94  
    95    # potentially enable --logging-format
    96    CLUSTER_LOG_FORMAT=${CLUSTER_LOG_FORMAT:-}
    97    scheduler_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
    98    controllerManager_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
    99    apiServer_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\""
   100    if [ -n "$CLUSTER_LOG_FORMAT" ]; then
   101        check_structured_log_support "CLUSTER_LOG_FORMAT"
   102        scheduler_extra_args="${scheduler_extra_args}
   103        \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
   104        controllerManager_extra_args="${controllerManager_extra_args}
   105        \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
   106        apiServer_extra_args="${apiServer_extra_args}
   107        \"logging-format\": \"${CLUSTER_LOG_FORMAT}\""
   108    fi
   109    kubelet_extra_args="      \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\"
   110        \"container-log-max-size\": 100Mi"
   111    KUBELET_LOG_FORMAT=${KUBELET_LOG_FORMAT:-$CLUSTER_LOG_FORMAT}
   112    if [ -n "$KUBELET_LOG_FORMAT" ]; then
   113        check_structured_log_support "KUBECTL_LOG_FORMAT"
   114        kubelet_extra_args="${kubelet_extra_args}
   115        \"logging-format\": \"${KUBELET_LOG_FORMAT}\""
   116    fi
   117  
   118    # JSON or YAML map injected into featureGates config
   119    feature_gates="${FEATURE_GATES:-{\}}"
   120    # --runtime-config argument value passed to the API server, again as a map
   121    runtime_config="${RUNTIME_CONFIG:-{\}}"
   122  
   123    case "${GA_ONLY:-false}" in
   124    false)
   125      :
   126      ;;
   127    true)
   128      if [ "${feature_gates}" != "{}" ]; then
   129        echo "GA_ONLY=true and FEATURE_GATES=${feature_gates} are mutually exclusive."
   130        exit 1
   131      fi
   132      if [ "${runtime_config}" != "{}" ]; then
   133        echo "GA_ONLY=true and RUNTIME_CONFIG=${runtime_config} are mutually exclusive."
   134        exit 1
   135      fi
   136  
   137      echo "Limiting to GA APIs and features for ${KUBE_VERSION}"
   138      feature_gates='{"AllAlpha":false,"AllBeta":false}'
   139      runtime_config='{"api/alpha":"false", "api/beta":"false"}'
   140      ;;
   141    *)
   142      echo "\$GA_ONLY set to '${GA_ONLY}'; supported values are true and false (default)"
   143      exit 1
   144      ;;
   145    esac
   146  
   147    # create the config file
   148    cat <<EOF > "${ARTIFACTS}/kind-config.yaml"
   149  # config for 1 control plane node and 2 workers (necessary for conformance)
   150  kind: Cluster
   151  apiVersion: kind.x-k8s.io/v1alpha4
   152  networking:
   153    ipFamily: ${IP_FAMILY:-ipv4}
   154    kubeProxyMode: ${KUBE_PROXY_MODE:-iptables}
   155    # don't pass through host search paths
   156    # TODO: possibly a reasonable default in the future for kind ...
   157    dnsSearch: []
   158  nodes:
   159  - role: control-plane
   160  - role: worker
   161  - role: worker
   162  featureGates: ${feature_gates}
   163  runtimeConfig: ${runtime_config}
   164  kubeadmConfigPatches:
   165  - |
   166    kind: ClusterConfiguration
   167    metadata:
   168      name: config
   169    apiServer:
   170      extraArgs:
   171  ${apiServer_extra_args}
   172    controllerManager:
   173      extraArgs:
   174  ${controllerManager_extra_args}
   175    scheduler:
   176      extraArgs:
   177  ${scheduler_extra_args}
   178    ---
   179    kind: InitConfiguration
   180    nodeRegistration:
   181      kubeletExtraArgs:
   182  ${kubelet_extra_args}
   183    ---
   184    kind: JoinConfiguration
   185    nodeRegistration:
   186      kubeletExtraArgs:
   187  ${kubelet_extra_args}
   188  EOF
   189    # NOTE: must match the number of workers above
   190    NUM_NODES=2
   191    # actually create the cluster
   192    # TODO(BenTheElder): settle on verbosity for this script
   193    KIND_CREATE_ATTEMPTED=true
   194    kind create cluster \
   195      --image=kindest/node:latest \
   196      --retain \
   197      --wait=1m \
   198      -v=3 \
   199      "--config=${ARTIFACTS}/kind-config.yaml"
   200  
   201    # debug cluster version
   202    kubectl version
   203  
   204    # Patch kube-proxy to set the verbosity level
   205    kubectl patch -n kube-system daemonset/kube-proxy \
   206      --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/command/-", "value": "--v='"${KIND_CLUSTER_LOG_LEVEL}"'" }]'
   207  }
   208  
   209  # run e2es with ginkgo-e2e.sh
   210  run_tests() {
   211    # IPv6 clusters need some CoreDNS changes in order to work in k8s CI:
   212    # 1. k8s CI doesn´t offer IPv6 connectivity, so CoreDNS should be configured
   213    # to work in an offline environment:
   214    # https://github.com/coredns/coredns/issues/2494#issuecomment-457215452
   215    # 2. k8s CI adds following domains to resolv.conf search field:
   216    # c.k8s-prow-builds.internal google.internal.
   217    # CoreDNS should handle those domains and answer with NXDOMAIN instead of SERVFAIL
   218    # otherwise pods stops trying to resolve the domain.
   219    if [ "${IP_FAMILY:-ipv4}" = "ipv6" ]; then
   220      # Get the current config
   221      original_coredns=$(kubectl get -oyaml -n=kube-system configmap/coredns)
   222      echo "Original CoreDNS config:"
   223      echo "${original_coredns}"
   224      # Patch it
   225      fixed_coredns=$(
   226        printf '%s' "${original_coredns}" | sed \
   227          -e 's/^.*kubernetes cluster\.local/& internal/' \
   228          -e '/^.*upstream$/d' \
   229          -e '/^.*fallthrough.*$/d' \
   230          -e '/^.*forward . \/etc\/resolv.conf$/d' \
   231          -e '/^.*loop$/d' \
   232      )
   233      echo "Patched CoreDNS config:"
   234      echo "${fixed_coredns}"
   235      printf '%s' "${fixed_coredns}" | kubectl apply -f -
   236    fi
   237  
   238    # ginkgo regexes
   239    SKIP="${SKIP:-}"
   240    FOCUS="${FOCUS:-"\\[Conformance\\]"}"
   241    # if we set PARALLEL=true, skip serial tests set --ginkgo-parallel
   242    if [ "${PARALLEL:-false}" = "true" ]; then
   243      export GINKGO_PARALLEL=y
   244      if [ -z "${SKIP}" ]; then
   245        SKIP="\\[Serial\\]"
   246      else
   247        SKIP="\\[Serial\\]|${SKIP}"
   248      fi
   249    fi
   250  
   251    # setting this env prevents ginkgo e2e from trying to run provider setup
   252    export KUBERNETES_CONFORMANCE_TEST='y'
   253    # setting these is required to make RuntimeClass tests work ... :/
   254    export KUBE_CONTAINER_RUNTIME=remote
   255    export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock
   256    export KUBE_CONTAINER_RUNTIME_NAME=containerd
   257    # ginkgo can take forever to exit, so we run it in the background and save the
   258    # PID, bash will not run traps while waiting on a process, but it will while
   259    # running a builtin like `wait`, saving the PID also allows us to forward the
   260    # interrupt
   261    ./hack/ginkgo-e2e.sh \
   262      '--provider=skeleton' "--num-nodes=${NUM_NODES}" \
   263      "--ginkgo.focus=${FOCUS}" "--ginkgo.skip=${SKIP}" \
   264      "--report-dir=${ARTIFACTS}" '--disable-log-dump=true' &
   265    GINKGO_PID=$!
   266    wait "$GINKGO_PID"
   267  }
   268  
   269  prune_kind_logs() {
   270    # The logs for the pods are the same as for the containers. k/k/test/integration/logs/benchmark/get-logs.sh
   271    # only looks for the container logs.
   272    rm -rf "${ARTIFACTS}"/kind-*/pods
   273    # One kubelet log is enough. get-logs.sh uses kind-worker.
   274    rm -f "${ARTIFACTS}"/kind-control-plane/kubelet.log "${ARTIFACTS}"/kind-worker[0-9]*/kubelet.log
   275    # The journal is large and only useful for debugging cluster startup. Let's
   276    # assume that we don't need it.
   277    rm -f "${ARTIFACTS}"/kind-*/journal.log
   278  
   279    # Above we allow individual files to be as large as 100Mi before
   280    # kubelet rotates them. Because "kind export logs" only copies
   281    # the current log, each file is smaller than 100Mi. In practice,
   282    # files are smaller. As a safeguard we fail the job here if the
   283    # overall log data exceeds 300Mi.
   284    total="$(du -b -s -c "${ARTIFACTS}"/kind-* | tail -1 | sed -e 's/\s*total//')"
   285    limit=$((300 * 1024 * 1024 ))
   286    if [ "$total" -gt "$limit" ]; then
   287        echo "ERROR: Total amount of data in <ARTIFACTS>/kind-* is $total bytes, which is more than the limit of $limit. Try reducing verbosity or number of tests."
   288        return 1
   289    fi
   290  }
   291  
   292  main() {
   293    # create temp dir and setup cleanup
   294    TMP_DIR=$(mktemp -d)
   295  
   296    # ensure artifacts (results) directory exists when not in CI
   297    export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}"
   298    mkdir -p "${ARTIFACTS}"
   299  
   300    # export the KUBECONFIG to a unique path for testing
   301    KUBECONFIG="${HOME}/.kube/kind-test-config"
   302    export KUBECONFIG
   303    echo "exported KUBECONFIG=${KUBECONFIG}"
   304  
   305    # debug kind version
   306    kind version
   307  
   308    # build kubernetes
   309    build
   310    # in CI attempt to release some memory after building
   311    if [ -n "${KUBETEST_IN_DOCKER:-}" ]; then
   312      sync || true
   313      echo 1 > /proc/sys/vm/drop_caches || true
   314    fi
   315  
   316    # create the cluster and run tests
   317    res=0
   318    create_cluster || res=$?
   319    run_tests || res=$?
   320    cleanup || res=$?
   321    prune_kind_logs || res=$?
   322    exit $res
   323  }
   324  
   325  main