#!/bin/bash
#
# migrate-ingester-statefulsets.sh
#
# Moves instances from one statefulset to another, one instance at a time.
# For every instance moved, the script:
#   1. scales <to_statefulset> up by one replica and waits for the rollout,
#   2. calls /shutdown on the highest-numbered pod of <from_statefulset> and
#      follows that pod's log until flushing is reported as started and then
#      as finished,
#   3. scales <from_statefulset> down by one replica and waits for the rollout.
#
# Usage:
#   migrate-ingester-statefulsets.sh <namespace> <from_statefulset> <to_statefulset> <instances_to_downscale | 'all'>
#
# Arguments:
#   namespace               namespace that holds both statefulsets
#   from_statefulset        statefulset to scale down
#   to_statefulset          statefulset to scale up
#   instances_to_downscale  number of instances to move, or 'all' to move every
#                           current replica of <from_statefulset>
#
# Requirements: kubectl on the PATH, and wget available inside the pods.
# Exit status: 0 on success, 1 on bad arguments or when flushing cannot be
# confirmed from the pod log; any failing kubectl scale/rollout command also
# aborts the script (set -e).

# Exit on any problems, and treat unset variables as errors.
# NOTE: pipefail is deliberately NOT enabled. The "kubectl logs -f | grep -q"
# pipelines below rely on the pipeline status being grep's status: grep -q exits
# on the first match, after which kubectl typically terminates with a broken
# pipe, and pipefail would report that as a failure.
set -eu

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed only if $1 is a non-empty string of decimal digits.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

if [[ $# -lt 4 ]]; then
  echo "Usage: $0 <namespace> <from_statefulset> <to_statefulset> <instances_to_downscale | 'all'>" >&2
  exit 1
fi

NAMESPACE=$1
DOWNSCALE_STATEFULSET=$2
UPSCALE_STATEFULSET=$3
INSTANCES_TO_DOWNSCALE=$4

# Reject a malformed count before talking to the cluster. Without this check a
# non-numeric value is evaluated as 0 by the loop condition and the script would
# silently do nothing.
if [[ "$INSTANCES_TO_DOWNSCALE" != "all" ]] && ! is_uint "$INSTANCES_TO_DOWNSCALE"; then
  die "instances_to_downscale must be a non-negative integer or 'all' (got '$INSTANCES_TO_DOWNSCALE')"
fi

DOWNSCALE_REPLICA_SIZE=$(kubectl get statefulset "$DOWNSCALE_STATEFULSET" -o 'jsonpath={.spec.replicas}' --namespace="$NAMESPACE")
UPSCALE_REPLICA_SIZE=$(kubectl get statefulset "$UPSCALE_STATEFULSET" -o 'jsonpath={.spec.replicas}' --namespace="$NAMESPACE")

# The replica counts feed arithmetic and --current-replicas below, so make sure
# kubectl really returned numbers.
is_uint "$DOWNSCALE_REPLICA_SIZE" \
  || die "Could not read replica count of $NAMESPACE/$DOWNSCALE_STATEFULSET (got '$DOWNSCALE_REPLICA_SIZE')"
is_uint "$UPSCALE_REPLICA_SIZE" \
  || die "Could not read replica count of $NAMESPACE/$UPSCALE_STATEFULSET (got '$UPSCALE_REPLICA_SIZE')"

if [[ "$INSTANCES_TO_DOWNSCALE" = "all" ]]; then
  INSTANCES_TO_DOWNSCALE=$DOWNSCALE_REPLICA_SIZE
fi

# Force base 10 so that input such as "08" is not rejected as invalid octal.
INSTANCES_TO_DOWNSCALE=$((10#$INSTANCES_TO_DOWNSCALE))

# Asking for more instances than exist would scale the target up one extra time
# and then fail on a pod with index -1, leaving the target with a stray replica.
if [[ $INSTANCES_TO_DOWNSCALE -gt $DOWNSCALE_REPLICA_SIZE ]]; then
  die "Cannot downscale $NAMESPACE/$DOWNSCALE_STATEFULSET by $INSTANCES_TO_DOWNSCALE instances: it only has $DOWNSCALE_REPLICA_SIZE replicas"
fi

echo "Going to downscale $NAMESPACE/$DOWNSCALE_STATEFULSET and upscale $NAMESPACE/$UPSCALE_STATEFULSET by $INSTANCES_TO_DOWNSCALE instances"

while [[ $INSTANCES_TO_DOWNSCALE -gt 0 ]]; do
  echo "----------------------------------------"
  echo "$(date): Scaling UP $UPSCALE_STATEFULSET to $((UPSCALE_REPLICA_SIZE + 1))"
  # Scale up. --current-replicas makes kubectl refuse the change if the
  # statefulset size is not what this script last observed.
  kubectl scale statefulset "$UPSCALE_STATEFULSET" --namespace="$NAMESPACE" --current-replicas="$UPSCALE_REPLICA_SIZE" --replicas=$((UPSCALE_REPLICA_SIZE + 1))
  kubectl rollout status statefulset "$UPSCALE_STATEFULSET" --namespace="$NAMESPACE" --timeout=30m
  UPSCALE_REPLICA_SIZE=$((UPSCALE_REPLICA_SIZE + 1))

  # Call /shutdown on the pod manually, so that it has enough time to flush
  # chunks. By doing standard termination, pod may not have enough time.
  # The pod with the highest index is the one removed by the scale-down below.
  POD_TO_SHUTDOWN=$DOWNSCALE_STATEFULSET-$((DOWNSCALE_REPLICA_SIZE - 1))

  echo "$(date): Triggering flush on $POD_TO_SHUTDOWN"

  # wget (BusyBox version) will fail, but we don't care ... important thing is
  # that it has triggered shutdown. Output and exit status are therefore
  # intentionally discarded.
  # -T 5 makes wget give up after 5 seconds, otherwise /shutdown takes a long time.
  # Preferably we would wait for /shutdown to return, but unfortunately that
  # doesn't work (even with big timeout), wget complains with weird error.
  kubectl exec "$POD_TO_SHUTDOWN" --namespace="$NAMESPACE" -- wget -T 5 http://localhost:80/shutdown >/dev/null 2>/dev/null || true

  # While request to /shutdown completes only after flushing has finished, it
  # unfortunately returns 204 status code, which confuses wget. That is the
  # reason why instead of waiting for /shutdown to complete, this script waits
  # for specific log messages to appear in the log file that signal start/end
  # of data flushing.
  if kubectl logs -f "$POD_TO_SHUTDOWN" --namespace="$NAMESPACE" | grep -E -q "starting to flush all the chunks|starting to flush and ship TSDB blocks"; then
    echo "$(date): Flushing started"
  else
    echo "$(date): Flushing not started? Check logs for pod $POD_TO_SHUTDOWN"
    exit 1
  fi

  if kubectl logs -f "$POD_TO_SHUTDOWN" --namespace="$NAMESPACE" | grep -E -q "flushing of chunks complete|finished flushing and shipping TSDB blocks"; then
    echo "$(date): Flushing complete"
  else
    echo "$(date): Failed to flush? Check logs for pod $POD_TO_SHUTDOWN"
    exit 1
  fi

  echo

  echo "$(date): Scaling DOWN $DOWNSCALE_STATEFULSET to $((DOWNSCALE_REPLICA_SIZE - 1))"
  kubectl scale statefulset "$DOWNSCALE_STATEFULSET" --namespace="$NAMESPACE" --current-replicas="$DOWNSCALE_REPLICA_SIZE" --replicas=$((DOWNSCALE_REPLICA_SIZE - 1))
  kubectl rollout status statefulset "$DOWNSCALE_STATEFULSET" --namespace="$NAMESPACE" --timeout=30m
  DOWNSCALE_REPLICA_SIZE=$((DOWNSCALE_REPLICA_SIZE - 1))

  INSTANCES_TO_DOWNSCALE=$((INSTANCES_TO_DOWNSCALE - 1))

  echo "----------------------------------------"
  echo
done