github.com/eth-easl/loader@v0.0.0-20230908084258-8a37e1d94279/scripts/setup/create_multinode.sh

#!/usr/bin/env bash

#
# MIT License
#
# Copyright (c) 2023 EASL and the vHive community
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#

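# Usage, as inferred from how the arguments are consumed below (host names are
# placeholders): the first argument is the master node, the remaining arguments
# are worker nodes, and the first worker additionally acts as the
# loader/monitoring node. All hosts must be reachable over SSH.
#
#   ./create_multinode.sh master.example.com loader.example.com worker-2.example.com
#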
MASTER_NODE=$1
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"

source "$DIR/setup.cfg"

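# The sourced setup.cfg is expected to define at least the variables referenced
# in this script: CLUSTER_MODE, PODS_PER_NODE, VHIVE_BRANCH, LOADER_BRANCH,
# GITHUB_TOKEN (path to a file containing a GitHub access token), and
# DEPLOY_PROMETHEUS. A minimal illustrative example (placeholder values, not
# project defaults):
#
#   CLUSTER_MODE="container"            # container | firecracker | firecracker_snapshots
#   PODS_PER_NODE=240
#   VHIVE_BRANCH="main"
#   LOADER_BRANCH="main"
#   GITHUB_TOKEN="$HOME/.github_token"
#   DEPLOY_PROMETHEUS=true
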
if [ "$CLUSTER_MODE" = "container" ]
then
    OPERATION_MODE="stock-only"
    FIRECRACKER_SNAPSHOTS=""
elif [ "$CLUSTER_MODE" = "firecracker" ]
then
    OPERATION_MODE=""
    FIRECRACKER_SNAPSHOTS=""
elif [ "$CLUSTER_MODE" = "firecracker_snapshots" ]
then
    OPERATION_MODE=""
    FIRECRACKER_SNAPSHOTS="-snapshots"
else
    echo "Unsupported cluster mode: $CLUSTER_MODE"
    exit 1
fi

if [ "$PODS_PER_NODE" -gt 1022 ]; then
    # CIDR range limitation exceeded: each node is given a /22 pod subnet
    # (1024 addresses, roughly 1022 usable), so more pods per node cannot be addressed.
    echo "Pods per node cannot be greater than 1022. Cluster deployment has been aborted."
    exit 1
fi

server_exec() {
    ssh -oStrictHostKeyChecking=no -p 22 "$1" "$2";
}

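# Example invocation (hypothetical host): server_exec node-1.example.com 'uname -a'
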
common_init() {
    internal_init() {
        server_exec "$1" "git clone --branch=$VHIVE_BRANCH https://github.com/ease-lab/vhive"
        server_exec "$1" "cd; ./vhive/scripts/cloudlab/setup_node.sh $OPERATION_MODE"
        server_exec "$1" 'tmux new -s containerd -d'
        server_exec "$1" 'tmux send -t containerd "sudo containerd 2>&1 | tee ~/containerd_log.txt" ENTER'
        # install chrony (a precise NTP clock synchronizer) plus monitoring tools (htop, sysstat)
        server_exec "$1" 'sudo apt-get update && sudo apt-get install -y chrony htop sysstat'
        # synchronize the clock across nodes
        server_exec "$1" "sudo chronyd -q \"server ops.emulab.net iburst\""
        # dump clock info
        server_exec "$1" 'sudo chronyc tracking'
        # stabilize the node
        server_exec "$1" './vhive/scripts/stabilize.sh'
    }

    for node in "$@"
    do
        internal_init "$node" &
    done

    wait
}

function setup_master() {
    echo "Setting up master node: $MASTER_NODE"

    server_exec "$MASTER_NODE" 'wget -q https://go.dev/dl/go1.19.4.linux-amd64.tar.gz >/dev/null'
    server_exec "$MASTER_NODE" 'sudo rm -rf /usr/local/go && sudo tar -C /usr/local/ -xzf go1.19.4.linux-amd64.tar.gz >/dev/null'
    server_exec "$MASTER_NODE" 'echo "export PATH=$PATH:/usr/local/go/bin" >> .profile'

    server_exec "$MASTER_NODE" 'tmux new -s runner -d'
    server_exec "$MASTER_NODE" 'tmux new -s kwatch -d'
    server_exec "$MASTER_NODE" 'tmux new -s master -d'

    # Set up GitHub authentication: register a freshly generated SSH key with the
    # account behind $GITHUB_TOKEN so the loader repository can be cloned over SSH
    ACCESS_TOKEN="$(cat "$GITHUB_TOKEN")"

    server_exec "$MASTER_NODE" 'echo -en "\n\n" | ssh-keygen -t rsa'
    server_exec "$MASTER_NODE" 'ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts'
    server_exec "$MASTER_NODE" 'curl -H "Authorization: token '"$ACCESS_TOKEN"'" --data "{\"title\":\"'"key:\$(hostname)"'\",\"key\":\"'"\$(cat ~/.ssh/id_rsa.pub)"'\"}" https://api.github.com/user/keys'

    clone_loader "$MASTER_NODE"

    MN_CLUSTER="./vhive/scripts/cluster/create_multinode_cluster.sh ${OPERATION_MODE}"
    server_exec "$MASTER_NODE" "tmux send -t master \"$MN_CLUSTER\" ENTER"

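    # The "master" tmux session now runs vHive's create_multinode_cluster.sh, which
    # bootstraps the Kubernetes control plane and is expected to print the join
    # command for the workers, followed by a prompt containing the phrase
    # "All nodes need to be joined". The loop below scrapes that join command
    # out of the tmux pane.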
    # Get the join token from k8s.
    while [ ! "$LOGIN_TOKEN" ]
    do
        sleep 1
        server_exec "$MASTER_NODE" 'tmux capture-pane -t master -b token'
        LOGIN_TOKEN="$(server_exec "$MASTER_NODE" 'tmux show-buffer -b token | grep -B 3 "All nodes need to be joined"')"
        echo "$LOGIN_TOKEN"
    done
    # cut off the last line
    LOGIN_TOKEN=${LOGIN_TOKEN%[$'\t\r\n']*}
    # remove the trailing backslash
    LOGIN_TOKEN=${LOGIN_TOKEN/\\/}
    # remove all remaining tabs, line feeds and carriage returns
    LOGIN_TOKEN=${LOGIN_TOKEN//[$'\t\r\n']}
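    # After this cleanup, LOGIN_TOKEN should hold the join command on a single
    # line, roughly of the form (placeholder values):
    #   kubeadm join <master-ip>:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>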
}

function setup_loader() {
    echo "Setting up loader/monitoring node: $1"

    server_exec "$1" 'wget -q https://go.dev/dl/go1.20.5.linux-amd64.tar.gz >/dev/null'
    server_exec "$1" 'sudo rm -rf /usr/local/go && sudo tar -C /usr/local/ -xzf go1.20.5.linux-amd64.tar.gz >/dev/null'
    server_exec "$1" 'echo "export PATH=$PATH:/usr/local/go/bin" >> .profile'
}

function setup_vhive_firecracker_daemon() {
    node=$1

    server_exec "$node" 'cd vhive; source /etc/profile && go build'
    server_exec "$node" 'tmux new -s firecracker -d'
    server_exec "$node" 'tmux send -t firecracker "sudo PATH=$PATH /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 2>&1 | tee ~/firecracker_log.txt" ENTER'
    server_exec "$node" 'tmux new -s vhive -d'
    server_exec "$node" 'tmux send -t vhive "cd vhive" ENTER'
    RUN_VHIVE_CMD="sudo ./vhive ${FIRECRACKER_SNAPSHOTS} 2>&1 | tee ~/vhive_log.txt"
    server_exec "$node" "tmux send -t vhive \"$RUN_VHIVE_CMD\" ENTER"
}

function setup_workers() {
    internal_setup() {
        node=$1

        echo "Setting up worker node: $node"
        server_exec "$node" "./vhive/scripts/cluster/setup_worker_kubelet.sh $OPERATION_MODE"

        if [ "$OPERATION_MODE" = "" ]; then
            setup_vhive_firecracker_daemon "$node"
        fi

        server_exec "$node" "sudo ${LOGIN_TOKEN}"
        echo "Worker node $node has joined the cluster."

        # Stretch the pod capacity of the worker node to PODS_PER_NODE (k8s default: 110).
        # Empirically, a limit of 240 gives a maximum of 240-40=200 usable pods.
        # Both maxPods and containerLogMaxSize are appended to the kubelet configuration.
        echo "Stretching node capacity for $node."
        server_exec "$node" "echo \"maxPods: ${PODS_PER_NODE}\" > >(sudo tee -a /var/lib/kubelet/config.yaml >/dev/null)"
        server_exec "$node" "echo \"containerLogMaxSize: 512Mi\" > >(sudo tee -a /var/lib/kubelet/config.yaml >/dev/null)"
        server_exec "$node" 'sudo systemctl restart kubelet'
        server_exec "$node" 'sleep 10'

        # Rejoining has to be performed even though it throws errors; otherwise, restarting the kubelet makes the node unreachable for some reason.
        server_exec "$node" "sudo ${LOGIN_TOKEN} > /dev/null 2>&1"
        echo "Worker node $node joined the cluster (again :P)."
    }

    for node in "$@"
    do
        internal_setup "$node" &
    done

    wait
}

function extend_CIDR() {
    #* Get node name list.
    readarray -t NODE_NAMES < <(server_exec "$MASTER_NODE" 'kubectl get no' | tail -n +2 | awk '{print $1}')

    if [ ${#NODE_NAMES[@]} -gt 63 ]; then
        echo "Cannot extend CIDR range for more than 63 nodes. Cluster deployment has been aborted."
        exit 1
    fi

    for i in "${!NODE_NAMES[@]}"; do
        NODE_NAME=${NODE_NAMES[i]}
        #* Compute the per-node subnet: 10.168.0.0 = 00001010.10101000.000000|00.00000000, so a /22 gives about 1022 usable IPs per worker.
        #* To be safe, we shift both master and workers by an offset of 0.0.4.0 (4 * 2^8).
        # (NB: zsh indices start from 1.)
        #* Assume at most 63 nodes in total.
        let SUBNET=i*4+4
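        # Worked example of the arithmetic above (illustration only):
        #   i=0  -> SUBNET=4   -> podCIDR 10.168.4.0/22
        #   i=1  -> SUBNET=8   -> podCIDR 10.168.8.0/22
        #   i=62 -> SUBNET=252 -> podCIDR 10.168.252.0/22 (hence the 63-node limit above)
        # Each /22 block spans 1024 addresses, i.e. roughly 1022 usable pod IPs per node.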
        #* Extend the pod IP range: delete the node object and recreate it with the new podCIDR.
        server_exec "$MASTER_NODE" "kubectl get node $NODE_NAME -o json | jq '.spec.podCIDR |= \"10.168.$SUBNET.0/22\"' > node.yaml"
        server_exec "$MASTER_NODE" "kubectl delete node $NODE_NAME && kubectl create -f node.yaml"

        echo "Changed pod CIDR for worker $NODE_NAME to 10.168.$SUBNET.0/22"
        sleep 5
    done

    #* Join the cluster for the 3rd time.
    for node in "$@"
    do
        server_exec "$node" "sudo ${LOGIN_TOKEN} > /dev/null 2>&1"
        echo "Worker node $node joined the cluster (again^2 :D)."
    done
}

function clone_loader() {
    server_exec "$1" "git clone --depth=1 --branch=$LOADER_BRANCH git@github.com:eth-easl/loader.git"
    server_exec "$1" 'echo -en "\n\n" | sudo apt-get install -y python3-pip python-dev'
    server_exec "$1" 'cd; cd loader; pip install -r config/requirements.txt'
}

function copy_k8s_certificates() {
    function internal_copy() {
        server_exec "$1" "mkdir -p ~/.kube"
        rsync ./kubeconfig "$1":~/.kube/config
    }

    echo "$MASTER_NODE"
    rsync "$MASTER_NODE":~/.kube/config ./kubeconfig

    for node in "$@"
    do
        internal_copy "$node" &
    done

    wait

    rm ./kubeconfig
}

function clone_loader_on_workers() {
    function internal_clone() {
        rsync ./id_rsa* "$1":~/.ssh/
        server_exec "$1" "chmod 600 ~/.ssh/id_rsa"
        server_exec "$1" 'ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts'

        clone_loader "$1"
    }

    # copy the SSH keys from the master node first
    rsync "$MASTER_NODE":~/.ssh/id_rsa* .

    for node in "$@"
    do
        internal_clone "$node" &
    done

    wait

    rm ./id_rsa*
}

###############################################
######## MAIN SETUP PROCEDURE IS BELOW ########
###############################################

{
    # Set up all nodes including the master
    common_init "$@"

    shift # make the argument list contain only worker nodes (drops the master node)

    setup_master
    setup_loader "$1"
    setup_workers "$@"

    if [ "$PODS_PER_NODE" -gt 240 ]; then
        extend_CIDR "$@"
    fi

    # Notify the master that all nodes have joined the cluster
    server_exec "$MASTER_NODE" 'tmux send -t master "y" ENTER'

    namespace_info=$(server_exec "$MASTER_NODE" "kubectl get namespaces")
    while [[ ${namespace_info} != *'knative-serving'* ]]; do
        sleep 60
        namespace_info=$(server_exec "$MASTER_NODE" "kubectl get namespaces")
    done

    echo "Master node $MASTER_NODE finalised."

    # Copy the kubeconfig (API server credentials) from the master to each worker node
    copy_k8s_certificates "$@"
    clone_loader_on_workers "$@"

    server_exec "$MASTER_NODE" 'cd loader; bash scripts/setup/patch_init_scale.sh'

    source "$DIR/label.sh"

    # Force placement of metrics collectors and instrumentation on the loader node and of the control plane on the master
    label_nodes "$MASTER_NODE" "$1" # the loader node is second in the original list, becoming first after the shift

    # patch knative to accept nodeselector
    server_exec "$MASTER_NODE" "cd loader; kubectl patch configmap config-features -n knative-serving -p '{\"data\": {\"kubernetes.podspec-nodeselector\": \"enabled\"}}'"

    if [[ "$DEPLOY_PROMETHEUS" == true ]]; then
        "$DIR"/expose_infra_metrics.sh "$MASTER_NODE"
    fi
}