github.com/eth-easl/loader@v0.0.0-20230908084258-8a37e1d94279/scripts/setup/expose_infra_metrics.sh (about)

     1  #!/usr/bin/env bash
     2  #
     3  # MIT License
     4  #
     5  # Copyright (c) 2023 EASL and the vHive community
     6  #
     7  # Permission is hereby granted, free of charge, to any person obtaining a copy
     8  # of this software and associated documentation files (the "Software"), to deal
     9  # in the Software without restriction, including without limitation the rights
    10  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  # copies of the Software, and to permit persons to whom the Software is
    12  # furnished to do so, subject to the following conditions:
    13  #
    14  # The above copyright notice and this permission notice shall be included in all
    15  # copies or substantial portions of the Software.
    16  #
    17  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    23  # SOFTWARE.
    24  #
    25  
    26  MASTER_NODE=$1
    27  
    28  server_exec() { 
    29  	ssh -oStrictHostKeyChecking=no -p 22 $MASTER_NODE $1;
    30  }
    31  
    32  {
    33  	echo 'Setting up monitoring components'
    34  	server_exec 'sudo apt install htop'
    35  
    36  	#* Deploy Metrics Server to k8s in namespace kube-system.
    37  	server_exec 'cd loader; kubectl apply -f config/metrics_server_components.yaml'
    38  
    39  	#* Install helm.
    40  	server_exec 'curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash'
    41  	#* Install and start prometheus stack using helm.
    42  	server_exec 'helm repo add prometheus-community https://prometheus-community.github.io/helm-charts'
    43  	server_exec 'helm repo update'
    44  
    45  	server_exec 'kubectl create namespace monitoring'
    46  	release_label="prometheus"
    47  	prometheus_chart_version="43.3.1"
    48  	server_exec "cd loader; helm install -n monitoring $release_label --version $prometheus_chart_version prometheus-community/kube-prometheus-stack -f config/prometh_stack_values.yaml"
    49  	#* Apply the ServiceMonitors/PodMonitors to collect metrics from Knative.
    50  	#* The ports of the control manager and scheduler are mapped in a way that prometheus default installation can find them. 
    51  	server_exec 'cd loader; kubectl apply -f config/prometh_kn.yaml'
    52  
    53  	#* Bind addresses of the control manager and scheduler to "0.0.0.0" so that prometheus can scrape them from any domains.
    54  	server_exec 'cd loader; sudo kubeadm upgrade apply --config config/kubeadm_init.yaml --ignore-preflight-errors all --force --v=7'
    55  
    56  
    57  	#* Change scrape intervals to 15s for all used monitors.
    58  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-apiserver --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    59  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-coredns --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    60  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-kube-controller-manager --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    61  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-kube-etcd --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    62  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-kube-proxy --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    63  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-kube-scheduler --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    64  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-operator --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    65  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-prometheus-prometheus --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    66  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-kube-state-metrics --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    67  	server_exec "sudo kubectl -n monitoring patch ServiceMonitor prometheus-prometheus-node-exporter --type json -p '[{"op": "add", "path": "/spec/endpoints/0/interval", "value": "15s"}]'"
    68  
    69  	sleep 5
    70  
    71  	#* Set up port prometheus panel (infinite loops are important to circumvent kubectl timeout in the middle of experiments).
    72  	server_exec 'tmux new -s prometheusd -d'
    73  	server_exec 'tmux send -t prometheusd "while true; do kubectl port-forward -n monitoring svc/prometheus-operated 9090; done" ENTER'
    74  
    75  	#* Set up grafana dash board (id: admin, pwd: prom-operator).
    76  	server_exec 'tmux new -s grafanad -d'
    77  	server_exec 'tmux send -t grafanad "while true; do kubectl -n monitoring port-forward deployment/prometheus-grafana 3000; done" ENTER'
    78  
    79  	echo 'Done setting up monitoring components'
    80  
    81  	exit
    82  }