github.com/m-lab/locate@v0.17.6/cmd/heartbeat/health/kubernetes-client.go (about) 1 package health 2 3 import ( 4 "context" 5 "log" 6 "net/url" 7 "path" 8 "strconv" 9 "strings" 10 "time" 11 12 "github.com/m-lab/go/rtx" 13 "github.com/m-lab/locate/metrics" 14 v1 "k8s.io/api/core/v1" 15 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 "k8s.io/client-go/kubernetes" 17 "k8s.io/client-go/tools/clientcmd" 18 "k8s.io/client-go/tools/clientcmd/api" 19 ) 20 21 var errKubernetesAPI = "error making request to Kubernetes API server" 22 23 // KubernetesClient manages requests to the Kubernetes API server. 24 type KubernetesClient struct { 25 pod string 26 node string 27 namespace string 28 clientset kubernetes.Interface 29 } 30 31 // MustNewKubernetesClient creates a new KubenernetesClient instance. 32 // If the client cannot be instantiated, the function will exit. 33 func MustNewKubernetesClient(url *url.URL, pod, node, namespace, auth string) *KubernetesClient { 34 defConfig := getDefaultClientConfig(url, auth) 35 restConfig, err := defConfig.ClientConfig() 36 rtx.Must(err, "failed to create kubernetes config") 37 38 clientset, err := kubernetes.NewForConfig(restConfig) 39 rtx.Must(err, "failed to create kubernetes clientset") 40 41 client := &KubernetesClient{ 42 pod: pod, 43 node: node, 44 namespace: namespace, 45 clientset: clientset, 46 } 47 return client 48 } 49 50 func getDefaultClientConfig(url *url.URL, auth string) clientcmd.ClientConfig { 51 // This is a low-level structure normally created from parsing a kubeconfig 52 // file. Since we know all values we can create the client object directly. 53 // 54 // The cluster and user names serve only to define a context that 55 // associates login credentials with a specific cluster. 56 clusterClient := api.Config{ 57 Clusters: map[string]*api.Cluster{ 58 // Define the cluster address and CA Certificate. 59 "cluster": { 60 Server: url.String(), 61 InsecureSkipTLSVerify: false, // Require a valid CA Certificate. 62 CertificateAuthority: path.Join(auth, "ca.crt"), 63 }, 64 }, 65 AuthInfos: map[string]*api.AuthInfo{ 66 // Define the user credentials for access to the API. 67 "user": { 68 TokenFile: path.Join(auth, "token"), 69 }, 70 }, 71 Contexts: map[string]*api.Context{ 72 // Define a context that refers to the above cluster and user. 73 "cluster-user": { 74 Cluster: "cluster", 75 AuthInfo: "user", 76 }, 77 }, 78 // Use the above context. 79 CurrentContext: "cluster-user", 80 } 81 82 defConfig := clientcmd.NewDefaultClientConfig( 83 clusterClient, 84 &clientcmd.ConfigOverrides{ 85 ClusterInfo: api.Cluster{Server: ""}, 86 }, 87 ) 88 89 return defConfig 90 } 91 92 // isHealthy returns true if it can determine the following conditions are true: 93 // - The Pod's status is "Running" 94 // - The Node's Ready condition is "True" 95 // - The Node does not have a "lame-duck" taint 96 // 97 // OR if it cannot contact the API Server to make a determination. 98 func (c *KubernetesClient) isHealthy(ctx context.Context) bool { 99 start := time.Now() 100 isHealthy := c.isPodRunning(ctx) && c.isNodeReady(ctx) 101 metrics.KubernetesRequestTimeHistogram.WithLabelValues(strconv.FormatBool(isHealthy)).Observe(time.Since(start).Seconds()) 102 return isHealthy 103 } 104 105 func (c *KubernetesClient) isPodRunning(ctx context.Context) bool { 106 pod, err := c.clientset.CoreV1().Pods(c.namespace).Get(ctx, c.pod, metav1.GetOptions{}) 107 if err != nil { 108 log.Printf("%s: %v", errKubernetesAPI, err) 109 metrics.KubernetesRequestsTotal.WithLabelValues("pod", extractError(err)).Inc() 110 return true 111 } 112 113 metrics.KubernetesRequestsTotal.WithLabelValues("pod", "OK").Inc() 114 return pod.Status.Phase == "Running" 115 } 116 117 // isNodeReady returns true if it can determine the following conditions are true: 118 // - The Node's Ready condition is "True" 119 // - The Node does not have a "lame-duck" taint 120 // 121 // OR if it cannot contact the API Server to make a determination. 122 func (c *KubernetesClient) isNodeReady(ctx context.Context) bool { 123 node, err := c.clientset.CoreV1().Nodes().Get(ctx, c.node, metav1.GetOptions{}) 124 if err != nil { 125 log.Printf("%s: %v", errKubernetesAPI, err) 126 metrics.KubernetesRequestsTotal.WithLabelValues("node", extractError(err)).Inc() 127 return true 128 } 129 130 metrics.KubernetesRequestsTotal.WithLabelValues("node", "OK").Inc() 131 for _, condition := range node.Status.Conditions { 132 if condition.Type == "Ready" && condition.Status == "True" { 133 return !isInMaintenance(node) 134 } 135 } 136 137 return false 138 } 139 140 func isInMaintenance(node *v1.Node) bool { 141 for _, taint := range node.Spec.Taints { 142 if taint.Key == "lame-duck" { 143 return true 144 } 145 } 146 147 return false 148 } 149 150 // extractError extracts the base error string from the error returned by the 151 // the Kubernetes API. 152 func extractError(err error) string { 153 parts := strings.Split(err.Error(), ": ") 154 return parts[len(parts)-1] 155 }