volcano.sh/apis@v1.8.2/pkg/apis/helpers/helpers.go (about) 1 /* 2 Copyright 2018 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package helpers 18 19 import ( 20 "context" 21 "crypto/tls" 22 "crypto/x509" 23 "fmt" 24 "net" 25 "net/http" 26 "os" 27 "os/signal" 28 "reflect" 29 "syscall" 30 "time" 31 32 v1 "k8s.io/api/core/v1" 33 apierrors "k8s.io/apimachinery/pkg/api/errors" 34 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 35 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 36 "k8s.io/apiserver/pkg/server/healthz" 37 "k8s.io/apiserver/pkg/server/mux" 38 "k8s.io/client-go/kubernetes" 39 "k8s.io/klog/v2" 40 41 vcbatch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 42 vcbus "volcano.sh/apis/pkg/apis/bus/v1alpha1" 43 flow "volcano.sh/apis/pkg/apis/flow/v1alpha1" 44 schedulerv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 45 ) 46 47 // JobKind creates job GroupVersionKind. 48 var JobKind = vcbatch.SchemeGroupVersion.WithKind("Job") 49 50 // CommandKind creates command GroupVersionKind. 51 var CommandKind = vcbus.SchemeGroupVersion.WithKind("Command") 52 53 // V1beta1QueueKind is queue kind with v1beta1 version. 54 var V1beta1QueueKind = schedulerv1beta1.SchemeGroupVersion.WithKind("Queue") 55 56 // JobFlowKind creates jobflow GroupVersionKind. 57 var JobFlowKind = flow.SchemeGroupVersion.WithKind("JobFlow") 58 59 // JobTemplateKind creates jobtemplate GroupVersionKind. 
60 var JobTemplateKind = flow.SchemeGroupVersion.WithKind("JobTemplate") 61 62 // CreateOrUpdateConfigMap creates config map if not present or updates config map if necessary. 63 func CreateOrUpdateConfigMap(job *vcbatch.Job, kubeClients kubernetes.Interface, data map[string]string, cmName string) error { 64 // If ConfigMap does not exist, create one for Job. 65 cmOld, err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Get(context.TODO(), cmName, metav1.GetOptions{}) 66 if err != nil { 67 if !apierrors.IsNotFound(err) { 68 klog.V(3).Infof("Failed to get ConfigMap for Job <%s/%s>: %v", 69 job.Namespace, job.Name, err) 70 return err 71 } 72 73 cm := &v1.ConfigMap{ 74 ObjectMeta: metav1.ObjectMeta{ 75 Namespace: job.Namespace, 76 Name: cmName, 77 OwnerReferences: []metav1.OwnerReference{ 78 *metav1.NewControllerRef(job, JobKind), 79 }, 80 }, 81 Data: data, 82 } 83 84 if _, err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Create(context.TODO(), cm, metav1.CreateOptions{}); err != nil { 85 klog.V(3).Infof("Failed to create ConfigMap for Job <%s/%s>: %v", 86 job.Namespace, job.Name, err) 87 return err 88 } 89 return nil 90 } 91 92 // no changes 93 if reflect.DeepEqual(cmOld.Data, data) { 94 return nil 95 } 96 97 cmOld.Data = data 98 if _, err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Update(context.TODO(), cmOld, metav1.UpdateOptions{}); err != nil { 99 klog.V(3).Infof("Failed to update ConfigMap for Job <%s/%s>: %v", 100 job.Namespace, job.Name, err) 101 return err 102 } 103 104 return nil 105 } 106 107 // CreateOrUpdateSecret creates secret if not present or updates secret if necessary 108 func CreateOrUpdateSecret(job *vcbatch.Job, kubeClients kubernetes.Interface, data map[string][]byte, secretName string) error { 109 secretOld, err := kubeClients.CoreV1().Secrets(job.Namespace).Get(context.TODO(), secretName, metav1.GetOptions{}) 110 if err != nil { 111 if !apierrors.IsNotFound(err) { 112 klog.V(3).Infof("Failed to get Secret for Job <%s/%s>: %v", 
113 job.Namespace, job.Name, err) 114 return err 115 } 116 117 secret := &v1.Secret{ 118 ObjectMeta: metav1.ObjectMeta{ 119 Name: secretName, 120 Namespace: job.Namespace, 121 OwnerReferences: []metav1.OwnerReference{ 122 *metav1.NewControllerRef(job, JobKind), 123 }, 124 }, 125 Data: data, 126 } 127 128 if _, err := kubeClients.CoreV1().Secrets(job.Namespace).Create(context.TODO(), secret, metav1.CreateOptions{}); err != nil { 129 klog.V(3).Infof("Failed to create Secret for Job <%s/%s>: %v", 130 job.Namespace, job.Name, err) 131 return err 132 } 133 134 return nil 135 } 136 137 // no changes 138 SSHConfig := "config" 139 if reflect.DeepEqual(secretOld.Data[SSHConfig], data[SSHConfig]) { 140 return nil 141 } 142 143 secretOld.Data = data 144 if _, err := kubeClients.CoreV1().Secrets(job.Namespace).Update(context.TODO(), secretOld, metav1.UpdateOptions{}); err != nil { 145 klog.V(3).Infof("Failed to update Secret for Job <%s/%s>: %v", 146 job.Namespace, job.Name, err) 147 return err 148 } 149 150 return nil 151 } 152 153 // DeleteConfigmap deletes the config map resource. 154 func DeleteConfigmap(job *vcbatch.Job, kubeClients kubernetes.Interface, cmName string) error { 155 if err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Delete(context.TODO(), cmName, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) { 156 klog.Errorf("Failed to delete Configmap of Job %v/%v: %v", 157 job.Namespace, job.Name, err) 158 return err 159 } 160 161 return nil 162 } 163 164 // DeleteSecret delete secret. 165 func DeleteSecret(job *vcbatch.Job, kubeClients kubernetes.Interface, secretName string) error { 166 err := kubeClients.CoreV1().Secrets(job.Namespace).Delete(context.TODO(), secretName, metav1.DeleteOptions{}) 167 if err != nil && apierrors.IsNotFound(err) { 168 return nil 169 } 170 171 return err 172 } 173 174 // GeneratePodgroupName generate podgroup name of normal pod. 
175 func GeneratePodgroupName(pod *v1.Pod) string { 176 pgName := vcbatch.PodgroupNamePrefix 177 178 if len(pod.OwnerReferences) != 0 { 179 for _, ownerReference := range pod.OwnerReferences { 180 if ownerReference.Controller != nil && *ownerReference.Controller { 181 pgName += string(ownerReference.UID) 182 return pgName 183 } 184 } 185 } 186 187 pgName += string(pod.UID) 188 189 return pgName 190 } 191 192 // StartHealthz register healthz interface. 193 func StartHealthz(healthzBindAddress, name string, caCertData, certData, certKeyData []byte) error { 194 listener, err := net.Listen("tcp", healthzBindAddress) 195 if err != nil { 196 return fmt.Errorf("failed to create listener: %v", err) 197 } 198 199 pathRecorderMux := mux.NewPathRecorderMux(name) 200 healthz.InstallHandler(pathRecorderMux) 201 202 server := &http.Server{ 203 Addr: listener.Addr().String(), 204 Handler: pathRecorderMux, 205 MaxHeaderBytes: 1 << 20, 206 } 207 if len(caCertData) != 0 && len(certData) != 0 && len(certKeyData) != 0 { 208 certPool := x509.NewCertPool() 209 certPool.AppendCertsFromPEM(caCertData) 210 211 sCert, err := tls.X509KeyPair(certData, certKeyData) 212 if err != nil { 213 return fmt.Errorf("failed to parse certData: %v", err) 214 } 215 server.TLSConfig = &tls.Config{ 216 Certificates: []tls.Certificate{sCert}, 217 RootCAs: certPool, 218 MinVersion: tls.VersionTLS12, 219 ClientAuth: tls.VerifyClientCertIfGiven, 220 CipherSuites: []uint16{ 221 tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, 222 tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, 223 tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, 224 }, 225 } 226 } 227 228 return runServer(server, listener) 229 } 230 231 func runServer(server *http.Server, ln net.Listener) error { 232 if ln == nil || server == nil { 233 return fmt.Errorf("listener and server must not be nil") 234 } 235 236 stopCh := make(chan os.Signal, 2) 237 signal.Notify(stopCh, syscall.SIGTERM, syscall.SIGINT) 238 239 go func() { 240 <-stopCh 241 ctx, cancel := 
context.WithTimeout(context.Background(), 0) 242 server.Shutdown(ctx) 243 cancel() 244 }() 245 246 go func() { 247 defer utilruntime.HandleCrash() 248 249 listener := tcpKeepAliveListener{ln.(*net.TCPListener)} 250 251 var err error 252 if server.TLSConfig != nil { 253 err = server.ServeTLS(listener, "", "") 254 } else { 255 err = server.Serve(listener) 256 } 257 msg := fmt.Sprintf("Stopped listening on %s", listener.Addr().String()) 258 select { 259 case <-stopCh: 260 klog.Info(msg) 261 default: 262 klog.Fatalf("%s due to error: %v", msg, err) 263 } 264 }() 265 266 return nil 267 } 268 269 type tcpKeepAliveListener struct { 270 *net.TCPListener 271 } 272 273 // Accept waits for and returns the next connection to the listener. 274 func (ln tcpKeepAliveListener) Accept() (net.Conn, error) { 275 tc, err := ln.AcceptTCP() 276 if err != nil { 277 return nil, err 278 } 279 tc.SetKeepAlive(true) 280 tc.SetKeepAlivePeriod(3 * time.Minute) 281 return tc, nil 282 }