volcano.sh/apis@v1.8.2/pkg/apis/helpers/helpers.go (about)

     1  /*
     2  Copyright 2018 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package helpers
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"crypto/x509"
    23  	"fmt"
    24  	"net"
    25  	"net/http"
    26  	"os"
    27  	"os/signal"
    28  	"reflect"
    29  	"syscall"
    30  	"time"
    31  
    32  	v1 "k8s.io/api/core/v1"
    33  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    34  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    35  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    36  	"k8s.io/apiserver/pkg/server/healthz"
    37  	"k8s.io/apiserver/pkg/server/mux"
    38  	"k8s.io/client-go/kubernetes"
    39  	"k8s.io/klog/v2"
    40  
    41  	vcbatch "volcano.sh/apis/pkg/apis/batch/v1alpha1"
    42  	vcbus "volcano.sh/apis/pkg/apis/bus/v1alpha1"
    43  	flow "volcano.sh/apis/pkg/apis/flow/v1alpha1"
    44  	schedulerv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    45  )
    46  
    47  // JobKind creates job GroupVersionKind.
    48  var JobKind = vcbatch.SchemeGroupVersion.WithKind("Job")
    49  
    50  // CommandKind creates command GroupVersionKind.
    51  var CommandKind = vcbus.SchemeGroupVersion.WithKind("Command")
    52  
    53  // V1beta1QueueKind is queue kind with v1alpha2 version.
    54  var V1beta1QueueKind = schedulerv1beta1.SchemeGroupVersion.WithKind("Queue")
    55  
    56  // JobFlowKind creates jobflow GroupVersionKind.
    57  var JobFlowKind = flow.SchemeGroupVersion.WithKind("JobFlow")
    58  
    59  // JobTemplateKind creates jobtemplate GroupVersionKind.
    60  var JobTemplateKind = flow.SchemeGroupVersion.WithKind("JobTemplate")
    61  
    62  // CreateOrUpdateConfigMap creates config map if not present or updates config map if necessary.
    63  func CreateOrUpdateConfigMap(job *vcbatch.Job, kubeClients kubernetes.Interface, data map[string]string, cmName string) error {
    64  	// If ConfigMap does not exist, create one for Job.
    65  	cmOld, err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Get(context.TODO(), cmName, metav1.GetOptions{})
    66  	if err != nil {
    67  		if !apierrors.IsNotFound(err) {
    68  			klog.V(3).Infof("Failed to get ConfigMap for Job <%s/%s>: %v",
    69  				job.Namespace, job.Name, err)
    70  			return err
    71  		}
    72  
    73  		cm := &v1.ConfigMap{
    74  			ObjectMeta: metav1.ObjectMeta{
    75  				Namespace: job.Namespace,
    76  				Name:      cmName,
    77  				OwnerReferences: []metav1.OwnerReference{
    78  					*metav1.NewControllerRef(job, JobKind),
    79  				},
    80  			},
    81  			Data: data,
    82  		}
    83  
    84  		if _, err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Create(context.TODO(), cm, metav1.CreateOptions{}); err != nil {
    85  			klog.V(3).Infof("Failed to create ConfigMap for Job <%s/%s>: %v",
    86  				job.Namespace, job.Name, err)
    87  			return err
    88  		}
    89  		return nil
    90  	}
    91  
    92  	// no changes
    93  	if reflect.DeepEqual(cmOld.Data, data) {
    94  		return nil
    95  	}
    96  
    97  	cmOld.Data = data
    98  	if _, err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Update(context.TODO(), cmOld, metav1.UpdateOptions{}); err != nil {
    99  		klog.V(3).Infof("Failed to update ConfigMap for Job <%s/%s>: %v",
   100  			job.Namespace, job.Name, err)
   101  		return err
   102  	}
   103  
   104  	return nil
   105  }
   106  
   107  // CreateOrUpdateSecret creates secret if not present or updates secret if necessary
   108  func CreateOrUpdateSecret(job *vcbatch.Job, kubeClients kubernetes.Interface, data map[string][]byte, secretName string) error {
   109  	secretOld, err := kubeClients.CoreV1().Secrets(job.Namespace).Get(context.TODO(), secretName, metav1.GetOptions{})
   110  	if err != nil {
   111  		if !apierrors.IsNotFound(err) {
   112  			klog.V(3).Infof("Failed to get Secret for Job <%s/%s>: %v",
   113  				job.Namespace, job.Name, err)
   114  			return err
   115  		}
   116  
   117  		secret := &v1.Secret{
   118  			ObjectMeta: metav1.ObjectMeta{
   119  				Name:      secretName,
   120  				Namespace: job.Namespace,
   121  				OwnerReferences: []metav1.OwnerReference{
   122  					*metav1.NewControllerRef(job, JobKind),
   123  				},
   124  			},
   125  			Data: data,
   126  		}
   127  
   128  		if _, err := kubeClients.CoreV1().Secrets(job.Namespace).Create(context.TODO(), secret, metav1.CreateOptions{}); err != nil {
   129  			klog.V(3).Infof("Failed to create Secret for Job <%s/%s>: %v",
   130  				job.Namespace, job.Name, err)
   131  			return err
   132  		}
   133  
   134  		return nil
   135  	}
   136  
   137  	// no changes
   138  	SSHConfig := "config"
   139  	if reflect.DeepEqual(secretOld.Data[SSHConfig], data[SSHConfig]) {
   140  		return nil
   141  	}
   142  
   143  	secretOld.Data = data
   144  	if _, err := kubeClients.CoreV1().Secrets(job.Namespace).Update(context.TODO(), secretOld, metav1.UpdateOptions{}); err != nil {
   145  		klog.V(3).Infof("Failed to update Secret for Job <%s/%s>: %v",
   146  			job.Namespace, job.Name, err)
   147  		return err
   148  	}
   149  
   150  	return nil
   151  }
   152  
   153  // DeleteConfigmap deletes the config map resource.
   154  func DeleteConfigmap(job *vcbatch.Job, kubeClients kubernetes.Interface, cmName string) error {
   155  	if err := kubeClients.CoreV1().ConfigMaps(job.Namespace).Delete(context.TODO(), cmName, metav1.DeleteOptions{}); err != nil && !apierrors.IsNotFound(err) {
   156  		klog.Errorf("Failed to delete Configmap of Job %v/%v: %v",
   157  			job.Namespace, job.Name, err)
   158  		return err
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  // DeleteSecret delete secret.
   165  func DeleteSecret(job *vcbatch.Job, kubeClients kubernetes.Interface, secretName string) error {
   166  	err := kubeClients.CoreV1().Secrets(job.Namespace).Delete(context.TODO(), secretName, metav1.DeleteOptions{})
   167  	if err != nil && apierrors.IsNotFound(err) {
   168  		return nil
   169  	}
   170  
   171  	return err
   172  }
   173  
   174  // GeneratePodgroupName generate podgroup name of normal pod.
   175  func GeneratePodgroupName(pod *v1.Pod) string {
   176  	pgName := vcbatch.PodgroupNamePrefix
   177  
   178  	if len(pod.OwnerReferences) != 0 {
   179  		for _, ownerReference := range pod.OwnerReferences {
   180  			if ownerReference.Controller != nil && *ownerReference.Controller {
   181  				pgName += string(ownerReference.UID)
   182  				return pgName
   183  			}
   184  		}
   185  	}
   186  
   187  	pgName += string(pod.UID)
   188  
   189  	return pgName
   190  }
   191  
   192  // StartHealthz register healthz interface.
   193  func StartHealthz(healthzBindAddress, name string, caCertData, certData, certKeyData []byte) error {
   194  	listener, err := net.Listen("tcp", healthzBindAddress)
   195  	if err != nil {
   196  		return fmt.Errorf("failed to create listener: %v", err)
   197  	}
   198  
   199  	pathRecorderMux := mux.NewPathRecorderMux(name)
   200  	healthz.InstallHandler(pathRecorderMux)
   201  
   202  	server := &http.Server{
   203  		Addr:           listener.Addr().String(),
   204  		Handler:        pathRecorderMux,
   205  		MaxHeaderBytes: 1 << 20,
   206  	}
   207  	if len(caCertData) != 0 && len(certData) != 0 && len(certKeyData) != 0 {
   208  		certPool := x509.NewCertPool()
   209  		certPool.AppendCertsFromPEM(caCertData)
   210  
   211  		sCert, err := tls.X509KeyPair(certData, certKeyData)
   212  		if err != nil {
   213  			return fmt.Errorf("failed to parse certData: %v", err)
   214  		}
   215  		server.TLSConfig = &tls.Config{
   216  			Certificates: []tls.Certificate{sCert},
   217  			RootCAs:      certPool,
   218  			MinVersion:   tls.VersionTLS12,
   219  			ClientAuth:   tls.VerifyClientCertIfGiven,
   220  			CipherSuites: []uint16{
   221  				tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
   222  				tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
   223  				tls.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
   224  			},
   225  		}
   226  	}
   227  
   228  	return runServer(server, listener)
   229  }
   230  
   231  func runServer(server *http.Server, ln net.Listener) error {
   232  	if ln == nil || server == nil {
   233  		return fmt.Errorf("listener and server must not be nil")
   234  	}
   235  
   236  	stopCh := make(chan os.Signal, 2)
   237  	signal.Notify(stopCh, syscall.SIGTERM, syscall.SIGINT)
   238  
   239  	go func() {
   240  		<-stopCh
   241  		ctx, cancel := context.WithTimeout(context.Background(), 0)
   242  		server.Shutdown(ctx)
   243  		cancel()
   244  	}()
   245  
   246  	go func() {
   247  		defer utilruntime.HandleCrash()
   248  
   249  		listener := tcpKeepAliveListener{ln.(*net.TCPListener)}
   250  
   251  		var err error
   252  		if server.TLSConfig != nil {
   253  			err = server.ServeTLS(listener, "", "")
   254  		} else {
   255  			err = server.Serve(listener)
   256  		}
   257  		msg := fmt.Sprintf("Stopped listening on %s", listener.Addr().String())
   258  		select {
   259  		case <-stopCh:
   260  			klog.Info(msg)
   261  		default:
   262  			klog.Fatalf("%s due to error: %v", msg, err)
   263  		}
   264  	}()
   265  
   266  	return nil
   267  }
   268  
   269  type tcpKeepAliveListener struct {
   270  	*net.TCPListener
   271  }
   272  
   273  // Accept waits for and returns the next connection to the listener.
   274  func (ln tcpKeepAliveListener) Accept() (net.Conn, error) {
   275  	tc, err := ln.AcceptTCP()
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	tc.SetKeepAlive(true)
   280  	tc.SetKeepAlivePeriod(3 * time.Minute)
   281  	return tc, nil
   282  }