volcano.sh/volcano@v1.9.0/pkg/controllers/job/plugins/svc/svc.go (about)

     1  /*
     2  Copyright 2019 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package svc
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"strconv"
    24  	"strings"
    25  
    26  	v1 "k8s.io/api/core/v1"
    27  	networkingv1 "k8s.io/api/networking/v1"
    28  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/klog/v2"
    31  
    32  	batch "volcano.sh/apis/pkg/apis/batch/v1alpha1"
    33  	"volcano.sh/apis/pkg/apis/helpers"
    34  	jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers"
    35  	pluginsinterface "volcano.sh/volcano/pkg/controllers/job/plugins/interface"
    36  )
    37  
    38  type servicePlugin struct {
    39  	// Arguments given for the plugin
    40  	pluginArguments []string
    41  
    42  	Clientset pluginsinterface.PluginClientset
    43  
    44  	// flag parse args
    45  	publishNotReadyAddresses bool
    46  	disableNetworkPolicy     bool
    47  }
    48  
    49  // New creates service plugin.
    50  func New(client pluginsinterface.PluginClientset, arguments []string) pluginsinterface.PluginInterface {
    51  	servicePlugin := servicePlugin{pluginArguments: arguments, Clientset: client}
    52  
    53  	servicePlugin.addFlags()
    54  
    55  	return &servicePlugin
    56  }
    57  
    58  func (sp *servicePlugin) Name() string {
    59  	return "svc"
    60  }
    61  
    62  func (sp *servicePlugin) addFlags() {
    63  	flagSet := flag.NewFlagSet(sp.Name(), flag.ContinueOnError)
    64  	flagSet.BoolVar(&sp.publishNotReadyAddresses, "publish-not-ready-addresses", sp.publishNotReadyAddresses,
    65  		"set publishNotReadyAddresses of svc to true")
    66  	flagSet.BoolVar(&sp.disableNetworkPolicy, "disable-network-policy", sp.disableNetworkPolicy,
    67  		"set disableNetworkPolicy of svc to true")
    68  
    69  	if err := flagSet.Parse(sp.pluginArguments); err != nil {
    70  		klog.Errorf("plugin %s flagset parse failed, err: %v", sp.Name(), err)
    71  	}
    72  }
    73  
    74  func (sp *servicePlugin) OnPodCreate(pod *v1.Pod, job *batch.Job) error {
    75  	// Add `hostname` and `subdomain` for pod, mount service config for pod.
    76  	// A pod with `hostname` and `subdomain` will have the fully qualified domain name(FQDN)
    77  	// `hostname.subdomain.namespace.svc.cluster-domain.example`.
    78  	// If there exists a headless service in the same namespace as the pod and with the
    79  	// same name as the `subdomain`, the cluster's KubeDNS Server will returns an A record for
    80  	// the Pods's fully qualified hostname, pointing to the Pod’s IP.
    81  	// `hostname.subdomain` will be used as address of the pod.
    82  	// By default, a client Pod’s DNS search list will include the Pod’s own namespace and
    83  	// the cluster’s default domain, so the pod can be accessed by pods in the same namespace
    84  	// through the address of pod.
    85  	// More info: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service
    86  	if len(pod.Spec.Hostname) == 0 {
    87  		pod.Spec.Hostname = pod.Name
    88  	}
    89  	if len(pod.Spec.Subdomain) == 0 {
    90  		pod.Spec.Subdomain = job.Name
    91  	}
    92  
    93  	var hostEnv []v1.EnvVar
    94  	var envNames []string
    95  
    96  	for _, ts := range job.Spec.Tasks {
    97  		// TODO(k82cn): The splitter and the prefix of env should be configurable.
    98  		formateENVKey := strings.Replace(ts.Name, "-", "_", -1)
    99  		envNames = append(envNames, fmt.Sprintf(EnvTaskHostFmt, strings.ToUpper(formateENVKey)))
   100  		envNames = append(envNames, fmt.Sprintf(EnvHostNumFmt, strings.ToUpper(formateENVKey)))
   101  	}
   102  
   103  	for _, name := range envNames {
   104  		hostEnv = append(hostEnv, v1.EnvVar{
   105  			Name: name,
   106  			ValueFrom: &v1.EnvVarSource{
   107  				ConfigMapKeyRef: &v1.ConfigMapKeySelector{
   108  					LocalObjectReference: v1.LocalObjectReference{Name: sp.cmName(job)},
   109  					Key:                  name,
   110  				}}},
   111  		)
   112  	}
   113  
   114  	for i := range pod.Spec.Containers {
   115  		pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, hostEnv...)
   116  	}
   117  
   118  	for i := range pod.Spec.InitContainers {
   119  		pod.Spec.InitContainers[i].Env = append(pod.Spec.InitContainers[i].Env, hostEnv...)
   120  	}
   121  
   122  	sp.mountConfigmap(pod, job)
   123  
   124  	return nil
   125  }
   126  
   127  func (sp *servicePlugin) OnJobAdd(job *batch.Job) error {
   128  	if job.Status.ControlledResources["plugin-"+sp.Name()] == sp.Name() {
   129  		return nil
   130  	}
   131  
   132  	hostFile := GenerateHosts(job)
   133  
   134  	// Create ConfigMap of hosts for Pods to mount.
   135  	if err := helpers.CreateOrUpdateConfigMap(job, sp.Clientset.KubeClients, hostFile, sp.cmName(job)); err != nil {
   136  		return err
   137  	}
   138  
   139  	if err := sp.createServiceIfNotExist(job); err != nil {
   140  		return err
   141  	}
   142  
   143  	if !sp.disableNetworkPolicy {
   144  		if err := sp.createNetworkPolicyIfNotExist(job); err != nil {
   145  			return err
   146  		}
   147  	}
   148  	job.Status.ControlledResources["plugin-"+sp.Name()] = sp.Name()
   149  
   150  	return nil
   151  }
   152  
   153  func (sp *servicePlugin) OnJobDelete(job *batch.Job) error {
   154  	if job.Status.ControlledResources["plugin-"+sp.Name()] != sp.Name() {
   155  		return nil
   156  	}
   157  
   158  	if err := helpers.DeleteConfigmap(job, sp.Clientset.KubeClients, sp.cmName(job)); err != nil {
   159  		return err
   160  	}
   161  
   162  	if err := sp.Clientset.KubeClients.CoreV1().Services(job.Namespace).Delete(context.TODO(), job.Name, metav1.DeleteOptions{}); err != nil {
   163  		if !apierrors.IsNotFound(err) {
   164  			klog.Errorf("Failed to delete Service of Job %v/%v: %v", job.Namespace, job.Name, err)
   165  			return err
   166  		}
   167  	}
   168  	delete(job.Status.ControlledResources, "plugin-"+sp.Name())
   169  
   170  	if !sp.disableNetworkPolicy {
   171  		if err := sp.Clientset.KubeClients.NetworkingV1().NetworkPolicies(job.Namespace).Delete(context.TODO(), job.Name, metav1.DeleteOptions{}); err != nil {
   172  			if !apierrors.IsNotFound(err) {
   173  				klog.Errorf("Failed to delete Network policy of Job %v/%v: %v", job.Namespace, job.Name, err)
   174  				return err
   175  			}
   176  		}
   177  	}
   178  	return nil
   179  }
   180  
   181  func (sp *servicePlugin) OnJobUpdate(job *batch.Job) error {
   182  	hostFile := GenerateHosts(job)
   183  
   184  	// updates ConfigMap of hosts for Pods to mount.
   185  	return helpers.CreateOrUpdateConfigMap(job, sp.Clientset.KubeClients, hostFile, sp.cmName(job))
   186  }
   187  
   188  func (sp *servicePlugin) mountConfigmap(pod *v1.Pod, job *batch.Job) {
   189  	cmName := sp.cmName(job)
   190  	cmVolume := v1.Volume{
   191  		Name: cmName,
   192  	}
   193  	cmVolume.ConfigMap = &v1.ConfigMapVolumeSource{
   194  		LocalObjectReference: v1.LocalObjectReference{
   195  			Name: cmName,
   196  		},
   197  	}
   198  	pod.Spec.Volumes = append(pod.Spec.Volumes, cmVolume)
   199  
   200  	vm := v1.VolumeMount{
   201  		MountPath: ConfigMapMountPath,
   202  		Name:      cmName,
   203  	}
   204  
   205  	for i, c := range pod.Spec.Containers {
   206  		pod.Spec.Containers[i].VolumeMounts = append(c.VolumeMounts, vm)
   207  	}
   208  	for i, c := range pod.Spec.InitContainers {
   209  		pod.Spec.InitContainers[i].VolumeMounts = append(c.VolumeMounts, vm)
   210  	}
   211  }
   212  
   213  func (sp *servicePlugin) createServiceIfNotExist(job *batch.Job) error {
   214  	// If Service does not exist, create one for Job.
   215  	if _, err := sp.Clientset.KubeClients.CoreV1().Services(job.Namespace).Get(context.TODO(), job.Name, metav1.GetOptions{}); err != nil {
   216  		if !apierrors.IsNotFound(err) {
   217  			klog.V(3).Infof("Failed to get Service for Job <%s/%s>: %v",
   218  				job.Namespace, job.Name, err)
   219  			return err
   220  		}
   221  
   222  		svc := &v1.Service{
   223  			ObjectMeta: metav1.ObjectMeta{
   224  				Namespace: job.Namespace,
   225  				Name:      job.Name,
   226  				OwnerReferences: []metav1.OwnerReference{
   227  					*metav1.NewControllerRef(job, helpers.JobKind),
   228  				},
   229  			},
   230  			Spec: v1.ServiceSpec{
   231  				ClusterIP: "None",
   232  				Selector: map[string]string{
   233  					batch.JobNameKey:      job.Name,
   234  					batch.JobNamespaceKey: job.Namespace,
   235  				},
   236  				PublishNotReadyAddresses: sp.publishNotReadyAddresses,
   237  			},
   238  		}
   239  
   240  		if _, e := sp.Clientset.KubeClients.CoreV1().Services(job.Namespace).Create(context.TODO(), svc, metav1.CreateOptions{}); e != nil {
   241  			klog.V(3).Infof("Failed to create Service for Job <%s/%s>: %v", job.Namespace, job.Name, e)
   242  			return e
   243  		}
   244  		job.Status.ControlledResources["plugin-"+sp.Name()] = sp.Name()
   245  	}
   246  
   247  	return nil
   248  }
   249  
   250  // Limit pods can be accessible only by pods belong to the job.
   251  func (sp *servicePlugin) createNetworkPolicyIfNotExist(job *batch.Job) error {
   252  	// If network policy does not exist, create one for Job.
   253  	if _, err := sp.Clientset.KubeClients.NetworkingV1().NetworkPolicies(job.Namespace).Get(context.TODO(), job.Name, metav1.GetOptions{}); err != nil {
   254  		if !apierrors.IsNotFound(err) {
   255  			klog.V(3).Infof("Failed to get NetworkPolicy for Job <%s/%s>: %v",
   256  				job.Namespace, job.Name, err)
   257  			return err
   258  		}
   259  
   260  		networkpolicy := &networkingv1.NetworkPolicy{
   261  			ObjectMeta: metav1.ObjectMeta{
   262  				Namespace: job.Namespace,
   263  				Name:      job.Name,
   264  				OwnerReferences: []metav1.OwnerReference{
   265  					*metav1.NewControllerRef(job, helpers.JobKind),
   266  				},
   267  			},
   268  			Spec: networkingv1.NetworkPolicySpec{
   269  				PodSelector: metav1.LabelSelector{
   270  					MatchLabels: map[string]string{
   271  						batch.JobNameKey:      job.Name,
   272  						batch.JobNamespaceKey: job.Namespace,
   273  					},
   274  				},
   275  				Ingress: []networkingv1.NetworkPolicyIngressRule{{
   276  					From: []networkingv1.NetworkPolicyPeer{{
   277  						PodSelector: &metav1.LabelSelector{
   278  							MatchLabels: map[string]string{
   279  								batch.JobNameKey:      job.Name,
   280  								batch.JobNamespaceKey: job.Namespace,
   281  							},
   282  						},
   283  					}},
   284  				}},
   285  				PolicyTypes: []networkingv1.PolicyType{networkingv1.PolicyTypeIngress},
   286  			},
   287  		}
   288  
   289  		if _, e := sp.Clientset.KubeClients.NetworkingV1().NetworkPolicies(job.Namespace).Create(context.TODO(), networkpolicy, metav1.CreateOptions{}); e != nil {
   290  			klog.V(3).Infof("Failed to create Service for Job <%s/%s>: %v", job.Namespace, job.Name, e)
   291  			return e
   292  		}
   293  		job.Status.ControlledResources["plugin-"+sp.Name()] = sp.Name()
   294  	}
   295  
   296  	return nil
   297  }
   298  
   299  func (sp *servicePlugin) cmName(job *batch.Job) string {
   300  	return fmt.Sprintf("%s-%s", job.Name, sp.Name())
   301  }
   302  
   303  // GenerateHosts generates hostnames per task.
   304  func GenerateHosts(job *batch.Job) map[string]string {
   305  	hostFile := make(map[string]string, len(job.Spec.Tasks))
   306  
   307  	for _, ts := range job.Spec.Tasks {
   308  		hosts := make([]string, 0, ts.Replicas)
   309  
   310  		for i := 0; i < int(ts.Replicas); i++ {
   311  			hostName := ts.Template.Spec.Hostname
   312  			subdomain := ts.Template.Spec.Subdomain
   313  			if len(hostName) == 0 {
   314  				hostName = jobhelpers.MakePodName(job.Name, ts.Name, i)
   315  			}
   316  			if len(subdomain) == 0 {
   317  				subdomain = job.Name
   318  			}
   319  			hosts = append(hosts, hostName+"."+subdomain)
   320  			if len(ts.Template.Spec.Hostname) != 0 {
   321  				break
   322  			}
   323  		}
   324  
   325  		formateENVKey := strings.Replace(ts.Name, "-", "_", -1)
   326  		key := fmt.Sprintf(ConfigMapTaskHostFmt, formateENVKey)
   327  		hostFile[key] = strings.Join(hosts, "\n")
   328  
   329  		// TODO(k82cn): The splitter and the prefix of env should be configurable.
   330  		// export hosts as environment
   331  		key = fmt.Sprintf(EnvTaskHostFmt, strings.ToUpper(formateENVKey))
   332  		hostFile[key] = strings.Join(hosts, ",")
   333  		// export host number as environment.
   334  		key = fmt.Sprintf(EnvHostNumFmt, strings.ToUpper(formateENVKey))
   335  		hostFile[key] = strconv.Itoa(len(hosts))
   336  	}
   337  
   338  	return hostFile
   339  }