github.com/kubeflow/training-operator@v1.7.0/pkg/common/interface.go (about) 1 /* 2 Copyright 2023 The Kubeflow Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 v1 "k8s.io/api/core/v1" 21 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 "k8s.io/apimachinery/pkg/runtime/schema" 23 24 apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" 25 ) 26 27 // ControllerInterface defines the Interface to be implemented by custom operators. e.g. tf-operator needs to implement this interface 28 type ControllerInterface interface { 29 // Returns the Controller name 30 ControllerName() string 31 32 // Returns the GroupVersionKind of the API 33 GetAPIGroupVersionKind() schema.GroupVersionKind 34 35 // Returns the GroupVersion of the API 36 GetAPIGroupVersion() schema.GroupVersion 37 38 // Returns the Group Name(value) in the labels of the job 39 GetGroupNameLabelValue() string 40 41 // Returns the Job from Informer Cache 42 GetJobFromInformerCache(namespace, name string) (metav1.Object, error) 43 44 // Returns the Job from API server 45 GetJobFromAPIClient(namespace, name string) (metav1.Object, error) 46 47 // GetPodsForJob returns the pods managed by the job. This can be achieved by selecting pods using label key "job-name" 48 // i.e. all pods created by the job will come with label "job-name" = <this_job_name> 49 GetPodsForJob(job interface{}) ([]*v1.Pod, error) 50 51 // GetServicesForJob returns the services managed by the job. This can be achieved by selecting services using label key "job-name" 52 // i.e. all services created by the job will come with label "job-name" = <this_job_name> 53 GetServicesForJob(job interface{}) ([]*v1.Service, error) 54 55 // DeleteJob deletes the job 56 DeleteJob(job interface{}) error 57 58 // UpdateJobStatus updates the job status and job conditions 59 UpdateJobStatus(job interface{}, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, jobStatus *apiv1.JobStatus) error 60 61 // UpdateJobStatusInApiServer updates the job status in API server 62 UpdateJobStatusInApiServer(job interface{}, jobStatus *apiv1.JobStatus) error 63 64 // SetClusterSpec sets the cluster spec for the pod 65 SetClusterSpec(job interface{}, podTemplate *v1.PodTemplateSpec, rtype, index string) error 66 67 // Returns the default container name in pod 68 GetDefaultContainerName() string 69 70 // Get the default container port name 71 GetDefaultContainerPortName() string 72 73 // Returns if this replica type with index specified is a master role. 74 // MasterRole pod will have "job-role=master" set in its label 75 IsMasterRole(replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, rtype apiv1.ReplicaType, index int) bool 76 77 // ReconcileJobs checks and updates replicas for each given ReplicaSpec of a job. 78 // Common implementation will be provided and User can still override this to implement their own reconcile logic 79 ReconcileJobs(job interface{}, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, jobStatus apiv1.JobStatus, runPolicy *apiv1.RunPolicy) error 80 81 // ReconcilePods checks and updates pods for each given ReplicaSpec. 82 // It will requeue the job in case of an error while creating/deleting pods. 83 // Common implementation will be provided and User can still override this to implement their own reconcile logic 84 ReconcilePods(job interface{}, jobStatus *apiv1.JobStatus, pods []*v1.Pod, rtype apiv1.ReplicaType, spec *apiv1.ReplicaSpec, 85 replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error 86 87 // ReconcileServices checks and updates services for each given ReplicaSpec. 88 // It will requeue the job in case of an error while creating/deleting services. 89 // Common implementation will be provided and User can still override this to implement their own reconcile logic 90 ReconcileServices(job metav1.Object, services []*v1.Service, rtype apiv1.ReplicaType, spec *apiv1.ReplicaSpec) error 91 92 // GetFrameworkName returns framework name (e.g., tensorflow). 93 GetFrameworkName() string 94 }