github.com/kubeflow/training-operator@v1.7.0/pkg/common/interface.go (about)

     1  /*
     2  Copyright 2023 The Kubeflow Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	v1 "k8s.io/api/core/v1"
    21  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    22  	"k8s.io/apimachinery/pkg/runtime/schema"
    23  
    24  	apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
    25  )
    26  
    27  // ControllerInterface defines the interface that custom operators must implement; e.g. tf-operator needs to implement this interface.
    28  type ControllerInterface interface {
    29  	// ControllerName returns the controller name.
    30  	ControllerName() string
    31  
    32  	// GetAPIGroupVersionKind returns the GroupVersionKind of the API.
    33  	GetAPIGroupVersionKind() schema.GroupVersionKind
    34  
    35  	// GetAPIGroupVersion returns the GroupVersion of the API.
    36  	GetAPIGroupVersion() schema.GroupVersion
    37  
    38  	// GetGroupNameLabelValue returns the group name (value) used in the labels of the job.
    39  	GetGroupNameLabelValue() string
    40  
    41  	// GetJobFromInformerCache returns the job identified by namespace and name from the informer cache.
    42  	GetJobFromInformerCache(namespace, name string) (metav1.Object, error)
    43  
    44  	// GetJobFromAPIClient returns the job identified by namespace and name from the API server.
    45  	GetJobFromAPIClient(namespace, name string) (metav1.Object, error)
    46  
    47  	// GetPodsForJob returns the pods managed by the job. This can be achieved by selecting pods using the label key "job-name",
    48  	// i.e. all pods created by the job will come with the label "job-name" = <this_job_name>.
    49  	GetPodsForJob(job interface{}) ([]*v1.Pod, error)
    50  
    51  	// GetServicesForJob returns the services managed by the job. This can be achieved by selecting services using the label key "job-name",
    52  	// i.e. all services created by the job will come with the label "job-name" = <this_job_name>.
    53  	GetServicesForJob(job interface{}) ([]*v1.Service, error)
    54  
    55  	// DeleteJob deletes the job.
    56  	DeleteJob(job interface{}) error
    57  
    58  	// UpdateJobStatus updates the job status and job conditions.
    59  	UpdateJobStatus(job interface{}, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, jobStatus *apiv1.JobStatus) error
    60  
    61  	// UpdateJobStatusInApiServer updates the job status in the API server.
    62  	UpdateJobStatusInApiServer(job interface{}, jobStatus *apiv1.JobStatus) error
    63  
    64  	// SetClusterSpec sets the cluster spec for the pod.
    65  	SetClusterSpec(job interface{}, podTemplate *v1.PodTemplateSpec, rtype, index string) error
    66  
    67  	// GetDefaultContainerName returns the default container name in the pod.
    68  	GetDefaultContainerName() string
    69  
    70  	// GetDefaultContainerPortName returns the default container port name.
    71  	GetDefaultContainerPortName() string
    72  
    73  	// IsMasterRole reports whether the replica type with the specified index is a master role.
    74  	// A master-role pod will have "job-role=master" set in its labels.
    75  	IsMasterRole(replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, rtype apiv1.ReplicaType, index int) bool
    76  
    77  	// ReconcileJobs checks and updates replicas for each given ReplicaSpec of a job.
    78  	// A common implementation will be provided; users can still override it to implement their own reconcile logic.
    79  	ReconcileJobs(job interface{}, replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec, jobStatus apiv1.JobStatus, runPolicy *apiv1.RunPolicy) error
    80  
    81  	// ReconcilePods checks and updates pods for each given ReplicaSpec.
    82  	// It will requeue the job in case of an error while creating/deleting pods.
    83  	// A common implementation will be provided; users can still override it to implement their own reconcile logic.
    84  	ReconcilePods(job interface{}, jobStatus *apiv1.JobStatus, pods []*v1.Pod, rtype apiv1.ReplicaType, spec *apiv1.ReplicaSpec,
    85  		replicas map[apiv1.ReplicaType]*apiv1.ReplicaSpec) error
    86  
    87  	// ReconcileServices checks and updates services for each given ReplicaSpec.
    88  	// It will requeue the job in case of an error while creating/deleting services.
    89  	// A common implementation will be provided; users can still override it to implement their own reconcile logic.
    90  	ReconcileServices(job metav1.Object, services []*v1.Service, rtype apiv1.ReplicaType, spec *apiv1.ReplicaSpec) error
    91  
    92  	// GetFrameworkName returns the framework name (e.g., tensorflow).
    93  	GetFrameworkName() string
    94  }