sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobframework/interface.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  Licensed under the Apache License, Version 2.0 (the "License");
     4  you may not use this file except in compliance with the License.
     5  You may obtain a copy of the License at
     6      http://www.apache.org/licenses/LICENSE-2.0
     7  Unless required by applicable law or agreed to in writing, software
     8  distributed under the License is distributed on an "AS IS" BASIS,
     9  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    10  See the License for the specific language governing permissions and
    11  limitations under the License.
    12  */
    13  
    14  package jobframework
    15  
    16  import (
    17  	"context"
    18  
    19  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    20  	"k8s.io/apimachinery/pkg/runtime/schema"
    21  	"k8s.io/apimachinery/pkg/types"
    22  	"k8s.io/client-go/tools/record"
    23  	"sigs.k8s.io/controller-runtime/pkg/client"
    24  
    25  	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
    26  	"sigs.k8s.io/kueue/pkg/controller/constants"
    27  	"sigs.k8s.io/kueue/pkg/podset"
    28  )
    29  
// GenericJob is the interface which needs to be implemented by all jobs
// managed by the kueue's jobframework.
type GenericJob interface {
	// Object returns the job instance.
	Object() client.Object
	// IsSuspended returns whether the job is suspended or not.
	IsSuspended() bool
	// Suspend will suspend the job.
	Suspend()
	// RunWithPodSetsInfo will inject the node affinity and podSet counts
	// extracted from the workload into the job and unsuspend it.
	RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error
	// RestorePodSetsInfo will restore the original node affinity and podSet counts of the job.
	// Returns whether any change was done.
	RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool
	// Finished reports whether the job is completed/failed or not;
	// condition represents the workload finished condition.
	Finished() (condition metav1.Condition, finished bool)
	// PodSets will build workload podSets corresponding to the job.
	PodSets() []kueue.PodSet
	// IsActive returns true if there are any running pods.
	IsActive() bool
	// PodsReady reports whether the pods derived from the job are all ready now.
	PodsReady() bool
	// GVK returns GVK (Group Version Kind) for the job.
	GVK() schema.GroupVersionKind
}
    56  
// Optional interfaces are meant to be implemented by jobs to enable additional
// features of the jobframework reconciler.
    59  
// JobWithReclaimablePods is an optional interface for jobs that can report
// their reclaimable pods.
type JobWithReclaimablePods interface {
	// ReclaimablePods returns the list of reclaimable pods.
	ReclaimablePods() ([]kueue.ReclaimablePod, error)
}
    64  
// StopReason identifies why a job is being stopped. It is passed as the
// stopReason argument to the Stop methods of JobWithCustomStop and ComposableJob.
type StopReason int

// Known stop reasons, numbered consecutively from 0 via iota.
// NOTE(review): the per-constant descriptions below are inferred from the
// constant names only; confirm against the reconciler that passes them.
const (
	// StopReasonWorkloadDeleted: presumably the job's workload was deleted.
	StopReasonWorkloadDeleted StopReason = iota
	// StopReasonWorkloadEvicted: presumably the job's workload was evicted.
	StopReasonWorkloadEvicted
	// StopReasonNoMatchingWorkload: presumably no workload matching the job was found.
	StopReasonNoMatchingWorkload
	// StopReasonNotAdmitted: presumably the job's workload is not admitted.
	StopReasonNotAdmitted
)
    73  
// JobWithCustomStop is an optional interface for jobs that provide their own
// stop procedure.
type JobWithCustomStop interface {
	// Stop implements a custom stop procedure.
	// The function should be idempotent: it should not do any API calls if the
	// job is already stopped.
	// Returns whether the job was stopped by this call, or an error.
	Stop(ctx context.Context, c client.Client, podSetsInfo []podset.PodSetInfo, stopReason StopReason, eventMsg string) (bool, error)
}
    80  
// JobWithFinalize interface should be implemented by generic jobs
// when custom finalization logic is needed for a job after it's finished.
type JobWithFinalize interface {
	// Finalize runs the job's custom finalization logic and returns any
	// error encountered.
	Finalize(ctx context.Context, c client.Client) error
}
    86  
// JobWithSkip interface should be implemented by generic jobs
// when reconciliation should be skipped depending on the job's state.
type JobWithSkip interface {
	// Skip reports whether reconciliation of the job should be skipped.
	Skip() bool
}
    92  
// JobWithPriorityClass is an optional interface for jobs that expose a
// priority class name.
type JobWithPriorityClass interface {
	// PriorityClass returns the job's priority class name.
	PriorityClass() string
}
    97  
// ComposableJob interface should be implemented by generic jobs that
// are composed out of multiple API objects.
type ComposableJob interface {
	// Load loads all members of the composable job. If removeFinalizers == true,
	// workload and job finalizers should be removed.
	Load(ctx context.Context, c client.Client, key *types.NamespacedName) (removeFinalizers bool, err error)
	// Run unsuspends all members of the ComposableJob and injects the node
	// affinity and podSet counts extracted from the workload into all members
	// of the ComposableJob.
	Run(ctx context.Context, c client.Client, podSetsInfo []podset.PodSetInfo, r record.EventRecorder, msg string) error
	// ConstructComposableWorkload returns a new Workload that's assembled out of all members of the ComposableJob.
	ConstructComposableWorkload(ctx context.Context, c client.Client, r record.EventRecorder) (*kueue.Workload, error)
	// ListChildWorkloads returns all workloads related to the composable job.
	ListChildWorkloads(ctx context.Context, c client.Client, parent types.NamespacedName) (*kueue.WorkloadList, error)
	// FindMatchingWorkloads returns all related workloads: the workload that
	// matches the ComposableJob (match) and the duplicates that have to be
	// deleted (toDelete).
	FindMatchingWorkloads(ctx context.Context, c client.Client, r record.EventRecorder) (match *kueue.Workload, toDelete []*kueue.Workload, err error)
	// Stop implements the custom stop procedure for ComposableJob.
	// NOTE(review): the returned objects are presumably the members that were
	// stopped by this call; confirm against the implementations.
	Stop(ctx context.Context, c client.Client, podSetsInfo []podset.PodSetInfo, stopReason StopReason, eventMsg string) ([]client.Object, error)
}
   115  
   116  func ParentWorkloadName(job GenericJob) string {
   117  	return job.Object().GetAnnotations()[constants.ParentWorkloadAnnotation]
   118  }
   119  
   120  func QueueName(job GenericJob) string {
   121  	return QueueNameForObject(job.Object())
   122  }
   123  
   124  func QueueNameForObject(object client.Object) string {
   125  	if queueLabel := object.GetLabels()[constants.QueueLabel]; queueLabel != "" {
   126  		return queueLabel
   127  	}
   128  	// fallback to the annotation (deprecated)
   129  	return object.GetAnnotations()[constants.QueueAnnotation]
   130  }
   131  
   132  func workloadPriorityClassName(job GenericJob) string {
   133  	object := job.Object()
   134  	if workloadPriorityClassLabel := object.GetLabels()[constants.WorkloadPriorityClassLabel]; workloadPriorityClassLabel != "" {
   135  		return workloadPriorityClassLabel
   136  	}
   137  	return ""
   138  }
   139  
   140  func PrebuiltWorkloadFor(job GenericJob) (string, bool) {
   141  	name, found := job.Object().GetLabels()[constants.PrebuiltWorkloadLabel]
   142  	return name, found
   143  }