sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobs/pod/pod_controller.go

/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pod

import (
	"cmp"
	"context"
	"crypto/sha256"
	"encoding/json"
	"errors"
	"fmt"
	"slices"
	"sort"
	"strconv"
	"strings"
	"time"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/validation"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	"k8s.io/utils/ptr"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
	"sigs.k8s.io/kueue/pkg/constants"
	controllerconsts "sigs.k8s.io/kueue/pkg/controller/constants"
	"sigs.k8s.io/kueue/pkg/controller/jobframework"
	"sigs.k8s.io/kueue/pkg/podset"
	"sigs.k8s.io/kueue/pkg/util/kubeversion"
	"sigs.k8s.io/kueue/pkg/util/parallelize"
	utilslices "sigs.k8s.io/kueue/pkg/util/slices"
)

const (
	SchedulingGateName             = "kueue.x-k8s.io/admission"
	FrameworkName                  = "pod"
	gateNotFound                   = -1
	ConditionTypeTerminationTarget = "TerminationTarget"
	errMsgIncorrectGroupRoleCount  = "pod group can't include more than 8 roles"
	IsGroupWorkloadAnnotationKey   = "kueue.x-k8s.io/is-group-workload"
	IsGroupWorkloadAnnotationValue = "true"
)

// Event reasons used by the pod controller
const (
	ReasonExcessPodDeleted     = "ExcessPodDeleted"
	ReasonOwnerReferencesAdded = "OwnerReferencesAdded"
)

var (
	gvk                          = corev1.SchemeGroupVersion.WithKind("Pod")
	errIncorrectReconcileRequest = fmt.Errorf("event handler error: got a single pod reconcile request for a pod group")
	errPendingOps                = jobframework.UnretryableError("waiting to observe previous operations on pods")
	errPodNoSupportKubeVersion   = errors.New("pod integration only supported in Kubernetes 1.27 or newer")
)

func init() {
	utilruntime.Must(jobframework.RegisterIntegration(FrameworkName, jobframework.IntegrationCallbacks{
		SetupIndexes:          SetupIndexes,
		NewReconciler:         NewReconciler,
		SetupWebhook:          SetupWebhook,
		JobType:               &corev1.Pod{},
		CanSupportIntegration: CanSupportIntegration,
	}))
}

// +kubebuilder:rbac:groups=scheduling.k8s.io,resources=priorityclasses,verbs=list;get;watch
// +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch
// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;update;patch;delete
// +kubebuilder:rbac:groups="",resources=pods/status,verbs=get;patch
// +kubebuilder:rbac:groups="",resources=pods/finalizers,verbs=get;update
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/finalizers,verbs=update
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch

type Reconciler struct {
	*jobframework.JobReconciler
	expectationsStore *expectationsStore
}

func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	return r.ReconcileGenericJob(ctx, req, &Pod{excessPodExpectations: r.expectationsStore})
}

func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
	concurrency := mgr.GetControllerOptions().GroupKindConcurrency[gvk.GroupKind().String()]
	ctrl.Log.V(3).Info("Setting up Pod reconciler", "concurrency", concurrency)
	return ctrl.NewControllerManagedBy(mgr).
		Watches(&corev1.Pod{}, &podEventHandler{cleanedUpPodsExpectations: r.expectationsStore}).Named("v1_pod").
		Watches(&kueue.Workload{}, &workloadHandler{}).
		WithOptions(controller.Options{
			MaxConcurrentReconciles: concurrency,
		}).
		Complete(r)
}

func NewReconciler(c client.Client, record record.EventRecorder, opts ...jobframework.Option) jobframework.JobReconcilerInterface {
	return &Reconciler{
		JobReconciler:     jobframework.NewReconciler(c, record, opts...),
		expectationsStore: newUIDExpectations("finalizedPods"),
	}
}

type Pod struct {
	pod                   corev1.Pod
	key                   types.NamespacedName
	isFound               bool
	isGroup               bool
	unretriableGroup      *bool
	list                  corev1.PodList
	excessPodExpectations *expectationsStore
	satisfiedExcessPods   bool
}

var (
	_ jobframework.GenericJob      = (*Pod)(nil)
	_ jobframework.JobWithFinalize = (*Pod)(nil)
	_ jobframework.ComposableJob   = (*Pod)(nil)
)

func fromObject(o runtime.Object) *Pod {
	out := Pod{}
	out.pod = *o.(*corev1.Pod)
	return &out
}

// Object returns the job instance.
func (p *Pod) Object() client.Object {
	return &p.pod
}

// gateIndex returns the index of the Kueue scheduling gate for corev1.Pod.
// If the scheduling gate is not found, returns -1.
func gateIndex(p *corev1.Pod) int {
	for i := range p.Spec.SchedulingGates {
		if p.Spec.SchedulingGates[i].Name == SchedulingGateName {
			return i
		}
	}
	return gateNotFound
}

func isPodTerminated(p *corev1.Pod) bool {
	return p.Status.Phase == corev1.PodFailed || p.Status.Phase == corev1.PodSucceeded
}

func podSuspended(p *corev1.Pod) bool {
	return isPodTerminated(p) || gateIndex(p) != gateNotFound
}

func isUnretriablePod(pod corev1.Pod) bool {
	return pod.Annotations[RetriableInGroupAnnotation] == "false"
}

// isUnretriableGroup returns true if at least one pod in the group
// has a RetriableInGroupAnnotation set to 'false'.
func (p *Pod) isUnretriableGroup() bool {
	if p.unretriableGroup != nil {
		return *p.unretriableGroup
	}

	for _, pod := range p.list.Items {
		if isUnretriablePod(pod) {
			p.unretriableGroup = ptr.To(true)
			return true
		}
	}

	p.unretriableGroup = ptr.To(false)
	return false
}

// IsSuspended returns whether the job is suspended or not.
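// For a pod group, the group counts as suspended if at least one of its pods
// is still gated or already terminated.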
func (p *Pod) IsSuspended() bool {
	if !p.isGroup {
		return podSuspended(&p.pod)
	}

	for i := range p.list.Items {
		if podSuspended(&p.list.Items[i]) {
			return true
		}
	}

	return false
}

// Suspend will suspend the job.
func (p *Pod) Suspend() {
	// Not implemented because this is not called when JobWithCustomStop is implemented.
}

// ungatePod removes the kueue scheduling gate from the pod.
// Returns true if the pod has been ungated and false otherwise.
func ungatePod(pod *corev1.Pod) bool {
	idx := gateIndex(pod)
	if idx != gateNotFound {
		pod.Spec.SchedulingGates = append(pod.Spec.SchedulingGates[:idx], pod.Spec.SchedulingGates[idx+1:]...)
		return true
	}

	return false
}

// Run will inject the node affinity and podSet counts extracted from the workload into the job and unsuspend it.
func (p *Pod) Run(ctx context.Context, c client.Client, podSetsInfo []podset.PodSetInfo, recorder record.EventRecorder, msg string) error {
	log := ctrl.LoggerFrom(ctx)

	if !p.isGroup {
		if len(podSetsInfo) != 1 {
			return fmt.Errorf("%w: expecting 1 pod set got %d", podset.ErrInvalidPodsetInfo, len(podSetsInfo))
		}

		if ungated := ungatePod(&p.pod); !ungated {
			return nil
		}

		if err := podset.Merge(&p.pod.ObjectMeta, &p.pod.Spec, podSetsInfo[0]); err != nil {
			return err
		}

		err := c.Update(ctx, &p.pod)
		if err != nil {
			return err
		}
		if recorder != nil {
			recorder.Event(&p.pod, corev1.EventTypeNormal, jobframework.ReasonStarted, msg)
		}
		return nil
	}

	var podsToUngate []*corev1.Pod

	for i := range p.list.Items {
		pod := &p.list.Items[i]
		if ungated := ungatePod(pod); !ungated {
			continue
		}
		podsToUngate = append(podsToUngate, pod)
	}
	if len(podsToUngate) == 0 {
		return nil
	}

	return parallelize.Until(ctx, len(podsToUngate), func(i int) error {
		pod := podsToUngate[i]
		roleHash, err := getRoleHash(*pod)
		if err != nil {
			return err
		}

		podSetIndex := slices.IndexFunc(podSetsInfo, func(info podset.PodSetInfo) bool {
			return info.Name == roleHash
		})
		if podSetIndex == -1 {
			return fmt.Errorf("%w: podSetInfo with the name '%s' is not found", podset.ErrInvalidPodsetInfo, roleHash)
		}

		err = podset.Merge(&pod.ObjectMeta, &pod.Spec, podSetsInfo[podSetIndex])
		if err != nil {
			return err
		}

		log.V(3).Info("Starting pod in group", "podInGroup", klog.KObj(pod))
		if err := c.Update(ctx, pod); err != nil {
			return err
		}
		if recorder != nil {
			recorder.Event(pod, corev1.EventTypeNormal, jobframework.ReasonStarted, msg)
		}
		return nil
	})
}

// RunWithPodSetsInfo will inject the node affinity and podSet counts extracted from the workload into the job and unsuspend it.
func (p *Pod) RunWithPodSetsInfo(_ []podset.PodSetInfo) error {
	// Not implemented because this is not called when JobWithCustomRun is implemented.
	return fmt.Errorf("RunWithPodSetsInfo is not implemented for the Pod object")
}

// RestorePodSetsInfo will restore the original node affinity and podSet counts of the job.
func (p *Pod) RestorePodSetsInfo(_ []podset.PodSetInfo) bool {
	// Not implemented since Pods cannot be updated, they can only be terminated.
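	// Always reports false: there is nothing to restore on a Pod.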
	return false
}

// Finished reports whether the job is completed or failed; the returned
// condition represents the workload's finished condition.
func (p *Pod) Finished() (metav1.Condition, bool) {
	finished := true

	condition := metav1.Condition{
		Type:    kueue.WorkloadFinished,
		Status:  metav1.ConditionTrue,
		Reason:  "JobFinished",
		Message: "Job finished successfully",
	}

	if !p.isGroup {
		ph := p.pod.Status.Phase
		finished = ph == corev1.PodSucceeded || ph == corev1.PodFailed

		if ph == corev1.PodFailed {
			condition.Message = "Job failed"
		}

		return condition, finished
	}
	isActive := false
	succeededCount := 0

	groupTotalCount, err := p.groupTotalCount()
	if err != nil {
		ctrl.Log.V(2).Error(err, "failed to check if pod group is finished")
		return metav1.Condition{}, false
	}
	for _, pod := range p.list.Items {
		if pod.Status.Phase == corev1.PodSucceeded {
			succeededCount++
		}

		if !isPodTerminated(&pod) {
			isActive = true
		}
	}

	unretriableGroup := p.isUnretriableGroup()

	if succeededCount == groupTotalCount || (!isActive && unretriableGroup) {
		condition.Message = fmt.Sprintf("Pods succeeded: %d/%d.", succeededCount, groupTotalCount)
	} else {
		return metav1.Condition{}, false
	}

	return condition, finished
}

// PodSets will build workload podSets corresponding to the job.
func (p *Pod) PodSets() []kueue.PodSet {
	return []kueue.PodSet{
		{
			Name:  kueue.DefaultPodSetName,
			Count: 1,
			Template: corev1.PodTemplateSpec{
				Spec: *p.pod.Spec.DeepCopy(),
			},
		},
	}
}

// IsActive returns true if there are any running pods.
func (p *Pod) IsActive() bool {
	for i := range p.list.Items {
		if p.list.Items[i].Status.Phase == corev1.PodRunning {
			return true
		}
	}
	return false
}

func hasPodReadyTrue(conds []corev1.PodCondition) bool {
	for i := range conds {
		c := conds[i]
		if c.Type == corev1.PodReady {
			return c.Status == corev1.ConditionTrue
		}
	}
	return false
}

// PodsReady reports whether all pods derived from the job are ready.
func (p *Pod) PodsReady() bool {
	if !p.isGroup {
		return hasPodReadyTrue(p.pod.Status.Conditions)
	}

	for i := range p.list.Items {
		if !hasPodReadyTrue(p.list.Items[i].Status.Conditions) {
			return false
		}
	}
	return true
}

// GVK returns GVK (Group Version Kind) for the job.
func (p *Pod) GVK() schema.GroupVersionKind {
	return gvk
}

func (p *Pod) Stop(ctx context.Context, c client.Client, _ []podset.PodSetInfo, stopReason jobframework.StopReason, eventMsg string) ([]client.Object, error) {
	var podsInGroup []corev1.Pod

	if p.isGroup {
		podsInGroup = p.list.Items
	} else {
		podsInGroup = []corev1.Pod{p.pod}
	}

	stoppedNow := make([]client.Object, 0)
	for i := range podsInGroup {
		// If the workload is being deleted, delete even finished Pods.
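		// Pods that are already terminating are always skipped; gated or finished
		// pods are skipped only when the stop reason is not a workload deletion.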
		if !podsInGroup[i].DeletionTimestamp.IsZero() || (stopReason != jobframework.StopReasonWorkloadDeleted && podSuspended(&podsInGroup[i])) {
			continue
		}
		podInGroup := fromObject(&podsInGroup[i])

		// The podset info is not relevant here, since this should mark the pod's end of life
		pCopy := &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				UID:       podInGroup.pod.UID,
				Name:      podInGroup.pod.Name,
				Namespace: podInGroup.pod.Namespace,
			},
			TypeMeta: podInGroup.pod.TypeMeta,
			Status: corev1.PodStatus{
				Conditions: []corev1.PodCondition{
					{
						Type:   ConditionTypeTerminationTarget,
						Status: corev1.ConditionTrue,
						LastTransitionTime: metav1.Time{
							Time: time.Now(),
						},
						Reason:  "StoppedByKueue",
						Message: eventMsg,
					},
				},
			},
		}
		if err := c.Status().Patch(ctx, pCopy, client.Apply, client.FieldOwner(constants.KueueName)); err != nil && !apierrors.IsNotFound(err) {
			return stoppedNow, err
		}
		if err := c.Delete(ctx, podInGroup.Object()); err != nil && !apierrors.IsNotFound(err) {
			return stoppedNow, err
		}
		stoppedNow = append(stoppedNow, podInGroup.Object())
	}

	// If the related workload is deleted, the generic reconciler will stop the pod group and finalize the workload.
	// However, it won't finalize the pods. Since the Stop method for the pod group deletes all the pods in the
	// group, the pods will be finalized here.
	if p.isGroup && stopReason == jobframework.StopReasonWorkloadDeleted {
		err := p.Finalize(ctx, c)
		if err != nil {
			return stoppedNow, err
		}
	}

	return stoppedNow, nil
}

func SetupIndexes(ctx context.Context, indexer client.FieldIndexer) error {
	if err := indexer.IndexField(ctx, &corev1.Pod{}, PodGroupNameCacheKey, IndexPodGroupName); err != nil {
		return err
	}
	if err := jobframework.SetupWorkloadOwnerIndex(ctx, indexer, gvk); err != nil {
		return err
	}
	return nil
}

func CanSupportIntegration(opts ...jobframework.Option) (bool, error) {
	options := jobframework.ProcessOptions(opts...)

	v := options.KubeServerVersion.GetServerVersion()
	if v.String() == "" || v.LessThan(kubeversion.KubeVersion1_27) {
		return false, fmt.Errorf("kubernetesVersion %q: %w", v.String(), errPodNoSupportKubeVersion)
	}
	return true, nil
}

func (p *Pod) Finalize(ctx context.Context, c client.Client) error {
	groupName := podGroupName(p.pod)

	var podsInGroup corev1.PodList
	if groupName == "" {
		podsInGroup.Items = append(podsInGroup.Items, *p.Object().(*corev1.Pod))
	} else {
		if err := c.List(ctx, &podsInGroup, client.MatchingFields{
			PodGroupNameCacheKey: groupName,
		}, client.InNamespace(p.pod.Namespace)); err != nil {
			return err
		}
	}

	return parallelize.Until(ctx, len(podsInGroup.Items), func(i int) error {
		pod := &podsInGroup.Items[i]
		if controllerutil.RemoveFinalizer(pod, PodFinalizer) {
			return c.Update(ctx, pod)
		}
		return nil
	})
}

func (p *Pod) Skip() bool {
	// Skip pod reconciliation if the pod is found and its managed label is not set or has an incorrect value.
	if v, ok := p.pod.GetLabels()[ManagedLabelKey]; p.isFound && (!ok || v != ManagedLabelValue) {
		return true
	}
	return false
}

// podGroupName returns the value of the GroupNameLabel for the pod object.
// Returns an empty string if there's no such label.
func podGroupName(p corev1.Pod) string {
	return p.GetLabels()[GroupNameLabel]
}

// groupTotalCount returns the value of GroupTotalCountAnnotation for the pod being reconciled at the moment.
// It doesn't check if the whole group has the same total group count annotation value.
func (p *Pod) groupTotalCount() (int, error) {
	if podGroupName(p.pod) == "" {
		return 0, fmt.Errorf("pod doesn't have a '%s' label", GroupNameLabel)
	}

	gtcAnnotation, ok := p.Object().GetAnnotations()[GroupTotalCountAnnotation]
	if !ok {
		return 0, fmt.Errorf("failed to extract '%s' annotation",
			GroupTotalCountAnnotation)
	}

	gtc, err := strconv.Atoi(gtcAnnotation)
	if err != nil {
		return 0, err
	}

	if gtc < 1 {
		return 0, fmt.Errorf("incorrect annotation value '%s=%s': group total count should be greater than zero",
			GroupTotalCountAnnotation, gtcAnnotation)
	}

	return gtc, nil
}

// getRoleHash will filter all the fields of the pod that are relevant to admission (pod role) and return a sha256
// checksum of those fields. This is used to group the pods of the same roles when interacting with the workload.
func getRoleHash(p corev1.Pod) (string, error) {
	if roleHash, ok := p.Annotations[RoleHashAnnotation]; ok {
		return roleHash, nil
	}

	shape := map[string]interface{}{
		"spec": map[string]interface{}{
			"initContainers":            containersShape(p.Spec.InitContainers),
			"containers":                containersShape(p.Spec.Containers),
			"nodeSelector":              p.Spec.NodeSelector,
			"affinity":                  p.Spec.Affinity,
			"tolerations":               p.Spec.Tolerations,
			"runtimeClassName":          p.Spec.RuntimeClassName,
			"priority":                  p.Spec.Priority,
			"topologySpreadConstraints": p.Spec.TopologySpreadConstraints,
			"overhead":                  p.Spec.Overhead,
			"resourceClaims":            p.Spec.ResourceClaims,
		},
	}

	shapeJson, err := json.Marshal(shape)
	if err != nil {
		return "", err
	}

	// Trim hash to 8 characters and return
	return fmt.Sprintf("%x", sha256.Sum256(shapeJson))[:8], nil
}

// Load loads all pods in the group
func (p *Pod) Load(ctx context.Context, c client.Client, key *types.NamespacedName) (removeFinalizers bool, err error) {
	nsKey := strings.Split(key.Namespace, "/")

	if len(nsKey) == 1 {
		if err := c.Get(ctx, *key, &p.pod); err != nil {
			return apierrors.IsNotFound(err), err
		}
		p.isFound = true

		// If the key.Namespace doesn't contain a "group/" prefix, even though
		// the pod has a group name, there's something wrong with the event handler.
		if podGroupName(p.pod) != "" {
			return false, errIncorrectReconcileRequest
		}

		return !p.pod.DeletionTimestamp.IsZero(), nil
	}

	p.isGroup = true

	key.Namespace = nsKey[1]
	p.key = *key

	// Check the expectations before listing pods, otherwise a new pod can sneak in
	// and update the expectations after we've retrieved active pods from the store.
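	// The result is consumed later by FindMatchingWorkloads to decide whether
	// it is safe to clean up more pods.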
	p.satisfiedExcessPods = p.excessPodExpectations.Satisfied(ctrl.LoggerFrom(ctx), *key)

	if err := c.List(ctx, &p.list, client.MatchingFields{
		PodGroupNameCacheKey: key.Name,
	}, client.InNamespace(key.Namespace)); err != nil {
		return false, err
	}

	if len(p.list.Items) > 0 {
		p.isFound = true
		p.pod = p.list.Items[0]
		key.Name = p.pod.Name
	}

	// If none of the pods in group are found,
	// the respective workload should be finalized
	return !p.isFound, nil
}

func (p *Pod) constructGroupPodSets() ([]kueue.PodSet, error) {
	return constructGroupPodSets(p.list.Items)
}

func constructGroupPodSets(pods []corev1.Pod) ([]kueue.PodSet, error) {
	var resultPodSets []kueue.PodSet

	for _, podInGroup := range pods {
		if !isPodRunnableOrSucceeded(&podInGroup) {
			continue
		}

		roleHash, err := getRoleHash(podInGroup)
		if err != nil {
			return nil, fmt.Errorf("failed to calculate pod role hash: %w", err)
		}

		podRoleFound := false
		for psi := range resultPodSets {
			if resultPodSets[psi].Name == roleHash {
				podRoleFound = true
				resultPodSets[psi].Count++
			}
		}

		if !podRoleFound {
			podSet := fromObject(&podInGroup).PodSets()
			podSet[0].Name = roleHash

			resultPodSets = append(resultPodSets, podSet[0])
		}
	}

	slices.SortFunc(resultPodSets, func(a, b kueue.PodSet) int {
		return cmp.Compare(a.Name, b.Name)
	})

	return resultPodSets, nil
}

// validatePodGroupMetadata validates metadata of all members of the pod group
func (p *Pod) validatePodGroupMetadata(r record.EventRecorder, activePods []corev1.Pod) error {
	groupTotalCount, err := p.groupTotalCount()
	if err != nil {
		return err
	}
	originalQueue := jobframework.QueueName(p)

	if len(activePods) < groupTotalCount {
		errMsg := fmt.Sprintf("'%s' group has fewer runnable pods than expected", podGroupName(p.pod))
		r.Eventf(p.Object(), corev1.EventTypeWarning, jobframework.ReasonErrWorkloadCompose, errMsg)
		return jobframework.UnretryableError(errMsg)
	}

	for _, podInGroup := range p.list.Items {
		// Skip failed pods
		if podInGroup.Status.Phase == corev1.PodFailed {
			continue
		}

		if podInGroupQueue := jobframework.QueueNameForObject(&podInGroup); podInGroupQueue != originalQueue {
			return jobframework.UnretryableError(fmt.Sprintf("pods '%s' and '%s' has different queue names: %s!=%s",
				p.pod.GetName(), podInGroup.GetName(),
				originalQueue, podInGroupQueue))
		}

		tc, err := strconv.Atoi(podInGroup.GetAnnotations()[GroupTotalCountAnnotation])
		if err != nil {
			return fmt.Errorf("failed to extract '%s' annotation from the pod '%s': %w",
				GroupTotalCountAnnotation,
				podInGroup.GetName(),
				err)
		}
		if tc != groupTotalCount {
			return jobframework.UnretryableError(fmt.Sprintf("pods '%s' and '%s' has different '%s' values: %d!=%d",
				p.pod.GetName(), podInGroup.GetName(),
				GroupTotalCountAnnotation,
				groupTotalCount, tc))
		}
	}

	return nil
}

// runnableOrSucceededPods returns a slice of active pods in the group
func (p *Pod) runnableOrSucceededPods() []corev1.Pod {
	return utilslices.Pick(p.list.Items, isPodRunnableOrSucceeded)
}

// notRunnableNorSucceededPods returns a slice of inactive pods in the group
func (p *Pod) notRunnableNorSucceededPods() []corev1.Pod {
	return utilslices.Pick(p.list.Items, func(p *corev1.Pod) bool { return !isPodRunnableOrSucceeded(p) })
}

// isPodRunnableOrSucceeded returns whether the Pod can eventually run, is Running or Succeeded.
// A Pod cannot run if it's gated and has a deletionTimestamp.
func isPodRunnableOrSucceeded(p *corev1.Pod) bool {
	if p.DeletionTimestamp != nil && len(p.Spec.SchedulingGates) > 0 {
		return false
	}
	return p.Status.Phase != corev1.PodFailed
}

// lastActiveTime returns the last timestamp on which the pod was observed active:
// - the time the pod was declared Failed
// - the deletion time
func lastActiveTime(p *corev1.Pod) time.Time {
	lastTransition := metav1.Now()
	for _, c := range p.Status.Conditions {
		if c.Type == corev1.ContainersReady {
			if c.Status == corev1.ConditionFalse && c.Reason == string(corev1.PodFailed) {
				lastTransition = c.LastTransitionTime
			}
			break
		}
	}
	deletionTime := ptr.Deref(p.DeletionTimestamp, metav1.Now())
	if lastTransition.Before(&deletionTime) {
		return lastTransition.Time
	}
	return deletionTime.Time
}

// sortInactivePods sorts the provided pods slice based on:
// - finalizer state (pods with finalizers are first)
// - lastActiveTime (pods that were active last are first)
// - creation timestamp (newer pods are first)
func sortInactivePods(inactivePods []corev1.Pod) {
	sort.Slice(inactivePods, func(i, j int) bool {
		pi := &inactivePods[i]
		pj := &inactivePods[j]
		iFin := slices.Contains(pi.Finalizers, PodFinalizer)
		jFin := slices.Contains(pj.Finalizers, PodFinalizer)
		if iFin != jFin {
			return iFin
		}

		iLastActive := lastActiveTime(pi)
		jLastActive := lastActiveTime(pj)

		if iLastActive.Equal(jLastActive) {
			return pi.CreationTimestamp.Before(&pj.CreationTimestamp)
		}
		return jLastActive.Before(iLastActive)
	})
}

// sortActivePods sorts the provided pods slice based on:
// - finalizer state (pods with no finalizers are last)
// - gated state (pods that are still gated are last)
// - creation timestamp (newer pods are last)
func sortActivePods(activePods []corev1.Pod) {
	// Sort active pods by creation timestamp
	sort.Slice(activePods, func(i, j int) bool {
		pi := &activePods[i]
		pj := &activePods[j]
		iFin := slices.Contains(pi.Finalizers, PodFinalizer)
		jFin := slices.Contains(pj.Finalizers, PodFinalizer)
		// Prefer to keep pods that have a finalizer.
		if iFin != jFin {
			return iFin
		}
		iGated := gateIndex(pi) != gateNotFound
		jGated := gateIndex(pj) != gateNotFound
		// Prefer to keep pods that aren't gated.
		if iGated != jGated {
			return !iGated
		}
		return pi.CreationTimestamp.Before(&pj.CreationTimestamp)
	})
}

func (p *Pod) removeExcessPods(ctx context.Context, c client.Client, r record.EventRecorder, extraPods []corev1.Pod) error {
	if len(extraPods) == 0 {
		return nil
	}

	log := ctrl.LoggerFrom(ctx)

	// Extract all the latest created extra pods
	extraPodsUIDs := utilslices.Map(extraPods, func(p *corev1.Pod) types.UID { return p.UID })
	p.excessPodExpectations.ExpectUIDs(log, p.key, extraPodsUIDs)

	// Finalize and delete the active pods created last
	err := parallelize.Until(ctx, len(extraPods), func(i int) error {
		pod := extraPods[i]
		if controllerutil.RemoveFinalizer(&pod, PodFinalizer) {
			log.V(3).Info("Finalizing excess pod in group", "excessPod", klog.KObj(&pod))
			if err := c.Update(ctx, &pod); err != nil {
				// We won't observe this cleanup in the event handler.
				p.excessPodExpectations.ObservedUID(log, p.key, pod.UID)
				return err
			}
		}
		if pod.DeletionTimestamp.IsZero() {
			log.V(3).Info("Deleting excess pod in group", "excessPod", klog.KObj(&pod))
			if err := c.Delete(ctx, &pod); err != nil {
				// We won't observe this cleanup in the event handler.
				p.excessPodExpectations.ObservedUID(log, p.key, pod.UID)
				return err
			}
			r.Event(&pod, corev1.EventTypeNormal, ReasonExcessPodDeleted, "Excess pod deleted")
		}
		return nil
	})
	if err != nil {
		return err
	}
	return nil
}

func (p *Pod) finalizePods(ctx context.Context, c client.Client, extraPods []corev1.Pod) error {
	if len(extraPods) == 0 {
		return nil
	}

	log := ctrl.LoggerFrom(ctx)

	// Extract all the latest created extra pods
	extraPodsUIDs := utilslices.Map(extraPods, func(p *corev1.Pod) types.UID { return p.UID })
	p.excessPodExpectations.ExpectUIDs(log, p.key, extraPodsUIDs)

	err := parallelize.Until(ctx, len(extraPods), func(i int) error {
		pod := extraPods[i]
		if controllerutil.RemoveFinalizer(&pod, PodFinalizer) {
			log.V(3).Info("Finalizing pod in group", "Pod", klog.KObj(&pod))
			if err := c.Update(ctx, &pod); err != nil {
				// We won't observe this cleanup in the event handler.
				p.excessPodExpectations.ObservedUID(log, p.key, pod.UID)
				return err
			}
		} else {
			// We don't expect an event in this case.
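			// The pod had no finalizer to remove, so no update will reach the
			// event handler; mark the UID as observed right away.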
			p.excessPodExpectations.ObservedUID(log, p.key, pod.UID)
		}
		return nil
	})
	if err != nil {
		return err
	}
	return nil
}

func (p *Pod) ensureWorkloadOwnedByAllMembers(ctx context.Context, c client.Client, r record.EventRecorder, workload *kueue.Workload) error {
	oldOwnersCnt := len(workload.GetOwnerReferences())
	for _, pod := range p.list.Items {
		if err := controllerutil.SetOwnerReference(&pod, workload, c.Scheme()); err != nil {
			return err
		}
	}
	newOwnersCnt := len(workload.GetOwnerReferences())
	if addedOwnersCnt := newOwnersCnt - oldOwnersCnt; addedOwnersCnt > 0 {
		log := ctrl.LoggerFrom(ctx).WithValues("workload", klog.KObj(workload))
		log.V(4).Info("Adding owner references for workload", "count", addedOwnersCnt)
		err := c.Update(ctx, workload)
		if err == nil {
			r.Eventf(workload, corev1.EventTypeNormal, ReasonOwnerReferencesAdded, fmt.Sprintf("Added %d owner reference(s)", addedOwnersCnt))
		}
		return err
	}
	return nil
}

func (p *Pod) ConstructComposableWorkload(ctx context.Context, c client.Client, r record.EventRecorder) (*kueue.Workload, error) {
	object := p.Object()
	log := ctrl.LoggerFrom(ctx)

	wl := &kueue.Workload{
		ObjectMeta: metav1.ObjectMeta{
			Namespace:  p.pod.GetNamespace(),
			Labels:     map[string]string{},
			Finalizers: []string{kueue.ResourceInUseFinalizerName},
		},
		Spec: kueue.WorkloadSpec{
			QueueName: jobframework.QueueName(p),
		},
	}

	// Construct workload for a single pod
	if !p.isGroup {
		wl.Spec.PodSets = p.PodSets()

		wl.Name = jobframework.GetWorkloadNameForOwnerWithGVK(p.pod.GetName(), p.GVK())
		jobUid := string(object.GetUID())
		if errs := validation.IsValidLabelValue(jobUid); len(errs) == 0 {
			wl.Labels[controllerconsts.JobUIDLabel] = jobUid
		} else {
			log.V(2).Info(
				"Validation of the owner job UID label has failed. Creating workload without the label.",
				"ValidationErrors", errs,
				"LabelValue", jobUid,
			)
		}

		// add the controller ref
		if err := controllerutil.SetControllerReference(object, wl, c.Scheme()); err != nil {
			return nil, err
		}

		return wl, nil
	}

	if err := p.finalizePods(ctx, c, p.notRunnableNorSucceededPods()); err != nil {
		return nil, err
	}

	activePods := p.runnableOrSucceededPods()

	if wl.Annotations == nil {
		wl.Annotations = make(map[string]string)
	}
	wl.Annotations[IsGroupWorkloadAnnotationKey] = IsGroupWorkloadAnnotationValue

	err := p.validatePodGroupMetadata(r, activePods)
	if err != nil {
		return nil, err
	}

	groupTotalCount, err := p.groupTotalCount()
	if err != nil {
		return nil, err
	}

	// Cleanup extra pods if there are any
	if excessPodsCount := len(activePods) - groupTotalCount; excessPodsCount > 0 {
		sortActivePods(activePods)
		err = p.removeExcessPods(ctx, c, r, activePods[len(activePods)-excessPodsCount:])
		if err != nil {
			return nil, err
		}
		p.list.Items = activePods[:len(activePods)-excessPodsCount]
	}

	// Construct workload for a pod group
	wl.Spec.PodSets, err = p.constructGroupPodSets()
	if err != nil {
		if jobframework.IsUnretryableError(err) {
			r.Eventf(object, corev1.EventTypeWarning, jobframework.ReasonErrWorkloadCompose, err.Error())
		}
		return nil, err
	}

	if len(wl.Spec.PodSets) > 8 {
		return nil, jobframework.UnretryableError(errMsgIncorrectGroupRoleCount)
	}

	wl.Name = podGroupName(p.pod)
	for _, pod := range p.list.Items {
		if err := controllerutil.SetOwnerReference(&pod, wl, c.Scheme()); err != nil {
			return nil, err
		}
	}

	return wl, nil
}

func (p *Pod) ListChildWorkloads(ctx context.Context, c client.Client, key types.NamespacedName) (*kueue.WorkloadList, error) {
	log := ctrl.LoggerFrom(ctx)

	workloads := &kueue.WorkloadList{}

	// Get related workloads for the pod group
	if p.isGroup {
		workload := &kueue.Workload{}
		if err := c.Get(ctx, types.NamespacedName{Name: key.Name, Namespace: key.Namespace}, workload); err != nil {
			if apierrors.IsNotFound(err) {
				return workloads, nil
			}
			log.Error(err, "Unable to get related workload for the pod group")
			return nil, err
		}

		workloads.Items = []kueue.Workload{*workload}
		return workloads, nil
	}

	// List related workloads for the single pod
	if err := c.List(ctx, workloads, client.InNamespace(key.Namespace),
		client.MatchingFields{jobframework.GetOwnerKey(gvk): key.Name}); err != nil {
		log.Error(err, "Unable to get related workload for the single pod")
		return nil, err
	}

	return workloads, nil
}

func (p *Pod) FindMatchingWorkloads(ctx context.Context, c client.Client, r record.EventRecorder) (*kueue.Workload, []*kueue.Workload, error) {
	log := ctrl.LoggerFrom(ctx)
	groupName := podGroupName(p.pod)

	if groupName == "" {
		return jobframework.FindMatchingWorkloads(ctx, c, p)
	}

	// Find a matching workload first if there is one.
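	// For a pod group, the workload is named after the group, so it can be
	// fetched directly by the group name.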
	workload := &kueue.Workload{}
	if err := c.Get(ctx, types.NamespacedName{Name: groupName, Namespace: p.pod.GetNamespace()}, workload); err != nil {
		if apierrors.IsNotFound(err) {
			return nil, nil, nil
		}
		log.Error(err, "Unable to get related workload")
		return nil, nil, err
	}

	// Cleanup excess pods for each workload pod set (role)
	activePods := p.runnableOrSucceededPods()
	inactivePods := p.notRunnableNorSucceededPods()

	var keptPods []corev1.Pod
	var excessActivePods []corev1.Pod
	var replacedInactivePods []corev1.Pod

	for _, ps := range workload.Spec.PodSets {
		// Find all the active and inactive pods of the role
		var roleHashErrors []error
		hasRoleFunc := func(p *corev1.Pod) bool {
			hash, err := getRoleHash(*p)
			if err != nil {
				roleHashErrors = append(roleHashErrors, err)
				return false
			}
			return hash == ps.Name
		}
		roleActivePods := utilslices.Pick(activePods, hasRoleFunc)
		roleInactivePods := utilslices.Pick(inactivePods, hasRoleFunc)
		if len(roleHashErrors) > 0 {
			return nil, nil, fmt.Errorf("failed to calculate pod role hash: %w", errors.Join(roleHashErrors...))
		}

		if excessCount := len(roleActivePods) - int(ps.Count); excessCount > 0 {
			sortActivePods(roleActivePods)
			excessActivePods = append(excessActivePods, roleActivePods[len(roleActivePods)-excessCount:]...)
			keptPods = append(keptPods, roleActivePods[:len(roleActivePods)-excessCount]...)
		} else {
			keptPods = append(keptPods, roleActivePods...)
		}

		if finalizeablePodsCount := min(len(roleInactivePods), len(roleInactivePods)+len(roleActivePods)-int(ps.Count)); finalizeablePodsCount > 0 {
			sortInactivePods(roleInactivePods)
			replacedInactivePods = append(replacedInactivePods, roleInactivePods[len(roleInactivePods)-finalizeablePodsCount:]...)
			keptPods = append(keptPods, roleInactivePods[:len(roleInactivePods)-finalizeablePodsCount]...)
		} else {
			keptPods = append(keptPods, roleInactivePods...)
		}
	}

	jobPodSets, err := constructGroupPodSets(keptPods)
	if err != nil {
		return nil, nil, err
	}

	if len(keptPods) == 0 || !p.equivalentToWorkload(workload, jobPodSets) {
		return nil, []*kueue.Workload{workload}, nil
	}

	// Do not clean up more pods until observing previous operations
	if !p.satisfiedExcessPods {
		return nil, nil, errPendingOps
	}

	p.list.Items = keptPods
	if err := p.ensureWorkloadOwnedByAllMembers(ctx, c, r, workload); err != nil {
		return nil, nil, err
	}

	if err := p.removeExcessPods(ctx, c, r, excessActivePods); err != nil {
		return nil, nil, err
	}

	if err := p.finalizePods(ctx, c, replacedInactivePods); err != nil {
		return nil, nil, err
	}
	return workload, []*kueue.Workload{}, nil
}

func (p *Pod) equivalentToWorkload(wl *kueue.Workload, jobPodSets []kueue.PodSet) bool {
	workloadFinished := apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadFinished)

	if wl.GetName() != podGroupName(p.pod) {
		return false
	}

	if !workloadFinished && len(wl.Spec.PodSets) < len(jobPodSets) {
		return false
	}

	// Match the current state of pod sets
	// to the pod set info in the workload
	j := -1
	for i := range jobPodSets {
		for j++; j < len(wl.Spec.PodSets); j++ {
			if jobPodSets[i].Name == wl.Spec.PodSets[j].Name {
				break
			}
		}
		// If the actual pod set info has a role that the workload doesn't have,
		// consider the workload not equivalent to the pod group
		if j == len(wl.Spec.PodSets) {
			return false
		}
		// Check counts for found pod sets
		if !workloadFinished && wl.Spec.PodSets[j].Count < jobPodSets[i].Count {
			return false
		}
	}

	return true
}

func (p *Pod) ReclaimablePods() ([]kueue.ReclaimablePod, error) {
	if !p.isGroup {
		return []kueue.ReclaimablePod{}, nil
	}

	var result []kueue.ReclaimablePod
	for _, pod := range p.list.Items {
		if pod.Status.Phase == corev1.PodSucceeded {
			roleHash, err := getRoleHash(pod)
			if err != nil {
				return nil, err
			}

			roleFound := false
			for i := range result {
				if result[i].Name == roleHash {
					result[i].Count++
					roleFound = true
				}
			}

			if !roleFound {
				result = append(result, kueue.ReclaimablePod{Name: roleHash, Count: 1})
			}
		}
	}

	return result, nil
}

func IsPodOwnerManagedByKueue(p *Pod) bool {
	if owner := metav1.GetControllerOf(&p.pod); owner != nil {
		return jobframework.IsOwnerManagedByKueue(owner) || (owner.Kind == "RayCluster" && strings.HasPrefix(owner.APIVersion, "ray.io/v1alpha1"))
	}
	return false
}

func GetWorkloadNameForPod(podName string) string {
	return jobframework.GetWorkloadNameForOwnerWithGVK(podName, gvk)
}