github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/podkiller/killer.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // todo: move APIServer update/patch/create actions to client package 18 19 package podkiller 20 21 import ( 22 "context" 23 "fmt" 24 "time" 25 26 "github.com/pkg/errors" 27 v1 "k8s.io/api/core/v1" 28 policy "k8s.io/api/policy/v1beta1" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/util/wait" 32 "k8s.io/client-go/kubernetes" 33 "k8s.io/client-go/tools/events" 34 cri "k8s.io/cri-api/pkg/apis" 35 "k8s.io/klog/v2" 36 "k8s.io/kubernetes/pkg/kubelet/container" 37 "k8s.io/kubernetes/pkg/kubelet/cri/remote" 38 39 "github.com/kubewharf/katalyst-core/pkg/config" 40 "github.com/kubewharf/katalyst-core/pkg/consts" 41 "github.com/kubewharf/katalyst-core/pkg/metrics" 42 ) 43 44 const ( 45 MetricsNameKillPod = "kill_pod" 46 MetricsNameKillContainer = "kill_container" 47 ) 48 49 type InitFunc func(conf *config.Configuration, client kubernetes.Interface, recorder events.EventRecorder, emitter metrics.MetricEmitter) (Killer, error) 50 51 // Killer implements pod eviction logic. 52 type Killer interface { 53 // Name returns name as identifier for a specific Killer. 54 Name() string 55 56 // Evict for given pods and corresponding graceful period seconds. 57 Evict(ctx context.Context, pod *v1.Pod, gracePeriodSeconds int64, reason, plugin string) error 58 } 59 60 // DummyKiller is a stub implementation for Killer interface. 61 type DummyKiller struct{} 62 63 func (d DummyKiller) Name() string { return consts.KillerNameFakeKiller } 64 func (d DummyKiller) Evict(_ context.Context, _ *v1.Pod, _ int64, _, _ string) error { return nil } 65 66 var _ Killer = DummyKiller{} 67 68 // EvictionAPIKiller implements Killer interface it evict those given pods by 69 // eviction API, and wait until pods have actually been deleted. 70 type EvictionAPIKiller struct { 71 emitter metrics.MetricEmitter 72 client kubernetes.Interface 73 recorder events.EventRecorder 74 } 75 76 // NewEvictionAPIKiller returns a new updater Object. 77 func NewEvictionAPIKiller(_ *config.Configuration, client kubernetes.Interface, recorder events.EventRecorder, emitter metrics.MetricEmitter) (Killer, error) { 78 return &EvictionAPIKiller{ 79 emitter: emitter, 80 client: client, 81 recorder: recorder, 82 }, nil 83 } 84 85 func (e *EvictionAPIKiller) Name() string { return consts.KillerNameEvictionKiller } 86 87 func (e *EvictionAPIKiller) Evict(_ context.Context, pod *v1.Pod, gracePeriodSeconds int64, reason, plugin string) error { 88 const ( 89 policyGroupVersion = "policy/v1beta1" 90 evictionKind = "Eviction" 91 ) 92 93 evictPod := func(pod *v1.Pod, gracePeriodOverride int64) error { 94 klog.Infof("[eviction-killer] send request for pod %v/%v", pod.Namespace, pod.Name) 95 96 deleteOptions := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriodOverride} 97 eviction := &policy.Eviction{ 98 TypeMeta: metav1.TypeMeta{ 99 APIVersion: policyGroupVersion, 100 Kind: evictionKind, 101 }, 102 ObjectMeta: metav1.ObjectMeta{ 103 Name: pod.Name, 104 Namespace: pod.Namespace, 105 }, 106 DeleteOptions: deleteOptions, 107 } 108 return e.client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(context.Background(), eviction) 109 } 110 111 return evict(e.client, e.recorder, e.emitter, pod, gracePeriodSeconds, reason, plugin, evictPod) 112 } 113 114 // DeletionAPIKiller implements Killer interface it evict those 115 // given pods by calling pod deletion API. 116 type DeletionAPIKiller struct { 117 emitter metrics.MetricEmitter 118 client kubernetes.Interface 119 recorder events.EventRecorder 120 } 121 122 func NewDeletionAPIKiller(_ *config.Configuration, client kubernetes.Interface, recorder events.EventRecorder, emitter metrics.MetricEmitter) (Killer, error) { 123 return &DeletionAPIKiller{ 124 emitter: emitter, 125 client: client, 126 recorder: recorder, 127 }, nil 128 } 129 130 func (d *DeletionAPIKiller) Name() string { return consts.KillerNameDeletionKiller } 131 132 func (d *DeletionAPIKiller) Evict(ctx context.Context, pod *v1.Pod, gracePeriodSeconds int64, reason, plugin string) error { 133 evictPod := func(pod *v1.Pod, gracePeriodOverride int64) error { 134 klog.Infof("[deletion-killer] send request for pod %v/%v", pod.Namespace, pod.Name) 135 136 deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &gracePeriodOverride} 137 return d.client.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, deleteOptions) 138 } 139 140 return evict(d.client, d.recorder, d.emitter, pod, gracePeriodSeconds, reason, plugin, evictPod) 141 } 142 143 // getWaitingPeriod get waiting period from graceful period. 144 func getWaitingPeriod(gracePeriod int64) time.Duration { 145 // the default timeout is relative to the grace period; 146 // settle on 10s to wait for kubelet->runtime traffic to complete in sigkill 147 timeout := gracePeriod + gracePeriod/2 148 minTimeout := int64(10) 149 if timeout < minTimeout { 150 timeout = minTimeout 151 } 152 return time.Duration(timeout) * time.Second 153 } 154 155 // waitForDeleted wait util pods have been physically deleted from APIServer. 156 func waitForDeleted(client kubernetes.Interface, pods []*v1.Pod, timeout time.Duration) ([]*v1.Pod, error) { 157 const interval = time.Second * 5 158 err := wait.PollImmediate(interval, timeout, func() (bool, error) { 159 var pendingPods []*v1.Pod 160 for i, pod := range pods { 161 // todo: refer through ETCD to make sure pods are physically deleted (is it reasonable?) 162 p, err := client.CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{}) 163 if apierrors.IsNotFound(err) || (p != nil && p.ObjectMeta.UID != pod.ObjectMeta.UID) { 164 continue 165 } else if err != nil { 166 return false, err 167 } else { 168 pendingPods = append(pendingPods, pods[i]) 169 } 170 } 171 pods = pendingPods 172 if len(pendingPods) > 0 { 173 return false, nil 174 } 175 return true, nil 176 }) 177 return pods, err 178 } 179 180 // deleteWithRetry keeping calling deletion func until it checks pods 181 // have been deleted timeout and return an error if it doesn't get a 182 // callback within a reasonable time. 183 func deleteWithRetry(pod *v1.Pod, gracePeriod int64, timeoutDuration time.Duration, 184 evictPod func(_ *v1.Pod, gracePeriod int64) error, 185 ) error { 186 timeoutTick := time.NewTimer(timeoutDuration) 187 for { 188 success := false 189 select { 190 case <-timeoutTick.C: 191 return errors.Errorf("eviction request did not complete within %v", timeoutDuration) 192 default: 193 err := evictPod(pod, gracePeriod) 194 if err == nil { 195 success = true 196 break 197 } else if apierrors.IsNotFound(err) { 198 success = true 199 break 200 } else if apierrors.IsTooManyRequests(err) { 201 delay, retry := apierrors.SuggestsClientDelay(err) 202 if !retry { 203 delay = 5 204 } 205 time.Sleep(time.Duration(delay) * time.Second) 206 } else { 207 return errors.Errorf("error when evicting pod %q: %v", pod.Name, err) 208 } 209 } 210 211 if success { 212 break 213 } 214 } 215 216 return nil 217 } 218 219 // evict all killer implementations will perform evict actions. 220 func evict(client kubernetes.Interface, recorder events.EventRecorder, emitter metrics.MetricEmitter, pod *v1.Pod, 221 gracePeriodSeconds int64, reason, plugin string, evictPod func(_ *v1.Pod, gracePeriod int64) error, 222 ) error { 223 timeoutDuration := getWaitingPeriod(gracePeriodSeconds) 224 klog.Infof("[killer] evict pod %v/%v with graceful seconds %v", pod.Namespace, pod.Name, gracePeriodSeconds) 225 226 if err := deleteWithRetry(pod, gracePeriodSeconds, timeoutDuration, evictPod); err != nil { 227 recorder.Eventf(pod, nil, v1.EventTypeWarning, consts.EventReasonEvictFailed, consts.EventActionEvicting, 228 fmt.Sprintf("Evict failed: %s", err)) 229 _ = emitter.StoreInt64(MetricsNameKillPod, 1, metrics.MetricTypeNameRaw, 230 metrics.MetricTag{Key: "state", Val: "failed"}, 231 metrics.MetricTag{Key: "pod_ns", Val: pod.Namespace}, 232 metrics.MetricTag{Key: "pod_name", Val: pod.Name}, 233 metrics.MetricTag{Key: "plugin_name", Val: plugin}) 234 235 return fmt.Errorf("evict failed %v", err) 236 } 237 238 recorder.Eventf(pod, nil, v1.EventTypeNormal, consts.EventReasonEvictCreated, consts.EventActionEvicting, 239 "Successfully create eviction; reason: %s", reason) 240 _ = emitter.StoreInt64(MetricsNameKillPod, 1, metrics.MetricTypeNameRaw, 241 metrics.MetricTag{Key: "state", Val: "succeeded"}, 242 metrics.MetricTag{Key: "pod_ns", Val: pod.Namespace}, 243 metrics.MetricTag{Key: "pod_name", Val: pod.Name}, 244 metrics.MetricTag{Key: "plugin_name", Val: plugin}) 245 klog.Infof("[killer] successfully create eviction for pod %v/%v", pod.Namespace, pod.Name) 246 247 podArray := []*v1.Pod{pod} 248 _, err := waitForDeleted(client, podArray, timeoutDuration) 249 if err != nil { 250 recorder.Eventf(pod, nil, v1.EventTypeWarning, consts.EventReasonEvictExceededGracePeriod, consts.EventActionEvicting, 251 "Container runtime did not kill the pod within specified grace period") 252 253 return fmt.Errorf("container deletion did not complete within %v", timeoutDuration) 254 } 255 256 recorder.Eventf(pod, nil, v1.EventTypeNormal, consts.EventReasonEvictSucceeded, consts.EventActionEvicting, 257 "Evicted pod has been deleted physically; reason: %s", reason) 258 klog.Infof("[killer] pod %s/%s has been deleted physically", pod.Namespace, pod.Name) 259 260 return nil 261 } 262 263 // ContainerKiller implements Killer interface it actually does not evict pod but 264 // stop containers in given pod directly. 265 type ContainerKiller struct { 266 containerManager cri.ContainerManager 267 recorder events.EventRecorder 268 emitter metrics.MetricEmitter 269 } 270 271 func NewContainerKiller(conf *config.Configuration, _ kubernetes.Interface, recorder events.EventRecorder, emitter metrics.MetricEmitter) (Killer, error) { 272 remoteRuntimeService, err := remote.NewRemoteRuntimeService(conf.RuntimeEndpoint, 2*time.Minute) 273 if err != nil { 274 return nil, err 275 } 276 277 return &ContainerKiller{ 278 containerManager: remoteRuntimeService, 279 recorder: recorder, 280 emitter: emitter, 281 }, nil 282 } 283 284 func (c *ContainerKiller) Name() string { return consts.KillerNameContainerKiller } 285 286 func (c *ContainerKiller) Evict(_ context.Context, pod *v1.Pod, gracePeriodSeconds int64, reason, plugin string) error { 287 if pod == nil { 288 return fmt.Errorf("pod is nil") 289 } 290 291 for _, containerStatus := range pod.Status.ContainerStatuses { 292 containerID := container.ParseContainerID(containerStatus.ContainerID) 293 err := c.containerManager.StopContainer(containerID.ID, gracePeriodSeconds) 294 if err != nil { 295 c.recorder.Eventf(pod, nil, v1.EventTypeNormal, consts.EventReasonContainerStopped, consts.EventActionContainerStopping, 296 "Failed to kill container %v; reason: %s", containerStatus.Name, reason) 297 _ = c.emitter.StoreInt64(MetricsNameKillContainer, 1, metrics.MetricTypeNameRaw, 298 metrics.MetricTag{Key: "state", Val: "failed"}, 299 metrics.MetricTag{Key: "pod_ns", Val: pod.Namespace}, 300 metrics.MetricTag{Key: "pod_name", Val: pod.Name}, 301 metrics.MetricTag{Key: "container_name", Val: containerStatus.Name}, 302 metrics.MetricTag{Key: "plugin_name", Val: plugin}) 303 klog.Infof("[killer] failed to kill container %v(containerID: %v) for pod %v/%v, error:%v", containerStatus.Name, containerID, pod.Namespace, pod.Name, err) 304 return fmt.Errorf("ContainerKiller stop container %v failed with error: %v", containerStatus.ContainerID, err) 305 } 306 c.recorder.Eventf(pod, nil, v1.EventTypeNormal, consts.EventReasonContainerStopped, consts.EventActionContainerStopping, 307 "Successfully kill container %v; reason: %s", containerStatus.Name, reason) 308 _ = c.emitter.StoreInt64(MetricsNameKillContainer, 1, metrics.MetricTypeNameRaw, 309 metrics.MetricTag{Key: "state", Val: "succeeded"}, 310 metrics.MetricTag{Key: "pod_ns", Val: pod.Namespace}, 311 metrics.MetricTag{Key: "pod_name", Val: pod.Name}, 312 metrics.MetricTag{Key: "container_name", Val: containerStatus.Name}, 313 metrics.MetricTag{Key: "plugin_name", Val: plugin}) 314 klog.Infof("[killer] successfully kill container %v/%v for pod %v/%v", containerStatus.Name, containerStatus.ContainerID, pod.Namespace, pod.Name) 315 } 316 // TODO: do we have to wait for container being completely killed? 317 318 return nil 319 }