github.com/alibaba/sealer@v0.8.6-0.20220430115802-37a2bdaa8173/pkg/debug/debug.go (about) 1 // Copyright © 2021 Alibaba Group Holding Ltd. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package debug 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "strings" 22 "time" 23 24 "github.com/docker/distribution/reference" 25 "github.com/pkg/errors" 26 "github.com/spf13/cobra" 27 corev1 "k8s.io/api/core/v1" 28 apierrors "k8s.io/apimachinery/pkg/api/errors" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/fields" 31 "k8s.io/apimachinery/pkg/runtime" 32 "k8s.io/apimachinery/pkg/runtime/schema" 33 "k8s.io/apimachinery/pkg/watch" 34 "k8s.io/cli-runtime/pkg/genericclioptions" 35 "k8s.io/client-go/kubernetes" 36 "k8s.io/client-go/kubernetes/scheme" 37 corev1client "k8s.io/client-go/kubernetes/typed/core/v1" 38 "k8s.io/client-go/rest" 39 "k8s.io/client-go/tools/cache" 40 "k8s.io/client-go/tools/clientcmd" 41 watchtools "k8s.io/client-go/tools/watch" 42 43 "github.com/alibaba/sealer/pkg/debug/clusterinfo" 44 ) 45 46 const ( 47 NodeDebugPrefix = "node-debugger" 48 PodDebugPrefix = "pod-debugger" 49 50 TypeDebugNode = "node" 51 TypeDebugPod = "pod" 52 53 FSDebugID = "." 54 ) 55 56 // DebuggerOptions holds the options for an invocation of debug. 57 type DebuggerOptions struct { 58 Type string // debug pod or node 59 TargetName string // pod/node name to be debugged 60 61 Image string // debug container/pod image name 62 Env []corev1.EnvVar 63 Interactive bool // -i 64 TTY bool // -t 65 Command []string // after -- 66 CheckList []string // check network、volume etc 67 68 DebugContainerName string // debug container name 69 Namespace string // kubernetes namespace 70 PullPolicy string 71 72 AdminKubeConfigPath string 73 74 // Type is container 75 TargetContainer string // target container to share the namespace 76 } 77 78 type Debugger struct { 79 *DebuggerOptions 80 Motd string 81 82 kubeClientCorev1 corev1client.CoreV1Interface 83 84 genericclioptions.IOStreams 85 } 86 87 // NewDebugOptions returns a DebugOptions initialized with default values. 88 func NewDebugOptions() *DebuggerOptions { 89 return &DebuggerOptions{ 90 Command: []string{}, 91 92 Namespace: corev1.NamespaceDefault, 93 PullPolicy: string(corev1.PullIfNotPresent), 94 } 95 } 96 97 func NewDebugger(options *DebuggerOptions) *Debugger { 98 return &Debugger{ 99 DebuggerOptions: options, 100 IOStreams: genericclioptions.IOStreams{ 101 Out: os.Stdout, 102 ErrOut: os.Stderr, 103 }, 104 } 105 } 106 107 // CompleteAndVerifyOptions completes and verifies DebugOptions. 108 func (debugger *Debugger) CompleteAndVerifyOptions(cmd *cobra.Command, args []string, imager ImagesManagement) error { 109 // args 110 debugger.TargetName = args[0] 111 argsLen := cmd.ArgsLenAtDash() 112 113 if argsLen == -1 && len(args) > 1 { 114 debugger.Command = args[1:] 115 } 116 117 if argsLen > 0 && len(args) > argsLen { 118 debugger.Command = args[argsLen:] 119 } 120 121 if len(debugger.Image) == 0 { 122 image, err := imager.GetDefaultImage() 123 if err != nil { 124 return err 125 } 126 127 debugger.Image = image 128 } 129 130 if len(debugger.Image) > 0 && !reference.ReferenceRegexp.MatchString(debugger.Image) { 131 return fmt.Errorf("invalid image name %q: %v", debugger.Image, reference.ErrReferenceInvalidFormat) 132 } 133 134 // stdin/tty 135 if debugger.TTY || debugger.Interactive { 136 debugger.In = os.Stdin 137 debugger.Interactive = true 138 } 139 140 // env 141 envStrings, err := cmd.Flags().GetStringToString("env") 142 if err != nil { 143 return fmt.Errorf("error getting env flag: %v", err) 144 } 145 for k, v := range envStrings { 146 debugger.Env = append(debugger.Env, corev1.EnvVar{Name: k, Value: v}) 147 } 148 149 // PullPolicy 150 if strings.EqualFold(debugger.PullPolicy, string(corev1.PullAlways)) { 151 debugger.PullPolicy = string(corev1.PullAlways) 152 } 153 154 if strings.EqualFold(debugger.PullPolicy, string(corev1.PullIfNotPresent)) { 155 debugger.PullPolicy = string(corev1.PullIfNotPresent) 156 } 157 158 if strings.EqualFold(debugger.PullPolicy, string(corev1.PullNever)) { 159 debugger.PullPolicy = string(corev1.PullNever) 160 } 161 162 // checklist: add check items into env 163 debugger.Env = append(debugger.Env, corev1.EnvVar{ 164 Name: "CHECK_LIST", 165 Value: strings.Join(debugger.CheckList, " "), 166 }) 167 168 return nil 169 } 170 171 // Run generates a debug pod/node and attach to it according to command flag. 172 func (debugger *Debugger) Run() (string, error) { 173 ctx := context.Background() 174 175 // get the rest config 176 restConfig, err := clientcmd.BuildConfigFromFlags("", debugger.AdminKubeConfigPath) 177 if err != nil { 178 return "", errors.Wrapf(err, "failed to get rest config from file %s", debugger.AdminKubeConfigPath) 179 } 180 if err := SetKubernetesDefaults(restConfig); err != nil { 181 return "", err 182 } 183 184 // get the kube client set 185 kubeClientSet, err := kubernetes.NewForConfig(restConfig) 186 if err != nil { 187 return "", errors.Wrapf(err, "failed to create kubernetes client from file %s", debugger.AdminKubeConfigPath) 188 } 189 debugger.kubeClientCorev1 = kubeClientSet.CoreV1() 190 191 var ( 192 debugPod *corev1.Pod 193 errDebug error 194 ) 195 196 // generate a debug container or pod 197 if debugger.Type == TypeDebugNode { 198 debugPod, errDebug = debugger.DebugNode(ctx) 199 } else { 200 debugPod, errDebug = debugger.DebugPod(ctx) 201 } 202 203 if errDebug != nil { 204 return "", errDebug 205 } 206 207 // will only create debug container but will not to connect it 208 if len(debugger.Command) == 0 && !debugger.TTY { 209 return debugger.getDebugID(debugPod), nil 210 } 211 212 // clean the debugger container/pod 213 clean := &Cleaner{ 214 CleanOptions: &CleanOptions{ 215 Namespace: debugPod.Namespace, 216 PodName: debugPod.Name, 217 ContainerName: debugger.DebugContainerName, 218 }, 219 } 220 221 if errCon := debugger.connectPod(ctx, debugPod, restConfig); errCon != nil { 222 // There is no error handling because they are the default clean actions. 223 // Even if it returns an error, we should not return the error to user. 224 if debugger.Type == TypeDebugNode { 225 _ = clean.RemovePod(ctx, debugger.kubeClientCorev1) 226 } else { 227 _ = clean.ExitEphemeralContainer(restConfig) 228 } 229 230 return "", errCon 231 } 232 233 // It is the same as before. 234 if debugger.Type == TypeDebugNode { 235 _ = clean.RemovePod(ctx, debugger.kubeClientCorev1) 236 } else { 237 _ = clean.ExitEphemeralContainer(restConfig) 238 } 239 240 return "", nil 241 } 242 243 // addClusterInfoIntoEnv adds the cluster infos into DebugOptions.Env 244 func (debugger *Debugger) addClusterInfoIntoEnv(ctx context.Context) error { 245 podsIPList, err := clusterinfo.GetPodsIP(ctx, debugger.kubeClientCorev1, debugger.Namespace) 246 if err != nil { 247 return err 248 } 249 debugger.Env = append(debugger.Env, corev1.EnvVar{ 250 Name: "POD_IP_LIST", 251 Value: strings.Join(podsIPList, " "), 252 }) 253 254 nodesIPList, err := clusterinfo.GetNodesIP(ctx, debugger.kubeClientCorev1) 255 if err != nil { 256 return err 257 } 258 debugger.Env = append(debugger.Env, corev1.EnvVar{ 259 Name: "NODE_IP_LIST", 260 Value: strings.Join(nodesIPList, " "), 261 }) 262 263 dnsSVCName, dnsSVCIP, dnsEndpointsIPs, err := clusterinfo.GetDNSServiceAll(ctx, debugger.kubeClientCorev1) 264 if err != nil { 265 return err 266 } 267 debugger.Env = append(debugger.Env, 268 corev1.EnvVar{ 269 Name: "KUBE_DNS_SERVICE_NAME", 270 Value: dnsSVCName, 271 }, 272 corev1.EnvVar{ 273 Name: "KUBE_DNS_SERVICE_IP", 274 Value: dnsSVCIP, 275 }, 276 corev1.EnvVar{ 277 Name: "KUBE_DNS_ENDPOINTS_IPS", 278 Value: strings.Join(dnsEndpointsIPs, " "), 279 }, 280 ) 281 282 return nil 283 } 284 285 func (debugger *Debugger) connectPod(ctx context.Context, debugPod *corev1.Pod, restConfig *rest.Config) error { 286 // wait the debug container(ephemeral container) running 287 debugPodRun, err := WaitForContainer(ctx, debugger.kubeClientCorev1, debugPod.Namespace, debugPod.Name, debugger.DebugContainerName) 288 if err != nil { 289 return err 290 } 291 292 status := GetContainerStatusByName(debugPodRun, debugger.DebugContainerName) 293 if status == nil { 294 return fmt.Errorf("error getting container status of container name %s", debugger.DebugContainerName) 295 } 296 297 if status.State.Terminated != nil { 298 return fmt.Errorf("debug container %s terminated", debugger.DebugContainerName) 299 } 300 301 // begin attaching to debug container(ephemeral container) 302 connectOpts := &Connector{ 303 NameSpace: debugPodRun.Namespace, 304 Pod: debugPodRun, 305 Command: debugger.Command, 306 ContainerName: debugger.DebugContainerName, 307 Stdin: debugger.Interactive, 308 TTY: debugger.TTY, 309 IOStreams: debugger.IOStreams, 310 Config: restConfig, 311 Motd: debugger.Motd, 312 } 313 314 if err := connectOpts.Connect(); err != nil { 315 return err 316 } 317 318 return nil 319 } 320 321 // getDebugID returns the debug ID that consists of namespace, pod name, container name 322 func (debugger *Debugger) getDebugID(pod *corev1.Pod) string { 323 return debugger.DebugContainerName + FSDebugID + pod.Name + FSDebugID + debugger.Namespace 324 } 325 326 // SetKubernetesDefaults sets default values on the provided client config for accessing the 327 // Kubernetes API or returns an error if any of the defaults are impossible or invalid. 328 func SetKubernetesDefaults(config *rest.Config) error { 329 if config.GroupVersion == nil { 330 config.GroupVersion = &corev1.SchemeGroupVersion 331 } 332 333 if config.APIPath == "" { 334 config.APIPath = "/api" 335 } 336 337 if config.NegotiatedSerializer == nil { 338 config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() 339 //restConfig.NegotiatedSerializer = scheme.Codecs 340 } 341 342 return rest.SetKubernetesDefaults(config) 343 } 344 345 // WaitForContainer watches the given pod until the container is running or terminated. 346 func WaitForContainer(ctx context.Context, client corev1client.PodsGetter, namespace, podName, containerName string) (*corev1.Pod, error) { 347 ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, 5*time.Second) 348 defer cancel() 349 350 // register the watcher and lister 351 fieldSelector := fields.OneTermEqualSelector("metadata.name", podName).String() 352 listAndWatch := &cache.ListWatch{ 353 ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 354 options.FieldSelector = fieldSelector 355 return client.Pods(namespace).List(ctx, options) 356 }, 357 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 358 options.FieldSelector = fieldSelector 359 return client.Pods(namespace).Watch(ctx, options) 360 }, 361 } 362 363 // waiting sync 364 event, err := watchtools.UntilWithSync(ctx, listAndWatch, &corev1.Pod{}, nil, func(event watch.Event) (bool, error) { 365 switch event.Type { 366 case watch.Deleted: 367 return false, apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, "") 368 } 369 370 pod, ok := event.Object.(*corev1.Pod) 371 if !ok { 372 return false, fmt.Errorf("watch did not return a pod: %v", event.Object) 373 } 374 375 status := GetContainerStatusByName(pod, containerName) 376 if status == nil { 377 return false, nil 378 } 379 380 if status.State.Waiting != nil && status.State.Waiting.Reason == "ImagePullBackOff" { 381 return false, fmt.Errorf("failed to pull image") 382 } 383 384 if status.State.Running != nil || status.State.Terminated != nil { 385 return true, nil 386 } 387 388 return false, nil 389 }) 390 391 if event != nil { 392 return event.Object.(*corev1.Pod), err 393 } 394 395 return nil, err 396 } 397 398 // GetContainerStatusByName returns the container status by the containerName. 399 func GetContainerStatusByName(pod *corev1.Pod, containerName string) *corev1.ContainerStatus { 400 allContainerStatus := [][]corev1.ContainerStatus{pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses, pod.Status.EphemeralContainerStatuses} 401 402 for _, statusSlice := range allContainerStatus { 403 for _, status := range statusSlice { 404 if status.Name == containerName { 405 return &status 406 } 407 } 408 } 409 410 return nil 411 } 412 413 // ContainerNameToRef returns the container names in pod. 414 func ContainerNameToRef(pod *corev1.Pod) map[string]*corev1.Container { 415 names := map[string]*corev1.Container{} 416 417 for i := range pod.Spec.Containers { 418 ref := &pod.Spec.Containers[i] 419 names[ref.Name] = ref 420 } 421 422 for i := range pod.Spec.InitContainers { 423 ref := &pod.Spec.InitContainers[i] 424 names[ref.Name] = ref 425 } 426 427 for i := range pod.Spec.EphemeralContainers { 428 ref := (*corev1.Container)(&pod.Spec.EphemeralContainers[i].EphemeralContainerCommon) 429 names[ref.Name] = ref 430 } 431 432 return names 433 }