github.com/alibaba/sealer@v0.8.6-0.20220430115802-37a2bdaa8173/pkg/debug/debug.go (about)

     1  // Copyright © 2021 Alibaba Group Holding Ltd.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package debug
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"os"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/docker/distribution/reference"
    25  	"github.com/pkg/errors"
    26  	"github.com/spf13/cobra"
    27  	corev1 "k8s.io/api/core/v1"
    28  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/fields"
    31  	"k8s.io/apimachinery/pkg/runtime"
    32  	"k8s.io/apimachinery/pkg/runtime/schema"
    33  	"k8s.io/apimachinery/pkg/watch"
    34  	"k8s.io/cli-runtime/pkg/genericclioptions"
    35  	"k8s.io/client-go/kubernetes"
    36  	"k8s.io/client-go/kubernetes/scheme"
    37  	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
    38  	"k8s.io/client-go/rest"
    39  	"k8s.io/client-go/tools/cache"
    40  	"k8s.io/client-go/tools/clientcmd"
    41  	watchtools "k8s.io/client-go/tools/watch"
    42  
    43  	"github.com/alibaba/sealer/pkg/debug/clusterinfo"
    44  )
    45  
const (
	// NodeDebugPrefix and PodDebugPrefix are name prefixes for the node and
	// pod debuggers.
	// NOTE(review): presumably used when naming the generated debug pod or
	// container — confirm against DebugNode/DebugPod.
	NodeDebugPrefix = "node-debugger"
	PodDebugPrefix  = "pod-debugger"

	// TypeDebugNode and TypeDebugPod are the values of DebuggerOptions.Type.
	// Run debugs a node when Type equals TypeDebugNode and a pod otherwise.
	TypeDebugNode = "node"
	TypeDebugPod  = "pod"

	// FSDebugID is the field separator placed between the parts of a debug ID
	// (see getDebugID).
	FSDebugID = "."
)
    55  
// DebuggerOptions holds the options for an invocation of debug.
type DebuggerOptions struct {
	Type       string // what to debug: TypeDebugPod or TypeDebugNode
	TargetName string // pod/node name to be debugged

	Image       string // debug container/pod image name
	// Env holds the environment variables for the debugger. It is filled from
	// the "env" flag and the CHECK_LIST entry by CompleteAndVerifyOptions, and
	// with cluster information by addClusterInfoIntoEnv.
	Env         []corev1.EnvVar
	Interactive bool     // -i
	TTY         bool     // -t
	Command     []string // after --
	CheckList   []string // items to check (network, volume, etc.); exported as CHECK_LIST

	DebugContainerName string // debug container name
	Namespace          string // kubernetes namespace
	// PullPolicy is the image pull policy. NewDebugOptions defaults it to
	// IfNotPresent; CompleteAndVerifyOptions normalizes its letter case.
	PullPolicy         string

	// AdminKubeConfigPath is the kubeconfig file Run builds its client from.
	AdminKubeConfigPath string

	// NOTE(review): the original note here read "Type is container";
	// presumably this field only applies when debugging a pod — confirm.
	TargetContainer string // target container to share the namespace
}
    77  
// Debugger carries the options and the runtime state of one debug invocation.
type Debugger struct {
	*DebuggerOptions
	// Motd is handed to the Connector when attaching to the debug container.
	// NOTE(review): presumably a "message of the day" banner — confirm.
	Motd string

	// kubeClientCorev1 is the core/v1 client; it is set by Run.
	kubeClientCorev1 corev1client.CoreV1Interface

	// IOStreams are the streams handed to the Connector. NewDebugger sets Out
	// and ErrOut; CompleteAndVerifyOptions sets In for interactive/TTY use.
	genericclioptions.IOStreams
}
    86  
    87  // NewDebugOptions returns a DebugOptions initialized with default values.
    88  func NewDebugOptions() *DebuggerOptions {
    89  	return &DebuggerOptions{
    90  		Command: []string{},
    91  
    92  		Namespace:  corev1.NamespaceDefault,
    93  		PullPolicy: string(corev1.PullIfNotPresent),
    94  	}
    95  }
    96  
    97  func NewDebugger(options *DebuggerOptions) *Debugger {
    98  	return &Debugger{
    99  		DebuggerOptions: options,
   100  		IOStreams: genericclioptions.IOStreams{
   101  			Out:    os.Stdout,
   102  			ErrOut: os.Stderr,
   103  		},
   104  	}
   105  }
   106  
   107  // CompleteAndVerifyOptions completes and verifies DebugOptions.
   108  func (debugger *Debugger) CompleteAndVerifyOptions(cmd *cobra.Command, args []string, imager ImagesManagement) error {
   109  	// args
   110  	debugger.TargetName = args[0]
   111  	argsLen := cmd.ArgsLenAtDash()
   112  
   113  	if argsLen == -1 && len(args) > 1 {
   114  		debugger.Command = args[1:]
   115  	}
   116  
   117  	if argsLen > 0 && len(args) > argsLen {
   118  		debugger.Command = args[argsLen:]
   119  	}
   120  
   121  	if len(debugger.Image) == 0 {
   122  		image, err := imager.GetDefaultImage()
   123  		if err != nil {
   124  			return err
   125  		}
   126  
   127  		debugger.Image = image
   128  	}
   129  
   130  	if len(debugger.Image) > 0 && !reference.ReferenceRegexp.MatchString(debugger.Image) {
   131  		return fmt.Errorf("invalid image name %q: %v", debugger.Image, reference.ErrReferenceInvalidFormat)
   132  	}
   133  
   134  	// stdin/tty
   135  	if debugger.TTY || debugger.Interactive {
   136  		debugger.In = os.Stdin
   137  		debugger.Interactive = true
   138  	}
   139  
   140  	// env
   141  	envStrings, err := cmd.Flags().GetStringToString("env")
   142  	if err != nil {
   143  		return fmt.Errorf("error getting env flag: %v", err)
   144  	}
   145  	for k, v := range envStrings {
   146  		debugger.Env = append(debugger.Env, corev1.EnvVar{Name: k, Value: v})
   147  	}
   148  
   149  	// PullPolicy
   150  	if strings.EqualFold(debugger.PullPolicy, string(corev1.PullAlways)) {
   151  		debugger.PullPolicy = string(corev1.PullAlways)
   152  	}
   153  
   154  	if strings.EqualFold(debugger.PullPolicy, string(corev1.PullIfNotPresent)) {
   155  		debugger.PullPolicy = string(corev1.PullIfNotPresent)
   156  	}
   157  
   158  	if strings.EqualFold(debugger.PullPolicy, string(corev1.PullNever)) {
   159  		debugger.PullPolicy = string(corev1.PullNever)
   160  	}
   161  
   162  	// checklist: add check items into env
   163  	debugger.Env = append(debugger.Env, corev1.EnvVar{
   164  		Name:  "CHECK_LIST",
   165  		Value: strings.Join(debugger.CheckList, " "),
   166  	})
   167  
   168  	return nil
   169  }
   170  
   171  // Run generates a debug pod/node and attach to it according to command flag.
   172  func (debugger *Debugger) Run() (string, error) {
   173  	ctx := context.Background()
   174  
   175  	// get the rest config
   176  	restConfig, err := clientcmd.BuildConfigFromFlags("", debugger.AdminKubeConfigPath)
   177  	if err != nil {
   178  		return "", errors.Wrapf(err, "failed to get rest config from file %s", debugger.AdminKubeConfigPath)
   179  	}
   180  	if err := SetKubernetesDefaults(restConfig); err != nil {
   181  		return "", err
   182  	}
   183  
   184  	// get the kube client set
   185  	kubeClientSet, err := kubernetes.NewForConfig(restConfig)
   186  	if err != nil {
   187  		return "", errors.Wrapf(err, "failed to create kubernetes client from file %s", debugger.AdminKubeConfigPath)
   188  	}
   189  	debugger.kubeClientCorev1 = kubeClientSet.CoreV1()
   190  
   191  	var (
   192  		debugPod *corev1.Pod
   193  		errDebug error
   194  	)
   195  
   196  	// generate a debug container or pod
   197  	if debugger.Type == TypeDebugNode {
   198  		debugPod, errDebug = debugger.DebugNode(ctx)
   199  	} else {
   200  		debugPod, errDebug = debugger.DebugPod(ctx)
   201  	}
   202  
   203  	if errDebug != nil {
   204  		return "", errDebug
   205  	}
   206  
   207  	// will only create debug container but will not to connect it
   208  	if len(debugger.Command) == 0 && !debugger.TTY {
   209  		return debugger.getDebugID(debugPod), nil
   210  	}
   211  
   212  	// clean the debugger container/pod
   213  	clean := &Cleaner{
   214  		CleanOptions: &CleanOptions{
   215  			Namespace:     debugPod.Namespace,
   216  			PodName:       debugPod.Name,
   217  			ContainerName: debugger.DebugContainerName,
   218  		},
   219  	}
   220  
   221  	if errCon := debugger.connectPod(ctx, debugPod, restConfig); errCon != nil {
   222  		// There is no error handling because they are the default clean actions.
   223  		// Even if it returns an error, we should not return the error to user.
   224  		if debugger.Type == TypeDebugNode {
   225  			_ = clean.RemovePod(ctx, debugger.kubeClientCorev1)
   226  		} else {
   227  			_ = clean.ExitEphemeralContainer(restConfig)
   228  		}
   229  
   230  		return "", errCon
   231  	}
   232  
   233  	// It is the same as before.
   234  	if debugger.Type == TypeDebugNode {
   235  		_ = clean.RemovePod(ctx, debugger.kubeClientCorev1)
   236  	} else {
   237  		_ = clean.ExitEphemeralContainer(restConfig)
   238  	}
   239  
   240  	return "", nil
   241  }
   242  
   243  // addClusterInfoIntoEnv adds the cluster infos into DebugOptions.Env
   244  func (debugger *Debugger) addClusterInfoIntoEnv(ctx context.Context) error {
   245  	podsIPList, err := clusterinfo.GetPodsIP(ctx, debugger.kubeClientCorev1, debugger.Namespace)
   246  	if err != nil {
   247  		return err
   248  	}
   249  	debugger.Env = append(debugger.Env, corev1.EnvVar{
   250  		Name:  "POD_IP_LIST",
   251  		Value: strings.Join(podsIPList, " "),
   252  	})
   253  
   254  	nodesIPList, err := clusterinfo.GetNodesIP(ctx, debugger.kubeClientCorev1)
   255  	if err != nil {
   256  		return err
   257  	}
   258  	debugger.Env = append(debugger.Env, corev1.EnvVar{
   259  		Name:  "NODE_IP_LIST",
   260  		Value: strings.Join(nodesIPList, " "),
   261  	})
   262  
   263  	dnsSVCName, dnsSVCIP, dnsEndpointsIPs, err := clusterinfo.GetDNSServiceAll(ctx, debugger.kubeClientCorev1)
   264  	if err != nil {
   265  		return err
   266  	}
   267  	debugger.Env = append(debugger.Env,
   268  		corev1.EnvVar{
   269  			Name:  "KUBE_DNS_SERVICE_NAME",
   270  			Value: dnsSVCName,
   271  		},
   272  		corev1.EnvVar{
   273  			Name:  "KUBE_DNS_SERVICE_IP",
   274  			Value: dnsSVCIP,
   275  		},
   276  		corev1.EnvVar{
   277  			Name:  "KUBE_DNS_ENDPOINTS_IPS",
   278  			Value: strings.Join(dnsEndpointsIPs, " "),
   279  		},
   280  	)
   281  
   282  	return nil
   283  }
   284  
   285  func (debugger *Debugger) connectPod(ctx context.Context, debugPod *corev1.Pod, restConfig *rest.Config) error {
   286  	// wait the debug container(ephemeral container) running
   287  	debugPodRun, err := WaitForContainer(ctx, debugger.kubeClientCorev1, debugPod.Namespace, debugPod.Name, debugger.DebugContainerName)
   288  	if err != nil {
   289  		return err
   290  	}
   291  
   292  	status := GetContainerStatusByName(debugPodRun, debugger.DebugContainerName)
   293  	if status == nil {
   294  		return fmt.Errorf("error getting container status of container name %s", debugger.DebugContainerName)
   295  	}
   296  
   297  	if status.State.Terminated != nil {
   298  		return fmt.Errorf("debug container %s terminated", debugger.DebugContainerName)
   299  	}
   300  
   301  	// begin attaching to debug container(ephemeral container)
   302  	connectOpts := &Connector{
   303  		NameSpace:     debugPodRun.Namespace,
   304  		Pod:           debugPodRun,
   305  		Command:       debugger.Command,
   306  		ContainerName: debugger.DebugContainerName,
   307  		Stdin:         debugger.Interactive,
   308  		TTY:           debugger.TTY,
   309  		IOStreams:     debugger.IOStreams,
   310  		Config:        restConfig,
   311  		Motd:          debugger.Motd,
   312  	}
   313  
   314  	if err := connectOpts.Connect(); err != nil {
   315  		return err
   316  	}
   317  
   318  	return nil
   319  }
   320  
   321  // getDebugID returns the debug ID that consists of namespace, pod name, container name
   322  func (debugger *Debugger) getDebugID(pod *corev1.Pod) string {
   323  	return debugger.DebugContainerName + FSDebugID + pod.Name + FSDebugID + debugger.Namespace
   324  }
   325  
   326  // SetKubernetesDefaults sets default values on the provided client config for accessing the
   327  // Kubernetes API or returns an error if any of the defaults are impossible or invalid.
   328  func SetKubernetesDefaults(config *rest.Config) error {
   329  	if config.GroupVersion == nil {
   330  		config.GroupVersion = &corev1.SchemeGroupVersion
   331  	}
   332  
   333  	if config.APIPath == "" {
   334  		config.APIPath = "/api"
   335  	}
   336  
   337  	if config.NegotiatedSerializer == nil {
   338  		config.NegotiatedSerializer = scheme.Codecs.WithoutConversion()
   339  		//restConfig.NegotiatedSerializer = scheme.Codecs
   340  	}
   341  
   342  	return rest.SetKubernetesDefaults(config)
   343  }
   344  
   345  // WaitForContainer watches the given pod until the container is running or terminated.
   346  func WaitForContainer(ctx context.Context, client corev1client.PodsGetter, namespace, podName, containerName string) (*corev1.Pod, error) {
   347  	ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, 5*time.Second)
   348  	defer cancel()
   349  
   350  	// register the watcher and lister
   351  	fieldSelector := fields.OneTermEqualSelector("metadata.name", podName).String()
   352  	listAndWatch := &cache.ListWatch{
   353  		ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
   354  			options.FieldSelector = fieldSelector
   355  			return client.Pods(namespace).List(ctx, options)
   356  		},
   357  		WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   358  			options.FieldSelector = fieldSelector
   359  			return client.Pods(namespace).Watch(ctx, options)
   360  		},
   361  	}
   362  
   363  	// waiting sync
   364  	event, err := watchtools.UntilWithSync(ctx, listAndWatch, &corev1.Pod{}, nil, func(event watch.Event) (bool, error) {
   365  		switch event.Type {
   366  		case watch.Deleted:
   367  			return false, apierrors.NewNotFound(schema.GroupResource{Resource: "pods"}, "")
   368  		}
   369  
   370  		pod, ok := event.Object.(*corev1.Pod)
   371  		if !ok {
   372  			return false, fmt.Errorf("watch did not return a pod: %v", event.Object)
   373  		}
   374  
   375  		status := GetContainerStatusByName(pod, containerName)
   376  		if status == nil {
   377  			return false, nil
   378  		}
   379  
   380  		if status.State.Waiting != nil && status.State.Waiting.Reason == "ImagePullBackOff" {
   381  			return false, fmt.Errorf("failed to pull image")
   382  		}
   383  
   384  		if status.State.Running != nil || status.State.Terminated != nil {
   385  			return true, nil
   386  		}
   387  
   388  		return false, nil
   389  	})
   390  
   391  	if event != nil {
   392  		return event.Object.(*corev1.Pod), err
   393  	}
   394  
   395  	return nil, err
   396  }
   397  
   398  // GetContainerStatusByName returns the container status by the containerName.
   399  func GetContainerStatusByName(pod *corev1.Pod, containerName string) *corev1.ContainerStatus {
   400  	allContainerStatus := [][]corev1.ContainerStatus{pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses, pod.Status.EphemeralContainerStatuses}
   401  
   402  	for _, statusSlice := range allContainerStatus {
   403  		for _, status := range statusSlice {
   404  			if status.Name == containerName {
   405  				return &status
   406  			}
   407  		}
   408  	}
   409  
   410  	return nil
   411  }
   412  
   413  // ContainerNameToRef returns the container names in pod.
   414  func ContainerNameToRef(pod *corev1.Pod) map[string]*corev1.Container {
   415  	names := map[string]*corev1.Container{}
   416  
   417  	for i := range pod.Spec.Containers {
   418  		ref := &pod.Spec.Containers[i]
   419  		names[ref.Name] = ref
   420  	}
   421  
   422  	for i := range pod.Spec.InitContainers {
   423  		ref := &pod.Spec.InitContainers[i]
   424  		names[ref.Name] = ref
   425  	}
   426  
   427  	for i := range pod.Spec.EphemeralContainers {
   428  		ref := (*corev1.Container)(&pod.Spec.EphemeralContainers[i].EphemeralContainerCommon)
   429  		names[ref.Name] = ref
   430  	}
   431  
   432  	return names
   433  }