github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/cli/cmd/fault/fault_node.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package fault
    21  
    22  import (
    23  	"bufio"
    24  	"context"
    25  	"encoding/base64"
    26  	"fmt"
    27  	"io/ioutil"
    28  	"os"
    29  	"path/filepath"
    30  	"strings"
    31  
    32  	"github.com/chaos-mesh/chaos-mesh/api/v1alpha1"
    33  	"github.com/spf13/cobra"
    34  	corev1 "k8s.io/api/core/v1"
    35  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    36  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    37  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    38  	"k8s.io/apimachinery/pkg/runtime"
    39  	"k8s.io/cli-runtime/pkg/genericiooptions"
    40  	"k8s.io/client-go/kubernetes"
    41  	cmdutil "k8s.io/kubectl/pkg/cmd/util"
    42  	"k8s.io/kubectl/pkg/util/templates"
    43  
    44  	cp "github.com/1aal/kubeblocks/pkg/cli/cloudprovider"
    45  	"github.com/1aal/kubeblocks/pkg/cli/create"
    46  	"github.com/1aal/kubeblocks/pkg/cli/printer"
    47  	"github.com/1aal/kubeblocks/pkg/cli/util"
    48  	"github.com/1aal/kubeblocks/pkg/cli/util/prompt"
    49  )
    50  
    51  var faultNodeExample = templates.Examples(`
    52  	# Stop a specified EC2 instance.
    53  	kbcli fault node stop node1 -c=aws --region=cn-northwest-1 --duration=3m
    54  
    55  	# Stop two specified EC2 instances.
    56  	kbcli fault node stop node1 node2 -c=aws --region=cn-northwest-1 --duration=3m
    57  
    58  	# Restart two specified EC2 instances.
    59  	kbcli fault node restart node1 node2 -c=aws --region=cn-northwest-1 --duration=3m
    60  
    61  	# Detach two specified volume from two specified EC2 instances.
    62  	kbcli fault node detach-volume node1 node2 -c=aws --region=cn-northwest-1 --duration=1m --volume-id=v1,v2 --device-name=/d1,/d2
    63  
    64  	# Stop two specified GCK instances.
    65  	kbcli fault node stop node1 node2 -c=gcp --region=us-central1-c --project=apecloud-platform-engineering	
    66  
    67  	# Restart two specified GCK instances.
    68  	kbcli fault node restart node1 node2 -c=gcp --region=us-central1-c --project=apecloud-platform-engineering
    69  
    70  	# Detach two specified volume from two specified GCK instances.
    71  	kbcli fault node detach-volume node1 node2 -c=gcp --region=us-central1-c --project=apecloud-platform-engineering --device-name=/d1,/d2
    72  `)
    73  
// NodeChaoOptions holds the flags and derived values used by the
// `kbcli fault node` subcommands. Fields tagged `json:"-"` are CLI-only
// inputs; the remaining fields are serialized under the names given in
// their tags.
type NodeChaoOptions struct {
	// Kind is the chaos resource kind; Complete sets it from the cloud provider.
	Kind string `json:"kind"`

	// Action is the provider-specific chaos action; Complete derives it from
	// the subcommand name.
	Action string `json:"action"`

	// CloudProvider is the value of the required --cloud-provider flag.
	CloudProvider string `json:"-"`

	// SecretName is the value of --secret; Complete falls back to a
	// provider-specific default name when it is empty.
	SecretName string `json:"secretName"`

	// Region is the value of the required --region flag.
	Region string `json:"region"`

	// Instance is the node currently being processed; Execute sets it once per
	// positional argument.
	Instance string `json:"instance"`

	// VolumeID is the entry of VolumeIDs paired with the current Instance.
	// VolumeIDs is the raw --volume-id list (detach-volume only).
	VolumeID  string   `json:"volumeID"`
	VolumeIDs []string `json:"-"`

	// DeviceName is the entry of DeviceNames paired with the current Instance.
	// DeviceNames is the raw --device-name list (detach-volume only).
	DeviceName  string   `json:"deviceName,omitempty"`
	DeviceNames []string `json:"-"`

	// Project is the value of --project; Validate requires it for gcp and
	// rejects it for aws.
	Project string `json:"project"`

	// Duration is the value of --duration (default "30s").
	Duration string `json:"duration"`

	// AutoApprove skips the interactive confirmation before a secret is created.
	AutoApprove bool `json:"-"`

	// CreateOptions supplies the shared create machinery (factory, IO streams,
	// dry-run and output settings, Run).
	create.CreateOptions `json:"-"`
}
   101  
   102  func NewNodeOptions(f cmdutil.Factory, streams genericiooptions.IOStreams) *NodeChaoOptions {
   103  	o := &NodeChaoOptions{
   104  		CreateOptions: create.CreateOptions{
   105  			Factory:         f,
   106  			IOStreams:       streams,
   107  			CueTemplateName: CueTemplateNodeChaos,
   108  		},
   109  	}
   110  	o.CreateOptions.PreCreate = o.PreCreate
   111  	o.CreateOptions.Options = o
   112  	return o
   113  }
   114  
   115  func NewNodeChaosCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command {
   116  	cmd := &cobra.Command{
   117  		Use:   "node",
   118  		Short: "Node chaos.",
   119  	}
   120  
   121  	cmd.AddCommand(
   122  		NewStopCmd(f, streams),
   123  		NewRestartCmd(f, streams),
   124  		NewDetachVolumeCmd(f, streams),
   125  	)
   126  	return cmd
   127  }
   128  
   129  func NewStopCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command {
   130  	o := NewNodeOptions(f, streams)
   131  	cmd := o.NewCobraCommand(Stop, StopShort)
   132  
   133  	o.AddCommonFlag(cmd)
   134  	return cmd
   135  }
   136  
   137  func NewRestartCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command {
   138  	o := NewNodeOptions(f, streams)
   139  	cmd := o.NewCobraCommand(Restart, RestartShort)
   140  
   141  	o.AddCommonFlag(cmd)
   142  	return cmd
   143  }
   144  
   145  func NewDetachVolumeCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command {
   146  	o := NewNodeOptions(f, streams)
   147  	cmd := o.NewCobraCommand(DetachVolume, DetachVolumeShort)
   148  
   149  	o.AddCommonFlag(cmd)
   150  	cmd.Flags().StringSliceVar(&o.VolumeIDs, "volume-id", nil, "The volume ids of the ec2. Only available when cloud-provider=aws.")
   151  	cmd.Flags().StringSliceVar(&o.DeviceNames, "device-name", nil, "The device name of the volume.")
   152  
   153  	util.CheckErr(cmd.MarkFlagRequired("device-name"))
   154  	return cmd
   155  }
   156  
   157  func (o *NodeChaoOptions) NewCobraCommand(use, short string) *cobra.Command {
   158  	return &cobra.Command{
   159  		Use:     use,
   160  		Short:   short,
   161  		Example: faultNodeExample,
   162  		Run: func(cmd *cobra.Command, args []string) {
   163  			cmdutil.CheckErr(o.Execute(use, args, false))
   164  		},
   165  	}
   166  }
   167  
   168  func (o *NodeChaoOptions) Execute(action string, args []string, testEnv bool) error {
   169  	o.Args = args
   170  	if err := o.CreateOptions.Complete(); err != nil {
   171  		return err
   172  	}
   173  	if err := o.Complete(action); err != nil {
   174  		return err
   175  	}
   176  	if err := o.Validate(); err != nil {
   177  		return err
   178  	}
   179  
   180  	for idx, arg := range o.Args {
   181  		o.Instance = arg
   182  		if o.DeviceNames != nil {
   183  			o.DeviceName = o.DeviceNames[idx]
   184  		}
   185  		if o.VolumeIDs != nil {
   186  			o.VolumeID = o.VolumeIDs[idx]
   187  		}
   188  		if err := o.CreateSecret(testEnv); err != nil {
   189  			return err
   190  		}
   191  		if err := o.Run(); err != nil {
   192  			return err
   193  		}
   194  	}
   195  	return nil
   196  }
   197  
   198  func (o *NodeChaoOptions) AddCommonFlag(cmd *cobra.Command) {
   199  	cmd.Flags().StringVarP(&o.CloudProvider, "cloud-provider", "c", "", fmt.Sprintf("Cloud provider type, one of %v", supportedCloudProviders))
   200  	cmd.Flags().StringVar(&o.Region, "region", "", "The region of the node.")
   201  	cmd.Flags().StringVar(&o.Project, "project", "", "The name of the GCP project. Only available when cloud-provider=gcp.")
   202  	cmd.Flags().StringVar(&o.SecretName, "secret", "", "The name of the secret containing cloud provider specific credentials.")
   203  	cmd.Flags().StringVar(&o.Duration, "duration", "30s", "Supported formats of the duration are: ms / s / m / h.")
   204  
   205  	cmd.Flags().BoolVar(&o.AutoApprove, "auto-approve", false, "Skip interactive approval before create secret.")
   206  	cmd.Flags().StringVar(&o.DryRun, "dry-run", "none", `Must be "client", or "server". If with client strategy, only print the object that would be sent, and no data is actually sent. If with server strategy, submit the server-side request, but no data is persistent.`)
   207  	cmd.Flags().Lookup("dry-run").NoOptDefVal = Unchanged
   208  	printer.AddOutputFlagForCreate(cmd, &o.Format, false)
   209  
   210  	util.CheckErr(cmd.MarkFlagRequired("cloud-provider"))
   211  	util.CheckErr(cmd.MarkFlagRequired("region"))
   212  
   213  	// register flag completion func
   214  	registerFlagCompletionFunc(cmd, o.Factory)
   215  }
   216  
   217  func (o *NodeChaoOptions) Validate() error {
   218  	if ok, err := IsRegularMatch(o.Duration); !ok {
   219  		return err
   220  	}
   221  
   222  	if len(o.Args) == 0 {
   223  		return fmt.Errorf("node instance is required")
   224  	}
   225  
   226  	switch o.CloudProvider {
   227  	case cp.AWS:
   228  		if o.Project != "" {
   229  			return fmt.Errorf("--project is not supported when cloud provider is aws")
   230  		}
   231  		if o.Action == DetachVolume && o.VolumeIDs == nil {
   232  			return fmt.Errorf("--volume-id is required when cloud provider is aws")
   233  		}
   234  		if o.Action == DetachVolume && len(o.DeviceNames) != len(o.VolumeIDs) {
   235  			return fmt.Errorf("the number of volume-id must be equal to the number of device-name")
   236  		}
   237  	case cp.GCP:
   238  		if o.Project == "" {
   239  			return fmt.Errorf("--project is required when cloud provider is gcp")
   240  		}
   241  		if o.VolumeIDs != nil {
   242  			return fmt.Errorf(" --volume-id is not supported when cloud provider is gcp")
   243  		}
   244  	default:
   245  		return fmt.Errorf("cloud provider type, one of %v", supportedCloudProviders)
   246  	}
   247  
   248  	if o.DeviceNames != nil && len(o.Args) != len(o.DeviceNames) {
   249  		return fmt.Errorf("the number of device-name must be equal to the number of node")
   250  	}
   251  	return nil
   252  }
   253  
   254  func (o *NodeChaoOptions) Complete(action string) error {
   255  	if o.CloudProvider == cp.AWS {
   256  		o.GVR = GetGVR(Group, Version, ResourceAWSChaos)
   257  		o.Kind = KindAWSChaos
   258  		if o.SecretName == "" {
   259  			o.SecretName = AWSSecretName
   260  		}
   261  		switch action {
   262  		case Stop:
   263  			o.Action = string(v1alpha1.Ec2Stop)
   264  		case Restart:
   265  			o.Action = string(v1alpha1.Ec2Restart)
   266  		case DetachVolume:
   267  			o.Action = string(v1alpha1.DetachVolume)
   268  		}
   269  	} else if o.CloudProvider == cp.GCP {
   270  		o.GVR = GetGVR(Group, Version, ResourceGCPChaos)
   271  		o.Kind = KindGCPChaos
   272  		if o.SecretName == "" {
   273  			o.SecretName = GCPSecretName
   274  		}
   275  		switch action {
   276  		case Stop:
   277  			o.Action = string(v1alpha1.NodeStop)
   278  		case Restart:
   279  			o.Action = string(v1alpha1.NodeReset)
   280  		case DetachVolume:
   281  			o.Action = string(v1alpha1.DiskLoss)
   282  		}
   283  	}
   284  	return nil
   285  }
   286  
   287  func (o *NodeChaoOptions) PreCreate(obj *unstructured.Unstructured) error {
   288  	var c v1alpha1.InnerObject
   289  
   290  	if o.CloudProvider == cp.AWS {
   291  		c = &v1alpha1.AWSChaos{}
   292  	} else if o.CloudProvider == cp.GCP {
   293  		c = &v1alpha1.GCPChaos{}
   294  	}
   295  
   296  	if err := runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, c); err != nil {
   297  		return err
   298  	}
   299  
   300  	data, e := runtime.DefaultUnstructuredConverter.ToUnstructured(c)
   301  	if e != nil {
   302  		return e
   303  	}
   304  	obj.SetUnstructuredContent(data)
   305  	return nil
   306  }
   307  
   308  func (o *NodeChaoOptions) CreateSecret(testEnv bool) error {
   309  	if testEnv {
   310  		return nil
   311  	}
   312  
   313  	if o.DryRun != "none" {
   314  		return nil
   315  	}
   316  
   317  	config, err := o.Factory.ToRESTConfig()
   318  	if err != nil {
   319  		return err
   320  	}
   321  
   322  	clientSet, err := kubernetes.NewForConfig(config)
   323  	if err != nil {
   324  		return err
   325  	}
   326  
   327  	// Check if Secret already exists
   328  	secretClient := clientSet.CoreV1().Secrets(o.Namespace)
   329  	_, err = secretClient.Get(context.TODO(), o.SecretName, metav1.GetOptions{})
   330  	if err == nil {
   331  		fmt.Printf("Secret %s exists under %s namespace.\n", o.SecretName, o.Namespace)
   332  		return nil
   333  	} else if !k8serrors.IsNotFound(err) {
   334  		return err
   335  	}
   336  
   337  	if err := o.confirmToContinue(); err != nil {
   338  		return err
   339  	}
   340  
   341  	switch o.CloudProvider {
   342  	case "aws":
   343  		if err := handleAWS(clientSet, o.Namespace, o.SecretName); err != nil {
   344  			return err
   345  		}
   346  	case "gcp":
   347  		if err := handleGCP(clientSet, o.Namespace, o.SecretName); err != nil {
   348  			return err
   349  		}
   350  	default:
   351  		return fmt.Errorf("unknown cloud provider:%s", o.CloudProvider)
   352  	}
   353  	return nil
   354  }
   355  
   356  func (o *NodeChaoOptions) confirmToContinue() error {
   357  	if !o.AutoApprove {
   358  		printer.Warning(o.Out, "A secret will be created for the cloud account to access %s, do you want to continue to create this secret: %s  ?\n  Only 'yes' will be accepted to confirm.\n\n", o.CloudProvider, o.SecretName)
   359  		entered, _ := prompt.NewPrompt("Enter a value:", nil, o.In).Run()
   360  		if entered != "yes" {
   361  			fmt.Fprintf(o.Out, "\nCancel automatic secert creation. You will not be able to access the nodes on the cluster.\n")
   362  			return cmdutil.ErrExit
   363  		}
   364  	}
   365  	fmt.Fprintf(o.Out, "Continue to create secret: %s\n", o.SecretName)
   366  	return nil
   367  }
   368  
   369  func handleAWS(clientSet *kubernetes.Clientset, namespace, secretName string) error {
   370  	accessKeyID, secretAccessKey, err := readAWSCredentials()
   371  	if err != nil {
   372  		return err
   373  	}
   374  
   375  	secret := &corev1.Secret{
   376  		ObjectMeta: metav1.ObjectMeta{
   377  			Name:      secretName,
   378  			Namespace: namespace,
   379  		},
   380  		Type: corev1.SecretTypeOpaque,
   381  		StringData: map[string]string{
   382  			"aws_access_key_id":     accessKeyID,
   383  			"aws_secret_access_key": secretAccessKey,
   384  		},
   385  	}
   386  
   387  	createdSecret, err := clientSet.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{})
   388  	if err != nil {
   389  		return err
   390  	}
   391  
   392  	fmt.Printf("Secret %s created successfully\n", createdSecret.Name)
   393  	return nil
   394  }
   395  
   396  func handleGCP(clientSet *kubernetes.Clientset, namespace, secretName string) error {
   397  	home, err := os.UserHomeDir()
   398  	if err != nil {
   399  		return err
   400  	}
   401  
   402  	filePath := filepath.Join(home, ".config", "gcloud", "application_default_credentials.json")
   403  	data, err := ioutil.ReadFile(filePath)
   404  	jsonData := string(data)
   405  	fmt.Println(jsonData)
   406  	if err != nil {
   407  		return err
   408  	}
   409  	encodedData := base64.StdEncoding.EncodeToString([]byte(jsonData))
   410  
   411  	secret := &corev1.Secret{
   412  		ObjectMeta: metav1.ObjectMeta{
   413  			Name:      secretName,
   414  			Namespace: namespace,
   415  		},
   416  		Type: corev1.SecretTypeOpaque,
   417  		StringData: map[string]string{
   418  			"service_account": encodedData,
   419  		},
   420  	}
   421  
   422  	createdSecret, err := clientSet.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{})
   423  	if err != nil {
   424  		return err
   425  	}
   426  
   427  	fmt.Printf("Secret %s created successfully\n", createdSecret.Name)
   428  	return nil
   429  }
   430  
   431  func readAWSCredentials() (string, string, error) {
   432  	home, err := os.UserHomeDir()
   433  	if err != nil {
   434  		return "", "", err
   435  	}
   436  	filePath := filepath.Join(home, ".aws", "credentials")
   437  	file, err := os.Open(filePath)
   438  	if err != nil {
   439  		return "", "", err
   440  	}
   441  	defer func(file *os.File) {
   442  		err := file.Close()
   443  		if err != nil {
   444  			fmt.Printf("unable to close file: %s", err)
   445  		}
   446  	}(file)
   447  
   448  	// Read file content line by line using bufio.Scanner
   449  	scanner := bufio.NewScanner(file)
   450  	accessKeyID := ""
   451  	secretAccessKey := ""
   452  
   453  	for scanner.Scan() {
   454  		line := scanner.Text()
   455  		if strings.HasPrefix(line, "aws_access_key_id") {
   456  			accessKeyID = strings.TrimSpace(strings.SplitN(line, "=", 2)[1])
   457  		} else if strings.HasPrefix(line, "aws_secret_access_key") {
   458  			secretAccessKey = strings.TrimSpace(strings.SplitN(line, "=", 2)[1])
   459  		}
   460  	}
   461  
   462  	if scanner.Err() != nil {
   463  		return "", "", scanner.Err()
   464  	}
   465  
   466  	if accessKeyID == "" || secretAccessKey == "" {
   467  		return "", "", fmt.Errorf("unable to find valid AWS access key information")
   468  	}
   469  
   470  	return accessKeyID, secretAccessKey, nil
   471  }