github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/cli/cmd/fault/fault_node.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package fault 21 22 import ( 23 "bufio" 24 "context" 25 "encoding/base64" 26 "fmt" 27 "io/ioutil" 28 "os" 29 "path/filepath" 30 "strings" 31 32 "github.com/chaos-mesh/chaos-mesh/api/v1alpha1" 33 "github.com/spf13/cobra" 34 corev1 "k8s.io/api/core/v1" 35 k8serrors "k8s.io/apimachinery/pkg/api/errors" 36 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 37 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 38 "k8s.io/apimachinery/pkg/runtime" 39 "k8s.io/cli-runtime/pkg/genericiooptions" 40 "k8s.io/client-go/kubernetes" 41 cmdutil "k8s.io/kubectl/pkg/cmd/util" 42 "k8s.io/kubectl/pkg/util/templates" 43 44 cp "github.com/1aal/kubeblocks/pkg/cli/cloudprovider" 45 "github.com/1aal/kubeblocks/pkg/cli/create" 46 "github.com/1aal/kubeblocks/pkg/cli/printer" 47 "github.com/1aal/kubeblocks/pkg/cli/util" 48 "github.com/1aal/kubeblocks/pkg/cli/util/prompt" 49 ) 50 51 var faultNodeExample = templates.Examples(` 52 # Stop a specified EC2 instance. 53 kbcli fault node stop node1 -c=aws --region=cn-northwest-1 --duration=3m 54 55 # Stop two specified EC2 instances. 56 kbcli fault node stop node1 node2 -c=aws --region=cn-northwest-1 --duration=3m 57 58 # Restart two specified EC2 instances. 59 kbcli fault node restart node1 node2 -c=aws --region=cn-northwest-1 --duration=3m 60 61 # Detach two specified volume from two specified EC2 instances. 62 kbcli fault node detach-volume node1 node2 -c=aws --region=cn-northwest-1 --duration=1m --volume-id=v1,v2 --device-name=/d1,/d2 63 64 # Stop two specified GCK instances. 65 kbcli fault node stop node1 node2 -c=gcp --region=us-central1-c --project=apecloud-platform-engineering 66 67 # Restart two specified GCK instances. 68 kbcli fault node restart node1 node2 -c=gcp --region=us-central1-c --project=apecloud-platform-engineering 69 70 # Detach two specified volume from two specified GCK instances. 71 kbcli fault node detach-volume node1 node2 -c=gcp --region=us-central1-c --project=apecloud-platform-engineering --device-name=/d1,/d2 72 `) 73 74 type NodeChaoOptions struct { 75 Kind string `json:"kind"` 76 77 Action string `json:"action"` 78 79 CloudProvider string `json:"-"` 80 81 SecretName string `json:"secretName"` 82 83 Region string `json:"region"` 84 85 Instance string `json:"instance"` 86 87 VolumeID string `json:"volumeID"` 88 VolumeIDs []string `json:"-"` 89 90 DeviceName string `json:"deviceName,omitempty"` 91 DeviceNames []string `json:"-"` 92 93 Project string `json:"project"` 94 95 Duration string `json:"duration"` 96 97 AutoApprove bool `json:"-"` 98 99 create.CreateOptions `json:"-"` 100 } 101 102 func NewNodeOptions(f cmdutil.Factory, streams genericiooptions.IOStreams) *NodeChaoOptions { 103 o := &NodeChaoOptions{ 104 CreateOptions: create.CreateOptions{ 105 Factory: f, 106 IOStreams: streams, 107 CueTemplateName: CueTemplateNodeChaos, 108 }, 109 } 110 o.CreateOptions.PreCreate = o.PreCreate 111 o.CreateOptions.Options = o 112 return o 113 } 114 115 func NewNodeChaosCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command { 116 cmd := &cobra.Command{ 117 Use: "node", 118 Short: "Node chaos.", 119 } 120 121 cmd.AddCommand( 122 NewStopCmd(f, streams), 123 NewRestartCmd(f, streams), 124 NewDetachVolumeCmd(f, streams), 125 ) 126 return cmd 127 } 128 129 func NewStopCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command { 130 o := NewNodeOptions(f, streams) 131 cmd := o.NewCobraCommand(Stop, StopShort) 132 133 o.AddCommonFlag(cmd) 134 return cmd 135 } 136 137 func NewRestartCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command { 138 o := NewNodeOptions(f, streams) 139 cmd := o.NewCobraCommand(Restart, RestartShort) 140 141 o.AddCommonFlag(cmd) 142 return cmd 143 } 144 145 func NewDetachVolumeCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command { 146 o := NewNodeOptions(f, streams) 147 cmd := o.NewCobraCommand(DetachVolume, DetachVolumeShort) 148 149 o.AddCommonFlag(cmd) 150 cmd.Flags().StringSliceVar(&o.VolumeIDs, "volume-id", nil, "The volume ids of the ec2. Only available when cloud-provider=aws.") 151 cmd.Flags().StringSliceVar(&o.DeviceNames, "device-name", nil, "The device name of the volume.") 152 153 util.CheckErr(cmd.MarkFlagRequired("device-name")) 154 return cmd 155 } 156 157 func (o *NodeChaoOptions) NewCobraCommand(use, short string) *cobra.Command { 158 return &cobra.Command{ 159 Use: use, 160 Short: short, 161 Example: faultNodeExample, 162 Run: func(cmd *cobra.Command, args []string) { 163 cmdutil.CheckErr(o.Execute(use, args, false)) 164 }, 165 } 166 } 167 168 func (o *NodeChaoOptions) Execute(action string, args []string, testEnv bool) error { 169 o.Args = args 170 if err := o.CreateOptions.Complete(); err != nil { 171 return err 172 } 173 if err := o.Complete(action); err != nil { 174 return err 175 } 176 if err := o.Validate(); err != nil { 177 return err 178 } 179 180 for idx, arg := range o.Args { 181 o.Instance = arg 182 if o.DeviceNames != nil { 183 o.DeviceName = o.DeviceNames[idx] 184 } 185 if o.VolumeIDs != nil { 186 o.VolumeID = o.VolumeIDs[idx] 187 } 188 if err := o.CreateSecret(testEnv); err != nil { 189 return err 190 } 191 if err := o.Run(); err != nil { 192 return err 193 } 194 } 195 return nil 196 } 197 198 func (o *NodeChaoOptions) AddCommonFlag(cmd *cobra.Command) { 199 cmd.Flags().StringVarP(&o.CloudProvider, "cloud-provider", "c", "", fmt.Sprintf("Cloud provider type, one of %v", supportedCloudProviders)) 200 cmd.Flags().StringVar(&o.Region, "region", "", "The region of the node.") 201 cmd.Flags().StringVar(&o.Project, "project", "", "The name of the GCP project. Only available when cloud-provider=gcp.") 202 cmd.Flags().StringVar(&o.SecretName, "secret", "", "The name of the secret containing cloud provider specific credentials.") 203 cmd.Flags().StringVar(&o.Duration, "duration", "30s", "Supported formats of the duration are: ms / s / m / h.") 204 205 cmd.Flags().BoolVar(&o.AutoApprove, "auto-approve", false, "Skip interactive approval before create secret.") 206 cmd.Flags().StringVar(&o.DryRun, "dry-run", "none", `Must be "client", or "server". If with client strategy, only print the object that would be sent, and no data is actually sent. If with server strategy, submit the server-side request, but no data is persistent.`) 207 cmd.Flags().Lookup("dry-run").NoOptDefVal = Unchanged 208 printer.AddOutputFlagForCreate(cmd, &o.Format, false) 209 210 util.CheckErr(cmd.MarkFlagRequired("cloud-provider")) 211 util.CheckErr(cmd.MarkFlagRequired("region")) 212 213 // register flag completion func 214 registerFlagCompletionFunc(cmd, o.Factory) 215 } 216 217 func (o *NodeChaoOptions) Validate() error { 218 if ok, err := IsRegularMatch(o.Duration); !ok { 219 return err 220 } 221 222 if len(o.Args) == 0 { 223 return fmt.Errorf("node instance is required") 224 } 225 226 switch o.CloudProvider { 227 case cp.AWS: 228 if o.Project != "" { 229 return fmt.Errorf("--project is not supported when cloud provider is aws") 230 } 231 if o.Action == DetachVolume && o.VolumeIDs == nil { 232 return fmt.Errorf("--volume-id is required when cloud provider is aws") 233 } 234 if o.Action == DetachVolume && len(o.DeviceNames) != len(o.VolumeIDs) { 235 return fmt.Errorf("the number of volume-id must be equal to the number of device-name") 236 } 237 case cp.GCP: 238 if o.Project == "" { 239 return fmt.Errorf("--project is required when cloud provider is gcp") 240 } 241 if o.VolumeIDs != nil { 242 return fmt.Errorf(" --volume-id is not supported when cloud provider is gcp") 243 } 244 default: 245 return fmt.Errorf("cloud provider type, one of %v", supportedCloudProviders) 246 } 247 248 if o.DeviceNames != nil && len(o.Args) != len(o.DeviceNames) { 249 return fmt.Errorf("the number of device-name must be equal to the number of node") 250 } 251 return nil 252 } 253 254 func (o *NodeChaoOptions) Complete(action string) error { 255 if o.CloudProvider == cp.AWS { 256 o.GVR = GetGVR(Group, Version, ResourceAWSChaos) 257 o.Kind = KindAWSChaos 258 if o.SecretName == "" { 259 o.SecretName = AWSSecretName 260 } 261 switch action { 262 case Stop: 263 o.Action = string(v1alpha1.Ec2Stop) 264 case Restart: 265 o.Action = string(v1alpha1.Ec2Restart) 266 case DetachVolume: 267 o.Action = string(v1alpha1.DetachVolume) 268 } 269 } else if o.CloudProvider == cp.GCP { 270 o.GVR = GetGVR(Group, Version, ResourceGCPChaos) 271 o.Kind = KindGCPChaos 272 if o.SecretName == "" { 273 o.SecretName = GCPSecretName 274 } 275 switch action { 276 case Stop: 277 o.Action = string(v1alpha1.NodeStop) 278 case Restart: 279 o.Action = string(v1alpha1.NodeReset) 280 case DetachVolume: 281 o.Action = string(v1alpha1.DiskLoss) 282 } 283 } 284 return nil 285 } 286 287 func (o *NodeChaoOptions) PreCreate(obj *unstructured.Unstructured) error { 288 var c v1alpha1.InnerObject 289 290 if o.CloudProvider == cp.AWS { 291 c = &v1alpha1.AWSChaos{} 292 } else if o.CloudProvider == cp.GCP { 293 c = &v1alpha1.GCPChaos{} 294 } 295 296 if err := runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, c); err != nil { 297 return err 298 } 299 300 data, e := runtime.DefaultUnstructuredConverter.ToUnstructured(c) 301 if e != nil { 302 return e 303 } 304 obj.SetUnstructuredContent(data) 305 return nil 306 } 307 308 func (o *NodeChaoOptions) CreateSecret(testEnv bool) error { 309 if testEnv { 310 return nil 311 } 312 313 if o.DryRun != "none" { 314 return nil 315 } 316 317 config, err := o.Factory.ToRESTConfig() 318 if err != nil { 319 return err 320 } 321 322 clientSet, err := kubernetes.NewForConfig(config) 323 if err != nil { 324 return err 325 } 326 327 // Check if Secret already exists 328 secretClient := clientSet.CoreV1().Secrets(o.Namespace) 329 _, err = secretClient.Get(context.TODO(), o.SecretName, metav1.GetOptions{}) 330 if err == nil { 331 fmt.Printf("Secret %s exists under %s namespace.\n", o.SecretName, o.Namespace) 332 return nil 333 } else if !k8serrors.IsNotFound(err) { 334 return err 335 } 336 337 if err := o.confirmToContinue(); err != nil { 338 return err 339 } 340 341 switch o.CloudProvider { 342 case "aws": 343 if err := handleAWS(clientSet, o.Namespace, o.SecretName); err != nil { 344 return err 345 } 346 case "gcp": 347 if err := handleGCP(clientSet, o.Namespace, o.SecretName); err != nil { 348 return err 349 } 350 default: 351 return fmt.Errorf("unknown cloud provider:%s", o.CloudProvider) 352 } 353 return nil 354 } 355 356 func (o *NodeChaoOptions) confirmToContinue() error { 357 if !o.AutoApprove { 358 printer.Warning(o.Out, "A secret will be created for the cloud account to access %s, do you want to continue to create this secret: %s ?\n Only 'yes' will be accepted to confirm.\n\n", o.CloudProvider, o.SecretName) 359 entered, _ := prompt.NewPrompt("Enter a value:", nil, o.In).Run() 360 if entered != "yes" { 361 fmt.Fprintf(o.Out, "\nCancel automatic secert creation. You will not be able to access the nodes on the cluster.\n") 362 return cmdutil.ErrExit 363 } 364 } 365 fmt.Fprintf(o.Out, "Continue to create secret: %s\n", o.SecretName) 366 return nil 367 } 368 369 func handleAWS(clientSet *kubernetes.Clientset, namespace, secretName string) error { 370 accessKeyID, secretAccessKey, err := readAWSCredentials() 371 if err != nil { 372 return err 373 } 374 375 secret := &corev1.Secret{ 376 ObjectMeta: metav1.ObjectMeta{ 377 Name: secretName, 378 Namespace: namespace, 379 }, 380 Type: corev1.SecretTypeOpaque, 381 StringData: map[string]string{ 382 "aws_access_key_id": accessKeyID, 383 "aws_secret_access_key": secretAccessKey, 384 }, 385 } 386 387 createdSecret, err := clientSet.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{}) 388 if err != nil { 389 return err 390 } 391 392 fmt.Printf("Secret %s created successfully\n", createdSecret.Name) 393 return nil 394 } 395 396 func handleGCP(clientSet *kubernetes.Clientset, namespace, secretName string) error { 397 home, err := os.UserHomeDir() 398 if err != nil { 399 return err 400 } 401 402 filePath := filepath.Join(home, ".config", "gcloud", "application_default_credentials.json") 403 data, err := ioutil.ReadFile(filePath) 404 jsonData := string(data) 405 fmt.Println(jsonData) 406 if err != nil { 407 return err 408 } 409 encodedData := base64.StdEncoding.EncodeToString([]byte(jsonData)) 410 411 secret := &corev1.Secret{ 412 ObjectMeta: metav1.ObjectMeta{ 413 Name: secretName, 414 Namespace: namespace, 415 }, 416 Type: corev1.SecretTypeOpaque, 417 StringData: map[string]string{ 418 "service_account": encodedData, 419 }, 420 } 421 422 createdSecret, err := clientSet.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{}) 423 if err != nil { 424 return err 425 } 426 427 fmt.Printf("Secret %s created successfully\n", createdSecret.Name) 428 return nil 429 } 430 431 func readAWSCredentials() (string, string, error) { 432 home, err := os.UserHomeDir() 433 if err != nil { 434 return "", "", err 435 } 436 filePath := filepath.Join(home, ".aws", "credentials") 437 file, err := os.Open(filePath) 438 if err != nil { 439 return "", "", err 440 } 441 defer func(file *os.File) { 442 err := file.Close() 443 if err != nil { 444 fmt.Printf("unable to close file: %s", err) 445 } 446 }(file) 447 448 // Read file content line by line using bufio.Scanner 449 scanner := bufio.NewScanner(file) 450 accessKeyID := "" 451 secretAccessKey := "" 452 453 for scanner.Scan() { 454 line := scanner.Text() 455 if strings.HasPrefix(line, "aws_access_key_id") { 456 accessKeyID = strings.TrimSpace(strings.SplitN(line, "=", 2)[1]) 457 } else if strings.HasPrefix(line, "aws_secret_access_key") { 458 secretAccessKey = strings.TrimSpace(strings.SplitN(line, "=", 2)[1]) 459 } 460 } 461 462 if scanner.Err() != nil { 463 return "", "", scanner.Err() 464 } 465 466 if accessKeyID == "" || secretAccessKey == "" { 467 return "", "", fmt.Errorf("unable to find valid AWS access key information") 468 } 469 470 return accessKeyID, secretAccessKey, nil 471 }