github.com/openshift/installer@v1.4.17/cmd/openshift-install/gather.go (about) 1 package main 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io/fs" 8 "net" 9 "os" 10 "path/filepath" 11 "strconv" 12 "strings" 13 "syscall" 14 "time" 15 16 "github.com/sirupsen/logrus" 17 "github.com/spf13/cobra" 18 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 "k8s.io/client-go/rest" 20 21 configv1 "github.com/openshift/api/config/v1" 22 configclient "github.com/openshift/client-go/config/clientset/versioned" 23 "github.com/openshift/installer/cmd/openshift-install/command" 24 "github.com/openshift/installer/pkg/asset/installconfig" 25 assetstore "github.com/openshift/installer/pkg/asset/store" 26 "github.com/openshift/installer/pkg/asset/tls" 27 "github.com/openshift/installer/pkg/clusterapi" 28 serialgather "github.com/openshift/installer/pkg/gather" 29 "github.com/openshift/installer/pkg/gather/service" 30 "github.com/openshift/installer/pkg/gather/ssh" 31 "github.com/openshift/installer/pkg/infrastructure" 32 infra "github.com/openshift/installer/pkg/infrastructure/platform" 33 34 _ "github.com/openshift/installer/pkg/gather/aws" 35 _ "github.com/openshift/installer/pkg/gather/azure" 36 _ "github.com/openshift/installer/pkg/gather/gcp" 37 ) 38 39 func newGatherCmd(ctx context.Context) *cobra.Command { 40 cmd := &cobra.Command{ 41 Use: "gather", 42 Short: "Gather debugging data for a given installation failure", 43 Long: `Gather debugging data for a given installation failure. 44 45 When installation for OpenShift cluster fails, gathering all the data useful for debugging can 46 become a difficult task. This command helps users to collect the most relevant information that can be used 47 to debug the installation failures`, 48 RunE: func(cmd *cobra.Command, args []string) error { 49 return cmd.Help() 50 }, 51 } 52 cmd.AddCommand(newGatherBootstrapCmd(ctx)) 53 return cmd 54 } 55 56 var gatherBootstrapOpts struct { 57 bootstrap string 58 masters []string 59 sshKeys []string 60 skipAnalysis bool 61 } 62 63 func newGatherBootstrapCmd(ctx context.Context) *cobra.Command { 64 cmd := &cobra.Command{ 65 Use: "bootstrap", 66 Short: "Gather debugging data for a failing-to-bootstrap control plane", 67 Args: cobra.ExactArgs(0), 68 Run: func(_ *cobra.Command, _ []string) { 69 cleanup := command.SetupFileHook(command.RootOpts.Dir) 70 defer cleanup() 71 bundlePath, err := runGatherBootstrapCmd(ctx, command.RootOpts.Dir) 72 if err != nil { 73 logrus.Fatal(err) 74 } 75 76 if !gatherBootstrapOpts.skipAnalysis { 77 if err := service.AnalyzeGatherBundle(bundlePath); err != nil { 78 logrus.Fatal(err) 79 } 80 } 81 82 logrus.Infof("Bootstrap gather logs captured here %q", bundlePath) 83 }, 84 } 85 cmd.PersistentFlags().StringVar(&gatherBootstrapOpts.bootstrap, "bootstrap", "", "Hostname or IP of the bootstrap host") 86 cmd.PersistentFlags().StringArrayVar(&gatherBootstrapOpts.masters, "master", []string{}, "Hostnames or IPs of all control plane hosts") 87 cmd.PersistentFlags().StringArrayVar(&gatherBootstrapOpts.sshKeys, "key", []string{}, "Path to SSH private keys that should be used for authentication. If no key was provided, SSH private keys from user's environment will be used") 88 cmd.PersistentFlags().BoolVar(&gatherBootstrapOpts.skipAnalysis, "skipAnalysis", false, "Skip analysis of the gathered data") 89 return cmd 90 } 91 92 func runGatherBootstrapCmd(ctx context.Context, directory string) (string, error) { 93 assetStore, err := assetstore.NewStore(directory) 94 if err != nil { 95 return "", fmt.Errorf("failed to create asset store: %w", err) 96 } 97 // add the default bootstrap key pair to the sshKeys list 98 bootstrapSSHKeyPair := &tls.BootstrapSSHKeyPair{} 99 if err := assetStore.Fetch(ctx, bootstrapSSHKeyPair); err != nil { 100 return "", fmt.Errorf("failed to fetch %s: %w", bootstrapSSHKeyPair.Name(), err) 101 } 102 tmpfile, err := os.CreateTemp("", "bootstrap-ssh") 103 if err != nil { 104 return "", err 105 } 106 defer os.Remove(tmpfile.Name()) 107 if _, err := tmpfile.Write(bootstrapSSHKeyPair.Private()); err != nil { 108 return "", err 109 } 110 if err := tmpfile.Close(); err != nil { 111 return "", err 112 } 113 gatherBootstrapOpts.sshKeys = append(gatherBootstrapOpts.sshKeys, tmpfile.Name()) 114 115 ha := &infrastructure.HostAddresses{ 116 Bootstrap: gatherBootstrapOpts.bootstrap, 117 Port: 22, 118 Masters: gatherBootstrapOpts.masters, 119 } 120 121 if ha.Bootstrap == "" && len(ha.Masters) == 0 { 122 config := &installconfig.InstallConfig{} 123 if err := assetStore.Fetch(ctx, config); err != nil { 124 return "", fmt.Errorf("failed to fetch %s: %w", config.Name(), err) 125 } 126 127 provider, err := infra.ProviderForPlatform(config.Config.Platform.Name(), config.Config.EnabledFeatureGates()) 128 if err != nil { 129 return "", fmt.Errorf("error getting infrastructure provider: %w", err) 130 } 131 if err = provider.ExtractHostAddresses(directory, config.Config, ha); err != nil { 132 logrus.Warnf("Failed to extract host addresses: %s", err.Error()) 133 } 134 } 135 136 if ha.Bootstrap == "" { 137 return "", errors.New("must provide bootstrap host address") 138 } 139 140 return gatherBootstrap(ha.Bootstrap, ha.Port, ha.Masters, directory) 141 } 142 143 func gatherBootstrap(bootstrap string, port int, masters []string, directory string) (string, error) { 144 gatherID := time.Now().Format("20060102150405") 145 archives := map[string]string{} 146 147 if capiManifestsBundlePath, err := gatherCAPIArtifacts(directory, gatherID); err != nil { 148 // Do not fail the whole gather if we can't find capi manifests (we can be running terraform) 149 logrus.Infof("Failed to gather Cluster API manifests: %s", err.Error()) 150 } else { 151 archives[capiManifestsBundlePath] = "clusterapi" 152 } 153 154 serialLogBundle := filepath.Join(directory, fmt.Sprintf("serial-log-bundle-%s.tar.gz", gatherID)) 155 serialLogBundlePath, err := filepath.Abs(serialLogBundle) 156 if err != nil { 157 return "", fmt.Errorf("failed to stat log file: %w", err) 158 } 159 160 consoleGather, err := serialgather.New(logrus.StandardLogger(), serialLogBundlePath, bootstrap, masters, directory) 161 if err != nil { 162 logrus.Infof("Skipping VM console logs gather: %s", err.Error()) 163 } else { 164 logrus.Info("Pulling VM console logs") 165 if err := consoleGather.Run(); err != nil { 166 logrus.Infof("Failed to gather VM console logs: %s", err.Error()) 167 } else { 168 archives[serialLogBundlePath] = "serial" 169 } 170 } 171 172 clusterLogBundlePath, err := pullLogsFromBootstrap(gatherID, bootstrap, port, masters, directory) 173 if err != nil { 174 logrus.Infof("Failed to gather bootstrap logs: %s", err.Error()) 175 } else { 176 archives[clusterLogBundlePath] = "" 177 } 178 179 if len(archives) == 0 { 180 return "", fmt.Errorf("failed to gather VM console and bootstrap logs") 181 } 182 183 logBundlePath := filepath.Join(directory, fmt.Sprintf("log-bundle-%s.tar.gz", gatherID)) 184 err = serialgather.CombineArchives(logBundlePath, archives) 185 if err != nil { 186 return "", fmt.Errorf("failed to combine archives: %w", err) 187 } 188 189 return logBundlePath, nil 190 } 191 192 func pullLogsFromBootstrap(gatherID string, bootstrap string, port int, masters []string, directory string) (string, error) { 193 logrus.Info("Pulling debug logs from the bootstrap machine") 194 client, err := ssh.NewClient("core", net.JoinHostPort(bootstrap, strconv.Itoa(port)), gatherBootstrapOpts.sshKeys) 195 if err != nil { 196 if errors.Is(err, syscall.ECONNREFUSED) || errors.Is(err, syscall.ETIMEDOUT) { 197 return "", fmt.Errorf("failed to connect to the bootstrap machine: %w", err) 198 } 199 return "", fmt.Errorf("failed to create SSH client: %w", err) 200 } 201 202 if err := ssh.Run(client, fmt.Sprintf("/usr/local/bin/installer-gather.sh --id %s %s", gatherID, strings.Join(masters, " "))); err != nil { 203 return "", fmt.Errorf("failed to run remote command: %w", err) 204 } 205 206 file := filepath.Join(directory, fmt.Sprintf("cluster-log-bundle-%s.tar.gz", gatherID)) 207 if err := ssh.PullFileTo(client, fmt.Sprintf("/home/core/log-bundle-%s.tar.gz", gatherID), file); err != nil { 208 return "", fmt.Errorf("failed to pull log file from remote: %w", err) 209 } 210 211 clusterLogBundlePath, err := filepath.Abs(file) 212 if err != nil { 213 return "", fmt.Errorf("failed to stat log file: %w", err) 214 } 215 216 return clusterLogBundlePath, nil 217 } 218 219 func logClusterOperatorConditions(ctx context.Context, config *rest.Config) error { 220 client, err := configclient.NewForConfig(config) 221 if err != nil { 222 return fmt.Errorf("creating a config client: %w", err) 223 } 224 225 operators, err := client.ConfigV1().ClusterOperators().List(ctx, metav1.ListOptions{}) 226 if err != nil { 227 return fmt.Errorf("listing ClusterOperator objects: %w", err) 228 } 229 230 for _, operator := range operators.Items { 231 for _, condition := range operator.Status.Conditions { 232 if condition.Type == configv1.OperatorUpgradeable { 233 continue 234 } else if condition.Type == configv1.OperatorAvailable && condition.Status == configv1.ConditionTrue { 235 continue 236 } else if (condition.Type == configv1.OperatorDegraded || condition.Type == configv1.OperatorProgressing) && condition.Status == configv1.ConditionFalse { 237 continue 238 } 239 if condition.Type == configv1.OperatorAvailable || condition.Type == configv1.OperatorDegraded { 240 logrus.Errorf("Cluster operator %s %s is %s with %s: %s", operator.ObjectMeta.Name, condition.Type, condition.Status, condition.Reason, condition.Message) 241 } else { 242 logrus.Infof("Cluster operator %s %s is %s with %s: %s", operator.ObjectMeta.Name, condition.Type, condition.Status, condition.Reason, condition.Message) 243 } 244 } 245 } 246 247 return nil 248 } 249 250 func gatherCAPIArtifacts(directory, gatherID string) (string, error) { 251 logrus.Infoln("Pulling Cluster API artifacts") 252 dir, err := filepath.Abs(directory) 253 if err != nil { 254 return "", fmt.Errorf("failed to get absolute path for %s: %w", directory, err) 255 } 256 257 capiDir := filepath.Join(dir, clusterapi.ArtifactsDir) 258 if _, err := os.Stat(capiDir); err != nil { 259 if errors.Is(err, fs.ErrNotExist) { 260 return "", fmt.Errorf("either Cluster API manifests not generated or terraform provision") 261 } 262 return "", fmt.Errorf("failed to stat Cluster API output directory: %w", err) 263 } 264 265 bundleDir := filepath.Join(dir, fmt.Sprintf("capi-artifacts-bundle-%s", gatherID)) 266 // Symlink the hidden directory so the artifacts are not hidden in the archive 267 if err := os.Symlink(capiDir, bundleDir); err != nil { 268 return "", fmt.Errorf("failed to copy Cluster API artifacts: %w", err) 269 } 270 defer os.Remove(bundleDir) 271 272 var capiArtifacts []string 273 manifests, err := filepath.Glob(filepath.Join(bundleDir, "*.yaml")) 274 if err != nil { 275 return "", fmt.Errorf("failed to gather Cluster API manifests: %w", err) 276 } 277 capiArtifacts = append(capiArtifacts, manifests...) 278 279 logs, err := filepath.Glob(filepath.Join(bundleDir, "*.log")) 280 if err != nil { 281 return "", fmt.Errorf("failed to gather Cluster API control plane logs: %w", err) 282 } 283 capiArtifacts = append(capiArtifacts, logs...) 284 285 capiArtifactsBundlePath := fmt.Sprintf("%s.tar.gz", bundleDir) 286 if err := serialgather.CreateArchive(capiArtifacts, capiArtifactsBundlePath); err != nil { 287 return "", fmt.Errorf("failed to create clusterapi bundle file: %w", err) 288 } 289 return capiArtifactsBundlePath, nil 290 }