github.com/openshift/installer@v1.4.17/pkg/gather/service/analyze.go (about) 1 package service 2 3 import ( 4 "archive/tar" 5 "compress/gzip" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "regexp" 12 "strings" 13 14 "github.com/sirupsen/logrus" 15 ) 16 17 // regex matching the path of a service entries file. The captured group is the name of the service. 18 // For example, if the filename is "log-bundle-20210329190553/bootstrap/services/release-image.json", 19 // then the name of the service is "release-image". 20 // In case the log-bundle is from bootstrap-in-place installation the file name is: 21 // "log-bundle-20210329190553/log-bundle-bootstrap/bootstrap/services/release-image.json" 22 var serviceEntriesFilePathRegex = regexp.MustCompile(`^[^\/]+(?:\/log-bundle-bootstrap)?\/bootstrap\/services\/([^.]+)\.json$`) 23 24 // AnalyzeGatherBundle will analyze the bootstrap gather bundle at the specified path. 25 // Analysis will be logged. 26 // Returns an error if there was a problem reading the bundle. 27 func AnalyzeGatherBundle(bundlePath string) error { 28 // open the bundle file for reading 29 bundleFile, err := os.Open(bundlePath) 30 if err != nil { 31 return fmt.Errorf("could not open the gather bundle: %w", err) 32 } 33 defer bundleFile.Close() 34 return analyzeGatherBundle(bundleFile) 35 } 36 37 func analyzeGatherBundle(bundleFile io.Reader) error { 38 // decompress the bundle 39 uncompressedStream, err := gzip.NewReader(bundleFile) 40 if err != nil { 41 return fmt.Errorf("could not decompress the gather bundle: %w", err) 42 } 43 defer uncompressedStream.Close() 44 45 // read through the tar for relevant files 46 tarReader := tar.NewReader(uncompressedStream) 47 serviceAnalyses := make(map[string]analysis) 48 servicesFound := make([]string, 0) 49 for { 50 header, err := tarReader.Next() 51 if err == io.EOF { 52 break 53 } 54 if err != nil { 55 return fmt.Errorf("encountered an error reading from the gather bundle: %w", err) 56 } 57 if header.Typeflag != tar.TypeReg { 58 continue 59 } 60 61 serviceEntriesFileSubmatch := serviceEntriesFilePathRegex.FindStringSubmatch(header.Name) 62 if serviceEntriesFileSubmatch == nil { 63 continue 64 } 65 serviceName := serviceEntriesFileSubmatch[1] 66 servicesFound = append(servicesFound, serviceName) 67 68 serviceAnalysis, err := analyzeService(tarReader) 69 if err != nil { 70 logrus.Infof("Could not analyze the %s.service: %v", serviceName, err) 71 continue 72 } 73 74 serviceAnalyses[serviceName] = serviceAnalysis 75 } 76 77 if len(servicesFound) == 0 { 78 logrus.Error("Invalid log bundle or the bootstrap machine could not be reached and bootstrap logs were not collected") 79 return nil 80 } 81 82 analysisChecks := []struct { 83 name string 84 check func(analysis) bool 85 optional bool 86 }{ 87 {name: "release-image", check: checkReleaseImageDownload, optional: false}, 88 {name: "bootkube", check: checkBootkubeService, optional: false}, 89 } 90 for _, check := range analysisChecks { 91 a := serviceAnalyses[check.name] 92 if a.starts == 0 { 93 if check.optional { 94 logrus.Infof("The bootstrap machine did not execute the %s.service systemd unit", check.name) 95 break 96 } 97 logrus.Errorf("The bootstrap machine did not execute the %s.service systemd unit", check.name) 98 break 99 } 100 if !check.check(a) { 101 break 102 } 103 } 104 105 return nil 106 } 107 108 func checkReleaseImageDownload(a analysis) bool { 109 if a.successful { 110 return true 111 } 112 logrus.Error("The bootstrap machine failed to download the release image") 113 a.logLastError() 114 return false 115 } 116 117 // bootstrap-verify-api-servel-urls.sh is currently running as part of the bootkube service. 118 // And the verification of the API and API-Int URLs are the only stage where a failure is 119 // currently reported. So, here we are able to conclude that a failure corresponds to a 120 // failure to resolve either the API URL or API-Int URL or both. If that changes and if 121 // any other stage in the bootkube service starts reporting a failure, we need to revisit 122 // this. At that point verification of the URLs could be moved to its own service. 123 func checkBootkubeService(a analysis) bool { 124 if a.successful { 125 return true 126 } 127 // Note: Even when there is a stage failure, we are not returning false here. That is 128 // intentional because we donot want to report this as an error in the "analyze" output. 129 a.logLastError() 130 return true 131 } 132 133 type analysis struct { 134 // starts is the number of times that the service started 135 starts int 136 // successful is true if the last invocation of the service ended in success 137 successful bool 138 // failingStage is the stage that failed in the last unsuccessful invocation of the service 139 failingStage string 140 // lastError is the last error recorded in the last failure of the service 141 lastError string 142 } 143 144 func analyzeService(r io.Reader) (analysis, error) { 145 a := analysis{} 146 decoder := json.NewDecoder(r) 147 t, err := decoder.Token() 148 if err != nil { 149 return a, fmt.Errorf("service entries file does not begin with a token: %w", err) 150 } 151 delim, isDelim := t.(json.Delim) 152 if !isDelim { 153 return a, errors.New("service entries file does not begin with a delimiter") 154 } 155 if delim != '[' { 156 return a, errors.New("service entries file does not begin with an array") 157 } 158 var lastEntry *Entry 159 for decoder.More() { 160 entry := &Entry{} 161 if err := decoder.Decode(entry); err != nil { 162 return a, fmt.Errorf("could not decode an entry in the service entries file: %w", err) 163 } 164 165 // record a new start of the service 166 if entry.Phase == ServiceStart { 167 a.starts++ 168 } 169 170 // the service is only considered successful if the last entry is either the service ending successfully or a 171 // post-command ending successfully. 172 a.successful = entry.Result == Success && (entry.Phase == ServiceEnd || entry.Phase == PostCommandEnd) 173 174 // save the last error 175 if entry.Result == Failure { 176 // if a stage failure causes a service (or pre- or post-command) failure, we want to preserve the failing 177 // stage from the stage end entry. 178 if lastEntry == nil || lastEntry.Phase != StageEnd || lastEntry.Result != Failure { 179 a.failingStage = entry.Stage 180 } 181 a.lastError = entry.ErrorMessage 182 } 183 lastEntry = entry 184 } 185 return a, nil 186 } 187 188 func (a analysis) logLastError() { 189 for _, l := range strings.Split(a.lastError, "\n") { 190 logrus.Info(l) 191 } 192 }