github.com/openshift/installer@v1.4.17/pkg/gather/service/analyze.go (about)

     1  package service
     2  
     3  import (
     4  	"archive/tar"
     5  	"compress/gzip"
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"regexp"
    12  	"strings"
    13  
    14  	"github.com/sirupsen/logrus"
    15  )
    16  
    17  // regex matching the path of a service entries file. The captured group is the name of the service.
    18  // For example, if the filename is "log-bundle-20210329190553/bootstrap/services/release-image.json",
    19  // then the name of the service is "release-image".
    20  // In case the log-bundle is from bootstrap-in-place installation the file name is:
    21  // "log-bundle-20210329190553/log-bundle-bootstrap/bootstrap/services/release-image.json"
    22  var serviceEntriesFilePathRegex = regexp.MustCompile(`^[^\/]+(?:\/log-bundle-bootstrap)?\/bootstrap\/services\/([^.]+)\.json$`)
    23  
    24  // AnalyzeGatherBundle will analyze the bootstrap gather bundle at the specified path.
    25  // Analysis will be logged.
    26  // Returns an error if there was a problem reading the bundle.
    27  func AnalyzeGatherBundle(bundlePath string) error {
    28  	// open the bundle file for reading
    29  	bundleFile, err := os.Open(bundlePath)
    30  	if err != nil {
    31  		return fmt.Errorf("could not open the gather bundle: %w", err)
    32  	}
    33  	defer bundleFile.Close()
    34  	return analyzeGatherBundle(bundleFile)
    35  }
    36  
    37  func analyzeGatherBundle(bundleFile io.Reader) error {
    38  	// decompress the bundle
    39  	uncompressedStream, err := gzip.NewReader(bundleFile)
    40  	if err != nil {
    41  		return fmt.Errorf("could not decompress the gather bundle: %w", err)
    42  	}
    43  	defer uncompressedStream.Close()
    44  
    45  	// read through the tar for relevant files
    46  	tarReader := tar.NewReader(uncompressedStream)
    47  	serviceAnalyses := make(map[string]analysis)
    48  	servicesFound := make([]string, 0)
    49  	for {
    50  		header, err := tarReader.Next()
    51  		if err == io.EOF {
    52  			break
    53  		}
    54  		if err != nil {
    55  			return fmt.Errorf("encountered an error reading from the gather bundle: %w", err)
    56  		}
    57  		if header.Typeflag != tar.TypeReg {
    58  			continue
    59  		}
    60  
    61  		serviceEntriesFileSubmatch := serviceEntriesFilePathRegex.FindStringSubmatch(header.Name)
    62  		if serviceEntriesFileSubmatch == nil {
    63  			continue
    64  		}
    65  		serviceName := serviceEntriesFileSubmatch[1]
    66  		servicesFound = append(servicesFound, serviceName)
    67  
    68  		serviceAnalysis, err := analyzeService(tarReader)
    69  		if err != nil {
    70  			logrus.Infof("Could not analyze the %s.service: %v", serviceName, err)
    71  			continue
    72  		}
    73  
    74  		serviceAnalyses[serviceName] = serviceAnalysis
    75  	}
    76  
    77  	if len(servicesFound) == 0 {
    78  		logrus.Error("Invalid log bundle or the bootstrap machine could not be reached and bootstrap logs were not collected")
    79  		return nil
    80  	}
    81  
    82  	analysisChecks := []struct {
    83  		name     string
    84  		check    func(analysis) bool
    85  		optional bool
    86  	}{
    87  		{name: "release-image", check: checkReleaseImageDownload, optional: false},
    88  		{name: "bootkube", check: checkBootkubeService, optional: false},
    89  	}
    90  	for _, check := range analysisChecks {
    91  		a := serviceAnalyses[check.name]
    92  		if a.starts == 0 {
    93  			if check.optional {
    94  				logrus.Infof("The bootstrap machine did not execute the %s.service systemd unit", check.name)
    95  				break
    96  			}
    97  			logrus.Errorf("The bootstrap machine did not execute the %s.service systemd unit", check.name)
    98  			break
    99  		}
   100  		if !check.check(a) {
   101  			break
   102  		}
   103  	}
   104  
   105  	return nil
   106  }
   107  
   108  func checkReleaseImageDownload(a analysis) bool {
   109  	if a.successful {
   110  		return true
   111  	}
   112  	logrus.Error("The bootstrap machine failed to download the release image")
   113  	a.logLastError()
   114  	return false
   115  }
   116  
   117  // bootstrap-verify-api-servel-urls.sh is currently running as part of the bootkube service.
   118  // And the verification of the API and API-Int URLs are the only stage where a failure is
   119  // currently reported. So, here we are able to conclude that a failure corresponds to a
   120  // failure to resolve either the API URL or API-Int URL or both. If that changes and if
   121  // any other stage in the bootkube service starts reporting a failure, we need to revisit
   122  // this. At that point verification of the URLs could be moved to its own service.
   123  func checkBootkubeService(a analysis) bool {
   124  	if a.successful {
   125  		return true
   126  	}
   127  	// Note: Even when there is a stage failure, we are not returning false here. That is
   128  	// intentional because we donot want to report this as an error in the "analyze" output.
   129  	a.logLastError()
   130  	return true
   131  }
   132  
   133  type analysis struct {
   134  	// starts is the number of times that the service started
   135  	starts int
   136  	// successful is true if the last invocation of the service ended in success
   137  	successful bool
   138  	// failingStage is the stage that failed in the last unsuccessful invocation of the service
   139  	failingStage string
   140  	// lastError is the last error recorded in the last failure of the service
   141  	lastError string
   142  }
   143  
   144  func analyzeService(r io.Reader) (analysis, error) {
   145  	a := analysis{}
   146  	decoder := json.NewDecoder(r)
   147  	t, err := decoder.Token()
   148  	if err != nil {
   149  		return a, fmt.Errorf("service entries file does not begin with a token: %w", err)
   150  	}
   151  	delim, isDelim := t.(json.Delim)
   152  	if !isDelim {
   153  		return a, errors.New("service entries file does not begin with a delimiter")
   154  	}
   155  	if delim != '[' {
   156  		return a, errors.New("service entries file does not begin with an array")
   157  	}
   158  	var lastEntry *Entry
   159  	for decoder.More() {
   160  		entry := &Entry{}
   161  		if err := decoder.Decode(entry); err != nil {
   162  			return a, fmt.Errorf("could not decode an entry in the service entries file: %w", err)
   163  		}
   164  
   165  		// record a new start of the service
   166  		if entry.Phase == ServiceStart {
   167  			a.starts++
   168  		}
   169  
   170  		// the service is only considered successful if the last entry is either the service ending successfully or a
   171  		// post-command ending successfully.
   172  		a.successful = entry.Result == Success && (entry.Phase == ServiceEnd || entry.Phase == PostCommandEnd)
   173  
   174  		// save the last error
   175  		if entry.Result == Failure {
   176  			// if a stage failure causes a service (or pre- or post-command) failure, we want to preserve the failing
   177  			// stage from the stage end entry.
   178  			if lastEntry == nil || lastEntry.Phase != StageEnd || lastEntry.Result != Failure {
   179  				a.failingStage = entry.Stage
   180  			}
   181  			a.lastError = entry.ErrorMessage
   182  		}
   183  		lastEntry = entry
   184  	}
   185  	return a, nil
   186  }
   187  
   188  func (a analysis) logLastError() {
   189  	for _, l := range strings.Split(a.lastError, "\n") {
   190  		logrus.Info(l)
   191  	}
   192  }