github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/logexporter/cmd/main.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // TODO(shyamjvs): Make this exporter work for master too, currently facing
    18  // gcloud auth error when run from within a pod on the master.
    19  
    20  package main
    21  
    22  import (
    23  	"flag"
    24  	"fmt"
    25  	"io"
    26  	"io/ioutil"
    27  	"os"
    28  	"os/exec"
    29  	"path/filepath"
    30  	"time"
    31  
    32  	"github.com/golang/glog"
    33  )
    34  
    35  // Initialize the log exporter's configuration related flags.
    36  var (
    37  	nodeName             = flag.String("node-name", "", "Name of the node this log exporter is running on")
    38  	gcsPath              = flag.String("gcs-path", "", "Path to the GCS directory under which to upload logs, for eg: gs://my-logs-bucket/logs")
    39  	cloudProvider        = flag.String("cloud-provider", "", "Cloud provider for this node (gce/gke/aws/kubemark/..)")
    40  	gcloudAuthFilePath   = flag.String("gcloud-auth-file-path", "/etc/service-account/service-account.json", "Path to gcloud service account file, for authenticating gsutil to write to GCS bucket")
    41  	enableHollowNodeLogs = flag.Bool("enable-hollow-node-logs", false, "Enable uploading hollow node logs too. Relevant only for kubemark nodes")
    42  	sleepDuration        = flag.Duration("sleep-duration", 60*time.Second, "Duration to sleep before exiting with success. Useful for making pods schedule with hard anti-affinity when run as a job on a k8s cluster")
    43  )
    44  
    45  var (
    46  	localLogPath = "/var/log"
    47  
    48  	// Node-type specific logfiles.
    49  	// Currently we only handle nodes, and neglect master.
    50  	nodeLogs = []string{"kube-proxy", "node-problem-detector", "fluentd"}
    51  
    52  	// Cloud provider specific logfiles.
    53  	awsLogs      = []string{"cloud-init-output"}
    54  	gceLogs      = []string{"startupscript"}
    55  	kubemarkLogs = []string{"*-hollow-node-*"}
    56  
    57  	// System services/kernel related logfiles.
    58  	kernelLog            = "kern"
    59  	initdLogs            = []string{"docker"}
    60  	supervisordLogs      = []string{"kubelet", "supervisor/supervisord", "supervisor/kubelet-stdout", "supervisor/kubelet-stderr", "supervisor/docker-stdout", "supervisor/docker-stderr"}
    61  	systemdServices      = []string{"kern", "kubelet", "docker"}
    62  	systemdSetupServices = []string{"kube-node-installation", "kube-node-configuration"}
    63  	nodeSystemdServices  = []string{"node-problem-detector"}
    64  )
    65  
    66  // Check if the config provided through the flags take valid values.
    67  func checkConfigValidity() error {
    68  	glog.Info("Verifying if a valid config has been provided through the flags")
    69  	if *nodeName == "" {
    70  		return fmt.Errorf("Flag --node-name has its value unspecified")
    71  	}
    72  	if *gcsPath == "" {
    73  		return fmt.Errorf("Flag --gcs-path has its value unspecified")
    74  	}
    75  	if _, err := os.Stat(*gcloudAuthFilePath); err != nil {
    76  		return fmt.Errorf("Could not find the gcloud service account file: %v", err)
    77  	} else {
    78  		cmd := exec.Command("gcloud", "auth", "activate-service-account", "--key-file="+*gcloudAuthFilePath)
    79  		if err := cmd.Run(); err != nil {
    80  			return fmt.Errorf("Failed to activate gcloud service account: %v", err)
    81  		}
    82  	}
    83  	return nil
    84  }
    85  
    86  // Create logfile for systemd service in outputDir with the given journalctl outputMode.
    87  func createSystemdLogfile(service string, outputMode string, outputDir string) error {
    88  	// Generate the journalctl command.
    89  	journalCmdArgs := []string{fmt.Sprintf("--output=%v", outputMode), "-D", "/var/log/journal"}
    90  	if service == "kern" {
    91  		journalCmdArgs = append(journalCmdArgs, "-k")
    92  	} else {
    93  		journalCmdArgs = append(journalCmdArgs, "-u", fmt.Sprintf("%v.service", service))
    94  	}
    95  	cmd := exec.Command("journalctl", journalCmdArgs...)
    96  
    97  	// Run the command and record the output to a file.
    98  	output, err := cmd.Output()
    99  	if err != nil {
   100  		return fmt.Errorf("Journalctl command for '%v' service failed: %v", service, err)
   101  	}
   102  	logfile := filepath.Join(outputDir, service+".log")
   103  	if err := ioutil.WriteFile(logfile, output, 0444); err != nil {
   104  		return fmt.Errorf("Writing to file of journalctl logs for '%v' service failed: %v", service, err)
   105  	}
   106  	return nil
   107  }
   108  
   109  // Create logfiles for systemd services in outputDir.
   110  func createSystemdLogfiles(outputDir string) {
   111  	services := append(systemdServices, nodeSystemdServices...)
   112  	for _, service := range services {
   113  		if err := createSystemdLogfile(service, "cat", outputDir); err != nil {
   114  			glog.Warningf("Failed to record journalctl logs: %v", err)
   115  		}
   116  	}
   117  	// Service logs specific to VM setup.
   118  	for _, service := range systemdSetupServices {
   119  		if err := createSystemdLogfile(service, "short-precise", outputDir); err != nil {
   120  			glog.Warningf("Failed to record journalctl logs: %v", err)
   121  		}
   122  	}
   123  }
   124  
   125  // Copy logfiles specific to this node based on the cloud-provider, system services, etc
   126  // to a temporary directory. Also create logfiles for systemd services if journalctl is present.
   127  // We do not expect this function to see an error.
   128  func prepareLogfiles(logDir string) {
   129  	glog.Info("Preparing logfiles relevant to this node")
   130  	logfiles := nodeLogs[:]
   131  
   132  	switch *cloudProvider {
   133  	case "gce", "gke":
   134  		logfiles = append(logfiles, gceLogs...)
   135  	case "aws":
   136  		logfiles = append(logfiles, awsLogs...)
   137  	default:
   138  		glog.Errorf("Unknown cloud provider '%v' provided, skipping any provider specific logs", *cloudProvider)
   139  	}
   140  
   141  	// Grab kubemark logs too, if asked for.
   142  	if *enableHollowNodeLogs {
   143  		logfiles = append(logfiles, kubemarkLogs...)
   144  	}
   145  
   146  	// Select system/service specific logs.
   147  	if _, err := os.Stat("/workspace/etc/systemd/journald.conf"); err == nil {
   148  		glog.Info("Journalctl found on host. Collecting systemd logs")
   149  		createSystemdLogfiles(logDir)
   150  	} else {
   151  		glog.Infof("Journalctl not found on host (%v). Collecting supervisord logs instead", err)
   152  		logfiles = append(logfiles, kernelLog)
   153  		logfiles = append(logfiles, initdLogs...)
   154  		logfiles = append(logfiles, supervisordLogs...)
   155  	}
   156  
   157  	// Copy all the logfiles that exist, to logDir.
   158  	for _, logfile := range logfiles {
   159  		logfileFullPath := filepath.Join(localLogPath, logfile+".log*") // Append .log* to copy rotated logs too.
   160  		cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf("cp %v %v", logfileFullPath, logDir))
   161  		if err := cmd.Run(); err != nil {
   162  			glog.Warningf("Failed to copy any logfiles with pattern '%v': %v", logfileFullPath, err)
   163  		}
   164  	}
   165  }
   166  
   167  func uploadLogfilesToGCS(logDir string) error {
   168  	cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf("ls %v/*", logDir))
   169  	if output, err := cmd.Output(); err != nil {
   170  		return fmt.Errorf("Could not list any logfiles: %v", err)
   171  	} else {
   172  		glog.Infof("List of logfiles available: %v", string(output))
   173  	}
   174  
   175  	gcsLogPath := *gcsPath + "/" + *nodeName
   176  	glog.Infof("Uploading logfiles to GCS at path '%v'", gcsLogPath)
   177  	var err error
   178  	for uploadAttempt := 0; uploadAttempt < 3; uploadAttempt++ {
   179  		// Upload the files with compression (-z) and parallelism (-m) for speeding
   180  		// up, and set their ACL to make them publicly readable.
   181  		cmd := exec.Command("gsutil", "-m", "-q", "cp", "-a", "public-read", "-c",
   182  			"-z", "log,txt,xml", logDir+"/*", gcsLogPath)
   183  		if err = cmd.Run(); err != nil {
   184  			glog.Errorf("Attempt %v to upload to GCS failed: %v", uploadAttempt, err)
   185  			continue
   186  		}
   187  		return writeSuccessMarkerFile()
   188  	}
   189  	return fmt.Errorf("Multiple attempts of gsutil failed, the final one due to: %v", err)
   190  }
   191  
   192  // Write a marker file to GCS named after this node to indicate logexporter's success.
   193  // The directory to which we write this file can then be used as a registry to quickly
   194  // fetch the list of nodes on which logexporter succeeded.
   195  func writeSuccessMarkerFile() error {
   196  	markerFilePath := *gcsPath + "/logexported-nodes-registry/" + *nodeName + ".txt"
   197  	cmd := exec.Command("gsutil", "-q", "cp", "-a", "public-read", "-", markerFilePath)
   198  	stdin, err := cmd.StdinPipe()
   199  	if err != nil {
   200  		return fmt.Errorf("Failed to get stdin pipe to write marker file: %v", err)
   201  	}
   202  	io.WriteString(stdin, "")
   203  	stdin.Close()
   204  	if err = cmd.Run(); err != nil {
   205  		return fmt.Errorf("Failed to write marker file to GCS: %v", err)
   206  	}
   207  	return nil
   208  }
   209  
   210  func main() {
   211  	flag.Parse()
   212  	if err := checkConfigValidity(); err != nil {
   213  		glog.Fatalf("Bad config provided: %v", err)
   214  	}
   215  
   216  	localTmpLogPath, err := ioutil.TempDir("/tmp", "k8s-systemd-logs")
   217  	if err != nil {
   218  		glog.Fatalf("Could not create temporary dir locally for copying logs: %v", err)
   219  	}
   220  	defer os.RemoveAll(localTmpLogPath)
   221  
   222  	prepareLogfiles(localTmpLogPath)
   223  	if err := uploadLogfilesToGCS(localTmpLogPath); err != nil {
   224  		glog.Fatalf("Could not upload logs to GCS: %v", err)
   225  	}
   226  	glog.Info("Logs successfully uploaded")
   227  
   228  	glog.Infof("Entering sleep for a duration of %v seconds", *sleepDuration)
   229  	time.Sleep(*sleepDuration)
   230  }